[apple/xnu.git] / osfmk / kern / task.c (xnu-4903.231.4)
1c79356b 1/*
39037602 2 * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
1c79356b 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * @OSF_FREE_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 * File: kern/task.c
58 * Author: Avadis Tevanian, Jr., Michael Wayne Young, David Golub,
59 * David Black
60 *
61 * Task management primitives implementation.
62 */
63/*
64 * Copyright (c) 1993 The University of Utah and
65 * the Computer Systems Laboratory (CSL). All rights reserved.
66 *
67 * Permission to use, copy, modify and distribute this software and its
68 * documentation is hereby granted, provided that both the copyright
69 * notice and this permission notice appear in all copies of the
70 * software, derivative works or modified versions, and any portions
71 * thereof, and that both notices appear in supporting documentation.
72 *
73 * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
74 * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
75 * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
76 *
77 * CSL requests users of this software to return to csl-dist@cs.utah.edu any
78 * improvements that they make and grant CSL redistribution rights.
79 *
80 */
2d21ac55
A
81/*
82 * NOTICE: This file was modified by McAfee Research in 2004 to introduce
83 * support for mandatory and extensible security protections. This notice
84 * is included in support of clause 2.2 (b) of the Apple Public License,
85 * Version 2.0.
86 * Copyright (c) 2005 SPARTA, Inc.
87 */
1c79356b 88
91447636 89#include <mach/mach_types.h>
1c79356b 90#include <mach/boolean.h>
91447636 91#include <mach/host_priv.h>
1c79356b
A
92#include <mach/machine/vm_types.h>
93#include <mach/vm_param.h>
3e170ce0 94#include <mach/mach_vm.h>
1c79356b
A
95#include <mach/semaphore.h>
96#include <mach/task_info.h>
5ba3f43e 97#include <mach/task_inspect.h>
1c79356b 98#include <mach/task_special_ports.h>
39037602 99#include <mach/sdt.h>
91447636 100
fe8ab488 101#include <ipc/ipc_importance.h>
91447636 102#include <ipc/ipc_types.h>
1c79356b
A
103#include <ipc/ipc_space.h>
104#include <ipc/ipc_entry.h>
39236c6e 105#include <ipc/ipc_hash.h>
91447636
A
106
107#include <kern/kern_types.h>
1c79356b
A
108#include <kern/mach_param.h>
109#include <kern/misc_protos.h>
110#include <kern/task.h>
111#include <kern/thread.h>
fe8ab488 112#include <kern/coalition.h>
1c79356b
A
113#include <kern/zalloc.h>
114#include <kern/kalloc.h>
3e170ce0 115#include <kern/kern_cdata.h>
1c79356b
A
116#include <kern/processor.h>
117#include <kern/sched_prim.h> /* for thread_wakeup */
1c79356b 118#include <kern/ipc_tt.h>
1c79356b 119#include <kern/host.h>
91447636
A
120#include <kern/clock.h>
121#include <kern/timer.h>
1c79356b
A
122#include <kern/assert.h>
123#include <kern/sync_lock.h>
2d21ac55 124#include <kern/affinity.h>
39236c6e 125#include <kern/exc_resource.h>
3e170ce0 126#include <kern/machine.h>
39037602
A
127#include <kern/policy_internal.h>
128
3e170ce0 129#include <corpses/task_corpse.h>
39236c6e
A
130#if CONFIG_TELEMETRY
131#include <kern/telemetry.h>
132#endif
91447636 133
5ba3f43e
A
134#if MONOTONIC
135#include <kern/monotonic.h>
136#include <machine/monotonic.h>
137#endif /* MONOTONIC */
138
139#include <os/log.h>
140
91447636
A
141#include <vm/pmap.h>
142#include <vm/vm_map.h>
143#include <vm/vm_kern.h> /* for kernel_map, ipc_kernel_map */
144#include <vm/vm_pageout.h>
2d21ac55 145#include <vm/vm_protos.h>
39236c6e 146#include <vm/vm_purgeable_internal.h>
91447636 147
39236c6e 148#include <sys/resource.h>
3e170ce0
A
149#include <sys/signalvar.h> /* for coredump */
150
1c79356b
A
151/*
152 * Exported interfaces
153 */
154
155#include <mach/task_server.h>
156#include <mach/mach_host_server.h>
157#include <mach/host_security_server.h>
91447636
A
158#include <mach/mach_port_server.h>
159
2d21ac55
A
160#include <vm/vm_shared_region.h>
161
39236c6e
A
162#include <libkern/OSDebug.h>
163#include <libkern/OSAtomic.h>
d9a64523 164#include <libkern/section_keywords.h>
39236c6e 165
fe8ab488
A
166#if CONFIG_ATM
167#include <atm/atm_internal.h>
168#endif
169
39037602
A
170#include <kern/sfi.h> /* picks up ledger.h */
171
172#if CONFIG_MACF
173#include <security/mac_mach_internal.h>
174#endif
fe8ab488
A
175
176#if KPERF
177extern int kpc_force_all_ctrs(task_t, int);
178#endif
179
b0d623f7
A
180task_t kernel_task;
181zone_t task_zone;
182lck_attr_t task_lck_attr;
183lck_grp_t task_lck_grp;
184lck_grp_attr_t task_lck_grp_attr;
185
39037602 186extern int exc_via_corpse_forking;
39037602 187extern int corpse_for_fatal_memkill;
d9a64523 188extern boolean_t proc_send_synchronous_EXC_RESOURCE(void *p);
39037602 189
15129b1c
A
190/* Flag set by core audio when audio is playing. Used to stifle EXC_RESOURCE generation when active. */
191int audio_active = 0;
192
6d2010ae
A
193zinfo_usage_store_t tasks_tkm_private;
194zinfo_usage_store_t tasks_tkm_shared;
195
4b17d6b6 196/* A container to accumulate statistics for expired tasks */
39236c6e
A
197expired_task_statistics_t dead_task_statistics;
198lck_spin_t dead_task_statistics_lock;
4b17d6b6 199
fe8ab488
A
200ledger_template_t task_ledger_template = NULL;
201
d9a64523
A
202SECURITY_READ_ONLY_LATE(struct _task_ledger_indices) task_ledgers __attribute__((used)) =
203{.cpu_time = -1,
204 .tkm_private = -1,
205 .tkm_shared = -1,
206 .phys_mem = -1,
207 .wired_mem = -1,
208 .internal = -1,
209 .iokit_mapped = -1,
210 .alternate_accounting = -1,
211 .alternate_accounting_compressed = -1,
212 .page_table = -1,
213 .phys_footprint = -1,
214 .internal_compressed = -1,
215 .purgeable_volatile = -1,
216 .purgeable_nonvolatile = -1,
217 .purgeable_volatile_compressed = -1,
218 .purgeable_nonvolatile_compressed = -1,
219 .network_volatile = -1,
220 .network_nonvolatile = -1,
221 .network_volatile_compressed = -1,
222 .network_nonvolatile_compressed = -1,
223 .platform_idle_wakeups = -1,
224 .interrupt_wakeups = -1,
5ba3f43e 225#if !CONFIG_EMBEDDED
d9a64523 226 .sfi_wait_times = { 0 /* initialized at runtime */},
5ba3f43e 227#endif /* !CONFIG_EMBEDDED */
d9a64523
A
228 .cpu_time_billed_to_me = -1,
229 .cpu_time_billed_to_others = -1,
230 .physical_writes = -1,
231 .logical_writes = -1,
232 .energy_billed_to_me = -1,
233 .energy_billed_to_others = -1
234};
fe8ab488 235
4bd07ac2
A
236/* System sleep state */
237boolean_t tasks_suspend_state;
238
239
316670eb 240void init_task_ledgers(void);
39236c6e
A
241void task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1);
242void task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1);
39037602
A
243void task_io_rate_exceeded(int warning, const void *param0, __unused const void *param1);
244void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS(void);
245void __attribute__((noinline)) PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb, boolean_t is_fatal);
246void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO(int flavor);
39236c6e
A
247
248kern_return_t task_suspend_internal(task_t);
249kern_return_t task_resume_internal(task_t);
3e170ce0
A
250static kern_return_t task_start_halt_locked(task_t task, boolean_t should_mark_corpse);
251
7e41aa88 252extern kern_return_t iokit_task_terminate(task_t task);
39236c6e 253
3e170ce0 254extern kern_return_t exception_deliver(thread_t, exception_type_t, mach_exception_data_t, mach_msg_type_number_t, struct exception_action *, lck_mtx_t *);
39037602 255extern void bsd_copythreadname(void *dst_uth, void *src_uth);
743345f9 256extern kern_return_t thread_resume(thread_t thread);
39236c6e
A
257
258// Warn tasks when they hit 80% of their memory limit.
259#define PHYS_FOOTPRINT_WARNING_LEVEL 80
260
261#define TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT 150 /* wakeups per second */
262#define TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL 300 /* in seconds. */
263
264/*
265 * Level (in terms of percentage of the limit) at which the wakeups monitor triggers telemetry.
266 *
267 * (ie when the task's wakeups rate exceeds 70% of the limit, start taking user
268 * stacktraces, aka micro-stackshots)
269 */
270#define TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER 70
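/*
 * Worked example with the defaults above: the monitor starts taking
 * micro-stackshots once a task's observed rate exceeds
 *
 *	TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT * TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER / 100
 *	= 150 * 70 / 100 = 105 wakeups per second.
 */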
271
272int task_wakeups_monitor_interval; /* In seconds. Time period over which wakeups rate is observed */
273int task_wakeups_monitor_rate; /* In hz. Maximum allowable wakeups per task before EXC_RESOURCE is sent */
274
275int task_wakeups_monitor_ustackshots_trigger_pct; /* Percentage. Level at which we start gathering telemetry. */
316670eb 276
39236c6e 277int disable_exc_resource; /* Global override to supress EXC_RESOURCE for resource monitor violations. */
316670eb 278
3e170ce0 279ledger_amount_t max_task_footprint = 0; /* Per-task limit on physical memory consumption in bytes */
39037602 280int max_task_footprint_warning_level = 0; /* Per-task limit warning percentage */
3e170ce0
A
281int max_task_footprint_mb = 0; /* Per-task limit on physical memory consumption in megabytes */
282
39037602
A
283/* I/O Monitor Limits */
284#define IOMON_DEFAULT_LIMIT (20480ull) /* MB of logical/physical I/O */
285#define IOMON_DEFAULT_INTERVAL (86400ull) /* in seconds */
286
287uint64_t task_iomon_limit_mb; /* Per-task I/O monitor limit in MBs */
288uint64_t task_iomon_interval_secs; /* Per-task I/O monitor interval in secs */
289
290#define IO_TELEMETRY_DEFAULT_LIMIT (10ll * 1024ll * 1024ll)
 291 int64_t io_telemetry_limit; /* Threshold to take a microstackshot (0 indicates I/O telemetry is turned off) */
292int64_t global_logical_writes_count = 0; /* Global count for logical writes */
293static boolean_t global_update_logical_writes(int64_t);
294
d9a64523
A
295#define TASK_MAX_THREAD_LIMIT 256
296
fe8ab488
A
297#if MACH_ASSERT
298int pmap_ledgers_panic = 1;
d9a64523 299int pmap_ledgers_panic_leeway = 3;
fe8ab488
A
300#endif /* MACH_ASSERT */
301
b0d623f7 302int task_max = CONFIG_TASK_MAX; /* Max number of tasks */
1c79356b 303
39037602 304#if CONFIG_COREDUMP
39236c6e 305int hwm_user_cores = 0; /* high watermark violations generate user core files */
39037602 306#endif
39236c6e
A
307
308#ifdef MACH_BSD
309extern void proc_getexecutableuuid(void *, unsigned char *, unsigned long);
310extern int proc_pid(struct proc *p);
311extern int proc_selfpid(void);
d9a64523 312extern struct proc *current_proc(void);
39236c6e 313extern char *proc_name_address(struct proc *p);
3e170ce0 314extern uint64_t get_dispatchqueue_offset_from_proc(void *);
5ba3f43e 315extern int kevent_proc_copy_uptrs(void *proc, uint64_t *buf, int bufsize);
d9a64523
A
316extern void workq_proc_suspended(struct proc *p);
317extern void workq_proc_resumed(struct proc *p);
39037602
A
318
319#if CONFIG_MEMORYSTATUS
3e170ce0 320extern void proc_memstat_terminated(struct proc* p, boolean_t set);
813fb2f6
A
321extern void memorystatus_on_ledger_footprint_exceeded(int warning, boolean_t memlimit_is_active, boolean_t memlimit_is_fatal);
322extern void memorystatus_log_exception(const int max_footprint_mb, boolean_t memlimit_is_active, boolean_t memlimit_is_fatal);
a39ff7e2
A
323extern boolean_t memorystatus_allowed_vm_map_fork(task_t task);
324
325#if DEVELOPMENT || DEBUG
326extern void memorystatus_abort_vm_map_fork(task_t);
327#endif
328
39037602
A
329#endif /* CONFIG_MEMORYSTATUS */
330
331#endif /* MACH_BSD */
6d2010ae 332
d9a64523
A
333#if DEVELOPMENT || DEBUG
334int exc_resource_threads_enabled;
335#endif /* DEVELOPMENT || DEBUG */
336
337#if (DEVELOPMENT || DEBUG) && TASK_EXC_GUARD_DELIVER_CORPSE
338uint32_t task_exc_guard_default = TASK_EXC_GUARD_MP_DELIVER | TASK_EXC_GUARD_MP_CORPSE |
339 TASK_EXC_GUARD_VM_DELIVER | TASK_EXC_GUARD_VM_CORPSE;
340#else
341uint32_t task_exc_guard_default = 0;
342#endif
343
1c79356b
A
344/* Forwards */
345
39037602
A
346static void task_hold_locked(task_t task);
347static void task_wait_locked(task_t task, boolean_t until_not_runnable);
348static void task_release_locked(task_t task);
349
350static void task_synchronizer_destroy_all(task_t task);
b0d623f7 351
91447636
A
352
353void
354task_set_64bit(
355 task_t task,
d9a64523
A
356 boolean_t is_64bit,
357 boolean_t is_64bit_data)
91447636 358{
fe8ab488 359#if defined(__i386__) || defined(__x86_64__) || defined(__arm64__)
2d21ac55 360 thread_t thread;
fe8ab488 361#endif /* defined(__i386__) || defined(__x86_64__) || defined(__arm64__) */
39236c6e
A
362
363 task_lock(task);
0c530ab8 364
d9a64523
A
365 /*
366 * Switching to/from 64-bit address spaces
367 */
368 if (is_64bit) {
369 if (!task_has_64Bit_addr(task)) {
370 task_set_64Bit_addr(task);
371 }
372 } else {
373 if (task_has_64Bit_addr(task)) {
374 task_clear_64Bit_addr(task);
375 }
376 }
377
378 /*
379 * Switching to/from 64-bit register state.
380 */
381 if (is_64bit_data) {
382 if (task_has_64Bit_data(task))
39236c6e 383 goto out;
d9a64523
A
384
385 task_set_64Bit_data(task);
91447636 386 } else {
d9a64523 387 if ( !task_has_64Bit_data(task))
39236c6e 388 goto out;
d9a64523
A
389
390 task_clear_64Bit_data(task);
91447636 391 }
d9a64523 392
0c530ab8
A
393 /* FIXME: On x86, the thread save state flavor can diverge from the
394 * task's 64-bit feature flag due to the 32-bit/64-bit register save
395 * state dichotomy. Since we can be pre-empted in this interval,
396 * certain routines may observe the thread as being in an inconsistent
397 * state with respect to its task's 64-bitness.
398 */
39236c6e 399
00867663 400#if defined(__x86_64__) || defined(__arm64__)
0c530ab8 401 queue_iterate(&task->threads, thread, thread_t, task_threads) {
b0d623f7 402 thread_mtx_lock(thread);
2d21ac55 403 machine_thread_switch_addrmode(thread);
b0d623f7 404 thread_mtx_unlock(thread);
39037602 405
5ba3f43e
A
406#if defined(__arm64__)
407 /* specifically, if running on H9 */
39037602
A
408 if (thread == current_thread()) {
409 uint64_t arg1, arg2;
410 int urgency;
411 spl_t spl = splsched();
412 /*
 413 * This call reports that the current thread changed its 32-bitness.
 414 * Other threads were no longer on core when their 32-bitness changed,
 415 * but current_thread() is on core, and the previous call to
 416 * machine_thread_going_on_core() reported a 32-bitness that is now stale.
 417 *
 418 * This is needed for bring-up; a different callback should be used
 419 * in the future.
5ba3f43e
A
420 *
421 * TODO: Remove this callout when we no longer support 32-bit code on H9
39037602
A
422 */
423 thread_lock(thread);
424 urgency = thread_get_urgency(thread, &arg1, &arg2);
5ba3f43e 425 machine_thread_going_on_core(thread, urgency, 0, 0, mach_approximate_time());
39037602
A
426 thread_unlock(thread);
427 splx(spl);
428 }
5ba3f43e 429#endif /* defined(__arm64__) */
0c530ab8 430 }
00867663 431#endif /* defined(__x86_64__) || defined(__arm64__) */
39236c6e
A
432
433out:
b0d623f7 434 task_unlock(task);
91447636
A
435}
436
d9a64523
A
437boolean_t
438task_get_64bit_data(task_t task)
439{
440 return task_has_64Bit_data(task);
441}
442
5ba3f43e
A
443void
444task_set_platform_binary(
445 task_t task,
446 boolean_t is_platform)
447{
448 task_lock(task);
449 if (is_platform) {
450 task->t_flags |= TF_PLATFORM;
451 } else {
452 task->t_flags &= ~(TF_PLATFORM);
453 }
454 task_unlock(task);
455}
b0d623f7 456
a39ff7e2
A
457/*
458 * Set or clear per-task TF_CA_CLIENT_WI flag according to specified argument.
459 * Returns "false" if flag is already set, and "true" in other cases.
460 */
461bool
462task_set_ca_client_wi(
463 task_t task,
464 boolean_t set_or_clear)
465{
466 bool ret = true;
467 task_lock(task);
468 if (set_or_clear) {
469 /* Tasks can have only one CA_CLIENT work interval */
470 if (task->t_flags & TF_CA_CLIENT_WI)
471 ret = false;
472 else
473 task->t_flags |= TF_CA_CLIENT_WI;
474 } else {
475 task->t_flags &= ~TF_CA_CLIENT_WI;
476 }
477 task_unlock(task);
478 return ret;
479}
480
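/*
 * Hypothetical usage sketch (caller and error choice are illustrative,
 * not from this file): a caller claiming the single CA_CLIENT work
 * interval treats a false return as "already claimed":
 *
 *	if (!task_set_ca_client_wi(task, TRUE)) {
 *		return KERN_FAILURE;	// the one CA_CLIENT slot is taken
 *	}
 *	...
 *	task_set_ca_client_wi(task, FALSE);	// release on teardown
 */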
b0d623f7 481void
5ba3f43e
A
482task_set_dyld_info(
483 task_t task,
484 mach_vm_address_t addr,
485 mach_vm_size_t size)
b0d623f7
A
486{
487 task_lock(task);
488 task->all_image_info_addr = addr;
5ba3f43e
A
489 task->all_image_info_size = size;
490 task_unlock(task);
b0d623f7
A
491}
492
fe8ab488
A
493void
494task_atm_reset(__unused task_t task) {
495
496#if CONFIG_ATM
497 if (task->atm_context != NULL) {
498 atm_task_descriptor_destroy(task->atm_context);
499 task->atm_context = NULL;
500 }
501#endif
502
503}
504
490019cf
A
505void
506task_bank_reset(__unused task_t task) {
507
490019cf
A
508 if (task->bank_context != NULL) {
509 bank_task_destroy(task);
510 }
490019cf
A
511}
512
513/*
514 * NOTE: This should only be called when the P_LINTRANSIT
515 * flag is set (the proc_trans lock is held) on the
516 * proc associated with the task.
517 */
518void
519task_bank_init(__unused task_t task) {
520
490019cf
A
521 if (task->bank_context != NULL) {
522 panic("Task bank init called with non null bank context for task: %p and bank_context: %p", task, task->bank_context);
523 }
524 bank_task_initialize(task);
490019cf
A
525}
526
743345f9
A
527void
528task_set_did_exec_flag(task_t task)
529{
530 task->t_procflags |= TPF_DID_EXEC;
531}
532
533void
534task_clear_exec_copy_flag(task_t task)
535{
536 task->t_procflags &= ~TPF_EXEC_COPY;
537}
538
539/*
540 * This wait event is t_procflags instead of t_flags because t_flags is volatile
541 *
542 * TODO: store the flags in the same place as the event
543 * rdar://problem/28501994
544 */
545event_t
546task_get_return_wait_event(task_t task)
547{
548 return (event_t)&task->t_procflags;
549}
550
551void
552task_clear_return_wait(task_t task)
553{
554 task_lock(task);
555
556 task->t_flags &= ~TF_LRETURNWAIT;
557
558 if (task->t_flags & TF_LRETURNWAITER) {
559 thread_wakeup(task_get_return_wait_event(task));
560 task->t_flags &= ~TF_LRETURNWAITER;
561 }
562
563 task_unlock(task);
564}
565
a39ff7e2 566void __attribute__((noreturn))
743345f9
A
567task_wait_to_return(void)
568{
569 task_t task;
570
571 task = current_task();
572 task_lock(task);
573
574 if (task->t_flags & TF_LRETURNWAIT) {
575 do {
576 task->t_flags |= TF_LRETURNWAITER;
577 assert_wait(task_get_return_wait_event(task), THREAD_UNINT);
578 task_unlock(task);
579
580 thread_block(THREAD_CONTINUE_NULL);
581
582 task_lock(task);
583 } while (task->t_flags & TF_LRETURNWAIT);
584 }
585
586 task_unlock(task);
587
d9a64523
A
588#if CONFIG_MACF
589 /*
590 * Before jumping to userspace and allowing this process to execute any code,
591 * notify any interested parties.
592 */
593 mac_proc_notify_exec_complete(current_proc());
594#endif
595
743345f9
A
596 thread_bootstrap_return();
597}
598
527f9951
A
599#ifdef CONFIG_32BIT_TELEMETRY
600boolean_t
601task_consume_32bit_log_flag(task_t task)
602{
603 if ((task->t_procflags & TPF_LOG_32BIT_TELEMETRY) != 0) {
604 task->t_procflags &= ~TPF_LOG_32BIT_TELEMETRY;
605 return TRUE;
606 } else {
607 return FALSE;
608 }
609}
610
611void
612task_set_32bit_log_flag(task_t task)
613{
614 task->t_procflags |= TPF_LOG_32BIT_TELEMETRY;
615}
616#endif /* CONFIG_32BIT_TELEMETRY */
617
743345f9
A
618boolean_t
619task_is_exec_copy(task_t task)
620{
621 return task_is_exec_copy_internal(task);
622}
623
624boolean_t
625task_did_exec(task_t task)
626{
627 return task_did_exec_internal(task);
628}
629
630boolean_t
631task_is_active(task_t task)
632{
633 return task->active;
634}
635
5ba3f43e
A
636boolean_t
637task_is_halting(task_t task)
638{
639 return task->halting;
640}
641
39236c6e
A
642#if TASK_REFERENCE_LEAK_DEBUG
643#include <kern/btlog.h>
644
39236c6e
A
645static btlog_t *task_ref_btlog;
646#define TASK_REF_OP_INCR 0x1
647#define TASK_REF_OP_DECR 0x2
648
39037602 649#define TASK_REF_NUM_RECORDS 100000
39236c6e
A
650#define TASK_REF_BTDEPTH 7
651
39236c6e
A
652void
653task_reference_internal(task_t task)
654{
655 void * bt[TASK_REF_BTDEPTH];
656 int numsaved = 0;
657
e8c3f781
A
658 os_ref_retain(&task->ref_count);
659
39236c6e 660 numsaved = OSBacktrace(bt, TASK_REF_BTDEPTH);
39236c6e
A
661 btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_INCR,
662 bt, numsaved);
663}
664
e8c3f781 665os_ref_count_t
39236c6e
A
666task_deallocate_internal(task_t task)
667{
668 void * bt[TASK_REF_BTDEPTH];
669 int numsaved = 0;
670
671 numsaved = OSBacktrace(bt, TASK_REF_BTDEPTH);
39236c6e
A
672 btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_DECR,
673 bt, numsaved);
e8c3f781
A
674
675 return os_ref_release(&task->ref_count);
39236c6e
A
676}
677
678#endif /* TASK_REFERENCE_LEAK_DEBUG */
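/*
 * With TASK_REFERENCE_LEAK_DEBUG enabled, every retain and release above is
 * recorded in task_ref_btlog together with a backtrace, so a task whose
 * refcount never reaches zero can be diagnosed after the fact by pairing
 * TASK_REF_OP_INCR records against TASK_REF_OP_DECR records for that task
 * pointer: any INCR backtrace left unmatched identifies the leaked reference.
 */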
679
1c79356b
A
680void
681task_init(void)
682{
b0d623f7
A
683
684 lck_grp_attr_setdefault(&task_lck_grp_attr);
685 lck_grp_init(&task_lck_grp, "task", &task_lck_grp_attr);
686 lck_attr_setdefault(&task_lck_attr);
687 lck_mtx_init(&tasks_threads_lock, &task_lck_grp, &task_lck_attr);
39037602 688 lck_mtx_init(&tasks_corpse_lock, &task_lck_grp, &task_lck_attr);
b0d623f7 689
1c79356b
A
690 task_zone = zinit(
691 sizeof(struct task),
b0d623f7 692 task_max * sizeof(struct task),
1c79356b
A
693 TASK_CHUNK * sizeof(struct task),
694 "tasks");
6d2010ae 695
0b4c1975 696 zone_change(task_zone, Z_NOENCRYPT, TRUE);
1c79356b 697
5ba3f43e
A
698#if CONFIG_EMBEDDED
699 task_watch_init();
700#endif /* CONFIG_EMBEDDED */
39037602 701
39236c6e 702 /*
fe8ab488
A
703 * Configure per-task memory limit.
704 * The boot-arg is interpreted as Megabytes,
705 * and takes precedence over the device tree.
706 * Setting the boot-arg to 0 disables task limits.
39236c6e 707 */
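/*
 * For example, booting with max_task_pmem=512 limits each task's physical
 * footprint to 512 MB (overriding any kern.max_task_pmem device-tree
 * entry), while max_task_pmem=0 disables the per-task limit entirely.
 */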
3e170ce0
A
708 if (!PE_parse_boot_argn("max_task_pmem", &max_task_footprint_mb,
709 sizeof (max_task_footprint_mb))) {
39236c6e
A
710 /*
711 * No limit was found in boot-args, so go look in the device tree.
712 */
3e170ce0
A
713 if (!PE_get_default("kern.max_task_pmem", &max_task_footprint_mb,
714 sizeof(max_task_footprint_mb))) {
fe8ab488
A
715 /*
716 * No limit was found in device tree.
717 */
3e170ce0 718 max_task_footprint_mb = 0;
39236c6e
A
719 }
720 }
721
3e170ce0 722 if (max_task_footprint_mb != 0) {
39037602 723#if CONFIG_MEMORYSTATUS
3e170ce0 724 if (max_task_footprint_mb < 50) {
39236c6e 725 printf("Warning: max_task_pmem %d below minimum.\n",
3e170ce0
A
726 max_task_footprint_mb);
727 max_task_footprint_mb = 50;
39236c6e
A
728 }
729 printf("Limiting task physical memory footprint to %d MB\n",
3e170ce0
A
730 max_task_footprint_mb);
731
732 max_task_footprint = (ledger_amount_t)max_task_footprint_mb * 1024 * 1024; // Convert MB to bytes
39037602
A
733
734 /*
735 * Configure the per-task memory limit warning level.
736 * This is computed as a percentage.
737 */
738 max_task_footprint_warning_level = 0;
739
740 if (max_mem < 0x40000000) {
741 /*
742 * On devices with < 1GB of memory:
743 * -- set warnings to 50MB below the per-task limit.
744 */
745 if (max_task_footprint_mb > 50) {
746 max_task_footprint_warning_level = ((max_task_footprint_mb - 50) * 100) / max_task_footprint_mb;
747 }
748 } else {
749 /*
750 * On devices with >= 1GB of memory:
751 * -- set warnings to 100MB below the per-task limit.
752 */
753 if (max_task_footprint_mb > 100) {
754 max_task_footprint_warning_level = ((max_task_footprint_mb - 100) * 100) / max_task_footprint_mb;
755 }
756 }
757
758 /*
759 * Never allow warning level to land below the default.
760 */
761 if (max_task_footprint_warning_level < PHYS_FOOTPRINT_WARNING_LEVEL) {
762 max_task_footprint_warning_level = PHYS_FOOTPRINT_WARNING_LEVEL;
763 }
764
765 printf("Limiting task physical memory warning to %d%%\n", max_task_footprint_warning_level);
766
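		/*
		 * Worked example: on a device with >= 1GB of memory and a
		 * 1000 MB per-task limit, the warning level computes to
		 * ((1000 - 100) * 100) / 1000 = 90, i.e. the footprint
		 * warning fires at 900 MB, 100 MB below the limit.
		 */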
39236c6e 767#else
39037602
A
768 printf("Warning: max_task_pmem specified, but jetsam not configured; ignoring.\n");
769#endif /* CONFIG_MEMORYSTATUS */
39236c6e
A
770 }
771
d9a64523
A
772#if DEVELOPMENT || DEBUG
773 if (!PE_parse_boot_argn("exc_resource_threads",
774 &exc_resource_threads_enabled,
775 sizeof(exc_resource_threads_enabled))) {
776 exc_resource_threads_enabled = 1;
777 }
778 PE_parse_boot_argn("task_exc_guard_default",
779 &task_exc_guard_default,
780 sizeof(task_exc_guard_default));
781#endif /* DEVELOPMENT || DEBUG */
fe8ab488 782
39037602 783#if CONFIG_COREDUMP
39236c6e
A
784 if (!PE_parse_boot_argn("hwm_user_cores", &hwm_user_cores,
785 sizeof (hwm_user_cores))) {
786 hwm_user_cores = 0;
787 }
39037602 788#endif
a1c7dba1 789
39236c6e
A
790 proc_init_cpumon_params();
791
792 if (!PE_parse_boot_argn("task_wakeups_monitor_rate", &task_wakeups_monitor_rate, sizeof (task_wakeups_monitor_rate))) {
793 task_wakeups_monitor_rate = TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT;
794 }
795
796 if (!PE_parse_boot_argn("task_wakeups_monitor_interval", &task_wakeups_monitor_interval, sizeof (task_wakeups_monitor_interval))) {
797 task_wakeups_monitor_interval = TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL;
798 }
799
800 if (!PE_parse_boot_argn("task_wakeups_monitor_ustackshots_trigger_pct", &task_wakeups_monitor_ustackshots_trigger_pct,
801 sizeof (task_wakeups_monitor_ustackshots_trigger_pct))) {
802 task_wakeups_monitor_ustackshots_trigger_pct = TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER;
803 }
804
805 if (!PE_parse_boot_argn("disable_exc_resource", &disable_exc_resource,
806 sizeof (disable_exc_resource))) {
807 disable_exc_resource = 0;
808 }
809
39037602
A
810 if (!PE_parse_boot_argn("task_iomon_limit_mb", &task_iomon_limit_mb, sizeof (task_iomon_limit_mb))) {
811 task_iomon_limit_mb = IOMON_DEFAULT_LIMIT;
812 }
813
814 if (!PE_parse_boot_argn("task_iomon_interval_secs", &task_iomon_interval_secs, sizeof (task_iomon_interval_secs))) {
815 task_iomon_interval_secs = IOMON_DEFAULT_INTERVAL;
816 }
817
818 if (!PE_parse_boot_argn("io_telemetry_limit", &io_telemetry_limit, sizeof (io_telemetry_limit))) {
819 io_telemetry_limit = IO_TELEMETRY_DEFAULT_LIMIT;
820 }
821
fe8ab488
A
822/*
823 * If we have coalitions, coalition_init() will call init_task_ledgers() as it
824 * sets up the ledgers for the default coalition. If we don't have coalitions,
825 * then we have to call it now.
826 */
827#if CONFIG_COALITIONS
828 assert(task_ledger_template);
829#else /* CONFIG_COALITIONS */
316670eb 830 init_task_ledgers();
fe8ab488 831#endif /* CONFIG_COALITIONS */
316670eb 832
39236c6e 833#if TASK_REFERENCE_LEAK_DEBUG
39037602 834 task_ref_btlog = btlog_create(TASK_REF_NUM_RECORDS, TASK_REF_BTDEPTH, TRUE /* caller_will_remove_entries_for_element? */);
39236c6e
A
835 assert(task_ref_btlog);
836#endif
837
1c79356b
A
838 /*
839 * Create the kernel task as the first task.
1c79356b 840 */
b0d623f7 841#ifdef __LP64__
d9a64523 842 if (task_create_internal(TASK_NULL, NULL, FALSE, TRUE, TRUE, TF_NONE, TPF_NONE, &kernel_task) != KERN_SUCCESS)
b0d623f7 843#else
d9a64523 844 if (task_create_internal(TASK_NULL, NULL, FALSE, FALSE, FALSE, TF_NONE, TPF_NONE, &kernel_task) != KERN_SUCCESS)
b0d623f7 845#endif
1c79356b 846 panic("task_init\n");
55e303ae 847
5ba3f43e 848
1c79356b
A
849 vm_map_deallocate(kernel_task->map);
850 kernel_task->map = kernel_map;
4b17d6b6 851 lck_spin_init(&dead_task_statistics_lock, &task_lck_grp, &task_lck_attr);
1c79356b
A
852}
853
1c79356b
A
854/*
855 * Create a task running in the kernel address space. It may
856 * have its own map of size mem_size and may have ipc privileges.
857 */
858kern_return_t
859kernel_task_create(
91447636
A
860 __unused task_t parent_task,
861 __unused vm_offset_t map_base,
862 __unused vm_size_t map_size,
863 __unused task_t *child_task)
1c79356b 864{
55e303ae 865 return (KERN_INVALID_ARGUMENT);
1c79356b
A
866}
867
868kern_return_t
869task_create(
2d21ac55 870 task_t parent_task,
91447636 871 __unused ledger_port_array_t ledger_ports,
2d21ac55
A
872 __unused mach_msg_type_number_t num_ledger_ports,
873 __unused boolean_t inherit_memory,
874 __unused task_t *child_task) /* OUT */
1c79356b
A
875{
876 if (parent_task == TASK_NULL)
877 return(KERN_INVALID_ARGUMENT);
878
2d21ac55
A
879 /*
880 * No longer supported: too many calls assume that a task has a valid
881 * process attached.
882 */
883 return(KERN_FAILURE);
1c79356b
A
884}
885
886kern_return_t
887host_security_create_task_token(
91447636 888 host_security_t host_security,
2d21ac55
A
889 task_t parent_task,
890 __unused security_token_t sec_token,
891 __unused audit_token_t audit_token,
892 __unused host_priv_t host_priv,
91447636
A
893 __unused ledger_port_array_t ledger_ports,
894 __unused mach_msg_type_number_t num_ledger_ports,
2d21ac55
A
895 __unused boolean_t inherit_memory,
896 __unused task_t *child_task) /* OUT */
1c79356b 897{
1c79356b
A
898 if (parent_task == TASK_NULL)
899 return(KERN_INVALID_ARGUMENT);
900
901 if (host_security == HOST_NULL)
902 return(KERN_INVALID_SECURITY);
903
2d21ac55
A
904 /*
905 * No longer supported.
906 */
907 return(KERN_FAILURE);
1c79356b
A
908}
909
39236c6e
A
910/*
911 * Task ledgers
912 * ------------
913 *
914 * phys_footprint
915 * Physical footprint: This is the sum of:
3e170ce0
A
916 * + (internal - alternate_accounting)
917 * + (internal_compressed - alternate_accounting_compressed)
fe8ab488 918 * + iokit_mapped
3e170ce0
A
919 * + purgeable_nonvolatile
920 * + purgeable_nonvolatile_compressed
39037602 921 * + page_table
39236c6e 922 *
fe8ab488
A
923 * internal
924 * The task's anonymous memory, which on iOS is always resident.
925 *
926 * internal_compressed
927 * Amount of this task's internal memory which is held by the compressor.
39236c6e
A
928 * Such memory is no longer actually resident for the task [i.e., resident in its pmap],
929 * and could be either decompressed back into memory, or paged out to storage, depending
930 * on our implementation.
fe8ab488
A
931 *
932 * iokit_mapped
933 * IOKit mappings: The total size of all IOKit mappings in this task, regardless of
 934 * clean/dirty or internal/external state.
935 *
936 * alternate_accounting
937 * The number of internal dirty pages which are part of IOKit mappings. By definition, these pages
938 * are counted in both internal *and* iokit_mapped, so we must subtract them from the total to avoid
939 * double counting.
39236c6e 940 */
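/*
 * Illustrative sketch (not compiled): recomputing the phys_footprint
 * composition documented above from the individual ledger balances,
 * assuming the ledger_get_balance() interface from <kern/ledger.h>.
 * Return values of ledger_get_balance() are ignored for brevity.
 */
#if 0
static ledger_amount_t
task_recompute_phys_footprint(task_t task)
{
	ledger_amount_t internal = 0, internal_comp = 0;
	ledger_amount_t alt = 0, alt_comp = 0;
	ledger_amount_t iokit = 0, purg_nv = 0, purg_nv_comp = 0, ptbl = 0;

	ledger_get_balance(task->ledger, task_ledgers.internal, &internal);
	ledger_get_balance(task->ledger, task_ledgers.internal_compressed, &internal_comp);
	ledger_get_balance(task->ledger, task_ledgers.alternate_accounting, &alt);
	ledger_get_balance(task->ledger, task_ledgers.alternate_accounting_compressed, &alt_comp);
	ledger_get_balance(task->ledger, task_ledgers.iokit_mapped, &iokit);
	ledger_get_balance(task->ledger, task_ledgers.purgeable_nonvolatile, &purg_nv);
	ledger_get_balance(task->ledger, task_ledgers.purgeable_nonvolatile_compressed, &purg_nv_comp);
	ledger_get_balance(task->ledger, task_ledgers.page_table, &ptbl);

	/* Sum the components exactly as documented above. */
	return (internal - alt) + (internal_comp - alt_comp) +
	    iokit + purg_nv + purg_nv_comp + ptbl;
}
#endif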
316670eb
A
941void
942init_task_ledgers(void)
943{
944 ledger_template_t t;
945
946 assert(task_ledger_template == NULL);
947 assert(kernel_task == TASK_NULL);
948
39037602 949#if MACH_ASSERT
d9a64523
A
950 PE_parse_boot_argn("pmap_ledgers_panic",
951 &pmap_ledgers_panic,
39037602 952 sizeof (pmap_ledgers_panic));
d9a64523
A
953 PE_parse_boot_argn("pmap_ledgers_panic_leeway",
954 &pmap_ledgers_panic_leeway,
955 sizeof (pmap_ledgers_panic_leeway));
39037602
A
956#endif /* MACH_ASSERT */
957
316670eb
A
958 if ((t = ledger_template_create("Per-task ledger")) == NULL)
959 panic("couldn't create task ledger template");
960
961 task_ledgers.cpu_time = ledger_entry_add(t, "cpu_time", "sched", "ns");
962 task_ledgers.tkm_private = ledger_entry_add(t, "tkm_private",
963 "physmem", "bytes");
964 task_ledgers.tkm_shared = ledger_entry_add(t, "tkm_shared", "physmem",
965 "bytes");
966 task_ledgers.phys_mem = ledger_entry_add(t, "phys_mem", "physmem",
967 "bytes");
968 task_ledgers.wired_mem = ledger_entry_add(t, "wired_mem", "physmem",
969 "bytes");
fe8ab488
A
970 task_ledgers.internal = ledger_entry_add(t, "internal", "physmem",
971 "bytes");
972 task_ledgers.iokit_mapped = ledger_entry_add(t, "iokit_mapped", "mappings",
973 "bytes");
974 task_ledgers.alternate_accounting = ledger_entry_add(t, "alternate_accounting", "physmem",
39236c6e 975 "bytes");
3e170ce0
A
976 task_ledgers.alternate_accounting_compressed = ledger_entry_add(t, "alternate_accounting_compressed", "physmem",
977 "bytes");
39037602
A
978 task_ledgers.page_table = ledger_entry_add(t, "page_table", "physmem",
979 "bytes");
39236c6e
A
980 task_ledgers.phys_footprint = ledger_entry_add(t, "phys_footprint", "physmem",
981 "bytes");
fe8ab488 982 task_ledgers.internal_compressed = ledger_entry_add(t, "internal_compressed", "physmem",
39236c6e 983 "bytes");
fe8ab488
A
984 task_ledgers.purgeable_volatile = ledger_entry_add(t, "purgeable_volatile", "physmem", "bytes");
985 task_ledgers.purgeable_nonvolatile = ledger_entry_add(t, "purgeable_nonvolatile", "physmem", "bytes");
986 task_ledgers.purgeable_volatile_compressed = ledger_entry_add(t, "purgeable_volatile_compress", "physmem", "bytes");
987 task_ledgers.purgeable_nonvolatile_compressed = ledger_entry_add(t, "purgeable_nonvolatile_compress", "physmem", "bytes");
d9a64523
A
988
989 task_ledgers.network_volatile = ledger_entry_add(t, "network_volatile", "physmem", "bytes");
990 task_ledgers.network_nonvolatile = ledger_entry_add(t, "network_nonvolatile", "physmem", "bytes");
991 task_ledgers.network_volatile_compressed = ledger_entry_add(t, "network_volatile_compressed", "physmem", "bytes");
992 task_ledgers.network_nonvolatile_compressed = ledger_entry_add(t, "network_nonvolatile_compressed", "physmem", "bytes");
993
4b17d6b6 994 task_ledgers.platform_idle_wakeups = ledger_entry_add(t, "platform_idle_wakeups", "power",
39236c6e 995 "count");
4b17d6b6 996 task_ledgers.interrupt_wakeups = ledger_entry_add(t, "interrupt_wakeups", "power",
39236c6e 997 "count");
fe8ab488 998
3e170ce0 999#if CONFIG_SCHED_SFI
fe8ab488
A
1000 sfi_class_id_t class_id, ledger_alias;
1001 for (class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) {
1002 task_ledgers.sfi_wait_times[class_id] = -1;
1003 }
1004
1005 /* don't account for UNSPECIFIED */
1006 for (class_id = SFI_CLASS_UNSPECIFIED + 1; class_id < MAX_SFI_CLASS_ID; class_id++) {
1007 ledger_alias = sfi_get_ledger_alias_for_class(class_id);
1008 if (ledger_alias != SFI_CLASS_UNSPECIFIED) {
1009 /* Check to see if alias has been registered yet */
1010 if (task_ledgers.sfi_wait_times[ledger_alias] != -1) {
1011 task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias];
1012 } else {
1013 /* Otherwise, initialize it first */
1014 task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias] = sfi_ledger_entry_add(t, ledger_alias);
1015 }
1016 } else {
1017 task_ledgers.sfi_wait_times[class_id] = sfi_ledger_entry_add(t, class_id);
1018 }
1019
1020 if (task_ledgers.sfi_wait_times[class_id] < 0) {
1021 panic("couldn't create entries for task ledger template for SFI class 0x%x", class_id);
1022 }
1023 }
316670eb 1024
3e170ce0
A
1025 assert(task_ledgers.sfi_wait_times[MAX_SFI_CLASS_ID -1] != -1);
1026#endif /* CONFIG_SCHED_SFI */
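/*
 * Note on the aliasing above: two SFI classes that map to the same ledger
 * alias share one ledger entry. The first class to encounter the alias
 * creates the entry via sfi_ledger_entry_add(); every later class with that
 * alias reuses the same index in task_ledgers.sfi_wait_times[].
 */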
1027
fe8ab488
A
1028 task_ledgers.cpu_time_billed_to_me = ledger_entry_add(t, "cpu_time_billed_to_me", "sched", "ns");
1029 task_ledgers.cpu_time_billed_to_others = ledger_entry_add(t, "cpu_time_billed_to_others", "sched", "ns");
39037602
A
1030 task_ledgers.physical_writes = ledger_entry_add(t, "physical_writes", "res", "bytes");
1031 task_ledgers.logical_writes = ledger_entry_add(t, "logical_writes", "res", "bytes");
5ba3f43e
A
1032 task_ledgers.energy_billed_to_me = ledger_entry_add(t, "energy_billed_to_me", "power", "nj");
1033 task_ledgers.energy_billed_to_others = ledger_entry_add(t, "energy_billed_to_others", "power", "nj");
39037602 1034
fe8ab488
A
1035 if ((task_ledgers.cpu_time < 0) ||
1036 (task_ledgers.tkm_private < 0) ||
1037 (task_ledgers.tkm_shared < 0) ||
1038 (task_ledgers.phys_mem < 0) ||
1039 (task_ledgers.wired_mem < 0) ||
1040 (task_ledgers.internal < 0) ||
1041 (task_ledgers.iokit_mapped < 0) ||
1042 (task_ledgers.alternate_accounting < 0) ||
3e170ce0 1043 (task_ledgers.alternate_accounting_compressed < 0) ||
39037602 1044 (task_ledgers.page_table < 0) ||
fe8ab488
A
1045 (task_ledgers.phys_footprint < 0) ||
1046 (task_ledgers.internal_compressed < 0) ||
1047 (task_ledgers.purgeable_volatile < 0) ||
1048 (task_ledgers.purgeable_nonvolatile < 0) ||
1049 (task_ledgers.purgeable_volatile_compressed < 0) ||
1050 (task_ledgers.purgeable_nonvolatile_compressed < 0) ||
d9a64523
A
1051 (task_ledgers.network_volatile < 0) ||
1052 (task_ledgers.network_nonvolatile < 0) ||
1053 (task_ledgers.network_volatile_compressed < 0) ||
1054 (task_ledgers.network_nonvolatile_compressed < 0) ||
fe8ab488 1055 (task_ledgers.platform_idle_wakeups < 0) ||
39037602 1056 (task_ledgers.interrupt_wakeups < 0) ||
39037602 1057 (task_ledgers.cpu_time_billed_to_me < 0) || (task_ledgers.cpu_time_billed_to_others < 0) ||
39037602 1058 (task_ledgers.physical_writes < 0) ||
5ba3f43e
A
1059 (task_ledgers.logical_writes < 0) ||
1060 (task_ledgers.energy_billed_to_me < 0) ||
1061 (task_ledgers.energy_billed_to_others < 0)
fe8ab488 1062 ) {
316670eb
A
1063 panic("couldn't create entries for task ledger template");
1064 }
1065
39037602 1066 ledger_track_credit_only(t, task_ledgers.phys_footprint);
813fb2f6 1067 ledger_track_credit_only(t, task_ledgers.page_table);
39037602
A
1068 ledger_track_credit_only(t, task_ledgers.internal);
1069 ledger_track_credit_only(t, task_ledgers.internal_compressed);
1070 ledger_track_credit_only(t, task_ledgers.iokit_mapped);
1071 ledger_track_credit_only(t, task_ledgers.alternate_accounting);
1072 ledger_track_credit_only(t, task_ledgers.alternate_accounting_compressed);
1073 ledger_track_credit_only(t, task_ledgers.purgeable_volatile);
1074 ledger_track_credit_only(t, task_ledgers.purgeable_nonvolatile);
1075 ledger_track_credit_only(t, task_ledgers.purgeable_volatile_compressed);
1076 ledger_track_credit_only(t, task_ledgers.purgeable_nonvolatile_compressed);
1077
d9a64523
A
1078 ledger_track_credit_only(t, task_ledgers.network_volatile);
1079 ledger_track_credit_only(t, task_ledgers.network_nonvolatile);
1080 ledger_track_credit_only(t, task_ledgers.network_volatile_compressed);
1081 ledger_track_credit_only(t, task_ledgers.network_nonvolatile_compressed);
1082
39236c6e 1083 ledger_track_maximum(t, task_ledgers.phys_footprint, 60);
fe8ab488
A
1084#if MACH_ASSERT
1085 if (pmap_ledgers_panic) {
1086 ledger_panic_on_negative(t, task_ledgers.phys_footprint);
39037602 1087 ledger_panic_on_negative(t, task_ledgers.page_table);
fe8ab488
A
1088 ledger_panic_on_negative(t, task_ledgers.internal);
1089 ledger_panic_on_negative(t, task_ledgers.internal_compressed);
1090 ledger_panic_on_negative(t, task_ledgers.iokit_mapped);
1091 ledger_panic_on_negative(t, task_ledgers.alternate_accounting);
3e170ce0 1092 ledger_panic_on_negative(t, task_ledgers.alternate_accounting_compressed);
fe8ab488
A
1093 ledger_panic_on_negative(t, task_ledgers.purgeable_volatile);
1094 ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile);
1095 ledger_panic_on_negative(t, task_ledgers.purgeable_volatile_compressed);
1096 ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile_compressed);
d9a64523
A
1097
1098 ledger_panic_on_negative(t, task_ledgers.network_volatile);
1099 ledger_panic_on_negative(t, task_ledgers.network_nonvolatile);
1100 ledger_panic_on_negative(t, task_ledgers.network_volatile_compressed);
1101 ledger_panic_on_negative(t, task_ledgers.network_nonvolatile_compressed);
fe8ab488
A
1102 }
1103#endif /* MACH_ASSERT */
39236c6e 1104
39037602 1105#if CONFIG_MEMORYSTATUS
39236c6e 1106 ledger_set_callback(t, task_ledgers.phys_footprint, task_footprint_exceeded, NULL, NULL);
39037602 1107#endif /* CONFIG_MEMORYSTATUS */
39236c6e
A
1108
1109 ledger_set_callback(t, task_ledgers.interrupt_wakeups,
1110 task_wakeups_rate_exceeded, NULL, NULL);
39037602 1111 ledger_set_callback(t, task_ledgers.physical_writes, task_io_rate_exceeded, (void *)FLAVOR_IO_PHYSICAL_WRITES, NULL);
5ba3f43e
A
1112 ledger_set_callback(t, task_ledgers.logical_writes, task_io_rate_exceeded, (void *)FLAVOR_IO_LOGICAL_WRITES, NULL);
1113
1114 ledger_template_complete(t);
316670eb
A
1115 task_ledger_template = t;
1116}
1117
e8c3f781
A
1118os_refgrp_decl(static, task_refgrp, "task", NULL);
1119
1c79356b 1120kern_return_t
55e303ae 1121task_create_internal(
1c79356b 1122 task_t parent_task,
3e170ce0 1123 coalition_t *parent_coalitions __unused,
1c79356b 1124 boolean_t inherit_memory,
813fb2f6 1125 __unused boolean_t is_64bit,
d9a64523 1126 boolean_t is_64bit_data,
39037602 1127 uint32_t t_flags,
743345f9 1128 uint32_t t_procflags,
1c79356b
A
1129 task_t *child_task) /* OUT */
1130{
2d21ac55
A
1131 task_t new_task;
1132 vm_shared_region_t shared_region;
316670eb 1133 ledger_t ledger = NULL;
1c79356b
A
1134
1135 new_task = (task_t) zalloc(task_zone);
1136
1137 if (new_task == TASK_NULL)
1138 return(KERN_RESOURCE_SHORTAGE);
1139
1140 /* one ref for just being alive; one for our caller */
e8c3f781 1141 os_ref_init_count(&new_task->ref_count, &task_refgrp, 2);
1c79356b 1142
316670eb
A
1143 /* allocate with active entries */
1144 assert(task_ledger_template != NULL);
1145 if ((ledger = ledger_instantiate(task_ledger_template,
1146 LEDGER_CREATE_ACTIVE_ENTRIES)) == NULL) {
1147 zfree(task_zone, new_task);
1148 return(KERN_RESOURCE_SHORTAGE);
1149 }
39236c6e 1150
5ba3f43e 1151
316670eb
A
1152 new_task->ledger = ledger;
1153
fe8ab488
A
1154#if defined(CONFIG_SCHED_MULTIQ)
1155 new_task->sched_group = sched_group_create();
1156#endif
1157
b0d623f7 1158 /* if inherit_memory is true, parent_task MUST not be NULL */
39037602
A
1159 if (!(t_flags & TF_CORPSE_FORK) && inherit_memory)
1160 new_task->map = vm_map_fork(ledger, parent_task->map, 0);
1c79356b 1161 else
316670eb
A
1162 new_task->map = vm_map_create(pmap_create(ledger, 0, is_64bit),
1163 (vm_map_offset_t)(VM_MIN_ADDRESS),
1164 (vm_map_offset_t)(VM_MAX_ADDRESS), TRUE);
1c79356b 1165
2d21ac55
A
1166 /* Inherit memlock limit from parent */
1167 if (parent_task)
b0d623f7 1168 vm_map_set_user_wire_limit(new_task->map, (vm_size_t)parent_task->map->user_wire_limit);
2d21ac55 1169
b0d623f7 1170 lck_mtx_init(&new_task->lock, &task_lck_grp, &task_lck_attr);
55e303ae 1171 queue_init(&new_task->threads);
1c79356b 1172 new_task->suspend_count = 0;
55e303ae 1173 new_task->thread_count = 0;
55e303ae 1174 new_task->active_thread_count = 0;
1c79356b 1175 new_task->user_stop_count = 0;
39236c6e 1176 new_task->legacy_stop_count = 0;
1c79356b 1177 new_task->active = TRUE;
b0d623f7 1178 new_task->halting = FALSE;
55e303ae 1179 new_task->priv_flags = 0;
39037602 1180 new_task->t_flags = t_flags;
743345f9 1181 new_task->t_procflags = t_procflags;
39236c6e 1182 new_task->importance = 0;
5ba3f43e 1183 new_task->crashed_thread_id = 0;
39037602 1184 new_task->exec_token = 0;
1c79356b 1185
d9a64523
A
1186 new_task->task_exc_guard = task_exc_guard_default;
1187
fe8ab488
A
1188#if CONFIG_ATM
1189 new_task->atm_context = NULL;
1190#endif
fe8ab488 1191 new_task->bank_context = NULL;
fe8ab488 1192
1c79356b 1193#ifdef MACH_BSD
2d21ac55 1194 new_task->bsd_info = NULL;
3e170ce0 1195 new_task->corpse_info = NULL;
1c79356b
A
1196#endif /* MACH_BSD */
1197
39037602
A
1198#if CONFIG_MACF
1199 new_task->crash_label = NULL;
1200#endif
1201
1202#if CONFIG_MEMORYSTATUS
39236c6e
A
1203 if (max_task_footprint != 0) {
1204 ledger_set_limit(ledger, task_ledgers.phys_footprint, max_task_footprint, PHYS_FOOTPRINT_WARNING_LEVEL);
1205 }
39037602 1206#endif /* CONFIG_MEMORYSTATUS */
39236c6e
A
1207
1208 if (task_wakeups_monitor_rate != 0) {
1209 uint32_t flags = WAKEMON_ENABLE | WAKEMON_SET_DEFAULTS;
1210 int32_t rate; // Ignored because of WAKEMON_SET_DEFAULTS
1211 task_wakeups_monitor_ctl(new_task, &flags, &rate);
1212 }
1213
39037602
A
1214#if CONFIG_IO_ACCOUNTING
1215 uint32_t flags = IOMON_ENABLE;
1216 task_io_monitor_ctl(new_task, &flags);
1217#endif /* CONFIG_IO_ACCOUNTING */
1218
5ba3f43e 1219 machine_task_init(new_task, parent_task, inherit_memory);
0c530ab8 1220
39236c6e 1221 new_task->task_debug = NULL;
55e303ae 1222
39037602
A
1223#if DEVELOPMENT || DEBUG
1224 new_task->task_unnested = FALSE;
1225 new_task->task_disconnected_count = 0;
1226#endif
1c79356b 1227 queue_init(&new_task->semaphore_list);
1c79356b 1228 new_task->semaphores_owned = 0;
1c79356b 1229
1c79356b
A
1230 ipc_task_init(new_task, parent_task);
1231
2d21ac55 1232 new_task->vtimers = 0;
1c79356b 1233
2d21ac55
A
1234 new_task->shared_region = NULL;
1235
1236 new_task->affinity_space = NULL;
1c79356b 1237
a39ff7e2 1238 new_task->t_kpc = 0;
813fb2f6 1239
316670eb
A
1240 new_task->pidsuspended = FALSE;
1241 new_task->frozen = FALSE;
39236c6e 1242 new_task->changing_freeze_state = FALSE;
316670eb
A
1243 new_task->rusage_cpu_flags = 0;
1244 new_task->rusage_cpu_percentage = 0;
1245 new_task->rusage_cpu_interval = 0;
1246 new_task->rusage_cpu_deadline = 0;
1247 new_task->rusage_cpu_callt = NULL;
39236c6e
A
1248#if MACH_ASSERT
1249 new_task->suspends_outstanding = 0;
1250#endif
1251
fe8ab488
A
1252#if HYPERVISOR
1253 new_task->hv_task_target = NULL;
1254#endif /* HYPERVISOR */
1255
5ba3f43e
A
1256#if CONFIG_EMBEDDED
1257 queue_init(&new_task->task_watchers);
1258 new_task->num_taskwatchers = 0;
1259 new_task->watchapplying = 0;
1260#endif /* CONFIG_EMBEDDED */
316670eb 1261
39236c6e 1262 new_task->mem_notify_reserved = 0;
5ba3f43e 1263 new_task->memlimit_attrs_reserved = 0;
39236c6e 1264
39236c6e
A
1265 new_task->requested_policy = default_task_requested_policy;
1266 new_task->effective_policy = default_task_effective_policy;
db609669 1267
d9a64523
A
1268 task_importance_init_from_parent(new_task, parent_task);
1269
2d21ac55 1270 if (parent_task != TASK_NULL) {
1c79356b 1271 new_task->sec_token = parent_task->sec_token;
55e303ae 1272 new_task->audit_token = parent_task->audit_token;
1c79356b 1273
2d21ac55
A
1274 /* inherit the parent's shared region */
1275 shared_region = vm_shared_region_get(parent_task);
1276 vm_shared_region_set(new_task, shared_region);
1c79356b 1277
d9a64523
A
1278 if(task_has_64Bit_addr(parent_task)) {
1279 task_set_64Bit_addr(new_task);
1280 }
1281
1282 if(task_has_64Bit_data(parent_task)) {
1283 task_set_64Bit_data(new_task);
1284 }
1285
b0d623f7
A
1286 new_task->all_image_info_addr = parent_task->all_image_info_addr;
1287 new_task->all_image_info_size = parent_task->all_image_info_size;
0c530ab8 1288
2d21ac55
A
1289 if (inherit_memory && parent_task->affinity_space)
1290 task_affinity_create(parent_task, new_task);
b0d623f7
A
1291
1292 new_task->pset_hint = parent_task->pset_hint = task_choose_pset(parent_task);
39236c6e 1293
fe8ab488
A
1294 new_task->priority = BASEPRI_DEFAULT;
1295 new_task->max_priority = MAXPRI_USER;
1296
39037602 1297 task_policy_create(new_task, parent_task);
39236c6e 1298 } else {
1c79356b 1299 new_task->sec_token = KERNEL_SECURITY_TOKEN;
55e303ae 1300 new_task->audit_token = KERNEL_AUDIT_TOKEN;
b0d623f7 1301#ifdef __LP64__
d9a64523
A
1302 if(is_64bit) {
1303 task_set_64Bit_addr(new_task);
1304 }
b0d623f7 1305#endif
d9a64523
A
1306
1307 if(is_64bit_data) {
1308 task_set_64Bit_data(new_task);
1309 }
1310
6d2010ae
A
1311 new_task->all_image_info_addr = (mach_vm_address_t)0;
1312 new_task->all_image_info_size = (mach_vm_size_t)0;
b0d623f7
A
1313
1314 new_task->pset_hint = PROCESSOR_SET_NULL;
fe8ab488
A
1315
1316 if (kernel_task == TASK_NULL) {
1317 new_task->priority = BASEPRI_KERNEL;
1318 new_task->max_priority = MAXPRI_KERNEL;
1319 } else {
1320 new_task->priority = BASEPRI_DEFAULT;
1321 new_task->max_priority = MAXPRI_USER;
1322 }
1c79356b
A
1323 }
1324
3e170ce0
A
1325 bzero(new_task->coalition, sizeof(new_task->coalition));
1326 for (int i = 0; i < COALITION_NUM_TYPES; i++)
1327 queue_chain_init(new_task->task_coalition[i]);
fe8ab488
A
1328
1329 /* Allocate I/O Statistics */
1330 new_task->task_io_stats = (io_stat_info_t)kalloc(sizeof(struct io_stat_info));
1331 assert(new_task->task_io_stats != NULL);
1332 bzero(new_task->task_io_stats, sizeof(struct io_stat_info));
1333
a39ff7e2
A
1334 bzero(&(new_task->cpu_time_eqos_stats), sizeof(new_task->cpu_time_eqos_stats));
1335 bzero(&(new_task->cpu_time_rqos_stats), sizeof(new_task->cpu_time_rqos_stats));
fe8ab488 1336
6d2010ae 1337 bzero(&new_task->extmod_statistics, sizeof(new_task->extmod_statistics));
3e170ce0 1338
39037602
A
 1339 /* Copy resource accounting info from the parent for a corpse-forked task. */
1340 if (parent_task != NULL && (t_flags & TF_CORPSE_FORK)) {
743345f9 1341 task_rollup_accounting_info(new_task, parent_task);
39037602
A
1342 } else {
1343 /* Initialize to zero for standard fork/spawn case */
1344 new_task->total_user_time = 0;
1345 new_task->total_system_time = 0;
5ba3f43e 1346 new_task->total_ptime = 0;
d9a64523 1347 new_task->total_runnable_time = 0;
39037602
A
1348 new_task->faults = 0;
1349 new_task->pageins = 0;
1350 new_task->cow_faults = 0;
1351 new_task->messages_sent = 0;
1352 new_task->messages_received = 0;
1353 new_task->syscalls_mach = 0;
1354 new_task->syscalls_unix = 0;
1355 new_task->c_switch = 0;
1356 new_task->p_switch = 0;
1357 new_task->ps_switch = 0;
1358 new_task->low_mem_notified_warn = 0;
1359 new_task->low_mem_notified_critical = 0;
1360 new_task->purged_memory_warn = 0;
1361 new_task->purged_memory_critical = 0;
1362 new_task->low_mem_privileged_listener = 0;
813fb2f6
A
1363 new_task->memlimit_is_active = 0;
1364 new_task->memlimit_is_fatal = 0;
1365 new_task->memlimit_active_exc_resource = 0;
1366 new_task->memlimit_inactive_exc_resource = 0;
39037602
A
1367 new_task->task_timer_wakeups_bin_1 = 0;
1368 new_task->task_timer_wakeups_bin_2 = 0;
1369 new_task->task_gpu_ns = 0;
1370 new_task->task_immediate_writes = 0;
1371 new_task->task_deferred_writes = 0;
1372 new_task->task_invalidated_writes = 0;
1373 new_task->task_metadata_writes = 0;
1374 new_task->task_energy = 0;
5ba3f43e
A
1375#if MONOTONIC
1376 memset(&new_task->task_monotonic, 0, sizeof(new_task->task_monotonic));
1377#endif /* MONOTONIC */
39037602 1378 }
3e170ce0 1379
39037602
A
1380
1381#if CONFIG_COALITIONS
1382 if (!(t_flags & TF_CORPSE_FORK)) {
1383 /* TODO: there is no graceful failure path here... */
1384 if (parent_coalitions && parent_coalitions[COALITION_TYPE_RESOURCE]) {
1385 coalitions_adopt_task(parent_coalitions, new_task);
1386 } else if (parent_task && parent_task->coalition[COALITION_TYPE_RESOURCE]) {
1387 /*
1388 * all tasks at least have a resource coalition, so
1389 * if the parent has one then inherit all coalitions
1390 * the parent is a part of
1391 */
1392 coalitions_adopt_task(parent_task->coalition, new_task);
1393 } else {
1394 /* TODO: assert that new_task will be PID 1 (launchd) */
1395 coalitions_adopt_init_task(new_task);
1396 }
5ba3f43e
A
1397 /*
1398 * on exec, we need to transfer the coalition roles from the
1399 * parent task to the exec copy task.
1400 */
1401 if (parent_task && (t_procflags & TPF_EXEC_COPY)) {
1402 int coal_roles[COALITION_NUM_TYPES];
1403 task_coalition_roles(parent_task, coal_roles);
1404 (void)coalitions_set_roles(new_task->coalition, new_task, coal_roles);
1405 }
3e170ce0 1406 } else {
39037602 1407 coalitions_adopt_corpse_task(new_task);
3e170ce0
A
1408 }
1409
1410 if (new_task->coalition[COALITION_TYPE_RESOURCE] == COALITION_NULL) {
1411 panic("created task is not a member of a resource coalition");
1412 }
1413#endif /* CONFIG_COALITIONS */
1414
1415 new_task->dispatchqueue_offset = 0;
1416 if (parent_task != NULL) {
1417 new_task->dispatchqueue_offset = parent_task->dispatchqueue_offset;
1418 }
1c79356b 1419
39236c6e 1420 new_task->task_volatile_objects = 0;
fe8ab488
A
1421 new_task->task_nonvolatile_objects = 0;
1422 new_task->task_purgeable_disowning = FALSE;
1423 new_task->task_purgeable_disowned = FALSE;
a39ff7e2
A
1424 queue_init(&new_task->task_objq);
1425 task_objq_lock_init(new_task);
1426
d9a64523
A
1427#if __arm64__
1428 new_task->task_legacy_footprint = FALSE;
1429#endif /* __arm64__ */
a39ff7e2 1430 new_task->task_region_footprint = FALSE;
d9a64523
A
1431 new_task->task_has_crossed_thread_limit = FALSE;
1432 new_task->task_thread_limit = 0;
39037602
A
1433#if CONFIG_SECLUDED_MEMORY
1434 new_task->task_can_use_secluded_mem = FALSE;
1435 new_task->task_could_use_secluded_mem = FALSE;
1436 new_task->task_could_also_use_secluded_mem = FALSE;
d9a64523 1437 new_task->task_suppressed_secluded = FALSE;
39037602
A
1438#endif /* CONFIG_SECLUDED_MEMORY */
1439
d9a64523
A
1440 /*
1441 * t_flags is set up above. But since we don't
1442 * support darkwake mode being set that way
1443 * currently, we clear it out here explicitly.
1444 */
1445 new_task->t_flags &= ~(TF_DARKWAKE_MODE);
1446
7e41aa88
A
1447 queue_init(&new_task->io_user_clients);
1448
1c79356b
A
1449 ipc_task_enable(new_task);
1450
3e170ce0
A
1451 lck_mtx_lock(&tasks_threads_lock);
1452 queue_enter(&tasks, new_task, task_t, tasks);
1453 tasks_count++;
4bd07ac2
A
1454 if (tasks_suspend_state) {
1455 task_suspend_internal(new_task);
1456 }
3e170ce0
A
1457 lck_mtx_unlock(&tasks_threads_lock);
1458
1c79356b
A
1459 *child_task = new_task;
1460 return(KERN_SUCCESS);
1461}
1462
743345f9
A
1463/*
1464 * task_rollup_accounting_info
1465 *
 1466 * Roll up accounting stats. Used to roll up stats
 1467 * for the exec copy task and for corpse forks.
1468 */
1469void
1470task_rollup_accounting_info(task_t to_task, task_t from_task)
1471{
1472 assert(from_task != to_task);
1473
1474 to_task->total_user_time = from_task->total_user_time;
1475 to_task->total_system_time = from_task->total_system_time;
5ba3f43e 1476 to_task->total_ptime = from_task->total_ptime;
d9a64523 1477 to_task->total_runnable_time = from_task->total_runnable_time;
743345f9
A
1478 to_task->faults = from_task->faults;
1479 to_task->pageins = from_task->pageins;
1480 to_task->cow_faults = from_task->cow_faults;
1481 to_task->messages_sent = from_task->messages_sent;
1482 to_task->messages_received = from_task->messages_received;
1483 to_task->syscalls_mach = from_task->syscalls_mach;
1484 to_task->syscalls_unix = from_task->syscalls_unix;
1485 to_task->c_switch = from_task->c_switch;
1486 to_task->p_switch = from_task->p_switch;
1487 to_task->ps_switch = from_task->ps_switch;
1488 to_task->extmod_statistics = from_task->extmod_statistics;
1489 to_task->low_mem_notified_warn = from_task->low_mem_notified_warn;
1490 to_task->low_mem_notified_critical = from_task->low_mem_notified_critical;
1491 to_task->purged_memory_warn = from_task->purged_memory_warn;
1492 to_task->purged_memory_critical = from_task->purged_memory_critical;
1493 to_task->low_mem_privileged_listener = from_task->low_mem_privileged_listener;
1494 *to_task->task_io_stats = *from_task->task_io_stats;
a39ff7e2
A
1495 to_task->cpu_time_eqos_stats = from_task->cpu_time_eqos_stats;
1496 to_task->cpu_time_rqos_stats = from_task->cpu_time_rqos_stats;
743345f9
A
1497 to_task->task_timer_wakeups_bin_1 = from_task->task_timer_wakeups_bin_1;
1498 to_task->task_timer_wakeups_bin_2 = from_task->task_timer_wakeups_bin_2;
1499 to_task->task_gpu_ns = from_task->task_gpu_ns;
1500 to_task->task_immediate_writes = from_task->task_immediate_writes;
1501 to_task->task_deferred_writes = from_task->task_deferred_writes;
1502 to_task->task_invalidated_writes = from_task->task_invalidated_writes;
1503 to_task->task_metadata_writes = from_task->task_metadata_writes;
1504 to_task->task_energy = from_task->task_energy;
1505
1506 /* Skip ledger roll up for memory accounting entries */
1507 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time);
1508 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.platform_idle_wakeups);
1509 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.interrupt_wakeups);
1510#if CONFIG_SCHED_SFI
1511 for (sfi_class_id_t class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) {
1512 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.sfi_wait_times[class_id]);
1513 }
1514#endif
743345f9
A
1515 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time_billed_to_me);
1516 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time_billed_to_others);
743345f9
A
1517 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.physical_writes);
1518 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.logical_writes);
5ba3f43e
A
1519 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.energy_billed_to_me);
1520 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.energy_billed_to_others);
743345f9
A
1521}
1522
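/*
 * Illustrative sketch (not part of the original file): callers such as
 * the exec-copy and corpse-fork paths roll the dying task's accounting
 * into its replacement before the old task is torn down. The scalar
 * counters above are plain copies; ledger entries are accumulated via
 * ledger_rollup_entry() so credits and debits stay balanced. The
 * variable names below are hypothetical.
 */
#if 0 /* example only */
static void
example_rollup(task_t old_task, task_t new_task)
{
	task_rollup_accounting_info(new_task, old_task);
}
#endif
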
int task_dropped_imp_count = 0;

/*
 * task_deallocate:
 *
 * Drop a reference on a task.
 */
void
task_deallocate(
	task_t task)
{
	ledger_amount_t credit, debit, interrupt_wakeups, platform_idle_wakeups;
	os_ref_count_t refs;

	if (task == TASK_NULL)
		return;

	refs = task_deallocate_internal(task);

#if IMPORTANCE_INHERITANCE
	if (refs == 1) {
		/*
		 * If the last ref potentially comes from the task's importance,
		 * disconnect it. But more task refs may be added before
		 * that completes, so wait for the reference to go to zero
		 * naturally (it may happen on a recursive task_deallocate()
		 * from the ipc_importance_disconnect_task() call).
		 */
		if (IIT_NULL != task->task_imp_base)
			ipc_importance_disconnect_task(task);
		return;
	}
#endif /* IMPORTANCE_INHERITANCE */

	if (refs > 0) {
		return;
	}

	lck_mtx_lock(&tasks_threads_lock);
	queue_remove(&terminated_tasks, task, task_t, tasks);
	terminated_tasks_count--;
	lck_mtx_unlock(&tasks_threads_lock);

	/*
	 * remove the reference on the ATM descriptor
	 */
	task_atm_reset(task);

	/*
	 * remove the reference on the bank context
	 */
	task_bank_reset(task);

	if (task->task_io_stats)
		kfree(task->task_io_stats, sizeof(struct io_stat_info));

	/*
	 * Give the machine dependent code a chance
	 * to perform cleanup before ripping apart
	 * the task.
	 */
	machine_task_terminate(task);

	ipc_task_terminate(task);

	/* let iokit know */
	iokit_task_terminate(task);

	if (task->affinity_space)
		task_affinity_deallocate(task);

#if MACH_ASSERT
	if (task->ledger != NULL &&
	    task->map != NULL &&
	    task->map->pmap != NULL &&
	    task->map->pmap->ledger != NULL) {
		assert(task->ledger == task->map->pmap->ledger);
	}
#endif /* MACH_ASSERT */

	vm_purgeable_disown(task);
	assert(task->task_purgeable_disowned);
	if (task->task_volatile_objects != 0 ||
	    task->task_nonvolatile_objects != 0) {
		panic("task_deallocate(%p): "
		      "volatile_objects=%d nonvolatile_objects=%d\n",
		      task,
		      task->task_volatile_objects,
		      task->task_nonvolatile_objects);
	}

	vm_map_deallocate(task->map);
	is_release(task->itk_space);

	ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
	    &interrupt_wakeups, &debit);
	ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
	    &platform_idle_wakeups, &debit);

#if defined(CONFIG_SCHED_MULTIQ)
	sched_group_destroy(task->sched_group);
#endif

	/* Accumulate statistics for dead tasks */
	lck_spin_lock(&dead_task_statistics_lock);
	dead_task_statistics.total_user_time += task->total_user_time;
	dead_task_statistics.total_system_time += task->total_system_time;

	dead_task_statistics.task_interrupt_wakeups += interrupt_wakeups;
	dead_task_statistics.task_platform_idle_wakeups += platform_idle_wakeups;

	dead_task_statistics.task_timer_wakeups_bin_1 += task->task_timer_wakeups_bin_1;
	dead_task_statistics.task_timer_wakeups_bin_2 += task->task_timer_wakeups_bin_2;
	dead_task_statistics.total_ptime += task->total_ptime;
	dead_task_statistics.total_pset_switches += task->ps_switch;
	dead_task_statistics.task_gpu_ns += task->task_gpu_ns;
	dead_task_statistics.task_energy += task->task_energy;

	lck_spin_unlock(&dead_task_statistics_lock);
	lck_mtx_destroy(&task->lock, &task_lck_grp);

	if (!ledger_get_entries(task->ledger, task_ledgers.tkm_private, &credit,
	    &debit)) {
		OSAddAtomic64(credit, (int64_t *)&tasks_tkm_private.alloc);
		OSAddAtomic64(debit, (int64_t *)&tasks_tkm_private.free);
	}
	if (!ledger_get_entries(task->ledger, task_ledgers.tkm_shared, &credit,
	    &debit)) {
		OSAddAtomic64(credit, (int64_t *)&tasks_tkm_shared.alloc);
		OSAddAtomic64(debit, (int64_t *)&tasks_tkm_shared.free);
	}
	ledger_dereference(task->ledger);

#if TASK_REFERENCE_LEAK_DEBUG
	btlog_remove_entries_for_element(task_ref_btlog, task);
#endif

#if CONFIG_COALITIONS
	task_release_coalitions(task);
#endif /* CONFIG_COALITIONS */

	bzero(task->coalition, sizeof(task->coalition));

#if MACH_BSD
	/* clean up collected information since the last reference to the task is gone */
	if (task->corpse_info) {
		void *corpse_info_kernel = kcdata_memory_get_begin_addr(task->corpse_info);
		task_crashinfo_destroy(task->corpse_info);
		task->corpse_info = NULL;
		if (corpse_info_kernel) {
			kfree(corpse_info_kernel, CORPSEINFO_ALLOCATION_SIZE);
		}
	}
#endif

#if CONFIG_MACF
	if (task->crash_label) {
		mac_exc_free_label(task->crash_label);
		task->crash_label = NULL;
	}
#endif

	assert(queue_empty(&task->task_objq));

	zfree(task_zone, task);
}

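/*
 * Illustrative sketch (not part of the original file): every
 * task_reference() must be balanced by exactly one task_deallocate();
 * the last deallocate performs the teardown above. The inspect_task()
 * helper below is hypothetical, shown only to mark where work on the
 * pinned task would go.
 */
#if 0 /* example only */
static void
example_task_ref_pairing(task_t task)
{
	task_reference(task);	/* take a ref so the task can't be freed */
	inspect_task(task);	/* hypothetical work on the stable pointer */
	task_deallocate(task);	/* drop the ref; may free the task */
}
#endif
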
/*
 * task_name_deallocate:
 *
 * Drop a reference on a task name.
 */
void
task_name_deallocate(
	task_name_t task_name)
{
	return(task_deallocate((task_t)task_name));
}

/*
 * task_inspect_deallocate:
 *
 * Drop a task inspection reference.
 */
void
task_inspect_deallocate(
	task_inspect_t task_inspect)
{
	return(task_deallocate((task_t)task_inspect));
}

/*
 * task_suspension_token_deallocate:
 *
 * Drop a reference on a task suspension token.
 */
void
task_suspension_token_deallocate(
	task_suspension_token_t token)
{
	return(task_deallocate((task_t)token));
}

/*
 * task_collect_crash_info:
 *
 * Collect crash info from BSD- and Mach-based data.
 */
kern_return_t
task_collect_crash_info(
	task_t task,
#ifdef CONFIG_MACF
	struct label *crash_label,
#endif
	int is_corpse_fork)
{
	kern_return_t kr = KERN_SUCCESS;

	kcdata_descriptor_t crash_data = NULL;
	kcdata_descriptor_t crash_data_release = NULL;
	mach_msg_type_number_t size = CORPSEINFO_ALLOCATION_SIZE;
	mach_vm_offset_t crash_data_ptr = 0;
	void *crash_data_kernel = NULL;
	void *crash_data_kernel_release = NULL;
#if CONFIG_MACF
	struct label *label, *free_label;
#endif

	if (!corpses_enabled()) {
		return KERN_NOT_SUPPORTED;
	}

#if CONFIG_MACF
	free_label = label = mac_exc_create_label();
#endif

	task_lock(task);

	assert(is_corpse_fork || task->bsd_info != NULL);
	if (task->corpse_info == NULL && (is_corpse_fork || task->bsd_info != NULL)) {
#if CONFIG_MACF
		/* Set the crash label, used by the exception delivery mac hook */
		free_label = task->crash_label; // Most likely NULL.
		task->crash_label = label;
		mac_exc_update_task_crash_label(task, crash_label);
#endif
		task_unlock(task);

		crash_data_kernel = (void *) kalloc(CORPSEINFO_ALLOCATION_SIZE);
		if (crash_data_kernel == NULL) {
			kr = KERN_RESOURCE_SHORTAGE;
			goto out_no_lock;
		}
		bzero(crash_data_kernel, CORPSEINFO_ALLOCATION_SIZE);
		crash_data_ptr = (mach_vm_offset_t) crash_data_kernel;

		/* Do not get a corpse ref for corpse fork */
		crash_data = task_crashinfo_alloc_init((mach_vm_address_t)crash_data_ptr, size,
		    is_corpse_fork ? 0 : CORPSE_CRASHINFO_HAS_REF,
		    KCFLAG_USE_MEMCOPY);
		if (crash_data) {
			task_lock(task);
			crash_data_release = task->corpse_info;
			crash_data_kernel_release = kcdata_memory_get_begin_addr(crash_data_release);
			task->corpse_info = crash_data;

			task_unlock(task);
			kr = KERN_SUCCESS;
		} else {
			kfree(crash_data_kernel, CORPSEINFO_ALLOCATION_SIZE);
			kr = KERN_FAILURE;
		}

		if (crash_data_release != NULL) {
			task_crashinfo_destroy(crash_data_release);
		}
		if (crash_data_kernel_release != NULL) {
			kfree(crash_data_kernel_release, CORPSEINFO_ALLOCATION_SIZE);
		}
	} else {
		task_unlock(task);
	}

out_no_lock:
#if CONFIG_MACF
	if (free_label != NULL) {
		mac_exc_free_label(free_label);
	}
#endif
	return kr;
}

/*
 * task_deliver_crash_notification:
 *
 * Makes an outcall to the registered host port for a corpse.
 */
kern_return_t
task_deliver_crash_notification(
	task_t task,
	thread_t thread,
	exception_type_t etype,
	mach_exception_subcode_t subcode)
{
	kcdata_descriptor_t crash_info = task->corpse_info;
	thread_t th_iter = NULL;
	kern_return_t kr = KERN_SUCCESS;
	wait_interrupt_t wsave;
	mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
	ipc_port_t task_port, old_notify;

	if (crash_info == NULL)
		return KERN_FAILURE;

	task_lock(task);
	if (task_is_a_corpse_fork(task)) {
		/* Populate code with EXC_{RESOURCE,GUARD} for corpse fork */
		code[0] = etype;
		code[1] = subcode;
	} else {
		/* Populate code with EXC_CRASH for corpses */
		code[0] = EXC_CRASH;
		code[1] = 0;
		/* Update code[1] if the boot-arg corpse_for_fatal_memkill is set */
		if (corpse_for_fatal_memkill) {
			code[1] = subcode;
		}
	}

	queue_iterate(&task->threads, th_iter, thread_t, task_threads)
	{
		if (th_iter->corpse_dup == FALSE) {
			ipc_thread_reset(th_iter);
		}
	}
	task_unlock(task);

	/* Arm the no-senders notification for the task port */
	task_reference(task);
	task_port = convert_task_to_port(task);
	ip_lock(task_port);
	assert(ip_active(task_port));
	ipc_port_nsrequest(task_port, task_port->ip_mscount, ipc_port_make_sonce_locked(task_port), &old_notify);
	/* port unlocked */
	assert(IP_NULL == old_notify);

	wsave = thread_interrupt_level(THREAD_UNINT);
	kr = exception_triage_thread(EXC_CORPSE_NOTIFY, code, EXCEPTION_CODE_MAX, thread);
	if (kr != KERN_SUCCESS) {
		printf("Failed to send exception EXC_CORPSE_NOTIFY. error code: %d for pid %d\n", kr, task_pid(task));
	}

	(void)thread_interrupt_level(wsave);

	/*
	 * Drop the send right on the task port; this will fire the
	 * no-senders notification if exception delivery failed.
	 */
	ipc_port_release_send(task_port);
	return kr;
}

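/*
 * Illustrative sketch (not part of the original file): the no-senders
 * arming pattern used above. A kernel object that wants a callback when
 * the last send right to its port dies requests a notification at the
 * port's current make-send count; the send-once right made here is what
 * receives the notification.
 */
#if 0 /* example only */
static void
example_arm_no_senders(ipc_port_t port)
{
	ipc_port_t old_notify;

	ip_lock(port);
	assert(ip_active(port));
	ipc_port_nsrequest(port, port->ip_mscount,
	    ipc_port_make_sonce_locked(port), &old_notify);
	/* ipc_port_nsrequest() unlocks the port */
	assert(old_notify == IP_NULL);	/* no previous request expected */
}
#endif
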
/*
 * task_terminate:
 *
 * Terminate the specified task. See comments on thread_terminate
 * (kern/thread.c) about problems with terminating the "current task."
 */

kern_return_t
task_terminate(
	task_t task)
{
	if (task == TASK_NULL)
		return (KERN_INVALID_ARGUMENT);

	if (task->bsd_info)
		return (KERN_FAILURE);

	return (task_terminate_internal(task));
}

#if MACH_ASSERT
extern int proc_pid(struct proc *);
extern void proc_name_kdp(task_t t, char *buf, int size);
#endif /* MACH_ASSERT */

#define VM_MAP_PARTIAL_REAP 0x54  /* 0x150 */
static void
__unused task_partial_reap(task_t task, __unused int pid)
{
	unsigned int reclaimed_resident = 0;
	unsigned int reclaimed_compressed = 0;
	uint64_t task_page_count;

	task_page_count = (get_task_phys_footprint(task) / PAGE_SIZE_64);

	KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_START),
	    pid, task_page_count, 0, 0, 0);

	vm_map_partial_reap(task->map, &reclaimed_resident, &reclaimed_compressed);

	KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_END),
	    pid, reclaimed_resident, reclaimed_compressed, 0, 0);
}

kern_return_t
task_mark_corpse(task_t task)
{
	kern_return_t kr = KERN_SUCCESS;
	thread_t self_thread;
	(void) self_thread;
	wait_interrupt_t wsave;
#if CONFIG_MACF
	struct label *crash_label = NULL;
#endif

	assert(task != kernel_task);
	assert(task == current_task());
	assert(!task_is_a_corpse(task));

#if CONFIG_MACF
	crash_label = mac_exc_create_label_for_proc((struct proc*)task->bsd_info);
#endif

	kr = task_collect_crash_info(task,
#if CONFIG_MACF
	    crash_label,
#endif
	    FALSE);
	if (kr != KERN_SUCCESS) {
		goto out;
	}

	self_thread = current_thread();

	wsave = thread_interrupt_level(THREAD_UNINT);
	task_lock(task);

	task_set_corpse_pending_report(task);
	task_set_corpse(task);
	task->crashed_thread_id = thread_tid(self_thread);

	kr = task_start_halt_locked(task, TRUE);
	assert(kr == KERN_SUCCESS);

	ipc_task_reset(task);
	/* Remove the naked send right for the task port, needed to arm the no-senders notification */
	task_set_special_port(task, TASK_KERNEL_PORT, IPC_PORT_NULL);
	ipc_task_enable(task);

	task_unlock(task);
	/* terminate the ipc space */
	ipc_space_terminate(task->itk_space);

	/* Add it to the global corpse task list */
	task_add_to_corpse_task_list(task);

	task_start_halt(task);
	thread_terminate_internal(self_thread);

	(void) thread_interrupt_level(wsave);
	assert(task->halting == TRUE);

out:
#if CONFIG_MACF
	mac_exc_free_label(crash_label);
#endif
	return kr;
}

/*
 * task_clear_corpse
 *
 * Clears the corpse pending bit on the task.
 * Removes the inspection bit on the threads.
 */
void
task_clear_corpse(task_t task)
{
	thread_t th_iter = NULL;

	task_lock(task);
	queue_iterate(&task->threads, th_iter, thread_t, task_threads)
	{
		thread_mtx_lock(th_iter);
		th_iter->inspection = FALSE;
		thread_mtx_unlock(th_iter);
	}

	thread_terminate_crashed_threads();
	/* remove the pending corpse report flag */
	task_clear_corpse_pending_report(task);

	task_unlock(task);
}

/*
 * task_port_notify
 *
 * Called whenever the Mach port system detects no-senders on
 * the task port of a corpse.
 * Each notification that comes in should terminate the task (corpse).
 */
void
task_port_notify(mach_msg_header_t *msg)
{
	mach_no_senders_notification_t *notification = (void *)msg;
	ipc_port_t port = notification->not_header.msgh_remote_port;
	task_t task;

	assert(ip_active(port));
	assert(IKOT_TASK == ip_kotype(port));
	task = (task_t) port->ip_kobject;

	assert(task_is_a_corpse(task));

	/* Remove the task from the global corpse task list */
	task_remove_from_corpse_task_list(task);

	task_clear_corpse(task);
	task_terminate_internal(task);
}

/*
 * task_wait_till_threads_terminate_locked
 *
 * Wait till all the threads in the task are terminated.
 * Might release the task lock and re-acquire it.
 */
void
task_wait_till_threads_terminate_locked(task_t task)
{
	/* wait for all the threads in the task to terminate */
	while (task->active_thread_count != 0) {
		assert_wait((event_t)&task->active_thread_count, THREAD_UNINT);
		task_unlock(task);
		thread_block(THREAD_CONTINUE_NULL);

		task_lock(task);
	}
}

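/*
 * Illustrative sketch (not part of the original file): the
 * assert_wait()/thread_block() idiom used above. The waiter registers
 * the event it will sleep on *before* dropping the lock, so a wakeup
 * that races with the unlock is not lost, and it re-checks the
 * condition after re-acquiring the lock.
 */
#if 0 /* example only */
static void
example_wait_for_condition(task_t task, int *condition)
{
	task_lock(task);
	while (*condition == 0) {
		assert_wait((event_t)condition, THREAD_UNINT);	/* register the waiter */
		task_unlock(task);				/* then drop the lock */
		thread_block(THREAD_CONTINUE_NULL);		/* sleep until thread_wakeup(condition) */
		task_lock(task);				/* re-check under the lock */
	}
	task_unlock(task);
}
#endif
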
/*
 * task_duplicate_map_and_threads
 *
 * Copy the vmmap of the source task.
 * Copy the active threads from the source task to the destination task.
 * The source task is suspended during the copy.
 */
kern_return_t
task_duplicate_map_and_threads(
	task_t task,
	void *p,
	task_t new_task,
	thread_t *thread_ret,
	uint64_t **udata_buffer,
	int *size,
	int *num_udata)
{
	kern_return_t kr = KERN_SUCCESS;
	int active;
	thread_t thread, self, thread_return = THREAD_NULL;
	thread_t new_thread = THREAD_NULL, first_thread = THREAD_NULL;
	thread_t *thread_array;
	uint32_t active_thread_count = 0, array_count = 0, i;
	vm_map_t oldmap;
	uint64_t *buffer = NULL;
	int buf_size = 0;
	int est_knotes = 0, num_knotes = 0;

	self = current_thread();

	/*
	 * Suspend the task to copy thread state; use the internal
	 * variant so that no user-space process can resume
	 * the task from under us.
	 */
	kr = task_suspend_internal(task);
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	if (task->map->disable_vmentry_reuse == TRUE) {
		/*
		 * Quite likely GuardMalloc (or some debugging tool)
		 * is being used on this task. And it has gone through
		 * its limit. Making a corpse will likely encounter
		 * a lot of VM entries that will need COW.
		 *
		 * Skip it.
		 */
#if DEVELOPMENT || DEBUG
		memorystatus_abort_vm_map_fork(task);
#endif
		task_resume_internal(task);
		return KERN_FAILURE;
	}

	/* Check with VM if vm_map_fork is allowed for this task */
	if (memorystatus_allowed_vm_map_fork(task)) {

		/* Setup the new task's vmmap, switching from the parent task's map to its COW map */
		oldmap = new_task->map;
		new_task->map = vm_map_fork(new_task->ledger,
		    task->map,
		    (VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
		    VM_MAP_FORK_PRESERVE_PURGEABLE |
		    VM_MAP_FORK_CORPSE_FOOTPRINT));
		vm_map_deallocate(oldmap);

		/* copy ledgers that impact the memory footprint */
		vm_map_copy_footprint_ledgers(task, new_task);

		/* Get all the udata pointers from kqueue */
		est_knotes = kevent_proc_copy_uptrs(p, NULL, 0);
		if (est_knotes > 0) {
			buf_size = (est_knotes + 32) * sizeof(uint64_t);
			buffer = (uint64_t *) kalloc(buf_size);
			num_knotes = kevent_proc_copy_uptrs(p, buffer, buf_size);
			if (num_knotes > est_knotes + 32) {
				num_knotes = est_knotes + 32;
			}
		}
	}

	active_thread_count = task->active_thread_count;
	if (active_thread_count == 0) {
		if (buffer != NULL) {
			kfree(buffer, buf_size);
		}
		task_resume_internal(task);
		return KERN_FAILURE;
	}

	thread_array = (thread_t *) kalloc(sizeof(thread_t) * active_thread_count);

	/* Iterate all the threads and drop the task lock before calling thread_create_with_continuation */
	task_lock(task);
	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		/* Skip inactive threads */
		active = thread->active;
		if (!active) {
			continue;
		}

		if (array_count >= active_thread_count) {
			break;
		}

		thread_array[array_count++] = thread;
		thread_reference(thread);
	}
	task_unlock(task);

	for (i = 0; i < array_count; i++) {

		kr = thread_create_with_continuation(new_task, &new_thread, (thread_continue_t)thread_corpse_continue);
		if (kr != KERN_SUCCESS) {
			break;
		}

		/* Equivalent of the current thread in the corpse */
		if (thread_array[i] == self) {
			thread_return = new_thread;
			new_task->crashed_thread_id = thread_tid(new_thread);
		} else if (first_thread == NULL) {
			first_thread = new_thread;
		} else {
			/* drop the extra ref returned by thread_create_with_continuation */
			thread_deallocate(new_thread);
		}

		kr = thread_dup2(thread_array[i], new_thread);
		if (kr != KERN_SUCCESS) {
			thread_mtx_lock(new_thread);
			new_thread->corpse_dup = TRUE;
			thread_mtx_unlock(new_thread);
			continue;
		}

		/* Copy thread name */
		bsd_copythreadname(new_thread->uthread, thread_array[i]->uthread);
		new_thread->thread_tag = thread_array[i]->thread_tag;
		thread_copy_resource_info(new_thread, thread_array[i]);
	}

	/* return the first thread if we couldn't find the equivalent of current */
	if (thread_return == THREAD_NULL) {
		thread_return = first_thread;
	} else if (first_thread != THREAD_NULL) {
		/* drop the extra ref returned by thread_create_with_continuation */
		thread_deallocate(first_thread);
	}

	task_resume_internal(task);

	for (i = 0; i < array_count; i++) {
		thread_deallocate(thread_array[i]);
	}
	kfree(thread_array, sizeof(thread_t) * active_thread_count);

	if (kr == KERN_SUCCESS) {
		*thread_ret = thread_return;
		*udata_buffer = buffer;
		*size = buf_size;
		*num_udata = num_knotes;
	} else {
		if (thread_return != THREAD_NULL) {
			thread_deallocate(thread_return);
		}
		if (buffer != NULL) {
			kfree(buffer, buf_size);
		}
	}

	return kr;
}

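/*
 * Illustrative sketch (not part of the original file): the
 * snapshot-then-work pattern used above for walking a task's thread
 * list. References are taken under the task lock so each thread_t
 * stays valid, the expensive work happens unlocked, and the refs are
 * dropped afterwards.
 */
#if 0 /* example only */
static void
example_snapshot_threads(task_t task)
{
	thread_t *array;
	thread_t thread;
	uint32_t max, used = 0, i;

	max = task->active_thread_count;
	array = (thread_t *) kalloc(sizeof(thread_t) * max);

	task_lock(task);
	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		if (!thread->active || used >= max)
			continue;
		array[used++] = thread;
		thread_reference(thread);	/* pin while unlocked */
	}
	task_unlock(task);

	/* ... expensive per-thread work without the task lock held ... */

	for (i = 0; i < used; i++)
		thread_deallocate(array[i]);
	kfree(array, sizeof(thread_t) * max);
}
#endif
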
#if CONFIG_SECLUDED_MEMORY
extern void task_set_can_use_secluded_mem_locked(
	task_t task,
	boolean_t can_use_secluded_mem);
#endif /* CONFIG_SECLUDED_MEMORY */

kern_return_t
task_terminate_internal(
	task_t task)
{
	thread_t thread, self;
	task_t self_task;
	boolean_t interrupt_save;
	int pid = 0;

	assert(task != kernel_task);

	self = current_thread();
	self_task = self->task;

	/*
	 * Get the task locked and make sure that we are not racing
	 * with someone else trying to terminate us.
	 */
	if (task == self_task)
		task_lock(task);
	else if (task < self_task) {
		task_lock(task);
		task_lock(self_task);
	} else {
		task_lock(self_task);
		task_lock(task);
	}

#if CONFIG_SECLUDED_MEMORY
	if (task->task_can_use_secluded_mem) {
		task_set_can_use_secluded_mem_locked(task, FALSE);
	}
	task->task_could_use_secluded_mem = FALSE;
	task->task_could_also_use_secluded_mem = FALSE;

	if (task->task_suppressed_secluded) {
		stop_secluded_suppression(task);
	}
#endif /* CONFIG_SECLUDED_MEMORY */

	if (!task->active) {
		/*
		 * Task is already being terminated.
		 * Just return an error. If we are dying, this will
		 * just get us to our AST special handler and that
		 * will get us to finalize the termination of ourselves.
		 */
		task_unlock(task);
		if (self_task != task)
			task_unlock(self_task);

		return (KERN_FAILURE);
	}

	if (task_corpse_pending_report(task)) {
		/*
		 * Task is marked for reporting as a corpse.
		 * Just return an error. This will
		 * just get us to our AST special handler and that
		 * will get us to finish the path to death.
		 */
		task_unlock(task);
		if (self_task != task)
			task_unlock(self_task);

		return (KERN_FAILURE);
	}

	if (self_task != task)
		task_unlock(self_task);

	/*
	 * Make sure the current thread does not get aborted out of
	 * the waits inside these operations.
	 */
	interrupt_save = thread_interrupt_level(THREAD_UNINT);

	/*
	 * Indicate that we want all the threads to stop executing
	 * at user space by holding the task (we would have held
	 * each thread independently in thread_terminate_internal -
	 * but this way we may be more likely to already find it
	 * held there). Mark the task inactive, and prevent
	 * further task operations via the task port.
	 */
	task_hold_locked(task);
	task->active = FALSE;
	ipc_task_disable(task);

#if CONFIG_TELEMETRY
	/*
	 * Notify telemetry that this task is going away.
	 */
	telemetry_task_ctl_locked(task, TF_TELEMETRY, 0);
#endif

	/*
	 * Terminate each thread in the task.
	 */
	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		thread_terminate_internal(thread);
	}

#ifdef MACH_BSD
	if (task->bsd_info != NULL && !task_is_exec_copy(task)) {
		pid = proc_pid(task->bsd_info);
	}
#endif /* MACH_BSD */

	task_unlock(task);

	proc_set_task_policy(task, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_TERMINATED, TASK_POLICY_ENABLE);

	/* Early object reap phase */

// PR-17045188: Revisit implementation
//	task_partial_reap(task, pid);

#if CONFIG_EMBEDDED
	/*
	 * remove all task watchers
	 */
	task_removewatchers(task);

#endif /* CONFIG_EMBEDDED */

	/*
	 * Destroy all synchronizers owned by the task.
	 */
	task_synchronizer_destroy_all(task);

	/*
	 * Destroy the IPC space, leaving just a reference for it.
	 */
	ipc_space_terminate(task->itk_space);

#if 00
	/* if some ledgers go negative on tear-down again... */
	ledger_disable_panic_on_negative(task->map->pmap->ledger,
	    task_ledgers.phys_footprint);
	ledger_disable_panic_on_negative(task->map->pmap->ledger,
	    task_ledgers.internal);
	ledger_disable_panic_on_negative(task->map->pmap->ledger,
	    task_ledgers.internal_compressed);
	ledger_disable_panic_on_negative(task->map->pmap->ledger,
	    task_ledgers.iokit_mapped);
	ledger_disable_panic_on_negative(task->map->pmap->ledger,
	    task_ledgers.alternate_accounting);
	ledger_disable_panic_on_negative(task->map->pmap->ledger,
	    task_ledgers.alternate_accounting_compressed);
#endif

	/*
	 * If the current thread is a member of the task
	 * being terminated, then the last reference to
	 * the task will not be dropped until the thread
	 * is finally reaped. To avoid incurring the
	 * expense of removing the address space regions
	 * at reap time, we do it explicitly here.
	 */

	vm_map_lock(task->map);
	vm_map_disable_hole_optimization(task->map);
	vm_map_unlock(task->map);

#if MACH_ASSERT
	/*
	 * Identify the pmap's process, in case the pmap ledgers drift
	 * and we have to report it.
	 */
	char procname[17];
	if (task->bsd_info && !task_is_exec_copy(task)) {
		pid = proc_pid(task->bsd_info);
		proc_name_kdp(task, procname, sizeof (procname));
	} else {
		pid = 0;
		strlcpy(procname, "<unknown>", sizeof (procname));
	}
	pmap_set_process(task->map->pmap, pid, procname);
#endif /* MACH_ASSERT */

	vm_map_remove(task->map,
	    task->map->min_offset,
	    task->map->max_offset,
	    /*
	     * Final cleanup:
	     * + no unnesting
	     * + remove immutable mappings
	     * + allow gaps in range
	     */
	    (VM_MAP_REMOVE_NO_UNNESTING |
	    VM_MAP_REMOVE_IMMUTABLE |
	    VM_MAP_REMOVE_GAPS_OK));

	/* release our shared region */
	vm_shared_region_set(task, NULL);


	lck_mtx_lock(&tasks_threads_lock);
	queue_remove(&tasks, task, task_t, tasks);
	queue_enter(&terminated_tasks, task, task_t, tasks);
	tasks_count--;
	terminated_tasks_count++;
	lck_mtx_unlock(&tasks_threads_lock);

	/*
	 * We no longer need to guard against being aborted, so restore
	 * the previous interruptible state.
	 */
	thread_interrupt_level(interrupt_save);

#if KPC
	/* force the task to release all ctrs */
	if (task->t_kpc & TASK_KPC_FORCED_ALL_CTRS)
		kpc_force_all_ctrs(task, 0);
#endif /* KPC */

#if CONFIG_COALITIONS
	/*
	 * Leave our coalitions. (drop activation but not reference)
	 */
	coalitions_remove_task(task);
#endif

	/*
	 * Get rid of the task's active reference on itself.
	 */
	task_deallocate(task);

	return (KERN_SUCCESS);
}

void
tasks_system_suspend(boolean_t suspend)
{
	task_t task;

	lck_mtx_lock(&tasks_threads_lock);
	assert(tasks_suspend_state != suspend);
	tasks_suspend_state = suspend;
	queue_iterate(&tasks, task, task_t, tasks) {
		if (task == kernel_task) {
			continue;
		}
		suspend ? task_suspend_internal(task) : task_resume_internal(task);
	}
	lck_mtx_unlock(&tasks_threads_lock);
}

/*
 * task_start_halt:
 *
 * Shut the current task down (except for the current thread) in
 * preparation for dramatic changes to the task (probably exec).
 * We hold the task and mark all other threads in the task for
 * termination.
 */
kern_return_t
task_start_halt(task_t task)
{
	kern_return_t kr = KERN_SUCCESS;
	task_lock(task);
	kr = task_start_halt_locked(task, FALSE);
	task_unlock(task);
	return kr;
}

static kern_return_t
task_start_halt_locked(task_t task, boolean_t should_mark_corpse)
{
	thread_t thread, self;
	uint64_t dispatchqueue_offset;

	assert(task != kernel_task);

	self = current_thread();

	if (task != self->task && !task_is_a_corpse_fork(task))
		return (KERN_INVALID_ARGUMENT);

	if (task->halting || !task->active || !self->active) {
		/*
		 * Task or current thread is already being terminated.
		 * Hurry up and return out of the current kernel context
		 * so that we run our AST special handler to terminate
		 * ourselves.
		 */
		return (KERN_FAILURE);
	}

	task->halting = TRUE;

	/*
	 * Mark all the threads to keep them from starting any more
	 * user-level execution. The thread_terminate_internal code
	 * would do this on a thread by thread basis anyway, but this
	 * gives us a better chance of not having to wait there.
	 */
	task_hold_locked(task);
	dispatchqueue_offset = get_dispatchqueue_offset_from_proc(task->bsd_info);

	/*
	 * Terminate all the other threads in the task.
	 */
	queue_iterate(&task->threads, thread, thread_t, task_threads)
	{
		if (should_mark_corpse) {
			thread_mtx_lock(thread);
			thread->inspection = TRUE;
			thread_mtx_unlock(thread);
		}
		if (thread != self)
			thread_terminate_internal(thread);
	}
	task->dispatchqueue_offset = dispatchqueue_offset;

	task_release_locked(task);

	return KERN_SUCCESS;
}


/*
 * task_complete_halt:
 *
 * Complete task halt by waiting for threads to terminate, then clean
 * up task resources (VM, port namespace, etc...) and then let the
 * current thread go in the (practically empty) task context.
 *
 * Note: the task->halting flag is not cleared, in order to avoid the
 * creation of new threads in the old exec'ed task.
 */
void
task_complete_halt(task_t task)
{
	task_lock(task);
	assert(task->halting);
	assert(task == current_task());

	/*
	 * Wait for the other threads to get shut down.
	 * When the last other thread is reaped, we'll be
	 * woken up.
	 */
	if (task->thread_count > 1) {
		assert_wait((event_t)&task->halting, THREAD_UNINT);
		task_unlock(task);
		thread_block(THREAD_CONTINUE_NULL);
	} else {
		task_unlock(task);
	}

	/*
	 * Give the machine dependent code a chance
	 * to perform cleanup of task-level resources
	 * associated with the current thread before
	 * ripping apart the task.
	 */
	machine_task_terminate(task);

	/*
	 * Destroy all synchronizers owned by the task.
	 */
	task_synchronizer_destroy_all(task);

	/*
	 * Destroy the contents of the IPC space, leaving just
	 * a reference for it.
	 */
	ipc_space_clean(task->itk_space);

	/*
	 * Clean out the address space, as we are going to be
	 * getting a new one.
	 */
	vm_map_remove(task->map, task->map->min_offset,
	    task->map->max_offset,
	    /*
	     * Final cleanup:
	     * + no unnesting
	     * + remove immutable mappings
	     * + allow gaps in the range
	     */
	    (VM_MAP_REMOVE_NO_UNNESTING |
	    VM_MAP_REMOVE_IMMUTABLE |
	    VM_MAP_REMOVE_GAPS_OK));

	/*
	 * Kick out any IOKitUser handles to the task. At best they're stale,
	 * at worst someone is racing a SUID exec.
	 */
	iokit_task_terminate(task);
}

/*
 * task_hold_locked:
 *
 * Suspend execution of the specified task.
 * This is a recursive-style suspension of the task; a count of
 * suspends is maintained.
 *
 * CONDITIONS: the task is locked and active.
 */
void
task_hold_locked(
	task_t task)
{
	thread_t thread;

	assert(task->active);

	if (task->suspend_count++ > 0)
		return;

	if (task->bsd_info) {
		workq_proc_suspended(task->bsd_info);
	}

	/*
	 * Iterate through all the threads and hold them.
	 */
	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		thread_mtx_lock(thread);
		thread_hold(thread);
		thread_mtx_unlock(thread);
	}
}

/*
 * task_hold:
 *
 * Same as the internal routine above, except that it must lock
 * and verify that the task is active. This differs from task_suspend
 * in that it places a kernel hold on the task rather than just a
 * user-level hold. This keeps users from over-resuming and setting
 * it running out from under the kernel.
 *
 * CONDITIONS: the caller holds a reference on the task
 */
kern_return_t
task_hold(
	task_t task)
{
	if (task == TASK_NULL)
		return (KERN_INVALID_ARGUMENT);

	task_lock(task);

	if (!task->active) {
		task_unlock(task);

		return (KERN_FAILURE);
	}

	task_hold_locked(task);
	task_unlock(task);

	return (KERN_SUCCESS);
}

kern_return_t
task_wait(
	task_t task,
	boolean_t until_not_runnable)
{
	if (task == TASK_NULL)
		return (KERN_INVALID_ARGUMENT);

	task_lock(task);

	if (!task->active) {
		task_unlock(task);

		return (KERN_FAILURE);
	}

	task_wait_locked(task, until_not_runnable);
	task_unlock(task);

	return (KERN_SUCCESS);
}

/*
 * task_wait_locked:
 *
 * Wait for all threads in the task to stop.
 *
 * Conditions:
 *	Called with the task locked, active, and held.
 */
void
task_wait_locked(
	task_t task,
	boolean_t until_not_runnable)
{
	thread_t thread, self;

	assert(task->active);
	assert(task->suspend_count > 0);

	self = current_thread();

	/*
	 * Iterate through all the threads and wait for them to
	 * stop. Do not wait for the current thread if it is within
	 * the task.
	 */
	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		if (thread != self)
			thread_wait(thread, until_not_runnable);
	}
}

/*
 * task_release_locked:
 *
 * Release a kernel hold on a task.
 *
 * CONDITIONS: the task is locked and active
 */
void
task_release_locked(
	task_t task)
{
	thread_t thread;

	assert(task->active);
	assert(task->suspend_count > 0);

	if (--task->suspend_count > 0)
		return;

	if (task->bsd_info) {
		workq_proc_resumed(task->bsd_info);
	}

	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		thread_mtx_lock(thread);
		thread_release(thread);
		thread_mtx_unlock(thread);
	}
}

/*
 * task_release:
 *
 * Same as the internal routine above, except that it must lock
 * and verify that the task is active.
 *
 * CONDITIONS: The caller holds a reference to the task
 */
kern_return_t
task_release(
	task_t task)
{
	if (task == TASK_NULL)
		return (KERN_INVALID_ARGUMENT);

	task_lock(task);

	if (!task->active) {
		task_unlock(task);

		return (KERN_FAILURE);
	}

	task_release_locked(task);
	task_unlock(task);

	return (KERN_SUCCESS);
}

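/*
 * Illustrative sketch (not part of the original file): the kernel-level
 * stop protocol built from the primitives above. A debugger-like client
 * holds the task, waits for its threads to stop executing user code,
 * inspects state, then releases the hold.
 */
#if 0 /* example only */
static kern_return_t
example_stop_and_inspect(task_t task)
{
	kern_return_t kr;

	kr = task_hold(task);		/* kernel hold; suspend_count++ */
	if (kr != KERN_SUCCESS)
		return kr;

	kr = task_wait(task, FALSE);	/* wait for threads to stop in user space */
	if (kr == KERN_SUCCESS) {
		/* ... inspect the stopped task here ... */
	}

	return task_release(task);	/* drop the hold; suspend_count-- */
}
#endif
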
kern_return_t
task_threads(
	task_t task,
	thread_act_array_t *threads_out,
	mach_msg_type_number_t *count)
{
	mach_msg_type_number_t actual;
	thread_t *thread_list;
	thread_t thread;
	vm_size_t size, size_needed;
	void *addr;
	unsigned int i, j;

	if (task == TASK_NULL)
		return (KERN_INVALID_ARGUMENT);

	size = 0; addr = NULL;

	for (;;) {
		task_lock(task);
		if (!task->active) {
			task_unlock(task);

			if (size != 0)
				kfree(addr, size);

			return (KERN_FAILURE);
		}

		actual = task->thread_count;

		/* do we have the memory we need? */
		size_needed = actual * sizeof (mach_port_t);
		if (size_needed <= size)
			break;

		/* unlock the task and allocate more memory */
		task_unlock(task);

		if (size != 0)
			kfree(addr, size);

		assert(size_needed > 0);
		size = size_needed;

		addr = kalloc(size);
		if (addr == 0)
			return (KERN_RESOURCE_SHORTAGE);
	}

	/* OK, have memory and the task is locked & active */
	thread_list = (thread_t *)addr;

	i = j = 0;

	for (thread = (thread_t)queue_first(&task->threads); i < actual;
	    ++i, thread = (thread_t)queue_next(&thread->task_threads)) {
		thread_reference_internal(thread);
		thread_list[j++] = thread;
	}

	assert(queue_end(&task->threads, (queue_entry_t)thread));

	actual = j;
	size_needed = actual * sizeof (mach_port_t);

	/* can unlock the task now that we've got the thread refs */
	task_unlock(task);

	if (actual == 0) {
		/* no threads, so return a null pointer and deallocate memory */

		*threads_out = NULL;
		*count = 0;

		if (size != 0)
			kfree(addr, size);
	} else {
		/* if we allocated too much, must copy */

		if (size_needed < size) {
			void *newaddr;

			newaddr = kalloc(size_needed);
			if (newaddr == 0) {
				for (i = 0; i < actual; ++i)
					thread_deallocate(thread_list[i]);
				kfree(addr, size);
				return (KERN_RESOURCE_SHORTAGE);
			}

			bcopy(addr, newaddr, size_needed);
			kfree(addr, size);
			thread_list = (thread_t *)newaddr;
		}

		*threads_out = thread_list;
		*count = actual;

		/* do the conversion that MIG should handle */

		for (i = 0; i < actual; ++i)
			((ipc_port_t *) thread_list)[i] = convert_thread_to_port(thread_list[i]);
	}

	return (KERN_SUCCESS);
}

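/*
 * Illustrative sketch (not part of the original file): what a
 * user-space caller of the task_threads() MIG routine looks like. The
 * returned port array and the VM allocation backing it must both be
 * released by the caller.
 */
#if 0 /* example only; user-space code, not kernel code */
#include <mach/mach.h>

static void
example_list_threads(task_t task)
{
	thread_act_array_t threads;
	mach_msg_type_number_t count, i;

	if (task_threads(task, &threads, &count) != KERN_SUCCESS)
		return;

	for (i = 0; i < count; i++) {
		/* ... use threads[i] (e.g. with thread_info()) ... */
		mach_port_deallocate(mach_task_self(), threads[i]);
	}
	vm_deallocate(mach_task_self(), (vm_address_t)threads,
	    count * sizeof(thread_act_t));
}
#endif
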
#define TASK_HOLD_NORMAL	0
#define TASK_HOLD_PIDSUSPEND	1
#define TASK_HOLD_LEGACY	2
#define TASK_HOLD_LEGACY_ALL	3

static kern_return_t
place_task_hold(
	task_t task,
	int mode)
{
	if (!task->active && !task_is_a_corpse(task)) {
		return (KERN_FAILURE);
	}

	/* Return success for a corpse task */
	if (task_is_a_corpse(task)) {
		return KERN_SUCCESS;
	}

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    MACHDBG_CODE(DBG_MACH_IPC,MACH_TASK_SUSPEND) | DBG_FUNC_NONE,
	    task_pid(task), ((thread_t)queue_first(&task->threads))->thread_id,
	    task->user_stop_count, task->user_stop_count + 1, 0);

#if MACH_ASSERT
	current_task()->suspends_outstanding++;
#endif

	if (mode == TASK_HOLD_LEGACY)
		task->legacy_stop_count++;

	if (task->user_stop_count++ > 0) {
		/*
		 * If the stop count was positive, the task is
		 * already stopped and we can exit.
		 */
		return (KERN_SUCCESS);
	}

	/*
	 * Put a kernel-level hold on the threads in the task (all
	 * user-level task suspensions added together represent a
	 * single kernel-level hold). We then wait for the threads
	 * to stop executing user code.
	 */
	task_hold_locked(task);
	task_wait_locked(task, FALSE);

	return (KERN_SUCCESS);
}

static kern_return_t
release_task_hold(
	task_t task,
	int mode)
{
	boolean_t release = FALSE;

	if (!task->active && !task_is_a_corpse(task)) {
		return (KERN_FAILURE);
	}

	/* Return success for a corpse task */
	if (task_is_a_corpse(task)) {
		return KERN_SUCCESS;
	}

	if (mode == TASK_HOLD_PIDSUSPEND) {
		if (task->pidsuspended == FALSE) {
			return (KERN_FAILURE);
		}
		task->pidsuspended = FALSE;
	}

	if (task->user_stop_count > (task->pidsuspended ? 1 : 0)) {

		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		    MACHDBG_CODE(DBG_MACH_IPC,MACH_TASK_RESUME) | DBG_FUNC_NONE,
		    task_pid(task), ((thread_t)queue_first(&task->threads))->thread_id,
		    task->user_stop_count, mode, task->legacy_stop_count);

#if MACH_ASSERT
		/*
		 * This is obviously not robust; if we suspend one task and then resume a different one,
		 * we'll fly under the radar. This is only meant to catch the common case of a crashed
		 * or buggy suspender.
		 */
		current_task()->suspends_outstanding--;
#endif

		if (mode == TASK_HOLD_LEGACY_ALL) {
			if (task->legacy_stop_count >= task->user_stop_count) {
				task->user_stop_count = 0;
				release = TRUE;
			} else {
				task->user_stop_count -= task->legacy_stop_count;
			}
			task->legacy_stop_count = 0;
		} else {
			if (mode == TASK_HOLD_LEGACY && task->legacy_stop_count > 0)
				task->legacy_stop_count--;
			if (--task->user_stop_count == 0)
				release = TRUE;
		}
	} else {
		return (KERN_FAILURE);
	}

	/*
	 * Release the task if necessary.
	 */
	if (release)
		task_release_locked(task);

	return (KERN_SUCCESS);
}


/*
 * task_suspend:
 *
 * Implement an (old-fashioned) user-level suspension on a task.
 *
 * Because the user isn't expecting to have to manage a suspension
 * token, we'll track it for the caller in the kernel in the form of a
 * naked send right to the task's resume port. All such send rights
 * account for a single suspension against the task (unlike task_suspend2()
 * where each caller gets a unique suspension count represented by a
 * unique send-once right).
 *
 * Conditions:
 *	The caller holds a reference to the task
 */
kern_return_t
task_suspend(
	task_t task)
{
	kern_return_t kr;
	mach_port_t port, send, old_notify;
	mach_port_name_t name;

	if (task == TASK_NULL || task == kernel_task)
		return (KERN_INVALID_ARGUMENT);

	task_lock(task);

	/*
	 * Claim a send right on the task resume port, and request a no-senders
	 * notification on that port (if none outstanding).
	 */
	if (task->itk_resume == IP_NULL) {
		task->itk_resume = ipc_port_alloc_kernel();
		if (!IP_VALID(task->itk_resume))
			panic("failed to create resume port");
		ipc_kobject_set(task->itk_resume, (ipc_kobject_t)task, IKOT_TASK_RESUME);
	}

	port = task->itk_resume;
	ip_lock(port);
	assert(ip_active(port));

	send = ipc_port_make_send_locked(port);
	assert(IP_VALID(send));

	if (port->ip_nsrequest == IP_NULL) {
		ipc_port_nsrequest(port, port->ip_mscount, ipc_port_make_sonce_locked(port), &old_notify);
		assert(old_notify == IP_NULL);
		/* port unlocked */
	} else {
		ip_unlock(port);
	}

	/*
	 * place a legacy hold on the task.
	 */
	kr = place_task_hold(task, TASK_HOLD_LEGACY);
	if (kr != KERN_SUCCESS) {
		task_unlock(task);
		ipc_port_release_send(send);
		return kr;
	}

	task_unlock(task);

	/*
	 * Copyout the send right into the calling task's IPC space. It won't know it is there,
	 * but we'll look it up when calling a traditional resume. Any IPC operations that
	 * deallocate the send right will auto-release the suspension.
	 */
	if ((kr = ipc_kmsg_copyout_object(current_task()->itk_space, (ipc_object_t)send,
	    MACH_MSG_TYPE_MOVE_SEND, &name)) != KERN_SUCCESS) {
		printf("warning: %s(%d) failed to copyout suspension token for pid %d with error: %d\n",
		    proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info),
		    task_pid(task), kr);
		return (kr);
	}

	return (kr);
}

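/*
 * Illustrative sketch (not part of the original file): the legacy
 * user-space pairing for the routine above. Each task_suspend() must be
 * balanced by a task_resume() (or by destruction of the hidden send
 * right, which auto-releases the suspension).
 */
#if 0 /* example only; user-space code, not kernel code */
#include <mach/mach.h>

static kern_return_t
example_legacy_suspend_resume(task_t task)
{
	kern_return_t kr = task_suspend(task);	/* stop: stop count 0 -> 1 */
	if (kr != KERN_SUCCESS)
		return kr;
	/* ... the task is stopped here ... */
	return task_resume(task);		/* run again: stop count 1 -> 0 */
}
#endif
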
/*
 * task_resume:
 *	Release a user hold on a task.
 *
 * Conditions:
 *	The caller holds a reference to the task
 */
kern_return_t
task_resume(
	task_t task)
{
	kern_return_t kr;
	mach_port_name_t resume_port_name;
	ipc_entry_t resume_port_entry;
	ipc_space_t space = current_task()->itk_space;

	if (task == TASK_NULL || task == kernel_task)
		return (KERN_INVALID_ARGUMENT);

	/* release a legacy task hold */
	task_lock(task);
	kr = release_task_hold(task, TASK_HOLD_LEGACY);
	task_unlock(task);

	is_write_lock(space);
	if (is_active(space) && IP_VALID(task->itk_resume) &&
	    ipc_hash_lookup(space, (ipc_object_t)task->itk_resume, &resume_port_name, &resume_port_entry) == TRUE) {
		/*
		 * We found a suspension token in the caller's IPC space. Release a send right to indicate that
		 * we are holding one less legacy hold on the task from this caller. If the release failed,
		 * go ahead and drop all the rights, as someone either already released our holds or the task
		 * is gone.
		 */
		if (kr == KERN_SUCCESS)
			ipc_right_dealloc(space, resume_port_name, resume_port_entry);
		else
			ipc_right_destroy(space, resume_port_name, resume_port_entry, FALSE, 0);
		/* space unlocked */
	} else {
		is_write_unlock(space);
		if (kr == KERN_SUCCESS)
			printf("warning: %s(%d) performed out-of-band resume on pid %d\n",
			    proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info),
			    task_pid(task));
	}

	return kr;
}

/*
 * Suspend the target task.
 * Making/holding a token/reference/port is the caller's responsibility.
 */
kern_return_t
task_suspend_internal(task_t task)
{
	kern_return_t kr;

	if (task == TASK_NULL || task == kernel_task)
		return (KERN_INVALID_ARGUMENT);

	task_lock(task);
	kr = place_task_hold(task, TASK_HOLD_NORMAL);
	task_unlock(task);
	return (kr);
}

/*
 * Suspend the target task, and return a suspension token. The token
 * represents a reference on the suspended task.
 */
kern_return_t
task_suspend2(
	task_t task,
	task_suspension_token_t *suspend_token)
{
	kern_return_t kr;

	kr = task_suspend_internal(task);
	if (kr != KERN_SUCCESS) {
		*suspend_token = TASK_NULL;
		return (kr);
	}

	/*
	 * Take a reference on the target task and return that to the caller
	 * as a "suspension token," which can be converted into an SO right to
	 * the now-suspended task's resume port.
	 */
	task_reference_internal(task);
	*suspend_token = task;

	return (KERN_SUCCESS);
}

/*
 * Resume the task
 * (reference/token/port management is the caller's responsibility).
 */
kern_return_t
task_resume_internal(
	task_suspension_token_t task)
{
	kern_return_t kr;

	if (task == TASK_NULL || task == kernel_task)
		return (KERN_INVALID_ARGUMENT);

	task_lock(task);
	kr = release_task_hold(task, TASK_HOLD_NORMAL);
	task_unlock(task);
	return (kr);
}

/*
 * Resume the task using a suspension token. Consumes the token's ref.
 */
kern_return_t
task_resume2(
	task_suspension_token_t task)
{
	kern_return_t kr;

	kr = task_resume_internal(task);
	task_suspension_token_deallocate(task);

	return (kr);
}

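/*
 * Illustrative sketch (not part of the original file): the token-based
 * pairing. Unlike task_suspend(), each task_suspend2() call yields its
 * own token (a task reference), and task_resume2() consumes it, so
 * independent suspenders cannot release each other's holds.
 */
#if 0 /* example only */
static kern_return_t
example_token_suspend_resume(task_t task)
{
	task_suspension_token_t token;
	kern_return_t kr;

	kr = task_suspend2(task, &token);	/* the token holds a task ref */
	if (kr != KERN_SUCCESS)
		return kr;

	/* ... the task is suspended here ... */

	return task_resume2(token);		/* resumes and consumes the token */
}
#endif
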
boolean_t
task_suspension_notify(mach_msg_header_t *request_header)
{
	ipc_port_t port = (ipc_port_t) request_header->msgh_remote_port;
	task_t task = convert_port_to_task_suspension_token(port);
	mach_msg_type_number_t not_count;

	if (task == TASK_NULL || task == kernel_task)
		return TRUE;  /* nothing to do */

	switch (request_header->msgh_id) {

	case MACH_NOTIFY_SEND_ONCE:
		/* release the hold held by this specific send-once right */
		task_lock(task);
		release_task_hold(task, TASK_HOLD_NORMAL);
		task_unlock(task);
		break;

	case MACH_NOTIFY_NO_SENDERS:
		not_count = ((mach_no_senders_notification_t *)request_header)->not_count;

		task_lock(task);
		ip_lock(port);
		if (port->ip_mscount == not_count) {

			/* release all the [remaining] outstanding legacy holds */
			assert(port->ip_nsrequest == IP_NULL);
			ip_unlock(port);
			release_task_hold(task, TASK_HOLD_LEGACY_ALL);
			task_unlock(task);

		} else if (port->ip_nsrequest == IP_NULL) {
			ipc_port_t old_notify;

			task_unlock(task);
			/* new send rights, re-arm notification at the current make-send count */
			ipc_port_nsrequest(port, port->ip_mscount, ipc_port_make_sonce_locked(port), &old_notify);
			assert(old_notify == IP_NULL);
			/* port unlocked */
		} else {
			ip_unlock(port);
			task_unlock(task);
		}
		break;

	default:
		break;
	}

	task_suspension_token_deallocate(task);	/* drop the token reference */
	return TRUE;
}

kern_return_t
task_pidsuspend_locked(task_t task)
{
	kern_return_t kr;

	if (task->pidsuspended) {
		kr = KERN_FAILURE;
		goto out;
	}

	task->pidsuspended = TRUE;

	kr = place_task_hold(task, TASK_HOLD_PIDSUSPEND);
	if (kr != KERN_SUCCESS) {
		task->pidsuspended = FALSE;
	}
out:
	return(kr);
}

/*
 * task_pidsuspend:
 *
 * Suspends a task by placing a hold on its threads.
 *
 * Conditions:
 *	The caller holds a reference to the task
 */
kern_return_t
task_pidsuspend(
	task_t task)
{
	kern_return_t kr;

	if (task == TASK_NULL || task == kernel_task)
		return (KERN_INVALID_ARGUMENT);

	task_lock(task);

	kr = task_pidsuspend_locked(task);

	task_unlock(task);

	return (kr);
}

/*
 * task_pidresume:
 *		Resumes a previously suspended task.
 *
 * Conditions:
 *		The caller holds a reference to the task
 */
kern_return_t
task_pidresume(
	task_t	task)
{
	kern_return_t	kr;

	if (task == TASK_NULL || task == kernel_task)
		return (KERN_INVALID_ARGUMENT);

	task_lock(task);

#if CONFIG_FREEZE

	while (task->changing_freeze_state) {

		assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
		task_unlock(task);
		thread_block(THREAD_CONTINUE_NULL);

		task_lock(task);
	}
	task->changing_freeze_state = TRUE;
#endif

	kr = release_task_hold(task, TASK_HOLD_PIDSUSPEND);

	task_unlock(task);

#if CONFIG_FREEZE

	task_lock(task);

	if (kr == KERN_SUCCESS)
		task->frozen = FALSE;
	task->changing_freeze_state = FALSE;
	thread_wakeup(&task->changing_freeze_state);

	task_unlock(task);
#endif

	return (kr);
}

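/*
 * Kernel-internal pairing sketch (illustrative, not from this file): a
 * caller holding a task reference suspends via the pidsuspend hold and
 * later drops it. A second task_pidsuspend() on an already-pidsuspended
 * task fails with KERN_FAILURE until task_pidresume() clears the hold.
 *
 *	if (task_pidsuspend(task) == KERN_SUCCESS) {
 *		// ... the task's threads are held ...
 *		(void) task_pidresume(task);
 *	}
 */
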

#if DEVELOPMENT || DEBUG

extern void IOSleep(int);

kern_return_t
task_disconnect_page_mappings(task_t task)
{
	int	n;

	if (task == TASK_NULL || task == kernel_task)
		return (KERN_INVALID_ARGUMENT);

	/*
	 * This function strips all of the mappings from the pmap of the
	 * specified task, forcing the task to re-fault every page it is
	 * actively using; this lets us approximate the task's true working
	 * set. We only engage if at least 1 of the threads in the task is
	 * runnable, but we sweep repeatedly (the limit of 100 sweeps is
	 * arbitrary and should be revisited as we gain experience) to get a
	 * better view into which areas within a page are being visited,
	 * rather than only seeing the first fault of a page after the task
	 * becomes runnable. A future refinement may block until awakened by
	 * a thread in this task being made runnable; for now the user-level
	 * debug tool driving the sysctl polls periodically.
	 */
	for (n = 0; n < 100; n++) {
		thread_t	thread;
		boolean_t	runnable;
		boolean_t	do_unnest;
		int		page_count;

		runnable = FALSE;
		do_unnest = FALSE;

		task_lock(task);

		queue_iterate(&task->threads, thread, thread_t, task_threads) {

			if (thread->state & TH_RUN) {
				runnable = TRUE;
				break;
			}
		}
		if (n == 0)
			task->task_disconnected_count++;

		if (task->task_unnested == FALSE) {
			if (runnable == TRUE) {
				task->task_unnested = TRUE;
				do_unnest = TRUE;
			}
		}
		task_unlock(task);

		if (runnable == FALSE)
			break;

		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_TASK_PAGE_MAPPINGS)) | DBG_FUNC_START,
					  task, do_unnest, task->task_disconnected_count, 0, 0);

		page_count = vm_map_disconnect_page_mappings(task->map, do_unnest);

		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_TASK_PAGE_MAPPINGS)) | DBG_FUNC_END,
					  task, page_count, 0, 0, 0);

		if ((n % 5) == 4)
			IOSleep(1);
	}
	return (KERN_SUCCESS);
}

#endif


#if CONFIG_FREEZE

/*
 * task_freeze:
 *
 * Freeze a task.
 *
 * Conditions:
 *	The caller holds a reference to the task
 */
extern void		vm_wake_compactor_swapper(void);
extern queue_head_t	c_swapout_list_head;

kern_return_t
task_freeze(
	task_t    task,
	uint32_t  *purgeable_count,
	uint32_t  *wired_count,
	uint32_t  *clean_count,
	uint32_t  *dirty_count,
	uint32_t  dirty_budget,
	uint32_t  *shared_count,
	int       *freezer_error_code,
	boolean_t eval_only)
{
	kern_return_t kr = KERN_SUCCESS;

	if (task == TASK_NULL || task == kernel_task)
		return (KERN_INVALID_ARGUMENT);

	task_lock(task);

	while (task->changing_freeze_state) {

		assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
		task_unlock(task);
		thread_block(THREAD_CONTINUE_NULL);

		task_lock(task);
	}
	if (task->frozen) {
		task_unlock(task);
		return (KERN_FAILURE);
	}
	task->changing_freeze_state = TRUE;

	task_unlock(task);

	kr = vm_map_freeze(task->map,
			   purgeable_count,
			   wired_count,
			   clean_count,
			   dirty_count,
			   dirty_budget,
			   shared_count,
			   freezer_error_code,
			   eval_only);

	task_lock(task);

	if ((kr == KERN_SUCCESS) && (eval_only == FALSE)) {
		task->frozen = TRUE;
	}

	task->changing_freeze_state = FALSE;
	thread_wakeup(&task->changing_freeze_state);

	task_unlock(task);

	if (VM_CONFIG_COMPRESSOR_IS_PRESENT &&
	    (eval_only == FALSE)) {
		vm_wake_compactor_swapper();
		/*
		 * We do an explicit wakeup of the swapout thread here
		 * because the compact_and_swap routines don't have
		 * knowledge about these kinds of "per-task packed c_segs",
		 * and so will not be evaluating whether we need to do
		 * a wakeup there.
		 */
		thread_wakeup((event_t)&c_swapout_list_head);
	}

	return (kr);
}

/*
 * task_thaw:
 *
 * Thaw a currently frozen task.
 *
 * Conditions:
 *	The caller holds a reference to the task
 */
kern_return_t
task_thaw(
	task_t		task)
{
	if (task == TASK_NULL || task == kernel_task)
		return (KERN_INVALID_ARGUMENT);

	task_lock(task);

	while (task->changing_freeze_state) {

		assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
		task_unlock(task);
		thread_block(THREAD_CONTINUE_NULL);

		task_lock(task);
	}
	if (!task->frozen) {
		task_unlock(task);
		return (KERN_FAILURE);
	}
	task->frozen = FALSE;

	task_unlock(task);

	return (KERN_SUCCESS);
}

#endif /* CONFIG_FREEZE */

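/*
 * Illustrative freezer call sequence (a hedged sketch; the real caller is
 * the memorystatus/freezer machinery, and the local variable names below
 * are placeholders, not identifiers from this file):
 *
 *	uint32_t purgeable, wired, clean, dirty, shared;
 *	int ferr;
 *	kr = task_freeze(task, &purgeable, &wired, &clean, &dirty,
 *	    dirty_budget, &shared, &ferr, FALSE);	// eval_only == FALSE
 *	// ... later, to allow the task's pages to be faulted back in ...
 *	kr = task_thaw(task);
 */
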
kern_return_t
host_security_set_task_token(
	host_security_t	 host_security,
	task_t		 task,
	security_token_t sec_token,
	audit_token_t	 audit_token,
	host_priv_t	 host_priv)
{
	ipc_port_t	 host_port;
	kern_return_t	 kr;

	if (task == TASK_NULL)
		return(KERN_INVALID_ARGUMENT);

	if (host_security == HOST_NULL)
		return(KERN_INVALID_SECURITY);

	task_lock(task);
	task->sec_token = sec_token;
	task->audit_token = audit_token;

	task_unlock(task);

	if (host_priv != HOST_PRIV_NULL) {
		kr = host_get_host_priv_port(host_priv, &host_port);
	} else {
		kr = host_get_host_port(host_priv_self(), &host_port);
	}
	assert(kr == KERN_SUCCESS);
	kr = task_set_special_port(task, TASK_HOST_PORT, host_port);
	return(kr);
}

kern_return_t
task_send_trace_memory(
	__unused task_t   target_task,
	__unused uint32_t pid,
	__unused uint64_t uniqueid)
{
	return KERN_INVALID_ARGUMENT;
}

/*
 * This routine was added, pretty much exclusively, for registering the
 * RPC glue vector for in-kernel short-circuited tasks. Rather than
 * removing it completely, I have only disabled that feature (which was
 * the only feature at the time). It just appears that we are going to
 * want to add some user data to tasks in the future (i.e. bsd info,
 * task names, etc...), so I left it in the formal task interface.
 */
kern_return_t
task_set_info(
	task_t		task,
	task_flavor_t	flavor,
	__unused task_info_t	task_info_in,		/* pointer to IN array */
	__unused mach_msg_type_number_t	task_info_count)
{
	if (task == TASK_NULL)
		return(KERN_INVALID_ARGUMENT);

	switch (flavor) {

#if CONFIG_ATM
	case TASK_TRACE_MEMORY_INFO:
	{
		if (task_info_count != TASK_TRACE_MEMORY_INFO_COUNT)
			return (KERN_INVALID_ARGUMENT);

		assert(task_info_in != NULL);
		task_trace_memory_info_t mem_info;
		mem_info = (task_trace_memory_info_t) task_info_in;
		kern_return_t kr = atm_register_trace_memory(task,
				mem_info->user_memory_address,
				mem_info->buffer_size);
		return kr;
	}
#endif

	default:
		return (KERN_INVALID_ARGUMENT);
	}
	return (KERN_SUCCESS);
}

3e170ce0 3700int radar_20146450 = 1;
1c79356b
A
3701kern_return_t
3702task_info(
39236c6e
A
3703 task_t task,
3704 task_flavor_t flavor,
3705 task_info_t task_info_out,
1c79356b
A
3706 mach_msg_type_number_t *task_info_count)
3707{
b0d623f7 3708 kern_return_t error = KERN_SUCCESS;
39037602 3709 mach_msg_type_number_t original_task_info_count;
b0d623f7 3710
1c79356b 3711 if (task == TASK_NULL)
91447636 3712 return (KERN_INVALID_ARGUMENT);
1c79356b 3713
39037602 3714 original_task_info_count = *task_info_count;
b0d623f7
A
3715 task_lock(task);
3716
3717 if ((task != current_task()) && (!task->active)) {
3718 task_unlock(task);
3719 return (KERN_INVALID_ARGUMENT);
3720 }
3721
1c79356b
A
3722 switch (flavor) {
3723
91447636 3724 case TASK_BASIC_INFO_32:
2d21ac55 3725 case TASK_BASIC2_INFO_32:
5ba3f43e
A
3726#if defined(__arm__) || defined(__arm64__)
3727 case TASK_BASIC_INFO_64:
3728#endif
91447636
A
3729 {
3730 task_basic_info_32_t basic_info;
b0d623f7
A
3731 vm_map_t map;
3732 clock_sec_t secs;
3733 clock_usec_t usecs;
1c79356b 3734
b0d623f7
A
3735 if (*task_info_count < TASK_BASIC_INFO_32_COUNT) {
3736 error = KERN_INVALID_ARGUMENT;
3737 break;
3738 }
1c79356b 3739
91447636 3740 basic_info = (task_basic_info_32_t)task_info_out;
1c79356b 3741
91447636 3742 map = (task == kernel_task)? kernel_map: task->map;
b0d623f7 3743 basic_info->virtual_size = (typeof(basic_info->virtual_size))map->size;
2d21ac55
A
3744 if (flavor == TASK_BASIC2_INFO_32) {
3745 /*
3746 * The "BASIC2" flavor gets the maximum resident
3747 * size instead of the current resident size...
3748 */
3749 basic_info->resident_size = pmap_resident_max(map->pmap);
3750 } else {
3751 basic_info->resident_size = pmap_resident_count(map->pmap);
3752 }
3753 basic_info->resident_size *= PAGE_SIZE;
1c79356b 3754
0b4e3aa0
A
3755 basic_info->policy = ((task != kernel_task)?
3756 POLICY_TIMESHARE: POLICY_RR);
1c79356b 3757 basic_info->suspend_count = task->user_stop_count;
91447636 3758
b0d623f7
A
3759 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
3760 basic_info->user_time.seconds =
3761 (typeof(basic_info->user_time.seconds))secs;
3762 basic_info->user_time.microseconds = usecs;
3763
3764 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
3765 basic_info->system_time.seconds =
3766 (typeof(basic_info->system_time.seconds))secs;
3767 basic_info->system_time.microseconds = usecs;
1c79356b 3768
91447636 3769 *task_info_count = TASK_BASIC_INFO_32_COUNT;
1c79356b 3770 break;
91447636 3771 }
1c79356b 3772
#if defined(__arm__) || defined(__arm64__)
	case TASK_BASIC_INFO_64_2:
	{
		task_basic_info_64_2_t	basic_info;
		vm_map_t		map;
		clock_sec_t		secs;
		clock_usec_t		usecs;

		if (*task_info_count < TASK_BASIC_INFO_64_2_COUNT) {
			error = KERN_INVALID_ARGUMENT;
			break;
		}

		basic_info = (task_basic_info_64_2_t)task_info_out;

		map = (task == kernel_task)? kernel_map: task->map;
		basic_info->virtual_size = map->size;
		basic_info->resident_size =
			(mach_vm_size_t)(pmap_resident_count(map->pmap))
			* PAGE_SIZE_64;

		basic_info->policy = ((task != kernel_task)?
						POLICY_TIMESHARE: POLICY_RR);
		basic_info->suspend_count = task->user_stop_count;

		absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
		basic_info->user_time.seconds =
			(typeof(basic_info->user_time.seconds))secs;
		basic_info->user_time.microseconds = usecs;

		absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
		basic_info->system_time.seconds =
			(typeof(basic_info->system_time.seconds))secs;
		basic_info->system_time.microseconds = usecs;

		*task_info_count = TASK_BASIC_INFO_64_2_COUNT;
		break;
	}

#else /* defined(__arm__) || defined(__arm64__) */
	case TASK_BASIC_INFO_64:
	{
		task_basic_info_64_t	basic_info;
		vm_map_t		map;
		clock_sec_t		secs;
		clock_usec_t		usecs;

		if (*task_info_count < TASK_BASIC_INFO_64_COUNT) {
			error = KERN_INVALID_ARGUMENT;
			break;
		}

		basic_info = (task_basic_info_64_t)task_info_out;

		map = (task == kernel_task)? kernel_map: task->map;
		basic_info->virtual_size = map->size;
		basic_info->resident_size =
			(mach_vm_size_t)(pmap_resident_count(map->pmap))
			* PAGE_SIZE_64;

		basic_info->policy = ((task != kernel_task)?
						POLICY_TIMESHARE: POLICY_RR);
		basic_info->suspend_count = task->user_stop_count;

		absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
		basic_info->user_time.seconds =
			(typeof(basic_info->user_time.seconds))secs;
		basic_info->user_time.microseconds = usecs;

		absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
		basic_info->system_time.seconds =
			(typeof(basic_info->system_time.seconds))secs;
		basic_info->system_time.microseconds = usecs;

		*task_info_count = TASK_BASIC_INFO_64_COUNT;
		break;
	}
#endif /* defined(__arm__) || defined(__arm64__) */

	case MACH_TASK_BASIC_INFO:
	{
		mach_task_basic_info_t	basic_info;
		vm_map_t		map;
		clock_sec_t		secs;
		clock_usec_t		usecs;

		if (*task_info_count < MACH_TASK_BASIC_INFO_COUNT) {
			error = KERN_INVALID_ARGUMENT;
			break;
		}

		basic_info = (mach_task_basic_info_t)task_info_out;

		map = (task == kernel_task) ? kernel_map : task->map;

		basic_info->virtual_size = map->size;

		basic_info->resident_size =
			(mach_vm_size_t)(pmap_resident_count(map->pmap));
		basic_info->resident_size *= PAGE_SIZE_64;

		basic_info->resident_size_max =
			(mach_vm_size_t)(pmap_resident_max(map->pmap));
		basic_info->resident_size_max *= PAGE_SIZE_64;

		basic_info->policy = ((task != kernel_task) ?
						POLICY_TIMESHARE : POLICY_RR);

		basic_info->suspend_count = task->user_stop_count;

		absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
		basic_info->user_time.seconds =
			(typeof(basic_info->user_time.seconds))secs;
		basic_info->user_time.microseconds = usecs;

		absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
		basic_info->system_time.seconds =
			(typeof(basic_info->system_time.seconds))secs;
		basic_info->system_time.microseconds = usecs;

		*task_info_count = MACH_TASK_BASIC_INFO_COUNT;
		break;
	}

	case TASK_THREAD_TIMES_INFO:
	{
		task_thread_times_info_t	times_info;
		thread_t			thread;

		if (*task_info_count < TASK_THREAD_TIMES_INFO_COUNT) {
			error = KERN_INVALID_ARGUMENT;
			break;
		}

		times_info = (task_thread_times_info_t) task_info_out;
		times_info->user_time.seconds = 0;
		times_info->user_time.microseconds = 0;
		times_info->system_time.seconds = 0;
		times_info->system_time.microseconds = 0;

		queue_iterate(&task->threads, thread, thread_t, task_threads) {
			time_value_t	user_time, system_time;

			if (thread->options & TH_OPT_IDLE_THREAD)
				continue;

			thread_read_times(thread, &user_time, &system_time, NULL);

			time_value_add(&times_info->user_time, &user_time);
			time_value_add(&times_info->system_time, &system_time);
		}

		*task_info_count = TASK_THREAD_TIMES_INFO_COUNT;
		break;
	}

	case TASK_ABSOLUTETIME_INFO:
	{
		task_absolutetime_info_t	info;
		thread_t			thread;

		if (*task_info_count < TASK_ABSOLUTETIME_INFO_COUNT) {
			error = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (task_absolutetime_info_t)task_info_out;
		info->threads_user = info->threads_system = 0;

		info->total_user = task->total_user_time;
		info->total_system = task->total_system_time;

		queue_iterate(&task->threads, thread, thread_t, task_threads) {
			uint64_t	tval;
			spl_t		x;

			if (thread->options & TH_OPT_IDLE_THREAD)
				continue;

			x = splsched();
			thread_lock(thread);

			tval = timer_grab(&thread->user_timer);
			info->threads_user += tval;
			info->total_user += tval;

			tval = timer_grab(&thread->system_timer);
			if (thread->precise_user_kernel_time) {
				info->threads_system += tval;
				info->total_system += tval;
			} else {
				/* system_timer may represent either sys or user */
				info->threads_user += tval;
				info->total_user += tval;
			}

			thread_unlock(thread);
			splx(x);
		}

		*task_info_count = TASK_ABSOLUTETIME_INFO_COUNT;
		break;
	}

	case TASK_DYLD_INFO:
	{
		task_dyld_info_t info;

		/*
		 * We added the format field to TASK_DYLD_INFO output. For
		 * temporary backward compatibility, accept the fact that
		 * clients may ask for the old version - distinguished by the
		 * size of the expected result structure.
		 */
#define TASK_LEGACY_DYLD_INFO_COUNT \
		offsetof(struct task_dyld_info, all_image_info_format)/sizeof(natural_t)

		if (*task_info_count < TASK_LEGACY_DYLD_INFO_COUNT) {
			error = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (task_dyld_info_t)task_info_out;
		info->all_image_info_addr = task->all_image_info_addr;
		info->all_image_info_size = task->all_image_info_size;

		/* only set format on output for those expecting it */
		if (*task_info_count >= TASK_DYLD_INFO_COUNT) {
			info->all_image_info_format = task_has_64Bit_addr(task) ?
				TASK_DYLD_ALL_IMAGE_INFO_64 :
				TASK_DYLD_ALL_IMAGE_INFO_32 ;
			*task_info_count = TASK_DYLD_INFO_COUNT;
		} else {
			*task_info_count = TASK_LEGACY_DYLD_INFO_COUNT;
		}
		break;
	}

	case TASK_EXTMOD_INFO:
	{
		task_extmod_info_t info;
		void *p;

		if (*task_info_count < TASK_EXTMOD_INFO_COUNT) {
			error = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (task_extmod_info_t)task_info_out;

		p = get_bsdtask_info(task);
		if (p) {
			proc_getexecutableuuid(p, info->task_uuid, sizeof(info->task_uuid));
		} else {
			bzero(info->task_uuid, sizeof(info->task_uuid));
		}
		info->extmod_statistics = task->extmod_statistics;
		*task_info_count = TASK_EXTMOD_INFO_COUNT;

		break;
	}

	case TASK_KERNELMEMORY_INFO:
	{
		task_kernelmemory_info_t	tkm_info;
		ledger_amount_t			credit, debit;

		if (*task_info_count < TASK_KERNELMEMORY_INFO_COUNT) {
			error = KERN_INVALID_ARGUMENT;
			break;
		}

		tkm_info = (task_kernelmemory_info_t) task_info_out;
		tkm_info->total_palloc = 0;
		tkm_info->total_pfree = 0;
		tkm_info->total_salloc = 0;
		tkm_info->total_sfree = 0;

		if (task == kernel_task) {
			/*
			 * All shared allocs/frees from other tasks count against
			 * the kernel private memory usage.  If we are looking up
			 * info for the kernel task, gather from everywhere.
			 */
			task_unlock(task);

			/* start by accounting for all the terminated tasks against the kernel */
			tkm_info->total_palloc = tasks_tkm_private.alloc + tasks_tkm_shared.alloc;
			tkm_info->total_pfree = tasks_tkm_private.free + tasks_tkm_shared.free;

			/* count all other task/thread shared alloc/free against the kernel */
			lck_mtx_lock(&tasks_threads_lock);

			/* XXX this really shouldn't be using the function parameter 'task' as a local var! */
			queue_iterate(&tasks, task, task_t, tasks) {
				if (task == kernel_task) {
					if (ledger_get_entries(task->ledger,
					    task_ledgers.tkm_private, &credit,
					    &debit) == KERN_SUCCESS) {
						tkm_info->total_palloc += credit;
						tkm_info->total_pfree += debit;
					}
				}
				if (!ledger_get_entries(task->ledger,
				    task_ledgers.tkm_shared, &credit, &debit)) {
					tkm_info->total_palloc += credit;
					tkm_info->total_pfree += debit;
				}
			}
			lck_mtx_unlock(&tasks_threads_lock);
		} else {
			if (!ledger_get_entries(task->ledger,
			    task_ledgers.tkm_private, &credit, &debit)) {
				tkm_info->total_palloc = credit;
				tkm_info->total_pfree = debit;
			}
			if (!ledger_get_entries(task->ledger,
			    task_ledgers.tkm_shared, &credit, &debit)) {
				tkm_info->total_salloc = credit;
				tkm_info->total_sfree = debit;
			}
			task_unlock(task);
		}

		*task_info_count = TASK_KERNELMEMORY_INFO_COUNT;
		return KERN_SUCCESS;
	}

	/* OBSOLETE */
	case TASK_SCHED_FIFO_INFO:
	{

		if (*task_info_count < POLICY_FIFO_BASE_COUNT) {
			error = KERN_INVALID_ARGUMENT;
			break;
		}

		error = KERN_INVALID_POLICY;
		break;
	}

	/* OBSOLETE */
	case TASK_SCHED_RR_INFO:
	{
		policy_rr_base_t	rr_base;
		uint32_t		quantum_time;
		uint64_t		quantum_ns;

		if (*task_info_count < POLICY_RR_BASE_COUNT) {
			error = KERN_INVALID_ARGUMENT;
			break;
		}

		rr_base = (policy_rr_base_t) task_info_out;

		if (task != kernel_task) {
			error = KERN_INVALID_POLICY;
			break;
		}

		rr_base->base_priority = task->priority;

		quantum_time = SCHED(initial_quantum_size)(THREAD_NULL);
		absolutetime_to_nanoseconds(quantum_time, &quantum_ns);

		rr_base->quantum = (uint32_t)(quantum_ns / 1000 / 1000);

		*task_info_count = POLICY_RR_BASE_COUNT;
		break;
	}

	/* OBSOLETE */
	case TASK_SCHED_TIMESHARE_INFO:
	{
		policy_timeshare_base_t	ts_base;

		if (*task_info_count < POLICY_TIMESHARE_BASE_COUNT) {
			error = KERN_INVALID_ARGUMENT;
			break;
		}

		ts_base = (policy_timeshare_base_t) task_info_out;

		if (task == kernel_task) {
			error = KERN_INVALID_POLICY;
			break;
		}

		ts_base->base_priority = task->priority;

		*task_info_count = POLICY_TIMESHARE_BASE_COUNT;
		break;
	}

	case TASK_SECURITY_TOKEN:
	{
		security_token_t	*sec_token_p;

		if (*task_info_count < TASK_SECURITY_TOKEN_COUNT) {
			error = KERN_INVALID_ARGUMENT;
			break;
		}

		sec_token_p = (security_token_t *) task_info_out;

		*sec_token_p = task->sec_token;

		*task_info_count = TASK_SECURITY_TOKEN_COUNT;
		break;
	}

	case TASK_AUDIT_TOKEN:
	{
		audit_token_t	*audit_token_p;

		if (*task_info_count < TASK_AUDIT_TOKEN_COUNT) {
			error = KERN_INVALID_ARGUMENT;
			break;
		}

		audit_token_p = (audit_token_t *) task_info_out;

		*audit_token_p = task->audit_token;

		*task_info_count = TASK_AUDIT_TOKEN_COUNT;
		break;
	}

	case TASK_SCHED_INFO:
		error = KERN_INVALID_ARGUMENT;
		break;

	case TASK_EVENTS_INFO:
	{
		task_events_info_t	events_info;
		thread_t		thread;

		if (*task_info_count < TASK_EVENTS_INFO_COUNT) {
			error = KERN_INVALID_ARGUMENT;
			break;
		}

		events_info = (task_events_info_t) task_info_out;

		events_info->faults = task->faults;
		events_info->pageins = task->pageins;
		events_info->cow_faults = task->cow_faults;
		events_info->messages_sent = task->messages_sent;
		events_info->messages_received = task->messages_received;
		events_info->syscalls_mach = task->syscalls_mach;
		events_info->syscalls_unix = task->syscalls_unix;

		events_info->csw = task->c_switch;

		queue_iterate(&task->threads, thread, thread_t, task_threads) {
			events_info->csw           += thread->c_switch;
			events_info->syscalls_mach += thread->syscalls_mach;
			events_info->syscalls_unix += thread->syscalls_unix;
		}

		*task_info_count = TASK_EVENTS_INFO_COUNT;
		break;
	}

	case TASK_AFFINITY_TAG_INFO:
	{
		if (*task_info_count < TASK_AFFINITY_TAG_INFO_COUNT) {
			error = KERN_INVALID_ARGUMENT;
			break;
		}

		error = task_affinity_info(task, task_info_out, task_info_count);
		break;
	}

	case TASK_POWER_INFO:
	{
		if (*task_info_count < TASK_POWER_INFO_COUNT) {
			error = KERN_INVALID_ARGUMENT;
			break;
		}

		task_power_info_locked(task, (task_power_info_t)task_info_out, NULL, NULL);
		break;
	}

	case TASK_POWER_INFO_V2:
	{
		if (*task_info_count < TASK_POWER_INFO_V2_COUNT_OLD) {
			error = KERN_INVALID_ARGUMENT;
			break;
		}
		task_power_info_v2_t tpiv2 = (task_power_info_v2_t) task_info_out;
		task_power_info_locked(task, &tpiv2->cpu_energy, &tpiv2->gpu_energy, tpiv2);
		break;
	}

	case TASK_VM_INFO:
	case TASK_VM_INFO_PURGEABLE:
	{
		task_vm_info_t		vm_info;
		vm_map_t		map;

		if (*task_info_count < TASK_VM_INFO_REV0_COUNT) {
			error = KERN_INVALID_ARGUMENT;
			break;
		}

		vm_info = (task_vm_info_t)task_info_out;

		if (task == kernel_task) {
			map = kernel_map;
			/* no lock */
		} else {
			map = task->map;
			vm_map_lock_read(map);
		}

		vm_info->virtual_size = (typeof(vm_info->virtual_size))map->size;
		vm_info->region_count = map->hdr.nentries;
		vm_info->page_size = vm_map_page_size(map);

		vm_info->resident_size = pmap_resident_count(map->pmap);
		vm_info->resident_size *= PAGE_SIZE;
		vm_info->resident_size_peak = pmap_resident_max(map->pmap);
		vm_info->resident_size_peak *= PAGE_SIZE;

#define _VM_INFO(_name) \
	vm_info->_name = ((mach_vm_size_t) map->pmap->stats._name) * PAGE_SIZE

		_VM_INFO(device);
		_VM_INFO(device_peak);
		_VM_INFO(external);
		_VM_INFO(external_peak);
		_VM_INFO(internal);
		_VM_INFO(internal_peak);
		_VM_INFO(reusable);
		_VM_INFO(reusable_peak);
		_VM_INFO(compressed);
		_VM_INFO(compressed_peak);
		_VM_INFO(compressed_lifetime);

		vm_info->purgeable_volatile_pmap = 0;
		vm_info->purgeable_volatile_resident = 0;
		vm_info->purgeable_volatile_virtual = 0;
		if (task == kernel_task) {
			/*
			 * We do not maintain the detailed stats for the
			 * kernel_pmap, so just count everything as
			 * "internal"...
			 */
			vm_info->internal = vm_info->resident_size;
			/*
			 * ... but since the memory held by the VM compressor
			 * in the kernel address space ought to be attributed
			 * to user-space tasks, we subtract it from "internal"
			 * to give memory reporting tools a more accurate idea
			 * of what the kernel itself is actually using, instead
			 * of making it look like the kernel is leaking memory
			 * when the system is under memory pressure.
			 */
			vm_info->internal -= (VM_PAGE_COMPRESSOR_COUNT *
					      PAGE_SIZE);
		} else {
			mach_vm_size_t	volatile_virtual_size;
			mach_vm_size_t	volatile_resident_size;
			mach_vm_size_t	volatile_compressed_size;
			mach_vm_size_t	volatile_pmap_size;
			mach_vm_size_t	volatile_compressed_pmap_size;
			kern_return_t	kr;

			if (flavor == TASK_VM_INFO_PURGEABLE) {
				kr = vm_map_query_volatile(
					map,
					&volatile_virtual_size,
					&volatile_resident_size,
					&volatile_compressed_size,
					&volatile_pmap_size,
					&volatile_compressed_pmap_size);
				if (kr == KERN_SUCCESS) {
					vm_info->purgeable_volatile_pmap =
						volatile_pmap_size;
					if (radar_20146450) {
						vm_info->compressed -=
							volatile_compressed_pmap_size;
					}
					vm_info->purgeable_volatile_resident =
						volatile_resident_size;
					vm_info->purgeable_volatile_virtual =
						volatile_virtual_size;
				}
			}
		}
		*task_info_count = TASK_VM_INFO_REV0_COUNT;

		if (original_task_info_count >= TASK_VM_INFO_REV1_COUNT) {
			vm_info->phys_footprint =
				(mach_vm_size_t) get_task_phys_footprint(task);
			*task_info_count = TASK_VM_INFO_REV1_COUNT;
		}
		if (original_task_info_count >= TASK_VM_INFO_REV2_COUNT) {
			vm_info->min_address = map->min_offset;
			vm_info->max_address = map->max_offset;
			*task_info_count = TASK_VM_INFO_REV2_COUNT;
		}

		if (task != kernel_task) {
			vm_map_unlock_read(map);
		}

		break;
	}

	case TASK_WAIT_STATE_INFO:
	{
		/*
		 * Deprecated flavor. Currently allowing some results until all users
		 * stop calling it. The results may not be accurate.
		 */
		task_wait_state_info_t	wait_state_info;
		uint64_t total_sfi_ledger_val = 0;

		if (*task_info_count < TASK_WAIT_STATE_INFO_COUNT) {
			error = KERN_INVALID_ARGUMENT;
			break;
		}

		wait_state_info = (task_wait_state_info_t) task_info_out;

		wait_state_info->total_wait_state_time = 0;
		bzero(wait_state_info->_reserved, sizeof(wait_state_info->_reserved));

#if CONFIG_SCHED_SFI
		int i, prev_lentry = -1;
		int64_t val_credit, val_debit;

		for (i = 0; i < MAX_SFI_CLASS_ID; i++) {
			val_credit = 0;
			/*
			 * checking with prev_lentry != entry ensures adjacent classes
			 * which share the same ledger do not add wait times twice.
			 * Note: Use ledger() call to get data for each individual sfi class.
			 */
			if (prev_lentry != task_ledgers.sfi_wait_times[i] &&
			    KERN_SUCCESS == ledger_get_entries(task->ledger,
			        task_ledgers.sfi_wait_times[i], &val_credit, &val_debit)) {
				total_sfi_ledger_val += val_credit;
			}
			prev_lentry = task_ledgers.sfi_wait_times[i];
		}

#endif /* CONFIG_SCHED_SFI */
		wait_state_info->total_wait_sfi_state_time = total_sfi_ledger_val;
		*task_info_count = TASK_WAIT_STATE_INFO_COUNT;

		break;
	}
	case TASK_VM_INFO_PURGEABLE_ACCOUNT:
	{
#if DEVELOPMENT || DEBUG
		pvm_account_info_t acnt_info;

		if (*task_info_count < PVM_ACCOUNT_INFO_COUNT) {
			error = KERN_INVALID_ARGUMENT;
			break;
		}

		if (task_info_out == NULL) {
			error = KERN_INVALID_ARGUMENT;
			break;
		}

		acnt_info = (pvm_account_info_t) task_info_out;

		error = vm_purgeable_account(task, acnt_info);

		*task_info_count = PVM_ACCOUNT_INFO_COUNT;

		break;
#else /* DEVELOPMENT || DEBUG */
		error = KERN_NOT_SUPPORTED;
		break;
#endif /* DEVELOPMENT || DEBUG */
	}
	case TASK_FLAGS_INFO:
	{
		task_flags_info_t flags_info;

		if (*task_info_count < TASK_FLAGS_INFO_COUNT) {
			error = KERN_INVALID_ARGUMENT;
			break;
		}

		flags_info = (task_flags_info_t)task_info_out;

		/* only publish the 64-bit flag of the task */
		flags_info->flags = task->t_flags & (TF_64B_ADDR | TF_64B_DATA);

		*task_info_count = TASK_FLAGS_INFO_COUNT;
		break;
	}

	case TASK_DEBUG_INFO_INTERNAL:
	{
#if DEVELOPMENT || DEBUG
		task_debug_info_internal_t dbg_info;
		if (*task_info_count < TASK_DEBUG_INFO_INTERNAL_COUNT) {
			error = KERN_NOT_SUPPORTED;
			break;
		}

		if (task_info_out == NULL) {
			error = KERN_INVALID_ARGUMENT;
			break;
		}
		dbg_info = (task_debug_info_internal_t) task_info_out;
		dbg_info->ipc_space_size = 0;
		if (task->itk_space) {
			dbg_info->ipc_space_size = task->itk_space->is_table_size;
		}

		dbg_info->suspend_count = task->suspend_count;

		error = KERN_SUCCESS;
		*task_info_count = TASK_DEBUG_INFO_INTERNAL_COUNT;
		break;
#else /* DEVELOPMENT || DEBUG */
		error = KERN_NOT_SUPPORTED;
		break;
#endif /* DEVELOPMENT || DEBUG */
	}
	default:
		error = KERN_INVALID_ARGUMENT;
	}

	task_unlock(task);
	return (error);
}

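/*
 * Typical user-space invocation of the routine above (a hedged sketch, not
 * part of this file). The count argument is in/out: the caller passes the
 * capacity and receives the number of natural_t-sized words filled in.
 *
 *	mach_task_basic_info_data_t info;
 *	mach_msg_type_number_t count = MACH_TASK_BASIC_INFO_COUNT;
 *	kern_return_t kr = task_info(mach_task_self(), MACH_TASK_BASIC_INFO,
 *	    (task_info_t)&info, &count);
 *	if (kr == KERN_SUCCESS)
 *		printf("resident: %llu bytes\n", (unsigned long long)info.resident_size);
 */
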
/*
 * task_info_from_user
 *
 * When task_info is called from user space, this function runs as the
 * MIG server-side routine instead of calling directly into task_info.
 * This makes it possible to perform additional security checks on the
 * task port.
 *
 * In the case of TASK_DYLD_INFO, we require the more privileged
 * task_port, not the less-privileged task_name_port.
 */
kern_return_t
task_info_from_user(
	mach_port_t		task_port,
	task_flavor_t		flavor,
	task_info_t		task_info_out,
	mach_msg_type_number_t	*task_info_count)
{
	task_t task;
	kern_return_t ret;

	if (flavor == TASK_DYLD_INFO)
		task = convert_port_to_task(task_port);
	else
		task = convert_port_to_task_name(task_port);

	ret = task_info(task, flavor, task_info_out, task_info_count);

	task_deallocate(task);

	return ret;
}

/*
 * task_power_info
 *
 * Returns power stats for the task.
 * Note: Called with task locked.
 */
void
task_power_info_locked(
	task_t			task,
	task_power_info_t	info,
	gpu_energy_data_t	ginfo,
	task_power_info_v2_t	infov2)
{
	thread_t		thread;
	ledger_amount_t		tmp;

	task_lock_assert_owned(task);

	ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
		(ledger_amount_t *)&info->task_interrupt_wakeups, &tmp);
	ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
		(ledger_amount_t *)&info->task_platform_idle_wakeups, &tmp);

	info->task_timer_wakeups_bin_1 = task->task_timer_wakeups_bin_1;
	info->task_timer_wakeups_bin_2 = task->task_timer_wakeups_bin_2;

	info->total_user = task->total_user_time;
	info->total_system = task->total_system_time;

#if CONFIG_EMBEDDED
	if (infov2) {
		infov2->task_energy = task->task_energy;
	}
#endif

	if (ginfo) {
		ginfo->task_gpu_utilisation = task->task_gpu_ns;
	}

	if (infov2) {
		infov2->task_ptime = task->total_ptime;
		infov2->task_pset_switches = task->ps_switch;
	}

	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		uint64_t	tval;
		spl_t		x;

		if (thread->options & TH_OPT_IDLE_THREAD)
			continue;

		x = splsched();
		thread_lock(thread);

		info->task_timer_wakeups_bin_1 += thread->thread_timer_wakeups_bin_1;
		info->task_timer_wakeups_bin_2 += thread->thread_timer_wakeups_bin_2;

#if CONFIG_EMBEDDED
		if (infov2) {
			infov2->task_energy += ml_energy_stat(thread);
		}
#endif

		tval = timer_grab(&thread->user_timer);
		info->total_user += tval;

		if (infov2) {
			tval = timer_grab(&thread->ptime);
			infov2->task_ptime += tval;
			infov2->task_pset_switches += thread->ps_switch;
		}

		tval = timer_grab(&thread->system_timer);
		if (thread->precise_user_kernel_time) {
			info->total_system += tval;
		} else {
			/* system_timer may represent either sys or user */
			info->total_user += tval;
		}

		if (ginfo) {
			ginfo->task_gpu_utilisation += ml_gpu_stat(thread);
		}
		thread_unlock(thread);
		splx(x);
	}
}

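/*
 * Callers must already hold the task lock (the routine asserts this); an
 * illustrative in-kernel call site might look like:
 *
 *	task_power_info_data_t info;
 *	task_lock(task);
 *	task_power_info_locked(task, &info, NULL, NULL);
 *	task_unlock(task);
 */
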
/*
 * task_gpu_utilisation
 *
 * Returns the total GPU time used by all the threads of the task
 * (both dead and alive)
 */
uint64_t
task_gpu_utilisation(
	task_t	task)
{
	uint64_t gpu_time = 0;
#if !CONFIG_EMBEDDED
	thread_t thread;

	task_lock(task);
	gpu_time += task->task_gpu_ns;

	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		spl_t x;
		x = splsched();
		thread_lock(thread);
		gpu_time += ml_gpu_stat(thread);
		thread_unlock(thread);
		splx(x);
	}

	task_unlock(task);
#else /* CONFIG_EMBEDDED */
	/* silence compiler warning */
	(void)task;
#endif /* !CONFIG_EMBEDDED */
	return gpu_time;
}

/*
 * task_energy
 *
 * Returns the total energy used by all the threads of the task
 * (both dead and alive)
 */
uint64_t
task_energy(
	task_t	task)
{
	uint64_t energy = 0;
	thread_t thread;

	task_lock(task);
	energy += task->task_energy;

	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		spl_t x;
		x = splsched();
		thread_lock(thread);
		energy += ml_energy_stat(thread);
		thread_unlock(thread);
		splx(x);
	}

	task_unlock(task);
	return energy;
}

uint64_t
task_cpu_ptime(
	__unused task_t  task)
{
	return 0;
}


/*
 * This function updates the cpu time in the arrays for each
 * effective and requested QoS class.
 */
void
task_update_cpu_time_qos_stats(
	task_t	task,
	uint64_t *eqos_stats,
	uint64_t *rqos_stats)
{
	if (!eqos_stats && !rqos_stats) {
		return;
	}

	task_lock(task);
	thread_t thread;
	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		if (thread->options & TH_OPT_IDLE_THREAD) {
			continue;
		}

		thread_update_qos_cpu_time(thread);
	}

	if (eqos_stats) {
		eqos_stats[THREAD_QOS_DEFAULT] += task->cpu_time_eqos_stats.cpu_time_qos_default;
		eqos_stats[THREAD_QOS_MAINTENANCE] += task->cpu_time_eqos_stats.cpu_time_qos_maintenance;
		eqos_stats[THREAD_QOS_BACKGROUND] += task->cpu_time_eqos_stats.cpu_time_qos_background;
		eqos_stats[THREAD_QOS_UTILITY] += task->cpu_time_eqos_stats.cpu_time_qos_utility;
		eqos_stats[THREAD_QOS_LEGACY] += task->cpu_time_eqos_stats.cpu_time_qos_legacy;
		eqos_stats[THREAD_QOS_USER_INITIATED] += task->cpu_time_eqos_stats.cpu_time_qos_user_initiated;
		eqos_stats[THREAD_QOS_USER_INTERACTIVE] += task->cpu_time_eqos_stats.cpu_time_qos_user_interactive;
	}

	if (rqos_stats) {
		rqos_stats[THREAD_QOS_DEFAULT] += task->cpu_time_rqos_stats.cpu_time_qos_default;
		rqos_stats[THREAD_QOS_MAINTENANCE] += task->cpu_time_rqos_stats.cpu_time_qos_maintenance;
		rqos_stats[THREAD_QOS_BACKGROUND] += task->cpu_time_rqos_stats.cpu_time_qos_background;
		rqos_stats[THREAD_QOS_UTILITY] += task->cpu_time_rqos_stats.cpu_time_qos_utility;
		rqos_stats[THREAD_QOS_LEGACY] += task->cpu_time_rqos_stats.cpu_time_qos_legacy;
		rqos_stats[THREAD_QOS_USER_INITIATED] += task->cpu_time_rqos_stats.cpu_time_qos_user_initiated;
		rqos_stats[THREAD_QOS_USER_INTERACTIVE] += task->cpu_time_rqos_stats.cpu_time_qos_user_interactive;
	}

	task_unlock(task);
}

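/*
 * Illustrative caller sketch (not from this file): both arrays are indexed
 * by THREAD_QOS_* class, so each is assumed to hold at least
 * THREAD_QOS_LAST elements.
 *
 *	uint64_t eqos[THREAD_QOS_LAST] = { 0 };
 *	uint64_t rqos[THREAD_QOS_LAST] = { 0 };
 *	task_update_cpu_time_qos_stats(task, eqos, rqos);
 */
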
kern_return_t
task_purgable_info(
	task_t task,
	task_purgable_info_t *stats)
{
	if (task == TASK_NULL || stats == NULL)
		return KERN_INVALID_ARGUMENT;
	/* Take task reference */
	task_reference(task);
	vm_purgeable_stats((vm_purgeable_info_t)stats, task);
	/* Drop task reference */
	task_deallocate(task);
	return KERN_SUCCESS;
}

void
task_vtimer_set(
	task_t		task,
	integer_t	which)
{
	thread_t	thread;
	spl_t		x;

	task_lock(task);

	task->vtimers |= which;

	switch (which) {

	case TASK_VTIMER_USER:
		queue_iterate(&task->threads, thread, thread_t, task_threads) {
			x = splsched();
			thread_lock(thread);
			if (thread->precise_user_kernel_time)
				thread->vtimer_user_save = timer_grab(&thread->user_timer);
			else
				thread->vtimer_user_save = timer_grab(&thread->system_timer);
			thread_unlock(thread);
			splx(x);
		}
		break;

	case TASK_VTIMER_PROF:
		queue_iterate(&task->threads, thread, thread_t, task_threads) {
			x = splsched();
			thread_lock(thread);
			thread->vtimer_prof_save = timer_grab(&thread->user_timer);
			thread->vtimer_prof_save += timer_grab(&thread->system_timer);
			thread_unlock(thread);
			splx(x);
		}
		break;

	case TASK_VTIMER_RLIM:
		queue_iterate(&task->threads, thread, thread_t, task_threads) {
			x = splsched();
			thread_lock(thread);
			thread->vtimer_rlim_save = timer_grab(&thread->user_timer);
			thread->vtimer_rlim_save += timer_grab(&thread->system_timer);
			thread_unlock(thread);
			splx(x);
		}
		break;
	}

	task_unlock(task);
}

void
task_vtimer_clear(
	task_t		task,
	integer_t	which)
{
	assert(task == current_task());

	task_lock(task);

	task->vtimers &= ~which;

	task_unlock(task);
}

void
task_vtimer_update(
__unused
	task_t		task,
	integer_t	which,
	uint32_t	*microsecs)
{
	thread_t	thread = current_thread();
	uint32_t	tdelt = 0;
	clock_sec_t	secs = 0;
	uint64_t	tsum;

	assert(task == current_task());

	spl_t s = splsched();
	thread_lock(thread);

	if ((task->vtimers & which) != (uint32_t)which) {
		thread_unlock(thread);
		splx(s);
		return;
	}

	switch (which) {

	case TASK_VTIMER_USER:
		if (thread->precise_user_kernel_time) {
			tdelt = (uint32_t)timer_delta(&thread->user_timer,
						&thread->vtimer_user_save);
		} else {
			tdelt = (uint32_t)timer_delta(&thread->system_timer,
						&thread->vtimer_user_save);
		}
		absolutetime_to_microtime(tdelt, &secs, microsecs);
		break;

	case TASK_VTIMER_PROF:
		tsum = timer_grab(&thread->user_timer);
		tsum += timer_grab(&thread->system_timer);
		tdelt = (uint32_t)(tsum - thread->vtimer_prof_save);
		absolutetime_to_microtime(tdelt, &secs, microsecs);
		/* if the time delta is smaller than a usec, ignore */
		if (*microsecs != 0)
			thread->vtimer_prof_save = tsum;
		break;

	case TASK_VTIMER_RLIM:
		tsum = timer_grab(&thread->user_timer);
		tsum += timer_grab(&thread->system_timer);
		tdelt = (uint32_t)(tsum - thread->vtimer_rlim_save);
		thread->vtimer_rlim_save = tsum;
		absolutetime_to_microtime(tdelt, &secs, microsecs);
		break;
	}

	thread_unlock(thread);
	splx(s);
}

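/*
 * Illustrative vtimer usage (a hedged sketch; the BSD setitimer() paths
 * are the expected clients): arm a timer class once for the task, then
 * poll the delta from the current thread. 'microsecs' receives only the
 * sub-second component of the delta.
 *
 *	uint32_t usecs;
 *	task_vtimer_set(task, TASK_VTIMER_USER);
 *	// ... later, on a thread of 'task' ...
 *	task_vtimer_update(task, TASK_VTIMER_USER, &usecs);
 */
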
/*
 * task_assign:
 *
 * Change the assigned processor set for the task
 */
kern_return_t
task_assign(
	__unused task_t			task,
	__unused processor_set_t	new_pset,
	__unused boolean_t		assign_threads)
{
	return(KERN_FAILURE);
}

/*
 * task_assign_default:
 *
 * Version of task_assign to assign to default processor set.
 */
kern_return_t
task_assign_default(
	task_t		task,
	boolean_t	assign_threads)
{
	return (task_assign(task, &pset0, assign_threads));
}

/*
 * task_get_assignment
 *
 * Return name of processor set that task is assigned to.
 */
kern_return_t
task_get_assignment(
	task_t		task,
	processor_set_t	*pset)
{
	if (!task || !task->active)
		return KERN_FAILURE;

	*pset = &pset0;

	return KERN_SUCCESS;
}

uint64_t
get_task_dispatchqueue_offset(
	task_t		task)
{
	return task->dispatchqueue_offset;
}

/*
 * task_policy
 *
 * Set scheduling policy and parameters, both base and limit, for
 * the given task. Policy must be a policy which is enabled for the
 * processor set. Change contained threads if requested.
 */
kern_return_t
task_policy(
	__unused task_t			task,
	__unused policy_t		policy_id,
	__unused policy_base_t		base,
	__unused mach_msg_type_number_t	count,
	__unused boolean_t		set_limit,
	__unused boolean_t		change)
{
	return(KERN_FAILURE);
}

/*
 * task_set_policy
 *
 * Set scheduling policy and parameters, both base and limit, for
 * the given task. Policy can be any policy implemented by the
 * processor set, whether enabled or not. Change contained threads
 * if requested.
 */
kern_return_t
task_set_policy(
	__unused task_t			task,
	__unused processor_set_t	pset,
	__unused policy_t		policy_id,
	__unused policy_base_t		base,
	__unused mach_msg_type_number_t	base_count,
	__unused policy_limit_t		limit,
	__unused mach_msg_type_number_t	limit_count,
	__unused boolean_t		change)
{
	return(KERN_FAILURE);
}

kern_return_t
task_set_ras_pc(
	__unused task_t		task,
	__unused vm_offset_t	pc,
	__unused vm_offset_t	endpc)
{
	return KERN_FAILURE;
}

void
task_synchronizer_destroy_all(task_t task)
{
	/*
	 * Destroy owned semaphores
	 */
	semaphore_destroy_all(task);
}

/*
 * Install default (machine-dependent) initial thread state
 * on the task.  Subsequent thread creation will have this initial
 * state set on the thread by machine_thread_inherit_taskwide().
 * Flavors and structures are exactly the same as those to thread_set_state()
 */
kern_return_t
task_set_state(
	task_t task,
	int flavor,
	thread_state_t state,
	mach_msg_type_number_t state_count)
{
	kern_return_t ret;

	if (task == TASK_NULL) {
		return (KERN_INVALID_ARGUMENT);
	}

	task_lock(task);

	if (!task->active) {
		task_unlock(task);
		return (KERN_FAILURE);
	}

	ret = machine_task_set_state(task, flavor, state, state_count);

	task_unlock(task);
	return ret;
}

/*
 * Examine the default (machine-dependent) initial thread state
 * on the task, as set by task_set_state().  Flavors and structures
 * are exactly the same as those passed to thread_get_state().
 */
kern_return_t
task_get_state(
	task_t	task,
	int	flavor,
	thread_state_t state,
	mach_msg_type_number_t *state_count)
{
	kern_return_t ret;

	if (task == TASK_NULL) {
		return (KERN_INVALID_ARGUMENT);
	}

	task_lock(task);

	if (!task->active) {
		task_unlock(task);
		return (KERN_FAILURE);
	}

	ret = machine_task_get_state(task, flavor, state, state_count);

	task_unlock(task);
	return ret;
}


static kern_return_t __attribute__((noinline,not_tail_called))
PROC_VIOLATED_GUARD__SEND_EXC_GUARD_AND_SUSPEND(
	mach_exception_code_t code,
	mach_exception_subcode_t subcode,
	void *reason)
{
#ifdef MACH_BSD
	if (1 == proc_selfpid())
		return KERN_NOT_SUPPORTED;	// initproc is immune
#endif
	mach_exception_data_type_t codes[EXCEPTION_CODE_MAX] = {
		[0] = code,
		[1] = subcode,
	};
	task_t task = current_task();
	kern_return_t kr;

	/* (See jetsam-related comments below) */

	proc_memstat_terminated(task->bsd_info, TRUE);
	kr = task_enqueue_exception_with_corpse(task, EXC_GUARD, codes, 2, reason);
	proc_memstat_terminated(task->bsd_info, FALSE);
	return kr;
}

kern_return_t
task_violated_guard(
	mach_exception_code_t code,
	mach_exception_subcode_t subcode,
	void *reason)
{
	return PROC_VIOLATED_GUARD__SEND_EXC_GUARD_AND_SUSPEND(code, subcode, reason);
}


#if CONFIG_MEMORYSTATUS

boolean_t
task_get_memlimit_is_active(task_t task)
{
	assert(task != NULL);

	if (task->memlimit_is_active == 1) {
		return(TRUE);
	} else {
		return(FALSE);
	}
}

void
task_set_memlimit_is_active(task_t task, boolean_t memlimit_is_active)
{
	assert(task != NULL);

	if (memlimit_is_active) {
		task->memlimit_is_active = 1;
	} else {
		task->memlimit_is_active = 0;
	}
}

boolean_t
task_get_memlimit_is_fatal(task_t task)
{
	assert(task != NULL);

	if (task->memlimit_is_fatal == 1) {
		return(TRUE);
	} else {
		return(FALSE);
	}
}

void
task_set_memlimit_is_fatal(task_t task, boolean_t memlimit_is_fatal)
{
	assert(task != NULL);

	if (memlimit_is_fatal) {
		task->memlimit_is_fatal = 1;
	} else {
		task->memlimit_is_fatal = 0;
	}
}

boolean_t
task_has_triggered_exc_resource(task_t task, boolean_t memlimit_is_active)
{
	boolean_t triggered = FALSE;

	assert(task == current_task());

	/*
	 * Returns TRUE if the task has already triggered an exc_resource exception.
	 */

	if (memlimit_is_active) {
		triggered = (task->memlimit_active_exc_resource ? TRUE : FALSE);
	} else {
		triggered = (task->memlimit_inactive_exc_resource ? TRUE : FALSE);
	}

	return(triggered);
}

void
task_mark_has_triggered_exc_resource(task_t task, boolean_t memlimit_is_active)
{
	assert(task == current_task());

	/*
	 * We allow one exc_resource per process per active/inactive limit.
	 * The limit's fatal attribute does not come into play.
	 */

	if (memlimit_is_active) {
		task->memlimit_active_exc_resource = 1;
	} else {
		task->memlimit_inactive_exc_resource = 1;
	}
}

#define	HWM_USERCORE_MINSPACE	250	// free space (in MB) required *after* core file creation

void __attribute__((noinline))
PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb, boolean_t is_fatal)
{
	task_t				task		= current_task();
	int				pid		= 0;
	const char			*procname	= "unknown";
	mach_exception_data_type_t	code[EXCEPTION_CODE_MAX];
	boolean_t			send_sync_exc_resource = FALSE;

#ifdef MACH_BSD
	pid = proc_selfpid();

	if (pid == 1) {
		/*
		 * Cannot have ReportCrash analyzing
		 * a suspended initproc.
		 */
		return;
	}

	if (task->bsd_info != NULL) {
		procname = proc_name_address(current_task()->bsd_info);
		send_sync_exc_resource = proc_send_synchronous_EXC_RESOURCE(current_task()->bsd_info);
	}
#endif
#if CONFIG_COREDUMP
	if (hwm_user_cores) {
		int		error;
		uint64_t	starttime, end;
		clock_sec_t	secs = 0;
		uint32_t	microsecs = 0;

		starttime = mach_absolute_time();
		/*
		 * Trigger a coredump of this process. Don't proceed unless we know we won't
		 * be filling up the disk; and ignore the core size resource limit for this
		 * core file.
		 */
		if ((error = coredump(current_task()->bsd_info, HWM_USERCORE_MINSPACE, COREDUMP_IGNORE_ULIMIT)) != 0) {
			printf("couldn't take coredump of %s[%d]: %d\n", procname, pid, error);
		}
		/*
		 * coredump() leaves the task suspended.
		 */
		task_resume_internal(current_task());

		end = mach_absolute_time();
		absolutetime_to_microtime(end - starttime, &secs, &microsecs);
		printf("coredump of %s[%d] taken in %d secs %d microsecs\n",
		       proc_name_address(current_task()->bsd_info), pid, (int)secs, microsecs);
	}
#endif /* CONFIG_COREDUMP */

	if (disable_exc_resource) {
		printf("process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
			"suppressed by a boot-arg.\n", procname, pid, max_footprint_mb);
		return;
	}

	/*
	 * A task that has triggered an EXC_RESOURCE should not be
	 * jetsammed when the device is under memory pressure. Here
	 * we set the P_MEMSTAT_TERMINATED flag so that the process
	 * will be skipped if the memorystatus_thread wakes up.
	 */
	proc_memstat_terminated(current_task()->bsd_info, TRUE);

	code[0] = code[1] = 0;
	EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_MEMORY);
	EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_HIGH_WATERMARK);
	EXC_RESOURCE_HWM_ENCODE_LIMIT(code[0], max_footprint_mb);

	/*
	 * Do not generate a corpse fork if the violation is a fatal one
	 * or the process wants synchronous EXC_RESOURCE exceptions.
	 */
	if (is_fatal || send_sync_exc_resource || exc_via_corpse_forking == 0) {
		/* Do not send an EXC_RESOURCE if corpse_for_fatal_memkill is set */
		if (send_sync_exc_resource || corpse_for_fatal_memkill == 0) {
			/*
			 * Use the _internal_ variant so that no user-space
			 * process can resume our task from under us.
			 */
			task_suspend_internal(task);
			exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
			task_resume_internal(task);
		}
	} else {
		if (audio_active) {
			printf("process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
				"suppressed due to audio playback.\n", procname, pid, max_footprint_mb);
		} else {
			task_enqueue_exception_with_corpse(task, EXC_RESOURCE,
				code, EXCEPTION_CODE_MAX, NULL);
		}
	}

	/*
	 * After the EXC_RESOURCE has been handled, we must clear the
	 * P_MEMSTAT_TERMINATED flag so that the process can again be
	 * considered for jetsam if the memorystatus_thread wakes up.
	 */
	proc_memstat_terminated(current_task()->bsd_info, FALSE);	/* clear the flag */
}

5301/*
5302 * Callback invoked when a task exceeds its physical footprint limit.
5303 */
5304void
5305task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1)
5306{
fe8ab488 5307 ledger_amount_t max_footprint, max_footprint_mb;
fe8ab488 5308 task_t task;
813fb2f6
A
5309 boolean_t is_warning;
5310 boolean_t memlimit_is_active;
5311 boolean_t memlimit_is_fatal;
39236c6e
A
5312
5313 if (warning == LEDGER_WARNING_DIPPED_BELOW) {
5314 /*
5315 * Task memory limits only provide a warning on the way up.
5316 */
5317 return;
813fb2f6
A
5318 } else if (warning == LEDGER_WARNING_ROSE_ABOVE) {
5319 /*
5320 * This task is in danger of violating a memory limit;
5321 * it has exceeded a percentage level of the limit.
5322 */
5323 is_warning = TRUE;
5324 } else {
5325 /*
5326 * The task has exceeded the physical footprint limit.
5327 * This is not a warning but a true limit violation.
5328 */
5329 is_warning = FALSE;
5330 }
39236c6e 5331
fe8ab488
A
5332 task = current_task();
5333
5334 ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &max_footprint);
5335 max_footprint_mb = max_footprint >> 20;
5336
813fb2f6
A
5337 memlimit_is_active = task_get_memlimit_is_active(task);
5338 memlimit_is_fatal = task_get_memlimit_is_fatal(task);
39236c6e
A
5339
5340 /*
813fb2f6
A
5341 * If this is an actual violation (not a warning), then generate an EXC_RESOURCE exception.
5342 * We only generate the exception once per process per memlimit (active/inactive limit).
5343 * To enforce this, we monitor state based on the memlimit's active/inactive attribute
5344 * and disable further exceptions by marking that memlimit as exception-triggered.
39236c6e 5345 */
813fb2f6
A
5346 if ((is_warning == FALSE) && (!task_has_triggered_exc_resource(task, memlimit_is_active))) {
5347 PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND((int)max_footprint_mb, memlimit_is_fatal);
5348 memorystatus_log_exception((int)max_footprint_mb, memlimit_is_active, memlimit_is_fatal);
5349 task_mark_has_triggered_exc_resource(task, memlimit_is_active);
39236c6e
A
5350 }
5351
813fb2f6 5352 memorystatus_on_ledger_footprint_exceeded(is_warning, memlimit_is_active, memlimit_is_fatal);
39236c6e
A
5353}
5354
5355extern int proc_check_footprint_priv(void);
5356
5357kern_return_t
5358task_set_phys_footprint_limit(
5359 task_t task,
5360 int new_limit_mb,
5361 int *old_limit_mb)
5362{
5363 kern_return_t error;
5364
813fb2f6
A
5365 boolean_t memlimit_is_active;
5366 boolean_t memlimit_is_fatal;
5367
39236c6e
A
5368 if ((error = proc_check_footprint_priv())) {
5369 return (KERN_NO_ACCESS);
5370 }
5371
813fb2f6
A
5372 /*
5373 * This call should probably be obsoleted.
5374 * But for now, we default to the task's current state.
5375 */
5376 memlimit_is_active = task_get_memlimit_is_active(task);
5377 memlimit_is_fatal = task_get_memlimit_is_fatal(task);
5378
5379 return task_set_phys_footprint_limit_internal(task, new_limit_mb, old_limit_mb, memlimit_is_active, memlimit_is_fatal);
39236c6e
A
5380}
5381
3e170ce0
A
5382kern_return_t
5383task_convert_phys_footprint_limit(
5384 int limit_mb,
5385 int *converted_limit_mb)
5386{
5387 if (limit_mb == -1) {
5388 /*
5389 * No limit
5390 */
5391 if (max_task_footprint != 0) {
5392 *converted_limit_mb = (int)(max_task_footprint / 1024 / 1024); /* bytes to MB */
5393 } else {
5394 *converted_limit_mb = (int)(LEDGER_LIMIT_INFINITY >> 20);
5395 }
5396 } else {
5397 /* nothing to convert */
5398 *converted_limit_mb = limit_mb;
5399 }
5400 return (KERN_SUCCESS);
5401}
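/*
 * Editor's note -- worked example of the -1 convention above: with the
 * default boot configuration max_task_footprint is 0, so limit_mb == -1
 * converts to (int)(LEDGER_LIMIT_INFINITY >> 20), i.e. "no limit" expressed
 * in MB. If a global cap were set, say max_task_footprint == 512 << 20
 * bytes, then -1 would convert to 512 instead.
 */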
5402
5403
39236c6e
A
5404kern_return_t
5405task_set_phys_footprint_limit_internal(
5406 task_t task,
5407 int new_limit_mb,
5408 int *old_limit_mb,
813fb2f6
A
5409 boolean_t memlimit_is_active,
5410 boolean_t memlimit_is_fatal)
39236c6e
A
5411{
5412 ledger_amount_t old;
5413
5414 ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &old);
5ba3f43e
A
5415
5416 /*
5417 * Check that limit >> 20 will not give an "unexpected" signed, 32-bit
5418 * result. There are, however, implicit assumptions that a -1 MB limit
5419 * equates to LEDGER_LIMIT_INFINITY.
5420 */
5421 assert(((old & 0xFFF0000000000000LL) == 0) || (old == LEDGER_LIMIT_INFINITY));
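/*
 * Editor's note: the mask above covers bits 52-63, so when it comes back
 * zero we have old < 2^52 and therefore old >> 20 < 2^32; the truncating
 * cast to the 32-bit *old_limit_mb below cannot silently drop bits. The
 * one exempted value, LEDGER_LIMIT_INFINITY, is the "no limit" case that
 * callers map to/from -1.
 */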
39236c6e
A
5422
5423 if (old_limit_mb) {
3e170ce0 5424 *old_limit_mb = (int)(old >> 20);
39236c6e
A
5425 }
5426
5427 if (new_limit_mb == -1) {
5428 /*
5429 * Caller wishes to remove the limit.
5430 */
5431 ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
5432 max_task_footprint ? max_task_footprint : LEDGER_LIMIT_INFINITY,
39037602 5433 max_task_footprint ? max_task_footprint_warning_level : 0);
813fb2f6 5434
5ba3f43e 5435 task_lock(task);
813fb2f6
A
5436 task_set_memlimit_is_active(task, memlimit_is_active);
5437 task_set_memlimit_is_fatal(task, memlimit_is_fatal);
5ba3f43e 5438 task_unlock(task);
813fb2f6 5439
39236c6e
A
5440 return (KERN_SUCCESS);
5441 }
5442
5443#ifdef CONFIG_NOMONITORS
5444 return (KERN_SUCCESS);
5445#endif /* CONFIG_NOMONITORS */
5446
5447 task_lock(task);
5448
5ba3f43e
A
5449 if ((memlimit_is_active == task_get_memlimit_is_active(task)) &&
5450 (memlimit_is_fatal == task_get_memlimit_is_fatal(task)) &&
5451 (((ledger_amount_t)new_limit_mb << 20) == old)) {
5452 /*
5453 * memlimit state is not changing
5454 */
5455 task_unlock(task);
5456 return(KERN_SUCCESS);
5457 }
5458
813fb2f6
A
5459 task_set_memlimit_is_active(task, memlimit_is_active);
5460 task_set_memlimit_is_fatal(task, memlimit_is_fatal);
39236c6e
A
5461
5462 ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
5463 (ledger_amount_t)new_limit_mb << 20, PHYS_FOOTPRINT_WARNING_LEVEL);
5464
3e170ce0 5465 if (task == current_task()) {
5c9f4661
A
5466 ledger_check_new_balance(current_thread(), task->ledger,
5467 task_ledgers.phys_footprint);
3e170ce0
A
5468 }
5469
39236c6e
A
5470 task_unlock(task);
5471
5472 return (KERN_SUCCESS);
5473}
5474
5475kern_return_t
5476task_get_phys_footprint_limit(
5477 task_t task,
5478 int *limit_mb)
5479{
5480 ledger_amount_t limit;
5481
5482 ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &limit);
3e170ce0
A
5483 /*
5484 * Check that limit >> 20 will not give an "unexpected" signed, 32-bit
5485 * result. There are, however, implicit assumptions that a -1 MB limit
5486 * equates to LEDGER_LIMIT_INFINITY.
5487 */
5488 assert(((limit & 0xFFF0000000000000LL) == 0) || (limit == LEDGER_LIMIT_INFINITY));
5489 *limit_mb = (int)(limit >> 20);
39236c6e
A
5490
5491 return (KERN_SUCCESS);
5492}
39037602 5493#else /* CONFIG_MEMORYSTATUS */
39236c6e
A
5494kern_return_t
5495task_set_phys_footprint_limit(
5496 __unused task_t task,
5497 __unused int new_limit_mb,
5498 __unused int *old_limit_mb)
5499{
5500 return (KERN_FAILURE);
5501}
5502
5503kern_return_t
5504task_get_phys_footprint_limit(
5505 __unused task_t task,
5506 __unused int *limit_mb)
5507{
5508 return (KERN_FAILURE);
5509}
39037602 5510#endif /* CONFIG_MEMORYSTATUS */
b0d623f7 5511
d9a64523
A
5512void
5513task_set_thread_limit(task_t task, uint16_t thread_limit)
5514{
5515 assert(task != kernel_task);
5516 if (thread_limit <= TASK_MAX_THREAD_LIMIT) {
5517 task_lock(task);
5518 task->task_thread_limit = thread_limit;
5519 task_unlock(task);
5520 }
5521}
5522
1c79356b
A
5523/*
5524 * We need to export some functions to other components that
5525 * are currently implemented in macros within the osfmk
5526 * component. Just export them as functions of the same name.
5527 */
5528boolean_t is_kerneltask(task_t t)
5529{
5530 if (t == kernel_task)
55e303ae
A
5531 return (TRUE);
5532
5533 return (FALSE);
1c79356b
A
5534}
5535
39037602 5536boolean_t is_corpsetask(task_t t)
b0d623f7 5537{
39037602 5538 return (task_is_a_corpse(t));
b0d623f7
A
5539}
5540
1c79356b 5541#undef current_task
91447636
A
5542task_t current_task(void);
5543task_t current_task(void)
1c79356b
A
5544{
5545 return (current_task_fast());
5546}
91447636
A
5547
5548#undef task_reference
5549void task_reference(task_t task);
5550void
5551task_reference(
5552 task_t task)
5553{
5554 if (task != TASK_NULL)
5555 task_reference_internal(task);
5556}
2d21ac55 5557
3e170ce0
A
5558/* defined in bsd/kern/kern_prot.c */
5559extern int get_audit_token_pid(audit_token_t *audit_token);
5560
5561int task_pid(task_t task)
5562{
5563 if (task)
5564 return get_audit_token_pid(&task->audit_token);
5565 return -1;
5566}
5567
5568
39037602
A
5569/*
5570 * This routine finds a thread in a task by its unique id
5571 * Returns a referenced thread or THREAD_NULL if the thread was not found
5572 *
5573 * TODO: This is super inefficient - it's an O(threads in task) list walk!
5574 * We should make a tid hash, or transition all tid clients to thread ports
5575 *
5576 * Precondition: No locks held (will take task lock)
6d2010ae
A
5577 */
5578thread_t
5579task_findtid(task_t task, uint64_t tid)
5580{
39037602
A
5581 thread_t self = current_thread();
5582 thread_t found_thread = THREAD_NULL;
5583 thread_t iter_thread = THREAD_NULL;
6d2010ae 5584
39037602
A
5585 /* Short-circuit the lookup if we're looking up ourselves */
5586 if (tid == self->thread_id || tid == TID_NULL) {
5587 assert(self->task == task);
5588
5589 thread_reference(self);
5590
5591 return self;
6d2010ae 5592 }
39037602
A
5593
5594 task_lock(task);
5595
5596 queue_iterate(&task->threads, iter_thread, thread_t, task_threads) {
5597 if (iter_thread->thread_id == tid) {
5598 found_thread = iter_thread;
5599 thread_reference(found_thread);
5600 break;
5601 }
5602 }
5603
5604 task_unlock(task);
5605
5606 return (found_thread);
6d2010ae
A
5607}
5608
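/*
 * Editor's note -- usage sketch, not from task.c: the thread returned by
 * task_findtid() carries a reference that the caller owns and must drop:
 *
 *	thread_t t = task_findtid(task, tid);
 *	if (t != THREAD_NULL) {
 *		(... inspect or manipulate t ...)
 *		thread_deallocate(t);	<- drop the reference taken above
 *	}
 */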
813fb2f6
A
5609int pid_from_task(task_t task)
5610{
5611 int pid = -1;
5612
5613 if (task->bsd_info) {
5614 pid = proc_pid(task->bsd_info);
5615 } else {
5616 pid = task_pid(task);
5617 }
5618
5619 return pid;
5620}
39037602 5621
39236c6e
A
5622/*
5623 * Control the CPU usage monitor for a task.
5624 */
5625kern_return_t
5626task_cpu_usage_monitor_ctl(task_t task, uint32_t *flags)
5627{
5628 int error = KERN_SUCCESS;
5629
5630 if (*flags & CPUMON_MAKE_FATAL) {
5631 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_CPUMON;
5632 } else {
5633 error = KERN_INVALID_ARGUMENT;
5634 }
5635
5636 return error;
5637}
5638
5639/*
5640 * Control the wakeups monitor for a task.
5641 */
5642kern_return_t
5643task_wakeups_monitor_ctl(task_t task, uint32_t *flags, int32_t *rate_hz)
5644{
5645 ledger_t ledger = task->ledger;
5646
5647 task_lock(task);
5648 if (*flags & WAKEMON_GET_PARAMS) {
5649 ledger_amount_t limit;
5650 uint64_t period;
5651
5652 ledger_get_limit(ledger, task_ledgers.interrupt_wakeups, &limit);
5653 ledger_get_period(ledger, task_ledgers.interrupt_wakeups, &period);
5654
5655 if (limit != LEDGER_LIMIT_INFINITY) {
5656 /*
5657 * An active limit means the wakeups monitor is enabled.
5658 */
5659 *rate_hz = (int32_t)(limit / (int64_t)(period / NSEC_PER_SEC));
5660 *flags = WAKEMON_ENABLE;
5661 if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON) {
5662 *flags |= WAKEMON_MAKE_FATAL;
5663 }
5664 } else {
5665 *flags = WAKEMON_DISABLE;
5666 *rate_hz = -1;
5667 }
5668
5669 /*
5670 * If WAKEMON_GET_PARAMS is present in flags, all other flags are ignored.
5671 */
5672 task_unlock(task);
5673 return KERN_SUCCESS;
5674 }
5675
5676 if (*flags & WAKEMON_ENABLE) {
5677 if (*flags & WAKEMON_SET_DEFAULTS) {
5678 *rate_hz = task_wakeups_monitor_rate;
5679 }
5680
5681#ifndef CONFIG_NOMONITORS
5682 if (*flags & WAKEMON_MAKE_FATAL) {
5683 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON;
5684 }
5685#endif /* CONFIG_NOMONITORS */
5686
39037602 5687 if (*rate_hz <= 0) {
39236c6e
A
5688 task_unlock(task);
5689 return KERN_INVALID_ARGUMENT;
5690 }
5691
5692#ifndef CONFIG_NOMONITORS
5693 ledger_set_limit(ledger, task_ledgers.interrupt_wakeups, *rate_hz * task_wakeups_monitor_interval,
5694 task_wakeups_monitor_ustackshots_trigger_pct);
5695 ledger_set_period(ledger, task_ledgers.interrupt_wakeups, task_wakeups_monitor_interval * NSEC_PER_SEC);
5696 ledger_enable_callback(ledger, task_ledgers.interrupt_wakeups);
5697#endif /* CONFIG_NOMONITORS */
5698 } else if (*flags & WAKEMON_DISABLE) {
5699 /*
5700 * Caller wishes to disable wakeups monitor on the task.
5701 *
5702 * Disable telemetry if it was triggered by the wakeups monitor, and
5703 * remove the limit & callback on the wakeups ledger entry.
5704 */
5705#if CONFIG_TELEMETRY
490019cf 5706 telemetry_task_ctl_locked(task, TF_WAKEMON_WARNING, 0);
39236c6e
A
5707#endif
5708 ledger_disable_refill(ledger, task_ledgers.interrupt_wakeups);
5709 ledger_disable_callback(ledger, task_ledgers.interrupt_wakeups);
5710 }
5711
5712 task_unlock(task);
5713 return KERN_SUCCESS;
5714}
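/*
 * Editor's note -- worked example of the round trip above, using
 * illustrative numbers: enabling with *rate_hz = 150 and a monitor
 * interval of 300 seconds stores limit = 150 * 300 = 45000 wakeups with
 * period = 300 * NSEC_PER_SEC. A later WAKEMON_GET_PARAMS recovers
 * limit / (period / NSEC_PER_SEC) = 45000 / 300 = 150 Hz.
 */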
5715
5716void
5717task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1)
5718{
5719 if (warning == LEDGER_WARNING_ROSE_ABOVE) {
5720#if CONFIG_TELEMETRY
5721 /*
5722 * This task is in danger of violating the wakeups monitor. Enable telemetry on this task
5723 * so there are micro-stackshots available if and when EXC_RESOURCE is triggered.
5724 */
5725 telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 1);
5726#endif
5727 return;
5728 }
5729
5730#if CONFIG_TELEMETRY
5731 /*
5732 * If the balance has dipped below the warning level (LEDGER_WARNING_DIPPED_BELOW) or
5733 * exceeded the limit, turn telemetry off for the task.
5734 */
5735 telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 0);
5736#endif
5737
5738 if (warning == 0) {
39037602 5739 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS();
39236c6e
A
5740 }
5741}
5742
5743void __attribute__((noinline))
39037602 5744SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS(void)
39236c6e 5745{
39037602
A
5746 task_t task = current_task();
5747 int pid = 0;
5748 const char *procname = "unknown";
5749 boolean_t fatal;
5750 kern_return_t kr;
5751#ifdef EXC_RESOURCE_MONITORS
5752 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
5753#endif /* EXC_RESOURCE_MONITORS */
5754 struct ledger_entry_info lei;
39236c6e
A
5755
5756#ifdef MACH_BSD
5757 pid = proc_selfpid();
5758 if (task->bsd_info != NULL)
5759 procname = proc_name_address(current_task()->bsd_info);
5760#endif
5761
5762 ledger_get_entry_info(task->ledger, task_ledgers.interrupt_wakeups, &lei);
5763
5764 /*
5765 * Disable the exception notification so we don't overwhelm
5766 * the listener with an endless stream of redundant exceptions.
39037602 5767 * TODO: detect whether another thread is already reporting the violation.
39236c6e
A
5768 */
5769 uint32_t flags = WAKEMON_DISABLE;
5770 task_wakeups_monitor_ctl(task, &flags, NULL);
5771
39037602
A
5772 fatal = task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON;
5773 trace_resource_violation(RMON_CPUWAKES_VIOLATED, &lei);
5ba3f43e 5774 os_log(OS_LOG_DEFAULT, "process %s[%d] caught waking the CPU %llu times "
39037602
A
5775 "over ~%llu seconds, averaging %llu wakes / second and "
5776 "violating a %slimit of %llu wakes over %llu seconds.\n",
5777 procname, pid,
5778 lei.lei_balance, lei.lei_last_refill / NSEC_PER_SEC,
5779 lei.lei_last_refill == 0 ? 0 :
5780 (NSEC_PER_SEC * lei.lei_balance / lei.lei_last_refill),
5781 fatal ? "FATAL " : "",
5782 lei.lei_limit, lei.lei_refill_period / NSEC_PER_SEC);
5783
5784 kr = send_resource_violation(send_cpu_wakes_violation, task, &lei,
5785 fatal ? kRNFatalLimitFlag : 0);
5786 if (kr) {
5787 printf("send_resource_violation(CPU wakes, ...): error %#x\n", kr);
5788 }
5789
5790#ifdef EXC_RESOURCE_MONITORS
39236c6e
A
5791 if (disable_exc_resource) {
5792 printf("process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
5793 "suppressed by a boot-arg\n", procname, pid);
5794 return;
5795 }
15129b1c 5796 if (audio_active) {
5ba3f43e 5797 os_log(OS_LOG_DEFAULT, "process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
15129b1c
A
5798 "suppressed due to audio playback\n", procname, pid);
5799 return;
5800 }
39037602 5801 if (lei.lei_last_refill == 0) {
5ba3f43e 5802 os_log(OS_LOG_DEFAULT, "process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
39037602
A
5803 "suppressed due to lei.lei_last_refill = 0\n", procname, pid);
5804 }
39236c6e
A
5805
5806 code[0] = code[1] = 0;
5807 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_WAKEUPS);
5808 EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_WAKEUPS_MONITOR);
39037602
A
5809 EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_PERMITTED(code[0],
5810 NSEC_PER_SEC * lei.lei_limit / lei.lei_refill_period);
5811 EXC_RESOURCE_CPUMONITOR_ENCODE_OBSERVATION_INTERVAL(code[0],
5812 lei.lei_last_refill);
5813 EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_OBSERVED(code[1],
5814 NSEC_PER_SEC * lei.lei_balance / lei.lei_last_refill);
39236c6e 5815 exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
39037602 5816#endif /* EXC_RESOURCE_MONITORS */
39236c6e 5817
39037602 5818 if (fatal) {
39236c6e
A
5819 task_terminate_internal(task);
5820 }
5821}
fe8ab488 5822
39037602
A
5823static boolean_t
5824global_update_logical_writes(int64_t io_delta)
fe8ab488 5825{
39037602
A
5826 int64_t old_count, new_count;
5827 boolean_t needs_telemetry;
5828
5829 do {
5830 new_count = old_count = global_logical_writes_count;
5831 new_count += io_delta;
5832 if (new_count >= io_telemetry_limit) {
5833 new_count = 0;
5834 needs_telemetry = TRUE;
5835 } else {
5836 needs_telemetry = FALSE;
5837 }
5838 } while(!OSCompareAndSwap64(old_count, new_count, &global_logical_writes_count));
5839 return needs_telemetry;
5840}
fe8ab488 5841
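/*
 * Editor's note: the do/while above is a standard lock-free accumulate.
 * OSCompareAndSwap64 fails, and the loop retries, whenever another CPU
 * updates global_logical_writes_count between the read and the swap; per
 * threshold crossing, only the thread whose swap to 0 lands returns TRUE,
 * so telemetry is triggered once rather than by every racing writer.
 */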
39037602
A
5842void task_update_logical_writes(task_t task, uint32_t io_size, int flags, void *vp)
5843{
5844 int64_t io_delta = 0;
5845 boolean_t needs_telemetry = FALSE;
fe8ab488 5846
39037602
A
5847 if ((!task) || (!io_size) || (!vp))
5848 return;
5849
5850 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_DATA_WRITE)) | DBG_FUNC_NONE,
5851 task_pid(task), io_size, flags, (uintptr_t)VM_KERNEL_ADDRPERM(vp), 0);
5852 DTRACE_IO4(logical_writes, struct task *, task, uint32_t, io_size, int, flags, vnode *, vp);
5853 switch(flags) {
5854 case TASK_WRITE_IMMEDIATE:
5855 OSAddAtomic64(io_size, (SInt64 *)&(task->task_immediate_writes));
5856 ledger_credit(task->ledger, task_ledgers.logical_writes, io_size);
5857 break;
5858 case TASK_WRITE_DEFERRED:
5859 OSAddAtomic64(io_size, (SInt64 *)&(task->task_deferred_writes));
5860 ledger_credit(task->ledger, task_ledgers.logical_writes, io_size);
5861 break;
5862 case TASK_WRITE_INVALIDATED:
5863 OSAddAtomic64(io_size, (SInt64 *)&(task->task_invalidated_writes));
5864 ledger_debit(task->ledger, task_ledgers.logical_writes, io_size);
5865 break;
5866 case TASK_WRITE_METADATA:
5867 OSAddAtomic64(io_size, (SInt64 *)&(task->task_metadata_writes));
5868 ledger_credit(task->ledger, task_ledgers.logical_writes, io_size);
5869 break;
fe8ab488 5870 }
39037602
A
5871
5872 io_delta = (flags == TASK_WRITE_INVALIDATED) ? ((int64_t)io_size * -1ll) : ((int64_t)io_size);
5873 if (io_telemetry_limit != 0) {
5874 /* If io_telemetry_limit is 0, disable global updates and I/O telemetry */
5875 needs_telemetry = global_update_logical_writes(io_delta);
5876 if (needs_telemetry) {
5877 act_set_io_telemetry_ast(current_thread());
5878 }
fe8ab488 5879 }
39037602 5880}
fe8ab488 5881
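/*
 * Editor's note on the flavors above: immediate, deferred and metadata
 * writes all credit task_ledgers.logical_writes, while TASK_WRITE_INVALIDATED
 * debits it -- an invalidated write backs out a deferred write that was
 * credited earlier but will never reach the disk, which is also why
 * io_delta is negated for that flavor.
 */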
39037602
A
5882/*
5883 * Control the I/O monitor for a task.
5884 */
5885kern_return_t
5886task_io_monitor_ctl(task_t task, uint32_t *flags)
5887{
5888 ledger_t ledger = task->ledger;
fe8ab488 5889
39037602
A
5890 task_lock(task);
5891 if (*flags & IOMON_ENABLE) {
5892 /* Configure the physical I/O ledger */
5893 ledger_set_limit(ledger, task_ledgers.physical_writes, (task_iomon_limit_mb * 1024 * 1024), 0);
5894 ledger_set_period(ledger, task_ledgers.physical_writes, (task_iomon_interval_secs * NSEC_PER_SEC));
5895
5896 /* Configure the logical I/O ledger */
5897 ledger_set_limit(ledger, task_ledgers.logical_writes, (task_iomon_limit_mb * 1024 * 1024), 0);
5898 ledger_set_period(ledger, task_ledgers.logical_writes, (task_iomon_interval_secs * NSEC_PER_SEC));
5899
5900 } else if (*flags & IOMON_DISABLE) {
5901 /*
5902 * Caller wishes to disable I/O monitor on the task.
5903 */
5904 ledger_disable_refill(ledger, task_ledgers.physical_writes);
5905 ledger_disable_callback(ledger, task_ledgers.physical_writes);
5906 ledger_disable_refill(ledger, task_ledgers.logical_writes);
5907 ledger_disable_callback(ledger, task_ledgers.logical_writes);
5908 }
fe8ab488 5909
39037602 5910 task_unlock(task);
fe8ab488
A
5911 return KERN_SUCCESS;
5912}
5913
39037602
A
5914void
5915task_io_rate_exceeded(int warning, const void *param0, __unused const void *param1)
5916{
5917 if (warning == 0) {
5918 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO((int)param0);
5919 }
5920}
5921
5922void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO(int flavor)
5923{
5924 int pid = 0;
5925 task_t task = current_task();
5926#ifdef EXC_RESOURCE_MONITORS
5927 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
5928#endif /* EXC_RESOURCE_MONITORS */
5929 struct ledger_entry_info lei;
5930 kern_return_t kr;
5931
5932#ifdef MACH_BSD
5933 pid = proc_selfpid();
5934#endif
5935 /*
5936 * Get the ledger entry info. We need to do this before disabling the exception
5937 * to get correct values for all fields.
5938 */
5939 switch(flavor) {
5940 case FLAVOR_IO_PHYSICAL_WRITES:
5941 ledger_get_entry_info(task->ledger, task_ledgers.physical_writes, &lei);
5942 break;
5943 case FLAVOR_IO_LOGICAL_WRITES:
5944 ledger_get_entry_info(task->ledger, task_ledgers.logical_writes, &lei);
5945 break;
5946 }
5947
5948
5949 /*
5950 * Disable the exception notification so we don't overwhelm
5951 * the listener with an endless stream of redundant exceptions.
5952 * TODO: detect whether another thread is already reporting the violation.
5953 */
5954 uint32_t flags = IOMON_DISABLE;
5955 task_io_monitor_ctl(task, &flags);
5956
5957 if (flavor == FLAVOR_IO_LOGICAL_WRITES) {
5958 trace_resource_violation(RMON_LOGWRITES_VIOLATED, &lei);
5959 }
5ba3f43e 5960 os_log(OS_LOG_DEFAULT, "process [%d] caught causing excessive I/O (flavor: %d). Task I/O: %lld MB. [Limit: %lld MB per %lld secs]\n",
39037602
A
5961 pid, flavor, (lei.lei_balance / (1024 * 1024)), (lei.lei_limit / (1024 * 1024)), (lei.lei_refill_period / NSEC_PER_SEC));
5962
5963 kr = send_resource_violation(send_disk_writes_violation, task, &lei, kRNFlagsNone);
5964 if (kr) {
5965 printf("send_resource_violation(disk_writes, ...): error %#x\n", kr);
5966 }
5967
5968#ifdef EXC_RESOURCE_MONITORS
5969 code[0] = code[1] = 0;
5970 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_IO);
5971 EXC_RESOURCE_ENCODE_FLAVOR(code[0], flavor);
5972 EXC_RESOURCE_IO_ENCODE_INTERVAL(code[0], (lei.lei_refill_period / NSEC_PER_SEC));
5973 EXC_RESOURCE_IO_ENCODE_LIMIT(code[0], (lei.lei_limit / (1024 * 1024)));
5974 EXC_RESOURCE_IO_ENCODE_OBSERVED(code[1], (lei.lei_balance / (1024 * 1024)));
5975 exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
5976#endif /* EXC_RESOURCE_MONITORS */
5977}
5978
fe8ab488
A
5979/* Placeholders for the task set/get voucher interfaces */
5980kern_return_t
5981task_get_mach_voucher(
5982 task_t task,
5983 mach_voucher_selector_t __unused which,
5984 ipc_voucher_t *voucher)
5985{
5986 if (TASK_NULL == task)
5987 return KERN_INVALID_TASK;
5988
5989 *voucher = NULL;
5990 return KERN_SUCCESS;
5991}
5992
5993kern_return_t
5994task_set_mach_voucher(
5995 task_t task,
5996 ipc_voucher_t __unused voucher)
5997{
5998 if (TASK_NULL == task)
5999 return KERN_INVALID_TASK;
6000
6001 return KERN_SUCCESS;
6002}
6003
6004kern_return_t
6005task_swap_mach_voucher(
6006 task_t task,
6007 ipc_voucher_t new_voucher,
6008 ipc_voucher_t *in_out_old_voucher)
6009{
6010 if (TASK_NULL == task)
6011 return KERN_INVALID_TASK;
6012
6013 *in_out_old_voucher = new_voucher;
6014 return KERN_SUCCESS;
6015}
6016
6017void task_set_gpu_denied(task_t task, boolean_t denied)
6018{
6019 task_lock(task);
6020
6021 if (denied) {
6022 task->t_flags |= TF_GPU_DENIED;
6023 } else {
6024 task->t_flags &= ~TF_GPU_DENIED;
6025 }
6026
6027 task_unlock(task);
6028}
6029
6030boolean_t task_is_gpu_denied(task_t task)
6031{
6032 /* We don't need the lock to read this flag */
6033 return (task->t_flags & TF_GPU_DENIED) ? TRUE : FALSE;
6034}
4bd07ac2 6035
39037602
A
6036
6037uint64_t get_task_memory_region_count(task_t task)
4bd07ac2 6038{
39037602
A
6039 vm_map_t map;
6040 map = (task == kernel_task) ? kernel_map: task->map;
6041 return((uint64_t)get_map_nentries(map));
6042}
6043
6044static void
6045kdebug_trace_dyld_internal(uint32_t base_code,
6046 struct dyld_kernel_image_info *info)
6047{
6048 static_assert(sizeof(info->uuid) >= 16);
6049
6050#if defined(__LP64__)
6051 uint64_t *uuid = (uint64_t *)&(info->uuid);
6052
6053 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
6054 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code), uuid[0],
6055 uuid[1], info->load_addr,
6056 (uint64_t)info->fsid.val[0] | ((uint64_t)info->fsid.val[1] << 32),
6057 0);
6058 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
6059 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 1),
6060 (uint64_t)info->fsobjid.fid_objno |
6061 ((uint64_t)info->fsobjid.fid_generation << 32),
6062 0, 0, 0, 0);
6063#else /* defined(__LP64__) */
6064 uint32_t *uuid = (uint32_t *)&(info->uuid);
6065
6066 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
6067 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 2), uuid[0],
6068 uuid[1], uuid[2], uuid[3], 0);
6069 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
6070 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 3),
6071 (uint32_t)info->load_addr, info->fsid.val[0], info->fsid.val[1],
6072 info->fsobjid.fid_objno, 0);
6073 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
6074 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 4),
6075 info->fsobjid.fid_generation, 0, 0, 0, 0);
6076#endif /* !defined(__LP64__) */
6077}
6078
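/*
 * Editor's note -- layout sketch for the LP64 events above: the 16-byte
 * UUID travels in the first two 64-bit arguments of the first event,
 * followed by the load address and the fsid packed low-word-first; the
 * second event carries the fsobjid packed the same way. A consumer
 * reverses the packing, e.g. for the fsid argument:
 *
 *	uint32_t fsid_0 = (uint32_t)(arg4 & 0xFFFFFFFF);
 *	uint32_t fsid_1 = (uint32_t)(arg4 >> 32);
 */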
6079static kern_return_t
6080kdebug_trace_dyld(task_t task, uint32_t base_code,
6081 vm_map_copy_t infos_copy, mach_msg_type_number_t infos_len)
6082{
6083 kern_return_t kr;
6084 dyld_kernel_image_info_array_t infos;
6085 vm_map_offset_t map_data;
6086 vm_offset_t data;
6087
5ba3f43e
A
6088 if (!infos_copy) {
6089 return KERN_INVALID_ADDRESS;
6090 }
6091
d190cdc3
A
6092 if (!kdebug_enable ||
6093 !kdebug_debugid_enabled(KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, 0)))
6094 {
6095 vm_map_copy_discard(infos_copy);
6096 return KERN_SUCCESS;
6097 }
6098
39037602
A
6099 if (task == NULL || task != current_task()) {
6100 return KERN_INVALID_TASK;
4bd07ac2 6101 }
39037602
A
6102
6103 kr = vm_map_copyout(ipc_kernel_map, &map_data, (vm_map_copy_t)infos_copy);
6104 if (kr != KERN_SUCCESS) {
6105 return kr;
6106 }
6107
6108 infos = CAST_DOWN(dyld_kernel_image_info_array_t, map_data);
6109
6110 for (mach_msg_type_number_t i = 0; i < infos_len; i++) {
6111 kdebug_trace_dyld_internal(base_code, &(infos[i]));
6112 }
6113
6114 data = CAST_DOWN(vm_offset_t, map_data);
6115 mach_vm_deallocate(ipc_kernel_map, data, infos_len * sizeof(infos[0]));
6116 return KERN_SUCCESS;
6117}
6118
6119kern_return_t
6120task_register_dyld_image_infos(task_t task,
6121 dyld_kernel_image_info_array_t infos_copy,
6122 mach_msg_type_number_t infos_len)
6123{
6124 return kdebug_trace_dyld(task, DBG_DYLD_UUID_MAP_A,
6125 (vm_map_copy_t)infos_copy, infos_len);
6126}
6127
6128kern_return_t
6129task_unregister_dyld_image_infos(task_t task,
6130 dyld_kernel_image_info_array_t infos_copy,
6131 mach_msg_type_number_t infos_len)
6132{
6133 return kdebug_trace_dyld(task, DBG_DYLD_UUID_UNMAP_A,
6134 (vm_map_copy_t)infos_copy, infos_len);
6135}
6136
6137kern_return_t
6138task_get_dyld_image_infos(__unused task_t task,
6139 __unused dyld_kernel_image_info_array_t * dyld_images,
6140 __unused mach_msg_type_number_t * dyld_imagesCnt)
6141{
6142 return KERN_NOT_SUPPORTED;
6143}
6144
6145kern_return_t
6146task_register_dyld_shared_cache_image_info(task_t task,
6147 dyld_kernel_image_info_t cache_img,
6148 __unused boolean_t no_cache,
6149 __unused boolean_t private_cache)
6150{
6151 if (task == NULL || task != current_task()) {
6152 return KERN_INVALID_TASK;
6153 }
6154
6155 kdebug_trace_dyld_internal(DBG_DYLD_UUID_SHARED_CACHE_A, &cache_img);
6156 return KERN_SUCCESS;
6157}
6158
6159kern_return_t
6160task_register_dyld_set_dyld_state(__unused task_t task,
6161 __unused uint8_t dyld_state)
6162{
6163 return KERN_NOT_SUPPORTED;
6164}
6165
6166kern_return_t
6167task_register_dyld_get_process_state(__unused task_t task,
6168 __unused dyld_kernel_process_info_t * dyld_process_state)
6169{
6170 return KERN_NOT_SUPPORTED;
6171}
6172
5ba3f43e
A
6173kern_return_t
6174task_inspect(task_inspect_t task_insp, task_inspect_flavor_t flavor,
6175 task_inspect_info_t info_out, mach_msg_type_number_t *size_in_out)
6176{
6177#if MONOTONIC
6178 task_t task = (task_t)task_insp;
6179 kern_return_t kr = KERN_SUCCESS;
6180 mach_msg_type_number_t size;
6181
6182 if (task == TASK_NULL) {
6183 return KERN_INVALID_ARGUMENT;
6184 }
6185
6186 size = *size_in_out;
6187
6188 switch (flavor) {
6189 case TASK_INSPECT_BASIC_COUNTS: {
6190 struct task_inspect_basic_counts *bc;
e8c3f781 6191 uint64_t task_counts[MT_CORE_NFIXED] = { 0 };
5ba3f43e
A
6192
6193 if (size < TASK_INSPECT_BASIC_COUNTS_COUNT) {
6194 kr = KERN_INVALID_ARGUMENT;
6195 break;
6196 }
6197
6198 mt_fixed_task_counts(task, task_counts);
6199 bc = (struct task_inspect_basic_counts *)info_out;
6200#ifdef MT_CORE_INSTRS
6201 bc->instructions = task_counts[MT_CORE_INSTRS];
6202#else /* defined(MT_CORE_INSTRS) */
6203 bc->instructions = 0;
6204#endif /* !defined(MT_CORE_INSTRS) */
6205 bc->cycles = task_counts[MT_CORE_CYCLES];
6206 size = TASK_INSPECT_BASIC_COUNTS_COUNT;
6207 break;
6208 }
6209 default:
6210 kr = KERN_INVALID_ARGUMENT;
6211 break;
6212 }
6213
6214 if (kr == KERN_SUCCESS) {
6215 *size_in_out = size;
6216 }
6217 return kr;
6218#else /* MONOTONIC */
6219#pragma unused(task_insp, flavor, info_out, size_in_out)
6220 return KERN_NOT_SUPPORTED;
6221#endif /* !MONOTONIC */
6222}
6223
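/*
 * Editor's note -- usage sketch, with error handling elided and the cast
 * through task_inspect_info_t assumed from the MIG definitions:
 *
 *	struct task_inspect_basic_counts counts;
 *	mach_msg_type_number_t size = TASK_INSPECT_BASIC_COUNTS_COUNT;
 *	if (task_inspect(task, TASK_INSPECT_BASIC_COUNTS,
 *	    (task_inspect_info_t)&counts, &size) == KERN_SUCCESS) {
 *		(use counts.cycles / counts.instructions; the latter reads
 *		0 on cores without a fixed instructions counter)
 *	}
 */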
39037602
A
6224#if CONFIG_SECLUDED_MEMORY
6225int num_tasks_can_use_secluded_mem = 0;
6226
6227void
6228task_set_can_use_secluded_mem(
6229 task_t task,
6230 boolean_t can_use_secluded_mem)
6231{
6232 if (!task->task_could_use_secluded_mem) {
6233 return;
6234 }
6235 task_lock(task);
6236 task_set_can_use_secluded_mem_locked(task, can_use_secluded_mem);
6237 task_unlock(task);
6238}
6239
6240void
6241task_set_can_use_secluded_mem_locked(
6242 task_t task,
6243 boolean_t can_use_secluded_mem)
6244{
6245 assert(task->task_could_use_secluded_mem);
6246 if (can_use_secluded_mem &&
6247 secluded_for_apps && /* global boot-arg */
6248 !task->task_can_use_secluded_mem) {
6249 assert(num_tasks_can_use_secluded_mem >= 0);
6250 OSAddAtomic(+1,
6251 (volatile SInt32 *)&num_tasks_can_use_secluded_mem);
6252 task->task_can_use_secluded_mem = TRUE;
6253 } else if (!can_use_secluded_mem &&
6254 task->task_can_use_secluded_mem) {
6255 assert(num_tasks_can_use_secluded_mem > 0);
6256 OSAddAtomic(-1,
6257 (volatile SInt32 *)&num_tasks_can_use_secluded_mem);
6258 task->task_can_use_secluded_mem = FALSE;
6259 }
6260}
6261
6262void
6263task_set_could_use_secluded_mem(
6264 task_t task,
6265 boolean_t could_use_secluded_mem)
6266{
6267 task->task_could_use_secluded_mem = could_use_secluded_mem;
6268}
6269
6270void
6271task_set_could_also_use_secluded_mem(
6272 task_t task,
6273 boolean_t could_also_use_secluded_mem)
6274{
6275 task->task_could_also_use_secluded_mem = could_also_use_secluded_mem;
6276}
6277
6278boolean_t
6279task_can_use_secluded_mem(
d9a64523
A
6280 task_t task,
6281 boolean_t is_alloc)
39037602
A
6282{
6283 if (task->task_can_use_secluded_mem) {
6284 assert(task->task_could_use_secluded_mem);
6285 assert(num_tasks_can_use_secluded_mem > 0);
6286 return TRUE;
6287 }
6288 if (task->task_could_also_use_secluded_mem &&
6289 num_tasks_can_use_secluded_mem > 0) {
6290 assert(num_tasks_can_use_secluded_mem > 0);
6291 return TRUE;
6292 }
d9a64523
A
6293
6294 /*
6295 * If a single task is using more than some amount of
6296 * memory, allow it to dip into secluded and also begin
6297 * suppression of secluded memory until the task exits.
6298 */
6299 if (is_alloc && secluded_shutoff_trigger != 0) {
6300 uint64_t phys_used = get_task_phys_footprint(task);
6301 if (phys_used > secluded_shutoff_trigger) {
6302 start_secluded_suppression(task);
6303 return TRUE;
6304 }
6305 }
6306
39037602
A
6307 return FALSE;
6308}
6309
6310boolean_t
6311task_could_use_secluded_mem(
6312 task_t task)
6313{
6314 return task->task_could_use_secluded_mem;
4bd07ac2 6315}
39037602 6316#endif /* CONFIG_SECLUDED_MEMORY */
7e41aa88
A
6317
6318queue_head_t *
6319task_io_user_clients(task_t task)
6320{
39037602 6321 return (&task->io_user_clients);
7e41aa88 6322}
5ba3f43e
A
6323
6324void
6325task_copy_fields_for_exec(task_t dst_task, task_t src_task)
6326{
6327 dst_task->vtimers = src_task->vtimers;
6328}
a39ff7e2
A
6329
6330#if DEVELOPMENT || DEBUG
6331int vm_region_footprint = 0;
6332#endif /* DEVELOPMENT || DEBUG */
6333
6334boolean_t
6335task_self_region_footprint(void)
6336{
6337#if DEVELOPMENT || DEBUG
6338 if (vm_region_footprint) {
6339 /* system-wide override */
6340 return TRUE;
6341 }
6342#endif /* DEVELOPMENT || DEBUG */
6343 return current_task()->task_region_footprint;
6344}
6345
6346void
6347task_self_region_footprint_set(
6348 boolean_t newval)
6349{
6350 task_t curtask;
6351
6352 curtask = current_task();
6353 task_lock(curtask);
6354 if (newval) {
6355 curtask->task_region_footprint = TRUE;
6356 } else {
6357 curtask->task_region_footprint = FALSE;
6358 }
6359 task_unlock(curtask);
6360}
d9a64523
A
6361
6362void
6363task_set_darkwake_mode(task_t task, boolean_t set_mode)
6364{
6365 assert(task);
6366
6367 task_lock(task);
6368
6369 if (set_mode) {
6370 task->t_flags |= TF_DARKWAKE_MODE;
6371 } else {
6372 task->t_flags &= ~(TF_DARKWAKE_MODE);
6373 }
6374
6375 task_unlock(task);
6376}
6377
6378boolean_t
6379task_get_darkwake_mode(task_t task)
6380{
6381 assert(task);
6382 return ((task->t_flags & TF_DARKWAKE_MODE) != 0);
6383}
6384
6385#if __arm64__
6386void
6387task_set_legacy_footprint(
6388 task_t task,
6389 boolean_t new_val)
6390{
6391 task_lock(task);
6392 task->task_legacy_footprint = new_val;
6393 task_unlock(task);
6394}
6395#endif /* __arm64__ */