]> git.saurik.com Git - apple/xnu.git/blame_incremental - osfmk/kern/task.c
xnu-7195.101.1.tar.gz
[apple/xnu.git] / osfmk / kern / task.c
... / ...
CommitLineData
1/*
2 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * @OSF_FREE_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 * File: kern/task.c
58 * Author: Avadis Tevanian, Jr., Michael Wayne Young, David Golub,
59 * David Black
60 *
61 * Task management primitives implementation.
62 */
63/*
64 * Copyright (c) 1993 The University of Utah and
65 * the Computer Systems Laboratory (CSL). All rights reserved.
66 *
67 * Permission to use, copy, modify and distribute this software and its
68 * documentation is hereby granted, provided that both the copyright
69 * notice and this permission notice appear in all copies of the
70 * software, derivative works or modified versions, and any portions
71 * thereof, and that both notices appear in supporting documentation.
72 *
73 * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
74 * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
75 * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
76 *
77 * CSL requests users of this software to return to csl-dist@cs.utah.edu any
78 * improvements that they make and grant CSL redistribution rights.
79 *
80 */
81/*
82 * NOTICE: This file was modified by McAfee Research in 2004 to introduce
83 * support for mandatory and extensible security protections. This notice
84 * is included in support of clause 2.2 (b) of the Apple Public License,
85 * Version 2.0.
86 * Copyright (c) 2005 SPARTA, Inc.
87 */
88
89#include <mach/mach_types.h>
90#include <mach/boolean.h>
91#include <mach/host_priv.h>
92#include <mach/machine/vm_types.h>
93#include <mach/vm_param.h>
94#include <mach/mach_vm.h>
95#include <mach/semaphore.h>
96#include <mach/task_info.h>
97#include <mach/task_inspect.h>
98#include <mach/task_special_ports.h>
99#include <mach/sdt.h>
100
101#include <ipc/ipc_importance.h>
102#include <ipc/ipc_types.h>
103#include <ipc/ipc_space.h>
104#include <ipc/ipc_entry.h>
105#include <ipc/ipc_hash.h>
106
107#include <kern/kern_types.h>
108#include <kern/mach_param.h>
109#include <kern/misc_protos.h>
110#include <kern/task.h>
111#include <kern/thread.h>
112#include <kern/coalition.h>
113#include <kern/zalloc.h>
114#include <kern/kalloc.h>
115#include <kern/kern_cdata.h>
116#include <kern/processor.h>
117#include <kern/sched_prim.h> /* for thread_wakeup */
118#include <kern/ipc_tt.h>
119#include <kern/host.h>
120#include <kern/clock.h>
121#include <kern/timer.h>
122#include <kern/assert.h>
123#include <kern/sync_lock.h>
124#include <kern/affinity.h>
125#include <kern/exc_resource.h>
126#include <kern/machine.h>
127#include <kern/policy_internal.h>
128#include <kern/restartable.h>
129
130#include <corpses/task_corpse.h>
131#if CONFIG_TELEMETRY
132#include <kern/telemetry.h>
133#endif
134
135#if MONOTONIC
136#include <kern/monotonic.h>
137#include <machine/monotonic.h>
138#endif /* MONOTONIC */
139
140#include <os/log.h>
141
142#include <vm/pmap.h>
143#include <vm/vm_map.h>
144#include <vm/vm_kern.h> /* for kernel_map, ipc_kernel_map */
145#include <vm/vm_pageout.h>
146#include <vm/vm_protos.h>
147#include <vm/vm_purgeable_internal.h>
148#include <vm/vm_compressor_pager.h>
149
150#include <sys/resource.h>
151#include <sys/signalvar.h> /* for coredump */
152#include <sys/bsdtask_info.h>
153/*
154 * Exported interfaces
155 */
156
157#include <mach/task_server.h>
158#include <mach/mach_host_server.h>
159#include <mach/host_security_server.h>
160#include <mach/mach_port_server.h>
161
162#include <vm/vm_shared_region.h>
163
164#include <libkern/OSDebug.h>
165#include <libkern/OSAtomic.h>
166#include <libkern/section_keywords.h>
167
168#include <mach-o/loader.h>
169#include <kdp/kdp_dyld.h>
170
171#include <kern/sfi.h> /* picks up ledger.h */
172
173#if CONFIG_MACF
174#include <security/mac_mach_internal.h>
175#endif
176
177#include <IOKit/IOBSD.h>
178
179#if KPERF
180extern int kpc_force_all_ctrs(task_t, int);
181#endif
182
183SECURITY_READ_ONLY_LATE(task_t) kernel_task;
184
185static SECURITY_READ_ONLY_LATE(zone_t) task_zone;
186ZONE_INIT(&task_zone, "tasks", sizeof(struct task),
187 ZC_NOENCRYPT | ZC_ZFREE_CLEARMEM,
188 ZONE_ID_TASK, NULL);
189
190extern int exc_via_corpse_forking;
191extern int corpse_for_fatal_memkill;
192extern boolean_t proc_send_synchronous_EXC_RESOURCE(void *p);
193extern void task_disown_frozen_csegs(task_t owner_task);
194
195/* Flag set by core audio when audio is playing. Used to stifle EXC_RESOURCE generation when active. */
196int audio_active = 0;
197
198/*
199 * structure for tracking zone usage
200 * Used either one per task/thread for all zones or <per-task,per-zone>.
201 */
202typedef struct zinfo_usage_store_t {
203 /* These fields may be updated atomically, and so must be 8 byte aligned */
204 uint64_t alloc __attribute__((aligned(8))); /* allocation counter */
205 uint64_t free __attribute__((aligned(8))); /* free counter */
206} zinfo_usage_store_t;
207
208zinfo_usage_store_t tasks_tkm_private;
209zinfo_usage_store_t tasks_tkm_shared;
210
211/* A container to accumulate statistics for expired tasks */
212expired_task_statistics_t dead_task_statistics;
213LCK_SPIN_DECLARE_ATTR(dead_task_statistics_lock, &task_lck_grp, &task_lck_attr);
214
215ledger_template_t task_ledger_template = NULL;
216
217/* global lock for task_dyld_process_info_notify_{register, deregister, get_trap} */
218LCK_GRP_DECLARE(g_dyldinfo_mtx_grp, "g_dyldinfo");
219LCK_MTX_DECLARE(g_dyldinfo_mtx, &g_dyldinfo_mtx_grp);
220
221SECURITY_READ_ONLY_LATE(struct _task_ledger_indices) task_ledgers __attribute__((used)) =
222{.cpu_time = -1,
223 .tkm_private = -1,
224 .tkm_shared = -1,
225 .phys_mem = -1,
226 .wired_mem = -1,
227 .internal = -1,
228 .iokit_mapped = -1,
229 .alternate_accounting = -1,
230 .alternate_accounting_compressed = -1,
231 .page_table = -1,
232 .phys_footprint = -1,
233 .internal_compressed = -1,
234 .purgeable_volatile = -1,
235 .purgeable_nonvolatile = -1,
236 .purgeable_volatile_compressed = -1,
237 .purgeable_nonvolatile_compressed = -1,
238 .tagged_nofootprint = -1,
239 .tagged_footprint = -1,
240 .tagged_nofootprint_compressed = -1,
241 .tagged_footprint_compressed = -1,
242 .network_volatile = -1,
243 .network_nonvolatile = -1,
244 .network_volatile_compressed = -1,
245 .network_nonvolatile_compressed = -1,
246 .media_nofootprint = -1,
247 .media_footprint = -1,
248 .media_nofootprint_compressed = -1,
249 .media_footprint_compressed = -1,
250 .graphics_nofootprint = -1,
251 .graphics_footprint = -1,
252 .graphics_nofootprint_compressed = -1,
253 .graphics_footprint_compressed = -1,
254 .neural_nofootprint = -1,
255 .neural_footprint = -1,
256 .neural_nofootprint_compressed = -1,
257 .neural_footprint_compressed = -1,
258 .platform_idle_wakeups = -1,
259 .interrupt_wakeups = -1,
260#if CONFIG_SCHED_SFI
261 .sfi_wait_times = { 0 /* initialized at runtime */},
262#endif /* CONFIG_SCHED_SFI */
263 .cpu_time_billed_to_me = -1,
264 .cpu_time_billed_to_others = -1,
265 .physical_writes = -1,
266 .logical_writes = -1,
267 .logical_writes_to_external = -1,
268#if DEBUG || DEVELOPMENT
269 .pages_grabbed = -1,
270 .pages_grabbed_kern = -1,
271 .pages_grabbed_iopl = -1,
272 .pages_grabbed_upl = -1,
273#endif
274#if CONFIG_FREEZE
275 .frozen_to_swap = -1,
276#endif /* CONFIG_FREEZE */
277 .energy_billed_to_me = -1,
278 .energy_billed_to_others = -1,
279#if CONFIG_PHYS_WRITE_ACCT
280 .fs_metadata_writes = -1,
281#endif /* CONFIG_PHYS_WRITE_ACCT */
282};
283
284/* System sleep state */
285boolean_t tasks_suspend_state;
286
287
288void init_task_ledgers(void);
289void task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1);
290void task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1);
291void task_io_rate_exceeded(int warning, const void *param0, __unused const void *param1);
292void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS(void);
293void __attribute__((noinline)) PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb, boolean_t is_fatal);
294void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO(int flavor);
295
296kern_return_t task_suspend_internal(task_t);
297kern_return_t task_resume_internal(task_t);
298static kern_return_t task_start_halt_locked(task_t task, boolean_t should_mark_corpse);
299
300extern kern_return_t iokit_task_terminate(task_t task);
301extern void iokit_task_app_suspended_changed(task_t task);
302
303extern kern_return_t exception_deliver(thread_t, exception_type_t, mach_exception_data_t, mach_msg_type_number_t, struct exception_action *, lck_mtx_t *);
304extern void bsd_copythreadname(void *dst_uth, void *src_uth);
305extern kern_return_t thread_resume(thread_t thread);
306
307// Warn tasks when they hit 80% of their memory limit.
308#define PHYS_FOOTPRINT_WARNING_LEVEL 80
309
310#define TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT 150 /* wakeups per second */
311#define TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL 300 /* in seconds. */
312
313/*
314 * Level (in terms of percentage of the limit) at which the wakeups monitor triggers telemetry.
315 *
316 * (ie when the task's wakeups rate exceeds 70% of the limit, start taking user
317 * stacktraces, aka micro-stackshots)
318 */
319#define TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER 70
320
321int task_wakeups_monitor_interval; /* In seconds. Time period over which wakeups rate is observed */
322int task_wakeups_monitor_rate; /* In hz. Maximum allowable wakeups per task before EXC_RESOURCE is sent */
323
324unsigned int task_wakeups_monitor_ustackshots_trigger_pct; /* Percentage. Level at which we start gathering telemetry. */
325
326int disable_exc_resource; /* Global override to suppress EXC_RESOURCE for resource monitor violations. */
327
328ledger_amount_t max_task_footprint = 0; /* Per-task limit on physical memory consumption in bytes */
329unsigned int max_task_footprint_warning_level = 0; /* Per-task limit warning percentage */
330int max_task_footprint_mb = 0; /* Per-task limit on physical memory consumption in megabytes */
331
332/* I/O Monitor Limits */
333#define IOMON_DEFAULT_LIMIT (20480ull) /* MB of logical/physical I/O */
334#define IOMON_DEFAULT_INTERVAL (86400ull) /* in seconds */
335
336uint64_t task_iomon_limit_mb; /* Per-task I/O monitor limit in MBs */
337uint64_t task_iomon_interval_secs; /* Per-task I/O monitor interval in secs */
338
339#define IO_TELEMETRY_DEFAULT_LIMIT (10ll * 1024ll * 1024ll)
340int64_t io_telemetry_limit; /* Threshold to take a microstackshot (0 indicates I/O telemetry is turned off) */
341int64_t global_logical_writes_count = 0; /* Global count for logical writes */
342int64_t global_logical_writes_to_external_count = 0; /* Global count for logical writes to external storage*/
343static boolean_t global_update_logical_writes(int64_t, int64_t*);
344
345#define TASK_MAX_THREAD_LIMIT 256
346
347#if MACH_ASSERT
348int pmap_ledgers_panic = 1;
349int pmap_ledgers_panic_leeway = 3;
350#endif /* MACH_ASSERT */
351
352int task_max = CONFIG_TASK_MAX; /* Max number of tasks */
353
354#if CONFIG_COREDUMP
355int hwm_user_cores = 0; /* high watermark violations generate user core files */
356#endif
357
358#ifdef MACH_BSD
359extern uint32_t proc_platform(const struct proc *);
360extern uint32_t proc_min_sdk(struct proc *);
361extern void proc_getexecutableuuid(void *, unsigned char *, unsigned long);
362extern int proc_pid(struct proc *p);
363extern int proc_selfpid(void);
364extern struct proc *current_proc(void);
365extern char *proc_name_address(struct proc *p);
366extern uint64_t get_dispatchqueue_offset_from_proc(void *);
367extern int kevent_proc_copy_uptrs(void *proc, uint64_t *buf, uint32_t bufsize);
368extern void workq_proc_suspended(struct proc *p);
369extern void workq_proc_resumed(struct proc *p);
370
371#if CONFIG_MEMORYSTATUS
372extern void proc_memstat_terminated(struct proc* p, boolean_t set);
373extern void memorystatus_on_ledger_footprint_exceeded(int warning, boolean_t memlimit_is_active, boolean_t memlimit_is_fatal);
374extern void memorystatus_log_exception(const int max_footprint_mb, boolean_t memlimit_is_active, boolean_t memlimit_is_fatal);
375extern boolean_t memorystatus_allowed_vm_map_fork(task_t task);
376extern uint64_t memorystatus_available_memory_internal(struct proc *p);
377
378#if DEVELOPMENT || DEBUG
379extern void memorystatus_abort_vm_map_fork(task_t);
380#endif
381
382#endif /* CONFIG_MEMORYSTATUS */
383
384#endif /* MACH_BSD */
385
386#if DEVELOPMENT || DEBUG
387int exc_resource_threads_enabled;
388#endif /* DEVELOPMENT || DEBUG */
389
390#if (DEVELOPMENT || DEBUG)
391uint32_t task_exc_guard_default = TASK_EXC_GUARD_MP_DELIVER | TASK_EXC_GUARD_MP_ONCE | TASK_EXC_GUARD_MP_CORPSE |
392 TASK_EXC_GUARD_VM_DELIVER | TASK_EXC_GUARD_VM_ONCE | TASK_EXC_GUARD_VM_CORPSE;
393#else
394uint32_t task_exc_guard_default = 0;
395#endif
396
397/* Forwards */
398
399static void task_hold_locked(task_t task);
400static void task_wait_locked(task_t task, boolean_t until_not_runnable);
401static void task_release_locked(task_t task);
402
403static void task_synchronizer_destroy_all(task_t task);
404static os_ref_count_t
405task_add_turnstile_watchports_locked(
406 task_t task,
407 struct task_watchports *watchports,
408 struct task_watchport_elem **previous_elem_array,
409 ipc_port_t *portwatch_ports,
410 uint32_t portwatch_count);
411
412static os_ref_count_t
413task_remove_turnstile_watchports_locked(
414 task_t task,
415 struct task_watchports *watchports,
416 ipc_port_t *port_freelist);
417
418static struct task_watchports *
419task_watchports_alloc_init(
420 task_t task,
421 thread_t thread,
422 uint32_t count);
423
424static void
425task_watchports_deallocate(
426 struct task_watchports *watchports);
427
428void
429task_set_64bit(
430 task_t task,
431 boolean_t is_64bit,
432 boolean_t is_64bit_data)
433{
434#if defined(__i386__) || defined(__x86_64__) || defined(__arm64__)
435 thread_t thread;
436#endif /* defined(__i386__) || defined(__x86_64__) || defined(__arm64__) */
437
438 task_lock(task);
439
440 /*
441 * Switching to/from 64-bit address spaces
442 */
443 if (is_64bit) {
444 if (!task_has_64Bit_addr(task)) {
445 task_set_64Bit_addr(task);
446 }
447 } else {
448 if (task_has_64Bit_addr(task)) {
449 task_clear_64Bit_addr(task);
450 }
451 }
452
453 /*
454 * Switching to/from 64-bit register state.
455 */
456 if (is_64bit_data) {
457 if (task_has_64Bit_data(task)) {
458 goto out;
459 }
460
461 task_set_64Bit_data(task);
462 } else {
463 if (!task_has_64Bit_data(task)) {
464 goto out;
465 }
466
467 task_clear_64Bit_data(task);
468 }
469
470 /* FIXME: On x86, the thread save state flavor can diverge from the
471 * task's 64-bit feature flag due to the 32-bit/64-bit register save
472 * state dichotomy. Since we can be pre-empted in this interval,
473 * certain routines may observe the thread as being in an inconsistent
474 * state with respect to its task's 64-bitness.
475 */
476
477#if defined(__x86_64__) || defined(__arm64__)
478 queue_iterate(&task->threads, thread, thread_t, task_threads) {
479 thread_mtx_lock(thread);
480 machine_thread_switch_addrmode(thread);
481 thread_mtx_unlock(thread);
482 }
483#endif /* defined(__x86_64__) || defined(__arm64__) */
484
485out:
486 task_unlock(task);
487}
488
489boolean_t
490task_get_64bit_data(task_t task)
491{
492 return task_has_64Bit_data(task);
493}
494
495void
496task_set_platform_binary(
497 task_t task,
498 boolean_t is_platform)
499{
500 task_lock(task);
501 if (is_platform) {
502 task->t_flags |= TF_PLATFORM;
503 /* set exc guard default behavior for first-party code */
504 task->task_exc_guard = (task_exc_guard_default & TASK_EXC_GUARD_ALL);
505 } else {
506 task->t_flags &= ~(TF_PLATFORM);
507 /* set exc guard default behavior for third-party code */
508 task->task_exc_guard = ((task_exc_guard_default >> TASK_EXC_GUARD_THIRD_PARTY_DEFAULT_SHIFT) & TASK_EXC_GUARD_ALL);
509 }
510 task_unlock(task);
511}
512
513/*
514 * Set or clear per-task TF_CA_CLIENT_WI flag according to specified argument.
515 * Returns "false" if flag is already set, and "true" in other cases.
516 */
517bool
518task_set_ca_client_wi(
519 task_t task,
520 boolean_t set_or_clear)
521{
522 bool ret = true;
523 task_lock(task);
524 if (set_or_clear) {
525 /* Tasks can have only one CA_CLIENT work interval */
526 if (task->t_flags & TF_CA_CLIENT_WI) {
527 ret = false;
528 } else {
529 task->t_flags |= TF_CA_CLIENT_WI;
530 }
531 } else {
532 task->t_flags &= ~TF_CA_CLIENT_WI;
533 }
534 task_unlock(task);
535 return ret;
536}
537
538void
539task_set_dyld_info(
540 task_t task,
541 mach_vm_address_t addr,
542 mach_vm_size_t size)
543{
544 task_lock(task);
545 task->all_image_info_addr = addr;
546 task->all_image_info_size = size;
547 task_unlock(task);
548}
549
550void
551task_set_mach_header_address(
552 task_t task,
553 mach_vm_address_t addr)
554{
555 task_lock(task);
556 task->mach_header_vm_address = addr;
557 task_unlock(task);
558}
559
560void
561task_bank_reset(__unused task_t task)
562{
563 if (task->bank_context != NULL) {
564 bank_task_destroy(task);
565 }
566}
567
568/*
569 * NOTE: This should only be called when the P_LINTRANSIT
570 * flag is set (the proc_trans lock is held) on the
571 * proc associated with the task.
572 */
573void
574task_bank_init(__unused task_t task)
575{
576 if (task->bank_context != NULL) {
577 panic("Task bank init called with non null bank context for task: %p and bank_context: %p", task, task->bank_context);
578 }
579 bank_task_initialize(task);
580}
581
582void
583task_set_did_exec_flag(task_t task)
584{
585 task->t_procflags |= TPF_DID_EXEC;
586}
587
588void
589task_clear_exec_copy_flag(task_t task)
590{
591 task->t_procflags &= ~TPF_EXEC_COPY;
592}
593
594event_t
595task_get_return_wait_event(task_t task)
596{
597 return (event_t)&task->returnwait_inheritor;
598}
599
600void
601task_clear_return_wait(task_t task, uint32_t flags)
602{
603 if (flags & TCRW_CLEAR_INITIAL_WAIT) {
604 thread_wakeup(task_get_return_wait_event(task));
605 }
606
607 if (flags & TCRW_CLEAR_FINAL_WAIT) {
608 is_write_lock(task->itk_space);
609
610 task->t_returnwaitflags &= ~TRW_LRETURNWAIT;
611 task->returnwait_inheritor = NULL;
612
613 if (task->t_returnwaitflags & TRW_LRETURNWAITER) {
614 struct turnstile *turnstile = turnstile_prepare((uintptr_t) task_get_return_wait_event(task),
615 NULL, TURNSTILE_NULL, TURNSTILE_ULOCK);
616
617 waitq_wakeup64_all(&turnstile->ts_waitq,
618 CAST_EVENT64_T(task_get_return_wait_event(task)),
619 THREAD_AWAKENED, 0);
620
621 turnstile_update_inheritor(turnstile, NULL,
622 TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD);
623 turnstile_update_inheritor_complete(turnstile, TURNSTILE_INTERLOCK_HELD);
624
625 turnstile_complete((uintptr_t) task_get_return_wait_event(task), NULL, NULL, TURNSTILE_ULOCK);
626 turnstile_cleanup();
627 task->t_returnwaitflags &= ~TRW_LRETURNWAITER;
628 }
629 is_write_unlock(task->itk_space);
630 }
631}
632
633void __attribute__((noreturn))
634task_wait_to_return(void)
635{
636 task_t task = current_task();
637
638 is_write_lock(task->itk_space);
639
640 if (task->t_returnwaitflags & TRW_LRETURNWAIT) {
641 struct turnstile *turnstile = turnstile_prepare((uintptr_t) task_get_return_wait_event(task),
642 NULL, TURNSTILE_NULL, TURNSTILE_ULOCK);
643
644 do {
645 task->t_returnwaitflags |= TRW_LRETURNWAITER;
646 turnstile_update_inheritor(turnstile, task->returnwait_inheritor,
647 (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));
648
649 waitq_assert_wait64(&turnstile->ts_waitq,
650 CAST_EVENT64_T(task_get_return_wait_event(task)),
651 THREAD_UNINT, TIMEOUT_WAIT_FOREVER);
652
653 is_write_unlock(task->itk_space);
654
655 turnstile_update_inheritor_complete(turnstile, TURNSTILE_INTERLOCK_NOT_HELD);
656
657 thread_block(THREAD_CONTINUE_NULL);
658
659 is_write_lock(task->itk_space);
660 } while (task->t_returnwaitflags & TRW_LRETURNWAIT);
661
662 turnstile_complete((uintptr_t) task_get_return_wait_event(task), NULL, NULL, TURNSTILE_ULOCK);
663 }
664
665 is_write_unlock(task->itk_space);
666 turnstile_cleanup();
667
668
669#if CONFIG_MACF
670 /*
671 * Before jumping to userspace and allowing this process to execute any code,
672 * notify any interested parties.
673 */
674 mac_proc_notify_exec_complete(current_proc());
675#endif
676
677 thread_bootstrap_return();
678}
679
680#ifdef CONFIG_32BIT_TELEMETRY
681boolean_t
682task_consume_32bit_log_flag(task_t task)
683{
684 if ((task->t_procflags & TPF_LOG_32BIT_TELEMETRY) != 0) {
685 task->t_procflags &= ~TPF_LOG_32BIT_TELEMETRY;
686 return TRUE;
687 } else {
688 return FALSE;
689 }
690}
691
692void
693task_set_32bit_log_flag(task_t task)
694{
695 task->t_procflags |= TPF_LOG_32BIT_TELEMETRY;
696}
697#endif /* CONFIG_32BIT_TELEMETRY */
698
699boolean_t
700task_is_exec_copy(task_t task)
701{
702 return task_is_exec_copy_internal(task);
703}
704
705boolean_t
706task_did_exec(task_t task)
707{
708 return task_did_exec_internal(task);
709}
710
711boolean_t
712task_is_active(task_t task)
713{
714 return task->active;
715}
716
717boolean_t
718task_is_halting(task_t task)
719{
720 return task->halting;
721}
722
723#if TASK_REFERENCE_LEAK_DEBUG
724#include <kern/btlog.h>
725
726static btlog_t *task_ref_btlog;
727#define TASK_REF_OP_INCR 0x1
728#define TASK_REF_OP_DECR 0x2
729
730#define TASK_REF_NUM_RECORDS 100000
731#define TASK_REF_BTDEPTH 7
732
733void
734task_reference_internal(task_t task)
735{
736 void * bt[TASK_REF_BTDEPTH];
737 int numsaved = 0;
738
739 task_require(task);
740 os_ref_retain(&task->ref_count);
741
742 numsaved = OSBacktrace(bt, TASK_REF_BTDEPTH);
743 btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_INCR,
744 bt, numsaved);
745}
746
747os_ref_count_t
748task_deallocate_internal(task_t task)
749{
750 void * bt[TASK_REF_BTDEPTH];
751 int numsaved = 0;
752
753 numsaved = OSBacktrace(bt, TASK_REF_BTDEPTH);
754 btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_DECR,
755 bt, numsaved);
756
757 return os_ref_release(&task->ref_count);
758}
759
760#endif /* TASK_REFERENCE_LEAK_DEBUG */
761
762void
763task_init(void)
764{
765 /*
766 * Configure per-task memory limit.
767 * The boot-arg is interpreted as Megabytes,
768 * and takes precedence over the device tree.
769 * Setting the boot-arg to 0 disables task limits.
770 */
771 if (!PE_parse_boot_argn("max_task_pmem", &max_task_footprint_mb,
772 sizeof(max_task_footprint_mb))) {
773 /*
774 * No limit was found in boot-args, so go look in the device tree.
775 */
776 if (!PE_get_default("kern.max_task_pmem", &max_task_footprint_mb,
777 sizeof(max_task_footprint_mb))) {
778 /*
779 * No limit was found in device tree.
780 */
781 max_task_footprint_mb = 0;
782 }
783 }
784
785 if (max_task_footprint_mb != 0) {
786#if CONFIG_MEMORYSTATUS
787 if (max_task_footprint_mb < 50) {
788 printf("Warning: max_task_pmem %d below minimum.\n",
789 max_task_footprint_mb);
790 max_task_footprint_mb = 50;
791 }
792 printf("Limiting task physical memory footprint to %d MB\n",
793 max_task_footprint_mb);
794
795 max_task_footprint = (ledger_amount_t)max_task_footprint_mb * 1024 * 1024; // Convert MB to bytes
796
797 /*
798 * Configure the per-task memory limit warning level.
799 * This is computed as a percentage.
800 */
801 max_task_footprint_warning_level = 0;
802
803 if (max_mem < 0x40000000) {
804 /*
805 * On devices with < 1GB of memory:
806 * -- set warnings to 50MB below the per-task limit.
807 */
808 if (max_task_footprint_mb > 50) {
809 max_task_footprint_warning_level = ((max_task_footprint_mb - 50) * 100) / max_task_footprint_mb;
810 }
811 } else {
812 /*
813 * On devices with >= 1GB of memory:
814 * -- set warnings to 100MB below the per-task limit.
815 */
816 if (max_task_footprint_mb > 100) {
817 max_task_footprint_warning_level = ((max_task_footprint_mb - 100) * 100) / max_task_footprint_mb;
818 }
819 }
820
821 /*
822 * Never allow warning level to land below the default.
823 */
824 if (max_task_footprint_warning_level < PHYS_FOOTPRINT_WARNING_LEVEL) {
825 max_task_footprint_warning_level = PHYS_FOOTPRINT_WARNING_LEVEL;
826 }
827
828 printf("Limiting task physical memory warning to %d%%\n", max_task_footprint_warning_level);
829
830#else
831 printf("Warning: max_task_pmem specified, but jetsam not configured; ignoring.\n");
832#endif /* CONFIG_MEMORYSTATUS */
833 }
834
835#if DEVELOPMENT || DEBUG
836 if (!PE_parse_boot_argn("exc_resource_threads",
837 &exc_resource_threads_enabled,
838 sizeof(exc_resource_threads_enabled))) {
839 exc_resource_threads_enabled = 1;
840 }
841 PE_parse_boot_argn("task_exc_guard_default",
842 &task_exc_guard_default,
843 sizeof(task_exc_guard_default));
844#endif /* DEVELOPMENT || DEBUG */
845
846#if CONFIG_COREDUMP
847 if (!PE_parse_boot_argn("hwm_user_cores", &hwm_user_cores,
848 sizeof(hwm_user_cores))) {
849 hwm_user_cores = 0;
850 }
851#endif
852
853 proc_init_cpumon_params();
854
855 if (!PE_parse_boot_argn("task_wakeups_monitor_rate", &task_wakeups_monitor_rate, sizeof(task_wakeups_monitor_rate))) {
856 task_wakeups_monitor_rate = TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT;
857 }
858
859 if (!PE_parse_boot_argn("task_wakeups_monitor_interval", &task_wakeups_monitor_interval, sizeof(task_wakeups_monitor_interval))) {
860 task_wakeups_monitor_interval = TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL;
861 }
862
863 if (!PE_parse_boot_argn("task_wakeups_monitor_ustackshots_trigger_pct", &task_wakeups_monitor_ustackshots_trigger_pct,
864 sizeof(task_wakeups_monitor_ustackshots_trigger_pct))) {
865 task_wakeups_monitor_ustackshots_trigger_pct = TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER;
866 }
867
868 if (!PE_parse_boot_argn("disable_exc_resource", &disable_exc_resource,
869 sizeof(disable_exc_resource))) {
870 disable_exc_resource = 0;
871 }
872
873 if (!PE_parse_boot_argn("task_iomon_limit_mb", &task_iomon_limit_mb, sizeof(task_iomon_limit_mb))) {
874 task_iomon_limit_mb = IOMON_DEFAULT_LIMIT;
875 }
876
877 if (!PE_parse_boot_argn("task_iomon_interval_secs", &task_iomon_interval_secs, sizeof(task_iomon_interval_secs))) {
878 task_iomon_interval_secs = IOMON_DEFAULT_INTERVAL;
879 }
880
881 if (!PE_parse_boot_argn("io_telemetry_limit", &io_telemetry_limit, sizeof(io_telemetry_limit))) {
882 io_telemetry_limit = IO_TELEMETRY_DEFAULT_LIMIT;
883 }
884
885/*
886 * If we have coalitions, coalition_init() will call init_task_ledgers() as it
887 * sets up the ledgers for the default coalition. If we don't have coalitions,
888 * then we have to call it now.
889 */
890#if CONFIG_COALITIONS
891 assert(task_ledger_template);
892#else /* CONFIG_COALITIONS */
893 init_task_ledgers();
894#endif /* CONFIG_COALITIONS */
895
896#if TASK_REFERENCE_LEAK_DEBUG
897 task_ref_btlog = btlog_create(TASK_REF_NUM_RECORDS, TASK_REF_BTDEPTH, TRUE /* caller_will_remove_entries_for_element? */);
898 assert(task_ref_btlog);
899#endif
900
901 /*
902 * Create the kernel task as the first task.
903 */
904#ifdef __LP64__
905 if (task_create_internal(TASK_NULL, NULL, FALSE, TRUE, TRUE, TF_NONE, TPF_NONE, TWF_NONE, &kernel_task) != KERN_SUCCESS)
906#else
907 if (task_create_internal(TASK_NULL, NULL, FALSE, FALSE, FALSE, TF_NONE, TPF_NONE, TWF_NONE, &kernel_task) != KERN_SUCCESS)
908#endif
909 { panic("task_init\n");}
910
911#if defined(HAS_APPLE_PAC)
912 kernel_task->rop_pid = ml_default_rop_pid();
913 kernel_task->jop_pid = ml_default_jop_pid();
914 // kernel_task never runs at EL0, but machine_thread_state_convert_from/to_user() relies on
915 // disable_user_jop to be false for kernel threads (e.g. in exception delivery on thread_exception_daemon)
916 ml_task_set_disable_user_jop(kernel_task, FALSE);
917#endif
918
919 vm_map_deallocate(kernel_task->map);
920 kernel_task->map = kernel_map;
921}
922
923/*
924 * Create a task running in the kernel address space. It may
925 * have its own map of size mem_size and may have ipc privileges.
926 */
927kern_return_t
928kernel_task_create(
929 __unused task_t parent_task,
930 __unused vm_offset_t map_base,
931 __unused vm_size_t map_size,
932 __unused task_t *child_task)
933{
934 return KERN_INVALID_ARGUMENT;
935}
936
937kern_return_t
938task_create(
939 task_t parent_task,
940 __unused ledger_port_array_t ledger_ports,
941 __unused mach_msg_type_number_t num_ledger_ports,
942 __unused boolean_t inherit_memory,
943 __unused task_t *child_task) /* OUT */
944{
945 if (parent_task == TASK_NULL) {
946 return KERN_INVALID_ARGUMENT;
947 }
948
949 /*
950 * No longer supported: too many calls assume that a task has a valid
951 * process attached.
952 */
953 return KERN_FAILURE;
954}
955
956kern_return_t
957host_security_create_task_token(
958 host_security_t host_security,
959 task_t parent_task,
960 __unused security_token_t sec_token,
961 __unused audit_token_t audit_token,
962 __unused host_priv_t host_priv,
963 __unused ledger_port_array_t ledger_ports,
964 __unused mach_msg_type_number_t num_ledger_ports,
965 __unused boolean_t inherit_memory,
966 __unused task_t *child_task) /* OUT */
967{
968 if (parent_task == TASK_NULL) {
969 return KERN_INVALID_ARGUMENT;
970 }
971
972 if (host_security == HOST_NULL) {
973 return KERN_INVALID_SECURITY;
974 }
975
976 /*
977 * No longer supported.
978 */
979 return KERN_FAILURE;
980}
981
982/*
983 * Task ledgers
984 * ------------
985 *
986 * phys_footprint
987 * Physical footprint: This is the sum of:
988 * + (internal - alternate_accounting)
989 * + (internal_compressed - alternate_accounting_compressed)
990 * + iokit_mapped
991 * + purgeable_nonvolatile
992 * + purgeable_nonvolatile_compressed
993 * + page_table
994 *
995 * internal
996 * The task's anonymous memory, which on iOS is always resident.
997 *
998 * internal_compressed
999 * Amount of this task's internal memory which is held by the compressor.
1000 * Such memory is no longer actually resident for the task [i.e., resident in its pmap],
1001 * and could be either decompressed back into memory, or paged out to storage, depending
1002 * on our implementation.
1003 *
1004 * iokit_mapped
1005 * IOKit mappings: The total size of all IOKit mappings in this task, regardless of
1006 * clean/dirty or internal/external state.
1007 *
1008 * alternate_accounting
1009 * The number of internal dirty pages which are part of IOKit mappings. By definition, these pages
1010 * are counted in both internal *and* iokit_mapped, so we must subtract them from the total to avoid
1011 * double counting.
1012 *
1013 * pages_grabbed
1014 * pages_grabbed counts all page grabs in a task. It is also broken out into three subtypes
1015 * which track UPL, IOPL and Kernel page grabs.
1016 */
1017void
1018init_task_ledgers(void)
1019{
1020 ledger_template_t t;
1021
1022 assert(task_ledger_template == NULL);
1023 assert(kernel_task == TASK_NULL);
1024
1025#if MACH_ASSERT
1026 PE_parse_boot_argn("pmap_ledgers_panic",
1027 &pmap_ledgers_panic,
1028 sizeof(pmap_ledgers_panic));
1029 PE_parse_boot_argn("pmap_ledgers_panic_leeway",
1030 &pmap_ledgers_panic_leeway,
1031 sizeof(pmap_ledgers_panic_leeway));
1032#endif /* MACH_ASSERT */
1033
1034 if ((t = ledger_template_create("Per-task ledger")) == NULL) {
1035 panic("couldn't create task ledger template");
1036 }
1037
1038 task_ledgers.cpu_time = ledger_entry_add(t, "cpu_time", "sched", "ns");
1039 task_ledgers.tkm_private = ledger_entry_add(t, "tkm_private",
1040 "physmem", "bytes");
1041 task_ledgers.tkm_shared = ledger_entry_add(t, "tkm_shared", "physmem",
1042 "bytes");
1043 task_ledgers.phys_mem = ledger_entry_add(t, "phys_mem", "physmem",
1044 "bytes");
1045 task_ledgers.wired_mem = ledger_entry_add(t, "wired_mem", "physmem",
1046 "bytes");
1047 task_ledgers.internal = ledger_entry_add(t, "internal", "physmem",
1048 "bytes");
1049 task_ledgers.iokit_mapped = ledger_entry_add(t, "iokit_mapped", "mappings",
1050 "bytes");
1051 task_ledgers.alternate_accounting = ledger_entry_add(t, "alternate_accounting", "physmem",
1052 "bytes");
1053 task_ledgers.alternate_accounting_compressed = ledger_entry_add(t, "alternate_accounting_compressed", "physmem",
1054 "bytes");
1055 task_ledgers.page_table = ledger_entry_add(t, "page_table", "physmem",
1056 "bytes");
1057 task_ledgers.phys_footprint = ledger_entry_add(t, "phys_footprint", "physmem",
1058 "bytes");
1059 task_ledgers.internal_compressed = ledger_entry_add(t, "internal_compressed", "physmem",
1060 "bytes");
1061 task_ledgers.purgeable_volatile = ledger_entry_add(t, "purgeable_volatile", "physmem", "bytes");
1062 task_ledgers.purgeable_nonvolatile = ledger_entry_add(t, "purgeable_nonvolatile", "physmem", "bytes");
1063 task_ledgers.purgeable_volatile_compressed = ledger_entry_add(t, "purgeable_volatile_compress", "physmem", "bytes");
1064 task_ledgers.purgeable_nonvolatile_compressed = ledger_entry_add(t, "purgeable_nonvolatile_compress", "physmem", "bytes");
1065#if DEBUG || DEVELOPMENT
1066 task_ledgers.pages_grabbed = ledger_entry_add(t, "pages_grabbed", "physmem", "count");
1067 task_ledgers.pages_grabbed_kern = ledger_entry_add(t, "pages_grabbed_kern", "physmem", "count");
1068 task_ledgers.pages_grabbed_iopl = ledger_entry_add(t, "pages_grabbed_iopl", "physmem", "count");
1069 task_ledgers.pages_grabbed_upl = ledger_entry_add(t, "pages_grabbed_upl", "physmem", "count");
1070#endif
1071 task_ledgers.tagged_nofootprint = ledger_entry_add(t, "tagged_nofootprint", "physmem", "bytes");
1072 task_ledgers.tagged_footprint = ledger_entry_add(t, "tagged_footprint", "physmem", "bytes");
1073 task_ledgers.tagged_nofootprint_compressed = ledger_entry_add(t, "tagged_nofootprint_compressed", "physmem", "bytes");
1074 task_ledgers.tagged_footprint_compressed = ledger_entry_add(t, "tagged_footprint_compressed", "physmem", "bytes");
1075 task_ledgers.network_volatile = ledger_entry_add(t, "network_volatile", "physmem", "bytes");
1076 task_ledgers.network_nonvolatile = ledger_entry_add(t, "network_nonvolatile", "physmem", "bytes");
1077 task_ledgers.network_volatile_compressed = ledger_entry_add(t, "network_volatile_compressed", "physmem", "bytes");
1078 task_ledgers.network_nonvolatile_compressed = ledger_entry_add(t, "network_nonvolatile_compressed", "physmem", "bytes");
1079 task_ledgers.media_nofootprint = ledger_entry_add(t, "media_nofootprint", "physmem", "bytes");
1080 task_ledgers.media_footprint = ledger_entry_add(t, "media_footprint", "physmem", "bytes");
1081 task_ledgers.media_nofootprint_compressed = ledger_entry_add(t, "media_nofootprint_compressed", "physmem", "bytes");
1082 task_ledgers.media_footprint_compressed = ledger_entry_add(t, "media_footprint_compressed", "physmem", "bytes");
1083 task_ledgers.graphics_nofootprint = ledger_entry_add(t, "graphics_nofootprint", "physmem", "bytes");
1084 task_ledgers.graphics_footprint = ledger_entry_add(t, "graphics_footprint", "physmem", "bytes");
1085 task_ledgers.graphics_nofootprint_compressed = ledger_entry_add(t, "graphics_nofootprint_compressed", "physmem", "bytes");
1086 task_ledgers.graphics_footprint_compressed = ledger_entry_add(t, "graphics_footprint_compressed", "physmem", "bytes");
1087 task_ledgers.neural_nofootprint = ledger_entry_add(t, "neural_nofootprint", "physmem", "bytes");
1088 task_ledgers.neural_footprint = ledger_entry_add(t, "neural_footprint", "physmem", "bytes");
1089 task_ledgers.neural_nofootprint_compressed = ledger_entry_add(t, "neural_nofootprint_compressed", "physmem", "bytes");
1090 task_ledgers.neural_footprint_compressed = ledger_entry_add(t, "neural_footprint_compressed", "physmem", "bytes");
1091
1092#if CONFIG_FREEZE
1093 task_ledgers.frozen_to_swap = ledger_entry_add(t, "frozen_to_swap", "physmem", "bytes");
1094#endif /* CONFIG_FREEZE */
1095
1096 task_ledgers.platform_idle_wakeups = ledger_entry_add(t, "platform_idle_wakeups", "power",
1097 "count");
1098 task_ledgers.interrupt_wakeups = ledger_entry_add(t, "interrupt_wakeups", "power",
1099 "count");
1100
1101#if CONFIG_SCHED_SFI
1102 sfi_class_id_t class_id, ledger_alias;
1103 for (class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) {
1104 task_ledgers.sfi_wait_times[class_id] = -1;
1105 }
1106
1107 /* don't account for UNSPECIFIED */
1108 for (class_id = SFI_CLASS_UNSPECIFIED + 1; class_id < MAX_SFI_CLASS_ID; class_id++) {
1109 ledger_alias = sfi_get_ledger_alias_for_class(class_id);
1110 if (ledger_alias != SFI_CLASS_UNSPECIFIED) {
1111 /* Check to see if alias has been registered yet */
1112 if (task_ledgers.sfi_wait_times[ledger_alias] != -1) {
1113 task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias];
1114 } else {
1115 /* Otherwise, initialize it first */
1116 task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias] = sfi_ledger_entry_add(t, ledger_alias);
1117 }
1118 } else {
1119 task_ledgers.sfi_wait_times[class_id] = sfi_ledger_entry_add(t, class_id);
1120 }
1121
1122 if (task_ledgers.sfi_wait_times[class_id] < 0) {
1123 panic("couldn't create entries for task ledger template for SFI class 0x%x", class_id);
1124 }
1125 }
1126
1127 assert(task_ledgers.sfi_wait_times[MAX_SFI_CLASS_ID - 1] != -1);
1128#endif /* CONFIG_SCHED_SFI */
1129
1130 task_ledgers.cpu_time_billed_to_me = ledger_entry_add(t, "cpu_time_billed_to_me", "sched", "ns");
1131 task_ledgers.cpu_time_billed_to_others = ledger_entry_add(t, "cpu_time_billed_to_others", "sched", "ns");
1132 task_ledgers.physical_writes = ledger_entry_add(t, "physical_writes", "res", "bytes");
1133 task_ledgers.logical_writes = ledger_entry_add(t, "logical_writes", "res", "bytes");
1134 task_ledgers.logical_writes_to_external = ledger_entry_add(t, "logical_writes_to_external", "res", "bytes");
1135#if CONFIG_PHYS_WRITE_ACCT
1136 task_ledgers.fs_metadata_writes = ledger_entry_add(t, "fs_metadata_writes", "res", "bytes");
1137#endif /* CONFIG_PHYS_WRITE_ACCT */
1138 task_ledgers.energy_billed_to_me = ledger_entry_add(t, "energy_billed_to_me", "power", "nj");
1139 task_ledgers.energy_billed_to_others = ledger_entry_add(t, "energy_billed_to_others", "power", "nj");
1140
1141 if ((task_ledgers.cpu_time < 0) ||
1142 (task_ledgers.tkm_private < 0) ||
1143 (task_ledgers.tkm_shared < 0) ||
1144 (task_ledgers.phys_mem < 0) ||
1145 (task_ledgers.wired_mem < 0) ||
1146 (task_ledgers.internal < 0) ||
1147 (task_ledgers.iokit_mapped < 0) ||
1148 (task_ledgers.alternate_accounting < 0) ||
1149 (task_ledgers.alternate_accounting_compressed < 0) ||
1150 (task_ledgers.page_table < 0) ||
1151 (task_ledgers.phys_footprint < 0) ||
1152 (task_ledgers.internal_compressed < 0) ||
1153 (task_ledgers.purgeable_volatile < 0) ||
1154 (task_ledgers.purgeable_nonvolatile < 0) ||
1155 (task_ledgers.purgeable_volatile_compressed < 0) ||
1156 (task_ledgers.purgeable_nonvolatile_compressed < 0) ||
1157 (task_ledgers.tagged_nofootprint < 0) ||
1158 (task_ledgers.tagged_footprint < 0) ||
1159 (task_ledgers.tagged_nofootprint_compressed < 0) ||
1160 (task_ledgers.tagged_footprint_compressed < 0) ||
1161#if CONFIG_FREEZE
1162 (task_ledgers.frozen_to_swap < 0) ||
1163#endif /* CONFIG_FREEZE */
1164 (task_ledgers.network_volatile < 0) ||
1165 (task_ledgers.network_nonvolatile < 0) ||
1166 (task_ledgers.network_volatile_compressed < 0) ||
1167 (task_ledgers.network_nonvolatile_compressed < 0) ||
1168 (task_ledgers.media_nofootprint < 0) ||
1169 (task_ledgers.media_footprint < 0) ||
1170 (task_ledgers.media_nofootprint_compressed < 0) ||
1171 (task_ledgers.media_footprint_compressed < 0) ||
1172 (task_ledgers.graphics_nofootprint < 0) ||
1173 (task_ledgers.graphics_footprint < 0) ||
1174 (task_ledgers.graphics_nofootprint_compressed < 0) ||
1175 (task_ledgers.graphics_footprint_compressed < 0) ||
1176 (task_ledgers.neural_nofootprint < 0) ||
1177 (task_ledgers.neural_footprint < 0) ||
1178 (task_ledgers.neural_nofootprint_compressed < 0) ||
1179 (task_ledgers.neural_footprint_compressed < 0) ||
1180 (task_ledgers.platform_idle_wakeups < 0) ||
1181 (task_ledgers.interrupt_wakeups < 0) ||
1182 (task_ledgers.cpu_time_billed_to_me < 0) || (task_ledgers.cpu_time_billed_to_others < 0) ||
1183 (task_ledgers.physical_writes < 0) ||
1184 (task_ledgers.logical_writes < 0) ||
1185 (task_ledgers.logical_writes_to_external < 0) ||
1186#if CONFIG_PHYS_WRITE_ACCT
1187 (task_ledgers.fs_metadata_writes < 0) ||
1188#endif /* CONFIG_PHYS_WRITE_ACCT */
1189 (task_ledgers.energy_billed_to_me < 0) ||
1190 (task_ledgers.energy_billed_to_others < 0)
1191 ) {
1192 panic("couldn't create entries for task ledger template");
1193 }
1194
1195 ledger_track_credit_only(t, task_ledgers.phys_footprint);
1196 ledger_track_credit_only(t, task_ledgers.page_table);
1197 ledger_track_credit_only(t, task_ledgers.internal);
1198 ledger_track_credit_only(t, task_ledgers.internal_compressed);
1199 ledger_track_credit_only(t, task_ledgers.iokit_mapped);
1200 ledger_track_credit_only(t, task_ledgers.alternate_accounting);
1201 ledger_track_credit_only(t, task_ledgers.alternate_accounting_compressed);
1202 ledger_track_credit_only(t, task_ledgers.purgeable_volatile);
1203 ledger_track_credit_only(t, task_ledgers.purgeable_nonvolatile);
1204 ledger_track_credit_only(t, task_ledgers.purgeable_volatile_compressed);
1205 ledger_track_credit_only(t, task_ledgers.purgeable_nonvolatile_compressed);
1206#if DEBUG || DEVELOPMENT
1207 ledger_track_credit_only(t, task_ledgers.pages_grabbed);
1208 ledger_track_credit_only(t, task_ledgers.pages_grabbed_kern);
1209 ledger_track_credit_only(t, task_ledgers.pages_grabbed_iopl);
1210 ledger_track_credit_only(t, task_ledgers.pages_grabbed_upl);
1211#endif
1212
1213 ledger_track_credit_only(t, task_ledgers.tagged_nofootprint);
1214 ledger_track_credit_only(t, task_ledgers.tagged_footprint);
1215 ledger_track_credit_only(t, task_ledgers.tagged_nofootprint_compressed);
1216 ledger_track_credit_only(t, task_ledgers.tagged_footprint_compressed);
1217 ledger_track_credit_only(t, task_ledgers.network_volatile);
1218 ledger_track_credit_only(t, task_ledgers.network_nonvolatile);
1219 ledger_track_credit_only(t, task_ledgers.network_volatile_compressed);
1220 ledger_track_credit_only(t, task_ledgers.network_nonvolatile_compressed);
1221 ledger_track_credit_only(t, task_ledgers.media_nofootprint);
1222 ledger_track_credit_only(t, task_ledgers.media_footprint);
1223 ledger_track_credit_only(t, task_ledgers.media_nofootprint_compressed);
1224 ledger_track_credit_only(t, task_ledgers.media_footprint_compressed);
1225 ledger_track_credit_only(t, task_ledgers.graphics_nofootprint);
1226 ledger_track_credit_only(t, task_ledgers.graphics_footprint);
1227 ledger_track_credit_only(t, task_ledgers.graphics_nofootprint_compressed);
1228 ledger_track_credit_only(t, task_ledgers.graphics_footprint_compressed);
1229 ledger_track_credit_only(t, task_ledgers.neural_nofootprint);
1230 ledger_track_credit_only(t, task_ledgers.neural_footprint);
1231 ledger_track_credit_only(t, task_ledgers.neural_nofootprint_compressed);
1232 ledger_track_credit_only(t, task_ledgers.neural_footprint_compressed);
1233
1234 ledger_track_maximum(t, task_ledgers.phys_footprint, 60);
1235#if MACH_ASSERT
1236 if (pmap_ledgers_panic) {
1237 ledger_panic_on_negative(t, task_ledgers.phys_footprint);
1238 ledger_panic_on_negative(t, task_ledgers.page_table);
1239 ledger_panic_on_negative(t, task_ledgers.internal);
1240 ledger_panic_on_negative(t, task_ledgers.internal_compressed);
1241 ledger_panic_on_negative(t, task_ledgers.iokit_mapped);
1242 ledger_panic_on_negative(t, task_ledgers.alternate_accounting);
1243 ledger_panic_on_negative(t, task_ledgers.alternate_accounting_compressed);
1244 ledger_panic_on_negative(t, task_ledgers.purgeable_volatile);
1245 ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile);
1246 ledger_panic_on_negative(t, task_ledgers.purgeable_volatile_compressed);
1247 ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile_compressed);
1248#if CONFIG_PHYS_WRITE_ACCT
1249 ledger_panic_on_negative(t, task_ledgers.fs_metadata_writes);
1250#endif /* CONFIG_PHYS_WRITE_ACCT */
1251
1252 ledger_panic_on_negative(t, task_ledgers.tagged_nofootprint);
1253 ledger_panic_on_negative(t, task_ledgers.tagged_footprint);
1254 ledger_panic_on_negative(t, task_ledgers.tagged_nofootprint_compressed);
1255 ledger_panic_on_negative(t, task_ledgers.tagged_footprint_compressed);
1256 ledger_panic_on_negative(t, task_ledgers.network_volatile);
1257 ledger_panic_on_negative(t, task_ledgers.network_nonvolatile);
1258 ledger_panic_on_negative(t, task_ledgers.network_volatile_compressed);
1259 ledger_panic_on_negative(t, task_ledgers.network_nonvolatile_compressed);
1260 ledger_panic_on_negative(t, task_ledgers.media_nofootprint);
1261 ledger_panic_on_negative(t, task_ledgers.media_footprint);
1262 ledger_panic_on_negative(t, task_ledgers.media_nofootprint_compressed);
1263 ledger_panic_on_negative(t, task_ledgers.media_footprint_compressed);
1264 ledger_panic_on_negative(t, task_ledgers.graphics_nofootprint);
1265 ledger_panic_on_negative(t, task_ledgers.graphics_footprint);
1266 ledger_panic_on_negative(t, task_ledgers.graphics_nofootprint_compressed);
1267 ledger_panic_on_negative(t, task_ledgers.graphics_footprint_compressed);
1268 ledger_panic_on_negative(t, task_ledgers.neural_nofootprint);
1269 ledger_panic_on_negative(t, task_ledgers.neural_footprint);
1270 ledger_panic_on_negative(t, task_ledgers.neural_nofootprint_compressed);
1271 ledger_panic_on_negative(t, task_ledgers.neural_footprint_compressed);
1272 }
1273#endif /* MACH_ASSERT */
1274
1275#if CONFIG_MEMORYSTATUS
1276 ledger_set_callback(t, task_ledgers.phys_footprint, task_footprint_exceeded, NULL, NULL);
1277#endif /* CONFIG_MEMORYSTATUS */
1278
1279 ledger_set_callback(t, task_ledgers.interrupt_wakeups,
1280 task_wakeups_rate_exceeded, NULL, NULL);
1281 ledger_set_callback(t, task_ledgers.physical_writes, task_io_rate_exceeded, (void *)FLAVOR_IO_PHYSICAL_WRITES, NULL);
1282
1283#if XNU_MONITOR
1284 ledger_template_complete_secure_alloc(t);
1285#else /* XNU_MONITOR */
1286 ledger_template_complete(t);
1287#endif /* XNU_MONITOR */
1288 task_ledger_template = t;
1289}
1290
/* Reference group handed to os_ref_init_count() for every task's ref_count. */
1291os_refgrp_decl(static, task_refgrp, "task", NULL);
1292
/*
 *	task_create_internal:
 *
 *	Allocate a task from task_zone, initialise it (ledger, address
 *	map, IPC state, policy, accounting, coalitions), link it onto the
 *	global "tasks" queue and hand it back through *child_task.
 *
 *	Parameters:
 *		parent_task		task to inherit from; may be TASK_NULL, in
 *					which case kernel security/audit tokens are
 *					used.  Must not be TASK_NULL when
 *					inherit_memory is true.
 *		parent_coalitions	consulted under CONFIG_COALITIONS for
 *					non-corpse tasks; if NULL or lacking a
 *					resource coalition, parent_task's
 *					coalitions (or the init coalitions) are
 *					adopted instead.  Declared __unused, but
 *					read in that configuration.
 *		inherit_memory		when true (and TF_CORPSE_FORK is not set in
 *					t_flags) the map is forked from the parent
 *					with vm_map_fork(); otherwise a new map is
 *					created.
 *		is_64bit		selects PMAP_CREATE_64BIT for a newly created
 *					map and, with no parent on __LP64__, the
 *					64-bit address flag.  Declared __unused,
 *					but read below.
 *		is_64bit_data		only consulted when parent_task is TASK_NULL.
 *		t_flags			stored in the task; TF_DARKWAKE_MODE is
 *					cleared, and TF_NO_SMT / TF_TECS /
 *					TF_FILTER_MSG are additionally inherited
 *					from the parent.
 *		t_procflags		stored in the task; TPF_EXEC_COPY triggers
 *					the transfer of coalition roles from the
 *					parent.
 *		t_returnwaitflags	stored in the task.
 *		child_task		OUT: the new task, written only on success.
 *
 *	Returns:
 *		KERN_SUCCESS		task created; it carries two references
 *					(one for being alive, one for the caller).
 *		KERN_RESOURCE_SHORTAGE	the task or its ledger could not be
 *					allocated.
 *
 *	Panics (CONFIG_COALITIONS) if the new task did not end up in a
 *	resource coalition.
 */
1293kern_return_t
1294task_create_internal(
1295 task_t parent_task,
1296 coalition_t *parent_coalitions __unused,
1297 boolean_t inherit_memory,
1298 __unused boolean_t is_64bit,
1299 boolean_t is_64bit_data,
1300 uint32_t t_flags,
1301 uint32_t t_procflags,
1302 uint8_t t_returnwaitflags,
1303 task_t *child_task) /* OUT */
1304{
1305 task_t new_task;
1306 vm_shared_region_t shared_region;
1307 ledger_t ledger = NULL;
1308
1309 new_task = (task_t) zalloc(task_zone);
1310
1311 if (new_task == TASK_NULL) {
1312 return KERN_RESOURCE_SHORTAGE;
1313 }
1314
1315 /* one ref for just being alive; one for our caller */
1316 os_ref_init_count(&new_task->ref_count, &task_refgrp, 2);
1317
1318 /* allocate with active entries */
1319 assert(task_ledger_template != NULL);
1320 if ((ledger = ledger_instantiate(task_ledger_template,
1321 LEDGER_CREATE_ACTIVE_ENTRIES)) == NULL) {
 /* Nothing else has been set up yet; only the task itself is released. */
1322 zfree(task_zone, new_task);
1323 return KERN_RESOURCE_SHORTAGE;
1324 }
1325
1326 counter_alloc(&(new_task->faults));
1327
1328#if defined(HAS_APPLE_PAC)
1329 ml_task_set_rop_pid(new_task, parent_task, inherit_memory);
1330 ml_task_set_jop_pid(new_task, parent_task, inherit_memory);
1331 ml_task_set_disable_user_jop(new_task, inherit_memory ? parent_task->disable_user_jop : FALSE);
1332#endif
1333
1334
1335 new_task->ledger = ledger;
1336
1337#if defined(CONFIG_SCHED_MULTIQ)
1338 new_task->sched_group = sched_group_create();
1339#endif
1340
1341 /* if inherit_memory is true, parent_task MUST not be NULL */
1342 if (!(t_flags & TF_CORPSE_FORK) && inherit_memory) {
 /* NOTE(review): the result of vm_map_fork() is not checked here - confirm it cannot be NULL. */
1343 new_task->map = vm_map_fork(ledger, parent_task->map, 0);
1344 } else {
 /* No address space is inherited: create a map over a newly created pmap. */
1345 unsigned int pmap_flags = is_64bit ? PMAP_CREATE_64BIT : 0;
1346 new_task->map = vm_map_create(pmap_create_options(ledger, 0, pmap_flags),
1347 (vm_map_offset_t)(VM_MIN_ADDRESS),
1348 (vm_map_offset_t)(VM_MAX_ADDRESS), TRUE);
1349 }
1350
1351 /* Inherit memlock limit from parent */
1352 if (parent_task) {
1353 vm_map_set_user_wire_limit(new_task->map, (vm_size_t)parent_task->map->user_wire_limit);
1354 }
1355
1356 lck_mtx_init(&new_task->lock, &task_lck_grp, &task_lck_attr);
1357 queue_init(&new_task->threads);
1358 new_task->suspend_count = 0;
1359 new_task->thread_count = 0;
1360 new_task->active_thread_count = 0;
1361 new_task->user_stop_count = 0;
1362 new_task->legacy_stop_count = 0;
1363 new_task->active = TRUE;
1364 new_task->halting = FALSE;
1365 new_task->priv_flags = 0;
1366 new_task->t_flags = t_flags;
1367 new_task->t_procflags = t_procflags;
1368 new_task->t_returnwaitflags = t_returnwaitflags;
1369 new_task->returnwait_inheritor = current_thread();
1370 new_task->importance = 0;
1371 new_task->crashed_thread_id = 0;
1372 new_task->exec_token = 0;
1373 new_task->watchports = NULL;
1374 new_task->restartable_ranges = NULL;
1375 new_task->task_exc_guard = 0;
1376
1377 new_task->bank_context = NULL;
1378
1379#ifdef MACH_BSD
1380 new_task->bsd_info = NULL;
1381 new_task->corpse_info = NULL;
1382#endif /* MACH_BSD */
1383
1384#if CONFIG_MACF
1385 new_task->crash_label = NULL;
1386
1387 new_task->mach_trap_filter_mask = NULL;
1388 new_task->mach_kobj_filter_mask = NULL;
1389#endif
1390
1391#if CONFIG_MEMORYSTATUS
1392 if (max_task_footprint != 0) {
1393 ledger_set_limit(ledger, task_ledgers.phys_footprint, max_task_footprint, PHYS_FOOTPRINT_WARNING_LEVEL);
1394 }
1395#endif /* CONFIG_MEMORYSTATUS */
1396
1397 if (task_wakeups_monitor_rate != 0) {
1398 uint32_t flags = WAKEMON_ENABLE | WAKEMON_SET_DEFAULTS;
1399 int32_t rate; // Ignored because of WAKEMON_SET_DEFAULTS
1400 task_wakeups_monitor_ctl(new_task, &flags, &rate);
1401 }
1402
1403#if CONFIG_IO_ACCOUNTING
1404 uint32_t flags = IOMON_ENABLE;
1405 task_io_monitor_ctl(new_task, &flags);
1406#endif /* CONFIG_IO_ACCOUNTING */
1407
1408 machine_task_init(new_task, parent_task, inherit_memory);
1409
1410 new_task->task_debug = NULL;
1411
1412#if DEVELOPMENT || DEBUG
1413 new_task->task_unnested = FALSE;
1414 new_task->task_disconnected_count = 0;
1415#endif
1416 queue_init(&new_task->semaphore_list);
1417 new_task->semaphores_owned = 0;
1418
1419 ipc_task_init(new_task, parent_task);
1420
1421 new_task->vtimers = 0;
1422
1423 new_task->shared_region = NULL;
1424
1425 new_task->affinity_space = NULL;
1426
1427 new_task->t_kpc = 0;
1428
1429 new_task->pidsuspended = FALSE;
1430 new_task->frozen = FALSE;
1431 new_task->changing_freeze_state = FALSE;
1432 new_task->rusage_cpu_flags = 0;
1433 new_task->rusage_cpu_percentage = 0;
1434 new_task->rusage_cpu_interval = 0;
1435 new_task->rusage_cpu_deadline = 0;
1436 new_task->rusage_cpu_callt = NULL;
1437#if MACH_ASSERT
1438 new_task->suspends_outstanding = 0;
1439#endif
1440
1441#if HYPERVISOR
1442 new_task->hv_task_target = NULL;
1443#endif /* HYPERVISOR */
1444
1445#if CONFIG_TASKWATCH
1446 queue_init(&new_task->task_watchers);
1447 new_task->num_taskwatchers = 0;
1448 new_task->watchapplying = 0;
1449#endif /* CONFIG_TASKWATCH */
1450
1451 new_task->mem_notify_reserved = 0;
1452 new_task->memlimit_attrs_reserved = 0;
1453
1454 new_task->requested_policy = default_task_requested_policy;
1455 new_task->effective_policy = default_task_effective_policy;
1456
1457 new_task->task_shared_region_slide = -1;
1458
1459 task_importance_init_from_parent(new_task, parent_task);
1460
1461 if (parent_task != TASK_NULL) {
1462 new_task->sec_token = parent_task->sec_token;
1463 new_task->audit_token = parent_task->audit_token;
1464
1465 /* inherit the parent's shared region */
1466 shared_region = vm_shared_region_get(parent_task);
1467 vm_shared_region_set(new_task, shared_region);
1468
1469#if __has_feature(ptrauth_calls)
1470 /* use parent's shared_region_id */
1471 char *shared_region_id = task_get_vm_shared_region_id_and_jop_pid(parent_task, NULL);
1472 if (shared_region_id != NULL) {
1473 shared_region_key_alloc(shared_region_id, FALSE, 0); /* get a reference */
1474 }
1475 task_set_shared_region_id(new_task, shared_region_id);
1476#endif /* __has_feature(ptrauth_calls) */
1477
1478 if (task_has_64Bit_addr(parent_task)) {
1479 task_set_64Bit_addr(new_task);
1480 }
1481
1482 if (task_has_64Bit_data(parent_task)) {
1483 task_set_64Bit_data(new_task);
1484 }
1485
1486 new_task->all_image_info_addr = parent_task->all_image_info_addr;
1487 new_task->all_image_info_size = parent_task->all_image_info_size;
1488 new_task->mach_header_vm_address = 0;
1489
1490 if (inherit_memory && parent_task->affinity_space) {
1491 task_affinity_create(parent_task, new_task);
1492 }
1493
 /* Note: this also stores the chosen pset back into the parent's pset_hint. */
1494 new_task->pset_hint = parent_task->pset_hint = task_choose_pset(parent_task);
1495
1496 if (parent_task->t_flags & TF_NO_SMT) {
1497 new_task->t_flags |= TF_NO_SMT;
1498 }
1499
1500 if (parent_task->t_flags & TF_TECS) {
1501 new_task->t_flags |= TF_TECS;
1502 }
1503
1504 if (parent_task->t_flags & TF_FILTER_MSG) {
1505 new_task->t_flags |= TF_FILTER_MSG;
1506 }
1507
1508 new_task->priority = BASEPRI_DEFAULT;
1509 new_task->max_priority = MAXPRI_USER;
1510
1511 task_policy_create(new_task, parent_task);
1512 } else {
1513 new_task->sec_token = KERNEL_SECURITY_TOKEN;
1514 new_task->audit_token = KERNEL_AUDIT_TOKEN;
1515#ifdef __LP64__
1516 if (is_64bit) {
1517 task_set_64Bit_addr(new_task);
1518 }
1519#endif
1520
1521 if (is_64bit_data) {
1522 task_set_64Bit_data(new_task);
1523 }
1524
1525 new_task->all_image_info_addr = (mach_vm_address_t)0;
1526 new_task->all_image_info_size = (mach_vm_size_t)0;
1527
1528 new_task->pset_hint = PROCESSOR_SET_NULL;
1529
 /* A parentless task created while kernel_task is still unset gets kernel priorities. */
1530 if (kernel_task == TASK_NULL) {
1531 new_task->priority = BASEPRI_KERNEL;
1532 new_task->max_priority = MAXPRI_KERNEL;
1533 } else {
1534 new_task->priority = BASEPRI_DEFAULT;
1535 new_task->max_priority = MAXPRI_USER;
1536 }
1537 }
1538
1539 bzero(new_task->coalition, sizeof(new_task->coalition));
1540 for (int i = 0; i < COALITION_NUM_TYPES; i++) {
1541 queue_chain_init(new_task->task_coalition[i]);
1542 }
1543
1544 /* Allocate I/O Statistics */
 /* NOTE(review): the allocation result is only checked by assert() - confirm this cannot fail when asserts are compiled out. */
1545 new_task->task_io_stats = kheap_alloc(KHEAP_DATA_BUFFERS,
1546 sizeof(struct io_stat_info), Z_WAITOK | Z_ZERO);
1547 assert(new_task->task_io_stats != NULL);
1548
1549 bzero(&(new_task->cpu_time_eqos_stats), sizeof(new_task->cpu_time_eqos_stats));
1550 bzero(&(new_task->cpu_time_rqos_stats), sizeof(new_task->cpu_time_rqos_stats));
1551
1552 bzero(&new_task->extmod_statistics, sizeof(new_task->extmod_statistics));
1553
1554 /* Copy resource accounting info from the parent for a corpse-forked task. */
1555 if (parent_task != NULL && (t_flags & TF_CORPSE_FORK)) {
1556 task_rollup_accounting_info(new_task, parent_task);
1557 } else {
1558 /* Initialize to zero for standard fork/spawn case */
1559 new_task->total_user_time = 0;
1560 new_task->total_system_time = 0;
1561 new_task->total_ptime = 0;
1562 new_task->total_runnable_time = 0;
1563 new_task->pageins = 0;
1564 new_task->cow_faults = 0;
1565 new_task->messages_sent = 0;
1566 new_task->messages_received = 0;
1567 new_task->syscalls_mach = 0;
1568 new_task->syscalls_unix = 0;
1569 new_task->c_switch = 0;
1570 new_task->p_switch = 0;
1571 new_task->ps_switch = 0;
1572 new_task->decompressions = 0;
1573 new_task->low_mem_notified_warn = 0;
1574 new_task->low_mem_notified_critical = 0;
1575 new_task->purged_memory_warn = 0;
1576 new_task->purged_memory_critical = 0;
1577 new_task->low_mem_privileged_listener = 0;
1578 new_task->memlimit_is_active = 0;
1579 new_task->memlimit_is_fatal = 0;
1580 new_task->memlimit_active_exc_resource = 0;
1581 new_task->memlimit_inactive_exc_resource = 0;
1582 new_task->task_timer_wakeups_bin_1 = 0;
1583 new_task->task_timer_wakeups_bin_2 = 0;
1584 new_task->task_gpu_ns = 0;
1585 new_task->task_writes_counters_internal.task_immediate_writes = 0;
1586 new_task->task_writes_counters_internal.task_deferred_writes = 0;
1587 new_task->task_writes_counters_internal.task_invalidated_writes = 0;
1588 new_task->task_writes_counters_internal.task_metadata_writes = 0;
1589 new_task->task_writes_counters_external.task_immediate_writes = 0;
1590 new_task->task_writes_counters_external.task_deferred_writes = 0;
1591 new_task->task_writes_counters_external.task_invalidated_writes = 0;
1592 new_task->task_writes_counters_external.task_metadata_writes = 0;
1593#if CONFIG_PHYS_WRITE_ACCT
1594 new_task->task_fs_metadata_writes = 0;
1595#endif /* CONFIG_PHYS_WRITE_ACCT */
1596
1597 new_task->task_energy = 0;
1598#if MONOTONIC
1599 memset(&new_task->task_monotonic, 0, sizeof(new_task->task_monotonic));
1600#endif /* MONOTONIC */
1601 }
1602
1603
1604#if CONFIG_COALITIONS
1605 if (!(t_flags & TF_CORPSE_FORK)) {
1606 /* TODO: there is no graceful failure path here... */
1607 if (parent_coalitions && parent_coalitions[COALITION_TYPE_RESOURCE]) {
1608 coalitions_adopt_task(parent_coalitions, new_task);
1609 } else if (parent_task && parent_task->coalition[COALITION_TYPE_RESOURCE]) {
1610 /*
1611 * all tasks at least have a resource coalition, so
1612 * if the parent has one then inherit all coalitions
1613 * the parent is a part of
1614 */
1615 coalitions_adopt_task(parent_task->coalition, new_task);
1616 } else {
1617 /* TODO: assert that new_task will be PID 1 (launchd) */
1618 coalitions_adopt_init_task(new_task);
1619 }
1620 /*
1621 * on exec, we need to transfer the coalition roles from the
1622 * parent task to the exec copy task.
1623 */
1624 if (parent_task && (t_procflags & TPF_EXEC_COPY)) {
1625 int coal_roles[COALITION_NUM_TYPES];
1626 task_coalition_roles(parent_task, coal_roles);
1627 (void)coalitions_set_roles(new_task->coalition, new_task, coal_roles);
1628 }
1629 } else {
1630 coalitions_adopt_corpse_task(new_task);
1631 }
1632
1633 if (new_task->coalition[COALITION_TYPE_RESOURCE] == COALITION_NULL) {
1634 panic("created task is not a member of a resource coalition");
1635 }
1636#endif /* CONFIG_COALITIONS */
1637
1638 new_task->dispatchqueue_offset = 0;
1639 if (parent_task != NULL) {
1640 new_task->dispatchqueue_offset = parent_task->dispatchqueue_offset;
1641 }
1642
1643 new_task->task_can_transfer_memory_ownership = FALSE;
1644 new_task->task_volatile_objects = 0;
1645 new_task->task_nonvolatile_objects = 0;
1646 new_task->task_objects_disowning = FALSE;
1647 new_task->task_objects_disowned = FALSE;
1648 new_task->task_owned_objects = 0;
1649 queue_init(&new_task->task_objq);
1650
1651#if CONFIG_FREEZE
1652 queue_init(&new_task->task_frozen_cseg_q);
1653#endif /* CONFIG_FREEZE */
1654
1655 task_objq_lock_init(new_task);
1656
1657#if __arm64__
1658 new_task->task_legacy_footprint = FALSE;
1659 new_task->task_extra_footprint_limit = FALSE;
1660 new_task->task_ios13extended_footprint_limit = FALSE;
1661#endif /* __arm64__ */
1662 new_task->task_region_footprint = FALSE;
1663 new_task->task_has_crossed_thread_limit = FALSE;
1664 new_task->task_thread_limit = 0;
1665#if CONFIG_SECLUDED_MEMORY
1666 new_task->task_can_use_secluded_mem = FALSE;
1667 new_task->task_could_use_secluded_mem = FALSE;
1668 new_task->task_could_also_use_secluded_mem = FALSE;
1669 new_task->task_suppressed_secluded = FALSE;
1670#endif /* CONFIG_SECLUDED_MEMORY */
1671
1672 /*
1673 * t_flags is set up above. But since we don't
1674 * support darkwake mode being set that way
1675 * currently, we clear it out here explicitly.
1676 */
1677 new_task->t_flags &= ~(TF_DARKWAKE_MODE);
1678
1679 queue_init(&new_task->io_user_clients);
1680 new_task->loadTag = 0;
1681
1682 ipc_task_enable(new_task);
1683
 /*
  * Link the task onto the global task queue under tasks_threads_lock;
  * it is suspended right away if tasks_suspend_state is set.
  */
1684 lck_mtx_lock(&tasks_threads_lock);
1685 queue_enter(&tasks, new_task, task_t, tasks);
1686 tasks_count++;
1687 if (tasks_suspend_state) {
1688 task_suspend_internal(new_task);
1689 }
1690 lck_mtx_unlock(&tasks_threads_lock);
1691
1692 *child_task = new_task;
1693 return KERN_SUCCESS;
1694}
1695
1696/*
1697 * task_rollup_accounting_info
1698 *
1699 * Roll up accounting stats. Used to rollup stats
1700 * for exec copy task and corpse fork.
1701 */
1702void
1703task_rollup_accounting_info(task_t to_task, task_t from_task)
1704{
1705 assert(from_task != to_task);
1706
1707 to_task->total_user_time = from_task->total_user_time;
1708 to_task->total_system_time = from_task->total_system_time;
1709 to_task->total_ptime = from_task->total_ptime;
1710 to_task->total_runnable_time = from_task->total_runnable_time;
1711 counter_add(&to_task->faults, counter_load(&from_task->faults));
1712 to_task->pageins = from_task->pageins;
1713 to_task->cow_faults = from_task->cow_faults;
1714 to_task->decompressions = from_task->decompressions;
1715 to_task->messages_sent = from_task->messages_sent;
1716 to_task->messages_received = from_task->messages_received;
1717 to_task->syscalls_mach = from_task->syscalls_mach;
1718 to_task->syscalls_unix = from_task->syscalls_unix;
1719 to_task->c_switch = from_task->c_switch;
1720 to_task->p_switch = from_task->p_switch;
1721 to_task->ps_switch = from_task->ps_switch;
1722 to_task->extmod_statistics = from_task->extmod_statistics;
1723 to_task->low_mem_notified_warn = from_task->low_mem_notified_warn;
1724 to_task->low_mem_notified_critical = from_task->low_mem_notified_critical;
1725 to_task->purged_memory_warn = from_task->purged_memory_warn;
1726 to_task->purged_memory_critical = from_task->purged_memory_critical;
1727 to_task->low_mem_privileged_listener = from_task->low_mem_privileged_listener;
1728 *to_task->task_io_stats = *from_task->task_io_stats;
1729 to_task->cpu_time_eqos_stats = from_task->cpu_time_eqos_stats;
1730 to_task->cpu_time_rqos_stats = from_task->cpu_time_rqos_stats;
1731 to_task->task_timer_wakeups_bin_1 = from_task->task_timer_wakeups_bin_1;
1732 to_task->task_timer_wakeups_bin_2 = from_task->task_timer_wakeups_bin_2;
1733 to_task->task_gpu_ns = from_task->task_gpu_ns;
1734 to_task->task_writes_counters_internal.task_immediate_writes = from_task->task_writes_counters_internal.task_immediate_writes;
1735 to_task->task_writes_counters_internal.task_deferred_writes = from_task->task_writes_counters_internal.task_deferred_writes;
1736 to_task->task_writes_counters_internal.task_invalidated_writes = from_task->task_writes_counters_internal.task_invalidated_writes;
1737 to_task->task_writes_counters_internal.task_metadata_writes = from_task->task_writes_counters_internal.task_metadata_writes;
1738 to_task->task_writes_counters_external.task_immediate_writes = from_task->task_writes_counters_external.task_immediate_writes;
1739 to_task->task_writes_counters_external.task_deferred_writes = from_task->task_writes_counters_external.task_deferred_writes;
1740 to_task->task_writes_counters_external.task_invalidated_writes = from_task->task_writes_counters_external.task_invalidated_writes;
1741 to_task->task_writes_counters_external.task_metadata_writes = from_task->task_writes_counters_external.task_metadata_writes;
1742#if CONFIG_PHYS_WRITE_ACCT
1743 to_task->task_fs_metadata_writes = from_task->task_fs_metadata_writes;
1744#endif /* CONFIG_PHYS_WRITE_ACCT */
1745 to_task->task_energy = from_task->task_energy;
1746
1747 /* Skip ledger roll up for memory accounting entries */
1748 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time);
1749 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.platform_idle_wakeups);
1750 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.interrupt_wakeups);
1751#if CONFIG_SCHED_SFI
1752 for (sfi_class_id_t class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) {
1753 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.sfi_wait_times[class_id]);
1754 }
1755#endif
1756 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time_billed_to_me);
1757 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time_billed_to_others);
1758 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.physical_writes);
1759 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.logical_writes);
1760 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.energy_billed_to_me);
1761 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.energy_billed_to_others);
1762}
1763
1764int task_dropped_imp_count = 0;
1765
1766/*
1767 * task_deallocate:
1768 *
1769 * Drop a reference on a task.
1770 */
1771void
1772task_deallocate(
1773 task_t task)
1774{
1775 ledger_amount_t credit, debit, interrupt_wakeups, platform_idle_wakeups;
1776 os_ref_count_t refs;
1777
1778 if (task == TASK_NULL) {
1779 return;
1780 }
1781
1782 refs = task_deallocate_internal(task);
1783
1784#if IMPORTANCE_INHERITANCE
1785 if (refs == 1) {
1786 /*
1787 * If last ref potentially comes from the task's importance,
1788 * disconnect it. But more task refs may be added before
1789 * that completes, so wait for the reference to go to zero
1790 * naturally (it may happen on a recursive task_deallocate()
1791 * from the ipc_importance_disconnect_task() call).
1792 */
1793 if (IIT_NULL != task->task_imp_base) {
1794 ipc_importance_disconnect_task(task);
1795 }
1796 return;
1797 }
1798#endif /* IMPORTANCE_INHERITANCE */
1799
1800 if (refs > 0) {
1801 return;
1802 }
1803
1804 /*
1805 * The task should be dead at this point. Ensure other resources
1806 * like threads, are gone before we trash the world.
1807 */
1808 assert(queue_empty(&task->threads));
1809 assert(task->bsd_info == NULL);
1810 assert(!is_active(task->itk_space));
1811 assert(!task->active);
1812 assert(task->active_thread_count == 0);
1813
1814 lck_mtx_lock(&tasks_threads_lock);
1815 assert(terminated_tasks_count > 0);
1816 queue_remove(&terminated_tasks, task, task_t, tasks);
1817 terminated_tasks_count--;
1818 lck_mtx_unlock(&tasks_threads_lock);
1819
1820 /*
1821 * remove the reference on bank context
1822 */
1823 task_bank_reset(task);
1824
1825 if (task->task_io_stats) {
1826 kheap_free(KHEAP_DATA_BUFFERS, task->task_io_stats,
1827 sizeof(struct io_stat_info));
1828 }
1829
1830 /*
1831 * Give the machine dependent code a chance
1832 * to perform cleanup before ripping apart
1833 * the task.
1834 */
1835 machine_task_terminate(task);
1836
1837 ipc_task_terminate(task);
1838
1839 /* let iokit know */
1840 iokit_task_terminate(task);
1841
1842 if (task->affinity_space) {
1843 task_affinity_deallocate(task);
1844 }
1845
1846#if MACH_ASSERT
1847 if (task->ledger != NULL &&
1848 task->map != NULL &&
1849 task->map->pmap != NULL &&
1850 task->map->pmap->ledger != NULL) {
1851 assert(task->ledger == task->map->pmap->ledger);
1852 }
1853#endif /* MACH_ASSERT */
1854
1855 vm_owned_objects_disown(task);
1856 assert(task->task_objects_disowned);
1857 if (task->task_volatile_objects != 0 ||
1858 task->task_nonvolatile_objects != 0 ||
1859 task->task_owned_objects != 0) {
1860 panic("task_deallocate(%p): "
1861 "volatile_objects=%d nonvolatile_objects=%d owned=%d\n",
1862 task,
1863 task->task_volatile_objects,
1864 task->task_nonvolatile_objects,
1865 task->task_owned_objects);
1866 }
1867
1868 vm_map_deallocate(task->map);
1869 is_release(task->itk_space);
1870 if (task->restartable_ranges) {
1871 restartable_ranges_release(task->restartable_ranges);
1872 }
1873
1874 ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
1875 &interrupt_wakeups, &debit);
1876 ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
1877 &platform_idle_wakeups, &debit);
1878
1879#if defined(CONFIG_SCHED_MULTIQ)
1880 sched_group_destroy(task->sched_group);
1881#endif
1882
1883 /* Accumulate statistics for dead tasks */
1884 lck_spin_lock(&dead_task_statistics_lock);
1885 dead_task_statistics.total_user_time += task->total_user_time;
1886 dead_task_statistics.total_system_time += task->total_system_time;
1887
1888 dead_task_statistics.task_interrupt_wakeups += interrupt_wakeups;
1889 dead_task_statistics.task_platform_idle_wakeups += platform_idle_wakeups;
1890
1891 dead_task_statistics.task_timer_wakeups_bin_1 += task->task_timer_wakeups_bin_1;
1892 dead_task_statistics.task_timer_wakeups_bin_2 += task->task_timer_wakeups_bin_2;
1893 dead_task_statistics.total_ptime += task->total_ptime;
1894 dead_task_statistics.total_pset_switches += task->ps_switch;
1895 dead_task_statistics.task_gpu_ns += task->task_gpu_ns;
1896 dead_task_statistics.task_energy += task->task_energy;
1897
1898 lck_spin_unlock(&dead_task_statistics_lock);
1899 lck_mtx_destroy(&task->lock, &task_lck_grp);
1900
1901 if (!ledger_get_entries(task->ledger, task_ledgers.tkm_private, &credit,
1902 &debit)) {
1903 OSAddAtomic64(credit, (int64_t *)&tasks_tkm_private.alloc);
1904 OSAddAtomic64(debit, (int64_t *)&tasks_tkm_private.free);
1905 }
1906 if (!ledger_get_entries(task->ledger, task_ledgers.tkm_shared, &credit,
1907 &debit)) {
1908 OSAddAtomic64(credit, (int64_t *)&tasks_tkm_shared.alloc);
1909 OSAddAtomic64(debit, (int64_t *)&tasks_tkm_shared.free);
1910 }
1911 ledger_dereference(task->ledger);
1912
1913#if TASK_REFERENCE_LEAK_DEBUG
1914 btlog_remove_entries_for_element(task_ref_btlog, task);
1915#endif
1916
1917 counter_free(&task->faults);
1918
1919#if CONFIG_COALITIONS
1920 task_release_coalitions(task);
1921#endif /* CONFIG_COALITIONS */
1922
1923 bzero(task->coalition, sizeof(task->coalition));
1924
1925#if MACH_BSD
1926 /* clean up collected information since last reference to task is gone */
1927 if (task->corpse_info) {
1928 void *corpse_info_kernel = kcdata_memory_get_begin_addr(task->corpse_info);
1929 task_crashinfo_destroy(task->corpse_info);
1930 task->corpse_info = NULL;
1931 if (corpse_info_kernel) {
1932 kheap_free(KHEAP_DATA_BUFFERS, corpse_info_kernel,
1933 CORPSEINFO_ALLOCATION_SIZE);
1934 }
1935 }
1936#endif
1937
1938#if CONFIG_MACF
1939 if (task->crash_label) {
1940 mac_exc_free_label(task->crash_label);
1941 task->crash_label = NULL;
1942 }
1943#endif
1944
1945 assert(queue_empty(&task->task_objq));
1946 task_objq_lock_destroy(task);
1947
1948 zfree(task_zone, task);
1949}
1950
1951/*
1952 * task_name_deallocate:
1953 *
1954 * Drop a reference on a task name.
1955 */
1956void
1957task_name_deallocate(
1958 task_name_t task_name)
1959{
1960 return task_deallocate((task_t)task_name);
1961}
1962
1963/*
1964 * task_policy_set_deallocate:
1965 *
1966 * Drop a reference on a task type.
1967 */
1968void
1969task_policy_set_deallocate(task_policy_set_t task_policy_set)
1970{
1971 return task_deallocate((task_t)task_policy_set);
1972}
1973
1974/*
1975 * task_policy_get_deallocate:
1976 *
1977 * Drop a reference on a task type.
1978 */
1979void
1980task_policy_get_deallocate(task_policy_get_t task_policy_get)
1981{
1982 return task_deallocate((task_t)task_policy_get);
1983}
1984
1985/*
1986 * task_inspect_deallocate:
1987 *
1988 * Drop a task inspection reference.
1989 */
1990void
1991task_inspect_deallocate(
1992 task_inspect_t task_inspect)
1993{
1994 return task_deallocate((task_t)task_inspect);
1995}
1996
1997/*
1998 * task_read_deallocate:
1999 *
2000 * Drop a reference on task read port.
2001 */
2002void
2003task_read_deallocate(
2004 task_read_t task_read)
2005{
2006 return task_deallocate((task_t)task_read);
2007}
2008
2009/*
2010 * task_suspension_token_deallocate:
2011 *
2012 * Drop a reference on a task suspension token.
2013 */
2014void
2015task_suspension_token_deallocate(
2016 task_suspension_token_t token)
2017{
2018 return task_deallocate((task_t)token);
2019}
2020
2021
2022/*
2023 * task_collect_crash_info:
2024 *
2025 * collect crash info from bsd and mach based data
2026 */
2027kern_return_t
2028task_collect_crash_info(
2029 task_t task,
2030#ifdef CONFIG_MACF
2031 struct label *crash_label,
2032#endif
2033 int is_corpse_fork)
2034{
2035 kern_return_t kr = KERN_SUCCESS;
2036
2037 kcdata_descriptor_t crash_data = NULL;
2038 kcdata_descriptor_t crash_data_release = NULL;
2039 mach_msg_type_number_t size = CORPSEINFO_ALLOCATION_SIZE;
2040 mach_vm_offset_t crash_data_ptr = 0;
2041 void *crash_data_kernel = NULL;
2042 void *crash_data_kernel_release = NULL;
2043#if CONFIG_MACF
2044 struct label *label, *free_label;
2045#endif
2046
2047 if (!corpses_enabled()) {
2048 return KERN_NOT_SUPPORTED;
2049 }
2050
2051#if CONFIG_MACF
2052 free_label = label = mac_exc_create_label();
2053#endif
2054
2055 task_lock(task);
2056
2057 assert(is_corpse_fork || task->bsd_info != NULL);
2058 if (task->corpse_info == NULL && (is_corpse_fork || task->bsd_info != NULL)) {
2059#if CONFIG_MACF
2060 /* Set the crash label, used by the exception delivery mac hook */
2061 free_label = task->crash_label; // Most likely NULL.
2062 task->crash_label = label;
2063 mac_exc_update_task_crash_label(task, crash_label);
2064#endif
2065 task_unlock(task);
2066
2067 crash_data_kernel = kheap_alloc(KHEAP_DATA_BUFFERS,
2068 CORPSEINFO_ALLOCATION_SIZE, Z_WAITOK | Z_ZERO);
2069 if (crash_data_kernel == NULL) {
2070 kr = KERN_RESOURCE_SHORTAGE;
2071 goto out_no_lock;
2072 }
2073 crash_data_ptr = (mach_vm_offset_t) crash_data_kernel;
2074
2075 /* Do not get a corpse ref for corpse fork */
2076 crash_data = task_crashinfo_alloc_init((mach_vm_address_t)crash_data_ptr, size,
2077 is_corpse_fork ? 0 : CORPSE_CRASHINFO_HAS_REF,
2078 KCFLAG_USE_MEMCOPY);
2079 if (crash_data) {
2080 task_lock(task);
2081 crash_data_release = task->corpse_info;
2082 crash_data_kernel_release = kcdata_memory_get_begin_addr(crash_data_release);
2083 task->corpse_info = crash_data;
2084
2085 task_unlock(task);
2086 kr = KERN_SUCCESS;
2087 } else {
2088 kheap_free(KHEAP_DATA_BUFFERS, crash_data_kernel,
2089 CORPSEINFO_ALLOCATION_SIZE);
2090 kr = KERN_FAILURE;
2091 }
2092
2093 if (crash_data_release != NULL) {
2094 task_crashinfo_destroy(crash_data_release);
2095 }
2096 if (crash_data_kernel_release != NULL) {
2097 kheap_free(KHEAP_DATA_BUFFERS, crash_data_kernel_release,
2098 CORPSEINFO_ALLOCATION_SIZE);
2099 }
2100 } else {
2101 task_unlock(task);
2102 }
2103
2104out_no_lock:
2105#if CONFIG_MACF
2106 if (free_label != NULL) {
2107 mac_exc_free_label(free_label);
2108 }
2109#endif
2110 return kr;
2111}
2112
2113/*
2114 * task_deliver_crash_notification:
2115 *
2116 * Makes outcall to registered host port for a corpse.
2117 */
2118kern_return_t
2119task_deliver_crash_notification(
2120 task_t task,
2121 thread_t thread,
2122 exception_type_t etype,
2123 mach_exception_subcode_t subcode)
2124{
2125 kcdata_descriptor_t crash_info = task->corpse_info;
2126 thread_t th_iter = NULL;
2127 kern_return_t kr = KERN_SUCCESS;
2128 wait_interrupt_t wsave;
2129 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
2130 ipc_port_t task_port, old_notify;
2131
2132 if (crash_info == NULL) {
2133 return KERN_FAILURE;
2134 }
2135
2136 task_lock(task);
2137 if (task_is_a_corpse_fork(task)) {
2138 /* Populate code with EXC_{RESOURCE,GUARD} for corpse fork */
2139 code[0] = etype;
2140 code[1] = subcode;
2141 } else {
2142 /* Populate code with EXC_CRASH for corpses */
2143 code[0] = EXC_CRASH;
2144 code[1] = 0;
2145 /* Update the code[1] if the boot-arg corpse_for_fatal_memkill is set */
2146 if (corpse_for_fatal_memkill) {
2147 code[1] = subcode;
2148 }
2149 }
2150
2151 queue_iterate(&task->threads, th_iter, thread_t, task_threads)
2152 {
2153 if (th_iter->corpse_dup == FALSE) {
2154 ipc_thread_reset(th_iter);
2155 }
2156 }
2157 task_unlock(task);
2158
2159 /* Arm the no-sender notification for taskport */
2160 task_reference(task);
2161 task_port = convert_task_to_port(task);
2162 ip_lock(task_port);
2163 require_ip_active(task_port);
2164 ipc_port_nsrequest(task_port, task_port->ip_mscount, ipc_port_make_sonce_locked(task_port), &old_notify);
2165 /* port unlocked */
2166 assert(IP_NULL == old_notify);
2167
2168 wsave = thread_interrupt_level(THREAD_UNINT);
2169 kr = exception_triage_thread(EXC_CORPSE_NOTIFY, code, EXCEPTION_CODE_MAX, thread);
2170 if (kr != KERN_SUCCESS) {
2171 printf("Failed to send exception EXC_CORPSE_NOTIFY. error code: %d for pid %d\n", kr, task_pid(task));
2172 }
2173
2174 (void)thread_interrupt_level(wsave);
2175
2176 /*
2177 * Drop the send right on task port, will fire the
2178 * no-sender notification if exception deliver failed.
2179 */
2180 ipc_port_release_send(task_port);
2181 return kr;
2182}
2183
2184/*
2185 * task_terminate:
2186 *
2187 * Terminate the specified task. See comments on thread_terminate
2188 * (kern/thread.c) about problems with terminating the "current task."
2189 */
2190
2191kern_return_t
2192task_terminate(
2193 task_t task)
2194{
2195 if (task == TASK_NULL) {
2196 return KERN_INVALID_ARGUMENT;
2197 }
2198
2199 if (task->bsd_info) {
2200 return KERN_FAILURE;
2201 }
2202
2203 return task_terminate_internal(task);
2204}
2205
2206#if MACH_ASSERT
2207extern int proc_pid(struct proc *);
2208extern void proc_name_kdp(task_t t, char *buf, int size);
2209#endif /* MACH_ASSERT */
2210
2211#define VM_MAP_PARTIAL_REAP 0x54 /* 0x150 */
2212static void
2213__unused task_partial_reap(task_t task, __unused int pid)
2214{
2215 unsigned int reclaimed_resident = 0;
2216 unsigned int reclaimed_compressed = 0;
2217 uint64_t task_page_count;
2218
2219 task_page_count = (get_task_phys_footprint(task) / PAGE_SIZE_64);
2220
2221 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_START),
2222 pid, task_page_count, 0, 0, 0);
2223
2224 vm_map_partial_reap(task->map, &reclaimed_resident, &reclaimed_compressed);
2225
2226 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_END),
2227 pid, reclaimed_resident, reclaimed_compressed, 0, 0);
2228}
2229
2230kern_return_t
2231task_mark_corpse(task_t task)
2232{
2233 kern_return_t kr = KERN_SUCCESS;
2234 thread_t self_thread;
2235 (void) self_thread;
2236 wait_interrupt_t wsave;
2237#if CONFIG_MACF
2238 struct label *crash_label = NULL;
2239#endif
2240
2241 assert(task != kernel_task);
2242 assert(task == current_task());
2243 assert(!task_is_a_corpse(task));
2244
2245#if CONFIG_MACF
2246 crash_label = mac_exc_create_label_for_proc((struct proc*)task->bsd_info);
2247#endif
2248
2249 kr = task_collect_crash_info(task,
2250#if CONFIG_MACF
2251 crash_label,
2252#endif
2253 FALSE);
2254 if (kr != KERN_SUCCESS) {
2255 goto out;
2256 }
2257
2258 self_thread = current_thread();
2259
2260 wsave = thread_interrupt_level(THREAD_UNINT);
2261 task_lock(task);
2262
2263 task_set_corpse_pending_report(task);
2264 task_set_corpse(task);
2265 task->crashed_thread_id = thread_tid(self_thread);
2266
2267 kr = task_start_halt_locked(task, TRUE);
2268 assert(kr == KERN_SUCCESS);
2269
2270 ipc_task_reset(task);
2271 /* Remove the naked send right for task port, needed to arm no sender notification */
2272 task_set_special_port_internal(task, TASK_KERNEL_PORT, IPC_PORT_NULL);
2273 ipc_task_enable(task);
2274
2275 task_unlock(task);
2276 /* terminate the ipc space */
2277 ipc_space_terminate(task->itk_space);
2278
2279 /* Add it to global corpse task list */
2280 task_add_to_corpse_task_list(task);
2281
2282 task_start_halt(task);
2283 thread_terminate_internal(self_thread, TH_TERMINATE_OPTION_NONE);
2284
2285 (void) thread_interrupt_level(wsave);
2286 assert(task->halting == TRUE);
2287
2288out:
2289#if CONFIG_MACF
2290 mac_exc_free_label(crash_label);
2291#endif
2292 return kr;
2293}
2294
2295/*
2296 * task_clear_corpse
2297 *
2298 * Clears the corpse pending bit on task.
2299 * Removes inspection bit on the threads.
2300 */
2301void
2302task_clear_corpse(task_t task)
2303{
2304 thread_t th_iter = NULL;
2305
2306 task_lock(task);
2307 queue_iterate(&task->threads, th_iter, thread_t, task_threads)
2308 {
2309 thread_mtx_lock(th_iter);
2310 th_iter->inspection = FALSE;
2311 ipc_thread_disable(th_iter);
2312 thread_mtx_unlock(th_iter);
2313 }
2314
2315 thread_terminate_crashed_threads();
2316 /* remove the pending corpse report flag */
2317 task_clear_corpse_pending_report(task);
2318
2319 task_unlock(task);
2320}
2321
2322/*
2323 * task_port_notify
2324 *
2325 * Called whenever the Mach port system detects no-senders on
2326 * the task port of a corpse.
2327 * Each notification that comes in should terminate the task (corpse).
2328 */
2329void
2330task_port_notify(mach_msg_header_t *msg)
2331{
2332 mach_no_senders_notification_t *notification = (void *)msg;
2333 ipc_port_t port = notification->not_header.msgh_remote_port;
2334 task_t task;
2335
2336 require_ip_active(port);
2337 assert(IKOT_TASK_CONTROL == ip_kotype(port));
2338 task = (task_t) ip_get_kobject(port);
2339
2340 assert(task_is_a_corpse(task));
2341
2342 /* Remove the task from global corpse task list */
2343 task_remove_from_corpse_task_list(task);
2344
2345 task_clear_corpse(task);
2346 task_terminate_internal(task);
2347}
2348
2349/*
2350 * task_port_with_flavor_notify
2351 *
2352 * Called whenever the Mach port system detects no-senders on
2353 * the task inspect or read port. These ports are allocated lazily and
2354 * should be deallocated here when there are no senders remaining.
2355 */
2356void
2357task_port_with_flavor_notify(mach_msg_header_t *msg)
2358{
2359 mach_no_senders_notification_t *notification = (void *)msg;
2360 ipc_port_t port = notification->not_header.msgh_remote_port;
2361 task_t task;
2362 mach_task_flavor_t flavor;
2363 ipc_kobject_type_t kotype;
2364
2365 ip_lock(port);
2366 if (port->ip_srights > 0) {
2367 ip_unlock(port);
2368 return;
2369 }
2370 task = (task_t)ipc_kobject_get(port);
2371 kotype = ip_kotype(port);
2372 if (task != TASK_NULL) {
2373 assert((IKOT_TASK_READ == kotype) || (IKOT_TASK_INSPECT == kotype));
2374 task_reference_internal(task);
2375 }
2376 ip_unlock(port);
2377
2378 if (task == TASK_NULL) {
2379 /* The task is exiting or disabled; it will eventually deallocate the port */
2380 return;
2381 }
2382
2383 if (kotype == IKOT_TASK_READ) {
2384 flavor = TASK_FLAVOR_READ;
2385 } else {
2386 flavor = TASK_FLAVOR_INSPECT;
2387 }
2388
2389 itk_lock(task);
2390 ip_lock(port);
2391 /*
2392 * If the port is no longer active, then ipc_task_terminate() ran
2393 * and destroyed the kobject already. Just deallocate the task
2394 * ref we took and go away.
2395 *
2396 * It is also possible that several nsrequests are in flight,
2397 * only one shall NULL-out the port entry, and this is the one
2398 * that gets to dealloc the port.
2399 *
2400 * Check for a stale no-senders notification. A call to any function
2401 * that vends out send rights to this port could resurrect it between
2402 * this notification being generated and actually being handled here.
2403 */
2404 if (!ip_active(port) ||
2405 task->itk_task_ports[flavor] != port ||
2406 port->ip_srights > 0) {
2407 ip_unlock(port);
2408 itk_unlock(task);
2409 task_deallocate(task);
2410 return;
2411 }
2412
2413 assert(task->itk_task_ports[flavor] == port);
2414 task->itk_task_ports[flavor] = IP_NULL;
2415
2416 ipc_kobject_set_atomically(port, IKO_NULL, IKOT_NONE);
2417 ip_unlock(port);
2418 itk_unlock(task);
2419 task_deallocate(task);
2420
2421 ipc_port_dealloc_kernel(port);
2422}
2423
2424/*
2425 * task_wait_till_threads_terminate_locked
2426 *
2427 * Wait till all the threads in the task are terminated.
2428 * Might release the task lock and re-acquire it.
2429 */
2430void
2431task_wait_till_threads_terminate_locked(task_t task)
2432{
2433 /* wait for all the threads in the task to terminate */
2434 while (task->active_thread_count != 0) {
2435 assert_wait((event_t)&task->active_thread_count, THREAD_UNINT);
2436 task_unlock(task);
2437 thread_block(THREAD_CONTINUE_NULL);
2438
2439 task_lock(task);
2440 }
2441}
2442
2443/*
2444 * task_duplicate_map_and_threads
2445 *
2446 * Copy vmmap of source task.
2447 * Copy active threads from source task to destination task.
2448 * Source task would be suspended during the copy.
2449 */
2450kern_return_t
2451task_duplicate_map_and_threads(
2452 task_t task,
2453 void *p,
2454 task_t new_task,
2455 thread_t *thread_ret,
2456 uint64_t **udata_buffer,
2457 int *size,
2458 int *num_udata)
2459{
2460 kern_return_t kr = KERN_SUCCESS;
2461 int active;
2462 thread_t thread, self, thread_return = THREAD_NULL;
2463 thread_t new_thread = THREAD_NULL, first_thread = THREAD_NULL;
2464 thread_t *thread_array;
2465 uint32_t active_thread_count = 0, array_count = 0, i;
2466 vm_map_t oldmap;
2467 uint64_t *buffer = NULL;
2468 int buf_size = 0;
2469 int est_knotes = 0, num_knotes = 0;
2470
2471 self = current_thread();
2472
2473 /*
2474 * Suspend the task to copy thread state, use the internal
2475 * variant so that no user-space process can resume
2476 * the task from under us
2477 */
2478 kr = task_suspend_internal(task);
2479 if (kr != KERN_SUCCESS) {
2480 return kr;
2481 }
2482
2483 if (task->map->disable_vmentry_reuse == TRUE) {
2484 /*
2485 * Quite likely GuardMalloc (or some debugging tool)
2486 * is being used on this task. And it has gone through
2487 * its limit. Making a corpse will likely encounter
2488 * a lot of VM entries that will need COW.
2489 *
2490 * Skip it.
2491 */
2492#if DEVELOPMENT || DEBUG
2493 memorystatus_abort_vm_map_fork(task);
2494#endif
2495 task_resume_internal(task);
2496 return KERN_FAILURE;
2497 }
2498
2499 /* Check with VM if vm_map_fork is allowed for this task */
2500 if (memorystatus_allowed_vm_map_fork(task)) {
2501 /* Setup new task's vmmap, switch from parent task's map to it COW map */
2502 oldmap = new_task->map;
2503 new_task->map = vm_map_fork(new_task->ledger,
2504 task->map,
2505 (VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
2506 VM_MAP_FORK_PRESERVE_PURGEABLE |
2507 VM_MAP_FORK_CORPSE_FOOTPRINT));
2508 vm_map_deallocate(oldmap);
2509
2510 /* copy ledgers that impact the memory footprint */
2511 vm_map_copy_footprint_ledgers(task, new_task);
2512
2513 /* Get all the udata pointers from kqueue */
2514 est_knotes = kevent_proc_copy_uptrs(p, NULL, 0);
2515 if (est_knotes > 0) {
2516 buf_size = (est_knotes + 32) * sizeof(uint64_t);
2517 buffer = kheap_alloc(KHEAP_DATA_BUFFERS, buf_size, Z_WAITOK);
2518 num_knotes = kevent_proc_copy_uptrs(p, buffer, buf_size);
2519 if (num_knotes > est_knotes + 32) {
2520 num_knotes = est_knotes + 32;
2521 }
2522 }
2523 }
2524
2525 active_thread_count = task->active_thread_count;
2526 if (active_thread_count == 0) {
2527 if (buffer != NULL) {
2528 kheap_free(KHEAP_DATA_BUFFERS, buffer, buf_size);
2529 }
2530 task_resume_internal(task);
2531 return KERN_FAILURE;
2532 }
2533
2534 thread_array = kheap_alloc(KHEAP_TEMP,
2535 sizeof(thread_t) * active_thread_count, Z_WAITOK);
2536
2537 /* Iterate all the threads and drop the task lock before calling thread_create_with_continuation */
2538 task_lock(task);
2539 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2540 /* Skip inactive threads */
2541 active = thread->active;
2542 if (!active) {
2543 continue;
2544 }
2545
2546 if (array_count >= active_thread_count) {
2547 break;
2548 }
2549
2550 thread_array[array_count++] = thread;
2551 thread_reference(thread);
2552 }
2553 task_unlock(task);
2554
2555 for (i = 0; i < array_count; i++) {
2556 kr = thread_create_with_continuation(new_task, &new_thread, (thread_continue_t)thread_corpse_continue);
2557 if (kr != KERN_SUCCESS) {
2558 break;
2559 }
2560
2561 /* Equivalent of current thread in corpse */
2562 if (thread_array[i] == self) {
2563 thread_return = new_thread;
2564 new_task->crashed_thread_id = thread_tid(new_thread);
2565 } else if (first_thread == NULL) {
2566 first_thread = new_thread;
2567 } else {
2568 /* drop the extra ref returned by thread_create_with_continuation */
2569 thread_deallocate(new_thread);
2570 }
2571
2572 kr = thread_dup2(thread_array[i], new_thread);
2573 if (kr != KERN_SUCCESS) {
2574 thread_mtx_lock(new_thread);
2575 new_thread->corpse_dup = TRUE;
2576 thread_mtx_unlock(new_thread);
2577 continue;
2578 }
2579
2580 /* Copy thread name */
2581 bsd_copythreadname(new_thread->uthread, thread_array[i]->uthread);
2582 new_thread->thread_tag = thread_array[i]->thread_tag;
2583 thread_copy_resource_info(new_thread, thread_array[i]);
2584 }
2585
2586 /* return the first thread if we couldn't find the equivalent of current */
2587 if (thread_return == THREAD_NULL) {
2588 thread_return = first_thread;
2589 } else if (first_thread != THREAD_NULL) {
2590 /* drop the extra ref returned by thread_create_with_continuation */
2591 thread_deallocate(first_thread);
2592 }
2593
2594 task_resume_internal(task);
2595
2596 for (i = 0; i < array_count; i++) {
2597 thread_deallocate(thread_array[i]);
2598 }
2599 kheap_free(KHEAP_TEMP, thread_array, sizeof(thread_t) * active_thread_count);
2600
2601 if (kr == KERN_SUCCESS) {
2602 *thread_ret = thread_return;
2603 *udata_buffer = buffer;
2604 *size = buf_size;
2605 *num_udata = num_knotes;
2606 } else {
2607 if (thread_return != THREAD_NULL) {
2608 thread_deallocate(thread_return);
2609 }
2610 if (buffer != NULL) {
2611 kheap_free(KHEAP_DATA_BUFFERS, buffer, buf_size);
2612 }
2613 }
2614
2615 return kr;
2616}
2617
#if CONFIG_SECLUDED_MEMORY
/* Defined outside this part of the file; declared here for the code below */
extern void task_set_can_use_secluded_mem_locked(task_t task, boolean_t can_use_secluded_mem);
#endif /* CONFIG_SECLUDED_MEMORY */

#if MACH_ASSERT
/* MACH_ASSERT-only debug switch, off by default */
int debug4k_panic_on_terminate = 0;
#endif /* MACH_ASSERT */
2627kern_return_t
2628task_terminate_internal(
2629 task_t task)
2630{
2631 thread_t thread, self;
2632 task_t self_task;
2633 boolean_t interrupt_save;
2634 int pid = 0;
2635
2636 assert(task != kernel_task);
2637
2638 self = current_thread();
2639 self_task = self->task;
2640
2641 /*
2642 * Get the task locked and make sure that we are not racing
2643 * with someone else trying to terminate us.
2644 */
2645 if (task == self_task) {
2646 task_lock(task);
2647 } else if (task < self_task) {
2648 task_lock(task);
2649 task_lock(self_task);
2650 } else {
2651 task_lock(self_task);
2652 task_lock(task);
2653 }
2654
2655#if CONFIG_SECLUDED_MEMORY
2656 if (task->task_can_use_secluded_mem) {
2657 task_set_can_use_secluded_mem_locked(task, FALSE);
2658 }
2659 task->task_could_use_secluded_mem = FALSE;
2660 task->task_could_also_use_secluded_mem = FALSE;
2661
2662 if (task->task_suppressed_secluded) {
2663 stop_secluded_suppression(task);
2664 }
2665#endif /* CONFIG_SECLUDED_MEMORY */
2666
2667 if (!task->active) {
2668 /*
2669 * Task is already being terminated.
2670 * Just return an error. If we are dying, this will
2671 * just get us to our AST special handler and that
2672 * will get us to finalize the termination of ourselves.
2673 */
2674 task_unlock(task);
2675 if (self_task != task) {
2676 task_unlock(self_task);
2677 }
2678
2679 return KERN_FAILURE;
2680 }
2681
2682 if (task_corpse_pending_report(task)) {
2683 /*
2684 * Task is marked for reporting as corpse.
2685 * Just return an error. This will
2686 * just get us to our AST special handler and that
2687 * will get us to finish the path to death
2688 */
2689 task_unlock(task);
2690 if (self_task != task) {
2691 task_unlock(self_task);
2692 }
2693
2694 return KERN_FAILURE;
2695 }
2696
2697 if (self_task != task) {
2698 task_unlock(self_task);
2699 }
2700
2701 /*
2702 * Make sure the current thread does not get aborted out of
2703 * the waits inside these operations.
2704 */
2705 interrupt_save = thread_interrupt_level(THREAD_UNINT);
2706
2707 /*
2708 * Indicate that we want all the threads to stop executing
2709 * at user space by holding the task (we would have held
2710 * each thread independently in thread_terminate_internal -
2711 * but this way we may be more likely to already find it
2712 * held there). Mark the task inactive, and prevent
2713 * further task operations via the task port.
2714 */
2715 task_hold_locked(task);
2716 task->active = FALSE;
2717 ipc_task_disable(task);
2718
2719#if CONFIG_TELEMETRY
2720 /*
2721 * Notify telemetry that this task is going away.
2722 */
2723 telemetry_task_ctl_locked(task, TF_TELEMETRY, 0);
2724#endif
2725
2726 /*
2727 * Terminate each thread in the task.
2728 */
2729 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2730 thread_terminate_internal(thread, TH_TERMINATE_OPTION_NONE);
2731 }
2732
2733#ifdef MACH_BSD
2734 if (task->bsd_info != NULL && !task_is_exec_copy(task)) {
2735 pid = proc_pid(task->bsd_info);
2736 }
2737#endif /* MACH_BSD */
2738
2739 task_unlock(task);
2740
2741 proc_set_task_policy(task, TASK_POLICY_ATTRIBUTE,
2742 TASK_POLICY_TERMINATED, TASK_POLICY_ENABLE);
2743
2744 /* Early object reap phase */
2745
2746// PR-17045188: Revisit implementation
2747// task_partial_reap(task, pid);
2748
2749#if CONFIG_TASKWATCH
2750 /*
2751 * remove all task watchers
2752 */
2753 task_removewatchers(task);
2754
2755#endif /* CONFIG_TASKWATCH */
2756
2757 /*
2758 * Destroy all synchronizers owned by the task.
2759 */
2760 task_synchronizer_destroy_all(task);
2761
2762 /*
2763 * Clear the watchport boost on the task.
2764 */
2765 task_remove_turnstile_watchports(task);
2766
2767 /*
2768 * Destroy the IPC space, leaving just a reference for it.
2769 */
2770 ipc_space_terminate(task->itk_space);
2771
2772#if 00
2773 /* if some ledgers go negative on tear-down again... */
2774 ledger_disable_panic_on_negative(task->map->pmap->ledger,
2775 task_ledgers.phys_footprint);
2776 ledger_disable_panic_on_negative(task->map->pmap->ledger,
2777 task_ledgers.internal);
2778 ledger_disable_panic_on_negative(task->map->pmap->ledger,
2779 task_ledgers.internal_compressed);
2780 ledger_disable_panic_on_negative(task->map->pmap->ledger,
2781 task_ledgers.iokit_mapped);
2782 ledger_disable_panic_on_negative(task->map->pmap->ledger,
2783 task_ledgers.alternate_accounting);
2784 ledger_disable_panic_on_negative(task->map->pmap->ledger,
2785 task_ledgers.alternate_accounting_compressed);
2786#endif
2787
2788 /*
2789 * If the current thread is a member of the task
2790 * being terminated, then the last reference to
2791 * the task will not be dropped until the thread
2792 * is finally reaped. To avoid incurring the
2793 * expense of removing the address space regions
2794 * at reap time, we do it explicitly here.
2795 */
2796
2797 vm_map_lock(task->map);
2798 vm_map_disable_hole_optimization(task->map);
2799 vm_map_unlock(task->map);
2800
2801#if MACH_ASSERT
2802 /*
2803 * Identify the pmap's process, in case the pmap ledgers drift
2804 * and we have to report it.
2805 */
2806 char procname[17];
2807 if (task->bsd_info && !task_is_exec_copy(task)) {
2808 pid = proc_pid(task->bsd_info);
2809 proc_name_kdp(task, procname, sizeof(procname));
2810 } else {
2811 pid = 0;
2812 strlcpy(procname, "<unknown>", sizeof(procname));
2813 }
2814 pmap_set_process(task->map->pmap, pid, procname);
2815 if (vm_map_page_shift(task->map) < (int)PAGE_SHIFT) {
2816 DEBUG4K_LIFE("map %p procname: %s\n", task->map, procname);
2817 if (debug4k_panic_on_terminate) {
2818 panic("DEBUG4K: %s:%d %d[%s] map %p\n", __FUNCTION__, __LINE__, pid, procname, task->map);
2819 }
2820 }
2821#endif /* MACH_ASSERT */
2822
2823 vm_map_terminate(task->map);
2824
2825 /* release our shared region */
2826 vm_shared_region_set(task, NULL);
2827
2828#if __has_feature(ptrauth_calls)
2829 task_set_shared_region_id(task, NULL);
2830#endif /* __has_feature(ptrauth_calls) */
2831
2832 lck_mtx_lock(&tasks_threads_lock);
2833 queue_remove(&tasks, task, task_t, tasks);
2834 queue_enter(&terminated_tasks, task, task_t, tasks);
2835 tasks_count--;
2836 terminated_tasks_count++;
2837 lck_mtx_unlock(&tasks_threads_lock);
2838
2839 /*
2840 * We no longer need to guard against being aborted, so restore
2841 * the previous interruptible state.
2842 */
2843 thread_interrupt_level(interrupt_save);
2844
2845#if KPC
2846 /* force the task to release all ctrs */
2847 if (task->t_kpc & TASK_KPC_FORCED_ALL_CTRS) {
2848 kpc_force_all_ctrs(task, 0);
2849 }
2850#endif /* KPC */
2851
2852#if CONFIG_COALITIONS
2853 /*
2854 * Leave our coalitions. (drop activation but not reference)
2855 */
2856 coalitions_remove_task(task);
2857#endif
2858
2859#if CONFIG_FREEZE
2860 extern int vm_compressor_available;
2861 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE && vm_compressor_available) {
2862 task_disown_frozen_csegs(task);
2863 assert(queue_empty(&task->task_frozen_cseg_q));
2864 }
2865#endif /* CONFIG_FREEZE */
2866
2867 /*
2868 * Get rid of the task active reference on itself.
2869 */
2870 task_deallocate(task);
2871
2872 return KERN_SUCCESS;
2873}
2874
2875void
2876tasks_system_suspend(boolean_t suspend)
2877{
2878 task_t task;
2879
2880 lck_mtx_lock(&tasks_threads_lock);
2881 assert(tasks_suspend_state != suspend);
2882 tasks_suspend_state = suspend;
2883 queue_iterate(&tasks, task, task_t, tasks) {
2884 if (task == kernel_task) {
2885 continue;
2886 }
2887 suspend ? task_suspend_internal(task) : task_resume_internal(task);
2888 }
2889 lck_mtx_unlock(&tasks_threads_lock);
2890}
2891
2892/*
2893 * task_start_halt:
2894 *
2895 * Shut the current task down (except for the current thread) in
2896 * preparation for dramatic changes to the task (probably exec).
2897 * We hold the task and mark all other threads in the task for
2898 * termination.
2899 */
2900kern_return_t
2901task_start_halt(task_t task)
2902{
2903 kern_return_t kr = KERN_SUCCESS;
2904 task_lock(task);
2905 kr = task_start_halt_locked(task, FALSE);
2906 task_unlock(task);
2907 return kr;
2908}
2909
2910static kern_return_t
2911task_start_halt_locked(task_t task, boolean_t should_mark_corpse)
2912{
2913 thread_t thread, self;
2914 uint64_t dispatchqueue_offset;
2915
2916 assert(task != kernel_task);
2917
2918 self = current_thread();
2919
2920 if (task != self->task && !task_is_a_corpse_fork(task)) {
2921 return KERN_INVALID_ARGUMENT;
2922 }
2923
2924 if (task->halting || !task->active || !self->active) {
2925 /*
2926 * Task or current thread is already being terminated.
2927 * Hurry up and return out of the current kernel context
2928 * so that we run our AST special handler to terminate
2929 * ourselves.
2930 */
2931 return KERN_FAILURE;
2932 }
2933
2934 task->halting = TRUE;
2935
2936 /*
2937 * Mark all the threads to keep them from starting any more
2938 * user-level execution. The thread_terminate_internal code
2939 * would do this on a thread by thread basis anyway, but this
2940 * gives us a better chance of not having to wait there.
2941 */
2942 task_hold_locked(task);
2943 dispatchqueue_offset = get_dispatchqueue_offset_from_proc(task->bsd_info);
2944
2945 /*
2946 * Terminate all the other threads in the task.
2947 */
2948 queue_iterate(&task->threads, thread, thread_t, task_threads)
2949 {
2950 if (should_mark_corpse) {
2951 thread_mtx_lock(thread);
2952 thread->inspection = TRUE;
2953 thread_mtx_unlock(thread);
2954 }
2955 if (thread != self) {
2956 thread_terminate_internal(thread, TH_TERMINATE_OPTION_NONE);
2957 }
2958 }
2959 task->dispatchqueue_offset = dispatchqueue_offset;
2960
2961 task_release_locked(task);
2962
2963 return KERN_SUCCESS;
2964}
2965
2966
2967/*
2968 * task_complete_halt:
2969 *
2970 * Complete task halt by waiting for threads to terminate, then clean
2971 * up task resources (VM, port namespace, etc...) and then let the
2972 * current thread go in the (practically empty) task context.
2973 *
2974 * Note: task->halting flag is not cleared in order to avoid creation
2975 * of new thread in old exec'ed task.
2976 */
2977void
2978task_complete_halt(task_t task)
2979{
2980 task_lock(task);
2981 assert(task->halting);
2982 assert(task == current_task());
2983
2984 /*
2985 * Wait for the other threads to get shut down.
2986 * When the last other thread is reaped, we'll be
2987 * woken up.
2988 */
2989 if (task->thread_count > 1) {
2990 assert_wait((event_t)&task->halting, THREAD_UNINT);
2991 task_unlock(task);
2992 thread_block(THREAD_CONTINUE_NULL);
2993 } else {
2994 task_unlock(task);
2995 }
2996
2997 /*
2998 * Give the machine dependent code a chance
2999 * to perform cleanup of task-level resources
3000 * associated with the current thread before
3001 * ripping apart the task.
3002 */
3003 machine_task_terminate(task);
3004
3005 /*
3006 * Destroy all synchronizers owned by the task.
3007 */
3008 task_synchronizer_destroy_all(task);
3009
3010 /*
3011 * Destroy the contents of the IPC space, leaving just
3012 * a reference for it.
3013 */
3014 ipc_space_clean(task->itk_space);
3015
3016 /*
3017 * Clean out the address space, as we are going to be
3018 * getting a new one.
3019 */
3020 vm_map_remove(task->map, task->map->min_offset,
3021 task->map->max_offset,
3022 /*
3023 * Final cleanup:
3024 * + no unnesting
3025 * + remove immutable mappings
3026 * + allow gaps in the range
3027 */
3028 (VM_MAP_REMOVE_NO_UNNESTING |
3029 VM_MAP_REMOVE_IMMUTABLE |
3030 VM_MAP_REMOVE_GAPS_OK));
3031
3032 /*
3033 * Kick out any IOKitUser handles to the task. At best they're stale,
3034 * at worst someone is racing a SUID exec.
3035 */
3036 iokit_task_terminate(task);
3037}
3038
3039/*
3040 * task_hold_locked:
3041 *
3042 * Suspend execution of the specified task.
3043 * This is a recursive-style suspension of the task, a count of
3044 * suspends is maintained.
3045 *
3046 * CONDITIONS: the task is locked and active.
3047 */
3048void
3049task_hold_locked(
3050 task_t task)
3051{
3052 thread_t thread;
3053
3054 assert(task->active);
3055
3056 if (task->suspend_count++ > 0) {
3057 return;
3058 }
3059
3060 if (task->bsd_info) {
3061 workq_proc_suspended(task->bsd_info);
3062 }
3063
3064 /*
3065 * Iterate through all the threads and hold them.
3066 */
3067 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3068 thread_mtx_lock(thread);
3069 thread_hold(thread);
3070 thread_mtx_unlock(thread);
3071 }
3072}
3073
3074/*
3075 * task_hold:
3076 *
3077 * Same as the internal routine above, except that is must lock
3078 * and verify that the task is active. This differs from task_suspend
3079 * in that it places a kernel hold on the task rather than just a
3080 * user-level hold. This keeps users from over resuming and setting
3081 * it running out from under the kernel.
3082 *
3083 * CONDITIONS: the caller holds a reference on the task
3084 */
3085kern_return_t
3086task_hold(
3087 task_t task)
3088{
3089 if (task == TASK_NULL) {
3090 return KERN_INVALID_ARGUMENT;
3091 }
3092
3093 task_lock(task);
3094
3095 if (!task->active) {
3096 task_unlock(task);
3097
3098 return KERN_FAILURE;
3099 }
3100
3101 task_hold_locked(task);
3102 task_unlock(task);
3103
3104 return KERN_SUCCESS;
3105}
3106
3107kern_return_t
3108task_wait(
3109 task_t task,
3110 boolean_t until_not_runnable)
3111{
3112 if (task == TASK_NULL) {
3113 return KERN_INVALID_ARGUMENT;
3114 }
3115
3116 task_lock(task);
3117
3118 if (!task->active) {
3119 task_unlock(task);
3120
3121 return KERN_FAILURE;
3122 }
3123
3124 task_wait_locked(task, until_not_runnable);
3125 task_unlock(task);
3126
3127 return KERN_SUCCESS;
3128}
3129
3130/*
3131 * task_wait_locked:
3132 *
3133 * Wait for all threads in task to stop.
3134 *
3135 * Conditions:
3136 * Called with task locked, active, and held.
3137 */
3138void
3139task_wait_locked(
3140 task_t task,
3141 boolean_t until_not_runnable)
3142{
3143 thread_t thread, self;
3144
3145 assert(task->active);
3146 assert(task->suspend_count > 0);
3147
3148 self = current_thread();
3149
3150 /*
3151 * Iterate through all the threads and wait for them to
3152 * stop. Do not wait for the current thread if it is within
3153 * the task.
3154 */
3155 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3156 if (thread != self) {
3157 thread_wait(thread, until_not_runnable);
3158 }
3159 }
3160}
3161
3162boolean_t
3163task_is_app_suspended(task_t task)
3164{
3165 return task->pidsuspended;
3166}
3167
3168/*
3169 * task_release_locked:
3170 *
3171 * Release a kernel hold on a task.
3172 *
3173 * CONDITIONS: the task is locked and active
3174 */
3175void
3176task_release_locked(
3177 task_t task)
3178{
3179 thread_t thread;
3180
3181 assert(task->active);
3182 assert(task->suspend_count > 0);
3183
3184 if (--task->suspend_count > 0) {
3185 return;
3186 }
3187
3188 if (task->bsd_info) {
3189 workq_proc_resumed(task->bsd_info);
3190 }
3191
3192 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3193 thread_mtx_lock(thread);
3194 thread_release(thread);
3195 thread_mtx_unlock(thread);
3196 }
3197}
3198
3199/*
3200 * task_release:
3201 *
3202 * Same as the internal routine above, except that it must lock
3203 * and verify that the task is active.
3204 *
3205 * CONDITIONS: The caller holds a reference to the task
3206 */
3207kern_return_t
3208task_release(
3209 task_t task)
3210{
3211 if (task == TASK_NULL) {
3212 return KERN_INVALID_ARGUMENT;
3213 }
3214
3215 task_lock(task);
3216
3217 if (!task->active) {
3218 task_unlock(task);
3219
3220 return KERN_FAILURE;
3221 }
3222
3223 task_release_locked(task);
3224 task_unlock(task);
3225
3226 return KERN_SUCCESS;
3227}
3228
3229static kern_return_t
3230task_threads_internal(
3231 task_t task,
3232 thread_act_array_t *threads_out,
3233 mach_msg_type_number_t *count,
3234 mach_thread_flavor_t flavor)
3235{
3236 mach_msg_type_number_t actual;
3237 thread_t *thread_list;
3238 thread_t thread;
3239 vm_size_t size, size_needed;
3240 void *addr;
3241 unsigned int i, j;
3242
3243 size = 0; addr = NULL;
3244
3245 if (task == TASK_NULL) {
3246 return KERN_INVALID_ARGUMENT;
3247 }
3248
3249 assert(flavor <= THREAD_FLAVOR_INSPECT);
3250
3251 for (;;) {
3252 task_lock(task);
3253 if (!task->active) {
3254 task_unlock(task);
3255
3256 if (size != 0) {
3257 kfree(addr, size);
3258 }
3259
3260 return KERN_FAILURE;
3261 }
3262
3263 actual = task->thread_count;
3264
3265 /* do we have the memory we need? */
3266 size_needed = actual * sizeof(mach_port_t);
3267 if (size_needed <= size) {
3268 break;
3269 }
3270
3271 /* unlock the task and allocate more memory */
3272 task_unlock(task);
3273
3274 if (size != 0) {
3275 kfree(addr, size);
3276 }
3277
3278 assert(size_needed > 0);
3279 size = size_needed;
3280
3281 addr = kalloc(size);
3282 if (addr == 0) {
3283 return KERN_RESOURCE_SHORTAGE;
3284 }
3285 }
3286
3287 /* OK, have memory and the task is locked & active */
3288 thread_list = (thread_t *)addr;
3289
3290 i = j = 0;
3291
3292 for (thread = (thread_t)queue_first(&task->threads); i < actual;
3293 ++i, thread = (thread_t)queue_next(&thread->task_threads)) {
3294 thread_reference_internal(thread);
3295 thread_list[j++] = thread;
3296 }
3297
3298 assert(queue_end(&task->threads, (queue_entry_t)thread));
3299
3300 actual = j;
3301 size_needed = actual * sizeof(mach_port_t);
3302
3303 /* can unlock task now that we've got the thread refs */
3304 task_unlock(task);
3305
3306 if (actual == 0) {
3307 /* no threads, so return null pointer and deallocate memory */
3308
3309 *threads_out = NULL;
3310 *count = 0;
3311
3312 if (size != 0) {
3313 kfree(addr, size);
3314 }
3315 } else {
3316 /* if we allocated too much, must copy */
3317
3318 if (size_needed < size) {
3319 void *newaddr;
3320
3321 newaddr = kalloc(size_needed);
3322 if (newaddr == 0) {
3323 for (i = 0; i < actual; ++i) {
3324 thread_deallocate(thread_list[i]);
3325 }
3326 kfree(addr, size);
3327 return KERN_RESOURCE_SHORTAGE;
3328 }
3329
3330 bcopy(addr, newaddr, size_needed);
3331 kfree(addr, size);
3332 thread_list = (thread_t *)newaddr;
3333 }
3334
3335 *threads_out = thread_list;
3336 *count = actual;
3337
3338 /* do the conversion that Mig should handle */
3339
3340 switch (flavor) {
3341 case THREAD_FLAVOR_CONTROL:
3342 if (task == current_task()) {
3343 for (i = 0; i < actual; ++i) {
3344 ((ipc_port_t *) thread_list)[i] = convert_thread_to_port_pinned(thread_list[i]);
3345 }
3346 } else {
3347 for (i = 0; i < actual; ++i) {
3348 ((ipc_port_t *) thread_list)[i] = convert_thread_to_port(thread_list[i]);
3349 }
3350 }
3351 break;
3352 case THREAD_FLAVOR_READ:
3353 for (i = 0; i < actual; ++i) {
3354 ((ipc_port_t *) thread_list)[i] = convert_thread_read_to_port(thread_list[i]);
3355 }
3356 break;
3357 case THREAD_FLAVOR_INSPECT:
3358 for (i = 0; i < actual; ++i) {
3359 ((ipc_port_t *) thread_list)[i] = convert_thread_inspect_to_port(thread_list[i]);
3360 }
3361 break;
3362 }
3363 }
3364
3365 return KERN_SUCCESS;
3366}
3367
3368kern_return_t
3369task_threads(
3370 task_t task,
3371 thread_act_array_t *threads_out,
3372 mach_msg_type_number_t *count)
3373{
3374 return task_threads_internal(task, threads_out, count, THREAD_FLAVOR_CONTROL);
3375}
3376
3377
3378kern_return_t
3379task_threads_from_user(
3380 mach_port_t port,
3381 thread_act_array_t *threads_out,
3382 mach_msg_type_number_t *count)
3383{
3384 ipc_kobject_type_t kotype;
3385 kern_return_t kr;
3386
3387 task_t task = convert_port_to_task_check_type(port, &kotype, TASK_FLAVOR_INSPECT, FALSE);
3388
3389 if (task == TASK_NULL) {
3390 return KERN_INVALID_ARGUMENT;
3391 }
3392
3393 switch (kotype) {
3394 case IKOT_TASK_CONTROL:
3395 kr = task_threads_internal(task, threads_out, count, THREAD_FLAVOR_CONTROL);
3396 break;
3397 case IKOT_TASK_READ:
3398 kr = task_threads_internal(task, threads_out, count, THREAD_FLAVOR_READ);
3399 break;
3400 case IKOT_TASK_INSPECT:
3401 kr = task_threads_internal(task, threads_out, count, THREAD_FLAVOR_INSPECT);
3402 break;
3403 default:
3404 panic("strange kobject type");
3405 break;
3406 }
3407
3408 task_deallocate(task);
3409 return kr;
3410}
3411
/*
 * Hold modes: the `mode` argument of place_task_hold() and
 * release_task_hold().
 */
#define TASK_HOLD_NORMAL        0       /* plain hold (task_suspend_internal / task_resume_internal) */
#define TASK_HOLD_PIDSUSPEND    1       /* hold tied to the pidsuspended flag */
#define TASK_HOLD_LEGACY        2       /* task_suspend / task_resume hold; also counted in legacy_stop_count */
#define TASK_HOLD_LEGACY_ALL    3       /* release only: drop every outstanding legacy hold at once */
3416
3417static kern_return_t
3418place_task_hold(
3419 task_t task,
3420 int mode)
3421{
3422 if (!task->active && !task_is_a_corpse(task)) {
3423 return KERN_FAILURE;
3424 }
3425
3426 /* Return success for corpse task */
3427 if (task_is_a_corpse(task)) {
3428 return KERN_SUCCESS;
3429 }
3430
3431 KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_IPC, MACH_TASK_SUSPEND),
3432 task_pid(task),
3433 task->thread_count > 0 ?((thread_t)queue_first(&task->threads))->thread_id : 0,
3434 task->user_stop_count, task->user_stop_count + 1);
3435
3436#if MACH_ASSERT
3437 current_task()->suspends_outstanding++;
3438#endif
3439
3440 if (mode == TASK_HOLD_LEGACY) {
3441 task->legacy_stop_count++;
3442 }
3443
3444 if (task->user_stop_count++ > 0) {
3445 /*
3446 * If the stop count was positive, the task is
3447 * already stopped and we can exit.
3448 */
3449 return KERN_SUCCESS;
3450 }
3451
3452 /*
3453 * Put a kernel-level hold on the threads in the task (all
3454 * user-level task suspensions added together represent a
3455 * single kernel-level hold). We then wait for the threads
3456 * to stop executing user code.
3457 */
3458 task_hold_locked(task);
3459 task_wait_locked(task, FALSE);
3460
3461 return KERN_SUCCESS;
3462}
3463
3464static kern_return_t
3465release_task_hold(
3466 task_t task,
3467 int mode)
3468{
3469 boolean_t release = FALSE;
3470
3471 if (!task->active && !task_is_a_corpse(task)) {
3472 return KERN_FAILURE;
3473 }
3474
3475 /* Return success for corpse task */
3476 if (task_is_a_corpse(task)) {
3477 return KERN_SUCCESS;
3478 }
3479
3480 if (mode == TASK_HOLD_PIDSUSPEND) {
3481 if (task->pidsuspended == FALSE) {
3482 return KERN_FAILURE;
3483 }
3484 task->pidsuspended = FALSE;
3485 }
3486
3487 if (task->user_stop_count > (task->pidsuspended ? 1 : 0)) {
3488 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
3489 MACHDBG_CODE(DBG_MACH_IPC, MACH_TASK_RESUME) | DBG_FUNC_NONE,
3490 task_pid(task), ((thread_t)queue_first(&task->threads))->thread_id,
3491 task->user_stop_count, mode, task->legacy_stop_count);
3492
3493#if MACH_ASSERT
3494 /*
3495 * This is obviously not robust; if we suspend one task and then resume a different one,
3496 * we'll fly under the radar. This is only meant to catch the common case of a crashed
3497 * or buggy suspender.
3498 */
3499 current_task()->suspends_outstanding--;
3500#endif
3501
3502 if (mode == TASK_HOLD_LEGACY_ALL) {
3503 if (task->legacy_stop_count >= task->user_stop_count) {
3504 task->user_stop_count = 0;
3505 release = TRUE;
3506 } else {
3507 task->user_stop_count -= task->legacy_stop_count;
3508 }
3509 task->legacy_stop_count = 0;
3510 } else {
3511 if (mode == TASK_HOLD_LEGACY && task->legacy_stop_count > 0) {
3512 task->legacy_stop_count--;
3513 }
3514 if (--task->user_stop_count == 0) {
3515 release = TRUE;
3516 }
3517 }
3518 } else {
3519 return KERN_FAILURE;
3520 }
3521
3522 /*
3523 * Release the task if necessary.
3524 */
3525 if (release) {
3526 task_release_locked(task);
3527 }
3528
3529 return KERN_SUCCESS;
3530}
3531
3532boolean_t
3533get_task_suspended(task_t task)
3534{
3535 return 0 != task->user_stop_count;
3536}
3537
3538/*
3539 * task_suspend:
3540 *
3541 * Implement an (old-fashioned) user-level suspension on a task.
3542 *
3543 * Because the user isn't expecting to have to manage a suspension
3544 * token, we'll track it for him in the kernel in the form of a naked
3545 * send right to the task's resume port. All such send rights
3546 * account for a single suspension against the task (unlike task_suspend2()
3547 * where each caller gets a unique suspension count represented by a
3548 * unique send-once right).
3549 *
3550 * Conditions:
3551 * The caller holds a reference to the task
3552 */
3553kern_return_t
3554task_suspend(
3555 task_t task)
3556{
3557 kern_return_t kr;
3558 mach_port_t port;
3559 mach_port_name_t name;
3560
3561 if (task == TASK_NULL || task == kernel_task) {
3562 return KERN_INVALID_ARGUMENT;
3563 }
3564
3565 task_lock(task);
3566
3567 /*
3568 * place a legacy hold on the task.
3569 */
3570 kr = place_task_hold(task, TASK_HOLD_LEGACY);
3571 if (kr != KERN_SUCCESS) {
3572 task_unlock(task);
3573 return kr;
3574 }
3575
3576 /*
3577 * Claim a send right on the task resume port, and request a no-senders
3578 * notification on that port (if none outstanding).
3579 */
3580 (void)ipc_kobject_make_send_lazy_alloc_port((ipc_port_t *) &task->itk_resume,
3581 (ipc_kobject_t)task, IKOT_TASK_RESUME, IPC_KOBJECT_ALLOC_NONE, true,
3582 OS_PTRAUTH_DISCRIMINATOR("task.itk_resume"));
3583 port = task->itk_resume;
3584 task_unlock(task);
3585
3586 /*
3587 * Copyout the send right into the calling task's IPC space. It won't know it is there,
3588 * but we'll look it up when calling a traditional resume. Any IPC operations that
3589 * deallocate the send right will auto-release the suspension.
3590 */
3591 if (IP_VALID(port)) {
3592 kr = ipc_object_copyout(current_space(), ip_to_object(port),
3593 MACH_MSG_TYPE_MOVE_SEND, IPC_OBJECT_COPYOUT_FLAGS_NONE,
3594 NULL, NULL, &name);
3595 } else {
3596 kr = KERN_SUCCESS;
3597 }
3598 if (kr != KERN_SUCCESS) {
3599 printf("warning: %s(%d) failed to copyout suspension "
3600 "token for pid %d with error: %d\n",
3601 proc_name_address(current_task()->bsd_info),
3602 proc_pid(current_task()->bsd_info),
3603 task_pid(task), kr);
3604 }
3605
3606 return kr;
3607}
3608
3609/*
3610 * task_resume:
3611 * Release a user hold on a task.
3612 *
3613 * Conditions:
3614 * The caller holds a reference to the task
3615 */
3616kern_return_t
3617task_resume(
3618 task_t task)
3619{
3620 kern_return_t kr;
3621 mach_port_name_t resume_port_name;
3622 ipc_entry_t resume_port_entry;
3623 ipc_space_t space = current_task()->itk_space;
3624
3625 if (task == TASK_NULL || task == kernel_task) {
3626 return KERN_INVALID_ARGUMENT;
3627 }
3628
3629 /* release a legacy task hold */
3630 task_lock(task);
3631 kr = release_task_hold(task, TASK_HOLD_LEGACY);
3632 task_unlock(task);
3633
3634 is_write_lock(space);
3635 if (is_active(space) && IP_VALID(task->itk_resume) &&
3636 ipc_hash_lookup(space, ip_to_object(task->itk_resume), &resume_port_name, &resume_port_entry) == TRUE) {
3637 /*
3638 * We found a suspension token in the caller's IPC space. Release a send right to indicate that
3639 * we are holding one less legacy hold on the task from this caller. If the release failed,
3640 * go ahead and drop all the rights, as someone either already released our holds or the task
3641 * is gone.
3642 */
3643 if (kr == KERN_SUCCESS) {
3644 ipc_right_dealloc(space, resume_port_name, resume_port_entry);
3645 } else {
3646 ipc_right_destroy(space, resume_port_name, resume_port_entry, FALSE, 0);
3647 }
3648 /* space unlocked */
3649 } else {
3650 is_write_unlock(space);
3651 if (kr == KERN_SUCCESS) {
3652 printf("warning: %s(%d) performed out-of-band resume on pid %d\n",
3653 proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info),
3654 task_pid(task));
3655 }
3656 }
3657
3658 return kr;
3659}
3660
3661/*
3662 * Suspend the target task.
3663 * Making/holding a token/reference/port is the callers responsibility.
3664 */
3665kern_return_t
3666task_suspend_internal(task_t task)
3667{
3668 kern_return_t kr;
3669
3670 if (task == TASK_NULL || task == kernel_task) {
3671 return KERN_INVALID_ARGUMENT;
3672 }
3673
3674 task_lock(task);
3675 kr = place_task_hold(task, TASK_HOLD_NORMAL);
3676 task_unlock(task);
3677 return kr;
3678}
3679
3680/*
3681 * Suspend the target task, and return a suspension token. The token
3682 * represents a reference on the suspended task.
3683 */
3684kern_return_t
3685task_suspend2(
3686 task_t task,
3687 task_suspension_token_t *suspend_token)
3688{
3689 kern_return_t kr;
3690
3691 kr = task_suspend_internal(task);
3692 if (kr != KERN_SUCCESS) {
3693 *suspend_token = TASK_NULL;
3694 return kr;
3695 }
3696
3697 /*
3698 * Take a reference on the target task and return that to the caller
3699 * as a "suspension token," which can be converted into an SO right to
3700 * the now-suspended task's resume port.
3701 */
3702 task_reference_internal(task);
3703 *suspend_token = task;
3704
3705 return KERN_SUCCESS;
3706}
3707
3708/*
3709 * Resume the task
3710 * (reference/token/port management is caller's responsibility).
3711 */
3712kern_return_t
3713task_resume_internal(
3714 task_suspension_token_t task)
3715{
3716 kern_return_t kr;
3717
3718 if (task == TASK_NULL || task == kernel_task) {
3719 return KERN_INVALID_ARGUMENT;
3720 }
3721
3722 task_lock(task);
3723 kr = release_task_hold(task, TASK_HOLD_NORMAL);
3724 task_unlock(task);
3725 return kr;
3726}
3727
3728/*
3729 * Resume the task using a suspension token. Consumes the token's ref.
3730 */
3731kern_return_t
3732task_resume2(
3733 task_suspension_token_t task)
3734{
3735 kern_return_t kr;
3736
3737 kr = task_resume_internal(task);
3738 task_suspension_token_deallocate(task);
3739
3740 return kr;
3741}
3742
3743boolean_t
3744task_suspension_notify(mach_msg_header_t *request_header)
3745{
3746 ipc_port_t port = request_header->msgh_remote_port;
3747 task_t task = convert_port_to_task_suspension_token(port);
3748 mach_msg_type_number_t not_count;
3749
3750 if (task == TASK_NULL || task == kernel_task) {
3751 return TRUE; /* nothing to do */
3752 }
3753 switch (request_header->msgh_id) {
3754 case MACH_NOTIFY_SEND_ONCE:
3755 /* release the hold held by this specific send-once right */
3756 task_lock(task);
3757 release_task_hold(task, TASK_HOLD_NORMAL);
3758 task_unlock(task);
3759 break;
3760
3761 case MACH_NOTIFY_NO_SENDERS:
3762 not_count = ((mach_no_senders_notification_t *)request_header)->not_count;
3763
3764 task_lock(task);
3765 ip_lock(port);
3766 if (port->ip_mscount == not_count) {
3767 /* release all the [remaining] outstanding legacy holds */
3768 assert(port->ip_nsrequest == IP_NULL);
3769 ip_unlock(port);
3770 release_task_hold(task, TASK_HOLD_LEGACY_ALL);
3771 task_unlock(task);
3772 } else if (port->ip_nsrequest == IP_NULL) {
3773 ipc_port_t old_notify;
3774
3775 task_unlock(task);
3776 /* new send rights, re-arm notification at current make-send count */
3777 ipc_port_nsrequest(port, port->ip_mscount, ipc_port_make_sonce_locked(port), &old_notify);
3778 assert(old_notify == IP_NULL);
3779 /* port unlocked */
3780 } else {
3781 ip_unlock(port);
3782 task_unlock(task);
3783 }
3784 break;
3785
3786 default:
3787 break;
3788 }
3789
3790 task_suspension_token_deallocate(task); /* drop token reference */
3791 return TRUE;
3792}
3793
3794static kern_return_t
3795task_pidsuspend_locked(task_t task)
3796{
3797 kern_return_t kr;
3798
3799 if (task->pidsuspended) {
3800 kr = KERN_FAILURE;
3801 goto out;
3802 }
3803
3804 task->pidsuspended = TRUE;
3805
3806 kr = place_task_hold(task, TASK_HOLD_PIDSUSPEND);
3807 if (kr != KERN_SUCCESS) {
3808 task->pidsuspended = FALSE;
3809 }
3810out:
3811 return kr;
3812}
3813
3814
3815/*
3816 * task_pidsuspend:
3817 *
3818 * Suspends a task by placing a hold on its threads.
3819 *
3820 * Conditions:
3821 * The caller holds a reference to the task
3822 */
3823kern_return_t
3824task_pidsuspend(
3825 task_t task)
3826{
3827 kern_return_t kr;
3828
3829 if (task == TASK_NULL || task == kernel_task) {
3830 return KERN_INVALID_ARGUMENT;
3831 }
3832
3833 task_lock(task);
3834
3835 kr = task_pidsuspend_locked(task);
3836
3837 task_unlock(task);
3838
3839 if ((KERN_SUCCESS == kr) && task->message_app_suspended) {
3840 iokit_task_app_suspended_changed(task);
3841 }
3842
3843 return kr;
3844}
3845
3846/*
3847 * task_pidresume:
3848 * Resumes a previously suspended task.
3849 *
3850 * Conditions:
3851 * The caller holds a reference to the task
3852 */
3853kern_return_t
3854task_pidresume(
3855 task_t task)
3856{
3857 kern_return_t kr;
3858
3859 if (task == TASK_NULL || task == kernel_task) {
3860 return KERN_INVALID_ARGUMENT;
3861 }
3862
3863 task_lock(task);
3864
3865#if CONFIG_FREEZE
3866
3867 while (task->changing_freeze_state) {
3868 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
3869 task_unlock(task);
3870 thread_block(THREAD_CONTINUE_NULL);
3871
3872 task_lock(task);
3873 }
3874 task->changing_freeze_state = TRUE;
3875#endif
3876
3877 kr = release_task_hold(task, TASK_HOLD_PIDSUSPEND);
3878
3879 task_unlock(task);
3880
3881 if ((KERN_SUCCESS == kr) && task->message_app_suspended) {
3882 iokit_task_app_suspended_changed(task);
3883 }
3884
3885#if CONFIG_FREEZE
3886
3887 task_lock(task);
3888
3889 if (kr == KERN_SUCCESS) {
3890 task->frozen = FALSE;
3891 }
3892 task->changing_freeze_state = FALSE;
3893 thread_wakeup(&task->changing_freeze_state);
3894
3895 task_unlock(task);
3896#endif
3897
3898 return kr;
3899}
3900
3901os_refgrp_decl(static, task_watchports_refgrp, "task_watchports", NULL);
3902
3903/*
3904 * task_add_turnstile_watchports:
3905 * Setup watchports to boost the main thread of the task.
3906 *
3907 * Arguments:
3908 * task: task being spawned
3909 * thread: main thread of task
3910 * portwatch_ports: array of watchports
3911 * portwatch_count: number of watchports
3912 *
3913 * Conditions:
3914 * Nothing locked.
3915 */
3916void
3917task_add_turnstile_watchports(
3918 task_t task,
3919 thread_t thread,
3920 ipc_port_t *portwatch_ports,
3921 uint32_t portwatch_count)
3922{
3923 struct task_watchports *watchports = NULL;
3924 struct task_watchport_elem *previous_elem_array[TASK_MAX_WATCHPORT_COUNT] = {};
3925 os_ref_count_t refs;
3926
3927 /* Check if the task has terminated */
3928 if (!task->active) {
3929 return;
3930 }
3931
3932 assert(portwatch_count <= TASK_MAX_WATCHPORT_COUNT);
3933
3934 watchports = task_watchports_alloc_init(task, thread, portwatch_count);
3935
3936 /* Lock the ipc space */
3937 is_write_lock(task->itk_space);
3938
3939 /* Setup watchports to boost the main thread */
3940 refs = task_add_turnstile_watchports_locked(task,
3941 watchports, previous_elem_array, portwatch_ports,
3942 portwatch_count);
3943
3944 /* Drop the space lock */
3945 is_write_unlock(task->itk_space);
3946
3947 if (refs == 0) {
3948 task_watchports_deallocate(watchports);
3949 }
3950
3951 /* Drop the ref on previous_elem_array */
3952 for (uint32_t i = 0; i < portwatch_count && previous_elem_array[i] != NULL; i++) {
3953 task_watchport_elem_deallocate(previous_elem_array[i]);
3954 }
3955}
3956
3957/*
3958 * task_remove_turnstile_watchports:
3959 * Clear all turnstile boost on the task from watchports.
3960 *
3961 * Arguments:
3962 * task: task being terminated
3963 *
3964 * Conditions:
3965 * Nothing locked.
3966 */
3967void
3968task_remove_turnstile_watchports(
3969 task_t task)
3970{
3971 os_ref_count_t refs = TASK_MAX_WATCHPORT_COUNT;
3972 struct task_watchports *watchports = NULL;
3973 ipc_port_t port_freelist[TASK_MAX_WATCHPORT_COUNT] = {};
3974 uint32_t portwatch_count;
3975
3976 /* Lock the ipc space */
3977 is_write_lock(task->itk_space);
3978
3979 /* Check if watchport boost exist */
3980 if (task->watchports == NULL) {
3981 is_write_unlock(task->itk_space);
3982 return;
3983 }
3984 watchports = task->watchports;
3985 portwatch_count = watchports->tw_elem_array_count;
3986
3987 refs = task_remove_turnstile_watchports_locked(task, watchports,
3988 port_freelist);
3989
3990 is_write_unlock(task->itk_space);
3991
3992 /* Drop all the port references */
3993 for (uint32_t i = 0; i < portwatch_count && port_freelist[i] != NULL; i++) {
3994 ip_release(port_freelist[i]);
3995 }
3996
3997 /* Clear the task and thread references for task_watchport */
3998 if (refs == 0) {
3999 task_watchports_deallocate(watchports);
4000 }
4001}
4002
4003/*
4004 * task_transfer_turnstile_watchports:
4005 * Transfer all watchport turnstile boost from old task to new task.
4006 *
4007 * Arguments:
4008 * old_task: task calling exec
4009 * new_task: new exec'ed task
4010 * thread: main thread of new task
4011 *
4012 * Conditions:
4013 * Nothing locked.
4014 */
4015void
4016task_transfer_turnstile_watchports(
4017 task_t old_task,
4018 task_t new_task,
4019 thread_t new_thread)
4020{
4021 struct task_watchports *old_watchports = NULL;
4022 struct task_watchports *new_watchports = NULL;
4023 os_ref_count_t old_refs = TASK_MAX_WATCHPORT_COUNT;
4024 os_ref_count_t new_refs = TASK_MAX_WATCHPORT_COUNT;
4025 uint32_t portwatch_count;
4026
4027 if (old_task->watchports == NULL || !new_task->active) {
4028 return;
4029 }
4030
4031 /* Get the watch port count from the old task */
4032 is_write_lock(old_task->itk_space);
4033 if (old_task->watchports == NULL) {
4034 is_write_unlock(old_task->itk_space);
4035 return;
4036 }
4037
4038 portwatch_count = old_task->watchports->tw_elem_array_count;
4039 is_write_unlock(old_task->itk_space);
4040
4041 new_watchports = task_watchports_alloc_init(new_task, new_thread, portwatch_count);
4042
4043 /* Lock the ipc space for old task */
4044 is_write_lock(old_task->itk_space);
4045
4046 /* Lock the ipc space for new task */
4047 is_write_lock(new_task->itk_space);
4048
4049 /* Check if watchport boost exist */
4050 if (old_task->watchports == NULL || !new_task->active) {
4051 is_write_unlock(new_task->itk_space);
4052 is_write_unlock(old_task->itk_space);
4053 (void)task_watchports_release(new_watchports);
4054 task_watchports_deallocate(new_watchports);
4055 return;
4056 }
4057
4058 old_watchports = old_task->watchports;
4059 assert(portwatch_count == old_task->watchports->tw_elem_array_count);
4060
4061 /* Setup new task watchports */
4062 new_task->watchports = new_watchports;
4063
4064 for (uint32_t i = 0; i < portwatch_count; i++) {
4065 ipc_port_t port = old_watchports->tw_elem[i].twe_port;
4066
4067 if (port == NULL) {
4068 task_watchport_elem_clear(&new_watchports->tw_elem[i]);
4069 continue;
4070 }
4071
4072 /* Lock the port and check if it has the entry */
4073 ip_lock(port);
4074 imq_lock(&port->ip_messages);
4075
4076 task_watchport_elem_init(&new_watchports->tw_elem[i], new_task, port);
4077
4078 if (ipc_port_replace_watchport_elem_conditional_locked(port,
4079 &old_watchports->tw_elem[i], &new_watchports->tw_elem[i]) == KERN_SUCCESS) {
4080 task_watchport_elem_clear(&old_watchports->tw_elem[i]);
4081
4082 task_watchports_retain(new_watchports);
4083 old_refs = task_watchports_release(old_watchports);
4084
4085 /* Check if all ports are cleaned */
4086 if (old_refs == 0) {
4087 old_task->watchports = NULL;
4088 }
4089 } else {
4090 task_watchport_elem_clear(&new_watchports->tw_elem[i]);
4091 }
4092 /* mqueue and port unlocked by ipc_port_replace_watchport_elem_conditional_locked */
4093 }
4094
4095 /* Drop the reference on new task_watchports struct returned by task_watchports_alloc_init */
4096 new_refs = task_watchports_release(new_watchports);
4097 if (new_refs == 0) {
4098 new_task->watchports = NULL;
4099 }
4100
4101 is_write_unlock(new_task->itk_space);
4102 is_write_unlock(old_task->itk_space);
4103
4104 /* Clear the task and thread references for old_watchport */
4105 if (old_refs == 0) {
4106 task_watchports_deallocate(old_watchports);
4107 }
4108
4109 /* Clear the task and thread references for new_watchport */
4110 if (new_refs == 0) {
4111 task_watchports_deallocate(new_watchports);
4112 }
4113}
4114
4115/*
4116 * task_add_turnstile_watchports_locked:
4117 * Setup watchports to boost the main thread of the task.
4118 *
4119 * Arguments:
4120 * task: task to boost
4121 * watchports: watchport structure to be attached to the task
4122 * previous_elem_array: an array of old watchport_elem to be returned to caller
4123 * portwatch_ports: array of watchports
4124 * portwatch_count: number of watchports
4125 *
4126 * Conditions:
4127 * ipc space of the task locked.
4128 * returns array of old watchport_elem in previous_elem_array
4129 */
4130static os_ref_count_t
4131task_add_turnstile_watchports_locked(
4132 task_t task,
4133 struct task_watchports *watchports,
4134 struct task_watchport_elem **previous_elem_array,
4135 ipc_port_t *portwatch_ports,
4136 uint32_t portwatch_count)
4137{
4138 os_ref_count_t refs = TASK_MAX_WATCHPORT_COUNT;
4139
4140 /* Check if the task is still active */
4141 if (!task->active) {
4142 refs = task_watchports_release(watchports);
4143 return refs;
4144 }
4145
4146 assert(task->watchports == NULL);
4147 task->watchports = watchports;
4148
4149 for (uint32_t i = 0, j = 0; i < portwatch_count; i++) {
4150 ipc_port_t port = portwatch_ports[i];
4151
4152 task_watchport_elem_init(&watchports->tw_elem[i], task, port);
4153 if (port == NULL) {
4154 task_watchport_elem_clear(&watchports->tw_elem[i]);
4155 continue;
4156 }
4157
4158 ip_lock(port);
4159 imq_lock(&port->ip_messages);
4160
4161 /* Check if port is in valid state to be setup as watchport */
4162 if (ipc_port_add_watchport_elem_locked(port, &watchports->tw_elem[i],
4163 &previous_elem_array[j]) != KERN_SUCCESS) {
4164 task_watchport_elem_clear(&watchports->tw_elem[i]);
4165 continue;
4166 }
4167 /* port and mqueue unlocked on return */
4168
4169 ip_reference(port);
4170 task_watchports_retain(watchports);
4171 if (previous_elem_array[j] != NULL) {
4172 j++;
4173 }
4174 }
4175
4176 /* Drop the reference on task_watchport struct returned by os_ref_init */
4177 refs = task_watchports_release(watchports);
4178 if (refs == 0) {
4179 task->watchports = NULL;
4180 }
4181
4182 return refs;
4183}
4184
4185/*
4186 * task_remove_turnstile_watchports_locked:
4187 * Clear all turnstile boost on the task from watchports.
4188 *
4189 * Arguments:
4190 * task: task to remove watchports from
4191 * watchports: watchports structure for the task
4192 * port_freelist: array of ports returned with ref to caller
4193 *
4194 *
4195 * Conditions:
4196 * ipc space of the task locked.
4197 * array of ports with refs are returned in port_freelist
4198 */
4199static os_ref_count_t
4200task_remove_turnstile_watchports_locked(
4201 task_t task,
4202 struct task_watchports *watchports,
4203 ipc_port_t *port_freelist)
4204{
4205 os_ref_count_t refs = TASK_MAX_WATCHPORT_COUNT;
4206
4207 for (uint32_t i = 0, j = 0; i < watchports->tw_elem_array_count; i++) {
4208 ipc_port_t port = watchports->tw_elem[i].twe_port;
4209 if (port == NULL) {
4210 continue;
4211 }
4212
4213 /* Lock the port and check if it has the entry */
4214 ip_lock(port);
4215 imq_lock(&port->ip_messages);
4216 if (ipc_port_clear_watchport_elem_internal_conditional_locked(port,
4217 &watchports->tw_elem[i]) == KERN_SUCCESS) {
4218 task_watchport_elem_clear(&watchports->tw_elem[i]);
4219 port_freelist[j++] = port;
4220 refs = task_watchports_release(watchports);
4221
4222 /* Check if all ports are cleaned */
4223 if (refs == 0) {
4224 task->watchports = NULL;
4225 break;
4226 }
4227 }
4228 /* mqueue and port unlocked by ipc_port_clear_watchport_elem_internal_conditional_locked */
4229 }
4230 return refs;
4231}
4232
4233/*
4234 * task_watchports_alloc_init:
4235 * Allocate and initialize task watchport struct.
4236 *
4237 * Conditions:
4238 * Nothing locked.
4239 */
4240static struct task_watchports *
4241task_watchports_alloc_init(
4242 task_t task,
4243 thread_t thread,
4244 uint32_t count)
4245{
4246 struct task_watchports *watchports = kalloc(sizeof(struct task_watchports) +
4247 count * sizeof(struct task_watchport_elem));
4248
4249 task_reference(task);
4250 thread_reference(thread);
4251 watchports->tw_task = task;
4252 watchports->tw_thread = thread;
4253 watchports->tw_elem_array_count = count;
4254 os_ref_init(&watchports->tw_refcount, &task_watchports_refgrp);
4255
4256 return watchports;
4257}
4258
4259/*
4260 * task_watchports_deallocate:
4261 * Deallocate task watchport struct.
4262 *
4263 * Conditions:
4264 * Nothing locked.
4265 */
4266static void
4267task_watchports_deallocate(
4268 struct task_watchports *watchports)
4269{
4270 uint32_t portwatch_count = watchports->tw_elem_array_count;
4271
4272 task_deallocate(watchports->tw_task);
4273 thread_deallocate(watchports->tw_thread);
4274 kfree(watchports, sizeof(struct task_watchports) + portwatch_count * sizeof(struct task_watchport_elem));
4275}
4276
4277/*
4278 * task_watchport_elem_deallocate:
4279 * Deallocate task watchport element and release its ref on task_watchport.
4280 *
4281 * Conditions:
4282 * Nothing locked.
4283 */
4284void
4285task_watchport_elem_deallocate(
4286 struct task_watchport_elem *watchport_elem)
4287{
4288 os_ref_count_t refs = TASK_MAX_WATCHPORT_COUNT;
4289 task_t task = watchport_elem->twe_task;
4290 struct task_watchports *watchports = NULL;
4291 ipc_port_t port = NULL;
4292
4293 assert(task != NULL);
4294
4295 /* Take the space lock to modify the elememt */
4296 is_write_lock(task->itk_space);
4297
4298 watchports = task->watchports;
4299 assert(watchports != NULL);
4300
4301 port = watchport_elem->twe_port;
4302 assert(port != NULL);
4303
4304 task_watchport_elem_clear(watchport_elem);
4305 refs = task_watchports_release(watchports);
4306
4307 if (refs == 0) {
4308 task->watchports = NULL;
4309 }
4310
4311 is_write_unlock(task->itk_space);
4312
4313 ip_release(port);
4314 if (refs == 0) {
4315 task_watchports_deallocate(watchports);
4316 }
4317}
4318
4319/*
4320 * task_has_watchports:
4321 * Return TRUE if task has watchport boosts.
4322 *
4323 * Conditions:
4324 * Nothing locked.
4325 */
4326boolean_t
4327task_has_watchports(task_t task)
4328{
4329 return task->watchports != NULL;
4330}
4331
4332#if DEVELOPMENT || DEBUG
4333
4334extern void IOSleep(int);
4335
4336kern_return_t
4337task_disconnect_page_mappings(task_t task)
4338{
4339 int n;
4340
4341 if (task == TASK_NULL || task == kernel_task) {
4342 return KERN_INVALID_ARGUMENT;
4343 }
4344
4345 /*
4346 * this function is used to strip all of the mappings from
4347 * the pmap for the specified task to force the task to
4348 * re-fault all of the pages it is actively using... this
4349 * allows us to approximate the true working set of the
4350 * specified task. We only engage if at least 1 of the
4351 * threads in the task is runnable, but we want to continuously
4352 * sweep (at least for a while - I've arbitrarily set the limit at
4353 * 100 sweeps to be re-looked at as we gain experience) to get a better
4354 * view into what areas within a page are being visited (as opposed to only
4355 * seeing the first fault of a page after the task becomes
4356 * runnable)... in the future I may
4357 * try to block until awakened by a thread in this task
4358 * being made runnable, but for now we'll periodically poll from the
4359 * user level debug tool driving the sysctl
4360 */
4361 for (n = 0; n < 100; n++) {
4362 thread_t thread;
4363 boolean_t runnable;
4364 boolean_t do_unnest;
4365 int page_count;
4366
4367 runnable = FALSE;
4368 do_unnest = FALSE;
4369
4370 task_lock(task);
4371
4372 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4373 if (thread->state & TH_RUN) {
4374 runnable = TRUE;
4375 break;
4376 }
4377 }
4378 if (n == 0) {
4379 task->task_disconnected_count++;
4380 }
4381
4382 if (task->task_unnested == FALSE) {
4383 if (runnable == TRUE) {
4384 task->task_unnested = TRUE;
4385 do_unnest = TRUE;
4386 }
4387 }
4388 task_unlock(task);
4389
4390 if (runnable == FALSE) {
4391 break;
4392 }
4393
4394 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_TASK_PAGE_MAPPINGS)) | DBG_FUNC_START,
4395 task, do_unnest, task->task_disconnected_count, 0, 0);
4396
4397 page_count = vm_map_disconnect_page_mappings(task->map, do_unnest);
4398
4399 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_TASK_PAGE_MAPPINGS)) | DBG_FUNC_END,
4400 task, page_count, 0, 0, 0);
4401
4402 if ((n % 5) == 4) {
4403 IOSleep(1);
4404 }
4405 }
4406 return KERN_SUCCESS;
4407}
4408
4409#endif
4410
4411
4412#if CONFIG_FREEZE
4413
4414/*
4415 * task_freeze:
4416 *
4417 * Freeze a task.
4418 *
4419 * Conditions:
4420 * The caller holds a reference to the task
4421 */
4422extern void vm_wake_compactor_swapper(void);
4423extern queue_head_t c_swapout_list_head;
4424extern struct freezer_context freezer_context_global;
4425
4426kern_return_t
4427task_freeze(
4428 task_t task,
4429 uint32_t *purgeable_count,
4430 uint32_t *wired_count,
4431 uint32_t *clean_count,
4432 uint32_t *dirty_count,
4433 uint32_t dirty_budget,
4434 uint32_t *shared_count,
4435 int *freezer_error_code,
4436 boolean_t eval_only)
4437{
4438 kern_return_t kr = KERN_SUCCESS;
4439
4440 if (task == TASK_NULL || task == kernel_task) {
4441 return KERN_INVALID_ARGUMENT;
4442 }
4443
4444 task_lock(task);
4445
4446 while (task->changing_freeze_state) {
4447 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
4448 task_unlock(task);
4449 thread_block(THREAD_CONTINUE_NULL);
4450
4451 task_lock(task);
4452 }
4453 if (task->frozen) {
4454 task_unlock(task);
4455 return KERN_FAILURE;
4456 }
4457 task->changing_freeze_state = TRUE;
4458
4459 freezer_context_global.freezer_ctx_task = task;
4460
4461 task_unlock(task);
4462
4463 kr = vm_map_freeze(task,
4464 purgeable_count,
4465 wired_count,
4466 clean_count,
4467 dirty_count,
4468 dirty_budget,
4469 shared_count,
4470 freezer_error_code,
4471 eval_only);
4472
4473 task_lock(task);
4474
4475 if ((kr == KERN_SUCCESS) && (eval_only == FALSE)) {
4476 task->frozen = TRUE;
4477
4478 freezer_context_global.freezer_ctx_task = NULL;
4479 freezer_context_global.freezer_ctx_uncompressed_pages = 0;
4480
4481 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
4482 /*
4483 * reset the counter tracking the # of swapped compressed pages
4484 * because we are now done with this freeze session and task.
4485 */
4486
4487 *dirty_count = (uint32_t) (freezer_context_global.freezer_ctx_swapped_bytes / PAGE_SIZE_64); /*used to track pageouts*/
4488 }
4489
4490 freezer_context_global.freezer_ctx_swapped_bytes = 0;
4491 }
4492
4493 task->changing_freeze_state = FALSE;
4494 thread_wakeup(&task->changing_freeze_state);
4495
4496 task_unlock(task);
4497
4498 if (VM_CONFIG_COMPRESSOR_IS_PRESENT &&
4499 (kr == KERN_SUCCESS) &&
4500 (eval_only == FALSE)) {
4501 vm_wake_compactor_swapper();
4502 /*
4503 * We do an explicit wakeup of the swapout thread here
4504 * because the compact_and_swap routines don't have
4505 * knowledge about these kind of "per-task packed c_segs"
4506 * and so will not be evaluating whether we need to do
4507 * a wakeup there.
4508 */
4509 thread_wakeup((event_t)&c_swapout_list_head);
4510 }
4511
4512 return kr;
4513}
4514
4515/*
4516 * task_thaw:
4517 *
4518 * Thaw a currently frozen task.
4519 *
4520 * Conditions:
4521 * The caller holds a reference to the task
4522 */
4523kern_return_t
4524task_thaw(
4525 task_t task)
4526{
4527 if (task == TASK_NULL || task == kernel_task) {
4528 return KERN_INVALID_ARGUMENT;
4529 }
4530
4531 task_lock(task);
4532
4533 while (task->changing_freeze_state) {
4534 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
4535 task_unlock(task);
4536 thread_block(THREAD_CONTINUE_NULL);
4537
4538 task_lock(task);
4539 }
4540 if (!task->frozen) {
4541 task_unlock(task);
4542 return KERN_FAILURE;
4543 }
4544 task->frozen = FALSE;
4545
4546 task_unlock(task);
4547
4548 return KERN_SUCCESS;
4549}
4550
4551void
4552task_update_frozen_to_swap_acct(task_t task, int64_t amount, freezer_acct_op_t op)
4553{
4554 /*
4555 * We don't assert that the task lock is held because we call this
4556 * routine from the decompression path and we won't be holding the
4557 * task lock. However, since we are in the context of the task we are
4558 * safe.
4559 * In the case of the task_freeze path, we call it from behind the task
4560 * lock but we don't need to because we have a reference on the proc
4561 * being frozen.
4562 */
4563
4564 assert(task);
4565 if (amount == 0) {
4566 return;
4567 }
4568
4569 if (op == CREDIT_TO_SWAP) {
4570 ledger_credit_nocheck(task->ledger, task_ledgers.frozen_to_swap, amount);
4571 } else if (op == DEBIT_FROM_SWAP) {
4572 ledger_debit_nocheck(task->ledger, task_ledgers.frozen_to_swap, amount);
4573 } else {
4574 panic("task_update_frozen_to_swap_acct: Invalid ledger op\n");
4575 }
4576}
4577#endif /* CONFIG_FREEZE */
4578
4579kern_return_t
4580host_security_set_task_token(
4581 host_security_t host_security,
4582 task_t task,
4583 security_token_t sec_token,
4584 audit_token_t audit_token,
4585 host_priv_t host_priv)
4586{
4587 ipc_port_t host_port;
4588 kern_return_t kr;
4589
4590 if (task == TASK_NULL) {
4591 return KERN_INVALID_ARGUMENT;
4592 }
4593
4594 if (host_security == HOST_NULL) {
4595 return KERN_INVALID_SECURITY;
4596 }
4597
4598 task_lock(task);
4599 task->sec_token = sec_token;
4600 task->audit_token = audit_token;
4601 task_unlock(task);
4602
4603 if (host_priv != HOST_PRIV_NULL) {
4604 kr = host_get_host_priv_port(host_priv, &host_port);
4605 } else {
4606 kr = host_get_host_port(host_priv_self(), &host_port);
4607 }
4608 assert(kr == KERN_SUCCESS);
4609
4610 kr = task_set_special_port_internal(task, TASK_HOST_PORT, host_port);
4611 return kr;
4612}
4613
4614kern_return_t
4615task_send_trace_memory(
4616 __unused task_t target_task,
4617 __unused uint32_t pid,
4618 __unused uint64_t uniqueid)
4619{
4620 return KERN_INVALID_ARGUMENT;
4621}
4622
4623/*
4624 * This routine was added, pretty much exclusively, for registering the
4625 * RPC glue vector for in-kernel short circuited tasks. Rather than
4626 * removing it completely, I have only disabled that feature (which was
4627 * the only feature at the time). It just appears that we are going to
4628 * want to add some user data to tasks in the future (i.e. bsd info,
4629 * task names, etc...), so I left it in the formal task interface.
4630 */
4631kern_return_t
4632task_set_info(
4633 task_t task,
4634 task_flavor_t flavor,
4635 __unused task_info_t task_info_in, /* pointer to IN array */
4636 __unused mach_msg_type_number_t task_info_count)
4637{
4638 if (task == TASK_NULL) {
4639 return KERN_INVALID_ARGUMENT;
4640 }
4641 switch (flavor) {
4642#if CONFIG_ATM
4643 case TASK_TRACE_MEMORY_INFO:
4644 return KERN_NOT_SUPPORTED;
4645#endif // CONFIG_ATM
4646 default:
4647 return KERN_INVALID_ARGUMENT;
4648 }
4649}
4650
4651int radar_20146450 = 1;
4652kern_return_t
4653task_info(
4654 task_t task,
4655 task_flavor_t flavor,
4656 task_info_t task_info_out,
4657 mach_msg_type_number_t *task_info_count)
4658{
4659 kern_return_t error = KERN_SUCCESS;
4660 mach_msg_type_number_t original_task_info_count;
4661 bool is_kernel_task = (task == kernel_task);
4662
4663 if (task == TASK_NULL) {
4664 return KERN_INVALID_ARGUMENT;
4665 }
4666
4667 original_task_info_count = *task_info_count;
4668 task_lock(task);
4669
4670 if ((task != current_task()) && (!task->active)) {
4671 task_unlock(task);
4672 return KERN_INVALID_ARGUMENT;
4673 }
4674
4675
4676 switch (flavor) {
4677 case TASK_BASIC_INFO_32:
4678 case TASK_BASIC2_INFO_32:
4679#if defined(__arm__) || defined(__arm64__)
4680 case TASK_BASIC_INFO_64:
4681#endif
4682 {
4683 task_basic_info_32_t basic_info;
4684 vm_map_t map;
4685 clock_sec_t secs;
4686 clock_usec_t usecs;
4687
4688 if (*task_info_count < TASK_BASIC_INFO_32_COUNT) {
4689 error = KERN_INVALID_ARGUMENT;
4690 break;
4691 }
4692
4693 basic_info = (task_basic_info_32_t)task_info_out;
4694
4695 map = (task == kernel_task)? kernel_map: task->map;
4696 basic_info->virtual_size = (typeof(basic_info->virtual_size))vm_map_adjusted_size(map);
4697 if (flavor == TASK_BASIC2_INFO_32) {
4698 /*
4699 * The "BASIC2" flavor gets the maximum resident
4700 * size instead of the current resident size...
4701 */
4702 basic_info->resident_size = pmap_resident_max(map->pmap);
4703 } else {
4704 basic_info->resident_size = pmap_resident_count(map->pmap);
4705 }
4706 basic_info->resident_size *= PAGE_SIZE;
4707
4708 basic_info->policy = ((task != kernel_task)?
4709 POLICY_TIMESHARE: POLICY_RR);
4710 basic_info->suspend_count = task->user_stop_count;
4711
4712 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
4713 basic_info->user_time.seconds =
4714 (typeof(basic_info->user_time.seconds))secs;
4715 basic_info->user_time.microseconds = usecs;
4716
4717 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
4718 basic_info->system_time.seconds =
4719 (typeof(basic_info->system_time.seconds))secs;
4720 basic_info->system_time.microseconds = usecs;
4721
4722 *task_info_count = TASK_BASIC_INFO_32_COUNT;
4723 break;
4724 }
4725
4726#if defined(__arm__) || defined(__arm64__)
4727 case TASK_BASIC_INFO_64_2:
4728 {
4729 task_basic_info_64_2_t basic_info;
4730 vm_map_t map;
4731 clock_sec_t secs;
4732 clock_usec_t usecs;
4733
4734 if (*task_info_count < TASK_BASIC_INFO_64_2_COUNT) {
4735 error = KERN_INVALID_ARGUMENT;
4736 break;
4737 }
4738
4739 basic_info = (task_basic_info_64_2_t)task_info_out;
4740
4741 map = (task == kernel_task)? kernel_map: task->map;
4742 basic_info->virtual_size = vm_map_adjusted_size(map);
4743 basic_info->resident_size =
4744 (mach_vm_size_t)(pmap_resident_count(map->pmap))
4745 * PAGE_SIZE_64;
4746
4747 basic_info->policy = ((task != kernel_task)?
4748 POLICY_TIMESHARE: POLICY_RR);
4749 basic_info->suspend_count = task->user_stop_count;
4750
4751 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
4752 basic_info->user_time.seconds =
4753 (typeof(basic_info->user_time.seconds))secs;
4754 basic_info->user_time.microseconds = usecs;
4755
4756 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
4757 basic_info->system_time.seconds =
4758 (typeof(basic_info->system_time.seconds))secs;
4759 basic_info->system_time.microseconds = usecs;
4760
4761 *task_info_count = TASK_BASIC_INFO_64_2_COUNT;
4762 break;
4763 }
4764
4765#else /* defined(__arm__) || defined(__arm64__) */
4766 case TASK_BASIC_INFO_64:
4767 {
4768 task_basic_info_64_t basic_info;
4769 vm_map_t map;
4770 clock_sec_t secs;
4771 clock_usec_t usecs;
4772
4773 if (*task_info_count < TASK_BASIC_INFO_64_COUNT) {
4774 error = KERN_INVALID_ARGUMENT;
4775 break;
4776 }
4777
4778 basic_info = (task_basic_info_64_t)task_info_out;
4779
4780 map = (task == kernel_task)? kernel_map: task->map;
4781 basic_info->virtual_size = vm_map_adjusted_size(map);
4782 basic_info->resident_size =
4783 (mach_vm_size_t)(pmap_resident_count(map->pmap))
4784 * PAGE_SIZE_64;
4785
4786 basic_info->policy = ((task != kernel_task)?
4787 POLICY_TIMESHARE: POLICY_RR);
4788 basic_info->suspend_count = task->user_stop_count;
4789
4790 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
4791 basic_info->user_time.seconds =
4792 (typeof(basic_info->user_time.seconds))secs;
4793 basic_info->user_time.microseconds = usecs;
4794
4795 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
4796 basic_info->system_time.seconds =
4797 (typeof(basic_info->system_time.seconds))secs;
4798 basic_info->system_time.microseconds = usecs;
4799
4800 *task_info_count = TASK_BASIC_INFO_64_COUNT;
4801 break;
4802 }
4803#endif /* defined(__arm__) || defined(__arm64__) */
4804
4805 case MACH_TASK_BASIC_INFO:
4806 {
4807 mach_task_basic_info_t basic_info;
4808 vm_map_t map;
4809 clock_sec_t secs;
4810 clock_usec_t usecs;
4811
4812 if (*task_info_count < MACH_TASK_BASIC_INFO_COUNT) {
4813 error = KERN_INVALID_ARGUMENT;
4814 break;
4815 }
4816
4817 basic_info = (mach_task_basic_info_t)task_info_out;
4818
4819 map = (task == kernel_task) ? kernel_map : task->map;
4820
4821 basic_info->virtual_size = vm_map_adjusted_size(map);
4822
4823 basic_info->resident_size =
4824 (mach_vm_size_t)(pmap_resident_count(map->pmap));
4825 basic_info->resident_size *= PAGE_SIZE_64;
4826
4827 basic_info->resident_size_max =
4828 (mach_vm_size_t)(pmap_resident_max(map->pmap));
4829 basic_info->resident_size_max *= PAGE_SIZE_64;
4830
4831 basic_info->policy = ((task != kernel_task) ?
4832 POLICY_TIMESHARE : POLICY_RR);
4833
4834 basic_info->suspend_count = task->user_stop_count;
4835
4836 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
4837 basic_info->user_time.seconds =
4838 (typeof(basic_info->user_time.seconds))secs;
4839 basic_info->user_time.microseconds = usecs;
4840
4841 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
4842 basic_info->system_time.seconds =
4843 (typeof(basic_info->system_time.seconds))secs;
4844 basic_info->system_time.microseconds = usecs;
4845
4846 *task_info_count = MACH_TASK_BASIC_INFO_COUNT;
4847 break;
4848 }
4849
4850 case TASK_THREAD_TIMES_INFO:
4851 {
4852 task_thread_times_info_t times_info;
4853 thread_t thread;
4854
4855 if (*task_info_count < TASK_THREAD_TIMES_INFO_COUNT) {
4856 error = KERN_INVALID_ARGUMENT;
4857 break;
4858 }
4859
4860 times_info = (task_thread_times_info_t) task_info_out;
4861 times_info->user_time.seconds = 0;
4862 times_info->user_time.microseconds = 0;
4863 times_info->system_time.seconds = 0;
4864 times_info->system_time.microseconds = 0;
4865
4866
4867 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4868 time_value_t user_time, system_time;
4869
4870 if (thread->options & TH_OPT_IDLE_THREAD) {
4871 continue;
4872 }
4873
4874 thread_read_times(thread, &user_time, &system_time, NULL);
4875
4876 time_value_add(&times_info->user_time, &user_time);
4877 time_value_add(&times_info->system_time, &system_time);
4878 }
4879
4880 *task_info_count = TASK_THREAD_TIMES_INFO_COUNT;
4881 break;
4882 }
4883
4884 case TASK_ABSOLUTETIME_INFO:
4885 {
4886 task_absolutetime_info_t info;
4887 thread_t thread;
4888
4889 if (*task_info_count < TASK_ABSOLUTETIME_INFO_COUNT) {
4890 error = KERN_INVALID_ARGUMENT;
4891 break;
4892 }
4893
4894 info = (task_absolutetime_info_t)task_info_out;
4895 info->threads_user = info->threads_system = 0;
4896
4897
4898 info->total_user = task->total_user_time;
4899 info->total_system = task->total_system_time;
4900
4901 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4902 uint64_t tval;
4903 spl_t x;
4904
4905 if (thread->options & TH_OPT_IDLE_THREAD) {
4906 continue;
4907 }
4908
4909 x = splsched();
4910 thread_lock(thread);
4911
4912 tval = timer_grab(&thread->user_timer);
4913 info->threads_user += tval;
4914 info->total_user += tval;
4915
4916 tval = timer_grab(&thread->system_timer);
4917 if (thread->precise_user_kernel_time) {
4918 info->threads_system += tval;
4919 info->total_system += tval;
4920 } else {
4921 /* system_timer may represent either sys or user */
4922 info->threads_user += tval;
4923 info->total_user += tval;
4924 }
4925
4926 thread_unlock(thread);
4927 splx(x);
4928 }
4929
4930
4931 *task_info_count = TASK_ABSOLUTETIME_INFO_COUNT;
4932 break;
4933 }
4934
4935 case TASK_DYLD_INFO:
4936 {
4937 task_dyld_info_t info;
4938
4939 /*
4940 * We added the format field to TASK_DYLD_INFO output. For
4941 * temporary backward compatibility, accept the fact that
4942 * clients may ask for the old version - distinguished by the
4943 * size of the expected result structure.
4944 */
4945#define TASK_LEGACY_DYLD_INFO_COUNT \
4946 offsetof(struct task_dyld_info, all_image_info_format)/sizeof(natural_t)
4947
4948 if (*task_info_count < TASK_LEGACY_DYLD_INFO_COUNT) {
4949 error = KERN_INVALID_ARGUMENT;
4950 break;
4951 }
4952
4953 info = (task_dyld_info_t)task_info_out;
4954 info->all_image_info_addr = task->all_image_info_addr;
4955 info->all_image_info_size = task->all_image_info_size;
4956
4957 /* only set format on output for those expecting it */
4958 if (*task_info_count >= TASK_DYLD_INFO_COUNT) {
4959 info->all_image_info_format = task_has_64Bit_addr(task) ?
4960 TASK_DYLD_ALL_IMAGE_INFO_64 :
4961 TASK_DYLD_ALL_IMAGE_INFO_32;
4962 *task_info_count = TASK_DYLD_INFO_COUNT;
4963 } else {
4964 *task_info_count = TASK_LEGACY_DYLD_INFO_COUNT;
4965 }
4966 break;
4967 }
4968
4969 case TASK_EXTMOD_INFO:
4970 {
4971 task_extmod_info_t info;
4972 void *p;
4973
4974 if (*task_info_count < TASK_EXTMOD_INFO_COUNT) {
4975 error = KERN_INVALID_ARGUMENT;
4976 break;
4977 }
4978
4979 info = (task_extmod_info_t)task_info_out;
4980
4981 p = get_bsdtask_info(task);
4982 if (p) {
4983 proc_getexecutableuuid(p, info->task_uuid, sizeof(info->task_uuid));
4984 } else {
4985 bzero(info->task_uuid, sizeof(info->task_uuid));
4986 }
4987 info->extmod_statistics = task->extmod_statistics;
4988 *task_info_count = TASK_EXTMOD_INFO_COUNT;
4989
4990 break;
4991 }
4992
4993 case TASK_KERNELMEMORY_INFO:
4994 {
4995 task_kernelmemory_info_t tkm_info;
4996 ledger_amount_t credit, debit;
4997
4998 if (*task_info_count < TASK_KERNELMEMORY_INFO_COUNT) {
4999 error = KERN_INVALID_ARGUMENT;
5000 break;
5001 }
5002
5003 tkm_info = (task_kernelmemory_info_t) task_info_out;
5004 tkm_info->total_palloc = 0;
5005 tkm_info->total_pfree = 0;
5006 tkm_info->total_salloc = 0;
5007 tkm_info->total_sfree = 0;
5008
5009 if (task == kernel_task) {
5010 /*
5011 * All shared allocs/frees from other tasks count against
5012 * the kernel private memory usage. If we are looking up
5013 * info for the kernel task, gather from everywhere.
5014 */
5015 task_unlock(task);
5016
5017 /* start by accounting for all the terminated tasks against the kernel */
5018 tkm_info->total_palloc = tasks_tkm_private.alloc + tasks_tkm_shared.alloc;
5019 tkm_info->total_pfree = tasks_tkm_private.free + tasks_tkm_shared.free;
5020
5021 /* count all other task/thread shared alloc/free against the kernel */
5022 lck_mtx_lock(&tasks_threads_lock);
5023
5024 /* XXX this really shouldn't be using the function parameter 'task' as a local var! */
5025 queue_iterate(&tasks, task, task_t, tasks) {
5026 if (task == kernel_task) {
5027 if (ledger_get_entries(task->ledger,
5028 task_ledgers.tkm_private, &credit,
5029 &debit) == KERN_SUCCESS) {
5030 tkm_info->total_palloc += credit;
5031 tkm_info->total_pfree += debit;
5032 }
5033 }
5034 if (!ledger_get_entries(task->ledger,
5035 task_ledgers.tkm_shared, &credit, &debit)) {
5036 tkm_info->total_palloc += credit;
5037 tkm_info->total_pfree += debit;
5038 }
5039 }
5040 lck_mtx_unlock(&tasks_threads_lock);
5041 } else {
5042 if (!ledger_get_entries(task->ledger,
5043 task_ledgers.tkm_private, &credit, &debit)) {
5044 tkm_info->total_palloc = credit;
5045 tkm_info->total_pfree = debit;
5046 }
5047 if (!ledger_get_entries(task->ledger,
5048 task_ledgers.tkm_shared, &credit, &debit)) {
5049 tkm_info->total_salloc = credit;
5050 tkm_info->total_sfree = debit;
5051 }
5052 task_unlock(task);
5053 }
5054
5055 *task_info_count = TASK_KERNELMEMORY_INFO_COUNT;
5056 return KERN_SUCCESS;
5057 }
5058
5059 /* OBSOLETE */
5060 case TASK_SCHED_FIFO_INFO:
5061 {
5062 if (*task_info_count < POLICY_FIFO_BASE_COUNT) {
5063 error = KERN_INVALID_ARGUMENT;
5064 break;
5065 }
5066
5067 error = KERN_INVALID_POLICY;
5068 break;
5069 }
5070
5071 /* OBSOLETE */
5072 case TASK_SCHED_RR_INFO:
5073 {
5074 policy_rr_base_t rr_base;
5075 uint32_t quantum_time;
5076 uint64_t quantum_ns;
5077
5078 if (*task_info_count < POLICY_RR_BASE_COUNT) {
5079 error = KERN_INVALID_ARGUMENT;
5080 break;
5081 }
5082
5083 rr_base = (policy_rr_base_t) task_info_out;
5084
5085 if (task != kernel_task) {
5086 error = KERN_INVALID_POLICY;
5087 break;
5088 }
5089
5090 rr_base->base_priority = task->priority;
5091
5092 quantum_time = SCHED(initial_quantum_size)(THREAD_NULL);
5093 absolutetime_to_nanoseconds(quantum_time, &quantum_ns);
5094
5095 rr_base->quantum = (uint32_t)(quantum_ns / 1000 / 1000);
5096
5097 *task_info_count = POLICY_RR_BASE_COUNT;
5098 break;
5099 }
5100
5101 /* OBSOLETE */
5102 case TASK_SCHED_TIMESHARE_INFO:
5103 {
5104 policy_timeshare_base_t ts_base;
5105
5106 if (*task_info_count < POLICY_TIMESHARE_BASE_COUNT) {
5107 error = KERN_INVALID_ARGUMENT;
5108 break;
5109 }
5110
5111 ts_base = (policy_timeshare_base_t) task_info_out;
5112
5113 if (task == kernel_task) {
5114 error = KERN_INVALID_POLICY;
5115 break;
5116 }
5117
5118 ts_base->base_priority = task->priority;
5119
5120 *task_info_count = POLICY_TIMESHARE_BASE_COUNT;
5121 break;
5122 }
5123
5124 case TASK_SECURITY_TOKEN:
5125 {
5126 security_token_t *sec_token_p;
5127
5128 if (*task_info_count < TASK_SECURITY_TOKEN_COUNT) {
5129 error = KERN_INVALID_ARGUMENT;
5130 break;
5131 }
5132
5133 sec_token_p = (security_token_t *) task_info_out;
5134
5135 *sec_token_p = task->sec_token;
5136
5137 *task_info_count = TASK_SECURITY_TOKEN_COUNT;
5138 break;
5139 }
5140
5141 case TASK_AUDIT_TOKEN:
5142 {
5143 audit_token_t *audit_token_p;
5144
5145 if (*task_info_count < TASK_AUDIT_TOKEN_COUNT) {
5146 error = KERN_INVALID_ARGUMENT;
5147 break;
5148 }
5149
5150 audit_token_p = (audit_token_t *) task_info_out;
5151
5152 *audit_token_p = task->audit_token;
5153
5154 *task_info_count = TASK_AUDIT_TOKEN_COUNT;
5155 break;
5156 }
5157
5158 case TASK_SCHED_INFO:
5159 error = KERN_INVALID_ARGUMENT;
5160 break;
5161
5162 case TASK_EVENTS_INFO:
5163 {
5164 task_events_info_t events_info;
5165 thread_t thread;
5166
5167 if (*task_info_count < TASK_EVENTS_INFO_COUNT) {
5168 error = KERN_INVALID_ARGUMENT;
5169 break;
5170 }
5171
5172 events_info = (task_events_info_t) task_info_out;
5173
5174
5175 events_info->faults = (int32_t) MIN(counter_load(&task->faults), INT32_MAX);
5176 events_info->pageins = task->pageins;
5177 events_info->cow_faults = task->cow_faults;
5178 events_info->messages_sent = task->messages_sent;
5179 events_info->messages_received = task->messages_received;
5180 events_info->syscalls_mach = task->syscalls_mach;
5181 events_info->syscalls_unix = task->syscalls_unix;
5182
5183 events_info->csw = task->c_switch;
5184
5185 queue_iterate(&task->threads, thread, thread_t, task_threads) {
5186 events_info->csw += thread->c_switch;
5187 events_info->syscalls_mach += thread->syscalls_mach;
5188 events_info->syscalls_unix += thread->syscalls_unix;
5189 }
5190
5191
5192 *task_info_count = TASK_EVENTS_INFO_COUNT;
5193 break;
5194 }
5195 case TASK_AFFINITY_TAG_INFO:
5196 {
5197 if (*task_info_count < TASK_AFFINITY_TAG_INFO_COUNT) {
5198 error = KERN_INVALID_ARGUMENT;
5199 break;
5200 }
5201
5202 error = task_affinity_info(task, task_info_out, task_info_count);
5203 break;
5204 }
5205 case TASK_POWER_INFO:
5206 {
5207 if (*task_info_count < TASK_POWER_INFO_COUNT) {
5208 error = KERN_INVALID_ARGUMENT;
5209 break;
5210 }
5211
5212 task_power_info_locked(task, (task_power_info_t)task_info_out, NULL, NULL, NULL);
5213 break;
5214 }
5215
5216 case TASK_POWER_INFO_V2:
5217 {
5218 if (*task_info_count < TASK_POWER_INFO_V2_COUNT_OLD) {
5219 error = KERN_INVALID_ARGUMENT;
5220 break;
5221 }
5222 task_power_info_v2_t tpiv2 = (task_power_info_v2_t) task_info_out;
5223 task_power_info_locked(task, &tpiv2->cpu_energy, &tpiv2->gpu_energy, tpiv2, NULL);
5224 break;
5225 }
5226
5227 case TASK_VM_INFO:
5228 case TASK_VM_INFO_PURGEABLE:
5229 {
5230 task_vm_info_t vm_info;
5231 vm_map_t map;
5232
5233#if __arm64__
5234 struct proc *p;
5235 uint32_t platform, sdk;
5236 p = current_proc();
5237 platform = proc_platform(p);
5238 sdk = proc_min_sdk(p);
5239 if (original_task_info_count > TASK_VM_INFO_REV2_COUNT &&
5240 platform == PLATFORM_IOS &&
5241 sdk != 0 &&
5242 (sdk >> 16) <= 12) {
5243 /*
5244 * Some iOS apps pass an incorrect value for
5245 * task_info_count, expressed in number of bytes
5246 * instead of number of "natural_t" elements.
5247 * For the sake of backwards binary compatibility
5248 * for apps built with an iOS12 or older SDK and using
5249 * the "rev2" data structure, let's fix task_info_count
5250 * for them, to avoid stomping past the actual end
5251 * of their buffer.
5252 */
5253#if DEVELOPMENT || DEBUG
5254 printf("%s:%d %d[%s] rdar://49484582 task_info_count %d -> %d platform %d sdk %d.%d.%d\n", __FUNCTION__, __LINE__, proc_pid(p), proc_name_address(p), original_task_info_count, TASK_VM_INFO_REV2_COUNT, platform, (sdk >> 16), ((sdk >> 8) & 0xff), (sdk & 0xff));
5255#endif /* DEVELOPMENT || DEBUG */
5256 DTRACE_VM4(workaround_task_vm_info_count,
5257 mach_msg_type_number_t, original_task_info_count,
5258 mach_msg_type_number_t, TASK_VM_INFO_REV2_COUNT,
5259 uint32_t, platform,
5260 uint32_t, sdk);
5261 original_task_info_count = TASK_VM_INFO_REV2_COUNT;
5262 *task_info_count = original_task_info_count;
5263 }
5264#endif /* __arm64__ */
5265
5266 if (*task_info_count < TASK_VM_INFO_REV0_COUNT) {
5267 error = KERN_INVALID_ARGUMENT;
5268 break;
5269 }
5270
5271 vm_info = (task_vm_info_t)task_info_out;
5272
5273 /*
5274 * Do not hold both the task and map locks,
5275 * so convert the task lock into a map reference,
5276 * drop the task lock, then lock the map.
5277 */
5278 if (is_kernel_task) {
5279 map = kernel_map;
5280 task_unlock(task);
5281 /* no lock, no reference */
5282 } else {
5283 map = task->map;
5284 vm_map_reference(map);
5285 task_unlock(task);
5286 vm_map_lock_read(map);
5287 }
5288
5289 vm_info->virtual_size = (typeof(vm_info->virtual_size))vm_map_adjusted_size(map);
5290 vm_info->region_count = map->hdr.nentries;
5291 vm_info->page_size = vm_map_page_size(map);
5292
5293 vm_info->resident_size = pmap_resident_count(map->pmap);
5294 vm_info->resident_size *= PAGE_SIZE;
5295 vm_info->resident_size_peak = pmap_resident_max(map->pmap);
5296 vm_info->resident_size_peak *= PAGE_SIZE;
5297
5298#define _VM_INFO(_name) \
5299 vm_info->_name = ((mach_vm_size_t) map->pmap->stats._name) * PAGE_SIZE
5300
5301 _VM_INFO(device);
5302 _VM_INFO(device_peak);
5303 _VM_INFO(external);
5304 _VM_INFO(external_peak);
5305 _VM_INFO(internal);
5306 _VM_INFO(internal_peak);
5307 _VM_INFO(reusable);
5308 _VM_INFO(reusable_peak);
5309 _VM_INFO(compressed);
5310 _VM_INFO(compressed_peak);
5311 _VM_INFO(compressed_lifetime);
5312
5313 vm_info->purgeable_volatile_pmap = 0;
5314 vm_info->purgeable_volatile_resident = 0;
5315 vm_info->purgeable_volatile_virtual = 0;
5316 if (is_kernel_task) {
5317 /*
5318 * We do not maintain the detailed stats for the
5319 * kernel_pmap, so just count everything as
5320 * "internal"...
5321 */
5322 vm_info->internal = vm_info->resident_size;
5323 /*
5324 * ... but since the memory held by the VM compressor
5325 * in the kernel address space ought to be attributed
5326 * to user-space tasks, we subtract it from "internal"
5327 * to give memory reporting tools a more accurate idea
5328 * of what the kernel itself is actually using, instead
5329 * of making it look like the kernel is leaking memory
5330 * when the system is under memory pressure.
5331 */
5332 vm_info->internal -= (VM_PAGE_COMPRESSOR_COUNT *
5333 PAGE_SIZE);
5334 } else {
5335 mach_vm_size_t volatile_virtual_size;
5336 mach_vm_size_t volatile_resident_size;
5337 mach_vm_size_t volatile_compressed_size;
5338 mach_vm_size_t volatile_pmap_size;
5339 mach_vm_size_t volatile_compressed_pmap_size;
5340 kern_return_t kr;
5341
5342 if (flavor == TASK_VM_INFO_PURGEABLE) {
5343 kr = vm_map_query_volatile(
5344 map,
5345 &volatile_virtual_size,
5346 &volatile_resident_size,
5347 &volatile_compressed_size,
5348 &volatile_pmap_size,
5349 &volatile_compressed_pmap_size);
5350 if (kr == KERN_SUCCESS) {
5351 vm_info->purgeable_volatile_pmap =
5352 volatile_pmap_size;
5353 if (radar_20146450) {
5354 vm_info->compressed -=
5355 volatile_compressed_pmap_size;
5356 }
5357 vm_info->purgeable_volatile_resident =
5358 volatile_resident_size;
5359 vm_info->purgeable_volatile_virtual =
5360 volatile_virtual_size;
5361 }
5362 }
5363 }
5364 *task_info_count = TASK_VM_INFO_REV0_COUNT;
5365
5366 if (original_task_info_count >= TASK_VM_INFO_REV2_COUNT) {
5367 /* must be captured while we still have the map lock */
5368 vm_info->min_address = map->min_offset;
5369 vm_info->max_address = map->max_offset;
5370 }
5371
5372 /*
5373 * Done with vm map things, can drop the map lock and reference,
5374 * and take the task lock back.
5375 *
5376 * Re-validate that the task didn't die on us.
5377 */
5378 if (!is_kernel_task) {
5379 vm_map_unlock_read(map);
5380 vm_map_deallocate(map);
5381 }
5382 map = VM_MAP_NULL;
5383
5384 task_lock(task);
5385
5386 if ((task != current_task()) && (!task->active)) {
5387 error = KERN_INVALID_ARGUMENT;
5388 break;
5389 }
5390
5391 if (original_task_info_count >= TASK_VM_INFO_REV1_COUNT) {
5392 vm_info->phys_footprint =
5393 (mach_vm_size_t) get_task_phys_footprint(task);
5394 *task_info_count = TASK_VM_INFO_REV1_COUNT;
5395 }
5396 if (original_task_info_count >= TASK_VM_INFO_REV2_COUNT) {
5397 /* data was captured above */
5398 *task_info_count = TASK_VM_INFO_REV2_COUNT;
5399 }
5400
5401 if (original_task_info_count >= TASK_VM_INFO_REV3_COUNT) {
5402 ledger_get_lifetime_max(task->ledger,
5403 task_ledgers.phys_footprint,
5404 &vm_info->ledger_phys_footprint_peak);
5405 ledger_get_balance(task->ledger,
5406 task_ledgers.purgeable_nonvolatile,
5407 &vm_info->ledger_purgeable_nonvolatile);
5408 ledger_get_balance(task->ledger,
5409 task_ledgers.purgeable_nonvolatile_compressed,
5410 &vm_info->ledger_purgeable_novolatile_compressed);
5411 ledger_get_balance(task->ledger,
5412 task_ledgers.purgeable_volatile,
5413 &vm_info->ledger_purgeable_volatile);
5414 ledger_get_balance(task->ledger,
5415 task_ledgers.purgeable_volatile_compressed,
5416 &vm_info->ledger_purgeable_volatile_compressed);
5417 ledger_get_balance(task->ledger,
5418 task_ledgers.network_nonvolatile,
5419 &vm_info->ledger_tag_network_nonvolatile);
5420 ledger_get_balance(task->ledger,
5421 task_ledgers.network_nonvolatile_compressed,
5422 &vm_info->ledger_tag_network_nonvolatile_compressed);
5423 ledger_get_balance(task->ledger,
5424 task_ledgers.network_volatile,
5425 &vm_info->ledger_tag_network_volatile);
5426 ledger_get_balance(task->ledger,
5427 task_ledgers.network_volatile_compressed,
5428 &vm_info->ledger_tag_network_volatile_compressed);
5429 ledger_get_balance(task->ledger,
5430 task_ledgers.media_footprint,
5431 &vm_info->ledger_tag_media_footprint);
5432 ledger_get_balance(task->ledger,
5433 task_ledgers.media_footprint_compressed,
5434 &vm_info->ledger_tag_media_footprint_compressed);
5435 ledger_get_balance(task->ledger,
5436 task_ledgers.media_nofootprint,
5437 &vm_info->ledger_tag_media_nofootprint);
5438 ledger_get_balance(task->ledger,
5439 task_ledgers.media_nofootprint_compressed,
5440 &vm_info->ledger_tag_media_nofootprint_compressed);
5441 ledger_get_balance(task->ledger,
5442 task_ledgers.graphics_footprint,
5443 &vm_info->ledger_tag_graphics_footprint);
5444 ledger_get_balance(task->ledger,
5445 task_ledgers.graphics_footprint_compressed,
5446 &vm_info->ledger_tag_graphics_footprint_compressed);
5447 ledger_get_balance(task->ledger,
5448 task_ledgers.graphics_nofootprint,
5449 &vm_info->ledger_tag_graphics_nofootprint);
5450 ledger_get_balance(task->ledger,
5451 task_ledgers.graphics_nofootprint_compressed,
5452 &vm_info->ledger_tag_graphics_nofootprint_compressed);
5453 ledger_get_balance(task->ledger,
5454 task_ledgers.neural_footprint,
5455 &vm_info->ledger_tag_neural_footprint);
5456 ledger_get_balance(task->ledger,
5457 task_ledgers.neural_footprint_compressed,
5458 &vm_info->ledger_tag_neural_footprint_compressed);
5459 ledger_get_balance(task->ledger,
5460 task_ledgers.neural_nofootprint,
5461 &vm_info->ledger_tag_neural_nofootprint);
5462 ledger_get_balance(task->ledger,
5463 task_ledgers.neural_nofootprint_compressed,
5464 &vm_info->ledger_tag_neural_nofootprint_compressed);
5465 *task_info_count = TASK_VM_INFO_REV3_COUNT;
5466 }
5467 if (original_task_info_count >= TASK_VM_INFO_REV4_COUNT) {
5468 if (task->bsd_info) {
5469 vm_info->limit_bytes_remaining =
5470 memorystatus_available_memory_internal(task->bsd_info);
5471 } else {
5472 vm_info->limit_bytes_remaining = 0;
5473 }
5474 *task_info_count = TASK_VM_INFO_REV4_COUNT;
5475 }
5476 if (original_task_info_count >= TASK_VM_INFO_REV5_COUNT) {
5477 thread_t thread;
5478 integer_t total = task->decompressions;
5479 queue_iterate(&task->threads, thread, thread_t, task_threads) {
5480 total += thread->decompressions;
5481 }
5482 vm_info->decompressions = total;
5483 *task_info_count = TASK_VM_INFO_REV5_COUNT;
5484 }
5485
5486 break;
5487 }
5488
5489 case TASK_WAIT_STATE_INFO:
5490 {
5491 /*
5492 * Deprecated flavor. Currently allowing some results until all users
5493 * stop calling it. The results may not be accurate.
5494 */
5495 task_wait_state_info_t wait_state_info;
5496 uint64_t total_sfi_ledger_val = 0;
5497
5498 if (*task_info_count < TASK_WAIT_STATE_INFO_COUNT) {
5499 error = KERN_INVALID_ARGUMENT;
5500 break;
5501 }
5502
5503 wait_state_info = (task_wait_state_info_t) task_info_out;
5504
5505 wait_state_info->total_wait_state_time = 0;
5506 bzero(wait_state_info->_reserved, sizeof(wait_state_info->_reserved));
5507
5508#if CONFIG_SCHED_SFI
5509 int i, prev_lentry = -1;
5510 int64_t val_credit, val_debit;
5511
5512 for (i = 0; i < MAX_SFI_CLASS_ID; i++) {
5513 val_credit = 0;
5514 /*
5515 * checking with prev_lentry != entry ensures adjacent classes
5516 * which share the same ledger do not add wait times twice.
5517 * Note: Use ledger() call to get data for each individual sfi class.
5518 */
5519 if (prev_lentry != task_ledgers.sfi_wait_times[i] &&
5520 KERN_SUCCESS == ledger_get_entries(task->ledger,
5521 task_ledgers.sfi_wait_times[i], &val_credit, &val_debit)) {
5522 total_sfi_ledger_val += val_credit;
5523 }
5524 prev_lentry = task_ledgers.sfi_wait_times[i];
5525 }
5526
5527#endif /* CONFIG_SCHED_SFI */
5528 wait_state_info->total_wait_sfi_state_time = total_sfi_ledger_val;
5529 *task_info_count = TASK_WAIT_STATE_INFO_COUNT;
5530
5531 break;
5532 }
5533 case TASK_VM_INFO_PURGEABLE_ACCOUNT:
5534 {
5535#if DEVELOPMENT || DEBUG
5536 pvm_account_info_t acnt_info;
5537
5538 if (*task_info_count < PVM_ACCOUNT_INFO_COUNT) {
5539 error = KERN_INVALID_ARGUMENT;
5540 break;
5541 }
5542
5543 if (task_info_out == NULL) {
5544 error = KERN_INVALID_ARGUMENT;
5545 break;
5546 }
5547
5548 acnt_info = (pvm_account_info_t) task_info_out;
5549
5550 error = vm_purgeable_account(task, acnt_info);
5551
5552 *task_info_count = PVM_ACCOUNT_INFO_COUNT;
5553
5554 break;
5555#else /* DEVELOPMENT || DEBUG */
5556 error = KERN_NOT_SUPPORTED;
5557 break;
5558#endif /* DEVELOPMENT || DEBUG */
5559 }
5560 case TASK_FLAGS_INFO:
5561 {
5562 task_flags_info_t flags_info;
5563
5564 if (*task_info_count < TASK_FLAGS_INFO_COUNT) {
5565 error = KERN_INVALID_ARGUMENT;
5566 break;
5567 }
5568
5569 flags_info = (task_flags_info_t)task_info_out;
5570
5571 /* only publish the 64-bit flag of the task */
5572 flags_info->flags = task->t_flags & (TF_64B_ADDR | TF_64B_DATA);
5573
5574 *task_info_count = TASK_FLAGS_INFO_COUNT;
5575 break;
5576 }
5577
5578 case TASK_DEBUG_INFO_INTERNAL:
5579 {
5580#if DEVELOPMENT || DEBUG
5581 task_debug_info_internal_t dbg_info;
5582 ipc_space_t space = task->itk_space;
5583 if (*task_info_count < TASK_DEBUG_INFO_INTERNAL_COUNT) {
5584 error = KERN_NOT_SUPPORTED;
5585 break;
5586 }
5587
5588 if (task_info_out == NULL) {
5589 error = KERN_INVALID_ARGUMENT;
5590 break;
5591 }
5592 dbg_info = (task_debug_info_internal_t) task_info_out;
5593 dbg_info->ipc_space_size = 0;
5594
5595 if (space) {
5596 is_read_lock(space);
5597 dbg_info->ipc_space_size = space->is_table_size;
5598 is_read_unlock(space);
5599 }
5600
5601 dbg_info->suspend_count = task->suspend_count;
5602
5603 error = KERN_SUCCESS;
5604 *task_info_count = TASK_DEBUG_INFO_INTERNAL_COUNT;
5605 break;
5606#else /* DEVELOPMENT || DEBUG */
5607 error = KERN_NOT_SUPPORTED;
5608 break;
5609#endif /* DEVELOPMENT || DEBUG */
5610 }
5611 default:
5612 error = KERN_INVALID_ARGUMENT;
5613 }
5614
5615 task_unlock(task);
5616 return error;
5617}
5618
5619/*
5620 * task_info_from_user
5621 *
5622 * When calling task_info from user space,
5623 * this function will be executed as mig server side
5624 * instead of calling directly into task_info.
5625 * This gives the possibility to perform more security
5626 * checks on task_port.
5627 *
5628 * In the case of TASK_DYLD_INFO, we require the more
5629 * privileged task_read_port not the less-privileged task_name_port.
5630 *
5631 */
5632kern_return_t
5633task_info_from_user(
5634 mach_port_t task_port,
5635 task_flavor_t flavor,
5636 task_info_t task_info_out,
5637 mach_msg_type_number_t *task_info_count)
5638{
5639 task_t task;
5640 kern_return_t ret;
5641
5642 if (flavor == TASK_DYLD_INFO) {
5643 task = convert_port_to_task_read(task_port);
5644 } else {
5645 task = convert_port_to_task_name(task_port);
5646 }
5647
5648 ret = task_info(task, flavor, task_info_out, task_info_count);
5649
5650 task_deallocate(task);
5651
5652 return ret;
5653}
5654
5655/*
5656 * Routine: task_dyld_process_info_update_helper
5657 *
5658 * Release send rights in release_ports.
5659 *
5660 * If no active ports found in task's dyld notifier array, unset the magic value
5661 * in user space to indicate so.
5662 *
5663 * Condition:
5664 * task's itk_lock is locked, and is unlocked upon return.
5665 * Global g_dyldinfo_mtx is locked, and is unlocked upon return.
5666 */
5667void
5668task_dyld_process_info_update_helper(
5669 task_t task,
5670 size_t active_count,
5671 vm_map_address_t magic_addr, /* a userspace address */
5672 ipc_port_t *release_ports,
5673 size_t release_count)
5674{
5675 void *notifiers_ptr = NULL;
5676
5677 assert(release_count <= DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT);
5678
5679 if (active_count == 0) {
5680 assert(task->itk_dyld_notify != NULL);
5681 notifiers_ptr = task->itk_dyld_notify;
5682 task->itk_dyld_notify = NULL;
5683 itk_unlock(task);
5684
5685 kfree(notifiers_ptr, (vm_size_t)sizeof(ipc_port_t) * DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT);
5686 (void)copyoutmap_atomic32(task->map, MACH_PORT_NULL, magic_addr); /* unset magic */
5687 } else {
5688 itk_unlock(task);
5689 (void)copyoutmap_atomic32(task->map, (mach_port_name_t)DYLD_PROCESS_INFO_NOTIFY_MAGIC,
5690 magic_addr); /* reset magic */
5691 }
5692
5693 lck_mtx_unlock(&g_dyldinfo_mtx);
5694
5695 for (size_t i = 0; i < release_count; i++) {
5696 ipc_port_release_send(release_ports[i]);
5697 }
5698}
5699
5700/*
5701 * Routine: task_dyld_process_info_notify_register
5702 *
5703 * Insert a send right to target task's itk_dyld_notify array. Allocate kernel
5704 * memory for the array if it's the first port to be registered. Also cleanup
5705 * any dead rights found in the array.
5706 *
5707 * Consumes sright if returns KERN_SUCCESS, otherwise MIG will destroy it.
5708 *
5709 * Args:
5710 * task: Target task for the registration.
5711 * sright: A send right.
5712 *
5713 * Returns:
5714 * KERN_SUCCESS: Registration succeeded.
5715 * KERN_INVALID_TASK: task is invalid.
5716 * KERN_INVALID_RIGHT: sright is invalid.
5717 * KERN_DENIED: Security policy denied this call.
5718 * KERN_RESOURCE_SHORTAGE: Kernel memory allocation failed.
5719 * KERN_NO_SPACE: No available notifier port slot left for this task.
5720 * KERN_RIGHT_EXISTS: The notifier port is already registered and active.
5721 *
5722 * Other error code see task_info().
5723 *
5724 * See Also:
5725 * task_dyld_process_info_notify_get_trap() in mach_kernelrpc.c
5726 */
5727kern_return_t
5728task_dyld_process_info_notify_register(
5729 task_t task,
5730 ipc_port_t sright)
5731{
5732 struct task_dyld_info dyld_info;
5733 mach_msg_type_number_t info_count = TASK_DYLD_INFO_COUNT;
5734 ipc_port_t release_ports[DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT];
5735 uint32_t release_count = 0, active_count = 0;
5736 mach_vm_address_t ports_addr; /* a user space address */
5737 kern_return_t kr;
5738 boolean_t right_exists = false;
5739 ipc_port_t *notifiers_ptr = NULL;
5740 ipc_port_t *portp;
5741
5742 if (task == TASK_NULL || task == kernel_task) {
5743 return KERN_INVALID_TASK;
5744 }
5745
5746 if (!IP_VALID(sright)) {
5747 return KERN_INVALID_RIGHT;
5748 }
5749
5750#if CONFIG_MACF
5751 if (mac_task_check_dyld_process_info_notify_register()) {
5752 return KERN_DENIED;
5753 }
5754#endif
5755
5756 kr = task_info(task, TASK_DYLD_INFO, (task_info_t)&dyld_info, &info_count);
5757 if (kr) {
5758 return kr;
5759 }
5760
5761 if (dyld_info.all_image_info_format == TASK_DYLD_ALL_IMAGE_INFO_32) {
5762 ports_addr = (mach_vm_address_t)(dyld_info.all_image_info_addr +
5763 offsetof(struct user32_dyld_all_image_infos, notifyMachPorts));
5764 } else {
5765 ports_addr = (mach_vm_address_t)(dyld_info.all_image_info_addr +
5766 offsetof(struct user64_dyld_all_image_infos, notifyMachPorts));
5767 }
5768
5769 if (task->itk_dyld_notify == NULL) {
5770 notifiers_ptr = (ipc_port_t *)
5771 kalloc_flags(sizeof(ipc_port_t) * DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT, Z_ZERO);
5772 if (!notifiers_ptr) {
5773 return KERN_RESOURCE_SHORTAGE;
5774 }
5775 }
5776
5777 lck_mtx_lock(&g_dyldinfo_mtx);
5778 itk_lock(task);
5779
5780 if (task->itk_dyld_notify == NULL) {
5781 task->itk_dyld_notify = notifiers_ptr;
5782 notifiers_ptr = NULL;
5783 }
5784
5785 assert(task->itk_dyld_notify != NULL);
5786 /* First pass: clear dead names and check for duplicate registration */
5787 for (int slot = 0; slot < DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT; slot++) {
5788 portp = &task->itk_dyld_notify[slot];
5789 if (*portp != IPC_PORT_NULL && !ip_active(*portp)) {
5790 release_ports[release_count++] = *portp;
5791 *portp = IPC_PORT_NULL;
5792 } else if (*portp == sright) {
5793 /* the port is already registered and is active */
5794 right_exists = true;
5795 }
5796
5797 if (*portp != IPC_PORT_NULL) {
5798 active_count++;
5799 }
5800 }
5801
5802 if (right_exists) {
5803 /* skip second pass */
5804 kr = KERN_RIGHT_EXISTS;
5805 goto out;
5806 }
5807
5808 /* Second pass: register the port */
5809 kr = KERN_NO_SPACE;
5810 for (int slot = 0; slot < DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT; slot++) {
5811 portp = &task->itk_dyld_notify[slot];
5812 if (*portp == IPC_PORT_NULL) {
5813 *portp = sright;
5814 active_count++;
5815 kr = KERN_SUCCESS;
5816 break;
5817 }
5818 }
5819
5820out:
5821 assert(active_count > 0);
5822
5823 task_dyld_process_info_update_helper(task, active_count,
5824 (vm_map_address_t)ports_addr, release_ports, release_count);
5825 /* itk_lock, g_dyldinfo_mtx are unlocked upon return */
5826
5827 if (notifiers_ptr) {
5828 kfree(notifiers_ptr, sizeof(ipc_port_t) * DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT);
5829 }
5830
5831 return kr;
5832}
5833
5834/*
5835 * Routine: task_dyld_process_info_notify_deregister
5836 *
5837 * Remove a send right in target task's itk_dyld_notify array matching the receive
5838 * right name passed in. Deallocate kernel memory for the array if it's the last port to
5839 * be deregistered, or all ports have died. Also cleanup any dead rights found in the array.
5840 *
5841 * Does not consume any reference.
5842 *
5843 * Args:
5844 * task: Target task for the deregistration.
5845 * rcv_name: The name denoting the receive right in caller's space.
5846 *
5847 * Returns:
5848 * KERN_SUCCESS: A matching entry found and degistration succeeded.
5849 * KERN_INVALID_TASK: task is invalid.
5850 * KERN_INVALID_NAME: name is invalid.
5851 * KERN_DENIED: Security policy denied this call.
5852 * KERN_FAILURE: A matching entry is not found.
5853 * KERN_INVALID_RIGHT: The name passed in does not represent a valid rcv right.
5854 *
5855 * Other error code see task_info().
5856 *
5857 * See Also:
5858 * task_dyld_process_info_notify_get_trap() in mach_kernelrpc.c
5859 */
5860kern_return_t
5861task_dyld_process_info_notify_deregister(
5862 task_t task,
5863 mach_port_name_t rcv_name)
5864{
5865 struct task_dyld_info dyld_info;
5866 mach_msg_type_number_t info_count = TASK_DYLD_INFO_COUNT;
5867 ipc_port_t release_ports[DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT];
5868 uint32_t release_count = 0, active_count = 0;
5869 boolean_t port_found = false;
5870 mach_vm_address_t ports_addr; /* a user space address */
5871 ipc_port_t sright;
5872 kern_return_t kr;
5873 ipc_port_t *portp;
5874
5875 if (task == TASK_NULL || task == kernel_task) {
5876 return KERN_INVALID_TASK;
5877 }
5878
5879 if (!MACH_PORT_VALID(rcv_name)) {
5880 return KERN_INVALID_NAME;
5881 }
5882
5883#if CONFIG_MACF
5884 if (mac_task_check_dyld_process_info_notify_register()) {
5885 return KERN_DENIED;
5886 }
5887#endif
5888
5889 kr = task_info(task, TASK_DYLD_INFO, (task_info_t)&dyld_info, &info_count);
5890 if (kr) {
5891 return kr;
5892 }
5893
5894 if (dyld_info.all_image_info_format == TASK_DYLD_ALL_IMAGE_INFO_32) {
5895 ports_addr = (mach_vm_address_t)(dyld_info.all_image_info_addr +
5896 offsetof(struct user32_dyld_all_image_infos, notifyMachPorts));
5897 } else {
5898 ports_addr = (mach_vm_address_t)(dyld_info.all_image_info_addr +
5899 offsetof(struct user64_dyld_all_image_infos, notifyMachPorts));
5900 }
5901
5902 kr = ipc_port_translate_receive(current_space(), rcv_name, &sright); /* does not produce port ref */
5903 if (kr) {
5904 return KERN_INVALID_RIGHT;
5905 }
5906
5907 ip_reference(sright);
5908 ip_unlock(sright);
5909
5910 assert(sright != IPC_PORT_NULL);
5911
5912 lck_mtx_lock(&g_dyldinfo_mtx);
5913 itk_lock(task);
5914
5915 if (task->itk_dyld_notify == NULL) {
5916 itk_unlock(task);
5917 lck_mtx_unlock(&g_dyldinfo_mtx);
5918 ip_release(sright);
5919 return KERN_FAILURE;
5920 }
5921
5922 for (int slot = 0; slot < DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT; slot++) {
5923 portp = &task->itk_dyld_notify[slot];
5924 if (*portp == sright) {
5925 release_ports[release_count++] = *portp;
5926 *portp = IPC_PORT_NULL;
5927 port_found = true;
5928 } else if ((*portp != IPC_PORT_NULL && !ip_active(*portp))) {
5929 release_ports[release_count++] = *portp;
5930 *portp = IPC_PORT_NULL;
5931 }
5932
5933 if (*portp != IPC_PORT_NULL) {
5934 active_count++;
5935 }
5936 }
5937
5938 task_dyld_process_info_update_helper(task, active_count,
5939 (vm_map_address_t)ports_addr, release_ports, release_count);
5940 /* itk_lock, g_dyldinfo_mtx are unlocked upon return */
5941
5942 ip_release(sright);
5943
5944 return port_found ? KERN_SUCCESS : KERN_FAILURE;
5945}
5946
5947/*
5948 * task_power_info
5949 *
5950 * Returns power stats for the task.
5951 * Note: Called with task locked.
5952 */
5953void
5954task_power_info_locked(
5955 task_t task,
5956 task_power_info_t info,
5957 gpu_energy_data_t ginfo,
5958 task_power_info_v2_t infov2,
5959 uint64_t *runnable_time)
5960{
5961 thread_t thread;
5962 ledger_amount_t tmp;
5963
5964 uint64_t runnable_time_sum = 0;
5965
5966 task_lock_assert_owned(task);
5967
5968 ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
5969 (ledger_amount_t *)&info->task_interrupt_wakeups, &tmp);
5970 ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
5971 (ledger_amount_t *)&info->task_platform_idle_wakeups, &tmp);
5972
5973 info->task_timer_wakeups_bin_1 = task->task_timer_wakeups_bin_1;
5974 info->task_timer_wakeups_bin_2 = task->task_timer_wakeups_bin_2;
5975
5976 info->total_user = task->total_user_time;
5977 info->total_system = task->total_system_time;
5978 runnable_time_sum = task->total_runnable_time;
5979
5980#if defined(__arm__) || defined(__arm64__)
5981 if (infov2) {
5982 infov2->task_energy = task->task_energy;
5983 }
5984#endif /* defined(__arm__) || defined(__arm64__) */
5985
5986 if (ginfo) {
5987 ginfo->task_gpu_utilisation = task->task_gpu_ns;
5988 }
5989
5990 if (infov2) {
5991 infov2->task_ptime = task->total_ptime;
5992 infov2->task_pset_switches = task->ps_switch;
5993 }
5994
5995 queue_iterate(&task->threads, thread, thread_t, task_threads) {
5996 uint64_t tval;
5997 spl_t x;
5998
5999 if (thread->options & TH_OPT_IDLE_THREAD) {
6000 continue;
6001 }
6002
6003 x = splsched();
6004 thread_lock(thread);
6005
6006 info->task_timer_wakeups_bin_1 += thread->thread_timer_wakeups_bin_1;
6007 info->task_timer_wakeups_bin_2 += thread->thread_timer_wakeups_bin_2;
6008
6009#if defined(__arm__) || defined(__arm64__)
6010 if (infov2) {
6011 infov2->task_energy += ml_energy_stat(thread);
6012 }
6013#endif /* defined(__arm__) || defined(__arm64__) */
6014
6015 tval = timer_grab(&thread->user_timer);
6016 info->total_user += tval;
6017
6018 if (infov2) {
6019 tval = timer_grab(&thread->ptime);
6020 infov2->task_ptime += tval;
6021 infov2->task_pset_switches += thread->ps_switch;
6022 }
6023
6024 tval = timer_grab(&thread->system_timer);
6025 if (thread->precise_user_kernel_time) {
6026 info->total_system += tval;
6027 } else {
6028 /* system_timer may represent either sys or user */
6029 info->total_user += tval;
6030 }
6031
6032 tval = timer_grab(&thread->runnable_timer);
6033
6034 runnable_time_sum += tval;
6035
6036 if (ginfo) {
6037 ginfo->task_gpu_utilisation += ml_gpu_stat(thread);
6038 }
6039 thread_unlock(thread);
6040 splx(x);
6041 }
6042
6043 if (runnable_time) {
6044 *runnable_time = runnable_time_sum;
6045 }
6046}
6047
6048/*
6049 * task_gpu_utilisation
6050 *
6051 * Returns the total gpu time used by the all the threads of the task
6052 * (both dead and alive)
6053 */
6054uint64_t
6055task_gpu_utilisation(
6056 task_t task)
6057{
6058 uint64_t gpu_time = 0;
6059#if defined(__x86_64__)
6060 thread_t thread;
6061
6062 task_lock(task);
6063 gpu_time += task->task_gpu_ns;
6064
6065 queue_iterate(&task->threads, thread, thread_t, task_threads) {
6066 spl_t x;
6067 x = splsched();
6068 thread_lock(thread);
6069 gpu_time += ml_gpu_stat(thread);
6070 thread_unlock(thread);
6071 splx(x);
6072 }
6073
6074 task_unlock(task);
6075#else /* defined(__x86_64__) */
6076 /* silence compiler warning */
6077 (void)task;
6078#endif /* defined(__x86_64__) */
6079 return gpu_time;
6080}
6081
6082/*
6083 * task_energy
6084 *
6085 * Returns the total energy used by the all the threads of the task
6086 * (both dead and alive)
6087 */
6088uint64_t
6089task_energy(
6090 task_t task)
6091{
6092 uint64_t energy = 0;
6093 thread_t thread;
6094
6095 task_lock(task);
6096 energy += task->task_energy;
6097
6098 queue_iterate(&task->threads, thread, thread_t, task_threads) {
6099 spl_t x;
6100 x = splsched();
6101 thread_lock(thread);
6102 energy += ml_energy_stat(thread);
6103 thread_unlock(thread);
6104 splx(x);
6105 }
6106
6107 task_unlock(task);
6108 return energy;
6109}
6110
6111#if __AMP__
6112
6113uint64_t
6114task_cpu_ptime(
6115 task_t task)
6116{
6117 uint64_t cpu_ptime = 0;
6118 thread_t thread;
6119
6120 task_lock(task);
6121 cpu_ptime += task->total_ptime;
6122
6123 queue_iterate(&task->threads, thread, thread_t, task_threads) {
6124 cpu_ptime += timer_grab(&thread->ptime);
6125 }
6126
6127 task_unlock(task);
6128 return cpu_ptime;
6129}
6130
6131#else /* __AMP__ */
6132
6133uint64_t
6134task_cpu_ptime(
6135 __unused task_t task)
6136{
6137 return 0;
6138}
6139
6140#endif /* __AMP__ */
6141
6142/* This function updates the cpu time in the arrays for each
6143 * effective and requested QoS class
6144 */
6145void
6146task_update_cpu_time_qos_stats(
6147 task_t task,
6148 uint64_t *eqos_stats,
6149 uint64_t *rqos_stats)
6150{
6151 if (!eqos_stats && !rqos_stats) {
6152 return;
6153 }
6154
6155 task_lock(task);
6156 thread_t thread;
6157 queue_iterate(&task->threads, thread, thread_t, task_threads) {
6158 if (thread->options & TH_OPT_IDLE_THREAD) {
6159 continue;
6160 }
6161
6162 thread_update_qos_cpu_time(thread);
6163 }
6164
6165 if (eqos_stats) {
6166 eqos_stats[THREAD_QOS_DEFAULT] += task->cpu_time_eqos_stats.cpu_time_qos_default;
6167 eqos_stats[THREAD_QOS_MAINTENANCE] += task->cpu_time_eqos_stats.cpu_time_qos_maintenance;
6168 eqos_stats[THREAD_QOS_BACKGROUND] += task->cpu_time_eqos_stats.cpu_time_qos_background;
6169 eqos_stats[THREAD_QOS_UTILITY] += task->cpu_time_eqos_stats.cpu_time_qos_utility;
6170 eqos_stats[THREAD_QOS_LEGACY] += task->cpu_time_eqos_stats.cpu_time_qos_legacy;
6171 eqos_stats[THREAD_QOS_USER_INITIATED] += task->cpu_time_eqos_stats.cpu_time_qos_user_initiated;
6172 eqos_stats[THREAD_QOS_USER_INTERACTIVE] += task->cpu_time_eqos_stats.cpu_time_qos_user_interactive;
6173 }
6174
6175 if (rqos_stats) {
6176 rqos_stats[THREAD_QOS_DEFAULT] += task->cpu_time_rqos_stats.cpu_time_qos_default;
6177 rqos_stats[THREAD_QOS_MAINTENANCE] += task->cpu_time_rqos_stats.cpu_time_qos_maintenance;
6178 rqos_stats[THREAD_QOS_BACKGROUND] += task->cpu_time_rqos_stats.cpu_time_qos_background;
6179 rqos_stats[THREAD_QOS_UTILITY] += task->cpu_time_rqos_stats.cpu_time_qos_utility;
6180 rqos_stats[THREAD_QOS_LEGACY] += task->cpu_time_rqos_stats.cpu_time_qos_legacy;
6181 rqos_stats[THREAD_QOS_USER_INITIATED] += task->cpu_time_rqos_stats.cpu_time_qos_user_initiated;
6182 rqos_stats[THREAD_QOS_USER_INTERACTIVE] += task->cpu_time_rqos_stats.cpu_time_qos_user_interactive;
6183 }
6184
6185 task_unlock(task);
6186}
6187
6188kern_return_t
6189task_purgable_info(
6190 task_t task,
6191 task_purgable_info_t *stats)
6192{
6193 if (task == TASK_NULL || stats == NULL) {
6194 return KERN_INVALID_ARGUMENT;
6195 }
6196 /* Take task reference */
6197 task_reference(task);
6198 vm_purgeable_stats((vm_purgeable_info_t)stats, task);
6199 /* Drop task reference */
6200 task_deallocate(task);
6201 return KERN_SUCCESS;
6202}
6203
6204void
6205task_vtimer_set(
6206 task_t task,
6207 integer_t which)
6208{
6209 thread_t thread;
6210 spl_t x;
6211
6212 task_lock(task);
6213
6214 task->vtimers |= which;
6215
6216 switch (which) {
6217 case TASK_VTIMER_USER:
6218 queue_iterate(&task->threads, thread, thread_t, task_threads) {
6219 x = splsched();
6220 thread_lock(thread);
6221 if (thread->precise_user_kernel_time) {
6222 thread->vtimer_user_save = timer_grab(&thread->user_timer);
6223 } else {
6224 thread->vtimer_user_save = timer_grab(&thread->system_timer);
6225 }
6226 thread_unlock(thread);
6227 splx(x);
6228 }
6229 break;
6230
6231 case TASK_VTIMER_PROF:
6232 queue_iterate(&task->threads, thread, thread_t, task_threads) {
6233 x = splsched();
6234 thread_lock(thread);
6235 thread->vtimer_prof_save = timer_grab(&thread->user_timer);
6236 thread->vtimer_prof_save += timer_grab(&thread->system_timer);
6237 thread_unlock(thread);
6238 splx(x);
6239 }
6240 break;
6241
6242 case TASK_VTIMER_RLIM:
6243 queue_iterate(&task->threads, thread, thread_t, task_threads) {
6244 x = splsched();
6245 thread_lock(thread);
6246 thread->vtimer_rlim_save = timer_grab(&thread->user_timer);
6247 thread->vtimer_rlim_save += timer_grab(&thread->system_timer);
6248 thread_unlock(thread);
6249 splx(x);
6250 }
6251 break;
6252 }
6253
6254 task_unlock(task);
6255}
6256
6257void
6258task_vtimer_clear(
6259 task_t task,
6260 integer_t which)
6261{
6262 assert(task == current_task());
6263
6264 task_lock(task);
6265
6266 task->vtimers &= ~which;
6267
6268 task_unlock(task);
6269}
6270
6271void
6272task_vtimer_update(
6273 __unused
6274 task_t task,
6275 integer_t which,
6276 uint32_t *microsecs)
6277{
6278 thread_t thread = current_thread();
6279 uint32_t tdelt = 0;
6280 clock_sec_t secs = 0;
6281 uint64_t tsum;
6282
6283 assert(task == current_task());
6284
6285 spl_t s = splsched();
6286 thread_lock(thread);
6287
6288 if ((task->vtimers & which) != (uint32_t)which) {
6289 thread_unlock(thread);
6290 splx(s);
6291 return;
6292 }
6293
6294 switch (which) {
6295 case TASK_VTIMER_USER:
6296 if (thread->precise_user_kernel_time) {
6297 tdelt = (uint32_t)timer_delta(&thread->user_timer,
6298 &thread->vtimer_user_save);
6299 } else {
6300 tdelt = (uint32_t)timer_delta(&thread->system_timer,
6301 &thread->vtimer_user_save);
6302 }
6303 absolutetime_to_microtime(tdelt, &secs, microsecs);
6304 break;
6305
6306 case TASK_VTIMER_PROF:
6307 tsum = timer_grab(&thread->user_timer);
6308 tsum += timer_grab(&thread->system_timer);
6309 tdelt = (uint32_t)(tsum - thread->vtimer_prof_save);
6310 absolutetime_to_microtime(tdelt, &secs, microsecs);
6311 /* if the time delta is smaller than a usec, ignore */
6312 if (*microsecs != 0) {
6313 thread->vtimer_prof_save = tsum;
6314 }
6315 break;
6316
6317 case TASK_VTIMER_RLIM:
6318 tsum = timer_grab(&thread->user_timer);
6319 tsum += timer_grab(&thread->system_timer);
6320 tdelt = (uint32_t)(tsum - thread->vtimer_rlim_save);
6321 thread->vtimer_rlim_save = tsum;
6322 absolutetime_to_microtime(tdelt, &secs, microsecs);
6323 break;
6324 }
6325
6326 thread_unlock(thread);
6327 splx(s);
6328}
6329
6330/*
6331 * task_assign:
6332 *
6333 * Change the assigned processor set for the task
6334 */
6335kern_return_t
6336task_assign(
6337 __unused task_t task,
6338 __unused processor_set_t new_pset,
6339 __unused boolean_t assign_threads)
6340{
6341 return KERN_FAILURE;
6342}
6343
6344/*
6345 * task_assign_default:
6346 *
6347 * Version of task_assign to assign to default processor set.
6348 */
6349kern_return_t
6350task_assign_default(
6351 task_t task,
6352 boolean_t assign_threads)
6353{
6354 return task_assign(task, &pset0, assign_threads);
6355}
6356
6357/*
6358 * task_get_assignment
6359 *
6360 * Return name of processor set that task is assigned to.
6361 */
6362kern_return_t
6363task_get_assignment(
6364 task_t task,
6365 processor_set_t *pset)
6366{
6367 if (!task || !task->active) {
6368 return KERN_FAILURE;
6369 }
6370
6371 *pset = &pset0;
6372
6373 return KERN_SUCCESS;
6374}
6375
6376uint64_t
6377get_task_dispatchqueue_offset(
6378 task_t task)
6379{
6380 return task->dispatchqueue_offset;
6381}
6382
6383/*
6384 * task_policy
6385 *
6386 * Set scheduling policy and parameters, both base and limit, for
6387 * the given task. Policy must be a policy which is enabled for the
6388 * processor set. Change contained threads if requested.
6389 */
6390kern_return_t
6391task_policy(
6392 __unused task_t task,
6393 __unused policy_t policy_id,
6394 __unused policy_base_t base,
6395 __unused mach_msg_type_number_t count,
6396 __unused boolean_t set_limit,
6397 __unused boolean_t change)
6398{
6399 return KERN_FAILURE;
6400}
6401
6402/*
6403 * task_set_policy
6404 *
6405 * Set scheduling policy and parameters, both base and limit, for
6406 * the given task. Policy can be any policy implemented by the
6407 * processor set, whether enabled or not. Change contained threads
6408 * if requested.
6409 */
6410kern_return_t
6411task_set_policy(
6412 __unused task_t task,
6413 __unused processor_set_t pset,
6414 __unused policy_t policy_id,
6415 __unused policy_base_t base,
6416 __unused mach_msg_type_number_t base_count,
6417 __unused policy_limit_t limit,
6418 __unused mach_msg_type_number_t limit_count,
6419 __unused boolean_t change)
6420{
6421 return KERN_FAILURE;
6422}
6423
6424kern_return_t
6425task_set_ras_pc(
6426 __unused task_t task,
6427 __unused vm_offset_t pc,
6428 __unused vm_offset_t endpc)
6429{
6430 return KERN_FAILURE;
6431}
6432
6433void
6434task_synchronizer_destroy_all(task_t task)
6435{
6436 /*
6437 * Destroy owned semaphores
6438 */
6439 semaphore_destroy_all(task);
6440}
6441
6442/*
6443 * Install default (machine-dependent) initial thread state
6444 * on the task. Subsequent thread creation will have this initial
6445 * state set on the thread by machine_thread_inherit_taskwide().
6446 * Flavors and structures are exactly the same as those to thread_set_state()
6447 */
6448kern_return_t
6449task_set_state(
6450 task_t task,
6451 int flavor,
6452 thread_state_t state,
6453 mach_msg_type_number_t state_count)
6454{
6455 kern_return_t ret;
6456
6457 if (task == TASK_NULL) {
6458 return KERN_INVALID_ARGUMENT;
6459 }
6460
6461 task_lock(task);
6462
6463 if (!task->active) {
6464 task_unlock(task);
6465 return KERN_FAILURE;
6466 }
6467
6468 ret = machine_task_set_state(task, flavor, state, state_count);
6469
6470 task_unlock(task);
6471 return ret;
6472}
6473
6474/*
6475 * Examine the default (machine-dependent) initial thread state
6476 * on the task, as set by task_set_state(). Flavors and structures
6477 * are exactly the same as those passed to thread_get_state().
6478 */
6479kern_return_t
6480task_get_state(
6481 task_t task,
6482 int flavor,
6483 thread_state_t state,
6484 mach_msg_type_number_t *state_count)
6485{
6486 kern_return_t ret;
6487
6488 if (task == TASK_NULL) {
6489 return KERN_INVALID_ARGUMENT;
6490 }
6491
6492 task_lock(task);
6493
6494 if (!task->active) {
6495 task_unlock(task);
6496 return KERN_FAILURE;
6497 }
6498
6499 ret = machine_task_get_state(task, flavor, state, state_count);
6500
6501 task_unlock(task);
6502 return ret;
6503}
6504
6505
6506static kern_return_t __attribute__((noinline, not_tail_called))
6507PROC_VIOLATED_GUARD__SEND_EXC_GUARD_AND_SUSPEND(
6508 mach_exception_code_t code,
6509 mach_exception_subcode_t subcode,
6510 void *reason)
6511{
6512#ifdef MACH_BSD
6513 if (1 == proc_selfpid()) {
6514 return KERN_NOT_SUPPORTED; // initproc is immune
6515 }
6516#endif
6517 mach_exception_data_type_t codes[EXCEPTION_CODE_MAX] = {
6518 [0] = code,
6519 [1] = subcode,
6520 };
6521 task_t task = current_task();
6522 kern_return_t kr;
6523
6524 /* (See jetsam-related comments below) */
6525
6526 proc_memstat_terminated(task->bsd_info, TRUE);
6527 kr = task_enqueue_exception_with_corpse(task, EXC_GUARD, codes, 2, reason);
6528 proc_memstat_terminated(task->bsd_info, FALSE);
6529 return kr;
6530}
6531
6532kern_return_t
6533task_violated_guard(
6534 mach_exception_code_t code,
6535 mach_exception_subcode_t subcode,
6536 void *reason)
6537{
6538 return PROC_VIOLATED_GUARD__SEND_EXC_GUARD_AND_SUSPEND(code, subcode, reason);
6539}
6540
6541
6542#if CONFIG_MEMORYSTATUS
6543
6544boolean_t
6545task_get_memlimit_is_active(task_t task)
6546{
6547 assert(task != NULL);
6548
6549 if (task->memlimit_is_active == 1) {
6550 return TRUE;
6551 } else {
6552 return FALSE;
6553 }
6554}
6555
6556void
6557task_set_memlimit_is_active(task_t task, boolean_t memlimit_is_active)
6558{
6559 assert(task != NULL);
6560
6561 if (memlimit_is_active) {
6562 task->memlimit_is_active = 1;
6563 } else {
6564 task->memlimit_is_active = 0;
6565 }
6566}
6567
6568boolean_t
6569task_get_memlimit_is_fatal(task_t task)
6570{
6571 assert(task != NULL);
6572
6573 if (task->memlimit_is_fatal == 1) {
6574 return TRUE;
6575 } else {
6576 return FALSE;
6577 }
6578}
6579
6580void
6581task_set_memlimit_is_fatal(task_t task, boolean_t memlimit_is_fatal)
6582{
6583 assert(task != NULL);
6584
6585 if (memlimit_is_fatal) {
6586 task->memlimit_is_fatal = 1;
6587 } else {
6588 task->memlimit_is_fatal = 0;
6589 }
6590}
6591
6592boolean_t
6593task_has_triggered_exc_resource(task_t task, boolean_t memlimit_is_active)
6594{
6595 boolean_t triggered = FALSE;
6596
6597 assert(task == current_task());
6598
6599 /*
6600 * Returns true, if task has already triggered an exc_resource exception.
6601 */
6602
6603 if (memlimit_is_active) {
6604 triggered = (task->memlimit_active_exc_resource ? TRUE : FALSE);
6605 } else {
6606 triggered = (task->memlimit_inactive_exc_resource ? TRUE : FALSE);
6607 }
6608
6609 return triggered;
6610}
6611
6612void
6613task_mark_has_triggered_exc_resource(task_t task, boolean_t memlimit_is_active)
6614{
6615 assert(task == current_task());
6616
6617 /*
6618 * We allow one exc_resource per process per active/inactive limit.
6619 * The limit's fatal attribute does not come into play.
6620 */
6621
6622 if (memlimit_is_active) {
6623 task->memlimit_active_exc_resource = 1;
6624 } else {
6625 task->memlimit_inactive_exc_resource = 1;
6626 }
6627}
6628
6629#define HWM_USERCORE_MINSPACE 250 // free space (in MB) required *after* core file creation
6630
6631void __attribute__((noinline))
6632PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb, boolean_t is_fatal)
6633{
6634 task_t task = current_task();
6635 int pid = 0;
6636 const char *procname = "unknown";
6637 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
6638 boolean_t send_sync_exc_resource = FALSE;
6639
6640#ifdef MACH_BSD
6641 pid = proc_selfpid();
6642
6643 if (pid == 1) {
6644 /*
6645 * Cannot have ReportCrash analyzing
6646 * a suspended initproc.
6647 */
6648 return;
6649 }
6650
6651 if (task->bsd_info != NULL) {
6652 procname = proc_name_address(current_task()->bsd_info);
6653 send_sync_exc_resource = proc_send_synchronous_EXC_RESOURCE(current_task()->bsd_info);
6654 }
6655#endif
6656#if CONFIG_COREDUMP
6657 if (hwm_user_cores) {
6658 int error;
6659 uint64_t starttime, end;
6660 clock_sec_t secs = 0;
6661 uint32_t microsecs = 0;
6662
6663 starttime = mach_absolute_time();
6664 /*
6665 * Trigger a coredump of this process. Don't proceed unless we know we won't
6666 * be filling up the disk; and ignore the core size resource limit for this
6667 * core file.
6668 */
6669 if ((error = coredump(current_task()->bsd_info, HWM_USERCORE_MINSPACE, COREDUMP_IGNORE_ULIMIT)) != 0) {
6670 printf("couldn't take coredump of %s[%d]: %d\n", procname, pid, error);
6671 }
6672 /*
6673 * coredump() leaves the task suspended.
6674 */
6675 task_resume_internal(current_task());
6676
6677 end = mach_absolute_time();
6678 absolutetime_to_microtime(end - starttime, &secs, &microsecs);
6679 printf("coredump of %s[%d] taken in %d secs %d microsecs\n",
6680 proc_name_address(current_task()->bsd_info), pid, (int)secs, microsecs);
6681 }
6682#endif /* CONFIG_COREDUMP */
6683
6684 if (disable_exc_resource) {
6685 printf("process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
6686 "supressed by a boot-arg.\n", procname, pid, max_footprint_mb);
6687 return;
6688 }
6689
6690 /*
6691 * A task that has triggered an EXC_RESOURCE, should not be
6692 * jetsammed when the device is under memory pressure. Here
6693 * we set the P_MEMSTAT_TERMINATED flag so that the process
6694 * will be skipped if the memorystatus_thread wakes up.
6695 */
6696 proc_memstat_terminated(current_task()->bsd_info, TRUE);
6697
6698 code[0] = code[1] = 0;
6699 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_MEMORY);
6700 EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_HIGH_WATERMARK);
6701 EXC_RESOURCE_HWM_ENCODE_LIMIT(code[0], max_footprint_mb);
6702
6703 /*
6704 * Do not generate a corpse fork if the violation is a fatal one
6705 * or the process wants synchronous EXC_RESOURCE exceptions.
6706 */
6707 if (is_fatal || send_sync_exc_resource || exc_via_corpse_forking == 0) {
6708 /* Do not send a EXC_RESOURCE if corpse_for_fatal_memkill is set */
6709 if (send_sync_exc_resource || corpse_for_fatal_memkill == 0) {
6710 /*
6711 * Use the _internal_ variant so that no user-space
6712 * process can resume our task from under us.
6713 */
6714 task_suspend_internal(task);
6715 exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
6716 task_resume_internal(task);
6717 }
6718 } else {
6719 if (audio_active) {
6720 printf("process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
6721 "supressed due to audio playback.\n", procname, pid, max_footprint_mb);
6722 } else {
6723 task_enqueue_exception_with_corpse(task, EXC_RESOURCE,
6724 code, EXCEPTION_CODE_MAX, NULL);
6725 }
6726 }
6727
6728 /*
6729 * After the EXC_RESOURCE has been handled, we must clear the
6730 * P_MEMSTAT_TERMINATED flag so that the process can again be
6731 * considered for jetsam if the memorystatus_thread wakes up.
6732 */
6733 proc_memstat_terminated(current_task()->bsd_info, FALSE); /* clear the flag */
6734}
6735
6736/*
6737 * Callback invoked when a task exceeds its physical footprint limit.
6738 */
6739void
6740task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1)
6741{
6742 ledger_amount_t max_footprint, max_footprint_mb;
6743 task_t task;
6744 boolean_t is_warning;
6745 boolean_t memlimit_is_active;
6746 boolean_t memlimit_is_fatal;
6747
6748 if (warning == LEDGER_WARNING_DIPPED_BELOW) {
6749 /*
6750 * Task memory limits only provide a warning on the way up.
6751 */
6752 return;
6753 } else if (warning == LEDGER_WARNING_ROSE_ABOVE) {
6754 /*
6755 * This task is in danger of violating a memory limit,
6756 * It has exceeded a percentage level of the limit.
6757 */
6758 is_warning = TRUE;
6759 } else {
6760 /*
6761 * The task has exceeded the physical footprint limit.
6762 * This is not a warning but a true limit violation.
6763 */
6764 is_warning = FALSE;
6765 }
6766
6767 task = current_task();
6768
6769 ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &max_footprint);
6770 max_footprint_mb = max_footprint >> 20;
6771
6772 memlimit_is_active = task_get_memlimit_is_active(task);
6773 memlimit_is_fatal = task_get_memlimit_is_fatal(task);
6774
6775 /*
6776 * If this is an actual violation (not a warning), then generate EXC_RESOURCE exception.
6777 * We only generate the exception once per process per memlimit (active/inactive limit).
6778 * To enforce this, we monitor state based on the memlimit's active/inactive attribute
6779 * and we disable it by marking that memlimit as exception triggered.
6780 */
6781 if ((is_warning == FALSE) && (!task_has_triggered_exc_resource(task, memlimit_is_active))) {
6782 PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND((int)max_footprint_mb, memlimit_is_fatal);
6783 memorystatus_log_exception((int)max_footprint_mb, memlimit_is_active, memlimit_is_fatal);
6784 task_mark_has_triggered_exc_resource(task, memlimit_is_active);
6785 }
6786
6787 memorystatus_on_ledger_footprint_exceeded(is_warning, memlimit_is_active, memlimit_is_fatal);
6788}
6789
6790extern int proc_check_footprint_priv(void);
6791
6792kern_return_t
6793task_set_phys_footprint_limit(
6794 task_t task,
6795 int new_limit_mb,
6796 int *old_limit_mb)
6797{
6798 kern_return_t error;
6799
6800 boolean_t memlimit_is_active;
6801 boolean_t memlimit_is_fatal;
6802
6803 if ((error = proc_check_footprint_priv())) {
6804 return KERN_NO_ACCESS;
6805 }
6806
6807 /*
6808 * This call should probably be obsoleted.
6809 * But for now, we default to current state.
6810 */
6811 memlimit_is_active = task_get_memlimit_is_active(task);
6812 memlimit_is_fatal = task_get_memlimit_is_fatal(task);
6813
6814 return task_set_phys_footprint_limit_internal(task, new_limit_mb, old_limit_mb, memlimit_is_active, memlimit_is_fatal);
6815}
6816
6817kern_return_t
6818task_convert_phys_footprint_limit(
6819 int limit_mb,
6820 int *converted_limit_mb)
6821{
6822 if (limit_mb == -1) {
6823 /*
6824 * No limit
6825 */
6826 if (max_task_footprint != 0) {
6827 *converted_limit_mb = (int)(max_task_footprint / 1024 / 1024); /* bytes to MB */
6828 } else {
6829 *converted_limit_mb = (int)(LEDGER_LIMIT_INFINITY >> 20);
6830 }
6831 } else {
6832 /* nothing to convert */
6833 *converted_limit_mb = limit_mb;
6834 }
6835 return KERN_SUCCESS;
6836}
6837
6838
6839kern_return_t
6840task_set_phys_footprint_limit_internal(
6841 task_t task,
6842 int new_limit_mb,
6843 int *old_limit_mb,
6844 boolean_t memlimit_is_active,
6845 boolean_t memlimit_is_fatal)
6846{
6847 ledger_amount_t old;
6848 kern_return_t ret;
6849
6850 ret = ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &old);
6851
6852 if (ret != KERN_SUCCESS) {
6853 return ret;
6854 }
6855
6856 /*
6857 * Check that limit >> 20 will not give an "unexpected" 32-bit
6858 * result. There are, however, implicit assumptions that -1 mb limit
6859 * equates to LEDGER_LIMIT_INFINITY.
6860 */
6861 assert(((old & 0xFFF0000000000000LL) == 0) || (old == LEDGER_LIMIT_INFINITY));
6862
6863 if (old_limit_mb) {
6864 *old_limit_mb = (int)(old >> 20);
6865 }
6866
6867 if (new_limit_mb == -1) {
6868 /*
6869 * Caller wishes to remove the limit.
6870 */
6871 ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
6872 max_task_footprint ? max_task_footprint : LEDGER_LIMIT_INFINITY,
6873 max_task_footprint ? (uint8_t)max_task_footprint_warning_level : 0);
6874
6875 task_lock(task);
6876 task_set_memlimit_is_active(task, memlimit_is_active);
6877 task_set_memlimit_is_fatal(task, memlimit_is_fatal);
6878 task_unlock(task);
6879
6880 return KERN_SUCCESS;
6881 }
6882
6883#ifdef CONFIG_NOMONITORS
6884 return KERN_SUCCESS;
6885#endif /* CONFIG_NOMONITORS */
6886
6887 task_lock(task);
6888
6889 if ((memlimit_is_active == task_get_memlimit_is_active(task)) &&
6890 (memlimit_is_fatal == task_get_memlimit_is_fatal(task)) &&
6891 (((ledger_amount_t)new_limit_mb << 20) == old)) {
6892 /*
6893 * memlimit state is not changing
6894 */
6895 task_unlock(task);
6896 return KERN_SUCCESS;
6897 }
6898
6899 task_set_memlimit_is_active(task, memlimit_is_active);
6900 task_set_memlimit_is_fatal(task, memlimit_is_fatal);
6901
6902 ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
6903 (ledger_amount_t)new_limit_mb << 20, PHYS_FOOTPRINT_WARNING_LEVEL);
6904
6905 if (task == current_task()) {
6906 ledger_check_new_balance(current_thread(), task->ledger,
6907 task_ledgers.phys_footprint);
6908 }
6909
6910 task_unlock(task);
6911
6912 return KERN_SUCCESS;
6913}
6914
6915kern_return_t
6916task_get_phys_footprint_limit(
6917 task_t task,
6918 int *limit_mb)
6919{
6920 ledger_amount_t limit;
6921 kern_return_t ret;
6922
6923 ret = ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &limit);
6924 if (ret != KERN_SUCCESS) {
6925 return ret;
6926 }
6927
6928 /*
6929 * Check that limit >> 20 will not give an "unexpected" signed, 32-bit
6930 * result. There are, however, implicit assumptions that -1 mb limit
6931 * equates to LEDGER_LIMIT_INFINITY.
6932 */
6933 assert(((limit & 0xFFF0000000000000LL) == 0) || (limit == LEDGER_LIMIT_INFINITY));
6934 *limit_mb = (int)(limit >> 20);
6935
6936 return KERN_SUCCESS;
6937}
6938#else /* CONFIG_MEMORYSTATUS */
6939kern_return_t
6940task_set_phys_footprint_limit(
6941 __unused task_t task,
6942 __unused int new_limit_mb,
6943 __unused int *old_limit_mb)
6944{
6945 return KERN_FAILURE;
6946}
6947
6948kern_return_t
6949task_get_phys_footprint_limit(
6950 __unused task_t task,
6951 __unused int *limit_mb)
6952{
6953 return KERN_FAILURE;
6954}
6955#endif /* CONFIG_MEMORYSTATUS */
6956
6957void
6958task_set_thread_limit(task_t task, uint16_t thread_limit)
6959{
6960 assert(task != kernel_task);
6961 if (thread_limit <= TASK_MAX_THREAD_LIMIT) {
6962 task_lock(task);
6963 task->task_thread_limit = thread_limit;
6964 task_unlock(task);
6965 }
6966}
6967
6968#if XNU_TARGET_OS_OSX
6969boolean_t
6970task_has_system_version_compat_enabled(task_t task)
6971{
6972 boolean_t enabled = FALSE;
6973
6974 task_lock(task);
6975 enabled = (task->t_flags & TF_SYS_VERSION_COMPAT);
6976 task_unlock(task);
6977
6978 return enabled;
6979}
6980
6981void
6982task_set_system_version_compat_enabled(task_t task, boolean_t enable_system_version_compat)
6983{
6984 assert(task == current_task());
6985 assert(task != kernel_task);
6986
6987 task_lock(task);
6988 if (enable_system_version_compat) {
6989 task->t_flags |= TF_SYS_VERSION_COMPAT;
6990 } else {
6991 task->t_flags &= ~TF_SYS_VERSION_COMPAT;
6992 }
6993 task_unlock(task);
6994}
6995#endif /* XNU_TARGET_OS_OSX */
6996
6997/*
6998 * We need to export some functions to other components that
6999 * are currently implemented in macros within the osfmk
7000 * component. Just export them as functions of the same name.
7001 */
7002boolean_t
7003is_kerneltask(task_t t)
7004{
7005 if (t == kernel_task) {
7006 return TRUE;
7007 }
7008
7009 return FALSE;
7010}
7011
7012boolean_t
7013is_corpsetask(task_t t)
7014{
7015 return task_is_a_corpse(t);
7016}
7017
7018#undef current_task
7019task_t current_task(void);
7020task_t
7021current_task(void)
7022{
7023 return current_task_fast();
7024}
7025
7026#undef task_reference
7027void task_reference(task_t task);
7028void
7029task_reference(
7030 task_t task)
7031{
7032 if (task != TASK_NULL) {
7033 task_reference_internal(task);
7034 }
7035}
7036
7037/* defined in bsd/kern/kern_prot.c */
7038extern int get_audit_token_pid(audit_token_t *audit_token);
7039
7040int
7041task_pid(task_t task)
7042{
7043 if (task) {
7044 return get_audit_token_pid(&task->audit_token);
7045 }
7046 return -1;
7047}
7048
7049#if __has_feature(ptrauth_calls)
7050/*
7051 * Get the shared region id and jop signing key for the task.
7052 * The function will allocate a kalloc buffer and return
7053 * it to caller, the caller needs to free it. This is used
7054 * for getting the information via task port.
7055 */
7056char *
7057task_get_vm_shared_region_id_and_jop_pid(task_t task, uint64_t *jop_pid)
7058{
7059 size_t len;
7060 char *shared_region_id = NULL;
7061
7062 task_lock(task);
7063 if (task->shared_region_id == NULL) {
7064 task_unlock(task);
7065 return NULL;
7066 }
7067 len = strlen(task->shared_region_id) + 1;
7068
7069 /* don't hold task lock while allocating */
7070 task_unlock(task);
7071 shared_region_id = kheap_alloc(KHEAP_DATA_BUFFERS, len, Z_WAITOK);
7072 task_lock(task);
7073
7074 if (task->shared_region_id == NULL) {
7075 task_unlock(task);
7076 kheap_free(KHEAP_DATA_BUFFERS, shared_region_id, len);
7077 return NULL;
7078 }
7079 assert(len == strlen(task->shared_region_id) + 1); /* should never change */
7080 strlcpy(shared_region_id, task->shared_region_id, len);
7081 task_unlock(task);
7082
7083 /* find key from its auth pager */
7084 if (jop_pid != NULL) {
7085 *jop_pid = shared_region_find_key(shared_region_id);
7086 }
7087
7088 return shared_region_id;
7089}
7090
7091/*
7092 * set the shared region id for a task
7093 */
7094void
7095task_set_shared_region_id(task_t task, char *id)
7096{
7097 char *old_id;
7098
7099 task_lock(task);
7100 old_id = task->shared_region_id;
7101 task->shared_region_id = id;
7102 task->shared_region_auth_remapped = FALSE;
7103 task_unlock(task);
7104
7105 /* free any pre-existing shared region id */
7106 if (old_id != NULL) {
7107 shared_region_key_dealloc(old_id);
7108 kheap_free(KHEAP_DATA_BUFFERS, old_id, strlen(old_id) + 1);
7109 }
7110}
7111#endif /* __has_feature(ptrauth_calls) */
7112
7113/*
7114 * This routine finds a thread in a task by its unique id
7115 * Returns a referenced thread or THREAD_NULL if the thread was not found
7116 *
7117 * TODO: This is super inefficient - it's an O(threads in task) list walk!
7118 * We should make a tid hash, or transition all tid clients to thread ports
7119 *
7120 * Precondition: No locks held (will take task lock)
7121 */
7122thread_t
7123task_findtid(task_t task, uint64_t tid)
7124{
7125 thread_t self = current_thread();
7126 thread_t found_thread = THREAD_NULL;
7127 thread_t iter_thread = THREAD_NULL;
7128
7129 /* Short-circuit the lookup if we're looking up ourselves */
7130 if (tid == self->thread_id || tid == TID_NULL) {
7131 assert(self->task == task);
7132
7133 thread_reference(self);
7134
7135 return self;
7136 }
7137
7138 task_lock(task);
7139
7140 queue_iterate(&task->threads, iter_thread, thread_t, task_threads) {
7141 if (iter_thread->thread_id == tid) {
7142 found_thread = iter_thread;
7143 thread_reference(found_thread);
7144 break;
7145 }
7146 }
7147
7148 task_unlock(task);
7149
7150 return found_thread;
7151}
7152
7153int
7154pid_from_task(task_t task)
7155{
7156 int pid = -1;
7157
7158 if (task->bsd_info) {
7159 pid = proc_pid(task->bsd_info);
7160 } else {
7161 pid = task_pid(task);
7162 }
7163
7164 return pid;
7165}
7166
7167/*
7168 * Control the CPU usage monitor for a task.
7169 */
7170kern_return_t
7171task_cpu_usage_monitor_ctl(task_t task, uint32_t *flags)
7172{
7173 int error = KERN_SUCCESS;
7174
7175 if (*flags & CPUMON_MAKE_FATAL) {
7176 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_CPUMON;
7177 } else {
7178 error = KERN_INVALID_ARGUMENT;
7179 }
7180
7181 return error;
7182}
7183
7184/*
7185 * Control the wakeups monitor for a task.
7186 */
7187kern_return_t
7188task_wakeups_monitor_ctl(task_t task, uint32_t *flags, int32_t *rate_hz)
7189{
7190 ledger_t ledger = task->ledger;
7191
7192 task_lock(task);
7193 if (*flags & WAKEMON_GET_PARAMS) {
7194 ledger_amount_t limit;
7195 uint64_t period;
7196
7197 ledger_get_limit(ledger, task_ledgers.interrupt_wakeups, &limit);
7198 ledger_get_period(ledger, task_ledgers.interrupt_wakeups, &period);
7199
7200 if (limit != LEDGER_LIMIT_INFINITY) {
7201 /*
7202 * An active limit means the wakeups monitor is enabled.
7203 */
7204 *rate_hz = (int32_t)(limit / (int64_t)(period / NSEC_PER_SEC));
7205 *flags = WAKEMON_ENABLE;
7206 if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON) {
7207 *flags |= WAKEMON_MAKE_FATAL;
7208 }
7209 } else {
7210 *flags = WAKEMON_DISABLE;
7211 *rate_hz = -1;
7212 }
7213
7214 /*
7215 * If WAKEMON_GET_PARAMS is present in flags, all other flags are ignored.
7216 */
7217 task_unlock(task);
7218 return KERN_SUCCESS;
7219 }
7220
7221 if (*flags & WAKEMON_ENABLE) {
7222 if (*flags & WAKEMON_SET_DEFAULTS) {
7223 *rate_hz = task_wakeups_monitor_rate;
7224 }
7225
7226#ifndef CONFIG_NOMONITORS
7227 if (*flags & WAKEMON_MAKE_FATAL) {
7228 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON;
7229 }
7230#endif /* CONFIG_NOMONITORS */
7231
7232 if (*rate_hz <= 0) {
7233 task_unlock(task);
7234 return KERN_INVALID_ARGUMENT;
7235 }
7236
7237#ifndef CONFIG_NOMONITORS
7238 ledger_set_limit(ledger, task_ledgers.interrupt_wakeups, *rate_hz * task_wakeups_monitor_interval,
7239 (uint8_t)task_wakeups_monitor_ustackshots_trigger_pct);
7240 ledger_set_period(ledger, task_ledgers.interrupt_wakeups, task_wakeups_monitor_interval * NSEC_PER_SEC);
7241 ledger_enable_callback(ledger, task_ledgers.interrupt_wakeups);
7242#endif /* CONFIG_NOMONITORS */
7243 } else if (*flags & WAKEMON_DISABLE) {
7244 /*
7245 * Caller wishes to disable wakeups monitor on the task.
7246 *
7247 * Disable telemetry if it was triggered by the wakeups monitor, and
7248 * remove the limit & callback on the wakeups ledger entry.
7249 */
7250#if CONFIG_TELEMETRY
7251 telemetry_task_ctl_locked(task, TF_WAKEMON_WARNING, 0);
7252#endif
7253 ledger_disable_refill(ledger, task_ledgers.interrupt_wakeups);
7254 ledger_disable_callback(ledger, task_ledgers.interrupt_wakeups);
7255 }
7256
7257 task_unlock(task);
7258 return KERN_SUCCESS;
7259}
7260
7261void
7262task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1)
7263{
7264 if (warning == LEDGER_WARNING_ROSE_ABOVE) {
7265#if CONFIG_TELEMETRY
7266 /*
7267 * This task is in danger of violating the wakeups monitor. Enable telemetry on this task
7268 * so there are micro-stackshots available if and when EXC_RESOURCE is triggered.
7269 */
7270 telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 1);
7271#endif
7272 return;
7273 }
7274
7275#if CONFIG_TELEMETRY
7276 /*
7277 * If the balance has dipped below the warning level (LEDGER_WARNING_DIPPED_BELOW) or
7278 * exceeded the limit, turn telemetry off for the task.
7279 */
7280 telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 0);
7281#endif
7282
7283 if (warning == 0) {
7284 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS();
7285 }
7286}
7287
7288void __attribute__((noinline))
7289SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS(void)
7290{
7291 task_t task = current_task();
7292 int pid = 0;
7293 const char *procname = "unknown";
7294 boolean_t fatal;
7295 kern_return_t kr;
7296#ifdef EXC_RESOURCE_MONITORS
7297 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
7298#endif /* EXC_RESOURCE_MONITORS */
7299 struct ledger_entry_info lei;
7300
7301#ifdef MACH_BSD
7302 pid = proc_selfpid();
7303 if (task->bsd_info != NULL) {
7304 procname = proc_name_address(current_task()->bsd_info);
7305 }
7306#endif
7307
7308 ledger_get_entry_info(task->ledger, task_ledgers.interrupt_wakeups, &lei);
7309
7310 /*
7311 * Disable the exception notification so we don't overwhelm
7312 * the listener with an endless stream of redundant exceptions.
7313 * TODO: detect whether another thread is already reporting the violation.
7314 */
7315 uint32_t flags = WAKEMON_DISABLE;
7316 task_wakeups_monitor_ctl(task, &flags, NULL);
7317
7318 fatal = task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON;
7319 trace_resource_violation(RMON_CPUWAKES_VIOLATED, &lei);
7320 os_log(OS_LOG_DEFAULT, "process %s[%d] caught waking the CPU %llu times "
7321 "over ~%llu seconds, averaging %llu wakes / second and "
7322 "violating a %slimit of %llu wakes over %llu seconds.\n",
7323 procname, pid,
7324 lei.lei_balance, lei.lei_last_refill / NSEC_PER_SEC,
7325 lei.lei_last_refill == 0 ? 0 :
7326 (NSEC_PER_SEC * lei.lei_balance / lei.lei_last_refill),
7327 fatal ? "FATAL " : "",
7328 lei.lei_limit, lei.lei_refill_period / NSEC_PER_SEC);
7329
7330 kr = send_resource_violation(send_cpu_wakes_violation, task, &lei,
7331 fatal ? kRNFatalLimitFlag : 0);
7332 if (kr) {
7333 printf("send_resource_violation(CPU wakes, ...): error %#x\n", kr);
7334 }
7335
7336#ifdef EXC_RESOURCE_MONITORS
7337 if (disable_exc_resource) {
7338 printf("process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
7339 "supressed by a boot-arg\n", procname, pid);
7340 return;
7341 }
7342 if (audio_active) {
7343 os_log(OS_LOG_DEFAULT, "process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
7344 "supressed due to audio playback\n", procname, pid);
7345 return;
7346 }
7347 if (lei.lei_last_refill == 0) {
7348 os_log(OS_LOG_DEFAULT, "process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
7349 "supressed due to lei.lei_last_refill = 0 \n", procname, pid);
7350 }
7351
7352 code[0] = code[1] = 0;
7353 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_WAKEUPS);
7354 EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_WAKEUPS_MONITOR);
7355 EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_PERMITTED(code[0],
7356 NSEC_PER_SEC * lei.lei_limit / lei.lei_refill_period);
7357 EXC_RESOURCE_CPUMONITOR_ENCODE_OBSERVATION_INTERVAL(code[0],
7358 lei.lei_last_refill);
7359 EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_OBSERVED(code[1],
7360 NSEC_PER_SEC * lei.lei_balance / lei.lei_last_refill);
7361 exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
7362#endif /* EXC_RESOURCE_MONITORS */
7363
7364 if (fatal) {
7365 task_terminate_internal(task);
7366 }
7367}
7368
7369static boolean_t
7370global_update_logical_writes(int64_t io_delta, int64_t *global_write_count)
7371{
7372 int64_t old_count, new_count;
7373 boolean_t needs_telemetry;
7374
7375 do {
7376 new_count = old_count = *global_write_count;
7377 new_count += io_delta;
7378 if (new_count >= io_telemetry_limit) {
7379 new_count = 0;
7380 needs_telemetry = TRUE;
7381 } else {
7382 needs_telemetry = FALSE;
7383 }
7384 } while (!OSCompareAndSwap64(old_count, new_count, global_write_count));
7385 return needs_telemetry;
7386}
7387
7388void
7389task_update_physical_writes(__unused task_t task, __unused task_physical_write_flavor_t flavor, __unused uint64_t io_size, __unused task_balance_flags_t flags)
7390{
7391#if CONFIG_PHYS_WRITE_ACCT
7392 if (!io_size) {
7393 return;
7394 }
7395
7396 /*
7397 * task == NULL means that we have to update kernel_task ledgers
7398 */
7399 if (!task) {
7400 task = kernel_task;
7401 }
7402
7403 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_PHYS_WRITE_ACCT)) | DBG_FUNC_NONE,
7404 task_pid(task), flavor, io_size, flags, 0);
7405 DTRACE_IO4(physical_writes, struct task *, task, task_physical_write_flavor_t, flavor, uint64_t, io_size, task_balance_flags_t, flags);
7406
7407 if (flags & TASK_BALANCE_CREDIT) {
7408 if (flavor == TASK_PHYSICAL_WRITE_METADATA) {
7409 OSAddAtomic64(io_size, (SInt64 *)&(task->task_fs_metadata_writes));
7410 ledger_credit_nocheck(task->ledger, task_ledgers.fs_metadata_writes, io_size);
7411 }
7412 } else if (flags & TASK_BALANCE_DEBIT) {
7413 if (flavor == TASK_PHYSICAL_WRITE_METADATA) {
7414 OSAddAtomic64(-1 * io_size, (SInt64 *)&(task->task_fs_metadata_writes));
7415 ledger_debit_nocheck(task->ledger, task_ledgers.fs_metadata_writes, io_size);
7416 }
7417 }
7418#endif /* CONFIG_PHYS_WRITE_ACCT */
7419}
7420
7421void
7422task_update_logical_writes(task_t task, uint32_t io_size, int flags, void *vp)
7423{
7424 int64_t io_delta = 0;
7425 int64_t * global_counter_to_update;
7426 boolean_t needs_telemetry = FALSE;
7427 boolean_t is_external_device = FALSE;
7428 int ledger_to_update = 0;
7429 struct task_writes_counters * writes_counters_to_update;
7430
7431 if ((!task) || (!io_size) || (!vp)) {
7432 return;
7433 }
7434
7435 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_DATA_WRITE)) | DBG_FUNC_NONE,
7436 task_pid(task), io_size, flags, (uintptr_t)VM_KERNEL_ADDRPERM(vp), 0);
7437 DTRACE_IO4(logical_writes, struct task *, task, uint32_t, io_size, int, flags, vnode *, vp);
7438
7439 // Is the drive backing this vnode internal or external to the system?
7440 if (vnode_isonexternalstorage(vp) == false) {
7441 global_counter_to_update = &global_logical_writes_count;
7442 ledger_to_update = task_ledgers.logical_writes;
7443 writes_counters_to_update = &task->task_writes_counters_internal;
7444 is_external_device = FALSE;
7445 } else {
7446 global_counter_to_update = &global_logical_writes_to_external_count;
7447 ledger_to_update = task_ledgers.logical_writes_to_external;
7448 writes_counters_to_update = &task->task_writes_counters_external;
7449 is_external_device = TRUE;
7450 }
7451
7452 switch (flags) {
7453 case TASK_WRITE_IMMEDIATE:
7454 OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_immediate_writes));
7455 ledger_credit(task->ledger, ledger_to_update, io_size);
7456 if (!is_external_device) {
7457 coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, TRUE, io_size);
7458 }
7459 break;
7460 case TASK_WRITE_DEFERRED:
7461 OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_deferred_writes));
7462 ledger_credit(task->ledger, ledger_to_update, io_size);
7463 if (!is_external_device) {
7464 coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, TRUE, io_size);
7465 }
7466 break;
7467 case TASK_WRITE_INVALIDATED:
7468 OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_invalidated_writes));
7469 ledger_debit(task->ledger, ledger_to_update, io_size);
7470 if (!is_external_device) {
7471 coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, FALSE, io_size);
7472 }
7473 break;
7474 case TASK_WRITE_METADATA:
7475 OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_metadata_writes));
7476 ledger_credit(task->ledger, ledger_to_update, io_size);
7477 if (!is_external_device) {
7478 coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, TRUE, io_size);
7479 }
7480 break;
7481 }
7482
7483 io_delta = (flags == TASK_WRITE_INVALIDATED) ? ((int64_t)io_size * -1ll) : ((int64_t)io_size);
7484 if (io_telemetry_limit != 0) {
7485 /* If io_telemetry_limit is 0, disable global updates and I/O telemetry */
7486 needs_telemetry = global_update_logical_writes(io_delta, global_counter_to_update);
7487 if (needs_telemetry && !is_external_device) {
7488 act_set_io_telemetry_ast(current_thread());
7489 }
7490 }
7491}
7492
7493/*
7494 * Control the I/O monitor for a task.
7495 */
7496kern_return_t
7497task_io_monitor_ctl(task_t task, uint32_t *flags)
7498{
7499 ledger_t ledger = task->ledger;
7500
7501 task_lock(task);
7502 if (*flags & IOMON_ENABLE) {
7503 /* Configure the physical I/O ledger */
7504 ledger_set_limit(ledger, task_ledgers.physical_writes, (task_iomon_limit_mb * 1024 * 1024), 0);
7505 ledger_set_period(ledger, task_ledgers.physical_writes, (task_iomon_interval_secs * NSEC_PER_SEC));
7506 } else if (*flags & IOMON_DISABLE) {
7507 /*
7508 * Caller wishes to disable I/O monitor on the task.
7509 */
7510 ledger_disable_refill(ledger, task_ledgers.physical_writes);
7511 ledger_disable_callback(ledger, task_ledgers.physical_writes);
7512 }
7513
7514 task_unlock(task);
7515 return KERN_SUCCESS;
7516}
7517
7518void
7519task_io_rate_exceeded(int warning, const void *param0, __unused const void *param1)
7520{
7521 if (warning == 0) {
7522 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO((int)param0);
7523 }
7524}
7525
7526void __attribute__((noinline))
7527SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO(int flavor)
7528{
7529 int pid = 0;
7530 task_t task = current_task();
7531#ifdef EXC_RESOURCE_MONITORS
7532 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
7533#endif /* EXC_RESOURCE_MONITORS */
7534 struct ledger_entry_info lei;
7535 kern_return_t kr;
7536
7537#ifdef MACH_BSD
7538 pid = proc_selfpid();
7539#endif
7540 /*
7541 * Get the ledger entry info. We need to do this before disabling the exception
7542 * to get correct values for all fields.
7543 */
7544 switch (flavor) {
7545 case FLAVOR_IO_PHYSICAL_WRITES:
7546 ledger_get_entry_info(task->ledger, task_ledgers.physical_writes, &lei);
7547 break;
7548 }
7549
7550
7551 /*
7552 * Disable the exception notification so we don't overwhelm
7553 * the listener with an endless stream of redundant exceptions.
7554 * TODO: detect whether another thread is already reporting the violation.
7555 */
7556 uint32_t flags = IOMON_DISABLE;
7557 task_io_monitor_ctl(task, &flags);
7558
7559 if (flavor == FLAVOR_IO_LOGICAL_WRITES) {
7560 trace_resource_violation(RMON_LOGWRITES_VIOLATED, &lei);
7561 }
7562 os_log(OS_LOG_DEFAULT, "process [%d] caught causing excessive I/O (flavor: %d). Task I/O: %lld MB. [Limit : %lld MB per %lld secs]\n",
7563 pid, flavor, (lei.lei_balance / (1024 * 1024)), (lei.lei_limit / (1024 * 1024)), (lei.lei_refill_period / NSEC_PER_SEC));
7564
7565 kr = send_resource_violation(send_disk_writes_violation, task, &lei, kRNFlagsNone);
7566 if (kr) {
7567 printf("send_resource_violation(disk_writes, ...): error %#x\n", kr);
7568 }
7569
7570#ifdef EXC_RESOURCE_MONITORS
7571 code[0] = code[1] = 0;
7572 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_IO);
7573 EXC_RESOURCE_ENCODE_FLAVOR(code[0], flavor);
7574 EXC_RESOURCE_IO_ENCODE_INTERVAL(code[0], (lei.lei_refill_period / NSEC_PER_SEC));
7575 EXC_RESOURCE_IO_ENCODE_LIMIT(code[0], (lei.lei_limit / (1024 * 1024)));
7576 EXC_RESOURCE_IO_ENCODE_OBSERVED(code[1], (lei.lei_balance / (1024 * 1024)));
7577 exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
7578#endif /* EXC_RESOURCE_MONITORS */
7579}
7580
7581/* Placeholders for the task set/get voucher interfaces */
7582kern_return_t
7583task_get_mach_voucher(
7584 task_t task,
7585 mach_voucher_selector_t __unused which,
7586 ipc_voucher_t *voucher)
7587{
7588 if (TASK_NULL == task) {
7589 return KERN_INVALID_TASK;
7590 }
7591
7592 *voucher = NULL;
7593 return KERN_SUCCESS;
7594}
7595
7596kern_return_t
7597task_set_mach_voucher(
7598 task_t task,
7599 ipc_voucher_t __unused voucher)
7600{
7601 if (TASK_NULL == task) {
7602 return KERN_INVALID_TASK;
7603 }
7604
7605 return KERN_SUCCESS;
7606}
7607
7608kern_return_t
7609task_swap_mach_voucher(
7610 __unused task_t task,
7611 __unused ipc_voucher_t new_voucher,
7612 ipc_voucher_t *in_out_old_voucher)
7613{
7614 /*
7615 * Currently this function is only called from a MIG generated
7616 * routine which doesn't release the reference on the voucher
7617 * addressed by in_out_old_voucher. To avoid leaking this reference,
7618 * a call to release it has been added here.
7619 */
7620 ipc_voucher_release(*in_out_old_voucher);
7621 return KERN_NOT_SUPPORTED;
7622}
7623
7624void
7625task_set_gpu_denied(task_t task, boolean_t denied)
7626{
7627 task_lock(task);
7628
7629 if (denied) {
7630 task->t_flags |= TF_GPU_DENIED;
7631 } else {
7632 task->t_flags &= ~TF_GPU_DENIED;
7633 }
7634
7635 task_unlock(task);
7636}
7637
7638boolean_t
7639task_is_gpu_denied(task_t task)
7640{
7641 /* We don't need the lock to read this flag */
7642 return (task->t_flags & TF_GPU_DENIED) ? TRUE : FALSE;
7643}
7644
7645
7646uint64_t
7647get_task_memory_region_count(task_t task)
7648{
7649 vm_map_t map;
7650 map = (task == kernel_task) ? kernel_map: task->map;
7651 return (uint64_t)get_map_nentries(map);
7652}
7653
7654static void
7655kdebug_trace_dyld_internal(uint32_t base_code,
7656 struct dyld_kernel_image_info *info)
7657{
7658 static_assert(sizeof(info->uuid) >= 16);
7659
7660#if defined(__LP64__)
7661 uint64_t *uuid = (uint64_t *)&(info->uuid);
7662
7663 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
7664 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code), uuid[0],
7665 uuid[1], info->load_addr,
7666 (uint64_t)info->fsid.val[0] | ((uint64_t)info->fsid.val[1] << 32),
7667 0);
7668 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
7669 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 1),
7670 (uint64_t)info->fsobjid.fid_objno |
7671 ((uint64_t)info->fsobjid.fid_generation << 32),
7672 0, 0, 0, 0);
7673#else /* defined(__LP64__) */
7674 uint32_t *uuid = (uint32_t *)&(info->uuid);
7675
7676 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
7677 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 2), uuid[0],
7678 uuid[1], uuid[2], uuid[3], 0);
7679 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
7680 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 3),
7681 (uint32_t)info->load_addr, info->fsid.val[0], info->fsid.val[1],
7682 info->fsobjid.fid_objno, 0);
7683 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
7684 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 4),
7685 info->fsobjid.fid_generation, 0, 0, 0, 0);
7686#endif /* !defined(__LP64__) */
7687}
7688
7689static kern_return_t
7690kdebug_trace_dyld(task_t task, uint32_t base_code,
7691 vm_map_copy_t infos_copy, mach_msg_type_number_t infos_len)
7692{
7693 kern_return_t kr;
7694 dyld_kernel_image_info_array_t infos;
7695 vm_map_offset_t map_data;
7696 vm_offset_t data;
7697
7698 if (!infos_copy) {
7699 return KERN_INVALID_ADDRESS;
7700 }
7701
7702 if (!kdebug_enable ||
7703 !kdebug_debugid_enabled(KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, 0))) {
7704 vm_map_copy_discard(infos_copy);
7705 return KERN_SUCCESS;
7706 }
7707
7708 if (task == NULL || task != current_task()) {
7709 return KERN_INVALID_TASK;
7710 }
7711
7712 kr = vm_map_copyout(ipc_kernel_map, &map_data, (vm_map_copy_t)infos_copy);
7713 if (kr != KERN_SUCCESS) {
7714 return kr;
7715 }
7716
7717 infos = CAST_DOWN(dyld_kernel_image_info_array_t, map_data);
7718
7719 for (mach_msg_type_number_t i = 0; i < infos_len; i++) {
7720 kdebug_trace_dyld_internal(base_code, &(infos[i]));
7721 }
7722
7723 data = CAST_DOWN(vm_offset_t, map_data);
7724 mach_vm_deallocate(ipc_kernel_map, data, infos_len * sizeof(infos[0]));
7725 return KERN_SUCCESS;
7726}
7727
7728kern_return_t
7729task_register_dyld_image_infos(task_t task,
7730 dyld_kernel_image_info_array_t infos_copy,
7731 mach_msg_type_number_t infos_len)
7732{
7733 return kdebug_trace_dyld(task, DBG_DYLD_UUID_MAP_A,
7734 (vm_map_copy_t)infos_copy, infos_len);
7735}
7736
7737kern_return_t
7738task_unregister_dyld_image_infos(task_t task,
7739 dyld_kernel_image_info_array_t infos_copy,
7740 mach_msg_type_number_t infos_len)
7741{
7742 return kdebug_trace_dyld(task, DBG_DYLD_UUID_UNMAP_A,
7743 (vm_map_copy_t)infos_copy, infos_len);
7744}
7745
7746kern_return_t
7747task_get_dyld_image_infos(__unused task_t task,
7748 __unused dyld_kernel_image_info_array_t * dyld_images,
7749 __unused mach_msg_type_number_t * dyld_imagesCnt)
7750{
7751 return KERN_NOT_SUPPORTED;
7752}
7753
7754kern_return_t
7755task_register_dyld_shared_cache_image_info(task_t task,
7756 dyld_kernel_image_info_t cache_img,
7757 __unused boolean_t no_cache,
7758 __unused boolean_t private_cache)
7759{
7760 if (task == NULL || task != current_task()) {
7761 return KERN_INVALID_TASK;
7762 }
7763
7764 kdebug_trace_dyld_internal(DBG_DYLD_UUID_SHARED_CACHE_A, &cache_img);
7765 return KERN_SUCCESS;
7766}
7767
7768kern_return_t
7769task_register_dyld_set_dyld_state(__unused task_t task,
7770 __unused uint8_t dyld_state)
7771{
7772 return KERN_NOT_SUPPORTED;
7773}
7774
7775kern_return_t
7776task_register_dyld_get_process_state(__unused task_t task,
7777 __unused dyld_kernel_process_info_t * dyld_process_state)
7778{
7779 return KERN_NOT_SUPPORTED;
7780}
7781
7782kern_return_t
7783task_inspect(task_inspect_t task_insp, task_inspect_flavor_t flavor,
7784 task_inspect_info_t info_out, mach_msg_type_number_t *size_in_out)
7785{
7786#if MONOTONIC
7787 task_t task = (task_t)task_insp;
7788 kern_return_t kr = KERN_SUCCESS;
7789 mach_msg_type_number_t size;
7790
7791 if (task == TASK_NULL) {
7792 return KERN_INVALID_ARGUMENT;
7793 }
7794
7795 size = *size_in_out;
7796
7797 switch (flavor) {
7798 case TASK_INSPECT_BASIC_COUNTS: {
7799 struct task_inspect_basic_counts *bc;
7800 uint64_t task_counts[MT_CORE_NFIXED] = { 0 };
7801
7802 if (size < TASK_INSPECT_BASIC_COUNTS_COUNT) {
7803 kr = KERN_INVALID_ARGUMENT;
7804 break;
7805 }
7806
7807 mt_fixed_task_counts(task, task_counts);
7808 bc = (struct task_inspect_basic_counts *)info_out;
7809#ifdef MT_CORE_INSTRS
7810 bc->instructions = task_counts[MT_CORE_INSTRS];
7811#else /* defined(MT_CORE_INSTRS) */
7812 bc->instructions = 0;
7813#endif /* !defined(MT_CORE_INSTRS) */
7814 bc->cycles = task_counts[MT_CORE_CYCLES];
7815 size = TASK_INSPECT_BASIC_COUNTS_COUNT;
7816 break;
7817 }
7818 default:
7819 kr = KERN_INVALID_ARGUMENT;
7820 break;
7821 }
7822
7823 if (kr == KERN_SUCCESS) {
7824 *size_in_out = size;
7825 }
7826 return kr;
7827#else /* MONOTONIC */
7828#pragma unused(task_insp, flavor, info_out, size_in_out)
7829 return KERN_NOT_SUPPORTED;
7830#endif /* !MONOTONIC */
7831}
7832
7833#if CONFIG_SECLUDED_MEMORY
7834int num_tasks_can_use_secluded_mem = 0;
7835
7836void
7837task_set_can_use_secluded_mem(
7838 task_t task,
7839 boolean_t can_use_secluded_mem)
7840{
7841 if (!task->task_could_use_secluded_mem) {
7842 return;
7843 }
7844 task_lock(task);
7845 task_set_can_use_secluded_mem_locked(task, can_use_secluded_mem);
7846 task_unlock(task);
7847}
7848
7849void
7850task_set_can_use_secluded_mem_locked(
7851 task_t task,
7852 boolean_t can_use_secluded_mem)
7853{
7854 assert(task->task_could_use_secluded_mem);
7855 if (can_use_secluded_mem &&
7856 secluded_for_apps && /* global boot-arg */
7857 !task->task_can_use_secluded_mem) {
7858 assert(num_tasks_can_use_secluded_mem >= 0);
7859 OSAddAtomic(+1,
7860 (volatile SInt32 *)&num_tasks_can_use_secluded_mem);
7861 task->task_can_use_secluded_mem = TRUE;
7862 } else if (!can_use_secluded_mem &&
7863 task->task_can_use_secluded_mem) {
7864 assert(num_tasks_can_use_secluded_mem > 0);
7865 OSAddAtomic(-1,
7866 (volatile SInt32 *)&num_tasks_can_use_secluded_mem);
7867 task->task_can_use_secluded_mem = FALSE;
7868 }
7869}
7870
7871void
7872task_set_could_use_secluded_mem(
7873 task_t task,
7874 boolean_t could_use_secluded_mem)
7875{
7876 task->task_could_use_secluded_mem = !!could_use_secluded_mem;
7877}
7878
7879void
7880task_set_could_also_use_secluded_mem(
7881 task_t task,
7882 boolean_t could_also_use_secluded_mem)
7883{
7884 task->task_could_also_use_secluded_mem = !!could_also_use_secluded_mem;
7885}
7886
7887boolean_t
7888task_can_use_secluded_mem(
7889 task_t task,
7890 boolean_t is_alloc)
7891{
7892 if (task->task_can_use_secluded_mem) {
7893 assert(task->task_could_use_secluded_mem);
7894 assert(num_tasks_can_use_secluded_mem > 0);
7895 return TRUE;
7896 }
7897 if (task->task_could_also_use_secluded_mem &&
7898 num_tasks_can_use_secluded_mem > 0) {
7899 assert(num_tasks_can_use_secluded_mem > 0);
7900 return TRUE;
7901 }
7902
7903 /*
7904 * If a single task is using more than some large amount of
7905 * memory (i.e. secluded_shutoff_trigger) and is approaching
7906 * its task limit, allow it to dip into secluded and begin
7907 * suppression of rebuilding secluded memory until that task exits.
7908 */
7909 if (is_alloc && secluded_shutoff_trigger != 0) {
7910 uint64_t phys_used = get_task_phys_footprint(task);
7911 uint64_t limit = get_task_phys_footprint_limit(task);
7912 if (phys_used > secluded_shutoff_trigger &&
7913 limit > secluded_shutoff_trigger &&
7914 phys_used > limit - secluded_shutoff_headroom) {
7915 start_secluded_suppression(task);
7916 return TRUE;
7917 }
7918 }
7919
7920 return FALSE;
7921}
7922
7923boolean_t
7924task_could_use_secluded_mem(
7925 task_t task)
7926{
7927 return task->task_could_use_secluded_mem;
7928}
7929
7930boolean_t
7931task_could_also_use_secluded_mem(
7932 task_t task)
7933{
7934 return task->task_could_also_use_secluded_mem;
7935}
7936#endif /* CONFIG_SECLUDED_MEMORY */
7937
7938queue_head_t *
7939task_io_user_clients(task_t task)
7940{
7941 return &task->io_user_clients;
7942}
7943
7944void
7945task_set_message_app_suspended(task_t task, boolean_t enable)
7946{
7947 task->message_app_suspended = enable;
7948}
7949
7950void
7951task_copy_fields_for_exec(task_t dst_task, task_t src_task)
7952{
7953 dst_task->vtimers = src_task->vtimers;
7954}
7955
7956#if DEVELOPMENT || DEBUG
7957int vm_region_footprint = 0;
7958#endif /* DEVELOPMENT || DEBUG */
7959
7960boolean_t
7961task_self_region_footprint(void)
7962{
7963#if DEVELOPMENT || DEBUG
7964 if (vm_region_footprint) {
7965 /* system-wide override */
7966 return TRUE;
7967 }
7968#endif /* DEVELOPMENT || DEBUG */
7969 return current_task()->task_region_footprint;
7970}
7971
7972void
7973task_self_region_footprint_set(
7974 boolean_t newval)
7975{
7976 task_t curtask;
7977
7978 curtask = current_task();
7979 task_lock(curtask);
7980 if (newval) {
7981 curtask->task_region_footprint = TRUE;
7982 } else {
7983 curtask->task_region_footprint = FALSE;
7984 }
7985 task_unlock(curtask);
7986}
7987
7988void
7989task_set_darkwake_mode(task_t task, boolean_t set_mode)
7990{
7991 assert(task);
7992
7993 task_lock(task);
7994
7995 if (set_mode) {
7996 task->t_flags |= TF_DARKWAKE_MODE;
7997 } else {
7998 task->t_flags &= ~(TF_DARKWAKE_MODE);
7999 }
8000
8001 task_unlock(task);
8002}
8003
8004boolean_t
8005task_get_darkwake_mode(task_t task)
8006{
8007 assert(task);
8008 return (task->t_flags & TF_DARKWAKE_MODE) != 0;
8009}
8010
8011kern_return_t
8012task_get_exc_guard_behavior(
8013 task_t task,
8014 task_exc_guard_behavior_t *behaviorp)
8015{
8016 if (task == TASK_NULL) {
8017 return KERN_INVALID_TASK;
8018 }
8019 *behaviorp = task->task_exc_guard;
8020 return KERN_SUCCESS;
8021}
8022
8023#ifndef TASK_EXC_GUARD_ALL
8024/* Temporary define until two branches are merged */
8025#define TASK_EXC_GUARD_ALL (TASK_EXC_GUARD_VM_ALL | 0xf0)
8026#endif
8027
8028kern_return_t
8029task_set_exc_guard_behavior(
8030 task_t task,
8031 task_exc_guard_behavior_t behavior)
8032{
8033 if (task == TASK_NULL) {
8034 return KERN_INVALID_TASK;
8035 }
8036 if (behavior & ~TASK_EXC_GUARD_ALL) {
8037 return KERN_INVALID_VALUE;
8038 }
8039 task->task_exc_guard = behavior;
8040 return KERN_SUCCESS;
8041}
8042
8043#if __arm64__
8044extern int legacy_footprint_entitlement_mode;
8045extern void memorystatus_act_on_legacy_footprint_entitlement(struct proc *, boolean_t);
8046extern void memorystatus_act_on_ios13extended_footprint_entitlement(struct proc *);
8047
8048
8049void
8050task_set_legacy_footprint(
8051 task_t task)
8052{
8053 task_lock(task);
8054 task->task_legacy_footprint = TRUE;
8055 task_unlock(task);
8056}
8057
8058void
8059task_set_extra_footprint_limit(
8060 task_t task)
8061{
8062 if (task->task_extra_footprint_limit) {
8063 return;
8064 }
8065 task_lock(task);
8066 if (task->task_extra_footprint_limit) {
8067 task_unlock(task);
8068 return;
8069 }
8070 task->task_extra_footprint_limit = TRUE;
8071 task_unlock(task);
8072 memorystatus_act_on_legacy_footprint_entitlement(task->bsd_info, TRUE);
8073}
8074
8075void
8076task_set_ios13extended_footprint_limit(
8077 task_t task)
8078{
8079 if (task->task_ios13extended_footprint_limit) {
8080 return;
8081 }
8082 task_lock(task);
8083 if (task->task_ios13extended_footprint_limit) {
8084 task_unlock(task);
8085 return;
8086 }
8087 task->task_ios13extended_footprint_limit = TRUE;
8088 task_unlock(task);
8089 memorystatus_act_on_ios13extended_footprint_entitlement(task->bsd_info);
8090}
8091#endif /* __arm64__ */
8092
8093static inline ledger_amount_t
8094task_ledger_get_balance(
8095 ledger_t ledger,
8096 int ledger_idx)
8097{
8098 ledger_amount_t amount;
8099 amount = 0;
8100 ledger_get_balance(ledger, ledger_idx, &amount);
8101 return amount;
8102}
8103
8104/*
8105 * Gather the amount of memory counted in a task's footprint due to
8106 * being in a specific set of ledgers.
8107 */
8108void
8109task_ledgers_footprint(
8110 ledger_t ledger,
8111 ledger_amount_t *ledger_resident,
8112 ledger_amount_t *ledger_compressed)
8113{
8114 *ledger_resident = 0;
8115 *ledger_compressed = 0;
8116
8117 /* purgeable non-volatile memory */
8118 *ledger_resident += task_ledger_get_balance(ledger, task_ledgers.purgeable_nonvolatile);
8119 *ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.purgeable_nonvolatile_compressed);
8120
8121 /* "default" tagged memory */
8122 *ledger_resident += task_ledger_get_balance(ledger, task_ledgers.tagged_footprint);
8123 *ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.tagged_footprint_compressed);
8124
8125 /* "network" currently never counts in the footprint... */
8126
8127 /* "media" tagged memory */
8128 *ledger_resident += task_ledger_get_balance(ledger, task_ledgers.media_footprint);
8129 *ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.media_footprint_compressed);
8130
8131 /* "graphics" tagged memory */
8132 *ledger_resident += task_ledger_get_balance(ledger, task_ledgers.graphics_footprint);
8133 *ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.graphics_footprint_compressed);
8134
8135 /* "neural" tagged memory */
8136 *ledger_resident += task_ledger_get_balance(ledger, task_ledgers.neural_footprint);
8137 *ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.neural_footprint_compressed);
8138}
8139
8140void
8141task_set_memory_ownership_transfer(
8142 task_t task,
8143 boolean_t value)
8144{
8145 task_lock(task);
8146 task->task_can_transfer_memory_ownership = !!value;
8147 task_unlock(task);
8148}
8149
8150void
8151task_copy_vmobjects(task_t task, vm_object_query_t query, size_t len, size_t *num)
8152{
8153 vm_object_t find_vmo;
8154 size_t size = 0;
8155
8156 task_objq_lock(task);
8157 if (query != NULL) {
8158 queue_iterate(&task->task_objq, find_vmo, vm_object_t, task_objq)
8159 {
8160 vm_object_query_t p = &query[size++];
8161
8162 /* make sure to not overrun */
8163 if (size * sizeof(vm_object_query_data_t) > len) {
8164 --size;
8165 break;
8166 }
8167
8168 bzero(p, sizeof(*p));
8169 p->object_id = (vm_object_id_t) VM_KERNEL_ADDRPERM(find_vmo);
8170 p->virtual_size = find_vmo->internal ? find_vmo->vo_size : 0;
8171 p->resident_size = find_vmo->resident_page_count * PAGE_SIZE;
8172 p->wired_size = find_vmo->wired_page_count * PAGE_SIZE;
8173 p->reusable_size = find_vmo->reusable_page_count * PAGE_SIZE;
8174 p->vo_no_footprint = find_vmo->vo_no_footprint;
8175 p->vo_ledger_tag = find_vmo->vo_ledger_tag;
8176 p->purgable = find_vmo->purgable;
8177
8178 if (find_vmo->internal && find_vmo->pager_created && find_vmo->pager != NULL) {
8179 p->compressed_size = vm_compressor_pager_get_count(find_vmo->pager) * PAGE_SIZE;
8180 } else {
8181 p->compressed_size = 0;
8182 }
8183 }
8184 } else {
8185 size = (size_t)task->task_owned_objects;
8186 }
8187 task_objq_unlock(task);
8188
8189 *num = size;
8190}
8191
8192void
8193task_set_filter_msg_flag(
8194 task_t task,
8195 boolean_t flag)
8196{
8197 assert(task != TASK_NULL);
8198
8199 task_lock(task);
8200 if (flag) {
8201 task->t_flags |= TF_FILTER_MSG;
8202 } else {
8203 task->t_flags &= ~TF_FILTER_MSG;
8204 }
8205 task_unlock(task);
8206}
8207
8208boolean_t
8209task_get_filter_msg_flag(
8210 task_t task)
8211{
8212 uint32_t flags = 0;
8213
8214 if (!task) {
8215 return false;
8216 }
8217
8218 flags = os_atomic_load(&task->t_flags, relaxed);
8219 return (flags & TF_FILTER_MSG) ? TRUE : FALSE;
8220}
8221bool
8222task_is_exotic(
8223 task_t task)
8224{
8225 if (task == TASK_NULL) {
8226 return false;
8227 }
8228 return vm_map_is_exotic(get_task_map(task));
8229}
8230
8231bool
8232task_is_alien(
8233 task_t task)
8234{
8235 if (task == TASK_NULL) {
8236 return false;
8237 }
8238 return vm_map_is_alien(get_task_map(task));
8239}
8240
8241
8242
8243#if CONFIG_MACF
8244/* Set the filter mask for Mach traps. */
8245void
8246mac_task_set_mach_filter_mask(task_t task, uint8_t *maskptr)
8247{
8248 assert(task);
8249
8250 task->mach_trap_filter_mask = maskptr;
8251}
8252
8253/* Set the filter mask for kobject msgs. */
8254void
8255mac_task_set_kobj_filter_mask(task_t task, uint8_t *maskptr)
8256{
8257 assert(task);
8258
8259 task->mach_kobj_filter_mask = maskptr;
8260}
8261
8262/* Hook for mach trap/sc filter evaluation policy. */
8263mac_task_mach_filter_cbfunc_t mac_task_mach_trap_evaluate = NULL;
8264
8265/* Hook for kobj message filter evaluation policy. */
8266mac_task_kobj_filter_cbfunc_t mac_task_kobj_msg_evaluate = NULL;
8267
8268/* Set the callback hooks for the filtering policy. */
8269int
8270mac_task_register_filter_callbacks(
8271 const mac_task_mach_filter_cbfunc_t mach_cbfunc,
8272 const mac_task_kobj_filter_cbfunc_t kobj_cbfunc)
8273{
8274 if (mach_cbfunc != NULL) {
8275 if (mac_task_mach_trap_evaluate != NULL) {
8276 return KERN_FAILURE;
8277 }
8278 mac_task_mach_trap_evaluate = mach_cbfunc;
8279 }
8280 if (kobj_cbfunc != NULL) {
8281 if (mac_task_kobj_msg_evaluate != NULL) {
8282 return KERN_FAILURE;
8283 }
8284 mac_task_kobj_msg_evaluate = kobj_cbfunc;
8285 }
8286
8287 return KERN_SUCCESS;
8288}
8289#endif /* CONFIG_MACF */
8290
8291void
8292task_transfer_mach_filter_bits(
8293 task_t new_task,
8294 task_t old_task)
8295{
8296#ifdef CONFIG_MACF
8297 /* Copy mach trap and kernel object mask pointers to new task. */
8298 new_task->mach_trap_filter_mask = old_task->mach_trap_filter_mask;
8299 new_task->mach_kobj_filter_mask = old_task->mach_kobj_filter_mask;
8300#endif
8301 /* If filter message flag is set then set it in the new task. */
8302 if (task_get_filter_msg_flag(old_task)) {
8303 new_task->t_flags |= TF_FILTER_MSG;
8304 }
8305}
8306
8307
8308#if __has_feature(ptrauth_calls)
8309
8310#define PAC_EXCEPTION_ENTITLEMENT "com.apple.private.pac.exception"
8311
8312void
8313task_set_pac_exception_fatal_flag(
8314 task_t task)
8315{
8316 assert(task != TASK_NULL);
8317
8318 if (!IOTaskHasEntitlement(task, PAC_EXCEPTION_ENTITLEMENT)) {
8319 return;
8320 }
8321
8322 task_lock(task);
8323 task->t_flags |= TF_PAC_EXC_FATAL;
8324 task_unlock(task);
8325}
8326
8327bool
8328task_is_pac_exception_fatal(
8329 task_t task)
8330{
8331 uint32_t flags = 0;
8332
8333 assert(task != TASK_NULL);
8334
8335 flags = os_atomic_load(&task->t_flags, relaxed);
8336 return (bool)(flags & TF_PAC_EXC_FATAL);
8337}
8338#endif /* __has_feature(ptrauth_calls) */
8339
8340void
8341task_set_tecs(task_t task)
8342{
8343 if (task == TASK_NULL) {
8344 task = current_task();
8345 }
8346
8347 if (!machine_csv(CPUVN_CI)) {
8348 return;
8349 }
8350
8351 LCK_MTX_ASSERT(&task->lock, LCK_MTX_ASSERT_NOTOWNED);
8352
8353 task_lock(task);
8354
8355 task->t_flags |= TF_TECS;
8356
8357 thread_t thread;
8358 queue_iterate(&task->threads, thread, thread_t, task_threads) {
8359 machine_tecs(thread);
8360 }
8361 task_unlock(task);
8362}