]> git.saurik.com Git - apple/xnu.git/blob - osfmk/kern/task.c
xnu-7195.81.3.tar.gz
[apple/xnu.git] / osfmk / kern / task.c
1 /*
2 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_FREE_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 * File: kern/task.c
58 * Author: Avadis Tevanian, Jr., Michael Wayne Young, David Golub,
59 * David Black
60 *
61 * Task management primitives implementation.
62 */
63 /*
64 * Copyright (c) 1993 The University of Utah and
65 * the Computer Systems Laboratory (CSL). All rights reserved.
66 *
67 * Permission to use, copy, modify and distribute this software and its
68 * documentation is hereby granted, provided that both the copyright
69 * notice and this permission notice appear in all copies of the
70 * software, derivative works or modified versions, and any portions
71 * thereof, and that both notices appear in supporting documentation.
72 *
73 * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
74 * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
75 * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
76 *
77 * CSL requests users of this software to return to csl-dist@cs.utah.edu any
78 * improvements that they make and grant CSL redistribution rights.
79 *
80 */
81 /*
82 * NOTICE: This file was modified by McAfee Research in 2004 to introduce
83 * support for mandatory and extensible security protections. This notice
84 * is included in support of clause 2.2 (b) of the Apple Public License,
85 * Version 2.0.
86 * Copyright (c) 2005 SPARTA, Inc.
87 */
88
89 #include <mach/mach_types.h>
90 #include <mach/boolean.h>
91 #include <mach/host_priv.h>
92 #include <mach/machine/vm_types.h>
93 #include <mach/vm_param.h>
94 #include <mach/mach_vm.h>
95 #include <mach/semaphore.h>
96 #include <mach/task_info.h>
97 #include <mach/task_inspect.h>
98 #include <mach/task_special_ports.h>
99 #include <mach/sdt.h>
100
101 #include <ipc/ipc_importance.h>
102 #include <ipc/ipc_types.h>
103 #include <ipc/ipc_space.h>
104 #include <ipc/ipc_entry.h>
105 #include <ipc/ipc_hash.h>
106
107 #include <kern/kern_types.h>
108 #include <kern/mach_param.h>
109 #include <kern/misc_protos.h>
110 #include <kern/task.h>
111 #include <kern/thread.h>
112 #include <kern/coalition.h>
113 #include <kern/zalloc.h>
114 #include <kern/kalloc.h>
115 #include <kern/kern_cdata.h>
116 #include <kern/processor.h>
117 #include <kern/sched_prim.h> /* for thread_wakeup */
118 #include <kern/ipc_tt.h>
119 #include <kern/host.h>
120 #include <kern/clock.h>
121 #include <kern/timer.h>
122 #include <kern/assert.h>
123 #include <kern/sync_lock.h>
124 #include <kern/affinity.h>
125 #include <kern/exc_resource.h>
126 #include <kern/machine.h>
127 #include <kern/policy_internal.h>
128 #include <kern/restartable.h>
129
130 #include <corpses/task_corpse.h>
131 #if CONFIG_TELEMETRY
132 #include <kern/telemetry.h>
133 #endif
134
135 #if MONOTONIC
136 #include <kern/monotonic.h>
137 #include <machine/monotonic.h>
138 #endif /* MONOTONIC */
139
140 #include <os/log.h>
141
142 #include <vm/pmap.h>
143 #include <vm/vm_map.h>
144 #include <vm/vm_kern.h> /* for kernel_map, ipc_kernel_map */
145 #include <vm/vm_pageout.h>
146 #include <vm/vm_protos.h>
147 #include <vm/vm_purgeable_internal.h>
148 #include <vm/vm_compressor_pager.h>
149
150 #include <sys/resource.h>
151 #include <sys/signalvar.h> /* for coredump */
152 #include <sys/bsdtask_info.h>
153 /*
154 * Exported interfaces
155 */
156
157 #include <mach/task_server.h>
158 #include <mach/mach_host_server.h>
159 #include <mach/host_security_server.h>
160 #include <mach/mach_port_server.h>
161
162 #include <vm/vm_shared_region.h>
163
164 #include <libkern/OSDebug.h>
165 #include <libkern/OSAtomic.h>
166 #include <libkern/section_keywords.h>
167
168 #include <mach-o/loader.h>
169
170 #include <kern/sfi.h> /* picks up ledger.h */
171
172 #if CONFIG_MACF
173 #include <security/mac_mach_internal.h>
174 #endif
175
176 #include <IOKit/IOBSD.h>
177
178 #if KPERF
179 extern int kpc_force_all_ctrs(task_t, int);
180 #endif
181
182 SECURITY_READ_ONLY_LATE(task_t) kernel_task;
183
184 static SECURITY_READ_ONLY_LATE(zone_t) task_zone;
185 ZONE_INIT(&task_zone, "tasks", sizeof(struct task),
186 ZC_NOENCRYPT | ZC_ZFREE_CLEARMEM,
187 ZONE_ID_TASK, NULL);
188
189 extern int exc_via_corpse_forking;
190 extern int corpse_for_fatal_memkill;
191 extern boolean_t proc_send_synchronous_EXC_RESOURCE(void *p);
192 extern void task_disown_frozen_csegs(task_t owner_task);
193
194 /* Flag set by core audio when audio is playing. Used to stifle EXC_RESOURCE generation when active. */
195 int audio_active = 0;
196
/*
 * Zone usage accounting record.
 * One instance is used either per task/thread for all zones, or per
 * <task, zone> pair.
 */
typedef struct zinfo_usage_store_t {
	/*
	 * Both counters may be updated atomically, hence the explicit
	 * 8 byte alignment on each of them.
	 */
	uint64_t alloc __attribute__((aligned(8)));     /* allocation counter */
	uint64_t free __attribute__((aligned(8)));      /* free counter */
} zinfo_usage_store_t;

zinfo_usage_store_t tasks_tkm_private;
zinfo_usage_store_t tasks_tkm_shared;
209
210 /* A container to accumulate statistics for expired tasks */
211 expired_task_statistics_t dead_task_statistics;
212 LCK_SPIN_DECLARE_ATTR(dead_task_statistics_lock, &task_lck_grp, &task_lck_attr);
213
214 ledger_template_t task_ledger_template = NULL;
215
216 SECURITY_READ_ONLY_LATE(struct _task_ledger_indices) task_ledgers __attribute__((used)) =
217 {.cpu_time = -1,
218 .tkm_private = -1,
219 .tkm_shared = -1,
220 .phys_mem = -1,
221 .wired_mem = -1,
222 .internal = -1,
223 .iokit_mapped = -1,
224 .alternate_accounting = -1,
225 .alternate_accounting_compressed = -1,
226 .page_table = -1,
227 .phys_footprint = -1,
228 .internal_compressed = -1,
229 .purgeable_volatile = -1,
230 .purgeable_nonvolatile = -1,
231 .purgeable_volatile_compressed = -1,
232 .purgeable_nonvolatile_compressed = -1,
233 .tagged_nofootprint = -1,
234 .tagged_footprint = -1,
235 .tagged_nofootprint_compressed = -1,
236 .tagged_footprint_compressed = -1,
237 .network_volatile = -1,
238 .network_nonvolatile = -1,
239 .network_volatile_compressed = -1,
240 .network_nonvolatile_compressed = -1,
241 .media_nofootprint = -1,
242 .media_footprint = -1,
243 .media_nofootprint_compressed = -1,
244 .media_footprint_compressed = -1,
245 .graphics_nofootprint = -1,
246 .graphics_footprint = -1,
247 .graphics_nofootprint_compressed = -1,
248 .graphics_footprint_compressed = -1,
249 .neural_nofootprint = -1,
250 .neural_footprint = -1,
251 .neural_nofootprint_compressed = -1,
252 .neural_footprint_compressed = -1,
253 .platform_idle_wakeups = -1,
254 .interrupt_wakeups = -1,
255 #if CONFIG_SCHED_SFI
256 .sfi_wait_times = { 0 /* initialized at runtime */},
257 #endif /* CONFIG_SCHED_SFI */
258 .cpu_time_billed_to_me = -1,
259 .cpu_time_billed_to_others = -1,
260 .physical_writes = -1,
261 .logical_writes = -1,
262 .logical_writes_to_external = -1,
263 #if DEBUG || DEVELOPMENT
264 .pages_grabbed = -1,
265 .pages_grabbed_kern = -1,
266 .pages_grabbed_iopl = -1,
267 .pages_grabbed_upl = -1,
268 #endif
269 #if CONFIG_FREEZE
270 .frozen_to_swap = -1,
271 #endif /* CONFIG_FREEZE */
272 .energy_billed_to_me = -1,
273 .energy_billed_to_others = -1,
274 #if CONFIG_PHYS_WRITE_ACCT
275 .fs_metadata_writes = -1,
276 #endif /* CONFIG_PHYS_WRITE_ACCT */
277 };
278
279 /* System sleep state */
280 boolean_t tasks_suspend_state;
281
282
283 void init_task_ledgers(void);
284 void task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1);
285 void task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1);
286 void task_io_rate_exceeded(int warning, const void *param0, __unused const void *param1);
287 void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS(void);
288 void __attribute__((noinline)) PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb, boolean_t is_fatal);
289 void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO(int flavor);
290
291 kern_return_t task_suspend_internal(task_t);
292 kern_return_t task_resume_internal(task_t);
293 static kern_return_t task_start_halt_locked(task_t task, boolean_t should_mark_corpse);
294
295 extern kern_return_t iokit_task_terminate(task_t task);
296 extern void iokit_task_app_suspended_changed(task_t task);
297
298 extern kern_return_t exception_deliver(thread_t, exception_type_t, mach_exception_data_t, mach_msg_type_number_t, struct exception_action *, lck_mtx_t *);
299 extern void bsd_copythreadname(void *dst_uth, void *src_uth);
300 extern kern_return_t thread_resume(thread_t thread);
301
302 // Warn tasks when they hit 80% of their memory limit.
303 #define PHYS_FOOTPRINT_WARNING_LEVEL 80
304
305 #define TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT 150 /* wakeups per second */
306 #define TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL 300 /* in seconds. */
307
308 /*
309 * Level (in terms of percentage of the limit) at which the wakeups monitor triggers telemetry.
310 *
311 * (ie when the task's wakeups rate exceeds 70% of the limit, start taking user
312 * stacktraces, aka micro-stackshots)
313 */
314 #define TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER 70
315
316 int task_wakeups_monitor_interval; /* In seconds. Time period over which wakeups rate is observed */
317 int task_wakeups_monitor_rate; /* In hz. Maximum allowable wakeups per task before EXC_RESOURCE is sent */
318
319 unsigned int task_wakeups_monitor_ustackshots_trigger_pct; /* Percentage. Level at which we start gathering telemetry. */
320
321 int disable_exc_resource; /* Global override to supress EXC_RESOURCE for resource monitor violations. */
322
323 ledger_amount_t max_task_footprint = 0; /* Per-task limit on physical memory consumption in bytes */
324 unsigned int max_task_footprint_warning_level = 0; /* Per-task limit warning percentage */
325 int max_task_footprint_mb = 0; /* Per-task limit on physical memory consumption in megabytes */
326
327 /* I/O Monitor Limits */
328 #define IOMON_DEFAULT_LIMIT (20480ull) /* MB of logical/physical I/O */
329 #define IOMON_DEFAULT_INTERVAL (86400ull) /* in seconds */
330
331 uint64_t task_iomon_limit_mb; /* Per-task I/O monitor limit in MBs */
332 uint64_t task_iomon_interval_secs; /* Per-task I/O monitor interval in secs */
333
334 #define IO_TELEMETRY_DEFAULT_LIMIT (10ll * 1024ll * 1024ll)
335 int64_t io_telemetry_limit; /* Threshold to take a microstackshot (0 indicated I/O telemetry is turned off) */
336 int64_t global_logical_writes_count = 0; /* Global count for logical writes */
337 int64_t global_logical_writes_to_external_count = 0; /* Global count for logical writes to external storage*/
338 static boolean_t global_update_logical_writes(int64_t, int64_t*);
339
340 #define TASK_MAX_THREAD_LIMIT 256
341
342 #if MACH_ASSERT
343 int pmap_ledgers_panic = 1;
344 int pmap_ledgers_panic_leeway = 3;
345 #endif /* MACH_ASSERT */
346
347 int task_max = CONFIG_TASK_MAX; /* Max number of tasks */
348
#if CONFIG_COREDUMP
/* high watermark violations generate user core files */
int hwm_user_cores = 0;
#endif

#ifdef MACH_BSD
/* BSD-side routines called from this file. */
extern uint32_t proc_platform(const struct proc *);
extern uint32_t proc_min_sdk(struct proc *);
extern void proc_getexecutableuuid(void *, unsigned char *, unsigned long);
extern int proc_pid(struct proc *p);
extern int proc_selfpid(void);
extern struct proc *current_proc(void);
extern char *proc_name_address(struct proc *p);
extern uint64_t get_dispatchqueue_offset_from_proc(void *);
extern int kevent_proc_copy_uptrs(void *proc, uint64_t *buf, uint32_t bufsize);
extern void workq_proc_suspended(struct proc *p);
extern void workq_proc_resumed(struct proc *p);

#if CONFIG_MEMORYSTATUS
extern void proc_memstat_terminated(struct proc* p, boolean_t set);
extern void memorystatus_on_ledger_footprint_exceeded(int warning,
    boolean_t memlimit_is_active, boolean_t memlimit_is_fatal);
extern void memorystatus_log_exception(const int max_footprint_mb,
    boolean_t memlimit_is_active, boolean_t memlimit_is_fatal);
extern boolean_t memorystatus_allowed_vm_map_fork(task_t task);
extern uint64_t memorystatus_available_memory_internal(struct proc *p);

#if DEVELOPMENT || DEBUG
extern void memorystatus_abort_vm_map_fork(task_t);
#endif

#endif /* CONFIG_MEMORYSTATUS */

#endif /* MACH_BSD */

#if DEVELOPMENT || DEBUG
/* Set from the "exc_resource_threads" boot-arg in task_init(); defaults to 1 there. */
int exc_resource_threads_enabled;
#endif /* DEVELOPMENT || DEBUG */

/*
 * Default EXC_GUARD behavior: DEVELOPMENT/DEBUG kernels enable the
 * deliver/once/corpse bits for both mach port (MP) and VM guards,
 * other kernels start with everything off.
 */
#if (DEVELOPMENT || DEBUG)
uint32_t task_exc_guard_default =
    TASK_EXC_GUARD_MP_DELIVER | TASK_EXC_GUARD_MP_ONCE | TASK_EXC_GUARD_MP_CORPSE |
    TASK_EXC_GUARD_VM_DELIVER | TASK_EXC_GUARD_VM_ONCE | TASK_EXC_GUARD_VM_CORPSE;
#else
uint32_t task_exc_guard_default = 0;
#endif
391
392 /* Forwards */
393
394 static void task_hold_locked(task_t task);
395 static void task_wait_locked(task_t task, boolean_t until_not_runnable);
396 static void task_release_locked(task_t task);
397
398 static void task_synchronizer_destroy_all(task_t task);
399 static os_ref_count_t
400 task_add_turnstile_watchports_locked(
401 task_t task,
402 struct task_watchports *watchports,
403 struct task_watchport_elem **previous_elem_array,
404 ipc_port_t *portwatch_ports,
405 uint32_t portwatch_count);
406
407 static os_ref_count_t
408 task_remove_turnstile_watchports_locked(
409 task_t task,
410 struct task_watchports *watchports,
411 ipc_port_t *port_freelist);
412
413 static struct task_watchports *
414 task_watchports_alloc_init(
415 task_t task,
416 thread_t thread,
417 uint32_t count);
418
419 static void
420 task_watchports_deallocate(
421 struct task_watchports *watchports);
422
423 void
424 task_set_64bit(
425 task_t task,
426 boolean_t is_64bit,
427 boolean_t is_64bit_data)
428 {
429 #if defined(__i386__) || defined(__x86_64__) || defined(__arm64__)
430 thread_t thread;
431 #endif /* defined(__i386__) || defined(__x86_64__) || defined(__arm64__) */
432
433 task_lock(task);
434
435 /*
436 * Switching to/from 64-bit address spaces
437 */
438 if (is_64bit) {
439 if (!task_has_64Bit_addr(task)) {
440 task_set_64Bit_addr(task);
441 }
442 } else {
443 if (task_has_64Bit_addr(task)) {
444 task_clear_64Bit_addr(task);
445 }
446 }
447
448 /*
449 * Switching to/from 64-bit register state.
450 */
451 if (is_64bit_data) {
452 if (task_has_64Bit_data(task)) {
453 goto out;
454 }
455
456 task_set_64Bit_data(task);
457 } else {
458 if (!task_has_64Bit_data(task)) {
459 goto out;
460 }
461
462 task_clear_64Bit_data(task);
463 }
464
465 /* FIXME: On x86, the thread save state flavor can diverge from the
466 * task's 64-bit feature flag due to the 32-bit/64-bit register save
467 * state dichotomy. Since we can be pre-empted in this interval,
468 * certain routines may observe the thread as being in an inconsistent
469 * state with respect to its task's 64-bitness.
470 */
471
472 #if defined(__x86_64__) || defined(__arm64__)
473 queue_iterate(&task->threads, thread, thread_t, task_threads) {
474 thread_mtx_lock(thread);
475 machine_thread_switch_addrmode(thread);
476 thread_mtx_unlock(thread);
477 }
478 #endif /* defined(__x86_64__) || defined(__arm64__) */
479
480 out:
481 task_unlock(task);
482 }
483
484 boolean_t
485 task_get_64bit_data(task_t task)
486 {
487 return task_has_64Bit_data(task);
488 }
489
490 void
491 task_set_platform_binary(
492 task_t task,
493 boolean_t is_platform)
494 {
495 task_lock(task);
496 if (is_platform) {
497 task->t_flags |= TF_PLATFORM;
498 /* set exc guard default behavior for first-party code */
499 task->task_exc_guard = (task_exc_guard_default & TASK_EXC_GUARD_ALL);
500 } else {
501 task->t_flags &= ~(TF_PLATFORM);
502 /* set exc guard default behavior for third-party code */
503 task->task_exc_guard = ((task_exc_guard_default >> TASK_EXC_GUARD_THIRD_PARTY_DEFAULT_SHIFT) & TASK_EXC_GUARD_ALL);
504 }
505 task_unlock(task);
506 }
507
508 /*
509 * Set or clear per-task TF_CA_CLIENT_WI flag according to specified argument.
510 * Returns "false" if flag is already set, and "true" in other cases.
511 */
512 bool
513 task_set_ca_client_wi(
514 task_t task,
515 boolean_t set_or_clear)
516 {
517 bool ret = true;
518 task_lock(task);
519 if (set_or_clear) {
520 /* Tasks can have only one CA_CLIENT work interval */
521 if (task->t_flags & TF_CA_CLIENT_WI) {
522 ret = false;
523 } else {
524 task->t_flags |= TF_CA_CLIENT_WI;
525 }
526 } else {
527 task->t_flags &= ~TF_CA_CLIENT_WI;
528 }
529 task_unlock(task);
530 return ret;
531 }
532
533 void
534 task_set_dyld_info(
535 task_t task,
536 mach_vm_address_t addr,
537 mach_vm_size_t size)
538 {
539 task_lock(task);
540 task->all_image_info_addr = addr;
541 task->all_image_info_size = size;
542 task_unlock(task);
543 }
544
545 void
546 task_set_mach_header_address(
547 task_t task,
548 mach_vm_address_t addr)
549 {
550 task_lock(task);
551 task->mach_header_vm_address = addr;
552 task_unlock(task);
553 }
554
555 void
556 task_bank_reset(__unused task_t task)
557 {
558 if (task->bank_context != NULL) {
559 bank_task_destroy(task);
560 }
561 }
562
563 /*
564 * NOTE: This should only be called when the P_LINTRANSIT
565 * flag is set (the proc_trans lock is held) on the
566 * proc associated with the task.
567 */
568 void
569 task_bank_init(__unused task_t task)
570 {
571 if (task->bank_context != NULL) {
572 panic("Task bank init called with non null bank context for task: %p and bank_context: %p", task, task->bank_context);
573 }
574 bank_task_initialize(task);
575 }
576
577 void
578 task_set_did_exec_flag(task_t task)
579 {
580 task->t_procflags |= TPF_DID_EXEC;
581 }
582
583 void
584 task_clear_exec_copy_flag(task_t task)
585 {
586 task->t_procflags &= ~TPF_EXEC_COPY;
587 }
588
589 event_t
590 task_get_return_wait_event(task_t task)
591 {
592 return (event_t)&task->returnwait_inheritor;
593 }
594
595 void
596 task_clear_return_wait(task_t task, uint32_t flags)
597 {
598 if (flags & TCRW_CLEAR_INITIAL_WAIT) {
599 thread_wakeup(task_get_return_wait_event(task));
600 }
601
602 if (flags & TCRW_CLEAR_FINAL_WAIT) {
603 is_write_lock(task->itk_space);
604
605 task->t_returnwaitflags &= ~TRW_LRETURNWAIT;
606 task->returnwait_inheritor = NULL;
607
608 if (task->t_returnwaitflags & TRW_LRETURNWAITER) {
609 struct turnstile *turnstile = turnstile_prepare((uintptr_t) task_get_return_wait_event(task),
610 NULL, TURNSTILE_NULL, TURNSTILE_ULOCK);
611
612 waitq_wakeup64_all(&turnstile->ts_waitq,
613 CAST_EVENT64_T(task_get_return_wait_event(task)),
614 THREAD_AWAKENED, 0);
615
616 turnstile_update_inheritor(turnstile, NULL,
617 TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD);
618 turnstile_update_inheritor_complete(turnstile, TURNSTILE_INTERLOCK_HELD);
619
620 turnstile_complete((uintptr_t) task_get_return_wait_event(task), NULL, NULL, TURNSTILE_ULOCK);
621 turnstile_cleanup();
622 task->t_returnwaitflags &= ~TRW_LRETURNWAITER;
623 }
624 is_write_unlock(task->itk_space);
625 }
626 }
627
628 void __attribute__((noreturn))
629 task_wait_to_return(void)
630 {
631 task_t task = current_task();
632
633 is_write_lock(task->itk_space);
634
635 if (task->t_returnwaitflags & TRW_LRETURNWAIT) {
636 struct turnstile *turnstile = turnstile_prepare((uintptr_t) task_get_return_wait_event(task),
637 NULL, TURNSTILE_NULL, TURNSTILE_ULOCK);
638
639 do {
640 task->t_returnwaitflags |= TRW_LRETURNWAITER;
641 turnstile_update_inheritor(turnstile, task->returnwait_inheritor,
642 (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));
643
644 waitq_assert_wait64(&turnstile->ts_waitq,
645 CAST_EVENT64_T(task_get_return_wait_event(task)),
646 THREAD_UNINT, TIMEOUT_WAIT_FOREVER);
647
648 is_write_unlock(task->itk_space);
649
650 turnstile_update_inheritor_complete(turnstile, TURNSTILE_INTERLOCK_NOT_HELD);
651
652 thread_block(THREAD_CONTINUE_NULL);
653
654 is_write_lock(task->itk_space);
655 } while (task->t_returnwaitflags & TRW_LRETURNWAIT);
656
657 turnstile_complete((uintptr_t) task_get_return_wait_event(task), NULL, NULL, TURNSTILE_ULOCK);
658 }
659
660 is_write_unlock(task->itk_space);
661 turnstile_cleanup();
662
663
664 #if CONFIG_MACF
665 /*
666 * Before jumping to userspace and allowing this process to execute any code,
667 * notify any interested parties.
668 */
669 mac_proc_notify_exec_complete(current_proc());
670 #endif
671
672 thread_bootstrap_return();
673 }
674
#ifdef CONFIG_32BIT_TELEMETRY
/*
 * Test-and-clear of TPF_LOG_32BIT_TELEMETRY: returns TRUE (and clears
 * the flag) if it was set, FALSE otherwise.  No lock is taken here.
 */
boolean_t
task_consume_32bit_log_flag(task_t task)
{
	if ((task->t_procflags & TPF_LOG_32BIT_TELEMETRY) == 0) {
		return FALSE;
	}

	task->t_procflags &= ~TPF_LOG_32BIT_TELEMETRY;
	return TRUE;
}

/*
 * Set TPF_LOG_32BIT_TELEMETRY on the task.  No lock is taken here.
 */
void
task_set_32bit_log_flag(task_t task)
{
	task->t_procflags = task->t_procflags | TPF_LOG_32BIT_TELEMETRY;
}
#endif /* CONFIG_32BIT_TELEMETRY */
693
694 boolean_t
695 task_is_exec_copy(task_t task)
696 {
697 return task_is_exec_copy_internal(task);
698 }
699
700 boolean_t
701 task_did_exec(task_t task)
702 {
703 return task_did_exec_internal(task);
704 }
705
706 boolean_t
707 task_is_active(task_t task)
708 {
709 return task->active;
710 }
711
712 boolean_t
713 task_is_halting(task_t task)
714 {
715 return task->halting;
716 }
717
#if TASK_REFERENCE_LEAK_DEBUG
#include <kern/btlog.h>

/* Backtrace log of task reference operations; created in task_init(). */
static btlog_t *task_ref_btlog;

/* Operation tags stored with each log entry. */
#define TASK_REF_OP_INCR        0x1
#define TASK_REF_OP_DECR        0x2

/* Log capacity and number of frames captured per entry. */
#define TASK_REF_NUM_RECORDS    100000
#define TASK_REF_BTDEPTH        7

/*
 * Take a reference on the task, then record the caller's backtrace
 * in the log as an INCR operation.
 */
void
task_reference_internal(task_t task)
{
	void *frames[TASK_REF_BTDEPTH];
	int nframes;

	task_require(task);
	os_ref_retain(&task->ref_count);

	nframes = OSBacktrace(frames, TASK_REF_BTDEPTH);
	btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_INCR,
	    frames, nframes);
}

/*
 * Record the caller's backtrace in the log as a DECR operation, then
 * release one reference and return the result of os_ref_release().
 */
os_ref_count_t
task_deallocate_internal(task_t task)
{
	void *frames[TASK_REF_BTDEPTH];
	int nframes = OSBacktrace(frames, TASK_REF_BTDEPTH);

	btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_DECR,
	    frames, nframes);

	return os_ref_release(&task->ref_count);
}

#endif /* TASK_REFERENCE_LEAK_DEBUG */
756
757 void
758 task_init(void)
759 {
760 /*
761 * Configure per-task memory limit.
762 * The boot-arg is interpreted as Megabytes,
763 * and takes precedence over the device tree.
764 * Setting the boot-arg to 0 disables task limits.
765 */
766 if (!PE_parse_boot_argn("max_task_pmem", &max_task_footprint_mb,
767 sizeof(max_task_footprint_mb))) {
768 /*
769 * No limit was found in boot-args, so go look in the device tree.
770 */
771 if (!PE_get_default("kern.max_task_pmem", &max_task_footprint_mb,
772 sizeof(max_task_footprint_mb))) {
773 /*
774 * No limit was found in device tree.
775 */
776 max_task_footprint_mb = 0;
777 }
778 }
779
780 if (max_task_footprint_mb != 0) {
781 #if CONFIG_MEMORYSTATUS
782 if (max_task_footprint_mb < 50) {
783 printf("Warning: max_task_pmem %d below minimum.\n",
784 max_task_footprint_mb);
785 max_task_footprint_mb = 50;
786 }
787 printf("Limiting task physical memory footprint to %d MB\n",
788 max_task_footprint_mb);
789
790 max_task_footprint = (ledger_amount_t)max_task_footprint_mb * 1024 * 1024; // Convert MB to bytes
791
792 /*
793 * Configure the per-task memory limit warning level.
794 * This is computed as a percentage.
795 */
796 max_task_footprint_warning_level = 0;
797
798 if (max_mem < 0x40000000) {
799 /*
800 * On devices with < 1GB of memory:
801 * -- set warnings to 50MB below the per-task limit.
802 */
803 if (max_task_footprint_mb > 50) {
804 max_task_footprint_warning_level = ((max_task_footprint_mb - 50) * 100) / max_task_footprint_mb;
805 }
806 } else {
807 /*
808 * On devices with >= 1GB of memory:
809 * -- set warnings to 100MB below the per-task limit.
810 */
811 if (max_task_footprint_mb > 100) {
812 max_task_footprint_warning_level = ((max_task_footprint_mb - 100) * 100) / max_task_footprint_mb;
813 }
814 }
815
816 /*
817 * Never allow warning level to land below the default.
818 */
819 if (max_task_footprint_warning_level < PHYS_FOOTPRINT_WARNING_LEVEL) {
820 max_task_footprint_warning_level = PHYS_FOOTPRINT_WARNING_LEVEL;
821 }
822
823 printf("Limiting task physical memory warning to %d%%\n", max_task_footprint_warning_level);
824
825 #else
826 printf("Warning: max_task_pmem specified, but jetsam not configured; ignoring.\n");
827 #endif /* CONFIG_MEMORYSTATUS */
828 }
829
830 #if DEVELOPMENT || DEBUG
831 if (!PE_parse_boot_argn("exc_resource_threads",
832 &exc_resource_threads_enabled,
833 sizeof(exc_resource_threads_enabled))) {
834 exc_resource_threads_enabled = 1;
835 }
836 PE_parse_boot_argn("task_exc_guard_default",
837 &task_exc_guard_default,
838 sizeof(task_exc_guard_default));
839 #endif /* DEVELOPMENT || DEBUG */
840
841 #if CONFIG_COREDUMP
842 if (!PE_parse_boot_argn("hwm_user_cores", &hwm_user_cores,
843 sizeof(hwm_user_cores))) {
844 hwm_user_cores = 0;
845 }
846 #endif
847
848 proc_init_cpumon_params();
849
850 if (!PE_parse_boot_argn("task_wakeups_monitor_rate", &task_wakeups_monitor_rate, sizeof(task_wakeups_monitor_rate))) {
851 task_wakeups_monitor_rate = TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT;
852 }
853
854 if (!PE_parse_boot_argn("task_wakeups_monitor_interval", &task_wakeups_monitor_interval, sizeof(task_wakeups_monitor_interval))) {
855 task_wakeups_monitor_interval = TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL;
856 }
857
858 if (!PE_parse_boot_argn("task_wakeups_monitor_ustackshots_trigger_pct", &task_wakeups_monitor_ustackshots_trigger_pct,
859 sizeof(task_wakeups_monitor_ustackshots_trigger_pct))) {
860 task_wakeups_monitor_ustackshots_trigger_pct = TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER;
861 }
862
863 if (!PE_parse_boot_argn("disable_exc_resource", &disable_exc_resource,
864 sizeof(disable_exc_resource))) {
865 disable_exc_resource = 0;
866 }
867
868 if (!PE_parse_boot_argn("task_iomon_limit_mb", &task_iomon_limit_mb, sizeof(task_iomon_limit_mb))) {
869 task_iomon_limit_mb = IOMON_DEFAULT_LIMIT;
870 }
871
872 if (!PE_parse_boot_argn("task_iomon_interval_secs", &task_iomon_interval_secs, sizeof(task_iomon_interval_secs))) {
873 task_iomon_interval_secs = IOMON_DEFAULT_INTERVAL;
874 }
875
876 if (!PE_parse_boot_argn("io_telemetry_limit", &io_telemetry_limit, sizeof(io_telemetry_limit))) {
877 io_telemetry_limit = IO_TELEMETRY_DEFAULT_LIMIT;
878 }
879
880 /*
881 * If we have coalitions, coalition_init() will call init_task_ledgers() as it
882 * sets up the ledgers for the default coalition. If we don't have coalitions,
883 * then we have to call it now.
884 */
885 #if CONFIG_COALITIONS
886 assert(task_ledger_template);
887 #else /* CONFIG_COALITIONS */
888 init_task_ledgers();
889 #endif /* CONFIG_COALITIONS */
890
891 #if TASK_REFERENCE_LEAK_DEBUG
892 task_ref_btlog = btlog_create(TASK_REF_NUM_RECORDS, TASK_REF_BTDEPTH, TRUE /* caller_will_remove_entries_for_element? */);
893 assert(task_ref_btlog);
894 #endif
895
896 /*
897 * Create the kernel task as the first task.
898 */
899 #ifdef __LP64__
900 if (task_create_internal(TASK_NULL, NULL, FALSE, TRUE, TRUE, TF_NONE, TPF_NONE, TWF_NONE, &kernel_task) != KERN_SUCCESS)
901 #else
902 if (task_create_internal(TASK_NULL, NULL, FALSE, FALSE, FALSE, TF_NONE, TPF_NONE, TWF_NONE, &kernel_task) != KERN_SUCCESS)
903 #endif
904 { panic("task_init\n");}
905
906 #if defined(HAS_APPLE_PAC)
907 kernel_task->rop_pid = ml_default_rop_pid();
908 kernel_task->jop_pid = ml_default_jop_pid();
909 // kernel_task never runs at EL0, but machine_thread_state_convert_from/to_user() relies on
910 // disable_user_jop to be false for kernel threads (e.g. in exception delivery on thread_exception_daemon)
911 ml_task_set_disable_user_jop(kernel_task, FALSE);
912 #endif
913
914 vm_map_deallocate(kernel_task->map);
915 kernel_task->map = kernel_map;
916 }
917
918 /*
919 * Create a task running in the kernel address space. It may
920 * have its own map of size mem_size and may have ipc privileges.
921 */
922 kern_return_t
923 kernel_task_create(
924 __unused task_t parent_task,
925 __unused vm_offset_t map_base,
926 __unused vm_size_t map_size,
927 __unused task_t *child_task)
928 {
929 return KERN_INVALID_ARGUMENT;
930 }
931
932 kern_return_t
933 task_create(
934 task_t parent_task,
935 __unused ledger_port_array_t ledger_ports,
936 __unused mach_msg_type_number_t num_ledger_ports,
937 __unused boolean_t inherit_memory,
938 __unused task_t *child_task) /* OUT */
939 {
940 if (parent_task == TASK_NULL) {
941 return KERN_INVALID_ARGUMENT;
942 }
943
944 /*
945 * No longer supported: too many calls assume that a task has a valid
946 * process attached.
947 */
948 return KERN_FAILURE;
949 }
950
951 kern_return_t
952 host_security_create_task_token(
953 host_security_t host_security,
954 task_t parent_task,
955 __unused security_token_t sec_token,
956 __unused audit_token_t audit_token,
957 __unused host_priv_t host_priv,
958 __unused ledger_port_array_t ledger_ports,
959 __unused mach_msg_type_number_t num_ledger_ports,
960 __unused boolean_t inherit_memory,
961 __unused task_t *child_task) /* OUT */
962 {
963 if (parent_task == TASK_NULL) {
964 return KERN_INVALID_ARGUMENT;
965 }
966
967 if (host_security == HOST_NULL) {
968 return KERN_INVALID_SECURITY;
969 }
970
971 /*
972 * No longer supported.
973 */
974 return KERN_FAILURE;
975 }
976
977 /*
978 * Task ledgers
979 * ------------
980 *
981 * phys_footprint
982 * Physical footprint: This is the sum of:
983 * + (internal - alternate_accounting)
984 * + (internal_compressed - alternate_accounting_compressed)
985 * + iokit_mapped
986 * + purgeable_nonvolatile
987 * + purgeable_nonvolatile_compressed
988 * + page_table
989 *
990 * internal
991 * The task's anonymous memory, which on iOS is always resident.
992 *
993 * internal_compressed
994 * Amount of this task's internal memory which is held by the compressor.
995 * Such memory is no longer actually resident for the task [i.e., resident in its pmap],
996 * and could be either decompressed back into memory, or paged out to storage, depending
997 * on our implementation.
998 *
999 * iokit_mapped
1000 * IOKit mappings: The total size of all IOKit mappings in this task, regardless of
1001 * clean/dirty or internal/external state].
1002 *
1003 * alternate_accounting
1004 * The number of internal dirty pages which are part of IOKit mappings. By definition, these pages
1005 * are counted in both internal *and* iokit_mapped, so we must subtract them from the total to avoid
1006 * double counting.
1007 *
1008 * pages_grabbed
1009 * pages_grabbed counts all page grabs in a task. It is also broken out into three subtypes
1010 * which track UPL, IOPL and Kernel page grabs.
1011 */
1012 void
1013 init_task_ledgers(void)
1014 {
1015 ledger_template_t t;
1016
1017 assert(task_ledger_template == NULL);
1018 assert(kernel_task == TASK_NULL);
1019
1020 #if MACH_ASSERT
1021 PE_parse_boot_argn("pmap_ledgers_panic",
1022 &pmap_ledgers_panic,
1023 sizeof(pmap_ledgers_panic));
1024 PE_parse_boot_argn("pmap_ledgers_panic_leeway",
1025 &pmap_ledgers_panic_leeway,
1026 sizeof(pmap_ledgers_panic_leeway));
1027 #endif /* MACH_ASSERT */
1028
1029 if ((t = ledger_template_create("Per-task ledger")) == NULL) {
1030 panic("couldn't create task ledger template");
1031 }
1032
1033 task_ledgers.cpu_time = ledger_entry_add(t, "cpu_time", "sched", "ns");
1034 task_ledgers.tkm_private = ledger_entry_add(t, "tkm_private",
1035 "physmem", "bytes");
1036 task_ledgers.tkm_shared = ledger_entry_add(t, "tkm_shared", "physmem",
1037 "bytes");
1038 task_ledgers.phys_mem = ledger_entry_add(t, "phys_mem", "physmem",
1039 "bytes");
1040 task_ledgers.wired_mem = ledger_entry_add(t, "wired_mem", "physmem",
1041 "bytes");
1042 task_ledgers.internal = ledger_entry_add(t, "internal", "physmem",
1043 "bytes");
1044 task_ledgers.iokit_mapped = ledger_entry_add(t, "iokit_mapped", "mappings",
1045 "bytes");
1046 task_ledgers.alternate_accounting = ledger_entry_add(t, "alternate_accounting", "physmem",
1047 "bytes");
1048 task_ledgers.alternate_accounting_compressed = ledger_entry_add(t, "alternate_accounting_compressed", "physmem",
1049 "bytes");
1050 task_ledgers.page_table = ledger_entry_add(t, "page_table", "physmem",
1051 "bytes");
1052 task_ledgers.phys_footprint = ledger_entry_add(t, "phys_footprint", "physmem",
1053 "bytes");
1054 task_ledgers.internal_compressed = ledger_entry_add(t, "internal_compressed", "physmem",
1055 "bytes");
1056 task_ledgers.purgeable_volatile = ledger_entry_add(t, "purgeable_volatile", "physmem", "bytes");
1057 task_ledgers.purgeable_nonvolatile = ledger_entry_add(t, "purgeable_nonvolatile", "physmem", "bytes");
1058 task_ledgers.purgeable_volatile_compressed = ledger_entry_add(t, "purgeable_volatile_compress", "physmem", "bytes");
1059 task_ledgers.purgeable_nonvolatile_compressed = ledger_entry_add(t, "purgeable_nonvolatile_compress", "physmem", "bytes");
1060 #if DEBUG || DEVELOPMENT
1061 task_ledgers.pages_grabbed = ledger_entry_add(t, "pages_grabbed", "physmem", "count");
1062 task_ledgers.pages_grabbed_kern = ledger_entry_add(t, "pages_grabbed_kern", "physmem", "count");
1063 task_ledgers.pages_grabbed_iopl = ledger_entry_add(t, "pages_grabbed_iopl", "physmem", "count");
1064 task_ledgers.pages_grabbed_upl = ledger_entry_add(t, "pages_grabbed_upl", "physmem", "count");
1065 #endif
1066 task_ledgers.tagged_nofootprint = ledger_entry_add(t, "tagged_nofootprint", "physmem", "bytes");
1067 task_ledgers.tagged_footprint = ledger_entry_add(t, "tagged_footprint", "physmem", "bytes");
1068 task_ledgers.tagged_nofootprint_compressed = ledger_entry_add(t, "tagged_nofootprint_compressed", "physmem", "bytes");
1069 task_ledgers.tagged_footprint_compressed = ledger_entry_add(t, "tagged_footprint_compressed", "physmem", "bytes");
1070 task_ledgers.network_volatile = ledger_entry_add(t, "network_volatile", "physmem", "bytes");
1071 task_ledgers.network_nonvolatile = ledger_entry_add(t, "network_nonvolatile", "physmem", "bytes");
1072 task_ledgers.network_volatile_compressed = ledger_entry_add(t, "network_volatile_compressed", "physmem", "bytes");
1073 task_ledgers.network_nonvolatile_compressed = ledger_entry_add(t, "network_nonvolatile_compressed", "physmem", "bytes");
1074 task_ledgers.media_nofootprint = ledger_entry_add(t, "media_nofootprint", "physmem", "bytes");
1075 task_ledgers.media_footprint = ledger_entry_add(t, "media_footprint", "physmem", "bytes");
1076 task_ledgers.media_nofootprint_compressed = ledger_entry_add(t, "media_nofootprint_compressed", "physmem", "bytes");
1077 task_ledgers.media_footprint_compressed = ledger_entry_add(t, "media_footprint_compressed", "physmem", "bytes");
1078 task_ledgers.graphics_nofootprint = ledger_entry_add(t, "graphics_nofootprint", "physmem", "bytes");
1079 task_ledgers.graphics_footprint = ledger_entry_add(t, "graphics_footprint", "physmem", "bytes");
1080 task_ledgers.graphics_nofootprint_compressed = ledger_entry_add(t, "graphics_nofootprint_compressed", "physmem", "bytes");
1081 task_ledgers.graphics_footprint_compressed = ledger_entry_add(t, "graphics_footprint_compressed", "physmem", "bytes");
1082 task_ledgers.neural_nofootprint = ledger_entry_add(t, "neural_nofootprint", "physmem", "bytes");
1083 task_ledgers.neural_footprint = ledger_entry_add(t, "neural_footprint", "physmem", "bytes");
1084 task_ledgers.neural_nofootprint_compressed = ledger_entry_add(t, "neural_nofootprint_compressed", "physmem", "bytes");
1085 task_ledgers.neural_footprint_compressed = ledger_entry_add(t, "neural_footprint_compressed", "physmem", "bytes");
1086
1087 #if CONFIG_FREEZE
1088 task_ledgers.frozen_to_swap = ledger_entry_add(t, "frozen_to_swap", "physmem", "bytes");
1089 #endif /* CONFIG_FREEZE */
1090
1091 task_ledgers.platform_idle_wakeups = ledger_entry_add(t, "platform_idle_wakeups", "power",
1092 "count");
1093 task_ledgers.interrupt_wakeups = ledger_entry_add(t, "interrupt_wakeups", "power",
1094 "count");
1095
1096 #if CONFIG_SCHED_SFI
1097 sfi_class_id_t class_id, ledger_alias;
1098 for (class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) {
1099 task_ledgers.sfi_wait_times[class_id] = -1;
1100 }
1101
1102 /* don't account for UNSPECIFIED */
1103 for (class_id = SFI_CLASS_UNSPECIFIED + 1; class_id < MAX_SFI_CLASS_ID; class_id++) {
1104 ledger_alias = sfi_get_ledger_alias_for_class(class_id);
1105 if (ledger_alias != SFI_CLASS_UNSPECIFIED) {
1106 /* Check to see if alias has been registered yet */
1107 if (task_ledgers.sfi_wait_times[ledger_alias] != -1) {
1108 task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias];
1109 } else {
1110 /* Otherwise, initialize it first */
1111 task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias] = sfi_ledger_entry_add(t, ledger_alias);
1112 }
1113 } else {
1114 task_ledgers.sfi_wait_times[class_id] = sfi_ledger_entry_add(t, class_id);
1115 }
1116
1117 if (task_ledgers.sfi_wait_times[class_id] < 0) {
1118 panic("couldn't create entries for task ledger template for SFI class 0x%x", class_id);
1119 }
1120 }
1121
1122 assert(task_ledgers.sfi_wait_times[MAX_SFI_CLASS_ID - 1] != -1);
1123 #endif /* CONFIG_SCHED_SFI */
1124
1125 task_ledgers.cpu_time_billed_to_me = ledger_entry_add(t, "cpu_time_billed_to_me", "sched", "ns");
1126 task_ledgers.cpu_time_billed_to_others = ledger_entry_add(t, "cpu_time_billed_to_others", "sched", "ns");
1127 task_ledgers.physical_writes = ledger_entry_add(t, "physical_writes", "res", "bytes");
1128 task_ledgers.logical_writes = ledger_entry_add(t, "logical_writes", "res", "bytes");
1129 task_ledgers.logical_writes_to_external = ledger_entry_add(t, "logical_writes_to_external", "res", "bytes");
1130 #if CONFIG_PHYS_WRITE_ACCT
1131 task_ledgers.fs_metadata_writes = ledger_entry_add(t, "fs_metadata_writes", "res", "bytes");
1132 #endif /* CONFIG_PHYS_WRITE_ACCT */
1133 task_ledgers.energy_billed_to_me = ledger_entry_add(t, "energy_billed_to_me", "power", "nj");
1134 task_ledgers.energy_billed_to_others = ledger_entry_add(t, "energy_billed_to_others", "power", "nj");
1135
1136 if ((task_ledgers.cpu_time < 0) ||
1137 (task_ledgers.tkm_private < 0) ||
1138 (task_ledgers.tkm_shared < 0) ||
1139 (task_ledgers.phys_mem < 0) ||
1140 (task_ledgers.wired_mem < 0) ||
1141 (task_ledgers.internal < 0) ||
1142 (task_ledgers.iokit_mapped < 0) ||
1143 (task_ledgers.alternate_accounting < 0) ||
1144 (task_ledgers.alternate_accounting_compressed < 0) ||
1145 (task_ledgers.page_table < 0) ||
1146 (task_ledgers.phys_footprint < 0) ||
1147 (task_ledgers.internal_compressed < 0) ||
1148 (task_ledgers.purgeable_volatile < 0) ||
1149 (task_ledgers.purgeable_nonvolatile < 0) ||
1150 (task_ledgers.purgeable_volatile_compressed < 0) ||
1151 (task_ledgers.purgeable_nonvolatile_compressed < 0) ||
1152 (task_ledgers.tagged_nofootprint < 0) ||
1153 (task_ledgers.tagged_footprint < 0) ||
1154 (task_ledgers.tagged_nofootprint_compressed < 0) ||
1155 (task_ledgers.tagged_footprint_compressed < 0) ||
1156 #if CONFIG_FREEZE
1157 (task_ledgers.frozen_to_swap < 0) ||
1158 #endif /* CONFIG_FREEZE */
1159 (task_ledgers.network_volatile < 0) ||
1160 (task_ledgers.network_nonvolatile < 0) ||
1161 (task_ledgers.network_volatile_compressed < 0) ||
1162 (task_ledgers.network_nonvolatile_compressed < 0) ||
1163 (task_ledgers.media_nofootprint < 0) ||
1164 (task_ledgers.media_footprint < 0) ||
1165 (task_ledgers.media_nofootprint_compressed < 0) ||
1166 (task_ledgers.media_footprint_compressed < 0) ||
1167 (task_ledgers.graphics_nofootprint < 0) ||
1168 (task_ledgers.graphics_footprint < 0) ||
1169 (task_ledgers.graphics_nofootprint_compressed < 0) ||
1170 (task_ledgers.graphics_footprint_compressed < 0) ||
1171 (task_ledgers.neural_nofootprint < 0) ||
1172 (task_ledgers.neural_footprint < 0) ||
1173 (task_ledgers.neural_nofootprint_compressed < 0) ||
1174 (task_ledgers.neural_footprint_compressed < 0) ||
1175 (task_ledgers.platform_idle_wakeups < 0) ||
1176 (task_ledgers.interrupt_wakeups < 0) ||
1177 (task_ledgers.cpu_time_billed_to_me < 0) || (task_ledgers.cpu_time_billed_to_others < 0) ||
1178 (task_ledgers.physical_writes < 0) ||
1179 (task_ledgers.logical_writes < 0) ||
1180 (task_ledgers.logical_writes_to_external < 0) ||
1181 #if CONFIG_PHYS_WRITE_ACCT
1182 (task_ledgers.fs_metadata_writes < 0) ||
1183 #endif /* CONFIG_PHYS_WRITE_ACCT */
1184 (task_ledgers.energy_billed_to_me < 0) ||
1185 (task_ledgers.energy_billed_to_others < 0)
1186 ) {
1187 panic("couldn't create entries for task ledger template");
1188 }
1189
1190 ledger_track_credit_only(t, task_ledgers.phys_footprint);
1191 ledger_track_credit_only(t, task_ledgers.page_table);
1192 ledger_track_credit_only(t, task_ledgers.internal);
1193 ledger_track_credit_only(t, task_ledgers.internal_compressed);
1194 ledger_track_credit_only(t, task_ledgers.iokit_mapped);
1195 ledger_track_credit_only(t, task_ledgers.alternate_accounting);
1196 ledger_track_credit_only(t, task_ledgers.alternate_accounting_compressed);
1197 ledger_track_credit_only(t, task_ledgers.purgeable_volatile);
1198 ledger_track_credit_only(t, task_ledgers.purgeable_nonvolatile);
1199 ledger_track_credit_only(t, task_ledgers.purgeable_volatile_compressed);
1200 ledger_track_credit_only(t, task_ledgers.purgeable_nonvolatile_compressed);
1201 #if DEBUG || DEVELOPMENT
1202 ledger_track_credit_only(t, task_ledgers.pages_grabbed);
1203 ledger_track_credit_only(t, task_ledgers.pages_grabbed_kern);
1204 ledger_track_credit_only(t, task_ledgers.pages_grabbed_iopl);
1205 ledger_track_credit_only(t, task_ledgers.pages_grabbed_upl);
1206 #endif
1207
1208 ledger_track_credit_only(t, task_ledgers.tagged_nofootprint);
1209 ledger_track_credit_only(t, task_ledgers.tagged_footprint);
1210 ledger_track_credit_only(t, task_ledgers.tagged_nofootprint_compressed);
1211 ledger_track_credit_only(t, task_ledgers.tagged_footprint_compressed);
1212 ledger_track_credit_only(t, task_ledgers.network_volatile);
1213 ledger_track_credit_only(t, task_ledgers.network_nonvolatile);
1214 ledger_track_credit_only(t, task_ledgers.network_volatile_compressed);
1215 ledger_track_credit_only(t, task_ledgers.network_nonvolatile_compressed);
1216 ledger_track_credit_only(t, task_ledgers.media_nofootprint);
1217 ledger_track_credit_only(t, task_ledgers.media_footprint);
1218 ledger_track_credit_only(t, task_ledgers.media_nofootprint_compressed);
1219 ledger_track_credit_only(t, task_ledgers.media_footprint_compressed);
1220 ledger_track_credit_only(t, task_ledgers.graphics_nofootprint);
1221 ledger_track_credit_only(t, task_ledgers.graphics_footprint);
1222 ledger_track_credit_only(t, task_ledgers.graphics_nofootprint_compressed);
1223 ledger_track_credit_only(t, task_ledgers.graphics_footprint_compressed);
1224 ledger_track_credit_only(t, task_ledgers.neural_nofootprint);
1225 ledger_track_credit_only(t, task_ledgers.neural_footprint);
1226 ledger_track_credit_only(t, task_ledgers.neural_nofootprint_compressed);
1227 ledger_track_credit_only(t, task_ledgers.neural_footprint_compressed);
1228
1229 ledger_track_maximum(t, task_ledgers.phys_footprint, 60);
1230 #if MACH_ASSERT
1231 if (pmap_ledgers_panic) {
1232 ledger_panic_on_negative(t, task_ledgers.phys_footprint);
1233 ledger_panic_on_negative(t, task_ledgers.page_table);
1234 ledger_panic_on_negative(t, task_ledgers.internal);
1235 ledger_panic_on_negative(t, task_ledgers.internal_compressed);
1236 ledger_panic_on_negative(t, task_ledgers.iokit_mapped);
1237 ledger_panic_on_negative(t, task_ledgers.alternate_accounting);
1238 ledger_panic_on_negative(t, task_ledgers.alternate_accounting_compressed);
1239 ledger_panic_on_negative(t, task_ledgers.purgeable_volatile);
1240 ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile);
1241 ledger_panic_on_negative(t, task_ledgers.purgeable_volatile_compressed);
1242 ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile_compressed);
1243 #if CONFIG_PHYS_WRITE_ACCT
1244 ledger_panic_on_negative(t, task_ledgers.fs_metadata_writes);
1245 #endif /* CONFIG_PHYS_WRITE_ACCT */
1246
1247 ledger_panic_on_negative(t, task_ledgers.tagged_nofootprint);
1248 ledger_panic_on_negative(t, task_ledgers.tagged_footprint);
1249 ledger_panic_on_negative(t, task_ledgers.tagged_nofootprint_compressed);
1250 ledger_panic_on_negative(t, task_ledgers.tagged_footprint_compressed);
1251 ledger_panic_on_negative(t, task_ledgers.network_volatile);
1252 ledger_panic_on_negative(t, task_ledgers.network_nonvolatile);
1253 ledger_panic_on_negative(t, task_ledgers.network_volatile_compressed);
1254 ledger_panic_on_negative(t, task_ledgers.network_nonvolatile_compressed);
1255 ledger_panic_on_negative(t, task_ledgers.media_nofootprint);
1256 ledger_panic_on_negative(t, task_ledgers.media_footprint);
1257 ledger_panic_on_negative(t, task_ledgers.media_nofootprint_compressed);
1258 ledger_panic_on_negative(t, task_ledgers.media_footprint_compressed);
1259 ledger_panic_on_negative(t, task_ledgers.graphics_nofootprint);
1260 ledger_panic_on_negative(t, task_ledgers.graphics_footprint);
1261 ledger_panic_on_negative(t, task_ledgers.graphics_nofootprint_compressed);
1262 ledger_panic_on_negative(t, task_ledgers.graphics_footprint_compressed);
1263 ledger_panic_on_negative(t, task_ledgers.neural_nofootprint);
1264 ledger_panic_on_negative(t, task_ledgers.neural_footprint);
1265 ledger_panic_on_negative(t, task_ledgers.neural_nofootprint_compressed);
1266 ledger_panic_on_negative(t, task_ledgers.neural_footprint_compressed);
1267 }
1268 #endif /* MACH_ASSERT */
1269
1270 #if CONFIG_MEMORYSTATUS
1271 ledger_set_callback(t, task_ledgers.phys_footprint, task_footprint_exceeded, NULL, NULL);
1272 #endif /* CONFIG_MEMORYSTATUS */
1273
1274 ledger_set_callback(t, task_ledgers.interrupt_wakeups,
1275 task_wakeups_rate_exceeded, NULL, NULL);
1276 ledger_set_callback(t, task_ledgers.physical_writes, task_io_rate_exceeded, (void *)FLAVOR_IO_PHYSICAL_WRITES, NULL);
1277
1278 #if XNU_MONITOR
1279 ledger_template_complete_secure_alloc(t);
1280 #else /* XNU_MONITOR */
1281 ledger_template_complete(t);
1282 #endif /* XNU_MONITOR */
1283 task_ledger_template = t;
1284 }
1285
1286 os_refgrp_decl(static, task_refgrp, "task", NULL);
1287
1288 kern_return_t
1289 task_create_internal(
1290 task_t parent_task,
1291 coalition_t *parent_coalitions __unused,
1292 boolean_t inherit_memory,
1293 __unused boolean_t is_64bit,
1294 boolean_t is_64bit_data,
1295 uint32_t t_flags,
1296 uint32_t t_procflags,
1297 uint8_t t_returnwaitflags,
1298 task_t *child_task) /* OUT */
1299 {
1300 task_t new_task;
1301 vm_shared_region_t shared_region;
1302 ledger_t ledger = NULL;
1303
1304 new_task = (task_t) zalloc(task_zone);
1305
1306 if (new_task == TASK_NULL) {
1307 return KERN_RESOURCE_SHORTAGE;
1308 }
1309
1310 /* one ref for just being alive; one for our caller */
1311 os_ref_init_count(&new_task->ref_count, &task_refgrp, 2);
1312
1313 /* allocate with active entries */
1314 assert(task_ledger_template != NULL);
1315 if ((ledger = ledger_instantiate(task_ledger_template,
1316 LEDGER_CREATE_ACTIVE_ENTRIES)) == NULL) {
1317 zfree(task_zone, new_task);
1318 return KERN_RESOURCE_SHORTAGE;
1319 }
1320
1321 #if defined(HAS_APPLE_PAC)
1322 ml_task_set_rop_pid(new_task, parent_task, inherit_memory);
1323 ml_task_set_jop_pid(new_task, parent_task, inherit_memory);
1324 ml_task_set_disable_user_jop(new_task, inherit_memory ? parent_task->disable_user_jop : FALSE);
1325 #endif
1326
1327
1328 new_task->ledger = ledger;
1329
1330 #if defined(CONFIG_SCHED_MULTIQ)
1331 new_task->sched_group = sched_group_create();
1332 #endif
1333
1334 /* if inherit_memory is true, parent_task MUST not be NULL */
1335 if (!(t_flags & TF_CORPSE_FORK) && inherit_memory) {
1336 new_task->map = vm_map_fork(ledger, parent_task->map, 0);
1337 } else {
1338 unsigned int pmap_flags = is_64bit ? PMAP_CREATE_64BIT : 0;
1339 new_task->map = vm_map_create(pmap_create_options(ledger, 0, pmap_flags),
1340 (vm_map_offset_t)(VM_MIN_ADDRESS),
1341 (vm_map_offset_t)(VM_MAX_ADDRESS), TRUE);
1342 }
1343
1344 /* Inherit memlock limit from parent */
1345 if (parent_task) {
1346 vm_map_set_user_wire_limit(new_task->map, (vm_size_t)parent_task->map->user_wire_limit);
1347 }
1348
1349 lck_mtx_init(&new_task->lock, &task_lck_grp, &task_lck_attr);
1350 queue_init(&new_task->threads);
1351 new_task->suspend_count = 0;
1352 new_task->thread_count = 0;
1353 new_task->active_thread_count = 0;
1354 new_task->user_stop_count = 0;
1355 new_task->legacy_stop_count = 0;
1356 new_task->active = TRUE;
1357 new_task->halting = FALSE;
1358 new_task->priv_flags = 0;
1359 new_task->t_flags = t_flags;
1360 new_task->t_procflags = t_procflags;
1361 new_task->t_returnwaitflags = t_returnwaitflags;
1362 new_task->returnwait_inheritor = current_thread();
1363 new_task->importance = 0;
1364 new_task->crashed_thread_id = 0;
1365 new_task->exec_token = 0;
1366 new_task->watchports = NULL;
1367 new_task->restartable_ranges = NULL;
1368 new_task->task_exc_guard = 0;
1369
1370 new_task->bank_context = NULL;
1371
1372 #ifdef MACH_BSD
1373 new_task->bsd_info = NULL;
1374 new_task->corpse_info = NULL;
1375 #endif /* MACH_BSD */
1376
1377 #if CONFIG_MACF
1378 new_task->crash_label = NULL;
1379
1380 new_task->mach_trap_filter_mask = NULL;
1381 new_task->mach_kobj_filter_mask = NULL;
1382 #endif
1383
1384 #if CONFIG_MEMORYSTATUS
1385 if (max_task_footprint != 0) {
1386 ledger_set_limit(ledger, task_ledgers.phys_footprint, max_task_footprint, PHYS_FOOTPRINT_WARNING_LEVEL);
1387 }
1388 #endif /* CONFIG_MEMORYSTATUS */
1389
1390 if (task_wakeups_monitor_rate != 0) {
1391 uint32_t flags = WAKEMON_ENABLE | WAKEMON_SET_DEFAULTS;
1392 int32_t rate; // Ignored because of WAKEMON_SET_DEFAULTS
1393 task_wakeups_monitor_ctl(new_task, &flags, &rate);
1394 }
1395
1396 #if CONFIG_IO_ACCOUNTING
1397 uint32_t flags = IOMON_ENABLE;
1398 task_io_monitor_ctl(new_task, &flags);
1399 #endif /* CONFIG_IO_ACCOUNTING */
1400
1401 machine_task_init(new_task, parent_task, inherit_memory);
1402
1403 new_task->task_debug = NULL;
1404
1405 #if DEVELOPMENT || DEBUG
1406 new_task->task_unnested = FALSE;
1407 new_task->task_disconnected_count = 0;
1408 #endif
1409 queue_init(&new_task->semaphore_list);
1410 new_task->semaphores_owned = 0;
1411
1412 ipc_task_init(new_task, parent_task);
1413
1414 new_task->vtimers = 0;
1415
1416 new_task->shared_region = NULL;
1417
1418 new_task->affinity_space = NULL;
1419
1420 new_task->t_kpc = 0;
1421
1422 new_task->pidsuspended = FALSE;
1423 new_task->frozen = FALSE;
1424 new_task->changing_freeze_state = FALSE;
1425 new_task->rusage_cpu_flags = 0;
1426 new_task->rusage_cpu_percentage = 0;
1427 new_task->rusage_cpu_interval = 0;
1428 new_task->rusage_cpu_deadline = 0;
1429 new_task->rusage_cpu_callt = NULL;
1430 #if MACH_ASSERT
1431 new_task->suspends_outstanding = 0;
1432 #endif
1433
1434 #if HYPERVISOR
1435 new_task->hv_task_target = NULL;
1436 #endif /* HYPERVISOR */
1437
1438 #if CONFIG_TASKWATCH
1439 queue_init(&new_task->task_watchers);
1440 new_task->num_taskwatchers = 0;
1441 new_task->watchapplying = 0;
1442 #endif /* CONFIG_TASKWATCH */
1443
1444 new_task->mem_notify_reserved = 0;
1445 new_task->memlimit_attrs_reserved = 0;
1446
1447 new_task->requested_policy = default_task_requested_policy;
1448 new_task->effective_policy = default_task_effective_policy;
1449
1450 task_importance_init_from_parent(new_task, parent_task);
1451
1452 if (parent_task != TASK_NULL) {
1453 new_task->sec_token = parent_task->sec_token;
1454 new_task->audit_token = parent_task->audit_token;
1455
1456 /* inherit the parent's shared region */
1457 shared_region = vm_shared_region_get(parent_task);
1458 vm_shared_region_set(new_task, shared_region);
1459
1460 #if __has_feature(ptrauth_calls)
1461 /* use parent's shared_region_id */
1462 char *shared_region_id = task_get_vm_shared_region_id_and_jop_pid(parent_task, NULL);
1463 if (shared_region_id != NULL) {
1464 shared_region_key_alloc(shared_region_id, FALSE, 0); /* get a reference */
1465 }
1466 task_set_shared_region_id(new_task, shared_region_id);
1467 #endif /* __has_feature(ptrauth_calls) */
1468
1469 if (task_has_64Bit_addr(parent_task)) {
1470 task_set_64Bit_addr(new_task);
1471 }
1472
1473 if (task_has_64Bit_data(parent_task)) {
1474 task_set_64Bit_data(new_task);
1475 }
1476
1477 new_task->all_image_info_addr = parent_task->all_image_info_addr;
1478 new_task->all_image_info_size = parent_task->all_image_info_size;
1479 new_task->mach_header_vm_address = 0;
1480
1481 if (inherit_memory && parent_task->affinity_space) {
1482 task_affinity_create(parent_task, new_task);
1483 }
1484
1485 new_task->pset_hint = parent_task->pset_hint = task_choose_pset(parent_task);
1486
1487 if (parent_task->t_flags & TF_NO_SMT) {
1488 new_task->t_flags |= TF_NO_SMT;
1489 }
1490
1491 if (parent_task->t_flags & TF_TECS) {
1492 new_task->t_flags |= TF_TECS;
1493 }
1494
1495 if (parent_task->t_flags & TF_FILTER_MSG) {
1496 new_task->t_flags |= TF_FILTER_MSG;
1497 }
1498
1499 new_task->priority = BASEPRI_DEFAULT;
1500 new_task->max_priority = MAXPRI_USER;
1501
1502 task_policy_create(new_task, parent_task);
1503 } else {
1504 new_task->sec_token = KERNEL_SECURITY_TOKEN;
1505 new_task->audit_token = KERNEL_AUDIT_TOKEN;
1506 #ifdef __LP64__
1507 if (is_64bit) {
1508 task_set_64Bit_addr(new_task);
1509 }
1510 #endif
1511
1512 if (is_64bit_data) {
1513 task_set_64Bit_data(new_task);
1514 }
1515
1516 new_task->all_image_info_addr = (mach_vm_address_t)0;
1517 new_task->all_image_info_size = (mach_vm_size_t)0;
1518
1519 new_task->pset_hint = PROCESSOR_SET_NULL;
1520
1521 if (kernel_task == TASK_NULL) {
1522 new_task->priority = BASEPRI_KERNEL;
1523 new_task->max_priority = MAXPRI_KERNEL;
1524 } else {
1525 new_task->priority = BASEPRI_DEFAULT;
1526 new_task->max_priority = MAXPRI_USER;
1527 }
1528 }
1529
1530 bzero(new_task->coalition, sizeof(new_task->coalition));
1531 for (int i = 0; i < COALITION_NUM_TYPES; i++) {
1532 queue_chain_init(new_task->task_coalition[i]);
1533 }
1534
1535 /* Allocate I/O Statistics */
1536 new_task->task_io_stats = kheap_alloc(KHEAP_DATA_BUFFERS,
1537 sizeof(struct io_stat_info), Z_WAITOK | Z_ZERO);
1538 assert(new_task->task_io_stats != NULL);
1539
1540 bzero(&(new_task->cpu_time_eqos_stats), sizeof(new_task->cpu_time_eqos_stats));
1541 bzero(&(new_task->cpu_time_rqos_stats), sizeof(new_task->cpu_time_rqos_stats));
1542
1543 bzero(&new_task->extmod_statistics, sizeof(new_task->extmod_statistics));
1544
1545 /* Copy resource acc. info from Parent for Corpe Forked task. */
1546 if (parent_task != NULL && (t_flags & TF_CORPSE_FORK)) {
1547 task_rollup_accounting_info(new_task, parent_task);
1548 } else {
1549 /* Initialize to zero for standard fork/spawn case */
1550 new_task->total_user_time = 0;
1551 new_task->total_system_time = 0;
1552 new_task->total_ptime = 0;
1553 new_task->total_runnable_time = 0;
1554 new_task->faults = 0;
1555 new_task->pageins = 0;
1556 new_task->cow_faults = 0;
1557 new_task->messages_sent = 0;
1558 new_task->messages_received = 0;
1559 new_task->syscalls_mach = 0;
1560 new_task->syscalls_unix = 0;
1561 new_task->c_switch = 0;
1562 new_task->p_switch = 0;
1563 new_task->ps_switch = 0;
1564 new_task->decompressions = 0;
1565 new_task->low_mem_notified_warn = 0;
1566 new_task->low_mem_notified_critical = 0;
1567 new_task->purged_memory_warn = 0;
1568 new_task->purged_memory_critical = 0;
1569 new_task->low_mem_privileged_listener = 0;
1570 new_task->memlimit_is_active = 0;
1571 new_task->memlimit_is_fatal = 0;
1572 new_task->memlimit_active_exc_resource = 0;
1573 new_task->memlimit_inactive_exc_resource = 0;
1574 new_task->task_timer_wakeups_bin_1 = 0;
1575 new_task->task_timer_wakeups_bin_2 = 0;
1576 new_task->task_gpu_ns = 0;
1577 new_task->task_writes_counters_internal.task_immediate_writes = 0;
1578 new_task->task_writes_counters_internal.task_deferred_writes = 0;
1579 new_task->task_writes_counters_internal.task_invalidated_writes = 0;
1580 new_task->task_writes_counters_internal.task_metadata_writes = 0;
1581 new_task->task_writes_counters_external.task_immediate_writes = 0;
1582 new_task->task_writes_counters_external.task_deferred_writes = 0;
1583 new_task->task_writes_counters_external.task_invalidated_writes = 0;
1584 new_task->task_writes_counters_external.task_metadata_writes = 0;
1585 #if CONFIG_PHYS_WRITE_ACCT
1586 new_task->task_fs_metadata_writes = 0;
1587 #endif /* CONFIG_PHYS_WRITE_ACCT */
1588
1589 new_task->task_energy = 0;
1590 #if MONOTONIC
1591 memset(&new_task->task_monotonic, 0, sizeof(new_task->task_monotonic));
1592 #endif /* MONOTONIC */
1593 }
1594
1595
1596 #if CONFIG_COALITIONS
1597 if (!(t_flags & TF_CORPSE_FORK)) {
1598 /* TODO: there is no graceful failure path here... */
1599 if (parent_coalitions && parent_coalitions[COALITION_TYPE_RESOURCE]) {
1600 coalitions_adopt_task(parent_coalitions, new_task);
1601 } else if (parent_task && parent_task->coalition[COALITION_TYPE_RESOURCE]) {
1602 /*
1603 * all tasks at least have a resource coalition, so
1604 * if the parent has one then inherit all coalitions
1605 * the parent is a part of
1606 */
1607 coalitions_adopt_task(parent_task->coalition, new_task);
1608 } else {
1609 /* TODO: assert that new_task will be PID 1 (launchd) */
1610 coalitions_adopt_init_task(new_task);
1611 }
1612 /*
1613 * on exec, we need to transfer the coalition roles from the
1614 * parent task to the exec copy task.
1615 */
1616 if (parent_task && (t_procflags & TPF_EXEC_COPY)) {
1617 int coal_roles[COALITION_NUM_TYPES];
1618 task_coalition_roles(parent_task, coal_roles);
1619 (void)coalitions_set_roles(new_task->coalition, new_task, coal_roles);
1620 }
1621 } else {
1622 coalitions_adopt_corpse_task(new_task);
1623 }
1624
1625 if (new_task->coalition[COALITION_TYPE_RESOURCE] == COALITION_NULL) {
1626 panic("created task is not a member of a resource coalition");
1627 }
1628 #endif /* CONFIG_COALITIONS */
1629
1630 new_task->dispatchqueue_offset = 0;
1631 if (parent_task != NULL) {
1632 new_task->dispatchqueue_offset = parent_task->dispatchqueue_offset;
1633 }
1634
1635 new_task->task_can_transfer_memory_ownership = FALSE;
1636 new_task->task_volatile_objects = 0;
1637 new_task->task_nonvolatile_objects = 0;
1638 new_task->task_objects_disowning = FALSE;
1639 new_task->task_objects_disowned = FALSE;
1640 new_task->task_owned_objects = 0;
1641 queue_init(&new_task->task_objq);
1642
1643 #if CONFIG_FREEZE
1644 queue_init(&new_task->task_frozen_cseg_q);
1645 #endif /* CONFIG_FREEZE */
1646
1647 task_objq_lock_init(new_task);
1648
1649 #if __arm64__
1650 new_task->task_legacy_footprint = FALSE;
1651 new_task->task_extra_footprint_limit = FALSE;
1652 new_task->task_ios13extended_footprint_limit = FALSE;
1653 #endif /* __arm64__ */
1654 new_task->task_region_footprint = FALSE;
1655 new_task->task_has_crossed_thread_limit = FALSE;
1656 new_task->task_thread_limit = 0;
1657 #if CONFIG_SECLUDED_MEMORY
1658 new_task->task_can_use_secluded_mem = FALSE;
1659 new_task->task_could_use_secluded_mem = FALSE;
1660 new_task->task_could_also_use_secluded_mem = FALSE;
1661 new_task->task_suppressed_secluded = FALSE;
1662 #endif /* CONFIG_SECLUDED_MEMORY */
1663
1664 /*
1665 * t_flags is set up above. But since we don't
1666 * support darkwake mode being set that way
1667 * currently, we clear it out here explicitly.
1668 */
1669 new_task->t_flags &= ~(TF_DARKWAKE_MODE);
1670
1671 queue_init(&new_task->io_user_clients);
1672 new_task->loadTag = 0;
1673
1674 ipc_task_enable(new_task);
1675
1676 lck_mtx_lock(&tasks_threads_lock);
1677 queue_enter(&tasks, new_task, task_t, tasks);
1678 tasks_count++;
1679 if (tasks_suspend_state) {
1680 task_suspend_internal(new_task);
1681 }
1682 lck_mtx_unlock(&tasks_threads_lock);
1683
1684 *child_task = new_task;
1685 return KERN_SUCCESS;
1686 }
1687
1688 /*
1689 * task_rollup_accounting_info
1690 *
1691 * Roll up accounting stats. Used to rollup stats
1692 * for exec copy task and corpse fork.
1693 */
1694 void
1695 task_rollup_accounting_info(task_t to_task, task_t from_task)
1696 {
1697 assert(from_task != to_task);
1698
1699 to_task->total_user_time = from_task->total_user_time;
1700 to_task->total_system_time = from_task->total_system_time;
1701 to_task->total_ptime = from_task->total_ptime;
1702 to_task->total_runnable_time = from_task->total_runnable_time;
1703 to_task->faults = from_task->faults;
1704 to_task->pageins = from_task->pageins;
1705 to_task->cow_faults = from_task->cow_faults;
1706 to_task->decompressions = from_task->decompressions;
1707 to_task->messages_sent = from_task->messages_sent;
1708 to_task->messages_received = from_task->messages_received;
1709 to_task->syscalls_mach = from_task->syscalls_mach;
1710 to_task->syscalls_unix = from_task->syscalls_unix;
1711 to_task->c_switch = from_task->c_switch;
1712 to_task->p_switch = from_task->p_switch;
1713 to_task->ps_switch = from_task->ps_switch;
1714 to_task->extmod_statistics = from_task->extmod_statistics;
1715 to_task->low_mem_notified_warn = from_task->low_mem_notified_warn;
1716 to_task->low_mem_notified_critical = from_task->low_mem_notified_critical;
1717 to_task->purged_memory_warn = from_task->purged_memory_warn;
1718 to_task->purged_memory_critical = from_task->purged_memory_critical;
1719 to_task->low_mem_privileged_listener = from_task->low_mem_privileged_listener;
1720 *to_task->task_io_stats = *from_task->task_io_stats;
1721 to_task->cpu_time_eqos_stats = from_task->cpu_time_eqos_stats;
1722 to_task->cpu_time_rqos_stats = from_task->cpu_time_rqos_stats;
1723 to_task->task_timer_wakeups_bin_1 = from_task->task_timer_wakeups_bin_1;
1724 to_task->task_timer_wakeups_bin_2 = from_task->task_timer_wakeups_bin_2;
1725 to_task->task_gpu_ns = from_task->task_gpu_ns;
1726 to_task->task_writes_counters_internal.task_immediate_writes = from_task->task_writes_counters_internal.task_immediate_writes;
1727 to_task->task_writes_counters_internal.task_deferred_writes = from_task->task_writes_counters_internal.task_deferred_writes;
1728 to_task->task_writes_counters_internal.task_invalidated_writes = from_task->task_writes_counters_internal.task_invalidated_writes;
1729 to_task->task_writes_counters_internal.task_metadata_writes = from_task->task_writes_counters_internal.task_metadata_writes;
1730 to_task->task_writes_counters_external.task_immediate_writes = from_task->task_writes_counters_external.task_immediate_writes;
1731 to_task->task_writes_counters_external.task_deferred_writes = from_task->task_writes_counters_external.task_deferred_writes;
1732 to_task->task_writes_counters_external.task_invalidated_writes = from_task->task_writes_counters_external.task_invalidated_writes;
1733 to_task->task_writes_counters_external.task_metadata_writes = from_task->task_writes_counters_external.task_metadata_writes;
1734 #if CONFIG_PHYS_WRITE_ACCT
1735 to_task->task_fs_metadata_writes = from_task->task_fs_metadata_writes;
1736 #endif /* CONFIG_PHYS_WRITE_ACCT */
1737 to_task->task_energy = from_task->task_energy;
1738
1739 /* Skip ledger roll up for memory accounting entries */
1740 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time);
1741 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.platform_idle_wakeups);
1742 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.interrupt_wakeups);
1743 #if CONFIG_SCHED_SFI
1744 for (sfi_class_id_t class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) {
1745 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.sfi_wait_times[class_id]);
1746 }
1747 #endif
1748 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time_billed_to_me);
1749 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time_billed_to_others);
1750 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.physical_writes);
1751 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.logical_writes);
1752 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.energy_billed_to_me);
1753 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.energy_billed_to_others);
1754 }
1755
/*
 * Global counter, starts at zero.  Nothing in this part of the file
 * reads or writes it.
 * NOTE(review): judging by the name it presumably tallies dropped
 * importance state -- confirm against its users before relying on it.
 */
int task_dropped_imp_count = 0;
1757
1758 /*
1759 * task_deallocate:
1760 *
1761 * Drop a reference on a task.
1762 */
1763 void
1764 task_deallocate(
1765 task_t task)
1766 {
1767 ledger_amount_t credit, debit, interrupt_wakeups, platform_idle_wakeups;
1768 os_ref_count_t refs;
1769
1770 if (task == TASK_NULL) {
1771 return;
1772 }
1773
1774 refs = task_deallocate_internal(task);
1775
1776 #if IMPORTANCE_INHERITANCE
1777 if (refs == 1) {
1778 /*
1779 * If last ref potentially comes from the task's importance,
1780 * disconnect it. But more task refs may be added before
1781 * that completes, so wait for the reference to go to zero
1782 * naturally (it may happen on a recursive task_deallocate()
1783 * from the ipc_importance_disconnect_task() call).
1784 */
1785 if (IIT_NULL != task->task_imp_base) {
1786 ipc_importance_disconnect_task(task);
1787 }
1788 return;
1789 }
1790 #endif /* IMPORTANCE_INHERITANCE */
1791
1792 if (refs > 0) {
1793 return;
1794 }
1795
1796 /*
1797 * The task should be dead at this point. Ensure other resources
1798 * like threads, are gone before we trash the world.
1799 */
1800 assert(queue_empty(&task->threads));
1801 assert(task->bsd_info == NULL);
1802 assert(!is_active(task->itk_space));
1803 assert(!task->active);
1804 assert(task->active_thread_count == 0);
1805
1806 lck_mtx_lock(&tasks_threads_lock);
1807 assert(terminated_tasks_count > 0);
1808 queue_remove(&terminated_tasks, task, task_t, tasks);
1809 terminated_tasks_count--;
1810 lck_mtx_unlock(&tasks_threads_lock);
1811
1812 /*
1813 * remove the reference on bank context
1814 */
1815 task_bank_reset(task);
1816
1817 if (task->task_io_stats) {
1818 kheap_free(KHEAP_DATA_BUFFERS, task->task_io_stats,
1819 sizeof(struct io_stat_info));
1820 }
1821
1822 /*
1823 * Give the machine dependent code a chance
1824 * to perform cleanup before ripping apart
1825 * the task.
1826 */
1827 machine_task_terminate(task);
1828
1829 ipc_task_terminate(task);
1830
1831 /* let iokit know */
1832 iokit_task_terminate(task);
1833
1834 if (task->affinity_space) {
1835 task_affinity_deallocate(task);
1836 }
1837
1838 #if MACH_ASSERT
1839 if (task->ledger != NULL &&
1840 task->map != NULL &&
1841 task->map->pmap != NULL &&
1842 task->map->pmap->ledger != NULL) {
1843 assert(task->ledger == task->map->pmap->ledger);
1844 }
1845 #endif /* MACH_ASSERT */
1846
1847 vm_owned_objects_disown(task);
1848 assert(task->task_objects_disowned);
1849 if (task->task_volatile_objects != 0 ||
1850 task->task_nonvolatile_objects != 0 ||
1851 task->task_owned_objects != 0) {
1852 panic("task_deallocate(%p): "
1853 "volatile_objects=%d nonvolatile_objects=%d owned=%d\n",
1854 task,
1855 task->task_volatile_objects,
1856 task->task_nonvolatile_objects,
1857 task->task_owned_objects);
1858 }
1859
1860 vm_map_deallocate(task->map);
1861 is_release(task->itk_space);
1862 if (task->restartable_ranges) {
1863 restartable_ranges_release(task->restartable_ranges);
1864 }
1865
1866 ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
1867 &interrupt_wakeups, &debit);
1868 ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
1869 &platform_idle_wakeups, &debit);
1870
1871 #if defined(CONFIG_SCHED_MULTIQ)
1872 sched_group_destroy(task->sched_group);
1873 #endif
1874
1875 /* Accumulate statistics for dead tasks */
1876 lck_spin_lock(&dead_task_statistics_lock);
1877 dead_task_statistics.total_user_time += task->total_user_time;
1878 dead_task_statistics.total_system_time += task->total_system_time;
1879
1880 dead_task_statistics.task_interrupt_wakeups += interrupt_wakeups;
1881 dead_task_statistics.task_platform_idle_wakeups += platform_idle_wakeups;
1882
1883 dead_task_statistics.task_timer_wakeups_bin_1 += task->task_timer_wakeups_bin_1;
1884 dead_task_statistics.task_timer_wakeups_bin_2 += task->task_timer_wakeups_bin_2;
1885 dead_task_statistics.total_ptime += task->total_ptime;
1886 dead_task_statistics.total_pset_switches += task->ps_switch;
1887 dead_task_statistics.task_gpu_ns += task->task_gpu_ns;
1888 dead_task_statistics.task_energy += task->task_energy;
1889
1890 lck_spin_unlock(&dead_task_statistics_lock);
1891 lck_mtx_destroy(&task->lock, &task_lck_grp);
1892
1893 if (!ledger_get_entries(task->ledger, task_ledgers.tkm_private, &credit,
1894 &debit)) {
1895 OSAddAtomic64(credit, (int64_t *)&tasks_tkm_private.alloc);
1896 OSAddAtomic64(debit, (int64_t *)&tasks_tkm_private.free);
1897 }
1898 if (!ledger_get_entries(task->ledger, task_ledgers.tkm_shared, &credit,
1899 &debit)) {
1900 OSAddAtomic64(credit, (int64_t *)&tasks_tkm_shared.alloc);
1901 OSAddAtomic64(debit, (int64_t *)&tasks_tkm_shared.free);
1902 }
1903 ledger_dereference(task->ledger);
1904
1905 #if TASK_REFERENCE_LEAK_DEBUG
1906 btlog_remove_entries_for_element(task_ref_btlog, task);
1907 #endif
1908
1909 #if CONFIG_COALITIONS
1910 task_release_coalitions(task);
1911 #endif /* CONFIG_COALITIONS */
1912
1913 bzero(task->coalition, sizeof(task->coalition));
1914
1915 #if MACH_BSD
1916 /* clean up collected information since last reference to task is gone */
1917 if (task->corpse_info) {
1918 void *corpse_info_kernel = kcdata_memory_get_begin_addr(task->corpse_info);
1919 task_crashinfo_destroy(task->corpse_info);
1920 task->corpse_info = NULL;
1921 if (corpse_info_kernel) {
1922 kheap_free(KHEAP_DATA_BUFFERS, corpse_info_kernel,
1923 CORPSEINFO_ALLOCATION_SIZE);
1924 }
1925 }
1926 #endif
1927
1928 #if CONFIG_MACF
1929 if (task->crash_label) {
1930 mac_exc_free_label(task->crash_label);
1931 task->crash_label = NULL;
1932 }
1933 #endif
1934
1935 assert(queue_empty(&task->task_objq));
1936 task_objq_lock_destroy(task);
1937
1938 zfree(task_zone, task);
1939 }
1940
1941 /*
1942 * task_name_deallocate:
1943 *
1944 * Drop a reference on a task name.
1945 */
1946 void
1947 task_name_deallocate(
1948 task_name_t task_name)
1949 {
1950 return task_deallocate((task_t)task_name);
1951 }
1952
1953 /*
1954 * task_policy_set_deallocate:
1955 *
1956 * Drop a reference on a task type.
1957 */
1958 void
1959 task_policy_set_deallocate(task_policy_set_t task_policy_set)
1960 {
1961 return task_deallocate((task_t)task_policy_set);
1962 }
1963
1964 /*
1965 * task_policy_get_deallocate:
1966 *
1967 * Drop a reference on a task type.
1968 */
1969 void
1970 task_policy_get_deallocate(task_policy_get_t task_policy_get)
1971 {
1972 return task_deallocate((task_t)task_policy_get);
1973 }
1974
1975 /*
1976 * task_inspect_deallocate:
1977 *
1978 * Drop a task inspection reference.
1979 */
1980 void
1981 task_inspect_deallocate(
1982 task_inspect_t task_inspect)
1983 {
1984 return task_deallocate((task_t)task_inspect);
1985 }
1986
1987 /*
1988 * task_read_deallocate:
1989 *
1990 * Drop a reference on task read port.
1991 */
1992 void
1993 task_read_deallocate(
1994 task_read_t task_read)
1995 {
1996 return task_deallocate((task_t)task_read);
1997 }
1998
1999 /*
2000 * task_suspension_token_deallocate:
2001 *
2002 * Drop a reference on a task suspension token.
2003 */
2004 void
2005 task_suspension_token_deallocate(
2006 task_suspension_token_t token)
2007 {
2008 return task_deallocate((task_t)token);
2009 }
2010
2011
2012 /*
2013 * task_collect_crash_info:
2014 *
2015 * collect crash info from bsd and mach based data
2016 */
2017 kern_return_t
2018 task_collect_crash_info(
2019 task_t task,
2020 #ifdef CONFIG_MACF
2021 struct label *crash_label,
2022 #endif
2023 int is_corpse_fork)
2024 {
2025 kern_return_t kr = KERN_SUCCESS;
2026
2027 kcdata_descriptor_t crash_data = NULL;
2028 kcdata_descriptor_t crash_data_release = NULL;
2029 mach_msg_type_number_t size = CORPSEINFO_ALLOCATION_SIZE;
2030 mach_vm_offset_t crash_data_ptr = 0;
2031 void *crash_data_kernel = NULL;
2032 void *crash_data_kernel_release = NULL;
2033 #if CONFIG_MACF
2034 struct label *label, *free_label;
2035 #endif
2036
2037 if (!corpses_enabled()) {
2038 return KERN_NOT_SUPPORTED;
2039 }
2040
2041 #if CONFIG_MACF
2042 free_label = label = mac_exc_create_label();
2043 #endif
2044
2045 task_lock(task);
2046
2047 assert(is_corpse_fork || task->bsd_info != NULL);
2048 if (task->corpse_info == NULL && (is_corpse_fork || task->bsd_info != NULL)) {
2049 #if CONFIG_MACF
2050 /* Set the crash label, used by the exception delivery mac hook */
2051 free_label = task->crash_label; // Most likely NULL.
2052 task->crash_label = label;
2053 mac_exc_update_task_crash_label(task, crash_label);
2054 #endif
2055 task_unlock(task);
2056
2057 crash_data_kernel = kheap_alloc(KHEAP_DATA_BUFFERS,
2058 CORPSEINFO_ALLOCATION_SIZE, Z_WAITOK | Z_ZERO);
2059 if (crash_data_kernel == NULL) {
2060 kr = KERN_RESOURCE_SHORTAGE;
2061 goto out_no_lock;
2062 }
2063 crash_data_ptr = (mach_vm_offset_t) crash_data_kernel;
2064
2065 /* Do not get a corpse ref for corpse fork */
2066 crash_data = task_crashinfo_alloc_init((mach_vm_address_t)crash_data_ptr, size,
2067 is_corpse_fork ? 0 : CORPSE_CRASHINFO_HAS_REF,
2068 KCFLAG_USE_MEMCOPY);
2069 if (crash_data) {
2070 task_lock(task);
2071 crash_data_release = task->corpse_info;
2072 crash_data_kernel_release = kcdata_memory_get_begin_addr(crash_data_release);
2073 task->corpse_info = crash_data;
2074
2075 task_unlock(task);
2076 kr = KERN_SUCCESS;
2077 } else {
2078 kheap_free(KHEAP_DATA_BUFFERS, crash_data_kernel,
2079 CORPSEINFO_ALLOCATION_SIZE);
2080 kr = KERN_FAILURE;
2081 }
2082
2083 if (crash_data_release != NULL) {
2084 task_crashinfo_destroy(crash_data_release);
2085 }
2086 if (crash_data_kernel_release != NULL) {
2087 kheap_free(KHEAP_DATA_BUFFERS, crash_data_kernel_release,
2088 CORPSEINFO_ALLOCATION_SIZE);
2089 }
2090 } else {
2091 task_unlock(task);
2092 }
2093
2094 out_no_lock:
2095 #if CONFIG_MACF
2096 if (free_label != NULL) {
2097 mac_exc_free_label(free_label);
2098 }
2099 #endif
2100 return kr;
2101 }
2102
2103 /*
2104 * task_deliver_crash_notification:
2105 *
2106 * Makes outcall to registered host port for a corpse.
2107 */
2108 kern_return_t
2109 task_deliver_crash_notification(
2110 task_t task,
2111 thread_t thread,
2112 exception_type_t etype,
2113 mach_exception_subcode_t subcode)
2114 {
2115 kcdata_descriptor_t crash_info = task->corpse_info;
2116 thread_t th_iter = NULL;
2117 kern_return_t kr = KERN_SUCCESS;
2118 wait_interrupt_t wsave;
2119 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
2120 ipc_port_t task_port, old_notify;
2121
2122 if (crash_info == NULL) {
2123 return KERN_FAILURE;
2124 }
2125
2126 task_lock(task);
2127 if (task_is_a_corpse_fork(task)) {
2128 /* Populate code with EXC_{RESOURCE,GUARD} for corpse fork */
2129 code[0] = etype;
2130 code[1] = subcode;
2131 } else {
2132 /* Populate code with EXC_CRASH for corpses */
2133 code[0] = EXC_CRASH;
2134 code[1] = 0;
2135 /* Update the code[1] if the boot-arg corpse_for_fatal_memkill is set */
2136 if (corpse_for_fatal_memkill) {
2137 code[1] = subcode;
2138 }
2139 }
2140
2141 queue_iterate(&task->threads, th_iter, thread_t, task_threads)
2142 {
2143 if (th_iter->corpse_dup == FALSE) {
2144 ipc_thread_reset(th_iter);
2145 }
2146 }
2147 task_unlock(task);
2148
2149 /* Arm the no-sender notification for taskport */
2150 task_reference(task);
2151 task_port = convert_task_to_port(task);
2152 ip_lock(task_port);
2153 require_ip_active(task_port);
2154 ipc_port_nsrequest(task_port, task_port->ip_mscount, ipc_port_make_sonce_locked(task_port), &old_notify);
2155 /* port unlocked */
2156 assert(IP_NULL == old_notify);
2157
2158 wsave = thread_interrupt_level(THREAD_UNINT);
2159 kr = exception_triage_thread(EXC_CORPSE_NOTIFY, code, EXCEPTION_CODE_MAX, thread);
2160 if (kr != KERN_SUCCESS) {
2161 printf("Failed to send exception EXC_CORPSE_NOTIFY. error code: %d for pid %d\n", kr, task_pid(task));
2162 }
2163
2164 (void)thread_interrupt_level(wsave);
2165
2166 /*
2167 * Drop the send right on task port, will fire the
2168 * no-sender notification if exception deliver failed.
2169 */
2170 ipc_port_release_send(task_port);
2171 return kr;
2172 }
2173
2174 /*
2175 * task_terminate:
2176 *
2177 * Terminate the specified task. See comments on thread_terminate
2178 * (kern/thread.c) about problems with terminating the "current task."
2179 */
2180
2181 kern_return_t
2182 task_terminate(
2183 task_t task)
2184 {
2185 if (task == TASK_NULL) {
2186 return KERN_INVALID_ARGUMENT;
2187 }
2188
2189 if (task->bsd_info) {
2190 return KERN_FAILURE;
2191 }
2192
2193 return task_terminate_internal(task);
2194 }
2195
#if MACH_ASSERT
/*
 * Prototypes for routines implemented outside this file.  In this file
 * they are used by the MACH_ASSERT-only code in task_terminate_internal()
 * that fetches the pid and process name handed to pmap_set_process().
 */
extern int proc_pid(struct proc *);
extern void proc_name_kdp(task_t t, char *buf, int size);
#endif /* MACH_ASSERT */
2200
2201 #define VM_MAP_PARTIAL_REAP 0x54 /* 0x150 */
2202 static void
2203 __unused task_partial_reap(task_t task, __unused int pid)
2204 {
2205 unsigned int reclaimed_resident = 0;
2206 unsigned int reclaimed_compressed = 0;
2207 uint64_t task_page_count;
2208
2209 task_page_count = (get_task_phys_footprint(task) / PAGE_SIZE_64);
2210
2211 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_START),
2212 pid, task_page_count, 0, 0, 0);
2213
2214 vm_map_partial_reap(task->map, &reclaimed_resident, &reclaimed_compressed);
2215
2216 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_END),
2217 pid, reclaimed_resident, reclaimed_compressed, 0, 0);
2218 }
2219
2220 kern_return_t
2221 task_mark_corpse(task_t task)
2222 {
2223 kern_return_t kr = KERN_SUCCESS;
2224 thread_t self_thread;
2225 (void) self_thread;
2226 wait_interrupt_t wsave;
2227 #if CONFIG_MACF
2228 struct label *crash_label = NULL;
2229 #endif
2230
2231 assert(task != kernel_task);
2232 assert(task == current_task());
2233 assert(!task_is_a_corpse(task));
2234
2235 #if CONFIG_MACF
2236 crash_label = mac_exc_create_label_for_proc((struct proc*)task->bsd_info);
2237 #endif
2238
2239 kr = task_collect_crash_info(task,
2240 #if CONFIG_MACF
2241 crash_label,
2242 #endif
2243 FALSE);
2244 if (kr != KERN_SUCCESS) {
2245 goto out;
2246 }
2247
2248 self_thread = current_thread();
2249
2250 wsave = thread_interrupt_level(THREAD_UNINT);
2251 task_lock(task);
2252
2253 task_set_corpse_pending_report(task);
2254 task_set_corpse(task);
2255 task->crashed_thread_id = thread_tid(self_thread);
2256
2257 kr = task_start_halt_locked(task, TRUE);
2258 assert(kr == KERN_SUCCESS);
2259
2260 ipc_task_reset(task);
2261 /* Remove the naked send right for task port, needed to arm no sender notification */
2262 task_set_special_port_internal(task, TASK_KERNEL_PORT, IPC_PORT_NULL);
2263 ipc_task_enable(task);
2264
2265 task_unlock(task);
2266 /* terminate the ipc space */
2267 ipc_space_terminate(task->itk_space);
2268
2269 /* Add it to global corpse task list */
2270 task_add_to_corpse_task_list(task);
2271
2272 task_start_halt(task);
2273 thread_terminate_internal(self_thread);
2274
2275 (void) thread_interrupt_level(wsave);
2276 assert(task->halting == TRUE);
2277
2278 out:
2279 #if CONFIG_MACF
2280 mac_exc_free_label(crash_label);
2281 #endif
2282 return kr;
2283 }
2284
2285 /*
2286 * task_clear_corpse
2287 *
2288 * Clears the corpse pending bit on task.
2289 * Removes inspection bit on the threads.
2290 */
2291 void
2292 task_clear_corpse(task_t task)
2293 {
2294 thread_t th_iter = NULL;
2295
2296 task_lock(task);
2297 queue_iterate(&task->threads, th_iter, thread_t, task_threads)
2298 {
2299 thread_mtx_lock(th_iter);
2300 th_iter->inspection = FALSE;
2301 thread_mtx_unlock(th_iter);
2302 }
2303
2304 thread_terminate_crashed_threads();
2305 /* remove the pending corpse report flag */
2306 task_clear_corpse_pending_report(task);
2307
2308 task_unlock(task);
2309 }
2310
2311 /*
2312 * task_port_notify
2313 *
2314 * Called whenever the Mach port system detects no-senders on
2315 * the task port of a corpse.
2316 * Each notification that comes in should terminate the task (corpse).
2317 */
2318 void
2319 task_port_notify(mach_msg_header_t *msg)
2320 {
2321 mach_no_senders_notification_t *notification = (void *)msg;
2322 ipc_port_t port = notification->not_header.msgh_remote_port;
2323 task_t task;
2324
2325 require_ip_active(port);
2326 assert(IKOT_TASK_CONTROL == ip_kotype(port));
2327 task = (task_t) ip_get_kobject(port);
2328
2329 assert(task_is_a_corpse(task));
2330
2331 /* Remove the task from global corpse task list */
2332 task_remove_from_corpse_task_list(task);
2333
2334 task_clear_corpse(task);
2335 task_terminate_internal(task);
2336 }
2337
2338 /*
2339 * task_port_with_flavor_notify
2340 *
2341 * Called whenever the Mach port system detects no-senders on
2342 * the task inspect or read port. These ports are allocated lazily and
2343 * should be deallocated here when there are no senders remaining.
2344 */
2345 void
2346 task_port_with_flavor_notify(mach_msg_header_t *msg)
2347 {
2348 mach_no_senders_notification_t *notification = (void *)msg;
2349 ipc_port_t port = notification->not_header.msgh_remote_port;
2350 task_t task;
2351 mach_task_flavor_t flavor;
2352 ipc_kobject_type_t kotype;
2353
2354 ip_lock(port);
2355 if (port->ip_srights > 0) {
2356 ip_unlock(port);
2357 return;
2358 }
2359 task = (task_t)port->ip_kobject;
2360 kotype = ip_kotype(port);
2361 if (task != TASK_NULL) {
2362 assert((IKOT_TASK_READ == kotype) || (IKOT_TASK_INSPECT == kotype));
2363 task_reference_internal(task);
2364 }
2365 ip_unlock(port);
2366
2367 if (task == TASK_NULL) {
2368 /* The task is exiting or disabled; it will eventually deallocate the port */
2369 return;
2370 }
2371
2372 itk_lock(task);
2373 ip_lock(port);
2374 require_ip_active(port);
2375 /*
2376 * Check for a stale no-senders notification. A call to any function
2377 * that vends out send rights to this port could resurrect it between
2378 * this notification being generated and actually being handled here.
2379 */
2380 if (port->ip_srights > 0) {
2381 ip_unlock(port);
2382 itk_unlock(task);
2383 task_deallocate(task);
2384 return;
2385 }
2386
2387 if (kotype == IKOT_TASK_READ) {
2388 flavor = TASK_FLAVOR_READ;
2389 } else {
2390 flavor = TASK_FLAVOR_INSPECT;
2391 }
2392 assert(task->itk_self[flavor] == port);
2393 task->itk_self[flavor] = IP_NULL;
2394 port->ip_kobject = IKOT_NONE;
2395 ip_unlock(port);
2396 itk_unlock(task);
2397 task_deallocate(task);
2398
2399 ipc_port_dealloc_kernel(port);
2400 }
2401
2402 /*
2403 * task_wait_till_threads_terminate_locked
2404 *
2405 * Wait till all the threads in the task are terminated.
2406 * Might release the task lock and re-acquire it.
2407 */
2408 void
2409 task_wait_till_threads_terminate_locked(task_t task)
2410 {
2411 /* wait for all the threads in the task to terminate */
2412 while (task->active_thread_count != 0) {
2413 assert_wait((event_t)&task->active_thread_count, THREAD_UNINT);
2414 task_unlock(task);
2415 thread_block(THREAD_CONTINUE_NULL);
2416
2417 task_lock(task);
2418 }
2419 }
2420
2421 /*
2422 * task_duplicate_map_and_threads
2423 *
2424 * Copy vmmap of source task.
2425 * Copy active threads from source task to destination task.
2426 * Source task would be suspended during the copy.
2427 */
2428 kern_return_t
2429 task_duplicate_map_and_threads(
2430 task_t task,
2431 void *p,
2432 task_t new_task,
2433 thread_t *thread_ret,
2434 uint64_t **udata_buffer,
2435 int *size,
2436 int *num_udata)
2437 {
2438 kern_return_t kr = KERN_SUCCESS;
2439 int active;
2440 thread_t thread, self, thread_return = THREAD_NULL;
2441 thread_t new_thread = THREAD_NULL, first_thread = THREAD_NULL;
2442 thread_t *thread_array;
2443 uint32_t active_thread_count = 0, array_count = 0, i;
2444 vm_map_t oldmap;
2445 uint64_t *buffer = NULL;
2446 int buf_size = 0;
2447 int est_knotes = 0, num_knotes = 0;
2448
2449 self = current_thread();
2450
2451 /*
2452 * Suspend the task to copy thread state, use the internal
2453 * variant so that no user-space process can resume
2454 * the task from under us
2455 */
2456 kr = task_suspend_internal(task);
2457 if (kr != KERN_SUCCESS) {
2458 return kr;
2459 }
2460
2461 if (task->map->disable_vmentry_reuse == TRUE) {
2462 /*
2463 * Quite likely GuardMalloc (or some debugging tool)
2464 * is being used on this task. And it has gone through
2465 * its limit. Making a corpse will likely encounter
2466 * a lot of VM entries that will need COW.
2467 *
2468 * Skip it.
2469 */
2470 #if DEVELOPMENT || DEBUG
2471 memorystatus_abort_vm_map_fork(task);
2472 #endif
2473 task_resume_internal(task);
2474 return KERN_FAILURE;
2475 }
2476
2477 /* Check with VM if vm_map_fork is allowed for this task */
2478 if (memorystatus_allowed_vm_map_fork(task)) {
2479 /* Setup new task's vmmap, switch from parent task's map to it COW map */
2480 oldmap = new_task->map;
2481 new_task->map = vm_map_fork(new_task->ledger,
2482 task->map,
2483 (VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
2484 VM_MAP_FORK_PRESERVE_PURGEABLE |
2485 VM_MAP_FORK_CORPSE_FOOTPRINT));
2486 vm_map_deallocate(oldmap);
2487
2488 /* copy ledgers that impact the memory footprint */
2489 vm_map_copy_footprint_ledgers(task, new_task);
2490
2491 /* Get all the udata pointers from kqueue */
2492 est_knotes = kevent_proc_copy_uptrs(p, NULL, 0);
2493 if (est_knotes > 0) {
2494 buf_size = (est_knotes + 32) * sizeof(uint64_t);
2495 buffer = kheap_alloc(KHEAP_DATA_BUFFERS, buf_size, Z_WAITOK);
2496 num_knotes = kevent_proc_copy_uptrs(p, buffer, buf_size);
2497 if (num_knotes > est_knotes + 32) {
2498 num_knotes = est_knotes + 32;
2499 }
2500 }
2501 }
2502
2503 active_thread_count = task->active_thread_count;
2504 if (active_thread_count == 0) {
2505 if (buffer != NULL) {
2506 kheap_free(KHEAP_DATA_BUFFERS, buffer, buf_size);
2507 }
2508 task_resume_internal(task);
2509 return KERN_FAILURE;
2510 }
2511
2512 thread_array = kheap_alloc(KHEAP_TEMP,
2513 sizeof(thread_t) * active_thread_count, Z_WAITOK);
2514
2515 /* Iterate all the threads and drop the task lock before calling thread_create_with_continuation */
2516 task_lock(task);
2517 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2518 /* Skip inactive threads */
2519 active = thread->active;
2520 if (!active) {
2521 continue;
2522 }
2523
2524 if (array_count >= active_thread_count) {
2525 break;
2526 }
2527
2528 thread_array[array_count++] = thread;
2529 thread_reference(thread);
2530 }
2531 task_unlock(task);
2532
2533 for (i = 0; i < array_count; i++) {
2534 kr = thread_create_with_continuation(new_task, &new_thread, (thread_continue_t)thread_corpse_continue);
2535 if (kr != KERN_SUCCESS) {
2536 break;
2537 }
2538
2539 /* Equivalent of current thread in corpse */
2540 if (thread_array[i] == self) {
2541 thread_return = new_thread;
2542 new_task->crashed_thread_id = thread_tid(new_thread);
2543 } else if (first_thread == NULL) {
2544 first_thread = new_thread;
2545 } else {
2546 /* drop the extra ref returned by thread_create_with_continuation */
2547 thread_deallocate(new_thread);
2548 }
2549
2550 kr = thread_dup2(thread_array[i], new_thread);
2551 if (kr != KERN_SUCCESS) {
2552 thread_mtx_lock(new_thread);
2553 new_thread->corpse_dup = TRUE;
2554 thread_mtx_unlock(new_thread);
2555 continue;
2556 }
2557
2558 /* Copy thread name */
2559 bsd_copythreadname(new_thread->uthread, thread_array[i]->uthread);
2560 new_thread->thread_tag = thread_array[i]->thread_tag;
2561 thread_copy_resource_info(new_thread, thread_array[i]);
2562 }
2563
2564 /* return the first thread if we couldn't find the equivalent of current */
2565 if (thread_return == THREAD_NULL) {
2566 thread_return = first_thread;
2567 } else if (first_thread != THREAD_NULL) {
2568 /* drop the extra ref returned by thread_create_with_continuation */
2569 thread_deallocate(first_thread);
2570 }
2571
2572 task_resume_internal(task);
2573
2574 for (i = 0; i < array_count; i++) {
2575 thread_deallocate(thread_array[i]);
2576 }
2577 kheap_free(KHEAP_TEMP, thread_array, sizeof(thread_t) * active_thread_count);
2578
2579 if (kr == KERN_SUCCESS) {
2580 *thread_ret = thread_return;
2581 *udata_buffer = buffer;
2582 *size = buf_size;
2583 *num_udata = num_knotes;
2584 } else {
2585 if (thread_return != THREAD_NULL) {
2586 thread_deallocate(thread_return);
2587 }
2588 if (buffer != NULL) {
2589 kheap_free(KHEAP_DATA_BUFFERS, buffer, buf_size);
2590 }
2591 }
2592
2593 return kr;
2594 }
2595
#if CONFIG_SECLUDED_MEMORY
/*
 * Forward declaration; task_terminate_internal() calls this with the
 * task lock held to turn off secluded memory use for a task that is
 * being terminated.
 */
extern void task_set_can_use_secluded_mem_locked(
	task_t          task,
	boolean_t       can_use_secluded_mem);
#endif /* CONFIG_SECLUDED_MEMORY */
2601
#if MACH_ASSERT
/*
 * Debug knob, off by default.  When non-zero, task_terminate_internal()
 * panics on a task whose map has a page shift smaller than PAGE_SHIFT.
 */
int debug4k_panic_on_terminate = 0;
#endif /* MACH_ASSERT */
2605 kern_return_t
2606 task_terminate_internal(
2607 task_t task)
2608 {
2609 thread_t thread, self;
2610 task_t self_task;
2611 boolean_t interrupt_save;
2612 int pid = 0;
2613
2614 assert(task != kernel_task);
2615
2616 self = current_thread();
2617 self_task = self->task;
2618
2619 /*
2620 * Get the task locked and make sure that we are not racing
2621 * with someone else trying to terminate us.
2622 */
2623 if (task == self_task) {
2624 task_lock(task);
2625 } else if (task < self_task) {
2626 task_lock(task);
2627 task_lock(self_task);
2628 } else {
2629 task_lock(self_task);
2630 task_lock(task);
2631 }
2632
2633 #if CONFIG_SECLUDED_MEMORY
2634 if (task->task_can_use_secluded_mem) {
2635 task_set_can_use_secluded_mem_locked(task, FALSE);
2636 }
2637 task->task_could_use_secluded_mem = FALSE;
2638 task->task_could_also_use_secluded_mem = FALSE;
2639
2640 if (task->task_suppressed_secluded) {
2641 stop_secluded_suppression(task);
2642 }
2643 #endif /* CONFIG_SECLUDED_MEMORY */
2644
2645 if (!task->active) {
2646 /*
2647 * Task is already being terminated.
2648 * Just return an error. If we are dying, this will
2649 * just get us to our AST special handler and that
2650 * will get us to finalize the termination of ourselves.
2651 */
2652 task_unlock(task);
2653 if (self_task != task) {
2654 task_unlock(self_task);
2655 }
2656
2657 return KERN_FAILURE;
2658 }
2659
2660 if (task_corpse_pending_report(task)) {
2661 /*
2662 * Task is marked for reporting as corpse.
2663 * Just return an error. This will
2664 * just get us to our AST special handler and that
2665 * will get us to finish the path to death
2666 */
2667 task_unlock(task);
2668 if (self_task != task) {
2669 task_unlock(self_task);
2670 }
2671
2672 return KERN_FAILURE;
2673 }
2674
2675 if (self_task != task) {
2676 task_unlock(self_task);
2677 }
2678
2679 /*
2680 * Make sure the current thread does not get aborted out of
2681 * the waits inside these operations.
2682 */
2683 interrupt_save = thread_interrupt_level(THREAD_UNINT);
2684
2685 /*
2686 * Indicate that we want all the threads to stop executing
2687 * at user space by holding the task (we would have held
2688 * each thread independently in thread_terminate_internal -
2689 * but this way we may be more likely to already find it
2690 * held there). Mark the task inactive, and prevent
2691 * further task operations via the task port.
2692 */
2693 task_hold_locked(task);
2694 task->active = FALSE;
2695 ipc_task_disable(task);
2696
2697 #if CONFIG_TELEMETRY
2698 /*
2699 * Notify telemetry that this task is going away.
2700 */
2701 telemetry_task_ctl_locked(task, TF_TELEMETRY, 0);
2702 #endif
2703
2704 /*
2705 * Terminate each thread in the task.
2706 */
2707 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2708 thread_terminate_internal(thread);
2709 }
2710
2711 #ifdef MACH_BSD
2712 if (task->bsd_info != NULL && !task_is_exec_copy(task)) {
2713 pid = proc_pid(task->bsd_info);
2714 }
2715 #endif /* MACH_BSD */
2716
2717 task_unlock(task);
2718
2719 proc_set_task_policy(task, TASK_POLICY_ATTRIBUTE,
2720 TASK_POLICY_TERMINATED, TASK_POLICY_ENABLE);
2721
2722 /* Early object reap phase */
2723
2724 // PR-17045188: Revisit implementation
2725 // task_partial_reap(task, pid);
2726
2727 #if CONFIG_TASKWATCH
2728 /*
2729 * remove all task watchers
2730 */
2731 task_removewatchers(task);
2732
2733 #endif /* CONFIG_TASKWATCH */
2734
2735 /*
2736 * Destroy all synchronizers owned by the task.
2737 */
2738 task_synchronizer_destroy_all(task);
2739
2740 /*
2741 * Clear the watchport boost on the task.
2742 */
2743 task_remove_turnstile_watchports(task);
2744
2745 /*
2746 * Destroy the IPC space, leaving just a reference for it.
2747 */
2748 ipc_space_terminate(task->itk_space);
2749
2750 #if 00
2751 /* if some ledgers go negative on tear-down again... */
2752 ledger_disable_panic_on_negative(task->map->pmap->ledger,
2753 task_ledgers.phys_footprint);
2754 ledger_disable_panic_on_negative(task->map->pmap->ledger,
2755 task_ledgers.internal);
2756 ledger_disable_panic_on_negative(task->map->pmap->ledger,
2757 task_ledgers.internal_compressed);
2758 ledger_disable_panic_on_negative(task->map->pmap->ledger,
2759 task_ledgers.iokit_mapped);
2760 ledger_disable_panic_on_negative(task->map->pmap->ledger,
2761 task_ledgers.alternate_accounting);
2762 ledger_disable_panic_on_negative(task->map->pmap->ledger,
2763 task_ledgers.alternate_accounting_compressed);
2764 #endif
2765
2766 /*
2767 * If the current thread is a member of the task
2768 * being terminated, then the last reference to
2769 * the task will not be dropped until the thread
2770 * is finally reaped. To avoid incurring the
2771 * expense of removing the address space regions
2772 * at reap time, we do it explicitly here.
2773 */
2774
2775 vm_map_lock(task->map);
2776 vm_map_disable_hole_optimization(task->map);
2777 vm_map_unlock(task->map);
2778
2779 #if MACH_ASSERT
2780 /*
2781 * Identify the pmap's process, in case the pmap ledgers drift
2782 * and we have to report it.
2783 */
2784 char procname[17];
2785 if (task->bsd_info && !task_is_exec_copy(task)) {
2786 pid = proc_pid(task->bsd_info);
2787 proc_name_kdp(task, procname, sizeof(procname));
2788 } else {
2789 pid = 0;
2790 strlcpy(procname, "<unknown>", sizeof(procname));
2791 }
2792 pmap_set_process(task->map->pmap, pid, procname);
2793 if (vm_map_page_shift(task->map) < (int)PAGE_SHIFT) {
2794 DEBUG4K_LIFE("map %p procname: %s\n", task->map, procname);
2795 if (debug4k_panic_on_terminate) {
2796 panic("DEBUG4K: %s:%d %d[%s] map %p\n", __FUNCTION__, __LINE__, pid, procname, task->map);
2797 }
2798 }
2799 #endif /* MACH_ASSERT */
2800
2801 vm_map_terminate(task->map);
2802
2803 /* release our shared region */
2804 vm_shared_region_set(task, NULL);
2805
2806 #if __has_feature(ptrauth_calls)
2807 task_set_shared_region_id(task, NULL);
2808 #endif /* __has_feature(ptrauth_calls) */
2809
2810 lck_mtx_lock(&tasks_threads_lock);
2811 queue_remove(&tasks, task, task_t, tasks);
2812 queue_enter(&terminated_tasks, task, task_t, tasks);
2813 tasks_count--;
2814 terminated_tasks_count++;
2815 lck_mtx_unlock(&tasks_threads_lock);
2816
2817 /*
2818 * We no longer need to guard against being aborted, so restore
2819 * the previous interruptible state.
2820 */
2821 thread_interrupt_level(interrupt_save);
2822
2823 #if KPC
2824 /* force the task to release all ctrs */
2825 if (task->t_kpc & TASK_KPC_FORCED_ALL_CTRS) {
2826 kpc_force_all_ctrs(task, 0);
2827 }
2828 #endif /* KPC */
2829
2830 #if CONFIG_COALITIONS
2831 /*
2832 * Leave our coalitions. (drop activation but not reference)
2833 */
2834 coalitions_remove_task(task);
2835 #endif
2836
2837 #if CONFIG_FREEZE
2838 extern int vm_compressor_available;
2839 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE && vm_compressor_available) {
2840 task_disown_frozen_csegs(task);
2841 assert(queue_empty(&task->task_frozen_cseg_q));
2842 }
2843 #endif /* CONFIG_FREEZE */
2844
2845 /*
2846 * Get rid of the task active reference on itself.
2847 */
2848 task_deallocate(task);
2849
2850 return KERN_SUCCESS;
2851 }
2852
2853 void
2854 tasks_system_suspend(boolean_t suspend)
2855 {
2856 task_t task;
2857
2858 lck_mtx_lock(&tasks_threads_lock);
2859 assert(tasks_suspend_state != suspend);
2860 tasks_suspend_state = suspend;
2861 queue_iterate(&tasks, task, task_t, tasks) {
2862 if (task == kernel_task) {
2863 continue;
2864 }
2865 suspend ? task_suspend_internal(task) : task_resume_internal(task);
2866 }
2867 lck_mtx_unlock(&tasks_threads_lock);
2868 }
2869
2870 /*
2871 * task_start_halt:
2872 *
2873 * Shut the current task down (except for the current thread) in
2874 * preparation for dramatic changes to the task (probably exec).
2875 * We hold the task and mark all other threads in the task for
2876 * termination.
2877 */
2878 kern_return_t
2879 task_start_halt(task_t task)
2880 {
2881 kern_return_t kr = KERN_SUCCESS;
2882 task_lock(task);
2883 kr = task_start_halt_locked(task, FALSE);
2884 task_unlock(task);
2885 return kr;
2886 }
2887
2888 static kern_return_t
2889 task_start_halt_locked(task_t task, boolean_t should_mark_corpse)
2890 {
2891 thread_t thread, self;
2892 uint64_t dispatchqueue_offset;
2893
2894 assert(task != kernel_task);
2895
2896 self = current_thread();
2897
2898 if (task != self->task && !task_is_a_corpse_fork(task)) {
2899 return KERN_INVALID_ARGUMENT;
2900 }
2901
2902 if (task->halting || !task->active || !self->active) {
2903 /*
2904 * Task or current thread is already being terminated.
2905 * Hurry up and return out of the current kernel context
2906 * so that we run our AST special handler to terminate
2907 * ourselves.
2908 */
2909 return KERN_FAILURE;
2910 }
2911
2912 task->halting = TRUE;
2913
2914 /*
2915 * Mark all the threads to keep them from starting any more
2916 * user-level execution. The thread_terminate_internal code
2917 * would do this on a thread by thread basis anyway, but this
2918 * gives us a better chance of not having to wait there.
2919 */
2920 task_hold_locked(task);
2921 dispatchqueue_offset = get_dispatchqueue_offset_from_proc(task->bsd_info);
2922
2923 /*
2924 * Terminate all the other threads in the task.
2925 */
2926 queue_iterate(&task->threads, thread, thread_t, task_threads)
2927 {
2928 if (should_mark_corpse) {
2929 thread_mtx_lock(thread);
2930 thread->inspection = TRUE;
2931 thread_mtx_unlock(thread);
2932 }
2933 if (thread != self) {
2934 thread_terminate_internal(thread);
2935 }
2936 }
2937 task->dispatchqueue_offset = dispatchqueue_offset;
2938
2939 task_release_locked(task);
2940
2941 return KERN_SUCCESS;
2942 }
2943
2944
2945 /*
2946 * task_complete_halt:
2947 *
2948 * Complete task halt by waiting for threads to terminate, then clean
2949 * up task resources (VM, port namespace, etc...) and then let the
2950 * current thread go in the (practically empty) task context.
2951 *
2952 * Note: task->halting flag is not cleared in order to avoid creation
2953 * of new thread in old exec'ed task.
2954 */
2955 void
2956 task_complete_halt(task_t task)
2957 {
2958 task_lock(task);
2959 assert(task->halting);
2960 assert(task == current_task());
2961
2962 /*
2963 * Wait for the other threads to get shut down.
2964 * When the last other thread is reaped, we'll be
2965 * woken up.
2966 */
2967 if (task->thread_count > 1) {
2968 assert_wait((event_t)&task->halting, THREAD_UNINT);
2969 task_unlock(task);
2970 thread_block(THREAD_CONTINUE_NULL);
2971 } else {
2972 task_unlock(task);
2973 }
2974
2975 /*
2976 * Give the machine dependent code a chance
2977 * to perform cleanup of task-level resources
2978 * associated with the current thread before
2979 * ripping apart the task.
2980 */
2981 machine_task_terminate(task);
2982
2983 /*
2984 * Destroy all synchronizers owned by the task.
2985 */
2986 task_synchronizer_destroy_all(task);
2987
2988 /*
2989 * Destroy the contents of the IPC space, leaving just
2990 * a reference for it.
2991 */
2992 ipc_space_clean(task->itk_space);
2993
2994 /*
2995 * Clean out the address space, as we are going to be
2996 * getting a new one.
2997 */
2998 vm_map_remove(task->map, task->map->min_offset,
2999 task->map->max_offset,
3000 /*
3001 * Final cleanup:
3002 * + no unnesting
3003 * + remove immutable mappings
3004 * + allow gaps in the range
3005 */
3006 (VM_MAP_REMOVE_NO_UNNESTING |
3007 VM_MAP_REMOVE_IMMUTABLE |
3008 VM_MAP_REMOVE_GAPS_OK));
3009
3010 /*
3011 * Kick out any IOKitUser handles to the task. At best they're stale,
3012 * at worst someone is racing a SUID exec.
3013 */
3014 iokit_task_terminate(task);
3015 }
3016
3017 /*
3018 * task_hold_locked:
3019 *
3020 * Suspend execution of the specified task.
3021 * This is a recursive-style suspension of the task, a count of
3022 * suspends is maintained.
3023 *
3024 * CONDITIONS: the task is locked and active.
3025 */
3026 void
3027 task_hold_locked(
3028 task_t task)
3029 {
3030 thread_t thread;
3031
3032 assert(task->active);
3033
3034 if (task->suspend_count++ > 0) {
3035 return;
3036 }
3037
3038 if (task->bsd_info) {
3039 workq_proc_suspended(task->bsd_info);
3040 }
3041
3042 /*
3043 * Iterate through all the threads and hold them.
3044 */
3045 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3046 thread_mtx_lock(thread);
3047 thread_hold(thread);
3048 thread_mtx_unlock(thread);
3049 }
3050 }
3051
3052 /*
3053 * task_hold:
3054 *
3055 * Same as the internal routine above, except that is must lock
3056 * and verify that the task is active. This differs from task_suspend
3057 * in that it places a kernel hold on the task rather than just a
3058 * user-level hold. This keeps users from over resuming and setting
3059 * it running out from under the kernel.
3060 *
3061 * CONDITIONS: the caller holds a reference on the task
3062 */
3063 kern_return_t
3064 task_hold(
3065 task_t task)
3066 {
3067 if (task == TASK_NULL) {
3068 return KERN_INVALID_ARGUMENT;
3069 }
3070
3071 task_lock(task);
3072
3073 if (!task->active) {
3074 task_unlock(task);
3075
3076 return KERN_FAILURE;
3077 }
3078
3079 task_hold_locked(task);
3080 task_unlock(task);
3081
3082 return KERN_SUCCESS;
3083 }
3084
3085 kern_return_t
3086 task_wait(
3087 task_t task,
3088 boolean_t until_not_runnable)
3089 {
3090 if (task == TASK_NULL) {
3091 return KERN_INVALID_ARGUMENT;
3092 }
3093
3094 task_lock(task);
3095
3096 if (!task->active) {
3097 task_unlock(task);
3098
3099 return KERN_FAILURE;
3100 }
3101
3102 task_wait_locked(task, until_not_runnable);
3103 task_unlock(task);
3104
3105 return KERN_SUCCESS;
3106 }
3107
3108 /*
3109 * task_wait_locked:
3110 *
3111 * Wait for all threads in task to stop.
3112 *
3113 * Conditions:
3114 * Called with task locked, active, and held.
3115 */
3116 void
3117 task_wait_locked(
3118 task_t task,
3119 boolean_t until_not_runnable)
3120 {
3121 thread_t thread, self;
3122
3123 assert(task->active);
3124 assert(task->suspend_count > 0);
3125
3126 self = current_thread();
3127
3128 /*
3129 * Iterate through all the threads and wait for them to
3130 * stop. Do not wait for the current thread if it is within
3131 * the task.
3132 */
3133 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3134 if (thread != self) {
3135 thread_wait(thread, until_not_runnable);
3136 }
3137 }
3138 }
3139
3140 boolean_t
3141 task_is_app_suspended(task_t task)
3142 {
3143 return task->pidsuspended;
3144 }
3145
3146 /*
3147 * task_release_locked:
3148 *
3149 * Release a kernel hold on a task.
3150 *
3151 * CONDITIONS: the task is locked and active
3152 */
3153 void
3154 task_release_locked(
3155 task_t task)
3156 {
3157 thread_t thread;
3158
3159 assert(task->active);
3160 assert(task->suspend_count > 0);
3161
3162 if (--task->suspend_count > 0) {
3163 return;
3164 }
3165
3166 if (task->bsd_info) {
3167 workq_proc_resumed(task->bsd_info);
3168 }
3169
3170 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3171 thread_mtx_lock(thread);
3172 thread_release(thread);
3173 thread_mtx_unlock(thread);
3174 }
3175 }
3176
3177 /*
3178 * task_release:
3179 *
3180 * Same as the internal routine above, except that it must lock
3181 * and verify that the task is active.
3182 *
3183 * CONDITIONS: The caller holds a reference to the task
3184 */
3185 kern_return_t
3186 task_release(
3187 task_t task)
3188 {
3189 if (task == TASK_NULL) {
3190 return KERN_INVALID_ARGUMENT;
3191 }
3192
3193 task_lock(task);
3194
3195 if (!task->active) {
3196 task_unlock(task);
3197
3198 return KERN_FAILURE;
3199 }
3200
3201 task_release_locked(task);
3202 task_unlock(task);
3203
3204 return KERN_SUCCESS;
3205 }
3206
3207 static kern_return_t
3208 task_threads_internal(
3209 task_t task,
3210 thread_act_array_t *threads_out,
3211 mach_msg_type_number_t *count,
3212 mach_thread_flavor_t flavor)
3213 {
3214 mach_msg_type_number_t actual;
3215 thread_t *thread_list;
3216 thread_t thread;
3217 vm_size_t size, size_needed;
3218 void *addr;
3219 unsigned int i, j;
3220
3221 size = 0; addr = NULL;
3222
3223 if (task == TASK_NULL) {
3224 return KERN_INVALID_ARGUMENT;
3225 }
3226
3227 for (;;) {
3228 task_lock(task);
3229 if (!task->active) {
3230 task_unlock(task);
3231
3232 if (size != 0) {
3233 kfree(addr, size);
3234 }
3235
3236 return KERN_FAILURE;
3237 }
3238
3239 actual = task->thread_count;
3240
3241 /* do we have the memory we need? */
3242 size_needed = actual * sizeof(mach_port_t);
3243 if (size_needed <= size) {
3244 break;
3245 }
3246
3247 /* unlock the task and allocate more memory */
3248 task_unlock(task);
3249
3250 if (size != 0) {
3251 kfree(addr, size);
3252 }
3253
3254 assert(size_needed > 0);
3255 size = size_needed;
3256
3257 addr = kalloc(size);
3258 if (addr == 0) {
3259 return KERN_RESOURCE_SHORTAGE;
3260 }
3261 }
3262
3263 /* OK, have memory and the task is locked & active */
3264 thread_list = (thread_t *)addr;
3265
3266 i = j = 0;
3267
3268 for (thread = (thread_t)queue_first(&task->threads); i < actual;
3269 ++i, thread = (thread_t)queue_next(&thread->task_threads)) {
3270 thread_reference_internal(thread);
3271 thread_list[j++] = thread;
3272 }
3273
3274 assert(queue_end(&task->threads, (queue_entry_t)thread));
3275
3276 actual = j;
3277 size_needed = actual * sizeof(mach_port_t);
3278
3279 /* can unlock task now that we've got the thread refs */
3280 task_unlock(task);
3281
3282 if (actual == 0) {
3283 /* no threads, so return null pointer and deallocate memory */
3284
3285 *threads_out = NULL;
3286 *count = 0;
3287
3288 if (size != 0) {
3289 kfree(addr, size);
3290 }
3291 } else {
3292 /* if we allocated too much, must copy */
3293
3294 if (size_needed < size) {
3295 void *newaddr;
3296
3297 newaddr = kalloc(size_needed);
3298 if (newaddr == 0) {
3299 for (i = 0; i < actual; ++i) {
3300 thread_deallocate(thread_list[i]);
3301 }
3302 kfree(addr, size);
3303 return KERN_RESOURCE_SHORTAGE;
3304 }
3305
3306 bcopy(addr, newaddr, size_needed);
3307 kfree(addr, size);
3308 thread_list = (thread_t *)newaddr;
3309 }
3310
3311 *threads_out = thread_list;
3312 *count = actual;
3313
3314 /* do the conversion that Mig should handle */
3315
3316 switch (flavor) {
3317 case THREAD_FLAVOR_CONTROL:
3318 for (i = 0; i < actual; ++i) {
3319 ((ipc_port_t *) thread_list)[i] = convert_thread_to_port(thread_list[i]);
3320 }
3321 break;
3322 case THREAD_FLAVOR_READ:
3323 for (i = 0; i < actual; ++i) {
3324 ((ipc_port_t *) thread_list)[i] = convert_thread_read_to_port(thread_list[i]);
3325 }
3326 break;
3327 case THREAD_FLAVOR_INSPECT:
3328 for (i = 0; i < actual; ++i) {
3329 ((ipc_port_t *) thread_list)[i] = convert_thread_inspect_to_port(thread_list[i]);
3330 }
3331 break;
3332 default:
3333 return KERN_INVALID_ARGUMENT;
3334 }
3335 }
3336
3337 return KERN_SUCCESS;
3338 }
3339
3340 kern_return_t
3341 task_threads(
3342 task_t task,
3343 thread_act_array_t *threads_out,
3344 mach_msg_type_number_t *count)
3345 {
3346 return task_threads_internal(task, threads_out, count, THREAD_FLAVOR_CONTROL);
3347 }
3348
3349
3350 kern_return_t
3351 task_threads_from_user(
3352 mach_port_t port,
3353 thread_act_array_t *threads_out,
3354 mach_msg_type_number_t *count)
3355 {
3356 ipc_kobject_type_t kotype;
3357 kern_return_t kr;
3358
3359 task_t task = convert_port_to_task_check_type(port, &kotype, TASK_FLAVOR_INSPECT, FALSE);
3360
3361 if (task == TASK_NULL) {
3362 return KERN_INVALID_ARGUMENT;
3363 }
3364
3365 switch (kotype) {
3366 case IKOT_TASK_CONTROL:
3367 kr = task_threads_internal(task, threads_out, count, THREAD_FLAVOR_CONTROL);
3368 break;
3369 case IKOT_TASK_READ:
3370 kr = task_threads_internal(task, threads_out, count, THREAD_FLAVOR_READ);
3371 break;
3372 case IKOT_TASK_INSPECT:
3373 kr = task_threads_internal(task, threads_out, count, THREAD_FLAVOR_INSPECT);
3374 break;
3375 default:
3376 panic("strange kobject type");
3377 break;
3378 }
3379
3380 task_deallocate(task);
3381 return kr;
3382 }
3383
3384 #define TASK_HOLD_NORMAL 0
3385 #define TASK_HOLD_PIDSUSPEND 1
3386 #define TASK_HOLD_LEGACY 2
3387 #define TASK_HOLD_LEGACY_ALL 3
3388
3389 static kern_return_t
3390 place_task_hold(
3391 task_t task,
3392 int mode)
3393 {
3394 if (!task->active && !task_is_a_corpse(task)) {
3395 return KERN_FAILURE;
3396 }
3397
3398 /* Return success for corpse task */
3399 if (task_is_a_corpse(task)) {
3400 return KERN_SUCCESS;
3401 }
3402
3403 KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_IPC, MACH_TASK_SUSPEND),
3404 task_pid(task),
3405 task->thread_count > 0 ?((thread_t)queue_first(&task->threads))->thread_id : 0,
3406 task->user_stop_count, task->user_stop_count + 1);
3407
3408 #if MACH_ASSERT
3409 current_task()->suspends_outstanding++;
3410 #endif
3411
3412 if (mode == TASK_HOLD_LEGACY) {
3413 task->legacy_stop_count++;
3414 }
3415
3416 if (task->user_stop_count++ > 0) {
3417 /*
3418 * If the stop count was positive, the task is
3419 * already stopped and we can exit.
3420 */
3421 return KERN_SUCCESS;
3422 }
3423
3424 /*
3425 * Put a kernel-level hold on the threads in the task (all
3426 * user-level task suspensions added together represent a
3427 * single kernel-level hold). We then wait for the threads
3428 * to stop executing user code.
3429 */
3430 task_hold_locked(task);
3431 task_wait_locked(task, FALSE);
3432
3433 return KERN_SUCCESS;
3434 }
3435
3436 static kern_return_t
3437 release_task_hold(
3438 task_t task,
3439 int mode)
3440 {
3441 boolean_t release = FALSE;
3442
3443 if (!task->active && !task_is_a_corpse(task)) {
3444 return KERN_FAILURE;
3445 }
3446
3447 /* Return success for corpse task */
3448 if (task_is_a_corpse(task)) {
3449 return KERN_SUCCESS;
3450 }
3451
3452 if (mode == TASK_HOLD_PIDSUSPEND) {
3453 if (task->pidsuspended == FALSE) {
3454 return KERN_FAILURE;
3455 }
3456 task->pidsuspended = FALSE;
3457 }
3458
3459 if (task->user_stop_count > (task->pidsuspended ? 1 : 0)) {
3460 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
3461 MACHDBG_CODE(DBG_MACH_IPC, MACH_TASK_RESUME) | DBG_FUNC_NONE,
3462 task_pid(task), ((thread_t)queue_first(&task->threads))->thread_id,
3463 task->user_stop_count, mode, task->legacy_stop_count);
3464
3465 #if MACH_ASSERT
3466 /*
3467 * This is obviously not robust; if we suspend one task and then resume a different one,
3468 * we'll fly under the radar. This is only meant to catch the common case of a crashed
3469 * or buggy suspender.
3470 */
3471 current_task()->suspends_outstanding--;
3472 #endif
3473
3474 if (mode == TASK_HOLD_LEGACY_ALL) {
3475 if (task->legacy_stop_count >= task->user_stop_count) {
3476 task->user_stop_count = 0;
3477 release = TRUE;
3478 } else {
3479 task->user_stop_count -= task->legacy_stop_count;
3480 }
3481 task->legacy_stop_count = 0;
3482 } else {
3483 if (mode == TASK_HOLD_LEGACY && task->legacy_stop_count > 0) {
3484 task->legacy_stop_count--;
3485 }
3486 if (--task->user_stop_count == 0) {
3487 release = TRUE;
3488 }
3489 }
3490 } else {
3491 return KERN_FAILURE;
3492 }
3493
3494 /*
3495 * Release the task if necessary.
3496 */
3497 if (release) {
3498 task_release_locked(task);
3499 }
3500
3501 return KERN_SUCCESS;
3502 }
3503
3504 boolean_t
3505 get_task_suspended(task_t task)
3506 {
3507 return 0 != task->user_stop_count;
3508 }
3509
3510 /*
3511 * task_suspend:
3512 *
3513 * Implement an (old-fashioned) user-level suspension on a task.
3514 *
3515 * Because the user isn't expecting to have to manage a suspension
3516 * token, we'll track it for him in the kernel in the form of a naked
3517 * send right to the task's resume port. All such send rights
3518 * account for a single suspension against the task (unlike task_suspend2()
3519 * where each caller gets a unique suspension count represented by a
3520 * unique send-once right).
3521 *
3522 * Conditions:
3523 * The caller holds a reference to the task
3524 */
3525 kern_return_t
3526 task_suspend(
3527 task_t task)
3528 {
3529 kern_return_t kr;
3530 mach_port_t port;
3531 mach_port_name_t name;
3532
3533 if (task == TASK_NULL || task == kernel_task) {
3534 return KERN_INVALID_ARGUMENT;
3535 }
3536
3537 task_lock(task);
3538
3539 /*
3540 * place a legacy hold on the task.
3541 */
3542 kr = place_task_hold(task, TASK_HOLD_LEGACY);
3543 if (kr != KERN_SUCCESS) {
3544 task_unlock(task);
3545 return kr;
3546 }
3547
3548 /*
3549 * Claim a send right on the task resume port, and request a no-senders
3550 * notification on that port (if none outstanding).
3551 */
3552 (void)ipc_kobject_make_send_lazy_alloc_port((ipc_port_t *) &task->itk_resume,
3553 (ipc_kobject_t)task, IKOT_TASK_RESUME, true, OS_PTRAUTH_DISCRIMINATOR("task.itk_resume"));
3554 port = task->itk_resume;
3555 task_unlock(task);
3556
3557 /*
3558 * Copyout the send right into the calling task's IPC space. It won't know it is there,
3559 * but we'll look it up when calling a traditional resume. Any IPC operations that
3560 * deallocate the send right will auto-release the suspension.
3561 */
3562 if ((kr = ipc_kmsg_copyout_object(current_task()->itk_space, ip_to_object(port),
3563 MACH_MSG_TYPE_MOVE_SEND, NULL, NULL, &name)) != KERN_SUCCESS) {
3564 printf("warning: %s(%d) failed to copyout suspension token for pid %d with error: %d\n",
3565 proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info),
3566 task_pid(task), kr);
3567 return kr;
3568 }
3569
3570 return kr;
3571 }
3572
3573 /*
3574 * task_resume:
3575 * Release a user hold on a task.
3576 *
3577 * Conditions:
3578 * The caller holds a reference to the task
3579 */
3580 kern_return_t
3581 task_resume(
3582 task_t task)
3583 {
3584 kern_return_t kr;
3585 mach_port_name_t resume_port_name;
3586 ipc_entry_t resume_port_entry;
3587 ipc_space_t space = current_task()->itk_space;
3588
3589 if (task == TASK_NULL || task == kernel_task) {
3590 return KERN_INVALID_ARGUMENT;
3591 }
3592
3593 /* release a legacy task hold */
3594 task_lock(task);
3595 kr = release_task_hold(task, TASK_HOLD_LEGACY);
3596 task_unlock(task);
3597
3598 is_write_lock(space);
3599 if (is_active(space) && IP_VALID(task->itk_resume) &&
3600 ipc_hash_lookup(space, ip_to_object(task->itk_resume), &resume_port_name, &resume_port_entry) == TRUE) {
3601 /*
3602 * We found a suspension token in the caller's IPC space. Release a send right to indicate that
3603 * we are holding one less legacy hold on the task from this caller. If the release failed,
3604 * go ahead and drop all the rights, as someone either already released our holds or the task
3605 * is gone.
3606 */
3607 if (kr == KERN_SUCCESS) {
3608 ipc_right_dealloc(space, resume_port_name, resume_port_entry);
3609 } else {
3610 ipc_right_destroy(space, resume_port_name, resume_port_entry, FALSE, 0);
3611 }
3612 /* space unlocked */
3613 } else {
3614 is_write_unlock(space);
3615 if (kr == KERN_SUCCESS) {
3616 printf("warning: %s(%d) performed out-of-band resume on pid %d\n",
3617 proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info),
3618 task_pid(task));
3619 }
3620 }
3621
3622 return kr;
3623 }
3624
3625 /*
3626 * Suspend the target task.
3627 * Making/holding a token/reference/port is the callers responsibility.
3628 */
3629 kern_return_t
3630 task_suspend_internal(task_t task)
3631 {
3632 kern_return_t kr;
3633
3634 if (task == TASK_NULL || task == kernel_task) {
3635 return KERN_INVALID_ARGUMENT;
3636 }
3637
3638 task_lock(task);
3639 kr = place_task_hold(task, TASK_HOLD_NORMAL);
3640 task_unlock(task);
3641 return kr;
3642 }
3643
3644 /*
3645 * Suspend the target task, and return a suspension token. The token
3646 * represents a reference on the suspended task.
3647 */
3648 kern_return_t
3649 task_suspend2(
3650 task_t task,
3651 task_suspension_token_t *suspend_token)
3652 {
3653 kern_return_t kr;
3654
3655 kr = task_suspend_internal(task);
3656 if (kr != KERN_SUCCESS) {
3657 *suspend_token = TASK_NULL;
3658 return kr;
3659 }
3660
3661 /*
3662 * Take a reference on the target task and return that to the caller
3663 * as a "suspension token," which can be converted into an SO right to
3664 * the now-suspended task's resume port.
3665 */
3666 task_reference_internal(task);
3667 *suspend_token = task;
3668
3669 return KERN_SUCCESS;
3670 }
3671
3672 /*
3673 * Resume the task
3674 * (reference/token/port management is caller's responsibility).
3675 */
3676 kern_return_t
3677 task_resume_internal(
3678 task_suspension_token_t task)
3679 {
3680 kern_return_t kr;
3681
3682 if (task == TASK_NULL || task == kernel_task) {
3683 return KERN_INVALID_ARGUMENT;
3684 }
3685
3686 task_lock(task);
3687 kr = release_task_hold(task, TASK_HOLD_NORMAL);
3688 task_unlock(task);
3689 return kr;
3690 }
3691
3692 /*
3693 * Resume the task using a suspension token. Consumes the token's ref.
3694 */
3695 kern_return_t
3696 task_resume2(
3697 task_suspension_token_t task)
3698 {
3699 kern_return_t kr;
3700
3701 kr = task_resume_internal(task);
3702 task_suspension_token_deallocate(task);
3703
3704 return kr;
3705 }
3706
3707 boolean_t
3708 task_suspension_notify(mach_msg_header_t *request_header)
3709 {
3710 ipc_port_t port = request_header->msgh_remote_port;
3711 task_t task = convert_port_to_task_suspension_token(port);
3712 mach_msg_type_number_t not_count;
3713
3714 if (task == TASK_NULL || task == kernel_task) {
3715 return TRUE; /* nothing to do */
3716 }
3717 switch (request_header->msgh_id) {
3718 case MACH_NOTIFY_SEND_ONCE:
3719 /* release the hold held by this specific send-once right */
3720 task_lock(task);
3721 release_task_hold(task, TASK_HOLD_NORMAL);
3722 task_unlock(task);
3723 break;
3724
3725 case MACH_NOTIFY_NO_SENDERS:
3726 not_count = ((mach_no_senders_notification_t *)request_header)->not_count;
3727
3728 task_lock(task);
3729 ip_lock(port);
3730 if (port->ip_mscount == not_count) {
3731 /* release all the [remaining] outstanding legacy holds */
3732 assert(port->ip_nsrequest == IP_NULL);
3733 ip_unlock(port);
3734 release_task_hold(task, TASK_HOLD_LEGACY_ALL);
3735 task_unlock(task);
3736 } else if (port->ip_nsrequest == IP_NULL) {
3737 ipc_port_t old_notify;
3738
3739 task_unlock(task);
3740 /* new send rights, re-arm notification at current make-send count */
3741 ipc_port_nsrequest(port, port->ip_mscount, ipc_port_make_sonce_locked(port), &old_notify);
3742 assert(old_notify == IP_NULL);
3743 /* port unlocked */
3744 } else {
3745 ip_unlock(port);
3746 task_unlock(task);
3747 }
3748 break;
3749
3750 default:
3751 break;
3752 }
3753
3754 task_suspension_token_deallocate(task); /* drop token reference */
3755 return TRUE;
3756 }
3757
3758 static kern_return_t
3759 task_pidsuspend_locked(task_t task)
3760 {
3761 kern_return_t kr;
3762
3763 if (task->pidsuspended) {
3764 kr = KERN_FAILURE;
3765 goto out;
3766 }
3767
3768 task->pidsuspended = TRUE;
3769
3770 kr = place_task_hold(task, TASK_HOLD_PIDSUSPEND);
3771 if (kr != KERN_SUCCESS) {
3772 task->pidsuspended = FALSE;
3773 }
3774 out:
3775 return kr;
3776 }
3777
3778
3779 /*
3780 * task_pidsuspend:
3781 *
3782 * Suspends a task by placing a hold on its threads.
3783 *
3784 * Conditions:
3785 * The caller holds a reference to the task
3786 */
3787 kern_return_t
3788 task_pidsuspend(
3789 task_t task)
3790 {
3791 kern_return_t kr;
3792
3793 if (task == TASK_NULL || task == kernel_task) {
3794 return KERN_INVALID_ARGUMENT;
3795 }
3796
3797 task_lock(task);
3798
3799 kr = task_pidsuspend_locked(task);
3800
3801 task_unlock(task);
3802
3803 if ((KERN_SUCCESS == kr) && task->message_app_suspended) {
3804 iokit_task_app_suspended_changed(task);
3805 }
3806
3807 return kr;
3808 }
3809
3810 /*
3811 * task_pidresume:
3812 * Resumes a previously suspended task.
3813 *
3814 * Conditions:
3815 * The caller holds a reference to the task
3816 */
3817 kern_return_t
3818 task_pidresume(
3819 task_t task)
3820 {
3821 kern_return_t kr;
3822
3823 if (task == TASK_NULL || task == kernel_task) {
3824 return KERN_INVALID_ARGUMENT;
3825 }
3826
3827 task_lock(task);
3828
3829 #if CONFIG_FREEZE
3830
3831 while (task->changing_freeze_state) {
3832 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
3833 task_unlock(task);
3834 thread_block(THREAD_CONTINUE_NULL);
3835
3836 task_lock(task);
3837 }
3838 task->changing_freeze_state = TRUE;
3839 #endif
3840
3841 kr = release_task_hold(task, TASK_HOLD_PIDSUSPEND);
3842
3843 task_unlock(task);
3844
3845 if ((KERN_SUCCESS == kr) && task->message_app_suspended) {
3846 iokit_task_app_suspended_changed(task);
3847 }
3848
3849 #if CONFIG_FREEZE
3850
3851 task_lock(task);
3852
3853 if (kr == KERN_SUCCESS) {
3854 task->frozen = FALSE;
3855 }
3856 task->changing_freeze_state = FALSE;
3857 thread_wakeup(&task->changing_freeze_state);
3858
3859 task_unlock(task);
3860 #endif
3861
3862 return kr;
3863 }
3864
3865 os_refgrp_decl(static, task_watchports_refgrp, "task_watchports", NULL);
3866
3867 /*
3868 * task_add_turnstile_watchports:
3869 * Setup watchports to boost the main thread of the task.
3870 *
3871 * Arguments:
3872 * task: task being spawned
3873 * thread: main thread of task
3874 * portwatch_ports: array of watchports
3875 * portwatch_count: number of watchports
3876 *
3877 * Conditions:
3878 * Nothing locked.
3879 */
3880 void
3881 task_add_turnstile_watchports(
3882 task_t task,
3883 thread_t thread,
3884 ipc_port_t *portwatch_ports,
3885 uint32_t portwatch_count)
3886 {
3887 struct task_watchports *watchports = NULL;
3888 struct task_watchport_elem *previous_elem_array[TASK_MAX_WATCHPORT_COUNT] = {};
3889 os_ref_count_t refs;
3890
3891 /* Check if the task has terminated */
3892 if (!task->active) {
3893 return;
3894 }
3895
3896 assert(portwatch_count <= TASK_MAX_WATCHPORT_COUNT);
3897
3898 watchports = task_watchports_alloc_init(task, thread, portwatch_count);
3899
3900 /* Lock the ipc space */
3901 is_write_lock(task->itk_space);
3902
3903 /* Setup watchports to boost the main thread */
3904 refs = task_add_turnstile_watchports_locked(task,
3905 watchports, previous_elem_array, portwatch_ports,
3906 portwatch_count);
3907
3908 /* Drop the space lock */
3909 is_write_unlock(task->itk_space);
3910
3911 if (refs == 0) {
3912 task_watchports_deallocate(watchports);
3913 }
3914
3915 /* Drop the ref on previous_elem_array */
3916 for (uint32_t i = 0; i < portwatch_count && previous_elem_array[i] != NULL; i++) {
3917 task_watchport_elem_deallocate(previous_elem_array[i]);
3918 }
3919 }
3920
3921 /*
3922 * task_remove_turnstile_watchports:
3923 * Clear all turnstile boost on the task from watchports.
3924 *
3925 * Arguments:
3926 * task: task being terminated
3927 *
3928 * Conditions:
3929 * Nothing locked.
3930 */
3931 void
3932 task_remove_turnstile_watchports(
3933 task_t task)
3934 {
3935 os_ref_count_t refs = TASK_MAX_WATCHPORT_COUNT;
3936 struct task_watchports *watchports = NULL;
3937 ipc_port_t port_freelist[TASK_MAX_WATCHPORT_COUNT] = {};
3938 uint32_t portwatch_count;
3939
3940 /* Lock the ipc space */
3941 is_write_lock(task->itk_space);
3942
3943 /* Check if watchport boost exist */
3944 if (task->watchports == NULL) {
3945 is_write_unlock(task->itk_space);
3946 return;
3947 }
3948 watchports = task->watchports;
3949 portwatch_count = watchports->tw_elem_array_count;
3950
3951 refs = task_remove_turnstile_watchports_locked(task, watchports,
3952 port_freelist);
3953
3954 is_write_unlock(task->itk_space);
3955
3956 /* Drop all the port references */
3957 for (uint32_t i = 0; i < portwatch_count && port_freelist[i] != NULL; i++) {
3958 ip_release(port_freelist[i]);
3959 }
3960
3961 /* Clear the task and thread references for task_watchport */
3962 if (refs == 0) {
3963 task_watchports_deallocate(watchports);
3964 }
3965 }
3966
3967 /*
3968 * task_transfer_turnstile_watchports:
3969 * Transfer all watchport turnstile boost from old task to new task.
 *
 * A fresh task_watchports structure is allocated for new_task. Every
 * non-NULL port in old_task's element array is then handed to
 * ipc_port_replace_watchport_elem_conditional_locked() so that the old
 * element can be swapped for the matching element of the new structure.
 * Either structure is deallocated here if its reference count reaches zero.
3970 *
3971 * Arguments:
3972 * old_task: task calling exec
3973 * new_task: new exec'ed task
3974 * new_thread: main thread of new task
3975 *
3976 * Conditions:
3977 * Nothing locked.
3978 */
3979 void
3980 task_transfer_turnstile_watchports(
3981 task_t old_task,
3982 task_t new_task,
3983 thread_t new_thread)
3984 {
3985 struct task_watchports *old_watchports = NULL;
3986 struct task_watchports *new_watchports = NULL;
/*
 * The ref counters start at a sentinel (presumably non-zero) so that the
 * "== 0" checks at the end only fire after a release actually reported
 * that the last reference is gone.
 */
3987 os_ref_count_t old_refs = TASK_MAX_WATCHPORT_COUNT;
3988 os_ref_count_t new_refs = TASK_MAX_WATCHPORT_COUNT;
3989 uint32_t portwatch_count;
3990
/* Unlocked early-out; both conditions are checked again below with the space locks held */
3991 if (old_task->watchports == NULL || !new_task->active) {
3992 return;
3993 }
3994
3995 /* Get the watch port count from the old task */
3996 is_write_lock(old_task->itk_space);
3997 if (old_task->watchports == NULL) {
3998 is_write_unlock(old_task->itk_space);
3999 return;
4000 }
4001
4002 portwatch_count = old_task->watchports->tw_elem_array_count;
/* The space lock is dropped across the allocation and retaken afterwards */
4003 is_write_unlock(old_task->itk_space);
4004
4005 new_watchports = task_watchports_alloc_init(new_task, new_thread, portwatch_count);
4006
4007 /* Lock the ipc space for old task */
4008 is_write_lock(old_task->itk_space);
4009
4010 /* Lock the ipc space for new task */
4011 is_write_lock(new_task->itk_space);
4012
4013 /* Check if watchport boost exist */
4014 if (old_task->watchports == NULL || !new_task->active) {
4015 is_write_unlock(new_task->itk_space);
4016 is_write_unlock(old_task->itk_space);
/* new_watchports was never attached to new_task: drop its reference and free it */
4017 (void)task_watchports_release(new_watchports);
4018 task_watchports_deallocate(new_watchports);
4019 return;
4020 }
4021
4022 old_watchports = old_task->watchports;
/* The element count sampled before the allocation must still match */
4023 assert(portwatch_count == old_task->watchports->tw_elem_array_count);
4024
4025 /* Setup new task watchports */
4026 new_task->watchports = new_watchports;
4027
4028 for (uint32_t i = 0; i < portwatch_count; i++) {
4029 ipc_port_t port = old_watchports->tw_elem[i].twe_port;
4030
/* Empty slot in the old array: keep the matching new slot cleared */
4031 if (port == NULL) {
4032 task_watchport_elem_clear(&new_watchports->tw_elem[i]);
4033 continue;
4034 }
4035
4036 /* Lock the port and check if it has the entry */
4037 ip_lock(port);
4038 imq_lock(&port->ip_messages);
4039
4040 task_watchport_elem_init(&new_watchports->tw_elem[i], new_task, port);
4041
4042 if (ipc_port_replace_watchport_elem_conditional_locked(port,
4043 &old_watchports->tw_elem[i], &new_watchports->tw_elem[i]) == KERN_SUCCESS) {
4044 task_watchport_elem_clear(&old_watchports->tw_elem[i]);
4045
/* The element moved: the new structure gains a reference, the old one loses one */
4046 task_watchports_retain(new_watchports);
4047 old_refs = task_watchports_release(old_watchports);
4048
4049 /* Check if all ports are cleaned */
4050 if (old_refs == 0) {
4051 old_task->watchports = NULL;
4052 }
4053 } else {
/* Replacement did not happen: undo the init of the new element */
4054 task_watchport_elem_clear(&new_watchports->tw_elem[i]);
4055 }
4056 /* mqueue and port unlocked by ipc_port_replace_watchport_elem_conditional_locked */
4057 }
4058
4059 /* Drop the reference on new task_watchports struct returned by task_watchports_alloc_init */
4060 new_refs = task_watchports_release(new_watchports);
4061 if (new_refs == 0) {
4062 new_task->watchports = NULL;
4063 }
4064
4065 is_write_unlock(new_task->itk_space);
4066 is_write_unlock(old_task->itk_space);
4067
/* Deallocation is deferred until both space locks have been dropped */
4068 /* Clear the task and thread references for old_watchport */
4069 if (old_refs == 0) {
4070 task_watchports_deallocate(old_watchports);
4071 }
4072
4073 /* Clear the task and thread references for new_watchport */
4074 if (new_refs == 0) {
4075 task_watchports_deallocate(new_watchports);
4076 }
4077 }
4078
4079 /*
4080 * task_add_turnstile_watchports_locked:
4081 * Setup watchports to boost the main thread of the task.
4082 *
4083 * Arguments:
4084 * task: task to boost
4085 * watchports: watchport structure to be attached to the task
4086 * previous_elem_array: an array of old watchport_elem to be returned to caller
4087 * portwatch_ports: array of watchports
4088 * portwatch_count: number of watchports
4089 *
 * Returns:
 * the value reported by task_watchports_release() when the reference
 * taken at init time is dropped. When that value is 0 on the active-task
 * path, task->watchports has been reset to NULL.
 *
4090 * Conditions:
4091 * ipc space of the task locked.
4092 * returns array of old watchport_elem in previous_elem_array
4093 */
4094 static os_ref_count_t
4095 task_add_turnstile_watchports_locked(
4096 task_t task,
4097 struct task_watchports *watchports,
4098 struct task_watchport_elem **previous_elem_array,
4099 ipc_port_t *portwatch_ports,
4100 uint32_t portwatch_count)
4101 {
4102 os_ref_count_t refs = TASK_MAX_WATCHPORT_COUNT;
4103
4104 /* Check if the task is still active */
4105 if (!task->active) {
/* Nothing is attached to an inactive task; just drop the reference and report the count */
4106 refs = task_watchports_release(watchports);
4107 return refs;
4108 }
4109
4110 assert(task->watchports == NULL);
4111 task->watchports = watchports;
4112
/* i walks portwatch_ports and tw_elem in step; j is the next free slot in previous_elem_array */
4113 for (uint32_t i = 0, j = 0; i < portwatch_count; i++) {
4114 ipc_port_t port = portwatch_ports[i];
4115
4116 task_watchport_elem_init(&watchports->tw_elem[i], task, port);
4117 if (port == NULL) {
4118 task_watchport_elem_clear(&watchports->tw_elem[i]);
4119 continue;
4120 }
4121
4122 ip_lock(port);
4123 imq_lock(&port->ip_messages);
4124
4125 /* Check if port is in valid state to be setup as watchport */
4126 if (ipc_port_add_watchport_elem_locked(port, &watchports->tw_elem[i],
4127 &previous_elem_array[j]) != KERN_SUCCESS) {
4128 task_watchport_elem_clear(&watchports->tw_elem[i]);
4129 continue;
4130 }
4131 /* port and mqueue unlocked on return */
4132
/* The add succeeded: take a port reference and a watchports reference for this element */
4133 ip_reference(port);
4134 task_watchports_retain(watchports);
/* Advance j only when a previous element was handed back, keeping the output array dense */
4135 if (previous_elem_array[j] != NULL) {
4136 j++;
4137 }
4138 }
4139
4140 /* Drop the reference on task_watchport struct returned by os_ref_init */
4141 refs = task_watchports_release(watchports);
4142 if (refs == 0) {
4143 task->watchports = NULL;
4144 }
4145
4146 return refs;
4147 }
4148
4149 /*
4150 * task_remove_turnstile_watchports_locked:
4151 * Clear all turnstile boost on the task from watchports.
4152 *
4153 * Arguments:
4154 * task: task to remove watchports from
4155 * watchports: watchports structure for the task
4156 * port_freelist: array of ports returned with ref to caller
4157 *
 * Returns:
 * the value from the last task_watchports_release() call, or
 * TASK_MAX_WATCHPORT_COUNT if no element was cleared. When 0 is
 * returned, task->watchports has been reset to NULL.
4158 *
4159 * Conditions:
4160 * ipc space of the task locked.
4161 * array of ports with refs are returned in port_freelist
4162 */
4163 static os_ref_count_t
4164 task_remove_turnstile_watchports_locked(
4165 task_t task,
4166 struct task_watchports *watchports,
4167 ipc_port_t *port_freelist)
4168 {
4169 os_ref_count_t refs = TASK_MAX_WATCHPORT_COUNT;
4170
/* j is the next free slot in port_freelist; only successfully cleared ports are appended */
4171 for (uint32_t i = 0, j = 0; i < watchports->tw_elem_array_count; i++) {
4172 ipc_port_t port = watchports->tw_elem[i].twe_port;
4173 if (port == NULL) {
4174 continue;
4175 }
4176
4177 /* Lock the port and check if it has the entry */
4178 ip_lock(port);
4179 imq_lock(&port->ip_messages);
4180 if (ipc_port_clear_watchport_elem_internal_conditional_locked(port,
4181 &watchports->tw_elem[i]) == KERN_SUCCESS) {
4182 task_watchport_elem_clear(&watchports->tw_elem[i]);
/* The port is handed to the caller, which is expected to release it later */
4183 port_freelist[j++] = port;
4184 refs = task_watchports_release(watchports);
4185
4186 /* Check if all ports are cleaned */
4187 if (refs == 0) {
4188 task->watchports = NULL;
/* No references remain, so stop scanning */
4189 break;
4190 }
4191 }
4192 /* mqueue and port unlocked by ipc_port_clear_watchport_elem_internal_conditional_locked */
4193 }
4194 return refs;
4195 }
4196
4197 /*
4198 * task_watchports_alloc_init:
4199 * Allocate and initialize task watchport struct.
4200 *
4201 * Conditions:
4202 * Nothing locked.
4203 */
4204 static struct task_watchports *
4205 task_watchports_alloc_init(
4206 task_t task,
4207 thread_t thread,
4208 uint32_t count)
4209 {
4210 struct task_watchports *watchports = kalloc(sizeof(struct task_watchports) +
4211 count * sizeof(struct task_watchport_elem));
4212
4213 task_reference(task);
4214 thread_reference(thread);
4215 watchports->tw_task = task;
4216 watchports->tw_thread = thread;
4217 watchports->tw_elem_array_count = count;
4218 os_ref_init(&watchports->tw_refcount, &task_watchports_refgrp);
4219
4220 return watchports;
4221 }
4222
4223 /*
4224 * task_watchports_deallocate:
4225 * Deallocate task watchport struct.
4226 *
4227 * Conditions:
4228 * Nothing locked.
4229 */
4230 static void
4231 task_watchports_deallocate(
4232 struct task_watchports *watchports)
4233 {
4234 uint32_t portwatch_count = watchports->tw_elem_array_count;
4235
4236 task_deallocate(watchports->tw_task);
4237 thread_deallocate(watchports->tw_thread);
4238 kfree(watchports, sizeof(struct task_watchports) + portwatch_count * sizeof(struct task_watchport_elem));
4239 }
4240
4241 /*
4242 * task_watchport_elem_deallocate:
4243 * Deallocate task watchport element and release its ref on task_watchport.
4244 *
4245 * Conditions:
4246 * Nothing locked.
4247 */
4248 void
4249 task_watchport_elem_deallocate(
4250 struct task_watchport_elem *watchport_elem)
4251 {
4252 os_ref_count_t refs = TASK_MAX_WATCHPORT_COUNT;
4253 task_t task = watchport_elem->twe_task;
4254 struct task_watchports *watchports = NULL;
4255 ipc_port_t port = NULL;
4256
4257 assert(task != NULL);
4258
4259 /* Take the space lock to modify the elememt */
4260 is_write_lock(task->itk_space);
4261
4262 watchports = task->watchports;
4263 assert(watchports != NULL);
4264
4265 port = watchport_elem->twe_port;
4266 assert(port != NULL);
4267
4268 task_watchport_elem_clear(watchport_elem);
4269 refs = task_watchports_release(watchports);
4270
4271 if (refs == 0) {
4272 task->watchports = NULL;
4273 }
4274
4275 is_write_unlock(task->itk_space);
4276
4277 ip_release(port);
4278 if (refs == 0) {
4279 task_watchports_deallocate(watchports);
4280 }
4281 }
4282
4283 /*
4284 * task_has_watchports:
4285 * Return TRUE if task has watchport boosts.
4286 *
4287 * Conditions:
4288 * Nothing locked.
4289 */
4290 boolean_t
4291 task_has_watchports(task_t task)
4292 {
4293 return task->watchports != NULL;
4294 }
4295
4296 #if DEVELOPMENT || DEBUG
4297
4298 extern void IOSleep(int);
4299
4300 kern_return_t
4301 task_disconnect_page_mappings(task_t task)
4302 {
4303 int n;
4304
4305 if (task == TASK_NULL || task == kernel_task) {
4306 return KERN_INVALID_ARGUMENT;
4307 }
4308
4309 /*
4310 * this function is used to strip all of the mappings from
4311 * the pmap for the specified task to force the task to
4312 * re-fault all of the pages it is actively using... this
4313 * allows us to approximate the true working set of the
4314 * specified task. We only engage if at least 1 of the
4315 * threads in the task is runnable, but we want to continuously
4316 * sweep (at least for a while - I've arbitrarily set the limit at
4317 * 100 sweeps to be re-looked at as we gain experience) to get a better
4318 * view into what areas within a page are being visited (as opposed to only
4319 * seeing the first fault of a page after the task becomes
4320 * runnable)... in the future I may
4321 * try to block until awakened by a thread in this task
4322 * being made runnable, but for now we'll periodically poll from the
4323 * user level debug tool driving the sysctl
4324 */
4325 for (n = 0; n < 100; n++) {
4326 thread_t thread;
4327 boolean_t runnable;
4328 boolean_t do_unnest;
4329 int page_count;
4330
4331 runnable = FALSE;
4332 do_unnest = FALSE;
4333
4334 task_lock(task);
4335
4336 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4337 if (thread->state & TH_RUN) {
4338 runnable = TRUE;
4339 break;
4340 }
4341 }
4342 if (n == 0) {
4343 task->task_disconnected_count++;
4344 }
4345
4346 if (task->task_unnested == FALSE) {
4347 if (runnable == TRUE) {
4348 task->task_unnested = TRUE;
4349 do_unnest = TRUE;
4350 }
4351 }
4352 task_unlock(task);
4353
4354 if (runnable == FALSE) {
4355 break;
4356 }
4357
4358 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_TASK_PAGE_MAPPINGS)) | DBG_FUNC_START,
4359 task, do_unnest, task->task_disconnected_count, 0, 0);
4360
4361 page_count = vm_map_disconnect_page_mappings(task->map, do_unnest);
4362
4363 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_TASK_PAGE_MAPPINGS)) | DBG_FUNC_END,
4364 task, page_count, 0, 0, 0);
4365
4366 if ((n % 5) == 4) {
4367 IOSleep(1);
4368 }
4369 }
4370 return KERN_SUCCESS;
4371 }
4372
4373 #endif
4374
4375
4376 #if CONFIG_FREEZE
4377
4378 /*
4379 * task_freeze:
4380 *
4381 * Freeze a task.
4382 *
4383 * Conditions:
4384 * The caller holds a reference to the task
4385 */
4386 extern void vm_wake_compactor_swapper(void);
4387 extern queue_head_t c_swapout_list_head;
4388 extern struct freezer_context freezer_context_global;
4389
4390 kern_return_t
4391 task_freeze(
4392 task_t task,
4393 uint32_t *purgeable_count,
4394 uint32_t *wired_count,
4395 uint32_t *clean_count,
4396 uint32_t *dirty_count,
4397 uint32_t dirty_budget,
4398 uint32_t *shared_count,
4399 int *freezer_error_code,
4400 boolean_t eval_only)
4401 {
4402 kern_return_t kr = KERN_SUCCESS;
4403
4404 if (task == TASK_NULL || task == kernel_task) {
4405 return KERN_INVALID_ARGUMENT;
4406 }
4407
4408 task_lock(task);
4409
4410 while (task->changing_freeze_state) {
4411 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
4412 task_unlock(task);
4413 thread_block(THREAD_CONTINUE_NULL);
4414
4415 task_lock(task);
4416 }
4417 if (task->frozen) {
4418 task_unlock(task);
4419 return KERN_FAILURE;
4420 }
4421 task->changing_freeze_state = TRUE;
4422
4423 freezer_context_global.freezer_ctx_task = task;
4424
4425 task_unlock(task);
4426
4427 kr = vm_map_freeze(task,
4428 purgeable_count,
4429 wired_count,
4430 clean_count,
4431 dirty_count,
4432 dirty_budget,
4433 shared_count,
4434 freezer_error_code,
4435 eval_only);
4436
4437 task_lock(task);
4438
4439 if ((kr == KERN_SUCCESS) && (eval_only == FALSE)) {
4440 task->frozen = TRUE;
4441
4442 freezer_context_global.freezer_ctx_task = NULL;
4443 freezer_context_global.freezer_ctx_uncompressed_pages = 0;
4444
4445 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
4446 /*
4447 * reset the counter tracking the # of swapped compressed pages
4448 * because we are now done with this freeze session and task.
4449 */
4450
4451 *dirty_count = (uint32_t) (freezer_context_global.freezer_ctx_swapped_bytes / PAGE_SIZE_64); /*used to track pageouts*/
4452 }
4453
4454 freezer_context_global.freezer_ctx_swapped_bytes = 0;
4455 }
4456
4457 task->changing_freeze_state = FALSE;
4458 thread_wakeup(&task->changing_freeze_state);
4459
4460 task_unlock(task);
4461
4462 if (VM_CONFIG_COMPRESSOR_IS_PRESENT &&
4463 (kr == KERN_SUCCESS) &&
4464 (eval_only == FALSE)) {
4465 vm_wake_compactor_swapper();
4466 /*
4467 * We do an explicit wakeup of the swapout thread here
4468 * because the compact_and_swap routines don't have
4469 * knowledge about these kind of "per-task packed c_segs"
4470 * and so will not be evaluating whether we need to do
4471 * a wakeup there.
4472 */
4473 thread_wakeup((event_t)&c_swapout_list_head);
4474 }
4475
4476 return kr;
4477 }
4478
4479 /*
4480 * task_thaw:
4481 *
4482 * Thaw a currently frozen task.
4483 *
4484 * Conditions:
4485 * The caller holds a reference to the task
4486 */
4487 kern_return_t
4488 task_thaw(
4489 task_t task)
4490 {
4491 if (task == TASK_NULL || task == kernel_task) {
4492 return KERN_INVALID_ARGUMENT;
4493 }
4494
4495 task_lock(task);
4496
4497 while (task->changing_freeze_state) {
4498 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
4499 task_unlock(task);
4500 thread_block(THREAD_CONTINUE_NULL);
4501
4502 task_lock(task);
4503 }
4504 if (!task->frozen) {
4505 task_unlock(task);
4506 return KERN_FAILURE;
4507 }
4508 task->frozen = FALSE;
4509
4510 task_unlock(task);
4511
4512 return KERN_SUCCESS;
4513 }
4514
4515 void
4516 task_update_frozen_to_swap_acct(task_t task, int64_t amount, freezer_acct_op_t op)
4517 {
4518 /*
4519 * We don't assert that the task lock is held because we call this
4520 * routine from the decompression path and we won't be holding the
4521 * task lock. However, since we are in the context of the task we are
4522 * safe.
4523 * In the case of the task_freeze path, we call it from behind the task
4524 * lock but we don't need to because we have a reference on the proc
4525 * being frozen.
4526 */
4527
4528 assert(task);
4529 if (amount == 0) {
4530 return;
4531 }
4532
4533 if (op == CREDIT_TO_SWAP) {
4534 ledger_credit_nocheck(task->ledger, task_ledgers.frozen_to_swap, amount);
4535 } else if (op == DEBIT_FROM_SWAP) {
4536 ledger_debit_nocheck(task->ledger, task_ledgers.frozen_to_swap, amount);
4537 } else {
4538 panic("task_update_frozen_to_swap_acct: Invalid ledger op\n");
4539 }
4540 }
4541 #endif /* CONFIG_FREEZE */
4542
4543 kern_return_t
4544 host_security_set_task_token(
4545 host_security_t host_security,
4546 task_t task,
4547 security_token_t sec_token,
4548 audit_token_t audit_token,
4549 host_priv_t host_priv)
4550 {
4551 ipc_port_t host_port;
4552 kern_return_t kr;
4553
4554 if (task == TASK_NULL) {
4555 return KERN_INVALID_ARGUMENT;
4556 }
4557
4558 if (host_security == HOST_NULL) {
4559 return KERN_INVALID_SECURITY;
4560 }
4561
4562 task_lock(task);
4563 task->sec_token = sec_token;
4564 task->audit_token = audit_token;
4565 task_unlock(task);
4566
4567 if (host_priv != HOST_PRIV_NULL) {
4568 kr = host_get_host_priv_port(host_priv, &host_port);
4569 } else {
4570 kr = host_get_host_port(host_priv_self(), &host_port);
4571 }
4572 assert(kr == KERN_SUCCESS);
4573
4574 kr = task_set_special_port_internal(task, TASK_HOST_PORT, host_port);
4575 return kr;
4576 }
4577
4578 kern_return_t
4579 task_send_trace_memory(
4580 __unused task_t target_task,
4581 __unused uint32_t pid,
4582 __unused uint64_t uniqueid)
4583 {
4584 return KERN_INVALID_ARGUMENT;
4585 }
4586
4587 /*
4588 * This routine was added, pretty much exclusively, for registering the
4589 * RPC glue vector for in-kernel short circuited tasks. Rather than
4590 * removing it completely, I have only disabled that feature (which was
4591 * the only feature at the time). It just appears that we are going to
4592 * want to add some user data to tasks in the future (i.e. bsd info,
4593 * task names, etc...), so I left it in the formal task interface.
4594 */
4595 kern_return_t
4596 task_set_info(
4597 task_t task,
4598 task_flavor_t flavor,
4599 __unused task_info_t task_info_in, /* pointer to IN array */
4600 __unused mach_msg_type_number_t task_info_count)
4601 {
4602 if (task == TASK_NULL) {
4603 return KERN_INVALID_ARGUMENT;
4604 }
4605 switch (flavor) {
4606 #if CONFIG_ATM
4607 case TASK_TRACE_MEMORY_INFO:
4608 return KERN_NOT_SUPPORTED;
4609 #endif // CONFIG_ATM
4610 default:
4611 return KERN_INVALID_ARGUMENT;
4612 }
4613 }
4614
4615 int radar_20146450 = 1;
4616 kern_return_t
4617 task_info(
4618 task_t task,
4619 task_flavor_t flavor,
4620 task_info_t task_info_out,
4621 mach_msg_type_number_t *task_info_count)
4622 {
4623 kern_return_t error = KERN_SUCCESS;
4624 mach_msg_type_number_t original_task_info_count;
4625
4626 if (task == TASK_NULL) {
4627 return KERN_INVALID_ARGUMENT;
4628 }
4629
4630 original_task_info_count = *task_info_count;
4631 task_lock(task);
4632
4633 if ((task != current_task()) && (!task->active)) {
4634 task_unlock(task);
4635 return KERN_INVALID_ARGUMENT;
4636 }
4637
4638
4639 switch (flavor) {
4640 case TASK_BASIC_INFO_32:
4641 case TASK_BASIC2_INFO_32:
4642 #if defined(__arm__) || defined(__arm64__)
4643 case TASK_BASIC_INFO_64:
4644 #endif
4645 {
4646 task_basic_info_32_t basic_info;
4647 vm_map_t map;
4648 clock_sec_t secs;
4649 clock_usec_t usecs;
4650
4651 if (*task_info_count < TASK_BASIC_INFO_32_COUNT) {
4652 error = KERN_INVALID_ARGUMENT;
4653 break;
4654 }
4655
4656 basic_info = (task_basic_info_32_t)task_info_out;
4657
4658 map = (task == kernel_task)? kernel_map: task->map;
4659 basic_info->virtual_size = (typeof(basic_info->virtual_size))vm_map_adjusted_size(map);
4660 if (flavor == TASK_BASIC2_INFO_32) {
4661 /*
4662 * The "BASIC2" flavor gets the maximum resident
4663 * size instead of the current resident size...
4664 */
4665 basic_info->resident_size = pmap_resident_max(map->pmap);
4666 } else {
4667 basic_info->resident_size = pmap_resident_count(map->pmap);
4668 }
4669 basic_info->resident_size *= PAGE_SIZE;
4670
4671 basic_info->policy = ((task != kernel_task)?
4672 POLICY_TIMESHARE: POLICY_RR);
4673 basic_info->suspend_count = task->user_stop_count;
4674
4675 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
4676 basic_info->user_time.seconds =
4677 (typeof(basic_info->user_time.seconds))secs;
4678 basic_info->user_time.microseconds = usecs;
4679
4680 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
4681 basic_info->system_time.seconds =
4682 (typeof(basic_info->system_time.seconds))secs;
4683 basic_info->system_time.microseconds = usecs;
4684
4685 *task_info_count = TASK_BASIC_INFO_32_COUNT;
4686 break;
4687 }
4688
4689 #if defined(__arm__) || defined(__arm64__)
4690 case TASK_BASIC_INFO_64_2:
4691 {
4692 task_basic_info_64_2_t basic_info;
4693 vm_map_t map;
4694 clock_sec_t secs;
4695 clock_usec_t usecs;
4696
4697 if (*task_info_count < TASK_BASIC_INFO_64_2_COUNT) {
4698 error = KERN_INVALID_ARGUMENT;
4699 break;
4700 }
4701
4702 basic_info = (task_basic_info_64_2_t)task_info_out;
4703
4704 map = (task == kernel_task)? kernel_map: task->map;
4705 basic_info->virtual_size = vm_map_adjusted_size(map);
4706 basic_info->resident_size =
4707 (mach_vm_size_t)(pmap_resident_count(map->pmap))
4708 * PAGE_SIZE_64;
4709
4710 basic_info->policy = ((task != kernel_task)?
4711 POLICY_TIMESHARE: POLICY_RR);
4712 basic_info->suspend_count = task->user_stop_count;
4713
4714 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
4715 basic_info->user_time.seconds =
4716 (typeof(basic_info->user_time.seconds))secs;
4717 basic_info->user_time.microseconds = usecs;
4718
4719 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
4720 basic_info->system_time.seconds =
4721 (typeof(basic_info->system_time.seconds))secs;
4722 basic_info->system_time.microseconds = usecs;
4723
4724 *task_info_count = TASK_BASIC_INFO_64_2_COUNT;
4725 break;
4726 }
4727
4728 #else /* defined(__arm__) || defined(__arm64__) */
4729 case TASK_BASIC_INFO_64:
4730 {
4731 task_basic_info_64_t basic_info;
4732 vm_map_t map;
4733 clock_sec_t secs;
4734 clock_usec_t usecs;
4735
4736 if (*task_info_count < TASK_BASIC_INFO_64_COUNT) {
4737 error = KERN_INVALID_ARGUMENT;
4738 break;
4739 }
4740
4741 basic_info = (task_basic_info_64_t)task_info_out;
4742
4743 map = (task == kernel_task)? kernel_map: task->map;
4744 basic_info->virtual_size = vm_map_adjusted_size(map);
4745 basic_info->resident_size =
4746 (mach_vm_size_t)(pmap_resident_count(map->pmap))
4747 * PAGE_SIZE_64;
4748
4749 basic_info->policy = ((task != kernel_task)?
4750 POLICY_TIMESHARE: POLICY_RR);
4751 basic_info->suspend_count = task->user_stop_count;
4752
4753 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
4754 basic_info->user_time.seconds =
4755 (typeof(basic_info->user_time.seconds))secs;
4756 basic_info->user_time.microseconds = usecs;
4757
4758 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
4759 basic_info->system_time.seconds =
4760 (typeof(basic_info->system_time.seconds))secs;
4761 basic_info->system_time.microseconds = usecs;
4762
4763 *task_info_count = TASK_BASIC_INFO_64_COUNT;
4764 break;
4765 }
4766 #endif /* defined(__arm__) || defined(__arm64__) */
4767
4768 case MACH_TASK_BASIC_INFO:
4769 {
4770 mach_task_basic_info_t basic_info;
4771 vm_map_t map;
4772 clock_sec_t secs;
4773 clock_usec_t usecs;
4774
4775 if (*task_info_count < MACH_TASK_BASIC_INFO_COUNT) {
4776 error = KERN_INVALID_ARGUMENT;
4777 break;
4778 }
4779
4780 basic_info = (mach_task_basic_info_t)task_info_out;
4781
4782 map = (task == kernel_task) ? kernel_map : task->map;
4783
4784 basic_info->virtual_size = vm_map_adjusted_size(map);
4785
4786 basic_info->resident_size =
4787 (mach_vm_size_t)(pmap_resident_count(map->pmap));
4788 basic_info->resident_size *= PAGE_SIZE_64;
4789
4790 basic_info->resident_size_max =
4791 (mach_vm_size_t)(pmap_resident_max(map->pmap));
4792 basic_info->resident_size_max *= PAGE_SIZE_64;
4793
4794 basic_info->policy = ((task != kernel_task) ?
4795 POLICY_TIMESHARE : POLICY_RR);
4796
4797 basic_info->suspend_count = task->user_stop_count;
4798
4799 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
4800 basic_info->user_time.seconds =
4801 (typeof(basic_info->user_time.seconds))secs;
4802 basic_info->user_time.microseconds = usecs;
4803
4804 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
4805 basic_info->system_time.seconds =
4806 (typeof(basic_info->system_time.seconds))secs;
4807 basic_info->system_time.microseconds = usecs;
4808
4809 *task_info_count = MACH_TASK_BASIC_INFO_COUNT;
4810 break;
4811 }
4812
4813 case TASK_THREAD_TIMES_INFO:
4814 {
4815 task_thread_times_info_t times_info;
4816 thread_t thread;
4817
4818 if (*task_info_count < TASK_THREAD_TIMES_INFO_COUNT) {
4819 error = KERN_INVALID_ARGUMENT;
4820 break;
4821 }
4822
4823 times_info = (task_thread_times_info_t) task_info_out;
4824 times_info->user_time.seconds = 0;
4825 times_info->user_time.microseconds = 0;
4826 times_info->system_time.seconds = 0;
4827 times_info->system_time.microseconds = 0;
4828
4829
4830 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4831 time_value_t user_time, system_time;
4832
4833 if (thread->options & TH_OPT_IDLE_THREAD) {
4834 continue;
4835 }
4836
4837 thread_read_times(thread, &user_time, &system_time, NULL);
4838
4839 time_value_add(&times_info->user_time, &user_time);
4840 time_value_add(&times_info->system_time, &system_time);
4841 }
4842
4843 *task_info_count = TASK_THREAD_TIMES_INFO_COUNT;
4844 break;
4845 }
4846
4847 case TASK_ABSOLUTETIME_INFO:
4848 {
4849 task_absolutetime_info_t info;
4850 thread_t thread;
4851
4852 if (*task_info_count < TASK_ABSOLUTETIME_INFO_COUNT) {
4853 error = KERN_INVALID_ARGUMENT;
4854 break;
4855 }
4856
4857 info = (task_absolutetime_info_t)task_info_out;
4858 info->threads_user = info->threads_system = 0;
4859
4860
4861 info->total_user = task->total_user_time;
4862 info->total_system = task->total_system_time;
4863
4864 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4865 uint64_t tval;
4866 spl_t x;
4867
4868 if (thread->options & TH_OPT_IDLE_THREAD) {
4869 continue;
4870 }
4871
4872 x = splsched();
4873 thread_lock(thread);
4874
4875 tval = timer_grab(&thread->user_timer);
4876 info->threads_user += tval;
4877 info->total_user += tval;
4878
4879 tval = timer_grab(&thread->system_timer);
4880 if (thread->precise_user_kernel_time) {
4881 info->threads_system += tval;
4882 info->total_system += tval;
4883 } else {
4884 /* system_timer may represent either sys or user */
4885 info->threads_user += tval;
4886 info->total_user += tval;
4887 }
4888
4889 thread_unlock(thread);
4890 splx(x);
4891 }
4892
4893
4894 *task_info_count = TASK_ABSOLUTETIME_INFO_COUNT;
4895 break;
4896 }
4897
4898 case TASK_DYLD_INFO:
4899 {
4900 task_dyld_info_t info;
4901
4902 /*
4903 * We added the format field to TASK_DYLD_INFO output. For
4904 * temporary backward compatibility, accept the fact that
4905 * clients may ask for the old version - distinguished by the
4906 * size of the expected result structure.
4907 */
4908 #define TASK_LEGACY_DYLD_INFO_COUNT \
4909 offsetof(struct task_dyld_info, all_image_info_format)/sizeof(natural_t)
4910
4911 if (*task_info_count < TASK_LEGACY_DYLD_INFO_COUNT) {
4912 error = KERN_INVALID_ARGUMENT;
4913 break;
4914 }
4915
4916 info = (task_dyld_info_t)task_info_out;
4917 info->all_image_info_addr = task->all_image_info_addr;
4918 info->all_image_info_size = task->all_image_info_size;
4919
4920 /* only set format on output for those expecting it */
4921 if (*task_info_count >= TASK_DYLD_INFO_COUNT) {
4922 info->all_image_info_format = task_has_64Bit_addr(task) ?
4923 TASK_DYLD_ALL_IMAGE_INFO_64 :
4924 TASK_DYLD_ALL_IMAGE_INFO_32;
4925 *task_info_count = TASK_DYLD_INFO_COUNT;
4926 } else {
4927 *task_info_count = TASK_LEGACY_DYLD_INFO_COUNT;
4928 }
4929 break;
4930 }
4931
4932 case TASK_EXTMOD_INFO:
4933 {
4934 task_extmod_info_t info;
4935 void *p;
4936
4937 if (*task_info_count < TASK_EXTMOD_INFO_COUNT) {
4938 error = KERN_INVALID_ARGUMENT;
4939 break;
4940 }
4941
4942 info = (task_extmod_info_t)task_info_out;
4943
4944 p = get_bsdtask_info(task);
4945 if (p) {
4946 proc_getexecutableuuid(p, info->task_uuid, sizeof(info->task_uuid));
4947 } else {
4948 bzero(info->task_uuid, sizeof(info->task_uuid));
4949 }
4950 info->extmod_statistics = task->extmod_statistics;
4951 *task_info_count = TASK_EXTMOD_INFO_COUNT;
4952
4953 break;
4954 }
4955
4956 case TASK_KERNELMEMORY_INFO:
4957 {
4958 task_kernelmemory_info_t tkm_info;
4959 ledger_amount_t credit, debit;
4960
4961 if (*task_info_count < TASK_KERNELMEMORY_INFO_COUNT) {
4962 error = KERN_INVALID_ARGUMENT;
4963 break;
4964 }
4965
4966 tkm_info = (task_kernelmemory_info_t) task_info_out;
4967 tkm_info->total_palloc = 0;
4968 tkm_info->total_pfree = 0;
4969 tkm_info->total_salloc = 0;
4970 tkm_info->total_sfree = 0;
4971
4972 if (task == kernel_task) {
4973 /*
4974 * All shared allocs/frees from other tasks count against
4975 * the kernel private memory usage. If we are looking up
4976 * info for the kernel task, gather from everywhere.
4977 */
4978 task_unlock(task);
4979
4980 /* start by accounting for all the terminated tasks against the kernel */
4981 tkm_info->total_palloc = tasks_tkm_private.alloc + tasks_tkm_shared.alloc;
4982 tkm_info->total_pfree = tasks_tkm_private.free + tasks_tkm_shared.free;
4983
4984 /* count all other task/thread shared alloc/free against the kernel */
4985 lck_mtx_lock(&tasks_threads_lock);
4986
4987 /* XXX this really shouldn't be using the function parameter 'task' as a local var! */
4988 queue_iterate(&tasks, task, task_t, tasks) {
4989 if (task == kernel_task) {
4990 if (ledger_get_entries(task->ledger,
4991 task_ledgers.tkm_private, &credit,
4992 &debit) == KERN_SUCCESS) {
4993 tkm_info->total_palloc += credit;
4994 tkm_info->total_pfree += debit;
4995 }
4996 }
4997 if (!ledger_get_entries(task->ledger,
4998 task_ledgers.tkm_shared, &credit, &debit)) {
4999 tkm_info->total_palloc += credit;
5000 tkm_info->total_pfree += debit;
5001 }
5002 }
5003 lck_mtx_unlock(&tasks_threads_lock);
5004 } else {
5005 if (!ledger_get_entries(task->ledger,
5006 task_ledgers.tkm_private, &credit, &debit)) {
5007 tkm_info->total_palloc = credit;
5008 tkm_info->total_pfree = debit;
5009 }
5010 if (!ledger_get_entries(task->ledger,
5011 task_ledgers.tkm_shared, &credit, &debit)) {
5012 tkm_info->total_salloc = credit;
5013 tkm_info->total_sfree = debit;
5014 }
5015 task_unlock(task);
5016 }
5017
5018 *task_info_count = TASK_KERNELMEMORY_INFO_COUNT;
5019 return KERN_SUCCESS;
5020 }
5021
5022 /* OBSOLETE */
5023 case TASK_SCHED_FIFO_INFO:
5024 {
5025 if (*task_info_count < POLICY_FIFO_BASE_COUNT) {
5026 error = KERN_INVALID_ARGUMENT;
5027 break;
5028 }
5029
5030 error = KERN_INVALID_POLICY;
5031 break;
5032 }
5033
5034 /* OBSOLETE */
5035 case TASK_SCHED_RR_INFO:
5036 {
5037 policy_rr_base_t rr_base;
5038 uint32_t quantum_time;
5039 uint64_t quantum_ns;
5040
5041 if (*task_info_count < POLICY_RR_BASE_COUNT) {
5042 error = KERN_INVALID_ARGUMENT;
5043 break;
5044 }
5045
5046 rr_base = (policy_rr_base_t) task_info_out;
5047
5048 if (task != kernel_task) {
5049 error = KERN_INVALID_POLICY;
5050 break;
5051 }
5052
5053 rr_base->base_priority = task->priority;
5054
5055 quantum_time = SCHED(initial_quantum_size)(THREAD_NULL);
5056 absolutetime_to_nanoseconds(quantum_time, &quantum_ns);
5057
5058 rr_base->quantum = (uint32_t)(quantum_ns / 1000 / 1000);
5059
5060 *task_info_count = POLICY_RR_BASE_COUNT;
5061 break;
5062 }
5063
5064 /* OBSOLETE */
5065 case TASK_SCHED_TIMESHARE_INFO:
5066 {
5067 policy_timeshare_base_t ts_base;
5068
5069 if (*task_info_count < POLICY_TIMESHARE_BASE_COUNT) {
5070 error = KERN_INVALID_ARGUMENT;
5071 break;
5072 }
5073
5074 ts_base = (policy_timeshare_base_t) task_info_out;
5075
5076 if (task == kernel_task) {
5077 error = KERN_INVALID_POLICY;
5078 break;
5079 }
5080
5081 ts_base->base_priority = task->priority;
5082
5083 *task_info_count = POLICY_TIMESHARE_BASE_COUNT;
5084 break;
5085 }
5086
5087 case TASK_SECURITY_TOKEN:
5088 {
5089 security_token_t *sec_token_p;
5090
5091 if (*task_info_count < TASK_SECURITY_TOKEN_COUNT) {
5092 error = KERN_INVALID_ARGUMENT;
5093 break;
5094 }
5095
5096 sec_token_p = (security_token_t *) task_info_out;
5097
5098 *sec_token_p = task->sec_token;
5099
5100 *task_info_count = TASK_SECURITY_TOKEN_COUNT;
5101 break;
5102 }
5103
5104 case TASK_AUDIT_TOKEN:
5105 {
5106 audit_token_t *audit_token_p;
5107
5108 if (*task_info_count < TASK_AUDIT_TOKEN_COUNT) {
5109 error = KERN_INVALID_ARGUMENT;
5110 break;
5111 }
5112
5113 audit_token_p = (audit_token_t *) task_info_out;
5114
5115 *audit_token_p = task->audit_token;
5116
5117 *task_info_count = TASK_AUDIT_TOKEN_COUNT;
5118 break;
5119 }
5120
5121 case TASK_SCHED_INFO:
5122 error = KERN_INVALID_ARGUMENT;
5123 break;
5124
5125 case TASK_EVENTS_INFO:
5126 {
5127 task_events_info_t events_info;
5128 thread_t thread;
5129
5130 if (*task_info_count < TASK_EVENTS_INFO_COUNT) {
5131 error = KERN_INVALID_ARGUMENT;
5132 break;
5133 }
5134
5135 events_info = (task_events_info_t) task_info_out;
5136
5137
5138 events_info->faults = task->faults;
5139 events_info->pageins = task->pageins;
5140 events_info->cow_faults = task->cow_faults;
5141 events_info->messages_sent = task->messages_sent;
5142 events_info->messages_received = task->messages_received;
5143 events_info->syscalls_mach = task->syscalls_mach;
5144 events_info->syscalls_unix = task->syscalls_unix;
5145
5146 events_info->csw = task->c_switch;
5147
5148 queue_iterate(&task->threads, thread, thread_t, task_threads) {
5149 events_info->csw += thread->c_switch;
5150 events_info->syscalls_mach += thread->syscalls_mach;
5151 events_info->syscalls_unix += thread->syscalls_unix;
5152 }
5153
5154
5155 *task_info_count = TASK_EVENTS_INFO_COUNT;
5156 break;
5157 }
5158 case TASK_AFFINITY_TAG_INFO:
5159 {
5160 if (*task_info_count < TASK_AFFINITY_TAG_INFO_COUNT) {
5161 error = KERN_INVALID_ARGUMENT;
5162 break;
5163 }
5164
5165 error = task_affinity_info(task, task_info_out, task_info_count);
5166 break;
5167 }
5168 case TASK_POWER_INFO:
5169 {
5170 if (*task_info_count < TASK_POWER_INFO_COUNT) {
5171 error = KERN_INVALID_ARGUMENT;
5172 break;
5173 }
5174
5175 task_power_info_locked(task, (task_power_info_t)task_info_out, NULL, NULL, NULL);
5176 break;
5177 }
5178
5179 case TASK_POWER_INFO_V2:
5180 {
5181 if (*task_info_count < TASK_POWER_INFO_V2_COUNT_OLD) {
5182 error = KERN_INVALID_ARGUMENT;
5183 break;
5184 }
5185 task_power_info_v2_t tpiv2 = (task_power_info_v2_t) task_info_out;
5186 task_power_info_locked(task, &tpiv2->cpu_energy, &tpiv2->gpu_energy, tpiv2, NULL);
5187 break;
5188 }
5189
5190 case TASK_VM_INFO:
5191 case TASK_VM_INFO_PURGEABLE:
5192 {
5193 task_vm_info_t vm_info;
5194 vm_map_t map;
5195
5196 #if __arm64__
5197 struct proc *p;
5198 uint32_t platform, sdk;
5199 p = current_proc();
5200 platform = proc_platform(p);
5201 sdk = proc_min_sdk(p);
5202 if (original_task_info_count > TASK_VM_INFO_REV2_COUNT &&
5203 platform == PLATFORM_IOS &&
5204 sdk != 0 &&
5205 (sdk >> 16) <= 12) {
5206 /*
5207 * Some iOS apps pass an incorrect value for
5208 * task_info_count, expressed in number of bytes
5209 * instead of number of "natural_t" elements.
5210 * For the sake of backwards binary compatibility
5211 * for apps built with an iOS12 or older SDK and using
5212 * the "rev2" data structure, let's fix task_info_count
5213 * for them, to avoid stomping past the actual end
5214 * of their buffer.
5215 */
5216 #if DEVELOPMENT || DEBUG
5217 printf("%s:%d %d[%s] rdar://49484582 task_info_count %d -> %d platform %d sdk %d.%d.%d\n", __FUNCTION__, __LINE__, proc_pid(p), proc_name_address(p), original_task_info_count, TASK_VM_INFO_REV2_COUNT, platform, (sdk >> 16), ((sdk >> 8) & 0xff), (sdk & 0xff));
5218 #endif /* DEVELOPMENT || DEBUG */
5219 DTRACE_VM4(workaround_task_vm_info_count,
5220 mach_msg_type_number_t, original_task_info_count,
5221 mach_msg_type_number_t, TASK_VM_INFO_REV2_COUNT,
5222 uint32_t, platform,
5223 uint32_t, sdk);
5224 original_task_info_count = TASK_VM_INFO_REV2_COUNT;
5225 *task_info_count = original_task_info_count;
5226 }
5227 #endif /* __arm64__ */
5228
5229 if (*task_info_count < TASK_VM_INFO_REV0_COUNT) {
5230 error = KERN_INVALID_ARGUMENT;
5231 break;
5232 }
5233
5234 vm_info = (task_vm_info_t)task_info_out;
5235
5236 if (task == kernel_task) {
5237 map = kernel_map;
5238 /* no lock */
5239 } else {
5240 map = task->map;
5241 vm_map_lock_read(map);
5242 }
5243
5244 vm_info->virtual_size = (typeof(vm_info->virtual_size))vm_map_adjusted_size(map);
5245 vm_info->region_count = map->hdr.nentries;
5246 vm_info->page_size = vm_map_page_size(map);
5247
5248 vm_info->resident_size = pmap_resident_count(map->pmap);
5249 vm_info->resident_size *= PAGE_SIZE;
5250 vm_info->resident_size_peak = pmap_resident_max(map->pmap);
5251 vm_info->resident_size_peak *= PAGE_SIZE;
5252
5253 #define _VM_INFO(_name) \
5254 vm_info->_name = ((mach_vm_size_t) map->pmap->stats._name) * PAGE_SIZE
5255
5256 _VM_INFO(device);
5257 _VM_INFO(device_peak);
5258 _VM_INFO(external);
5259 _VM_INFO(external_peak);
5260 _VM_INFO(internal);
5261 _VM_INFO(internal_peak);
5262 _VM_INFO(reusable);
5263 _VM_INFO(reusable_peak);
5264 _VM_INFO(compressed);
5265 _VM_INFO(compressed_peak);
5266 _VM_INFO(compressed_lifetime);
5267
5268 vm_info->purgeable_volatile_pmap = 0;
5269 vm_info->purgeable_volatile_resident = 0;
5270 vm_info->purgeable_volatile_virtual = 0;
5271 if (task == kernel_task) {
5272 /*
5273 * We do not maintain the detailed stats for the
5274 * kernel_pmap, so just count everything as
5275 * "internal"...
5276 */
5277 vm_info->internal = vm_info->resident_size;
5278 /*
5279 * ... but since the memory held by the VM compressor
5280 * in the kernel address space ought to be attributed
5281 * to user-space tasks, we subtract it from "internal"
5282 * to give memory reporting tools a more accurate idea
5283 * of what the kernel itself is actually using, instead
5284 * of making it look like the kernel is leaking memory
5285 * when the system is under memory pressure.
5286 */
5287 vm_info->internal -= (VM_PAGE_COMPRESSOR_COUNT *
5288 PAGE_SIZE);
5289 } else {
5290 mach_vm_size_t volatile_virtual_size;
5291 mach_vm_size_t volatile_resident_size;
5292 mach_vm_size_t volatile_compressed_size;
5293 mach_vm_size_t volatile_pmap_size;
5294 mach_vm_size_t volatile_compressed_pmap_size;
5295 kern_return_t kr;
5296
5297 if (flavor == TASK_VM_INFO_PURGEABLE) {
5298 kr = vm_map_query_volatile(
5299 map,
5300 &volatile_virtual_size,
5301 &volatile_resident_size,
5302 &volatile_compressed_size,
5303 &volatile_pmap_size,
5304 &volatile_compressed_pmap_size);
5305 if (kr == KERN_SUCCESS) {
5306 vm_info->purgeable_volatile_pmap =
5307 volatile_pmap_size;
5308 if (radar_20146450) {
5309 vm_info->compressed -=
5310 volatile_compressed_pmap_size;
5311 }
5312 vm_info->purgeable_volatile_resident =
5313 volatile_resident_size;
5314 vm_info->purgeable_volatile_virtual =
5315 volatile_virtual_size;
5316 }
5317 }
5318 }
5319 *task_info_count = TASK_VM_INFO_REV0_COUNT;
5320
5321 if (original_task_info_count >= TASK_VM_INFO_REV1_COUNT) {
5322 vm_info->phys_footprint =
5323 (mach_vm_size_t) get_task_phys_footprint(task);
5324 *task_info_count = TASK_VM_INFO_REV1_COUNT;
5325 }
5326 if (original_task_info_count >= TASK_VM_INFO_REV2_COUNT) {
5327 vm_info->min_address = map->min_offset;
5328 vm_info->max_address = map->max_offset;
5329 *task_info_count = TASK_VM_INFO_REV2_COUNT;
5330 }
5331 if (original_task_info_count >= TASK_VM_INFO_REV3_COUNT) {
5332 ledger_get_lifetime_max(task->ledger,
5333 task_ledgers.phys_footprint,
5334 &vm_info->ledger_phys_footprint_peak);
5335 ledger_get_balance(task->ledger,
5336 task_ledgers.purgeable_nonvolatile,
5337 &vm_info->ledger_purgeable_nonvolatile);
5338 ledger_get_balance(task->ledger,
5339 task_ledgers.purgeable_nonvolatile_compressed,
5340 &vm_info->ledger_purgeable_novolatile_compressed);
5341 ledger_get_balance(task->ledger,
5342 task_ledgers.purgeable_volatile,
5343 &vm_info->ledger_purgeable_volatile);
5344 ledger_get_balance(task->ledger,
5345 task_ledgers.purgeable_volatile_compressed,
5346 &vm_info->ledger_purgeable_volatile_compressed);
5347 ledger_get_balance(task->ledger,
5348 task_ledgers.network_nonvolatile,
5349 &vm_info->ledger_tag_network_nonvolatile);
5350 ledger_get_balance(task->ledger,
5351 task_ledgers.network_nonvolatile_compressed,
5352 &vm_info->ledger_tag_network_nonvolatile_compressed);
5353 ledger_get_balance(task->ledger,
5354 task_ledgers.network_volatile,
5355 &vm_info->ledger_tag_network_volatile);
5356 ledger_get_balance(task->ledger,
5357 task_ledgers.network_volatile_compressed,
5358 &vm_info->ledger_tag_network_volatile_compressed);
5359 ledger_get_balance(task->ledger,
5360 task_ledgers.media_footprint,
5361 &vm_info->ledger_tag_media_footprint);
5362 ledger_get_balance(task->ledger,
5363 task_ledgers.media_footprint_compressed,
5364 &vm_info->ledger_tag_media_footprint_compressed);
5365 ledger_get_balance(task->ledger,
5366 task_ledgers.media_nofootprint,
5367 &vm_info->ledger_tag_media_nofootprint);
5368 ledger_get_balance(task->ledger,
5369 task_ledgers.media_nofootprint_compressed,
5370 &vm_info->ledger_tag_media_nofootprint_compressed);
5371 ledger_get_balance(task->ledger,
5372 task_ledgers.graphics_footprint,
5373 &vm_info->ledger_tag_graphics_footprint);
5374 ledger_get_balance(task->ledger,
5375 task_ledgers.graphics_footprint_compressed,
5376 &vm_info->ledger_tag_graphics_footprint_compressed);
5377 ledger_get_balance(task->ledger,
5378 task_ledgers.graphics_nofootprint,
5379 &vm_info->ledger_tag_graphics_nofootprint);
5380 ledger_get_balance(task->ledger,
5381 task_ledgers.graphics_nofootprint_compressed,
5382 &vm_info->ledger_tag_graphics_nofootprint_compressed);
5383 ledger_get_balance(task->ledger,
5384 task_ledgers.neural_footprint,
5385 &vm_info->ledger_tag_neural_footprint);
5386 ledger_get_balance(task->ledger,
5387 task_ledgers.neural_footprint_compressed,
5388 &vm_info->ledger_tag_neural_footprint_compressed);
5389 ledger_get_balance(task->ledger,
5390 task_ledgers.neural_nofootprint,
5391 &vm_info->ledger_tag_neural_nofootprint);
5392 ledger_get_balance(task->ledger,
5393 task_ledgers.neural_nofootprint_compressed,
5394 &vm_info->ledger_tag_neural_nofootprint_compressed);
5395 *task_info_count = TASK_VM_INFO_REV3_COUNT;
5396 }
5397 if (original_task_info_count >= TASK_VM_INFO_REV4_COUNT) {
5398 if (task->bsd_info) {
5399 vm_info->limit_bytes_remaining =
5400 memorystatus_available_memory_internal(task->bsd_info);
5401 } else {
5402 vm_info->limit_bytes_remaining = 0;
5403 }
5404 *task_info_count = TASK_VM_INFO_REV4_COUNT;
5405 }
5406 if (original_task_info_count >= TASK_VM_INFO_REV5_COUNT) {
5407 thread_t thread;
5408 integer_t total = task->decompressions;
5409 queue_iterate(&task->threads, thread, thread_t, task_threads) {
5410 total += thread->decompressions;
5411 }
5412 vm_info->decompressions = total;
5413 *task_info_count = TASK_VM_INFO_REV5_COUNT;
5414 }
5415
5416 if (task != kernel_task) {
5417 vm_map_unlock_read(map);
5418 }
5419
5420 break;
5421 }
5422
5423 case TASK_WAIT_STATE_INFO:
5424 {
5425 /*
5426 * Deprecated flavor. Currently allowing some results until all users
5427 * stop calling it. The results may not be accurate.
5428 */
5429 task_wait_state_info_t wait_state_info;
5430 uint64_t total_sfi_ledger_val = 0;
5431
5432 if (*task_info_count < TASK_WAIT_STATE_INFO_COUNT) {
5433 error = KERN_INVALID_ARGUMENT;
5434 break;
5435 }
5436
5437 wait_state_info = (task_wait_state_info_t) task_info_out;
5438
5439 wait_state_info->total_wait_state_time = 0;
5440 bzero(wait_state_info->_reserved, sizeof(wait_state_info->_reserved));
5441
5442 #if CONFIG_SCHED_SFI
5443 int i, prev_lentry = -1;
5444 int64_t val_credit, val_debit;
5445
5446 for (i = 0; i < MAX_SFI_CLASS_ID; i++) {
5447 val_credit = 0;
5448 /*
5449 * checking with prev_lentry != entry ensures adjacent classes
5450 * which share the same ledger do not add wait times twice.
5451 * Note: Use ledger() call to get data for each individual sfi class.
5452 */
5453 if (prev_lentry != task_ledgers.sfi_wait_times[i] &&
5454 KERN_SUCCESS == ledger_get_entries(task->ledger,
5455 task_ledgers.sfi_wait_times[i], &val_credit, &val_debit)) {
5456 total_sfi_ledger_val += val_credit;
5457 }
5458 prev_lentry = task_ledgers.sfi_wait_times[i];
5459 }
5460
5461 #endif /* CONFIG_SCHED_SFI */
5462 wait_state_info->total_wait_sfi_state_time = total_sfi_ledger_val;
5463 *task_info_count = TASK_WAIT_STATE_INFO_COUNT;
5464
5465 break;
5466 }
5467 case TASK_VM_INFO_PURGEABLE_ACCOUNT:
5468 {
5469 #if DEVELOPMENT || DEBUG
5470 pvm_account_info_t acnt_info;
5471
5472 if (*task_info_count < PVM_ACCOUNT_INFO_COUNT) {
5473 error = KERN_INVALID_ARGUMENT;
5474 break;
5475 }
5476
5477 if (task_info_out == NULL) {
5478 error = KERN_INVALID_ARGUMENT;
5479 break;
5480 }
5481
5482 acnt_info = (pvm_account_info_t) task_info_out;
5483
5484 error = vm_purgeable_account(task, acnt_info);
5485
5486 *task_info_count = PVM_ACCOUNT_INFO_COUNT;
5487
5488 break;
5489 #else /* DEVELOPMENT || DEBUG */
5490 error = KERN_NOT_SUPPORTED;
5491 break;
5492 #endif /* DEVELOPMENT || DEBUG */
5493 }
5494 case TASK_FLAGS_INFO:
5495 {
5496 task_flags_info_t flags_info;
5497
5498 if (*task_info_count < TASK_FLAGS_INFO_COUNT) {
5499 error = KERN_INVALID_ARGUMENT;
5500 break;
5501 }
5502
5503 flags_info = (task_flags_info_t)task_info_out;
5504
5505 /* only publish the 64-bit flag of the task */
5506 flags_info->flags = task->t_flags & (TF_64B_ADDR | TF_64B_DATA);
5507
5508 *task_info_count = TASK_FLAGS_INFO_COUNT;
5509 break;
5510 }
5511
5512 case TASK_DEBUG_INFO_INTERNAL:
5513 {
5514 #if DEVELOPMENT || DEBUG
5515 task_debug_info_internal_t dbg_info;
5516 ipc_space_t space = task->itk_space;
5517 if (*task_info_count < TASK_DEBUG_INFO_INTERNAL_COUNT) {
5518 error = KERN_NOT_SUPPORTED;
5519 break;
5520 }
5521
5522 if (task_info_out == NULL) {
5523 error = KERN_INVALID_ARGUMENT;
5524 break;
5525 }
5526 dbg_info = (task_debug_info_internal_t) task_info_out;
5527 dbg_info->ipc_space_size = 0;
5528
5529 if (space) {
5530 is_read_lock(space);
5531 dbg_info->ipc_space_size = space->is_table_size;
5532 is_read_unlock(space);
5533 }
5534
5535 dbg_info->suspend_count = task->suspend_count;
5536
5537 error = KERN_SUCCESS;
5538 *task_info_count = TASK_DEBUG_INFO_INTERNAL_COUNT;
5539 break;
5540 #else /* DEVELOPMENT || DEBUG */
5541 error = KERN_NOT_SUPPORTED;
5542 break;
5543 #endif /* DEVELOPMENT || DEBUG */
5544 }
5545 default:
5546 error = KERN_INVALID_ARGUMENT;
5547 }
5548
5549 task_unlock(task);
5550 return error;
5551 }
5552
5553 /*
5554 * task_info_from_user
5555 *
5556 * When calling task_info from user space,
5557 * this function will be executed as mig server side
5558 * instead of calling directly into task_info.
5559 * This gives the possibility to perform more security
5560 * checks on task_port.
5561 *
5562 * In the case of TASK_DYLD_INFO, we require the more
5563 * privileged task_port not the less-privileged task_name_port.
5564 *
5565 */
5566 kern_return_t
5567 task_info_from_user(
5568 mach_port_t task_port,
5569 task_flavor_t flavor,
5570 task_info_t task_info_out,
5571 mach_msg_type_number_t *task_info_count)
5572 {
5573 task_t task;
5574 kern_return_t ret;
5575
5576 if (flavor == TASK_DYLD_INFO) {
5577 task = convert_port_to_task(task_port);
5578 } else {
5579 task = convert_port_to_task_name(task_port);
5580 }
5581
5582 ret = task_info(task, flavor, task_info_out, task_info_count);
5583
5584 task_deallocate(task);
5585
5586 return ret;
5587 }
5588
5589 /*
5590 * task_power_info
5591 *
5592 * Returns power stats for the task.
5593 * Note: Called with task locked.
5594 */
5595 void
5596 task_power_info_locked(
5597 task_t task,
5598 task_power_info_t info,
5599 gpu_energy_data_t ginfo,
5600 task_power_info_v2_t infov2,
5601 uint64_t *runnable_time)
5602 {
5603 thread_t thread;
5604 ledger_amount_t tmp;
5605
5606 uint64_t runnable_time_sum = 0;
5607
5608 task_lock_assert_owned(task);
5609
5610 ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
5611 (ledger_amount_t *)&info->task_interrupt_wakeups, &tmp);
5612 ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
5613 (ledger_amount_t *)&info->task_platform_idle_wakeups, &tmp);
5614
5615 info->task_timer_wakeups_bin_1 = task->task_timer_wakeups_bin_1;
5616 info->task_timer_wakeups_bin_2 = task->task_timer_wakeups_bin_2;
5617
5618 info->total_user = task->total_user_time;
5619 info->total_system = task->total_system_time;
5620 runnable_time_sum = task->total_runnable_time;
5621
5622 #if defined(__arm__) || defined(__arm64__)
5623 if (infov2) {
5624 infov2->task_energy = task->task_energy;
5625 }
5626 #endif /* defined(__arm__) || defined(__arm64__) */
5627
5628 if (ginfo) {
5629 ginfo->task_gpu_utilisation = task->task_gpu_ns;
5630 }
5631
5632 if (infov2) {
5633 infov2->task_ptime = task->total_ptime;
5634 infov2->task_pset_switches = task->ps_switch;
5635 }
5636
5637 queue_iterate(&task->threads, thread, thread_t, task_threads) {
5638 uint64_t tval;
5639 spl_t x;
5640
5641 if (thread->options & TH_OPT_IDLE_THREAD) {
5642 continue;
5643 }
5644
5645 x = splsched();
5646 thread_lock(thread);
5647
5648 info->task_timer_wakeups_bin_1 += thread->thread_timer_wakeups_bin_1;
5649 info->task_timer_wakeups_bin_2 += thread->thread_timer_wakeups_bin_2;
5650
5651 #if defined(__arm__) || defined(__arm64__)
5652 if (infov2) {
5653 infov2->task_energy += ml_energy_stat(thread);
5654 }
5655 #endif /* defined(__arm__) || defined(__arm64__) */
5656
5657 tval = timer_grab(&thread->user_timer);
5658 info->total_user += tval;
5659
5660 if (infov2) {
5661 tval = timer_grab(&thread->ptime);
5662 infov2->task_ptime += tval;
5663 infov2->task_pset_switches += thread->ps_switch;
5664 }
5665
5666 tval = timer_grab(&thread->system_timer);
5667 if (thread->precise_user_kernel_time) {
5668 info->total_system += tval;
5669 } else {
5670 /* system_timer may represent either sys or user */
5671 info->total_user += tval;
5672 }
5673
5674 tval = timer_grab(&thread->runnable_timer);
5675
5676 runnable_time_sum += tval;
5677
5678 if (ginfo) {
5679 ginfo->task_gpu_utilisation += ml_gpu_stat(thread);
5680 }
5681 thread_unlock(thread);
5682 splx(x);
5683 }
5684
5685 if (runnable_time) {
5686 *runnable_time = runnable_time_sum;
5687 }
5688 }
5689
5690 /*
5691 * task_gpu_utilisation
5692 *
5693 * Returns the total gpu time used by the all the threads of the task
5694 * (both dead and alive)
5695 */
5696 uint64_t
5697 task_gpu_utilisation(
5698 task_t task)
5699 {
5700 uint64_t gpu_time = 0;
5701 #if defined(__x86_64__)
5702 thread_t thread;
5703
5704 task_lock(task);
5705 gpu_time += task->task_gpu_ns;
5706
5707 queue_iterate(&task->threads, thread, thread_t, task_threads) {
5708 spl_t x;
5709 x = splsched();
5710 thread_lock(thread);
5711 gpu_time += ml_gpu_stat(thread);
5712 thread_unlock(thread);
5713 splx(x);
5714 }
5715
5716 task_unlock(task);
5717 #else /* defined(__x86_64__) */
5718 /* silence compiler warning */
5719 (void)task;
5720 #endif /* defined(__x86_64__) */
5721 return gpu_time;
5722 }
5723
5724 /*
5725 * task_energy
5726 *
5727 * Returns the total energy used by the all the threads of the task
5728 * (both dead and alive)
5729 */
5730 uint64_t
5731 task_energy(
5732 task_t task)
5733 {
5734 uint64_t energy = 0;
5735 thread_t thread;
5736
5737 task_lock(task);
5738 energy += task->task_energy;
5739
5740 queue_iterate(&task->threads, thread, thread_t, task_threads) {
5741 spl_t x;
5742 x = splsched();
5743 thread_lock(thread);
5744 energy += ml_energy_stat(thread);
5745 thread_unlock(thread);
5746 splx(x);
5747 }
5748
5749 task_unlock(task);
5750 return energy;
5751 }
5752
5753 #if __AMP__
5754
5755 uint64_t
5756 task_cpu_ptime(
5757 task_t task)
5758 {
5759 uint64_t cpu_ptime = 0;
5760 thread_t thread;
5761
5762 task_lock(task);
5763 cpu_ptime += task->total_ptime;
5764
5765 queue_iterate(&task->threads, thread, thread_t, task_threads) {
5766 cpu_ptime += timer_grab(&thread->ptime);
5767 }
5768
5769 task_unlock(task);
5770 return cpu_ptime;
5771 }
5772
5773 #else /* __AMP__ */
5774
5775 uint64_t
5776 task_cpu_ptime(
5777 __unused task_t task)
5778 {
5779 return 0;
5780 }
5781
5782 #endif /* __AMP__ */
5783
5784 /* This function updates the cpu time in the arrays for each
5785 * effective and requested QoS class
5786 */
5787 void
5788 task_update_cpu_time_qos_stats(
5789 task_t task,
5790 uint64_t *eqos_stats,
5791 uint64_t *rqos_stats)
5792 {
5793 if (!eqos_stats && !rqos_stats) {
5794 return;
5795 }
5796
5797 task_lock(task);
5798 thread_t thread;
5799 queue_iterate(&task->threads, thread, thread_t, task_threads) {
5800 if (thread->options & TH_OPT_IDLE_THREAD) {
5801 continue;
5802 }
5803
5804 thread_update_qos_cpu_time(thread);
5805 }
5806
5807 if (eqos_stats) {
5808 eqos_stats[THREAD_QOS_DEFAULT] += task->cpu_time_eqos_stats.cpu_time_qos_default;
5809 eqos_stats[THREAD_QOS_MAINTENANCE] += task->cpu_time_eqos_stats.cpu_time_qos_maintenance;
5810 eqos_stats[THREAD_QOS_BACKGROUND] += task->cpu_time_eqos_stats.cpu_time_qos_background;
5811 eqos_stats[THREAD_QOS_UTILITY] += task->cpu_time_eqos_stats.cpu_time_qos_utility;
5812 eqos_stats[THREAD_QOS_LEGACY] += task->cpu_time_eqos_stats.cpu_time_qos_legacy;
5813 eqos_stats[THREAD_QOS_USER_INITIATED] += task->cpu_time_eqos_stats.cpu_time_qos_user_initiated;
5814 eqos_stats[THREAD_QOS_USER_INTERACTIVE] += task->cpu_time_eqos_stats.cpu_time_qos_user_interactive;
5815 }
5816
5817 if (rqos_stats) {
5818 rqos_stats[THREAD_QOS_DEFAULT] += task->cpu_time_rqos_stats.cpu_time_qos_default;
5819 rqos_stats[THREAD_QOS_MAINTENANCE] += task->cpu_time_rqos_stats.cpu_time_qos_maintenance;
5820 rqos_stats[THREAD_QOS_BACKGROUND] += task->cpu_time_rqos_stats.cpu_time_qos_background;
5821 rqos_stats[THREAD_QOS_UTILITY] += task->cpu_time_rqos_stats.cpu_time_qos_utility;
5822 rqos_stats[THREAD_QOS_LEGACY] += task->cpu_time_rqos_stats.cpu_time_qos_legacy;
5823 rqos_stats[THREAD_QOS_USER_INITIATED] += task->cpu_time_rqos_stats.cpu_time_qos_user_initiated;
5824 rqos_stats[THREAD_QOS_USER_INTERACTIVE] += task->cpu_time_rqos_stats.cpu_time_qos_user_interactive;
5825 }
5826
5827 task_unlock(task);
5828 }
5829
5830 kern_return_t
5831 task_purgable_info(
5832 task_t task,
5833 task_purgable_info_t *stats)
5834 {
5835 if (task == TASK_NULL || stats == NULL) {
5836 return KERN_INVALID_ARGUMENT;
5837 }
5838 /* Take task reference */
5839 task_reference(task);
5840 vm_purgeable_stats((vm_purgeable_info_t)stats, task);
5841 /* Drop task reference */
5842 task_deallocate(task);
5843 return KERN_SUCCESS;
5844 }
5845
5846 void
5847 task_vtimer_set(
5848 task_t task,
5849 integer_t which)
5850 {
5851 thread_t thread;
5852 spl_t x;
5853
5854 task_lock(task);
5855
5856 task->vtimers |= which;
5857
5858 switch (which) {
5859 case TASK_VTIMER_USER:
5860 queue_iterate(&task->threads, thread, thread_t, task_threads) {
5861 x = splsched();
5862 thread_lock(thread);
5863 if (thread->precise_user_kernel_time) {
5864 thread->vtimer_user_save = timer_grab(&thread->user_timer);
5865 } else {
5866 thread->vtimer_user_save = timer_grab(&thread->system_timer);
5867 }
5868 thread_unlock(thread);
5869 splx(x);
5870 }
5871 break;
5872
5873 case TASK_VTIMER_PROF:
5874 queue_iterate(&task->threads, thread, thread_t, task_threads) {
5875 x = splsched();
5876 thread_lock(thread);
5877 thread->vtimer_prof_save = timer_grab(&thread->user_timer);
5878 thread->vtimer_prof_save += timer_grab(&thread->system_timer);
5879 thread_unlock(thread);
5880 splx(x);
5881 }
5882 break;
5883
5884 case TASK_VTIMER_RLIM:
5885 queue_iterate(&task->threads, thread, thread_t, task_threads) {
5886 x = splsched();
5887 thread_lock(thread);
5888 thread->vtimer_rlim_save = timer_grab(&thread->user_timer);
5889 thread->vtimer_rlim_save += timer_grab(&thread->system_timer);
5890 thread_unlock(thread);
5891 splx(x);
5892 }
5893 break;
5894 }
5895
5896 task_unlock(task);
5897 }
5898
5899 void
5900 task_vtimer_clear(
5901 task_t task,
5902 integer_t which)
5903 {
5904 assert(task == current_task());
5905
5906 task_lock(task);
5907
5908 task->vtimers &= ~which;
5909
5910 task_unlock(task);
5911 }
5912
5913 void
5914 task_vtimer_update(
5915 __unused
5916 task_t task,
5917 integer_t which,
5918 uint32_t *microsecs)
5919 {
5920 thread_t thread = current_thread();
5921 uint32_t tdelt = 0;
5922 clock_sec_t secs = 0;
5923 uint64_t tsum;
5924
5925 assert(task == current_task());
5926
5927 spl_t s = splsched();
5928 thread_lock(thread);
5929
5930 if ((task->vtimers & which) != (uint32_t)which) {
5931 thread_unlock(thread);
5932 splx(s);
5933 return;
5934 }
5935
5936 switch (which) {
5937 case TASK_VTIMER_USER:
5938 if (thread->precise_user_kernel_time) {
5939 tdelt = (uint32_t)timer_delta(&thread->user_timer,
5940 &thread->vtimer_user_save);
5941 } else {
5942 tdelt = (uint32_t)timer_delta(&thread->system_timer,
5943 &thread->vtimer_user_save);
5944 }
5945 absolutetime_to_microtime(tdelt, &secs, microsecs);
5946 break;
5947
5948 case TASK_VTIMER_PROF:
5949 tsum = timer_grab(&thread->user_timer);
5950 tsum += timer_grab(&thread->system_timer);
5951 tdelt = (uint32_t)(tsum - thread->vtimer_prof_save);
5952 absolutetime_to_microtime(tdelt, &secs, microsecs);
5953 /* if the time delta is smaller than a usec, ignore */
5954 if (*microsecs != 0) {
5955 thread->vtimer_prof_save = tsum;
5956 }
5957 break;
5958
5959 case TASK_VTIMER_RLIM:
5960 tsum = timer_grab(&thread->user_timer);
5961 tsum += timer_grab(&thread->system_timer);
5962 tdelt = (uint32_t)(tsum - thread->vtimer_rlim_save);
5963 thread->vtimer_rlim_save = tsum;
5964 absolutetime_to_microtime(tdelt, &secs, microsecs);
5965 break;
5966 }
5967
5968 thread_unlock(thread);
5969 splx(s);
5970 }
5971
5972 /*
5973 * task_assign:
5974 *
5975 * Change the assigned processor set for the task
5976 */
5977 kern_return_t
5978 task_assign(
5979 __unused task_t task,
5980 __unused processor_set_t new_pset,
5981 __unused boolean_t assign_threads)
5982 {
5983 return KERN_FAILURE;
5984 }
5985
5986 /*
5987 * task_assign_default:
5988 *
5989 * Version of task_assign to assign to default processor set.
5990 */
5991 kern_return_t
5992 task_assign_default(
5993 task_t task,
5994 boolean_t assign_threads)
5995 {
5996 return task_assign(task, &pset0, assign_threads);
5997 }
5998
5999 /*
6000 * task_get_assignment
6001 *
6002 * Return name of processor set that task is assigned to.
6003 */
6004 kern_return_t
6005 task_get_assignment(
6006 task_t task,
6007 processor_set_t *pset)
6008 {
6009 if (!task || !task->active) {
6010 return KERN_FAILURE;
6011 }
6012
6013 *pset = &pset0;
6014
6015 return KERN_SUCCESS;
6016 }
6017
6018 uint64_t
6019 get_task_dispatchqueue_offset(
6020 task_t task)
6021 {
6022 return task->dispatchqueue_offset;
6023 }
6024
6025 /*
6026 * task_policy
6027 *
6028 * Set scheduling policy and parameters, both base and limit, for
6029 * the given task. Policy must be a policy which is enabled for the
6030 * processor set. Change contained threads if requested.
6031 */
6032 kern_return_t
6033 task_policy(
6034 __unused task_t task,
6035 __unused policy_t policy_id,
6036 __unused policy_base_t base,
6037 __unused mach_msg_type_number_t count,
6038 __unused boolean_t set_limit,
6039 __unused boolean_t change)
6040 {
6041 return KERN_FAILURE;
6042 }
6043
6044 /*
6045 * task_set_policy
6046 *
6047 * Set scheduling policy and parameters, both base and limit, for
6048 * the given task. Policy can be any policy implemented by the
6049 * processor set, whether enabled or not. Change contained threads
6050 * if requested.
6051 */
6052 kern_return_t
6053 task_set_policy(
6054 __unused task_t task,
6055 __unused processor_set_t pset,
6056 __unused policy_t policy_id,
6057 __unused policy_base_t base,
6058 __unused mach_msg_type_number_t base_count,
6059 __unused policy_limit_t limit,
6060 __unused mach_msg_type_number_t limit_count,
6061 __unused boolean_t change)
6062 {
6063 return KERN_FAILURE;
6064 }
6065
6066 kern_return_t
6067 task_set_ras_pc(
6068 __unused task_t task,
6069 __unused vm_offset_t pc,
6070 __unused vm_offset_t endpc)
6071 {
6072 return KERN_FAILURE;
6073 }
6074
6075 void
6076 task_synchronizer_destroy_all(task_t task)
6077 {
6078 /*
6079 * Destroy owned semaphores
6080 */
6081 semaphore_destroy_all(task);
6082 }
6083
6084 /*
6085 * Install default (machine-dependent) initial thread state
6086 * on the task. Subsequent thread creation will have this initial
6087 * state set on the thread by machine_thread_inherit_taskwide().
6088 * Flavors and structures are exactly the same as those to thread_set_state()
6089 */
6090 kern_return_t
6091 task_set_state(
6092 task_t task,
6093 int flavor,
6094 thread_state_t state,
6095 mach_msg_type_number_t state_count)
6096 {
6097 kern_return_t ret;
6098
6099 if (task == TASK_NULL) {
6100 return KERN_INVALID_ARGUMENT;
6101 }
6102
6103 task_lock(task);
6104
6105 if (!task->active) {
6106 task_unlock(task);
6107 return KERN_FAILURE;
6108 }
6109
6110 ret = machine_task_set_state(task, flavor, state, state_count);
6111
6112 task_unlock(task);
6113 return ret;
6114 }
6115
6116 /*
6117 * Examine the default (machine-dependent) initial thread state
6118 * on the task, as set by task_set_state(). Flavors and structures
6119 * are exactly the same as those passed to thread_get_state().
6120 */
6121 kern_return_t
6122 task_get_state(
6123 task_t task,
6124 int flavor,
6125 thread_state_t state,
6126 mach_msg_type_number_t *state_count)
6127 {
6128 kern_return_t ret;
6129
6130 if (task == TASK_NULL) {
6131 return KERN_INVALID_ARGUMENT;
6132 }
6133
6134 task_lock(task);
6135
6136 if (!task->active) {
6137 task_unlock(task);
6138 return KERN_FAILURE;
6139 }
6140
6141 ret = machine_task_get_state(task, flavor, state, state_count);
6142
6143 task_unlock(task);
6144 return ret;
6145 }
6146
6147
6148 static kern_return_t __attribute__((noinline, not_tail_called))
6149 PROC_VIOLATED_GUARD__SEND_EXC_GUARD_AND_SUSPEND(
6150 mach_exception_code_t code,
6151 mach_exception_subcode_t subcode,
6152 void *reason)
6153 {
6154 #ifdef MACH_BSD
6155 if (1 == proc_selfpid()) {
6156 return KERN_NOT_SUPPORTED; // initproc is immune
6157 }
6158 #endif
6159 mach_exception_data_type_t codes[EXCEPTION_CODE_MAX] = {
6160 [0] = code,
6161 [1] = subcode,
6162 };
6163 task_t task = current_task();
6164 kern_return_t kr;
6165
6166 /* (See jetsam-related comments below) */
6167
6168 proc_memstat_terminated(task->bsd_info, TRUE);
6169 kr = task_enqueue_exception_with_corpse(task, EXC_GUARD, codes, 2, reason);
6170 proc_memstat_terminated(task->bsd_info, FALSE);
6171 return kr;
6172 }
6173
6174 kern_return_t
6175 task_violated_guard(
6176 mach_exception_code_t code,
6177 mach_exception_subcode_t subcode,
6178 void *reason)
6179 {
6180 return PROC_VIOLATED_GUARD__SEND_EXC_GUARD_AND_SUSPEND(code, subcode, reason);
6181 }
6182
6183
6184 #if CONFIG_MEMORYSTATUS
6185
6186 boolean_t
6187 task_get_memlimit_is_active(task_t task)
6188 {
6189 assert(task != NULL);
6190
6191 if (task->memlimit_is_active == 1) {
6192 return TRUE;
6193 } else {
6194 return FALSE;
6195 }
6196 }
6197
6198 void
6199 task_set_memlimit_is_active(task_t task, boolean_t memlimit_is_active)
6200 {
6201 assert(task != NULL);
6202
6203 if (memlimit_is_active) {
6204 task->memlimit_is_active = 1;
6205 } else {
6206 task->memlimit_is_active = 0;
6207 }
6208 }
6209
6210 boolean_t
6211 task_get_memlimit_is_fatal(task_t task)
6212 {
6213 assert(task != NULL);
6214
6215 if (task->memlimit_is_fatal == 1) {
6216 return TRUE;
6217 } else {
6218 return FALSE;
6219 }
6220 }
6221
6222 void
6223 task_set_memlimit_is_fatal(task_t task, boolean_t memlimit_is_fatal)
6224 {
6225 assert(task != NULL);
6226
6227 if (memlimit_is_fatal) {
6228 task->memlimit_is_fatal = 1;
6229 } else {
6230 task->memlimit_is_fatal = 0;
6231 }
6232 }
6233
6234 boolean_t
6235 task_has_triggered_exc_resource(task_t task, boolean_t memlimit_is_active)
6236 {
6237 boolean_t triggered = FALSE;
6238
6239 assert(task == current_task());
6240
6241 /*
6242 * Returns true, if task has already triggered an exc_resource exception.
6243 */
6244
6245 if (memlimit_is_active) {
6246 triggered = (task->memlimit_active_exc_resource ? TRUE : FALSE);
6247 } else {
6248 triggered = (task->memlimit_inactive_exc_resource ? TRUE : FALSE);
6249 }
6250
6251 return triggered;
6252 }
6253
6254 void
6255 task_mark_has_triggered_exc_resource(task_t task, boolean_t memlimit_is_active)
6256 {
6257 assert(task == current_task());
6258
6259 /*
6260 * We allow one exc_resource per process per active/inactive limit.
6261 * The limit's fatal attribute does not come into play.
6262 */
6263
6264 if (memlimit_is_active) {
6265 task->memlimit_active_exc_resource = 1;
6266 } else {
6267 task->memlimit_inactive_exc_resource = 1;
6268 }
6269 }
6270
/*
 * Free space (in MB) required *after* core file creation; passed to
 * coredump() when a high-watermark user core is taken.
 */
#define HWM_USERCORE_MINSPACE 250
6272
6273 void __attribute__((noinline))
6274 PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb, boolean_t is_fatal)
6275 {
6276 task_t task = current_task();
6277 int pid = 0;
6278 const char *procname = "unknown";
6279 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
6280 boolean_t send_sync_exc_resource = FALSE;
6281
6282 #ifdef MACH_BSD
6283 pid = proc_selfpid();
6284
6285 if (pid == 1) {
6286 /*
6287 * Cannot have ReportCrash analyzing
6288 * a suspended initproc.
6289 */
6290 return;
6291 }
6292
6293 if (task->bsd_info != NULL) {
6294 procname = proc_name_address(current_task()->bsd_info);
6295 send_sync_exc_resource = proc_send_synchronous_EXC_RESOURCE(current_task()->bsd_info);
6296 }
6297 #endif
6298 #if CONFIG_COREDUMP
6299 if (hwm_user_cores) {
6300 int error;
6301 uint64_t starttime, end;
6302 clock_sec_t secs = 0;
6303 uint32_t microsecs = 0;
6304
6305 starttime = mach_absolute_time();
6306 /*
6307 * Trigger a coredump of this process. Don't proceed unless we know we won't
6308 * be filling up the disk; and ignore the core size resource limit for this
6309 * core file.
6310 */
6311 if ((error = coredump(current_task()->bsd_info, HWM_USERCORE_MINSPACE, COREDUMP_IGNORE_ULIMIT)) != 0) {
6312 printf("couldn't take coredump of %s[%d]: %d\n", procname, pid, error);
6313 }
6314 /*
6315 * coredump() leaves the task suspended.
6316 */
6317 task_resume_internal(current_task());
6318
6319 end = mach_absolute_time();
6320 absolutetime_to_microtime(end - starttime, &secs, &microsecs);
6321 printf("coredump of %s[%d] taken in %d secs %d microsecs\n",
6322 proc_name_address(current_task()->bsd_info), pid, (int)secs, microsecs);
6323 }
6324 #endif /* CONFIG_COREDUMP */
6325
6326 if (disable_exc_resource) {
6327 printf("process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
6328 "supressed by a boot-arg.\n", procname, pid, max_footprint_mb);
6329 return;
6330 }
6331
6332 /*
6333 * A task that has triggered an EXC_RESOURCE, should not be
6334 * jetsammed when the device is under memory pressure. Here
6335 * we set the P_MEMSTAT_TERMINATED flag so that the process
6336 * will be skipped if the memorystatus_thread wakes up.
6337 */
6338 proc_memstat_terminated(current_task()->bsd_info, TRUE);
6339
6340 code[0] = code[1] = 0;
6341 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_MEMORY);
6342 EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_HIGH_WATERMARK);
6343 EXC_RESOURCE_HWM_ENCODE_LIMIT(code[0], max_footprint_mb);
6344
6345 /*
6346 * Do not generate a corpse fork if the violation is a fatal one
6347 * or the process wants synchronous EXC_RESOURCE exceptions.
6348 */
6349 if (is_fatal || send_sync_exc_resource || exc_via_corpse_forking == 0) {
6350 /* Do not send a EXC_RESOURCE if corpse_for_fatal_memkill is set */
6351 if (send_sync_exc_resource || corpse_for_fatal_memkill == 0) {
6352 /*
6353 * Use the _internal_ variant so that no user-space
6354 * process can resume our task from under us.
6355 */
6356 task_suspend_internal(task);
6357 exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
6358 task_resume_internal(task);
6359 }
6360 } else {
6361 if (audio_active) {
6362 printf("process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
6363 "supressed due to audio playback.\n", procname, pid, max_footprint_mb);
6364 } else {
6365 task_enqueue_exception_with_corpse(task, EXC_RESOURCE,
6366 code, EXCEPTION_CODE_MAX, NULL);
6367 }
6368 }
6369
6370 /*
6371 * After the EXC_RESOURCE has been handled, we must clear the
6372 * P_MEMSTAT_TERMINATED flag so that the process can again be
6373 * considered for jetsam if the memorystatus_thread wakes up.
6374 */
6375 proc_memstat_terminated(current_task()->bsd_info, FALSE); /* clear the flag */
6376 }
6377
6378 /*
6379 * Callback invoked when a task exceeds its physical footprint limit.
6380 */
6381 void
6382 task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1)
6383 {
6384 ledger_amount_t max_footprint, max_footprint_mb;
6385 task_t task;
6386 boolean_t is_warning;
6387 boolean_t memlimit_is_active;
6388 boolean_t memlimit_is_fatal;
6389
6390 if (warning == LEDGER_WARNING_DIPPED_BELOW) {
6391 /*
6392 * Task memory limits only provide a warning on the way up.
6393 */
6394 return;
6395 } else if (warning == LEDGER_WARNING_ROSE_ABOVE) {
6396 /*
6397 * This task is in danger of violating a memory limit,
6398 * It has exceeded a percentage level of the limit.
6399 */
6400 is_warning = TRUE;
6401 } else {
6402 /*
6403 * The task has exceeded the physical footprint limit.
6404 * This is not a warning but a true limit violation.
6405 */
6406 is_warning = FALSE;
6407 }
6408
6409 task = current_task();
6410
6411 ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &max_footprint);
6412 max_footprint_mb = max_footprint >> 20;
6413
6414 memlimit_is_active = task_get_memlimit_is_active(task);
6415 memlimit_is_fatal = task_get_memlimit_is_fatal(task);
6416
6417 /*
6418 * If this is an actual violation (not a warning), then generate EXC_RESOURCE exception.
6419 * We only generate the exception once per process per memlimit (active/inactive limit).
6420 * To enforce this, we monitor state based on the memlimit's active/inactive attribute
6421 * and we disable it by marking that memlimit as exception triggered.
6422 */
6423 if ((is_warning == FALSE) && (!task_has_triggered_exc_resource(task, memlimit_is_active))) {
6424 PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND((int)max_footprint_mb, memlimit_is_fatal);
6425 memorystatus_log_exception((int)max_footprint_mb, memlimit_is_active, memlimit_is_fatal);
6426 task_mark_has_triggered_exc_resource(task, memlimit_is_active);
6427 }
6428
6429 memorystatus_on_ledger_footprint_exceeded(is_warning, memlimit_is_active, memlimit_is_fatal);
6430 }
6431
6432 extern int proc_check_footprint_priv(void);
6433
6434 kern_return_t
6435 task_set_phys_footprint_limit(
6436 task_t task,
6437 int new_limit_mb,
6438 int *old_limit_mb)
6439 {
6440 kern_return_t error;
6441
6442 boolean_t memlimit_is_active;
6443 boolean_t memlimit_is_fatal;
6444
6445 if ((error = proc_check_footprint_priv())) {
6446 return KERN_NO_ACCESS;
6447 }
6448
6449 /*
6450 * This call should probably be obsoleted.
6451 * But for now, we default to current state.
6452 */
6453 memlimit_is_active = task_get_memlimit_is_active(task);
6454 memlimit_is_fatal = task_get_memlimit_is_fatal(task);
6455
6456 return task_set_phys_footprint_limit_internal(task, new_limit_mb, old_limit_mb, memlimit_is_active, memlimit_is_fatal);
6457 }
6458
6459 kern_return_t
6460 task_convert_phys_footprint_limit(
6461 int limit_mb,
6462 int *converted_limit_mb)
6463 {
6464 if (limit_mb == -1) {
6465 /*
6466 * No limit
6467 */
6468 if (max_task_footprint != 0) {
6469 *converted_limit_mb = (int)(max_task_footprint / 1024 / 1024); /* bytes to MB */
6470 } else {
6471 *converted_limit_mb = (int)(LEDGER_LIMIT_INFINITY >> 20);
6472 }
6473 } else {
6474 /* nothing to convert */
6475 *converted_limit_mb = limit_mb;
6476 }
6477 return KERN_SUCCESS;
6478 }
6479
6480
6481 kern_return_t
6482 task_set_phys_footprint_limit_internal(
6483 task_t task,
6484 int new_limit_mb,
6485 int *old_limit_mb,
6486 boolean_t memlimit_is_active,
6487 boolean_t memlimit_is_fatal)
6488 {
6489 ledger_amount_t old;
6490 kern_return_t ret;
6491
6492 ret = ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &old);
6493
6494 if (ret != KERN_SUCCESS) {
6495 return ret;
6496 }
6497
6498 /*
6499 * Check that limit >> 20 will not give an "unexpected" 32-bit
6500 * result. There are, however, implicit assumptions that -1 mb limit
6501 * equates to LEDGER_LIMIT_INFINITY.
6502 */
6503 assert(((old & 0xFFF0000000000000LL) == 0) || (old == LEDGER_LIMIT_INFINITY));
6504
6505 if (old_limit_mb) {
6506 *old_limit_mb = (int)(old >> 20);
6507 }
6508
6509 if (new_limit_mb == -1) {
6510 /*
6511 * Caller wishes to remove the limit.
6512 */
6513 ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
6514 max_task_footprint ? max_task_footprint : LEDGER_LIMIT_INFINITY,
6515 max_task_footprint ? (uint8_t)max_task_footprint_warning_level : 0);
6516
6517 task_lock(task);
6518 task_set_memlimit_is_active(task, memlimit_is_active);
6519 task_set_memlimit_is_fatal(task, memlimit_is_fatal);
6520 task_unlock(task);
6521
6522 return KERN_SUCCESS;
6523 }
6524
6525 #ifdef CONFIG_NOMONITORS
6526 return KERN_SUCCESS;
6527 #endif /* CONFIG_NOMONITORS */
6528
6529 task_lock(task);
6530
6531 if ((memlimit_is_active == task_get_memlimit_is_active(task)) &&
6532 (memlimit_is_fatal == task_get_memlimit_is_fatal(task)) &&
6533 (((ledger_amount_t)new_limit_mb << 20) == old)) {
6534 /*
6535 * memlimit state is not changing
6536 */
6537 task_unlock(task);
6538 return KERN_SUCCESS;
6539 }
6540
6541 task_set_memlimit_is_active(task, memlimit_is_active);
6542 task_set_memlimit_is_fatal(task, memlimit_is_fatal);
6543
6544 ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
6545 (ledger_amount_t)new_limit_mb << 20, PHYS_FOOTPRINT_WARNING_LEVEL);
6546
6547 if (task == current_task()) {
6548 ledger_check_new_balance(current_thread(), task->ledger,
6549 task_ledgers.phys_footprint);
6550 }
6551
6552 task_unlock(task);
6553
6554 return KERN_SUCCESS;
6555 }
6556
6557 kern_return_t
6558 task_get_phys_footprint_limit(
6559 task_t task,
6560 int *limit_mb)
6561 {
6562 ledger_amount_t limit;
6563 kern_return_t ret;
6564
6565 ret = ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &limit);
6566 if (ret != KERN_SUCCESS) {
6567 return ret;
6568 }
6569
6570 /*
6571 * Check that limit >> 20 will not give an "unexpected" signed, 32-bit
6572 * result. There are, however, implicit assumptions that -1 mb limit
6573 * equates to LEDGER_LIMIT_INFINITY.
6574 */
6575 assert(((limit & 0xFFF0000000000000LL) == 0) || (limit == LEDGER_LIMIT_INFINITY));
6576 *limit_mb = (int)(limit >> 20);
6577
6578 return KERN_SUCCESS;
6579 }
6580 #else /* CONFIG_MEMORYSTATUS */
6581 kern_return_t
6582 task_set_phys_footprint_limit(
6583 __unused task_t task,
6584 __unused int new_limit_mb,
6585 __unused int *old_limit_mb)
6586 {
6587 return KERN_FAILURE;
6588 }
6589
6590 kern_return_t
6591 task_get_phys_footprint_limit(
6592 __unused task_t task,
6593 __unused int *limit_mb)
6594 {
6595 return KERN_FAILURE;
6596 }
6597 #endif /* CONFIG_MEMORYSTATUS */
6598
6599 void
6600 task_set_thread_limit(task_t task, uint16_t thread_limit)
6601 {
6602 assert(task != kernel_task);
6603 if (thread_limit <= TASK_MAX_THREAD_LIMIT) {
6604 task_lock(task);
6605 task->task_thread_limit = thread_limit;
6606 task_unlock(task);
6607 }
6608 }
6609
6610 #if XNU_TARGET_OS_OSX
6611 boolean_t
6612 task_has_system_version_compat_enabled(task_t task)
6613 {
6614 boolean_t enabled = FALSE;
6615
6616 task_lock(task);
6617 enabled = (task->t_flags & TF_SYS_VERSION_COMPAT);
6618 task_unlock(task);
6619
6620 return enabled;
6621 }
6622
6623 void
6624 task_set_system_version_compat_enabled(task_t task, boolean_t enable_system_version_compat)
6625 {
6626 assert(task == current_task());
6627 assert(task != kernel_task);
6628
6629 task_lock(task);
6630 if (enable_system_version_compat) {
6631 task->t_flags |= TF_SYS_VERSION_COMPAT;
6632 } else {
6633 task->t_flags &= ~TF_SYS_VERSION_COMPAT;
6634 }
6635 task_unlock(task);
6636 }
6637 #endif /* XNU_TARGET_OS_OSX */
6638
6639 /*
6640 * We need to export some functions to other components that
6641 * are currently implemented in macros within the osfmk
6642 * component. Just export them as functions of the same name.
6643 */
6644 boolean_t
6645 is_kerneltask(task_t t)
6646 {
6647 if (t == kernel_task) {
6648 return TRUE;
6649 }
6650
6651 return FALSE;
6652 }
6653
6654 boolean_t
6655 is_corpsetask(task_t t)
6656 {
6657 return task_is_a_corpse(t);
6658 }
6659
6660 #undef current_task
6661 task_t current_task(void);
6662 task_t
6663 current_task(void)
6664 {
6665 return current_task_fast();
6666 }
6667
6668 #undef task_reference
6669 void task_reference(task_t task);
6670 void
6671 task_reference(
6672 task_t task)
6673 {
6674 if (task != TASK_NULL) {
6675 task_reference_internal(task);
6676 }
6677 }
6678
6679 /* defined in bsd/kern/kern_prot.c */
6680 extern int get_audit_token_pid(audit_token_t *audit_token);
6681
6682 int
6683 task_pid(task_t task)
6684 {
6685 if (task) {
6686 return get_audit_token_pid(&task->audit_token);
6687 }
6688 return -1;
6689 }
6690
6691 #if __has_feature(ptrauth_calls)
6692 /*
6693 * Get the shared region id and jop signing key for the task.
6694 * The function will allocate a kalloc buffer and return
6695 * it to caller, the caller needs to free it. This is used
6696 * for getting the information via task port.
6697 */
6698 char *
6699 task_get_vm_shared_region_id_and_jop_pid(task_t task, uint64_t *jop_pid)
6700 {
6701 size_t len;
6702 char *shared_region_id = NULL;
6703
6704 task_lock(task);
6705 if (task->shared_region_id == NULL) {
6706 task_unlock(task);
6707 return NULL;
6708 }
6709 len = strlen(task->shared_region_id) + 1;
6710
6711 /* don't hold task lock while allocating */
6712 task_unlock(task);
6713 shared_region_id = kheap_alloc(KHEAP_DATA_BUFFERS, len, Z_WAITOK);
6714 task_lock(task);
6715
6716 if (task->shared_region_id == NULL) {
6717 task_unlock(task);
6718 kheap_free(KHEAP_DATA_BUFFERS, shared_region_id, len);
6719 return NULL;
6720 }
6721 assert(len == strlen(task->shared_region_id) + 1); /* should never change */
6722 strlcpy(shared_region_id, task->shared_region_id, len);
6723 task_unlock(task);
6724
6725 /* find key from its auth pager */
6726 if (jop_pid != NULL) {
6727 *jop_pid = shared_region_find_key(shared_region_id);
6728 }
6729
6730 return shared_region_id;
6731 }
6732
6733 /*
6734 * set the shared region id for a task
6735 */
6736 void
6737 task_set_shared_region_id(task_t task, char *id)
6738 {
6739 char *old_id;
6740
6741 task_lock(task);
6742 old_id = task->shared_region_id;
6743 task->shared_region_id = id;
6744 task->shared_region_auth_remapped = FALSE;
6745 task_unlock(task);
6746
6747 /* free any pre-existing shared region id */
6748 if (old_id != NULL) {
6749 shared_region_key_dealloc(old_id);
6750 kheap_free(KHEAP_DATA_BUFFERS, old_id, strlen(old_id) + 1);
6751 }
6752 }
6753 #endif /* __has_feature(ptrauth_calls) */
6754
6755 /*
6756 * This routine finds a thread in a task by its unique id
6757 * Returns a referenced thread or THREAD_NULL if the thread was not found
6758 *
6759 * TODO: This is super inefficient - it's an O(threads in task) list walk!
6760 * We should make a tid hash, or transition all tid clients to thread ports
6761 *
6762 * Precondition: No locks held (will take task lock)
6763 */
6764 thread_t
6765 task_findtid(task_t task, uint64_t tid)
6766 {
6767 thread_t self = current_thread();
6768 thread_t found_thread = THREAD_NULL;
6769 thread_t iter_thread = THREAD_NULL;
6770
6771 /* Short-circuit the lookup if we're looking up ourselves */
6772 if (tid == self->thread_id || tid == TID_NULL) {
6773 assert(self->task == task);
6774
6775 thread_reference(self);
6776
6777 return self;
6778 }
6779
6780 task_lock(task);
6781
6782 queue_iterate(&task->threads, iter_thread, thread_t, task_threads) {
6783 if (iter_thread->thread_id == tid) {
6784 found_thread = iter_thread;
6785 thread_reference(found_thread);
6786 break;
6787 }
6788 }
6789
6790 task_unlock(task);
6791
6792 return found_thread;
6793 }
6794
6795 int
6796 pid_from_task(task_t task)
6797 {
6798 int pid = -1;
6799
6800 if (task->bsd_info) {
6801 pid = proc_pid(task->bsd_info);
6802 } else {
6803 pid = task_pid(task);
6804 }
6805
6806 return pid;
6807 }
6808
6809 /*
6810 * Control the CPU usage monitor for a task.
6811 */
6812 kern_return_t
6813 task_cpu_usage_monitor_ctl(task_t task, uint32_t *flags)
6814 {
6815 int error = KERN_SUCCESS;
6816
6817 if (*flags & CPUMON_MAKE_FATAL) {
6818 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_CPUMON;
6819 } else {
6820 error = KERN_INVALID_ARGUMENT;
6821 }
6822
6823 return error;
6824 }
6825
6826 /*
6827 * Control the wakeups monitor for a task.
6828 */
6829 kern_return_t
6830 task_wakeups_monitor_ctl(task_t task, uint32_t *flags, int32_t *rate_hz)
6831 {
6832 ledger_t ledger = task->ledger;
6833
6834 task_lock(task);
6835 if (*flags & WAKEMON_GET_PARAMS) {
6836 ledger_amount_t limit;
6837 uint64_t period;
6838
6839 ledger_get_limit(ledger, task_ledgers.interrupt_wakeups, &limit);
6840 ledger_get_period(ledger, task_ledgers.interrupt_wakeups, &period);
6841
6842 if (limit != LEDGER_LIMIT_INFINITY) {
6843 /*
6844 * An active limit means the wakeups monitor is enabled.
6845 */
6846 *rate_hz = (int32_t)(limit / (int64_t)(period / NSEC_PER_SEC));
6847 *flags = WAKEMON_ENABLE;
6848 if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON) {
6849 *flags |= WAKEMON_MAKE_FATAL;
6850 }
6851 } else {
6852 *flags = WAKEMON_DISABLE;
6853 *rate_hz = -1;
6854 }
6855
6856 /*
6857 * If WAKEMON_GET_PARAMS is present in flags, all other flags are ignored.
6858 */
6859 task_unlock(task);
6860 return KERN_SUCCESS;
6861 }
6862
6863 if (*flags & WAKEMON_ENABLE) {
6864 if (*flags & WAKEMON_SET_DEFAULTS) {
6865 *rate_hz = task_wakeups_monitor_rate;
6866 }
6867
6868 #ifndef CONFIG_NOMONITORS
6869 if (*flags & WAKEMON_MAKE_FATAL) {
6870 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON;
6871 }
6872 #endif /* CONFIG_NOMONITORS */
6873
6874 if (*rate_hz <= 0) {
6875 task_unlock(task);
6876 return KERN_INVALID_ARGUMENT;
6877 }
6878
6879 #ifndef CONFIG_NOMONITORS
6880 ledger_set_limit(ledger, task_ledgers.interrupt_wakeups, *rate_hz * task_wakeups_monitor_interval,
6881 (uint8_t)task_wakeups_monitor_ustackshots_trigger_pct);
6882 ledger_set_period(ledger, task_ledgers.interrupt_wakeups, task_wakeups_monitor_interval * NSEC_PER_SEC);
6883 ledger_enable_callback(ledger, task_ledgers.interrupt_wakeups);
6884 #endif /* CONFIG_NOMONITORS */
6885 } else if (*flags & WAKEMON_DISABLE) {
6886 /*
6887 * Caller wishes to disable wakeups monitor on the task.
6888 *
6889 * Disable telemetry if it was triggered by the wakeups monitor, and
6890 * remove the limit & callback on the wakeups ledger entry.
6891 */
6892 #if CONFIG_TELEMETRY
6893 telemetry_task_ctl_locked(task, TF_WAKEMON_WARNING, 0);
6894 #endif
6895 ledger_disable_refill(ledger, task_ledgers.interrupt_wakeups);
6896 ledger_disable_callback(ledger, task_ledgers.interrupt_wakeups);
6897 }
6898
6899 task_unlock(task);
6900 return KERN_SUCCESS;
6901 }
6902
6903 void
6904 task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1)
6905 {
6906 if (warning == LEDGER_WARNING_ROSE_ABOVE) {
6907 #if CONFIG_TELEMETRY
6908 /*
6909 * This task is in danger of violating the wakeups monitor. Enable telemetry on this task
6910 * so there are micro-stackshots available if and when EXC_RESOURCE is triggered.
6911 */
6912 telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 1);
6913 #endif
6914 return;
6915 }
6916
6917 #if CONFIG_TELEMETRY
6918 /*
6919 * If the balance has dipped below the warning level (LEDGER_WARNING_DIPPED_BELOW) or
6920 * exceeded the limit, turn telemetry off for the task.
6921 */
6922 telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 0);
6923 #endif
6924
6925 if (warning == 0) {
6926 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS();
6927 }
6928 }
6929
6930 void __attribute__((noinline))
6931 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS(void)
6932 {
6933 task_t task = current_task();
6934 int pid = 0;
6935 const char *procname = "unknown";
6936 boolean_t fatal;
6937 kern_return_t kr;
6938 #ifdef EXC_RESOURCE_MONITORS
6939 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
6940 #endif /* EXC_RESOURCE_MONITORS */
6941 struct ledger_entry_info lei;
6942
6943 #ifdef MACH_BSD
6944 pid = proc_selfpid();
6945 if (task->bsd_info != NULL) {
6946 procname = proc_name_address(current_task()->bsd_info);
6947 }
6948 #endif
6949
6950 ledger_get_entry_info(task->ledger, task_ledgers.interrupt_wakeups, &lei);
6951
6952 /*
6953 * Disable the exception notification so we don't overwhelm
6954 * the listener with an endless stream of redundant exceptions.
6955 * TODO: detect whether another thread is already reporting the violation.
6956 */
6957 uint32_t flags = WAKEMON_DISABLE;
6958 task_wakeups_monitor_ctl(task, &flags, NULL);
6959
6960 fatal = task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON;
6961 trace_resource_violation(RMON_CPUWAKES_VIOLATED, &lei);
6962 os_log(OS_LOG_DEFAULT, "process %s[%d] caught waking the CPU %llu times "
6963 "over ~%llu seconds, averaging %llu wakes / second and "
6964 "violating a %slimit of %llu wakes over %llu seconds.\n",
6965 procname, pid,
6966 lei.lei_balance, lei.lei_last_refill / NSEC_PER_SEC,
6967 lei.lei_last_refill == 0 ? 0 :
6968 (NSEC_PER_SEC * lei.lei_balance / lei.lei_last_refill),
6969 fatal ? "FATAL " : "",
6970 lei.lei_limit, lei.lei_refill_period / NSEC_PER_SEC);
6971
6972 kr = send_resource_violation(send_cpu_wakes_violation, task, &lei,
6973 fatal ? kRNFatalLimitFlag : 0);
6974 if (kr) {
6975 printf("send_resource_violation(CPU wakes, ...): error %#x\n", kr);
6976 }
6977
6978 #ifdef EXC_RESOURCE_MONITORS
6979 if (disable_exc_resource) {
6980 printf("process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
6981 "supressed by a boot-arg\n", procname, pid);
6982 return;
6983 }
6984 if (audio_active) {
6985 os_log(OS_LOG_DEFAULT, "process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
6986 "supressed due to audio playback\n", procname, pid);
6987 return;
6988 }
6989 if (lei.lei_last_refill == 0) {
6990 os_log(OS_LOG_DEFAULT, "process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
6991 "supressed due to lei.lei_last_refill = 0 \n", procname, pid);
6992 }
6993
6994 code[0] = code[1] = 0;
6995 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_WAKEUPS);
6996 EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_WAKEUPS_MONITOR);
6997 EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_PERMITTED(code[0],
6998 NSEC_PER_SEC * lei.lei_limit / lei.lei_refill_period);
6999 EXC_RESOURCE_CPUMONITOR_ENCODE_OBSERVATION_INTERVAL(code[0],
7000 lei.lei_last_refill);
7001 EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_OBSERVED(code[1],
7002 NSEC_PER_SEC * lei.lei_balance / lei.lei_last_refill);
7003 exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
7004 #endif /* EXC_RESOURCE_MONITORS */
7005
7006 if (fatal) {
7007 task_terminate_internal(task);
7008 }
7009 }
7010
7011 static boolean_t
7012 global_update_logical_writes(int64_t io_delta, int64_t *global_write_count)
7013 {
7014 int64_t old_count, new_count;
7015 boolean_t needs_telemetry;
7016
7017 do {
7018 new_count = old_count = *global_write_count;
7019 new_count += io_delta;
7020 if (new_count >= io_telemetry_limit) {
7021 new_count = 0;
7022 needs_telemetry = TRUE;
7023 } else {
7024 needs_telemetry = FALSE;
7025 }
7026 } while (!OSCompareAndSwap64(old_count, new_count, global_write_count));
7027 return needs_telemetry;
7028 }
7029
7030 void
7031 task_update_physical_writes(__unused task_t task, __unused task_physical_write_flavor_t flavor, __unused uint64_t io_size, __unused task_balance_flags_t flags)
7032 {
7033 #if CONFIG_PHYS_WRITE_ACCT
7034 if (!io_size) {
7035 return;
7036 }
7037
7038 /*
7039 * task == NULL means that we have to update kernel_task ledgers
7040 */
7041 if (!task) {
7042 task = kernel_task;
7043 }
7044
7045 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_PHYS_WRITE_ACCT)) | DBG_FUNC_NONE,
7046 task_pid(task), flavor, io_size, flags, 0);
7047 DTRACE_IO4(physical_writes, struct task *, task, task_physical_write_flavor_t, flavor, uint64_t, io_size, task_balance_flags_t, flags);
7048
7049 if (flags & TASK_BALANCE_CREDIT) {
7050 if (flavor == TASK_PHYSICAL_WRITE_METADATA) {
7051 OSAddAtomic64(io_size, (SInt64 *)&(task->task_fs_metadata_writes));
7052 ledger_credit_nocheck(task->ledger, task_ledgers.fs_metadata_writes, io_size);
7053 }
7054 } else if (flags & TASK_BALANCE_DEBIT) {
7055 if (flavor == TASK_PHYSICAL_WRITE_METADATA) {
7056 OSAddAtomic64(-1 * io_size, (SInt64 *)&(task->task_fs_metadata_writes));
7057 ledger_debit_nocheck(task->ledger, task_ledgers.fs_metadata_writes, io_size);
7058 }
7059 }
7060 #endif /* CONFIG_PHYS_WRITE_ACCT */
7061 }
7062
7063 void
7064 task_update_logical_writes(task_t task, uint32_t io_size, int flags, void *vp)
7065 {
7066 int64_t io_delta = 0;
7067 int64_t * global_counter_to_update;
7068 boolean_t needs_telemetry = FALSE;
7069 boolean_t is_external_device = FALSE;
7070 int ledger_to_update = 0;
7071 struct task_writes_counters * writes_counters_to_update;
7072
7073 if ((!task) || (!io_size) || (!vp)) {
7074 return;
7075 }
7076
7077 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_DATA_WRITE)) | DBG_FUNC_NONE,
7078 task_pid(task), io_size, flags, (uintptr_t)VM_KERNEL_ADDRPERM(vp), 0);
7079 DTRACE_IO4(logical_writes, struct task *, task, uint32_t, io_size, int, flags, vnode *, vp);
7080
7081 // Is the drive backing this vnode internal or external to the system?
7082 if (vnode_isonexternalstorage(vp) == false) {
7083 global_counter_to_update = &global_logical_writes_count;
7084 ledger_to_update = task_ledgers.logical_writes;
7085 writes_counters_to_update = &task->task_writes_counters_internal;
7086 is_external_device = FALSE;
7087 } else {
7088 global_counter_to_update = &global_logical_writes_to_external_count;
7089 ledger_to_update = task_ledgers.logical_writes_to_external;
7090 writes_counters_to_update = &task->task_writes_counters_external;
7091 is_external_device = TRUE;
7092 }
7093
7094 switch (flags) {
7095 case TASK_WRITE_IMMEDIATE:
7096 OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_immediate_writes));
7097 ledger_credit(task->ledger, ledger_to_update, io_size);
7098 if (!is_external_device) {
7099 coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, TRUE, io_size);
7100 }
7101 break;
7102 case TASK_WRITE_DEFERRED:
7103 OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_deferred_writes));
7104 ledger_credit(task->ledger, ledger_to_update, io_size);
7105 if (!is_external_device) {
7106 coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, TRUE, io_size);
7107 }
7108 break;
7109 case TASK_WRITE_INVALIDATED:
7110 OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_invalidated_writes));
7111 ledger_debit(task->ledger, ledger_to_update, io_size);
7112 if (!is_external_device) {
7113 coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, FALSE, io_size);
7114 }
7115 break;
7116 case TASK_WRITE_METADATA:
7117 OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_metadata_writes));
7118 ledger_credit(task->ledger, ledger_to_update, io_size);
7119 if (!is_external_device) {
7120 coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, TRUE, io_size);
7121 }
7122 break;
7123 }
7124
7125 io_delta = (flags == TASK_WRITE_INVALIDATED) ? ((int64_t)io_size * -1ll) : ((int64_t)io_size);
7126 if (io_telemetry_limit != 0) {
7127 /* If io_telemetry_limit is 0, disable global updates and I/O telemetry */
7128 needs_telemetry = global_update_logical_writes(io_delta, global_counter_to_update);
7129 if (needs_telemetry && !is_external_device) {
7130 act_set_io_telemetry_ast(current_thread());
7131 }
7132 }
7133 }
7134
7135 /*
7136 * Control the I/O monitor for a task.
7137 */
7138 kern_return_t
7139 task_io_monitor_ctl(task_t task, uint32_t *flags)
7140 {
7141 ledger_t ledger = task->ledger;
7142
7143 task_lock(task);
7144 if (*flags & IOMON_ENABLE) {
7145 /* Configure the physical I/O ledger */
7146 ledger_set_limit(ledger, task_ledgers.physical_writes, (task_iomon_limit_mb * 1024 * 1024), 0);
7147 ledger_set_period(ledger, task_ledgers.physical_writes, (task_iomon_interval_secs * NSEC_PER_SEC));
7148 } else if (*flags & IOMON_DISABLE) {
7149 /*
7150 * Caller wishes to disable I/O monitor on the task.
7151 */
7152 ledger_disable_refill(ledger, task_ledgers.physical_writes);
7153 ledger_disable_callback(ledger, task_ledgers.physical_writes);
7154 }
7155
7156 task_unlock(task);
7157 return KERN_SUCCESS;
7158 }
7159
7160 void
7161 task_io_rate_exceeded(int warning, const void *param0, __unused const void *param1)
7162 {
7163 if (warning == 0) {
7164 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO((int)param0);
7165 }
7166 }
7167
7168 void __attribute__((noinline))
7169 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO(int flavor)
7170 {
7171 int pid = 0;
7172 task_t task = current_task();
7173 #ifdef EXC_RESOURCE_MONITORS
7174 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
7175 #endif /* EXC_RESOURCE_MONITORS */
7176 struct ledger_entry_info lei;
7177 kern_return_t kr;
7178
7179 #ifdef MACH_BSD
7180 pid = proc_selfpid();
7181 #endif
7182 /*
7183 * Get the ledger entry info. We need to do this before disabling the exception
7184 * to get correct values for all fields.
7185 */
7186 switch (flavor) {
7187 case FLAVOR_IO_PHYSICAL_WRITES:
7188 ledger_get_entry_info(task->ledger, task_ledgers.physical_writes, &lei);
7189 break;
7190 }
7191
7192
7193 /*
7194 * Disable the exception notification so we don't overwhelm
7195 * the listener with an endless stream of redundant exceptions.
7196 * TODO: detect whether another thread is already reporting the violation.
7197 */
7198 uint32_t flags = IOMON_DISABLE;
7199 task_io_monitor_ctl(task, &flags);
7200
7201 if (flavor == FLAVOR_IO_LOGICAL_WRITES) {
7202 trace_resource_violation(RMON_LOGWRITES_VIOLATED, &lei);
7203 }
7204 os_log(OS_LOG_DEFAULT, "process [%d] caught causing excessive I/O (flavor: %d). Task I/O: %lld MB. [Limit : %lld MB per %lld secs]\n",
7205 pid, flavor, (lei.lei_balance / (1024 * 1024)), (lei.lei_limit / (1024 * 1024)), (lei.lei_refill_period / NSEC_PER_SEC));
7206
7207 kr = send_resource_violation(send_disk_writes_violation, task, &lei, kRNFlagsNone);
7208 if (kr) {
7209 printf("send_resource_violation(disk_writes, ...): error %#x\n", kr);
7210 }
7211
7212 #ifdef EXC_RESOURCE_MONITORS
7213 code[0] = code[1] = 0;
7214 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_IO);
7215 EXC_RESOURCE_ENCODE_FLAVOR(code[0], flavor);
7216 EXC_RESOURCE_IO_ENCODE_INTERVAL(code[0], (lei.lei_refill_period / NSEC_PER_SEC));
7217 EXC_RESOURCE_IO_ENCODE_LIMIT(code[0], (lei.lei_limit / (1024 * 1024)));
7218 EXC_RESOURCE_IO_ENCODE_OBSERVED(code[1], (lei.lei_balance / (1024 * 1024)));
7219 exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
7220 #endif /* EXC_RESOURCE_MONITORS */
7221 }
7222
7223 /* Placeholders for the task set/get voucher interfaces */
7224 kern_return_t
7225 task_get_mach_voucher(
7226 task_t task,
7227 mach_voucher_selector_t __unused which,
7228 ipc_voucher_t *voucher)
7229 {
7230 if (TASK_NULL == task) {
7231 return KERN_INVALID_TASK;
7232 }
7233
7234 *voucher = NULL;
7235 return KERN_SUCCESS;
7236 }
7237
7238 kern_return_t
7239 task_set_mach_voucher(
7240 task_t task,
7241 ipc_voucher_t __unused voucher)
7242 {
7243 if (TASK_NULL == task) {
7244 return KERN_INVALID_TASK;
7245 }
7246
7247 return KERN_SUCCESS;
7248 }
7249
7250 kern_return_t
7251 task_swap_mach_voucher(
7252 __unused task_t task,
7253 __unused ipc_voucher_t new_voucher,
7254 ipc_voucher_t *in_out_old_voucher)
7255 {
7256 /*
7257 * Currently this function is only called from a MIG generated
7258 * routine which doesn't release the reference on the voucher
7259 * addressed by in_out_old_voucher. To avoid leaking this reference,
7260 * a call to release it has been added here.
7261 */
7262 ipc_voucher_release(*in_out_old_voucher);
7263 return KERN_NOT_SUPPORTED;
7264 }
7265
7266 void
7267 task_set_gpu_denied(task_t task, boolean_t denied)
7268 {
7269 task_lock(task);
7270
7271 if (denied) {
7272 task->t_flags |= TF_GPU_DENIED;
7273 } else {
7274 task->t_flags &= ~TF_GPU_DENIED;
7275 }
7276
7277 task_unlock(task);
7278 }
7279
7280 boolean_t
7281 task_is_gpu_denied(task_t task)
7282 {
7283 /* We don't need the lock to read this flag */
7284 return (task->t_flags & TF_GPU_DENIED) ? TRUE : FALSE;
7285 }
7286
7287
7288 uint64_t
7289 get_task_memory_region_count(task_t task)
7290 {
7291 vm_map_t map;
7292 map = (task == kernel_task) ? kernel_map: task->map;
7293 return (uint64_t)get_map_nentries(map);
7294 }
7295
7296 static void
7297 kdebug_trace_dyld_internal(uint32_t base_code,
7298 struct dyld_kernel_image_info *info)
7299 {
7300 static_assert(sizeof(info->uuid) >= 16);
7301
7302 #if defined(__LP64__)
7303 uint64_t *uuid = (uint64_t *)&(info->uuid);
7304
7305 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
7306 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code), uuid[0],
7307 uuid[1], info->load_addr,
7308 (uint64_t)info->fsid.val[0] | ((uint64_t)info->fsid.val[1] << 32),
7309 0);
7310 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
7311 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 1),
7312 (uint64_t)info->fsobjid.fid_objno |
7313 ((uint64_t)info->fsobjid.fid_generation << 32),
7314 0, 0, 0, 0);
7315 #else /* defined(__LP64__) */
7316 uint32_t *uuid = (uint32_t *)&(info->uuid);
7317
7318 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
7319 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 2), uuid[0],
7320 uuid[1], uuid[2], uuid[3], 0);
7321 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
7322 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 3),
7323 (uint32_t)info->load_addr, info->fsid.val[0], info->fsid.val[1],
7324 info->fsobjid.fid_objno, 0);
7325 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
7326 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 4),
7327 info->fsobjid.fid_generation, 0, 0, 0, 0);
7328 #endif /* !defined(__LP64__) */
7329 }
7330
7331 static kern_return_t
7332 kdebug_trace_dyld(task_t task, uint32_t base_code,
7333 vm_map_copy_t infos_copy, mach_msg_type_number_t infos_len)
7334 {
7335 kern_return_t kr;
7336 dyld_kernel_image_info_array_t infos;
7337 vm_map_offset_t map_data;
7338 vm_offset_t data;
7339
7340 if (!infos_copy) {
7341 return KERN_INVALID_ADDRESS;
7342 }
7343
7344 if (!kdebug_enable ||
7345 !kdebug_debugid_enabled(KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, 0))) {
7346 vm_map_copy_discard(infos_copy);
7347 return KERN_SUCCESS;
7348 }
7349
7350 if (task == NULL || task != current_task()) {
7351 return KERN_INVALID_TASK;
7352 }
7353
7354 kr = vm_map_copyout(ipc_kernel_map, &map_data, (vm_map_copy_t)infos_copy);
7355 if (kr != KERN_SUCCESS) {
7356 return kr;
7357 }
7358
7359 infos = CAST_DOWN(dyld_kernel_image_info_array_t, map_data);
7360
7361 for (mach_msg_type_number_t i = 0; i < infos_len; i++) {
7362 kdebug_trace_dyld_internal(base_code, &(infos[i]));
7363 }
7364
7365 data = CAST_DOWN(vm_offset_t, map_data);
7366 mach_vm_deallocate(ipc_kernel_map, data, infos_len * sizeof(infos[0]));
7367 return KERN_SUCCESS;
7368 }
7369
7370 kern_return_t
7371 task_register_dyld_image_infos(task_t task,
7372 dyld_kernel_image_info_array_t infos_copy,
7373 mach_msg_type_number_t infos_len)
7374 {
7375 return kdebug_trace_dyld(task, DBG_DYLD_UUID_MAP_A,
7376 (vm_map_copy_t)infos_copy, infos_len);
7377 }
7378
7379 kern_return_t
7380 task_unregister_dyld_image_infos(task_t task,
7381 dyld_kernel_image_info_array_t infos_copy,
7382 mach_msg_type_number_t infos_len)
7383 {
7384 return kdebug_trace_dyld(task, DBG_DYLD_UUID_UNMAP_A,
7385 (vm_map_copy_t)infos_copy, infos_len);
7386 }
7387
7388 kern_return_t
7389 task_get_dyld_image_infos(__unused task_t task,
7390 __unused dyld_kernel_image_info_array_t * dyld_images,
7391 __unused mach_msg_type_number_t * dyld_imagesCnt)
7392 {
7393 return KERN_NOT_SUPPORTED;
7394 }
7395
7396 kern_return_t
7397 task_register_dyld_shared_cache_image_info(task_t task,
7398 dyld_kernel_image_info_t cache_img,
7399 __unused boolean_t no_cache,
7400 __unused boolean_t private_cache)
7401 {
7402 if (task == NULL || task != current_task()) {
7403 return KERN_INVALID_TASK;
7404 }
7405
7406 kdebug_trace_dyld_internal(DBG_DYLD_UUID_SHARED_CACHE_A, &cache_img);
7407 return KERN_SUCCESS;
7408 }
7409
7410 kern_return_t
7411 task_register_dyld_set_dyld_state(__unused task_t task,
7412 __unused uint8_t dyld_state)
7413 {
7414 return KERN_NOT_SUPPORTED;
7415 }
7416
7417 kern_return_t
7418 task_register_dyld_get_process_state(__unused task_t task,
7419 __unused dyld_kernel_process_info_t * dyld_process_state)
7420 {
7421 return KERN_NOT_SUPPORTED;
7422 }
7423
7424 kern_return_t
7425 task_inspect(task_inspect_t task_insp, task_inspect_flavor_t flavor,
7426 task_inspect_info_t info_out, mach_msg_type_number_t *size_in_out)
7427 {
7428 #if MONOTONIC
7429 task_t task = (task_t)task_insp;
7430 kern_return_t kr = KERN_SUCCESS;
7431 mach_msg_type_number_t size;
7432
7433 if (task == TASK_NULL) {
7434 return KERN_INVALID_ARGUMENT;
7435 }
7436
7437 size = *size_in_out;
7438
7439 switch (flavor) {
7440 case TASK_INSPECT_BASIC_COUNTS: {
7441 struct task_inspect_basic_counts *bc;
7442 uint64_t task_counts[MT_CORE_NFIXED] = { 0 };
7443
7444 if (size < TASK_INSPECT_BASIC_COUNTS_COUNT) {
7445 kr = KERN_INVALID_ARGUMENT;
7446 break;
7447 }
7448
7449 mt_fixed_task_counts(task, task_counts);
7450 bc = (struct task_inspect_basic_counts *)info_out;
7451 #ifdef MT_CORE_INSTRS
7452 bc->instructions = task_counts[MT_CORE_INSTRS];
7453 #else /* defined(MT_CORE_INSTRS) */
7454 bc->instructions = 0;
7455 #endif /* !defined(MT_CORE_INSTRS) */
7456 bc->cycles = task_counts[MT_CORE_CYCLES];
7457 size = TASK_INSPECT_BASIC_COUNTS_COUNT;
7458 break;
7459 }
7460 default:
7461 kr = KERN_INVALID_ARGUMENT;
7462 break;
7463 }
7464
7465 if (kr == KERN_SUCCESS) {
7466 *size_in_out = size;
7467 }
7468 return kr;
7469 #else /* MONOTONIC */
7470 #pragma unused(task_insp, flavor, info_out, size_in_out)
7471 return KERN_NOT_SUPPORTED;
7472 #endif /* !MONOTONIC */
7473 }
7474
7475 #if CONFIG_SECLUDED_MEMORY
/*
 * Count of tasks whose task_can_use_secluded_mem flag is currently set.
 * Adjusted atomically by task_set_can_use_secluded_mem_locked().
 */
int num_tasks_can_use_secluded_mem = 0;
7477
7478 void
7479 task_set_can_use_secluded_mem(
7480 task_t task,
7481 boolean_t can_use_secluded_mem)
7482 {
7483 if (!task->task_could_use_secluded_mem) {
7484 return;
7485 }
7486 task_lock(task);
7487 task_set_can_use_secluded_mem_locked(task, can_use_secluded_mem);
7488 task_unlock(task);
7489 }
7490
7491 void
7492 task_set_can_use_secluded_mem_locked(
7493 task_t task,
7494 boolean_t can_use_secluded_mem)
7495 {
7496 assert(task->task_could_use_secluded_mem);
7497 if (can_use_secluded_mem &&
7498 secluded_for_apps && /* global boot-arg */
7499 !task->task_can_use_secluded_mem) {
7500 assert(num_tasks_can_use_secluded_mem >= 0);
7501 OSAddAtomic(+1,
7502 (volatile SInt32 *)&num_tasks_can_use_secluded_mem);
7503 task->task_can_use_secluded_mem = TRUE;
7504 } else if (!can_use_secluded_mem &&
7505 task->task_can_use_secluded_mem) {
7506 assert(num_tasks_can_use_secluded_mem > 0);
7507 OSAddAtomic(-1,
7508 (volatile SInt32 *)&num_tasks_can_use_secluded_mem);
7509 task->task_can_use_secluded_mem = FALSE;
7510 }
7511 }
7512
7513 void
7514 task_set_could_use_secluded_mem(
7515 task_t task,
7516 boolean_t could_use_secluded_mem)
7517 {
7518 task->task_could_use_secluded_mem = !!could_use_secluded_mem;
7519 }
7520
7521 void
7522 task_set_could_also_use_secluded_mem(
7523 task_t task,
7524 boolean_t could_also_use_secluded_mem)
7525 {
7526 task->task_could_also_use_secluded_mem = !!could_also_use_secluded_mem;
7527 }
7528
7529 boolean_t
7530 task_can_use_secluded_mem(
7531 task_t task,
7532 boolean_t is_alloc)
7533 {
7534 if (task->task_can_use_secluded_mem) {
7535 assert(task->task_could_use_secluded_mem);
7536 assert(num_tasks_can_use_secluded_mem > 0);
7537 return TRUE;
7538 }
7539 if (task->task_could_also_use_secluded_mem &&
7540 num_tasks_can_use_secluded_mem > 0) {
7541 assert(num_tasks_can_use_secluded_mem > 0);
7542 return TRUE;
7543 }
7544
7545 /*
7546 * If a single task is using more than some large amount of
7547 * memory (i.e. secluded_shutoff_trigger) and is approaching
7548 * its task limit, allow it to dip into secluded and begin
7549 * suppression of rebuilding secluded memory until that task exits.
7550 */
7551 if (is_alloc && secluded_shutoff_trigger != 0) {
7552 uint64_t phys_used = get_task_phys_footprint(task);
7553 uint64_t limit = get_task_phys_footprint_limit(task);
7554 if (phys_used > secluded_shutoff_trigger &&
7555 limit > secluded_shutoff_trigger &&
7556 phys_used > limit - secluded_shutoff_headroom) {
7557 start_secluded_suppression(task);
7558 return TRUE;
7559 }
7560 }
7561
7562 return FALSE;
7563 }
7564
7565 boolean_t
7566 task_could_use_secluded_mem(
7567 task_t task)
7568 {
7569 return task->task_could_use_secluded_mem;
7570 }
7571
7572 boolean_t
7573 task_could_also_use_secluded_mem(
7574 task_t task)
7575 {
7576 return task->task_could_also_use_secluded_mem;
7577 }
7578 #endif /* CONFIG_SECLUDED_MEMORY */
7579
7580 queue_head_t *
7581 task_io_user_clients(task_t task)
7582 {
7583 return &task->io_user_clients;
7584 }
7585
7586 void
7587 task_set_message_app_suspended(task_t task, boolean_t enable)
7588 {
7589 task->message_app_suspended = enable;
7590 }
7591
7592 void
7593 task_copy_fields_for_exec(task_t dst_task, task_t src_task)
7594 {
7595 dst_task->vtimers = src_task->vtimers;
7596 }
7597
#if DEVELOPMENT || DEBUG
/*
 * When non-zero, task_self_region_footprint() reports TRUE for every
 * task regardless of the per-task setting.
 */
int vm_region_footprint = 0;
#endif /* DEVELOPMENT || DEBUG */
7601
7602 boolean_t
7603 task_self_region_footprint(void)
7604 {
7605 #if DEVELOPMENT || DEBUG
7606 if (vm_region_footprint) {
7607 /* system-wide override */
7608 return TRUE;
7609 }
7610 #endif /* DEVELOPMENT || DEBUG */
7611 return current_task()->task_region_footprint;
7612 }
7613
7614 void
7615 task_self_region_footprint_set(
7616 boolean_t newval)
7617 {
7618 task_t curtask;
7619
7620 curtask = current_task();
7621 task_lock(curtask);
7622 if (newval) {
7623 curtask->task_region_footprint = TRUE;
7624 } else {
7625 curtask->task_region_footprint = FALSE;
7626 }
7627 task_unlock(curtask);
7628 }
7629
7630 void
7631 task_set_darkwake_mode(task_t task, boolean_t set_mode)
7632 {
7633 assert(task);
7634
7635 task_lock(task);
7636
7637 if (set_mode) {
7638 task->t_flags |= TF_DARKWAKE_MODE;
7639 } else {
7640 task->t_flags &= ~(TF_DARKWAKE_MODE);
7641 }
7642
7643 task_unlock(task);
7644 }
7645
7646 boolean_t
7647 task_get_darkwake_mode(task_t task)
7648 {
7649 assert(task);
7650 return (task->t_flags & TF_DARKWAKE_MODE) != 0;
7651 }
7652
7653 kern_return_t
7654 task_get_exc_guard_behavior(
7655 task_t task,
7656 task_exc_guard_behavior_t *behaviorp)
7657 {
7658 if (task == TASK_NULL) {
7659 return KERN_INVALID_TASK;
7660 }
7661 *behaviorp = task->task_exc_guard;
7662 return KERN_SUCCESS;
7663 }
7664
#if !defined(TASK_EXC_GUARD_ALL)
/*
 * Fallback definition, used only when no header provides one.
 * Temporary until two branches are merged.
 */
#define TASK_EXC_GUARD_ALL (TASK_EXC_GUARD_VM_ALL | 0xf0)
#endif /* !defined(TASK_EXC_GUARD_ALL) */
7669
7670 kern_return_t
7671 task_set_exc_guard_behavior(
7672 task_t task,
7673 task_exc_guard_behavior_t behavior)
7674 {
7675 if (task == TASK_NULL) {
7676 return KERN_INVALID_TASK;
7677 }
7678 if (behavior & ~TASK_EXC_GUARD_ALL) {
7679 return KERN_INVALID_VALUE;
7680 }
7681 task->task_exc_guard = behavior;
7682 return KERN_SUCCESS;
7683 }
7684
7685 #if __arm64__
7686 extern int legacy_footprint_entitlement_mode;
7687 extern void memorystatus_act_on_legacy_footprint_entitlement(struct proc *, boolean_t);
7688 extern void memorystatus_act_on_ios13extended_footprint_entitlement(struct proc *);
7689
7690
7691 void
7692 task_set_legacy_footprint(
7693 task_t task)
7694 {
7695 task_lock(task);
7696 task->task_legacy_footprint = TRUE;
7697 task_unlock(task);
7698 }
7699
7700 void
7701 task_set_extra_footprint_limit(
7702 task_t task)
7703 {
7704 if (task->task_extra_footprint_limit) {
7705 return;
7706 }
7707 task_lock(task);
7708 if (task->task_extra_footprint_limit) {
7709 task_unlock(task);
7710 return;
7711 }
7712 task->task_extra_footprint_limit = TRUE;
7713 task_unlock(task);
7714 memorystatus_act_on_legacy_footprint_entitlement(task->bsd_info, TRUE);
7715 }
7716
7717 void
7718 task_set_ios13extended_footprint_limit(
7719 task_t task)
7720 {
7721 if (task->task_ios13extended_footprint_limit) {
7722 return;
7723 }
7724 task_lock(task);
7725 if (task->task_ios13extended_footprint_limit) {
7726 task_unlock(task);
7727 return;
7728 }
7729 task->task_ios13extended_footprint_limit = TRUE;
7730 task_unlock(task);
7731 memorystatus_act_on_ios13extended_footprint_entitlement(task->bsd_info);
7732 }
7733 #endif /* __arm64__ */
7734
7735 static inline ledger_amount_t
7736 task_ledger_get_balance(
7737 ledger_t ledger,
7738 int ledger_idx)
7739 {
7740 ledger_amount_t amount;
7741 amount = 0;
7742 ledger_get_balance(ledger, ledger_idx, &amount);
7743 return amount;
7744 }
7745
7746 /*
7747 * Gather the amount of memory counted in a task's footprint due to
7748 * being in a specific set of ledgers.
7749 */
7750 void
7751 task_ledgers_footprint(
7752 ledger_t ledger,
7753 ledger_amount_t *ledger_resident,
7754 ledger_amount_t *ledger_compressed)
7755 {
7756 *ledger_resident = 0;
7757 *ledger_compressed = 0;
7758
7759 /* purgeable non-volatile memory */
7760 *ledger_resident += task_ledger_get_balance(ledger, task_ledgers.purgeable_nonvolatile);
7761 *ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.purgeable_nonvolatile_compressed);
7762
7763 /* "default" tagged memory */
7764 *ledger_resident += task_ledger_get_balance(ledger, task_ledgers.tagged_footprint);
7765 *ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.tagged_footprint_compressed);
7766
7767 /* "network" currently never counts in the footprint... */
7768
7769 /* "media" tagged memory */
7770 *ledger_resident += task_ledger_get_balance(ledger, task_ledgers.media_footprint);
7771 *ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.media_footprint_compressed);
7772
7773 /* "graphics" tagged memory */
7774 *ledger_resident += task_ledger_get_balance(ledger, task_ledgers.graphics_footprint);
7775 *ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.graphics_footprint_compressed);
7776
7777 /* "neural" tagged memory */
7778 *ledger_resident += task_ledger_get_balance(ledger, task_ledgers.neural_footprint);
7779 *ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.neural_footprint_compressed);
7780 }
7781
7782 void
7783 task_set_memory_ownership_transfer(
7784 task_t task,
7785 boolean_t value)
7786 {
7787 task_lock(task);
7788 task->task_can_transfer_memory_ownership = !!value;
7789 task_unlock(task);
7790 }
7791
7792 void
7793 task_copy_vmobjects(task_t task, vm_object_query_t query, size_t len, size_t *num)
7794 {
7795 vm_object_t find_vmo;
7796 size_t size = 0;
7797
7798 task_objq_lock(task);
7799 if (query != NULL) {
7800 queue_iterate(&task->task_objq, find_vmo, vm_object_t, task_objq)
7801 {
7802 vm_object_query_t p = &query[size++];
7803
7804 /* make sure to not overrun */
7805 if (size * sizeof(vm_object_query_data_t) > len) {
7806 --size;
7807 break;
7808 }
7809
7810 bzero(p, sizeof(*p));
7811 p->object_id = (vm_object_id_t) VM_KERNEL_ADDRPERM(find_vmo);
7812 p->virtual_size = find_vmo->internal ? find_vmo->vo_size : 0;
7813 p->resident_size = find_vmo->resident_page_count * PAGE_SIZE;
7814 p->wired_size = find_vmo->wired_page_count * PAGE_SIZE;
7815 p->reusable_size = find_vmo->reusable_page_count * PAGE_SIZE;
7816 p->vo_no_footprint = find_vmo->vo_no_footprint;
7817 p->vo_ledger_tag = find_vmo->vo_ledger_tag;
7818 p->purgable = find_vmo->purgable;
7819
7820 if (find_vmo->internal && find_vmo->pager_created && find_vmo->pager != NULL) {
7821 p->compressed_size = vm_compressor_pager_get_count(find_vmo->pager) * PAGE_SIZE;
7822 } else {
7823 p->compressed_size = 0;
7824 }
7825 }
7826 } else {
7827 size = (size_t)task->task_owned_objects;
7828 }
7829 task_objq_unlock(task);
7830
7831 *num = size;
7832 }
7833
7834 void
7835 task_set_filter_msg_flag(
7836 task_t task,
7837 boolean_t flag)
7838 {
7839 assert(task != TASK_NULL);
7840
7841 task_lock(task);
7842 if (flag) {
7843 task->t_flags |= TF_FILTER_MSG;
7844 } else {
7845 task->t_flags &= ~TF_FILTER_MSG;
7846 }
7847 task_unlock(task);
7848 }
7849
7850 boolean_t
7851 task_get_filter_msg_flag(
7852 task_t task)
7853 {
7854 uint32_t flags = 0;
7855
7856 if (!task) {
7857 return false;
7858 }
7859
7860 flags = os_atomic_load(&task->t_flags, relaxed);
7861 return (flags & TF_FILTER_MSG) ? TRUE : FALSE;
7862 }
7863 bool
7864 task_is_exotic(
7865 task_t task)
7866 {
7867 if (task == TASK_NULL) {
7868 return false;
7869 }
7870 return vm_map_is_exotic(get_task_map(task));
7871 }
7872
7873 bool
7874 task_is_alien(
7875 task_t task)
7876 {
7877 if (task == TASK_NULL) {
7878 return false;
7879 }
7880 return vm_map_is_alien(get_task_map(task));
7881 }
7882
7883
7884
7885 #if CONFIG_MACF
7886 /* Set the filter mask for Mach traps. */
7887 void
7888 mac_task_set_mach_filter_mask(task_t task, uint8_t *maskptr)
7889 {
7890 assert(task);
7891
7892 task->mach_trap_filter_mask = maskptr;
7893 }
7894
7895 /* Set the filter mask for kobject msgs. */
7896 void
7897 mac_task_set_kobj_filter_mask(task_t task, uint8_t *maskptr)
7898 {
7899 assert(task);
7900
7901 task->mach_kobj_filter_mask = maskptr;
7902 }
7903
7904 /* Hook for mach trap/sc filter evaluation policy. */
7905 mac_task_mach_filter_cbfunc_t mac_task_mach_trap_evaluate = NULL;
7906
7907 /* Hook for kobj message filter evaluation policy. */
7908 mac_task_kobj_filter_cbfunc_t mac_task_kobj_msg_evaluate = NULL;
7909
7910 /* Set the callback hooks for the filtering policy. */
7911 int
7912 mac_task_register_filter_callbacks(
7913 const mac_task_mach_filter_cbfunc_t mach_cbfunc,
7914 const mac_task_kobj_filter_cbfunc_t kobj_cbfunc)
7915 {
7916 if (mach_cbfunc != NULL) {
7917 if (mac_task_mach_trap_evaluate != NULL) {
7918 return KERN_FAILURE;
7919 }
7920 mac_task_mach_trap_evaluate = mach_cbfunc;
7921 }
7922 if (kobj_cbfunc != NULL) {
7923 if (mac_task_kobj_msg_evaluate != NULL) {
7924 return KERN_FAILURE;
7925 }
7926 mac_task_kobj_msg_evaluate = kobj_cbfunc;
7927 }
7928
7929 return KERN_SUCCESS;
7930 }
7931 #endif /* CONFIG_MACF */
7932
7933 void
7934 task_transfer_mach_filter_bits(
7935 task_t new_task,
7936 task_t old_task)
7937 {
7938 #ifdef CONFIG_MACF
7939 /* Copy mach trap and kernel object mask pointers to new task. */
7940 new_task->mach_trap_filter_mask = old_task->mach_trap_filter_mask;
7941 new_task->mach_kobj_filter_mask = old_task->mach_kobj_filter_mask;
7942 #endif
7943 /* If filter message flag is set then set it in the new task. */
7944 if (task_get_filter_msg_flag(old_task)) {
7945 new_task->t_flags |= TF_FILTER_MSG;
7946 }
7947 }
7948
7949
7950 #if __has_feature(ptrauth_calls)
7951
7952 #define PAC_EXCEPTION_ENTITLEMENT "com.apple.private.pac.exception"
7953
7954 void
7955 task_set_pac_exception_fatal_flag(
7956 task_t task)
7957 {
7958 assert(task != TASK_NULL);
7959
7960 if (!IOTaskHasEntitlement(task, PAC_EXCEPTION_ENTITLEMENT)) {
7961 return;
7962 }
7963
7964 task_lock(task);
7965 task->t_flags |= TF_PAC_EXC_FATAL;
7966 task_unlock(task);
7967 }
7968
7969 bool
7970 task_is_pac_exception_fatal(
7971 task_t task)
7972 {
7973 uint32_t flags = 0;
7974
7975 assert(task != TASK_NULL);
7976
7977 flags = os_atomic_load(&task->t_flags, relaxed);
7978 return (bool)(flags & TF_PAC_EXC_FATAL);
7979 }
7980 #endif /* __has_feature(ptrauth_calls) */
7981
7982 void
7983 task_set_tecs(task_t task)
7984 {
7985 if (task == TASK_NULL) {
7986 task = current_task();
7987 }
7988
7989 if (!machine_csv(CPUVN_CI)) {
7990 return;
7991 }
7992
7993 LCK_MTX_ASSERT(&task->lock, LCK_MTX_ASSERT_NOTOWNED);
7994
7995 task_lock(task);
7996
7997 task->t_flags |= TF_TECS;
7998
7999 thread_t thread;
8000 queue_iterate(&task->threads, thread, thread_t, task_threads) {
8001 machine_tecs(thread);
8002 }
8003 task_unlock(task);
8004 }