]> git.saurik.com Git - apple/xnu.git/blob - osfmk/kern/task.c
98d7250c059e5006633dbba875d5e7a90daf816c
[apple/xnu.git] / osfmk / kern / task.c
1 /*
2 * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_FREE_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 * File: kern/task.c
58 * Author: Avadis Tevanian, Jr., Michael Wayne Young, David Golub,
59 * David Black
60 *
61 * Task management primitives implementation.
62 */
63 /*
64 * Copyright (c) 1993 The University of Utah and
65 * the Computer Systems Laboratory (CSL). All rights reserved.
66 *
67 * Permission to use, copy, modify and distribute this software and its
68 * documentation is hereby granted, provided that both the copyright
69 * notice and this permission notice appear in all copies of the
70 * software, derivative works or modified versions, and any portions
71 * thereof, and that both notices appear in supporting documentation.
72 *
73 * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
74 * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
75 * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
76 *
77 * CSL requests users of this software to return to csl-dist@cs.utah.edu any
78 * improvements that they make and grant CSL redistribution rights.
79 *
80 */
81 /*
82 * NOTICE: This file was modified by McAfee Research in 2004 to introduce
83 * support for mandatory and extensible security protections. This notice
84 * is included in support of clause 2.2 (b) of the Apple Public License,
85 * Version 2.0.
86 * Copyright (c) 2005 SPARTA, Inc.
87 */
88
89 #include <mach/mach_types.h>
90 #include <mach/boolean.h>
91 #include <mach/host_priv.h>
92 #include <mach/machine/vm_types.h>
93 #include <mach/vm_param.h>
94 #include <mach/mach_vm.h>
95 #include <mach/semaphore.h>
96 #include <mach/task_info.h>
97 #include <mach/task_inspect.h>
98 #include <mach/task_special_ports.h>
99 #include <mach/sdt.h>
100
101 #include <ipc/ipc_importance.h>
102 #include <ipc/ipc_types.h>
103 #include <ipc/ipc_space.h>
104 #include <ipc/ipc_entry.h>
105 #include <ipc/ipc_hash.h>
106
107 #include <kern/kern_types.h>
108 #include <kern/mach_param.h>
109 #include <kern/misc_protos.h>
110 #include <kern/task.h>
111 #include <kern/thread.h>
112 #include <kern/coalition.h>
113 #include <kern/zalloc.h>
114 #include <kern/kalloc.h>
115 #include <kern/kern_cdata.h>
116 #include <kern/processor.h>
117 #include <kern/sched_prim.h> /* for thread_wakeup */
118 #include <kern/ipc_tt.h>
119 #include <kern/host.h>
120 #include <kern/clock.h>
121 #include <kern/timer.h>
122 #include <kern/assert.h>
123 #include <kern/sync_lock.h>
124 #include <kern/affinity.h>
125 #include <kern/exc_resource.h>
126 #include <kern/machine.h>
127 #include <kern/policy_internal.h>
128 #include <kern/restartable.h>
129
130 #include <corpses/task_corpse.h>
131 #if CONFIG_TELEMETRY
132 #include <kern/telemetry.h>
133 #endif
134
135 #if MONOTONIC
136 #include <kern/monotonic.h>
137 #include <machine/monotonic.h>
138 #endif /* MONOTONIC */
139
140 #include <os/log.h>
141
142 #include <vm/pmap.h>
143 #include <vm/vm_map.h>
144 #include <vm/vm_kern.h> /* for kernel_map, ipc_kernel_map */
145 #include <vm/vm_pageout.h>
146 #include <vm/vm_protos.h>
147 #include <vm/vm_purgeable_internal.h>
148 #include <vm/vm_compressor_pager.h>
149
150 #include <sys/resource.h>
151 #include <sys/signalvar.h> /* for coredump */
152 #include <sys/bsdtask_info.h>
153 /*
154 * Exported interfaces
155 */
156
157 #include <mach/task_server.h>
158 #include <mach/mach_host_server.h>
159 #include <mach/host_security_server.h>
160 #include <mach/mach_port_server.h>
161
162 #include <vm/vm_shared_region.h>
163
164 #include <libkern/OSDebug.h>
165 #include <libkern/OSAtomic.h>
166 #include <libkern/section_keywords.h>
167
168 #include <mach-o/loader.h>
169
170 #if CONFIG_ATM
171 #include <atm/atm_internal.h>
172 #endif
173
174 #include <kern/sfi.h> /* picks up ledger.h */
175
176 #if CONFIG_MACF
177 #include <security/mac_mach_internal.h>
178 #endif
179
180 #if KPERF
181 extern int kpc_force_all_ctrs(task_t, int);
182 #endif
183
184 task_t kernel_task;
185 zone_t task_zone;
186 lck_attr_t task_lck_attr;
187 lck_grp_t task_lck_grp;
188 lck_grp_attr_t task_lck_grp_attr;
189
190 extern int exc_via_corpse_forking;
191 extern int corpse_for_fatal_memkill;
192 extern boolean_t proc_send_synchronous_EXC_RESOURCE(void *p);
193
194 /* Flag set by core audio when audio is playing. Used to stifle EXC_RESOURCE generation when active. */
195 int audio_active = 0;
196
197 zinfo_usage_store_t tasks_tkm_private;
198 zinfo_usage_store_t tasks_tkm_shared;
199
200 /* A container to accumulate statistics for expired tasks */
201 expired_task_statistics_t dead_task_statistics;
202 lck_spin_t dead_task_statistics_lock;
203
204 ledger_template_t task_ledger_template = NULL;
205
206 SECURITY_READ_ONLY_LATE(struct _task_ledger_indices) task_ledgers __attribute__((used)) =
207 {.cpu_time = -1,
208 .tkm_private = -1,
209 .tkm_shared = -1,
210 .phys_mem = -1,
211 .wired_mem = -1,
212 .internal = -1,
213 .iokit_mapped = -1,
214 .alternate_accounting = -1,
215 .alternate_accounting_compressed = -1,
216 .page_table = -1,
217 .phys_footprint = -1,
218 .internal_compressed = -1,
219 .purgeable_volatile = -1,
220 .purgeable_nonvolatile = -1,
221 .purgeable_volatile_compressed = -1,
222 .purgeable_nonvolatile_compressed = -1,
223 .tagged_nofootprint = -1,
224 .tagged_footprint = -1,
225 .tagged_nofootprint_compressed = -1,
226 .tagged_footprint_compressed = -1,
227 .network_volatile = -1,
228 .network_nonvolatile = -1,
229 .network_volatile_compressed = -1,
230 .network_nonvolatile_compressed = -1,
231 .media_nofootprint = -1,
232 .media_footprint = -1,
233 .media_nofootprint_compressed = -1,
234 .media_footprint_compressed = -1,
235 .graphics_nofootprint = -1,
236 .graphics_footprint = -1,
237 .graphics_nofootprint_compressed = -1,
238 .graphics_footprint_compressed = -1,
239 .neural_nofootprint = -1,
240 .neural_footprint = -1,
241 .neural_nofootprint_compressed = -1,
242 .neural_footprint_compressed = -1,
243 .platform_idle_wakeups = -1,
244 .interrupt_wakeups = -1,
245 #if !CONFIG_EMBEDDED
246 .sfi_wait_times = { 0 /* initialized at runtime */},
247 #endif /* !CONFIG_EMBEDDED */
248 .cpu_time_billed_to_me = -1,
249 .cpu_time_billed_to_others = -1,
250 .physical_writes = -1,
251 .logical_writes = -1,
252 .logical_writes_to_external = -1,
253 #if DEBUG || DEVELOPMENT
254 .pages_grabbed = -1,
255 .pages_grabbed_kern = -1,
256 .pages_grabbed_iopl = -1,
257 .pages_grabbed_upl = -1,
258 #endif
259 .energy_billed_to_me = -1,
260 .energy_billed_to_others = -1};
261
262 /* System sleep state */
263 boolean_t tasks_suspend_state;
264
265
266 void init_task_ledgers(void);
267 void task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1);
268 void task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1);
269 void task_io_rate_exceeded(int warning, const void *param0, __unused const void *param1);
270 void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS(void);
271 void __attribute__((noinline)) PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb, boolean_t is_fatal);
272 void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO(int flavor);
273
274 kern_return_t task_suspend_internal(task_t);
275 kern_return_t task_resume_internal(task_t);
276 static kern_return_t task_start_halt_locked(task_t task, boolean_t should_mark_corpse);
277
278 extern kern_return_t iokit_task_terminate(task_t task);
279 extern void iokit_task_app_suspended_changed(task_t task);
280
281 extern kern_return_t exception_deliver(thread_t, exception_type_t, mach_exception_data_t, mach_msg_type_number_t, struct exception_action *, lck_mtx_t *);
282 extern void bsd_copythreadname(void *dst_uth, void *src_uth);
283 extern kern_return_t thread_resume(thread_t thread);
284
285 // Warn tasks when they hit 80% of their memory limit.
286 #define PHYS_FOOTPRINT_WARNING_LEVEL 80
287
288 #define TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT 150 /* wakeups per second */
289 #define TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL 300 /* in seconds. */
290
291 /*
292 * Level (in terms of percentage of the limit) at which the wakeups monitor triggers telemetry.
293 *
294 * (ie when the task's wakeups rate exceeds 70% of the limit, start taking user
295 * stacktraces, aka micro-stackshots)
296 */
297 #define TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER 70
298
299 int task_wakeups_monitor_interval; /* In seconds. Time period over which wakeups rate is observed */
300 int task_wakeups_monitor_rate; /* In hz. Maximum allowable wakeups per task before EXC_RESOURCE is sent */
301
302 int task_wakeups_monitor_ustackshots_trigger_pct; /* Percentage. Level at which we start gathering telemetry. */
303
304 int disable_exc_resource; /* Global override to supress EXC_RESOURCE for resource monitor violations. */
305
306 ledger_amount_t max_task_footprint = 0; /* Per-task limit on physical memory consumption in bytes */
307 int max_task_footprint_warning_level = 0; /* Per-task limit warning percentage */
308 int max_task_footprint_mb = 0; /* Per-task limit on physical memory consumption in megabytes */
309
310 /* I/O Monitor Limits */
311 #define IOMON_DEFAULT_LIMIT (20480ull) /* MB of logical/physical I/O */
312 #define IOMON_DEFAULT_INTERVAL (86400ull) /* in seconds */
313
314 uint64_t task_iomon_limit_mb; /* Per-task I/O monitor limit in MBs */
315 uint64_t task_iomon_interval_secs; /* Per-task I/O monitor interval in secs */
316
317 #define IO_TELEMETRY_DEFAULT_LIMIT (10ll * 1024ll * 1024ll)
318 int64_t io_telemetry_limit; /* Threshold to take a microstackshot (0 indicated I/O telemetry is turned off) */
319 int64_t global_logical_writes_count = 0; /* Global count for logical writes */
320 int64_t global_logical_writes_to_external_count = 0; /* Global count for logical writes to external storage*/
321 static boolean_t global_update_logical_writes(int64_t, int64_t*);
322
323 #define TASK_MAX_THREAD_LIMIT 256
324
325 #if MACH_ASSERT
326 int pmap_ledgers_panic = 1;
327 int pmap_ledgers_panic_leeway = 3;
328 #endif /* MACH_ASSERT */
329
330 int task_max = CONFIG_TASK_MAX; /* Max number of tasks */
331
332 #if CONFIG_COREDUMP
333 int hwm_user_cores = 0; /* high watermark violations generate user core files */
334 #endif
335
#ifdef MACH_BSD
/*
 * BSD-side process helpers, declared locally for use in this file.
 */
extern uint32_t proc_platform(struct proc *);
extern uint32_t proc_sdk(struct proc *);
extern void proc_getexecutableuuid(void *, unsigned char *, unsigned long);
extern int proc_pid(struct proc *p);
extern int proc_selfpid(void);
extern struct proc *current_proc(void);
extern char *proc_name_address(struct proc *p);
extern uint64_t get_dispatchqueue_offset_from_proc(void *);
extern int kevent_proc_copy_uptrs(void *proc, uint64_t *buf, int bufsize);
extern void workq_proc_suspended(struct proc *p);
extern void workq_proc_resumed(struct proc *p);

#if CONFIG_MEMORYSTATUS
/*
 * Memorystatus entry points.
 */
extern void proc_memstat_terminated(struct proc* p, boolean_t set);
extern void memorystatus_on_ledger_footprint_exceeded(
	int warning,
	boolean_t memlimit_is_active,
	boolean_t memlimit_is_fatal);
extern void memorystatus_log_exception(
	const int max_footprint_mb,
	boolean_t memlimit_is_active,
	boolean_t memlimit_is_fatal);
extern boolean_t memorystatus_allowed_vm_map_fork(task_t task);
extern uint64_t memorystatus_available_memory_internal(proc_t p);

#if DEVELOPMENT || DEBUG
extern void memorystatus_abort_vm_map_fork(task_t);
#endif

#endif /* CONFIG_MEMORYSTATUS */

#endif /* MACH_BSD */
363
#if DEVELOPMENT || DEBUG
/* Read from the "exc_resource_threads" boot-arg in task_init(); defaults to 1 there. */
int exc_resource_threads_enabled;

/*
 * Default EXC_GUARD behavior.  On DEVELOPMENT/DEBUG kernels the mach-port and
 * VM guard DELIVER, ONCE and CORPSE bits are all on, and task_init() lets the
 * "task_exc_guard_default" boot-arg override the value.
 */
uint32_t task_exc_guard_default =
    TASK_EXC_GUARD_MP_DELIVER | TASK_EXC_GUARD_MP_ONCE | TASK_EXC_GUARD_MP_CORPSE |
    TASK_EXC_GUARD_VM_DELIVER | TASK_EXC_GUARD_VM_ONCE | TASK_EXC_GUARD_VM_CORPSE;
#else /* !(DEVELOPMENT || DEBUG) */
/* Default EXC_GUARD behavior: no bits set on other kernels. */
uint32_t task_exc_guard_default = 0;
#endif /* DEVELOPMENT || DEBUG */
374
375 /* Forwards */
376
377 static void task_hold_locked(task_t task);
378 static void task_wait_locked(task_t task, boolean_t until_not_runnable);
379 static void task_release_locked(task_t task);
380
381 static void task_synchronizer_destroy_all(task_t task);
382 static os_ref_count_t
383 task_add_turnstile_watchports_locked(
384 task_t task,
385 struct task_watchports *watchports,
386 struct task_watchport_elem **previous_elem_array,
387 ipc_port_t *portwatch_ports,
388 uint32_t portwatch_count);
389
390 static os_ref_count_t
391 task_remove_turnstile_watchports_locked(
392 task_t task,
393 struct task_watchports *watchports,
394 ipc_port_t *port_freelist);
395
396 static struct task_watchports *
397 task_watchports_alloc_init(
398 task_t task,
399 thread_t thread,
400 uint32_t count);
401
402 static void
403 task_watchports_deallocate(
404 struct task_watchports *watchports);
405
406 void
407 task_set_64bit(
408 task_t task,
409 boolean_t is_64bit,
410 boolean_t is_64bit_data)
411 {
412 #if defined(__i386__) || defined(__x86_64__) || defined(__arm64__)
413 thread_t thread;
414 #endif /* defined(__i386__) || defined(__x86_64__) || defined(__arm64__) */
415
416 task_lock(task);
417
418 /*
419 * Switching to/from 64-bit address spaces
420 */
421 if (is_64bit) {
422 if (!task_has_64Bit_addr(task)) {
423 task_set_64Bit_addr(task);
424 }
425 } else {
426 if (task_has_64Bit_addr(task)) {
427 task_clear_64Bit_addr(task);
428 }
429 }
430
431 /*
432 * Switching to/from 64-bit register state.
433 */
434 if (is_64bit_data) {
435 if (task_has_64Bit_data(task)) {
436 goto out;
437 }
438
439 task_set_64Bit_data(task);
440 } else {
441 if (!task_has_64Bit_data(task)) {
442 goto out;
443 }
444
445 task_clear_64Bit_data(task);
446 }
447
448 /* FIXME: On x86, the thread save state flavor can diverge from the
449 * task's 64-bit feature flag due to the 32-bit/64-bit register save
450 * state dichotomy. Since we can be pre-empted in this interval,
451 * certain routines may observe the thread as being in an inconsistent
452 * state with respect to its task's 64-bitness.
453 */
454
455 #if defined(__x86_64__) || defined(__arm64__)
456 queue_iterate(&task->threads, thread, thread_t, task_threads) {
457 thread_mtx_lock(thread);
458 machine_thread_switch_addrmode(thread);
459 thread_mtx_unlock(thread);
460
461 #if defined(__arm64__)
462 /* specifically, if running on H9 */
463 if (thread == current_thread()) {
464 uint64_t arg1, arg2;
465 int urgency;
466 spl_t spl = splsched();
467 /*
468 * This call tell that the current thread changed it's 32bitness.
469 * Other thread were no more on core when 32bitness was changed,
470 * but current_thread() is on core and the previous call to
471 * machine_thread_going_on_core() gave 32bitness which is now wrong.
472 *
473 * This is needed for bring-up, a different callback should be used
474 * in the future.
475 *
476 * TODO: Remove this callout when we no longer support 32-bit code on H9
477 */
478 thread_lock(thread);
479 urgency = thread_get_urgency(thread, &arg1, &arg2);
480 machine_thread_going_on_core(thread, urgency, 0, 0, mach_approximate_time());
481 thread_unlock(thread);
482 splx(spl);
483 }
484 #endif /* defined(__arm64__) */
485 }
486 #endif /* defined(__x86_64__) || defined(__arm64__) */
487
488 out:
489 task_unlock(task);
490 }
491
492 boolean_t
493 task_get_64bit_data(task_t task)
494 {
495 return task_has_64Bit_data(task);
496 }
497
498 void
499 task_set_platform_binary(
500 task_t task,
501 boolean_t is_platform)
502 {
503 task_lock(task);
504 if (is_platform) {
505 task->t_flags |= TF_PLATFORM;
506 /* set exc guard default behavior for first-party code */
507 task->task_exc_guard = (task_exc_guard_default & TASK_EXC_GUARD_ALL);
508 } else {
509 task->t_flags &= ~(TF_PLATFORM);
510 /* set exc guard default behavior for third-party code */
511 task->task_exc_guard = ((task_exc_guard_default >> TASK_EXC_GUARD_THIRD_PARTY_DEFAULT_SHIFT) & TASK_EXC_GUARD_ALL);
512 }
513 task_unlock(task);
514 }
515
516 /*
517 * Set or clear per-task TF_CA_CLIENT_WI flag according to specified argument.
518 * Returns "false" if flag is already set, and "true" in other cases.
519 */
520 bool
521 task_set_ca_client_wi(
522 task_t task,
523 boolean_t set_or_clear)
524 {
525 bool ret = true;
526 task_lock(task);
527 if (set_or_clear) {
528 /* Tasks can have only one CA_CLIENT work interval */
529 if (task->t_flags & TF_CA_CLIENT_WI) {
530 ret = false;
531 } else {
532 task->t_flags |= TF_CA_CLIENT_WI;
533 }
534 } else {
535 task->t_flags &= ~TF_CA_CLIENT_WI;
536 }
537 task_unlock(task);
538 return ret;
539 }
540
541 void
542 task_set_dyld_info(
543 task_t task,
544 mach_vm_address_t addr,
545 mach_vm_size_t size)
546 {
547 task_lock(task);
548 task->all_image_info_addr = addr;
549 task->all_image_info_size = size;
550 task_unlock(task);
551 }
552
553 void
554 task_set_mach_header_address(
555 task_t task,
556 mach_vm_address_t addr)
557 {
558 task_lock(task);
559 task->mach_header_vm_address = addr;
560 task_unlock(task);
561 }
562
563 void
564 task_atm_reset(__unused task_t task)
565 {
566 #if CONFIG_ATM
567 if (task->atm_context != NULL) {
568 atm_task_descriptor_destroy(task->atm_context);
569 task->atm_context = NULL;
570 }
571 #endif
572 }
573
574 void
575 task_bank_reset(__unused task_t task)
576 {
577 if (task->bank_context != NULL) {
578 bank_task_destroy(task);
579 }
580 }
581
582 /*
583 * NOTE: This should only be called when the P_LINTRANSIT
584 * flag is set (the proc_trans lock is held) on the
585 * proc associated with the task.
586 */
587 void
588 task_bank_init(__unused task_t task)
589 {
590 if (task->bank_context != NULL) {
591 panic("Task bank init called with non null bank context for task: %p and bank_context: %p", task, task->bank_context);
592 }
593 bank_task_initialize(task);
594 }
595
596 void
597 task_set_did_exec_flag(task_t task)
598 {
599 task->t_procflags |= TPF_DID_EXEC;
600 }
601
602 void
603 task_clear_exec_copy_flag(task_t task)
604 {
605 task->t_procflags &= ~TPF_EXEC_COPY;
606 }
607
608 event_t
609 task_get_return_wait_event(task_t task)
610 {
611 return (event_t)&task->returnwait_inheritor;
612 }
613
614 void
615 task_clear_return_wait(task_t task, uint32_t flags)
616 {
617 if (flags & TCRW_CLEAR_INITIAL_WAIT) {
618 thread_wakeup(task_get_return_wait_event(task));
619 }
620
621 if (flags & TCRW_CLEAR_FINAL_WAIT) {
622 is_write_lock(task->itk_space);
623
624 task->t_returnwaitflags &= ~TRW_LRETURNWAIT;
625 task->returnwait_inheritor = NULL;
626
627 if (task->t_returnwaitflags & TRW_LRETURNWAITER) {
628 struct turnstile *turnstile = turnstile_prepare((uintptr_t) task_get_return_wait_event(task),
629 NULL, TURNSTILE_NULL, TURNSTILE_ULOCK);
630
631 waitq_wakeup64_all(&turnstile->ts_waitq,
632 CAST_EVENT64_T(task_get_return_wait_event(task)),
633 THREAD_AWAKENED, 0);
634
635 turnstile_update_inheritor(turnstile, NULL,
636 TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD);
637 turnstile_update_inheritor_complete(turnstile, TURNSTILE_INTERLOCK_HELD);
638
639 turnstile_complete((uintptr_t) task_get_return_wait_event(task), NULL, NULL, TURNSTILE_ULOCK);
640 turnstile_cleanup();
641 task->t_returnwaitflags &= ~TRW_LRETURNWAITER;
642 }
643 is_write_unlock(task->itk_space);
644 }
645 }
646
647 void __attribute__((noreturn))
648 task_wait_to_return(void)
649 {
650 task_t task = current_task();
651
652 is_write_lock(task->itk_space);
653
654 if (task->t_returnwaitflags & TRW_LRETURNWAIT) {
655 struct turnstile *turnstile = turnstile_prepare((uintptr_t) task_get_return_wait_event(task),
656 NULL, TURNSTILE_NULL, TURNSTILE_ULOCK);
657
658 do {
659 task->t_returnwaitflags |= TRW_LRETURNWAITER;
660 turnstile_update_inheritor(turnstile, task->returnwait_inheritor,
661 (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));
662
663 waitq_assert_wait64(&turnstile->ts_waitq,
664 CAST_EVENT64_T(task_get_return_wait_event(task)),
665 THREAD_UNINT, TIMEOUT_WAIT_FOREVER);
666
667 is_write_unlock(task->itk_space);
668
669 turnstile_update_inheritor_complete(turnstile, TURNSTILE_INTERLOCK_NOT_HELD);
670
671 thread_block(THREAD_CONTINUE_NULL);
672
673 is_write_lock(task->itk_space);
674 } while (task->t_returnwaitflags & TRW_LRETURNWAIT);
675
676 turnstile_complete((uintptr_t) task_get_return_wait_event(task), NULL, NULL, TURNSTILE_ULOCK);
677 }
678
679 is_write_unlock(task->itk_space);
680 turnstile_cleanup();
681
682
683 #if CONFIG_MACF
684 /*
685 * Before jumping to userspace and allowing this process to execute any code,
686 * notify any interested parties.
687 */
688 mac_proc_notify_exec_complete(current_proc());
689 #endif
690
691 thread_bootstrap_return();
692 }
693
#ifdef CONFIG_32BIT_TELEMETRY
/*
 * task_consume_32bit_log_flag:
 *
 * Test-and-clear of TPF_LOG_32BIT_TELEMETRY in the task's t_procflags.
 * Returns TRUE if the flag was set (it is cleared before returning) and
 * FALSE if it was not.  No lock is taken here.
 */
boolean_t
task_consume_32bit_log_flag(task_t task)
{
	if ((task->t_procflags & TPF_LOG_32BIT_TELEMETRY) == 0) {
		return FALSE;
	}

	task->t_procflags &= ~TPF_LOG_32BIT_TELEMETRY;
	return TRUE;
}

/*
 * task_set_32bit_log_flag:
 *
 * Set TPF_LOG_32BIT_TELEMETRY in the task's t_procflags.
 */
void
task_set_32bit_log_flag(task_t task)
{
	task->t_procflags = task->t_procflags | TPF_LOG_32BIT_TELEMETRY;
}
#endif /* CONFIG_32BIT_TELEMETRY */
712
713 boolean_t
714 task_is_exec_copy(task_t task)
715 {
716 return task_is_exec_copy_internal(task);
717 }
718
719 boolean_t
720 task_did_exec(task_t task)
721 {
722 return task_did_exec_internal(task);
723 }
724
725 boolean_t
726 task_is_active(task_t task)
727 {
728 return task->active;
729 }
730
731 boolean_t
732 task_is_halting(task_t task)
733 {
734 return task->halting;
735 }
736
#if TASK_REFERENCE_LEAK_DEBUG
#include <kern/btlog.h>

/* Backtrace log of task reference operations; created in task_init(). */
static btlog_t *task_ref_btlog;

/* Operation tags stored with each log entry. */
#define TASK_REF_OP_INCR        0x1
#define TASK_REF_OP_DECR        0x2

/* Log capacity and number of frames captured per entry. */
#define TASK_REF_NUM_RECORDS    100000
#define TASK_REF_BTDEPTH        7

/*
 * task_reference_internal:
 *
 * Take a reference on the task, then log the caller's backtrace as an
 * increment.
 */
void
task_reference_internal(task_t task)
{
	void *frames[TASK_REF_BTDEPTH];
	int frame_count;

	os_ref_retain(&task->ref_count);

	frame_count = OSBacktrace(frames, TASK_REF_BTDEPTH);
	btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_INCR,
	    frames, frame_count);
}

/*
 * task_deallocate_internal:
 *
 * Log the caller's backtrace as a decrement, then drop a reference on the
 * task and return what os_ref_release() reports.
 */
os_ref_count_t
task_deallocate_internal(task_t task)
{
	void *frames[TASK_REF_BTDEPTH];
	int frame_count;

	frame_count = OSBacktrace(frames, TASK_REF_BTDEPTH);
	btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_DECR,
	    frames, frame_count);

	return os_ref_release(&task->ref_count);
}

#endif /* TASK_REFERENCE_LEAK_DEBUG */
774
775 void
776 task_init(void)
777 {
778 lck_grp_attr_setdefault(&task_lck_grp_attr);
779 lck_grp_init(&task_lck_grp, "task", &task_lck_grp_attr);
780 lck_attr_setdefault(&task_lck_attr);
781 lck_mtx_init(&tasks_threads_lock, &task_lck_grp, &task_lck_attr);
782 lck_mtx_init(&tasks_corpse_lock, &task_lck_grp, &task_lck_attr);
783
784 task_zone = zinit(
785 sizeof(struct task),
786 task_max * sizeof(struct task),
787 TASK_CHUNK * sizeof(struct task),
788 "tasks");
789
790 zone_change(task_zone, Z_NOENCRYPT, TRUE);
791
792 #if CONFIG_EMBEDDED
793 task_watch_init();
794 #endif /* CONFIG_EMBEDDED */
795
796 /*
797 * Configure per-task memory limit.
798 * The boot-arg is interpreted as Megabytes,
799 * and takes precedence over the device tree.
800 * Setting the boot-arg to 0 disables task limits.
801 */
802 if (!PE_parse_boot_argn("max_task_pmem", &max_task_footprint_mb,
803 sizeof(max_task_footprint_mb))) {
804 /*
805 * No limit was found in boot-args, so go look in the device tree.
806 */
807 if (!PE_get_default("kern.max_task_pmem", &max_task_footprint_mb,
808 sizeof(max_task_footprint_mb))) {
809 /*
810 * No limit was found in device tree.
811 */
812 max_task_footprint_mb = 0;
813 }
814 }
815
816 if (max_task_footprint_mb != 0) {
817 #if CONFIG_MEMORYSTATUS
818 if (max_task_footprint_mb < 50) {
819 printf("Warning: max_task_pmem %d below minimum.\n",
820 max_task_footprint_mb);
821 max_task_footprint_mb = 50;
822 }
823 printf("Limiting task physical memory footprint to %d MB\n",
824 max_task_footprint_mb);
825
826 max_task_footprint = (ledger_amount_t)max_task_footprint_mb * 1024 * 1024; // Convert MB to bytes
827
828 /*
829 * Configure the per-task memory limit warning level.
830 * This is computed as a percentage.
831 */
832 max_task_footprint_warning_level = 0;
833
834 if (max_mem < 0x40000000) {
835 /*
836 * On devices with < 1GB of memory:
837 * -- set warnings to 50MB below the per-task limit.
838 */
839 if (max_task_footprint_mb > 50) {
840 max_task_footprint_warning_level = ((max_task_footprint_mb - 50) * 100) / max_task_footprint_mb;
841 }
842 } else {
843 /*
844 * On devices with >= 1GB of memory:
845 * -- set warnings to 100MB below the per-task limit.
846 */
847 if (max_task_footprint_mb > 100) {
848 max_task_footprint_warning_level = ((max_task_footprint_mb - 100) * 100) / max_task_footprint_mb;
849 }
850 }
851
852 /*
853 * Never allow warning level to land below the default.
854 */
855 if (max_task_footprint_warning_level < PHYS_FOOTPRINT_WARNING_LEVEL) {
856 max_task_footprint_warning_level = PHYS_FOOTPRINT_WARNING_LEVEL;
857 }
858
859 printf("Limiting task physical memory warning to %d%%\n", max_task_footprint_warning_level);
860
861 #else
862 printf("Warning: max_task_pmem specified, but jetsam not configured; ignoring.\n");
863 #endif /* CONFIG_MEMORYSTATUS */
864 }
865
866 #if DEVELOPMENT || DEBUG
867 if (!PE_parse_boot_argn("exc_resource_threads",
868 &exc_resource_threads_enabled,
869 sizeof(exc_resource_threads_enabled))) {
870 exc_resource_threads_enabled = 1;
871 }
872 PE_parse_boot_argn("task_exc_guard_default",
873 &task_exc_guard_default,
874 sizeof(task_exc_guard_default));
875 #endif /* DEVELOPMENT || DEBUG */
876
877 #if CONFIG_COREDUMP
878 if (!PE_parse_boot_argn("hwm_user_cores", &hwm_user_cores,
879 sizeof(hwm_user_cores))) {
880 hwm_user_cores = 0;
881 }
882 #endif
883
884 proc_init_cpumon_params();
885
886 if (!PE_parse_boot_argn("task_wakeups_monitor_rate", &task_wakeups_monitor_rate, sizeof(task_wakeups_monitor_rate))) {
887 task_wakeups_monitor_rate = TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT;
888 }
889
890 if (!PE_parse_boot_argn("task_wakeups_monitor_interval", &task_wakeups_monitor_interval, sizeof(task_wakeups_monitor_interval))) {
891 task_wakeups_monitor_interval = TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL;
892 }
893
894 if (!PE_parse_boot_argn("task_wakeups_monitor_ustackshots_trigger_pct", &task_wakeups_monitor_ustackshots_trigger_pct,
895 sizeof(task_wakeups_monitor_ustackshots_trigger_pct))) {
896 task_wakeups_monitor_ustackshots_trigger_pct = TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER;
897 }
898
899 if (!PE_parse_boot_argn("disable_exc_resource", &disable_exc_resource,
900 sizeof(disable_exc_resource))) {
901 disable_exc_resource = 0;
902 }
903
904 if (!PE_parse_boot_argn("task_iomon_limit_mb", &task_iomon_limit_mb, sizeof(task_iomon_limit_mb))) {
905 task_iomon_limit_mb = IOMON_DEFAULT_LIMIT;
906 }
907
908 if (!PE_parse_boot_argn("task_iomon_interval_secs", &task_iomon_interval_secs, sizeof(task_iomon_interval_secs))) {
909 task_iomon_interval_secs = IOMON_DEFAULT_INTERVAL;
910 }
911
912 if (!PE_parse_boot_argn("io_telemetry_limit", &io_telemetry_limit, sizeof(io_telemetry_limit))) {
913 io_telemetry_limit = IO_TELEMETRY_DEFAULT_LIMIT;
914 }
915
916 /*
917 * If we have coalitions, coalition_init() will call init_task_ledgers() as it
918 * sets up the ledgers for the default coalition. If we don't have coalitions,
919 * then we have to call it now.
920 */
921 #if CONFIG_COALITIONS
922 assert(task_ledger_template);
923 #else /* CONFIG_COALITIONS */
924 init_task_ledgers();
925 #endif /* CONFIG_COALITIONS */
926
927 #if TASK_REFERENCE_LEAK_DEBUG
928 task_ref_btlog = btlog_create(TASK_REF_NUM_RECORDS, TASK_REF_BTDEPTH, TRUE /* caller_will_remove_entries_for_element? */);
929 assert(task_ref_btlog);
930 #endif
931
932 /*
933 * Create the kernel task as the first task.
934 */
935 #ifdef __LP64__
936 if (task_create_internal(TASK_NULL, NULL, FALSE, TRUE, TRUE, TF_NONE, TPF_NONE, TWF_NONE, &kernel_task) != KERN_SUCCESS)
937 #else
938 if (task_create_internal(TASK_NULL, NULL, FALSE, FALSE, FALSE, TF_NONE, TPF_NONE, TWF_NONE, &kernel_task) != KERN_SUCCESS)
939 #endif
940 { panic("task_init\n");}
941
942 #if defined(HAS_APPLE_PAC)
943 kernel_task->rop_pid = KERNEL_ROP_ID;
944 // kernel_task never runs at EL0, but machine_thread_state_convert_from/to_user() relies on
945 // disable_user_jop to be false for kernel threads (e.g. in exception delivery on thread_exception_daemon)
946 ml_task_set_disable_user_jop(kernel_task, FALSE);
947 #endif
948
949 vm_map_deallocate(kernel_task->map);
950 kernel_task->map = kernel_map;
951 lck_spin_init(&dead_task_statistics_lock, &task_lck_grp, &task_lck_attr);
952 }
953
954 /*
955 * Create a task running in the kernel address space. It may
956 * have its own map of size mem_size and may have ipc privileges.
957 */
958 kern_return_t
959 kernel_task_create(
960 __unused task_t parent_task,
961 __unused vm_offset_t map_base,
962 __unused vm_size_t map_size,
963 __unused task_t *child_task)
964 {
965 return KERN_INVALID_ARGUMENT;
966 }
967
968 kern_return_t
969 task_create(
970 task_t parent_task,
971 __unused ledger_port_array_t ledger_ports,
972 __unused mach_msg_type_number_t num_ledger_ports,
973 __unused boolean_t inherit_memory,
974 __unused task_t *child_task) /* OUT */
975 {
976 if (parent_task == TASK_NULL) {
977 return KERN_INVALID_ARGUMENT;
978 }
979
980 /*
981 * No longer supported: too many calls assume that a task has a valid
982 * process attached.
983 */
984 return KERN_FAILURE;
985 }
986
987 kern_return_t
988 host_security_create_task_token(
989 host_security_t host_security,
990 task_t parent_task,
991 __unused security_token_t sec_token,
992 __unused audit_token_t audit_token,
993 __unused host_priv_t host_priv,
994 __unused ledger_port_array_t ledger_ports,
995 __unused mach_msg_type_number_t num_ledger_ports,
996 __unused boolean_t inherit_memory,
997 __unused task_t *child_task) /* OUT */
998 {
999 if (parent_task == TASK_NULL) {
1000 return KERN_INVALID_ARGUMENT;
1001 }
1002
1003 if (host_security == HOST_NULL) {
1004 return KERN_INVALID_SECURITY;
1005 }
1006
1007 /*
1008 * No longer supported.
1009 */
1010 return KERN_FAILURE;
1011 }
1012
1013 /*
1014 * Task ledgers
1015 * ------------
1016 *
1017 * phys_footprint
1018 * Physical footprint: This is the sum of:
1019 * + (internal - alternate_accounting)
1020 * + (internal_compressed - alternate_accounting_compressed)
1021 * + iokit_mapped
1022 * + purgeable_nonvolatile
1023 * + purgeable_nonvolatile_compressed
1024 * + page_table
1025 *
1026 * internal
1027 * The task's anonymous memory, which on iOS is always resident.
1028 *
1029 * internal_compressed
1030 * Amount of this task's internal memory which is held by the compressor.
1031 * Such memory is no longer actually resident for the task [i.e., resident in its pmap],
1032 * and could be either decompressed back into memory, or paged out to storage, depending
1033 * on our implementation.
1034 *
1035 * iokit_mapped
1036 * IOKit mappings: The total size of all IOKit mappings in this task, regardless of
1037 * clean/dirty or internal/external state].
1038 *
1039 * alternate_accounting
1040 * The number of internal dirty pages which are part of IOKit mappings. By definition, these pages
1041 * are counted in both internal *and* iokit_mapped, so we must subtract them from the total to avoid
1042 * double counting.
1043 *
1044 * pages_grabbed
1045 * pages_grabbed counts all page grabs in a task. It is also broken out into three subtypes
1046 * which track UPL, IOPL and Kernel page grabs.
1047 */
1048 void
1049 init_task_ledgers(void)
1050 {
1051 ledger_template_t t;
1052
1053 assert(task_ledger_template == NULL);
1054 assert(kernel_task == TASK_NULL);
1055
1056 #if MACH_ASSERT
1057 PE_parse_boot_argn("pmap_ledgers_panic",
1058 &pmap_ledgers_panic,
1059 sizeof(pmap_ledgers_panic));
1060 PE_parse_boot_argn("pmap_ledgers_panic_leeway",
1061 &pmap_ledgers_panic_leeway,
1062 sizeof(pmap_ledgers_panic_leeway));
1063 #endif /* MACH_ASSERT */
1064
1065 if ((t = ledger_template_create("Per-task ledger")) == NULL) {
1066 panic("couldn't create task ledger template");
1067 }
1068
1069 task_ledgers.cpu_time = ledger_entry_add(t, "cpu_time", "sched", "ns");
1070 task_ledgers.tkm_private = ledger_entry_add(t, "tkm_private",
1071 "physmem", "bytes");
1072 task_ledgers.tkm_shared = ledger_entry_add(t, "tkm_shared", "physmem",
1073 "bytes");
1074 task_ledgers.phys_mem = ledger_entry_add(t, "phys_mem", "physmem",
1075 "bytes");
1076 task_ledgers.wired_mem = ledger_entry_add(t, "wired_mem", "physmem",
1077 "bytes");
1078 task_ledgers.internal = ledger_entry_add(t, "internal", "physmem",
1079 "bytes");
1080 task_ledgers.iokit_mapped = ledger_entry_add(t, "iokit_mapped", "mappings",
1081 "bytes");
1082 task_ledgers.alternate_accounting = ledger_entry_add(t, "alternate_accounting", "physmem",
1083 "bytes");
1084 task_ledgers.alternate_accounting_compressed = ledger_entry_add(t, "alternate_accounting_compressed", "physmem",
1085 "bytes");
1086 task_ledgers.page_table = ledger_entry_add(t, "page_table", "physmem",
1087 "bytes");
1088 task_ledgers.phys_footprint = ledger_entry_add(t, "phys_footprint", "physmem",
1089 "bytes");
1090 task_ledgers.internal_compressed = ledger_entry_add(t, "internal_compressed", "physmem",
1091 "bytes");
1092 task_ledgers.purgeable_volatile = ledger_entry_add(t, "purgeable_volatile", "physmem", "bytes");
1093 task_ledgers.purgeable_nonvolatile = ledger_entry_add(t, "purgeable_nonvolatile", "physmem", "bytes");
1094 task_ledgers.purgeable_volatile_compressed = ledger_entry_add(t, "purgeable_volatile_compress", "physmem", "bytes");
1095 task_ledgers.purgeable_nonvolatile_compressed = ledger_entry_add(t, "purgeable_nonvolatile_compress", "physmem", "bytes");
1096 #if DEBUG || DEVELOPMENT
1097 task_ledgers.pages_grabbed = ledger_entry_add(t, "pages_grabbed", "physmem", "count");
1098 task_ledgers.pages_grabbed_kern = ledger_entry_add(t, "pages_grabbed_kern", "physmem", "count");
1099 task_ledgers.pages_grabbed_iopl = ledger_entry_add(t, "pages_grabbed_iopl", "physmem", "count");
1100 task_ledgers.pages_grabbed_upl = ledger_entry_add(t, "pages_grabbed_upl", "physmem", "count");
1101 #endif
1102 task_ledgers.tagged_nofootprint = ledger_entry_add(t, "tagged_nofootprint", "physmem", "bytes");
1103 task_ledgers.tagged_footprint = ledger_entry_add(t, "tagged_footprint", "physmem", "bytes");
1104 task_ledgers.tagged_nofootprint_compressed = ledger_entry_add(t, "tagged_nofootprint_compressed", "physmem", "bytes");
1105 task_ledgers.tagged_footprint_compressed = ledger_entry_add(t, "tagged_footprint_compressed", "physmem", "bytes");
1106 task_ledgers.network_volatile = ledger_entry_add(t, "network_volatile", "physmem", "bytes");
1107 task_ledgers.network_nonvolatile = ledger_entry_add(t, "network_nonvolatile", "physmem", "bytes");
1108 task_ledgers.network_volatile_compressed = ledger_entry_add(t, "network_volatile_compressed", "physmem", "bytes");
1109 task_ledgers.network_nonvolatile_compressed = ledger_entry_add(t, "network_nonvolatile_compressed", "physmem", "bytes");
1110 task_ledgers.media_nofootprint = ledger_entry_add(t, "media_nofootprint", "physmem", "bytes");
1111 task_ledgers.media_footprint = ledger_entry_add(t, "media_footprint", "physmem", "bytes");
1112 task_ledgers.media_nofootprint_compressed = ledger_entry_add(t, "media_nofootprint_compressed", "physmem", "bytes");
1113 task_ledgers.media_footprint_compressed = ledger_entry_add(t, "media_footprint_compressed", "physmem", "bytes");
1114 task_ledgers.graphics_nofootprint = ledger_entry_add(t, "graphics_nofootprint", "physmem", "bytes");
1115 task_ledgers.graphics_footprint = ledger_entry_add(t, "graphics_footprint", "physmem", "bytes");
1116 task_ledgers.graphics_nofootprint_compressed = ledger_entry_add(t, "graphics_nofootprint_compressed", "physmem", "bytes");
1117 task_ledgers.graphics_footprint_compressed = ledger_entry_add(t, "graphics_footprint_compressed", "physmem", "bytes");
1118 task_ledgers.neural_nofootprint = ledger_entry_add(t, "neural_nofootprint", "physmem", "bytes");
1119 task_ledgers.neural_footprint = ledger_entry_add(t, "neural_footprint", "physmem", "bytes");
1120 task_ledgers.neural_nofootprint_compressed = ledger_entry_add(t, "neural_nofootprint_compressed", "physmem", "bytes");
1121 task_ledgers.neural_footprint_compressed = ledger_entry_add(t, "neural_footprint_compressed", "physmem", "bytes");
1122
1123
1124 task_ledgers.platform_idle_wakeups = ledger_entry_add(t, "platform_idle_wakeups", "power",
1125 "count");
1126 task_ledgers.interrupt_wakeups = ledger_entry_add(t, "interrupt_wakeups", "power",
1127 "count");
1128
1129 #if CONFIG_SCHED_SFI
1130 sfi_class_id_t class_id, ledger_alias;
1131 for (class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) {
1132 task_ledgers.sfi_wait_times[class_id] = -1;
1133 }
1134
1135 /* don't account for UNSPECIFIED */
1136 for (class_id = SFI_CLASS_UNSPECIFIED + 1; class_id < MAX_SFI_CLASS_ID; class_id++) {
1137 ledger_alias = sfi_get_ledger_alias_for_class(class_id);
1138 if (ledger_alias != SFI_CLASS_UNSPECIFIED) {
1139 /* Check to see if alias has been registered yet */
1140 if (task_ledgers.sfi_wait_times[ledger_alias] != -1) {
1141 task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias];
1142 } else {
1143 /* Otherwise, initialize it first */
1144 task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias] = sfi_ledger_entry_add(t, ledger_alias);
1145 }
1146 } else {
1147 task_ledgers.sfi_wait_times[class_id] = sfi_ledger_entry_add(t, class_id);
1148 }
1149
1150 if (task_ledgers.sfi_wait_times[class_id] < 0) {
1151 panic("couldn't create entries for task ledger template for SFI class 0x%x", class_id);
1152 }
1153 }
1154
1155 assert(task_ledgers.sfi_wait_times[MAX_SFI_CLASS_ID - 1] != -1);
1156 #endif /* CONFIG_SCHED_SFI */
1157
1158 task_ledgers.cpu_time_billed_to_me = ledger_entry_add(t, "cpu_time_billed_to_me", "sched", "ns");
1159 task_ledgers.cpu_time_billed_to_others = ledger_entry_add(t, "cpu_time_billed_to_others", "sched", "ns");
1160 task_ledgers.physical_writes = ledger_entry_add(t, "physical_writes", "res", "bytes");
1161 task_ledgers.logical_writes = ledger_entry_add(t, "logical_writes", "res", "bytes");
1162 task_ledgers.logical_writes_to_external = ledger_entry_add(t, "logical_writes_to_external", "res", "bytes");
1163 task_ledgers.energy_billed_to_me = ledger_entry_add(t, "energy_billed_to_me", "power", "nj");
1164 task_ledgers.energy_billed_to_others = ledger_entry_add(t, "energy_billed_to_others", "power", "nj");
1165
1166 if ((task_ledgers.cpu_time < 0) ||
1167 (task_ledgers.tkm_private < 0) ||
1168 (task_ledgers.tkm_shared < 0) ||
1169 (task_ledgers.phys_mem < 0) ||
1170 (task_ledgers.wired_mem < 0) ||
1171 (task_ledgers.internal < 0) ||
1172 (task_ledgers.iokit_mapped < 0) ||
1173 (task_ledgers.alternate_accounting < 0) ||
1174 (task_ledgers.alternate_accounting_compressed < 0) ||
1175 (task_ledgers.page_table < 0) ||
1176 (task_ledgers.phys_footprint < 0) ||
1177 (task_ledgers.internal_compressed < 0) ||
1178 (task_ledgers.purgeable_volatile < 0) ||
1179 (task_ledgers.purgeable_nonvolatile < 0) ||
1180 (task_ledgers.purgeable_volatile_compressed < 0) ||
1181 (task_ledgers.purgeable_nonvolatile_compressed < 0) ||
1182 (task_ledgers.tagged_nofootprint < 0) ||
1183 (task_ledgers.tagged_footprint < 0) ||
1184 (task_ledgers.tagged_nofootprint_compressed < 0) ||
1185 (task_ledgers.tagged_footprint_compressed < 0) ||
1186 (task_ledgers.network_volatile < 0) ||
1187 (task_ledgers.network_nonvolatile < 0) ||
1188 (task_ledgers.network_volatile_compressed < 0) ||
1189 (task_ledgers.network_nonvolatile_compressed < 0) ||
1190 (task_ledgers.media_nofootprint < 0) ||
1191 (task_ledgers.media_footprint < 0) ||
1192 (task_ledgers.media_nofootprint_compressed < 0) ||
1193 (task_ledgers.media_footprint_compressed < 0) ||
1194 (task_ledgers.graphics_nofootprint < 0) ||
1195 (task_ledgers.graphics_footprint < 0) ||
1196 (task_ledgers.graphics_nofootprint_compressed < 0) ||
1197 (task_ledgers.graphics_footprint_compressed < 0) ||
1198 (task_ledgers.neural_nofootprint < 0) ||
1199 (task_ledgers.neural_footprint < 0) ||
1200 (task_ledgers.neural_nofootprint_compressed < 0) ||
1201 (task_ledgers.neural_footprint_compressed < 0) ||
1202 (task_ledgers.platform_idle_wakeups < 0) ||
1203 (task_ledgers.interrupt_wakeups < 0) ||
1204 (task_ledgers.cpu_time_billed_to_me < 0) || (task_ledgers.cpu_time_billed_to_others < 0) ||
1205 (task_ledgers.physical_writes < 0) ||
1206 (task_ledgers.logical_writes < 0) ||
1207 (task_ledgers.logical_writes_to_external < 0) ||
1208 (task_ledgers.energy_billed_to_me < 0) ||
1209 (task_ledgers.energy_billed_to_others < 0)
1210 ) {
1211 panic("couldn't create entries for task ledger template");
1212 }
1213
1214 ledger_track_credit_only(t, task_ledgers.phys_footprint);
1215 ledger_track_credit_only(t, task_ledgers.page_table);
1216 ledger_track_credit_only(t, task_ledgers.internal);
1217 ledger_track_credit_only(t, task_ledgers.internal_compressed);
1218 ledger_track_credit_only(t, task_ledgers.iokit_mapped);
1219 ledger_track_credit_only(t, task_ledgers.alternate_accounting);
1220 ledger_track_credit_only(t, task_ledgers.alternate_accounting_compressed);
1221 ledger_track_credit_only(t, task_ledgers.purgeable_volatile);
1222 ledger_track_credit_only(t, task_ledgers.purgeable_nonvolatile);
1223 ledger_track_credit_only(t, task_ledgers.purgeable_volatile_compressed);
1224 ledger_track_credit_only(t, task_ledgers.purgeable_nonvolatile_compressed);
1225 #if DEBUG || DEVELOPMENT
1226 ledger_track_credit_only(t, task_ledgers.pages_grabbed);
1227 ledger_track_credit_only(t, task_ledgers.pages_grabbed_kern);
1228 ledger_track_credit_only(t, task_ledgers.pages_grabbed_iopl);
1229 ledger_track_credit_only(t, task_ledgers.pages_grabbed_upl);
1230 #endif
1231 ledger_track_credit_only(t, task_ledgers.tagged_nofootprint);
1232 ledger_track_credit_only(t, task_ledgers.tagged_footprint);
1233 ledger_track_credit_only(t, task_ledgers.tagged_nofootprint_compressed);
1234 ledger_track_credit_only(t, task_ledgers.tagged_footprint_compressed);
1235 ledger_track_credit_only(t, task_ledgers.network_volatile);
1236 ledger_track_credit_only(t, task_ledgers.network_nonvolatile);
1237 ledger_track_credit_only(t, task_ledgers.network_volatile_compressed);
1238 ledger_track_credit_only(t, task_ledgers.network_nonvolatile_compressed);
1239 ledger_track_credit_only(t, task_ledgers.media_nofootprint);
1240 ledger_track_credit_only(t, task_ledgers.media_footprint);
1241 ledger_track_credit_only(t, task_ledgers.media_nofootprint_compressed);
1242 ledger_track_credit_only(t, task_ledgers.media_footprint_compressed);
1243 ledger_track_credit_only(t, task_ledgers.graphics_nofootprint);
1244 ledger_track_credit_only(t, task_ledgers.graphics_footprint);
1245 ledger_track_credit_only(t, task_ledgers.graphics_nofootprint_compressed);
1246 ledger_track_credit_only(t, task_ledgers.graphics_footprint_compressed);
1247 ledger_track_credit_only(t, task_ledgers.neural_nofootprint);
1248 ledger_track_credit_only(t, task_ledgers.neural_footprint);
1249 ledger_track_credit_only(t, task_ledgers.neural_nofootprint_compressed);
1250 ledger_track_credit_only(t, task_ledgers.neural_footprint_compressed);
1251
1252 ledger_track_maximum(t, task_ledgers.phys_footprint, 60);
1253 #if MACH_ASSERT
1254 if (pmap_ledgers_panic) {
1255 ledger_panic_on_negative(t, task_ledgers.phys_footprint);
1256 ledger_panic_on_negative(t, task_ledgers.page_table);
1257 ledger_panic_on_negative(t, task_ledgers.internal);
1258 ledger_panic_on_negative(t, task_ledgers.internal_compressed);
1259 ledger_panic_on_negative(t, task_ledgers.iokit_mapped);
1260 ledger_panic_on_negative(t, task_ledgers.alternate_accounting);
1261 ledger_panic_on_negative(t, task_ledgers.alternate_accounting_compressed);
1262 ledger_panic_on_negative(t, task_ledgers.purgeable_volatile);
1263 ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile);
1264 ledger_panic_on_negative(t, task_ledgers.purgeable_volatile_compressed);
1265 ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile_compressed);
1266
1267 ledger_panic_on_negative(t, task_ledgers.tagged_nofootprint);
1268 ledger_panic_on_negative(t, task_ledgers.tagged_footprint);
1269 ledger_panic_on_negative(t, task_ledgers.tagged_nofootprint_compressed);
1270 ledger_panic_on_negative(t, task_ledgers.tagged_footprint_compressed);
1271 ledger_panic_on_negative(t, task_ledgers.network_volatile);
1272 ledger_panic_on_negative(t, task_ledgers.network_nonvolatile);
1273 ledger_panic_on_negative(t, task_ledgers.network_volatile_compressed);
1274 ledger_panic_on_negative(t, task_ledgers.network_nonvolatile_compressed);
1275 ledger_panic_on_negative(t, task_ledgers.media_nofootprint);
1276 ledger_panic_on_negative(t, task_ledgers.media_footprint);
1277 ledger_panic_on_negative(t, task_ledgers.media_nofootprint_compressed);
1278 ledger_panic_on_negative(t, task_ledgers.media_footprint_compressed);
1279 ledger_panic_on_negative(t, task_ledgers.graphics_nofootprint);
1280 ledger_panic_on_negative(t, task_ledgers.graphics_footprint);
1281 ledger_panic_on_negative(t, task_ledgers.graphics_nofootprint_compressed);
1282 ledger_panic_on_negative(t, task_ledgers.graphics_footprint_compressed);
1283 ledger_panic_on_negative(t, task_ledgers.neural_nofootprint);
1284 ledger_panic_on_negative(t, task_ledgers.neural_footprint);
1285 ledger_panic_on_negative(t, task_ledgers.neural_nofootprint_compressed);
1286 ledger_panic_on_negative(t, task_ledgers.neural_footprint_compressed);
1287 }
1288 #endif /* MACH_ASSERT */
1289
1290 #if CONFIG_MEMORYSTATUS
1291 ledger_set_callback(t, task_ledgers.phys_footprint, task_footprint_exceeded, NULL, NULL);
1292 #endif /* CONFIG_MEMORYSTATUS */
1293
1294 ledger_set_callback(t, task_ledgers.interrupt_wakeups,
1295 task_wakeups_rate_exceeded, NULL, NULL);
1296 ledger_set_callback(t, task_ledgers.physical_writes, task_io_rate_exceeded, (void *)FLAVOR_IO_PHYSICAL_WRITES, NULL);
1297
1298 #if XNU_MONITOR
1299 ledger_template_complete_secure_alloc(t);
1300 #else /* XNU_MONITOR */
1301 ledger_template_complete(t);
1302 #endif /* XNU_MONITOR */
1303 task_ledger_template = t;
1304 }
1305
1306 os_refgrp_decl(static, task_refgrp, "task", NULL);
1307
1308 kern_return_t
1309 task_create_internal(
1310 task_t parent_task,
1311 coalition_t *parent_coalitions __unused,
1312 boolean_t inherit_memory,
1313 __unused boolean_t is_64bit,
1314 boolean_t is_64bit_data,
1315 uint32_t t_flags,
1316 uint32_t t_procflags,
1317 uint8_t t_returnwaitflags,
1318 task_t *child_task) /* OUT */
1319 {
1320 task_t new_task;
1321 vm_shared_region_t shared_region;
1322 ledger_t ledger = NULL;
1323
1324 new_task = (task_t) zalloc(task_zone);
1325
1326 if (new_task == TASK_NULL) {
1327 return KERN_RESOURCE_SHORTAGE;
1328 }
1329
1330 /* one ref for just being alive; one for our caller */
1331 os_ref_init_count(&new_task->ref_count, &task_refgrp, 2);
1332
1333 /* allocate with active entries */
1334 assert(task_ledger_template != NULL);
1335 if ((ledger = ledger_instantiate(task_ledger_template,
1336 LEDGER_CREATE_ACTIVE_ENTRIES)) == NULL) {
1337 zfree(task_zone, new_task);
1338 return KERN_RESOURCE_SHORTAGE;
1339 }
1340
1341 #if defined(HAS_APPLE_PAC)
1342 ml_task_set_rop_pid(new_task, parent_task, inherit_memory);
1343 ml_task_set_disable_user_jop(new_task, inherit_memory ? parent_task->disable_user_jop : FALSE);
1344 #endif
1345
1346 new_task->ledger = ledger;
1347
1348 #if defined(CONFIG_SCHED_MULTIQ)
1349 new_task->sched_group = sched_group_create();
1350 #endif
1351
1352 /* if inherit_memory is true, parent_task MUST not be NULL */
1353 if (!(t_flags & TF_CORPSE_FORK) && inherit_memory) {
1354 new_task->map = vm_map_fork(ledger, parent_task->map, 0);
1355 } else {
1356 unsigned int pmap_flags = is_64bit ? PMAP_CREATE_64BIT : 0;
1357 new_task->map = vm_map_create(pmap_create_options(ledger, 0, pmap_flags),
1358 (vm_map_offset_t)(VM_MIN_ADDRESS),
1359 (vm_map_offset_t)(VM_MAX_ADDRESS), TRUE);
1360 }
1361
1362 /* Inherit memlock limit from parent */
1363 if (parent_task) {
1364 vm_map_set_user_wire_limit(new_task->map, (vm_size_t)parent_task->map->user_wire_limit);
1365 }
1366
1367 lck_mtx_init(&new_task->lock, &task_lck_grp, &task_lck_attr);
1368 queue_init(&new_task->threads);
1369 new_task->suspend_count = 0;
1370 new_task->thread_count = 0;
1371 new_task->active_thread_count = 0;
1372 new_task->user_stop_count = 0;
1373 new_task->legacy_stop_count = 0;
1374 new_task->active = TRUE;
1375 new_task->halting = FALSE;
1376 new_task->priv_flags = 0;
1377 new_task->t_flags = t_flags;
1378 new_task->t_procflags = t_procflags;
1379 new_task->t_returnwaitflags = t_returnwaitflags;
1380 new_task->returnwait_inheritor = current_thread();
1381 new_task->importance = 0;
1382 new_task->crashed_thread_id = 0;
1383 new_task->exec_token = 0;
1384 new_task->watchports = NULL;
1385 new_task->restartable_ranges = NULL;
1386 new_task->task_exc_guard = 0;
1387
1388 #if CONFIG_ATM
1389 new_task->atm_context = NULL;
1390 #endif
1391 new_task->bank_context = NULL;
1392
1393 #ifdef MACH_BSD
1394 new_task->bsd_info = NULL;
1395 new_task->corpse_info = NULL;
1396 #endif /* MACH_BSD */
1397
1398 #if CONFIG_MACF
1399 new_task->crash_label = NULL;
1400 #endif
1401
1402 #if CONFIG_MEMORYSTATUS
1403 if (max_task_footprint != 0) {
1404 ledger_set_limit(ledger, task_ledgers.phys_footprint, max_task_footprint, PHYS_FOOTPRINT_WARNING_LEVEL);
1405 }
1406 #endif /* CONFIG_MEMORYSTATUS */
1407
1408 if (task_wakeups_monitor_rate != 0) {
1409 uint32_t flags = WAKEMON_ENABLE | WAKEMON_SET_DEFAULTS;
1410 int32_t rate; // Ignored because of WAKEMON_SET_DEFAULTS
1411 task_wakeups_monitor_ctl(new_task, &flags, &rate);
1412 }
1413
1414 #if CONFIG_IO_ACCOUNTING
1415 uint32_t flags = IOMON_ENABLE;
1416 task_io_monitor_ctl(new_task, &flags);
1417 #endif /* CONFIG_IO_ACCOUNTING */
1418
1419 machine_task_init(new_task, parent_task, inherit_memory);
1420
1421 new_task->task_debug = NULL;
1422
1423 #if DEVELOPMENT || DEBUG
1424 new_task->task_unnested = FALSE;
1425 new_task->task_disconnected_count = 0;
1426 #endif
1427 queue_init(&new_task->semaphore_list);
1428 new_task->semaphores_owned = 0;
1429
1430 ipc_task_init(new_task, parent_task);
1431
1432 new_task->vtimers = 0;
1433
1434 new_task->shared_region = NULL;
1435
1436 new_task->affinity_space = NULL;
1437
1438 new_task->t_kpc = 0;
1439
1440 new_task->pidsuspended = FALSE;
1441 new_task->frozen = FALSE;
1442 new_task->changing_freeze_state = FALSE;
1443 new_task->rusage_cpu_flags = 0;
1444 new_task->rusage_cpu_percentage = 0;
1445 new_task->rusage_cpu_interval = 0;
1446 new_task->rusage_cpu_deadline = 0;
1447 new_task->rusage_cpu_callt = NULL;
1448 #if MACH_ASSERT
1449 new_task->suspends_outstanding = 0;
1450 #endif
1451
1452 #if HYPERVISOR
1453 new_task->hv_task_target = NULL;
1454 #endif /* HYPERVISOR */
1455
1456 #if CONFIG_EMBEDDED
1457 queue_init(&new_task->task_watchers);
1458 new_task->num_taskwatchers = 0;
1459 new_task->watchapplying = 0;
1460 #endif /* CONFIG_EMBEDDED */
1461
1462 new_task->mem_notify_reserved = 0;
1463 new_task->memlimit_attrs_reserved = 0;
1464
1465 new_task->requested_policy = default_task_requested_policy;
1466 new_task->effective_policy = default_task_effective_policy;
1467
1468 task_importance_init_from_parent(new_task, parent_task);
1469
1470 if (parent_task != TASK_NULL) {
1471 new_task->sec_token = parent_task->sec_token;
1472 new_task->audit_token = parent_task->audit_token;
1473
1474 /* inherit the parent's shared region */
1475 shared_region = vm_shared_region_get(parent_task);
1476 vm_shared_region_set(new_task, shared_region);
1477
1478 if (task_has_64Bit_addr(parent_task)) {
1479 task_set_64Bit_addr(new_task);
1480 }
1481
1482 if (task_has_64Bit_data(parent_task)) {
1483 task_set_64Bit_data(new_task);
1484 }
1485
1486 new_task->all_image_info_addr = parent_task->all_image_info_addr;
1487 new_task->all_image_info_size = parent_task->all_image_info_size;
1488 new_task->mach_header_vm_address = 0;
1489
1490 if (inherit_memory && parent_task->affinity_space) {
1491 task_affinity_create(parent_task, new_task);
1492 }
1493
1494 new_task->pset_hint = parent_task->pset_hint = task_choose_pset(parent_task);
1495
1496 #if DEBUG || DEVELOPMENT
1497 if (parent_task->t_flags & TF_NO_SMT) {
1498 new_task->t_flags |= TF_NO_SMT;
1499 }
1500 #endif
1501
1502 new_task->priority = BASEPRI_DEFAULT;
1503 new_task->max_priority = MAXPRI_USER;
1504
1505 task_policy_create(new_task, parent_task);
1506 } else {
1507 new_task->sec_token = KERNEL_SECURITY_TOKEN;
1508 new_task->audit_token = KERNEL_AUDIT_TOKEN;
1509 #ifdef __LP64__
1510 if (is_64bit) {
1511 task_set_64Bit_addr(new_task);
1512 }
1513 #endif
1514
1515 if (is_64bit_data) {
1516 task_set_64Bit_data(new_task);
1517 }
1518
1519 new_task->all_image_info_addr = (mach_vm_address_t)0;
1520 new_task->all_image_info_size = (mach_vm_size_t)0;
1521
1522 new_task->pset_hint = PROCESSOR_SET_NULL;
1523
1524 if (kernel_task == TASK_NULL) {
1525 new_task->priority = BASEPRI_KERNEL;
1526 new_task->max_priority = MAXPRI_KERNEL;
1527 } else {
1528 new_task->priority = BASEPRI_DEFAULT;
1529 new_task->max_priority = MAXPRI_USER;
1530 }
1531 }
1532
1533 bzero(new_task->coalition, sizeof(new_task->coalition));
1534 for (int i = 0; i < COALITION_NUM_TYPES; i++) {
1535 queue_chain_init(new_task->task_coalition[i]);
1536 }
1537
1538 /* Allocate I/O Statistics */
1539 new_task->task_io_stats = (io_stat_info_t)kalloc(sizeof(struct io_stat_info));
1540 assert(new_task->task_io_stats != NULL);
1541 bzero(new_task->task_io_stats, sizeof(struct io_stat_info));
1542
1543 bzero(&(new_task->cpu_time_eqos_stats), sizeof(new_task->cpu_time_eqos_stats));
1544 bzero(&(new_task->cpu_time_rqos_stats), sizeof(new_task->cpu_time_rqos_stats));
1545
1546 bzero(&new_task->extmod_statistics, sizeof(new_task->extmod_statistics));
1547
1548 /* Copy resource acc. info from Parent for Corpe Forked task. */
1549 if (parent_task != NULL && (t_flags & TF_CORPSE_FORK)) {
1550 task_rollup_accounting_info(new_task, parent_task);
1551 } else {
1552 /* Initialize to zero for standard fork/spawn case */
1553 new_task->total_user_time = 0;
1554 new_task->total_system_time = 0;
1555 new_task->total_ptime = 0;
1556 new_task->total_runnable_time = 0;
1557 new_task->faults = 0;
1558 new_task->pageins = 0;
1559 new_task->cow_faults = 0;
1560 new_task->messages_sent = 0;
1561 new_task->messages_received = 0;
1562 new_task->syscalls_mach = 0;
1563 new_task->syscalls_unix = 0;
1564 new_task->c_switch = 0;
1565 new_task->p_switch = 0;
1566 new_task->ps_switch = 0;
1567 new_task->decompressions = 0;
1568 new_task->low_mem_notified_warn = 0;
1569 new_task->low_mem_notified_critical = 0;
1570 new_task->purged_memory_warn = 0;
1571 new_task->purged_memory_critical = 0;
1572 new_task->low_mem_privileged_listener = 0;
1573 new_task->memlimit_is_active = 0;
1574 new_task->memlimit_is_fatal = 0;
1575 new_task->memlimit_active_exc_resource = 0;
1576 new_task->memlimit_inactive_exc_resource = 0;
1577 new_task->task_timer_wakeups_bin_1 = 0;
1578 new_task->task_timer_wakeups_bin_2 = 0;
1579 new_task->task_gpu_ns = 0;
1580 new_task->task_writes_counters_internal.task_immediate_writes = 0;
1581 new_task->task_writes_counters_internal.task_deferred_writes = 0;
1582 new_task->task_writes_counters_internal.task_invalidated_writes = 0;
1583 new_task->task_writes_counters_internal.task_metadata_writes = 0;
1584 new_task->task_writes_counters_external.task_immediate_writes = 0;
1585 new_task->task_writes_counters_external.task_deferred_writes = 0;
1586 new_task->task_writes_counters_external.task_invalidated_writes = 0;
1587 new_task->task_writes_counters_external.task_metadata_writes = 0;
1588
1589 new_task->task_energy = 0;
1590 #if MONOTONIC
1591 memset(&new_task->task_monotonic, 0, sizeof(new_task->task_monotonic));
1592 #endif /* MONOTONIC */
1593 }
1594
1595
1596 #if CONFIG_COALITIONS
1597 if (!(t_flags & TF_CORPSE_FORK)) {
1598 /* TODO: there is no graceful failure path here... */
1599 if (parent_coalitions && parent_coalitions[COALITION_TYPE_RESOURCE]) {
1600 coalitions_adopt_task(parent_coalitions, new_task);
1601 } else if (parent_task && parent_task->coalition[COALITION_TYPE_RESOURCE]) {
1602 /*
1603 * all tasks at least have a resource coalition, so
1604 * if the parent has one then inherit all coalitions
1605 * the parent is a part of
1606 */
1607 coalitions_adopt_task(parent_task->coalition, new_task);
1608 } else {
1609 /* TODO: assert that new_task will be PID 1 (launchd) */
1610 coalitions_adopt_init_task(new_task);
1611 }
1612 /*
1613 * on exec, we need to transfer the coalition roles from the
1614 * parent task to the exec copy task.
1615 */
1616 if (parent_task && (t_procflags & TPF_EXEC_COPY)) {
1617 int coal_roles[COALITION_NUM_TYPES];
1618 task_coalition_roles(parent_task, coal_roles);
1619 (void)coalitions_set_roles(new_task->coalition, new_task, coal_roles);
1620 }
1621 } else {
1622 coalitions_adopt_corpse_task(new_task);
1623 }
1624
1625 if (new_task->coalition[COALITION_TYPE_RESOURCE] == COALITION_NULL) {
1626 panic("created task is not a member of a resource coalition");
1627 }
1628 #endif /* CONFIG_COALITIONS */
1629
1630 new_task->dispatchqueue_offset = 0;
1631 if (parent_task != NULL) {
1632 new_task->dispatchqueue_offset = parent_task->dispatchqueue_offset;
1633 }
1634
1635 new_task->task_can_transfer_memory_ownership = FALSE;
1636 new_task->task_volatile_objects = 0;
1637 new_task->task_nonvolatile_objects = 0;
1638 new_task->task_objects_disowning = FALSE;
1639 new_task->task_objects_disowned = FALSE;
1640 new_task->task_owned_objects = 0;
1641 queue_init(&new_task->task_objq);
1642 task_objq_lock_init(new_task);
1643
1644 #if __arm64__
1645 new_task->task_legacy_footprint = FALSE;
1646 new_task->task_extra_footprint_limit = FALSE;
1647 new_task->task_ios13extended_footprint_limit = FALSE;
1648 #endif /* __arm64__ */
1649 new_task->task_region_footprint = FALSE;
1650 new_task->task_has_crossed_thread_limit = FALSE;
1651 new_task->task_thread_limit = 0;
1652 #if CONFIG_SECLUDED_MEMORY
1653 new_task->task_can_use_secluded_mem = FALSE;
1654 new_task->task_could_use_secluded_mem = FALSE;
1655 new_task->task_could_also_use_secluded_mem = FALSE;
1656 new_task->task_suppressed_secluded = FALSE;
1657 #endif /* CONFIG_SECLUDED_MEMORY */
1658
1659 /*
1660 * t_flags is set up above. But since we don't
1661 * support darkwake mode being set that way
1662 * currently, we clear it out here explicitly.
1663 */
1664 new_task->t_flags &= ~(TF_DARKWAKE_MODE);
1665
1666 queue_init(&new_task->io_user_clients);
1667 new_task->loadTag = 0;
1668
1669 ipc_task_enable(new_task);
1670
1671 lck_mtx_lock(&tasks_threads_lock);
1672 queue_enter(&tasks, new_task, task_t, tasks);
1673 tasks_count++;
1674 if (tasks_suspend_state) {
1675 task_suspend_internal(new_task);
1676 }
1677 lck_mtx_unlock(&tasks_threads_lock);
1678
1679 *child_task = new_task;
1680 return KERN_SUCCESS;
1681 }
1682
1683 /*
1684 * task_rollup_accounting_info
1685 *
1686 * Roll up accounting stats. Used to rollup stats
1687 * for exec copy task and corpse fork.
1688 */
1689 void
1690 task_rollup_accounting_info(task_t to_task, task_t from_task)
1691 {
1692 assert(from_task != to_task);
1693
1694 to_task->total_user_time = from_task->total_user_time;
1695 to_task->total_system_time = from_task->total_system_time;
1696 to_task->total_ptime = from_task->total_ptime;
1697 to_task->total_runnable_time = from_task->total_runnable_time;
1698 to_task->faults = from_task->faults;
1699 to_task->pageins = from_task->pageins;
1700 to_task->cow_faults = from_task->cow_faults;
1701 to_task->decompressions = from_task->decompressions;
1702 to_task->messages_sent = from_task->messages_sent;
1703 to_task->messages_received = from_task->messages_received;
1704 to_task->syscalls_mach = from_task->syscalls_mach;
1705 to_task->syscalls_unix = from_task->syscalls_unix;
1706 to_task->c_switch = from_task->c_switch;
1707 to_task->p_switch = from_task->p_switch;
1708 to_task->ps_switch = from_task->ps_switch;
1709 to_task->extmod_statistics = from_task->extmod_statistics;
1710 to_task->low_mem_notified_warn = from_task->low_mem_notified_warn;
1711 to_task->low_mem_notified_critical = from_task->low_mem_notified_critical;
1712 to_task->purged_memory_warn = from_task->purged_memory_warn;
1713 to_task->purged_memory_critical = from_task->purged_memory_critical;
1714 to_task->low_mem_privileged_listener = from_task->low_mem_privileged_listener;
1715 *to_task->task_io_stats = *from_task->task_io_stats;
1716 to_task->cpu_time_eqos_stats = from_task->cpu_time_eqos_stats;
1717 to_task->cpu_time_rqos_stats = from_task->cpu_time_rqos_stats;
1718 to_task->task_timer_wakeups_bin_1 = from_task->task_timer_wakeups_bin_1;
1719 to_task->task_timer_wakeups_bin_2 = from_task->task_timer_wakeups_bin_2;
1720 to_task->task_gpu_ns = from_task->task_gpu_ns;
1721 to_task->task_writes_counters_internal.task_immediate_writes = from_task->task_writes_counters_internal.task_immediate_writes;
1722 to_task->task_writes_counters_internal.task_deferred_writes = from_task->task_writes_counters_internal.task_deferred_writes;
1723 to_task->task_writes_counters_internal.task_invalidated_writes = from_task->task_writes_counters_internal.task_invalidated_writes;
1724 to_task->task_writes_counters_internal.task_metadata_writes = from_task->task_writes_counters_internal.task_metadata_writes;
1725 to_task->task_writes_counters_external.task_immediate_writes = from_task->task_writes_counters_external.task_immediate_writes;
1726 to_task->task_writes_counters_external.task_deferred_writes = from_task->task_writes_counters_external.task_deferred_writes;
1727 to_task->task_writes_counters_external.task_invalidated_writes = from_task->task_writes_counters_external.task_invalidated_writes;
1728 to_task->task_writes_counters_external.task_metadata_writes = from_task->task_writes_counters_external.task_metadata_writes;
1729 to_task->task_energy = from_task->task_energy;
1730
1731 /* Skip ledger roll up for memory accounting entries */
1732 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time);
1733 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.platform_idle_wakeups);
1734 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.interrupt_wakeups);
1735 #if CONFIG_SCHED_SFI
1736 for (sfi_class_id_t class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) {
1737 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.sfi_wait_times[class_id]);
1738 }
1739 #endif
1740 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time_billed_to_me);
1741 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time_billed_to_others);
1742 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.physical_writes);
1743 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.logical_writes);
1744 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.energy_billed_to_me);
1745 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.energy_billed_to_others);
1746 }
1747
1748 int task_dropped_imp_count = 0;
1749
1750 /*
1751 * task_deallocate:
1752 *
1753 * Drop a reference on a task.
1754 */
1755 void
1756 task_deallocate(
1757 task_t task)
1758 {
1759 ledger_amount_t credit, debit, interrupt_wakeups, platform_idle_wakeups;
1760 os_ref_count_t refs;
1761
1762 if (task == TASK_NULL) {
1763 return;
1764 }
1765
1766 refs = task_deallocate_internal(task);
1767
1768 #if IMPORTANCE_INHERITANCE
1769 if (refs == 1) {
1770 /*
1771 * If last ref potentially comes from the task's importance,
1772 * disconnect it. But more task refs may be added before
1773 * that completes, so wait for the reference to go to zero
1774 * naturally (it may happen on a recursive task_deallocate()
1775 * from the ipc_importance_disconnect_task() call).
1776 */
1777 if (IIT_NULL != task->task_imp_base) {
1778 ipc_importance_disconnect_task(task);
1779 }
1780 return;
1781 }
1782 #endif /* IMPORTANCE_INHERITANCE */
1783
1784 if (refs > 0) {
1785 return;
1786 }
1787
1788 /*
1789 * The task should be dead at this point. Ensure other resources
1790 * like threads, are gone before we trash the world.
1791 */
1792 assert(queue_empty(&task->threads));
1793 assert(task->bsd_info == NULL);
1794 assert(!is_active(task->itk_space));
1795 assert(!task->active);
1796 assert(task->active_thread_count == 0);
1797
1798 lck_mtx_lock(&tasks_threads_lock);
1799 assert(terminated_tasks_count > 0);
1800 queue_remove(&terminated_tasks, task, task_t, tasks);
1801 terminated_tasks_count--;
1802 lck_mtx_unlock(&tasks_threads_lock);
1803
1804 /*
1805 * remove the reference on atm descriptor
1806 */
1807 task_atm_reset(task);
1808
1809 /*
1810 * remove the reference on bank context
1811 */
1812 task_bank_reset(task);
1813
1814 if (task->task_io_stats) {
1815 kfree(task->task_io_stats, sizeof(struct io_stat_info));
1816 }
1817
1818 /*
1819 * Give the machine dependent code a chance
1820 * to perform cleanup before ripping apart
1821 * the task.
1822 */
1823 machine_task_terminate(task);
1824
1825 ipc_task_terminate(task);
1826
1827 /* let iokit know */
1828 iokit_task_terminate(task);
1829
1830 if (task->affinity_space) {
1831 task_affinity_deallocate(task);
1832 }
1833
1834 #if MACH_ASSERT
1835 if (task->ledger != NULL &&
1836 task->map != NULL &&
1837 task->map->pmap != NULL &&
1838 task->map->pmap->ledger != NULL) {
1839 assert(task->ledger == task->map->pmap->ledger);
1840 }
1841 #endif /* MACH_ASSERT */
1842
1843 vm_owned_objects_disown(task);
1844 assert(task->task_objects_disowned);
1845 if (task->task_volatile_objects != 0 ||
1846 task->task_nonvolatile_objects != 0 ||
1847 task->task_owned_objects != 0) {
1848 panic("task_deallocate(%p): "
1849 "volatile_objects=%d nonvolatile_objects=%d owned=%d\n",
1850 task,
1851 task->task_volatile_objects,
1852 task->task_nonvolatile_objects,
1853 task->task_owned_objects);
1854 }
1855
1856 vm_map_deallocate(task->map);
1857 is_release(task->itk_space);
1858 if (task->restartable_ranges) {
1859 restartable_ranges_release(task->restartable_ranges);
1860 }
1861
1862 ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
1863 &interrupt_wakeups, &debit);
1864 ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
1865 &platform_idle_wakeups, &debit);
1866
1867 #if defined(CONFIG_SCHED_MULTIQ)
1868 sched_group_destroy(task->sched_group);
1869 #endif
1870
1871 /* Accumulate statistics for dead tasks */
1872 lck_spin_lock(&dead_task_statistics_lock);
1873 dead_task_statistics.total_user_time += task->total_user_time;
1874 dead_task_statistics.total_system_time += task->total_system_time;
1875
1876 dead_task_statistics.task_interrupt_wakeups += interrupt_wakeups;
1877 dead_task_statistics.task_platform_idle_wakeups += platform_idle_wakeups;
1878
1879 dead_task_statistics.task_timer_wakeups_bin_1 += task->task_timer_wakeups_bin_1;
1880 dead_task_statistics.task_timer_wakeups_bin_2 += task->task_timer_wakeups_bin_2;
1881 dead_task_statistics.total_ptime += task->total_ptime;
1882 dead_task_statistics.total_pset_switches += task->ps_switch;
1883 dead_task_statistics.task_gpu_ns += task->task_gpu_ns;
1884 dead_task_statistics.task_energy += task->task_energy;
1885
1886 lck_spin_unlock(&dead_task_statistics_lock);
1887 lck_mtx_destroy(&task->lock, &task_lck_grp);
1888
1889 if (!ledger_get_entries(task->ledger, task_ledgers.tkm_private, &credit,
1890 &debit)) {
1891 OSAddAtomic64(credit, (int64_t *)&tasks_tkm_private.alloc);
1892 OSAddAtomic64(debit, (int64_t *)&tasks_tkm_private.free);
1893 }
1894 if (!ledger_get_entries(task->ledger, task_ledgers.tkm_shared, &credit,
1895 &debit)) {
1896 OSAddAtomic64(credit, (int64_t *)&tasks_tkm_shared.alloc);
1897 OSAddAtomic64(debit, (int64_t *)&tasks_tkm_shared.free);
1898 }
1899 ledger_dereference(task->ledger);
1900
1901 #if TASK_REFERENCE_LEAK_DEBUG
1902 btlog_remove_entries_for_element(task_ref_btlog, task);
1903 #endif
1904
1905 #if CONFIG_COALITIONS
1906 task_release_coalitions(task);
1907 #endif /* CONFIG_COALITIONS */
1908
1909 bzero(task->coalition, sizeof(task->coalition));
1910
1911 #if MACH_BSD
1912 /* clean up collected information since last reference to task is gone */
1913 if (task->corpse_info) {
1914 void *corpse_info_kernel = kcdata_memory_get_begin_addr(task->corpse_info);
1915 task_crashinfo_destroy(task->corpse_info);
1916 task->corpse_info = NULL;
1917 if (corpse_info_kernel) {
1918 kfree(corpse_info_kernel, CORPSEINFO_ALLOCATION_SIZE);
1919 }
1920 }
1921 #endif
1922
1923 #if CONFIG_MACF
1924 if (task->crash_label) {
1925 mac_exc_free_label(task->crash_label);
1926 task->crash_label = NULL;
1927 }
1928 #endif
1929
1930 assert(queue_empty(&task->task_objq));
1931
1932 zfree(task_zone, task);
1933 }
1934
1935 /*
1936 * task_name_deallocate:
1937 *
1938 * Drop a reference on a task name.
1939 */
1940 void
1941 task_name_deallocate(
1942 task_name_t task_name)
1943 {
1944 return task_deallocate((task_t)task_name);
1945 }
1946
1947 /*
1948 * task_inspect_deallocate:
1949 *
1950 * Drop a task inspection reference.
1951 */
1952 void
1953 task_inspect_deallocate(
1954 task_inspect_t task_inspect)
1955 {
1956 return task_deallocate((task_t)task_inspect);
1957 }
1958
1959 /*
1960 * task_suspension_token_deallocate:
1961 *
1962 * Drop a reference on a task suspension token.
1963 */
1964 void
1965 task_suspension_token_deallocate(
1966 task_suspension_token_t token)
1967 {
1968 return task_deallocate((task_t)token);
1969 }
1970
1971
1972 /*
1973 * task_collect_crash_info:
1974 *
1975 * collect crash info from bsd and mach based data
1976 */
1977 kern_return_t
1978 task_collect_crash_info(
1979 task_t task,
1980 #ifdef CONFIG_MACF
1981 struct label *crash_label,
1982 #endif
1983 int is_corpse_fork)
1984 {
1985 kern_return_t kr = KERN_SUCCESS;
1986
1987 kcdata_descriptor_t crash_data = NULL;
1988 kcdata_descriptor_t crash_data_release = NULL;
1989 mach_msg_type_number_t size = CORPSEINFO_ALLOCATION_SIZE;
1990 mach_vm_offset_t crash_data_ptr = 0;
1991 void *crash_data_kernel = NULL;
1992 void *crash_data_kernel_release = NULL;
1993 #if CONFIG_MACF
1994 struct label *label, *free_label;
1995 #endif
1996
1997 if (!corpses_enabled()) {
1998 return KERN_NOT_SUPPORTED;
1999 }
2000
2001 #if CONFIG_MACF
2002 free_label = label = mac_exc_create_label();
2003 #endif
2004
2005 task_lock(task);
2006
2007 assert(is_corpse_fork || task->bsd_info != NULL);
2008 if (task->corpse_info == NULL && (is_corpse_fork || task->bsd_info != NULL)) {
2009 #if CONFIG_MACF
2010 /* Set the crash label, used by the exception delivery mac hook */
2011 free_label = task->crash_label; // Most likely NULL.
2012 task->crash_label = label;
2013 mac_exc_update_task_crash_label(task, crash_label);
2014 #endif
2015 task_unlock(task);
2016
2017 crash_data_kernel = (void *) kalloc(CORPSEINFO_ALLOCATION_SIZE);
2018 if (crash_data_kernel == NULL) {
2019 kr = KERN_RESOURCE_SHORTAGE;
2020 goto out_no_lock;
2021 }
2022 bzero(crash_data_kernel, CORPSEINFO_ALLOCATION_SIZE);
2023 crash_data_ptr = (mach_vm_offset_t) crash_data_kernel;
2024
2025 /* Do not get a corpse ref for corpse fork */
2026 crash_data = task_crashinfo_alloc_init((mach_vm_address_t)crash_data_ptr, size,
2027 is_corpse_fork ? 0 : CORPSE_CRASHINFO_HAS_REF,
2028 KCFLAG_USE_MEMCOPY);
2029 if (crash_data) {
2030 task_lock(task);
2031 crash_data_release = task->corpse_info;
2032 crash_data_kernel_release = kcdata_memory_get_begin_addr(crash_data_release);
2033 task->corpse_info = crash_data;
2034
2035 task_unlock(task);
2036 kr = KERN_SUCCESS;
2037 } else {
2038 kfree(crash_data_kernel, CORPSEINFO_ALLOCATION_SIZE);
2039 kr = KERN_FAILURE;
2040 }
2041
2042 if (crash_data_release != NULL) {
2043 task_crashinfo_destroy(crash_data_release);
2044 }
2045 if (crash_data_kernel_release != NULL) {
2046 kfree(crash_data_kernel_release, CORPSEINFO_ALLOCATION_SIZE);
2047 }
2048 } else {
2049 task_unlock(task);
2050 }
2051
2052 out_no_lock:
2053 #if CONFIG_MACF
2054 if (free_label != NULL) {
2055 mac_exc_free_label(free_label);
2056 }
2057 #endif
2058 return kr;
2059 }
2060
2061 /*
2062 * task_deliver_crash_notification:
2063 *
2064 * Makes outcall to registered host port for a corpse.
2065 */
2066 kern_return_t
2067 task_deliver_crash_notification(
2068 task_t task,
2069 thread_t thread,
2070 exception_type_t etype,
2071 mach_exception_subcode_t subcode)
2072 {
2073 kcdata_descriptor_t crash_info = task->corpse_info;
2074 thread_t th_iter = NULL;
2075 kern_return_t kr = KERN_SUCCESS;
2076 wait_interrupt_t wsave;
2077 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
2078 ipc_port_t task_port, old_notify;
2079
2080 if (crash_info == NULL) {
2081 return KERN_FAILURE;
2082 }
2083
2084 task_lock(task);
2085 if (task_is_a_corpse_fork(task)) {
2086 /* Populate code with EXC_{RESOURCE,GUARD} for corpse fork */
2087 code[0] = etype;
2088 code[1] = subcode;
2089 } else {
2090 /* Populate code with EXC_CRASH for corpses */
2091 code[0] = EXC_CRASH;
2092 code[1] = 0;
2093 /* Update the code[1] if the boot-arg corpse_for_fatal_memkill is set */
2094 if (corpse_for_fatal_memkill) {
2095 code[1] = subcode;
2096 }
2097 }
2098
2099 queue_iterate(&task->threads, th_iter, thread_t, task_threads)
2100 {
2101 if (th_iter->corpse_dup == FALSE) {
2102 ipc_thread_reset(th_iter);
2103 }
2104 }
2105 task_unlock(task);
2106
2107 /* Arm the no-sender notification for taskport */
2108 task_reference(task);
2109 task_port = convert_task_to_port(task);
2110 ip_lock(task_port);
2111 require_ip_active(task_port);
2112 ipc_port_nsrequest(task_port, task_port->ip_mscount, ipc_port_make_sonce_locked(task_port), &old_notify);
2113 /* port unlocked */
2114 assert(IP_NULL == old_notify);
2115
2116 wsave = thread_interrupt_level(THREAD_UNINT);
2117 kr = exception_triage_thread(EXC_CORPSE_NOTIFY, code, EXCEPTION_CODE_MAX, thread);
2118 if (kr != KERN_SUCCESS) {
2119 printf("Failed to send exception EXC_CORPSE_NOTIFY. error code: %d for pid %d\n", kr, task_pid(task));
2120 }
2121
2122 (void)thread_interrupt_level(wsave);
2123
2124 /*
2125 * Drop the send right on task port, will fire the
2126 * no-sender notification if exception deliver failed.
2127 */
2128 ipc_port_release_send(task_port);
2129 return kr;
2130 }
2131
2132 /*
2133 * task_terminate:
2134 *
2135 * Terminate the specified task. See comments on thread_terminate
2136 * (kern/thread.c) about problems with terminating the "current task."
2137 */
2138
2139 kern_return_t
2140 task_terminate(
2141 task_t task)
2142 {
2143 if (task == TASK_NULL) {
2144 return KERN_INVALID_ARGUMENT;
2145 }
2146
2147 if (task->bsd_info) {
2148 return KERN_FAILURE;
2149 }
2150
2151 return task_terminate_internal(task);
2152 }
2153
#if MACH_ASSERT
/*
 * Prototypes for routines defined outside this file; in the visible part
 * of the file they are called from task_terminate_internal().
 */
extern int proc_pid(struct proc *p);
extern void proc_name_kdp(task_t t, char *buf, int size);
#endif /* MACH_ASSERT */
2158
2159 #define VM_MAP_PARTIAL_REAP 0x54 /* 0x150 */
2160 static void
2161 __unused task_partial_reap(task_t task, __unused int pid)
2162 {
2163 unsigned int reclaimed_resident = 0;
2164 unsigned int reclaimed_compressed = 0;
2165 uint64_t task_page_count;
2166
2167 task_page_count = (get_task_phys_footprint(task) / PAGE_SIZE_64);
2168
2169 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_START),
2170 pid, task_page_count, 0, 0, 0);
2171
2172 vm_map_partial_reap(task->map, &reclaimed_resident, &reclaimed_compressed);
2173
2174 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_END),
2175 pid, reclaimed_resident, reclaimed_compressed, 0, 0);
2176 }
2177
2178 kern_return_t
2179 task_mark_corpse(task_t task)
2180 {
2181 kern_return_t kr = KERN_SUCCESS;
2182 thread_t self_thread;
2183 (void) self_thread;
2184 wait_interrupt_t wsave;
2185 #if CONFIG_MACF
2186 struct label *crash_label = NULL;
2187 #endif
2188
2189 assert(task != kernel_task);
2190 assert(task == current_task());
2191 assert(!task_is_a_corpse(task));
2192
2193 #if CONFIG_MACF
2194 crash_label = mac_exc_create_label_for_proc((struct proc*)task->bsd_info);
2195 #endif
2196
2197 kr = task_collect_crash_info(task,
2198 #if CONFIG_MACF
2199 crash_label,
2200 #endif
2201 FALSE);
2202 if (kr != KERN_SUCCESS) {
2203 goto out;
2204 }
2205
2206 self_thread = current_thread();
2207
2208 wsave = thread_interrupt_level(THREAD_UNINT);
2209 task_lock(task);
2210
2211 task_set_corpse_pending_report(task);
2212 task_set_corpse(task);
2213 task->crashed_thread_id = thread_tid(self_thread);
2214
2215 kr = task_start_halt_locked(task, TRUE);
2216 assert(kr == KERN_SUCCESS);
2217
2218 ipc_task_reset(task);
2219 /* Remove the naked send right for task port, needed to arm no sender notification */
2220 task_set_special_port_internal(task, TASK_KERNEL_PORT, IPC_PORT_NULL);
2221 ipc_task_enable(task);
2222
2223 task_unlock(task);
2224 /* terminate the ipc space */
2225 ipc_space_terminate(task->itk_space);
2226
2227 /* Add it to global corpse task list */
2228 task_add_to_corpse_task_list(task);
2229
2230 task_start_halt(task);
2231 thread_terminate_internal(self_thread);
2232
2233 (void) thread_interrupt_level(wsave);
2234 assert(task->halting == TRUE);
2235
2236 out:
2237 #if CONFIG_MACF
2238 mac_exc_free_label(crash_label);
2239 #endif
2240 return kr;
2241 }
2242
2243 /*
2244 * task_clear_corpse
2245 *
2246 * Clears the corpse pending bit on task.
2247 * Removes inspection bit on the threads.
2248 */
2249 void
2250 task_clear_corpse(task_t task)
2251 {
2252 thread_t th_iter = NULL;
2253
2254 task_lock(task);
2255 queue_iterate(&task->threads, th_iter, thread_t, task_threads)
2256 {
2257 thread_mtx_lock(th_iter);
2258 th_iter->inspection = FALSE;
2259 thread_mtx_unlock(th_iter);
2260 }
2261
2262 thread_terminate_crashed_threads();
2263 /* remove the pending corpse report flag */
2264 task_clear_corpse_pending_report(task);
2265
2266 task_unlock(task);
2267 }
2268
2269 /*
2270 * task_port_notify
2271 *
2272 * Called whenever the Mach port system detects no-senders on
2273 * the task port of a corpse.
2274 * Each notification that comes in should terminate the task (corpse).
2275 */
2276 void
2277 task_port_notify(mach_msg_header_t *msg)
2278 {
2279 mach_no_senders_notification_t *notification = (void *)msg;
2280 ipc_port_t port = notification->not_header.msgh_remote_port;
2281 task_t task;
2282
2283 require_ip_active(port);
2284 assert(IKOT_TASK == ip_kotype(port));
2285 task = (task_t) ip_get_kobject(port);
2286
2287 assert(task_is_a_corpse(task));
2288
2289 /* Remove the task from global corpse task list */
2290 task_remove_from_corpse_task_list(task);
2291
2292 task_clear_corpse(task);
2293 task_terminate_internal(task);
2294 }
2295
2296 /*
2297 * task_wait_till_threads_terminate_locked
2298 *
2299 * Wait till all the threads in the task are terminated.
2300 * Might release the task lock and re-acquire it.
2301 */
2302 void
2303 task_wait_till_threads_terminate_locked(task_t task)
2304 {
2305 /* wait for all the threads in the task to terminate */
2306 while (task->active_thread_count != 0) {
2307 assert_wait((event_t)&task->active_thread_count, THREAD_UNINT);
2308 task_unlock(task);
2309 thread_block(THREAD_CONTINUE_NULL);
2310
2311 task_lock(task);
2312 }
2313 }
2314
2315 /*
2316 * task_duplicate_map_and_threads
2317 *
2318 * Copy vmmap of source task.
2319 * Copy active threads from source task to destination task.
2320 * Source task would be suspended during the copy.
2321 */
2322 kern_return_t
2323 task_duplicate_map_and_threads(
2324 task_t task,
2325 void *p,
2326 task_t new_task,
2327 thread_t *thread_ret,
2328 uint64_t **udata_buffer,
2329 int *size,
2330 int *num_udata)
2331 {
2332 kern_return_t kr = KERN_SUCCESS;
2333 int active;
2334 thread_t thread, self, thread_return = THREAD_NULL;
2335 thread_t new_thread = THREAD_NULL, first_thread = THREAD_NULL;
2336 thread_t *thread_array;
2337 uint32_t active_thread_count = 0, array_count = 0, i;
2338 vm_map_t oldmap;
2339 uint64_t *buffer = NULL;
2340 int buf_size = 0;
2341 int est_knotes = 0, num_knotes = 0;
2342
2343 self = current_thread();
2344
2345 /*
2346 * Suspend the task to copy thread state, use the internal
2347 * variant so that no user-space process can resume
2348 * the task from under us
2349 */
2350 kr = task_suspend_internal(task);
2351 if (kr != KERN_SUCCESS) {
2352 return kr;
2353 }
2354
2355 if (task->map->disable_vmentry_reuse == TRUE) {
2356 /*
2357 * Quite likely GuardMalloc (or some debugging tool)
2358 * is being used on this task. And it has gone through
2359 * its limit. Making a corpse will likely encounter
2360 * a lot of VM entries that will need COW.
2361 *
2362 * Skip it.
2363 */
2364 #if DEVELOPMENT || DEBUG
2365 memorystatus_abort_vm_map_fork(task);
2366 #endif
2367 task_resume_internal(task);
2368 return KERN_FAILURE;
2369 }
2370
2371 /* Check with VM if vm_map_fork is allowed for this task */
2372 if (memorystatus_allowed_vm_map_fork(task)) {
2373 /* Setup new task's vmmap, switch from parent task's map to it COW map */
2374 oldmap = new_task->map;
2375 new_task->map = vm_map_fork(new_task->ledger,
2376 task->map,
2377 (VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
2378 VM_MAP_FORK_PRESERVE_PURGEABLE |
2379 VM_MAP_FORK_CORPSE_FOOTPRINT));
2380 vm_map_deallocate(oldmap);
2381
2382 /* copy ledgers that impact the memory footprint */
2383 vm_map_copy_footprint_ledgers(task, new_task);
2384
2385 /* Get all the udata pointers from kqueue */
2386 est_knotes = kevent_proc_copy_uptrs(p, NULL, 0);
2387 if (est_knotes > 0) {
2388 buf_size = (est_knotes + 32) * sizeof(uint64_t);
2389 buffer = (uint64_t *) kalloc(buf_size);
2390 num_knotes = kevent_proc_copy_uptrs(p, buffer, buf_size);
2391 if (num_knotes > est_knotes + 32) {
2392 num_knotes = est_knotes + 32;
2393 }
2394 }
2395 }
2396
2397 active_thread_count = task->active_thread_count;
2398 if (active_thread_count == 0) {
2399 if (buffer != NULL) {
2400 kfree(buffer, buf_size);
2401 }
2402 task_resume_internal(task);
2403 return KERN_FAILURE;
2404 }
2405
2406 thread_array = (thread_t *) kalloc(sizeof(thread_t) * active_thread_count);
2407
2408 /* Iterate all the threads and drop the task lock before calling thread_create_with_continuation */
2409 task_lock(task);
2410 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2411 /* Skip inactive threads */
2412 active = thread->active;
2413 if (!active) {
2414 continue;
2415 }
2416
2417 if (array_count >= active_thread_count) {
2418 break;
2419 }
2420
2421 thread_array[array_count++] = thread;
2422 thread_reference(thread);
2423 }
2424 task_unlock(task);
2425
2426 for (i = 0; i < array_count; i++) {
2427 kr = thread_create_with_continuation(new_task, &new_thread, (thread_continue_t)thread_corpse_continue);
2428 if (kr != KERN_SUCCESS) {
2429 break;
2430 }
2431
2432 /* Equivalent of current thread in corpse */
2433 if (thread_array[i] == self) {
2434 thread_return = new_thread;
2435 new_task->crashed_thread_id = thread_tid(new_thread);
2436 } else if (first_thread == NULL) {
2437 first_thread = new_thread;
2438 } else {
2439 /* drop the extra ref returned by thread_create_with_continuation */
2440 thread_deallocate(new_thread);
2441 }
2442
2443 kr = thread_dup2(thread_array[i], new_thread);
2444 if (kr != KERN_SUCCESS) {
2445 thread_mtx_lock(new_thread);
2446 new_thread->corpse_dup = TRUE;
2447 thread_mtx_unlock(new_thread);
2448 continue;
2449 }
2450
2451 /* Copy thread name */
2452 bsd_copythreadname(new_thread->uthread, thread_array[i]->uthread);
2453 new_thread->thread_tag = thread_array[i]->thread_tag;
2454 thread_copy_resource_info(new_thread, thread_array[i]);
2455 }
2456
2457 /* return the first thread if we couldn't find the equivalent of current */
2458 if (thread_return == THREAD_NULL) {
2459 thread_return = first_thread;
2460 } else if (first_thread != THREAD_NULL) {
2461 /* drop the extra ref returned by thread_create_with_continuation */
2462 thread_deallocate(first_thread);
2463 }
2464
2465 task_resume_internal(task);
2466
2467 for (i = 0; i < array_count; i++) {
2468 thread_deallocate(thread_array[i]);
2469 }
2470 kfree(thread_array, sizeof(thread_t) * active_thread_count);
2471
2472 if (kr == KERN_SUCCESS) {
2473 *thread_ret = thread_return;
2474 *udata_buffer = buffer;
2475 *size = buf_size;
2476 *num_udata = num_knotes;
2477 } else {
2478 if (thread_return != THREAD_NULL) {
2479 thread_deallocate(thread_return);
2480 }
2481 if (buffer != NULL) {
2482 kfree(buffer, buf_size);
2483 }
2484 }
2485
2486 return kr;
2487 }
2488
2489 #if CONFIG_SECLUDED_MEMORY
2490 extern void task_set_can_use_secluded_mem_locked(
2491 task_t task,
2492 boolean_t can_use_secluded_mem);
2493 #endif /* CONFIG_SECLUDED_MEMORY */
2494
2495 kern_return_t
2496 task_terminate_internal(
2497 task_t task)
2498 {
2499 thread_t thread, self;
2500 task_t self_task;
2501 boolean_t interrupt_save;
2502 int pid = 0;
2503
2504 assert(task != kernel_task);
2505
2506 self = current_thread();
2507 self_task = self->task;
2508
2509 /*
2510 * Get the task locked and make sure that we are not racing
2511 * with someone else trying to terminate us.
2512 */
2513 if (task == self_task) {
2514 task_lock(task);
2515 } else if (task < self_task) {
2516 task_lock(task);
2517 task_lock(self_task);
2518 } else {
2519 task_lock(self_task);
2520 task_lock(task);
2521 }
2522
2523 #if CONFIG_SECLUDED_MEMORY
2524 if (task->task_can_use_secluded_mem) {
2525 task_set_can_use_secluded_mem_locked(task, FALSE);
2526 }
2527 task->task_could_use_secluded_mem = FALSE;
2528 task->task_could_also_use_secluded_mem = FALSE;
2529
2530 if (task->task_suppressed_secluded) {
2531 stop_secluded_suppression(task);
2532 }
2533 #endif /* CONFIG_SECLUDED_MEMORY */
2534
2535 if (!task->active) {
2536 /*
2537 * Task is already being terminated.
2538 * Just return an error. If we are dying, this will
2539 * just get us to our AST special handler and that
2540 * will get us to finalize the termination of ourselves.
2541 */
2542 task_unlock(task);
2543 if (self_task != task) {
2544 task_unlock(self_task);
2545 }
2546
2547 return KERN_FAILURE;
2548 }
2549
2550 if (task_corpse_pending_report(task)) {
2551 /*
2552 * Task is marked for reporting as corpse.
2553 * Just return an error. This will
2554 * just get us to our AST special handler and that
2555 * will get us to finish the path to death
2556 */
2557 task_unlock(task);
2558 if (self_task != task) {
2559 task_unlock(self_task);
2560 }
2561
2562 return KERN_FAILURE;
2563 }
2564
2565 if (self_task != task) {
2566 task_unlock(self_task);
2567 }
2568
2569 /*
2570 * Make sure the current thread does not get aborted out of
2571 * the waits inside these operations.
2572 */
2573 interrupt_save = thread_interrupt_level(THREAD_UNINT);
2574
2575 /*
2576 * Indicate that we want all the threads to stop executing
2577 * at user space by holding the task (we would have held
2578 * each thread independently in thread_terminate_internal -
2579 * but this way we may be more likely to already find it
2580 * held there). Mark the task inactive, and prevent
2581 * further task operations via the task port.
2582 */
2583 task_hold_locked(task);
2584 task->active = FALSE;
2585 ipc_task_disable(task);
2586
2587 #if CONFIG_TELEMETRY
2588 /*
2589 * Notify telemetry that this task is going away.
2590 */
2591 telemetry_task_ctl_locked(task, TF_TELEMETRY, 0);
2592 #endif
2593
2594 /*
2595 * Terminate each thread in the task.
2596 */
2597 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2598 thread_terminate_internal(thread);
2599 }
2600
2601 #ifdef MACH_BSD
2602 if (task->bsd_info != NULL && !task_is_exec_copy(task)) {
2603 pid = proc_pid(task->bsd_info);
2604 }
2605 #endif /* MACH_BSD */
2606
2607 task_unlock(task);
2608
2609 proc_set_task_policy(task, TASK_POLICY_ATTRIBUTE,
2610 TASK_POLICY_TERMINATED, TASK_POLICY_ENABLE);
2611
2612 /* Early object reap phase */
2613
2614 // PR-17045188: Revisit implementation
2615 // task_partial_reap(task, pid);
2616
2617 #if CONFIG_EMBEDDED
2618 /*
2619 * remove all task watchers
2620 */
2621 task_removewatchers(task);
2622
2623 #endif /* CONFIG_EMBEDDED */
2624
2625 /*
2626 * Destroy all synchronizers owned by the task.
2627 */
2628 task_synchronizer_destroy_all(task);
2629
2630 /*
2631 * Clear the watchport boost on the task.
2632 */
2633 task_remove_turnstile_watchports(task);
2634
2635 /*
2636 * Destroy the IPC space, leaving just a reference for it.
2637 */
2638 ipc_space_terminate(task->itk_space);
2639
2640 #if 00
2641 /* if some ledgers go negative on tear-down again... */
2642 ledger_disable_panic_on_negative(task->map->pmap->ledger,
2643 task_ledgers.phys_footprint);
2644 ledger_disable_panic_on_negative(task->map->pmap->ledger,
2645 task_ledgers.internal);
2646 ledger_disable_panic_on_negative(task->map->pmap->ledger,
2647 task_ledgers.internal_compressed);
2648 ledger_disable_panic_on_negative(task->map->pmap->ledger,
2649 task_ledgers.iokit_mapped);
2650 ledger_disable_panic_on_negative(task->map->pmap->ledger,
2651 task_ledgers.alternate_accounting);
2652 ledger_disable_panic_on_negative(task->map->pmap->ledger,
2653 task_ledgers.alternate_accounting_compressed);
2654 #endif
2655
2656 /*
2657 * If the current thread is a member of the task
2658 * being terminated, then the last reference to
2659 * the task will not be dropped until the thread
2660 * is finally reaped. To avoid incurring the
2661 * expense of removing the address space regions
2662 * at reap time, we do it explictly here.
2663 */
2664
2665 vm_map_lock(task->map);
2666 vm_map_disable_hole_optimization(task->map);
2667 vm_map_unlock(task->map);
2668
2669 #if MACH_ASSERT
2670 /*
2671 * Identify the pmap's process, in case the pmap ledgers drift
2672 * and we have to report it.
2673 */
2674 char procname[17];
2675 if (task->bsd_info && !task_is_exec_copy(task)) {
2676 pid = proc_pid(task->bsd_info);
2677 proc_name_kdp(task, procname, sizeof(procname));
2678 } else {
2679 pid = 0;
2680 strlcpy(procname, "<unknown>", sizeof(procname));
2681 }
2682 pmap_set_process(task->map->pmap, pid, procname);
2683 #endif /* MACH_ASSERT */
2684
2685 vm_map_terminate(task->map);
2686
2687 /* release our shared region */
2688 vm_shared_region_set(task, NULL);
2689
2690
2691 lck_mtx_lock(&tasks_threads_lock);
2692 queue_remove(&tasks, task, task_t, tasks);
2693 queue_enter(&terminated_tasks, task, task_t, tasks);
2694 tasks_count--;
2695 terminated_tasks_count++;
2696 lck_mtx_unlock(&tasks_threads_lock);
2697
2698 /*
2699 * We no longer need to guard against being aborted, so restore
2700 * the previous interruptible state.
2701 */
2702 thread_interrupt_level(interrupt_save);
2703
2704 #if KPC
2705 /* force the task to release all ctrs */
2706 if (task->t_kpc & TASK_KPC_FORCED_ALL_CTRS) {
2707 kpc_force_all_ctrs(task, 0);
2708 }
2709 #endif /* KPC */
2710
2711 #if CONFIG_COALITIONS
2712 /*
2713 * Leave our coalitions. (drop activation but not reference)
2714 */
2715 coalitions_remove_task(task);
2716 #endif
2717
2718 /*
2719 * Get rid of the task active reference on itself.
2720 */
2721 task_deallocate(task);
2722
2723 return KERN_SUCCESS;
2724 }
2725
2726 void
2727 tasks_system_suspend(boolean_t suspend)
2728 {
2729 task_t task;
2730
2731 lck_mtx_lock(&tasks_threads_lock);
2732 assert(tasks_suspend_state != suspend);
2733 tasks_suspend_state = suspend;
2734 queue_iterate(&tasks, task, task_t, tasks) {
2735 if (task == kernel_task) {
2736 continue;
2737 }
2738 suspend ? task_suspend_internal(task) : task_resume_internal(task);
2739 }
2740 lck_mtx_unlock(&tasks_threads_lock);
2741 }
2742
2743 /*
2744 * task_start_halt:
2745 *
2746 * Shut the current task down (except for the current thread) in
2747 * preparation for dramatic changes to the task (probably exec).
2748 * We hold the task and mark all other threads in the task for
2749 * termination.
2750 */
2751 kern_return_t
2752 task_start_halt(task_t task)
2753 {
2754 kern_return_t kr = KERN_SUCCESS;
2755 task_lock(task);
2756 kr = task_start_halt_locked(task, FALSE);
2757 task_unlock(task);
2758 return kr;
2759 }
2760
2761 static kern_return_t
2762 task_start_halt_locked(task_t task, boolean_t should_mark_corpse)
2763 {
2764 thread_t thread, self;
2765 uint64_t dispatchqueue_offset;
2766
2767 assert(task != kernel_task);
2768
2769 self = current_thread();
2770
2771 if (task != self->task && !task_is_a_corpse_fork(task)) {
2772 return KERN_INVALID_ARGUMENT;
2773 }
2774
2775 if (task->halting || !task->active || !self->active) {
2776 /*
2777 * Task or current thread is already being terminated.
2778 * Hurry up and return out of the current kernel context
2779 * so that we run our AST special handler to terminate
2780 * ourselves.
2781 */
2782 return KERN_FAILURE;
2783 }
2784
2785 task->halting = TRUE;
2786
2787 /*
2788 * Mark all the threads to keep them from starting any more
2789 * user-level execution. The thread_terminate_internal code
2790 * would do this on a thread by thread basis anyway, but this
2791 * gives us a better chance of not having to wait there.
2792 */
2793 task_hold_locked(task);
2794 dispatchqueue_offset = get_dispatchqueue_offset_from_proc(task->bsd_info);
2795
2796 /*
2797 * Terminate all the other threads in the task.
2798 */
2799 queue_iterate(&task->threads, thread, thread_t, task_threads)
2800 {
2801 if (should_mark_corpse) {
2802 thread_mtx_lock(thread);
2803 thread->inspection = TRUE;
2804 thread_mtx_unlock(thread);
2805 }
2806 if (thread != self) {
2807 thread_terminate_internal(thread);
2808 }
2809 }
2810 task->dispatchqueue_offset = dispatchqueue_offset;
2811
2812 task_release_locked(task);
2813
2814 return KERN_SUCCESS;
2815 }
2816
2817
2818 /*
2819 * task_complete_halt:
2820 *
2821 * Complete task halt by waiting for threads to terminate, then clean
2822 * up task resources (VM, port namespace, etc...) and then let the
2823 * current thread go in the (practically empty) task context.
2824 *
2825 * Note: task->halting flag is not cleared in order to avoid creation
2826 * of new thread in old exec'ed task.
2827 */
2828 void
2829 task_complete_halt(task_t task)
2830 {
2831 task_lock(task);
2832 assert(task->halting);
2833 assert(task == current_task());
2834
2835 /*
2836 * Wait for the other threads to get shut down.
2837 * When the last other thread is reaped, we'll be
2838 * woken up.
2839 */
2840 if (task->thread_count > 1) {
2841 assert_wait((event_t)&task->halting, THREAD_UNINT);
2842 task_unlock(task);
2843 thread_block(THREAD_CONTINUE_NULL);
2844 } else {
2845 task_unlock(task);
2846 }
2847
2848 /*
2849 * Give the machine dependent code a chance
2850 * to perform cleanup of task-level resources
2851 * associated with the current thread before
2852 * ripping apart the task.
2853 */
2854 machine_task_terminate(task);
2855
2856 /*
2857 * Destroy all synchronizers owned by the task.
2858 */
2859 task_synchronizer_destroy_all(task);
2860
2861 /*
2862 * Destroy the contents of the IPC space, leaving just
2863 * a reference for it.
2864 */
2865 ipc_space_clean(task->itk_space);
2866
2867 /*
2868 * Clean out the address space, as we are going to be
2869 * getting a new one.
2870 */
2871 vm_map_remove(task->map, task->map->min_offset,
2872 task->map->max_offset,
2873 /*
2874 * Final cleanup:
2875 * + no unnesting
2876 * + remove immutable mappings
2877 * + allow gaps in the range
2878 */
2879 (VM_MAP_REMOVE_NO_UNNESTING |
2880 VM_MAP_REMOVE_IMMUTABLE |
2881 VM_MAP_REMOVE_GAPS_OK));
2882
2883 /*
2884 * Kick out any IOKitUser handles to the task. At best they're stale,
2885 * at worst someone is racing a SUID exec.
2886 */
2887 iokit_task_terminate(task);
2888 }
2889
2890 /*
2891 * task_hold_locked:
2892 *
2893 * Suspend execution of the specified task.
2894 * This is a recursive-style suspension of the task, a count of
2895 * suspends is maintained.
2896 *
2897 * CONDITIONS: the task is locked and active.
2898 */
2899 void
2900 task_hold_locked(
2901 task_t task)
2902 {
2903 thread_t thread;
2904
2905 assert(task->active);
2906
2907 if (task->suspend_count++ > 0) {
2908 return;
2909 }
2910
2911 if (task->bsd_info) {
2912 workq_proc_suspended(task->bsd_info);
2913 }
2914
2915 /*
2916 * Iterate through all the threads and hold them.
2917 */
2918 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2919 thread_mtx_lock(thread);
2920 thread_hold(thread);
2921 thread_mtx_unlock(thread);
2922 }
2923 }
2924
2925 /*
2926 * task_hold:
2927 *
2928 * Same as the internal routine above, except that is must lock
2929 * and verify that the task is active. This differs from task_suspend
2930 * in that it places a kernel hold on the task rather than just a
2931 * user-level hold. This keeps users from over resuming and setting
2932 * it running out from under the kernel.
2933 *
2934 * CONDITIONS: the caller holds a reference on the task
2935 */
2936 kern_return_t
2937 task_hold(
2938 task_t task)
2939 {
2940 if (task == TASK_NULL) {
2941 return KERN_INVALID_ARGUMENT;
2942 }
2943
2944 task_lock(task);
2945
2946 if (!task->active) {
2947 task_unlock(task);
2948
2949 return KERN_FAILURE;
2950 }
2951
2952 task_hold_locked(task);
2953 task_unlock(task);
2954
2955 return KERN_SUCCESS;
2956 }
2957
2958 kern_return_t
2959 task_wait(
2960 task_t task,
2961 boolean_t until_not_runnable)
2962 {
2963 if (task == TASK_NULL) {
2964 return KERN_INVALID_ARGUMENT;
2965 }
2966
2967 task_lock(task);
2968
2969 if (!task->active) {
2970 task_unlock(task);
2971
2972 return KERN_FAILURE;
2973 }
2974
2975 task_wait_locked(task, until_not_runnable);
2976 task_unlock(task);
2977
2978 return KERN_SUCCESS;
2979 }
2980
2981 /*
2982 * task_wait_locked:
2983 *
2984 * Wait for all threads in task to stop.
2985 *
2986 * Conditions:
2987 * Called with task locked, active, and held.
2988 */
2989 void
2990 task_wait_locked(
2991 task_t task,
2992 boolean_t until_not_runnable)
2993 {
2994 thread_t thread, self;
2995
2996 assert(task->active);
2997 assert(task->suspend_count > 0);
2998
2999 self = current_thread();
3000
3001 /*
3002 * Iterate through all the threads and wait for them to
3003 * stop. Do not wait for the current thread if it is within
3004 * the task.
3005 */
3006 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3007 if (thread != self) {
3008 thread_wait(thread, until_not_runnable);
3009 }
3010 }
3011 }
3012
3013 boolean_t
3014 task_is_app_suspended(task_t task)
3015 {
3016 return task->pidsuspended;
3017 }
3018
3019 /*
3020 * task_release_locked:
3021 *
3022 * Release a kernel hold on a task.
3023 *
3024 * CONDITIONS: the task is locked and active
3025 */
3026 void
3027 task_release_locked(
3028 task_t task)
3029 {
3030 thread_t thread;
3031
3032 assert(task->active);
3033 assert(task->suspend_count > 0);
3034
3035 if (--task->suspend_count > 0) {
3036 return;
3037 }
3038
3039 if (task->bsd_info) {
3040 workq_proc_resumed(task->bsd_info);
3041 }
3042
3043 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3044 thread_mtx_lock(thread);
3045 thread_release(thread);
3046 thread_mtx_unlock(thread);
3047 }
3048 }
3049
3050 /*
3051 * task_release:
3052 *
3053 * Same as the internal routine above, except that it must lock
3054 * and verify that the task is active.
3055 *
3056 * CONDITIONS: The caller holds a reference to the task
3057 */
3058 kern_return_t
3059 task_release(
3060 task_t task)
3061 {
3062 if (task == TASK_NULL) {
3063 return KERN_INVALID_ARGUMENT;
3064 }
3065
3066 task_lock(task);
3067
3068 if (!task->active) {
3069 task_unlock(task);
3070
3071 return KERN_FAILURE;
3072 }
3073
3074 task_release_locked(task);
3075 task_unlock(task);
3076
3077 return KERN_SUCCESS;
3078 }
3079
3080 kern_return_t
3081 task_threads(
3082 task_t task,
3083 thread_act_array_t *threads_out,
3084 mach_msg_type_number_t *count)
3085 {
3086 mach_msg_type_number_t actual;
3087 thread_t *thread_list;
3088 thread_t thread;
3089 vm_size_t size, size_needed;
3090 void *addr;
3091 unsigned int i, j;
3092
3093 if (task == TASK_NULL) {
3094 return KERN_INVALID_ARGUMENT;
3095 }
3096
3097 size = 0; addr = NULL;
3098
3099 for (;;) {
3100 task_lock(task);
3101 if (!task->active) {
3102 task_unlock(task);
3103
3104 if (size != 0) {
3105 kfree(addr, size);
3106 }
3107
3108 return KERN_FAILURE;
3109 }
3110
3111 actual = task->thread_count;
3112
3113 /* do we have the memory we need? */
3114 size_needed = actual * sizeof(mach_port_t);
3115 if (size_needed <= size) {
3116 break;
3117 }
3118
3119 /* unlock the task and allocate more memory */
3120 task_unlock(task);
3121
3122 if (size != 0) {
3123 kfree(addr, size);
3124 }
3125
3126 assert(size_needed > 0);
3127 size = size_needed;
3128
3129 addr = kalloc(size);
3130 if (addr == 0) {
3131 return KERN_RESOURCE_SHORTAGE;
3132 }
3133 }
3134
3135 /* OK, have memory and the task is locked & active */
3136 thread_list = (thread_t *)addr;
3137
3138 i = j = 0;
3139
3140 for (thread = (thread_t)queue_first(&task->threads); i < actual;
3141 ++i, thread = (thread_t)queue_next(&thread->task_threads)) {
3142 thread_reference_internal(thread);
3143 thread_list[j++] = thread;
3144 }
3145
3146 assert(queue_end(&task->threads, (queue_entry_t)thread));
3147
3148 actual = j;
3149 size_needed = actual * sizeof(mach_port_t);
3150
3151 /* can unlock task now that we've got the thread refs */
3152 task_unlock(task);
3153
3154 if (actual == 0) {
3155 /* no threads, so return null pointer and deallocate memory */
3156
3157 *threads_out = NULL;
3158 *count = 0;
3159
3160 if (size != 0) {
3161 kfree(addr, size);
3162 }
3163 } else {
3164 /* if we allocated too much, must copy */
3165
3166 if (size_needed < size) {
3167 void *newaddr;
3168
3169 newaddr = kalloc(size_needed);
3170 if (newaddr == 0) {
3171 for (i = 0; i < actual; ++i) {
3172 thread_deallocate(thread_list[i]);
3173 }
3174 kfree(addr, size);
3175 return KERN_RESOURCE_SHORTAGE;
3176 }
3177
3178 bcopy(addr, newaddr, size_needed);
3179 kfree(addr, size);
3180 thread_list = (thread_t *)newaddr;
3181 }
3182
3183 *threads_out = thread_list;
3184 *count = actual;
3185
3186 /* do the conversion that Mig should handle */
3187
3188 for (i = 0; i < actual; ++i) {
3189 ((ipc_port_t *) thread_list)[i] = convert_thread_to_port(thread_list[i]);
3190 }
3191 }
3192
3193 return KERN_SUCCESS;
3194 }
3195
/*
 * Hold modes understood by place_task_hold() / release_task_hold().
 */
#define TASK_HOLD_NORMAL        0       /* plain hold; no extra bookkeeping */
#define TASK_HOLD_PIDSUSPEND    1       /* hold paired with task->pidsuspended */
#define TASK_HOLD_LEGACY        2       /* hold also counted in legacy_stop_count */
#define TASK_HOLD_LEGACY_ALL    3       /* release only: drop every legacy hold */
3200
3201 static kern_return_t
3202 place_task_hold(
3203 task_t task,
3204 int mode)
3205 {
3206 if (!task->active && !task_is_a_corpse(task)) {
3207 return KERN_FAILURE;
3208 }
3209
3210 /* Return success for corpse task */
3211 if (task_is_a_corpse(task)) {
3212 return KERN_SUCCESS;
3213 }
3214
3215 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
3216 MACHDBG_CODE(DBG_MACH_IPC, MACH_TASK_SUSPEND) | DBG_FUNC_NONE,
3217 task_pid(task), ((thread_t)queue_first(&task->threads))->thread_id,
3218 task->user_stop_count, task->user_stop_count + 1, 0);
3219
3220 #if MACH_ASSERT
3221 current_task()->suspends_outstanding++;
3222 #endif
3223
3224 if (mode == TASK_HOLD_LEGACY) {
3225 task->legacy_stop_count++;
3226 }
3227
3228 if (task->user_stop_count++ > 0) {
3229 /*
3230 * If the stop count was positive, the task is
3231 * already stopped and we can exit.
3232 */
3233 return KERN_SUCCESS;
3234 }
3235
3236 /*
3237 * Put a kernel-level hold on the threads in the task (all
3238 * user-level task suspensions added together represent a
3239 * single kernel-level hold). We then wait for the threads
3240 * to stop executing user code.
3241 */
3242 task_hold_locked(task);
3243 task_wait_locked(task, FALSE);
3244
3245 return KERN_SUCCESS;
3246 }
3247
3248 static kern_return_t
3249 release_task_hold(
3250 task_t task,
3251 int mode)
3252 {
3253 boolean_t release = FALSE;
3254
3255 if (!task->active && !task_is_a_corpse(task)) {
3256 return KERN_FAILURE;
3257 }
3258
3259 /* Return success for corpse task */
3260 if (task_is_a_corpse(task)) {
3261 return KERN_SUCCESS;
3262 }
3263
3264 if (mode == TASK_HOLD_PIDSUSPEND) {
3265 if (task->pidsuspended == FALSE) {
3266 return KERN_FAILURE;
3267 }
3268 task->pidsuspended = FALSE;
3269 }
3270
3271 if (task->user_stop_count > (task->pidsuspended ? 1 : 0)) {
3272 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
3273 MACHDBG_CODE(DBG_MACH_IPC, MACH_TASK_RESUME) | DBG_FUNC_NONE,
3274 task_pid(task), ((thread_t)queue_first(&task->threads))->thread_id,
3275 task->user_stop_count, mode, task->legacy_stop_count);
3276
3277 #if MACH_ASSERT
3278 /*
3279 * This is obviously not robust; if we suspend one task and then resume a different one,
3280 * we'll fly under the radar. This is only meant to catch the common case of a crashed
3281 * or buggy suspender.
3282 */
3283 current_task()->suspends_outstanding--;
3284 #endif
3285
3286 if (mode == TASK_HOLD_LEGACY_ALL) {
3287 if (task->legacy_stop_count >= task->user_stop_count) {
3288 task->user_stop_count = 0;
3289 release = TRUE;
3290 } else {
3291 task->user_stop_count -= task->legacy_stop_count;
3292 }
3293 task->legacy_stop_count = 0;
3294 } else {
3295 if (mode == TASK_HOLD_LEGACY && task->legacy_stop_count > 0) {
3296 task->legacy_stop_count--;
3297 }
3298 if (--task->user_stop_count == 0) {
3299 release = TRUE;
3300 }
3301 }
3302 } else {
3303 return KERN_FAILURE;
3304 }
3305
3306 /*
3307 * Release the task if necessary.
3308 */
3309 if (release) {
3310 task_release_locked(task);
3311 }
3312
3313 return KERN_SUCCESS;
3314 }
3315
3316 boolean_t
3317 get_task_suspended(task_t task)
3318 {
3319 return 0 != task->user_stop_count;
3320 }
3321
3322 /*
3323 * task_suspend:
3324 *
3325 * Implement an (old-fashioned) user-level suspension on a task.
3326 *
3327 * Because the user isn't expecting to have to manage a suspension
3328 * token, we'll track it for him in the kernel in the form of a naked
3329 * send right to the task's resume port. All such send rights
3330 * account for a single suspension against the task (unlike task_suspend2()
3331 * where each caller gets a unique suspension count represented by a
3332 * unique send-once right).
3333 *
3334 * Conditions:
3335 * The caller holds a reference to the task
3336 */
3337 kern_return_t
3338 task_suspend(
3339 task_t task)
3340 {
3341 kern_return_t kr;
3342 mach_port_t port;
3343 mach_port_name_t name;
3344
3345 if (task == TASK_NULL || task == kernel_task) {
3346 return KERN_INVALID_ARGUMENT;
3347 }
3348
3349 task_lock(task);
3350
3351 /*
3352 * place a legacy hold on the task.
3353 */
3354 kr = place_task_hold(task, TASK_HOLD_LEGACY);
3355 if (kr != KERN_SUCCESS) {
3356 task_unlock(task);
3357 return kr;
3358 }
3359
3360 /*
3361 * Claim a send right on the task resume port, and request a no-senders
3362 * notification on that port (if none outstanding).
3363 */
3364 (void)ipc_kobject_make_send_lazy_alloc_port(&task->itk_resume,
3365 (ipc_kobject_t)task, IKOT_TASK_RESUME);
3366 port = task->itk_resume;
3367
3368 task_unlock(task);
3369
3370 /*
3371 * Copyout the send right into the calling task's IPC space. It won't know it is there,
3372 * but we'll look it up when calling a traditional resume. Any IPC operations that
3373 * deallocate the send right will auto-release the suspension.
3374 */
3375 if ((kr = ipc_kmsg_copyout_object(current_task()->itk_space, ip_to_object(port),
3376 MACH_MSG_TYPE_MOVE_SEND, NULL, NULL, &name)) != KERN_SUCCESS) {
3377 printf("warning: %s(%d) failed to copyout suspension token for pid %d with error: %d\n",
3378 proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info),
3379 task_pid(task), kr);
3380 return kr;
3381 }
3382
3383 return kr;
3384 }
3385
3386 /*
3387 * task_resume:
3388 * Release a user hold on a task.
3389 *
3390 * Conditions:
3391 * The caller holds a reference to the task
3392 */
3393 kern_return_t
3394 task_resume(
3395 task_t task)
3396 {
3397 kern_return_t kr;
3398 mach_port_name_t resume_port_name;
3399 ipc_entry_t resume_port_entry;
3400 ipc_space_t space = current_task()->itk_space;
3401
3402 if (task == TASK_NULL || task == kernel_task) {
3403 return KERN_INVALID_ARGUMENT;
3404 }
3405
3406 /* release a legacy task hold */
3407 task_lock(task);
3408 kr = release_task_hold(task, TASK_HOLD_LEGACY);
3409 task_unlock(task);
3410
3411 is_write_lock(space);
3412 if (is_active(space) && IP_VALID(task->itk_resume) &&
3413 ipc_hash_lookup(space, ip_to_object(task->itk_resume), &resume_port_name, &resume_port_entry) == TRUE) {
3414 /*
3415 * We found a suspension token in the caller's IPC space. Release a send right to indicate that
3416 * we are holding one less legacy hold on the task from this caller. If the release failed,
3417 * go ahead and drop all the rights, as someone either already released our holds or the task
3418 * is gone.
3419 */
3420 if (kr == KERN_SUCCESS) {
3421 ipc_right_dealloc(space, resume_port_name, resume_port_entry);
3422 } else {
3423 ipc_right_destroy(space, resume_port_name, resume_port_entry, FALSE, 0);
3424 }
3425 /* space unlocked */
3426 } else {
3427 is_write_unlock(space);
3428 if (kr == KERN_SUCCESS) {
3429 printf("warning: %s(%d) performed out-of-band resume on pid %d\n",
3430 proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info),
3431 task_pid(task));
3432 }
3433 }
3434
3435 return kr;
3436 }
3437
3438 /*
3439 * Suspend the target task.
3440 * Making/holding a token/reference/port is the callers responsibility.
3441 */
3442 kern_return_t
3443 task_suspend_internal(task_t task)
3444 {
3445 kern_return_t kr;
3446
3447 if (task == TASK_NULL || task == kernel_task) {
3448 return KERN_INVALID_ARGUMENT;
3449 }
3450
3451 task_lock(task);
3452 kr = place_task_hold(task, TASK_HOLD_NORMAL);
3453 task_unlock(task);
3454 return kr;
3455 }
3456
3457 /*
3458 * Suspend the target task, and return a suspension token. The token
3459 * represents a reference on the suspended task.
3460 */
3461 kern_return_t
3462 task_suspend2(
3463 task_t task,
3464 task_suspension_token_t *suspend_token)
3465 {
3466 kern_return_t kr;
3467
3468 kr = task_suspend_internal(task);
3469 if (kr != KERN_SUCCESS) {
3470 *suspend_token = TASK_NULL;
3471 return kr;
3472 }
3473
3474 /*
3475 * Take a reference on the target task and return that to the caller
3476 * as a "suspension token," which can be converted into an SO right to
3477 * the now-suspended task's resume port.
3478 */
3479 task_reference_internal(task);
3480 *suspend_token = task;
3481
3482 return KERN_SUCCESS;
3483 }
3484
3485 /*
3486 * Resume the task
3487 * (reference/token/port management is caller's responsibility).
3488 */
3489 kern_return_t
3490 task_resume_internal(
3491 task_suspension_token_t task)
3492 {
3493 kern_return_t kr;
3494
3495 if (task == TASK_NULL || task == kernel_task) {
3496 return KERN_INVALID_ARGUMENT;
3497 }
3498
3499 task_lock(task);
3500 kr = release_task_hold(task, TASK_HOLD_NORMAL);
3501 task_unlock(task);
3502 return kr;
3503 }
3504
3505 /*
3506 * Resume the task using a suspension token. Consumes the token's ref.
3507 */
3508 kern_return_t
3509 task_resume2(
3510 task_suspension_token_t task)
3511 {
3512 kern_return_t kr;
3513
3514 kr = task_resume_internal(task);
3515 task_suspension_token_deallocate(task);
3516
3517 return kr;
3518 }
3519
3520 boolean_t
3521 task_suspension_notify(mach_msg_header_t *request_header)
3522 {
3523 ipc_port_t port = request_header->msgh_remote_port;
3524 task_t task = convert_port_to_task_suspension_token(port);
3525 mach_msg_type_number_t not_count;
3526
3527 if (task == TASK_NULL || task == kernel_task) {
3528 return TRUE; /* nothing to do */
3529 }
3530 switch (request_header->msgh_id) {
3531 case MACH_NOTIFY_SEND_ONCE:
3532 /* release the hold held by this specific send-once right */
3533 task_lock(task);
3534 release_task_hold(task, TASK_HOLD_NORMAL);
3535 task_unlock(task);
3536 break;
3537
3538 case MACH_NOTIFY_NO_SENDERS:
3539 not_count = ((mach_no_senders_notification_t *)request_header)->not_count;
3540
3541 task_lock(task);
3542 ip_lock(port);
3543 if (port->ip_mscount == not_count) {
3544 /* release all the [remaining] outstanding legacy holds */
3545 assert(port->ip_nsrequest == IP_NULL);
3546 ip_unlock(port);
3547 release_task_hold(task, TASK_HOLD_LEGACY_ALL);
3548 task_unlock(task);
3549 } else if (port->ip_nsrequest == IP_NULL) {
3550 ipc_port_t old_notify;
3551
3552 task_unlock(task);
3553 /* new send rights, re-arm notification at current make-send count */
3554 ipc_port_nsrequest(port, port->ip_mscount, ipc_port_make_sonce_locked(port), &old_notify);
3555 assert(old_notify == IP_NULL);
3556 /* port unlocked */
3557 } else {
3558 ip_unlock(port);
3559 task_unlock(task);
3560 }
3561 break;
3562
3563 default:
3564 break;
3565 }
3566
3567 task_suspension_token_deallocate(task); /* drop token reference */
3568 return TRUE;
3569 }
3570
3571 static kern_return_t
3572 task_pidsuspend_locked(task_t task)
3573 {
3574 kern_return_t kr;
3575
3576 if (task->pidsuspended) {
3577 kr = KERN_FAILURE;
3578 goto out;
3579 }
3580
3581 task->pidsuspended = TRUE;
3582
3583 kr = place_task_hold(task, TASK_HOLD_PIDSUSPEND);
3584 if (kr != KERN_SUCCESS) {
3585 task->pidsuspended = FALSE;
3586 }
3587 out:
3588 return kr;
3589 }
3590
3591
3592 /*
3593 * task_pidsuspend:
3594 *
3595 * Suspends a task by placing a hold on its threads.
3596 *
3597 * Conditions:
3598 * The caller holds a reference to the task
3599 */
3600 kern_return_t
3601 task_pidsuspend(
3602 task_t task)
3603 {
3604 kern_return_t kr;
3605
3606 if (task == TASK_NULL || task == kernel_task) {
3607 return KERN_INVALID_ARGUMENT;
3608 }
3609
3610 task_lock(task);
3611
3612 kr = task_pidsuspend_locked(task);
3613
3614 task_unlock(task);
3615
3616 if ((KERN_SUCCESS == kr) && task->message_app_suspended) {
3617 iokit_task_app_suspended_changed(task);
3618 }
3619
3620 return kr;
3621 }
3622
3623 /*
3624 * task_pidresume:
3625 * Resumes a previously suspended task.
3626 *
3627 * Conditions:
3628 * The caller holds a reference to the task
3629 */
3630 kern_return_t
3631 task_pidresume(
3632 task_t task)
3633 {
3634 kern_return_t kr;
3635
3636 if (task == TASK_NULL || task == kernel_task) {
3637 return KERN_INVALID_ARGUMENT;
3638 }
3639
3640 task_lock(task);
3641
3642 #if CONFIG_FREEZE
3643
3644 while (task->changing_freeze_state) {
3645 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
3646 task_unlock(task);
3647 thread_block(THREAD_CONTINUE_NULL);
3648
3649 task_lock(task);
3650 }
3651 task->changing_freeze_state = TRUE;
3652 #endif
3653
3654 kr = release_task_hold(task, TASK_HOLD_PIDSUSPEND);
3655
3656 task_unlock(task);
3657
3658 if ((KERN_SUCCESS == kr) && task->message_app_suspended) {
3659 iokit_task_app_suspended_changed(task);
3660 }
3661
3662 #if CONFIG_FREEZE
3663
3664 task_lock(task);
3665
3666 if (kr == KERN_SUCCESS) {
3667 task->frozen = FALSE;
3668 }
3669 task->changing_freeze_state = FALSE;
3670 thread_wakeup(&task->changing_freeze_state);
3671
3672 task_unlock(task);
3673 #endif
3674
3675 return kr;
3676 }
3677
3678 os_refgrp_decl(static, task_watchports_refgrp, "task_watchports", NULL);
3679
3680 /*
3681 * task_add_turnstile_watchports:
3682 * Setup watchports to boost the main thread of the task.
3683 *
3684 * Arguments:
3685 * task: task being spawned
3686 * thread: main thread of task
3687 * portwatch_ports: array of watchports
3688 * portwatch_count: number of watchports
3689 *
3690 * Conditions:
3691 * Nothing locked.
3692 */
3693 void
3694 task_add_turnstile_watchports(
3695 task_t task,
3696 thread_t thread,
3697 ipc_port_t *portwatch_ports,
3698 uint32_t portwatch_count)
3699 {
3700 struct task_watchports *watchports = NULL;
3701 struct task_watchport_elem *previous_elem_array[TASK_MAX_WATCHPORT_COUNT] = {};
3702 os_ref_count_t refs;
3703
3704 /* Check if the task has terminated */
3705 if (!task->active) {
3706 return;
3707 }
3708
3709 assert(portwatch_count <= TASK_MAX_WATCHPORT_COUNT);
3710
3711 watchports = task_watchports_alloc_init(task, thread, portwatch_count);
3712
3713 /* Lock the ipc space */
3714 is_write_lock(task->itk_space);
3715
3716 /* Setup watchports to boost the main thread */
3717 refs = task_add_turnstile_watchports_locked(task,
3718 watchports, previous_elem_array, portwatch_ports,
3719 portwatch_count);
3720
3721 /* Drop the space lock */
3722 is_write_unlock(task->itk_space);
3723
3724 if (refs == 0) {
3725 task_watchports_deallocate(watchports);
3726 }
3727
3728 /* Drop the ref on previous_elem_array */
3729 for (uint32_t i = 0; i < portwatch_count && previous_elem_array[i] != NULL; i++) {
3730 task_watchport_elem_deallocate(previous_elem_array[i]);
3731 }
3732 }
3733
3734 /*
3735 * task_remove_turnstile_watchports:
3736 * Clear all turnstile boost on the task from watchports.
3737 *
3738 * Arguments:
3739 * task: task being terminated
3740 *
3741 * Conditions:
3742 * Nothing locked.
3743 */
3744 void
3745 task_remove_turnstile_watchports(
3746 task_t task)
3747 {
3748 os_ref_count_t refs = TASK_MAX_WATCHPORT_COUNT;
3749 struct task_watchports *watchports = NULL;
3750 ipc_port_t port_freelist[TASK_MAX_WATCHPORT_COUNT] = {};
3751 uint32_t portwatch_count;
3752
3753 /* Lock the ipc space */
3754 is_write_lock(task->itk_space);
3755
3756 /* Check if watchport boost exist */
3757 if (task->watchports == NULL) {
3758 is_write_unlock(task->itk_space);
3759 return;
3760 }
3761 watchports = task->watchports;
3762 portwatch_count = watchports->tw_elem_array_count;
3763
3764 refs = task_remove_turnstile_watchports_locked(task, watchports,
3765 port_freelist);
3766
3767 is_write_unlock(task->itk_space);
3768
3769 /* Drop all the port references */
3770 for (uint32_t i = 0; i < portwatch_count && port_freelist[i] != NULL; i++) {
3771 ip_release(port_freelist[i]);
3772 }
3773
3774 /* Clear the task and thread references for task_watchport */
3775 if (refs == 0) {
3776 task_watchports_deallocate(watchports);
3777 }
3778 }
3779
3780 /*
3781 * task_transfer_turnstile_watchports:
3782 * Transfer all watchport turnstile boost from old task to new task.
3783 *
3784 * Arguments:
3785 * old_task: task calling exec
3786 * new_task: new exec'ed task
3787 * thread: main thread of new task
3788 *
3789 * Conditions:
3790 * Nothing locked.
3791 */
3792 void
3793 task_transfer_turnstile_watchports(
3794 task_t old_task,
3795 task_t new_task,
3796 thread_t new_thread)
3797 {
3798 struct task_watchports *old_watchports = NULL;
3799 struct task_watchports *new_watchports = NULL;
3800 os_ref_count_t old_refs = TASK_MAX_WATCHPORT_COUNT;
3801 os_ref_count_t new_refs = TASK_MAX_WATCHPORT_COUNT;
3802 uint32_t portwatch_count;
3803
3804 if (old_task->watchports == NULL || !new_task->active) {
3805 return;
3806 }
3807
3808 /* Get the watch port count from the old task */
3809 is_write_lock(old_task->itk_space);
3810 if (old_task->watchports == NULL) {
3811 is_write_unlock(old_task->itk_space);
3812 return;
3813 }
3814
3815 portwatch_count = old_task->watchports->tw_elem_array_count;
3816 is_write_unlock(old_task->itk_space);
3817
3818 new_watchports = task_watchports_alloc_init(new_task, new_thread, portwatch_count);
3819
3820 /* Lock the ipc space for old task */
3821 is_write_lock(old_task->itk_space);
3822
3823 /* Lock the ipc space for new task */
3824 is_write_lock(new_task->itk_space);
3825
3826 /* Check if watchport boost exist */
3827 if (old_task->watchports == NULL || !new_task->active) {
3828 is_write_unlock(new_task->itk_space);
3829 is_write_unlock(old_task->itk_space);
3830 (void)task_watchports_release(new_watchports);
3831 task_watchports_deallocate(new_watchports);
3832 return;
3833 }
3834
3835 old_watchports = old_task->watchports;
3836 assert(portwatch_count == old_task->watchports->tw_elem_array_count);
3837
3838 /* Setup new task watchports */
3839 new_task->watchports = new_watchports;
3840
3841 for (uint32_t i = 0; i < portwatch_count; i++) {
3842 ipc_port_t port = old_watchports->tw_elem[i].twe_port;
3843
3844 if (port == NULL) {
3845 task_watchport_elem_clear(&new_watchports->tw_elem[i]);
3846 continue;
3847 }
3848
3849 /* Lock the port and check if it has the entry */
3850 ip_lock(port);
3851 imq_lock(&port->ip_messages);
3852
3853 task_watchport_elem_init(&new_watchports->tw_elem[i], new_task, port);
3854
3855 if (ipc_port_replace_watchport_elem_conditional_locked(port,
3856 &old_watchports->tw_elem[i], &new_watchports->tw_elem[i]) == KERN_SUCCESS) {
3857 task_watchport_elem_clear(&old_watchports->tw_elem[i]);
3858
3859 task_watchports_retain(new_watchports);
3860 old_refs = task_watchports_release(old_watchports);
3861
3862 /* Check if all ports are cleaned */
3863 if (old_refs == 0) {
3864 old_task->watchports = NULL;
3865 }
3866 } else {
3867 task_watchport_elem_clear(&new_watchports->tw_elem[i]);
3868 }
3869 /* mqueue and port unlocked by ipc_port_replace_watchport_elem_conditional_locked */
3870 }
3871
3872 /* Drop the reference on new task_watchports struct returned by task_watchports_alloc_init */
3873 new_refs = task_watchports_release(new_watchports);
3874 if (new_refs == 0) {
3875 new_task->watchports = NULL;
3876 }
3877
3878 is_write_unlock(new_task->itk_space);
3879 is_write_unlock(old_task->itk_space);
3880
3881 /* Clear the task and thread references for old_watchport */
3882 if (old_refs == 0) {
3883 task_watchports_deallocate(old_watchports);
3884 }
3885
3886 /* Clear the task and thread references for new_watchport */
3887 if (new_refs == 0) {
3888 task_watchports_deallocate(new_watchports);
3889 }
3890 }
3891
3892 /*
3893 * task_add_turnstile_watchports_locked:
3894 * Setup watchports to boost the main thread of the task.
3895 *
3896 * Arguments:
3897 * task: task to boost
3898 * watchports: watchport structure to be attached to the task
3899 * previous_elem_array: an array of old watchport_elem to be returned to caller
3900 * portwatch_ports: array of watchports
3901 * portwatch_count: number of watchports
3902 *
3903 * Conditions:
3904 * ipc space of the task locked.
3905 * returns array of old watchport_elem in previous_elem_array
3906 */
3907 static os_ref_count_t
3908 task_add_turnstile_watchports_locked(
3909 task_t task,
3910 struct task_watchports *watchports,
3911 struct task_watchport_elem **previous_elem_array,
3912 ipc_port_t *portwatch_ports,
3913 uint32_t portwatch_count)
3914 {
3915 os_ref_count_t refs = TASK_MAX_WATCHPORT_COUNT;
3916
3917 /* Check if the task is still active */
3918 if (!task->active) {
3919 refs = task_watchports_release(watchports);
3920 return refs;
3921 }
3922
3923 assert(task->watchports == NULL);
3924 task->watchports = watchports;
3925
3926 for (uint32_t i = 0, j = 0; i < portwatch_count; i++) {
3927 ipc_port_t port = portwatch_ports[i];
3928
3929 task_watchport_elem_init(&watchports->tw_elem[i], task, port);
3930 if (port == NULL) {
3931 task_watchport_elem_clear(&watchports->tw_elem[i]);
3932 continue;
3933 }
3934
3935 ip_lock(port);
3936 imq_lock(&port->ip_messages);
3937
3938 /* Check if port is in valid state to be setup as watchport */
3939 if (ipc_port_add_watchport_elem_locked(port, &watchports->tw_elem[i],
3940 &previous_elem_array[j]) != KERN_SUCCESS) {
3941 task_watchport_elem_clear(&watchports->tw_elem[i]);
3942 continue;
3943 }
3944 /* port and mqueue unlocked on return */
3945
3946 ip_reference(port);
3947 task_watchports_retain(watchports);
3948 if (previous_elem_array[j] != NULL) {
3949 j++;
3950 }
3951 }
3952
3953 /* Drop the reference on task_watchport struct returned by os_ref_init */
3954 refs = task_watchports_release(watchports);
3955 if (refs == 0) {
3956 task->watchports = NULL;
3957 }
3958
3959 return refs;
3960 }
3961
3962 /*
3963 * task_remove_turnstile_watchports_locked:
3964 * Clear all turnstile boost on the task from watchports.
3965 *
3966 * Arguments:
3967 * task: task to remove watchports from
3968 * watchports: watchports structure for the task
3969 * port_freelist: array of ports returned with ref to caller
3970 *
3971 *
3972 * Conditions:
3973 * ipc space of the task locked.
3974 * array of ports with refs are returned in port_freelist
3975 */
3976 static os_ref_count_t
3977 task_remove_turnstile_watchports_locked(
3978 task_t task,
3979 struct task_watchports *watchports,
3980 ipc_port_t *port_freelist)
3981 {
3982 os_ref_count_t refs = TASK_MAX_WATCHPORT_COUNT;
3983
3984 for (uint32_t i = 0, j = 0; i < watchports->tw_elem_array_count; i++) {
3985 ipc_port_t port = watchports->tw_elem[i].twe_port;
3986 if (port == NULL) {
3987 continue;
3988 }
3989
3990 /* Lock the port and check if it has the entry */
3991 ip_lock(port);
3992 imq_lock(&port->ip_messages);
3993 if (ipc_port_clear_watchport_elem_internal_conditional_locked(port,
3994 &watchports->tw_elem[i]) == KERN_SUCCESS) {
3995 task_watchport_elem_clear(&watchports->tw_elem[i]);
3996 port_freelist[j++] = port;
3997 refs = task_watchports_release(watchports);
3998
3999 /* Check if all ports are cleaned */
4000 if (refs == 0) {
4001 task->watchports = NULL;
4002 break;
4003 }
4004 }
4005 /* mqueue and port unlocked by ipc_port_clear_watchport_elem_internal_conditional_locked */
4006 }
4007 return refs;
4008 }
4009
4010 /*
4011 * task_watchports_alloc_init:
4012 * Allocate and initialize task watchport struct.
4013 *
4014 * Conditions:
4015 * Nothing locked.
4016 */
4017 static struct task_watchports *
4018 task_watchports_alloc_init(
4019 task_t task,
4020 thread_t thread,
4021 uint32_t count)
4022 {
4023 struct task_watchports *watchports = kalloc(sizeof(struct task_watchports) +
4024 count * sizeof(struct task_watchport_elem));
4025
4026 task_reference(task);
4027 thread_reference(thread);
4028 watchports->tw_task = task;
4029 watchports->tw_thread = thread;
4030 watchports->tw_elem_array_count = count;
4031 os_ref_init(&watchports->tw_refcount, &task_watchports_refgrp);
4032
4033 return watchports;
4034 }
4035
4036 /*
4037 * task_watchports_deallocate:
4038 * Deallocate task watchport struct.
4039 *
4040 * Conditions:
4041 * Nothing locked.
4042 */
4043 static void
4044 task_watchports_deallocate(
4045 struct task_watchports *watchports)
4046 {
4047 uint32_t portwatch_count = watchports->tw_elem_array_count;
4048
4049 task_deallocate(watchports->tw_task);
4050 thread_deallocate(watchports->tw_thread);
4051 kfree(watchports, sizeof(struct task_watchports) + portwatch_count * sizeof(struct task_watchport_elem));
4052 }
4053
4054 /*
4055 * task_watchport_elem_deallocate:
4056 * Deallocate task watchport element and release its ref on task_watchport.
4057 *
4058 * Conditions:
4059 * Nothing locked.
4060 */
4061 void
4062 task_watchport_elem_deallocate(
4063 struct task_watchport_elem *watchport_elem)
4064 {
4065 os_ref_count_t refs = TASK_MAX_WATCHPORT_COUNT;
4066 task_t task = watchport_elem->twe_task;
4067 struct task_watchports *watchports = NULL;
4068 ipc_port_t port = NULL;
4069
4070 assert(task != NULL);
4071
4072 /* Take the space lock to modify the elememt */
4073 is_write_lock(task->itk_space);
4074
4075 watchports = task->watchports;
4076 assert(watchports != NULL);
4077
4078 port = watchport_elem->twe_port;
4079 assert(port != NULL);
4080
4081 task_watchport_elem_clear(watchport_elem);
4082 refs = task_watchports_release(watchports);
4083
4084 if (refs == 0) {
4085 task->watchports = NULL;
4086 }
4087
4088 is_write_unlock(task->itk_space);
4089
4090 ip_release(port);
4091 if (refs == 0) {
4092 task_watchports_deallocate(watchports);
4093 }
4094 }
4095
4096 /*
4097 * task_has_watchports:
4098 * Return TRUE if task has watchport boosts.
4099 *
4100 * Conditions:
4101 * Nothing locked.
4102 */
4103 boolean_t
4104 task_has_watchports(task_t task)
4105 {
4106 return task->watchports != NULL;
4107 }
4108
4109 #if DEVELOPMENT || DEBUG
4110
4111 extern void IOSleep(int);
4112
4113 kern_return_t
4114 task_disconnect_page_mappings(task_t task)
4115 {
4116 int n;
4117
4118 if (task == TASK_NULL || task == kernel_task) {
4119 return KERN_INVALID_ARGUMENT;
4120 }
4121
4122 /*
4123 * this function is used to strip all of the mappings from
4124 * the pmap for the specified task to force the task to
4125 * re-fault all of the pages it is actively using... this
4126 * allows us to approximate the true working set of the
4127 * specified task. We only engage if at least 1 of the
4128 * threads in the task is runnable, but we want to continuously
4129 * sweep (at least for a while - I've arbitrarily set the limit at
4130 * 100 sweeps to be re-looked at as we gain experience) to get a better
4131 * view into what areas within a page are being visited (as opposed to only
4132 * seeing the first fault of a page after the task becomes
4133 * runnable)... in the future I may
4134 * try to block until awakened by a thread in this task
4135 * being made runnable, but for now we'll periodically poll from the
4136 * user level debug tool driving the sysctl
4137 */
4138 for (n = 0; n < 100; n++) {
4139 thread_t thread;
4140 boolean_t runnable;
4141 boolean_t do_unnest;
4142 int page_count;
4143
4144 runnable = FALSE;
4145 do_unnest = FALSE;
4146
4147 task_lock(task);
4148
4149 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4150 if (thread->state & TH_RUN) {
4151 runnable = TRUE;
4152 break;
4153 }
4154 }
4155 if (n == 0) {
4156 task->task_disconnected_count++;
4157 }
4158
4159 if (task->task_unnested == FALSE) {
4160 if (runnable == TRUE) {
4161 task->task_unnested = TRUE;
4162 do_unnest = TRUE;
4163 }
4164 }
4165 task_unlock(task);
4166
4167 if (runnable == FALSE) {
4168 break;
4169 }
4170
4171 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_TASK_PAGE_MAPPINGS)) | DBG_FUNC_START,
4172 task, do_unnest, task->task_disconnected_count, 0, 0);
4173
4174 page_count = vm_map_disconnect_page_mappings(task->map, do_unnest);
4175
4176 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_TASK_PAGE_MAPPINGS)) | DBG_FUNC_END,
4177 task, page_count, 0, 0, 0);
4178
4179 if ((n % 5) == 4) {
4180 IOSleep(1);
4181 }
4182 }
4183 return KERN_SUCCESS;
4184 }
4185
4186 #endif
4187
4188
4189 #if CONFIG_FREEZE
4190
4191 /*
4192 * task_freeze:
4193 *
4194 * Freeze a task.
4195 *
4196 * Conditions:
4197 * The caller holds a reference to the task
4198 */
4199 extern void vm_wake_compactor_swapper(void);
4200 extern queue_head_t c_swapout_list_head;
4201
4202 kern_return_t
4203 task_freeze(
4204 task_t task,
4205 uint32_t *purgeable_count,
4206 uint32_t *wired_count,
4207 uint32_t *clean_count,
4208 uint32_t *dirty_count,
4209 uint32_t dirty_budget,
4210 uint32_t *shared_count,
4211 int *freezer_error_code,
4212 boolean_t eval_only)
4213 {
4214 kern_return_t kr = KERN_SUCCESS;
4215
4216 if (task == TASK_NULL || task == kernel_task) {
4217 return KERN_INVALID_ARGUMENT;
4218 }
4219
4220 task_lock(task);
4221
4222 while (task->changing_freeze_state) {
4223 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
4224 task_unlock(task);
4225 thread_block(THREAD_CONTINUE_NULL);
4226
4227 task_lock(task);
4228 }
4229 if (task->frozen) {
4230 task_unlock(task);
4231 return KERN_FAILURE;
4232 }
4233 task->changing_freeze_state = TRUE;
4234
4235 task_unlock(task);
4236
4237 kr = vm_map_freeze(task,
4238 purgeable_count,
4239 wired_count,
4240 clean_count,
4241 dirty_count,
4242 dirty_budget,
4243 shared_count,
4244 freezer_error_code,
4245 eval_only);
4246
4247 task_lock(task);
4248
4249 if ((kr == KERN_SUCCESS) && (eval_only == FALSE)) {
4250 task->frozen = TRUE;
4251 }
4252
4253 task->changing_freeze_state = FALSE;
4254 thread_wakeup(&task->changing_freeze_state);
4255
4256 task_unlock(task);
4257
4258 if (VM_CONFIG_COMPRESSOR_IS_PRESENT &&
4259 (kr == KERN_SUCCESS) &&
4260 (eval_only == FALSE)) {
4261 vm_wake_compactor_swapper();
4262 /*
4263 * We do an explicit wakeup of the swapout thread here
4264 * because the compact_and_swap routines don't have
4265 * knowledge about these kind of "per-task packed c_segs"
4266 * and so will not be evaluating whether we need to do
4267 * a wakeup there.
4268 */
4269 thread_wakeup((event_t)&c_swapout_list_head);
4270 }
4271
4272 return kr;
4273 }
4274
4275 /*
4276 * task_thaw:
4277 *
4278 * Thaw a currently frozen task.
4279 *
4280 * Conditions:
4281 * The caller holds a reference to the task
4282 */
4283 kern_return_t
4284 task_thaw(
4285 task_t task)
4286 {
4287 if (task == TASK_NULL || task == kernel_task) {
4288 return KERN_INVALID_ARGUMENT;
4289 }
4290
4291 task_lock(task);
4292
4293 while (task->changing_freeze_state) {
4294 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
4295 task_unlock(task);
4296 thread_block(THREAD_CONTINUE_NULL);
4297
4298 task_lock(task);
4299 }
4300 if (!task->frozen) {
4301 task_unlock(task);
4302 return KERN_FAILURE;
4303 }
4304 task->frozen = FALSE;
4305
4306 task_unlock(task);
4307
4308 return KERN_SUCCESS;
4309 }
4310
4311 #endif /* CONFIG_FREEZE */
4312
4313 kern_return_t
4314 host_security_set_task_token(
4315 host_security_t host_security,
4316 task_t task,
4317 security_token_t sec_token,
4318 audit_token_t audit_token,
4319 host_priv_t host_priv)
4320 {
4321 ipc_port_t host_port;
4322 kern_return_t kr;
4323
4324 if (task == TASK_NULL) {
4325 return KERN_INVALID_ARGUMENT;
4326 }
4327
4328 if (host_security == HOST_NULL) {
4329 return KERN_INVALID_SECURITY;
4330 }
4331
4332 task_lock(task);
4333 task->sec_token = sec_token;
4334 task->audit_token = audit_token;
4335 task_unlock(task);
4336
4337 if (host_priv != HOST_PRIV_NULL) {
4338 kr = host_get_host_priv_port(host_priv, &host_port);
4339 } else {
4340 kr = host_get_host_port(host_priv_self(), &host_port);
4341 }
4342 assert(kr == KERN_SUCCESS);
4343
4344 kr = task_set_special_port_internal(task, TASK_HOST_PORT, host_port);
4345 return kr;
4346 }
4347
4348 kern_return_t
4349 task_send_trace_memory(
4350 __unused task_t target_task,
4351 __unused uint32_t pid,
4352 __unused uint64_t uniqueid)
4353 {
4354 return KERN_INVALID_ARGUMENT;
4355 }
4356
4357 /*
4358 * This routine was added, pretty much exclusively, for registering the
4359 * RPC glue vector for in-kernel short circuited tasks. Rather than
4360 * removing it completely, I have only disabled that feature (which was
4361 * the only feature at the time). It just appears that we are going to
4362 * want to add some user data to tasks in the future (i.e. bsd info,
4363 * task names, etc...), so I left it in the formal task interface.
4364 */
4365 kern_return_t
4366 task_set_info(
4367 task_t task,
4368 task_flavor_t flavor,
4369 __unused task_info_t task_info_in, /* pointer to IN array */
4370 __unused mach_msg_type_number_t task_info_count)
4371 {
4372 if (task == TASK_NULL) {
4373 return KERN_INVALID_ARGUMENT;
4374 }
4375
4376 switch (flavor) {
4377 #if CONFIG_ATM
4378 case TASK_TRACE_MEMORY_INFO:
4379 {
4380 if (task_info_count != TASK_TRACE_MEMORY_INFO_COUNT) {
4381 return KERN_INVALID_ARGUMENT;
4382 }
4383
4384 assert(task_info_in != NULL);
4385 task_trace_memory_info_t mem_info;
4386 mem_info = (task_trace_memory_info_t) task_info_in;
4387 kern_return_t kr = atm_register_trace_memory(task,
4388 mem_info->user_memory_address,
4389 mem_info->buffer_size);
4390 return kr;
4391 }
4392
4393 #endif
4394 default:
4395 return KERN_INVALID_ARGUMENT;
4396 }
4397 return KERN_SUCCESS;
4398 }
4399
4400 int radar_20146450 = 1;
4401 kern_return_t
4402 task_info(
4403 task_t task,
4404 task_flavor_t flavor,
4405 task_info_t task_info_out,
4406 mach_msg_type_number_t *task_info_count)
4407 {
4408 kern_return_t error = KERN_SUCCESS;
4409 mach_msg_type_number_t original_task_info_count;
4410
4411 if (task == TASK_NULL) {
4412 return KERN_INVALID_ARGUMENT;
4413 }
4414
4415 original_task_info_count = *task_info_count;
4416 task_lock(task);
4417
4418 if ((task != current_task()) && (!task->active)) {
4419 task_unlock(task);
4420 return KERN_INVALID_ARGUMENT;
4421 }
4422
4423 switch (flavor) {
4424 case TASK_BASIC_INFO_32:
4425 case TASK_BASIC2_INFO_32:
4426 #if defined(__arm__) || defined(__arm64__)
4427 case TASK_BASIC_INFO_64:
4428 #endif
4429 {
4430 task_basic_info_32_t basic_info;
4431 vm_map_t map;
4432 clock_sec_t secs;
4433 clock_usec_t usecs;
4434
4435 if (*task_info_count < TASK_BASIC_INFO_32_COUNT) {
4436 error = KERN_INVALID_ARGUMENT;
4437 break;
4438 }
4439
4440 basic_info = (task_basic_info_32_t)task_info_out;
4441
4442 map = (task == kernel_task)? kernel_map: task->map;
4443 basic_info->virtual_size = (typeof(basic_info->virtual_size))map->size;
4444 if (flavor == TASK_BASIC2_INFO_32) {
4445 /*
4446 * The "BASIC2" flavor gets the maximum resident
4447 * size instead of the current resident size...
4448 */
4449 basic_info->resident_size = pmap_resident_max(map->pmap);
4450 } else {
4451 basic_info->resident_size = pmap_resident_count(map->pmap);
4452 }
4453 basic_info->resident_size *= PAGE_SIZE;
4454
4455 basic_info->policy = ((task != kernel_task)?
4456 POLICY_TIMESHARE: POLICY_RR);
4457 basic_info->suspend_count = task->user_stop_count;
4458
4459 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
4460 basic_info->user_time.seconds =
4461 (typeof(basic_info->user_time.seconds))secs;
4462 basic_info->user_time.microseconds = usecs;
4463
4464 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
4465 basic_info->system_time.seconds =
4466 (typeof(basic_info->system_time.seconds))secs;
4467 basic_info->system_time.microseconds = usecs;
4468
4469 *task_info_count = TASK_BASIC_INFO_32_COUNT;
4470 break;
4471 }
4472
4473 #if defined(__arm__) || defined(__arm64__)
4474 case TASK_BASIC_INFO_64_2:
4475 {
4476 task_basic_info_64_2_t basic_info;
4477 vm_map_t map;
4478 clock_sec_t secs;
4479 clock_usec_t usecs;
4480
4481 if (*task_info_count < TASK_BASIC_INFO_64_2_COUNT) {
4482 error = KERN_INVALID_ARGUMENT;
4483 break;
4484 }
4485
4486 basic_info = (task_basic_info_64_2_t)task_info_out;
4487
4488 map = (task == kernel_task)? kernel_map: task->map;
4489 basic_info->virtual_size = map->size;
4490 basic_info->resident_size =
4491 (mach_vm_size_t)(pmap_resident_count(map->pmap))
4492 * PAGE_SIZE_64;
4493
4494 basic_info->policy = ((task != kernel_task)?
4495 POLICY_TIMESHARE: POLICY_RR);
4496 basic_info->suspend_count = task->user_stop_count;
4497
4498 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
4499 basic_info->user_time.seconds =
4500 (typeof(basic_info->user_time.seconds))secs;
4501 basic_info->user_time.microseconds = usecs;
4502
4503 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
4504 basic_info->system_time.seconds =
4505 (typeof(basic_info->system_time.seconds))secs;
4506 basic_info->system_time.microseconds = usecs;
4507
4508 *task_info_count = TASK_BASIC_INFO_64_2_COUNT;
4509 break;
4510 }
4511
4512 #else /* defined(__arm__) || defined(__arm64__) */
4513 case TASK_BASIC_INFO_64:
4514 {
4515 task_basic_info_64_t basic_info;
4516 vm_map_t map;
4517 clock_sec_t secs;
4518 clock_usec_t usecs;
4519
4520 if (*task_info_count < TASK_BASIC_INFO_64_COUNT) {
4521 error = KERN_INVALID_ARGUMENT;
4522 break;
4523 }
4524
4525 basic_info = (task_basic_info_64_t)task_info_out;
4526
4527 map = (task == kernel_task)? kernel_map: task->map;
4528 basic_info->virtual_size = map->size;
4529 basic_info->resident_size =
4530 (mach_vm_size_t)(pmap_resident_count(map->pmap))
4531 * PAGE_SIZE_64;
4532
4533 basic_info->policy = ((task != kernel_task)?
4534 POLICY_TIMESHARE: POLICY_RR);
4535 basic_info->suspend_count = task->user_stop_count;
4536
4537 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
4538 basic_info->user_time.seconds =
4539 (typeof(basic_info->user_time.seconds))secs;
4540 basic_info->user_time.microseconds = usecs;
4541
4542 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
4543 basic_info->system_time.seconds =
4544 (typeof(basic_info->system_time.seconds))secs;
4545 basic_info->system_time.microseconds = usecs;
4546
4547 *task_info_count = TASK_BASIC_INFO_64_COUNT;
4548 break;
4549 }
4550 #endif /* defined(__arm__) || defined(__arm64__) */
4551
4552 case MACH_TASK_BASIC_INFO:
4553 {
4554 mach_task_basic_info_t basic_info;
4555 vm_map_t map;
4556 clock_sec_t secs;
4557 clock_usec_t usecs;
4558
4559 if (*task_info_count < MACH_TASK_BASIC_INFO_COUNT) {
4560 error = KERN_INVALID_ARGUMENT;
4561 break;
4562 }
4563
4564 basic_info = (mach_task_basic_info_t)task_info_out;
4565
4566 map = (task == kernel_task) ? kernel_map : task->map;
4567
4568 basic_info->virtual_size = map->size;
4569
4570 basic_info->resident_size =
4571 (mach_vm_size_t)(pmap_resident_count(map->pmap));
4572 basic_info->resident_size *= PAGE_SIZE_64;
4573
4574 basic_info->resident_size_max =
4575 (mach_vm_size_t)(pmap_resident_max(map->pmap));
4576 basic_info->resident_size_max *= PAGE_SIZE_64;
4577
4578 basic_info->policy = ((task != kernel_task) ?
4579 POLICY_TIMESHARE : POLICY_RR);
4580
4581 basic_info->suspend_count = task->user_stop_count;
4582
4583 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
4584 basic_info->user_time.seconds =
4585 (typeof(basic_info->user_time.seconds))secs;
4586 basic_info->user_time.microseconds = usecs;
4587
4588 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
4589 basic_info->system_time.seconds =
4590 (typeof(basic_info->system_time.seconds))secs;
4591 basic_info->system_time.microseconds = usecs;
4592
4593 *task_info_count = MACH_TASK_BASIC_INFO_COUNT;
4594 break;
4595 }
4596
4597 case TASK_THREAD_TIMES_INFO:
4598 {
4599 task_thread_times_info_t times_info;
4600 thread_t thread;
4601
4602 if (*task_info_count < TASK_THREAD_TIMES_INFO_COUNT) {
4603 error = KERN_INVALID_ARGUMENT;
4604 break;
4605 }
4606
4607 times_info = (task_thread_times_info_t) task_info_out;
4608 times_info->user_time.seconds = 0;
4609 times_info->user_time.microseconds = 0;
4610 times_info->system_time.seconds = 0;
4611 times_info->system_time.microseconds = 0;
4612
4613
4614 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4615 time_value_t user_time, system_time;
4616
4617 if (thread->options & TH_OPT_IDLE_THREAD) {
4618 continue;
4619 }
4620
4621 thread_read_times(thread, &user_time, &system_time, NULL);
4622
4623 time_value_add(&times_info->user_time, &user_time);
4624 time_value_add(&times_info->system_time, &system_time);
4625 }
4626
4627 *task_info_count = TASK_THREAD_TIMES_INFO_COUNT;
4628 break;
4629 }
4630
4631 case TASK_ABSOLUTETIME_INFO:
4632 {
4633 task_absolutetime_info_t info;
4634 thread_t thread;
4635
4636 if (*task_info_count < TASK_ABSOLUTETIME_INFO_COUNT) {
4637 error = KERN_INVALID_ARGUMENT;
4638 break;
4639 }
4640
4641 info = (task_absolutetime_info_t)task_info_out;
4642 info->threads_user = info->threads_system = 0;
4643
4644
4645 info->total_user = task->total_user_time;
4646 info->total_system = task->total_system_time;
4647
4648 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4649 uint64_t tval;
4650 spl_t x;
4651
4652 if (thread->options & TH_OPT_IDLE_THREAD) {
4653 continue;
4654 }
4655
4656 x = splsched();
4657 thread_lock(thread);
4658
4659 tval = timer_grab(&thread->user_timer);
4660 info->threads_user += tval;
4661 info->total_user += tval;
4662
4663 tval = timer_grab(&thread->system_timer);
4664 if (thread->precise_user_kernel_time) {
4665 info->threads_system += tval;
4666 info->total_system += tval;
4667 } else {
4668 /* system_timer may represent either sys or user */
4669 info->threads_user += tval;
4670 info->total_user += tval;
4671 }
4672
4673 thread_unlock(thread);
4674 splx(x);
4675 }
4676
4677
4678 *task_info_count = TASK_ABSOLUTETIME_INFO_COUNT;
4679 break;
4680 }
4681
4682 case TASK_DYLD_INFO:
4683 {
4684 task_dyld_info_t info;
4685
4686 /*
4687 * We added the format field to TASK_DYLD_INFO output. For
4688 * temporary backward compatibility, accept the fact that
4689 * clients may ask for the old version - distinguished by the
4690 * size of the expected result structure.
4691 */
4692 #define TASK_LEGACY_DYLD_INFO_COUNT \
4693 offsetof(struct task_dyld_info, all_image_info_format)/sizeof(natural_t)
4694
4695 if (*task_info_count < TASK_LEGACY_DYLD_INFO_COUNT) {
4696 error = KERN_INVALID_ARGUMENT;
4697 break;
4698 }
4699
4700 info = (task_dyld_info_t)task_info_out;
4701 info->all_image_info_addr = task->all_image_info_addr;
4702 info->all_image_info_size = task->all_image_info_size;
4703
4704 /* only set format on output for those expecting it */
4705 if (*task_info_count >= TASK_DYLD_INFO_COUNT) {
4706 info->all_image_info_format = task_has_64Bit_addr(task) ?
4707 TASK_DYLD_ALL_IMAGE_INFO_64 :
4708 TASK_DYLD_ALL_IMAGE_INFO_32;
4709 *task_info_count = TASK_DYLD_INFO_COUNT;
4710 } else {
4711 *task_info_count = TASK_LEGACY_DYLD_INFO_COUNT;
4712 }
4713 break;
4714 }
4715
4716 case TASK_EXTMOD_INFO:
4717 {
4718 task_extmod_info_t info;
4719 void *p;
4720
4721 if (*task_info_count < TASK_EXTMOD_INFO_COUNT) {
4722 error = KERN_INVALID_ARGUMENT;
4723 break;
4724 }
4725
4726 info = (task_extmod_info_t)task_info_out;
4727
4728 p = get_bsdtask_info(task);
4729 if (p) {
4730 proc_getexecutableuuid(p, info->task_uuid, sizeof(info->task_uuid));
4731 } else {
4732 bzero(info->task_uuid, sizeof(info->task_uuid));
4733 }
4734 info->extmod_statistics = task->extmod_statistics;
4735 *task_info_count = TASK_EXTMOD_INFO_COUNT;
4736
4737 break;
4738 }
4739
4740 case TASK_KERNELMEMORY_INFO:
4741 {
4742 task_kernelmemory_info_t tkm_info;
4743 ledger_amount_t credit, debit;
4744
4745 if (*task_info_count < TASK_KERNELMEMORY_INFO_COUNT) {
4746 error = KERN_INVALID_ARGUMENT;
4747 break;
4748 }
4749
4750 tkm_info = (task_kernelmemory_info_t) task_info_out;
4751 tkm_info->total_palloc = 0;
4752 tkm_info->total_pfree = 0;
4753 tkm_info->total_salloc = 0;
4754 tkm_info->total_sfree = 0;
4755
4756 if (task == kernel_task) {
4757 /*
4758 * All shared allocs/frees from other tasks count against
4759 * the kernel private memory usage. If we are looking up
4760 * info for the kernel task, gather from everywhere.
4761 */
4762 task_unlock(task);
4763
4764 /* start by accounting for all the terminated tasks against the kernel */
4765 tkm_info->total_palloc = tasks_tkm_private.alloc + tasks_tkm_shared.alloc;
4766 tkm_info->total_pfree = tasks_tkm_private.free + tasks_tkm_shared.free;
4767
4768 /* count all other task/thread shared alloc/free against the kernel */
4769 lck_mtx_lock(&tasks_threads_lock);
4770
4771 /* XXX this really shouldn't be using the function parameter 'task' as a local var! */
4772 queue_iterate(&tasks, task, task_t, tasks) {
4773 if (task == kernel_task) {
4774 if (ledger_get_entries(task->ledger,
4775 task_ledgers.tkm_private, &credit,
4776 &debit) == KERN_SUCCESS) {
4777 tkm_info->total_palloc += credit;
4778 tkm_info->total_pfree += debit;
4779 }
4780 }
4781 if (!ledger_get_entries(task->ledger,
4782 task_ledgers.tkm_shared, &credit, &debit)) {
4783 tkm_info->total_palloc += credit;
4784 tkm_info->total_pfree += debit;
4785 }
4786 }
4787 lck_mtx_unlock(&tasks_threads_lock);
4788 } else {
4789 if (!ledger_get_entries(task->ledger,
4790 task_ledgers.tkm_private, &credit, &debit)) {
4791 tkm_info->total_palloc = credit;
4792 tkm_info->total_pfree = debit;
4793 }
4794 if (!ledger_get_entries(task->ledger,
4795 task_ledgers.tkm_shared, &credit, &debit)) {
4796 tkm_info->total_salloc = credit;
4797 tkm_info->total_sfree = debit;
4798 }
4799 task_unlock(task);
4800 }
4801
4802 *task_info_count = TASK_KERNELMEMORY_INFO_COUNT;
4803 return KERN_SUCCESS;
4804 }
4805
4806 /* OBSOLETE */
4807 case TASK_SCHED_FIFO_INFO:
4808 {
4809 if (*task_info_count < POLICY_FIFO_BASE_COUNT) {
4810 error = KERN_INVALID_ARGUMENT;
4811 break;
4812 }
4813
4814 error = KERN_INVALID_POLICY;
4815 break;
4816 }
4817
4818 /* OBSOLETE */
4819 case TASK_SCHED_RR_INFO:
4820 {
4821 policy_rr_base_t rr_base;
4822 uint32_t quantum_time;
4823 uint64_t quantum_ns;
4824
4825 if (*task_info_count < POLICY_RR_BASE_COUNT) {
4826 error = KERN_INVALID_ARGUMENT;
4827 break;
4828 }
4829
4830 rr_base = (policy_rr_base_t) task_info_out;
4831
4832 if (task != kernel_task) {
4833 error = KERN_INVALID_POLICY;
4834 break;
4835 }
4836
4837 rr_base->base_priority = task->priority;
4838
4839 quantum_time = SCHED(initial_quantum_size)(THREAD_NULL);
4840 absolutetime_to_nanoseconds(quantum_time, &quantum_ns);
4841
4842 rr_base->quantum = (uint32_t)(quantum_ns / 1000 / 1000);
4843
4844 *task_info_count = POLICY_RR_BASE_COUNT;
4845 break;
4846 }
4847
4848 /* OBSOLETE */
4849 case TASK_SCHED_TIMESHARE_INFO:
4850 {
4851 policy_timeshare_base_t ts_base;
4852
4853 if (*task_info_count < POLICY_TIMESHARE_BASE_COUNT) {
4854 error = KERN_INVALID_ARGUMENT;
4855 break;
4856 }
4857
4858 ts_base = (policy_timeshare_base_t) task_info_out;
4859
4860 if (task == kernel_task) {
4861 error = KERN_INVALID_POLICY;
4862 break;
4863 }
4864
4865 ts_base->base_priority = task->priority;
4866
4867 *task_info_count = POLICY_TIMESHARE_BASE_COUNT;
4868 break;
4869 }
4870
4871 case TASK_SECURITY_TOKEN:
4872 {
4873 security_token_t *sec_token_p;
4874
4875 if (*task_info_count < TASK_SECURITY_TOKEN_COUNT) {
4876 error = KERN_INVALID_ARGUMENT;
4877 break;
4878 }
4879
4880 sec_token_p = (security_token_t *) task_info_out;
4881
4882 *sec_token_p = task->sec_token;
4883
4884 *task_info_count = TASK_SECURITY_TOKEN_COUNT;
4885 break;
4886 }
4887
4888 case TASK_AUDIT_TOKEN:
4889 {
4890 audit_token_t *audit_token_p;
4891
4892 if (*task_info_count < TASK_AUDIT_TOKEN_COUNT) {
4893 error = KERN_INVALID_ARGUMENT;
4894 break;
4895 }
4896
4897 audit_token_p = (audit_token_t *) task_info_out;
4898
4899 *audit_token_p = task->audit_token;
4900
4901 *task_info_count = TASK_AUDIT_TOKEN_COUNT;
4902 break;
4903 }
4904
4905 case TASK_SCHED_INFO:
4906 error = KERN_INVALID_ARGUMENT;
4907 break;
4908
4909 case TASK_EVENTS_INFO:
4910 {
4911 task_events_info_t events_info;
4912 thread_t thread;
4913
4914 if (*task_info_count < TASK_EVENTS_INFO_COUNT) {
4915 error = KERN_INVALID_ARGUMENT;
4916 break;
4917 }
4918
4919 events_info = (task_events_info_t) task_info_out;
4920
4921
4922 events_info->faults = task->faults;
4923 events_info->pageins = task->pageins;
4924 events_info->cow_faults = task->cow_faults;
4925 events_info->messages_sent = task->messages_sent;
4926 events_info->messages_received = task->messages_received;
4927 events_info->syscalls_mach = task->syscalls_mach;
4928 events_info->syscalls_unix = task->syscalls_unix;
4929
4930 events_info->csw = task->c_switch;
4931
4932 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4933 events_info->csw += thread->c_switch;
4934 events_info->syscalls_mach += thread->syscalls_mach;
4935 events_info->syscalls_unix += thread->syscalls_unix;
4936 }
4937
4938
4939 *task_info_count = TASK_EVENTS_INFO_COUNT;
4940 break;
4941 }
4942 case TASK_AFFINITY_TAG_INFO:
4943 {
4944 if (*task_info_count < TASK_AFFINITY_TAG_INFO_COUNT) {
4945 error = KERN_INVALID_ARGUMENT;
4946 break;
4947 }
4948
4949 error = task_affinity_info(task, task_info_out, task_info_count);
4950 break;
4951 }
4952 case TASK_POWER_INFO:
4953 {
4954 if (*task_info_count < TASK_POWER_INFO_COUNT) {
4955 error = KERN_INVALID_ARGUMENT;
4956 break;
4957 }
4958
4959 task_power_info_locked(task, (task_power_info_t)task_info_out, NULL, NULL, NULL);
4960 break;
4961 }
4962
4963 case TASK_POWER_INFO_V2:
4964 {
4965 if (*task_info_count < TASK_POWER_INFO_V2_COUNT_OLD) {
4966 error = KERN_INVALID_ARGUMENT;
4967 break;
4968 }
4969 task_power_info_v2_t tpiv2 = (task_power_info_v2_t) task_info_out;
4970 task_power_info_locked(task, &tpiv2->cpu_energy, &tpiv2->gpu_energy, tpiv2, NULL);
4971 break;
4972 }
4973
4974 case TASK_VM_INFO:
4975 case TASK_VM_INFO_PURGEABLE:
4976 {
4977 task_vm_info_t vm_info;
4978 vm_map_t map;
4979
4980 #if __arm64__
4981 struct proc *p;
4982 uint32_t platform, sdk;
4983 p = current_proc();
4984 platform = proc_platform(p);
4985 sdk = proc_sdk(p);
4986 if (original_task_info_count > TASK_VM_INFO_REV2_COUNT &&
4987 platform == PLATFORM_IOS &&
4988 sdk != 0 &&
4989 (sdk >> 16) <= 12) {
4990 /*
4991 * Some iOS apps pass an incorrect value for
4992 * task_info_count, expressed in number of bytes
4993 * instead of number of "natural_t" elements.
4994 * For the sake of backwards binary compatibility
4995 * for apps built with an iOS12 or older SDK and using
4996 * the "rev2" data structure, let's fix task_info_count
4997 * for them, to avoid stomping past the actual end
4998 * of their buffer.
4999 */
5000 #if DEVELOPMENT || DEBUG
5001 printf("%s:%d %d[%s] rdar://49484582 task_info_count %d -> %d platform %d sdk %d.%d.%d\n", __FUNCTION__, __LINE__, proc_pid(p), proc_name_address(p), original_task_info_count, TASK_VM_INFO_REV2_COUNT, platform, (sdk >> 16), ((sdk >> 8) & 0xff), (sdk & 0xff));
5002 #endif /* DEVELOPMENT || DEBUG */
5003 DTRACE_VM4(workaround_task_vm_info_count,
5004 mach_msg_type_number_t, original_task_info_count,
5005 mach_msg_type_number_t, TASK_VM_INFO_REV2_COUNT,
5006 uint32_t, platform,
5007 uint32_t, sdk);
5008 original_task_info_count = TASK_VM_INFO_REV2_COUNT;
5009 *task_info_count = original_task_info_count;
5010 }
5011 #endif /* __arm64__ */
5012
5013 if (*task_info_count < TASK_VM_INFO_REV0_COUNT) {
5014 error = KERN_INVALID_ARGUMENT;
5015 break;
5016 }
5017
5018 vm_info = (task_vm_info_t)task_info_out;
5019
5020 if (task == kernel_task) {
5021 map = kernel_map;
5022 /* no lock */
5023 } else {
5024 map = task->map;
5025 vm_map_lock_read(map);
5026 }
5027
5028 vm_info->virtual_size = (typeof(vm_info->virtual_size))map->size;
5029 vm_info->region_count = map->hdr.nentries;
5030 vm_info->page_size = vm_map_page_size(map);
5031
5032 vm_info->resident_size = pmap_resident_count(map->pmap);
5033 vm_info->resident_size *= PAGE_SIZE;
5034 vm_info->resident_size_peak = pmap_resident_max(map->pmap);
5035 vm_info->resident_size_peak *= PAGE_SIZE;
5036
5037 #define _VM_INFO(_name) \
5038 vm_info->_name = ((mach_vm_size_t) map->pmap->stats._name) * PAGE_SIZE
5039
5040 _VM_INFO(device);
5041 _VM_INFO(device_peak);
5042 _VM_INFO(external);
5043 _VM_INFO(external_peak);
5044 _VM_INFO(internal);
5045 _VM_INFO(internal_peak);
5046 _VM_INFO(reusable);
5047 _VM_INFO(reusable_peak);
5048 _VM_INFO(compressed);
5049 _VM_INFO(compressed_peak);
5050 _VM_INFO(compressed_lifetime);
5051
5052 vm_info->purgeable_volatile_pmap = 0;
5053 vm_info->purgeable_volatile_resident = 0;
5054 vm_info->purgeable_volatile_virtual = 0;
5055 if (task == kernel_task) {
5056 /*
5057 * We do not maintain the detailed stats for the
5058 * kernel_pmap, so just count everything as
5059 * "internal"...
5060 */
5061 vm_info->internal = vm_info->resident_size;
5062 /*
5063 * ... but since the memory held by the VM compressor
5064 * in the kernel address space ought to be attributed
5065 * to user-space tasks, we subtract it from "internal"
5066 * to give memory reporting tools a more accurate idea
5067 * of what the kernel itself is actually using, instead
5068 * of making it look like the kernel is leaking memory
5069 * when the system is under memory pressure.
5070 */
5071 vm_info->internal -= (VM_PAGE_COMPRESSOR_COUNT *
5072 PAGE_SIZE);
5073 } else {
5074 mach_vm_size_t volatile_virtual_size;
5075 mach_vm_size_t volatile_resident_size;
5076 mach_vm_size_t volatile_compressed_size;
5077 mach_vm_size_t volatile_pmap_size;
5078 mach_vm_size_t volatile_compressed_pmap_size;
5079 kern_return_t kr;
5080
5081 if (flavor == TASK_VM_INFO_PURGEABLE) {
5082 kr = vm_map_query_volatile(
5083 map,
5084 &volatile_virtual_size,
5085 &volatile_resident_size,
5086 &volatile_compressed_size,
5087 &volatile_pmap_size,
5088 &volatile_compressed_pmap_size);
5089 if (kr == KERN_SUCCESS) {
5090 vm_info->purgeable_volatile_pmap =
5091 volatile_pmap_size;
5092 if (radar_20146450) {
5093 vm_info->compressed -=
5094 volatile_compressed_pmap_size;
5095 }
5096 vm_info->purgeable_volatile_resident =
5097 volatile_resident_size;
5098 vm_info->purgeable_volatile_virtual =
5099 volatile_virtual_size;
5100 }
5101 }
5102 }
5103 *task_info_count = TASK_VM_INFO_REV0_COUNT;
5104
5105 if (original_task_info_count >= TASK_VM_INFO_REV1_COUNT) {
5106 vm_info->phys_footprint =
5107 (mach_vm_size_t) get_task_phys_footprint(task);
5108 *task_info_count = TASK_VM_INFO_REV1_COUNT;
5109 }
5110 if (original_task_info_count >= TASK_VM_INFO_REV2_COUNT) {
5111 vm_info->min_address = map->min_offset;
5112 vm_info->max_address = map->max_offset;
5113 *task_info_count = TASK_VM_INFO_REV2_COUNT;
5114 }
5115 if (original_task_info_count >= TASK_VM_INFO_REV3_COUNT) {
5116 ledger_get_lifetime_max(task->ledger,
5117 task_ledgers.phys_footprint,
5118 &vm_info->ledger_phys_footprint_peak);
5119 ledger_get_balance(task->ledger,
5120 task_ledgers.purgeable_nonvolatile,
5121 &vm_info->ledger_purgeable_nonvolatile);
5122 ledger_get_balance(task->ledger,
5123 task_ledgers.purgeable_nonvolatile_compressed,
5124 &vm_info->ledger_purgeable_novolatile_compressed);
5125 ledger_get_balance(task->ledger,
5126 task_ledgers.purgeable_volatile,
5127 &vm_info->ledger_purgeable_volatile);
5128 ledger_get_balance(task->ledger,
5129 task_ledgers.purgeable_volatile_compressed,
5130 &vm_info->ledger_purgeable_volatile_compressed);
5131 ledger_get_balance(task->ledger,
5132 task_ledgers.network_nonvolatile,
5133 &vm_info->ledger_tag_network_nonvolatile);
5134 ledger_get_balance(task->ledger,
5135 task_ledgers.network_nonvolatile_compressed,
5136 &vm_info->ledger_tag_network_nonvolatile_compressed);
5137 ledger_get_balance(task->ledger,
5138 task_ledgers.network_volatile,
5139 &vm_info->ledger_tag_network_volatile);
5140 ledger_get_balance(task->ledger,
5141 task_ledgers.network_volatile_compressed,
5142 &vm_info->ledger_tag_network_volatile_compressed);
5143 ledger_get_balance(task->ledger,
5144 task_ledgers.media_footprint,
5145 &vm_info->ledger_tag_media_footprint);
5146 ledger_get_balance(task->ledger,
5147 task_ledgers.media_footprint_compressed,
5148 &vm_info->ledger_tag_media_footprint_compressed);
5149 ledger_get_balance(task->ledger,
5150 task_ledgers.media_nofootprint,
5151 &vm_info->ledger_tag_media_nofootprint);
5152 ledger_get_balance(task->ledger,
5153 task_ledgers.media_nofootprint_compressed,
5154 &vm_info->ledger_tag_media_nofootprint_compressed);
5155 ledger_get_balance(task->ledger,
5156 task_ledgers.graphics_footprint,
5157 &vm_info->ledger_tag_graphics_footprint);
5158 ledger_get_balance(task->ledger,
5159 task_ledgers.graphics_footprint_compressed,
5160 &vm_info->ledger_tag_graphics_footprint_compressed);
5161 ledger_get_balance(task->ledger,
5162 task_ledgers.graphics_nofootprint,
5163 &vm_info->ledger_tag_graphics_nofootprint);
5164 ledger_get_balance(task->ledger,
5165 task_ledgers.graphics_nofootprint_compressed,
5166 &vm_info->ledger_tag_graphics_nofootprint_compressed);
5167 ledger_get_balance(task->ledger,
5168 task_ledgers.neural_footprint,
5169 &vm_info->ledger_tag_neural_footprint);
5170 ledger_get_balance(task->ledger,
5171 task_ledgers.neural_footprint_compressed,
5172 &vm_info->ledger_tag_neural_footprint_compressed);
5173 ledger_get_balance(task->ledger,
5174 task_ledgers.neural_nofootprint,
5175 &vm_info->ledger_tag_neural_nofootprint);
5176 ledger_get_balance(task->ledger,
5177 task_ledgers.neural_nofootprint_compressed,
5178 &vm_info->ledger_tag_neural_nofootprint_compressed);
5179 *task_info_count = TASK_VM_INFO_REV3_COUNT;
5180 }
5181 if (original_task_info_count >= TASK_VM_INFO_REV4_COUNT) {
5182 if (task->bsd_info) {
5183 vm_info->limit_bytes_remaining =
5184 memorystatus_available_memory_internal(task->bsd_info);
5185 } else {
5186 vm_info->limit_bytes_remaining = 0;
5187 }
5188 *task_info_count = TASK_VM_INFO_REV4_COUNT;
5189 }
5190 if (original_task_info_count >= TASK_VM_INFO_REV5_COUNT) {
5191 thread_t thread;
5192 integer_t total = task->decompressions;
5193 queue_iterate(&task->threads, thread, thread_t, task_threads) {
5194 total += thread->decompressions;
5195 }
5196 vm_info->decompressions = total;
5197 *task_info_count = TASK_VM_INFO_REV5_COUNT;
5198 }
5199
5200 if (task != kernel_task) {
5201 vm_map_unlock_read(map);
5202 }
5203
5204 break;
5205 }
5206
5207 case TASK_WAIT_STATE_INFO:
5208 {
5209 /*
5210 * Deprecated flavor. Currently allowing some results until all users
5211 * stop calling it. The results may not be accurate.
5212 */
5213 task_wait_state_info_t wait_state_info;
5214 uint64_t total_sfi_ledger_val = 0;
5215
5216 if (*task_info_count < TASK_WAIT_STATE_INFO_COUNT) {
5217 error = KERN_INVALID_ARGUMENT;
5218 break;
5219 }
5220
5221 wait_state_info = (task_wait_state_info_t) task_info_out;
5222
5223 wait_state_info->total_wait_state_time = 0;
5224 bzero(wait_state_info->_reserved, sizeof(wait_state_info->_reserved));
5225
5226 #if CONFIG_SCHED_SFI
5227 int i, prev_lentry = -1;
5228 int64_t val_credit, val_debit;
5229
5230 for (i = 0; i < MAX_SFI_CLASS_ID; i++) {
5231 val_credit = 0;
5232 /*
5233 * checking with prev_lentry != entry ensures adjacent classes
5234 * which share the same ledger do not add wait times twice.
5235 * Note: Use ledger() call to get data for each individual sfi class.
5236 */
5237 if (prev_lentry != task_ledgers.sfi_wait_times[i] &&
5238 KERN_SUCCESS == ledger_get_entries(task->ledger,
5239 task_ledgers.sfi_wait_times[i], &val_credit, &val_debit)) {
5240 total_sfi_ledger_val += val_credit;
5241 }
5242 prev_lentry = task_ledgers.sfi_wait_times[i];
5243 }
5244
5245 #endif /* CONFIG_SCHED_SFI */
5246 wait_state_info->total_wait_sfi_state_time = total_sfi_ledger_val;
5247 *task_info_count = TASK_WAIT_STATE_INFO_COUNT;
5248
5249 break;
5250 }
5251 case TASK_VM_INFO_PURGEABLE_ACCOUNT:
5252 {
5253 #if DEVELOPMENT || DEBUG
5254 pvm_account_info_t acnt_info;
5255
5256 if (*task_info_count < PVM_ACCOUNT_INFO_COUNT) {
5257 error = KERN_INVALID_ARGUMENT;
5258 break;
5259 }
5260
5261 if (task_info_out == NULL) {
5262 error = KERN_INVALID_ARGUMENT;
5263 break;
5264 }
5265
5266 acnt_info = (pvm_account_info_t) task_info_out;
5267
5268 error = vm_purgeable_account(task, acnt_info);
5269
5270 *task_info_count = PVM_ACCOUNT_INFO_COUNT;
5271
5272 break;
5273 #else /* DEVELOPMENT || DEBUG */
5274 error = KERN_NOT_SUPPORTED;
5275 break;
5276 #endif /* DEVELOPMENT || DEBUG */
5277 }
5278 case TASK_FLAGS_INFO:
5279 {
5280 task_flags_info_t flags_info;
5281
5282 if (*task_info_count < TASK_FLAGS_INFO_COUNT) {
5283 error = KERN_INVALID_ARGUMENT;
5284 break;
5285 }
5286
5287 flags_info = (task_flags_info_t)task_info_out;
5288
5289 /* only publish the 64-bit flag of the task */
5290 flags_info->flags = task->t_flags & (TF_64B_ADDR | TF_64B_DATA);
5291
5292 *task_info_count = TASK_FLAGS_INFO_COUNT;
5293 break;
5294 }
5295
5296 case TASK_DEBUG_INFO_INTERNAL:
5297 {
5298 #if DEVELOPMENT || DEBUG
5299 task_debug_info_internal_t dbg_info;
5300 ipc_space_t space = task->itk_space;
5301 if (*task_info_count < TASK_DEBUG_INFO_INTERNAL_COUNT) {
5302 error = KERN_NOT_SUPPORTED;
5303 break;
5304 }
5305
5306 if (task_info_out == NULL) {
5307 error = KERN_INVALID_ARGUMENT;
5308 break;
5309 }
5310 dbg_info = (task_debug_info_internal_t) task_info_out;
5311 dbg_info->ipc_space_size = 0;
5312
5313 if (space) {
5314 is_read_lock(space);
5315 dbg_info->ipc_space_size = space->is_table_size;
5316 is_read_unlock(space);
5317 }
5318
5319 dbg_info->suspend_count = task->suspend_count;
5320
5321 error = KERN_SUCCESS;
5322 *task_info_count = TASK_DEBUG_INFO_INTERNAL_COUNT;
5323 break;
5324 #else /* DEVELOPMENT || DEBUG */
5325 error = KERN_NOT_SUPPORTED;
5326 break;
5327 #endif /* DEVELOPMENT || DEBUG */
5328 }
5329 default:
5330 error = KERN_INVALID_ARGUMENT;
5331 }
5332
5333 task_unlock(task);
5334 return error;
5335 }
5336
5337 /*
5338 * task_info_from_user
5339 *
5340 * When calling task_info from user space,
5341 * this function will be executed as mig server side
5342 * instead of calling directly into task_info.
5343 * This gives the possibility to perform more security
5344 * checks on task_port.
5345 *
5346 * In the case of TASK_DYLD_INFO, we require the more
5347 * privileged task_port not the less-privileged task_name_port.
5348 *
5349 */
5350 kern_return_t
5351 task_info_from_user(
5352 mach_port_t task_port,
5353 task_flavor_t flavor,
5354 task_info_t task_info_out,
5355 mach_msg_type_number_t *task_info_count)
5356 {
5357 task_t task;
5358 kern_return_t ret;
5359
5360 if (flavor == TASK_DYLD_INFO) {
5361 task = convert_port_to_task(task_port);
5362 } else {
5363 task = convert_port_to_task_name(task_port);
5364 }
5365
5366 ret = task_info(task, flavor, task_info_out, task_info_count);
5367
5368 task_deallocate(task);
5369
5370 return ret;
5371 }
5372
5373 /*
5374 * task_power_info
5375 *
5376 * Returns power stats for the task.
5377 * Note: Called with task locked.
5378 */
5379 void
5380 task_power_info_locked(
5381 task_t task,
5382 task_power_info_t info,
5383 gpu_energy_data_t ginfo,
5384 task_power_info_v2_t infov2,
5385 uint64_t *runnable_time)
5386 {
5387 thread_t thread;
5388 ledger_amount_t tmp;
5389
5390 uint64_t runnable_time_sum = 0;
5391
5392 task_lock_assert_owned(task);
5393
5394 ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
5395 (ledger_amount_t *)&info->task_interrupt_wakeups, &tmp);
5396 ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
5397 (ledger_amount_t *)&info->task_platform_idle_wakeups, &tmp);
5398
5399 info->task_timer_wakeups_bin_1 = task->task_timer_wakeups_bin_1;
5400 info->task_timer_wakeups_bin_2 = task->task_timer_wakeups_bin_2;
5401
5402 info->total_user = task->total_user_time;
5403 info->total_system = task->total_system_time;
5404 runnable_time_sum = task->total_runnable_time;
5405
5406 #if CONFIG_EMBEDDED
5407 if (infov2) {
5408 infov2->task_energy = task->task_energy;
5409 }
5410 #endif
5411
5412 if (ginfo) {
5413 ginfo->task_gpu_utilisation = task->task_gpu_ns;
5414 }
5415
5416 if (infov2) {
5417 infov2->task_ptime = task->total_ptime;
5418 infov2->task_pset_switches = task->ps_switch;
5419 }
5420
5421 queue_iterate(&task->threads, thread, thread_t, task_threads) {
5422 uint64_t tval;
5423 spl_t x;
5424
5425 if (thread->options & TH_OPT_IDLE_THREAD) {
5426 continue;
5427 }
5428
5429 x = splsched();
5430 thread_lock(thread);
5431
5432 info->task_timer_wakeups_bin_1 += thread->thread_timer_wakeups_bin_1;
5433 info->task_timer_wakeups_bin_2 += thread->thread_timer_wakeups_bin_2;
5434
5435 #if CONFIG_EMBEDDED
5436 if (infov2) {
5437 infov2->task_energy += ml_energy_stat(thread);
5438 }
5439 #endif
5440
5441 tval = timer_grab(&thread->user_timer);
5442 info->total_user += tval;
5443
5444 if (infov2) {
5445 tval = timer_grab(&thread->ptime);
5446 infov2->task_ptime += tval;
5447 infov2->task_pset_switches += thread->ps_switch;
5448 }
5449
5450 tval = timer_grab(&thread->system_timer);
5451 if (thread->precise_user_kernel_time) {
5452 info->total_system += tval;
5453 } else {
5454 /* system_timer may represent either sys or user */
5455 info->total_user += tval;
5456 }
5457
5458 tval = timer_grab(&thread->runnable_timer);
5459
5460 runnable_time_sum += tval;
5461
5462 if (ginfo) {
5463 ginfo->task_gpu_utilisation += ml_gpu_stat(thread);
5464 }
5465 thread_unlock(thread);
5466 splx(x);
5467 }
5468
5469 if (runnable_time) {
5470 *runnable_time = runnable_time_sum;
5471 }
5472 }
5473
5474 /*
5475 * task_gpu_utilisation
5476 *
5477 * Returns the total gpu time used by the all the threads of the task
5478 * (both dead and alive)
5479 */
5480 uint64_t
5481 task_gpu_utilisation(
5482 task_t task)
5483 {
5484 uint64_t gpu_time = 0;
5485 #if !CONFIG_EMBEDDED
5486 thread_t thread;
5487
5488 task_lock(task);
5489 gpu_time += task->task_gpu_ns;
5490
5491 queue_iterate(&task->threads, thread, thread_t, task_threads) {
5492 spl_t x;
5493 x = splsched();
5494 thread_lock(thread);
5495 gpu_time += ml_gpu_stat(thread);
5496 thread_unlock(thread);
5497 splx(x);
5498 }
5499
5500 task_unlock(task);
5501 #else /* CONFIG_EMBEDDED */
5502 /* silence compiler warning */
5503 (void)task;
5504 #endif /* !CONFIG_EMBEDDED */
5505 return gpu_time;
5506 }
5507
5508 /*
5509 * task_energy
5510 *
5511 * Returns the total energy used by the all the threads of the task
5512 * (both dead and alive)
5513 */
5514 uint64_t
5515 task_energy(
5516 task_t task)
5517 {
5518 uint64_t energy = 0;
5519 thread_t thread;
5520
5521 task_lock(task);
5522 energy += task->task_energy;
5523
5524 queue_iterate(&task->threads, thread, thread_t, task_threads) {
5525 spl_t x;
5526 x = splsched();
5527 thread_lock(thread);
5528 energy += ml_energy_stat(thread);
5529 thread_unlock(thread);
5530 splx(x);
5531 }
5532
5533 task_unlock(task);
5534 return energy;
5535 }
5536
5537 #if __AMP__
5538
5539 uint64_t
5540 task_cpu_ptime(
5541 task_t task)
5542 {
5543 uint64_t cpu_ptime = 0;
5544 thread_t thread;
5545
5546 task_lock(task);
5547 cpu_ptime += task->total_ptime;
5548
5549 queue_iterate(&task->threads, thread, thread_t, task_threads) {
5550 cpu_ptime += timer_grab(&thread->ptime);
5551 }
5552
5553 task_unlock(task);
5554 return cpu_ptime;
5555 }
5556
5557 #else /* __AMP__ */
5558
5559 uint64_t
5560 task_cpu_ptime(
5561 __unused task_t task)
5562 {
5563 return 0;
5564 }
5565
5566 #endif /* __AMP__ */
5567
5568 /* This function updates the cpu time in the arrays for each
5569 * effective and requested QoS class
5570 */
5571 void
5572 task_update_cpu_time_qos_stats(
5573 task_t task,
5574 uint64_t *eqos_stats,
5575 uint64_t *rqos_stats)
5576 {
5577 if (!eqos_stats && !rqos_stats) {
5578 return;
5579 }
5580
5581 task_lock(task);
5582 thread_t thread;
5583 queue_iterate(&task->threads, thread, thread_t, task_threads) {
5584 if (thread->options & TH_OPT_IDLE_THREAD) {
5585 continue;
5586 }
5587
5588 thread_update_qos_cpu_time(thread);
5589 }
5590
5591 if (eqos_stats) {
5592 eqos_stats[THREAD_QOS_DEFAULT] += task->cpu_time_eqos_stats.cpu_time_qos_default;
5593 eqos_stats[THREAD_QOS_MAINTENANCE] += task->cpu_time_eqos_stats.cpu_time_qos_maintenance;
5594 eqos_stats[THREAD_QOS_BACKGROUND] += task->cpu_time_eqos_stats.cpu_time_qos_background;
5595 eqos_stats[THREAD_QOS_UTILITY] += task->cpu_time_eqos_stats.cpu_time_qos_utility;
5596 eqos_stats[THREAD_QOS_LEGACY] += task->cpu_time_eqos_stats.cpu_time_qos_legacy;
5597 eqos_stats[THREAD_QOS_USER_INITIATED] += task->cpu_time_eqos_stats.cpu_time_qos_user_initiated;
5598 eqos_stats[THREAD_QOS_USER_INTERACTIVE] += task->cpu_time_eqos_stats.cpu_time_qos_user_interactive;
5599 }
5600
5601 if (rqos_stats) {
5602 rqos_stats[THREAD_QOS_DEFAULT] += task->cpu_time_rqos_stats.cpu_time_qos_default;
5603 rqos_stats[THREAD_QOS_MAINTENANCE] += task->cpu_time_rqos_stats.cpu_time_qos_maintenance;
5604 rqos_stats[THREAD_QOS_BACKGROUND] += task->cpu_time_rqos_stats.cpu_time_qos_background;
5605 rqos_stats[THREAD_QOS_UTILITY] += task->cpu_time_rqos_stats.cpu_time_qos_utility;
5606 rqos_stats[THREAD_QOS_LEGACY] += task->cpu_time_rqos_stats.cpu_time_qos_legacy;
5607 rqos_stats[THREAD_QOS_USER_INITIATED] += task->cpu_time_rqos_stats.cpu_time_qos_user_initiated;
5608 rqos_stats[THREAD_QOS_USER_INTERACTIVE] += task->cpu_time_rqos_stats.cpu_time_qos_user_interactive;
5609 }
5610
5611 task_unlock(task);
5612 }
5613
5614 kern_return_t
5615 task_purgable_info(
5616 task_t task,
5617 task_purgable_info_t *stats)
5618 {
5619 if (task == TASK_NULL || stats == NULL) {
5620 return KERN_INVALID_ARGUMENT;
5621 }
5622 /* Take task reference */
5623 task_reference(task);
5624 vm_purgeable_stats((vm_purgeable_info_t)stats, task);
5625 /* Drop task reference */
5626 task_deallocate(task);
5627 return KERN_SUCCESS;
5628 }
5629
5630 void
5631 task_vtimer_set(
5632 task_t task,
5633 integer_t which)
5634 {
5635 thread_t thread;
5636 spl_t x;
5637
5638 task_lock(task);
5639
5640 task->vtimers |= which;
5641
5642 switch (which) {
5643 case TASK_VTIMER_USER:
5644 queue_iterate(&task->threads, thread, thread_t, task_threads) {
5645 x = splsched();
5646 thread_lock(thread);
5647 if (thread->precise_user_kernel_time) {
5648 thread->vtimer_user_save = timer_grab(&thread->user_timer);
5649 } else {
5650 thread->vtimer_user_save = timer_grab(&thread->system_timer);
5651 }
5652 thread_unlock(thread);
5653 splx(x);
5654 }
5655 break;
5656
5657 case TASK_VTIMER_PROF:
5658 queue_iterate(&task->threads, thread, thread_t, task_threads) {
5659 x = splsched();
5660 thread_lock(thread);
5661 thread->vtimer_prof_save = timer_grab(&thread->user_timer);
5662 thread->vtimer_prof_save += timer_grab(&thread->system_timer);
5663 thread_unlock(thread);
5664 splx(x);
5665 }
5666 break;
5667
5668 case TASK_VTIMER_RLIM:
5669 queue_iterate(&task->threads, thread, thread_t, task_threads) {
5670 x = splsched();
5671 thread_lock(thread);
5672 thread->vtimer_rlim_save = timer_grab(&thread->user_timer);
5673 thread->vtimer_rlim_save += timer_grab(&thread->system_timer);
5674 thread_unlock(thread);
5675 splx(x);
5676 }
5677 break;
5678 }
5679
5680 task_unlock(task);
5681 }
5682
5683 void
5684 task_vtimer_clear(
5685 task_t task,
5686 integer_t which)
5687 {
5688 assert(task == current_task());
5689
5690 task_lock(task);
5691
5692 task->vtimers &= ~which;
5693
5694 task_unlock(task);
5695 }
5696
5697 void
5698 task_vtimer_update(
5699 __unused
5700 task_t task,
5701 integer_t which,
5702 uint32_t *microsecs)
5703 {
5704 thread_t thread = current_thread();
5705 uint32_t tdelt = 0;
5706 clock_sec_t secs = 0;
5707 uint64_t tsum;
5708
5709 assert(task == current_task());
5710
5711 spl_t s = splsched();
5712 thread_lock(thread);
5713
5714 if ((task->vtimers & which) != (uint32_t)which) {
5715 thread_unlock(thread);
5716 splx(s);
5717 return;
5718 }
5719
5720 switch (which) {
5721 case TASK_VTIMER_USER:
5722 if (thread->precise_user_kernel_time) {
5723 tdelt = (uint32_t)timer_delta(&thread->user_timer,
5724 &thread->vtimer_user_save);
5725 } else {
5726 tdelt = (uint32_t)timer_delta(&thread->system_timer,
5727 &thread->vtimer_user_save);
5728 }
5729 absolutetime_to_microtime(tdelt, &secs, microsecs);
5730 break;
5731
5732 case TASK_VTIMER_PROF:
5733 tsum = timer_grab(&thread->user_timer);
5734 tsum += timer_grab(&thread->system_timer);
5735 tdelt = (uint32_t)(tsum - thread->vtimer_prof_save);
5736 absolutetime_to_microtime(tdelt, &secs, microsecs);
5737 /* if the time delta is smaller than a usec, ignore */
5738 if (*microsecs != 0) {
5739 thread->vtimer_prof_save = tsum;
5740 }
5741 break;
5742
5743 case TASK_VTIMER_RLIM:
5744 tsum = timer_grab(&thread->user_timer);
5745 tsum += timer_grab(&thread->system_timer);
5746 tdelt = (uint32_t)(tsum - thread->vtimer_rlim_save);
5747 thread->vtimer_rlim_save = tsum;
5748 absolutetime_to_microtime(tdelt, &secs, microsecs);
5749 break;
5750 }
5751
5752 thread_unlock(thread);
5753 splx(s);
5754 }
5755
5756 /*
5757 * task_assign:
5758 *
5759 * Change the assigned processor set for the task
5760 */
5761 kern_return_t
5762 task_assign(
5763 __unused task_t task,
5764 __unused processor_set_t new_pset,
5765 __unused boolean_t assign_threads)
5766 {
5767 return KERN_FAILURE;
5768 }
5769
5770 /*
5771 * task_assign_default:
5772 *
5773 * Version of task_assign to assign to default processor set.
5774 */
5775 kern_return_t
5776 task_assign_default(
5777 task_t task,
5778 boolean_t assign_threads)
5779 {
5780 return task_assign(task, &pset0, assign_threads);
5781 }
5782
5783 /*
5784 * task_get_assignment
5785 *
5786 * Return name of processor set that task is assigned to.
5787 */
5788 kern_return_t
5789 task_get_assignment(
5790 task_t task,
5791 processor_set_t *pset)
5792 {
5793 if (!task || !task->active) {
5794 return KERN_FAILURE;
5795 }
5796
5797 *pset = &pset0;
5798
5799 return KERN_SUCCESS;
5800 }
5801
5802 uint64_t
5803 get_task_dispatchqueue_offset(
5804 task_t task)
5805 {
5806 return task->dispatchqueue_offset;
5807 }
5808
5809 /*
5810 * task_policy
5811 *
5812 * Set scheduling policy and parameters, both base and limit, for
5813 * the given task. Policy must be a policy which is enabled for the
5814 * processor set. Change contained threads if requested.
5815 */
5816 kern_return_t
5817 task_policy(
5818 __unused task_t task,
5819 __unused policy_t policy_id,
5820 __unused policy_base_t base,
5821 __unused mach_msg_type_number_t count,
5822 __unused boolean_t set_limit,
5823 __unused boolean_t change)
5824 {
5825 return KERN_FAILURE;
5826 }
5827
5828 /*
5829 * task_set_policy
5830 *
5831 * Set scheduling policy and parameters, both base and limit, for
5832 * the given task. Policy can be any policy implemented by the
5833 * processor set, whether enabled or not. Change contained threads
5834 * if requested.
5835 */
5836 kern_return_t
5837 task_set_policy(
5838 __unused task_t task,
5839 __unused processor_set_t pset,
5840 __unused policy_t policy_id,
5841 __unused policy_base_t base,
5842 __unused mach_msg_type_number_t base_count,
5843 __unused policy_limit_t limit,
5844 __unused mach_msg_type_number_t limit_count,
5845 __unused boolean_t change)
5846 {
5847 return KERN_FAILURE;
5848 }
5849
5850 kern_return_t
5851 task_set_ras_pc(
5852 __unused task_t task,
5853 __unused vm_offset_t pc,
5854 __unused vm_offset_t endpc)
5855 {
5856 return KERN_FAILURE;
5857 }
5858
5859 void
5860 task_synchronizer_destroy_all(task_t task)
5861 {
5862 /*
5863 * Destroy owned semaphores
5864 */
5865 semaphore_destroy_all(task);
5866 }
5867
5868 /*
5869 * Install default (machine-dependent) initial thread state
5870 * on the task. Subsequent thread creation will have this initial
5871 * state set on the thread by machine_thread_inherit_taskwide().
5872 * Flavors and structures are exactly the same as those to thread_set_state()
5873 */
5874 kern_return_t
5875 task_set_state(
5876 task_t task,
5877 int flavor,
5878 thread_state_t state,
5879 mach_msg_type_number_t state_count)
5880 {
5881 kern_return_t ret;
5882
5883 if (task == TASK_NULL) {
5884 return KERN_INVALID_ARGUMENT;
5885 }
5886
5887 task_lock(task);
5888
5889 if (!task->active) {
5890 task_unlock(task);
5891 return KERN_FAILURE;
5892 }
5893
5894 ret = machine_task_set_state(task, flavor, state, state_count);
5895
5896 task_unlock(task);
5897 return ret;
5898 }
5899
5900 /*
5901 * Examine the default (machine-dependent) initial thread state
5902 * on the task, as set by task_set_state(). Flavors and structures
5903 * are exactly the same as those passed to thread_get_state().
5904 */
5905 kern_return_t
5906 task_get_state(
5907 task_t task,
5908 int flavor,
5909 thread_state_t state,
5910 mach_msg_type_number_t *state_count)
5911 {
5912 kern_return_t ret;
5913
5914 if (task == TASK_NULL) {
5915 return KERN_INVALID_ARGUMENT;
5916 }
5917
5918 task_lock(task);
5919
5920 if (!task->active) {
5921 task_unlock(task);
5922 return KERN_FAILURE;
5923 }
5924
5925 ret = machine_task_get_state(task, flavor, state, state_count);
5926
5927 task_unlock(task);
5928 return ret;
5929 }
5930
5931
5932 static kern_return_t __attribute__((noinline, not_tail_called))
5933 PROC_VIOLATED_GUARD__SEND_EXC_GUARD_AND_SUSPEND(
5934 mach_exception_code_t code,
5935 mach_exception_subcode_t subcode,
5936 void *reason)
5937 {
5938 #ifdef MACH_BSD
5939 if (1 == proc_selfpid()) {
5940 return KERN_NOT_SUPPORTED; // initproc is immune
5941 }
5942 #endif
5943 mach_exception_data_type_t codes[EXCEPTION_CODE_MAX] = {
5944 [0] = code,
5945 [1] = subcode,
5946 };
5947 task_t task = current_task();
5948 kern_return_t kr;
5949
5950 /* (See jetsam-related comments below) */
5951
5952 proc_memstat_terminated(task->bsd_info, TRUE);
5953 kr = task_enqueue_exception_with_corpse(task, EXC_GUARD, codes, 2, reason);
5954 proc_memstat_terminated(task->bsd_info, FALSE);
5955 return kr;
5956 }
5957
5958 kern_return_t
5959 task_violated_guard(
5960 mach_exception_code_t code,
5961 mach_exception_subcode_t subcode,
5962 void *reason)
5963 {
5964 return PROC_VIOLATED_GUARD__SEND_EXC_GUARD_AND_SUSPEND(code, subcode, reason);
5965 }
5966
5967
5968 #if CONFIG_MEMORYSTATUS
5969
5970 boolean_t
5971 task_get_memlimit_is_active(task_t task)
5972 {
5973 assert(task != NULL);
5974
5975 if (task->memlimit_is_active == 1) {
5976 return TRUE;
5977 } else {
5978 return FALSE;
5979 }
5980 }
5981
5982 void
5983 task_set_memlimit_is_active(task_t task, boolean_t memlimit_is_active)
5984 {
5985 assert(task != NULL);
5986
5987 if (memlimit_is_active) {
5988 task->memlimit_is_active = 1;
5989 } else {
5990 task->memlimit_is_active = 0;
5991 }
5992 }
5993
5994 boolean_t
5995 task_get_memlimit_is_fatal(task_t task)
5996 {
5997 assert(task != NULL);
5998
5999 if (task->memlimit_is_fatal == 1) {
6000 return TRUE;
6001 } else {
6002 return FALSE;
6003 }
6004 }
6005
6006 void
6007 task_set_memlimit_is_fatal(task_t task, boolean_t memlimit_is_fatal)
6008 {
6009 assert(task != NULL);
6010
6011 if (memlimit_is_fatal) {
6012 task->memlimit_is_fatal = 1;
6013 } else {
6014 task->memlimit_is_fatal = 0;
6015 }
6016 }
6017
6018 boolean_t
6019 task_has_triggered_exc_resource(task_t task, boolean_t memlimit_is_active)
6020 {
6021 boolean_t triggered = FALSE;
6022
6023 assert(task == current_task());
6024
6025 /*
6026 * Returns true, if task has already triggered an exc_resource exception.
6027 */
6028
6029 if (memlimit_is_active) {
6030 triggered = (task->memlimit_active_exc_resource ? TRUE : FALSE);
6031 } else {
6032 triggered = (task->memlimit_inactive_exc_resource ? TRUE : FALSE);
6033 }
6034
6035 return triggered;
6036 }
6037
6038 void
6039 task_mark_has_triggered_exc_resource(task_t task, boolean_t memlimit_is_active)
6040 {
6041 assert(task == current_task());
6042
6043 /*
6044 * We allow one exc_resource per process per active/inactive limit.
6045 * The limit's fatal attribute does not come into play.
6046 */
6047
6048 if (memlimit_is_active) {
6049 task->memlimit_active_exc_resource = 1;
6050 } else {
6051 task->memlimit_inactive_exc_resource = 1;
6052 }
6053 }
6054
6055 #define HWM_USERCORE_MINSPACE 250 // free space (in MB) required *after* core file creation
6056
6057 void __attribute__((noinline))
6058 PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb, boolean_t is_fatal)
6059 {
6060 task_t task = current_task();
6061 int pid = 0;
6062 const char *procname = "unknown";
6063 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
6064 boolean_t send_sync_exc_resource = FALSE;
6065
6066 #ifdef MACH_BSD
6067 pid = proc_selfpid();
6068
6069 if (pid == 1) {
6070 /*
6071 * Cannot have ReportCrash analyzing
6072 * a suspended initproc.
6073 */
6074 return;
6075 }
6076
6077 if (task->bsd_info != NULL) {
6078 procname = proc_name_address(current_task()->bsd_info);
6079 send_sync_exc_resource = proc_send_synchronous_EXC_RESOURCE(current_task()->bsd_info);
6080 }
6081 #endif
6082 #if CONFIG_COREDUMP
6083 if (hwm_user_cores) {
6084 int error;
6085 uint64_t starttime, end;
6086 clock_sec_t secs = 0;
6087 uint32_t microsecs = 0;
6088
6089 starttime = mach_absolute_time();
6090 /*
6091 * Trigger a coredump of this process. Don't proceed unless we know we won't
6092 * be filling up the disk; and ignore the core size resource limit for this
6093 * core file.
6094 */
6095 if ((error = coredump(current_task()->bsd_info, HWM_USERCORE_MINSPACE, COREDUMP_IGNORE_ULIMIT)) != 0) {
6096 printf("couldn't take coredump of %s[%d]: %d\n", procname, pid, error);
6097 }
6098 /*
6099 * coredump() leaves the task suspended.
6100 */
6101 task_resume_internal(current_task());
6102
6103 end = mach_absolute_time();
6104 absolutetime_to_microtime(end - starttime, &secs, &microsecs);
6105 printf("coredump of %s[%d] taken in %d secs %d microsecs\n",
6106 proc_name_address(current_task()->bsd_info), pid, (int)secs, microsecs);
6107 }
6108 #endif /* CONFIG_COREDUMP */
6109
6110 if (disable_exc_resource) {
6111 printf("process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
6112 "supressed by a boot-arg.\n", procname, pid, max_footprint_mb);
6113 return;
6114 }
6115
6116 /*
6117 * A task that has triggered an EXC_RESOURCE, should not be
6118 * jetsammed when the device is under memory pressure. Here
6119 * we set the P_MEMSTAT_TERMINATED flag so that the process
6120 * will be skipped if the memorystatus_thread wakes up.
6121 */
6122 proc_memstat_terminated(current_task()->bsd_info, TRUE);
6123
6124 code[0] = code[1] = 0;
6125 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_MEMORY);
6126 EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_HIGH_WATERMARK);
6127 EXC_RESOURCE_HWM_ENCODE_LIMIT(code[0], max_footprint_mb);
6128
6129 /*
6130 * Do not generate a corpse fork if the violation is a fatal one
6131 * or the process wants synchronous EXC_RESOURCE exceptions.
6132 */
6133 if (is_fatal || send_sync_exc_resource || exc_via_corpse_forking == 0) {
6134 /* Do not send a EXC_RESOURCE if corpse_for_fatal_memkill is set */
6135 if (send_sync_exc_resource || corpse_for_fatal_memkill == 0) {
6136 /*
6137 * Use the _internal_ variant so that no user-space
6138 * process can resume our task from under us.
6139 */
6140 task_suspend_internal(task);
6141 exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
6142 task_resume_internal(task);
6143 }
6144 } else {
6145 if (audio_active) {
6146 printf("process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
6147 "supressed due to audio playback.\n", procname, pid, max_footprint_mb);
6148 } else {
6149 task_enqueue_exception_with_corpse(task, EXC_RESOURCE,
6150 code, EXCEPTION_CODE_MAX, NULL);
6151 }
6152 }
6153
6154 /*
6155 * After the EXC_RESOURCE has been handled, we must clear the
6156 * P_MEMSTAT_TERMINATED flag so that the process can again be
6157 * considered for jetsam if the memorystatus_thread wakes up.
6158 */
6159 proc_memstat_terminated(current_task()->bsd_info, FALSE); /* clear the flag */
6160 }
6161
6162 /*
6163 * Callback invoked when a task exceeds its physical footprint limit.
6164 */
6165 void
6166 task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1)
6167 {
6168 ledger_amount_t max_footprint, max_footprint_mb;
6169 task_t task;
6170 boolean_t is_warning;
6171 boolean_t memlimit_is_active;
6172 boolean_t memlimit_is_fatal;
6173
6174 if (warning == LEDGER_WARNING_DIPPED_BELOW) {
6175 /*
6176 * Task memory limits only provide a warning on the way up.
6177 */
6178 return;
6179 } else if (warning == LEDGER_WARNING_ROSE_ABOVE) {
6180 /*
6181 * This task is in danger of violating a memory limit,
6182 * It has exceeded a percentage level of the limit.
6183 */
6184 is_warning = TRUE;
6185 } else {
6186 /*
6187 * The task has exceeded the physical footprint limit.
6188 * This is not a warning but a true limit violation.
6189 */
6190 is_warning = FALSE;
6191 }
6192
6193 task = current_task();
6194
6195 ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &max_footprint);
6196 max_footprint_mb = max_footprint >> 20;
6197
6198 memlimit_is_active = task_get_memlimit_is_active(task);
6199 memlimit_is_fatal = task_get_memlimit_is_fatal(task);
6200
6201 /*
6202 * If this is an actual violation (not a warning), then generate EXC_RESOURCE exception.
6203 * We only generate the exception once per process per memlimit (active/inactive limit).
6204 * To enforce this, we monitor state based on the memlimit's active/inactive attribute
6205 * and we disable it by marking that memlimit as exception triggered.
6206 */
6207 if ((is_warning == FALSE) && (!task_has_triggered_exc_resource(task, memlimit_is_active))) {
6208 PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND((int)max_footprint_mb, memlimit_is_fatal);
6209 memorystatus_log_exception((int)max_footprint_mb, memlimit_is_active, memlimit_is_fatal);
6210 task_mark_has_triggered_exc_resource(task, memlimit_is_active);
6211 }
6212
6213 memorystatus_on_ledger_footprint_exceeded(is_warning, memlimit_is_active, memlimit_is_fatal);
6214 }
6215
6216 extern int proc_check_footprint_priv(void);
6217
6218 kern_return_t
6219 task_set_phys_footprint_limit(
6220 task_t task,
6221 int new_limit_mb,
6222 int *old_limit_mb)
6223 {
6224 kern_return_t error;
6225
6226 boolean_t memlimit_is_active;
6227 boolean_t memlimit_is_fatal;
6228
6229 if ((error = proc_check_footprint_priv())) {
6230 return KERN_NO_ACCESS;
6231 }
6232
6233 /*
6234 * This call should probably be obsoleted.
6235 * But for now, we default to current state.
6236 */
6237 memlimit_is_active = task_get_memlimit_is_active(task);
6238 memlimit_is_fatal = task_get_memlimit_is_fatal(task);
6239
6240 return task_set_phys_footprint_limit_internal(task, new_limit_mb, old_limit_mb, memlimit_is_active, memlimit_is_fatal);
6241 }
6242
6243 kern_return_t
6244 task_convert_phys_footprint_limit(
6245 int limit_mb,
6246 int *converted_limit_mb)
6247 {
6248 if (limit_mb == -1) {
6249 /*
6250 * No limit
6251 */
6252 if (max_task_footprint != 0) {
6253 *converted_limit_mb = (int)(max_task_footprint / 1024 / 1024); /* bytes to MB */
6254 } else {
6255 *converted_limit_mb = (int)(LEDGER_LIMIT_INFINITY >> 20);
6256 }
6257 } else {
6258 /* nothing to convert */
6259 *converted_limit_mb = limit_mb;
6260 }
6261 return KERN_SUCCESS;
6262 }
6263
6264
6265 kern_return_t
6266 task_set_phys_footprint_limit_internal(
6267 task_t task,
6268 int new_limit_mb,
6269 int *old_limit_mb,
6270 boolean_t memlimit_is_active,
6271 boolean_t memlimit_is_fatal)
6272 {
6273 ledger_amount_t old;
6274 kern_return_t ret;
6275
6276 ret = ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &old);
6277
6278 if (ret != KERN_SUCCESS) {
6279 return ret;
6280 }
6281
6282 /*
6283 * Check that limit >> 20 will not give an "unexpected" 32-bit
6284 * result. There are, however, implicit assumptions that -1 mb limit
6285 * equates to LEDGER_LIMIT_INFINITY.
6286 */
6287 assert(((old & 0xFFF0000000000000LL) == 0) || (old == LEDGER_LIMIT_INFINITY));
6288
6289 if (old_limit_mb) {
6290 *old_limit_mb = (int)(old >> 20);
6291 }
6292
6293 if (new_limit_mb == -1) {
6294 /*
6295 * Caller wishes to remove the limit.
6296 */
6297 ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
6298 max_task_footprint ? max_task_footprint : LEDGER_LIMIT_INFINITY,
6299 max_task_footprint ? max_task_footprint_warning_level : 0);
6300
6301 task_lock(task);
6302 task_set_memlimit_is_active(task, memlimit_is_active);
6303 task_set_memlimit_is_fatal(task, memlimit_is_fatal);
6304 task_unlock(task);
6305
6306 return KERN_SUCCESS;
6307 }
6308
6309 #ifdef CONFIG_NOMONITORS
6310 return KERN_SUCCESS;
6311 #endif /* CONFIG_NOMONITORS */
6312
6313 task_lock(task);
6314
6315 if ((memlimit_is_active == task_get_memlimit_is_active(task)) &&
6316 (memlimit_is_fatal == task_get_memlimit_is_fatal(task)) &&
6317 (((ledger_amount_t)new_limit_mb << 20) == old)) {
6318 /*
6319 * memlimit state is not changing
6320 */
6321 task_unlock(task);
6322 return KERN_SUCCESS;
6323 }
6324
6325 task_set_memlimit_is_active(task, memlimit_is_active);
6326 task_set_memlimit_is_fatal(task, memlimit_is_fatal);
6327
6328 ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
6329 (ledger_amount_t)new_limit_mb << 20, PHYS_FOOTPRINT_WARNING_LEVEL);
6330
6331 if (task == current_task()) {
6332 ledger_check_new_balance(current_thread(), task->ledger,
6333 task_ledgers.phys_footprint);
6334 }
6335
6336 task_unlock(task);
6337
6338 return KERN_SUCCESS;
6339 }
6340
6341 kern_return_t
6342 task_get_phys_footprint_limit(
6343 task_t task,
6344 int *limit_mb)
6345 {
6346 ledger_amount_t limit;
6347 kern_return_t ret;
6348
6349 ret = ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &limit);
6350 if (ret != KERN_SUCCESS) {
6351 return ret;
6352 }
6353
6354 /*
6355 * Check that limit >> 20 will not give an "unexpected" signed, 32-bit
6356 * result. There are, however, implicit assumptions that -1 mb limit
6357 * equates to LEDGER_LIMIT_INFINITY.
6358 */
6359 assert(((limit & 0xFFF0000000000000LL) == 0) || (limit == LEDGER_LIMIT_INFINITY));
6360 *limit_mb = (int)(limit >> 20);
6361
6362 return KERN_SUCCESS;
6363 }
6364 #else /* CONFIG_MEMORYSTATUS */
6365 kern_return_t
6366 task_set_phys_footprint_limit(
6367 __unused task_t task,
6368 __unused int new_limit_mb,
6369 __unused int *old_limit_mb)
6370 {
6371 return KERN_FAILURE;
6372 }
6373
6374 kern_return_t
6375 task_get_phys_footprint_limit(
6376 __unused task_t task,
6377 __unused int *limit_mb)
6378 {
6379 return KERN_FAILURE;
6380 }
6381 #endif /* CONFIG_MEMORYSTATUS */
6382
6383 void
6384 task_set_thread_limit(task_t task, uint16_t thread_limit)
6385 {
6386 assert(task != kernel_task);
6387 if (thread_limit <= TASK_MAX_THREAD_LIMIT) {
6388 task_lock(task);
6389 task->task_thread_limit = thread_limit;
6390 task_unlock(task);
6391 }
6392 }
6393
6394 /*
6395 * We need to export some functions to other components that
6396 * are currently implemented in macros within the osfmk
6397 * component. Just export them as functions of the same name.
6398 */
6399 boolean_t
6400 is_kerneltask(task_t t)
6401 {
6402 if (t == kernel_task) {
6403 return TRUE;
6404 }
6405
6406 return FALSE;
6407 }
6408
6409 boolean_t
6410 is_corpsetask(task_t t)
6411 {
6412 return task_is_a_corpse(t);
6413 }
6414
6415 #undef current_task
6416 task_t current_task(void);
6417 task_t
6418 current_task(void)
6419 {
6420 return current_task_fast();
6421 }
6422
6423 #undef task_reference
6424 void task_reference(task_t task);
6425 void
6426 task_reference(
6427 task_t task)
6428 {
6429 if (task != TASK_NULL) {
6430 task_reference_internal(task);
6431 }
6432 }
6433
6434 /* defined in bsd/kern/kern_prot.c */
6435 extern int get_audit_token_pid(audit_token_t *audit_token);
6436
6437 int
6438 task_pid(task_t task)
6439 {
6440 if (task) {
6441 return get_audit_token_pid(&task->audit_token);
6442 }
6443 return -1;
6444 }
6445
6446
6447 /*
6448 * This routine finds a thread in a task by its unique id
6449 * Returns a referenced thread or THREAD_NULL if the thread was not found
6450 *
6451 * TODO: This is super inefficient - it's an O(threads in task) list walk!
6452 * We should make a tid hash, or transition all tid clients to thread ports
6453 *
6454 * Precondition: No locks held (will take task lock)
6455 */
6456 thread_t
6457 task_findtid(task_t task, uint64_t tid)
6458 {
6459 thread_t self = current_thread();
6460 thread_t found_thread = THREAD_NULL;
6461 thread_t iter_thread = THREAD_NULL;
6462
6463 /* Short-circuit the lookup if we're looking up ourselves */
6464 if (tid == self->thread_id || tid == TID_NULL) {
6465 assert(self->task == task);
6466
6467 thread_reference(self);
6468
6469 return self;
6470 }
6471
6472 task_lock(task);
6473
6474 queue_iterate(&task->threads, iter_thread, thread_t, task_threads) {
6475 if (iter_thread->thread_id == tid) {
6476 found_thread = iter_thread;
6477 thread_reference(found_thread);
6478 break;
6479 }
6480 }
6481
6482 task_unlock(task);
6483
6484 return found_thread;
6485 }
6486
6487 int
6488 pid_from_task(task_t task)
6489 {
6490 int pid = -1;
6491
6492 if (task->bsd_info) {
6493 pid = proc_pid(task->bsd_info);
6494 } else {
6495 pid = task_pid(task);
6496 }
6497
6498 return pid;
6499 }
6500
6501 /*
6502 * Control the CPU usage monitor for a task.
6503 */
6504 kern_return_t
6505 task_cpu_usage_monitor_ctl(task_t task, uint32_t *flags)
6506 {
6507 int error = KERN_SUCCESS;
6508
6509 if (*flags & CPUMON_MAKE_FATAL) {
6510 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_CPUMON;
6511 } else {
6512 error = KERN_INVALID_ARGUMENT;
6513 }
6514
6515 return error;
6516 }
6517
6518 /*
6519 * Control the wakeups monitor for a task.
6520 */
6521 kern_return_t
6522 task_wakeups_monitor_ctl(task_t task, uint32_t *flags, int32_t *rate_hz)
6523 {
6524 ledger_t ledger = task->ledger;
6525
6526 task_lock(task);
6527 if (*flags & WAKEMON_GET_PARAMS) {
6528 ledger_amount_t limit;
6529 uint64_t period;
6530
6531 ledger_get_limit(ledger, task_ledgers.interrupt_wakeups, &limit);
6532 ledger_get_period(ledger, task_ledgers.interrupt_wakeups, &period);
6533
6534 if (limit != LEDGER_LIMIT_INFINITY) {
6535 /*
6536 * An active limit means the wakeups monitor is enabled.
6537 */
6538 *rate_hz = (int32_t)(limit / (int64_t)(period / NSEC_PER_SEC));
6539 *flags = WAKEMON_ENABLE;
6540 if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON) {
6541 *flags |= WAKEMON_MAKE_FATAL;
6542 }
6543 } else {
6544 *flags = WAKEMON_DISABLE;
6545 *rate_hz = -1;
6546 }
6547
6548 /*
6549 * If WAKEMON_GET_PARAMS is present in flags, all other flags are ignored.
6550 */
6551 task_unlock(task);
6552 return KERN_SUCCESS;
6553 }
6554
6555 if (*flags & WAKEMON_ENABLE) {
6556 if (*flags & WAKEMON_SET_DEFAULTS) {
6557 *rate_hz = task_wakeups_monitor_rate;
6558 }
6559
6560 #ifndef CONFIG_NOMONITORS
6561 if (*flags & WAKEMON_MAKE_FATAL) {
6562 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON;
6563 }
6564 #endif /* CONFIG_NOMONITORS */
6565
6566 if (*rate_hz <= 0) {
6567 task_unlock(task);
6568 return KERN_INVALID_ARGUMENT;
6569 }
6570
6571 #ifndef CONFIG_NOMONITORS
6572 ledger_set_limit(ledger, task_ledgers.interrupt_wakeups, *rate_hz * task_wakeups_monitor_interval,
6573 task_wakeups_monitor_ustackshots_trigger_pct);
6574 ledger_set_period(ledger, task_ledgers.interrupt_wakeups, task_wakeups_monitor_interval * NSEC_PER_SEC);
6575 ledger_enable_callback(ledger, task_ledgers.interrupt_wakeups);
6576 #endif /* CONFIG_NOMONITORS */
6577 } else if (*flags & WAKEMON_DISABLE) {
6578 /*
6579 * Caller wishes to disable wakeups monitor on the task.
6580 *
6581 * Disable telemetry if it was triggered by the wakeups monitor, and
6582 * remove the limit & callback on the wakeups ledger entry.
6583 */
6584 #if CONFIG_TELEMETRY
6585 telemetry_task_ctl_locked(task, TF_WAKEMON_WARNING, 0);
6586 #endif
6587 ledger_disable_refill(ledger, task_ledgers.interrupt_wakeups);
6588 ledger_disable_callback(ledger, task_ledgers.interrupt_wakeups);
6589 }
6590
6591 task_unlock(task);
6592 return KERN_SUCCESS;
6593 }
6594
6595 void
6596 task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1)
6597 {
6598 if (warning == LEDGER_WARNING_ROSE_ABOVE) {
6599 #if CONFIG_TELEMETRY
6600 /*
6601 * This task is in danger of violating the wakeups monitor. Enable telemetry on this task
6602 * so there are micro-stackshots available if and when EXC_RESOURCE is triggered.
6603 */
6604 telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 1);
6605 #endif
6606 return;
6607 }
6608
6609 #if CONFIG_TELEMETRY
6610 /*
6611 * If the balance has dipped below the warning level (LEDGER_WARNING_DIPPED_BELOW) or
6612 * exceeded the limit, turn telemetry off for the task.
6613 */
6614 telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 0);
6615 #endif
6616
6617 if (warning == 0) {
6618 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS();
6619 }
6620 }
6621
6622 void __attribute__((noinline))
6623 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS(void)
6624 {
6625 task_t task = current_task();
6626 int pid = 0;
6627 const char *procname = "unknown";
6628 boolean_t fatal;
6629 kern_return_t kr;
6630 #ifdef EXC_RESOURCE_MONITORS
6631 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
6632 #endif /* EXC_RESOURCE_MONITORS */
6633 struct ledger_entry_info lei;
6634
6635 #ifdef MACH_BSD
6636 pid = proc_selfpid();
6637 if (task->bsd_info != NULL) {
6638 procname = proc_name_address(current_task()->bsd_info);
6639 }
6640 #endif
6641
6642 ledger_get_entry_info(task->ledger, task_ledgers.interrupt_wakeups, &lei);
6643
6644 /*
6645 * Disable the exception notification so we don't overwhelm
6646 * the listener with an endless stream of redundant exceptions.
6647 * TODO: detect whether another thread is already reporting the violation.
6648 */
6649 uint32_t flags = WAKEMON_DISABLE;
6650 task_wakeups_monitor_ctl(task, &flags, NULL);
6651
6652 fatal = task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON;
6653 trace_resource_violation(RMON_CPUWAKES_VIOLATED, &lei);
6654 os_log(OS_LOG_DEFAULT, "process %s[%d] caught waking the CPU %llu times "
6655 "over ~%llu seconds, averaging %llu wakes / second and "
6656 "violating a %slimit of %llu wakes over %llu seconds.\n",
6657 procname, pid,
6658 lei.lei_balance, lei.lei_last_refill / NSEC_PER_SEC,
6659 lei.lei_last_refill == 0 ? 0 :
6660 (NSEC_PER_SEC * lei.lei_balance / lei.lei_last_refill),
6661 fatal ? "FATAL " : "",
6662 lei.lei_limit, lei.lei_refill_period / NSEC_PER_SEC);
6663
6664 kr = send_resource_violation(send_cpu_wakes_violation, task, &lei,
6665 fatal ? kRNFatalLimitFlag : 0);
6666 if (kr) {
6667 printf("send_resource_violation(CPU wakes, ...): error %#x\n", kr);
6668 }
6669
6670 #ifdef EXC_RESOURCE_MONITORS
6671 if (disable_exc_resource) {
6672 printf("process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
6673 "supressed by a boot-arg\n", procname, pid);
6674 return;
6675 }
6676 if (audio_active) {
6677 os_log(OS_LOG_DEFAULT, "process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
6678 "supressed due to audio playback\n", procname, pid);
6679 return;
6680 }
6681 if (lei.lei_last_refill == 0) {
6682 os_log(OS_LOG_DEFAULT, "process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
6683 "supressed due to lei.lei_last_refill = 0 \n", procname, pid);
6684 }
6685
6686 code[0] = code[1] = 0;
6687 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_WAKEUPS);
6688 EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_WAKEUPS_MONITOR);
6689 EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_PERMITTED(code[0],
6690 NSEC_PER_SEC * lei.lei_limit / lei.lei_refill_period);
6691 EXC_RESOURCE_CPUMONITOR_ENCODE_OBSERVATION_INTERVAL(code[0],
6692 lei.lei_last_refill);
6693 EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_OBSERVED(code[1],
6694 NSEC_PER_SEC * lei.lei_balance / lei.lei_last_refill);
6695 exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
6696 #endif /* EXC_RESOURCE_MONITORS */
6697
6698 if (fatal) {
6699 task_terminate_internal(task);
6700 }
6701 }
6702
6703 static boolean_t
6704 global_update_logical_writes(int64_t io_delta, int64_t *global_write_count)
6705 {
6706 int64_t old_count, new_count;
6707 boolean_t needs_telemetry;
6708
6709 do {
6710 new_count = old_count = *global_write_count;
6711 new_count += io_delta;
6712 if (new_count >= io_telemetry_limit) {
6713 new_count = 0;
6714 needs_telemetry = TRUE;
6715 } else {
6716 needs_telemetry = FALSE;
6717 }
6718 } while (!OSCompareAndSwap64(old_count, new_count, global_write_count));
6719 return needs_telemetry;
6720 }
6721
6722 void
6723 task_update_logical_writes(task_t task, uint32_t io_size, int flags, void *vp)
6724 {
6725 int64_t io_delta = 0;
6726 int64_t * global_counter_to_update;
6727 boolean_t needs_telemetry = FALSE;
6728 boolean_t is_external_device = FALSE;
6729 int ledger_to_update = 0;
6730 struct task_writes_counters * writes_counters_to_update;
6731
6732 if ((!task) || (!io_size) || (!vp)) {
6733 return;
6734 }
6735
6736 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_DATA_WRITE)) | DBG_FUNC_NONE,
6737 task_pid(task), io_size, flags, (uintptr_t)VM_KERNEL_ADDRPERM(vp), 0);
6738 DTRACE_IO4(logical_writes, struct task *, task, uint32_t, io_size, int, flags, vnode *, vp);
6739
6740 // Is the drive backing this vnode internal or external to the system?
6741 if (vnode_isonexternalstorage(vp) == false) {
6742 global_counter_to_update = &global_logical_writes_count;
6743 ledger_to_update = task_ledgers.logical_writes;
6744 writes_counters_to_update = &task->task_writes_counters_internal;
6745 is_external_device = FALSE;
6746 } else {
6747 global_counter_to_update = &global_logical_writes_to_external_count;
6748 ledger_to_update = task_ledgers.logical_writes_to_external;
6749 writes_counters_to_update = &task->task_writes_counters_external;
6750 is_external_device = TRUE;
6751 }
6752
6753 switch (flags) {
6754 case TASK_WRITE_IMMEDIATE:
6755 OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_immediate_writes));
6756 ledger_credit(task->ledger, ledger_to_update, io_size);
6757 if (!is_external_device) {
6758 coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, TRUE, io_size);
6759 }
6760 break;
6761 case TASK_WRITE_DEFERRED:
6762 OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_deferred_writes));
6763 ledger_credit(task->ledger, ledger_to_update, io_size);
6764 if (!is_external_device) {
6765 coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, TRUE, io_size);
6766 }
6767 break;
6768 case TASK_WRITE_INVALIDATED:
6769 OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_invalidated_writes));
6770 ledger_debit(task->ledger, ledger_to_update, io_size);
6771 if (!is_external_device) {
6772 coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, FALSE, io_size);
6773 }
6774 break;
6775 case TASK_WRITE_METADATA:
6776 OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_metadata_writes));
6777 ledger_credit(task->ledger, ledger_to_update, io_size);
6778 if (!is_external_device) {
6779 coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, TRUE, io_size);
6780 }
6781 break;
6782 }
6783
6784 io_delta = (flags == TASK_WRITE_INVALIDATED) ? ((int64_t)io_size * -1ll) : ((int64_t)io_size);
6785 if (io_telemetry_limit != 0) {
6786 /* If io_telemetry_limit is 0, disable global updates and I/O telemetry */
6787 needs_telemetry = global_update_logical_writes(io_delta, global_counter_to_update);
6788 if (needs_telemetry && !is_external_device) {
6789 act_set_io_telemetry_ast(current_thread());
6790 }
6791 }
6792 }
6793
6794 /*
6795 * Control the I/O monitor for a task.
6796 */
6797 kern_return_t
6798 task_io_monitor_ctl(task_t task, uint32_t *flags)
6799 {
6800 ledger_t ledger = task->ledger;
6801
6802 task_lock(task);
6803 if (*flags & IOMON_ENABLE) {
6804 /* Configure the physical I/O ledger */
6805 ledger_set_limit(ledger, task_ledgers.physical_writes, (task_iomon_limit_mb * 1024 * 1024), 0);
6806 ledger_set_period(ledger, task_ledgers.physical_writes, (task_iomon_interval_secs * NSEC_PER_SEC));
6807 } else if (*flags & IOMON_DISABLE) {
6808 /*
6809 * Caller wishes to disable I/O monitor on the task.
6810 */
6811 ledger_disable_refill(ledger, task_ledgers.physical_writes);
6812 ledger_disable_callback(ledger, task_ledgers.physical_writes);
6813 }
6814
6815 task_unlock(task);
6816 return KERN_SUCCESS;
6817 }
6818
6819 void
6820 task_io_rate_exceeded(int warning, const void *param0, __unused const void *param1)
6821 {
6822 if (warning == 0) {
6823 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO((int)param0);
6824 }
6825 }
6826
6827 void __attribute__((noinline))
6828 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO(int flavor)
6829 {
6830 int pid = 0;
6831 task_t task = current_task();
6832 #ifdef EXC_RESOURCE_MONITORS
6833 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
6834 #endif /* EXC_RESOURCE_MONITORS */
6835 struct ledger_entry_info lei;
6836 kern_return_t kr;
6837
6838 #ifdef MACH_BSD
6839 pid = proc_selfpid();
6840 #endif
6841 /*
6842 * Get the ledger entry info. We need to do this before disabling the exception
6843 * to get correct values for all fields.
6844 */
6845 switch (flavor) {
6846 case FLAVOR_IO_PHYSICAL_WRITES:
6847 ledger_get_entry_info(task->ledger, task_ledgers.physical_writes, &lei);
6848 break;
6849 }
6850
6851
6852 /*
6853 * Disable the exception notification so we don't overwhelm
6854 * the listener with an endless stream of redundant exceptions.
6855 * TODO: detect whether another thread is already reporting the violation.
6856 */
6857 uint32_t flags = IOMON_DISABLE;
6858 task_io_monitor_ctl(task, &flags);
6859
6860 if (flavor == FLAVOR_IO_LOGICAL_WRITES) {
6861 trace_resource_violation(RMON_LOGWRITES_VIOLATED, &lei);
6862 }
6863 os_log(OS_LOG_DEFAULT, "process [%d] caught causing excessive I/O (flavor: %d). Task I/O: %lld MB. [Limit : %lld MB per %lld secs]\n",
6864 pid, flavor, (lei.lei_balance / (1024 * 1024)), (lei.lei_limit / (1024 * 1024)), (lei.lei_refill_period / NSEC_PER_SEC));
6865
6866 kr = send_resource_violation(send_disk_writes_violation, task, &lei, kRNFlagsNone);
6867 if (kr) {
6868 printf("send_resource_violation(disk_writes, ...): error %#x\n", kr);
6869 }
6870
6871 #ifdef EXC_RESOURCE_MONITORS
6872 code[0] = code[1] = 0;
6873 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_IO);
6874 EXC_RESOURCE_ENCODE_FLAVOR(code[0], flavor);
6875 EXC_RESOURCE_IO_ENCODE_INTERVAL(code[0], (lei.lei_refill_period / NSEC_PER_SEC));
6876 EXC_RESOURCE_IO_ENCODE_LIMIT(code[0], (lei.lei_limit / (1024 * 1024)));
6877 EXC_RESOURCE_IO_ENCODE_OBSERVED(code[1], (lei.lei_balance / (1024 * 1024)));
6878 exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
6879 #endif /* EXC_RESOURCE_MONITORS */
6880 }
6881
6882 /* Placeholders for the task set/get voucher interfaces */
6883 kern_return_t
6884 task_get_mach_voucher(
6885 task_t task,
6886 mach_voucher_selector_t __unused which,
6887 ipc_voucher_t *voucher)
6888 {
6889 if (TASK_NULL == task) {
6890 return KERN_INVALID_TASK;
6891 }
6892
6893 *voucher = NULL;
6894 return KERN_SUCCESS;
6895 }
6896
6897 kern_return_t
6898 task_set_mach_voucher(
6899 task_t task,
6900 ipc_voucher_t __unused voucher)
6901 {
6902 if (TASK_NULL == task) {
6903 return KERN_INVALID_TASK;
6904 }
6905
6906 return KERN_SUCCESS;
6907 }
6908
6909 kern_return_t
6910 task_swap_mach_voucher(
6911 __unused task_t task,
6912 __unused ipc_voucher_t new_voucher,
6913 ipc_voucher_t *in_out_old_voucher)
6914 {
6915 /*
6916 * Currently this function is only called from a MIG generated
6917 * routine which doesn't release the reference on the voucher
6918 * addressed by in_out_old_voucher. To avoid leaking this reference,
6919 * a call to release it has been added here.
6920 */
6921 ipc_voucher_release(*in_out_old_voucher);
6922 return KERN_NOT_SUPPORTED;
6923 }
6924
6925 void
6926 task_set_gpu_denied(task_t task, boolean_t denied)
6927 {
6928 task_lock(task);
6929
6930 if (denied) {
6931 task->t_flags |= TF_GPU_DENIED;
6932 } else {
6933 task->t_flags &= ~TF_GPU_DENIED;
6934 }
6935
6936 task_unlock(task);
6937 }
6938
6939 boolean_t
6940 task_is_gpu_denied(task_t task)
6941 {
6942 /* We don't need the lock to read this flag */
6943 return (task->t_flags & TF_GPU_DENIED) ? TRUE : FALSE;
6944 }
6945
6946
6947 uint64_t
6948 get_task_memory_region_count(task_t task)
6949 {
6950 vm_map_t map;
6951 map = (task == kernel_task) ? kernel_map: task->map;
6952 return (uint64_t)get_map_nentries(map);
6953 }
6954
6955 static void
6956 kdebug_trace_dyld_internal(uint32_t base_code,
6957 struct dyld_kernel_image_info *info)
6958 {
6959 static_assert(sizeof(info->uuid) >= 16);
6960
6961 #if defined(__LP64__)
6962 uint64_t *uuid = (uint64_t *)&(info->uuid);
6963
6964 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
6965 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code), uuid[0],
6966 uuid[1], info->load_addr,
6967 (uint64_t)info->fsid.val[0] | ((uint64_t)info->fsid.val[1] << 32),
6968 0);
6969 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
6970 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 1),
6971 (uint64_t)info->fsobjid.fid_objno |
6972 ((uint64_t)info->fsobjid.fid_generation << 32),
6973 0, 0, 0, 0);
6974 #else /* defined(__LP64__) */
6975 uint32_t *uuid = (uint32_t *)&(info->uuid);
6976
6977 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
6978 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 2), uuid[0],
6979 uuid[1], uuid[2], uuid[3], 0);
6980 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
6981 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 3),
6982 (uint32_t)info->load_addr, info->fsid.val[0], info->fsid.val[1],
6983 info->fsobjid.fid_objno, 0);
6984 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
6985 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 4),
6986 info->fsobjid.fid_generation, 0, 0, 0, 0);
6987 #endif /* !defined(__LP64__) */
6988 }
6989
6990 static kern_return_t
6991 kdebug_trace_dyld(task_t task, uint32_t base_code,
6992 vm_map_copy_t infos_copy, mach_msg_type_number_t infos_len)
6993 {
6994 kern_return_t kr;
6995 dyld_kernel_image_info_array_t infos;
6996 vm_map_offset_t map_data;
6997 vm_offset_t data;
6998
6999 if (!infos_copy) {
7000 return KERN_INVALID_ADDRESS;
7001 }
7002
7003 if (!kdebug_enable ||
7004 !kdebug_debugid_enabled(KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, 0))) {
7005 vm_map_copy_discard(infos_copy);
7006 return KERN_SUCCESS;
7007 }
7008
7009 if (task == NULL || task != current_task()) {
7010 return KERN_INVALID_TASK;
7011 }
7012
7013 kr = vm_map_copyout(ipc_kernel_map, &map_data, (vm_map_copy_t)infos_copy);
7014 if (kr != KERN_SUCCESS) {
7015 return kr;
7016 }
7017
7018 infos = CAST_DOWN(dyld_kernel_image_info_array_t, map_data);
7019
7020 for (mach_msg_type_number_t i = 0; i < infos_len; i++) {
7021 kdebug_trace_dyld_internal(base_code, &(infos[i]));
7022 }
7023
7024 data = CAST_DOWN(vm_offset_t, map_data);
7025 mach_vm_deallocate(ipc_kernel_map, data, infos_len * sizeof(infos[0]));
7026 return KERN_SUCCESS;
7027 }
7028
7029 kern_return_t
7030 task_register_dyld_image_infos(task_t task,
7031 dyld_kernel_image_info_array_t infos_copy,
7032 mach_msg_type_number_t infos_len)
7033 {
7034 return kdebug_trace_dyld(task, DBG_DYLD_UUID_MAP_A,
7035 (vm_map_copy_t)infos_copy, infos_len);
7036 }
7037
7038 kern_return_t
7039 task_unregister_dyld_image_infos(task_t task,
7040 dyld_kernel_image_info_array_t infos_copy,
7041 mach_msg_type_number_t infos_len)
7042 {
7043 return kdebug_trace_dyld(task, DBG_DYLD_UUID_UNMAP_A,
7044 (vm_map_copy_t)infos_copy, infos_len);
7045 }
7046
7047 kern_return_t
7048 task_get_dyld_image_infos(__unused task_t task,
7049 __unused dyld_kernel_image_info_array_t * dyld_images,
7050 __unused mach_msg_type_number_t * dyld_imagesCnt)
7051 {
7052 return KERN_NOT_SUPPORTED;
7053 }
7054
7055 kern_return_t
7056 task_register_dyld_shared_cache_image_info(task_t task,
7057 dyld_kernel_image_info_t cache_img,
7058 __unused boolean_t no_cache,
7059 __unused boolean_t private_cache)
7060 {
7061 if (task == NULL || task != current_task()) {
7062 return KERN_INVALID_TASK;
7063 }
7064
7065 kdebug_trace_dyld_internal(DBG_DYLD_UUID_SHARED_CACHE_A, &cache_img);
7066 return KERN_SUCCESS;
7067 }
7068
7069 kern_return_t
7070 task_register_dyld_set_dyld_state(__unused task_t task,
7071 __unused uint8_t dyld_state)
7072 {
7073 return KERN_NOT_SUPPORTED;
7074 }
7075
7076 kern_return_t
7077 task_register_dyld_get_process_state(__unused task_t task,
7078 __unused dyld_kernel_process_info_t * dyld_process_state)
7079 {
7080 return KERN_NOT_SUPPORTED;
7081 }
7082
7083 kern_return_t
7084 task_inspect(task_inspect_t task_insp, task_inspect_flavor_t flavor,
7085 task_inspect_info_t info_out, mach_msg_type_number_t *size_in_out)
7086 {
7087 #if MONOTONIC
7088 task_t task = (task_t)task_insp;
7089 kern_return_t kr = KERN_SUCCESS;
7090 mach_msg_type_number_t size;
7091
7092 if (task == TASK_NULL) {
7093 return KERN_INVALID_ARGUMENT;
7094 }
7095
7096 size = *size_in_out;
7097
7098 switch (flavor) {
7099 case TASK_INSPECT_BASIC_COUNTS: {
7100 struct task_inspect_basic_counts *bc;
7101 uint64_t task_counts[MT_CORE_NFIXED] = { 0 };
7102
7103 if (size < TASK_INSPECT_BASIC_COUNTS_COUNT) {
7104 kr = KERN_INVALID_ARGUMENT;
7105 break;
7106 }
7107
7108 mt_fixed_task_counts(task, task_counts);
7109 bc = (struct task_inspect_basic_counts *)info_out;
7110 #ifdef MT_CORE_INSTRS
7111 bc->instructions = task_counts[MT_CORE_INSTRS];
7112 #else /* defined(MT_CORE_INSTRS) */
7113 bc->instructions = 0;
7114 #endif /* !defined(MT_CORE_INSTRS) */
7115 bc->cycles = task_counts[MT_CORE_CYCLES];
7116 size = TASK_INSPECT_BASIC_COUNTS_COUNT;
7117 break;
7118 }
7119 default:
7120 kr = KERN_INVALID_ARGUMENT;
7121 break;
7122 }
7123
7124 if (kr == KERN_SUCCESS) {
7125 *size_in_out = size;
7126 }
7127 return kr;
7128 #else /* MONOTONIC */
7129 #pragma unused(task_insp, flavor, info_out, size_in_out)
7130 return KERN_NOT_SUPPORTED;
7131 #endif /* !MONOTONIC */
7132 }
7133
7134 #if CONFIG_SECLUDED_MEMORY
7135 int num_tasks_can_use_secluded_mem = 0;
7136
7137 void
7138 task_set_can_use_secluded_mem(
7139 task_t task,
7140 boolean_t can_use_secluded_mem)
7141 {
7142 if (!task->task_could_use_secluded_mem) {
7143 return;
7144 }
7145 task_lock(task);
7146 task_set_can_use_secluded_mem_locked(task, can_use_secluded_mem);
7147 task_unlock(task);
7148 }
7149
7150 void
7151 task_set_can_use_secluded_mem_locked(
7152 task_t task,
7153 boolean_t can_use_secluded_mem)
7154 {
7155 assert(task->task_could_use_secluded_mem);
7156 if (can_use_secluded_mem &&
7157 secluded_for_apps && /* global boot-arg */
7158 !task->task_can_use_secluded_mem) {
7159 assert(num_tasks_can_use_secluded_mem >= 0);
7160 OSAddAtomic(+1,
7161 (volatile SInt32 *)&num_tasks_can_use_secluded_mem);
7162 task->task_can_use_secluded_mem = TRUE;
7163 } else if (!can_use_secluded_mem &&
7164 task->task_can_use_secluded_mem) {
7165 assert(num_tasks_can_use_secluded_mem > 0);
7166 OSAddAtomic(-1,
7167 (volatile SInt32 *)&num_tasks_can_use_secluded_mem);
7168 task->task_can_use_secluded_mem = FALSE;
7169 }
7170 }
7171
7172 void
7173 task_set_could_use_secluded_mem(
7174 task_t task,
7175 boolean_t could_use_secluded_mem)
7176 {
7177 task->task_could_use_secluded_mem = could_use_secluded_mem;
7178 }
7179
7180 void
7181 task_set_could_also_use_secluded_mem(
7182 task_t task,
7183 boolean_t could_also_use_secluded_mem)
7184 {
7185 task->task_could_also_use_secluded_mem = could_also_use_secluded_mem;
7186 }
7187
7188 boolean_t
7189 task_can_use_secluded_mem(
7190 task_t task,
7191 boolean_t is_alloc)
7192 {
7193 if (task->task_can_use_secluded_mem) {
7194 assert(task->task_could_use_secluded_mem);
7195 assert(num_tasks_can_use_secluded_mem > 0);
7196 return TRUE;
7197 }
7198 if (task->task_could_also_use_secluded_mem &&
7199 num_tasks_can_use_secluded_mem > 0) {
7200 assert(num_tasks_can_use_secluded_mem > 0);
7201 return TRUE;
7202 }
7203
7204 /*
7205 * If a single task is using more than some amount of
7206 * memory, allow it to dip into secluded and also begin
7207 * suppression of secluded memory until the tasks exits.
7208 */
7209 if (is_alloc && secluded_shutoff_trigger != 0) {
7210 uint64_t phys_used = get_task_phys_footprint(task);
7211 if (phys_used > secluded_shutoff_trigger) {
7212 start_secluded_suppression(task);
7213 return TRUE;
7214 }
7215 }
7216
7217 return FALSE;
7218 }
7219
7220 boolean_t
7221 task_could_use_secluded_mem(
7222 task_t task)
7223 {
7224 return task->task_could_use_secluded_mem;
7225 }
7226
7227 boolean_t
7228 task_could_also_use_secluded_mem(
7229 task_t task)
7230 {
7231 return task->task_could_also_use_secluded_mem;
7232 }
7233 #endif /* CONFIG_SECLUDED_MEMORY */
7234
7235 queue_head_t *
7236 task_io_user_clients(task_t task)
7237 {
7238 return &task->io_user_clients;
7239 }
7240
7241 void
7242 task_set_message_app_suspended(task_t task, boolean_t enable)
7243 {
7244 task->message_app_suspended = enable;
7245 }
7246
7247 void
7248 task_copy_fields_for_exec(task_t dst_task, task_t src_task)
7249 {
7250 dst_task->vtimers = src_task->vtimers;
7251 }
7252
7253 #if DEVELOPMENT || DEBUG
7254 int vm_region_footprint = 0;
7255 #endif /* DEVELOPMENT || DEBUG */
7256
7257 boolean_t
7258 task_self_region_footprint(void)
7259 {
7260 #if DEVELOPMENT || DEBUG
7261 if (vm_region_footprint) {
7262 /* system-wide override */
7263 return TRUE;
7264 }
7265 #endif /* DEVELOPMENT || DEBUG */
7266 return current_task()->task_region_footprint;
7267 }
7268
7269 void
7270 task_self_region_footprint_set(
7271 boolean_t newval)
7272 {
7273 task_t curtask;
7274
7275 curtask = current_task();
7276 task_lock(curtask);
7277 if (newval) {
7278 curtask->task_region_footprint = TRUE;
7279 } else {
7280 curtask->task_region_footprint = FALSE;
7281 }
7282 task_unlock(curtask);
7283 }
7284
7285 void
7286 task_set_darkwake_mode(task_t task, boolean_t set_mode)
7287 {
7288 assert(task);
7289
7290 task_lock(task);
7291
7292 if (set_mode) {
7293 task->t_flags |= TF_DARKWAKE_MODE;
7294 } else {
7295 task->t_flags &= ~(TF_DARKWAKE_MODE);
7296 }
7297
7298 task_unlock(task);
7299 }
7300
7301 boolean_t
7302 task_get_darkwake_mode(task_t task)
7303 {
7304 assert(task);
7305 return (task->t_flags & TF_DARKWAKE_MODE) != 0;
7306 }
7307
7308 kern_return_t
7309 task_get_exc_guard_behavior(
7310 task_t task,
7311 task_exc_guard_behavior_t *behaviorp)
7312 {
7313 if (task == TASK_NULL) {
7314 return KERN_INVALID_TASK;
7315 }
7316 *behaviorp = task->task_exc_guard;
7317 return KERN_SUCCESS;
7318 }
7319
7320 #ifndef TASK_EXC_GUARD_ALL
7321 /* Temporary define until two branches are merged */
7322 #define TASK_EXC_GUARD_ALL (TASK_EXC_GUARD_VM_ALL | 0xf0)
7323 #endif
7324
7325 kern_return_t
7326 task_set_exc_guard_behavior(
7327 task_t task,
7328 task_exc_guard_behavior_t behavior)
7329 {
7330 if (task == TASK_NULL) {
7331 return KERN_INVALID_TASK;
7332 }
7333 if (behavior & ~TASK_EXC_GUARD_ALL) {
7334 return KERN_INVALID_VALUE;
7335 }
7336 task->task_exc_guard = behavior;
7337 return KERN_SUCCESS;
7338 }
7339
7340 #if __arm64__
7341 extern int legacy_footprint_entitlement_mode;
7342 extern void memorystatus_act_on_legacy_footprint_entitlement(proc_t, boolean_t);
7343 extern void memorystatus_act_on_ios13extended_footprint_entitlement(proc_t);
7344
7345 void
7346 task_set_legacy_footprint(
7347 task_t task)
7348 {
7349 task_lock(task);
7350 task->task_legacy_footprint = TRUE;
7351 task_unlock(task);
7352 }
7353
7354 void
7355 task_set_extra_footprint_limit(
7356 task_t task)
7357 {
7358 if (task->task_extra_footprint_limit) {
7359 return;
7360 }
7361 task_lock(task);
7362 if (task->task_extra_footprint_limit) {
7363 task_unlock(task);
7364 return;
7365 }
7366 task->task_extra_footprint_limit = TRUE;
7367 task_unlock(task);
7368 memorystatus_act_on_legacy_footprint_entitlement(task->bsd_info, TRUE);
7369 }
7370
7371 void
7372 task_set_ios13extended_footprint_limit(
7373 task_t task)
7374 {
7375 if (task->task_ios13extended_footprint_limit) {
7376 return;
7377 }
7378 task_lock(task);
7379 if (task->task_ios13extended_footprint_limit) {
7380 task_unlock(task);
7381 return;
7382 }
7383 task->task_ios13extended_footprint_limit = TRUE;
7384 task_unlock(task);
7385 memorystatus_act_on_ios13extended_footprint_entitlement(task->bsd_info);
7386 }
7387 #endif /* __arm64__ */
7388
7389 static inline ledger_amount_t
7390 task_ledger_get_balance(
7391 ledger_t ledger,
7392 int ledger_idx)
7393 {
7394 ledger_amount_t amount;
7395 amount = 0;
7396 ledger_get_balance(ledger, ledger_idx, &amount);
7397 return amount;
7398 }
7399
7400 /*
7401 * Gather the amount of memory counted in a task's footprint due to
7402 * being in a specific set of ledgers.
7403 */
7404 void
7405 task_ledgers_footprint(
7406 ledger_t ledger,
7407 ledger_amount_t *ledger_resident,
7408 ledger_amount_t *ledger_compressed)
7409 {
7410 *ledger_resident = 0;
7411 *ledger_compressed = 0;
7412
7413 /* purgeable non-volatile memory */
7414 *ledger_resident += task_ledger_get_balance(ledger, task_ledgers.purgeable_nonvolatile);
7415 *ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.purgeable_nonvolatile_compressed);
7416
7417 /* "default" tagged memory */
7418 *ledger_resident += task_ledger_get_balance(ledger, task_ledgers.tagged_footprint);
7419 *ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.tagged_footprint_compressed);
7420
7421 /* "network" currently never counts in the footprint... */
7422
7423 /* "media" tagged memory */
7424 *ledger_resident += task_ledger_get_balance(ledger, task_ledgers.media_footprint);
7425 *ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.media_footprint_compressed);
7426
7427 /* "graphics" tagged memory */
7428 *ledger_resident += task_ledger_get_balance(ledger, task_ledgers.graphics_footprint);
7429 *ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.graphics_footprint_compressed);
7430
7431 /* "neural" tagged memory */
7432 *ledger_resident += task_ledger_get_balance(ledger, task_ledgers.neural_footprint);
7433 *ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.neural_footprint_compressed);
7434 }
7435
7436 void
7437 task_set_memory_ownership_transfer(
7438 task_t task,
7439 boolean_t value)
7440 {
7441 task_lock(task);
7442 task->task_can_transfer_memory_ownership = value;
7443 task_unlock(task);
7444 }
7445
7446 void
7447 task_copy_vmobjects(task_t task, vm_object_query_t query, int len, int64_t* num)
7448 {
7449 vm_object_t find_vmo;
7450 int64_t size = 0;
7451
7452 task_objq_lock(task);
7453 if (query != NULL) {
7454 queue_iterate(&task->task_objq, find_vmo, vm_object_t, task_objq)
7455 {
7456 int byte_size;
7457 vm_object_query_t p = &query[size++];
7458
7459 p->object_id = (vm_object_id_t) VM_KERNEL_ADDRPERM(find_vmo);
7460 p->virtual_size = find_vmo->internal ? find_vmo->vo_size : 0;
7461 p->resident_size = find_vmo->resident_page_count * PAGE_SIZE;
7462 p->wired_size = find_vmo->wired_page_count * PAGE_SIZE;
7463 p->reusable_size = find_vmo->reusable_page_count * PAGE_SIZE;
7464 p->vo_no_footprint = find_vmo->vo_no_footprint;
7465 p->vo_ledger_tag = find_vmo->vo_ledger_tag;
7466 p->purgable = find_vmo->purgable;
7467
7468 if (find_vmo->internal && find_vmo->pager_created && find_vmo->pager != NULL) {
7469 p->compressed_size = vm_compressor_pager_get_count(find_vmo->pager) * PAGE_SIZE;
7470 } else {
7471 p->compressed_size = 0;
7472 }
7473
7474 /* make sure to not overrun */
7475 byte_size = (int) size * sizeof(vm_object_query_data_t);
7476 if ((int)(byte_size + sizeof(vm_object_query_data_t)) > len) {
7477 break;
7478 }
7479 }
7480 } else {
7481 size = task->task_owned_objects;
7482 }
7483 task_objq_unlock(task);
7484
7485 *num = size;
7486 }