]> git.saurik.com Git - apple/xnu.git/blob - osfmk/kern/task.c
0374456e1090a163788a54542d0993dd1650b755
[apple/xnu.git] / osfmk / kern / task.c
1 /*
2 * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_FREE_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 * File: kern/task.c
58 * Author: Avadis Tevanian, Jr., Michael Wayne Young, David Golub,
59 * David Black
60 *
61 * Task management primitives implementation.
62 */
63 /*
64 * Copyright (c) 1993 The University of Utah and
65 * the Computer Systems Laboratory (CSL). All rights reserved.
66 *
67 * Permission to use, copy, modify and distribute this software and its
68 * documentation is hereby granted, provided that both the copyright
69 * notice and this permission notice appear in all copies of the
70 * software, derivative works or modified versions, and any portions
71 * thereof, and that both notices appear in supporting documentation.
72 *
73 * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
74 * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
75 * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
76 *
77 * CSL requests users of this software to return to csl-dist@cs.utah.edu any
78 * improvements that they make and grant CSL redistribution rights.
79 *
80 */
81 /*
82 * NOTICE: This file was modified by McAfee Research in 2004 to introduce
83 * support for mandatory and extensible security protections. This notice
84 * is included in support of clause 2.2 (b) of the Apple Public License,
85 * Version 2.0.
86 * Copyright (c) 2005 SPARTA, Inc.
87 */
88
89 #include <mach/mach_types.h>
90 #include <mach/boolean.h>
91 #include <mach/host_priv.h>
92 #include <mach/machine/vm_types.h>
93 #include <mach/vm_param.h>
94 #include <mach/mach_vm.h>
95 #include <mach/semaphore.h>
96 #include <mach/task_info.h>
97 #include <mach/task_inspect.h>
98 #include <mach/task_special_ports.h>
99 #include <mach/sdt.h>
100
101 #include <ipc/ipc_importance.h>
102 #include <ipc/ipc_types.h>
103 #include <ipc/ipc_space.h>
104 #include <ipc/ipc_entry.h>
105 #include <ipc/ipc_hash.h>
106
107 #include <kern/kern_types.h>
108 #include <kern/mach_param.h>
109 #include <kern/misc_protos.h>
110 #include <kern/task.h>
111 #include <kern/thread.h>
112 #include <kern/coalition.h>
113 #include <kern/zalloc.h>
114 #include <kern/kalloc.h>
115 #include <kern/kern_cdata.h>
116 #include <kern/processor.h>
117 #include <kern/sched_prim.h> /* for thread_wakeup */
118 #include <kern/ipc_tt.h>
119 #include <kern/host.h>
120 #include <kern/clock.h>
121 #include <kern/timer.h>
122 #include <kern/assert.h>
123 #include <kern/sync_lock.h>
124 #include <kern/affinity.h>
125 #include <kern/exc_resource.h>
126 #include <kern/machine.h>
127 #include <kern/policy_internal.h>
128 #include <kern/restartable.h>
129
130 #include <corpses/task_corpse.h>
131 #if CONFIG_TELEMETRY
132 #include <kern/telemetry.h>
133 #endif
134
135 #if MONOTONIC
136 #include <kern/monotonic.h>
137 #include <machine/monotonic.h>
138 #endif /* MONOTONIC */
139
140 #include <os/log.h>
141
142 #include <vm/pmap.h>
143 #include <vm/vm_map.h>
144 #include <vm/vm_kern.h> /* for kernel_map, ipc_kernel_map */
145 #include <vm/vm_pageout.h>
146 #include <vm/vm_protos.h>
147 #include <vm/vm_purgeable_internal.h>
148 #include <vm/vm_compressor_pager.h>
149
150 #include <sys/resource.h>
151 #include <sys/signalvar.h> /* for coredump */
152 #include <sys/bsdtask_info.h>
153 /*
154 * Exported interfaces
155 */
156
157 #include <mach/task_server.h>
158 #include <mach/mach_host_server.h>
159 #include <mach/host_security_server.h>
160 #include <mach/mach_port_server.h>
161
162 #include <vm/vm_shared_region.h>
163
164 #include <libkern/OSDebug.h>
165 #include <libkern/OSAtomic.h>
166 #include <libkern/section_keywords.h>
167
168 #include <mach-o/loader.h>
169
170 #if CONFIG_ATM
171 #include <atm/atm_internal.h>
172 #endif
173
174 #include <kern/sfi.h> /* picks up ledger.h */
175
176 #if CONFIG_MACF
177 #include <security/mac_mach_internal.h>
178 #endif
179
180 #if KPERF
181 extern int kpc_force_all_ctrs(task_t, int);
182 #endif
183
/* The kernel's own task; task_init() creates it as the first task. */
task_t kernel_task;
/* Zone backing struct task allocations; set up by task_init() under the name "tasks". */
zone_t task_zone;
/* Lock attribute, group and group attribute used for task locks; initialized in task_init(). */
lck_attr_t task_lck_attr;
lck_grp_t task_lck_grp;
lck_grp_attr_t task_lck_grp_attr;
189
190 extern int exc_via_corpse_forking;
191 extern int corpse_for_fatal_memkill;
192 extern boolean_t proc_send_synchronous_EXC_RESOURCE(void *p);
193
194 /* Flag set by core audio when audio is playing. Used to stifle EXC_RESOURCE generation when active. */
195 int audio_active = 0;
196
197 zinfo_usage_store_t tasks_tkm_private;
198 zinfo_usage_store_t tasks_tkm_shared;
199
200 /* A container to accumulate statistics for expired tasks */
201 expired_task_statistics_t dead_task_statistics;
202 lck_spin_t dead_task_statistics_lock;
203
204 ledger_template_t task_ledger_template = NULL;
205
/*
 * Table of per-task ledger entry indices.
 *
 * Every scalar index is statically set to -1 here; sfi_wait_times is
 * zero-filled and, as its inline comment says, initialized at runtime.
 * NOTE(review): -1 presumably means "entry not registered yet" and the real
 * indices are presumably filled in by init_task_ledgers() -- confirm against
 * that routine, which is not visible in this part of the file.
 */
SECURITY_READ_ONLY_LATE(struct _task_ledger_indices) task_ledgers __attribute__((used)) =
{.cpu_time = -1,
 .tkm_private = -1,
 .tkm_shared = -1,
 .phys_mem = -1,
 .wired_mem = -1,
 .internal = -1,
 .iokit_mapped = -1,
 .alternate_accounting = -1,
 .alternate_accounting_compressed = -1,
 .page_table = -1,
 .phys_footprint = -1,
 .internal_compressed = -1,
 .purgeable_volatile = -1,
 .purgeable_nonvolatile = -1,
 .purgeable_volatile_compressed = -1,
 .purgeable_nonvolatile_compressed = -1,
 .tagged_nofootprint = -1,
 .tagged_footprint = -1,
 .tagged_nofootprint_compressed = -1,
 .tagged_footprint_compressed = -1,
 .network_volatile = -1,
 .network_nonvolatile = -1,
 .network_volatile_compressed = -1,
 .network_nonvolatile_compressed = -1,
 .media_nofootprint = -1,
 .media_footprint = -1,
 .media_nofootprint_compressed = -1,
 .media_footprint_compressed = -1,
 .graphics_nofootprint = -1,
 .graphics_footprint = -1,
 .graphics_nofootprint_compressed = -1,
 .graphics_footprint_compressed = -1,
 .neural_nofootprint = -1,
 .neural_footprint = -1,
 .neural_nofootprint_compressed = -1,
 .neural_footprint_compressed = -1,
 .platform_idle_wakeups = -1,
 .interrupt_wakeups = -1,
#if !CONFIG_EMBEDDED
 .sfi_wait_times = { 0 /* initialized at runtime */},
#endif /* !CONFIG_EMBEDDED */
 .cpu_time_billed_to_me = -1,
 .cpu_time_billed_to_others = -1,
 .physical_writes = -1,
 .logical_writes = -1,
 .logical_writes_to_external = -1,
#if DEBUG || DEVELOPMENT
 .pages_grabbed = -1,
 .pages_grabbed_kern = -1,
 .pages_grabbed_iopl = -1,
 .pages_grabbed_upl = -1,
#endif
 .energy_billed_to_me = -1,
 .energy_billed_to_others = -1};
261
262 /* System sleep state */
263 boolean_t tasks_suspend_state;
264
265
266 void init_task_ledgers(void);
267 void task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1);
268 void task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1);
269 void task_io_rate_exceeded(int warning, const void *param0, __unused const void *param1);
270 void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS(void);
271 void __attribute__((noinline)) PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb, boolean_t is_fatal);
272 void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO(int flavor);
273
274 kern_return_t task_suspend_internal(task_t);
275 kern_return_t task_resume_internal(task_t);
276 static kern_return_t task_start_halt_locked(task_t task, boolean_t should_mark_corpse);
277
278 extern kern_return_t iokit_task_terminate(task_t task);
279 extern void iokit_task_app_suspended_changed(task_t task);
280
281 extern kern_return_t exception_deliver(thread_t, exception_type_t, mach_exception_data_t, mach_msg_type_number_t, struct exception_action *, lck_mtx_t *);
282 extern void bsd_copythreadname(void *dst_uth, void *src_uth);
283 extern kern_return_t thread_resume(thread_t thread);
284
285 // Warn tasks when they hit 80% of their memory limit.
286 #define PHYS_FOOTPRINT_WARNING_LEVEL 80
287
288 #define TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT 150 /* wakeups per second */
289 #define TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL 300 /* in seconds. */
290
291 /*
292 * Level (in terms of percentage of the limit) at which the wakeups monitor triggers telemetry.
293 *
294 * (ie when the task's wakeups rate exceeds 70% of the limit, start taking user
295 * stacktraces, aka micro-stackshots)
296 */
297 #define TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER 70
298
299 int task_wakeups_monitor_interval; /* In seconds. Time period over which wakeups rate is observed */
300 int task_wakeups_monitor_rate; /* In hz. Maximum allowable wakeups per task before EXC_RESOURCE is sent */
301
302 int task_wakeups_monitor_ustackshots_trigger_pct; /* Percentage. Level at which we start gathering telemetry. */
303
int disable_exc_resource; /* Global override to suppress EXC_RESOURCE for resource monitor violations. */
305
306 ledger_amount_t max_task_footprint = 0; /* Per-task limit on physical memory consumption in bytes */
307 int max_task_footprint_warning_level = 0; /* Per-task limit warning percentage */
308 int max_task_footprint_mb = 0; /* Per-task limit on physical memory consumption in megabytes */
309
310 /* I/O Monitor Limits */
311 #define IOMON_DEFAULT_LIMIT (20480ull) /* MB of logical/physical I/O */
312 #define IOMON_DEFAULT_INTERVAL (86400ull) /* in seconds */
313
314 uint64_t task_iomon_limit_mb; /* Per-task I/O monitor limit in MBs */
315 uint64_t task_iomon_interval_secs; /* Per-task I/O monitor interval in secs */
316
317 #define IO_TELEMETRY_DEFAULT_LIMIT (10ll * 1024ll * 1024ll)
318 int64_t io_telemetry_limit; /* Threshold to take a microstackshot (0 indicated I/O telemetry is turned off) */
319 int64_t global_logical_writes_count = 0; /* Global count for logical writes */
320 int64_t global_logical_writes_to_external_count = 0; /* Global count for logical writes to external storage*/
321 static boolean_t global_update_logical_writes(int64_t, int64_t*);
322
323 #define TASK_MAX_THREAD_LIMIT 256
324
325 #if MACH_ASSERT
326 int pmap_ledgers_panic = 1;
327 int pmap_ledgers_panic_leeway = 3;
328 #endif /* MACH_ASSERT */
329
330 int task_max = CONFIG_TASK_MAX; /* Max number of tasks */
331
332 #if CONFIG_COREDUMP
333 int hwm_user_cores = 0; /* high watermark violations generate user core files */
334 #endif
335
336 #ifdef MACH_BSD
337 extern uint32_t proc_platform(struct proc *);
338 extern uint32_t proc_sdk(struct proc *);
339 extern void proc_getexecutableuuid(void *, unsigned char *, unsigned long);
340 extern int proc_pid(struct proc *p);
341 extern int proc_selfpid(void);
342 extern struct proc *current_proc(void);
343 extern char *proc_name_address(struct proc *p);
344 extern uint64_t get_dispatchqueue_offset_from_proc(void *);
345 extern int kevent_proc_copy_uptrs(void *proc, uint64_t *buf, int bufsize);
346 extern void workq_proc_suspended(struct proc *p);
347 extern void workq_proc_resumed(struct proc *p);
348
349 #if CONFIG_MEMORYSTATUS
350 extern void proc_memstat_terminated(struct proc* p, boolean_t set);
351 extern void memorystatus_on_ledger_footprint_exceeded(int warning, boolean_t memlimit_is_active, boolean_t memlimit_is_fatal);
352 extern void memorystatus_log_exception(const int max_footprint_mb, boolean_t memlimit_is_active, boolean_t memlimit_is_fatal);
353 extern boolean_t memorystatus_allowed_vm_map_fork(task_t task);
354 extern uint64_t memorystatus_available_memory_internal(proc_t p);
355
356 #if DEVELOPMENT || DEBUG
357 extern void memorystatus_abort_vm_map_fork(task_t);
358 #endif
359
360 #endif /* CONFIG_MEMORYSTATUS */
361
362 #endif /* MACH_BSD */
363
364 #if DEVELOPMENT || DEBUG
365 int exc_resource_threads_enabled;
366 #endif /* DEVELOPMENT || DEBUG */
367
/*
 * Default EXC_GUARD behavior bits, consumed by task_set_platform_binary().
 * DEVELOPMENT/DEBUG kernels start with the deliver, once and corpse bits set
 * for both the MP and VM groups; other kernels start with none. On
 * DEVELOPMENT/DEBUG kernels task_init() lets the "task_exc_guard_default"
 * boot-arg override this value.
 */
#if (DEVELOPMENT || DEBUG)
uint32_t task_exc_guard_default = TASK_EXC_GUARD_MP_DELIVER | TASK_EXC_GUARD_MP_ONCE | TASK_EXC_GUARD_MP_CORPSE |
    TASK_EXC_GUARD_VM_DELIVER | TASK_EXC_GUARD_VM_ONCE | TASK_EXC_GUARD_VM_CORPSE;
#else
uint32_t task_exc_guard_default = 0;
#endif
374
375 /* Forwards */
376
377 static void task_hold_locked(task_t task);
378 static void task_wait_locked(task_t task, boolean_t until_not_runnable);
379 static void task_release_locked(task_t task);
380
381 static void task_synchronizer_destroy_all(task_t task);
382 static os_ref_count_t
383 task_add_turnstile_watchports_locked(
384 task_t task,
385 struct task_watchports *watchports,
386 struct task_watchport_elem **previous_elem_array,
387 ipc_port_t *portwatch_ports,
388 uint32_t portwatch_count);
389
390 static os_ref_count_t
391 task_remove_turnstile_watchports_locked(
392 task_t task,
393 struct task_watchports *watchports,
394 ipc_port_t *port_freelist);
395
396 static struct task_watchports *
397 task_watchports_alloc_init(
398 task_t task,
399 thread_t thread,
400 uint32_t count);
401
402 static void
403 task_watchports_deallocate(
404 struct task_watchports *watchports);
405
/*
 * task_set_64bit:
 *
 * Bring the task's 64-bit address-space flag and its 64-bit register-state
 * ("data") flag in line with is_64bit and is_64bit_data. The task lock is
 * held for the whole update.
 *
 * The task's threads are only walked when the register-state flag actually
 * changes; if it already matches is_64bit_data the routine jumps straight to
 * the unlock once the address-space flag has been handled.
 *
 * NOTE(review): `thread` is declared when __i386__ is defined as well, but it
 * is only used under __x86_64__ / __arm64__; the two guards do not match.
 */
void
task_set_64bit(
	task_t task,
	boolean_t is_64bit,
	boolean_t is_64bit_data)
{
#if defined(__i386__) || defined(__x86_64__) || defined(__arm64__)
	thread_t thread;
#endif /* defined(__i386__) || defined(__x86_64__) || defined(__arm64__) */

	task_lock(task);

	/*
	 * Switching to/from 64-bit address spaces.
	 * The flag is only written when it differs from the requested state.
	 */
	if (is_64bit) {
		if (!task_has_64Bit_addr(task)) {
			task_set_64Bit_addr(task);
		}
	} else {
		if (task_has_64Bit_addr(task)) {
			task_clear_64Bit_addr(task);
		}
	}

	/*
	 * Switching to/from 64-bit register state.
	 * Nothing more to do if the flag already has the requested value.
	 */
	if (is_64bit_data) {
		if (task_has_64Bit_data(task)) {
			goto out;
		}

		task_set_64Bit_data(task);
	} else {
		if (!task_has_64Bit_data(task)) {
			goto out;
		}

		task_clear_64Bit_data(task);
	}

	/* FIXME: On x86, the thread save state flavor can diverge from the
	 * task's 64-bit feature flag due to the 32-bit/64-bit register save
	 * state dichotomy. Since we can be pre-empted in this interval,
	 * certain routines may observe the thread as being in an inconsistent
	 * state with respect to its task's 64-bitness.
	 */

#if defined(__x86_64__) || defined(__arm64__)
	/* Switch the addressing mode of every thread, each under its own mutex. */
	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		thread_mtx_lock(thread);
		machine_thread_switch_addrmode(thread);
		thread_mtx_unlock(thread);

#if defined(__arm64__)
		/* specifically, if running on H9 */
		if (thread == current_thread()) {
			uint64_t arg1, arg2;
			int urgency;
			spl_t spl = splsched();
			/*
			 * This call reports that the current thread changed its 32-bitness.
			 * Other threads were not on core when the 32-bitness was changed,
			 * but current_thread() is on core, and the previous call to
			 * machine_thread_going_on_core() reported a 32-bitness that is
			 * now wrong.
			 *
			 * This is needed for bring-up; a different callback should be used
			 * in the future.
			 *
			 * TODO: Remove this callout when we no longer support 32-bit code on H9
			 */
			thread_lock(thread);
			urgency = thread_get_urgency(thread, &arg1, &arg2);
			machine_thread_going_on_core(thread, urgency, 0, 0, mach_approximate_time());
			thread_unlock(thread);
			splx(spl);
		}
#endif /* defined(__arm64__) */
	}
#endif /* defined(__x86_64__) || defined(__arm64__) */

out:
	task_unlock(task);
}
491
492 boolean_t
493 task_get_64bit_data(task_t task)
494 {
495 return task_has_64Bit_data(task);
496 }
497
498 void
499 task_set_platform_binary(
500 task_t task,
501 boolean_t is_platform)
502 {
503 task_lock(task);
504 if (is_platform) {
505 task->t_flags |= TF_PLATFORM;
506 /* set exc guard default behavior for first-party code */
507 task->task_exc_guard = (task_exc_guard_default & TASK_EXC_GUARD_ALL);
508 } else {
509 task->t_flags &= ~(TF_PLATFORM);
510 /* set exc guard default behavior for third-party code */
511 task->task_exc_guard = ((task_exc_guard_default >> TASK_EXC_GUARD_THIRD_PARTY_DEFAULT_SHIFT) & TASK_EXC_GUARD_ALL);
512 }
513 task_unlock(task);
514 }
515
516 /*
517 * Set or clear per-task TF_CA_CLIENT_WI flag according to specified argument.
518 * Returns "false" if flag is already set, and "true" in other cases.
519 */
520 bool
521 task_set_ca_client_wi(
522 task_t task,
523 boolean_t set_or_clear)
524 {
525 bool ret = true;
526 task_lock(task);
527 if (set_or_clear) {
528 /* Tasks can have only one CA_CLIENT work interval */
529 if (task->t_flags & TF_CA_CLIENT_WI) {
530 ret = false;
531 } else {
532 task->t_flags |= TF_CA_CLIENT_WI;
533 }
534 } else {
535 task->t_flags &= ~TF_CA_CLIENT_WI;
536 }
537 task_unlock(task);
538 return ret;
539 }
540
541 void
542 task_set_dyld_info(
543 task_t task,
544 mach_vm_address_t addr,
545 mach_vm_size_t size)
546 {
547 task_lock(task);
548 task->all_image_info_addr = addr;
549 task->all_image_info_size = size;
550 task_unlock(task);
551 }
552
553 void
554 task_set_mach_header_address(
555 task_t task,
556 mach_vm_address_t addr)
557 {
558 task_lock(task);
559 task->mach_header_vm_address = addr;
560 task_unlock(task);
561 }
562
563 void
564 task_atm_reset(__unused task_t task)
565 {
566 #if CONFIG_ATM
567 if (task->atm_context != NULL) {
568 atm_task_descriptor_destroy(task->atm_context);
569 task->atm_context = NULL;
570 }
571 #endif
572 }
573
574 void
575 task_bank_reset(__unused task_t task)
576 {
577 if (task->bank_context != NULL) {
578 bank_task_destroy(task);
579 }
580 }
581
582 /*
583 * NOTE: This should only be called when the P_LINTRANSIT
584 * flag is set (the proc_trans lock is held) on the
585 * proc associated with the task.
586 */
587 void
588 task_bank_init(__unused task_t task)
589 {
590 if (task->bank_context != NULL) {
591 panic("Task bank init called with non null bank context for task: %p and bank_context: %p", task, task->bank_context);
592 }
593 bank_task_initialize(task);
594 }
595
596 void
597 task_set_did_exec_flag(task_t task)
598 {
599 task->t_procflags |= TPF_DID_EXEC;
600 }
601
602 void
603 task_clear_exec_copy_flag(task_t task)
604 {
605 task->t_procflags &= ~TPF_EXEC_COPY;
606 }
607
608 event_t
609 task_get_return_wait_event(task_t task)
610 {
611 return (event_t)&task->returnwait_inheritor;
612 }
613
/*
 * task_clear_return_wait:
 *
 * Release threads parked on the task's return-wait event.
 *
 * flags is a mask of:
 *   TCRW_CLEAR_INITIAL_WAIT - issue a plain thread_wakeup() on the event.
 *   TCRW_CLEAR_FINAL_WAIT   - under the task's IPC space write lock, clear
 *                             TRW_LRETURNWAIT and the recorded inheritor and,
 *                             if a waiter registered itself
 *                             (TRW_LRETURNWAITER), wake every waiter on the
 *                             event's turnstile and drop the inheritor.
 */
void
task_clear_return_wait(task_t task, uint32_t flags)
{
	if (flags & TCRW_CLEAR_INITIAL_WAIT) {
		thread_wakeup(task_get_return_wait_event(task));
	}

	if (flags & TCRW_CLEAR_FINAL_WAIT) {
		is_write_lock(task->itk_space);

		/* From here on task_wait_to_return() no longer has a reason to block. */
		task->t_returnwaitflags &= ~TRW_LRETURNWAIT;
		task->returnwait_inheritor = NULL;

		if (task->t_returnwaitflags & TRW_LRETURNWAITER) {
			struct turnstile *turnstile = turnstile_prepare((uintptr_t) task_get_return_wait_event(task),
			    NULL, TURNSTILE_NULL, TURNSTILE_ULOCK);

			/* Wake everyone blocked on the event ... */
			waitq_wakeup64_all(&turnstile->ts_waitq,
			    CAST_EVENT64_T(task_get_return_wait_event(task)),
			    THREAD_AWAKENED, 0);

			/* ... and clear the turnstile's inheritor. */
			turnstile_update_inheritor(turnstile, NULL,
			    TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD);
			turnstile_update_inheritor_complete(turnstile, TURNSTILE_INTERLOCK_HELD);

			turnstile_complete((uintptr_t) task_get_return_wait_event(task), NULL, NULL, TURNSTILE_ULOCK);
			turnstile_cleanup();
			task->t_returnwaitflags &= ~TRW_LRETURNWAITER;
		}
		is_write_unlock(task->itk_space);
	}
}
646
/*
 * task_wait_to_return:
 *
 * Called on a thread of the current task. Blocks, uninterruptibly, for as
 * long as the task has TRW_LRETURNWAIT set, then continues through
 * thread_bootstrap_return(). Never returns to the caller.
 *
 * While waiting, the thread advertises itself with TRW_LRETURNWAITER and
 * sets the turnstile's inheritor to task->returnwait_inheritor. The matching
 * wakeup is issued by task_clear_return_wait().
 */
void __attribute__((noreturn))
task_wait_to_return(void)
{
	task_t task = current_task();

	is_write_lock(task->itk_space);

	if (task->t_returnwaitflags & TRW_LRETURNWAIT) {
		struct turnstile *turnstile = turnstile_prepare((uintptr_t) task_get_return_wait_event(task),
		    NULL, TURNSTILE_NULL, TURNSTILE_ULOCK);

		do {
			/* Tell the waker that somebody is parked on the turnstile. */
			task->t_returnwaitflags |= TRW_LRETURNWAITER;
			turnstile_update_inheritor(turnstile, task->returnwait_inheritor,
			    (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));

			waitq_assert_wait64(&turnstile->ts_waitq,
			    CAST_EVENT64_T(task_get_return_wait_event(task)),
			    THREAD_UNINT, TIMEOUT_WAIT_FOREVER);

			/* The space lock is dropped before blocking and retaken after. */
			is_write_unlock(task->itk_space);

			turnstile_update_inheritor_complete(turnstile, TURNSTILE_INTERLOCK_NOT_HELD);

			thread_block(THREAD_CONTINUE_NULL);

			is_write_lock(task->itk_space);
			/* Re-check under the lock; go around again if the wait is still pending. */
		} while (task->t_returnwaitflags & TRW_LRETURNWAIT);

		turnstile_complete((uintptr_t) task_get_return_wait_event(task), NULL, NULL, TURNSTILE_ULOCK);
	}

	is_write_unlock(task->itk_space);
	turnstile_cleanup();


#if CONFIG_MACF
	/*
	 * Before jumping to userspace and allowing this process to execute any code,
	 * notify any interested parties.
	 */
	mac_proc_notify_exec_complete(current_proc());
#endif

	thread_bootstrap_return();
}
693
694 #ifdef CONFIG_32BIT_TELEMETRY
695 boolean_t
696 task_consume_32bit_log_flag(task_t task)
697 {
698 if ((task->t_procflags & TPF_LOG_32BIT_TELEMETRY) != 0) {
699 task->t_procflags &= ~TPF_LOG_32BIT_TELEMETRY;
700 return TRUE;
701 } else {
702 return FALSE;
703 }
704 }
705
706 void
707 task_set_32bit_log_flag(task_t task)
708 {
709 task->t_procflags |= TPF_LOG_32BIT_TELEMETRY;
710 }
711 #endif /* CONFIG_32BIT_TELEMETRY */
712
713 boolean_t
714 task_is_exec_copy(task_t task)
715 {
716 return task_is_exec_copy_internal(task);
717 }
718
719 boolean_t
720 task_did_exec(task_t task)
721 {
722 return task_did_exec_internal(task);
723 }
724
725 boolean_t
726 task_is_active(task_t task)
727 {
728 return task->active;
729 }
730
731 boolean_t
732 task_is_halting(task_t task)
733 {
734 return task->halting;
735 }
736
737 #if TASK_REFERENCE_LEAK_DEBUG
738 #include <kern/btlog.h>
739
740 static btlog_t *task_ref_btlog;
741 #define TASK_REF_OP_INCR 0x1
742 #define TASK_REF_OP_DECR 0x2
743
744 #define TASK_REF_NUM_RECORDS 100000
745 #define TASK_REF_BTDEPTH 7
746
747 void
748 task_reference_internal(task_t task)
749 {
750 void * bt[TASK_REF_BTDEPTH];
751 int numsaved = 0;
752
753 os_ref_retain(&task->ref_count);
754
755 numsaved = OSBacktrace(bt, TASK_REF_BTDEPTH);
756 btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_INCR,
757 bt, numsaved);
758 }
759
760 os_ref_count_t
761 task_deallocate_internal(task_t task)
762 {
763 void * bt[TASK_REF_BTDEPTH];
764 int numsaved = 0;
765
766 numsaved = OSBacktrace(bt, TASK_REF_BTDEPTH);
767 btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_DECR,
768 bt, numsaved);
769
770 return os_ref_release(&task->ref_count);
771 }
772
773 #endif /* TASK_REFERENCE_LEAK_DEBUG */
774
775 void
776 task_init(void)
777 {
778 lck_grp_attr_setdefault(&task_lck_grp_attr);
779 lck_grp_init(&task_lck_grp, "task", &task_lck_grp_attr);
780 lck_attr_setdefault(&task_lck_attr);
781 lck_mtx_init(&tasks_threads_lock, &task_lck_grp, &task_lck_attr);
782 lck_mtx_init(&tasks_corpse_lock, &task_lck_grp, &task_lck_attr);
783
784 task_zone = zinit(
785 sizeof(struct task),
786 task_max * sizeof(struct task),
787 TASK_CHUNK * sizeof(struct task),
788 "tasks");
789
790 zone_change(task_zone, Z_NOENCRYPT, TRUE);
791
792 #if CONFIG_EMBEDDED
793 task_watch_init();
794 #endif /* CONFIG_EMBEDDED */
795
796 /*
797 * Configure per-task memory limit.
798 * The boot-arg is interpreted as Megabytes,
799 * and takes precedence over the device tree.
800 * Setting the boot-arg to 0 disables task limits.
801 */
802 if (!PE_parse_boot_argn("max_task_pmem", &max_task_footprint_mb,
803 sizeof(max_task_footprint_mb))) {
804 /*
805 * No limit was found in boot-args, so go look in the device tree.
806 */
807 if (!PE_get_default("kern.max_task_pmem", &max_task_footprint_mb,
808 sizeof(max_task_footprint_mb))) {
809 /*
810 * No limit was found in device tree.
811 */
812 max_task_footprint_mb = 0;
813 }
814 }
815
816 if (max_task_footprint_mb != 0) {
817 #if CONFIG_MEMORYSTATUS
818 if (max_task_footprint_mb < 50) {
819 printf("Warning: max_task_pmem %d below minimum.\n",
820 max_task_footprint_mb);
821 max_task_footprint_mb = 50;
822 }
823 printf("Limiting task physical memory footprint to %d MB\n",
824 max_task_footprint_mb);
825
826 max_task_footprint = (ledger_amount_t)max_task_footprint_mb * 1024 * 1024; // Convert MB to bytes
827
828 /*
829 * Configure the per-task memory limit warning level.
830 * This is computed as a percentage.
831 */
832 max_task_footprint_warning_level = 0;
833
834 if (max_mem < 0x40000000) {
835 /*
836 * On devices with < 1GB of memory:
837 * -- set warnings to 50MB below the per-task limit.
838 */
839 if (max_task_footprint_mb > 50) {
840 max_task_footprint_warning_level = ((max_task_footprint_mb - 50) * 100) / max_task_footprint_mb;
841 }
842 } else {
843 /*
844 * On devices with >= 1GB of memory:
845 * -- set warnings to 100MB below the per-task limit.
846 */
847 if (max_task_footprint_mb > 100) {
848 max_task_footprint_warning_level = ((max_task_footprint_mb - 100) * 100) / max_task_footprint_mb;
849 }
850 }
851
852 /*
853 * Never allow warning level to land below the default.
854 */
855 if (max_task_footprint_warning_level < PHYS_FOOTPRINT_WARNING_LEVEL) {
856 max_task_footprint_warning_level = PHYS_FOOTPRINT_WARNING_LEVEL;
857 }
858
859 printf("Limiting task physical memory warning to %d%%\n", max_task_footprint_warning_level);
860
861 #else
862 printf("Warning: max_task_pmem specified, but jetsam not configured; ignoring.\n");
863 #endif /* CONFIG_MEMORYSTATUS */
864 }
865
866 #if DEVELOPMENT || DEBUG
867 if (!PE_parse_boot_argn("exc_resource_threads",
868 &exc_resource_threads_enabled,
869 sizeof(exc_resource_threads_enabled))) {
870 exc_resource_threads_enabled = 1;
871 }
872 PE_parse_boot_argn("task_exc_guard_default",
873 &task_exc_guard_default,
874 sizeof(task_exc_guard_default));
875 #endif /* DEVELOPMENT || DEBUG */
876
877 #if CONFIG_COREDUMP
878 if (!PE_parse_boot_argn("hwm_user_cores", &hwm_user_cores,
879 sizeof(hwm_user_cores))) {
880 hwm_user_cores = 0;
881 }
882 #endif
883
884 proc_init_cpumon_params();
885
886 if (!PE_parse_boot_argn("task_wakeups_monitor_rate", &task_wakeups_monitor_rate, sizeof(task_wakeups_monitor_rate))) {
887 task_wakeups_monitor_rate = TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT;
888 }
889
890 if (!PE_parse_boot_argn("task_wakeups_monitor_interval", &task_wakeups_monitor_interval, sizeof(task_wakeups_monitor_interval))) {
891 task_wakeups_monitor_interval = TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL;
892 }
893
894 if (!PE_parse_boot_argn("task_wakeups_monitor_ustackshots_trigger_pct", &task_wakeups_monitor_ustackshots_trigger_pct,
895 sizeof(task_wakeups_monitor_ustackshots_trigger_pct))) {
896 task_wakeups_monitor_ustackshots_trigger_pct = TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER;
897 }
898
899 if (!PE_parse_boot_argn("disable_exc_resource", &disable_exc_resource,
900 sizeof(disable_exc_resource))) {
901 disable_exc_resource = 0;
902 }
903
904 if (!PE_parse_boot_argn("task_iomon_limit_mb", &task_iomon_limit_mb, sizeof(task_iomon_limit_mb))) {
905 task_iomon_limit_mb = IOMON_DEFAULT_LIMIT;
906 }
907
908 if (!PE_parse_boot_argn("task_iomon_interval_secs", &task_iomon_interval_secs, sizeof(task_iomon_interval_secs))) {
909 task_iomon_interval_secs = IOMON_DEFAULT_INTERVAL;
910 }
911
912 if (!PE_parse_boot_argn("io_telemetry_limit", &io_telemetry_limit, sizeof(io_telemetry_limit))) {
913 io_telemetry_limit = IO_TELEMETRY_DEFAULT_LIMIT;
914 }
915
916 /*
917 * If we have coalitions, coalition_init() will call init_task_ledgers() as it
918 * sets up the ledgers for the default coalition. If we don't have coalitions,
919 * then we have to call it now.
920 */
921 #if CONFIG_COALITIONS
922 assert(task_ledger_template);
923 #else /* CONFIG_COALITIONS */
924 init_task_ledgers();
925 #endif /* CONFIG_COALITIONS */
926
927 #if TASK_REFERENCE_LEAK_DEBUG
928 task_ref_btlog = btlog_create(TASK_REF_NUM_RECORDS, TASK_REF_BTDEPTH, TRUE /* caller_will_remove_entries_for_element? */);
929 assert(task_ref_btlog);
930 #endif
931
932 /*
933 * Create the kernel task as the first task.
934 */
935 #ifdef __LP64__
936 if (task_create_internal(TASK_NULL, NULL, FALSE, TRUE, TRUE, TF_NONE, TPF_NONE, TWF_NONE, &kernel_task) != KERN_SUCCESS)
937 #else
938 if (task_create_internal(TASK_NULL, NULL, FALSE, FALSE, FALSE, TF_NONE, TPF_NONE, TWF_NONE, &kernel_task) != KERN_SUCCESS)
939 #endif
940 { panic("task_init\n");}
941
942 #if defined(HAS_APPLE_PAC)
943 kernel_task->rop_pid = KERNEL_ROP_ID;
944 // kernel_task never runs at EL0, but machine_thread_state_convert_from/to_user() relies on
945 // disable_user_jop to be false for kernel threads (e.g. in exception delivery on thread_exception_daemon)
946 ml_task_set_disable_user_jop(kernel_task, FALSE);
947 #endif
948
949 vm_map_deallocate(kernel_task->map);
950 kernel_task->map = kernel_map;
951 lck_spin_init(&dead_task_statistics_lock, &task_lck_grp, &task_lck_attr);
952 }
953
954 /*
955 * Create a task running in the kernel address space. It may
956 * have its own map of size mem_size and may have ipc privileges.
957 */
958 kern_return_t
959 kernel_task_create(
960 __unused task_t parent_task,
961 __unused vm_offset_t map_base,
962 __unused vm_size_t map_size,
963 __unused task_t *child_task)
964 {
965 return KERN_INVALID_ARGUMENT;
966 }
967
968 kern_return_t
969 task_create(
970 task_t parent_task,
971 __unused ledger_port_array_t ledger_ports,
972 __unused mach_msg_type_number_t num_ledger_ports,
973 __unused boolean_t inherit_memory,
974 __unused task_t *child_task) /* OUT */
975 {
976 if (parent_task == TASK_NULL) {
977 return KERN_INVALID_ARGUMENT;
978 }
979
980 /*
981 * No longer supported: too many calls assume that a task has a valid
982 * process attached.
983 */
984 return KERN_FAILURE;
985 }
986
987 kern_return_t
988 host_security_create_task_token(
989 host_security_t host_security,
990 task_t parent_task,
991 __unused security_token_t sec_token,
992 __unused audit_token_t audit_token,
993 __unused host_priv_t host_priv,
994 __unused ledger_port_array_t ledger_ports,
995 __unused mach_msg_type_number_t num_ledger_ports,
996 __unused boolean_t inherit_memory,
997 __unused task_t *child_task) /* OUT */
998 {
999 if (parent_task == TASK_NULL) {
1000 return KERN_INVALID_ARGUMENT;
1001 }
1002
1003 if (host_security == HOST_NULL) {
1004 return KERN_INVALID_SECURITY;
1005 }
1006
1007 /*
1008 * No longer supported.
1009 */
1010 return KERN_FAILURE;
1011 }
1012
1013 /*
1014 * Task ledgers
1015 * ------------
1016 *
1017 * phys_footprint
1018 * Physical footprint: This is the sum of:
1019 * + (internal - alternate_accounting)
1020 * + (internal_compressed - alternate_accounting_compressed)
1021 * + iokit_mapped
1022 * + purgeable_nonvolatile
1023 * + purgeable_nonvolatile_compressed
1024 * + page_table
1025 *
1026 * internal
1027 * The task's anonymous memory, which on iOS is always resident.
1028 *
1029 * internal_compressed
1030 * Amount of this task's internal memory which is held by the compressor.
1031 * Such memory is no longer actually resident for the task [i.e., resident in its pmap],
1032 * and could be either decompressed back into memory, or paged out to storage, depending
1033 * on our implementation.
1034 *
1035 * iokit_mapped
1036 * IOKit mappings: The total size of all IOKit mappings in this task, regardless of
1037 * clean/dirty or internal/external state].
1038 *
1039 * alternate_accounting
1040 * The number of internal dirty pages which are part of IOKit mappings. By definition, these pages
1041 * are counted in both internal *and* iokit_mapped, so we must subtract them from the total to avoid
1042 * double counting.
1043 *
1044 * pages_grabbed
1045 * pages_grabbed counts all page grabs in a task. It is also broken out into three subtypes
1046 * which track UPL, IOPL and Kernel page grabs.
1047 */
1048 void
1049 init_task_ledgers(void)
1050 {
1051 ledger_template_t t;
1052
1053 assert(task_ledger_template == NULL);
1054 assert(kernel_task == TASK_NULL);
1055
1056 #if MACH_ASSERT
1057 PE_parse_boot_argn("pmap_ledgers_panic",
1058 &pmap_ledgers_panic,
1059 sizeof(pmap_ledgers_panic));
1060 PE_parse_boot_argn("pmap_ledgers_panic_leeway",
1061 &pmap_ledgers_panic_leeway,
1062 sizeof(pmap_ledgers_panic_leeway));
1063 #endif /* MACH_ASSERT */
1064
1065 if ((t = ledger_template_create("Per-task ledger")) == NULL) {
1066 panic("couldn't create task ledger template");
1067 }
1068
1069 task_ledgers.cpu_time = ledger_entry_add(t, "cpu_time", "sched", "ns");
1070 task_ledgers.tkm_private = ledger_entry_add(t, "tkm_private",
1071 "physmem", "bytes");
1072 task_ledgers.tkm_shared = ledger_entry_add(t, "tkm_shared", "physmem",
1073 "bytes");
1074 task_ledgers.phys_mem = ledger_entry_add(t, "phys_mem", "physmem",
1075 "bytes");
1076 task_ledgers.wired_mem = ledger_entry_add(t, "wired_mem", "physmem",
1077 "bytes");
1078 task_ledgers.internal = ledger_entry_add(t, "internal", "physmem",
1079 "bytes");
1080 task_ledgers.iokit_mapped = ledger_entry_add(t, "iokit_mapped", "mappings",
1081 "bytes");
1082 task_ledgers.alternate_accounting = ledger_entry_add(t, "alternate_accounting", "physmem",
1083 "bytes");
1084 task_ledgers.alternate_accounting_compressed = ledger_entry_add(t, "alternate_accounting_compressed", "physmem",
1085 "bytes");
1086 task_ledgers.page_table = ledger_entry_add(t, "page_table", "physmem",
1087 "bytes");
1088 task_ledgers.phys_footprint = ledger_entry_add(t, "phys_footprint", "physmem",
1089 "bytes");
1090 task_ledgers.internal_compressed = ledger_entry_add(t, "internal_compressed", "physmem",
1091 "bytes");
1092 task_ledgers.purgeable_volatile = ledger_entry_add(t, "purgeable_volatile", "physmem", "bytes");
1093 task_ledgers.purgeable_nonvolatile = ledger_entry_add(t, "purgeable_nonvolatile", "physmem", "bytes");
1094 task_ledgers.purgeable_volatile_compressed = ledger_entry_add(t, "purgeable_volatile_compress", "physmem", "bytes");
1095 task_ledgers.purgeable_nonvolatile_compressed = ledger_entry_add(t, "purgeable_nonvolatile_compress", "physmem", "bytes");
1096 #if DEBUG || DEVELOPMENT
1097 task_ledgers.pages_grabbed = ledger_entry_add(t, "pages_grabbed", "physmem", "count");
1098 task_ledgers.pages_grabbed_kern = ledger_entry_add(t, "pages_grabbed_kern", "physmem", "count");
1099 task_ledgers.pages_grabbed_iopl = ledger_entry_add(t, "pages_grabbed_iopl", "physmem", "count");
1100 task_ledgers.pages_grabbed_upl = ledger_entry_add(t, "pages_grabbed_upl", "physmem", "count");
1101 #endif
1102 task_ledgers.tagged_nofootprint = ledger_entry_add(t, "tagged_nofootprint", "physmem", "bytes");
1103 task_ledgers.tagged_footprint = ledger_entry_add(t, "tagged_footprint", "physmem", "bytes");
1104 task_ledgers.tagged_nofootprint_compressed = ledger_entry_add(t, "tagged_nofootprint_compressed", "physmem", "bytes");
1105 task_ledgers.tagged_footprint_compressed = ledger_entry_add(t, "tagged_footprint_compressed", "physmem", "bytes");
1106 task_ledgers.network_volatile = ledger_entry_add(t, "network_volatile", "physmem", "bytes");
1107 task_ledgers.network_nonvolatile = ledger_entry_add(t, "network_nonvolatile", "physmem", "bytes");
1108 task_ledgers.network_volatile_compressed = ledger_entry_add(t, "network_volatile_compressed", "physmem", "bytes");
1109 task_ledgers.network_nonvolatile_compressed = ledger_entry_add(t, "network_nonvolatile_compressed", "physmem", "bytes");
1110 task_ledgers.media_nofootprint = ledger_entry_add(t, "media_nofootprint", "physmem", "bytes");
1111 task_ledgers.media_footprint = ledger_entry_add(t, "media_footprint", "physmem", "bytes");
1112 task_ledgers.media_nofootprint_compressed = ledger_entry_add(t, "media_nofootprint_compressed", "physmem", "bytes");
1113 task_ledgers.media_footprint_compressed = ledger_entry_add(t, "media_footprint_compressed", "physmem", "bytes");
1114 task_ledgers.graphics_nofootprint = ledger_entry_add(t, "graphics_nofootprint", "physmem", "bytes");
1115 task_ledgers.graphics_footprint = ledger_entry_add(t, "graphics_footprint", "physmem", "bytes");
1116 task_ledgers.graphics_nofootprint_compressed = ledger_entry_add(t, "graphics_nofootprint_compressed", "physmem", "bytes");
1117 task_ledgers.graphics_footprint_compressed = ledger_entry_add(t, "graphics_footprint_compressed", "physmem", "bytes");
1118 task_ledgers.neural_nofootprint = ledger_entry_add(t, "neural_nofootprint", "physmem", "bytes");
1119 task_ledgers.neural_footprint = ledger_entry_add(t, "neural_footprint", "physmem", "bytes");
1120 task_ledgers.neural_nofootprint_compressed = ledger_entry_add(t, "neural_nofootprint_compressed", "physmem", "bytes");
1121 task_ledgers.neural_footprint_compressed = ledger_entry_add(t, "neural_footprint_compressed", "physmem", "bytes");
1122
1123
1124 task_ledgers.platform_idle_wakeups = ledger_entry_add(t, "platform_idle_wakeups", "power",
1125 "count");
1126 task_ledgers.interrupt_wakeups = ledger_entry_add(t, "interrupt_wakeups", "power",
1127 "count");
1128
1129 #if CONFIG_SCHED_SFI
1130 sfi_class_id_t class_id, ledger_alias;
1131 for (class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) {
1132 task_ledgers.sfi_wait_times[class_id] = -1;
1133 }
1134
1135 /* don't account for UNSPECIFIED */
1136 for (class_id = SFI_CLASS_UNSPECIFIED + 1; class_id < MAX_SFI_CLASS_ID; class_id++) {
1137 ledger_alias = sfi_get_ledger_alias_for_class(class_id);
1138 if (ledger_alias != SFI_CLASS_UNSPECIFIED) {
1139 /* Check to see if alias has been registered yet */
1140 if (task_ledgers.sfi_wait_times[ledger_alias] != -1) {
1141 task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias];
1142 } else {
1143 /* Otherwise, initialize it first */
1144 task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias] = sfi_ledger_entry_add(t, ledger_alias);
1145 }
1146 } else {
1147 task_ledgers.sfi_wait_times[class_id] = sfi_ledger_entry_add(t, class_id);
1148 }
1149
1150 if (task_ledgers.sfi_wait_times[class_id] < 0) {
1151 panic("couldn't create entries for task ledger template for SFI class 0x%x", class_id);
1152 }
1153 }
1154
1155 assert(task_ledgers.sfi_wait_times[MAX_SFI_CLASS_ID - 1] != -1);
1156 #endif /* CONFIG_SCHED_SFI */
1157
1158 task_ledgers.cpu_time_billed_to_me = ledger_entry_add(t, "cpu_time_billed_to_me", "sched", "ns");
1159 task_ledgers.cpu_time_billed_to_others = ledger_entry_add(t, "cpu_time_billed_to_others", "sched", "ns");
1160 task_ledgers.physical_writes = ledger_entry_add(t, "physical_writes", "res", "bytes");
1161 task_ledgers.logical_writes = ledger_entry_add(t, "logical_writes", "res", "bytes");
1162 task_ledgers.logical_writes_to_external = ledger_entry_add(t, "logical_writes_to_external", "res", "bytes");
1163 task_ledgers.energy_billed_to_me = ledger_entry_add(t, "energy_billed_to_me", "power", "nj");
1164 task_ledgers.energy_billed_to_others = ledger_entry_add(t, "energy_billed_to_others", "power", "nj");
1165
1166 if ((task_ledgers.cpu_time < 0) ||
1167 (task_ledgers.tkm_private < 0) ||
1168 (task_ledgers.tkm_shared < 0) ||
1169 (task_ledgers.phys_mem < 0) ||
1170 (task_ledgers.wired_mem < 0) ||
1171 (task_ledgers.internal < 0) ||
1172 (task_ledgers.iokit_mapped < 0) ||
1173 (task_ledgers.alternate_accounting < 0) ||
1174 (task_ledgers.alternate_accounting_compressed < 0) ||
1175 (task_ledgers.page_table < 0) ||
1176 (task_ledgers.phys_footprint < 0) ||
1177 (task_ledgers.internal_compressed < 0) ||
1178 (task_ledgers.purgeable_volatile < 0) ||
1179 (task_ledgers.purgeable_nonvolatile < 0) ||
1180 (task_ledgers.purgeable_volatile_compressed < 0) ||
1181 (task_ledgers.purgeable_nonvolatile_compressed < 0) ||
1182 (task_ledgers.tagged_nofootprint < 0) ||
1183 (task_ledgers.tagged_footprint < 0) ||
1184 (task_ledgers.tagged_nofootprint_compressed < 0) ||
1185 (task_ledgers.tagged_footprint_compressed < 0) ||
1186 (task_ledgers.network_volatile < 0) ||
1187 (task_ledgers.network_nonvolatile < 0) ||
1188 (task_ledgers.network_volatile_compressed < 0) ||
1189 (task_ledgers.network_nonvolatile_compressed < 0) ||
1190 (task_ledgers.media_nofootprint < 0) ||
1191 (task_ledgers.media_footprint < 0) ||
1192 (task_ledgers.media_nofootprint_compressed < 0) ||
1193 (task_ledgers.media_footprint_compressed < 0) ||
1194 (task_ledgers.graphics_nofootprint < 0) ||
1195 (task_ledgers.graphics_footprint < 0) ||
1196 (task_ledgers.graphics_nofootprint_compressed < 0) ||
1197 (task_ledgers.graphics_footprint_compressed < 0) ||
1198 (task_ledgers.neural_nofootprint < 0) ||
1199 (task_ledgers.neural_footprint < 0) ||
1200 (task_ledgers.neural_nofootprint_compressed < 0) ||
1201 (task_ledgers.neural_footprint_compressed < 0) ||
1202 (task_ledgers.platform_idle_wakeups < 0) ||
1203 (task_ledgers.interrupt_wakeups < 0) ||
1204 (task_ledgers.cpu_time_billed_to_me < 0) || (task_ledgers.cpu_time_billed_to_others < 0) ||
1205 (task_ledgers.physical_writes < 0) ||
1206 (task_ledgers.logical_writes < 0) ||
1207 (task_ledgers.logical_writes_to_external < 0) ||
1208 (task_ledgers.energy_billed_to_me < 0) ||
1209 (task_ledgers.energy_billed_to_others < 0)
1210 ) {
1211 panic("couldn't create entries for task ledger template");
1212 }
1213
1214 ledger_track_credit_only(t, task_ledgers.phys_footprint);
1215 ledger_track_credit_only(t, task_ledgers.page_table);
1216 ledger_track_credit_only(t, task_ledgers.internal);
1217 ledger_track_credit_only(t, task_ledgers.internal_compressed);
1218 ledger_track_credit_only(t, task_ledgers.iokit_mapped);
1219 ledger_track_credit_only(t, task_ledgers.alternate_accounting);
1220 ledger_track_credit_only(t, task_ledgers.alternate_accounting_compressed);
1221 ledger_track_credit_only(t, task_ledgers.purgeable_volatile);
1222 ledger_track_credit_only(t, task_ledgers.purgeable_nonvolatile);
1223 ledger_track_credit_only(t, task_ledgers.purgeable_volatile_compressed);
1224 ledger_track_credit_only(t, task_ledgers.purgeable_nonvolatile_compressed);
1225 #if DEBUG || DEVELOPMENT
1226 ledger_track_credit_only(t, task_ledgers.pages_grabbed);
1227 ledger_track_credit_only(t, task_ledgers.pages_grabbed_kern);
1228 ledger_track_credit_only(t, task_ledgers.pages_grabbed_iopl);
1229 ledger_track_credit_only(t, task_ledgers.pages_grabbed_upl);
1230 #endif
1231 ledger_track_credit_only(t, task_ledgers.tagged_nofootprint);
1232 ledger_track_credit_only(t, task_ledgers.tagged_footprint);
1233 ledger_track_credit_only(t, task_ledgers.tagged_nofootprint_compressed);
1234 ledger_track_credit_only(t, task_ledgers.tagged_footprint_compressed);
1235 ledger_track_credit_only(t, task_ledgers.network_volatile);
1236 ledger_track_credit_only(t, task_ledgers.network_nonvolatile);
1237 ledger_track_credit_only(t, task_ledgers.network_volatile_compressed);
1238 ledger_track_credit_only(t, task_ledgers.network_nonvolatile_compressed);
1239 ledger_track_credit_only(t, task_ledgers.media_nofootprint);
1240 ledger_track_credit_only(t, task_ledgers.media_footprint);
1241 ledger_track_credit_only(t, task_ledgers.media_nofootprint_compressed);
1242 ledger_track_credit_only(t, task_ledgers.media_footprint_compressed);
1243 ledger_track_credit_only(t, task_ledgers.graphics_nofootprint);
1244 ledger_track_credit_only(t, task_ledgers.graphics_footprint);
1245 ledger_track_credit_only(t, task_ledgers.graphics_nofootprint_compressed);
1246 ledger_track_credit_only(t, task_ledgers.graphics_footprint_compressed);
1247 ledger_track_credit_only(t, task_ledgers.neural_nofootprint);
1248 ledger_track_credit_only(t, task_ledgers.neural_footprint);
1249 ledger_track_credit_only(t, task_ledgers.neural_nofootprint_compressed);
1250 ledger_track_credit_only(t, task_ledgers.neural_footprint_compressed);
1251
1252 ledger_track_maximum(t, task_ledgers.phys_footprint, 60);
1253 #if MACH_ASSERT
1254 if (pmap_ledgers_panic) {
1255 ledger_panic_on_negative(t, task_ledgers.phys_footprint);
1256 ledger_panic_on_negative(t, task_ledgers.page_table);
1257 ledger_panic_on_negative(t, task_ledgers.internal);
1258 ledger_panic_on_negative(t, task_ledgers.internal_compressed);
1259 ledger_panic_on_negative(t, task_ledgers.iokit_mapped);
1260 ledger_panic_on_negative(t, task_ledgers.alternate_accounting);
1261 ledger_panic_on_negative(t, task_ledgers.alternate_accounting_compressed);
1262 ledger_panic_on_negative(t, task_ledgers.purgeable_volatile);
1263 ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile);
1264 ledger_panic_on_negative(t, task_ledgers.purgeable_volatile_compressed);
1265 ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile_compressed);
1266
1267 ledger_panic_on_negative(t, task_ledgers.tagged_nofootprint);
1268 ledger_panic_on_negative(t, task_ledgers.tagged_footprint);
1269 ledger_panic_on_negative(t, task_ledgers.tagged_nofootprint_compressed);
1270 ledger_panic_on_negative(t, task_ledgers.tagged_footprint_compressed);
1271 ledger_panic_on_negative(t, task_ledgers.network_volatile);
1272 ledger_panic_on_negative(t, task_ledgers.network_nonvolatile);
1273 ledger_panic_on_negative(t, task_ledgers.network_volatile_compressed);
1274 ledger_panic_on_negative(t, task_ledgers.network_nonvolatile_compressed);
1275 ledger_panic_on_negative(t, task_ledgers.media_nofootprint);
1276 ledger_panic_on_negative(t, task_ledgers.media_footprint);
1277 ledger_panic_on_negative(t, task_ledgers.media_nofootprint_compressed);
1278 ledger_panic_on_negative(t, task_ledgers.media_footprint_compressed);
1279 ledger_panic_on_negative(t, task_ledgers.graphics_nofootprint);
1280 ledger_panic_on_negative(t, task_ledgers.graphics_footprint);
1281 ledger_panic_on_negative(t, task_ledgers.graphics_nofootprint_compressed);
1282 ledger_panic_on_negative(t, task_ledgers.graphics_footprint_compressed);
1283 ledger_panic_on_negative(t, task_ledgers.neural_nofootprint);
1284 ledger_panic_on_negative(t, task_ledgers.neural_footprint);
1285 ledger_panic_on_negative(t, task_ledgers.neural_nofootprint_compressed);
1286 ledger_panic_on_negative(t, task_ledgers.neural_footprint_compressed);
1287 }
1288 #endif /* MACH_ASSERT */
1289
1290 #if CONFIG_MEMORYSTATUS
1291 ledger_set_callback(t, task_ledgers.phys_footprint, task_footprint_exceeded, NULL, NULL);
1292 #endif /* CONFIG_MEMORYSTATUS */
1293
1294 ledger_set_callback(t, task_ledgers.interrupt_wakeups,
1295 task_wakeups_rate_exceeded, NULL, NULL);
1296 ledger_set_callback(t, task_ledgers.physical_writes, task_io_rate_exceeded, (void *)FLAVOR_IO_PHYSICAL_WRITES, NULL);
1297
1298 ledger_template_complete(t);
1299 task_ledger_template = t;
1300 }
1301
/*
 * Reference group named "task"; task_create_internal() passes it to
 * os_ref_init_count() when it initializes a new task's ref_count.
 */
os_refgrp_decl(static, task_refgrp, "task", NULL);
1303
1304 kern_return_t
1305 task_create_internal(
1306 task_t parent_task,
1307 coalition_t *parent_coalitions __unused,
1308 boolean_t inherit_memory,
1309 __unused boolean_t is_64bit,
1310 boolean_t is_64bit_data,
1311 uint32_t t_flags,
1312 uint32_t t_procflags,
1313 uint8_t t_returnwaitflags,
1314 task_t *child_task) /* OUT */
1315 {
1316 task_t new_task;
1317 vm_shared_region_t shared_region;
1318 ledger_t ledger = NULL;
1319
1320 new_task = (task_t) zalloc(task_zone);
1321
1322 if (new_task == TASK_NULL) {
1323 return KERN_RESOURCE_SHORTAGE;
1324 }
1325
1326 /* one ref for just being alive; one for our caller */
1327 os_ref_init_count(&new_task->ref_count, &task_refgrp, 2);
1328
1329 /* allocate with active entries */
1330 assert(task_ledger_template != NULL);
1331 if ((ledger = ledger_instantiate(task_ledger_template,
1332 LEDGER_CREATE_ACTIVE_ENTRIES)) == NULL) {
1333 zfree(task_zone, new_task);
1334 return KERN_RESOURCE_SHORTAGE;
1335 }
1336
1337 #if defined(HAS_APPLE_PAC)
1338 ml_task_set_rop_pid(new_task, parent_task, inherit_memory);
1339 ml_task_set_disable_user_jop(new_task, inherit_memory ? parent_task->disable_user_jop : FALSE);
1340 #endif
1341
1342 new_task->ledger = ledger;
1343
1344 #if defined(CONFIG_SCHED_MULTIQ)
1345 new_task->sched_group = sched_group_create();
1346 #endif
1347
1348 /* if inherit_memory is true, parent_task MUST not be NULL */
1349 if (!(t_flags & TF_CORPSE_FORK) && inherit_memory) {
1350 new_task->map = vm_map_fork(ledger, parent_task->map, 0);
1351 } else {
1352 unsigned int pmap_flags = is_64bit ? PMAP_CREATE_64BIT : 0;
1353 new_task->map = vm_map_create(pmap_create_options(ledger, 0, pmap_flags),
1354 (vm_map_offset_t)(VM_MIN_ADDRESS),
1355 (vm_map_offset_t)(VM_MAX_ADDRESS), TRUE);
1356 }
1357
1358 /* Inherit memlock limit from parent */
1359 if (parent_task) {
1360 vm_map_set_user_wire_limit(new_task->map, (vm_size_t)parent_task->map->user_wire_limit);
1361 }
1362
1363 lck_mtx_init(&new_task->lock, &task_lck_grp, &task_lck_attr);
1364 queue_init(&new_task->threads);
1365 new_task->suspend_count = 0;
1366 new_task->thread_count = 0;
1367 new_task->active_thread_count = 0;
1368 new_task->user_stop_count = 0;
1369 new_task->legacy_stop_count = 0;
1370 new_task->active = TRUE;
1371 new_task->halting = FALSE;
1372 new_task->priv_flags = 0;
1373 new_task->t_flags = t_flags;
1374 new_task->t_procflags = t_procflags;
1375 new_task->t_returnwaitflags = t_returnwaitflags;
1376 new_task->returnwait_inheritor = current_thread();
1377 new_task->importance = 0;
1378 new_task->crashed_thread_id = 0;
1379 new_task->exec_token = 0;
1380 new_task->watchports = NULL;
1381 new_task->restartable_ranges = NULL;
1382 new_task->task_exc_guard = 0;
1383
1384 #if CONFIG_ATM
1385 new_task->atm_context = NULL;
1386 #endif
1387 new_task->bank_context = NULL;
1388
1389 #ifdef MACH_BSD
1390 new_task->bsd_info = NULL;
1391 new_task->corpse_info = NULL;
1392 #endif /* MACH_BSD */
1393
1394 #if CONFIG_MACF
1395 new_task->crash_label = NULL;
1396 #endif
1397
1398 #if CONFIG_MEMORYSTATUS
1399 if (max_task_footprint != 0) {
1400 ledger_set_limit(ledger, task_ledgers.phys_footprint, max_task_footprint, PHYS_FOOTPRINT_WARNING_LEVEL);
1401 }
1402 #endif /* CONFIG_MEMORYSTATUS */
1403
1404 if (task_wakeups_monitor_rate != 0) {
1405 uint32_t flags = WAKEMON_ENABLE | WAKEMON_SET_DEFAULTS;
1406 int32_t rate; // Ignored because of WAKEMON_SET_DEFAULTS
1407 task_wakeups_monitor_ctl(new_task, &flags, &rate);
1408 }
1409
1410 #if CONFIG_IO_ACCOUNTING
1411 uint32_t flags = IOMON_ENABLE;
1412 task_io_monitor_ctl(new_task, &flags);
1413 #endif /* CONFIG_IO_ACCOUNTING */
1414
1415 machine_task_init(new_task, parent_task, inherit_memory);
1416
1417 new_task->task_debug = NULL;
1418
1419 #if DEVELOPMENT || DEBUG
1420 new_task->task_unnested = FALSE;
1421 new_task->task_disconnected_count = 0;
1422 #endif
1423 queue_init(&new_task->semaphore_list);
1424 new_task->semaphores_owned = 0;
1425
1426 ipc_task_init(new_task, parent_task);
1427
1428 new_task->vtimers = 0;
1429
1430 new_task->shared_region = NULL;
1431
1432 new_task->affinity_space = NULL;
1433
1434 new_task->t_kpc = 0;
1435
1436 new_task->pidsuspended = FALSE;
1437 new_task->frozen = FALSE;
1438 new_task->changing_freeze_state = FALSE;
1439 new_task->rusage_cpu_flags = 0;
1440 new_task->rusage_cpu_percentage = 0;
1441 new_task->rusage_cpu_interval = 0;
1442 new_task->rusage_cpu_deadline = 0;
1443 new_task->rusage_cpu_callt = NULL;
1444 #if MACH_ASSERT
1445 new_task->suspends_outstanding = 0;
1446 #endif
1447
1448 #if HYPERVISOR
1449 new_task->hv_task_target = NULL;
1450 #endif /* HYPERVISOR */
1451
1452 #if CONFIG_EMBEDDED
1453 queue_init(&new_task->task_watchers);
1454 new_task->num_taskwatchers = 0;
1455 new_task->watchapplying = 0;
1456 #endif /* CONFIG_EMBEDDED */
1457
1458 new_task->mem_notify_reserved = 0;
1459 new_task->memlimit_attrs_reserved = 0;
1460
1461 new_task->requested_policy = default_task_requested_policy;
1462 new_task->effective_policy = default_task_effective_policy;
1463
1464 task_importance_init_from_parent(new_task, parent_task);
1465
1466 if (parent_task != TASK_NULL) {
1467 new_task->sec_token = parent_task->sec_token;
1468 new_task->audit_token = parent_task->audit_token;
1469
1470 /* inherit the parent's shared region */
1471 shared_region = vm_shared_region_get(parent_task);
1472 vm_shared_region_set(new_task, shared_region);
1473
1474 if (task_has_64Bit_addr(parent_task)) {
1475 task_set_64Bit_addr(new_task);
1476 }
1477
1478 if (task_has_64Bit_data(parent_task)) {
1479 task_set_64Bit_data(new_task);
1480 }
1481
1482 new_task->all_image_info_addr = parent_task->all_image_info_addr;
1483 new_task->all_image_info_size = parent_task->all_image_info_size;
1484 new_task->mach_header_vm_address = 0;
1485
1486 if (inherit_memory && parent_task->affinity_space) {
1487 task_affinity_create(parent_task, new_task);
1488 }
1489
1490 new_task->pset_hint = parent_task->pset_hint = task_choose_pset(parent_task);
1491
1492 #if DEBUG || DEVELOPMENT
1493 if (parent_task->t_flags & TF_NO_SMT) {
1494 new_task->t_flags |= TF_NO_SMT;
1495 }
1496 #endif
1497
1498 new_task->priority = BASEPRI_DEFAULT;
1499 new_task->max_priority = MAXPRI_USER;
1500
1501 task_policy_create(new_task, parent_task);
1502 } else {
1503 new_task->sec_token = KERNEL_SECURITY_TOKEN;
1504 new_task->audit_token = KERNEL_AUDIT_TOKEN;
1505 #ifdef __LP64__
1506 if (is_64bit) {
1507 task_set_64Bit_addr(new_task);
1508 }
1509 #endif
1510
1511 if (is_64bit_data) {
1512 task_set_64Bit_data(new_task);
1513 }
1514
1515 new_task->all_image_info_addr = (mach_vm_address_t)0;
1516 new_task->all_image_info_size = (mach_vm_size_t)0;
1517
1518 new_task->pset_hint = PROCESSOR_SET_NULL;
1519
1520 if (kernel_task == TASK_NULL) {
1521 new_task->priority = BASEPRI_KERNEL;
1522 new_task->max_priority = MAXPRI_KERNEL;
1523 } else {
1524 new_task->priority = BASEPRI_DEFAULT;
1525 new_task->max_priority = MAXPRI_USER;
1526 }
1527 }
1528
1529 bzero(new_task->coalition, sizeof(new_task->coalition));
1530 for (int i = 0; i < COALITION_NUM_TYPES; i++) {
1531 queue_chain_init(new_task->task_coalition[i]);
1532 }
1533
1534 /* Allocate I/O Statistics */
1535 new_task->task_io_stats = (io_stat_info_t)kalloc(sizeof(struct io_stat_info));
1536 assert(new_task->task_io_stats != NULL);
1537 bzero(new_task->task_io_stats, sizeof(struct io_stat_info));
1538
1539 bzero(&(new_task->cpu_time_eqos_stats), sizeof(new_task->cpu_time_eqos_stats));
1540 bzero(&(new_task->cpu_time_rqos_stats), sizeof(new_task->cpu_time_rqos_stats));
1541
1542 bzero(&new_task->extmod_statistics, sizeof(new_task->extmod_statistics));
1543
1544 /* Copy resource acc. info from Parent for Corpe Forked task. */
1545 if (parent_task != NULL && (t_flags & TF_CORPSE_FORK)) {
1546 task_rollup_accounting_info(new_task, parent_task);
1547 } else {
1548 /* Initialize to zero for standard fork/spawn case */
1549 new_task->total_user_time = 0;
1550 new_task->total_system_time = 0;
1551 new_task->total_ptime = 0;
1552 new_task->total_runnable_time = 0;
1553 new_task->faults = 0;
1554 new_task->pageins = 0;
1555 new_task->cow_faults = 0;
1556 new_task->messages_sent = 0;
1557 new_task->messages_received = 0;
1558 new_task->syscalls_mach = 0;
1559 new_task->syscalls_unix = 0;
1560 new_task->c_switch = 0;
1561 new_task->p_switch = 0;
1562 new_task->ps_switch = 0;
1563 new_task->decompressions = 0;
1564 new_task->low_mem_notified_warn = 0;
1565 new_task->low_mem_notified_critical = 0;
1566 new_task->purged_memory_warn = 0;
1567 new_task->purged_memory_critical = 0;
1568 new_task->low_mem_privileged_listener = 0;
1569 new_task->memlimit_is_active = 0;
1570 new_task->memlimit_is_fatal = 0;
1571 new_task->memlimit_active_exc_resource = 0;
1572 new_task->memlimit_inactive_exc_resource = 0;
1573 new_task->task_timer_wakeups_bin_1 = 0;
1574 new_task->task_timer_wakeups_bin_2 = 0;
1575 new_task->task_gpu_ns = 0;
1576 new_task->task_writes_counters_internal.task_immediate_writes = 0;
1577 new_task->task_writes_counters_internal.task_deferred_writes = 0;
1578 new_task->task_writes_counters_internal.task_invalidated_writes = 0;
1579 new_task->task_writes_counters_internal.task_metadata_writes = 0;
1580 new_task->task_writes_counters_external.task_immediate_writes = 0;
1581 new_task->task_writes_counters_external.task_deferred_writes = 0;
1582 new_task->task_writes_counters_external.task_invalidated_writes = 0;
1583 new_task->task_writes_counters_external.task_metadata_writes = 0;
1584
1585 new_task->task_energy = 0;
1586 #if MONOTONIC
1587 memset(&new_task->task_monotonic, 0, sizeof(new_task->task_monotonic));
1588 #endif /* MONOTONIC */
1589 }
1590
1591
1592 #if CONFIG_COALITIONS
1593 if (!(t_flags & TF_CORPSE_FORK)) {
1594 /* TODO: there is no graceful failure path here... */
1595 if (parent_coalitions && parent_coalitions[COALITION_TYPE_RESOURCE]) {
1596 coalitions_adopt_task(parent_coalitions, new_task);
1597 } else if (parent_task && parent_task->coalition[COALITION_TYPE_RESOURCE]) {
1598 /*
1599 * all tasks at least have a resource coalition, so
1600 * if the parent has one then inherit all coalitions
1601 * the parent is a part of
1602 */
1603 coalitions_adopt_task(parent_task->coalition, new_task);
1604 } else {
1605 /* TODO: assert that new_task will be PID 1 (launchd) */
1606 coalitions_adopt_init_task(new_task);
1607 }
1608 /*
1609 * on exec, we need to transfer the coalition roles from the
1610 * parent task to the exec copy task.
1611 */
1612 if (parent_task && (t_procflags & TPF_EXEC_COPY)) {
1613 int coal_roles[COALITION_NUM_TYPES];
1614 task_coalition_roles(parent_task, coal_roles);
1615 (void)coalitions_set_roles(new_task->coalition, new_task, coal_roles);
1616 }
1617 } else {
1618 coalitions_adopt_corpse_task(new_task);
1619 }
1620
1621 if (new_task->coalition[COALITION_TYPE_RESOURCE] == COALITION_NULL) {
1622 panic("created task is not a member of a resource coalition");
1623 }
1624 #endif /* CONFIG_COALITIONS */
1625
1626 new_task->dispatchqueue_offset = 0;
1627 if (parent_task != NULL) {
1628 new_task->dispatchqueue_offset = parent_task->dispatchqueue_offset;
1629 }
1630
1631 new_task->task_can_transfer_memory_ownership = FALSE;
1632 new_task->task_volatile_objects = 0;
1633 new_task->task_nonvolatile_objects = 0;
1634 new_task->task_objects_disowning = FALSE;
1635 new_task->task_objects_disowned = FALSE;
1636 new_task->task_owned_objects = 0;
1637 queue_init(&new_task->task_objq);
1638 task_objq_lock_init(new_task);
1639
1640 #if __arm64__
1641 new_task->task_legacy_footprint = FALSE;
1642 new_task->task_extra_footprint_limit = FALSE;
1643 #endif /* __arm64__ */
1644 new_task->task_region_footprint = FALSE;
1645 new_task->task_has_crossed_thread_limit = FALSE;
1646 new_task->task_thread_limit = 0;
1647 #if CONFIG_SECLUDED_MEMORY
1648 new_task->task_can_use_secluded_mem = FALSE;
1649 new_task->task_could_use_secluded_mem = FALSE;
1650 new_task->task_could_also_use_secluded_mem = FALSE;
1651 new_task->task_suppressed_secluded = FALSE;
1652 #endif /* CONFIG_SECLUDED_MEMORY */
1653
1654 /*
1655 * t_flags is set up above. But since we don't
1656 * support darkwake mode being set that way
1657 * currently, we clear it out here explicitly.
1658 */
1659 new_task->t_flags &= ~(TF_DARKWAKE_MODE);
1660
1661 queue_init(&new_task->io_user_clients);
1662 new_task->loadTag = 0;
1663
1664 ipc_task_enable(new_task);
1665
1666 lck_mtx_lock(&tasks_threads_lock);
1667 queue_enter(&tasks, new_task, task_t, tasks);
1668 tasks_count++;
1669 if (tasks_suspend_state) {
1670 task_suspend_internal(new_task);
1671 }
1672 lck_mtx_unlock(&tasks_threads_lock);
1673
1674 *child_task = new_task;
1675 return KERN_SUCCESS;
1676 }
1677
1678 /*
1679 * task_rollup_accounting_info
1680 *
1681 * Roll up accounting stats. Used to rollup stats
1682 * for exec copy task and corpse fork.
1683 */
1684 void
1685 task_rollup_accounting_info(task_t to_task, task_t from_task)
1686 {
1687 assert(from_task != to_task);
1688
1689 to_task->total_user_time = from_task->total_user_time;
1690 to_task->total_system_time = from_task->total_system_time;
1691 to_task->total_ptime = from_task->total_ptime;
1692 to_task->total_runnable_time = from_task->total_runnable_time;
1693 to_task->faults = from_task->faults;
1694 to_task->pageins = from_task->pageins;
1695 to_task->cow_faults = from_task->cow_faults;
1696 to_task->decompressions = from_task->decompressions;
1697 to_task->messages_sent = from_task->messages_sent;
1698 to_task->messages_received = from_task->messages_received;
1699 to_task->syscalls_mach = from_task->syscalls_mach;
1700 to_task->syscalls_unix = from_task->syscalls_unix;
1701 to_task->c_switch = from_task->c_switch;
1702 to_task->p_switch = from_task->p_switch;
1703 to_task->ps_switch = from_task->ps_switch;
1704 to_task->extmod_statistics = from_task->extmod_statistics;
1705 to_task->low_mem_notified_warn = from_task->low_mem_notified_warn;
1706 to_task->low_mem_notified_critical = from_task->low_mem_notified_critical;
1707 to_task->purged_memory_warn = from_task->purged_memory_warn;
1708 to_task->purged_memory_critical = from_task->purged_memory_critical;
1709 to_task->low_mem_privileged_listener = from_task->low_mem_privileged_listener;
1710 *to_task->task_io_stats = *from_task->task_io_stats;
1711 to_task->cpu_time_eqos_stats = from_task->cpu_time_eqos_stats;
1712 to_task->cpu_time_rqos_stats = from_task->cpu_time_rqos_stats;
1713 to_task->task_timer_wakeups_bin_1 = from_task->task_timer_wakeups_bin_1;
1714 to_task->task_timer_wakeups_bin_2 = from_task->task_timer_wakeups_bin_2;
1715 to_task->task_gpu_ns = from_task->task_gpu_ns;
1716 to_task->task_writes_counters_internal.task_immediate_writes = from_task->task_writes_counters_internal.task_immediate_writes;
1717 to_task->task_writes_counters_internal.task_deferred_writes = from_task->task_writes_counters_internal.task_deferred_writes;
1718 to_task->task_writes_counters_internal.task_invalidated_writes = from_task->task_writes_counters_internal.task_invalidated_writes;
1719 to_task->task_writes_counters_internal.task_metadata_writes = from_task->task_writes_counters_internal.task_metadata_writes;
1720 to_task->task_writes_counters_external.task_immediate_writes = from_task->task_writes_counters_external.task_immediate_writes;
1721 to_task->task_writes_counters_external.task_deferred_writes = from_task->task_writes_counters_external.task_deferred_writes;
1722 to_task->task_writes_counters_external.task_invalidated_writes = from_task->task_writes_counters_external.task_invalidated_writes;
1723 to_task->task_writes_counters_external.task_metadata_writes = from_task->task_writes_counters_external.task_metadata_writes;
1724 to_task->task_energy = from_task->task_energy;
1725
1726 /* Skip ledger roll up for memory accounting entries */
1727 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time);
1728 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.platform_idle_wakeups);
1729 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.interrupt_wakeups);
1730 #if CONFIG_SCHED_SFI
1731 for (sfi_class_id_t class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) {
1732 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.sfi_wait_times[class_id]);
1733 }
1734 #endif
1735 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time_billed_to_me);
1736 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time_billed_to_others);
1737 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.physical_writes);
1738 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.logical_writes);
1739 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.energy_billed_to_me);
1740 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.energy_billed_to_others);
1741 }
1742
/*
 * NOTE(review): not referenced anywhere in the visible part of this file.
 * Presumably a debug counter related to dropped importance references --
 * confirm against its users before relying on it.
 */
1743 int task_dropped_imp_count = 0;
1744
1745 /*
1746 * task_deallocate:
1747 *
1748 * Drop a reference on a task.
1749 */
1750 void
1751 task_deallocate(
1752 task_t task)
1753 {
1754 ledger_amount_t credit, debit, interrupt_wakeups, platform_idle_wakeups;
1755 os_ref_count_t refs;
1756
1757 if (task == TASK_NULL) {
1758 return;
1759 }
1760
1761 refs = task_deallocate_internal(task);
1762
1763 #if IMPORTANCE_INHERITANCE
1764 if (refs == 1) {
1765 /*
1766 * If last ref potentially comes from the task's importance,
1767 * disconnect it. But more task refs may be added before
1768 * that completes, so wait for the reference to go to zero
1769 * naturally (it may happen on a recursive task_deallocate()
1770 * from the ipc_importance_disconnect_task() call).
1771 */
1772 if (IIT_NULL != task->task_imp_base) {
1773 ipc_importance_disconnect_task(task);
1774 }
1775 return;
1776 }
1777 #endif /* IMPORTANCE_INHERITANCE */
1778
1779 if (refs > 0) {
1780 return;
1781 }
1782
1783 /*
1784 * The task should be dead at this point. Ensure other resources
1785 * like threads, are gone before we trash the world.
1786 */
1787 assert(queue_empty(&task->threads));
1788 assert(task->bsd_info == NULL);
1789 assert(!is_active(task->itk_space));
1790 assert(!task->active);
1791 assert(task->active_thread_count == 0);
1792
1793 lck_mtx_lock(&tasks_threads_lock);
1794 assert(terminated_tasks_count > 0);
1795 queue_remove(&terminated_tasks, task, task_t, tasks);
1796 terminated_tasks_count--;
1797 lck_mtx_unlock(&tasks_threads_lock);
1798
1799 /*
1800 * remove the reference on atm descriptor
1801 */
1802 task_atm_reset(task);
1803
1804 /*
1805 * remove the reference on bank context
1806 */
1807 task_bank_reset(task);
1808
1809 if (task->task_io_stats) {
1810 kfree(task->task_io_stats, sizeof(struct io_stat_info));
1811 }
1812
1813 /*
1814 * Give the machine dependent code a chance
1815 * to perform cleanup before ripping apart
1816 * the task.
1817 */
1818 machine_task_terminate(task);
1819
1820 ipc_task_terminate(task);
1821
1822 /* let iokit know */
1823 iokit_task_terminate(task);
1824
1825 if (task->affinity_space) {
1826 task_affinity_deallocate(task);
1827 }
1828
1829 #if MACH_ASSERT
1830 if (task->ledger != NULL &&
1831 task->map != NULL &&
1832 task->map->pmap != NULL &&
1833 task->map->pmap->ledger != NULL) {
1834 assert(task->ledger == task->map->pmap->ledger);
1835 }
1836 #endif /* MACH_ASSERT */
1837
1838 vm_owned_objects_disown(task);
1839 assert(task->task_objects_disowned);
1840 if (task->task_volatile_objects != 0 ||
1841 task->task_nonvolatile_objects != 0 ||
1842 task->task_owned_objects != 0) {
1843 panic("task_deallocate(%p): "
1844 "volatile_objects=%d nonvolatile_objects=%d owned=%d\n",
1845 task,
1846 task->task_volatile_objects,
1847 task->task_nonvolatile_objects,
1848 task->task_owned_objects);
1849 }
1850
1851 vm_map_deallocate(task->map);
1852 is_release(task->itk_space);
1853 if (task->restartable_ranges) {
1854 restartable_ranges_release(task->restartable_ranges);
1855 }
1856
1857 ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
1858 &interrupt_wakeups, &debit);
1859 ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
1860 &platform_idle_wakeups, &debit);
1861
1862 #if defined(CONFIG_SCHED_MULTIQ)
1863 sched_group_destroy(task->sched_group);
1864 #endif
1865
1866 /* Accumulate statistics for dead tasks */
1867 lck_spin_lock(&dead_task_statistics_lock);
1868 dead_task_statistics.total_user_time += task->total_user_time;
1869 dead_task_statistics.total_system_time += task->total_system_time;
1870
1871 dead_task_statistics.task_interrupt_wakeups += interrupt_wakeups;
1872 dead_task_statistics.task_platform_idle_wakeups += platform_idle_wakeups;
1873
1874 dead_task_statistics.task_timer_wakeups_bin_1 += task->task_timer_wakeups_bin_1;
1875 dead_task_statistics.task_timer_wakeups_bin_2 += task->task_timer_wakeups_bin_2;
1876 dead_task_statistics.total_ptime += task->total_ptime;
1877 dead_task_statistics.total_pset_switches += task->ps_switch;
1878 dead_task_statistics.task_gpu_ns += task->task_gpu_ns;
1879 dead_task_statistics.task_energy += task->task_energy;
1880
1881 lck_spin_unlock(&dead_task_statistics_lock);
1882 lck_mtx_destroy(&task->lock, &task_lck_grp);
1883
1884 if (!ledger_get_entries(task->ledger, task_ledgers.tkm_private, &credit,
1885 &debit)) {
1886 OSAddAtomic64(credit, (int64_t *)&tasks_tkm_private.alloc);
1887 OSAddAtomic64(debit, (int64_t *)&tasks_tkm_private.free);
1888 }
1889 if (!ledger_get_entries(task->ledger, task_ledgers.tkm_shared, &credit,
1890 &debit)) {
1891 OSAddAtomic64(credit, (int64_t *)&tasks_tkm_shared.alloc);
1892 OSAddAtomic64(debit, (int64_t *)&tasks_tkm_shared.free);
1893 }
1894 ledger_dereference(task->ledger);
1895
1896 #if TASK_REFERENCE_LEAK_DEBUG
1897 btlog_remove_entries_for_element(task_ref_btlog, task);
1898 #endif
1899
1900 #if CONFIG_COALITIONS
1901 task_release_coalitions(task);
1902 #endif /* CONFIG_COALITIONS */
1903
1904 bzero(task->coalition, sizeof(task->coalition));
1905
1906 #if MACH_BSD
1907 /* clean up collected information since last reference to task is gone */
1908 if (task->corpse_info) {
1909 void *corpse_info_kernel = kcdata_memory_get_begin_addr(task->corpse_info);
1910 task_crashinfo_destroy(task->corpse_info);
1911 task->corpse_info = NULL;
1912 if (corpse_info_kernel) {
1913 kfree(corpse_info_kernel, CORPSEINFO_ALLOCATION_SIZE);
1914 }
1915 }
1916 #endif
1917
1918 #if CONFIG_MACF
1919 if (task->crash_label) {
1920 mac_exc_free_label(task->crash_label);
1921 task->crash_label = NULL;
1922 }
1923 #endif
1924
1925 assert(queue_empty(&task->task_objq));
1926
1927 zfree(task_zone, task);
1928 }
1929
1930 /*
1931 * task_name_deallocate:
1932 *
1933 * Drop a reference on a task name.
1934 */
1935 void
1936 task_name_deallocate(
1937 task_name_t task_name)
1938 {
1939 return task_deallocate((task_t)task_name);
1940 }
1941
1942 /*
1943 * task_inspect_deallocate:
1944 *
1945 * Drop a task inspection reference.
1946 */
1947 void
1948 task_inspect_deallocate(
1949 task_inspect_t task_inspect)
1950 {
1951 return task_deallocate((task_t)task_inspect);
1952 }
1953
1954 /*
1955 * task_suspension_token_deallocate:
1956 *
1957 * Drop a reference on a task suspension token.
1958 */
1959 void
1960 task_suspension_token_deallocate(
1961 task_suspension_token_t token)
1962 {
1963 return task_deallocate((task_t)token);
1964 }
1965
1966
1967 /*
1968 * task_collect_crash_info:
1969 *
1970 * collect crash info from bsd and mach based data
1971 */
1972 kern_return_t
1973 task_collect_crash_info(
1974 task_t task,
1975 #ifdef CONFIG_MACF
1976 struct label *crash_label,
1977 #endif
1978 int is_corpse_fork)
1979 {
1980 kern_return_t kr = KERN_SUCCESS;
1981
1982 kcdata_descriptor_t crash_data = NULL;
1983 kcdata_descriptor_t crash_data_release = NULL;
1984 mach_msg_type_number_t size = CORPSEINFO_ALLOCATION_SIZE;
1985 mach_vm_offset_t crash_data_ptr = 0;
1986 void *crash_data_kernel = NULL;
1987 void *crash_data_kernel_release = NULL;
1988 #if CONFIG_MACF
1989 struct label *label, *free_label;
1990 #endif
1991
1992 if (!corpses_enabled()) {
1993 return KERN_NOT_SUPPORTED;
1994 }
1995
1996 #if CONFIG_MACF
1997 free_label = label = mac_exc_create_label();
1998 #endif
1999
2000 task_lock(task);
2001
2002 assert(is_corpse_fork || task->bsd_info != NULL);
2003 if (task->corpse_info == NULL && (is_corpse_fork || task->bsd_info != NULL)) {
2004 #if CONFIG_MACF
2005 /* Set the crash label, used by the exception delivery mac hook */
2006 free_label = task->crash_label; // Most likely NULL.
2007 task->crash_label = label;
2008 mac_exc_update_task_crash_label(task, crash_label);
2009 #endif
2010 task_unlock(task);
2011
2012 crash_data_kernel = (void *) kalloc(CORPSEINFO_ALLOCATION_SIZE);
2013 if (crash_data_kernel == NULL) {
2014 kr = KERN_RESOURCE_SHORTAGE;
2015 goto out_no_lock;
2016 }
2017 bzero(crash_data_kernel, CORPSEINFO_ALLOCATION_SIZE);
2018 crash_data_ptr = (mach_vm_offset_t) crash_data_kernel;
2019
2020 /* Do not get a corpse ref for corpse fork */
2021 crash_data = task_crashinfo_alloc_init((mach_vm_address_t)crash_data_ptr, size,
2022 is_corpse_fork ? 0 : CORPSE_CRASHINFO_HAS_REF,
2023 KCFLAG_USE_MEMCOPY);
2024 if (crash_data) {
2025 task_lock(task);
2026 crash_data_release = task->corpse_info;
2027 crash_data_kernel_release = kcdata_memory_get_begin_addr(crash_data_release);
2028 task->corpse_info = crash_data;
2029
2030 task_unlock(task);
2031 kr = KERN_SUCCESS;
2032 } else {
2033 kfree(crash_data_kernel, CORPSEINFO_ALLOCATION_SIZE);
2034 kr = KERN_FAILURE;
2035 }
2036
2037 if (crash_data_release != NULL) {
2038 task_crashinfo_destroy(crash_data_release);
2039 }
2040 if (crash_data_kernel_release != NULL) {
2041 kfree(crash_data_kernel_release, CORPSEINFO_ALLOCATION_SIZE);
2042 }
2043 } else {
2044 task_unlock(task);
2045 }
2046
2047 out_no_lock:
2048 #if CONFIG_MACF
2049 if (free_label != NULL) {
2050 mac_exc_free_label(free_label);
2051 }
2052 #endif
2053 return kr;
2054 }
2055
2056 /*
2057 * task_deliver_crash_notification:
2058 *
2059 * Makes outcall to registered host port for a corpse.
2060 */
2061 kern_return_t
2062 task_deliver_crash_notification(
2063 task_t task,
2064 thread_t thread,
2065 exception_type_t etype,
2066 mach_exception_subcode_t subcode)
2067 {
2068 kcdata_descriptor_t crash_info = task->corpse_info;
2069 thread_t th_iter = NULL;
2070 kern_return_t kr = KERN_SUCCESS;
2071 wait_interrupt_t wsave;
2072 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
2073 ipc_port_t task_port, old_notify;
2074
2075 if (crash_info == NULL) {
2076 return KERN_FAILURE;
2077 }
2078
2079 task_lock(task);
2080 if (task_is_a_corpse_fork(task)) {
2081 /* Populate code with EXC_{RESOURCE,GUARD} for corpse fork */
2082 code[0] = etype;
2083 code[1] = subcode;
2084 } else {
2085 /* Populate code with EXC_CRASH for corpses */
2086 code[0] = EXC_CRASH;
2087 code[1] = 0;
2088 /* Update the code[1] if the boot-arg corpse_for_fatal_memkill is set */
2089 if (corpse_for_fatal_memkill) {
2090 code[1] = subcode;
2091 }
2092 }
2093
2094 queue_iterate(&task->threads, th_iter, thread_t, task_threads)
2095 {
2096 if (th_iter->corpse_dup == FALSE) {
2097 ipc_thread_reset(th_iter);
2098 }
2099 }
2100 task_unlock(task);
2101
2102 /* Arm the no-sender notification for taskport */
2103 task_reference(task);
2104 task_port = convert_task_to_port(task);
2105 ip_lock(task_port);
2106 require_ip_active(task_port);
2107 ipc_port_nsrequest(task_port, task_port->ip_mscount, ipc_port_make_sonce_locked(task_port), &old_notify);
2108 /* port unlocked */
2109 assert(IP_NULL == old_notify);
2110
2111 wsave = thread_interrupt_level(THREAD_UNINT);
2112 kr = exception_triage_thread(EXC_CORPSE_NOTIFY, code, EXCEPTION_CODE_MAX, thread);
2113 if (kr != KERN_SUCCESS) {
2114 printf("Failed to send exception EXC_CORPSE_NOTIFY. error code: %d for pid %d\n", kr, task_pid(task));
2115 }
2116
2117 (void)thread_interrupt_level(wsave);
2118
2119 /*
2120 * Drop the send right on task port, will fire the
2121 * no-sender notification if exception deliver failed.
2122 */
2123 ipc_port_release_send(task_port);
2124 return kr;
2125 }
2126
2127 /*
2128 * task_terminate:
2129 *
2130 * Terminate the specified task. See comments on thread_terminate
2131 * (kern/thread.c) about problems with terminating the "current task."
2132 */
2133
2134 kern_return_t
2135 task_terminate(
2136 task_t task)
2137 {
2138 if (task == TASK_NULL) {
2139 return KERN_INVALID_ARGUMENT;
2140 }
2141
2142 if (task->bsd_info) {
2143 return KERN_FAILURE;
2144 }
2145
2146 return task_terminate_internal(task);
2147 }
2148
/*
 * BSD-side helpers declared for MACH_ASSERT builds.  In this file
 * task_terminate_internal() uses them under MACH_ASSERT to hand the pid
 * and process name to pmap_set_process().
 */
2149 #if MACH_ASSERT
2150 extern int proc_pid(struct proc *);
2151 extern void proc_name_kdp(task_t t, char *buf, int size);
2152 #endif /* MACH_ASSERT */
2153
2154 #define VM_MAP_PARTIAL_REAP 0x54 /* 0x150 */
2155 static void
2156 __unused task_partial_reap(task_t task, __unused int pid)
2157 {
2158 unsigned int reclaimed_resident = 0;
2159 unsigned int reclaimed_compressed = 0;
2160 uint64_t task_page_count;
2161
2162 task_page_count = (get_task_phys_footprint(task) / PAGE_SIZE_64);
2163
2164 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_START),
2165 pid, task_page_count, 0, 0, 0);
2166
2167 vm_map_partial_reap(task->map, &reclaimed_resident, &reclaimed_compressed);
2168
2169 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_END),
2170 pid, reclaimed_resident, reclaimed_compressed, 0, 0);
2171 }
2172
2173 kern_return_t
2174 task_mark_corpse(task_t task)
2175 {
2176 kern_return_t kr = KERN_SUCCESS;
2177 thread_t self_thread;
2178 (void) self_thread;
2179 wait_interrupt_t wsave;
2180 #if CONFIG_MACF
2181 struct label *crash_label = NULL;
2182 #endif
2183
2184 assert(task != kernel_task);
2185 assert(task == current_task());
2186 assert(!task_is_a_corpse(task));
2187
2188 #if CONFIG_MACF
2189 crash_label = mac_exc_create_label_for_proc((struct proc*)task->bsd_info);
2190 #endif
2191
2192 kr = task_collect_crash_info(task,
2193 #if CONFIG_MACF
2194 crash_label,
2195 #endif
2196 FALSE);
2197 if (kr != KERN_SUCCESS) {
2198 goto out;
2199 }
2200
2201 self_thread = current_thread();
2202
2203 wsave = thread_interrupt_level(THREAD_UNINT);
2204 task_lock(task);
2205
2206 task_set_corpse_pending_report(task);
2207 task_set_corpse(task);
2208 task->crashed_thread_id = thread_tid(self_thread);
2209
2210 kr = task_start_halt_locked(task, TRUE);
2211 assert(kr == KERN_SUCCESS);
2212
2213 ipc_task_reset(task);
2214 /* Remove the naked send right for task port, needed to arm no sender notification */
2215 task_set_special_port(task, TASK_KERNEL_PORT, IPC_PORT_NULL);
2216 ipc_task_enable(task);
2217
2218 task_unlock(task);
2219 /* terminate the ipc space */
2220 ipc_space_terminate(task->itk_space);
2221
2222 /* Add it to global corpse task list */
2223 task_add_to_corpse_task_list(task);
2224
2225 task_start_halt(task);
2226 thread_terminate_internal(self_thread);
2227
2228 (void) thread_interrupt_level(wsave);
2229 assert(task->halting == TRUE);
2230
2231 out:
2232 #if CONFIG_MACF
2233 mac_exc_free_label(crash_label);
2234 #endif
2235 return kr;
2236 }
2237
2238 /*
2239 * task_clear_corpse
2240 *
2241 * Clears the corpse pending bit on task.
2242 * Removes inspection bit on the threads.
2243 */
2244 void
2245 task_clear_corpse(task_t task)
2246 {
2247 thread_t th_iter = NULL;
2248
2249 task_lock(task);
2250 queue_iterate(&task->threads, th_iter, thread_t, task_threads)
2251 {
2252 thread_mtx_lock(th_iter);
2253 th_iter->inspection = FALSE;
2254 thread_mtx_unlock(th_iter);
2255 }
2256
2257 thread_terminate_crashed_threads();
2258 /* remove the pending corpse report flag */
2259 task_clear_corpse_pending_report(task);
2260
2261 task_unlock(task);
2262 }
2263
2264 /*
2265 * task_port_notify
2266 *
2267 * Called whenever the Mach port system detects no-senders on
2268 * the task port of a corpse.
2269 * Each notification that comes in should terminate the task (corpse).
2270 */
2271 void
2272 task_port_notify(mach_msg_header_t *msg)
2273 {
2274 mach_no_senders_notification_t *notification = (void *)msg;
2275 ipc_port_t port = notification->not_header.msgh_remote_port;
2276 task_t task;
2277
2278 require_ip_active(port);
2279 assert(IKOT_TASK == ip_kotype(port));
2280 task = (task_t) port->ip_kobject;
2281
2282 assert(task_is_a_corpse(task));
2283
2284 /* Remove the task from global corpse task list */
2285 task_remove_from_corpse_task_list(task);
2286
2287 task_clear_corpse(task);
2288 task_terminate_internal(task);
2289 }
2290
2291 /*
2292 * task_wait_till_threads_terminate_locked
2293 *
2294 * Wait till all the threads in the task are terminated.
2295 * Might release the task lock and re-acquire it.
2296 */
2297 void
2298 task_wait_till_threads_terminate_locked(task_t task)
2299 {
2300 /* wait for all the threads in the task to terminate */
2301 while (task->active_thread_count != 0) {
2302 assert_wait((event_t)&task->active_thread_count, THREAD_UNINT);
2303 task_unlock(task);
2304 thread_block(THREAD_CONTINUE_NULL);
2305
2306 task_lock(task);
2307 }
2308 }
2309
2310 /*
2311 * task_duplicate_map_and_threads
2312 *
2313 * Copy vmmap of source task.
2314 * Copy active threads from source task to destination task.
2315 * Source task would be suspended during the copy.
2316 */
2317 kern_return_t
2318 task_duplicate_map_and_threads(
2319 task_t task,
2320 void *p,
2321 task_t new_task,
2322 thread_t *thread_ret,
2323 uint64_t **udata_buffer,
2324 int *size,
2325 int *num_udata)
2326 {
2327 kern_return_t kr = KERN_SUCCESS;
2328 int active;
2329 thread_t thread, self, thread_return = THREAD_NULL;
2330 thread_t new_thread = THREAD_NULL, first_thread = THREAD_NULL;
2331 thread_t *thread_array;
2332 uint32_t active_thread_count = 0, array_count = 0, i;
2333 vm_map_t oldmap;
2334 uint64_t *buffer = NULL;
2335 int buf_size = 0;
2336 int est_knotes = 0, num_knotes = 0;
2337
2338 self = current_thread();
2339
2340 /*
2341 * Suspend the task to copy thread state, use the internal
2342 * variant so that no user-space process can resume
2343 * the task from under us
2344 */
2345 kr = task_suspend_internal(task);
2346 if (kr != KERN_SUCCESS) {
2347 return kr;
2348 }
2349
2350 if (task->map->disable_vmentry_reuse == TRUE) {
2351 /*
2352 * Quite likely GuardMalloc (or some debugging tool)
2353 * is being used on this task. And it has gone through
2354 * its limit. Making a corpse will likely encounter
2355 * a lot of VM entries that will need COW.
2356 *
2357 * Skip it.
2358 */
2359 #if DEVELOPMENT || DEBUG
2360 memorystatus_abort_vm_map_fork(task);
2361 #endif
2362 task_resume_internal(task);
2363 return KERN_FAILURE;
2364 }
2365
2366 /* Check with VM if vm_map_fork is allowed for this task */
2367 if (memorystatus_allowed_vm_map_fork(task)) {
2368 /* Setup new task's vmmap, switch from parent task's map to it COW map */
2369 oldmap = new_task->map;
2370 new_task->map = vm_map_fork(new_task->ledger,
2371 task->map,
2372 (VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
2373 VM_MAP_FORK_PRESERVE_PURGEABLE |
2374 VM_MAP_FORK_CORPSE_FOOTPRINT));
2375 vm_map_deallocate(oldmap);
2376
2377 /* copy ledgers that impact the memory footprint */
2378 vm_map_copy_footprint_ledgers(task, new_task);
2379
2380 /* Get all the udata pointers from kqueue */
2381 est_knotes = kevent_proc_copy_uptrs(p, NULL, 0);
2382 if (est_knotes > 0) {
2383 buf_size = (est_knotes + 32) * sizeof(uint64_t);
2384 buffer = (uint64_t *) kalloc(buf_size);
2385 num_knotes = kevent_proc_copy_uptrs(p, buffer, buf_size);
2386 if (num_knotes > est_knotes + 32) {
2387 num_knotes = est_knotes + 32;
2388 }
2389 }
2390 }
2391
2392 active_thread_count = task->active_thread_count;
2393 if (active_thread_count == 0) {
2394 if (buffer != NULL) {
2395 kfree(buffer, buf_size);
2396 }
2397 task_resume_internal(task);
2398 return KERN_FAILURE;
2399 }
2400
2401 thread_array = (thread_t *) kalloc(sizeof(thread_t) * active_thread_count);
2402
2403 /* Iterate all the threads and drop the task lock before calling thread_create_with_continuation */
2404 task_lock(task);
2405 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2406 /* Skip inactive threads */
2407 active = thread->active;
2408 if (!active) {
2409 continue;
2410 }
2411
2412 if (array_count >= active_thread_count) {
2413 break;
2414 }
2415
2416 thread_array[array_count++] = thread;
2417 thread_reference(thread);
2418 }
2419 task_unlock(task);
2420
2421 for (i = 0; i < array_count; i++) {
2422 kr = thread_create_with_continuation(new_task, &new_thread, (thread_continue_t)thread_corpse_continue);
2423 if (kr != KERN_SUCCESS) {
2424 break;
2425 }
2426
2427 /* Equivalent of current thread in corpse */
2428 if (thread_array[i] == self) {
2429 thread_return = new_thread;
2430 new_task->crashed_thread_id = thread_tid(new_thread);
2431 } else if (first_thread == NULL) {
2432 first_thread = new_thread;
2433 } else {
2434 /* drop the extra ref returned by thread_create_with_continuation */
2435 thread_deallocate(new_thread);
2436 }
2437
2438 kr = thread_dup2(thread_array[i], new_thread);
2439 if (kr != KERN_SUCCESS) {
2440 thread_mtx_lock(new_thread);
2441 new_thread->corpse_dup = TRUE;
2442 thread_mtx_unlock(new_thread);
2443 continue;
2444 }
2445
2446 /* Copy thread name */
2447 bsd_copythreadname(new_thread->uthread, thread_array[i]->uthread);
2448 new_thread->thread_tag = thread_array[i]->thread_tag;
2449 thread_copy_resource_info(new_thread, thread_array[i]);
2450 }
2451
2452 /* return the first thread if we couldn't find the equivalent of current */
2453 if (thread_return == THREAD_NULL) {
2454 thread_return = first_thread;
2455 } else if (first_thread != THREAD_NULL) {
2456 /* drop the extra ref returned by thread_create_with_continuation */
2457 thread_deallocate(first_thread);
2458 }
2459
2460 task_resume_internal(task);
2461
2462 for (i = 0; i < array_count; i++) {
2463 thread_deallocate(thread_array[i]);
2464 }
2465 kfree(thread_array, sizeof(thread_t) * active_thread_count);
2466
2467 if (kr == KERN_SUCCESS) {
2468 *thread_ret = thread_return;
2469 *udata_buffer = buffer;
2470 *size = buf_size;
2471 *num_udata = num_knotes;
2472 } else {
2473 if (thread_return != THREAD_NULL) {
2474 thread_deallocate(thread_return);
2475 }
2476 if (buffer != NULL) {
2477 kfree(buffer, buf_size);
2478 }
2479 }
2480
2481 return kr;
2482 }
2483
/*
 * Declared here for task_terminate_internal(), which calls it with the
 * task lock held to switch secluded-memory use off before teardown.
 */
2484 #if CONFIG_SECLUDED_MEMORY
2485 extern void task_set_can_use_secluded_mem_locked(
2486 task_t task,
2487 boolean_t can_use_secluded_mem);
2488 #endif /* CONFIG_SECLUDED_MEMORY */
2489
2490 kern_return_t
2491 task_terminate_internal(
2492 task_t task)
2493 {
2494 thread_t thread, self;
2495 task_t self_task;
2496 boolean_t interrupt_save;
2497 int pid = 0;
2498
2499 assert(task != kernel_task);
2500
2501 self = current_thread();
2502 self_task = self->task;
2503
2504 /*
2505 * Get the task locked and make sure that we are not racing
2506 * with someone else trying to terminate us.
2507 */
2508 if (task == self_task) {
2509 task_lock(task);
2510 } else if (task < self_task) {
2511 task_lock(task);
2512 task_lock(self_task);
2513 } else {
2514 task_lock(self_task);
2515 task_lock(task);
2516 }
2517
2518 #if CONFIG_SECLUDED_MEMORY
2519 if (task->task_can_use_secluded_mem) {
2520 task_set_can_use_secluded_mem_locked(task, FALSE);
2521 }
2522 task->task_could_use_secluded_mem = FALSE;
2523 task->task_could_also_use_secluded_mem = FALSE;
2524
2525 if (task->task_suppressed_secluded) {
2526 stop_secluded_suppression(task);
2527 }
2528 #endif /* CONFIG_SECLUDED_MEMORY */
2529
2530 if (!task->active) {
2531 /*
2532 * Task is already being terminated.
2533 * Just return an error. If we are dying, this will
2534 * just get us to our AST special handler and that
2535 * will get us to finalize the termination of ourselves.
2536 */
2537 task_unlock(task);
2538 if (self_task != task) {
2539 task_unlock(self_task);
2540 }
2541
2542 return KERN_FAILURE;
2543 }
2544
2545 if (task_corpse_pending_report(task)) {
2546 /*
2547 * Task is marked for reporting as corpse.
2548 * Just return an error. This will
2549 * just get us to our AST special handler and that
2550 * will get us to finish the path to death
2551 */
2552 task_unlock(task);
2553 if (self_task != task) {
2554 task_unlock(self_task);
2555 }
2556
2557 return KERN_FAILURE;
2558 }
2559
2560 if (self_task != task) {
2561 task_unlock(self_task);
2562 }
2563
2564 /*
2565 * Make sure the current thread does not get aborted out of
2566 * the waits inside these operations.
2567 */
2568 interrupt_save = thread_interrupt_level(THREAD_UNINT);
2569
2570 /*
2571 * Indicate that we want all the threads to stop executing
2572 * at user space by holding the task (we would have held
2573 * each thread independently in thread_terminate_internal -
2574 * but this way we may be more likely to already find it
2575 * held there). Mark the task inactive, and prevent
2576 * further task operations via the task port.
2577 */
2578 task_hold_locked(task);
2579 task->active = FALSE;
2580 ipc_task_disable(task);
2581
2582 #if CONFIG_TELEMETRY
2583 /*
2584 * Notify telemetry that this task is going away.
2585 */
2586 telemetry_task_ctl_locked(task, TF_TELEMETRY, 0);
2587 #endif
2588
2589 /*
2590 * Terminate each thread in the task.
2591 */
2592 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2593 thread_terminate_internal(thread);
2594 }
2595
2596 #ifdef MACH_BSD
2597 if (task->bsd_info != NULL && !task_is_exec_copy(task)) {
2598 pid = proc_pid(task->bsd_info);
2599 }
2600 #endif /* MACH_BSD */
2601
2602 task_unlock(task);
2603
2604 proc_set_task_policy(task, TASK_POLICY_ATTRIBUTE,
2605 TASK_POLICY_TERMINATED, TASK_POLICY_ENABLE);
2606
2607 /* Early object reap phase */
2608
2609 // PR-17045188: Revisit implementation
2610 // task_partial_reap(task, pid);
2611
2612 #if CONFIG_EMBEDDED
2613 /*
2614 * remove all task watchers
2615 */
2616 task_removewatchers(task);
2617
2618 #endif /* CONFIG_EMBEDDED */
2619
2620 /*
2621 * Destroy all synchronizers owned by the task.
2622 */
2623 task_synchronizer_destroy_all(task);
2624
2625 /*
2626 * Clear the watchport boost on the task.
2627 */
2628 task_remove_turnstile_watchports(task);
2629
2630 /*
2631 * Destroy the IPC space, leaving just a reference for it.
2632 */
2633 ipc_space_terminate(task->itk_space);
2634
2635 #if 00
2636 /* if some ledgers go negative on tear-down again... */
2637 ledger_disable_panic_on_negative(task->map->pmap->ledger,
2638 task_ledgers.phys_footprint);
2639 ledger_disable_panic_on_negative(task->map->pmap->ledger,
2640 task_ledgers.internal);
2641 ledger_disable_panic_on_negative(task->map->pmap->ledger,
2642 task_ledgers.internal_compressed);
2643 ledger_disable_panic_on_negative(task->map->pmap->ledger,
2644 task_ledgers.iokit_mapped);
2645 ledger_disable_panic_on_negative(task->map->pmap->ledger,
2646 task_ledgers.alternate_accounting);
2647 ledger_disable_panic_on_negative(task->map->pmap->ledger,
2648 task_ledgers.alternate_accounting_compressed);
2649 #endif
2650
2651 /*
2652 * If the current thread is a member of the task
2653 * being terminated, then the last reference to
2654 * the task will not be dropped until the thread
2655 * is finally reaped. To avoid incurring the
2656 * expense of removing the address space regions
2657 * at reap time, we do it explictly here.
2658 */
2659
2660 vm_map_lock(task->map);
2661 vm_map_disable_hole_optimization(task->map);
2662 vm_map_unlock(task->map);
2663
2664 #if MACH_ASSERT
2665 /*
2666 * Identify the pmap's process, in case the pmap ledgers drift
2667 * and we have to report it.
2668 */
2669 char procname[17];
2670 if (task->bsd_info && !task_is_exec_copy(task)) {
2671 pid = proc_pid(task->bsd_info);
2672 proc_name_kdp(task, procname, sizeof(procname));
2673 } else {
2674 pid = 0;
2675 strlcpy(procname, "<unknown>", sizeof(procname));
2676 }
2677 pmap_set_process(task->map->pmap, pid, procname);
2678 #endif /* MACH_ASSERT */
2679
2680 vm_map_remove(task->map,
2681 task->map->min_offset,
2682 task->map->max_offset,
2683 /*
2684 * Final cleanup:
2685 * + no unnesting
2686 * + remove immutable mappings
2687 * + allow gaps in range
2688 */
2689 (VM_MAP_REMOVE_NO_UNNESTING |
2690 VM_MAP_REMOVE_IMMUTABLE |
2691 VM_MAP_REMOVE_GAPS_OK));
2692
2693 /* release our shared region */
2694 vm_shared_region_set(task, NULL);
2695
2696
2697 lck_mtx_lock(&tasks_threads_lock);
2698 queue_remove(&tasks, task, task_t, tasks);
2699 queue_enter(&terminated_tasks, task, task_t, tasks);
2700 tasks_count--;
2701 terminated_tasks_count++;
2702 lck_mtx_unlock(&tasks_threads_lock);
2703
2704 /*
2705 * We no longer need to guard against being aborted, so restore
2706 * the previous interruptible state.
2707 */
2708 thread_interrupt_level(interrupt_save);
2709
2710 #if KPC
2711 /* force the task to release all ctrs */
2712 if (task->t_kpc & TASK_KPC_FORCED_ALL_CTRS) {
2713 kpc_force_all_ctrs(task, 0);
2714 }
2715 #endif /* KPC */
2716
2717 #if CONFIG_COALITIONS
2718 /*
2719 * Leave our coalitions. (drop activation but not reference)
2720 */
2721 coalitions_remove_task(task);
2722 #endif
2723
2724 /*
2725 * Get rid of the task active reference on itself.
2726 */
2727 task_deallocate(task);
2728
2729 return KERN_SUCCESS;
2730 }
2731
2732 void
2733 tasks_system_suspend(boolean_t suspend)
2734 {
2735 task_t task;
2736
2737 lck_mtx_lock(&tasks_threads_lock);
2738 assert(tasks_suspend_state != suspend);
2739 tasks_suspend_state = suspend;
2740 queue_iterate(&tasks, task, task_t, tasks) {
2741 if (task == kernel_task) {
2742 continue;
2743 }
2744 suspend ? task_suspend_internal(task) : task_resume_internal(task);
2745 }
2746 lck_mtx_unlock(&tasks_threads_lock);
2747 }
2748
2749 /*
2750 * task_start_halt:
2751 *
2752 * Shut the current task down (except for the current thread) in
2753 * preparation for dramatic changes to the task (probably exec).
2754 * We hold the task and mark all other threads in the task for
2755 * termination.
2756 */
2757 kern_return_t
2758 task_start_halt(task_t task)
2759 {
2760 kern_return_t kr = KERN_SUCCESS;
2761 task_lock(task);
2762 kr = task_start_halt_locked(task, FALSE);
2763 task_unlock(task);
2764 return kr;
2765 }
2766
2767 static kern_return_t
2768 task_start_halt_locked(task_t task, boolean_t should_mark_corpse)
2769 {
2770 thread_t thread, self;
2771 uint64_t dispatchqueue_offset;
2772
2773 assert(task != kernel_task);
2774
2775 self = current_thread();
2776
2777 if (task != self->task && !task_is_a_corpse_fork(task)) {
2778 return KERN_INVALID_ARGUMENT;
2779 }
2780
2781 if (task->halting || !task->active || !self->active) {
2782 /*
2783 * Task or current thread is already being terminated.
2784 * Hurry up and return out of the current kernel context
2785 * so that we run our AST special handler to terminate
2786 * ourselves.
2787 */
2788 return KERN_FAILURE;
2789 }
2790
2791 task->halting = TRUE;
2792
2793 /*
2794 * Mark all the threads to keep them from starting any more
2795 * user-level execution. The thread_terminate_internal code
2796 * would do this on a thread by thread basis anyway, but this
2797 * gives us a better chance of not having to wait there.
2798 */
2799 task_hold_locked(task);
2800 dispatchqueue_offset = get_dispatchqueue_offset_from_proc(task->bsd_info);
2801
2802 /*
2803 * Terminate all the other threads in the task.
2804 */
2805 queue_iterate(&task->threads, thread, thread_t, task_threads)
2806 {
2807 if (should_mark_corpse) {
2808 thread_mtx_lock(thread);
2809 thread->inspection = TRUE;
2810 thread_mtx_unlock(thread);
2811 }
2812 if (thread != self) {
2813 thread_terminate_internal(thread);
2814 }
2815 }
2816 task->dispatchqueue_offset = dispatchqueue_offset;
2817
2818 task_release_locked(task);
2819
2820 return KERN_SUCCESS;
2821 }
2822
2823
2824 /*
2825 * task_complete_halt:
2826 *
2827 * Complete task halt by waiting for threads to terminate, then clean
2828 * up task resources (VM, port namespace, etc...) and then let the
2829 * current thread go in the (practically empty) task context.
2830 *
2831 * Note: task->halting flag is not cleared in order to avoid creation
2832 * of new thread in old exec'ed task.
2833 */
2834 void
2835 task_complete_halt(task_t task)
2836 {
2837 task_lock(task);
2838 assert(task->halting);
2839 assert(task == current_task());
2840
2841 /*
2842 * Wait for the other threads to get shut down.
2843 * When the last other thread is reaped, we'll be
2844 * woken up.
2845 */
2846 if (task->thread_count > 1) {
2847 assert_wait((event_t)&task->halting, THREAD_UNINT);
2848 task_unlock(task);
2849 thread_block(THREAD_CONTINUE_NULL);
2850 } else {
2851 task_unlock(task);
2852 }
2853
2854 /*
2855 * Give the machine dependent code a chance
2856 * to perform cleanup of task-level resources
2857 * associated with the current thread before
2858 * ripping apart the task.
2859 */
2860 machine_task_terminate(task);
2861
2862 /*
2863 * Destroy all synchronizers owned by the task.
2864 */
2865 task_synchronizer_destroy_all(task);
2866
2867 /*
2868 * Destroy the contents of the IPC space, leaving just
2869 * a reference for it.
2870 */
2871 ipc_space_clean(task->itk_space);
2872
2873 /*
2874 * Clean out the address space, as we are going to be
2875 * getting a new one.
2876 */
2877 vm_map_remove(task->map, task->map->min_offset,
2878 task->map->max_offset,
2879 /*
2880 * Final cleanup:
2881 * + no unnesting
2882 * + remove immutable mappings
2883 * + allow gaps in the range
2884 */
2885 (VM_MAP_REMOVE_NO_UNNESTING |
2886 VM_MAP_REMOVE_IMMUTABLE |
2887 VM_MAP_REMOVE_GAPS_OK));
2888
2889 /*
2890 * Kick out any IOKitUser handles to the task. At best they're stale,
2891 * at worst someone is racing a SUID exec.
2892 */
2893 iokit_task_terminate(task);
2894 }
2895
2896 /*
2897 * task_hold_locked:
2898 *
2899 * Suspend execution of the specified task.
2900 * This is a recursive-style suspension of the task, a count of
2901 * suspends is maintained.
2902 *
2903 * CONDITIONS: the task is locked and active.
2904 */
2905 void
2906 task_hold_locked(
2907 task_t task)
2908 {
2909 thread_t thread;
2910
2911 assert(task->active);
2912
2913 if (task->suspend_count++ > 0) {
2914 return;
2915 }
2916
2917 if (task->bsd_info) {
2918 workq_proc_suspended(task->bsd_info);
2919 }
2920
2921 /*
2922 * Iterate through all the threads and hold them.
2923 */
2924 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2925 thread_mtx_lock(thread);
2926 thread_hold(thread);
2927 thread_mtx_unlock(thread);
2928 }
2929 }
2930
2931 /*
2932 * task_hold:
2933 *
2934 * Same as the internal routine above, except that is must lock
2935 * and verify that the task is active. This differs from task_suspend
2936 * in that it places a kernel hold on the task rather than just a
2937 * user-level hold. This keeps users from over resuming and setting
2938 * it running out from under the kernel.
2939 *
2940 * CONDITIONS: the caller holds a reference on the task
2941 */
2942 kern_return_t
2943 task_hold(
2944 task_t task)
2945 {
2946 if (task == TASK_NULL) {
2947 return KERN_INVALID_ARGUMENT;
2948 }
2949
2950 task_lock(task);
2951
2952 if (!task->active) {
2953 task_unlock(task);
2954
2955 return KERN_FAILURE;
2956 }
2957
2958 task_hold_locked(task);
2959 task_unlock(task);
2960
2961 return KERN_SUCCESS;
2962 }
2963
2964 kern_return_t
2965 task_wait(
2966 task_t task,
2967 boolean_t until_not_runnable)
2968 {
2969 if (task == TASK_NULL) {
2970 return KERN_INVALID_ARGUMENT;
2971 }
2972
2973 task_lock(task);
2974
2975 if (!task->active) {
2976 task_unlock(task);
2977
2978 return KERN_FAILURE;
2979 }
2980
2981 task_wait_locked(task, until_not_runnable);
2982 task_unlock(task);
2983
2984 return KERN_SUCCESS;
2985 }
2986
2987 /*
2988 * task_wait_locked:
2989 *
2990 * Wait for all threads in task to stop.
2991 *
2992 * Conditions:
2993 * Called with task locked, active, and held.
2994 */
2995 void
2996 task_wait_locked(
2997 task_t task,
2998 boolean_t until_not_runnable)
2999 {
3000 thread_t thread, self;
3001
3002 assert(task->active);
3003 assert(task->suspend_count > 0);
3004
3005 self = current_thread();
3006
3007 /*
3008 * Iterate through all the threads and wait for them to
3009 * stop. Do not wait for the current thread if it is within
3010 * the task.
3011 */
3012 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3013 if (thread != self) {
3014 thread_wait(thread, until_not_runnable);
3015 }
3016 }
3017 }
3018
3019 boolean_t
3020 task_is_app_suspended(task_t task)
3021 {
3022 return task->pidsuspended;
3023 }
3024
3025 /*
3026 * task_release_locked:
3027 *
3028 * Release a kernel hold on a task.
3029 *
3030 * CONDITIONS: the task is locked and active
3031 */
3032 void
3033 task_release_locked(
3034 task_t task)
3035 {
3036 thread_t thread;
3037
3038 assert(task->active);
3039 assert(task->suspend_count > 0);
3040
3041 if (--task->suspend_count > 0) {
3042 return;
3043 }
3044
3045 if (task->bsd_info) {
3046 workq_proc_resumed(task->bsd_info);
3047 }
3048
3049 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3050 thread_mtx_lock(thread);
3051 thread_release(thread);
3052 thread_mtx_unlock(thread);
3053 }
3054 }
3055
3056 /*
3057 * task_release:
3058 *
3059 * Same as the internal routine above, except that it must lock
3060 * and verify that the task is active.
3061 *
3062 * CONDITIONS: The caller holds a reference to the task
3063 */
3064 kern_return_t
3065 task_release(
3066 task_t task)
3067 {
3068 if (task == TASK_NULL) {
3069 return KERN_INVALID_ARGUMENT;
3070 }
3071
3072 task_lock(task);
3073
3074 if (!task->active) {
3075 task_unlock(task);
3076
3077 return KERN_FAILURE;
3078 }
3079
3080 task_release_locked(task);
3081 task_unlock(task);
3082
3083 return KERN_SUCCESS;
3084 }
3085
3086 kern_return_t
3087 task_threads(
3088 task_t task,
3089 thread_act_array_t *threads_out,
3090 mach_msg_type_number_t *count)
3091 {
3092 mach_msg_type_number_t actual;
3093 thread_t *thread_list;
3094 thread_t thread;
3095 vm_size_t size, size_needed;
3096 void *addr;
3097 unsigned int i, j;
3098
3099 if (task == TASK_NULL) {
3100 return KERN_INVALID_ARGUMENT;
3101 }
3102
3103 size = 0; addr = NULL;
3104
3105 for (;;) {
3106 task_lock(task);
3107 if (!task->active) {
3108 task_unlock(task);
3109
3110 if (size != 0) {
3111 kfree(addr, size);
3112 }
3113
3114 return KERN_FAILURE;
3115 }
3116
3117 actual = task->thread_count;
3118
3119 /* do we have the memory we need? */
3120 size_needed = actual * sizeof(mach_port_t);
3121 if (size_needed <= size) {
3122 break;
3123 }
3124
3125 /* unlock the task and allocate more memory */
3126 task_unlock(task);
3127
3128 if (size != 0) {
3129 kfree(addr, size);
3130 }
3131
3132 assert(size_needed > 0);
3133 size = size_needed;
3134
3135 addr = kalloc(size);
3136 if (addr == 0) {
3137 return KERN_RESOURCE_SHORTAGE;
3138 }
3139 }
3140
3141 /* OK, have memory and the task is locked & active */
3142 thread_list = (thread_t *)addr;
3143
3144 i = j = 0;
3145
3146 for (thread = (thread_t)queue_first(&task->threads); i < actual;
3147 ++i, thread = (thread_t)queue_next(&thread->task_threads)) {
3148 thread_reference_internal(thread);
3149 thread_list[j++] = thread;
3150 }
3151
3152 assert(queue_end(&task->threads, (queue_entry_t)thread));
3153
3154 actual = j;
3155 size_needed = actual * sizeof(mach_port_t);
3156
3157 /* can unlock task now that we've got the thread refs */
3158 task_unlock(task);
3159
3160 if (actual == 0) {
3161 /* no threads, so return null pointer and deallocate memory */
3162
3163 *threads_out = NULL;
3164 *count = 0;
3165
3166 if (size != 0) {
3167 kfree(addr, size);
3168 }
3169 } else {
3170 /* if we allocated too much, must copy */
3171
3172 if (size_needed < size) {
3173 void *newaddr;
3174
3175 newaddr = kalloc(size_needed);
3176 if (newaddr == 0) {
3177 for (i = 0; i < actual; ++i) {
3178 thread_deallocate(thread_list[i]);
3179 }
3180 kfree(addr, size);
3181 return KERN_RESOURCE_SHORTAGE;
3182 }
3183
3184 bcopy(addr, newaddr, size_needed);
3185 kfree(addr, size);
3186 thread_list = (thread_t *)newaddr;
3187 }
3188
3189 *threads_out = thread_list;
3190 *count = actual;
3191
3192 /* do the conversion that Mig should handle */
3193
3194 for (i = 0; i < actual; ++i) {
3195 ((ipc_port_t *) thread_list)[i] = convert_thread_to_port(thread_list[i]);
3196 }
3197 }
3198
3199 return KERN_SUCCESS;
3200 }
3201
/*
 * Hold modes accepted by place_task_hold() and release_task_hold():
 *
 *   TASK_HOLD_NORMAL      plain user stop; only user_stop_count changes.
 *   TASK_HOLD_PIDSUSPEND  used by the pidsuspend/pidresume paths; on
 *                         release the task's pidsuspended flag must be
 *                         set and is cleared.
 *   TASK_HOLD_LEGACY      additionally counted in legacy_stop_count.
 *   TASK_HOLD_LEGACY_ALL  release-only: drops every hold recorded in
 *                         legacy_stop_count at once.
 */
3202 #define TASK_HOLD_NORMAL 0
3203 #define TASK_HOLD_PIDSUSPEND 1
3204 #define TASK_HOLD_LEGACY 2
3205 #define TASK_HOLD_LEGACY_ALL 3
3206
3207 static kern_return_t
3208 place_task_hold(
3209 task_t task,
3210 int mode)
3211 {
3212 if (!task->active && !task_is_a_corpse(task)) {
3213 return KERN_FAILURE;
3214 }
3215
3216 /* Return success for corpse task */
3217 if (task_is_a_corpse(task)) {
3218 return KERN_SUCCESS;
3219 }
3220
3221 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
3222 MACHDBG_CODE(DBG_MACH_IPC, MACH_TASK_SUSPEND) | DBG_FUNC_NONE,
3223 task_pid(task), ((thread_t)queue_first(&task->threads))->thread_id,
3224 task->user_stop_count, task->user_stop_count + 1, 0);
3225
3226 #if MACH_ASSERT
3227 current_task()->suspends_outstanding++;
3228 #endif
3229
3230 if (mode == TASK_HOLD_LEGACY) {
3231 task->legacy_stop_count++;
3232 }
3233
3234 if (task->user_stop_count++ > 0) {
3235 /*
3236 * If the stop count was positive, the task is
3237 * already stopped and we can exit.
3238 */
3239 return KERN_SUCCESS;
3240 }
3241
3242 /*
3243 * Put a kernel-level hold on the threads in the task (all
3244 * user-level task suspensions added together represent a
3245 * single kernel-level hold). We then wait for the threads
3246 * to stop executing user code.
3247 */
3248 task_hold_locked(task);
3249 task_wait_locked(task, FALSE);
3250
3251 return KERN_SUCCESS;
3252 }
3253
3254 static kern_return_t
3255 release_task_hold(
3256 task_t task,
3257 int mode)
3258 {
3259 boolean_t release = FALSE;
3260
3261 if (!task->active && !task_is_a_corpse(task)) {
3262 return KERN_FAILURE;
3263 }
3264
3265 /* Return success for corpse task */
3266 if (task_is_a_corpse(task)) {
3267 return KERN_SUCCESS;
3268 }
3269
3270 if (mode == TASK_HOLD_PIDSUSPEND) {
3271 if (task->pidsuspended == FALSE) {
3272 return KERN_FAILURE;
3273 }
3274 task->pidsuspended = FALSE;
3275 }
3276
3277 if (task->user_stop_count > (task->pidsuspended ? 1 : 0)) {
3278 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
3279 MACHDBG_CODE(DBG_MACH_IPC, MACH_TASK_RESUME) | DBG_FUNC_NONE,
3280 task_pid(task), ((thread_t)queue_first(&task->threads))->thread_id,
3281 task->user_stop_count, mode, task->legacy_stop_count);
3282
3283 #if MACH_ASSERT
3284 /*
3285 * This is obviously not robust; if we suspend one task and then resume a different one,
3286 * we'll fly under the radar. This is only meant to catch the common case of a crashed
3287 * or buggy suspender.
3288 */
3289 current_task()->suspends_outstanding--;
3290 #endif
3291
3292 if (mode == TASK_HOLD_LEGACY_ALL) {
3293 if (task->legacy_stop_count >= task->user_stop_count) {
3294 task->user_stop_count = 0;
3295 release = TRUE;
3296 } else {
3297 task->user_stop_count -= task->legacy_stop_count;
3298 }
3299 task->legacy_stop_count = 0;
3300 } else {
3301 if (mode == TASK_HOLD_LEGACY && task->legacy_stop_count > 0) {
3302 task->legacy_stop_count--;
3303 }
3304 if (--task->user_stop_count == 0) {
3305 release = TRUE;
3306 }
3307 }
3308 } else {
3309 return KERN_FAILURE;
3310 }
3311
3312 /*
3313 * Release the task if necessary.
3314 */
3315 if (release) {
3316 task_release_locked(task);
3317 }
3318
3319 return KERN_SUCCESS;
3320 }
3321
3322 boolean_t
3323 get_task_suspended(task_t task)
3324 {
3325 return 0 != task->user_stop_count;
3326 }
3327
3328 /*
3329 * task_suspend:
3330 *
3331 * Implement an (old-fashioned) user-level suspension on a task.
3332 *
3333 * Because the user isn't expecting to have to manage a suspension
3334 * token, we'll track it for him in the kernel in the form of a naked
3335 * send right to the task's resume port. All such send rights
3336 * account for a single suspension against the task (unlike task_suspend2()
3337 * where each caller gets a unique suspension count represented by a
3338 * unique send-once right).
3339 *
3340 * Conditions:
3341 * The caller holds a reference to the task
3342 */
3343 kern_return_t
3344 task_suspend(
3345 task_t task)
3346 {
3347 kern_return_t kr;
3348 mach_port_t port;
3349 mach_port_name_t name;
3350
3351 if (task == TASK_NULL || task == kernel_task) {
3352 return KERN_INVALID_ARGUMENT;
3353 }
3354
3355 task_lock(task);
3356
3357 /*
3358 * place a legacy hold on the task.
3359 */
3360 kr = place_task_hold(task, TASK_HOLD_LEGACY);
3361 if (kr != KERN_SUCCESS) {
3362 task_unlock(task);
3363 return kr;
3364 }
3365
3366 /*
3367 * Claim a send right on the task resume port, and request a no-senders
3368 * notification on that port (if none outstanding).
3369 */
3370 (void)ipc_kobject_make_send_lazy_alloc_port(&task->itk_resume,
3371 (ipc_kobject_t)task, IKOT_TASK_RESUME);
3372 port = task->itk_resume;
3373
3374 task_unlock(task);
3375
3376 /*
3377 * Copyout the send right into the calling task's IPC space. It won't know it is there,
3378 * but we'll look it up when calling a traditional resume. Any IPC operations that
3379 * deallocate the send right will auto-release the suspension.
3380 */
3381 if ((kr = ipc_kmsg_copyout_object(current_task()->itk_space, ip_to_object(port),
3382 MACH_MSG_TYPE_MOVE_SEND, NULL, NULL, &name)) != KERN_SUCCESS) {
3383 printf("warning: %s(%d) failed to copyout suspension token for pid %d with error: %d\n",
3384 proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info),
3385 task_pid(task), kr);
3386 return kr;
3387 }
3388
3389 return kr;
3390 }
3391
3392 /*
3393 * task_resume:
3394 * Release a user hold on a task.
3395 *
3396 * Conditions:
3397 * The caller holds a reference to the task
3398 */
3399 kern_return_t
3400 task_resume(
3401 task_t task)
3402 {
3403 kern_return_t kr;
3404 mach_port_name_t resume_port_name;
3405 ipc_entry_t resume_port_entry;
3406 ipc_space_t space = current_task()->itk_space;
3407
3408 if (task == TASK_NULL || task == kernel_task) {
3409 return KERN_INVALID_ARGUMENT;
3410 }
3411
3412 /* release a legacy task hold */
3413 task_lock(task);
3414 kr = release_task_hold(task, TASK_HOLD_LEGACY);
3415 task_unlock(task);
3416
3417 is_write_lock(space);
3418 if (is_active(space) && IP_VALID(task->itk_resume) &&
3419 ipc_hash_lookup(space, ip_to_object(task->itk_resume), &resume_port_name, &resume_port_entry) == TRUE) {
3420 /*
3421 * We found a suspension token in the caller's IPC space. Release a send right to indicate that
3422 * we are holding one less legacy hold on the task from this caller. If the release failed,
3423 * go ahead and drop all the rights, as someone either already released our holds or the task
3424 * is gone.
3425 */
3426 if (kr == KERN_SUCCESS) {
3427 ipc_right_dealloc(space, resume_port_name, resume_port_entry);
3428 } else {
3429 ipc_right_destroy(space, resume_port_name, resume_port_entry, FALSE, 0);
3430 }
3431 /* space unlocked */
3432 } else {
3433 is_write_unlock(space);
3434 if (kr == KERN_SUCCESS) {
3435 printf("warning: %s(%d) performed out-of-band resume on pid %d\n",
3436 proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info),
3437 task_pid(task));
3438 }
3439 }
3440
3441 return kr;
3442 }
3443
3444 /*
3445 * Suspend the target task.
3446 * Making/holding a token/reference/port is the callers responsibility.
3447 */
3448 kern_return_t
3449 task_suspend_internal(task_t task)
3450 {
3451 kern_return_t kr;
3452
3453 if (task == TASK_NULL || task == kernel_task) {
3454 return KERN_INVALID_ARGUMENT;
3455 }
3456
3457 task_lock(task);
3458 kr = place_task_hold(task, TASK_HOLD_NORMAL);
3459 task_unlock(task);
3460 return kr;
3461 }
3462
3463 /*
3464 * Suspend the target task, and return a suspension token. The token
3465 * represents a reference on the suspended task.
3466 */
3467 kern_return_t
3468 task_suspend2(
3469 task_t task,
3470 task_suspension_token_t *suspend_token)
3471 {
3472 kern_return_t kr;
3473
3474 kr = task_suspend_internal(task);
3475 if (kr != KERN_SUCCESS) {
3476 *suspend_token = TASK_NULL;
3477 return kr;
3478 }
3479
3480 /*
3481 * Take a reference on the target task and return that to the caller
3482 * as a "suspension token," which can be converted into an SO right to
3483 * the now-suspended task's resume port.
3484 */
3485 task_reference_internal(task);
3486 *suspend_token = task;
3487
3488 return KERN_SUCCESS;
3489 }
3490
3491 /*
3492 * Resume the task
3493 * (reference/token/port management is caller's responsibility).
3494 */
3495 kern_return_t
3496 task_resume_internal(
3497 task_suspension_token_t task)
3498 {
3499 kern_return_t kr;
3500
3501 if (task == TASK_NULL || task == kernel_task) {
3502 return KERN_INVALID_ARGUMENT;
3503 }
3504
3505 task_lock(task);
3506 kr = release_task_hold(task, TASK_HOLD_NORMAL);
3507 task_unlock(task);
3508 return kr;
3509 }
3510
3511 /*
3512 * Resume the task using a suspension token. Consumes the token's ref.
3513 */
3514 kern_return_t
3515 task_resume2(
3516 task_suspension_token_t task)
3517 {
3518 kern_return_t kr;
3519
3520 kr = task_resume_internal(task);
3521 task_suspension_token_deallocate(task);
3522
3523 return kr;
3524 }
3525
3526 boolean_t
3527 task_suspension_notify(mach_msg_header_t *request_header)
3528 {
3529 ipc_port_t port = request_header->msgh_remote_port;
3530 task_t task = convert_port_to_task_suspension_token(port);
3531 mach_msg_type_number_t not_count;
3532
3533 if (task == TASK_NULL || task == kernel_task) {
3534 return TRUE; /* nothing to do */
3535 }
3536 switch (request_header->msgh_id) {
3537 case MACH_NOTIFY_SEND_ONCE:
3538 /* release the hold held by this specific send-once right */
3539 task_lock(task);
3540 release_task_hold(task, TASK_HOLD_NORMAL);
3541 task_unlock(task);
3542 break;
3543
3544 case MACH_NOTIFY_NO_SENDERS:
3545 not_count = ((mach_no_senders_notification_t *)request_header)->not_count;
3546
3547 task_lock(task);
3548 ip_lock(port);
3549 if (port->ip_mscount == not_count) {
3550 /* release all the [remaining] outstanding legacy holds */
3551 assert(port->ip_nsrequest == IP_NULL);
3552 ip_unlock(port);
3553 release_task_hold(task, TASK_HOLD_LEGACY_ALL);
3554 task_unlock(task);
3555 } else if (port->ip_nsrequest == IP_NULL) {
3556 ipc_port_t old_notify;
3557
3558 task_unlock(task);
3559 /* new send rights, re-arm notification at current make-send count */
3560 ipc_port_nsrequest(port, port->ip_mscount, ipc_port_make_sonce_locked(port), &old_notify);
3561 assert(old_notify == IP_NULL);
3562 /* port unlocked */
3563 } else {
3564 ip_unlock(port);
3565 task_unlock(task);
3566 }
3567 break;
3568
3569 default:
3570 break;
3571 }
3572
3573 task_suspension_token_deallocate(task); /* drop token reference */
3574 return TRUE;
3575 }
3576
3577 static kern_return_t
3578 task_pidsuspend_locked(task_t task)
3579 {
3580 kern_return_t kr;
3581
3582 if (task->pidsuspended) {
3583 kr = KERN_FAILURE;
3584 goto out;
3585 }
3586
3587 task->pidsuspended = TRUE;
3588
3589 kr = place_task_hold(task, TASK_HOLD_PIDSUSPEND);
3590 if (kr != KERN_SUCCESS) {
3591 task->pidsuspended = FALSE;
3592 }
3593 out:
3594 return kr;
3595 }
3596
3597
3598 /*
3599 * task_pidsuspend:
3600 *
3601 * Suspends a task by placing a hold on its threads.
3602 *
3603 * Conditions:
3604 * The caller holds a reference to the task
3605 */
3606 kern_return_t
3607 task_pidsuspend(
3608 task_t task)
3609 {
3610 kern_return_t kr;
3611
3612 if (task == TASK_NULL || task == kernel_task) {
3613 return KERN_INVALID_ARGUMENT;
3614 }
3615
3616 task_lock(task);
3617
3618 kr = task_pidsuspend_locked(task);
3619
3620 task_unlock(task);
3621
3622 if ((KERN_SUCCESS == kr) && task->message_app_suspended) {
3623 iokit_task_app_suspended_changed(task);
3624 }
3625
3626 return kr;
3627 }
3628
3629 /*
3630 * task_pidresume:
3631 * Resumes a previously suspended task.
3632 *
3633 * Conditions:
3634 * The caller holds a reference to the task
3635 */
3636 kern_return_t
3637 task_pidresume(
3638 task_t task)
3639 {
3640 kern_return_t kr;
3641
3642 if (task == TASK_NULL || task == kernel_task) {
3643 return KERN_INVALID_ARGUMENT;
3644 }
3645
3646 task_lock(task);
3647
3648 #if CONFIG_FREEZE
3649
3650 while (task->changing_freeze_state) {
3651 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
3652 task_unlock(task);
3653 thread_block(THREAD_CONTINUE_NULL);
3654
3655 task_lock(task);
3656 }
3657 task->changing_freeze_state = TRUE;
3658 #endif
3659
3660 kr = release_task_hold(task, TASK_HOLD_PIDSUSPEND);
3661
3662 task_unlock(task);
3663
3664 if ((KERN_SUCCESS == kr) && task->message_app_suspended) {
3665 iokit_task_app_suspended_changed(task);
3666 }
3667
3668 #if CONFIG_FREEZE
3669
3670 task_lock(task);
3671
3672 if (kr == KERN_SUCCESS) {
3673 task->frozen = FALSE;
3674 }
3675 task->changing_freeze_state = FALSE;
3676 thread_wakeup(&task->changing_freeze_state);
3677
3678 task_unlock(task);
3679 #endif
3680
3681 return kr;
3682 }
3683
/*
 * Reference group named "task_watchports"; passed to os_ref_init() when
 * task_watchports_alloc_init() sets up a task_watchports refcount.
 */
3684 os_refgrp_decl(static, task_watchports_refgrp, "task_watchports", NULL);
3685
3686 /*
3687 * task_add_turnstile_watchports:
3688 * Setup watchports to boost the main thread of the task.
3689 *
3690 * Arguments:
3691 * task: task being spawned
3692 * thread: main thread of task
3693 * portwatch_ports: array of watchports
3694 * portwatch_count: number of watchports
3695 *
3696 * Conditions:
3697 * Nothing locked.
3698 */
3699 void
3700 task_add_turnstile_watchports(
3701 task_t task,
3702 thread_t thread,
3703 ipc_port_t *portwatch_ports,
3704 uint32_t portwatch_count)
3705 {
3706 struct task_watchports *watchports = NULL;
3707 struct task_watchport_elem *previous_elem_array[TASK_MAX_WATCHPORT_COUNT] = {};
3708 os_ref_count_t refs;
3709
3710 /* Check if the task has terminated */
3711 if (!task->active) {
3712 return;
3713 }
3714
3715 assert(portwatch_count <= TASK_MAX_WATCHPORT_COUNT);
3716
3717 watchports = task_watchports_alloc_init(task, thread, portwatch_count);
3718
3719 /* Lock the ipc space */
3720 is_write_lock(task->itk_space);
3721
3722 /* Setup watchports to boost the main thread */
3723 refs = task_add_turnstile_watchports_locked(task,
3724 watchports, previous_elem_array, portwatch_ports,
3725 portwatch_count);
3726
3727 /* Drop the space lock */
3728 is_write_unlock(task->itk_space);
3729
3730 if (refs == 0) {
3731 task_watchports_deallocate(watchports);
3732 }
3733
3734 /* Drop the ref on previous_elem_array */
3735 for (uint32_t i = 0; i < portwatch_count && previous_elem_array[i] != NULL; i++) {
3736 task_watchport_elem_deallocate(previous_elem_array[i]);
3737 }
3738 }
3739
3740 /*
3741 * task_remove_turnstile_watchports:
3742 * Clear all turnstile boost on the task from watchports.
3743 *
3744 * Arguments:
3745 * task: task being terminated
3746 *
3747 * Conditions:
3748 * Nothing locked.
3749 */
3750 void
3751 task_remove_turnstile_watchports(
3752 task_t task)
3753 {
3754 os_ref_count_t refs = TASK_MAX_WATCHPORT_COUNT;
3755 struct task_watchports *watchports = NULL;
3756 ipc_port_t port_freelist[TASK_MAX_WATCHPORT_COUNT] = {};
3757 uint32_t portwatch_count;
3758
3759 /* Lock the ipc space */
3760 is_write_lock(task->itk_space);
3761
3762 /* Check if watchport boost exist */
3763 if (task->watchports == NULL) {
3764 is_write_unlock(task->itk_space);
3765 return;
3766 }
3767 watchports = task->watchports;
3768 portwatch_count = watchports->tw_elem_array_count;
3769
3770 refs = task_remove_turnstile_watchports_locked(task, watchports,
3771 port_freelist);
3772
3773 is_write_unlock(task->itk_space);
3774
3775 /* Drop all the port references */
3776 for (uint32_t i = 0; i < portwatch_count && port_freelist[i] != NULL; i++) {
3777 ip_release(port_freelist[i]);
3778 }
3779
3780 /* Clear the task and thread references for task_watchport */
3781 if (refs == 0) {
3782 task_watchports_deallocate(watchports);
3783 }
3784 }
3785
3786 /*
3787 * task_transfer_turnstile_watchports:
3788 * Transfer all watchport turnstile boost from old task to new task.
3789 *
3790 * Arguments:
3791 * old_task: task calling exec
3792 * new_task: new exec'ed task
3793 * thread: main thread of new task
3794 *
3795 * Conditions:
3796 * Nothing locked.
3797 */
3798 void
3799 task_transfer_turnstile_watchports(
3800 task_t old_task,
3801 task_t new_task,
3802 thread_t new_thread)
3803 {
3804 struct task_watchports *old_watchports = NULL;
3805 struct task_watchports *new_watchports = NULL;
3806 os_ref_count_t old_refs = TASK_MAX_WATCHPORT_COUNT;
3807 os_ref_count_t new_refs = TASK_MAX_WATCHPORT_COUNT;
3808 uint32_t portwatch_count;
3809
3810 if (old_task->watchports == NULL || !new_task->active) {
3811 return;
3812 }
3813
3814 /* Get the watch port count from the old task */
3815 is_write_lock(old_task->itk_space);
3816 if (old_task->watchports == NULL) {
3817 is_write_unlock(old_task->itk_space);
3818 return;
3819 }
3820
3821 portwatch_count = old_task->watchports->tw_elem_array_count;
3822 is_write_unlock(old_task->itk_space);
3823
3824 new_watchports = task_watchports_alloc_init(new_task, new_thread, portwatch_count);
3825
3826 /* Lock the ipc space for old task */
3827 is_write_lock(old_task->itk_space);
3828
3829 /* Lock the ipc space for new task */
3830 is_write_lock(new_task->itk_space);
3831
3832 /* Check if watchport boost exist */
3833 if (old_task->watchports == NULL || !new_task->active) {
3834 is_write_unlock(new_task->itk_space);
3835 is_write_unlock(old_task->itk_space);
3836 (void)task_watchports_release(new_watchports);
3837 task_watchports_deallocate(new_watchports);
3838 return;
3839 }
3840
3841 old_watchports = old_task->watchports;
3842 assert(portwatch_count == old_task->watchports->tw_elem_array_count);
3843
3844 /* Setup new task watchports */
3845 new_task->watchports = new_watchports;
3846
3847 for (uint32_t i = 0; i < portwatch_count; i++) {
3848 ipc_port_t port = old_watchports->tw_elem[i].twe_port;
3849
3850 if (port == NULL) {
3851 task_watchport_elem_clear(&new_watchports->tw_elem[i]);
3852 continue;
3853 }
3854
3855 /* Lock the port and check if it has the entry */
3856 ip_lock(port);
3857 imq_lock(&port->ip_messages);
3858
3859 task_watchport_elem_init(&new_watchports->tw_elem[i], new_task, port);
3860
3861 if (ipc_port_replace_watchport_elem_conditional_locked(port,
3862 &old_watchports->tw_elem[i], &new_watchports->tw_elem[i]) == KERN_SUCCESS) {
3863 task_watchport_elem_clear(&old_watchports->tw_elem[i]);
3864
3865 task_watchports_retain(new_watchports);
3866 old_refs = task_watchports_release(old_watchports);
3867
3868 /* Check if all ports are cleaned */
3869 if (old_refs == 0) {
3870 old_task->watchports = NULL;
3871 }
3872 } else {
3873 task_watchport_elem_clear(&new_watchports->tw_elem[i]);
3874 }
3875 /* mqueue and port unlocked by ipc_port_replace_watchport_elem_conditional_locked */
3876 }
3877
3878 /* Drop the reference on new task_watchports struct returned by task_watchports_alloc_init */
3879 new_refs = task_watchports_release(new_watchports);
3880 if (new_refs == 0) {
3881 new_task->watchports = NULL;
3882 }
3883
3884 is_write_unlock(new_task->itk_space);
3885 is_write_unlock(old_task->itk_space);
3886
3887 /* Clear the task and thread references for old_watchport */
3888 if (old_refs == 0) {
3889 task_watchports_deallocate(old_watchports);
3890 }
3891
3892 /* Clear the task and thread references for new_watchport */
3893 if (new_refs == 0) {
3894 task_watchports_deallocate(new_watchports);
3895 }
3896 }
3897
3898 /*
3899 * task_add_turnstile_watchports_locked:
3900 * Setup watchports to boost the main thread of the task.
3901 *
3902 * Arguments:
3903 * task: task to boost
3904 * watchports: watchport structure to be attached to the task
3905 * previous_elem_array: an array of old watchport_elem to be returned to caller
3906 * portwatch_ports: array of watchports
3907 * portwatch_count: number of watchports
3908 *
3909 * Conditions:
3910 * ipc space of the task locked.
3911 * returns array of old watchport_elem in previous_elem_array
3912 */
3913 static os_ref_count_t
3914 task_add_turnstile_watchports_locked(
3915 task_t task,
3916 struct task_watchports *watchports,
3917 struct task_watchport_elem **previous_elem_array,
3918 ipc_port_t *portwatch_ports,
3919 uint32_t portwatch_count)
3920 {
3921 os_ref_count_t refs = TASK_MAX_WATCHPORT_COUNT;
3922
3923 /* Check if the task is still active */
3924 if (!task->active) {
3925 refs = task_watchports_release(watchports);
3926 return refs;
3927 }
3928
3929 assert(task->watchports == NULL);
3930 task->watchports = watchports;
3931
3932 for (uint32_t i = 0, j = 0; i < portwatch_count; i++) {
3933 ipc_port_t port = portwatch_ports[i];
3934
3935 task_watchport_elem_init(&watchports->tw_elem[i], task, port);
3936 if (port == NULL) {
3937 task_watchport_elem_clear(&watchports->tw_elem[i]);
3938 continue;
3939 }
3940
3941 ip_lock(port);
3942 imq_lock(&port->ip_messages);
3943
3944 /* Check if port is in valid state to be setup as watchport */
3945 if (ipc_port_add_watchport_elem_locked(port, &watchports->tw_elem[i],
3946 &previous_elem_array[j]) != KERN_SUCCESS) {
3947 task_watchport_elem_clear(&watchports->tw_elem[i]);
3948 continue;
3949 }
3950 /* port and mqueue unlocked on return */
3951
3952 ip_reference(port);
3953 task_watchports_retain(watchports);
3954 if (previous_elem_array[j] != NULL) {
3955 j++;
3956 }
3957 }
3958
3959 /* Drop the reference on task_watchport struct returned by os_ref_init */
3960 refs = task_watchports_release(watchports);
3961 if (refs == 0) {
3962 task->watchports = NULL;
3963 }
3964
3965 return refs;
3966 }
3967
3968 /*
3969 * task_remove_turnstile_watchports_locked:
3970 * Clear all turnstile boost on the task from watchports.
3971 *
3972 * Arguments:
3973 * task: task to remove watchports from
3974 * watchports: watchports structure for the task
3975 * port_freelist: array of ports returned with ref to caller
3976 *
3977 *
3978 * Conditions:
3979 * ipc space of the task locked.
3980 * array of ports with refs are returned in port_freelist
3981 */
3982 static os_ref_count_t
3983 task_remove_turnstile_watchports_locked(
3984 task_t task,
3985 struct task_watchports *watchports,
3986 ipc_port_t *port_freelist)
3987 {
3988 os_ref_count_t refs = TASK_MAX_WATCHPORT_COUNT;
3989
3990 for (uint32_t i = 0, j = 0; i < watchports->tw_elem_array_count; i++) {
3991 ipc_port_t port = watchports->tw_elem[i].twe_port;
3992 if (port == NULL) {
3993 continue;
3994 }
3995
3996 /* Lock the port and check if it has the entry */
3997 ip_lock(port);
3998 imq_lock(&port->ip_messages);
3999 if (ipc_port_clear_watchport_elem_internal_conditional_locked(port,
4000 &watchports->tw_elem[i]) == KERN_SUCCESS) {
4001 task_watchport_elem_clear(&watchports->tw_elem[i]);
4002 port_freelist[j++] = port;
4003 refs = task_watchports_release(watchports);
4004
4005 /* Check if all ports are cleaned */
4006 if (refs == 0) {
4007 task->watchports = NULL;
4008 break;
4009 }
4010 }
4011 /* mqueue and port unlocked by ipc_port_clear_watchport_elem_internal_conditional_locked */
4012 }
4013 return refs;
4014 }
4015
4016 /*
4017 * task_watchports_alloc_init:
4018 * Allocate and initialize task watchport struct.
4019 *
4020 * Conditions:
4021 * Nothing locked.
4022 */
4023 static struct task_watchports *
4024 task_watchports_alloc_init(
4025 task_t task,
4026 thread_t thread,
4027 uint32_t count)
4028 {
4029 struct task_watchports *watchports = kalloc(sizeof(struct task_watchports) +
4030 count * sizeof(struct task_watchport_elem));
4031
4032 task_reference(task);
4033 thread_reference(thread);
4034 watchports->tw_task = task;
4035 watchports->tw_thread = thread;
4036 watchports->tw_elem_array_count = count;
4037 os_ref_init(&watchports->tw_refcount, &task_watchports_refgrp);
4038
4039 return watchports;
4040 }
4041
4042 /*
4043 * task_watchports_deallocate:
4044 * Deallocate task watchport struct.
4045 *
4046 * Conditions:
4047 * Nothing locked.
4048 */
4049 static void
4050 task_watchports_deallocate(
4051 struct task_watchports *watchports)
4052 {
4053 uint32_t portwatch_count = watchports->tw_elem_array_count;
4054
4055 task_deallocate(watchports->tw_task);
4056 thread_deallocate(watchports->tw_thread);
4057 kfree(watchports, sizeof(struct task_watchports) + portwatch_count * sizeof(struct task_watchport_elem));
4058 }
4059
4060 /*
4061 * task_watchport_elem_deallocate:
4062 * Deallocate task watchport element and release its ref on task_watchport.
4063 *
4064 * Conditions:
4065 * Nothing locked.
4066 */
4067 void
4068 task_watchport_elem_deallocate(
4069 struct task_watchport_elem *watchport_elem)
4070 {
4071 os_ref_count_t refs = TASK_MAX_WATCHPORT_COUNT;
4072 task_t task = watchport_elem->twe_task;
4073 struct task_watchports *watchports = NULL;
4074 ipc_port_t port = NULL;
4075
4076 assert(task != NULL);
4077
4078 /* Take the space lock to modify the elememt */
4079 is_write_lock(task->itk_space);
4080
4081 watchports = task->watchports;
4082 assert(watchports != NULL);
4083
4084 port = watchport_elem->twe_port;
4085 assert(port != NULL);
4086
4087 task_watchport_elem_clear(watchport_elem);
4088 refs = task_watchports_release(watchports);
4089
4090 if (refs == 0) {
4091 task->watchports = NULL;
4092 }
4093
4094 is_write_unlock(task->itk_space);
4095
4096 ip_release(port);
4097 if (refs == 0) {
4098 task_watchports_deallocate(watchports);
4099 }
4100 }
4101
4102 /*
4103 * task_has_watchports:
4104 * Return TRUE if task has watchport boosts.
4105 *
4106 * Conditions:
4107 * Nothing locked.
4108 */
4109 boolean_t
4110 task_has_watchports(task_t task)
4111 {
4112 return task->watchports != NULL;
4113 }
4114
4115 #if DEVELOPMENT || DEBUG
4116
4117 extern void IOSleep(int);
4118
4119 kern_return_t
4120 task_disconnect_page_mappings(task_t task)
4121 {
4122 int n;
4123
4124 if (task == TASK_NULL || task == kernel_task) {
4125 return KERN_INVALID_ARGUMENT;
4126 }
4127
4128 /*
4129 * this function is used to strip all of the mappings from
4130 * the pmap for the specified task to force the task to
4131 * re-fault all of the pages it is actively using... this
4132 * allows us to approximate the true working set of the
4133 * specified task. We only engage if at least 1 of the
4134 * threads in the task is runnable, but we want to continuously
4135 * sweep (at least for a while - I've arbitrarily set the limit at
4136 * 100 sweeps to be re-looked at as we gain experience) to get a better
4137 * view into what areas within a page are being visited (as opposed to only
4138 * seeing the first fault of a page after the task becomes
4139 * runnable)... in the future I may
4140 * try to block until awakened by a thread in this task
4141 * being made runnable, but for now we'll periodically poll from the
4142 * user level debug tool driving the sysctl
4143 */
4144 for (n = 0; n < 100; n++) {
4145 thread_t thread;
4146 boolean_t runnable;
4147 boolean_t do_unnest;
4148 int page_count;
4149
4150 runnable = FALSE;
4151 do_unnest = FALSE;
4152
4153 task_lock(task);
4154
4155 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4156 if (thread->state & TH_RUN) {
4157 runnable = TRUE;
4158 break;
4159 }
4160 }
4161 if (n == 0) {
4162 task->task_disconnected_count++;
4163 }
4164
4165 if (task->task_unnested == FALSE) {
4166 if (runnable == TRUE) {
4167 task->task_unnested = TRUE;
4168 do_unnest = TRUE;
4169 }
4170 }
4171 task_unlock(task);
4172
4173 if (runnable == FALSE) {
4174 break;
4175 }
4176
4177 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_TASK_PAGE_MAPPINGS)) | DBG_FUNC_START,
4178 task, do_unnest, task->task_disconnected_count, 0, 0);
4179
4180 page_count = vm_map_disconnect_page_mappings(task->map, do_unnest);
4181
4182 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_TASK_PAGE_MAPPINGS)) | DBG_FUNC_END,
4183 task, page_count, 0, 0, 0);
4184
4185 if ((n % 5) == 4) {
4186 IOSleep(1);
4187 }
4188 }
4189 return KERN_SUCCESS;
4190 }
4191
4192 #endif
4193
4194
4195 #if CONFIG_FREEZE
4196
4197 /*
4198 * task_freeze:
4199 *
4200 * Freeze a task.
4201 *
4202 * Conditions:
4203 * The caller holds a reference to the task
4204 */
4205 extern void vm_wake_compactor_swapper(void);
4206 extern queue_head_t c_swapout_list_head;
4207
4208 kern_return_t
4209 task_freeze(
4210 task_t task,
4211 uint32_t *purgeable_count,
4212 uint32_t *wired_count,
4213 uint32_t *clean_count,
4214 uint32_t *dirty_count,
4215 uint32_t dirty_budget,
4216 uint32_t *shared_count,
4217 int *freezer_error_code,
4218 boolean_t eval_only)
4219 {
4220 kern_return_t kr = KERN_SUCCESS;
4221
4222 if (task == TASK_NULL || task == kernel_task) {
4223 return KERN_INVALID_ARGUMENT;
4224 }
4225
4226 task_lock(task);
4227
4228 while (task->changing_freeze_state) {
4229 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
4230 task_unlock(task);
4231 thread_block(THREAD_CONTINUE_NULL);
4232
4233 task_lock(task);
4234 }
4235 if (task->frozen) {
4236 task_unlock(task);
4237 return KERN_FAILURE;
4238 }
4239 task->changing_freeze_state = TRUE;
4240
4241 task_unlock(task);
4242
4243 kr = vm_map_freeze(task,
4244 purgeable_count,
4245 wired_count,
4246 clean_count,
4247 dirty_count,
4248 dirty_budget,
4249 shared_count,
4250 freezer_error_code,
4251 eval_only);
4252
4253 task_lock(task);
4254
4255 if ((kr == KERN_SUCCESS) && (eval_only == FALSE)) {
4256 task->frozen = TRUE;
4257 }
4258
4259 task->changing_freeze_state = FALSE;
4260 thread_wakeup(&task->changing_freeze_state);
4261
4262 task_unlock(task);
4263
4264 if (VM_CONFIG_COMPRESSOR_IS_PRESENT &&
4265 (eval_only == FALSE)) {
4266 vm_wake_compactor_swapper();
4267 /*
4268 * We do an explicit wakeup of the swapout thread here
4269 * because the compact_and_swap routines don't have
4270 * knowledge about these kind of "per-task packed c_segs"
4271 * and so will not be evaluating whether we need to do
4272 * a wakeup there.
4273 */
4274 thread_wakeup((event_t)&c_swapout_list_head);
4275 }
4276
4277 return kr;
4278 }
4279
4280 /*
4281 * task_thaw:
4282 *
4283 * Thaw a currently frozen task.
4284 *
4285 * Conditions:
4286 * The caller holds a reference to the task
4287 */
4288 kern_return_t
4289 task_thaw(
4290 task_t task)
4291 {
4292 if (task == TASK_NULL || task == kernel_task) {
4293 return KERN_INVALID_ARGUMENT;
4294 }
4295
4296 task_lock(task);
4297
4298 while (task->changing_freeze_state) {
4299 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
4300 task_unlock(task);
4301 thread_block(THREAD_CONTINUE_NULL);
4302
4303 task_lock(task);
4304 }
4305 if (!task->frozen) {
4306 task_unlock(task);
4307 return KERN_FAILURE;
4308 }
4309 task->frozen = FALSE;
4310
4311 task_unlock(task);
4312
4313 return KERN_SUCCESS;
4314 }
4315
4316 #endif /* CONFIG_FREEZE */
4317
4318 kern_return_t
4319 host_security_set_task_token(
4320 host_security_t host_security,
4321 task_t task,
4322 security_token_t sec_token,
4323 audit_token_t audit_token,
4324 host_priv_t host_priv)
4325 {
4326 ipc_port_t host_port;
4327 kern_return_t kr;
4328
4329 if (task == TASK_NULL) {
4330 return KERN_INVALID_ARGUMENT;
4331 }
4332
4333 if (host_security == HOST_NULL) {
4334 return KERN_INVALID_SECURITY;
4335 }
4336
4337 task_lock(task);
4338 task->sec_token = sec_token;
4339 task->audit_token = audit_token;
4340
4341 task_unlock(task);
4342
4343 if (host_priv != HOST_PRIV_NULL) {
4344 kr = host_get_host_priv_port(host_priv, &host_port);
4345 } else {
4346 kr = host_get_host_port(host_priv_self(), &host_port);
4347 }
4348 assert(kr == KERN_SUCCESS);
4349 kr = task_set_special_port(task, TASK_HOST_PORT, host_port);
4350 return kr;
4351 }
4352
4353 kern_return_t
4354 task_send_trace_memory(
4355 __unused task_t target_task,
4356 __unused uint32_t pid,
4357 __unused uint64_t uniqueid)
4358 {
4359 return KERN_INVALID_ARGUMENT;
4360 }
4361
4362 /*
4363 * This routine was added, pretty much exclusively, for registering the
4364 * RPC glue vector for in-kernel short circuited tasks. Rather than
4365 * removing it completely, I have only disabled that feature (which was
4366 * the only feature at the time). It just appears that we are going to
4367 * want to add some user data to tasks in the future (i.e. bsd info,
4368 * task names, etc...), so I left it in the formal task interface.
4369 */
4370 kern_return_t
4371 task_set_info(
4372 task_t task,
4373 task_flavor_t flavor,
4374 __unused task_info_t task_info_in, /* pointer to IN array */
4375 __unused mach_msg_type_number_t task_info_count)
4376 {
4377 if (task == TASK_NULL) {
4378 return KERN_INVALID_ARGUMENT;
4379 }
4380
4381 switch (flavor) {
4382 #if CONFIG_ATM
4383 case TASK_TRACE_MEMORY_INFO:
4384 {
4385 if (task_info_count != TASK_TRACE_MEMORY_INFO_COUNT) {
4386 return KERN_INVALID_ARGUMENT;
4387 }
4388
4389 assert(task_info_in != NULL);
4390 task_trace_memory_info_t mem_info;
4391 mem_info = (task_trace_memory_info_t) task_info_in;
4392 kern_return_t kr = atm_register_trace_memory(task,
4393 mem_info->user_memory_address,
4394 mem_info->buffer_size);
4395 return kr;
4396 }
4397
4398 #endif
4399 default:
4400 return KERN_INVALID_ARGUMENT;
4401 }
4402 return KERN_SUCCESS;
4403 }
4404
/*
 * Switch read by task_info() in the TASK_VM_INFO_PURGEABLE path: when
 * non-zero (the initial value), the volatile compressed pmap size reported
 * by vm_map_query_volatile() is subtracted from vm_info->compressed.
 */
4405 int radar_20146450 = 1;
4406 kern_return_t
4407 task_info(
4408 task_t task,
4409 task_flavor_t flavor,
4410 task_info_t task_info_out,
4411 mach_msg_type_number_t *task_info_count)
4412 {
4413 kern_return_t error = KERN_SUCCESS;
4414 mach_msg_type_number_t original_task_info_count;
4415
4416 if (task == TASK_NULL) {
4417 return KERN_INVALID_ARGUMENT;
4418 }
4419
4420 original_task_info_count = *task_info_count;
4421 task_lock(task);
4422
4423 if ((task != current_task()) && (!task->active)) {
4424 task_unlock(task);
4425 return KERN_INVALID_ARGUMENT;
4426 }
4427
4428 switch (flavor) {
4429 case TASK_BASIC_INFO_32:
4430 case TASK_BASIC2_INFO_32:
4431 #if defined(__arm__) || defined(__arm64__)
4432 case TASK_BASIC_INFO_64:
4433 #endif
4434 {
4435 task_basic_info_32_t basic_info;
4436 vm_map_t map;
4437 clock_sec_t secs;
4438 clock_usec_t usecs;
4439
4440 if (*task_info_count < TASK_BASIC_INFO_32_COUNT) {
4441 error = KERN_INVALID_ARGUMENT;
4442 break;
4443 }
4444
4445 basic_info = (task_basic_info_32_t)task_info_out;
4446
4447 map = (task == kernel_task)? kernel_map: task->map;
4448 basic_info->virtual_size = (typeof(basic_info->virtual_size))map->size;
4449 if (flavor == TASK_BASIC2_INFO_32) {
4450 /*
4451 * The "BASIC2" flavor gets the maximum resident
4452 * size instead of the current resident size...
4453 */
4454 basic_info->resident_size = pmap_resident_max(map->pmap);
4455 } else {
4456 basic_info->resident_size = pmap_resident_count(map->pmap);
4457 }
4458 basic_info->resident_size *= PAGE_SIZE;
4459
4460 basic_info->policy = ((task != kernel_task)?
4461 POLICY_TIMESHARE: POLICY_RR);
4462 basic_info->suspend_count = task->user_stop_count;
4463
4464 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
4465 basic_info->user_time.seconds =
4466 (typeof(basic_info->user_time.seconds))secs;
4467 basic_info->user_time.microseconds = usecs;
4468
4469 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
4470 basic_info->system_time.seconds =
4471 (typeof(basic_info->system_time.seconds))secs;
4472 basic_info->system_time.microseconds = usecs;
4473
4474 *task_info_count = TASK_BASIC_INFO_32_COUNT;
4475 break;
4476 }
4477
4478 #if defined(__arm__) || defined(__arm64__)
4479 case TASK_BASIC_INFO_64_2:
4480 {
4481 task_basic_info_64_2_t basic_info;
4482 vm_map_t map;
4483 clock_sec_t secs;
4484 clock_usec_t usecs;
4485
4486 if (*task_info_count < TASK_BASIC_INFO_64_2_COUNT) {
4487 error = KERN_INVALID_ARGUMENT;
4488 break;
4489 }
4490
4491 basic_info = (task_basic_info_64_2_t)task_info_out;
4492
4493 map = (task == kernel_task)? kernel_map: task->map;
4494 basic_info->virtual_size = map->size;
4495 basic_info->resident_size =
4496 (mach_vm_size_t)(pmap_resident_count(map->pmap))
4497 * PAGE_SIZE_64;
4498
4499 basic_info->policy = ((task != kernel_task)?
4500 POLICY_TIMESHARE: POLICY_RR);
4501 basic_info->suspend_count = task->user_stop_count;
4502
4503 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
4504 basic_info->user_time.seconds =
4505 (typeof(basic_info->user_time.seconds))secs;
4506 basic_info->user_time.microseconds = usecs;
4507
4508 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
4509 basic_info->system_time.seconds =
4510 (typeof(basic_info->system_time.seconds))secs;
4511 basic_info->system_time.microseconds = usecs;
4512
4513 *task_info_count = TASK_BASIC_INFO_64_2_COUNT;
4514 break;
4515 }
4516
4517 #else /* defined(__arm__) || defined(__arm64__) */
4518 case TASK_BASIC_INFO_64:
4519 {
4520 task_basic_info_64_t basic_info;
4521 vm_map_t map;
4522 clock_sec_t secs;
4523 clock_usec_t usecs;
4524
4525 if (*task_info_count < TASK_BASIC_INFO_64_COUNT) {
4526 error = KERN_INVALID_ARGUMENT;
4527 break;
4528 }
4529
4530 basic_info = (task_basic_info_64_t)task_info_out;
4531
4532 map = (task == kernel_task)? kernel_map: task->map;
4533 basic_info->virtual_size = map->size;
4534 basic_info->resident_size =
4535 (mach_vm_size_t)(pmap_resident_count(map->pmap))
4536 * PAGE_SIZE_64;
4537
4538 basic_info->policy = ((task != kernel_task)?
4539 POLICY_TIMESHARE: POLICY_RR);
4540 basic_info->suspend_count = task->user_stop_count;
4541
4542 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
4543 basic_info->user_time.seconds =
4544 (typeof(basic_info->user_time.seconds))secs;
4545 basic_info->user_time.microseconds = usecs;
4546
4547 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
4548 basic_info->system_time.seconds =
4549 (typeof(basic_info->system_time.seconds))secs;
4550 basic_info->system_time.microseconds = usecs;
4551
4552 *task_info_count = TASK_BASIC_INFO_64_COUNT;
4553 break;
4554 }
4555 #endif /* defined(__arm__) || defined(__arm64__) */
4556
4557 case MACH_TASK_BASIC_INFO:
4558 {
4559 mach_task_basic_info_t basic_info;
4560 vm_map_t map;
4561 clock_sec_t secs;
4562 clock_usec_t usecs;
4563
4564 if (*task_info_count < MACH_TASK_BASIC_INFO_COUNT) {
4565 error = KERN_INVALID_ARGUMENT;
4566 break;
4567 }
4568
4569 basic_info = (mach_task_basic_info_t)task_info_out;
4570
4571 map = (task == kernel_task) ? kernel_map : task->map;
4572
4573 basic_info->virtual_size = map->size;
4574
4575 basic_info->resident_size =
4576 (mach_vm_size_t)(pmap_resident_count(map->pmap));
4577 basic_info->resident_size *= PAGE_SIZE_64;
4578
4579 basic_info->resident_size_max =
4580 (mach_vm_size_t)(pmap_resident_max(map->pmap));
4581 basic_info->resident_size_max *= PAGE_SIZE_64;
4582
4583 basic_info->policy = ((task != kernel_task) ?
4584 POLICY_TIMESHARE : POLICY_RR);
4585
4586 basic_info->suspend_count = task->user_stop_count;
4587
4588 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
4589 basic_info->user_time.seconds =
4590 (typeof(basic_info->user_time.seconds))secs;
4591 basic_info->user_time.microseconds = usecs;
4592
4593 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
4594 basic_info->system_time.seconds =
4595 (typeof(basic_info->system_time.seconds))secs;
4596 basic_info->system_time.microseconds = usecs;
4597
4598 *task_info_count = MACH_TASK_BASIC_INFO_COUNT;
4599 break;
4600 }
4601
4602 case TASK_THREAD_TIMES_INFO:
4603 {
4604 task_thread_times_info_t times_info;
4605 thread_t thread;
4606
4607 if (*task_info_count < TASK_THREAD_TIMES_INFO_COUNT) {
4608 error = KERN_INVALID_ARGUMENT;
4609 break;
4610 }
4611
4612 times_info = (task_thread_times_info_t) task_info_out;
4613 times_info->user_time.seconds = 0;
4614 times_info->user_time.microseconds = 0;
4615 times_info->system_time.seconds = 0;
4616 times_info->system_time.microseconds = 0;
4617
4618
4619 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4620 time_value_t user_time, system_time;
4621
4622 if (thread->options & TH_OPT_IDLE_THREAD) {
4623 continue;
4624 }
4625
4626 thread_read_times(thread, &user_time, &system_time, NULL);
4627
4628 time_value_add(&times_info->user_time, &user_time);
4629 time_value_add(&times_info->system_time, &system_time);
4630 }
4631
4632 *task_info_count = TASK_THREAD_TIMES_INFO_COUNT;
4633 break;
4634 }
4635
4636 case TASK_ABSOLUTETIME_INFO:
4637 {
4638 task_absolutetime_info_t info;
4639 thread_t thread;
4640
4641 if (*task_info_count < TASK_ABSOLUTETIME_INFO_COUNT) {
4642 error = KERN_INVALID_ARGUMENT;
4643 break;
4644 }
4645
4646 info = (task_absolutetime_info_t)task_info_out;
4647 info->threads_user = info->threads_system = 0;
4648
4649
4650 info->total_user = task->total_user_time;
4651 info->total_system = task->total_system_time;
4652
4653 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4654 uint64_t tval;
4655 spl_t x;
4656
4657 if (thread->options & TH_OPT_IDLE_THREAD) {
4658 continue;
4659 }
4660
4661 x = splsched();
4662 thread_lock(thread);
4663
4664 tval = timer_grab(&thread->user_timer);
4665 info->threads_user += tval;
4666 info->total_user += tval;
4667
4668 tval = timer_grab(&thread->system_timer);
4669 if (thread->precise_user_kernel_time) {
4670 info->threads_system += tval;
4671 info->total_system += tval;
4672 } else {
4673 /* system_timer may represent either sys or user */
4674 info->threads_user += tval;
4675 info->total_user += tval;
4676 }
4677
4678 thread_unlock(thread);
4679 splx(x);
4680 }
4681
4682
4683 *task_info_count = TASK_ABSOLUTETIME_INFO_COUNT;
4684 break;
4685 }
4686
4687 case TASK_DYLD_INFO:
4688 {
4689 task_dyld_info_t info;
4690
4691 /*
4692 * We added the format field to TASK_DYLD_INFO output. For
4693 * temporary backward compatibility, accept the fact that
4694 * clients may ask for the old version - distinguished by the
4695 * size of the expected result structure.
4696 */
4697 #define TASK_LEGACY_DYLD_INFO_COUNT \
4698 offsetof(struct task_dyld_info, all_image_info_format)/sizeof(natural_t)
4699
4700 if (*task_info_count < TASK_LEGACY_DYLD_INFO_COUNT) {
4701 error = KERN_INVALID_ARGUMENT;
4702 break;
4703 }
4704
4705 info = (task_dyld_info_t)task_info_out;
4706 info->all_image_info_addr = task->all_image_info_addr;
4707 info->all_image_info_size = task->all_image_info_size;
4708
4709 /* only set format on output for those expecting it */
4710 if (*task_info_count >= TASK_DYLD_INFO_COUNT) {
4711 info->all_image_info_format = task_has_64Bit_addr(task) ?
4712 TASK_DYLD_ALL_IMAGE_INFO_64 :
4713 TASK_DYLD_ALL_IMAGE_INFO_32;
4714 *task_info_count = TASK_DYLD_INFO_COUNT;
4715 } else {
4716 *task_info_count = TASK_LEGACY_DYLD_INFO_COUNT;
4717 }
4718 break;
4719 }
4720
4721 case TASK_EXTMOD_INFO:
4722 {
4723 task_extmod_info_t info;
4724 void *p;
4725
4726 if (*task_info_count < TASK_EXTMOD_INFO_COUNT) {
4727 error = KERN_INVALID_ARGUMENT;
4728 break;
4729 }
4730
4731 info = (task_extmod_info_t)task_info_out;
4732
4733 p = get_bsdtask_info(task);
4734 if (p) {
4735 proc_getexecutableuuid(p, info->task_uuid, sizeof(info->task_uuid));
4736 } else {
4737 bzero(info->task_uuid, sizeof(info->task_uuid));
4738 }
4739 info->extmod_statistics = task->extmod_statistics;
4740 *task_info_count = TASK_EXTMOD_INFO_COUNT;
4741
4742 break;
4743 }
4744
4745 case TASK_KERNELMEMORY_INFO:
4746 {
4747 task_kernelmemory_info_t tkm_info;
4748 ledger_amount_t credit, debit;
4749
4750 if (*task_info_count < TASK_KERNELMEMORY_INFO_COUNT) {
4751 error = KERN_INVALID_ARGUMENT;
4752 break;
4753 }
4754
4755 tkm_info = (task_kernelmemory_info_t) task_info_out;
4756 tkm_info->total_palloc = 0;
4757 tkm_info->total_pfree = 0;
4758 tkm_info->total_salloc = 0;
4759 tkm_info->total_sfree = 0;
4760
4761 if (task == kernel_task) {
4762 /*
4763 * All shared allocs/frees from other tasks count against
4764 * the kernel private memory usage. If we are looking up
4765 * info for the kernel task, gather from everywhere.
4766 */
4767 task_unlock(task);
4768
4769 /* start by accounting for all the terminated tasks against the kernel */
4770 tkm_info->total_palloc = tasks_tkm_private.alloc + tasks_tkm_shared.alloc;
4771 tkm_info->total_pfree = tasks_tkm_private.free + tasks_tkm_shared.free;
4772
4773 /* count all other task/thread shared alloc/free against the kernel */
4774 lck_mtx_lock(&tasks_threads_lock);
4775
4776 /* XXX this really shouldn't be using the function parameter 'task' as a local var! */
4777 queue_iterate(&tasks, task, task_t, tasks) {
4778 if (task == kernel_task) {
4779 if (ledger_get_entries(task->ledger,
4780 task_ledgers.tkm_private, &credit,
4781 &debit) == KERN_SUCCESS) {
4782 tkm_info->total_palloc += credit;
4783 tkm_info->total_pfree += debit;
4784 }
4785 }
4786 if (!ledger_get_entries(task->ledger,
4787 task_ledgers.tkm_shared, &credit, &debit)) {
4788 tkm_info->total_palloc += credit;
4789 tkm_info->total_pfree += debit;
4790 }
4791 }
4792 lck_mtx_unlock(&tasks_threads_lock);
4793 } else {
4794 if (!ledger_get_entries(task->ledger,
4795 task_ledgers.tkm_private, &credit, &debit)) {
4796 tkm_info->total_palloc = credit;
4797 tkm_info->total_pfree = debit;
4798 }
4799 if (!ledger_get_entries(task->ledger,
4800 task_ledgers.tkm_shared, &credit, &debit)) {
4801 tkm_info->total_salloc = credit;
4802 tkm_info->total_sfree = debit;
4803 }
4804 task_unlock(task);
4805 }
4806
4807 *task_info_count = TASK_KERNELMEMORY_INFO_COUNT;
4808 return KERN_SUCCESS;
4809 }
4810
4811 /* OBSOLETE */
4812 case TASK_SCHED_FIFO_INFO:
4813 {
4814 if (*task_info_count < POLICY_FIFO_BASE_COUNT) {
4815 error = KERN_INVALID_ARGUMENT;
4816 break;
4817 }
4818
4819 error = KERN_INVALID_POLICY;
4820 break;
4821 }
4822
4823 /* OBSOLETE */
4824 case TASK_SCHED_RR_INFO:
4825 {
4826 policy_rr_base_t rr_base;
4827 uint32_t quantum_time;
4828 uint64_t quantum_ns;
4829
4830 if (*task_info_count < POLICY_RR_BASE_COUNT) {
4831 error = KERN_INVALID_ARGUMENT;
4832 break;
4833 }
4834
4835 rr_base = (policy_rr_base_t) task_info_out;
4836
4837 if (task != kernel_task) {
4838 error = KERN_INVALID_POLICY;
4839 break;
4840 }
4841
4842 rr_base->base_priority = task->priority;
4843
4844 quantum_time = SCHED(initial_quantum_size)(THREAD_NULL);
4845 absolutetime_to_nanoseconds(quantum_time, &quantum_ns);
4846
4847 rr_base->quantum = (uint32_t)(quantum_ns / 1000 / 1000);
4848
4849 *task_info_count = POLICY_RR_BASE_COUNT;
4850 break;
4851 }
4852
4853 /* OBSOLETE */
4854 case TASK_SCHED_TIMESHARE_INFO:
4855 {
4856 policy_timeshare_base_t ts_base;
4857
4858 if (*task_info_count < POLICY_TIMESHARE_BASE_COUNT) {
4859 error = KERN_INVALID_ARGUMENT;
4860 break;
4861 }
4862
4863 ts_base = (policy_timeshare_base_t) task_info_out;
4864
4865 if (task == kernel_task) {
4866 error = KERN_INVALID_POLICY;
4867 break;
4868 }
4869
4870 ts_base->base_priority = task->priority;
4871
4872 *task_info_count = POLICY_TIMESHARE_BASE_COUNT;
4873 break;
4874 }
4875
4876 case TASK_SECURITY_TOKEN:
4877 {
4878 security_token_t *sec_token_p;
4879
4880 if (*task_info_count < TASK_SECURITY_TOKEN_COUNT) {
4881 error = KERN_INVALID_ARGUMENT;
4882 break;
4883 }
4884
4885 sec_token_p = (security_token_t *) task_info_out;
4886
4887 *sec_token_p = task->sec_token;
4888
4889 *task_info_count = TASK_SECURITY_TOKEN_COUNT;
4890 break;
4891 }
4892
4893 case TASK_AUDIT_TOKEN:
4894 {
4895 audit_token_t *audit_token_p;
4896
4897 if (*task_info_count < TASK_AUDIT_TOKEN_COUNT) {
4898 error = KERN_INVALID_ARGUMENT;
4899 break;
4900 }
4901
4902 audit_token_p = (audit_token_t *) task_info_out;
4903
4904 *audit_token_p = task->audit_token;
4905
4906 *task_info_count = TASK_AUDIT_TOKEN_COUNT;
4907 break;
4908 }
4909
4910 case TASK_SCHED_INFO:
4911 error = KERN_INVALID_ARGUMENT;
4912 break;
4913
4914 case TASK_EVENTS_INFO:
4915 {
4916 task_events_info_t events_info;
4917 thread_t thread;
4918
4919 if (*task_info_count < TASK_EVENTS_INFO_COUNT) {
4920 error = KERN_INVALID_ARGUMENT;
4921 break;
4922 }
4923
4924 events_info = (task_events_info_t) task_info_out;
4925
4926
4927 events_info->faults = task->faults;
4928 events_info->pageins = task->pageins;
4929 events_info->cow_faults = task->cow_faults;
4930 events_info->messages_sent = task->messages_sent;
4931 events_info->messages_received = task->messages_received;
4932 events_info->syscalls_mach = task->syscalls_mach;
4933 events_info->syscalls_unix = task->syscalls_unix;
4934
4935 events_info->csw = task->c_switch;
4936
4937 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4938 events_info->csw += thread->c_switch;
4939 events_info->syscalls_mach += thread->syscalls_mach;
4940 events_info->syscalls_unix += thread->syscalls_unix;
4941 }
4942
4943
4944 *task_info_count = TASK_EVENTS_INFO_COUNT;
4945 break;
4946 }
4947 case TASK_AFFINITY_TAG_INFO:
4948 {
4949 if (*task_info_count < TASK_AFFINITY_TAG_INFO_COUNT) {
4950 error = KERN_INVALID_ARGUMENT;
4951 break;
4952 }
4953
4954 error = task_affinity_info(task, task_info_out, task_info_count);
4955 break;
4956 }
4957 case TASK_POWER_INFO:
4958 {
4959 if (*task_info_count < TASK_POWER_INFO_COUNT) {
4960 error = KERN_INVALID_ARGUMENT;
4961 break;
4962 }
4963
4964 task_power_info_locked(task, (task_power_info_t)task_info_out, NULL, NULL, NULL);
4965 break;
4966 }
4967
4968 case TASK_POWER_INFO_V2:
4969 {
4970 if (*task_info_count < TASK_POWER_INFO_V2_COUNT_OLD) {
4971 error = KERN_INVALID_ARGUMENT;
4972 break;
4973 }
4974 task_power_info_v2_t tpiv2 = (task_power_info_v2_t) task_info_out;
4975 task_power_info_locked(task, &tpiv2->cpu_energy, &tpiv2->gpu_energy, tpiv2, NULL);
4976 break;
4977 }
4978
4979 case TASK_VM_INFO:
4980 case TASK_VM_INFO_PURGEABLE:
4981 {
4982 task_vm_info_t vm_info;
4983 vm_map_t map;
4984
4985 #if __arm64__
4986 struct proc *p;
4987 uint32_t platform, sdk;
4988 p = current_proc();
4989 platform = proc_platform(p);
4990 sdk = proc_sdk(p);
4991 if (original_task_info_count > TASK_VM_INFO_REV2_COUNT &&
4992 platform == PLATFORM_IOS &&
4993 sdk != 0 &&
4994 (sdk >> 16) <= 12) {
4995 /*
4996 * Some iOS apps pass an incorrect value for
4997 * task_info_count, expressed in number of bytes
4998 * instead of number of "natural_t" elements.
4999 * For the sake of backwards binary compatibility
5000 * for apps built with an iOS12 or older SDK and using
5001 * the "rev2" data structure, let's fix task_info_count
5002 * for them, to avoid stomping past the actual end
5003 * of their buffer.
5004 */
5005 #if DEVELOPMENT || DEBUG
5006 printf("%s:%d %d[%s] rdar://49484582 task_info_count %d -> %d platform %d sdk %d.%d.%d\n", __FUNCTION__, __LINE__, proc_pid(p), proc_name_address(p), original_task_info_count, TASK_VM_INFO_REV2_COUNT, platform, (sdk >> 16), ((sdk >> 8) & 0xff), (sdk & 0xff));
5007 #endif /* DEVELOPMENT || DEBUG */
5008 DTRACE_VM4(workaround_task_vm_info_count,
5009 mach_msg_type_number_t, original_task_info_count,
5010 mach_msg_type_number_t, TASK_VM_INFO_REV2_COUNT,
5011 uint32_t, platform,
5012 uint32_t, sdk);
5013 original_task_info_count = TASK_VM_INFO_REV2_COUNT;
5014 *task_info_count = original_task_info_count;
5015 }
5016 #endif /* __arm64__ */
5017
5018 if (*task_info_count < TASK_VM_INFO_REV0_COUNT) {
5019 error = KERN_INVALID_ARGUMENT;
5020 break;
5021 }
5022
5023 vm_info = (task_vm_info_t)task_info_out;
5024
5025 if (task == kernel_task) {
5026 map = kernel_map;
5027 /* no lock */
5028 } else {
5029 map = task->map;
5030 vm_map_lock_read(map);
5031 }
5032
5033 vm_info->virtual_size = (typeof(vm_info->virtual_size))map->size;
5034 vm_info->region_count = map->hdr.nentries;
5035 vm_info->page_size = vm_map_page_size(map);
5036
5037 vm_info->resident_size = pmap_resident_count(map->pmap);
5038 vm_info->resident_size *= PAGE_SIZE;
5039 vm_info->resident_size_peak = pmap_resident_max(map->pmap);
5040 vm_info->resident_size_peak *= PAGE_SIZE;
5041
5042 #define _VM_INFO(_name) \
5043 vm_info->_name = ((mach_vm_size_t) map->pmap->stats._name) * PAGE_SIZE
5044
5045 _VM_INFO(device);
5046 _VM_INFO(device_peak);
5047 _VM_INFO(external);
5048 _VM_INFO(external_peak);
5049 _VM_INFO(internal);
5050 _VM_INFO(internal_peak);
5051 _VM_INFO(reusable);
5052 _VM_INFO(reusable_peak);
5053 _VM_INFO(compressed);
5054 _VM_INFO(compressed_peak);
5055 _VM_INFO(compressed_lifetime);
5056
5057 vm_info->purgeable_volatile_pmap = 0;
5058 vm_info->purgeable_volatile_resident = 0;
5059 vm_info->purgeable_volatile_virtual = 0;
5060 if (task == kernel_task) {
5061 /*
5062 * We do not maintain the detailed stats for the
5063 * kernel_pmap, so just count everything as
5064 * "internal"...
5065 */
5066 vm_info->internal = vm_info->resident_size;
5067 /*
5068 * ... but since the memory held by the VM compressor
5069 * in the kernel address space ought to be attributed
5070 * to user-space tasks, we subtract it from "internal"
5071 * to give memory reporting tools a more accurate idea
5072 * of what the kernel itself is actually using, instead
5073 * of making it look like the kernel is leaking memory
5074 * when the system is under memory pressure.
5075 */
5076 vm_info->internal -= (VM_PAGE_COMPRESSOR_COUNT *
5077 PAGE_SIZE);
5078 } else {
5079 mach_vm_size_t volatile_virtual_size;
5080 mach_vm_size_t volatile_resident_size;
5081 mach_vm_size_t volatile_compressed_size;
5082 mach_vm_size_t volatile_pmap_size;
5083 mach_vm_size_t volatile_compressed_pmap_size;
5084 kern_return_t kr;
5085
5086 if (flavor == TASK_VM_INFO_PURGEABLE) {
5087 kr = vm_map_query_volatile(
5088 map,
5089 &volatile_virtual_size,
5090 &volatile_resident_size,
5091 &volatile_compressed_size,
5092 &volatile_pmap_size,
5093 &volatile_compressed_pmap_size);
5094 if (kr == KERN_SUCCESS) {
5095 vm_info->purgeable_volatile_pmap =
5096 volatile_pmap_size;
5097 if (radar_20146450) {
5098 vm_info->compressed -=
5099 volatile_compressed_pmap_size;
5100 }
5101 vm_info->purgeable_volatile_resident =
5102 volatile_resident_size;
5103 vm_info->purgeable_volatile_virtual =
5104 volatile_virtual_size;
5105 }
5106 }
5107 }
5108 *task_info_count = TASK_VM_INFO_REV0_COUNT;
5109
5110 if (original_task_info_count >= TASK_VM_INFO_REV1_COUNT) {
5111 vm_info->phys_footprint =
5112 (mach_vm_size_t) get_task_phys_footprint(task);
5113 *task_info_count = TASK_VM_INFO_REV1_COUNT;
5114 }
5115 if (original_task_info_count >= TASK_VM_INFO_REV2_COUNT) {
5116 vm_info->min_address = map->min_offset;
5117 vm_info->max_address = map->max_offset;
5118 *task_info_count = TASK_VM_INFO_REV2_COUNT;
5119 }
5120 if (original_task_info_count >= TASK_VM_INFO_REV3_COUNT) {
5121 ledger_get_lifetime_max(task->ledger,
5122 task_ledgers.phys_footprint,
5123 &vm_info->ledger_phys_footprint_peak);
5124 ledger_get_balance(task->ledger,
5125 task_ledgers.purgeable_nonvolatile,
5126 &vm_info->ledger_purgeable_nonvolatile);
5127 ledger_get_balance(task->ledger,
5128 task_ledgers.purgeable_nonvolatile_compressed,
5129 &vm_info->ledger_purgeable_novolatile_compressed);
5130 ledger_get_balance(task->ledger,
5131 task_ledgers.purgeable_volatile,
5132 &vm_info->ledger_purgeable_volatile);
5133 ledger_get_balance(task->ledger,
5134 task_ledgers.purgeable_volatile_compressed,
5135 &vm_info->ledger_purgeable_volatile_compressed);
5136 ledger_get_balance(task->ledger,
5137 task_ledgers.network_nonvolatile,
5138 &vm_info->ledger_tag_network_nonvolatile);
5139 ledger_get_balance(task->ledger,
5140 task_ledgers.network_nonvolatile_compressed,
5141 &vm_info->ledger_tag_network_nonvolatile_compressed);
5142 ledger_get_balance(task->ledger,
5143 task_ledgers.network_volatile,
5144 &vm_info->ledger_tag_network_volatile);
5145 ledger_get_balance(task->ledger,
5146 task_ledgers.network_volatile_compressed,
5147 &vm_info->ledger_tag_network_volatile_compressed);
5148 ledger_get_balance(task->ledger,
5149 task_ledgers.media_footprint,
5150 &vm_info->ledger_tag_media_footprint);
5151 ledger_get_balance(task->ledger,
5152 task_ledgers.media_footprint_compressed,
5153 &vm_info->ledger_tag_media_footprint_compressed);
5154 ledger_get_balance(task->ledger,
5155 task_ledgers.media_nofootprint,
5156 &vm_info->ledger_tag_media_nofootprint);
5157 ledger_get_balance(task->ledger,
5158 task_ledgers.media_nofootprint_compressed,
5159 &vm_info->ledger_tag_media_nofootprint_compressed);
5160 ledger_get_balance(task->ledger,
5161 task_ledgers.graphics_footprint,
5162 &vm_info->ledger_tag_graphics_footprint);
5163 ledger_get_balance(task->ledger,
5164 task_ledgers.graphics_footprint_compressed,
5165 &vm_info->ledger_tag_graphics_footprint_compressed);
5166 ledger_get_balance(task->ledger,
5167 task_ledgers.graphics_nofootprint,
5168 &vm_info->ledger_tag_graphics_nofootprint);
5169 ledger_get_balance(task->ledger,
5170 task_ledgers.graphics_nofootprint_compressed,
5171 &vm_info->ledger_tag_graphics_nofootprint_compressed);
5172 ledger_get_balance(task->ledger,
5173 task_ledgers.neural_footprint,
5174 &vm_info->ledger_tag_neural_footprint);
5175 ledger_get_balance(task->ledger,
5176 task_ledgers.neural_footprint_compressed,
5177 &vm_info->ledger_tag_neural_footprint_compressed);
5178 ledger_get_balance(task->ledger,
5179 task_ledgers.neural_nofootprint,
5180 &vm_info->ledger_tag_neural_nofootprint);
5181 ledger_get_balance(task->ledger,
5182 task_ledgers.neural_nofootprint_compressed,
5183 &vm_info->ledger_tag_neural_nofootprint_compressed);
5184 *task_info_count = TASK_VM_INFO_REV3_COUNT;
5185 }
5186 if (original_task_info_count >= TASK_VM_INFO_REV4_COUNT) {
5187 if (task->bsd_info) {
5188 vm_info->limit_bytes_remaining =
5189 memorystatus_available_memory_internal(task->bsd_info);
5190 } else {
5191 vm_info->limit_bytes_remaining = 0;
5192 }
5193 *task_info_count = TASK_VM_INFO_REV4_COUNT;
5194 }
5195 if (original_task_info_count >= TASK_VM_INFO_REV5_COUNT) {
5196 thread_t thread;
5197 integer_t total = task->decompressions;
5198 queue_iterate(&task->threads, thread, thread_t, task_threads) {
5199 total += thread->decompressions;
5200 }
5201 vm_info->decompressions = total;
5202 *task_info_count = TASK_VM_INFO_REV5_COUNT;
5203 }
5204
5205 if (task != kernel_task) {
5206 vm_map_unlock_read(map);
5207 }
5208
5209 break;
5210 }
5211
5212 case TASK_WAIT_STATE_INFO:
5213 {
5214 /*
5215 * Deprecated flavor. Currently allowing some results until all users
5216 * stop calling it. The results may not be accurate.
5217 */
5218 task_wait_state_info_t wait_state_info;
5219 uint64_t total_sfi_ledger_val = 0;
5220
5221 if (*task_info_count < TASK_WAIT_STATE_INFO_COUNT) {
5222 error = KERN_INVALID_ARGUMENT;
5223 break;
5224 }
5225
5226 wait_state_info = (task_wait_state_info_t) task_info_out;
5227
5228 wait_state_info->total_wait_state_time = 0;
5229 bzero(wait_state_info->_reserved, sizeof(wait_state_info->_reserved));
5230
5231 #if CONFIG_SCHED_SFI
5232 int i, prev_lentry = -1;
5233 int64_t val_credit, val_debit;
5234
5235 for (i = 0; i < MAX_SFI_CLASS_ID; i++) {
5236 val_credit = 0;
5237 /*
5238 * checking with prev_lentry != entry ensures adjacent classes
5239 * which share the same ledger do not add wait times twice.
5240 * Note: Use ledger() call to get data for each individual sfi class.
5241 */
5242 if (prev_lentry != task_ledgers.sfi_wait_times[i] &&
5243 KERN_SUCCESS == ledger_get_entries(task->ledger,
5244 task_ledgers.sfi_wait_times[i], &val_credit, &val_debit)) {
5245 total_sfi_ledger_val += val_credit;
5246 }
5247 prev_lentry = task_ledgers.sfi_wait_times[i];
5248 }
5249
5250 #endif /* CONFIG_SCHED_SFI */
5251 wait_state_info->total_wait_sfi_state_time = total_sfi_ledger_val;
5252 *task_info_count = TASK_WAIT_STATE_INFO_COUNT;
5253
5254 break;
5255 }
5256 case TASK_VM_INFO_PURGEABLE_ACCOUNT:
5257 {
5258 #if DEVELOPMENT || DEBUG
5259 pvm_account_info_t acnt_info;
5260
5261 if (*task_info_count < PVM_ACCOUNT_INFO_COUNT) {
5262 error = KERN_INVALID_ARGUMENT;
5263 break;
5264 }
5265
5266 if (task_info_out == NULL) {
5267 error = KERN_INVALID_ARGUMENT;
5268 break;
5269 }
5270
5271 acnt_info = (pvm_account_info_t) task_info_out;
5272
5273 error = vm_purgeable_account(task, acnt_info);
5274
5275 *task_info_count = PVM_ACCOUNT_INFO_COUNT;
5276
5277 break;
5278 #else /* DEVELOPMENT || DEBUG */
5279 error = KERN_NOT_SUPPORTED;
5280 break;
5281 #endif /* DEVELOPMENT || DEBUG */
5282 }
5283 case TASK_FLAGS_INFO:
5284 {
5285 task_flags_info_t flags_info;
5286
5287 if (*task_info_count < TASK_FLAGS_INFO_COUNT) {
5288 error = KERN_INVALID_ARGUMENT;
5289 break;
5290 }
5291
5292 flags_info = (task_flags_info_t)task_info_out;
5293
5294 /* only publish the 64-bit flag of the task */
5295 flags_info->flags = task->t_flags & (TF_64B_ADDR | TF_64B_DATA);
5296
5297 *task_info_count = TASK_FLAGS_INFO_COUNT;
5298 break;
5299 }
5300
5301 case TASK_DEBUG_INFO_INTERNAL:
5302 {
5303 #if DEVELOPMENT || DEBUG
5304 task_debug_info_internal_t dbg_info;
5305 ipc_space_t space = task->itk_space;
5306 if (*task_info_count < TASK_DEBUG_INFO_INTERNAL_COUNT) {
5307 error = KERN_NOT_SUPPORTED;
5308 break;
5309 }
5310
5311 if (task_info_out == NULL) {
5312 error = KERN_INVALID_ARGUMENT;
5313 break;
5314 }
5315 dbg_info = (task_debug_info_internal_t) task_info_out;
5316 dbg_info->ipc_space_size = 0;
5317
5318 if (space) {
5319 is_read_lock(space);
5320 dbg_info->ipc_space_size = space->is_table_size;
5321 is_read_unlock(space);
5322 }
5323
5324 dbg_info->suspend_count = task->suspend_count;
5325
5326 error = KERN_SUCCESS;
5327 *task_info_count = TASK_DEBUG_INFO_INTERNAL_COUNT;
5328 break;
5329 #else /* DEVELOPMENT || DEBUG */
5330 error = KERN_NOT_SUPPORTED;
5331 break;
5332 #endif /* DEVELOPMENT || DEBUG */
5333 }
5334 default:
5335 error = KERN_INVALID_ARGUMENT;
5336 }
5337
5338 task_unlock(task);
5339 return error;
5340 }
5341
5342 /*
5343 * task_info_from_user
5344 *
5345 * When calling task_info from user space,
5346 * this function will be executed as mig server side
5347 * instead of calling directly into task_info.
5348 * This gives the possibility to perform more security
5349 * checks on task_port.
5350 *
5351 * In the case of TASK_DYLD_INFO, we require the more
5352 * privileged task_port not the less-privileged task_name_port.
5353 *
5354 */
5355 kern_return_t
5356 task_info_from_user(
5357 mach_port_t task_port,
5358 task_flavor_t flavor,
5359 task_info_t task_info_out,
5360 mach_msg_type_number_t *task_info_count)
5361 {
5362 task_t task;
5363 kern_return_t ret;
5364
5365 if (flavor == TASK_DYLD_INFO) {
5366 task = convert_port_to_task(task_port);
5367 } else {
5368 task = convert_port_to_task_name(task_port);
5369 }
5370
5371 ret = task_info(task, flavor, task_info_out, task_info_count);
5372
5373 task_deallocate(task);
5374
5375 return ret;
5376 }
5377
5378 /*
5379 * task_power_info
5380 *
5381 * Returns power stats for the task.
5382 * Note: Called with task locked.
5383 */
5384 void
5385 task_power_info_locked(
5386 task_t task,
5387 task_power_info_t info,
5388 gpu_energy_data_t ginfo,
5389 task_power_info_v2_t infov2,
5390 uint64_t *runnable_time)
5391 {
5392 thread_t thread;
5393 ledger_amount_t tmp;
5394
5395 uint64_t runnable_time_sum = 0;
5396
5397 task_lock_assert_owned(task);
5398
5399 ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
5400 (ledger_amount_t *)&info->task_interrupt_wakeups, &tmp);
5401 ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
5402 (ledger_amount_t *)&info->task_platform_idle_wakeups, &tmp);
5403
5404 info->task_timer_wakeups_bin_1 = task->task_timer_wakeups_bin_1;
5405 info->task_timer_wakeups_bin_2 = task->task_timer_wakeups_bin_2;
5406
5407 info->total_user = task->total_user_time;
5408 info->total_system = task->total_system_time;
5409 runnable_time_sum = task->total_runnable_time;
5410
5411 #if CONFIG_EMBEDDED
5412 if (infov2) {
5413 infov2->task_energy = task->task_energy;
5414 }
5415 #endif
5416
5417 if (ginfo) {
5418 ginfo->task_gpu_utilisation = task->task_gpu_ns;
5419 }
5420
5421 if (infov2) {
5422 infov2->task_ptime = task->total_ptime;
5423 infov2->task_pset_switches = task->ps_switch;
5424 }
5425
5426 queue_iterate(&task->threads, thread, thread_t, task_threads) {
5427 uint64_t tval;
5428 spl_t x;
5429
5430 if (thread->options & TH_OPT_IDLE_THREAD) {
5431 continue;
5432 }
5433
5434 x = splsched();
5435 thread_lock(thread);
5436
5437 info->task_timer_wakeups_bin_1 += thread->thread_timer_wakeups_bin_1;
5438 info->task_timer_wakeups_bin_2 += thread->thread_timer_wakeups_bin_2;
5439
5440 #if CONFIG_EMBEDDED
5441 if (infov2) {
5442 infov2->task_energy += ml_energy_stat(thread);
5443 }
5444 #endif
5445
5446 tval = timer_grab(&thread->user_timer);
5447 info->total_user += tval;
5448
5449 if (infov2) {
5450 tval = timer_grab(&thread->ptime);
5451 infov2->task_ptime += tval;
5452 infov2->task_pset_switches += thread->ps_switch;
5453 }
5454
5455 tval = timer_grab(&thread->system_timer);
5456 if (thread->precise_user_kernel_time) {
5457 info->total_system += tval;
5458 } else {
5459 /* system_timer may represent either sys or user */
5460 info->total_user += tval;
5461 }
5462
5463 tval = timer_grab(&thread->runnable_timer);
5464
5465 runnable_time_sum += tval;
5466
5467 if (ginfo) {
5468 ginfo->task_gpu_utilisation += ml_gpu_stat(thread);
5469 }
5470 thread_unlock(thread);
5471 splx(x);
5472 }
5473
5474 if (runnable_time) {
5475 *runnable_time = runnable_time_sum;
5476 }
5477 }
5478
5479 /*
5480 * task_gpu_utilisation
5481 *
5482 * Returns the total gpu time used by the all the threads of the task
5483 * (both dead and alive)
5484 */
5485 uint64_t
5486 task_gpu_utilisation(
5487 task_t task)
5488 {
5489 uint64_t gpu_time = 0;
5490 #if !CONFIG_EMBEDDED
5491 thread_t thread;
5492
5493 task_lock(task);
5494 gpu_time += task->task_gpu_ns;
5495
5496 queue_iterate(&task->threads, thread, thread_t, task_threads) {
5497 spl_t x;
5498 x = splsched();
5499 thread_lock(thread);
5500 gpu_time += ml_gpu_stat(thread);
5501 thread_unlock(thread);
5502 splx(x);
5503 }
5504
5505 task_unlock(task);
5506 #else /* CONFIG_EMBEDDED */
5507 /* silence compiler warning */
5508 (void)task;
5509 #endif /* !CONFIG_EMBEDDED */
5510 return gpu_time;
5511 }
5512
5513 /*
5514 * task_energy
5515 *
5516 * Returns the total energy used by the all the threads of the task
5517 * (both dead and alive)
5518 */
5519 uint64_t
5520 task_energy(
5521 task_t task)
5522 {
5523 uint64_t energy = 0;
5524 thread_t thread;
5525
5526 task_lock(task);
5527 energy += task->task_energy;
5528
5529 queue_iterate(&task->threads, thread, thread_t, task_threads) {
5530 spl_t x;
5531 x = splsched();
5532 thread_lock(thread);
5533 energy += ml_energy_stat(thread);
5534 thread_unlock(thread);
5535 splx(x);
5536 }
5537
5538 task_unlock(task);
5539 return energy;
5540 }
5541
5542
5543 uint64_t
5544 task_cpu_ptime(
5545 __unused task_t task)
5546 {
5547 return 0;
5548 }
5549
5550
5551 /* This function updates the cpu time in the arrays for each
5552 * effective and requested QoS class
5553 */
5554 void
5555 task_update_cpu_time_qos_stats(
5556 task_t task,
5557 uint64_t *eqos_stats,
5558 uint64_t *rqos_stats)
5559 {
5560 if (!eqos_stats && !rqos_stats) {
5561 return;
5562 }
5563
5564 task_lock(task);
5565 thread_t thread;
5566 queue_iterate(&task->threads, thread, thread_t, task_threads) {
5567 if (thread->options & TH_OPT_IDLE_THREAD) {
5568 continue;
5569 }
5570
5571 thread_update_qos_cpu_time(thread);
5572 }
5573
5574 if (eqos_stats) {
5575 eqos_stats[THREAD_QOS_DEFAULT] += task->cpu_time_eqos_stats.cpu_time_qos_default;
5576 eqos_stats[THREAD_QOS_MAINTENANCE] += task->cpu_time_eqos_stats.cpu_time_qos_maintenance;
5577 eqos_stats[THREAD_QOS_BACKGROUND] += task->cpu_time_eqos_stats.cpu_time_qos_background;
5578 eqos_stats[THREAD_QOS_UTILITY] += task->cpu_time_eqos_stats.cpu_time_qos_utility;
5579 eqos_stats[THREAD_QOS_LEGACY] += task->cpu_time_eqos_stats.cpu_time_qos_legacy;
5580 eqos_stats[THREAD_QOS_USER_INITIATED] += task->cpu_time_eqos_stats.cpu_time_qos_user_initiated;
5581 eqos_stats[THREAD_QOS_USER_INTERACTIVE] += task->cpu_time_eqos_stats.cpu_time_qos_user_interactive;
5582 }
5583
5584 if (rqos_stats) {
5585 rqos_stats[THREAD_QOS_DEFAULT] += task->cpu_time_rqos_stats.cpu_time_qos_default;
5586 rqos_stats[THREAD_QOS_MAINTENANCE] += task->cpu_time_rqos_stats.cpu_time_qos_maintenance;
5587 rqos_stats[THREAD_QOS_BACKGROUND] += task->cpu_time_rqos_stats.cpu_time_qos_background;
5588 rqos_stats[THREAD_QOS_UTILITY] += task->cpu_time_rqos_stats.cpu_time_qos_utility;
5589 rqos_stats[THREAD_QOS_LEGACY] += task->cpu_time_rqos_stats.cpu_time_qos_legacy;
5590 rqos_stats[THREAD_QOS_USER_INITIATED] += task->cpu_time_rqos_stats.cpu_time_qos_user_initiated;
5591 rqos_stats[THREAD_QOS_USER_INTERACTIVE] += task->cpu_time_rqos_stats.cpu_time_qos_user_interactive;
5592 }
5593
5594 task_unlock(task);
5595 }
5596
5597 kern_return_t
5598 task_purgable_info(
5599 task_t task,
5600 task_purgable_info_t *stats)
5601 {
5602 if (task == TASK_NULL || stats == NULL) {
5603 return KERN_INVALID_ARGUMENT;
5604 }
5605 /* Take task reference */
5606 task_reference(task);
5607 vm_purgeable_stats((vm_purgeable_info_t)stats, task);
5608 /* Drop task reference */
5609 task_deallocate(task);
5610 return KERN_SUCCESS;
5611 }
5612
5613 void
5614 task_vtimer_set(
5615 task_t task,
5616 integer_t which)
5617 {
5618 thread_t thread;
5619 spl_t x;
5620
5621 task_lock(task);
5622
5623 task->vtimers |= which;
5624
5625 switch (which) {
5626 case TASK_VTIMER_USER:
5627 queue_iterate(&task->threads, thread, thread_t, task_threads) {
5628 x = splsched();
5629 thread_lock(thread);
5630 if (thread->precise_user_kernel_time) {
5631 thread->vtimer_user_save = timer_grab(&thread->user_timer);
5632 } else {
5633 thread->vtimer_user_save = timer_grab(&thread->system_timer);
5634 }
5635 thread_unlock(thread);
5636 splx(x);
5637 }
5638 break;
5639
5640 case TASK_VTIMER_PROF:
5641 queue_iterate(&task->threads, thread, thread_t, task_threads) {
5642 x = splsched();
5643 thread_lock(thread);
5644 thread->vtimer_prof_save = timer_grab(&thread->user_timer);
5645 thread->vtimer_prof_save += timer_grab(&thread->system_timer);
5646 thread_unlock(thread);
5647 splx(x);
5648 }
5649 break;
5650
5651 case TASK_VTIMER_RLIM:
5652 queue_iterate(&task->threads, thread, thread_t, task_threads) {
5653 x = splsched();
5654 thread_lock(thread);
5655 thread->vtimer_rlim_save = timer_grab(&thread->user_timer);
5656 thread->vtimer_rlim_save += timer_grab(&thread->system_timer);
5657 thread_unlock(thread);
5658 splx(x);
5659 }
5660 break;
5661 }
5662
5663 task_unlock(task);
5664 }
5665
5666 void
5667 task_vtimer_clear(
5668 task_t task,
5669 integer_t which)
5670 {
5671 assert(task == current_task());
5672
5673 task_lock(task);
5674
5675 task->vtimers &= ~which;
5676
5677 task_unlock(task);
5678 }
5679
5680 void
5681 task_vtimer_update(
5682 __unused
5683 task_t task,
5684 integer_t which,
5685 uint32_t *microsecs)
5686 {
5687 thread_t thread = current_thread();
5688 uint32_t tdelt = 0;
5689 clock_sec_t secs = 0;
5690 uint64_t tsum;
5691
5692 assert(task == current_task());
5693
5694 spl_t s = splsched();
5695 thread_lock(thread);
5696
5697 if ((task->vtimers & which) != (uint32_t)which) {
5698 thread_unlock(thread);
5699 splx(s);
5700 return;
5701 }
5702
5703 switch (which) {
5704 case TASK_VTIMER_USER:
5705 if (thread->precise_user_kernel_time) {
5706 tdelt = (uint32_t)timer_delta(&thread->user_timer,
5707 &thread->vtimer_user_save);
5708 } else {
5709 tdelt = (uint32_t)timer_delta(&thread->system_timer,
5710 &thread->vtimer_user_save);
5711 }
5712 absolutetime_to_microtime(tdelt, &secs, microsecs);
5713 break;
5714
5715 case TASK_VTIMER_PROF:
5716 tsum = timer_grab(&thread->user_timer);
5717 tsum += timer_grab(&thread->system_timer);
5718 tdelt = (uint32_t)(tsum - thread->vtimer_prof_save);
5719 absolutetime_to_microtime(tdelt, &secs, microsecs);
5720 /* if the time delta is smaller than a usec, ignore */
5721 if (*microsecs != 0) {
5722 thread->vtimer_prof_save = tsum;
5723 }
5724 break;
5725
5726 case TASK_VTIMER_RLIM:
5727 tsum = timer_grab(&thread->user_timer);
5728 tsum += timer_grab(&thread->system_timer);
5729 tdelt = (uint32_t)(tsum - thread->vtimer_rlim_save);
5730 thread->vtimer_rlim_save = tsum;
5731 absolutetime_to_microtime(tdelt, &secs, microsecs);
5732 break;
5733 }
5734
5735 thread_unlock(thread);
5736 splx(s);
5737 }
5738
5739 /*
5740 * task_assign:
5741 *
5742 * Change the assigned processor set for the task
5743 */
5744 kern_return_t
5745 task_assign(
5746 __unused task_t task,
5747 __unused processor_set_t new_pset,
5748 __unused boolean_t assign_threads)
5749 {
5750 return KERN_FAILURE;
5751 }
5752
5753 /*
5754 * task_assign_default:
5755 *
5756 * Version of task_assign to assign to default processor set.
5757 */
5758 kern_return_t
5759 task_assign_default(
5760 task_t task,
5761 boolean_t assign_threads)
5762 {
5763 return task_assign(task, &pset0, assign_threads);
5764 }
5765
5766 /*
5767 * task_get_assignment
5768 *
5769 * Return name of processor set that task is assigned to.
5770 */
5771 kern_return_t
5772 task_get_assignment(
5773 task_t task,
5774 processor_set_t *pset)
5775 {
5776 if (!task || !task->active) {
5777 return KERN_FAILURE;
5778 }
5779
5780 *pset = &pset0;
5781
5782 return KERN_SUCCESS;
5783 }
5784
5785 uint64_t
5786 get_task_dispatchqueue_offset(
5787 task_t task)
5788 {
5789 return task->dispatchqueue_offset;
5790 }
5791
5792 /*
5793 * task_policy
5794 *
5795 * Set scheduling policy and parameters, both base and limit, for
5796 * the given task. Policy must be a policy which is enabled for the
5797 * processor set. Change contained threads if requested.
5798 */
5799 kern_return_t
5800 task_policy(
5801 __unused task_t task,
5802 __unused policy_t policy_id,
5803 __unused policy_base_t base,
5804 __unused mach_msg_type_number_t count,
5805 __unused boolean_t set_limit,
5806 __unused boolean_t change)
5807 {
5808 return KERN_FAILURE;
5809 }
5810
5811 /*
5812 * task_set_policy
5813 *
5814 * Set scheduling policy and parameters, both base and limit, for
5815 * the given task. Policy can be any policy implemented by the
5816 * processor set, whether enabled or not. Change contained threads
5817 * if requested.
5818 */
5819 kern_return_t
5820 task_set_policy(
5821 __unused task_t task,
5822 __unused processor_set_t pset,
5823 __unused policy_t policy_id,
5824 __unused policy_base_t base,
5825 __unused mach_msg_type_number_t base_count,
5826 __unused policy_limit_t limit,
5827 __unused mach_msg_type_number_t limit_count,
5828 __unused boolean_t change)
5829 {
5830 return KERN_FAILURE;
5831 }
5832
5833 kern_return_t
5834 task_set_ras_pc(
5835 __unused task_t task,
5836 __unused vm_offset_t pc,
5837 __unused vm_offset_t endpc)
5838 {
5839 return KERN_FAILURE;
5840 }
5841
5842 void
5843 task_synchronizer_destroy_all(task_t task)
5844 {
5845 /*
5846 * Destroy owned semaphores
5847 */
5848 semaphore_destroy_all(task);
5849 }
5850
5851 /*
5852 * Install default (machine-dependent) initial thread state
5853 * on the task. Subsequent thread creation will have this initial
5854 * state set on the thread by machine_thread_inherit_taskwide().
5855 * Flavors and structures are exactly the same as those to thread_set_state()
5856 */
5857 kern_return_t
5858 task_set_state(
5859 task_t task,
5860 int flavor,
5861 thread_state_t state,
5862 mach_msg_type_number_t state_count)
5863 {
5864 kern_return_t ret;
5865
5866 if (task == TASK_NULL) {
5867 return KERN_INVALID_ARGUMENT;
5868 }
5869
5870 task_lock(task);
5871
5872 if (!task->active) {
5873 task_unlock(task);
5874 return KERN_FAILURE;
5875 }
5876
5877 ret = machine_task_set_state(task, flavor, state, state_count);
5878
5879 task_unlock(task);
5880 return ret;
5881 }
5882
5883 /*
5884 * Examine the default (machine-dependent) initial thread state
5885 * on the task, as set by task_set_state(). Flavors and structures
5886 * are exactly the same as those passed to thread_get_state().
5887 */
5888 kern_return_t
5889 task_get_state(
5890 task_t task,
5891 int flavor,
5892 thread_state_t state,
5893 mach_msg_type_number_t *state_count)
5894 {
5895 kern_return_t ret;
5896
5897 if (task == TASK_NULL) {
5898 return KERN_INVALID_ARGUMENT;
5899 }
5900
5901 task_lock(task);
5902
5903 if (!task->active) {
5904 task_unlock(task);
5905 return KERN_FAILURE;
5906 }
5907
5908 ret = machine_task_get_state(task, flavor, state, state_count);
5909
5910 task_unlock(task);
5911 return ret;
5912 }
5913
5914
5915 static kern_return_t __attribute__((noinline, not_tail_called))
5916 PROC_VIOLATED_GUARD__SEND_EXC_GUARD_AND_SUSPEND(
5917 mach_exception_code_t code,
5918 mach_exception_subcode_t subcode,
5919 void *reason)
5920 {
5921 #ifdef MACH_BSD
5922 if (1 == proc_selfpid()) {
5923 return KERN_NOT_SUPPORTED; // initproc is immune
5924 }
5925 #endif
5926 mach_exception_data_type_t codes[EXCEPTION_CODE_MAX] = {
5927 [0] = code,
5928 [1] = subcode,
5929 };
5930 task_t task = current_task();
5931 kern_return_t kr;
5932
5933 /* (See jetsam-related comments below) */
5934
5935 proc_memstat_terminated(task->bsd_info, TRUE);
5936 kr = task_enqueue_exception_with_corpse(task, EXC_GUARD, codes, 2, reason);
5937 proc_memstat_terminated(task->bsd_info, FALSE);
5938 return kr;
5939 }
5940
5941 kern_return_t
5942 task_violated_guard(
5943 mach_exception_code_t code,
5944 mach_exception_subcode_t subcode,
5945 void *reason)
5946 {
5947 return PROC_VIOLATED_GUARD__SEND_EXC_GUARD_AND_SUSPEND(code, subcode, reason);
5948 }
5949
5950
5951 #if CONFIG_MEMORYSTATUS
5952
5953 boolean_t
5954 task_get_memlimit_is_active(task_t task)
5955 {
5956 assert(task != NULL);
5957
5958 if (task->memlimit_is_active == 1) {
5959 return TRUE;
5960 } else {
5961 return FALSE;
5962 }
5963 }
5964
5965 void
5966 task_set_memlimit_is_active(task_t task, boolean_t memlimit_is_active)
5967 {
5968 assert(task != NULL);
5969
5970 if (memlimit_is_active) {
5971 task->memlimit_is_active = 1;
5972 } else {
5973 task->memlimit_is_active = 0;
5974 }
5975 }
5976
5977 boolean_t
5978 task_get_memlimit_is_fatal(task_t task)
5979 {
5980 assert(task != NULL);
5981
5982 if (task->memlimit_is_fatal == 1) {
5983 return TRUE;
5984 } else {
5985 return FALSE;
5986 }
5987 }
5988
5989 void
5990 task_set_memlimit_is_fatal(task_t task, boolean_t memlimit_is_fatal)
5991 {
5992 assert(task != NULL);
5993
5994 if (memlimit_is_fatal) {
5995 task->memlimit_is_fatal = 1;
5996 } else {
5997 task->memlimit_is_fatal = 0;
5998 }
5999 }
6000
6001 boolean_t
6002 task_has_triggered_exc_resource(task_t task, boolean_t memlimit_is_active)
6003 {
6004 boolean_t triggered = FALSE;
6005
6006 assert(task == current_task());
6007
6008 /*
6009 * Returns true, if task has already triggered an exc_resource exception.
6010 */
6011
6012 if (memlimit_is_active) {
6013 triggered = (task->memlimit_active_exc_resource ? TRUE : FALSE);
6014 } else {
6015 triggered = (task->memlimit_inactive_exc_resource ? TRUE : FALSE);
6016 }
6017
6018 return triggered;
6019 }
6020
6021 void
6022 task_mark_has_triggered_exc_resource(task_t task, boolean_t memlimit_is_active)
6023 {
6024 assert(task == current_task());
6025
6026 /*
6027 * We allow one exc_resource per process per active/inactive limit.
6028 * The limit's fatal attribute does not come into play.
6029 */
6030
6031 if (memlimit_is_active) {
6032 task->memlimit_active_exc_resource = 1;
6033 } else {
6034 task->memlimit_inactive_exc_resource = 1;
6035 }
6036 }
6037
6038 #define HWM_USERCORE_MINSPACE 250 // free space (in MB) required *after* core file creation
6039
6040 void __attribute__((noinline))
6041 PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb, boolean_t is_fatal)
6042 {
6043 task_t task = current_task();
6044 int pid = 0;
6045 const char *procname = "unknown";
6046 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
6047 boolean_t send_sync_exc_resource = FALSE;
6048
6049 #ifdef MACH_BSD
6050 pid = proc_selfpid();
6051
6052 if (pid == 1) {
6053 /*
6054 * Cannot have ReportCrash analyzing
6055 * a suspended initproc.
6056 */
6057 return;
6058 }
6059
6060 if (task->bsd_info != NULL) {
6061 procname = proc_name_address(current_task()->bsd_info);
6062 send_sync_exc_resource = proc_send_synchronous_EXC_RESOURCE(current_task()->bsd_info);
6063 }
6064 #endif
6065 #if CONFIG_COREDUMP
6066 if (hwm_user_cores) {
6067 int error;
6068 uint64_t starttime, end;
6069 clock_sec_t secs = 0;
6070 uint32_t microsecs = 0;
6071
6072 starttime = mach_absolute_time();
6073 /*
6074 * Trigger a coredump of this process. Don't proceed unless we know we won't
6075 * be filling up the disk; and ignore the core size resource limit for this
6076 * core file.
6077 */
6078 if ((error = coredump(current_task()->bsd_info, HWM_USERCORE_MINSPACE, COREDUMP_IGNORE_ULIMIT)) != 0) {
6079 printf("couldn't take coredump of %s[%d]: %d\n", procname, pid, error);
6080 }
6081 /*
6082 * coredump() leaves the task suspended.
6083 */
6084 task_resume_internal(current_task());
6085
6086 end = mach_absolute_time();
6087 absolutetime_to_microtime(end - starttime, &secs, &microsecs);
6088 printf("coredump of %s[%d] taken in %d secs %d microsecs\n",
6089 proc_name_address(current_task()->bsd_info), pid, (int)secs, microsecs);
6090 }
6091 #endif /* CONFIG_COREDUMP */
6092
6093 if (disable_exc_resource) {
6094 printf("process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
6095 "supressed by a boot-arg.\n", procname, pid, max_footprint_mb);
6096 return;
6097 }
6098
6099 /*
6100 * A task that has triggered an EXC_RESOURCE, should not be
6101 * jetsammed when the device is under memory pressure. Here
6102 * we set the P_MEMSTAT_TERMINATED flag so that the process
6103 * will be skipped if the memorystatus_thread wakes up.
6104 */
6105 proc_memstat_terminated(current_task()->bsd_info, TRUE);
6106
6107 code[0] = code[1] = 0;
6108 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_MEMORY);
6109 EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_HIGH_WATERMARK);
6110 EXC_RESOURCE_HWM_ENCODE_LIMIT(code[0], max_footprint_mb);
6111
6112 /*
6113 * Do not generate a corpse fork if the violation is a fatal one
6114 * or the process wants synchronous EXC_RESOURCE exceptions.
6115 */
6116 if (is_fatal || send_sync_exc_resource || exc_via_corpse_forking == 0) {
6117 /* Do not send a EXC_RESOURCE if corpse_for_fatal_memkill is set */
6118 if (send_sync_exc_resource || corpse_for_fatal_memkill == 0) {
6119 /*
6120 * Use the _internal_ variant so that no user-space
6121 * process can resume our task from under us.
6122 */
6123 task_suspend_internal(task);
6124 exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
6125 task_resume_internal(task);
6126 }
6127 } else {
6128 if (audio_active) {
6129 printf("process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
6130 "supressed due to audio playback.\n", procname, pid, max_footprint_mb);
6131 } else {
6132 task_enqueue_exception_with_corpse(task, EXC_RESOURCE,
6133 code, EXCEPTION_CODE_MAX, NULL);
6134 }
6135 }
6136
6137 /*
6138 * After the EXC_RESOURCE has been handled, we must clear the
6139 * P_MEMSTAT_TERMINATED flag so that the process can again be
6140 * considered for jetsam if the memorystatus_thread wakes up.
6141 */
6142 proc_memstat_terminated(current_task()->bsd_info, FALSE); /* clear the flag */
6143 }
6144
6145 /*
6146 * Callback invoked when a task exceeds its physical footprint limit.
6147 */
6148 void
6149 task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1)
6150 {
6151 ledger_amount_t max_footprint, max_footprint_mb;
6152 task_t task;
6153 boolean_t is_warning;
6154 boolean_t memlimit_is_active;
6155 boolean_t memlimit_is_fatal;
6156
6157 if (warning == LEDGER_WARNING_DIPPED_BELOW) {
6158 /*
6159 * Task memory limits only provide a warning on the way up.
6160 */
6161 return;
6162 } else if (warning == LEDGER_WARNING_ROSE_ABOVE) {
6163 /*
6164 * This task is in danger of violating a memory limit,
6165 * It has exceeded a percentage level of the limit.
6166 */
6167 is_warning = TRUE;
6168 } else {
6169 /*
6170 * The task has exceeded the physical footprint limit.
6171 * This is not a warning but a true limit violation.
6172 */
6173 is_warning = FALSE;
6174 }
6175
6176 task = current_task();
6177
6178 ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &max_footprint);
6179 max_footprint_mb = max_footprint >> 20;
6180
6181 memlimit_is_active = task_get_memlimit_is_active(task);
6182 memlimit_is_fatal = task_get_memlimit_is_fatal(task);
6183
6184 /*
6185 * If this is an actual violation (not a warning), then generate EXC_RESOURCE exception.
6186 * We only generate the exception once per process per memlimit (active/inactive limit).
6187 * To enforce this, we monitor state based on the memlimit's active/inactive attribute
6188 * and we disable it by marking that memlimit as exception triggered.
6189 */
6190 if ((is_warning == FALSE) && (!task_has_triggered_exc_resource(task, memlimit_is_active))) {
6191 PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND((int)max_footprint_mb, memlimit_is_fatal);
6192 memorystatus_log_exception((int)max_footprint_mb, memlimit_is_active, memlimit_is_fatal);
6193 task_mark_has_triggered_exc_resource(task, memlimit_is_active);
6194 }
6195
6196 memorystatus_on_ledger_footprint_exceeded(is_warning, memlimit_is_active, memlimit_is_fatal);
6197 }
6198
6199 extern int proc_check_footprint_priv(void);
6200
6201 kern_return_t
6202 task_set_phys_footprint_limit(
6203 task_t task,
6204 int new_limit_mb,
6205 int *old_limit_mb)
6206 {
6207 kern_return_t error;
6208
6209 boolean_t memlimit_is_active;
6210 boolean_t memlimit_is_fatal;
6211
6212 if ((error = proc_check_footprint_priv())) {
6213 return KERN_NO_ACCESS;
6214 }
6215
6216 /*
6217 * This call should probably be obsoleted.
6218 * But for now, we default to current state.
6219 */
6220 memlimit_is_active = task_get_memlimit_is_active(task);
6221 memlimit_is_fatal = task_get_memlimit_is_fatal(task);
6222
6223 return task_set_phys_footprint_limit_internal(task, new_limit_mb, old_limit_mb, memlimit_is_active, memlimit_is_fatal);
6224 }
6225
6226 kern_return_t
6227 task_convert_phys_footprint_limit(
6228 int limit_mb,
6229 int *converted_limit_mb)
6230 {
6231 if (limit_mb == -1) {
6232 /*
6233 * No limit
6234 */
6235 if (max_task_footprint != 0) {
6236 *converted_limit_mb = (int)(max_task_footprint / 1024 / 1024); /* bytes to MB */
6237 } else {
6238 *converted_limit_mb = (int)(LEDGER_LIMIT_INFINITY >> 20);
6239 }
6240 } else {
6241 /* nothing to convert */
6242 *converted_limit_mb = limit_mb;
6243 }
6244 return KERN_SUCCESS;
6245 }
6246
6247
6248 kern_return_t
6249 task_set_phys_footprint_limit_internal(
6250 task_t task,
6251 int new_limit_mb,
6252 int *old_limit_mb,
6253 boolean_t memlimit_is_active,
6254 boolean_t memlimit_is_fatal)
6255 {
6256 ledger_amount_t old;
6257 kern_return_t ret;
6258
6259 ret = ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &old);
6260
6261 if (ret != KERN_SUCCESS) {
6262 return ret;
6263 }
6264
6265 /*
6266 * Check that limit >> 20 will not give an "unexpected" 32-bit
6267 * result. There are, however, implicit assumptions that -1 mb limit
6268 * equates to LEDGER_LIMIT_INFINITY.
6269 */
6270 assert(((old & 0xFFF0000000000000LL) == 0) || (old == LEDGER_LIMIT_INFINITY));
6271
6272 if (old_limit_mb) {
6273 *old_limit_mb = (int)(old >> 20);
6274 }
6275
6276 if (new_limit_mb == -1) {
6277 /*
6278 * Caller wishes to remove the limit.
6279 */
6280 ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
6281 max_task_footprint ? max_task_footprint : LEDGER_LIMIT_INFINITY,
6282 max_task_footprint ? max_task_footprint_warning_level : 0);
6283
6284 task_lock(task);
6285 task_set_memlimit_is_active(task, memlimit_is_active);
6286 task_set_memlimit_is_fatal(task, memlimit_is_fatal);
6287 task_unlock(task);
6288
6289 return KERN_SUCCESS;
6290 }
6291
6292 #ifdef CONFIG_NOMONITORS
6293 return KERN_SUCCESS;
6294 #endif /* CONFIG_NOMONITORS */
6295
6296 task_lock(task);
6297
6298 if ((memlimit_is_active == task_get_memlimit_is_active(task)) &&
6299 (memlimit_is_fatal == task_get_memlimit_is_fatal(task)) &&
6300 (((ledger_amount_t)new_limit_mb << 20) == old)) {
6301 /*
6302 * memlimit state is not changing
6303 */
6304 task_unlock(task);
6305 return KERN_SUCCESS;
6306 }
6307
6308 task_set_memlimit_is_active(task, memlimit_is_active);
6309 task_set_memlimit_is_fatal(task, memlimit_is_fatal);
6310
6311 ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
6312 (ledger_amount_t)new_limit_mb << 20, PHYS_FOOTPRINT_WARNING_LEVEL);
6313
6314 if (task == current_task()) {
6315 ledger_check_new_balance(current_thread(), task->ledger,
6316 task_ledgers.phys_footprint);
6317 }
6318
6319 task_unlock(task);
6320
6321 return KERN_SUCCESS;
6322 }
6323
6324 kern_return_t
6325 task_get_phys_footprint_limit(
6326 task_t task,
6327 int *limit_mb)
6328 {
6329 ledger_amount_t limit;
6330 kern_return_t ret;
6331
6332 ret = ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &limit);
6333 if (ret != KERN_SUCCESS) {
6334 return ret;
6335 }
6336
6337 /*
6338 * Check that limit >> 20 will not give an "unexpected" signed, 32-bit
6339 * result. There are, however, implicit assumptions that -1 mb limit
6340 * equates to LEDGER_LIMIT_INFINITY.
6341 */
6342 assert(((limit & 0xFFF0000000000000LL) == 0) || (limit == LEDGER_LIMIT_INFINITY));
6343 *limit_mb = (int)(limit >> 20);
6344
6345 return KERN_SUCCESS;
6346 }
6347 #else /* CONFIG_MEMORYSTATUS */
6348 kern_return_t
6349 task_set_phys_footprint_limit(
6350 __unused task_t task,
6351 __unused int new_limit_mb,
6352 __unused int *old_limit_mb)
6353 {
6354 return KERN_FAILURE;
6355 }
6356
6357 kern_return_t
6358 task_get_phys_footprint_limit(
6359 __unused task_t task,
6360 __unused int *limit_mb)
6361 {
6362 return KERN_FAILURE;
6363 }
6364 #endif /* CONFIG_MEMORYSTATUS */
6365
6366 void
6367 task_set_thread_limit(task_t task, uint16_t thread_limit)
6368 {
6369 assert(task != kernel_task);
6370 if (thread_limit <= TASK_MAX_THREAD_LIMIT) {
6371 task_lock(task);
6372 task->task_thread_limit = thread_limit;
6373 task_unlock(task);
6374 }
6375 }
6376
6377 /*
6378 * We need to export some functions to other components that
6379 * are currently implemented in macros within the osfmk
6380 * component. Just export them as functions of the same name.
6381 */
6382 boolean_t
6383 is_kerneltask(task_t t)
6384 {
6385 if (t == kernel_task) {
6386 return TRUE;
6387 }
6388
6389 return FALSE;
6390 }
6391
6392 boolean_t
6393 is_corpsetask(task_t t)
6394 {
6395 return task_is_a_corpse(t);
6396 }
6397
6398 #undef current_task
6399 task_t current_task(void);
6400 task_t
6401 current_task(void)
6402 {
6403 return current_task_fast();
6404 }
6405
6406 #undef task_reference
6407 void task_reference(task_t task);
6408 void
6409 task_reference(
6410 task_t task)
6411 {
6412 if (task != TASK_NULL) {
6413 task_reference_internal(task);
6414 }
6415 }
6416
6417 /* defined in bsd/kern/kern_prot.c */
6418 extern int get_audit_token_pid(audit_token_t *audit_token);
6419
6420 int
6421 task_pid(task_t task)
6422 {
6423 if (task) {
6424 return get_audit_token_pid(&task->audit_token);
6425 }
6426 return -1;
6427 }
6428
6429
6430 /*
6431 * This routine finds a thread in a task by its unique id
6432 * Returns a referenced thread or THREAD_NULL if the thread was not found
6433 *
6434 * TODO: This is super inefficient - it's an O(threads in task) list walk!
6435 * We should make a tid hash, or transition all tid clients to thread ports
6436 *
6437 * Precondition: No locks held (will take task lock)
6438 */
6439 thread_t
6440 task_findtid(task_t task, uint64_t tid)
6441 {
6442 thread_t self = current_thread();
6443 thread_t found_thread = THREAD_NULL;
6444 thread_t iter_thread = THREAD_NULL;
6445
6446 /* Short-circuit the lookup if we're looking up ourselves */
6447 if (tid == self->thread_id || tid == TID_NULL) {
6448 assert(self->task == task);
6449
6450 thread_reference(self);
6451
6452 return self;
6453 }
6454
6455 task_lock(task);
6456
6457 queue_iterate(&task->threads, iter_thread, thread_t, task_threads) {
6458 if (iter_thread->thread_id == tid) {
6459 found_thread = iter_thread;
6460 thread_reference(found_thread);
6461 break;
6462 }
6463 }
6464
6465 task_unlock(task);
6466
6467 return found_thread;
6468 }
6469
6470 int
6471 pid_from_task(task_t task)
6472 {
6473 int pid = -1;
6474
6475 if (task->bsd_info) {
6476 pid = proc_pid(task->bsd_info);
6477 } else {
6478 pid = task_pid(task);
6479 }
6480
6481 return pid;
6482 }
6483
6484 /*
6485 * Control the CPU usage monitor for a task.
6486 */
6487 kern_return_t
6488 task_cpu_usage_monitor_ctl(task_t task, uint32_t *flags)
6489 {
6490 int error = KERN_SUCCESS;
6491
6492 if (*flags & CPUMON_MAKE_FATAL) {
6493 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_CPUMON;
6494 } else {
6495 error = KERN_INVALID_ARGUMENT;
6496 }
6497
6498 return error;
6499 }
6500
6501 /*
6502 * Control the wakeups monitor for a task.
6503 */
6504 kern_return_t
6505 task_wakeups_monitor_ctl(task_t task, uint32_t *flags, int32_t *rate_hz)
6506 {
6507 ledger_t ledger = task->ledger;
6508
6509 task_lock(task);
6510 if (*flags & WAKEMON_GET_PARAMS) {
6511 ledger_amount_t limit;
6512 uint64_t period;
6513
6514 ledger_get_limit(ledger, task_ledgers.interrupt_wakeups, &limit);
6515 ledger_get_period(ledger, task_ledgers.interrupt_wakeups, &period);
6516
6517 if (limit != LEDGER_LIMIT_INFINITY) {
6518 /*
6519 * An active limit means the wakeups monitor is enabled.
6520 */
6521 *rate_hz = (int32_t)(limit / (int64_t)(period / NSEC_PER_SEC));
6522 *flags = WAKEMON_ENABLE;
6523 if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON) {
6524 *flags |= WAKEMON_MAKE_FATAL;
6525 }
6526 } else {
6527 *flags = WAKEMON_DISABLE;
6528 *rate_hz = -1;
6529 }
6530
6531 /*
6532 * If WAKEMON_GET_PARAMS is present in flags, all other flags are ignored.
6533 */
6534 task_unlock(task);
6535 return KERN_SUCCESS;
6536 }
6537
6538 if (*flags & WAKEMON_ENABLE) {
6539 if (*flags & WAKEMON_SET_DEFAULTS) {
6540 *rate_hz = task_wakeups_monitor_rate;
6541 }
6542
6543 #ifndef CONFIG_NOMONITORS
6544 if (*flags & WAKEMON_MAKE_FATAL) {
6545 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON;
6546 }
6547 #endif /* CONFIG_NOMONITORS */
6548
6549 if (*rate_hz <= 0) {
6550 task_unlock(task);
6551 return KERN_INVALID_ARGUMENT;
6552 }
6553
6554 #ifndef CONFIG_NOMONITORS
6555 ledger_set_limit(ledger, task_ledgers.interrupt_wakeups, *rate_hz * task_wakeups_monitor_interval,
6556 task_wakeups_monitor_ustackshots_trigger_pct);
6557 ledger_set_period(ledger, task_ledgers.interrupt_wakeups, task_wakeups_monitor_interval * NSEC_PER_SEC);
6558 ledger_enable_callback(ledger, task_ledgers.interrupt_wakeups);
6559 #endif /* CONFIG_NOMONITORS */
6560 } else if (*flags & WAKEMON_DISABLE) {
6561 /*
6562 * Caller wishes to disable wakeups monitor on the task.
6563 *
6564 * Disable telemetry if it was triggered by the wakeups monitor, and
6565 * remove the limit & callback on the wakeups ledger entry.
6566 */
6567 #if CONFIG_TELEMETRY
6568 telemetry_task_ctl_locked(task, TF_WAKEMON_WARNING, 0);
6569 #endif
6570 ledger_disable_refill(ledger, task_ledgers.interrupt_wakeups);
6571 ledger_disable_callback(ledger, task_ledgers.interrupt_wakeups);
6572 }
6573
6574 task_unlock(task);
6575 return KERN_SUCCESS;
6576 }
6577
6578 void
6579 task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1)
6580 {
6581 if (warning == LEDGER_WARNING_ROSE_ABOVE) {
6582 #if CONFIG_TELEMETRY
6583 /*
6584 * This task is in danger of violating the wakeups monitor. Enable telemetry on this task
6585 * so there are micro-stackshots available if and when EXC_RESOURCE is triggered.
6586 */
6587 telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 1);
6588 #endif
6589 return;
6590 }
6591
6592 #if CONFIG_TELEMETRY
6593 /*
6594 * If the balance has dipped below the warning level (LEDGER_WARNING_DIPPED_BELOW) or
6595 * exceeded the limit, turn telemetry off for the task.
6596 */
6597 telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 0);
6598 #endif
6599
6600 if (warning == 0) {
6601 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS();
6602 }
6603 }
6604
6605 void __attribute__((noinline))
6606 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS(void)
6607 {
6608 task_t task = current_task();
6609 int pid = 0;
6610 const char *procname = "unknown";
6611 boolean_t fatal;
6612 kern_return_t kr;
6613 #ifdef EXC_RESOURCE_MONITORS
6614 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
6615 #endif /* EXC_RESOURCE_MONITORS */
6616 struct ledger_entry_info lei;
6617
6618 #ifdef MACH_BSD
6619 pid = proc_selfpid();
6620 if (task->bsd_info != NULL) {
6621 procname = proc_name_address(current_task()->bsd_info);
6622 }
6623 #endif
6624
6625 ledger_get_entry_info(task->ledger, task_ledgers.interrupt_wakeups, &lei);
6626
6627 /*
6628 * Disable the exception notification so we don't overwhelm
6629 * the listener with an endless stream of redundant exceptions.
6630 * TODO: detect whether another thread is already reporting the violation.
6631 */
6632 uint32_t flags = WAKEMON_DISABLE;
6633 task_wakeups_monitor_ctl(task, &flags, NULL);
6634
6635 fatal = task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON;
6636 trace_resource_violation(RMON_CPUWAKES_VIOLATED, &lei);
6637 os_log(OS_LOG_DEFAULT, "process %s[%d] caught waking the CPU %llu times "
6638 "over ~%llu seconds, averaging %llu wakes / second and "
6639 "violating a %slimit of %llu wakes over %llu seconds.\n",
6640 procname, pid,
6641 lei.lei_balance, lei.lei_last_refill / NSEC_PER_SEC,
6642 lei.lei_last_refill == 0 ? 0 :
6643 (NSEC_PER_SEC * lei.lei_balance / lei.lei_last_refill),
6644 fatal ? "FATAL " : "",
6645 lei.lei_limit, lei.lei_refill_period / NSEC_PER_SEC);
6646
6647 kr = send_resource_violation(send_cpu_wakes_violation, task, &lei,
6648 fatal ? kRNFatalLimitFlag : 0);
6649 if (kr) {
6650 printf("send_resource_violation(CPU wakes, ...): error %#x\n", kr);
6651 }
6652
6653 #ifdef EXC_RESOURCE_MONITORS
6654 if (disable_exc_resource) {
6655 printf("process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
6656 "supressed by a boot-arg\n", procname, pid);
6657 return;
6658 }
6659 if (audio_active) {
6660 os_log(OS_LOG_DEFAULT, "process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
6661 "supressed due to audio playback\n", procname, pid);
6662 return;
6663 }
6664 if (lei.lei_last_refill == 0) {
6665 os_log(OS_LOG_DEFAULT, "process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
6666 "supressed due to lei.lei_last_refill = 0 \n", procname, pid);
6667 }
6668
6669 code[0] = code[1] = 0;
6670 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_WAKEUPS);
6671 EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_WAKEUPS_MONITOR);
6672 EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_PERMITTED(code[0],
6673 NSEC_PER_SEC * lei.lei_limit / lei.lei_refill_period);
6674 EXC_RESOURCE_CPUMONITOR_ENCODE_OBSERVATION_INTERVAL(code[0],
6675 lei.lei_last_refill);
6676 EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_OBSERVED(code[1],
6677 NSEC_PER_SEC * lei.lei_balance / lei.lei_last_refill);
6678 exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
6679 #endif /* EXC_RESOURCE_MONITORS */
6680
6681 if (fatal) {
6682 task_terminate_internal(task);
6683 }
6684 }
6685
6686 static boolean_t
6687 global_update_logical_writes(int64_t io_delta, int64_t *global_write_count)
6688 {
6689 int64_t old_count, new_count;
6690 boolean_t needs_telemetry;
6691
6692 do {
6693 new_count = old_count = *global_write_count;
6694 new_count += io_delta;
6695 if (new_count >= io_telemetry_limit) {
6696 new_count = 0;
6697 needs_telemetry = TRUE;
6698 } else {
6699 needs_telemetry = FALSE;
6700 }
6701 } while (!OSCompareAndSwap64(old_count, new_count, global_write_count));
6702 return needs_telemetry;
6703 }
6704
6705 void
6706 task_update_logical_writes(task_t task, uint32_t io_size, int flags, void *vp)
6707 {
6708 int64_t io_delta = 0;
6709 int64_t * global_counter_to_update;
6710 boolean_t needs_telemetry = FALSE;
6711 int ledger_to_update = 0;
6712 struct task_writes_counters * writes_counters_to_update;
6713
6714 if ((!task) || (!io_size) || (!vp)) {
6715 return;
6716 }
6717
6718 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_DATA_WRITE)) | DBG_FUNC_NONE,
6719 task_pid(task), io_size, flags, (uintptr_t)VM_KERNEL_ADDRPERM(vp), 0);
6720 DTRACE_IO4(logical_writes, struct task *, task, uint32_t, io_size, int, flags, vnode *, vp);
6721
6722 // Is the drive backing this vnode internal or external to the system?
6723 if (vnode_isonexternalstorage(vp) == false) {
6724 global_counter_to_update = &global_logical_writes_count;
6725 ledger_to_update = task_ledgers.logical_writes;
6726 writes_counters_to_update = &task->task_writes_counters_internal;
6727 } else {
6728 global_counter_to_update = &global_logical_writes_to_external_count;
6729 ledger_to_update = task_ledgers.logical_writes_to_external;
6730 writes_counters_to_update = &task->task_writes_counters_external;
6731 }
6732
6733 switch (flags) {
6734 case TASK_WRITE_IMMEDIATE:
6735 OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_immediate_writes));
6736 ledger_credit(task->ledger, ledger_to_update, io_size);
6737 coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, TRUE, io_size);
6738 break;
6739 case TASK_WRITE_DEFERRED:
6740 OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_deferred_writes));
6741 ledger_credit(task->ledger, ledger_to_update, io_size);
6742 coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, TRUE, io_size);
6743 break;
6744 case TASK_WRITE_INVALIDATED:
6745 OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_invalidated_writes));
6746 ledger_debit(task->ledger, ledger_to_update, io_size);
6747 coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, FALSE, io_size);
6748 break;
6749 case TASK_WRITE_METADATA:
6750 OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_metadata_writes));
6751 ledger_credit(task->ledger, ledger_to_update, io_size);
6752 coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, TRUE, io_size);
6753 break;
6754 }
6755
6756 io_delta = (flags == TASK_WRITE_INVALIDATED) ? ((int64_t)io_size * -1ll) : ((int64_t)io_size);
6757 if (io_telemetry_limit != 0) {
6758 /* If io_telemetry_limit is 0, disable global updates and I/O telemetry */
6759 needs_telemetry = global_update_logical_writes(io_delta, global_counter_to_update);
6760 if (needs_telemetry) {
6761 act_set_io_telemetry_ast(current_thread());
6762 }
6763 }
6764 }
6765
6766 /*
6767 * Control the I/O monitor for a task.
6768 */
6769 kern_return_t
6770 task_io_monitor_ctl(task_t task, uint32_t *flags)
6771 {
6772 ledger_t ledger = task->ledger;
6773
6774 task_lock(task);
6775 if (*flags & IOMON_ENABLE) {
6776 /* Configure the physical I/O ledger */
6777 ledger_set_limit(ledger, task_ledgers.physical_writes, (task_iomon_limit_mb * 1024 * 1024), 0);
6778 ledger_set_period(ledger, task_ledgers.physical_writes, (task_iomon_interval_secs * NSEC_PER_SEC));
6779 } else if (*flags & IOMON_DISABLE) {
6780 /*
6781 * Caller wishes to disable I/O monitor on the task.
6782 */
6783 ledger_disable_refill(ledger, task_ledgers.physical_writes);
6784 ledger_disable_callback(ledger, task_ledgers.physical_writes);
6785 }
6786
6787 task_unlock(task);
6788 return KERN_SUCCESS;
6789 }
6790
6791 void
6792 task_io_rate_exceeded(int warning, const void *param0, __unused const void *param1)
6793 {
6794 if (warning == 0) {
6795 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO((int)param0);
6796 }
6797 }
6798
6799 void __attribute__((noinline))
6800 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO(int flavor)
6801 {
6802 int pid = 0;
6803 task_t task = current_task();
6804 #ifdef EXC_RESOURCE_MONITORS
6805 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
6806 #endif /* EXC_RESOURCE_MONITORS */
6807 struct ledger_entry_info lei;
6808 kern_return_t kr;
6809
6810 #ifdef MACH_BSD
6811 pid = proc_selfpid();
6812 #endif
6813 /*
6814 * Get the ledger entry info. We need to do this before disabling the exception
6815 * to get correct values for all fields.
6816 */
6817 switch (flavor) {
6818 case FLAVOR_IO_PHYSICAL_WRITES:
6819 ledger_get_entry_info(task->ledger, task_ledgers.physical_writes, &lei);
6820 break;
6821 }
6822
6823
6824 /*
6825 * Disable the exception notification so we don't overwhelm
6826 * the listener with an endless stream of redundant exceptions.
6827 * TODO: detect whether another thread is already reporting the violation.
6828 */
6829 uint32_t flags = IOMON_DISABLE;
6830 task_io_monitor_ctl(task, &flags);
6831
6832 if (flavor == FLAVOR_IO_LOGICAL_WRITES) {
6833 trace_resource_violation(RMON_LOGWRITES_VIOLATED, &lei);
6834 }
6835 os_log(OS_LOG_DEFAULT, "process [%d] caught causing excessive I/O (flavor: %d). Task I/O: %lld MB. [Limit : %lld MB per %lld secs]\n",
6836 pid, flavor, (lei.lei_balance / (1024 * 1024)), (lei.lei_limit / (1024 * 1024)), (lei.lei_refill_period / NSEC_PER_SEC));
6837
6838 kr = send_resource_violation(send_disk_writes_violation, task, &lei, kRNFlagsNone);
6839 if (kr) {
6840 printf("send_resource_violation(disk_writes, ...): error %#x\n", kr);
6841 }
6842
6843 #ifdef EXC_RESOURCE_MONITORS
6844 code[0] = code[1] = 0;
6845 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_IO);
6846 EXC_RESOURCE_ENCODE_FLAVOR(code[0], flavor);
6847 EXC_RESOURCE_IO_ENCODE_INTERVAL(code[0], (lei.lei_refill_period / NSEC_PER_SEC));
6848 EXC_RESOURCE_IO_ENCODE_LIMIT(code[0], (lei.lei_limit / (1024 * 1024)));
6849 EXC_RESOURCE_IO_ENCODE_OBSERVED(code[1], (lei.lei_balance / (1024 * 1024)));
6850 exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
6851 #endif /* EXC_RESOURCE_MONITORS */
6852 }
6853
6854 /* Placeholders for the task set/get voucher interfaces */
6855 kern_return_t
6856 task_get_mach_voucher(
6857 task_t task,
6858 mach_voucher_selector_t __unused which,
6859 ipc_voucher_t *voucher)
6860 {
6861 if (TASK_NULL == task) {
6862 return KERN_INVALID_TASK;
6863 }
6864
6865 *voucher = NULL;
6866 return KERN_SUCCESS;
6867 }
6868
6869 kern_return_t
6870 task_set_mach_voucher(
6871 task_t task,
6872 ipc_voucher_t __unused voucher)
6873 {
6874 if (TASK_NULL == task) {
6875 return KERN_INVALID_TASK;
6876 }
6877
6878 return KERN_SUCCESS;
6879 }
6880
6881 kern_return_t
6882 task_swap_mach_voucher(
6883 __unused task_t task,
6884 __unused ipc_voucher_t new_voucher,
6885 ipc_voucher_t *in_out_old_voucher)
6886 {
6887 /*
6888 * Currently this function is only called from a MIG generated
6889 * routine which doesn't release the reference on the voucher
6890 * addressed by in_out_old_voucher. To avoid leaking this reference,
6891 * a call to release it has been added here.
6892 */
6893 ipc_voucher_release(*in_out_old_voucher);
6894 return KERN_NOT_SUPPORTED;
6895 }
6896
6897 void
6898 task_set_gpu_denied(task_t task, boolean_t denied)
6899 {
6900 task_lock(task);
6901
6902 if (denied) {
6903 task->t_flags |= TF_GPU_DENIED;
6904 } else {
6905 task->t_flags &= ~TF_GPU_DENIED;
6906 }
6907
6908 task_unlock(task);
6909 }
6910
6911 boolean_t
6912 task_is_gpu_denied(task_t task)
6913 {
6914 /* We don't need the lock to read this flag */
6915 return (task->t_flags & TF_GPU_DENIED) ? TRUE : FALSE;
6916 }
6917
6918
6919 uint64_t
6920 get_task_memory_region_count(task_t task)
6921 {
6922 vm_map_t map;
6923 map = (task == kernel_task) ? kernel_map: task->map;
6924 return (uint64_t)get_map_nentries(map);
6925 }
6926
6927 static void
6928 kdebug_trace_dyld_internal(uint32_t base_code,
6929 struct dyld_kernel_image_info *info)
6930 {
6931 static_assert(sizeof(info->uuid) >= 16);
6932
6933 #if defined(__LP64__)
6934 uint64_t *uuid = (uint64_t *)&(info->uuid);
6935
6936 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
6937 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code), uuid[0],
6938 uuid[1], info->load_addr,
6939 (uint64_t)info->fsid.val[0] | ((uint64_t)info->fsid.val[1] << 32),
6940 0);
6941 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
6942 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 1),
6943 (uint64_t)info->fsobjid.fid_objno |
6944 ((uint64_t)info->fsobjid.fid_generation << 32),
6945 0, 0, 0, 0);
6946 #else /* defined(__LP64__) */
6947 uint32_t *uuid = (uint32_t *)&(info->uuid);
6948
6949 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
6950 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 2), uuid[0],
6951 uuid[1], uuid[2], uuid[3], 0);
6952 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
6953 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 3),
6954 (uint32_t)info->load_addr, info->fsid.val[0], info->fsid.val[1],
6955 info->fsobjid.fid_objno, 0);
6956 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
6957 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 4),
6958 info->fsobjid.fid_generation, 0, 0, 0, 0);
6959 #endif /* !defined(__LP64__) */
6960 }
6961
6962 static kern_return_t
6963 kdebug_trace_dyld(task_t task, uint32_t base_code,
6964 vm_map_copy_t infos_copy, mach_msg_type_number_t infos_len)
6965 {
6966 kern_return_t kr;
6967 dyld_kernel_image_info_array_t infos;
6968 vm_map_offset_t map_data;
6969 vm_offset_t data;
6970
6971 if (!infos_copy) {
6972 return KERN_INVALID_ADDRESS;
6973 }
6974
6975 if (!kdebug_enable ||
6976 !kdebug_debugid_enabled(KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, 0))) {
6977 vm_map_copy_discard(infos_copy);
6978 return KERN_SUCCESS;
6979 }
6980
6981 if (task == NULL || task != current_task()) {
6982 return KERN_INVALID_TASK;
6983 }
6984
6985 kr = vm_map_copyout(ipc_kernel_map, &map_data, (vm_map_copy_t)infos_copy);
6986 if (kr != KERN_SUCCESS) {
6987 return kr;
6988 }
6989
6990 infos = CAST_DOWN(dyld_kernel_image_info_array_t, map_data);
6991
6992 for (mach_msg_type_number_t i = 0; i < infos_len; i++) {
6993 kdebug_trace_dyld_internal(base_code, &(infos[i]));
6994 }
6995
6996 data = CAST_DOWN(vm_offset_t, map_data);
6997 mach_vm_deallocate(ipc_kernel_map, data, infos_len * sizeof(infos[0]));
6998 return KERN_SUCCESS;
6999 }
7000
7001 kern_return_t
7002 task_register_dyld_image_infos(task_t task,
7003 dyld_kernel_image_info_array_t infos_copy,
7004 mach_msg_type_number_t infos_len)
7005 {
7006 return kdebug_trace_dyld(task, DBG_DYLD_UUID_MAP_A,
7007 (vm_map_copy_t)infos_copy, infos_len);
7008 }
7009
7010 kern_return_t
7011 task_unregister_dyld_image_infos(task_t task,
7012 dyld_kernel_image_info_array_t infos_copy,
7013 mach_msg_type_number_t infos_len)
7014 {
7015 return kdebug_trace_dyld(task, DBG_DYLD_UUID_UNMAP_A,
7016 (vm_map_copy_t)infos_copy, infos_len);
7017 }
7018
7019 kern_return_t
7020 task_get_dyld_image_infos(__unused task_t task,
7021 __unused dyld_kernel_image_info_array_t * dyld_images,
7022 __unused mach_msg_type_number_t * dyld_imagesCnt)
7023 {
7024 return KERN_NOT_SUPPORTED;
7025 }
7026
7027 kern_return_t
7028 task_register_dyld_shared_cache_image_info(task_t task,
7029 dyld_kernel_image_info_t cache_img,
7030 __unused boolean_t no_cache,
7031 __unused boolean_t private_cache)
7032 {
7033 if (task == NULL || task != current_task()) {
7034 return KERN_INVALID_TASK;
7035 }
7036
7037 kdebug_trace_dyld_internal(DBG_DYLD_UUID_SHARED_CACHE_A, &cache_img);
7038 return KERN_SUCCESS;
7039 }
7040
7041 kern_return_t
7042 task_register_dyld_set_dyld_state(__unused task_t task,
7043 __unused uint8_t dyld_state)
7044 {
7045 return KERN_NOT_SUPPORTED;
7046 }
7047
7048 kern_return_t
7049 task_register_dyld_get_process_state(__unused task_t task,
7050 __unused dyld_kernel_process_info_t * dyld_process_state)
7051 {
7052 return KERN_NOT_SUPPORTED;
7053 }
7054
7055 kern_return_t
7056 task_inspect(task_inspect_t task_insp, task_inspect_flavor_t flavor,
7057 task_inspect_info_t info_out, mach_msg_type_number_t *size_in_out)
7058 {
7059 #if MONOTONIC
7060 task_t task = (task_t)task_insp;
7061 kern_return_t kr = KERN_SUCCESS;
7062 mach_msg_type_number_t size;
7063
7064 if (task == TASK_NULL) {
7065 return KERN_INVALID_ARGUMENT;
7066 }
7067
7068 size = *size_in_out;
7069
7070 switch (flavor) {
7071 case TASK_INSPECT_BASIC_COUNTS: {
7072 struct task_inspect_basic_counts *bc;
7073 uint64_t task_counts[MT_CORE_NFIXED] = { 0 };
7074
7075 if (size < TASK_INSPECT_BASIC_COUNTS_COUNT) {
7076 kr = KERN_INVALID_ARGUMENT;
7077 break;
7078 }
7079
7080 mt_fixed_task_counts(task, task_counts);
7081 bc = (struct task_inspect_basic_counts *)info_out;
7082 #ifdef MT_CORE_INSTRS
7083 bc->instructions = task_counts[MT_CORE_INSTRS];
7084 #else /* defined(MT_CORE_INSTRS) */
7085 bc->instructions = 0;
7086 #endif /* !defined(MT_CORE_INSTRS) */
7087 bc->cycles = task_counts[MT_CORE_CYCLES];
7088 size = TASK_INSPECT_BASIC_COUNTS_COUNT;
7089 break;
7090 }
7091 default:
7092 kr = KERN_INVALID_ARGUMENT;
7093 break;
7094 }
7095
7096 if (kr == KERN_SUCCESS) {
7097 *size_in_out = size;
7098 }
7099 return kr;
7100 #else /* MONOTONIC */
7101 #pragma unused(task_insp, flavor, info_out, size_in_out)
7102 return KERN_NOT_SUPPORTED;
7103 #endif /* !MONOTONIC */
7104 }
7105
7106 #if CONFIG_SECLUDED_MEMORY
7107 int num_tasks_can_use_secluded_mem = 0;
7108
7109 void
7110 task_set_can_use_secluded_mem(
7111 task_t task,
7112 boolean_t can_use_secluded_mem)
7113 {
7114 if (!task->task_could_use_secluded_mem) {
7115 return;
7116 }
7117 task_lock(task);
7118 task_set_can_use_secluded_mem_locked(task, can_use_secluded_mem);
7119 task_unlock(task);
7120 }
7121
7122 void
7123 task_set_can_use_secluded_mem_locked(
7124 task_t task,
7125 boolean_t can_use_secluded_mem)
7126 {
7127 assert(task->task_could_use_secluded_mem);
7128 if (can_use_secluded_mem &&
7129 secluded_for_apps && /* global boot-arg */
7130 !task->task_can_use_secluded_mem) {
7131 assert(num_tasks_can_use_secluded_mem >= 0);
7132 OSAddAtomic(+1,
7133 (volatile SInt32 *)&num_tasks_can_use_secluded_mem);
7134 task->task_can_use_secluded_mem = TRUE;
7135 } else if (!can_use_secluded_mem &&
7136 task->task_can_use_secluded_mem) {
7137 assert(num_tasks_can_use_secluded_mem > 0);
7138 OSAddAtomic(-1,
7139 (volatile SInt32 *)&num_tasks_can_use_secluded_mem);
7140 task->task_can_use_secluded_mem = FALSE;
7141 }
7142 }
7143
7144 void
7145 task_set_could_use_secluded_mem(
7146 task_t task,
7147 boolean_t could_use_secluded_mem)
7148 {
7149 task->task_could_use_secluded_mem = could_use_secluded_mem;
7150 }
7151
7152 void
7153 task_set_could_also_use_secluded_mem(
7154 task_t task,
7155 boolean_t could_also_use_secluded_mem)
7156 {
7157 task->task_could_also_use_secluded_mem = could_also_use_secluded_mem;
7158 }
7159
7160 boolean_t
7161 task_can_use_secluded_mem(
7162 task_t task,
7163 boolean_t is_alloc)
7164 {
7165 if (task->task_can_use_secluded_mem) {
7166 assert(task->task_could_use_secluded_mem);
7167 assert(num_tasks_can_use_secluded_mem > 0);
7168 return TRUE;
7169 }
7170 if (task->task_could_also_use_secluded_mem &&
7171 num_tasks_can_use_secluded_mem > 0) {
7172 assert(num_tasks_can_use_secluded_mem > 0);
7173 return TRUE;
7174 }
7175
7176 /*
7177 * If a single task is using more than some amount of
7178 * memory, allow it to dip into secluded and also begin
7179 * suppression of secluded memory until the tasks exits.
7180 */
7181 if (is_alloc && secluded_shutoff_trigger != 0) {
7182 uint64_t phys_used = get_task_phys_footprint(task);
7183 if (phys_used > secluded_shutoff_trigger) {
7184 start_secluded_suppression(task);
7185 return TRUE;
7186 }
7187 }
7188
7189 return FALSE;
7190 }
7191
7192 boolean_t
7193 task_could_use_secluded_mem(
7194 task_t task)
7195 {
7196 return task->task_could_use_secluded_mem;
7197 }
7198
7199 boolean_t
7200 task_could_also_use_secluded_mem(
7201 task_t task)
7202 {
7203 return task->task_could_also_use_secluded_mem;
7204 }
7205 #endif /* CONFIG_SECLUDED_MEMORY */
7206
7207 queue_head_t *
7208 task_io_user_clients(task_t task)
7209 {
7210 return &task->io_user_clients;
7211 }
7212
7213 void
7214 task_set_message_app_suspended(task_t task, boolean_t enable)
7215 {
7216 task->message_app_suspended = enable;
7217 }
7218
7219 void
7220 task_copy_fields_for_exec(task_t dst_task, task_t src_task)
7221 {
7222 dst_task->vtimers = src_task->vtimers;
7223 }
7224
7225 #if DEVELOPMENT || DEBUG
7226 int vm_region_footprint = 0;
7227 #endif /* DEVELOPMENT || DEBUG */
7228
7229 boolean_t
7230 task_self_region_footprint(void)
7231 {
7232 #if DEVELOPMENT || DEBUG
7233 if (vm_region_footprint) {
7234 /* system-wide override */
7235 return TRUE;
7236 }
7237 #endif /* DEVELOPMENT || DEBUG */
7238 return current_task()->task_region_footprint;
7239 }
7240
7241 void
7242 task_self_region_footprint_set(
7243 boolean_t newval)
7244 {
7245 task_t curtask;
7246
7247 curtask = current_task();
7248 task_lock(curtask);
7249 if (newval) {
7250 curtask->task_region_footprint = TRUE;
7251 } else {
7252 curtask->task_region_footprint = FALSE;
7253 }
7254 task_unlock(curtask);
7255 }
7256
7257 void
7258 task_set_darkwake_mode(task_t task, boolean_t set_mode)
7259 {
7260 assert(task);
7261
7262 task_lock(task);
7263
7264 if (set_mode) {
7265 task->t_flags |= TF_DARKWAKE_MODE;
7266 } else {
7267 task->t_flags &= ~(TF_DARKWAKE_MODE);
7268 }
7269
7270 task_unlock(task);
7271 }
7272
7273 boolean_t
7274 task_get_darkwake_mode(task_t task)
7275 {
7276 assert(task);
7277 return (task->t_flags & TF_DARKWAKE_MODE) != 0;
7278 }
7279
7280 kern_return_t
7281 task_get_exc_guard_behavior(
7282 task_t task,
7283 task_exc_guard_behavior_t *behaviorp)
7284 {
7285 if (task == TASK_NULL) {
7286 return KERN_INVALID_TASK;
7287 }
7288 *behaviorp = task->task_exc_guard;
7289 return KERN_SUCCESS;
7290 }
7291
7292 #ifndef TASK_EXC_GUARD_ALL
7293 /* Temporary define until two branches are merged */
7294 #define TASK_EXC_GUARD_ALL (TASK_EXC_GUARD_VM_ALL | 0xf0)
7295 #endif
7296
7297 kern_return_t
7298 task_set_exc_guard_behavior(
7299 task_t task,
7300 task_exc_guard_behavior_t behavior)
7301 {
7302 if (task == TASK_NULL) {
7303 return KERN_INVALID_TASK;
7304 }
7305 if (behavior & ~TASK_EXC_GUARD_ALL) {
7306 return KERN_INVALID_VALUE;
7307 }
7308 task->task_exc_guard = behavior;
7309 return KERN_SUCCESS;
7310 }
7311
7312 #if __arm64__
7313 extern int legacy_footprint_entitlement_mode;
7314 extern void memorystatus_act_on_legacy_footprint_entitlement(proc_t, boolean_t);
7315
7316 void
7317 task_set_legacy_footprint(
7318 task_t task)
7319 {
7320 task_lock(task);
7321 task->task_legacy_footprint = TRUE;
7322 task_unlock(task);
7323 }
7324
7325 void
7326 task_set_extra_footprint_limit(
7327 task_t task)
7328 {
7329 if (task->task_extra_footprint_limit) {
7330 return;
7331 }
7332 task_lock(task);
7333 if (!task->task_extra_footprint_limit) {
7334 memorystatus_act_on_legacy_footprint_entitlement(task->bsd_info, TRUE);
7335 task->task_extra_footprint_limit = TRUE;
7336 }
7337 task_unlock(task);
7338 }
7339 #endif /* __arm64__ */
7340
7341 static inline ledger_amount_t
7342 task_ledger_get_balance(
7343 ledger_t ledger,
7344 int ledger_idx)
7345 {
7346 ledger_amount_t amount;
7347 amount = 0;
7348 ledger_get_balance(ledger, ledger_idx, &amount);
7349 return amount;
7350 }
7351
7352 /*
7353 * Gather the amount of memory counted in a task's footprint due to
7354 * being in a specific set of ledgers.
7355 */
7356 void
7357 task_ledgers_footprint(
7358 ledger_t ledger,
7359 ledger_amount_t *ledger_resident,
7360 ledger_amount_t *ledger_compressed)
7361 {
7362 *ledger_resident = 0;
7363 *ledger_compressed = 0;
7364
7365 /* purgeable non-volatile memory */
7366 *ledger_resident += task_ledger_get_balance(ledger, task_ledgers.purgeable_nonvolatile);
7367 *ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.purgeable_nonvolatile_compressed);
7368
7369 /* "default" tagged memory */
7370 *ledger_resident += task_ledger_get_balance(ledger, task_ledgers.tagged_footprint);
7371 *ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.tagged_footprint_compressed);
7372
7373 /* "network" currently never counts in the footprint... */
7374
7375 /* "media" tagged memory */
7376 *ledger_resident += task_ledger_get_balance(ledger, task_ledgers.media_footprint);
7377 *ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.media_footprint_compressed);
7378
7379 /* "graphics" tagged memory */
7380 *ledger_resident += task_ledger_get_balance(ledger, task_ledgers.graphics_footprint);
7381 *ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.graphics_footprint_compressed);
7382
7383 /* "neural" tagged memory */
7384 *ledger_resident += task_ledger_get_balance(ledger, task_ledgers.neural_footprint);
7385 *ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.neural_footprint_compressed);
7386 }
7387
7388 void
7389 task_set_memory_ownership_transfer(
7390 task_t task,
7391 boolean_t value)
7392 {
7393 task_lock(task);
7394 task->task_can_transfer_memory_ownership = value;
7395 task_unlock(task);
7396 }
7397
7398 void
7399 task_copy_vmobjects(task_t task, vm_object_query_t query, int len, int64_t* num)
7400 {
7401 vm_object_t find_vmo;
7402 int64_t size = 0;
7403
7404 task_objq_lock(task);
7405 if (query != NULL) {
7406 queue_iterate(&task->task_objq, find_vmo, vm_object_t, task_objq)
7407 {
7408 int byte_size;
7409 vm_object_query_t p = &query[size++];
7410
7411 p->object_id = (vm_object_id_t) VM_KERNEL_ADDRPERM(find_vmo);
7412 p->virtual_size = find_vmo->internal ? find_vmo->vo_size : 0;
7413 p->resident_size = find_vmo->resident_page_count * PAGE_SIZE;
7414 p->wired_size = find_vmo->wired_page_count * PAGE_SIZE;
7415 p->reusable_size = find_vmo->reusable_page_count * PAGE_SIZE;
7416 p->vo_no_footprint = find_vmo->vo_no_footprint;
7417 p->vo_ledger_tag = find_vmo->vo_ledger_tag;
7418 p->purgable = find_vmo->purgable;
7419
7420 if (find_vmo->internal && find_vmo->pager_created && find_vmo->pager != NULL) {
7421 p->compressed_size = vm_compressor_pager_get_count(find_vmo->pager) * PAGE_SIZE;
7422 } else {
7423 p->compressed_size = 0;
7424 }
7425
7426 /* make sure to not overrun */
7427 byte_size = (int) size * sizeof(vm_object_query_data_t);
7428 if ((int)(byte_size + sizeof(vm_object_query_data_t)) > len) {
7429 break;
7430 }
7431 }
7432 } else {
7433 size = task->task_owned_objects;
7434 }
7435 task_objq_unlock(task);
7436
7437 *num = size;
7438 }