[apple/xnu.git] osfmk/kern/task.c (blob 4be2588be6217732cbc6541dd8b8dbfa78301cec)
1 /*
2 * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_FREE_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 * File: kern/task.c
58 * Author: Avadis Tevanian, Jr., Michael Wayne Young, David Golub,
59 * David Black
60 *
61 * Task management primitives implementation.
62 */
63 /*
64 * Copyright (c) 1993 The University of Utah and
65 * the Computer Systems Laboratory (CSL). All rights reserved.
66 *
67 * Permission to use, copy, modify and distribute this software and its
68 * documentation is hereby granted, provided that both the copyright
69 * notice and this permission notice appear in all copies of the
70 * software, derivative works or modified versions, and any portions
71 * thereof, and that both notices appear in supporting documentation.
72 *
73 * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
74 * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
75 * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
76 *
77 * CSL requests users of this software to return to csl-dist@cs.utah.edu any
78 * improvements that they make and grant CSL redistribution rights.
79 *
80 */
81 /*
82 * NOTICE: This file was modified by McAfee Research in 2004 to introduce
83 * support for mandatory and extensible security protections. This notice
84 * is included in support of clause 2.2 (b) of the Apple Public License,
85 * Version 2.0.
86 * Copyright (c) 2005 SPARTA, Inc.
87 */
88
89 #include <mach/mach_types.h>
90 #include <mach/boolean.h>
91 #include <mach/host_priv.h>
92 #include <mach/machine/vm_types.h>
93 #include <mach/vm_param.h>
94 #include <mach/mach_vm.h>
95 #include <mach/semaphore.h>
96 #include <mach/task_info.h>
97 #include <mach/task_special_ports.h>
98 #include <mach/sdt.h>
99
100 #include <ipc/ipc_importance.h>
101 #include <ipc/ipc_types.h>
102 #include <ipc/ipc_space.h>
103 #include <ipc/ipc_entry.h>
104 #include <ipc/ipc_hash.h>
105
106 #include <kern/kern_types.h>
107 #include <kern/mach_param.h>
108 #include <kern/misc_protos.h>
109 #include <kern/task.h>
110 #include <kern/thread.h>
111 #include <kern/coalition.h>
112 #include <kern/zalloc.h>
113 #include <kern/kalloc.h>
114 #include <kern/kern_cdata.h>
115 #include <kern/processor.h>
116 #include <kern/sched_prim.h> /* for thread_wakeup */
117 #include <kern/ipc_tt.h>
118 #include <kern/host.h>
119 #include <kern/clock.h>
120 #include <kern/timer.h>
121 #include <kern/assert.h>
122 #include <kern/sync_lock.h>
123 #include <kern/affinity.h>
124 #include <kern/exc_resource.h>
125 #include <kern/machine.h>
126 #include <kern/policy_internal.h>
127
128 #include <corpses/task_corpse.h>
129 #if CONFIG_TELEMETRY
130 #include <kern/telemetry.h>
131 #endif
132
133 #include <vm/pmap.h>
134 #include <vm/vm_map.h>
135 #include <vm/vm_kern.h> /* for kernel_map, ipc_kernel_map */
136 #include <vm/vm_pageout.h>
137 #include <vm/vm_protos.h>
138 #include <vm/vm_purgeable_internal.h>
139
140 #include <sys/resource.h>
141 #include <sys/signalvar.h> /* for coredump */
142
143 /*
144 * Exported interfaces
145 */
146
147 #include <mach/task_server.h>
148 #include <mach/mach_host_server.h>
149 #include <mach/host_security_server.h>
150 #include <mach/mach_port_server.h>
151
152 #include <vm/vm_shared_region.h>
153
154 #include <libkern/OSDebug.h>
155 #include <libkern/OSAtomic.h>
156
157 #if CONFIG_ATM
158 #include <atm/atm_internal.h>
159 #endif
160
161 #include <kern/sfi.h> /* picks up ledger.h */
162
163 #if CONFIG_MACF
164 #include <security/mac_mach_internal.h>
165 #endif
166
167 #if KPERF
168 extern int kpc_force_all_ctrs(task_t, int);
169 #endif
170
171 task_t kernel_task;
172 zone_t task_zone;
173 lck_attr_t task_lck_attr;
174 lck_grp_t task_lck_grp;
175 lck_grp_attr_t task_lck_grp_attr;
176
177 extern int exc_via_corpse_forking;
178 extern int unify_corpse_blob_alloc;
179 extern int corpse_for_fatal_memkill;
180
181 /* Flag set by core audio when audio is playing. Used to stifle EXC_RESOURCE generation when active. */
182 int audio_active = 0;
183
184 zinfo_usage_store_t tasks_tkm_private;
185 zinfo_usage_store_t tasks_tkm_shared;
186
187 /* A container to accumulate statistics for expired tasks */
188 expired_task_statistics_t dead_task_statistics;
189 lck_spin_t dead_task_statistics_lock;
190
191 ledger_template_t task_ledger_template = NULL;
192
193 struct _task_ledger_indices task_ledgers __attribute__((used)) =
194 {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
195 { 0 /* initialized at runtime */},
196 #ifdef CONFIG_BANK
197 -1, -1,
198 #endif
199 -1, -1,
200 };
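/*
 * Every -1 above is a "ledger entry not registered yet" sentinel; init_task_ledgers()
 * replaces each index with the value returned by ledger_entry_add() (or, for the SFI
 * wait times, sfi_ledger_entry_add()) and panics if any entry could not be created.
 */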
201
202 /* System sleep state */
203 boolean_t tasks_suspend_state;
204
205
206 void init_task_ledgers(void);
207 void task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1);
208 void task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1);
209 void task_io_rate_exceeded(int warning, const void *param0, __unused const void *param1);
210 void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS(void);
211 void __attribute__((noinline)) PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb, boolean_t is_fatal);
212 void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO(int flavor);
213
214 kern_return_t task_suspend_internal(task_t);
215 kern_return_t task_resume_internal(task_t);
216 static kern_return_t task_start_halt_locked(task_t task, boolean_t should_mark_corpse);
217 int proc_list_uptrs(void *p, uint64_t *udata_buffer, int size);
218
219 extern kern_return_t iokit_task_terminate(task_t task);
220
221 extern kern_return_t exception_deliver(thread_t, exception_type_t, mach_exception_data_t, mach_msg_type_number_t, struct exception_action *, lck_mtx_t *);
222 extern void bsd_copythreadname(void *dst_uth, void *src_uth);
223 extern kern_return_t thread_resume(thread_t thread);
224
225 // Warn tasks when they hit 80% of their memory limit.
226 #define PHYS_FOOTPRINT_WARNING_LEVEL 80
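/*
 * Worked example with hypothetical numbers: a task whose phys_footprint limit is
 * 500 MB would get its ledger warning callback at roughly 500 MB * 80 / 100 = 400 MB,
 * assuming this default warning level is the one programmed into the ledger (see the
 * ledger_set_limit() call in task_create_internal()).
 */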
227
228 #define TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT 150 /* wakeups per second */
229 #define TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL 300 /* in seconds. */
230
231 /*
232 * Level (in terms of percentage of the limit) at which the wakeups monitor triggers telemetry.
233 *
234  * (i.e. when the task's wakeup rate exceeds 70% of the limit, start taking user
235  * stack traces, aka micro-stackshots)
236 */
237 #define TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER 70
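/*
 * Rough sketch of what these defaults work out to, assuming the monitor programs the
 * interrupt_wakeups ledger as (rate * interval), which is how task_wakeups_monitor_ctl()
 * is expected to use them:
 *
 *	150 wakeups/sec * 300 sec = 45,000 wakeups per observation window
 *
 * with micro-stackshot telemetry armed at 70% of that budget, i.e. around 31,500 wakeups.
 */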
238
239 int task_wakeups_monitor_interval; /* In seconds. Time period over which wakeups rate is observed */
240 int task_wakeups_monitor_rate; /* In hz. Maximum allowable wakeups per task before EXC_RESOURCE is sent */
241
242 int task_wakeups_monitor_ustackshots_trigger_pct; /* Percentage. Level at which we start gathering telemetry. */
243
244 int disable_exc_resource; /* Global override to suppress EXC_RESOURCE for resource monitor violations. */
245
246 ledger_amount_t max_task_footprint = 0; /* Per-task limit on physical memory consumption in bytes */
247 int max_task_footprint_warning_level = 0; /* Per-task limit warning percentage */
248 int max_task_footprint_mb = 0; /* Per-task limit on physical memory consumption in megabytes */
249
250 /* I/O Monitor Limits */
251 #define IOMON_DEFAULT_LIMIT (20480ull) /* MB of logical/physical I/O */
252 #define IOMON_DEFAULT_INTERVAL (86400ull) /* in seconds */
253
254 uint64_t task_iomon_limit_mb; /* Per-task I/O monitor limit in MBs */
255 uint64_t task_iomon_interval_secs; /* Per-task I/O monitor interval in secs */
256
257 #define IO_TELEMETRY_DEFAULT_LIMIT (10ll * 1024ll * 1024ll)
258 int64_t io_telemetry_limit; /* Threshold to take a microstackshot (0 indicates I/O telemetry is turned off) */
259 int64_t global_logical_writes_count = 0; /* Global count for logical writes */
260 static boolean_t global_update_logical_writes(int64_t);
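/*
 * For scale (illustrative only): the default I/O monitor budget above is 20480 MB
 * (20 GB) of I/O per 86400-second (24-hour) window, and io_telemetry_limit defaults
 * to 10 MB of logical writes between microstackshots; a boot-arg of io_telemetry_limit=0
 * turns that telemetry off entirely.
 */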
261
262 #if MACH_ASSERT
263 int pmap_ledgers_panic = 1;
264 #endif /* MACH_ASSERT */
265
266 int task_max = CONFIG_TASK_MAX; /* Max number of tasks */
267
268 #if CONFIG_COREDUMP
269 int hwm_user_cores = 0; /* high watermark violations generate user core files */
270 #endif
271
272 #ifdef MACH_BSD
273 extern void proc_getexecutableuuid(void *, unsigned char *, unsigned long);
274 extern int proc_pid(struct proc *p);
275 extern int proc_selfpid(void);
276 extern char *proc_name_address(struct proc *p);
277 extern uint64_t get_dispatchqueue_offset_from_proc(void *);
278
279 #if CONFIG_MEMORYSTATUS
280 extern void proc_memstat_terminated(struct proc* p, boolean_t set);
281 extern boolean_t memorystatus_turnoff_exception_and_get_fatalness(boolean_t warning, const int max_footprint_mb);
282 extern void memorystatus_on_ledger_footprint_exceeded(int warning, boolean_t is_fatal);
283 #endif /* CONFIG_MEMORYSTATUS */
284
285 #endif /* MACH_BSD */
286
287 /* Forwards */
288
289 static void task_hold_locked(task_t task);
290 static void task_wait_locked(task_t task, boolean_t until_not_runnable);
291 static void task_release_locked(task_t task);
292
293 static void task_synchronizer_destroy_all(task_t task);
294
295 void
296 task_backing_store_privileged(
297 task_t task)
298 {
299 task_lock(task);
300 task->priv_flags |= VM_BACKING_STORE_PRIV;
301 task_unlock(task);
302 return;
303 }
304
305
306 void
307 task_set_64bit(
308 task_t task,
309 boolean_t is64bit)
310 {
311 #if defined(__i386__) || defined(__x86_64__) || defined(__arm64__)
312 thread_t thread;
313 #endif /* defined(__i386__) || defined(__x86_64__) || defined(__arm64__) */
314
315 task_lock(task);
316
317 if (is64bit) {
318 if (task_has_64BitAddr(task))
319 goto out;
320 task_set_64BitAddr(task);
321 } else {
322 if ( !task_has_64BitAddr(task))
323 goto out;
324 task_clear_64BitAddr(task);
325 }
326 /* FIXME: On x86, the thread save state flavor can diverge from the
327 * task's 64-bit feature flag due to the 32-bit/64-bit register save
328 * state dichotomy. Since we can be pre-empted in this interval,
329 * certain routines may observe the thread as being in an inconsistent
330 * state with respect to its task's 64-bitness.
331 */
332
333 #if defined(__i386__) || defined(__x86_64__) || defined(__arm64__)
334 queue_iterate(&task->threads, thread, thread_t, task_threads) {
335 thread_mtx_lock(thread);
336 machine_thread_switch_addrmode(thread);
337 thread_mtx_unlock(thread);
338
339 if (thread == current_thread()) {
340 uint64_t arg1, arg2;
341 int urgency;
342 spl_t spl = splsched();
343 /*
344 * This call tells the machine layer that the current thread changed its 32-bitness.
345 * Other threads were no longer on core when their 32-bitness changed,
346 * but current_thread() is on core, and the previous call to
347 * machine_thread_going_on_core() reported a 32-bitness that is now wrong.
348 *
349 * This is needed for bring-up; a different callback should be used
350 * in the future.
351 */
352 thread_lock(thread);
353 urgency = thread_get_urgency(thread, &arg1, &arg2);
354 machine_thread_going_on_core(thread, urgency, 0);
355 thread_unlock(thread);
356 splx(spl);
357 }
358 }
359 #endif /* defined(__i386__) || defined(__x86_64__) || defined(__arm64__) */
360
361 out:
362 task_unlock(task);
363 }
364
365
366 void
367 task_set_dyld_info(task_t task, mach_vm_address_t addr, mach_vm_size_t size)
368 {
369 task_lock(task);
370 task->all_image_info_addr = addr;
371 task->all_image_info_size = size;
372 task_unlock(task);
373 }
374
375 void
376 task_atm_reset(__unused task_t task) {
377
378 #if CONFIG_ATM
379 if (task->atm_context != NULL) {
380 atm_task_descriptor_destroy(task->atm_context);
381 task->atm_context = NULL;
382 }
383 #endif
384
385 }
386
387 void
388 task_bank_reset(__unused task_t task) {
389
390 #if CONFIG_BANK
391 if (task->bank_context != NULL) {
392 bank_task_destroy(task);
393 }
394 #endif
395
396 }
397
398 /*
399 * NOTE: This should only be called when the P_LINTRANSIT
400 * flag is set (the proc_trans lock is held) on the
401 * proc associated with the task.
402 */
403 void
404 task_bank_init(__unused task_t task) {
405
406 #if CONFIG_BANK
407 if (task->bank_context != NULL) {
408 panic("Task bank init called with non null bank context for task: %p and bank_context: %p", task, task->bank_context);
409 }
410 bank_task_initialize(task);
411 #endif
412
413 }
414
415 void
416 task_set_did_exec_flag(task_t task)
417 {
418 task->t_procflags |= TPF_DID_EXEC;
419 }
420
421 void
422 task_clear_exec_copy_flag(task_t task)
423 {
424 task->t_procflags &= ~TPF_EXEC_COPY;
425 }
426
427 /*
428 * This wait event is t_procflags instead of t_flags because t_flags is volatile
429 *
430 * TODO: store the flags in the same place as the event
431 * rdar://problem/28501994
432 */
433 event_t
434 task_get_return_wait_event(task_t task)
435 {
436 return (event_t)&task->t_procflags;
437 }
438
439 void
440 task_clear_return_wait(task_t task)
441 {
442 task_lock(task);
443
444 task->t_flags &= ~TF_LRETURNWAIT;
445
446 if (task->t_flags & TF_LRETURNWAITER) {
447 thread_wakeup(task_get_return_wait_event(task));
448 task->t_flags &= ~TF_LRETURNWAITER;
449 }
450
451 task_unlock(task);
452 }
453
454 void
455 task_wait_to_return(void)
456 {
457 task_t task;
458
459 task = current_task();
460 task_lock(task);
461
462 if (task->t_flags & TF_LRETURNWAIT) {
463 do {
464 task->t_flags |= TF_LRETURNWAITER;
465 assert_wait(task_get_return_wait_event(task), THREAD_UNINT);
466 task_unlock(task);
467
468 thread_block(THREAD_CONTINUE_NULL);
469
470 task_lock(task);
471 } while (task->t_flags & TF_LRETURNWAIT);
472 }
473
474 task_unlock(task);
475
476 thread_bootstrap_return();
477 }
478
479 boolean_t
480 task_is_exec_copy(task_t task)
481 {
482 return task_is_exec_copy_internal(task);
483 }
484
485 boolean_t
486 task_did_exec(task_t task)
487 {
488 return task_did_exec_internal(task);
489 }
490
491 boolean_t
492 task_is_active(task_t task)
493 {
494 return task->active;
495 }
496
497 #if TASK_REFERENCE_LEAK_DEBUG
498 #include <kern/btlog.h>
499
500 static btlog_t *task_ref_btlog;
501 #define TASK_REF_OP_INCR 0x1
502 #define TASK_REF_OP_DECR 0x2
503
504 #define TASK_REF_NUM_RECORDS 100000
505 #define TASK_REF_BTDEPTH 7
506
507 void
508 task_reference_internal(task_t task)
509 {
510 void * bt[TASK_REF_BTDEPTH];
511 int numsaved = 0;
512
513 numsaved = OSBacktrace(bt, TASK_REF_BTDEPTH);
514
515 (void)hw_atomic_add(&(task)->ref_count, 1);
516 btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_INCR,
517 bt, numsaved);
518 }
519
520 uint32_t
521 task_deallocate_internal(task_t task)
522 {
523 void * bt[TASK_REF_BTDEPTH];
524 int numsaved = 0;
525
526 numsaved = OSBacktrace(bt, TASK_REF_BTDEPTH);
527
528 btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_DECR,
529 bt, numsaved);
530 return hw_atomic_sub(&(task)->ref_count, 1);
531 }
532
533 #endif /* TASK_REFERENCE_LEAK_DEBUG */
534
535 void
536 task_init(void)
537 {
538
539 lck_grp_attr_setdefault(&task_lck_grp_attr);
540 lck_grp_init(&task_lck_grp, "task", &task_lck_grp_attr);
541 lck_attr_setdefault(&task_lck_attr);
542 lck_mtx_init(&tasks_threads_lock, &task_lck_grp, &task_lck_attr);
543 lck_mtx_init(&tasks_corpse_lock, &task_lck_grp, &task_lck_attr);
544
545 task_zone = zinit(
546 sizeof(struct task),
547 task_max * sizeof(struct task),
548 TASK_CHUNK * sizeof(struct task),
549 "tasks");
550
551 zone_change(task_zone, Z_NOENCRYPT, TRUE);
552
553
554 /*
555 * Configure per-task memory limit.
556 * The boot-arg is interpreted as Megabytes,
557 * and takes precedence over the device tree.
558 * Setting the boot-arg to 0 disables task limits.
559 */
560 if (!PE_parse_boot_argn("max_task_pmem", &max_task_footprint_mb,
561 sizeof (max_task_footprint_mb))) {
562 /*
563 * No limit was found in boot-args, so go look in the device tree.
564 */
565 if (!PE_get_default("kern.max_task_pmem", &max_task_footprint_mb,
566 sizeof(max_task_footprint_mb))) {
567 /*
568 * No limit was found in device tree.
569 */
570 max_task_footprint_mb = 0;
571 }
572 }
573
574 if (max_task_footprint_mb != 0) {
575 #if CONFIG_MEMORYSTATUS
576 if (max_task_footprint_mb < 50) {
577 printf("Warning: max_task_pmem %d below minimum.\n",
578 max_task_footprint_mb);
579 max_task_footprint_mb = 50;
580 }
581 printf("Limiting task physical memory footprint to %d MB\n",
582 max_task_footprint_mb);
583
584 max_task_footprint = (ledger_amount_t)max_task_footprint_mb * 1024 * 1024; // Convert MB to bytes
585
586 /*
587 * Configure the per-task memory limit warning level.
588 * This is computed as a percentage.
589 */
590 max_task_footprint_warning_level = 0;
591
592 if (max_mem < 0x40000000) {
593 /*
594 * On devices with < 1GB of memory:
595 * -- set warnings to 50MB below the per-task limit.
596 */
597 if (max_task_footprint_mb > 50) {
598 max_task_footprint_warning_level = ((max_task_footprint_mb - 50) * 100) / max_task_footprint_mb;
599 }
600 } else {
601 /*
602 * On devices with >= 1GB of memory:
603 * -- set warnings to 100MB below the per-task limit.
604 */
605 if (max_task_footprint_mb > 100) {
606 max_task_footprint_warning_level = ((max_task_footprint_mb - 100) * 100) / max_task_footprint_mb;
607 }
608 }
609
610 /*
611 * Never allow warning level to land below the default.
612 */
613 if (max_task_footprint_warning_level < PHYS_FOOTPRINT_WARNING_LEVEL) {
614 max_task_footprint_warning_level = PHYS_FOOTPRINT_WARNING_LEVEL;
615 }
616
617 printf("Limiting task physical memory warning to %d%%\n", max_task_footprint_warning_level);
618
619 #else
620 printf("Warning: max_task_pmem specified, but jetsam not configured; ignoring.\n");
621 #endif /* CONFIG_MEMORYSTATUS */
622 }
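	/*
	 * Example with hypothetical values: booting with max_task_pmem=1024 caps each
	 * task's physical footprint at 1024 MB; on a device with >= 1GB of memory the
	 * warning level computed above lands 100 MB below the limit, i.e.
	 * ((1024 - 100) * 100) / 1024 = ~90%.
	 */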
623
624 #if MACH_ASSERT
625 PE_parse_boot_argn("pmap_ledgers_panic", &pmap_ledgers_panic,
626 sizeof (pmap_ledgers_panic));
627 #endif /* MACH_ASSERT */
628
629 #if CONFIG_COREDUMP
630 if (!PE_parse_boot_argn("hwm_user_cores", &hwm_user_cores,
631 sizeof (hwm_user_cores))) {
632 hwm_user_cores = 0;
633 }
634 #endif
635
636 proc_init_cpumon_params();
637
638 if (!PE_parse_boot_argn("task_wakeups_monitor_rate", &task_wakeups_monitor_rate, sizeof (task_wakeups_monitor_rate))) {
639 task_wakeups_monitor_rate = TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT;
640 }
641
642 if (!PE_parse_boot_argn("task_wakeups_monitor_interval", &task_wakeups_monitor_interval, sizeof (task_wakeups_monitor_interval))) {
643 task_wakeups_monitor_interval = TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL;
644 }
645
646 if (!PE_parse_boot_argn("task_wakeups_monitor_ustackshots_trigger_pct", &task_wakeups_monitor_ustackshots_trigger_pct,
647 sizeof (task_wakeups_monitor_ustackshots_trigger_pct))) {
648 task_wakeups_monitor_ustackshots_trigger_pct = TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER;
649 }
650
651 if (!PE_parse_boot_argn("disable_exc_resource", &disable_exc_resource,
652 sizeof (disable_exc_resource))) {
653 disable_exc_resource = 0;
654 }
655
656 if (!PE_parse_boot_argn("task_iomon_limit_mb", &task_iomon_limit_mb, sizeof (task_iomon_limit_mb))) {
657 task_iomon_limit_mb = IOMON_DEFAULT_LIMIT;
658 }
659
660 if (!PE_parse_boot_argn("task_iomon_interval_secs", &task_iomon_interval_secs, sizeof (task_iomon_interval_secs))) {
661 task_iomon_interval_secs = IOMON_DEFAULT_INTERVAL;
662 }
663
664 if (!PE_parse_boot_argn("io_telemetry_limit", &io_telemetry_limit, sizeof (io_telemetry_limit))) {
665 io_telemetry_limit = IO_TELEMETRY_DEFAULT_LIMIT;
666 }
667
668 /*
669 * If we have coalitions, coalition_init() will call init_task_ledgers() as it
670 * sets up the ledgers for the default coalition. If we don't have coalitions,
671 * then we have to call it now.
672 */
673 #if CONFIG_COALITIONS
674 assert(task_ledger_template);
675 #else /* CONFIG_COALITIONS */
676 init_task_ledgers();
677 #endif /* CONFIG_COALITIONS */
678
679 #if TASK_REFERENCE_LEAK_DEBUG
680 task_ref_btlog = btlog_create(TASK_REF_NUM_RECORDS, TASK_REF_BTDEPTH, TRUE /* caller_will_remove_entries_for_element? */);
681 assert(task_ref_btlog);
682 #endif
683
684 /*
685 * Create the kernel task as the first task.
686 */
687 #ifdef __LP64__
688 if (task_create_internal(TASK_NULL, NULL, FALSE, TRUE, TF_NONE, TPF_NONE, &kernel_task) != KERN_SUCCESS)
689 #else
690 if (task_create_internal(TASK_NULL, NULL, FALSE, FALSE, TF_NONE, TPF_NONE, &kernel_task) != KERN_SUCCESS)
691 #endif
692 panic("task_init\n");
693
694 vm_map_deallocate(kernel_task->map);
695 kernel_task->map = kernel_map;
696 lck_spin_init(&dead_task_statistics_lock, &task_lck_grp, &task_lck_attr);
697 }
698
699 /*
700 * Create a task running in the kernel address space. It may have its own
701 * map of size mem_size and may have ipc privileges. (No longer supported: this call now always fails.)
702 */
703 kern_return_t
704 kernel_task_create(
705 __unused task_t parent_task,
706 __unused vm_offset_t map_base,
707 __unused vm_size_t map_size,
708 __unused task_t *child_task)
709 {
710 return (KERN_INVALID_ARGUMENT);
711 }
712
713 kern_return_t
714 task_create(
715 task_t parent_task,
716 __unused ledger_port_array_t ledger_ports,
717 __unused mach_msg_type_number_t num_ledger_ports,
718 __unused boolean_t inherit_memory,
719 __unused task_t *child_task) /* OUT */
720 {
721 if (parent_task == TASK_NULL)
722 return(KERN_INVALID_ARGUMENT);
723
724 /*
725 * No longer supported: too many calls assume that a task has a valid
726 * process attached.
727 */
728 return(KERN_FAILURE);
729 }
730
731 kern_return_t
732 host_security_create_task_token(
733 host_security_t host_security,
734 task_t parent_task,
735 __unused security_token_t sec_token,
736 __unused audit_token_t audit_token,
737 __unused host_priv_t host_priv,
738 __unused ledger_port_array_t ledger_ports,
739 __unused mach_msg_type_number_t num_ledger_ports,
740 __unused boolean_t inherit_memory,
741 __unused task_t *child_task) /* OUT */
742 {
743 if (parent_task == TASK_NULL)
744 return(KERN_INVALID_ARGUMENT);
745
746 if (host_security == HOST_NULL)
747 return(KERN_INVALID_SECURITY);
748
749 /*
750 * No longer supported.
751 */
752 return(KERN_FAILURE);
753 }
754
755 /*
756 * Task ledgers
757 * ------------
758 *
759 * phys_footprint
760 * Physical footprint: This is the sum of:
761 * + (internal - alternate_accounting)
762 * + (internal_compressed - alternate_accounting_compressed)
763 * + iokit_mapped
764 * + purgeable_nonvolatile
765 * + purgeable_nonvolatile_compressed
766 * + page_table
767 *
768 * internal
769 * The task's anonymous memory, which on iOS is always resident.
770 *
771 * internal_compressed
772 * Amount of this task's internal memory which is held by the compressor.
773 * Such memory is no longer actually resident for the task [i.e., resident in its pmap],
774 * and could be either decompressed back into memory, or paged out to storage, depending
775 * on our implementation.
776 *
777 * iokit_mapped
778 * IOKit mappings: The total size of all IOKit mappings in this task [regardless of
779 * clean/dirty or internal/external state].
780 *
781 * alternate_accounting
782 * The number of internal dirty pages which are part of IOKit mappings. By definition, these pages
783 * are counted in both internal *and* iokit_mapped, so we must subtract them from the total to avoid
784 * double counting.
785 */
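/*
 * Worked example of the phys_footprint sum above (all numbers hypothetical):
 * with internal = 300 MB, internal_compressed = 40 MB, alternate_accounting = 20 MB,
 * alternate_accounting_compressed = 0, iokit_mapped = 30 MB,
 * purgeable_nonvolatile = 10 MB, purgeable_nonvolatile_compressed = 0 and
 * page_table = 4 MB:
 *
 *	phys_footprint = (300 - 20) + (40 - 0) + 30 + 10 + 0 + 4 = 364 MB
 */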
786 void
787 init_task_ledgers(void)
788 {
789 ledger_template_t t;
790
791 assert(task_ledger_template == NULL);
792 assert(kernel_task == TASK_NULL);
793
794 #if MACH_ASSERT
795 PE_parse_boot_argn("pmap_ledgers_panic", &pmap_ledgers_panic,
796 sizeof (pmap_ledgers_panic));
797 #endif /* MACH_ASSERT */
798
799 if ((t = ledger_template_create("Per-task ledger")) == NULL)
800 panic("couldn't create task ledger template");
801
802 task_ledgers.cpu_time = ledger_entry_add(t, "cpu_time", "sched", "ns");
803 task_ledgers.tkm_private = ledger_entry_add(t, "tkm_private",
804 "physmem", "bytes");
805 task_ledgers.tkm_shared = ledger_entry_add(t, "tkm_shared", "physmem",
806 "bytes");
807 task_ledgers.phys_mem = ledger_entry_add(t, "phys_mem", "physmem",
808 "bytes");
809 task_ledgers.wired_mem = ledger_entry_add(t, "wired_mem", "physmem",
810 "bytes");
811 task_ledgers.internal = ledger_entry_add(t, "internal", "physmem",
812 "bytes");
813 task_ledgers.iokit_mapped = ledger_entry_add(t, "iokit_mapped", "mappings",
814 "bytes");
815 task_ledgers.alternate_accounting = ledger_entry_add(t, "alternate_accounting", "physmem",
816 "bytes");
817 task_ledgers.alternate_accounting_compressed = ledger_entry_add(t, "alternate_accounting_compressed", "physmem",
818 "bytes");
819 task_ledgers.page_table = ledger_entry_add(t, "page_table", "physmem",
820 "bytes");
821 task_ledgers.phys_footprint = ledger_entry_add(t, "phys_footprint", "physmem",
822 "bytes");
823 task_ledgers.internal_compressed = ledger_entry_add(t, "internal_compressed", "physmem",
824 "bytes");
825 task_ledgers.purgeable_volatile = ledger_entry_add(t, "purgeable_volatile", "physmem", "bytes");
826 task_ledgers.purgeable_nonvolatile = ledger_entry_add(t, "purgeable_nonvolatile", "physmem", "bytes");
827 task_ledgers.purgeable_volatile_compressed = ledger_entry_add(t, "purgeable_volatile_compress", "physmem", "bytes");
828 task_ledgers.purgeable_nonvolatile_compressed = ledger_entry_add(t, "purgeable_nonvolatile_compress", "physmem", "bytes");
829 task_ledgers.platform_idle_wakeups = ledger_entry_add(t, "platform_idle_wakeups", "power",
830 "count");
831 task_ledgers.interrupt_wakeups = ledger_entry_add(t, "interrupt_wakeups", "power",
832 "count");
833
834 #if CONFIG_SCHED_SFI
835 sfi_class_id_t class_id, ledger_alias;
836 for (class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) {
837 task_ledgers.sfi_wait_times[class_id] = -1;
838 }
839
840 /* don't account for UNSPECIFIED */
841 for (class_id = SFI_CLASS_UNSPECIFIED + 1; class_id < MAX_SFI_CLASS_ID; class_id++) {
842 ledger_alias = sfi_get_ledger_alias_for_class(class_id);
843 if (ledger_alias != SFI_CLASS_UNSPECIFIED) {
844 /* Check to see if alias has been registered yet */
845 if (task_ledgers.sfi_wait_times[ledger_alias] != -1) {
846 task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias];
847 } else {
848 /* Otherwise, initialize it first */
849 task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias] = sfi_ledger_entry_add(t, ledger_alias);
850 }
851 } else {
852 task_ledgers.sfi_wait_times[class_id] = sfi_ledger_entry_add(t, class_id);
853 }
854
855 if (task_ledgers.sfi_wait_times[class_id] < 0) {
856 panic("couldn't create entries for task ledger template for SFI class 0x%x", class_id);
857 }
858 }
859
860 assert(task_ledgers.sfi_wait_times[MAX_SFI_CLASS_ID -1] != -1);
861 #endif /* CONFIG_SCHED_SFI */
862
863 #ifdef CONFIG_BANK
864 task_ledgers.cpu_time_billed_to_me = ledger_entry_add(t, "cpu_time_billed_to_me", "sched", "ns");
865 task_ledgers.cpu_time_billed_to_others = ledger_entry_add(t, "cpu_time_billed_to_others", "sched", "ns");
866 #endif
867 task_ledgers.physical_writes = ledger_entry_add(t, "physical_writes", "res", "bytes");
868 task_ledgers.logical_writes = ledger_entry_add(t, "logical_writes", "res", "bytes");
869
870 if ((task_ledgers.cpu_time < 0) ||
871 (task_ledgers.tkm_private < 0) ||
872 (task_ledgers.tkm_shared < 0) ||
873 (task_ledgers.phys_mem < 0) ||
874 (task_ledgers.wired_mem < 0) ||
875 (task_ledgers.internal < 0) ||
876 (task_ledgers.iokit_mapped < 0) ||
877 (task_ledgers.alternate_accounting < 0) ||
878 (task_ledgers.alternate_accounting_compressed < 0) ||
879 (task_ledgers.page_table < 0) ||
880 (task_ledgers.phys_footprint < 0) ||
881 (task_ledgers.internal_compressed < 0) ||
882 (task_ledgers.purgeable_volatile < 0) ||
883 (task_ledgers.purgeable_nonvolatile < 0) ||
884 (task_ledgers.purgeable_volatile_compressed < 0) ||
885 (task_ledgers.purgeable_nonvolatile_compressed < 0) ||
886 (task_ledgers.platform_idle_wakeups < 0) ||
887 (task_ledgers.interrupt_wakeups < 0) ||
888 #ifdef CONFIG_BANK
889 (task_ledgers.cpu_time_billed_to_me < 0) || (task_ledgers.cpu_time_billed_to_others < 0) ||
890 #endif
891 (task_ledgers.physical_writes < 0) ||
892 (task_ledgers.logical_writes < 0)
893 ) {
894 panic("couldn't create entries for task ledger template");
895 }
896
897 ledger_track_credit_only(t, task_ledgers.phys_footprint);
898 ledger_track_credit_only(t, task_ledgers.internal);
899 ledger_track_credit_only(t, task_ledgers.internal_compressed);
900 ledger_track_credit_only(t, task_ledgers.iokit_mapped);
901 ledger_track_credit_only(t, task_ledgers.alternate_accounting);
902 ledger_track_credit_only(t, task_ledgers.alternate_accounting_compressed);
903 ledger_track_credit_only(t, task_ledgers.purgeable_volatile);
904 ledger_track_credit_only(t, task_ledgers.purgeable_nonvolatile);
905 ledger_track_credit_only(t, task_ledgers.purgeable_volatile_compressed);
906 ledger_track_credit_only(t, task_ledgers.purgeable_nonvolatile_compressed);
907
908 ledger_track_maximum(t, task_ledgers.phys_footprint, 60);
909 #if MACH_ASSERT
910 if (pmap_ledgers_panic) {
911 ledger_panic_on_negative(t, task_ledgers.phys_footprint);
912 ledger_panic_on_negative(t, task_ledgers.page_table);
913 ledger_panic_on_negative(t, task_ledgers.internal);
914 ledger_panic_on_negative(t, task_ledgers.internal_compressed);
915 ledger_panic_on_negative(t, task_ledgers.iokit_mapped);
916 ledger_panic_on_negative(t, task_ledgers.alternate_accounting);
917 ledger_panic_on_negative(t, task_ledgers.alternate_accounting_compressed);
918 ledger_panic_on_negative(t, task_ledgers.purgeable_volatile);
919 ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile);
920 ledger_panic_on_negative(t, task_ledgers.purgeable_volatile_compressed);
921 ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile_compressed);
922 }
923 #endif /* MACH_ASSERT */
924
925 #if CONFIG_MEMORYSTATUS
926 ledger_set_callback(t, task_ledgers.phys_footprint, task_footprint_exceeded, NULL, NULL);
927 #endif /* CONFIG_MEMORYSTATUS */
928
929 ledger_set_callback(t, task_ledgers.interrupt_wakeups,
930 task_wakeups_rate_exceeded, NULL, NULL);
931 ledger_set_callback(t, task_ledgers.physical_writes, task_io_rate_exceeded, (void *)FLAVOR_IO_PHYSICAL_WRITES, NULL);
932 ledger_set_callback(t, task_ledgers.logical_writes, task_io_rate_exceeded, (void *)FLAVOR_IO_LOGICAL_WRITES, NULL);
933 task_ledger_template = t;
934 }
935
936 kern_return_t
937 task_create_internal(
938 task_t parent_task,
939 coalition_t *parent_coalitions __unused,
940 boolean_t inherit_memory,
941 boolean_t is_64bit,
942 uint32_t t_flags,
943 uint32_t t_procflags,
944 task_t *child_task) /* OUT */
945 {
946 task_t new_task;
947 vm_shared_region_t shared_region;
948 ledger_t ledger = NULL;
949
950 new_task = (task_t) zalloc(task_zone);
951
952 if (new_task == TASK_NULL)
953 return(KERN_RESOURCE_SHORTAGE);
954
955 /* one ref for just being alive; one for our caller */
956 new_task->ref_count = 2;
957
958 /* allocate with active entries */
959 assert(task_ledger_template != NULL);
960 if ((ledger = ledger_instantiate(task_ledger_template,
961 LEDGER_CREATE_ACTIVE_ENTRIES)) == NULL) {
962 zfree(task_zone, new_task);
963 return(KERN_RESOURCE_SHORTAGE);
964 }
965
966 new_task->ledger = ledger;
967
968 #if defined(CONFIG_SCHED_MULTIQ)
969 new_task->sched_group = sched_group_create();
970 #endif
971
972 /* if inherit_memory is true, parent_task MUST not be NULL */
973 if (!(t_flags & TF_CORPSE_FORK) && inherit_memory)
974 new_task->map = vm_map_fork(ledger, parent_task->map, 0);
975 else
976 new_task->map = vm_map_create(pmap_create(ledger, 0, is_64bit),
977 (vm_map_offset_t)(VM_MIN_ADDRESS),
978 (vm_map_offset_t)(VM_MAX_ADDRESS), TRUE);
979
980 /* Inherit memlock limit from parent */
981 if (parent_task)
982 vm_map_set_user_wire_limit(new_task->map, (vm_size_t)parent_task->map->user_wire_limit);
983
984 lck_mtx_init(&new_task->lock, &task_lck_grp, &task_lck_attr);
985 queue_init(&new_task->threads);
986 new_task->suspend_count = 0;
987 new_task->thread_count = 0;
988 new_task->active_thread_count = 0;
989 new_task->user_stop_count = 0;
990 new_task->legacy_stop_count = 0;
991 new_task->active = TRUE;
992 new_task->halting = FALSE;
993 new_task->user_data = NULL;
994 new_task->priv_flags = 0;
995 new_task->t_flags = t_flags;
996 new_task->t_procflags = t_procflags;
997 new_task->importance = 0;
998 new_task->corpse_info_kernel = NULL;
999 new_task->exec_token = 0;
1000
1001 #if CONFIG_ATM
1002 new_task->atm_context = NULL;
1003 #endif
1004 #if CONFIG_BANK
1005 new_task->bank_context = NULL;
1006 #endif
1007
1008 #ifdef MACH_BSD
1009 new_task->bsd_info = NULL;
1010 new_task->corpse_info = NULL;
1011 #endif /* MACH_BSD */
1012
1013 #if CONFIG_MACF
1014 new_task->crash_label = NULL;
1015 #endif
1016
1017 #if CONFIG_MEMORYSTATUS
1018 if (max_task_footprint != 0) {
1019 ledger_set_limit(ledger, task_ledgers.phys_footprint, max_task_footprint, PHYS_FOOTPRINT_WARNING_LEVEL);
1020 }
1021 #endif /* CONFIG_MEMORYSTATUS */
1022
1023 if (task_wakeups_monitor_rate != 0) {
1024 uint32_t flags = WAKEMON_ENABLE | WAKEMON_SET_DEFAULTS;
1025 int32_t rate; // Ignored because of WAKEMON_SET_DEFAULTS
1026 task_wakeups_monitor_ctl(new_task, &flags, &rate);
1027 }
1028
1029 #if CONFIG_IO_ACCOUNTING
1030 uint32_t flags = IOMON_ENABLE;
1031 task_io_monitor_ctl(new_task, &flags);
1032 #endif /* CONFIG_IO_ACCOUNTING */
1033
1034 #if defined(__i386__) || defined(__x86_64__)
1035 new_task->i386_ldt = 0;
1036 #endif
1037
1038 new_task->task_debug = NULL;
1039
1040 #if DEVELOPMENT || DEBUG
1041 new_task->task_unnested = FALSE;
1042 new_task->task_disconnected_count = 0;
1043 #endif
1044 queue_init(&new_task->semaphore_list);
1045 new_task->semaphores_owned = 0;
1046
1047 ipc_task_init(new_task, parent_task);
1048
1049 new_task->vtimers = 0;
1050
1051 new_task->shared_region = NULL;
1052
1053 new_task->affinity_space = NULL;
1054
1055 new_task->pidsuspended = FALSE;
1056 new_task->frozen = FALSE;
1057 new_task->changing_freeze_state = FALSE;
1058 new_task->rusage_cpu_flags = 0;
1059 new_task->rusage_cpu_percentage = 0;
1060 new_task->rusage_cpu_interval = 0;
1061 new_task->rusage_cpu_deadline = 0;
1062 new_task->rusage_cpu_callt = NULL;
1063 #if MACH_ASSERT
1064 new_task->suspends_outstanding = 0;
1065 #endif
1066
1067 #if HYPERVISOR
1068 new_task->hv_task_target = NULL;
1069 #endif /* HYPERVISOR */
1070
1071
1072 new_task->mem_notify_reserved = 0;
1073 #if IMPORTANCE_INHERITANCE
1074 new_task->task_imp_base = NULL;
1075 #endif /* IMPORTANCE_INHERITANCE */
1076
1077 #if defined(__x86_64__)
1078 new_task->uexc_range_start = new_task->uexc_range_size = new_task->uexc_handler = 0;
1079 #endif
1080
1081 new_task->requested_policy = default_task_requested_policy;
1082 new_task->effective_policy = default_task_effective_policy;
1083
1084 if (parent_task != TASK_NULL) {
1085 new_task->sec_token = parent_task->sec_token;
1086 new_task->audit_token = parent_task->audit_token;
1087
1088 /* inherit the parent's shared region */
1089 shared_region = vm_shared_region_get(parent_task);
1090 vm_shared_region_set(new_task, shared_region);
1091
1092 if(task_has_64BitAddr(parent_task))
1093 task_set_64BitAddr(new_task);
1094 new_task->all_image_info_addr = parent_task->all_image_info_addr;
1095 new_task->all_image_info_size = parent_task->all_image_info_size;
1096
1097 #if defined(__i386__) || defined(__x86_64__)
1098 if (inherit_memory && parent_task->i386_ldt)
1099 new_task->i386_ldt = user_ldt_copy(parent_task->i386_ldt);
1100 #endif
1101 if (inherit_memory && parent_task->affinity_space)
1102 task_affinity_create(parent_task, new_task);
1103
1104 new_task->pset_hint = parent_task->pset_hint = task_choose_pset(parent_task);
1105
1106 #if IMPORTANCE_INHERITANCE
1107 ipc_importance_task_t new_task_imp = IIT_NULL;
1108 boolean_t inherit_receive = TRUE;
1109
1110 if (task_is_marked_importance_donor(parent_task)) {
1111 new_task_imp = ipc_importance_for_task(new_task, FALSE);
1112 assert(IIT_NULL != new_task_imp);
1113 ipc_importance_task_mark_donor(new_task_imp, TRUE);
1114 }
1115
1116 if (inherit_receive) {
1117 if (task_is_marked_importance_receiver(parent_task)) {
1118 if (IIT_NULL == new_task_imp)
1119 new_task_imp = ipc_importance_for_task(new_task, FALSE);
1120 assert(IIT_NULL != new_task_imp);
1121 ipc_importance_task_mark_receiver(new_task_imp, TRUE);
1122 }
1123 if (task_is_marked_importance_denap_receiver(parent_task)) {
1124 if (IIT_NULL == new_task_imp)
1125 new_task_imp = ipc_importance_for_task(new_task, FALSE);
1126 assert(IIT_NULL != new_task_imp);
1127 ipc_importance_task_mark_denap_receiver(new_task_imp, TRUE);
1128 }
1129 }
1130
1131 if (IIT_NULL != new_task_imp) {
1132 assert(new_task->task_imp_base == new_task_imp);
1133 ipc_importance_task_release(new_task_imp);
1134 }
1135 #endif /* IMPORTANCE_INHERITANCE */
1136
1137 new_task->priority = BASEPRI_DEFAULT;
1138 new_task->max_priority = MAXPRI_USER;
1139
1140 task_policy_create(new_task, parent_task);
1141 } else {
1142 new_task->sec_token = KERNEL_SECURITY_TOKEN;
1143 new_task->audit_token = KERNEL_AUDIT_TOKEN;
1144 #ifdef __LP64__
1145 if(is_64bit)
1146 task_set_64BitAddr(new_task);
1147 #endif
1148 new_task->all_image_info_addr = (mach_vm_address_t)0;
1149 new_task->all_image_info_size = (mach_vm_size_t)0;
1150
1151 new_task->pset_hint = PROCESSOR_SET_NULL;
1152
1153 if (kernel_task == TASK_NULL) {
1154 new_task->priority = BASEPRI_KERNEL;
1155 new_task->max_priority = MAXPRI_KERNEL;
1156 } else {
1157 new_task->priority = BASEPRI_DEFAULT;
1158 new_task->max_priority = MAXPRI_USER;
1159 }
1160 }
1161
1162 bzero(new_task->coalition, sizeof(new_task->coalition));
1163 for (int i = 0; i < COALITION_NUM_TYPES; i++)
1164 queue_chain_init(new_task->task_coalition[i]);
1165
1166 /* Allocate I/O Statistics */
1167 new_task->task_io_stats = (io_stat_info_t)kalloc(sizeof(struct io_stat_info));
1168 assert(new_task->task_io_stats != NULL);
1169 bzero(new_task->task_io_stats, sizeof(struct io_stat_info));
1170
1171 bzero(&(new_task->cpu_time_qos_stats), sizeof(struct _cpu_time_qos_stats));
1172
1173 bzero(&new_task->extmod_statistics, sizeof(new_task->extmod_statistics));
1174
1175 	/* Copy resource accounting info from the parent for a corpse-forked task. */
1176 if (parent_task != NULL && (t_flags & TF_CORPSE_FORK)) {
1177 task_rollup_accounting_info(new_task, parent_task);
1178 } else {
1179 /* Initialize to zero for standard fork/spawn case */
1180 new_task->total_user_time = 0;
1181 new_task->total_system_time = 0;
1182 new_task->faults = 0;
1183 new_task->pageins = 0;
1184 new_task->cow_faults = 0;
1185 new_task->messages_sent = 0;
1186 new_task->messages_received = 0;
1187 new_task->syscalls_mach = 0;
1188 new_task->syscalls_unix = 0;
1189 new_task->c_switch = 0;
1190 new_task->p_switch = 0;
1191 new_task->ps_switch = 0;
1192 new_task->low_mem_notified_warn = 0;
1193 new_task->low_mem_notified_critical = 0;
1194 new_task->purged_memory_warn = 0;
1195 new_task->purged_memory_critical = 0;
1196 new_task->low_mem_privileged_listener = 0;
1197 new_task->task_timer_wakeups_bin_1 = 0;
1198 new_task->task_timer_wakeups_bin_2 = 0;
1199 new_task->task_gpu_ns = 0;
1200 new_task->task_immediate_writes = 0;
1201 new_task->task_deferred_writes = 0;
1202 new_task->task_invalidated_writes = 0;
1203 new_task->task_metadata_writes = 0;
1204 new_task->task_energy = 0;
1205 }
1206
1207
1208 #if CONFIG_COALITIONS
1209 if (!(t_flags & TF_CORPSE_FORK)) {
1210 /* TODO: there is no graceful failure path here... */
1211 if (parent_coalitions && parent_coalitions[COALITION_TYPE_RESOURCE]) {
1212 coalitions_adopt_task(parent_coalitions, new_task);
1213 } else if (parent_task && parent_task->coalition[COALITION_TYPE_RESOURCE]) {
1214 /*
1215 * all tasks at least have a resource coalition, so
1216 * if the parent has one then inherit all coalitions
1217 * the parent is a part of
1218 */
1219 coalitions_adopt_task(parent_task->coalition, new_task);
1220 } else {
1221 /* TODO: assert that new_task will be PID 1 (launchd) */
1222 coalitions_adopt_init_task(new_task);
1223 }
1224 } else {
1225 coalitions_adopt_corpse_task(new_task);
1226 }
1227
1228 if (new_task->coalition[COALITION_TYPE_RESOURCE] == COALITION_NULL) {
1229 panic("created task is not a member of a resource coalition");
1230 }
1231 #endif /* CONFIG_COALITIONS */
1232
1233 new_task->dispatchqueue_offset = 0;
1234 if (parent_task != NULL) {
1235 new_task->dispatchqueue_offset = parent_task->dispatchqueue_offset;
1236 }
1237
1238 if (vm_backing_store_low && parent_task != NULL)
1239 new_task->priv_flags |= (parent_task->priv_flags&VM_BACKING_STORE_PRIV);
1240
1241 new_task->task_volatile_objects = 0;
1242 new_task->task_nonvolatile_objects = 0;
1243 new_task->task_purgeable_disowning = FALSE;
1244 new_task->task_purgeable_disowned = FALSE;
1245
1246 #if CONFIG_SECLUDED_MEMORY
1247 new_task->task_can_use_secluded_mem = FALSE;
1248 new_task->task_could_use_secluded_mem = FALSE;
1249 new_task->task_could_also_use_secluded_mem = FALSE;
1250 #endif /* CONFIG_SECLUDED_MEMORY */
1251
1252 queue_init(&new_task->io_user_clients);
1253
1254 ipc_task_enable(new_task);
1255
1256 lck_mtx_lock(&tasks_threads_lock);
1257 queue_enter(&tasks, new_task, task_t, tasks);
1258 tasks_count++;
1259 if (tasks_suspend_state) {
1260 task_suspend_internal(new_task);
1261 }
1262 lck_mtx_unlock(&tasks_threads_lock);
1263
1264 *child_task = new_task;
1265 return(KERN_SUCCESS);
1266 }
1267
1268 /*
1269 * task_rollup_accounting_info
1270 *
1271  * Roll up accounting stats. Used to roll up stats
1272  * for exec copy tasks and corpse forks.
1273 */
1274 void
1275 task_rollup_accounting_info(task_t to_task, task_t from_task)
1276 {
1277 assert(from_task != to_task);
1278
1279 to_task->total_user_time = from_task->total_user_time;
1280 to_task->total_system_time = from_task->total_system_time;
1281 to_task->faults = from_task->faults;
1282 to_task->pageins = from_task->pageins;
1283 to_task->cow_faults = from_task->cow_faults;
1284 to_task->messages_sent = from_task->messages_sent;
1285 to_task->messages_received = from_task->messages_received;
1286 to_task->syscalls_mach = from_task->syscalls_mach;
1287 to_task->syscalls_unix = from_task->syscalls_unix;
1288 to_task->c_switch = from_task->c_switch;
1289 to_task->p_switch = from_task->p_switch;
1290 to_task->ps_switch = from_task->ps_switch;
1291 to_task->extmod_statistics = from_task->extmod_statistics;
1292 to_task->low_mem_notified_warn = from_task->low_mem_notified_warn;
1293 to_task->low_mem_notified_critical = from_task->low_mem_notified_critical;
1294 to_task->purged_memory_warn = from_task->purged_memory_warn;
1295 to_task->purged_memory_critical = from_task->purged_memory_critical;
1296 to_task->low_mem_privileged_listener = from_task->low_mem_privileged_listener;
1297 *to_task->task_io_stats = *from_task->task_io_stats;
1298 to_task->cpu_time_qos_stats = from_task->cpu_time_qos_stats;
1299 to_task->task_timer_wakeups_bin_1 = from_task->task_timer_wakeups_bin_1;
1300 to_task->task_timer_wakeups_bin_2 = from_task->task_timer_wakeups_bin_2;
1301 to_task->task_gpu_ns = from_task->task_gpu_ns;
1302 to_task->task_immediate_writes = from_task->task_immediate_writes;
1303 to_task->task_deferred_writes = from_task->task_deferred_writes;
1304 to_task->task_invalidated_writes = from_task->task_invalidated_writes;
1305 to_task->task_metadata_writes = from_task->task_metadata_writes;
1306 to_task->task_energy = from_task->task_energy;
1307
1308 /* Skip ledger roll up for memory accounting entries */
1309 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time);
1310 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.platform_idle_wakeups);
1311 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.interrupt_wakeups);
1312 #if CONFIG_SCHED_SFI
1313 for (sfi_class_id_t class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) {
1314 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.sfi_wait_times[class_id]);
1315 }
1316 #endif
1317 #if CONFIG_BANK
1318 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time_billed_to_me);
1319 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time_billed_to_others);
1320 #endif
1321 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.physical_writes);
1322 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.logical_writes);
1323 }
1324
1325 int task_dropped_imp_count = 0;
1326
1327 /*
1328 * task_deallocate:
1329 *
1330 * Drop a reference on a task.
1331 */
1332 void
1333 task_deallocate(
1334 task_t task)
1335 {
1336 ledger_amount_t credit, debit, interrupt_wakeups, platform_idle_wakeups;
1337 uint32_t refs;
1338
1339 if (task == TASK_NULL)
1340 return;
1341
1342 refs = task_deallocate_internal(task);
1343
1344 #if IMPORTANCE_INHERITANCE
1345 if (refs > 1)
1346 return;
1347
1348 if (refs == 1) {
1349 /*
1350 		 * If the last ref potentially comes from the task's importance,
1351 		 * disconnect it. But more task refs may be added before
1352 		 * that completes, so wait for the reference to go to zero
1353 		 * naturally (it may happen on a recursive task_deallocate()
1354 * from the ipc_importance_disconnect_task() call).
1355 */
1356 if (IIT_NULL != task->task_imp_base)
1357 ipc_importance_disconnect_task(task);
1358 return;
1359 }
1360 #else
1361 if (refs > 0)
1362 return;
1363 #endif /* IMPORTANCE_INHERITANCE */
1364
1365 lck_mtx_lock(&tasks_threads_lock);
1366 queue_remove(&terminated_tasks, task, task_t, tasks);
1367 terminated_tasks_count--;
1368 lck_mtx_unlock(&tasks_threads_lock);
1369
1370 /*
1371 * remove the reference on atm descriptor
1372 */
1373 task_atm_reset(task);
1374
1375 /*
1376 * remove the reference on bank context
1377 */
1378 task_bank_reset(task);
1379
1380 if (task->task_io_stats)
1381 kfree(task->task_io_stats, sizeof(struct io_stat_info));
1382
1383 /*
1384 * Give the machine dependent code a chance
1385 * to perform cleanup before ripping apart
1386 * the task.
1387 */
1388 machine_task_terminate(task);
1389
1390 ipc_task_terminate(task);
1391
1392 /* let iokit know */
1393 iokit_task_terminate(task);
1394
1395 if (task->affinity_space)
1396 task_affinity_deallocate(task);
1397
1398 #if MACH_ASSERT
1399 if (task->ledger != NULL &&
1400 task->map != NULL &&
1401 task->map->pmap != NULL &&
1402 task->map->pmap->ledger != NULL) {
1403 assert(task->ledger == task->map->pmap->ledger);
1404 }
1405 #endif /* MACH_ASSERT */
1406
1407 vm_purgeable_disown(task);
1408 assert(task->task_purgeable_disowned);
1409 if (task->task_volatile_objects != 0 ||
1410 task->task_nonvolatile_objects != 0) {
1411 panic("task_deallocate(%p): "
1412 "volatile_objects=%d nonvolatile_objects=%d\n",
1413 task,
1414 task->task_volatile_objects,
1415 task->task_nonvolatile_objects);
1416 }
1417
1418 vm_map_deallocate(task->map);
1419 is_release(task->itk_space);
1420
1421 ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
1422 &interrupt_wakeups, &debit);
1423 ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
1424 &platform_idle_wakeups, &debit);
1425
1426 #if defined(CONFIG_SCHED_MULTIQ)
1427 sched_group_destroy(task->sched_group);
1428 #endif
1429
1430 /* Accumulate statistics for dead tasks */
1431 lck_spin_lock(&dead_task_statistics_lock);
1432 dead_task_statistics.total_user_time += task->total_user_time;
1433 dead_task_statistics.total_system_time += task->total_system_time;
1434
1435 dead_task_statistics.task_interrupt_wakeups += interrupt_wakeups;
1436 dead_task_statistics.task_platform_idle_wakeups += platform_idle_wakeups;
1437
1438 dead_task_statistics.task_timer_wakeups_bin_1 += task->task_timer_wakeups_bin_1;
1439 dead_task_statistics.task_timer_wakeups_bin_2 += task->task_timer_wakeups_bin_2;
1440
1441 lck_spin_unlock(&dead_task_statistics_lock);
1442 lck_mtx_destroy(&task->lock, &task_lck_grp);
1443
1444 if (!ledger_get_entries(task->ledger, task_ledgers.tkm_private, &credit,
1445 &debit)) {
1446 OSAddAtomic64(credit, (int64_t *)&tasks_tkm_private.alloc);
1447 OSAddAtomic64(debit, (int64_t *)&tasks_tkm_private.free);
1448 }
1449 if (!ledger_get_entries(task->ledger, task_ledgers.tkm_shared, &credit,
1450 &debit)) {
1451 OSAddAtomic64(credit, (int64_t *)&tasks_tkm_shared.alloc);
1452 OSAddAtomic64(debit, (int64_t *)&tasks_tkm_shared.free);
1453 }
1454 ledger_dereference(task->ledger);
1455
1456 #if TASK_REFERENCE_LEAK_DEBUG
1457 btlog_remove_entries_for_element(task_ref_btlog, task);
1458 #endif
1459
1460 #if CONFIG_COALITIONS
1461 task_release_coalitions(task);
1462 #endif /* CONFIG_COALITIONS */
1463
1464 bzero(task->coalition, sizeof(task->coalition));
1465
1466 #if MACH_BSD
1467 /* clean up collected information since last reference to task is gone */
1468 if (task->corpse_info) {
1469 task_crashinfo_destroy(task->corpse_info, RELEASE_CORPSE_REF);
1470 task->corpse_info = NULL;
1471 }
1472 #endif
1473 if (task->corpse_info_kernel) {
1474 kfree(task->corpse_info_kernel, CORPSEINFO_ALLOCATION_SIZE);
1475 }
1476
1477 #if CONFIG_MACF
1478 if (task->crash_label) {
1479 mac_exc_action_label_task_destroy(task);
1480 }
1481 #endif
1482
1483 zfree(task_zone, task);
1484 }
1485
1486 /*
1487 * task_name_deallocate:
1488 *
1489 * Drop a reference on a task name.
1490 */
1491 void
1492 task_name_deallocate(
1493 task_name_t task_name)
1494 {
1495 return(task_deallocate((task_t)task_name));
1496 }
1497
1498 /*
1499 * task_suspension_token_deallocate:
1500 *
1501 * Drop a reference on a task suspension token.
1502 */
1503 void
1504 task_suspension_token_deallocate(
1505 task_suspension_token_t token)
1506 {
1507 return(task_deallocate((task_t)token));
1508 }
1509
1510
1511 /*
1512 * task_collect_crash_info:
1513 *
1514 * collect crash info from bsd and mach based data
1515 */
1516 kern_return_t
1517 task_collect_crash_info(task_t task, struct proc *proc, int is_corpse_fork)
1518 {
1519 kern_return_t kr = KERN_SUCCESS;
1520
1521 kcdata_descriptor_t crash_data = NULL;
1522 kcdata_descriptor_t crash_data_release = NULL;
1523 mach_msg_type_number_t size = CORPSEINFO_ALLOCATION_SIZE;
1524 mach_vm_offset_t crash_data_ptr = 0;
1525 void *crash_data_kernel = NULL;
1526 void *crash_data_kernel_release = NULL;
1527 int corpse_blob_kernel_alloc = (is_corpse_fork || unify_corpse_blob_alloc);
1528
1529 if (!corpses_enabled()) {
1530 return KERN_NOT_SUPPORTED;
1531 }
1532
1533 task_lock(task);
1534
1535 assert(is_corpse_fork || task->bsd_info != NULL);
1536 if (task->corpse_info == NULL && (is_corpse_fork || task->bsd_info != NULL)) {
1537 #if CONFIG_MACF
1538 /* Update the corpse label, used by the exception delivery mac hook */
1539 mac_exc_action_label_task_update(task, proc);
1540 #endif
1541 task_unlock(task);
1542
1543 if (!corpse_blob_kernel_alloc) {
1544 /* map crash data memory in task's vm map */
1545 kr = mach_vm_allocate(task->map, &crash_data_ptr, size, (VM_MAKE_TAG(VM_MEMORY_CORPSEINFO) | VM_FLAGS_ANYWHERE));
1546 } else {
1547 crash_data_kernel = (void *) kalloc(CORPSEINFO_ALLOCATION_SIZE);
1548 			if (crash_data_kernel == NULL)
1549 				kr = KERN_RESOURCE_SHORTAGE;
1550 			else bzero(crash_data_kernel, CORPSEINFO_ALLOCATION_SIZE); /* don't bzero() a failed allocation */
1551 crash_data_ptr = (mach_vm_offset_t) crash_data_kernel;
1552 }
1553 if (kr != KERN_SUCCESS)
1554 goto out_no_lock;
1555
1556 /* Do not get a corpse ref for corpse fork */
1557 crash_data = task_crashinfo_alloc_init((mach_vm_address_t)crash_data_ptr, size, is_corpse_fork ? !GET_CORPSE_REF : GET_CORPSE_REF, corpse_blob_kernel_alloc ? KCFLAG_USE_MEMCOPY: KCFLAG_USE_COPYOUT);
1558 if (crash_data) {
1559 task_lock(task);
1560 crash_data_release = task->corpse_info;
1561 crash_data_kernel_release = task->corpse_info_kernel;
1562 task->corpse_info = crash_data;
1563 task->corpse_info_kernel = crash_data_kernel;
1564
1565 task_unlock(task);
1566 kr = KERN_SUCCESS;
1567 } else {
1568 /* if failed to create corpse info, free the mapping */
1569 if (!corpse_blob_kernel_alloc) {
1570 if (KERN_SUCCESS != mach_vm_deallocate(task->map, crash_data_ptr, size)) {
1571 printf("mach_vm_deallocate failed to clear corpse_data for pid %d.\n", task_pid(task));
1572 }
1573 } else {
1574 kfree(crash_data_kernel, CORPSEINFO_ALLOCATION_SIZE);
1575 }
1576 kr = KERN_FAILURE;
1577 }
1578
1579 if (crash_data_release != NULL) {
1580 task_crashinfo_destroy(crash_data_release, is_corpse_fork ? !RELEASE_CORPSE_REF : RELEASE_CORPSE_REF);
1581 }
1582 if (crash_data_kernel_release != NULL) {
1583 kfree(crash_data_kernel_release, CORPSEINFO_ALLOCATION_SIZE);
1584 }
1585 } else {
1586 task_unlock(task);
1587 }
1588
1589 out_no_lock:
1590 return kr;
1591 }
1592
1593 /*
1594 * task_deliver_crash_notification:
1595 *
1596 * Makes outcall to registered host port for a corpse.
1597 */
1598 kern_return_t
1599 task_deliver_crash_notification(task_t task, thread_t thread, mach_exception_data_type_t subcode)
1600 {
1601 kcdata_descriptor_t crash_info = task->corpse_info;
1602 thread_t th_iter = NULL;
1603 kern_return_t kr = KERN_SUCCESS;
1604 wait_interrupt_t wsave;
1605 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
1606 ipc_port_t task_port, old_notify;
1607
1608 if (crash_info == NULL)
1609 return KERN_FAILURE;
1610
1611 task_lock(task);
1612 if (task_is_a_corpse_fork(task)) {
1613 /* Populate code with EXC_RESOURCE for corpse fork */
1614 code[0] = EXC_RESOURCE;
1615 code[1] = subcode;
1616 } else if (unify_corpse_blob_alloc) {
1617 /* Populate code with EXC_CRASH for corpses */
1618 code[0] = EXC_CRASH;
1619 code[1] = 0;
1620 /* Update the code[1] if the boot-arg corpse_for_fatal_memkill is set */
1621 if (corpse_for_fatal_memkill) {
1622 code[1] = subcode;
1623 }
1624 } else {
1625 /* Populate code with address and length for EXC_CRASH */
1626 code[0] = crash_info->kcd_addr_begin;
1627 code[1] = crash_info->kcd_length;
1628 }
1629 queue_iterate(&task->threads, th_iter, thread_t, task_threads)
1630 {
1631 if (th_iter->corpse_dup == FALSE) {
1632 ipc_thread_reset(th_iter);
1633 }
1634 }
1635 task_unlock(task);
1636
1637 /* Arm the no-sender notification for taskport */
1638 task_reference(task);
1639 task_port = convert_task_to_port(task);
1640 ip_lock(task_port);
1641 assert(ip_active(task_port));
1642 ipc_port_nsrequest(task_port, task_port->ip_mscount, ipc_port_make_sonce_locked(task_port), &old_notify);
1643 /* port unlocked */
1644 assert(IP_NULL == old_notify);
1645
1646 wsave = thread_interrupt_level(THREAD_UNINT);
1647 kr = exception_triage_thread(EXC_CORPSE_NOTIFY, code, EXCEPTION_CODE_MAX, thread);
1648 if (kr != KERN_SUCCESS) {
1649 printf("Failed to send exception EXC_CORPSE_NOTIFY. error code: %d for pid %d\n", kr, task_pid(task));
1650 }
1651
1652 (void)thread_interrupt_level(wsave);
1653
1654 /*
1655 * Drop the send right on the task port; this will fire the
1656 * no-senders notification if exception delivery failed.
1657 */
1658 ipc_port_release_send(task_port);
1659 return kr;
1660 }
1661
1662 /*
1663 * task_terminate:
1664 *
1665 * Terminate the specified task. See comments on thread_terminate
1666 * (kern/thread.c) about problems with terminating the "current task."
1667 */
1668
1669 kern_return_t
1670 task_terminate(
1671 task_t task)
1672 {
1673 if (task == TASK_NULL)
1674 return (KERN_INVALID_ARGUMENT);
1675
1676 if (task->bsd_info)
1677 return (KERN_FAILURE);
1678
1679 return (task_terminate_internal(task));
1680 }
1681
1682 #if MACH_ASSERT
1683 extern int proc_pid(struct proc *);
1684 extern void proc_name_kdp(task_t t, char *buf, int size);
1685 #endif /* MACH_ASSERT */
1686
1687 #define VM_MAP_PARTIAL_REAP 0x54 /* 0x150 */
1688 static void
1689 __unused task_partial_reap(task_t task, __unused int pid)
1690 {
1691 unsigned int reclaimed_resident = 0;
1692 unsigned int reclaimed_compressed = 0;
1693 uint64_t task_page_count;
1694
1695 task_page_count = (get_task_phys_footprint(task) / PAGE_SIZE_64);
1696
1697 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_START),
1698 pid, task_page_count, 0, 0, 0);
1699
1700 vm_map_partial_reap(task->map, &reclaimed_resident, &reclaimed_compressed);
1701
1702 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_END),
1703 pid, reclaimed_resident, reclaimed_compressed, 0, 0);
1704 }
1705
1706 kern_return_t
1707 task_mark_corpse(task_t task)
1708 {
1709 kern_return_t kr = KERN_SUCCESS;
1710 thread_t self_thread;
1711 (void) self_thread;
1712 wait_interrupt_t wsave;
1713
1714 assert(task != kernel_task);
1715 assert(task == current_task());
1716 assert(!task_is_a_corpse(task));
1717
1718 kr = task_collect_crash_info(task, (struct proc*)task->bsd_info, FALSE);
1719 if (kr != KERN_SUCCESS) {
1720 return kr;
1721 }
1722
1723 self_thread = current_thread();
1724
1725 wsave = thread_interrupt_level(THREAD_UNINT);
1726 task_lock(task);
1727
1728 task_set_corpse_pending_report(task);
1729 task_set_corpse(task);
1730
1731 kr = task_start_halt_locked(task, TRUE);
1732 assert(kr == KERN_SUCCESS);
1733
1734 ipc_task_reset(task);
1735 /* Remove the naked send right for the task port; this is needed to arm the no-senders notification */
1736 task_set_special_port(task, TASK_KERNEL_PORT, IPC_PORT_NULL);
1737 ipc_task_enable(task);
1738
1739 task_unlock(task);
1740 /* terminate the ipc space */
1741 ipc_space_terminate(task->itk_space);
1742
1743 /* Add it to global corpse task list */
1744 task_add_to_corpse_task_list(task);
1745
1746 task_start_halt(task);
1747 thread_terminate_internal(self_thread);
1748
1749 (void) thread_interrupt_level(wsave);
1750 assert(task->halting == TRUE);
1751 return kr;
1752 }
1753
1754 /*
1755 * task_clear_corpse
1756 *
1757 * Clears the corpse pending bit on task.
1758 * Removes inspection bit on the threads.
1759 */
1760 void
1761 task_clear_corpse(task_t task)
1762 {
1763 thread_t th_iter = NULL;
1764
1765 task_lock(task);
1766 queue_iterate(&task->threads, th_iter, thread_t, task_threads)
1767 {
1768 thread_mtx_lock(th_iter);
1769 th_iter->inspection = FALSE;
1770 thread_mtx_unlock(th_iter);
1771 }
1772
1773 thread_terminate_crashed_threads();
1774 /* remove the pending corpse report flag */
1775 task_clear_corpse_pending_report(task);
1776
1777 task_unlock(task);
1778 }
1779
1780 /*
1781 * task_port_notify
1782 *
1783 * Called whenever the Mach port system detects no-senders on
1784 * the task port of a corpse.
1785 * Each notification that comes in should terminate the task (corpse).
1786 */
1787 void
1788 task_port_notify(mach_msg_header_t *msg)
1789 {
1790 mach_no_senders_notification_t *notification = (void *)msg;
1791 ipc_port_t port = notification->not_header.msgh_remote_port;
1792 task_t task;
1793
1794 assert(ip_active(port));
1795 assert(IKOT_TASK == ip_kotype(port));
1796 task = (task_t) port->ip_kobject;
1797
1798 assert(task_is_a_corpse(task));
1799
1800 /* Remove the task from global corpse task list */
1801 task_remove_from_corpse_task_list(task);
1802
1803 task_clear_corpse(task);
1804 task_terminate_internal(task);
1805 }
1806
1807 /*
1808 * task_wait_till_threads_terminate_locked
1809 *
1810 * Wait till all the threads in the task are terminated.
1811 * Might release the task lock and re-acquire it.
1812 */
1813 void
1814 task_wait_till_threads_terminate_locked(task_t task)
1815 {
1816 /* wait for all the threads in the task to terminate */
1817 while (task->active_thread_count != 0) {
1818 assert_wait((event_t)&task->active_thread_count, THREAD_UNINT);
1819 task_unlock(task);
1820 thread_block(THREAD_CONTINUE_NULL);
1821
1822 task_lock(task);
1823 }
1824 }
1825
1826 /*
1827 * task_duplicate_map_and_threads
1828 *
1829 * Copy vmmap of source task.
1830 * Copy active threads from source task to destination task.
1831 * The source task is suspended for the duration of the copy.
1832 */
1833 kern_return_t
1834 task_duplicate_map_and_threads(
1835 task_t task,
1836 void *p,
1837 task_t new_task,
1838 thread_t *thread_ret,
1839 int is64bit,
1840 uint64_t **udata_buffer,
1841 int *size,
1842 int *num_udata)
1843 {
1844 kern_return_t kr = KERN_SUCCESS;
1845 int active;
1846 thread_t thread, self, thread_return = THREAD_NULL;
1847 thread_t new_thread = THREAD_NULL;
1848 thread_t *thread_array;
1849 uint32_t active_thread_count = 0, array_count = 0, i;
1850 vm_map_t oldmap;
1851 uint64_t *buffer = NULL;
1852 int buf_size = 0;
1853 int est_knotes = 0, num_knotes = 0;
1854
1855 self = current_thread();
1856
1857 /*
1858 * Suspend the task to copy thread state; use the internal
1859 * variant so that no user-space process can resume
1860 * the task from under us.
1861 */
1862 kr = task_suspend_internal(task);
1863 if (kr != KERN_SUCCESS) {
1864 return kr;
1865 }
1866
1867 if (task->map->disable_vmentry_reuse == TRUE) {
1868 /*
1869 * Quite likely GuardMalloc (or some debugging tool)
1870 * is being used on this task. And it has gone through
1871 * its limit. Making a corpse will likely encounter
1872 * a lot of VM entries that will need COW.
1873 *
1874 * Skip it.
1875 */
1876 task_resume_internal(task);
1877 return KERN_FAILURE;
1878 }
1879
1880 /* Set up the new task's vmmap: switch from the parent task's map to its COW copy */
1881 oldmap = new_task->map;
1882 new_task->map = vm_map_fork(new_task->ledger,
1883 task->map,
1884 (VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
1885 VM_MAP_FORK_PRESERVE_PURGEABLE));
1886 vm_map_deallocate(oldmap);
1887
1888 if (is64bit) {
1889 vm_map_set_64bit(get_task_map(new_task));
1890 } else {
1891 vm_map_set_32bit(get_task_map(new_task));
1892 }
1893
1894 /* Get all the udata pointers from kqueue */
1895 est_knotes = proc_list_uptrs(p, NULL, 0);
1896 if (est_knotes > 0) {
1897 buf_size = (est_knotes + 32) * sizeof(uint64_t);
1898 buffer = (uint64_t *) kalloc(buf_size);
1899 num_knotes = proc_list_uptrs(p, buffer, buf_size);
1900 if (num_knotes > est_knotes + 32) {
1901 num_knotes = est_knotes + 32;
1902 }
1903 }
1904
1905 active_thread_count = task->active_thread_count;
1906 if (active_thread_count == 0) {
1907 if (buffer != NULL) {
1908 kfree(buffer, buf_size);
1909 }
1910 task_resume_internal(task);
1911 return KERN_FAILURE;
1912 }
1913
1914 thread_array = (thread_t *) kalloc(sizeof(thread_t) * active_thread_count);
1915
1916 /* Take a reference on each active thread under the task lock; drop the lock before calling thread_create_with_continuation */
1917 task_lock(task);
1918 queue_iterate(&task->threads, thread, thread_t, task_threads) {
1919 /* Skip inactive threads */
1920 active = thread->active;
1921 if (!active) {
1922 continue;
1923 }
1924
1925 if (array_count >= active_thread_count) {
1926 break;
1927 }
1928
1929 thread_array[array_count++] = thread;
1930 thread_reference(thread);
1931 }
1932 task_unlock(task);
1933
1934 for (i = 0; i < array_count; i++) {
1935
1936 kr = thread_create_with_continuation(new_task, &new_thread, (thread_continue_t)thread_corpse_continue);
1937 if (kr != KERN_SUCCESS) {
1938 break;
1939 }
1940
1941 /* Equivalent of current thread in corpse */
1942 if (thread_array[i] == self) {
1943 thread_return = new_thread;
1944 } else {
1945 /* drop the extra ref returned by thread_create_with_continuation */
1946 thread_deallocate(new_thread);
1947 }
1948
1949 kr = thread_dup2(thread_array[i], new_thread);
1950 if (kr != KERN_SUCCESS) {
1951 thread_mtx_lock(new_thread);
1952 new_thread->corpse_dup = TRUE;
1953 thread_mtx_unlock(new_thread);
1954 continue;
1955 }
1956
1957 /* Copy thread name */
1958 bsd_copythreadname(new_thread->uthread, thread_array[i]->uthread);
1959 thread_copy_resource_info(new_thread, thread_array[i]);
1960 }
1961
1962 task_resume_internal(task);
1963
1964 for (i = 0; i < array_count; i++) {
1965 thread_deallocate(thread_array[i]);
1966 }
1967 kfree(thread_array, sizeof(thread_t) * active_thread_count);
1968
1969 if (kr == KERN_SUCCESS) {
1970 *thread_ret = thread_return;
1971 *udata_buffer = buffer;
1972 *size = buf_size;
1973 *num_udata = num_knotes;
1974 } else {
1975 if (thread_return != THREAD_NULL) {
1976 thread_deallocate(thread_return);
1977 }
1978 if (buffer != NULL) {
1979 kfree(buffer, buf_size);
1980 }
1981 }
1982
1983 return kr;
1984 }
1985
1986 #if CONFIG_SECLUDED_MEMORY
1987 extern void task_set_can_use_secluded_mem_locked(
1988 task_t task,
1989 boolean_t can_use_secluded_mem);
1990 #endif /* CONFIG_SECLUDED_MEMORY */
1991
1992 kern_return_t
1993 task_terminate_internal(
1994 task_t task)
1995 {
1996 thread_t thread, self;
1997 task_t self_task;
1998 boolean_t interrupt_save;
1999 int pid = 0;
2000
2001 assert(task != kernel_task);
2002
2003 self = current_thread();
2004 self_task = self->task;
2005
2006 /*
2007 * Get the task locked and make sure that we are not racing
2008 * with someone else trying to terminate us.
2009 */
2010 if (task == self_task)
2011 task_lock(task);
2012 else
2013 if (task < self_task) {
2014 task_lock(task);
2015 task_lock(self_task);
2016 }
2017 else {
2018 task_lock(self_task);
2019 task_lock(task);
2020 }
2021
2022 #if CONFIG_SECLUDED_MEMORY
2023 if (task->task_can_use_secluded_mem) {
2024 task_set_can_use_secluded_mem_locked(task, FALSE);
2025 }
2026 task->task_could_use_secluded_mem = FALSE;
2027 task->task_could_also_use_secluded_mem = FALSE;
2028 #endif /* CONFIG_SECLUDED_MEMORY */
2029
2030 if (!task->active) {
2031 /*
2032 * Task is already being terminated.
2033 * Just return an error. If we are dying, this will
2034 * just get us to our AST special handler and that
2035 * will get us to finalize the termination of ourselves.
2036 */
2037 task_unlock(task);
2038 if (self_task != task)
2039 task_unlock(self_task);
2040
2041 return (KERN_FAILURE);
2042 }
2043
2044 if (task_corpse_pending_report(task)) {
2045 /*
2046 * Task is marked for reporting as corpse.
2047 * Just return an error. This will
2048 * just get us to our AST special handler and that
2049 * will get us to finish the path to death
2050 */
2051 task_unlock(task);
2052 if (self_task != task)
2053 task_unlock(self_task);
2054
2055 return (KERN_FAILURE);
2056 }
2057
2058 if (self_task != task)
2059 task_unlock(self_task);
2060
2061 /*
2062 * Make sure the current thread does not get aborted out of
2063 * the waits inside these operations.
2064 */
2065 interrupt_save = thread_interrupt_level(THREAD_UNINT);
2066
2067 /*
2068 * Indicate that we want all the threads to stop executing
2069 * at user space by holding the task (we would have held
2070 * each thread independently in thread_terminate_internal -
2071 * but this way we may be more likely to already find it
2072 * held there). Mark the task inactive, and prevent
2073 * further task operations via the task port.
2074 */
2075 task_hold_locked(task);
2076 task->active = FALSE;
2077 ipc_task_disable(task);
2078
2079 #if CONFIG_TELEMETRY
2080 /*
2081 * Notify telemetry that this task is going away.
2082 */
2083 telemetry_task_ctl_locked(task, TF_TELEMETRY, 0);
2084 #endif
2085
2086 /*
2087 * Terminate each thread in the task.
2088 */
2089 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2090 thread_terminate_internal(thread);
2091 }
2092
2093 #ifdef MACH_BSD
2094 if (task->bsd_info != NULL && !task_is_exec_copy(task)) {
2095 pid = proc_pid(task->bsd_info);
2096 }
2097 #endif /* MACH_BSD */
2098
2099 task_unlock(task);
2100
2101 proc_set_task_policy(task, TASK_POLICY_ATTRIBUTE,
2102 TASK_POLICY_TERMINATED, TASK_POLICY_ENABLE);
2103
2104 /* Early object reap phase */
2105
2106 // PR-17045188: Revisit implementation
2107 // task_partial_reap(task, pid);
2108
2109
2110 /*
2111 * Destroy all synchronizers owned by the task.
2112 */
2113 task_synchronizer_destroy_all(task);
2114
2115 /*
2116 * Destroy the IPC space, leaving just a reference for it.
2117 */
2118 ipc_space_terminate(task->itk_space);
2119
2120 #if 00
2121 /* if some ledgers go negative on tear-down again... */
2122 ledger_disable_panic_on_negative(task->map->pmap->ledger,
2123 task_ledgers.phys_footprint);
2124 ledger_disable_panic_on_negative(task->map->pmap->ledger,
2125 task_ledgers.internal);
2126 ledger_disable_panic_on_negative(task->map->pmap->ledger,
2127 task_ledgers.internal_compressed);
2128 ledger_disable_panic_on_negative(task->map->pmap->ledger,
2129 task_ledgers.iokit_mapped);
2130 ledger_disable_panic_on_negative(task->map->pmap->ledger,
2131 task_ledgers.alternate_accounting);
2132 ledger_disable_panic_on_negative(task->map->pmap->ledger,
2133 task_ledgers.alternate_accounting_compressed);
2134 #endif
2135
2136 /*
2137 * If the current thread is a member of the task
2138 * being terminated, then the last reference to
2139 * the task will not be dropped until the thread
2140 * is finally reaped. To avoid incurring the
2141 * expense of removing the address space regions
2142 * at reap time, we do it explicitly here.
2143 */
2144
2145 vm_map_lock(task->map);
2146 vm_map_disable_hole_optimization(task->map);
2147 vm_map_unlock(task->map);
2148
2149 vm_map_remove(task->map,
2150 task->map->min_offset,
2151 task->map->max_offset,
2152 /* no unnesting on final cleanup: */
2153 VM_MAP_REMOVE_NO_UNNESTING);
2154
2155 /* release our shared region */
2156 vm_shared_region_set(task, NULL);
2157
2158
2159 #if MACH_ASSERT
2160 /*
2161 * Identify the pmap's process, in case the pmap ledgers drift
2162 * and we have to report it.
2163 */
2164 char procname[17];
2165 if (task->bsd_info && !task_is_exec_copy(task)) {
2166 pid = proc_pid(task->bsd_info);
2167 proc_name_kdp(task, procname, sizeof (procname));
2168 } else {
2169 pid = 0;
2170 strlcpy(procname, "<unknown>", sizeof (procname));
2171 }
2172 pmap_set_process(task->map->pmap, pid, procname);
2173 #endif /* MACH_ASSERT */
2174
2175 lck_mtx_lock(&tasks_threads_lock);
2176 queue_remove(&tasks, task, task_t, tasks);
2177 queue_enter(&terminated_tasks, task, task_t, tasks);
2178 tasks_count--;
2179 terminated_tasks_count++;
2180 lck_mtx_unlock(&tasks_threads_lock);
2181
2182 /*
2183 * We no longer need to guard against being aborted, so restore
2184 * the previous interruptible state.
2185 */
2186 thread_interrupt_level(interrupt_save);
2187
2188 #if KPERF
2189 /* force the task to release all ctrs */
2190 if (task->t_chud & TASK_KPC_FORCED_ALL_CTRS)
2191 kpc_force_all_ctrs(task, 0);
2192 #endif
2193
2194 #if CONFIG_COALITIONS
2195 /*
2196 * Leave our coalitions. (drop activation but not reference)
2197 */
2198 coalitions_remove_task(task);
2199 #endif
2200
2201 /*
2202 * Get rid of the task active reference on itself.
2203 */
2204 task_deallocate(task);
2205
2206 return (KERN_SUCCESS);
2207 }
2208
2209 void
2210 tasks_system_suspend(boolean_t suspend)
2211 {
2212 task_t task;
2213
2214 lck_mtx_lock(&tasks_threads_lock);
2215 assert(tasks_suspend_state != suspend);
2216 tasks_suspend_state = suspend;
2217 queue_iterate(&tasks, task, task_t, tasks) {
2218 if (task == kernel_task) {
2219 continue;
2220 }
2221 suspend ? task_suspend_internal(task) : task_resume_internal(task);
2222 }
2223 lck_mtx_unlock(&tasks_threads_lock);
2224 }
2225
2226 /*
2227 * task_start_halt:
2228 *
2229 * Shut the current task down (except for the current thread) in
2230 * preparation for dramatic changes to the task (probably exec).
2231 * We hold the task and mark all other threads in the task for
2232 * termination.
2233 */
2234 kern_return_t
2235 task_start_halt(task_t task)
2236 {
2237 kern_return_t kr = KERN_SUCCESS;
2238 task_lock(task);
2239 kr = task_start_halt_locked(task, FALSE);
2240 task_unlock(task);
2241 return kr;
2242 }
2243
2244 static kern_return_t
2245 task_start_halt_locked(task_t task, boolean_t should_mark_corpse)
2246 {
2247 thread_t thread, self;
2248 uint64_t dispatchqueue_offset;
2249
2250 assert(task != kernel_task);
2251
2252 self = current_thread();
2253
2254 if (task != self->task && !task_is_a_corpse_fork(task))
2255 return (KERN_INVALID_ARGUMENT);
2256
2257 if (task->halting || !task->active || !self->active) {
2258 /*
2259 * Task or current thread is already being terminated.
2260 * Hurry up and return out of the current kernel context
2261 * so that we run our AST special handler to terminate
2262 * ourselves.
2263 */
2264 return (KERN_FAILURE);
2265 }
2266
2267 task->halting = TRUE;
2268
2269 /*
2270 * Mark all the threads to keep them from starting any more
2271 * user-level execution. The thread_terminate_internal code
2272 * would do this on a thread by thread basis anyway, but this
2273 * gives us a better chance of not having to wait there.
2274 */
2275 task_hold_locked(task);
2276 dispatchqueue_offset = get_dispatchqueue_offset_from_proc(task->bsd_info);
2277
2278 /*
2279 * Terminate all the other threads in the task.
2280 */
2281 queue_iterate(&task->threads, thread, thread_t, task_threads)
2282 {
2283 if (should_mark_corpse) {
2284 thread_mtx_lock(thread);
2285 thread->inspection = TRUE;
2286 thread_mtx_unlock(thread);
2287 }
2288 if (thread != self)
2289 thread_terminate_internal(thread);
2290 }
2291 task->dispatchqueue_offset = dispatchqueue_offset;
2292
2293 task_release_locked(task);
2294
2295 return KERN_SUCCESS;
2296 }
2297
2298
2299 /*
2300 * task_complete_halt:
2301 *
2302 * Complete task halt by waiting for threads to terminate, then clean
2303 * up task resources (VM, port namespace, etc...) and then let the
2304 * current thread go in the (practically empty) task context.
2305 *
2306 * Note: the task->halting flag is not cleared, in order to avoid creation
2307 * of a new thread in the old exec'ed task.
2308 */
2309 void
2310 task_complete_halt(task_t task)
2311 {
2312 task_lock(task);
2313 assert(task->halting);
2314 assert(task == current_task());
2315
2316 /*
2317 * Wait for the other threads to get shut down.
2318 * When the last other thread is reaped, we'll be
2319 * woken up.
2320 */
2321 if (task->thread_count > 1) {
2322 assert_wait((event_t)&task->halting, THREAD_UNINT);
2323 task_unlock(task);
2324 thread_block(THREAD_CONTINUE_NULL);
2325 } else {
2326 task_unlock(task);
2327 }
2328
2329 /*
2330 * Give the machine dependent code a chance
2331 * to perform cleanup of task-level resources
2332 * associated with the current thread before
2333 * ripping apart the task.
2334 */
2335 machine_task_terminate(task);
2336
2337 /*
2338 * Destroy all synchronizers owned by the task.
2339 */
2340 task_synchronizer_destroy_all(task);
2341
2342 /*
2343 * Destroy the contents of the IPC space, leaving just
2344 * a reference for it.
2345 */
2346 ipc_space_clean(task->itk_space);
2347
2348 /*
2349 * Clean out the address space, as we are going to be
2350 * getting a new one.
2351 */
2352 vm_map_remove(task->map, task->map->min_offset,
2353 task->map->max_offset,
2354 /* no unnesting on final cleanup: */
2355 VM_MAP_REMOVE_NO_UNNESTING);
2356
2357 /*
2358 * Kick out any IOKitUser handles to the task. At best they're stale,
2359 * at worst someone is racing a SUID exec.
2360 */
2361 iokit_task_terminate(task);
2362 }
2363
2364 /*
2365 * task_hold_locked:
2366 *
2367 * Suspend execution of the specified task.
2368 * This is a recursive-style suspension of the task; a count of
2369 * suspends is maintained.
2370 *
2371 * CONDITIONS: the task is locked and active.
2372 */
2373 void
2374 task_hold_locked(
2375 task_t task)
2376 {
2377 thread_t thread;
2378
2379 assert(task->active);
2380
2381 if (task->suspend_count++ > 0)
2382 return;
2383
2384 /*
2385 * Iterate through all the threads and hold them.
2386 */
2387 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2388 thread_mtx_lock(thread);
2389 thread_hold(thread);
2390 thread_mtx_unlock(thread);
2391 }
2392 }
2393
2394 /*
2395 * task_hold:
2396 *
2397 * Same as the internal routine above, except that it must lock
2398 * and verify that the task is active. This differs from task_suspend
2399 * in that it places a kernel hold on the task rather than just a
2400 * user-level hold. This keeps users from over-resuming and setting
2401 * it running out from under the kernel.
2402 *
2403 * CONDITIONS: the caller holds a reference on the task
2404 */
2405 kern_return_t
2406 task_hold(
2407 task_t task)
2408 {
2409 if (task == TASK_NULL)
2410 return (KERN_INVALID_ARGUMENT);
2411
2412 task_lock(task);
2413
2414 if (!task->active) {
2415 task_unlock(task);
2416
2417 return (KERN_FAILURE);
2418 }
2419
2420 task_hold_locked(task);
2421 task_unlock(task);
2422
2423 return (KERN_SUCCESS);
2424 }
2425
2426 kern_return_t
2427 task_wait(
2428 task_t task,
2429 boolean_t until_not_runnable)
2430 {
2431 if (task == TASK_NULL)
2432 return (KERN_INVALID_ARGUMENT);
2433
2434 task_lock(task);
2435
2436 if (!task->active) {
2437 task_unlock(task);
2438
2439 return (KERN_FAILURE);
2440 }
2441
2442 task_wait_locked(task, until_not_runnable);
2443 task_unlock(task);
2444
2445 return (KERN_SUCCESS);
2446 }
2447
2448 /*
2449 * task_wait_locked:
2450 *
2451 * Wait for all threads in task to stop.
2452 *
2453 * Conditions:
2454 * Called with task locked, active, and held.
2455 */
2456 void
2457 task_wait_locked(
2458 task_t task,
2459 boolean_t until_not_runnable)
2460 {
2461 thread_t thread, self;
2462
2463 assert(task->active);
2464 assert(task->suspend_count > 0);
2465
2466 self = current_thread();
2467
2468 /*
2469 * Iterate through all the threads and wait for them to
2470 * stop. Do not wait for the current thread if it is within
2471 * the task.
2472 */
2473 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2474 if (thread != self)
2475 thread_wait(thread, until_not_runnable);
2476 }
2477 }
2478
2479 /*
2480 * task_release_locked:
2481 *
2482 * Release a kernel hold on a task.
2483 *
2484 * CONDITIONS: the task is locked and active
2485 */
2486 void
2487 task_release_locked(
2488 task_t task)
2489 {
2490 thread_t thread;
2491
2492 assert(task->active);
2493 assert(task->suspend_count > 0);
2494
2495 if (--task->suspend_count > 0)
2496 return;
2497
2498 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2499 thread_mtx_lock(thread);
2500 thread_release(thread);
2501 thread_mtx_unlock(thread);
2502 }
2503 }
2504
2505 /*
2506 * task_release:
2507 *
2508 * Same as the internal routine above, except that it must lock
2509 * and verify that the task is active.
2510 *
2511 * CONDITIONS: The caller holds a reference to the task
2512 */
2513 kern_return_t
2514 task_release(
2515 task_t task)
2516 {
2517 if (task == TASK_NULL)
2518 return (KERN_INVALID_ARGUMENT);
2519
2520 task_lock(task);
2521
2522 if (!task->active) {
2523 task_unlock(task);
2524
2525 return (KERN_FAILURE);
2526 }
2527
2528 task_release_locked(task);
2529 task_unlock(task);
2530
2531 return (KERN_SUCCESS);
2532 }
2533
2534 kern_return_t
2535 task_threads(
2536 task_t task,
2537 thread_act_array_t *threads_out,
2538 mach_msg_type_number_t *count)
2539 {
2540 mach_msg_type_number_t actual;
2541 thread_t *thread_list;
2542 thread_t thread;
2543 vm_size_t size, size_needed;
2544 void *addr;
2545 unsigned int i, j;
2546
2547 if (task == TASK_NULL)
2548 return (KERN_INVALID_ARGUMENT);
2549
2550 size = 0; addr = NULL;
2551
2552 for (;;) {
2553 task_lock(task);
2554 if (!task->active) {
2555 task_unlock(task);
2556
2557 if (size != 0)
2558 kfree(addr, size);
2559
2560 return (KERN_FAILURE);
2561 }
2562
2563 actual = task->thread_count;
2564
2565 /* do we have the memory we need? */
2566 size_needed = actual * sizeof (mach_port_t);
2567 if (size_needed <= size)
2568 break;
2569
2570 /* unlock the task and allocate more memory */
2571 task_unlock(task);
2572
2573 if (size != 0)
2574 kfree(addr, size);
2575
2576 assert(size_needed > 0);
2577 size = size_needed;
2578
2579 addr = kalloc(size);
2580 if (addr == 0)
2581 return (KERN_RESOURCE_SHORTAGE);
2582 }
2583
2584 /* OK, have memory and the task is locked & active */
2585 thread_list = (thread_t *)addr;
2586
2587 i = j = 0;
2588
2589 for (thread = (thread_t)queue_first(&task->threads); i < actual;
2590 ++i, thread = (thread_t)queue_next(&thread->task_threads)) {
2591 thread_reference_internal(thread);
2592 thread_list[j++] = thread;
2593 }
2594
2595 assert(queue_end(&task->threads, (queue_entry_t)thread));
2596
2597 actual = j;
2598 size_needed = actual * sizeof (mach_port_t);
2599
2600 /* can unlock task now that we've got the thread refs */
2601 task_unlock(task);
2602
2603 if (actual == 0) {
2604 /* no threads, so return null pointer and deallocate memory */
2605
2606 *threads_out = NULL;
2607 *count = 0;
2608
2609 if (size != 0)
2610 kfree(addr, size);
2611 }
2612 else {
2613 /* if we allocated too much, must copy */
2614
2615 if (size_needed < size) {
2616 void *newaddr;
2617
2618 newaddr = kalloc(size_needed);
2619 if (newaddr == 0) {
2620 for (i = 0; i < actual; ++i)
2621 thread_deallocate(thread_list[i]);
2622 kfree(addr, size);
2623 return (KERN_RESOURCE_SHORTAGE);
2624 }
2625
2626 bcopy(addr, newaddr, size_needed);
2627 kfree(addr, size);
2628 thread_list = (thread_t *)newaddr;
2629 }
2630
2631 *threads_out = thread_list;
2632 *count = actual;
2633
2634 /* do the conversion that Mig should handle */
2635
2636 for (i = 0; i < actual; ++i)
2637 ((ipc_port_t *) thread_list)[i] = convert_thread_to_port(thread_list[i]);
2638 }
2639
2640 return (KERN_SUCCESS);
2641 }
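
/*
 * Illustrative user-space caller of task_threads() (a sketch, not part
 * of this file). The reply carries the thread list as out-of-line VM and
 * each element is a send right, so the caller must deallocate both:
 *
 *	#include <mach/mach.h>
 *	#include <stdio.h>
 *
 *	static void
 *	count_threads(task_t target)
 *	{
 *		thread_act_array_t threads;
 *		mach_msg_type_number_t count, i;
 *
 *		if (task_threads(target, &threads, &count) != KERN_SUCCESS)
 *			return;
 *		printf("%u threads\n", count);
 *		for (i = 0; i < count; i++)
 *			mach_port_deallocate(mach_task_self(), threads[i]);
 *		vm_deallocate(mach_task_self(), (vm_address_t)threads,
 *		    count * sizeof(threads[0]));
 *	}
 */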
2642
2643 #define TASK_HOLD_NORMAL 0
2644 #define TASK_HOLD_PIDSUSPEND 1
2645 #define TASK_HOLD_LEGACY 2
2646 #define TASK_HOLD_LEGACY_ALL 3
2647
2648 static kern_return_t
2649 place_task_hold (
2650 task_t task,
2651 int mode)
2652 {
2653 if (!task->active && !task_is_a_corpse(task)) {
2654 return (KERN_FAILURE);
2655 }
2656
2657 /* Return success for corpse task */
2658 if (task_is_a_corpse(task)) {
2659 return KERN_SUCCESS;
2660 }
2661
2662 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2663 MACHDBG_CODE(DBG_MACH_IPC,MACH_TASK_SUSPEND) | DBG_FUNC_NONE,
2664 task_pid(task), ((thread_t)queue_first(&task->threads))->thread_id,
2665 task->user_stop_count, task->user_stop_count + 1, 0);
2666
2667 #if MACH_ASSERT
2668 current_task()->suspends_outstanding++;
2669 #endif
2670
2671 if (mode == TASK_HOLD_LEGACY)
2672 task->legacy_stop_count++;
2673
2674 if (task->user_stop_count++ > 0) {
2675 /*
2676 * If the stop count was positive, the task is
2677 * already stopped and we can exit.
2678 */
2679 return (KERN_SUCCESS);
2680 }
2681
2682 /*
2683 * Put a kernel-level hold on the threads in the task (all
2684 * user-level task suspensions added together represent a
2685 * single kernel-level hold). We then wait for the threads
2686 * to stop executing user code.
2687 */
2688 task_hold_locked(task);
2689 task_wait_locked(task, FALSE);
2690
2691 return (KERN_SUCCESS);
2692 }
2693
2694 static kern_return_t
2695 release_task_hold (
2696 task_t task,
2697 int mode)
2698 {
2699 boolean_t release = FALSE;
2700
2701 if (!task->active && !task_is_a_corpse(task)) {
2702 return (KERN_FAILURE);
2703 }
2704
2705 /* Return success for corpse task */
2706 if (task_is_a_corpse(task)) {
2707 return KERN_SUCCESS;
2708 }
2709
2710 if (mode == TASK_HOLD_PIDSUSPEND) {
2711 if (task->pidsuspended == FALSE) {
2712 return (KERN_FAILURE);
2713 }
2714 task->pidsuspended = FALSE;
2715 }
2716
2717 if (task->user_stop_count > (task->pidsuspended ? 1 : 0)) {
2718
2719 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2720 MACHDBG_CODE(DBG_MACH_IPC,MACH_TASK_RESUME) | DBG_FUNC_NONE,
2721 task_pid(task), ((thread_t)queue_first(&task->threads))->thread_id,
2722 task->user_stop_count, mode, task->legacy_stop_count);
2723
2724 #if MACH_ASSERT
2725 /*
2726 * This is obviously not robust; if we suspend one task and then resume a different one,
2727 * we'll fly under the radar. This is only meant to catch the common case of a crashed
2728 * or buggy suspender.
2729 */
2730 current_task()->suspends_outstanding--;
2731 #endif
2732
2733 if (mode == TASK_HOLD_LEGACY_ALL) {
2734 if (task->legacy_stop_count >= task->user_stop_count) {
2735 task->user_stop_count = 0;
2736 release = TRUE;
2737 } else {
2738 task->user_stop_count -= task->legacy_stop_count;
2739 }
2740 task->legacy_stop_count = 0;
2741 } else {
2742 if (mode == TASK_HOLD_LEGACY && task->legacy_stop_count > 0)
2743 task->legacy_stop_count--;
2744 if (--task->user_stop_count == 0)
2745 release = TRUE;
2746 }
2747 }
2748 else {
2749 return (KERN_FAILURE);
2750 }
2751
2752 /*
2753 * Release the task if necessary.
2754 */
2755 if (release)
2756 task_release_locked(task);
2757
2758 return (KERN_SUCCESS);
2759 }
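
/*
 * Worked example of the hold accounting above, assuming no pidsuspend is
 * in effect: two legacy task_suspend() calls leave user_stop_count == 2
 * and legacy_stop_count == 2, with a single kernel-level hold placed on
 * the first call. One task_resume() (TASK_HOLD_LEGACY) drops both counts
 * to 1 and the task stays held. If the remaining suspension send right is
 * then destroyed and the resume port goes to no-senders, the
 * TASK_HOLD_LEGACY_ALL path zeroes both counts and performs the single
 * task_release_locked() that lets the threads run again.
 */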
2760
2761
2762 /*
2763 * task_suspend:
2764 *
2765 * Implement an (old-fashioned) user-level suspension on a task.
2766 *
2767 * Because the user isn't expecting to have to manage a suspension
2768 * token, we'll track it for him in the kernel in the form of a naked
2769 * send right to the task's resume port. All such send rights
2770 * account for a single suspension against the task (unlike task_suspend2()
2771 * where each caller gets a unique suspension count represented by a
2772 * unique send-once right).
2773 *
2774 * Conditions:
2775 * The caller holds a reference to the task
2776 */
2777 kern_return_t
2778 task_suspend(
2779 task_t task)
2780 {
2781 kern_return_t kr;
2782 mach_port_t port, send, old_notify;
2783 mach_port_name_t name;
2784
2785 if (task == TASK_NULL || task == kernel_task)
2786 return (KERN_INVALID_ARGUMENT);
2787
2788 task_lock(task);
2789
2790 /*
2791 * Claim a send right on the task resume port, and request a no-senders
2792 * notification on that port (if none outstanding).
2793 */
2794 if (task->itk_resume == IP_NULL) {
2795 task->itk_resume = ipc_port_alloc_kernel();
2796 if (!IP_VALID(task->itk_resume))
2797 panic("failed to create resume port");
2798 ipc_kobject_set(task->itk_resume, (ipc_kobject_t)task, IKOT_TASK_RESUME);
2799 }
2800
2801 port = task->itk_resume;
2802 ip_lock(port);
2803 assert(ip_active(port));
2804
2805 send = ipc_port_make_send_locked(port);
2806 assert(IP_VALID(send));
2807
2808 if (port->ip_nsrequest == IP_NULL) {
2809 ipc_port_nsrequest(port, port->ip_mscount, ipc_port_make_sonce_locked(port), &old_notify);
2810 assert(old_notify == IP_NULL);
2811 /* port unlocked */
2812 } else {
2813 ip_unlock(port);
2814 }
2815
2816 /*
2817 * place a legacy hold on the task.
2818 */
2819 kr = place_task_hold(task, TASK_HOLD_LEGACY);
2820 if (kr != KERN_SUCCESS) {
2821 task_unlock(task);
2822 ipc_port_release_send(send);
2823 return kr;
2824 }
2825
2826 task_unlock(task);
2827
2828 /*
2829 * Copyout the send right into the calling task's IPC space. It won't know it is there,
2830 * but we'll look it up when calling a traditional resume. Any IPC operations that
2831 * deallocate the send right will auto-release the suspension.
2832 */
2833 if ((kr = ipc_kmsg_copyout_object(current_task()->itk_space, (ipc_object_t)send,
2834 MACH_MSG_TYPE_MOVE_SEND, &name)) != KERN_SUCCESS) {
2835 printf("warning: %s(%d) failed to copyout suspension token for pid %d with error: %d\n",
2836 proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info),
2837 task_pid(task), kr);
2838 return (kr);
2839 }
2840
2841 return (kr);
2842 }
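
/*
 * Illustrative user-space pairing of task_suspend()/task_resume() (a
 * sketch, not part of this file; obtaining a task port for another
 * process, e.g. via task_for_pid(), requires the appropriate privilege):
 *
 *	#include <mach/mach.h>
 *
 *	static kern_return_t
 *	stop_briefly(task_t target)
 *	{
 *		kern_return_t kr = task_suspend(target);
 *		if (kr != KERN_SUCCESS)
 *			return kr;
 *		// ... inspect the stopped task here ...
 *		return task_resume(target);
 *	}
 *
 * Each successful task_suspend() must be balanced by a task_resume();
 * the kernel tracks the naked send right described above on the
 * caller's behalf.
 */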
2843
2844 /*
2845 * task_resume:
2846 * Release a user hold on a task.
2847 *
2848 * Conditions:
2849 * The caller holds a reference to the task
2850 */
2851 kern_return_t
2852 task_resume(
2853 task_t task)
2854 {
2855 kern_return_t kr;
2856 mach_port_name_t resume_port_name;
2857 ipc_entry_t resume_port_entry;
2858 ipc_space_t space = current_task()->itk_space;
2859
2860 if (task == TASK_NULL || task == kernel_task )
2861 return (KERN_INVALID_ARGUMENT);
2862
2863 /* release a legacy task hold */
2864 task_lock(task);
2865 kr = release_task_hold(task, TASK_HOLD_LEGACY);
2866 task_unlock(task);
2867
2868 is_write_lock(space);
2869 if (is_active(space) && IP_VALID(task->itk_resume) &&
2870 ipc_hash_lookup(space, (ipc_object_t)task->itk_resume, &resume_port_name, &resume_port_entry) == TRUE) {
2871 /*
2872 * We found a suspension token in the caller's IPC space. Release a send right to indicate that
2873 * we are holding one less legacy hold on the task from this caller. If the release failed,
2874 * go ahead and drop all the rights, as someone either already released our holds or the task
2875 * is gone.
2876 */
2877 if (kr == KERN_SUCCESS)
2878 ipc_right_dealloc(space, resume_port_name, resume_port_entry);
2879 else
2880 ipc_right_destroy(space, resume_port_name, resume_port_entry, FALSE, 0);
2881 /* space unlocked */
2882 } else {
2883 is_write_unlock(space);
2884 if (kr == KERN_SUCCESS)
2885 printf("warning: %s(%d) performed out-of-band resume on pid %d\n",
2886 proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info),
2887 task_pid(task));
2888 }
2889
2890 return kr;
2891 }
2892
2893 /*
2894 * Suspend the target task.
2895 * Making/holding a token/reference/port is the caller's responsibility.
2896 */
2897 kern_return_t
2898 task_suspend_internal(task_t task)
2899 {
2900 kern_return_t kr;
2901
2902 if (task == TASK_NULL || task == kernel_task)
2903 return (KERN_INVALID_ARGUMENT);
2904
2905 task_lock(task);
2906 kr = place_task_hold(task, TASK_HOLD_NORMAL);
2907 task_unlock(task);
2908 return (kr);
2909 }
2910
2911 /*
2912 * Suspend the target task, and return a suspension token. The token
2913 * represents a reference on the suspended task.
2914 */
2915 kern_return_t
2916 task_suspend2(
2917 task_t task,
2918 task_suspension_token_t *suspend_token)
2919 {
2920 kern_return_t kr;
2921
2922 kr = task_suspend_internal(task);
2923 if (kr != KERN_SUCCESS) {
2924 *suspend_token = TASK_NULL;
2925 return (kr);
2926 }
2927
2928 /*
2929 * Take a reference on the target task and return that to the caller
2930 * as a "suspension token," which can be converted into an SO right to
2931 * the now-suspended task's resume port.
2932 */
2933 task_reference_internal(task);
2934 *suspend_token = task;
2935
2936 return (KERN_SUCCESS);
2937 }
2938
2939 /*
2940 * Resume the task
2941 * (reference/token/port management is caller's responsibility).
2942 */
2943 kern_return_t
2944 task_resume_internal(
2945 task_suspension_token_t task)
2946 {
2947 kern_return_t kr;
2948
2949 if (task == TASK_NULL || task == kernel_task)
2950 return (KERN_INVALID_ARGUMENT);
2951
2952 task_lock(task);
2953 kr = release_task_hold(task, TASK_HOLD_NORMAL);
2954 task_unlock(task);
2955 return (kr);
2956 }
2957
2958 /*
2959 * Resume the task using a suspension token. Consumes the token's ref.
2960 */
2961 kern_return_t
2962 task_resume2(
2963 task_suspension_token_t task)
2964 {
2965 kern_return_t kr;
2966
2967 kr = task_resume_internal(task);
2968 task_suspension_token_deallocate(task);
2969
2970 return (kr);
2971 }
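
/*
 * Token-based variant as seen from user space (an illustration, not part
 * of this file). Unlike task_suspend(), each caller receives its own
 * suspension token, which materializes as a send-once right to the
 * task's resume port, so independent suspenders cannot release each
 * other's holds:
 *
 *	#include <mach/mach.h>
 *
 *	static kern_return_t
 *	with_task_suspended(task_t target)
 *	{
 *		task_suspension_token_t token = MACH_PORT_NULL;
 *		kern_return_t kr = task_suspend2(target, &token);
 *		if (kr != KERN_SUCCESS)
 *			return kr;
 *		// ... work on the suspended task here ...
 *		return task_resume2(token);	// consumes the token
 *	}
 */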
2972
2973 boolean_t
2974 task_suspension_notify(mach_msg_header_t *request_header)
2975 {
2976 ipc_port_t port = (ipc_port_t) request_header->msgh_remote_port;
2977 task_t task = convert_port_to_task_suspension_token(port);
2978 mach_msg_type_number_t not_count;
2979
2980 if (task == TASK_NULL || task == kernel_task)
2981 return TRUE; /* nothing to do */
2982
2983 switch (request_header->msgh_id) {
2984
2985 case MACH_NOTIFY_SEND_ONCE:
2986 /* release the hold held by this specific send-once right */
2987 task_lock(task);
2988 release_task_hold(task, TASK_HOLD_NORMAL);
2989 task_unlock(task);
2990 break;
2991
2992 case MACH_NOTIFY_NO_SENDERS:
2993 not_count = ((mach_no_senders_notification_t *)request_header)->not_count;
2994
2995 task_lock(task);
2996 ip_lock(port);
2997 if (port->ip_mscount == not_count) {
2998
2999 /* release all the [remaining] outstanding legacy holds */
3000 assert(port->ip_nsrequest == IP_NULL);
3001 ip_unlock(port);
3002 release_task_hold(task, TASK_HOLD_LEGACY_ALL);
3003 task_unlock(task);
3004
3005 } else if (port->ip_nsrequest == IP_NULL) {
3006 ipc_port_t old_notify;
3007
3008 task_unlock(task);
3009 /* new send rights, re-arm notification at current make-send count */
3010 ipc_port_nsrequest(port, port->ip_mscount, ipc_port_make_sonce_locked(port), &old_notify);
3011 assert(old_notify == IP_NULL);
3012 /* port unlocked */
3013 } else {
3014 ip_unlock(port);
3015 task_unlock(task);
3016 }
3017 break;
3018
3019 default:
3020 break;
3021 }
3022
3023 task_suspension_token_deallocate(task); /* drop token reference */
3024 return TRUE;
3025 }
3026
3027 kern_return_t
3028 task_pidsuspend_locked(task_t task)
3029 {
3030 kern_return_t kr;
3031
3032 if (task->pidsuspended) {
3033 kr = KERN_FAILURE;
3034 goto out;
3035 }
3036
3037 task->pidsuspended = TRUE;
3038
3039 kr = place_task_hold(task, TASK_HOLD_PIDSUSPEND);
3040 if (kr != KERN_SUCCESS) {
3041 task->pidsuspended = FALSE;
3042 }
3043 out:
3044 return(kr);
3045 }
3046
3047
3048 /*
3049 * task_pidsuspend:
3050 *
3051 * Suspends a task by placing a hold on its threads.
3052 *
3053 * Conditions:
3054 * The caller holds a reference to the task
3055 */
3056 kern_return_t
3057 task_pidsuspend(
3058 task_t task)
3059 {
3060 kern_return_t kr;
3061
3062 if (task == TASK_NULL || task == kernel_task)
3063 return (KERN_INVALID_ARGUMENT);
3064
3065 task_lock(task);
3066
3067 kr = task_pidsuspend_locked(task);
3068
3069 task_unlock(task);
3070
3071 return (kr);
3072 }
3073
3074 /*
3075 * task_pidresume:
3076 * Resumes a previously suspended task.
3077 *
3078 * Conditions:
3079 * The caller holds a reference to the task
3080 */
3081 kern_return_t
3082 task_pidresume(
3083 task_t task)
3084 {
3085 kern_return_t kr;
3086
3087 if (task == TASK_NULL || task == kernel_task)
3088 return (KERN_INVALID_ARGUMENT);
3089
3090 task_lock(task);
3091
3092 #if CONFIG_FREEZE
3093
3094 while (task->changing_freeze_state) {
3095
3096 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
3097 task_unlock(task);
3098 thread_block(THREAD_CONTINUE_NULL);
3099
3100 task_lock(task);
3101 }
3102 task->changing_freeze_state = TRUE;
3103 #endif
3104
3105 kr = release_task_hold(task, TASK_HOLD_PIDSUSPEND);
3106
3107 task_unlock(task);
3108
3109 #if CONFIG_FREEZE
3110
3111 task_lock(task);
3112
3113 if (kr == KERN_SUCCESS)
3114 task->frozen = FALSE;
3115 task->changing_freeze_state = FALSE;
3116 thread_wakeup(&task->changing_freeze_state);
3117
3118 task_unlock(task);
3119 #endif
3120
3121 return (kr);
3122 }
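
/*
 * Note on the interaction with the legacy holds above: the pidsuspend
 * hold is accounted in user_stop_count like any other hold, but it is
 * guarded by the pidsuspended flag, so ordinary task_resume() and
 * no-senders processing cannot release it; only task_pidresume()
 * (TASK_HOLD_PIDSUSPEND) drops that final hold. A task that is both
 * pidsuspended and task_suspend()ed therefore stays stopped until both
 * kinds of hold have been released.
 */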
3123
3124
3125 #if DEVELOPMENT || DEBUG
3126
3127 extern void IOSleep(int);
3128
3129 kern_return_t
3130 task_disconnect_page_mappings(task_t task)
3131 {
3132 int n;
3133
3134 if (task == TASK_NULL || task == kernel_task)
3135 return (KERN_INVALID_ARGUMENT);
3136
3137 /*
3138 * This function strips all of the mappings from the pmap of the
3139 * specified task in order to force the task to re-fault all of
3140 * the pages it is actively using. This allows us to approximate
3141 * the true working set of the specified task. We only engage if
3142 * at least one of the threads in the task is runnable, but we
3143 * want to sweep continuously, at least for a while; the limit of
3144 * 100 sweeps is arbitrary and should be revisited as we gain
3145 * experience. Continuous sweeping gives a better view into which
3146 * areas within a page are being visited, as opposed to only
3147 * seeing the first fault of a page after the task becomes
3148 * runnable. In the future we may try to block until awakened by
3149 * a thread in this task being made runnable, but for now we
3150 * periodically poll from the user-level debug tool driving the
3151 * sysctl.
3152 */
3153 for (n = 0; n < 100; n++) {
3154 thread_t thread;
3155 boolean_t runnable;
3156 boolean_t do_unnest;
3157 int page_count;
3158
3159 runnable = FALSE;
3160 do_unnest = FALSE;
3161
3162 task_lock(task);
3163
3164 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3165
3166 if (thread->state & TH_RUN) {
3167 runnable = TRUE;
3168 break;
3169 }
3170 }
3171 if (n == 0)
3172 task->task_disconnected_count++;
3173
3174 if (task->task_unnested == FALSE) {
3175 if (runnable == TRUE) {
3176 task->task_unnested = TRUE;
3177 do_unnest = TRUE;
3178 }
3179 }
3180 task_unlock(task);
3181
3182 if (runnable == FALSE)
3183 break;
3184
3185 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_TASK_PAGE_MAPPINGS)) | DBG_FUNC_START,
3186 task, do_unnest, task->task_disconnected_count, 0, 0);
3187
3188 page_count = vm_map_disconnect_page_mappings(task->map, do_unnest);
3189
3190 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_TASK_PAGE_MAPPINGS)) | DBG_FUNC_END,
3191 task, page_count, 0, 0, 0);
3192
3193 if ((n % 5) == 4)
3194 IOSleep(1);
3195 }
3196 return (KERN_SUCCESS);
3197 }
3198
3199 #endif
3200
3201
3202 #if CONFIG_FREEZE
3203
3204 /*
3205 * task_freeze:
3206 *
3207 * Freeze a task.
3208 *
3209 * Conditions:
3210 * The caller holds a reference to the task
3211 */
3212 extern void vm_wake_compactor_swapper();
3213 extern queue_head_t c_swapout_list_head;
3214
3215 kern_return_t
3216 task_freeze(
3217 task_t task,
3218 uint32_t *purgeable_count,
3219 uint32_t *wired_count,
3220 uint32_t *clean_count,
3221 uint32_t *dirty_count,
3222 uint32_t dirty_budget,
3223 boolean_t *shared,
3224 boolean_t walk_only)
3225 {
3226 kern_return_t kr = KERN_SUCCESS;
3227
3228 if (task == TASK_NULL || task == kernel_task)
3229 return (KERN_INVALID_ARGUMENT);
3230
3231 task_lock(task);
3232
3233 while (task->changing_freeze_state) {
3234
3235 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
3236 task_unlock(task);
3237 thread_block(THREAD_CONTINUE_NULL);
3238
3239 task_lock(task);
3240 }
3241 if (task->frozen) {
3242 task_unlock(task);
3243 return (KERN_FAILURE);
3244 }
3245 task->changing_freeze_state = TRUE;
3246
3247 task_unlock(task);
3248
3249 if (walk_only) {
3250 panic("task_freeze - walk_only == TRUE");
3251 } else {
3252 kr = vm_map_freeze(task->map, purgeable_count, wired_count, clean_count, dirty_count, dirty_budget, shared);
3253 }
3254
3255 task_lock(task);
3256
3257 if (walk_only == FALSE && kr == KERN_SUCCESS)
3258 task->frozen = TRUE;
3259 task->changing_freeze_state = FALSE;
3260 thread_wakeup(&task->changing_freeze_state);
3261
3262 task_unlock(task);
3263
3264 if (VM_CONFIG_COMPRESSOR_IS_PRESENT) {
3265 vm_wake_compactor_swapper();
3266 /*
3267 * We do an explicit wakeup of the swapout thread here
3268 * because the compact_and_swap routines don't have
3269 * knowledge about these kinds of "per-task packed c_segs"
3270 * and so will not be evaluating whether we need to do
3271 * a wakeup there.
3272 */
3273 thread_wakeup((event_t)&c_swapout_list_head);
3274 }
3275
3276 return (kr);
3277 }
3278
3279 /*
3280 * task_thaw:
3281 *
3282 * Thaw a currently frozen task.
3283 *
3284 * Conditions:
3285 * The caller holds a reference to the task
3286 */
3287 kern_return_t
3288 task_thaw(
3289 task_t task)
3290 {
3291 if (task == TASK_NULL || task == kernel_task)
3292 return (KERN_INVALID_ARGUMENT);
3293
3294 task_lock(task);
3295
3296 while (task->changing_freeze_state) {
3297
3298 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
3299 task_unlock(task);
3300 thread_block(THREAD_CONTINUE_NULL);
3301
3302 task_lock(task);
3303 }
3304 if (!task->frozen) {
3305 task_unlock(task);
3306 return (KERN_FAILURE);
3307 }
3308 task->frozen = FALSE;
3309
3310 task_unlock(task);
3311
3312 return (KERN_SUCCESS);
3313 }
3314
3315 #endif /* CONFIG_FREEZE */
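
/*
 * The changing_freeze_state handshake used by task_freeze()/task_thaw()
 * above serializes freeze-state transitions without holding the task
 * lock across the potentially long vm_map_freeze() call. A minimal
 * sketch of the shape of that protocol:
 *
 *	task_lock(task);
 *	while (task->changing_freeze_state) {
 *		assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
 *		task_unlock(task);
 *		thread_block(THREAD_CONTINUE_NULL);
 *		task_lock(task);
 *	}
 *	task->changing_freeze_state = TRUE;
 *	task_unlock(task);
 *
 *	// ... long-running work with the task lock dropped ...
 *
 *	task_lock(task);
 *	task->changing_freeze_state = FALSE;
 *	thread_wakeup(&task->changing_freeze_state);
 *	task_unlock(task);
 */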
3316
3317 kern_return_t
3318 host_security_set_task_token(
3319 host_security_t host_security,
3320 task_t task,
3321 security_token_t sec_token,
3322 audit_token_t audit_token,
3323 host_priv_t host_priv)
3324 {
3325 ipc_port_t host_port;
3326 kern_return_t kr;
3327
3328 if (task == TASK_NULL)
3329 return(KERN_INVALID_ARGUMENT);
3330
3331 if (host_security == HOST_NULL)
3332 return(KERN_INVALID_SECURITY);
3333
3334 task_lock(task);
3335 task->sec_token = sec_token;
3336 task->audit_token = audit_token;
3337
3338 task_unlock(task);
3339
3340 if (host_priv != HOST_PRIV_NULL) {
3341 kr = host_get_host_priv_port(host_priv, &host_port);
3342 } else {
3343 kr = host_get_host_port(host_priv_self(), &host_port);
3344 }
3345 assert(kr == KERN_SUCCESS);
3346 kr = task_set_special_port(task, TASK_HOST_PORT, host_port);
3347 return(kr);
3348 }
3349
3350 kern_return_t
3351 task_send_trace_memory(
3352 task_t target_task,
3353 __unused uint32_t pid,
3354 __unused uint64_t uniqueid)
3355 {
3356 kern_return_t kr = KERN_INVALID_ARGUMENT;
3357 if (target_task == TASK_NULL)
3358 return (KERN_INVALID_ARGUMENT);
3359
3360 #if CONFIG_ATM
3361 kr = atm_send_proc_inspect_notification(target_task,
3362 pid,
3363 uniqueid);
3364
3365 #endif
3366 return (kr);
3367 }
3368 /*
3369 * This routine was added, pretty much exclusively, for registering the
3370 * RPC glue vector for in-kernel short circuited tasks. Rather than
3371 * removing it completely, I have only disabled that feature (which was
3372 * the only feature at the time). It just appears that we are going to
3373 * want to add some user data to tasks in the future (i.e. bsd info,
3374 * task names, etc...), so I left it in the formal task interface.
3375 */
3376 kern_return_t
3377 task_set_info(
3378 task_t task,
3379 task_flavor_t flavor,
3380 __unused task_info_t task_info_in, /* pointer to IN array */
3381 __unused mach_msg_type_number_t task_info_count)
3382 {
3383 if (task == TASK_NULL)
3384 return(KERN_INVALID_ARGUMENT);
3385
3386 switch (flavor) {
3387
3388 #if CONFIG_ATM
3389 case TASK_TRACE_MEMORY_INFO:
3390 {
3391 if (task_info_count != TASK_TRACE_MEMORY_INFO_COUNT)
3392 return (KERN_INVALID_ARGUMENT);
3393
3394 assert(task_info_in != NULL);
3395 task_trace_memory_info_t mem_info;
3396 mem_info = (task_trace_memory_info_t) task_info_in;
3397 kern_return_t kr = atm_register_trace_memory(task,
3398 mem_info->user_memory_address,
3399 mem_info->buffer_size);
3400 return kr;
3401 }
3402
3403 #endif
3404 default:
3405 return (KERN_INVALID_ARGUMENT);
3406 }
3407 return (KERN_SUCCESS);
3408 }
3409
3410 int radar_20146450 = 1;
3411 kern_return_t
3412 task_info(
3413 task_t task,
3414 task_flavor_t flavor,
3415 task_info_t task_info_out,
3416 mach_msg_type_number_t *task_info_count)
3417 {
3418 kern_return_t error = KERN_SUCCESS;
3419 mach_msg_type_number_t original_task_info_count;
3420
3421 if (task == TASK_NULL)
3422 return (KERN_INVALID_ARGUMENT);
3423
3424 original_task_info_count = *task_info_count;
3425 task_lock(task);
3426
3427 if ((task != current_task()) && (!task->active)) {
3428 task_unlock(task);
3429 return (KERN_INVALID_ARGUMENT);
3430 }
3431
3432 switch (flavor) {
3433
3434 case TASK_BASIC_INFO_32:
3435 case TASK_BASIC2_INFO_32:
3436 {
3437 task_basic_info_32_t basic_info;
3438 vm_map_t map;
3439 clock_sec_t secs;
3440 clock_usec_t usecs;
3441
3442 if (*task_info_count < TASK_BASIC_INFO_32_COUNT) {
3443 error = KERN_INVALID_ARGUMENT;
3444 break;
3445 }
3446
3447 basic_info = (task_basic_info_32_t)task_info_out;
3448
3449 map = (task == kernel_task)? kernel_map: task->map;
3450 basic_info->virtual_size = (typeof(basic_info->virtual_size))map->size;
3451 if (flavor == TASK_BASIC2_INFO_32) {
3452 /*
3453 * The "BASIC2" flavor gets the maximum resident
3454 * size instead of the current resident size...
3455 */
3456 basic_info->resident_size = pmap_resident_max(map->pmap);
3457 } else {
3458 basic_info->resident_size = pmap_resident_count(map->pmap);
3459 }
3460 basic_info->resident_size *= PAGE_SIZE;
3461
3462 basic_info->policy = ((task != kernel_task)?
3463 POLICY_TIMESHARE: POLICY_RR);
3464 basic_info->suspend_count = task->user_stop_count;
3465
3466 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
3467 basic_info->user_time.seconds =
3468 (typeof(basic_info->user_time.seconds))secs;
3469 basic_info->user_time.microseconds = usecs;
3470
3471 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
3472 basic_info->system_time.seconds =
3473 (typeof(basic_info->system_time.seconds))secs;
3474 basic_info->system_time.microseconds = usecs;
3475
3476 *task_info_count = TASK_BASIC_INFO_32_COUNT;
3477 break;
3478 }
3479
3480 case TASK_BASIC_INFO_64:
3481 {
3482 task_basic_info_64_t basic_info;
3483 vm_map_t map;
3484 clock_sec_t secs;
3485 clock_usec_t usecs;
3486
3487 if (*task_info_count < TASK_BASIC_INFO_64_COUNT) {
3488 error = KERN_INVALID_ARGUMENT;
3489 break;
3490 }
3491
3492 basic_info = (task_basic_info_64_t)task_info_out;
3493
3494 map = (task == kernel_task)? kernel_map: task->map;
3495 basic_info->virtual_size = map->size;
3496 basic_info->resident_size =
3497 (mach_vm_size_t)(pmap_resident_count(map->pmap))
3498 * PAGE_SIZE_64;
3499
3500 basic_info->policy = ((task != kernel_task)?
3501 POLICY_TIMESHARE: POLICY_RR);
3502 basic_info->suspend_count = task->user_stop_count;
3503
3504 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
3505 basic_info->user_time.seconds =
3506 (typeof(basic_info->user_time.seconds))secs;
3507 basic_info->user_time.microseconds = usecs;
3508
3509 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
3510 basic_info->system_time.seconds =
3511 (typeof(basic_info->system_time.seconds))secs;
3512 basic_info->system_time.microseconds = usecs;
3513
3514 *task_info_count = TASK_BASIC_INFO_64_COUNT;
3515 break;
3516 }
3517
3518 case MACH_TASK_BASIC_INFO:
3519 {
3520 mach_task_basic_info_t basic_info;
3521 vm_map_t map;
3522 clock_sec_t secs;
3523 clock_usec_t usecs;
3524
3525 if (*task_info_count < MACH_TASK_BASIC_INFO_COUNT) {
3526 error = KERN_INVALID_ARGUMENT;
3527 break;
3528 }
3529
3530 basic_info = (mach_task_basic_info_t)task_info_out;
3531
3532 map = (task == kernel_task) ? kernel_map : task->map;
3533
3534 basic_info->virtual_size = map->size;
3535
3536 basic_info->resident_size =
3537 (mach_vm_size_t)(pmap_resident_count(map->pmap));
3538 basic_info->resident_size *= PAGE_SIZE_64;
3539
3540 basic_info->resident_size_max =
3541 (mach_vm_size_t)(pmap_resident_max(map->pmap));
3542 basic_info->resident_size_max *= PAGE_SIZE_64;
3543
3544 basic_info->policy = ((task != kernel_task) ?
3545 POLICY_TIMESHARE : POLICY_RR);
3546
3547 basic_info->suspend_count = task->user_stop_count;
3548
3549 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
3550 basic_info->user_time.seconds =
3551 (typeof(basic_info->user_time.seconds))secs;
3552 basic_info->user_time.microseconds = usecs;
3553
3554 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
3555 basic_info->system_time.seconds =
3556 (typeof(basic_info->system_time.seconds))secs;
3557 basic_info->system_time.microseconds = usecs;
3558
3559 *task_info_count = MACH_TASK_BASIC_INFO_COUNT;
3560 break;
3561 }
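
/*
 * Illustrative user-space query of the flavor handled above (a sketch,
 * not part of this file); MACH_TASK_BASIC_INFO is the newer replacement
 * for the TASK_BASIC_INFO_32/64 flavors:
 *
 *	#include <mach/mach.h>
 *	#include <stdio.h>
 *
 *	static void
 *	print_resident_size(void)
 *	{
 *		mach_task_basic_info_data_t info;
 *		mach_msg_type_number_t count = MACH_TASK_BASIC_INFO_COUNT;
 *
 *		if (task_info(mach_task_self(), MACH_TASK_BASIC_INFO,
 *		    (task_info_t)&info, &count) == KERN_SUCCESS)
 *			printf("resident: %llu bytes (max %llu)\n",
 *			    (unsigned long long)info.resident_size,
 *			    (unsigned long long)info.resident_size_max);
 *	}
 */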
3562
3563 case TASK_THREAD_TIMES_INFO:
3564 {
3565 task_thread_times_info_t times_info;
3566 thread_t thread;
3567
3568 if (*task_info_count < TASK_THREAD_TIMES_INFO_COUNT) {
3569 error = KERN_INVALID_ARGUMENT;
3570 break;
3571 }
3572
3573 times_info = (task_thread_times_info_t) task_info_out;
3574 times_info->user_time.seconds = 0;
3575 times_info->user_time.microseconds = 0;
3576 times_info->system_time.seconds = 0;
3577 times_info->system_time.microseconds = 0;
3578
3579
3580 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3581 time_value_t user_time, system_time;
3582
3583 if (thread->options & TH_OPT_IDLE_THREAD)
3584 continue;
3585
3586 thread_read_times(thread, &user_time, &system_time);
3587
3588 time_value_add(&times_info->user_time, &user_time);
3589 time_value_add(&times_info->system_time, &system_time);
3590 }
3591
3592 *task_info_count = TASK_THREAD_TIMES_INFO_COUNT;
3593 break;
3594 }
3595
3596 case TASK_ABSOLUTETIME_INFO:
3597 {
3598 task_absolutetime_info_t info;
3599 thread_t thread;
3600
3601 if (*task_info_count < TASK_ABSOLUTETIME_INFO_COUNT) {
3602 error = KERN_INVALID_ARGUMENT;
3603 break;
3604 }
3605
3606 info = (task_absolutetime_info_t)task_info_out;
3607 info->threads_user = info->threads_system = 0;
3608
3609
3610 info->total_user = task->total_user_time;
3611 info->total_system = task->total_system_time;
3612
3613 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3614 uint64_t tval;
3615 spl_t x;
3616
3617 if (thread->options & TH_OPT_IDLE_THREAD)
3618 continue;
3619
3620 x = splsched();
3621 thread_lock(thread);
3622
3623 tval = timer_grab(&thread->user_timer);
3624 info->threads_user += tval;
3625 info->total_user += tval;
3626
3627 tval = timer_grab(&thread->system_timer);
3628 if (thread->precise_user_kernel_time) {
3629 info->threads_system += tval;
3630 info->total_system += tval;
3631 } else {
3632 /* system_timer may represent either sys or user */
3633 info->threads_user += tval;
3634 info->total_user += tval;
3635 }
3636
3637 thread_unlock(thread);
3638 splx(x);
3639 }
3640
3641
3642 *task_info_count = TASK_ABSOLUTETIME_INFO_COUNT;
3643 break;
3644 }
3645
3646 case TASK_DYLD_INFO:
3647 {
3648 task_dyld_info_t info;
3649
3650 /*
3651 * We added the format field to TASK_DYLD_INFO output. For
3652 * temporary backward compatibility, accept the fact that
3653 * clients may ask for the old version - distinguished by the
3654 * size of the expected result structure.
3655 */
3656 #define TASK_LEGACY_DYLD_INFO_COUNT \
3657 offsetof(struct task_dyld_info, all_image_info_format)/sizeof(natural_t)
3658
3659 if (*task_info_count < TASK_LEGACY_DYLD_INFO_COUNT) {
3660 error = KERN_INVALID_ARGUMENT;
3661 break;
3662 }
3663
3664 info = (task_dyld_info_t)task_info_out;
3665 info->all_image_info_addr = task->all_image_info_addr;
3666 info->all_image_info_size = task->all_image_info_size;
3667
3668 /* only set format on output for those expecting it */
3669 if (*task_info_count >= TASK_DYLD_INFO_COUNT) {
3670 info->all_image_info_format = task_has_64BitAddr(task) ?
3671 TASK_DYLD_ALL_IMAGE_INFO_64 :
3672 TASK_DYLD_ALL_IMAGE_INFO_32 ;
3673 *task_info_count = TASK_DYLD_INFO_COUNT;
3674 } else {
3675 *task_info_count = TASK_LEGACY_DYLD_INFO_COUNT;
3676 }
3677 break;
3678 }
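
/*
 * Illustrative debugger-style use of the flavor above (a sketch, not
 * part of this file): TASK_DYLD_INFO reports where the target's dyld
 * all-image-infos structure lives, which tools then read with
 * mach_vm_read() to enumerate loaded images:
 *
 *	#include <mach/mach.h>
 *
 *	static mach_vm_address_t
 *	dyld_info_addr(task_t target)
 *	{
 *		task_dyld_info_data_t dyld_info;
 *		mach_msg_type_number_t count = TASK_DYLD_INFO_COUNT;
 *
 *		if (task_info(target, TASK_DYLD_INFO,
 *		    (task_info_t)&dyld_info, &count) != KERN_SUCCESS)
 *			return 0;
 *		return dyld_info.all_image_info_addr;
 *	}
 */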
3679
3680 case TASK_EXTMOD_INFO:
3681 {
3682 task_extmod_info_t info;
3683 void *p;
3684
3685 if (*task_info_count < TASK_EXTMOD_INFO_COUNT) {
3686 error = KERN_INVALID_ARGUMENT;
3687 break;
3688 }
3689
3690 info = (task_extmod_info_t)task_info_out;
3691
3692 p = get_bsdtask_info(task);
3693 if (p) {
3694 proc_getexecutableuuid(p, info->task_uuid, sizeof(info->task_uuid));
3695 } else {
3696 bzero(info->task_uuid, sizeof(info->task_uuid));
3697 }
3698 info->extmod_statistics = task->extmod_statistics;
3699 *task_info_count = TASK_EXTMOD_INFO_COUNT;
3700
3701 break;
3702 }
3703
3704 case TASK_KERNELMEMORY_INFO:
3705 {
3706 task_kernelmemory_info_t tkm_info;
3707 ledger_amount_t credit, debit;
3708
3709 if (*task_info_count < TASK_KERNELMEMORY_INFO_COUNT) {
3710 error = KERN_INVALID_ARGUMENT;
3711 break;
3712 }
3713
3714 tkm_info = (task_kernelmemory_info_t) task_info_out;
3715 tkm_info->total_palloc = 0;
3716 tkm_info->total_pfree = 0;
3717 tkm_info->total_salloc = 0;
3718 tkm_info->total_sfree = 0;
3719
3720 if (task == kernel_task) {
3721 /*
3722 * All shared allocs/frees from other tasks count against
3723 * the kernel private memory usage. If we are looking up
3724 * info for the kernel task, gather from everywhere.
3725 */
3726 task_unlock(task);
3727
3728 /* start by accounting for all the terminated tasks against the kernel */
3729 tkm_info->total_palloc = tasks_tkm_private.alloc + tasks_tkm_shared.alloc;
3730 tkm_info->total_pfree = tasks_tkm_private.free + tasks_tkm_shared.free;
3731
3732 /* count all other task/thread shared alloc/free against the kernel */
3733 lck_mtx_lock(&tasks_threads_lock);
3734
3735 /* XXX this really shouldn't be using the function parameter 'task' as a local var! */
3736 queue_iterate(&tasks, task, task_t, tasks) {
3737 if (task == kernel_task) {
3738 if (ledger_get_entries(task->ledger,
3739 task_ledgers.tkm_private, &credit,
3740 &debit) == KERN_SUCCESS) {
3741 tkm_info->total_palloc += credit;
3742 tkm_info->total_pfree += debit;
3743 }
3744 }
3745 if (!ledger_get_entries(task->ledger,
3746 task_ledgers.tkm_shared, &credit, &debit)) {
3747 tkm_info->total_palloc += credit;
3748 tkm_info->total_pfree += debit;
3749 }
3750 }
3751 lck_mtx_unlock(&tasks_threads_lock);
3752 } else {
3753 if (!ledger_get_entries(task->ledger,
3754 task_ledgers.tkm_private, &credit, &debit)) {
3755 tkm_info->total_palloc = credit;
3756 tkm_info->total_pfree = debit;
3757 }
3758 if (!ledger_get_entries(task->ledger,
3759 task_ledgers.tkm_shared, &credit, &debit)) {
3760 tkm_info->total_salloc = credit;
3761 tkm_info->total_sfree = debit;
3762 }
3763 task_unlock(task);
3764 }
3765
3766 *task_info_count = TASK_KERNELMEMORY_INFO_COUNT;
3767 return KERN_SUCCESS;
3768 }
3769
3770 /* OBSOLETE */
3771 case TASK_SCHED_FIFO_INFO:
3772 {
3773
3774 if (*task_info_count < POLICY_FIFO_BASE_COUNT) {
3775 error = KERN_INVALID_ARGUMENT;
3776 break;
3777 }
3778
3779 error = KERN_INVALID_POLICY;
3780 break;
3781 }
3782
3783 /* OBSOLETE */
3784 case TASK_SCHED_RR_INFO:
3785 {
3786 policy_rr_base_t rr_base;
3787 uint32_t quantum_time;
3788 uint64_t quantum_ns;
3789
3790 if (*task_info_count < POLICY_RR_BASE_COUNT) {
3791 error = KERN_INVALID_ARGUMENT;
3792 break;
3793 }
3794
3795 rr_base = (policy_rr_base_t) task_info_out;
3796
3797 if (task != kernel_task) {
3798 error = KERN_INVALID_POLICY;
3799 break;
3800 }
3801
3802 rr_base->base_priority = task->priority;
3803
3804 quantum_time = SCHED(initial_quantum_size)(THREAD_NULL);
3805 absolutetime_to_nanoseconds(quantum_time, &quantum_ns);
3806
3807 rr_base->quantum = (uint32_t)(quantum_ns / 1000 / 1000);
3808
3809 *task_info_count = POLICY_RR_BASE_COUNT;
3810 break;
3811 }
3812
3813 /* OBSOLETE */
3814 case TASK_SCHED_TIMESHARE_INFO:
3815 {
3816 policy_timeshare_base_t ts_base;
3817
3818 if (*task_info_count < POLICY_TIMESHARE_BASE_COUNT) {
3819 error = KERN_INVALID_ARGUMENT;
3820 break;
3821 }
3822
3823 ts_base = (policy_timeshare_base_t) task_info_out;
3824
3825 if (task == kernel_task) {
3826 error = KERN_INVALID_POLICY;
3827 break;
3828 }
3829
3830 ts_base->base_priority = task->priority;
3831
3832 *task_info_count = POLICY_TIMESHARE_BASE_COUNT;
3833 break;
3834 }
3835
3836 case TASK_SECURITY_TOKEN:
3837 {
3838 security_token_t *sec_token_p;
3839
3840 if (*task_info_count < TASK_SECURITY_TOKEN_COUNT) {
3841 error = KERN_INVALID_ARGUMENT;
3842 break;
3843 }
3844
3845 sec_token_p = (security_token_t *) task_info_out;
3846
3847 *sec_token_p = task->sec_token;
3848
3849 *task_info_count = TASK_SECURITY_TOKEN_COUNT;
3850 break;
3851 }
3852
3853 case TASK_AUDIT_TOKEN:
3854 {
3855 audit_token_t *audit_token_p;
3856
3857 if (*task_info_count < TASK_AUDIT_TOKEN_COUNT) {
3858 error = KERN_INVALID_ARGUMENT;
3859 break;
3860 }
3861
3862 audit_token_p = (audit_token_t *) task_info_out;
3863
3864 *audit_token_p = task->audit_token;
3865
3866 *task_info_count = TASK_AUDIT_TOKEN_COUNT;
3867 break;
3868 }
3869
3870 case TASK_SCHED_INFO:
3871 error = KERN_INVALID_ARGUMENT;
3872 break;
3873
3874 case TASK_EVENTS_INFO:
3875 {
3876 task_events_info_t events_info;
3877 thread_t thread;
3878
3879 if (*task_info_count < TASK_EVENTS_INFO_COUNT) {
3880 error = KERN_INVALID_ARGUMENT;
3881 break;
3882 }
3883
3884 events_info = (task_events_info_t) task_info_out;
3885
3886
3887 events_info->faults = task->faults;
3888 events_info->pageins = task->pageins;
3889 events_info->cow_faults = task->cow_faults;
3890 events_info->messages_sent = task->messages_sent;
3891 events_info->messages_received = task->messages_received;
3892 events_info->syscalls_mach = task->syscalls_mach;
3893 events_info->syscalls_unix = task->syscalls_unix;
3894
3895 events_info->csw = task->c_switch;
3896
3897 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3898 events_info->csw += thread->c_switch;
3899 events_info->syscalls_mach += thread->syscalls_mach;
3900 events_info->syscalls_unix += thread->syscalls_unix;
3901 }
3902
3903
3904 *task_info_count = TASK_EVENTS_INFO_COUNT;
3905 break;
3906 }
3907 case TASK_AFFINITY_TAG_INFO:
3908 {
3909 if (*task_info_count < TASK_AFFINITY_TAG_INFO_COUNT) {
3910 error = KERN_INVALID_ARGUMENT;
3911 break;
3912 }
3913
3914 error = task_affinity_info(task, task_info_out, task_info_count);
3915 break;
3916 }
3917 case TASK_POWER_INFO:
3918 {
3919 if (*task_info_count < TASK_POWER_INFO_COUNT) {
3920 error = KERN_INVALID_ARGUMENT;
3921 break;
3922 }
3923
3924 task_power_info_locked(task, (task_power_info_t)task_info_out, NULL, NULL);
3925 break;
3926 }
3927
3928 case TASK_POWER_INFO_V2:
3929 {
3930 if (*task_info_count < TASK_POWER_INFO_V2_COUNT) {
3931 error = KERN_INVALID_ARGUMENT;
3932 break;
3933 }
3934 task_power_info_v2_t tpiv2 = (task_power_info_v2_t) task_info_out;
3935
3936 uint64_t *task_energy = NULL;
3937 task_power_info_locked(task, &tpiv2->cpu_energy, &tpiv2->gpu_energy, task_energy);
3938 break;
3939 }
3940
3941 case TASK_VM_INFO:
3942 case TASK_VM_INFO_PURGEABLE:
3943 {
3944 task_vm_info_t vm_info;
3945 vm_map_t map;
3946
3947 if (*task_info_count < TASK_VM_INFO_REV0_COUNT) {
3948 error = KERN_INVALID_ARGUMENT;
3949 break;
3950 }
3951
3952 vm_info = (task_vm_info_t)task_info_out;
3953
3954 if (task == kernel_task) {
3955 map = kernel_map;
3956 /* no lock */
3957 } else {
3958 map = task->map;
3959 vm_map_lock_read(map);
3960 }
3961
3962 vm_info->virtual_size = (typeof(vm_info->virtual_size))map->size;
3963 vm_info->region_count = map->hdr.nentries;
3964 vm_info->page_size = vm_map_page_size(map);
3965
3966 vm_info->resident_size = pmap_resident_count(map->pmap);
3967 vm_info->resident_size *= PAGE_SIZE;
3968 vm_info->resident_size_peak = pmap_resident_max(map->pmap);
3969 vm_info->resident_size_peak *= PAGE_SIZE;
3970
3971 #define _VM_INFO(_name) \
3972 vm_info->_name = ((mach_vm_size_t) map->pmap->stats._name) * PAGE_SIZE
3973
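/*
 * Each _VM_INFO(x) use below copies the matching pmap statistic, which
 * is maintained in pages, into the reply structure scaled to bytes.
 */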
3974 _VM_INFO(device);
3975 _VM_INFO(device_peak);
3976 _VM_INFO(external);
3977 _VM_INFO(external_peak);
3978 _VM_INFO(internal);
3979 _VM_INFO(internal_peak);
3980 _VM_INFO(reusable);
3981 _VM_INFO(reusable_peak);
3982 _VM_INFO(compressed);
3983 _VM_INFO(compressed_peak);
3984 _VM_INFO(compressed_lifetime);
3985
3986 vm_info->purgeable_volatile_pmap = 0;
3987 vm_info->purgeable_volatile_resident = 0;
3988 vm_info->purgeable_volatile_virtual = 0;
3989 if (task == kernel_task) {
3990 /*
3991 * We do not maintain the detailed stats for the
3992 * kernel_pmap, so just count everything as
3993 * "internal"...
3994 */
3995 vm_info->internal = vm_info->resident_size;
3996 /*
3997 * ... but since the memory held by the VM compressor
3998 * in the kernel address space ought to be attributed
3999 * to user-space tasks, we subtract it from "internal"
4000 * to give memory reporting tools a more accurate idea
4001 * of what the kernel itself is actually using, instead
4002 * of making it look like the kernel is leaking memory
4003 * when the system is under memory pressure.
4004 */
4005 vm_info->internal -= (VM_PAGE_COMPRESSOR_COUNT *
4006 PAGE_SIZE);
4007 } else {
4008 mach_vm_size_t volatile_virtual_size;
4009 mach_vm_size_t volatile_resident_size;
4010 mach_vm_size_t volatile_compressed_size;
4011 mach_vm_size_t volatile_pmap_size;
4012 mach_vm_size_t volatile_compressed_pmap_size;
4013 kern_return_t kr;
4014
4015 if (flavor == TASK_VM_INFO_PURGEABLE) {
4016 kr = vm_map_query_volatile(
4017 map,
4018 &volatile_virtual_size,
4019 &volatile_resident_size,
4020 &volatile_compressed_size,
4021 &volatile_pmap_size,
4022 &volatile_compressed_pmap_size);
4023 if (kr == KERN_SUCCESS) {
4024 vm_info->purgeable_volatile_pmap =
4025 volatile_pmap_size;
4026 if (radar_20146450) {
4027 vm_info->compressed -=
4028 volatile_compressed_pmap_size;
4029 }
4030 vm_info->purgeable_volatile_resident =
4031 volatile_resident_size;
4032 vm_info->purgeable_volatile_virtual =
4033 volatile_virtual_size;
4034 }
4035 }
4036 }
4037 *task_info_count = TASK_VM_INFO_REV0_COUNT;
4038
4039 if (original_task_info_count >= TASK_VM_INFO_REV1_COUNT) {
4040 vm_info->phys_footprint =
4041 (mach_vm_size_t) get_task_phys_footprint(task);
4042 *task_info_count = TASK_VM_INFO_REV1_COUNT;
4043 }
4044 if (original_task_info_count >= TASK_VM_INFO_REV2_COUNT) {
4045 vm_info->min_address = map->min_offset;
4046 vm_info->max_address = map->max_offset;
4047 *task_info_count = TASK_VM_INFO_REV2_COUNT;
4048 }
4049
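/*
 * Illustrative user-space sketch of the revision scheme above (hedged
 * example; it assumes only the standard task_info() interface): the
 * count supplied by the caller decides how much of the structure is
 * filled in and echoed back.
 *
 *    task_vm_info_data_t vm;
 *    mach_msg_type_number_t cnt = TASK_VM_INFO_COUNT;
 *    if (task_info(mach_task_self(), TASK_VM_INFO,
 *            (task_info_t)&vm, &cnt) == KERN_SUCCESS &&
 *        cnt >= TASK_VM_INFO_REV1_COUNT) {
 *        // vm.phys_footprint is valid here
 *    }
 */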
4050 if (task != kernel_task) {
4051 vm_map_unlock_read(map);
4052 }
4053
4054 break;
4055 }
4056
4057 case TASK_WAIT_STATE_INFO:
4058 {
4059 /*
4060 * Deprecated flavor. Currently allowing some results until all users
4061 * stop calling it. The results may not be accurate.
4062 */
4063 task_wait_state_info_t wait_state_info;
4064 uint64_t total_sfi_ledger_val = 0;
4065
4066 if (*task_info_count < TASK_WAIT_STATE_INFO_COUNT) {
4067 error = KERN_INVALID_ARGUMENT;
4068 break;
4069 }
4070
4071 wait_state_info = (task_wait_state_info_t) task_info_out;
4072
4073 wait_state_info->total_wait_state_time = 0;
4074 bzero(wait_state_info->_reserved, sizeof(wait_state_info->_reserved));
4075
4076 #if CONFIG_SCHED_SFI
4077 int i, prev_lentry = -1;
4078 int64_t val_credit, val_debit;
4079
4080 for (i = 0; i < MAX_SFI_CLASS_ID; i++){
4081 val_credit =0;
4082 /*
4083 * Checking prev_lentry against the current ledger entry ensures adjacent
4084 * classes which share the same ledger do not add wait times twice.
4085 * Note: use the ledger call to get the data for each individual SFI class.
4086 */
4087 if (prev_lentry != task_ledgers.sfi_wait_times[i] &&
4088 KERN_SUCCESS == ledger_get_entries(task->ledger,
4089 task_ledgers.sfi_wait_times[i], &val_credit, &val_debit)) {
4090 total_sfi_ledger_val += val_credit;
4091 }
4092 prev_lentry = task_ledgers.sfi_wait_times[i];
4093 }
4094
4095 #endif /* CONFIG_SCHED_SFI */
4096 wait_state_info->total_wait_sfi_state_time = total_sfi_ledger_val;
4097 *task_info_count = TASK_WAIT_STATE_INFO_COUNT;
4098
4099 break;
4100 }
4101 case TASK_VM_INFO_PURGEABLE_ACCOUNT:
4102 {
4103 #if DEVELOPMENT || DEBUG
4104 pvm_account_info_t acnt_info;
4105
4106 if (*task_info_count < PVM_ACCOUNT_INFO_COUNT) {
4107 error = KERN_INVALID_ARGUMENT;
4108 break;
4109 }
4110
4111 if (task_info_out == NULL) {
4112 error = KERN_INVALID_ARGUMENT;
4113 break;
4114 }
4115
4116 acnt_info = (pvm_account_info_t) task_info_out;
4117
4118 error = vm_purgeable_account(task, acnt_info);
4119
4120 *task_info_count = PVM_ACCOUNT_INFO_COUNT;
4121
4122 break;
4123 #else /* DEVELOPMENT || DEBUG */
4124 error = KERN_NOT_SUPPORTED;
4125 break;
4126 #endif /* DEVELOPMENT || DEBUG */
4127 }
4128 case TASK_FLAGS_INFO:
4129 {
4130 task_flags_info_t flags_info;
4131
4132 if (*task_info_count < TASK_FLAGS_INFO_COUNT) {
4133 error = KERN_INVALID_ARGUMENT;
4134 break;
4135 }
4136
4137 flags_info = (task_flags_info_t)task_info_out;
4138
4139 /* only publish the 64-bit flag of the task */
4140 flags_info->flags = task->t_flags & TF_64B_ADDR;
4141
4142 *task_info_count = TASK_FLAGS_INFO_COUNT;
4143 break;
4144 }
4145
4146 case TASK_DEBUG_INFO_INTERNAL:
4147 {
4148 #if DEVELOPMENT || DEBUG
4149 task_debug_info_internal_t dbg_info;
4150 if (*task_info_count < TASK_DEBUG_INFO_INTERNAL_COUNT) {
4151 error = KERN_NOT_SUPPORTED;
4152 break;
4153 }
4154
4155 if (task_info_out == NULL) {
4156 error = KERN_INVALID_ARGUMENT;
4157 break;
4158 }
4159 dbg_info = (task_debug_info_internal_t) task_info_out;
4160 dbg_info->ipc_space_size = 0;
4161 if (task->itk_space){
4162 dbg_info->ipc_space_size = task->itk_space->is_table_size;
4163 }
4164
4165 error = KERN_SUCCESS;
4166 *task_info_count = TASK_DEBUG_INFO_INTERNAL_COUNT;
4167 break;
4168 #else /* DEVELOPMENT || DEBUG */
4169 error = KERN_NOT_SUPPORTED;
4170 break;
4171 #endif /* DEVELOPMENT || DEBUG */
4172 }
4173 default:
4174 error = KERN_INVALID_ARGUMENT;
4175 }
4176
4177 task_unlock(task);
4178 return (error);
4179 }
4180
4181 /*
4182 * task_power_info
4183 *
4184 * Returns power stats for the task.
4185 * Note: Called with task locked.
4186 */
4187 void
4188 task_power_info_locked(
4189 task_t task,
4190 task_power_info_t info,
4191 gpu_energy_data_t ginfo,
4192 uint64_t *task_energy)
4193 {
4194 thread_t thread;
4195 ledger_amount_t tmp;
4196
4197 task_lock_assert_owned(task);
4198
4199 ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
4200 (ledger_amount_t *)&info->task_interrupt_wakeups, &tmp);
4201 ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
4202 (ledger_amount_t *)&info->task_platform_idle_wakeups, &tmp);
4203
4204 info->task_timer_wakeups_bin_1 = task->task_timer_wakeups_bin_1;
4205 info->task_timer_wakeups_bin_2 = task->task_timer_wakeups_bin_2;
4206
4207 info->total_user = task->total_user_time;
4208 info->total_system = task->total_system_time;
4209
4210 if (task_energy) {
4211 *task_energy = task->task_energy;
4212 }
4213
4214 if (ginfo) {
4215 ginfo->task_gpu_utilisation = task->task_gpu_ns;
4216 }
4217
4218 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4219 uint64_t tval;
4220 spl_t x;
4221
4222 if (thread->options & TH_OPT_IDLE_THREAD)
4223 continue;
4224
4225 x = splsched();
4226 thread_lock(thread);
4227
4228 info->task_timer_wakeups_bin_1 += thread->thread_timer_wakeups_bin_1;
4229 info->task_timer_wakeups_bin_2 += thread->thread_timer_wakeups_bin_2;
4230
4231 if (task_energy) {
4232 *task_energy += ml_energy_stat(thread);
4233 }
4234
4235 tval = timer_grab(&thread->user_timer);
4236 info->total_user += tval;
4237
4238 tval = timer_grab(&thread->system_timer);
4239 if (thread->precise_user_kernel_time) {
4240 info->total_system += tval;
4241 } else {
4242 /* system_timer may represent either sys or user */
4243 info->total_user += tval;
4244 }
4245
4246 if (ginfo) {
4247 ginfo->task_gpu_utilisation += ml_gpu_stat(thread);
4248 }
4249 thread_unlock(thread);
4250 splx(x);
4251 }
4252 }
4253
4254 /*
4255 * task_gpu_utilisation
4256 *
4257 * Returns the total GPU time used by all the threads of the task
4258 * (both dead and alive)
4259 */
4260 uint64_t
4261 task_gpu_utilisation(
4262 task_t task)
4263 {
4264 uint64_t gpu_time = 0;
4265 thread_t thread;
4266
4267 task_lock(task);
4268 gpu_time += task->task_gpu_ns;
4269
4270 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4271 spl_t x;
4272 x = splsched();
4273 thread_lock(thread);
4274 gpu_time += ml_gpu_stat(thread);
4275 thread_unlock(thread);
4276 splx(x);
4277 }
4278
4279 task_unlock(task);
4280 return gpu_time;
4281 }
4282
4283 /*
4284 * task_energy
4285 *
4286 * Returns the total energy used by all the threads of the task
4287 * (both dead and alive)
4288 */
4289 uint64_t
4290 task_energy(
4291 task_t task)
4292 {
4293 uint64_t energy = 0;
4294 thread_t thread;
4295
4296 task_lock(task);
4297 energy += task->task_energy;
4298
4299 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4300 spl_t x;
4301 x = splsched();
4302 thread_lock(thread);
4303 energy += ml_energy_stat(thread);
4304 thread_unlock(thread);
4305 splx(x);
4306 }
4307
4308 task_unlock(task);
4309 return energy;
4310 }
4311
4312 kern_return_t
4313 task_purgable_info(
4314 task_t task,
4315 task_purgable_info_t *stats)
4316 {
4317 if (task == TASK_NULL || stats == NULL)
4318 return KERN_INVALID_ARGUMENT;
4319 /* Take task reference */
4320 task_reference(task);
4321 vm_purgeable_stats((vm_purgeable_info_t)stats, task);
4322 /* Drop task reference */
4323 task_deallocate(task);
4324 return KERN_SUCCESS;
4325 }
4326
4327 void
4328 task_vtimer_set(
4329 task_t task,
4330 integer_t which)
4331 {
4332 thread_t thread;
4333 spl_t x;
4334
4335 task_lock(task);
4336
4337 task->vtimers |= which;
4338
4339 switch (which) {
4340
4341 case TASK_VTIMER_USER:
4342 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4343 x = splsched();
4344 thread_lock(thread);
4345 if (thread->precise_user_kernel_time)
4346 thread->vtimer_user_save = timer_grab(&thread->user_timer);
4347 else
4348 thread->vtimer_user_save = timer_grab(&thread->system_timer);
4349 thread_unlock(thread);
4350 splx(x);
4351 }
4352 break;
4353
4354 case TASK_VTIMER_PROF:
4355 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4356 x = splsched();
4357 thread_lock(thread);
4358 thread->vtimer_prof_save = timer_grab(&thread->user_timer);
4359 thread->vtimer_prof_save += timer_grab(&thread->system_timer);
4360 thread_unlock(thread);
4361 splx(x);
4362 }
4363 break;
4364
4365 case TASK_VTIMER_RLIM:
4366 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4367 x = splsched();
4368 thread_lock(thread);
4369 thread->vtimer_rlim_save = timer_grab(&thread->user_timer);
4370 thread->vtimer_rlim_save += timer_grab(&thread->system_timer);
4371 thread_unlock(thread);
4372 splx(x);
4373 }
4374 break;
4375 }
4376
4377 task_unlock(task);
4378 }
4379
4380 void
4381 task_vtimer_clear(
4382 task_t task,
4383 integer_t which)
4384 {
4385 assert(task == current_task());
4386
4387 task_lock(task);
4388
4389 task->vtimers &= ~which;
4390
4391 task_unlock(task);
4392 }
4393
4394 void
4395 task_vtimer_update(
4396 __unused
4397 task_t task,
4398 integer_t which,
4399 uint32_t *microsecs)
4400 {
4401 thread_t thread = current_thread();
4402 uint32_t tdelt = 0;
4403 clock_sec_t secs = 0;
4404 uint64_t tsum;
4405
4406 assert(task == current_task());
4407
4408 spl_t s = splsched();
4409 thread_lock(thread);
4410
4411 if ((task->vtimers & which) != (uint32_t)which) {
4412 thread_unlock(thread);
4413 splx(s);
4414 return;
4415 }
4416
4417 switch (which) {
4418
4419 case TASK_VTIMER_USER:
4420 if (thread->precise_user_kernel_time) {
4421 tdelt = (uint32_t)timer_delta(&thread->user_timer,
4422 &thread->vtimer_user_save);
4423 } else {
4424 tdelt = (uint32_t)timer_delta(&thread->system_timer,
4425 &thread->vtimer_user_save);
4426 }
4427 absolutetime_to_microtime(tdelt, &secs, microsecs);
4428 break;
4429
4430 case TASK_VTIMER_PROF:
4431 tsum = timer_grab(&thread->user_timer);
4432 tsum += timer_grab(&thread->system_timer);
4433 tdelt = (uint32_t)(tsum - thread->vtimer_prof_save);
4434 absolutetime_to_microtime(tdelt, &secs, microsecs);
4435 /* if the time delta is smaller than a usec, ignore */
4436 if (*microsecs != 0)
4437 thread->vtimer_prof_save = tsum;
4438 break;
4439
4440 case TASK_VTIMER_RLIM:
4441 tsum = timer_grab(&thread->user_timer);
4442 tsum += timer_grab(&thread->system_timer);
4443 tdelt = (uint32_t)(tsum - thread->vtimer_rlim_save);
4444 thread->vtimer_rlim_save = tsum;
4445 absolutetime_to_microtime(tdelt, &secs, microsecs);
4446 break;
4447 }
4448
4449 thread_unlock(thread);
4450 splx(s);
4451 }
4452
4453 /*
4454 * task_assign:
4455 *
4456 * Change the assigned processor set for the task
4457 */
4458 kern_return_t
4459 task_assign(
4460 __unused task_t task,
4461 __unused processor_set_t new_pset,
4462 __unused boolean_t assign_threads)
4463 {
4464 return(KERN_FAILURE);
4465 }
4466
4467 /*
4468 * task_assign_default:
4469 *
4470 * Version of task_assign to assign to default processor set.
4471 */
4472 kern_return_t
4473 task_assign_default(
4474 task_t task,
4475 boolean_t assign_threads)
4476 {
4477 return (task_assign(task, &pset0, assign_threads));
4478 }
4479
4480 /*
4481 * task_get_assignment
4482 *
4483 * Return name of processor set that task is assigned to.
4484 */
4485 kern_return_t
4486 task_get_assignment(
4487 task_t task,
4488 processor_set_t *pset)
4489 {
4490 if (!task || !task->active)
4491 return KERN_FAILURE;
4492
4493 *pset = &pset0;
4494
4495 return KERN_SUCCESS;
4496 }
4497
4498 uint64_t
4499 get_task_dispatchqueue_offset(
4500 task_t task)
4501 {
4502 return task->dispatchqueue_offset;
4503 }
4504
4505 /*
4506 * task_policy
4507 *
4508 * Set scheduling policy and parameters, both base and limit, for
4509 * the given task. Policy must be a policy which is enabled for the
4510 * processor set. Change contained threads if requested.
4511 */
4512 kern_return_t
4513 task_policy(
4514 __unused task_t task,
4515 __unused policy_t policy_id,
4516 __unused policy_base_t base,
4517 __unused mach_msg_type_number_t count,
4518 __unused boolean_t set_limit,
4519 __unused boolean_t change)
4520 {
4521 return(KERN_FAILURE);
4522 }
4523
4524 /*
4525 * task_set_policy
4526 *
4527 * Set scheduling policy and parameters, both base and limit, for
4528 * the given task. Policy can be any policy implemented by the
4529 * processor set, whether enabled or not. Change contained threads
4530 * if requested.
4531 */
4532 kern_return_t
4533 task_set_policy(
4534 __unused task_t task,
4535 __unused processor_set_t pset,
4536 __unused policy_t policy_id,
4537 __unused policy_base_t base,
4538 __unused mach_msg_type_number_t base_count,
4539 __unused policy_limit_t limit,
4540 __unused mach_msg_type_number_t limit_count,
4541 __unused boolean_t change)
4542 {
4543 return(KERN_FAILURE);
4544 }
4545
4546 kern_return_t
4547 task_set_ras_pc(
4548 __unused task_t task,
4549 __unused vm_offset_t pc,
4550 __unused vm_offset_t endpc)
4551 {
4552 return KERN_FAILURE;
4553 }
4554
4555 void
4556 task_synchronizer_destroy_all(task_t task)
4557 {
4558 /*
4559 * Destroy owned semaphores
4560 */
4561 semaphore_destroy_all(task);
4562 }
4563
4564 /*
4565 * Install default (machine-dependent) initial thread state
4566 * on the task. Subsequent thread creation will have this initial
4567 * state set on the thread by machine_thread_inherit_taskwide().
4568 * Flavors and structures are exactly the same as those passed to thread_set_state().
4569 */
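/*
 * Hedged illustrative sketch (not part of this file), assuming the
 * x86_64 flavor from <mach/thread_status.h>; "child_task" is a
 * hypothetical port for a task being set up:
 *
 *    x86_thread_state64_t st = { 0 };
 *    kern_return_t kr = task_set_state(child_task, x86_THREAD_STATE64,
 *        (thread_state_t)&st, x86_THREAD_STATE64_COUNT);
 *
 * Threads created in that task afterwards start with this state.
 */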
4570 kern_return_t
4571 task_set_state(
4572 task_t task,
4573 int flavor,
4574 thread_state_t state,
4575 mach_msg_type_number_t state_count)
4576 {
4577 kern_return_t ret;
4578
4579 if (task == TASK_NULL) {
4580 return (KERN_INVALID_ARGUMENT);
4581 }
4582
4583 task_lock(task);
4584
4585 if (!task->active) {
4586 task_unlock(task);
4587 return (KERN_FAILURE);
4588 }
4589
4590 ret = machine_task_set_state(task, flavor, state, state_count);
4591
4592 task_unlock(task);
4593 return ret;
4594 }
4595
4596 /*
4597 * Examine the default (machine-dependent) initial thread state
4598 * on the task, as set by task_set_state(). Flavors and structures
4599 * are exactly the same as those passed to thread_get_state().
4600 */
4601 kern_return_t
4602 task_get_state(
4603 task_t task,
4604 int flavor,
4605 thread_state_t state,
4606 mach_msg_type_number_t *state_count)
4607 {
4608 kern_return_t ret;
4609
4610 if (task == TASK_NULL) {
4611 return (KERN_INVALID_ARGUMENT);
4612 }
4613
4614 task_lock(task);
4615
4616 if (!task->active) {
4617 task_unlock(task);
4618 return (KERN_FAILURE);
4619 }
4620
4621 ret = machine_task_get_state(task, flavor, state, state_count);
4622
4623 task_unlock(task);
4624 return ret;
4625 }
4626
4627 #if CONFIG_MEMORYSTATUS
4628 #define HWM_USERCORE_MINSPACE 250 // free space (in MB) required *after* core file creation
4629
4630 void __attribute__((noinline))
4631 PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb, boolean_t is_fatal)
4632 {
4633 task_t task = current_task();
4634 int pid = 0;
4635 const char *procname = "unknown";
4636 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
4637
4638 #ifdef MACH_BSD
4639 pid = proc_selfpid();
4640
4641 if (pid == 1) {
4642 /*
4643 * Cannot have ReportCrash analyzing
4644 * a suspended initproc.
4645 */
4646 return;
4647 }
4648
4649 if (task->bsd_info != NULL)
4650 procname = proc_name_address(current_task()->bsd_info);
4651 #endif
4652 #if CONFIG_COREDUMP
4653 if (hwm_user_cores) {
4654 int error;
4655 uint64_t starttime, end;
4656 clock_sec_t secs = 0;
4657 uint32_t microsecs = 0;
4658
4659 starttime = mach_absolute_time();
4660 /*
4661 * Trigger a coredump of this process. Don't proceed unless we know we won't
4662 * be filling up the disk, and ignore the core size resource limit for this
4663 * core file.
4664 */
4665 if ((error = coredump(current_task()->bsd_info, HWM_USERCORE_MINSPACE, COREDUMP_IGNORE_ULIMIT)) != 0) {
4666 printf("couldn't take coredump of %s[%d]: %d\n", procname, pid, error);
4667 }
4668 /*
4669 * coredump() leaves the task suspended.
4670 */
4671 task_resume_internal(current_task());
4672
4673 end = mach_absolute_time();
4674 absolutetime_to_microtime(end - starttime, &secs, &microsecs);
4675 printf("coredump of %s[%d] taken in %d secs %d microsecs\n",
4676 proc_name_address(current_task()->bsd_info), pid, (int)secs, microsecs);
4677 }
4678 #endif /* CONFIG_COREDUMP */
4679
4680 if (disable_exc_resource) {
4681 printf("process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
4682 "supressed by a boot-arg.\n", procname, pid, max_footprint_mb);
4683 return;
4684 }
4685
4686 /*
4687 * A task that has triggered an EXC_RESOURCE should not be
4688 * jetsammed when the device is under memory pressure. Here
4689 * we set the P_MEMSTAT_TERMINATED flag so that the process
4690 * will be skipped if the memorystatus_thread wakes up.
4691 */
4692 proc_memstat_terminated(current_task()->bsd_info, TRUE);
4693
4694 printf("process %s[%d] crossed memory high watermark (%d MB); sending "
4695 "EXC_RESOURCE.\n", procname, pid, max_footprint_mb);
4696
4697 code[0] = code[1] = 0;
4698 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_MEMORY);
4699 EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_HIGH_WATERMARK);
4700 EXC_RESOURCE_HWM_ENCODE_LIMIT(code[0], max_footprint_mb);
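/*
 * Sketch of how an exception handler might unpack these codes,
 * assuming the decode macros that mirror the encoders above (they are
 * expected to live alongside them in <kern/exc_resource.h>):
 *
 *    if (EXC_RESOURCE_DECODE_RESOURCE_TYPE(code[0]) == RESOURCE_TYPE_MEMORY &&
 *        EXC_RESOURCE_DECODE_FLAVOR(code[0]) == FLAVOR_HIGH_WATERMARK) {
 *        int limit_mb = EXC_RESOURCE_HWM_DECODE_LIMIT(code[0]);
 *    }
 */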
4701
4702 /* Do not generate a corpse fork if the violation is a fatal one */
4703 if (is_fatal || exc_via_corpse_forking == 0) {
4704 /* Do not send an EXC_RESOURCE if corpse_for_fatal_memkill is set */
4705 if (corpse_for_fatal_memkill == 0) {
4706 /*
4707 * Use the _internal_ variant so that no user-space
4708 * process can resume our task from under us.
4709 */
4710 task_suspend_internal(task);
4711 exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
4712 task_resume_internal(task);
4713 }
4714 } else {
4715 task_enqueue_exception_with_corpse(task, code, EXCEPTION_CODE_MAX);
4716 }
4717
4718 /*
4719 * After the EXC_RESOURCE has been handled, we must clear the
4720 * P_MEMSTAT_TERMINATED flag so that the process can again be
4721 * considered for jetsam if the memorystatus_thread wakes up.
4722 */
4723 proc_memstat_terminated(current_task()->bsd_info, FALSE); /* clear the flag */
4724 }
4725
4726 /*
4727 * Callback invoked when a task exceeds its physical footprint limit.
4728 */
4729 void
4730 task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1)
4731 {
4732 ledger_amount_t max_footprint, max_footprint_mb;
4733 task_t task;
4734 boolean_t is_fatal;
4735 boolean_t trigger_exception;
4736
4737 if (warning == LEDGER_WARNING_DIPPED_BELOW) {
4738 /*
4739 * Task memory limits only provide a warning on the way up.
4740 */
4741 return;
4742 }
4743
4744 task = current_task();
4745
4746 ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &max_footprint);
4747 max_footprint_mb = max_footprint >> 20;
4748
4749 /*
4750 * Capture the trigger exception flag before turning off the exception.
4751 */
4752 trigger_exception = task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PHYS_FOOTPRINT_EXCEPTION ? TRUE : FALSE;
4753
4754 is_fatal = memorystatus_turnoff_exception_and_get_fatalness((warning == LEDGER_WARNING_ROSE_ABOVE) ? TRUE : FALSE, (int)max_footprint_mb);
4755
4756 /*
4757 * If this is an actual violation (not a warning),
4758 * generate a non-fatal high watermark EXC_RESOURCE.
4759 */
4760 if ((warning == 0) && trigger_exception) {
4761 PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND((int)max_footprint_mb, is_fatal);
4762 }
4763
4764 memorystatus_on_ledger_footprint_exceeded((warning == LEDGER_WARNING_ROSE_ABOVE) ? TRUE : FALSE,
4765 is_fatal);
4766 }
4767
4768 extern int proc_check_footprint_priv(void);
4769
4770 kern_return_t
4771 task_set_phys_footprint_limit(
4772 task_t task,
4773 int new_limit_mb,
4774 int *old_limit_mb)
4775 {
4776 kern_return_t error;
4777
4778 if ((error = proc_check_footprint_priv())) {
4779 return (KERN_NO_ACCESS);
4780 }
4781
4782 return task_set_phys_footprint_limit_internal(task, new_limit_mb, old_limit_mb, FALSE);
4783 }
4784
4785 kern_return_t
4786 task_convert_phys_footprint_limit(
4787 int limit_mb,
4788 int *converted_limit_mb)
4789 {
4790 if (limit_mb == -1) {
4791 /*
4792 * No limit
4793 */
4794 if (max_task_footprint != 0) {
4795 *converted_limit_mb = (int)(max_task_footprint / 1024 / 1024); /* bytes to MB */
4796 } else {
4797 *converted_limit_mb = (int)(LEDGER_LIMIT_INFINITY >> 20);
4798 }
4799 } else {
4800 /* nothing to convert */
4801 *converted_limit_mb = limit_mb;
4802 }
4803 return (KERN_SUCCESS);
4804 }
4805
4806
4807 kern_return_t
4808 task_set_phys_footprint_limit_internal(
4809 task_t task,
4810 int new_limit_mb,
4811 int *old_limit_mb,
4812 boolean_t trigger_exception)
4813 {
4814 ledger_amount_t old;
4815
4816 ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &old);
4817
4818 if (old_limit_mb) {
4819 /*
4820 * Check that limit >> 20 will not give an "unexpected" 32-bit
4821 * result. There are, however, implicit assumptions that -1 mb limit
4822 * equates to LEDGER_LIMIT_INFINITY.
4823 */
4824 assert(((old & 0xFFF0000000000000LL) == 0) || (old == LEDGER_LIMIT_INFINITY));
4825 *old_limit_mb = (int)(old >> 20);
4826 }
4827
4828 if (new_limit_mb == -1) {
4829 /*
4830 * Caller wishes to remove the limit.
4831 */
4832 ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
4833 max_task_footprint ? max_task_footprint : LEDGER_LIMIT_INFINITY,
4834 max_task_footprint ? max_task_footprint_warning_level : 0);
4835 return (KERN_SUCCESS);
4836 }
4837
4838 #ifdef CONFIG_NOMONITORS
4839 return (KERN_SUCCESS);
4840 #endif /* CONFIG_NOMONITORS */
4841
4842 task_lock(task);
4843
4844 if (trigger_exception) {
4845 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_PHYS_FOOTPRINT_EXCEPTION;
4846 } else {
4847 task->rusage_cpu_flags &= ~TASK_RUSECPU_FLAGS_PHYS_FOOTPRINT_EXCEPTION;
4848 }
4849
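/*
 * The shift converts megabytes to bytes: e.g. a hypothetical
 * new_limit_mb of 512 becomes 512 << 20 = 536870912 bytes.
 */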
4850 ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
4851 (ledger_amount_t)new_limit_mb << 20, PHYS_FOOTPRINT_WARNING_LEVEL);
4852
4853 if (task == current_task()) {
4854 ledger_check_new_balance(task->ledger, task_ledgers.phys_footprint);
4855 }
4856
4857 task_unlock(task);
4858
4859 return (KERN_SUCCESS);
4860 }
4861
4862 kern_return_t
4863 task_get_phys_footprint_limit(
4864 task_t task,
4865 int *limit_mb)
4866 {
4867 ledger_amount_t limit;
4868
4869 ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &limit);
4870 /*
4871 * Check that limit >> 20 will not give an "unexpected" signed, 32-bit
4872 * result. There are, however, implicit assumptions that -1 mb limit
4873 * equates to LEDGER_LIMIT_INFINITY.
4874 */
4875 assert(((limit & 0xFFF0000000000000LL) == 0) || (limit == LEDGER_LIMIT_INFINITY));
4876 *limit_mb = (int)(limit >> 20);
4877
4878 return (KERN_SUCCESS);
4879 }
4880 #else /* CONFIG_MEMORYSTATUS */
4881 kern_return_t
4882 task_set_phys_footprint_limit(
4883 __unused task_t task,
4884 __unused int new_limit_mb,
4885 __unused int *old_limit_mb)
4886 {
4887 return (KERN_FAILURE);
4888 }
4889
4890 kern_return_t
4891 task_get_phys_footprint_limit(
4892 __unused task_t task,
4893 __unused int *limit_mb)
4894 {
4895 return (KERN_FAILURE);
4896 }
4897 #endif /* CONFIG_MEMORYSTATUS */
4898
4899 /*
4900 * We need to export some functions to other components that
4901 * are currently implemented in macros within the osfmk
4902 * component. Just export them as functions of the same name.
4903 */
4904 boolean_t is_kerneltask(task_t t)
4905 {
4906 if (t == kernel_task)
4907 return (TRUE);
4908
4909 return (FALSE);
4910 }
4911
4912 boolean_t is_corpsetask(task_t t)
4913 {
4914 return (task_is_a_corpse(t));
4915 }
4916
4917 #undef current_task
4918 task_t current_task(void);
4919 task_t current_task(void)
4920 {
4921 return (current_task_fast());
4922 }
4923
4924 #undef task_reference
4925 void task_reference(task_t task);
4926 void
4927 task_reference(
4928 task_t task)
4929 {
4930 if (task != TASK_NULL)
4931 task_reference_internal(task);
4932 }
4933
4934 /* defined in bsd/kern/kern_prot.c */
4935 extern int get_audit_token_pid(audit_token_t *audit_token);
4936
4937 int task_pid(task_t task)
4938 {
4939 if (task)
4940 return get_audit_token_pid(&task->audit_token);
4941 return -1;
4942 }
4943
4944
4945 /*
4946 * This routine finds a thread in a task by its unique id
4947 * Returns a referenced thread or THREAD_NULL if the thread was not found
4948 *
4949 * TODO: This is super inefficient - it's an O(threads in task) list walk!
4950 * We should make a tid hash, or transition all tid clients to thread ports
4951 *
4952 * Precondition: No locks held (will take task lock)
4953 */
4954 thread_t
4955 task_findtid(task_t task, uint64_t tid)
4956 {
4957 thread_t self = current_thread();
4958 thread_t found_thread = THREAD_NULL;
4959 thread_t iter_thread = THREAD_NULL;
4960
4961 /* Short-circuit the lookup if we're looking up ourselves */
4962 if (tid == self->thread_id || tid == TID_NULL) {
4963 assert(self->task == task);
4964
4965 thread_reference(self);
4966
4967 return self;
4968 }
4969
4970 task_lock(task);
4971
4972 queue_iterate(&task->threads, iter_thread, thread_t, task_threads) {
4973 if (iter_thread->thread_id == tid) {
4974 found_thread = iter_thread;
4975 thread_reference(found_thread);
4976 break;
4977 }
4978 }
4979
4980 task_unlock(task);
4981
4982 return (found_thread);
4983 }
4984
4985
4986 /*
4987 * Control the CPU usage monitor for a task.
4988 */
4989 kern_return_t
4990 task_cpu_usage_monitor_ctl(task_t task, uint32_t *flags)
4991 {
4992 int error = KERN_SUCCESS;
4993
4994 if (*flags & CPUMON_MAKE_FATAL) {
4995 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_CPUMON;
4996 } else {
4997 error = KERN_INVALID_ARGUMENT;
4998 }
4999
5000 return error;
5001 }
5002
5003 /*
5004 * Control the wakeups monitor for a task.
5005 */
5006 kern_return_t
5007 task_wakeups_monitor_ctl(task_t task, uint32_t *flags, int32_t *rate_hz)
5008 {
5009 ledger_t ledger = task->ledger;
5010
5011 task_lock(task);
5012 if (*flags & WAKEMON_GET_PARAMS) {
5013 ledger_amount_t limit;
5014 uint64_t period;
5015
5016 ledger_get_limit(ledger, task_ledgers.interrupt_wakeups, &limit);
5017 ledger_get_period(ledger, task_ledgers.interrupt_wakeups, &period);
5018
5019 if (limit != LEDGER_LIMIT_INFINITY) {
5020 /*
5021 * An active limit means the wakeups monitor is enabled.
5022 */
5023 *rate_hz = (int32_t)(limit / (int64_t)(period / NSEC_PER_SEC));
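/*
 * e.g. with a hypothetical limit of 45000 wakeups refilled over a
 * 300-second period this reports 45000 / 300 = 150 wakes per second.
 */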
5024 *flags = WAKEMON_ENABLE;
5025 if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON) {
5026 *flags |= WAKEMON_MAKE_FATAL;
5027 }
5028 } else {
5029 *flags = WAKEMON_DISABLE;
5030 *rate_hz = -1;
5031 }
5032
5033 /*
5034 * If WAKEMON_GET_PARAMS is present in flags, all other flags are ignored.
5035 */
5036 task_unlock(task);
5037 return KERN_SUCCESS;
5038 }
5039
5040 if (*flags & WAKEMON_ENABLE) {
5041 if (*flags & WAKEMON_SET_DEFAULTS) {
5042 *rate_hz = task_wakeups_monitor_rate;
5043 }
5044
5045 #ifndef CONFIG_NOMONITORS
5046 if (*flags & WAKEMON_MAKE_FATAL) {
5047 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON;
5048 }
5049 #endif /* CONFIG_NOMONITORS */
5050
5051 if (*rate_hz <= 0) {
5052 task_unlock(task);
5053 return KERN_INVALID_ARGUMENT;
5054 }
5055
5056 #ifndef CONFIG_NOMONITORS
5057 ledger_set_limit(ledger, task_ledgers.interrupt_wakeups, *rate_hz * task_wakeups_monitor_interval,
5058 task_wakeups_monitor_ustackshots_trigger_pct);
5059 ledger_set_period(ledger, task_ledgers.interrupt_wakeups, task_wakeups_monitor_interval * NSEC_PER_SEC);
5060 ledger_enable_callback(ledger, task_ledgers.interrupt_wakeups);
5061 #endif /* CONFIG_NOMONITORS */
5062 } else if (*flags & WAKEMON_DISABLE) {
5063 /*
5064 * Caller wishes to disable wakeups monitor on the task.
5065 *
5066 * Disable telemetry if it was triggered by the wakeups monitor, and
5067 * remove the limit & callback on the wakeups ledger entry.
5068 */
5069 #if CONFIG_TELEMETRY
5070 telemetry_task_ctl_locked(task, TF_WAKEMON_WARNING, 0);
5071 #endif
5072 ledger_disable_refill(ledger, task_ledgers.interrupt_wakeups);
5073 ledger_disable_callback(ledger, task_ledgers.interrupt_wakeups);
5074 }
5075
5076 task_unlock(task);
5077 return KERN_SUCCESS;
5078 }
5079
5080 void
5081 task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1)
5082 {
5083 if (warning == LEDGER_WARNING_ROSE_ABOVE) {
5084 #if CONFIG_TELEMETRY
5085 /*
5086 * This task is in danger of violating the wakeups monitor. Enable telemetry on this task
5087 * so there are micro-stackshots available if and when EXC_RESOURCE is triggered.
5088 */
5089 telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 1);
5090 #endif
5091 return;
5092 }
5093
5094 #if CONFIG_TELEMETRY
5095 /*
5096 * If the balance has dipped below the warning level (LEDGER_WARNING_DIPPED_BELOW) or
5097 * exceeded the limit, turn telemetry off for the task.
5098 */
5099 telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 0);
5100 #endif
5101
5102 if (warning == 0) {
5103 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS();
5104 }
5105 }
5106
5107 void __attribute__((noinline))
5108 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS(void)
5109 {
5110 task_t task = current_task();
5111 int pid = 0;
5112 const char *procname = "unknown";
5113 boolean_t fatal;
5114 kern_return_t kr;
5115 #ifdef EXC_RESOURCE_MONITORS
5116 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
5117 #endif /* EXC_RESOURCE_MONITORS */
5118 struct ledger_entry_info lei;
5119
5120 #ifdef MACH_BSD
5121 pid = proc_selfpid();
5122 if (task->bsd_info != NULL)
5123 procname = proc_name_address(current_task()->bsd_info);
5124 #endif
5125
5126 ledger_get_entry_info(task->ledger, task_ledgers.interrupt_wakeups, &lei);
5127
5128 /*
5129 * Disable the exception notification so we don't overwhelm
5130 * the listener with an endless stream of redundant exceptions.
5131 * TODO: detect whether another thread is already reporting the violation.
5132 */
5133 uint32_t flags = WAKEMON_DISABLE;
5134 task_wakeups_monitor_ctl(task, &flags, NULL);
5135
5136 fatal = task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON;
5137 trace_resource_violation(RMON_CPUWAKES_VIOLATED, &lei);
5138 printf("process %s[%d] caught waking the CPU %llu times "
5139 "over ~%llu seconds, averaging %llu wakes / second and "
5140 "violating a %slimit of %llu wakes over %llu seconds.\n",
5141 procname, pid,
5142 lei.lei_balance, lei.lei_last_refill / NSEC_PER_SEC,
5143 lei.lei_last_refill == 0 ? 0 :
5144 (NSEC_PER_SEC * lei.lei_balance / lei.lei_last_refill),
5145 fatal ? "FATAL " : "",
5146 lei.lei_limit, lei.lei_refill_period / NSEC_PER_SEC);
5147
5148 kr = send_resource_violation(send_cpu_wakes_violation, task, &lei,
5149 fatal ? kRNFatalLimitFlag : 0);
5150 if (kr) {
5151 printf("send_resource_violation(CPU wakes, ...): error %#x\n", kr);
5152 }
5153
5154 #ifdef EXC_RESOURCE_MONITORS
5155 if (disable_exc_resource) {
5156 printf("process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
5157 "supressed by a boot-arg\n", procname, pid);
5158 return;
5159 }
5160 if (audio_active) {
5161 printf("process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
5162 "supressed due to audio playback\n", procname, pid);
5163 return;
5164 }
5165 if (lei.lei_last_refill == 0) {
5166 printf("process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
5167 "supressed due to lei.lei_last_refill = 0 \n", procname, pid);
5168 }
5169
5170 code[0] = code[1] = 0;
5171 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_WAKEUPS);
5172 EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_WAKEUPS_MONITOR);
5173 EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_PERMITTED(code[0],
5174 NSEC_PER_SEC * lei.lei_limit / lei.lei_refill_period);
5175 EXC_RESOURCE_CPUMONITOR_ENCODE_OBSERVATION_INTERVAL(code[0],
5176 lei.lei_last_refill);
5177 EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_OBSERVED(code[1],
5178 NSEC_PER_SEC * lei.lei_balance / lei.lei_last_refill);
5179 exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
5180 #endif /* EXC_RESOURCE_MONITORS */
5181
5182 if (fatal) {
5183 task_terminate_internal(task);
5184 }
5185 }
5186
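/*
 * Lock-free accumulation of the system-wide logical-write byte count:
 * retry the compare-and-swap until our delta lands, and reset the
 * counter to zero (reporting that telemetry is due) once it crosses
 * io_telemetry_limit.
 */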
5187 static boolean_t
5188 global_update_logical_writes(int64_t io_delta)
5189 {
5190 int64_t old_count, new_count;
5191 boolean_t needs_telemetry;
5192
5193 do {
5194 new_count = old_count = global_logical_writes_count;
5195 new_count += io_delta;
5196 if (new_count >= io_telemetry_limit) {
5197 new_count = 0;
5198 needs_telemetry = TRUE;
5199 } else {
5200 needs_telemetry = FALSE;
5201 }
5202 } while(!OSCompareAndSwap64(old_count, new_count, &global_logical_writes_count));
5203 return needs_telemetry;
5204 }
5205
5206 void task_update_logical_writes(task_t task, uint32_t io_size, int flags, void *vp)
5207 {
5208 int64_t io_delta = 0;
5209 boolean_t needs_telemetry = FALSE;
5210
5211 if ((!task) || (!io_size) || (!vp))
5212 return;
5213
5214 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_DATA_WRITE)) | DBG_FUNC_NONE,
5215 task_pid(task), io_size, flags, (uintptr_t)VM_KERNEL_ADDRPERM(vp), 0);
5216 DTRACE_IO4(logical_writes, struct task *, task, uint32_t, io_size, int, flags, vnode *, vp);
5217 switch(flags) {
5218 case TASK_WRITE_IMMEDIATE:
5219 OSAddAtomic64(io_size, (SInt64 *)&(task->task_immediate_writes));
5220 ledger_credit(task->ledger, task_ledgers.logical_writes, io_size);
5221 break;
5222 case TASK_WRITE_DEFERRED:
5223 OSAddAtomic64(io_size, (SInt64 *)&(task->task_deferred_writes));
5224 ledger_credit(task->ledger, task_ledgers.logical_writes, io_size);
5225 break;
5226 case TASK_WRITE_INVALIDATED:
5227 OSAddAtomic64(io_size, (SInt64 *)&(task->task_invalidated_writes));
5228 ledger_debit(task->ledger, task_ledgers.logical_writes, io_size);
5229 break;
5230 case TASK_WRITE_METADATA:
5231 OSAddAtomic64(io_size, (SInt64 *)&(task->task_metadata_writes));
5232 ledger_credit(task->ledger, task_ledgers.logical_writes, io_size);
5233 break;
5234 }
5235
5236 io_delta = (flags == TASK_WRITE_INVALIDATED) ? ((int64_t)io_size * -1ll) : ((int64_t)io_size);
5237 if (io_telemetry_limit != 0) {
5238 /* If io_telemetry_limit is 0, disable global updates and I/O telemetry */
5239 needs_telemetry = global_update_logical_writes(io_delta);
5240 if (needs_telemetry) {
5241 act_set_io_telemetry_ast(current_thread());
5242 }
5243 }
5244 }
5245
5246 /*
5247 * Control the I/O monitor for a task.
5248 */
5249 kern_return_t
5250 task_io_monitor_ctl(task_t task, uint32_t *flags)
5251 {
5252 ledger_t ledger = task->ledger;
5253
5254 task_lock(task);
5255 if (*flags & IOMON_ENABLE) {
5256 /* Configure the physical I/O ledger */
5257 ledger_set_limit(ledger, task_ledgers.physical_writes, (task_iomon_limit_mb * 1024 * 1024), 0);
5258 ledger_set_period(ledger, task_ledgers.physical_writes, (task_iomon_interval_secs * NSEC_PER_SEC));
5259
5260 /* Configure the logical I/O ledger */
5261 ledger_set_limit(ledger, task_ledgers.logical_writes, (task_iomon_limit_mb * 1024 * 1024), 0);
5262 ledger_set_period(ledger, task_ledgers.logical_writes, (task_iomon_interval_secs * NSEC_PER_SEC));
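/*
 * Both limits are task_iomon_limit_mb converted to bytes over a
 * task_iomon_interval_secs refill period; e.g. a hypothetical 1024 MB
 * limit becomes 1024 * 1024 * 1024 = 1073741824 bytes per period.
 */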
5263
5264 } else if (*flags & IOMON_DISABLE) {
5265 /*
5266 * Caller wishes to disable I/O monitor on the task.
5267 */
5268 ledger_disable_refill(ledger, task_ledgers.physical_writes);
5269 ledger_disable_callback(ledger, task_ledgers.physical_writes);
5270 ledger_disable_refill(ledger, task_ledgers.logical_writes);
5271 ledger_disable_callback(ledger, task_ledgers.logical_writes);
5272 }
5273
5274 task_unlock(task);
5275 return KERN_SUCCESS;
5276 }
5277
5278 void
5279 task_io_rate_exceeded(int warning, const void *param0, __unused const void *param1)
5280 {
5281 if (warning == 0) {
5282 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO((int)param0);
5283 }
5284 }
5285
5286 void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO(int flavor)
5287 {
5288 int pid = 0;
5289 task_t task = current_task();
5290 #ifdef EXC_RESOURCE_MONITORS
5291 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
5292 #endif /* EXC_RESOURCE_MONITORS */
5293 struct ledger_entry_info lei;
5294 kern_return_t kr;
5295
5296 #ifdef MACH_BSD
5297 pid = proc_selfpid();
5298 #endif
5299 /*
5300 * Get the ledger entry info. We need to do this before disabling the exception
5301 * to get correct values for all fields.
5302 */
5303 switch(flavor) {
5304 case FLAVOR_IO_PHYSICAL_WRITES:
5305 ledger_get_entry_info(task->ledger, task_ledgers.physical_writes, &lei);
5306 break;
5307 case FLAVOR_IO_LOGICAL_WRITES:
5308 ledger_get_entry_info(task->ledger, task_ledgers.logical_writes, &lei);
5309 break;
5310 }
5311
5312
5313 /*
5314 * Disable the exception notification so we don't overwhelm
5315 * the listener with an endless stream of redundant exceptions.
5316 * TODO: detect whether another thread is already reporting the violation.
5317 */
5318 uint32_t flags = IOMON_DISABLE;
5319 task_io_monitor_ctl(task, &flags);
5320
5321 if (flavor == FLAVOR_IO_LOGICAL_WRITES) {
5322 trace_resource_violation(RMON_LOGWRITES_VIOLATED, &lei);
5323 }
5324 printf("process [%d] caught causing excessive I/O (flavor: %d). Task I/O: %lld MB. [Limit : %lld MB per %lld secs]\n",
5325 pid, flavor, (lei.lei_balance / (1024 * 1024)), (lei.lei_limit / (1024 * 1024)), (lei.lei_refill_period / NSEC_PER_SEC));
5326
5327 kr = send_resource_violation(send_disk_writes_violation, task, &lei, kRNFlagsNone);
5328 if (kr) {
5329 printf("send_resource_violation(disk_writes, ...): error %#x\n", kr);
5330 }
5331
5332 #ifdef EXC_RESOURCE_MONITORS
5333 code[0] = code[1] = 0;
5334 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_IO);
5335 EXC_RESOURCE_ENCODE_FLAVOR(code[0], flavor);
5336 EXC_RESOURCE_IO_ENCODE_INTERVAL(code[0], (lei.lei_refill_period / NSEC_PER_SEC));
5337 EXC_RESOURCE_IO_ENCODE_LIMIT(code[0], (lei.lei_limit / (1024 * 1024)));
5338 EXC_RESOURCE_IO_ENCODE_OBSERVED(code[1], (lei.lei_balance / (1024 * 1024)));
5339 exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
5340 #endif /* EXC_RESOURCE_MONITORS */
5341 }
5342
5343 /* Placeholders for the task set/get voucher interfaces */
5344 kern_return_t
5345 task_get_mach_voucher(
5346 task_t task,
5347 mach_voucher_selector_t __unused which,
5348 ipc_voucher_t *voucher)
5349 {
5350 if (TASK_NULL == task)
5351 return KERN_INVALID_TASK;
5352
5353 *voucher = NULL;
5354 return KERN_SUCCESS;
5355 }
5356
5357 kern_return_t
5358 task_set_mach_voucher(
5359 task_t task,
5360 ipc_voucher_t __unused voucher)
5361 {
5362 if (TASK_NULL == task)
5363 return KERN_INVALID_TASK;
5364
5365 return KERN_SUCCESS;
5366 }
5367
5368 kern_return_t
5369 task_swap_mach_voucher(
5370 task_t task,
5371 ipc_voucher_t new_voucher,
5372 ipc_voucher_t *in_out_old_voucher)
5373 {
5374 if (TASK_NULL == task)
5375 return KERN_INVALID_TASK;
5376
5377 *in_out_old_voucher = new_voucher;
5378 return KERN_SUCCESS;
5379 }
5380
5381 void task_set_gpu_denied(task_t task, boolean_t denied)
5382 {
5383 task_lock(task);
5384
5385 if (denied) {
5386 task->t_flags |= TF_GPU_DENIED;
5387 } else {
5388 task->t_flags &= ~TF_GPU_DENIED;
5389 }
5390
5391 task_unlock(task);
5392 }
5393
5394 boolean_t task_is_gpu_denied(task_t task)
5395 {
5396 /* We don't need the lock to read this flag */
5397 return (task->t_flags & TF_GPU_DENIED) ? TRUE : FALSE;
5398 }
5399
5400
5401 uint64_t get_task_memory_region_count(task_t task)
5402 {
5403 vm_map_t map;
5404 map = (task == kernel_task) ? kernel_map: task->map;
5405 return((uint64_t)get_map_nentries(map));
5406 }
5407
5408 static void
5409 kdebug_trace_dyld_internal(uint32_t base_code,
5410 struct dyld_kernel_image_info *info)
5411 {
5412 static_assert(sizeof(info->uuid) >= 16);
5413
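/*
 * The image identity is emitted as a pair of kdebug events (three on
 * 32-bit): the 16-byte UUID travels as two 64-bit arguments, and the
 * fsid / fsobjid pairs are each packed into a single 64-bit argument.
 */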
5414 #if defined(__LP64__)
5415 uint64_t *uuid = (uint64_t *)&(info->uuid);
5416
5417 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
5418 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code), uuid[0],
5419 uuid[1], info->load_addr,
5420 (uint64_t)info->fsid.val[0] | ((uint64_t)info->fsid.val[1] << 32),
5421 0);
5422 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
5423 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 1),
5424 (uint64_t)info->fsobjid.fid_objno |
5425 ((uint64_t)info->fsobjid.fid_generation << 32),
5426 0, 0, 0, 0);
5427 #else /* defined(__LP64__) */
5428 uint32_t *uuid = (uint32_t *)&(info->uuid);
5429
5430 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
5431 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 2), uuid[0],
5432 uuid[1], uuid[2], uuid[3], 0);
5433 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
5434 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 3),
5435 (uint32_t)info->load_addr, info->fsid.val[0], info->fsid.val[1],
5436 info->fsobjid.fid_objno, 0);
5437 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
5438 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 4),
5439 info->fsobjid.fid_generation, 0, 0, 0, 0);
5440 #endif /* !defined(__LP64__) */
5441 }
5442
5443 static kern_return_t
5444 kdebug_trace_dyld(task_t task, uint32_t base_code,
5445 vm_map_copy_t infos_copy, mach_msg_type_number_t infos_len)
5446 {
5447 kern_return_t kr;
5448 dyld_kernel_image_info_array_t infos;
5449 vm_map_offset_t map_data;
5450 vm_offset_t data;
5451
5452 assert(infos_copy != NULL);
5453
5454 if (task == NULL || task != current_task()) {
5455 return KERN_INVALID_TASK;
5456 }
5457
5458 kr = vm_map_copyout(ipc_kernel_map, &map_data, (vm_map_copy_t)infos_copy);
5459 if (kr != KERN_SUCCESS) {
5460 return kr;
5461 }
5462
5463 infos = CAST_DOWN(dyld_kernel_image_info_array_t, map_data);
5464
5465 for (mach_msg_type_number_t i = 0; i < infos_len; i++) {
5466 kdebug_trace_dyld_internal(base_code, &(infos[i]));
5467 }
5468
5469 data = CAST_DOWN(vm_offset_t, map_data);
5470 mach_vm_deallocate(ipc_kernel_map, data, infos_len * sizeof(infos[0]));
5471 return KERN_SUCCESS;
5472 }
5473
5474 kern_return_t
5475 task_register_dyld_image_infos(task_t task,
5476 dyld_kernel_image_info_array_t infos_copy,
5477 mach_msg_type_number_t infos_len)
5478 {
5479 return kdebug_trace_dyld(task, DBG_DYLD_UUID_MAP_A,
5480 (vm_map_copy_t)infos_copy, infos_len);
5481 }
5482
5483 kern_return_t
5484 task_unregister_dyld_image_infos(task_t task,
5485 dyld_kernel_image_info_array_t infos_copy,
5486 mach_msg_type_number_t infos_len)
5487 {
5488 return kdebug_trace_dyld(task, DBG_DYLD_UUID_UNMAP_A,
5489 (vm_map_copy_t)infos_copy, infos_len);
5490 }
5491
5492 kern_return_t
5493 task_get_dyld_image_infos(__unused task_t task,
5494 __unused dyld_kernel_image_info_array_t * dyld_images,
5495 __unused mach_msg_type_number_t * dyld_imagesCnt)
5496 {
5497 return KERN_NOT_SUPPORTED;
5498 }
5499
5500 kern_return_t
5501 task_register_dyld_shared_cache_image_info(task_t task,
5502 dyld_kernel_image_info_t cache_img,
5503 __unused boolean_t no_cache,
5504 __unused boolean_t private_cache)
5505 {
5506 if (task == NULL || task != current_task()) {
5507 return KERN_INVALID_TASK;
5508 }
5509
5510 kdebug_trace_dyld_internal(DBG_DYLD_UUID_SHARED_CACHE_A, &cache_img);
5511 return KERN_SUCCESS;
5512 }
5513
5514 kern_return_t
5515 task_register_dyld_set_dyld_state(__unused task_t task,
5516 __unused uint8_t dyld_state)
5517 {
5518 return KERN_NOT_SUPPORTED;
5519 }
5520
5521 kern_return_t
5522 task_register_dyld_get_process_state(__unused task_t task,
5523 __unused dyld_kernel_process_info_t * dyld_process_state)
5524 {
5525 return KERN_NOT_SUPPORTED;
5526 }
5527
5528 #if CONFIG_SECLUDED_MEMORY
5529 int num_tasks_can_use_secluded_mem = 0;
5530
5531 void
5532 task_set_can_use_secluded_mem(
5533 task_t task,
5534 boolean_t can_use_secluded_mem)
5535 {
5536 if (!task->task_could_use_secluded_mem) {
5537 return;
5538 }
5539 task_lock(task);
5540 task_set_can_use_secluded_mem_locked(task, can_use_secluded_mem);
5541 task_unlock(task);
5542 }
5543
5544 void
5545 task_set_can_use_secluded_mem_locked(
5546 task_t task,
5547 boolean_t can_use_secluded_mem)
5548 {
5549 assert(task->task_could_use_secluded_mem);
5550 if (can_use_secluded_mem &&
5551 secluded_for_apps && /* global boot-arg */
5552 !task->task_can_use_secluded_mem) {
5553 assert(num_tasks_can_use_secluded_mem >= 0);
5554 OSAddAtomic(+1,
5555 (volatile SInt32 *)&num_tasks_can_use_secluded_mem);
5556 task->task_can_use_secluded_mem = TRUE;
5557 } else if (!can_use_secluded_mem &&
5558 task->task_can_use_secluded_mem) {
5559 assert(num_tasks_can_use_secluded_mem > 0);
5560 OSAddAtomic(-1,
5561 (volatile SInt32 *)&num_tasks_can_use_secluded_mem);
5562 task->task_can_use_secluded_mem = FALSE;
5563 }
5564 }
5565
5566 void
5567 task_set_could_use_secluded_mem(
5568 task_t task,
5569 boolean_t could_use_secluded_mem)
5570 {
5571 task->task_could_use_secluded_mem = could_use_secluded_mem;
5572 }
5573
5574 void
5575 task_set_could_also_use_secluded_mem(
5576 task_t task,
5577 boolean_t could_also_use_secluded_mem)
5578 {
5579 task->task_could_also_use_secluded_mem = could_also_use_secluded_mem;
5580 }
5581
5582 boolean_t
5583 task_can_use_secluded_mem(
5584 task_t task)
5585 {
5586 if (task->task_can_use_secluded_mem) {
5587 assert(task->task_could_use_secluded_mem);
5588 assert(num_tasks_can_use_secluded_mem > 0);
5589 return TRUE;
5590 }
5591 if (task->task_could_also_use_secluded_mem &&
5592 num_tasks_can_use_secluded_mem > 0) {
5593 assert(num_tasks_can_use_secluded_mem > 0);
5594 return TRUE;
5595 }
5596 return FALSE;
5597 }
5598
5599 boolean_t
5600 task_could_use_secluded_mem(
5601 task_t task)
5602 {
5603 return task->task_could_use_secluded_mem;
5604 }
5605 #endif /* CONFIG_SECLUDED_MEMORY */
5606
5607 queue_head_t *
5608 task_io_user_clients(task_t task)
5609 {
5610 return (&task->io_user_clients);
5611 }