]> git.saurik.com Git - apple/xnu.git/blob - osfmk/kern/task.c
xnu-2782.20.48.tar.gz
[apple/xnu.git] / osfmk / kern / task.c
1 /*
2 * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_FREE_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 * File: kern/task.c
58 * Author: Avadis Tevanian, Jr., Michael Wayne Young, David Golub,
59 * David Black
60 *
61 * Task management primitives implementation.
62 */
63 /*
64 * Copyright (c) 1993 The University of Utah and
65 * the Computer Systems Laboratory (CSL). All rights reserved.
66 *
67 * Permission to use, copy, modify and distribute this software and its
68 * documentation is hereby granted, provided that both the copyright
69 * notice and this permission notice appear in all copies of the
70 * software, derivative works or modified versions, and any portions
71 * thereof, and that both notices appear in supporting documentation.
72 *
73 * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
74 * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
75 * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
76 *
77 * CSL requests users of this software to return to csl-dist@cs.utah.edu any
78 * improvements that they make and grant CSL redistribution rights.
79 *
80 */
81 /*
82 * NOTICE: This file was modified by McAfee Research in 2004 to introduce
83 * support for mandatory and extensible security protections. This notice
84 * is included in support of clause 2.2 (b) of the Apple Public License,
85 * Version 2.0.
86 * Copyright (c) 2005 SPARTA, Inc.
87 */
88
89 #include <mach/mach_types.h>
90 #include <mach/boolean.h>
91 #include <mach/host_priv.h>
92 #include <mach/machine/vm_types.h>
93 #include <mach/vm_param.h>
94 #include <mach/semaphore.h>
95 #include <mach/task_info.h>
96 #include <mach/task_special_ports.h>
97
98 #include <ipc/ipc_importance.h>
99 #include <ipc/ipc_types.h>
100 #include <ipc/ipc_space.h>
101 #include <ipc/ipc_entry.h>
102 #include <ipc/ipc_hash.h>
103
104 #include <kern/kern_types.h>
105 #include <kern/mach_param.h>
106 #include <kern/misc_protos.h>
107 #include <kern/task.h>
108 #include <kern/thread.h>
109 #include <kern/coalition.h>
110 #include <kern/zalloc.h>
111 #include <kern/kalloc.h>
112 #include <kern/processor.h>
113 #include <kern/sched_prim.h> /* for thread_wakeup */
114 #include <kern/ipc_tt.h>
115 #include <kern/host.h>
116 #include <kern/clock.h>
117 #include <kern/timer.h>
118 #include <kern/assert.h>
119 #include <kern/sync_lock.h>
120 #include <kern/affinity.h>
121 #include <kern/exc_resource.h>
122 #if CONFIG_TELEMETRY
123 #include <kern/telemetry.h>
124 #endif
125
126 #include <vm/pmap.h>
127 #include <vm/vm_map.h>
128 #include <vm/vm_kern.h> /* for kernel_map, ipc_kernel_map */
129 #include <vm/vm_pageout.h>
130 #include <vm/vm_protos.h>
131 #include <vm/vm_purgeable_internal.h>
132
133 #include <sys/resource.h>
134 /*
135 * Exported interfaces
136 */
137
138 #include <mach/task_server.h>
139 #include <mach/mach_host_server.h>
140 #include <mach/host_security_server.h>
141 #include <mach/mach_port_server.h>
142
143 #include <vm/vm_shared_region.h>
144
145 #if CONFIG_COUNTERS
146 #include <pmc/pmc.h>
147 #endif /* CONFIG_COUNTERS */
148
149 #include <libkern/OSDebug.h>
150 #include <libkern/OSAtomic.h>
151
152 #if CONFIG_ATM
153 #include <atm/atm_internal.h>
154 #endif
155
156 #include <kern/sfi.h>
157
158 #if KPERF
159 extern int kpc_force_all_ctrs(task_t, int);
160 #endif
161
162 uint32_t qos_override_mode;
163
164 task_t kernel_task;
165 zone_t task_zone;
166 lck_attr_t task_lck_attr;
167 lck_grp_t task_lck_grp;
168 lck_grp_attr_t task_lck_grp_attr;
169
170 /* Flag set by core audio when audio is playing. Used to stifle EXC_RESOURCE generation when active. */
171 int audio_active = 0;
172
173 zinfo_usage_store_t tasks_tkm_private;
174 zinfo_usage_store_t tasks_tkm_shared;
175
176 /* A container to accumulate statistics for expired tasks */
177 expired_task_statistics_t dead_task_statistics;
178 lck_spin_t dead_task_statistics_lock;
179
180 ledger_template_t task_ledger_template = NULL;
181
182 struct _task_ledger_indices task_ledgers __attribute__((used)) =
183 {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
184 { 0 /* initialized at runtime */},
185 #ifdef CONFIG_BANK
186 -1, -1,
187 #endif
188 };
189
190 void init_task_ledgers(void);
191 void task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1);
192 void task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1);
193 void __attribute__((noinline)) THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS__SENDING_EXC_RESOURCE(void);
194 void __attribute__((noinline)) THIS_PROCESS_CROSSED_HIGH_WATERMARK__SENDING_EXC_RESOURCE(int max_footprint_mb);
195 int coredump(void *core_proc, int reserve_mb, int ignore_ulimit);
196
197 kern_return_t task_suspend_internal(task_t);
198 kern_return_t task_resume_internal(task_t);
199
200 void proc_init_cpumon_params(void);
201
202 // Warn tasks when they hit 80% of their memory limit.
203 #define PHYS_FOOTPRINT_WARNING_LEVEL 80
204
205 #define TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT 150 /* wakeups per second */
206 #define TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL 300 /* in seconds. */
207
208 /*
209 * Level (in terms of percentage of the limit) at which the wakeups monitor triggers telemetry.
210 *
211 * (ie when the task's wakeups rate exceeds 70% of the limit, start taking user
212 * stacktraces, aka micro-stackshots)
213 */
214 #define TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER 70
215
216 int task_wakeups_monitor_interval; /* In seconds. Time period over which wakeups rate is observed */
217 int task_wakeups_monitor_rate; /* In hz. Maximum allowable wakeups per task before EXC_RESOURCE is sent */
218
219 int task_wakeups_monitor_ustackshots_trigger_pct; /* Percentage. Level at which we start gathering telemetry. */
220
221 int disable_exc_resource; /* Global override to supress EXC_RESOURCE for resource monitor violations. */
222
223 int max_task_footprint = 0; /* Per-task limit on physical memory consumption */
224 #if MACH_ASSERT
225 int pmap_ledgers_panic = 1;
226 #endif /* MACH_ASSERT */
227
228 int task_max = CONFIG_TASK_MAX; /* Max number of tasks */
229
230 int hwm_user_cores = 0; /* high watermark violations generate user core files */
231
#ifdef MACH_BSD
/* Routines defined outside this file; only their prototypes are visible here. */
extern void	proc_getexecutableuuid(void *, unsigned char *, unsigned long);
extern int	proc_pid(struct proc *p);
extern int	proc_selfpid(void);
extern char	*proc_name_address(struct proc *p);
#if CONFIG_JETSAM
extern void	memorystatus_on_ledger_footprint_exceeded(int warning,
			const int max_footprint_mb);
#endif /* CONFIG_JETSAM */
#endif /* MACH_BSD */

#if MACH_ASSERT
extern int	pmap_ledgers_panic;
#endif /* MACH_ASSERT */
244
245 /* Forwards */
246
247 void task_hold_locked(
248 task_t task);
249 void task_wait_locked(
250 task_t task,
251 boolean_t until_not_runnable);
252 void task_release_locked(
253 task_t task);
254 void task_free(
255 task_t task );
256 void task_synchronizer_destroy_all(
257 task_t task);
258
259 int check_for_tasksuspend(
260 task_t task);
261
262 void
263 task_backing_store_privileged(
264 task_t task)
265 {
266 task_lock(task);
267 task->priv_flags |= VM_BACKING_STORE_PRIV;
268 task_unlock(task);
269 return;
270 }
271
272
273 void
274 task_set_64bit(
275 task_t task,
276 boolean_t is64bit)
277 {
278 #if defined(__i386__) || defined(__x86_64__) || defined(__arm64__)
279 thread_t thread;
280 #endif /* defined(__i386__) || defined(__x86_64__) || defined(__arm64__) */
281
282 task_lock(task);
283
284 if (is64bit) {
285 if (task_has_64BitAddr(task))
286 goto out;
287 task_set_64BitAddr(task);
288 } else {
289 if ( !task_has_64BitAddr(task))
290 goto out;
291 task_clear_64BitAddr(task);
292 }
293 /* FIXME: On x86, the thread save state flavor can diverge from the
294 * task's 64-bit feature flag due to the 32-bit/64-bit register save
295 * state dichotomy. Since we can be pre-empted in this interval,
296 * certain routines may observe the thread as being in an inconsistent
297 * state with respect to its task's 64-bitness.
298 */
299
300 #if defined(__i386__) || defined(__x86_64__) || defined(__arm64__)
301 queue_iterate(&task->threads, thread, thread_t, task_threads) {
302 thread_mtx_lock(thread);
303 machine_thread_switch_addrmode(thread);
304 thread_mtx_unlock(thread);
305 }
306 #endif /* defined(__i386__) || defined(__x86_64__) || defined(__arm64__) */
307
308 out:
309 task_unlock(task);
310 }
311
312
313 void
314 task_set_dyld_info(task_t task, mach_vm_address_t addr, mach_vm_size_t size)
315 {
316 task_lock(task);
317 task->all_image_info_addr = addr;
318 task->all_image_info_size = size;
319 task_unlock(task);
320 }
321
322 void
323 task_atm_reset(__unused task_t task) {
324
325 #if CONFIG_ATM
326 if (task->atm_context != NULL) {
327 atm_task_descriptor_destroy(task->atm_context);
328 task->atm_context = NULL;
329 }
330 #endif
331
332 }
333
#if TASK_REFERENCE_LEAK_DEBUG
#include <kern/btlog.h>

/*
 * Reference-leak debugging: every task reference taken or dropped is logged
 * to a btlog together with a short backtrace of the caller.
 */
decl_simple_lock_data(static,task_ref_lock);
static btlog_t	*task_ref_btlog;

#define TASK_REF_OP_INCR	0x1
#define TASK_REF_OP_DECR	0x2

/* Number of backtrace frames captured per log entry. */
#define TASK_REF_BTDEPTH	7

/* Lock callback handed to btlog_create(); context is the simple lock. */
static void
task_ref_lock_lock(void *context)
{
	simple_lock_t	lck = (simple_lock_t)context;

	simple_lock(lck);
}

/* Unlock callback handed to btlog_create(); context is the simple lock. */
static void
task_ref_lock_unlock(void *context)
{
	simple_lock_t	lck = (simple_lock_t)context;

	simple_unlock(lck);
}

/*
 * Take a reference on the task and log the operation with the caller's
 * backtrace.
 */
void
task_reference_internal(task_t task)
{
	void	*frames[TASK_REF_BTDEPTH];
	int	depth = OSBacktrace(frames, TASK_REF_BTDEPTH);

	(void)hw_atomic_add(&(task)->ref_count, 1);
	btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_INCR, frames, depth);
}

/*
 * Log the release (with the caller's backtrace), then drop a reference on
 * the task.  Returns the value produced by hw_atomic_sub().
 */
uint32_t
task_deallocate_internal(task_t task)
{
	void	*frames[TASK_REF_BTDEPTH];
	int	depth = OSBacktrace(frames, TASK_REF_BTDEPTH);

	btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_DECR, frames, depth);
	return hw_atomic_sub(&(task)->ref_count, 1);
}

#endif /* TASK_REFERENCE_LEAK_DEBUG */
382
383 void
384 task_init(void)
385 {
386
387 lck_grp_attr_setdefault(&task_lck_grp_attr);
388 lck_grp_init(&task_lck_grp, "task", &task_lck_grp_attr);
389 lck_attr_setdefault(&task_lck_attr);
390 lck_mtx_init(&tasks_threads_lock, &task_lck_grp, &task_lck_attr);
391
392 task_zone = zinit(
393 sizeof(struct task),
394 task_max * sizeof(struct task),
395 TASK_CHUNK * sizeof(struct task),
396 "tasks");
397
398 zone_change(task_zone, Z_NOENCRYPT, TRUE);
399
400 /*
401 * Configure per-task memory limit.
402 * The boot-arg is interpreted as Megabytes,
403 * and takes precedence over the device tree.
404 * Setting the boot-arg to 0 disables task limits.
405 */
406 if (!PE_parse_boot_argn("max_task_pmem", &max_task_footprint,
407 sizeof (max_task_footprint))) {
408 /*
409 * No limit was found in boot-args, so go look in the device tree.
410 */
411 if (!PE_get_default("kern.max_task_pmem", &max_task_footprint,
412 sizeof(max_task_footprint))) {
413 /*
414 * No limit was found in device tree.
415 */
416 max_task_footprint = 0;
417 }
418 }
419
420 if (max_task_footprint != 0) {
421 #if CONFIG_JETSAM
422 if (max_task_footprint < 50) {
423 printf("Warning: max_task_pmem %d below minimum.\n",
424 max_task_footprint);
425 max_task_footprint = 50;
426 }
427 printf("Limiting task physical memory footprint to %d MB\n",
428 max_task_footprint);
429 max_task_footprint *= 1024 * 1024; // Convert MB to bytes
430 #else
431 printf("Warning: max_task_footprint specified, but jetsam not configured; ignoring.\n");
432 #endif
433 }
434
435 #if MACH_ASSERT
436 PE_parse_boot_argn("pmap_ledgers_panic", &pmap_ledgers_panic,
437 sizeof (pmap_ledgers_panic));
438 #endif /* MACH_ASSERT */
439
440 if (!PE_parse_boot_argn("hwm_user_cores", &hwm_user_cores,
441 sizeof (hwm_user_cores))) {
442 hwm_user_cores = 0;
443 }
444
445 if (PE_parse_boot_argn("qos_override_mode", &qos_override_mode, sizeof(qos_override_mode))) {
446 printf("QOS override mode: 0x%08x\n", qos_override_mode);
447 } else {
448 qos_override_mode = QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE;
449 }
450
451 proc_init_cpumon_params();
452
453 if (!PE_parse_boot_argn("task_wakeups_monitor_rate", &task_wakeups_monitor_rate, sizeof (task_wakeups_monitor_rate))) {
454 task_wakeups_monitor_rate = TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT;
455 }
456
457 if (!PE_parse_boot_argn("task_wakeups_monitor_interval", &task_wakeups_monitor_interval, sizeof (task_wakeups_monitor_interval))) {
458 task_wakeups_monitor_interval = TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL;
459 }
460
461 if (!PE_parse_boot_argn("task_wakeups_monitor_ustackshots_trigger_pct", &task_wakeups_monitor_ustackshots_trigger_pct,
462 sizeof (task_wakeups_monitor_ustackshots_trigger_pct))) {
463 task_wakeups_monitor_ustackshots_trigger_pct = TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER;
464 }
465
466 if (!PE_parse_boot_argn("disable_exc_resource", &disable_exc_resource,
467 sizeof (disable_exc_resource))) {
468 disable_exc_resource = 0;
469 }
470
471 /*
472 * If we have coalitions, coalition_init() will call init_task_ledgers() as it
473 * sets up the ledgers for the default coalition. If we don't have coalitions,
474 * then we have to call it now.
475 */
476 #if CONFIG_COALITIONS
477 assert(task_ledger_template);
478 #else /* CONFIG_COALITIONS */
479 init_task_ledgers();
480 #endif /* CONFIG_COALITIONS */
481
482 #if TASK_REFERENCE_LEAK_DEBUG
483 simple_lock_init(&task_ref_lock, 0);
484 task_ref_btlog = btlog_create(100000,
485 TASK_REF_BTDEPTH,
486 task_ref_lock_lock,
487 task_ref_lock_unlock,
488 &task_ref_lock);
489 assert(task_ref_btlog);
490 #endif
491
492 /*
493 * Create the kernel task as the first task.
494 */
495 #ifdef __LP64__
496 if (task_create_internal(TASK_NULL, COALITION_NULL, FALSE, TRUE, &kernel_task) != KERN_SUCCESS)
497 #else
498 if (task_create_internal(TASK_NULL, COALITION_NULL, FALSE, FALSE, &kernel_task) != KERN_SUCCESS)
499 #endif
500 panic("task_init\n");
501
502 vm_map_deallocate(kernel_task->map);
503 kernel_task->map = kernel_map;
504 lck_spin_init(&dead_task_statistics_lock, &task_lck_grp, &task_lck_attr);
505
506 }
507
508 /*
509 * Create a task running in the kernel address space. It may
510 * have its own map of size mem_size and may have ipc privileges.
511 */
512 kern_return_t
513 kernel_task_create(
514 __unused task_t parent_task,
515 __unused vm_offset_t map_base,
516 __unused vm_size_t map_size,
517 __unused task_t *child_task)
518 {
519 return (KERN_INVALID_ARGUMENT);
520 }
521
522 kern_return_t
523 task_create(
524 task_t parent_task,
525 __unused ledger_port_array_t ledger_ports,
526 __unused mach_msg_type_number_t num_ledger_ports,
527 __unused boolean_t inherit_memory,
528 __unused task_t *child_task) /* OUT */
529 {
530 if (parent_task == TASK_NULL)
531 return(KERN_INVALID_ARGUMENT);
532
533 /*
534 * No longer supported: too many calls assume that a task has a valid
535 * process attached.
536 */
537 return(KERN_FAILURE);
538 }
539
540 kern_return_t
541 host_security_create_task_token(
542 host_security_t host_security,
543 task_t parent_task,
544 __unused security_token_t sec_token,
545 __unused audit_token_t audit_token,
546 __unused host_priv_t host_priv,
547 __unused ledger_port_array_t ledger_ports,
548 __unused mach_msg_type_number_t num_ledger_ports,
549 __unused boolean_t inherit_memory,
550 __unused task_t *child_task) /* OUT */
551 {
552 if (parent_task == TASK_NULL)
553 return(KERN_INVALID_ARGUMENT);
554
555 if (host_security == HOST_NULL)
556 return(KERN_INVALID_SECURITY);
557
558 /*
559 * No longer supported.
560 */
561 return(KERN_FAILURE);
562 }
563
564 /*
565 * Task ledgers
566 * ------------
567 *
568 * phys_footprint
569 * Physical footprint: This is the sum of:
570 * + internal
571 * + internal_compressed
572 * + iokit_mapped
573 * - alternate_accounting
574 *
575 * internal
576 * The task's anonymous memory, which on iOS is always resident.
577 *
578 * internal_compressed
579 * Amount of this task's internal memory which is held by the compressor.
580 * Such memory is no longer actually resident for the task [i.e., resident in its pmap],
581 * and could be either decompressed back into memory, or paged out to storage, depending
582 * on our implementation.
583 *
584 * iokit_mapped
585 * IOKit mappings: The total size of all IOKit mappings in this task, regardless of
586 clean/dirty or internal/external state].
587 *
588 * alternate_accounting
589 * The number of internal dirty pages which are part of IOKit mappings. By definition, these pages
590 * are counted in both internal *and* iokit_mapped, so we must subtract them from the total to avoid
591 * double counting.
592 */
593 void
594 init_task_ledgers(void)
595 {
596 ledger_template_t t;
597
598 assert(task_ledger_template == NULL);
599 assert(kernel_task == TASK_NULL);
600
601 if ((t = ledger_template_create("Per-task ledger")) == NULL)
602 panic("couldn't create task ledger template");
603
604 task_ledgers.cpu_time = ledger_entry_add(t, "cpu_time", "sched", "ns");
605 task_ledgers.tkm_private = ledger_entry_add(t, "tkm_private",
606 "physmem", "bytes");
607 task_ledgers.tkm_shared = ledger_entry_add(t, "tkm_shared", "physmem",
608 "bytes");
609 task_ledgers.phys_mem = ledger_entry_add(t, "phys_mem", "physmem",
610 "bytes");
611 task_ledgers.wired_mem = ledger_entry_add(t, "wired_mem", "physmem",
612 "bytes");
613 task_ledgers.internal = ledger_entry_add(t, "internal", "physmem",
614 "bytes");
615 task_ledgers.iokit_mapped = ledger_entry_add(t, "iokit_mapped", "mappings",
616 "bytes");
617 task_ledgers.alternate_accounting = ledger_entry_add(t, "alternate_accounting", "physmem",
618 "bytes");
619 task_ledgers.phys_footprint = ledger_entry_add(t, "phys_footprint", "physmem",
620 "bytes");
621 task_ledgers.internal_compressed = ledger_entry_add(t, "internal_compressed", "physmem",
622 "bytes");
623 task_ledgers.purgeable_volatile = ledger_entry_add(t, "purgeable_volatile", "physmem", "bytes");
624 task_ledgers.purgeable_nonvolatile = ledger_entry_add(t, "purgeable_nonvolatile", "physmem", "bytes");
625 task_ledgers.purgeable_volatile_compressed = ledger_entry_add(t, "purgeable_volatile_compress", "physmem", "bytes");
626 task_ledgers.purgeable_nonvolatile_compressed = ledger_entry_add(t, "purgeable_nonvolatile_compress", "physmem", "bytes");
627 task_ledgers.platform_idle_wakeups = ledger_entry_add(t, "platform_idle_wakeups", "power",
628 "count");
629 task_ledgers.interrupt_wakeups = ledger_entry_add(t, "interrupt_wakeups", "power",
630 "count");
631
632 sfi_class_id_t class_id, ledger_alias;
633 for (class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) {
634 task_ledgers.sfi_wait_times[class_id] = -1;
635 }
636
637 /* don't account for UNSPECIFIED */
638 for (class_id = SFI_CLASS_UNSPECIFIED + 1; class_id < MAX_SFI_CLASS_ID; class_id++) {
639 ledger_alias = sfi_get_ledger_alias_for_class(class_id);
640 if (ledger_alias != SFI_CLASS_UNSPECIFIED) {
641 /* Check to see if alias has been registered yet */
642 if (task_ledgers.sfi_wait_times[ledger_alias] != -1) {
643 task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias];
644 } else {
645 /* Otherwise, initialize it first */
646 task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias] = sfi_ledger_entry_add(t, ledger_alias);
647 }
648 } else {
649 task_ledgers.sfi_wait_times[class_id] = sfi_ledger_entry_add(t, class_id);
650 }
651
652 if (task_ledgers.sfi_wait_times[class_id] < 0) {
653 panic("couldn't create entries for task ledger template for SFI class 0x%x", class_id);
654 }
655 }
656
657 #ifdef CONFIG_BANK
658 task_ledgers.cpu_time_billed_to_me = ledger_entry_add(t, "cpu_time_billed_to_me", "sched", "ns");
659 task_ledgers.cpu_time_billed_to_others = ledger_entry_add(t, "cpu_time_billed_to_others", "sched", "ns");
660 #endif
661
662 assert(task_ledgers.sfi_wait_times[MAX_SFI_CLASS_ID -1] != -1);
663
664 if ((task_ledgers.cpu_time < 0) ||
665 (task_ledgers.tkm_private < 0) ||
666 (task_ledgers.tkm_shared < 0) ||
667 (task_ledgers.phys_mem < 0) ||
668 (task_ledgers.wired_mem < 0) ||
669 (task_ledgers.internal < 0) ||
670 (task_ledgers.iokit_mapped < 0) ||
671 (task_ledgers.alternate_accounting < 0) ||
672 (task_ledgers.phys_footprint < 0) ||
673 (task_ledgers.internal_compressed < 0) ||
674 (task_ledgers.purgeable_volatile < 0) ||
675 (task_ledgers.purgeable_nonvolatile < 0) ||
676 (task_ledgers.purgeable_volatile_compressed < 0) ||
677 (task_ledgers.purgeable_nonvolatile_compressed < 0) ||
678 (task_ledgers.platform_idle_wakeups < 0) ||
679 (task_ledgers.interrupt_wakeups < 0)
680 #ifdef CONFIG_BANK
681 || (task_ledgers.cpu_time_billed_to_me < 0) || (task_ledgers.cpu_time_billed_to_others < 0)
682 #endif
683 ) {
684 panic("couldn't create entries for task ledger template");
685 }
686
687 ledger_track_maximum(t, task_ledgers.phys_footprint, 60);
688 #if MACH_ASSERT
689 if (pmap_ledgers_panic) {
690 ledger_panic_on_negative(t, task_ledgers.phys_footprint);
691 ledger_panic_on_negative(t, task_ledgers.internal);
692 ledger_panic_on_negative(t, task_ledgers.internal_compressed);
693 ledger_panic_on_negative(t, task_ledgers.iokit_mapped);
694 ledger_panic_on_negative(t, task_ledgers.alternate_accounting);
695 ledger_panic_on_negative(t, task_ledgers.purgeable_volatile);
696 ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile);
697 ledger_panic_on_negative(t, task_ledgers.purgeable_volatile_compressed);
698 ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile_compressed);
699 }
700 #endif /* MACH_ASSERT */
701
702 #if CONFIG_JETSAM
703 ledger_set_callback(t, task_ledgers.phys_footprint, task_footprint_exceeded, NULL, NULL);
704 #endif
705
706 ledger_set_callback(t, task_ledgers.interrupt_wakeups,
707 task_wakeups_rate_exceeded, NULL, NULL);
708
709 task_ledger_template = t;
710 }
711
712 kern_return_t
713 task_create_internal(
714 task_t parent_task,
715 coalition_t parent_coalition __unused,
716 boolean_t inherit_memory,
717 boolean_t is_64bit,
718 task_t *child_task) /* OUT */
719 {
720 task_t new_task;
721 vm_shared_region_t shared_region;
722 ledger_t ledger = NULL;
723
724 new_task = (task_t) zalloc(task_zone);
725
726 if (new_task == TASK_NULL)
727 return(KERN_RESOURCE_SHORTAGE);
728
729 /* one ref for just being alive; one for our caller */
730 new_task->ref_count = 2;
731
732 /* allocate with active entries */
733 assert(task_ledger_template != NULL);
734 if ((ledger = ledger_instantiate(task_ledger_template,
735 LEDGER_CREATE_ACTIVE_ENTRIES)) == NULL) {
736 zfree(task_zone, new_task);
737 return(KERN_RESOURCE_SHORTAGE);
738 }
739
740 new_task->ledger = ledger;
741
742 #if defined(CONFIG_SCHED_MULTIQ)
743 new_task->sched_group = sched_group_create();
744 #endif
745
746 /* if inherit_memory is true, parent_task MUST not be NULL */
747 if (inherit_memory)
748 new_task->map = vm_map_fork(ledger, parent_task->map);
749 else
750 new_task->map = vm_map_create(pmap_create(ledger, 0, is_64bit),
751 (vm_map_offset_t)(VM_MIN_ADDRESS),
752 (vm_map_offset_t)(VM_MAX_ADDRESS), TRUE);
753
754 /* Inherit memlock limit from parent */
755 if (parent_task)
756 vm_map_set_user_wire_limit(new_task->map, (vm_size_t)parent_task->map->user_wire_limit);
757
758 lck_mtx_init(&new_task->lock, &task_lck_grp, &task_lck_attr);
759 queue_init(&new_task->threads);
760 new_task->suspend_count = 0;
761 new_task->thread_count = 0;
762 new_task->active_thread_count = 0;
763 new_task->user_stop_count = 0;
764 new_task->legacy_stop_count = 0;
765 new_task->active = TRUE;
766 new_task->halting = FALSE;
767 new_task->user_data = NULL;
768 new_task->faults = 0;
769 new_task->cow_faults = 0;
770 new_task->pageins = 0;
771 new_task->messages_sent = 0;
772 new_task->messages_received = 0;
773 new_task->syscalls_mach = 0;
774 new_task->priv_flags = 0;
775 new_task->syscalls_unix=0;
776 new_task->c_switch = new_task->p_switch = new_task->ps_switch = 0;
777 new_task->t_flags = 0;
778 new_task->importance = 0;
779
780 #if CONFIG_ATM
781 new_task->atm_context = NULL;
782 #endif
783 #if CONFIG_BANK
784 new_task->bank_context = NULL;
785 #endif
786
787 zinfo_task_init(new_task);
788
789 #ifdef MACH_BSD
790 new_task->bsd_info = NULL;
791 #endif /* MACH_BSD */
792
793 #if CONFIG_JETSAM
794 if (max_task_footprint != 0) {
795 ledger_set_limit(ledger, task_ledgers.phys_footprint, max_task_footprint, PHYS_FOOTPRINT_WARNING_LEVEL);
796 }
797 #endif
798
799 if (task_wakeups_monitor_rate != 0) {
800 uint32_t flags = WAKEMON_ENABLE | WAKEMON_SET_DEFAULTS;
801 int32_t rate; // Ignored because of WAKEMON_SET_DEFAULTS
802 task_wakeups_monitor_ctl(new_task, &flags, &rate);
803 }
804
805 #if defined(__i386__) || defined(__x86_64__)
806 new_task->i386_ldt = 0;
807 #endif
808
809 new_task->task_debug = NULL;
810
811 queue_init(&new_task->semaphore_list);
812 new_task->semaphores_owned = 0;
813
814 ipc_task_init(new_task, parent_task);
815
816 new_task->total_user_time = 0;
817 new_task->total_system_time = 0;
818
819 new_task->vtimers = 0;
820
821 new_task->shared_region = NULL;
822
823 new_task->affinity_space = NULL;
824
825 #if CONFIG_COUNTERS
826 new_task->t_chud = 0U;
827 #endif
828
829 new_task->pidsuspended = FALSE;
830 new_task->frozen = FALSE;
831 new_task->changing_freeze_state = FALSE;
832 new_task->rusage_cpu_flags = 0;
833 new_task->rusage_cpu_percentage = 0;
834 new_task->rusage_cpu_interval = 0;
835 new_task->rusage_cpu_deadline = 0;
836 new_task->rusage_cpu_callt = NULL;
837 #if MACH_ASSERT
838 new_task->suspends_outstanding = 0;
839 #endif
840
841 #if HYPERVISOR
842 new_task->hv_task_target = NULL;
843 #endif /* HYPERVISOR */
844
845
846 new_task->low_mem_notified_warn = 0;
847 new_task->low_mem_notified_critical = 0;
848 new_task->purged_memory_warn = 0;
849 new_task->purged_memory_critical = 0;
850 new_task->mem_notify_reserved = 0;
851 #if IMPORTANCE_INHERITANCE
852 new_task->task_imp_base = NULL;
853 #endif /* IMPORTANCE_INHERITANCE */
854
855 #if defined(__x86_64__)
856 new_task->uexc_range_start = new_task->uexc_range_size = new_task->uexc_handler = 0;
857 #endif
858
859 new_task->requested_policy = default_task_requested_policy;
860 new_task->effective_policy = default_task_effective_policy;
861 new_task->pended_policy = default_task_pended_policy;
862
863 if (parent_task != TASK_NULL) {
864 new_task->sec_token = parent_task->sec_token;
865 new_task->audit_token = parent_task->audit_token;
866
867 /* inherit the parent's shared region */
868 shared_region = vm_shared_region_get(parent_task);
869 vm_shared_region_set(new_task, shared_region);
870
871 if(task_has_64BitAddr(parent_task))
872 task_set_64BitAddr(new_task);
873 new_task->all_image_info_addr = parent_task->all_image_info_addr;
874 new_task->all_image_info_size = parent_task->all_image_info_size;
875
876 #if defined(__i386__) || defined(__x86_64__)
877 if (inherit_memory && parent_task->i386_ldt)
878 new_task->i386_ldt = user_ldt_copy(parent_task->i386_ldt);
879 #endif
880 if (inherit_memory && parent_task->affinity_space)
881 task_affinity_create(parent_task, new_task);
882
883 new_task->pset_hint = parent_task->pset_hint = task_choose_pset(parent_task);
884
885 #if IMPORTANCE_INHERITANCE
886 ipc_importance_task_t new_task_imp = IIT_NULL;
887
888 if (task_is_marked_importance_donor(parent_task)) {
889 new_task_imp = ipc_importance_for_task(new_task, FALSE);
890 assert(IIT_NULL != new_task_imp);
891 ipc_importance_task_mark_donor(new_task_imp, TRUE);
892 }
893 /* Embedded doesn't want this to inherit */
894 if (task_is_marked_importance_receiver(parent_task)) {
895 if (IIT_NULL == new_task_imp)
896 new_task_imp = ipc_importance_for_task(new_task, FALSE);
897 assert(IIT_NULL != new_task_imp);
898 ipc_importance_task_mark_receiver(new_task_imp, TRUE);
899 }
900 if (task_is_marked_importance_denap_receiver(parent_task)) {
901 if (IIT_NULL == new_task_imp)
902 new_task_imp = ipc_importance_for_task(new_task, FALSE);
903 assert(IIT_NULL != new_task_imp);
904 ipc_importance_task_mark_denap_receiver(new_task_imp, TRUE);
905 }
906
907 if (IIT_NULL != new_task_imp) {
908 assert(new_task->task_imp_base == new_task_imp);
909 ipc_importance_task_release(new_task_imp);
910 }
911 #endif /* IMPORTANCE_INHERITANCE */
912
913 new_task->priority = BASEPRI_DEFAULT;
914 new_task->max_priority = MAXPRI_USER;
915
916 new_task->requested_policy.t_apptype = parent_task->requested_policy.t_apptype;
917
918 new_task->requested_policy.int_darwinbg = parent_task->requested_policy.int_darwinbg;
919 new_task->requested_policy.ext_darwinbg = parent_task->requested_policy.ext_darwinbg;
920 new_task->requested_policy.int_iotier = parent_task->requested_policy.int_iotier;
921 new_task->requested_policy.ext_iotier = parent_task->requested_policy.ext_iotier;
922 new_task->requested_policy.int_iopassive = parent_task->requested_policy.int_iopassive;
923 new_task->requested_policy.ext_iopassive = parent_task->requested_policy.ext_iopassive;
924 new_task->requested_policy.bg_iotier = parent_task->requested_policy.bg_iotier;
925 new_task->requested_policy.terminated = parent_task->requested_policy.terminated;
926 new_task->requested_policy.t_qos_clamp = parent_task->requested_policy.t_qos_clamp;
927
928 task_policy_create(new_task, parent_task->requested_policy.t_boosted);
929 } else {
930 new_task->sec_token = KERNEL_SECURITY_TOKEN;
931 new_task->audit_token = KERNEL_AUDIT_TOKEN;
932 #ifdef __LP64__
933 if(is_64bit)
934 task_set_64BitAddr(new_task);
935 #endif
936 new_task->all_image_info_addr = (mach_vm_address_t)0;
937 new_task->all_image_info_size = (mach_vm_size_t)0;
938
939 new_task->pset_hint = PROCESSOR_SET_NULL;
940
941 if (kernel_task == TASK_NULL) {
942 new_task->priority = BASEPRI_KERNEL;
943 new_task->max_priority = MAXPRI_KERNEL;
944 } else {
945 new_task->priority = BASEPRI_DEFAULT;
946 new_task->max_priority = MAXPRI_USER;
947 }
948 }
949
950 new_task->coalition = COALITION_NULL;
951
952 #if CONFIG_COALITIONS
953 if (parent_coalition) {
954 coalition_adopt_task(parent_coalition, new_task);
955 } else if (parent_task && parent_task->coalition) {
956 coalition_adopt_task(parent_task->coalition, new_task);
957 } else {
958 coalition_default_adopt_task(new_task);
959 }
960
961 if (new_task->coalition == COALITION_NULL) {
962 panic("created task is not a member of any coalition");
963 }
964 #endif /* CONFIG_COALITIONS */
965
966 /* Allocate I/O Statistics */
967 new_task->task_io_stats = (io_stat_info_t)kalloc(sizeof(struct io_stat_info));
968 assert(new_task->task_io_stats != NULL);
969 bzero(new_task->task_io_stats, sizeof(struct io_stat_info));
970
971 bzero(&(new_task->cpu_time_qos_stats), sizeof(struct _cpu_time_qos_stats));
972
973 bzero(&new_task->extmod_statistics, sizeof(new_task->extmod_statistics));
974 new_task->task_timer_wakeups_bin_1 = new_task->task_timer_wakeups_bin_2 = 0;
975 new_task->task_gpu_ns = 0;
976 lck_mtx_lock(&tasks_threads_lock);
977 queue_enter(&tasks, new_task, task_t, tasks);
978 tasks_count++;
979 lck_mtx_unlock(&tasks_threads_lock);
980
981 if (vm_backing_store_low && parent_task != NULL)
982 new_task->priv_flags |= (parent_task->priv_flags&VM_BACKING_STORE_PRIV);
983
984 new_task->task_volatile_objects = 0;
985 new_task->task_nonvolatile_objects = 0;
986 new_task->task_purgeable_disowning = FALSE;
987 new_task->task_purgeable_disowned = FALSE;
988
989 ipc_task_enable(new_task);
990
991 *child_task = new_task;
992 return(KERN_SUCCESS);
993 }
994
995 int task_dropped_imp_count = 0;
996
997 /*
998 * task_deallocate:
999 *
1000 * Drop a reference on a task.
1001 */
1002 void
1003 task_deallocate(
1004 task_t task)
1005 {
1006 ledger_amount_t credit, debit, interrupt_wakeups, platform_idle_wakeups;
1007 uint32_t refs;
1008
1009 if (task == TASK_NULL)
1010 return;
1011
1012 refs = task_deallocate_internal(task);
1013
1014 #if IMPORTANCE_INHERITANCE
1015 if (refs > 1)
1016 return;
1017
1018 if (refs == 1) {
1019 /*
1020 * If last ref potentially comes from the task's importance,
1021 * disconnect it. But more task refs may be added before
1022 * that completes, so wait for the reference to go to zero
1023 * naturually (it may happen on a recursive task_deallocate()
1024 * from the ipc_importance_disconnect_task() call).
1025 */
1026 if (IIT_NULL != task->task_imp_base)
1027 ipc_importance_disconnect_task(task);
1028 return;
1029 }
1030 #else
1031 if (refs > 0)
1032 return;
1033 #endif /* IMPORTANCE_INHERITANCE */
1034
1035 lck_mtx_lock(&tasks_threads_lock);
1036 queue_remove(&terminated_tasks, task, task_t, tasks);
1037 terminated_tasks_count--;
1038 lck_mtx_unlock(&tasks_threads_lock);
1039
1040 /*
1041 * remove the reference on atm descriptor
1042 */
1043 task_atm_reset(task);
1044
1045 #if CONFIG_BANK
1046 /*
1047 * remove the reference on bank context
1048 */
1049 if (task->bank_context != NULL) {
1050 bank_task_destroy(task->bank_context);
1051 task->bank_context = NULL;
1052 }
1053 #endif
1054
1055 if (task->task_io_stats)
1056 kfree(task->task_io_stats, sizeof(struct io_stat_info));
1057
1058 /*
1059 * Give the machine dependent code a chance
1060 * to perform cleanup before ripping apart
1061 * the task.
1062 */
1063 machine_task_terminate(task);
1064
1065 ipc_task_terminate(task);
1066
1067 if (task->affinity_space)
1068 task_affinity_deallocate(task);
1069
1070 #if MACH_ASSERT
1071 if (task->ledger != NULL &&
1072 task->map != NULL &&
1073 task->map->pmap != NULL &&
1074 task->map->pmap->ledger != NULL) {
1075 assert(task->ledger == task->map->pmap->ledger);
1076 }
1077 #endif /* MACH_ASSERT */
1078
1079 vm_purgeable_disown(task);
1080 assert(task->task_purgeable_disowned);
1081 if (task->task_volatile_objects != 0 ||
1082 task->task_nonvolatile_objects != 0) {
1083 panic("task_deallocate(%p): "
1084 "volatile_objects=%d nonvolatile_objects=%d\n",
1085 task,
1086 task->task_volatile_objects,
1087 task->task_nonvolatile_objects);
1088 }
1089
1090 vm_map_deallocate(task->map);
1091 is_release(task->itk_space);
1092
1093 ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
1094 &interrupt_wakeups, &debit);
1095 ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
1096 &platform_idle_wakeups, &debit);
1097
1098 #if defined(CONFIG_SCHED_MULTIQ)
1099 sched_group_destroy(task->sched_group);
1100 #endif
1101
1102 /* Accumulate statistics for dead tasks */
1103 lck_spin_lock(&dead_task_statistics_lock);
1104 dead_task_statistics.total_user_time += task->total_user_time;
1105 dead_task_statistics.total_system_time += task->total_system_time;
1106
1107 dead_task_statistics.task_interrupt_wakeups += interrupt_wakeups;
1108 dead_task_statistics.task_platform_idle_wakeups += platform_idle_wakeups;
1109
1110 dead_task_statistics.task_timer_wakeups_bin_1 += task->task_timer_wakeups_bin_1;
1111 dead_task_statistics.task_timer_wakeups_bin_2 += task->task_timer_wakeups_bin_2;
1112
1113 lck_spin_unlock(&dead_task_statistics_lock);
1114 lck_mtx_destroy(&task->lock, &task_lck_grp);
1115
1116 if (!ledger_get_entries(task->ledger, task_ledgers.tkm_private, &credit,
1117 &debit)) {
1118 OSAddAtomic64(credit, (int64_t *)&tasks_tkm_private.alloc);
1119 OSAddAtomic64(debit, (int64_t *)&tasks_tkm_private.free);
1120 }
1121 if (!ledger_get_entries(task->ledger, task_ledgers.tkm_shared, &credit,
1122 &debit)) {
1123 OSAddAtomic64(credit, (int64_t *)&tasks_tkm_shared.alloc);
1124 OSAddAtomic64(debit, (int64_t *)&tasks_tkm_shared.free);
1125 }
1126 ledger_dereference(task->ledger);
1127 zinfo_task_free(task);
1128
1129 #if TASK_REFERENCE_LEAK_DEBUG
1130 btlog_remove_entries_for_element(task_ref_btlog, task);
1131 #endif
1132
1133 #if CONFIG_COALITIONS
1134 if (!task->coalition) {
1135 panic("deallocating task was not a member of any coalition");
1136 }
1137 coalition_release(task->coalition);
1138 #endif /* CONFIG_COALITIONS */
1139
1140 task->coalition = COALITION_NULL;
1141
1142 zfree(task_zone, task);
1143 }
1144
1145 /*
1146 * task_name_deallocate:
1147 *
1148 * Drop a reference on a task name.
1149 */
1150 void
1151 task_name_deallocate(
1152 task_name_t task_name)
1153 {
1154 return(task_deallocate((task_t)task_name));
1155 }
1156
1157 /*
1158 * task_suspension_token_deallocate:
1159 *
1160 * Drop a reference on a task suspension token.
1161 */
1162 void
1163 task_suspension_token_deallocate(
1164 task_suspension_token_t token)
1165 {
1166 return(task_deallocate((task_t)token));
1167 }
1168
1169 /*
1170 * task_terminate:
1171 *
1172 * Terminate the specified task. See comments on thread_terminate
1173 * (kern/thread.c) about problems with terminating the "current task."
1174 */
1175
1176 kern_return_t
1177 task_terminate(
1178 task_t task)
1179 {
1180 if (task == TASK_NULL)
1181 return (KERN_INVALID_ARGUMENT);
1182
1183 if (task->bsd_info)
1184 return (KERN_FAILURE);
1185
1186 return (task_terminate_internal(task));
1187 }
1188
1189 #if MACH_ASSERT
1190 extern int proc_pid(struct proc *);
1191 extern void proc_name_kdp(task_t t, char *buf, int size);
1192 #endif /* MACH_ASSERT */
1193
1194 #define VM_MAP_PARTIAL_REAP 0x54 /* 0x150 */
1195 static void
1196 __unused task_partial_reap(task_t task, __unused int pid)
1197 {
1198 unsigned int reclaimed_resident = 0;
1199 unsigned int reclaimed_compressed = 0;
1200 uint64_t task_page_count;
1201
1202 task_page_count = (get_task_phys_footprint(task) / PAGE_SIZE_64);
1203
1204 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_START),
1205 pid, task_page_count, 0, 0, 0);
1206
1207 vm_map_partial_reap(task->map, &reclaimed_resident, &reclaimed_compressed);
1208
1209 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_END),
1210 pid, reclaimed_resident, reclaimed_compressed, 0, 0);
1211 }
1212
1213 kern_return_t
1214 task_terminate_internal(
1215 task_t task)
1216 {
1217 thread_t thread, self;
1218 task_t self_task;
1219 boolean_t interrupt_save;
1220 int pid = 0;
1221
1222 assert(task != kernel_task);
1223
1224 self = current_thread();
1225 self_task = self->task;
1226
1227 /*
1228 * Get the task locked and make sure that we are not racing
1229 * with someone else trying to terminate us.
1230 */
1231 if (task == self_task)
1232 task_lock(task);
1233 else
1234 if (task < self_task) {
1235 task_lock(task);
1236 task_lock(self_task);
1237 }
1238 else {
1239 task_lock(self_task);
1240 task_lock(task);
1241 }
1242
1243 if (!task->active) {
1244 /*
1245 * Task is already being terminated.
1246 * Just return an error. If we are dying, this will
1247 * just get us to our AST special handler and that
1248 * will get us to finalize the termination of ourselves.
1249 */
1250 task_unlock(task);
1251 if (self_task != task)
1252 task_unlock(self_task);
1253
1254 return (KERN_FAILURE);
1255 }
1256
1257 if (self_task != task)
1258 task_unlock(self_task);
1259
1260 /*
1261 * Make sure the current thread does not get aborted out of
1262 * the waits inside these operations.
1263 */
1264 interrupt_save = thread_interrupt_level(THREAD_UNINT);
1265
1266 /*
1267 * Indicate that we want all the threads to stop executing
1268 * at user space by holding the task (we would have held
1269 * each thread independently in thread_terminate_internal -
1270 * but this way we may be more likely to already find it
1271 * held there). Mark the task inactive, and prevent
1272 * further task operations via the task port.
1273 */
1274 task_hold_locked(task);
1275 task->active = FALSE;
1276 ipc_task_disable(task);
1277
1278 #if CONFIG_TELEMETRY
1279 /*
1280 * Notify telemetry that this task is going away.
1281 */
1282 telemetry_task_ctl_locked(task, TF_TELEMETRY, 0);
1283 #endif
1284
1285 /*
1286 * Terminate each thread in the task.
1287 */
1288 queue_iterate(&task->threads, thread, thread_t, task_threads) {
1289 thread_terminate_internal(thread);
1290 }
1291
1292 #ifdef MACH_BSD
1293 if (task->bsd_info != NULL) {
1294 pid = proc_pid(task->bsd_info);
1295 }
1296 #endif /* MACH_BSD */
1297
1298 task_unlock(task);
1299
1300 proc_set_task_policy(task, THREAD_NULL, TASK_POLICY_ATTRIBUTE,
1301 TASK_POLICY_TERMINATED, TASK_POLICY_ENABLE);
1302
1303 /* Early object reap phase */
1304
1305 // PR-17045188: Revisit implementation
1306 // task_partial_reap(task, pid);
1307
1308
1309 /*
1310 * Destroy all synchronizers owned by the task.
1311 */
1312 task_synchronizer_destroy_all(task);
1313
1314 /*
1315 * Destroy the IPC space, leaving just a reference for it.
1316 */
1317 ipc_space_terminate(task->itk_space);
1318
1319 #if 00
1320 /* if some ledgers go negative on tear-down again... */
1321 ledger_disable_panic_on_negative(task->map->pmap->ledger,
1322 task_ledgers.phys_footprint);
1323 ledger_disable_panic_on_negative(task->map->pmap->ledger,
1324 task_ledgers.internal);
1325 ledger_disable_panic_on_negative(task->map->pmap->ledger,
1326 task_ledgers.internal_compressed);
1327 ledger_disable_panic_on_negative(task->map->pmap->ledger,
1328 task_ledgers.iokit_mapped);
1329 ledger_disable_panic_on_negative(task->map->pmap->ledger,
1330 task_ledgers.alternate_accounting);
1331 #endif
1332
1333 /*
1334 * If the current thread is a member of the task
1335 * being terminated, then the last reference to
1336 * the task will not be dropped until the thread
1337 * is finally reaped. To avoid incurring the
1338 * expense of removing the address space regions
1339 * at reap time, we do it explictly here.
1340 */
1341 vm_map_remove(task->map,
1342 task->map->min_offset,
1343 task->map->max_offset,
1344 VM_MAP_NO_FLAGS);
1345
1346 /* release our shared region */
1347 vm_shared_region_set(task, NULL);
1348
1349 #if MACH_ASSERT
1350 /*
1351 * Identify the pmap's process, in case the pmap ledgers drift
1352 * and we have to report it.
1353 */
1354 char procname[17];
1355 if (task->bsd_info) {
1356 pid = proc_pid(task->bsd_info);
1357 proc_name_kdp(task, procname, sizeof (procname));
1358 } else {
1359 pid = 0;
1360 strlcpy(procname, "<unknown>", sizeof (procname));
1361 }
1362 pmap_set_process(task->map->pmap, pid, procname);
1363 #endif /* MACH_ASSERT */
1364
1365 lck_mtx_lock(&tasks_threads_lock);
1366 queue_remove(&tasks, task, task_t, tasks);
1367 queue_enter(&terminated_tasks, task, task_t, tasks);
1368 tasks_count--;
1369 terminated_tasks_count++;
1370 lck_mtx_unlock(&tasks_threads_lock);
1371
1372 /*
1373 * We no longer need to guard against being aborted, so restore
1374 * the previous interruptible state.
1375 */
1376 thread_interrupt_level(interrupt_save);
1377
1378 #if KPERF
1379 /* force the task to release all ctrs */
1380 if (task->t_chud & TASK_KPC_FORCED_ALL_CTRS)
1381 kpc_force_all_ctrs(task, 0);
1382 #endif
1383
1384 #if CONFIG_COALITIONS
1385 /*
1386 * Leave our coalition. (drop activation but not reference)
1387 */
1388 coalition_remove_task(task);
1389 #endif
1390
1391 /*
1392 * Get rid of the task active reference on itself.
1393 */
1394 task_deallocate(task);
1395
1396 return (KERN_SUCCESS);
1397 }
1398
1399 /*
1400 * task_start_halt:
1401 *
1402 * Shut the current task down (except for the current thread) in
1403 * preparation for dramatic changes to the task (probably exec).
1404 * We hold the task and mark all other threads in the task for
1405 * termination.
1406 */
1407 kern_return_t
1408 task_start_halt(
1409 task_t task)
1410 {
1411 thread_t thread, self;
1412
1413 assert(task != kernel_task);
1414
1415 self = current_thread();
1416
1417 if (task != self->task)
1418 return (KERN_INVALID_ARGUMENT);
1419
1420 task_lock(task);
1421
1422 if (task->halting || !task->active || !self->active) {
1423 /*
1424 * Task or current thread is already being terminated.
1425 * Hurry up and return out of the current kernel context
1426 * so that we run our AST special handler to terminate
1427 * ourselves.
1428 */
1429 task_unlock(task);
1430
1431 return (KERN_FAILURE);
1432 }
1433
1434 task->halting = TRUE;
1435
1436 if (task->thread_count > 1) {
1437
1438 /*
1439 * Mark all the threads to keep them from starting any more
1440 * user-level execution. The thread_terminate_internal code
1441 * would do this on a thread by thread basis anyway, but this
1442 * gives us a better chance of not having to wait there.
1443 */
1444 task_hold_locked(task);
1445
1446 /*
1447 * Terminate all the other threads in the task.
1448 */
1449 queue_iterate(&task->threads, thread, thread_t, task_threads) {
1450 if (thread != self)
1451 thread_terminate_internal(thread);
1452 }
1453
1454 task_release_locked(task);
1455 }
1456 task_unlock(task);
1457 return KERN_SUCCESS;
1458 }
1459
1460
1461 /*
1462 * task_complete_halt:
1463 *
1464 * Complete task halt by waiting for threads to terminate, then clean
1465 * up task resources (VM, port namespace, etc...) and then let the
1466 * current thread go in the (practically empty) task context.
1467 */
1468 void
1469 task_complete_halt(task_t task)
1470 {
1471 task_lock(task);
1472 assert(task->halting);
1473 assert(task == current_task());
1474
1475 /*
1476 * Wait for the other threads to get shut down.
1477 * When the last other thread is reaped, we'll be
1478 * woken up.
1479 */
1480 if (task->thread_count > 1) {
1481 assert_wait((event_t)&task->halting, THREAD_UNINT);
1482 task_unlock(task);
1483 thread_block(THREAD_CONTINUE_NULL);
1484 } else {
1485 task_unlock(task);
1486 }
1487
1488 /*
1489 * Give the machine dependent code a chance
1490 * to perform cleanup of task-level resources
1491 * associated with the current thread before
1492 * ripping apart the task.
1493 */
1494 machine_task_terminate(task);
1495
1496 /*
1497 * Destroy all synchronizers owned by the task.
1498 */
1499 task_synchronizer_destroy_all(task);
1500
1501 /*
1502 * Destroy the contents of the IPC space, leaving just
1503 * a reference for it.
1504 */
1505 ipc_space_clean(task->itk_space);
1506
1507 /*
1508 * Clean out the address space, as we are going to be
1509 * getting a new one.
1510 */
1511 vm_map_remove(task->map, task->map->min_offset,
1512 task->map->max_offset, VM_MAP_NO_FLAGS);
1513
1514 task->halting = FALSE;
1515 }
1516
1517 /*
1518 * task_hold_locked:
1519 *
1520 * Suspend execution of the specified task.
1521 * This is a recursive-style suspension of the task, a count of
1522 * suspends is maintained.
1523 *
1524 * CONDITIONS: the task is locked and active.
1525 */
1526 void
1527 task_hold_locked(
1528 register task_t task)
1529 {
1530 register thread_t thread;
1531
1532 assert(task->active);
1533
1534 if (task->suspend_count++ > 0)
1535 return;
1536
1537 /*
1538 * Iterate through all the threads and hold them.
1539 */
1540 queue_iterate(&task->threads, thread, thread_t, task_threads) {
1541 thread_mtx_lock(thread);
1542 thread_hold(thread);
1543 thread_mtx_unlock(thread);
1544 }
1545 }
1546
1547 /*
1548 * task_hold:
1549 *
1550 * Same as the internal routine above, except that is must lock
1551 * and verify that the task is active. This differs from task_suspend
1552 * in that it places a kernel hold on the task rather than just a
1553 * user-level hold. This keeps users from over resuming and setting
1554 * it running out from under the kernel.
1555 *
1556 * CONDITIONS: the caller holds a reference on the task
1557 */
1558 kern_return_t
1559 task_hold(
1560 register task_t task)
1561 {
1562 if (task == TASK_NULL)
1563 return (KERN_INVALID_ARGUMENT);
1564
1565 task_lock(task);
1566
1567 if (!task->active) {
1568 task_unlock(task);
1569
1570 return (KERN_FAILURE);
1571 }
1572
1573 task_hold_locked(task);
1574 task_unlock(task);
1575
1576 return (KERN_SUCCESS);
1577 }
1578
1579 kern_return_t
1580 task_wait(
1581 task_t task,
1582 boolean_t until_not_runnable)
1583 {
1584 if (task == TASK_NULL)
1585 return (KERN_INVALID_ARGUMENT);
1586
1587 task_lock(task);
1588
1589 if (!task->active) {
1590 task_unlock(task);
1591
1592 return (KERN_FAILURE);
1593 }
1594
1595 task_wait_locked(task, until_not_runnable);
1596 task_unlock(task);
1597
1598 return (KERN_SUCCESS);
1599 }
1600
1601 /*
1602 * task_wait_locked:
1603 *
1604 * Wait for all threads in task to stop.
1605 *
1606 * Conditions:
1607 * Called with task locked, active, and held.
1608 */
1609 void
1610 task_wait_locked(
1611 register task_t task,
1612 boolean_t until_not_runnable)
1613 {
1614 register thread_t thread, self;
1615
1616 assert(task->active);
1617 assert(task->suspend_count > 0);
1618
1619 self = current_thread();
1620
1621 /*
1622 * Iterate through all the threads and wait for them to
1623 * stop. Do not wait for the current thread if it is within
1624 * the task.
1625 */
1626 queue_iterate(&task->threads, thread, thread_t, task_threads) {
1627 if (thread != self)
1628 thread_wait(thread, until_not_runnable);
1629 }
1630 }
1631
1632 /*
1633 * task_release_locked:
1634 *
1635 * Release a kernel hold on a task.
1636 *
1637 * CONDITIONS: the task is locked and active
1638 */
1639 void
1640 task_release_locked(
1641 register task_t task)
1642 {
1643 register thread_t thread;
1644
1645 assert(task->active);
1646 assert(task->suspend_count > 0);
1647
1648 if (--task->suspend_count > 0)
1649 return;
1650
1651 queue_iterate(&task->threads, thread, thread_t, task_threads) {
1652 thread_mtx_lock(thread);
1653 thread_release(thread);
1654 thread_mtx_unlock(thread);
1655 }
1656 }
1657
1658 /*
1659 * task_release:
1660 *
1661 * Same as the internal routine above, except that it must lock
1662 * and verify that the task is active.
1663 *
1664 * CONDITIONS: The caller holds a reference to the task
1665 */
1666 kern_return_t
1667 task_release(
1668 task_t task)
1669 {
1670 if (task == TASK_NULL)
1671 return (KERN_INVALID_ARGUMENT);
1672
1673 task_lock(task);
1674
1675 if (!task->active) {
1676 task_unlock(task);
1677
1678 return (KERN_FAILURE);
1679 }
1680
1681 task_release_locked(task);
1682 task_unlock(task);
1683
1684 return (KERN_SUCCESS);
1685 }
1686
1687 kern_return_t
1688 task_threads(
1689 task_t task,
1690 thread_act_array_t *threads_out,
1691 mach_msg_type_number_t *count)
1692 {
1693 mach_msg_type_number_t actual;
1694 thread_t *thread_list;
1695 thread_t thread;
1696 vm_size_t size, size_needed;
1697 void *addr;
1698 unsigned int i, j;
1699
1700 if (task == TASK_NULL)
1701 return (KERN_INVALID_ARGUMENT);
1702
1703 size = 0; addr = NULL;
1704
1705 for (;;) {
1706 task_lock(task);
1707 if (!task->active) {
1708 task_unlock(task);
1709
1710 if (size != 0)
1711 kfree(addr, size);
1712
1713 return (KERN_FAILURE);
1714 }
1715
1716 actual = task->thread_count;
1717
1718 /* do we have the memory we need? */
1719 size_needed = actual * sizeof (mach_port_t);
1720 if (size_needed <= size)
1721 break;
1722
1723 /* unlock the task and allocate more memory */
1724 task_unlock(task);
1725
1726 if (size != 0)
1727 kfree(addr, size);
1728
1729 assert(size_needed > 0);
1730 size = size_needed;
1731
1732 addr = kalloc(size);
1733 if (addr == 0)
1734 return (KERN_RESOURCE_SHORTAGE);
1735 }
1736
1737 /* OK, have memory and the task is locked & active */
1738 thread_list = (thread_t *)addr;
1739
1740 i = j = 0;
1741
1742 for (thread = (thread_t)queue_first(&task->threads); i < actual;
1743 ++i, thread = (thread_t)queue_next(&thread->task_threads)) {
1744 thread_reference_internal(thread);
1745 thread_list[j++] = thread;
1746 }
1747
1748 assert(queue_end(&task->threads, (queue_entry_t)thread));
1749
1750 actual = j;
1751 size_needed = actual * sizeof (mach_port_t);
1752
1753 /* can unlock task now that we've got the thread refs */
1754 task_unlock(task);
1755
1756 if (actual == 0) {
1757 /* no threads, so return null pointer and deallocate memory */
1758
1759 *threads_out = NULL;
1760 *count = 0;
1761
1762 if (size != 0)
1763 kfree(addr, size);
1764 }
1765 else {
1766 /* if we allocated too much, must copy */
1767
1768 if (size_needed < size) {
1769 void *newaddr;
1770
1771 newaddr = kalloc(size_needed);
1772 if (newaddr == 0) {
1773 for (i = 0; i < actual; ++i)
1774 thread_deallocate(thread_list[i]);
1775 kfree(addr, size);
1776 return (KERN_RESOURCE_SHORTAGE);
1777 }
1778
1779 bcopy(addr, newaddr, size_needed);
1780 kfree(addr, size);
1781 thread_list = (thread_t *)newaddr;
1782 }
1783
1784 *threads_out = thread_list;
1785 *count = actual;
1786
1787 /* do the conversion that Mig should handle */
1788
1789 for (i = 0; i < actual; ++i)
1790 ((ipc_port_t *) thread_list)[i] = convert_thread_to_port(thread_list[i]);
1791 }
1792
1793 return (KERN_SUCCESS);
1794 }
1795
1796 #define TASK_HOLD_NORMAL 0
1797 #define TASK_HOLD_PIDSUSPEND 1
1798 #define TASK_HOLD_LEGACY 2
1799 #define TASK_HOLD_LEGACY_ALL 3
1800
1801 static kern_return_t
1802 place_task_hold (
1803 register task_t task,
1804 int mode)
1805 {
1806 if (!task->active) {
1807 return (KERN_FAILURE);
1808 }
1809
1810 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1811 MACHDBG_CODE(DBG_MACH_IPC,MACH_TASK_SUSPEND) | DBG_FUNC_NONE,
1812 proc_pid(task->bsd_info), ((thread_t)queue_first(&task->threads))->thread_id,
1813 task->user_stop_count, task->user_stop_count + 1, 0);
1814
1815 #if MACH_ASSERT
1816 current_task()->suspends_outstanding++;
1817 #endif
1818
1819 if (mode == TASK_HOLD_LEGACY)
1820 task->legacy_stop_count++;
1821
1822 if (task->user_stop_count++ > 0) {
1823 /*
1824 * If the stop count was positive, the task is
1825 * already stopped and we can exit.
1826 */
1827 return (KERN_SUCCESS);
1828 }
1829
1830 /*
1831 * Put a kernel-level hold on the threads in the task (all
1832 * user-level task suspensions added together represent a
1833 * single kernel-level hold). We then wait for the threads
1834 * to stop executing user code.
1835 */
1836 task_hold_locked(task);
1837 task_wait_locked(task, FALSE);
1838
1839 return (KERN_SUCCESS);
1840 }
1841
1842 static kern_return_t
1843 release_task_hold (
1844 register task_t task,
1845 int mode)
1846 {
1847 register boolean_t release = FALSE;
1848
1849 if (!task->active) {
1850 return (KERN_FAILURE);
1851 }
1852
1853 if (mode == TASK_HOLD_PIDSUSPEND) {
1854 if (task->pidsuspended == FALSE) {
1855 return (KERN_FAILURE);
1856 }
1857 task->pidsuspended = FALSE;
1858 }
1859
1860 if (task->user_stop_count > (task->pidsuspended ? 1 : 0)) {
1861
1862 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1863 MACHDBG_CODE(DBG_MACH_IPC,MACH_TASK_RESUME) | DBG_FUNC_NONE,
1864 proc_pid(task->bsd_info), ((thread_t)queue_first(&task->threads))->thread_id,
1865 task->user_stop_count, mode, task->legacy_stop_count);
1866
1867 #if MACH_ASSERT
1868 /*
1869 * This is obviously not robust; if we suspend one task and then resume a different one,
1870 * we'll fly under the radar. This is only meant to catch the common case of a crashed
1871 * or buggy suspender.
1872 */
1873 current_task()->suspends_outstanding--;
1874 #endif
1875
1876 if (mode == TASK_HOLD_LEGACY_ALL) {
1877 if (task->legacy_stop_count >= task->user_stop_count) {
1878 task->user_stop_count = 0;
1879 release = TRUE;
1880 } else {
1881 task->user_stop_count -= task->legacy_stop_count;
1882 }
1883 task->legacy_stop_count = 0;
1884 } else {
1885 if (mode == TASK_HOLD_LEGACY && task->legacy_stop_count > 0)
1886 task->legacy_stop_count--;
1887 if (--task->user_stop_count == 0)
1888 release = TRUE;
1889 }
1890 }
1891 else {
1892 return (KERN_FAILURE);
1893 }
1894
1895 /*
1896 * Release the task if necessary.
1897 */
1898 if (release)
1899 task_release_locked(task);
1900
1901 return (KERN_SUCCESS);
1902 }
1903
1904
1905 /*
1906 * task_suspend:
1907 *
1908 * Implement an (old-fashioned) user-level suspension on a task.
1909 *
1910 * Because the user isn't expecting to have to manage a suspension
1911 * token, we'll track it for him in the kernel in the form of a naked
1912 * send right to the task's resume port. All such send rights
1913 * account for a single suspension against the task (unlike task_suspend2()
1914 * where each caller gets a unique suspension count represented by a
1915 * unique send-once right).
1916 *
1917 * Conditions:
1918 * The caller holds a reference to the task
1919 */
1920 kern_return_t
1921 task_suspend(
1922 register task_t task)
1923 {
1924 kern_return_t kr;
1925 mach_port_t port, send, old_notify;
1926 mach_port_name_t name;
1927
1928 if (task == TASK_NULL || task == kernel_task)
1929 return (KERN_INVALID_ARGUMENT);
1930
1931 task_lock(task);
1932
1933 /*
1934 * Claim a send right on the task resume port, and request a no-senders
1935 * notification on that port (if none outstanding).
1936 */
1937 if (task->itk_resume == IP_NULL) {
1938 task->itk_resume = ipc_port_alloc_kernel();
1939 if (!IP_VALID(task->itk_resume))
1940 panic("failed to create resume port");
1941 ipc_kobject_set(task->itk_resume, (ipc_kobject_t)task, IKOT_TASK_RESUME);
1942 }
1943
1944 port = task->itk_resume;
1945 ip_lock(port);
1946 assert(ip_active(port));
1947
1948 send = ipc_port_make_send_locked(port);
1949 assert(IP_VALID(send));
1950
1951 if (port->ip_nsrequest == IP_NULL) {
1952 ipc_port_nsrequest(port, port->ip_mscount, ipc_port_make_sonce_locked(port), &old_notify);
1953 assert(old_notify == IP_NULL);
1954 /* port unlocked */
1955 } else {
1956 ip_unlock(port);
1957 }
1958
1959 /*
1960 * place a legacy hold on the task.
1961 */
1962 kr = place_task_hold(task, TASK_HOLD_LEGACY);
1963 if (kr != KERN_SUCCESS) {
1964 task_unlock(task);
1965 ipc_port_release_send(send);
1966 return kr;
1967 }
1968
1969 task_unlock(task);
1970
1971 /*
1972 * Copyout the send right into the calling task's IPC space. It won't know it is there,
1973 * but we'll look it up when calling a traditional resume. Any IPC operations that
1974 * deallocate the send right will auto-release the suspension.
1975 */
1976 if ((kr = ipc_kmsg_copyout_object(current_task()->itk_space, (ipc_object_t)send,
1977 MACH_MSG_TYPE_MOVE_SEND, &name)) != KERN_SUCCESS) {
1978 printf("warning: %s(%d) failed to copyout suspension token for task %s(%d) with error: %d\n",
1979 proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info),
1980 proc_name_address(task->bsd_info), proc_pid(task->bsd_info), kr);
1981 return (kr);
1982 }
1983
1984 return (kr);
1985 }
1986
1987 /*
1988 * task_resume:
1989 * Release a user hold on a task.
1990 *
1991 * Conditions:
1992 * The caller holds a reference to the task
1993 */
1994 kern_return_t
1995 task_resume(
1996 register task_t task)
1997 {
1998 kern_return_t kr;
1999 mach_port_name_t resume_port_name;
2000 ipc_entry_t resume_port_entry;
2001 ipc_space_t space = current_task()->itk_space;
2002
2003 if (task == TASK_NULL || task == kernel_task )
2004 return (KERN_INVALID_ARGUMENT);
2005
2006 /* release a legacy task hold */
2007 task_lock(task);
2008 kr = release_task_hold(task, TASK_HOLD_LEGACY);
2009 task_unlock(task);
2010
2011 is_write_lock(space);
2012 if (is_active(space) && IP_VALID(task->itk_resume) &&
2013 ipc_hash_lookup(space, (ipc_object_t)task->itk_resume, &resume_port_name, &resume_port_entry) == TRUE) {
2014 /*
2015 * We found a suspension token in the caller's IPC space. Release a send right to indicate that
2016 * we are holding one less legacy hold on the task from this caller. If the release failed,
2017 * go ahead and drop all the rights, as someone either already released our holds or the task
2018 * is gone.
2019 */
2020 if (kr == KERN_SUCCESS)
2021 ipc_right_dealloc(space, resume_port_name, resume_port_entry);
2022 else
2023 ipc_right_destroy(space, resume_port_name, resume_port_entry, FALSE, 0);
2024 /* space unlocked */
2025 } else {
2026 is_write_unlock(space);
2027 if (kr == KERN_SUCCESS)
2028 printf("warning: %s(%d) performed out-of-band resume on %s(%d)\n",
2029 proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info),
2030 proc_name_address(task->bsd_info), proc_pid(task->bsd_info));
2031 }
2032
2033 return kr;
2034 }
2035
2036 /*
2037 * Suspend the target task.
2038 * Making/holding a token/reference/port is the callers responsibility.
2039 */
2040 kern_return_t
2041 task_suspend_internal(task_t task)
2042 {
2043 kern_return_t kr;
2044
2045 if (task == TASK_NULL || task == kernel_task)
2046 return (KERN_INVALID_ARGUMENT);
2047
2048 task_lock(task);
2049 kr = place_task_hold(task, TASK_HOLD_NORMAL);
2050 task_unlock(task);
2051 return (kr);
2052 }
2053
2054 /*
2055 * Suspend the target task, and return a suspension token. The token
2056 * represents a reference on the suspended task.
2057 */
2058 kern_return_t
2059 task_suspend2(
2060 register task_t task,
2061 task_suspension_token_t *suspend_token)
2062 {
2063 kern_return_t kr;
2064
2065 kr = task_suspend_internal(task);
2066 if (kr != KERN_SUCCESS) {
2067 *suspend_token = TASK_NULL;
2068 return (kr);
2069 }
2070
2071 /*
2072 * Take a reference on the target task and return that to the caller
2073 * as a "suspension token," which can be converted into an SO right to
2074 * the now-suspended task's resume port.
2075 */
2076 task_reference_internal(task);
2077 *suspend_token = task;
2078
2079 return (KERN_SUCCESS);
2080 }
2081
2082 /*
2083 * Resume the task
2084 * (reference/token/port management is caller's responsibility).
2085 */
2086 kern_return_t
2087 task_resume_internal(
2088 register task_suspension_token_t task)
2089 {
2090 kern_return_t kr;
2091
2092 if (task == TASK_NULL || task == kernel_task)
2093 return (KERN_INVALID_ARGUMENT);
2094
2095 task_lock(task);
2096 kr = release_task_hold(task, TASK_HOLD_NORMAL);
2097 task_unlock(task);
2098 return (kr);
2099 }
2100
2101 /*
2102 * Resume the task using a suspension token. Consumes the token's ref.
2103 */
2104 kern_return_t
2105 task_resume2(
2106 register task_suspension_token_t task)
2107 {
2108 kern_return_t kr;
2109
2110 kr = task_resume_internal(task);
2111 task_suspension_token_deallocate(task);
2112
2113 return (kr);
2114 }
2115
2116 boolean_t
2117 task_suspension_notify(mach_msg_header_t *request_header)
2118 {
2119 ipc_port_t port = (ipc_port_t) request_header->msgh_remote_port;
2120 task_t task = convert_port_to_task_suspension_token(port);
2121 mach_msg_type_number_t not_count;
2122
2123 if (task == TASK_NULL || task == kernel_task)
2124 return TRUE; /* nothing to do */
2125
2126 switch (request_header->msgh_id) {
2127
2128 case MACH_NOTIFY_SEND_ONCE:
2129 /* release the hold held by this specific send-once right */
2130 task_lock(task);
2131 release_task_hold(task, TASK_HOLD_NORMAL);
2132 task_unlock(task);
2133 break;
2134
2135 case MACH_NOTIFY_NO_SENDERS:
2136 not_count = ((mach_no_senders_notification_t *)request_header)->not_count;
2137
2138 task_lock(task);
2139 ip_lock(port);
2140 if (port->ip_mscount == not_count) {
2141
2142 /* release all the [remaining] outstanding legacy holds */
2143 assert(port->ip_nsrequest == IP_NULL);
2144 ip_unlock(port);
2145 release_task_hold(task, TASK_HOLD_LEGACY_ALL);
2146 task_unlock(task);
2147
2148 } else if (port->ip_nsrequest == IP_NULL) {
2149 ipc_port_t old_notify;
2150
2151 task_unlock(task);
2152 /* new send rights, re-arm notification at current make-send count */
2153 ipc_port_nsrequest(port, port->ip_mscount, ipc_port_make_sonce_locked(port), &old_notify);
2154 assert(old_notify == IP_NULL);
2155 /* port unlocked */
2156 } else {
2157 ip_unlock(port);
2158 task_unlock(task);
2159 }
2160 break;
2161
2162 default:
2163 break;
2164 }
2165
2166 task_suspension_token_deallocate(task); /* drop token reference */
2167 return TRUE;
2168 }
2169
2170 kern_return_t
2171 task_pidsuspend_locked(task_t task)
2172 {
2173 kern_return_t kr;
2174
2175 if (task->pidsuspended) {
2176 kr = KERN_FAILURE;
2177 goto out;
2178 }
2179
2180 task->pidsuspended = TRUE;
2181
2182 kr = place_task_hold(task, TASK_HOLD_PIDSUSPEND);
2183 if (kr != KERN_SUCCESS) {
2184 task->pidsuspended = FALSE;
2185 }
2186 out:
2187 return(kr);
2188 }
2189
2190
2191 /*
2192 * task_pidsuspend:
2193 *
2194 * Suspends a task by placing a hold on its threads.
2195 *
2196 * Conditions:
2197 * The caller holds a reference to the task
2198 */
2199 kern_return_t
2200 task_pidsuspend(
2201 register task_t task)
2202 {
2203 kern_return_t kr;
2204
2205 if (task == TASK_NULL || task == kernel_task)
2206 return (KERN_INVALID_ARGUMENT);
2207
2208 task_lock(task);
2209
2210 kr = task_pidsuspend_locked(task);
2211
2212 task_unlock(task);
2213
2214 return (kr);
2215 }
2216
2217 /* If enabled, we bring all the frozen pages back in prior to resumption; otherwise, they're faulted back in on demand */
2218 #define THAW_ON_RESUME 1
2219
2220 /*
2221 * task_pidresume:
2222 * Resumes a previously suspended task.
2223 *
2224 * Conditions:
2225 * The caller holds a reference to the task
2226 */
2227 kern_return_t
2228 task_pidresume(
2229 register task_t task)
2230 {
2231 kern_return_t kr;
2232
2233 if (task == TASK_NULL || task == kernel_task)
2234 return (KERN_INVALID_ARGUMENT);
2235
2236 task_lock(task);
2237
2238 #if (CONFIG_FREEZE && THAW_ON_RESUME)
2239
2240 while (task->changing_freeze_state) {
2241
2242 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
2243 task_unlock(task);
2244 thread_block(THREAD_CONTINUE_NULL);
2245
2246 task_lock(task);
2247 }
2248 task->changing_freeze_state = TRUE;
2249 #endif
2250
2251 kr = release_task_hold(task, TASK_HOLD_PIDSUSPEND);
2252
2253 task_unlock(task);
2254
2255 #if (CONFIG_FREEZE && THAW_ON_RESUME)
2256 if ((kr == KERN_SUCCESS) && (task->frozen == TRUE)) {
2257
2258 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
2259
2260 kr = KERN_SUCCESS;
2261 } else {
2262
2263 kr = vm_map_thaw(task->map);
2264 }
2265 }
2266 task_lock(task);
2267
2268 if (kr == KERN_SUCCESS)
2269 task->frozen = FALSE;
2270 task->changing_freeze_state = FALSE;
2271 thread_wakeup(&task->changing_freeze_state);
2272
2273 task_unlock(task);
2274 #endif
2275
2276 return (kr);
2277 }
2278
2279 #if CONFIG_FREEZE
2280
2281 /*
2282 * task_freeze:
2283 *
2284 * Freeze a task.
2285 *
2286 * Conditions:
2287 * The caller holds a reference to the task
2288 */
2289 kern_return_t
2290 task_freeze(
2291 register task_t task,
2292 uint32_t *purgeable_count,
2293 uint32_t *wired_count,
2294 uint32_t *clean_count,
2295 uint32_t *dirty_count,
2296 uint32_t dirty_budget,
2297 boolean_t *shared,
2298 boolean_t walk_only)
2299 {
2300 kern_return_t kr;
2301
2302 if (task == TASK_NULL || task == kernel_task)
2303 return (KERN_INVALID_ARGUMENT);
2304
2305 task_lock(task);
2306
2307 while (task->changing_freeze_state) {
2308
2309 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
2310 task_unlock(task);
2311 thread_block(THREAD_CONTINUE_NULL);
2312
2313 task_lock(task);
2314 }
2315 if (task->frozen) {
2316 task_unlock(task);
2317 return (KERN_FAILURE);
2318 }
2319 task->changing_freeze_state = TRUE;
2320
2321 task_unlock(task);
2322
2323 if (walk_only) {
2324 kr = vm_map_freeze_walk(task->map, purgeable_count, wired_count, clean_count, dirty_count, dirty_budget, shared);
2325 } else {
2326 kr = vm_map_freeze(task->map, purgeable_count, wired_count, clean_count, dirty_count, dirty_budget, shared);
2327 }
2328
2329 task_lock(task);
2330
2331 if (walk_only == FALSE && kr == KERN_SUCCESS)
2332 task->frozen = TRUE;
2333 task->changing_freeze_state = FALSE;
2334 thread_wakeup(&task->changing_freeze_state);
2335
2336 task_unlock(task);
2337
2338 return (kr);
2339 }
2340
2341 /*
2342 * task_thaw:
2343 *
2344 * Thaw a currently frozen task.
2345 *
2346 * Conditions:
2347 * The caller holds a reference to the task
2348 */
2349 extern void
2350 vm_consider_waking_compactor_swapper(void);
2351
2352 kern_return_t
2353 task_thaw(
2354 register task_t task)
2355 {
2356 kern_return_t kr;
2357
2358 if (task == TASK_NULL || task == kernel_task)
2359 return (KERN_INVALID_ARGUMENT);
2360
2361 task_lock(task);
2362
2363 while (task->changing_freeze_state) {
2364
2365 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
2366 task_unlock(task);
2367 thread_block(THREAD_CONTINUE_NULL);
2368
2369 task_lock(task);
2370 }
2371 if (!task->frozen) {
2372 task_unlock(task);
2373 return (KERN_FAILURE);
2374 }
2375 task->changing_freeze_state = TRUE;
2376
2377 if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE) {
2378 task_unlock(task);
2379
2380 kr = vm_map_thaw(task->map);
2381
2382 task_lock(task);
2383
2384 if (kr == KERN_SUCCESS)
2385 task->frozen = FALSE;
2386 } else {
2387 task->frozen = FALSE;
2388 kr = KERN_SUCCESS;
2389 }
2390
2391 task->changing_freeze_state = FALSE;
2392 thread_wakeup(&task->changing_freeze_state);
2393
2394 task_unlock(task);
2395
2396 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
2397 vm_consider_waking_compactor_swapper();
2398 }
2399
2400 return (kr);
2401 }
2402
2403 #endif /* CONFIG_FREEZE */
2404
2405 kern_return_t
2406 host_security_set_task_token(
2407 host_security_t host_security,
2408 task_t task,
2409 security_token_t sec_token,
2410 audit_token_t audit_token,
2411 host_priv_t host_priv)
2412 {
2413 ipc_port_t host_port;
2414 kern_return_t kr;
2415
2416 if (task == TASK_NULL)
2417 return(KERN_INVALID_ARGUMENT);
2418
2419 if (host_security == HOST_NULL)
2420 return(KERN_INVALID_SECURITY);
2421
2422 task_lock(task);
2423 task->sec_token = sec_token;
2424 task->audit_token = audit_token;
2425
2426 task_unlock(task);
2427
2428 if (host_priv != HOST_PRIV_NULL) {
2429 kr = host_get_host_priv_port(host_priv, &host_port);
2430 } else {
2431 kr = host_get_host_port(host_priv_self(), &host_port);
2432 }
2433 assert(kr == KERN_SUCCESS);
2434 kr = task_set_special_port(task, TASK_HOST_PORT, host_port);
2435 return(kr);
2436 }
2437
2438 kern_return_t
2439 task_send_trace_memory(
2440 task_t target_task,
2441 __unused uint32_t pid,
2442 __unused uint64_t uniqueid)
2443 {
2444 kern_return_t kr = KERN_INVALID_ARGUMENT;
2445 if (target_task == TASK_NULL)
2446 return (KERN_INVALID_ARGUMENT);
2447
2448 #if CONFIG_ATM
2449 kr = atm_send_proc_inspect_notification(target_task,
2450 pid,
2451 uniqueid);
2452
2453 #endif
2454 return (kr);
2455 }
2456 /*
2457 * This routine was added, pretty much exclusively, for registering the
2458 * RPC glue vector for in-kernel short circuited tasks. Rather than
2459 * removing it completely, I have only disabled that feature (which was
2460 * the only feature at the time). It just appears that we are going to
2461 * want to add some user data to tasks in the future (i.e. bsd info,
2462 * task names, etc...), so I left it in the formal task interface.
2463 */
2464 kern_return_t
2465 task_set_info(
2466 task_t task,
2467 task_flavor_t flavor,
2468 __unused task_info_t task_info_in, /* pointer to IN array */
2469 __unused mach_msg_type_number_t task_info_count)
2470 {
2471 if (task == TASK_NULL)
2472 return(KERN_INVALID_ARGUMENT);
2473
2474 switch (flavor) {
2475
2476 #if CONFIG_ATM
2477 case TASK_TRACE_MEMORY_INFO:
2478 {
2479 if (task_info_count != TASK_TRACE_MEMORY_INFO_COUNT)
2480 return (KERN_INVALID_ARGUMENT);
2481
2482 assert(task_info_in != NULL);
2483 task_trace_memory_info_t mem_info;
2484 mem_info = (task_trace_memory_info_t) task_info_in;
2485 kern_return_t kr = atm_register_trace_memory(task,
2486 mem_info->user_memory_address,
2487 mem_info->buffer_size,
2488 mem_info->mailbox_array_size);
2489 return kr;
2490 break;
2491 }
2492
2493 #endif
2494 default:
2495 return (KERN_INVALID_ARGUMENT);
2496 }
2497 return (KERN_SUCCESS);
2498 }
2499
2500 kern_return_t
2501 task_info(
2502 task_t task,
2503 task_flavor_t flavor,
2504 task_info_t task_info_out,
2505 mach_msg_type_number_t *task_info_count)
2506 {
2507 kern_return_t error = KERN_SUCCESS;
2508
2509 if (task == TASK_NULL)
2510 return (KERN_INVALID_ARGUMENT);
2511
2512 task_lock(task);
2513
2514 if ((task != current_task()) && (!task->active)) {
2515 task_unlock(task);
2516 return (KERN_INVALID_ARGUMENT);
2517 }
2518
2519 switch (flavor) {
2520
2521 case TASK_BASIC_INFO_32:
2522 case TASK_BASIC2_INFO_32:
2523 {
2524 task_basic_info_32_t basic_info;
2525 vm_map_t map;
2526 clock_sec_t secs;
2527 clock_usec_t usecs;
2528
2529 if (*task_info_count < TASK_BASIC_INFO_32_COUNT) {
2530 error = KERN_INVALID_ARGUMENT;
2531 break;
2532 }
2533
2534 basic_info = (task_basic_info_32_t)task_info_out;
2535
2536 map = (task == kernel_task)? kernel_map: task->map;
2537 basic_info->virtual_size = (typeof(basic_info->virtual_size))map->size;
2538 if (flavor == TASK_BASIC2_INFO_32) {
2539 /*
2540 * The "BASIC2" flavor gets the maximum resident
2541 * size instead of the current resident size...
2542 */
2543 basic_info->resident_size = pmap_resident_max(map->pmap);
2544 } else {
2545 basic_info->resident_size = pmap_resident_count(map->pmap);
2546 }
2547 basic_info->resident_size *= PAGE_SIZE;
2548
2549 basic_info->policy = ((task != kernel_task)?
2550 POLICY_TIMESHARE: POLICY_RR);
2551 basic_info->suspend_count = task->user_stop_count;
2552
2553 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
2554 basic_info->user_time.seconds =
2555 (typeof(basic_info->user_time.seconds))secs;
2556 basic_info->user_time.microseconds = usecs;
2557
2558 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
2559 basic_info->system_time.seconds =
2560 (typeof(basic_info->system_time.seconds))secs;
2561 basic_info->system_time.microseconds = usecs;
2562
2563 *task_info_count = TASK_BASIC_INFO_32_COUNT;
2564 break;
2565 }
2566
2567 case TASK_BASIC_INFO_64:
2568 {
2569 task_basic_info_64_t basic_info;
2570 vm_map_t map;
2571 clock_sec_t secs;
2572 clock_usec_t usecs;
2573
2574 if (*task_info_count < TASK_BASIC_INFO_64_COUNT) {
2575 error = KERN_INVALID_ARGUMENT;
2576 break;
2577 }
2578
2579 basic_info = (task_basic_info_64_t)task_info_out;
2580
2581 map = (task == kernel_task)? kernel_map: task->map;
2582 basic_info->virtual_size = map->size;
2583 basic_info->resident_size =
2584 (mach_vm_size_t)(pmap_resident_count(map->pmap))
2585 * PAGE_SIZE_64;
2586
2587 basic_info->policy = ((task != kernel_task)?
2588 POLICY_TIMESHARE: POLICY_RR);
2589 basic_info->suspend_count = task->user_stop_count;
2590
2591 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
2592 basic_info->user_time.seconds =
2593 (typeof(basic_info->user_time.seconds))secs;
2594 basic_info->user_time.microseconds = usecs;
2595
2596 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
2597 basic_info->system_time.seconds =
2598 (typeof(basic_info->system_time.seconds))secs;
2599 basic_info->system_time.microseconds = usecs;
2600
2601 *task_info_count = TASK_BASIC_INFO_64_COUNT;
2602 break;
2603 }
2604
2605 case MACH_TASK_BASIC_INFO:
2606 {
2607 mach_task_basic_info_t basic_info;
2608 vm_map_t map;
2609 clock_sec_t secs;
2610 clock_usec_t usecs;
2611
2612 if (*task_info_count < MACH_TASK_BASIC_INFO_COUNT) {
2613 error = KERN_INVALID_ARGUMENT;
2614 break;
2615 }
2616
2617 basic_info = (mach_task_basic_info_t)task_info_out;
2618
2619 map = (task == kernel_task) ? kernel_map : task->map;
2620
2621 basic_info->virtual_size = map->size;
2622
2623 basic_info->resident_size =
2624 (mach_vm_size_t)(pmap_resident_count(map->pmap));
2625 basic_info->resident_size *= PAGE_SIZE_64;
2626
2627 basic_info->resident_size_max =
2628 (mach_vm_size_t)(pmap_resident_max(map->pmap));
2629 basic_info->resident_size_max *= PAGE_SIZE_64;
2630
2631 basic_info->policy = ((task != kernel_task) ?
2632 POLICY_TIMESHARE : POLICY_RR);
2633
2634 basic_info->suspend_count = task->user_stop_count;
2635
2636 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
2637 basic_info->user_time.seconds =
2638 (typeof(basic_info->user_time.seconds))secs;
2639 basic_info->user_time.microseconds = usecs;
2640
2641 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
2642 basic_info->system_time.seconds =
2643 (typeof(basic_info->system_time.seconds))secs;
2644 basic_info->system_time.microseconds = usecs;
2645
2646 *task_info_count = MACH_TASK_BASIC_INFO_COUNT;
2647 break;
2648 }
2649
2650 case TASK_THREAD_TIMES_INFO:
2651 {
2652 register task_thread_times_info_t times_info;
2653 register thread_t thread;
2654
2655 if (*task_info_count < TASK_THREAD_TIMES_INFO_COUNT) {
2656 error = KERN_INVALID_ARGUMENT;
2657 break;
2658 }
2659
2660 times_info = (task_thread_times_info_t) task_info_out;
2661 times_info->user_time.seconds = 0;
2662 times_info->user_time.microseconds = 0;
2663 times_info->system_time.seconds = 0;
2664 times_info->system_time.microseconds = 0;
2665
2666
2667 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2668 time_value_t user_time, system_time;
2669
2670 if (thread->options & TH_OPT_IDLE_THREAD)
2671 continue;
2672
2673 thread_read_times(thread, &user_time, &system_time);
2674
2675 time_value_add(&times_info->user_time, &user_time);
2676 time_value_add(&times_info->system_time, &system_time);
2677 }
2678
2679 *task_info_count = TASK_THREAD_TIMES_INFO_COUNT;
2680 break;
2681 }
2682
2683 case TASK_ABSOLUTETIME_INFO:
2684 {
2685 task_absolutetime_info_t info;
2686 register thread_t thread;
2687
2688 if (*task_info_count < TASK_ABSOLUTETIME_INFO_COUNT) {
2689 error = KERN_INVALID_ARGUMENT;
2690 break;
2691 }
2692
2693 info = (task_absolutetime_info_t)task_info_out;
2694 info->threads_user = info->threads_system = 0;
2695
2696
2697 info->total_user = task->total_user_time;
2698 info->total_system = task->total_system_time;
2699
2700 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2701 uint64_t tval;
2702 spl_t x;
2703
2704 if (thread->options & TH_OPT_IDLE_THREAD)
2705 continue;
2706
2707 x = splsched();
2708 thread_lock(thread);
2709
2710 tval = timer_grab(&thread->user_timer);
2711 info->threads_user += tval;
2712 info->total_user += tval;
2713
2714 tval = timer_grab(&thread->system_timer);
2715 if (thread->precise_user_kernel_time) {
2716 info->threads_system += tval;
2717 info->total_system += tval;
2718 } else {
2719 /* system_timer may represent either sys or user */
2720 info->threads_user += tval;
2721 info->total_user += tval;
2722 }
2723
2724 thread_unlock(thread);
2725 splx(x);
2726 }
2727
2728
2729 *task_info_count = TASK_ABSOLUTETIME_INFO_COUNT;
2730 break;
2731 }
2732
2733 case TASK_DYLD_INFO:
2734 {
2735 task_dyld_info_t info;
2736
2737 /*
2738 * We added the format field to TASK_DYLD_INFO output. For
2739 * temporary backward compatibility, accept the fact that
2740 * clients may ask for the old version - distinquished by the
2741 * size of the expected result structure.
2742 */
2743 #define TASK_LEGACY_DYLD_INFO_COUNT \
2744 offsetof(struct task_dyld_info, all_image_info_format)/sizeof(natural_t)
2745
2746 if (*task_info_count < TASK_LEGACY_DYLD_INFO_COUNT) {
2747 error = KERN_INVALID_ARGUMENT;
2748 break;
2749 }
2750
2751 info = (task_dyld_info_t)task_info_out;
2752 info->all_image_info_addr = task->all_image_info_addr;
2753 info->all_image_info_size = task->all_image_info_size;
2754
2755 /* only set format on output for those expecting it */
2756 if (*task_info_count >= TASK_DYLD_INFO_COUNT) {
2757 info->all_image_info_format = task_has_64BitAddr(task) ?
2758 TASK_DYLD_ALL_IMAGE_INFO_64 :
2759 TASK_DYLD_ALL_IMAGE_INFO_32 ;
2760 *task_info_count = TASK_DYLD_INFO_COUNT;
2761 } else {
2762 *task_info_count = TASK_LEGACY_DYLD_INFO_COUNT;
2763 }
2764 break;
2765 }
2766
2767 case TASK_EXTMOD_INFO:
2768 {
2769 task_extmod_info_t info;
2770 void *p;
2771
2772 if (*task_info_count < TASK_EXTMOD_INFO_COUNT) {
2773 error = KERN_INVALID_ARGUMENT;
2774 break;
2775 }
2776
2777 info = (task_extmod_info_t)task_info_out;
2778
2779 p = get_bsdtask_info(task);
2780 if (p) {
2781 proc_getexecutableuuid(p, info->task_uuid, sizeof(info->task_uuid));
2782 } else {
2783 bzero(info->task_uuid, sizeof(info->task_uuid));
2784 }
2785 info->extmod_statistics = task->extmod_statistics;
2786 *task_info_count = TASK_EXTMOD_INFO_COUNT;
2787
2788 break;
2789 }
2790
2791 case TASK_KERNELMEMORY_INFO:
2792 {
2793 task_kernelmemory_info_t tkm_info;
2794 ledger_amount_t credit, debit;
2795
2796 if (*task_info_count < TASK_KERNELMEMORY_INFO_COUNT) {
2797 error = KERN_INVALID_ARGUMENT;
2798 break;
2799 }
2800
2801 tkm_info = (task_kernelmemory_info_t) task_info_out;
2802 tkm_info->total_palloc = 0;
2803 tkm_info->total_pfree = 0;
2804 tkm_info->total_salloc = 0;
2805 tkm_info->total_sfree = 0;
2806
2807 if (task == kernel_task) {
2808 /*
2809 * All shared allocs/frees from other tasks count against
2810 * the kernel private memory usage. If we are looking up
2811 * info for the kernel task, gather from everywhere.
2812 */
2813 task_unlock(task);
2814
2815 /* start by accounting for all the terminated tasks against the kernel */
2816 tkm_info->total_palloc = tasks_tkm_private.alloc + tasks_tkm_shared.alloc;
2817 tkm_info->total_pfree = tasks_tkm_private.free + tasks_tkm_shared.free;
2818
2819 /* count all other task/thread shared alloc/free against the kernel */
2820 lck_mtx_lock(&tasks_threads_lock);
2821
2822 /* XXX this really shouldn't be using the function parameter 'task' as a local var! */
2823 queue_iterate(&tasks, task, task_t, tasks) {
2824 if (task == kernel_task) {
2825 if (ledger_get_entries(task->ledger,
2826 task_ledgers.tkm_private, &credit,
2827 &debit) == KERN_SUCCESS) {
2828 tkm_info->total_palloc += credit;
2829 tkm_info->total_pfree += debit;
2830 }
2831 }
2832 if (!ledger_get_entries(task->ledger,
2833 task_ledgers.tkm_shared, &credit, &debit)) {
2834 tkm_info->total_palloc += credit;
2835 tkm_info->total_pfree += debit;
2836 }
2837 }
2838 lck_mtx_unlock(&tasks_threads_lock);
2839 } else {
2840 if (!ledger_get_entries(task->ledger,
2841 task_ledgers.tkm_private, &credit, &debit)) {
2842 tkm_info->total_palloc = credit;
2843 tkm_info->total_pfree = debit;
2844 }
2845 if (!ledger_get_entries(task->ledger,
2846 task_ledgers.tkm_shared, &credit, &debit)) {
2847 tkm_info->total_salloc = credit;
2848 tkm_info->total_sfree = debit;
2849 }
2850 task_unlock(task);
2851 }
2852
2853 *task_info_count = TASK_KERNELMEMORY_INFO_COUNT;
2854 return KERN_SUCCESS;
2855 }
2856
2857 /* OBSOLETE */
2858 case TASK_SCHED_FIFO_INFO:
2859 {
2860
2861 if (*task_info_count < POLICY_FIFO_BASE_COUNT) {
2862 error = KERN_INVALID_ARGUMENT;
2863 break;
2864 }
2865
2866 error = KERN_INVALID_POLICY;
2867 break;
2868 }
2869
2870 /* OBSOLETE */
2871 case TASK_SCHED_RR_INFO:
2872 {
2873 register policy_rr_base_t rr_base;
2874 uint32_t quantum_time;
2875 uint64_t quantum_ns;
2876
2877 if (*task_info_count < POLICY_RR_BASE_COUNT) {
2878 error = KERN_INVALID_ARGUMENT;
2879 break;
2880 }
2881
2882 rr_base = (policy_rr_base_t) task_info_out;
2883
2884 if (task != kernel_task) {
2885 error = KERN_INVALID_POLICY;
2886 break;
2887 }
2888
2889 rr_base->base_priority = task->priority;
2890
2891 quantum_time = SCHED(initial_quantum_size)(THREAD_NULL);
2892 absolutetime_to_nanoseconds(quantum_time, &quantum_ns);
2893
2894 rr_base->quantum = (uint32_t)(quantum_ns / 1000 / 1000);
2895
2896 *task_info_count = POLICY_RR_BASE_COUNT;
2897 break;
2898 }
2899
2900 /* OBSOLETE */
2901 case TASK_SCHED_TIMESHARE_INFO:
2902 {
2903 register policy_timeshare_base_t ts_base;
2904
2905 if (*task_info_count < POLICY_TIMESHARE_BASE_COUNT) {
2906 error = KERN_INVALID_ARGUMENT;
2907 break;
2908 }
2909
2910 ts_base = (policy_timeshare_base_t) task_info_out;
2911
2912 if (task == kernel_task) {
2913 error = KERN_INVALID_POLICY;
2914 break;
2915 }
2916
2917 ts_base->base_priority = task->priority;
2918
2919 *task_info_count = POLICY_TIMESHARE_BASE_COUNT;
2920 break;
2921 }
2922
2923 case TASK_SECURITY_TOKEN:
2924 {
2925 register security_token_t *sec_token_p;
2926
2927 if (*task_info_count < TASK_SECURITY_TOKEN_COUNT) {
2928 error = KERN_INVALID_ARGUMENT;
2929 break;
2930 }
2931
2932 sec_token_p = (security_token_t *) task_info_out;
2933
2934 *sec_token_p = task->sec_token;
2935
2936 *task_info_count = TASK_SECURITY_TOKEN_COUNT;
2937 break;
2938 }
2939
2940 case TASK_AUDIT_TOKEN:
2941 {
2942 register audit_token_t *audit_token_p;
2943
2944 if (*task_info_count < TASK_AUDIT_TOKEN_COUNT) {
2945 error = KERN_INVALID_ARGUMENT;
2946 break;
2947 }
2948
2949 audit_token_p = (audit_token_t *) task_info_out;
2950
2951 *audit_token_p = task->audit_token;
2952
2953 *task_info_count = TASK_AUDIT_TOKEN_COUNT;
2954 break;
2955 }
2956
2957 case TASK_SCHED_INFO:
2958 error = KERN_INVALID_ARGUMENT;
2959 break;
2960
2961 case TASK_EVENTS_INFO:
2962 {
2963 register task_events_info_t events_info;
2964 register thread_t thread;
2965
2966 if (*task_info_count < TASK_EVENTS_INFO_COUNT) {
2967 error = KERN_INVALID_ARGUMENT;
2968 break;
2969 }
2970
2971 events_info = (task_events_info_t) task_info_out;
2972
2973
2974 events_info->faults = task->faults;
2975 events_info->pageins = task->pageins;
2976 events_info->cow_faults = task->cow_faults;
2977 events_info->messages_sent = task->messages_sent;
2978 events_info->messages_received = task->messages_received;
2979 events_info->syscalls_mach = task->syscalls_mach;
2980 events_info->syscalls_unix = task->syscalls_unix;
2981
2982 events_info->csw = task->c_switch;
2983
2984 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2985 events_info->csw += thread->c_switch;
2986 events_info->syscalls_mach += thread->syscalls_mach;
2987 events_info->syscalls_unix += thread->syscalls_unix;
2988 }
2989
2990
2991 *task_info_count = TASK_EVENTS_INFO_COUNT;
2992 break;
2993 }
2994 case TASK_AFFINITY_TAG_INFO:
2995 {
2996 if (*task_info_count < TASK_AFFINITY_TAG_INFO_COUNT) {
2997 error = KERN_INVALID_ARGUMENT;
2998 break;
2999 }
3000
3001 error = task_affinity_info(task, task_info_out, task_info_count);
3002 break;
3003 }
3004 case TASK_POWER_INFO:
3005 {
3006 if (*task_info_count < TASK_POWER_INFO_COUNT) {
3007 error = KERN_INVALID_ARGUMENT;
3008 break;
3009 }
3010
3011 task_power_info_locked(task, (task_power_info_t)task_info_out, NULL);
3012 break;
3013 }
3014
3015 case TASK_POWER_INFO_V2:
3016 {
3017 if (*task_info_count < TASK_POWER_INFO_V2_COUNT) {
3018 error = KERN_INVALID_ARGUMENT;
3019 break;
3020 }
3021 task_power_info_v2_t tpiv2 = (task_power_info_v2_t) task_info_out;
3022 task_power_info_locked(task, &tpiv2->cpu_energy, &tpiv2->gpu_energy);
3023 break;
3024 }
3025
3026 case TASK_VM_INFO:
3027 case TASK_VM_INFO_PURGEABLE:
3028 {
3029 task_vm_info_t vm_info;
3030 vm_map_t map;
3031
3032 if (*task_info_count < TASK_VM_INFO_COUNT) {
3033 error = KERN_INVALID_ARGUMENT;
3034 break;
3035 }
3036
3037 vm_info = (task_vm_info_t)task_info_out;
3038
3039 if (task == kernel_task) {
3040 map = kernel_map;
3041 /* no lock */
3042 } else {
3043 map = task->map;
3044 vm_map_lock_read(map);
3045 }
3046
3047 vm_info->virtual_size = (typeof(vm_info->virtual_size))map->size;
3048 vm_info->region_count = map->hdr.nentries;
3049 vm_info->page_size = vm_map_page_size(map);
3050
3051 vm_info->resident_size = pmap_resident_count(map->pmap);
3052 vm_info->resident_size *= PAGE_SIZE;
3053 vm_info->resident_size_peak = pmap_resident_max(map->pmap);
3054 vm_info->resident_size_peak *= PAGE_SIZE;
3055
3056 #define _VM_INFO(_name) \
3057 vm_info->_name = ((mach_vm_size_t) map->pmap->stats._name) * PAGE_SIZE
3058
3059 _VM_INFO(device);
3060 _VM_INFO(device_peak);
3061 _VM_INFO(external);
3062 _VM_INFO(external_peak);
3063 _VM_INFO(internal);
3064 _VM_INFO(internal_peak);
3065 _VM_INFO(reusable);
3066 _VM_INFO(reusable_peak);
3067 _VM_INFO(compressed);
3068 _VM_INFO(compressed_peak);
3069 _VM_INFO(compressed_lifetime);
3070
3071 vm_info->purgeable_volatile_pmap = 0;
3072 vm_info->purgeable_volatile_resident = 0;
3073 vm_info->purgeable_volatile_virtual = 0;
3074 if (task == kernel_task) {
3075 /*
3076 * We do not maintain the detailed stats for the
3077 * kernel_pmap, so just count everything as
3078 * "internal"...
3079 */
3080 vm_info->internal = vm_info->resident_size;
3081 /*
3082 * ... but since the memory held by the VM compressor
3083 * in the kernel address space ought to be attributed
3084 * to user-space tasks, we subtract it from "internal"
3085 * to give memory reporting tools a more accurate idea
3086 * of what the kernel itself is actually using, instead
3087 * of making it look like the kernel is leaking memory
3088 * when the system is under memory pressure.
3089 */
3090 vm_info->internal -= (VM_PAGE_COMPRESSOR_COUNT *
3091 PAGE_SIZE);
3092 } else {
3093 mach_vm_size_t volatile_virtual_size;
3094 mach_vm_size_t volatile_resident_size;
3095 mach_vm_size_t volatile_pmap_size;
3096 kern_return_t kr;
3097
3098 if (flavor == TASK_VM_INFO_PURGEABLE) {
3099 kr = vm_map_query_volatile(
3100 map,
3101 &volatile_virtual_size,
3102 &volatile_resident_size,
3103 &volatile_pmap_size);
3104 if (kr == KERN_SUCCESS) {
3105 vm_info->purgeable_volatile_pmap =
3106 volatile_pmap_size;
3107 vm_info->purgeable_volatile_resident =
3108 volatile_resident_size;
3109 vm_info->purgeable_volatile_virtual =
3110 volatile_virtual_size;
3111 }
3112 }
3113 vm_map_unlock_read(map);
3114 }
3115
3116 *task_info_count = TASK_VM_INFO_COUNT;
3117 break;
3118 }
3119
3120 case TASK_WAIT_STATE_INFO:
3121 {
3122 /*
3123 * Deprecated flavor. Currently allowing some results until all users
3124 * stop calling it. The results may not be accurate.
3125 */
3126 task_wait_state_info_t wait_state_info;
3127 uint64_t total_sfi_ledger_val = 0;
3128
3129 if (*task_info_count < TASK_WAIT_STATE_INFO_COUNT) {
3130 error = KERN_INVALID_ARGUMENT;
3131 break;
3132 }
3133
3134 wait_state_info = (task_wait_state_info_t) task_info_out;
3135
3136 wait_state_info->total_wait_state_time = 0;
3137 bzero(wait_state_info->_reserved, sizeof(wait_state_info->_reserved));
3138
3139 int i, prev_lentry = -1;
3140 int64_t val_credit, val_debit;
3141
3142 for (i = 0; i < MAX_SFI_CLASS_ID; i++){
3143 val_credit =0;
3144 /*
3145 * checking with prev_lentry != entry ensures adjacent classes
3146 * which share the same ledger do not add wait times twice.
3147 * Note: Use ledger() call to get data for each individual sfi class.
3148 */
3149 if (prev_lentry != task_ledgers.sfi_wait_times[i] &&
3150 KERN_SUCCESS == ledger_get_entries(task->ledger,
3151 task_ledgers.sfi_wait_times[i], &val_credit, &val_debit)) {
3152 total_sfi_ledger_val += val_credit;
3153 }
3154 prev_lentry = task_ledgers.sfi_wait_times[i];
3155 }
3156
3157 wait_state_info->total_wait_sfi_state_time = total_sfi_ledger_val;
3158 *task_info_count = TASK_WAIT_STATE_INFO_COUNT;
3159
3160 break;
3161 }
3162
3163 default:
3164 error = KERN_INVALID_ARGUMENT;
3165 }
3166
3167 task_unlock(task);
3168 return (error);
3169 }
3170
3171 /*
3172 * task_power_info
3173 *
3174 * Returns power stats for the task.
3175 * Note: Called with task locked.
3176 */
3177 void
3178 task_power_info_locked(
3179 task_t task,
3180 task_power_info_t info,
3181 gpu_energy_data_t ginfo)
3182 {
3183 thread_t thread;
3184 ledger_amount_t tmp;
3185
3186 task_lock_assert_owned(task);
3187
3188 ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
3189 (ledger_amount_t *)&info->task_interrupt_wakeups, &tmp);
3190 ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
3191 (ledger_amount_t *)&info->task_platform_idle_wakeups, &tmp);
3192
3193 info->task_timer_wakeups_bin_1 = task->task_timer_wakeups_bin_1;
3194 info->task_timer_wakeups_bin_2 = task->task_timer_wakeups_bin_2;
3195
3196 info->total_user = task->total_user_time;
3197 info->total_system = task->total_system_time;
3198
3199 if (ginfo) {
3200 ginfo->task_gpu_utilisation = task->task_gpu_ns;
3201 }
3202
3203 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3204 uint64_t tval;
3205 spl_t x;
3206
3207 if (thread->options & TH_OPT_IDLE_THREAD)
3208 continue;
3209
3210 x = splsched();
3211 thread_lock(thread);
3212
3213 info->task_timer_wakeups_bin_1 += thread->thread_timer_wakeups_bin_1;
3214 info->task_timer_wakeups_bin_2 += thread->thread_timer_wakeups_bin_2;
3215
3216 tval = timer_grab(&thread->user_timer);
3217 info->total_user += tval;
3218
3219 tval = timer_grab(&thread->system_timer);
3220 if (thread->precise_user_kernel_time) {
3221 info->total_system += tval;
3222 } else {
3223 /* system_timer may represent either sys or user */
3224 info->total_user += tval;
3225 }
3226
3227 if (ginfo) {
3228 ginfo->task_gpu_utilisation += ml_gpu_stat(thread);
3229 }
3230 thread_unlock(thread);
3231 splx(x);
3232 }
3233 }
3234
3235 /*
3236 * task_gpu_utilisation
3237 *
3238 * Returns the total gpu time used by the all the threads of the task
3239 * (both dead and alive)
3240 */
3241 uint64_t
3242 task_gpu_utilisation(
3243 task_t task)
3244 {
3245 uint64_t gpu_time = 0;
3246 thread_t thread;
3247
3248 task_lock(task);
3249 gpu_time += task->task_gpu_ns;
3250
3251 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3252 spl_t x;
3253 x = splsched();
3254 thread_lock(thread);
3255 gpu_time += ml_gpu_stat(thread);
3256 thread_unlock(thread);
3257 splx(x);
3258 }
3259
3260 task_unlock(task);
3261 return gpu_time;
3262 }
3263
3264 kern_return_t
3265 task_purgable_info(
3266 task_t task,
3267 task_purgable_info_t *stats)
3268 {
3269 if (task == TASK_NULL || stats == NULL)
3270 return KERN_INVALID_ARGUMENT;
3271 /* Take task reference */
3272 task_reference(task);
3273 vm_purgeable_stats((vm_purgeable_info_t)stats, task);
3274 /* Drop task reference */
3275 task_deallocate(task);
3276 return KERN_SUCCESS;
3277 }
3278
3279 void
3280 task_vtimer_set(
3281 task_t task,
3282 integer_t which)
3283 {
3284 thread_t thread;
3285 spl_t x;
3286
3287 /* assert(task == current_task()); */ /* bogus assert 4803227 4807483 */
3288
3289 task_lock(task);
3290
3291 task->vtimers |= which;
3292
3293 switch (which) {
3294
3295 case TASK_VTIMER_USER:
3296 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3297 x = splsched();
3298 thread_lock(thread);
3299 if (thread->precise_user_kernel_time)
3300 thread->vtimer_user_save = timer_grab(&thread->user_timer);
3301 else
3302 thread->vtimer_user_save = timer_grab(&thread->system_timer);
3303 thread_unlock(thread);
3304 splx(x);
3305 }
3306 break;
3307
3308 case TASK_VTIMER_PROF:
3309 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3310 x = splsched();
3311 thread_lock(thread);
3312 thread->vtimer_prof_save = timer_grab(&thread->user_timer);
3313 thread->vtimer_prof_save += timer_grab(&thread->system_timer);
3314 thread_unlock(thread);
3315 splx(x);
3316 }
3317 break;
3318
3319 case TASK_VTIMER_RLIM:
3320 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3321 x = splsched();
3322 thread_lock(thread);
3323 thread->vtimer_rlim_save = timer_grab(&thread->user_timer);
3324 thread->vtimer_rlim_save += timer_grab(&thread->system_timer);
3325 thread_unlock(thread);
3326 splx(x);
3327 }
3328 break;
3329 }
3330
3331 task_unlock(task);
3332 }
3333
3334 void
3335 task_vtimer_clear(
3336 task_t task,
3337 integer_t which)
3338 {
3339 assert(task == current_task());
3340
3341 task_lock(task);
3342
3343 task->vtimers &= ~which;
3344
3345 task_unlock(task);
3346 }
3347
/*
 *	task_vtimer_update
 *
 *	Compute, for the current thread, how much time has elapsed on the
 *	virtual timer selected by `which` and return the sub-second part of
 *	it in *microsecs (the seconds part produced by
 *	absolutetime_to_microtime() is discarded).
 *
 *	task is only referenced by the asserts, hence __unused.
 *	*microsecs is left untouched if `which` matches none of the cases.
 */
void
task_vtimer_update(
__unused
	task_t		task,
	integer_t	which,
	uint32_t	*microsecs)
{
	thread_t		thread = current_thread();
	uint32_t		tdelt;
	clock_sec_t		secs;
	uint64_t		tsum;

	assert(task == current_task());

	assert(task->vtimers & which);

	secs = tdelt = 0;

	switch (which) {

	case TASK_VTIMER_USER:
		/*
		 * Threads without precise user/kernel time are measured
		 * against system_timer instead of user_timer.
		 * NOTE(review): timer_delta() presumably also advances
		 * vtimer_user_save -- confirm against its definition.
		 */
		if (thread->precise_user_kernel_time) {
			tdelt = (uint32_t)timer_delta(&thread->user_timer,
								&thread->vtimer_user_save);
		} else {
			tdelt = (uint32_t)timer_delta(&thread->system_timer,
								&thread->vtimer_user_save);
		}
		absolutetime_to_microtime(tdelt, &secs, microsecs);
		break;

	case TASK_VTIMER_PROF:
		/* profiling timer counts user + system time */
		tsum = timer_grab(&thread->user_timer);
		tsum += timer_grab(&thread->system_timer);
		/* the delta is truncated to 32 bits before conversion */
		tdelt = (uint32_t)(tsum - thread->vtimer_prof_save);
		absolutetime_to_microtime(tdelt, &secs, microsecs);
		/* if the time delta is smaller than a usec, ignore */
		if (*microsecs != 0)
			thread->vtimer_prof_save = tsum;
		break;

	case TASK_VTIMER_RLIM:
		/* rlimit timer counts user + system time */
		tsum = timer_grab(&thread->user_timer);
		tsum += timer_grab(&thread->system_timer);
		/* the delta is truncated to 32 bits before conversion */
		tdelt = (uint32_t)(tsum - thread->vtimer_rlim_save);
		/* unlike the PROF case, the snapshot always advances */
		thread->vtimer_rlim_save = tsum;
		absolutetime_to_microtime(tdelt, &secs, microsecs);
		break;
	}

}
3399
3400 /*
3401 * task_assign:
3402 *
3403 * Change the assigned processor set for the task
3404 */
3405 kern_return_t
3406 task_assign(
3407 __unused task_t task,
3408 __unused processor_set_t new_pset,
3409 __unused boolean_t assign_threads)
3410 {
3411 return(KERN_FAILURE);
3412 }
3413
3414 /*
3415 * task_assign_default:
3416 *
3417 * Version of task_assign to assign to default processor set.
3418 */
3419 kern_return_t
3420 task_assign_default(
3421 task_t task,
3422 boolean_t assign_threads)
3423 {
3424 return (task_assign(task, &pset0, assign_threads));
3425 }
3426
3427 /*
3428 * task_get_assignment
3429 *
3430 * Return name of processor set that task is assigned to.
3431 */
3432 kern_return_t
3433 task_get_assignment(
3434 task_t task,
3435 processor_set_t *pset)
3436 {
3437 if (!task->active)
3438 return(KERN_FAILURE);
3439
3440 *pset = &pset0;
3441
3442 return (KERN_SUCCESS);
3443 }
3444
3445
3446 /*
3447 * task_policy
3448 *
3449 * Set scheduling policy and parameters, both base and limit, for
3450 * the given task. Policy must be a policy which is enabled for the
3451 * processor set. Change contained threads if requested.
3452 */
3453 kern_return_t
3454 task_policy(
3455 __unused task_t task,
3456 __unused policy_t policy_id,
3457 __unused policy_base_t base,
3458 __unused mach_msg_type_number_t count,
3459 __unused boolean_t set_limit,
3460 __unused boolean_t change)
3461 {
3462 return(KERN_FAILURE);
3463 }
3464
3465 /*
3466 * task_set_policy
3467 *
3468 * Set scheduling policy and parameters, both base and limit, for
3469 * the given task. Policy can be any policy implemented by the
3470 * processor set, whether enabled or not. Change contained threads
3471 * if requested.
3472 */
3473 kern_return_t
3474 task_set_policy(
3475 __unused task_t task,
3476 __unused processor_set_t pset,
3477 __unused policy_t policy_id,
3478 __unused policy_base_t base,
3479 __unused mach_msg_type_number_t base_count,
3480 __unused policy_limit_t limit,
3481 __unused mach_msg_type_number_t limit_count,
3482 __unused boolean_t change)
3483 {
3484 return(KERN_FAILURE);
3485 }
3486
3487 kern_return_t
3488 task_set_ras_pc(
3489 __unused task_t task,
3490 __unused vm_offset_t pc,
3491 __unused vm_offset_t endpc)
3492 {
3493 return KERN_FAILURE;
3494 }
3495
3496 void
3497 task_synchronizer_destroy_all(task_t task)
3498 {
3499 semaphore_t semaphore;
3500
3501 /*
3502 * Destroy owned semaphores
3503 */
3504
3505 while (!queue_empty(&task->semaphore_list)) {
3506 semaphore = (semaphore_t) queue_first(&task->semaphore_list);
3507 (void) semaphore_destroy(task, semaphore);
3508 }
3509 }
3510
3511 /*
3512 * Install default (machine-dependent) initial thread state
3513 * on the task. Subsequent thread creation will have this initial
3514 * state set on the thread by machine_thread_inherit_taskwide().
3515 * Flavors and structures are exactly the same as those to thread_set_state()
3516 */
3517 kern_return_t
3518 task_set_state(
3519 task_t task,
3520 int flavor,
3521 thread_state_t state,
3522 mach_msg_type_number_t state_count)
3523 {
3524 kern_return_t ret;
3525
3526 if (task == TASK_NULL) {
3527 return (KERN_INVALID_ARGUMENT);
3528 }
3529
3530 task_lock(task);
3531
3532 if (!task->active) {
3533 task_unlock(task);
3534 return (KERN_FAILURE);
3535 }
3536
3537 ret = machine_task_set_state(task, flavor, state, state_count);
3538
3539 task_unlock(task);
3540 return ret;
3541 }
3542
3543 /*
3544 * Examine the default (machine-dependent) initial thread state
3545 * on the task, as set by task_set_state(). Flavors and structures
3546 * are exactly the same as those passed to thread_get_state().
3547 */
3548 kern_return_t
3549 task_get_state(
3550 task_t task,
3551 int flavor,
3552 thread_state_t state,
3553 mach_msg_type_number_t *state_count)
3554 {
3555 kern_return_t ret;
3556
3557 if (task == TASK_NULL) {
3558 return (KERN_INVALID_ARGUMENT);
3559 }
3560
3561 task_lock(task);
3562
3563 if (!task->active) {
3564 task_unlock(task);
3565 return (KERN_FAILURE);
3566 }
3567
3568 ret = machine_task_get_state(task, flavor, state, state_count);
3569
3570 task_unlock(task);
3571 return ret;
3572 }
3573
3574 #if CONFIG_JETSAM
3575 #define HWM_USERCORE_MINSPACE 250 // free space (in MB) required *after* core file creation
3576
/*
 * Report that the current task crossed its memory high watermark.
 *
 * Optionally takes a core dump first (when hwm_user_cores is set), then
 * logs the event and, unless disable_exc_resource is set, raises an
 * EXC_RESOURCE exception of type RESOURCE_TYPE_MEMORY / flavor
 * FLAVOR_HIGH_WATERMARK with the limit encoded in code[0].  The task is
 * suspended around exception delivery and resumed afterwards.
 *
 * max_footprint_mb: the limit that was crossed, in MB; used for the
 * log messages and the exception code.
 *
 * The function is noinline and carries a descriptive name
 * (presumably so that it stands out in backtraces).
 */
void __attribute__((noinline))
THIS_PROCESS_CROSSED_HIGH_WATERMARK__SENDING_EXC_RESOURCE(int max_footprint_mb)
{
	task_t						task 		= current_task();
	int							pid         = 0;
	char						*procname 	= (char *) "unknown";
	mach_exception_data_type_t	code[EXCEPTION_CODE_MAX];

#ifdef MACH_BSD
	pid = proc_selfpid();

	if (pid == 1) {
		/*
		 * Cannot have ReportCrash analyzing
		 * a suspended initproc.
		 */
		return;
	}

	/* fall back to "unknown" when there is no BSD process attached */
	if (task->bsd_info != NULL)
		procname = proc_name_address(current_task()->bsd_info);
#endif

	if (hwm_user_cores) {
		int				error;
		uint64_t		starttime, end;
		clock_sec_t		secs = 0;
		uint32_t		microsecs = 0;

		starttime = mach_absolute_time();
		/*
		 * Trigger a coredump of this process. Don't proceed unless we know we won't
		 * be filling up the disk; and ignore the core size resource limit for this
		 * core file.
		 */
		if ((error = coredump(current_task()->bsd_info, HWM_USERCORE_MINSPACE, 1)) != 0) {
			printf("couldn't take coredump of %s[%d]: %d\n", procname, pid, error);
		}
		/*
		* coredump() leaves the task suspended.
		* NOTE(review): the resume is issued even when coredump()
		* reported an error -- confirm the task is suspended on
		* every failure path.
		*/
		task_resume_internal(current_task());

		/* log how long the core dump took */
		end = mach_absolute_time();
		absolutetime_to_microtime(end - starttime, &secs, &microsecs);
		/*
		 * NOTE(review): this message looks up the name again via
		 * bsd_info instead of using procname, so it is not covered
		 * by the NULL check above.
		 */
		printf("coredump of %s[%d] taken in %d secs %d microsecs\n",
		       proc_name_address(current_task()->bsd_info), pid, (int)secs, microsecs);
	}

	if (disable_exc_resource) {
		printf("process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
			"supressed by a boot-arg.\n", procname, pid, max_footprint_mb);
		return;
	}

	printf("process %s[%d] crossed memory high watermark (%d MB); sending "
		"EXC_RESOURCE.\n", procname, pid, max_footprint_mb);

	/* build the exception code: type, flavor and limit all go in code[0] */
	code[0] = code[1] = 0;
	EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_MEMORY);
	EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_HIGH_WATERMARK);
	EXC_RESOURCE_HWM_ENCODE_LIMIT(code[0], max_footprint_mb);

	/*
	 * Use the _internal_ variant so that no user-space
	 * process can resume our task from under us.
	 */
	task_suspend_internal(task);
	exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
	task_resume_internal(task);
}
3648
3649 /*
3650 * Callback invoked when a task exceeds its physical footprint limit.
3651 */
3652 void
3653 task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1)
3654 {
3655 ledger_amount_t max_footprint, max_footprint_mb;
3656 ledger_amount_t footprint_after_purge;
3657 task_t task;
3658
3659 if (warning == LEDGER_WARNING_DIPPED_BELOW) {
3660 /*
3661 * Task memory limits only provide a warning on the way up.
3662 */
3663 return;
3664 }
3665
3666 task = current_task();
3667
3668 ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &max_footprint);
3669 max_footprint_mb = max_footprint >> 20;
3670
3671 /*
3672 * Try and purge all "volatile" memory in that task first.
3673 */
3674 (void) task_purge_volatile_memory(task);
3675 /* are we still over the limit ? */
3676 ledger_get_balance(task->ledger,
3677 task_ledgers.phys_footprint,
3678 &footprint_after_purge);
3679 if ((!warning &&
3680 footprint_after_purge <= max_footprint) ||
3681 (warning &&
3682 footprint_after_purge <= ((max_footprint *
3683 PHYS_FOOTPRINT_WARNING_LEVEL) / 100))) {
3684 /* all better now */
3685 ledger_reset_callback_state(task->ledger,
3686 task_ledgers.phys_footprint);
3687 return;
3688 }
3689 /* still over the limit after purging... */
3690
3691 /*
3692 * If this an actual violation (not a warning),
3693 * generate a non-fatal high watermark EXC_RESOURCE.
3694 */
3695 if ((warning == 0) && (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PHYS_FOOTPRINT_EXCEPTION)) {
3696 THIS_PROCESS_CROSSED_HIGH_WATERMARK__SENDING_EXC_RESOURCE((int)max_footprint_mb);
3697 }
3698
3699 memorystatus_on_ledger_footprint_exceeded((warning == LEDGER_WARNING_ROSE_ABOVE) ? TRUE : FALSE,
3700 (int)max_footprint_mb);
3701 }
3702
3703 extern int proc_check_footprint_priv(void);
3704
3705 kern_return_t
3706 task_set_phys_footprint_limit(
3707 task_t task,
3708 int new_limit_mb,
3709 int *old_limit_mb)
3710 {
3711 kern_return_t error;
3712
3713 if ((error = proc_check_footprint_priv())) {
3714 return (KERN_NO_ACCESS);
3715 }
3716
3717 return task_set_phys_footprint_limit_internal(task, new_limit_mb, old_limit_mb, FALSE);
3718 }
3719
3720 kern_return_t
3721 task_set_phys_footprint_limit_internal(
3722 task_t task,
3723 int new_limit_mb,
3724 int *old_limit_mb,
3725 boolean_t trigger_exception)
3726 {
3727 ledger_amount_t old;
3728
3729 ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &old);
3730
3731 if (old_limit_mb) {
3732 *old_limit_mb = old >> 20;
3733 }
3734
3735 if (new_limit_mb == -1) {
3736 /*
3737 * Caller wishes to remove the limit.
3738 */
3739 ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
3740 max_task_footprint ? max_task_footprint : LEDGER_LIMIT_INFINITY,
3741 max_task_footprint ? PHYS_FOOTPRINT_WARNING_LEVEL : 0);
3742 return (KERN_SUCCESS);
3743 }
3744
3745 #ifdef CONFIG_NOMONITORS
3746 return (KERN_SUCCESS);
3747 #endif /* CONFIG_NOMONITORS */
3748
3749 task_lock(task);
3750
3751 if (trigger_exception) {
3752 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_PHYS_FOOTPRINT_EXCEPTION;
3753 } else {
3754 task->rusage_cpu_flags &= ~TASK_RUSECPU_FLAGS_PHYS_FOOTPRINT_EXCEPTION;
3755 }
3756
3757 ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
3758 (ledger_amount_t)new_limit_mb << 20, PHYS_FOOTPRINT_WARNING_LEVEL);
3759
3760 task_unlock(task);
3761
3762 return (KERN_SUCCESS);
3763 }
3764
3765 kern_return_t
3766 task_get_phys_footprint_limit(
3767 task_t task,
3768 int *limit_mb)
3769 {
3770 ledger_amount_t limit;
3771
3772 ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &limit);
3773 *limit_mb = limit >> 20;
3774
3775 return (KERN_SUCCESS);
3776 }
3777 #else /* CONFIG_JETSAM */
3778 kern_return_t
3779 task_set_phys_footprint_limit(
3780 __unused task_t task,
3781 __unused int new_limit_mb,
3782 __unused int *old_limit_mb)
3783 {
3784 return (KERN_FAILURE);
3785 }
3786
3787 kern_return_t
3788 task_get_phys_footprint_limit(
3789 __unused task_t task,
3790 __unused int *limit_mb)
3791 {
3792 return (KERN_FAILURE);
3793 }
3794 #endif /* CONFIG_JETSAM */
3795
3796 /*
3797 * We need to export some functions to other components that
3798 * are currently implemented in macros within the osfmk
3799 * component. Just export them as functions of the same name.
3800 */
3801 boolean_t is_kerneltask(task_t t)
3802 {
3803 if (t == kernel_task)
3804 return (TRUE);
3805
3806 return (FALSE);
3807 }
3808
3809 int
3810 check_for_tasksuspend(task_t task)
3811 {
3812
3813 if (task == TASK_NULL)
3814 return (0);
3815
3816 return (task->suspend_count > 0);
3817 }
3818
3819 #undef current_task
3820 task_t current_task(void);
3821 task_t current_task(void)
3822 {
3823 return (current_task_fast());
3824 }
3825
3826 #undef task_reference
3827 void task_reference(task_t task);
3828 void
3829 task_reference(
3830 task_t task)
3831 {
3832 if (task != TASK_NULL)
3833 task_reference_internal(task);
3834 }
3835
3836 /*
3837 * This routine is called always with task lock held.
3838 * And it returns a thread handle without reference as the caller
3839 * operates on it under the task lock held.
3840 */
3841 thread_t
3842 task_findtid(task_t task, uint64_t tid)
3843 {
3844 thread_t thread= THREAD_NULL;
3845
3846 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3847 if (thread->thread_id == tid)
3848 return(thread);
3849 }
3850 return(THREAD_NULL);
3851 }
3852
3853 /*
3854 * Control the CPU usage monitor for a task.
3855 */
3856 kern_return_t
3857 task_cpu_usage_monitor_ctl(task_t task, uint32_t *flags)
3858 {
3859 int error = KERN_SUCCESS;
3860
3861 if (*flags & CPUMON_MAKE_FATAL) {
3862 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_CPUMON;
3863 } else {
3864 error = KERN_INVALID_ARGUMENT;
3865 }
3866
3867 return error;
3868 }
3869
3870 /*
3871 * Control the wakeups monitor for a task.
3872 */
3873 kern_return_t
3874 task_wakeups_monitor_ctl(task_t task, uint32_t *flags, int32_t *rate_hz)
3875 {
3876 ledger_t ledger = task->ledger;
3877
3878 task_lock(task);
3879 if (*flags & WAKEMON_GET_PARAMS) {
3880 ledger_amount_t limit;
3881 uint64_t period;
3882
3883 ledger_get_limit(ledger, task_ledgers.interrupt_wakeups, &limit);
3884 ledger_get_period(ledger, task_ledgers.interrupt_wakeups, &period);
3885
3886 if (limit != LEDGER_LIMIT_INFINITY) {
3887 /*
3888 * An active limit means the wakeups monitor is enabled.
3889 */
3890 *rate_hz = (int32_t)(limit / (int64_t)(period / NSEC_PER_SEC));
3891 *flags = WAKEMON_ENABLE;
3892 if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON) {
3893 *flags |= WAKEMON_MAKE_FATAL;
3894 }
3895 } else {
3896 *flags = WAKEMON_DISABLE;
3897 *rate_hz = -1;
3898 }
3899
3900 /*
3901 * If WAKEMON_GET_PARAMS is present in flags, all other flags are ignored.
3902 */
3903 task_unlock(task);
3904 return KERN_SUCCESS;
3905 }
3906
3907 if (*flags & WAKEMON_ENABLE) {
3908 if (*flags & WAKEMON_SET_DEFAULTS) {
3909 *rate_hz = task_wakeups_monitor_rate;
3910 }
3911
3912 #ifndef CONFIG_NOMONITORS
3913 if (*flags & WAKEMON_MAKE_FATAL) {
3914 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON;
3915 }
3916 #endif /* CONFIG_NOMONITORS */
3917
3918 if (*rate_hz < 0) {
3919 task_unlock(task);
3920 return KERN_INVALID_ARGUMENT;
3921 }
3922
3923 #ifndef CONFIG_NOMONITORS
3924 ledger_set_limit(ledger, task_ledgers.interrupt_wakeups, *rate_hz * task_wakeups_monitor_interval,
3925 task_wakeups_monitor_ustackshots_trigger_pct);
3926 ledger_set_period(ledger, task_ledgers.interrupt_wakeups, task_wakeups_monitor_interval * NSEC_PER_SEC);
3927 ledger_enable_callback(ledger, task_ledgers.interrupt_wakeups);
3928 #endif /* CONFIG_NOMONITORS */
3929 } else if (*flags & WAKEMON_DISABLE) {
3930 /*
3931 * Caller wishes to disable wakeups monitor on the task.
3932 *
3933 * Disable telemetry if it was triggered by the wakeups monitor, and
3934 * remove the limit & callback on the wakeups ledger entry.
3935 */
3936 #if CONFIG_TELEMETRY
3937 telemetry_task_ctl_locked(current_task(), TF_WAKEMON_WARNING, 0);
3938 #endif
3939 ledger_disable_refill(ledger, task_ledgers.interrupt_wakeups);
3940 ledger_disable_callback(ledger, task_ledgers.interrupt_wakeups);
3941 }
3942
3943 task_unlock(task);
3944 return KERN_SUCCESS;
3945 }
3946
3947 void
3948 task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1)
3949 {
3950 if (warning == LEDGER_WARNING_ROSE_ABOVE) {
3951 #if CONFIG_TELEMETRY
3952 /*
3953 * This task is in danger of violating the wakeups monitor. Enable telemetry on this task
3954 * so there are micro-stackshots available if and when EXC_RESOURCE is triggered.
3955 */
3956 telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 1);
3957 #endif
3958 return;
3959 }
3960
3961 #if CONFIG_TELEMETRY
3962 /*
3963 * If the balance has dipped below the warning level (LEDGER_WARNING_DIPPED_BELOW) or
3964 * exceeded the limit, turn telemetry off for the task.
3965 */
3966 telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 0);
3967 #endif
3968
3969 if (warning == 0) {
3970 THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS__SENDING_EXC_RESOURCE();
3971 }
3972 }
3973
/*
 * Report that the current task exceeded its wakeups limit.
 *
 * Reads the interrupt_wakeups ledger entry, disables the wakeups
 * monitor for the task, logs the event and, unless suppressed by
 * disable_exc_resource or audio_active, raises an EXC_RESOURCE
 * exception of type RESOURCE_TYPE_WAKEUPS / flavor
 * FLAVOR_WAKEUPS_MONITOR.  A task flagged with
 * TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON is terminated afterwards.
 *
 * The function is noinline and carries a descriptive name
 * (presumably so that it stands out in backtraces).
 */
void __attribute__((noinline))
THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS__SENDING_EXC_RESOURCE(void)
{
	task_t						task 		= current_task();
	int							pid         = 0;
	char						*procname 	= (char *) "unknown";
	uint64_t					observed_wakeups_rate;
	uint64_t					permitted_wakeups_rate;
	uint64_t					observation_interval;
	mach_exception_data_type_t	code[EXCEPTION_CODE_MAX];
	struct ledger_entry_info	lei;

#ifdef MACH_BSD
	pid = proc_selfpid();
	/* fall back to "unknown" when there is no BSD process attached */
	if (task->bsd_info != NULL)
		procname = proc_name_address(current_task()->bsd_info);
#endif

	/* snapshot the ledger entry before the monitor is disabled below */
	ledger_get_entry_info(task->ledger, task_ledgers.interrupt_wakeups, &lei);

	/*
	 * Disable the exception notification so we don't overwhelm
	 * the listener with an endless stream of redundant exceptions.
	 */
	uint32_t flags = WAKEMON_DISABLE;
	/* rate_hz is passed as NULL: the disable request does not use it */
	task_wakeups_monitor_ctl(task, &flags, NULL);

	/*
	 * NOTE(review): lei.lei_last_refill is used as a divisor without
	 * a zero check, here and again when the exception code is built.
	 */
	observed_wakeups_rate = (lei.lei_balance * (int64_t)NSEC_PER_SEC) / lei.lei_last_refill;
	permitted_wakeups_rate = lei.lei_limit / task_wakeups_monitor_interval;
	observation_interval = lei.lei_refill_period / NSEC_PER_SEC;

	if (disable_exc_resource) {
		printf("process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
			"supressed by a boot-arg\n", procname, pid);
		return;
	}
	if (audio_active) {
		printf("process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
		       "supressed due to audio playback\n", procname, pid);
		return;
	}
	printf("process %s[%d] caught causing excessive wakeups. Observed wakeups rate "
		"(per sec): %lld; Maximum permitted wakeups rate (per sec): %lld; Observation "
		"period: %lld seconds; Task lifetime number of wakeups: %lld\n",
		procname, pid, observed_wakeups_rate, permitted_wakeups_rate,
		observation_interval, lei.lei_credit);

	/*
	 * Build the exception code.  Note that the permitted rate encoded
	 * here is the global task_wakeups_monitor_rate, not the
	 * permitted_wakeups_rate computed from the ledger limit above.
	 */
	code[0] = code[1] = 0;
	EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_WAKEUPS);
	EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_WAKEUPS_MONITOR);
	EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_PERMITTED(code[0], task_wakeups_monitor_rate);
	EXC_RESOURCE_CPUMONITOR_ENCODE_OBSERVATION_INTERVAL(code[0], observation_interval);
	EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_OBSERVED(code[1], lei.lei_balance * (int64_t)NSEC_PER_SEC / lei.lei_last_refill);
	exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);

	/* the monitor was configured as fatal for this task */
	if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON) {
		task_terminate_internal(task);
	}
}
4033
4034 kern_return_t
4035 task_purge_volatile_memory(
4036 task_t task)
4037 {
4038 vm_map_t map;
4039 int num_object_purged;
4040
4041 if (task == TASK_NULL)
4042 return KERN_INVALID_TASK;
4043
4044 task_lock(task);
4045
4046 if (!task->active) {
4047 task_unlock(task);
4048 return KERN_INVALID_TASK;
4049 }
4050 map = task->map;
4051 if (map == VM_MAP_NULL) {
4052 task_unlock(task);
4053 return KERN_INVALID_TASK;
4054 }
4055 vm_map_reference(task->map);
4056
4057 task_unlock(task);
4058
4059 num_object_purged = vm_map_purge(map);
4060 vm_map_deallocate(map);
4061
4062 return KERN_SUCCESS;
4063 }
4064
4065 /* Placeholders for the task set/get voucher interfaces */
4066 kern_return_t
4067 task_get_mach_voucher(
4068 task_t task,
4069 mach_voucher_selector_t __unused which,
4070 ipc_voucher_t *voucher)
4071 {
4072 if (TASK_NULL == task)
4073 return KERN_INVALID_TASK;
4074
4075 *voucher = NULL;
4076 return KERN_SUCCESS;
4077 }
4078
4079 kern_return_t
4080 task_set_mach_voucher(
4081 task_t task,
4082 ipc_voucher_t __unused voucher)
4083 {
4084 if (TASK_NULL == task)
4085 return KERN_INVALID_TASK;
4086
4087 return KERN_SUCCESS;
4088 }
4089
4090 kern_return_t
4091 task_swap_mach_voucher(
4092 task_t task,
4093 ipc_voucher_t new_voucher,
4094 ipc_voucher_t *in_out_old_voucher)
4095 {
4096 if (TASK_NULL == task)
4097 return KERN_INVALID_TASK;
4098
4099 *in_out_old_voucher = new_voucher;
4100 return KERN_SUCCESS;
4101 }
4102
4103 void task_set_gpu_denied(task_t task, boolean_t denied)
4104 {
4105 task_lock(task);
4106
4107 if (denied) {
4108 task->t_flags |= TF_GPU_DENIED;
4109 } else {
4110 task->t_flags &= ~TF_GPU_DENIED;
4111 }
4112
4113 task_unlock(task);
4114 }
4115
4116 boolean_t task_is_gpu_denied(task_t task)
4117 {
4118 /* We don't need the lock to read this flag */
4119 return (task->t_flags & TF_GPU_DENIED) ? TRUE : FALSE;
4120 }