1 /*
2 * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Mach Operating System
30 * Copyright (c) 1987 Carnegie-Mellon University
31 * All rights reserved. The CMU software License Agreement specifies
32 * the terms and conditions for use and redistribution.
33 */
34 /*
35 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
36 * support for mandatory and extensible security protections. This notice
37 * is included in support of clause 2.2 (b) of the Apple Public License,
38 * Version 2.0.
39 */
40
41 #include <meta_features.h>
42
43 #include <vm/vm_options.h>
44
45 #include <kern/task.h>
46 #include <kern/thread.h>
47 #include <kern/debug.h>
48 #include <kern/extmod_statistics.h>
49 #include <mach/mach_traps.h>
50 #include <mach/port.h>
51 #include <mach/sdt.h>
52 #include <mach/task.h>
53 #include <mach/task_access.h>
54 #include <mach/task_special_ports.h>
55 #include <mach/time_value.h>
56 #include <mach/vm_map.h>
57 #include <mach/vm_param.h>
58 #include <mach/vm_prot.h>
59
60 #include <sys/file_internal.h>
61 #include <sys/param.h>
62 #include <sys/systm.h>
63 #include <sys/dir.h>
64 #include <sys/namei.h>
65 #include <sys/proc_internal.h>
66 #include <sys/kauth.h>
67 #include <sys/vm.h>
68 #include <sys/file.h>
69 #include <sys/vnode_internal.h>
70 #include <sys/mount.h>
71 #include <sys/trace.h>
72 #include <sys/kernel.h>
73 #include <sys/ubc_internal.h>
74 #include <sys/user.h>
75 #include <sys/syslog.h>
76 #include <sys/stat.h>
77 #include <sys/sysproto.h>
78 #include <sys/mman.h>
79 #include <sys/sysctl.h>
80 #include <sys/cprotect.h>
81 #include <sys/kpi_socket.h>
82 #include <sys/kas_info.h>
83 #include <sys/socket.h>
84 #include <sys/socketvar.h>
85
86 #include <security/audit/audit.h>
87 #include <security/mac.h>
88 #include <bsm/audit_kevents.h>
89
90 #include <kern/kalloc.h>
91 #include <vm/vm_map.h>
92 #include <vm/vm_kern.h>
93 #include <vm/vm_pageout.h>
94
95 #include <machine/spl.h>
96
97 #include <mach/shared_region.h>
98 #include <vm/vm_shared_region.h>
99
100 #include <vm/vm_protos.h>
101
102 #include <sys/kern_memorystatus.h>
103
104
105 int _shared_region_map_and_slide(struct proc*, int, unsigned int, struct shared_file_mapping_np*, uint32_t, user_addr_t, user_addr_t);
106 int shared_region_copyin_mappings(struct proc*, user_addr_t, unsigned int, struct shared_file_mapping_np *);
107
108 #if VM_MAP_DEBUG_APPLE_PROTECT
109 SYSCTL_INT(_vm, OID_AUTO, map_debug_apple_protect, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_map_debug_apple_protect, 0, "");
110 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
111
112 #if VM_MAP_DEBUG_FOURK
113 SYSCTL_INT(_vm, OID_AUTO, map_debug_fourk, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_map_debug_fourk, 0, "");
114 #endif /* VM_MAP_DEBUG_FOURK */
115
116 #if DEVELOPMENT || DEBUG
117
118 static int
119 sysctl_kmem_alloc_contig SYSCTL_HANDLER_ARGS
120 {
121 #pragma unused(arg1, arg2)
122 vm_offset_t kaddr;
123 kern_return_t kr;
124 int error = 0;
125 int size = 0;
126
127 error = sysctl_handle_int(oidp, &size, 0, req);
128 if (error || !req->newptr)
129 return (error);
130
131 kr = kmem_alloc_contig(kernel_map, &kaddr, (vm_size_t)size, 0, 0, 0, 0, VM_KERN_MEMORY_IOKIT);
132
133 if (kr == KERN_SUCCESS)
134 kmem_free(kernel_map, kaddr, size);
135
136 return error;
137 }
138
139 SYSCTL_PROC(_vm, OID_AUTO, kmem_alloc_contig, CTLTYPE_INT|CTLFLAG_WR|CTLFLAG_LOCKED|CTLFLAG_MASKED,
140 0, 0, &sysctl_kmem_alloc_contig, "I", "");
141
142 extern int vm_region_footprint;
143 SYSCTL_INT(_vm, OID_AUTO, region_footprint, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_region_footprint, 0, "");
144
145 #endif /* DEVELOPMENT || DEBUG */
146
147
148
149 #if DEVELOPMENT || DEBUG
150 extern int radar_20146450;
151 SYSCTL_INT(_vm, OID_AUTO, radar_20146450, CTLFLAG_RW | CTLFLAG_LOCKED, &radar_20146450, 0, "");
152
153 extern int macho_printf;
154 SYSCTL_INT(_vm, OID_AUTO, macho_printf, CTLFLAG_RW | CTLFLAG_LOCKED, &macho_printf, 0, "");
155
156 extern int apple_protect_pager_data_request_debug;
157 SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_data_request_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &apple_protect_pager_data_request_debug, 0, "");
158
159
160 #endif /* DEVELOPMENT || DEBUG */
161
162 SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_compressor, 0, "");
163 SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_compressor_pages, 0, "");
164 SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_terminate, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_terminate, 0, "");
165 SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_terminate_failure, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_terminate_failure, 0, "");
166 SYSCTL_INT(_vm, OID_AUTO, vm_should_cow_but_wired, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.should_cow_but_wired, 0, "");
167 SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_extra_cow, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_extra_cow, 0, "");
168 SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_extra_cow_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_extra_cow_pages, 0, "");
169 SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_lookup_failure_write, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_lookup_failure_write, 0, "");
170 SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_lookup_failure_copy, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_lookup_failure_copy, 0, "");
171 #if VM_SCAN_FOR_SHADOW_CHAIN
172 static int vm_shadow_max_enabled = 0; /* Disabled by default */
173 extern int proc_shadow_max(void);
174 static int
175 vm_shadow_max SYSCTL_HANDLER_ARGS
176 {
177 #pragma unused(arg1, arg2, oidp)
178 int value = 0;
179
180 if (vm_shadow_max_enabled)
181 value = proc_shadow_max();
182
183 return SYSCTL_OUT(req, &value, sizeof(value));
184 }
185 SYSCTL_PROC(_vm, OID_AUTO, vm_shadow_max, CTLTYPE_INT|CTLFLAG_RD|CTLFLAG_LOCKED,
186 0, 0, &vm_shadow_max, "I", "");
187
188 SYSCTL_INT(_vm, OID_AUTO, vm_shadow_max_enabled, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_shadow_max_enabled, 0, "");
189
190 #endif /* VM_SCAN_FOR_SHADOW_CHAIN */
191
192 SYSCTL_INT(_vm, OID_AUTO, vm_debug_events, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_debug_events, 0, "");
193
194 __attribute__((noinline)) int __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(
195 mach_port_t task_access_port, int32_t calling_pid, uint32_t calling_gid, int32_t target_pid);
196 /*
197 * Sysctl's related to data/stack execution. See osfmk/vm/vm_map.c
198 */
199
200 #if DEVELOPMENT || DEBUG
201 extern int allow_stack_exec, allow_data_exec;
202
203 SYSCTL_INT(_vm, OID_AUTO, allow_stack_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_stack_exec, 0, "");
204 SYSCTL_INT(_vm, OID_AUTO, allow_data_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_data_exec, 0, "");
205
206 #endif /* DEVELOPMENT || DEBUG */
207
208 static const char *prot_values[] = {
209 "none",
210 "read-only",
211 "write-only",
212 "read-write",
213 "execute-only",
214 "read-execute",
215 "write-execute",
216 "read-write-execute"
217 };
218
219 void
220 log_stack_execution_failure(addr64_t vaddr, vm_prot_t prot)
221 {
222 printf("Data/Stack execution not permitted: %s[pid %d] at virtual address 0x%qx, protections were %s\n",
223 current_proc()->p_comm, current_proc()->p_pid, vaddr, prot_values[prot & VM_PROT_ALL]);
224 }
225
226 /*
227 * shared_region_unnest_logging: level of logging of unnesting events
228 * 0 - no logging
229 * 1 - throttled logging of unexpected unnesting events (default)
230 * 2 - unthrottled logging of unexpected unnesting events
231 * 3+ - unthrottled logging of all unnesting events
232 */
233 int shared_region_unnest_logging = 1;
234
235 SYSCTL_INT(_vm, OID_AUTO, shared_region_unnest_logging, CTLFLAG_RW | CTLFLAG_LOCKED,
236 &shared_region_unnest_logging, 0, "");
237
238 int vm_shared_region_unnest_log_interval = 10;
239 int shared_region_unnest_log_count_threshold = 5;
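/*
 * Illustrative sketch (not part of the original source): the unnesting log
 * level above is exposed to user space as the "vm.shared_region_unnest_logging"
 * sysctl registered in this file, so a tool could read or raise it roughly
 * like this (writing the value requires sufficient privilege):
 *
 *	#include <sys/sysctl.h>
 *	#include <stdio.h>
 *
 *	int level = 0;
 *	size_t len = sizeof(level);
 *	if (sysctlbyname("vm.shared_region_unnest_logging", &level, &len, NULL, 0) == 0)
 *		printf("unnest logging level: %d\n", level);
 *	level = 2;	// unthrottled logging of unexpected unnesting events
 *	(void) sysctlbyname("vm.shared_region_unnest_logging", NULL, NULL, &level, sizeof(level));
 */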
240
241 /*
242 * Shared cache path enforcement.
243 */
244
245 static int scdir_enforce = 1;
246 static char scdir_path[] = "/var/db/dyld/";
247
248 #ifndef SECURE_KERNEL
249 SYSCTL_INT(_vm, OID_AUTO, enforce_shared_cache_dir, CTLFLAG_RW | CTLFLAG_LOCKED, &scdir_enforce, 0, "");
250 #endif
251
252 /* These log rate throttling state variables aren't thread safe, but
253 * are sufficient unto the task.
254 */
255 static int64_t last_unnest_log_time = 0;
256 static int shared_region_unnest_log_count = 0;
257
258 void
259 log_unnest_badness(
260 vm_map_t m,
261 vm_map_offset_t s,
262 vm_map_offset_t e,
263 boolean_t is_nested_map,
264 vm_map_offset_t lowest_unnestable_addr)
265 {
266 struct timeval tv;
267
268 if (shared_region_unnest_logging == 0)
269 return;
270
271 if (shared_region_unnest_logging <= 2 &&
272 is_nested_map &&
273 s >= lowest_unnestable_addr) {
274 /*
275 * Unnesting of writable map entries is fine.
276 */
277 return;
278 }
279
280 if (shared_region_unnest_logging <= 1) {
281 microtime(&tv);
282 if ((tv.tv_sec - last_unnest_log_time) <
283 vm_shared_region_unnest_log_interval) {
284 if (shared_region_unnest_log_count++ >
285 shared_region_unnest_log_count_threshold)
286 return;
287 } else {
288 last_unnest_log_time = tv.tv_sec;
289 shared_region_unnest_log_count = 0;
290 }
291 }
292
293 DTRACE_VM4(log_unnest_badness,
294 vm_map_t, m,
295 vm_map_offset_t, s,
296 vm_map_offset_t, e,
297 vm_map_offset_t, lowest_unnestable_addr);
298 printf("%s[%d] triggered unnest of range 0x%qx->0x%qx of DYLD shared region in VM map %p. While not abnormal for debuggers, this increases system memory footprint until the target exits.\n", current_proc()->p_comm, current_proc()->p_pid, (uint64_t)s, (uint64_t)e, (void *) VM_KERNEL_ADDRPERM(m));
299 }
300
301 int
302 useracc(
303 user_addr_t addr,
304 user_size_t len,
305 int prot)
306 {
307 vm_map_t map;
308
309 map = current_map();
310 return (vm_map_check_protection(
311 map,
312 vm_map_trunc_page(addr,
313 vm_map_page_mask(map)),
314 vm_map_round_page(addr+len,
315 vm_map_page_mask(map)),
316 prot == B_READ ? VM_PROT_READ : VM_PROT_WRITE));
317 }
318
319 int
320 vslock(
321 user_addr_t addr,
322 user_size_t len)
323 {
324 kern_return_t kret;
325 vm_map_t map;
326
327 map = current_map();
328 kret = vm_map_wire(map,
329 vm_map_trunc_page(addr,
330 vm_map_page_mask(map)),
331 vm_map_round_page(addr+len,
332 vm_map_page_mask(map)),
333 VM_PROT_READ | VM_PROT_WRITE | VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_BSD),
334 FALSE);
335
336 switch (kret) {
337 case KERN_SUCCESS:
338 return (0);
339 case KERN_INVALID_ADDRESS:
340 case KERN_NO_SPACE:
341 return (ENOMEM);
342 case KERN_PROTECTION_FAILURE:
343 return (EACCES);
344 default:
345 return (EINVAL);
346 }
347 }
348
349 int
350 vsunlock(
351 user_addr_t addr,
352 user_size_t len,
353 __unused int dirtied)
354 {
355 #if FIXME /* [ */
356 pmap_t pmap;
357 vm_page_t pg;
358 vm_map_offset_t vaddr;
359 ppnum_t paddr;
360 #endif /* FIXME ] */
361 kern_return_t kret;
362 vm_map_t map;
363
364 map = current_map();
365
366 #if FIXME /* [ */
367 if (dirtied) {
368 pmap = get_task_pmap(current_task());
369 for (vaddr = vm_map_trunc_page(addr, PAGE_MASK);
370 vaddr < vm_map_round_page(addr+len, PAGE_MASK);
371 vaddr += PAGE_SIZE) {
372 paddr = pmap_extract(pmap, vaddr);
373 pg = PHYS_TO_VM_PAGE(paddr);
374 vm_page_set_modified(pg);
375 }
376 }
377 #endif /* FIXME ] */
378 #ifdef lint
379 dirtied++;
380 #endif /* lint */
381 kret = vm_map_unwire(map,
382 vm_map_trunc_page(addr,
383 vm_map_page_mask(map)),
384 vm_map_round_page(addr+len,
385 vm_map_page_mask(map)),
386 FALSE);
387 switch (kret) {
388 case KERN_SUCCESS:
389 return (0);
390 case KERN_INVALID_ADDRESS:
391 case KERN_NO_SPACE:
392 return (ENOMEM);
393 case KERN_PROTECTION_FAILURE:
394 return (EACCES);
395 default:
396 return (EINVAL);
397 }
398 }
399
400 int
401 subyte(
402 user_addr_t addr,
403 int byte)
404 {
405 char character;
406
407 character = (char)byte;
408 return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1);
409 }
410
411 int
412 suibyte(
413 user_addr_t addr,
414 int byte)
415 {
416 char character;
417
418 character = (char)byte;
419 return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1);
420 }
421
422 int fubyte(user_addr_t addr)
423 {
424 unsigned char byte;
425
426 if (copyin(addr, (void *) &byte, sizeof(char)))
427 return(-1);
428 return(byte);
429 }
430
431 int fuibyte(user_addr_t addr)
432 {
433 unsigned char byte;
434
435 if (copyin(addr, (void *) &(byte), sizeof(char)))
436 return(-1);
437 return(byte);
438 }
439
440 int
441 suword(
442 user_addr_t addr,
443 long word)
444 {
445 return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1);
446 }
447
448 long fuword(user_addr_t addr)
449 {
450 long word = 0;
451
452 if (copyin(addr, (void *) &word, sizeof(int)))
453 return(-1);
454 return(word);
455 }
456
457 /* suiword and fuiword are the same as suword and fuword, respectively */
458
459 int
460 suiword(
461 user_addr_t addr,
462 long word)
463 {
464 return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1);
465 }
466
467 long fuiword(user_addr_t addr)
468 {
469 long word = 0;
470
471 if (copyin(addr, (void *) &word, sizeof(int)))
472 return(-1);
473 return(word);
474 }
475
476 /*
477 * With a 32-bit kernel and mixed 32/64-bit user tasks, this interface allows the
478 * fetching and setting of process-sized size_t and pointer values.
479 */
480 int
481 sulong(user_addr_t addr, int64_t word)
482 {
483
484 if (IS_64BIT_PROCESS(current_proc())) {
485 return(copyout((void *)&word, addr, sizeof(word)) == 0 ? 0 : -1);
486 } else {
487 return(suiword(addr, (long)word));
488 }
489 }
490
491 int64_t
492 fulong(user_addr_t addr)
493 {
494 int64_t longword;
495
496 if (IS_64BIT_PROCESS(current_proc())) {
497 if (copyin(addr, (void *)&longword, sizeof(longword)) != 0)
498 return(-1);
499 return(longword);
500 } else {
501 return((int64_t)fuiword(addr));
502 }
503 }
504
505 int
506 suulong(user_addr_t addr, uint64_t uword)
507 {
508
509 if (IS_64BIT_PROCESS(current_proc())) {
510 return(copyout((void *)&uword, addr, sizeof(uword)) == 0 ? 0 : -1);
511 } else {
512 return(suiword(addr, (uint32_t)uword));
513 }
514 }
515
516 uint64_t
517 fuulong(user_addr_t addr)
518 {
519 uint64_t ulongword;
520
521 if (IS_64BIT_PROCESS(current_proc())) {
522 if (copyin(addr, (void *)&ulongword, sizeof(ulongword)) != 0)
523 return(-1ULL);
524 return(ulongword);
525 } else {
526 return((uint64_t)fuiword(addr));
527 }
528 }
529
530 int
531 swapon(__unused proc_t procp, __unused struct swapon_args *uap, __unused int *retval)
532 {
533 return(ENOTSUP);
534 }
535
536 /*
537 * pid_for_task
538 *
539 * Find the BSD process ID for the Mach task associated with the given Mach port
540 * name
541 *
542 * Parameters: args User argument descriptor (see below)
543 *
544 * Indirect parameters: args->t Mach port name
545 * args->pid Process ID (returned value; see below)
546 *
547 * Returns: KERN_SUCCESS Success
548 * KERN_FAILURE Failure
549 *
550 * Implicit returns: args->pid Process ID
551 *
552 */
553 kern_return_t
554 pid_for_task(
555 struct pid_for_task_args *args)
556 {
557 mach_port_name_t t = args->t;
558 user_addr_t pid_addr = args->pid;
559 proc_t p;
560 task_t t1;
561 int pid = -1;
562 kern_return_t err = KERN_SUCCESS;
563
564 AUDIT_MACH_SYSCALL_ENTER(AUE_PIDFORTASK);
565 AUDIT_ARG(mach_port1, t);
566
567 t1 = port_name_to_task(t);
568
569 if (t1 == TASK_NULL) {
570 err = KERN_FAILURE;
571 goto pftout;
572 } else {
573 p = get_bsdtask_info(t1);
574 if (p) {
575 pid = proc_pid(p);
576 err = KERN_SUCCESS;
577 } else if (is_corpsetask(t1)) {
578 pid = task_pid(t1);
579 err = KERN_SUCCESS;
580 } else {
581 err = KERN_FAILURE;
582 }
583 }
584 task_deallocate(t1);
585 pftout:
586 AUDIT_ARG(pid, pid);
587 (void) copyout((char *) &pid, pid_addr, sizeof(int));
588 AUDIT_MACH_SYSCALL_EXIT(err);
589 return(err);
590 }
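/*
 * Illustrative sketch (not part of the original source): from user space,
 * pid_for_task() is reached through the Mach trap declared in
 * <mach/mach_traps.h>. A process can, for example, map its own task port
 * back to its BSD pid:
 *
 *	#include <mach/mach.h>
 *	#include <stdio.h>
 *
 *	int pid = -1;
 *	kern_return_t kr = pid_for_task(mach_task_self(), &pid);
 *	if (kr == KERN_SUCCESS)
 *		printf("my task maps to pid %d\n", pid);
 */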
591
592 /*
593 *
594 * tfp_policy = KERN_TFP_POLICY_DENY; Deny mode: none allowed except for self
595 * tfp_policy = KERN_TFP_POLICY_DEFAULT; Default mode: all POSIX checks and an upcall via the task access port for authentication
596 *
597 */
598 static int tfp_policy = KERN_TFP_POLICY_DEFAULT;
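/*
 * Illustrative sketch (not part of the original source): the policy above is
 * exposed as the "kern.tfp.policy" sysctl registered further down in this
 * file; reading it from user space might look like this (changing it
 * requires root, as enforced by sysctl_settfp_policy()):
 *
 *	#include <sys/sysctl.h>
 *	#include <stdio.h>
 *
 *	int policy = 0;
 *	size_t len = sizeof(policy);
 *	if (sysctlbyname("kern.tfp.policy", &policy, &len, NULL, 0) == 0)
 *		printf("task_for_pid policy: %d\n", policy);
 */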
599
600 /*
601 * Routine: task_for_pid_posix_check
602 * Purpose:
603 * Verify that the current process should be allowed to
604 * get the target process's task port. This is only
605 * permitted if:
606 * - The current process is root
607 * OR all of the following are true:
608 * - The target process's real, effective, and saved uids
609 * are the same as the current proc's euid,
610 * - The target process's group set is a subset of the
611 * calling process's group set, and
612 * - The target process hasn't switched credentials.
613 *
614 * Returns: TRUE: permitted
615 * FALSE: denied
616 */
617 static int
618 task_for_pid_posix_check(proc_t target)
619 {
620 kauth_cred_t targetcred, mycred;
621 uid_t myuid;
622 int allowed;
623
624 /* No task_for_pid on bad targets */
625 if (target->p_stat == SZOMB) {
626 return FALSE;
627 }
628
629 mycred = kauth_cred_get();
630 myuid = kauth_cred_getuid(mycred);
631
632 /* If we're running as root, the check passes */
633 if (kauth_cred_issuser(mycred))
634 return TRUE;
635
636 /* We're allowed to get our own task port */
637 if (target == current_proc())
638 return TRUE;
639
640 /*
641 * Under DENY, only root can get another proc's task port,
642 * so no more checks are needed.
643 */
644 if (tfp_policy == KERN_TFP_POLICY_DENY) {
645 return FALSE;
646 }
647
648 targetcred = kauth_cred_proc_ref(target);
649 allowed = TRUE;
650
651 /* Do target's ruid, euid, and saved uid match my euid? */
652 if ((kauth_cred_getuid(targetcred) != myuid) ||
653 (kauth_cred_getruid(targetcred) != myuid) ||
654 (kauth_cred_getsvuid(targetcred) != myuid)) {
655 allowed = FALSE;
656 goto out;
657 }
658
659 /* Are target's groups a subset of my groups? */
660 if (kauth_cred_gid_subset(targetcred, mycred, &allowed) ||
661 allowed == 0) {
662 allowed = FALSE;
663 goto out;
664 }
665
666 /* Has target switched credentials? */
667 if (target->p_flag & P_SUGID) {
668 allowed = FALSE;
669 goto out;
670 }
671
672 out:
673 kauth_cred_unref(&targetcred);
674 return allowed;
675 }
676
677 /*
678 * __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__
679 *
680 * Description: Waits for the user space daemon to respond to the request
681 * we made. The function is declared noinline so that it is visible in
682 * stackshots and spindumps, as well as for debugging.
683 */
684 __attribute__((noinline)) int __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(
685 mach_port_t task_access_port, int32_t calling_pid, uint32_t calling_gid, int32_t target_pid)
686 {
687 return check_task_access(task_access_port, calling_pid, calling_gid, target_pid);
688 }
689
690 /*
691 * Routine: task_for_pid
692 * Purpose:
693 * Get the task port for another "process", named by its
694 * process ID on the same host as "target_task".
695 *
696 * Only permitted to privileged processes, or processes
697 * with the same user ID.
698 *
699 * Note: if pid == 0, an error is returned no matter who is calling.
700 *
701 * XXX This should be a BSD system call, not a Mach trap!!!
702 */
703 kern_return_t
704 task_for_pid(
705 struct task_for_pid_args *args)
706 {
707 mach_port_name_t target_tport = args->target_tport;
708 int pid = args->pid;
709 user_addr_t task_addr = args->t;
710 proc_t p = PROC_NULL;
711 task_t t1 = TASK_NULL;
712 mach_port_name_t tret = MACH_PORT_NULL;
713 ipc_port_t tfpport;
714 void * sright;
715 int error = 0;
716
717 AUDIT_MACH_SYSCALL_ENTER(AUE_TASKFORPID);
718 AUDIT_ARG(pid, pid);
719 AUDIT_ARG(mach_port1, target_tport);
720
721 /* Always check if pid == 0 */
722 if (pid == 0) {
723 (void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
724 AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
725 return(KERN_FAILURE);
726 }
727
728 t1 = port_name_to_task(target_tport);
729 if (t1 == TASK_NULL) {
730 (void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
731 AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
732 return(KERN_FAILURE);
733 }
734
735
736 p = proc_find(pid);
737 if (p == PROC_NULL) {
738 error = KERN_FAILURE;
739 goto tfpout;
740 }
741
742 #if CONFIG_AUDIT
743 AUDIT_ARG(process, p);
744 #endif
745
746 if (!(task_for_pid_posix_check(p))) {
747 error = KERN_FAILURE;
748 goto tfpout;
749 }
750
751 if (p->task != TASK_NULL) {
752 /* If we aren't root and target's task access port is set... */
753 if (!kauth_cred_issuser(kauth_cred_get()) &&
754 p != current_proc() &&
755 (task_get_task_access_port(p->task, &tfpport) == 0) &&
756 (tfpport != IPC_PORT_NULL)) {
757
758 if (tfpport == IPC_PORT_DEAD) {
759 error = KERN_PROTECTION_FAILURE;
760 goto tfpout;
761 }
762
763 /* Call up to the task access server */
764 error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport, proc_selfpid(), kauth_getgid(), pid);
765
766 if (error != MACH_MSG_SUCCESS) {
767 if (error == MACH_RCV_INTERRUPTED)
768 error = KERN_ABORTED;
769 else
770 error = KERN_FAILURE;
771 goto tfpout;
772 }
773 }
774 #if CONFIG_MACF
775 error = mac_proc_check_get_task(kauth_cred_get(), p);
776 if (error) {
777 error = KERN_FAILURE;
778 goto tfpout;
779 }
780 #endif
781
782 /* Grant task port access */
783 task_reference(p->task);
784 extmod_statistics_incr_task_for_pid(p->task);
785
786 sright = (void *) convert_task_to_port(p->task);
787 tret = ipc_port_copyout_send(
788 sright,
789 get_task_ipcspace(current_task()));
790 }
791 error = KERN_SUCCESS;
792
793 tfpout:
794 task_deallocate(t1);
795 AUDIT_ARG(mach_port2, tret);
796 (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
797 if (p != PROC_NULL)
798 proc_rele(p);
799 AUDIT_MACH_SYSCALL_EXIT(error);
800 return(error);
801 }
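/*
 * Illustrative sketch (not part of the original source): the user-space
 * counterpart of this trap is declared in <mach/mach_traps.h>. Subject to
 * the posix check, the taskgated upcall and the MACF policy above (in
 * practice: root and/or the proper entitlements), a debugger-style caller
 * obtains a task port roughly like this:
 *
 *	#include <mach/mach.h>
 *	#include <unistd.h>
 *	#include <stdio.h>
 *
 *	mach_port_t task = MACH_PORT_NULL;
 *	kern_return_t kr = task_for_pid(mach_task_self(), getpid(), &task);
 *	if (kr == KERN_SUCCESS)
 *		printf("got task port 0x%x\n", task);
 */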
802
803 /*
804 * Routine: task_name_for_pid
805 * Purpose:
806 * Get the task name port for another "process", named by its
807 * process ID on the same host as "target_task".
808 *
809 * Only permitted to privileged processes, or processes
810 * with the same user ID.
811 *
812 * XXX This should be a BSD system call, not a Mach trap!!!
813 */
814
815 kern_return_t
816 task_name_for_pid(
817 struct task_name_for_pid_args *args)
818 {
819 mach_port_name_t target_tport = args->target_tport;
820 int pid = args->pid;
821 user_addr_t task_addr = args->t;
822 proc_t p = PROC_NULL;
823 task_t t1;
824 mach_port_name_t tret;
825 void * sright;
826 int error = 0, refheld = 0;
827 kauth_cred_t target_cred;
828
829 AUDIT_MACH_SYSCALL_ENTER(AUE_TASKNAMEFORPID);
830 AUDIT_ARG(pid, pid);
831 AUDIT_ARG(mach_port1, target_tport);
832
833 t1 = port_name_to_task(target_tport);
834 if (t1 == TASK_NULL) {
835 (void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
836 AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
837 return(KERN_FAILURE);
838 }
839
840 p = proc_find(pid);
841 if (p != PROC_NULL) {
842 AUDIT_ARG(process, p);
843 target_cred = kauth_cred_proc_ref(p);
844 refheld = 1;
845
846 if ((p->p_stat != SZOMB)
847 && ((current_proc() == p)
848 || kauth_cred_issuser(kauth_cred_get())
849 || ((kauth_cred_getuid(target_cred) == kauth_cred_getuid(kauth_cred_get())) &&
850 ((kauth_cred_getruid(target_cred) == kauth_getruid()))))) {
851
852 if (p->task != TASK_NULL) {
853 task_reference(p->task);
854 #if CONFIG_MACF
855 error = mac_proc_check_get_task_name(kauth_cred_get(), p);
856 if (error) {
857 task_deallocate(p->task);
858 goto noperm;
859 }
860 #endif
861 sright = (void *)convert_task_name_to_port(p->task);
862 tret = ipc_port_copyout_send(sright,
863 get_task_ipcspace(current_task()));
864 } else
865 tret = MACH_PORT_NULL;
866
867 AUDIT_ARG(mach_port2, tret);
868 (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
869 task_deallocate(t1);
870 error = KERN_SUCCESS;
871 goto tnfpout;
872 }
873 }
874
875 #if CONFIG_MACF
876 noperm:
877 #endif
878 task_deallocate(t1);
879 tret = MACH_PORT_NULL;
880 (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
881 error = KERN_FAILURE;
882 tnfpout:
883 if (refheld != 0)
884 kauth_cred_unref(&target_cred);
885 if (p != PROC_NULL)
886 proc_rele(p);
887 AUDIT_MACH_SYSCALL_EXIT(error);
888 return(error);
889 }
890
891 kern_return_t
892 pid_suspend(struct proc *p __unused, struct pid_suspend_args *args, int *ret)
893 {
894 task_t target = NULL;
895 proc_t targetproc = PROC_NULL;
896 int pid = args->pid;
897 int error = 0;
898
899 #if CONFIG_MACF
900 error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_SUSPEND);
901 if (error) {
902 error = EPERM;
903 goto out;
904 }
905 #endif
906
907 if (pid == 0) {
908 error = EPERM;
909 goto out;
910 }
911
912 targetproc = proc_find(pid);
913 if (targetproc == PROC_NULL) {
914 error = ESRCH;
915 goto out;
916 }
917
918 if (!task_for_pid_posix_check(targetproc)) {
919 error = EPERM;
920 goto out;
921 }
922
923 target = targetproc->task;
924 if (target != TASK_NULL) {
925 mach_port_t tfpport;
926
927 /* If we aren't root and target's task access port is set... */
928 if (!kauth_cred_issuser(kauth_cred_get()) &&
929 targetproc != current_proc() &&
930 (task_get_task_access_port(target, &tfpport) == 0) &&
931 (tfpport != IPC_PORT_NULL)) {
932
933 if (tfpport == IPC_PORT_DEAD) {
934 error = EACCES;
935 goto out;
936 }
937
938 /* Call up to the task access server */
939 error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport, proc_selfpid(), kauth_getgid(), pid);
940
941 if (error != MACH_MSG_SUCCESS) {
942 if (error == MACH_RCV_INTERRUPTED)
943 error = EINTR;
944 else
945 error = EPERM;
946 goto out;
947 }
948 }
949 }
950
951 task_reference(target);
952 error = task_pidsuspend(target);
953 if (error) {
954 if (error == KERN_INVALID_ARGUMENT) {
955 error = EINVAL;
956 } else {
957 error = EPERM;
958 }
959 }
960 #if CONFIG_MEMORYSTATUS
961 else {
962 memorystatus_on_suspend(targetproc);
963 }
964 #endif
965
966 task_deallocate(target);
967
968 out:
969 if (targetproc != PROC_NULL)
970 proc_rele(targetproc);
971 *ret = error;
972 return error;
973 }
974
975 kern_return_t
976 pid_resume(struct proc *p __unused, struct pid_resume_args *args, int *ret)
977 {
978 task_t target = NULL;
979 proc_t targetproc = PROC_NULL;
980 int pid = args->pid;
981 int error = 0;
982
983 #if CONFIG_MACF
984 error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_RESUME);
985 if (error) {
986 error = EPERM;
987 goto out;
988 }
989 #endif
990
991 if (pid == 0) {
992 error = EPERM;
993 goto out;
994 }
995
996 targetproc = proc_find(pid);
997 if (targetproc == PROC_NULL) {
998 error = ESRCH;
999 goto out;
1000 }
1001
1002 if (!task_for_pid_posix_check(targetproc)) {
1003 error = EPERM;
1004 goto out;
1005 }
1006
1007 target = targetproc->task;
1008 if (target != TASK_NULL) {
1009 mach_port_t tfpport;
1010
1011 /* If we aren't root and target's task access port is set... */
1012 if (!kauth_cred_issuser(kauth_cred_get()) &&
1013 targetproc != current_proc() &&
1014 (task_get_task_access_port(target, &tfpport) == 0) &&
1015 (tfpport != IPC_PORT_NULL)) {
1016
1017 if (tfpport == IPC_PORT_DEAD) {
1018 error = EACCES;
1019 goto out;
1020 }
1021
1022 /* Call up to the task access server */
1023 error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport, proc_selfpid(), kauth_getgid(), pid);
1024
1025 if (error != MACH_MSG_SUCCESS) {
1026 if (error == MACH_RCV_INTERRUPTED)
1027 error = EINTR;
1028 else
1029 error = EPERM;
1030 goto out;
1031 }
1032 }
1033 }
1034
1035
1036 task_reference(target);
1037
1038 #if CONFIG_MEMORYSTATUS
1039 memorystatus_on_resume(targetproc);
1040 #endif
1041
1042 error = task_pidresume(target);
1043 if (error) {
1044 if (error == KERN_INVALID_ARGUMENT) {
1045 error = EINVAL;
1046 } else {
1047 if (error == KERN_MEMORY_ERROR) {
1048 psignal(targetproc, SIGKILL);
1049 error = EIO;
1050 } else
1051 error = EPERM;
1052 }
1053 }
1054
1055 task_deallocate(target);
1056
1057 out:
1058 if (targetproc != PROC_NULL)
1059 proc_rele(targetproc);
1060
1061 *ret = error;
1062 return error;
1063 }
1064
1065
1066 static int
1067 sysctl_settfp_policy(__unused struct sysctl_oid *oidp, void *arg1,
1068 __unused int arg2, struct sysctl_req *req)
1069 {
1070 int error = 0;
1071 int new_value;
1072
1073 error = SYSCTL_OUT(req, arg1, sizeof(int));
1074 if (error || req->newptr == USER_ADDR_NULL)
1075 return(error);
1076
1077 if (!kauth_cred_issuser(kauth_cred_get()))
1078 return(EPERM);
1079
1080 if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) {
1081 goto out;
1082 }
1083 if ((new_value == KERN_TFP_POLICY_DENY)
1084 || (new_value == KERN_TFP_POLICY_DEFAULT))
1085 tfp_policy = new_value;
1086 else
1087 error = EINVAL;
1088 out:
1089 return(error);
1090
1091 }
1092
1093 #if defined(SECURE_KERNEL)
1094 static int kern_secure_kernel = 1;
1095 #else
1096 static int kern_secure_kernel = 0;
1097 #endif
1098
1099 SYSCTL_INT(_kern, OID_AUTO, secure_kernel, CTLFLAG_RD | CTLFLAG_LOCKED, &kern_secure_kernel, 0, "");
1100
1101 SYSCTL_NODE(_kern, KERN_TFP, tfp, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "tfp");
1102 SYSCTL_PROC(_kern_tfp, KERN_TFP_POLICY, policy, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
1103 &tfp_policy, sizeof(uint32_t), &sysctl_settfp_policy ,"I","policy");
1104
1105 SYSCTL_INT(_vm, OID_AUTO, shared_region_trace_level, CTLFLAG_RW | CTLFLAG_LOCKED,
1106 &shared_region_trace_level, 0, "");
1107 SYSCTL_INT(_vm, OID_AUTO, shared_region_version, CTLFLAG_RD | CTLFLAG_LOCKED,
1108 &shared_region_version, 0, "");
1109 SYSCTL_INT(_vm, OID_AUTO, shared_region_persistence, CTLFLAG_RW | CTLFLAG_LOCKED,
1110 &shared_region_persistence, 0, "");
1111
1112 /*
1113 * shared_region_check_np:
1114 *
1115 * This system call is intended for dyld.
1116 *
1117 * dyld calls this when any process starts to see if the process's shared
1118 * region is already set up and ready to use.
1119 * This call returns the base address of the first mapping in the
1120 * process's shared region.
1121 * dyld will then check what's mapped at that address.
1122 *
1123 * If the shared region is empty, dyld will then attempt to map the shared
1124 * cache file in the shared region via the shared_region_map_np() system call.
1125 *
1126 * If something's already mapped in the shared region, dyld will check if it
1127 * matches the shared cache it would like to use for that process.
1128 * If it matches, everything's ready and the process can proceed and use the
1129 * shared region.
1130 * If it doesn't match, dyld will unmap the shared region and map the shared
1131 * cache into the process's address space via mmap().
1132 *
1133 * ERROR VALUES
1134 * EINVAL no shared region
1135 * ENOMEM shared region is empty
1136 * EFAULT bad address for "start_address"
1137 */
1138 int
1139 shared_region_check_np(
1140 __unused struct proc *p,
1141 struct shared_region_check_np_args *uap,
1142 __unused int *retvalp)
1143 {
1144 vm_shared_region_t shared_region;
1145 mach_vm_offset_t start_address = 0;
1146 int error;
1147 kern_return_t kr;
1148
1149 SHARED_REGION_TRACE_DEBUG(
1150 ("shared_region: %p [%d(%s)] -> check_np(0x%llx)\n",
1151 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1152 p->p_pid, p->p_comm,
1153 (uint64_t)uap->start_address));
1154
1155 /* retrieve the current task's shared region */
1156 shared_region = vm_shared_region_get(current_task());
1157 if (shared_region != NULL) {
1158 /* retrieve address of its first mapping... */
1159 kr = vm_shared_region_start_address(shared_region,
1160 &start_address);
1161 if (kr != KERN_SUCCESS) {
1162 error = ENOMEM;
1163 } else {
1164 /* ... and give it to the caller */
1165 error = copyout(&start_address,
1166 (user_addr_t) uap->start_address,
1167 sizeof (start_address));
1168 if (error) {
1169 SHARED_REGION_TRACE_ERROR(
1170 ("shared_region: %p [%d(%s)] "
1171 "check_np(0x%llx) "
1172 "copyout(0x%llx) error %d\n",
1173 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1174 p->p_pid, p->p_comm,
1175 (uint64_t)uap->start_address, (uint64_t)start_address,
1176 error));
1177 }
1178 }
1179 vm_shared_region_deallocate(shared_region);
1180 } else {
1181 /* no shared region ! */
1182 error = EINVAL;
1183 }
1184
1185 SHARED_REGION_TRACE_DEBUG(
1186 ("shared_region: %p [%d(%s)] check_np(0x%llx) <- 0x%llx %d\n",
1187 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1188 p->p_pid, p->p_comm,
1189 (uint64_t)uap->start_address, (uint64_t)start_address, error));
1190
1191 return error;
1192 }
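/*
 * Illustrative sketch (not part of the original source): dyld reaches this
 * through a private syscall stub. Assuming a raw syscall(2) invocation with
 * the SYS_shared_region_check_np number from <sys/syscall.h> (a sketch, not
 * the exact dyld code), probing the shared region might look like:
 *
 *	#include <sys/syscall.h>
 *	#include <unistd.h>
 *	#include <stdint.h>
 *	#include <stdio.h>
 *
 *	uint64_t base = 0;
 *	if (syscall(SYS_shared_region_check_np, &base) == 0)
 *		printf("shared region starts at 0x%llx\n", (unsigned long long)base);
 *	else
 *		printf("no shared region mapped yet\n");
 */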
1193
1194
1195 int
1196 shared_region_copyin_mappings(
1197 struct proc *p,
1198 user_addr_t user_mappings,
1199 unsigned int mappings_count,
1200 struct shared_file_mapping_np *mappings)
1201 {
1202 int error = 0;
1203 vm_size_t mappings_size = 0;
1204
1205 /* get the list of mappings the caller wants us to establish */
1206 mappings_size = (vm_size_t) (mappings_count * sizeof (mappings[0]));
1207 error = copyin(user_mappings,
1208 mappings,
1209 mappings_size);
1210 if (error) {
1211 SHARED_REGION_TRACE_ERROR(
1212 ("shared_region: %p [%d(%s)] map(): "
1213 "copyin(0x%llx, %d) failed (error=%d)\n",
1214 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1215 p->p_pid, p->p_comm,
1216 (uint64_t)user_mappings, mappings_count, error));
1217 }
1218 return error;
1219 }
1220 /*
1221 * shared_region_map_np()
1222 *
1223 * This system call is intended for dyld.
1224 *
1225 * dyld uses this to map a shared cache file into a shared region.
1226 * This is usually done only the first time a shared cache is needed.
1227 * Subsequent processes will just use the populated shared region without
1228 * requiring any further setup.
1229 */
1230 int
1231 _shared_region_map_and_slide(
1232 struct proc *p,
1233 int fd,
1234 uint32_t mappings_count,
1235 struct shared_file_mapping_np *mappings,
1236 uint32_t slide,
1237 user_addr_t slide_start,
1238 user_addr_t slide_size)
1239 {
1240 int error;
1241 kern_return_t kr;
1242 struct fileproc *fp;
1243 struct vnode *vp, *root_vp, *scdir_vp;
1244 struct vnode_attr va;
1245 off_t fs;
1246 memory_object_size_t file_size;
1247 #if CONFIG_MACF
1248 vm_prot_t maxprot = VM_PROT_ALL;
1249 #endif
1250 memory_object_control_t file_control;
1251 struct vm_shared_region *shared_region;
1252 uint32_t i;
1253
1254 SHARED_REGION_TRACE_DEBUG(
1255 ("shared_region: %p [%d(%s)] -> map\n",
1256 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1257 p->p_pid, p->p_comm));
1258
1259 shared_region = NULL;
1260 fp = NULL;
1261 vp = NULL;
1262 scdir_vp = NULL;
1263
1264 /* get file structure from file descriptor */
1265 error = fp_lookup(p, fd, &fp, 0);
1266 if (error) {
1267 SHARED_REGION_TRACE_ERROR(
1268 ("shared_region: %p [%d(%s)] map: "
1269 "fd=%d lookup failed (error=%d)\n",
1270 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1271 p->p_pid, p->p_comm, fd, error));
1272 goto done;
1273 }
1274
1275 /* make sure we're attempting to map a vnode */
1276 if (FILEGLOB_DTYPE(fp->f_fglob) != DTYPE_VNODE) {
1277 SHARED_REGION_TRACE_ERROR(
1278 ("shared_region: %p [%d(%s)] map: "
1279 "fd=%d not a vnode (type=%d)\n",
1280 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1281 p->p_pid, p->p_comm,
1282 fd, FILEGLOB_DTYPE(fp->f_fglob)));
1283 error = EINVAL;
1284 goto done;
1285 }
1286
1287 /* we need at least read permission on the file */
1288 if (! (fp->f_fglob->fg_flag & FREAD)) {
1289 SHARED_REGION_TRACE_ERROR(
1290 ("shared_region: %p [%d(%s)] map: "
1291 "fd=%d not readable\n",
1292 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1293 p->p_pid, p->p_comm, fd));
1294 error = EPERM;
1295 goto done;
1296 }
1297
1298 /* get vnode from file structure */
1299 error = vnode_getwithref((vnode_t) fp->f_fglob->fg_data);
1300 if (error) {
1301 SHARED_REGION_TRACE_ERROR(
1302 ("shared_region: %p [%d(%s)] map: "
1303 "fd=%d getwithref failed (error=%d)\n",
1304 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1305 p->p_pid, p->p_comm, fd, error));
1306 goto done;
1307 }
1308 vp = (struct vnode *) fp->f_fglob->fg_data;
1309
1310 /* make sure the vnode is a regular file */
1311 if (vp->v_type != VREG) {
1312 SHARED_REGION_TRACE_ERROR(
1313 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1314 "not a file (type=%d)\n",
1315 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1316 p->p_pid, p->p_comm,
1317 (void *)VM_KERNEL_ADDRPERM(vp),
1318 vp->v_name, vp->v_type));
1319 error = EINVAL;
1320 goto done;
1321 }
1322
1323 #if CONFIG_MACF
1324 /* pass in 0 for the offset argument because AMFI does not need the offset
1325 of the shared cache */
1326 error = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()),
1327 fp->f_fglob, VM_PROT_ALL, MAP_FILE, 0, &maxprot);
1328 if (error) {
1329 goto done;
1330 }
1331 #endif /* MAC */
1332
1333 /* make sure vnode is on the process's root volume */
1334 root_vp = p->p_fd->fd_rdir;
1335 if (root_vp == NULL) {
1336 root_vp = rootvnode;
1337 } else {
1338 /*
1339 * Chroot-ed processes can't use the shared_region.
1340 */
1341 error = EINVAL;
1342 goto done;
1343 }
1344
1345 if (vp->v_mount != root_vp->v_mount) {
1346 SHARED_REGION_TRACE_ERROR(
1347 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1348 "not on process's root volume\n",
1349 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1350 p->p_pid, p->p_comm,
1351 (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name));
1352 error = EPERM;
1353 goto done;
1354 }
1355
1356 /* make sure vnode is owned by "root" */
1357 VATTR_INIT(&va);
1358 VATTR_WANTED(&va, va_uid);
1359 error = vnode_getattr(vp, &va, vfs_context_current());
1360 if (error) {
1361 SHARED_REGION_TRACE_ERROR(
1362 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1363 "vnode_getattr(%p) failed (error=%d)\n",
1364 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1365 p->p_pid, p->p_comm,
1366 (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name,
1367 (void *)VM_KERNEL_ADDRPERM(vp), error));
1368 goto done;
1369 }
1370 if (va.va_uid != 0) {
1371 SHARED_REGION_TRACE_ERROR(
1372 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1373 "owned by uid=%d instead of 0\n",
1374 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1375 p->p_pid, p->p_comm,
1376 (void *)VM_KERNEL_ADDRPERM(vp),
1377 vp->v_name, va.va_uid));
1378 error = EPERM;
1379 goto done;
1380 }
1381
1382 if (scdir_enforce) {
1383 /* get vnode for scdir_path */
1384 error = vnode_lookup(scdir_path, 0, &scdir_vp, vfs_context_current());
1385 if (error) {
1386 SHARED_REGION_TRACE_ERROR(
1387 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1388 "vnode_lookup(%s) failed (error=%d)\n",
1389 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1390 p->p_pid, p->p_comm,
1391 (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name,
1392 scdir_path, error));
1393 goto done;
1394 }
1395
1396 /* ensure parent is scdir_vp */
1397 if (vnode_parent(vp) != scdir_vp) {
1398 SHARED_REGION_TRACE_ERROR(
1399 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1400 "shared cache file not in %s\n",
1401 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1402 p->p_pid, p->p_comm,
1403 (void *)VM_KERNEL_ADDRPERM(vp),
1404 vp->v_name, scdir_path));
1405 error = EPERM;
1406 goto done;
1407 }
1408 }
1409
1410 /* get vnode size */
1411 error = vnode_size(vp, &fs, vfs_context_current());
1412 if (error) {
1413 SHARED_REGION_TRACE_ERROR(
1414 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1415 "vnode_size(%p) failed (error=%d)\n",
1416 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1417 p->p_pid, p->p_comm,
1418 (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name,
1419 (void *)VM_KERNEL_ADDRPERM(vp), error));
1420 goto done;
1421 }
1422 file_size = fs;
1423
1424 /* get the file's memory object handle */
1425 file_control = ubc_getobject(vp, UBC_HOLDOBJECT);
1426 if (file_control == MEMORY_OBJECT_CONTROL_NULL) {
1427 SHARED_REGION_TRACE_ERROR(
1428 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1429 "no memory object\n",
1430 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1431 p->p_pid, p->p_comm,
1432 (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name));
1433 error = EINVAL;
1434 goto done;
1435 }
1436
1437 /* check that the mappings are properly covered by code signatures */
1438 if (!cs_enforcement(NULL)) {
1439 /* code signing is not enforced: no need to check */
1440 } else for (i = 0; i < mappings_count; i++) {
1441 if (mappings[i].sfm_init_prot & VM_PROT_ZF) {
1442 /* zero-filled mapping: not backed by the file */
1443 continue;
1444 }
1445 if (ubc_cs_is_range_codesigned(vp,
1446 mappings[i].sfm_file_offset,
1447 mappings[i].sfm_size)) {
1448 /* this mapping is fully covered by code signatures */
1449 continue;
1450 }
1451 SHARED_REGION_TRACE_ERROR(
1452 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1453 "mapping #%d/%d [0x%llx:0x%llx:0x%llx:0x%x:0x%x] "
1454 "is not code-signed\n",
1455 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1456 p->p_pid, p->p_comm,
1457 (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name,
1458 i, mappings_count,
1459 mappings[i].sfm_address,
1460 mappings[i].sfm_size,
1461 mappings[i].sfm_file_offset,
1462 mappings[i].sfm_max_prot,
1463 mappings[i].sfm_init_prot));
1464 error = EINVAL;
1465 goto done;
1466 }
1467
1468 /* get the process's shared region (setup in vm_map_exec()) */
1469 shared_region = vm_shared_region_get(current_task());
1470 if (shared_region == NULL) {
1471 SHARED_REGION_TRACE_ERROR(
1472 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1473 "no shared region\n",
1474 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1475 p->p_pid, p->p_comm,
1476 (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name));
1477 goto done;
1478 }
1479
1480 /* map the file into that shared region's submap */
1481 kr = vm_shared_region_map_file(shared_region,
1482 mappings_count,
1483 mappings,
1484 file_control,
1485 file_size,
1486 (void *) p->p_fd->fd_rdir,
1487 slide,
1488 slide_start,
1489 slide_size);
1490 if (kr != KERN_SUCCESS) {
1491 SHARED_REGION_TRACE_ERROR(
1492 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1493 "vm_shared_region_map_file() failed kr=0x%x\n",
1494 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1495 p->p_pid, p->p_comm,
1496 (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name, kr));
1497 switch (kr) {
1498 case KERN_INVALID_ADDRESS:
1499 error = EFAULT;
1500 break;
1501 case KERN_PROTECTION_FAILURE:
1502 error = EPERM;
1503 break;
1504 case KERN_NO_SPACE:
1505 error = ENOMEM;
1506 break;
1507 case KERN_FAILURE:
1508 case KERN_INVALID_ARGUMENT:
1509 default:
1510 error = EINVAL;
1511 break;
1512 }
1513 goto done;
1514 }
1515
1516 error = 0;
1517
1518 vnode_lock_spin(vp);
1519
1520 vp->v_flag |= VSHARED_DYLD;
1521
1522 vnode_unlock(vp);
1523
1524 /* update the vnode's access time */
1525 if (! (vnode_vfsvisflags(vp) & MNT_NOATIME)) {
1526 VATTR_INIT(&va);
1527 nanotime(&va.va_access_time);
1528 VATTR_SET_ACTIVE(&va, va_access_time);
1529 vnode_setattr(vp, &va, vfs_context_current());
1530 }
1531
1532 if (p->p_flag & P_NOSHLIB) {
1533 /* signal that this process is now using split libraries */
1534 OSBitAndAtomic(~((uint32_t)P_NOSHLIB), &p->p_flag);
1535 }
1536
1537 done:
1538 if (vp != NULL) {
1539 /*
1540 * release the vnode...
1541 * ubc_map() still holds it for us in the non-error case
1542 */
1543 (void) vnode_put(vp);
1544 vp = NULL;
1545 }
1546 if (fp != NULL) {
1547 /* release the file descriptor */
1548 fp_drop(p, fd, fp, 0);
1549 fp = NULL;
1550 }
1551 if (scdir_vp != NULL) {
1552 (void)vnode_put(scdir_vp);
1553 scdir_vp = NULL;
1554 }
1555
1556 if (shared_region != NULL) {
1557 vm_shared_region_deallocate(shared_region);
1558 }
1559
1560 SHARED_REGION_TRACE_DEBUG(
1561 ("shared_region: %p [%d(%s)] <- map\n",
1562 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1563 p->p_pid, p->p_comm));
1564
1565 return error;
1566 }
1567
1568 int
1569 shared_region_map_and_slide_np(
1570 struct proc *p,
1571 struct shared_region_map_and_slide_np_args *uap,
1572 __unused int *retvalp)
1573 {
1574 struct shared_file_mapping_np *mappings;
1575 unsigned int mappings_count = uap->count;
1576 kern_return_t kr = KERN_SUCCESS;
1577 uint32_t slide = uap->slide;
1578
1579 #define SFM_MAX_STACK 8
1580 struct shared_file_mapping_np stack_mappings[SFM_MAX_STACK];
1581
1582 /* Is the process chrooted?? */
1583 if (p->p_fd->fd_rdir != NULL) {
1584 kr = EINVAL;
1585 goto done;
1586 }
1587
1588 if ((kr = vm_shared_region_sliding_valid(slide)) != KERN_SUCCESS) {
1589 if (kr == KERN_INVALID_ARGUMENT) {
1590 /*
1591 * This will happen if we request sliding again
1592 * with the same slide value that was used earlier
1593 * for the very first sliding.
1594 */
1595 kr = KERN_SUCCESS;
1596 }
1597 goto done;
1598 }
1599
1600 if (mappings_count == 0) {
1601 SHARED_REGION_TRACE_INFO(
1602 ("shared_region: %p [%d(%s)] map(): "
1603 "no mappings\n",
1604 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1605 p->p_pid, p->p_comm));
1606 kr = 0; /* no mappings: we're done ! */
1607 goto done;
1608 } else if (mappings_count <= SFM_MAX_STACK) {
1609 mappings = &stack_mappings[0];
1610 } else {
1611 SHARED_REGION_TRACE_ERROR(
1612 ("shared_region: %p [%d(%s)] map(): "
1613 "too many mappings (%d)\n",
1614 (void *)VM_KERNEL_ADDRPERM(current_thread()),
1615 p->p_pid, p->p_comm,
1616 mappings_count));
1617 kr = KERN_FAILURE;
1618 goto done;
1619 }
1620
1621 if ( (kr = shared_region_copyin_mappings(p, uap->mappings, uap->count, mappings))) {
1622 goto done;
1623 }
1624
1625
1626 kr = _shared_region_map_and_slide(p, uap->fd, mappings_count, mappings,
1627 slide,
1628 uap->slide_start, uap->slide_size);
1629 if (kr != KERN_SUCCESS) {
1630 return kr;
1631 }
1632
1633 done:
1634 return kr;
1635 }
1636
1637 /* sysctl overflow room */
1638
1639 SYSCTL_INT (_vm, OID_AUTO, pagesize, CTLFLAG_RD | CTLFLAG_LOCKED,
1640 (int *) &page_size, 0, "vm page size");
1641
1642 /* vm_page_free_target is provided as a makeshift solution for applications that want to
1643 allocate buffer space, possibly purgeable memory, but not cause inactive pages to be
1644 reclaimed. It allows the app to calculate how much memory is free outside the free target. */
1645 extern unsigned int vm_page_free_target;
1646 SYSCTL_INT(_vm, OID_AUTO, vm_page_free_target, CTLFLAG_RD | CTLFLAG_LOCKED,
1647 &vm_page_free_target, 0, "Pageout daemon free target");
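/*
 * Illustrative sketch (not part of the original source): per the comment
 * above, an application can combine "vm.vm_page_free_target" with
 * "vm.page_free_count" and "vm.pagesize" (all registered in this file) to
 * estimate how much memory is free beyond the pageout daemon's target:
 *
 *	#include <sys/sysctl.h>
 *	#include <stdio.h>
 *
 *	unsigned int free_pages = 0, free_target = 0;
 *	int page_size = 0;
 *	size_t len;
 *	len = sizeof(free_pages);
 *	sysctlbyname("vm.page_free_count", &free_pages, &len, NULL, 0);
 *	len = sizeof(free_target);
 *	sysctlbyname("vm.vm_page_free_target", &free_target, &len, NULL, 0);
 *	len = sizeof(page_size);
 *	sysctlbyname("vm.pagesize", &page_size, &len, NULL, 0);
 *	if (free_pages > free_target)
 *		printf("%llu bytes free above the pageout target\n",
 *		    (unsigned long long)(free_pages - free_target) * (unsigned long long)page_size);
 */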
1648
1649 extern unsigned int vm_memory_pressure;
1650 SYSCTL_INT(_vm, OID_AUTO, memory_pressure, CTLFLAG_RD | CTLFLAG_LOCKED,
1651 &vm_memory_pressure, 0, "Memory pressure indicator");
1652
1653 static int
1654 vm_ctl_page_free_wanted SYSCTL_HANDLER_ARGS
1655 {
1656 #pragma unused(oidp, arg1, arg2)
1657 unsigned int page_free_wanted;
1658
1659 page_free_wanted = mach_vm_ctl_page_free_wanted();
1660 return SYSCTL_OUT(req, &page_free_wanted, sizeof (page_free_wanted));
1661 }
1662 SYSCTL_PROC(_vm, OID_AUTO, page_free_wanted,
1663 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
1664 0, 0, vm_ctl_page_free_wanted, "I", "");
1665
1666 extern unsigned int vm_page_purgeable_count;
1667 SYSCTL_INT(_vm, OID_AUTO, page_purgeable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
1668 &vm_page_purgeable_count, 0, "Purgeable page count");
1669
1670 extern unsigned int vm_page_purgeable_wired_count;
1671 SYSCTL_INT(_vm, OID_AUTO, page_purgeable_wired_count, CTLFLAG_RD | CTLFLAG_LOCKED,
1672 &vm_page_purgeable_wired_count, 0, "Wired purgeable page count");
1673
1674 extern unsigned int vm_pageout_purged_objects;
1675 SYSCTL_INT(_vm, OID_AUTO, pageout_purged_objects, CTLFLAG_RD | CTLFLAG_LOCKED,
1676 &vm_pageout_purged_objects, 0, "System purged object count");
1677
1678 extern int madvise_free_debug;
1679 SYSCTL_INT(_vm, OID_AUTO, madvise_free_debug, CTLFLAG_RW | CTLFLAG_LOCKED,
1680 &madvise_free_debug, 0, "zero-fill on madvise(MADV_FREE*)");
1681
1682 SYSCTL_INT(_vm, OID_AUTO, page_reusable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
1683 &vm_page_stats_reusable.reusable_count, 0, "Reusable page count");
1684 SYSCTL_QUAD(_vm, OID_AUTO, reusable_success, CTLFLAG_RD | CTLFLAG_LOCKED,
1685 &vm_page_stats_reusable.reusable_pages_success, "");
1686 SYSCTL_QUAD(_vm, OID_AUTO, reusable_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
1687 &vm_page_stats_reusable.reusable_pages_failure, "");
1688 SYSCTL_QUAD(_vm, OID_AUTO, reusable_pages_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
1689 &vm_page_stats_reusable.reusable_pages_shared, "");
1690 SYSCTL_QUAD(_vm, OID_AUTO, all_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
1691 &vm_page_stats_reusable.all_reusable_calls, "");
1692 SYSCTL_QUAD(_vm, OID_AUTO, partial_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
1693 &vm_page_stats_reusable.partial_reusable_calls, "");
1694 SYSCTL_QUAD(_vm, OID_AUTO, reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
1695 &vm_page_stats_reusable.reuse_pages_success, "");
1696 SYSCTL_QUAD(_vm, OID_AUTO, reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
1697 &vm_page_stats_reusable.reuse_pages_failure, "");
1698 SYSCTL_QUAD(_vm, OID_AUTO, all_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
1699 &vm_page_stats_reusable.all_reuse_calls, "");
1700 SYSCTL_QUAD(_vm, OID_AUTO, partial_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
1701 &vm_page_stats_reusable.partial_reuse_calls, "");
1702 SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
1703 &vm_page_stats_reusable.can_reuse_success, "");
1704 SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
1705 &vm_page_stats_reusable.can_reuse_failure, "");
1706 SYSCTL_QUAD(_vm, OID_AUTO, reusable_reclaimed, CTLFLAG_RD | CTLFLAG_LOCKED,
1707 &vm_page_stats_reusable.reusable_reclaimed, "");
1708 SYSCTL_QUAD(_vm, OID_AUTO, reusable_nonwritable, CTLFLAG_RD | CTLFLAG_LOCKED,
1709 &vm_page_stats_reusable.reusable_nonwritable, "");
1710 SYSCTL_QUAD(_vm, OID_AUTO, reusable_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
1711 &vm_page_stats_reusable.reusable_shared, "");
1712 SYSCTL_QUAD(_vm, OID_AUTO, free_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
1713 &vm_page_stats_reusable.free_shared, "");
1714
1715
1716 extern unsigned int vm_page_free_count, vm_page_speculative_count;
1717 SYSCTL_UINT(_vm, OID_AUTO, page_free_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_free_count, 0, "");
1718 SYSCTL_UINT(_vm, OID_AUTO, page_speculative_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_speculative_count, 0, "");
1719
1720 extern unsigned int vm_page_cleaned_count;
1721 SYSCTL_UINT(_vm, OID_AUTO, page_cleaned_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_cleaned_count, 0, "Cleaned queue size");
1722
1723 extern unsigned int vm_page_pageable_internal_count, vm_page_pageable_external_count;
1724 SYSCTL_UINT(_vm, OID_AUTO, page_pageable_internal_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pageable_internal_count, 0, "");
1725 SYSCTL_UINT(_vm, OID_AUTO, page_pageable_external_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pageable_external_count, 0, "");
1726
1727 /* pageout counts */
1728 extern unsigned int vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external, vm_pageout_inactive_clean, vm_pageout_speculative_clean, vm_pageout_inactive_used;
1729 extern unsigned int vm_pageout_freed_from_inactive_clean, vm_pageout_freed_from_speculative;
1730 SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_dirty_internal, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_dirty_internal, 0, "");
1731 SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_dirty_external, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_dirty_external, 0, "");
1732 SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_clean, 0, "");
1733 SYSCTL_UINT(_vm, OID_AUTO, pageout_speculative_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_speculative_clean, 0, "");
1734 SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_used, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_used, 0, "");
1735 SYSCTL_UINT(_vm, OID_AUTO, pageout_freed_from_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_freed_from_inactive_clean, 0, "");
1736 SYSCTL_UINT(_vm, OID_AUTO, pageout_freed_from_speculative, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_freed_from_speculative, 0, "");
1737
1738 extern unsigned int vm_pageout_freed_from_cleaned;
1739 SYSCTL_UINT(_vm, OID_AUTO, pageout_freed_from_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_freed_from_cleaned, 0, "");
1740
1741 /* counts of pages entering the cleaned queue */
1742 extern unsigned int vm_pageout_enqueued_cleaned, vm_pageout_enqueued_cleaned_from_inactive_clean, vm_pageout_enqueued_cleaned_from_inactive_dirty;
1743 SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_enqueued_cleaned, 0, ""); /* sum of next two */
1744 SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned_from_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_enqueued_cleaned_from_inactive_clean, 0, "");
1745 SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned_from_inactive_dirty, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_enqueued_cleaned_from_inactive_dirty, 0, "");
1746
1747 /* counts of pages leaving the cleaned queue */
1748 extern unsigned int vm_pageout_cleaned_reclaimed, vm_pageout_cleaned_reactivated, vm_pageout_cleaned_reference_reactivated, vm_pageout_cleaned_volatile_reactivated, vm_pageout_cleaned_fault_reactivated, vm_pageout_cleaned_commit_reactivated, vm_pageout_cleaned_busy, vm_pageout_cleaned_nolock;
1749 SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_reclaimed, 0, "Cleaned pages reclaimed");
1750 SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_reactivated, 0, "Cleaned pages reactivated"); /* sum of all reactivated AND busy and nolock (even though those actually get re-deactivated) */
1751 SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reference_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_reference_reactivated, 0, "Cleaned pages reference reactivated");
1752 SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_volatile_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_volatile_reactivated, 0, "Cleaned pages volatile reactivated");
1753 SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_fault_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_fault_reactivated, 0, "Cleaned pages fault reactivated");
1754 SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_commit_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_commit_reactivated, 0, "Cleaned pages commit reactivated");
1755 SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_busy, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_busy, 0, "Cleaned pages busy (deactivated)");
1756 SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_nolock, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_nolock, 0, "Cleaned pages no-lock (deactivated)");
1757
1758 /* counts of pages prefaulted when entering a memory object */
1759 extern int64_t vm_prefault_nb_pages, vm_prefault_nb_bailout;
1760 SYSCTL_QUAD(_vm, OID_AUTO, prefault_nb_pages, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_prefault_nb_pages, "");
1761 SYSCTL_QUAD(_vm, OID_AUTO, prefault_nb_bailout, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_prefault_nb_bailout, "");
1762
1763 #if CONFIG_SECLUDED_MEMORY
1764
1765 SYSCTL_UINT(_vm, OID_AUTO, num_tasks_can_use_secluded_mem, CTLFLAG_RD | CTLFLAG_LOCKED, &num_tasks_can_use_secluded_mem, 0, "");
1766 extern unsigned int vm_page_secluded_target;
1767 extern unsigned int vm_page_secluded_count;
1768 extern unsigned int vm_page_secluded_count_free;
1769 extern unsigned int vm_page_secluded_count_inuse;
1770 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_target, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_target, 0, "");
1771 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count, 0, "");
1772 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_free, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count_free, 0, "");
1773 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_inuse, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count_inuse, 0, "");
1774
1775 extern struct vm_page_secluded_data vm_page_secluded;
1776 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_eligible, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.eligible_for_secluded, 0, "");
1777 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_success_free, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_success_free, 0, "");
1778 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_success_other, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_success_other, 0, "");
1779 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_locked, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_locked, 0, "");
1780 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_state, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_state, 0, "");
1781 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_dirty, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_dirty, 0, "");
1782 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_for_iokit, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_for_iokit, 0, "");
1783 SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_for_iokit_success, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_for_iokit_success, 0, "");
1784
1785 extern uint64_t vm_pageout_freed_from_secluded;
1786 extern uint64_t vm_pageout_secluded_reactivated;
1787 extern uint64_t vm_pageout_secluded_burst_count;
1788 SYSCTL_QUAD(_vm, OID_AUTO, pageout_freed_from_secluded, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_freed_from_secluded, "");
1789 SYSCTL_QUAD(_vm, OID_AUTO, pageout_secluded_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_secluded_reactivated, "Secluded pages reactivated"); /* sum of all reactivated AND busy and nolock (even though those actually get re-deactivated) */
1790 SYSCTL_QUAD(_vm, OID_AUTO, pageout_secluded_burst_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_secluded_burst_count, "");
1791
1792 #endif /* CONFIG_SECLUDED_MEMORY */
1793
1794 #include <kern/thread.h>
1795 #include <sys/user.h>
1796
1797 void vm_pageout_io_throttle(void);
1798
1799 void vm_pageout_io_throttle(void) {
1800 struct uthread *uthread = get_bsdthread_info(current_thread());
1801
1802 /*
1803 * If the thread is marked as a low priority I/O type
1804 * and the I/O we issued while in this cleaning operation
1805 * collided with normal I/O operations, we'll
1806 * delay in order to mitigate the impact of this
1807 * task on the normal operation of the system.
1808 */
1809
1810 if (uthread->uu_lowpri_window) {
1811 throttle_lowpri_io(1);
1812 }
1813
1814 }
1815
1816 int
1817 vm_pressure_monitor(
1818 __unused struct proc *p,
1819 struct vm_pressure_monitor_args *uap,
1820 int *retval)
1821 {
1822 kern_return_t kr;
1823 uint32_t pages_reclaimed;
1824 uint32_t pages_wanted;
1825
1826 kr = mach_vm_pressure_monitor(
1827 (boolean_t) uap->wait_for_pressure,
1828 uap->nsecs_monitored,
1829 (uap->pages_reclaimed) ? &pages_reclaimed : NULL,
1830 &pages_wanted);
1831
1832 switch (kr) {
1833 case KERN_SUCCESS:
1834 break;
1835 case KERN_ABORTED:
1836 return EINTR;
1837 default:
1838 return EINVAL;
1839 }
1840
1841 if (uap->pages_reclaimed) {
1842 if (copyout((void *)&pages_reclaimed,
1843 uap->pages_reclaimed,
1844 sizeof (pages_reclaimed)) != 0) {
1845 return EFAULT;
1846 }
1847 }
1848
1849 *retval = (int) pages_wanted;
1850 return 0;
1851 }
1852
1853 int
1854 kas_info(struct proc *p,
1855 struct kas_info_args *uap,
1856 int *retval __unused)
1857 {
1858 #ifdef SECURE_KERNEL
1859 (void)p;
1860 (void)uap;
1861 return ENOTSUP;
1862 #else /* !SECURE_KERNEL */
1863 int selector = uap->selector;
1864 user_addr_t valuep = uap->value;
1865 user_addr_t sizep = uap->size;
1866 user_size_t size;
1867 int error;
1868
1869 if (!kauth_cred_issuser(kauth_cred_get())) {
1870 return EPERM;
1871 }
1872
1873 #if CONFIG_MACF
1874 error = mac_system_check_kas_info(kauth_cred_get(), selector);
1875 if (error) {
1876 return error;
1877 }
1878 #endif
1879
1880 if (IS_64BIT_PROCESS(p)) {
1881 user64_size_t size64;
1882 error = copyin(sizep, &size64, sizeof(size64));
1883 size = (user_size_t)size64;
1884 } else {
1885 user32_size_t size32;
1886 error = copyin(sizep, &size32, sizeof(size32));
1887 size = (user_size_t)size32;
1888 }
1889 if (error) {
1890 return error;
1891 }
1892
1893 switch (selector) {
1894 case KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR:
1895 {
1896 uint64_t slide = vm_kernel_slide;
1897
1898 if (sizeof(slide) != size) {
1899 return EINVAL;
1900 }
1901
1902 if (IS_64BIT_PROCESS(p)) {
1903 user64_size_t size64 = (user64_size_t)size;
1904 error = copyout(&size64, sizep, sizeof(size64));
1905 } else {
1906 user32_size_t size32 = (user32_size_t)size;
1907 error = copyout(&size32, sizep, sizeof(size32));
1908 }
1909 if (error) {
1910 return error;
1911 }
1912
1913 error = copyout(&slide, valuep, sizeof(slide));
1914 if (error) {
1915 return error;
1916 }
1917 }
1918 break;
1919 default:
1920 return EINVAL;
1921 }
1922
1923 return 0;
1924 #endif /* !SECURE_KERNEL */
1925 }