/*
 * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1987 Carnegie-Mellon University
 * All rights reserved. The CMU software License Agreement specifies
 * the terms and conditions for use and redistribution.
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
 * support for mandatory and extensible security protections. This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */

#include <meta_features.h>

#include <vm/vm_options.h>

#include <kern/task.h>
#include <kern/thread.h>
#include <kern/debug.h>
#include <kern/extmod_statistics.h>
#include <mach/mach_traps.h>
#include <mach/port.h>
#include <mach/task.h>
#include <mach/task_access.h>
#include <mach/task_special_ports.h>
#include <mach/time_value.h>
#include <mach/vm_map.h>
#include <mach/vm_param.h>
#include <mach/vm_prot.h>

#include <sys/file_internal.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/dir.h>
#include <sys/namei.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/vm.h>
#include <sys/file.h>
#include <sys/vnode_internal.h>
#include <sys/mount.h>
#include <sys/trace.h>
#include <sys/kernel.h>
#include <sys/ubc_internal.h>
#include <sys/user.h>
#include <sys/syslog.h>
#include <sys/stat.h>
#include <sys/sysproto.h>
#include <sys/mman.h>
#include <sys/sysctl.h>
#include <sys/cprotect.h>
#include <sys/kpi_socket.h>
#include <sys/kas_info.h>
#include <sys/socket.h>
#include <sys/socketvar.h>

#include <security/audit/audit.h>
#include <security/mac.h>
#include <bsm/audit_kevents.h>

#include <kern/kalloc.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_pageout.h>

#include <machine/spl.h>

#include <mach/shared_region.h>
#include <vm/vm_shared_region.h>

#include <vm/vm_protos.h>

#include <sys/kern_memorystatus.h>


int _shared_region_map_and_slide(struct proc*, int, unsigned int, struct shared_file_mapping_np*, uint32_t, user_addr_t, user_addr_t);
int shared_region_copyin_mappings(struct proc*, user_addr_t, unsigned int, struct shared_file_mapping_np *);


#if DEVELOPMENT || DEBUG
extern int radar_20146450;
SYSCTL_INT(_vm, OID_AUTO, radar_20146450, CTLFLAG_RW | CTLFLAG_LOCKED, &radar_20146450, 0, "");

extern int macho_printf;
SYSCTL_INT(_vm, OID_AUTO, macho_printf, CTLFLAG_RW | CTLFLAG_LOCKED, &macho_printf, 0, "");

extern int apple_protect_pager_data_request_debug;
SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_data_request_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &apple_protect_pager_data_request_debug, 0, "");

#endif /* DEVELOPMENT || DEBUG */

SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_compressor, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_compressor_pages, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_terminate, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_terminate, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_terminate_failure, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_terminate_failure, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_should_cow_but_wired, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.should_cow_but_wired, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_extra_cow, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_extra_cow, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_extra_cow_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_extra_cow_pages, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_lookup_failure_write, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_lookup_failure_write, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_lookup_failure_copy, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_lookup_failure_copy, 0, "");
#if VM_SCAN_FOR_SHADOW_CHAIN
static int vm_shadow_max_enabled = 0;	/* Disabled by default */
extern int proc_shadow_max(void);
static int
vm_shadow_max SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2, oidp)
	int value = 0;

	if (vm_shadow_max_enabled)
		value = proc_shadow_max();

	return SYSCTL_OUT(req, &value, sizeof(value));
}
SYSCTL_PROC(_vm, OID_AUTO, vm_shadow_max, CTLTYPE_INT|CTLFLAG_RD|CTLFLAG_LOCKED,
    0, 0, &vm_shadow_max, "I", "");

SYSCTL_INT(_vm, OID_AUTO, vm_shadow_max_enabled, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_shadow_max_enabled, 0, "");

#endif /* VM_SCAN_FOR_SHADOW_CHAIN */

SYSCTL_INT(_vm, OID_AUTO, vm_debug_events, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_debug_events, 0, "");
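/*
 * Usage note (hedged sketch, not part of the kernel sources): the counters
 * published above can be read from userspace with sysctlbyname(3). The OID
 * string is the parent node plus the name passed to SYSCTL_INT(), e.g.
 * "vm.vm_debug_events" for the declaration just above.
 *
 *	#include <sys/sysctl.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		int value;
 *		size_t len = sizeof(value);
 *
 *		if (sysctlbyname("vm.vm_debug_events", &value, &len, NULL, 0) == 0)
 *			printf("vm.vm_debug_events = %d\n", value);
 *		return 0;
 *	}
 */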
__attribute__((noinline)) int __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(
	mach_port_t task_access_port, int32_t calling_pid, uint32_t calling_gid, int32_t target_pid);
/*
 * Sysctls related to data/stack execution. See osfmk/vm/vm_map.c
 */

#ifndef SECURE_KERNEL
extern int allow_stack_exec, allow_data_exec;

SYSCTL_INT(_vm, OID_AUTO, allow_stack_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_stack_exec, 0, "");
SYSCTL_INT(_vm, OID_AUTO, allow_data_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_data_exec, 0, "");

#endif /* !SECURE_KERNEL */

static const char *prot_values[] = {
	"none",
	"read-only",
	"write-only",
	"read-write",
	"execute-only",
	"read-execute",
	"write-execute",
	"read-write-execute"
};

void
log_stack_execution_failure(addr64_t vaddr, vm_prot_t prot)
{
	printf("Data/Stack execution not permitted: %s[pid %d] at virtual address 0x%qx, protections were %s\n",
	    current_proc()->p_comm, current_proc()->p_pid, vaddr, prot_values[prot & VM_PROT_ALL]);
}

int shared_region_unnest_logging = 1;

SYSCTL_INT(_vm, OID_AUTO, shared_region_unnest_logging, CTLFLAG_RW | CTLFLAG_LOCKED,
    &shared_region_unnest_logging, 0, "");

int vm_shared_region_unnest_log_interval = 10;
int shared_region_unnest_log_count_threshold = 5;

/*
 * Shared cache path enforcement.
 */

static int scdir_enforce = 1;
static char scdir_path[] = "/var/db/dyld/";

#ifndef SECURE_KERNEL
SYSCTL_INT(_vm, OID_AUTO, enforce_shared_cache_dir, CTLFLAG_RW | CTLFLAG_LOCKED, &scdir_enforce, 0, "");
#endif

/* These log rate throttling state variables aren't thread safe, but
 * are sufficient unto the task.
 */
static int64_t last_unnest_log_time = 0;
static int shared_region_unnest_log_count = 0;

void log_unnest_badness(
	vm_map_t m,
	vm_map_offset_t s,
	vm_map_offset_t e) {
	struct timeval tv;

	if (shared_region_unnest_logging == 0)
		return;

	if (shared_region_unnest_logging == 1) {
		microtime(&tv);
		if ((tv.tv_sec - last_unnest_log_time) < vm_shared_region_unnest_log_interval) {
			if (shared_region_unnest_log_count++ > shared_region_unnest_log_count_threshold)
				return;
		}
		else {
			last_unnest_log_time = tv.tv_sec;
			shared_region_unnest_log_count = 0;
		}
	}

	printf("%s[%d] triggered unnest of range 0x%qx->0x%qx of DYLD shared region in VM map %p. While not abnormal for debuggers, this increases system memory footprint until the target exits.\n", current_proc()->p_comm, current_proc()->p_pid, (uint64_t)s, (uint64_t)e, (void *) VM_KERNEL_ADDRPERM(m));
}

int
useracc(
	user_addr_t	addr,
	user_size_t	len,
	int	prot)
{
	vm_map_t	map;

	map = current_map();
	return (vm_map_check_protection(
			map,
			vm_map_trunc_page(addr,
				vm_map_page_mask(map)),
			vm_map_round_page(addr+len,
				vm_map_page_mask(map)),
			prot == B_READ ? VM_PROT_READ : VM_PROT_WRITE));
}
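/*
 * Hedged usage sketch: a hypothetical in-kernel caller validating a user
 * buffer with useracc() before starting an operation on it. B_READ is the
 * buf.h flag tested above; check_user_buffer() is illustrative only.
 *
 *	static int
 *	check_user_buffer(user_addr_t ubuf, user_size_t ulen)
 *	{
 *		if (!useracc(ubuf, ulen, B_READ))
 *			return EFAULT;	// range not readable in current_map()
 *		return 0;
 *	}
 */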
int
vslock(
	user_addr_t	addr,
	user_size_t	len)
{
	kern_return_t	kret;
	vm_map_t	map;

	map = current_map();
	kret = vm_map_wire(map,
		vm_map_trunc_page(addr,
			vm_map_page_mask(map)),
		vm_map_round_page(addr+len,
			vm_map_page_mask(map)),
		VM_PROT_READ | VM_PROT_WRITE | VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_BSD),
		FALSE);

	switch (kret) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}

int
vsunlock(
	user_addr_t addr,
	user_size_t len,
	__unused int dirtied)
{
#if FIXME  /* [ */
	pmap_t		pmap;
	vm_page_t	pg;
	vm_map_offset_t	vaddr;
	ppnum_t		paddr;
#endif  /* FIXME ] */
	kern_return_t	kret;
	vm_map_t	map;

	map = current_map();

#if FIXME  /* [ */
	if (dirtied) {
		pmap = get_task_pmap(current_task());
		for (vaddr = vm_map_trunc_page(addr, PAGE_MASK);
		     vaddr < vm_map_round_page(addr+len, PAGE_MASK);
		     vaddr += PAGE_SIZE) {
			paddr = pmap_extract(pmap, vaddr);
			pg = PHYS_TO_VM_PAGE(paddr);
			vm_page_set_modified(pg);
		}
	}
#endif  /* FIXME ] */
#ifdef	lint
	dirtied++;
#endif	/* lint */
	kret = vm_map_unwire(map,
		vm_map_trunc_page(addr,
			vm_map_page_mask(map)),
		vm_map_round_page(addr+len,
			vm_map_page_mask(map)),
		FALSE);
	switch (kret) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}
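/*
 * Hedged sketch of the intended vslock()/vsunlock() pairing: wire a user
 * range for the duration of an operation that cannot tolerate faults, then
 * unwire it. do_work_on_wired_range() is a hypothetical worker; the error
 * values follow the switch statements above.
 *
 *	static int
 *	with_wired_range(user_addr_t addr, user_size_t len)
 *	{
 *		int error = vslock(addr, len);	// 0, ENOMEM, EACCES, EINVAL
 *		if (error)
 *			return error;
 *		error = do_work_on_wired_range(addr, len);	// hypothetical
 *		(void) vsunlock(addr, len, 1);	// 1 => pages were dirtied
 *		return error;
 *	}
 */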
int
subyte(
	user_addr_t addr,
	int byte)
{
	char character;

	character = (char)byte;
	return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1);
}

int
suibyte(
	user_addr_t addr,
	int byte)
{
	char character;

	character = (char)byte;
	return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1);
}

int fubyte(user_addr_t addr)
{
	unsigned char byte;

	if (copyin(addr, (void *) &byte, sizeof(char)))
		return(-1);
	return(byte);
}

int fuibyte(user_addr_t addr)
{
	unsigned char byte;

	if (copyin(addr, (void *) &(byte), sizeof(char)))
		return(-1);
	return(byte);
}

int
suword(
	user_addr_t addr,
	long word)
{
	return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1);
}

long fuword(user_addr_t addr)
{
	long word = 0;

	if (copyin(addr, (void *) &word, sizeof(int)))
		return(-1);
	return(word);
}

/* suiword and fuiword are the same as suword and fuword, respectively */

int
suiword(
	user_addr_t addr,
	long word)
{
	return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1);
}

long fuiword(user_addr_t addr)
{
	long word = 0;

	if (copyin(addr, (void *) &word, sizeof(int)))
		return(-1);
	return(word);
}

/*
 * With a 32-bit kernel and mixed 32/64-bit user tasks, this interface allows the
 * fetching and setting of process-sized size_t and pointer values.
 */
int
sulong(user_addr_t addr, int64_t word)
{

	if (IS_64BIT_PROCESS(current_proc())) {
		return(copyout((void *)&word, addr, sizeof(word)) == 0 ? 0 : -1);
	} else {
		return(suiword(addr, (long)word));
	}
}

int64_t
fulong(user_addr_t addr)
{
	int64_t longword;

	if (IS_64BIT_PROCESS(current_proc())) {
		if (copyin(addr, (void *)&longword, sizeof(longword)) != 0)
			return(-1);
		return(longword);
	} else {
		return((int64_t)fuiword(addr));
	}
}

int
suulong(user_addr_t addr, uint64_t uword)
{

	if (IS_64BIT_PROCESS(current_proc())) {
		return(copyout((void *)&uword, addr, sizeof(uword)) == 0 ? 0 : -1);
	} else {
		return(suiword(addr, (uint32_t)uword));
	}
}

uint64_t
fuulong(user_addr_t addr)
{
	uint64_t ulongword;

	if (IS_64BIT_PROCESS(current_proc())) {
		if (copyin(addr, (void *)&ulongword, sizeof(ulongword)) != 0)
			return(-1ULL);
		return(ulongword);
	} else {
		return((uint64_t)fuiword(addr));
	}
}
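/*
 * Hedged usage sketch for the fetch/store helpers above. Note the
 * overloaded error convention: fulong() returns -1 both on a fault and
 * when the user word legitimately contains -1, so callers that must
 * distinguish the two should use copyin()/copyout() directly.
 *
 *	static int
 *	bump_user_long(user_addr_t uptr)
 *	{
 *		int64_t v = fulong(uptr);	// -1 on fault (or a stored -1)
 *		if (sulong(uptr, v + 1) != 0)
 *			return EFAULT;		// store failed
 *		return 0;
 *	}
 */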
int
swapon(__unused proc_t procp, __unused struct swapon_args *uap, __unused int *retval)
{
	return(ENOTSUP);
}

/*
 * pid_for_task
 *
 * Find the BSD process ID for the Mach task associated with the given Mach port
 * name
 *
 * Parameters:	args		User argument descriptor (see below)
 *
 * Indirect parameters:	args->t		Mach port name
 *			args->pid	Process ID (returned value; see below)
 *
 * Returns:	KERN_SUCCESS	Success
 *		KERN_FAILURE	Failure
 *
 * Implicit returns: args->pid		Process ID
 *
 */
kern_return_t
pid_for_task(
	struct pid_for_task_args *args)
{
	mach_port_name_t	t = args->t;
	user_addr_t		pid_addr = args->pid;
	proc_t			p;
	task_t			t1;
	int			pid = -1;
	kern_return_t		err = KERN_SUCCESS;

	AUDIT_MACH_SYSCALL_ENTER(AUE_PIDFORTASK);
	AUDIT_ARG(mach_port1, t);

	t1 = port_name_to_task(t);

	if (t1 == TASK_NULL) {
		err = KERN_FAILURE;
		goto pftout;
	} else {
		p = get_bsdtask_info(t1);
		if (p) {
			pid = proc_pid(p);
			err = KERN_SUCCESS;
		} else {
			err = KERN_FAILURE;
		}
	}
	task_deallocate(t1);
pftout:
	AUDIT_ARG(pid, pid);
	(void) copyout((char *) &pid, pid_addr, sizeof(int));
	AUDIT_MACH_SYSCALL_EXIT(err);
	return(err);
}
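/*
 * Hedged userspace sketch of the trap above, using the libsystem prototype
 * from <mach/mach_traps.h>: kern_return_t pid_for_task(mach_port_name_t t,
 * int *pid). On failure the copied-out pid is -1, per the code above.
 *
 *	#include <mach/mach.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		int pid = -1;
 *
 *		if (pid_for_task(mach_task_self(), &pid) == KERN_SUCCESS)
 *			printf("my pid is %d\n", pid);
 *		return 0;
 *	}
 */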
/*
 *
 * tfp_policy = KERN_TFP_POLICY_DENY; Deny Mode: None allowed except for self
 * tfp_policy = KERN_TFP_POLICY_DEFAULT; default mode: all posix checks and upcall via task port for authentication
 *
 */
static int tfp_policy = KERN_TFP_POLICY_DEFAULT;

/*
 *	Routine:	task_for_pid_posix_check
 *	Purpose:
 *			Verify that the current process should be allowed to
 *			get the target process's task port. This is only
 *			permitted if:
 *			- The current process is root
 *			OR all of the following are true:
 *			- The target process's real, effective, and saved uids
 *			  are the same as the current proc's euid,
 *			- The target process's group set is a subset of the
 *			  calling process's group set, and
 *			- The target process hasn't switched credentials.
 *
 *	Returns:	TRUE: permitted
 *			FALSE: denied
 */
static int
task_for_pid_posix_check(proc_t target)
{
	kauth_cred_t targetcred, mycred;
	uid_t myuid;
	int allowed;

	/* No task_for_pid on bad targets */
	if (target->p_stat == SZOMB) {
		return FALSE;
	}

	mycred = kauth_cred_get();
	myuid = kauth_cred_getuid(mycred);

	/* If we're running as root, the check passes */
	if (kauth_cred_issuser(mycred))
		return TRUE;

	/* We're allowed to get our own task port */
	if (target == current_proc())
		return TRUE;

	/*
	 * Under DENY, only root can get another proc's task port,
	 * so no more checks are needed.
	 */
	if (tfp_policy == KERN_TFP_POLICY_DENY) {
		return FALSE;
	}

	targetcred = kauth_cred_proc_ref(target);
	allowed = TRUE;

	/* Do target's ruid, euid, and saved uid match my euid? */
	if ((kauth_cred_getuid(targetcred) != myuid) ||
	    (kauth_cred_getruid(targetcred) != myuid) ||
	    (kauth_cred_getsvuid(targetcred) != myuid)) {
		allowed = FALSE;
		goto out;
	}

	/* Are target's groups a subset of my groups? */
	if (kauth_cred_gid_subset(targetcred, mycred, &allowed) ||
	    allowed == 0) {
		allowed = FALSE;
		goto out;
	}

	/* Has target switched credentials? */
	if (target->p_flag & P_SUGID) {
		allowed = FALSE;
		goto out;
	}

out:
	kauth_cred_unref(&targetcred);
	return allowed;
}
/*
 *	__KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__
 *
 *	Description:	Waits for the user space daemon to respond to the request
 *			we made. The function is declared noinline so that it is
 *			visible in stackshots and spindumps, and to aid debugging.
 */
__attribute__((noinline)) int __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(
	mach_port_t task_access_port, int32_t calling_pid, uint32_t calling_gid, int32_t target_pid)
{
	return check_task_access(task_access_port, calling_pid, calling_gid, target_pid);
}

/*
 *	Routine:	task_for_pid
 *	Purpose:
 *		Get the task port for another "process", named by its
 *		process ID on the same host as "target_task".
 *
 *		Only permitted to privileged processes, or processes
 *		with the same user ID.
 *
 *		Note: if pid == 0, an error is returned no matter who is calling.
 *
 * XXX This should be a BSD system call, not a Mach trap!!!
 */
kern_return_t
task_for_pid(
	struct task_for_pid_args *args)
{
	mach_port_name_t	target_tport = args->target_tport;
	int			pid = args->pid;
	user_addr_t		task_addr = args->t;
	proc_t			p = PROC_NULL;
	task_t			t1 = TASK_NULL;
	mach_port_name_t	tret = MACH_PORT_NULL;
	ipc_port_t		tfpport;
	void			*sright;
	int			error = 0;

	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKFORPID);
	AUDIT_ARG(pid, pid);
	AUDIT_ARG(mach_port1, target_tport);

	/* Always check if pid == 0 */
	if (pid == 0) {
		(void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return(KERN_FAILURE);
	}

	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return(KERN_FAILURE);
	}


	p = proc_find(pid);
	if (p == PROC_NULL) {
		error = KERN_FAILURE;
		goto tfpout;
	}

#if CONFIG_AUDIT
	AUDIT_ARG(process, p);
#endif

	if (!(task_for_pid_posix_check(p))) {
		error = KERN_FAILURE;
		goto tfpout;
	}

	if (p->task != TASK_NULL) {
		/* If we aren't root and target's task access port is set... */
		if (!kauth_cred_issuser(kauth_cred_get()) &&
		    p != current_proc() &&
		    (task_get_task_access_port(p->task, &tfpport) == 0) &&
		    (tfpport != IPC_PORT_NULL)) {

			if (tfpport == IPC_PORT_DEAD) {
				error = KERN_PROTECTION_FAILURE;
				goto tfpout;
			}

			/* Call up to the task access server */
			error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport, proc_selfpid(), kauth_getgid(), pid);

			if (error != MACH_MSG_SUCCESS) {
				if (error == MACH_RCV_INTERRUPTED)
					error = KERN_ABORTED;
				else
					error = KERN_FAILURE;
				goto tfpout;
			}
		}
#if CONFIG_MACF
		error = mac_proc_check_get_task(kauth_cred_get(), p);
		if (error) {
			error = KERN_FAILURE;
			goto tfpout;
		}
#endif

		/* Grant task port access */
		task_reference(p->task);
		extmod_statistics_incr_task_for_pid(p->task);

		sright = (void *) convert_task_to_port(p->task);
		tret = ipc_port_copyout_send(
				sright,
				get_task_ipcspace(current_task()));
	}
	error = KERN_SUCCESS;

tfpout:
	task_deallocate(t1);
	AUDIT_ARG(mach_port2, tret);
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
	if (p != PROC_NULL)
		proc_rele(p);
	AUDIT_MACH_SYSCALL_EXIT(error);
	return(error);
}
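/*
 * Hedged userspace sketch of the trap above (prototype in
 * <mach/mach_traps.h>: kern_return_t task_for_pid(mach_port_name_t
 * target_tport, int pid, mach_port_name_t *t)). The call is subject to
 * all of the posix, taskgated upcall, and MAC checks implemented above.
 *
 *	#include <mach/mach.h>
 *	#include <mach/mach_error.h>
 *	#include <stdio.h>
 *
 *	static kern_return_t
 *	get_task_port(int pid, mach_port_name_t *port)
 *	{
 *		kern_return_t kr = task_for_pid(mach_task_self(), pid, port);
 *
 *		if (kr != KERN_SUCCESS)
 *			fprintf(stderr, "task_for_pid(%d): %s\n",
 *			    pid, mach_error_string(kr));
 *		return kr;
 *	}
 */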
/*
 *	Routine:	task_name_for_pid
 *	Purpose:
 *		Get the task name port for another "process", named by its
 *		process ID on the same host as "target_task".
 *
 *		Only permitted to privileged processes, or processes
 *		with the same user ID.
 *
 * XXX This should be a BSD system call, not a Mach trap!!!
 */

kern_return_t
task_name_for_pid(
	struct task_name_for_pid_args *args)
{
	mach_port_name_t	target_tport = args->target_tport;
	int			pid = args->pid;
	user_addr_t		task_addr = args->t;
	proc_t			p = PROC_NULL;
	task_t			t1;
	mach_port_name_t	tret;
	void			*sright;
	int			error = 0, refheld = 0;
	kauth_cred_t		target_cred;

	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKNAMEFORPID);
	AUDIT_ARG(pid, pid);
	AUDIT_ARG(mach_port1, target_tport);

	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return(KERN_FAILURE);
	}

	p = proc_find(pid);
	if (p != PROC_NULL) {
		AUDIT_ARG(process, p);
		target_cred = kauth_cred_proc_ref(p);
		refheld = 1;

		if ((p->p_stat != SZOMB)
		    && ((current_proc() == p)
			|| kauth_cred_issuser(kauth_cred_get())
			|| ((kauth_cred_getuid(target_cred) == kauth_cred_getuid(kauth_cred_get())) &&
			    ((kauth_cred_getruid(target_cred) == kauth_getruid()))))) {

			if (p->task != TASK_NULL) {
				task_reference(p->task);
#if CONFIG_MACF
				error = mac_proc_check_get_task_name(kauth_cred_get(), p);
				if (error) {
					task_deallocate(p->task);
					goto noperm;
				}
#endif
				sright = (void *)convert_task_name_to_port(p->task);
				tret = ipc_port_copyout_send(sright,
						get_task_ipcspace(current_task()));
			} else
				tret = MACH_PORT_NULL;

			AUDIT_ARG(mach_port2, tret);
			(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
			task_deallocate(t1);
			error = KERN_SUCCESS;
			goto tnfpout;
		}
	}

#if CONFIG_MACF
noperm:
#endif
	task_deallocate(t1);
	tret = MACH_PORT_NULL;
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
	error = KERN_FAILURE;
tnfpout:
	if (refheld != 0)
		kauth_cred_unref(&target_cred);
	if (p != PROC_NULL)
		proc_rele(p);
	AUDIT_MACH_SYSCALL_EXIT(error);
	return(error);
}
kern_return_t
pid_suspend(struct proc *p __unused, struct pid_suspend_args *args, int *ret)
{
	task_t	target = NULL;
	proc_t	targetproc = PROC_NULL;
	int	pid = args->pid;
	int	error = 0;

#if CONFIG_MACF
	error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_SUSPEND);
	if (error) {
		error = EPERM;
		goto out;
	}
#endif

	if (pid == 0) {
		error = EPERM;
		goto out;
	}

	targetproc = proc_find(pid);
	if (targetproc == PROC_NULL) {
		error = ESRCH;
		goto out;
	}

	if (!task_for_pid_posix_check(targetproc)) {
		error = EPERM;
		goto out;
	}

	target = targetproc->task;
	if (target != TASK_NULL) {
		mach_port_t tfpport;

		/* If we aren't root and target's task access port is set... */
		if (!kauth_cred_issuser(kauth_cred_get()) &&
		    targetproc != current_proc() &&
		    (task_get_task_access_port(target, &tfpport) == 0) &&
		    (tfpport != IPC_PORT_NULL)) {

			if (tfpport == IPC_PORT_DEAD) {
				error = EACCES;
				goto out;
			}

			/* Call up to the task access server */
			error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport, proc_selfpid(), kauth_getgid(), pid);

			if (error != MACH_MSG_SUCCESS) {
				if (error == MACH_RCV_INTERRUPTED)
					error = EINTR;
				else
					error = EPERM;
				goto out;
			}
		}
	}

	task_reference(target);
	error = task_pidsuspend(target);
	if (error) {
		if (error == KERN_INVALID_ARGUMENT) {
			error = EINVAL;
		} else {
			error = EPERM;
		}
	}
#if CONFIG_MEMORYSTATUS
	else {
		memorystatus_on_suspend(targetproc);
	}
#endif

	task_deallocate(target);

out:
	if (targetproc != PROC_NULL)
		proc_rele(targetproc);
	*ret = error;
	return error;
}

kern_return_t
pid_resume(struct proc *p __unused, struct pid_resume_args *args, int *ret)
{
	task_t	target = NULL;
	proc_t	targetproc = PROC_NULL;
	int	pid = args->pid;
	int	error = 0;

#if CONFIG_MACF
	error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_RESUME);
	if (error) {
		error = EPERM;
		goto out;
	}
#endif

	if (pid == 0) {
		error = EPERM;
		goto out;
	}

	targetproc = proc_find(pid);
	if (targetproc == PROC_NULL) {
		error = ESRCH;
		goto out;
	}

	if (!task_for_pid_posix_check(targetproc)) {
		error = EPERM;
		goto out;
	}

	target = targetproc->task;
	if (target != TASK_NULL) {
		mach_port_t tfpport;

		/* If we aren't root and target's task access port is set... */
		if (!kauth_cred_issuser(kauth_cred_get()) &&
		    targetproc != current_proc() &&
		    (task_get_task_access_port(target, &tfpport) == 0) &&
		    (tfpport != IPC_PORT_NULL)) {

			if (tfpport == IPC_PORT_DEAD) {
				error = EACCES;
				goto out;
			}

			/* Call up to the task access server */
			error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport, proc_selfpid(), kauth_getgid(), pid);

			if (error != MACH_MSG_SUCCESS) {
				if (error == MACH_RCV_INTERRUPTED)
					error = EINTR;
				else
					error = EPERM;
				goto out;
			}
		}
	}


	task_reference(target);

#if CONFIG_MEMORYSTATUS
	memorystatus_on_resume(targetproc);
#endif

	error = task_pidresume(target);
	if (error) {
		if (error == KERN_INVALID_ARGUMENT) {
			error = EINVAL;
		} else {
			if (error == KERN_MEMORY_ERROR) {
				psignal(targetproc, SIGKILL);
				error = EIO;
			} else
				error = EPERM;
		}
	}

	task_deallocate(target);

out:
	if (targetproc != PROC_NULL)
		proc_rele(targetproc);

	*ret = error;
	return error;
}
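/*
 * Hedged sketch of the suspend/resume pairing. Userspace reaches these
 * syscalls through private libsystem wrappers, assumed here to be
 * int pid_suspend(int pid) and int pid_resume(int pid); the errno-style
 * results follow the mappings above (EPERM, ESRCH, EINTR, EINVAL, EIO).
 *
 *	static int
 *	freeze_briefly(int pid)
 *	{
 *		int err = pid_suspend(pid);
 *		if (err != 0)
 *			return err;	// target was never suspended
 *		// ... work while the target is suspended ...
 *		return pid_resume(pid);
 *	}
 */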
static int
sysctl_settfp_policy(__unused struct sysctl_oid *oidp, void *arg1,
    __unused int arg2, struct sysctl_req *req)
{
	int error = 0;
	int new_value;

	error = SYSCTL_OUT(req, arg1, sizeof(int));
	if (error || req->newptr == USER_ADDR_NULL)
		return(error);

	if (!kauth_cred_issuser(kauth_cred_get()))
		return(EPERM);

	if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) {
		goto out;
	}
	if ((new_value == KERN_TFP_POLICY_DENY)
	    || (new_value == KERN_TFP_POLICY_DEFAULT))
		tfp_policy = new_value;
	else
		error = EINVAL;
out:
	return(error);

}

#if defined(SECURE_KERNEL)
static int kern_secure_kernel = 1;
#else
static int kern_secure_kernel = 0;
#endif

SYSCTL_INT(_kern, OID_AUTO, secure_kernel, CTLFLAG_RD | CTLFLAG_LOCKED, &kern_secure_kernel, 0, "");

SYSCTL_NODE(_kern, KERN_TFP, tfp, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "tfp");
SYSCTL_PROC(_kern_tfp, KERN_TFP_POLICY, policy, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &tfp_policy, sizeof(uint32_t), &sysctl_settfp_policy ,"I","policy");

SYSCTL_INT(_vm, OID_AUTO, shared_region_trace_level, CTLFLAG_RW | CTLFLAG_LOCKED,
    &shared_region_trace_level, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_version, CTLFLAG_RD | CTLFLAG_LOCKED,
    &shared_region_version, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_persistence, CTLFLAG_RW | CTLFLAG_LOCKED,
    &shared_region_persistence, 0, "");

/*
 * shared_region_check_np:
 *
 * This system call is intended for dyld.
 *
 * dyld calls this when any process starts to see if the process's shared
 * region is already set up and ready to use.
 * This call returns the base address of the first mapping in the
 * process's shared region.
 * dyld will then check what's mapped at that address.
 *
 * If the shared region is empty, dyld will then attempt to map the shared
 * cache file in the shared region via the shared_region_map_np() system call.
 *
 * If something's already mapped in the shared region, dyld will check if it
 * matches the shared cache it would like to use for that process.
 * If it matches, everything's ready and the process can proceed and use the
 * shared region.
 * If it doesn't match, dyld will unmap the shared region and map the shared
 * cache into the process's address space via mmap().
 *
 * ERROR VALUES
 * EINVAL	no shared region
 * ENOMEM	shared region is empty
 * EFAULT	bad address for "start_address"
 */
int
shared_region_check_np(
	__unused struct proc			*p,
	struct shared_region_check_np_args	*uap,
	__unused int				*retvalp)
{
	vm_shared_region_t	shared_region;
	mach_vm_offset_t	start_address = 0;
	int			error;
	kern_return_t		kr;

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] -> check_np(0x%llx)\n",
		 (void *)VM_KERNEL_ADDRPERM(current_thread()),
		 p->p_pid, p->p_comm,
		 (uint64_t)uap->start_address));

	/* retrieve the current task's shared region */
	shared_region = vm_shared_region_get(current_task());
	if (shared_region != NULL) {
		/* retrieve address of its first mapping... */
		kr = vm_shared_region_start_address(shared_region,
						    &start_address);
		if (kr != KERN_SUCCESS) {
			error = ENOMEM;
		} else {
			/* ... and give it to the caller */
			error = copyout(&start_address,
					(user_addr_t) uap->start_address,
					sizeof (start_address));
			if (error) {
				SHARED_REGION_TRACE_ERROR(
					("shared_region: %p [%d(%s)] "
					 "check_np(0x%llx) "
					 "copyout(0x%llx) error %d\n",
					 (void *)VM_KERNEL_ADDRPERM(current_thread()),
					 p->p_pid, p->p_comm,
					 (uint64_t)uap->start_address, (uint64_t)start_address,
					 error));
			}
		}
		vm_shared_region_deallocate(shared_region);
	} else {
		/* no shared region! */
		error = EINVAL;
	}

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] check_np(0x%llx) <- 0x%llx %d\n",
		 (void *)VM_KERNEL_ADDRPERM(current_thread()),
		 p->p_pid, p->p_comm,
		 (uint64_t)uap->start_address, (uint64_t)start_address, error));

	return error;
}
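/*
 * Hedged sketch of the dyld-side call. Assuming a userspace stub of the
 * form int __shared_region_check_np(uint64_t *start_address), a caller
 * would probe for a populated region roughly like this, with the
 * EINVAL/ENOMEM/EFAULT cases documented above reported via errno:
 *
 *	uint64_t base = 0;
 *
 *	if (__shared_region_check_np(&base) == 0) {
 *		// region exists: inspect what is mapped at "base"
 *	} else {
 *		// no usable region: map the cache, e.g. via
 *		// shared_region_map_and_slide_np() below
 *	}
 */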
int
shared_region_copyin_mappings(
	struct proc			*p,
	user_addr_t			user_mappings,
	unsigned int			mappings_count,
	struct shared_file_mapping_np	*mappings)
{
	int		error = 0;
	vm_size_t	mappings_size = 0;

	/* get the list of mappings the caller wants us to establish */
	mappings_size = (vm_size_t) (mappings_count * sizeof (mappings[0]));
	error = copyin(user_mappings,
		       mappings,
		       mappings_size);
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(): "
			 "copyin(0x%llx, %d) failed (error=%d)\n",
			 (void *)VM_KERNEL_ADDRPERM(current_thread()),
			 p->p_pid, p->p_comm,
			 (uint64_t)user_mappings, mappings_count, error));
	}
	return error;
}

/*
 * shared_region_map_np()
 *
 * This system call is intended for dyld.
 *
 * dyld uses this to map a shared cache file into a shared region.
 * This is usually done only the first time a shared cache is needed.
 * Subsequent processes will just use the populated shared region without
 * requiring any further setup.
 */
int
_shared_region_map_and_slide(
	struct proc			*p,
	int				fd,
	uint32_t			mappings_count,
	struct shared_file_mapping_np	*mappings,
	uint32_t			slide,
	user_addr_t			slide_start,
	user_addr_t			slide_size)
{
	int				error;
	kern_return_t			kr;
	struct fileproc			*fp;
	struct vnode			*vp, *root_vp, *scdir_vp;
	struct vnode_attr		va;
	off_t				fs;
	memory_object_size_t		file_size;
#if CONFIG_MACF
	vm_prot_t			maxprot = VM_PROT_ALL;
#endif
	memory_object_control_t		file_control;
	struct vm_shared_region		*shared_region;

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] -> map\n",
		 (void *)VM_KERNEL_ADDRPERM(current_thread()),
		 p->p_pid, p->p_comm));

	shared_region = NULL;
	fp = NULL;
	vp = NULL;
	scdir_vp = NULL;

	/* get file structure from file descriptor */
	error = fp_lookup(p, fd, &fp, 0);
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d lookup failed (error=%d)\n",
			 (void *)VM_KERNEL_ADDRPERM(current_thread()),
			 p->p_pid, p->p_comm, fd, error));
		goto done;
	}

	/* make sure we're attempting to map a vnode */
	if (FILEGLOB_DTYPE(fp->f_fglob) != DTYPE_VNODE) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d not a vnode (type=%d)\n",
			 (void *)VM_KERNEL_ADDRPERM(current_thread()),
			 p->p_pid, p->p_comm,
			 fd, FILEGLOB_DTYPE(fp->f_fglob)));
		error = EINVAL;
		goto done;
	}

	/* we need at least read permission on the file */
	if (! (fp->f_fglob->fg_flag & FREAD)) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d not readable\n",
			 (void *)VM_KERNEL_ADDRPERM(current_thread()),
			 p->p_pid, p->p_comm, fd));
		error = EPERM;
		goto done;
	}

	/* get vnode from file structure */
	error = vnode_getwithref((vnode_t) fp->f_fglob->fg_data);
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d getwithref failed (error=%d)\n",
			 (void *)VM_KERNEL_ADDRPERM(current_thread()),
			 p->p_pid, p->p_comm, fd, error));
		goto done;
	}
	vp = (struct vnode *) fp->f_fglob->fg_data;

	/* make sure the vnode is a regular file */
	if (vp->v_type != VREG) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "not a file (type=%d)\n",
			 (void *)VM_KERNEL_ADDRPERM(current_thread()),
			 p->p_pid, p->p_comm,
			 (void *)VM_KERNEL_ADDRPERM(vp),
			 vp->v_name, vp->v_type));
		error = EINVAL;
		goto done;
	}

#if CONFIG_MACF
	/* pass in 0 for the offset argument because AMFI does not need the offset
	   of the shared cache */
	error = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()),
			fp->f_fglob, VM_PROT_ALL, MAP_FILE, 0, &maxprot);
	if (error) {
		goto done;
	}
#endif /* MAC */

#if CONFIG_PROTECT
	/* check for content protection access */
	{
		error = cp_handle_vnop(vp, CP_READ_ACCESS | CP_WRITE_ACCESS, 0);
		if (error) {
			goto done;
		}
	}
#endif /* CONFIG_PROTECT */

	/* make sure vnode is on the process's root volume */
	root_vp = p->p_fd->fd_rdir;
	if (root_vp == NULL) {
		root_vp = rootvnode;
	} else {
		/*
		 * Chroot-ed processes can't use the shared_region.
		 */
		error = EINVAL;
		goto done;
	}

	if (vp->v_mount != root_vp->v_mount) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "not on process's root volume\n",
			 (void *)VM_KERNEL_ADDRPERM(current_thread()),
			 p->p_pid, p->p_comm,
			 (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name));
		error = EPERM;
		goto done;
	}

	/* make sure vnode is owned by "root" */
	VATTR_INIT(&va);
	VATTR_WANTED(&va, va_uid);
	error = vnode_getattr(vp, &va, vfs_context_current());
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "vnode_getattr(%p) failed (error=%d)\n",
			 (void *)VM_KERNEL_ADDRPERM(current_thread()),
			 p->p_pid, p->p_comm,
			 (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name,
			 (void *)VM_KERNEL_ADDRPERM(vp), error));
		goto done;
	}
	if (va.va_uid != 0) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "owned by uid=%d instead of 0\n",
			 (void *)VM_KERNEL_ADDRPERM(current_thread()),
			 p->p_pid, p->p_comm,
			 (void *)VM_KERNEL_ADDRPERM(vp),
			 vp->v_name, va.va_uid));
		error = EPERM;
		goto done;
	}

	if (scdir_enforce) {
		/* get vnode for scdir_path */
		error = vnode_lookup(scdir_path, 0, &scdir_vp, vfs_context_current());
		if (error) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map(%p:'%s'): "
				 "vnode_lookup(%s) failed (error=%d)\n",
				 (void *)VM_KERNEL_ADDRPERM(current_thread()),
				 p->p_pid, p->p_comm,
				 (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name,
				 scdir_path, error));
			goto done;
		}

		/* ensure parent is scdir_vp */
		if (vnode_parent(vp) != scdir_vp) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map(%p:'%s'): "
				 "shared cache file not in %s\n",
				 (void *)VM_KERNEL_ADDRPERM(current_thread()),
				 p->p_pid, p->p_comm,
				 (void *)VM_KERNEL_ADDRPERM(vp),
				 vp->v_name, scdir_path));
			error = EPERM;
			goto done;
		}
	}

	/* get vnode size */
	error = vnode_size(vp, &fs, vfs_context_current());
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "vnode_size(%p) failed (error=%d)\n",
			 (void *)VM_KERNEL_ADDRPERM(current_thread()),
			 p->p_pid, p->p_comm,
			 (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name,
			 (void *)VM_KERNEL_ADDRPERM(vp), error));
		goto done;
	}
	file_size = fs;

	/* get the file's memory object handle */
	file_control = ubc_getobject(vp, UBC_HOLDOBJECT);
	if (file_control == MEMORY_OBJECT_CONTROL_NULL) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "no memory object\n",
			 (void *)VM_KERNEL_ADDRPERM(current_thread()),
			 p->p_pid, p->p_comm,
			 (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name));
		error = EINVAL;
		goto done;
	}


	/* get the process's shared region (setup in vm_map_exec()) */
	shared_region = vm_shared_region_get(current_task());
	if (shared_region == NULL) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "no shared region\n",
			 (void *)VM_KERNEL_ADDRPERM(current_thread()),
			 p->p_pid, p->p_comm,
			 (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name));
		goto done;
	}

	/* map the file into that shared region's submap */
	kr = vm_shared_region_map_file(shared_region,
				       mappings_count,
				       mappings,
				       file_control,
				       file_size,
				       (void *) p->p_fd->fd_rdir,
				       slide,
				       slide_start,
				       slide_size);
	if (kr != KERN_SUCCESS) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "vm_shared_region_map_file() failed kr=0x%x\n",
			 (void *)VM_KERNEL_ADDRPERM(current_thread()),
			 p->p_pid, p->p_comm,
			 (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name, kr));
		switch (kr) {
		case KERN_INVALID_ADDRESS:
			error = EFAULT;
			break;
		case KERN_PROTECTION_FAILURE:
			error = EPERM;
			break;
		case KERN_NO_SPACE:
			error = ENOMEM;
			break;
		case KERN_FAILURE:
		case KERN_INVALID_ARGUMENT:
		default:
			error = EINVAL;
			break;
		}
		goto done;
	}

	error = 0;

	vnode_lock_spin(vp);

	vp->v_flag |= VSHARED_DYLD;

	vnode_unlock(vp);

	/* update the vnode's access time */
	if (! (vnode_vfsvisflags(vp) & MNT_NOATIME)) {
		VATTR_INIT(&va);
		nanotime(&va.va_access_time);
		VATTR_SET_ACTIVE(&va, va_access_time);
		vnode_setattr(vp, &va, vfs_context_current());
	}

	if (p->p_flag & P_NOSHLIB) {
		/* signal that this process is now using split libraries */
		OSBitAndAtomic(~((uint32_t)P_NOSHLIB), &p->p_flag);
	}

done:
	if (vp != NULL) {
		/*
		 * release the vnode...
		 * ubc_map() still holds it for us in the non-error case
		 */
		(void) vnode_put(vp);
		vp = NULL;
	}
	if (fp != NULL) {
		/* release the file descriptor */
		fp_drop(p, fd, fp, 0);
		fp = NULL;
	}
	if (scdir_vp != NULL) {
		(void)vnode_put(scdir_vp);
		scdir_vp = NULL;
	}

	if (shared_region != NULL) {
		vm_shared_region_deallocate(shared_region);
	}

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] <- map\n",
		 (void *)VM_KERNEL_ADDRPERM(current_thread()),
		 p->p_pid, p->p_comm));

	return error;
}

int
shared_region_map_and_slide_np(
	struct proc				*p,
	struct shared_region_map_and_slide_np_args	*uap,
	__unused int				*retvalp)
{
	struct shared_file_mapping_np	*mappings;
	unsigned int			mappings_count = uap->count;
	kern_return_t			kr = KERN_SUCCESS;
	uint32_t			slide = uap->slide;

#define SFM_MAX_STACK	8
	struct shared_file_mapping_np	stack_mappings[SFM_MAX_STACK];

	/* Is the process chrooted? */
	if (p->p_fd->fd_rdir != NULL) {
		kr = EINVAL;
		goto done;
	}

	if ((kr = vm_shared_region_sliding_valid(slide)) != KERN_SUCCESS) {
		if (kr == KERN_INVALID_ARGUMENT) {
			/*
			 * This will happen if we request sliding again
			 * with the same slide value that was used earlier
			 * for the very first sliding.
			 */
			kr = KERN_SUCCESS;
		}
		goto done;
	}

	if (mappings_count == 0) {
		SHARED_REGION_TRACE_INFO(
			("shared_region: %p [%d(%s)] map(): "
			 "no mappings\n",
			 (void *)VM_KERNEL_ADDRPERM(current_thread()),
			 p->p_pid, p->p_comm));
		kr = 0;	/* no mappings: we're done! */
		goto done;
	} else if (mappings_count <= SFM_MAX_STACK) {
		mappings = &stack_mappings[0];
	} else {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(): "
			 "too many mappings (%d)\n",
			 (void *)VM_KERNEL_ADDRPERM(current_thread()),
			 p->p_pid, p->p_comm,
			 mappings_count));
		kr = KERN_FAILURE;
		goto done;
	}

	if ((kr = shared_region_copyin_mappings(p, uap->mappings, uap->count, mappings))) {
		goto done;
	}


	kr = _shared_region_map_and_slide(p, uap->fd, mappings_count, mappings,
					  slide,
					  uap->slide_start, uap->slide_size);
	if (kr != KERN_SUCCESS) {
		return kr;
	}

done:
	return kr;
}
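/*
 * Hedged sketch of how a dyld-like caller might describe a single mapping
 * and invoke the syscall. The stub signature is assumed (something like
 * int __shared_region_map_and_slide_np(int fd, uint32_t count, const struct
 * shared_file_mapping_np mappings[], uint32_t slide, uint64_t *slide_start,
 * uint32_t slide_size)); the field values are illustrative only. The
 * shared_file_mapping_np layout comes from <mach/shared_region.h>.
 *
 *	struct shared_file_mapping_np m = {
 *		.sfm_address	 = sr_base,		// target VA in the region
 *		.sfm_size	 = 0x1000000,		// bytes to map
 *		.sfm_file_offset = 0,			// offset in the cache file
 *		.sfm_max_prot	 = VM_PROT_READ | VM_PROT_EXECUTE,
 *		.sfm_init_prot	 = VM_PROT_READ | VM_PROT_EXECUTE,
 *	};
 *
 *	int rc = __shared_region_map_and_slide_np(fd, 1, &m, slide,
 *	    slide_start, slide_size);
 */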
/* sysctl overflow room */

SYSCTL_INT (_vm, OID_AUTO, pagesize, CTLFLAG_RD | CTLFLAG_LOCKED,
	    (int *) &page_size, 0, "vm page size");

/* vm_page_free_target is provided as a makeshift solution for applications that want to
   allocate buffer space, possibly purgeable memory, but not cause inactive pages to be
   reclaimed. It allows the app to calculate how much memory is free outside the free target. */
extern unsigned int	vm_page_free_target;
SYSCTL_INT(_vm, OID_AUTO, vm_page_free_target, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_free_target, 0, "Pageout daemon free target");

extern unsigned int	vm_memory_pressure;
SYSCTL_INT(_vm, OID_AUTO, memory_pressure, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_memory_pressure, 0, "Memory pressure indicator");

static int
vm_ctl_page_free_wanted SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	unsigned int page_free_wanted;

	page_free_wanted = mach_vm_ctl_page_free_wanted();
	return SYSCTL_OUT(req, &page_free_wanted, sizeof (page_free_wanted));
}
SYSCTL_PROC(_vm, OID_AUTO, page_free_wanted,
	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
	    0, 0, vm_ctl_page_free_wanted, "I", "");

extern unsigned int	vm_page_purgeable_count;
SYSCTL_INT(_vm, OID_AUTO, page_purgeable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_purgeable_count, 0, "Purgeable page count");

extern unsigned int	vm_page_purgeable_wired_count;
SYSCTL_INT(_vm, OID_AUTO, page_purgeable_wired_count, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_purgeable_wired_count, 0, "Wired purgeable page count");

extern unsigned int vm_pageout_purged_objects;
SYSCTL_INT(_vm, OID_AUTO, pageout_purged_objects, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_pageout_purged_objects, 0, "System purged object count");

extern int madvise_free_debug;
SYSCTL_INT(_vm, OID_AUTO, madvise_free_debug, CTLFLAG_RW | CTLFLAG_LOCKED,
	   &madvise_free_debug, 0, "zero-fill on madvise(MADV_FREE*)");

SYSCTL_INT(_vm, OID_AUTO, page_reusable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reusable_count, 0, "Reusable page count");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_success, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reusable_pages_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reusable_pages_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reusable_pages_shared, "");
SYSCTL_QUAD(_vm, OID_AUTO, all_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.all_reusable_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, partial_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.partial_reusable_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reuse_pages_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reuse_pages_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, all_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.all_reuse_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, partial_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.partial_reuse_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.can_reuse_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.can_reuse_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_reclaimed, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reusable_reclaimed, "");


extern unsigned int	vm_page_free_count, vm_page_speculative_count;
SYSCTL_UINT(_vm, OID_AUTO, page_free_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_free_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_speculative_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_speculative_count, 0, "");

extern unsigned int	vm_page_cleaned_count;
SYSCTL_UINT(_vm, OID_AUTO, page_cleaned_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_cleaned_count, 0, "Cleaned queue size");

/* pageout counts */
extern unsigned int vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external, vm_pageout_inactive_clean, vm_pageout_speculative_clean, vm_pageout_inactive_used;
extern unsigned int vm_pageout_freed_from_inactive_clean, vm_pageout_freed_from_speculative;
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_dirty_internal, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_dirty_internal, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_dirty_external, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_dirty_external, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_speculative_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_speculative_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_used, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_used, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_freed_from_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_freed_from_inactive_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_freed_from_speculative, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_freed_from_speculative, 0, "");

extern unsigned int vm_pageout_freed_from_cleaned;
SYSCTL_UINT(_vm, OID_AUTO, pageout_freed_from_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_freed_from_cleaned, 0, "");

/* counts of pages entering the cleaned queue */
extern unsigned int vm_pageout_enqueued_cleaned, vm_pageout_enqueued_cleaned_from_inactive_clean, vm_pageout_enqueued_cleaned_from_inactive_dirty;
SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_enqueued_cleaned, 0, ""); /* sum of next two */
SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned_from_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_enqueued_cleaned_from_inactive_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned_from_inactive_dirty, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_enqueued_cleaned_from_inactive_dirty, 0, "");

/* counts of pages leaving the cleaned queue */
extern unsigned int vm_pageout_cleaned_reclaimed, vm_pageout_cleaned_reactivated, vm_pageout_cleaned_reference_reactivated, vm_pageout_cleaned_volatile_reactivated, vm_pageout_cleaned_fault_reactivated, vm_pageout_cleaned_commit_reactivated, vm_pageout_cleaned_busy, vm_pageout_cleaned_nolock;
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_reclaimed, 0, "Cleaned pages reclaimed");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_reactivated, 0, "Cleaned pages reactivated"); /* sum of all reactivated AND busy and nolock (even though those actually get re-deactivated) */
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reference_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_reference_reactivated, 0, "Cleaned pages reference reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_volatile_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_volatile_reactivated, 0, "Cleaned pages volatile reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_fault_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_fault_reactivated, 0, "Cleaned pages fault reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_commit_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_commit_reactivated, 0, "Cleaned pages commit reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_busy, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_busy, 0, "Cleaned pages busy (deactivated)");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_nolock, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_nolock, 0, "Cleaned pages no-lock (deactivated)");

/* counts of pages prefaulted when entering a memory object */
extern int64_t vm_prefault_nb_pages, vm_prefault_nb_bailout;
SYSCTL_QUAD(_vm, OID_AUTO, prefault_nb_pages, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_prefault_nb_pages, "");
SYSCTL_QUAD(_vm, OID_AUTO, prefault_nb_bailout, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_prefault_nb_bailout, "");
#include <kern/thread.h>
#include <sys/user.h>

void vm_pageout_io_throttle(void);

void vm_pageout_io_throttle(void)
{
	struct uthread *uthread = get_bsdthread_info(current_thread());

	/*
	 * If the thread is marked as a low-priority I/O type and the I/O
	 * we issued while in this cleaning operation collided with normal
	 * I/O operations, delay in order to mitigate the impact of this
	 * task on the normal operation of the system.
	 */

	if (uthread->uu_lowpri_window) {
		throttle_lowpri_io(1);
	}
}

int
vm_pressure_monitor(
	__unused struct proc *p,
	struct vm_pressure_monitor_args *uap,
	int *retval)
{
	kern_return_t	kr;
	uint32_t	pages_reclaimed;
	uint32_t	pages_wanted;

	kr = mach_vm_pressure_monitor(
		(boolean_t) uap->wait_for_pressure,
		uap->nsecs_monitored,
		(uap->pages_reclaimed) ? &pages_reclaimed : NULL,
		&pages_wanted);

	switch (kr) {
	case KERN_SUCCESS:
		break;
	case KERN_ABORTED:
		return EINTR;
	default:
		return EINVAL;
	}

	if (uap->pages_reclaimed) {
		if (copyout((void *)&pages_reclaimed,
			    uap->pages_reclaimed,
			    sizeof (pages_reclaimed)) != 0) {
			return EFAULT;
		}
	}

	*retval = (int) pages_wanted;
	return 0;
}
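/*
 * Hedged userspace sketch of the syscall above, assuming the libsystem
 * prototype int vm_pressure_monitor(int wait_for_pressure,
 * int nsecs_monitored, uint32_t *pages_reclaimed). The pages-wanted count
 * comes back as the (non-negative) return value via *retval above; -1 with
 * errno set to EINTR/EINVAL/EFAULT covers the error cases.
 *
 *	uint32_t reclaimed = 0;
 *	int wanted = vm_pressure_monitor(1, 0, &reclaimed); // block until pressure
 *
 *	if (wanted >= 0)
 *		printf("reclaimed %u pages, pageout wants %d more\n",
 *		    reclaimed, wanted);
 */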
int
kas_info(struct proc *p,
	 struct kas_info_args *uap,
	 int *retval __unused)
{
#ifdef SECURE_KERNEL
	(void)p;
	(void)uap;
	return ENOTSUP;
#else /* !SECURE_KERNEL */
	int		selector = uap->selector;
	user_addr_t	valuep = uap->value;
	user_addr_t	sizep = uap->size;
	user_size_t	size;
	int		error;

	if (!kauth_cred_issuser(kauth_cred_get())) {
		return EPERM;
	}

#if CONFIG_MACF
	error = mac_system_check_kas_info(kauth_cred_get(), selector);
	if (error) {
		return error;
	}
#endif

	if (IS_64BIT_PROCESS(p)) {
		user64_size_t size64;
		error = copyin(sizep, &size64, sizeof(size64));
		size = (user_size_t)size64;
	} else {
		user32_size_t size32;
		error = copyin(sizep, &size32, sizeof(size32));
		size = (user_size_t)size32;
	}
	if (error) {
		return error;
	}

	switch (selector) {
	case KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR:
		{
			uint64_t slide = vm_kernel_slide;

			if (sizeof(slide) != size) {
				return EINVAL;
			}

			if (IS_64BIT_PROCESS(p)) {
				user64_size_t size64 = (user64_size_t)size;
				error = copyout(&size64, sizep, sizeof(size64));
			} else {
				user32_size_t size32 = (user32_size_t)size;
				error = copyout(&size32, sizep, sizeof(size32));
			}
			if (error) {
				return error;
			}

			error = copyout(&slide, valuep, sizeof(slide));
			if (error) {
				return error;
			}
		}
		break;
	default:
		return EINVAL;
	}

	return 0;
#endif /* !SECURE_KERNEL */
}
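/*
 * Hedged userspace sketch of kas_info() fetching the kernel text slide
 * (root only; prototype in <sys/kas_info.h>: int kas_info(int selector,
 * void *value, size_t *size)). On SECURE_KERNEL builds the call fails
 * with ENOTSUP, per the #ifdef above.
 *
 *	#include <sys/kas_info.h>
 *	#include <stdio.h>
 *	#include <stdint.h>
 *
 *	int
 *	main(void)
 *	{
 *		uint64_t slide = 0;
 *		size_t size = sizeof(slide);
 *
 *		if (kas_info(KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR, &slide, &size) == 0)
 *			printf("kernel text slide: 0x%llx\n",
 *			    (unsigned long long)slide);
 *		return 0;
 *	}
 */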