]> git.saurik.com Git - apple/xnu.git/blob - bsd/vm/vm_unix.c
54e6d30c6f1ea5c7c5358da6a38d1f661a94550d
[apple/xnu.git] / bsd / vm / vm_unix.c
1 /*
2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Mach Operating System
30 * Copyright (c) 1987 Carnegie-Mellon University
31 * All rights reserved. The CMU software License Agreement specifies
32 * the terms and conditions for use and redistribution.
33 */
34 /*
35 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
36 * support for mandatory and extensible security protections. This notice
37 * is included in support of clause 2.2 (b) of the Apple Public License,
38 * Version 2.0.
39 */
40
41 #include <meta_features.h>
42
43 #include <kern/task.h>
44 #include <kern/thread.h>
45 #include <kern/debug.h>
46 #include <kern/lock.h>
47 #include <mach/mach_traps.h>
48 #include <mach/port.h>
49 #include <mach/task.h>
50 #include <mach/task_access.h>
51 #include <mach/task_special_ports.h>
52 #include <mach/time_value.h>
53 #include <mach/vm_map.h>
54 #include <mach/vm_param.h>
55 #include <mach/vm_prot.h>
56
57 #include <sys/file_internal.h>
58 #include <sys/param.h>
59 #include <sys/systm.h>
60 #include <sys/dir.h>
61 #include <sys/namei.h>
62 #include <sys/proc_internal.h>
63 #include <sys/kauth.h>
64 #include <sys/vm.h>
65 #include <sys/file.h>
66 #include <sys/vnode_internal.h>
67 #include <sys/mount.h>
68 #include <sys/trace.h>
69 #include <sys/kernel.h>
70 #include <sys/ubc_internal.h>
71 #include <sys/user.h>
72 #include <sys/syslog.h>
73 #include <sys/stat.h>
74 #include <sys/sysproto.h>
75 #include <sys/mman.h>
76 #include <sys/sysctl.h>
77
78 #include <bsm/audit_kernel.h>
79 #include <bsm/audit_kevents.h>
80
81 #include <kern/kalloc.h>
82 #include <vm/vm_map.h>
83 #include <vm/vm_kern.h>
84
85 #include <machine/spl.h>
86
87 #include <mach/shared_region.h>
88 #include <vm/vm_shared_region.h>
89
90 #include <vm/vm_protos.h>
91
/*
 * Sysctl's related to data/stack execution.  See osfmk/vm/vm_map.c
 */

#ifndef SECURE_KERNEL
/* Tunables defined in osfmk/vm/vm_map.c; exposed read-write under "vm". */
extern int allow_stack_exec, allow_data_exec;

SYSCTL_INT(_vm, OID_AUTO, allow_stack_exec, CTLFLAG_RW, &allow_stack_exec, 0, "");
SYSCTL_INT(_vm, OID_AUTO, allow_data_exec, CTLFLAG_RW, &allow_data_exec, 0, "");
#endif /* !SECURE_KERNEL */
102
#if CONFIG_NO_PRINTF_STRINGS
/* Stub used when the kernel is built without printf format strings. */
void
log_stack_execution_failure(__unused addr64_t a, __unused vm_prot_t b)
{
}
#else
/* Human-readable protection names, indexed by (prot & VM_PROT_ALL). */
static const char *prot_values[] = {
	"none",
	"read-only",
	"write-only",
	"read-write",
	"execute-only",
	"read-execute",
	"write-execute",
	"read-write-execute"
};

/*
 * Log an attempted instruction fetch from a non-executable (data or
 * stack) mapping of the current process, naming the faulting address
 * and the mapping's protections.
 */
void
log_stack_execution_failure(addr64_t vaddr, vm_prot_t prot)
{
	printf("Data/Stack execution not permitted: %s[pid %d] at virtual address 0x%qx, protections were %s\n",
		current_proc()->p_comm, current_proc()->p_pid, vaddr, prot_values[prot & VM_PROT_ALL]);
}
#endif
127
128
129 int
130 useracc(
131 user_addr_t addr,
132 user_size_t len,
133 int prot)
134 {
135 return (vm_map_check_protection(
136 current_map(),
137 vm_map_trunc_page(addr), vm_map_round_page(addr+len),
138 prot == B_READ ? VM_PROT_READ : VM_PROT_WRITE));
139 }
140
141 int
142 vslock(
143 user_addr_t addr,
144 user_size_t len)
145 {
146 kern_return_t kret;
147 kret = vm_map_wire(current_map(), vm_map_trunc_page(addr),
148 vm_map_round_page(addr+len),
149 VM_PROT_READ | VM_PROT_WRITE ,FALSE);
150
151 switch (kret) {
152 case KERN_SUCCESS:
153 return (0);
154 case KERN_INVALID_ADDRESS:
155 case KERN_NO_SPACE:
156 return (ENOMEM);
157 case KERN_PROTECTION_FAILURE:
158 return (EACCES);
159 default:
160 return (EINVAL);
161 }
162 }
163
/*
 * vsunlock: unwire the user pages spanning [addr, addr+len), undoing a
 * previous vslock().  "dirtied" asks that the pages be marked modified
 * first, but that path is currently compiled out (see FIXME blocks), so
 * the argument is ignored.  Returns a BSD errno (0 on success).
 */
int
vsunlock(
	user_addr_t addr,
	user_size_t len,
	__unused int dirtied)
{
#if FIXME  /* [ */
	pmap_t		pmap;
	vm_page_t	pg;
	vm_map_offset_t	vaddr;
	ppnum_t		paddr;
#endif  /* FIXME ] */
	kern_return_t	kret;

#if FIXME  /* [ */
	/* Disabled: walk each page and mark it modified before unwiring. */
	if (dirtied) {
		pmap = get_task_pmap(current_task());
		for (vaddr = vm_map_trunc_page(addr);
		     vaddr < vm_map_round_page(addr+len);
		     vaddr += PAGE_SIZE) {
			paddr = pmap_extract(pmap, vaddr);
			pg = PHYS_TO_VM_PAGE(paddr);
			vm_page_set_modified(pg);
		}
	}
#endif  /* FIXME ] */
#ifdef	lint
	dirtied++;
#endif	/* lint */
	kret = vm_map_unwire(current_map(), vm_map_trunc_page(addr),
				vm_map_round_page(addr+len), FALSE);
	/* Translate the Mach status into a BSD errno (same table as vslock). */
	switch (kret) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}
207
208 int
209 subyte(
210 user_addr_t addr,
211 int byte)
212 {
213 char character;
214
215 character = (char)byte;
216 return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1);
217 }
218
219 int
220 suibyte(
221 user_addr_t addr,
222 int byte)
223 {
224 char character;
225
226 character = (char)byte;
227 return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1);
228 }
229
230 int fubyte(user_addr_t addr)
231 {
232 unsigned char byte;
233
234 if (copyin(addr, (void *) &byte, sizeof(char)))
235 return(-1);
236 return(byte);
237 }
238
239 int fuibyte(user_addr_t addr)
240 {
241 unsigned char byte;
242
243 if (copyin(addr, (void *) &(byte), sizeof(char)))
244 return(-1);
245 return(byte);
246 }
247
248 int
249 suword(
250 user_addr_t addr,
251 long word)
252 {
253 return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1);
254 }
255
256 long fuword(user_addr_t addr)
257 {
258 long word;
259
260 if (copyin(addr, (void *) &word, sizeof(int)))
261 return(-1);
262 return(word);
263 }
264
265 /* suiword and fuiword are the same as suword and fuword, respectively */
266
267 int
268 suiword(
269 user_addr_t addr,
270 long word)
271 {
272 return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1);
273 }
274
275 long fuiword(user_addr_t addr)
276 {
277 long word;
278
279 if (copyin(addr, (void *) &word, sizeof(int)))
280 return(-1);
281 return(word);
282 }
283
284 /*
285 * With a 32-bit kernel and mixed 32/64-bit user tasks, this interface allows the
286 * fetching and setting of process-sized size_t and pointer values.
287 */
288 int
289 sulong(user_addr_t addr, int64_t word)
290 {
291
292 if (IS_64BIT_PROCESS(current_proc())) {
293 return(copyout((void *)&word, addr, sizeof(word)) == 0 ? 0 : -1);
294 } else {
295 return(suiword(addr, (long)word));
296 }
297 }
298
299 int64_t
300 fulong(user_addr_t addr)
301 {
302 int64_t longword;
303
304 if (IS_64BIT_PROCESS(current_proc())) {
305 if (copyin(addr, (void *)&longword, sizeof(longword)) != 0)
306 return(-1);
307 return(longword);
308 } else {
309 return((int64_t)fuiword(addr));
310 }
311 }
312
313 int
314 suulong(user_addr_t addr, uint64_t uword)
315 {
316
317 if (IS_64BIT_PROCESS(current_proc())) {
318 return(copyout((void *)&uword, addr, sizeof(uword)) == 0 ? 0 : -1);
319 } else {
320 return(suiword(addr, (u_long)uword));
321 }
322 }
323
324 uint64_t
325 fuulong(user_addr_t addr)
326 {
327 uint64_t ulongword;
328
329 if (IS_64BIT_PROCESS(current_proc())) {
330 if (copyin(addr, (void *)&ulongword, sizeof(ulongword)) != 0)
331 return(-1ULL);
332 return(ulongword);
333 } else {
334 return((uint64_t)fuiword(addr));
335 }
336 }
337
338 int
339 swapon(__unused proc_t procp, __unused struct swapon_args *uap, __unused int *retval)
340 {
341 return(ENOTSUP);
342 }
343
344
/*
 * pid_for_task (Mach trap)
 *
 * Translate a task port name in the caller's IPC space into the BSD
 * pid of the process backing that task.  The pid (-1 on failure) is
 * always copied out to args->pid; returns KERN_SUCCESS or KERN_FAILURE.
 */
kern_return_t
pid_for_task(
	struct pid_for_task_args *args)
{
	mach_port_name_t	t = args->t;
	user_addr_t		pid_addr  = args->pid;
	proc_t p;
	task_t		t1;
	int	pid = -1;
	kern_return_t	err = KERN_SUCCESS;

	AUDIT_MACH_SYSCALL_ENTER(AUE_PIDFORTASK);
	AUDIT_ARG(mach_port1, t);

	/* Convert the port name into a task reference (ref taken on success). */
	t1 = port_name_to_task(t);

	if (t1 == TASK_NULL) {
		err = KERN_FAILURE;
		goto pftout;
	} else {
		p = get_bsdtask_info(t1);
		if (p) {
			pid  = proc_pid(p);
			err = KERN_SUCCESS;
		} else {
			/* Task has no BSD process attached. */
			err = KERN_FAILURE;
		}
	}
	task_deallocate(t1);	/* drop the ref from port_name_to_task() */
pftout:
	AUDIT_ARG(pid, pid);
	/* Always report a pid to the caller; it is -1 on any failure. */
	(void) copyout((char *) &pid, pid_addr, sizeof(int));
	AUDIT_MACH_SYSCALL_EXIT(err);
	return(err);
}
380
/*
 * Policy for task_for_pid():
 *
 * tfp_policy = KERN_TFP_POLICY_DENY; Deny Mode: None allowed except for self
 * tfp_policy = KERN_TFP_POLICY_DEFAULT; default mode: all posix checks and upcall via task port for authentication
 *
 * Settable by root through the kern.tfp.policy sysctl
 * (sysctl_settfp_policy() below).
 */
static int tfp_policy = KERN_TFP_POLICY_DEFAULT;
388
389 /*
390 * Routine: task_for_pid_posix_check
391 * Purpose:
392 * Verify that the current process should be allowed to
393 * get the target process's task port. This is only
394 * permitted if:
395 * - The current process is root
396 * OR all of the following are true:
397 * - The target process's real, effective, and saved uids
398 * are the same as the current proc's euid,
399 * - The target process's group set is a subset of the
400 * calling process's group set, and
401 * - The target process hasn't switched credentials.
402 *
403 * Returns: TRUE: permitted
404 * FALSE: denied
405 */
static int
task_for_pid_posix_check(proc_t target)
{
	kauth_cred_t targetcred, mycred;
	uid_t myuid;
	int allowed;

	/* No task_for_pid on bad targets */
	if (target == PROC_NULL || target->p_stat == SZOMB) {
		return FALSE;
	}

	mycred = kauth_cred_get();
	myuid = kauth_cred_getuid(mycred);

	/* If we're running as root, the check passes */
	if (kauth_cred_issuser(mycred))
		return TRUE;

	/* We're allowed to get our own task port */
	if (target == current_proc())
		return TRUE;

	/*
	 * Under DENY, only root can get another proc's task port,
	 * so no more checks are needed.
	 */
	if (tfp_policy == KERN_TFP_POLICY_DENY) {
		return FALSE;
	}

	/* Take a reference on the target's credential for the tests below;
	 * it is released at "out". */
	targetcred = kauth_cred_proc_ref(target);
	allowed = TRUE;

	/* Do target's ruid, euid, and saved uid match my euid? */
	if ((kauth_cred_getuid(targetcred) != myuid) ||
	    (targetcred->cr_ruid != myuid) ||
	    (targetcred->cr_svuid != myuid)) {
		allowed = FALSE;
		goto out;
	}

	/* Are target's groups a subset of my groups?
	 * (Fails closed if the subset query itself returns an error.) */
	if (kauth_cred_gid_subset(targetcred, mycred, &allowed) ||
	    allowed == 0) {
		allowed = FALSE;
		goto out;
	}

	/* Has target switched credentials? */
	if (target->p_flag & P_SUGID) {
		allowed = FALSE;
		goto out;
	}

out:
	/* Drop the credential reference taken above. */
	kauth_cred_unref(&targetcred);
	return allowed;
}
465
466 /*
467 * Routine: task_for_pid
468 * Purpose:
469 * Get the task port for another "process", named by its
470 * process ID on the same host as "target_task".
471 *
472 * Only permitted to privileged processes, or processes
473 * with the same user ID.
474 *
475 * XXX This should be a BSD system call, not a Mach trap!!!
476 */
kern_return_t
task_for_pid(
	struct task_for_pid_args *args)
{
	mach_port_name_t	target_tport = args->target_tport;
	int			pid = args->pid;
	user_addr_t		task_addr = args->t;
	struct uthread		*uthread;
	proc_t 			p = PROC_NULL;
	task_t			t1 = TASK_NULL;
	mach_port_name_t	tret = MACH_PORT_NULL;
	ipc_port_t 		tfpport;
	void * sright;
	int error = 0;

	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKFORPID);
	AUDIT_ARG(pid, pid);
	AUDIT_ARG(mach_port1, target_tport);

#if defined(SECURE_KERNEL)
	/* Secure kernels never hand out the kernel task's port (pid 0). */
	if (0 == pid) {
		(void ) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return(KERN_FAILURE);
	}
#endif

	/* Validate the caller-supplied task port; takes a task reference
	 * that is dropped at "tfpout". */
	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return(KERN_FAILURE);
	}


	/*
	 * Delayed binding of thread credential to process credential, if we
	 * are not running with an explicitly set thread credential.
	 */
	uthread = get_bsdthread_info(current_thread());
	kauth_cred_uthread_update(uthread, current_proc());

	/* Takes a proc reference (released at tfpout); may be PROC_NULL. */
	p = proc_find(pid);
	AUDIT_ARG(process, p);

	/* Posix permission check; also rejects PROC_NULL and zombies. */
	if (!(task_for_pid_posix_check(p))) {
		error = KERN_FAILURE;
		goto tfpout;
	}

	if (p->task != TASK_NULL) {
		/* If we aren't root and target's task access port is set... */
		if (!kauth_cred_issuser(kauth_cred_get()) &&
		    (task_get_task_access_port(p->task, &tfpport) == 0) &&
		    (tfpport != IPC_PORT_NULL)) {

			if (tfpport == IPC_PORT_DEAD) {
				error = KERN_PROTECTION_FAILURE;
				goto tfpout;
			}

			/* Call up to the task access server */
			error = check_task_access(tfpport, proc_selfpid(), kauth_getgid(), pid);

			if (error != MACH_MSG_SUCCESS) {
				if (error == MACH_RCV_INTERRUPTED)
					error = KERN_ABORTED;
				else
					error = KERN_FAILURE;
				goto tfpout;
			}
		}
#if CONFIG_MACF
		/* MAC policy hook may veto handing out the task port. */
		error = mac_proc_check_get_task(kauth_cred_get(), p);
		if (error) {
			error = KERN_FAILURE;
			goto tfpout;
		}
#endif

		/* Grant task port access: take a task ref consumed by the
		 * port conversion, then copy a send right into the caller's
		 * IPC space. */
		task_reference(p->task);
		sright = (void *) convert_task_to_port(p->task);
		tret = ipc_port_copyout_send(
			sright,
			get_task_ipcspace(current_task()));
	}
	/* NOTE: reached with tret == MACH_PORT_NULL when p->task is TASK_NULL. */
	error = KERN_SUCCESS;

tfpout:
	task_deallocate(t1);	/* drop the ref from port_name_to_task() */
	AUDIT_ARG(mach_port2, tret);
	/* Always copy the resulting port name (or MACH_PORT_NULL) out. */
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
	if (p != PROC_NULL)
		proc_rele(p);
	AUDIT_MACH_SYSCALL_EXIT(error);
	return(error);
}
575
576 /*
577 * Routine: task_name_for_pid
578 * Purpose:
579 * Get the task name port for another "process", named by its
580 * process ID on the same host as "target_task".
581 *
582 * Only permitted to privileged processes, or processes
583 * with the same user ID.
584 *
585 * XXX This should be a BSD system call, not a Mach trap!!!
586 */
587
kern_return_t
task_name_for_pid(
	struct task_name_for_pid_args *args)
{
	mach_port_name_t	target_tport = args->target_tport;
	int			pid = args->pid;
	user_addr_t		task_addr = args->t;
	struct uthread		*uthread;
	proc_t		p = PROC_NULL;
	task_t		t1;
	mach_port_name_t	tret;
	void * sright;
	int error = 0, refheld = 0;
	kauth_cred_t target_cred;

	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKNAMEFORPID);
	AUDIT_ARG(pid, pid);
	AUDIT_ARG(mach_port1, target_tport);

	/* Validate the caller-supplied task port; takes a task reference. */
	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return(KERN_FAILURE);
	}


	/*
	 * Delayed binding of thread credential to process credential, if we
	 * are not running with an explicitly set thread credential.
	 */
	uthread = get_bsdthread_info(current_thread());
	kauth_cred_uthread_update(uthread, current_proc());

	/* Takes a proc reference (released at tnfpout / below). */
	p = proc_find(pid);
	AUDIT_ARG(process, p);
	if (p != PROC_NULL) {
		/* Reference the target's credential for the uid checks. */
		target_cred = kauth_cred_proc_ref(p);
		refheld = 1;

		/* Allowed if: not a zombie AND (self, root, or matching
		 * effective and real uids). */
		if ((p->p_stat != SZOMB)
		    && ((current_proc() == p)
			|| kauth_cred_issuser(kauth_cred_get())
			|| ((kauth_cred_getuid(target_cred) == kauth_cred_getuid(kauth_cred_get())) &&
			    ((target_cred->cr_ruid == kauth_cred_get()->cr_ruid))))) {

			if (p->task != TASK_NULL) {
				/* Ref consumed by the name-port conversion. */
				task_reference(p->task);
#if CONFIG_MACF
				/* MAC hook may veto; drop the task ref first. */
				error = mac_proc_check_get_task_name(kauth_cred_get(),  p);
				if (error) {
					task_deallocate(p->task);
					goto noperm;
				}
#endif
				sright = (void *)convert_task_name_to_port(p->task);
				tret = ipc_port_copyout_send(sright,
					get_task_ipcspace(current_task()));
			} else
				tret  = MACH_PORT_NULL;

			AUDIT_ARG(mach_port2, tret);
			(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
			task_deallocate(t1);
			error = KERN_SUCCESS;
			goto tnfpout;
		}
	}

	/* Failure path: copy MACH_PORT_NULL out and report failure.
	 * ("noperm" exists only when CONFIG_MACF, matching its sole goto.) */
#if CONFIG_MACF
noperm:
#endif
	task_deallocate(t1);	/* drop the ref from port_name_to_task() */
	tret = MACH_PORT_NULL;
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
	error = KERN_FAILURE;
tnfpout:
	if (refheld != 0)
		kauth_cred_unref(&target_cred);
	if (p != PROC_NULL)
		proc_rele(p);
	AUDIT_MACH_SYSCALL_EXIT(error);
	return(error);
}
672
673 static int
674 sysctl_settfp_policy(__unused struct sysctl_oid *oidp, void *arg1,
675 __unused int arg2, struct sysctl_req *req)
676 {
677 int error = 0;
678 int new_value;
679
680 error = SYSCTL_OUT(req, arg1, sizeof(int));
681 if (error || req->newptr == USER_ADDR_NULL)
682 return(error);
683
684 if (!is_suser())
685 return(EPERM);
686
687 if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) {
688 goto out;
689 }
690 if ((new_value == KERN_TFP_POLICY_DENY)
691 || (new_value == KERN_TFP_POLICY_DEFAULT))
692 tfp_policy = new_value;
693 else
694 error = EINVAL;
695 out:
696 return(error);
697
698 }
699
#if defined(SECURE_KERNEL)
static int kern_secure_kernel = 1;	/* reports a secure-kernel build */
#else
static int kern_secure_kernel = 0;
#endif

/* kern.secure_kernel: read-only build flag. */
SYSCTL_INT(_kern, OID_AUTO, secure_kernel, CTLFLAG_RD, &kern_secure_kernel, 0, "");

/* kern.tfp.policy: task_for_pid policy, validated by sysctl_settfp_policy(). */
SYSCTL_NODE(_kern, KERN_TFP, tfp, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "tfp");
SYSCTL_PROC(_kern_tfp, KERN_TFP_POLICY, policy, CTLTYPE_INT | CTLFLAG_RW,
    &tfp_policy, sizeof(uint32_t), &sysctl_settfp_policy ,"I","policy");

/* vm.shared_region_*: shared-region knobs/info (externs presumably defined
 * in osfmk/vm/vm_shared_region.c — see <vm/vm_shared_region.h>). */
SYSCTL_INT(_vm, OID_AUTO, shared_region_trace_level, CTLFLAG_RW,
	   &shared_region_trace_level, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_version, CTLFLAG_RD,
	   &shared_region_version, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_persistence, CTLFLAG_RW,
	   &shared_region_persistence, 0, "");
718
719 /*
720 * shared_region_check_np:
721 *
722 * This system call is intended for dyld.
723 *
724 * dyld calls this when any process starts to see if the process's shared
725 * region is already set up and ready to use.
726 * This call returns the base address of the first mapping in the
727 * process's shared region's first mapping.
728 * dyld will then check what's mapped at that address.
729 *
730 * If the shared region is empty, dyld will then attempt to map the shared
731 * cache file in the shared region via the shared_region_map_np() system call.
732 *
733 * If something's already mapped in the shared region, dyld will check if it
734 * matches the shared cache it would like to use for that process.
735 * If it matches, evrything's ready and the process can proceed and use the
736 * shared region.
737 * If it doesn't match, dyld will unmap the shared region and map the shared
738 * cache into the process's address space via mmap().
739 *
740 * ERROR VALUES
741 * EINVAL no shared region
742 * ENOMEM shared region is empty
743 * EFAULT bad address for "start_address"
744 */
745 int
746 shared_region_check_np(
747 __unused struct proc *p,
748 struct shared_region_check_np_args *uap,
749 __unused int *retvalp)
750 {
751 vm_shared_region_t shared_region;
752 mach_vm_offset_t start_address;
753 int error;
754 kern_return_t kr;
755
756 SHARED_REGION_TRACE_DEBUG(
757 ("shared_region: %p [%d(%s)] -> check_np(0x%llx)\n",
758 current_thread(), p->p_pid, p->p_comm,
759 (uint64_t)uap->start_address));
760
761 /* retrieve the current tasks's shared region */
762 shared_region = vm_shared_region_get(current_task());
763 if (shared_region != NULL) {
764 /* retrieve address of its first mapping... */
765 kr = vm_shared_region_start_address(shared_region,
766 &start_address);
767 if (kr != KERN_SUCCESS) {
768 error = ENOMEM;
769 } else {
770 /* ... and give it to the caller */
771 error = copyout(&start_address,
772 (user_addr_t) uap->start_address,
773 sizeof (start_address));
774 if (error) {
775 SHARED_REGION_TRACE_ERROR(
776 ("shared_region: %p [%d(%s)] "
777 "check_np(0x%llx) "
778 "copyout(0x%llx) error %d\n",
779 current_thread(), p->p_pid, p->p_comm,
780 (uint64_t)uap->start_address, (uint64_t)start_address,
781 error));
782 }
783 }
784 vm_shared_region_deallocate(shared_region);
785 } else {
786 /* no shared region ! */
787 error = EINVAL;
788 }
789
790 SHARED_REGION_TRACE_DEBUG(
791 ("shared_region: %p [%d(%s)] check_np(0x%llx) <- 0x%llx %d\n",
792 current_thread(), p->p_pid, p->p_comm,
793 (uint64_t)uap->start_address, (uint64_t)start_address, error));
794
795 return error;
796 }
797
798 /*
799 * shared_region_map_np()
800 *
801 * This system call is intended for dyld.
802 *
803 * dyld uses this to map a shared cache file into a shared region.
804 * This is usually done only the first time a shared cache is needed.
805 * Subsequent processes will just use the populated shared region without
806 * requiring any further setup.
807 */
808 int
809 shared_region_map_np(
810 struct proc *p,
811 struct shared_region_map_np_args *uap,
812 __unused int *retvalp)
813 {
814 int error;
815 kern_return_t kr;
816 int fd;
817 struct fileproc *fp;
818 struct vnode *vp, *root_vp;
819 struct vnode_attr va;
820 off_t fs;
821 memory_object_size_t file_size;
822 user_addr_t user_mappings;
823 struct shared_file_mapping_np *mappings;
824 #define SFM_MAX_STACK 8
825 struct shared_file_mapping_np stack_mappings[SFM_MAX_STACK];
826 unsigned int mappings_count;
827 vm_size_t mappings_size;
828 memory_object_control_t file_control;
829 struct vm_shared_region *shared_region;
830
831 SHARED_REGION_TRACE_DEBUG(
832 ("shared_region: %p [%d(%s)] -> map\n",
833 current_thread(), p->p_pid, p->p_comm));
834
835 shared_region = NULL;
836 mappings_count = 0;
837 mappings_size = 0;
838 mappings = NULL;
839 fp = NULL;
840 vp = NULL;
841
842 /* get file descriptor for shared region cache file */
843 fd = uap->fd;
844
845 /* get file structure from file descriptor */
846 error = fp_lookup(p, fd, &fp, 0);
847 if (error) {
848 SHARED_REGION_TRACE_ERROR(
849 ("shared_region: %p [%d(%s)] map: "
850 "fd=%d lookup failed (error=%d)\n",
851 current_thread(), p->p_pid, p->p_comm, fd, error));
852 goto done;
853 }
854
855 /* make sure we're attempting to map a vnode */
856 if (fp->f_fglob->fg_type != DTYPE_VNODE) {
857 SHARED_REGION_TRACE_ERROR(
858 ("shared_region: %p [%d(%s)] map: "
859 "fd=%d not a vnode (type=%d)\n",
860 current_thread(), p->p_pid, p->p_comm,
861 fd, fp->f_fglob->fg_type));
862 error = EINVAL;
863 goto done;
864 }
865
866 /* we need at least read permission on the file */
867 if (! (fp->f_fglob->fg_flag & FREAD)) {
868 SHARED_REGION_TRACE_ERROR(
869 ("shared_region: %p [%d(%s)] map: "
870 "fd=%d not readable\n",
871 current_thread(), p->p_pid, p->p_comm, fd));
872 error = EPERM;
873 goto done;
874 }
875
876 /* get vnode from file structure */
877 error = vnode_getwithref((vnode_t) fp->f_fglob->fg_data);
878 if (error) {
879 SHARED_REGION_TRACE_ERROR(
880 ("shared_region: %p [%d(%s)] map: "
881 "fd=%d getwithref failed (error=%d)\n",
882 current_thread(), p->p_pid, p->p_comm, fd, error));
883 goto done;
884 }
885 vp = (struct vnode *) fp->f_fglob->fg_data;
886
887 /* make sure the vnode is a regular file */
888 if (vp->v_type != VREG) {
889 SHARED_REGION_TRACE_ERROR(
890 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
891 "not a file (type=%d)\n",
892 current_thread(), p->p_pid, p->p_comm,
893 vp, vp->v_name, vp->v_type));
894 error = EINVAL;
895 goto done;
896 }
897
898 /* make sure vnode is on the process's root volume */
899 root_vp = p->p_fd->fd_rdir;
900 if (root_vp == NULL) {
901 root_vp = rootvnode;
902 }
903 if (vp->v_mount != root_vp->v_mount) {
904 SHARED_REGION_TRACE_ERROR(
905 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
906 "not on process's root volume\n",
907 current_thread(), p->p_pid, p->p_comm,
908 vp, vp->v_name));
909 error = EPERM;
910 goto done;
911 }
912
913 /* make sure vnode is owned by "root" */
914 VATTR_INIT(&va);
915 VATTR_WANTED(&va, va_uid);
916 error = vnode_getattr(vp, &va, vfs_context_current());
917 if (error) {
918 SHARED_REGION_TRACE_ERROR(
919 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
920 "vnode_getattr(%p) failed (error=%d)\n",
921 current_thread(), p->p_pid, p->p_comm,
922 vp, vp->v_name, vp, error));
923 goto done;
924 }
925 if (va.va_uid != 0) {
926 SHARED_REGION_TRACE_ERROR(
927 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
928 "owned by uid=%d instead of 0\n",
929 current_thread(), p->p_pid, p->p_comm,
930 vp, vp->v_name, va.va_uid));
931 error = EPERM;
932 goto done;
933 }
934
935 /* get vnode size */
936 error = vnode_size(vp, &fs, vfs_context_current());
937 if (error) {
938 SHARED_REGION_TRACE_ERROR(
939 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
940 "vnode_size(%p) failed (error=%d)\n",
941 current_thread(), p->p_pid, p->p_comm,
942 vp, vp->v_name, vp, error));
943 goto done;
944 }
945 file_size = fs;
946
947 /* get the file's memory object handle */
948 file_control = ubc_getobject(vp, UBC_HOLDOBJECT);
949 if (file_control == MEMORY_OBJECT_CONTROL_NULL) {
950 SHARED_REGION_TRACE_ERROR(
951 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
952 "no memory object\n",
953 current_thread(), p->p_pid, p->p_comm,
954 vp, vp->v_name));
955 error = EINVAL;
956 goto done;
957 }
958
959 /* get the list of mappings the caller wants us to establish */
960 mappings_count = uap->count; /* number of mappings */
961 mappings_size = (vm_size_t) (mappings_count * sizeof (mappings[0]));
962 if (mappings_count == 0) {
963 SHARED_REGION_TRACE_INFO(
964 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
965 "no mappings\n",
966 current_thread(), p->p_pid, p->p_comm,
967 vp, vp->v_name));
968 error = 0; /* no mappings: we're done ! */
969 goto done;
970 } else if (mappings_count <= SFM_MAX_STACK) {
971 mappings = &stack_mappings[0];
972 } else {
973 SHARED_REGION_TRACE_ERROR(
974 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
975 "too many mappings (%d)\n",
976 current_thread(), p->p_pid, p->p_comm,
977 vp, vp->v_name, mappings_count));
978 error = EINVAL;
979 goto done;
980 }
981
982 user_mappings = uap->mappings; /* the mappings, in user space */
983 error = copyin(user_mappings,
984 mappings,
985 mappings_size);
986 if (error) {
987 SHARED_REGION_TRACE_ERROR(
988 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
989 "copyin(0x%llx, %d) failed (error=%d)\n",
990 current_thread(), p->p_pid, p->p_comm,
991 vp, vp->v_name, (uint64_t)user_mappings, mappings_count, error));
992 goto done;
993 }
994
995 /* get the process's shared region (setup in vm_map_exec()) */
996 shared_region = vm_shared_region_get(current_task());
997 if (shared_region == NULL) {
998 SHARED_REGION_TRACE_ERROR(
999 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1000 "no shared region\n",
1001 current_thread(), p->p_pid, p->p_comm,
1002 vp, vp->v_name));
1003 goto done;
1004 }
1005
1006 /* map the file into that shared region's submap */
1007 kr = vm_shared_region_map_file(shared_region,
1008 mappings_count,
1009 mappings,
1010 file_control,
1011 file_size,
1012 (void *) p->p_fd->fd_rdir);
1013 if (kr != KERN_SUCCESS) {
1014 SHARED_REGION_TRACE_ERROR(
1015 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1016 "vm_shared_region_map_file() failed kr=0x%x\n",
1017 current_thread(), p->p_pid, p->p_comm,
1018 vp, vp->v_name, kr));
1019 switch (kr) {
1020 case KERN_INVALID_ADDRESS:
1021 error = EFAULT;
1022 break;
1023 case KERN_PROTECTION_FAILURE:
1024 error = EPERM;
1025 break;
1026 case KERN_NO_SPACE:
1027 error = ENOMEM;
1028 break;
1029 case KERN_FAILURE:
1030 case KERN_INVALID_ARGUMENT:
1031 default:
1032 error = EINVAL;
1033 break;
1034 }
1035 goto done;
1036 }
1037
1038 /*
1039 * The mapping was successful. Let the buffer cache know
1040 * that we've mapped that file with these protections. This
1041 * prevents the vnode from getting recycled while it's mapped.
1042 */
1043 (void) ubc_map(vp, VM_PROT_READ);
1044 error = 0;
1045
1046 /* update the vnode's access time */
1047 if (! (vnode_vfsvisflags(vp) & MNT_NOATIME)) {
1048 VATTR_INIT(&va);
1049 nanotime(&va.va_access_time);
1050 VATTR_SET_ACTIVE(&va, va_access_time);
1051 vnode_setattr(vp, &va, vfs_context_current());
1052 }
1053
1054 if (p->p_flag & P_NOSHLIB) {
1055 /* signal that this process is now using split libraries */
1056 OSBitAndAtomic(~((uint32_t)P_NOSHLIB), (UInt32 *)&p->p_flag);
1057 }
1058
1059 done:
1060 if (vp != NULL) {
1061 /*
1062 * release the vnode...
1063 * ubc_map() still holds it for us in the non-error case
1064 */
1065 (void) vnode_put(vp);
1066 vp = NULL;
1067 }
1068 if (fp != NULL) {
1069 /* release the file descriptor */
1070 fp_drop(p, fd, fp, 0);
1071 fp = NULL;
1072 }
1073
1074 if (shared_region != NULL) {
1075 vm_shared_region_deallocate(shared_region);
1076 }
1077
1078 SHARED_REGION_TRACE_DEBUG(
1079 ("shared_region: %p [%d(%s)] <- map\n",
1080 current_thread(), p->p_pid, p->p_comm));
1081
1082 return error;
1083 }
1084
1085
/* sysctl overflow room */

/* vm_page_free_target is provided as a makeshift solution for applications that want to
	allocate buffer space, possibly purgeable memory, but not cause inactive pages to be
	reclaimed. It allows the app to calculate how much memory is free outside the free target. */
/* Read-only view of the pageout daemon's free-page target (in pages). */
extern unsigned int	vm_page_free_target;
SYSCTL_INT(_vm, OID_AUTO, vm_page_free_target, CTLFLAG_RD,
	   &vm_page_free_target, 0, "Pageout daemon free target");
1094