]> git.saurik.com Git - apple/xnu.git/blob - bsd/vm/vm_unix.c
094b6258c16197787fb5dfb7736c8e1a8a90c742
[apple/xnu.git] / bsd / vm / vm_unix.c
1 /*
2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Mach Operating System
30 * Copyright (c) 1987 Carnegie-Mellon University
31 * All rights reserved. The CMU software License Agreement specifies
32 * the terms and conditions for use and redistribution.
33 */
34 /*
35 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
36 * support for mandatory and extensible security protections. This notice
37 * is included in support of clause 2.2 (b) of the Apple Public License,
38 * Version 2.0.
39 */
40
41 #include <meta_features.h>
42
43 #include <kern/task.h>
44 #include <kern/thread.h>
45 #include <kern/debug.h>
46 #include <kern/lock.h>
47 #include <mach/mach_traps.h>
48 #include <mach/port.h>
49 #include <mach/task.h>
50 #include <mach/task_access.h>
51 #include <mach/task_special_ports.h>
52 #include <mach/time_value.h>
53 #include <mach/vm_map.h>
54 #include <mach/vm_param.h>
55 #include <mach/vm_prot.h>
56
57 #include <sys/file_internal.h>
58 #include <sys/param.h>
59 #include <sys/systm.h>
60 #include <sys/dir.h>
61 #include <sys/namei.h>
62 #include <sys/proc_internal.h>
63 #include <sys/kauth.h>
64 #include <sys/vm.h>
65 #include <sys/file.h>
66 #include <sys/vnode_internal.h>
67 #include <sys/mount.h>
68 #include <sys/trace.h>
69 #include <sys/kernel.h>
70 #include <sys/ubc_internal.h>
71 #include <sys/user.h>
72 #include <sys/syslog.h>
73 #include <sys/stat.h>
74 #include <sys/sysproto.h>
75 #include <sys/mman.h>
76 #include <sys/sysctl.h>
77
78 #include <bsm/audit_kernel.h>
79 #include <bsm/audit_kevents.h>
80
81 #include <kern/kalloc.h>
82 #include <vm/vm_map.h>
83 #include <vm/vm_kern.h>
84
85 #include <machine/spl.h>
86
87 #include <mach/shared_region.h>
88 #include <vm/vm_shared_region.h>
89
90 #include <vm/vm_protos.h>
91
/*
 * Sysctl's related to data/stack execution.  See osfmk/vm/vm_map.c
 */

#ifndef SECURE_KERNEL
/*
 * Tunables (defined in osfmk/vm/vm_map.c) that control whether pages in
 * the stack/data regions may be executed.  Not exported on SECURE_KERNEL
 * builds, where the policy is not tunable.
 */
extern int allow_stack_exec, allow_data_exec;

SYSCTL_INT(_vm, OID_AUTO, allow_stack_exec, CTLFLAG_RW, &allow_stack_exec, 0, "");
SYSCTL_INT(_vm, OID_AUTO, allow_data_exec, CTLFLAG_RW, &allow_data_exec, 0, "");
#endif /* !SECURE_KERNEL */
102
#if CONFIG_NO_PRINTF_STRINGS
/*
 * Stub used when the kernel is built without printf format strings:
 * execution-protection faults are not logged.
 */
void
log_stack_execution_failure(__unused addr64_t a, __unused vm_prot_t b)
{
}
#else
/*
 * Human-readable names for each combination of the three VM protection
 * bits; indexed by (prot & VM_PROT_ALL).
 */
static const char *prot_values[] = {
	"none",
	"read-only",
	"write-only",
	"read-write",
	"execute-only",
	"read-execute",
	"write-execute",
	"read-write-execute"
};

/*
 * Log an attempt to execute from a page whose protections do not allow
 * it (data/stack execution).  Called from the VM fault path; related to
 * the vm.allow_stack_exec / vm.allow_data_exec sysctls above.
 */
void
log_stack_execution_failure(addr64_t vaddr, vm_prot_t prot)
{
	printf("Data/Stack execution not permitted: %s[pid %d] at virtual address 0x%qx, protections were %s\n", 
		current_proc()->p_comm, current_proc()->p_pid, vaddr, prot_values[prot & VM_PROT_ALL]);
}
#endif
127
128
129 int
130 useracc(
131 user_addr_t addr,
132 user_size_t len,
133 int prot)
134 {
135 return (vm_map_check_protection(
136 current_map(),
137 vm_map_trunc_page(addr), vm_map_round_page(addr+len),
138 prot == B_READ ? VM_PROT_READ : VM_PROT_WRITE));
139 }
140
141 int
142 vslock(
143 user_addr_t addr,
144 user_size_t len)
145 {
146 kern_return_t kret;
147 kret = vm_map_wire(current_map(), vm_map_trunc_page(addr),
148 vm_map_round_page(addr+len),
149 VM_PROT_READ | VM_PROT_WRITE ,FALSE);
150
151 switch (kret) {
152 case KERN_SUCCESS:
153 return (0);
154 case KERN_INVALID_ADDRESS:
155 case KERN_NO_SPACE:
156 return (ENOMEM);
157 case KERN_PROTECTION_FAILURE:
158 return (EACCES);
159 default:
160 return (EINVAL);
161 }
162 }
163
/*
 * Unwire the user address range [addr, addr+len) previously wired by
 * vslock().  The "dirtied" hint is currently ignored (see the disabled
 * FIXME code below); pages are unwired without being explicitly marked
 * modified.  Returns 0 on success or a BSD errno on failure.
 */
int
vsunlock(
	user_addr_t addr,
	user_size_t len,
	__unused int dirtied)
{
#if FIXME  /* [ */
	pmap_t		pmap;
	vm_page_t	pg;
	vm_map_offset_t	vaddr;
	ppnum_t		paddr;
#endif  /* FIXME ] */
	kern_return_t	kret;

#if FIXME  /* [ */
	/*
	 * Disabled: would walk the range and mark each physical page as
	 * modified when the caller indicates it dirtied the buffer.
	 */
	if (dirtied) {
		pmap = get_task_pmap(current_task());
		for (vaddr = vm_map_trunc_page(addr);
		     vaddr < vm_map_round_page(addr+len);
		     vaddr += PAGE_SIZE) {
			paddr = pmap_extract(pmap, vaddr);
			pg = PHYS_TO_VM_PAGE(paddr);
			vm_page_set_modified(pg);
		}
	}
#endif  /* FIXME ] */
#ifdef	lint
	dirtied++;
#endif	/* lint */
	kret = vm_map_unwire(current_map(), vm_map_trunc_page(addr),
				vm_map_round_page(addr+len), FALSE);
	/* Translate the Mach result into a BSD errno. */
	switch (kret) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}
207
208 int
209 subyte(
210 user_addr_t addr,
211 int byte)
212 {
213 char character;
214
215 character = (char)byte;
216 return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1);
217 }
218
219 int
220 suibyte(
221 user_addr_t addr,
222 int byte)
223 {
224 char character;
225
226 character = (char)byte;
227 return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1);
228 }
229
230 int fubyte(user_addr_t addr)
231 {
232 unsigned char byte;
233
234 if (copyin(addr, (void *) &byte, sizeof(char)))
235 return(-1);
236 return(byte);
237 }
238
239 int fuibyte(user_addr_t addr)
240 {
241 unsigned char byte;
242
243 if (copyin(addr, (void *) &(byte), sizeof(char)))
244 return(-1);
245 return(byte);
246 }
247
248 int
249 suword(
250 user_addr_t addr,
251 long word)
252 {
253 return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1);
254 }
255
/*
 * Fetch an int-sized word from user address "addr".
 * Returns the word on success, -1 on failure.
 *
 * NOTE(review): only sizeof(int) bytes of "word" are written by the
 * copyin; this assumes long and int are the same size (32-bit kernel).
 * On an LP64 kernel the upper bytes would be left uninitialized —
 * confirm before reusing in a 64-bit context.
 */
long fuword(user_addr_t addr)
{
	long word;

	if (copyin(addr, (void *) &word, sizeof(int)))
		return(-1);
	return(word);
}
264
265 /* suiword and fuiword are the same as suword and fuword, respectively */
266
267 int
268 suiword(
269 user_addr_t addr,
270 long word)
271 {
272 return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1);
273 }
274
/*
 * Fetch an int-sized word from user address "addr".
 * Identical to fuword(); kept for the historical fu*i* naming.
 * Returns the word on success, -1 on failure.
 *
 * NOTE(review): same caveat as fuword() — only sizeof(int) bytes of
 * "word" are filled in by the copyin.
 */
long fuiword(user_addr_t addr)
{
	long word;

	if (copyin(addr, (void *) &word, sizeof(int)))
		return(-1);
	return(word);
}
283
284 /*
285 * With a 32-bit kernel and mixed 32/64-bit user tasks, this interface allows the
286 * fetching and setting of process-sized size_t and pointer values.
287 */
288 int
289 sulong(user_addr_t addr, int64_t word)
290 {
291
292 if (IS_64BIT_PROCESS(current_proc())) {
293 return(copyout((void *)&word, addr, sizeof(word)) == 0 ? 0 : -1);
294 } else {
295 return(suiword(addr, (long)word));
296 }
297 }
298
299 int64_t
300 fulong(user_addr_t addr)
301 {
302 int64_t longword;
303
304 if (IS_64BIT_PROCESS(current_proc())) {
305 if (copyin(addr, (void *)&longword, sizeof(longword)) != 0)
306 return(-1);
307 return(longword);
308 } else {
309 return((int64_t)fuiword(addr));
310 }
311 }
312
313 int
314 suulong(user_addr_t addr, uint64_t uword)
315 {
316
317 if (IS_64BIT_PROCESS(current_proc())) {
318 return(copyout((void *)&uword, addr, sizeof(uword)) == 0 ? 0 : -1);
319 } else {
320 return(suiword(addr, (u_long)uword));
321 }
322 }
323
324 uint64_t
325 fuulong(user_addr_t addr)
326 {
327 uint64_t ulongword;
328
329 if (IS_64BIT_PROCESS(current_proc())) {
330 if (copyin(addr, (void *)&ulongword, sizeof(ulongword)) != 0)
331 return(-1ULL);
332 return(ulongword);
333 } else {
334 return((uint64_t)fuiword(addr));
335 }
336 }
337
/*
 * swapon() system call: dynamic swap device configuration is not
 * supported in this kernel; always fails with ENOTSUP.
 */
int
swapon(__unused proc_t procp, __unused struct swapon_args *uap, __unused int *retval)
{
	return(ENOTSUP);
}
343
344
/*
 * Routine:	pid_for_task
 * Purpose:
 *	Mach trap: given a task port name, look up the BSD process
 *	backing that task and copy its pid out to args->pid.
 *	On failure, -1 is copied out as the pid.
 * Returns:
 *	KERN_SUCCESS on success; KERN_FAILURE if the port does not name
 *	a task or the task has no BSD process attached.
 */
kern_return_t
pid_for_task(
	struct pid_for_task_args *args)
{
	mach_port_name_t	t = args->t;
	user_addr_t		pid_addr  = args->pid;
	proc_t p;
	task_t		t1;
	int	pid = -1;
	kern_return_t	err = KERN_SUCCESS;

	AUDIT_MACH_SYSCALL_ENTER(AUE_PIDFORTASK);
	AUDIT_ARG(mach_port1, t);

	/* Convert the port name to a task reference (consumed below). */
	t1 = port_name_to_task(t);

	if (t1 == TASK_NULL) {
		err = KERN_FAILURE;
		goto pftout;
	} else {
		p = get_bsdtask_info(t1);
		if (p) {
			pid  = proc_pid(p);
			err = KERN_SUCCESS;
		} else {
			err = KERN_FAILURE;
		}
	}
	task_deallocate(t1);
pftout:
	AUDIT_ARG(pid, pid);
	/* Report the pid (or -1) to the caller even on failure. */
	(void) copyout((char *) &pid, pid_addr, sizeof(int));
	AUDIT_MACH_SYSCALL_EXIT(err);
	return(err);
}
380
/*
 *
 *	tfp_policy = KERN_TFP_POLICY_DENY; Deny Mode: None allowed except for self
 *	tfp_policy = KERN_TFP_POLICY_DEFAULT; default mode: all posix checks and upcall via task port for authentication
 *
 */
/* Current task_for_pid() policy; settable via the kern.tfp.policy sysctl. */
static	int tfp_policy = KERN_TFP_POLICY_DEFAULT;
388
/*
 * Routine:	task_for_pid_posix_check
 * Purpose:
 *		Verify that the current process should be allowed to
 *		get the target process's task port. This is only
 *		permitted if:
 *		- The current process is root
 *		OR all of the following are true:
 *		- The target process's real, effective, and saved uids
 *		  are the same as the current proc's euid,
 *		- The target process's group set is a subset of the
 *		  calling process's group set, and
 *		- The target process hasn't switched credentials.
 *
 *	Returns:	TRUE: permitted
 *			FALSE: denied
 */
static int
task_for_pid_posix_check(proc_t target)
{
	kauth_cred_t targetcred, mycred;
	uid_t myuid;
	int allowed; 

	/* No task_for_pid on bad targets */
	if (target == PROC_NULL || target->p_stat == SZOMB) {
		return FALSE;
	}

	mycred = kauth_cred_get();
	myuid = kauth_cred_getuid(mycred);

	/* If we're running as root, the check passes */
	if (kauth_cred_issuser(mycred))
		return TRUE;

	/* We're allowed to get our own task port */
	if (target == current_proc())
		return TRUE;

	/* 
	 * Under DENY, only root can get another proc's task port,
	 * so no more checks are needed.
	 */
	if (tfp_policy == KERN_TFP_POLICY_DENY) { 
		return FALSE;
	}

	/* Take a credential reference; released at "out" below. */
	targetcred = kauth_cred_proc_ref(target);
	allowed = TRUE;

	/* Do target's ruid, euid, and saved uid match my euid? */
	if ((kauth_cred_getuid(targetcred) != myuid) || 
			(targetcred->cr_ruid != myuid) ||
			(targetcred->cr_svuid != myuid)) {
		allowed = FALSE;
		goto out;
	}

	/* Are target's groups a subset of my groups? */
	if (kauth_cred_gid_subset(targetcred, mycred, &allowed) ||
			allowed == 0) {
		allowed = FALSE;
		goto out;
	}

	/* Has target switched credentials? */
	if (target->p_flag & P_SUGID) {
		allowed = FALSE;
		goto out;
	}
	
out:
	kauth_cred_unref(&targetcred);
	return allowed;
}
465
/*
 *	Routine:	task_for_pid
 *	Purpose:
 *		Get the task port for another "process", named by its
 *		process ID on the same host as "target_task".
 *
 *		Only permitted to privileged processes, or processes
 *		with the same user ID.
 *
 *		On success the port name is copied out to args->t; on
 *		failure MACH_PORT_NULL is copied out instead.
 *
 * XXX This should be a BSD system call, not a Mach trap!!!
 */
kern_return_t
task_for_pid(
	struct task_for_pid_args *args)
{
	mach_port_name_t	target_tport = args->target_tport;
	int			pid = args->pid;
	user_addr_t		task_addr = args->t;
	struct uthread		*uthread;
	proc_t 			p = PROC_NULL;
	task_t			t1 = TASK_NULL;
	mach_port_name_t	tret = MACH_PORT_NULL;
 	ipc_port_t 		tfpport;
	void * sright;
	int error = 0;

	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKFORPID);
	AUDIT_ARG(pid, pid);
	AUDIT_ARG(mach_port1, target_tport);

#if defined(SECURE_KERNEL)
	/* Secure kernels never hand out the kernel task port (pid 0). */
	if (0 == pid) {
		(void ) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return(KERN_FAILURE);
	}
#endif

	/* The caller must present a valid task port for itself. */
	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return(KERN_FAILURE);
	} 


	/*
	 * Delayed binding of thread credential to process credential, if we
	 * are not running with an explicitly set thread credential.
	 */
	uthread = get_bsdthread_info(current_thread());
	kauth_cred_uthread_update(uthread, current_proc());

	p = proc_find(pid);
	AUDIT_ARG(process, p);

	/* Posix uid/gid checks; also rejects p == PROC_NULL and zombies. */
	if (!(task_for_pid_posix_check(p))) {
		error = KERN_FAILURE;
		goto tfpout;
	}

	if (p->task != TASK_NULL) {
		/* If we aren't root and target's task access port is set... */
		if (!kauth_cred_issuser(kauth_cred_get()) &&
			p != current_proc() &&
			(task_get_task_access_port(p->task, &tfpport) == 0) &&
			(tfpport != IPC_PORT_NULL)) {

			if (tfpport == IPC_PORT_DEAD) {
				error = KERN_PROTECTION_FAILURE;
				goto tfpout;
			}

			/* Call up to the task access server */
			error = check_task_access(tfpport, proc_selfpid(), kauth_getgid(), pid);

			if (error != MACH_MSG_SUCCESS) {
				if (error == MACH_RCV_INTERRUPTED)
					error = KERN_ABORTED;
				else
					error = KERN_FAILURE;
				goto tfpout;
			}
		}
#if CONFIG_MACF
		/* MAC policies get the final say. */
		error = mac_proc_check_get_task(kauth_cred_get(), p);
		if (error) {
			error = KERN_FAILURE;
			goto tfpout;
		}
#endif

		/* Grant task port access */
		task_reference(p->task);
		sright = (void *) convert_task_to_port(p->task);
		tret = ipc_port_copyout_send(
				sright, 
				get_task_ipcspace(current_task()));
	}
	error = KERN_SUCCESS;

tfpout:
	task_deallocate(t1);
	AUDIT_ARG(mach_port2, tret);
	/* Copy out the granted port name (MACH_PORT_NULL on failure). */
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
	if (p != PROC_NULL)
		proc_rele(p);
	AUDIT_MACH_SYSCALL_EXIT(error);
	return(error);
}
576
/*
 *	Routine:	task_name_for_pid
 *	Purpose:
 *		Get the task name port for another "process", named by its
 *		process ID on the same host as "target_task".
 *
 *		Only permitted to privileged processes, or processes
 *		with the same user ID.
 *
 *		On success the name port is copied out to args->t; on
 *		failure MACH_PORT_NULL is copied out instead.
 *
 * XXX This should be a BSD system call, not a Mach trap!!!
 */

kern_return_t
task_name_for_pid(
	struct task_name_for_pid_args *args)
{
	mach_port_name_t	target_tport = args->target_tport;
	int			pid = args->pid;
	user_addr_t		task_addr = args->t;
	struct uthread		*uthread;
	proc_t		p = PROC_NULL;
	task_t		t1;
	mach_port_name_t	tret;
	void * sright;
	int error = 0, refheld = 0;
	kauth_cred_t target_cred;

	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKNAMEFORPID);
	AUDIT_ARG(pid, pid);
	AUDIT_ARG(mach_port1, target_tport);

	/* The caller must present a valid task port for itself. */
	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return(KERN_FAILURE);
	} 


	/*
	 * Delayed binding of thread credential to process credential, if we
	 * are not running with an explicitly set thread credential.
	 */
	uthread = get_bsdthread_info(current_thread());
	kauth_cred_uthread_update(uthread, current_proc());

	p = proc_find(pid);
	AUDIT_ARG(process, p);
	if (p != PROC_NULL) {
		/* Credential ref released at "tnfpout" when refheld is set. */
		target_cred = kauth_cred_proc_ref(p);
		refheld = 1;

		/* Allowed for self, root, or matching euid + ruid. */
		if ((p->p_stat != SZOMB)
		    && ((current_proc() == p)
			|| kauth_cred_issuser(kauth_cred_get()) 
			|| ((kauth_cred_getuid(target_cred) == kauth_cred_getuid(kauth_cred_get())) && 
			    ((target_cred->cr_ruid == kauth_cred_get()->cr_ruid))))) {

			if (p->task != TASK_NULL) {
				task_reference(p->task);
#if CONFIG_MACF
				error = mac_proc_check_get_task_name(kauth_cred_get(),  p);
				if (error) {
					task_deallocate(p->task);
					goto noperm;
				}
#endif
				sright = (void *)convert_task_name_to_port(p->task);
				tret = ipc_port_copyout_send(sright, 
						get_task_ipcspace(current_task()));
			} else
				tret  = MACH_PORT_NULL;

			AUDIT_ARG(mach_port2, tret);
			(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
			task_deallocate(t1);
			error = KERN_SUCCESS;
			goto tnfpout;
		}
	}

#if CONFIG_MACF
noperm:
#endif
	/* Failure path: report MACH_PORT_NULL to the caller. */
    task_deallocate(t1);
	tret = MACH_PORT_NULL;
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
	error = KERN_FAILURE;
tnfpout:
	if (refheld != 0)
		kauth_cred_unref(&target_cred);
	if (p != PROC_NULL)
		proc_rele(p);
	AUDIT_MACH_SYSCALL_EXIT(error);
	return(error);
}
673
674 static int
675 sysctl_settfp_policy(__unused struct sysctl_oid *oidp, void *arg1,
676 __unused int arg2, struct sysctl_req *req)
677 {
678 int error = 0;
679 int new_value;
680
681 error = SYSCTL_OUT(req, arg1, sizeof(int));
682 if (error || req->newptr == USER_ADDR_NULL)
683 return(error);
684
685 if (!is_suser())
686 return(EPERM);
687
688 if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) {
689 goto out;
690 }
691 if ((new_value == KERN_TFP_POLICY_DENY)
692 || (new_value == KERN_TFP_POLICY_DEFAULT))
693 tfp_policy = new_value;
694 else
695 error = EINVAL;
696 out:
697 return(error);
698
699 }
700
/* Read-only sysctl reporting whether this is a SECURE_KERNEL build. */
#if defined(SECURE_KERNEL)
static int kern_secure_kernel = 1;
#else
static int kern_secure_kernel = 0;
#endif

SYSCTL_INT(_kern, OID_AUTO, secure_kernel, CTLFLAG_RD, &kern_secure_kernel, 0, "");

/* kern.tfp.policy: task_for_pid() policy knob (see sysctl_settfp_policy). */
SYSCTL_NODE(_kern, KERN_TFP, tfp, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "tfp");
SYSCTL_PROC(_kern_tfp, KERN_TFP_POLICY, policy, CTLTYPE_INT | CTLFLAG_RW,
    &tfp_policy, sizeof(uint32_t), &sysctl_settfp_policy ,"I","policy");

/* Shared-region knobs; the variables live in osfmk's shared region code. */
SYSCTL_INT(_vm, OID_AUTO, shared_region_trace_level, CTLFLAG_RW,
	   &shared_region_trace_level, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_version, CTLFLAG_RD,
	   &shared_region_version, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_persistence, CTLFLAG_RW,
	   &shared_region_persistence, 0, "");
719
720 /*
721 * shared_region_check_np:
722 *
723 * This system call is intended for dyld.
724 *
725 * dyld calls this when any process starts to see if the process's shared
726 * region is already set up and ready to use.
727 * This call returns the base address of the first mapping in the
728 * process's shared region's first mapping.
729 * dyld will then check what's mapped at that address.
730 *
731 * If the shared region is empty, dyld will then attempt to map the shared
732 * cache file in the shared region via the shared_region_map_np() system call.
733 *
734 * If something's already mapped in the shared region, dyld will check if it
735 * matches the shared cache it would like to use for that process.
736 * If it matches, evrything's ready and the process can proceed and use the
737 * shared region.
738 * If it doesn't match, dyld will unmap the shared region and map the shared
739 * cache into the process's address space via mmap().
740 *
741 * ERROR VALUES
742 * EINVAL no shared region
743 * ENOMEM shared region is empty
744 * EFAULT bad address for "start_address"
745 */
746 int
747 shared_region_check_np(
748 __unused struct proc *p,
749 struct shared_region_check_np_args *uap,
750 __unused int *retvalp)
751 {
752 vm_shared_region_t shared_region;
753 mach_vm_offset_t start_address;
754 int error;
755 kern_return_t kr;
756
757 SHARED_REGION_TRACE_DEBUG(
758 ("shared_region: %p [%d(%s)] -> check_np(0x%llx)\n",
759 current_thread(), p->p_pid, p->p_comm,
760 (uint64_t)uap->start_address));
761
762 /* retrieve the current tasks's shared region */
763 shared_region = vm_shared_region_get(current_task());
764 if (shared_region != NULL) {
765 /* retrieve address of its first mapping... */
766 kr = vm_shared_region_start_address(shared_region,
767 &start_address);
768 if (kr != KERN_SUCCESS) {
769 error = ENOMEM;
770 } else {
771 /* ... and give it to the caller */
772 error = copyout(&start_address,
773 (user_addr_t) uap->start_address,
774 sizeof (start_address));
775 if (error) {
776 SHARED_REGION_TRACE_ERROR(
777 ("shared_region: %p [%d(%s)] "
778 "check_np(0x%llx) "
779 "copyout(0x%llx) error %d\n",
780 current_thread(), p->p_pid, p->p_comm,
781 (uint64_t)uap->start_address, (uint64_t)start_address,
782 error));
783 }
784 }
785 vm_shared_region_deallocate(shared_region);
786 } else {
787 /* no shared region ! */
788 error = EINVAL;
789 }
790
791 SHARED_REGION_TRACE_DEBUG(
792 ("shared_region: %p [%d(%s)] check_np(0x%llx) <- 0x%llx %d\n",
793 current_thread(), p->p_pid, p->p_comm,
794 (uint64_t)uap->start_address, (uint64_t)start_address, error));
795
796 return error;
797 }
798
799 /*
800 * shared_region_map_np()
801 *
802 * This system call is intended for dyld.
803 *
804 * dyld uses this to map a shared cache file into a shared region.
805 * This is usually done only the first time a shared cache is needed.
806 * Subsequent processes will just use the populated shared region without
807 * requiring any further setup.
808 */
809 int
810 shared_region_map_np(
811 struct proc *p,
812 struct shared_region_map_np_args *uap,
813 __unused int *retvalp)
814 {
815 int error;
816 kern_return_t kr;
817 int fd;
818 struct fileproc *fp;
819 struct vnode *vp, *root_vp;
820 struct vnode_attr va;
821 off_t fs;
822 memory_object_size_t file_size;
823 user_addr_t user_mappings;
824 struct shared_file_mapping_np *mappings;
825 #define SFM_MAX_STACK 8
826 struct shared_file_mapping_np stack_mappings[SFM_MAX_STACK];
827 unsigned int mappings_count;
828 vm_size_t mappings_size;
829 memory_object_control_t file_control;
830 struct vm_shared_region *shared_region;
831
832 SHARED_REGION_TRACE_DEBUG(
833 ("shared_region: %p [%d(%s)] -> map\n",
834 current_thread(), p->p_pid, p->p_comm));
835
836 shared_region = NULL;
837 mappings_count = 0;
838 mappings_size = 0;
839 mappings = NULL;
840 fp = NULL;
841 vp = NULL;
842
843 /* get file descriptor for shared region cache file */
844 fd = uap->fd;
845
846 /* get file structure from file descriptor */
847 error = fp_lookup(p, fd, &fp, 0);
848 if (error) {
849 SHARED_REGION_TRACE_ERROR(
850 ("shared_region: %p [%d(%s)] map: "
851 "fd=%d lookup failed (error=%d)\n",
852 current_thread(), p->p_pid, p->p_comm, fd, error));
853 goto done;
854 }
855
856 /* make sure we're attempting to map a vnode */
857 if (fp->f_fglob->fg_type != DTYPE_VNODE) {
858 SHARED_REGION_TRACE_ERROR(
859 ("shared_region: %p [%d(%s)] map: "
860 "fd=%d not a vnode (type=%d)\n",
861 current_thread(), p->p_pid, p->p_comm,
862 fd, fp->f_fglob->fg_type));
863 error = EINVAL;
864 goto done;
865 }
866
867 /* we need at least read permission on the file */
868 if (! (fp->f_fglob->fg_flag & FREAD)) {
869 SHARED_REGION_TRACE_ERROR(
870 ("shared_region: %p [%d(%s)] map: "
871 "fd=%d not readable\n",
872 current_thread(), p->p_pid, p->p_comm, fd));
873 error = EPERM;
874 goto done;
875 }
876
877 /* get vnode from file structure */
878 error = vnode_getwithref((vnode_t) fp->f_fglob->fg_data);
879 if (error) {
880 SHARED_REGION_TRACE_ERROR(
881 ("shared_region: %p [%d(%s)] map: "
882 "fd=%d getwithref failed (error=%d)\n",
883 current_thread(), p->p_pid, p->p_comm, fd, error));
884 goto done;
885 }
886 vp = (struct vnode *) fp->f_fglob->fg_data;
887
888 /* make sure the vnode is a regular file */
889 if (vp->v_type != VREG) {
890 SHARED_REGION_TRACE_ERROR(
891 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
892 "not a file (type=%d)\n",
893 current_thread(), p->p_pid, p->p_comm,
894 vp, vp->v_name, vp->v_type));
895 error = EINVAL;
896 goto done;
897 }
898
899 /* make sure vnode is on the process's root volume */
900 root_vp = p->p_fd->fd_rdir;
901 if (root_vp == NULL) {
902 root_vp = rootvnode;
903 }
904 if (vp->v_mount != root_vp->v_mount) {
905 SHARED_REGION_TRACE_ERROR(
906 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
907 "not on process's root volume\n",
908 current_thread(), p->p_pid, p->p_comm,
909 vp, vp->v_name));
910 error = EPERM;
911 goto done;
912 }
913
914 /* make sure vnode is owned by "root" */
915 VATTR_INIT(&va);
916 VATTR_WANTED(&va, va_uid);
917 error = vnode_getattr(vp, &va, vfs_context_current());
918 if (error) {
919 SHARED_REGION_TRACE_ERROR(
920 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
921 "vnode_getattr(%p) failed (error=%d)\n",
922 current_thread(), p->p_pid, p->p_comm,
923 vp, vp->v_name, vp, error));
924 goto done;
925 }
926 if (va.va_uid != 0) {
927 SHARED_REGION_TRACE_ERROR(
928 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
929 "owned by uid=%d instead of 0\n",
930 current_thread(), p->p_pid, p->p_comm,
931 vp, vp->v_name, va.va_uid));
932 error = EPERM;
933 goto done;
934 }
935
936 /* get vnode size */
937 error = vnode_size(vp, &fs, vfs_context_current());
938 if (error) {
939 SHARED_REGION_TRACE_ERROR(
940 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
941 "vnode_size(%p) failed (error=%d)\n",
942 current_thread(), p->p_pid, p->p_comm,
943 vp, vp->v_name, vp, error));
944 goto done;
945 }
946 file_size = fs;
947
948 /* get the file's memory object handle */
949 file_control = ubc_getobject(vp, UBC_HOLDOBJECT);
950 if (file_control == MEMORY_OBJECT_CONTROL_NULL) {
951 SHARED_REGION_TRACE_ERROR(
952 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
953 "no memory object\n",
954 current_thread(), p->p_pid, p->p_comm,
955 vp, vp->v_name));
956 error = EINVAL;
957 goto done;
958 }
959
960 /* get the list of mappings the caller wants us to establish */
961 mappings_count = uap->count; /* number of mappings */
962 mappings_size = (vm_size_t) (mappings_count * sizeof (mappings[0]));
963 if (mappings_count == 0) {
964 SHARED_REGION_TRACE_INFO(
965 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
966 "no mappings\n",
967 current_thread(), p->p_pid, p->p_comm,
968 vp, vp->v_name));
969 error = 0; /* no mappings: we're done ! */
970 goto done;
971 } else if (mappings_count <= SFM_MAX_STACK) {
972 mappings = &stack_mappings[0];
973 } else {
974 SHARED_REGION_TRACE_ERROR(
975 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
976 "too many mappings (%d)\n",
977 current_thread(), p->p_pid, p->p_comm,
978 vp, vp->v_name, mappings_count));
979 error = EINVAL;
980 goto done;
981 }
982
983 user_mappings = uap->mappings; /* the mappings, in user space */
984 error = copyin(user_mappings,
985 mappings,
986 mappings_size);
987 if (error) {
988 SHARED_REGION_TRACE_ERROR(
989 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
990 "copyin(0x%llx, %d) failed (error=%d)\n",
991 current_thread(), p->p_pid, p->p_comm,
992 vp, vp->v_name, (uint64_t)user_mappings, mappings_count, error));
993 goto done;
994 }
995
996 /* get the process's shared region (setup in vm_map_exec()) */
997 shared_region = vm_shared_region_get(current_task());
998 if (shared_region == NULL) {
999 SHARED_REGION_TRACE_ERROR(
1000 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1001 "no shared region\n",
1002 current_thread(), p->p_pid, p->p_comm,
1003 vp, vp->v_name));
1004 goto done;
1005 }
1006
1007 /* map the file into that shared region's submap */
1008 kr = vm_shared_region_map_file(shared_region,
1009 mappings_count,
1010 mappings,
1011 file_control,
1012 file_size,
1013 (void *) p->p_fd->fd_rdir);
1014 if (kr != KERN_SUCCESS) {
1015 SHARED_REGION_TRACE_ERROR(
1016 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1017 "vm_shared_region_map_file() failed kr=0x%x\n",
1018 current_thread(), p->p_pid, p->p_comm,
1019 vp, vp->v_name, kr));
1020 switch (kr) {
1021 case KERN_INVALID_ADDRESS:
1022 error = EFAULT;
1023 break;
1024 case KERN_PROTECTION_FAILURE:
1025 error = EPERM;
1026 break;
1027 case KERN_NO_SPACE:
1028 error = ENOMEM;
1029 break;
1030 case KERN_FAILURE:
1031 case KERN_INVALID_ARGUMENT:
1032 default:
1033 error = EINVAL;
1034 break;
1035 }
1036 goto done;
1037 }
1038
1039 /*
1040 * The mapping was successful. Let the buffer cache know
1041 * that we've mapped that file with these protections. This
1042 * prevents the vnode from getting recycled while it's mapped.
1043 */
1044 (void) ubc_map(vp, VM_PROT_READ);
1045 error = 0;
1046
1047 /* update the vnode's access time */
1048 if (! (vnode_vfsvisflags(vp) & MNT_NOATIME)) {
1049 VATTR_INIT(&va);
1050 nanotime(&va.va_access_time);
1051 VATTR_SET_ACTIVE(&va, va_access_time);
1052 vnode_setattr(vp, &va, vfs_context_current());
1053 }
1054
1055 if (p->p_flag & P_NOSHLIB) {
1056 /* signal that this process is now using split libraries */
1057 OSBitAndAtomic(~((uint32_t)P_NOSHLIB), (UInt32 *)&p->p_flag);
1058 }
1059
1060 done:
1061 if (vp != NULL) {
1062 /*
1063 * release the vnode...
1064 * ubc_map() still holds it for us in the non-error case
1065 */
1066 (void) vnode_put(vp);
1067 vp = NULL;
1068 }
1069 if (fp != NULL) {
1070 /* release the file descriptor */
1071 fp_drop(p, fd, fp, 0);
1072 fp = NULL;
1073 }
1074
1075 if (shared_region != NULL) {
1076 vm_shared_region_deallocate(shared_region);
1077 }
1078
1079 SHARED_REGION_TRACE_DEBUG(
1080 ("shared_region: %p [%d(%s)] <- map\n",
1081 current_thread(), p->p_pid, p->p_comm));
1082
1083 return error;
1084 }
1085
1086
/* sysctl overflow room */

/* vm_page_free_target is provided as a makeshift solution for applications that want to
	allocate buffer space, possibly purgeable memory, but not cause inactive pages to be
	reclaimed. It allows the app to calculate how much memory is free outside the free target. */
/* Defined in osfmk's pageout code; exported read-only in pages. */
extern unsigned int	vm_page_free_target;
SYSCTL_INT(_vm, OID_AUTO, vm_page_free_target, CTLFLAG_RD, 
		   &vm_page_free_target, 0, "Pageout daemon free target");
1095