]> git.saurik.com Git - apple/xnu.git/blob - bsd/vm/vm_unix.c
xnu-1228.0.2.tar.gz
[apple/xnu.git] / bsd / vm / vm_unix.c
1 /*
2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Mach Operating System
30 * Copyright (c) 1987 Carnegie-Mellon University
31 * All rights reserved. The CMU software License Agreement specifies
32 * the terms and conditions for use and redistribution.
33 */
34 /*
35 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
36 * support for mandatory and extensible security protections. This notice
37 * is included in support of clause 2.2 (b) of the Apple Public License,
38 * Version 2.0.
39 */
40
41 #include <meta_features.h>
42
43 #include <kern/task.h>
44 #include <kern/thread.h>
45 #include <kern/debug.h>
46 #include <kern/lock.h>
47 #include <mach/mach_traps.h>
48 #include <mach/port.h>
49 #include <mach/task.h>
50 #include <mach/task_access.h>
51 #include <mach/task_special_ports.h>
52 #include <mach/time_value.h>
53 #include <mach/vm_map.h>
54 #include <mach/vm_param.h>
55 #include <mach/vm_prot.h>
56
57 #include <sys/file_internal.h>
58 #include <sys/param.h>
59 #include <sys/systm.h>
60 #include <sys/dir.h>
61 #include <sys/namei.h>
62 #include <sys/proc_internal.h>
63 #include <sys/kauth.h>
64 #include <sys/vm.h>
65 #include <sys/file.h>
66 #include <sys/vnode_internal.h>
67 #include <sys/mount.h>
68 #include <sys/trace.h>
69 #include <sys/kernel.h>
70 #include <sys/ubc_internal.h>
71 #include <sys/user.h>
72 #include <sys/syslog.h>
73 #include <sys/stat.h>
74 #include <sys/sysproto.h>
75 #include <sys/mman.h>
76 #include <sys/sysctl.h>
77
78 #include <bsm/audit_kernel.h>
79 #include <bsm/audit_kevents.h>
80
81 #include <kern/kalloc.h>
82 #include <vm/vm_map.h>
83 #include <vm/vm_kern.h>
84
85 #include <machine/spl.h>
86
87 #include <mach/shared_region.h>
88 #include <vm/vm_shared_region.h>
89
90 #include <vm/vm_protos.h>
91
92 /*
93 * Sysctl's related to data/stack execution. See osfmk/vm/vm_map.c
94 */
95
96 extern int allow_stack_exec, allow_data_exec;
97
98 SYSCTL_INT(_vm, OID_AUTO, allow_stack_exec, CTLFLAG_RW, &allow_stack_exec, 0, "");
99 SYSCTL_INT(_vm, OID_AUTO, allow_data_exec, CTLFLAG_RW, &allow_data_exec, 0, "");
100
#if CONFIG_NO_PRINTF_STRINGS
/*
 * Stub used when printf format strings are compiled out of the kernel:
 * silently discard the execution-failure report.
 */
void
log_stack_execution_failure(__unused addr64_t a, __unused vm_prot_t b)
{
}
#else
/*
 * Human-readable names for the 8 possible combinations of
 * VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE, indexed by the
 * protection bits themselves (prot & VM_PROT_ALL).
 */
static const char *prot_values[] = {
	"none",
	"read-only",
	"write-only",
	"read-write",
	"execute-only",
	"read-execute",
	"write-execute",
	"read-write-execute"
};

/*
 * Log an attempt to execute from a page whose protections do not
 * allow it (data/stack execution prevention).  Reports the faulting
 * process name and pid, the virtual address, and the page protections.
 */
void
log_stack_execution_failure(addr64_t vaddr, vm_prot_t prot)
{
	printf("Data/Stack execution not permitted: %s[pid %d] at virtual address 0x%qx, protections were %s\n",
		current_proc()->p_comm, current_proc()->p_pid, vaddr, prot_values[prot & VM_PROT_ALL]);
}
#endif
125
126
/*
 * Check whether the user address range [addr, addr+len) in the current
 * task's map permits the requested access (B_READ => read access,
 * anything else => write access).  Returns non-zero if accessible.
 */
int
useracc(
	user_addr_t	addr,
	user_size_t	len,
	int	prot)
{
	vm_prot_t	required;

	required = (prot == B_READ) ? VM_PROT_READ : VM_PROT_WRITE;
	return (vm_map_check_protection(current_map(),
					vm_map_trunc_page(addr),
					vm_map_round_page(addr + len),
					required));
}
138
/*
 * Wire down the user pages spanning [addr, addr+len) in the current
 * map so they cannot be paged out.  The Mach result is translated to
 * a BSD errno: 0 on success, ENOMEM for bad/unavailable addresses,
 * EACCES for a protection failure, EINVAL otherwise.
 */
int
vslock(
	user_addr_t	addr,
	user_size_t	len)
{
	kern_return_t	result;

	result = vm_map_wire(current_map(),
			     vm_map_trunc_page(addr),
			     vm_map_round_page(addr + len),
			     VM_PROT_READ | VM_PROT_WRITE,
			     FALSE);

	if (result == KERN_SUCCESS)
		return (0);
	if (result == KERN_INVALID_ADDRESS || result == KERN_NO_SPACE)
		return (ENOMEM);
	if (result == KERN_PROTECTION_FAILURE)
		return (EACCES);
	return (EINVAL);
}
161
/*
 * Unwire the user pages spanning [addr, addr+len) previously wired by
 * vslock().  The 'dirtied' hint (mark pages modified before unwiring)
 * is currently unimplemented -- see the FIXME-guarded code below.
 * The Mach result is translated to a BSD errno, mirroring vslock().
 */
int
vsunlock(
	user_addr_t addr,
	user_size_t len,
	__unused int dirtied)
{
#if FIXME  /* [ */
	pmap_t		pmap;
	vm_page_t	pg;
	vm_map_offset_t	vaddr;
	ppnum_t		paddr;
#endif  /* FIXME ] */
	kern_return_t	kret;

#if FIXME  /* [ */
	/* Intended behavior: mark every page in the range modified. */
	if (dirtied) {
		pmap = get_task_pmap(current_task());
		for (vaddr = vm_map_trunc_page(addr);
		     vaddr < vm_map_round_page(addr+len);
		     vaddr += PAGE_SIZE) {
			paddr = pmap_extract(pmap, vaddr);
			pg = PHYS_TO_VM_PAGE(paddr);
			vm_page_set_modified(pg);
		}
	}
#endif  /* FIXME ] */
#ifdef	lint
	/* keep lint from complaining about the unused parameter */
	dirtied++;
#endif	/* lint */
	kret = vm_map_unwire(current_map(), vm_map_trunc_page(addr),
				vm_map_round_page(addr+len), FALSE);
	switch (kret) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}
205
/*
 * Store a single byte at the given user address.
 * Returns 0 on success, -1 if the copyout fails.
 */
int
subyte(
	user_addr_t addr,
	int byte)
{
	char	c;

	c = (char)byte;
	if (copyout((void *)&c, addr, sizeof(char)) != 0)
		return (-1);
	return (0);
}
216
/*
 * Store a single byte of "instruction" space at the given user
 * address.  Identical in effect to subyte() on this architecture.
 * Returns 0 on success, -1 if the copyout fails.
 */
int
suibyte(
	user_addr_t addr,
	int byte)
{
	char	c;

	c = (char)byte;
	if (copyout((void *)&c, addr, sizeof(char)) != 0)
		return (-1);
	return (0);
}
227
/*
 * Fetch a single byte from the given user address.
 * Returns the byte zero-extended to int, or -1 if the copyin fails.
 */
int fubyte(user_addr_t addr)
{
	unsigned char	c;

	if (copyin(addr, (void *)&c, sizeof(char)) != 0)
		return (-1);
	return ((int)c);
}
236
/*
 * Fetch a single byte of "instruction" space from the given user
 * address.  Identical in effect to fubyte() on this architecture.
 * Returns the byte zero-extended to int, or -1 if the copyin fails.
 */
int fuibyte(user_addr_t addr)
{
	unsigned char	c;

	if (copyin(addr, (void *)&c, sizeof(char)) != 0)
		return (-1);
	return ((int)c);
}
245
/*
 * Store a 32-bit word at the given user address.
 * NOTE(review): only sizeof(int) bytes of 'word' are written; this
 * assumes int and long are the same size (ILP32 kernel) -- confirm
 * before reusing on an LP64 configuration.
 * Returns 0 on success, -1 on copyout failure.
 */
int
suword(
	user_addr_t addr,
	long word)
{
	return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1);
}
253
/*
 * Fetch a 32-bit word from the given user address.
 * NOTE(review): only sizeof(int) bytes are copied into 'word'; this
 * assumes long and int are the same size (ILP32 kernel), otherwise the
 * upper bytes would be left uninitialized -- confirm before reusing.
 * Returns the word, or -1 on copyin failure (indistinguishable from a
 * stored value of -1).
 */
long fuword(user_addr_t addr)
{
	long word;

	if (copyin(addr, (void *) &word, sizeof(int)))
		return(-1);
	return(word);
}
262
263 /* suiword and fuiword are the same as suword and fuword, respectively */
264
/*
 * Store a 32-bit "instruction" word at the given user address.
 * Same contract as suword() on this architecture, so delegate to it.
 * Returns 0 on success, -1 on copyout failure.
 */
int
suiword(
	user_addr_t addr,
	long word)
{
	return (suword(addr, word));
}
272
/*
 * Fetch a 32-bit "instruction" word from the given user address.
 * Same contract as fuword() on this architecture, so delegate to it.
 * Returns the word, or -1 on copyin failure.
 */
long fuiword(user_addr_t addr)
{
	return (fuword(addr));
}
281
282 /*
283 * With a 32-bit kernel and mixed 32/64-bit user tasks, this interface allows the
284 * fetching and setting of process-sized size_t and pointer values.
285 */
/*
 * Store a process-sized long at a user address: a full 64 bits for a
 * 64-bit process, 32 bits (via suiword) for a 32-bit process.
 * Returns 0 on success, -1 on failure.
 */
int
sulong(user_addr_t addr, int64_t word)
{
	if (!IS_64BIT_PROCESS(current_proc()))
		return (suiword(addr, (long)word));

	return (copyout((void *)&word, addr, sizeof(word)) == 0 ? 0 : -1);
}
296
/*
 * Fetch a process-sized long from a user address: a full 64 bits for
 * a 64-bit process, 32 bits (via fuiword, sign-extended) otherwise.
 * Returns -1 on failure (indistinguishable from a stored -1).
 */
int64_t
fulong(user_addr_t addr)
{
	int64_t longword;

	if (!IS_64BIT_PROCESS(current_proc()))
		return ((int64_t)fuiword(addr));

	if (copyin(addr, (void *)&longword, sizeof(longword)) != 0)
		return (-1);
	return (longword);
}
310
/*
 * Store a process-sized unsigned long at a user address: a full
 * 64 bits for a 64-bit process, 32 bits (via suiword) otherwise.
 * Returns 0 on success, -1 on failure.
 */
int
suulong(user_addr_t addr, uint64_t uword)
{
	if (!IS_64BIT_PROCESS(current_proc()))
		return (suiword(addr, (u_long)uword));

	return (copyout((void *)&uword, addr, sizeof(uword)) == 0 ? 0 : -1);
}
321
/*
 * Fetch a process-sized unsigned long from a user address: a full
 * 64 bits for a 64-bit process, 32 bits (via fuiword) otherwise.
 * Returns -1ULL on failure (indistinguishable from a stored -1ULL).
 */
uint64_t
fuulong(user_addr_t addr)
{
	uint64_t ulongword;

	if (!IS_64BIT_PROCESS(current_proc()))
		return ((uint64_t)fuiword(addr));

	if (copyin(addr, (void *)&ulongword, sizeof(ulongword)) != 0)
		return (-1ULL);
	return (ulongword);
}
335
/*
 * swapon() system call: swap configuration is not supported through
 * this interface on this kernel, so always fail with ENOTSUP.
 */
int
swapon(__unused proc_t procp, __unused struct swapon_args *uap, __unused int *retval)
{
	return(ENOTSUP);
}
341
342
/*
 * Routine:	pid_for_task
 * Purpose:
 *	Given a Mach port name for a task, return the BSD pid of the
 *	process backing that task.  The pid (or -1 on failure) is
 *	copied out to the user address supplied in the trap arguments.
 * Returns:
 *	KERN_SUCCESS if the port names a task with a BSD process,
 *	KERN_FAILURE otherwise.
 */
kern_return_t
pid_for_task(
	struct pid_for_task_args *args)
{
	mach_port_name_t	t = args->t;
	user_addr_t		pid_addr  = args->pid;
	proc_t p;
	task_t		t1;
	int	pid = -1;	/* reported value on any failure path */
	kern_return_t	err = KERN_SUCCESS;

	AUDIT_MACH_SYSCALL_ENTER(AUE_PIDFORTASK);
	AUDIT_ARG(mach_port1, t);

	/* takes a task reference on success (released below) */
	t1 = port_name_to_task(t);

	if (t1 == TASK_NULL) {
		err = KERN_FAILURE;
		goto pftout;
	} else {
		p = get_bsdtask_info(t1);
		if (p) {
			pid  = proc_pid(p);
			err = KERN_SUCCESS;
		} else {
			/* task exists but has no BSD process */
			err = KERN_FAILURE;
		}
	}
	task_deallocate(t1);
pftout:
	AUDIT_ARG(pid, pid);
	/* copyout result intentionally ignored: err already reflects status */
	(void) copyout((char *) &pid, pid_addr, sizeof(int));
	AUDIT_MACH_SYSCALL_EXIT(err);
	return(err);
}
378
379 /*
380 *
381 * tfp_policy = KERN_TFP_POLICY_DENY; Deny Mode: None allowed except for self
382 * tfp_policy = KERN_TFP_POLICY_DEFAULT; default mode: all posix checks and upcall via task port for authentication
383 *
384 */
385 static int tfp_policy = KERN_TFP_POLICY_DEFAULT;
386
387 /*
388 * Routine: task_for_pid_posix_check
389 * Purpose:
390 * Verify that the current process should be allowed to
391 * get the target process's task port. This is only
392 * permitted if:
393 * - The current process is root
394 * OR all of the following are true:
395 * - The target process's real, effective, and saved uids
396 * are the same as the current proc's euid,
397 * - The target process's group set is a subset of the
398 * calling process's group set, and
399 * - The target process hasn't switched credentials.
400 *
401 * Returns: TRUE: permitted
402 * FALSE: denied
403 */
static int
task_for_pid_posix_check(proc_t target)
{
	kauth_cred_t targetcred, mycred;
	uid_t myuid;
	int allowed;

	/* No task_for_pid on bad targets (missing or zombie process) */
	if (target == PROC_NULL || target->p_stat == SZOMB) {
		return FALSE;
	}

	mycred = kauth_cred_get();
	myuid = kauth_cred_getuid(mycred);

	/* If we're running as root, the check passes */
	if (kauth_cred_issuser(mycred))
		return TRUE;

	/* We're allowed to get our own task port */
	if (target == current_proc())
		return TRUE;

	/*
	 * Under DENY, only root can get another proc's task port,
	 * so no more checks are needed.
	 */
	if (tfp_policy == KERN_TFP_POLICY_DENY) {
		return FALSE;
	}

	/* Take a credential reference; dropped at 'out' on every path. */
	targetcred = kauth_cred_proc_ref(target);
	allowed = TRUE;

	/* Do target's ruid, euid, and saved uid match my euid? */
	if ((kauth_cred_getuid(targetcred) != myuid) ||
			(targetcred->cr_ruid != myuid) ||
			(targetcred->cr_svuid != myuid)) {
		allowed = FALSE;
		goto out;
	}

	/* Are target's groups a subset of my groups? */
	if (kauth_cred_gid_subset(targetcred, mycred, &allowed) ||
			allowed == 0) {
		allowed = FALSE;
		goto out;
	}

	/* Has target switched credentials? (setuid/setgid processes denied) */
	if (target->p_flag & P_SUGID) {
		allowed = FALSE;
		goto out;
	}

	/* All checks passed: 'allowed' is still TRUE here. */
out:
	kauth_cred_unref(&targetcred);
	return allowed;
}
463
464 /*
465 * Routine: task_for_pid
466 * Purpose:
467 * Get the task port for another "process", named by its
468 * process ID on the same host as "target_task".
469 *
470 * Only permitted to privileged processes, or processes
471 * with the same user ID.
472 *
473 * XXX This should be a BSD system call, not a Mach trap!!!
474 */
kern_return_t
task_for_pid(
	struct task_for_pid_args *args)
{
	mach_port_name_t	target_tport = args->target_tport;
	int			pid = args->pid;
	user_addr_t		task_addr = args->t;
	struct uthread		*uthread;
	proc_t 			p = PROC_NULL;
	task_t			t1 = TASK_NULL;
	mach_port_name_t	tret = MACH_PORT_NULL;
	ipc_port_t 		tfpport;
	void * sright;
	int error = 0;

	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKFORPID);
	AUDIT_ARG(pid, pid);
	AUDIT_ARG(mach_port1, target_tport);

#if defined(SECURE_KERNEL)
	/* Secure kernels never hand out the kernel task's port (pid 0). */
	if (0 == pid) {
		(void ) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return(KERN_FAILURE);
	}
#endif

	/* takes a task reference on success; released at 'tfpout' */
	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return(KERN_FAILURE);
	}


	/*
	 * Delayed binding of thread credential to process credential, if we
	 * are not running with an explicitly set thread credential.
	 */
	uthread = get_bsdthread_info(current_thread());
	kauth_cred_uthread_update(uthread, current_proc());

	p = proc_find(pid);
	AUDIT_ARG(process, p);

	/* posix_check also rejects p == PROC_NULL and zombies */
	if (!(task_for_pid_posix_check(p))) {
		error = KERN_FAILURE;
		goto tfpout;
	}

	if (p->task != TASK_NULL) {
		/* If we aren't root and target's task access port is set... */
		if (!kauth_cred_issuser(kauth_cred_get()) &&
			(task_get_task_access_port(p->task, &tfpport) == 0) &&
			(tfpport != IPC_PORT_NULL)) {

			if (tfpport == IPC_PORT_DEAD) {
				error = KERN_PROTECTION_FAILURE;
				goto tfpout;
			}

			/* Call up to the task access server */
			error = check_task_access(tfpport, proc_selfpid(), kauth_getgid(), pid);

			if (error != MACH_MSG_SUCCESS) {
				if (error == MACH_RCV_INTERRUPTED)
					error = KERN_ABORTED;
				else
					error = KERN_FAILURE;
				goto tfpout;
			}
		}
#if CONFIG_MACF
		/* Give MAC policies a chance to veto the grant. */
		error = mac_proc_check_get_task(kauth_cred_get(), p);
		if (error) {
			error = KERN_FAILURE;
			goto tfpout;
		}
#endif

		/* Grant task port access */
		task_reference(p->task);
		sright = (void *) convert_task_to_port(p->task);
		tret = ipc_port_copyout_send(
				sright,
				get_task_ipcspace(current_task()));
	}
	/* Note: a PROC with no task still reports success with MACH_PORT_NULL. */
	error = KERN_SUCCESS;

tfpout:
	task_deallocate(t1);
	AUDIT_ARG(mach_port2, tret);
	/* tret is MACH_PORT_NULL on every failure path */
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
	if (p != PROC_NULL)
		proc_rele(p);
	AUDIT_MACH_SYSCALL_EXIT(error);
	return(error);
}
573
574 /*
575 * Routine: task_name_for_pid
576 * Purpose:
577 * Get the task name port for another "process", named by its
578 * process ID on the same host as "target_task".
579 *
580 * Only permitted to privileged processes, or processes
581 * with the same user ID.
582 *
583 * XXX This should be a BSD system call, not a Mach trap!!!
584 */
585
kern_return_t
task_name_for_pid(
	struct task_name_for_pid_args *args)
{
	mach_port_name_t	target_tport = args->target_tport;
	int			pid = args->pid;
	user_addr_t		task_addr = args->t;
	struct uthread		*uthread;
	proc_t		p = PROC_NULL;
	task_t		t1;
	mach_port_name_t	tret;
	void * sright;
	int error = 0, refheld = 0;
	kauth_cred_t target_cred;

	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKNAMEFORPID);
	AUDIT_ARG(pid, pid);
	AUDIT_ARG(mach_port1, target_tport);

	/* takes a task reference on success; released on every exit path */
	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return(KERN_FAILURE);
	}


	/*
	 * Delayed binding of thread credential to process credential, if we
	 * are not running with an explicitly set thread credential.
	 */
	uthread = get_bsdthread_info(current_thread());
	kauth_cred_uthread_update(uthread, current_proc());

	p = proc_find(pid);
	AUDIT_ARG(process, p);
	if (p != PROC_NULL) {
		target_cred = kauth_cred_proc_ref(p);
		refheld = 1;

		/*
		 * Permitted if the target is alive and the caller is the
		 * target itself, the superuser, or has matching effective
		 * and real uids.
		 */
		if ((p->p_stat != SZOMB)
		    && ((current_proc() == p)
			|| kauth_cred_issuser(kauth_cred_get())
			|| ((kauth_cred_getuid(target_cred) == kauth_cred_getuid(kauth_cred_get())) &&
			    ((target_cred->cr_ruid == kauth_cred_get()->cr_ruid))))) {

			if (p->task != TASK_NULL) {
				task_reference(p->task);
#if CONFIG_MACF
				/* Give MAC policies a chance to veto the grant. */
				error = mac_proc_check_get_task_name(kauth_cred_get(),  p);
				if (error) {
					task_deallocate(p->task);
					goto noperm;
				}
#endif
				sright = (void *)convert_task_name_to_port(p->task);
				tret = ipc_port_copyout_send(sright,
						get_task_ipcspace(current_task()));
			} else
				tret  = MACH_PORT_NULL;

			AUDIT_ARG(mach_port2, tret);
			(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
			task_deallocate(t1);
			error = KERN_SUCCESS;
			goto tnfpout;
		}
	}

	/* Failure path: report MACH_PORT_NULL and KERN_FAILURE. */
#if CONFIG_MACF
noperm:
#endif
    task_deallocate(t1);
	tret = MACH_PORT_NULL;
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
	error = KERN_FAILURE;
tnfpout:
	if (refheld != 0)
		kauth_cred_unref(&target_cred);
	if (p != PROC_NULL)
		proc_rele(p);
	AUDIT_MACH_SYSCALL_EXIT(error);
	return(error);
}
670
/*
 * sysctl handler for kern.tfp.policy: report the current task_for_pid
 * policy, and allow the superuser to set it to KERN_TFP_POLICY_DENY
 * or KERN_TFP_POLICY_DEFAULT.  Any other value yields EINVAL.
 */
static int
sysctl_settfp_policy(__unused struct sysctl_oid *oidp, void *arg1,
    __unused int arg2, struct sysctl_req *req)
{
	int error;
	int new_value;

	/* Always report the current value first. */
	error = SYSCTL_OUT(req, arg1, sizeof(int));
	if (error != 0 || req->newptr == USER_ADDR_NULL)
		return(error);

	/* Only the superuser may change the policy. */
	if (!is_suser())
		return(EPERM);

	error = SYSCTL_IN(req, &new_value, sizeof(int));
	if (error != 0)
		return(error);

	switch (new_value) {
	case KERN_TFP_POLICY_DENY:
	case KERN_TFP_POLICY_DEFAULT:
		tfp_policy = new_value;
		break;
	default:
		error = EINVAL;
		break;
	}

	return(error);
}
697
/*
 * kern.secure_kernel: read-only sysctl reporting whether this kernel
 * was built with SECURE_KERNEL defined.
 */
#if defined(SECURE_KERNEL)
static int kern_secure_kernel = 1;
#else
static int kern_secure_kernel = 0;
#endif

SYSCTL_INT(_kern, OID_AUTO, secure_kernel, CTLFLAG_RD, &kern_secure_kernel, 0, "");

/* kern.tfp.policy: task_for_pid policy, validated by sysctl_settfp_policy() */
SYSCTL_NODE(_kern, KERN_TFP, tfp, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "tfp");
SYSCTL_PROC(_kern_tfp, KERN_TFP_POLICY, policy, CTLTYPE_INT | CTLFLAG_RW,
    &tfp_policy, sizeof(uint32_t), &sysctl_settfp_policy ,"I","policy");

/* vm.shared_region_*: tracing and behavior knobs for the shared region code */
SYSCTL_INT(_vm, OID_AUTO, shared_region_trace_level, CTLFLAG_RW,
	   &shared_region_trace_level, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_version, CTLFLAG_RD,
	   &shared_region_version, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_persistence, CTLFLAG_RW,
	   &shared_region_persistence, 0, "");
716
717 /*
718 * shared_region_check_np:
719 *
720 * This system call is intended for dyld.
721 *
722 * dyld calls this when any process starts to see if the process's shared
723 * region is already set up and ready to use.
724 * This call returns the base address of the first mapping in the
725 * process's shared region's first mapping.
726 * dyld will then check what's mapped at that address.
727 *
728 * If the shared region is empty, dyld will then attempt to map the shared
729 * cache file in the shared region via the shared_region_map_np() system call.
730 *
731 * If something's already mapped in the shared region, dyld will check if it
732 * matches the shared cache it would like to use for that process.
733 * If it matches, evrything's ready and the process can proceed and use the
734 * shared region.
735 * If it doesn't match, dyld will unmap the shared region and map the shared
736 * cache into the process's address space via mmap().
737 *
738 * ERROR VALUES
739 * EINVAL no shared region
740 * ENOMEM shared region is empty
741 * EFAULT bad address for "start_address"
742 */
/*
 * Return the base address of the first mapping in the current task's
 * shared region (see the block comment above for the dyld protocol).
 * Errors: EINVAL (no shared region), ENOMEM (region empty),
 * EFAULT (bad "start_address" pointer).
 */
int
shared_region_check_np(
	__unused struct proc			*p,
	struct shared_region_check_np_args	*uap,
	__unused int				*retvalp)
{
	vm_shared_region_t	shared_region;
	/*
	 * BUG FIX: start_address must be initialized -- the exit trace
	 * below reads it even on the error paths where it is never set
	 * (no shared region, or vm_shared_region_start_address failure),
	 * which was an uninitialized read.
	 */
	mach_vm_offset_t	start_address = 0;
	int			error;
	kern_return_t		kr;

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] -> check_np(0x%llx)\n",
		 current_thread(), p->p_pid, p->p_comm,
		 (uint64_t)uap->start_address));

	/* retrieve the current tasks's shared region */
	shared_region = vm_shared_region_get(current_task());
	if (shared_region != NULL) {
		/* retrieve address of its first mapping... */
		kr = vm_shared_region_start_address(shared_region,
						    &start_address);
		if (kr != KERN_SUCCESS) {
			/* shared region exists but is empty */
			error = ENOMEM;
		} else {
			/* ... and give it to the caller */
			error = copyout(&start_address,
					(user_addr_t) uap->start_address,
					sizeof (start_address));
			if (error) {
				SHARED_REGION_TRACE_ERROR(
					("shared_region: %p [%d(%s)] "
					 "check_np(0x%llx) "
					 "copyout(0x%llx) error %d\n",
					 current_thread(), p->p_pid, p->p_comm,
					 (uint64_t)uap->start_address, (uint64_t)start_address,
					 error));
			}
		}
		vm_shared_region_deallocate(shared_region);
	} else {
		/* no shared region ! */
		error = EINVAL;
	}

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] check_np(0x%llx) <- 0x%llx %d\n",
		 current_thread(), p->p_pid, p->p_comm,
		 (uint64_t)uap->start_address, (uint64_t)start_address, error));

	return error;
}
795
796 /*
797 * shared_region_map_np()
798 *
799 * This system call is intended for dyld.
800 *
801 * dyld uses this to map a shared cache file into a shared region.
802 * This is usually done only the first time a shared cache is needed.
803 * Subsequent processes will just use the populated shared region without
804 * requiring any further setup.
805 */
/*
 * Map a shared cache file (fd) into the current task's shared region
 * according to the caller-supplied mapping list (see the block comment
 * above for the dyld protocol).  The file must be a regular file on
 * the process's root volume, owned by root, and opened for reading.
 */
int
shared_region_map_np(
	struct proc				*p,
	struct shared_region_map_np_args	*uap,
	__unused int				*retvalp)
{
	int				error;
	kern_return_t			kr;
	int				fd;
	struct fileproc			*fp;
	struct vnode			*vp, *root_vp;
	struct vnode_attr		va;
	off_t				fs;
	memory_object_size_t		file_size;
	user_addr_t			user_mappings;
	struct shared_file_mapping_np	*mappings;
#define SFM_MAX_STACK	4
	struct shared_file_mapping_np	stack_mappings[SFM_MAX_STACK];
	unsigned int			mappings_count;
	vm_size_t			mappings_size;
	memory_object_control_t		file_control;
	struct vm_shared_region		*shared_region;

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] -> map\n",
		 current_thread(), p->p_pid, p->p_comm));

	/* initialize everything released in the 'done' cleanup path */
	shared_region = NULL;
	mappings_count = 0;
	mappings_size = 0;
	mappings = NULL;
	fp = NULL;
	vp = NULL;

	/* get file descriptor for shared region cache file */
	fd = uap->fd;

	/* get file structure from file descriptor */
	error = fp_lookup(p, fd, &fp, 0);
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d lookup failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm, fd, error));
		goto done;
	}

	/* make sure we're attempting to map a vnode */
	if (fp->f_fglob->fg_type != DTYPE_VNODE) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d not a vnode (type=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 fd, fp->f_fglob->fg_type));
		error = EINVAL;
		goto done;
	}

	/* we need at least read permission on the file */
	if (! (fp->f_fglob->fg_flag & FREAD)) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d not readable\n",
			 current_thread(), p->p_pid, p->p_comm, fd));
		error = EPERM;
		goto done;
	}

	/* get vnode from file structure */
	error = vnode_getwithref((vnode_t) fp->f_fglob->fg_data);
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d getwithref failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm, fd, error));
		goto done;
	}
	vp = (struct vnode *) fp->f_fglob->fg_data;

	/* make sure the vnode is a regular file */
	if (vp->v_type != VREG) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "not a file (type=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, vp->v_type));
		error = EINVAL;
		goto done;
	}

	/* make sure vnode is on the process's root volume */
	root_vp = p->p_fd->fd_rdir;
	if (root_vp == NULL) {
		root_vp = rootvnode;
	}
	if (vp->v_mount != root_vp->v_mount) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "not on process's root volume\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name));
		error = EPERM;
		goto done;
	}

	/* make sure vnode is owned by "root" */
	VATTR_INIT(&va);
	VATTR_WANTED(&va, va_uid);
	error = vnode_getattr(vp, &va, vfs_context_current());
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "vnode_getattr(%p) failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, vp, error));
		goto done;
	}
	if (va.va_uid != 0) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "owned by uid=%d instead of 0\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, va.va_uid));
		error = EPERM;
		goto done;
	}

	/* get vnode size */
	error = vnode_size(vp, &fs, vfs_context_current());
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "vnode_size(%p) failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, vp, error));
		goto done;
	}
	file_size = fs;

	/* get the file's memory object handle */
	file_control = ubc_getobject(vp, UBC_HOLDOBJECT);
	if (file_control == MEMORY_OBJECT_CONTROL_NULL) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "no memory object\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name));
		error = EINVAL;
		goto done;
	}

	/* get the list of mappings the caller wants us to establish */
	mappings_count = uap->count;	/* number of mappings */
	mappings_size = (vm_size_t) (mappings_count * sizeof (mappings[0]));
	if (mappings_count == 0) {
		SHARED_REGION_TRACE_INFO(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "no mappings\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name));
		error = 0;	/* no mappings: we're done ! */
		goto done;
	} else if (mappings_count <= SFM_MAX_STACK) {
		/* small enough to stage on the kernel stack */
		mappings = &stack_mappings[0];
	} else {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "too many mappings (%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, mappings_count));
		error = EINVAL;
		goto done;
	}

	user_mappings = uap->mappings;	/* the mappings, in user space */
	error = copyin(user_mappings,
		       mappings,
		       mappings_size);
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "copyin(0x%llx, %d) failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, (uint64_t)user_mappings, mappings_count, error));
		goto done;
	}

	/* get the process's shared region (setup in vm_map_exec()) */
	shared_region = vm_shared_region_get(current_task());
	if (shared_region == NULL) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "no shared region\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name));
		/*
		 * BUG FIX: 'error' is still 0 here (last set by the
		 * successful copyin above), so without setting it the
		 * syscall would report success despite mapping nothing.
		 */
		error = EINVAL;
		goto done;
	}

	/* map the file into that shared region's submap */
	kr = vm_shared_region_map_file(shared_region,
				       mappings_count,
				       mappings,
				       file_control,
				       file_size,
				       (void *) p->p_fd->fd_rdir);
	if (kr != KERN_SUCCESS) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "vm_shared_region_map_file() failed kr=0x%x\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, kr));
		/* translate the Mach result into a BSD errno */
		switch (kr) {
		case KERN_INVALID_ADDRESS:
			error = EFAULT;
			break;
		case KERN_PROTECTION_FAILURE:
			error = EPERM;
			break;
		case KERN_NO_SPACE:
			error = ENOMEM;
			break;
		case KERN_FAILURE:
		case KERN_INVALID_ARGUMENT:
		default:
			error = EINVAL;
			break;
		}
		goto done;
	}

	/*
	 * The mapping was successful.  Let the buffer cache know
	 * that we've mapped that file with these protections.  This
	 * prevents the vnode from getting recycled while it's mapped.
	 */
	(void) ubc_map(vp, VM_PROT_READ);
	error = 0;

	/* update the vnode's access time */
	if (! (vnode_vfsvisflags(vp) & MNT_NOATIME)) {
		VATTR_INIT(&va);
		nanotime(&va.va_access_time);
		VATTR_SET_ACTIVE(&va, va_access_time);
		vnode_setattr(vp, &va, vfs_context_current());
	}

	if (p->p_flag & P_NOSHLIB) {
		/* signal that this process is now using split libraries */
		OSBitAndAtomic(~((uint32_t)P_NOSHLIB), (UInt32 *)&p->p_flag);
	}

done:
	if (vp != NULL) {
		/*
		 * release the vnode...
		 * ubc_map() still holds it for us in the non-error case
		 */
		(void) vnode_put(vp);
		vp = NULL;
	}
	if (fp != NULL) {
		/* release the file descriptor */
		fp_drop(p, fd, fp, 0);
		fp = NULL;
	}

	if (shared_region != NULL) {
		vm_shared_region_deallocate(shared_region);
	}

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] <- map\n",
		 current_thread(), p->p_pid, p->p_comm));

	return error;
}
1082
1083
1084 /* sysctl overflow room */
1085
1086 /* vm_page_free_target is provided as a makeshift solution for applications that want to
1087 allocate buffer space, possibly purgeable memory, but not cause inactive pages to be
1088 reclaimed. It allows the app to calculate how much memory is free outside the free target. */
1089 extern unsigned int vm_page_free_target;
1090 SYSCTL_INT(_vm, OID_AUTO, vm_page_free_target, CTLFLAG_RD,
1091 &vm_page_free_target, 0, "Pageout daemon free target");
1092