2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 * Mach Operating System
30 * Copyright (c) 1987 Carnegie-Mellon University
31 * All rights reserved. The CMU software License Agreement specifies
32 * the terms and conditions for use and redistribution.
35 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
36 * support for mandatory and extensible security protections. This notice
37 * is included in support of clause 2.2 (b) of the Apple Public License,
41 #include <meta_features.h>
43 #include <kern/task.h>
44 #include <kern/thread.h>
45 #include <kern/debug.h>
46 #include <kern/lock.h>
47 #include <mach/mach_traps.h>
48 #include <mach/port.h>
49 #include <mach/task.h>
50 #include <mach/task_access.h>
51 #include <mach/task_special_ports.h>
52 #include <mach/time_value.h>
53 #include <mach/vm_map.h>
54 #include <mach/vm_param.h>
55 #include <mach/vm_prot.h>
57 #include <sys/file_internal.h>
58 #include <sys/param.h>
59 #include <sys/systm.h>
61 #include <sys/namei.h>
62 #include <sys/proc_internal.h>
63 #include <sys/kauth.h>
66 #include <sys/vnode_internal.h>
67 #include <sys/mount.h>
68 #include <sys/trace.h>
69 #include <sys/kernel.h>
70 #include <sys/ubc_internal.h>
72 #include <sys/syslog.h>
74 #include <sys/sysproto.h>
76 #include <sys/sysctl.h>
78 #include <security/audit/audit.h>
79 #include <bsm/audit_kevents.h>
81 #include <kern/kalloc.h>
82 #include <vm/vm_map.h>
83 #include <vm/vm_kern.h>
84 #include <vm/vm_pageout.h>
86 #include <machine/spl.h>
88 #include <mach/shared_region.h>
89 #include <vm/vm_shared_region.h>
91 #include <vm/vm_protos.h>
94 * Sysctl's related to data/stack execution. See osfmk/vm/vm_map.c
98 extern int allow_stack_exec
, allow_data_exec
;
100 SYSCTL_INT(_vm
, OID_AUTO
, allow_stack_exec
, CTLFLAG_RW
, &allow_stack_exec
, 0, "");
101 SYSCTL_INT(_vm
, OID_AUTO
, allow_data_exec
, CTLFLAG_RW
, &allow_data_exec
, 0, "");
102 #endif /* !SECURE_KERNEL */
/*
 * Human-readable names for VM_PROT_* combinations, indexed by
 * (prot & VM_PROT_ALL); used by log_stack_execution_failure() below.
 */
static const char *prot_values[] = {
	"none",
	"read-only",
	"write-only",
	"read-write",
	"execute-only",
	"read-execute",
	"write-execute",
	"read-write-execute"
};
116 log_stack_execution_failure(addr64_t vaddr
, vm_prot_t prot
)
118 printf("Data/Stack execution not permitted: %s[pid %d] at virtual address 0x%qx, protections were %s\n",
119 current_proc()->p_comm
, current_proc()->p_pid
, vaddr
, prot_values
[prot
& VM_PROT_ALL
]);
122 int shared_region_unnest_logging
= 1;
124 SYSCTL_INT(_vm
, OID_AUTO
, shared_region_unnest_logging
, CTLFLAG_RW
,
125 &shared_region_unnest_logging
, 0, "");
127 int vm_shared_region_unnest_log_interval
= 10;
128 int shared_region_unnest_log_count_threshold
= 5;
130 /* These log rate throttling state variables aren't thread safe, but
131 * are sufficient unto the task.
133 static int64_t last_unnest_log_time
= 0;
134 static int shared_region_unnest_log_count
= 0;
136 void log_unnest_badness(vm_map_t m
, vm_map_offset_t s
, vm_map_offset_t e
) {
138 const char *pcommstr
;
140 if (shared_region_unnest_logging
== 0)
143 if (shared_region_unnest_logging
== 1) {
145 if ((tv
.tv_sec
- last_unnest_log_time
) < vm_shared_region_unnest_log_interval
) {
146 if (shared_region_unnest_log_count
++ > shared_region_unnest_log_count_threshold
)
150 last_unnest_log_time
= tv
.tv_sec
;
151 shared_region_unnest_log_count
= 0;
155 pcommstr
= current_proc()->p_comm
;
157 printf("%s (map: %p) triggered DYLD shared region unnest for map: %p, region 0x%qx->0x%qx. While not abnormal for debuggers, this increases system memory footprint until the target exits.\n", current_proc()->p_comm
, get_task_map(current_proc()->task
), m
, (uint64_t)s
, (uint64_t)e
);
166 return (vm_map_check_protection(
168 vm_map_trunc_page(addr
), vm_map_round_page(addr
+len
),
169 prot
== B_READ
? VM_PROT_READ
: VM_PROT_WRITE
));
178 kret
= vm_map_wire(current_map(), vm_map_trunc_page(addr
),
179 vm_map_round_page(addr
+len
),
180 VM_PROT_READ
| VM_PROT_WRITE
,FALSE
);
185 case KERN_INVALID_ADDRESS
:
188 case KERN_PROTECTION_FAILURE
:
199 __unused
int dirtied
)
204 vm_map_offset_t vaddr
;
211 pmap
= get_task_pmap(current_task());
212 for (vaddr
= vm_map_trunc_page(addr
);
213 vaddr
< vm_map_round_page(addr
+len
);
214 vaddr
+= PAGE_SIZE
) {
215 paddr
= pmap_extract(pmap
, vaddr
);
216 pg
= PHYS_TO_VM_PAGE(paddr
);
217 vm_page_set_modified(pg
);
224 kret
= vm_map_unwire(current_map(), vm_map_trunc_page(addr
),
225 vm_map_round_page(addr
+len
), FALSE
);
229 case KERN_INVALID_ADDRESS
:
232 case KERN_PROTECTION_FAILURE
:
246 character
= (char)byte
;
247 return (copyout((void *)&(character
), addr
, sizeof(char)) == 0 ? 0 : -1);
257 character
= (char)byte
;
258 return (copyout((void *)&(character
), addr
, sizeof(char)) == 0 ? 0 : -1);
261 int fubyte(user_addr_t addr
)
265 if (copyin(addr
, (void *) &byte
, sizeof(char)))
270 int fuibyte(user_addr_t addr
)
274 if (copyin(addr
, (void *) &(byte
), sizeof(char)))
284 return (copyout((void *) &word
, addr
, sizeof(int)) == 0 ? 0 : -1);
287 long fuword(user_addr_t addr
)
291 if (copyin(addr
, (void *) &word
, sizeof(int)))
296 /* suiword and fuiword are the same as suword and fuword, respectively */
303 return (copyout((void *) &word
, addr
, sizeof(int)) == 0 ? 0 : -1);
306 long fuiword(user_addr_t addr
)
310 if (copyin(addr
, (void *) &word
, sizeof(int)))
316 * With a 32-bit kernel and mixed 32/64-bit user tasks, this interface allows the
317 * fetching and setting of process-sized size_t and pointer values.
320 sulong(user_addr_t addr
, int64_t word
)
323 if (IS_64BIT_PROCESS(current_proc())) {
324 return(copyout((void *)&word
, addr
, sizeof(word
)) == 0 ? 0 : -1);
326 return(suiword(addr
, (long)word
));
331 fulong(user_addr_t addr
)
335 if (IS_64BIT_PROCESS(current_proc())) {
336 if (copyin(addr
, (void *)&longword
, sizeof(longword
)) != 0)
340 return((int64_t)fuiword(addr
));
345 suulong(user_addr_t addr
, uint64_t uword
)
348 if (IS_64BIT_PROCESS(current_proc())) {
349 return(copyout((void *)&uword
, addr
, sizeof(uword
)) == 0 ? 0 : -1);
351 return(suiword(addr
, (uint32_t)uword
));
356 fuulong(user_addr_t addr
)
360 if (IS_64BIT_PROCESS(current_proc())) {
361 if (copyin(addr
, (void *)&ulongword
, sizeof(ulongword
)) != 0)
365 return((uint64_t)fuiword(addr
));
370 swapon(__unused proc_t procp
, __unused
struct swapon_args
*uap
, __unused
int *retval
)
378 * Find the BSD process ID for the Mach task associated with the given Mach port
381 * Parameters: args User argument descriptor (see below)
383 * Indirect parameters: args->t Mach port name
384 * args->pid Process ID (returned value; see below)
386 * Returns: KERL_SUCCESS Success
387 * KERN_FAILURE Not success
389 * Implicit returns: args->pid Process ID
394 struct pid_for_task_args
*args
)
396 mach_port_name_t t
= args
->t
;
397 user_addr_t pid_addr
= args
->pid
;
401 kern_return_t err
= KERN_SUCCESS
;
403 AUDIT_MACH_SYSCALL_ENTER(AUE_PIDFORTASK
);
404 AUDIT_ARG(mach_port1
, t
);
406 t1
= port_name_to_task(t
);
408 if (t1
== TASK_NULL
) {
412 p
= get_bsdtask_info(t1
);
423 (void) copyout((char *) &pid
, pid_addr
, sizeof(int));
424 AUDIT_MACH_SYSCALL_EXIT(err
);
430 * tfp_policy = KERN_TFP_POLICY_DENY; Deny Mode: None allowed except for self
431 * tfp_policy = KERN_TFP_POLICY_DEFAULT; default mode: all posix checks and upcall via task port for authentication
434 static int tfp_policy
= KERN_TFP_POLICY_DEFAULT
;
437 * Routine: task_for_pid_posix_check
439 * Verify that the current process should be allowed to
440 * get the target process's task port. This is only
442 * - The current process is root
443 * OR all of the following are true:
444 * - The target process's real, effective, and saved uids
445 * are the same as the current proc's euid,
446 * - The target process's group set is a subset of the
447 * calling process's group set, and
448 * - The target process hasn't switched credentials.
450 * Returns: TRUE: permitted
454 task_for_pid_posix_check(proc_t target
)
456 kauth_cred_t targetcred
, mycred
;
460 /* No task_for_pid on bad targets */
461 if (target
== PROC_NULL
|| target
->p_stat
== SZOMB
) {
465 mycred
= kauth_cred_get();
466 myuid
= kauth_cred_getuid(mycred
);
468 /* If we're running as root, the check passes */
469 if (kauth_cred_issuser(mycred
))
472 /* We're allowed to get our own task port */
473 if (target
== current_proc())
477 * Under DENY, only root can get another proc's task port,
478 * so no more checks are needed.
480 if (tfp_policy
== KERN_TFP_POLICY_DENY
) {
484 targetcred
= kauth_cred_proc_ref(target
);
487 /* Do target's ruid, euid, and saved uid match my euid? */
488 if ((kauth_cred_getuid(targetcred
) != myuid
) ||
489 (targetcred
->cr_ruid
!= myuid
) ||
490 (targetcred
->cr_svuid
!= myuid
)) {
495 /* Are target's groups a subset of my groups? */
496 if (kauth_cred_gid_subset(targetcred
, mycred
, &allowed
) ||
502 /* Has target switched credentials? */
503 if (target
->p_flag
& P_SUGID
) {
509 kauth_cred_unref(&targetcred
);
514 * Routine: task_for_pid
516 * Get the task port for another "process", named by its
517 * process ID on the same host as "target_task".
519 * Only permitted to privileged processes, or processes
520 * with the same user ID.
522 * Note: if pid == 0, an error is return no matter who is calling.
524 * XXX This should be a BSD system call, not a Mach trap!!!
528 struct task_for_pid_args
*args
)
530 mach_port_name_t target_tport
= args
->target_tport
;
532 user_addr_t task_addr
= args
->t
;
533 proc_t p
= PROC_NULL
;
534 task_t t1
= TASK_NULL
;
535 mach_port_name_t tret
= MACH_PORT_NULL
;
540 AUDIT_MACH_SYSCALL_ENTER(AUE_TASKFORPID
);
542 AUDIT_ARG(mach_port1
, target_tport
);
544 /* Always check if pid == 0 */
546 (void ) copyout((char *)&t1
, task_addr
, sizeof(mach_port_name_t
));
547 AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE
);
548 return(KERN_FAILURE
);
551 t1
= port_name_to_task(target_tport
);
552 if (t1
== TASK_NULL
) {
553 (void) copyout((char *)&t1
, task_addr
, sizeof(mach_port_name_t
));
554 AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE
);
555 return(KERN_FAILURE
);
562 AUDIT_ARG(process
, p
);
565 if (!(task_for_pid_posix_check(p
))) {
566 error
= KERN_FAILURE
;
570 if (p
->task
!= TASK_NULL
) {
571 /* If we aren't root and target's task access port is set... */
572 if (!kauth_cred_issuser(kauth_cred_get()) &&
573 p
!= current_proc() &&
574 (task_get_task_access_port(p
->task
, &tfpport
) == 0) &&
575 (tfpport
!= IPC_PORT_NULL
)) {
577 if (tfpport
== IPC_PORT_DEAD
) {
578 error
= KERN_PROTECTION_FAILURE
;
582 /* Call up to the task access server */
583 error
= check_task_access(tfpport
, proc_selfpid(), kauth_getgid(), pid
);
585 if (error
!= MACH_MSG_SUCCESS
) {
586 if (error
== MACH_RCV_INTERRUPTED
)
587 error
= KERN_ABORTED
;
589 error
= KERN_FAILURE
;
594 error
= mac_proc_check_get_task(kauth_cred_get(), p
);
596 error
= KERN_FAILURE
;
601 /* Grant task port access */
602 task_reference(p
->task
);
603 sright
= (void *) convert_task_to_port(p
->task
);
604 tret
= ipc_port_copyout_send(
606 get_task_ipcspace(current_task()));
608 error
= KERN_SUCCESS
;
612 AUDIT_ARG(mach_port2
, tret
);
613 (void) copyout((char *) &tret
, task_addr
, sizeof(mach_port_name_t
));
616 AUDIT_MACH_SYSCALL_EXIT(error
);
621 * Routine: task_name_for_pid
623 * Get the task name port for another "process", named by its
624 * process ID on the same host as "target_task".
626 * Only permitted to privileged processes, or processes
627 * with the same user ID.
629 * XXX This should be a BSD system call, not a Mach trap!!!
634 struct task_name_for_pid_args
*args
)
636 mach_port_name_t target_tport
= args
->target_tport
;
638 user_addr_t task_addr
= args
->t
;
639 proc_t p
= PROC_NULL
;
641 mach_port_name_t tret
;
643 int error
= 0, refheld
= 0;
644 kauth_cred_t target_cred
;
646 AUDIT_MACH_SYSCALL_ENTER(AUE_TASKNAMEFORPID
);
648 AUDIT_ARG(mach_port1
, target_tport
);
650 t1
= port_name_to_task(target_tport
);
651 if (t1
== TASK_NULL
) {
652 (void) copyout((char *)&t1
, task_addr
, sizeof(mach_port_name_t
));
653 AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE
);
654 return(KERN_FAILURE
);
658 if (p
!= PROC_NULL
) {
659 AUDIT_ARG(process
, p
);
660 target_cred
= kauth_cred_proc_ref(p
);
663 if ((p
->p_stat
!= SZOMB
)
664 && ((current_proc() == p
)
665 || kauth_cred_issuser(kauth_cred_get())
666 || ((kauth_cred_getuid(target_cred
) == kauth_cred_getuid(kauth_cred_get())) &&
667 ((target_cred
->cr_ruid
== kauth_cred_get()->cr_ruid
))))) {
669 if (p
->task
!= TASK_NULL
) {
670 task_reference(p
->task
);
672 error
= mac_proc_check_get_task_name(kauth_cred_get(), p
);
674 task_deallocate(p
->task
);
678 sright
= (void *)convert_task_name_to_port(p
->task
);
679 tret
= ipc_port_copyout_send(sright
,
680 get_task_ipcspace(current_task()));
682 tret
= MACH_PORT_NULL
;
684 AUDIT_ARG(mach_port2
, tret
);
685 (void) copyout((char *)&tret
, task_addr
, sizeof(mach_port_name_t
));
687 error
= KERN_SUCCESS
;
696 tret
= MACH_PORT_NULL
;
697 (void) copyout((char *) &tret
, task_addr
, sizeof(mach_port_name_t
));
698 error
= KERN_FAILURE
;
701 kauth_cred_unref(&target_cred
);
704 AUDIT_MACH_SYSCALL_EXIT(error
);
709 pid_suspend(struct proc
*p __unused
, struct pid_suspend_args
*args
, int *ret
)
711 task_t target
= NULL
;
712 proc_t targetproc
= PROC_NULL
;
717 error
= mac_proc_check_suspend_resume(p
, 0); /* 0 for suspend */
719 error
= KERN_FAILURE
;
725 error
= KERN_FAILURE
;
729 targetproc
= proc_find(pid
);
730 if (!task_for_pid_posix_check(targetproc
)) {
731 error
= KERN_FAILURE
;
735 target
= targetproc
->task
;
736 #ifndef CONFIG_EMBEDDED
737 if (target
!= TASK_NULL
) {
740 /* If we aren't root and target's task access port is set... */
741 if (!kauth_cred_issuser(kauth_cred_get()) &&
742 targetproc
!= current_proc() &&
743 (task_get_task_access_port(target
, &tfpport
) == 0) &&
744 (tfpport
!= IPC_PORT_NULL
)) {
746 if (tfpport
== IPC_PORT_DEAD
) {
747 error
= KERN_PROTECTION_FAILURE
;
751 /* Call up to the task access server */
752 error
= check_task_access(tfpport
, proc_selfpid(), kauth_getgid(), pid
);
754 if (error
!= MACH_MSG_SUCCESS
) {
755 if (error
== MACH_RCV_INTERRUPTED
)
756 error
= KERN_ABORTED
;
758 error
= KERN_FAILURE
;
765 task_reference(target
);
766 error
= task_suspend(target
);
767 task_deallocate(target
);
770 if (targetproc
!= PROC_NULL
)
771 proc_rele(targetproc
);
777 pid_resume(struct proc
*p __unused
, struct pid_resume_args
*args
, int *ret
)
779 task_t target
= NULL
;
780 proc_t targetproc
= PROC_NULL
;
785 error
= mac_proc_check_suspend_resume(p
, 1); /* 1 for resume */
787 error
= KERN_FAILURE
;
793 error
= KERN_FAILURE
;
797 targetproc
= proc_find(pid
);
798 if (!task_for_pid_posix_check(targetproc
)) {
799 error
= KERN_FAILURE
;
803 target
= targetproc
->task
;
804 #ifndef CONFIG_EMBEDDED
805 if (target
!= TASK_NULL
) {
808 /* If we aren't root and target's task access port is set... */
809 if (!kauth_cred_issuser(kauth_cred_get()) &&
810 targetproc
!= current_proc() &&
811 (task_get_task_access_port(target
, &tfpport
) == 0) &&
812 (tfpport
!= IPC_PORT_NULL
)) {
814 if (tfpport
== IPC_PORT_DEAD
) {
815 error
= KERN_PROTECTION_FAILURE
;
819 /* Call up to the task access server */
820 error
= check_task_access(tfpport
, proc_selfpid(), kauth_getgid(), pid
);
822 if (error
!= MACH_MSG_SUCCESS
) {
823 if (error
== MACH_RCV_INTERRUPTED
)
824 error
= KERN_ABORTED
;
826 error
= KERN_FAILURE
;
833 task_reference(target
);
834 error
= task_resume(target
);
835 task_deallocate(target
);
838 if (targetproc
!= PROC_NULL
)
839 proc_rele(targetproc
);
847 sysctl_settfp_policy(__unused
struct sysctl_oid
*oidp
, void *arg1
,
848 __unused
int arg2
, struct sysctl_req
*req
)
853 error
= SYSCTL_OUT(req
, arg1
, sizeof(int));
854 if (error
|| req
->newptr
== USER_ADDR_NULL
)
860 if ((error
= SYSCTL_IN(req
, &new_value
, sizeof(int)))) {
863 if ((new_value
== KERN_TFP_POLICY_DENY
)
864 || (new_value
== KERN_TFP_POLICY_DEFAULT
))
865 tfp_policy
= new_value
;
873 #if defined(SECURE_KERNEL)
874 static int kern_secure_kernel
= 1;
876 static int kern_secure_kernel
= 0;
879 SYSCTL_INT(_kern
, OID_AUTO
, secure_kernel
, CTLFLAG_RD
, &kern_secure_kernel
, 0, "");
881 SYSCTL_NODE(_kern
, KERN_TFP
, tfp
, CTLFLAG_RW
|CTLFLAG_LOCKED
, 0, "tfp");
882 SYSCTL_PROC(_kern_tfp
, KERN_TFP_POLICY
, policy
, CTLTYPE_INT
| CTLFLAG_RW
,
883 &tfp_policy
, sizeof(uint32_t), &sysctl_settfp_policy
,"I","policy");
885 SYSCTL_INT(_vm
, OID_AUTO
, shared_region_trace_level
, CTLFLAG_RW
,
886 &shared_region_trace_level
, 0, "");
887 SYSCTL_INT(_vm
, OID_AUTO
, shared_region_version
, CTLFLAG_RD
,
888 &shared_region_version
, 0, "");
889 SYSCTL_INT(_vm
, OID_AUTO
, shared_region_persistence
, CTLFLAG_RW
,
890 &shared_region_persistence
, 0, "");
893 * shared_region_check_np:
895 * This system call is intended for dyld.
897 * dyld calls this when any process starts to see if the process's shared
898 * region is already set up and ready to use.
899 * This call returns the base address of the first mapping in the
900 * process's shared region's first mapping.
901 * dyld will then check what's mapped at that address.
903 * If the shared region is empty, dyld will then attempt to map the shared
904 * cache file in the shared region via the shared_region_map_np() system call.
906 * If something's already mapped in the shared region, dyld will check if it
907 * matches the shared cache it would like to use for that process.
908 * If it matches, evrything's ready and the process can proceed and use the
910 * If it doesn't match, dyld will unmap the shared region and map the shared
911 * cache into the process's address space via mmap().
914 * EINVAL no shared region
915 * ENOMEM shared region is empty
916 * EFAULT bad address for "start_address"
919 shared_region_check_np(
920 __unused
struct proc
*p
,
921 struct shared_region_check_np_args
*uap
,
922 __unused
int *retvalp
)
924 vm_shared_region_t shared_region
;
925 mach_vm_offset_t start_address
;
929 SHARED_REGION_TRACE_DEBUG(
930 ("shared_region: %p [%d(%s)] -> check_np(0x%llx)\n",
931 current_thread(), p
->p_pid
, p
->p_comm
,
932 (uint64_t)uap
->start_address
));
934 /* retrieve the current tasks's shared region */
935 shared_region
= vm_shared_region_get(current_task());
936 if (shared_region
!= NULL
) {
937 /* retrieve address of its first mapping... */
938 kr
= vm_shared_region_start_address(shared_region
,
940 if (kr
!= KERN_SUCCESS
) {
943 /* ... and give it to the caller */
944 error
= copyout(&start_address
,
945 (user_addr_t
) uap
->start_address
,
946 sizeof (start_address
));
948 SHARED_REGION_TRACE_ERROR(
949 ("shared_region: %p [%d(%s)] "
951 "copyout(0x%llx) error %d\n",
952 current_thread(), p
->p_pid
, p
->p_comm
,
953 (uint64_t)uap
->start_address
, (uint64_t)start_address
,
957 vm_shared_region_deallocate(shared_region
);
959 /* no shared region ! */
963 SHARED_REGION_TRACE_DEBUG(
964 ("shared_region: %p [%d(%s)] check_np(0x%llx) <- 0x%llx %d\n",
965 current_thread(), p
->p_pid
, p
->p_comm
,
966 (uint64_t)uap
->start_address
, (uint64_t)start_address
, error
));
972 * shared_region_map_np()
974 * This system call is intended for dyld.
976 * dyld uses this to map a shared cache file into a shared region.
977 * This is usually done only the first time a shared cache is needed.
978 * Subsequent processes will just use the populated shared region without
979 * requiring any further setup.
982 shared_region_map_np(
984 struct shared_region_map_np_args
*uap
,
985 __unused
int *retvalp
)
991 struct vnode
*vp
, *root_vp
;
992 struct vnode_attr va
;
994 memory_object_size_t file_size
;
995 user_addr_t user_mappings
;
996 struct shared_file_mapping_np
*mappings
;
997 #define SFM_MAX_STACK 8
998 struct shared_file_mapping_np stack_mappings
[SFM_MAX_STACK
];
999 unsigned int mappings_count
;
1000 vm_size_t mappings_size
;
1001 memory_object_control_t file_control
;
1002 struct vm_shared_region
*shared_region
;
1004 SHARED_REGION_TRACE_DEBUG(
1005 ("shared_region: %p [%d(%s)] -> map\n",
1006 current_thread(), p
->p_pid
, p
->p_comm
));
1008 shared_region
= NULL
;
1015 /* get file descriptor for shared region cache file */
1018 /* get file structure from file descriptor */
1019 error
= fp_lookup(p
, fd
, &fp
, 0);
1021 SHARED_REGION_TRACE_ERROR(
1022 ("shared_region: %p [%d(%s)] map: "
1023 "fd=%d lookup failed (error=%d)\n",
1024 current_thread(), p
->p_pid
, p
->p_comm
, fd
, error
));
1028 /* make sure we're attempting to map a vnode */
1029 if (fp
->f_fglob
->fg_type
!= DTYPE_VNODE
) {
1030 SHARED_REGION_TRACE_ERROR(
1031 ("shared_region: %p [%d(%s)] map: "
1032 "fd=%d not a vnode (type=%d)\n",
1033 current_thread(), p
->p_pid
, p
->p_comm
,
1034 fd
, fp
->f_fglob
->fg_type
));
1039 /* we need at least read permission on the file */
1040 if (! (fp
->f_fglob
->fg_flag
& FREAD
)) {
1041 SHARED_REGION_TRACE_ERROR(
1042 ("shared_region: %p [%d(%s)] map: "
1043 "fd=%d not readable\n",
1044 current_thread(), p
->p_pid
, p
->p_comm
, fd
));
1049 /* get vnode from file structure */
1050 error
= vnode_getwithref((vnode_t
) fp
->f_fglob
->fg_data
);
1052 SHARED_REGION_TRACE_ERROR(
1053 ("shared_region: %p [%d(%s)] map: "
1054 "fd=%d getwithref failed (error=%d)\n",
1055 current_thread(), p
->p_pid
, p
->p_comm
, fd
, error
));
1058 vp
= (struct vnode
*) fp
->f_fglob
->fg_data
;
1060 /* make sure the vnode is a regular file */
1061 if (vp
->v_type
!= VREG
) {
1062 SHARED_REGION_TRACE_ERROR(
1063 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1064 "not a file (type=%d)\n",
1065 current_thread(), p
->p_pid
, p
->p_comm
,
1066 vp
, vp
->v_name
, vp
->v_type
));
1071 /* make sure vnode is on the process's root volume */
1072 root_vp
= p
->p_fd
->fd_rdir
;
1073 if (root_vp
== NULL
) {
1074 root_vp
= rootvnode
;
1076 if (vp
->v_mount
!= root_vp
->v_mount
) {
1077 SHARED_REGION_TRACE_ERROR(
1078 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1079 "not on process's root volume\n",
1080 current_thread(), p
->p_pid
, p
->p_comm
,
1086 /* make sure vnode is owned by "root" */
1088 VATTR_WANTED(&va
, va_uid
);
1089 error
= vnode_getattr(vp
, &va
, vfs_context_current());
1091 SHARED_REGION_TRACE_ERROR(
1092 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1093 "vnode_getattr(%p) failed (error=%d)\n",
1094 current_thread(), p
->p_pid
, p
->p_comm
,
1095 vp
, vp
->v_name
, vp
, error
));
1098 if (va
.va_uid
!= 0) {
1099 SHARED_REGION_TRACE_ERROR(
1100 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1101 "owned by uid=%d instead of 0\n",
1102 current_thread(), p
->p_pid
, p
->p_comm
,
1103 vp
, vp
->v_name
, va
.va_uid
));
1108 /* get vnode size */
1109 error
= vnode_size(vp
, &fs
, vfs_context_current());
1111 SHARED_REGION_TRACE_ERROR(
1112 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1113 "vnode_size(%p) failed (error=%d)\n",
1114 current_thread(), p
->p_pid
, p
->p_comm
,
1115 vp
, vp
->v_name
, vp
, error
));
1120 /* get the file's memory object handle */
1121 file_control
= ubc_getobject(vp
, UBC_HOLDOBJECT
);
1122 if (file_control
== MEMORY_OBJECT_CONTROL_NULL
) {
1123 SHARED_REGION_TRACE_ERROR(
1124 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1125 "no memory object\n",
1126 current_thread(), p
->p_pid
, p
->p_comm
,
1132 /* get the list of mappings the caller wants us to establish */
1133 mappings_count
= uap
->count
; /* number of mappings */
1134 mappings_size
= (vm_size_t
) (mappings_count
* sizeof (mappings
[0]));
1135 if (mappings_count
== 0) {
1136 SHARED_REGION_TRACE_INFO(
1137 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1139 current_thread(), p
->p_pid
, p
->p_comm
,
1141 error
= 0; /* no mappings: we're done ! */
1143 } else if (mappings_count
<= SFM_MAX_STACK
) {
1144 mappings
= &stack_mappings
[0];
1146 SHARED_REGION_TRACE_ERROR(
1147 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1148 "too many mappings (%d)\n",
1149 current_thread(), p
->p_pid
, p
->p_comm
,
1150 vp
, vp
->v_name
, mappings_count
));
1155 user_mappings
= uap
->mappings
; /* the mappings, in user space */
1156 error
= copyin(user_mappings
,
1160 SHARED_REGION_TRACE_ERROR(
1161 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1162 "copyin(0x%llx, %d) failed (error=%d)\n",
1163 current_thread(), p
->p_pid
, p
->p_comm
,
1164 vp
, vp
->v_name
, (uint64_t)user_mappings
, mappings_count
, error
));
1168 /* get the process's shared region (setup in vm_map_exec()) */
1169 shared_region
= vm_shared_region_get(current_task());
1170 if (shared_region
== NULL
) {
1171 SHARED_REGION_TRACE_ERROR(
1172 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1173 "no shared region\n",
1174 current_thread(), p
->p_pid
, p
->p_comm
,
1179 /* map the file into that shared region's submap */
1180 kr
= vm_shared_region_map_file(shared_region
,
1185 (void *) p
->p_fd
->fd_rdir
);
1186 if (kr
!= KERN_SUCCESS
) {
1187 SHARED_REGION_TRACE_ERROR(
1188 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1189 "vm_shared_region_map_file() failed kr=0x%x\n",
1190 current_thread(), p
->p_pid
, p
->p_comm
,
1191 vp
, vp
->v_name
, kr
));
1193 case KERN_INVALID_ADDRESS
:
1196 case KERN_PROTECTION_FAILURE
:
1203 case KERN_INVALID_ARGUMENT
:
1213 /* update the vnode's access time */
1214 if (! (vnode_vfsvisflags(vp
) & MNT_NOATIME
)) {
1216 nanotime(&va
.va_access_time
);
1217 VATTR_SET_ACTIVE(&va
, va_access_time
);
1218 vnode_setattr(vp
, &va
, vfs_context_current());
1221 if (p
->p_flag
& P_NOSHLIB
) {
1222 /* signal that this process is now using split libraries */
1223 OSBitAndAtomic(~((uint32_t)P_NOSHLIB
), &p
->p_flag
);
1229 * release the vnode...
1230 * ubc_map() still holds it for us in the non-error case
1232 (void) vnode_put(vp
);
1236 /* release the file descriptor */
1237 fp_drop(p
, fd
, fp
, 0);
1241 if (shared_region
!= NULL
) {
1242 vm_shared_region_deallocate(shared_region
);
1245 SHARED_REGION_TRACE_DEBUG(
1246 ("shared_region: %p [%d(%s)] <- map\n",
1247 current_thread(), p
->p_pid
, p
->p_comm
));
1253 /* sysctl overflow room */
1255 /* vm_page_free_target is provided as a makeshift solution for applications that want to
1256 allocate buffer space, possibly purgeable memory, but not cause inactive pages to be
1257 reclaimed. It allows the app to calculate how much memory is free outside the free target. */
1258 extern unsigned int vm_page_free_target
;
1259 SYSCTL_INT(_vm
, OID_AUTO
, vm_page_free_target
, CTLFLAG_RD
,
1260 &vm_page_free_target
, 0, "Pageout daemon free target");
1262 extern unsigned int vm_memory_pressure
;
1263 SYSCTL_INT(_vm
, OID_AUTO
, memory_pressure
, CTLFLAG_RD
,
1264 &vm_memory_pressure
, 0, "Memory pressure indicator");
1267 vm_ctl_page_free_wanted SYSCTL_HANDLER_ARGS
1269 #pragma unused(oidp, arg1, arg2)
1270 unsigned int page_free_wanted
;
1272 page_free_wanted
= mach_vm_ctl_page_free_wanted();
1273 return SYSCTL_OUT(req
, &page_free_wanted
, sizeof (page_free_wanted
));
1275 SYSCTL_PROC(_vm
, OID_AUTO
, page_free_wanted
,
1276 CTLTYPE_INT
| CTLFLAG_RD
| CTLFLAG_LOCKED
,
1277 0, 0, vm_ctl_page_free_wanted
, "I", "");
1279 extern unsigned int vm_page_purgeable_count
;
1280 SYSCTL_INT(_vm
, OID_AUTO
, page_purgeable_count
, CTLFLAG_RD
,
1281 &vm_page_purgeable_count
, 0, "Purgeable page count");
1283 extern unsigned int vm_page_purgeable_wired_count
;
1284 SYSCTL_INT(_vm
, OID_AUTO
, page_purgeable_wired_count
, CTLFLAG_RD
,
1285 &vm_page_purgeable_wired_count
, 0, "Wired purgeable page count");
1287 SYSCTL_INT(_vm
, OID_AUTO
, page_reusable_count
, CTLFLAG_RD
,
1288 &vm_page_stats_reusable
.reusable_count
, 0, "Reusable page count");
1289 SYSCTL_QUAD(_vm
, OID_AUTO
, reusable_success
, CTLFLAG_RD
,
1290 &vm_page_stats_reusable
.reusable_pages_success
, "");
1291 SYSCTL_QUAD(_vm
, OID_AUTO
, reusable_failure
, CTLFLAG_RD
,
1292 &vm_page_stats_reusable
.reusable_pages_failure
, "");
1293 SYSCTL_QUAD(_vm
, OID_AUTO
, reusable_shared
, CTLFLAG_RD
,
1294 &vm_page_stats_reusable
.reusable_pages_shared
, "");
1295 SYSCTL_QUAD(_vm
, OID_AUTO
, all_reusable_calls
, CTLFLAG_RD
,
1296 &vm_page_stats_reusable
.all_reusable_calls
, "");
1297 SYSCTL_QUAD(_vm
, OID_AUTO
, partial_reusable_calls
, CTLFLAG_RD
,
1298 &vm_page_stats_reusable
.partial_reusable_calls
, "");
1299 SYSCTL_QUAD(_vm
, OID_AUTO
, reuse_success
, CTLFLAG_RD
,
1300 &vm_page_stats_reusable
.reuse_pages_success
, "");
1301 SYSCTL_QUAD(_vm
, OID_AUTO
, reuse_failure
, CTLFLAG_RD
,
1302 &vm_page_stats_reusable
.reuse_pages_failure
, "");
1303 SYSCTL_QUAD(_vm
, OID_AUTO
, all_reuse_calls
, CTLFLAG_RD
,
1304 &vm_page_stats_reusable
.all_reuse_calls
, "");
1305 SYSCTL_QUAD(_vm
, OID_AUTO
, partial_reuse_calls
, CTLFLAG_RD
,
1306 &vm_page_stats_reusable
.partial_reuse_calls
, "");
1307 SYSCTL_QUAD(_vm
, OID_AUTO
, can_reuse_success
, CTLFLAG_RD
,
1308 &vm_page_stats_reusable
.can_reuse_success
, "");
1309 SYSCTL_QUAD(_vm
, OID_AUTO
, can_reuse_failure
, CTLFLAG_RD
,
1310 &vm_page_stats_reusable
.can_reuse_failure
, "");
1314 vm_pressure_monitor(
1315 __unused
struct proc
*p
,
1316 struct vm_pressure_monitor_args
*uap
,
1320 uint32_t pages_reclaimed
;
1321 uint32_t pages_wanted
;
1323 kr
= mach_vm_pressure_monitor(
1324 (boolean_t
) uap
->wait_for_pressure
,
1325 uap
->nsecs_monitored
,
1326 (uap
->pages_reclaimed
) ? &pages_reclaimed
: NULL
,
1338 if (uap
->pages_reclaimed
) {
1339 if (copyout((void *)&pages_reclaimed
,
1340 uap
->pages_reclaimed
,
1341 sizeof (pages_reclaimed
)) != 0) {
1346 *retval
= (int) pages_wanted
;