2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 * Mach Operating System
30 * Copyright (c) 1987 Carnegie-Mellon University
31 * All rights reserved. The CMU software License Agreement specifies
32 * the terms and conditions for use and redistribution.
35 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
36 * support for mandatory and extensible security protections. This notice
37 * is included in support of clause 2.2 (b) of the Apple Public License,
41 #include <meta_features.h>
43 #include <kern/task.h>
44 #include <kern/thread.h>
45 #include <kern/debug.h>
46 #include <kern/lock.h>
47 #include <mach/mach_traps.h>
48 #include <mach/port.h>
49 #include <mach/task.h>
50 #include <mach/task_access.h>
51 #include <mach/task_special_ports.h>
52 #include <mach/time_value.h>
53 #include <mach/vm_map.h>
54 #include <mach/vm_param.h>
55 #include <mach/vm_prot.h>
57 #include <sys/file_internal.h>
58 #include <sys/param.h>
59 #include <sys/systm.h>
61 #include <sys/namei.h>
62 #include <sys/proc_internal.h>
63 #include <sys/kauth.h>
66 #include <sys/vnode_internal.h>
67 #include <sys/mount.h>
68 #include <sys/trace.h>
69 #include <sys/kernel.h>
70 #include <sys/ubc_internal.h>
72 #include <sys/syslog.h>
74 #include <sys/sysproto.h>
76 #include <sys/sysctl.h>
78 #include <security/audit/audit.h>
79 #include <bsm/audit_kevents.h>
81 #include <kern/kalloc.h>
82 #include <vm/vm_map.h>
83 #include <vm/vm_kern.h>
84 #include <vm/vm_pageout.h>
86 #include <machine/spl.h>
88 #include <mach/shared_region.h>
89 #include <vm/vm_shared_region.h>
91 #include <vm/vm_protos.h>
94 * Sysctl's related to data/stack execution. See osfmk/vm/vm_map.c
98 extern int allow_stack_exec
, allow_data_exec
;
100 SYSCTL_INT(_vm
, OID_AUTO
, allow_stack_exec
, CTLFLAG_RW
, &allow_stack_exec
, 0, "");
101 SYSCTL_INT(_vm
, OID_AUTO
, allow_data_exec
, CTLFLAG_RW
, &allow_data_exec
, 0, "");
102 #endif /* !SECURE_KERNEL */
104 static const char *prot_values
[] = {
116 log_stack_execution_failure(addr64_t vaddr
, vm_prot_t prot
)
118 printf("Data/Stack execution not permitted: %s[pid %d] at virtual address 0x%qx, protections were %s\n",
119 current_proc()->p_comm
, current_proc()->p_pid
, vaddr
, prot_values
[prot
& VM_PROT_ALL
]);
122 int shared_region_unnest_logging
= 1;
124 SYSCTL_INT(_vm
, OID_AUTO
, shared_region_unnest_logging
, CTLFLAG_RW
,
125 &shared_region_unnest_logging
, 0, "");
127 int vm_shared_region_unnest_log_interval
= 10;
128 int shared_region_unnest_log_count_threshold
= 5;
130 /* These log rate throttling state variables aren't thread safe, but
131 * are sufficient unto the task.
133 static int64_t last_unnest_log_time
= 0;
134 static int shared_region_unnest_log_count
= 0;
136 void log_unnest_badness(vm_map_t m
, vm_map_offset_t s
, vm_map_offset_t e
) {
138 const char *pcommstr
;
140 if (shared_region_unnest_logging
== 0)
143 if (shared_region_unnest_logging
== 1) {
145 if ((tv
.tv_sec
- last_unnest_log_time
) < vm_shared_region_unnest_log_interval
) {
146 if (shared_region_unnest_log_count
++ > shared_region_unnest_log_count_threshold
)
150 last_unnest_log_time
= tv
.tv_sec
;
151 shared_region_unnest_log_count
= 0;
155 pcommstr
= current_proc()->p_comm
;
157 printf("%s (map: %p) triggered DYLD shared region unnest for map: %p, region 0x%qx->0x%qx. While not abnormal for debuggers, this increases system memory footprint until the target exits.\n", current_proc()->p_comm
, get_task_map(current_proc()->task
), m
, (uint64_t)s
, (uint64_t)e
);
166 return (vm_map_check_protection(
168 vm_map_trunc_page(addr
), vm_map_round_page(addr
+len
),
169 prot
== B_READ
? VM_PROT_READ
: VM_PROT_WRITE
));
178 kret
= vm_map_wire(current_map(), vm_map_trunc_page(addr
),
179 vm_map_round_page(addr
+len
),
180 VM_PROT_READ
| VM_PROT_WRITE
,FALSE
);
185 case KERN_INVALID_ADDRESS
:
188 case KERN_PROTECTION_FAILURE
:
199 __unused
int dirtied
)
204 vm_map_offset_t vaddr
;
211 pmap
= get_task_pmap(current_task());
212 for (vaddr
= vm_map_trunc_page(addr
);
213 vaddr
< vm_map_round_page(addr
+len
);
214 vaddr
+= PAGE_SIZE
) {
215 paddr
= pmap_extract(pmap
, vaddr
);
216 pg
= PHYS_TO_VM_PAGE(paddr
);
217 vm_page_set_modified(pg
);
224 kret
= vm_map_unwire(current_map(), vm_map_trunc_page(addr
),
225 vm_map_round_page(addr
+len
), FALSE
);
229 case KERN_INVALID_ADDRESS
:
232 case KERN_PROTECTION_FAILURE
:
246 character
= (char)byte
;
247 return (copyout((void *)&(character
), addr
, sizeof(char)) == 0 ? 0 : -1);
257 character
= (char)byte
;
258 return (copyout((void *)&(character
), addr
, sizeof(char)) == 0 ? 0 : -1);
261 int fubyte(user_addr_t addr
)
265 if (copyin(addr
, (void *) &byte
, sizeof(char)))
270 int fuibyte(user_addr_t addr
)
274 if (copyin(addr
, (void *) &(byte
), sizeof(char)))
284 return (copyout((void *) &word
, addr
, sizeof(int)) == 0 ? 0 : -1);
287 long fuword(user_addr_t addr
)
291 if (copyin(addr
, (void *) &word
, sizeof(int)))
296 /* suiword and fuiword are the same as suword and fuword, respectively */
303 return (copyout((void *) &word
, addr
, sizeof(int)) == 0 ? 0 : -1);
306 long fuiword(user_addr_t addr
)
310 if (copyin(addr
, (void *) &word
, sizeof(int)))
316 * With a 32-bit kernel and mixed 32/64-bit user tasks, this interface allows the
317 * fetching and setting of process-sized size_t and pointer values.
320 sulong(user_addr_t addr
, int64_t word
)
323 if (IS_64BIT_PROCESS(current_proc())) {
324 return(copyout((void *)&word
, addr
, sizeof(word
)) == 0 ? 0 : -1);
326 return(suiword(addr
, (long)word
));
331 fulong(user_addr_t addr
)
335 if (IS_64BIT_PROCESS(current_proc())) {
336 if (copyin(addr
, (void *)&longword
, sizeof(longword
)) != 0)
340 return((int64_t)fuiword(addr
));
345 suulong(user_addr_t addr
, uint64_t uword
)
348 if (IS_64BIT_PROCESS(current_proc())) {
349 return(copyout((void *)&uword
, addr
, sizeof(uword
)) == 0 ? 0 : -1);
351 return(suiword(addr
, (uint32_t)uword
));
356 fuulong(user_addr_t addr
)
360 if (IS_64BIT_PROCESS(current_proc())) {
361 if (copyin(addr
, (void *)&ulongword
, sizeof(ulongword
)) != 0)
365 return((uint64_t)fuiword(addr
));
370 swapon(__unused proc_t procp
, __unused
struct swapon_args
*uap
, __unused
int *retval
)
378 * Find the BSD process ID for the Mach task associated with the given Mach port
381 * Parameters: args User argument descriptor (see below)
383 * Indirect parameters: args->t Mach port name
384 * args->pid Process ID (returned value; see below)
386 * Returns: KERN_SUCCESS Success
387 * KERN_FAILURE Not success
389 * Implicit returns: args->pid Process ID
394 struct pid_for_task_args
*args
)
396 mach_port_name_t t
= args
->t
;
397 user_addr_t pid_addr
= args
->pid
;
401 kern_return_t err
= KERN_SUCCESS
;
403 AUDIT_MACH_SYSCALL_ENTER(AUE_PIDFORTASK
);
404 AUDIT_ARG(mach_port1
, t
);
406 t1
= port_name_to_task(t
);
408 if (t1
== TASK_NULL
) {
412 p
= get_bsdtask_info(t1
);
423 (void) copyout((char *) &pid
, pid_addr
, sizeof(int));
424 AUDIT_MACH_SYSCALL_EXIT(err
);
430 * tfp_policy = KERN_TFP_POLICY_DENY; Deny Mode: None allowed except for self
431 * tfp_policy = KERN_TFP_POLICY_DEFAULT; default mode: all posix checks and upcall via task port for authentication
434 static int tfp_policy
= KERN_TFP_POLICY_DEFAULT
;
437 * Routine: task_for_pid_posix_check
439 * Verify that the current process should be allowed to
440 * get the target process's task port. This is only
442 * - The current process is root
443 * OR all of the following are true:
444 * - The target process's real, effective, and saved uids
445 * are the same as the current proc's euid,
446 * - The target process's group set is a subset of the
447 * calling process's group set, and
448 * - The target process hasn't switched credentials.
450 * Returns: TRUE: permitted
454 task_for_pid_posix_check(proc_t target
)
456 kauth_cred_t targetcred
, mycred
;
460 /* No task_for_pid on bad targets */
461 if (target
== PROC_NULL
|| target
->p_stat
== SZOMB
) {
465 mycred
= kauth_cred_get();
466 myuid
= kauth_cred_getuid(mycred
);
468 /* If we're running as root, the check passes */
469 if (kauth_cred_issuser(mycred
))
472 /* We're allowed to get our own task port */
473 if (target
== current_proc())
477 * Under DENY, only root can get another proc's task port,
478 * so no more checks are needed.
480 if (tfp_policy
== KERN_TFP_POLICY_DENY
) {
484 targetcred
= kauth_cred_proc_ref(target
);
487 /* Do target's ruid, euid, and saved uid match my euid? */
488 if ((kauth_cred_getuid(targetcred
) != myuid
) ||
489 (targetcred
->cr_ruid
!= myuid
) ||
490 (targetcred
->cr_svuid
!= myuid
)) {
495 /* Are target's groups a subset of my groups? */
496 if (kauth_cred_gid_subset(targetcred
, mycred
, &allowed
) ||
502 /* Has target switched credentials? */
503 if (target
->p_flag
& P_SUGID
) {
509 kauth_cred_unref(&targetcred
);
514 * Routine: task_for_pid
516 * Get the task port for another "process", named by its
517 * process ID on the same host as "target_task".
519 * Only permitted to privileged processes, or processes
520 * with the same user ID.
522 * Note: if pid == 0, an error is return no matter who is calling.
524 * XXX This should be a BSD system call, not a Mach trap!!!
528 struct task_for_pid_args
*args
)
530 mach_port_name_t target_tport
= args
->target_tport
;
532 user_addr_t task_addr
= args
->t
;
533 proc_t p
= PROC_NULL
;
534 task_t t1
= TASK_NULL
;
535 mach_port_name_t tret
= MACH_PORT_NULL
;
540 AUDIT_MACH_SYSCALL_ENTER(AUE_TASKFORPID
);
542 AUDIT_ARG(mach_port1
, target_tport
);
544 /* Always check if pid == 0 */
546 (void ) copyout((char *)&t1
, task_addr
, sizeof(mach_port_name_t
));
547 AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE
);
548 return(KERN_FAILURE
);
551 t1
= port_name_to_task(target_tport
);
552 if (t1
== TASK_NULL
) {
553 (void) copyout((char *)&t1
, task_addr
, sizeof(mach_port_name_t
));
554 AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE
);
555 return(KERN_FAILURE
);
562 AUDIT_ARG(process
, p
);
565 if (!(task_for_pid_posix_check(p
))) {
566 error
= KERN_FAILURE
;
570 if (p
->task
!= TASK_NULL
) {
571 /* If we aren't root and target's task access port is set... */
572 if (!kauth_cred_issuser(kauth_cred_get()) &&
573 p
!= current_proc() &&
574 (task_get_task_access_port(p
->task
, &tfpport
) == 0) &&
575 (tfpport
!= IPC_PORT_NULL
)) {
577 if (tfpport
== IPC_PORT_DEAD
) {
578 error
= KERN_PROTECTION_FAILURE
;
582 /* Call up to the task access server */
583 error
= check_task_access(tfpport
, proc_selfpid(), kauth_getgid(), pid
);
585 if (error
!= MACH_MSG_SUCCESS
) {
586 if (error
== MACH_RCV_INTERRUPTED
)
587 error
= KERN_ABORTED
;
589 error
= KERN_FAILURE
;
594 error
= mac_proc_check_get_task(kauth_cred_get(), p
);
596 error
= KERN_FAILURE
;
601 /* Grant task port access */
602 task_reference(p
->task
);
603 sright
= (void *) convert_task_to_port(p
->task
);
604 tret
= ipc_port_copyout_send(
606 get_task_ipcspace(current_task()));
608 error
= KERN_SUCCESS
;
612 AUDIT_ARG(mach_port2
, tret
);
613 (void) copyout((char *) &tret
, task_addr
, sizeof(mach_port_name_t
));
616 AUDIT_MACH_SYSCALL_EXIT(error
);
621 * Routine: task_name_for_pid
623 * Get the task name port for another "process", named by its
624 * process ID on the same host as "target_task".
626 * Only permitted to privileged processes, or processes
627 * with the same user ID.
629 * XXX This should be a BSD system call, not a Mach trap!!!
634 struct task_name_for_pid_args
*args
)
636 mach_port_name_t target_tport
= args
->target_tport
;
638 user_addr_t task_addr
= args
->t
;
639 proc_t p
= PROC_NULL
;
641 mach_port_name_t tret
;
643 int error
= 0, refheld
= 0;
644 kauth_cred_t target_cred
;
646 AUDIT_MACH_SYSCALL_ENTER(AUE_TASKNAMEFORPID
);
648 AUDIT_ARG(mach_port1
, target_tport
);
650 t1
= port_name_to_task(target_tport
);
651 if (t1
== TASK_NULL
) {
652 (void) copyout((char *)&t1
, task_addr
, sizeof(mach_port_name_t
));
653 AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE
);
654 return(KERN_FAILURE
);
658 if (p
!= PROC_NULL
) {
659 AUDIT_ARG(process
, p
);
660 target_cred
= kauth_cred_proc_ref(p
);
663 if ((p
->p_stat
!= SZOMB
)
664 && ((current_proc() == p
)
665 || kauth_cred_issuser(kauth_cred_get())
666 || ((kauth_cred_getuid(target_cred
) == kauth_cred_getuid(kauth_cred_get())) &&
667 ((target_cred
->cr_ruid
== kauth_cred_get()->cr_ruid
))))) {
669 if (p
->task
!= TASK_NULL
) {
670 task_reference(p
->task
);
672 error
= mac_proc_check_get_task_name(kauth_cred_get(), p
);
674 task_deallocate(p
->task
);
678 sright
= (void *)convert_task_name_to_port(p
->task
);
679 tret
= ipc_port_copyout_send(sright
,
680 get_task_ipcspace(current_task()));
682 tret
= MACH_PORT_NULL
;
684 AUDIT_ARG(mach_port2
, tret
);
685 (void) copyout((char *)&tret
, task_addr
, sizeof(mach_port_name_t
));
687 error
= KERN_SUCCESS
;
696 tret
= MACH_PORT_NULL
;
697 (void) copyout((char *) &tret
, task_addr
, sizeof(mach_port_name_t
));
698 error
= KERN_FAILURE
;
701 kauth_cred_unref(&target_cred
);
704 AUDIT_MACH_SYSCALL_EXIT(error
);
709 sysctl_settfp_policy(__unused
struct sysctl_oid
*oidp
, void *arg1
,
710 __unused
int arg2
, struct sysctl_req
*req
)
715 error
= SYSCTL_OUT(req
, arg1
, sizeof(int));
716 if (error
|| req
->newptr
== USER_ADDR_NULL
)
722 if ((error
= SYSCTL_IN(req
, &new_value
, sizeof(int)))) {
725 if ((new_value
== KERN_TFP_POLICY_DENY
)
726 || (new_value
== KERN_TFP_POLICY_DEFAULT
))
727 tfp_policy
= new_value
;
735 #if defined(SECURE_KERNEL)
736 static int kern_secure_kernel
= 1;
738 static int kern_secure_kernel
= 0;
741 SYSCTL_INT(_kern
, OID_AUTO
, secure_kernel
, CTLFLAG_RD
, &kern_secure_kernel
, 0, "");
743 SYSCTL_NODE(_kern
, KERN_TFP
, tfp
, CTLFLAG_RW
|CTLFLAG_LOCKED
, 0, "tfp");
744 SYSCTL_PROC(_kern_tfp
, KERN_TFP_POLICY
, policy
, CTLTYPE_INT
| CTLFLAG_RW
,
745 &tfp_policy
, sizeof(uint32_t), &sysctl_settfp_policy
,"I","policy");
747 SYSCTL_INT(_vm
, OID_AUTO
, shared_region_trace_level
, CTLFLAG_RW
,
748 &shared_region_trace_level
, 0, "");
749 SYSCTL_INT(_vm
, OID_AUTO
, shared_region_version
, CTLFLAG_RD
,
750 &shared_region_version
, 0, "");
751 SYSCTL_INT(_vm
, OID_AUTO
, shared_region_persistence
, CTLFLAG_RW
,
752 &shared_region_persistence
, 0, "");
755 * shared_region_check_np:
757 * This system call is intended for dyld.
759 * dyld calls this when any process starts to see if the process's shared
760 * region is already set up and ready to use.
761 * This call returns the base address of the first mapping in the
762 * process's shared region's first mapping.
763 * dyld will then check what's mapped at that address.
765 * If the shared region is empty, dyld will then attempt to map the shared
766 * cache file in the shared region via the shared_region_map_np() system call.
768 * If something's already mapped in the shared region, dyld will check if it
769 * matches the shared cache it would like to use for that process.
770 * If it matches, everything's ready and the process can proceed and use the
772 * If it doesn't match, dyld will unmap the shared region and map the shared
773 * cache into the process's address space via mmap().
776 * EINVAL no shared region
777 * ENOMEM shared region is empty
778 * EFAULT bad address for "start_address"
781 shared_region_check_np(
782 __unused
struct proc
*p
,
783 struct shared_region_check_np_args
*uap
,
784 __unused
int *retvalp
)
786 vm_shared_region_t shared_region
;
787 mach_vm_offset_t start_address
;
791 SHARED_REGION_TRACE_DEBUG(
792 ("shared_region: %p [%d(%s)] -> check_np(0x%llx)\n",
793 current_thread(), p
->p_pid
, p
->p_comm
,
794 (uint64_t)uap
->start_address
));
796 /* retrieve the current tasks's shared region */
797 shared_region
= vm_shared_region_get(current_task());
798 if (shared_region
!= NULL
) {
799 /* retrieve address of its first mapping... */
800 kr
= vm_shared_region_start_address(shared_region
,
802 if (kr
!= KERN_SUCCESS
) {
805 /* ... and give it to the caller */
806 error
= copyout(&start_address
,
807 (user_addr_t
) uap
->start_address
,
808 sizeof (start_address
));
810 SHARED_REGION_TRACE_ERROR(
811 ("shared_region: %p [%d(%s)] "
813 "copyout(0x%llx) error %d\n",
814 current_thread(), p
->p_pid
, p
->p_comm
,
815 (uint64_t)uap
->start_address
, (uint64_t)start_address
,
819 vm_shared_region_deallocate(shared_region
);
821 /* no shared region ! */
825 SHARED_REGION_TRACE_DEBUG(
826 ("shared_region: %p [%d(%s)] check_np(0x%llx) <- 0x%llx %d\n",
827 current_thread(), p
->p_pid
, p
->p_comm
,
828 (uint64_t)uap
->start_address
, (uint64_t)start_address
, error
));
834 * shared_region_map_np()
836 * This system call is intended for dyld.
838 * dyld uses this to map a shared cache file into a shared region.
839 * This is usually done only the first time a shared cache is needed.
840 * Subsequent processes will just use the populated shared region without
841 * requiring any further setup.
844 shared_region_map_np(
846 struct shared_region_map_np_args
*uap
,
847 __unused
int *retvalp
)
853 struct vnode
*vp
, *root_vp
;
854 struct vnode_attr va
;
856 memory_object_size_t file_size
;
857 user_addr_t user_mappings
;
858 struct shared_file_mapping_np
*mappings
;
859 #define SFM_MAX_STACK 8
860 struct shared_file_mapping_np stack_mappings
[SFM_MAX_STACK
];
861 unsigned int mappings_count
;
862 vm_size_t mappings_size
;
863 memory_object_control_t file_control
;
864 struct vm_shared_region
*shared_region
;
866 SHARED_REGION_TRACE_DEBUG(
867 ("shared_region: %p [%d(%s)] -> map\n",
868 current_thread(), p
->p_pid
, p
->p_comm
));
870 shared_region
= NULL
;
877 /* get file descriptor for shared region cache file */
880 /* get file structure from file descriptor */
881 error
= fp_lookup(p
, fd
, &fp
, 0);
883 SHARED_REGION_TRACE_ERROR(
884 ("shared_region: %p [%d(%s)] map: "
885 "fd=%d lookup failed (error=%d)\n",
886 current_thread(), p
->p_pid
, p
->p_comm
, fd
, error
));
890 /* make sure we're attempting to map a vnode */
891 if (fp
->f_fglob
->fg_type
!= DTYPE_VNODE
) {
892 SHARED_REGION_TRACE_ERROR(
893 ("shared_region: %p [%d(%s)] map: "
894 "fd=%d not a vnode (type=%d)\n",
895 current_thread(), p
->p_pid
, p
->p_comm
,
896 fd
, fp
->f_fglob
->fg_type
));
901 /* we need at least read permission on the file */
902 if (! (fp
->f_fglob
->fg_flag
& FREAD
)) {
903 SHARED_REGION_TRACE_ERROR(
904 ("shared_region: %p [%d(%s)] map: "
905 "fd=%d not readable\n",
906 current_thread(), p
->p_pid
, p
->p_comm
, fd
));
911 /* get vnode from file structure */
912 error
= vnode_getwithref((vnode_t
) fp
->f_fglob
->fg_data
);
914 SHARED_REGION_TRACE_ERROR(
915 ("shared_region: %p [%d(%s)] map: "
916 "fd=%d getwithref failed (error=%d)\n",
917 current_thread(), p
->p_pid
, p
->p_comm
, fd
, error
));
920 vp
= (struct vnode
*) fp
->f_fglob
->fg_data
;
922 /* make sure the vnode is a regular file */
923 if (vp
->v_type
!= VREG
) {
924 SHARED_REGION_TRACE_ERROR(
925 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
926 "not a file (type=%d)\n",
927 current_thread(), p
->p_pid
, p
->p_comm
,
928 vp
, vp
->v_name
, vp
->v_type
));
933 /* make sure vnode is on the process's root volume */
934 root_vp
= p
->p_fd
->fd_rdir
;
935 if (root_vp
== NULL
) {
938 if (vp
->v_mount
!= root_vp
->v_mount
) {
939 SHARED_REGION_TRACE_ERROR(
940 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
941 "not on process's root volume\n",
942 current_thread(), p
->p_pid
, p
->p_comm
,
948 /* make sure vnode is owned by "root" */
950 VATTR_WANTED(&va
, va_uid
);
951 error
= vnode_getattr(vp
, &va
, vfs_context_current());
953 SHARED_REGION_TRACE_ERROR(
954 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
955 "vnode_getattr(%p) failed (error=%d)\n",
956 current_thread(), p
->p_pid
, p
->p_comm
,
957 vp
, vp
->v_name
, vp
, error
));
960 if (va
.va_uid
!= 0) {
961 SHARED_REGION_TRACE_ERROR(
962 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
963 "owned by uid=%d instead of 0\n",
964 current_thread(), p
->p_pid
, p
->p_comm
,
965 vp
, vp
->v_name
, va
.va_uid
));
971 error
= vnode_size(vp
, &fs
, vfs_context_current());
973 SHARED_REGION_TRACE_ERROR(
974 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
975 "vnode_size(%p) failed (error=%d)\n",
976 current_thread(), p
->p_pid
, p
->p_comm
,
977 vp
, vp
->v_name
, vp
, error
));
982 /* get the file's memory object handle */
983 file_control
= ubc_getobject(vp
, UBC_HOLDOBJECT
);
984 if (file_control
== MEMORY_OBJECT_CONTROL_NULL
) {
985 SHARED_REGION_TRACE_ERROR(
986 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
987 "no memory object\n",
988 current_thread(), p
->p_pid
, p
->p_comm
,
994 /* get the list of mappings the caller wants us to establish */
995 mappings_count
= uap
->count
; /* number of mappings */
996 mappings_size
= (vm_size_t
) (mappings_count
* sizeof (mappings
[0]));
997 if (mappings_count
== 0) {
998 SHARED_REGION_TRACE_INFO(
999 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1001 current_thread(), p
->p_pid
, p
->p_comm
,
1003 error
= 0; /* no mappings: we're done ! */
1005 } else if (mappings_count
<= SFM_MAX_STACK
) {
1006 mappings
= &stack_mappings
[0];
1008 SHARED_REGION_TRACE_ERROR(
1009 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1010 "too many mappings (%d)\n",
1011 current_thread(), p
->p_pid
, p
->p_comm
,
1012 vp
, vp
->v_name
, mappings_count
));
1017 user_mappings
= uap
->mappings
; /* the mappings, in user space */
1018 error
= copyin(user_mappings
,
1022 SHARED_REGION_TRACE_ERROR(
1023 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1024 "copyin(0x%llx, %d) failed (error=%d)\n",
1025 current_thread(), p
->p_pid
, p
->p_comm
,
1026 vp
, vp
->v_name
, (uint64_t)user_mappings
, mappings_count
, error
));
1030 /* get the process's shared region (setup in vm_map_exec()) */
1031 shared_region
= vm_shared_region_get(current_task());
1032 if (shared_region
== NULL
) {
1033 SHARED_REGION_TRACE_ERROR(
1034 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1035 "no shared region\n",
1036 current_thread(), p
->p_pid
, p
->p_comm
,
1041 /* map the file into that shared region's submap */
1042 kr
= vm_shared_region_map_file(shared_region
,
1047 (void *) p
->p_fd
->fd_rdir
);
1048 if (kr
!= KERN_SUCCESS
) {
1049 SHARED_REGION_TRACE_ERROR(
1050 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
1051 "vm_shared_region_map_file() failed kr=0x%x\n",
1052 current_thread(), p
->p_pid
, p
->p_comm
,
1053 vp
, vp
->v_name
, kr
));
1055 case KERN_INVALID_ADDRESS
:
1058 case KERN_PROTECTION_FAILURE
:
1065 case KERN_INVALID_ARGUMENT
:
1075 /* update the vnode's access time */
1076 if (! (vnode_vfsvisflags(vp
) & MNT_NOATIME
)) {
1078 nanotime(&va
.va_access_time
);
1079 VATTR_SET_ACTIVE(&va
, va_access_time
);
1080 vnode_setattr(vp
, &va
, vfs_context_current());
1083 if (p
->p_flag
& P_NOSHLIB
) {
1084 /* signal that this process is now using split libraries */
1085 OSBitAndAtomic(~((uint32_t)P_NOSHLIB
), &p
->p_flag
);
1091 * release the vnode...
1092 * ubc_map() still holds it for us in the non-error case
1094 (void) vnode_put(vp
);
1098 /* release the file descriptor */
1099 fp_drop(p
, fd
, fp
, 0);
1103 if (shared_region
!= NULL
) {
1104 vm_shared_region_deallocate(shared_region
);
1107 SHARED_REGION_TRACE_DEBUG(
1108 ("shared_region: %p [%d(%s)] <- map\n",
1109 current_thread(), p
->p_pid
, p
->p_comm
));
1115 /* sysctl overflow room */
1117 /* vm_page_free_target is provided as a makeshift solution for applications that want to
1118 allocate buffer space, possibly purgeable memory, but not cause inactive pages to be
1119 reclaimed. It allows the app to calculate how much memory is free outside the free target. */
1120 extern unsigned int vm_page_free_target
;
1121 SYSCTL_INT(_vm
, OID_AUTO
, vm_page_free_target
, CTLFLAG_RD
,
1122 &vm_page_free_target
, 0, "Pageout daemon free target");
1124 extern unsigned int vm_memory_pressure
;
1125 SYSCTL_INT(_vm
, OID_AUTO
, memory_pressure
, CTLFLAG_RD
,
1126 &vm_memory_pressure
, 0, "Memory pressure indicator");
1129 vm_ctl_page_free_wanted SYSCTL_HANDLER_ARGS
1131 #pragma unused(oidp, arg1, arg2)
1132 unsigned int page_free_wanted
;
1134 page_free_wanted
= mach_vm_ctl_page_free_wanted();
1135 return SYSCTL_OUT(req
, &page_free_wanted
, sizeof (page_free_wanted
));
1137 SYSCTL_PROC(_vm
, OID_AUTO
, page_free_wanted
,
1138 CTLTYPE_INT
| CTLFLAG_RD
| CTLFLAG_LOCKED
,
1139 0, 0, vm_ctl_page_free_wanted
, "I", "");
1141 extern unsigned int vm_page_purgeable_count
;
1142 SYSCTL_INT(_vm
, OID_AUTO
, page_purgeable_count
, CTLFLAG_RD
,
1143 &vm_page_purgeable_count
, 0, "Purgeable page count");
1145 extern unsigned int vm_page_purgeable_wired_count
;
1146 SYSCTL_INT(_vm
, OID_AUTO
, page_purgeable_wired_count
, CTLFLAG_RD
,
1147 &vm_page_purgeable_wired_count
, 0, "Wired purgeable page count");
1149 SYSCTL_INT(_vm
, OID_AUTO
, page_reusable_count
, CTLFLAG_RD
,
1150 &vm_page_stats_reusable
.reusable_count
, 0, "Reusable page count");
1151 SYSCTL_QUAD(_vm
, OID_AUTO
, reusable_success
, CTLFLAG_RD
,
1152 &vm_page_stats_reusable
.reusable_pages_success
, "");
1153 SYSCTL_QUAD(_vm
, OID_AUTO
, reusable_failure
, CTLFLAG_RD
,
1154 &vm_page_stats_reusable
.reusable_pages_failure
, "");
1155 SYSCTL_QUAD(_vm
, OID_AUTO
, reusable_shared
, CTLFLAG_RD
,
1156 &vm_page_stats_reusable
.reusable_pages_shared
, "");
1157 SYSCTL_QUAD(_vm
, OID_AUTO
, all_reusable_calls
, CTLFLAG_RD
,
1158 &vm_page_stats_reusable
.all_reusable_calls
, "");
1159 SYSCTL_QUAD(_vm
, OID_AUTO
, partial_reusable_calls
, CTLFLAG_RD
,
1160 &vm_page_stats_reusable
.partial_reusable_calls
, "");
1161 SYSCTL_QUAD(_vm
, OID_AUTO
, reuse_success
, CTLFLAG_RD
,
1162 &vm_page_stats_reusable
.reuse_pages_success
, "");
1163 SYSCTL_QUAD(_vm
, OID_AUTO
, reuse_failure
, CTLFLAG_RD
,
1164 &vm_page_stats_reusable
.reuse_pages_failure
, "");
1165 SYSCTL_QUAD(_vm
, OID_AUTO
, all_reuse_calls
, CTLFLAG_RD
,
1166 &vm_page_stats_reusable
.all_reuse_calls
, "");
1167 SYSCTL_QUAD(_vm
, OID_AUTO
, partial_reuse_calls
, CTLFLAG_RD
,
1168 &vm_page_stats_reusable
.partial_reuse_calls
, "");
1169 SYSCTL_QUAD(_vm
, OID_AUTO
, can_reuse_success
, CTLFLAG_RD
,
1170 &vm_page_stats_reusable
.can_reuse_success
, "");
1171 SYSCTL_QUAD(_vm
, OID_AUTO
, can_reuse_failure
, CTLFLAG_RD
,
1172 &vm_page_stats_reusable
.can_reuse_failure
, "");
1176 vm_pressure_monitor(
1177 __unused
struct proc
*p
,
1178 struct vm_pressure_monitor_args
*uap
,
1182 uint32_t pages_reclaimed
;
1183 uint32_t pages_wanted
;
1185 kr
= mach_vm_pressure_monitor(
1186 (boolean_t
) uap
->wait_for_pressure
,
1187 uap
->nsecs_monitored
,
1188 (uap
->pages_reclaimed
) ? &pages_reclaimed
: NULL
,
1200 if (uap
->pages_reclaimed
) {
1201 if (copyout((void *)&pages_reclaimed
,
1202 uap
->pages_reclaimed
,
1203 sizeof (pages_reclaimed
)) != 0) {
1208 *retval
= (int) pages_wanted
;