/*
 * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1987 Carnegie-Mellon University
 * All rights reserved.  The CMU software License Agreement specifies
 * the terms and conditions for use and redistribution.
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */
#include <meta_features.h>

#include <kern/task.h>
#include <kern/thread.h>
#include <kern/debug.h>
#include <kern/lock.h>
#include <kern/extmod_statistics.h>
#include <mach/mach_traps.h>
#include <mach/port.h>
#include <mach/task.h>
#include <mach/task_access.h>
#include <mach/task_special_ports.h>
#include <mach/time_value.h>
#include <mach/vm_map.h>
#include <mach/vm_param.h>
#include <mach/vm_prot.h>

#include <sys/file_internal.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/vnode_internal.h>
#include <sys/mount.h>
#include <sys/trace.h>
#include <sys/kernel.h>
#include <sys/ubc_internal.h>
#include <sys/syslog.h>
#include <sys/sysproto.h>
#include <sys/sysctl.h>
#include <sys/cprotect.h>
#include <sys/kpi_socket.h>

#include <security/audit/audit.h>
#include <security/mac.h>
#include <bsm/audit_kevents.h>

#include <kern/kalloc.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_pageout.h>

#include <machine/spl.h>

#include <mach/shared_region.h>
#include <vm/vm_shared_region.h>

#include <vm/vm_protos.h>

#include <sys/kern_memorystatus.h>
int _shared_region_map(struct proc *, int, unsigned int,
    struct shared_file_mapping_np *, memory_object_control_t *,
    struct shared_file_mapping_np *);
int _shared_region_slide(uint32_t, mach_vm_offset_t, mach_vm_size_t,
    mach_vm_offset_t, mach_vm_size_t, memory_object_control_t);
int shared_region_copyin_mappings(struct proc *, user_addr_t,
    unsigned int, struct shared_file_mapping_np *);
extern int vm_debug_events;

SYSCTL_INT(_vm, OID_AUTO, vm_debug_events, CTLFLAG_RW | CTLFLAG_LOCKED,
    &vm_debug_events, 0, "");

/*
 * Sysctls related to data/stack execution.  See osfmk/vm/vm_map.c
 */

#ifndef SECURE_KERNEL
extern int allow_stack_exec, allow_data_exec;

SYSCTL_INT(_vm, OID_AUTO, allow_stack_exec, CTLFLAG_RW | CTLFLAG_LOCKED,
    &allow_stack_exec, 0, "");
SYSCTL_INT(_vm, OID_AUTO, allow_data_exec, CTLFLAG_RW | CTLFLAG_LOCKED,
    &allow_data_exec, 0, "");
#endif /* !SECURE_KERNEL */
static const char *prot_values[] = {
    "none",
    "read-only",
    "write-only",
    "read-write",
    "execute-only",
    "read-execute",
    "write-execute",
    "read-write-execute"
};

void
log_stack_execution_failure(addr64_t vaddr, vm_prot_t prot)
{
    printf("Data/Stack execution not permitted: %s[pid %d] at virtual address 0x%qx, protections were %s\n",
        current_proc()->p_comm, current_proc()->p_pid, vaddr,
        prot_values[prot & VM_PROT_ALL]);
}
int shared_region_unnest_logging = 1;

SYSCTL_INT(_vm, OID_AUTO, shared_region_unnest_logging, CTLFLAG_RW | CTLFLAG_LOCKED,
    &shared_region_unnest_logging, 0, "");

int vm_shared_region_unnest_log_interval = 10;
int shared_region_unnest_log_count_threshold = 5;

/*
 * These log rate throttling state variables aren't thread safe, but
 * are sufficient unto the task.
 */
static int64_t last_unnest_log_time = 0;
static int shared_region_unnest_log_count = 0;
void log_unnest_badness(vm_map_t m, vm_map_offset_t s, vm_map_offset_t e) {
    struct timeval tv;
    const char *pcommstr;

    if (shared_region_unnest_logging == 0)
        return;

    if (shared_region_unnest_logging == 1) {
        microtime(&tv);
        if ((tv.tv_sec - last_unnest_log_time) < vm_shared_region_unnest_log_interval) {
            if (shared_region_unnest_log_count++ > shared_region_unnest_log_count_threshold)
                return;
        } else {
            last_unnest_log_time = tv.tv_sec;
            shared_region_unnest_log_count = 0;
        }
    }

    pcommstr = current_proc()->p_comm;

    printf("%s (map: %p) triggered DYLD shared region unnest for map: %p, region 0x%qx->0x%qx. While not abnormal for debuggers, this increases system memory footprint until the target exits.\n", current_proc()->p_comm, get_task_map(current_proc()->task), m, (uint64_t)s, (uint64_t)e);
}
int
useracc(user_addr_t addr, user_size_t len, int prot)
{
    return (vm_map_check_protection(
        current_map(),
        vm_map_trunc_page(addr), vm_map_round_page(addr + len),
        prot == B_READ ? VM_PROT_READ : VM_PROT_WRITE));
}
int
vslock(user_addr_t addr, user_size_t len)
{
    kern_return_t kret;

    kret = vm_map_wire(current_map(), vm_map_trunc_page(addr),
            vm_map_round_page(addr + len),
            VM_PROT_READ | VM_PROT_WRITE, FALSE);

    switch (kret) {
    case KERN_SUCCESS:
        return (0);
    case KERN_INVALID_ADDRESS:
    case KERN_NO_SPACE:
        return (ENOMEM);
    case KERN_PROTECTION_FAILURE:
        return (EACCES);
    default:
        return (EINVAL);
    }
}
int
vsunlock(user_addr_t addr, user_size_t len, __unused int dirtied)
{
#if FIXME  /* [ */
    pmap_t          pmap;
    vm_page_t       pg;
    vm_map_offset_t vaddr;
    ppnum_t         paddr;
#endif  /* FIXME ] */
    kern_return_t   kret;

#if FIXME  /* [ */
    if (dirtied) {
        pmap = get_task_pmap(current_task());
        for (vaddr = vm_map_trunc_page(addr);
             vaddr < vm_map_round_page(addr + len);
             vaddr += PAGE_SIZE) {
            paddr = pmap_extract(pmap, vaddr);
            pg = PHYS_TO_VM_PAGE(paddr);
            vm_page_set_modified(pg);
        }
    }
#endif  /* FIXME ] */
    kret = vm_map_unwire(current_map(), vm_map_trunc_page(addr),
            vm_map_round_page(addr + len), FALSE);
    switch (kret) {
    case KERN_SUCCESS:
        return (0);
    case KERN_INVALID_ADDRESS:
    case KERN_NO_SPACE:
        return (ENOMEM);
    case KERN_PROTECTION_FAILURE:
        return (EACCES);
    default:
        return (EINVAL);
    }
}
int
subyte(user_addr_t addr, int byte)
{
    char character;

    character = (char)byte;
    return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1);
}

int
suibyte(user_addr_t addr, int byte)
{
    char character;

    character = (char)byte;
    return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1);
}

int fubyte(user_addr_t addr)
{
    unsigned char byte;

    if (copyin(addr, (void *) &byte, sizeof(char)))
        return (-1);
    return (byte);
}

int fuibyte(user_addr_t addr)
{
    unsigned char byte;

    if (copyin(addr, (void *) &(byte), sizeof(char)))
        return (-1);
    return (byte);
}

int
suword(user_addr_t addr, long word)
{
    return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1);
}

long fuword(user_addr_t addr)
{
    long word = 0;

    if (copyin(addr, (void *) &word, sizeof(int)))
        return (-1);
    return (word);
}

/* suiword and fuiword are the same as suword and fuword, respectively */

int
suiword(user_addr_t addr, long word)
{
    return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1);
}

long fuiword(user_addr_t addr)
{
    long word = 0;

    if (copyin(addr, (void *) &word, sizeof(int)))
        return (-1);
    return (word);
}
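/*
 * Usage sketch (illustrative, not part of the original file): these
 * primitives move single bytes/words across the user/kernel boundary
 * and signal failure in-band with -1:
 *
 *	long v = fuword(uaddr);		// fetch an int-sized word
 *	if (v == -1)
 *		// either the fetch faulted or the word really was -1
 *	if (suword(uaddr, 0) != 0)
 *		// the store faulted
 *
 * Callers that must distinguish a legitimately stored -1 from a fault
 * should use copyin()/copyout() directly, as the functions above do.
 */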
/*
 * With a 32-bit kernel and mixed 32/64-bit user tasks, this interface allows the
 * fetching and setting of process-sized size_t and pointer values.
 */
int
sulong(user_addr_t addr, int64_t word)
{
    if (IS_64BIT_PROCESS(current_proc())) {
        return (copyout((void *)&word, addr, sizeof(word)) == 0 ? 0 : -1);
    } else {
        return (suiword(addr, (long)word));
    }
}

int64_t
fulong(user_addr_t addr)
{
    int64_t longword;

    if (IS_64BIT_PROCESS(current_proc())) {
        if (copyin(addr, (void *)&longword, sizeof(longword)) != 0)
            return (-1);
        return (longword);
    } else {
        return ((int64_t)fuiword(addr));
    }
}

int
suulong(user_addr_t addr, uint64_t uword)
{
    if (IS_64BIT_PROCESS(current_proc())) {
        return (copyout((void *)&uword, addr, sizeof(uword)) == 0 ? 0 : -1);
    } else {
        return (suiword(addr, (uint32_t)uword));
    }
}

uint64_t
fuulong(user_addr_t addr)
{
    uint64_t ulongword;

    if (IS_64BIT_PROCESS(current_proc())) {
        if (copyin(addr, (void *)&ulongword, sizeof(ulongword)) != 0)
            return (-1ULL);
        return (ulongword);
    } else {
        return ((uint64_t)fuiword(addr));
    }
}
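/*
 * Usage sketch (illustrative, not part of the original file): the *long
 * variants pick the transfer size from the calling process's ABI, so a
 * caller can read a user pointer-sized value without special-casing
 * 32- vs 64-bit tasks:
 *
 *	user_addr_t uptr = ...;		// address of a user pointer/size_t
 *	int64_t val = fulong(uptr);	// 8 bytes for a 64-bit task,
 *					// 4 bytes (via fuiword) otherwise
 */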
int
swapon(__unused proc_t procp, __unused struct swapon_args *uap, __unused int *retval)
{
    return (ENOTSUP);
}
/*
 *	pid_for_task
 *
 *	Find the BSD process ID for the Mach task associated with the given Mach port
 *	name
 *
 *	Parameters:	args		User argument descriptor (see below)
 *
 *	Indirect parameters:	args->t		Mach port name
 *				args->pid	Process ID (returned value; see below)
 *
 *	Returns:	KERN_SUCCESS	Success
 *			KERN_FAILURE	Not success
 *
 *	Implicit returns: args->pid		Process ID
 */
kern_return_t
pid_for_task(
    struct pid_for_task_args *args)
{
    mach_port_name_t    t = args->t;
    user_addr_t         pid_addr = args->pid;
    proc_t              p;
    task_t              t1;
    int                 pid = -1;
    kern_return_t       err = KERN_SUCCESS;

    AUDIT_MACH_SYSCALL_ENTER(AUE_PIDFORTASK);
    AUDIT_ARG(mach_port1, t);

    t1 = port_name_to_task(t);

    if (t1 == TASK_NULL) {
        err = KERN_FAILURE;
        goto pftout;
    } else {
        p = get_bsdtask_info(t1);
        if (p) {
            pid = proc_pid(p);
            err = KERN_SUCCESS;
        } else {
            err = KERN_FAILURE;
        }
    }
    task_deallocate(t1);
pftout:
    AUDIT_ARG(pid, pid);
    (void) copyout((char *) &pid, pid_addr, sizeof(int));
    AUDIT_MACH_SYSCALL_EXIT(err);
    return (err);
}
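/*
 * User-space view (sketch, assuming the standard libsystem wrapper for
 * this trap):
 *
 *	#include <mach/mach.h>
 *	int pid;
 *	kern_return_t kr = pid_for_task(mach_task_self(), &pid);
 *	if (kr == KERN_SUCCESS)
 *		printf("my pid is %d\n", pid);
 *
 * On failure the trap still copies out a pid of -1 before returning
 * KERN_FAILURE.
 */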
/*
 * tfp_policy = KERN_TFP_POLICY_DENY; Deny Mode: None allowed except for self
 * tfp_policy = KERN_TFP_POLICY_DEFAULT; default mode: all posix checks and upcall via task port for authentication
 */
static int tfp_policy = KERN_TFP_POLICY_DEFAULT;
/*
 *	Routine:	task_for_pid_posix_check
 *	Purpose:
 *		Verify that the current process should be allowed to
 *		get the target process's task port. This is only
 *		permitted if:
 *		- The current process is root
 *		OR all of the following are true:
 *		- The target process's real, effective, and saved uids
 *		  are the same as the current proc's euid,
 *		- The target process's group set is a subset of the
 *		  calling process's group set, and
 *		- The target process hasn't switched credentials.
 *
 *	Returns:	TRUE: permitted
 *			FALSE: denied
 */
static int
task_for_pid_posix_check(proc_t target)
{
    kauth_cred_t targetcred, mycred;
    uid_t myuid;
    int allowed;

    /* No task_for_pid on bad targets */
    if (target == PROC_NULL || target->p_stat == SZOMB) {
        return FALSE;
    }

    mycred = kauth_cred_get();
    myuid = kauth_cred_getuid(mycred);

    /* If we're running as root, the check passes */
    if (kauth_cred_issuser(mycred))
        return TRUE;

    /* We're allowed to get our own task port */
    if (target == current_proc())
        return TRUE;

    /*
     * Under DENY, only root can get another proc's task port,
     * so no more checks are needed.
     */
    if (tfp_policy == KERN_TFP_POLICY_DENY) {
        return FALSE;
    }

    targetcred = kauth_cred_proc_ref(target);
    allowed = TRUE;

    /* Do target's ruid, euid, and saved uid match my euid? */
    if ((kauth_cred_getuid(targetcred) != myuid) ||
        (kauth_cred_getruid(targetcred) != myuid) ||
        (kauth_cred_getsvuid(targetcred) != myuid)) {
        allowed = FALSE;
        goto out;
    }

    /* Are target's groups a subset of my groups? */
    if (kauth_cred_gid_subset(targetcred, mycred, &allowed) ||
        allowed == 0) {
        allowed = FALSE;
        goto out;
    }

    /* Has target switched credentials? */
    if (target->p_flag & P_SUGID) {
        allowed = FALSE;
        goto out;
    }

out:
    kauth_cred_unref(&targetcred);
    return allowed;
}
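/*
 * Example of the policy above (illustrative): a debugger running as an
 * ordinary user may target its own processes, because their real,
 * effective, and saved uids all match the debugger's euid and their
 * groups are a subset of its own. It may not target a setuid program it
 * launched: exec of a setuid image sets P_SUGID, so the final check
 * denies access even though the uids may appear to match.
 */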
/*
 *	Routine:	task_for_pid
 *	Purpose:
 *		Get the task port for another "process", named by its
 *		process ID on the same host as "target_task".
 *
 *		Only permitted to privileged processes, or processes
 *		with the same user ID.
 *
 *		Note: if pid == 0, an error is returned no matter who is calling.
 *
 * XXX This should be a BSD system call, not a Mach trap!!!
 */
kern_return_t
task_for_pid(
    struct task_for_pid_args *args)
{
    mach_port_name_t    target_tport = args->target_tport;
    int                 pid = args->pid;
    user_addr_t         task_addr = args->t;
    proc_t              p = PROC_NULL;
    task_t              t1 = TASK_NULL;
    mach_port_name_t    tret = MACH_PORT_NULL;
    ipc_port_t          tfpport;
    void                *sright;
    int                 error = 0;

    AUDIT_MACH_SYSCALL_ENTER(AUE_TASKFORPID);
    AUDIT_ARG(pid, pid);
    AUDIT_ARG(mach_port1, target_tport);

    /* Always check if pid == 0 */
    if (pid == 0) {
        (void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
        AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
        return (KERN_FAILURE);
    }

    t1 = port_name_to_task(target_tport);
    if (t1 == TASK_NULL) {
        (void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
        AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
        return (KERN_FAILURE);
    }

    p = proc_find(pid);
    if (p != PROC_NULL)
        AUDIT_ARG(process, p);

    if (!(task_for_pid_posix_check(p))) {
        error = KERN_FAILURE;
        goto tfpout;
    }

    if (p->task != TASK_NULL) {
        /* If we aren't root and target's task access port is set... */
        if (!kauth_cred_issuser(kauth_cred_get()) &&
            p != current_proc() &&
            (task_get_task_access_port(p->task, &tfpport) == 0) &&
            (tfpport != IPC_PORT_NULL)) {

            if (tfpport == IPC_PORT_DEAD) {
                error = KERN_PROTECTION_FAILURE;
                goto tfpout;
            }

            /* Call up to the task access server */
            error = check_task_access(tfpport, proc_selfpid(), kauth_getgid(), pid);

            if (error != MACH_MSG_SUCCESS) {
                if (error == MACH_RCV_INTERRUPTED)
                    error = KERN_ABORTED;
                else
                    error = KERN_FAILURE;
                goto tfpout;
            }
        }
#if CONFIG_MACF
        error = mac_proc_check_get_task(kauth_cred_get(), p);
        if (error) {
            error = KERN_FAILURE;
            goto tfpout;
        }
#endif

        /* Grant task port access */
        task_reference(p->task);
        extmod_statistics_incr_task_for_pid(p->task);

        sright = (void *) convert_task_to_port(p->task);
        tret = ipc_port_copyout_send(
            sright,
            get_task_ipcspace(current_task()));
    }
    error = KERN_SUCCESS;

tfpout:
    task_deallocate(t1);
    AUDIT_ARG(mach_port2, tret);
    (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
    if (p != PROC_NULL)
        proc_rele(p);
    AUDIT_MACH_SYSCALL_EXIT(error);
    return (error);
}
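/*
 * User-space view (sketch, assuming the standard libsystem wrapper):
 *
 *	#include <mach/mach.h>
 *	mach_port_t task;
 *	kern_return_t kr = task_for_pid(mach_task_self(), pid, &task);
 *	if (kr == KERN_SUCCESS)
 *		// "task" is a send right to the target's task port
 *
 * Access is subject to the posix check, the task access server upcall,
 * and MAC policy above; unprivileged callers should expect KERN_FAILURE.
 */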
/*
 *	Routine:	task_name_for_pid
 *	Purpose:
 *		Get the task name port for another "process", named by its
 *		process ID on the same host as "target_task".
 *
 *		Only permitted to privileged processes, or processes
 *		with the same user ID.
 *
 * XXX This should be a BSD system call, not a Mach trap!!!
 */
kern_return_t
task_name_for_pid(
    struct task_name_for_pid_args *args)
{
    mach_port_name_t    target_tport = args->target_tport;
    int                 pid = args->pid;
    user_addr_t         task_addr = args->t;
    proc_t              p = PROC_NULL;
    task_t              t1;
    mach_port_name_t    tret;
    void                *sright;
    int                 error = 0, refheld = 0;
    kauth_cred_t        target_cred;

    AUDIT_MACH_SYSCALL_ENTER(AUE_TASKNAMEFORPID);
    AUDIT_ARG(pid, pid);
    AUDIT_ARG(mach_port1, target_tport);

    t1 = port_name_to_task(target_tport);
    if (t1 == TASK_NULL) {
        (void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
        AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
        return (KERN_FAILURE);
    }

    p = proc_find(pid);
    if (p != PROC_NULL) {
        AUDIT_ARG(process, p);
        target_cred = kauth_cred_proc_ref(p);
        refheld = 1;

        if ((p->p_stat != SZOMB)
            && ((current_proc() == p)
                || kauth_cred_issuser(kauth_cred_get())
                || ((kauth_cred_getuid(target_cred) == kauth_cred_getuid(kauth_cred_get())) &&
                    ((kauth_cred_getruid(target_cred) == kauth_getruid()))))) {

            if (p->task != TASK_NULL) {
                task_reference(p->task);
#if CONFIG_MACF
                error = mac_proc_check_get_task_name(kauth_cred_get(), p);
                if (error) {
                    task_deallocate(p->task);
                    goto noperm;
                }
#endif
                sright = (void *)convert_task_name_to_port(p->task);
                tret = ipc_port_copyout_send(sright,
                    get_task_ipcspace(current_task()));
            } else
                tret = MACH_PORT_NULL;

            AUDIT_ARG(mach_port2, tret);
            (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
            task_deallocate(t1);
            error = KERN_SUCCESS;
            goto tnfpout;
        }
    }

#if CONFIG_MACF
noperm:
#endif
    task_deallocate(t1);
    tret = MACH_PORT_NULL;
    (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
    error = KERN_FAILURE;
tnfpout:
    if (refheld != 0)
        kauth_cred_unref(&target_cred);
    if (p != PROC_NULL)
        proc_rele(p);
    AUDIT_MACH_SYSCALL_EXIT(error);
    return (error);
}
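/*
 * User-space view (sketch): task_name_for_pid() returns the weaker task
 * *name* port, which identifies the task but confers no control over it:
 *
 *	mach_port_t tname;
 *	kern_return_t kr = task_name_for_pid(mach_task_self(), pid, &tname);
 *	if (kr == KERN_SUCCESS)
 *		// usable with task_info(), but not vm_read() and the like
 */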
int
pid_suspend(struct proc *p __unused, struct pid_suspend_args *args, int *ret)
{
    task_t  target = NULL;
    proc_t  targetproc = PROC_NULL;
    int     pid = args->pid;
    int     error = 0;

#if CONFIG_MACF
    error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_SUSPEND);
    if (error) {
        error = EPERM;
        goto out;
    }
#endif

    if (pid == 0) {
        error = EPERM;
        goto out;
    }

    targetproc = proc_find(pid);
    if (!task_for_pid_posix_check(targetproc)) {
        error = EPERM;
        goto out;
    }

    target = targetproc->task;
#ifndef CONFIG_EMBEDDED
    if (target != TASK_NULL) {
        mach_port_t tfpport;

        /* If we aren't root and target's task access port is set... */
        if (!kauth_cred_issuser(kauth_cred_get()) &&
            targetproc != current_proc() &&
            (task_get_task_access_port(target, &tfpport) == 0) &&
            (tfpport != IPC_PORT_NULL)) {

            if (tfpport == IPC_PORT_DEAD) {
                error = EACCES;
                goto out;
            }

            /* Call up to the task access server */
            error = check_task_access(tfpport, proc_selfpid(), kauth_getgid(), pid);

            if (error != MACH_MSG_SUCCESS) {
                if (error == MACH_RCV_INTERRUPTED)
                    error = EINTR;
                else
                    error = EPERM;
                goto out;
            }
        }
    }
#endif

    task_reference(target);
    error = task_suspend(target);
    if (error) {
        if (error == KERN_INVALID_ARGUMENT) {
            error = EINVAL;
        } else {
            error = EPERM;
        }
    }
    task_deallocate(target);

#if CONFIG_FREEZE
    kern_hibernation_on_pid_suspend(pid);
#endif

out:
    if (targetproc != PROC_NULL)
        proc_rele(targetproc);
    *ret = error;
    return error;
}
int
pid_resume(struct proc *p __unused, struct pid_resume_args *args, int *ret)
{
    task_t  target = NULL;
    proc_t  targetproc = PROC_NULL;
    int     pid = args->pid;
    int     error = 0;

#if CONFIG_MACF
    error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_RESUME);
    if (error) {
        error = EPERM;
        goto out;
    }
#endif

    if (pid == 0) {
        error = EPERM;
        goto out;
    }

    targetproc = proc_find(pid);
    if (!task_for_pid_posix_check(targetproc)) {
        error = EPERM;
        goto out;
    }

    target = targetproc->task;
#ifndef CONFIG_EMBEDDED
    if (target != TASK_NULL) {
        mach_port_t tfpport;

        /* If we aren't root and target's task access port is set... */
        if (!kauth_cred_issuser(kauth_cred_get()) &&
            targetproc != current_proc() &&
            (task_get_task_access_port(target, &tfpport) == 0) &&
            (tfpport != IPC_PORT_NULL)) {

            if (tfpport == IPC_PORT_DEAD) {
                error = EACCES;
                goto out;
            }

            /* Call up to the task access server */
            error = check_task_access(tfpport, proc_selfpid(), kauth_getgid(), pid);

            if (error != MACH_MSG_SUCCESS) {
                if (error == MACH_RCV_INTERRUPTED)
                    error = EINTR;
                else
                    error = EPERM;
                goto out;
            }
        }
    }
#endif

    task_reference(target);

#if CONFIG_FREEZE
    kern_hibernation_on_pid_resume(pid, target);
#endif

    error = task_resume(target);
    if (error) {
        if (error == KERN_INVALID_ARGUMENT) {
            error = EINVAL;
        } else {
            error = EPERM;
        }
    }
    task_deallocate(target);

out:
    if (targetproc != PROC_NULL)
        proc_rele(targetproc);
    *ret = error;
    return error;
}
#if CONFIG_EMBEDDED
int
pid_hibernate(struct proc *p __unused, struct pid_hibernate_args *args, int *ret)
{
    int     error = 0;
    proc_t  targetproc = PROC_NULL;
    int     pid = args->pid;

#ifndef CONFIG_FREEZE
    #pragma unused(pid)
#else

#if CONFIG_MACF
    error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_HIBERNATE);
    if (error) {
        error = EPERM;
        goto out;
    }
#endif

    /*
     * The only accepted pid value here is currently -1, since we just kick off the hibernation thread
     * here - individual ids aren't required. However, it's intended that this call will change
     * in the future to initiate hibernation of individual processes. In anticipation, we'll obtain the
     * process handle for potentially valid values and call task_for_pid_posix_check(); this way, everything
     * is validated correctly and set for further refactoring. See <rdar://problem/7839708> for more details.
     */
    if (pid >= 0) {
        targetproc = proc_find(pid);
        if (!task_for_pid_posix_check(targetproc)) {
            error = EPERM;
            goto out;
        }
    }

    if (pid == -1) {
        kern_hibernation_on_pid_hibernate(pid);
    } else {
        error = EPERM;
    }

out:

#endif /* CONFIG_FREEZE */

    if (targetproc != PROC_NULL)
        proc_rele(targetproc);
    *ret = error;
    return error;
}
int
pid_shutdown_sockets(struct proc *p __unused, struct pid_shutdown_sockets_args *args, int *ret)
{
    int                 error = 0;
    proc_t              targetproc = PROC_NULL;
    struct filedesc     *fdp;
    struct fileproc     *fp;
    int                 pid = args->pid;
    int                 level = args->level;
    int                 i;

    if (level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_SVC &&
        level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL) {
        error = EINVAL;
        goto out;
    }

#if CONFIG_MACF
    error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_SHUTDOWN_SOCKETS);
    if (error) {
        error = EPERM;
        goto out;
    }
#endif

    targetproc = proc_find(pid);
    if (!task_for_pid_posix_check(targetproc)) {
        error = EPERM;
        goto out;
    }

    proc_fdlock(targetproc);
    fdp = targetproc->p_fd;

    for (i = 0; i < fdp->fd_nfiles; i++) {
        struct socket *sockp;

        fp = fdp->fd_ofiles[i];
        if (fp == NULL || (fdp->fd_ofileflags[i] & UF_RESERVED) != 0 ||
            fp->f_fglob->fg_type != DTYPE_SOCKET) {
            continue;
        }

        sockp = (struct socket *)fp->f_fglob->fg_data;

        /* Call networking stack with socket and level */
        (void) socket_defunct(targetproc, sockp, level);
    }

    proc_fdunlock(targetproc);

out:
    if (targetproc != PROC_NULL)
        proc_rele(targetproc);
    *ret = error;
    return error;
}
#endif /* CONFIG_EMBEDDED */
static int
sysctl_settfp_policy(__unused struct sysctl_oid *oidp, void *arg1,
    __unused int arg2, struct sysctl_req *req)
{
    int error = 0;
    int new_value;

    error = SYSCTL_OUT(req, arg1, sizeof(int));
    if (error || req->newptr == USER_ADDR_NULL)
        return (error);

    if (!is_suser())
        return (EPERM);

    if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) {
        goto out;
    }
    if ((new_value == KERN_TFP_POLICY_DENY)
        || (new_value == KERN_TFP_POLICY_DEFAULT))
        tfp_policy = new_value;
    else
        error = EINVAL;
out:
    return (error);
}
#if defined(SECURE_KERNEL)
static int kern_secure_kernel = 1;
#else
static int kern_secure_kernel = 0;
#endif

SYSCTL_INT(_kern, OID_AUTO, secure_kernel, CTLFLAG_RD | CTLFLAG_LOCKED,
    &kern_secure_kernel, 0, "");

SYSCTL_NODE(_kern, KERN_TFP, tfp, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "tfp");
SYSCTL_PROC(_kern_tfp, KERN_TFP_POLICY, policy, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &tfp_policy, sizeof(uint32_t), &sysctl_settfp_policy, "I", "policy");

SYSCTL_INT(_vm, OID_AUTO, shared_region_trace_level, CTLFLAG_RW | CTLFLAG_LOCKED,
    &shared_region_trace_level, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_version, CTLFLAG_RD | CTLFLAG_LOCKED,
    &shared_region_version, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_persistence, CTLFLAG_RW | CTLFLAG_LOCKED,
    &shared_region_persistence, 0, "");
/*
 * shared_region_check_np:
 *
 * This system call is intended for dyld.
 *
 * dyld calls this when any process starts to see if the process's shared
 * region is already set up and ready to use.
 * This call returns the base address of the first mapping in the
 * process's shared region.
 * dyld will then check what's mapped at that address.
 *
 * If the shared region is empty, dyld will then attempt to map the shared
 * cache file in the shared region via the shared_region_map_np() system call.
 *
 * If something's already mapped in the shared region, dyld will check if it
 * matches the shared cache it would like to use for that process.
 * If it matches, everything's ready and the process can proceed and use the
 * shared region.
 * If it doesn't match, dyld will unmap the shared region and map the shared
 * cache into the process's address space via mmap().
 *
 * ERROR VALUES
 * EINVAL	no shared region
 * ENOMEM	shared region is empty
 * EFAULT	bad address for "start_address"
 */
int
shared_region_check_np(
    __unused struct proc                *p,
    struct shared_region_check_np_args  *uap,
    __unused int                        *retvalp)
{
    vm_shared_region_t  shared_region;
    mach_vm_offset_t    start_address = 0;
    int                 error;
    kern_return_t       kr;

    SHARED_REGION_TRACE_DEBUG(
        ("shared_region: %p [%d(%s)] -> check_np(0x%llx)\n",
         current_thread(), p->p_pid, p->p_comm,
         (uint64_t)uap->start_address));

    /* retrieve the current tasks's shared region */
    shared_region = vm_shared_region_get(current_task());
    if (shared_region != NULL) {
        /* retrieve address of its first mapping... */
        kr = vm_shared_region_start_address(shared_region,
                                            &start_address);
        if (kr != KERN_SUCCESS) {
            error = ENOMEM;
        } else {
            /* ... and give it to the caller */
            error = copyout(&start_address,
                            (user_addr_t) uap->start_address,
                            sizeof (start_address));
            if (error) {
                SHARED_REGION_TRACE_ERROR(
                    ("shared_region: %p [%d(%s)] "
                     "check_np(0x%llx) "
                     "copyout(0x%llx) error %d\n",
                     current_thread(), p->p_pid, p->p_comm,
                     (uint64_t)uap->start_address, (uint64_t)start_address,
                     error));
            }
        }
        vm_shared_region_deallocate(shared_region);
    } else {
        /* no shared region ! */
        error = EINVAL;
    }

    SHARED_REGION_TRACE_DEBUG(
        ("shared_region: %p [%d(%s)] check_np(0x%llx) <- 0x%llx %d\n",
         current_thread(), p->p_pid, p->p_comm,
         (uint64_t)uap->start_address, (uint64_t)start_address, error));

    return error;
}
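/*
 * dyld-side view (sketch; the syscall stub name is shown for illustration
 * and assumed, not defined in this file):
 *
 *	uint64_t base;
 *	if (__shared_region_check_np(&base) == 0)
 *		// inspect the cache header mapped at "base"
 *	else
 *		// no usable shared region: fall back to mapping the
 *		// shared cache privately with mmap()
 */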
int
shared_region_copyin_mappings(
    struct proc                     *p,
    user_addr_t                     user_mappings,
    unsigned int                    mappings_count,
    struct shared_file_mapping_np   *mappings)
{
    int         error = 0;
    vm_size_t   mappings_size = 0;

    /* get the list of mappings the caller wants us to establish */
    mappings_size = (vm_size_t) (mappings_count * sizeof (mappings[0]));
    error = copyin(user_mappings,
                   mappings,
                   mappings_size);
    if (error) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map(): "
             "copyin(0x%llx, %d) failed (error=%d)\n",
             current_thread(), p->p_pid, p->p_comm,
             (uint64_t)user_mappings, mappings_count, error));
    }
    return error;
}
/*
 * shared_region_map_np()
 *
 * This system call is intended for dyld.
 *
 * dyld uses this to map a shared cache file into a shared region.
 * This is usually done only the first time a shared cache is needed.
 * Subsequent processes will just use the populated shared region without
 * requiring any further setup.
 */
int
_shared_region_map(
    struct proc                     *p,
    int                             fd,
    uint32_t                        mappings_count,
    struct shared_file_mapping_np   *mappings,
    memory_object_control_t         *sr_file_control,
    struct shared_file_mapping_np   *mapping_to_slide)
{
    int                         error;
    kern_return_t               kr;
    struct fileproc             *fp;
    struct vnode                *vp, *root_vp;
    struct vnode_attr           va;
    off_t                       fs;
    memory_object_size_t        file_size;
    vm_prot_t                   maxprot = VM_PROT_ALL;
    memory_object_control_t     file_control;
    struct vm_shared_region     *shared_region;

    SHARED_REGION_TRACE_DEBUG(
        ("shared_region: %p [%d(%s)] -> map\n",
         current_thread(), p->p_pid, p->p_comm));

    shared_region = NULL;
    fp = NULL;
    vp = NULL;

    /* get file structure from file descriptor */
    error = fp_lookup(p, fd, &fp, 0);
    if (error) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map: "
             "fd=%d lookup failed (error=%d)\n",
             current_thread(), p->p_pid, p->p_comm, fd, error));
        goto done;
    }

    /* make sure we're attempting to map a vnode */
    if (fp->f_fglob->fg_type != DTYPE_VNODE) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map: "
             "fd=%d not a vnode (type=%d)\n",
             current_thread(), p->p_pid, p->p_comm,
             fd, fp->f_fglob->fg_type));
        error = EINVAL;
        goto done;
    }

    /* we need at least read permission on the file */
    if (! (fp->f_fglob->fg_flag & FREAD)) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map: "
             "fd=%d not readable\n",
             current_thread(), p->p_pid, p->p_comm, fd));
        error = EPERM;
        goto done;
    }

    /* get vnode from file structure */
    error = vnode_getwithref((vnode_t) fp->f_fglob->fg_data);
    if (error) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map: "
             "fd=%d getwithref failed (error=%d)\n",
             current_thread(), p->p_pid, p->p_comm, fd, error));
        goto done;
    }
    vp = (struct vnode *) fp->f_fglob->fg_data;

    /* make sure the vnode is a regular file */
    if (vp->v_type != VREG) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map(%p:'%s'): "
             "not a file (type=%d)\n",
             current_thread(), p->p_pid, p->p_comm,
             vp, vp->v_name, vp->v_type));
        error = EINVAL;
        goto done;
    }

#if CONFIG_MACF
    error = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()),
            fp->f_fglob, VM_PROT_ALL, MAP_FILE, &maxprot);
    if (error) {
        goto done;
    }
#endif /* MAC */

#if CONFIG_PROTECT
    /* check for content protection access */
    {
        void *cnode;
        if ((cnode = cp_get_protected_cnode(vp)) != NULL) {
            error = cp_handle_vnop(cnode, CP_READ_ACCESS | CP_WRITE_ACCESS);
            if (error)
                goto done;
        }
    }
#endif /* CONFIG_PROTECT */

    /* make sure vnode is on the process's root volume */
    root_vp = p->p_fd->fd_rdir;
    if (root_vp == NULL) {
        root_vp = rootvnode;
    } else {
        /*
         * Chroot-ed processes can't use the shared_region.
         */
        error = EINVAL;
        goto done;
    }

    if (vp->v_mount != root_vp->v_mount) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map(%p:'%s'): "
             "not on process's root volume\n",
             current_thread(), p->p_pid, p->p_comm,
             vp, vp->v_name));
        error = EPERM;
        goto done;
    }

    /* make sure vnode is owned by "root" */
    VATTR_INIT(&va);
    VATTR_WANTED(&va, va_uid);
    error = vnode_getattr(vp, &va, vfs_context_current());
    if (error) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map(%p:'%s'): "
             "vnode_getattr(%p) failed (error=%d)\n",
             current_thread(), p->p_pid, p->p_comm,
             vp, vp->v_name, vp, error));
        goto done;
    }
    if (va.va_uid != 0) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map(%p:'%s'): "
             "owned by uid=%d instead of 0\n",
             current_thread(), p->p_pid, p->p_comm,
             vp, vp->v_name, va.va_uid));
        error = EPERM;
        goto done;
    }

    /* get vnode size */
    error = vnode_size(vp, &fs, vfs_context_current());
    if (error) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map(%p:'%s'): "
             "vnode_size(%p) failed (error=%d)\n",
             current_thread(), p->p_pid, p->p_comm,
             vp, vp->v_name, vp, error));
        goto done;
    }
    file_size = fs;

    /* get the file's memory object handle */
    file_control = ubc_getobject(vp, UBC_HOLDOBJECT);
    if (file_control == MEMORY_OBJECT_CONTROL_NULL) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map(%p:'%s'): "
             "no memory object\n",
             current_thread(), p->p_pid, p->p_comm,
             vp, vp->v_name));
        error = EINVAL;
        goto done;
    }

    if (sr_file_control != NULL) {
        *sr_file_control = file_control;
    }

    /* get the process's shared region (setup in vm_map_exec()) */
    shared_region = vm_shared_region_get(current_task());
    if (shared_region == NULL) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map(%p:'%s'): "
             "no shared region\n",
             current_thread(), p->p_pid, p->p_comm,
             vp, vp->v_name));
        error = EINVAL;
        goto done;
    }

    /* map the file into that shared region's submap */
    kr = vm_shared_region_map_file(shared_region,
                                   mappings_count,
                                   mappings,
                                   file_control,
                                   file_size,
                                   (void *) p->p_fd->fd_rdir,
                                   mapping_to_slide);
    if (kr != KERN_SUCCESS) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map(%p:'%s'): "
             "vm_shared_region_map_file() failed kr=0x%x\n",
             current_thread(), p->p_pid, p->p_comm,
             vp, vp->v_name, kr));
        switch (kr) {
        case KERN_INVALID_ADDRESS:
            error = EFAULT;
            break;
        case KERN_PROTECTION_FAILURE:
            error = EPERM;
            break;
        case KERN_NO_SPACE:
            error = ENOMEM;
            break;
        case KERN_FAILURE:
        case KERN_INVALID_ARGUMENT:
        default:
            error = EINVAL;
            break;
        }
        goto done;
    }

    error = 0;

    vnode_lock_spin(vp);

    vp->v_flag |= VSHARED_DYLD;

    vnode_unlock(vp);

    /* update the vnode's access time */
    if (! (vnode_vfsvisflags(vp) & MNT_NOATIME)) {
        VATTR_INIT(&va);
        nanotime(&va.va_access_time);
        VATTR_SET_ACTIVE(&va, va_access_time);
        vnode_setattr(vp, &va, vfs_context_current());
    }

    if (p->p_flag & P_NOSHLIB) {
        /* signal that this process is now using split libraries */
        OSBitAndAtomic(~((uint32_t)P_NOSHLIB), &p->p_flag);
    }

done:
    if (vp != NULL) {
        /*
         * release the vnode...
         * ubc_map() still holds it for us in the non-error case
         */
        (void) vnode_put(vp);
        vp = NULL;
    }
    if (fp != NULL) {
        /* release the file descriptor */
        fp_drop(p, fd, fp, 0);
        fp = NULL;
    }

    if (shared_region != NULL) {
        vm_shared_region_deallocate(shared_region);
    }

    SHARED_REGION_TRACE_DEBUG(
        ("shared_region: %p [%d(%s)] <- map\n",
         current_thread(), p->p_pid, p->p_comm));

    return error;
}
int
_shared_region_slide(uint32_t slide,
    mach_vm_offset_t        entry_start_address,
    mach_vm_size_t          entry_size,
    mach_vm_offset_t        slide_start,
    mach_vm_size_t          slide_size,
    memory_object_control_t sr_file_control)
{
    void    *slide_info_entry = NULL;
    int     error;

    if ((error = vm_shared_region_slide_init(slide_size, entry_start_address,
                    entry_size, slide, sr_file_control))) {
        printf("slide_info initialization failed with kr=%d\n", error);
        goto done;
    }

    slide_info_entry = vm_shared_region_get_slide_info_entry();
    if (slide_info_entry == NULL) {
        error = EFAULT;
    } else {
        error = copyin(slide_start,
                       slide_info_entry,
                       (vm_size_t)slide_size);
    }
    if (error) {
        goto done;
    }

    if (vm_shared_region_slide_sanity_check() != KERN_SUCCESS) {
        error = EFAULT;
        printf("Sanity Check failed for slide_info\n");
    } else {
        printf("Successfully init slide_info with start_address: %p region_size: %ld slide_header_size: %ld\n",
               (void*)(uintptr_t)entry_start_address,
               (unsigned long)entry_size,
               (unsigned long)slide_size);
    }
done:
    return error;
}
int
shared_region_map_and_slide_np(
    struct proc                                 *p,
    struct shared_region_map_and_slide_np_args  *uap,
    __unused int                                *retvalp)
{
    struct shared_file_mapping_np   mapping_to_slide;
    struct shared_file_mapping_np   *mappings;
    unsigned int                    mappings_count = uap->count;

    memory_object_control_t         sr_file_control;
    kern_return_t                   kr = KERN_SUCCESS;
    uint32_t                        slide = uap->slide;

#define SFM_MAX_STACK   8
    struct shared_file_mapping_np   stack_mappings[SFM_MAX_STACK];

    if ((kr = vm_shared_region_sliding_valid(slide)) != KERN_SUCCESS) {
        if (kr == KERN_INVALID_ARGUMENT) {
            /*
             * This will happen if we request sliding again
             * with the same slide value that was used earlier
             * for the very first sliding. We continue through
             * to the mapping layer. This is so that we can be
             * absolutely certain that the same mappings have
             * been requested.
             */
            kr = KERN_SUCCESS;
        } else {
            goto done;
        }
    }

    if (mappings_count == 0) {
        SHARED_REGION_TRACE_INFO(
            ("shared_region: %p [%d(%s)] map(): "
             "no mappings\n",
             current_thread(), p->p_pid, p->p_comm));
        kr = 0; /* no mappings: we're done ! */
        goto done;
    } else if (mappings_count <= SFM_MAX_STACK) {
        mappings = &stack_mappings[0];
    } else {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map(): "
             "too many mappings (%d)\n",
             current_thread(), p->p_pid, p->p_comm,
             mappings_count));
        kr = KERN_FAILURE;
        goto done;
    }

    if ((kr = shared_region_copyin_mappings(p, uap->mappings, uap->count, mappings))) {
        goto done;
    }

    kr = _shared_region_map(p, uap->fd, mappings_count, mappings,
                            &sr_file_control, &mapping_to_slide);
    if (kr != KERN_SUCCESS) {
        return kr;
    }

    if (slide) {
        kr = _shared_region_slide(slide,
                mapping_to_slide.sfm_file_offset,
                mapping_to_slide.sfm_size,
                uap->slide_start,
                uap->slide_size,
                sr_file_control);
        if (kr != KERN_SUCCESS) {
            vm_shared_region_undo_mappings(NULL, 0, mappings, mappings_count);
            return kr;
        }
    }
done:
    return kr;
}
/* sysctl overflow room */

/* vm_page_free_target is provided as a makeshift solution for applications that want to
   allocate buffer space, possibly purgeable memory, but not cause inactive pages to be
   reclaimed. It allows the app to calculate how much memory is free outside the free target. */
extern unsigned int vm_page_free_target;
SYSCTL_INT(_vm, OID_AUTO, vm_page_free_target, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_free_target, 0, "Pageout daemon free target");
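/*
 * User-space reading sketch (illustrative, not part of the original
 * file): an application sizing a purgeable cache against the free
 * target can read the counter above with sysctlbyname():
 *
 *	unsigned int target;
 *	size_t len = sizeof(target);
 *	sysctlbyname("vm.vm_page_free_target", &target, &len, NULL, 0);
 */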
extern unsigned int vm_memory_pressure;
SYSCTL_INT(_vm, OID_AUTO, memory_pressure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_memory_pressure, 0, "Memory pressure indicator");

static int
vm_ctl_page_free_wanted SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
    unsigned int page_free_wanted;

    page_free_wanted = mach_vm_ctl_page_free_wanted();
    return SYSCTL_OUT(req, &page_free_wanted, sizeof (page_free_wanted));
}
SYSCTL_PROC(_vm, OID_AUTO, page_free_wanted,
    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, vm_ctl_page_free_wanted, "I", "");

extern unsigned int vm_page_purgeable_count;
SYSCTL_INT(_vm, OID_AUTO, page_purgeable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_purgeable_count, 0, "Purgeable page count");

extern unsigned int vm_page_purgeable_wired_count;
SYSCTL_INT(_vm, OID_AUTO, page_purgeable_wired_count, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_purgeable_wired_count, 0, "Wired purgeable page count");

SYSCTL_INT(_vm, OID_AUTO, page_reusable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_count, 0, "Reusable page count");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_success, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_pages_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_pages_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_pages_shared, "");
SYSCTL_QUAD(_vm, OID_AUTO, all_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.all_reusable_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, partial_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.partial_reusable_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reuse_pages_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reuse_pages_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, all_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.all_reuse_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, partial_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.partial_reuse_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.can_reuse_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.can_reuse_failure, "");
int
vm_pressure_monitor(
    __unused struct proc            *p,
    struct vm_pressure_monitor_args *uap,
    int                             *retval)
{
    kern_return_t   kr;
    uint32_t        pages_reclaimed;
    uint32_t        pages_wanted;

    kr = mach_vm_pressure_monitor(
        (boolean_t) uap->wait_for_pressure,
        uap->nsecs_monitored,
        (uap->pages_reclaimed) ? &pages_reclaimed : NULL,
        &pages_wanted);

    switch (kr) {
    case KERN_SUCCESS:
        break;
    case KERN_ABORTED:
        return EINTR;
    default:
        return EINVAL;
    }

    if (uap->pages_reclaimed) {
        if (copyout((void *)&pages_reclaimed,
                    uap->pages_reclaimed,
                    sizeof (pages_reclaimed)) != 0) {
            return EFAULT;
        }
    }

    *retval = (int) pages_wanted;
    return 0;
}