X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/d1ecb069dfe24481e4a83f44cb5217a2b06746d7..8a3053a07cee346dca737a5670e546fd26a7c9d6:/bsd/vm/vm_unix.c?ds=inline

diff --git a/bsd/vm/vm_unix.c b/bsd/vm/vm_unix.c
index 369c91350..1aa660399 100644
--- a/bsd/vm/vm_unix.c
+++ b/bsd/vm/vm_unix.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -44,6 +44,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -74,8 +75,12 @@
 #include
 #include
 #include
+#include
+#include
+#include
 #include
+#include
 #include
 #include
@@ -90,6 +95,15 @@
 #include
 
+#include
+
+
+int _shared_region_map_and_slide(struct proc*, int, unsigned int, struct shared_file_mapping_np*, uint32_t, user_addr_t, user_addr_t);
+int shared_region_copyin_mappings(struct proc*, user_addr_t, unsigned int, struct shared_file_mapping_np *);
+
+SYSCTL_INT(_vm, OID_AUTO, vm_debug_events, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_debug_events, 0, "");
+
+
 /*
  * Sysctl's related to data/stack execution.  See osfmk/vm/vm_map.c
  */
@@ -97,8 +111,8 @@
 #ifndef SECURE_KERNEL
 extern int allow_stack_exec, allow_data_exec;
 
-SYSCTL_INT(_vm, OID_AUTO, allow_stack_exec, CTLFLAG_RW, &allow_stack_exec, 0, "");
-SYSCTL_INT(_vm, OID_AUTO, allow_data_exec, CTLFLAG_RW, &allow_data_exec, 0, "");
+SYSCTL_INT(_vm, OID_AUTO, allow_stack_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_stack_exec, 0, "");
+SYSCTL_INT(_vm, OID_AUTO, allow_data_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_data_exec, 0, "");
 #endif /* !SECURE_KERNEL */
 
 static const char *prot_values[] = {
@@ -121,7 +135,7 @@ log_stack_execution_failure(addr64_t vaddr, vm_prot_t prot)
 
 int shared_region_unnest_logging = 1;
 
-SYSCTL_INT(_vm, OID_AUTO, shared_region_unnest_logging, CTLFLAG_RW,
+SYSCTL_INT(_vm, OID_AUTO, shared_region_unnest_logging, CTLFLAG_RW | CTLFLAG_LOCKED,
     &shared_region_unnest_logging, 0, "");
 
 int vm_shared_region_unnest_log_interval = 10;
@@ -163,9 +177,15 @@ useracc(
     user_size_t len,
     int prot)
 {
+    vm_map_t map;
+
+    map = current_map();
     return (vm_map_check_protection(
-        current_map(),
-        vm_map_trunc_page(addr), vm_map_round_page(addr+len),
+        map,
+        vm_map_trunc_page(addr,
+                          vm_map_page_mask(map)),
+        vm_map_round_page(addr+len,
+                          vm_map_page_mask(map)),
         prot == B_READ ? VM_PROT_READ : VM_PROT_WRITE));
 }
 
@@ -174,10 +194,17 @@ vslock(
     user_addr_t addr,
     user_size_t len)
 {
-    kern_return_t kret;
-    kret = vm_map_wire(current_map(), vm_map_trunc_page(addr),
-            vm_map_round_page(addr+len),
-            VM_PROT_READ | VM_PROT_WRITE ,FALSE);
+    kern_return_t kret;
+    vm_map_t map;
+
+    map = current_map();
+    kret = vm_map_wire(map,
+               vm_map_trunc_page(addr,
+                         vm_map_page_mask(map)),
+               vm_map_round_page(addr+len,
+                         vm_map_page_mask(map)),
+               VM_PROT_READ | VM_PROT_WRITE,
+               FALSE);
 
     switch (kret) {
     case KERN_SUCCESS:
@@ -204,14 +231,17 @@ vsunlock(
     vm_map_offset_t vaddr;
     ppnum_t paddr;
 #endif  /* FIXME ] */
-    kern_return_t kret;
+    kern_return_t kret;
+    vm_map_t map;
+
+    map = current_map();
 
 #if FIXME  /* [ */
     if (dirtied) {
         pmap = get_task_pmap(current_task());
-        for (vaddr = vm_map_trunc_page(addr);
-             vaddr < vm_map_round_page(addr+len);
-             vaddr += PAGE_SIZE) {
+        for (vaddr = vm_map_trunc_page(addr, PAGE_MASK);
+             vaddr < vm_map_round_page(addr+len, PAGE_MASK);
+             vaddr += PAGE_SIZE) {
             paddr = pmap_extract(pmap, vaddr);
             pg = PHYS_TO_VM_PAGE(paddr);
             vm_page_set_modified(pg);
@@ -221,8 +251,12 @@ vsunlock(
 #ifdef lint
     dirtied++;
 #endif  /* lint */
-    kret = vm_map_unwire(current_map(), vm_map_trunc_page(addr),
-                vm_map_round_page(addr+len), FALSE);
+    kret = vm_map_unwire(map,
+                 vm_map_trunc_page(addr,
+                           vm_map_page_mask(map)),
+                 vm_map_round_page(addr+len,
+                           vm_map_page_mask(map)),
+                 FALSE);
     switch (kret) {
     case KERN_SUCCESS:
         return (0);
@@ -458,7 +492,7 @@ task_for_pid_posix_check(proc_t target)
     int allowed;
 
     /* No task_for_pid on bad targets */
-    if (target == PROC_NULL || target->p_stat == SZOMB) {
+    if (target->p_stat == SZOMB) {
         return FALSE;
     }
 
@@ -486,8 +520,8 @@ task_for_pid_posix_check(proc_t target)
 
     /* Do target's ruid, euid, and saved uid match my euid? */
     if ((kauth_cred_getuid(targetcred) != myuid) ||
-        (targetcred->cr_ruid != myuid) ||
-        (targetcred->cr_svuid != myuid)) {
+        (kauth_cred_getruid(targetcred) != myuid) ||
+        (kauth_cred_getsvuid(targetcred) != myuid)) {
         allowed = FALSE;
         goto out;
     }
@@ -557,9 +591,13 @@ task_for_pid(
 
     p = proc_find(pid);
+    if (p == PROC_NULL) {
+        error = KERN_FAILURE;
+        goto tfpout;
+    }
+
 #if CONFIG_AUDIT
-    if (p != PROC_NULL)
-        AUDIT_ARG(process, p);
+    AUDIT_ARG(process, p);
 #endif
 
     if (!(task_for_pid_posix_check(p))) {
@@ -600,6 +638,8 @@ task_for_pid(
         /* Grant task port access */
         task_reference(p->task);
+        extmod_statistics_incr_task_for_pid(p->task);
+
         sright = (void *) convert_task_to_port(p->task);
         tret = ipc_port_copyout_send(
                 sright,
@@ -664,7 +704,7 @@ task_name_for_pid(
         && ((current_proc() == p)
             || kauth_cred_issuser(kauth_cred_get())
             || ((kauth_cred_getuid(target_cred) == kauth_cred_getuid(kauth_cred_get())) &&
-                ((target_cred->cr_ruid == kauth_cred_get()->cr_ruid))))) {
+                ((kauth_cred_getruid(target_cred) == kauth_getruid()))))) {
 
         if (p->task != TASK_NULL) {
             task_reference(p->task);
@@ -714,26 +754,30 @@ pid_suspend(struct proc *p __unused, struct pid_suspend_args *args, int *ret)
     int error = 0;
 
#if CONFIG_MACF
-    error = mac_proc_check_suspend_resume(p, 0); /* 0 for suspend */
+    error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_SUSPEND);
     if (error) {
-        error = KERN_FAILURE;
+        error = EPERM;
         goto out;
     }
#endif
 
     if (pid == 0) {
-        error = KERN_FAILURE;
+        error = EPERM;
         goto out;
     }
 
     targetproc = proc_find(pid);
+    if (targetproc == PROC_NULL) {
+        error = ESRCH;
+        goto out;
+    }
+
     if (!task_for_pid_posix_check(targetproc)) {
-        error = KERN_FAILURE;
+        error = EPERM;
         goto out;
     }
 
     target = targetproc->task;
-#ifndef CONFIG_EMBEDDED
     if (target != TASK_NULL) {
         mach_port_t tfpport;
 
@@ -744,7 +788,7 @@ pid_suspend(struct proc *p __unused, struct pid_suspend_args *args, int *ret)
             (tfpport != IPC_PORT_NULL)) {
 
             if (tfpport == IPC_PORT_DEAD) {
-                error = KERN_PROTECTION_FAILURE;
+                error = EACCES;
                 goto out;
             }
 
@@ -753,17 +797,29 @@ pid_suspend(struct proc *p __unused, struct pid_suspend_args *args, int *ret)
 
             if (error != MACH_MSG_SUCCESS) {
                 if (error == MACH_RCV_INTERRUPTED)
-                    error = KERN_ABORTED;
+                    error = EINTR;
                 else
-                    error = KERN_FAILURE;
+                    error = EPERM;
                 goto out;
             }
         }
     }
-#endif
 
     task_reference(target);
-    error = task_suspend(target);
+    error = task_pidsuspend(target);
+    if (error) {
+        if (error == KERN_INVALID_ARGUMENT) {
+            error = EINVAL;
+        } else {
+            error = EPERM;
+        }
+    }
+#if CONFIG_MEMORYSTATUS
+    else {
+        memorystatus_on_suspend(targetproc);
+    }
+#endif
+
     task_deallocate(target);
 
 out:
@@ -782,26 +838,30 @@ pid_resume(struct proc *p __unused, struct pid_resume_args *args, int *ret)
     int error = 0;
 
#if CONFIG_MACF
-    error = mac_proc_check_suspend_resume(p, 1); /* 1 for resume */
+    error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_RESUME);
     if (error) {
-        error = KERN_FAILURE;
+        error = EPERM;
        goto out;
     }
#endif
 
     if (pid == 0) {
-        error = KERN_FAILURE;
+        error = EPERM;
         goto out;
     }
 
     targetproc = proc_find(pid);
+    if (targetproc == PROC_NULL) {
+        error = ESRCH;
+        goto out;
+    }
+
     if (!task_for_pid_posix_check(targetproc)) {
-        error = KERN_FAILURE;
+        error = EPERM;
         goto out;
     }
 
     target = targetproc->task;
-#ifndef CONFIG_EMBEDDED
     if (target != TASK_NULL) {
         mach_port_t tfpport;
 
@@ -812,7 +872,7 @@ pid_resume(struct proc *p __unused, struct pid_resume_args *args, int *ret)
             (tfpport != IPC_PORT_NULL)) {
 
             if (tfpport == IPC_PORT_DEAD) {
-                error = KERN_PROTECTION_FAILURE;
+                error = EACCES;
                 goto out;
             }
 
@@ -821,28 +881,44 @@ pid_resume(struct proc *p __unused, struct pid_resume_args *args, int *ret)
 
             if (error != MACH_MSG_SUCCESS) {
                 if (error == MACH_RCV_INTERRUPTED)
-                    error = KERN_ABORTED;
+                    error = EINTR;
                 else
-                    error = KERN_FAILURE;
+                    error = EPERM;
                 goto out;
             }
         }
     }
-#endif
 
     task_reference(target);
-    error = task_resume(target);
+
+#if CONFIG_MEMORYSTATUS
+    memorystatus_on_resume(targetproc);
+#endif
+
+    error = task_pidresume(target);
+    if (error) {
+        if (error == KERN_INVALID_ARGUMENT) {
+            error = EINVAL;
+        } else {
+            if (error == KERN_MEMORY_ERROR) {
+                psignal(targetproc, SIGKILL);
+                error = EIO;
+            } else
+                error = EPERM;
+        }
+    }
+
     task_deallocate(target);
 
 out:
     if (targetproc != PROC_NULL)
         proc_rele(targetproc);
+
     *ret = error;
     return error;
-
-    return 0;
 }
+
 
 static int
 sysctl_settfp_policy(__unused struct sysctl_oid *oidp, void *arg1,
     __unused int arg2, struct sysctl_req *req)
@@ -854,7 +930,7 @@ sysctl_settfp_policy(__unused struct sysctl_oid *oidp, void *arg1,
     if (error || req->newptr == USER_ADDR_NULL)
         return(error);
 
-    if (!is_suser())
+    if (!kauth_cred_issuser(kauth_cred_get()))
         return(EPERM);
 
     if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) {
@@ -876,17 +952,17 @@ static int kern_secure_kernel = 1;
 static int kern_secure_kernel = 0;
 #endif
 
-SYSCTL_INT(_kern, OID_AUTO, secure_kernel, CTLFLAG_RD, &kern_secure_kernel, 0, "");
+SYSCTL_INT(_kern, OID_AUTO, secure_kernel, CTLFLAG_RD | CTLFLAG_LOCKED, &kern_secure_kernel, 0, "");
 
-SYSCTL_NODE(_kern, KERN_TFP, tfp, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "tfp");
-SYSCTL_PROC(_kern_tfp, KERN_TFP_POLICY, policy, CTLTYPE_INT | CTLFLAG_RW,
+SYSCTL_NODE(_kern, KERN_TFP, tfp, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "tfp");
+SYSCTL_PROC(_kern_tfp, KERN_TFP_POLICY, policy, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
         &tfp_policy, sizeof(uint32_t), &sysctl_settfp_policy ,"I","policy");
 
-SYSCTL_INT(_vm, OID_AUTO, shared_region_trace_level, CTLFLAG_RW,
+SYSCTL_INT(_vm, OID_AUTO, shared_region_trace_level, CTLFLAG_RW | CTLFLAG_LOCKED,
     &shared_region_trace_level, 0, "");
-SYSCTL_INT(_vm, OID_AUTO, shared_region_version, CTLFLAG_RD,
+SYSCTL_INT(_vm, OID_AUTO, shared_region_version, CTLFLAG_RD | CTLFLAG_LOCKED,
     &shared_region_version, 0, "");
-SYSCTL_INT(_vm, OID_AUTO, shared_region_persistence, CTLFLAG_RW,
+SYSCTL_INT(_vm, OID_AUTO, shared_region_persistence, CTLFLAG_RW | CTLFLAG_LOCKED,
     &shared_region_persistence, 0, "");
 
 /*
@@ -922,7 +998,7 @@ shared_region_check_np(
     __unused int *retvalp)
 {
     vm_shared_region_t shared_region;
-    mach_vm_offset_t start_address;
+    mach_vm_offset_t start_address = 0;
     int error;
     kern_return_t kr;
 
@@ -968,6 +1044,31 @@ shared_region_check_np(
     return error;
 }
 
+
+int
+shared_region_copyin_mappings(
+    struct proc *p,
+    user_addr_t user_mappings,
+    unsigned int mappings_count,
+    struct shared_file_mapping_np *mappings)
+{
+    int error = 0;
+    vm_size_t mappings_size = 0;
+
+    /* get the list of mappings the caller wants us to establish */
+    mappings_size = (vm_size_t) (mappings_count * sizeof (mappings[0]));
+    error = copyin(user_mappings,
+               mappings,
+               mappings_size);
+    if (error) {
+        SHARED_REGION_TRACE_ERROR(
+            ("shared_region: %p [%d(%s)] map(): "
+             "copyin(0x%llx, %d) failed (error=%d)\n",
+             current_thread(), p->p_pid, p->p_comm,
+             (uint64_t)user_mappings, mappings_count, error));
+    }
+    return error;
+}
 /*
  * shared_region_map_np()
  *
@@ -979,25 +1080,25 @@ shared_region_check_np(
  * requiring any further setup.
  */
 int
-shared_region_map_np(
+_shared_region_map_and_slide(
     struct proc *p,
-    struct shared_region_map_np_args *uap,
-    __unused int *retvalp)
+    int fd,
+    uint32_t mappings_count,
+    struct shared_file_mapping_np *mappings,
+    uint32_t slide,
+    user_addr_t slide_start,
+    user_addr_t slide_size)
 {
     int error;
     kern_return_t kr;
-    int fd;
     struct fileproc *fp;
     struct vnode *vp, *root_vp;
     struct vnode_attr va;
     off_t fs;
     memory_object_size_t file_size;
-    user_addr_t user_mappings;
-    struct shared_file_mapping_np *mappings;
-#define SFM_MAX_STACK 8
-    struct shared_file_mapping_np stack_mappings[SFM_MAX_STACK];
-    unsigned int mappings_count;
-    vm_size_t mappings_size;
+#if CONFIG_MACF
+    vm_prot_t maxprot = VM_PROT_ALL;
+#endif
     memory_object_control_t file_control;
     struct vm_shared_region *shared_region;
 
@@ -1006,15 +1107,9 @@ shared_region_map_np(
          current_thread(), p->p_pid, p->p_comm));
 
     shared_region = NULL;
-    mappings_count = 0;
-    mappings_size = 0;
-    mappings = NULL;
     fp = NULL;
     vp = NULL;
 
-    /* get file descriptor for shared region cache file */
-    fd = uap->fd;
-
     /* get file structure from file descriptor */
     error = fp_lookup(p, fd, &fp, 0);
     if (error) {
@@ -1026,12 +1121,12 @@ shared_region_map_np(
     }
 
     /* make sure we're attempting to map a vnode */
-    if (fp->f_fglob->fg_type != DTYPE_VNODE) {
+    if (FILEGLOB_DTYPE(fp->f_fglob) != DTYPE_VNODE) {
         SHARED_REGION_TRACE_ERROR(
             ("shared_region: %p [%d(%s)] map: "
              "fd=%d not a vnode (type=%d)\n",
              current_thread(), p->p_pid, p->p_comm,
-             fd, fp->f_fglob->fg_type));
+             fd, FILEGLOB_DTYPE(fp->f_fglob)));
         error = EINVAL;
         goto done;
     }
@@ -1068,11 +1163,36 @@ shared_region_map_np(
         goto done;
     }
 
+#if CONFIG_MACF
+    error = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()),
+            fp->f_fglob, VM_PROT_ALL, MAP_FILE, &maxprot);
+    if (error) {
+        goto done;
+    }
+#endif /* MAC */
+
+#if CONFIG_PROTECT
+    /* check for content protection access */
+    {
+        error = cp_handle_vnop(vp, CP_READ_ACCESS | CP_WRITE_ACCESS, 0);
+        if (error) {
+            goto done;
+        }
+    }
+#endif /* CONFIG_PROTECT */
+
     /* make sure vnode is on the process's root volume */
     root_vp = p->p_fd->fd_rdir;
     if (root_vp == NULL) {
         root_vp = rootvnode;
+    } else {
+        /*
+         * Chroot-ed processes can't use the shared_region.
+         */
+        error = EINVAL;
+        goto done;
     }
+
     if (vp->v_mount != root_vp->v_mount) {
         SHARED_REGION_TRACE_ERROR(
             ("shared_region: %p [%d(%s)] map(%p:'%s'): "
@@ -1128,42 +1248,7 @@ shared_region_map_np(
         error = EINVAL;
         goto done;
     }
-
-    /* get the list of mappings the caller wants us to establish */
-    mappings_count = uap->count;    /* number of mappings */
-    mappings_size = (vm_size_t) (mappings_count * sizeof (mappings[0]));
-    if (mappings_count == 0) {
-        SHARED_REGION_TRACE_INFO(
-            ("shared_region: %p [%d(%s)] map(%p:'%s'): "
-             "no mappings\n",
-             current_thread(), p->p_pid, p->p_comm,
-             vp, vp->v_name));
-        error = 0;  /* no mappings: we're done ! */
-        goto done;
-    } else if (mappings_count <= SFM_MAX_STACK) {
-        mappings = &stack_mappings[0];
-    } else {
-        SHARED_REGION_TRACE_ERROR(
-            ("shared_region: %p [%d(%s)] map(%p:'%s'): "
-             "too many mappings (%d)\n",
-             current_thread(), p->p_pid, p->p_comm,
-             vp, vp->v_name, mappings_count));
-        error = EINVAL;
-        goto done;
-    }
-    user_mappings = uap->mappings;  /* the mappings, in user space */
-    error = copyin(user_mappings,
-               mappings,
-               mappings_size);
-    if (error) {
-        SHARED_REGION_TRACE_ERROR(
-            ("shared_region: %p [%d(%s)] map(%p:'%s'): "
-             "copyin(0x%llx, %d) failed (error=%d)\n",
-             current_thread(), p->p_pid, p->p_comm,
-             vp, vp->v_name, (uint64_t)user_mappings, mappings_count, error));
-        goto done;
-    }
 
     /* get the process's shared region (setup in vm_map_exec()) */
     shared_region = vm_shared_region_get(current_task());
@@ -1182,7 +1267,10 @@ shared_region_map_np(
                   mappings,
                   file_control,
                   file_size,
-                  (void *) p->p_fd->fd_rdir);
+                  (void *) p->p_fd->fd_rdir,
+                  slide,
+                  slide_start,
+                  slide_size);
     if (kr != KERN_SUCCESS) {
         SHARED_REGION_TRACE_ERROR(
             ("shared_region: %p [%d(%s)] map(%p:'%s'): "
@@ -1210,6 +1298,12 @@ shared_region_map_np(
 
     error = 0;
 
+    vnode_lock_spin(vp);
+
+    vp->v_flag |= VSHARED_DYLD;
+
+    vnode_unlock(vp);
+
     /* update the vnode's access time */
     if (! (vnode_vfsvisflags(vp) & MNT_NOATIME)) {
         VATTR_INIT(&va);
@@ -1249,6 +1343,72 @@ done:
     return error;
 }
 
+int
+shared_region_map_and_slide_np(
+    struct proc *p,
+    struct shared_region_map_and_slide_np_args *uap,
+    __unused int *retvalp)
+{
+    struct shared_file_mapping_np *mappings;
+    unsigned int mappings_count = uap->count;
+
+    kern_return_t kr = KERN_SUCCESS;
+    uint32_t slide = uap->slide;
+
+#define SFM_MAX_STACK 8
+    struct shared_file_mapping_np stack_mappings[SFM_MAX_STACK];
+
+    /* Is the process chrooted?? */
+    if (p->p_fd->fd_rdir != NULL) {
+        kr = EINVAL;
+        goto done;
+    }
+
+    if ((kr = vm_shared_region_sliding_valid(slide)) != KERN_SUCCESS) {
+        if (kr == KERN_INVALID_ARGUMENT) {
+            /*
+             * This will happen if we request sliding again
+             * with the same slide value that was used earlier
+             * for the very first sliding.
+             */
+            kr = KERN_SUCCESS;
+        }
+        goto done;
+    }
+
+    if (mappings_count == 0) {
+        SHARED_REGION_TRACE_INFO(
+            ("shared_region: %p [%d(%s)] map(): "
+             "no mappings\n",
+             current_thread(), p->p_pid, p->p_comm));
+        kr = 0; /* no mappings: we're done ! */
+        goto done;
+    } else if (mappings_count <= SFM_MAX_STACK) {
+        mappings = &stack_mappings[0];
+    } else {
+        SHARED_REGION_TRACE_ERROR(
+            ("shared_region: %p [%d(%s)] map(): "
+             "too many mappings (%d)\n",
+             current_thread(), p->p_pid, p->p_comm,
+             mappings_count));
+        kr = KERN_FAILURE;
+        goto done;
+    }
+
+    if ( (kr = shared_region_copyin_mappings(p, uap->mappings, uap->count, mappings))) {
+        goto done;
+    }
+
+
+    kr = _shared_region_map_and_slide(p, uap->fd, mappings_count, mappings,
+                      slide,
+                      uap->slide_start, uap->slide_size);
+    if (kr != KERN_SUCCESS) {
+        return kr;
+    }
+
+done:
+    return kr;
+}
 
 /*
  * sysctl overflow room
  */
@@ -1256,11 +1416,11 @@ done:
    allocate buffer space, possibly purgeable memory, but not cause inactive
    pages to be reclaimed. It allows the app to calculate how much memory is
    free outside the free target.
 */
 extern unsigned int vm_page_free_target;
-SYSCTL_INT(_vm, OID_AUTO, vm_page_free_target, CTLFLAG_RD,
+SYSCTL_INT(_vm, OID_AUTO, vm_page_free_target, CTLFLAG_RD | CTLFLAG_LOCKED,
     &vm_page_free_target, 0, "Pageout daemon free target");
 
 extern unsigned int vm_memory_pressure;
-SYSCTL_INT(_vm, OID_AUTO, memory_pressure, CTLFLAG_RD,
+SYSCTL_INT(_vm, OID_AUTO, memory_pressure, CTLFLAG_RD | CTLFLAG_LOCKED,
     &vm_memory_pressure, 0, "Memory pressure indicator");
 
 static int
@@ -1277,38 +1437,104 @@ SYSCTL_PROC(_vm, OID_AUTO, page_free_wanted,
         0, 0, vm_ctl_page_free_wanted, "I", "");
 
 extern unsigned int vm_page_purgeable_count;
-SYSCTL_INT(_vm, OID_AUTO, page_purgeable_count, CTLFLAG_RD,
+SYSCTL_INT(_vm, OID_AUTO, page_purgeable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
     &vm_page_purgeable_count, 0, "Purgeable page count");
 
 extern unsigned int vm_page_purgeable_wired_count;
-SYSCTL_INT(_vm, OID_AUTO, page_purgeable_wired_count, CTLFLAG_RD,
+SYSCTL_INT(_vm, OID_AUTO, page_purgeable_wired_count, CTLFLAG_RD | CTLFLAG_LOCKED,
     &vm_page_purgeable_wired_count, 0, "Wired purgeable page count");
 
-SYSCTL_INT(_vm, OID_AUTO, page_reusable_count, CTLFLAG_RD,
+extern int madvise_free_debug;
+SYSCTL_INT(_vm, OID_AUTO, madvise_free_debug, CTLFLAG_RW | CTLFLAG_LOCKED,
+    &madvise_free_debug, 0, "zero-fill on madvise(MADV_FREE*)");
+
+SYSCTL_INT(_vm, OID_AUTO, page_reusable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
     &vm_page_stats_reusable.reusable_count, 0, "Reusable page count");
-SYSCTL_QUAD(_vm, OID_AUTO, reusable_success, CTLFLAG_RD,
+SYSCTL_QUAD(_vm, OID_AUTO, reusable_success, CTLFLAG_RD | CTLFLAG_LOCKED,
     &vm_page_stats_reusable.reusable_pages_success, "");
-SYSCTL_QUAD(_vm, OID_AUTO, reusable_failure, CTLFLAG_RD,
+SYSCTL_QUAD(_vm, OID_AUTO, reusable_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
     &vm_page_stats_reusable.reusable_pages_failure, "");
-SYSCTL_QUAD(_vm, OID_AUTO, reusable_shared, CTLFLAG_RD,
+SYSCTL_QUAD(_vm, OID_AUTO, reusable_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
     &vm_page_stats_reusable.reusable_pages_shared, "");
-SYSCTL_QUAD(_vm, OID_AUTO, all_reusable_calls, CTLFLAG_RD,
+SYSCTL_QUAD(_vm, OID_AUTO, all_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
     &vm_page_stats_reusable.all_reusable_calls, "");
-SYSCTL_QUAD(_vm, OID_AUTO, partial_reusable_calls, CTLFLAG_RD,
+SYSCTL_QUAD(_vm, OID_AUTO, partial_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.partial_reusable_calls, "");
-SYSCTL_QUAD(_vm, OID_AUTO, reuse_success, CTLFLAG_RD,
+SYSCTL_QUAD(_vm, OID_AUTO, reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
     &vm_page_stats_reusable.reuse_pages_success, "");
-SYSCTL_QUAD(_vm, OID_AUTO, reuse_failure, CTLFLAG_RD,
+SYSCTL_QUAD(_vm, OID_AUTO, reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
     &vm_page_stats_reusable.reuse_pages_failure, "");
-SYSCTL_QUAD(_vm, OID_AUTO, all_reuse_calls, CTLFLAG_RD,
+SYSCTL_QUAD(_vm, OID_AUTO, all_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
     &vm_page_stats_reusable.all_reuse_calls, "");
-SYSCTL_QUAD(_vm, OID_AUTO, partial_reuse_calls, CTLFLAG_RD,
+SYSCTL_QUAD(_vm, OID_AUTO, partial_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
     &vm_page_stats_reusable.partial_reuse_calls, "");
-SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_success, CTLFLAG_RD,
+SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
     &vm_page_stats_reusable.can_reuse_success, "");
-SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_failure, CTLFLAG_RD,
+SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
     &vm_page_stats_reusable.can_reuse_failure, "");
+SYSCTL_QUAD(_vm, OID_AUTO, reusable_reclaimed, CTLFLAG_RD | CTLFLAG_LOCKED,
+    &vm_page_stats_reusable.reusable_reclaimed, "");
+
+
+extern unsigned int vm_page_free_count, vm_page_speculative_count;
+SYSCTL_UINT(_vm, OID_AUTO, page_free_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_free_count, 0, "");
+SYSCTL_UINT(_vm, OID_AUTO, page_speculative_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_speculative_count, 0, "");
+
+extern unsigned int vm_page_cleaned_count;
+SYSCTL_UINT(_vm, OID_AUTO, page_cleaned_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_cleaned_count, 0, "Cleaned queue size");
+
+/* pageout counts */
+extern unsigned int vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external, vm_pageout_inactive_clean, vm_pageout_speculative_clean, vm_pageout_inactive_used;
+extern unsigned int vm_pageout_freed_from_inactive_clean, vm_pageout_freed_from_speculative;
+SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_dirty_internal, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_dirty_internal, 0, "");
+SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_dirty_external, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_dirty_external, 0, "");
+SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_clean, 0, "");
+SYSCTL_UINT(_vm, OID_AUTO, pageout_speculative_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_speculative_clean, 0, "");
+SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_used, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_used, 0, "");
+SYSCTL_UINT(_vm, OID_AUTO, pageout_freed_from_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_freed_from_inactive_clean, 0, "");
+SYSCTL_UINT(_vm, OID_AUTO, pageout_freed_from_speculative, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_freed_from_speculative, 0, "");
+
+extern unsigned int vm_pageout_freed_from_cleaned;
+SYSCTL_UINT(_vm, OID_AUTO, pageout_freed_from_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_freed_from_cleaned, 0, "");
+
+/* counts of pages entering the cleaned queue */
+extern unsigned int vm_pageout_enqueued_cleaned, vm_pageout_enqueued_cleaned_from_inactive_clean, vm_pageout_enqueued_cleaned_from_inactive_dirty;
+SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_enqueued_cleaned, 0, ""); /* sum of next two */
+SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned_from_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_enqueued_cleaned_from_inactive_clean, 0, "");
+SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned_from_inactive_dirty, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_enqueued_cleaned_from_inactive_dirty, 0, "");
+
+/* counts of pages leaving the cleaned queue */
+extern unsigned int vm_pageout_cleaned_reclaimed, vm_pageout_cleaned_reactivated, vm_pageout_cleaned_reference_reactivated, vm_pageout_cleaned_volatile_reactivated, vm_pageout_cleaned_fault_reactivated, vm_pageout_cleaned_commit_reactivated, vm_pageout_cleaned_busy, vm_pageout_cleaned_nolock;
+SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_reclaimed, 0, "Cleaned pages reclaimed");
+SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_reactivated, 0, "Cleaned pages reactivated"); /* sum of all reactivated AND busy and nolock (even though those actually get reDEactivated */
+SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reference_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_reference_reactivated, 0, "Cleaned pages reference reactivated");
+SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_volatile_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_volatile_reactivated, 0, "Cleaned pages volatile reactivated");
+SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_fault_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_fault_reactivated, 0, "Cleaned pages fault reactivated");
+SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_commit_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_commit_reactivated, 0, "Cleaned pages commit reactivated");
+SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_busy, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_busy, 0, "Cleaned pages busy (deactivated)");
+SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_nolock, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_nolock, 0, "Cleaned pages no-lock (deactivated)");
 
 #include
 #include
 
 void vm_pageout_io_throttle(void);
 
 void vm_pageout_io_throttle(void) {
     struct uthread *uthread = get_bsdthread_info(current_thread());
 
     /*
      * thread is marked as a low priority I/O type
      * and the I/O we issued while in this cleaning operation
      * collided with normal I/O operations... we'll
      * delay in order to mitigate the impact of this
      * task on the normal operation of the system
      */
 
     if (uthread->uu_lowpri_window) {
         throttle_lowpri_io(1);
     }
 
 }
 
 int
 vm_pressure_monitor(
@@ -1346,3 +1572,77 @@ vm_pressure_monitor(
     *retval = (int) pages_wanted;
     return 0;
 }
+
+int
+kas_info(struct proc *p,
+         struct kas_info_args *uap,
+         int *retval __unused)
+{
+#ifdef SECURE_KERNEL
+    (void)p;
+    (void)uap;
+    return ENOTSUP;
+#else /* !SECURE_KERNEL */
+    int selector = uap->selector;
+    user_addr_t valuep = uap->value;
+    user_addr_t sizep = uap->size;
+    user_size_t size;
+    int error;
+
+    if (!kauth_cred_issuser(kauth_cred_get())) {
+        return EPERM;
+    }
+
+#if CONFIG_MACF
+    error = mac_system_check_kas_info(kauth_cred_get(), selector);
+    if (error) {
+        return error;
+    }
+#endif
+
+    if (IS_64BIT_PROCESS(p)) {
+        user64_size_t size64;
+        error = copyin(sizep, &size64, sizeof(size64));
+        size = (user_size_t)size64;
+    } else {
+        user32_size_t size32;
+        error = copyin(sizep, &size32, sizeof(size32));
+        size = (user_size_t)size32;
+    }
+    if (error) {
+        return error;
+    }
+
+    switch (selector) {
+        case KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR:
+            {
+                uint64_t slide = vm_kernel_slide;
+
+                if (sizeof(slide) != size) {
+                    return EINVAL;
+                }
+
+                if (IS_64BIT_PROCESS(p)) {
+                    user64_size_t size64 = (user64_size_t)size;
+                    error = copyout(&size64, sizep, sizeof(size64));
+                } else {
+                    user32_size_t size32 = (user32_size_t)size;
+                    error = copyout(&size32, sizep, sizeof(size32));
+                }
+                if (error) {
+                    return error;
+                }
+
+                error = copyout(&slide, valuep, sizeof(slide));
+                if (error) {
+                    return error;
+                }
+            }
+            break;
+        default:
+            return EINVAL;
+    }
+
+    return 0;
+#endif /* !SECURE_KERNEL */
+}
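
The pid_suspend()/pid_resume() hunks above stop leaking raw Mach kern_return_t codes to user space and translate them into BSD errnos instead (KERN_INVALID_ARGUMENT -> EINVAL, KERN_MEMORY_ERROR -> EIO plus a SIGKILL of the target in pid_resume(), anything else -> EPERM). A minimal standalone sketch of that mapping follows; the helper name is illustrative and does not exist in the source.

#include <errno.h>
#include <mach/kern_return.h>

/*
 * Illustrative only: mirrors the errno mapping that pid_resume() applies
 * to the result of task_pidresume() in the diff above.  (In the kernel the
 * KERN_MEMORY_ERROR case also sends SIGKILL to the target process.)
 */
static int
resume_kr_to_errno(kern_return_t kr)
{
    switch (kr) {
    case KERN_SUCCESS:
        return 0;
    case KERN_INVALID_ARGUMENT:
        return EINVAL;
    case KERN_MEMORY_ERROR:
        return EIO;
    default:
        return EPERM;
    }
}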
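
The kas_info() syscall added at the end of the diff exposes the kernel text slide to privileged callers. Below is a hedged user-space sketch, assuming the userland prototype and the KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR constant from <sys/kas_info.h> are available to the caller (they are not part of this diff) and that the program runs as root.

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/kas_info.h>

int
main(void)
{
    uint64_t slide = 0;
    size_t size = sizeof(slide);    /* the kernel rejects any other size with EINVAL */

    if (kas_info(KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR, &slide, &size) != 0) {
        /* EPERM for non-root callers, ENOTSUP on SECURE_KERNEL builds */
        fprintf(stderr, "kas_info: %s\n", strerror(errno));
        return 1;
    }
    printf("kernel text slide: 0x%llx\n", (unsigned long long)slide);
    return 0;
}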