X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/0c530ab8987f0ae6a1a3d9284f40182b88852816..d26ffc64f583ab2d29df48f13518685602bc8832:/bsd/kern/kern_sysctl.c?ds=sidebyside diff --git a/bsd/kern/kern_sysctl.c b/bsd/kern/kern_sysctl.c index 2b6a8b4a8..f6ed41035 100644 --- a/bsd/kern/kern_sysctl.c +++ b/bsd/kern/kern_sysctl.c @@ -1,23 +1,29 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2011 Apple Inc. All rights reserved. * - * @APPLE_LICENSE_HEADER_START@ + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. * - * @APPLE_LICENSE_HEADER_END@ + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ /*- @@ -57,9 +63,24 @@ * * @(#)kern_sysctl.c 8.4 (Berkeley) 4/14/94 */ +/* + * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce + * support for mandatory and extensible security protections. This notice + * is included in support of clause 2.2 (b) of the Apple Public License, + * Version 2.0. + */ /* - * sysctl system call. +* DEPRECATED sysctl system call code + * + * Everything in this file is deprecated. Sysctls should be handled + * by the code in kern_newsysctl.c. + * The remaining "case" sections are supposed to be converted into + * SYSCTL_*-style definitions, and as soon as all of them are gone, + * this source file is supposed to die. + * + * DO NOT ADD ANY MORE "case" SECTIONS TO THIS FILE, instead define + * your sysctl with SYSCTL_INT, SYSCTL_PROC etc. in your source file. 
*/ #include @@ -80,23 +101,35 @@ #include #include #include +#include +#include +#include +#include -#include +#include +#include +#include #include +#include #include +#include #include +#include +#include #include -#include +#include +#include +#include +#include +#include +#include #include #include #include -extern vm_map_t bsd_pageable_map; - #include #include -#include #include #include @@ -105,1577 +138,1017 @@ extern vm_map_t bsd_pageable_map; #include #include +#include +#include +#include +#include -#ifdef __i386__ +#if defined(__i386__) || defined(__x86_64__) #include #endif -sysctlfn kern_sysctl; -#ifdef DEBUG -sysctlfn debug_sysctl; +#if CONFIG_FREEZE +#include +#endif + +#if KPERF +#include #endif -extern sysctlfn vm_sysctl; -extern sysctlfn vfs_sysctl; -extern sysctlfn net_sysctl; -extern sysctlfn cpu_sysctl; + +#if HYPERVISOR +#include +#endif + +/* + * deliberately setting max requests to really high number + * so that runaway settings do not cause MALLOC overflows + */ +#define AIO_MAX_REQUESTS (128 * CONFIG_AIO_MAX) + extern int aio_max_requests; extern int aio_max_requests_per_process; extern int aio_worker_threads; -extern int maxfilesperproc; extern int lowpri_IO_window_msecs; extern int lowpri_IO_delay_msecs; extern int nx_enabled; +extern int speculative_reads_disabled; +extern unsigned int speculative_prefetch_max; +extern unsigned int speculative_prefetch_max_iosize; +extern unsigned int preheat_max_bytes; +extern unsigned int preheat_min_bytes; +extern long numvnodes; + +extern uuid_string_t bootsessionuuid_string; + +extern unsigned int vm_max_delayed_work_limit; +extern unsigned int vm_max_batch; + +extern unsigned int vm_page_free_min; +extern unsigned int vm_page_free_target; +extern unsigned int vm_page_free_reserved; +extern unsigned int vm_page_speculative_percentage; +extern unsigned int vm_page_speculative_q_age_ms; + +#if (DEVELOPMENT || DEBUG) +extern uint32_t vm_page_creation_throttled_hard; +extern uint32_t 
vm_page_creation_throttled_soft; +#endif /* DEVELOPMENT || DEBUG */ + +/* + * Conditionally allow dtrace to see these functions for debugging purposes. + */ +#ifdef STATIC +#undef STATIC +#endif +#if 0 +#define STATIC +#else +#define STATIC static +#endif + +extern boolean_t mach_timer_coalescing_enabled; + +extern uint64_t timer_deadline_tracking_bin_1, timer_deadline_tracking_bin_2; + +STATIC void +fill_user32_eproc(proc_t, struct user32_eproc *__restrict); +STATIC void +fill_user32_externproc(proc_t, struct user32_extern_proc *__restrict); +STATIC void +fill_user64_eproc(proc_t, struct user64_eproc *__restrict); +STATIC void +fill_user64_proc(proc_t, struct user64_kinfo_proc *__restrict); +STATIC void +fill_user64_externproc(proc_t, struct user64_extern_proc *__restrict); +STATIC void +fill_user32_proc(proc_t, struct user32_kinfo_proc *__restrict); -static void -fill_eproc(struct proc *p, struct eproc *ep); -static void -fill_externproc(struct proc *p, struct extern_proc *exp); -static void -fill_user_eproc(struct proc *p, struct user_eproc *ep); -static void -fill_user_proc(struct proc *p, struct user_kinfo_proc *kp); -static void -fill_user_externproc(struct proc *p, struct user_extern_proc *exp); extern int kdbg_control(int *name, u_int namelen, user_addr_t where, size_t * sizep); -int -kdebug_ops(int *name, u_int namelen, user_addr_t where, size_t *sizep, struct proc *p); #if NFSCLIENT extern int netboot_root(void); #endif int pcsamples_ops(int *name, u_int namelen, user_addr_t where, size_t *sizep, - struct proc *p); -__private_extern__ kern_return_t -reset_vmobjectcache(unsigned int val1, unsigned int val2); -extern int -resize_namecache(u_int newsize); -static int -sysctl_aiomax(user_addr_t oldp, size_t *oldlenp, user_addr_t newp, size_t newlen); -static int -sysctl_aioprocmax(user_addr_t oldp, size_t *oldlenp, user_addr_t newp, size_t newlen); -static int -sysctl_aiothreads(user_addr_t oldp, size_t *oldlenp, user_addr_t newp, size_t newlen); -extern int 
-sysctl_clockrate(user_addr_t where, size_t *sizep); -int -sysctl_doproc(int *name, u_int namelen, user_addr_t where, size_t *sizep); -int -sysctl_doprof(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp, - user_addr_t newp, size_t newlen); -int -sysctl_file(user_addr_t where, size_t *sizep); -static void -fill_proc(struct proc *p, struct kinfo_proc *kp); -static int -sysctl_maxfilesperproc(user_addr_t oldp, size_t *oldlenp, - user_addr_t newp, size_t newlen); -static int -sysctl_maxprocperuid(user_addr_t oldp, size_t *oldlenp, - user_addr_t newp, size_t newlen); -static int -sysctl_maxproc(user_addr_t oldp, size_t *oldlenp, - user_addr_t newp, size_t newlen); + proc_t p); int sysctl_procargs(int *name, u_int namelen, user_addr_t where, - size_t *sizep, struct proc *cur_proc); -static int -sysctl_procargs2(int *name, u_int namelen, user_addr_t where, size_t *sizep, - struct proc *cur_proc); -static int + size_t *sizep, proc_t cur_proc); +STATIC int sysctl_procargsx(int *name, u_int namelen, user_addr_t where, size_t *sizep, - struct proc *cur_proc, int argc_yes); + proc_t cur_proc, int argc_yes); int sysctl_struct(user_addr_t oldp, size_t *oldlenp, user_addr_t newp, size_t newlen, void *sp, int len); -extern int -sysctl_vnode(user_addr_t where, size_t *sizep); +STATIC int sysdoproc_filt_KERN_PROC_PID(proc_t p, void * arg); +STATIC int sysdoproc_filt_KERN_PROC_PGRP(proc_t p, void * arg); +STATIC int sysdoproc_filt_KERN_PROC_TTY(proc_t p, void * arg); +STATIC int sysdoproc_filt_KERN_PROC_UID(proc_t p, void * arg); +STATIC int sysdoproc_filt_KERN_PROC_RUID(proc_t p, void * arg); +int sysdoproc_callback(proc_t p, void *arg); + + +/* forward declarations for non-static STATIC */ +STATIC void fill_loadavg64(struct loadavg *la, struct user64_loadavg *la64); +STATIC void fill_loadavg32(struct loadavg *la, struct user32_loadavg *la32); +STATIC int sysctl_handle_kern_threadname(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req); +STATIC int 
sysctl_sched_stats(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req); +STATIC int sysctl_sched_stats_enable(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req); +STATIC int sysctl_kdebug_ops SYSCTL_HANDLER_ARGS; +#if COUNT_SYSCALLS +STATIC int sysctl_docountsyscalls SYSCTL_HANDLER_ARGS; +#endif /* COUNT_SYSCALLS */ +#if !CONFIG_EMBEDDED +STATIC int sysctl_doprocargs SYSCTL_HANDLER_ARGS; +#endif /* !CONFIG_EMBEDDED */ +STATIC int sysctl_doprocargs2 SYSCTL_HANDLER_ARGS; +STATIC int sysctl_prochandle SYSCTL_HANDLER_ARGS; +STATIC int sysctl_aiomax(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req); +STATIC int sysctl_aioprocmax(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req); +STATIC int sysctl_aiothreads(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req); +STATIC int sysctl_maxproc(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req); +STATIC int sysctl_osversion(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req); +STATIC int sysctl_sysctl_bootargs(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req); +STATIC int sysctl_maxvnodes(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req); +STATIC int sysctl_securelvl(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req); +STATIC int sysctl_domainname(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req); +STATIC int sysctl_hostname(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req); +STATIC int sysctl_procname(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req); +STATIC int sysctl_boottime(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req); +STATIC int sysctl_symfile(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req); +#if NFSCLIENT +STATIC int sysctl_netboot(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req); +#endif 
+#ifdef CONFIG_IMGSRC_ACCESS +STATIC int sysctl_imgsrcdev(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req); +#endif +STATIC int sysctl_usrstack(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req); +STATIC int sysctl_usrstack64(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req); +#if CONFIG_COREDUMP +STATIC int sysctl_coredump(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req); +STATIC int sysctl_suid_coredump(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req); +#endif +STATIC int sysctl_delayterm(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req); +STATIC int sysctl_rage_vnode(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req); +STATIC int sysctl_kern_check_openevt(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req); +STATIC int sysctl_nx(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req); +STATIC int sysctl_loadavg(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req); +STATIC int sysctl_vm_toggle_address_reuse(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req); +STATIC int sysctl_swapusage(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req); +STATIC int fetch_process_cputype( proc_t cur_proc, int *name, u_int namelen, cpu_type_t *cputype); +STATIC int sysctl_sysctl_native(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req); +STATIC int sysctl_sysctl_cputype(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req); +STATIC int sysctl_safeboot(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req); +STATIC int sysctl_singleuser(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req); +STATIC int sysctl_minimalboot(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req); +STATIC int sysctl_slide(struct sysctl_oid *oidp, void *arg1, int arg2, struct 
sysctl_req *req); + + +extern void IORegistrySetOSBuildVersion(char * build_version); + +STATIC void +fill_loadavg64(struct loadavg *la, struct user64_loadavg *la64) +{ + la64->ldavg[0] = la->ldavg[0]; + la64->ldavg[1] = la->ldavg[1]; + la64->ldavg[2] = la->ldavg[2]; + la64->fscale = (user64_long_t)la->fscale; +} + +STATIC void +fill_loadavg32(struct loadavg *la, struct user32_loadavg *la32) +{ + la32->ldavg[0] = la->ldavg[0]; + la32->ldavg[1] = la->ldavg[1]; + la32->ldavg[2] = la->ldavg[2]; + la32->fscale = (user32_long_t)la->fscale; +} +#if CONFIG_COREDUMP /* - * temporary location for vm_sysctl. This should be machine independant + * Attributes stored in the kernel. */ - -extern uint32_t mach_factor[3]; +extern char corefilename[MAXPATHLEN+1]; +extern int do_coredump; +extern int sugid_coredump; +#endif -static void -loadavg32to64(struct loadavg *la32, struct user_loadavg *la64) +#if COUNT_SYSCALLS +extern int do_count_syscalls; +#endif + +#ifdef INSECURE +int securelevel = -1; +#else +int securelevel; +#endif + +STATIC int +sysctl_handle_kern_threadname( __unused struct sysctl_oid *oidp, __unused void *arg1, + __unused int arg2, struct sysctl_req *req) { - la64->ldavg[0] = la32->ldavg[0]; - la64->ldavg[1] = la32->ldavg[1]; - la64->ldavg[2] = la32->ldavg[2]; - la64->fscale = (user_long_t)la32->fscale; -} + int error; + struct uthread *ut = get_bsdthread_info(current_thread()); + user_addr_t oldp=0, newp=0; + size_t *oldlenp=NULL; + size_t newlen=0; + + oldp = req->oldptr; + oldlenp = &(req->oldlen); + newp = req->newptr; + newlen = req->newlen; + + /* We want the current length, and maybe the string itself */ + if(oldlenp) { + /* if we have no thread name yet tell'em we want MAXTHREADNAMESIZE - 1 */ + size_t currlen = MAXTHREADNAMESIZE - 1; + + if(ut->pth_name) + /* use length of current thread name */ + currlen = strlen(ut->pth_name); + if(oldp) { + if(*oldlenp < currlen) + return ENOMEM; + /* NOTE - we do not copy the NULL terminator */ + if(ut->pth_name) { + 
error = copyout(ut->pth_name,oldp,currlen); + if(error) + return error; + } + } + /* return length of thread name minus NULL terminator (just like strlen) */ + req->oldidx = currlen; + } -int -vm_sysctl(int *name, __unused u_int namelen, user_addr_t oldp, size_t *oldlenp, - user_addr_t newp, size_t newlen, __unused struct proc *p) -{ - struct loadavg loadinfo; - - switch (name[0]) { - case VM_LOADAVG: - if (proc_is64bit(p)) { - struct user_loadavg loadinfo64; - loadavg32to64(&averunnable, &loadinfo64); - return (sysctl_struct(oldp, oldlenp, newp, newlen, - &loadinfo64, sizeof(loadinfo64))); - } else { - return (sysctl_struct(oldp, oldlenp, newp, newlen, - &averunnable, sizeof(struct loadavg))); - } - case VM_MACHFACTOR: - loadinfo.ldavg[0] = mach_factor[0]; - loadinfo.ldavg[1] = mach_factor[1]; - loadinfo.ldavg[2] = mach_factor[2]; - loadinfo.fscale = LSCALE; - if (proc_is64bit(p)) { - struct user_loadavg loadinfo64; - loadavg32to64(&loadinfo, &loadinfo64); - return (sysctl_struct(oldp, oldlenp, newp, newlen, - &loadinfo64, sizeof(loadinfo64))); + /* We want to set the name to something */ + if(newp) + { + if(newlen > (MAXTHREADNAMESIZE - 1)) + return ENAMETOOLONG; + if(!ut->pth_name) + { + ut->pth_name = (char*)kalloc( MAXTHREADNAMESIZE ); + if(!ut->pth_name) + return ENOMEM; } else { - return (sysctl_struct(oldp, oldlenp, newp, newlen, - &loadinfo, sizeof(struct loadavg))); + kernel_debug_string_simple(TRACE_STRING_THREADNAME_PREV, ut->pth_name); } - case VM_SWAPUSAGE: { - int error; - uint64_t swap_total; - uint64_t swap_avail; - uint32_t swap_pagesize; - boolean_t swap_encrypted; - struct xsw_usage xsu; - - error = macx_swapinfo(&swap_total, - &swap_avail, - &swap_pagesize, - &swap_encrypted); - if (error) + bzero(ut->pth_name, MAXTHREADNAMESIZE); + error = copyin(newp, ut->pth_name, newlen); + if (error) { return error; + } - xsu.xsu_total = swap_total; - xsu.xsu_avail = swap_avail; - xsu.xsu_used = swap_total - swap_avail; - xsu.xsu_pagesize = swap_pagesize; 
- xsu.xsu_encrypted = swap_encrypted; - return sysctl_struct(oldp, oldlenp, newp, newlen, - &xsu, sizeof (struct xsw_usage)); - } - case VM_METER: - return (ENOTSUP); - case VM_MAXID: - return (ENOTSUP); - default: - return (ENOTSUP); + kernel_debug_string_simple(TRACE_STRING_THREADNAME, ut->pth_name); } - /* NOTREACHED */ - return (ENOTSUP); + + return 0; } -/* - * Locking and stats - */ -static struct sysctl_lock { - int sl_lock; - int sl_want; - int sl_locked; -} memlock; +SYSCTL_PROC(_kern, KERN_THREADNAME, threadname, CTLFLAG_ANYBODY | CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0, sysctl_handle_kern_threadname,"A",""); -int -__sysctl(struct proc *p, struct __sysctl_args *uap, __unused register_t *retval) +#define BSD_HOST 1 +STATIC int +sysctl_sched_stats(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) { - int error, dolock = 1; - size_t savelen = 0, oldlen = 0, newlen; - sysctlfn *fnp = NULL; - int name[CTL_MAXNAME]; - int i; - int error1; + host_basic_info_data_t hinfo; + kern_return_t kret; + uint32_t size; + int changed; + mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT; + struct _processor_statistics_np *buf; + int error; - /* - * all top-level sysctl names are non-terminal - */ - if (uap->namelen > CTL_MAXNAME || uap->namelen < 2) - return (EINVAL); - error = copyin(uap->name, &name[0], uap->namelen * sizeof(int)); - if (error) - return (error); - - AUDIT_ARG(ctlname, name, uap->namelen); + kret = host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count); + if (kret != KERN_SUCCESS) { + return EINVAL; + } - if (proc_is64bit(p)) { - /* uap->newlen is a size_t value which grows to 64 bits - * when coming from a 64-bit process. 
since it's doubtful we'll - * have a sysctl newp buffer greater than 4GB we shrink it to size_t - */ - newlen = CAST_DOWN(size_t, uap->newlen); + size = sizeof(struct _processor_statistics_np) * (hinfo.logical_cpu_max + 2); /* One for RT Queue, One for Fair Share Queue */ + + if (req->oldlen < size) { + return EINVAL; } - else { - newlen = uap->newlen; + + MALLOC(buf, struct _processor_statistics_np*, size, M_TEMP, M_ZERO | M_WAITOK); + + kret = get_sched_statistics(buf, &size); + if (kret != KERN_SUCCESS) { + error = EINVAL; + goto out; } - /* CTL_UNSPEC is used to get oid to AUTO_OID */ - if (uap->new != USER_ADDR_NULL - && ((name[0] == CTL_KERN - && !(name[1] == KERN_IPC || name[1] == KERN_PANICINFO || name[1] == KERN_PROCDELAYTERM || - name[1] == KERN_PROC_LOW_PRI_IO || name[1] == KERN_PROCNAME || name[1] == KERN_THALTSTACK)) - || (name[0] == CTL_HW) - || (name[0] == CTL_VM) - || (name[0] == CTL_VFS)) - && (error = suser(kauth_cred_get(), &p->p_acflag))) - return (error); + error = sysctl_io_opaque(req, buf, size, &changed); + if (error) { + goto out; + } - switch (name[0]) { - case CTL_KERN: - fnp = kern_sysctl; - if ((name[1] != KERN_VNODE) && (name[1] != KERN_FILE) - && (name[1] != KERN_PROC)) - dolock = 0; - break; - case CTL_VM: - fnp = vm_sysctl; - break; - - case CTL_VFS: - fnp = vfs_sysctl; - break; -#ifdef DEBUG - case CTL_DEBUG: - fnp = debug_sysctl; - break; -#endif - default: - fnp = NULL; + if (changed) { + panic("Sched info changed?!"); } +out: + FREE(buf, M_TEMP); + return error; +} - if (uap->oldlenp != USER_ADDR_NULL) { - uint64_t oldlen64 = fuulong(uap->oldlenp); +SYSCTL_PROC(_kern, OID_AUTO, sched_stats, CTLFLAG_LOCKED, 0, 0, sysctl_sched_stats, "-", ""); - oldlen = CAST_DOWN(size_t, oldlen64); - /* - * If more than 4G, clamp to 4G - useracc() below will catch - * with an EFAULT, if it's actually necessary. 
- */ - if (oldlen64 > 0x00000000ffffffffULL) - oldlen = 0xffffffffUL; - } - - if (uap->old != USER_ADDR_NULL) { - if (!useracc(uap->old, (user_size_t)oldlen, B_WRITE)) - return (EFAULT); - - /* The pc sampling mechanism does not need to take this lock */ - if ((name[1] != KERN_PCSAMPLES) && - (!((name[1] == KERN_KDEBUG) && (name[2] == KERN_KDGETENTROPY)))) { - while (memlock.sl_lock) { - memlock.sl_want = 1; - sleep((caddr_t)&memlock, PRIBIO+1); - memlock.sl_locked++; - } - memlock.sl_lock = 1; - } +STATIC int +sysctl_sched_stats_enable(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, __unused struct sysctl_req *req) +{ + boolean_t active; + int res; - if (dolock && oldlen && - (error = vslock(uap->old, (user_size_t)oldlen))) { - if ((name[1] != KERN_PCSAMPLES) && - (! ((name[1] == KERN_KDEBUG) && (name[2] == KERN_KDGETENTROPY)))) { - memlock.sl_lock = 0; - if (memlock.sl_want) { - memlock.sl_want = 0; - wakeup((caddr_t)&memlock); - } - } - return(error); - } - savelen = oldlen; + if (req->newlen != sizeof(active)) { + return EINVAL; } - if (fnp) { - error = (*fnp)(name + 1, uap->namelen - 1, uap->old, - &oldlen, uap->new, newlen, p); + res = copyin(req->newptr, &active, sizeof(active)); + if (res != 0) { + return res; } - else - error = ENOTSUP; - if ( (name[0] != CTL_VFS) && (error == ENOTSUP)) { - size_t tmp = oldlen; - error = userland_sysctl(p, name, uap->namelen, uap->old, &tmp, - 1, uap->new, newlen, &oldlen); - } + return set_sched_stats_active(active); +} + +SYSCTL_PROC(_kern, OID_AUTO, sched_stats_enable, CTLFLAG_LOCKED | CTLFLAG_WR, 0, 0, sysctl_sched_stats_enable, "-", ""); + +extern uint32_t sched_debug_flags; +SYSCTL_INT(_debug, OID_AUTO, sched, CTLFLAG_RW | CTLFLAG_LOCKED, &sched_debug_flags, 0, "scheduler debug"); + +#if (DEBUG || DEVELOPMENT) +extern boolean_t doprnt_hide_pointers; +SYSCTL_INT(_debug, OID_AUTO, hide_kernel_pointers, CTLFLAG_RW | CTLFLAG_LOCKED, &doprnt_hide_pointers, 0, "hide kernel pointers from log"); 
+#endif + +extern int get_kernel_symfile(proc_t, char **); + +#if COUNT_SYSCALLS +#define KERN_COUNT_SYSCALLS (KERN_OSTYPE + 1000) + +extern unsigned int nsysent; +extern int syscalls_log[]; +extern const char *syscallnames[]; + +STATIC int +sysctl_docountsyscalls SYSCTL_HANDLER_ARGS +{ + __unused int cmd = oidp->oid_arg2; /* subcommand*/ + __unused int *name = arg1; /* oid element argument vector */ + __unused int namelen = arg2; /* number of oid element arguments */ + user_addr_t oldp = req->oldptr; /* user buffer copy out address */ + size_t *oldlenp = &req->oldlen; /* user buffer copy out size */ + user_addr_t newp = req->newptr; /* user buffer copy in address */ + size_t newlen = req->newlen; /* user buffer copy in size */ + int error; + + int tmp; - if (uap->old != USER_ADDR_NULL) { - if (dolock && savelen) { - error1 = vsunlock(uap->old, (user_size_t)savelen, B_WRITE); - if (!error && error1) - error = error1; + /* valid values passed in: + * = 0 means don't keep called counts for each bsd syscall + * > 0 means keep called counts for each bsd syscall + * = 2 means dump current counts to the system log + * = 3 means reset all counts + * for example, to dump current counts: + * sysctl -w kern.count_calls=2 + */ + error = sysctl_int(oldp, oldlenp, newp, newlen, &tmp); + if ( error != 0 ) { + return (error); + } + + if ( tmp == 1 ) { + do_count_syscalls = 1; + } + else if ( tmp == 0 || tmp == 2 || tmp == 3 ) { + int i; + for ( i = 0; i < nsysent; i++ ) { + if ( syscalls_log[i] != 0 ) { + if ( tmp == 2 ) { + printf("%d calls - name %s \n", syscalls_log[i], syscallnames[i]); + } + else { + syscalls_log[i] = 0; + } + } } - if (name[1] != KERN_PCSAMPLES) { - memlock.sl_lock = 0; - if (memlock.sl_want) { - memlock.sl_want = 0; - wakeup((caddr_t)&memlock); - } + if ( tmp != 0 ) { + do_count_syscalls = 1; } } - if ((error) && (error != ENOMEM)) - return (error); - if (uap->oldlenp != USER_ADDR_NULL) { - i = suulong(uap->oldlenp, oldlen); - if (i) - return i; - } + /* 
adjust index so we return the right required/consumed amount */ + if (!error) + req->oldidx += req->oldlen; return (error); } +SYSCTL_PROC(_kern, KERN_COUNT_SYSCALLS, count_syscalls, CTLTYPE_NODE|CTLFLAG_RD | CTLFLAG_LOCKED, + 0, /* Pointer argument (arg1) */ + 0, /* Integer argument (arg2) */ + sysctl_docountsyscalls, /* Handler function */ + NULL, /* Data pointer */ + ""); +#endif /* COUNT_SYSCALLS */ /* - * Attributes stored in the kernel. + * The following sysctl_* functions should not be used + * any more, as they can only cope with callers in + * user mode: Use new-style + * sysctl_io_number() + * sysctl_io_string() + * sysctl_io_opaque() + * instead. */ -__private_extern__ char corefilename[MAXPATHLEN+1]; -__private_extern__ int do_coredump; -__private_extern__ int sugid_coredump; +/* + * Validate parameters and get old / set new parameters + * for an integer-valued sysctl function. + */ +int +sysctl_int(user_addr_t oldp, size_t *oldlenp, + user_addr_t newp, size_t newlen, int *valp) +{ + int error = 0; -#ifdef INSECURE -int securelevel = -1; -#else -int securelevel; -#endif - -static int -sysctl_affinity( - int *name, - u_int namelen, - user_addr_t oldBuf, - size_t *oldSize, - user_addr_t newBuf, - __unused size_t newSize, - struct proc *cur_proc) -{ - if (namelen < 1) - return (ENOTSUP); - - if (name[0] == 0 && 1 == namelen) { - return sysctl_rdint(oldBuf, oldSize, newBuf, - (cur_proc->p_flag & P_AFFINITY) ? 
1 : 0); - } else if (name[0] == 1 && 2 == namelen) { - if (name[1] == 0) { - cur_proc->p_flag &= ~P_AFFINITY; - } else { - cur_proc->p_flag |= P_AFFINITY; - } - return 0; + if (oldp != USER_ADDR_NULL && oldlenp == NULL) + return (EFAULT); + if (oldp && *oldlenp < sizeof(int)) + return (ENOMEM); + if (newp && newlen != sizeof(int)) + return (EINVAL); + *oldlenp = sizeof(int); + if (oldp) + error = copyout(valp, oldp, sizeof(int)); + if (error == 0 && newp) { + error = copyin(newp, valp, sizeof(int)); + AUDIT_ARG(value32, *valp); } - return (ENOTSUP); + return (error); } - -static int -sysctl_translate( - int *name, - u_int namelen, - user_addr_t oldBuf, - size_t *oldSize, - user_addr_t newBuf, - __unused size_t newSize, - struct proc *cur_proc) +/* + * Validate parameters and get old / set new parameters + * for an quad(64bit)-valued sysctl function. + */ +int +sysctl_quad(user_addr_t oldp, size_t *oldlenp, + user_addr_t newp, size_t newlen, quad_t *valp) { - struct proc *p; - - if (namelen != 1) - return (ENOTSUP); + int error = 0; - p = pfind(name[0]); - if (p == NULL) + if (oldp != USER_ADDR_NULL && oldlenp == NULL) + return (EFAULT); + if (oldp && *oldlenp < sizeof(quad_t)) + return (ENOMEM); + if (newp && newlen != sizeof(quad_t)) return (EINVAL); + *oldlenp = sizeof(quad_t); + if (oldp) + error = copyout(valp, oldp, sizeof(quad_t)); + if (error == 0 && newp) + error = copyin(newp, valp, sizeof(quad_t)); + return (error); +} - if ((kauth_cred_getuid(p->p_ucred) != kauth_cred_getuid(kauth_cred_get())) - && suser(kauth_cred_get(), &cur_proc->p_acflag)) - return (EPERM); +STATIC int +sysdoproc_filt_KERN_PROC_PID(proc_t p, void * arg) +{ + if (p->p_pid != (pid_t)*(int*)arg) + return(0); + else + return(1); +} - return sysctl_rdint(oldBuf, oldSize, newBuf, - (p->p_flag & P_TRANSLATED) ? 
1 : 0); +STATIC int +sysdoproc_filt_KERN_PROC_PGRP(proc_t p, void * arg) +{ + if (p->p_pgrpid != (pid_t)*(int*)arg) + return(0); + else + return(1); } -int -set_archhandler(struct proc *p, int arch) +STATIC int +sysdoproc_filt_KERN_PROC_TTY(proc_t p, void * arg) { - int error; - struct nameidata nd; - struct vnode_attr va; - struct vfs_context context; - char *archhandler; - - switch(arch) { - case CPU_TYPE_POWERPC: - archhandler = exec_archhandler_ppc.path; - break; - default: - return (EBADARCH); - } + int retval; + struct tty *tp; + + /* This is very racy but list lock is held.. Hmmm. */ + if ((p->p_flag & P_CONTROLT) == 0 || + (p->p_pgrp == NULL) || (p->p_pgrp->pg_session == NULL) || + (tp = SESSION_TP(p->p_pgrp->pg_session)) == TTY_NULL || + tp->t_dev != (dev_t)*(int*)arg) + retval = 0; + else + retval = 1; - context.vc_proc = p; - context.vc_ucred = kauth_cred_get(); - - NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE32, - CAST_USER_ADDR_T(archhandler), &context); - error = namei(&nd); - if (error) - return (error); - nameidone(&nd); - - /* Check mount point */ - if ((nd.ni_vp->v_mount->mnt_flag & MNT_NOEXEC) || - (nd.ni_vp->v_type != VREG)) { - vnode_put(nd.ni_vp); - return (EACCES); - } - - VATTR_INIT(&va); - VATTR_WANTED(&va, va_fsid); - VATTR_WANTED(&va, va_fileid); - error = vnode_getattr(nd.ni_vp, &va, &context); - if (error) { - vnode_put(nd.ni_vp); - return (error); - } - vnode_put(nd.ni_vp); - - exec_archhandler_ppc.fsid = va.va_fsid; - exec_archhandler_ppc.fileid = (u_long)va.va_fileid; - return 0; + return(retval); } -static int -sysctl_exec_archhandler_ppc( - __unused int *name, - __unused u_int namelen, - user_addr_t oldBuf, - size_t *oldSize, - user_addr_t newBuf, - size_t newSize, - struct proc *p) +STATIC int +sysdoproc_filt_KERN_PROC_UID(proc_t p, void * arg) { - int error; - size_t len; - struct nameidata nd; - struct vnode_attr va; - char handler[sizeof(exec_archhandler_ppc.path)]; - struct vfs_context context; - - context.vc_proc = p; 
- context.vc_ucred = kauth_cred_get(); - - if (oldSize) { - len = strlen(exec_archhandler_ppc.path) + 1; - if (oldBuf) { - if (*oldSize < len) - return (ENOMEM); - error = copyout(exec_archhandler_ppc.path, oldBuf, len); - if (error) - return (error); - } - *oldSize = len - 1; - } - if (newBuf) { - error = suser(context.vc_ucred, &p->p_acflag); - if (error) - return (error); - if (newSize >= sizeof(exec_archhandler_ppc.path)) - return (ENAMETOOLONG); - error = copyin(newBuf, handler, newSize); - if (error) - return (error); - handler[newSize] = 0; - strcpy(exec_archhandler_ppc.path, handler); - error = set_archhandler(p, CPU_TYPE_POWERPC); - if (error) - return (error); - } - return 0; + kauth_cred_t my_cred; + uid_t uid; + + if (p->p_ucred == NULL) + return(0); + my_cred = kauth_cred_proc_ref(p); + uid = kauth_cred_getuid(my_cred); + kauth_cred_unref(&my_cred); + + if (uid != (uid_t)*(int*)arg) + return(0); + else + return(1); } -SYSCTL_NODE(_kern, KERN_EXEC, exec, CTLFLAG_RD, 0, ""); -SYSCTL_NODE(_kern_exec, OID_AUTO, archhandler, CTLFLAG_RD, 0, ""); +STATIC int +sysdoproc_filt_KERN_PROC_RUID(proc_t p, void * arg) +{ + kauth_cred_t my_cred; + uid_t ruid; -SYSCTL_STRING(_kern_exec_archhandler, OID_AUTO, powerpc, CTLFLAG_RD, - exec_archhandler_ppc.path, 0, ""); + if (p->p_ucred == NULL) + return(0); + my_cred = kauth_cred_proc_ref(p); + ruid = kauth_cred_getruid(my_cred); + kauth_cred_unref(&my_cred); -extern int get_kernel_symfile( struct proc *, char **); -__private_extern__ int -sysctl_dopanicinfo(int *, u_int, user_addr_t, size_t *, user_addr_t, - size_t, struct proc *); + if (ruid != (uid_t)*(int*)arg) + return(0); + else + return(1); +} /* - * kernel related system variables. 
+ * try over estimating by 5 procs */ +#define KERN_PROCSLOP (5 * sizeof (struct kinfo_proc)) +struct sysdoproc_args { + int buflen; + void *kprocp; + boolean_t is_64_bit; + user_addr_t dp; + size_t needed; + int sizeof_kproc; + int *errorp; + int uidcheck; + int ruidcheck; + int ttycheck; + int uidval; +}; + int -kern_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp, - user_addr_t newp, size_t newlen, struct proc *p) +sysdoproc_callback(proc_t p, void *arg) { - int error, level, inthostid, tmp; - unsigned int oldval=0; - char *str; - /* all sysctl names not listed below are terminal at this level */ - if (namelen != 1 - && !(name[0] == KERN_PROC - || name[0] == KERN_PROF - || name[0] == KERN_KDEBUG - || name[0] == KERN_PROCARGS - || name[0] == KERN_PROCARGS2 - || name[0] == KERN_PCSAMPLES - || name[0] == KERN_IPC - || name[0] == KERN_SYSV - || name[0] == KERN_AFFINITY - || name[0] == KERN_TRANSLATE - || name[0] == KERN_EXEC - || name[0] == KERN_PANICINFO - || name[0] == KERN_POSIX - || name[0] == KERN_TFP) - ) - return (ENOTDIR); /* overloaded */ - - switch (name[0]) { - case KERN_OSTYPE: - return (sysctl_rdstring(oldp, oldlenp, newp, ostype)); - case KERN_OSRELEASE: - return (sysctl_rdstring(oldp, oldlenp, newp, osrelease)); - case KERN_OSREV: - return (sysctl_rdint(oldp, oldlenp, newp, BSD)); - case KERN_VERSION: - return (sysctl_rdstring(oldp, oldlenp, newp, version)); - case KERN_MAXVNODES: - oldval = desiredvnodes; - error = sysctl_int(oldp, oldlenp, newp, - newlen, &desiredvnodes); - reset_vmobjectcache(oldval, desiredvnodes); - resize_namecache(desiredvnodes); - return(error); - case KERN_MAXPROC: - return (sysctl_maxproc(oldp, oldlenp, newp, newlen)); - case KERN_MAXFILES: - return (sysctl_int(oldp, oldlenp, newp, newlen, &maxfiles)); - case KERN_MAXPROCPERUID: - return( sysctl_maxprocperuid( oldp, oldlenp, newp, newlen ) ); - case KERN_MAXFILESPERPROC: - return( sysctl_maxfilesperproc( oldp, oldlenp, newp, newlen ) ); - case KERN_ARGMAX: 
- return (sysctl_rdint(oldp, oldlenp, newp, ARG_MAX)); - case KERN_SECURELVL: - level = securelevel; - if ((error = sysctl_int(oldp, oldlenp, newp, newlen, &level)) || - newp == USER_ADDR_NULL) - return (error); - if (level < securelevel && p->p_pid != 1) - return (EPERM); - securelevel = level; - return (0); - case KERN_HOSTNAME: - error = sysctl_trstring(oldp, oldlenp, newp, newlen, - hostname, sizeof(hostname)); - if (newp && !error) - hostnamelen = newlen; - return (error); - case KERN_DOMAINNAME: - error = sysctl_string(oldp, oldlenp, newp, newlen, - domainname, sizeof(domainname)); - if (newp && !error) - domainnamelen = newlen; - return (error); - case KERN_HOSTID: - inthostid = hostid; /* XXX assumes sizeof long <= sizeof int */ - error = sysctl_int(oldp, oldlenp, newp, newlen, &inthostid); - hostid = inthostid; - return (error); - case KERN_CLOCKRATE: - return (sysctl_clockrate(oldp, oldlenp)); - case KERN_BOOTTIME: - { - struct timeval t; - - t.tv_sec = boottime_sec(); - t.tv_usec = 0; - - return (sysctl_rdstruct(oldp, oldlenp, newp, &t, - sizeof(struct timeval))); - } - case KERN_VNODE: - return (sysctl_vnode(oldp, oldlenp)); - case KERN_PROC: - return (sysctl_doproc(name + 1, namelen - 1, oldp, oldlenp)); - case KERN_FILE: - return (sysctl_file(oldp, oldlenp)); -#ifdef GPROF - case KERN_PROF: - return (sysctl_doprof(name + 1, namelen - 1, oldp, oldlenp, - newp, newlen)); -#endif - case KERN_POSIX1: - return (sysctl_rdint(oldp, oldlenp, newp, _POSIX_VERSION)); - case KERN_NGROUPS: - return (sysctl_rdint(oldp, oldlenp, newp, NGROUPS_MAX)); - case KERN_JOB_CONTROL: - return (sysctl_rdint(oldp, oldlenp, newp, 1)); - case KERN_SAVED_IDS: -#ifdef _POSIX_SAVED_IDS - return (sysctl_rdint(oldp, oldlenp, newp, 1)); -#else - return (sysctl_rdint(oldp, oldlenp, newp, 0)); -#endif - case KERN_KDEBUG: - return (kdebug_ops(name + 1, namelen - 1, oldp, oldlenp, p)); - case KERN_PCSAMPLES: - return (pcsamples_ops(name + 1, namelen - 1, oldp, oldlenp, p)); - case 
KERN_PROCARGS: - /* new one as it does not use kinfo_proc */ - return (sysctl_procargs(name + 1, namelen - 1, oldp, oldlenp, p)); - case KERN_PROCARGS2: - /* new one as it does not use kinfo_proc */ - return (sysctl_procargs2(name + 1, namelen - 1, oldp, oldlenp, p)); - case KERN_SYMFILE: - error = get_kernel_symfile( p, &str ); - if ( error ) - return error; - return (sysctl_rdstring(oldp, oldlenp, newp, str)); -#if NFSCLIENT - case KERN_NETBOOT: - return (sysctl_rdint(oldp, oldlenp, newp, netboot_root())); -#endif - case KERN_PANICINFO: - return(sysctl_dopanicinfo(name + 1, namelen - 1, oldp, oldlenp, - newp, newlen, p)); - case KERN_AFFINITY: - return sysctl_affinity(name+1, namelen-1, oldp, oldlenp, - newp, newlen, p); - case KERN_TRANSLATE: - return sysctl_translate(name+1, namelen-1, oldp, oldlenp, newp, - newlen, p); - case KERN_CLASSICHANDLER: - return sysctl_exec_archhandler_ppc(name+1, namelen-1, oldp, - oldlenp, newp, newlen, p); - case KERN_AIOMAX: - return( sysctl_aiomax( oldp, oldlenp, newp, newlen ) ); - case KERN_AIOPROCMAX: - return( sysctl_aioprocmax( oldp, oldlenp, newp, newlen ) ); - case KERN_AIOTHREADS: - return( sysctl_aiothreads( oldp, oldlenp, newp, newlen ) ); - case KERN_USRSTACK: - return (sysctl_rdint(oldp, oldlenp, newp, (uintptr_t)p->user_stack)); - case KERN_USRSTACK64: - return (sysctl_rdquad(oldp, oldlenp, newp, p->user_stack)); - case KERN_COREFILE: - error = sysctl_string(oldp, oldlenp, newp, newlen, - corefilename, sizeof(corefilename)); - return (error); - case KERN_COREDUMP: - tmp = do_coredump; - error = sysctl_int(oldp, oldlenp, newp, newlen, &do_coredump); - if (!error && ((do_coredump < 0) || (do_coredump > 1))) { - do_coredump = tmp; - error = EINVAL; - } - return (error); - case KERN_SUGID_COREDUMP: - tmp = sugid_coredump; - error = sysctl_int(oldp, oldlenp, newp, newlen, &sugid_coredump); - if (!error && ((sugid_coredump < 0) || (sugid_coredump > 1))) { - sugid_coredump = tmp; - error = EINVAL; + struct sysdoproc_args 
*args = arg; + + if (args->buflen >= args->sizeof_kproc) { + if ((args->ruidcheck != 0) && (sysdoproc_filt_KERN_PROC_RUID(p, &args->uidval) == 0)) + return (PROC_RETURNED); + if ((args->uidcheck != 0) && (sysdoproc_filt_KERN_PROC_UID(p, &args->uidval) == 0)) + return (PROC_RETURNED); + if ((args->ttycheck != 0) && (sysdoproc_filt_KERN_PROC_TTY(p, &args->uidval) == 0)) + return (PROC_RETURNED); + + bzero(args->kprocp, args->sizeof_kproc); + if (args->is_64_bit) + fill_user64_proc(p, args->kprocp); + else + fill_user32_proc(p, args->kprocp); + int error = copyout(args->kprocp, args->dp, args->sizeof_kproc); + if (error) { + *args->errorp = error; + return (PROC_RETURNED_DONE); } - return (error); - case KERN_PROCDELAYTERM: - { - int old_value, new_value; - - error = 0; - if (oldp && *oldlenp < sizeof(int)) - return (ENOMEM); - if ( newp && newlen != sizeof(int) ) - return(EINVAL); - *oldlenp = sizeof(int); - old_value = (p->p_lflag & P_LDELAYTERM)? 1: 0; - if (oldp && (error = copyout( &old_value, oldp, sizeof(int)))) - return(error); - if (error == 0 && newp ) - error = copyin( newp, &new_value, sizeof(int) ); - if (error == 0 && newp) { - if (new_value) - p->p_lflag |= P_LDELAYTERM; - else - p->p_lflag &= ~P_LDELAYTERM; - } - return(error); + args->dp += args->sizeof_kproc; + args->buflen -= args->sizeof_kproc; } - case KERN_PROC_LOW_PRI_IO: - { - int old_value, new_value; + args->needed += args->sizeof_kproc; + return (PROC_RETURNED); +} - error = 0; - if (oldp && *oldlenp < sizeof(int)) - return (ENOMEM); - if ( newp && newlen != sizeof(int) ) - return(EINVAL); - *oldlenp = sizeof(int); - - old_value = (p->p_lflag & P_LLOW_PRI_IO)? 
0x01: 0; - if (p->p_lflag & P_LBACKGROUND_IO) - old_value |= 0x02; - - if (oldp && (error = copyout( &old_value, oldp, sizeof(int)))) - return(error); - if (error == 0 && newp ) - error = copyin( newp, &new_value, sizeof(int) ); - if (error == 0 && newp) { - if (new_value & 0x01) - p->p_lflag |= P_LLOW_PRI_IO; - else if (new_value & 0x02) - p->p_lflag |= P_LBACKGROUND_IO; - else if (new_value == 0) - p->p_lflag &= ~(P_LLOW_PRI_IO | P_LBACKGROUND_IO); - } - return(error); - } - case KERN_LOW_PRI_WINDOW: - { - int old_value, new_value; +SYSCTL_NODE(_kern, KERN_PROC, proc, CTLFLAG_RD | CTLFLAG_LOCKED, 0, ""); +STATIC int +sysctl_prochandle SYSCTL_HANDLER_ARGS +{ + int cmd = oidp->oid_arg2; /* subcommand for multiple nodes */ + int *name = arg1; /* oid element argument vector */ + int namelen = arg2; /* number of oid element arguments */ + user_addr_t where = req->oldptr;/* user buffer copy out address */ - error = 0; - if (oldp && *oldlenp < sizeof(old_value) ) - return (ENOMEM); - if ( newp && newlen != sizeof(new_value) ) - return(EINVAL); - *oldlenp = sizeof(old_value); - - old_value = lowpri_IO_window_msecs; - - if (oldp && (error = copyout( &old_value, oldp, *oldlenp))) - return(error); - if (error == 0 && newp ) - error = copyin( newp, &new_value, sizeof(newlen) ); - if (error == 0 && newp) { - lowpri_IO_window_msecs = new_value; - } - return(error); - } - case KERN_LOW_PRI_DELAY: - { - int old_value, new_value; + user_addr_t dp = where; + size_t needed = 0; + int buflen = where != USER_ADDR_NULL ? 
req->oldlen : 0; + int error = 0; + boolean_t is_64_bit = proc_is64bit(current_proc()); + struct user32_kinfo_proc user32_kproc; + struct user64_kinfo_proc user_kproc; + int sizeof_kproc; + void *kprocp; + int (*filterfn)(proc_t, void *) = 0; + struct sysdoproc_args args; + int uidcheck = 0; + int ruidcheck = 0; + int ttycheck = 0; + int success = 0; + + if (namelen != 1 && !(namelen == 0 && cmd == KERN_PROC_ALL)) + return (EINVAL); - error = 0; - if (oldp && *oldlenp < sizeof(old_value) ) - return (ENOMEM); - if ( newp && newlen != sizeof(new_value) ) - return(EINVAL); - *oldlenp = sizeof(old_value); - - old_value = lowpri_IO_delay_msecs; - - if (oldp && (error = copyout( &old_value, oldp, *oldlenp))) - return(error); - if (error == 0 && newp ) - error = copyin( newp, &new_value, sizeof(newlen) ); - if (error == 0 && newp) { - lowpri_IO_delay_msecs = new_value; - } - return(error); + if (is_64_bit) { + sizeof_kproc = sizeof(user_kproc); + kprocp = &user_kproc; + } else { + sizeof_kproc = sizeof(user32_kproc); + kprocp = &user32_kproc; } - case KERN_NX_PROTECTION: - { - int old_value, new_value; - error = 0; - if (oldp && *oldlenp < sizeof(old_value) ) - return (ENOMEM); - if ( newp && newlen != sizeof(new_value) ) - return(EINVAL); - *oldlenp = sizeof(old_value); + switch (cmd) { - old_value = nx_enabled; + case KERN_PROC_PID: + filterfn = sysdoproc_filt_KERN_PROC_PID; + break; - if (oldp && (error = copyout( &old_value, oldp, *oldlenp))) - return(error); -#ifdef __i386__ - /* - * Only allow setting if NX is supported on the chip - */ - if (cpuid_extfeatures() & CPUID_EXTFEATURE_XD) { -#endif - if (error == 0 && newp) - error = copyin(newp, &new_value, - sizeof(newlen)); - if (error == 0 && newp) - nx_enabled = new_value; -#ifdef __i386__ - } else if (newp) { - error = ENOTSUP; - } -#endif - return(error); - } - case KERN_SHREG_PRIVATIZABLE: - /* this kernel does implement shared_region_make_private_np() */ - return (sysctl_rdint(oldp, oldlenp, newp, 1)); - case 
KERN_PROCNAME: - error = sysctl_trstring(oldp, oldlenp, newp, newlen, - &p->p_name[0], (2*MAXCOMLEN+1)); - return (error); - case KERN_THALTSTACK: - { - int old_value, new_value; + case KERN_PROC_PGRP: + filterfn = sysdoproc_filt_KERN_PROC_PGRP; + break; + + case KERN_PROC_TTY: + ttycheck = 1; + break; - error = 0; - if (oldp && *oldlenp < sizeof(int)) - return (ENOMEM); - if ( newp && newlen != sizeof(int) ) - return(EINVAL); - *oldlenp = sizeof(int); - old_value = (p->p_lflag & P_LTHSIGSTACK)? 1: 0; - if (oldp && (error = copyout( &old_value, oldp, sizeof(int)))) - return(error); - if (error == 0 && newp ) - error = copyin( newp, &new_value, sizeof(int) ); - if (error == 0 && newp) { - if (new_value) { - /* we cannot swich midstream if inuse */ - if ((p->p_sigacts->ps_flags & SAS_ALTSTACK) == SAS_ALTSTACK) - return(EPERM); - p->p_lflag |= P_LTHSIGSTACK; - } else { - /* we cannot swich midstream */ - if ((p->p_lflag & P_LTHSIGSTACK) == P_LTHSIGSTACK) - return(EPERM); - p->p_lflag &= ~P_LTHSIGSTACK; - } - } - return(error); - } - default: - return (ENOTSUP); - } - /* NOTREACHED */ -} + case KERN_PROC_UID: + uidcheck = 1; + break; -#ifdef DEBUG -/* - * Debugging related system variables. 
- */ -#if DIAGNOSTIC -extern -#endif /* DIAGNOSTIC */ -struct ctldebug debug0, debug1; -struct ctldebug debug2, debug3, debug4; -struct ctldebug debug5, debug6, debug7, debug8, debug9; -struct ctldebug debug10, debug11, debug12, debug13, debug14; -struct ctldebug debug15, debug16, debug17, debug18, debug19; -static struct ctldebug *debugvars[CTL_DEBUG_MAXID] = { - &debug0, &debug1, &debug2, &debug3, &debug4, - &debug5, &debug6, &debug7, &debug8, &debug9, - &debug10, &debug11, &debug12, &debug13, &debug14, - &debug15, &debug16, &debug17, &debug18, &debug19, -}; -int -debug_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp, - user_addr_t newp, size_t newlen, struct proc *p) -{ - struct ctldebug *cdp; - - /* all sysctl names at this level are name and field */ - if (namelen != 2) - return (ENOTDIR); /* overloaded */ - cdp = debugvars[name[0]]; - if (cdp->debugname == 0) - return (ENOTSUP); - switch (name[1]) { - case CTL_DEBUG_NAME: - return (sysctl_rdstring(oldp, oldlenp, newp, cdp->debugname)); - case CTL_DEBUG_VALUE: - return (sysctl_int(oldp, oldlenp, newp, newlen, cdp->debugvar)); - default: - return (ENOTSUP); - } - /* NOTREACHED */ -} -#endif /* DEBUG */ + case KERN_PROC_RUID: + ruidcheck = 1; + break; -/* - * Validate parameters and get old / set new parameters - * for an integer-valued sysctl function. - */ -int -sysctl_int(user_addr_t oldp, size_t *oldlenp, - user_addr_t newp, size_t newlen, int *valp) -{ - int error = 0; + case KERN_PROC_ALL: + break; - if (oldp != USER_ADDR_NULL && oldlenp == NULL) - return (EFAULT); - if (oldp && *oldlenp < sizeof(int)) - return (ENOMEM); - if (newp && newlen != sizeof(int)) - return (EINVAL); - *oldlenp = sizeof(int); - if (oldp) - error = copyout(valp, oldp, sizeof(int)); - if (error == 0 && newp) { - error = copyin(newp, valp, sizeof(int)); - AUDIT_ARG(value, *valp); + default: + /* must be kern.proc. */ + return (ENOTSUP); } - return (error); -} -/* - * As above, but read-only. 
- */ -int -sysctl_rdint(user_addr_t oldp, size_t *oldlenp, user_addr_t newp, int val) -{ - int error = 0; + error = 0; + args.buflen = buflen; + args.kprocp = kprocp; + args.is_64_bit = is_64_bit; + args.dp = dp; + args.needed = needed; + args.errorp = &error; + args.uidcheck = uidcheck; + args.ruidcheck = ruidcheck; + args.ttycheck = ttycheck; + args.sizeof_kproc = sizeof_kproc; + if (namelen) + args.uidval = name[0]; - if (oldp != USER_ADDR_NULL && oldlenp == NULL) - return (EFAULT); - if (oldp && *oldlenp < sizeof(int)) - return (ENOMEM); - if (newp) - return (EPERM); - *oldlenp = sizeof(int); - if (oldp) - error = copyout((caddr_t)&val, oldp, sizeof(int)); - return (error); -} + success = proc_iterate((PROC_ALLPROCLIST | PROC_ZOMBPROCLIST), + sysdoproc_callback, &args, filterfn, name); -/* - * Validate parameters and get old / set new parameters - * for an quad(64bit)-valued sysctl function. - */ -int -sysctl_quad(user_addr_t oldp, size_t *oldlenp, - user_addr_t newp, size_t newlen, quad_t *valp) -{ - int error = 0; + /* + * rdar://problem/28433391: if we can't iterate over the processes, + * make sure to return an error. + */ - if (oldp != USER_ADDR_NULL && oldlenp == NULL) - return (EFAULT); - if (oldp && *oldlenp < sizeof(quad_t)) + if (success != 0) return (ENOMEM); - if (newp && newlen != sizeof(quad_t)) - return (EINVAL); - *oldlenp = sizeof(quad_t); - if (oldp) - error = copyout(valp, oldp, sizeof(quad_t)); - if (error == 0 && newp) - error = copyin(newp, valp, sizeof(quad_t)); - return (error); + + if (error) + return (error); + + dp = args.dp; + needed = args.needed; + + if (where != USER_ADDR_NULL) { + req->oldlen = dp - where; + if (needed > req->oldlen) + return (ENOMEM); + } else { + needed += KERN_PROCSLOP; + req->oldlen = needed; + } + /* adjust index so we return the right required/consumed amount */ + req->oldidx += req->oldlen; + return (0); } /* - * As above, but read-only. 
+ * We specify the subcommand code for multiple nodes as the 'req->arg2' value + * in the sysctl declaration itself, which comes into the handler function + * as 'oidp->oid_arg2'. + * + * For these particular sysctls, since they have well known OIDs, we could + * have just obtained it from the '((int *)arg1)[0]' parameter, but that would + * not demonstrate how to handle multiple sysctls that used OID_AUTO instead + * of a well known value with a common handler function. This is desirable, + * because we want well known values to "go away" at some future date. + * + * It should be noted that the value of '((int *)arg1)[1]' is used as + * an integer parameter to the subcommand for many of these sysctls; we'd + * rather have used '((int *)arg1)[0]' for that, or even better, an element + * in a structure passed in as the 'newp' argument to sysctlbyname(3), + * and then use leaf-node permissions enforcement, but that would have + * necessitated modifying user space code to correspond to the interface + * change, and we are striving for binary backward compatibility here; even + * though these are SPI, and not intended for use by user space applications + * which are not themselves system tools or libraries, some applications + * have erroneously used them. 
*/ -int -sysctl_rdquad(oldp, oldlenp, newp, val) - void *oldp; - size_t *oldlenp; - void *newp; - quad_t val; -{ - int error = 0; +SYSCTL_PROC(_kern_proc, KERN_PROC_ALL, all, CTLTYPE_NODE|CTLFLAG_RD | CTLFLAG_LOCKED, + 0, /* Pointer argument (arg1) */ + KERN_PROC_ALL, /* Integer argument (arg2) */ + sysctl_prochandle, /* Handler function */ + NULL, /* Data is size variant on ILP32/LP64 */ + ""); +SYSCTL_PROC(_kern_proc, KERN_PROC_PID, pid, CTLTYPE_NODE|CTLFLAG_RD | CTLFLAG_LOCKED, + 0, /* Pointer argument (arg1) */ + KERN_PROC_PID, /* Integer argument (arg2) */ + sysctl_prochandle, /* Handler function */ + NULL, /* Data is size variant on ILP32/LP64 */ + ""); +SYSCTL_PROC(_kern_proc, KERN_PROC_TTY, tty, CTLTYPE_NODE|CTLFLAG_RD | CTLFLAG_LOCKED, + 0, /* Pointer argument (arg1) */ + KERN_PROC_TTY, /* Integer argument (arg2) */ + sysctl_prochandle, /* Handler function */ + NULL, /* Data is size variant on ILP32/LP64 */ + ""); +SYSCTL_PROC(_kern_proc, KERN_PROC_PGRP, pgrp, CTLTYPE_NODE|CTLFLAG_RD | CTLFLAG_LOCKED, + 0, /* Pointer argument (arg1) */ + KERN_PROC_PGRP, /* Integer argument (arg2) */ + sysctl_prochandle, /* Handler function */ + NULL, /* Data is size variant on ILP32/LP64 */ + ""); +SYSCTL_PROC(_kern_proc, KERN_PROC_UID, uid, CTLTYPE_NODE|CTLFLAG_RD | CTLFLAG_LOCKED, + 0, /* Pointer argument (arg1) */ + KERN_PROC_UID, /* Integer argument (arg2) */ + sysctl_prochandle, /* Handler function */ + NULL, /* Data is size variant on ILP32/LP64 */ + ""); +SYSCTL_PROC(_kern_proc, KERN_PROC_RUID, ruid, CTLTYPE_NODE|CTLFLAG_RD | CTLFLAG_LOCKED, + 0, /* Pointer argument (arg1) */ + KERN_PROC_RUID, /* Integer argument (arg2) */ + sysctl_prochandle, /* Handler function */ + NULL, /* Data is size variant on ILP32/LP64 */ + ""); +SYSCTL_PROC(_kern_proc, KERN_PROC_LCID, lcid, CTLTYPE_NODE|CTLFLAG_RD | CTLFLAG_LOCKED, + 0, /* Pointer argument (arg1) */ + KERN_PROC_LCID, /* Integer argument (arg2) */ + sysctl_prochandle, /* Handler function */ + NULL, /* Data is size variant 
on ILP32/LP64 */ + ""); - if (oldp != USER_ADDR_NULL && oldlenp == NULL) - return (EFAULT); - if (oldp && *oldlenp < sizeof(quad_t)) - return (ENOMEM); - if (newp) - return (EPERM); - *oldlenp = sizeof(quad_t); - if (oldp) - error = copyout((caddr_t)&val, CAST_USER_ADDR_T(oldp), sizeof(quad_t)); - return (error); -} /* - * Validate parameters and get old / set new parameters - * for a string-valued sysctl function. Unlike sysctl_string, if you - * give it a too small (but larger than 0 bytes) buffer, instead of - * returning ENOMEM, it truncates the returned string to the buffer - * size. This preserves the semantics of some library routines - * implemented via sysctl, which truncate their returned data, rather - * than simply returning an error. The returned string is always NUL - * terminated. + * Fill in non-zero fields of an eproc structure for the specified process. */ -int -sysctl_trstring(user_addr_t oldp, size_t *oldlenp, - user_addr_t newp, size_t newlen, char *str, int maxlen) +STATIC void +fill_user32_eproc(proc_t p, struct user32_eproc *__restrict ep) { - int len, copylen, error = 0; + struct tty *tp; + struct pgrp *pg; + struct session *sessp; + kauth_cred_t my_cred; + + pg = proc_pgrp(p); + sessp = proc_session(p); + + if (pg != PGRP_NULL) { + ep->e_pgid = p->p_pgrpid; + ep->e_jobc = pg->pg_jobc; + if (sessp != SESSION_NULL && sessp->s_ttyvp) + ep->e_flag = EPROC_CTTY; + } + ep->e_ppid = p->p_ppid; + if (p->p_ucred) { + my_cred = kauth_cred_proc_ref(p); - if (oldp != USER_ADDR_NULL && oldlenp == NULL) - return (EFAULT); - copylen = len = strlen(str) + 1; - if (oldp && (len < 0 || *oldlenp < 1)) - return (ENOMEM); - if (oldp && (*oldlenp < (size_t)len)) - copylen = *oldlenp + 1; - if (newp && (maxlen < 0 || newlen >= (size_t)maxlen)) - return (EINVAL); - *oldlenp = copylen - 1; /* deal with NULL strings correctly */ - if (oldp) { - error = copyout(str, oldp, copylen); - if (!error) { - unsigned char c = 0; - /* NUL terminate */ - oldp += *oldlenp; - 
error = copyout((void *)&c, oldp, sizeof(char)); - } + /* A fake historical pcred */ + ep->e_pcred.p_ruid = kauth_cred_getruid(my_cred); + ep->e_pcred.p_svuid = kauth_cred_getsvuid(my_cred); + ep->e_pcred.p_rgid = kauth_cred_getrgid(my_cred); + ep->e_pcred.p_svgid = kauth_cred_getsvgid(my_cred); + + /* A fake historical *kauth_cred_t */ + ep->e_ucred.cr_ref = my_cred->cr_ref; + ep->e_ucred.cr_uid = kauth_cred_getuid(my_cred); + ep->e_ucred.cr_ngroups = posix_cred_get(my_cred)->cr_ngroups; + bcopy(posix_cred_get(my_cred)->cr_groups, + ep->e_ucred.cr_groups, NGROUPS * sizeof (gid_t)); + + kauth_cred_unref(&my_cred); } - if (error == 0 && newp) { - error = copyin(newp, str, newlen); - str[newlen] = 0; - AUDIT_ARG(text, (char *)str); + + if ((p->p_flag & P_CONTROLT) && (sessp != SESSION_NULL) && + (tp = SESSION_TP(sessp))) { + ep->e_tdev = tp->t_dev; + ep->e_tpgid = sessp->s_ttypgrpid; + } else + ep->e_tdev = NODEV; + + if (sessp != SESSION_NULL) { + if (SESS_LEADER(p, sessp)) + ep->e_flag |= EPROC_SLEADER; + session_rele(sessp); } - return (error); + if (pg != PGRP_NULL) + pg_rele(pg); } /* - * Validate parameters and get old / set new parameters - * for a string-valued sysctl function. + * Fill in non-zero fields of an LP64 eproc structure for the specified process. 
*/ -int -sysctl_string(user_addr_t oldp, size_t *oldlenp, - user_addr_t newp, size_t newlen, char *str, int maxlen) +STATIC void +fill_user64_eproc(proc_t p, struct user64_eproc *__restrict ep) { - int len, error = 0; + struct tty *tp; + struct pgrp *pg; + struct session *sessp; + kauth_cred_t my_cred; + + pg = proc_pgrp(p); + sessp = proc_session(p); - if (oldp != USER_ADDR_NULL && oldlenp == NULL) - return (EFAULT); - len = strlen(str) + 1; - if (oldp && (len < 0 || *oldlenp < (size_t)len)) - return (ENOMEM); - if (newp && (maxlen < 0 || newlen >= (size_t)maxlen)) - return (EINVAL); - *oldlenp = len -1; /* deal with NULL strings correctly */ - if (oldp) { - error = copyout(str, oldp, len); + if (pg != PGRP_NULL) { + ep->e_pgid = p->p_pgrpid; + ep->e_jobc = pg->pg_jobc; + if (sessp != SESSION_NULL && sessp->s_ttyvp) + ep->e_flag = EPROC_CTTY; } - if (error == 0 && newp) { - error = copyin(newp, str, newlen); - str[newlen] = 0; - AUDIT_ARG(text, (char *)str); + ep->e_ppid = p->p_ppid; + if (p->p_ucred) { + my_cred = kauth_cred_proc_ref(p); + + /* A fake historical pcred */ + ep->e_pcred.p_ruid = kauth_cred_getruid(my_cred); + ep->e_pcred.p_svuid = kauth_cred_getsvuid(my_cred); + ep->e_pcred.p_rgid = kauth_cred_getrgid(my_cred); + ep->e_pcred.p_svgid = kauth_cred_getsvgid(my_cred); + + /* A fake historical *kauth_cred_t */ + ep->e_ucred.cr_ref = my_cred->cr_ref; + ep->e_ucred.cr_uid = kauth_cred_getuid(my_cred); + ep->e_ucred.cr_ngroups = posix_cred_get(my_cred)->cr_ngroups; + bcopy(posix_cred_get(my_cred)->cr_groups, + ep->e_ucred.cr_groups, NGROUPS * sizeof (gid_t)); + + kauth_cred_unref(&my_cred); } - return (error); + + if ((p->p_flag & P_CONTROLT) && (sessp != SESSION_NULL) && + (tp = SESSION_TP(sessp))) { + ep->e_tdev = tp->t_dev; + ep->e_tpgid = sessp->s_ttypgrpid; + } else + ep->e_tdev = NODEV; + + if (sessp != SESSION_NULL) { + if (SESS_LEADER(p, sessp)) + ep->e_flag |= EPROC_SLEADER; + session_rele(sessp); + } + if (pg != PGRP_NULL) + pg_rele(pg); } /* - * 
As above, but read-only. + * Fill in an eproc structure for the specified process. + * bzeroed by our caller, so only set non-zero fields. */ -int -sysctl_rdstring(user_addr_t oldp, size_t *oldlenp, - user_addr_t newp, char *str) +STATIC void +fill_user32_externproc(proc_t p, struct user32_extern_proc *__restrict exp) { - int len, error = 0; - - if (oldp != USER_ADDR_NULL && oldlenp == NULL) - return (EFAULT); - len = strlen(str) + 1; - if (oldp && *oldlenp < (size_t)len) - return (ENOMEM); - if (newp) - return (EPERM); - *oldlenp = len; - if (oldp) - error = copyout(str, oldp, len); - return (error); + exp->p_starttime.tv_sec = p->p_start.tv_sec; + exp->p_starttime.tv_usec = p->p_start.tv_usec; + exp->p_flag = p->p_flag; + if (p->p_lflag & P_LTRACED) + exp->p_flag |= P_TRACED; + if (p->p_lflag & P_LPPWAIT) + exp->p_flag |= P_PPWAIT; + if (p->p_lflag & P_LEXIT) + exp->p_flag |= P_WEXIT; + exp->p_stat = p->p_stat; + exp->p_pid = p->p_pid; + exp->p_oppid = p->p_oppid; + /* Mach related */ + exp->user_stack = p->user_stack; + exp->p_debugger = p->p_debugger; + exp->sigwait = p->sigwait; + /* scheduling */ +#ifdef _PROC_HAS_SCHEDINFO_ + exp->p_estcpu = p->p_estcpu; + exp->p_pctcpu = p->p_pctcpu; + exp->p_slptime = p->p_slptime; +#endif + exp->p_realtimer.it_interval.tv_sec = + (user32_time_t)p->p_realtimer.it_interval.tv_sec; + exp->p_realtimer.it_interval.tv_usec = + (__int32_t)p->p_realtimer.it_interval.tv_usec; + + exp->p_realtimer.it_value.tv_sec = + (user32_time_t)p->p_realtimer.it_value.tv_sec; + exp->p_realtimer.it_value.tv_usec = + (__int32_t)p->p_realtimer.it_value.tv_usec; + + exp->p_rtime.tv_sec = (user32_time_t)p->p_rtime.tv_sec; + exp->p_rtime.tv_usec = (__int32_t)p->p_rtime.tv_usec; + + exp->p_sigignore = p->p_sigignore; + exp->p_sigcatch = p->p_sigcatch; + exp->p_priority = p->p_priority; + exp->p_nice = p->p_nice; + bcopy(&p->p_comm, &exp->p_comm, MAXCOMLEN); + exp->p_xstat = p->p_xstat; + exp->p_acflag = p->p_acflag; } /* - * Validate parameters and 
get old / set new parameters - * for a structure oriented sysctl function. + * Fill in an LP64 version of extern_proc structure for the specified process. */ -int -sysctl_struct(user_addr_t oldp, size_t *oldlenp, - user_addr_t newp, size_t newlen, void *sp, int len) +STATIC void +fill_user64_externproc(proc_t p, struct user64_extern_proc *__restrict exp) { - int error = 0; + exp->p_starttime.tv_sec = p->p_start.tv_sec; + exp->p_starttime.tv_usec = p->p_start.tv_usec; + exp->p_flag = p->p_flag; + if (p->p_lflag & P_LTRACED) + exp->p_flag |= P_TRACED; + if (p->p_lflag & P_LPPWAIT) + exp->p_flag |= P_PPWAIT; + if (p->p_lflag & P_LEXIT) + exp->p_flag |= P_WEXIT; + exp->p_stat = p->p_stat; + exp->p_pid = p->p_pid; + exp->p_oppid = p->p_oppid; + /* Mach related */ + exp->user_stack = p->user_stack; + exp->p_debugger = p->p_debugger; + exp->sigwait = p->sigwait; + /* scheduling */ +#ifdef _PROC_HAS_SCHEDINFO_ + exp->p_estcpu = p->p_estcpu; + exp->p_pctcpu = p->p_pctcpu; + exp->p_slptime = p->p_slptime; +#endif + exp->p_realtimer.it_interval.tv_sec = p->p_realtimer.it_interval.tv_sec; + exp->p_realtimer.it_interval.tv_usec = p->p_realtimer.it_interval.tv_usec; - if (oldp != USER_ADDR_NULL && oldlenp == NULL) - return (EFAULT); - if (oldp && (len < 0 || *oldlenp < (size_t)len)) - return (ENOMEM); - if (newp && (len < 0 || newlen > (size_t)len)) - return (EINVAL); - if (oldp) { - *oldlenp = len; - error = copyout(sp, oldp, len); - } - if (error == 0 && newp) - error = copyin(newp, sp, len); - return (error); + exp->p_realtimer.it_value.tv_sec = p->p_realtimer.it_value.tv_sec; + exp->p_realtimer.it_value.tv_usec = p->p_realtimer.it_value.tv_usec; + + exp->p_rtime.tv_sec = p->p_rtime.tv_sec; + exp->p_rtime.tv_usec = p->p_rtime.tv_usec; + + exp->p_sigignore = p->p_sigignore; + exp->p_sigcatch = p->p_sigcatch; + exp->p_priority = p->p_priority; + exp->p_nice = p->p_nice; + bcopy(&p->p_comm, &exp->p_comm, MAXCOMLEN); + exp->p_xstat = p->p_xstat; + exp->p_acflag = p->p_acflag; } 
-/* - * Validate parameters and get old parameters - * for a structure oriented sysctl function. - */ -int -sysctl_rdstruct(user_addr_t oldp, size_t *oldlenp, - user_addr_t newp, void *sp, int len) +STATIC void +fill_user32_proc(proc_t p, struct user32_kinfo_proc *__restrict kp) { - int error = 0; + /* on a 64 bit kernel, 32 bit users get some truncated information */ + fill_user32_externproc(p, &kp->kp_proc); + fill_user32_eproc(p, &kp->kp_eproc); +} - if (oldp != USER_ADDR_NULL && oldlenp == NULL) - return (EFAULT); - if (oldp && (len < 0 || *oldlenp < (size_t)len)) - return (ENOMEM); - if (newp) - return (EPERM); - *oldlenp = len; - if (oldp) - error = copyout(sp, oldp, len); - return (error); +STATIC void +fill_user64_proc(proc_t p, struct user64_kinfo_proc *__restrict kp) +{ + fill_user64_externproc(p, &kp->kp_proc); + fill_user64_eproc(p, &kp->kp_eproc); } -/* - * Get file structures. - */ -int -sysctl_file(user_addr_t where, size_t *sizep) +STATIC int +sysctl_kdebug_ops SYSCTL_HANDLER_ARGS { - int buflen, error; - struct fileglob *fg; - user_addr_t start = where; - struct extern_file nef; + __unused int cmd = oidp->oid_arg2; /* subcommand*/ + int *name = arg1; /* oid element argument vector */ + int namelen = arg2; /* number of oid element arguments */ + user_addr_t oldp = req->oldptr; /* user buffer copy out address */ + size_t *oldlenp = &req->oldlen; /* user buffer copy out size */ +// user_addr_t newp = req->newptr; /* user buffer copy in address */ +// size_t newlen = req->newlen; /* user buffer copy in size */ - buflen = *sizep; - if (where == USER_ADDR_NULL) { - /* - * overestimate by 10 files - */ - *sizep = sizeof(filehead) + (nfiles + 10) * sizeof(struct extern_file); - return (0); - } - - /* - * first copyout filehead - */ - if (buflen < 0 || (size_t)buflen < sizeof(filehead)) { - *sizep = 0; - return (0); - } - error = copyout((caddr_t)&filehead, where, sizeof(filehead)); - if (error) - return (error); - buflen -= sizeof(filehead); - where += 
sizeof(filehead); - - /* - * followed by an array of file structures - */ - for (fg = filehead.lh_first; fg != 0; fg = fg->f_list.le_next) { - if (buflen < 0 || (size_t)buflen < sizeof(struct extern_file)) { - *sizep = where - start; - return (ENOMEM); - } - nef.f_list.le_next = (struct extern_file *)fg->f_list.le_next; - nef.f_list.le_prev = (struct extern_file **)fg->f_list.le_prev; - nef.f_flag = (fg->fg_flag & FMASK); - nef.f_type = fg->fg_type; - nef.f_count = fg->fg_count; - nef.f_msgcount = fg->fg_msgcount; - nef.f_cred = fg->fg_cred; - nef.f_ops = fg->fg_ops; - nef.f_offset = fg->fg_offset; - nef.f_data = fg->fg_data; - error = copyout((caddr_t)&nef, where, sizeof (struct extern_file)); - if (error) - return (error); - buflen -= sizeof(struct extern_file); - where += sizeof(struct extern_file); - } - *sizep = where - start; - return (0); -} - -/* - * try over estimating by 5 procs - */ -#define KERN_PROCSLOP (5 * sizeof (struct kinfo_proc)) - -int -sysctl_doproc(int *name, u_int namelen, user_addr_t where, size_t *sizep) -{ - struct proc *p; - user_addr_t dp = where; - size_t needed = 0; - int buflen = where != USER_ADDR_NULL ? *sizep : 0; - int doingzomb; - int error = 0; - boolean_t is_64_bit = FALSE; - struct kinfo_proc kproc; - struct user_kinfo_proc user_kproc; - int sizeof_kproc; - caddr_t kprocp; - - if (namelen != 2 && !(namelen == 1 && name[0] == KERN_PROC_ALL)) - return (EINVAL); - p = allproc.lh_first; - doingzomb = 0; - is_64_bit = proc_is64bit(current_proc()); - if (is_64_bit) { - sizeof_kproc = sizeof(user_kproc); - kprocp = (caddr_t) &user_kproc; - } - else { - sizeof_kproc = sizeof(kproc); - kprocp = (caddr_t) &kproc; - } -again: - for (; p != 0; p = p->p_list.le_next) { - /* - * Skip embryonic processes. - */ - if (p->p_stat == SIDL) - continue; - /* - * TODO - make more efficient (see notes below). - * do by session. 
- */ - switch (name[0]) { - - case KERN_PROC_PID: - /* could do this with just a lookup */ - if (p->p_pid != (pid_t)name[1]) - continue; - break; - - case KERN_PROC_PGRP: - /* could do this by traversing pgrp */ - if (p->p_pgrp->pg_id != (pid_t)name[1]) - continue; - break; - - case KERN_PROC_TTY: - if ((p->p_flag & P_CONTROLT) == 0 || - (p->p_session == NULL) || - p->p_session->s_ttyp == NULL || - p->p_session->s_ttyp->t_dev != (dev_t)name[1]) - continue; - break; - - case KERN_PROC_UID: - if ((p->p_ucred == NULL) || - (kauth_cred_getuid(p->p_ucred) != (uid_t)name[1])) - continue; - break; - - case KERN_PROC_RUID: - if ((p->p_ucred == NULL) || - (p->p_ucred->cr_ruid != (uid_t)name[1])) - continue; - break; - } - if (buflen >= sizeof_kproc) { - bzero(kprocp, sizeof_kproc); - if (is_64_bit) { - fill_user_proc(p, (struct user_kinfo_proc *) kprocp); - } - else { - fill_proc(p, (struct kinfo_proc *) kprocp); - } - error = copyout(kprocp, dp, sizeof_kproc); - if (error) - return (error); - dp += sizeof_kproc; - buflen -= sizeof_kproc; - } - needed += sizeof_kproc; - } - if (doingzomb == 0) { - p = zombproc.lh_first; - doingzomb++; - goto again; - } - if (where != USER_ADDR_NULL) { - *sizep = dp - where; - if (needed > *sizep) - return (ENOMEM); - } else { - needed += KERN_PROCSLOP; - *sizep = needed; - } - return (0); -} - -/* - * Fill in an eproc structure for the specified process. - */ -static void -fill_eproc(p, ep) - register struct proc *p; - register struct eproc *ep; -{ - register struct tty *tp; - - ep->e_paddr = p; - if (p->p_pgrp) { - ep->e_sess = p->p_pgrp->pg_session; - ep->e_pgid = p->p_pgrp->pg_id; - ep->e_jobc = p->p_pgrp->pg_jobc; - if (ep->e_sess && ep->e_sess->s_ttyvp) - ep->e_flag = EPROC_CTTY; - } else { - ep->e_sess = (struct session *)0; - ep->e_pgid = 0; - ep->e_jobc = 0; - } - ep->e_ppid = (p->p_pptr) ? 
p->p_pptr->p_pid : 0; - /* Pre-zero the fake historical pcred */ - bzero(&ep->e_pcred, sizeof(struct _pcred)); - if (p->p_ucred) { - /* XXX not ref-counted */ - - /* A fake historical pcred */ - ep->e_pcred.p_ruid = p->p_ucred->cr_ruid; - ep->e_pcred.p_svuid = p->p_ucred->cr_svuid; - ep->e_pcred.p_rgid = p->p_ucred->cr_rgid; - ep->e_pcred.p_svgid = p->p_ucred->cr_svgid; - - /* A fake historical *kauth_cred_t */ - ep->e_ucred.cr_ref = p->p_ucred->cr_ref; - ep->e_ucred.cr_uid = kauth_cred_getuid(p->p_ucred); - ep->e_ucred.cr_ngroups = p->p_ucred->cr_ngroups; - bcopy(p->p_ucred->cr_groups, ep->e_ucred.cr_groups, NGROUPS*sizeof(gid_t)); - - } - if (p->p_stat == SIDL || p->p_stat == SZOMB) { - ep->e_vm.vm_tsize = 0; - ep->e_vm.vm_dsize = 0; - ep->e_vm.vm_ssize = 0; - } - ep->e_vm.vm_rssize = 0; - - if ((p->p_flag & P_CONTROLT) && (ep->e_sess) && - (tp = ep->e_sess->s_ttyp)) { - ep->e_tdev = tp->t_dev; - ep->e_tpgid = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PID; - ep->e_tsess = tp->t_session; - } else - ep->e_tdev = NODEV; - - if (SESS_LEADER(p)) - ep->e_flag |= EPROC_SLEADER; - if (p->p_wmesg) - strncpy(ep->e_wmesg, p->p_wmesg, WMESGLEN); - ep->e_xsize = ep->e_xrssize = 0; - ep->e_xccount = ep->e_xswrss = 0; -} - -/* - * Fill in an LP64 version of eproc structure for the specified process. - */ -static void -fill_user_eproc(register struct proc *p, register struct user_eproc *ep) -{ - register struct tty *tp; - struct session *sessionp = NULL; - - ep->e_paddr = CAST_USER_ADDR_T(p); - if (p->p_pgrp) { - sessionp = p->p_pgrp->pg_session; - ep->e_sess = CAST_USER_ADDR_T(sessionp); - ep->e_pgid = p->p_pgrp->pg_id; - ep->e_jobc = p->p_pgrp->pg_jobc; - if (sessionp) { - if (sessionp->s_ttyvp) - ep->e_flag = EPROC_CTTY; - } - } else { - ep->e_sess = USER_ADDR_NULL; - ep->e_pgid = 0; - ep->e_jobc = 0; - } - ep->e_ppid = (p->p_pptr) ? 
p->p_pptr->p_pid : 0; - /* Pre-zero the fake historical pcred */ - bzero(&ep->e_pcred, sizeof(ep->e_pcred)); - if (p->p_ucred) { - /* XXX not ref-counted */ - - /* A fake historical pcred */ - ep->e_pcred.p_ruid = p->p_ucred->cr_ruid; - ep->e_pcred.p_svuid = p->p_ucred->cr_svuid; - ep->e_pcred.p_rgid = p->p_ucred->cr_rgid; - ep->e_pcred.p_svgid = p->p_ucred->cr_svgid; - - /* A fake historical *kauth_cred_t */ - ep->e_ucred.cr_ref = p->p_ucred->cr_ref; - ep->e_ucred.cr_uid = kauth_cred_getuid(p->p_ucred); - ep->e_ucred.cr_ngroups = p->p_ucred->cr_ngroups; - bcopy(p->p_ucred->cr_groups, ep->e_ucred.cr_groups, NGROUPS*sizeof(gid_t)); - - } - if (p->p_stat == SIDL || p->p_stat == SZOMB) { - ep->e_vm.vm_tsize = 0; - ep->e_vm.vm_dsize = 0; - ep->e_vm.vm_ssize = 0; - } - ep->e_vm.vm_rssize = 0; - - if ((p->p_flag & P_CONTROLT) && (sessionp) && - (tp = sessionp->s_ttyp)) { - ep->e_tdev = tp->t_dev; - ep->e_tpgid = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PID; - ep->e_tsess = CAST_USER_ADDR_T(tp->t_session); - } else - ep->e_tdev = NODEV; - - if (SESS_LEADER(p)) - ep->e_flag |= EPROC_SLEADER; - if (p->p_wmesg) - strncpy(ep->e_wmesg, p->p_wmesg, WMESGLEN); - ep->e_xsize = ep->e_xrssize = 0; - ep->e_xccount = ep->e_xswrss = 0; -} - -/* - * Fill in an eproc structure for the specified process. 
- */ -static void -fill_externproc(p, exp) - register struct proc *p; - register struct extern_proc *exp; -{ - exp->p_forw = exp->p_back = NULL; - if (p->p_stats) - exp->p_starttime = p->p_stats->p_start; - exp->p_vmspace = NULL; - exp->p_sigacts = p->p_sigacts; - exp->p_flag = p->p_flag; - exp->p_stat = p->p_stat ; - exp->p_pid = p->p_pid ; - exp->p_oppid = p->p_oppid ; - exp->p_dupfd = p->p_dupfd ; - /* Mach related */ - exp->user_stack = CAST_DOWN(caddr_t, p->user_stack); - exp->exit_thread = p->exit_thread ; - exp->p_debugger = p->p_debugger ; - exp->sigwait = p->sigwait ; - /* scheduling */ - exp->p_estcpu = p->p_estcpu ; - exp->p_cpticks = p->p_cpticks ; - exp->p_pctcpu = p->p_pctcpu ; - exp->p_wchan = p->p_wchan ; - exp->p_wmesg = p->p_wmesg ; - exp->p_swtime = p->p_swtime ; - exp->p_slptime = p->p_slptime ; - bcopy(&p->p_realtimer, &exp->p_realtimer,sizeof(struct itimerval)); - bcopy(&p->p_rtime, &exp->p_rtime,sizeof(struct timeval)); - exp->p_uticks = p->p_uticks ; - exp->p_sticks = p->p_sticks ; - exp->p_iticks = p->p_iticks ; - exp->p_traceflag = p->p_traceflag ; - exp->p_tracep = p->p_tracep ; - exp->p_siglist = 0 ; /* No longer relevant */ - exp->p_textvp = p->p_textvp ; - exp->p_holdcnt = 0 ; - exp->p_sigmask = 0 ; /* no longer avaialable */ - exp->p_sigignore = p->p_sigignore ; - exp->p_sigcatch = p->p_sigcatch ; - exp->p_priority = p->p_priority ; - exp->p_usrpri = p->p_usrpri ; - exp->p_nice = p->p_nice ; - bcopy(&p->p_comm, &exp->p_comm,MAXCOMLEN); - exp->p_comm[MAXCOMLEN] = '\0'; - exp->p_pgrp = p->p_pgrp ; - exp->p_addr = NULL; - exp->p_xstat = p->p_xstat ; - exp->p_acflag = p->p_acflag ; - exp->p_ru = p->p_ru ; /* XXX may be NULL */ -} - -/* - * Fill in an LP64 version of extern_proc structure for the specified process. 
- */ -static void -fill_user_externproc(register struct proc *p, register struct user_extern_proc *exp) -{ - exp->p_forw = exp->p_back = USER_ADDR_NULL; - if (p->p_stats) { - exp->p_starttime.tv_sec = p->p_stats->p_start.tv_sec; - exp->p_starttime.tv_usec = p->p_stats->p_start.tv_usec; - } - exp->p_vmspace = USER_ADDR_NULL; - exp->p_sigacts = CAST_USER_ADDR_T(p->p_sigacts); - exp->p_flag = p->p_flag; - exp->p_stat = p->p_stat ; - exp->p_pid = p->p_pid ; - exp->p_oppid = p->p_oppid ; - exp->p_dupfd = p->p_dupfd ; - /* Mach related */ - exp->user_stack = p->user_stack; - exp->exit_thread = CAST_USER_ADDR_T(p->exit_thread); - exp->p_debugger = p->p_debugger ; - exp->sigwait = p->sigwait ; - /* scheduling */ - exp->p_estcpu = p->p_estcpu ; - exp->p_cpticks = p->p_cpticks ; - exp->p_pctcpu = p->p_pctcpu ; - exp->p_wchan = CAST_USER_ADDR_T(p->p_wchan); - exp->p_wmesg = CAST_USER_ADDR_T(p->p_wmesg); - exp->p_swtime = p->p_swtime ; - exp->p_slptime = p->p_slptime ; - exp->p_realtimer.it_interval.tv_sec = p->p_realtimer.it_interval.tv_sec; - exp->p_realtimer.it_interval.tv_usec = p->p_realtimer.it_interval.tv_usec; - exp->p_realtimer.it_value.tv_sec = p->p_realtimer.it_value.tv_sec; - exp->p_realtimer.it_value.tv_usec = p->p_realtimer.it_value.tv_usec; - exp->p_rtime.tv_sec = p->p_rtime.tv_sec; - exp->p_rtime.tv_usec = p->p_rtime.tv_usec; - exp->p_uticks = p->p_uticks ; - exp->p_sticks = p->p_sticks ; - exp->p_iticks = p->p_iticks ; - exp->p_traceflag = p->p_traceflag ; - exp->p_tracep = CAST_USER_ADDR_T(p->p_tracep); - exp->p_siglist = 0 ; /* No longer relevant */ - exp->p_textvp = CAST_USER_ADDR_T(p->p_textvp); - exp->p_holdcnt = 0 ; - exp->p_sigmask = 0 ; /* no longer avaialable */ - exp->p_sigignore = p->p_sigignore ; - exp->p_sigcatch = p->p_sigcatch ; - exp->p_priority = p->p_priority ; - exp->p_usrpri = p->p_usrpri ; - exp->p_nice = p->p_nice ; - bcopy(&p->p_comm, &exp->p_comm,MAXCOMLEN); - exp->p_comm[MAXCOMLEN] = '\0'; - exp->p_pgrp = CAST_USER_ADDR_T(p->p_pgrp); - 
exp->p_addr = USER_ADDR_NULL; - exp->p_xstat = p->p_xstat ; - exp->p_acflag = p->p_acflag ; - exp->p_ru = CAST_USER_ADDR_T(p->p_ru); /* XXX may be NULL */ -} - -static void -fill_proc(p, kp) - register struct proc *p; - register struct kinfo_proc *kp; -{ - fill_externproc(p, &kp->kp_proc); - fill_eproc(p, &kp->kp_eproc); -} - -static void -fill_user_proc(register struct proc *p, register struct user_kinfo_proc *kp) -{ - fill_user_externproc(p, &kp->kp_proc); - fill_user_eproc(p, &kp->kp_eproc); -} - -int -kdebug_ops(int *name, u_int namelen, user_addr_t where, - size_t *sizep, struct proc *p) -{ int ret=0; - ret = suser(kauth_cred_get(), &p->p_acflag); - if (ret) - return(ret); + if (namelen == 0) + return(ENOTSUP); switch(name[0]) { case KERN_KDEFLAGS: @@ -1687,88 +1160,125 @@ kdebug_ops(int *name, u_int namelen, user_addr_t where, case KERN_KDSETREG: case KERN_KDGETREG: case KERN_KDREADTR: + case KERN_KDWRITETR: + case KERN_KDWRITEMAP: + case KERN_KDTEST: case KERN_KDPIDTR: case KERN_KDTHRMAP: case KERN_KDPIDEX: - case KERN_KDSETRTCDEC: case KERN_KDSETBUF: - case KERN_KDGETENTROPY: - ret = kdbg_control(name, namelen, where, sizep); - break; + case KERN_KDREADCURTHRMAP: + case KERN_KDSET_TYPEFILTER: + case KERN_KDBUFWAIT: + case KERN_KDCPUMAP: + case KERN_KDWRITEMAP_V3: + case KERN_KDWRITETR_V3: + ret = kdbg_control(name, namelen, oldp, oldlenp); + break; default: ret= ENOTSUP; break; } - return(ret); -} - -extern int pcsamples_control(int *name, u_int namelen, user_addr_t where, - size_t * sizep); - -int -pcsamples_ops(int *name, u_int namelen, user_addr_t where, - size_t *sizep, struct proc *p) -{ - int ret=0; - ret = suser(kauth_cred_get(), &p->p_acflag); - if (ret) - return(ret); + /* adjust index so we return the right required/consumed amount */ + if (!ret) + req->oldidx += req->oldlen; - switch(name[0]) { - case KERN_PCDISABLE: - case KERN_PCGETBUF: - case KERN_PCSETUP: - case KERN_PCREMOVE: - case KERN_PCREADBUF: - case KERN_PCSETREG: - case KERN_PCSETBUF: 
- case KERN_PCCOMM: - ret = pcsamples_control(name, namelen, where, sizep); - break; - default: - ret= ENOTSUP; - break; - } - return(ret); + return (ret); } +SYSCTL_PROC(_kern, KERN_KDEBUG, kdebug, CTLTYPE_NODE|CTLFLAG_RD | CTLFLAG_LOCKED, + 0, /* Pointer argument (arg1) */ + 0, /* Integer argument (arg2) */ + sysctl_kdebug_ops, /* Handler function */ + NULL, /* Data pointer */ + ""); + +#if !CONFIG_EMBEDDED /* * Return the top *sizep bytes of the user stack, or the entire area of the * user stack down through the saved exec_path, whichever is smaller. */ -int -sysctl_procargs(int *name, u_int namelen, user_addr_t where, - size_t *sizep, struct proc *cur_proc) +STATIC int +sysctl_doprocargs SYSCTL_HANDLER_ARGS { - return sysctl_procargsx( name, namelen, where, sizep, cur_proc, 0); -} + __unused int cmd = oidp->oid_arg2; /* subcommand*/ + int *name = arg1; /* oid element argument vector */ + int namelen = arg2; /* number of oid element arguments */ + user_addr_t oldp = req->oldptr; /* user buffer copy out address */ + size_t *oldlenp = &req->oldlen; /* user buffer copy out size */ +// user_addr_t newp = req->newptr; /* user buffer copy in address */ +// size_t newlen = req->newlen; /* user buffer copy in size */ + int error; -static int -sysctl_procargs2(int *name, u_int namelen, user_addr_t where, - size_t *sizep, struct proc *cur_proc) -{ - return sysctl_procargsx( name, namelen, where, sizep, cur_proc, 1); + error = sysctl_procargsx( name, namelen, oldp, oldlenp, current_proc(), 0); + + /* adjust index so we return the right required/consumed amount */ + if (!error) + req->oldidx += req->oldlen; + + return (error); } +SYSCTL_PROC(_kern, KERN_PROCARGS, procargs, CTLTYPE_NODE|CTLFLAG_RD | CTLFLAG_LOCKED, + 0, /* Pointer argument (arg1) */ + 0, /* Integer argument (arg2) */ + sysctl_doprocargs, /* Handler function */ + NULL, /* Data pointer */ + ""); +#endif /* !CONFIG_EMBEDDED */ + +STATIC int +sysctl_doprocargs2 SYSCTL_HANDLER_ARGS +{ + __unused int cmd = 
oidp->oid_arg2; /* subcommand*/ + int *name = arg1; /* oid element argument vector */ + int namelen = arg2; /* number of oid element arguments */ + user_addr_t oldp = req->oldptr; /* user buffer copy out address */ + size_t *oldlenp = &req->oldlen; /* user buffer copy out size */ +// user_addr_t newp = req->newptr; /* user buffer copy in address */ +// size_t newlen = req->newlen; /* user buffer copy in size */ + int error; -static int -sysctl_procargsx(int *name, __unused u_int namelen, user_addr_t where, - size_t *sizep, struct proc *cur_proc, int argc_yes) + error = sysctl_procargsx( name, namelen, oldp, oldlenp, current_proc(), 1); + + /* adjust index so we return the right required/consumed amount */ + if (!error) + req->oldidx += req->oldlen; + + return (error); +} +SYSCTL_PROC(_kern, KERN_PROCARGS2, procargs2, CTLTYPE_NODE|CTLFLAG_RD | CTLFLAG_LOCKED, + 0, /* Pointer argument (arg1) */ + 0, /* Integer argument (arg2) */ + sysctl_doprocargs2, /* Handler function */ + NULL, /* Data pointer */ + ""); + +STATIC int +sysctl_procargsx(int *name, u_int namelen, user_addr_t where, + size_t *sizep, proc_t cur_proc, int argc_yes) { - struct proc *p; + proc_t p; int buflen = where != USER_ADDR_NULL ? *sizep : 0; int error = 0; - struct vm_map *proc_map; + struct _vm_map *proc_map; struct task * task; vm_map_copy_t tmp; user_addr_t arg_addr; size_t arg_size; caddr_t data; + size_t argslen=0; int size; vm_offset_t copy_start, copy_end; kern_return_t ret; int pid; + kauth_cred_t my_cred; + uid_t uid; + int argc = -1; + if ( namelen < 1 ) + return(EINVAL); + if (argc_yes) buflen -= sizeof(int); /* reserve first word to return argc */ @@ -1785,7 +1295,7 @@ sysctl_procargsx(int *name, __unused u_int namelen, user_addr_t where, * Lookup process by pid */ pid = name[0]; - p = pfind(pid); + p = proc_find(pid); if (p == NULL) { return(EINVAL); } @@ -1800,49 +1310,64 @@ sysctl_procargsx(int *name, __unused u_int namelen, user_addr_t where, * size. 
*/ - if (!p->user_stack) + if (!p->user_stack) { + proc_rele(p); return(EINVAL); + } if (where == USER_ADDR_NULL) { /* caller only wants to know length of proc args data */ - if (sizep == NULL) + if (sizep == NULL) { + proc_rele(p); return(EFAULT); - - size = p->p_argslen; - if (argc_yes) { - size += sizeof(int); - } - else { + } + + size = p->p_argslen; + proc_rele(p); + if (argc_yes) { + size += sizeof(int); + } else { /* * old PROCARGS will return the executable's path and plus some * extra space for work alignment and data tags */ - size += PATH_MAX + (6 * sizeof(int)); - } + size += PATH_MAX + (6 * sizeof(int)); + } size += (size & (sizeof(int) - 1)) ? (sizeof(int) - (size & (sizeof(int) - 1))) : 0; *sizep = size; return (0); } - - if ((kauth_cred_getuid(p->p_ucred) != kauth_cred_getuid(kauth_cred_get())) - && suser(kauth_cred_get(), &cur_proc->p_acflag)) + + my_cred = kauth_cred_proc_ref(p); + uid = kauth_cred_getuid(my_cred); + kauth_cred_unref(&my_cred); + + if ((uid != kauth_cred_getuid(kauth_cred_get())) + && suser(kauth_cred_get(), &cur_proc->p_acflag)) { + proc_rele(p); return (EINVAL); + } if ((u_int)arg_size > p->p_argslen) arg_size = round_page(p->p_argslen); arg_addr = p->user_stack - arg_size; - /* * Before we can block (any VM code), make another * reference to the map to keep it alive. We do * that by getting a reference on the task itself. */ task = p->task; - if (task == NULL) + if (task == NULL) { + proc_rele(p); return(EINVAL); - + } + + /* save off argc before releasing the proc */ + argc = p->p_argc; + + argslen = p->p_argslen; /* * Once we have a task reference we can convert that into a * map reference, which we will use in the calls below. The @@ -1851,13 +1376,15 @@ sysctl_procargsx(int *name, __unused u_int namelen, user_addr_t where, * of stale info (which is always a possibility). 
*/ task_reference(task); + proc_rele(p); proc_map = get_task_map_reference(task); task_deallocate(task); + if (proc_map == NULL) return(EINVAL); - ret = kmem_alloc(kernel_map, &copy_start, round_page(arg_size)); + ret = kmem_alloc(kernel_map, &copy_start, round_page(arg_size), VM_KERN_MEMORY_BSD); if (ret != KERN_SUCCESS) { vm_map_deallocate(proc_map); return(ENOMEM); @@ -1884,20 +1411,35 @@ sysctl_procargsx(int *name, __unused u_int namelen, user_addr_t where, tmp, FALSE) != KERN_SUCCESS) { kmem_free(kernel_map, copy_start, round_page(arg_size)); + vm_map_copy_discard(tmp); return (EIO); } - if (arg_size > p->p_argslen) { - data = (caddr_t) (copy_end - p->p_argslen); - size = p->p_argslen; + if (arg_size > argslen) { + data = (caddr_t) (copy_end - argslen); + size = argslen; } else { data = (caddr_t) (copy_end - arg_size); size = arg_size; } + /* + * When these sysctls were introduced, the first string in the strings + * section was just the bare path of the executable. However, for security + * reasons we now prefix this string with executable_path= so it can be + * parsed getenv style. To avoid binary compatability issues with exising + * callers of this sysctl, we strip it off here if present. + * (rdar://problem/13746466) + */ +#define EXECUTABLE_KEY "executable_path=" + if (strncmp(EXECUTABLE_KEY, data, strlen(EXECUTABLE_KEY)) == 0){ + data += strlen(EXECUTABLE_KEY); + size -= strlen(EXECUTABLE_KEY); + } + if (argc_yes) { /* Put processes argc as the first word in the copyout buffer */ - suword(where, p->p_argc); + suword(where, argc); error = copyout(data, (where + sizeof(int)), size); size += sizeof(int); } else { @@ -1911,7 +1453,7 @@ sysctl_procargsx(int *name, __unused u_int namelen, user_addr_t where, * * Note: we keep all pointers&sizes aligned to word boundries */ - if ( (! error) && (buflen > 0 && (u_int)buflen > p->p_argslen) ) + if ( (!
error) && (buflen > 0 && (u_int)buflen > argslen) ) { int binPath_sz, alignedBinPath_sz = 0; int extraSpaceNeeded, addThis; @@ -1924,366 +1466,2112 @@ sysctl_procargsx(int *name, __unused u_int namelen, user_addr_t where, of the executing binary. If we encounter an error, we bail. */ - /* Limit ourselves to PATH_MAX paths */ - if ( max_len > PATH_MAX ) max_len = PATH_MAX; + /* Limit ourselves to PATH_MAX paths */ + if ( max_len > PATH_MAX ) max_len = PATH_MAX; + + binPath_sz = 0; + + while ( (binPath_sz < max_len-1) && (*str++ != 0) ) + binPath_sz++; + + /* If we have a NUL terminator, copy it, too */ + if (binPath_sz < max_len-1) binPath_sz += 1; + + /* Pre-Flight the space requiremnts */ + + /* Account for the padding that fills out binPath to the next word */ + alignedBinPath_sz += (binPath_sz & (sizeof(int)-1)) ? (sizeof(int)-(binPath_sz & (sizeof(int)-1))) : 0; + + placeHere = where + size; + + /* Account for the bytes needed to keep placeHere word aligned */ + addThis = (placeHere & (sizeof(int)-1)) ? (sizeof(int)-(placeHere & (sizeof(int)-1))) : 0; + + /* Add up all the space that is needed */ + extraSpaceNeeded = alignedBinPath_sz + addThis + binPath_sz + (4 * sizeof(int)); + + /* is there is room to tack on argv[0]? */ + if ( (buflen & ~(sizeof(int)-1)) >= ( argslen + extraSpaceNeeded )) + { + placeHere += addThis; + suword(placeHere, 0); + placeHere += sizeof(int); + suword(placeHere, 0xBFFF0000); + placeHere += sizeof(int); + suword(placeHere, 0); + placeHere += sizeof(int); + error = copyout(data, placeHere, binPath_sz); + if ( ! 
error ) + { + placeHere += binPath_sz; + suword(placeHere, 0); + size += extraSpaceNeeded; + } + } + } + } + + if (copy_start != (vm_offset_t) 0) { + kmem_free(kernel_map, copy_start, copy_end - copy_start); + } + if (error) { + return(error); + } + + if (where != USER_ADDR_NULL) + *sizep = size; + return (0); +} + + +/* + * Max number of concurrent aio requests + */ +STATIC int +sysctl_aiomax +(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) +{ + int new_value, changed; + int error = sysctl_io_number(req, aio_max_requests, sizeof(int), &new_value, &changed); + if (changed) { + /* make sure the system-wide limit is greater than the per process limit */ + if (new_value >= aio_max_requests_per_process && new_value <= AIO_MAX_REQUESTS) + aio_max_requests = new_value; + else + error = EINVAL; + } + return(error); +} + + +/* + * Max number of concurrent aio requests per process + */ +STATIC int +sysctl_aioprocmax +(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) +{ + int new_value, changed; + int error = sysctl_io_number(req, aio_max_requests_per_process, sizeof(int), &new_value, &changed); + if (changed) { + /* make sure per process limit is less than the system-wide limit */ + if (new_value <= aio_max_requests && new_value >= AIO_LISTIO_MAX) + aio_max_requests_per_process = new_value; + else + error = EINVAL; + } + return(error); +} + + +/* + * Max number of async IO worker threads + */ +STATIC int +sysctl_aiothreads +(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) +{ + int new_value, changed; + int error = sysctl_io_number(req, aio_worker_threads, sizeof(int), &new_value, &changed); + if (changed) { + /* we only allow an increase in the number of worker threads */ + if (new_value > aio_worker_threads ) { + _aio_create_worker_threads((new_value - aio_worker_threads)); + aio_worker_threads = new_value; + } + else + error 
= EINVAL; + } + return(error); +} + + +/* + * System-wide limit on the max number of processes + */ +STATIC int +sysctl_maxproc +(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) +{ + int new_value, changed; + int error = sysctl_io_number(req, maxproc, sizeof(int), &new_value, &changed); + if (changed) { + AUDIT_ARG(value32, new_value); + /* make sure the system-wide limit is less than the configured hard + limit set at kernel compilation */ + if (new_value <= hard_maxproc && new_value > 0) + maxproc = new_value; + else + error = EINVAL; + } + return(error); +} + +SYSCTL_STRING(_kern, KERN_OSTYPE, ostype, + CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, + ostype, 0, ""); +SYSCTL_STRING(_kern, KERN_OSRELEASE, osrelease, + CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, + osrelease, 0, ""); +SYSCTL_INT(_kern, KERN_OSREV, osrevision, + CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, + (int *)NULL, BSD, ""); +SYSCTL_STRING(_kern, KERN_VERSION, version, + CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, + version, 0, ""); +SYSCTL_STRING(_kern, OID_AUTO, uuid, + CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, + &kernel_uuid_string[0], 0, ""); +#if DEBUG +#ifndef DKPR +#define DKPR 1 +#endif +#endif + +#if DKPR +int debug_kprint_syscall = 0; +char debug_kprint_syscall_process[MAXCOMLEN+1]; + +/* Thread safe: bits and string value are not used to reclaim state */ +SYSCTL_INT (_debug, OID_AUTO, kprint_syscall, + CTLFLAG_RW | CTLFLAG_LOCKED, &debug_kprint_syscall, 0, "kprintf syscall tracing"); +SYSCTL_STRING(_debug, OID_AUTO, kprint_syscall_process, + CTLFLAG_RW | CTLFLAG_LOCKED, debug_kprint_syscall_process, sizeof(debug_kprint_syscall_process), + "name of process for kprintf syscall tracing"); + +int debug_kprint_current_process(const char **namep) +{ + struct proc *p = current_proc(); + + if (p == NULL) { + return 0; + } + + if (debug_kprint_syscall_process[0]) { + /* user asked to scope tracing to a particular process name */ + if(0 == 
strncmp(debug_kprint_syscall_process, + p->p_comm, sizeof(debug_kprint_syscall_process))) { + /* no value in telling the user that we traced what they asked */ + if(namep) *namep = NULL; + + return 1; + } else { + return 0; + } + } + + /* trace all processes. Tell user what we traced */ + if (namep) { + *namep = p->p_comm; + } + + return 1; +} +#endif + +/* PR-5293665: need to use a callback function for kern.osversion to set + * osversion in IORegistry */ + +STATIC int +sysctl_osversion(__unused struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req) +{ + int rval = 0; + + rval = sysctl_handle_string(oidp, arg1, arg2, req); + + if (req->newptr) { + IORegistrySetOSBuildVersion((char *)arg1); + } + + return rval; +} + +SYSCTL_PROC(_kern, KERN_OSVERSION, osversion, + CTLFLAG_RW | CTLFLAG_KERN | CTLTYPE_STRING | CTLFLAG_LOCKED, + osversion, 256 /* OSVERSIZE*/, + sysctl_osversion, "A", ""); + +static uint64_t osproductversion_string[48]; + +STATIC int +sysctl_osproductversion(__unused struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req) +{ + if (req->newptr != 0) { + /* + * Can only ever be set by launchd, and only once at boot. + */ + if (req->p->p_pid != 1 || osproductversion_string[0] != '\0') { + return EPERM; + } + } + + return sysctl_handle_string(oidp, arg1, arg2, req); +} + +SYSCTL_PROC(_kern, OID_AUTO, osproductversion, + CTLFLAG_RW | CTLFLAG_KERN | CTLTYPE_STRING | CTLFLAG_LOCKED, + osproductversion_string, sizeof(osproductversion_string), + sysctl_osproductversion, "A", "The ProductVersion from SystemVersion.plist"); + +static uint64_t osvariant_status = 0; + +STATIC int +sysctl_osvariant_status(__unused struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req) +{ + if (req->newptr != 0) { + /* + * Can only ever be set by launchd, and only once at boot. 
+ */ + if (req->p->p_pid != 1 || osvariant_status != 0) { + return EPERM; + } + } + + return sysctl_handle_quad(oidp, arg1, arg2, req); +} + +SYSCTL_PROC(_kern, OID_AUTO, osvariant_status, + CTLFLAG_RW | CTLTYPE_QUAD | CTLFLAG_LOCKED | CTLFLAG_MASKED, + &osvariant_status, sizeof(osvariant_status), + sysctl_osvariant_status, "Q", "Opaque flags used to cache OS variant information"); + +STATIC int +sysctl_sysctl_bootargs +(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) +{ + int error; + /* BOOT_LINE_LENGTH */ +#if CONFIG_EMBEDDED + size_t boot_args_len = 256; +#else + size_t boot_args_len = 1024; +#endif + char buf[boot_args_len]; + + strlcpy(buf, PE_boot_args(), boot_args_len); + error = sysctl_io_string(req, buf, boot_args_len, 0, NULL); + return(error); +} + +SYSCTL_PROC(_kern, OID_AUTO, bootargs, + CTLFLAG_LOCKED | CTLFLAG_RD | CTLFLAG_KERN | CTLTYPE_STRING, + NULL, 0, + sysctl_sysctl_bootargs, "A", "bootargs"); + +SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, + CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED, + &maxfiles, 0, ""); +SYSCTL_INT(_kern, KERN_ARGMAX, argmax, + CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, + (int *)NULL, ARG_MAX, ""); +SYSCTL_INT(_kern, KERN_POSIX1, posix1version, + CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, + (int *)NULL, _POSIX_VERSION, ""); +SYSCTL_INT(_kern, KERN_NGROUPS, ngroups, + CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, + (int *)NULL, NGROUPS_MAX, ""); +SYSCTL_INT(_kern, KERN_JOB_CONTROL, job_control, + CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, + (int *)NULL, 1, ""); +#if 1 /* _POSIX_SAVED_IDS from <unistd.h> */ +SYSCTL_INT(_kern, KERN_SAVED_IDS, saved_ids, + CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, + (int *)NULL, 1, ""); +#else +SYSCTL_INT(_kern, KERN_SAVED_IDS, saved_ids, + CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, + NULL, 0, ""); +#endif +SYSCTL_INT(_kern, OID_AUTO, num_files, + CTLFLAG_RD | CTLFLAG_LOCKED, + &nfiles, 0, ""); +SYSCTL_COMPAT_INT(_kern, OID_AUTO, num_vnodes, + CTLFLAG_RD
| CTLFLAG_LOCKED, + &numvnodes, 0, ""); +SYSCTL_INT(_kern, OID_AUTO, num_tasks, + CTLFLAG_RD | CTLFLAG_LOCKED, + &task_max, 0, ""); +SYSCTL_INT(_kern, OID_AUTO, num_threads, + CTLFLAG_RD | CTLFLAG_LOCKED, + &thread_max, 0, ""); +SYSCTL_INT(_kern, OID_AUTO, num_taskthreads, + CTLFLAG_RD | CTLFLAG_LOCKED, + &task_threadmax, 0, ""); + +STATIC int +sysctl_maxvnodes (__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) +{ + int oldval = desiredvnodes; + int error = sysctl_io_number(req, desiredvnodes, sizeof(int), &desiredvnodes, NULL); + + if (oldval != desiredvnodes) { + resize_namecache(desiredvnodes); + } + + return(error); +} + +SYSCTL_INT(_kern, OID_AUTO, namecache_disabled, + CTLFLAG_RW | CTLFLAG_LOCKED, + &nc_disabled, 0, ""); + +SYSCTL_PROC(_kern, KERN_MAXVNODES, maxvnodes, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, + 0, 0, sysctl_maxvnodes, "I", ""); + +SYSCTL_PROC(_kern, KERN_MAXPROC, maxproc, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, + 0, 0, sysctl_maxproc, "I", ""); + +SYSCTL_PROC(_kern, KERN_AIOMAX, aiomax, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, + 0, 0, sysctl_aiomax, "I", ""); + +SYSCTL_PROC(_kern, KERN_AIOPROCMAX, aioprocmax, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, + 0, 0, sysctl_aioprocmax, "I", ""); + +SYSCTL_PROC(_kern, KERN_AIOTHREADS, aiothreads, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, + 0, 0, sysctl_aiothreads, "I", ""); + +#if (DEVELOPMENT || DEBUG) +extern int sched_smt_balance; +SYSCTL_INT(_kern, OID_AUTO, sched_smt_balance, + CTLFLAG_KERN| CTLFLAG_RW| CTLFLAG_LOCKED, + &sched_smt_balance, 0, ""); +extern int sched_allow_rt_smt; +SYSCTL_INT(_kern, OID_AUTO, sched_allow_rt_smt, + CTLFLAG_KERN| CTLFLAG_RW| CTLFLAG_LOCKED, + &sched_allow_rt_smt, 0, ""); +#if __arm__ || __arm64__ +extern uint32_t perfcontrol_requested_recommended_cores; +SYSCTL_UINT(_kern, OID_AUTO, sched_recommended_cores, + CTLFLAG_KERN | CTLFLAG_RD | CTLFLAG_LOCKED, + &perfcontrol_requested_recommended_cores, 0, 
""); + +/* Scheduler perfcontrol callouts sysctls */ +SYSCTL_DECL(_kern_perfcontrol_callout); +SYSCTL_NODE(_kern, OID_AUTO, perfcontrol_callout, CTLFLAG_RW | CTLFLAG_LOCKED, 0, + "scheduler perfcontrol callouts"); + +extern int perfcontrol_callout_stats_enabled; +SYSCTL_INT(_kern_perfcontrol_callout, OID_AUTO, stats_enabled, + CTLFLAG_KERN| CTLFLAG_RW| CTLFLAG_LOCKED, + &perfcontrol_callout_stats_enabled, 0, ""); + +extern uint64_t perfcontrol_callout_stat_avg(perfcontrol_callout_type_t type, + perfcontrol_callout_stat_t stat); + +/* On-Core Callout */ +STATIC int +sysctl_perfcontrol_callout_stat +(__unused struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req) +{ + perfcontrol_callout_stat_t stat = (perfcontrol_callout_stat_t)arg1; + perfcontrol_callout_type_t type = (perfcontrol_callout_type_t)arg2; + return sysctl_io_number(req, (int)perfcontrol_callout_stat_avg(type, stat), + sizeof(int), NULL, NULL); +} + +SYSCTL_PROC(_kern_perfcontrol_callout, OID_AUTO, oncore_instr, + CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, + (void *)PERFCONTROL_STAT_INSTRS, PERFCONTROL_CALLOUT_ON_CORE, + sysctl_perfcontrol_callout_stat, "I", ""); +SYSCTL_PROC(_kern_perfcontrol_callout, OID_AUTO, oncore_cycles, + CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, + (void *)PERFCONTROL_STAT_CYCLES, PERFCONTROL_CALLOUT_ON_CORE, + sysctl_perfcontrol_callout_stat, "I", ""); +SYSCTL_PROC(_kern_perfcontrol_callout, OID_AUTO, offcore_instr, + CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, + (void *)PERFCONTROL_STAT_INSTRS, PERFCONTROL_CALLOUT_OFF_CORE, + sysctl_perfcontrol_callout_stat, "I", ""); +SYSCTL_PROC(_kern_perfcontrol_callout, OID_AUTO, offcore_cycles, + CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, + (void *)PERFCONTROL_STAT_CYCLES, PERFCONTROL_CALLOUT_OFF_CORE, + sysctl_perfcontrol_callout_stat, "I", ""); +SYSCTL_PROC(_kern_perfcontrol_callout, OID_AUTO, context_instr, + CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, + (void *)PERFCONTROL_STAT_INSTRS, PERFCONTROL_CALLOUT_CONTEXT, + 
sysctl_perfcontrol_callout_stat, "I", ""); +SYSCTL_PROC(_kern_perfcontrol_callout, OID_AUTO, context_cycles, + CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, + (void *)PERFCONTROL_STAT_CYCLES, PERFCONTROL_CALLOUT_CONTEXT, + sysctl_perfcontrol_callout_stat, "I", ""); +SYSCTL_PROC(_kern_perfcontrol_callout, OID_AUTO, update_instr, + CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, + (void *)PERFCONTROL_STAT_INSTRS, PERFCONTROL_CALLOUT_STATE_UPDATE, + sysctl_perfcontrol_callout_stat, "I", ""); +SYSCTL_PROC(_kern_perfcontrol_callout, OID_AUTO, update_cycles, + CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, + (void *)PERFCONTROL_STAT_CYCLES, PERFCONTROL_CALLOUT_STATE_UPDATE, + sysctl_perfcontrol_callout_stat, "I", ""); + +#endif /* __arm__ || __arm64__ */ +#endif /* (DEVELOPMENT || DEBUG) */ + +STATIC int +sysctl_securelvl +(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) +{ + int new_value, changed; + int error = sysctl_io_number(req, securelevel, sizeof(int), &new_value, &changed); + if (changed) { + if (!(new_value < securelevel && req->p->p_pid != 1)) { + proc_list_lock(); + securelevel = new_value; + proc_list_unlock(); + } else { + error = EPERM; + } + } + return(error); +} + +SYSCTL_PROC(_kern, KERN_SECURELVL, securelevel, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, + 0, 0, sysctl_securelvl, "I", ""); + + +STATIC int +sysctl_domainname +(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) +{ + int error, changed; + error = sysctl_io_string(req, domainname, sizeof(domainname), 0, &changed); + if (changed) { + domainnamelen = strlen(domainname); + } + return(error); +} + +SYSCTL_PROC(_kern, KERN_DOMAINNAME, nisdomainname, + CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_LOCKED, + 0, 0, sysctl_domainname, "A", ""); + +SYSCTL_COMPAT_INT(_kern, KERN_HOSTID, hostid, + CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED, + &hostid, 0, ""); + +STATIC int +sysctl_hostname +(__unused struct sysctl_oid *oidp, 
__unused void *arg1, __unused int arg2, struct sysctl_req *req) +{ + int error, changed; + error = sysctl_io_string(req, hostname, sizeof(hostname), 1, &changed); + if (changed) { + hostnamelen = req->newlen; + } + return(error); +} + + +SYSCTL_PROC(_kern, KERN_HOSTNAME, hostname, + CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_LOCKED, + 0, 0, sysctl_hostname, "A", ""); + +STATIC int +sysctl_procname +(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) +{ + /* Original code allowed writing, I'm copying this, although this all makes + no sense to me. Besides, this sysctl is never used. */ + return sysctl_io_string(req, &req->p->p_name[0], (2*MAXCOMLEN+1), 1, NULL); +} + +SYSCTL_PROC(_kern, KERN_PROCNAME, procname, + CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED, + 0, 0, sysctl_procname, "A", ""); + +SYSCTL_INT(_kern, KERN_SPECULATIVE_READS, speculative_reads_disabled, + CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED, + &speculative_reads_disabled, 0, ""); + +SYSCTL_UINT(_kern, OID_AUTO, preheat_max_bytes, + CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED, + &preheat_max_bytes, 0, ""); + +SYSCTL_UINT(_kern, OID_AUTO, preheat_min_bytes, + CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED, + &preheat_min_bytes, 0, ""); + +SYSCTL_UINT(_kern, OID_AUTO, speculative_prefetch_max, + CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED, + &speculative_prefetch_max, 0, ""); + +SYSCTL_UINT(_kern, OID_AUTO, speculative_prefetch_max_iosize, + CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED, + &speculative_prefetch_max_iosize, 0, ""); + +SYSCTL_UINT(_kern, OID_AUTO, vm_page_free_target, + CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED, + &vm_page_free_target, 0, ""); + +SYSCTL_UINT(_kern, OID_AUTO, vm_page_free_min, + CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED, + &vm_page_free_min, 0, ""); + +SYSCTL_UINT(_kern, OID_AUTO, vm_page_free_reserved, + CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED, + &vm_page_free_reserved, 0, ""); + +SYSCTL_UINT(_kern, OID_AUTO, 
vm_page_speculative_percentage, + CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED, + &vm_page_speculative_percentage, 0, ""); + +SYSCTL_UINT(_kern, OID_AUTO, vm_page_speculative_q_age_ms, + CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED, + &vm_page_speculative_q_age_ms, 0, ""); + +SYSCTL_UINT(_kern, OID_AUTO, vm_max_delayed_work_limit, + CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED, + &vm_max_delayed_work_limit, 0, ""); + +SYSCTL_UINT(_kern, OID_AUTO, vm_max_batch, + CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED, + &vm_max_batch, 0, ""); + +SYSCTL_STRING(_kern, OID_AUTO, bootsessionuuid, + CTLFLAG_RD | CTLFLAG_LOCKED, + &bootsessionuuid_string, sizeof(bootsessionuuid_string) , ""); + +STATIC int +sysctl_boottime +(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) +{ + struct timeval tv; + boottime_timeval(&tv); + struct proc *p = req->p; + + if (proc_is64bit(p)) { + struct user64_timeval t = {}; + t.tv_sec = tv.tv_sec; + t.tv_usec = tv.tv_usec; + return sysctl_io_opaque(req, &t, sizeof(t), NULL); + } else { + struct user32_timeval t = {}; + t.tv_sec = tv.tv_sec; + t.tv_usec = tv.tv_usec; + return sysctl_io_opaque(req, &t, sizeof(t), NULL); + } +} + +SYSCTL_PROC(_kern, KERN_BOOTTIME, boottime, + CTLTYPE_STRUCT | CTLFLAG_KERN | CTLFLAG_RD | CTLFLAG_LOCKED, + 0, 0, sysctl_boottime, "S,timeval", ""); + +STATIC int +sysctl_symfile +(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) +{ + char *str; + int error = get_kernel_symfile(req->p, &str); + if (error) + return (error); + return sysctl_io_string(req, str, 0, 0, NULL); +} + + +SYSCTL_PROC(_kern, KERN_SYMFILE, symfile, + CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_LOCKED, + 0, 0, sysctl_symfile, "A", ""); + +#if NFSCLIENT +STATIC int +sysctl_netboot +(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) +{ + return sysctl_io_number(req, netboot_root(), sizeof(int), NULL, NULL); +} + 
+SYSCTL_PROC(_kern, KERN_NETBOOT, netboot, + CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, + 0, 0, sysctl_netboot, "I", ""); +#endif + +#ifdef CONFIG_IMGSRC_ACCESS +/* + * Legacy--act as if only one layer of nesting is possible. + */ +STATIC int +sysctl_imgsrcdev +(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) +{ + vfs_context_t ctx = vfs_context_current(); + vnode_t devvp; + int result; + + if (!vfs_context_issuser(ctx)) { + return EPERM; + } + + if (imgsrc_rootvnodes[0] == NULL) { + return ENOENT; + } + + result = vnode_getwithref(imgsrc_rootvnodes[0]); + if (result != 0) { + return result; + } + + devvp = vnode_mount(imgsrc_rootvnodes[0])->mnt_devvp; + result = vnode_getwithref(devvp); + if (result != 0) { + goto out; + } + + result = sysctl_io_number(req, vnode_specrdev(devvp), sizeof(dev_t), NULL, NULL); + + vnode_put(devvp); +out: + vnode_put(imgsrc_rootvnodes[0]); + return result; +} + +SYSCTL_PROC(_kern, OID_AUTO, imgsrcdev, + CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, + 0, 0, sysctl_imgsrcdev, "I", ""); + +STATIC int +sysctl_imgsrcinfo +(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) +{ + int error; + struct imgsrc_info info[MAX_IMAGEBOOT_NESTING] = {}; /* 2 for now, no problem */ + uint32_t i; + vnode_t rvp, devvp; + + if (imgsrc_rootvnodes[0] == NULLVP) { + return ENXIO; + } + + for (i = 0; i < MAX_IMAGEBOOT_NESTING; i++) { + /* + * Go get the root vnode. + */ + rvp = imgsrc_rootvnodes[i]; + if (rvp == NULLVP) { + break; + } + + error = vnode_get(rvp); + if (error != 0) { + return error; + } + + /* + * For now, no getting at a non-local volume. + */ + devvp = vnode_mount(rvp)->mnt_devvp; + if (devvp == NULL) { + vnode_put(rvp); + return EINVAL; + } + + error = vnode_getwithref(devvp); + if (error != 0) { + vnode_put(rvp); + return error; + } + + /* + * Fill in info. 
+ */ + info[i].ii_dev = vnode_specrdev(devvp); + info[i].ii_flags = 0; + info[i].ii_height = i; + bzero(info[i].ii_reserved, sizeof(info[i].ii_reserved)); + + vnode_put(devvp); + vnode_put(rvp); + } + + return sysctl_io_opaque(req, info, i * sizeof(info[0]), NULL); +} + +SYSCTL_PROC(_kern, OID_AUTO, imgsrcinfo, + CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_LOCKED, + 0, 0, sysctl_imgsrcinfo, "I", ""); + +#endif /* CONFIG_IMGSRC_ACCESS */ + + +SYSCTL_DECL(_kern_timer); +SYSCTL_NODE(_kern, OID_AUTO, timer, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "timer"); + + +SYSCTL_INT(_kern_timer, OID_AUTO, coalescing_enabled, + CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &mach_timer_coalescing_enabled, 0, ""); + +SYSCTL_QUAD(_kern_timer, OID_AUTO, deadline_tracking_bin_1, + CTLFLAG_RW | CTLFLAG_LOCKED, + &timer_deadline_tracking_bin_1, ""); +SYSCTL_QUAD(_kern_timer, OID_AUTO, deadline_tracking_bin_2, + CTLFLAG_RW | CTLFLAG_LOCKED, + &timer_deadline_tracking_bin_2, ""); + +SYSCTL_DECL(_kern_timer_longterm); +SYSCTL_NODE(_kern_timer, OID_AUTO, longterm, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "longterm"); + + +/* Must match definition in osfmk/kern/timer_call.c */ +enum { + THRESHOLD, QCOUNT, + ENQUEUES, DEQUEUES, ESCALATES, SCANS, PREEMPTS, + LATENCY, LATENCY_MIN, LATENCY_MAX, SCAN_LIMIT, SCAN_INTERVAL, PAUSES +}; +extern uint64_t timer_sysctl_get(int); +extern int timer_sysctl_set(int, uint64_t); + +STATIC int +sysctl_timer +(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) +{ + int oid = (int)arg1; + uint64_t value = timer_sysctl_get(oid); + uint64_t new_value; + int error; + int changed; + + error = sysctl_io_number(req, value, sizeof(value), &new_value, &changed); + if (changed) + error = timer_sysctl_set(oid, new_value); + + return error; +} + +SYSCTL_PROC(_kern_timer_longterm, OID_AUTO, threshold, + CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, + (void *) THRESHOLD, 0, sysctl_timer, "Q", ""); +SYSCTL_PROC(_kern_timer_longterm, OID_AUTO, scan_limit, 
+ CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, + (void *) SCAN_LIMIT, 0, sysctl_timer, "Q", ""); +SYSCTL_PROC(_kern_timer_longterm, OID_AUTO, scan_interval, + CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, + (void *) SCAN_INTERVAL, 0, sysctl_timer, "Q", ""); + +SYSCTL_PROC(_kern_timer_longterm, OID_AUTO, qlen, + CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_LOCKED, + (void *) QCOUNT, 0, sysctl_timer, "Q", ""); +SYSCTL_PROC(_kern_timer_longterm, OID_AUTO, scan_pauses, + CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_LOCKED, + (void *) PAUSES, 0, sysctl_timer, "Q", ""); + +#if DEBUG +SYSCTL_PROC(_kern_timer_longterm, OID_AUTO, enqueues, + CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_LOCKED, + (void *) ENQUEUES, 0, sysctl_timer, "Q", ""); +SYSCTL_PROC(_kern_timer_longterm, OID_AUTO, dequeues, + CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_LOCKED, + (void *) DEQUEUES, 0, sysctl_timer, "Q", ""); +SYSCTL_PROC(_kern_timer_longterm, OID_AUTO, escalates, + CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_LOCKED, + (void *) ESCALATES, 0, sysctl_timer, "Q", ""); +SYSCTL_PROC(_kern_timer_longterm, OID_AUTO, scans, + CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_LOCKED, + (void *) SCANS, 0, sysctl_timer, "Q", ""); +SYSCTL_PROC(_kern_timer_longterm, OID_AUTO, preempts, + CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_LOCKED, + (void *) PREEMPTS, 0, sysctl_timer, "Q", ""); +SYSCTL_PROC(_kern_timer_longterm, OID_AUTO, latency, + CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_LOCKED, + (void *) LATENCY, 0, sysctl_timer, "Q", ""); +SYSCTL_PROC(_kern_timer_longterm, OID_AUTO, latency_min, + CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_LOCKED, + (void *) LATENCY_MIN, 0, sysctl_timer, "Q", ""); +SYSCTL_PROC(_kern_timer_longterm, OID_AUTO, latency_max, + CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_LOCKED, + (void *) LATENCY_MAX, 0, sysctl_timer, "Q", ""); +#endif /* DEBUG */ + +STATIC int +sysctl_usrstack +(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) +{ + return sysctl_io_number(req, (int)req->p->user_stack, sizeof(int), NULL, NULL); +} 
+ +SYSCTL_PROC(_kern, KERN_USRSTACK32, usrstack, + CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, + 0, 0, sysctl_usrstack, "I", ""); + +STATIC int +sysctl_usrstack64 +(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) +{ + return sysctl_io_number(req, req->p->user_stack, sizeof(req->p->user_stack), NULL, NULL); +} + +SYSCTL_PROC(_kern, KERN_USRSTACK64, usrstack64, + CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_LOCKED, + 0, 0, sysctl_usrstack64, "Q", ""); + +#if CONFIG_COREDUMP + +SYSCTL_STRING(_kern, KERN_COREFILE, corefile, + CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED, + corefilename, sizeof(corefilename), ""); + +STATIC int +sysctl_coredump +(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) +{ +#ifdef SECURE_KERNEL + (void)req; + return (ENOTSUP); +#else + int new_value, changed; + int error = sysctl_io_number(req, do_coredump, sizeof(int), &new_value, &changed); + if (changed) { + if ((new_value == 0) || (new_value == 1)) + do_coredump = new_value; + else + error = EINVAL; + } + return(error); +#endif +} + +SYSCTL_PROC(_kern, KERN_COREDUMP, coredump, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, + 0, 0, sysctl_coredump, "I", ""); + +STATIC int +sysctl_suid_coredump +(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) +{ +#ifdef SECURE_KERNEL + (void)req; + return (ENOTSUP); +#else + int new_value, changed; + int error = sysctl_io_number(req, sugid_coredump, sizeof(int), &new_value, &changed); + if (changed) { + if ((new_value == 0) || (new_value == 1)) + sugid_coredump = new_value; + else + error = EINVAL; + } + return(error); +#endif +} + +SYSCTL_PROC(_kern, KERN_SUGID_COREDUMP, sugid_coredump, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, + 0, 0, sysctl_suid_coredump, "I", ""); + +#endif /* CONFIG_COREDUMP */ + +STATIC int +sysctl_delayterm +(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req 
*req) +{ + struct proc *p = req->p; + int new_value, changed; + int error = sysctl_io_number(req, (req->p->p_lflag & P_LDELAYTERM)? 1: 0, sizeof(int), &new_value, &changed); + if (changed) { + proc_lock(p); + if (new_value) + req->p->p_lflag |= P_LDELAYTERM; + else + req->p->p_lflag &= ~P_LDELAYTERM; + proc_unlock(p); + } + return(error); +} + +SYSCTL_PROC(_kern, KERN_PROCDELAYTERM, delayterm, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, + 0, 0, sysctl_delayterm, "I", ""); + + +STATIC int +sysctl_rage_vnode +(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) +{ + struct proc *p = req->p; + struct uthread *ut; + int new_value, old_value, changed; + int error; + + ut = get_bsdthread_info(current_thread()); + + if (ut->uu_flag & UT_RAGE_VNODES) + old_value = KERN_RAGE_THREAD; + else if (p->p_lflag & P_LRAGE_VNODES) + old_value = KERN_RAGE_PROC; + else + old_value = 0; + + error = sysctl_io_number(req, old_value, sizeof(int), &new_value, &changed); + + if (error == 0) { + switch (new_value) { + case KERN_RAGE_PROC: + proc_lock(p); + p->p_lflag |= P_LRAGE_VNODES; + proc_unlock(p); + break; + case KERN_UNRAGE_PROC: + proc_lock(p); + p->p_lflag &= ~P_LRAGE_VNODES; + proc_unlock(p); + break; + + case KERN_RAGE_THREAD: + ut->uu_flag |= UT_RAGE_VNODES; + break; + case KERN_UNRAGE_THREAD: + ut = get_bsdthread_info(current_thread()); + ut->uu_flag &= ~UT_RAGE_VNODES; + break; + } + } + return(error); +} + +SYSCTL_PROC(_kern, KERN_RAGEVNODE, rage_vnode, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED, + 0, 0, sysctl_rage_vnode, "I", ""); + +/* XXX move this interface into libproc and remove this sysctl */ +STATIC int +sysctl_setthread_cpupercent +(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) +{ + int new_value, old_value; + int error = 0; + kern_return_t kret = KERN_SUCCESS; + uint8_t percent = 0; + int ms_refill = 0; + + if (!req->newptr) + return (0); + + 
old_value = 0; + + if ((error = sysctl_io_number(req, old_value, sizeof(old_value), &new_value, NULL)) != 0) + return (error); + + percent = new_value & 0xff; /* low 8 bytes for perent */ + ms_refill = (new_value >> 8) & 0xffffff; /* upper 24bytes represent ms refill value */ + if (percent > 100) + return (EINVAL); + + /* + * If the caller is specifying a percentage of 0, this will unset the CPU limit, if present. + */ + if ((kret = thread_set_cpulimit(THREAD_CPULIMIT_BLOCK, percent, ms_refill * (int)NSEC_PER_MSEC)) != 0) + return (EIO); + + return (0); +} + +SYSCTL_PROC(_kern, OID_AUTO, setthread_cpupercent, + CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_ANYBODY, + 0, 0, sysctl_setthread_cpupercent, "I", "set thread cpu percentage limit"); + + +STATIC int +sysctl_kern_check_openevt +(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) +{ + struct proc *p = req->p; + int new_value, old_value, changed; + int error; + + if (p->p_flag & P_CHECKOPENEVT) { + old_value = KERN_OPENEVT_PROC; + } else { + old_value = 0; + } + + error = sysctl_io_number(req, old_value, sizeof(int), &new_value, &changed); + + if (error == 0) { + switch (new_value) { + case KERN_OPENEVT_PROC: + OSBitOrAtomic(P_CHECKOPENEVT, &p->p_flag); + break; + + case KERN_UNOPENEVT_PROC: + OSBitAndAtomic(~((uint32_t)P_CHECKOPENEVT), &p->p_flag); + break; + + default: + error = EINVAL; + } + } + return(error); +} + +SYSCTL_PROC(_kern, KERN_CHECKOPENEVT, check_openevt, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED, + 0, 0, sysctl_kern_check_openevt, "I", "set the per-process check-open-evt flag"); + + + +STATIC int +sysctl_nx +(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) +{ +#ifdef SECURE_KERNEL + (void)req; + return ENOTSUP; +#else + int new_value, changed; + int error; + + error = sysctl_io_number(req, nx_enabled, sizeof(nx_enabled), &new_value, &changed); + if (error) + return error; + + if (changed) { 
+#if defined(__i386__) || defined(__x86_64__) + /* + * Only allow setting if NX is supported on the chip + */ + if (!(cpuid_extfeatures() & CPUID_EXTFEATURE_XD)) + return ENOTSUP; +#endif + nx_enabled = new_value; + } + return(error); +#endif /* SECURE_KERNEL */ +} + + + +SYSCTL_PROC(_kern, KERN_NX_PROTECTION, nx, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED, + 0, 0, sysctl_nx, "I", ""); + +STATIC int +sysctl_loadavg +(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) +{ + if (proc_is64bit(req->p)) { + struct user64_loadavg loadinfo64 = {}; + fill_loadavg64(&averunnable, &loadinfo64); + return sysctl_io_opaque(req, &loadinfo64, sizeof(loadinfo64), NULL); + } else { + struct user32_loadavg loadinfo32 = {}; + fill_loadavg32(&averunnable, &loadinfo32); + return sysctl_io_opaque(req, &loadinfo32, sizeof(loadinfo32), NULL); + } +} + +SYSCTL_PROC(_vm, VM_LOADAVG, loadavg, + CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, + 0, 0, sysctl_loadavg, "S,loadavg", ""); + +/* + * Note: Thread safe; vm_map_lock protects in vm_toggle_entry_reuse() + */ +STATIC int +sysctl_vm_toggle_address_reuse(__unused struct sysctl_oid *oidp, __unused void *arg1, + __unused int arg2, struct sysctl_req *req) +{ + int old_value=0, new_value=0, error=0; + + if(vm_toggle_entry_reuse( VM_TOGGLE_GETVALUE, &old_value )) + return(error); + error = sysctl_io_number(req, old_value, sizeof(int), &new_value, NULL); + if (!error) { + return (vm_toggle_entry_reuse(new_value, NULL)); + } + return(error); +} + +SYSCTL_PROC(_debug, OID_AUTO, toggle_address_reuse, CTLFLAG_ANYBODY | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0, sysctl_vm_toggle_address_reuse,"I",""); + + +STATIC int +sysctl_swapusage +(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) +{ + int error; + uint64_t swap_total; + uint64_t swap_avail; + vm_size_t swap_pagesize; + boolean_t swap_encrypted; + struct xsw_usage xsu = {}; + + error 
= macx_swapinfo(&swap_total, + &swap_avail, + &swap_pagesize, + &swap_encrypted); + if (error) + return error; + + xsu.xsu_total = swap_total; + xsu.xsu_avail = swap_avail; + xsu.xsu_used = swap_total - swap_avail; + xsu.xsu_pagesize = swap_pagesize; + xsu.xsu_encrypted = swap_encrypted; + return sysctl_io_opaque(req, &xsu, sizeof(xsu), NULL); +} + + + +SYSCTL_PROC(_vm, VM_SWAPUSAGE, swapusage, + CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, + 0, 0, sysctl_swapusage, "S,xsw_usage", ""); + +#if CONFIG_FREEZE +extern void vm_page_reactivate_all_throttled(void); + +static int +sysctl_freeze_enabled SYSCTL_HANDLER_ARGS +{ +#pragma unused(arg1, arg2) + int error, val = memorystatus_freeze_enabled ? 1 : 0; + boolean_t disabled; + + error = sysctl_handle_int(oidp, &val, 0, req); + if (error || !req->newptr) + return (error); + + if (VM_CONFIG_COMPRESSOR_IS_ACTIVE) { + //assert(req->newptr); + printf("Failed attempt to set vm.freeze_enabled sysctl\n"); + return EINVAL; + } + + /* + * If freeze is being disabled, we need to move dirty pages out from the throttle to the active queue. + */ + disabled = (!val && memorystatus_freeze_enabled); + + memorystatus_freeze_enabled = val ? 
TRUE : FALSE; + + if (disabled) { + vm_page_reactivate_all_throttled(); + } + + return (0); +} + +SYSCTL_PROC(_vm, OID_AUTO, freeze_enabled, CTLTYPE_INT|CTLFLAG_RW, &memorystatus_freeze_enabled, 0, sysctl_freeze_enabled, "I", ""); +#endif /* CONFIG_FREEZE */ + +/* this kernel does NOT implement shared_region_make_private_np() */ +SYSCTL_INT(_kern, KERN_SHREG_PRIVATIZABLE, shreg_private, + CTLFLAG_RD | CTLFLAG_LOCKED, + (int *)NULL, 0, ""); + +STATIC int +fetch_process_cputype( + proc_t cur_proc, + int *name, + u_int namelen, + cpu_type_t *cputype) +{ + proc_t p = PROC_NULL; + int refheld = 0; + cpu_type_t ret = 0; + int error = 0; + + if (namelen == 0) + p = cur_proc; + else if (namelen == 1) { + p = proc_find(name[0]); + if (p == NULL) + return (EINVAL); + refheld = 1; + } else { + error = EINVAL; + goto out; + } + + ret = cpu_type() & ~CPU_ARCH_MASK; + if (IS_64BIT_PROCESS(p)) + ret |= CPU_ARCH_ABI64; + + *cputype = ret; + + if (refheld != 0) + proc_rele(p); +out: + return (error); +} + +STATIC int +sysctl_sysctl_native(__unused struct sysctl_oid *oidp, void *arg1, int arg2, + struct sysctl_req *req) +{ + int error; + cpu_type_t proc_cputype = 0; + if ((error = fetch_process_cputype(req->p, (int *)arg1, arg2, &proc_cputype)) != 0) + return error; + int res = 1; + if ((proc_cputype & ~CPU_ARCH_MASK) != (cpu_type() & ~CPU_ARCH_MASK)) + res = 0; + return SYSCTL_OUT(req, &res, sizeof(res)); +} +SYSCTL_PROC(_sysctl, OID_AUTO, proc_native, CTLTYPE_NODE|CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, sysctl_sysctl_native ,"I","proc_native"); + +STATIC int +sysctl_sysctl_cputype(__unused struct sysctl_oid *oidp, void *arg1, int arg2, + struct sysctl_req *req) +{ + int error; + cpu_type_t proc_cputype = 0; + if ((error = fetch_process_cputype(req->p, (int *)arg1, arg2, &proc_cputype)) != 0) + return error; + return SYSCTL_OUT(req, &proc_cputype, sizeof(proc_cputype)); +} +SYSCTL_PROC(_sysctl, OID_AUTO, proc_cputype, CTLTYPE_NODE|CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, sysctl_sysctl_cputype 
,"I","proc_cputype"); + +STATIC int +sysctl_safeboot +(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) +{ + return sysctl_io_number(req, boothowto & RB_SAFEBOOT ? 1 : 0, sizeof(int), NULL, NULL); +} + +SYSCTL_PROC(_kern, KERN_SAFEBOOT, safeboot, + CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, + 0, 0, sysctl_safeboot, "I", ""); + +STATIC int +sysctl_singleuser +(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) +{ + return sysctl_io_number(req, boothowto & RB_SINGLE ? 1 : 0, sizeof(int), NULL, NULL); +} + +SYSCTL_PROC(_kern, OID_AUTO, singleuser, + CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, + 0, 0, sysctl_singleuser, "I", ""); + +STATIC int sysctl_minimalboot +(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) +{ + return sysctl_io_number(req, minimalboot, sizeof(int), NULL, NULL); +} + +SYSCTL_PROC(_kern, OID_AUTO, minimalboot, + CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, + 0, 0, sysctl_minimalboot, "I", ""); + +/* + * Controls for debugging affinity sets - see osfmk/kern/affinity.c + */ +extern boolean_t affinity_sets_enabled; +extern int affinity_sets_mapping; + +SYSCTL_INT (_kern, OID_AUTO, affinity_sets_enabled, + CTLFLAG_RW | CTLFLAG_LOCKED, (int *) &affinity_sets_enabled, 0, "hinting enabled"); +SYSCTL_INT (_kern, OID_AUTO, affinity_sets_mapping, + CTLFLAG_RW | CTLFLAG_LOCKED, &affinity_sets_mapping, 0, "mapping policy"); + +/* + * Boolean indicating if KASLR is active. + */ +STATIC int +sysctl_slide +(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) +{ + uint32_t slide; + + slide = vm_kernel_slide ? 1 : 0; + + return sysctl_io_number( req, slide, sizeof(int), NULL, NULL); +} + +SYSCTL_PROC(_kern, OID_AUTO, slide, + CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, + 0, 0, sysctl_slide, "I", ""); + +/* + * Limit on total memory users can wire. 
+ * + * vm_global_user_wire_limit - system wide limit on wired memory from all processes combined. + * + * vm_user_wire_limit - per address space limit on wired memory. This puts a cap on the process's rlimit value. + * + * These values are initialized to reasonable defaults at boot time based on the available physical memory in + * kmem_init(). + * + * All values are in bytes. + */ + +vm_map_size_t vm_global_no_user_wire_amount; +vm_map_size_t vm_global_user_wire_limit; +vm_map_size_t vm_user_wire_limit; + +/* + * There needs to be a more automatic/elegant way to do this + */ +#if defined(__ARM__) +SYSCTL_INT(_vm, OID_AUTO, global_no_user_wire_amount, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_global_no_user_wire_amount, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, global_user_wire_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_global_user_wire_limit, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, user_wire_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_user_wire_limit, 0, ""); +#else +SYSCTL_QUAD(_vm, OID_AUTO, global_no_user_wire_amount, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_global_no_user_wire_amount, ""); +SYSCTL_QUAD(_vm, OID_AUTO, global_user_wire_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_global_user_wire_limit, ""); +SYSCTL_QUAD(_vm, OID_AUTO, user_wire_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_user_wire_limit, ""); +#endif + +extern int vm_map_copy_overwrite_aligned_src_not_internal; +extern int vm_map_copy_overwrite_aligned_src_not_symmetric; +extern int vm_map_copy_overwrite_aligned_src_large; +SYSCTL_INT(_vm, OID_AUTO, vm_copy_src_not_internal, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_copy_overwrite_aligned_src_not_internal, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, vm_copy_src_not_symmetric, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_copy_overwrite_aligned_src_not_symmetric, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, vm_copy_src_large, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_copy_overwrite_aligned_src_large, 0, ""); + + +extern uint32_t vm_page_external_count; +extern uint32_t vm_page_filecache_min; + +SYSCTL_INT(_vm, OID_AUTO, 
vm_page_external_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_external_count, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, vm_page_filecache_min, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_page_filecache_min, 0, ""); + +extern int vm_compressor_mode; +extern int vm_compressor_is_active; +extern int vm_compressor_available; +extern uint32_t vm_ripe_target_age; +extern uint32_t swapout_target_age; +extern int64_t compressor_bytes_used; +extern int64_t c_segment_input_bytes; +extern int64_t c_segment_compressed_bytes; +extern uint32_t compressor_eval_period_in_msecs; +extern uint32_t compressor_sample_min_in_msecs; +extern uint32_t compressor_sample_max_in_msecs; +extern uint32_t compressor_thrashing_threshold_per_10msecs; +extern uint32_t compressor_thrashing_min_per_10msecs; +extern uint32_t vm_compressor_minorcompact_threshold_divisor; +extern uint32_t vm_compressor_majorcompact_threshold_divisor; +extern uint32_t vm_compressor_unthrottle_threshold_divisor; +extern uint32_t vm_compressor_catchup_threshold_divisor; +extern uint32_t vm_compressor_time_thread; +#if DEVELOPMENT || DEBUG +extern vmct_stats_t vmct_stats; +#endif + +SYSCTL_QUAD(_vm, OID_AUTO, compressor_input_bytes, CTLFLAG_RD | CTLFLAG_LOCKED, &c_segment_input_bytes, ""); +SYSCTL_QUAD(_vm, OID_AUTO, compressor_compressed_bytes, CTLFLAG_RD | CTLFLAG_LOCKED, &c_segment_compressed_bytes, ""); +SYSCTL_QUAD(_vm, OID_AUTO, compressor_bytes_used, CTLFLAG_RD | CTLFLAG_LOCKED, &compressor_bytes_used, ""); + +SYSCTL_INT(_vm, OID_AUTO, compressor_mode, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_compressor_mode, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, compressor_is_active, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_compressor_is_active, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, compressor_swapout_target_age, CTLFLAG_RD | CTLFLAG_LOCKED, &swapout_target_age, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, compressor_available, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_compressor_available, 0, ""); + +SYSCTL_INT(_vm, OID_AUTO, vm_ripe_target_age_in_secs, CTLFLAG_RW | CTLFLAG_LOCKED, 
&vm_ripe_target_age, 0, ""); + +SYSCTL_INT(_vm, OID_AUTO, compressor_eval_period_in_msecs, CTLFLAG_RW | CTLFLAG_LOCKED, &compressor_eval_period_in_msecs, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, compressor_sample_min_in_msecs, CTLFLAG_RW | CTLFLAG_LOCKED, &compressor_sample_min_in_msecs, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, compressor_sample_max_in_msecs, CTLFLAG_RW | CTLFLAG_LOCKED, &compressor_sample_max_in_msecs, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, compressor_thrashing_threshold_per_10msecs, CTLFLAG_RW | CTLFLAG_LOCKED, &compressor_thrashing_threshold_per_10msecs, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, compressor_thrashing_min_per_10msecs, CTLFLAG_RW | CTLFLAG_LOCKED, &compressor_thrashing_min_per_10msecs, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, compressor_minorcompact_threshold_divisor, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_compressor_minorcompact_threshold_divisor, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, compressor_majorcompact_threshold_divisor, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_compressor_majorcompact_threshold_divisor, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, compressor_unthrottle_threshold_divisor, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_compressor_unthrottle_threshold_divisor, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, compressor_catchup_threshold_divisor, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_compressor_catchup_threshold_divisor, 0, ""); + +SYSCTL_STRING(_vm, OID_AUTO, swapfileprefix, CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED, swapfilename, sizeof(swapfilename) - SWAPFILENAME_INDEX_LEN, ""); + +SYSCTL_INT(_vm, OID_AUTO, compressor_timing_enabled, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_compressor_time_thread, 0, ""); + +#if DEVELOPMENT || DEBUG +SYSCTL_QUAD(_vm, OID_AUTO, compressor_thread_runtime0, CTLFLAG_RD | CTLFLAG_LOCKED, &vmct_stats.vmct_runtimes[0], ""); +SYSCTL_QUAD(_vm, OID_AUTO, compressor_thread_runtime1, CTLFLAG_RD | CTLFLAG_LOCKED, &vmct_stats.vmct_runtimes[1], ""); + +SYSCTL_QUAD(_vm, OID_AUTO, compressor_threads_total, CTLFLAG_RD | CTLFLAG_LOCKED, &vmct_stats.vmct_cthreads_total, ""); + 
+SYSCTL_QUAD(_vm, OID_AUTO, compressor_thread_pages0, CTLFLAG_RD | CTLFLAG_LOCKED, &vmct_stats.vmct_pages[0], ""); +SYSCTL_QUAD(_vm, OID_AUTO, compressor_thread_pages1, CTLFLAG_RD | CTLFLAG_LOCKED, &vmct_stats.vmct_pages[1], ""); + +SYSCTL_QUAD(_vm, OID_AUTO, compressor_thread_iterations0, CTLFLAG_RD | CTLFLAG_LOCKED, &vmct_stats.vmct_iterations[0], ""); +SYSCTL_QUAD(_vm, OID_AUTO, compressor_thread_iterations1, CTLFLAG_RD | CTLFLAG_LOCKED, &vmct_stats.vmct_iterations[1], ""); + +SYSCTL_INT(_vm, OID_AUTO, compressor_thread_minpages0, CTLFLAG_RD | CTLFLAG_LOCKED, &vmct_stats.vmct_minpages[0], 0, ""); +SYSCTL_INT(_vm, OID_AUTO, compressor_thread_minpages1, CTLFLAG_RD | CTLFLAG_LOCKED, &vmct_stats.vmct_minpages[1], 0, ""); + +SYSCTL_INT(_vm, OID_AUTO, compressor_thread_maxpages0, CTLFLAG_RD | CTLFLAG_LOCKED, &vmct_stats.vmct_maxpages[0], 0, ""); +SYSCTL_INT(_vm, OID_AUTO, compressor_thread_maxpages1, CTLFLAG_RD | CTLFLAG_LOCKED, &vmct_stats.vmct_maxpages[1], 0, ""); + +#endif + +SYSCTL_QUAD(_vm, OID_AUTO, lz4_compressions, CTLFLAG_RD | CTLFLAG_LOCKED, &compressor_stats.lz4_compressions, ""); +SYSCTL_QUAD(_vm, OID_AUTO, lz4_compression_failures, CTLFLAG_RD | CTLFLAG_LOCKED, &compressor_stats.lz4_compression_failures, ""); +SYSCTL_QUAD(_vm, OID_AUTO, lz4_compressed_bytes, CTLFLAG_RD | CTLFLAG_LOCKED, &compressor_stats.lz4_compressed_bytes, ""); +SYSCTL_QUAD(_vm, OID_AUTO, lz4_wk_compression_delta, CTLFLAG_RD | CTLFLAG_LOCKED, &compressor_stats.lz4_wk_compression_delta, ""); +SYSCTL_QUAD(_vm, OID_AUTO, lz4_wk_compression_negative_delta, CTLFLAG_RD | CTLFLAG_LOCKED, &compressor_stats.lz4_wk_compression_negative_delta, ""); + +SYSCTL_QUAD(_vm, OID_AUTO, lz4_decompressions, CTLFLAG_RD | CTLFLAG_LOCKED, &compressor_stats.lz4_decompressions, ""); +SYSCTL_QUAD(_vm, OID_AUTO, lz4_decompressed_bytes, CTLFLAG_RD | CTLFLAG_LOCKED, &compressor_stats.lz4_decompressed_bytes, ""); + +SYSCTL_QUAD(_vm, OID_AUTO, uc_decompressions, CTLFLAG_RD | CTLFLAG_LOCKED, 
&compressor_stats.uc_decompressions, ""); + +SYSCTL_QUAD(_vm, OID_AUTO, wk_compressions, CTLFLAG_RD | CTLFLAG_LOCKED, &compressor_stats.wk_compressions, ""); + +SYSCTL_QUAD(_vm, OID_AUTO, wk_catime, CTLFLAG_RD | CTLFLAG_LOCKED, &compressor_stats.wk_cabstime, ""); + +SYSCTL_QUAD(_vm, OID_AUTO, wkh_catime, CTLFLAG_RD | CTLFLAG_LOCKED, &compressor_stats.wkh_cabstime, ""); +SYSCTL_QUAD(_vm, OID_AUTO, wkh_compressions, CTLFLAG_RD | CTLFLAG_LOCKED, &compressor_stats.wkh_compressions, ""); + +SYSCTL_QUAD(_vm, OID_AUTO, wks_catime, CTLFLAG_RD | CTLFLAG_LOCKED, &compressor_stats.wks_cabstime, ""); +SYSCTL_QUAD(_vm, OID_AUTO, wks_compressions, CTLFLAG_RD | CTLFLAG_LOCKED, &compressor_stats.wks_compressions, ""); + +SYSCTL_QUAD(_vm, OID_AUTO, wk_compressions_exclusive, CTLFLAG_RD | CTLFLAG_LOCKED, &compressor_stats.wk_compressions_exclusive, ""); +SYSCTL_QUAD(_vm, OID_AUTO, wk_sv_compressions, CTLFLAG_RD | CTLFLAG_LOCKED, &compressor_stats.wk_sv_compressions, ""); +SYSCTL_QUAD(_vm, OID_AUTO, wk_mzv_compressions, CTLFLAG_RD | CTLFLAG_LOCKED, &compressor_stats.wk_mzv_compressions, ""); +SYSCTL_QUAD(_vm, OID_AUTO, wk_compression_failures, CTLFLAG_RD | CTLFLAG_LOCKED, &compressor_stats.wk_compression_failures, ""); +SYSCTL_QUAD(_vm, OID_AUTO, wk_compressed_bytes_exclusive, CTLFLAG_RD | CTLFLAG_LOCKED, &compressor_stats.wk_compressed_bytes_exclusive, ""); +SYSCTL_QUAD(_vm, OID_AUTO, wk_compressed_bytes_total, CTLFLAG_RD | CTLFLAG_LOCKED, &compressor_stats.wk_compressed_bytes_total, ""); + +SYSCTL_QUAD(_vm, OID_AUTO, wks_compressed_bytes, CTLFLAG_RD | CTLFLAG_LOCKED, &compressor_stats.wks_compressed_bytes, ""); +SYSCTL_QUAD(_vm, OID_AUTO, wks_compression_failures, CTLFLAG_RD | CTLFLAG_LOCKED, &compressor_stats.wks_compression_failures, ""); +SYSCTL_QUAD(_vm, OID_AUTO, wks_sv_compressions, CTLFLAG_RD | CTLFLAG_LOCKED, &compressor_stats.wks_sv_compressions, ""); + + +SYSCTL_QUAD(_vm, OID_AUTO, wk_decompressions, CTLFLAG_RD | CTLFLAG_LOCKED, &compressor_stats.wk_decompressions, ""); + 
+SYSCTL_QUAD(_vm, OID_AUTO, wk_datime, CTLFLAG_RD | CTLFLAG_LOCKED, &compressor_stats.wk_dabstime, ""); + +SYSCTL_QUAD(_vm, OID_AUTO, wkh_datime, CTLFLAG_RD | CTLFLAG_LOCKED, &compressor_stats.wkh_dabstime, ""); +SYSCTL_QUAD(_vm, OID_AUTO, wkh_decompressions, CTLFLAG_RD | CTLFLAG_LOCKED, &compressor_stats.wkh_decompressions, ""); + +SYSCTL_QUAD(_vm, OID_AUTO, wks_datime, CTLFLAG_RD | CTLFLAG_LOCKED, &compressor_stats.wks_dabstime, ""); +SYSCTL_QUAD(_vm, OID_AUTO, wks_decompressions, CTLFLAG_RD | CTLFLAG_LOCKED, &compressor_stats.wks_decompressions, ""); + +SYSCTL_QUAD(_vm, OID_AUTO, wk_decompressed_bytes, CTLFLAG_RD | CTLFLAG_LOCKED, &compressor_stats.wk_decompressed_bytes, ""); +SYSCTL_QUAD(_vm, OID_AUTO, wk_sv_decompressions, CTLFLAG_RD | CTLFLAG_LOCKED, &compressor_stats.wk_sv_decompressions, ""); + +SYSCTL_INT(_vm, OID_AUTO, lz4_threshold, CTLFLAG_RW | CTLFLAG_LOCKED, &vmctune.lz4_threshold, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, wkdm_reeval_threshold, CTLFLAG_RW | CTLFLAG_LOCKED, &vmctune.wkdm_reeval_threshold, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, lz4_max_failure_skips, CTLFLAG_RW | CTLFLAG_LOCKED, &vmctune.lz4_max_failure_skips, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, lz4_max_failure_run_length, CTLFLAG_RW | CTLFLAG_LOCKED, &vmctune.lz4_max_failure_run_length, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, lz4_max_preselects, CTLFLAG_RW | CTLFLAG_LOCKED, &vmctune.lz4_max_preselects, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, lz4_run_preselection_threshold, CTLFLAG_RW | CTLFLAG_LOCKED, &vmctune.lz4_run_preselection_threshold, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, lz4_run_continue_bytes, CTLFLAG_RW | CTLFLAG_LOCKED, &vmctune.lz4_run_continue_bytes, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, lz4_profitable_bytes, CTLFLAG_RW | CTLFLAG_LOCKED, &vmctune.lz4_profitable_bytes, 0, ""); +#if DEVELOPMENT || DEBUG +extern int vm_compressor_current_codec; +extern int vm_compressor_test_seg_wp; +extern boolean_t vm_compressor_force_sw_wkdm; +SYSCTL_INT(_vm, OID_AUTO, compressor_codec, CTLFLAG_RW | CTLFLAG_LOCKED, 
&vm_compressor_current_codec, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, compressor_test_wp, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_compressor_test_seg_wp, 0, ""); + +SYSCTL_INT(_vm, OID_AUTO, wksw_force, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_compressor_force_sw_wkdm, 0, ""); +extern int precompy, wkswhw; + +SYSCTL_INT(_vm, OID_AUTO, precompy, CTLFLAG_RW | CTLFLAG_LOCKED, &precompy, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, wkswhw, CTLFLAG_RW | CTLFLAG_LOCKED, &wkswhw, 0, ""); +extern unsigned int vm_ktrace_enabled; +SYSCTL_INT(_vm, OID_AUTO, vm_ktrace, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_ktrace_enabled, 0, ""); +#endif + +#if CONFIG_PHANTOM_CACHE +extern uint32_t phantom_cache_thrashing_threshold; +extern uint32_t phantom_cache_eval_period_in_msecs; +extern uint32_t phantom_cache_thrashing_threshold_ssd; + + +SYSCTL_INT(_vm, OID_AUTO, phantom_cache_eval_period_in_msecs, CTLFLAG_RW | CTLFLAG_LOCKED, &phantom_cache_eval_period_in_msecs, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, phantom_cache_thrashing_threshold, CTLFLAG_RW | CTLFLAG_LOCKED, &phantom_cache_thrashing_threshold, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, phantom_cache_thrashing_threshold_ssd, CTLFLAG_RW | CTLFLAG_LOCKED, &phantom_cache_thrashing_threshold_ssd, 0, ""); +#endif + +#if CONFIG_BACKGROUND_QUEUE + +extern uint32_t vm_page_background_count; +extern uint32_t vm_page_background_target; +extern uint32_t vm_page_background_internal_count; +extern uint32_t vm_page_background_external_count; +extern uint32_t vm_page_background_mode; +extern uint32_t vm_page_background_exclude_external; +extern uint64_t vm_page_background_promoted_count; +extern uint64_t vm_pageout_considered_bq_internal; +extern uint64_t vm_pageout_considered_bq_external; +extern uint64_t vm_pageout_rejected_bq_internal; +extern uint64_t vm_pageout_rejected_bq_external; + +SYSCTL_INT(_vm, OID_AUTO, vm_page_background_mode, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_page_background_mode, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, vm_page_background_exclude_external, CTLFLAG_RW | 
CTLFLAG_LOCKED, &vm_page_background_exclude_external, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, vm_page_background_target, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_page_background_target, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, vm_page_background_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_background_count, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, vm_page_background_internal_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_background_internal_count, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, vm_page_background_external_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_background_external_count, 0, ""); + +SYSCTL_QUAD(_vm, OID_AUTO, vm_page_background_promoted_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_background_promoted_count, ""); +SYSCTL_QUAD(_vm, OID_AUTO, vm_pageout_considered_bq_internal, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_considered_bq_internal, ""); +SYSCTL_QUAD(_vm, OID_AUTO, vm_pageout_considered_bq_external, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_considered_bq_external, ""); +SYSCTL_QUAD(_vm, OID_AUTO, vm_pageout_rejected_bq_internal, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_rejected_bq_internal, ""); +SYSCTL_QUAD(_vm, OID_AUTO, vm_pageout_rejected_bq_external, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_rejected_bq_external, ""); + +#endif - binPath_sz = 0; +#if (DEVELOPMENT || DEBUG) - while ( (binPath_sz < max_len-1) && (*str++ != 0) ) - binPath_sz++; +SYSCTL_UINT(_vm, OID_AUTO, vm_page_creation_throttled_hard, + CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, + &vm_page_creation_throttled_hard, 0, ""); - /* If we have a NUL terminator, copy it, too */ - if (binPath_sz < max_len-1) binPath_sz += 1; +SYSCTL_UINT(_vm, OID_AUTO, vm_page_creation_throttled_soft, + CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, + &vm_page_creation_throttled_soft, 0, ""); - /* Pre-Flight the space requiremnts */ +extern uint32_t vm_pageout_memorystatus_fb_factor_nr; +extern uint32_t vm_pageout_memorystatus_fb_factor_dr; +SYSCTL_INT(_vm, OID_AUTO, vm_pageout_memorystatus_fb_factor_nr, CTLFLAG_RW | CTLFLAG_LOCKED, 
&vm_pageout_memorystatus_fb_factor_nr, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, vm_pageout_memorystatus_fb_factor_dr, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_pageout_memorystatus_fb_factor_dr, 0, ""); - /* Account for the padding that fills out binPath to the next word */ - alignedBinPath_sz += (binPath_sz & (sizeof(int)-1)) ? (sizeof(int)-(binPath_sz & (sizeof(int)-1))) : 0; +extern uint32_t vm_grab_anon_overrides; +extern uint32_t vm_grab_anon_nops; - placeHere = where + size; +SYSCTL_INT(_vm, OID_AUTO, vm_grab_anon_overrides, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_grab_anon_overrides, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, vm_grab_anon_nops, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_grab_anon_nops, 0, ""); - /* Account for the bytes needed to keep placeHere word aligned */ - addThis = (placeHere & (sizeof(int)-1)) ? (sizeof(int)-(placeHere & (sizeof(int)-1))) : 0; +/* log message counters for persistence mode */ +extern uint32_t oslog_p_total_msgcount; +extern uint32_t oslog_p_metadata_saved_msgcount; +extern uint32_t oslog_p_metadata_dropped_msgcount; +extern uint32_t oslog_p_error_count; +extern uint32_t oslog_p_saved_msgcount; +extern uint32_t oslog_p_dropped_msgcount; +extern uint32_t oslog_p_boot_dropped_msgcount; - /* Add up all the space that is needed */ - extraSpaceNeeded = alignedBinPath_sz + addThis + binPath_sz + (4 * sizeof(int)); +/* log message counters for streaming mode */ +extern uint32_t oslog_s_total_msgcount; +extern uint32_t oslog_s_metadata_msgcount; +extern uint32_t oslog_s_error_count; +extern uint32_t oslog_s_streamed_msgcount; +extern uint32_t oslog_s_dropped_msgcount; - /* is there is room to tack on argv[0]? */ - if ( (buflen & ~(sizeof(int)-1)) >= ( p->p_argslen + extraSpaceNeeded )) - { - placeHere += addThis; - suword(placeHere, 0); - placeHere += sizeof(int); - suword(placeHere, 0xBFFF0000); - placeHere += sizeof(int); - suword(placeHere, 0); - placeHere += sizeof(int); - error = copyout(data, placeHere, binPath_sz); - if ( ! 
error ) - { - placeHere += binPath_sz; - suword(placeHere, 0); - size += extraSpaceNeeded; - } - } - } - } +SYSCTL_UINT(_debug, OID_AUTO, oslog_p_total_msgcount, CTLFLAG_ANYBODY | CTLFLAG_RD | CTLFLAG_LOCKED, &oslog_p_total_msgcount, 0, ""); +SYSCTL_UINT(_debug, OID_AUTO, oslog_p_metadata_saved_msgcount, CTLFLAG_ANYBODY | CTLFLAG_RD | CTLFLAG_LOCKED, &oslog_p_metadata_saved_msgcount, 0, ""); +SYSCTL_UINT(_debug, OID_AUTO, oslog_p_metadata_dropped_msgcount, CTLFLAG_ANYBODY | CTLFLAG_RD | CTLFLAG_LOCKED, &oslog_p_metadata_dropped_msgcount, 0, ""); +SYSCTL_UINT(_debug, OID_AUTO, oslog_p_error_count, CTLFLAG_ANYBODY | CTLFLAG_RD | CTLFLAG_LOCKED, &oslog_p_error_count, 0, ""); +SYSCTL_UINT(_debug, OID_AUTO, oslog_p_saved_msgcount, CTLFLAG_ANYBODY | CTLFLAG_RD | CTLFLAG_LOCKED, &oslog_p_saved_msgcount, 0, ""); +SYSCTL_UINT(_debug, OID_AUTO, oslog_p_dropped_msgcount, CTLFLAG_ANYBODY | CTLFLAG_RD | CTLFLAG_LOCKED, &oslog_p_dropped_msgcount, 0, ""); +SYSCTL_UINT(_debug, OID_AUTO, oslog_p_boot_dropped_msgcount, CTLFLAG_ANYBODY | CTLFLAG_RD | CTLFLAG_LOCKED, &oslog_p_boot_dropped_msgcount, 0, ""); - if (copy_start != (vm_offset_t) 0) { - kmem_free(kernel_map, copy_start, copy_end - copy_start); - } - if (error) { - return(error); - } +SYSCTL_UINT(_debug, OID_AUTO, oslog_s_total_msgcount, CTLFLAG_ANYBODY | CTLFLAG_RD | CTLFLAG_LOCKED, &oslog_s_total_msgcount, 0, ""); +SYSCTL_UINT(_debug, OID_AUTO, oslog_s_metadata_msgcount, CTLFLAG_ANYBODY | CTLFLAG_RD | CTLFLAG_LOCKED, &oslog_s_metadata_msgcount, 0, ""); +SYSCTL_UINT(_debug, OID_AUTO, oslog_s_error_count, CTLFLAG_ANYBODY | CTLFLAG_RD | CTLFLAG_LOCKED, &oslog_s_error_count, 0, ""); +SYSCTL_UINT(_debug, OID_AUTO, oslog_s_streamed_msgcount, CTLFLAG_ANYBODY | CTLFLAG_RD | CTLFLAG_LOCKED, &oslog_s_streamed_msgcount, 0, ""); +SYSCTL_UINT(_debug, OID_AUTO, oslog_s_dropped_msgcount, CTLFLAG_ANYBODY | CTLFLAG_RD | CTLFLAG_LOCKED, &oslog_s_dropped_msgcount, 0, ""); - if (where != USER_ADDR_NULL) - *sizep = size; - return (0); -} 
+#endif /* DEVELOPMENT || DEBUG */ /* - * Validate parameters and get old / set new parameters - * for max number of concurrent aio requests. Makes sure - * the system wide limit is greater than the per process - * limit. + * Enable tracing of voucher contents */ -static int -sysctl_aiomax(user_addr_t oldp, size_t *oldlenp, user_addr_t newp, size_t newlen) -{ - int error = 0; - int new_value; +extern uint32_t ipc_voucher_trace_contents; - if ( oldp && *oldlenp < sizeof(int) ) - return (ENOMEM); - if ( newp && newlen != sizeof(int) ) - return (EINVAL); - - *oldlenp = sizeof(int); - if ( oldp ) - error = copyout( &aio_max_requests, oldp, sizeof(int) ); - if ( error == 0 && newp ) - error = copyin( newp, &new_value, sizeof(int) ); - if ( error == 0 && newp ) { - if ( new_value >= aio_max_requests_per_process ) - aio_max_requests = new_value; - else - error = EINVAL; - } - return( error ); - -} /* sysctl_aiomax */ +SYSCTL_INT (_kern, OID_AUTO, ipc_voucher_trace_contents, + CTLFLAG_RW | CTLFLAG_LOCKED, &ipc_voucher_trace_contents, 0, "Enable tracing voucher contents"); + +/* + * Kernel stack size and depth + */ +SYSCTL_INT (_kern, OID_AUTO, stack_size, + CTLFLAG_RD | CTLFLAG_LOCKED, (int *) &kernel_stack_size, 0, "Kernel stack size"); +SYSCTL_INT (_kern, OID_AUTO, stack_depth_max, + CTLFLAG_RD | CTLFLAG_LOCKED, (int *) &kernel_stack_depth_max, 0, "Max kernel stack depth at interrupt or context switch"); +extern unsigned int kern_feature_overrides; +SYSCTL_INT (_kern, OID_AUTO, kern_feature_overrides, + CTLFLAG_RD | CTLFLAG_LOCKED, &kern_feature_overrides, 0, "Kernel feature override mask"); /* - * Validate parameters and get old / set new parameters - * for max number of concurrent aio requests per process. - * Makes sure per process limit is less than the system wide - * limit. 
+ * enable back trace for port allocations */ -static int -sysctl_aioprocmax(user_addr_t oldp, size_t *oldlenp, user_addr_t newp, size_t newlen ) -{ - int error = 0; - int new_value = 0; +extern int ipc_portbt; - if ( oldp && *oldlenp < sizeof(int) ) - return (ENOMEM); - if ( newp && newlen != sizeof(int) ) - return (EINVAL); - - *oldlenp = sizeof(int); - if ( oldp ) - error = copyout( &aio_max_requests_per_process, oldp, sizeof(int) ); - if ( error == 0 && newp ) - error = copyin( newp, &new_value, sizeof(int) ); - if ( error == 0 && newp ) { - if ( new_value <= aio_max_requests && new_value >= AIO_LISTIO_MAX ) - aio_max_requests_per_process = new_value; - else - error = EINVAL; - } - return( error ); - -} /* sysctl_aioprocmax */ +SYSCTL_INT(_kern, OID_AUTO, ipc_portbt, + CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED, + &ipc_portbt, 0, ""); +/* + * Scheduler sysctls + */ + +SYSCTL_STRING(_kern, OID_AUTO, sched, + CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, + sched_string, sizeof(sched_string), + "Timeshare scheduler implementation"); /* - * Validate parameters and get old / set new parameters - * for max number of async IO worker threads. - * We only allow an increase in the number of worker threads. + * Only support runtime modification on embedded platforms + * with development config enabled */ -static int -sysctl_aiothreads(user_addr_t oldp, size_t *oldlenp, user_addr_t newp, size_t newlen) -{ - int error = 0; - int new_value; +#if CONFIG_EMBEDDED +#if !SECURE_KERNEL +extern int precise_user_kernel_time; +SYSCTL_INT(_kern, OID_AUTO, precise_user_kernel_time, + CTLFLAG_RW | CTLFLAG_LOCKED, + &precise_user_kernel_time, 0, "Precise accounting of kernel vs. 
user time"); +#endif +#endif - if ( oldp && *oldlenp < sizeof(int) ) - return (ENOMEM); - if ( newp && newlen != sizeof(int) ) - return (EINVAL); - - *oldlenp = sizeof(int); - if ( oldp ) - error = copyout( &aio_worker_threads, oldp, sizeof(int) ); - if ( error == 0 && newp ) - error = copyin( newp, &new_value, sizeof(int) ); - if ( error == 0 && newp ) { - if (new_value > aio_worker_threads ) { - _aio_create_worker_threads( (new_value - aio_worker_threads) ); - aio_worker_threads = new_value; - } - else - error = EINVAL; - } - return( error ); - -} /* sysctl_aiothreads */ +/* Parameters related to timer coalescing tuning, to be replaced + * with a dedicated systemcall in the future. + */ +/* Enable processing pending timers in the context of any other interrupt + * Coalescing tuning parameters for various thread/task attributes */ +STATIC int +sysctl_timer_user_us_kernel_abstime SYSCTL_HANDLER_ARGS +{ +#pragma unused(oidp) + int size = arg2; /* subcommand*/ + int error; + int changed = 0; + uint64_t old_value_ns; + uint64_t new_value_ns; + uint64_t value_abstime; + if (size == sizeof(uint32_t)) + value_abstime = *((uint32_t *)arg1); + else if (size == sizeof(uint64_t)) + value_abstime = *((uint64_t *)arg1); + else return ENOTSUP; + + absolutetime_to_nanoseconds(value_abstime, &old_value_ns); + error = sysctl_io_number(req, old_value_ns, sizeof(old_value_ns), &new_value_ns, &changed); + if ((error) || (!changed)) + return error; -/* - * Validate parameters and get old / set new parameters - * for max number of processes per UID. - * Makes sure per UID limit is less than the system wide limit. 
+ nanoseconds_to_absolutetime(new_value_ns, &value_abstime); + if (size == sizeof(uint32_t)) + *((uint32_t *)arg1) = (uint32_t)value_abstime; + else + *((uint64_t *)arg1) = value_abstime; + return error; +} + +SYSCTL_INT(_kern, OID_AUTO, timer_coalesce_bg_scale, + CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcoal_prio_params.timer_coalesce_bg_shift, 0, ""); +SYSCTL_PROC(_kern, OID_AUTO, timer_resort_threshold_ns, + CTLTYPE_QUAD | CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcoal_prio_params.timer_resort_threshold_abstime, + sizeof(tcoal_prio_params.timer_resort_threshold_abstime), + sysctl_timer_user_us_kernel_abstime, + "Q", ""); +SYSCTL_PROC(_kern, OID_AUTO, timer_coalesce_bg_ns_max, + CTLTYPE_QUAD | CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcoal_prio_params.timer_coalesce_bg_abstime_max, + sizeof(tcoal_prio_params.timer_coalesce_bg_abstime_max), + sysctl_timer_user_us_kernel_abstime, + "Q", ""); + +SYSCTL_INT(_kern, OID_AUTO, timer_coalesce_kt_scale, + CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcoal_prio_params.timer_coalesce_kt_shift, 0, ""); + +SYSCTL_PROC(_kern, OID_AUTO, timer_coalesce_kt_ns_max, + CTLTYPE_QUAD | CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcoal_prio_params.timer_coalesce_kt_abstime_max, + sizeof(tcoal_prio_params.timer_coalesce_kt_abstime_max), + sysctl_timer_user_us_kernel_abstime, + "Q", ""); + +SYSCTL_INT(_kern, OID_AUTO, timer_coalesce_fp_scale, + CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcoal_prio_params.timer_coalesce_fp_shift, 0, ""); + +SYSCTL_PROC(_kern, OID_AUTO, timer_coalesce_fp_ns_max, + CTLTYPE_QUAD | CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcoal_prio_params.timer_coalesce_fp_abstime_max, + sizeof(tcoal_prio_params.timer_coalesce_fp_abstime_max), + sysctl_timer_user_us_kernel_abstime, + "Q", ""); + +SYSCTL_INT(_kern, OID_AUTO, timer_coalesce_ts_scale, + CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcoal_prio_params.timer_coalesce_ts_shift, 0, ""); + +SYSCTL_PROC(_kern, OID_AUTO, 
timer_coalesce_ts_ns_max, + CTLTYPE_QUAD | CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcoal_prio_params.timer_coalesce_ts_abstime_max, + sizeof(tcoal_prio_params.timer_coalesce_ts_abstime_max), + sysctl_timer_user_us_kernel_abstime, + "Q", ""); + +SYSCTL_INT(_kern, OID_AUTO, timer_coalesce_tier0_scale, + CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcoal_prio_params.latency_qos_scale[0], 0, ""); + +SYSCTL_PROC(_kern, OID_AUTO, timer_coalesce_tier0_ns_max, + CTLTYPE_QUAD | CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcoal_prio_params.latency_qos_abstime_max[0], + sizeof(tcoal_prio_params.latency_qos_abstime_max[0]), + sysctl_timer_user_us_kernel_abstime, + "Q", ""); + +SYSCTL_INT(_kern, OID_AUTO, timer_coalesce_tier1_scale, + CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcoal_prio_params.latency_qos_scale[1], 0, ""); + +SYSCTL_PROC(_kern, OID_AUTO, timer_coalesce_tier1_ns_max, + CTLTYPE_QUAD | CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcoal_prio_params.latency_qos_abstime_max[1], + sizeof(tcoal_prio_params.latency_qos_abstime_max[1]), + sysctl_timer_user_us_kernel_abstime, + "Q", ""); + +SYSCTL_INT(_kern, OID_AUTO, timer_coalesce_tier2_scale, + CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcoal_prio_params.latency_qos_scale[2], 0, ""); + +SYSCTL_PROC(_kern, OID_AUTO, timer_coalesce_tier2_ns_max, + CTLTYPE_QUAD | CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcoal_prio_params.latency_qos_abstime_max[2], + sizeof(tcoal_prio_params.latency_qos_abstime_max[2]), + sysctl_timer_user_us_kernel_abstime, + "Q", ""); + +SYSCTL_INT(_kern, OID_AUTO, timer_coalesce_tier3_scale, + CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcoal_prio_params.latency_qos_scale[3], 0, ""); + +SYSCTL_PROC(_kern, OID_AUTO, timer_coalesce_tier3_ns_max, + CTLTYPE_QUAD | CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcoal_prio_params.latency_qos_abstime_max[3], + sizeof(tcoal_prio_params.latency_qos_abstime_max[3]), + sysctl_timer_user_us_kernel_abstime, + "Q", ""); + 
+SYSCTL_INT(_kern, OID_AUTO, timer_coalesce_tier4_scale, + CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcoal_prio_params.latency_qos_scale[4], 0, ""); + +SYSCTL_PROC(_kern, OID_AUTO, timer_coalesce_tier4_ns_max, + CTLTYPE_QUAD | CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcoal_prio_params.latency_qos_abstime_max[4], + sizeof(tcoal_prio_params.latency_qos_abstime_max[4]), + sysctl_timer_user_us_kernel_abstime, + "Q", ""); + +SYSCTL_INT(_kern, OID_AUTO, timer_coalesce_tier5_scale, + CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcoal_prio_params.latency_qos_scale[5], 0, ""); + +SYSCTL_PROC(_kern, OID_AUTO, timer_coalesce_tier5_ns_max, + CTLTYPE_QUAD | CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcoal_prio_params.latency_qos_abstime_max[5], + sizeof(tcoal_prio_params.latency_qos_abstime_max[5]), + sysctl_timer_user_us_kernel_abstime, + "Q", ""); + +/* Communicate the "user idle level" heuristic to the timer layer, and + * potentially other layers in the future. */ + static int -sysctl_maxprocperuid(user_addr_t oldp, size_t *oldlenp, - user_addr_t newp, size_t newlen) +timer_user_idle_level(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) { + int new_value = 0, old_value = 0, changed = 0, error; + + old_value = timer_get_user_idle_level(); + + error = sysctl_io_number(req, old_value, sizeof(int), &new_value, &changed); + + if (error == 0 && changed) { + if (timer_set_user_idle_level(new_value) != KERN_SUCCESS) + error = ERANGE; + } + + return error; +} + +SYSCTL_PROC(_machdep, OID_AUTO, user_idle_level, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, + 0, 0, + timer_user_idle_level, "I", "User idle level heuristic, 0-128"); + +#if HYPERVISOR +SYSCTL_INT(_kern, OID_AUTO, hv_support, + CTLFLAG_KERN | CTLFLAG_RD | CTLFLAG_LOCKED, + &hv_support_available, 0, ""); +#endif + +#if CONFIG_EMBEDDED +STATIC int +sysctl_darkboot SYSCTL_HANDLER_ARGS { - int error = 0; - int new_value; + int err = 0, value = 0; +#pragma 
unused(oidp, arg1, arg2, err, value, req) - if ( oldp != USER_ADDR_NULL && *oldlenp < sizeof(int) ) - return (ENOMEM); - if ( newp != USER_ADDR_NULL && newlen != sizeof(int) ) - return (EINVAL); - - *oldlenp = sizeof(int); - if ( oldp != USER_ADDR_NULL ) - error = copyout( &maxprocperuid, oldp, sizeof(int) ); - if ( error == 0 && newp != USER_ADDR_NULL ) { - error = copyin( newp, &new_value, sizeof(int) ); - if ( error == 0 ) { - AUDIT_ARG(value, new_value); - if ( new_value <= maxproc && new_value > 0 ) - maxprocperuid = new_value; - else - error = EINVAL; + /* + * Handle the sysctl request. + * + * If this is a read, the function will set the value to the current darkboot value. Otherwise, + * we'll get the request identifier into "value" and then we can honor it. + */ + if ((err = sysctl_io_number(req, darkboot, sizeof(int), &value, NULL)) != 0) { + goto exit; + } + + /* writing requested, let's process the request */ + if (req->newptr) { + /* writing is protected by an entitlement */ + if (priv_check_cred(kauth_cred_get(), PRIV_DARKBOOT, 0) != 0) { + err = EPERM; + goto exit; + } + + switch (value) { + case MEMORY_MAINTENANCE_DARK_BOOT_UNSET: + /* + * If the darkboot sysctl is unset, the NVRAM variable + * must be unset too. If that's not the case, it means + * someone is doing something crazy and not supported. + */ + if (darkboot != 0) { + int ret = PERemoveNVRAMProperty(MEMORY_MAINTENANCE_DARK_BOOT_NVRAM_NAME); + if (ret) { + darkboot = 0; + } else { + err = EINVAL; + } + } + break; + case MEMORY_MAINTENANCE_DARK_BOOT_SET: + darkboot = 1; + break; + case MEMORY_MAINTENANCE_DARK_BOOT_SET_PERSISTENT: { + /* + * Set the NVRAM and update 'darkboot' in case + * of success. Otherwise, do not update + * 'darkboot' and report the failure. 
+ */ + if (PEWriteNVRAMBooleanProperty(MEMORY_MAINTENANCE_DARK_BOOT_NVRAM_NAME, TRUE)) { + darkboot = 1; + } else { + err = EINVAL; + } + + break; + } + default: + err = EINVAL; } - else - error = EINVAL; } - return( error ); - -} /* sysctl_maxprocperuid */ +exit: + return err; +} -/* - * Validate parameters and get old / set new parameters - * for max number of files per process. - * Makes sure per process limit is less than the system-wide limit. +SYSCTL_PROC(_kern, OID_AUTO, darkboot, + CTLFLAG_KERN | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY, + 0, 0, sysctl_darkboot, "I", ""); +#endif + +#if DEVELOPMENT || DEBUG +#include +/* This should result in a fatal exception, verifying that "sysent" is + * write-protected. */ static int -sysctl_maxfilesperproc(user_addr_t oldp, size_t *oldlenp, - user_addr_t newp, size_t newlen) -{ - int error = 0; - int new_value; +kern_sysent_write(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) { + uint64_t new_value = 0, old_value = 0; + int changed = 0, error; - if ( oldp != USER_ADDR_NULL && *oldlenp < sizeof(int) ) - return (ENOMEM); - if ( newp != USER_ADDR_NULL && newlen != sizeof(int) ) - return (EINVAL); - - *oldlenp = sizeof(int); - if ( oldp != USER_ADDR_NULL ) - error = copyout( &maxfilesperproc, oldp, sizeof(int) ); - if ( error == 0 && newp != USER_ADDR_NULL ) { - error = copyin( newp, &new_value, sizeof(int) ); - if ( error == 0 ) { - AUDIT_ARG(value, new_value); - if ( new_value < maxfiles && new_value > 0 ) - maxfilesperproc = new_value; - else - error = EINVAL; - } - else - error = EINVAL; + error = sysctl_io_number(req, old_value, sizeof(uint64_t), &new_value, &changed); + if ((error == 0) && changed) { + volatile uint32_t *wraddr = (uint32_t *) &sysent[0]; + *wraddr = 0; + printf("sysent[0] write succeeded\n"); } - return( error ); - -} /* sysctl_maxfilesperproc */ + return error; +} +SYSCTL_PROC(_kern, OID_AUTO, sysent_const_check, + CTLTYPE_QUAD | 
CTLFLAG_RW | CTLFLAG_LOCKED, + 0, 0, + kern_sysent_write, "I", "Attempt sysent[0] write"); + +#endif + +#if DEVELOPMENT || DEBUG +SYSCTL_COMPAT_INT(_kern, OID_AUTO, development, CTLFLAG_RD | CTLFLAG_MASKED, NULL, 1, ""); +#else +SYSCTL_COMPAT_INT(_kern, OID_AUTO, development, CTLFLAG_RD | CTLFLAG_MASKED, NULL, 0, ""); +#endif + + +#if DEVELOPMENT || DEBUG -/* - * Validate parameters and get old / set new parameters - * for the system-wide limit on the max number of processes. - * Makes sure the system-wide limit is less than the configured hard - * limit set at kernel compilation. - */ static int -sysctl_maxproc(user_addr_t oldp, size_t *oldlenp, - user_addr_t newp, size_t newlen ) +sysctl_panic_test SYSCTL_HANDLER_ARGS { - int error = 0; - int new_value; +#pragma unused(arg1, arg2) + int rval = 0; + char str[32] = "entry prelog postlog postcore"; + + rval = sysctl_handle_string(oidp, str, sizeof(str), req); + + if (rval == 0 && req->newptr) { + if (strncmp("entry", str, strlen("entry")) == 0) { + panic_with_options(0, NULL, DEBUGGER_OPTION_RECURPANIC_ENTRY, "test recursive panic at entry"); + } else if (strncmp("prelog", str, strlen("prelog")) == 0) { + panic_with_options(0, NULL, DEBUGGER_OPTION_RECURPANIC_PRELOG, "test recursive panic prior to writing a paniclog"); + } else if (strncmp("postlog", str, strlen("postlog")) == 0) { + panic_with_options(0, NULL, DEBUGGER_OPTION_RECURPANIC_POSTLOG, "test recursive panic subsequent to paniclog"); + } else if (strncmp("postcore", str, strlen("postcore")) == 0) { + panic_with_options(0, NULL, DEBUGGER_OPTION_RECURPANIC_POSTCORE, "test recursive panic subsequent to on-device core"); + } + } + + return rval; +} - if ( oldp != USER_ADDR_NULL && *oldlenp < sizeof(int) ) - return (ENOMEM); - if ( newp != USER_ADDR_NULL && newlen != sizeof(int) ) - return (EINVAL); - - *oldlenp = sizeof(int); - if ( oldp != USER_ADDR_NULL ) - error = copyout( &maxproc, oldp, sizeof(int) ); - if ( error == 0 && newp != USER_ADDR_NULL ) { - 
error = copyin( newp, &new_value, sizeof(int) ); - if ( error == 0 ) { - AUDIT_ARG(value, new_value); - if ( new_value <= hard_maxproc && new_value > 0 ) - maxproc = new_value; - else - error = EINVAL; - } - else - error = EINVAL; - } - return( error ); - -} /* sysctl_maxproc */ +static int +sysctl_debugger_test SYSCTL_HANDLER_ARGS +{ +#pragma unused(arg1, arg2) + int rval = 0; + char str[32] = "entry prelog postlog postcore"; + + rval = sysctl_handle_string(oidp, str, sizeof(str), req); + + if (rval == 0 && req->newptr) { + if (strncmp("entry", str, strlen("entry")) == 0) { + DebuggerWithContext(0, NULL, "test recursive panic via debugger at entry", DEBUGGER_OPTION_RECURPANIC_ENTRY); + } else if (strncmp("prelog", str, strlen("prelog")) == 0) { + DebuggerWithContext(0, NULL, "test recursive panic via debugger prior to writing a paniclog", DEBUGGER_OPTION_RECURPANIC_PRELOG); + } else if (strncmp("postlog", str, strlen("postlog")) == 0) { + DebuggerWithContext(0, NULL, "test recursive panic via debugger subsequent to paniclog", DEBUGGER_OPTION_RECURPANIC_POSTLOG); + } else if (strncmp("postcore", str, strlen("postcore")) == 0) { + DebuggerWithContext(0, NULL, "test recursive panic via debugger subsequent to on-device core", DEBUGGER_OPTION_RECURPANIC_POSTCORE); + } + } + + return rval; +} + +decl_lck_spin_data(, spinlock_panic_test_lock) + +__attribute__((noreturn)) +static void +spinlock_panic_test_acquire_spinlock(void * arg __unused, wait_result_t wres __unused) +{ + lck_spin_lock(&spinlock_panic_test_lock); + while (1) { ; } +} -#if __i386__ static int -sysctl_sysctl_exec_affinity SYSCTL_HANDLER_ARGS +sysctl_spinlock_panic_test SYSCTL_HANDLER_ARGS { - struct proc *cur_proc = req->p; - int error; - - if (req->oldptr != USER_ADDR_NULL) { - cpu_type_t oldcputype = (cur_proc->p_flag & P_AFFINITY) ? 
CPU_TYPE_POWERPC : CPU_TYPE_I386; - if ((error = SYSCTL_OUT(req, &oldcputype, sizeof(oldcputype)))) - return error; - } +#pragma unused(oidp, arg1, arg2) + if (req->newlen == 0) + return EINVAL; - if (req->newptr != USER_ADDR_NULL) { - cpu_type_t newcputype; - if ((error = SYSCTL_IN(req, &newcputype, sizeof(newcputype)))) - return error; - if (newcputype == CPU_TYPE_I386) - cur_proc->p_flag &= ~P_AFFINITY; - else if (newcputype == CPU_TYPE_POWERPC) - cur_proc->p_flag |= P_AFFINITY; - else - return (EINVAL); + thread_t panic_spinlock_thread; + /* Initialize panic spinlock */ + lck_grp_t * panic_spinlock_grp; + lck_grp_attr_t * panic_spinlock_grp_attr; + lck_attr_t * panic_spinlock_attr; + + panic_spinlock_grp_attr = lck_grp_attr_alloc_init(); + panic_spinlock_grp = lck_grp_alloc_init("panic_spinlock", panic_spinlock_grp_attr); + panic_spinlock_attr = lck_attr_alloc_init(); + + lck_spin_init(&spinlock_panic_test_lock, panic_spinlock_grp, panic_spinlock_attr); + + + /* Create thread to acquire spinlock */ + if (kernel_thread_start(spinlock_panic_test_acquire_spinlock, NULL, &panic_spinlock_thread) != KERN_SUCCESS) { + return EBUSY; } - - return 0; + + /* Try to acquire spinlock -- should panic eventually */ + lck_spin_lock(&spinlock_panic_test_lock); + while(1) { ; } +} + +__attribute__((noreturn)) +static void +simultaneous_panic_worker +(void * arg, wait_result_t wres __unused) +{ + atomic_int *start_panic = (atomic_int *)arg; + + while (!atomic_load(start_panic)) { ; } + panic("SIMULTANEOUS PANIC TEST: INITIATING PANIC FROM CPU %d", cpu_number()); + __builtin_unreachable(); } -SYSCTL_PROC(_sysctl, OID_AUTO, proc_exec_affinity, CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, 0, 0, sysctl_sysctl_exec_affinity ,"I","proc_exec_affinity"); -#endif static int -fetch_process_cputype( - struct proc *cur_proc, - int *name, - u_int namelen, - cpu_type_t *cputype) +sysctl_simultaneous_panic_test SYSCTL_HANDLER_ARGS { - struct proc *p = NULL; - cpu_type_t ret = 0; - - if (namelen == 
0) - p = cur_proc; - else if (namelen == 1) { - p = pfind(name[0]); - if (p == NULL) - return (EINVAL); - if ((kauth_cred_getuid(p->p_ucred) != kauth_cred_getuid(kauth_cred_get())) - && suser(kauth_cred_get(), &cur_proc->p_acflag)) - return (EPERM); - } else { +#pragma unused(oidp, arg1, arg2) + if (req->newlen == 0) return EINVAL; - } -#if __i386__ - if (p->p_flag & P_TRANSLATED) { - ret = CPU_TYPE_POWERPC; + int i = 0, threads_to_create = 2 * processor_count; + atomic_int start_panic = 0; + unsigned int threads_created = 0; + thread_t new_panic_thread; + + for (i = threads_to_create; i > 0; i--) { + if (kernel_thread_start(simultaneous_panic_worker, (void *) &start_panic, &new_panic_thread) == KERN_SUCCESS) { + threads_created++; + } } - else -#endif - { - ret = cpu_type(); - if (IS_64BIT_PROCESS(p)) - ret |= CPU_ARCH_ABI64; + + /* FAIL if we couldn't create at least processor_count threads */ + if (threads_created < processor_count) { + panic("SIMULTANEOUS PANIC TEST: FAILED TO CREATE ENOUGH THREADS, ONLY CREATED %d (of %d)", + threads_created, threads_to_create); } - *cputype = ret; - - return 0; + + atomic_exchange(&start_panic, 1); + while (1) { ; } } -static int -sysctl_sysctl_native SYSCTL_HANDLER_ARGS +SYSCTL_PROC(_debug, OID_AUTO, panic_test, CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_MASKED, 0, 0, sysctl_panic_test, "A", "panic test"); +SYSCTL_PROC(_debug, OID_AUTO, debugger_test, CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_MASKED, 0, 0, sysctl_debugger_test, "A", "debugger test"); +SYSCTL_PROC(_debug, OID_AUTO, spinlock_panic_test, CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_MASKED, 0, 0, sysctl_spinlock_panic_test, "A", "spinlock panic test"); +SYSCTL_PROC(_debug, OID_AUTO, simultaneous_panic_test, CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_MASKED, 0, 0, sysctl_simultaneous_panic_test, "A", "simultaneous panic test"); + + +#endif /* DEVELOPMENT || DEBUG */ + +const uint32_t thread_groups_supported = 0; + +STATIC 
int +sysctl_thread_groups_supported (__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) { - int error; - cpu_type_t proc_cputype = 0; - if ((error = fetch_process_cputype(req->p, (int *)arg1, arg2, &proc_cputype)) != 0) - return error; - int res = 1; - if ((proc_cputype & ~CPU_ARCH_MASK) != (cpu_type() & ~CPU_ARCH_MASK)) - res = 0; - return SYSCTL_OUT(req, &res, sizeof(res)); -} -SYSCTL_PROC(_sysctl, OID_AUTO, proc_native, CTLTYPE_NODE|CTLFLAG_RD, 0, 0, sysctl_sysctl_native ,"I","proc_native"); + int value = thread_groups_supported; + return sysctl_io_number(req, value, sizeof(value), NULL, NULL); +} + +SYSCTL_PROC(_kern, OID_AUTO, thread_groups_supported, CTLFLAG_RD | CTLFLAG_LOCKED | CTLFLAG_KERN, + 0, 0, &sysctl_thread_groups_supported, "I", "thread groups supported"); static int -sysctl_sysctl_cputype SYSCTL_HANDLER_ARGS +sysctl_grade_cputype SYSCTL_HANDLER_ARGS { - int error; - cpu_type_t proc_cputype = 0; - if ((error = fetch_process_cputype(req->p, (int *)arg1, arg2, &proc_cputype)) != 0) +#pragma unused(arg1, arg2, oidp) + int error = 0; + int type_tuple[2] = {}; + int return_value = 0; + + error = SYSCTL_IN(req, &type_tuple, sizeof(type_tuple)); + + if (error) { return error; - return SYSCTL_OUT(req, &proc_cputype, sizeof(proc_cputype)); + } + + return_value = grade_binary(type_tuple[0], type_tuple[1]); + + error = SYSCTL_OUT(req, &return_value, sizeof(return_value)); + + if (error) { + return error; + } + + return error; } -SYSCTL_PROC(_sysctl, OID_AUTO, proc_cputype, CTLTYPE_NODE|CTLFLAG_RD, 0, 0, sysctl_sysctl_cputype ,"I","proc_cputype"); +SYSCTL_PROC(_kern, OID_AUTO, grade_cputype, + CTLFLAG_RW|CTLFLAG_ANYBODY|CTLFLAG_MASKED|CTLFLAG_LOCKED|CTLTYPE_OPAQUE, + 0, 0, &sysctl_grade_cputype, "S", + "grade value of cpu_type_t+cpu_sub_type_t");