X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/43866e378188c25dd1e2208016ab3cbeb086ae6c..b226f5e54a60dc81db17b1260381d7dbfea3cdf1:/bsd/kern/kern_newsysctl.c?ds=sidebyside

diff --git a/bsd/kern/kern_newsysctl.c b/bsd/kern/kern_newsysctl.c
index 09da4572b..0381325a9 100644
--- a/bsd/kern/kern_newsysctl.c
+++ b/bsd/kern/kern_newsysctl.c
@@ -1,16 +1,19 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
  *
- * @APPLE_LICENSE_HEADER_START@
- * 
- * Copyright (c) 1999-2003 Apple Computer, Inc.  All Rights Reserved.
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
  * This file contains Original Code and/or Modifications of Original Code
  * as defined in and that are subject to the Apple Public Source License
  * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this
- * file.
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
  * 
  * The Original Code and all software distributed under the License are
  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
@@ -20,9 +23,9 @@
  * Please see the License for the specific language governing rights and
  * limitations under the License.
  * 
- * @APPLE_LICENSE_HEADER_END@
- */
-/*-
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ *
+ *
  * Copyright (c) 1982, 1986, 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
@@ -69,61 +72,64 @@
 #include <sys/kernel.h>
 #include <sys/sysctl.h>
 #include <sys/malloc.h>
-#include <sys/proc.h>
+#include <sys/proc_internal.h>
+#include <sys/kauth.h>
 #include <sys/systm.h>
+#include <sys/sysproto.h>
 
-/*
-struct sysctl_oid_list sysctl__debug_children;
-struct sysctl_oid_list sysctl__kern_children;
-struct sysctl_oid_list sysctl__net_children;
-struct sysctl_oid_list sysctl__sysctl_children;
-*/
-
-extern struct sysctl_oid *newsysctl_list[];
-
-
-static void
-sysctl_sysctl_debug_dump_node(struct sysctl_oid_list *l, int i);
+#include <security/audit/audit.h>
+#include <pexpert/pexpert.h>
 
+#if CONFIG_MACF
+#include <security/mac_framework.h>
+#endif
 
 
-/*
- * Locking and stats
- */
-static struct sysctl_lock {
-	int	sl_lock;
-	int	sl_want;
-	int	sl_locked;
-} memlock;
+lck_grp_t * sysctl_lock_group = NULL;
+lck_rw_t * sysctl_geometry_lock = NULL;
+lck_mtx_t * sysctl_unlocked_node_lock = NULL;
 
 /*
- * XXX this does not belong here
+ * Conditionally allow dtrace to see these functions for debugging purposes.
  */
-static funnel_t *
-spl_kernel_funnel(void)
-{
-	funnel_t *cfunnel;
-
-	cfunnel = thread_funnel_get();
-	if (cfunnel != kernel_flock) {
-		if (cfunnel != NULL)
-			thread_funnel_set(cfunnel, FALSE);
-		thread_funnel_set(kernel_flock, TRUE);
-	}
-	return(cfunnel);
-}
-
-static void
-splx_kernel_funnel(funnel_t *saved)
-{
-	if (saved != kernel_flock) {
-		thread_funnel_set(kernel_flock, FALSE);
-		if (saved != NULL) 
-			thread_funnel_set(saved, TRUE);
-	}
-}
+#ifdef STATIC
+#undef STATIC
+#endif
+#if 0
+#define	STATIC
+#else
+#define STATIC static
+#endif
 
-static int sysctl_root SYSCTL_HANDLER_ARGS;
+/* forward declarations  of static functions */
+STATIC void sysctl_sysctl_debug_dump_node(struct sysctl_oid_list *l, int i);
+STATIC int sysctl_sysctl_debug(struct sysctl_oid *oidp, void *arg1,
+	int arg2, struct sysctl_req *req);
+STATIC int sysctl_sysctl_name(struct sysctl_oid *oidp, void *arg1,
+	int arg2, struct sysctl_req *req);
+STATIC int sysctl_sysctl_next_ls (struct sysctl_oid_list *lsp,
+	int *name, u_int namelen, int *next, int *len, int level,
+	struct sysctl_oid **oidpp);
+STATIC int sysctl_old_kernel(struct sysctl_req *req, const void *p, size_t l);
+STATIC int sysctl_new_kernel(struct sysctl_req *req, void *p, size_t l);
+STATIC int name2oid (char *name, int *oid, u_int *len);
+STATIC int sysctl_sysctl_name2oid(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
+STATIC int sysctl_sysctl_next(struct sysctl_oid *oidp, void *arg1, int arg2,
+        struct sysctl_req *req);
+STATIC int sysctl_sysctl_oidfmt(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
+STATIC int sysctl_old_user(struct sysctl_req *req, const void *p, size_t l);
+STATIC int sysctl_new_user(struct sysctl_req *req, void *p, size_t l);
+
+STATIC void sysctl_create_user_req(struct sysctl_req *req, struct proc *p, user_addr_t oldp,
+								   size_t oldlen, user_addr_t newp, size_t newlen);
+STATIC int sysctl_root(boolean_t from_kernel, boolean_t string_is_canonical, char *namestring, size_t namestringlen, int *name, u_int namelen, struct sysctl_req *req);
+
+int	kernel_sysctl(struct proc *p, int *name, u_int namelen, void *old, size_t *oldlenp, void *new, size_t newlen);
+int	kernel_sysctlbyname(const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen);
+int	userland_sysctl(boolean_t string_is_canonical,
+					char *namestring, size_t namestringlen,
+					int *name, u_int namelen, struct sysctl_req *req,
+					size_t *retval);
 
 struct sysctl_oid_list sysctl__children; /* root list */
 
@@ -133,32 +139,75 @@ struct sysctl_oid_list sysctl__children; /* root list */
  * Order by number in each list.
  */
 
-void sysctl_register_oid(struct sysctl_oid *oidp)
+void
+sysctl_register_oid(struct sysctl_oid *new_oidp)
 {
-	struct sysctl_oid_list *parent = oidp->oid_parent;
+	struct sysctl_oid *oidp = NULL;
+	struct sysctl_oid_list *parent = new_oidp->oid_parent;
 	struct sysctl_oid *p;
 	struct sysctl_oid *q;
 	int n;
-	funnel_t *fnl;
 
-	fnl = spl_kernel_funnel();
+	/*
+	 * The OID can be old-style (needs copy), new style without an earlier
+	 * version (also needs copy), or new style with a matching version (no
+	 * copy needed).  Later versions are rejected (presumably, the OID
+	 * structure was changed for a necessary reason).
+	 */
+	if (!(new_oidp->oid_kind & CTLFLAG_OID2)) {
+		/*
+		 * XXX:	M_TEMP is perhaps not the most appropriate zone, as it
+		 * XXX:	will subject us to use-after-free by other consumers.
+		 */
+		MALLOC(oidp, struct sysctl_oid *, sizeof(*oidp), M_TEMP, M_WAITOK | M_ZERO);
+		if (oidp == NULL)
+			return;		/* reject: no memory */
+
+		/*
+		 * Copy the structure only through the oid_fmt field, which
+		 * is the last field in a non-OID2 OID structure.
+		 *
+		 * Note:	We may want to set the oid_descr to the
+		 *		oid_name (or "") at some future date.
+		 */
+		memcpy(oidp, new_oidp, offsetof(struct sysctl_oid, oid_descr));
+	} else {
+		/* It's a later version; handle the versions we know about */
+		switch (new_oidp->oid_version) {
+		case SYSCTL_OID_VERSION:
+			/* current version */
+			oidp = new_oidp;
+			break;
+		default:
+			return;			/* rejects unknown version */
+		}
+	}
+
+	/* Get the write lock to modify the geometry */
+	lck_rw_lock_exclusive(sysctl_geometry_lock);
 
 	/*
 	 * If this oid has a number OID_AUTO, give it a number which
 	 * is greater than any current oid.  Make sure it is at least
-	 * 100 to leave space for pre-assigned oid numbers.
+	 * OID_AUTO_START to leave space for pre-assigned oid numbers.
 	 */
-/*	sysctl_sysctl_debug_dump_node(parent, 3); */
 	if (oidp->oid_number == OID_AUTO) {
-		/* First, find the highest oid in the parent list >99 */
-		n = 99;
+		/* First, find the highest oid in the parent list >OID_AUTO_START-1 */
+		n = OID_AUTO_START;
 		SLIST_FOREACH(p, parent, oid_link) {
 			if (p->oid_number > n)
 				n = p->oid_number;
 		}
 		oidp->oid_number = n + 1;
+		/*
+		 * Reflect the number in an allocated OID into the template
+		 * of the caller for sysctl_unregister_oid() compares.
+		 */
+		if (oidp != new_oidp)
+			new_oidp->oid_number = oidp->oid_number;
 	}
 
+
 	/*
 	 * Insert the oid into the parent's list in order.
 	 */
@@ -173,64 +222,261 @@ void sysctl_register_oid(struct sysctl_oid *oidp)
 	else
 		SLIST_INSERT_HEAD(parent, oidp, oid_link);
 
-	splx_kernel_funnel(fnl);
+	/* Release the write lock */
+	lck_rw_unlock_exclusive(sysctl_geometry_lock);
 }
 
-void sysctl_unregister_oid(struct sysctl_oid *oidp)
+void
+sysctl_unregister_oid(struct sysctl_oid *oidp)
 {
-	funnel_t *fnl;
+	struct sysctl_oid *removed_oidp = NULL;	/* OID removed from tree */
+	struct sysctl_oid *old_oidp = NULL;	/* OID compatibility copy */
+
+	/* Get the write lock to modify the geometry */
+	lck_rw_lock_exclusive(sysctl_geometry_lock);
+
+	if (!(oidp->oid_kind & CTLFLAG_OID2)) {
+		/*
+		 * We're using a copy so we can get the new fields in an
+		 * old structure, so we have to iterate to compare the
+		 * partial structure; when we find a match, we remove it
+		 * normally and free the memory.
+		 */
+		SLIST_FOREACH(old_oidp, oidp->oid_parent, oid_link) {
+			if (!memcmp(&oidp->oid_number, &old_oidp->oid_number, (offsetof(struct sysctl_oid, oid_descr)-offsetof(struct sysctl_oid, oid_number)))) {
+                break;
+            }
+		}
+		if (old_oidp != NULL) {
+			SLIST_REMOVE(old_oidp->oid_parent, old_oidp, sysctl_oid, oid_link);
+			removed_oidp = old_oidp;
+		}
+	} else {
+		/* It's a later version; handle the versions we know about */
+		switch (oidp->oid_version) {
+		case SYSCTL_OID_VERSION:
+			/* We can just remove the OID directly... */
+			SLIST_REMOVE(oidp->oid_parent, oidp, sysctl_oid, oid_link);
+			removed_oidp = oidp;
+			break;
+		default:
+			 /* XXX: Can't happen; probably tree corruption. */
+			break;			/* rejects unknown version */
+		}
+	}
 
-	fnl = spl_kernel_funnel();
-	SLIST_REMOVE(oidp->oid_parent, oidp, sysctl_oid, oid_link);
-	splx_kernel_funnel(fnl);
+
+	/*
+	 * We've removed it from the list at this point, but we don't want
+	 * to return to the caller until all handler references have drained
+	 * out.  Doing things in this order prevents other people coming in
+	 * and starting new operations against the OID node we want removed.
+	 *
+	 * Note:	removed_oidp could be NULL if it wasn't found.
+	 */
+	while(removed_oidp && removed_oidp->oid_refcnt) {
+		lck_rw_sleep(sysctl_geometry_lock, LCK_SLEEP_EXCLUSIVE, &removed_oidp->oid_refcnt, THREAD_UNINT);
+	}
+
+	/* Release the write lock */
+	lck_rw_unlock_exclusive(sysctl_geometry_lock);
+
+	/* If it was allocated, free it after dropping the lock */
+	if (old_oidp != NULL) {
+		FREE(old_oidp, M_TEMP);
+	}
 }
 
 /*
  * Bulk-register all the oids in a linker_set.
  */
-void sysctl_register_set(struct linker_set *lsp)
+void
+sysctl_register_set(const char *set)
 {
-	int count = lsp->ls_length;
-	int i;
-	for (i = 0; i < count; i++)
-		sysctl_register_oid((struct sysctl_oid *) lsp->ls_items[i]);
+	struct sysctl_oid **oidpp, *oidp;
+
+	LINKER_SET_FOREACH(oidpp, struct sysctl_oid **, set) {
+		oidp = *oidpp;
+		if (!(oidp->oid_kind & CTLFLAG_NOAUTO)) {
+		    sysctl_register_oid(oidp);
+		}
+	}
 }
 
-void sysctl_unregister_set(struct linker_set *lsp)
+void
+sysctl_unregister_set(const char *set)
 {
-	int count = lsp->ls_length;
-	int i;
-	for (i = 0; i < count; i++)
-		sysctl_unregister_oid((struct sysctl_oid *) lsp->ls_items[i]);
+	struct sysctl_oid **oidpp, *oidp;
+
+	LINKER_SET_FOREACH(oidpp, struct sysctl_oid **, set) {
+		oidp = *oidpp;
+		if (!(oidp->oid_kind & CTLFLAG_NOAUTO)) {
+		    sysctl_unregister_oid(oidp);
+		}
+	}
 }
 
+/*
+ * Exported in BSDKernel.exports, kept for binary compatibility
+ */
+#if defined(__x86_64__)
+void
+sysctl_register_fixed(void)
+{
+}
+#endif
 
 /*
- * Register OID's from fixed list
+ * Register the kernel's oids on startup.
  */
 
-void sysctl_register_fixed()
+void
+sysctl_early_init(void)
 {
-    int i = 0;
+	/*
+	 * Initialize the geometry lock for reading/modifying the
+	 * sysctl tree. This is done here because IOKit registers
+	 * some sysctl's before bsd_init() would otherwise perform
+	 * subsystem initialization.
+	 */
 
+	sysctl_lock_group  = lck_grp_alloc_init("sysctl", NULL);
+	sysctl_geometry_lock = lck_rw_alloc_init(sysctl_lock_group, NULL);
+	sysctl_unlocked_node_lock = lck_mtx_alloc_init(sysctl_lock_group, NULL);
 
-    while (newsysctl_list[i]) {
-/*	printf("Registering %d\n", i); */
-	sysctl_register_oid(newsysctl_list[i++]);
-    }
+	sysctl_register_set("__sysctl_set");
 }
 
 /*
- * Register the kernel's oids on startup.
+ * New handler interface
+ *   If the sysctl caller (user mode or kernel mode) is interested in the
+ *   value (req->oldptr != NULL), we copy the data (bigValue etc.) out;
+ *   if the caller wants to set the value (req->newptr != NULL), we copy
+ *   the data in (*pValue etc.).
  */
-struct linker_set sysctl_set;
 
-void sysctl_register_all(void *arg)
+int
+sysctl_io_number(struct sysctl_req *req, long long bigValue, size_t valueSize, void *pValue, int *changed) {
+	int		smallValue;
+	int		error;
+
+	if (changed) *changed = 0;
+
+	/*
+	 * Handle the various combinations of caller buffer size and
+	 * data value size.  We are generous in the case where the
+	 * caller has specified a 32-bit buffer but the value is 64-bit
+	 * sized.
+	 */
+
+	/* 32 bit value expected or 32 bit buffer offered */
+	if (((valueSize == sizeof(int)) ||
+	    ((req->oldlen == sizeof(int)) && (valueSize == sizeof(long long))))
+			&& (req->oldptr)) {
+		smallValue = (int)bigValue;
+		if ((long long)smallValue != bigValue)
+			return(ERANGE);
+		error = SYSCTL_OUT(req, &smallValue, sizeof(smallValue));
+	} else {
+		/* any other case is either size-equal or a bug */
+		error = SYSCTL_OUT(req, &bigValue, valueSize);
+	}
+	/* error or nothing to set */
+	if (error || !req->newptr)
+		return(error);
+
+	/* set request for constant */
+	if (pValue == NULL)
+		return(EPERM);
+
+	/* set request needs to convert? */
+	if ((req->newlen == sizeof(int)) && (valueSize == sizeof(long long))) {
+		/* new value is 32 bits, upconvert to 64 bits */
+		error = SYSCTL_IN(req, &smallValue, sizeof(smallValue));
+		if (!error)
+			*(long long *)pValue = (long long)smallValue;
+	} else if ((req->newlen == sizeof(long long)) && (valueSize == sizeof(int))) {
+		/* new value is 64 bits, downconvert to 32 bits and range check */
+		error = SYSCTL_IN(req, &bigValue, sizeof(bigValue));
+		if (!error) {
+			smallValue = (int)bigValue;
+			if ((long long)smallValue != bigValue)
+				return(ERANGE);
+			*(int *)pValue = smallValue;
+		}
+	} else {
+		/* sizes match, just copy in */
+		error = SYSCTL_IN(req, pValue, valueSize);
+	}
+	if (!error && changed)
+		*changed = 1;
+	return(error);
+}
+
+int
+sysctl_io_string(struct sysctl_req *req, char *pValue, size_t valueSize, int trunc, int *changed)
 {
-	sysctl_register_set(&sysctl_set);
+	int error;
+
+	if (changed) *changed = 0;
+
+	if (trunc && req->oldptr && req->oldlen && (req->oldlen<strlen(pValue) + 1)) {
+		/* If trunc != 0, if you give it a too small (but larger than
+		 * 0 bytes) buffer, instead of returning ENOMEM, it truncates the
+		 * returned string to the buffer size.  This preserves the semantics
+		 * of some library routines implemented via sysctl, which truncate
+		 * their returned data, rather than simply returning an error. The
+		 * returned string is always NUL terminated. */
+		error = SYSCTL_OUT(req, pValue, req->oldlen-1);
+		if (!error) {
+			char c = 0;
+			error = SYSCTL_OUT(req, &c, 1);
+		}
+	} else {
+		/* Copy string out */
+		error = SYSCTL_OUT(req, pValue, strlen(pValue) + 1);
+	}
+
+	/* error or no new value */
+	if (error || !req->newptr)
+		return(error);
+
+	/* attempt to set read-only value */
+	if (valueSize == 0)
+		return(EPERM);
+
+	/* make sure there's room for the new string */
+	if (req->newlen >= valueSize)
+		return(EINVAL);
+
+	/* copy the string in and force NUL termination */
+	error = SYSCTL_IN(req, pValue, req->newlen);
+	pValue[req->newlen] = '\0';
+
+	if (!error && changed)
+		*changed = 1;
+	return(error);
 }
 
-SYSINIT(sysctl, SI_SUB_KMEM, SI_ORDER_ANY, sysctl_register_all, 0);
+int sysctl_io_opaque(struct sysctl_req *req,void *pValue, size_t valueSize, int *changed)
+{
+	int error;
+
+	if (changed) *changed = 0;
+
+	/* Copy blob out */
+	error = SYSCTL_OUT(req, pValue, valueSize);
+
+	/* error or nothing to set */
+	if (error || !req->newptr)
+		return(error);
+
+	error = SYSCTL_IN(req, pValue, valueSize);
+
+	if (!error && changed)
+		*changed = 1;
+	return(error);
+}
 
 /*
  * "Staff-functions"
@@ -250,7 +496,32 @@ SYSINIT(sysctl, SI_SUB_KMEM, SI_ORDER_ANY, sysctl_register_all, 0);
  * {0,4,...}	return the kind & format info for the "..." OID.
  */
 
-static void
+/*
+ * sysctl_sysctl_debug_dump_node
+ *
+ * Description:	Dump debug information for a given sysctl_oid_list at the
+ *		given oid depth out to the kernel log, via printf
+ *
+ * Parameters:	l				sysctl_oid_list pointer
+ *		i				current node depth
+ *
+ * Returns:	(void)
+ *
+ * Implicit:	kernel log, modified
+ *
+ * Locks:	Assumes sysctl_geometry_lock is held prior to calling
+ *
+ * Notes:	This function may call itself recursively to resolve Node
+ *		values, which potentially have an inferior sysctl_oid_list
+ *
+ *		This function is only callable indirectly via the function
+ *		sysctl_sysctl_debug()
+ *
+ * Bugs:	The node depth indentation does not work; this may be an
+ *		artifact of leading space removal by the log daemon itself
+ *		or some intermediate routine.
+ */
+STATIC void
 sysctl_sysctl_debug_dump_node(struct sysctl_oid_list *l, int i)
 {
 	int k;
@@ -263,7 +534,8 @@ sysctl_sysctl_debug_dump_node(struct sysctl_oid_list *l, int i)
 
 		printf("%d %s ", oidp->oid_number, oidp->oid_name);
 
-		printf("%c%c",
+		printf("%c%c%c",
+			oidp->oid_kind & CTLFLAG_LOCKED ? 'L':' ',
 			oidp->oid_kind & CTLFLAG_RD ? 'R':' ',
 			oidp->oid_kind & CTLFLAG_WR ? 'W':' ');
 
@@ -288,35 +560,105 @@ sysctl_sysctl_debug_dump_node(struct sysctl_oid_list *l, int i)
 	}
 }
 
-static int
-sysctl_sysctl_debug SYSCTL_HANDLER_ARGS
+/*
+ * sysctl_sysctl_debug
+ *
+ * Description:	This function implements the "sysctl.debug" portion of the
+ *		OID space for sysctl.
+ *
+ * OID:		0, 0
+ *
+ * Parameters:	__unused
+ *
+ * Returns:	ENOENT
+ *
+ * Implicit:	kernel log, modified
+ *
+ * Locks:	Acquires and then releases a read lock on the
+ *		sysctl_geometry_lock
+ */
+STATIC int
+sysctl_sysctl_debug(__unused struct sysctl_oid *oidp, __unused void *arg1,
+	__unused int arg2, __unused struct sysctl_req *req)
 {
+	lck_rw_lock_shared(sysctl_geometry_lock);
 	sysctl_sysctl_debug_dump_node(&sysctl__children, 0);
+	lck_rw_done(sysctl_geometry_lock);
 	return ENOENT;
 }
 
-SYSCTL_PROC(_sysctl, 0, debug, CTLTYPE_STRING|CTLFLAG_RD,
+SYSCTL_PROC(_sysctl, 0, debug, CTLTYPE_STRING|CTLFLAG_RD | CTLFLAG_LOCKED,
 	0, 0, sysctl_sysctl_debug, "-", "");
 
-static int
-sysctl_sysctl_name SYSCTL_HANDLER_ARGS
+/*
+ * sysctl_sysctl_name
+ *
+ * Description:	Convert an OID into a string name; this is used by the user
+ *		space sysctl() command line utility; this is done in a purely
+ *		advisory capacity (e.g. to provide node names for "sysctl -A"
+ *		output).
+ *
+ * OID:		0, 1
+ *
+ * Parameters:	oidp				__unused
+ *		arg1				A pointer to the OID name list
+ *						integer array, beginning at
+ *						adjusted option base 2
+ *		arg2				The number of elements which
+ *						remain in the name array
+ *
+ * Returns:	0				Success
+ *	SYSCTL_OUT:EPERM			Permission denied
+ *	SYSCTL_OUT:EFAULT			Bad user supplied buffer
+ *	SYSCTL_OUT:???				Return value from user function
+ *						for SYSCTL_PROC leaf node
+ *
+ * Implicit:	Contents of user request buffer, modified
+ *
+ * Locks:	Acquires and then releases a read lock on the
+ *		sysctl_geometry_lock
+ *
+ * Notes:	SPI (System Programming Interface); this is subject to change
+ *		and may not be relied upon by third party applications; use
+ *		a subprocess to communicate with the "sysctl" command line
+ *		command instead, if you believe you need this functionality.
+ *		Preferably, use sysctlbyname() instead.
+ *
+ *		Setting of the NULL termination of the output string is
+ *		delayed until after the geometry lock is dropped.  If there
+ *		are no Entries remaining in the OID name list when this
+ *		function is called, it will still write out the termination
+ *		byte.
+ *
+ *		This function differs from other sysctl functions in that
+ *		it can not take an output buffer length of 0 to determine the
+ *		space which will be required.  It is suggested that the buffer
+ *		length be PATH_MAX, and that authors of new sysctl's refrain
+ *		from exceeding this string length.
+ */
+STATIC int
+sysctl_sysctl_name(__unused struct sysctl_oid *oidp, void *arg1, int arg2,
+        struct sysctl_req *req)
 {
 	int *name = (int *) arg1;
 	u_int namelen = arg2;
 	int error = 0;
 	struct sysctl_oid *oid;
 	struct sysctl_oid_list *lsp = &sysctl__children, *lsp2;
-	char buf[10];
+	char tempbuf[10] = {};
 
+	lck_rw_lock_shared(sysctl_geometry_lock);
 	while (namelen) {
 		if (!lsp) {
-			snprintf(buf,sizeof(buf),"%d",*name);
+			snprintf(tempbuf,sizeof(tempbuf),"%d",*name);
 			if (req->oldidx)
 				error = SYSCTL_OUT(req, ".", 1);
 			if (!error)
-				error = SYSCTL_OUT(req, buf, strlen(buf));
-			if (error)
+				error = SYSCTL_OUT(req, tempbuf, strlen(tempbuf));
+			if (error) {
+				lck_rw_done(sysctl_geometry_lock);
 				return (error);
+			}
 			namelen--;
 			name++;
 			continue;
@@ -331,8 +673,10 @@ sysctl_sysctl_name SYSCTL_HANDLER_ARGS
 			if (!error)
 				error = SYSCTL_OUT(req, oid->oid_name,
 					strlen(oid->oid_name));
-			if (error)
+			if (error) {
+				lck_rw_done(sysctl_geometry_lock);
 				return (error);
+			}
 
 			namelen--;
 			name++;
@@ -348,12 +692,45 @@ sysctl_sysctl_name SYSCTL_HANDLER_ARGS
 		}
 		lsp = lsp2;
 	}
+	lck_rw_done(sysctl_geometry_lock);
 	return (SYSCTL_OUT(req, "", 1));
 }
 
-SYSCTL_NODE(_sysctl, 1, name, CTLFLAG_RD, sysctl_sysctl_name, "");
+SYSCTL_NODE(_sysctl, 1, name, CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_sysctl_name, "");
 
-static int
+/*
+ * sysctl_sysctl_next_ls
+ *
+ * Description:	For a given OID name value, return the next consecutive OID
+ *		name value within the geometry tree
+ *
+ * Parameters:	lsp				The OID list to look in
+ *		name				The OID name to start from
+ *		namelen				The length of the OID name
+ *		next				Pointer to new oid storage to
+ *						fill in
+ *		len				Pointer to receive new OID
+ *						length value of storage written
+ *		level				OID tree depth (used to compute
+ *						len value)
+ *		oidpp				Pointer to OID list entry
+ *						pointer; used to walk the list
+ *						forward across recursion
+ *
+ * Returns:	0				Returning a new entry
+ *		1				End of geometry list reached
+ *
+ * Implicit:	*next				Modified to contain the new OID
+ *		*len				Modified to contain new length
+ *
+ * Locks:	Assumes sysctl_geometry_lock is held prior to calling
+ *
+ * Notes:	This function will not return OID values that have special
+ *		handlers, since we can not tell whether these handlers consume
+ *		elements from the OID space as parameters.  For this reason,
+ *		we STRONGLY discourage these types of handlers
+ */
+STATIC int
 sysctl_sysctl_next_ls (struct sysctl_oid_list *lsp, int *name, u_int namelen, 
 	int *next, int *len, int level, struct sysctl_oid **oidpp)
 {
@@ -371,6 +748,11 @@ sysctl_sysctl_next_ls (struct sysctl_oid_list *lsp, int *name, u_int namelen,
 				/* We really should call the handler here...*/
 				return 0;
 			lsp = (struct sysctl_oid_list *)oidp->oid_arg1;
+
+			if (!SLIST_FIRST(lsp))
+				/* This node had no children - skip it! */
+				continue;
+
 			if (!sysctl_sysctl_next_ls (lsp, 0, 0, next+1, 
 				len, level+1, oidpp))
 				return 0;
@@ -408,27 +790,87 @@ sysctl_sysctl_next_ls (struct sysctl_oid_list *lsp, int *name, u_int namelen,
 	return 1;
 }
 
-static int
-sysctl_sysctl_next SYSCTL_HANDLER_ARGS
+/*
+ * sysctl_sysctl_next
+ *
+ * Description:	This is an iterator function designed to iterate the oid tree
+ *		and provide a list of OIDs for use by the user space "sysctl"
+ *		command line tool
+ *
+ * OID:		0, 2
+ *
+ * Parameters:	oidp				__unused
+ *		arg1				Pointer to start OID name
+ *		arg2				Start OID name length
+ *		req				Pointer to user request buffer
+ *
+ * Returns:	0				Success
+ *		ENOENT				Reached end of OID space
+ *	SYSCTL_OUT:EPERM			Permission denied
+ *	SYSCTL_OUT:EFAULT			Bad user supplied buffer
+ *	SYSCTL_OUT:???				Return value from user function
+ *						for SYSCTL_PROC leaf node
+ *
+ * Implicit:	Contents of user request buffer, modified
+ *
+ * Locks:	Acquires and then releases a read lock on the
+ *		sysctl_geometry_lock
+ *
+ * Notes:	SPI (System Programming Interface); this is subject to change
+ *		and may not be relied upon by third party applications; use
+ *		a subprocess to communicate with the "sysctl" command line
+ *		command instead, if you believe you need this functionality.
+ *		Preferably, use sysctlbyname() instead.
+ *
+ *		This function differs from other sysctl functions in that
+ *		it can not take an output buffer length of 0 to determine the
+ *		space which will be required.  It is suggested that the buffer
+ *		length be PATH_MAX, and that authors of new sysctl's refrain
+ *		from exceeding this string length.
+ */
+STATIC int
+sysctl_sysctl_next(__unused struct sysctl_oid *oidp, void *arg1, int arg2,
+        struct sysctl_req *req)
 {
 	int *name = (int *) arg1;
 	u_int namelen = arg2;
 	int i, j, error;
 	struct sysctl_oid *oid;
 	struct sysctl_oid_list *lsp = &sysctl__children;
-	int newoid[CTL_MAXNAME];
+	int newoid[CTL_MAXNAME] = {};
 
+	lck_rw_lock_shared(sysctl_geometry_lock);
 	i = sysctl_sysctl_next_ls (lsp, name, namelen, newoid, &j, 1, &oid);
+	lck_rw_done(sysctl_geometry_lock);
 	if (i)
 		return ENOENT;
 	error = SYSCTL_OUT(req, newoid, j * sizeof (int));
 	return (error);
 }
 
-SYSCTL_NODE(_sysctl, 2, next, CTLFLAG_RD, sysctl_sysctl_next, "");
+SYSCTL_NODE(_sysctl, 2, next, CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_sysctl_next, "");
 
-static int
-name2oid (char *name, int *oid, int *len, struct sysctl_oid **oidpp)
+/*
+ * name2oid
+ *
+ * Description:	Support function for use by sysctl_sysctl_name2oid(); looks
+ *		up an OID name given a string name.
+ *
+ * Parameters:	name				NULL terminated string name
+ *		oid				Pointer to receive OID name
+ *		len				Pointer to receive OID length
+ *						pointer value (see "Notes")
+ *
+ * Returns:	0				Success
+ *		ENOENT				Entry not found
+ *
+ * Implicit:	*oid				Modified to contain OID value
+ *		*len				Modified to contain OID length
+ *
+ * Locks:	Assumes sysctl_geometry_lock is held prior to calling
+ */
+STATIC int
+name2oid (char *name, int *oid, u_int *len)
 {
 	int i;
 	struct sysctl_oid *oidp;
@@ -461,8 +903,6 @@ name2oid (char *name, int *oid, int *len, struct sysctl_oid **oidpp)
 		(*len)++;
 
 		if (!i) {
-			if (oidpp)
-				*oidpp = oidp;
 			return (0);
 		}
 
@@ -474,6 +914,7 @@ name2oid (char *name, int *oid, int *len, struct sysctl_oid **oidpp)
 
 		lsp = (struct sysctl_oid_list *)oidp->oid_arg1;
 		oidp = SLIST_FIRST(lsp);
+		*p = i; /* restore */
 		name = p+1;
 		for (p = name; *p && *p != '.'; p++) 
 				;
@@ -484,20 +925,59 @@ name2oid (char *name, int *oid, int *len, struct sysctl_oid **oidpp)
 	return ENOENT;
 }
 
-static int
-sysctl_sysctl_name2oid SYSCTL_HANDLER_ARGS
+/*
+ * sysctl_sysctl_name2oid
+ *
+ * Description:	Translate a string name to an OID name value; this is used by
+ *		the sysctlbyname() function as well as by the "sysctl" command
+ *		line command.
+ *
+ * OID:		0, 3
+ *
+ * Parameters:	oidp				__unused
+ *		arg1				__unused
+ *		arg2				__unused
+ *		req				Request structure
+ *
+ * Returns:	ENOENT				Input length too short
+ *		ENAMETOOLONG			Input length too long
+ *		ENOMEM				Could not allocate work area
+ *	SYSCTL_IN/OUT:EPERM			Permission denied
+ *	SYSCTL_IN/OUT:EFAULT			Bad user supplied buffer
+ *	SYSCTL_IN/OUT:???			Return value from user function
+ *	name2oid:ENOENT				Not found
+ *
+ * Implicit:	*req				Contents of request, modified
+ *
+ * Locks:	Acquires and then releases a read lock on the
+ *		sysctl_geometry_lock
+ *
+ * Notes:	SPI (System Programming Interface); this is subject to change
+ *		and may not be relied upon by third party applications; use
+ *		a subprocess to communicate with the "sysctl" command line
+ *		command instead, if you believe you need this functionality.
+ *		Preferably, use sysctlbyname() instead.
+ *
+ *		This function differs from other sysctl functions in that
+ *		it can not take an output buffer length of 0 to determine the
+ *		space which will be required.  It is suggested that the buffer
+ *		length be PATH_MAX, and that authors of new sysctl's refrain
+ *		from exceeding this string length.
+ */
+STATIC int
+sysctl_sysctl_name2oid(__unused struct sysctl_oid *oidp, __unused void *arg1,
+	__unused int arg2, struct sysctl_req *req)
 {
 	char *p;
-	int error, oid[CTL_MAXNAME], len;
-	struct sysctl_oid *op = 0;
+	int error, oid[CTL_MAXNAME] = {};
+	u_int len = 0;		/* set by name2oid() */
 
-	if (!req->newlen) 
+	if (req->newlen < 1) 
 		return ENOENT;
 	if (req->newlen >= MAXPATHLEN)	/* XXX arbitrary, undocumented */
 		return (ENAMETOOLONG);
 
-	p = _MALLOC(req->newlen+1, M_TEMP, M_WAITOK);
-
+	MALLOC(p, char *,req->newlen+1, M_TEMP, M_WAITOK);
 	if (!p)
 	    return ENOMEM;
 
@@ -509,7 +989,13 @@ sysctl_sysctl_name2oid SYSCTL_HANDLER_ARGS
 
 	p [req->newlen] = '\0';
 
-	error = name2oid(p, oid, &len, &op);
+	/*
+	 * Note:	We acquire and release the geometry lock here to
+	 *		avoid making name2oid needlessly complex.
+	 */
+	lck_rw_lock_shared(sysctl_geometry_lock);
+	error = name2oid(p, oid, &len);
+	lck_rw_done(sysctl_geometry_lock);
 
 	FREE(p, M_TEMP);
 
@@ -520,18 +1006,58 @@ sysctl_sysctl_name2oid SYSCTL_HANDLER_ARGS
 	return (error);
 }
 
-SYSCTL_PROC(_sysctl, 3, name2oid, CTLFLAG_RW|CTLFLAG_ANYBODY, 0, 0, 
+SYSCTL_PROC(_sysctl, 3, name2oid, CTLFLAG_RW|CTLFLAG_ANYBODY|CTLFLAG_KERN | CTLFLAG_LOCKED, 0, 0, 
 	sysctl_sysctl_name2oid, "I", "");
 
-static int
-sysctl_sysctl_oidfmt SYSCTL_HANDLER_ARGS
+/*
+ * sysctl_sysctl_oidfmt
+ *
+ * Description:	For a given OID name, determine the format of the data which
+ *		is associated with it.  This is used by the "sysctl" command
+ *		line command.
+ *
+ * OID:		0, 4
+ *
+ * Parameters:	oidp				__unused
+ *		arg1				The OID name to look up
+ *		arg2				The length of the OID name
+ *		req				Pointer to user request buffer
+ *
+ * Returns:	0				Success
+ *		EISDIR				Malformed request
+ *		ENOENT				No such OID name
+ *	SYSCTL_OUT:EPERM			Permission denied
+ *	SYSCTL_OUT:EFAULT			Bad user supplied buffer
+ *	SYSCTL_OUT:???				Return value from user function
+ *
+ * Implicit:	Contents of user request buffer, modified
+ *
+ * Locks:	Acquires and then releases a read lock on the
+ *		sysctl_geometry_lock
+ *
+ * Notes:	SPI (System Programming Interface); this is subject to change
+ *		and may not be relied upon by third party applications; use
+ *		a subprocess to communicate with the "sysctl" command line
+ *		command instead, if you believe you need this functionality.
+ *
+ *		This function differs from other sysctl functions in that
+ *		it can not take an output buffer length of 0 to determine the
+ *		space which will be required.  It is suggested that the buffer
+ *		length be PATH_MAX, and that authors of new sysctl's refrain
+ *		from exceeding this string length.
+ */
+STATIC int
+sysctl_sysctl_oidfmt(__unused struct sysctl_oid *oidp, void *arg1, int arg2,
+        struct sysctl_req *req)
 {
-	int *name = (int *) arg1, error;
+	int *name = (int *) arg1;
+	int error = ENOENT;		/* default error: not found */
 	u_int namelen = arg2;
-	int indx;
+	u_int indx;
 	struct sysctl_oid *oid;
 	struct sysctl_oid_list *lsp = &sysctl__children;
 
+	lck_rw_lock_shared(sysctl_geometry_lock);
 	oid = SLIST_FIRST(lsp);
 
 	indx = 0;
@@ -546,28 +1072,34 @@ sysctl_sysctl_oidfmt SYSCTL_HANDLER_ARGS
 				lsp = (struct sysctl_oid_list *)oid->oid_arg1;
 				oid = SLIST_FIRST(lsp);
 			} else {
-				if (indx != namelen)
-					return EISDIR;
+				if (indx != namelen) {
+					error =  EISDIR;
+					goto err;
+				}
 				goto found;
 			}
 		} else {
 			oid = SLIST_NEXT(oid, oid_link);
 		}
 	}
-	return ENOENT;
+	/* Not found */
+	goto err;
+
 found:
 	if (!oid->oid_fmt)
-		return ENOENT;
+		goto err;
 	error = SYSCTL_OUT(req, 
 		&oid->oid_kind, sizeof(oid->oid_kind));
 	if (!error)
 		error = SYSCTL_OUT(req, oid->oid_fmt, 
 			strlen(oid->oid_fmt)+1);
+err:
+	lck_rw_done(sysctl_geometry_lock);
 	return (error);
 }
 
+SYSCTL_NODE(_sysctl, 4, oidfmt, CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_sysctl_oidfmt, "");
 
-SYSCTL_NODE(_sysctl, 4, oidfmt, CTLFLAG_RD, sysctl_sysctl_oidfmt, "");
 
 /*
  * Default "handler" functions.
@@ -581,23 +1113,10 @@ SYSCTL_NODE(_sysctl, 4, oidfmt, CTLFLAG_RD, sysctl_sysctl_oidfmt, "");
  */
 
 int
-sysctl_handle_int SYSCTL_HANDLER_ARGS
+sysctl_handle_int(__unused struct sysctl_oid *oidp, void *arg1, int arg2,
+        struct sysctl_req *req)
 {
-	int error = 0;
-
-	if (arg1)
-		error = SYSCTL_OUT(req, arg1, sizeof(int));
-	else
-		error = SYSCTL_OUT(req, &arg2, sizeof(int));
-
-	if (error || !req->newptr)
-		return (error);
-
-	if (!arg1)
-		error = EPERM;
-	else
-		error = SYSCTL_IN(req, arg1, sizeof(int));
-	return (error);
+	return sysctl_io_number(req, arg1? *(int*)arg1: arg2, sizeof(int), arg1, NULL);
 }
 
 /*
@@ -605,19 +1124,12 @@ sysctl_handle_int SYSCTL_HANDLER_ARGS
  */
 
 int
-sysctl_handle_long SYSCTL_HANDLER_ARGS
+sysctl_handle_long(__unused struct sysctl_oid *oidp, void *arg1,
+	__unused int arg2, struct sysctl_req *req)
 {
-	int error = 0;
-
 	if (!arg1)
 		return (EINVAL);
-	error = SYSCTL_OUT(req, arg1, sizeof(long));
-
-	if (error || !req->newptr)
-		return (error);
-
-	error = SYSCTL_IN(req, arg1, sizeof(long));
-	return (error);
+	return sysctl_io_number(req, *(long*)arg1, sizeof(long), arg1, NULL);
 }
 
 /*
@@ -625,19 +1137,12 @@ sysctl_handle_long SYSCTL_HANDLER_ARGS
  */
 
 int
-sysctl_handle_quad SYSCTL_HANDLER_ARGS
+sysctl_handle_quad(__unused struct sysctl_oid *oidp, void *arg1,
+	__unused int arg2, struct sysctl_req *req)
 {
-	int error = 0;
-
 	if (!arg1)
 		return (EINVAL);
-	error = SYSCTL_OUT(req, arg1, sizeof(long long));
-
-	if (error || !req->newptr)
-		return (error);
-
-	error = SYSCTL_IN(req, arg1, sizeof(long long));
-	return (error);
+	return sysctl_io_number(req, *(long long*)arg1, sizeof(long long), arg1, NULL);
 }
 
 /*
@@ -648,7 +1153,8 @@ sysctl_handle_quad SYSCTL_HANDLER_ARGS
  * using ints.
  */
 int
-sysctl_handle_int2quad SYSCTL_HANDLER_ARGS
+sysctl_handle_int2quad(__unused struct sysctl_oid *oidp, void *arg1,
+	__unused int arg2, struct sysctl_req *req)
 {
 	int error = 0;
 	long long val;
@@ -686,24 +1192,10 @@ sysctl_handle_int2quad SYSCTL_HANDLER_ARGS
  */
 
 int
-sysctl_handle_string SYSCTL_HANDLER_ARGS
+sysctl_handle_string( __unused struct sysctl_oid *oidp, void *arg1, int arg2,
+        struct sysctl_req *req)
 {
-	int error=0;
-
-	error = SYSCTL_OUT(req, arg1, strlen((char *)arg1)+1);
-
-	if (error || !req->newptr)
-		return (error);
-
-	if ((req->newlen - req->newidx) >= arg2) {
-		error = EINVAL;
-	} else {
-		arg2 = (req->newlen - req->newidx);
-		error = SYSCTL_IN(req, arg1, arg2);
-		((char *)arg1)[arg2] = '\0';
-	}
-
-	return (error);
+	return sysctl_io_string(req, arg1, arg2, 0, NULL);
 }
 
 /*
@@ -712,35 +1204,26 @@ sysctl_handle_string SYSCTL_HANDLER_ARGS
  */
 
 int
-sysctl_handle_opaque SYSCTL_HANDLER_ARGS
+sysctl_handle_opaque(__unused struct sysctl_oid *oidp, void *arg1, int arg2,
+        struct sysctl_req *req)
 {
-	int error;
-
-	error = SYSCTL_OUT(req, arg1, arg2);
-
-	if (error || !req->newptr)
-		return (error);
-
-	error = SYSCTL_IN(req, arg1, arg2);
-
-	return (error);
+	return sysctl_io_opaque(req, arg1, arg2, NULL);
 }
 
 /*
  * Transfer functions to/from kernel space.
  */
-static int
+STATIC int
 sysctl_old_kernel(struct sysctl_req *req, const void *p, size_t l)
 {
 	size_t i = 0;
-	int error = 0;
 
 	if (req->oldptr) {
 		i = l;
 		if (i > req->oldlen - req->oldidx)
 			i = req->oldlen - req->oldidx;
 		if (i > 0)
-			bcopy((void*)p, (char *)req->oldptr + req->oldidx, i);
+			bcopy((const void*)p, CAST_DOWN(char *, (req->oldptr + req->oldidx)), i);
 	}
 	req->oldidx += l;
 	if (req->oldptr && i != l)
@@ -748,14 +1231,14 @@ sysctl_old_kernel(struct sysctl_req *req, const void *p, size_t l)
 	return (0);
 }
 
-static int
+STATIC int
 sysctl_new_kernel(struct sysctl_req *req, void *p, size_t l)
 {
 	if (!req->newptr)
 		return 0;
 	if (req->newlen - req->newidx < l)
 		return (EINVAL);
-	bcopy((char *)req->newptr + req->newidx, p, l);
+	bcopy(CAST_DOWN(char *, (req->newptr + req->newidx)), p, l);
 	req->newidx += l;
 	return (0);
 }
@@ -765,7 +1248,6 @@ kernel_sysctl(struct proc *p, int *name, u_int namelen, void *old, size_t *oldle
 {
 	int error = 0;
 	struct sysctl_req req;
-	funnel_t *fnl;
 
 	/*
 	 * Construct request.
@@ -775,46 +1257,17 @@ kernel_sysctl(struct proc *p, int *name, u_int namelen, void *old, size_t *oldle
 	if (oldlenp)
 		req.oldlen = *oldlenp;
 	if (old)
-		req.oldptr= old;
+		req.oldptr = CAST_USER_ADDR_T(old);
 	if (newlen) {
 		req.newlen = newlen;
-		req.newptr = new;
+		req.newptr = CAST_USER_ADDR_T(new);
 	}
 	req.oldfunc = sysctl_old_kernel;
 	req.newfunc = sysctl_new_kernel;
 	req.lock = 1;
 
-	/*
-	 * Locking.  Tree traversal always begins with the kernel funnel held.
-	 */
-	fnl = spl_kernel_funnel();
-
-	/* XXX this should probably be done in a general way */
-	while (memlock.sl_lock) {
-		memlock.sl_want = 1;
-		(void) tsleep((caddr_t)&memlock, PRIBIO+1, "sysctl", 0);
-		memlock.sl_locked++;
-	}
-	memlock.sl_lock = 1;
-
 	/* make the request */
-	error = sysctl_root(0, name, namelen, &req);
-
-	/* unlock memory if required */
-	if (req.lock == 2)
-		vsunlock(req.oldptr, req.oldlen, B_WRITE);
-
-	memlock.sl_lock = 0;
-
-	if (memlock.sl_want) {
-		memlock.sl_want = 0;
-		wakeup((caddr_t)&memlock);
-	}
-
-	/*
-	 * Undo locking.
-	 */
-	splx_kernel_funnel(fnl);
+	error = sysctl_root(TRUE, FALSE, NULL, 0, name, namelen, &req);
 
 	if (error && error != ENOMEM)
 		return (error);
@@ -828,7 +1281,7 @@ kernel_sysctl(struct proc *p, int *name, u_int namelen, void *old, size_t *oldle
 /*
  * Transfer function to/from user space.
  */
-static int
+STATIC int
 sysctl_old_user(struct sysctl_req *req, const void *p, size_t l)
 {
 	int error = 0;
@@ -841,8 +1294,7 @@ sysctl_old_user(struct sysctl_req *req, const void *p, size_t l)
 		if (i > req->oldlen - req->oldidx)
 			i = req->oldlen - req->oldidx;
 		if (i > 0)
-			error = copyout((void*)p, (char *)req->oldptr + req->oldidx,
-					i);
+			error = copyout((const void*)p, (req->oldptr + req->oldidx), i);
 	}
 	req->oldidx += l;
 	if (error)
@@ -852,7 +1304,7 @@ sysctl_old_user(struct sysctl_req *req, const void *p, size_t l)
 	return (0);
 }
 
-static int
+STATIC int
 sysctl_new_user(struct sysctl_req *req, void *p, size_t l)
 {
 	int error;
@@ -861,7 +1313,7 @@ sysctl_new_user(struct sysctl_req *req, void *p, size_t l)
 		return 0;
 	if (req->newlen - req->newidx < l)
 		return (EINVAL);
-	error = copyin((char *)req->newptr + req->newidx, p, l);
+	error = copyin((req->newptr + req->newidx), p, l);
 	req->newidx += l;
 	return (error);
 }
@@ -872,183 +1324,417 @@ sysctl_new_user(struct sysctl_req *req, void *p, size_t l)
  */
 
 int
-sysctl_root SYSCTL_HANDLER_ARGS
+sysctl_root(boolean_t from_kernel, boolean_t string_is_canonical, char *namestring, size_t namestringlen, int *name, u_int namelen, struct sysctl_req *req)
 {
-	int *name = (int *) arg1;
-	u_int namelen = arg2;
-	int indx, i;
+	u_int indx;
+	int i;
 	struct sysctl_oid *oid;
 	struct sysctl_oid_list *lsp = &sysctl__children;
+	sysctl_handler_t oid_handler = NULL;
 	int error;
+	boolean_t unlocked_node_found = FALSE;
+	boolean_t namestring_started = FALSE;
+
+	/* Get the read lock on the geometry */
+	lck_rw_lock_shared(sysctl_geometry_lock);
 
+	if (string_is_canonical) {
+		/* namestring is the canonical name; name/namelen need to be populated from it */
+		error = name2oid(namestring, name, &namelen);
+		if (error) {
+			goto err;
+		}
+	}
+	
 	oid = SLIST_FIRST(lsp);
 
 	indx = 0;
 	while (oid && indx < CTL_MAXNAME) {
 		if (oid->oid_number == name[indx]) {
+			
+			if (!from_kernel && !string_is_canonical) {
+				if (namestring_started) {
+					if (strlcat(namestring, ".", namestringlen) >= namestringlen) {
+						error = ENAMETOOLONG;
+						goto err;
+					}
+				}
+
+				if (strlcat(namestring, oid->oid_name, namestringlen) >= namestringlen) {
+					error = ENAMETOOLONG;
+					goto err;
+				}
+				namestring_started = TRUE;
+			}
+			
 			indx++;
+			if (!(oid->oid_kind & CTLFLAG_LOCKED))
+			{
+				unlocked_node_found = TRUE;
+			}
 			if (oid->oid_kind & CTLFLAG_NOLOCK)
 				req->lock = 0;
+			/*
+			 * For SYSCTL_PROC() functions which are for sysctl's
+			 * which have parameters at the end of their OID
+			 * space, you need to OR CTLTYPE_NODE into their
+			 * access value.
+			 *
+			 * NOTE: For binary backward compatibility ONLY! Do
+			 * NOT add new sysctl's that do this!  Existing
+			 * sysctl's which do this will eventually have
+			 * compatibility code in user space, and this method
+			 * will become unsupported.
+			 */
 			if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) {
 				if (oid->oid_handler)
 					goto found;
 				if (indx == namelen)
-					return ENOENT;
+				{
+					error = ENOENT;
+					goto err;
+				}
+
 				lsp = (struct sysctl_oid_list *)oid->oid_arg1;
 				oid = SLIST_FIRST(lsp);
 			} else {
 				if (indx != namelen)
-					return EISDIR;
+				{
+					error = EISDIR;
+					goto err;
+				}
 				goto found;
 			}
 		} else {
 			oid = SLIST_NEXT(oid, oid_link);
 		}
 	}
-	return ENOENT;
+	error = ENOENT;
+	goto err;
 found:
+	
+	/*
+	 * indx is the index of the first remaining OID name,
+	 * for sysctls that take them as arguments
+	 */
+	if (!from_kernel && !string_is_canonical && (indx < namelen)) {
+		/* "-2147483648" plus NUL: holds any 32-bit int without truncation */
+		char tempbuf[12];
+		u_int indx2;
+
+		for (indx2 = indx; indx2 < namelen; indx2++) {
+			snprintf(tempbuf, sizeof(tempbuf), "%d", name[indx2]);
+			/* separate components with '.' once namestring is non-empty */
+			if (namestring_started) {
+				if (strlcat(namestring, ".", namestringlen) >= namestringlen) {
+					error = ENAMETOOLONG;
+					goto err;
+				}
+			}
+			if (strlcat(namestring, tempbuf, namestringlen) >= namestringlen) {
+				error = ENAMETOOLONG;
+				goto err;
+			}
+			namestring_started = TRUE;
+		}
+	}
+	
 	/* If writing isn't allowed */
 	if (req->newptr && (!(oid->oid_kind & CTLFLAG_WR) ||
 			    ((oid->oid_kind & CTLFLAG_SECURE) && securelevel > 0))) {
-		return (EPERM);
+		error = (EPERM);
+		goto err;
 	}
 
 	/*
 	 * If we're inside the kernel, the OID must be marked as kernel-valid.
-	 * XXX This mechanism for testing is bad.
 	 */
-	if ((req->oldfunc == sysctl_old_kernel) && !(oid->oid_kind & CTLFLAG_KERN))
-		return(EPERM);
+	if (from_kernel && !(oid->oid_kind & CTLFLAG_KERN))
+	{
+		error = (EPERM);
+		goto err;
+	}
 
-	/* Most likely only root can write */
+	/*
+	 * This is where legacy enforcement of permissions occurs.  If the
+	 * flag does not say CTLFLAG_ANYBODY, then we prohibit anyone but
+	 * root from writing new values down.  If local enforcement happens
+	 * at the leaf node, then it needs to be set as CTLFLAG_ANYBODY.  In
+	 * addition, if the leaf node is set this way, then in order to do
+	 * specific enforcement, it has to be of type SYSCTL_PROC.
+	 */
 	if (!(oid->oid_kind & CTLFLAG_ANYBODY) &&
 	    req->newptr && req->p &&
-	    (error = suser(req->p->p_ucred, &req->p->p_acflag)))
-		return (error);
+	    (error = proc_suser(req->p)))
+		goto err;
 
-	if (!oid->oid_handler) {
-	    return EINVAL;
+	/*
+	 * sysctl_unregister_oid() may change the handler value, so grab it
+	 * under the lock.
+	 */
+	oid_handler = oid->oid_handler;
+	if (!oid_handler) {
+	    error = EINVAL;
+		goto err;
 	}
 
 	/*
-	 * Switch to the NETWORK funnel for CTL_NET and KERN_IPC sysctls
+	 * Reference the OID and drop the geometry lock; this prevents the
+	 * OID from being deleted out from under the handler call, but does
+	 * not prevent other calls into handlers or calls to manage the
+	 * geometry elsewhere from blocking...
+	 */
+	OSAddAtomic(1, &oid->oid_refcnt);
+
+	lck_rw_done(sysctl_geometry_lock);
+
+#if CONFIG_MACF
+	if (!from_kernel) {
+		error = mac_system_check_sysctlbyname(kauth_cred_get(),
+						      namestring,
+						      name,
+						      namelen,
+						      req->oldptr,
+						      req->oldlen,
+						      req->newptr,
+						      req->newlen);
+		if (error)
+			goto dropref;
+	}
+#endif
+	
+	/*
+	 * ...however, we still have to grab the mutex for those calls which
+	 * may be into code whose reentrancy is protected by it.
 	 */
+	if (unlocked_node_found)
+	{
+		lck_mtx_lock(sysctl_unlocked_node_lock);
+	}
 
-	if (((name[0] == CTL_NET) || ((name[0] == CTL_KERN) &&
-						       (name[1] == KERN_IPC))))
-	     thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
 
 	if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) {
-		i = (oid->oid_handler) (oid,
-					name + indx, namelen - indx,
-					req);
+		i = oid_handler(oid, name + indx, namelen - indx, req);
 	} else {
-		i = (oid->oid_handler) (oid,
-					oid->oid_arg1, oid->oid_arg2,
-					req);
+		i = oid_handler(oid, oid->oid_arg1, oid->oid_arg2, req);
+	}
+	error = i;
+
+	if (unlocked_node_found)
+	{
+		lck_mtx_unlock(sysctl_unlocked_node_lock);
 	}
 
+#if CONFIG_MACF
+	/* only used from another CONFIG_MACF block */
+dropref:
+#endif
+
 	/*
-	 * Switch back to the KERNEL funnel, if necessary
+	 * This is tricky... we re-grab the geometry lock in order to drop
+	 * the reference and wake on the address; since the geometry
+	 * lock is a reader/writer lock rather than a mutex, we have to
+	 * wake on all apparent 1->0 transitions.  This abuses the drop
+	 * after the reference decrement in order to wake any lck_rw_sleep()
+	 * in progress in sysctl_unregister_oid() that slept because of a
+	 * non-zero reference count.
+	 *
+	 * Note:	OSAddAtomic() is defined to return the previous value;
+	 *		we use this and the fact that the lock itself is a
+	 *		barrier to avoid waking every time through on "hot"
+	 *		OIDs.
 	 */
+	lck_rw_lock_shared(sysctl_geometry_lock);
+	if (OSAddAtomic(-1, &oid->oid_refcnt) == 1)
+		wakeup(&oid->oid_refcnt);
 
-	if (((name[0] == CTL_NET) || ((name[0] == CTL_KERN) &&
-						       (name[1] == KERN_IPC))))
-	     thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
-
-	return (i);
+err:
+	lck_rw_done(sysctl_geometry_lock);
+	return (error);
 }
 
-#ifndef _SYS_SYSPROTO_H_
-struct sysctl_args {
-	int	*name;
-	u_int	namelen;
-	void	*old;
-	size_t	*oldlenp;
-	void	*new;
-	size_t	newlen;
-};
-#endif
+/*
+ * Zero *req, then set it up for a request made on behalf of process p:
+ * old/new buffers are the user addresses oldp/newp (the new buffer is only
+ * recorded when newlen is non-zero), transfers go through
+ * sysctl_old_user()/sysctl_new_user(), and req->lock is set to 1.
+ */
+void sysctl_create_user_req(struct sysctl_req *req, struct proc *p, user_addr_t oldp,
+							size_t oldlen, user_addr_t newp, size_t newlen)
+{
+	bzero(req, sizeof(*req));
+	req->p = p;
+	req->oldptr = oldp;
+	req->oldlen = oldlen;
+	if (newlen) {
+		req->newptr = newp;
+		req->newlen = newlen;
+	}
+	req->oldfunc = sysctl_old_user;
+	req->newfunc = sysctl_new_user;
+	req->lock = 1;
+}
 
 int
-/* __sysctl(struct proc *p, struct sysctl_args *uap) */
-new_sysctl(struct proc *p, struct sysctl_args *uap)
+sysctl(proc_t p, struct sysctl_args *uap, __unused int32_t *retval)
 {
-	int error, i, name[CTL_MAXNAME];
-	size_t j;
-
+	int error;
+	size_t oldlen = 0, newlen;
+	int name[CTL_MAXNAME];
+	struct sysctl_req req;
+	char *namestring;
+	size_t namestringlen = MAXPATHLEN;
+	
+	/*
+	 * all top-level sysctl names are non-terminal
+	 */
 	if (uap->namelen > CTL_MAXNAME || uap->namelen < 2)
 		return (EINVAL);
-
- 	error = copyin(uap->name, &name, uap->namelen * sizeof(int));
- 	if (error)
+	error = copyin(uap->name, &name[0], uap->namelen * sizeof(int));
+	if (error)
 		return (error);
+	
+	AUDIT_ARG(ctlname, name, uap->namelen);
+	
+	if (uap->newlen > SIZE_T_MAX)
+		return (EINVAL);
+	newlen = (size_t)uap->newlen;
+	
+	if (uap->oldlenp != USER_ADDR_NULL) {
+		uint64_t	oldlen64 = fuulong(uap->oldlenp);
 
-	error = userland_sysctl(p, name, uap->namelen,
-		uap->old, uap->oldlenp, 0,
-		uap->new, uap->newlen, &j);
-	if (error && error != ENOMEM)
-		return (error);
-	if (uap->oldlenp) {
-		i = copyout(&j, uap->oldlenp, sizeof(j));
-		if (i)
-			return (i);
+		/*
+		 * If larger than SIZE_T_MAX, clamp to SIZE_T_MAX
+		 */
+		if (oldlen64 > SIZE_T_MAX)
+			oldlen = SIZE_T_MAX;
+		else
+			oldlen = (size_t)oldlen64;
 	}
+	
+	sysctl_create_user_req(&req, p, uap->old, oldlen, uap->new, newlen);
+
+	/* Estimate the longest name string for the passed-in MIB, so we can allocate less than MAXPATHLEN when possible */
+	if (uap->namelen == 2) {
+		if (name[0] == CTL_KERN && name[1] < KERN_MAXID) {
+			namestringlen = 32; /* "kern.speculative_reads_disabled" */
+		} else if (name[0] == CTL_HW && name[1] < HW_MAXID) {
+			namestringlen = 32; /* "hw.cachelinesize_compat" */
+		}
+	}			
+
+	MALLOC(namestring, char *, namestringlen, M_TEMP, M_WAITOK);
+	if (!namestring) {
+	    oldlen = 0;
+	    goto err;
+	}
+
+	error = userland_sysctl(FALSE, namestring, namestringlen, name, uap->namelen, &req, &oldlen);
+	
+	FREE(namestring, M_TEMP);
+	
+	if ((error) && (error != ENOMEM))
+		return (error);
+	
+err:
+	if (uap->oldlenp != USER_ADDR_NULL)
+		error = suulong(uap->oldlenp, oldlen);
+	
 	return (error);
 }
 
-/*
- * This is used from various compatibility syscalls too.  That's why name
- * must be in kernel space.
- */
 int
-userland_sysctl(struct proc *p, int *name, u_int namelen, void *old, size_t *oldlenp, int inkernel, void *new, size_t newlen, size_t *retval)
+sysctlbyname(proc_t p, struct sysctlbyname_args *uap, __unused int32_t *retval)
 {
-	int error = 0;
-	struct sysctl_req req, req2;
-
-	bzero(&req, sizeof req);
+	int error;
+	size_t oldlen = 0, newlen;
+	char *name;
+	size_t namelen = 0;
+	struct sysctl_req req;
+	int oid[CTL_MAXNAME];
 
-	req.p = p;
+	if (uap->namelen >= MAXPATHLEN)	/* XXX arbitrary, undocumented */
+		return (ENAMETOOLONG);
+	namelen = (size_t)uap->namelen;
+	
+	MALLOC(name, char *, namelen+1, M_TEMP, M_WAITOK);
+	if (!name)
+	    return ENOMEM;
 
-	if (oldlenp) {
-		if (inkernel) {
-			req.oldlen = *oldlenp;
-		} else {
-			error = copyin(oldlenp, &req.oldlen, sizeof(*oldlenp));
-			if (error)
-				return (error);
-		}
+	error = copyin(uap->name, name, namelen);
+	if (error) {
+		FREE(name, M_TEMP);
+		return (error);
 	}
+	name[namelen] = '\0';
 
-	if (old) {
-		req.oldptr= old;
+	/* XXX
+	 * AUDIT_ARG(ctlname, name, uap->namelen);
+	 */
+	
+	if (uap->newlen > SIZE_T_MAX) {
+		FREE(name, M_TEMP);
+		return (EINVAL);
 	}
-
-	if (newlen) {
-		req.newlen = newlen;
-		req.newptr = new;
+	newlen = (size_t)uap->newlen;
+	
+	if (uap->oldlenp != USER_ADDR_NULL) {
+		uint64_t	oldlen64 = fuulong(uap->oldlenp);
+		
+		/*
+		 * If larger than SIZE_T_MAX, clamp to SIZE_T_MAX
+		 */
+		if (oldlen64 > SIZE_T_MAX)
+			oldlen = SIZE_T_MAX;
+		else
+			oldlen = (size_t)oldlen64;
 	}
+	
+	sysctl_create_user_req(&req, p, uap->old, oldlen, uap->new, newlen);
 
-	req.oldfunc = sysctl_old_user;
-	req.newfunc = sysctl_new_user;
-	req.lock = 1;
+	error = userland_sysctl(TRUE, name, namelen+1, oid, CTL_MAXNAME, &req, &oldlen);
+	
+	FREE(name, M_TEMP);
+
+	if ((error) && (error != ENOMEM))
+		return (error);
+	
+	if (uap->oldlenp != USER_ADDR_NULL)
+		error = suulong(uap->oldlenp, oldlen);
+	
+	return (error);
+}
+
+/*
+ * This is used from various compatibility syscalls too.  That's why name
+ * must be in kernel space.
+ */
+int
+userland_sysctl(boolean_t string_is_canonical,
+				char *namestring, size_t namestringlen,
+				int *name, u_int namelen, struct sysctl_req *req,
+                size_t *retval)
+{
+	int error = 0;
+	struct sysctl_req req2;
 
 	do {
-	    req2 = req;
-	    error = sysctl_root(0, name, namelen, &req2);
-	} while (error == EAGAIN);
+	    /* if EAGAIN, reset output cursor */
+	    req2 = *req;
+	    if (!string_is_canonical)
+	        namestring[0] = '\0';
 
-	req = req2;
+	    error = sysctl_root(FALSE, string_is_canonical, namestring, namestringlen, name, namelen, &req2);
+	} while (error == EAGAIN);
 
 	if (error && error != ENOMEM)
 		return (error);
 
 	if (retval) {
-		if (req.oldptr && req.oldidx > req.oldlen)
-			*retval = req.oldlen;
+		if (req2.oldptr && req2.oldidx > req2.oldlen)
+			*retval = req2.oldlen;
 		else
-			*retval = req.oldidx;
+			*retval = req2.oldidx;
 	}
 	return (error);
 }
@@ -1061,46 +1747,29 @@ userland_sysctl(struct proc *p, int *name, u_int namelen, void *old, size_t *old
  *
  * Note that some sysctl handlers use copyin/copyout, which
  * may not work correctly.
+ *
+ * The "sysctlbyname" KPI for use by kexts is aliased to this function.
  */
 
-static int
-sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, size_t newlen)
-{
-
-	return(kernel_sysctl(current_proc(), name, namelen, oldp, oldlenp, newp, newlen));
-}
-
-static int
-sysctlnametomib(const char *name, int *mibp, size_t *sizep)
-{
-	int oid[2];
-	int error;
-
-	/* magic service node */
-	oid[0] = 0;
-	oid[1] = 3;
-
-	/* look up OID for name */
-	*sizep *= sizeof(int);
-	error = sysctl(oid, 2, mibp, sizep, (void *)name, strlen(name));
-	*sizep /= sizeof(int);
-	return(error);
-}
-
 int
-sysctlbyname(const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen)
+kernel_sysctlbyname(const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen)
 {
-	int oid[CTL_MAXNAME + 2];
+	int oid[CTL_MAXNAME];
+	int name2mib_oid[2];
 	int error;
 	size_t oidlen;
 
-	/* look up the OID */
-	oidlen = CTL_MAXNAME;
-	error = sysctlnametomib(name, oid, &oidlen);
+	/* look up the OID with magic service node */
+	name2mib_oid[0] = 0;
+	name2mib_oid[1] = 3;
 
+	oidlen = sizeof(oid);
+	error = kernel_sysctl(current_proc(), name2mib_oid, 2, oid, &oidlen, __DECONST(void *, name), strlen(name));
+	oidlen /= sizeof(int);
+	
 	/* now use the OID */
 	if (error == 0)
-		error = sysctl(oid, oidlen, oldp, oldlenp, newp, newlen);
+		error = kernel_sysctl(current_proc(), oid, oidlen, oldp, oldlenp, newp, newlen);
 	return(error);
 }