]> git.saurik.com Git - apple/xnu.git/blobdiff - bsd/kern/kern_newsysctl.c
xnu-6153.141.1.tar.gz
[apple/xnu.git] / bsd / kern / kern_newsysctl.c
index d24a34b5139f7cc34bcb14eb5827247789cd7983..07cd0e08289a3ee91bb4f927abb15ae168c66b7f 100644 (file)
@@ -1,25 +1,31 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_LICENSE_HEADER_START@
- * 
- * The contents of this file constitute Original Code as defined in and
- * are subject to the Apple Public Source License Version 1.1 (the
- * "License").  You may not use this file except in compliance with the
- * License.  Please obtain a copy of the License at
- * http://www.apple.com/publicsource and read it before using this file.
- * 
- * This Original Code and all software distributed under the License are
- * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
- * License for the specific language governing rights and limitations
- * under the License.
- * 
- * @APPLE_LICENSE_HEADER_END@
- */
-/*-
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ *
+ *
  * Copyright (c) 1982, 1986, 1989, 1993
  *     The Regents of the University of California.  All rights reserved.
  *
 #include <sys/kernel.h>
 #include <sys/sysctl.h>
 #include <sys/malloc.h>
-#include <sys/proc.h>
+#include <sys/proc_internal.h>
+#include <sys/kauth.h>
 #include <sys/systm.h>
+#include <sys/sysproto.h>
 
-/*
-struct sysctl_oid_list sysctl__debug_children;
-struct sysctl_oid_list sysctl__kern_children;
-struct sysctl_oid_list sysctl__net_children;
-struct sysctl_oid_list sysctl__sysctl_children;
-*/
-
-extern struct sysctl_oid *newsysctl_list[];
+#include <security/audit/audit.h>
+#include <pexpert/pexpert.h>
 
+#if CONFIG_MACF
+#include <security/mac_framework.h>
+#endif
 
-static void
-sysctl_sysctl_debug_dump_node(struct sysctl_oid_list *l, int i);
-
+#if defined(HAS_APPLE_PAC)
+#include <ptrauth.h>
+#endif /* defined(HAS_APPLE_PAC) */
 
+lck_grp_t * sysctl_lock_group = NULL;
+lck_rw_t * sysctl_geometry_lock = NULL;
+lck_mtx_t * sysctl_unlocked_node_lock = NULL;
 
 /*
- * Locking and stats
+ * Conditionally allow dtrace to see these functions for debugging purposes.
  */
-static struct sysctl_lock {
-       int     sl_lock;
-       int     sl_want;
-       int     sl_locked;
-} memlock;
+#ifdef STATIC
+#undef STATIC
+#endif
+#if 0
+#define STATIC
+#else
+#define STATIC static
+#endif
 
-static int sysctl_root SYSCTL_HANDLER_ARGS;
+/* forward declarations  of static functions */
+STATIC void sysctl_sysctl_debug_dump_node(struct sysctl_oid_list *l, int i);
+STATIC int sysctl_sysctl_debug(struct sysctl_oid *oidp, void *arg1,
+    int arg2, struct sysctl_req *req);
+STATIC int sysctl_sysctl_name(struct sysctl_oid *oidp, void *arg1,
+    int arg2, struct sysctl_req *req);
+STATIC int sysctl_sysctl_next_ls(struct sysctl_oid_list *lsp,
+    int *name, u_int namelen, int *next, int *len, int level,
+    struct sysctl_oid **oidpp);
+STATIC int sysctl_old_kernel(struct sysctl_req *req, const void *p, size_t l);
+STATIC int sysctl_new_kernel(struct sysctl_req *req, void *p, size_t l);
+STATIC int name2oid(char *name, int *oid, u_int *len);
+STATIC int sysctl_sysctl_name2oid(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
+STATIC int sysctl_sysctl_next(struct sysctl_oid *oidp, void *arg1, int arg2,
+    struct sysctl_req *req);
+STATIC int sysctl_sysctl_oidfmt(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
+STATIC int sysctl_old_user(struct sysctl_req *req, const void *p, size_t l);
+STATIC int sysctl_new_user(struct sysctl_req *req, void *p, size_t l);
+
+STATIC void sysctl_create_user_req(struct sysctl_req *req, struct proc *p, user_addr_t oldp,
+    size_t oldlen, user_addr_t newp, size_t newlen);
+STATIC int sysctl_root(boolean_t from_kernel, boolean_t string_is_canonical, char *namestring, size_t namestringlen, int *name, u_int namelen, struct sysctl_req *req);
+
+int     kernel_sysctl(struct proc *p, int *name, u_int namelen, void *old, size_t *oldlenp, void *new, size_t newlen);
+int     kernel_sysctlbyname(const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen);
+int     userland_sysctl(boolean_t string_is_canonical,
+    char *namestring, size_t namestringlen,
+    int *name, u_int namelen, struct sysctl_req *req,
+    size_t *retval);
 
 struct sysctl_oid_list sysctl__children; /* root list */
 
@@ -103,100 +142,432 @@ struct sysctl_oid_list sysctl__children; /* root list */
  * Order by number in each list.
  */
 
-void sysctl_register_oid(struct sysctl_oid *oidp)
+void
+sysctl_register_oid(struct sysctl_oid *new_oidp)
 {
-       struct sysctl_oid_list *parent = oidp->oid_parent;
+       struct sysctl_oid *oidp = NULL;
+       struct sysctl_oid_list *parent = new_oidp->oid_parent;
        struct sysctl_oid *p;
        struct sysctl_oid *q;
        int n;
 
+       /*
+        * The OID can be old-style (needs copy), new style without an earlier
+        * version (also needs copy), or new style with a matching version (no
+        * copy needed).  Later versions are rejected (presumably, the OID
+        * structure was changed for a necessary reason).
+        */
+       if (!(new_oidp->oid_kind & CTLFLAG_OID2)) {
+               /*
+                * XXX: M_TEMP is perhaps not the most appropriate zone, as it
+                * XXX: will subject us to use-after-free by other consumers.
+                */
+               MALLOC(oidp, struct sysctl_oid *, sizeof(*oidp), M_TEMP, M_WAITOK | M_ZERO);
+               if (oidp == NULL) {
+                       return;         /* reject: no memory */
+               }
+               /*
+                * Copy the structure only through the oid_fmt field, which
+                * is the last field in a non-OID2 OID structure.
+                *
+                * Note:        We may want to set the oid_descr to the
+                *              oid_name (or "") at some future date.
+                */
+               memcpy(oidp, new_oidp, offsetof(struct sysctl_oid, oid_descr));
+       } else {
+               /* It's a later version; handle the versions we know about */
+               switch (new_oidp->oid_version) {
+               case SYSCTL_OID_VERSION:
+                       /* current version */
+                       oidp = new_oidp;
+                       break;
+               default:
+                       return;                 /* rejects unknown version */
+               }
+       }
+
+       /* Get the write lock to modify the geometry */
+       lck_rw_lock_exclusive(sysctl_geometry_lock);
+
        /*
         * If this oid has a number OID_AUTO, give it a number which
         * is greater than any current oid.  Make sure it is at least
-        * 100 to leave space for pre-assigned oid numbers.
+        * OID_AUTO_START to leave space for pre-assigned oid numbers.
         */
-/*     sysctl_sysctl_debug_dump_node(parent, 3); */
        if (oidp->oid_number == OID_AUTO) {
-               /* First, find the highest oid in the parent list >99 */
-               n = 99;
+               /* First, find the highest oid in the parent list >OID_AUTO_START-1 */
+               n = OID_AUTO_START;
                SLIST_FOREACH(p, parent, oid_link) {
-                       if (p->oid_number > n)
+                       if (p->oid_number > n) {
                                n = p->oid_number;
+                       }
                }
                oidp->oid_number = n + 1;
+               /*
+                * Reflect the number in an allocated OID into the template
+                * of the caller for sysctl_unregister_oid() compares.
+                */
+               if (oidp != new_oidp) {
+                       new_oidp->oid_number = oidp->oid_number;
+               }
+       }
+
+#if defined(HAS_APPLE_PAC)
+       if (oidp->oid_handler) {
+               /*
+                * Dereference function-pointer-signed oid_handler to prevent an
+                * attacker with the ability to observe the result of the
+                * auth_and_resign below from trying all possible inputs until an auth
+                * succeeds.
+                */
+               if (__builtin_expect(!*(uintptr_t*)ptrauth_auth_data((void*)
+                   oidp->oid_handler, ptrauth_key_function_pointer, 0), 0)) {
+                       /*
+                        * This is necessary to force the dereference but will never
+                        * actually be reached, dereferencing an invalidly signed pointer
+                        * will trap before getting here (and the codegen is nicer than
+                        * with a panic).
+                        */
+                       __builtin_trap();
+               }
+               /*
+                * Sign oid_handler address-discriminated upon installation to make it
+                * harder to replace with an arbitrary function pointer.
+                */
+               oidp->oid_handler = ptrauth_auth_and_resign(oidp->oid_handler,
+                   ptrauth_key_function_pointer, 0, ptrauth_key_function_pointer,
+                   ptrauth_blend_discriminator(&oidp->oid_handler,
+                   ptrauth_string_discriminator("oid_handler")));
        }
+#endif /* defined(HAS_APPLE_PAC) */
 
        /*
         * Insert the oid into the parent's list in order.
         */
        q = NULL;
        SLIST_FOREACH(p, parent, oid_link) {
-               if (oidp->oid_number < p->oid_number)
+               if (oidp->oid_number == p->oid_number) {
+                       panic("attempting to register a sysctl at previously registered slot : %d", oidp->oid_number);
+               } else if (oidp->oid_number < p->oid_number) {
                        break;
+               }
                q = p;
        }
-       if (q)
+       if (q) {
                SLIST_INSERT_AFTER(q, oidp, oid_link);
-       else
+       } else {
                SLIST_INSERT_HEAD(parent, oidp, oid_link);
+       }
+
+       /* Release the write lock */
+       lck_rw_unlock_exclusive(sysctl_geometry_lock);
 }
 
-void sysctl_unregister_oid(struct sysctl_oid *oidp)
+void
+sysctl_unregister_oid(struct sysctl_oid *oidp)
 {
-       SLIST_REMOVE(oidp->oid_parent, oidp, sysctl_oid, oid_link);
+       struct sysctl_oid *removed_oidp = NULL; /* OID removed from tree */
+       struct sysctl_oid *old_oidp = NULL;     /* OID compatibility copy */
+
+       /* Get the write lock to modify the geometry */
+       lck_rw_lock_exclusive(sysctl_geometry_lock);
+
+       if (!(oidp->oid_kind & CTLFLAG_OID2)) {
+               /*
+                * We're using a copy so we can get the new fields in an
+                * old structure, so we have to iterate to compare the
+                * partial structure; when we find a match, we remove it
+                * normally and free the memory.
+                */
+               SLIST_FOREACH(old_oidp, oidp->oid_parent, oid_link) {
+                       if (!memcmp(&oidp->oid_number, &old_oidp->oid_number, (offsetof(struct sysctl_oid, oid_descr) - offsetof(struct sysctl_oid, oid_number)))) {
+                               break;
+                       }
+               }
+               if (old_oidp != NULL) {
+                       SLIST_REMOVE(old_oidp->oid_parent, old_oidp, sysctl_oid, oid_link);
+                       removed_oidp = old_oidp;
+               }
+       } else {
+               /* It's a later version; handle the versions we know about */
+               switch (oidp->oid_version) {
+               case SYSCTL_OID_VERSION:
+                       /* We can just remove the OID directly... */
+                       SLIST_REMOVE(oidp->oid_parent, oidp, sysctl_oid, oid_link);
+                       removed_oidp = oidp;
+                       break;
+               default:
+                       /* XXX: Can't happen; probably tree corruption. */
+                       break;                  /* rejects unknown version */
+               }
+       }
+
+#if defined(HAS_APPLE_PAC)
+       if (removed_oidp && removed_oidp->oid_handler && old_oidp == NULL) {
+               /*
+                * Revert address-discriminated signing performed by
+                * sysctl_register_oid() (in case this oid is registered again).
+                */
+               removed_oidp->oid_handler = ptrauth_auth_function(removed_oidp->oid_handler,
+                   ptrauth_key_function_pointer,
+                   ptrauth_blend_discriminator(&removed_oidp->oid_handler,
+                   ptrauth_string_discriminator("oid_handler")));
+               /*
+                * Dereference the function-pointer-signed result to prevent an
+                * attacker with the ability to observe the result of the
+                * ptrauth_auth_function above from trying all possible inputs until an auth
+                * succeeds.
+                */
+               if (__builtin_expect(!*(uintptr_t*)ptrauth_auth_data((void*)
+                   removed_oidp->oid_handler, ptrauth_key_function_pointer, 0), 0)) {
+                       /*
+                        * This is necessary to force the dereference but will never
+                        * actually be reached, dereferencing an invalidly signed pointer
+                        * will trap before getting here (and the codegen is nicer than
+                        * with a panic).
+                        */
+                       __builtin_trap();
+               }
+       }
+#endif /* defined(HAS_APPLE_PAC) */
+
+       /*
+        * We've removed it from the list at this point, but we don't want
+        * to return to the caller until all handler references have drained
+        * out.  Doing things in this order prevents other people coming in
+        * and starting new operations against the OID node we want removed.
+        *
+        * Note:        removed_oidp could be NULL if it wasn't found.
+        */
+       while (removed_oidp && removed_oidp->oid_refcnt) {
+               lck_rw_sleep(sysctl_geometry_lock, LCK_SLEEP_EXCLUSIVE, &removed_oidp->oid_refcnt, THREAD_UNINT);
+       }
+
+       /* Release the write lock */
+       lck_rw_unlock_exclusive(sysctl_geometry_lock);
+
+       /* If it was allocated, free it after dropping the lock */
+       if (old_oidp != NULL) {
+               FREE(old_oidp, M_TEMP);
+       }
 }
 
 /*
  * Bulk-register all the oids in a linker_set.
  */
-void sysctl_register_set(struct linker_set *lsp)
+void
+sysctl_register_set(const char *set)
 {
-       int count = lsp->ls_length;
-       int i;
-       for (i = 0; i < count; i++)
-               sysctl_register_oid((struct sysctl_oid *) lsp->ls_items[i]);
+       struct sysctl_oid **oidpp, *oidp;
+
+       LINKER_SET_FOREACH(oidpp, struct sysctl_oid **, set) {
+               oidp = *oidpp;
+               if (!(oidp->oid_kind & CTLFLAG_NOAUTO)) {
+                       sysctl_register_oid(oidp);
+               }
+       }
 }
 
-void sysctl_unregister_set(struct linker_set *lsp)
+void
+sysctl_unregister_set(const char *set)
 {
-       int count = lsp->ls_length;
-       int i;
-       for (i = 0; i < count; i++)
-               sysctl_unregister_oid((struct sysctl_oid *) lsp->ls_items[i]);
+       struct sysctl_oid **oidpp, *oidp;
+
+       LINKER_SET_FOREACH(oidpp, struct sysctl_oid **, set) {
+               oidp = *oidpp;
+               if (!(oidp->oid_kind & CTLFLAG_NOAUTO)) {
+                       sysctl_unregister_oid(oidp);
+               }
+       }
 }
 
+/*
+ * Exported in BSDKernel.exports, kept for binary compatibility
+ */
+#if defined(__x86_64__)
+void
+sysctl_register_fixed(void)
+{
+}
+#endif
 
 /*
- * Register OID's from fixed list
+ * Register the kernel's oids on startup.
  */
 
-void sysctl_register_fixed()
+void
+sysctl_early_init(void)
 {
-    int i = 0;
+       /*
+        * Initialize the geometry lock for reading/modifying the
+        * sysctl tree. This is done here because IOKit registers
+        * some sysctl's before bsd_init() would otherwise perform
+        * subsystem initialization.
+        */
 
+       sysctl_lock_group  = lck_grp_alloc_init("sysctl", NULL);
+       sysctl_geometry_lock = lck_rw_alloc_init(sysctl_lock_group, NULL);
+       sysctl_unlocked_node_lock = lck_mtx_alloc_init(sysctl_lock_group, NULL);
 
-    while (newsysctl_list[i]) {
-/*     printf("Registering %d\n", i); */
-       sysctl_register_oid(newsysctl_list[i++]);
-    }
+       sysctl_register_set("__sysctl_set");
+       sysctl_load_devicetree_entries();
 }
 
 /*
- * Register the kernel's oids on startup.
+ * New handler interface
+ *   If the sysctl caller (user mode or kernel mode) is interested in the
+ *   value (req->oldptr != NULL), we copy the data (bigValue etc.) out,
+ *   if the caller wants to set the value (req->newptr), we copy
+ *   the data in (*pValue etc.).
  */
-struct linker_set sysctl_set;
 
-void sysctl_register_all(void *arg)
+int
+sysctl_io_number(struct sysctl_req *req, long long bigValue, size_t valueSize, void *pValue, int *changed)
 {
-       sysctl_register_set(&sysctl_set);
+       int             smallValue;
+       int             error;
+
+       if (changed) {
+               *changed = 0;
+       }
+
+       /*
+        * Handle the various combinations of caller buffer size and
+        * data value size.  We are generous in the case where the
+        * caller has specified a 32-bit buffer but the value is 64-bit
+        * sized.
+        */
+
+       /* 32 bit value expected or 32 bit buffer offered */
+       if (((valueSize == sizeof(int)) ||
+           ((req->oldlen == sizeof(int)) && (valueSize == sizeof(long long))))
+           && (req->oldptr)) {
+               smallValue = (int)bigValue;
+               if ((long long)smallValue != bigValue) {
+                       return ERANGE;
+               }
+               error = SYSCTL_OUT(req, &smallValue, sizeof(smallValue));
+       } else {
+               /* any other case is either size-equal or a bug */
+               error = SYSCTL_OUT(req, &bigValue, valueSize);
+       }
+       /* error or nothing to set */
+       if (error || !req->newptr) {
+               return error;
+       }
+
+       /* set request for constant */
+       if (pValue == NULL) {
+               return EPERM;
+       }
+
+       /* set request needs to convert? */
+       if ((req->newlen == sizeof(int)) && (valueSize == sizeof(long long))) {
+               /* new value is 32 bits, upconvert to 64 bits */
+               error = SYSCTL_IN(req, &smallValue, sizeof(smallValue));
+               if (!error) {
+                       *(long long *)pValue = (long long)smallValue;
+               }
+       } else if ((req->newlen == sizeof(long long)) && (valueSize == sizeof(int))) {
+               /* new value is 64 bits, downconvert to 32 bits and range check */
+               error = SYSCTL_IN(req, &bigValue, sizeof(bigValue));
+               if (!error) {
+                       smallValue = (int)bigValue;
+                       if ((long long)smallValue != bigValue) {
+                               return ERANGE;
+                       }
+                       *(int *)pValue = smallValue;
+               }
+       } else {
+               /* sizes match, just copy in */
+               error = SYSCTL_IN(req, pValue, valueSize);
+       }
+       if (!error && changed) {
+               *changed = 1;
+       }
+       return error;
 }
 
-SYSINIT(sysctl, SI_SUB_KMEM, SI_ORDER_ANY, sysctl_register_all, 0);
+int
+sysctl_io_string(struct sysctl_req *req, char *pValue, size_t valueSize, int trunc, int *changed)
+{
+       int error;
+
+       if (changed) {
+               *changed = 0;
+       }
+
+       if (trunc && req->oldptr && req->oldlen && (req->oldlen < strlen(pValue) + 1)) {
+               /* If trunc != 0, if you give it a too small (but larger than
+                * 0 bytes) buffer, instead of returning ENOMEM, it truncates the
+                * returned string to the buffer size.  This preserves the semantics
+                * of some library routines implemented via sysctl, which truncate
+                * their returned data, rather than simply returning an error. The
+                * returned string is always nul (ascii '\0') terminated. */
+               error = SYSCTL_OUT(req, pValue, req->oldlen - 1);
+               if (!error) {
+                       char c = '\0';
+                       error = SYSCTL_OUT(req, &c, 1);
+               }
+       } else {
+               /* Copy string out */
+               error = SYSCTL_OUT(req, pValue, strlen(pValue) + 1);
+       }
+
+       /* error or no new value */
+       if (error || !req->newptr) {
+               return error;
+       }
+
+       /* attempt to set read-only value */
+       if (valueSize == 0) {
+               return EPERM;
+       }
+
+       /* make sure there's room for the new string */
+       if (req->newlen >= valueSize) {
+               return EINVAL;
+       }
+
+       /* copy the string in and force nul termination */
+       error = SYSCTL_IN(req, pValue, req->newlen);
+       pValue[req->newlen] = '\0';
+
+       if (!error && changed) {
+               *changed = 1;
+       }
+       return error;
+}
+
+int
+sysctl_io_opaque(struct sysctl_req *req, void *pValue, size_t valueSize, int *changed)
+{
+       int error;
+
+       if (changed) {
+               *changed = 0;
+       }
+
+       /* Copy blob out */
+       error = SYSCTL_OUT(req, pValue, valueSize);
+
+       /* error or nothing to set */
+       if (error || !req->newptr) {
+               return error;
+       }
+
+       error = SYSCTL_IN(req, pValue, valueSize);
+
+       if (!error && changed) {
+               *changed = 1;
+       }
+       return error;
+}
 
 /*
  * "Staff-functions"
  *
- * These functions implement a presently undocumented interface 
+ * These functions implement a presently undocumented interface
  * used by the sysctl program to walk the tree, and get the type
  * so it can print the value.
  * This interface is under work and consideration, and should probably
@@ -211,112 +582,250 @@ SYSINIT(sysctl, SI_SUB_KMEM, SI_ORDER_ANY, sysctl_register_all, 0);
  * {0,4,...}   return the kind & format info for the "..." OID.
  */
 
-static void
+/*
+ * sysctl_sysctl_debug_dump_node
+ *
+ * Description:        Dump debug information for a given sysctl_oid_list at the
+ *             given oid depth out to the kernel log, via printf
+ *
+ * Parameters: l                               sysctl_oid_list pointer
+ *             i                               current node depth
+ *
+ * Returns:    (void)
+ *
+ * Implicit:   kernel log, modified
+ *
+ * Locks:      Assumes sysctl_geometry_lock is held prior to calling
+ *
+ * Notes:      This function may call itself recursively to resolve Node
+ *             values, which potentially have an inferior sysctl_oid_list
+ *
+ *             This function is only callable indirectly via the function
+ *             sysctl_sysctl_debug()
+ *
+ * Bugs:       The node depth indentation does not work; this may be an
+ *             artifact of leading space removal by the log daemon itself
+ *             or some intermediate routine.
+ */
+STATIC void
 sysctl_sysctl_debug_dump_node(struct sysctl_oid_list *l, int i)
 {
        int k;
        struct sysctl_oid *oidp;
 
        SLIST_FOREACH(oidp, l, oid_link) {
-
-               for (k=0; k<i; k++)
+               for (k = 0; k < i; k++) {
                        printf(" ");
+               }
 
                printf("%d %s ", oidp->oid_number, oidp->oid_name);
 
-               printf("%c%c",
-                       oidp->oid_kind & CTLFLAG_RD ? 'R':' ',
-                       oidp->oid_kind & CTLFLAG_WR ? 'W':' ');
+               printf("%c%c%c",
+                   oidp->oid_kind & CTLFLAG_LOCKED ? 'L':' ',
+                   oidp->oid_kind & CTLFLAG_RD ? 'R':' ',
+                   oidp->oid_kind & CTLFLAG_WR ? 'W':' ');
 
-               if (oidp->oid_handler)
+               if (oidp->oid_handler) {
                        printf(" *Handler");
+               }
 
                switch (oidp->oid_kind & CTLTYPE) {
-                       case CTLTYPE_NODE:
-                               printf(" Node\n");
-                               if (!oidp->oid_handler) {
-                                       sysctl_sysctl_debug_dump_node(
-                                               oidp->oid_arg1, i+2);
-                               }
-                               break;
-                       case CTLTYPE_INT:    printf(" Int\n"); break;
-                       case CTLTYPE_STRING: printf(" String\n"); break;
-                       case CTLTYPE_QUAD:   printf(" Quad\n"); break;
-                       case CTLTYPE_OPAQUE: printf(" Opaque/struct\n"); break;
-                       default:             printf("\n");
+               case CTLTYPE_NODE:
+                       printf(" Node\n");
+                       if (!oidp->oid_handler) {
+                               sysctl_sysctl_debug_dump_node(
+                                       oidp->oid_arg1, i + 2);
+                       }
+                       break;
+               case CTLTYPE_INT:    printf(" Int\n"); break;
+               case CTLTYPE_STRING: printf(" String\n"); break;
+               case CTLTYPE_QUAD:   printf(" Quad\n"); break;
+               case CTLTYPE_OPAQUE: printf(" Opaque/struct\n"); break;
+               default:             printf("\n");
                }
-
        }
 }
 
-static int
-sysctl_sysctl_debug SYSCTL_HANDLER_ARGS
+/*
+ * sysctl_sysctl_debug
+ *
+ * Description:        This function implements the "sysctl.debug" portion of the
+ *             OID space for sysctl.
+ *
+ * OID:                0, 0
+ *
+ * Parameters: __unused
+ *
+ * Returns:    ENOENT
+ *
+ * Implicit:   kernel log, modified
+ *
+ * Locks:      Acquires and then releases a read lock on the
+ *             sysctl_geometry_lock
+ */
+STATIC int
+sysctl_sysctl_debug(__unused struct sysctl_oid *oidp, __unused void *arg1,
+    __unused int arg2, __unused struct sysctl_req *req)
 {
+       lck_rw_lock_shared(sysctl_geometry_lock);
        sysctl_sysctl_debug_dump_node(&sysctl__children, 0);
+       lck_rw_done(sysctl_geometry_lock);
        return ENOENT;
 }
 
-SYSCTL_PROC(_sysctl, 0, debug, CTLTYPE_STRING|CTLFLAG_RD,
-       0, 0, sysctl_sysctl_debug, "-", "");
+SYSCTL_PROC(_sysctl, 0, debug, CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_LOCKED,
+    0, 0, sysctl_sysctl_debug, "-", "");
 
-static int
-sysctl_sysctl_name SYSCTL_HANDLER_ARGS
+/*
+ * sysctl_sysctl_name
+ *
+ * Description:        Convert an OID into a string name; this is used by the user
+ *             space sysctl() command line utility; this is done in a purely
+ *             advisory capacity (e.g. to provide node names for "sysctl -A"
+ *             output).
+ *
+ * OID:                0, 1
+ *
+ * Parameters: oidp                            __unused
+ *             arg1                            A pointer to the OID name list
+ *                                             integer array, beginning at
+ *                                             adjusted option base 2
+ *             arg2                            The number of elements which
+ *                                             remain in the name array
+ *
+ * Returns:    0                               Success
+ *     SYSCTL_OUT:EPERM                        Permission denied
+ *     SYSCTL_OUT:EFAULT                       Bad user supplied buffer
+ *     SYSCTL_OUT:???                          Return value from user function
+ *                                             for SYSCTL_PROC leaf node
+ *
+ * Implicit:   Contents of user request buffer, modified
+ *
+ * Locks:      Acquires and then releases a read lock on the
+ *             sysctl_geometry_lock
+ *
+ * Notes:      SPI (System Programming Interface); this is subject to change
+ *             and may not be relied upon by third party applications; use
+ *             a subprocess to communicate with the "sysctl" command line
+ *             command instead, if you believe you need this functionality.
+ *             Preferably, use sysctlbyname() instead.
+ *
+ *             Setting of the NUL termination of the output string is
+ *             delayed until after the geometry lock is dropped.  If there
+ *             are no Entries remaining in the OID name list when this
+ *             function is called, it will still write out the termination
+ *             byte.
+ *
+ *             This function differs from other sysctl functions in that
+ *             it can not take an output buffer length of 0 to determine the
+ *             space which will be required.  It is suggested that the buffer
+ *             length be PATH_MAX, and that authors of new sysctl's refrain
+ *             from exceeding this string length.
+ */
+STATIC int
+sysctl_sysctl_name(__unused struct sysctl_oid *oidp, void *arg1, int arg2,
+    struct sysctl_req *req)
 {
        int *name = (int *) arg1;
        u_int namelen = arg2;
        int error = 0;
        struct sysctl_oid *oid;
        struct sysctl_oid_list *lsp = &sysctl__children, *lsp2;
-       char buf[10];
+       char tempbuf[10] = {};
 
+       lck_rw_lock_shared(sysctl_geometry_lock);
        while (namelen) {
                if (!lsp) {
-                       snprintf(buf,sizeof(buf),"%d",*name);
-                       if (req->oldidx)
+                       snprintf(tempbuf, sizeof(tempbuf), "%d", *name);
+                       if (req->oldidx) {
                                error = SYSCTL_OUT(req, ".", 1);
-                       if (!error)
-                               error = SYSCTL_OUT(req, buf, strlen(buf));
-                       if (error)
-                               return (error);
+                       }
+                       if (!error) {
+                               error = SYSCTL_OUT(req, tempbuf, strlen(tempbuf));
+                       }
+                       if (error) {
+                               lck_rw_done(sysctl_geometry_lock);
+                               return error;
+                       }
                        namelen--;
                        name++;
                        continue;
                }
                lsp2 = 0;
                SLIST_FOREACH(oid, lsp, oid_link) {
-                       if (oid->oid_number != *name)
+                       if (oid->oid_number != *name) {
                                continue;
+                       }
 
-                       if (req->oldidx)
+                       if (req->oldidx) {
                                error = SYSCTL_OUT(req, ".", 1);
-                       if (!error)
+                       }
+                       if (!error) {
                                error = SYSCTL_OUT(req, oid->oid_name,
-                                       strlen(oid->oid_name));
-                       if (error)
-                               return (error);
+                                   strlen(oid->oid_name));
+                       }
+                       if (error) {
+                               lck_rw_done(sysctl_geometry_lock);
+                               return error;
+                       }
 
                        namelen--;
                        name++;
 
-                       if ((oid->oid_kind & CTLTYPE) != CTLTYPE_NODE) 
+                       if ((oid->oid_kind & CTLTYPE) != CTLTYPE_NODE) {
                                break;
+                       }
 
-                       if (oid->oid_handler)
+                       if (oid->oid_handler) {
                                break;
+                       }
 
                        lsp2 = (struct sysctl_oid_list *)oid->oid_arg1;
                        break;
                }
                lsp = lsp2;
        }
-       return (SYSCTL_OUT(req, "", 1));
+       lck_rw_done(sysctl_geometry_lock);
+       return SYSCTL_OUT(req, "", 1);
 }
 
-SYSCTL_NODE(_sysctl, 1, name, CTLFLAG_RD, sysctl_sysctl_name, "");
+SYSCTL_NODE(_sysctl, 1, name, CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_sysctl_name, "");
 
-static int
-sysctl_sysctl_next_ls (struct sysctl_oid_list *lsp, int *name, u_int namelen, 
-       int *next, int *len, int level, struct sysctl_oid **oidpp)
+/*
+ * sysctl_sysctl_next_ls
+ *
+ * Description:        For a given OID name value, return the next consecutive OID
+ *             name value within the geometry tree
+ *
+ * Parameters: lsp                             The OID list to look in
+ *             name                            The OID name to start from
+ *             namelen                         The length of the OID name
+ *             next                            Pointer to new oid storage to
+ *                                             fill in
+ *             len                             Pointer to receive new OID
+ *                                             length value of storage written
+ *             level                           OID tree depth (used to compute
+ *                                             len value)
+ *             oidpp                           Pointer to OID list entry
+ *                                             pointer; used to walk the list
+ *                                             forward across recursion
+ *
+ * Returns:    0                               Returning a new entry
+ *             1                               End of geometry list reached
+ *
+ * Implicit:   *next                           Modified to contain the new OID
+ *             *len                            Modified to contain new length
+ *
+ * Locks:      Assumes sysctl_geometry_lock is held prior to calling
+ *
+ * Notes:      This function will not return OID values that have special
+ *             handlers, since we can not tell whether these handlers consume
+ *             elements from the OID space as parameters.  For this reason,
+ *             we STRONGLY discourage these types of handlers
+ */
+STATIC int
+sysctl_sysctl_next_ls(struct sysctl_oid_list *lsp, int *name, u_int namelen,
+    int *next, int *len, int level, struct sysctl_oid **oidpp)
 {
        struct sysctl_oid *oidp;
 
@@ -326,90 +835,171 @@ sysctl_sysctl_next_ls (struct sysctl_oid_list *lsp, int *name, u_int namelen,
                *oidpp = oidp;
 
                if (!namelen) {
-                       if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE) 
+                       if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE) {
                                return 0;
-                       if (oidp->oid_handler) 
+                       }
+                       if (oidp->oid_handler) {
                                /* We really should call the handler here...*/
                                return 0;
+                       }
                        lsp = (struct sysctl_oid_list *)oidp->oid_arg1;
-                       if (!sysctl_sysctl_next_ls (lsp, 0, 0, next+1, 
-                               len, level+1, oidpp))
+
+                       if (!SLIST_FIRST(lsp)) {
+                               /* This node had no children - skip it! */
+                               continue;
+                       }
+
+                       if (!sysctl_sysctl_next_ls(lsp, 0, 0, next + 1,
+                           len, level + 1, oidpp)) {
                                return 0;
+                       }
                        goto next;
                }
 
-               if (oidp->oid_number < *name)
+               if (oidp->oid_number < *name) {
                        continue;
+               }
 
                if (oidp->oid_number > *name) {
-                       if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE)
+                       if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE) {
                                return 0;
-                       if (oidp->oid_handler)
+                       }
+                       if (oidp->oid_handler) {
                                return 0;
+                       }
                        lsp = (struct sysctl_oid_list *)oidp->oid_arg1;
-                       if (!sysctl_sysctl_next_ls (lsp, name+1, namelen-1, 
-                               next+1, len, level+1, oidpp))
-                               return (0);
+                       if (!sysctl_sysctl_next_ls(lsp, name + 1, namelen - 1,
+                           next + 1, len, level + 1, oidpp)) {
+                               return 0;
+                       }
                        goto next;
                }
-               if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE)
+               if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE) {
                        continue;
+               }
 
-               if (oidp->oid_handler)
+               if (oidp->oid_handler) {
                        continue;
+               }
 
                lsp = (struct sysctl_oid_list *)oidp->oid_arg1;
-               if (!sysctl_sysctl_next_ls (lsp, name+1, namelen-1, next+1, 
-                       len, level+1, oidpp))
-                       return (0);
-       next:
+               if (!sysctl_sysctl_next_ls(lsp, name + 1, namelen - 1, next + 1,
+                   len, level + 1, oidpp)) {
+                       return 0;
+               }
+next:
                namelen = 1;
                *len = level;
        }
        return 1;
 }
 
-static int
-sysctl_sysctl_next SYSCTL_HANDLER_ARGS
+/*
+ * sysctl_sysctl_next
+ *
+ * Description:        This is an iterator function designed to iterate the oid tree
+ *             and provide a list of OIDs for use by the user space "sysctl"
+ *             command line tool
+ *
+ * OID:                0, 2
+ *
+ * Parameters: oidp                            __unused
+ *             arg1                            Pointer to start OID name
+ *             arg2                            Start OID name length
+ *             req                             Pointer to user request buffer
+ *
+ * Returns:    0                               Success
+ *             ENOENT                          Reached end of OID space
+ *     SYSCTL_OUT:EPERM                        Permission denied
+ *     SYSCTL_OUT:EFAULT                       Bad user supplied buffer
+ *     SYSCTL_OUT:???                          Return value from user function
+ *                                             for SYSCTL_PROC leaf node
+ *
+ * Implicit:   Contents of user request buffer, modified
+ *
+ * Locks:      Acquires and then releases a read lock on the
+ *             sysctl_geometry_lock
+ *
+ * Notes:      SPI (System Programming Interface); this is subject to change
+ *             and may not be relied upon by third party applications; use
+ *             a subprocess to communicate with the "sysctl" command line
+ *             command instead, if you believe you need this functionality.
+ *             Preferably, use sysctlbyname() instead.
+ *
+ *             This function differs from other sysctl functions in that
+ *             it can not take an output buffer length of 0 to determine the
+ *             space which will be required.  It is suggested that the buffer
+ *             length be PATH_MAX, and that authors of new sysctl's refrain
+ *             from exceeding this string length.
+ */
+STATIC int
+sysctl_sysctl_next(__unused struct sysctl_oid *oidp, void *arg1, int arg2,
+    struct sysctl_req *req)
 {
        int *name = (int *) arg1;
        u_int namelen = arg2;
        int i, j, error;
        struct sysctl_oid *oid;
        struct sysctl_oid_list *lsp = &sysctl__children;
-       int newoid[CTL_MAXNAME];
+       int newoid[CTL_MAXNAME] = {};
 
-       i = sysctl_sysctl_next_ls (lsp, name, namelen, newoid, &j, 1, &oid);
-       if (i)
+       lck_rw_lock_shared(sysctl_geometry_lock);
+       i = sysctl_sysctl_next_ls(lsp, name, namelen, newoid, &j, 1, &oid);
+       lck_rw_done(sysctl_geometry_lock);
+       if (i) {
                return ENOENT;
-       error = SYSCTL_OUT(req, newoid, j * sizeof (int));
-       return (error);
+       }
+       error = SYSCTL_OUT(req, newoid, j * sizeof(int));
+       return error;
 }
 
-SYSCTL_NODE(_sysctl, 2, next, CTLFLAG_RD, sysctl_sysctl_next, "");
+SYSCTL_NODE(_sysctl, 2, next, CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_sysctl_next, "");
 
-static int
-name2oid (char *name, int *oid, int *len, struct sysctl_oid **oidpp)
+/*
+ * name2oid
+ *
+ * Description:        Support function for use by sysctl_sysctl_name2oid(); looks
+ *             up an OID name given a string name.
+ *
+ * Parameters: name                            NULL terminated string name
+ *             oid                             Pointer to receive OID name
+ *             len                             Pointer to receive OID length
+ *                                             (count of name components)
+ *
+ * Returns:    0                               Success
+ *             ENOENT                          Entry not found
+ *
+ * Implicit:   *oid                            Modified to contain OID value
+ *             *len                            Modified to contain OID length
+ *
+ * Locks:      Assumes sysctl_geometry_lock is held prior to calling
+ */
+STATIC int
+name2oid(char *name, int *oid, u_int *len)
 {
        int i;
        struct sysctl_oid *oidp;
        struct sysctl_oid_list *lsp = &sysctl__children;
        char *p;
 
-       if (!*name)
+       if (!*name) {
                return ENOENT;
+       }
 
-       p = name + strlen(name) - 1 ;
-       if (*p == '.')
+       p = name + strlen(name) - 1;
+       if (*p == '.') {
                *p = '\0';
+       }
 
        *len = 0;
 
-       for (p = name; *p && *p != '.'; p++) 
+       for (p = name; *p && *p != '.'; p++) {
                ;
+       }
        i = *p;
-       if (i == '.')
+       if (i == '.') {
                *p = '\0';
+       }
 
        oidp = SLIST_FIRST(lsp);
 
@@ -422,77 +1012,169 @@ name2oid (char *name, int *oid, int *len, struct sysctl_oid **oidpp)
                (*len)++;
 
                if (!i) {
-                       if (oidpp)
-                               *oidpp = oidp;
-                       return (0);
+                       return 0;
                }
 
-               if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE)
+               if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE) {
                        break;
+               }
 
-               if (oidp->oid_handler)
+               if (oidp->oid_handler) {
                        break;
+               }
 
                lsp = (struct sysctl_oid_list *)oidp->oid_arg1;
                oidp = SLIST_FIRST(lsp);
-               name = p+1;
-               for (p = name; *p && *p != '.'; p++) 
-                               ;
+               *p = i; /* restore */
+               name = p + 1;
+               for (p = name; *p && *p != '.'; p++) {
+                       ;
+               }
                i = *p;
-               if (i == '.')
+               if (i == '.') {
                        *p = '\0';
+               }
        }
        return ENOENT;
 }
 
-static int
-sysctl_sysctl_name2oid SYSCTL_HANDLER_ARGS
+/*
+ * sysctl_sysctl_name2oid
+ *
+ * Description:        Translate a string name to an OID name value; this is used by
+ *             the sysctlbyname() function as well as by the "sysctl" command
+ *             line command.
+ *
+ * OID:                0, 3
+ *
+ * Parameters: oidp                            __unused
+ *             arg1                            __unused
+ *             arg2                            __unused
+ *             req                             Request structure
+ *
+ * Returns:    ENOENT                          Input length too short
+ *             ENAMETOOLONG                    Input length too long
+ *             ENOMEM                          Could not allocate work area
+ *     SYSCTL_IN/OUT:EPERM                     Permission denied
+ *     SYSCTL_IN/OUT:EFAULT                    Bad user supplied buffer
+ *     SYSCTL_IN/OUT:???                       Return value from user function
+ *     name2oid:ENOENT                         Not found
+ *
+ * Implicit:   *req                            Contents of request, modified
+ *
+ * Locks:      Acquires and then releases a read lock on the
+ *             sysctl_geometry_lock
+ *
+ * Notes:      SPI (System Programming Interface); this is subject to change
+ *             and may not be relied upon by third party applications; use
+ *             a subprocess to communicate with the "sysctl" command line
+ *             command instead, if you believe you need this functionality.
+ *             Preferably, use sysctlbyname() instead.
+ *
+ *             This function differs from other sysctl functions in that
+ *             it can not take an output buffer length of 0 to determine the
+ *             space which will be required.  It is suggested that the buffer
+ *             length be PATH_MAX, and that authors of new sysctl's refrain
+ *             from exceeding this string length.
+ */
+STATIC int
+sysctl_sysctl_name2oid(__unused struct sysctl_oid *oidp, __unused void *arg1,
+    __unused int arg2, struct sysctl_req *req)
 {
        char *p;
-       int error, oid[CTL_MAXNAME], len;
-       struct sysctl_oid *op = 0;
+       int error, oid[CTL_MAXNAME] = {};
+       u_int len = 0;          /* set by name2oid() */
 
-       if (!req->newlen) 
+       if (req->newlen < 1) {
                return ENOENT;
-       if (req->newlen >= MAXPATHLEN)  /* XXX arbitrary, undocumented */
-               return (ENAMETOOLONG);
-
-       p = _MALLOC(req->newlen+1, M_TEMP, M_WAITOK);
+       }
+       if (req->newlen >= MAXPATHLEN) { /* XXX arbitrary, undocumented */
+               return ENAMETOOLONG;
+       }
 
-       if (!p)
-           return ENOMEM;
+       MALLOC(p, char *, req->newlen + 1, M_TEMP, M_WAITOK);
+       if (!p) {
+               return ENOMEM;
+       }
 
        error = SYSCTL_IN(req, p, req->newlen);
        if (error) {
                FREE(p, M_TEMP);
-               return (error);
+               return error;
        }
 
-       p [req->newlen] = '\0';
+       p[req->newlen] = '\0';
 
-       error = name2oid(p, oid, &len, &op);
+       /*
+        * Note:        We acquire and release the geometry lock here to
+        *              avoid making name2oid needlessly complex.
+        */
+       lck_rw_lock_shared(sysctl_geometry_lock);
+       error = name2oid(p, oid, &len);
+       lck_rw_done(sysctl_geometry_lock);
 
        FREE(p, M_TEMP);
 
-       if (error)
-               return (error);
+       if (error) {
+               return error;
+       }
 
        error = SYSCTL_OUT(req, oid, len * sizeof *oid);
-       return (error);
+       return error;
 }
 
-SYSCTL_PROC(_sysctl, 3, name2oid, CTLFLAG_RW|CTLFLAG_ANYBODY, 0, 0, 
-       sysctl_sysctl_name2oid, "I", "");
+SYSCTL_PROC(_sysctl, 3, name2oid, CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_KERN | CTLFLAG_LOCKED, 0, 0,
+    sysctl_sysctl_name2oid, "I", "");
 
-static int
-sysctl_sysctl_oidfmt SYSCTL_HANDLER_ARGS
+/*
+ * sysctl_sysctl_oidfmt
+ *
+ * Description:        For a given OID name, determine the format of the data which
+ *             is associated with it.  This is used by the "sysctl" command
+ *             line command.
+ *
+ * OID:                0, 4
+ *
+ * Parameters: oidp                            __unused
+ *             arg1                            The OID name to look up
+ *             arg2                            The length of the OID name
+ *             req                             Pointer to user request buffer
+ *
+ * Returns:    0                               Success
+ *             EISDIR                          Malformed request
+ *             ENOENT                          No such OID name
+ *     SYSCTL_OUT:EPERM                        Permission denied
+ *     SYSCTL_OUT:EFAULT                       Bad user supplied buffer
+ *     SYSCTL_OUT:???                          Return value from user function
+ *
+ * Implicit:   Contents of user request buffer, modified
+ *
+ * Locks:      Acquires and then releases a read lock on the
+ *             sysctl_geometry_lock
+ *
+ * Notes:      SPI (System Programming Interface); this is subject to change
+ *             and may not be relied upon by third party applications; use
+ *             a subprocess to communicate with the "sysctl" command line
+ *             command instead, if you believe you need this functionality.
+ *
+ *             This function differs from other sysctl functions in that
+ *             it can not take an output buffer length of 0 to determine the
+ *             space which will be required.  It is suggested that the buffer
+ *             length be PATH_MAX, and that authors of new sysctl's refrain
+ *             from exceeding this string length.
+ */
+STATIC int
+sysctl_sysctl_oidfmt(__unused struct sysctl_oid *oidp, void *arg1, int arg2,
+    struct sysctl_req *req)
 {
-       int *name = (int *) arg1, error;
+       int *name = (int *) arg1;
+       int error = ENOENT;             /* default error: not found */
        u_int namelen = arg2;
-       int indx;
+       u_int indx;
        struct sysctl_oid *oid;
        struct sysctl_oid_list *lsp = &sysctl__children;
 
+       lck_rw_lock_shared(sysctl_geometry_lock);
        oid = SLIST_FIRST(lsp);
 
        indx = 0;
@@ -500,35 +1182,45 @@ sysctl_sysctl_oidfmt SYSCTL_HANDLER_ARGS
                if (oid->oid_number == name[indx]) {
                        indx++;
                        if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) {
-                               if (oid->oid_handler)
+                               if (oid->oid_handler) {
                                        goto found;
-                               if (indx == namelen)
+                               }
+                               if (indx == namelen) {
                                        goto found;
+                               }
                                lsp = (struct sysctl_oid_list *)oid->oid_arg1;
                                oid = SLIST_FIRST(lsp);
                        } else {
-                               if (indx != namelen)
-                                       return EISDIR;
+                               if (indx != namelen) {
+                                       error =  EISDIR;
+                                       goto err;
+                               }
                                goto found;
                        }
                } else {
                        oid = SLIST_NEXT(oid, oid_link);
                }
        }
-       return ENOENT;
+       /* Not found */
+       goto err;
+
 found:
-       if (!oid->oid_fmt)
-               return ENOENT;
-       error = SYSCTL_OUT(req, 
-               &oid->oid_kind, sizeof(oid->oid_kind));
-       if (!error)
-               error = SYSCTL_OUT(req, oid->oid_fmt, 
-                       strlen(oid->oid_fmt)+1);
-       return (error);
+       if (!oid->oid_fmt) {
+               goto err;
+       }
+       error = SYSCTL_OUT(req,
+           &oid->oid_kind, sizeof(oid->oid_kind));
+       if (!error) {
+               error = SYSCTL_OUT(req, oid->oid_fmt,
+                   strlen(oid->oid_fmt) + 1);
+       }
+err:
+       lck_rw_done(sysctl_geometry_lock);
+       return error;
 }
 
+SYSCTL_NODE(_sysctl, 4, oidfmt, CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_sysctl_oidfmt, "");
 
-SYSCTL_NODE(_sysctl, 4, oidfmt, CTLFLAG_RD, sysctl_sysctl_oidfmt, "");
 
 /*
  * Default "handler" functions.
@@ -542,71 +1234,93 @@ SYSCTL_NODE(_sysctl, 4, oidfmt, CTLFLAG_RD, sysctl_sysctl_oidfmt, "");
  */
 
 int
-sysctl_handle_int SYSCTL_HANDLER_ARGS
+sysctl_handle_int(__unused struct sysctl_oid *oidp, void *arg1, int arg2,
+    struct sysctl_req *req)
 {
-       int error = 0;
-
-       if (arg1)
-               error = SYSCTL_OUT(req, arg1, sizeof(int));
-       else
-               error = SYSCTL_OUT(req, &arg2, sizeof(int));
+       return sysctl_io_number(req, arg1? *(int*)arg1: arg2, sizeof(int), arg1, NULL);
+}
 
-       if (error || !req->newptr)
-               return (error);
+/*
+ * Handle a long, signed or unsigned.  arg1 points to it.
+ */
 
-       if (!arg1)
-               error = EPERM;
-       else
-               error = SYSCTL_IN(req, arg1, sizeof(int));
-       return (error);
+int
+sysctl_handle_long(__unused struct sysctl_oid *oidp, void *arg1,
+    __unused int arg2, struct sysctl_req *req)
+{
+       if (!arg1) {
+               return EINVAL;
+       }
+       return sysctl_io_number(req, *(long*)arg1, sizeof(long), arg1, NULL);
 }
 
 /*
- * Handle a long, signed or unsigned.  arg1 points to it.
+ * Handle a quad, signed or unsigned.  arg1 points to it.
  */
 
 int
-sysctl_handle_long SYSCTL_HANDLER_ARGS
+sysctl_handle_quad(__unused struct sysctl_oid *oidp, void *arg1,
+    __unused int arg2, struct sysctl_req *req)
+{
+       if (!arg1) {
+               return EINVAL;
+       }
+       return sysctl_io_number(req, *(long long*)arg1, sizeof(long long), arg1, NULL);
+}
+
+/*
+ * Expose an int value as a quad.
+ *
+ * This interface allows us to support interfaces defined
+ * as using quad values while the implementation is still
+ * using ints.
+ */
+int
+sysctl_handle_int2quad(__unused struct sysctl_oid *oidp, void *arg1,
+    __unused int arg2, struct sysctl_req *req)
 {
        int error = 0;
+       long long val;
+       int newval;
 
-       if (!arg1)
-               return (EINVAL);
-       error = SYSCTL_OUT(req, arg1, sizeof(long));
+       if (!arg1) {
+               return EINVAL;
+       }
+       val = (long long)*(int *)arg1;
+       error = SYSCTL_OUT(req, &val, sizeof(long long));
 
-       if (error || !req->newptr)
-               return (error);
+       if (error || !req->newptr) {
+               return error;
+       }
 
-       error = SYSCTL_IN(req, arg1, sizeof(long));
-       return (error);
+       error = SYSCTL_IN(req, &val, sizeof(long long));
+       if (!error) {
+               /*
+                * Value must be representable; check by
+                * casting and then casting back.
+                */
+               newval = (int)val;
+               if ((long long)newval != val) {
+                       error = ERANGE;
+               } else {
+                       *(int *)arg1 = newval;
+               }
+       }
+       return error;
 }
 
 /*
  * Handle our generic '\0' terminated 'C' string.
  * Two cases:
- *     a variable string:  point arg1 at it, arg2 is max length.
- *     a constant string:  point arg1 at it, arg2 is zero.
+ *      a variable string:  point arg1 at it, arg2 is max length.
+ *      a constant string:  point arg1 at it, arg2 is zero.
  */
 
 int
-sysctl_handle_string SYSCTL_HANDLER_ARGS
+sysctl_handle_string( __unused struct sysctl_oid *oidp, void *arg1, int arg2,
+    struct sysctl_req *req)
 {
-       int error=0;
-
-       error = SYSCTL_OUT(req, arg1, strlen((char *)arg1)+1);
-
-       if (error || !req->newptr)
-               return (error);
-
-       if ((req->newlen - req->newidx) >= arg2) {
-               error = EINVAL;
-       } else {
-               arg2 = (req->newlen - req->newidx);
-               error = SYSCTL_IN(req, arg1, arg2);
-               ((char *)arg1)[arg2] = '\0';
-       }
-
-       return (error);
+       return sysctl_io_string(req, arg1, arg2, 0, NULL);
 }
 
 /*
@@ -615,156 +1329,134 @@ sysctl_handle_string SYSCTL_HANDLER_ARGS
  */
 
 int
-sysctl_handle_opaque SYSCTL_HANDLER_ARGS
+sysctl_handle_opaque(__unused struct sysctl_oid *oidp, void *arg1, int arg2,
+    struct sysctl_req *req)
 {
-       int error;
-
-       error = SYSCTL_OUT(req, arg1, arg2);
-
-       if (error || !req->newptr)
-               return (error);
-
-       error = SYSCTL_IN(req, arg1, arg2);
-
-       return (error);
+       return sysctl_io_opaque(req, arg1, arg2, NULL);
 }
 
 /*
  * Transfer functions to/from kernel space.
- * XXX: rather untested at this point
  */
-static int
+STATIC int
 sysctl_old_kernel(struct sysctl_req *req, const void *p, size_t l)
 {
        size_t i = 0;
-       int error = 0;
 
        if (req->oldptr) {
                i = l;
-               if (i > req->oldlen - req->oldidx)
+               if (i > req->oldlen - req->oldidx) {
                        i = req->oldlen - req->oldidx;
+               }
                if (i > 0) {
-                       error = copyout(p, (char *)req->oldptr + req->oldidx, i);
-                       if (error)
-                           return error;
+                       bcopy((const void*)p, CAST_DOWN(char *, (req->oldptr + req->oldidx)), i);
                }
        }
        req->oldidx += l;
-       if (req->oldptr && i != l)
-               return (ENOMEM);
-       return (0);
+       if (req->oldptr && i != l) {
+               return ENOMEM;
+       }
+       return 0;
 }
 
-static int
+STATIC int
 sysctl_new_kernel(struct sysctl_req *req, void *p, size_t l)
 {
-       if (!req->newptr)
+       if (!req->newptr) {
                return 0;
-       if (req->newlen - req->newidx < l)
-               return (EINVAL);
-       copyin((char *)req->newptr + req->newidx, p, l);
+       }
+       if (req->newlen - req->newidx < l) {
+               return EINVAL;
+       }
+       bcopy(CAST_DOWN(char *, (req->newptr + req->newidx)), p, l);
        req->newidx += l;
-       return (0);
+       return 0;
 }
 
 int
-kernel_sysctl(struct proc *p, int *name, u_int namelen, void *old, size_t *oldlenp, void *new, size_t newlen, size_t *retval)
+kernel_sysctl(struct proc *p, int *name, u_int namelen, void *old, size_t *oldlenp, void *new, size_t newlen)
 {
        int error = 0;
        struct sysctl_req req;
 
+       /*
+        * Construct request.
+        */
        bzero(&req, sizeof req);
-
        req.p = p;
-
        if (oldlenp) {
                req.oldlen = *oldlenp;
        }
-
        if (old) {
-               req.oldptr= old;
+               req.oldptr = CAST_USER_ADDR_T(old);
        }
-
        if (newlen) {
                req.newlen = newlen;
-               req.newptr = new;
+               req.newptr = CAST_USER_ADDR_T(new);
        }
-
        req.oldfunc = sysctl_old_kernel;
        req.newfunc = sysctl_new_kernel;
        req.lock = 1;
 
-       /* XXX this should probably be done in a general way */
-       while (memlock.sl_lock) {
-               memlock.sl_want = 1;
-               (void) tsleep((caddr_t)&memlock, PRIBIO+1, "sysctl", 0);
-               memlock.sl_locked++;
-       }
-       memlock.sl_lock = 1;
-
-       error = sysctl_root(0, name, namelen, &req);
-
-       if (req.lock == 2)
-               vsunlock(req.oldptr, req.oldlen, B_WRITE);
+       /* make the request */
+       error = sysctl_root(TRUE, FALSE, NULL, 0, name, namelen, &req);
 
-       memlock.sl_lock = 0;
-
-       if (memlock.sl_want) {
-               memlock.sl_want = 0;
-               wakeup((caddr_t)&memlock);
+       if (error && error != ENOMEM) {
+               return error;
        }
 
-       if (error && error != ENOMEM)
-               return (error);
-
-       if (retval) {
-               if (req.oldptr && req.oldidx > req.oldlen)
-                       *retval = req.oldlen;
-               else
-                       *retval = req.oldidx;
+       if (oldlenp) {
+               *oldlenp = req.oldidx;
        }
-       return (error);
+
+       return error;
 }
 
 /*
  * Transfer function to/from user space.
  */
-static int
+STATIC int
 sysctl_old_user(struct sysctl_req *req, const void *p, size_t l)
 {
        int error = 0;
        size_t i = 0;
 
        if (req->oldptr) {
-                if (req->oldlen - req->oldidx < l)
-                    return (ENOMEM);
+               if (req->oldlen - req->oldidx < l) {
+                       return ENOMEM;
+               }
                i = l;
-               if (i > req->oldlen - req->oldidx)
+               if (i > req->oldlen - req->oldidx) {
                        i = req->oldlen - req->oldidx;
-               if (i > 0)
-                       error = copyout(p, (char *)req->oldptr + req->oldidx,
-                                       i);
+               }
+               if (i > 0) {
+                       error = copyout((const void*)p, (req->oldptr + req->oldidx), i);
+               }
        }
        req->oldidx += l;
-       if (error)
-               return (error);
-       if (req->oldptr && i < l)
-               return (ENOMEM);
-       return (0);
+       if (error) {
+               return error;
+       }
+       if (req->oldptr && i < l) {
+               return ENOMEM;
+       }
+       return 0;
 }
 
-static int
+STATIC int
 sysctl_new_user(struct sysctl_req *req, void *p, size_t l)
 {
        int error;
 
-       if (!req->newptr)
+       if (!req->newptr) {
                return 0;
-       if (req->newlen - req->newidx < l)
-               return (EINVAL);
-       error = copyin((char *)req->newptr + req->newidx, p, l);
+       }
+       if (req->newlen - req->newidx < l) {
+               return EINVAL;
+       }
+       error = copyin((req->newptr + req->newidx), p, l);
        req->newidx += l;
-       return (error);
+       return error;
 }
 
 /*
@@ -773,387 +1465,490 @@ sysctl_new_user(struct sysctl_req *req, void *p, size_t l)
  */
 
 int
-sysctl_root SYSCTL_HANDLER_ARGS
+sysctl_root(boolean_t from_kernel, boolean_t string_is_canonical, char *namestring, size_t namestringlen, int *name, u_int namelen, struct sysctl_req *req)
 {
-       int *name = (int *) arg1;
-       u_int namelen = arg2;
-       int indx, i;
+       u_int indx;
+       int i;
        struct sysctl_oid *oid;
        struct sysctl_oid_list *lsp = &sysctl__children;
+       sysctl_handler_t oid_handler = NULL;
        int error;
+       boolean_t unlocked_node_found = FALSE;
+       boolean_t namestring_started = FALSE;
+
+       /* Get the read lock on the geometry */
+       lck_rw_lock_shared(sysctl_geometry_lock);
+
+       if (string_is_canonical) {
+               /* namestring holds the canonical name; name/namelen need to be populated from it */
+               error = name2oid(namestring, name, &namelen);
+               if (error) {
+                       goto err;
+               }
+       }
 
        oid = SLIST_FIRST(lsp);
 
        indx = 0;
        while (oid && indx < CTL_MAXNAME) {
                if (oid->oid_number == name[indx]) {
+                       if (!from_kernel && !string_is_canonical) { /* build the dotted name string as the OID path is walked */
+                               if (namestring_started) {
+                                       if (strlcat(namestring, ".", namestringlen) >= namestringlen) {
+                                               error = ENAMETOOLONG;
+                                               goto err;
+                                       }
+                               }
+
+                               if (strlcat(namestring, oid->oid_name, namestringlen) >= namestringlen) {
+                                       error = ENAMETOOLONG;
+                                       goto err;
+                               }
+                               namestring_started = TRUE;
+                       }
+
                        indx++;
-                       if (oid->oid_kind & CTLFLAG_NOLOCK)
+                       if (!(oid->oid_kind & CTLFLAG_LOCKED)) { /* remembered so the handler is later called under sysctl_unlocked_node_lock */
+                               unlocked_node_found = TRUE;
+                       }
+                       if (oid->oid_kind & CTLFLAG_NOLOCK) {
                                req->lock = 0;
+                       }
+                       /*
+                        * For SYSCTL_PROC() functions which are for sysctl's
+                        * which have parameters at the end of their OID
+                        * space, you need to OR CTLTYPE_NODE into their
+                        * access value.
+                        *
+                        * NOTE: For binary backward compatibility ONLY! Do
+                        * NOT add new sysctl's that do this!  Existing
+                        * sysctl's which do this will eventually have
+                        * compatibility code in user space, and this method
+                        * will become unsupported.
+                        */
                        if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) {
-                               if (oid->oid_handler)
+                               if (oid->oid_handler) {
                                        goto found;
-                               if (indx == namelen)
-                                       return ENOENT;
+                               }
+                               if (indx == namelen) {
+                                       error = ENOENT;
+                                       goto err;
+                               }
+
                                lsp = (struct sysctl_oid_list *)oid->oid_arg1;
                                oid = SLIST_FIRST(lsp);
                        } else {
-                               if (indx != namelen)
-                                       return EISDIR;
+                               if (indx != namelen) {
+                                       error = EISDIR;
+                                       goto err;
+                               }
                                goto found;
                        }
                } else {
                        oid = SLIST_NEXT(oid, oid_link);
                }
        }
-       return ENOENT;
+       error = ENOENT;
+       goto err;
 found:
+
+       /*
+        * indx is the index of the first remaining OID name,
+        * for sysctls that take them as arguments
+        */
+       if (!from_kernel && !string_is_canonical && (indx < namelen)) {
+               char tempbuf[10]; /* NOTE(review): "%d" of a 32-bit int can need 12 bytes including sign and NUL; snprintf would truncate */
+               u_int indx2;
+
+               for (indx2 = indx; indx2 < namelen; indx2++) {
+                       snprintf(tempbuf, sizeof(tempbuf), "%d", name[indx2]);
+
+                       if (namestring_started) {
+                               if (strlcat(namestring, ".", namestringlen) >= namestringlen) {
+                                       error = ENAMETOOLONG;
+                                       goto err;
+                               }
+                       }
+
+                       if (strlcat(namestring, tempbuf, namestringlen) >= namestringlen) {
+                               error = ENAMETOOLONG;
+                               goto err;
+                       }
+                       namestring_started = TRUE;
+               }
+       }
+
        /* If writing isn't allowed */
        if (req->newptr && (!(oid->oid_kind & CTLFLAG_WR) ||
-                           ((oid->oid_kind & CTLFLAG_SECURE) && securelevel > 0))) {
-               return (EPERM);
+           ((oid->oid_kind & CTLFLAG_SECURE) && securelevel > 0))) {
+               error = (EPERM);
+               goto err;
        }
 
-       /* Most likely only root can write */
+       /*
+        * If we're inside the kernel, the OID must be marked as kernel-valid.
+        */
+       if (from_kernel && !(oid->oid_kind & CTLFLAG_KERN)) {
+               error = (EPERM);
+               goto err;
+       }
+
+       /*
+        * This is where legacy enforcement of permissions occurs.  If the
+        * flag does not say CTLFLAG_ANYBODY, then we prohibit anyone but
+        * root from writing new values down.  If local enforcement happens
+        * at the leaf node, then it needs to be set as CTLFLAG_ANYBODY.  In
+        * addition, if the leaf node is set this way, then in order to do
+        * specific enforcement, it has to be of type SYSCTL_PROC.
+        */
        if (!(oid->oid_kind & CTLFLAG_ANYBODY) &&
            req->newptr && req->p &&
-           (error = suser(req->p->p_ucred, &req->p->p_acflag)))
-               return (error);
+           (error = proc_suser(req->p))) {
+               goto err;
+       }
+
+       /*
+        * sysctl_unregister_oid() may change the handler value, so grab it
+        * under the lock.
+        */
+       oid_handler = oid->oid_handler;
+       if (!oid_handler) {
+               error = EINVAL;
+               goto err;
+       }
 
-       if (!oid->oid_handler) {
-           return EINVAL;
+       /*
+        * Reference the OID and drop the geometry lock; this prevents the
+        * OID from being deleted out from under the handler call, but does
+        * not prevent other calls into handlers or calls to manage the
+        * geometry elsewhere from blocking...
+        */
+       OSAddAtomic(1, &oid->oid_refcnt);
+
+       lck_rw_done(sysctl_geometry_lock);
+
+#if CONFIG_MACF
+       if (!from_kernel) {
+               error = mac_system_check_sysctlbyname(kauth_cred_get(),
+                   namestring,
+                   name,
+                   namelen,
+                   req->oldptr,
+                   req->oldlen,
+                   req->newptr,
+                   req->newlen);
+               if (error) {
+                       goto dropref; /* the OID reference is held and the geometry lock is dropped at this point */
+               }
        }
+#endif
 
        /*
-        * Switch to the NETWORK funnel for CTL_NET and KERN_IPC sysctls
+        * ...however, we still have to grab the mutex for those calls which
+        * may be into code whose reentrancy is protected by it.
         */
+       if (unlocked_node_found) {
+               lck_mtx_lock(sysctl_unlocked_node_lock);
+       }
 
-       if (((name[0] == CTL_NET) || ((name[0] == CTL_KERN) &&
-                                                      (name[1] == KERN_IPC))))
-            thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
+#if defined(HAS_APPLE_PAC)
+       /*
+        * oid_handler is signed address-discriminated by sysctl_register_oid().
+        */
+       oid_handler = ptrauth_auth_function(oid_handler,
+           ptrauth_key_function_pointer,
+           ptrauth_blend_discriminator(&oid->oid_handler,
+           ptrauth_string_discriminator("oid_handler")));
+#endif /* defined(HAS_APPLE_PAC) */
 
        if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) {
-               i = (oid->oid_handler) (oid,
-                                       name + indx, namelen - indx,
-                                       req);
+               i = oid_handler(oid, name + indx, namelen - indx, req);
        } else {
-               i = (oid->oid_handler) (oid,
-                                       oid->oid_arg1, oid->oid_arg2,
-                                       req);
+               i = oid_handler(oid, oid->oid_arg1, oid->oid_arg2, req);
+       }
+       error = i;
+
+       if (unlocked_node_found) {
+               lck_mtx_unlock(sysctl_unlocked_node_lock);
        }
 
+#if CONFIG_MACF
+       /* only used from another CONFIG_MACF block */
+dropref:
+#endif
+
        /*
-        * Switch back to the KERNEL funnel, if necessary
+        * This is tricky... we re-grab the geometry lock in order to drop
+        * the reference and wake on the address; since the geometry
+        * lock is a reader/writer lock rather than a mutex, we have to
+        * wake on all apparent 1->0 transitions.  This abuses the drop
+        * after the reference decrement in order to wake any lck_rw_sleep()
+        * in progress in sysctl_unregister_oid() that slept because of a
+        * non-zero reference count.
+        *
+        * Note:        OSAddAtomic() is defined to return the previous value;
+        *              we use this and the fact that the lock itself is a
+        *              barrier to avoid waking every time through on "hot"
+        *              OIDs.
         */
+       lck_rw_lock_shared(sysctl_geometry_lock);
+       if (OSAddAtomic(-1, &oid->oid_refcnt) == 1) {
+               wakeup(&oid->oid_refcnt);
+       }
 
-       if (((name[0] == CTL_NET) || ((name[0] == CTL_KERN) &&
-                                                      (name[1] == KERN_IPC))))
-            thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
-
-       return (i);
+err: /* every path reaching here holds sysctl_geometry_lock shared */
+       lck_rw_done(sysctl_geometry_lock);
+       return error;
 }
 
-#ifndef _SYS_SYSPROTO_H_
-struct sysctl_args {
-       int     *name;
-       u_int   namelen;
-       void    *old;
-       size_t  *oldlenp;
-       void    *new;
-       size_t  newlen;
-};
-#endif
-
-int
-/* __sysctl(struct proc *p, struct sysctl_args *uap) */
-new_sysctl(struct proc *p, struct sysctl_args *uap)
+void
+sysctl_create_user_req(struct sysctl_req *req, struct proc *p, user_addr_t oldp,
+    size_t oldlen, user_addr_t newp, size_t newlen)
 {
-       int error, i, name[CTL_MAXNAME];
-       size_t j;
-
-       if (uap->namelen > CTL_MAXNAME || uap->namelen < 2)
-               return (EINVAL);
-
-       error = copyin(uap->name, &name, uap->namelen * sizeof(int));
-       if (error)
-               return (error);
-
-       error = userland_sysctl(p, name, uap->namelen,
-               uap->old, uap->oldlenp, 0,
-               uap->new, uap->newlen, &j);
-       if (error && error != ENOMEM)
-               return (error);
-       if (uap->oldlenp) {
-               i = copyout(&j, uap->oldlenp, sizeof(j));
-               if (i)
-                       return (i);
-       }
-       return (error);
+       bzero(req, sizeof(*req)); /* every field not assigned below stays zero */
+
+       req->p = p;
+
+       req->oldlen = oldlen;
+       req->oldptr = oldp;
+
+       if (newlen) { /* with newlen == 0, newptr and newlen are left zeroed even if newp is set */
+               req->newlen = newlen;
+               req->newptr = newp;
+       }
+
+       req->oldfunc = sysctl_old_user; /* copyout-based transfer to the caller's old buffer */
+       req->newfunc = sysctl_new_user; /* copyin-based transfer from the caller's new buffer */
+       req->lock = 1;
+
+       return;
 }
 
-/*
- * This is used from various compatibility syscalls too.  That's why name
- * must be in kernel space.
- */
 int
-userland_sysctl(struct proc *p, int *name, u_int namelen, void *old, size_t *oldlenp, int inkernel, void *new, size_t newlen, size_t *retval)
+sysctl(proc_t p, struct sysctl_args *uap, __unused int32_t *retval)
 {
-       int error = 0;
-       struct sysctl_req req, req2;
+       int error, new_error;
+       size_t oldlen = 0, newlen;
+       int name[CTL_MAXNAME];
+       struct sysctl_req req;
+       char *namestring;
+       size_t namestringlen = MAXPATHLEN;
 
-       bzero(&req, sizeof req);
+       /*
+        * all top-level sysctl names are non-terminal, so at least two name components are required
+        */
+       if (uap->namelen > CTL_MAXNAME || uap->namelen < 2) {
+               return EINVAL;
+       }
+       error = copyin(uap->name, &name[0], uap->namelen * sizeof(int));
+       if (error) {
+               return error;
+       }
 
-       req.p = p;
+       AUDIT_ARG(ctlname, name, uap->namelen);
 
-       if (oldlenp) {
-               if (inkernel) {
-                       req.oldlen = *oldlenp;
+       if (uap->newlen > SIZE_T_MAX) {
+               return EINVAL;
+       }
+       newlen = (size_t)uap->newlen;
+
+       if (uap->oldlenp != USER_ADDR_NULL) {
+               uint64_t        oldlen64 = fuulong(uap->oldlenp); /* NOTE(review): the fetched value is not checked for a fetch failure - confirm fuulong() semantics */
+
+               /*
+                * If larger than SIZE_T_MAX, clamp to SIZE_T_MAX
+                */
+               if (oldlen64 > SIZE_T_MAX) {
+                       oldlen = SIZE_T_MAX;
                } else {
-                       error = copyin(oldlenp, &req.oldlen, sizeof(*oldlenp));
-                       if (error)
-                               return (error);
+                       oldlen = (size_t)oldlen64;
                }
        }
 
-       if (old) {
-               req.oldptr= old;
+       sysctl_create_user_req(&req, p, uap->old, oldlen, uap->new, newlen);
+
+       /* Guess the longest name string for the passed-in MIB, so a buffer smaller than MAXPATHLEN can be used */
+       if (uap->namelen == 2) {
+               if (name[0] == CTL_KERN && name[1] < KERN_MAXID) {
+                       namestringlen = 32; /* "kern.speculative_reads_disabled" */
+               } else if (name[0] == CTL_HW && name[1] < HW_MAXID) {
+                       namestringlen = 32; /* "hw.cachelinesize_compat" */
+               }
        }
 
-       if (newlen) {
-               req.newlen = newlen;
-               req.newptr = new;
+       MALLOC(namestring, char *, namestringlen, M_TEMP, M_WAITOK);
+       if (!namestring) {
+               oldlen = 0;
+               goto err; /* NOTE(review): error is still 0 here, so an allocation failure is not reported as an error */
        }
 
-       req.oldfunc = sysctl_old_user;
-       req.newfunc = sysctl_new_user;
-       req.lock = 1;
+       error = userland_sysctl(FALSE, namestring, namestringlen, name, uap->namelen, &req, &oldlen);
 
-       do {
-           req2 = req;
-           error = sysctl_root(0, name, namelen, &req2);
-       } while (error == EAGAIN);
+       FREE(namestring, M_TEMP);
 
-       req = req2;
+       if ((error) && (error != ENOMEM)) { /* ENOMEM falls through so the length is still written back */
+               return error;
+       }
 
-       if (error && error != ENOMEM)
-               return (error);
+err:
+       if (uap->oldlenp != USER_ADDR_NULL) {
+               /*
+                * Only overwrite the old error value on a new error
+                */
+               new_error = suulong(uap->oldlenp, oldlen);
 
-       if (retval) {
-               if (req.oldptr && req.oldidx > req.oldlen)
-                       *retval = req.oldlen;
-               else
-                       *retval = req.oldidx;
+               if (new_error) {
+                       error = new_error;
+               }
        }
-       return (error);
+
+       return error;
 }
-#if 0
 
-#if COMPAT_43
-#include <sys/socket.h>
-#include <vm/vm_param.h>
+// sysctlbyname is also exported as KPI to kexts
+// and the syscall name cannot conflict with it
+int
+sys_sysctlbyname(proc_t p, struct sysctlbyname_args *uap, __unused int32_t *retval)
+{
+       int error, new_error;
+       size_t oldlen = 0, newlen;
+       char *name;
+       size_t namelen = 0;
+       struct sysctl_req req;
+       int oid[CTL_MAXNAME];
 
-#define        KINFO_PROC              (0<<8)
-#define        KINFO_RT                (1<<8)
-#define        KINFO_VNODE             (2<<8)
-#define        KINFO_FILE              (3<<8)
-#define        KINFO_METER             (4<<8)
-#define        KINFO_LOADAVG           (5<<8)
-#define        KINFO_CLOCKRATE         (6<<8)
+       if (uap->namelen >= MAXPATHLEN) { /* XXX arbitrary, undocumented */
+               return ENAMETOOLONG;
+       }
+       namelen = (size_t)uap->namelen;
 
-/* Non-standard BSDI extension - only present on their 4.3 net-2 releases */
-#define        KINFO_BSDI_SYSINFO      (101<<8)
+       MALLOC(name, char *, namelen + 1, M_TEMP, M_WAITOK); /* one extra byte for the terminating NUL stored below */
+       if (!name) {
+               return ENOMEM;
+       }
 
-/*
- * XXX this is bloat, but I hope it's better here than on the potentially
- * limited kernel stack...  -Peter
- */
+       error = copyin(uap->name, name, namelen);
+       if (error) {
+               FREE(name, M_TEMP);
+               return error;
+       }
+       name[namelen] = '\0';
 
-static struct {
-       int     bsdi_machine;           /* "i386" on BSD/386 */
-/*      ^^^ this is an offset to the string, relative to the struct start */
-       char    *pad0;
-       long    pad1;
-       long    pad2;
-       long    pad3;
-       u_long  pad4;
-       u_long  pad5;
-       u_long  pad6;
-
-       int     bsdi_ostype;            /* "BSD/386" on BSD/386 */
-       int     bsdi_osrelease;         /* "1.1" on BSD/386 */
-       long    pad7;
-       long    pad8;
-       char    *pad9;
-
-       long    pad10;
-       long    pad11;
-       int     pad12;
-       long    pad13;
-       quad_t  pad14;
-       long    pad15;
-
-       struct  timeval pad16;
-       /* we dont set this, because BSDI's uname used gethostname() instead */
-       int     bsdi_hostname;          /* hostname on BSD/386 */
-
-       /* the actual string data is appended here */
-
-} bsdi_si;
-/*
- * this data is appended to the end of the bsdi_si structure during copyout.
- * The "char *" offsets are relative to the base of the bsdi_si struct.
- * This contains "FreeBSD\02.0-BUILT-nnnnnn\0i386\0", and these strings
- * should not exceed the length of the buffer here... (or else!! :-)
- */
-static char bsdi_strings[80];  /* It had better be less than this! */
-
-#ifndef _SYS_SYSPROTO_H_
-struct getkerninfo_args {
-       int     op;
-       char    *where;
-       size_t  *size;
-       int     arg;
-};
-#endif
+       /* XXX
+        * AUDIT_ARG(ctlname, name, uap->namelen);
+        */
 
-int
-ogetkerninfo(struct proc *p, struct getkerninfo_args *uap)
-{
-       int error, name[6];
-       size_t size;
-
-       switch (uap->op & 0xff00) {
-
-       case KINFO_RT:
-               name[0] = CTL_NET;
-               name[1] = PF_ROUTE;
-               name[2] = 0;
-               name[3] = (uap->op & 0xff0000) >> 16;
-               name[4] = uap->op & 0xff;
-               name[5] = uap->arg;
-               error = userland_sysctl(p, name, 6, uap->where, uap->size,
-                       0, 0, 0, &size);
-               break;
-
-       case KINFO_VNODE:
-               name[0] = CTL_KERN;
-               name[1] = KERN_VNODE;
-               error = userland_sysctl(p, name, 2, uap->where, uap->size,
-                       0, 0, 0, &size);
-               break;
-
-       case KINFO_PROC:
-               name[0] = CTL_KERN;
-               name[1] = KERN_PROC;
-               name[2] = uap->op & 0xff;
-               name[3] = uap->arg;
-               error = userland_sysctl(p, name, 4, uap->where, uap->size,
-                       0, 0, 0, &size);
-               break;
-
-       case KINFO_FILE:
-               name[0] = CTL_KERN;
-               name[1] = KERN_FILE;
-               error = userland_sysctl(p, name, 2, uap->where, uap->size,
-                       0, 0, 0, &size);
-               break;
-
-       case KINFO_METER:
-               name[0] = CTL_VM;
-               name[1] = VM_METER;
-               error = userland_sysctl(p, name, 2, uap->where, uap->size,
-                       0, 0, 0, &size);
-               break;
-
-       case KINFO_LOADAVG:
-               name[0] = CTL_VM;
-               name[1] = VM_LOADAVG;
-               error = userland_sysctl(p, name, 2, uap->where, uap->size,
-                       0, 0, 0, &size);
-               break;
-
-       case KINFO_CLOCKRATE:
-               name[0] = CTL_KERN;
-               name[1] = KERN_CLOCKRATE;
-               error = userland_sysctl(p, name, 2, uap->where, uap->size,
-                       0, 0, 0, &size);
-               break;
-
-       case KINFO_BSDI_SYSINFO: {
-               /*
-                * this is pretty crude, but it's just enough for uname()
-                * from BSDI's 1.x libc to work.
-                *
-                * In particular, it doesn't return the same results when
-                * the supplied buffer is too small.  BSDI's version apparently
-                * will return the amount copied, and set the *size to how
-                * much was needed.  The emulation framework here isn't capable
-                * of that, so we just set both to the amount copied.
-                * BSDI's 2.x product apparently fails with ENOMEM in this
-                * scenario.
-                */
+       if (uap->newlen > SIZE_T_MAX) {
+               FREE(name, M_TEMP);
+               return EINVAL;
+       }
+       newlen = (size_t)uap->newlen;
 
-               u_int needed;
-               u_int left;
-               char *s;
+       if (uap->oldlenp != USER_ADDR_NULL) {
+               uint64_t        oldlen64 = fuulong(uap->oldlenp);
 
-               bzero((char *)&bsdi_si, sizeof(bsdi_si));
-               bzero(bsdi_strings, sizeof(bsdi_strings));
+               /*
+                * If larger than SIZE_T_MAX, clamp to SIZE_T_MAX
+                */
+               if (oldlen64 > SIZE_T_MAX) {
+                       oldlen = SIZE_T_MAX;
+               } else {
+                       oldlen = (size_t)oldlen64;
+               }
+       }
 
-               s = bsdi_strings;
+       sysctl_create_user_req(&req, p, uap->old, oldlen, uap->new, newlen);
 
-               bsdi_si.bsdi_ostype = (s - bsdi_strings) + sizeof(bsdi_si);
-               strcpy(s, ostype);
-               s += strlen(s) + 1;
+       error = userland_sysctl(TRUE, name, namelen + 1, oid, CTL_MAXNAME, &req, &oldlen); /* TRUE: name is the canonical string and oid[] is filled in from it */
 
-               bsdi_si.bsdi_osrelease = (s - bsdi_strings) + sizeof(bsdi_si);
-               strcpy(s, osrelease);
-               s += strlen(s) + 1;
+       FREE(name, M_TEMP);
 
-               bsdi_si.bsdi_machine = (s - bsdi_strings) + sizeof(bsdi_si);
-               strcpy(s, machine);
-               s += strlen(s) + 1;
+       if ((error) && (error != ENOMEM)) { /* ENOMEM falls through so the length is still written back */
+               return error;
+       }
 
-               needed = sizeof(bsdi_si) + (s - bsdi_strings);
+       if (uap->oldlenp != USER_ADDR_NULL) {
+               /*
+                * Only overwrite the old error value on a new error
+                */
+               new_error = suulong(uap->oldlenp, oldlen);
 
-               if (uap->where == NULL) {
-                       /* process is asking how much buffer to supply.. */
-                       size = needed;
-                       error = 0;
-                       break;
+               if (new_error) {
+                       error = new_error;
                }
+       }
 
+       return error;
+}
 
-               /* if too much buffer supplied, trim it down */
-               if (size > needed)
-                       size = needed;
+/*
+ * This is used from various compatibility syscalls too.  That's why name
+ * must be in kernel space.
+ */
+int
+userland_sysctl(boolean_t string_is_canonical,
+    char *namestring, size_t namestringlen,
+    int *name, u_int namelen, struct sysctl_req *req,
+    size_t *retval)
+{
+       int error = 0;
+       struct sysctl_req req2;
 
-               /* how much of the buffer is remaining */
-               left = size;
+       do {
+               /* work on a fresh copy of *req each pass, so a retry after EAGAIN starts with the original cursors */
+               req2 = *req;
+               if (!string_is_canonical) {
+                       namestring[0] = '\0'; /* sysctl_root() appends to namestring with strlcat, so start empty */
+               }
 
-               if ((error = copyout((char *)&bsdi_si, uap->where, left)) != 0)
-                       break;
+               error = sysctl_root(FALSE, string_is_canonical, namestring, namestringlen, name, namelen, &req2); /* FALSE: not from_kernel */
+       } while (error == EAGAIN);
 
-               /* is there any point in continuing? */
-               if (left > sizeof(bsdi_si)) {
-                       left -= sizeof(bsdi_si);
-                       error = copyout(&bsdi_strings,
-                                       uap->where + sizeof(bsdi_si), left);
-               }
-               break;
+       if (error && error != ENOMEM) { /* ENOMEM falls through so *retval is still reported */
+               return error;
        }
 
-       default:
-               return (EOPNOTSUPP);
+       if (retval) {
+               if (req2.oldptr && req2.oldidx > req2.oldlen) { /* clamp the reported length to oldlen */
+                       *retval = req2.oldlen;
+               } else {
+                       *retval = req2.oldidx;
+               }
        }
-       if (error)
-               return (error);
-       p->p_retval[0] = size;
-       if (uap->size)
-               error = copyout((caddr_t)&size, (caddr_t)uap->size,
-                   sizeof(size));
-       return (error);
+       return error;
 }
-#endif /* COMPAT_43 */
 
-#endif
+/*
+ * Kernel versions of the userland sysctl helper functions.
+ *
+ * These allow sysctl to be used in the same fashion in both
+ * userland and the kernel.
+ *
+ * Note that some sysctl handlers use copyin/copyout, which
+ * may not work correctly.
+ *
+ * The "sysctlbyname" KPI for use by kexts is aliased to this function.
+ */
+
+int
+kernel_sysctlbyname(const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen)
+{
+       int oid[CTL_MAXNAME];
+       int name2mib_oid[2];
+       int error;
+       size_t oidlen;
+
+       /* look up the OID with the magic service node {0, 3} */
+       name2mib_oid[0] = 0;
+       name2mib_oid[1] = 3;
+
+       oidlen = sizeof(oid);
+       error = kernel_sysctl(current_proc(), name2mib_oid, 2, oid, &oidlen, __DECONST(void *, name), strlen(name)); /* oid[] is the old (output) buffer; the name string is passed as the new value */
+       oidlen /= sizeof(int); /* convert the byte count into a number of ints */
+
+       /* now use the OID */
+       if (error == 0) {
+               error = kernel_sysctl(current_proc(), oid, oidlen, oldp, oldlenp, newp, newlen);
+       }
+       return error;
+}