]> git.saurik.com Git - apple/xnu.git/blobdiff - bsd/kern/uipc_domain.c
xnu-3789.1.32.tar.gz
[apple/xnu.git] / bsd / kern / uipc_domain.c
index 05249011d17ddc0e1a91b3ac0d9adfa208104c21..7fde6ee3e75aa242c3a0fab1839b14b098ead5b9 100644 (file)
@@ -1,33 +1,30 @@
 /*
- * Copyright (c) 2006 Apple Computer, Inc. All Rights Reserved.
- * 
- * @APPLE_LICENSE_OSREFERENCE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code 
- * as defined in and that are subject to the Apple Public Source License 
- * Version 2.0 (the 'License'). You may not use this file except in 
- * compliance with the License.  The rights granted to you under the 
- * License may not be used to create, or enable the creation or 
- * redistribution of, unlawful or unlicensed copies of an Apple operating 
- * system, or to circumvent, violate, or enable the circumvention or 
- * violation of, any terms of an Apple operating system software license 
- * agreement.
+ * Copyright (c) 1998-2013 Apple Inc. All rights reserved.
  *
- * Please obtain a copy of the License at 
- * http://www.opensource.apple.com/apsl/ and read it before using this 
- * file.
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
- * The Original Code and all software distributed under the License are 
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 
- * Please see the License for the specific language governing rights and 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
  * limitations under the License.
  *
- * @APPLE_LICENSE_OSREFERENCE_HEADER_END@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
-/* Copyright (c) 1998, 1999 Apple Computer, Inc. All Rights Reserved */
 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
 /*
  * Copyright (c) 1982, 1986, 1993
@@ -68,6 +65,7 @@
 #include <sys/socket.h>
 #include <sys/protosw.h>
 #include <sys/domain.h>
+#include <sys/mcache.h>
 #include <sys/mbuf.h>
 #include <sys/time.h>
 #include <sys/kernel.h>
 #include <sys/syslog.h>
 #include <sys/queue.h>
 
-void   pffasttimo(void *);
-void   pfslowtimo(void *);
+#include <net/dlil.h>
 
-/*
- * Add/delete 'domain': Link structure into system list,
- *  invoke the domain init, and then the proto inits.
- * To delete, just remove from the list (dom_refs must be zero)
- */
+#include <mach/boolean.h>
+#include <pexpert/pexpert.h>
+
+static void pr_init_old(struct protosw *, struct domain *);
+static void init_proto(struct protosw *, struct domain *);
+static void attach_proto(struct protosw *, struct domain *);
+static void detach_proto(struct protosw *, struct domain *);
+static void dom_init_old(struct domain *);
+static void init_domain(struct domain *);
+static void attach_domain(struct domain *);
+static void detach_domain(struct domain *);
+static struct protosw *pffindprotonotype_locked(int, int, int);
+static struct domain *pffinddomain_locked(int);
 
-lck_grp_t              *domain_proto_mtx_grp;
+static boolean_t domain_timeout_run;   /* domain timer is scheduled to run */
+static boolean_t domain_draining;
+static void domain_sched_timeout(void);
+static void domain_timeout(void *);
+
+lck_grp_t      *domain_proto_mtx_grp;
 lck_attr_t     *domain_proto_mtx_attr;
 static lck_grp_attr_t  *domain_proto_mtx_grp_attr;
-lck_mtx_t              *domain_proto_mtx;
-extern int             do_reclaim;
+decl_lck_mtx_data(static, domain_proto_mtx);
+decl_lck_mtx_data(static, domain_timeout_mtx);
+
+static u_int64_t _net_uptime;
+
+#if (DEVELOPMENT || DEBUG)
+
+SYSCTL_DECL(_kern_ipc);
+
+static int sysctl_do_drain_domains SYSCTL_HANDLER_ARGS;
 
-void init_domain(register struct domain *dp)
+SYSCTL_PROC(_kern_ipc, OID_AUTO, do_drain_domains,
+       CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_LOCKED,
+       0, 0,
+       sysctl_do_drain_domains, "I", "force manual drain domains");
+
+#endif /* DEVELOPMENT || DEBUG */
+
+static void
+pr_init_old(struct protosw *pp, struct domain *dp)
+{
+#pragma unused(dp)
+       VERIFY(pp->pr_flags & PR_OLD);
+       VERIFY(pp->pr_old != NULL);
+
+       if (pp->pr_old->pr_init != NULL)
+               pp->pr_old->pr_init();
+}
+
+static void
+init_proto(struct protosw *pp, struct domain *dp)
 {
-       struct protosw  *pr;
-       
-       if ((dp->dom_mtx = lck_mtx_alloc_init(domain_proto_mtx_grp, domain_proto_mtx_attr)) == NULL) {
-               printf("init_domain: can't init domain mtx for domain=%s\n", dp->dom_name);
-               return; /* we have a problem... */
+       VERIFY(pp->pr_flags & PR_ATTACHED);
+
+       if (!(pp->pr_flags & PR_INITIALIZED)) {
+               TAILQ_INIT(&pp->pr_filter_head);
+               if (pp->pr_init != NULL)
+                       pp->pr_init(pp, dp);
+               pp->pr_flags |= PR_INITIALIZED;
        }
+}
 
-       if (dp->dom_init)
-               (*dp->dom_init)();
+static void
+attach_proto(struct protosw *pp, struct domain *dp)
+{
+       domain_proto_mtx_lock_assert_held();
+       VERIFY(!(pp->pr_flags & PR_ATTACHED));
+       VERIFY(pp->pr_domain == NULL);
+       VERIFY(pp->pr_protosw == NULL);
 
-       /* and then init the currently installed protos in this domain */
+       TAILQ_INSERT_TAIL(&dp->dom_protosw, pp, pr_entry);
+       pp->pr_flags |= PR_ATTACHED;
+       pp->pr_domain = dp;
+       pp->pr_protosw = pp;
 
-       for (pr = dp->dom_protosw; pr; pr = pr->pr_next) {
-               if (pr->pr_usrreqs == 0)
-                       panic("domaininit: %ssw[%d] has no usrreqs!",
-                             dp->dom_name, 
-                             (int)(pr - dp->dom_protosw));
+       /* do some cleaning up on user request callbacks */
+       pru_sanitize(pp->pr_usrreqs);
+}
 
-               if (pr->pr_init)
-                       (*pr->pr_init)();
+static void
+detach_proto(struct protosw *pp, struct domain *dp)
+{
+       domain_proto_mtx_lock_assert_held();
+       VERIFY(pp->pr_flags & PR_ATTACHED);
+       VERIFY(pp->pr_domain == dp);
+       VERIFY(pp->pr_protosw == pp);
+
+       TAILQ_REMOVE(&dp->dom_protosw, pp, pr_entry);
+       pp->pr_flags &= ~PR_ATTACHED;
+       pp->pr_domain = NULL;
+       pp->pr_protosw = NULL;
+}
+
+static void
+dom_init_old(struct domain *dp)
+{
+       VERIFY(dp->dom_flags & DOM_OLD);
+       VERIFY(dp->dom_old != NULL);
+
+       if (dp->dom_old->dom_init != NULL)
+               dp->dom_old->dom_init();
+}
+
+static void
+init_domain(struct domain *dp)
+{
+       VERIFY(dp->dom_flags & DOM_ATTACHED);
+
+       if (!(dp->dom_flags & DOM_INITIALIZED)) {
+               lck_mtx_init(&dp->dom_mtx_s, domain_proto_mtx_grp,
+                   domain_proto_mtx_attr);
+               dp->dom_mtx = &dp->dom_mtx_s;
+               TAILQ_INIT(&dp->dom_protosw);
+               if (dp->dom_init != NULL)
+                       dp->dom_init(dp);
+               dp->dom_flags |= DOM_INITIALIZED;
        }
 
        /* Recompute for new protocol */
-       if (max_linkhdr < 16)           /* XXX - Sheesh; everything's ether? */
-               max_linkhdr = 16;
-       if (dp->dom_protohdrlen > max_protohdr)
-               max_protohdr = dp->dom_protohdrlen;
+       if (_max_linkhdr < 16)          /* XXX - Sheesh; everything's ether? */
+               _max_linkhdr = 16;
+       _max_linkhdr = max_linkhdr;     /* round it up */
+
+       if (dp->dom_protohdrlen > _max_protohdr)
+               _max_protohdr = dp->dom_protohdrlen;
+       _max_protohdr = max_protohdr;   /* round it up */
+
        max_hdr = max_linkhdr + max_protohdr;
        max_datalen = MHLEN - max_hdr;
 }
 
-void   concat_domain(struct domain *dp) 
+static void
+attach_domain(struct domain *dp)
 {
-       lck_mtx_assert(domain_proto_mtx, LCK_MTX_ASSERT_OWNED);
-       dp->dom_next = domains; 
-       domains = dp; 
+       domain_proto_mtx_lock_assert_held();
+       VERIFY(!(dp->dom_flags & DOM_ATTACHED));
+
+       TAILQ_INSERT_TAIL(&domains, dp, dom_entry);
+       dp->dom_flags |= DOM_ATTACHED;
+}
+
+static void
+detach_domain(struct domain *dp)
+{
+       domain_proto_mtx_lock_assert_held();
+       VERIFY(dp->dom_flags & DOM_ATTACHED);
+
+       TAILQ_REMOVE(&domains, dp, dom_entry);
+       dp->dom_flags &= ~DOM_ATTACHED;
+
+       if (dp->dom_flags & DOM_OLD) {
+               struct domain_old *odp = dp->dom_old;
+
+               VERIFY(odp != NULL);
+               odp->dom_next = NULL;
+               odp->dom_mtx = NULL;
+       }
 }
 
+/*
+ * Exported (private) routine, indirection of net_add_domain.
+ */
 void
-net_add_domain(register struct domain *dp)
-{      register struct protosw *pr;
+net_add_domain_old(struct domain_old *odp)
+{
+       struct domain *dp;
+       domain_guard_t guard;
+
+       VERIFY(odp != NULL);
+
+       guard = domain_guard_deploy();
+       if ((dp = pffinddomain_locked(odp->dom_family)) != NULL) {
+               /*
+                * There is really nothing better than to panic here,
+                * as the caller would not have been able to handle
+                * any failures otherwise.
+                */
+               panic("%s: domain (%d,%s) already exists for %s\n", __func__,
+                   dp->dom_family, dp->dom_name, odp->dom_name);
+               /* NOTREACHED */
+       }
+
+       /* Make sure nothing is currently pointing to the odp. */
+       TAILQ_FOREACH(dp, &domains, dom_entry) {
+               if (dp->dom_old == odp) {
+                       panic("%s: domain %p (%d,%s) is already "
+                           "associated with %p (%d,%s)\n", __func__,
+                           odp, odp->dom_family, odp->dom_name, dp,
+                           dp->dom_family, dp->dom_name);
+                       /* NOTREACHED */
+               }
+       }
 
-       kprintf("Adding domain %s (family %d)\n", dp->dom_name,
-               dp->dom_family);
-       /* First, link in the domain */
+       if (odp->dom_protosw != NULL) {
+               panic("%s: domain (%d,%s) protocols need to added "
+                   "via net_add_proto\n", __func__, odp->dom_family,
+                   odp->dom_name);
+               /* NOTREACHED */
+       }
 
-       lck_mtx_lock(domain_proto_mtx);
-       concat_domain(dp);
+       dp = _MALLOC(sizeof (*dp), M_TEMP, M_WAITOK | M_ZERO);
+       if (dp == NULL) {
+               /*
+                * There is really nothing better than to panic here,
+                * as the caller would not have been able to handle
+                * any failures otherwise.
+                */
+               panic("%s: unable to allocate memory for domain family "
+                   "%d (%s)\n", __func__, odp->dom_family, odp->dom_name);
+               /* NOTREACHED */
+       }
 
+       /* Copy everything but dom_init, dom_mtx, dom_next and dom_refs */
+       dp->dom_family          = odp->dom_family;
+       dp->dom_flags           = (odp->dom_flags & DOMF_USERFLAGS) | DOM_OLD;
+       dp->dom_name            = odp->dom_name;
+       dp->dom_init            = dom_init_old;
+       dp->dom_externalize     = odp->dom_externalize;
+       dp->dom_dispose         = odp->dom_dispose;
+       dp->dom_rtattach        = odp->dom_rtattach;
+       dp->dom_rtoffset        = odp->dom_rtoffset;
+       dp->dom_maxrtkey        = odp->dom_maxrtkey;
+       dp->dom_protohdrlen     = odp->dom_protohdrlen;
+       dp->dom_old             = odp;
+
+       attach_domain(dp);
        init_domain(dp);
-       lck_mtx_unlock(domain_proto_mtx);
 
+       /* Point the old domain's mutex at the internal structure's mutex */
+       odp->dom_mtx            = dp->dom_mtx;
+       domain_guard_release(guard);
 }
 
+/*
+ * Exported (private) routine, indirection of net_del_domain.
+ */
 int
-net_del_domain(register struct domain *dp)
-{      register struct domain *dp1, *dp2;
-       register int retval = 0;
+net_del_domain_old(struct domain_old *odp)
+{
+       struct domain *dp1, *dp2;
+       int error = 0;
+       domain_guard_t guard;
 
-       lck_mtx_lock(domain_proto_mtx);
-       if (dp->dom_refs) {
-               lck_mtx_unlock(domain_proto_mtx);
-               return(EBUSY);
-     }
+       VERIFY(odp != NULL);
+
+       guard = domain_guard_deploy();
+       if (odp->dom_refs != 0) {
+               error = EBUSY;
+               goto done;
+       }
 
-       for (dp2 = NULL, dp1 = domains; dp1; dp2 = dp1, dp1 = dp1->dom_next)
-       {       if (dp == dp1)
+       TAILQ_FOREACH_SAFE(dp1, &domains, dom_entry, dp2) {
+               if (!(dp1->dom_flags & DOM_OLD))
+                       continue;
+               VERIFY(dp1->dom_old != NULL);
+               if (odp == dp1->dom_old)
                        break;
        }
-       if (dp1)
-       {       if (dp2)
-                       dp2->dom_next = dp1->dom_next;
-               else
-                       domains = dp1->dom_next;
-       } else
-               retval = EPFNOSUPPORT;
-       lck_mtx_unlock(domain_proto_mtx);
+       if (dp1 != NULL) {
+               struct protosw *pp1, *pp2;
 
-       return(retval);
+               VERIFY(dp1->dom_flags & DOM_OLD);
+               VERIFY(dp1->dom_old == odp);
+
+               /* Remove all protocols attached to this domain */
+               TAILQ_FOREACH_SAFE(pp1, &dp1->dom_protosw, pr_entry, pp2) {
+                       detach_proto(pp1, dp1);
+                       if (pp1->pr_usrreqs->pru_flags & PRUF_OLD)
+                               FREE(pp1->pr_usrreqs, M_TEMP);
+                       if (pp1->pr_flags & PR_OLD)
+                               FREE(pp1, M_TEMP);
+               }
+
+               detach_domain(dp1);
+               FREE(dp1, M_TEMP);
+       } else {
+               error = EPFNOSUPPORT;
+       }
+done:
+       domain_guard_release(guard);
+       return (error);
 }
 
 /*
+ * Internal routine, not exported.
+ *
  * net_add_proto - link a protosw into a domain's protosw chain
- * 
- * note: protocols must use their own domain lock before calling net_add_proto
+ *
+ * NOTE: Caller must have acquired domain_proto_mtx
  */
 int
-net_add_proto(register struct protosw *pp,
-             register struct domain *dp)
-{      register struct protosw *pp1, *pp2;
-
-       for (pp2 = NULL, pp1 = dp->dom_protosw; pp1; pp1 = pp1->pr_next)
-       {       if (pp1->pr_type == pp->pr_type &&
-                   pp1->pr_protocol == pp->pr_protocol) {
-                       return(EEXIST);
+net_add_proto(struct protosw *pp, struct domain *dp, int doinit)
+{
+       struct protosw *pp1;
+
+       /*
+        * This could be called as part of initializing the domain,
+        * and thus DOM_INITIALIZED may not be set (yet).
+        */
+       domain_proto_mtx_lock_assert_held();
+       VERIFY(!(pp->pr_flags & PR_ATTACHED));
+
+       /* pr_domain is set only after the protocol is attached */
+       if (pp->pr_domain != NULL) {
+               panic("%s: domain (%d,%s), proto %d has non-NULL pr_domain!\n",
+                   __func__, dp->dom_family, dp->dom_name, pp->pr_protocol);
+               /* NOTREACHED */
+       }
+
+       if (pp->pr_usrreqs == NULL) {
+               panic("%s: domain (%d,%s), proto %d has no usrreqs!\n",
+                   __func__, dp->dom_family, dp->dom_name, pp->pr_protocol);
+               /* NOTREACHED */
+       }
+
+       TAILQ_FOREACH(pp1, &dp->dom_protosw, pr_entry) {
+               if (pp1->pr_type == pp->pr_type &&
+                   pp1->pr_protocol == pp->pr_protocol)
+                       return (EEXIST);
+       }
+
+       attach_proto(pp, dp);
+       if (doinit)
+               net_init_proto(pp, dp);
+
+       return (0);
+}
+
+void
+net_init_proto(struct protosw *pp, struct domain *dp)
+{
+       /*
+        * This could be called as part of initializing the domain,
+        * and thus DOM_INITIALIZED may not be set (yet).  The protocol
+        * must have been attached via net_add_proto() by now.
+        */
+       domain_proto_mtx_lock_assert_held();
+       VERIFY(pp->pr_flags & PR_ATTACHED);
+
+       init_proto(pp, dp);
+}
+
+/*
+ * Exported (private) routine, indirection of net_add_proto.
+ */
+int
+net_add_proto_old(struct protosw_old *opp, struct domain_old *odp)
+{
+       struct pr_usrreqs_old *opru;
+       struct pr_usrreqs *pru = NULL;
+       struct protosw *pp = NULL, *pp1;
+       int error = 0;
+       struct domain *dp;
+       domain_guard_t guard;
+
+       /*
+        * This could be called as part of initializing the domain,
+        * and thus DOM_INITIALIZED may not be set (yet).
+        */
+       guard = domain_guard_deploy();
+
+       /* Make sure the domain has been added via net_add_domain */
+       TAILQ_FOREACH(dp, &domains, dom_entry) {
+               if (!(dp->dom_flags & DOM_OLD))
+                       continue;
+               if (dp->dom_old == odp)
+                       break;
+       }
+       if (dp == NULL) {
+               error = EINVAL;
+               goto done;
+       }
+
+       TAILQ_FOREACH(pp1, &dp->dom_protosw, pr_entry) {
+               if (pp1->pr_type == opp->pr_type &&
+                   pp1->pr_protocol == opp->pr_protocol) {
+                       error = EEXIST;
+                       goto done;
                }
-               pp2 = pp1;
        }
-       if (pp2 == NULL)
-               dp->dom_protosw = pp;
-       else
-               pp2->pr_next = pp;
-       pp->pr_next = NULL;
-       TAILQ_INIT(&pp->pr_filter_head);
-       if (pp->pr_init)
-               (*pp->pr_init)();
 
-       /* Make sure pr_init isn't called again!! */
-       pp->pr_init = 0;
-       return(0);
+       if ((opru = opp->pr_usrreqs) == NULL) {
+               panic("%s: domain (%d,%s), proto %d has no usrreqs!\n",
+                   __func__, odp->dom_family, odp->dom_name, opp->pr_protocol);
+               /* NOTREACHED */
+       }
+
+       pru = _MALLOC(sizeof (*pru), M_TEMP, M_WAITOK | M_ZERO);
+       if (pru == NULL) {
+               error = ENOMEM;
+               goto done;
+       }
+
+       pru->pru_flags          = PRUF_OLD;
+       pru->pru_abort          = opru->pru_abort;
+       pru->pru_accept         = opru->pru_accept;
+       pru->pru_attach         = opru->pru_attach;
+       pru->pru_bind           = opru->pru_bind;
+       pru->pru_connect        = opru->pru_connect;
+       pru->pru_connect2       = opru->pru_connect2;
+       pru->pru_control        = opru->pru_control;
+       pru->pru_detach         = opru->pru_detach;
+       pru->pru_disconnect     = opru->pru_disconnect;
+       pru->pru_listen         = opru->pru_listen;
+       pru->pru_peeraddr       = opru->pru_peeraddr;
+       pru->pru_rcvd           = opru->pru_rcvd;
+       pru->pru_rcvoob         = opru->pru_rcvoob;
+       pru->pru_send           = opru->pru_send;
+       pru->pru_sense          = opru->pru_sense;
+       pru->pru_shutdown       = opru->pru_shutdown;
+       pru->pru_sockaddr       = opru->pru_sockaddr;
+       pru->pru_sosend         = opru->pru_sosend;
+       pru->pru_soreceive      = opru->pru_soreceive;
+       pru->pru_sopoll         = opru->pru_sopoll;
+
+       pp = _MALLOC(sizeof (*pp), M_TEMP, M_WAITOK | M_ZERO);
+       if (pp == NULL) {
+               error = ENOMEM;
+               goto done;
+       }
+
+       /*
+        * Protocol fast and slow timers are now deprecated.
+        */
+       if (opp->pr_unused != NULL) {
+               printf("%s: domain (%d,%s), proto %d: pr_fasttimo is "
+                   "deprecated and won't be called\n", __func__,
+                   odp->dom_family, odp->dom_name, opp->pr_protocol);
+       }
+       if (opp->pr_unused2 != NULL) {
+               printf("%s: domain (%d,%s), proto %d: pr_slowtimo is "
+                   "deprecated and won't be called\n", __func__,
+                   odp->dom_family, odp->dom_name, opp->pr_protocol);
+       }
+
+       /* Copy everything but pr_init, pr_next, pr_domain, pr_protosw */
+       pp->pr_type             = opp->pr_type;
+       pp->pr_protocol         = opp->pr_protocol;
+       pp->pr_flags            = (opp->pr_flags & PRF_USERFLAGS) | PR_OLD;
+       pp->pr_input            = opp->pr_input;
+       pp->pr_output           = opp->pr_output;
+       pp->pr_ctlinput         = opp->pr_ctlinput;
+       pp->pr_ctloutput        = opp->pr_ctloutput;
+       pp->pr_usrreqs          = pru;
+       pp->pr_init             = pr_init_old;
+       pp->pr_drain            = opp->pr_drain;
+       pp->pr_sysctl           = opp->pr_sysctl;
+       pp->pr_lock             = opp->pr_lock;
+       pp->pr_unlock           = opp->pr_unlock;
+       pp->pr_getlock          = opp->pr_getlock;
+       pp->pr_old              = opp;
+
+       /* attach as well as initialize */
+       attach_proto(pp, dp);
+       net_init_proto(pp, dp);
+done:
+       if (error != 0) {
+               printf("%s: domain (%d,%s), proto %d: failed to attach, "
+                   "error %d\n", __func__, odp->dom_family,
+                   odp->dom_name, opp->pr_protocol, error);
+
+               if (pru != NULL)
+                       FREE(pru, M_TEMP);
+               if (pp != NULL)
+                       FREE(pp, M_TEMP);
+       }
+
+       domain_guard_release(guard);
+       return (error);
 }
 
 /*
+ * Internal routine, not exported.
+ *
  * net_del_proto - remove a protosw from a domain's protosw chain.
  * Search the protosw chain for the element with matching data.
  * Then unlink and return.
  *
- * note: protocols must use their own domain lock before calling net_del_proto
+ * NOTE: Caller must have acquired domain_proto_mtx
  */
 int
-net_del_proto(register int type,
-             register int protocol,
-             register struct domain *dp)
-{      register struct protosw *pp1, *pp2;
-
-       for (pp2 = NULL, pp1 = dp->dom_protosw; pp1; pp1 = pp1->pr_next)
-       {       if (pp1->pr_type == type &&
-                   pp1->pr_protocol == protocol)
+net_del_proto(int type, int protocol, struct domain *dp)
+{
+       struct protosw *pp;
+
+       /*
+        * This could be called as part of initializing the domain,
+        * and thus DOM_INITIALIZED may not be set (yet).
+        */
+       domain_proto_mtx_lock_assert_held();
+
+       TAILQ_FOREACH(pp, &dp->dom_protosw, pr_entry) {
+               if (pp->pr_type == type && pp->pr_protocol == protocol)
                        break;
-               pp2 = pp1;
        }
-        if (pp1 == NULL) {
-                       return(ENXIO);
-               }
-       if (pp2)
-               pp2->pr_next = pp1->pr_next;
-       else
-               dp->dom_protosw = pp1->pr_next;
-       return(0);
+       if (pp == NULL)
+               return (ENXIO);
+
+       detach_proto(pp, dp);
+       if (pp->pr_usrreqs->pru_flags & PRUF_OLD)
+               FREE(pp->pr_usrreqs, M_TEMP);
+       if (pp->pr_flags & PR_OLD)
+               FREE(pp, M_TEMP);
+
+       return (0);
+}
+
+/*
+ * Exported (private) routine, indirection of net_del_proto.
+ */
+int
+net_del_proto_old(int type, int protocol, struct domain_old *odp)
+{
+       int error = 0;
+       struct protosw *pp;
+       struct domain *dp;
+       domain_guard_t guard;
+
+       /*
+        * This could be called as part of initializing the domain,
+        * and thus DOM_INITIALIZED may not be set (yet).
+        */
+       guard = domain_guard_deploy();
+
+       /* Make sure the domain has been added via net_add_domain */
+       TAILQ_FOREACH(dp, &domains, dom_entry) {
+               if (!(dp->dom_flags & DOM_OLD))
+                       continue;
+               if (dp->dom_old == odp)
+                       break;
+       }
+       if (dp == NULL) {
+               error = ENXIO;
+               goto done;
+       }
+
+       TAILQ_FOREACH(pp, &dp->dom_protosw, pr_entry) {
+               if (pp->pr_type == type && pp->pr_protocol == protocol)
+                       break;
+       }
+       if (pp == NULL) {
+               error = ENXIO;
+               goto done;
+       }
+       detach_proto(pp, dp);
+       if (pp->pr_usrreqs->pru_flags & PRUF_OLD)
+               FREE(pp->pr_usrreqs, M_TEMP);
+       if (pp->pr_flags & PR_OLD)
+               FREE(pp, M_TEMP);
+
+done:
+       domain_guard_release(guard);
+       return (error);
 }
 
+static void
+domain_sched_timeout(void)
+{
+       lck_mtx_assert(&domain_timeout_mtx, LCK_MTX_ASSERT_OWNED);
+
+       if (!domain_timeout_run && domain_draining) {
+               domain_timeout_run = TRUE;
+               timeout(domain_timeout, NULL, hz);
+       }
+}
 
 void
-domaininit()
-{      register struct domain *dp;
-       register struct protosw *pr;
-       extern struct domain localdomain, routedomain, ndrvdomain, inetdomain;
-       extern struct domain systemdomain;
-#if NS
-       extern struct domain nsdomain;
-#endif
-#if ISO
-       extern struct domain isodomain;
-#endif
-#if CCITT
-       extern struct domain ccittdomain;
-#endif
+net_drain_domains(void)
+{
+       lck_mtx_lock(&domain_timeout_mtx);
+       domain_draining = TRUE;
+       domain_sched_timeout();
+       lck_mtx_unlock(&domain_timeout_mtx);
+}
 
-#if NETAT
-       extern struct domain atalkdomain;
-#endif
 #if INET6
-       extern struct domain inet6domain;
+extern struct domain inet6domain_s;
 #endif
 #if IPSEC
-       extern struct domain keydomain;
+extern struct domain keydomain_s;
 #endif
 
+extern struct domain routedomain_s, ndrvdomain_s, inetdomain_s;
+extern struct domain systemdomain_s, localdomain_s;
+
+#if MULTIPATH
+extern struct domain mpdomain_s;
+#endif /* MULTIPATH */
+
+static void
+domain_timeout(void *arg)
+{
+#pragma unused(arg)
+       struct protosw *pp;
+       struct domain *dp;
+       domain_guard_t guard;
+
+       lck_mtx_lock(&domain_timeout_mtx);
+       if (domain_draining) {
+               domain_draining = FALSE;
+               lck_mtx_unlock(&domain_timeout_mtx);
+
+               guard = domain_guard_deploy();
+               TAILQ_FOREACH(dp, &domains, dom_entry) {
+                       TAILQ_FOREACH(pp, &dp->dom_protosw, pr_entry) {
+                               if (pp->pr_drain != NULL)
+                                       (*pp->pr_drain)();
+                       }
+               }
+               domain_guard_release(guard);
+
+               lck_mtx_lock(&domain_timeout_mtx);
+       }
+
+       /* re-arm the timer if there's work to do */
+       domain_timeout_run = FALSE;
+       domain_sched_timeout();
+       lck_mtx_unlock(&domain_timeout_mtx);
+}
+
+void
+domaininit(void)
+{
+       struct domain *dp;
+       domain_guard_t guard;
+
        /*
         * allocate lock group attribute and group for domain mutexes
         */
        domain_proto_mtx_grp_attr = lck_grp_attr_alloc_init();
-       lck_grp_attr_setdefault(domain_proto_mtx_grp_attr);
 
-       domain_proto_mtx_grp = lck_grp_alloc_init("domain", domain_proto_mtx_grp_attr);
-               
+       domain_proto_mtx_grp = lck_grp_alloc_init("domain",
+           domain_proto_mtx_grp_attr);
+
        /*
         * allocate the lock attribute for per domain mutexes
         */
        domain_proto_mtx_attr = lck_attr_alloc_init();
-       lck_attr_setdefault(domain_proto_mtx_attr);
 
-       if ((domain_proto_mtx = lck_mtx_alloc_init(domain_proto_mtx_grp, domain_proto_mtx_attr)) == NULL) {
-               printf("domaininit: can't init domain mtx for domain list\n");
-               return; /* we have a problem... */
-       }
+       lck_mtx_init(&domain_proto_mtx, domain_proto_mtx_grp,
+           domain_proto_mtx_attr);
+       lck_mtx_init(&domain_timeout_mtx, domain_proto_mtx_grp,
+           domain_proto_mtx_attr);
+
+       guard = domain_guard_deploy();
        /*
-        * Add all the static domains to the domains list
+        * Add all the static domains to the domains list.  route domain
+        * gets added and initialized last, since we need it to attach
+        * rt_tables[] to everything that's already there.  This also
+        * means that domains added after this point won't get their
+        * dom_rtattach() called on rt_tables[].
         */
-
-       lck_mtx_lock(domain_proto_mtx);
-
-       concat_domain(&localdomain);
-       concat_domain(&routedomain);
-       concat_domain(&inetdomain);
-#if NETAT
-       concat_domain(&atalkdomain);
-#endif
+       attach_domain(&inetdomain_s);
 #if INET6
-       concat_domain(&inet6domain);
-#endif
+       attach_domain(&inet6domain_s);
+#endif /* INET6 */
+#if MULTIPATH
+       attach_domain(&mpdomain_s);
+#endif /* MULTIPATH */
+       attach_domain(&systemdomain_s);
+       attach_domain(&localdomain_s);
 #if IPSEC
-       concat_domain(&keydomain);
-#endif
-
-#if NS
-       concat_domain(&nsdomain);
-#endif
-#if ISO
-       concat_domain(&isodomain);
-#endif
-#if CCITT
-       concat_domain(&ccittdomain);
-#endif
-       concat_domain(&ndrvdomain);
-
-       concat_domain(&systemdomain);
+       attach_domain(&keydomain_s);
+#endif /* IPSEC */
+       attach_domain(&ndrvdomain_s);
+       attach_domain(&routedomain_s);  /* must be last domain */
 
        /*
         * Now ask them all to init (XXX including the routing domain,
         * see above)
         */
-       for (dp = domains; dp; dp = dp->dom_next)
+       TAILQ_FOREACH(dp, &domains, dom_entry)
                init_domain(dp);
 
-       lck_mtx_unlock(domain_proto_mtx);
-       timeout(pffasttimo, NULL, 1);
-       timeout(pfslowtimo, NULL, 1);
+       domain_guard_release(guard);
+}
+
+static __inline__ struct domain *
+pffinddomain_locked(int pf)
+{
+       struct domain *dp;
+
+       domain_proto_mtx_lock_assert_held();
+
+       TAILQ_FOREACH(dp, &domains, dom_entry) {
+               if (dp->dom_family == pf)
+                       break;
+       }
+       return (dp);
 }
 
 struct protosw *
-pffindtype(family, type)
-       int family, type;
-{
-       register struct domain *dp;
-       register struct protosw *pr;
-
-       lck_mtx_assert(domain_proto_mtx, LCK_MTX_ASSERT_NOTOWNED);
-       lck_mtx_lock(domain_proto_mtx);
-       for (dp = domains; dp; dp = dp->dom_next)
-               if (dp->dom_family == family)
-                       goto found;
-       lck_mtx_unlock(domain_proto_mtx);
-       return (0);
-found:
-       for (pr = dp->dom_protosw; pr; pr = pr->pr_next)
-               if (pr->pr_type && pr->pr_type == type) {
-                       lck_mtx_unlock(domain_proto_mtx);
-                       return (pr);
-               }
-       lck_mtx_unlock(domain_proto_mtx);
-       return (0);
+pffindtype(int family, int type)
+{
+       struct protosw *pp = NULL;
+       struct domain *dp;
+       domain_guard_t guard;
+
+       guard = domain_guard_deploy();
+       if ((dp = pffinddomain_locked(family)) == NULL)
+               goto done;
+
+       TAILQ_FOREACH(pp, &dp->dom_protosw, pr_entry) {
+               if (pp->pr_type != 0 && pp->pr_type == type)
+                       goto done;
+       }
+done:
+       domain_guard_release(guard);
+       return (pp);
 }
 
+/*
+ * Internal routine, not exported.
+ */
 struct domain *
 pffinddomain(int pf)
-{      struct domain *dp;
-
-       lck_mtx_assert(domain_proto_mtx, LCK_MTX_ASSERT_NOTOWNED);
-       lck_mtx_lock(domain_proto_mtx);
-       dp = domains;
-       while (dp)
-       {       if (dp->dom_family == pf) {
-                       lck_mtx_unlock(domain_proto_mtx);
-                       return(dp);
-               }
-               dp = dp->dom_next;
-       }
-       lck_mtx_unlock(domain_proto_mtx);
-       return(NULL);
+{
+       struct domain *dp;
+       domain_guard_t guard;
+
+       guard = domain_guard_deploy();
+       dp = pffinddomain_locked(pf);
+       domain_guard_release(guard);
+       return (dp);
+}
+
+/*
+ * Exported (private) routine, indirection of pffinddomain.
+ */
+struct domain_old *
+pffinddomain_old(int pf)
+{
+       struct domain_old *odp = NULL;
+       struct domain *dp;
+       domain_guard_t guard;
+
+       guard = domain_guard_deploy();
+       if ((dp = pffinddomain_locked(pf)) != NULL && (dp->dom_flags & DOM_OLD))
+               odp = dp->dom_old;
+       domain_guard_release(guard);
+       return (odp);
 }
 
+/*
+ * Internal routine, not exported.
+ */
 struct protosw *
-pffindproto(family, protocol, type)
-       int family, protocol, type;
+pffindproto(int family, int protocol, int type)
 {
-       register struct protosw *pr;
-       lck_mtx_assert(domain_proto_mtx, LCK_MTX_ASSERT_NOTOWNED);
-       lck_mtx_lock(domain_proto_mtx);
-       pr = pffindproto_locked(family, protocol, type);
-       lck_mtx_unlock(domain_proto_mtx);
-       return (pr);
+       struct protosw *pp;
+       domain_guard_t guard;
+
+       guard = domain_guard_deploy();
+       pp = pffindproto_locked(family, protocol, type);
+       domain_guard_release(guard);
+       return (pp);
 }
 
 struct protosw *
-pffindproto_locked(family, protocol, type)
-       int family, protocol, type;
+pffindproto_locked(int family, int protocol, int type)
 {
-       register struct domain *dp;
-       register struct protosw *pr;
-       struct protosw *maybe = 0;
+       struct protosw *maybe = NULL;
+       struct protosw *pp;
+       struct domain *dp;
+
+       domain_proto_mtx_lock_assert_held();
 
        if (family == 0)
                return (0);
-       for (dp = domains; dp; dp = dp->dom_next)
-               if (dp->dom_family == family)
-                       goto found;
-       return (0);
-found:
-       for (pr = dp->dom_protosw; pr; pr = pr->pr_next) {
-               if ((pr->pr_protocol == protocol) && (pr->pr_type == type))
-                       return (pr);
 
-               if (type == SOCK_RAW && pr->pr_type == SOCK_RAW &&
-                   pr->pr_protocol == 0 && maybe == (struct protosw *)0)
-                       maybe = pr;
+       dp = pffinddomain_locked(family);
+       if (dp == NULL)
+               return (NULL);
+
+       TAILQ_FOREACH(pp, &dp->dom_protosw, pr_entry) {
+               if ((pp->pr_protocol == protocol) && (pp->pr_type == type))
+                       return (pp);
+
+               if (type == SOCK_RAW && pp->pr_type == SOCK_RAW &&
+                   pp->pr_protocol == 0 && maybe == NULL)
+                       maybe = pp;
        }
        return (maybe);
 }
 
-int
-net_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp, 
-           user_addr_t newp, size_t newlen, struct proc *p)
+/*
+ * Exported (private) routine, indirection of pffindproto.
+ */
+struct protosw_old *
+pffindproto_old(int family, int protocol, int type)
 {
-       register struct domain *dp;
-       register struct protosw *pr;
-       int family, protocol, error;
+       struct protosw_old *opr = NULL;
+       struct protosw *pp;
+       domain_guard_t guard;
 
-       /*
-        * All sysctl names at this level are nonterminal;
-        * next two components are protocol family and protocol number,
-        * then at least one addition component.
-        */
-       if (namelen < 3)
-               return (EISDIR);                /* overloaded */
-       family = name[0];
-       protocol = name[1];
+       guard = domain_guard_deploy();
+       if ((pp = pffindproto_locked(family, protocol, type)) != NULL &&
+           (pp->pr_flags & PR_OLD))
+               opr = pp->pr_old;
+       domain_guard_release(guard);
+       return (opr);
+}
+
+static struct protosw *
+pffindprotonotype_locked(int family, int protocol, int type)
+{
+#pragma unused(type)
+       struct domain *dp;
+       struct protosw *pp;
+
+       domain_proto_mtx_lock_assert_held();
 
        if (family == 0)
                return (0);
-       lck_mtx_lock(domain_proto_mtx);
-       for (dp = domains; dp; dp = dp->dom_next)
-               if (dp->dom_family == family)
-                       goto found;
-       lck_mtx_unlock(domain_proto_mtx);
-       return (ENOPROTOOPT);
-found:
-       for (pr = dp->dom_protosw; pr; pr = pr->pr_next)
-               if (pr->pr_protocol == protocol && pr->pr_sysctl) {
-                       error = (*pr->pr_sysctl)(name + 2, namelen - 2,
-                           oldp, oldlenp, newp, newlen);
-                       lck_mtx_unlock(domain_proto_mtx);
-                       return (error);
-               }
-       lck_mtx_unlock(domain_proto_mtx);
-       return (ENOPROTOOPT);
+
+       dp = pffinddomain_locked(family);
+       if (dp == NULL)
+               return (NULL);
+
+       TAILQ_FOREACH(pp, &dp->dom_protosw, pr_entry) {
+               if (pp->pr_protocol == protocol)
+                       return (pp);
+       }
+       return (NULL);
+}
+
+struct protosw *
+pffindprotonotype(int family, int protocol)
+{
+       struct protosw *pp;
+       domain_guard_t guard;
+
+       if (protocol == 0)
+               return (NULL);
+
+       guard = domain_guard_deploy();
+       pp = pffindprotonotype_locked(family, protocol, 0);
+       domain_guard_release(guard);
+       return (pp);
 }
 
 void
-pfctlinput(cmd, sa)
-       int cmd;
-       struct sockaddr *sa;
+pfctlinput(int cmd, struct sockaddr *sa)
 {
-       pfctlinput2(cmd, sa, (void*)0);
+       pfctlinput2(cmd, sa, NULL);
 }
 
 void
-pfctlinput2(cmd, sa, ctlparam)
-       int cmd;
-       struct sockaddr *sa;
-       void *ctlparam;
+pfctlinput2(int cmd, struct sockaddr *sa, void *ctlparam)
 {
        struct domain *dp;
-       struct protosw *pr;
+       struct protosw *pp;
+       domain_guard_t guard;
 
-       if (!sa)
+       if (sa == NULL)
                return;
 
-       lck_mtx_lock(domain_proto_mtx);
-       for (dp = domains; dp; dp = dp->dom_next)
-               for (pr = dp->dom_protosw; pr; pr = pr->pr_next)
-                       if (pr->pr_ctlinput)
-                               (*pr->pr_ctlinput)(cmd, sa, ctlparam);
-       lck_mtx_unlock(domain_proto_mtx);
+       guard = domain_guard_deploy();
+       TAILQ_FOREACH(dp, &domains, dom_entry) {
+               TAILQ_FOREACH(pp, &dp->dom_protosw, pr_entry) {
+                       if (pp->pr_ctlinput != NULL)
+                               (*pp->pr_ctlinput)(cmd, sa, ctlparam);
+               }
+       }
+       domain_guard_release(guard);
 }
 
 void
-pfslowtimo(arg)
-       void *arg;
-{
-       register struct domain *dp;
-       register struct protosw *pr;
-
-       lck_mtx_lock(domain_proto_mtx);
-       for (dp = domains; dp; dp = dp->dom_next) 
-               for (pr = dp->dom_protosw; pr; pr = pr->pr_next) {
-                       if (pr->pr_slowtimo)
-                               (*pr->pr_slowtimo)();
-                       if (do_reclaim && pr->pr_drain)
-                               (*pr->pr_drain)();
-               }
-       do_reclaim = 0;
-       lck_mtx_unlock(domain_proto_mtx);
-       timeout(pfslowtimo, NULL, hz/2);
-        
+net_update_uptime(void)
+{
+       struct timeval tv;
+
+       microuptime(&tv);
+       _net_uptime = tv.tv_sec;
+       /*
+        * Round the timer to the nearest second because otherwise
+        * we might set up networking timers that are off by almost 1 second.
+        */
+       if (tv.tv_usec > 500000)
+               _net_uptime++;
 }
 
 void
-pffasttimo(arg)
-       void *arg;
+net_update_uptime_secs(uint64_t secs)
 {
-       register struct domain *dp;
-       register struct protosw *pr;
+       _net_uptime = secs;
+}
 
-       lck_mtx_lock(domain_proto_mtx);
-       for (dp = domains; dp; dp = dp->dom_next)
-               for (pr = dp->dom_protosw; pr; pr = pr->pr_next)
-                       if (pr->pr_fasttimo)
-                               (*pr->pr_fasttimo)();
-       lck_mtx_unlock(domain_proto_mtx);
-       timeout(pffasttimo, NULL, hz/5);
+/*
+ * Convert our uint64_t net_uptime to a struct timeval.
+ */
+void
+net_uptime2timeval(struct timeval *tv)
+{
+       if (tv == NULL)
+               return;
+
+       tv->tv_usec = 0;
+       tv->tv_sec = net_uptime();
 }
+
+/*
+ * An alternative way to obtain the coarse-grained uptime (in seconds)
+ * for networking code which does not require a high-precision timestamp,
+ * as this is significantly cheaper than microuptime().
+ */
+u_int64_t
+net_uptime(void)
+{
+       if (_net_uptime == 0)
+               net_update_uptime();
+
+       return (_net_uptime);
+}
+
+void
+domain_proto_mtx_lock_assert_held(void)
+{
+       lck_mtx_assert(&domain_proto_mtx, LCK_MTX_ASSERT_OWNED);
+}
+
+void
+domain_proto_mtx_lock_assert_notheld(void)
+{
+       lck_mtx_assert(&domain_proto_mtx, LCK_MTX_ASSERT_NOTOWNED);
+}
+
+domain_guard_t
+domain_guard_deploy(void)
+{
+       net_thread_marks_t marks;
+
+       marks = net_thread_marks_push(NET_THREAD_HELD_DOMAIN);
+       if (marks != net_thread_marks_none) {
+               lck_mtx_assert(&domain_proto_mtx, LCK_MTX_ASSERT_NOTOWNED);
+               lck_mtx_lock(&domain_proto_mtx);
+       }
+       else
+               lck_mtx_assert(&domain_proto_mtx, LCK_MTX_ASSERT_OWNED);
+
+       return ((domain_guard_t)(const void*)marks);
+}
+
+void
+domain_guard_release(domain_guard_t guard)
+{
+       net_thread_marks_t marks = (net_thread_marks_t)(const void*)guard;
+
+       if (marks != net_thread_marks_none) {
+               lck_mtx_assert(&domain_proto_mtx, LCK_MTX_ASSERT_OWNED);
+               lck_mtx_unlock(&domain_proto_mtx);
+               net_thread_marks_pop(marks);
+       }
+       else
+               lck_mtx_assert(&domain_proto_mtx, LCK_MTX_ASSERT_NOTOWNED);
+}
+
+domain_unguard_t
+domain_unguard_deploy(void)
+{
+       net_thread_marks_t marks;
+
+       marks = net_thread_unmarks_push(NET_THREAD_HELD_DOMAIN);
+       if (marks != net_thread_marks_none) {
+               lck_mtx_assert(&domain_proto_mtx, LCK_MTX_ASSERT_OWNED);
+               lck_mtx_unlock(&domain_proto_mtx);
+       }
+       else
+               lck_mtx_assert(&domain_proto_mtx, LCK_MTX_ASSERT_NOTOWNED);
+
+       return ((domain_unguard_t)(const void*)marks);
+}
+
+void
+domain_unguard_release(domain_unguard_t unguard)
+{
+       net_thread_marks_t marks = (net_thread_marks_t)(const void*)unguard;
+
+       if (marks != net_thread_marks_none) {
+               lck_mtx_assert(&domain_proto_mtx, LCK_MTX_ASSERT_NOTOWNED);
+               lck_mtx_lock(&domain_proto_mtx);
+               net_thread_unmarks_pop(marks);
+       }
+       else
+               lck_mtx_assert(&domain_proto_mtx, LCK_MTX_ASSERT_OWNED);
+}
+
+#if (DEVELOPMENT || DEBUG)
+static int
+sysctl_do_drain_domains SYSCTL_HANDLER_ARGS
+{
+#pragma unused(arg1, arg2)
+       int error;
+       int dummy = 0;
+
+       error = sysctl_handle_int(oidp, &dummy, 0, req);        
+       if (error || req->newptr == USER_ADDR_NULL)
+               return (error);
+
+       net_drain_domains();
+
+       return (0);
+}
+
+#endif /* DEVELOPMENT || DEBUG */
\ No newline at end of file