]> git.saurik.com Git - apple/xnu.git/blobdiff - bsd/netinet/in_pcb.c
xnu-3789.21.4.tar.gz
[apple/xnu.git] / bsd / netinet / in_pcb.c
index 973abc9aca426361a39cdf769b64f35c9443606e..c20961c22d1d29d70e8f3a1f81d2894d4dbd446b 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -77,6 +77,8 @@
 #include <sys/priv.h>
 #include <sys/proc_uuid_policy.h>
 #include <sys/syslog.h>
+#include <sys/priv.h>
+#include <net/dlil.h>
 
 #include <libkern/OSAtomic.h>
 #include <kern/locks.h>
@@ -90,6 +92,7 @@
 #include <net/route.h>
 #include <net/flowhash.h>
 #include <net/flowadv.h>
+#include <net/ntstat.h>
 
 #include <netinet/in.h>
 #include <netinet/in_pcb.h>
 #include <netinet6/ip6_var.h>
 #endif /* INET6 */
 
-#if IPSEC
-#include <netinet6/ipsec.h>
-#include <netkey/key.h>
-#endif /* IPSEC */
-
 #include <sys/kdebug.h>
 #include <sys/random.h>
 
 #include <dev/random/randomdev.h>
 #include <mach/boolean.h>
 
-#if FLOW_DIVERT
-#include <netinet/flow_divert.h>
+#include <pexpert/pexpert.h>
+
+#if NECP
+#include <net/necp.h>
 #endif
 
+#include <sys/stat.h>
+#include <sys/ubc.h>
+#include <sys/vnode.h>
+
 static lck_grp_t       *inpcb_lock_grp;
 static lck_attr_t      *inpcb_lock_attr;
 static lck_grp_attr_t  *inpcb_lock_grp_attr;
@@ -127,22 +131,33 @@ static u_int16_t inpcb_timeout_run = 0;   /* INPCB timer is scheduled to run */
 static boolean_t inpcb_garbage_collecting = FALSE; /* gc timer is scheduled */
 static boolean_t inpcb_ticking = FALSE;                /* "slow" timer is scheduled */
 static boolean_t inpcb_fast_timer_on = FALSE;
-static void inpcb_sched_timeout(struct timeval *);
-static void inpcb_timeout(void *);
-int inpcb_timeout_lazy = 10;   /* 10 seconds leeway for lazy timers */
+static boolean_t intcoproc_unrestricted = FALSE;
+
+extern char *proc_best_name(proc_t);
+
+/*
+ * If the total number of gc reqs is above a threshold, schedule
+ * garbage collect timer sooner
+ */
+static boolean_t inpcb_toomany_gcreq = FALSE;
+
+#define        INPCB_GCREQ_THRESHOLD   50000
+
+static thread_call_t inpcb_thread_call, inpcb_fast_thread_call;
+static void inpcb_sched_timeout(void);
+static void inpcb_sched_lazy_timeout(void);
+static void _inpcb_sched_timeout(unsigned int);
+static void inpcb_timeout(void *, void *);
+const int inpcb_timeout_lazy = 10;     /* 10 seconds leeway for lazy timers */
 extern int tvtohz(struct timeval *);
 
 #if CONFIG_PROC_UUID_POLICY
 static void inp_update_cellular_policy(struct inpcb *, boolean_t);
-#if FLOW_DIVERT
-static void inp_update_flow_divert_policy(struct inpcb *, boolean_t);
-#endif /* FLOW_DIVERT */
+#if NECP
+static void inp_update_necp_want_app_policy(struct inpcb *, boolean_t);
+#endif /* NECP */
 #endif /* !CONFIG_PROC_UUID_POLICY */
 
-#if IPSEC
-extern int ipsec_bypass;
-#endif /* IPSEC */
-
 #define        DBG_FNC_PCB_LOOKUP      NETDBG_CODE(DBG_NETTCP, (6 << 8))
 #define        DBG_FNC_PCB_HLOOKUP     NETDBG_CODE(DBG_NETTCP, ((6 << 8) | 1))
 
@@ -203,6 +218,11 @@ SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast,
        CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
        &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", "");
 
+static uint32_t apn_fallbk_debug = 0;
+#define apn_fallbk_log(x)       do { if (apn_fallbk_debug >= 1) log x; } while (0)
+
+static boolean_t apn_fallbk_enabled = FALSE;
+
 extern int     udp_use_randomport;
 extern int     tcp_use_randomport;
 
@@ -269,6 +289,12 @@ in_pcbinit(void)
        inpcb_lock_attr = lck_attr_alloc_init();
        lck_mtx_init(&inpcb_lock, inpcb_lock_grp, inpcb_lock_attr);
        lck_mtx_init(&inpcb_timeout_lock, inpcb_lock_grp, inpcb_lock_attr);
+       inpcb_thread_call = thread_call_allocate_with_priority(inpcb_timeout,
+           NULL, THREAD_CALL_PRIORITY_KERNEL);
+       inpcb_fast_thread_call = thread_call_allocate_with_priority(
+           inpcb_timeout, NULL, THREAD_CALL_PRIORITY_KERNEL);
+       if (inpcb_thread_call == NULL || inpcb_fast_thread_call == NULL)
+               panic("unable to alloc the inpcb thread call");
 
        /*
         * Initialize data structures required to deliver
@@ -279,18 +305,26 @@ in_pcbinit(void)
        RB_INIT(&inp_fc_tree);
        bzero(&key_inp, sizeof(key_inp));
        lck_mtx_unlock(&inp_fc_lck);
+
+       PE_parse_boot_argn("intcoproc_unrestricted", &intcoproc_unrestricted,
+           sizeof (intcoproc_unrestricted));
 }
 
 #define        INPCB_HAVE_TIMER_REQ(req)       (((req).intimer_lazy > 0) || \
        ((req).intimer_fast > 0) || ((req).intimer_nodelay > 0))
 static void
-inpcb_timeout(void *arg)
+inpcb_timeout(void *arg0, void *arg1)
 {
-#pragma unused(arg)
+#pragma unused(arg0)
        struct inpcbinfo *ipi;
        boolean_t t, gc;
        struct intimercount gccnt, tmcnt;
-       struct timeval leeway;
+       boolean_t toomany_gc = FALSE;
+
+       if (arg1 != NULL) {
+               VERIFY(arg1 == &inpcb_toomany_gcreq);
+               toomany_gc = *(boolean_t *)arg1;
+       }
 
        /*
         * Update coarse-grained networking timestamp (in sec.); the idea
@@ -299,11 +333,12 @@ inpcb_timeout(void *arg)
         */
        net_update_uptime();
 
+       bzero(&gccnt, sizeof(gccnt));
+       bzero(&tmcnt, sizeof(tmcnt));
+
        lck_mtx_lock_spin(&inpcb_timeout_lock);
        gc = inpcb_garbage_collecting;
        inpcb_garbage_collecting = FALSE;
-       bzero(&gccnt, sizeof(gccnt));
-       bzero(&tmcnt, sizeof(tmcnt));
 
        t = inpcb_ticking;
        inpcb_ticking = FALSE;
@@ -351,73 +386,110 @@ inpcb_timeout(void *arg)
                inpcb_ticking = INPCB_HAVE_TIMER_REQ(tmcnt);
 
        /* re-arm the timer if there's work to do */
-       inpcb_timeout_run--;
-       VERIFY(inpcb_timeout_run >= 0 && inpcb_timeout_run < 2);
+       if (toomany_gc) {
+               inpcb_toomany_gcreq = FALSE;
+       } else {
+               inpcb_timeout_run--;
+               VERIFY(inpcb_timeout_run >= 0 && inpcb_timeout_run < 2);
+       }
 
-       bzero(&leeway, sizeof(leeway));
-       leeway.tv_sec = inpcb_timeout_lazy;
        if (gccnt.intimer_nodelay > 0 || tmcnt.intimer_nodelay > 0)
-               inpcb_sched_timeout(NULL);
+               inpcb_sched_timeout();
        else if ((gccnt.intimer_fast + tmcnt.intimer_fast) <= 5)
                /* be lazy when idle with little activity */
-               inpcb_sched_timeout(&leeway);
+               inpcb_sched_lazy_timeout();
        else
-               inpcb_sched_timeout(NULL);
+               inpcb_sched_timeout();
 
        lck_mtx_unlock(&inpcb_timeout_lock);
 }
 
 static void
-inpcb_sched_timeout(struct timeval *leeway)
+inpcb_sched_timeout(void)
 {
-       lck_mtx_assert(&inpcb_timeout_lock, LCK_MTX_ASSERT_OWNED);
+       _inpcb_sched_timeout(0);
+}
 
+static void
+inpcb_sched_lazy_timeout(void)
+{
+       _inpcb_sched_timeout(inpcb_timeout_lazy);
+}
+
+static void
+_inpcb_sched_timeout(unsigned int offset)
+{
+       uint64_t deadline, leeway;
+
+       clock_interval_to_deadline(1, NSEC_PER_SEC, &deadline);
+       lck_mtx_assert(&inpcb_timeout_lock, LCK_MTX_ASSERT_OWNED);
        if (inpcb_timeout_run == 0 &&
-               (inpcb_garbage_collecting || inpcb_ticking)) {
+           (inpcb_garbage_collecting || inpcb_ticking)) {
                lck_mtx_convert_spin(&inpcb_timeout_lock);
                inpcb_timeout_run++;
-               if (leeway == NULL) {
+               if (offset == 0) {
                        inpcb_fast_timer_on = TRUE;
-                       timeout(inpcb_timeout, NULL, hz);
+                       thread_call_enter_delayed(inpcb_thread_call,
+                           deadline);
                } else {
                        inpcb_fast_timer_on = FALSE;
-                       timeout_with_leeway(inpcb_timeout, NULL, hz,
-                               tvtohz(leeway));
+                       clock_interval_to_absolutetime_interval(offset,
+                           NSEC_PER_SEC, &leeway);
+                       thread_call_enter_delayed_with_leeway(
+                           inpcb_thread_call, NULL, deadline, leeway,
+                           THREAD_CALL_DELAY_LEEWAY);
                }
        } else if (inpcb_timeout_run == 1 &&
-               leeway == NULL && !inpcb_fast_timer_on) {
+           offset == 0 && !inpcb_fast_timer_on) {
                /*
                 * Since the request was for a fast timer but the
                 * scheduled timer is a lazy timer, try to schedule
-                * another instance of fast timer also
+                * another instance of fast timer also.
                 */
                lck_mtx_convert_spin(&inpcb_timeout_lock);
                inpcb_timeout_run++;
                inpcb_fast_timer_on = TRUE;
-               timeout(inpcb_timeout, NULL, hz);
+               thread_call_enter_delayed(inpcb_fast_thread_call, deadline);
        }
 }
 
 void
 inpcb_gc_sched(struct inpcbinfo *ipi, u_int32_t type)
 {
-       struct timeval leeway;
+       u_int32_t gccnt;
+       uint64_t deadline;
+
        lck_mtx_lock_spin(&inpcb_timeout_lock);
        inpcb_garbage_collecting = TRUE;
+       gccnt = ipi->ipi_gc_req.intimer_nodelay +
+               ipi->ipi_gc_req.intimer_fast;
+
+       if (gccnt > INPCB_GCREQ_THRESHOLD && !inpcb_toomany_gcreq) {
+               inpcb_toomany_gcreq = TRUE;
+
+               /*
+                * There are too many pcbs waiting to be garbage collected;
+                * schedule a much faster timeout in addition to
+                * the caller's request.
+                */
+               lck_mtx_convert_spin(&inpcb_timeout_lock);
+               clock_interval_to_deadline(100, NSEC_PER_MSEC, &deadline);
+               thread_call_enter1_delayed(inpcb_thread_call,
+                   &inpcb_toomany_gcreq, deadline);
+       }
+
        switch (type) {
        case INPCB_TIMER_NODELAY:
                atomic_add_32(&ipi->ipi_gc_req.intimer_nodelay, 1);
-               inpcb_sched_timeout(NULL);
+               inpcb_sched_timeout();
                break;
        case INPCB_TIMER_FAST:
                atomic_add_32(&ipi->ipi_gc_req.intimer_fast, 1);
-               inpcb_sched_timeout(NULL);
+               inpcb_sched_timeout();
                break;
        default:
                atomic_add_32(&ipi->ipi_gc_req.intimer_lazy, 1);
-               leeway.tv_sec = inpcb_timeout_lazy;
-               leeway.tv_usec = 0;
-               inpcb_sched_timeout(&leeway);
+               inpcb_sched_lazy_timeout();
                break;
        }
        lck_mtx_unlock(&inpcb_timeout_lock);
@@ -426,23 +498,21 @@ inpcb_gc_sched(struct inpcbinfo *ipi, u_int32_t type)
 void
 inpcb_timer_sched(struct inpcbinfo *ipi, u_int32_t type)
 {
-       struct timeval leeway;
+
        lck_mtx_lock_spin(&inpcb_timeout_lock);
        inpcb_ticking = TRUE;
        switch (type) {
        case INPCB_TIMER_NODELAY:
                atomic_add_32(&ipi->ipi_timer_req.intimer_nodelay, 1);
-               inpcb_sched_timeout(NULL);
+               inpcb_sched_timeout();
                break;
        case INPCB_TIMER_FAST:
                atomic_add_32(&ipi->ipi_timer_req.intimer_fast, 1);
-               inpcb_sched_timeout(NULL);
+               inpcb_sched_timeout();
                break;
        default:
                atomic_add_32(&ipi->ipi_timer_req.intimer_lazy, 1);
-               leeway.tv_sec = inpcb_timeout_lazy;
-               leeway.tv_usec = 0;
-               inpcb_sched_timeout(&leeway);
+               inpcb_sched_lazy_timeout();
                break;
        }
        lck_mtx_unlock(&inpcb_timeout_lock);
@@ -491,7 +561,6 @@ in_pcbinfo_detach(struct inpcbinfo *ipi)
  * Returns:    0                       Success
  *             ENOBUFS
  *             ENOMEM
- *     ipsec_init_policy:???           [IPSEC]
  */
 int
 in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, struct proc *p)
@@ -503,7 +572,7 @@ in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, struct proc *p)
        int mac_error;
 #endif /* CONFIG_MACF_NET */
 
-       if (!so->cached_in_sock_layer) {
+       if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0) {
                inp = (struct inpcb *)zalloc(pcbinfo->ipi_zone);
                if (inp == NULL)
                        return (ENOBUFS);
@@ -521,7 +590,7 @@ in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, struct proc *p)
 #if CONFIG_MACF_NET
        mac_error = mac_inpcb_label_init(inp, M_WAITOK);
        if (mac_error != 0) {
-               if (!so->cached_in_sock_layer)
+               if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0)
                        zfree(pcbinfo->ipi_zone, inp);
                return (mac_error);
        }
@@ -554,6 +623,15 @@ in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, struct proc *p)
                /* NOTREACHED */
        }
 
+       /* make sure inp_Wstat is always 64-bit aligned */
+       inp->inp_Wstat = (struct inp_stat *)P2ROUNDUP(inp->inp_Wstat_store,
+           sizeof (u_int64_t));
+       if (((uintptr_t)inp->inp_Wstat - (uintptr_t)inp->inp_Wstat_store) +
+           sizeof (*inp->inp_Wstat) > sizeof (inp->inp_Wstat_store)) {
+               panic("%s: insufficient space to align inp_Wstat", __func__);
+               /* NOTREACHED */
+       }
+
        so->so_pcb = (caddr_t)inp;
 
        if (so->so_proto->pr_flags & PR_PCBLOCK) {
@@ -561,7 +639,6 @@ in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, struct proc *p)
                    pcbinfo->ipi_lock_attr);
        }
 
-
 #if INET6
        if (SOCK_DOM(so) == PF_INET6 && !ip6_mapped_addr_on)
                inp->inp_flags |= IN6P_IPV6_V6ONLY;
@@ -569,6 +646,8 @@ in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, struct proc *p)
        if (ip6_auto_flowlabel)
                inp->inp_flags |= IN6P_AUTOFLOWLABEL;
 #endif /* INET6 */
+       if (intcoproc_unrestricted)
+               inp->inp_flags2 |= INP2_INTCOPROC_ALLOWED;
 
        (void) inp_update_policy(inp);
 
@@ -639,7 +718,7 @@ in_pcb_conflict_post_msg(u_int16_t port)
        ev_msg.dv[0].data_ptr = &in_portinuse;
        ev_msg.dv[0].data_length = sizeof (struct kev_in_portinuse);
        ev_msg.dv[1].data_length = 0;
-       kev_post_msg(&ev_msg);
+       dlil_post_complete_msg(NULL, &ev_msg);
 }
 
 /*
@@ -664,7 +743,10 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
        u_short lport = 0, rand_port = 0;
        int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
        int error, randomport, conflict = 0;
+       boolean_t anonport = FALSE;
        kauth_cred_t cred;
+       struct in_addr laddr;
+       struct ifnet *outif = NULL;
 
        if (TAILQ_EMPTY(&in_ifaddrhead)) /* XXX broken! */
                return (EADDRNOTAVAIL);
@@ -672,10 +754,13 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
                return (EINVAL);
        if (!(so->so_options & (SO_REUSEADDR|SO_REUSEPORT)))
                wild = 1;
+
+       bzero(&laddr, sizeof(laddr));
+
        socket_unlock(so, 0); /* keep reference on socket */
        lck_rw_lock_exclusive(pcbinfo->ipi_lock);
+
        if (nam != NULL) {
-               struct ifnet *outif = NULL;
 
                if (nam->sa_len != sizeof (struct sockaddr_in)) {
                        lck_rw_done(pcbinfo->ipi_lock);
@@ -739,7 +824,6 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
                        struct inpcb *t;
                        uid_t u;
 
-                       /* GROSS */
                        if (ntohs(lport) < IPPORT_RESERVED) {
                                cred = kauth_cred_proc_ref(p);
                                error = priv_check_cred(cred,
@@ -802,8 +886,7 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
                                }
                        }
                }
-               inp->inp_laddr = SIN(nam)->sin_addr;
-               inp->inp_last_outifp = outif;
+               laddr = SIN(nam)->sin_addr;
        }
        if (lport == 0) {
                u_short first, last;
@@ -814,15 +897,10 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
                    udp_use_randomport);
 
                /*
-                * TODO:
-                *
-                * The following should be moved into its own routine and
-                * thus can be shared with in6_pcbsetport(); the latter
-                * currently duplicates the logic.
+                * Even though this looks similar to the code in
+                * in6_pcbsetport, the v6 vs v4 checks are different.
                 */
-
-               inp->inp_flags |= INP_ANONPORT;
-
+               anonport = TRUE;
                if (inp->inp_flags & INP_HIGHPORT) {
                        first = ipport_hifirstauto;     /* sysctl */
                        last  = ipport_hilastauto;
@@ -871,8 +949,6 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
                                if (count-- < 0) {      /* completely used? */
                                        lck_rw_done(pcbinfo->ipi_lock);
                                        socket_lock(so, 0);
-                                       inp->inp_laddr.s_addr = INADDR_ANY;
-                                       inp->inp_last_outifp = NULL;
                                        return (EADDRNOTAVAIL);
                                }
                                --*lastport;
@@ -880,7 +956,8 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
                                        *lastport = first;
                                lport = htons(*lastport);
                        } while (in_pcblookup_local_and_cleanup(pcbinfo,
-                           inp->inp_laddr, lport, wild));
+                           ((laddr.s_addr != INADDR_ANY) ? laddr :
+                           inp->inp_laddr), lport, wild));
                } else {
                        /*
                         * counting up
@@ -896,8 +973,6 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
                                if (count-- < 0) {      /* completely used? */
                                        lck_rw_done(pcbinfo->ipi_lock);
                                        socket_lock(so, 0);
-                                       inp->inp_laddr.s_addr = INADDR_ANY;
-                                       inp->inp_last_outifp = NULL;
                                        return (EADDRNOTAVAIL);
                                }
                                ++*lastport;
@@ -905,15 +980,42 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
                                        *lastport = first;
                                lport = htons(*lastport);
                        } while (in_pcblookup_local_and_cleanup(pcbinfo,
-                           inp->inp_laddr, lport, wild));
+                           ((laddr.s_addr != INADDR_ANY) ? laddr :
+                           inp->inp_laddr), lport, wild));
                }
        }
        socket_lock(so, 0);
+
+       /*
+        * We unlocked the socket's protocol lock for a long time.
+        * The socket might have been dropped/defuncted.
+        * Check whether the world has changed since.
+        */
+       if (inp->inp_state == INPCB_STATE_DEAD) {
+               lck_rw_done(pcbinfo->ipi_lock);
+               return (ECONNABORTED);
+       }
+
+       if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) {
+               lck_rw_done(pcbinfo->ipi_lock);
+               return (EINVAL);
+       }
+
+       if (laddr.s_addr != INADDR_ANY) {
+               inp->inp_laddr = laddr;
+               inp->inp_last_outifp = outif;
+       }
        inp->inp_lport = lport;
+       if (anonport)
+               inp->inp_flags |= INP_ANONPORT;
+
        if (in_pcbinshash(inp, 1) != 0) {
                inp->inp_laddr.s_addr = INADDR_ANY;
-               inp->inp_lport = 0;
                inp->inp_last_outifp = NULL;
+
+               inp->inp_lport = 0;
+               if (anonport)
+                       inp->inp_flags &= ~INP_ANONPORT;
                lck_rw_done(pcbinfo->ipi_lock);
                return (EAGAIN);
        }
@@ -922,6 +1024,161 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
        return (0);
 }
 
+#define        APN_FALLBACK_IP_FILTER(a)       \
+       (IN_LINKLOCAL(ntohl((a)->sin_addr.s_addr)) || \
+        IN_LOOPBACK(ntohl((a)->sin_addr.s_addr)) || \
+        IN_ZERONET(ntohl((a)->sin_addr.s_addr)) || \
+        IN_MULTICAST(ntohl((a)->sin_addr.s_addr)) || \
+        IN_PRIVATE(ntohl((a)->sin_addr.s_addr)))
+
+#define        APN_FALLBACK_NOTIF_INTERVAL     2 /* Magic Number */
+static uint64_t last_apn_fallback = 0;
+
+static boolean_t
+apn_fallback_required (proc_t proc, struct socket *so, struct sockaddr_in *p_dstv4)
+{
+       uint64_t timenow;
+       struct sockaddr_storage lookup_default_addr;
+       struct rtentry *rt = NULL;
+
+       VERIFY(proc != NULL);
+
+       if (apn_fallbk_enabled  == FALSE)
+               return FALSE;
+
+       if (proc == kernproc)
+               return FALSE;
+
+       if (so && (so->so_options & SO_NOAPNFALLBK))
+               return FALSE;
+
+       timenow = net_uptime();
+       if ((timenow - last_apn_fallback) < APN_FALLBACK_NOTIF_INTERVAL) {
+               apn_fallbk_log((LOG_INFO, "APN fallback notification throttled.\n"));
+               return FALSE;
+       }
+
+       if (p_dstv4 && APN_FALLBACK_IP_FILTER(p_dstv4))
+               return FALSE;
+
+       /* Check if we have unscoped IPv6 default route through cellular */
+       bzero(&lookup_default_addr, sizeof(lookup_default_addr));
+       lookup_default_addr.ss_family = AF_INET6;
+       lookup_default_addr.ss_len = sizeof(struct sockaddr_in6);
+
+       rt = rtalloc1((struct sockaddr *)&lookup_default_addr, 0, 0);
+       if (NULL == rt) {
+               apn_fallbk_log((LOG_INFO, "APN fallback notification could not find "
+                   "unscoped default IPv6 route.\n"));
+               return FALSE;
+       }
+
+       if (!IFNET_IS_CELLULAR(rt->rt_ifp)) {
+               rtfree(rt);
+               apn_fallbk_log((LOG_INFO, "APN fallback notification could not find "
+                   "unscoped default IPv6 route through cellular interface.\n"));
+               return FALSE;
+       }
+
+       /*
+        * We have a default IPv6 route, ensure that
+        * we do not have IPv4 default route before triggering
+        * the event
+        */
+       rtfree(rt);
+       rt = NULL;
+
+       bzero(&lookup_default_addr, sizeof(lookup_default_addr));
+       lookup_default_addr.ss_family = AF_INET;
+       lookup_default_addr.ss_len = sizeof(struct sockaddr_in);
+
+       rt = rtalloc1((struct sockaddr *)&lookup_default_addr, 0, 0);
+
+       if (rt) {
+               rtfree(rt);
+               rt = NULL;
+               apn_fallbk_log((LOG_INFO, "APN fallback notification found unscoped "
+                   "IPv4 default route!\n"));
+               return FALSE;
+       }
+
+       {
+               /*
+                * We disable APN fallback if the binary is not a third-party app.
+                * Note that platform daemons use their process name as a
+                * bundle ID so we filter out bundle IDs without dots.
+                */
+               const char *bundle_id = cs_identity_get(proc);
+               if (bundle_id == NULL ||
+                   bundle_id[0] == '\0' ||
+                   strchr(bundle_id, '.') == NULL ||
+                   strncmp(bundle_id, "com.apple.", sizeof("com.apple.") - 1) == 0) {
+                       apn_fallbk_log((LOG_INFO, "Abort: APN fallback notification found first-"
+                           "party bundle ID \"%s\"!\n", (bundle_id ? bundle_id : "NULL")));
+                       return FALSE;
+               }
+       }
+
+       {
+               /*
+                * The Apple App Store IPv6 requirement started on
+                * June 1st, 2016 at 12:00:00 AM PDT.
+                * We disable APN fallback if the binary is more recent than that.
+                * We check both atime and birthtime since birthtime is not always supported.
+                */
+               static const long ipv6_start_date = 1464764400L;
+               vfs_context_t context;
+               struct stat64 sb;
+               int vn_stat_error;
+
+               bzero(&sb, sizeof(struct stat64));
+               context = vfs_context_create(NULL);
+               vn_stat_error = vn_stat(proc->p_textvp, &sb, NULL, 1, context);
+               (void)vfs_context_rele(context);
+
+               if (vn_stat_error != 0 ||
+                   sb.st_atimespec.tv_sec >= ipv6_start_date ||
+                   sb.st_birthtimespec.tv_sec >= ipv6_start_date) {
+                       apn_fallbk_log((LOG_INFO, "Abort: APN fallback notification found binary "
+                           "too recent! (err %d atime %ld mtime %ld ctime %ld birthtime %ld)\n",
+                           vn_stat_error, sb.st_atimespec.tv_sec, sb.st_mtimespec.tv_sec,
+                           sb.st_ctimespec.tv_sec, sb.st_birthtimespec.tv_sec));
+                       return FALSE;
+               }
+       }
+       return TRUE;
+}
+
+static void
+apn_fallback_trigger(proc_t proc)
+{
+       pid_t pid = 0;
+       struct kev_msg ev_msg;
+       struct kev_netevent_apnfallbk_data apnfallbk_data;
+
+       last_apn_fallback = net_uptime();
+       pid = proc_pid(proc);
+       uuid_t application_uuid;
+       uuid_clear(application_uuid);
+       proc_getexecutableuuid(proc, application_uuid,
+           sizeof(application_uuid));
+
+       bzero(&ev_msg, sizeof (struct kev_msg));
+       ev_msg.vendor_code      = KEV_VENDOR_APPLE;
+       ev_msg.kev_class        = KEV_NETWORK_CLASS;
+       ev_msg.kev_subclass     = KEV_NETEVENT_SUBCLASS;
+       ev_msg.event_code       = KEV_NETEVENT_APNFALLBACK;
+
+       bzero(&apnfallbk_data, sizeof(apnfallbk_data));
+       apnfallbk_data.epid = pid;
+       uuid_copy(apnfallbk_data.euuid, application_uuid);
+
+       ev_msg.dv[0].data_ptr   = &apnfallbk_data;
+       ev_msg.dv[0].data_length = sizeof(apnfallbk_data);
+       kev_post_msg(&ev_msg);
+       apn_fallbk_log((LOG_INFO, "APN fallback notification issued.\n"));
+}
+
 /*
  * Transform old in_pcbconnect() into an inner subroutine for new
  * in_pcbconnect(); do some validity-checking on the remote address
@@ -944,13 +1201,13 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
  */
 int
 in_pcbladdr(struct inpcb *inp, struct sockaddr *nam, struct in_addr *laddr,
-    unsigned int ifscope, struct ifnet **outif)
+    unsigned int ifscope, struct ifnet **outif, int raw)
 {
-       boolean_t nocell = (inp->inp_flags & INP_NO_IFT_CELLULAR);
        struct route *ro = &inp->inp_route;
        struct in_ifaddr *ia = NULL;
        struct sockaddr_in sin;
        int error = 0;
+       boolean_t restricted = FALSE;
 
        if (outif != NULL)
                *outif = NULL;
@@ -958,7 +1215,7 @@ in_pcbladdr(struct inpcb *inp, struct sockaddr *nam, struct in_addr *laddr,
                return (EINVAL);
        if (SIN(nam)->sin_family != AF_INET)
                return (EAFNOSUPPORT);
-       if (SIN(nam)->sin_port == 0)
+       if (raw == 0 && SIN(nam)->sin_port == 0)
                return (EADDRNOTAVAIL);
 
        /*
@@ -968,8 +1225,8 @@ in_pcbladdr(struct inpcb *inp, struct sockaddr *nam, struct in_addr *laddr,
         * and the primary interface supports broadcast,
         * choose the broadcast address for that interface.
         */
-       if (SIN(nam)->sin_addr.s_addr == INADDR_ANY ||
-           SIN(nam)->sin_addr.s_addr == (u_int32_t)INADDR_BROADCAST) {
+       if (raw == 0 && (SIN(nam)->sin_addr.s_addr == INADDR_ANY ||
+           SIN(nam)->sin_addr.s_addr == (u_int32_t)INADDR_BROADCAST)) {
                lck_rw_lock_shared(in_ifaddr_rwlock);
                if (!TAILQ_EMPTY(&in_ifaddrhead)) {
                        ia = TAILQ_FIRST(&in_ifaddrhead);
@@ -1041,11 +1298,18 @@ in_pcbladdr(struct inpcb *inp, struct sockaddr *nam, struct in_addr *laddr,
         * interface to take the source address from.
         */
        if (ro->ro_rt == NULL) {
+               proc_t proc = current_proc();
+
                VERIFY(ia == NULL);
                ia = ifatoia(ifa_ifwithdstaddr(SA(&sin)));
                if (ia == NULL)
                        ia = ifatoia(ifa_ifwithnet_scoped(SA(&sin), ifscope));
                error = ((ia == NULL) ? ENETUNREACH : 0);
+
+               if (apn_fallback_required(proc, inp->inp_socket,
+                   (void *)nam))
+                       apn_fallback_trigger(proc);
+
                goto done;
        }
        RT_LOCK_ASSERT_HELD(ro->ro_rt);
@@ -1059,11 +1323,13 @@ in_pcbladdr(struct inpcb *inp, struct sockaddr *nam, struct in_addr *laddr,
                 * If the route points to a cellular interface and the
                 * caller forbids our using interfaces of such type,
                 * pretend that there is no route.
+                * Apply the same logic for expensive interfaces.
                 */
-               if (nocell && IFNET_IS_CELLULAR(ro->ro_rt->rt_ifp)) {
+               if (inp_restricted_send(inp, ro->ro_rt->rt_ifp)) {
                        RT_UNLOCK(ro->ro_rt);
                        ROUTE_RELEASE(ro);
                        error = EHOSTUNREACH;
+                       restricted = TRUE;
                } else {
                        /* Become a regular mutex */
                        RT_CONVERT_LOCK(ro->ro_rt);
@@ -1140,11 +1406,13 @@ done:
                 * If the source address belongs to a cellular interface
                 * and the socket forbids our using interfaces of such
                 * type, pretend that there is no source address.
+                * Apply the same logic for expensive interfaces.
                 */
                IFA_LOCK_SPIN(&ia->ia_ifa);
-               if (nocell && IFNET_IS_CELLULAR(ia->ia_ifa.ifa_ifp)) {
+               if (inp_restricted_send(inp, ia->ia_ifa.ifa_ifp)) {
                        IFA_UNLOCK(&ia->ia_ifa);
                        error = EHOSTUNREACH;
+                       restricted = TRUE;
                } else if (error == 0) {
                        *laddr = ia->ia_addr.sin_addr;
                        if (outif != NULL) {
@@ -1170,7 +1438,7 @@ done:
                ia = NULL;
        }
 
-       if (nocell && error == EHOSTUNREACH) {
+       if (restricted && error == EHOSTUNREACH) {
                soevent(inp->inp_socket, (SO_FILT_HINT_LOCKED |
                    SO_FILT_HINT_IFDENIED));
        }
@@ -1196,25 +1464,26 @@ in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct proc *p,
        struct sockaddr_in *sin = (struct sockaddr_in *)(void *)nam;
        struct inpcb *pcb;
        int error;
+       struct socket *so = inp->inp_socket;
 
        /*
         *   Call inner routine, to assign local interface address.
         */
-       if ((error = in_pcbladdr(inp, nam, &laddr, ifscope, outif)) != 0)
+       if ((error = in_pcbladdr(inp, nam, &laddr, ifscope, outif, 0)) != 0)
                return (error);
 
-       socket_unlock(inp->inp_socket, 0);
+       socket_unlock(so, 0);
        pcb = in_pcblookup_hash(inp->inp_pcbinfo, sin->sin_addr, sin->sin_port,
            inp->inp_laddr.s_addr ? inp->inp_laddr : laddr,
            inp->inp_lport, 0, NULL);
-       socket_lock(inp->inp_socket, 0);
+       socket_lock(so, 0);
 
        /*
         * Check if the socket is still in a valid state. When we unlock this
         * embryonic socket, it can get aborted if another thread is closing
         * the listener (radar 7947600).
         */
-       if ((inp->inp_socket->so_flags & SOF_ABORTED) != 0)
+       if ((so->so_flags & SOF_ABORTED) != 0)
                return (ECONNREFUSED);
 
        if (pcb != NULL) {
@@ -1232,27 +1501,39 @@ in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct proc *p,
                         * Lock inversion issue, mostly with udp
                         * multicast packets.
                         */
-                       socket_unlock(inp->inp_socket, 0);
+                       socket_unlock(so, 0);
                        lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock);
-                       socket_lock(inp->inp_socket, 0);
+                       socket_lock(so, 0);
                }
                inp->inp_laddr = laddr;
                /* no reference needed */
                inp->inp_last_outifp = (outif != NULL) ? *outif : NULL;
                inp->inp_flags |= INP_INADDR_ANY;
        } else {
+               /*
+                * Usage of IP_PKTINFO without a local port already
+                * specified will cause the kernel to panic,
+                * see rdar://problem/18508185.
+                * For now, return an error to avoid a kernel panic.
+                * This routine can be refactored to handle this better
+                * in the future.
+                */
+               if (inp->inp_lport == 0)
+                       return (EINVAL);
                if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) {
                        /*
                         * Lock inversion issue, mostly with udp
                         * multicast packets.
                         */
-                       socket_unlock(inp->inp_socket, 0);
+                       socket_unlock(so, 0);
                        lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock);
-                       socket_lock(inp->inp_socket, 0);
+                       socket_lock(so, 0);
                }
        }
        inp->inp_faddr = sin->sin_addr;
        inp->inp_fport = sin->sin_port;
+       if (nstat_collect && SOCK_PROTO(so) == IPPROTO_UDP)
+               nstat_pcb_invalidate_cache(inp);
        in_pcbrehash(inp);
        lck_rw_done(inp->inp_pcbinfo->ipi_lock);
        return (0);
@@ -1263,6 +1544,9 @@ in_pcbdisconnect(struct inpcb *inp)
 {
        struct socket *so = inp->inp_socket;
 
+       if (nstat_collect && SOCK_PROTO(so) == IPPROTO_UDP)
+               nstat_pcb_cache(inp);
+
        inp->inp_faddr.s_addr = INADDR_ANY;
        inp->inp_fport = 0;
 
@@ -1302,6 +1586,20 @@ in_pcbdetach(struct inpcb *inp)
        }
 #endif /* IPSEC */
 
+       /*
+        * Let NetworkStatistics know this PCB is going away
+        * before we detach it.
+        */
+       if (nstat_collect &&
+           (SOCK_PROTO(so) == IPPROTO_TCP || SOCK_PROTO(so) == IPPROTO_UDP))
+               nstat_pcb_detach(inp);
+
+       /* Free memory buffer held for generating keep alives */
+       if (inp->inp_keepalive_data != NULL) {
+               FREE(inp->inp_keepalive_data, M_TEMP);
+               inp->inp_keepalive_data = NULL;
+       }
+
        /* mark socket state as dead */
        if (in_pcb_checkstate(inp, WNT_STOPUSING, 1) != WNT_STOPUSING) {
                panic("%s: so=%p proto=%d couldn't set to STOPUSING\n",
@@ -1320,14 +1618,21 @@ in_pcbdetach(struct inpcb *inp)
                ROUTE_RELEASE(&inp->inp_route);
                imo = inp->inp_moptions;
                inp->inp_moptions = NULL;
-               if (imo != NULL)
-                       IMO_REMREF(imo);
                sofreelastref(so, 0);
                inp->inp_state = INPCB_STATE_DEAD;
                /* makes sure we're not called twice from so_close */
                so->so_flags |= SOF_PCBCLEARING;
 
                inpcb_gc_sched(inp->inp_pcbinfo, INPCB_TIMER_FAST);
+
+               /*
+                * See inp_join_group() for why we need to unlock
+                */
+               if (imo != NULL) {
+                       socket_unlock(so, 0);
+                       IMO_REMREF(imo);
+                       socket_lock(so, 0);
+               }
        }
 }
 
@@ -1392,13 +1697,16 @@ in_pcbdispose(struct inpcb *inp)
 #if CONFIG_MACF_NET
                mac_inpcb_label_destroy(inp);
 #endif /* CONFIG_MACF_NET */
+#if NECP
+               necp_inpcb_dispose(inp);
+#endif /* NECP */
                /*
                 * In case there is a route cached after a detach (possible
                 * in the tcp case), make sure that it is freed before
                 * we deallocate the structure.
                 */
                ROUTE_RELEASE(&inp->inp_route);
-               if (!so->cached_in_sock_layer) {
+               if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0) {
                        zfree(ipi->ipi_zone, inp);
                }
                sodealloc(so);
@@ -1450,8 +1758,11 @@ in_getsockaddr_s(struct socket *so, struct sockaddr_storage *ss)
        sin->sin_family = AF_INET;
        sin->sin_len = sizeof (*sin);
 
-       if ((inp = sotoinpcb(so)) == NULL ||
-           (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT))
+       if ((inp = sotoinpcb(so)) == NULL
+#if NECP
+               || (necp_socket_should_use_flow_divert(inp))
+#endif /* NECP */
+               )
                return (inp == NULL ? EINVAL : EPROTOTYPE);
 
        sin->sin_port = inp->inp_lport;
@@ -1498,8 +1809,11 @@ in_getpeeraddr_s(struct socket *so, struct sockaddr_storage *ss)
        sin->sin_family = AF_INET;
        sin->sin_len = sizeof (*sin);
 
-       if ((inp = sotoinpcb(so)) == NULL ||
-           (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) {
+       if ((inp = sotoinpcb(so)) == NULL
+#if NECP
+               || (necp_socket_should_use_flow_divert(inp))
+#endif /* NECP */
+               ) {
                return (inp == NULL ? EINVAL : EPROTOTYPE);
        }
 
@@ -1545,18 +1859,11 @@ in_losing(struct inpcb *inp)
 {
        boolean_t release = FALSE;
        struct rtentry *rt;
-       struct rt_addrinfo info;
 
        if ((rt = inp->inp_route.ro_rt) != NULL) {
                struct in_ifaddr *ia = NULL;
 
-               bzero((caddr_t)&info, sizeof (info));
                RT_LOCK(rt);
-               info.rti_info[RTAX_DST] =
-                   (struct sockaddr *)&inp->inp_route.ro_dst;
-               info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
-               info.rti_info[RTAX_NETMASK] = rt_mask(rt);
-               rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0);
                if (rt->rt_flags & RTF_DYNAMIC) {
                        /*
                         * Prevent another thread from modifying rt_key,
@@ -1746,11 +2053,7 @@ in_pcblookup_hash_exists(struct inpcbinfo *pcbinfo, struct in_addr faddr,
                if (!(inp->inp_vflag & INP_IPV4))
                        continue;
 #endif /* INET6 */
-               if (inp_restricted(inp, ifp))
-                       continue;
-
-               if (ifp != NULL && IFNET_IS_CELLULAR(ifp) &&
-                   (inp->inp_flags & INP_NO_IFT_CELLULAR))
+               if (inp_restricted_recv(inp, ifp))
                        continue;
 
                if (inp->inp_faddr.s_addr == faddr.s_addr &&
@@ -1786,11 +2089,7 @@ in_pcblookup_hash_exists(struct inpcbinfo *pcbinfo, struct in_addr faddr,
                if (!(inp->inp_vflag & INP_IPV4))
                        continue;
 #endif /* INET6 */
-               if (inp_restricted(inp, ifp))
-                       continue;
-
-               if (ifp != NULL && IFNET_IS_CELLULAR(ifp) &&
-                   (inp->inp_flags & INP_NO_IFT_CELLULAR))
+               if (inp_restricted_recv(inp, ifp))
                        continue;
 
                if (inp->inp_faddr.s_addr == INADDR_ANY &&
@@ -1873,11 +2172,7 @@ in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr,
                if (!(inp->inp_vflag & INP_IPV4))
                        continue;
 #endif /* INET6 */
-               if (inp_restricted(inp, ifp))
-                       continue;
-
-               if (ifp != NULL && IFNET_IS_CELLULAR(ifp) &&
-                   (inp->inp_flags & INP_NO_IFT_CELLULAR))
+               if (inp_restricted_recv(inp, ifp))
                        continue;
 
                if (inp->inp_faddr.s_addr == faddr.s_addr &&
@@ -1914,11 +2209,7 @@ in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr,
                if (!(inp->inp_vflag & INP_IPV4))
                        continue;
 #endif /* INET6 */
-               if (inp_restricted(inp, ifp))
-                       continue;
-
-               if (ifp != NULL && IFNET_IS_CELLULAR(ifp) &&
-                   (inp->inp_flags & INP_NO_IFT_CELLULAR))
+               if (inp_restricted_recv(inp, ifp))
                        continue;
 
                if (inp->inp_faddr.s_addr == INADDR_ANY &&
@@ -1972,7 +2263,13 @@ in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr,
 }
 
 /*
- * Insert PCB onto various hash lists.
+ * @brief      Insert PCB onto various hash lists.
+ *
+ * @param      inp Pointer to internet protocol control block
+ * @param      locked  Indicates whether ipi_lock (protecting pcb list)
+ *             is already held by the caller.
+ *
+ * @return     int error on failure and 0 on success
  */
 int
 in_pcbinshash(struct inpcb *inp, int locked)
@@ -1992,17 +2289,23 @@ in_pcbinshash(struct inpcb *inp, int locked)
                        socket_unlock(inp->inp_socket, 0);
                        lck_rw_lock_exclusive(pcbinfo->ipi_lock);
                        socket_lock(inp->inp_socket, 0);
-                       if (inp->inp_state == INPCB_STATE_DEAD) {
-                               /*
-                                * The socket got dropped when
-                                * it was unlocked
-                                */
-                               lck_rw_done(pcbinfo->ipi_lock);
-                               return (ECONNABORTED);
-                       }
                }
        }
 
+       /*
+        * This routine or its caller may have given up
+        * socket's protocol lock briefly.
+        * During that time the socket may have been dropped.
+        * Safe-guarding against that.
+        */
+       if (inp->inp_state == INPCB_STATE_DEAD) {
+               if (!locked) {
+                       lck_rw_done(pcbinfo->ipi_lock);
+               }
+               return (ECONNABORTED);
+       }
+
+
 #if INET6
        if (inp->inp_vflag & INP_IPV6)
                hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
@@ -2026,8 +2329,6 @@ in_pcbinshash(struct inpcb *inp, int locked)
                        break;
        }
 
-       VERIFY(inp->inp_state != INPCB_STATE_DEAD);
-
        /*
         * If none exists, malloc one and tack it on.
         */
@@ -2043,11 +2344,21 @@ in_pcbinshash(struct inpcb *inp, int locked)
                LIST_INIT(&phd->phd_pcblist);
                LIST_INSERT_HEAD(pcbporthash, phd, phd_hash);
        }
+
+       VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST));
        inp->inp_phd = phd;
        LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
        LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
+       inp->inp_flags2 |= INP2_INHASHLIST;
+
        if (!locked)
                lck_rw_done(pcbinfo->ipi_lock);
+
+#if NECP
+       // This call catches the original setting of the local address
+       inp_update_necp_policy(inp, NULL, NULL, 0);
+#endif /* NECP */
+
        return (0);
 }
 
@@ -2074,8 +2385,19 @@ in_pcbrehash(struct inpcb *inp)
            inp->inp_fport, inp->inp_pcbinfo->ipi_hashmask);
        head = &inp->inp_pcbinfo->ipi_hashbase[inp->inp_hash_element];
 
-       LIST_REMOVE(inp, inp_hash);
+       if (inp->inp_flags2 & INP2_INHASHLIST) {
+               LIST_REMOVE(inp, inp_hash);
+               inp->inp_flags2 &= ~INP2_INHASHLIST;
+       }
+
+       VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST));
        LIST_INSERT_HEAD(head, inp, inp_hash);
+       inp->inp_flags2 |= INP2_INHASHLIST;
+
+#if NECP
+       // This call catches updates to the remote addresses
+       inp_update_necp_policy(inp, NULL, NULL, 0);
+#endif /* NECP */
 }
 
 /*
@@ -2087,16 +2409,31 @@ in_pcbremlists(struct inpcb *inp)
 {
        inp->inp_gencnt = ++inp->inp_pcbinfo->ipi_gencnt;
 
-       if (inp->inp_lport) {
+       /*
+        * Check if it's in hashlist -- an inp is placed in hashlist when
+        * its local port gets assigned. So it should also be present
+        * in the port list.
+        */
+       if (inp->inp_flags2 & INP2_INHASHLIST) {
                struct inpcbport *phd = inp->inp_phd;
 
+               VERIFY(phd != NULL && inp->inp_lport > 0);
+
                LIST_REMOVE(inp, inp_hash);
+               inp->inp_hash.le_next = NULL;
+               inp->inp_hash.le_prev = NULL;
+
                LIST_REMOVE(inp, inp_portlist);
-               if (phd != NULL && (LIST_FIRST(&phd->phd_pcblist) == NULL)) {
+               inp->inp_portlist.le_next = NULL;
+               inp->inp_portlist.le_prev = NULL;
+               if (LIST_EMPTY(&phd->phd_pcblist)) {
                        LIST_REMOVE(phd, phd_hash);
                        FREE(phd, M_PCB);
                }
+               inp->inp_phd = NULL;
+               inp->inp_flags2 &= ~INP2_INHASHLIST;
        }
+       VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST));
 
        if (inp->inp_flags2 & INP2_TIMEWAIT) {
                /* Remove from time-wait queue */
@@ -2164,7 +2501,6 @@ stopusing:
                        OSCompareAndSwap(origwant, newwant, wantcnt);
                }
                return (WNT_STOPUSING);
-               break;
 
        case WNT_ACQUIRE:
                /*
@@ -2181,7 +2517,6 @@ stopusing:
                        newwant = origwant + 1;
                } while (!OSCompareAndSwap(origwant, newwant, wantcnt));
                return (WNT_ACQUIRE);
-               break;
 
        case WNT_RELEASE:
                /*
@@ -2218,7 +2553,6 @@ stopusing:
                if (locked == 0)
                        socket_unlock(pcb->inp_socket, 1);
                return (WNT_RELEASE);
-               break;
 
        default:
                panic("%s: so=%p not a valid state =%x\n", __func__,
@@ -2338,7 +2672,7 @@ inp_route_copyin(struct inpcb *inp, struct route *src)
 }
 
 /*
- * Handler for setting IP_FORCE_OUT_IFP/IP_BOUND_IF/IPV6_BOUND_IF socket option.
+ * Handler for setting IP_BOUND_IF/IPV6_BOUND_IF socket option.
  */
 int
 inp_bindif(struct inpcb *inp, unsigned int ifscope, struct ifnet **pifp)
@@ -2414,25 +2748,82 @@ inp_clear_nocellular(struct inpcb *inp)
        }
 }
 
-#if FLOW_DIVERT
+/*
+ * Set INP2_NO_IFF_EXPENSIVE in the PCB's inp_flags2 and release the
+ * route held in the PCB.
+ */
+void
+inp_set_noexpensive(struct inpcb *inp)
+{
+       inp->inp_flags2 = inp->inp_flags2 | INP2_NO_IFF_EXPENSIVE;
+
+       /* Drop whatever route is cached in the PCB */
+       ROUTE_RELEASE(&inp->inp_route);
+}
+
+/*
+ * Set INP2_AWDL_UNRESTRICTED in the PCB's inp_flags2 and release the
+ * route held in the PCB.
+ */
+void
+inp_set_awdl_unrestricted(struct inpcb *inp)
+{
+       inp->inp_flags2 = inp->inp_flags2 | INP2_AWDL_UNRESTRICTED;
+
+       /* Drop whatever route is cached in the PCB */
+       ROUTE_RELEASE(&inp->inp_route);
+}
+
+/*
+ * Returns TRUE when INP2_AWDL_UNRESTRICTED is set in the PCB's
+ * inp_flags2, otherwise FALSE.
+ */
+boolean_t
+inp_get_awdl_unrestricted(struct inpcb *inp)
+{
+       if ((inp->inp_flags2 & INP2_AWDL_UNRESTRICTED) != 0)
+               return (TRUE);
+
+       return (FALSE);
+}
+
+/*
+ * Clear INP2_AWDL_UNRESTRICTED in the PCB's inp_flags2 and release the
+ * route held in the PCB.
+ */
+void
+inp_clear_awdl_unrestricted(struct inpcb *inp)
+{
+       inp->inp_flags2 = inp->inp_flags2 & ~INP2_AWDL_UNRESTRICTED;
+
+       /* Drop whatever route is cached in the PCB */
+       ROUTE_RELEASE(&inp->inp_route);
+}
+
+/*
+ * Set INP2_INTCOPROC_ALLOWED in the PCB's inp_flags2 and release the
+ * route held in the PCB.
+ */
+void
+inp_set_intcoproc_allowed(struct inpcb *inp)
+{
+       inp->inp_flags2 = inp->inp_flags2 | INP2_INTCOPROC_ALLOWED;
+
+       /* Drop whatever route is cached in the PCB */
+       ROUTE_RELEASE(&inp->inp_route);
+}
+
+/*
+ * Returns TRUE when INP2_INTCOPROC_ALLOWED is set in the PCB's
+ * inp_flags2, otherwise FALSE.
+ */
+boolean_t
+inp_get_intcoproc_allowed(struct inpcb *inp)
+{
+       if ((inp->inp_flags2 & INP2_INTCOPROC_ALLOWED) != 0)
+               return (TRUE);
+
+       return (FALSE);
+}
+
+/*
+ * Clear INP2_INTCOPROC_ALLOWED in the PCB's inp_flags2 and release the
+ * route held in the PCB.
+ */
+void
+inp_clear_intcoproc_allowed(struct inpcb *inp)
+{
+       inp->inp_flags2 = inp->inp_flags2 & ~INP2_INTCOPROC_ALLOWED;
+
+       /* Drop whatever route is cached in the PCB */
+       ROUTE_RELEASE(&inp->inp_route);
+}
+
+#if NECP
 /*
- * Called when PROC_UUID_FLOW_DIVERT is set.
+ * Called when PROC_UUID_NECP_APP_POLICY is set.
  */
 void
-inp_set_flow_divert(struct inpcb *inp)
+inp_set_want_app_policy(struct inpcb *inp)
 {
-       inp->inp_flags2 |= INP2_WANT_FLOW_DIVERT;
+       inp->inp_flags2 |= INP2_WANT_APP_POLICY;
 }
 
 /*
- * Called when PROC_UUID_FLOW_DIVERT is cleared.
+ * Called when PROC_UUID_NECP_APP_POLICY is cleared.
  */
 void
-inp_clear_flow_divert(struct inpcb *inp)
+inp_clear_want_app_policy(struct inpcb *inp)
 {
-       inp->inp_flags2 &= ~INP2_WANT_FLOW_DIVERT;
+       inp->inp_flags2 &= ~INP2_WANT_APP_POLICY;
 }
-#endif /* FLOW_DIVERT */
+#endif /* NECP */
 
 /*
  * Calculate flow hash for an inp, used by an interface to identify a
@@ -2561,6 +2952,9 @@ inp_fc_feedback(struct inpcb *inp)
                return;
        }
 
+       if (inp->inp_sndinprog_cnt > 0)
+               inp->inp_flags |= INP_FC_FEEDBACK;
+
        /*
         * Return if the connection is not in flow-controlled state.
         * This can happen if the connection experienced
@@ -2592,9 +2986,6 @@ inp_reset_fc_state(struct inpcb *inp)
                soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_RESUME));
        }
 
-       if (inp->inp_sndinprog_cnt > 0)
-               inp->inp_flags |= INP_FC_FEEDBACK;
-
        /* Give a write wakeup to unblock the socket */
        if (needwakeup)
                sowwakeup(so);
@@ -2690,13 +3081,19 @@ inp_get_soprocinfo(struct inpcb *inp, struct so_procinfo *soprocinfo)
        struct socket *so = inp->inp_socket;
 
        soprocinfo->spi_pid = so->last_pid;
+       if (so->last_pid != 0)
+               uuid_copy(soprocinfo->spi_uuid, so->last_uuid);
        /*
         * When not delegated, the effective pid is the same as the real pid
         */
-       if (so->so_flags & SOF_DELEGATED)
+       if (so->so_flags & SOF_DELEGATED) {
+               soprocinfo->spi_delegated = 1;
                soprocinfo->spi_epid = so->e_pid;
-       else
+               uuid_copy(soprocinfo->spi_euuid, so->e_uuid);
+       } else {
+               soprocinfo->spi_delegated = 0;
                soprocinfo->spi_epid = so->last_pid;
+       }
 }
 
 int
@@ -2736,13 +3133,13 @@ inp_update_cellular_policy(struct inpcb *inp, boolean_t set)
        VERIFY(so != NULL);
        VERIFY(inp->inp_state != INPCB_STATE_DEAD);
 
-       before = (inp->inp_flags & INP_NO_IFT_CELLULAR);
+       before = INP_NO_CELLULAR(inp);
        if (set) {
                inp_set_nocellular(inp);
        } else {
                inp_clear_nocellular(inp);
        }
-       after = (inp->inp_flags & INP_NO_IFT_CELLULAR);
+       after = INP_NO_CELLULAR(inp);
        if (net_io_policy_log && (before != after)) {
                static const char *ok = "OK";
                static const char *nok = "NOACCESS";
@@ -2771,9 +3168,9 @@ inp_update_cellular_policy(struct inpcb *inp, boolean_t set)
        }
 }
 
-#if FLOW_DIVERT
+#if NECP
 static void
-inp_update_flow_divert_policy(struct inpcb *inp, boolean_t set)
+inp_update_necp_want_app_policy(struct inpcb *inp, boolean_t set)
 {
        struct socket *so = inp->inp_socket;
        int before, after;
@@ -2781,17 +3178,13 @@ inp_update_flow_divert_policy(struct inpcb *inp, boolean_t set)
        VERIFY(so != NULL);
        VERIFY(inp->inp_state != INPCB_STATE_DEAD);
 
-       if (set && !(inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) {
-               set = !flow_divert_is_dns_service(so);
-       }
-
-       before = (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT);
+       before = (inp->inp_flags2 & INP2_WANT_APP_POLICY);
        if (set) {
-               inp_set_flow_divert(inp);
+               inp_set_want_app_policy(inp);
        } else {
-               inp_clear_flow_divert(inp);
+               inp_clear_want_app_policy(inp);
        }
-       after = (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT);
+       after = (inp->inp_flags2 & INP2_WANT_APP_POLICY);
        if (net_io_policy_log && (before != after)) {
                static const char *wanted = "WANTED";
                static const char *unwanted = "UNWANTED";
@@ -2816,9 +3209,24 @@ inp_update_flow_divert_policy(struct inpcb *inp, boolean_t set)
                    ((before < after) ? wanted : unwanted));
        }
 }
-#endif /* FLOW_DIVERT */
+#endif /* NECP */
 #endif /* !CONFIG_PROC_UUID_POLICY */
 
+#if NECP
+/*
+ * Run the NECP policy match for this PCB, passing through the optional
+ * override addresses and bound interface.  Afterwards, if NECP asks for
+ * the socket to be rescoped and the socket has no local port and no
+ * local IPv4/IPv6 address yet, bind it to the rescope interface index.
+ */
+void
+inp_update_necp_policy(struct inpcb *inp, struct sockaddr *override_local_addr,
+    struct sockaddr *override_remote_addr, u_int override_bound_interface)
+{
+       necp_socket_find_policy_match(inp, override_local_addr,
+           override_remote_addr, override_bound_interface);
+
+       if (!necp_socket_should_rescope(inp))
+               return;
+
+       /* Rescoping only applies while the socket is not yet bound */
+       if (inp->inp_lport != 0 ||
+           inp->inp_laddr.s_addr != INADDR_ANY ||
+           !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
+               return;
+
+       inp_bindif(inp, necp_socket_get_rescope_if_index(inp), NULL);
+}
+#endif /* NECP */
+
 int
 inp_update_policy(struct inpcb *inp)
 {
@@ -2863,14 +3271,14 @@ inp_update_policy(struct inpcb *inp)
                } else if (!(pflags & PROC_UUID_NO_CELLULAR)) {
                        inp_update_cellular_policy(inp, FALSE);
                }
-#if FLOW_DIVERT
-               /* update flow divert policy for this socket */
-               if (err == 0 && (pflags & PROC_UUID_FLOW_DIVERT)) {
-                       inp_update_flow_divert_policy(inp, TRUE);
-               } else if (!(pflags & PROC_UUID_FLOW_DIVERT)) {
-                       inp_update_flow_divert_policy(inp, FALSE);
+#if NECP
+               /* update necp want app policy for this socket */
+               if (err == 0 && (pflags & PROC_UUID_NECP_APP_POLICY)) {
+                       inp_update_necp_want_app_policy(inp, TRUE);
+               } else if (!(pflags & PROC_UUID_NECP_APP_POLICY)) {
+                       inp_update_necp_want_app_policy(inp, FALSE);
                }
-#endif /* FLOW_DIVERT */
+#endif /* NECP */
        }
 
        return ((err == ENOENT) ? 0 : err);
@@ -2880,15 +3288,40 @@ inp_update_policy(struct inpcb *inp)
 #endif /* !CONFIG_PROC_UUID_POLICY */
 }
 
-boolean_t
-inp_restricted(struct inpcb *inp, struct ifnet *ifp)
+static unsigned int log_restricted;
+SYSCTL_DECL(_net_inet);
+SYSCTL_INT(_net_inet, OID_AUTO, log_restricted,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &log_restricted, 0,
+    "Log network restrictions");
+/*
+ * Called when we need to enforce policy restrictions in the input path.
+ *
+ * Returns TRUE if we're not allowed to receive data, otherwise FALSE.
+ */
+static boolean_t
+_inp_restricted_recv(struct inpcb *inp, struct ifnet *ifp)
 {
        VERIFY(inp != NULL);
 
+       /*
+        * Inbound restrictions.
+        */
        if (!sorestrictrecv)
                return (FALSE);
 
-       if (ifp == NULL || !(ifp->if_eflags & IFEF_RESTRICTED_RECV))
+       if (ifp == NULL)
+               return (FALSE);
+
+       if (IFNET_IS_CELLULAR(ifp) && INP_NO_CELLULAR(inp))
+               return (TRUE);
+
+       if (IFNET_IS_EXPENSIVE(ifp) && INP_NO_EXPENSIVE(inp))
+               return (TRUE);
+
+       if (IFNET_IS_AWDL_RESTRICTED(ifp) && !INP_AWDL_UNRESTRICTED(inp))
+               return (TRUE);
+
+       if (!(ifp->if_eflags & IFEF_RESTRICTED_RECV))
                return (FALSE);
 
        if (inp->inp_flags & INP_RECV_ANYIF)
@@ -2897,5 +3330,160 @@ inp_restricted(struct inpcb *inp, struct ifnet *ifp)
        if ((inp->inp_flags & INP_BOUND_IF) && inp->inp_boundifp == ifp)
                return (FALSE);
 
+       if (IFNET_IS_INTCOPROC(ifp) && !INP_INTCOPROC_ALLOWED(inp))
+               return (TRUE);
+
        return (TRUE);
 }
+
+/*
+ * Logging wrapper around _inp_restricted_recv().  Returns that routine's
+ * verdict unchanged; when the verdict is TRUE and log_restricted is
+ * non-zero, a message naming the current process and the interface is
+ * printed first.
+ */
+boolean_t
+inp_restricted_recv(struct inpcb *inp, struct ifnet *ifp)
+{
+       boolean_t restricted = _inp_restricted_recv(inp, ifp);
+
+       if (restricted != TRUE || !log_restricted)
+               return (restricted);
+
+       printf("pid %d (%s) is unable to receive packets on %s\n",
+           current_proc()->p_pid, proc_best_name(current_proc()),
+           ifp->if_xname);
+
+       return (restricted);
+}
+
+/*
+ * Called when we need to enforce policy restrictions in the output path.
+ *
+ * Returns TRUE if we're not allowed to send data out, otherwise FALSE.
+ */
+static boolean_t
+_inp_restricted_send(struct inpcb *inp, struct ifnet *ifp)
+{
+       VERIFY(inp != NULL);
+
+       /*
+        * Outbound restrictions: nothing to enforce when sorestrictsend
+        * is off or when no interface was supplied.
+        */
+       if (!sorestrictsend || ifp == NULL)
+               return (FALSE);
+
+       /*
+        * Deny when the interface falls in a restricted class
+        * (cellular, expensive, AWDL-restricted, intcoproc) and the
+        * PCB's flags do not permit that class.
+        */
+       if ((IFNET_IS_CELLULAR(ifp) && INP_NO_CELLULAR(inp)) ||
+           (IFNET_IS_EXPENSIVE(ifp) && INP_NO_EXPENSIVE(inp)) ||
+           (IFNET_IS_AWDL_RESTRICTED(ifp) && !INP_AWDL_UNRESTRICTED(inp)) ||
+           (IFNET_IS_INTCOPROC(ifp) && !INP_INTCOPROC_ALLOWED(inp)))
+               return (TRUE);
+
+       return (FALSE);
+}
+
+/*
+ * Logging wrapper around _inp_restricted_send().  Returns that routine's
+ * verdict unchanged; when the verdict is TRUE and log_restricted is
+ * non-zero, a message naming the current process and the interface is
+ * printed first.
+ */
+boolean_t
+inp_restricted_send(struct inpcb *inp, struct ifnet *ifp)
+{
+       boolean_t restricted = _inp_restricted_send(inp, ifp);
+
+       if (restricted != TRUE || !log_restricted)
+               return (restricted);
+
+       printf("pid %d (%s) is unable to transmit packets on %s\n",
+           current_proc()->p_pid, proc_best_name(current_proc()),
+           ifp->if_xname);
+
+       return (restricted);
+}
+
+/*
+ * Turn on send-byte accounting (SB_SNDBYTE_CNT) for the socket when its
+ * last output interface is of type IFT_CELLULAR or of subfamily
+ * IFNET_SUBFAMILY_WIFI and the socket is not flagged SOF_MP_SUBFLOW,
+ * then fold the bytes already in the send buffer and the currently
+ * unsent bytes into the interface counters.
+ */
+inline void
+inp_count_sndbytes(struct inpcb *inp, u_int32_t th_ack)
+{
+       struct socket *so = inp->inp_socket;
+       struct ifnet *ifp = inp->inp_last_outifp;
+       int32_t unsent;
+
+       if (ifp == NULL || (so->so_flags & SOF_MP_SUBFLOW))
+               return;
+
+       if (ifp->if_type != IFT_CELLULAR &&
+           ifp->if_subfamily != IFNET_SUBFAMILY_WIFI)
+               return;
+
+       so->so_snd.sb_flags |= SB_SNDBYTE_CNT;
+
+       /*
+        * There can be data outstanding before the connection
+        * becomes established -- TFO case
+        */
+       if (so->so_snd.sb_cc > 0)
+               inp_incr_sndbytes_total(so, so->so_snd.sb_cc);
+
+       unsent = inp_get_sndbytes_allunsent(so, th_ack);
+       if (unsent > 0)
+               inp_incr_sndbytes_unsent(so, unsent);
+}
+
+/*
+ * Atomically add len to if_sndbyte_total of the socket's last output
+ * interface; does nothing when the PCB has no such interface.
+ */
+inline void
+inp_incr_sndbytes_total(struct socket *so, int32_t len)
+{
+       struct ifnet *ifp = ((struct inpcb *)so->so_pcb)->inp_last_outifp;
+
+       if (ifp == NULL)
+               return;
+
+       VERIFY(ifp->if_sndbyte_total >= 0);
+       OSAddAtomic64(len, &ifp->if_sndbyte_total);
+}
+
+/*
+ * Atomically subtract len from if_sndbyte_total of the socket's last
+ * output interface; does nothing when the PCB has no such interface.
+ * The counter is VERIFY'd to be at least len beforehand.
+ */
+inline void
+inp_decr_sndbytes_total(struct socket *so, int32_t len)
+{
+       struct ifnet *ifp = ((struct inpcb *)so->so_pcb)->inp_last_outifp;
+
+       if (ifp == NULL)
+               return;
+
+       VERIFY(ifp->if_sndbyte_total >= len);
+       OSAddAtomic64(-len, &ifp->if_sndbyte_total);
+}
+
+/*
+ * Atomically add len to if_sndbyte_unsent of the socket's last output
+ * interface; does nothing when the PCB has no such interface.
+ */
+inline void
+inp_incr_sndbytes_unsent(struct socket *so, int32_t len)
+{
+       struct ifnet *ifp = ((struct inpcb *)so->so_pcb)->inp_last_outifp;
+
+       if (ifp == NULL)
+               return;
+
+       VERIFY(ifp->if_sndbyte_unsent >= 0);
+       OSAddAtomic64(len, &ifp->if_sndbyte_unsent);
+}
+
+/*
+ * Subtract len from if_sndbyte_unsent of the socket's last output
+ * interface.  Does nothing when so is NULL, when send-byte accounting
+ * (SB_SNDBYTE_CNT) is not enabled on the send buffer, or when the PCB
+ * has no last output interface.  If the counter is smaller than len it
+ * is reset to zero instead of being driven negative.
+ */
+inline void
+inp_decr_sndbytes_unsent(struct socket *so, int32_t len)
+{
+       struct inpcb *inp;
+       struct ifnet *ifp;
+
+       /*
+        * Validate so before reading any of its fields; so->so_pcb must
+        * not be loaded ahead of the NULL check.
+        */
+       if (so == NULL || !(so->so_snd.sb_flags & SB_SNDBYTE_CNT))
+               return;
+
+       inp = (struct inpcb *)so->so_pcb;
+       ifp = inp->inp_last_outifp;
+
+       if (ifp != NULL) {
+               if (ifp->if_sndbyte_unsent >= len)
+                       OSAddAtomic64(-len, &ifp->if_sndbyte_unsent);
+               else
+                       ifp->if_sndbyte_unsent = 0;
+       }
+}
+
+/*
+ * Subtract the amount reported by inp_get_sndbytes_allunsent() for
+ * th_ack from the interface's unsent-byte counter.  Does nothing when
+ * so is NULL or SB_SNDBYTE_CNT is not set on the send buffer.
+ */
+inline void
+inp_decr_sndbytes_allunsent(struct socket *so, u_int32_t th_ack)
+{
+       int32_t unsent;
+
+       if (so != NULL && (so->so_snd.sb_flags & SB_SNDBYTE_CNT)) {
+               unsent = inp_get_sndbytes_allunsent(so, th_ack);
+               inp_decr_sndbytes_unsent(so, unsent);
+       }
+}