]> git.saurik.com Git - apple/xnu.git/blobdiff - bsd/netinet/ip_input.c
xnu-2050.48.11.tar.gz
[apple/xnu.git] / bsd / netinet / ip_input.c
index 10156a8692a8d5413597ff4235c50421d02e7782..6953044bd753bbe4bd4a05bde07d85da5e84ad20 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -80,6 +80,8 @@
 #include <sys/kernel.h>
 #include <sys/syslog.h>
 #include <sys/sysctl.h>
+#include <sys/mcache.h>
+#include <mach/mach_time.h>
 
 #include <machine/endian.h>
 
@@ -93,6 +95,7 @@
 #include <net/if_dl.h>
 #include <net/route.h>
 #include <net/kpi_protocol.h>
+#include <net/ntstat.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/udp.h>
 #include <netinet/udp_var.h>
 #include <netinet/bootp.h>
+#include <mach/sdt.h>
 
 #if CONFIG_MACF_NET
 #include <security/mac_framework.h>
 #include <netkey/key.h>
 #endif
 
-#include "faith.h"
-#if defined(NFAITH) && NFAITH > 0
-#include <net/if_types.h>
-#endif
-
 #if DUMMYNET
 #include <netinet/ip_dummynet.h>
 #endif
 #include <net/pfvar.h>
 #endif /* PF */
 
+#include <netinet/lro_ext.h>
+
 #if IPSEC
 extern int ipsec_bypass;
 extern lck_mtx_t *sadb_mutex;
@@ -151,7 +152,8 @@ extern lck_mtx_t *sadb_mutex;
 lck_grp_t         *sadb_stat_mutex_grp;
 lck_grp_attr_t    *sadb_stat_mutex_grp_attr;
 lck_attr_t        *sadb_stat_mutex_attr;
-lck_mtx_t         *sadb_stat_mutex;
+decl_lck_mtx_data(, sadb_stat_mutex_data);
+lck_mtx_t         *sadb_stat_mutex = &sadb_stat_mutex_data;
 
 #endif
 
@@ -167,48 +169,51 @@ SYSCTL_PROC(_net_inet_ip, IPCTL_FORWARDING, forwarding,
     sysctl_ipforwarding, "I", "Enable IP forwarding between interfaces");
 
 static int     ipsendredirects = 1; /* XXX */
-SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_RW | CTLFLAG_LOCKED,
     &ipsendredirects, 0, "Enable sending IP redirects");
 
 int    ip_defttl = IPDEFTTL;
-SYSCTL_INT(_net_inet_ip, IPCTL_DEFTTL, ttl, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_ip, IPCTL_DEFTTL, ttl, CTLFLAG_RW | CTLFLAG_LOCKED,
     &ip_defttl, 0, "Maximum TTL on IP packets");
 
 static int     ip_dosourceroute = 0;
-SYSCTL_INT(_net_inet_ip, IPCTL_SOURCEROUTE, sourceroute, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_ip, IPCTL_SOURCEROUTE, sourceroute, CTLFLAG_RW | CTLFLAG_LOCKED,
     &ip_dosourceroute, 0, "Enable forwarding source routed IP packets");
 
 static int     ip_acceptsourceroute = 0;
 SYSCTL_INT(_net_inet_ip, IPCTL_ACCEPTSOURCEROUTE, accept_sourceroute, 
-    CTLFLAG_RW, &ip_acceptsourceroute, 0, 
+    CTLFLAG_RW | CTLFLAG_LOCKED, &ip_acceptsourceroute, 0, 
     "Enable accepting source routed IP packets");
 
 static int     ip_keepfaith = 0;
-SYSCTL_INT(_net_inet_ip, IPCTL_KEEPFAITH, keepfaith, CTLFLAG_RW,
-       &ip_keepfaith,  0,
-       "Enable packet capture for FAITH IPv4->IPv6 translater daemon");
+SYSCTL_INT(_net_inet_ip, IPCTL_KEEPFAITH, keepfaith, CTLFLAG_RD | CTLFLAG_LOCKED,
+       &ip_keepfaith,  0, "");
 
 static int     nipq = 0;       /* total # of reass queues */
 static int     maxnipq;
-SYSCTL_INT(_net_inet_ip, OID_AUTO, maxfragpackets, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_ip, OID_AUTO, maxfragpackets, CTLFLAG_RW | CTLFLAG_LOCKED,
        &maxnipq, 0,
        "Maximum number of IPv4 fragment reassembly queue entries");
 
 static int    maxfragsperpacket;
-SYSCTL_INT(_net_inet_ip, OID_AUTO, maxfragsperpacket, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_ip, OID_AUTO, maxfragsperpacket, CTLFLAG_RW | CTLFLAG_LOCKED,
        &maxfragsperpacket, 0,
        "Maximum number of IPv4 fragments allowed per packet");
 
 static int    maxfrags;
-SYSCTL_INT(_net_inet_ip, OID_AUTO, maxfrags, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_ip, OID_AUTO, maxfrags, CTLFLAG_RW | CTLFLAG_LOCKED,
        &maxfrags, 0, "Maximum number of IPv4 fragments allowed");
 
 static int    currentfrags = 0;
 
 int    ip_doscopedroute = 1;
-SYSCTL_INT(_net_inet_ip, OID_AUTO, scopedroute, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_ip, OID_AUTO, scopedroute, CTLFLAG_RD | CTLFLAG_LOCKED,
      &ip_doscopedroute, 0, "Enable IPv4 scoped routing");
 
+int    ip_restrictrecvif = 1;
+SYSCTL_INT(_net_inet_ip, OID_AUTO, restrictrecvif, CTLFLAG_RW | CTLFLAG_LOCKED,
+     &ip_restrictrecvif, 0, "Enable inbound interface restrictions");
+
 /*
  * XXX - Setting ip_checkinterface mostly implements the receive side of
  * the Strong ES model described in RFC 1122, but since the routing table
@@ -223,10 +228,9 @@ SYSCTL_INT(_net_inet_ip, OID_AUTO, scopedroute, CTLFLAG_RW,
  * packets for those addresses are received.
  */
 static int     ip_checkinterface = 0;
-SYSCTL_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_RW | CTLFLAG_LOCKED,
     &ip_checkinterface, 0, "Verify packet arrives on correct interface");
 
-
 #if DIAGNOSTIC
 static int     ipprintfs = 0;
 #endif
@@ -240,7 +244,8 @@ static int  ipqmaxlen = IFQ_MAXLEN;
 static lck_grp_attr_t  *in_ifaddr_rwlock_grp_attr;
 static lck_grp_t       *in_ifaddr_rwlock_grp;
 static lck_attr_t      *in_ifaddr_rwlock_attr;
-lck_rw_t               *in_ifaddr_rwlock;
+decl_lck_rw_data(, in_ifaddr_rwlock_data);
+lck_rw_t               *in_ifaddr_rwlock = &in_ifaddr_rwlock_data;
 
 /* Protected by in_ifaddr_rwlock */
 struct in_ifaddrhead in_ifaddrhead;            /* first inet address */
@@ -251,13 +256,13 @@ static u_int32_t inaddr_nhash;                    /* hash table size */
 static u_int32_t inaddr_hashp;                 /* next largest prime */
 
 struct ifqueue ipintrq;
-SYSCTL_INT(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen, CTLFLAG_RW | CTLFLAG_LOCKED,
     &ipintrq.ifq_maxlen, 0, "Maximum size of the IP input queue");
-SYSCTL_INT(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops, CTLFLAG_RD,
+SYSCTL_INT(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops, CTLFLAG_RD | CTLFLAG_LOCKED,
     &ipintrq.ifq_drops, 0, "Number of packets dropped from the IP input queue");
 
 struct ipstat ipstat;
-SYSCTL_STRUCT(_net_inet_ip, IPCTL_STATS, stats, CTLFLAG_RD,
+SYSCTL_STRUCT(_net_inet_ip, IPCTL_STATS, stats, CTLFLAG_RD | CTLFLAG_LOCKED,
     &ipstat, ipstat, "IP statistics (struct ipstat, netinet/ip_var.h)");
 
 /* Packet reassembly stuff */
@@ -276,16 +281,15 @@ lck_attr_t                *ip_mutex_attr;
 lck_grp_t              *ip_mutex_grp;
 lck_grp_attr_t         *ip_mutex_grp_attr;
 lck_mtx_t              *inet_domain_mutex;
-extern lck_mtx_t       *domain_proto_mtx;
 
 #if IPCTL_DEFMTU
-SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW | CTLFLAG_LOCKED,
     &ip_mtu, 0, "Default MTU");
 #endif
 
 #if IPSTEALTH
 static int     ipstealth = 0;
-SYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_RW | CTLFLAG_LOCKED,
     &ipstealth, 0, "");
 #endif
 
@@ -296,25 +300,23 @@ ip_fw_chk_t *ip_fw_chk_ptr;
 int fw_enable = 1;
 int fw_bypass = 1;
 int fw_one_pass = 0;
+#endif /* IPFIREWALL */
 
 #if DUMMYNET
 ip_dn_io_t *ip_dn_io_ptr;
 #endif
 
-int (*fr_checkp)(struct ip *, int, struct ifnet *, int, struct mbuf **) = NULL;
-#endif /* IPFIREWALL */
-
-SYSCTL_NODE(_net_inet_ip, OID_AUTO, linklocal, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "link local");
+SYSCTL_NODE(_net_inet_ip, OID_AUTO, linklocal, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "link local");
 
 struct ip_linklocal_stat ip_linklocal_stat;
-SYSCTL_STRUCT(_net_inet_ip_linklocal, OID_AUTO, stat, CTLFLAG_RD,
+SYSCTL_STRUCT(_net_inet_ip_linklocal, OID_AUTO, stat, CTLFLAG_RD | CTLFLAG_LOCKED,
         &ip_linklocal_stat, ip_linklocal_stat,
         "Number of link local packets with TTL less than 255");
 
-SYSCTL_NODE(_net_inet_ip_linklocal, OID_AUTO, in, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "link local input");
+SYSCTL_NODE(_net_inet_ip_linklocal, OID_AUTO, in, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "link local input");
 
 int ip_linklocal_in_allowbadttl = 1;
-SYSCTL_INT(_net_inet_ip_linklocal_in, OID_AUTO, allowbadttl, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_ip_linklocal_in, OID_AUTO, allowbadttl, CTLFLAG_RW | CTLFLAG_LOCKED,
         &ip_linklocal_in_allowbadttl, 0,
         "Allow incoming link local packets with TTL less than 255");
 
@@ -354,17 +356,47 @@ static void ip_fwd_route_copyout(struct ifnet *, struct route *);
 static void ip_fwd_route_copyin(struct ifnet *, struct route *);
 void   ipintr(void);
 void   in_dinit(void);
+static inline u_short ip_cksum(struct mbuf *, int);
 
 #if RANDOM_IP_ID
 extern u_short ip_id;
 
 int    ip_use_randomid = 1;
-SYSCTL_INT(_net_inet_ip, OID_AUTO, random_id, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_ip, OID_AUTO, random_id, CTLFLAG_RW | CTLFLAG_LOCKED,
     &ip_use_randomid, 0, "Randomize IP packets IDs");
 #endif
 
-#define        satosin(sa)     ((struct sockaddr_in *)(sa))
-#define        ifatoia(ifa)    ((struct in_ifaddr *)(ifa))
+/*
+ * On platforms which require strict alignment (currently for anything but
+ * i386 or x86_64), check if the IP header pointer is 32-bit aligned; if not,
+ * copy the contents of the mbuf chain into a new chain, and free the original
+ * one.  Create some head room in the first mbuf of the new chain, in case
+ * it's needed later on.
+ */
+#if defined(__i386__) || defined(__x86_64__)
+#define        IP_HDR_ALIGNMENT_FIXUP(_m, _ifp, _action) do { } while (0)
+#else /* !__i386__ && !__x86_64__ */
+#define        IP_HDR_ALIGNMENT_FIXUP(_m, _ifp, _action) do {                  \
+       if (!IP_HDR_ALIGNED_P(mtod(_m, caddr_t))) {                     \
+               struct mbuf *_n;                                        \
+               struct ifnet *__ifp = (_ifp);                           \
+               atomic_add_64(&(__ifp)->if_alignerrs, 1);               \
+               if (((_m)->m_flags & M_PKTHDR) &&                       \
+                   (_m)->m_pkthdr.header != NULL)                      \
+                       (_m)->m_pkthdr.header = NULL;                   \
+               _n = m_defrag_offset(_m, max_linkhdr, M_NOWAIT);        \
+               if (_n == NULL) {                                       \
+                       atomic_add_32(&ipstat.ips_toosmall, 1);         \
+                       m_freem(_m);                                    \
+                       (_m) = NULL;                                    \
+                       _action                                         \
+               } else {                                                \
+                       VERIFY(_n != (_m));                             \
+                       (_m) = _n;                                      \
+               }                                                       \
+       }                                                               \
+} while (0)
+#endif /* !__i386__ && !__x86_64__ */
 
 /*
  * IP initialization: fill in IP protocol switch table.
@@ -379,18 +411,23 @@ ip_init(void)
 
        if (!ip_initialized)
        {
+               PE_parse_boot_argn("net.inet.ip.scopedroute",
+                   &ip_doscopedroute, sizeof (ip_doscopedroute));
+
                in_ifaddr_init();
 
                in_ifaddr_rwlock_grp_attr = lck_grp_attr_alloc_init();
                in_ifaddr_rwlock_grp = lck_grp_alloc_init("in_ifaddr_rwlock",
                    in_ifaddr_rwlock_grp_attr);
                in_ifaddr_rwlock_attr = lck_attr_alloc_init();
-               in_ifaddr_rwlock = lck_rw_alloc_init(in_ifaddr_rwlock_grp,
+               lck_rw_init(in_ifaddr_rwlock, in_ifaddr_rwlock_grp,
                    in_ifaddr_rwlock_attr);
 
                TAILQ_INIT(&in_ifaddrhead);
                in_ifaddrhashtbl_init();
 
+               ip_moptions_init();
+
                pr = pffindproto_locked(PF_INET, IPPROTO_RAW, SOCK_RAW);
                if (pr == 0)
                        panic("ip_init");
@@ -438,10 +475,7 @@ ip_init(void)
                sadb_stat_mutex_grp = lck_grp_alloc_init("sadb_stat", sadb_stat_mutex_grp_attr);
                sadb_stat_mutex_attr = lck_attr_alloc_init();
 
-               if ((sadb_stat_mutex = lck_mtx_alloc_init(sadb_stat_mutex_grp, sadb_stat_mutex_attr)) == NULL) {
-                       printf("ip_init: can't alloc sadb_stat_mutex\n");
-                       return;
-               }
+               lck_mtx_init(sadb_stat_mutex, sadb_stat_mutex_grp, sadb_stat_mutex_attr);
 
 #endif
                arp_init();
@@ -531,9 +565,6 @@ in_dinit(void)
 
        if (!inetdomain_initted)
        {
-#if 0
-               kprintf("Initing %d protosw entries\n", in_proto_count);
-#endif
                dp = &inetdomain;
                dp->dom_flags = DOM_REENTRANT;
 
@@ -542,18 +573,21 @@ in_dinit(void)
                inet_domain_mutex = dp->dom_mtx;
                inetdomain_initted = 1;
        
-               lck_mtx_unlock(domain_proto_mtx);       
+               domain_proto_mtx_unlock(TRUE);
                proto_register_input(PF_INET, ip_proto_input, NULL, 1);
-               lck_mtx_lock(domain_proto_mtx); 
+               domain_proto_mtx_lock();
        }
 }
 
+void
+ip_proto_dispatch_in_wrapper(struct mbuf *m, int hlen, u_int8_t proto)
+{
+       ip_proto_dispatch_in(m, hlen, proto, 0); 
+}
+
 __private_extern__ void
-ip_proto_dispatch_in(
-                                       struct mbuf     *m,
-                                       int                     hlen,
-                                       u_int8_t        proto,
-                                       ipfilter_t      inject_ipfref)
+ip_proto_dispatch_in(struct mbuf *m, int hlen, u_int8_t proto,
+    ipfilter_t inject_ipfref)
 {
        struct ipfilter *filter;
        int seen = (inject_ipfref == 0);
@@ -561,7 +595,7 @@ ip_proto_dispatch_in(
        struct ip *ip;
        void (*pr_input)(struct mbuf *, int len);
 
-       if (!TAILQ_EMPTY(&ipv4_filters)) {      
+       if (!TAILQ_EMPTY(&ipv4_filters)) {
                ipf_ref();
                TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) {
                        if (seen == 0) {
@@ -569,8 +603,17 @@ ip_proto_dispatch_in(
                                        seen = 1;
                        } else if (filter->ipf_filter.ipf_input) {
                                errno_t result;
-               
+
                                if (changed_header == 0) {
+                                       /*
+                                        * Perform IP header alignment fixup,
+                                        * if needed, before passing packet
+                                        * into filter(s).
+                                        */
+                                       IP_HDR_ALIGNMENT_FIXUP(m,
+                                           m->m_pkthdr.rcvif,
+                                           ipf_unref(); return;);
+
                                        changed_header = 1;
                                        ip = mtod(m, struct ip *);
                                        ip->ip_len = htons(ip->ip_len + hlen);
@@ -579,7 +622,8 @@ ip_proto_dispatch_in(
                                        ip->ip_sum = in_cksum(m, hlen);
                                }
                                result = filter->ipf_filter.ipf_input(
-                                       filter->ipf_filter.cookie, (mbuf_t*)&m, hlen, proto);
+                                   filter->ipf_filter.cookie, (mbuf_t*)&m,
+                                   hlen, proto);
                                if (result == EJUSTRETURN) {
                                        ipf_unref();
                                        return;
@@ -589,10 +633,14 @@ ip_proto_dispatch_in(
                                        m_freem(m);
                                        return;
                                }
-       }
+                       }
                }
                ipf_unref();
        }
+
+       /* Perform IP header alignment fixup (post-filters), if needed */
+       IP_HDR_ALIGNMENT_FIXUP(m, m->m_pkthdr.rcvif, return;);
+
        /*
         * If there isn't a specific lock for the protocol
         * we're about to call, use the generic lock for AF_INET.
@@ -626,23 +674,27 @@ ip_input(struct mbuf *m)
        struct ip *ip;
        struct ipq *fp;
        struct in_ifaddr *ia = NULL;
-       int    hlen, checkif;
-       u_short sum;
+       unsigned int    hlen, checkif;
+       u_short sum = 0;
        struct in_addr pkt_dst;
 #if IPFIREWALL
        int i;
        u_int32_t div_info = 0;         /* packet divert/tee info */
+#endif
+#if IPFIREWALL || DUMMYNET
        struct ip_fw_args args;
        struct m_tag    *tag;
 #endif
        ipfilter_t inject_filter_ref = 0;
 
-#if IPFIREWALL
-       args.eh = NULL;
-       args.oif = NULL;
-       args.rule = NULL;
-       args.divert_rule = 0;                   /* divert cookie */
-       args.next_hop = NULL;
+       /* Check if the mbuf is still valid after interface filter processing */
+       MBUF_INPUT_CHECK(m, m->m_pkthdr.rcvif);
+
+       /* Perform IP header alignment fixup, if needed */
+       IP_HDR_ALIGNMENT_FIXUP(m, m->m_pkthdr.rcvif, goto bad;);
+
+#if IPFIREWALL || DUMMYNET
+       bzero(&args, sizeof(struct ip_fw_args));
 
        /*
         * Don't bother searching for tag(s) if there's none.
@@ -657,7 +709,8 @@ ip_input(struct mbuf *m)
                struct dn_pkt_tag       *dn_tag;
 
                dn_tag = (struct dn_pkt_tag *)(tag+1);
-               args.rule = dn_tag->rule;
+               args.fwa_ipfw_rule = dn_tag->dn_ipfw_rule;
+               args.fwa_pf_rule = dn_tag->dn_pf_rule;
 
                m_tag_delete(m, tag);
        }
@@ -669,7 +722,7 @@ ip_input(struct mbuf *m)
                struct divert_tag       *div_tag;
 
                div_tag = (struct divert_tag *)(tag+1);
-               args.divert_rule = div_tag->cookie;
+               args.fwa_divert_rule = div_tag->cookie;
 
                m_tag_delete(m, tag);
        }
@@ -680,7 +733,7 @@ ip_input(struct mbuf *m)
                struct ip_fwd_tag       *ipfwd_tag;
 
                ipfwd_tag = (struct ip_fwd_tag *)(tag+1);
-               args.next_hop = ipfwd_tag->next_hop;
+               args.fwa_next_hop = ipfwd_tag->next_hop;
 
                m_tag_delete(m, tag);
        }
@@ -690,23 +743,35 @@ ip_input(struct mbuf *m)
                panic("ip_input no HDR");
 #endif
 
-       if (args.rule) {        /* dummynet already filtered us */
+#if DUMMYNET
+       if (args.fwa_ipfw_rule || args.fwa_pf_rule) {   /* dummynet already filtered us */
                ip = mtod(m, struct ip *);
                hlen = IP_VHL_HL(ip->ip_vhl) << 2;
                inject_filter_ref = ipf_get_inject_filter(m);
-               goto iphack ;
+#if IPFIREWALL
+               if (args.fwa_ipfw_rule)
+                       goto iphack;
+#endif /* IPFIREWALL */
+               if (args.fwa_pf_rule)
+                       goto check_with_pf;
        }
+#endif /* DUMMYNET */
 ipfw_tags_done:
-#endif /* IPFIREWALL */
+#endif /* IPFIREWALL || DUMMYNET*/
 
        /*
-        * No need to proccess packet twice if we've already seen it.
+        * No need to process packet twice if we've already seen it.
         */
        if (!SLIST_EMPTY(&m->m_pkthdr.tags))
                inject_filter_ref = ipf_get_inject_filter(m);
        if (inject_filter_ref != 0) {
                ip = mtod(m, struct ip *);
                hlen = IP_VHL_HL(ip->ip_vhl) << 2;
+
+               DTRACE_IP6(receive, struct mbuf *, m, struct inpcb *, NULL, 
+                       struct ip *, ip, struct ifnet *, m->m_pkthdr.rcvif,
+                       struct ip *, ip, struct ip6_hdr *, NULL);
+               
                ip->ip_len = ntohs(ip->ip_len) - hlen;
                ip->ip_off = ntohs(ip->ip_off);
                ip_proto_dispatch_in(m, hlen, ip->ip_p, inject_filter_ref);
@@ -714,7 +779,6 @@ ipfw_tags_done:
        }
 
        OSAddAtomic(1, &ipstat.ips_total);
-
        if (m->m_pkthdr.len < sizeof(struct ip))
                goto tooshort;
 
@@ -766,41 +830,36 @@ ipfw_tags_done:
                                goto bad;
                }
        }
-       if ((IF_HWASSIST_CSUM_FLAGS(m->m_pkthdr.rcvif->if_hwassist) == 0) 
-           || (apple_hwcksum_rx == 0) ||
-          ((m->m_pkthdr.csum_flags & CSUM_TCP_SUM16) && ip->ip_p != IPPROTO_TCP)) {
-                       m->m_pkthdr.csum_flags = 0; /* invalidate HW generated checksum flags */
-       }
 
-       if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
-               sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
-       } else if (!(m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) ||
-           apple_hwcksum_tx == 0) {
-               /*
-                * Either this is not loopback packet coming from an interface
-                * that does not support checksum offloading, or it is loopback
-                * packet that has undergone software checksumming at the send
-                * side because apple_hwcksum_tx was set to 0.  In this case,
-                * calculate the checksum in software to validate the packet.
-                */
-               sum = in_cksum(m, hlen);
-       } else {
-               /*
-                * This is a loopback packet without any valid checksum since
-                * the send side has bypassed it (apple_hwcksum_tx set to 1).
-                * We get here because apple_hwcksum_rx was set to 0, and so
-                * we pretend that all is well.
-                */
-               sum = 0;
-               m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
-                   CSUM_IP_CHECKED | CSUM_IP_VALID;
-                m->m_pkthdr.csum_data = 0xffff;
-       }
+       sum = ip_cksum(m, hlen);
        if (sum) {
-               OSAddAtomic(1, &ipstat.ips_badsum);
                goto bad;
        }
 
+       DTRACE_IP6(receive, struct mbuf *, m, struct inpcb *, NULL, 
+               struct ip *, ip, struct ifnet *, m->m_pkthdr.rcvif,
+               struct ip *, ip, struct ip6_hdr *, NULL);
+
+       /*
+        * Naively assume we can attribute inbound data to the route we would
+        * use to send to this destination. Asymmetric routing breaks this
+        * assumption, but it still allows us to account for traffic from
+        * a remote node in the routing table.
+        * This has a very significant performance impact, so we bypass it
+        * if nstat_collect is disabled. In the future we may also bypass it
+        * when the protocol is TCP, because TCP will have a route that
+        * we can use to attribute the data to. That does mean we would not
+        * account for forwarded TCP traffic.
+        */
+       if (nstat_collect) {
+               struct rtentry *rt =
+                   ifnet_cached_rtlookup_inet(m->m_pkthdr.rcvif, ip->ip_src);
+               if (rt != NULL) {
+                       nstat_route_rx(rt, 1, m->m_pkthdr.len, 0);
+                       rtfree(rt);
+               }
+       }
+
        /*
         * Convert fields to host representation.
         */
@@ -839,36 +898,37 @@ tooshort:
                        m_adj(m, ip->ip_len - m->m_pkthdr.len);
        }
 
-#if IPSEC
-       if (ipsec_bypass == 0 && ipsec_gethist(m, NULL))
-               goto pass;
-#endif
 
-       /*
-        * IpHack's section.
-        * Right now when no processing on packet has done
-        * and it is still fresh out of network we do our black
-        * deals with it.
-        * - Firewall: deny/allow/divert
-        * - Xlate: translate packet's addr/port (NAT).
-        * - Pipe: pass pkt through dummynet.
-        * - Wrap: fake packet's addr/port <unimpl.>
-        * - Encapsulate: put it in another IP and send out. <unimp.>
-        */
+#if DUMMYNET
+check_with_pf:
+#endif
 #if PF
        /* Invoke inbound packet filter */
-       if (pf_af_hook(m->m_pkthdr.rcvif, NULL, &m, AF_INET, TRUE) != 0) {
-               if (m != NULL) {
-                       panic("%s: unexpected packet %p\n", __func__, m);
-                       /* NOTREACHED */
+       if (PF_IS_ENABLED) {
+               int error;
+#if DUMMYNET
+               error = pf_af_hook(m->m_pkthdr.rcvif, NULL, &m, AF_INET, TRUE, &args);
+#else
+               error = pf_af_hook(m->m_pkthdr.rcvif, NULL, &m, AF_INET, TRUE, NULL);
+#endif /* DUMMYNET */
+               if (error != 0 || m == NULL) {
+                       if (m != NULL) {
+                               panic("%s: unexpected packet %p\n", __func__, m);
+                               /* NOTREACHED */
+                       }
+                       /* Already freed by callee */
+                       return;
                }
-               /* Already freed by callee */
-               return;
+               ip = mtod(m, struct ip *);
+               hlen = IP_VHL_HL(ip->ip_vhl) << 2;
        }
-       ip = mtod(m, struct ip *);
-       hlen = IP_VHL_HL(ip->ip_vhl) << 2;
 #endif /* PF */
 
+#if IPSEC
+       if (ipsec_bypass == 0 && ipsec_gethist(m, NULL))
+               goto pass;
+#endif
+
 #if IPFIREWALL
 #if DUMMYNET
 iphack:
@@ -877,28 +937,20 @@ iphack:
         * Check if we want to allow this packet to be processed.
         * Consider it to be bad if not.
         */
-       if (fr_checkp) {
-               struct  mbuf    *m1 = m;
-
-               if (fr_checkp(ip, hlen, m->m_pkthdr.rcvif, 0, &m1) || !m1) {
-                       return;
-               }
-               ip = mtod(m = m1, struct ip *);
-       }
        if (fw_enable && IPFW_LOADED) {
 #if IPFIREWALL_FORWARD
                /*
                 * If we've been forwarded from the output side, then
                 * skip the firewall a second time
                 */
-               if (args.next_hop)
+               if (args.fwa_next_hop)
                        goto ours;
 #endif /* IPFIREWALL_FORWARD */
 
-               args.m = m;
+               args.fwa_m = m;
 
                i = ip_fw_chk_ptr(&args);
-               m = args.m;
+               m = args.fwa_m;
 
                if ( (i & IP_FW_PORT_DENY_FLAG) || m == NULL) { /* drop */
                        if (m)
@@ -907,13 +959,13 @@ iphack:
                }
                ip = mtod(m, struct ip *); /* just in case m changed */
                
-               if (i == 0 && args.next_hop == NULL) {  /* common case */
+               if (i == 0 && args.fwa_next_hop == NULL) {      /* common case */
                        goto pass;
                }
 #if DUMMYNET
                 if (DUMMYNET_LOADED && (i & IP_FW_PORT_DYNT_FLAG) != 0) {
                        /* Send packet to the appropriate pipe */
-                       ip_dn_io_ptr(m, i&0xffff, DN_TO_IP_IN, &args);
+                       ip_dn_io_ptr(m, i&0xffff, DN_TO_IP_IN, &args, DN_CLIENT_IPFW);
                        return;
                }
 #endif /* DUMMYNET */
@@ -925,7 +977,7 @@ iphack:
                }
 #endif
 #if IPFIREWALL_FORWARD
-               if (i == 0 && args.next_hop != NULL) {
+               if (i == 0 && args.fwa_next_hop != NULL) {
                        goto pass;
                }
 #endif
@@ -946,7 +998,7 @@ pass:
         */
        ip_nhops = 0;           /* for source routed packets */
 #if IPFIREWALL
-       if (hlen > sizeof (struct ip) && ip_dooptions(m, 0, args.next_hop)) {
+       if (hlen > sizeof (struct ip) && ip_dooptions(m, 0, args.fwa_next_hop)) {
 #else
        if (hlen > sizeof (struct ip) && ip_dooptions(m, 0, NULL)) {
 #endif
@@ -977,8 +1029,8 @@ pass:
         * changed by use of 'ipfw fwd'.
         */
 #if IPFIREWALL
-       pkt_dst = args.next_hop == NULL ?
-           ip->ip_dst : args.next_hop->sin_addr;
+       pkt_dst = args.fwa_next_hop == NULL ?
+           ip->ip_dst : args.fwa_next_hop->sin_addr;
 #else
        pkt_dst = ip->ip_dst;
 #endif
@@ -1000,7 +1052,7 @@ pass:
        checkif = ip_checkinterface && (ipforwarding == 0) && 
            ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0)
 #if IPFIREWALL
-           && (args.next_hop == NULL);
+           && (args.fwa_next_hop == NULL);
 #else
                ;
 #endif
@@ -1015,11 +1067,14 @@ pass:
                 * arrived via the correct interface if checking is
                 * enabled.
                 */
+               IFA_LOCK_SPIN(&ia->ia_ifa);
                if (IA_SIN(ia)->sin_addr.s_addr == pkt_dst.s_addr && 
                    (!checkif || ia->ia_ifp == m->m_pkthdr.rcvif)) {
+                       IFA_UNLOCK(&ia->ia_ifa);
                        lck_rw_done(in_ifaddr_rwlock);
                        goto ours;
                }
+               IFA_UNLOCK(&ia->ia_ifa);
        }
        lck_rw_done(in_ifaddr_rwlock);
 
@@ -1034,18 +1089,22 @@ pass:
        if (m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST) {
                struct ifaddr *ifa;
                struct ifnet *ifp = m->m_pkthdr.rcvif;
-
                ifnet_lock_shared(ifp);
                TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
-                       if (ifa->ifa_addr->sa_family != AF_INET)
+                       IFA_LOCK_SPIN(ifa);
+                       if (ifa->ifa_addr->sa_family != AF_INET) {
+                               IFA_UNLOCK(ifa);
                                continue;
+                       }
                        ia = ifatoia(ifa);
                        if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr ==
                            pkt_dst.s_addr || ia->ia_netbroadcast.s_addr ==
                            pkt_dst.s_addr) {
+                               IFA_UNLOCK(ifa);
                                ifnet_lock_done(ifp);
                                goto ours;
                        }
+                       IFA_UNLOCK(ifa);
                }
                ifnet_lock_done(ifp);
        }
@@ -1085,14 +1144,15 @@ pass:
                 * See if we belong to the destination multicast group on the
                 * arrival interface.
                 */
-               ifnet_lock_shared(ifp);
-               IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm);
-               ifnet_lock_done(ifp);
+               in_multihead_lock_shared();
+               IN_LOOKUP_MULTI(&ip->ip_dst, ifp, inm);
+               in_multihead_lock_done();
                if (inm == NULL) {
                        OSAddAtomic(1, &ipstat.ips_notmember);
                        m_freem(m);
                        return;
                }
+               INM_REMREF(inm);
                goto ours;
        }
        if (ip->ip_dst.s_addr == (u_int32_t)INADDR_BROADCAST)
@@ -1118,19 +1178,6 @@ pass:
                ip = mtod(m, struct ip *); /* in case it changed */
        }
 
-#if defined(NFAITH) && 0 < NFAITH
-       /*
-        * FAITH(Firewall Aided Internet Translator)
-        */
-       if (m->m_pkthdr.rcvif && m->m_pkthdr.rcvif->if_type == IFT_FAITH) {
-               if (ip_keepfaith) {
-                       if (ip->ip_p == IPPROTO_TCP || ip->ip_p == IPPROTO_ICMP) 
-                               goto ours;
-               }
-               m_freem(m);
-               return;
-       }
-#endif
        /*
         * Not for us; forward if possible and desirable.
         */
@@ -1139,7 +1186,7 @@ pass:
                m_freem(m);
        } else {
 #if IPFIREWALL
-               ip_forward(m, 0, args.next_hop);
+               ip_forward(m, 0, args.fwa_next_hop);
 #else
                ip_forward(m, 0, NULL);
 #endif
@@ -1247,13 +1294,13 @@ found:
                /*
                 * Attempt reassembly; if it succeeds, proceed.
                 * ip_reass() will return a different mbuf, and update
-                * the divert info in div_info and args.divert_rule.
+                * the divert info in div_info and args.fwa_divert_rule.
                 */
                        OSAddAtomic(1, &ipstat.ips_fragments);
                        m->m_pkthdr.header = ip;
 #if IPDIVERT
                        m = ip_reass(m, fp, &ipq[sum],
-                           (u_int16_t *)&div_info, &args.divert_rule);
+                           (u_int16_t *)&div_info, &args.fwa_divert_rule);
 #else
                        m = ip_reass(m, fp, &ipq[sum]);
 #endif
@@ -1314,7 +1361,7 @@ found:
 #endif
                /* Deliver packet to divert input routine */
                OSAddAtomic(1, &ipstat.ips_delivered);
-               divert_packet(m, 1, div_info & 0xffff, args.divert_rule);
+               divert_packet(m, 1, div_info & 0xffff, args.fwa_divert_rule);
 
                /* If 'tee', continue with original packet */
                if (clone == NULL) {
@@ -1345,27 +1392,31 @@ found:
        OSAddAtomic(1, &ipstat.ips_delivered);
        {
 #if IPFIREWALL
-               if (args.next_hop && ip->ip_p == IPPROTO_TCP) {
+               if (args.fwa_next_hop && ip->ip_p == IPPROTO_TCP) {
                        /* TCP needs IPFORWARD info if available */
                        struct m_tag *fwd_tag;
                        struct ip_fwd_tag       *ipfwd_tag;
                        
-                       fwd_tag = m_tag_alloc(KERNEL_MODULE_TAG_ID,
+                       fwd_tag = m_tag_create(KERNEL_MODULE_TAG_ID,
                            KERNEL_TAG_TYPE_IPFORWARD, sizeof (*ipfwd_tag),
-                           M_NOWAIT);
+                           M_NOWAIT, m);
                        if (fwd_tag == NULL) {
                                goto bad;
                        }
                        
                        ipfwd_tag = (struct ip_fwd_tag *)(fwd_tag+1);
-                       ipfwd_tag->next_hop = args.next_hop;
+                       ipfwd_tag->next_hop = args.fwa_next_hop;
 
                        m_tag_prepend(m, fwd_tag);
        
                        KERNEL_DEBUG(DBG_LAYER_END, ip->ip_dst.s_addr, 
                             ip->ip_src.s_addr, ip->ip_p, ip->ip_off, ip->ip_len);
        
-       
+                       if (sw_lro) {
+                               m = tcp_lro(m, hlen);
+                               if (m == NULL)
+                                       return;
+                       }               
                        /* TCP deals with its own locking */
                        ip_proto_dispatch_in(m, hlen, ip->ip_p, 0);
                } else {
@@ -1375,6 +1426,11 @@ found:
                        ip_proto_dispatch_in(m, hlen, ip->ip_p, 0);
                }
 #else
+               if ((sw_lro) && (ip->ip_p == IPPROTO_TCP)) {
+                       m = tcp_lro(m, hlen);
+                       if (m == NULL)
+                               return;
+               }
                ip_proto_dispatch_in(m, hlen, ip->ip_p, 0);
 #endif
                
@@ -1731,9 +1787,6 @@ ip_slowtimo(void)
                        }
                }
        }
-#if IPFLOW
-       ipflow_slowtimo();
-#endif
        lck_mtx_unlock(ip_mutex);
 }
 
@@ -1781,6 +1834,9 @@ ip_dooptions(struct mbuf *m, __unused int pass, struct sockaddr_in *next_hop)
        struct sockaddr_in ipaddr = {
            sizeof (ipaddr), AF_INET , 0 , { 0 }, { 0, } };
 
+       /* Expect 32-bit aligned data pointer on strict-align platforms */
+       MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
+
        dst = ip->ip_dst;
        cp = (u_char *)(ip + 1);
        cnt = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof (struct ip);
@@ -1843,7 +1899,7 @@ ip_dooptions(struct mbuf *m, __unused int pass, struct sockaddr_in *next_hop)
                                break;
                        }
                        else {
-                               ifafree(&ia->ia_ifa);
+                               IFA_REMREF(&ia->ia_ifa);
                                ia = NULL;
                        }
                        off--;                  /* 0 origin */
@@ -1890,9 +1946,10 @@ nosourcerouting:
 
                        if (opt == IPOPT_SSRR) {
 #define        INA     struct in_ifaddr *
-#define        SA      struct sockaddr *
-                           if ((ia = (INA)ifa_ifwithdstaddr((SA)&ipaddr)) == 0) {
-                                       ia = (INA)ifa_ifwithnet((SA)&ipaddr);
+                               if ((ia = (INA)ifa_ifwithdstaddr(
+                                   (struct sockaddr *)&ipaddr)) == 0) {
+                                       ia = (INA)ifa_ifwithnet(
+                                           (struct sockaddr *)&ipaddr);
                                }
                        } else {
                                ia = ip_rtaddr(ipaddr.sin_addr);
@@ -1903,9 +1960,11 @@ nosourcerouting:
                                goto bad;
                        }
                        ip->ip_dst = ipaddr.sin_addr;
+                       IFA_LOCK(&ia->ia_ifa);
                        (void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
                            sizeof(struct in_addr));
-                       ifafree(&ia->ia_ifa);
+                       IFA_UNLOCK(&ia->ia_ifa);
+                       IFA_REMREF(&ia->ia_ifa);
                        ia = NULL;
                        cp[IPOPT_OFFSET] += sizeof(struct in_addr);
                        /*
@@ -1935,23 +1994,26 @@ nosourcerouting:
                         * locate outgoing interface; if we're the destination,
                         * use the incoming interface (should be same).
                         */
-                       if ((ia = (INA)ifa_ifwithaddr((SA)&ipaddr)) == 0) {
+                       if ((ia = (INA)ifa_ifwithaddr((struct sockaddr *)
+                           &ipaddr)) == 0) {
                                if ((ia = ip_rtaddr(ipaddr.sin_addr)) == 0) {
                                        type = ICMP_UNREACH;
                                        code = ICMP_UNREACH_HOST;
                                        goto bad;
                                }
                        }
+                       IFA_LOCK(&ia->ia_ifa);
                        (void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
                            sizeof(struct in_addr));
-                       ifafree(&ia->ia_ifa);
+                       IFA_UNLOCK(&ia->ia_ifa);
+                       IFA_REMREF(&ia->ia_ifa);
                        ia = NULL;
                        cp[IPOPT_OFFSET] += sizeof(struct in_addr);
                        break;
 
                case IPOPT_TS:
                        code = cp - (u_char *)ip;
-                       ipt = (struct ip_timestamp *)cp;
+                       ipt = (struct ip_timestamp *)(void *)cp;
                        if (ipt->ipt_len < 4 || ipt->ipt_len > 40) {
                                code = (u_char *)&ipt->ipt_len - (u_char *)ip;
                                goto bad;
@@ -1969,7 +2031,7 @@ nosourcerouting:
                                }
                                break;
                        }
-                       sin = (struct in_addr *)(cp + ipt->ipt_ptr - 1);
+                       sin = (struct in_addr *)(void *)(cp + ipt->ipt_ptr - 1);
                        switch (ipt->ipt_flg) {
 
                        case IPOPT_TS_TSONLY:
@@ -1983,14 +2045,16 @@ nosourcerouting:
                                        goto bad;
                                }
                                ipaddr.sin_addr = dst;
-                               ia = (INA)ifaof_ifpforaddr((SA)&ipaddr,
-                                                           m->m_pkthdr.rcvif);
+                               ia = (INA)ifaof_ifpforaddr((struct sockaddr *)
+                                   &ipaddr, m->m_pkthdr.rcvif);
                                if (ia == 0)
                                        continue;
+                               IFA_LOCK(&ia->ia_ifa);
                                (void)memcpy(sin, &IA_SIN(ia)->sin_addr,
                                    sizeof(struct in_addr));
+                               IFA_UNLOCK(&ia->ia_ifa);
                                ipt->ipt_ptr += sizeof(struct in_addr);
-                               ifafree(&ia->ia_ifa);
+                               IFA_REMREF(&ia->ia_ifa);
                                ia = NULL;
                                break;
 
@@ -2003,9 +2067,10 @@ nosourcerouting:
                                }
                                (void)memcpy(&ipaddr.sin_addr, sin,
                                    sizeof(struct in_addr));
-                               if ((ia = (struct in_ifaddr*)ifa_ifwithaddr((SA)&ipaddr)) == 0)
+                               if ((ia = (struct in_ifaddr*)ifa_ifwithaddr(
+                                   (struct sockaddr *)&ipaddr)) == 0)
                                        continue;
-                               ifafree(&ia->ia_ifa);
+                               IFA_REMREF(&ia->ia_ifa);
                                ia = NULL;
                                ipt->ipt_ptr += sizeof(struct in_addr);
                                break;
@@ -2046,7 +2111,7 @@ ip_rtaddr(struct in_addr dst)
        struct route ro;
 
        bzero(&ro, sizeof (ro));
-       sin = (struct sockaddr_in *)&ro.ro_dst;
+       sin = (struct sockaddr_in *)(void *)&ro.ro_dst;
        sin->sin_family = AF_INET;
        sin->sin_len = sizeof (*sin);
        sin->sin_addr = dst;
@@ -2057,7 +2122,7 @@ ip_rtaddr(struct in_addr dst)
 
        RT_LOCK(ro.ro_rt);
        if ((rt_ifa = ro.ro_rt->rt_ifa) != NULL)
-               ifaref(rt_ifa);
+               IFA_ADDREF(rt_ifa);
        RT_UNLOCK(ro.ro_rt);
        rtfree(ro.ro_rt);
 
@@ -2129,7 +2194,7 @@ ip_srcroute(void)
        ip_srcrt.srcopt[IPOPT_OFFSET] = IPOPT_MINOFF;
        (void)memcpy(mtod(m, caddr_t) + sizeof(struct in_addr),
            &ip_srcrt.nop, OPTSIZ);
-       q = (struct in_addr *)(mtod(m, caddr_t) +
+       q = (struct in_addr *)(void *)(mtod(m, caddr_t) +
            sizeof(struct in_addr) + OPTSIZ);
 #undef OPTSIZ
        /*
@@ -2169,6 +2234,9 @@ ip_stripoptions(struct mbuf *m, __unused struct mbuf *mopt)
        caddr_t opts;
        int olen;
 
+       /* Expect 32-bit aligned data pointer on strict-align platforms */
+       MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
+
        olen = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof (struct ip);
        opts = (caddr_t)(ip + 1);
        i = m->m_len - (sizeof (struct ip) + olen);
@@ -2204,12 +2272,12 @@ sysctl_ipforwarding SYSCTL_HANDLER_ARGS
                for (i = 0; i <= if_index; i++) {
                        struct ifnet *ifp = ifindex2ifnet[i];
                        if (ifp != NULL) {
-                               lck_mtx_lock(ifp->if_fwd_route_lock);
-                               if (ifp->if_fwd_route.ro_rt != NULL) {
+                               lck_mtx_lock(&ifp->if_cached_route_lock);
+                               if (ifp->if_fwd_route.ro_rt != NULL)
                                        rtfree(ifp->if_fwd_route.ro_rt);
-                                       ifp->if_fwd_route.ro_rt = NULL;
-                               }
-                               lck_mtx_unlock(ifp->if_fwd_route_lock);
+                               bzero(&ifp->if_fwd_route,
+                                   sizeof (ifp->if_fwd_route));
+                               lck_mtx_unlock(&ifp->if_cached_route_lock);
                        }
                }
                ifnet_head_done();
@@ -2228,20 +2296,16 @@ ip_fwd_route_copyout(struct ifnet *ifp, struct route *dst)
 {
        struct route *src = &ifp->if_fwd_route;
 
-       lck_mtx_lock(ifp->if_fwd_route_lock);
+       lck_mtx_lock_spin(&ifp->if_cached_route_lock);
+       lck_mtx_convert_spin(&ifp->if_cached_route_lock);
 
        /* Minor sanity check */
        if (src->ro_rt != NULL && rt_key(src->ro_rt)->sa_family != AF_INET)
                panic("%s: wrong or corrupted route: %p", __func__, src);
 
-       /* Copy everything (rt, dst, flags) from ifnet */
-       bcopy(src, dst, sizeof (*dst));
-
-       /* Hold one reference for the local copy of struct route */
-       if (dst->ro_rt != NULL)
-               RT_ADDREF(dst->ro_rt);
+       route_copyout(dst, src, sizeof(*dst));
 
-       lck_mtx_unlock(ifp->if_fwd_route_lock);
+       lck_mtx_unlock(&ifp->if_cached_route_lock);
 }
 
 static void
@@ -2249,37 +2313,17 @@ ip_fwd_route_copyin(struct ifnet *ifp, struct route *src)
 {
        struct route *dst = &ifp->if_fwd_route;
 
-       lck_mtx_lock(ifp->if_fwd_route_lock);
+       lck_mtx_lock_spin(&ifp->if_cached_route_lock);
+       lck_mtx_convert_spin(&ifp->if_cached_route_lock);
 
        /* Minor sanity check */
        if (src->ro_rt != NULL && rt_key(src->ro_rt)->sa_family != AF_INET)
                panic("%s: wrong or corrupted route: %p", __func__, src);
 
-       /* No cached route in the ifnet? */
-       if (dst->ro_rt == NULL) {
-               /*
-                * Copy everything (rt, dst, flags) from ip_forward();
-                * the reference to the route was held at the time
-                * it was allocated and is kept intact.
-                */
-               bcopy(src, dst, sizeof (*dst));
-       } else if (src->ro_rt != NULL) {
-               /*
-                * If the same, update just the ro_flags and ditch the one
-                * in the local copy.  Else ditch the one that is currently
-                * cached, and cache what we got back from ip_output().
-                */
-               if (dst->ro_rt == src->ro_rt) {
-                       dst->ro_flags = src->ro_flags;
-                       rtfree(src->ro_rt);
-                       src->ro_rt = NULL;
-               } else {
-                       rtfree(dst->ro_rt);
-                       bcopy(src, dst, sizeof (*dst));
-               }
-       }
+       if (ifp->if_fwd_cacheok)
+               route_copyin(src, dst, sizeof(*src));
 
-       lck_mtx_unlock(ifp->if_fwd_route_lock);
+       lck_mtx_unlock(&ifp->if_cached_route_lock);
 }
 
 /*
@@ -2311,7 +2355,7 @@ ip_forward(struct mbuf *m, int srcrt, struct sockaddr_in *next_hop)
        n_long dest;
        struct in_addr pkt_dst;
        u_int32_t nextmtu = 0;
-       struct ip_out_args ipoa = { IFSCOPE_NONE };
+       struct ip_out_args ipoa = { IFSCOPE_NONE, { 0 }, 0 };
        struct ifnet *ifp = m->m_pkthdr.rcvif;
 #if PF
        struct pf_mtag *pf_mtag;
@@ -2354,13 +2398,15 @@ ip_forward(struct mbuf *m, int srcrt, struct sockaddr_in *next_hop)
 
 #if PF
        pf_mtag = pf_find_mtag(m);
-       if (pf_mtag != NULL && pf_mtag->rtableid != IFSCOPE_NONE)
-               ipoa.ipoa_ifscope = pf_mtag->rtableid;
+       if (pf_mtag != NULL && pf_mtag->pftag_rtableid != IFSCOPE_NONE) {
+               ipoa.ipoa_boundif = pf_mtag->pftag_rtableid;
+               ipoa.ipoa_flags |= IPOAF_BOUND_IF;
+       }
 #endif /* PF */
 
        ip_fwd_route_copyout(ifp, &fwd_rt);
 
-       sin = (struct sockaddr_in *)&fwd_rt.ro_dst;
+       sin = (struct sockaddr_in *)(void *)&fwd_rt.ro_dst;
        if (fwd_rt.ro_rt == NULL ||
            fwd_rt.ro_rt->generation_id != route_generation ||
            pkt_dst.s_addr != sin->sin_addr.s_addr) {
@@ -2372,7 +2418,7 @@ ip_forward(struct mbuf *m, int srcrt, struct sockaddr_in *next_hop)
                sin->sin_len = sizeof (*sin);
                sin->sin_addr = pkt_dst;
 
-               rtalloc_scoped_ign(&fwd_rt, RTF_PRCLONING, ipoa.ipoa_ifscope);
+               rtalloc_scoped_ign(&fwd_rt, RTF_PRCLONING, ipoa.ipoa_boundif);
                if (fwd_rt.ro_rt == NULL) {
                        icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, dest, 0);
                        goto done;
@@ -2417,24 +2463,27 @@ ip_forward(struct mbuf *m, int srcrt, struct sockaddr_in *next_hop)
        if (rt->rt_ifp == m->m_pkthdr.rcvif &&
            (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 &&
            satosin(rt_key(rt))->sin_addr.s_addr != 0 &&
-           ipsendredirects && !srcrt) {
-#define        RTA(rt) ((struct in_ifaddr *)(rt->rt_ifa))
+           ipsendredirects && !srcrt && rt->rt_ifa != NULL) {
+               struct in_ifaddr *ia = (struct in_ifaddr *)rt->rt_ifa;
                u_int32_t src = ntohl(ip->ip_src.s_addr);
 
-               if (RTA(rt) &&
-                   (src & RTA(rt)->ia_subnetmask) == RTA(rt)->ia_subnet) {
-                   if (rt->rt_flags & RTF_GATEWAY)
-                       dest = satosin(rt->rt_gateway)->sin_addr.s_addr;
-                   else
-                       dest = pkt_dst.s_addr;
-                   /* Router requirements says to only send host redirects */
-                   type = ICMP_REDIRECT;
-                   code = ICMP_REDIRECT_HOST;
+               /* Become a regular mutex */
+               RT_CONVERT_LOCK(rt);
+               IFA_LOCK_SPIN(&ia->ia_ifa);
+               if ((src & ia->ia_subnetmask) == ia->ia_subnet) {
+                       if (rt->rt_flags & RTF_GATEWAY)
+                               dest = satosin(rt->rt_gateway)->sin_addr.s_addr;
+                       else
+                               dest = pkt_dst.s_addr;
+                       /* Router requirements says to only send host redirects */
+                       type = ICMP_REDIRECT;
+                       code = ICMP_REDIRECT_HOST;
 #if DIAGNOSTIC
-                   if (ipprintfs)
-                       printf("redirect (%d) to %lx\n", code, (u_int32_t)dest);
+                       if (ipprintfs)
+                               printf("redirect (%d) to %lx\n", code, (u_int32_t)dest);
 #endif
                }
+               IFA_UNLOCK(&ia->ia_ifa);
        }
        RT_UNLOCK(rt);
 
@@ -2444,9 +2493,9 @@ ip_forward(struct mbuf *m, int srcrt, struct sockaddr_in *next_hop)
                struct m_tag *tag;
                struct ip_fwd_tag       *ipfwd_tag;
 
-               tag = m_tag_alloc(KERNEL_MODULE_TAG_ID,
+               tag = m_tag_create(KERNEL_MODULE_TAG_ID,
                    KERNEL_TAG_TYPE_IPFORWARD,
-                   sizeof (*ipfwd_tag), M_NOWAIT);
+                   sizeof (*ipfwd_tag), M_NOWAIT, m);
                if (tag == NULL) {
                        error = ENOBUFS;
                        m_freem(m);
@@ -2473,9 +2522,6 @@ ip_forward(struct mbuf *m, int srcrt, struct sockaddr_in *next_hop)
                        OSAddAtomic(1, &ipstat.ips_redirectsent);
                else {
                        if (mcopy) {
-#if IPFLOW
-                               ipflow_create(&fwd_rt, mcopy);
-#endif
                                /*
                                 * If we didn't have to go thru ipflow and
                                 * the packet was successfully consumed by
@@ -2580,6 +2626,7 @@ ip_forward(struct mbuf *m, int srcrt, struct sockaddr_in *next_hop)
                                                }
                                                sav = key_allocsa_policy(&saidx);
                                                if (sav != NULL) {
+                                                       lck_mtx_lock(sadb_mutex);
                                                        if (sav->sah != NULL) {
                                                                ro = &sav->sah->sa_route;
                                                                if (ro->ro_rt != NULL) {
@@ -2591,7 +2638,8 @@ ip_forward(struct mbuf *m, int srcrt, struct sockaddr_in *next_hop)
                                                                        RT_UNLOCK(ro->ro_rt);
                                                                }
                                                        }
-                                                       key_freesav(sav, KEY_SADB_UNLOCKED);
+                                                       key_freesav(sav, KEY_SADB_LOCKED);
+                                                       lck_mtx_unlock(sadb_mutex);
                                                }
                                        }
                                }
@@ -2617,27 +2665,41 @@ done:
        ip_fwd_route_copyin(ifp, &fwd_rt);
 }
 
-void
+int
 ip_savecontrol(
        struct inpcb *inp,
        struct mbuf **mp,
        struct ip *ip,
        struct mbuf *m)
 {
+       *mp = NULL;
        if (inp->inp_socket->so_options & SO_TIMESTAMP) {
                struct timeval tv;
 
                microtime(&tv);
-               *mp = sbcreatecontrol((caddr_t) &tv, sizeof(tv),
-                       SCM_TIMESTAMP, SOL_SOCKET);
-               if (*mp)
-                       mp = &(*mp)->m_next;
+               mp = sbcreatecontrol_mbuf((caddr_t) &tv, sizeof(tv),
+                       SCM_TIMESTAMP, SOL_SOCKET, mp);
+               if (*mp == NULL) {
+                       goto no_mbufs;
+               }
        }
+       if ((inp->inp_socket->so_options & SO_TIMESTAMP_MONOTONIC) != 0) {
+               uint64_t time;
+
+               time = mach_absolute_time();
+               mp = sbcreatecontrol_mbuf((caddr_t) &time, sizeof(time),
+                       SCM_TIMESTAMP_MONOTONIC, SOL_SOCKET, mp);
+               
+               if (*mp == NULL) {
+                       goto no_mbufs;
+               }
+       } 
        if (inp->inp_flags & INP_RECVDSTADDR) {
-               *mp = sbcreatecontrol((caddr_t) &ip->ip_dst,
-                   sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP);
-               if (*mp)
-                       mp = &(*mp)->m_next;
+               mp = sbcreatecontrol_mbuf((caddr_t) &ip->ip_dst,
+                       sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP, mp);
+               if (*mp == NULL) {
+                       goto no_mbufs;
+               }
        }
 #ifdef notyet
        /* XXX
@@ -2646,17 +2708,19 @@ ip_savecontrol(
         */
        /* options were tossed already */
        if (inp->inp_flags & INP_RECVOPTS) {
-               *mp = sbcreatecontrol((caddr_t) opts_deleted_above,
-                   sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP);
-               if (*mp)
-                       mp = &(*mp)->m_next;
+               mp = sbcreatecontrol_mbuf((caddr_t) opts_deleted_above,
+                       sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP, mp);
+               if (*mp == NULL) {
+                       goto no_mbufs;
+               }
        }
        /* ip_srcroute doesn't do what we want here, need to fix */
        if (inp->inp_flags & INP_RECVRETOPTS) {
-               *mp = sbcreatecontrol((caddr_t) ip_srcroute(),
-                   sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP);
-               if (*mp)
-                       mp = &(*mp)->m_next;
+               mp = sbcreatecontrol_mbuf((caddr_t) ip_srcroute(),
+                       sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP, mp);
+               if (*mp == NULL) {
+                       goto no_mbufs;
+               }
        }
 #endif
        if (inp->inp_flags & INP_RECVIF) {
@@ -2669,24 +2733,27 @@ ip_savecontrol(
                struct sockaddr_dl *sdl2 = &sdlbuf.sdl;
 
                ifnet_head_lock_shared();
-               if (((ifp = m->m_pkthdr.rcvif)) 
-               && ( ifp->if_index && (ifp->if_index <= if_index))) {
+               if ((ifp = m->m_pkthdr.rcvif) != NULL &&
+                   ifp->if_index && (ifp->if_index <= if_index)) {
                        struct ifaddr *ifa = ifnet_addrs[ifp->if_index - 1];
 
                        if (!ifa || !ifa->ifa_addr)
                                goto makedummy;
 
-                       sdp = (struct sockaddr_dl *)ifa->ifa_addr;
+                       IFA_LOCK_SPIN(ifa);
+                       sdp = (struct sockaddr_dl *)(void *)ifa->ifa_addr;
                        /*
                         * Change our mind and don't try copy.
                         */
-                       if ((sdp->sdl_family != AF_LINK)
-                       || (sdp->sdl_len > sizeof(sdlbuf))) {
+                       if ((sdp->sdl_family != AF_LINK) ||
+                           (sdp->sdl_len > sizeof(sdlbuf))) {
+                               IFA_UNLOCK(ifa);
                                goto makedummy;
                        }
                        bcopy(sdp, sdl2, sdp->sdl_len);
+                       IFA_UNLOCK(ifa);
                } else {
-makedummy:     
+makedummy:
                        sdl2->sdl_len
                                = offsetof(struct sockaddr_dl, sdl_data[0]);
                        sdl2->sdl_family = AF_LINK;
@@ -2694,15 +2761,46 @@ makedummy:
                        sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0;
                }
                ifnet_head_done();
-               *mp = sbcreatecontrol((caddr_t) sdl2, sdl2->sdl_len,
-                       IP_RECVIF, IPPROTO_IP);
-               if (*mp)
-                       mp = &(*mp)->m_next;
+               mp = sbcreatecontrol_mbuf((caddr_t) sdl2, sdl2->sdl_len,
+                       IP_RECVIF, IPPROTO_IP, mp);
+               if (*mp == NULL) {
+                       goto no_mbufs;
+               }
        }
        if (inp->inp_flags & INP_RECVTTL) {
-               *mp = sbcreatecontrol((caddr_t)&ip->ip_ttl, sizeof(ip->ip_ttl), IP_RECVTTL, IPPROTO_IP);
-               if (*mp) mp = &(*mp)->m_next;
+               mp = sbcreatecontrol_mbuf((caddr_t)&ip->ip_ttl, sizeof(ip->ip_ttl), 
+                       IP_RECVTTL, IPPROTO_IP, mp);
+               if (*mp == NULL) {
+                       goto no_mbufs;
+               }
+       }
+       if ((inp->inp_socket->so_flags & SOF_RECV_TRAFFIC_CLASS) != 0) {
+               int tc = m_get_traffic_class(m);
+
+               mp = sbcreatecontrol_mbuf((caddr_t) &tc, sizeof(tc),
+                       SO_TRAFFIC_CLASS, SOL_SOCKET, mp);
+               if (*mp == NULL) {
+                       goto no_mbufs;
+               }
+       }
+       if (inp->inp_flags & INP_PKTINFO) {
+               struct in_pktinfo pi;
+
+               bzero(&pi, sizeof(struct in_pktinfo));
+               bcopy(&ip->ip_dst, &pi.ipi_addr, sizeof(struct in_addr));
+               pi.ipi_ifindex = (m && m->m_pkthdr.rcvif) ? m->m_pkthdr.rcvif->if_index : 0;
+               
+               mp = sbcreatecontrol_mbuf((caddr_t)&pi, sizeof(struct in_pktinfo), 
+                       IP_RECVPKTINFO, IPPROTO_IP, mp);
+               if (*mp == NULL) {
+                       goto no_mbufs;
+               }
        }
+       return 0;
+
+no_mbufs:
+       ipstat.ips_pktdropcntrl++;
+       return ENOBUFS;
 }
 
 int
@@ -2742,3 +2840,51 @@ ip_rsvp_done(void)
        }
        return 0;
 }
+
+static inline u_short
+ip_cksum(struct mbuf *m, int hlen)
+{
+       /* Check the IP header checksum of m (hlen is handed to in_cksum); returns 0 when accepted, nonzero when bad. */
+       u_short sum;
+       struct ip *ip;
+
+       ip = mtod(m, struct ip *);
+
+       if ((IF_HWASSIST_CSUM_FLAGS(m->m_pkthdr.rcvif->if_hwassist) == 0)
+                   || (apple_hwcksum_rx == 0) ||
+                  ((m->m_pkthdr.csum_flags & CSUM_TCP_SUM16) && ip->ip_p != IPPROTO_TCP)) {
+               m->m_pkthdr.csum_flags = 0; /* invalidate HW generated checksum flags so they are not trusted below */
+
+       }
+
+       if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
+               sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID); /* 0 if flagged valid, 1 otherwise */
+       } else if (!(m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) ||
+                   apple_hwcksum_tx == 0) {
+               /*
+                * Either this is not a loopback packet and it carries no
+                * CSUM_IP_CHECKED flag that can be trusted, or it is a loopback
+                * packet that has undergone software checksumming at the send
+                * side because apple_hwcksum_tx was set to 0.  In either case,
+                * calculate the checksum in software to validate the packet.
+                */
+               sum = in_cksum(m, hlen);
+       } else {
+               /*
+                * This is a loopback packet without any valid checksum since
+                * the send side has bypassed it (apple_hwcksum_tx is nonzero).
+                * No CSUM_IP_CHECKED flag is set at this point, so pretend
+                * that all is well and mark the checksums as already verified.
+                */
+               sum = 0;
+               m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
+                           CSUM_IP_CHECKED | CSUM_IP_VALID;
+           m->m_pkthdr.csum_data = 0xffff;
+       }
+
+       if (sum) {
+               OSAddAtomic(1, &ipstat.ips_badsum); /* count the rejected packet */
+       }
+
+       return sum;
+}