]> git.saurik.com Git - apple/xnu.git/blobdiff - bsd/netinet6/ip6_output.c
xnu-4903.221.2.tar.gz
[apple/xnu.git] / bsd / netinet6 / ip6_output.c
index 1dc5dec387c87e007a88d0ec47c13b183b46b2bd..0720d6809f59992495cfab7ec091fa4aaba186b1 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2017 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
 #include <net/if.h>
 #include <net/route.h>
 #include <net/dlil.h>
+#include <net/net_api_stats.h>
 #include <net/net_osdep.h>
+#include <net/net_perf.h>
 
+#include <netinet/ip.h>
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/ip_var.h>
 #include <netinet6/in6_var.h>
 #include <netinet/ip6.h>
 #include <netinet/kpi_ipfilter_var.h>
+#include <netinet/in_tclass.h>
 
 #include <netinet6/ip6protosw.h>
 #include <netinet/icmp6.h>
 extern int ipsec_bypass;
 #endif /* IPSEC */
 
+#if NECP
+#include <net/necp.h>
+#endif /* NECP */
+
 #if CONFIG_MACF_NET
 #include <security/mac.h>
 #endif /* CONFIG_MACF_NET */
 
 #if DUMMYNET
-#include <netinet6/ip6_fw.h>
 #include <netinet/ip_fw.h>
 #include <netinet/ip_dummynet.h>
 #endif /* DUMMYNET */
@@ -154,6 +161,9 @@ extern int ipsec_bypass;
 #include <net/pfvar.h>
 #endif /* PF */
 
+static int sysctl_reset_ip6_output_stats SYSCTL_HANDLER_ARGS;
+static int sysctl_ip6_output_measure_bins SYSCTL_HANDLER_ARGS;
+static int sysctl_ip6_output_getperf SYSCTL_HANDLER_ARGS;
 static int ip6_copyexthdr(struct mbuf **, caddr_t, int);
 static void ip6_out_cksum_stats(int, u_int32_t);
 static int ip6_insert_jumboopt(struct ip6_exthdrs *, u_int32_t);
@@ -172,6 +182,30 @@ static int ip6_setpktopt(int, u_char *, int, struct ip6_pktopts *, int,
 static int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *);
 static void ip6_output_checksum(struct ifnet *, uint32_t, struct mbuf *,
     int, uint32_t, uint32_t);
+extern int udp_ctloutput(struct socket *, struct sockopt *);
+static int ip6_fragment_packet(struct mbuf **m,
+    struct ip6_pktopts *opt, struct ip6_exthdrs *exthdrsp, struct ifnet *ifp,
+    uint32_t mtu, boolean_t alwaysfrag, uint32_t unfragpartlen,
+    struct route_in6 *ro_pmtu, int nxt0, uint32_t optlen);
+
+SYSCTL_DECL(_net_inet6_ip6);
+
+static int ip6_output_measure = 0;
+SYSCTL_PROC(_net_inet6_ip6, OID_AUTO, output_perf,
+       CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
+       &ip6_output_measure, 0, sysctl_reset_ip6_output_stats, "I", "Do time measurement");
+
+static uint64_t ip6_output_measure_bins = 0;
+SYSCTL_PROC(_net_inet6_ip6, OID_AUTO, output_perf_bins,
+       CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_output_measure_bins, 0,
+       sysctl_ip6_output_measure_bins, "I",
+       "bins for chaining performance data histogram");
+
+static net_perf_t net_perf;
+SYSCTL_PROC(_net_inet6_ip6, OID_AUTO, output_perf_data,
+       CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
+       0, 0, sysctl_ip6_output_getperf, "S,net_perf",
+       "IP6 output performance data (struct net_perf, net/net_perf.h)");
 
 #define        IM6O_TRACE_HIST_SIZE    32      /* size of trace history */
 
@@ -206,47 +240,19 @@ static struct zone *im6o_zone;            /* zone for ip6_moptions */
 #define        IM6O_ZONE_MAX           64              /* maximum elements in zone */
 #define        IM6O_ZONE_NAME          "ip6_moptions"  /* zone name */
 
-SYSCTL_DECL(_net_inet6_ip6);
-
-static int ip6_maxchainsent = 0;
-SYSCTL_INT(_net_inet6_ip6, OID_AUTO, maxchainsent,
-       CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_maxchainsent, 0,
-       "use dlil_output_list");
-
 /*
- * XXX we don't handle mbuf chains yet in nd6_output() so ip6_output_list() only
- * walks through the packet chain and sends each mbuf separately.
+ * ip6_output() calls ip6_output_list() to do the work
  */
 int
-ip6_output_list(struct mbuf *m0, int packetlist, struct ip6_pktopts *opt,
+ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
     struct route_in6 *ro, int flags, struct ip6_moptions *im6o,
     struct ifnet **ifpp, struct ip6_out_args *ip6oa)
 {
-#pragma unused(packetlist)
-       struct mbuf *m = m0, *nextpkt;
-       int error = 0;
-
-       while (m != NULL) {
-               /*
-                * Break the chain before calling ip6_output() and free the
-                * mbufs if there was an error.
-                */
-               nextpkt = m->m_nextpkt;
-               m->m_nextpkt = NULL;
-               error = ip6_output(m, opt, ro, flags, im6o, ifpp, ip6oa);
-               if (error != 0) {
-                       if (nextpkt != NULL)
-                               m_freem_list(nextpkt);
-                       return (error);
-               }
-               m = nextpkt;
-       }
-
-       return (error);
+       return ip6_output_list(m0, 0, opt, ro, flags, im6o, ifpp, ip6oa);
 }
 
 /*
- * IP6 output. The packet in mbuf chain m contains a skeletal IP6
+ * IP6 output. Each packet in mbuf chain m contains a skeletal IP6
  * header (with pri, len, nxt, hlim, src, dst).
  * This function may modify ver and hlim only.
  * The mbuf chain containing the packet will be freed.
@@ -261,21 +267,24 @@ ip6_output_list(struct mbuf *m0, int packetlist, struct ip6_pktopts *opt,
  * which is rt_rmx.rmx_mtu.
  */
 int
-ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro,
-    int flags, struct ip6_moptions *im6o, struct ifnet **ifpp,
-    struct ip6_out_args *ip6oa)
+ip6_output_list(struct mbuf *m0, int packetchain, struct ip6_pktopts *opt,
+    struct route_in6 *ro, int flags, struct ip6_moptions *im6o,
+    struct ifnet **ifpp, struct ip6_out_args *ip6oa)
 {
        struct ip6_hdr *ip6;
        u_char *nexthdrp;
        struct ifnet *ifp = NULL, *origifp = NULL;      /* refcnt'd */
+       struct ifnet **ifpp_save = ifpp;
        struct mbuf *m, *mprev;
-       int hlen, tlen, len, off, nxt0;
+       struct mbuf *sendchain = NULL, *sendchain_last = NULL;
+       struct mbuf *inputchain = NULL;
+       int nxt0 = 0;
        struct route_in6 *ro_pmtu = NULL;
        struct rtentry *rt = NULL;
-       struct sockaddr_in6 *dst, src_sa, dst_sa;
+       struct sockaddr_in6 *dst = NULL, src_sa, dst_sa;
        int error = 0;
        struct in6_ifaddr *ia = NULL, *src_ia = NULL;
-       u_int32_t mtu;
+       u_int32_t mtu = 0;
        boolean_t alwaysfrag = FALSE;
        u_int32_t optlen = 0, plen = 0, unfragpartlen = 0;
        struct ip6_rthdr *rh;
@@ -283,6 +292,9 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro,
        ipfilter_t inject_filter_ref;
        struct ipf_pktopts *ippo = NULL;
        struct flowadv *adv = NULL;
+       uint32_t pktcnt = 0;
+       uint32_t packets_processed = 0;
+       struct timeval start_tv;
 #if DUMMYNET
        struct m_tag *tag;
        struct ip6_out_args saved_ip6oa;
@@ -294,6 +306,11 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro,
        struct route_in6 *ipsec_saved_route = NULL;
        boolean_t needipsectun = FALSE;
 #endif /* IPSEC */
+#if NECP
+       necp_kernel_policy_result necp_result = 0;
+       necp_kernel_policy_result_parameter necp_result_parameter;
+       necp_kernel_policy_id necp_matched_policy_id = 0;
+#endif /* NECP */
        struct {
                struct ipf_pktopts ipf_pktopts;
                struct ip6_exthdrs exthdrs;
@@ -301,6 +318,9 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro,
 #if IPSEC
                struct ipsec_output_state ipsec_state;
 #endif /* IPSEC */
+#if NECP
+               struct route_in6 necp_route;
+#endif /* NECP */
 #if DUMMYNET
                struct route_in6 saved_route;
                struct route_in6 saved_ro_pmtu;
@@ -311,6 +331,7 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro,
 #define        exthdrs         ip6obz.exthdrs
 #define        ip6route        ip6obz.ip6route
 #define        ipsec_state     ip6obz.ipsec_state
+#define        necp_route      ip6obz.necp_route
 #define        saved_route     ip6obz.saved_route
 #define        saved_ro_pmtu   ip6obz.saved_ro_pmtu
 #define        args            ip6obz.args
@@ -318,6 +339,7 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro,
                struct {
                        boolean_t select_srcif : 1;
                        boolean_t hdrsplit : 1;
+                       boolean_t route_selected : 1;
                        boolean_t dontfrag : 1;
 #if IPSEC
                        boolean_t needipsec : 1;
@@ -327,6 +349,9 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro,
                uint32_t raw;
        } ip6obf = { .raw = 0 };
 
+       if (ip6_output_measure)
+               net_perf_start_time(&net_perf, &start_tv);
+
        VERIFY(m0->m_flags & M_PKTHDR);
 
        /* zero out {saved_route, saved_ro_pmtu, ip6route, exthdrs, args} */
@@ -341,6 +366,13 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro,
            KERNEL_TAG_TYPE_DUMMYNET, NULL)) != NULL) {
                struct dn_pkt_tag       *dn_tag;
 
+               /*
+                * ip6_output_list() cannot handle chains of packets reinjected
+                * by dummynet. The same restriction applies to
+                * ip_output_list().
+                */
+               VERIFY(0 == packetchain);
+
                dn_tag = (struct dn_pkt_tag *)(tag+1);
                args.fwa_pf_rule = dn_tag->dn_pf_rule;
 
@@ -375,23 +407,22 @@ tags_done:
 #endif /* DUMMYNET */
 
        m = m0;
-       m->m_pkthdr.pkt_flags &= ~(PKTF_LOOP|PKTF_IFAINFO);
 
 #if IPSEC
-       /* for AH processing. stupid to have "socket" variable in IP layer... */
        if (ipsec_bypass == 0) {
                so = ipsec_getsocket(m);
-               (void) ipsec_setsocket(m, NULL);
-
+               if (so != NULL) {
+                       (void) ipsec_setsocket(m, NULL);
+               }
                /* If packet is bound to an interface, check bound policies */
                if ((flags & IPV6_OUTARGS) &&
-                   (ip6oa->ip6oa_flags & IPOAF_BOUND_IF) &&
-                   ip6oa->ip6oa_boundif != IFSCOPE_NONE) {
+                       (ip6oa->ip6oa_flags & IP6OAF_BOUND_IF) &&
+                       ip6oa->ip6oa_boundif != IFSCOPE_NONE) {
                        /* ip6obf.noipsec is a bitfield, use temp integer */
                        int noipsec = 0;
 
                        if (ipsec6_getpolicybyinterface(m, IPSEC_DIR_OUTBOUND,
-                           flags, ip6oa, &noipsec, &sp) != 0)
+                               flags, ip6oa, &noipsec, &sp) != 0)
                                goto bad;
 
                        ip6obf.noipsec = (noipsec != 0);
@@ -399,13 +430,9 @@ tags_done:
        }
 #endif /* IPSEC */
 
-       ip6 = mtod(m, struct ip6_hdr *);
-       nxt0 = ip6->ip6_nxt;
-       finaldst = ip6->ip6_dst;
-       inject_filter_ref = ipf_get_inject_filter(m);
        ippo = &ipf_pktopts;
 
-       if (ip6_doscopedroute && (flags & IPV6_OUTARGS)) {
+       if (flags & IPV6_OUTARGS) {
                /*
                 * In the forwarding case, only the ifscope value is used,
                 * as source interface selection doesn't take place.
@@ -432,15 +459,24 @@ tags_done:
                }
        }
 
-       if ((flags & IPV6_OUTARGS) && (ip6oa->ip6oa_flags & IP6OAF_NO_CELLULAR))
-               ipf_pktopts.ippo_flags |= IPPOF_NO_IFT_CELLULAR;
-
        if (flags & IPV6_OUTARGS) {
+               if (ip6oa->ip6oa_flags & IP6OAF_NO_CELLULAR)
+                       ipf_pktopts.ippo_flags |= IPPOF_NO_IFT_CELLULAR;
+               if (ip6oa->ip6oa_flags & IP6OAF_NO_EXPENSIVE)
+                       ipf_pktopts.ippo_flags |= IPPOF_NO_IFF_EXPENSIVE;
                adv = &ip6oa->ip6oa_flowadv;
                adv->code = FADV_SUCCESS;
                ip6oa->ip6oa_retflags = 0;
        }
 
+       /*
+        * Clear out ifpp to be filled in after determining route. ifpp_save is
+        * used to keep old value to release reference properly and dtrace
+        * ipsec tunnel traffic properly.
+        */
+       if (ifpp != NULL && *ifpp != NULL)
+               *ifpp = NULL;
+
 #if DUMMYNET
        if (args.fwa_pf_rule) {
                ip6 = mtod(m, struct ip6_hdr *);
@@ -449,6 +485,43 @@ tags_done:
        }
 #endif /* DUMMYNET */
 
+#if NECP
+       /*
+        * Since all packets are assumed to come from same socket, necp lookup
+        * only needs to happen once per function entry.
+        */
+       necp_matched_policy_id = necp_ip6_output_find_policy_match(m, flags,
+           (flags & IPV6_OUTARGS) ? ip6oa : NULL, &necp_result,
+           &necp_result_parameter);
+#endif /* NECP */
+
+       /*
+        * If a chain was passed in, prepare for the first iteration. For all
+        * other iterations, this work will be done at evaluateloop: label.
+        */
+       if (packetchain) {
+               /*
+                * Remove m from the chain during processing to avoid
+                * accidental frees on entire list.
+                */
+               inputchain = m->m_nextpkt;
+               m->m_nextpkt = NULL;
+       }
+
+loopit:
+       packets_processed++;
+       m->m_pkthdr.pkt_flags &= ~(PKTF_LOOP|PKTF_IFAINFO);
+       ip6 = mtod(m, struct ip6_hdr *);
+       nxt0 = ip6->ip6_nxt;
+       finaldst = ip6->ip6_dst;
+       ip6obf.hdrsplit = FALSE;
+       ro_pmtu = NULL;
+
+       if (!SLIST_EMPTY(&m->m_pkthdr.tags))
+               inject_filter_ref = ipf_get_inject_filter(m);
+       else
+               inject_filter_ref = NULL;
+
 #define        MAKE_EXTHDR(hp, mp) do {                                        \
        if (hp != NULL) {                                               \
                struct ip6_ext *eh = (struct ip6_ext *)(hp);            \
@@ -484,19 +557,83 @@ tags_done:
 
 #undef MAKE_EXTHDR
 
+#if NECP
+       if (necp_matched_policy_id) {
+               necp_mark_packet_from_ip(m, necp_matched_policy_id);
+
+               switch (necp_result) {
+               case NECP_KERNEL_POLICY_RESULT_PASS:
+                       goto skip_ipsec;
+               case NECP_KERNEL_POLICY_RESULT_DROP:
+               case NECP_KERNEL_POLICY_RESULT_SOCKET_DIVERT:
+                       /*
+                        * Flow divert packets should be blocked at the IP
+                        * layer.
+                        */
+                       error = EHOSTUNREACH;
+                       ip6stat.ip6s_necp_policy_drop++;
+                       goto freehdrs;
+               case NECP_KERNEL_POLICY_RESULT_IP_TUNNEL: {
+                       /*
+                        * Verify that the packet is being routed to the tunnel
+                        */
+                       struct ifnet *policy_ifp =
+                           necp_get_ifnet_from_result_parameter(
+                               &necp_result_parameter);
+
+                       if (policy_ifp == ifp) {
+                               goto skip_ipsec;
+                       } else {
+                               if (necp_packet_can_rebind_to_ifnet(m,
+                                   policy_ifp, (struct route *)&necp_route,
+                                   AF_INET6)) {
+                                       /*
+                                        * Set scoped index to the tunnel
+                                        * interface, since it is compatible
+                                        * with the packet. This will only work
+                                        * for callers who pass IPV6_OUTARGS,
+                                        * but that covers all of the clients
+                                        * we care about today.
+                                        */
+                                       if (flags & IPV6_OUTARGS) {
+                                               ip6oa->ip6oa_boundif =
+                                                   policy_ifp->if_index;
+                                               ip6oa->ip6oa_flags |=
+                                                   IP6OAF_BOUND_IF;
+                                       }
+                                       if (opt != NULL
+                                           && opt->ip6po_pktinfo != NULL) {
+                                               opt->ip6po_pktinfo->
+                                                   ipi6_ifindex =
+                                                       policy_ifp->if_index;
+                                       }
+                                       ro = &necp_route;
+                                       goto skip_ipsec;
+                               } else {
+                                       error = ENETUNREACH;
+                                       ip6stat.ip6s_necp_policy_drop++;
+                                       goto freehdrs;
+                               }
+                       }
+               }
+               default:
+                       break;
+               }
+       }
+#endif /* NECP */
+
 #if IPSEC
        if (ipsec_bypass != 0 || ip6obf.noipsec)
                goto skip_ipsec;
 
-       /* May have been set above if packet was bound */
        if (sp == NULL) {
                /* get a security policy for this packet */
-               if (so == NULL) {
-                       sp = ipsec6_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND,
-                           0, &error);
-               } else {
+               if (so != NULL) {
                        sp = ipsec6_getpolicybysock(m, IPSEC_DIR_OUTBOUND,
-                           so, &error);
+                               so, &error);
+               } else {
+                       sp = ipsec6_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND,
+                               0, &error);
                }
                if (sp == NULL) {
                        IPSEC_STAT_INCREMENT(ipsec6stat.out_inval);
@@ -529,13 +666,7 @@ tags_done:
                        goto freehdrs;
                }
                if (sp->ipsec_if) {
-                       /* Verify the redirect to ipsec interface */
-                       if (sp->ipsec_if == ifp) {
-                               /* Set policy for mbuf */
-                               m->m_pkthdr.ipsec_policy = sp->id;
-                               goto skip_ipsec;
-                       }
-                       goto bad;
+                       goto skip_ipsec;
                } else {
                        ip6obf.needipsec = TRUE;
                }
@@ -663,6 +794,9 @@ skip_ipsec:
        MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp, IPPROTO_DSTOPTS);
        MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp, IPPROTO_ROUTING);
 
+       /* It is no longer safe to free the pointers in exthdrs. */
+       exthdrs.merged = TRUE;
+
 #undef MAKE_CHAIN
 
 #if IPSEC
@@ -670,7 +804,9 @@ skip_ipsec:
                in6_delayed_cksum_offset(m, 0, optlen, nxt0);
 #endif /* IPSEC */
 
-       if (!TAILQ_EMPTY(&ipv6_filters)) {
+       if (!TAILQ_EMPTY(&ipv6_filters) &&
+           !((flags & IPV6_OUTARGS) &&
+           (ip6oa->ip6oa_flags & IP6OAF_INTCOPROC_ALLOWED))) {
                struct ipfilter *filter;
                int seen = (inject_filter_ref == NULL);
                int fixscope = 0;
@@ -709,7 +845,8 @@ skip_ipsec:
                                    (mbuf_t *)&m, ippo);
                                if (result == EJUSTRETURN) {
                                        ipf_unref();
-                                       goto done;
+                                       m = NULL;
+                                       goto evaluateloop;
                                }
                                if (result != 0) {
                                        ipf_unref();
@@ -846,7 +983,6 @@ skip_ipsec:
                ro = &ip6route;
                bzero((caddr_t)ro, sizeof (*ro));
        }
-       VERIFY(ro_pmtu == NULL);        /* must not get here if dummynet */
        ro_pmtu = ro;
        if (opt != NULL && opt->ip6po_rthdr)
                ro = &opt->ip6po_route;
@@ -906,7 +1042,7 @@ skip_ipsec:
 #if IPSEC
        if (ip6obf.needipsec && needipsectun) {
 #if CONFIG_DTRACE
-               struct ifnet *trace_ifp = (ifpp != NULL) ? (*ifpp) : NULL;
+               struct ifnet *trace_ifp = (ifpp_save != NULL) ? (*ifpp_save) : NULL;
 #endif /* CONFIG_DTRACE */
                /*
                 * All the extension headers will become inaccessible
@@ -920,8 +1056,8 @@ skip_ipsec:
                exthdrs.ip6e_ip6 = m;
 
                ipsec_state.m = m;
-               route_copyout(&ipsec_state.ro, (struct route *)ro,
-                   sizeof (ipsec_state.ro));
+               route_copyout((struct route *)&ipsec_state.ro, (struct route *)ro,
+                   sizeof (struct route_in6));
                ipsec_state.dst = SA(dst);
 
                /* So that we can see packets inside the tunnel */
@@ -931,15 +1067,16 @@ skip_ipsec:
 
                error = ipsec6_output_tunnel(&ipsec_state, sp, flags);
                /* tunneled in IPv4? packet is gone */
-               if (ipsec_state.tunneled == 4)
-                       goto done;
+               if (ipsec_state.tunneled == 4) {
+                       m = NULL;
+                       goto evaluateloop;
+               }
                m = ipsec_state.m;
                ipsec_saved_route = ro;
                ro = (struct route_in6 *)&ipsec_state.ro;
                dst = SIN6(ipsec_state.dst);
                if (error) {
                        /* mbuf is already reclaimed in ipsec6_output_tunnel. */
-                       m0 = m = NULL;
                        m = NULL;
                        switch (error) {
                        case EHOSTUNREACH:
@@ -976,10 +1113,12 @@ skip_ipsec:
        }
 #endif /* IPSEC */
 
-       /* for safety */
+       /*
+        * ifp should only be filled in for dummy net packets which will jump
+        * to check_with_pf label.
+        */
        if (ifp != NULL) {
-               ifnet_release(ifp);
-               ifp = NULL;
+               VERIFY(ip6obf.route_selected);
        }
 
        /* adjust pointer */
@@ -997,24 +1136,32 @@ skip_ipsec:
        dst_sa.sin6_addr = ip6->ip6_dst;
 
        /*
+        * Only call in6_selectroute() on first iteration to avoid taking
+        * multiple references on ifp and rt.
+        *
         * in6_selectroute() might return an ifp with its reference held
         * even in the error case, so make sure to release its reference.
         * ip6oa may be NULL if IPV6_OUTARGS isn't set.
         */
-       if ((error = in6_selectroute(ip6obf.select_srcif ? &src_sa : NULL,
-           &dst_sa, opt, im6o, &src_ia, ro, &ifp, &rt, 0, ip6oa)) != 0) {
-               switch (error) {
-               case EHOSTUNREACH:
-                       ip6stat.ip6s_noroute++;
-                       break;
-               case EADDRNOTAVAIL:
-               default:
-                       break; /* XXX statistics? */
+       if (!ip6obf.route_selected) {
+               error = in6_selectroute( ip6obf.select_srcif ? &src_sa : NULL,
+                   &dst_sa, opt, im6o, &src_ia, ro, &ifp, &rt, 0, ip6oa);
+
+               if (error != 0) {
+                       switch (error) {
+                       case EHOSTUNREACH:
+                               ip6stat.ip6s_noroute++;
+                               break;
+                       case EADDRNOTAVAIL:
+                       default:
+                               break; /* XXX statistics? */
+                       }
+                       if (ifp != NULL)
+                               in6_ifstat_inc(ifp, ifs6_out_discard);
+                       /* ifp (if non-NULL) will be released at the end */
+                       goto bad;
                }
-               if (ifp != NULL)
-                       in6_ifstat_inc(ifp, ifs6_out_discard);
-               /* ifp (if non-NULL) will be released at the end */
-               goto bad;
+               ip6obf.route_selected = TRUE;
        }
        if (rt == NULL) {
                /*
@@ -1024,18 +1171,28 @@ skip_ipsec:
                *dst = dst_sa;  /* XXX */
        }
 
+#if NECP
+       /* Catch-all to check if the interface is allowed */
+       if (!necp_packet_is_allowed_over_interface(m, ifp)) {
+               error = EHOSTUNREACH;
+               ip6stat.ip6s_necp_policy_drop++;
+               goto bad;
+       }
+#endif /* NECP */
+
        /*
         * then rt (for unicast) and ifp must be non-NULL valid values.
         */
        if (!(flags & IPV6_FORWARDING)) {
-               /* XXX: the FORWARDING flag can be set for mrouting. */
                in6_ifstat_inc_na(ifp, ifs6_out_request);
        }
        if (rt != NULL) {
                RT_LOCK(rt);
-               ia = (struct in6_ifaddr *)(rt->rt_ifa);
-               if (ia != NULL)
-                       IFA_ADDREF(&ia->ia_ifa);
+               if (ia == NULL) {
+                       ia = (struct in6_ifaddr *)(rt->rt_ifa);
+                       if (ia != NULL)
+                               IFA_ADDREF(&ia->ia_ifa);
+               }
                rt->rt_use++;
                RT_UNLOCK(rt);
        }
@@ -1164,40 +1321,8 @@ routefound:
                         * forbid loopback, loop back a copy.
                         */
                        ip6_mloopback(NULL, ifp, m, dst, optlen, nxt0);
-               } else {
-                       if (im6o != NULL)
-                               IM6O_UNLOCK(im6o);
-                       /*
-                        * If we are acting as a multicast router, perform
-                        * multicast forwarding as if the packet had just
-                        * arrived on the interface to which we are about
-                        * to send.  The multicast forwarding function
-                        * recursively calls this function, using the
-                        * IPV6_FORWARDING flag to prevent infinite recursion.
-                        *
-                        * Multicasts that are looped back by ip6_mloopback(),
-                        * above, will be forwarded by the ip6_input() routine,
-                        * if necessary.
-                        */
-#if MROUTING
-                       if (ip6_mrouter && !(flags & IPV6_FORWARDING)) {
-                               /*
-                                * XXX: ip6_mforward expects that rcvif is NULL
-                                * when it is called from the originating path.
-                                * However, it is not always the case, since
-                                * some versions of MGETHDR() does not
-                                * initialize the field.
-                                */
-                               m->m_pkthdr.rcvif = NULL;
-                               if (ip6_mforward(ip6, ifp, m) != 0) {
-                                       m_freem(m);
-                                       if (in6m != NULL)
-                                               IN6M_REMREF(in6m);
-                                       goto done;
-                               }
-                       }
-#endif /* MROUTING */
-               }
+               } else if (im6o != NULL)
+                       IM6O_UNLOCK(im6o);
                if (in6m != NULL)
                        IN6M_REMREF(in6m);
                /*
@@ -1210,8 +1335,11 @@ routefound:
                 */
                if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK) ||
                    IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) {
-                       m_freem(m);
-                       goto done;
+                       /* remove m from the packetchain and continue looping */
+                       if (m != NULL)
+                               m_freem(m);
+                       m = NULL;
+                       goto evaluateloop;
                }
        }
 
@@ -1219,10 +1347,8 @@ routefound:
         * Fill the outgoing interface to tell the upper layer
         * to increment per-interface statistics.
         */
-       if (ifpp != NULL) {
+       if (ifpp != NULL && *ifpp == NULL) {
                ifnet_reference(ifp);   /* for caller */
-               if (*ifpp != NULL)
-                       ifnet_release(*ifpp);
                *ifpp = ifp;
        }
 
@@ -1261,26 +1387,6 @@ routefound:
         */
        in6_clearscope(&ip6->ip6_src);
        in6_clearscope(&ip6->ip6_dst);
-
-#if IPFW2
-       /*
-        * Check with the firewall...
-        */
-       if (ip6_fw_enable && ip6_fw_chk_ptr) {
-               u_short port = 0;
-               m->m_pkthdr.rcvif = NULL;       /* XXX */
-               /* If ipfw says divert, we have to just drop packet */
-               if (ip6_fw_chk_ptr(&ip6, ifp, &port, &m)) {
-                       m_freem(m);
-                       goto done;
-               }
-               if (m == NULL) {
-                       error = EACCES;
-                       goto done;
-               }
-       }
-#endif /* IPFW2 */
-
        /*
         * If the outgoing packet contains a hop-by-hop options header,
         * it must be examined and processed even by the source node.
@@ -1305,9 +1411,13 @@ routefound:
                if (ip6_process_hopopts(m, (u_int8_t *)(hbh + 1),
                    ((hbh->ip6h_len + 1) << 3) - sizeof (struct ip6_hbh),
                    &dummy, &oplen) < 0) {
-                       /* m was already freed at this point */
+                       /*
+                        * m was already freed at this point. Set to NULL so it
+                        * is not re-freed at end of ip6_output_list.
+                        */
+                       m = NULL;
                        error = EINVAL; /* better error? */
-                       goto done;
+                       goto bad;
                }
                m->m_flags &= ~M_LOOP; /* XXX */
                m->m_pkthdr.rcvif = NULL;
@@ -1319,6 +1429,7 @@ check_with_pf:
 #if PF
        if (PF_IS_ENABLED) {
 #if DUMMYNET
+
                /*
                 * TODO: Need to save opt->ip6po_flags for reinjection
                 * rdar://10434993
@@ -1343,58 +1454,236 @@ check_with_pf:
 #endif /* !DUMMYNET */
 
                if (error != 0 || m == NULL) {
-                       /*
-                        * Note that if we ever handle packet chain, we will
-                        * have to restore the linkage from the previous
-                        * packet to the next like in ip_outout_list()
-                        */
                        if (m != NULL) {
                                panic("%s: unexpected packet %p\n",
                                    __func__, m);
                                /* NOTREACHED */
                        }
-                       /* Already freed by callee */
-                       goto done;
+                       /* m was already freed by callee and is now NULL.  */
+                       goto evaluateloop;
                }
                ip6 = mtod(m, struct ip6_hdr *);
        }
 #endif /* PF */
 
+#ifdef IPSEC
+       /* clean ipsec history before fragmentation */
+       ipsec_delaux(m);
+#endif /* IPSEC */
+
+       if (ip6oa != NULL) {
+               u_int8_t dscp;
+               
+               dscp = (ntohl(ip6->ip6_flow) & IP6FLOW_DSCP_MASK) >> IP6FLOW_DSCP_SHIFT;
+
+               error = set_packet_qos(m, ifp,
+                   ip6oa->ip6oa_flags & IP6OAF_QOSMARKING_ALLOWED ? TRUE : FALSE,
+                   ip6oa->ip6oa_sotc, ip6oa->ip6oa_netsvctype, &dscp);
+               if (error == 0) {
+                       ip6->ip6_flow &= ~htonl(IP6FLOW_DSCP_MASK);
+                       ip6->ip6_flow |= htonl((u_int32_t)dscp << IP6FLOW_DSCP_SHIFT);
+               } else {
+                       printf("%s if_dscp_for_mbuf() error %d\n", __func__, error);
+                       error = 0;
+               }
+       }
        /*
-        * Send the packet to the outgoing interface.
-        * If necessary, do IPv6 fragmentation before sending.
-        *
-        * the logic here is rather complex:
-        * 1: normal case (dontfrag == 0, alwaysfrag == 0)
-        * 1-a: send as is if tlen <= path mtu
-        * 1-b: fragment if tlen > path mtu
-        *
-        * 2: if user asks us not to fragment (dontfrag == 1)
-        * 2-a: send as is if tlen <= interface mtu
-        * 2-b: error if tlen > interface mtu
-        *
-        * 3: if we always need to attach fragment header (alwaysfrag == 1)
-        *      always fragment
-        *
-        * 4: if dontfrag == 1 && alwaysfrag == 1
-        *      error, as we cannot handle this conflicting request
+        * Determine whether fragmentation is necessary. If so, m is passed
+        * back as a chain of packets and original mbuf is freed. Otherwise, m
+        * is unchanged.
         */
-       tlen = m->m_pkthdr.len;
+       error = ip6_fragment_packet(&m, opt,
+           &exthdrs, ifp, mtu, alwaysfrag, unfragpartlen, ro_pmtu, nxt0,
+           optlen);
 
-       if (opt != NULL && (opt->ip6po_flags & IP6PO_DONTFRAG))
-               ip6obf.dontfrag = TRUE;
-       else
-               ip6obf.dontfrag = FALSE;
-       if (ip6obf.dontfrag && alwaysfrag) {    /* case 4 */
-               /* conflicting request - can't transmit */
-               error = EMSGSIZE;
+       if (error)
                goto bad;
+
+/*
+ * The evaluateloop label is where we decide whether to continue looping over
+ * packets or call into nd code to send.
+ */
+evaluateloop:
+
+       /*
+        * m may be NULL when we jump to the evaluateloop label from PF or
+        * other code that can drop packets.
+        */
+       if (m != NULL) {
+               /*
+                * If we already have a chain to send, tack m onto the end.
+                * Otherwise make m the start and end of the to-be-sent chain.
+                */
+               if (sendchain != NULL) {
+                       sendchain_last->m_nextpkt = m;
+               } else {
+                       sendchain = m;
+               }
+
+               /* Fragmentation may mean m is a chain. Find the last packet. */
+               while (m->m_nextpkt)
+                       m = m->m_nextpkt;
+               sendchain_last = m;
+               pktcnt++;
+       }
+
+       /* Fill in next m from inputchain as appropriate. */
+       m = inputchain;
+       if (m != NULL) {
+               /* Isolate m from rest of input chain. */
+               inputchain = m->m_nextpkt;
+               m->m_nextpkt = NULL;
+
+               /*
+                * Clear exthdrs and ipsec_state so stale contents are not
+                * reused. Note this also clears the exthdrs.merged flag.
+                */
+               bzero(&exthdrs, sizeof(exthdrs));
+               bzero(&ipsec_state, sizeof(ipsec_state));
+
+               /* Continue looping. */
+               goto loopit;
+       }
+
+       /*
+        * If we get here, there are no more mbufs in inputchain, so send the
+        * sendchain if there is one.
+        */
+       if (pktcnt > 0) {
+               error = nd6_output_list(ifp, origifp, sendchain, dst,
+                   ro->ro_rt, adv);
+               /*
+                * Fall through to done label even in error case because
+                * nd6_output_list frees packetchain in both success and
+                * failure cases.
+                */
+       }
+
+done:
+       if (ifpp_save != NULL && *ifpp_save != NULL) {
+               ifnet_release(*ifpp_save);
+               *ifpp_save = NULL;
+       }
+       ROUTE_RELEASE(&ip6route);
+#if IPSEC
+       ROUTE_RELEASE(&ipsec_state.ro);
+       if (sp != NULL)
+               key_freesp(sp, KEY_SADB_UNLOCKED);
+#endif /* IPSEC */
+#if NECP
+       ROUTE_RELEASE(&necp_route);
+#endif /* NECP */
+#if DUMMYNET
+       ROUTE_RELEASE(&saved_route);
+       ROUTE_RELEASE(&saved_ro_pmtu);
+#endif /* DUMMYNET */
+
+       if (ia != NULL)
+               IFA_REMREF(&ia->ia_ifa);
+       if (src_ia != NULL)
+               IFA_REMREF(&src_ia->ia_ifa);
+       if (ifp != NULL)
+               ifnet_release(ifp);
+       if (origifp != NULL)
+               ifnet_release(origifp);
+       if (ip6_output_measure) {
+               net_perf_measure_time(&net_perf, &start_tv, packets_processed);
+               net_perf_histogram(&net_perf, packets_processed);
+       }
+       return (error);
+
+freehdrs:
+       if (exthdrs.ip6e_hbh != NULL) {
+               if (exthdrs.merged)
+                       panic("Double free of ip6e_hbh");
+               m_freem(exthdrs.ip6e_hbh);
+       }
+       if (exthdrs.ip6e_dest1 != NULL) {
+               if (exthdrs.merged)
+                       panic("Double free of ip6e_dest1");
+               m_freem(exthdrs.ip6e_dest1);
+       }
+       if (exthdrs.ip6e_rthdr != NULL) {
+               if (exthdrs.merged)
+                       panic("Double free of ip6e_rthdr");
+               m_freem(exthdrs.ip6e_rthdr);
+       }
+       if (exthdrs.ip6e_dest2 != NULL) {
+               if (exthdrs.merged)
+                       panic("Double free of ip6e_dest2");
+               m_freem(exthdrs.ip6e_dest2);
+       }
+       /* FALLTHRU */
+bad:
+       if (inputchain != NULL)
+               m_freem_list(inputchain);
+       if (sendchain != NULL)
+               m_freem_list(sendchain);
+       if (m != NULL)
+               m_freem(m);
+
+       goto done;
+
+#undef ipf_pktopts
+#undef exthdrs
+#undef ip6route
+#undef ipsec_state
+#undef saved_route
+#undef saved_ro_pmtu
+#undef args
+}
+
+/* ip6_fragment_packet
+ *
+ * The fragmentation logic is rather complex:
+ * 1: normal case (dontfrag == 0, alwaysfrag == 0)
+ * 1-a:        send as is if tlen <= path mtu
+ * 1-b:        fragment if tlen > path mtu
+ *
+ * 2: if user asks us not to fragment (dontfrag == 1)
+ * 2-a:        send as is if tlen <= interface mtu
+ * 2-b:        error if tlen > interface mtu
+ *
+ * 3: if we always need to attach fragment header (alwaysfrag == 1)
+ *     always fragment
+ *
+ * 4: if dontfrag == 1 && alwaysfrag == 1
+ *     error, as we cannot handle this conflicting request
+ */
+
+static int
+ip6_fragment_packet(struct mbuf **mptr, struct ip6_pktopts *opt,
+     struct ip6_exthdrs *exthdrsp, struct ifnet *ifp, uint32_t mtu,
+     boolean_t alwaysfrag, uint32_t unfragpartlen, struct route_in6 *ro_pmtu,
+     int nxt0, uint32_t optlen)
+{
+       VERIFY(NULL != mptr);
+       struct mbuf *m = *mptr;
+       int error = 0;
+       size_t tlen = m->m_pkthdr.len;
+       boolean_t dontfrag = (opt != NULL && (opt->ip6po_flags & IP6PO_DONTFRAG));
+
+       if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
+               dontfrag = TRUE;
+               /*
+                * Discard partial sum information if this packet originated
+                * from another interface; the packet would already have the
+                * final checksum and we shouldn't recompute it.
+                */
+               if ((m->m_pkthdr.csum_flags & (CSUM_DATA_VALID|CSUM_PARTIAL)) ==
+                   (CSUM_DATA_VALID|CSUM_PARTIAL)) {
+                       m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
+                       m->m_pkthdr.csum_data = 0;
+               }
+       }
+
+       if (dontfrag && alwaysfrag) {   /* case 4 */
+               /* conflicting request - can't transmit */
+               return EMSGSIZE;
        }
 
-       lck_rw_lock_shared(nd_if_rwlock);
        /* Access without acquiring nd_ifinfo lock for performance */
-       if (ip6obf.dontfrag && tlen > IN6_LINKMTU(ifp)) {       /* case 2-b */
-               lck_rw_done(nd_if_rwlock);
+       if (dontfrag && tlen > IN6_LINKMTU(ifp)) {      /* case 2-b */
                /*
                 * Even if the DONTFRAG option is specified, we cannot send the
                 * packet when the data length is larger than the MTU of the
@@ -1410,51 +1699,71 @@ check_with_pf:
                bzero(&ip6cp, sizeof (ip6cp));
                ip6cp.ip6c_cmdarg = (void *)&mtu32;
                pfctlinput2(PRC_MSGSIZE, SA(&ro_pmtu->ro_dst), (void *)&ip6cp);
-               error = EMSGSIZE;
-               goto bad;
-       } else {
-               lck_rw_done(nd_if_rwlock);
+               return EMSGSIZE;
        }
 
        /*
         * transmit packet without fragmentation
         */
-       if (ip6obf.dontfrag || (!alwaysfrag &&          /* case 1-a and 2-a */
+       if (dontfrag || (!alwaysfrag &&         /* case 1-a and 2-a */
            (tlen <= mtu || TSO_IPV6_OK(ifp, m) ||
            (ifp->if_hwassist & CSUM_FRAGMENT_IPV6)))) {
-#ifdef IPSEC
-               /* clean ipsec history once it goes out of the node */
-               ipsec_delaux(m);
-#endif /* IPSEC */
-
+               /*
+                * mptr is left untouched in this case because no new chain is
+                * formed and inserted
+                */
                ip6_output_checksum(ifp, mtu, m, nxt0, tlen, optlen);
-
-               if (ro->ro_rt)
-                       RT_LOCK_ASSERT_NOTHELD(ro->ro_rt);
-               error = nd6_output(ifp, origifp, m, dst, ro->ro_rt, adv);
-               goto done;
+       } else {
+               /*
+                * time to fragment - cases 1-b and 3 are handled inside
+                * ip6_do_fragmentation().
+                * mptr is passed down to be updated to point at fragment chain.
+                */
+               error = ip6_do_fragmentation(mptr, optlen, ifp,
+                   unfragpartlen, mtod(m, struct ip6_hdr *), exthdrsp, mtu, nxt0);
        }
 
+       return error;
+}
+
+/*
+ * ip6_do_fragmentation() is called by ip6_fragment_packet() after determining
+ * the packet needs to be fragmented. On success, morig is freed and a chain
+ * of fragments is linked into the packet chain where morig existed. Otherwise,
+ * an errno is returned.
+ */
+int
+ip6_do_fragmentation(struct mbuf **mptr, uint32_t optlen, struct ifnet *ifp,
+    uint32_t unfragpartlen, struct ip6_hdr *ip6, struct ip6_exthdrs *exthdrsp,
+    uint32_t mtu, int nxt0)
+{
+       VERIFY(NULL != mptr);
+       int error = 0;
+
+       struct mbuf *morig = *mptr;
+       struct mbuf *first_mbufp = NULL;
+       struct mbuf *last_mbufp = NULL;
+
+       size_t tlen = morig->m_pkthdr.len;
+
        /*
         * try to fragment the packet.  case 1-b and 3
         */
-       if ((m->m_pkthdr.csum_flags & CSUM_TSO_IPV6)) {
+       if ((morig->m_pkthdr.csum_flags & CSUM_TSO_IPV6)) {
                /* TSO and fragment aren't compatible */
-               error = EMSGSIZE;
                in6_ifstat_inc(ifp, ifs6_out_fragfail);
-               goto bad;
+               return EMSGSIZE;
        } else if (mtu < IPV6_MMTU) {
                /* path MTU cannot be less than IPV6_MMTU */
-               error = EMSGSIZE;
                in6_ifstat_inc(ifp, ifs6_out_fragfail);
-               goto bad;
+               return EMSGSIZE;
        } else if (ip6->ip6_plen == 0) {
                /* jumbo payload cannot be fragmented */
-               error = EMSGSIZE;
                in6_ifstat_inc(ifp, ifs6_out_fragfail);
-               goto bad;
+               return EMSGSIZE;
        } else {
-               struct mbuf **mnext, *m_frgpart;
+               size_t hlen, len, off;
+               struct mbuf **mnext = NULL;
                struct ip6_frag *ip6f;
                u_int32_t id = htonl(ip6_randomid());
                u_char nextproto;
@@ -1470,84 +1779,95 @@ check_with_pf:
 
                len = (mtu - hlen - sizeof (struct ip6_frag)) & ~7;
                if (len < 8) {
-                       error = EMSGSIZE;
                        in6_ifstat_inc(ifp, ifs6_out_fragfail);
-                       goto bad;
+                       return EMSGSIZE;
                }
 
-               mnext = &m->m_nextpkt;
-
                /*
                 * Change the next header field of the last header in the
                 * unfragmentable part.
                 */
-               if (exthdrs.ip6e_rthdr != NULL) {
-                       nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *);
-                       *mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT;
-               } else if (exthdrs.ip6e_dest1 != NULL) {
-                       nextproto = *mtod(exthdrs.ip6e_dest1, u_char *);
-                       *mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT;
-               } else if (exthdrs.ip6e_hbh != NULL) {
-                       nextproto = *mtod(exthdrs.ip6e_hbh, u_char *);
-                       *mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT;
+               if (exthdrsp->ip6e_rthdr != NULL) {
+                       nextproto = *mtod(exthdrsp->ip6e_rthdr, u_char *);
+                       *mtod(exthdrsp->ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT;
+               } else if (exthdrsp->ip6e_dest1 != NULL) {
+                       nextproto = *mtod(exthdrsp->ip6e_dest1, u_char *);
+                       *mtod(exthdrsp->ip6e_dest1, u_char *) = IPPROTO_FRAGMENT;
+               } else if (exthdrsp->ip6e_hbh != NULL) {
+                       nextproto = *mtod(exthdrsp->ip6e_hbh, u_char *);
+                       *mtod(exthdrsp->ip6e_hbh, u_char *) = IPPROTO_FRAGMENT;
                } else {
                        nextproto = ip6->ip6_nxt;
                        ip6->ip6_nxt = IPPROTO_FRAGMENT;
                }
 
-               if (m->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA)
-                       in6_delayed_cksum_offset(m, 0, optlen, nxt0);
+               if (morig->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA)
+                       in6_delayed_cksum_offset(morig, 0, optlen, nxt0);
 
                /*
                 * Loop through length of segment after first fragment,
                 * make new header and copy data of each part and link onto
                 * chain.
                 */
-               m0 = m;
                for (off = hlen; off < tlen; off += len) {
-                       struct ip6_hdr *mhip6;
+                       struct ip6_hdr *new_mhip6;
+                       struct mbuf *new_m;
+                       struct mbuf *m_frgpart;
 
-                       MGETHDR(m, M_DONTWAIT, MT_HEADER);      /* MAC-OK */
-                       if (m == NULL) {
+                       MGETHDR(new_m, M_DONTWAIT, MT_HEADER);  /* MAC-OK */
+                       if (new_m == NULL) {
                                error = ENOBUFS;
                                ip6stat.ip6s_odropped++;
-                               goto sendorfree;
+                               break;
                        }
-                       m->m_pkthdr.rcvif = NULL;
-                       m->m_flags = m0->m_flags & M_COPYFLAGS;
-                       *mnext = m;
-                       mnext = &m->m_nextpkt;
-                       m->m_data += max_linkhdr;
-                       mhip6 = mtod(m, struct ip6_hdr *);
-                       *mhip6 = *ip6;
-                       m->m_len = sizeof (*mhip6);
-                       error = ip6_insertfraghdr(m0, m, hlen, &ip6f);
+                       new_m->m_pkthdr.rcvif = NULL;
+                       new_m->m_flags = morig->m_flags & M_COPYFLAGS;
+
+                       if (first_mbufp != NULL) {
+                               /* Every pass through loop but first */
+                               *mnext = new_m;
+                               last_mbufp = new_m;
+                       } else {
+                               /* This is the first element of the fragment chain */
+                               first_mbufp = new_m;
+                               last_mbufp = new_m;
+                       }
+                       mnext = &new_m->m_nextpkt;
+
+                       new_m->m_data += max_linkhdr;
+                       new_mhip6 = mtod(new_m, struct ip6_hdr *);
+                       *new_mhip6 = *ip6;
+                       new_m->m_len = sizeof (*new_mhip6);
+
+                       error = ip6_insertfraghdr(morig, new_m, hlen, &ip6f);
                        if (error) {
                                ip6stat.ip6s_odropped++;
-                               goto sendorfree;
+                               break;
                        }
+
                        ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7));
                        if (off + len >= tlen)
                                len = tlen - off;
                        else
                                ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
-                       mhip6->ip6_plen = htons((u_short)(len + hlen +
+                       new_mhip6->ip6_plen = htons((u_short)(len + hlen +
                            sizeof (*ip6f) - sizeof (struct ip6_hdr)));
-                       if ((m_frgpart = m_copy(m0, off, len)) == NULL) {
+
+                       if ((m_frgpart = m_copy(morig, off, len)) == NULL) {
                                error = ENOBUFS;
                                ip6stat.ip6s_odropped++;
-                               goto sendorfree;
+                               break;
                        }
-                       m_cat(m, m_frgpart);
-                       m->m_pkthdr.len = len + hlen + sizeof (*ip6f);
-                       m->m_pkthdr.rcvif = NULL;
+                       m_cat(new_m, m_frgpart);
+                       new_m->m_pkthdr.len = len + hlen + sizeof (*ip6f);
+                       new_m->m_pkthdr.rcvif = NULL;
 
-                       M_COPY_CLASSIFIER(m, m0);
-                       M_COPY_PFTAG(m, m0);
+                       M_COPY_CLASSIFIER(new_m, morig);
+                       M_COPY_PFTAG(new_m, morig);
 
 #ifdef notyet
 #if CONFIG_MACF_NET
-                       mac_create_fragment(m0, m);
+                       mac_create_fragment(morig, new_m);
 #endif /* CONFIG_MACF_NET */
 #endif /* notyet */
 
@@ -1558,78 +1878,23 @@ check_with_pf:
                        in6_ifstat_inc(ifp, ifs6_out_fragcreat);
                }
 
-               in6_ifstat_inc(ifp, ifs6_out_fragok);
-       }
-
-       /*
-        * Remove leading garbages.
-        */
-sendorfree:
-       m = m0->m_nextpkt;
-       m0->m_nextpkt = NULL;
-       m_freem(m0);
-       for (m0 = m; m != NULL; m = m0) {
-               m0 = m->m_nextpkt;
-               m->m_nextpkt = NULL;
-               if (error == 0) {
-#if IPSEC
-                       /* clean ipsec history once it goes out of the node */
-                       ipsec_delaux(m);
-#endif /* IPSEC */
-                       error = nd6_output(ifp, origifp, m, dst, ro->ro_rt,
-                           adv);
+               if (error) {
+                       /* free all the fragments created */
+                       if (first_mbufp != NULL) {
+                               m_freem_list(first_mbufp);
+                               first_mbufp = NULL;
+                       }
+                       last_mbufp = NULL;
                } else {
-                       m_freem(m);
+                       /* successful fragmenting */
+                       m_freem(morig);
+                       *mptr = first_mbufp;
+                       last_mbufp->m_nextpkt = NULL;
+                       ip6stat.ip6s_fragmented++;
+                       in6_ifstat_inc(ifp, ifs6_out_fragok);
                }
        }
-
-       if (error == 0)
-               ip6stat.ip6s_fragmented++;
-
-done:
-       ROUTE_RELEASE(&ip6route);
-#if IPSEC
-       ROUTE_RELEASE(&ipsec_state.ro);
-       if (sp != NULL)
-               key_freesp(sp, KEY_SADB_UNLOCKED);
-#endif /* IPSEC */
-#if DUMMYNET
-       ROUTE_RELEASE(&saved_route);
-       ROUTE_RELEASE(&saved_ro_pmtu);
-#endif /* DUMMYNET */
-
-       if (ia != NULL)
-               IFA_REMREF(&ia->ia_ifa);
-       if (src_ia != NULL)
-               IFA_REMREF(&src_ia->ia_ifa);
-       if (ifp != NULL)
-               ifnet_release(ifp);
-       if (origifp != NULL)
-               ifnet_release(origifp);
-       return (error);
-
-freehdrs:
-       if (exthdrs.ip6e_hbh != NULL)
-               m_freem(exthdrs.ip6e_hbh);
-       if (exthdrs.ip6e_dest1 != NULL)
-               m_freem(exthdrs.ip6e_dest1);
-       if (exthdrs.ip6e_rthdr != NULL)
-               m_freem(exthdrs.ip6e_rthdr);
-       if (exthdrs.ip6e_dest2 != NULL)
-               m_freem(exthdrs.ip6e_dest2);
-       /* FALLTHRU */
-bad:
-       if (m != NULL)
-               m_freem(m);
-       goto done;
-
-#undef ipf_pktopts
-#undef exthdrs
-#undef ip6route
-#undef ipsec_state
-#undef saved_route
-#undef saved_ro_pmtu
-#undef args
+       return error;
 }
 
 static int
@@ -1798,7 +2063,8 @@ in6_finalize_cksum(struct mbuf *m, uint32_t hoff, int32_t optlen,
        ip6_out_cksum_stats(nxt, plen - olen);
 
        /* RFC1122 4.1.3.4 */
-       if (csum == 0 && (m->m_pkthdr.csum_flags & CSUM_UDPIPV6))
+       if (csum == 0 &&
+           (m->m_pkthdr.csum_flags & (CSUM_UDPIPV6|CSUM_ZERO_INVERT)))
                csum = 0xffff;
 
        /* Insert the checksum in the ULP csum field */
@@ -1810,8 +2076,8 @@ in6_finalize_cksum(struct mbuf *m, uint32_t hoff, int32_t optlen,
        } else {
                bcopy(&csum, (mtod(m, char *) + offset), sizeof (csum));
        }
-       m->m_pkthdr.csum_flags &=
-           ~(CSUM_DELAY_IPV6_DATA | CSUM_DATA_VALID | CSUM_PARTIAL);
+       m->m_pkthdr.csum_flags &= ~(CSUM_DELAY_IPV6_DATA | CSUM_DATA_VALID |
+           CSUM_PARTIAL | CSUM_ZERO_INVERT);
 
 done:
        return (sw_csum);
@@ -1964,6 +2230,10 @@ ip6_getpmtu(struct route_in6 *ro_pmtu, struct route_in6 *ro,
        u_int32_t mtu = 0;
        boolean_t alwaysfrag = FALSE;
        int error = 0;
+       boolean_t is_local = FALSE;
+
+       if (IN6_IS_SCOPE_LINKLOCAL(dst))
+               is_local = TRUE;
 
        if (ro_pmtu != ro) {
                /* The first hop and the final destination may differ. */
@@ -1986,10 +2256,10 @@ ip6_getpmtu(struct route_in6 *ro_pmtu, struct route_in6 *ro,
        if (ro_pmtu->ro_rt != NULL) {
                u_int32_t ifmtu;
 
-               lck_rw_lock_shared(nd_if_rwlock);
+               if (ifp == NULL)
+                       ifp = ro_pmtu->ro_rt->rt_ifp;
                /* Access without acquiring nd_ifinfo lock for performance */
                ifmtu = IN6_LINKMTU(ifp);
-               lck_rw_done(nd_if_rwlock);
 
                /*
                 * Access rmx_mtu without holding the route entry lock,
@@ -2026,17 +2296,15 @@ ip6_getpmtu(struct route_in6 *ro_pmtu, struct route_in6 *ro,
                }
        } else {
                if (ifp) {
-                       lck_rw_lock_shared(nd_if_rwlock);
                        /* Don't hold nd_ifinfo lock for performance */
                        mtu = IN6_LINKMTU(ifp);
-                       lck_rw_done(nd_if_rwlock);
                } else {
                        error = EHOSTUNREACH; /* XXX */
                }
        }
 
        *mtup = mtu;
-       if (alwaysfragp != NULL)
+       if ((alwaysfragp != NULL) && !is_local)
                *alwaysfragp = alwaysfrag;
        return (error);
 }
@@ -2068,6 +2336,7 @@ ip6_ctloutput(struct socket *so, struct sockopt *sopt)
        privileged = (proc_suser(p) == 0);
 
        if (level == IPPROTO_IPV6) {
+               boolean_t capture_exthdrstat_in = FALSE;
                switch (op) {
                case SOPT_SET:
                        switch (optname) {
@@ -2194,6 +2463,7 @@ ip6_ctloutput(struct socket *so, struct sockopt *sopt)
                                                break;
                                        }
                                        OPTSET(IN6P_HOPOPTS);
+                                       capture_exthdrstat_in = TRUE;
                                        break;
 
                                case IPV6_RECVDSTOPTS:
@@ -2203,6 +2473,7 @@ ip6_ctloutput(struct socket *so, struct sockopt *sopt)
                                                break;
                                        }
                                        OPTSET(IN6P_DSTOPTS);
+                                       capture_exthdrstat_in = TRUE;
                                        break;
 
                                case IPV6_RECVRTHDRDSTOPTS:
@@ -2212,6 +2483,7 @@ ip6_ctloutput(struct socket *so, struct sockopt *sopt)
                                                break;
                                        }
                                        OPTSET(IN6P_RTHDRDSTOPTS);
+                                       capture_exthdrstat_in = TRUE;
                                        break;
 
                                case IPV6_RECVRTHDR:
@@ -2221,6 +2493,7 @@ ip6_ctloutput(struct socket *so, struct sockopt *sopt)
                                                break;
                                        }
                                        OPTSET(IN6P_RTHDR);
+                                       capture_exthdrstat_in = TRUE;
                                        break;
 
                                case IPV6_RECVPATHMTU:
@@ -2283,6 +2556,13 @@ ip6_ctloutput(struct socket *so, struct sockopt *sopt)
                                optp = &in6p->in6p_outputopts;
                                error = ip6_pcbopt(optname, (u_char *)&optval,
                                    sizeof (optval), optp, uproto);
+
+                               if (optname == IPV6_TCLASS) {
+                                       // Add in the ECN flags
+                                       u_int8_t tos = (in6p->inp_ip_tos & ~IPTOS_ECN_MASK);
+                                       u_int8_t ecn = optval & IPTOS_ECN_MASK;
+                                       in6p->inp_ip_tos = tos | ecn;
+                               }
                                break;
                        }
 
@@ -2315,15 +2595,18 @@ ip6_ctloutput(struct socket *so, struct sockopt *sopt)
                                        if (!privileged)
                                                return (EPERM);
                                        OPTSET2292(IN6P_HOPOPTS);
+                                       capture_exthdrstat_in = TRUE;
                                        break;
                                case IPV6_2292DSTOPTS:
                                        if (!privileged)
                                                return (EPERM);
                                        OPTSET2292(IN6P_DSTOPTS|
                                            IN6P_RTHDRDSTOPTS); /* XXX */
+                                       capture_exthdrstat_in = TRUE;
                                        break;
                                case IPV6_2292RTHDR:
                                        OPTSET2292(IN6P_RTHDR);
+                                       capture_exthdrstat_in = TRUE;
                                        break;
                                }
                                break;
@@ -2413,25 +2696,11 @@ ip6_ctloutput(struct socket *so, struct sockopt *sopt)
                                req = mtod(m, caddr_t);
                                len = m->m_len;
                                error = ipsec6_set_policy(in6p, optname, req,
-                                   len, privileged);
+                                       len, privileged);
                                m_freem(m);
                                break;
                        }
 #endif /* IPSEC */
-#if IPFIREWALL
-                       case IPV6_FW_ADD:
-                       case IPV6_FW_DEL:
-                       case IPV6_FW_FLUSH:
-                       case IPV6_FW_ZERO: {
-                               if (ip6_fw_ctl_ptr == NULL)
-                                       load_ip6fw();
-                               if (ip6_fw_ctl_ptr != NULL)
-                                       error = (*ip6_fw_ctl_ptr)(sopt);
-                               else
-                                       error = ENOPROTOOPT;
-                               break;
-                       }
-#endif /* IPFIREWALL */
                        /*
                         * IPv6 variant of IP_BOUND_IF; for details see
                         * comments on IP_BOUND_IF in ip_ctloutput().
@@ -2466,8 +2735,7 @@ ip6_ctloutput(struct socket *so, struct sockopt *sopt)
                                        break;
 
                                /* once set, it cannot be unset */
-                               if (!optval &&
-                                   (in6p->inp_flags & INP_NO_IFT_CELLULAR)) {
+                               if (!optval && INP_NO_CELLULAR(in6p)) {
                                        error = EINVAL;
                                        break;
                                }
@@ -2485,6 +2753,13 @@ ip6_ctloutput(struct socket *so, struct sockopt *sopt)
                                error = ENOPROTOOPT;
                                break;
                        }
+                       if (capture_exthdrstat_in) {
+                               if (uproto == IPPROTO_TCP) {
+                                       INC_ATOMIC_INT64_LIM(net_api_stats.nas_sock_inet6_stream_exthdr_in);
+                               } else if (uproto == IPPROTO_UDP) {
+                                       INC_ATOMIC_INT64_LIM(net_api_stats.nas_sock_inet6_dgram_exthdr_in);
+                               }    
+                       }    
                        break;
 
                case SOPT_GET:
@@ -2658,40 +2933,10 @@ ip6_ctloutput(struct socket *so, struct sockopt *sopt)
                                break;
 #if IPSEC
                        case IPV6_IPSEC_POLICY: {
-                               caddr_t req = NULL;
-                               size_t len = 0;
-                               struct mbuf *m = NULL;
-                               struct mbuf *mp = NULL;
-
-                               error = soopt_getm(sopt, &m);
-                               if (error != 0)
-                                       break;
-                               error = soopt_mcopyin(sopt, m);
-                               if (error != 0)
-                                       break;
-
-                               req = mtod(m, caddr_t);
-                               len = m->m_len;
-                               error = ipsec6_get_policy(in6p, req, len, &mp);
-                               if (error == 0)
-                                       error = soopt_mcopyout(sopt, mp);
-                               if (mp != NULL)
-                                       m_freem(mp);
-                               m_freem(m);
+                               error = 0; /* This option is no longer supported */
                                break;
                        }
 #endif /* IPSEC */
-#if IPFIREWALL
-                       case IPV6_FW_GET: {
-                               if (ip6_fw_ctl_ptr == NULL)
-                                       load_ip6fw();
-                               if (ip6_fw_ctl_ptr != NULL)
-                                       error = (*ip6_fw_ctl_ptr)(sopt);
-                               else
-                                       error = ENOPROTOOPT;
-                               break;
-                       }
-#endif /* IPFIREWALL */
                        case IPV6_BOUND_IF:
                                if (in6p->inp_flags & INP_BOUND_IF)
                                        optval = in6p->inp_boundifp->if_index;
@@ -2700,8 +2945,7 @@ ip6_ctloutput(struct socket *so, struct sockopt *sopt)
                                break;
 
                        case IPV6_NO_IFT_CELLULAR:
-                               optval = (in6p->inp_flags & INP_NO_IFT_CELLULAR)
-                                   ? 1 : 0;
+                               optval = INP_NO_CELLULAR(in6p) ? 1 : 0;
                                error = sooptcopyout(sopt, &optval,
                                    sizeof (optval));
                                break;
@@ -2719,6 +2963,8 @@ ip6_ctloutput(struct socket *so, struct sockopt *sopt)
                        }
                        break;
                }
+       } else if (level == IPPROTO_UDP) {
+               error = udp_ctloutput(so, sopt);
        } else {
                error = EINVAL;
        }
@@ -3313,6 +3559,7 @@ ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt,
 {
        int minmtupolicy, preftemp;
        int error;
+       boolean_t capture_exthdrstat_out = FALSE;
 
        if (!sticky && !cmsg) {
 #ifdef DIAGNOSTIC
@@ -3540,7 +3787,7 @@ ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt,
                if (opt->ip6po_hbh == NULL)
                        return (ENOBUFS);
                bcopy(hbh, opt->ip6po_hbh, hbhlen);
-
+               capture_exthdrstat_out = TRUE;
                break;
        }
 
@@ -3604,6 +3851,7 @@ ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt,
                if (*newdest == NULL)
                        return (ENOBUFS);
                bcopy(dest, *newdest, destlen);
+               capture_exthdrstat_out = TRUE;
                break;
        }
 
@@ -3644,6 +3892,7 @@ ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt,
                if (opt->ip6po_rthdr == NULL)
                        return (ENOBUFS);
                bcopy(rth, opt->ip6po_rthdr, rthlen);
+               capture_exthdrstat_out = TRUE;
                break;
        }
 
@@ -3690,6 +3939,14 @@ ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt,
                return (ENOPROTOOPT);
        } /* end of switch */
 
+       if (capture_exthdrstat_out) {
+               if (uproto == IPPROTO_TCP) {
+                       INC_ATOMIC_INT64_LIM(net_api_stats.nas_sock_inet6_stream_exthdr_out);
+               } else if (uproto == IPPROTO_UDP) {
+                       INC_ATOMIC_INT64_LIM(net_api_stats.nas_sock_inet6_dgram_exthdr_out);
+               }
+       }
+
        return (0);
 }
 
@@ -3823,13 +4080,15 @@ ip6_output_checksum(struct ifnet *ifp, uint32_t mtu, struct mbuf *m,
        } else if (!(sw_csum & CSUM_DELAY_IPV6_DATA) &&
            (hwcap & CSUM_PARTIAL)) {
                /*
-                * Partial checksum offload, ere), if no extension
-                * headers, and TCP only (no UDP support, as the
-                * hardware may not be able to convert +0 to
-                * -0 (0xffff) per RFC1122 4.1.3.4.)
+                * Partial checksum offload, if no extension headers,
+                * and TCP only (no UDP support, as the hardware may not be
+                * able to convert +0 to -0 (0xffff) per RFC1122 4.1.3.4.
+                * unless the interface supports "invert zero" capability.)
                 */
                if (hwcksum_tx && !tso &&
-                   (m->m_pkthdr.csum_flags & CSUM_TCPIPV6) &&
+                   ((m->m_pkthdr.csum_flags & CSUM_TCPIPV6) ||
+                   ((hwcap & CSUM_ZERO_INVERT) &&
+                   (m->m_pkthdr.csum_flags & CSUM_ZERO_INVERT))) &&
                    tlen <= mtu) {
                        uint16_t start = sizeof (struct ip6_hdr);
                        uint16_t ulpoff =
@@ -3891,3 +4150,57 @@ ip6_optlen(struct in6pcb *in6p)
        return (len);
 #undef elen
 }
+
+/*
+ * Sysctl handler for ip6_output_measure: a written value must be 0 or 1
+ * (else EINVAL); changing it to 1 reinitializes net_perf beforehand.
+ */
+static int
+sysctl_reset_ip6_output_stats SYSCTL_HANDLER_ARGS
+{
+#pragma unused(arg1, arg2)
+       int val = ip6_output_measure;
+       int error = sysctl_handle_int(oidp, &val, 0, req);
+
+       /* stop if the handler failed or no new value was supplied */
+       if (error != 0 || req->newptr == USER_ADDR_NULL)
+               return (error);
+       if (val != 0 && val != 1)
+               return (EINVAL);
+       /* restart collection only when the value changes to 1 */
+       if (val == 1 && ip6_output_measure != val)
+               net_perf_initialize(&net_perf, ip6_output_measure_bins);
+       ip6_output_measure = val;
+       return (0);
+}
+
+/*
+ * Sysctl handler for ip6_output_measure_bins: passes the current value
+ * through sysctl_handle_quad() and stores a written value only if
+ * net_perf_validate_bins() accepts it; otherwise fails with EINVAL.
+ */
+static int
+sysctl_ip6_output_measure_bins SYSCTL_HANDLER_ARGS
+{
+#pragma unused(arg1, arg2)
+       uint64_t bins = ip6_output_measure_bins;
+       int error = sysctl_handle_quad(oidp, &bins, 0, req);
+
+       /* stop if the handler failed or no new value was supplied */
+       if (error != 0 || req->newptr == USER_ADDR_NULL)
+               return (error);
+       if (!net_perf_validate_bins(bins))
+               return (EINVAL);
+       ip6_output_measure_bins = bins;
+       return (0);
+}
+
+/* Sysctl handler: copies out at most req->oldlen bytes of net_perf. */
+static int
+sysctl_ip6_output_getperf SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp, arg1, arg2)
+       if (req->oldptr == USER_ADDR_NULL)      /* no output buffer given */
+               req->oldlen = sizeof (net_perf);        /* size of what is copied */
+       return (SYSCTL_OUT(req, &net_perf, MIN(sizeof (net_perf), req->oldlen)));
+}