/*
- * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2017 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
#include <net/if.h>
#include <net/route.h>
#include <net/dlil.h>
+#include <net/net_api_stats.h>
#include <net/net_osdep.h>
+#include <net/net_perf.h>
+#include <netinet/ip.h>
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/ip_var.h>
#include <netinet6/in6_var.h>
#include <netinet/ip6.h>
#include <netinet/kpi_ipfilter_var.h>
+#include <netinet/in_tclass.h>
#include <netinet6/ip6protosw.h>
#include <netinet/icmp6.h>
extern int ipsec_bypass;
#endif /* IPSEC */
+#if NECP
+#include <net/necp.h>
+#endif /* NECP */
+
#if CONFIG_MACF_NET
#include <security/mac.h>
#endif /* CONFIG_MACF_NET */
#if DUMMYNET
-#include <netinet6/ip6_fw.h>
#include <netinet/ip_fw.h>
#include <netinet/ip_dummynet.h>
#endif /* DUMMYNET */
#include <net/pfvar.h>
#endif /* PF */
+static int sysctl_reset_ip6_output_stats SYSCTL_HANDLER_ARGS;
+static int sysctl_ip6_output_measure_bins SYSCTL_HANDLER_ARGS;
+static int sysctl_ip6_output_getperf SYSCTL_HANDLER_ARGS;
static int ip6_copyexthdr(struct mbuf **, caddr_t, int);
static void ip6_out_cksum_stats(int, u_int32_t);
static int ip6_insert_jumboopt(struct ip6_exthdrs *, u_int32_t);
static int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *);
static void ip6_output_checksum(struct ifnet *, uint32_t, struct mbuf *,
int, uint32_t, uint32_t);
+extern int udp_ctloutput(struct socket *, struct sockopt *);
+static int ip6_fragment_packet(struct mbuf **m,
+ struct ip6_pktopts *opt, struct ip6_exthdrs *exthdrsp, struct ifnet *ifp,
+ uint32_t mtu, boolean_t alwaysfrag, uint32_t unfragpartlen,
+ struct route_in6 *ro_pmtu, int nxt0, uint32_t optlen);
+
+SYSCTL_DECL(_net_inet6_ip6);
+
+static int ip6_output_measure = 0;
+SYSCTL_PROC(_net_inet6_ip6, OID_AUTO, output_perf,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
+ &ip6_output_measure, 0, sysctl_reset_ip6_output_stats, "I", "Do time measurement");
+
+static uint64_t ip6_output_measure_bins = 0;
+SYSCTL_PROC(_net_inet6_ip6, OID_AUTO, output_perf_bins,
+ CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_output_measure_bins, 0,
+ sysctl_ip6_output_measure_bins, "I",
+ "bins for chaining performance data histogram");
+
+static net_perf_t net_perf;
+SYSCTL_PROC(_net_inet6_ip6, OID_AUTO, output_perf_data,
+ CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
+ 0, 0, sysctl_ip6_output_getperf, "S,net_perf",
+ "IP6 output performance data (struct net_perf, net/net_perf.h)");
#define IM6O_TRACE_HIST_SIZE 32 /* size of trace history */
#define IM6O_ZONE_MAX 64 /* maximum elements in zone */
#define IM6O_ZONE_NAME "ip6_moptions" /* zone name */
-SYSCTL_DECL(_net_inet6_ip6);
-
-static int ip6_maxchainsent = 0;
-SYSCTL_INT(_net_inet6_ip6, OID_AUTO, maxchainsent,
- CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_maxchainsent, 0,
- "use dlil_output_list");
-
/*
- * XXX we don't handle mbuf chains yet in nd6_output() so ip6_output_list() only
- * walks through the packet chain and sends each mbuf separately.
+ * ip6_output() calls ip6_output_list() to do the work
*/
int
-ip6_output_list(struct mbuf *m0, int packetlist, struct ip6_pktopts *opt,
+ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
struct route_in6 *ro, int flags, struct ip6_moptions *im6o,
struct ifnet **ifpp, struct ip6_out_args *ip6oa)
{
-#pragma unused(packetlist)
- struct mbuf *m = m0, *nextpkt;
- int error = 0;
-
- while (m != NULL) {
- /*
- * Break the chain before calling ip6_output() and free the
- * mbufs if there was an error.
- */
- nextpkt = m->m_nextpkt;
- m->m_nextpkt = NULL;
- error = ip6_output(m, opt, ro, flags, im6o, ifpp, ip6oa);
- if (error != 0) {
- if (nextpkt != NULL)
- m_freem_list(nextpkt);
- return (error);
- }
- m = nextpkt;
- }
-
- return (error);
+ return ip6_output_list(m0, 0, opt, ro, flags, im6o, ifpp, ip6oa);
}
/*
- * IP6 output. The packet in mbuf chain m contains a skeletal IP6
+ * IP6 output. Each packet in mbuf chain m contains a skeletal IP6
* header (with pri, len, nxt, hlim, src, dst).
* This function may modify ver and hlim only.
* The mbuf chain containing the packet will be freed.
* which is rt_rmx.rmx_mtu.
*/
int
-ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro,
- int flags, struct ip6_moptions *im6o, struct ifnet **ifpp,
- struct ip6_out_args *ip6oa)
+ip6_output_list(struct mbuf *m0, int packetchain, struct ip6_pktopts *opt,
+ struct route_in6 *ro, int flags, struct ip6_moptions *im6o,
+ struct ifnet **ifpp, struct ip6_out_args *ip6oa)
{
struct ip6_hdr *ip6;
u_char *nexthdrp;
struct ifnet *ifp = NULL, *origifp = NULL; /* refcnt'd */
+ struct ifnet **ifpp_save = ifpp;
struct mbuf *m, *mprev;
- int hlen, tlen, len, off, nxt0;
+ struct mbuf *sendchain = NULL, *sendchain_last = NULL;
+ struct mbuf *inputchain = NULL;
+ int nxt0 = 0;
struct route_in6 *ro_pmtu = NULL;
struct rtentry *rt = NULL;
- struct sockaddr_in6 *dst, src_sa, dst_sa;
+ struct sockaddr_in6 *dst = NULL, src_sa, dst_sa;
int error = 0;
struct in6_ifaddr *ia = NULL, *src_ia = NULL;
- u_int32_t mtu;
+ u_int32_t mtu = 0;
boolean_t alwaysfrag = FALSE;
u_int32_t optlen = 0, plen = 0, unfragpartlen = 0;
struct ip6_rthdr *rh;
ipfilter_t inject_filter_ref;
struct ipf_pktopts *ippo = NULL;
struct flowadv *adv = NULL;
+ uint32_t pktcnt = 0;
+ uint32_t packets_processed = 0;
+ struct timeval start_tv;
#if DUMMYNET
struct m_tag *tag;
struct ip6_out_args saved_ip6oa;
struct route_in6 *ipsec_saved_route = NULL;
boolean_t needipsectun = FALSE;
#endif /* IPSEC */
+#if NECP
+ necp_kernel_policy_result necp_result = 0;
+ necp_kernel_policy_result_parameter necp_result_parameter;
+ necp_kernel_policy_id necp_matched_policy_id = 0;
+#endif /* NECP */
struct {
struct ipf_pktopts ipf_pktopts;
struct ip6_exthdrs exthdrs;
#if IPSEC
struct ipsec_output_state ipsec_state;
#endif /* IPSEC */
+#if NECP
+ struct route_in6 necp_route;
+#endif /* NECP */
#if DUMMYNET
struct route_in6 saved_route;
struct route_in6 saved_ro_pmtu;
#define exthdrs ip6obz.exthdrs
#define ip6route ip6obz.ip6route
#define ipsec_state ip6obz.ipsec_state
+#define necp_route ip6obz.necp_route
#define saved_route ip6obz.saved_route
#define saved_ro_pmtu ip6obz.saved_ro_pmtu
#define args ip6obz.args
struct {
boolean_t select_srcif : 1;
boolean_t hdrsplit : 1;
+ boolean_t route_selected : 1;
boolean_t dontfrag : 1;
#if IPSEC
boolean_t needipsec : 1;
uint32_t raw;
} ip6obf = { .raw = 0 };
+ if (ip6_output_measure)
+ net_perf_start_time(&net_perf, &start_tv);
+
VERIFY(m0->m_flags & M_PKTHDR);
/* zero out {saved_route, saved_ro_pmtu, ip6route, exthdrs, args} */
KERNEL_TAG_TYPE_DUMMYNET, NULL)) != NULL) {
struct dn_pkt_tag *dn_tag;
+ /*
+ * ip6_output_list() cannot handle chains of packets reinjected
+ * by dummynet. The same restriction applies to
+ * ip_output_list().
+ */
+ VERIFY(0 == packetchain);
+
dn_tag = (struct dn_pkt_tag *)(tag+1);
args.fwa_pf_rule = dn_tag->dn_pf_rule;
#endif /* DUMMYNET */
m = m0;
- m->m_pkthdr.pkt_flags &= ~(PKTF_LOOP|PKTF_IFAINFO);
#if IPSEC
- /* for AH processing. stupid to have "socket" variable in IP layer... */
if (ipsec_bypass == 0) {
so = ipsec_getsocket(m);
- (void) ipsec_setsocket(m, NULL);
-
+ if (so != NULL) {
+ (void) ipsec_setsocket(m, NULL);
+ }
/* If packet is bound to an interface, check bound policies */
if ((flags & IPV6_OUTARGS) &&
- (ip6oa->ip6oa_flags & IPOAF_BOUND_IF) &&
- ip6oa->ip6oa_boundif != IFSCOPE_NONE) {
+ (ip6oa->ip6oa_flags & IP6OAF_BOUND_IF) &&
+ ip6oa->ip6oa_boundif != IFSCOPE_NONE) {
/* ip6obf.noipsec is a bitfield, use temp integer */
int noipsec = 0;
if (ipsec6_getpolicybyinterface(m, IPSEC_DIR_OUTBOUND,
- flags, ip6oa, &noipsec, &sp) != 0)
+ flags, ip6oa, &noipsec, &sp) != 0)
goto bad;
ip6obf.noipsec = (noipsec != 0);
}
#endif /* IPSEC */
- ip6 = mtod(m, struct ip6_hdr *);
- nxt0 = ip6->ip6_nxt;
- finaldst = ip6->ip6_dst;
- inject_filter_ref = ipf_get_inject_filter(m);
ippo = &ipf_pktopts;
- if (ip6_doscopedroute && (flags & IPV6_OUTARGS)) {
+ if (flags & IPV6_OUTARGS) {
/*
* In the forwarding case, only the ifscope value is used,
* as source interface selection doesn't take place.
}
}
- if ((flags & IPV6_OUTARGS) && (ip6oa->ip6oa_flags & IP6OAF_NO_CELLULAR))
- ipf_pktopts.ippo_flags |= IPPOF_NO_IFT_CELLULAR;
-
if (flags & IPV6_OUTARGS) {
+ if (ip6oa->ip6oa_flags & IP6OAF_NO_CELLULAR)
+ ipf_pktopts.ippo_flags |= IPPOF_NO_IFT_CELLULAR;
+ if (ip6oa->ip6oa_flags & IP6OAF_NO_EXPENSIVE)
+ ipf_pktopts.ippo_flags |= IPPOF_NO_IFF_EXPENSIVE;
adv = &ip6oa->ip6oa_flowadv;
adv->code = FADV_SUCCESS;
ip6oa->ip6oa_retflags = 0;
}
+ /*
+ * Clear out ifpp to be filled in after determining route. ifpp_save is
+ * used to keep old value to release reference properly and dtrace
+ * ipsec tunnel traffic properly.
+ */
+ if (ifpp != NULL && *ifpp != NULL)
+ *ifpp = NULL;
+
#if DUMMYNET
if (args.fwa_pf_rule) {
ip6 = mtod(m, struct ip6_hdr *);
}
#endif /* DUMMYNET */
+#if NECP
+ /*
+ * Since all packets are assumed to come from same socket, necp lookup
+ * only needs to happen once per function entry.
+ */
+ necp_matched_policy_id = necp_ip6_output_find_policy_match(m, flags,
+ (flags & IPV6_OUTARGS) ? ip6oa : NULL, &necp_result,
+ &necp_result_parameter);
+#endif /* NECP */
+
+ /*
+ * If a chain was passed in, prepare for ther first iteration. For all
+ * other iterations, this work will be done at evaluateloop: label.
+ */
+ if (packetchain) {
+ /*
+ * Remove m from the chain during processing to avoid
+ * accidental frees on entire list.
+ */
+ inputchain = m->m_nextpkt;
+ m->m_nextpkt = NULL;
+ }
+
+loopit:
+ packets_processed++;
+ m->m_pkthdr.pkt_flags &= ~(PKTF_LOOP|PKTF_IFAINFO);
+ ip6 = mtod(m, struct ip6_hdr *);
+ nxt0 = ip6->ip6_nxt;
+ finaldst = ip6->ip6_dst;
+ ip6obf.hdrsplit = FALSE;
+ ro_pmtu = NULL;
+
+ if (!SLIST_EMPTY(&m->m_pkthdr.tags))
+ inject_filter_ref = ipf_get_inject_filter(m);
+ else
+ inject_filter_ref = NULL;
+
#define MAKE_EXTHDR(hp, mp) do { \
if (hp != NULL) { \
struct ip6_ext *eh = (struct ip6_ext *)(hp); \
#undef MAKE_EXTHDR
+#if NECP
+ if (necp_matched_policy_id) {
+ necp_mark_packet_from_ip(m, necp_matched_policy_id);
+
+ switch (necp_result) {
+ case NECP_KERNEL_POLICY_RESULT_PASS:
+ goto skip_ipsec;
+ case NECP_KERNEL_POLICY_RESULT_DROP:
+ case NECP_KERNEL_POLICY_RESULT_SOCKET_DIVERT:
+ /*
+ * Flow divert packets should be blocked at the IP
+ * layer.
+ */
+ error = EHOSTUNREACH;
+ ip6stat.ip6s_necp_policy_drop++;
+ goto freehdrs;
+ case NECP_KERNEL_POLICY_RESULT_IP_TUNNEL: {
+ /*
+ * Verify that the packet is being routed to the tunnel
+ */
+ struct ifnet *policy_ifp =
+ necp_get_ifnet_from_result_parameter(
+ &necp_result_parameter);
+
+ if (policy_ifp == ifp) {
+ goto skip_ipsec;
+ } else {
+ if (necp_packet_can_rebind_to_ifnet(m,
+ policy_ifp, (struct route *)&necp_route,
+ AF_INET6)) {
+ /*
+ * Set scoped index to the tunnel
+ * interface, since it is compatible
+ * with the packet. This will only work
+ * for callers who pass IPV6_OUTARGS,
+ * but that covers all of the clients
+ * we care about today.
+ */
+ if (flags & IPV6_OUTARGS) {
+ ip6oa->ip6oa_boundif =
+ policy_ifp->if_index;
+ ip6oa->ip6oa_flags |=
+ IP6OAF_BOUND_IF;
+ }
+ if (opt != NULL
+ && opt->ip6po_pktinfo != NULL) {
+ opt->ip6po_pktinfo->
+ ipi6_ifindex =
+ policy_ifp->if_index;
+ }
+ ro = &necp_route;
+ goto skip_ipsec;
+ } else {
+ error = ENETUNREACH;
+ ip6stat.ip6s_necp_policy_drop++;
+ goto freehdrs;
+ }
+ }
+ }
+ default:
+ break;
+ }
+ }
+#endif /* NECP */
+
#if IPSEC
if (ipsec_bypass != 0 || ip6obf.noipsec)
goto skip_ipsec;
- /* May have been set above if packet was bound */
if (sp == NULL) {
/* get a security policy for this packet */
- if (so == NULL) {
- sp = ipsec6_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND,
- 0, &error);
- } else {
+ if (so != NULL) {
sp = ipsec6_getpolicybysock(m, IPSEC_DIR_OUTBOUND,
- so, &error);
+ so, &error);
+ } else {
+ sp = ipsec6_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND,
+ 0, &error);
}
if (sp == NULL) {
IPSEC_STAT_INCREMENT(ipsec6stat.out_inval);
goto freehdrs;
}
if (sp->ipsec_if) {
- /* Verify the redirect to ipsec interface */
- if (sp->ipsec_if == ifp) {
- /* Set policy for mbuf */
- m->m_pkthdr.ipsec_policy = sp->id;
- goto skip_ipsec;
- }
- goto bad;
+ goto skip_ipsec;
} else {
ip6obf.needipsec = TRUE;
}
MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp, IPPROTO_DSTOPTS);
MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp, IPPROTO_ROUTING);
+ /* It is no longer safe to free the pointers in exthdrs. */
+ exthdrs.merged = TRUE;
+
#undef MAKE_CHAIN
#if IPSEC
in6_delayed_cksum_offset(m, 0, optlen, nxt0);
#endif /* IPSEC */
- if (!TAILQ_EMPTY(&ipv6_filters)) {
+ if (!TAILQ_EMPTY(&ipv6_filters) &&
+ !((flags & IPV6_OUTARGS) &&
+ (ip6oa->ip6oa_flags & IP6OAF_INTCOPROC_ALLOWED))) {
struct ipfilter *filter;
int seen = (inject_filter_ref == NULL);
int fixscope = 0;
(mbuf_t *)&m, ippo);
if (result == EJUSTRETURN) {
ipf_unref();
- goto done;
+ m = NULL;
+ goto evaluateloop;
}
if (result != 0) {
ipf_unref();
ro = &ip6route;
bzero((caddr_t)ro, sizeof (*ro));
}
- VERIFY(ro_pmtu == NULL); /* must not get here if dummynet */
ro_pmtu = ro;
if (opt != NULL && opt->ip6po_rthdr)
ro = &opt->ip6po_route;
#if IPSEC
if (ip6obf.needipsec && needipsectun) {
#if CONFIG_DTRACE
- struct ifnet *trace_ifp = (ifpp != NULL) ? (*ifpp) : NULL;
+ struct ifnet *trace_ifp = (ifpp_save != NULL) ? (*ifpp_save) : NULL;
#endif /* CONFIG_DTRACE */
/*
* All the extension headers will become inaccessible
exthdrs.ip6e_ip6 = m;
ipsec_state.m = m;
- route_copyout(&ipsec_state.ro, (struct route *)ro,
- sizeof (ipsec_state.ro));
+ route_copyout((struct route *)&ipsec_state.ro, (struct route *)ro,
+ sizeof (struct route_in6));
ipsec_state.dst = SA(dst);
/* So that we can see packets inside the tunnel */
error = ipsec6_output_tunnel(&ipsec_state, sp, flags);
/* tunneled in IPv4? packet is gone */
- if (ipsec_state.tunneled == 4)
- goto done;
+ if (ipsec_state.tunneled == 4) {
+ m = NULL;
+ goto evaluateloop;
+ }
m = ipsec_state.m;
ipsec_saved_route = ro;
ro = (struct route_in6 *)&ipsec_state.ro;
dst = SIN6(ipsec_state.dst);
if (error) {
/* mbuf is already reclaimed in ipsec6_output_tunnel. */
- m0 = m = NULL;
m = NULL;
switch (error) {
case EHOSTUNREACH:
}
#endif /* IPSEC */
- /* for safety */
+ /*
+ * ifp should only be filled in for dummy net packets which will jump
+ * to check_with_pf label.
+ */
if (ifp != NULL) {
- ifnet_release(ifp);
- ifp = NULL;
+ VERIFY(ip6obf.route_selected);
}
/* adjust pointer */
dst_sa.sin6_addr = ip6->ip6_dst;
/*
+ * Only call in6_selectroute() on first iteration to avoid taking
+ * multiple references on ifp and rt.
+ *
* in6_selectroute() might return an ifp with its reference held
* even in the error case, so make sure to release its reference.
* ip6oa may be NULL if IPV6_OUTARGS isn't set.
*/
- if ((error = in6_selectroute(ip6obf.select_srcif ? &src_sa : NULL,
- &dst_sa, opt, im6o, &src_ia, ro, &ifp, &rt, 0, ip6oa)) != 0) {
- switch (error) {
- case EHOSTUNREACH:
- ip6stat.ip6s_noroute++;
- break;
- case EADDRNOTAVAIL:
- default:
- break; /* XXX statistics? */
+ if (!ip6obf.route_selected) {
+ error = in6_selectroute( ip6obf.select_srcif ? &src_sa : NULL,
+ &dst_sa, opt, im6o, &src_ia, ro, &ifp, &rt, 0, ip6oa);
+
+ if (error != 0) {
+ switch (error) {
+ case EHOSTUNREACH:
+ ip6stat.ip6s_noroute++;
+ break;
+ case EADDRNOTAVAIL:
+ default:
+ break; /* XXX statistics? */
+ }
+ if (ifp != NULL)
+ in6_ifstat_inc(ifp, ifs6_out_discard);
+ /* ifp (if non-NULL) will be released at the end */
+ goto bad;
}
- if (ifp != NULL)
- in6_ifstat_inc(ifp, ifs6_out_discard);
- /* ifp (if non-NULL) will be released at the end */
- goto bad;
+ ip6obf.route_selected = TRUE;
}
if (rt == NULL) {
/*
*dst = dst_sa; /* XXX */
}
+#if NECP
+ /* Catch-all to check if the interface is allowed */
+ if (!necp_packet_is_allowed_over_interface(m, ifp)) {
+ error = EHOSTUNREACH;
+ ip6stat.ip6s_necp_policy_drop++;
+ goto bad;
+ }
+#endif /* NECP */
+
/*
* then rt (for unicast) and ifp must be non-NULL valid values.
*/
if (!(flags & IPV6_FORWARDING)) {
- /* XXX: the FORWARDING flag can be set for mrouting. */
in6_ifstat_inc_na(ifp, ifs6_out_request);
}
if (rt != NULL) {
RT_LOCK(rt);
- ia = (struct in6_ifaddr *)(rt->rt_ifa);
- if (ia != NULL)
- IFA_ADDREF(&ia->ia_ifa);
+ if (ia == NULL) {
+ ia = (struct in6_ifaddr *)(rt->rt_ifa);
+ if (ia != NULL)
+ IFA_ADDREF(&ia->ia_ifa);
+ }
rt->rt_use++;
RT_UNLOCK(rt);
}
* forbid loopback, loop back a copy.
*/
ip6_mloopback(NULL, ifp, m, dst, optlen, nxt0);
- } else {
- if (im6o != NULL)
- IM6O_UNLOCK(im6o);
- /*
- * If we are acting as a multicast router, perform
- * multicast forwarding as if the packet had just
- * arrived on the interface to which we are about
- * to send. The multicast forwarding function
- * recursively calls this function, using the
- * IPV6_FORWARDING flag to prevent infinite recursion.
- *
- * Multicasts that are looped back by ip6_mloopback(),
- * above, will be forwarded by the ip6_input() routine,
- * if necessary.
- */
-#if MROUTING
- if (ip6_mrouter && !(flags & IPV6_FORWARDING)) {
- /*
- * XXX: ip6_mforward expects that rcvif is NULL
- * when it is called from the originating path.
- * However, it is not always the case, since
- * some versions of MGETHDR() does not
- * initialize the field.
- */
- m->m_pkthdr.rcvif = NULL;
- if (ip6_mforward(ip6, ifp, m) != 0) {
- m_freem(m);
- if (in6m != NULL)
- IN6M_REMREF(in6m);
- goto done;
- }
- }
-#endif /* MROUTING */
- }
+ } else if (im6o != NULL)
+ IM6O_UNLOCK(im6o);
if (in6m != NULL)
IN6M_REMREF(in6m);
/*
*/
if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK) ||
IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) {
- m_freem(m);
- goto done;
+ /* remove m from the packetchain and continue looping */
+ if (m != NULL)
+ m_freem(m);
+ m = NULL;
+ goto evaluateloop;
}
}
* Fill the outgoing inteface to tell the upper layer
* to increment per-interface statistics.
*/
- if (ifpp != NULL) {
+ if (ifpp != NULL && *ifpp == NULL) {
ifnet_reference(ifp); /* for caller */
- if (*ifpp != NULL)
- ifnet_release(*ifpp);
*ifpp = ifp;
}
*/
in6_clearscope(&ip6->ip6_src);
in6_clearscope(&ip6->ip6_dst);
-
-#if IPFW2
- /*
- * Check with the firewall...
- */
- if (ip6_fw_enable && ip6_fw_chk_ptr) {
- u_short port = 0;
- m->m_pkthdr.rcvif = NULL; /* XXX */
- /* If ipfw says divert, we have to just drop packet */
- if (ip6_fw_chk_ptr(&ip6, ifp, &port, &m)) {
- m_freem(m);
- goto done;
- }
- if (m == NULL) {
- error = EACCES;
- goto done;
- }
- }
-#endif /* IPFW2 */
-
/*
* If the outgoing packet contains a hop-by-hop options header,
* it must be examined and processed even by the source node.
if (ip6_process_hopopts(m, (u_int8_t *)(hbh + 1),
((hbh->ip6h_len + 1) << 3) - sizeof (struct ip6_hbh),
&dummy, &oplen) < 0) {
- /* m was already freed at this point */
+ /*
+ * m was already freed at this point. Set to NULL so it
+ * is not re-freed at end of ip6_output_list.
+ */
+ m = NULL;
error = EINVAL; /* better error? */
- goto done;
+ goto bad;
}
m->m_flags &= ~M_LOOP; /* XXX */
m->m_pkthdr.rcvif = NULL;
#if PF
if (PF_IS_ENABLED) {
#if DUMMYNET
+
/*
* TODO: Need to save opt->ip6po_flags for reinjection
* rdar://10434993
#endif /* !DUMMYNET */
if (error != 0 || m == NULL) {
- /*
- * Note that if we ever handle packet chain, we will
- * have to restore the linkage from the previous
- * packet to the next like in ip_outout_list()
- */
if (m != NULL) {
panic("%s: unexpected packet %p\n",
__func__, m);
/* NOTREACHED */
}
- /* Already freed by callee */
- goto done;
+ /* m was already freed by callee and is now NULL. */
+ goto evaluateloop;
}
ip6 = mtod(m, struct ip6_hdr *);
}
#endif /* PF */
+#ifdef IPSEC
+ /* clean ipsec history before fragmentation */
+ ipsec_delaux(m);
+#endif /* IPSEC */
+
+ if (ip6oa != NULL) {
+ u_int8_t dscp;
+
+ dscp = (ntohl(ip6->ip6_flow) & IP6FLOW_DSCP_MASK) >> IP6FLOW_DSCP_SHIFT;
+
+ error = set_packet_qos(m, ifp,
+ ip6oa->ip6oa_flags & IP6OAF_QOSMARKING_ALLOWED ? TRUE : FALSE,
+ ip6oa->ip6oa_sotc, ip6oa->ip6oa_netsvctype, &dscp);
+ if (error == 0) {
+ ip6->ip6_flow &= ~htonl(IP6FLOW_DSCP_MASK);
+ ip6->ip6_flow |= htonl((u_int32_t)dscp << IP6FLOW_DSCP_SHIFT);
+ } else {
+ printf("%s if_dscp_for_mbuf() error %d\n", __func__, error);
+ error = 0;
+ }
+ }
/*
- * Send the packet to the outgoing interface.
- * If necessary, do IPv6 fragmentation before sending.
- *
- * the logic here is rather complex:
- * 1: normal case (dontfrag == 0, alwaysfrag == 0)
- * 1-a: send as is if tlen <= path mtu
- * 1-b: fragment if tlen > path mtu
- *
- * 2: if user asks us not to fragment (dontfrag == 1)
- * 2-a: send as is if tlen <= interface mtu
- * 2-b: error if tlen > interface mtu
- *
- * 3: if we always need to attach fragment header (alwaysfrag == 1)
- * always fragment
- *
- * 4: if dontfrag == 1 && alwaysfrag == 1
- * error, as we cannot handle this conflicting request
+ * Determine whether fragmentation is necessary. If so, m is passed
+ * back as a chain of packets and original mbuf is freed. Otherwise, m
+ * is unchanged.
*/
- tlen = m->m_pkthdr.len;
+ error = ip6_fragment_packet(&m, opt,
+ &exthdrs, ifp, mtu, alwaysfrag, unfragpartlen, ro_pmtu, nxt0,
+ optlen);
- if (opt != NULL && (opt->ip6po_flags & IP6PO_DONTFRAG))
- ip6obf.dontfrag = TRUE;
- else
- ip6obf.dontfrag = FALSE;
- if (ip6obf.dontfrag && alwaysfrag) { /* case 4 */
- /* conflicting request - can't transmit */
- error = EMSGSIZE;
+ if (error)
goto bad;
+
+/*
+ * The evaluateloop label is where we decide whether to continue looping over
+ * packets or call into nd code to send.
+ */
+evaluateloop:
+
+ /*
+ * m may be NULL when we jump to the evaluateloop label from PF or
+ * other code that can drop packets.
+ */
+ if (m != NULL) {
+ /*
+ * If we already have a chain to send, tack m onto the end.
+ * Otherwise make m the start and end of the to-be-sent chain.
+ */
+ if (sendchain != NULL) {
+ sendchain_last->m_nextpkt = m;
+ } else {
+ sendchain = m;
+ }
+
+ /* Fragmentation may mean m is a chain. Find the last packet. */
+ while (m->m_nextpkt)
+ m = m->m_nextpkt;
+ sendchain_last = m;
+ pktcnt++;
+ }
+
+ /* Fill in next m from inputchain as appropriate. */
+ m = inputchain;
+ if (m != NULL) {
+ /* Isolate m from rest of input chain. */
+ inputchain = m->m_nextpkt;
+ m->m_nextpkt = NULL;
+
+ /*
+ * Clear exthdrs and ipsec_state so stale contents are not
+ * reused. Note this also clears the exthdrs.merged flag.
+ */
+ bzero(&exthdrs, sizeof(exthdrs));
+ bzero(&ipsec_state, sizeof(ipsec_state));
+
+ /* Continue looping. */
+ goto loopit;
+ }
+
+ /*
+ * If we get here, there's no more mbufs in inputchain, so send the
+ * sendchain if there is one.
+ */
+ if (pktcnt > 0) {
+ error = nd6_output_list(ifp, origifp, sendchain, dst,
+ ro->ro_rt, adv);
+ /*
+ * Fall through to done label even in error case because
+ * nd6_output_list frees packetchain in both success and
+ * failure cases.
+ */
+ }
+
+done:
+ if (ifpp_save != NULL && *ifpp_save != NULL) {
+ ifnet_release(*ifpp_save);
+ *ifpp_save = NULL;
+ }
+ ROUTE_RELEASE(&ip6route);
+#if IPSEC
+ ROUTE_RELEASE(&ipsec_state.ro);
+ if (sp != NULL)
+ key_freesp(sp, KEY_SADB_UNLOCKED);
+#endif /* IPSEC */
+#if NECP
+ ROUTE_RELEASE(&necp_route);
+#endif /* NECP */
+#if DUMMYNET
+ ROUTE_RELEASE(&saved_route);
+ ROUTE_RELEASE(&saved_ro_pmtu);
+#endif /* DUMMYNET */
+
+ if (ia != NULL)
+ IFA_REMREF(&ia->ia_ifa);
+ if (src_ia != NULL)
+ IFA_REMREF(&src_ia->ia_ifa);
+ if (ifp != NULL)
+ ifnet_release(ifp);
+ if (origifp != NULL)
+ ifnet_release(origifp);
+ if (ip6_output_measure) {
+ net_perf_measure_time(&net_perf, &start_tv, packets_processed);
+ net_perf_histogram(&net_perf, packets_processed);
+ }
+ return (error);
+
+freehdrs:
+ if (exthdrs.ip6e_hbh != NULL) {
+ if (exthdrs.merged)
+ panic("Double free of ip6e_hbh");
+ m_freem(exthdrs.ip6e_hbh);
+ }
+ if (exthdrs.ip6e_dest1 != NULL) {
+ if (exthdrs.merged)
+ panic("Double free of ip6e_dest1");
+ m_freem(exthdrs.ip6e_dest1);
+ }
+ if (exthdrs.ip6e_rthdr != NULL) {
+ if (exthdrs.merged)
+ panic("Double free of ip6e_rthdr");
+ m_freem(exthdrs.ip6e_rthdr);
+ }
+ if (exthdrs.ip6e_dest2 != NULL) {
+ if (exthdrs.merged)
+ panic("Double free of ip6e_dest2");
+ m_freem(exthdrs.ip6e_dest2);
+ }
+ /* FALLTHRU */
+bad:
+ if (inputchain != NULL)
+ m_freem_list(inputchain);
+ if (sendchain != NULL)
+ m_freem_list(sendchain);
+ if (m != NULL)
+ m_freem(m);
+
+ goto done;
+
+#undef ipf_pktopts
+#undef exthdrs
+#undef ip6route
+#undef ipsec_state
+#undef saved_route
+#undef saved_ro_pmtu
+#undef args
+}
+
+/* ip6_fragment_packet
+ *
+ * The fragmentation logic is rather complex:
+ * 1: normal case (dontfrag == 0, alwaysfrag == 0)
+ * 1-a: send as is if tlen <= path mtu
+ * 1-b: fragment if tlen > path mtu
+ *
+ * 2: if user asks us not to fragment (dontfrag == 1)
+ * 2-a: send as is if tlen <= interface mtu
+ * 2-b: error if tlen > interface mtu
+ *
+ * 3: if we always need to attach fragment header (alwaysfrag == 1)
+ * always fragment
+ *
+ * 4: if dontfrag == 1 && alwaysfrag == 1
+ * error, as we cannot handle this conflicting request
+ */
+
+static int
+ip6_fragment_packet(struct mbuf **mptr, struct ip6_pktopts *opt,
+ struct ip6_exthdrs *exthdrsp, struct ifnet *ifp, uint32_t mtu,
+ boolean_t alwaysfrag, uint32_t unfragpartlen, struct route_in6 *ro_pmtu,
+ int nxt0, uint32_t optlen)
+{
+ VERIFY(NULL != mptr);
+ struct mbuf *m = *mptr;
+ int error = 0;
+ size_t tlen = m->m_pkthdr.len;
+ boolean_t dontfrag = (opt != NULL && (opt->ip6po_flags & IP6PO_DONTFRAG));
+
+ if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
+ dontfrag = TRUE;
+ /*
+ * Discard partial sum information if this packet originated
+ * from another interface; the packet would already have the
+ * final checksum and we shouldn't recompute it.
+ */
+ if ((m->m_pkthdr.csum_flags & (CSUM_DATA_VALID|CSUM_PARTIAL)) ==
+ (CSUM_DATA_VALID|CSUM_PARTIAL)) {
+ m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
+ m->m_pkthdr.csum_data = 0;
+ }
+ }
+
+ if (dontfrag && alwaysfrag) { /* case 4 */
+ /* conflicting request - can't transmit */
+ return EMSGSIZE;
}
- lck_rw_lock_shared(nd_if_rwlock);
/* Access without acquiring nd_ifinfo lock for performance */
- if (ip6obf.dontfrag && tlen > IN6_LINKMTU(ifp)) { /* case 2-b */
- lck_rw_done(nd_if_rwlock);
+ if (dontfrag && tlen > IN6_LINKMTU(ifp)) { /* case 2-b */
/*
* Even if the DONTFRAG option is specified, we cannot send the
* packet when the data length is larger than the MTU of the
bzero(&ip6cp, sizeof (ip6cp));
ip6cp.ip6c_cmdarg = (void *)&mtu32;
pfctlinput2(PRC_MSGSIZE, SA(&ro_pmtu->ro_dst), (void *)&ip6cp);
- error = EMSGSIZE;
- goto bad;
- } else {
- lck_rw_done(nd_if_rwlock);
+ return EMSGSIZE;
}
/*
* transmit packet without fragmentation
*/
- if (ip6obf.dontfrag || (!alwaysfrag && /* case 1-a and 2-a */
+ if (dontfrag || (!alwaysfrag && /* case 1-a and 2-a */
(tlen <= mtu || TSO_IPV6_OK(ifp, m) ||
(ifp->if_hwassist & CSUM_FRAGMENT_IPV6)))) {
-#ifdef IPSEC
- /* clean ipsec history once it goes out of the node */
- ipsec_delaux(m);
-#endif /* IPSEC */
-
+ /*
+ * mppn not updated in this case because no new chain is formed
+ * and inserted
+ */
ip6_output_checksum(ifp, mtu, m, nxt0, tlen, optlen);
-
- if (ro->ro_rt)
- RT_LOCK_ASSERT_NOTHELD(ro->ro_rt);
- error = nd6_output(ifp, origifp, m, dst, ro->ro_rt, adv);
- goto done;
+ } else {
+ /*
+ * time to fragment - cases 1-b and 3 are handled inside
+ * ip6_do_fragmentation().
+ * mppn is passed down to be updated to point at fragment chain.
+ */
+ error = ip6_do_fragmentation(mptr, optlen, ifp,
+ unfragpartlen, mtod(m, struct ip6_hdr *), exthdrsp, mtu, nxt0);
}
+ return error;
+}
+
+/*
+ * ip6_do_fragmentation() is called by ip6_fragment_packet() after determining
+ * the packet needs to be fragmented. on success, morig is freed and a chain
+ * of fragments is linked into the packet chain where morig existed. Otherwise,
+ * an errno is returned.
+ */
+int
+ip6_do_fragmentation(struct mbuf **mptr, uint32_t optlen, struct ifnet *ifp,
+ uint32_t unfragpartlen, struct ip6_hdr *ip6, struct ip6_exthdrs *exthdrsp,
+ uint32_t mtu, int nxt0)
+{
+ VERIFY(NULL != mptr);
+ int error = 0;
+
+ struct mbuf *morig = *mptr;
+ struct mbuf *first_mbufp = NULL;
+ struct mbuf *last_mbufp = NULL;
+
+ size_t tlen = morig->m_pkthdr.len;
+
/*
* try to fragment the packet. case 1-b and 3
*/
- if ((m->m_pkthdr.csum_flags & CSUM_TSO_IPV6)) {
+ if ((morig->m_pkthdr.csum_flags & CSUM_TSO_IPV6)) {
/* TSO and fragment aren't compatible */
- error = EMSGSIZE;
in6_ifstat_inc(ifp, ifs6_out_fragfail);
- goto bad;
+ return EMSGSIZE;
} else if (mtu < IPV6_MMTU) {
/* path MTU cannot be less than IPV6_MMTU */
- error = EMSGSIZE;
in6_ifstat_inc(ifp, ifs6_out_fragfail);
- goto bad;
+ return EMSGSIZE;
} else if (ip6->ip6_plen == 0) {
/* jumbo payload cannot be fragmented */
- error = EMSGSIZE;
in6_ifstat_inc(ifp, ifs6_out_fragfail);
- goto bad;
+ return EMSGSIZE;
} else {
- struct mbuf **mnext, *m_frgpart;
+ size_t hlen, len, off;
+ struct mbuf **mnext = NULL;
struct ip6_frag *ip6f;
u_int32_t id = htonl(ip6_randomid());
u_char nextproto;
len = (mtu - hlen - sizeof (struct ip6_frag)) & ~7;
if (len < 8) {
- error = EMSGSIZE;
in6_ifstat_inc(ifp, ifs6_out_fragfail);
- goto bad;
+ return EMSGSIZE;
}
- mnext = &m->m_nextpkt;
-
/*
* Change the next header field of the last header in the
* unfragmentable part.
*/
- if (exthdrs.ip6e_rthdr != NULL) {
- nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *);
- *mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT;
- } else if (exthdrs.ip6e_dest1 != NULL) {
- nextproto = *mtod(exthdrs.ip6e_dest1, u_char *);
- *mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT;
- } else if (exthdrs.ip6e_hbh != NULL) {
- nextproto = *mtod(exthdrs.ip6e_hbh, u_char *);
- *mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT;
+ if (exthdrsp->ip6e_rthdr != NULL) {
+ nextproto = *mtod(exthdrsp->ip6e_rthdr, u_char *);
+ *mtod(exthdrsp->ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT;
+ } else if (exthdrsp->ip6e_dest1 != NULL) {
+ nextproto = *mtod(exthdrsp->ip6e_dest1, u_char *);
+ *mtod(exthdrsp->ip6e_dest1, u_char *) = IPPROTO_FRAGMENT;
+ } else if (exthdrsp->ip6e_hbh != NULL) {
+ nextproto = *mtod(exthdrsp->ip6e_hbh, u_char *);
+ *mtod(exthdrsp->ip6e_hbh, u_char *) = IPPROTO_FRAGMENT;
} else {
nextproto = ip6->ip6_nxt;
ip6->ip6_nxt = IPPROTO_FRAGMENT;
}
- if (m->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA)
- in6_delayed_cksum_offset(m, 0, optlen, nxt0);
+ if (morig->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA)
+ in6_delayed_cksum_offset(morig, 0, optlen, nxt0);
/*
* Loop through length of segment after first fragment,
* make new header and copy data of each part and link onto
* chain.
*/
- m0 = m;
for (off = hlen; off < tlen; off += len) {
- struct ip6_hdr *mhip6;
+ struct ip6_hdr *new_mhip6;
+ struct mbuf *new_m;
+ struct mbuf *m_frgpart;
- MGETHDR(m, M_DONTWAIT, MT_HEADER); /* MAC-OK */
- if (m == NULL) {
+ MGETHDR(new_m, M_DONTWAIT, MT_HEADER); /* MAC-OK */
+ if (new_m == NULL) {
error = ENOBUFS;
ip6stat.ip6s_odropped++;
- goto sendorfree;
+ break;
}
- m->m_pkthdr.rcvif = NULL;
- m->m_flags = m0->m_flags & M_COPYFLAGS;
- *mnext = m;
- mnext = &m->m_nextpkt;
- m->m_data += max_linkhdr;
- mhip6 = mtod(m, struct ip6_hdr *);
- *mhip6 = *ip6;
- m->m_len = sizeof (*mhip6);
- error = ip6_insertfraghdr(m0, m, hlen, &ip6f);
+ new_m->m_pkthdr.rcvif = NULL;
+ new_m->m_flags = morig->m_flags & M_COPYFLAGS;
+
+ if (first_mbufp != NULL) {
+ /* Every pass through loop but first */
+ *mnext = new_m;
+ last_mbufp = new_m;
+ } else {
+ /* This is the first element of the fragment chain */
+ first_mbufp = new_m;
+ last_mbufp = new_m;
+ }
+ mnext = &new_m->m_nextpkt;
+
+ new_m->m_data += max_linkhdr;
+ new_mhip6 = mtod(new_m, struct ip6_hdr *);
+ *new_mhip6 = *ip6;
+ new_m->m_len = sizeof (*new_mhip6);
+
+ error = ip6_insertfraghdr(morig, new_m, hlen, &ip6f);
if (error) {
ip6stat.ip6s_odropped++;
- goto sendorfree;
+ break;
}
+
ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7));
if (off + len >= tlen)
len = tlen - off;
else
ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
- mhip6->ip6_plen = htons((u_short)(len + hlen +
+ new_mhip6->ip6_plen = htons((u_short)(len + hlen +
sizeof (*ip6f) - sizeof (struct ip6_hdr)));
- if ((m_frgpart = m_copy(m0, off, len)) == NULL) {
+
+ if ((m_frgpart = m_copy(morig, off, len)) == NULL) {
error = ENOBUFS;
ip6stat.ip6s_odropped++;
- goto sendorfree;
+ break;
}
- m_cat(m, m_frgpart);
- m->m_pkthdr.len = len + hlen + sizeof (*ip6f);
- m->m_pkthdr.rcvif = NULL;
+ m_cat(new_m, m_frgpart);
+ new_m->m_pkthdr.len = len + hlen + sizeof (*ip6f);
+ new_m->m_pkthdr.rcvif = NULL;
- M_COPY_CLASSIFIER(m, m0);
- M_COPY_PFTAG(m, m0);
+ M_COPY_CLASSIFIER(new_m, morig);
+ M_COPY_PFTAG(new_m, morig);
#ifdef notyet
#if CONFIG_MACF_NET
- mac_create_fragment(m0, m);
+ mac_create_fragment(morig, new_m);
#endif /* CONFIG_MACF_NET */
#endif /* notyet */
in6_ifstat_inc(ifp, ifs6_out_fragcreat);
}
- in6_ifstat_inc(ifp, ifs6_out_fragok);
- }
-
- /*
- * Remove leading garbages.
- */
-sendorfree:
- m = m0->m_nextpkt;
- m0->m_nextpkt = NULL;
- m_freem(m0);
- for (m0 = m; m != NULL; m = m0) {
- m0 = m->m_nextpkt;
- m->m_nextpkt = NULL;
- if (error == 0) {
-#if IPSEC
- /* clean ipsec history once it goes out of the node */
- ipsec_delaux(m);
-#endif /* IPSEC */
- error = nd6_output(ifp, origifp, m, dst, ro->ro_rt,
- adv);
+ if (error) {
+ /* free all the fragments created */
+ if (first_mbufp != NULL) {
+ m_freem_list(first_mbufp);
+ first_mbufp = NULL;
+ }
+ last_mbufp = NULL;
} else {
- m_freem(m);
+ /* successful fragmenting */
+ m_freem(morig);
+ *mptr = first_mbufp;
+ last_mbufp->m_nextpkt = NULL;
+ ip6stat.ip6s_fragmented++;
+ in6_ifstat_inc(ifp, ifs6_out_fragok);
}
}
-
- if (error == 0)
- ip6stat.ip6s_fragmented++;
-
-done:
- ROUTE_RELEASE(&ip6route);
-#if IPSEC
- ROUTE_RELEASE(&ipsec_state.ro);
- if (sp != NULL)
- key_freesp(sp, KEY_SADB_UNLOCKED);
-#endif /* IPSEC */
-#if DUMMYNET
- ROUTE_RELEASE(&saved_route);
- ROUTE_RELEASE(&saved_ro_pmtu);
-#endif /* DUMMYNET */
-
- if (ia != NULL)
- IFA_REMREF(&ia->ia_ifa);
- if (src_ia != NULL)
- IFA_REMREF(&src_ia->ia_ifa);
- if (ifp != NULL)
- ifnet_release(ifp);
- if (origifp != NULL)
- ifnet_release(origifp);
- return (error);
-
-freehdrs:
- if (exthdrs.ip6e_hbh != NULL)
- m_freem(exthdrs.ip6e_hbh);
- if (exthdrs.ip6e_dest1 != NULL)
- m_freem(exthdrs.ip6e_dest1);
- if (exthdrs.ip6e_rthdr != NULL)
- m_freem(exthdrs.ip6e_rthdr);
- if (exthdrs.ip6e_dest2 != NULL)
- m_freem(exthdrs.ip6e_dest2);
- /* FALLTHRU */
-bad:
- if (m != NULL)
- m_freem(m);
- goto done;
-
-#undef ipf_pktopts
-#undef exthdrs
-#undef ip6route
-#undef ipsec_state
-#undef saved_route
-#undef saved_ro_pmtu
-#undef args
+ return error;
}
static int
ip6_out_cksum_stats(nxt, plen - olen);
/* RFC1122 4.1.3.4 */
- if (csum == 0 && (m->m_pkthdr.csum_flags & CSUM_UDPIPV6))
+ if (csum == 0 &&
+ (m->m_pkthdr.csum_flags & (CSUM_UDPIPV6|CSUM_ZERO_INVERT)))
csum = 0xffff;
/* Insert the checksum in the ULP csum field */
} else {
bcopy(&csum, (mtod(m, char *) + offset), sizeof (csum));
}
- m->m_pkthdr.csum_flags &=
- ~(CSUM_DELAY_IPV6_DATA | CSUM_DATA_VALID | CSUM_PARTIAL);
+ m->m_pkthdr.csum_flags &= ~(CSUM_DELAY_IPV6_DATA | CSUM_DATA_VALID |
+ CSUM_PARTIAL | CSUM_ZERO_INVERT);
done:
return (sw_csum);
u_int32_t mtu = 0;
boolean_t alwaysfrag = FALSE;
int error = 0;
+ boolean_t is_local = FALSE;
+
+ if (IN6_IS_SCOPE_LINKLOCAL(dst))
+ is_local = TRUE;
if (ro_pmtu != ro) {
/* The first hop and the final destination may differ. */
if (ro_pmtu->ro_rt != NULL) {
u_int32_t ifmtu;
- lck_rw_lock_shared(nd_if_rwlock);
+ if (ifp == NULL)
+ ifp = ro_pmtu->ro_rt->rt_ifp;
/* Access without acquiring nd_ifinfo lock for performance */
ifmtu = IN6_LINKMTU(ifp);
- lck_rw_done(nd_if_rwlock);
/*
* Access rmx_mtu without holding the route entry lock,
}
} else {
if (ifp) {
- lck_rw_lock_shared(nd_if_rwlock);
/* Don't hold nd_ifinfo lock for performance */
mtu = IN6_LINKMTU(ifp);
- lck_rw_done(nd_if_rwlock);
} else {
error = EHOSTUNREACH; /* XXX */
}
}
*mtup = mtu;
- if (alwaysfragp != NULL)
+ if ((alwaysfragp != NULL) && !is_local)
*alwaysfragp = alwaysfrag;
return (error);
}
privileged = (proc_suser(p) == 0);
if (level == IPPROTO_IPV6) {
+ boolean_t capture_exthdrstat_in = FALSE;
switch (op) {
case SOPT_SET:
switch (optname) {
break;
}
OPTSET(IN6P_HOPOPTS);
+ capture_exthdrstat_in = TRUE;
break;
case IPV6_RECVDSTOPTS:
break;
}
OPTSET(IN6P_DSTOPTS);
+ capture_exthdrstat_in = TRUE;
break;
case IPV6_RECVRTHDRDSTOPTS:
break;
}
OPTSET(IN6P_RTHDRDSTOPTS);
+ capture_exthdrstat_in = TRUE;
break;
case IPV6_RECVRTHDR:
break;
}
OPTSET(IN6P_RTHDR);
+ capture_exthdrstat_in = TRUE;
break;
case IPV6_RECVPATHMTU:
optp = &in6p->in6p_outputopts;
error = ip6_pcbopt(optname, (u_char *)&optval,
sizeof (optval), optp, uproto);
+
+ if (optname == IPV6_TCLASS) {
+ // Add in the ECN flags
+ u_int8_t tos = (in6p->inp_ip_tos & ~IPTOS_ECN_MASK);
+ u_int8_t ecn = optval & IPTOS_ECN_MASK;
+ in6p->inp_ip_tos = tos | ecn;
+ }
break;
}
if (!privileged)
return (EPERM);
OPTSET2292(IN6P_HOPOPTS);
+ capture_exthdrstat_in = TRUE;
break;
case IPV6_2292DSTOPTS:
if (!privileged)
return (EPERM);
OPTSET2292(IN6P_DSTOPTS|
IN6P_RTHDRDSTOPTS); /* XXX */
+ capture_exthdrstat_in = TRUE;
break;
case IPV6_2292RTHDR:
OPTSET2292(IN6P_RTHDR);
+ capture_exthdrstat_in = TRUE;
break;
}
break;
req = mtod(m, caddr_t);
len = m->m_len;
error = ipsec6_set_policy(in6p, optname, req,
- len, privileged);
+ len, privileged);
m_freem(m);
break;
}
#endif /* IPSEC */
-#if IPFIREWALL
- case IPV6_FW_ADD:
- case IPV6_FW_DEL:
- case IPV6_FW_FLUSH:
- case IPV6_FW_ZERO: {
- if (ip6_fw_ctl_ptr == NULL)
- load_ip6fw();
- if (ip6_fw_ctl_ptr != NULL)
- error = (*ip6_fw_ctl_ptr)(sopt);
- else
- error = ENOPROTOOPT;
- break;
- }
-#endif /* IPFIREWALL */
/*
* IPv6 variant of IP_BOUND_IF; for details see
* comments on IP_BOUND_IF in ip_ctloutput().
break;
/* once set, it cannot be unset */
- if (!optval &&
- (in6p->inp_flags & INP_NO_IFT_CELLULAR)) {
+ if (!optval && INP_NO_CELLULAR(in6p)) {
error = EINVAL;
break;
}
error = ENOPROTOOPT;
break;
}
+ if (capture_exthdrstat_in) {
+ if (uproto == IPPROTO_TCP) {
+ INC_ATOMIC_INT64_LIM(net_api_stats.nas_sock_inet6_stream_exthdr_in);
+ } else if (uproto == IPPROTO_UDP) {
+ INC_ATOMIC_INT64_LIM(net_api_stats.nas_sock_inet6_dgram_exthdr_in);
+ }
+ }
break;
case SOPT_GET:
break;
#if IPSEC
case IPV6_IPSEC_POLICY: {
- caddr_t req = NULL;
- size_t len = 0;
- struct mbuf *m = NULL;
- struct mbuf *mp = NULL;
-
- error = soopt_getm(sopt, &m);
- if (error != 0)
- break;
- error = soopt_mcopyin(sopt, m);
- if (error != 0)
- break;
-
- req = mtod(m, caddr_t);
- len = m->m_len;
- error = ipsec6_get_policy(in6p, req, len, &mp);
- if (error == 0)
- error = soopt_mcopyout(sopt, mp);
- if (mp != NULL)
- m_freem(mp);
- m_freem(m);
+ error = 0; /* This option is no longer supported */
break;
}
#endif /* IPSEC */
-#if IPFIREWALL
- case IPV6_FW_GET: {
- if (ip6_fw_ctl_ptr == NULL)
- load_ip6fw();
- if (ip6_fw_ctl_ptr != NULL)
- error = (*ip6_fw_ctl_ptr)(sopt);
- else
- error = ENOPROTOOPT;
- break;
- }
-#endif /* IPFIREWALL */
case IPV6_BOUND_IF:
if (in6p->inp_flags & INP_BOUND_IF)
optval = in6p->inp_boundifp->if_index;
break;
case IPV6_NO_IFT_CELLULAR:
- optval = (in6p->inp_flags & INP_NO_IFT_CELLULAR)
- ? 1 : 0;
+ optval = INP_NO_CELLULAR(in6p) ? 1 : 0;
error = sooptcopyout(sopt, &optval,
sizeof (optval));
break;
}
break;
}
+ } else if (level == IPPROTO_UDP) {
+ error = udp_ctloutput(so, sopt);
} else {
error = EINVAL;
}
{
int minmtupolicy, preftemp;
int error;
+ boolean_t capture_exthdrstat_out = FALSE;
if (!sticky && !cmsg) {
#ifdef DIAGNOSTIC
if (opt->ip6po_hbh == NULL)
return (ENOBUFS);
bcopy(hbh, opt->ip6po_hbh, hbhlen);
-
+ capture_exthdrstat_out = TRUE;
break;
}
if (*newdest == NULL)
return (ENOBUFS);
bcopy(dest, *newdest, destlen);
+ capture_exthdrstat_out = TRUE;
break;
}
if (opt->ip6po_rthdr == NULL)
return (ENOBUFS);
bcopy(rth, opt->ip6po_rthdr, rthlen);
+ capture_exthdrstat_out = TRUE;
break;
}
return (ENOPROTOOPT);
} /* end of switch */
+ if (capture_exthdrstat_out) {
+ if (uproto == IPPROTO_TCP) {
+ INC_ATOMIC_INT64_LIM(net_api_stats.nas_sock_inet6_stream_exthdr_out);
+ } else if (uproto == IPPROTO_UDP) {
+ INC_ATOMIC_INT64_LIM(net_api_stats.nas_sock_inet6_dgram_exthdr_out);
+ }
+ }
+
return (0);
}
} else if (!(sw_csum & CSUM_DELAY_IPV6_DATA) &&
(hwcap & CSUM_PARTIAL)) {
/*
- * Partial checksum offload, ere), if no extension
- * headers, and TCP only (no UDP support, as the
- * hardware may not be able to convert +0 to
- * -0 (0xffff) per RFC1122 4.1.3.4.)
+ * Partial checksum offload, ere), if no extension headers,
+ * and TCP only (no UDP support, as the hardware may not be
+ * able to convert +0 to -0 (0xffff) per RFC1122 4.1.3.4.
+ * unless the interface supports "invert zero" capability.)
*/
if (hwcksum_tx && !tso &&
- (m->m_pkthdr.csum_flags & CSUM_TCPIPV6) &&
+ ((m->m_pkthdr.csum_flags & CSUM_TCPIPV6) ||
+ ((hwcap & CSUM_ZERO_INVERT) &&
+ (m->m_pkthdr.csum_flags & CSUM_ZERO_INVERT))) &&
tlen <= mtu) {
uint16_t start = sizeof (struct ip6_hdr);
uint16_t ulpoff =
return (len);
#undef elen
}
+
+static int
+sysctl_reset_ip6_output_stats SYSCTL_HANDLER_ARGS
+{
+#pragma unused(arg1, arg2)
+ int error, i;
+
+ i = ip6_output_measure;
+ error = sysctl_handle_int(oidp, &i, 0, req);
+ if (error || req->newptr == USER_ADDR_NULL)
+ goto done;
+ /* impose bounds */
+ if (i < 0 || i > 1) {
+ error = EINVAL;
+ goto done;
+ }
+ if (ip6_output_measure != i && i == 1) {
+ net_perf_initialize(&net_perf, ip6_output_measure_bins);
+ }
+ ip6_output_measure = i;
+done:
+ return (error);
+}
+
+static int
+sysctl_ip6_output_measure_bins SYSCTL_HANDLER_ARGS
+{
+#pragma unused(arg1, arg2)
+ int error;
+ uint64_t i;
+
+ i = ip6_output_measure_bins;
+ error = sysctl_handle_quad(oidp, &i, 0, req);
+ if (error || req->newptr == USER_ADDR_NULL)
+ goto done;
+ /* validate data */
+ if (!net_perf_validate_bins(i)) {
+ error = EINVAL;
+ goto done;
+ }
+ ip6_output_measure_bins = i;
+done:
+ return (error);
+}
+
+static int
+sysctl_ip6_output_getperf SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp, arg1, arg2)
+ if (req->oldptr == USER_ADDR_NULL)
+ req->oldlen = (size_t)sizeof (struct ipstat);
+
+ return (SYSCTL_OUT(req, &net_perf, MIN(sizeof (net_perf), req->oldlen)));
+}