X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/3e170ce000f1506b7b5d2c5c7faec85ceabb573d..5c9f46613a83ebfc29a5b1f099448259e96a98f0:/bsd/netinet/ip_output.c diff --git a/bsd/netinet/ip_output.c b/bsd/netinet/ip_output.c index 383751d4d..f5b51ac52 100644 --- a/bsd/netinet/ip_output.c +++ b/bsd/netinet/ip_output.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2014 Apple Inc. All rights reserved. + * Copyright (c) 2000-2017 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -104,6 +104,7 @@ #include #include #include +#include #if CONFIG_MACF_NET #include @@ -206,6 +207,10 @@ SYSCTL_PROC(_net_inet_ip, OID_AUTO, output_perf_data, 0, 0, sysctl_ip_output_getperf, "S,net_perf", "IP output performance data (struct net_perf, net/net_perf.h)"); +__private_extern__ int rfc6864 = 1; +SYSCTL_INT(_net_inet_ip, OID_AUTO, rfc6864, CTLFLAG_RW | CTLFLAG_LOCKED, + &rfc6864, 0, "updated ip id field behavior"); + #define IMO_TRACE_HIST_SIZE 32 /* size of trace history */ /* For gdb */ @@ -345,9 +350,14 @@ ip_output_list(struct mbuf *m0, int packetchain, struct mbuf *opt, uint32_t raw; } ipobf = { .raw = 0 }; +/* + * Here we check for restrictions when sending frames. + * N.B.: IPv4 over internal co-processor interfaces is not allowed. + */ #define IP_CHECK_RESTRICTIONS(_ifp, _ipobf) \ (((_ipobf).nocell && IFNET_IS_CELLULAR(_ifp)) || \ ((_ipobf).noexpensive && IFNET_IS_EXPENSIVE(_ifp)) || \ + (IFNET_IS_INTCOPROC(_ifp)) || \ (!(_ipobf).awdl_unrestricted && IFNET_IS_AWDL_RESTRICTED(_ifp))) if (ip_output_measure) @@ -434,10 +444,10 @@ ipfw_tags_done: } } #endif /* IPSEC */ - + VERIFY(ro != NULL); - if (ip_doscopedroute && (flags & IP_OUTARGS)) { + if (flags & IP_OUTARGS) { /* * In the forwarding case, only the ifscope value is used, * as source interface selection doesn't take place. @@ -484,7 +494,7 @@ ipfw_tags_done: adv->code = FADV_SUCCESS; ipoa->ipoa_retflags = 0; } - + #if IPSEC if (ipsec_bypass == 0 && !(flags & IP_NOIPSEC)) { so = ipsec_getsocket(m); @@ -580,7 +590,12 @@ loopit: if (!(flags & (IP_FORWARDING|IP_RAWOUTPUT))) { ip->ip_vhl = IP_MAKE_VHL(IPVERSION, hlen >> 2); ip->ip_off &= IP_DF; - ip->ip_id = ip_randomid(); + if (rfc6864 && IP_OFF_IS_ATOMIC(ip->ip_off)) { + // Per RFC6864, value of ip_id is undefined for atomic ip packets + ip->ip_id = 0; + } else { + ip->ip_id = ip_randomid(); + } OSAddAtomic(1, &ipstat.ips_localout); } else { hlen = IP_VHL_HL(ip->ip_vhl) << 2; @@ -657,6 +672,7 @@ loopit: if (ia == NULL) { OSAddAtomic(1, &ipstat.ips_noroute); error = ENETUNREACH; + /* XXX IPv6 APN fallback notification?? */ goto bad; } } @@ -704,11 +720,11 @@ loopit: /* * If the source address belongs to a restricted - * interface and the caller forbids our using + * interface and the caller forbids our using * interfaces of such type, pretend that there is no * route. */ - if (ia0 != NULL && + if (ia0 != NULL && IP_CHECK_RESTRICTIONS(ia0->ifa_ifp, ipobf)) { IFA_REMREF(ia0); ia0 = NULL; @@ -801,7 +817,7 @@ loopit: rtalloc_scoped_ign(ro, ign, ifscope); /* - * If the route points to a cellular/expensive interface + * If the route points to a cellular/expensive interface * and the caller forbids our using interfaces of such type, * pretend that there is no route. */ @@ -894,7 +910,7 @@ loopit: if (IN_MULTICAST(ntohl(pkt_dst.s_addr))) { struct ifnet *srcifp = NULL; struct in_multi *inm; - u_int32_t vif; + u_int32_t vif = 0; u_int8_t ttl = IP_DEFAULT_MULTICAST_TTL; u_int8_t loop = IP_DEFAULT_MULTICAST_LOOP; @@ -1202,6 +1218,7 @@ sendit: /* Check if the interface is allowed */ if (!necp_packet_is_allowed_over_interface(m, ifp)) { error = EHOSTUNREACH; + OSAddAtomic(1, &ipstat.ips_necp_policy_drop); goto bad; } goto skip_ipsec; @@ -1209,6 +1226,7 @@ sendit: case NECP_KERNEL_POLICY_RESULT_SOCKET_DIVERT: /* Flow divert packets should be blocked at the IP layer */ error = EHOSTUNREACH; + OSAddAtomic(1, &ipstat.ips_necp_policy_drop); goto bad; case NECP_KERNEL_POLICY_RESULT_IP_TUNNEL: { /* Verify that the packet is being routed to the tunnel */ @@ -1217,6 +1235,7 @@ sendit: /* Check if the interface is allowed */ if (!necp_packet_is_allowed_over_interface(m, ifp)) { error = EHOSTUNREACH; + OSAddAtomic(1, &ipstat.ips_necp_policy_drop); goto bad; } goto skip_ipsec; @@ -1225,6 +1244,7 @@ sendit: /* Check if the interface is allowed */ if (!necp_packet_is_allowed_over_interface(m, policy_ifp)) { error = EHOSTUNREACH; + OSAddAtomic(1, &ipstat.ips_necp_policy_drop); goto bad; } @@ -1234,10 +1254,10 @@ sendit: goto skip_ipsec; } else { error = ENETUNREACH; + OSAddAtomic(1, &ipstat.ips_necp_policy_drop); goto bad; } } - break; } default: break; @@ -1246,6 +1266,7 @@ sendit: /* Catch-all to check if the interface is allowed */ if (!necp_packet_is_allowed_over_interface(m, ifp)) { error = EHOSTUNREACH; + OSAddAtomic(1, &ipstat.ips_necp_policy_drop); goto bad; } #endif /* NECP */ @@ -1320,7 +1341,7 @@ sendit: if (flags & IP_ROUTETOIF) { bzero(&ipsec_state.ro, sizeof (ipsec_state.ro)); } else { - route_copyout(&ipsec_state.ro, ro, sizeof (ipsec_state.ro)); + route_copyout((struct route *)&ipsec_state.ro, ro, sizeof (struct route)); } ipsec_state.dst = SA(dst); @@ -1343,6 +1364,11 @@ sendit: struct ip *, ip, struct ip6_hdr *, NULL); error = ipsec4_output(&ipsec_state, sp, flags); + if (ipsec_state.tunneled == 6) { + m0 = m = NULL; + error = 0; + goto bad; + } m0 = m = ipsec_state.m; @@ -1363,10 +1389,10 @@ sendit: */ if (ipsec_state.tunneled) { flags &= ~IP_ROUTETOIF; - ro = &ipsec_state.ro; + ro = (struct route *)&ipsec_state.ro; } } else { - ro = &ipsec_state.ro; + ro = (struct route *)&ipsec_state.ro; } dst = SIN(ipsec_state.dst); if (error) { @@ -1696,7 +1722,7 @@ skip_ipsec: ROUTE_RELEASE(ro_fwd); bcopy(dst, &ro_fwd->ro_dst, sizeof (*dst)); - rtalloc_ign(ro_fwd, RTF_PRCLONING); + rtalloc_ign(ro_fwd, RTF_PRCLONING, false); if (ro_fwd->ro_rt == NULL) { OSAddAtomic(1, &ipstat.ips_noroute); @@ -1768,6 +1794,31 @@ pass: goto bad; } + if (ipoa != NULL) { + u_int8_t dscp = ip->ip_tos >> IPTOS_DSCP_SHIFT; + + error = set_packet_qos(m, ifp, + ipoa->ipoa_flags & IPOAF_QOSMARKING_ALLOWED ? TRUE : FALSE, + ipoa->ipoa_sotc, ipoa->ipoa_netsvctype, &dscp); + if (error == 0) { + ip->ip_tos &= IPTOS_ECN_MASK; + ip->ip_tos |= dscp << IPTOS_DSCP_SHIFT; + } else { + printf("%s if_dscp_for_mbuf() error %d\n", __func__, error); + error = 0; + } + } + + /* + * Some Wi-Fi AP implementations do not correctly handle multicast IP + * packets with DSCP bits set -- see radr://9331522 -- so as a + * workaround we clear the DSCP bits and set the service class to BE + */ + if (IN_MULTICAST(ntohl(pkt_dst.s_addr)) && IFNET_IS_WIFI_INFRA(ifp)) { + ip->ip_tos &= IPTOS_ECN_MASK; + mbuf_set_service_class(m, MBUF_SC_BE); + } + ip_output_checksum(ifp, m, (IP_VHL_HL(ip->ip_vhl) << 2), ip->ip_len, &sw_csum); @@ -2220,7 +2271,8 @@ in_finalize_cksum(struct mbuf *m, uint32_t hoff, uint32_t csum_flags) ip_out_cksum_stats(ip->ip_p, len); /* RFC1122 4.1.3.4 */ - if (csum == 0 && (m->m_pkthdr.csum_flags & CSUM_UDP)) + if (csum == 0 && + (m->m_pkthdr.csum_flags & (CSUM_UDP|CSUM_ZERO_INVERT))) csum = 0xffff; /* Insert the checksum in the ULP csum field */ @@ -2232,8 +2284,8 @@ in_finalize_cksum(struct mbuf *m, uint32_t hoff, uint32_t csum_flags) } else { bcopy(&csum, (mtod(m, char *) + offset), sizeof (csum)); } - m->m_pkthdr.csum_flags &= - ~(CSUM_DELAY_DATA | CSUM_DATA_VALID | CSUM_PARTIAL); + m->m_pkthdr.csum_flags &= ~(CSUM_DELAY_DATA | CSUM_DATA_VALID | + CSUM_PARTIAL | CSUM_ZERO_INVERT); } if (sw_csum & CSUM_DELAY_IP) { @@ -2413,8 +2465,10 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt) m->m_len = sopt->sopt_valsize; error = sooptcopyin(sopt, mtod(m, char *), m->m_len, m->m_len); - if (error) + if (error) { + m_freem(m); break; + } return (ip_pcbopts(sopt->sopt_name, &inp->inp_options, m)); @@ -2428,6 +2482,7 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt) case IP_RECVIF: case IP_RECVTTL: case IP_RECVPKTINFO: + case IP_RECVTOS: error = sooptcopyin(sopt, &optval, sizeof (optval), sizeof (optval)); if (error) @@ -2470,73 +2525,13 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt) case IP_RECVPKTINFO: OPTSET(INP_PKTINFO); break; - } - break; -#undef OPTSET - -#if CONFIG_FORCE_OUT_IFP - /* - * Apple private interface, similar to IP_BOUND_IF, except - * that the parameter is a NULL-terminated string containing - * the name of the network interface; an emptry string means - * unbind. Applications are encouraged to use IP_BOUND_IF - * instead, as that is the current "official" API. - */ - case IP_FORCE_OUT_IFP: { - char ifname[IFNAMSIZ]; - unsigned int ifscope; - - /* This option is settable only for IPv4 */ - if (!(inp->inp_vflag & INP_IPV4)) { - error = EINVAL; - break; - } - /* Verify interface name parameter is sane */ - if (sopt->sopt_valsize > sizeof (ifname)) { - error = EINVAL; + case IP_RECVTOS: + OPTSET(INP_RECVTOS); break; + #undef OPTSET } - - /* Copy the interface name */ - if (sopt->sopt_valsize != 0) { - error = sooptcopyin(sopt, ifname, - sizeof (ifname), sopt->sopt_valsize); - if (error) - break; - } - - if (sopt->sopt_valsize == 0 || ifname[0] == '\0') { - /* Unbind this socket from any interface */ - ifscope = IFSCOPE_NONE; - } else { - ifnet_t ifp; - - /* Verify name is NULL terminated */ - if (ifname[sopt->sopt_valsize - 1] != '\0') { - error = EINVAL; - break; - } - - /* Bail out if given bogus interface name */ - if (ifnet_find_by_name(ifname, &ifp) != 0) { - error = ENXIO; - break; - } - - /* Bind this socket to this interface */ - ifscope = ifp->if_index; - - /* - * Won't actually free; since we don't release - * this later, we should do it now. - */ - ifnet_release(ifp); - } - error = inp_bindif(inp, ifscope, NULL); - } - break; -#endif /* CONFIG_FORCE_OUT_IFP */ + break; /* * Multicast socket options are processed by the in_mcast * module. @@ -2597,7 +2592,7 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt) int priv; struct mbuf *m; int optname; - + if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */ break; if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */ @@ -2725,8 +2720,8 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt) case IP_RECVTTL: case IP_PORTRANGE: case IP_RECVPKTINFO: + case IP_RECVTOS: switch (sopt->sopt_name) { - case IP_TOS: optval = inp->inp_ip_tos; break; @@ -2769,6 +2764,10 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt) case IP_RECVPKTINFO: optval = OPTBIT(INP_PKTINFO); break; + + case IP_RECVTOS: + optval = OPTBIT(INP_RECVTOS); + break; } error = sooptcopyout(sopt, &optval, sizeof (optval)); break; @@ -2791,11 +2790,10 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt) #if TRAFFIC_MGT case IP_TRAFFIC_MGT_BACKGROUND: { - unsigned background = (so->so_traffic_mgt_flags & - TRAFFIC_MGT_SO_BACKGROUND) ? 1 : 0; + unsigned background = (so->so_flags1 & + SOF1_TRAFFIC_MGT_SO_BACKGROUND) ? 1 : 0; return (sooptcopyout(sopt, &background, sizeof (background))); - break; } #endif /* TRAFFIC_MGT */ @@ -3109,7 +3107,7 @@ ip_mloopback(struct ifnet *srcifp, struct ifnet *origifp, struct mbuf *m, * interface itself is lo0, this will be overridden by if_loop. */ if (hwcksum_rx) { - copym->m_pkthdr.csum_flags &= ~CSUM_PARTIAL; + copym->m_pkthdr.csum_flags &= ~(CSUM_PARTIAL|CSUM_ZERO_INVERT); copym->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; copym->m_pkthdr.csum_data = 0xffff; @@ -3463,10 +3461,13 @@ ip_output_checksum(struct ifnet *ifp, struct mbuf *m, int hlen, int ip_len, /* * Partial checksum offload, if non-IP fragment, and TCP only * (no UDP support, as the hardware may not be able to convert - * +0 to -0 (0xffff) per RFC1122 4.1.3.4.) + * +0 to -0 (0xffff) per RFC1122 4.1.3.4. unless the interface + * supports "invert zero" capability.) */ if (hwcksum_tx && !tso && - (m->m_pkthdr.csum_flags & CSUM_TCP) && + ((m->m_pkthdr.csum_flags & CSUM_TCP) || + ((hwcap & CSUM_ZERO_INVERT) && + (m->m_pkthdr.csum_flags & CSUM_ZERO_INVERT))) && ip_len <= ifp->if_mtu) { uint16_t start = sizeof (struct ip); uint16_t ulpoff = m->m_pkthdr.csum_data & 0xffff; @@ -3572,4 +3573,3 @@ sysctl_ip_output_getperf SYSCTL_HANDLER_ARGS return (SYSCTL_OUT(req, &net_perf, MIN(sizeof (net_perf), req->oldlen))); } -