]> git.saurik.com Git - apple/xnu.git/blobdiff - bsd/netinet/ip_output.c
xnu-4903.241.1.tar.gz
[apple/xnu.git] / bsd / netinet / ip_output.c
index 9069bb8ba9f259e42b3e5ab38f0d080138916d02..35f778d25ca26a2e49698780e2e813e9b97f4f28 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2018 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
 #include <netinet/ip_var.h>
 #include <netinet/kpi_ipfilter_var.h>
 #include <netinet/in_tclass.h>
+#include <netinet/udp.h>
+
+#include <netinet6/nd6.h>
 
 #if CONFIG_MACF_NET
 #include <security/mac_framework.h>
@@ -207,6 +210,10 @@ SYSCTL_PROC(_net_inet_ip, OID_AUTO, output_perf_data,
        0, 0, sysctl_ip_output_getperf, "S,net_perf",
        "IP output performance data (struct net_perf, net/net_perf.h)");
 
+__private_extern__ int rfc6864 = 1;
+SYSCTL_INT(_net_inet_ip, OID_AUTO, rfc6864, CTLFLAG_RW | CTLFLAG_LOCKED,
+       &rfc6864, 0, "updated ip id field behavior");
+
 #define        IMO_TRACE_HIST_SIZE     32      /* size of trace history */
 
 /* For gdb */
@@ -346,6 +353,8 @@ ip_output_list(struct mbuf *m0, int packetchain, struct mbuf *opt,
                uint32_t raw;
        } ipobf = { .raw = 0 };
 
+       int interface_mtu = 0;
+
 /*
  * Here we check for restrictions when sending frames.
  * N.B.: IPv4 over internal co-processor interfaces is not allowed.
@@ -353,7 +362,7 @@ ip_output_list(struct mbuf *m0, int packetchain, struct mbuf *opt,
 #define        IP_CHECK_RESTRICTIONS(_ifp, _ipobf)                             \
        (((_ipobf).nocell && IFNET_IS_CELLULAR(_ifp)) ||                \
         ((_ipobf).noexpensive && IFNET_IS_EXPENSIVE(_ifp)) ||          \
-          (IFNET_IS_INTCOPROC(_ifp)) ||                                        \
+         (IFNET_IS_INTCOPROC(_ifp)) ||                                 \
         (!(_ipobf).awdl_unrestricted && IFNET_IS_AWDL_RESTRICTED(_ifp)))
 
        if (ip_output_measure)
@@ -586,7 +595,12 @@ loopit:
        if (!(flags & (IP_FORWARDING|IP_RAWOUTPUT))) {
                ip->ip_vhl = IP_MAKE_VHL(IPVERSION, hlen >> 2);
                ip->ip_off &= IP_DF;
-               ip->ip_id = ip_randomid();
+               if (rfc6864 && IP_OFF_IS_ATOMIC(ip->ip_off)) {
+                       // Per RFC 6864, the value of ip_id is undefined for atomic IP packets
+                       ip->ip_id = 0;
+               } else {
+                       ip->ip_id = ip_randomid();
+               }
                OSAddAtomic(1, &ipstat.ips_localout);
        } else {
                hlen = IP_VHL_HL(ip->ip_vhl) << 2;
@@ -901,7 +915,7 @@ loopit:
        if (IN_MULTICAST(ntohl(pkt_dst.s_addr))) {
                struct ifnet *srcifp = NULL;
                struct in_multi *inm;
-               u_int32_t vif;
+               u_int32_t vif = 0;
                u_int8_t ttl = IP_DEFAULT_MULTICAST_TTL;
                u_int8_t loop = IP_DEFAULT_MULTICAST_LOOP;
 
@@ -1209,6 +1223,7 @@ sendit:
                                /* Check if the interface is allowed */
                                if (!necp_packet_is_allowed_over_interface(m, ifp)) {
                                        error = EHOSTUNREACH;
+                                       OSAddAtomic(1, &ipstat.ips_necp_policy_drop);
                                        goto bad;
                                }
                                goto skip_ipsec;
@@ -1216,6 +1231,7 @@ sendit:
                        case NECP_KERNEL_POLICY_RESULT_SOCKET_DIVERT:
                                /* Flow divert packets should be blocked at the IP layer */
                                error = EHOSTUNREACH;
+                               OSAddAtomic(1, &ipstat.ips_necp_policy_drop);
                                goto bad;
                        case NECP_KERNEL_POLICY_RESULT_IP_TUNNEL: {
                                /* Verify that the packet is being routed to the tunnel */
@@ -1224,6 +1240,7 @@ sendit:
                                        /* Check if the interface is allowed */
                                        if (!necp_packet_is_allowed_over_interface(m, ifp)) {
                                                error = EHOSTUNREACH;
+                                               OSAddAtomic(1, &ipstat.ips_necp_policy_drop);
                                                goto bad;
                                        }
                                        goto skip_ipsec;
@@ -1232,6 +1249,7 @@ sendit:
                                                /* Check if the interface is allowed */
                                                if (!necp_packet_is_allowed_over_interface(m, policy_ifp)) {
                                                        error = EHOSTUNREACH;
+                                                       OSAddAtomic(1, &ipstat.ips_necp_policy_drop);
                                                        goto bad;
                                                }
 
@@ -1241,6 +1259,7 @@ sendit:
                                                goto skip_ipsec;
                                        } else {
                                                error = ENETUNREACH;
+                                               OSAddAtomic(1, &ipstat.ips_necp_policy_drop);
                                                goto bad;
                                        }
                                }
@@ -1252,6 +1271,7 @@ sendit:
        /* Catch-all to check if the interface is allowed */
        if (!necp_packet_is_allowed_over_interface(m, ifp)) {
                error = EHOSTUNREACH;
+               OSAddAtomic(1, &ipstat.ips_necp_policy_drop);
                goto bad;
        }
 #endif /* NECP */
@@ -1326,7 +1346,7 @@ sendit:
        if (flags & IP_ROUTETOIF) {
                bzero(&ipsec_state.ro, sizeof (ipsec_state.ro));
        } else {
-               route_copyout(&ipsec_state.ro, ro, sizeof (ipsec_state.ro));
+               route_copyout((struct route *)&ipsec_state.ro, ro, sizeof (struct route));
        }
        ipsec_state.dst = SA(dst);
 
@@ -1374,10 +1394,10 @@ sendit:
                 */
                if (ipsec_state.tunneled) {
                        flags &= ~IP_ROUTETOIF;
-                       ro = &ipsec_state.ro;
+                       ro = (struct route *)&ipsec_state.ro;
                }
        } else {
-               ro = &ipsec_state.ro;
+               ro = (struct route *)&ipsec_state.ro;
        }
        dst = SIN(ipsec_state.dst);
        if (error) {
@@ -1807,11 +1827,19 @@ pass:
        ip_output_checksum(ifp, m, (IP_VHL_HL(ip->ip_vhl) << 2),
            ip->ip_len, &sw_csum);
 
+       interface_mtu = ifp->if_mtu;
+
+       if (INTF_ADJUST_MTU_FOR_CLAT46(ifp)) {
+               interface_mtu = IN6_LINKMTU(ifp);
+               /* Further adjust the size for CLAT46 expansion */
+               interface_mtu -= CLAT46_HDR_EXPANSION_OVERHD;
+       }
+
        /*
         * If small enough for interface, or the interface will take
         * care of the fragmentation for us, can just send directly.
         */
-       if ((u_short)ip->ip_len <= ifp->if_mtu || TSO_IPV4_OK(ifp, m) ||
+       if ((u_short)ip->ip_len <= interface_mtu || TSO_IPV4_OK(ifp, m) ||
            (!(ip->ip_off & IP_DF) && (ifp->if_hwassist & CSUM_FRAGMENT))) {
 #if BYTE_ORDER != BIG_ENDIAN
                HTONS(ip->ip_len);
@@ -1884,6 +1912,8 @@ sendchain:
                        goto loopit;
                }
        }
+
+       VERIFY(interface_mtu != 0);
        /*
         * Too large for interface; fragment if possible.
         * Must be able to put at least 8 bytes per fragment.
@@ -1903,8 +1933,8 @@ sendchain:
                        RT_LOCK_SPIN(ro->ro_rt);
                        if ((ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST)) &&
                            !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU) &&
-                           (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) {
-                               ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
+                           (ro->ro_rt->rt_rmx.rmx_mtu > interface_mtu)) {
+                               ro->ro_rt->rt_rmx.rmx_mtu = interface_mtu;
                        }
                        RT_UNLOCK(ro->ro_rt);
                }
@@ -1915,7 +1945,46 @@ sendchain:
                goto bad;
        }
 
-       error = ip_fragment(m, ifp, ifp->if_mtu, sw_csum);
+       /*
+        * XXX Only TCP seems to be passing a list of packets here.
+        * The following issue is limited to UDP datagrams with a 0 checksum.
+        * For now, limit it to the case where a single packet is passed down.
+        */
+       if (packetchain == 0 && IS_INTF_CLAT46(ifp)) {
+               /*
+                * If it is a UDP packet that has checksum set to 0
+                * and is also not being offloaded, compute a full checksum
+                * and update the UDP checksum.
+                */
+               if (ip->ip_p == IPPROTO_UDP &&
+                   !(m->m_pkthdr.csum_flags & (CSUM_UDP | CSUM_PARTIAL))) {
+                       struct udphdr *uh = NULL;
+
+                       if (m->m_len < hlen + sizeof (struct udphdr)) {
+                               m = m_pullup(m, hlen + sizeof (struct udphdr));
+                               if (m == NULL) {
+                                       error = ENOBUFS;
+                                       m0 = m;
+                                       goto bad;
+                               }
+                               m0 = m;
+                               ip = mtod(m, struct ip *);
+                       }
+                       /*
+                        * Get UDP header and if checksum is 0, then compute the full
+                        * checksum.
+                        */
+                       uh = (struct udphdr *)(void *)((caddr_t)ip + hlen);
+                       if (uh->uh_sum == 0) {
+                               uh->uh_sum = inet_cksum(m, IPPROTO_UDP, hlen,
+                                   ip->ip_len - hlen);
+                               if (uh->uh_sum == 0)
+                                       uh->uh_sum = 0xffff;
+                       }
+               }
+       }
+
+       error = ip_fragment(m, ifp, interface_mtu, sw_csum);
        if (error != 0) {
                m0 = m = NULL;
                goto bad;
@@ -2014,6 +2083,16 @@ ip_fragment(struct mbuf *m, struct ifnet *ifp, unsigned long mtu, int sw_csum)
        hlen = ip->ip_hl << 2;
 #endif /* !_IP_VHL */
 
+#ifdef INET6
+       /*
+        * We need to adjust the fragment sizes to account
+        * for the IPv6 fragment header if the packet needs to be
+        * translated from IPv4 to IPv6.
+        */
+       if (IS_INTF_CLAT46(ifp))
+               mtu -= sizeof(struct ip6_frag);
+
+#endif
        firstlen = len = (mtu - hlen) &~ 7;
        if (len < 8) {
                m_freem(m);
@@ -2256,7 +2335,8 @@ in_finalize_cksum(struct mbuf *m, uint32_t hoff, uint32_t csum_flags)
                ip_out_cksum_stats(ip->ip_p, len);
 
                /* RFC1122 4.1.3.4 */
-               if (csum == 0 && (m->m_pkthdr.csum_flags & CSUM_UDP))
+               if (csum == 0 &&
+                   (m->m_pkthdr.csum_flags & (CSUM_UDP|CSUM_ZERO_INVERT)))
                        csum = 0xffff;
 
                /* Insert the checksum in the ULP csum field */
@@ -2268,8 +2348,8 @@ in_finalize_cksum(struct mbuf *m, uint32_t hoff, uint32_t csum_flags)
                } else {
                        bcopy(&csum, (mtod(m, char *) + offset), sizeof (csum));
                }
-               m->m_pkthdr.csum_flags &=
-                   ~(CSUM_DELAY_DATA | CSUM_DATA_VALID | CSUM_PARTIAL);
+               m->m_pkthdr.csum_flags &= ~(CSUM_DELAY_DATA | CSUM_DATA_VALID |
+                   CSUM_PARTIAL | CSUM_ZERO_INVERT);
        }
 
        if (sw_csum & CSUM_DELAY_IP) {
@@ -2449,8 +2529,10 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt)
                        m->m_len = sopt->sopt_valsize;
                        error = sooptcopyin(sopt, mtod(m, char *),
                            m->m_len, m->m_len);
-                       if (error)
+                       if (error) {
+                               m_freem(m);
                                break;
+                       }
 
                        return (ip_pcbopts(sopt->sopt_name,
                            &inp->inp_options, m));
@@ -3089,7 +3171,7 @@ ip_mloopback(struct ifnet *srcifp, struct ifnet *origifp, struct mbuf *m,
         * interface itself is lo0, this will be overridden by if_loop.
         */
        if (hwcksum_rx) {
-               copym->m_pkthdr.csum_flags &= ~CSUM_PARTIAL;
+               copym->m_pkthdr.csum_flags &= ~(CSUM_PARTIAL|CSUM_ZERO_INVERT);
                copym->m_pkthdr.csum_flags |=
                    CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
                copym->m_pkthdr.csum_data = 0xffff;
@@ -3417,6 +3499,19 @@ in_selectsrcif(struct ip *ip, struct route *ro, unsigned int ifscope)
        return (ifa);
 }
 
+/*
+ * @brief      Given the outgoing interface, determines which checksums
+ *     need to be computed in software and which need to be offloaded
+ *     to the interface.
+ *
+ * @param      ifp Pointer to the outgoing interface
+ * @param      m Pointer to the packet
+ * @param      hlen IP header length
+ * @param      ip_len Total packet size i.e. headers + data payload
+ * @param      sw_csum Pointer to a software checksum flag set
+ *
+ * @return     void
+ */
 void
 ip_output_checksum(struct ifnet *ifp, struct mbuf *m, int hlen, int ip_len,
     uint32_t *sw_csum)
@@ -3440,14 +3535,25 @@ ip_output_checksum(struct ifnet *ifp, struct mbuf *m, int hlen, int ip_len,
                *sw_csum |= ((CSUM_DELAY_DATA | CSUM_DELAY_IP) &
                    m->m_pkthdr.csum_flags);
        } else if (!(*sw_csum & CSUM_DELAY_DATA) && (hwcap & CSUM_PARTIAL)) {
+               int interface_mtu = ifp->if_mtu;
+
+               if (INTF_ADJUST_MTU_FOR_CLAT46(ifp)) {
+                       interface_mtu = IN6_LINKMTU(ifp);
+                       /* Further adjust the size for CLAT46 expansion */
+                       interface_mtu -= CLAT46_HDR_EXPANSION_OVERHD;
+               }
+
                /*
                 * Partial checksum offload, if non-IP fragment, and TCP only
                 * (no UDP support, as the hardware may not be able to convert
-                * +0 to -0 (0xffff) per RFC1122 4.1.3.4.)
+                * +0 to -0 (0xffff) per RFC1122 4.1.3.4. unless the interface
+                * supports "invert zero" capability.)
                 */
                if (hwcksum_tx && !tso &&
-                   (m->m_pkthdr.csum_flags & CSUM_TCP) &&
-                   ip_len <= ifp->if_mtu) {
+                   ((m->m_pkthdr.csum_flags & CSUM_TCP) ||
+                   ((hwcap & CSUM_ZERO_INVERT) &&
+                   (m->m_pkthdr.csum_flags & CSUM_ZERO_INVERT))) &&
+                   ip_len <= interface_mtu) {
                        uint16_t start = sizeof (struct ip);
                        uint16_t ulpoff = m->m_pkthdr.csum_data & 0xffff;
                        m->m_pkthdr.csum_flags |=