git.saurik.com Git - apple/xnu.git/blobdiff - bsd/netinet/ip_output.c
xnu-4570.31.3.tar.gz
[apple/xnu.git] / bsd / netinet / ip_output.c
index 383751d4d07d559537beb48acd059ab80a1b5e6a..f5b51ac5251a76661d22467733c9cd2a2218578e 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2017 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
 #include <netinet/in_var.h>
 #include <netinet/ip_var.h>
 #include <netinet/kpi_ipfilter_var.h>
+#include <netinet/in_tclass.h>
 
 #if CONFIG_MACF_NET
 #include <security/mac_framework.h>
@@ -206,6 +207,10 @@ SYSCTL_PROC(_net_inet_ip, OID_AUTO, output_perf_data,
        0, 0, sysctl_ip_output_getperf, "S,net_perf",
        "IP output performance data (struct net_perf, net/net_perf.h)");
 
+__private_extern__ int rfc6864 = 1;
+SYSCTL_INT(_net_inet_ip, OID_AUTO, rfc6864, CTLFLAG_RW | CTLFLAG_LOCKED,
+       &rfc6864, 0, "updated ip id field behavior");
+
 #define        IMO_TRACE_HIST_SIZE     32      /* size of trace history */
 
 /* For gdb */
@@ -345,9 +350,14 @@ ip_output_list(struct mbuf *m0, int packetchain, struct mbuf *opt,
                uint32_t raw;
        } ipobf = { .raw = 0 };
 
+/*
+ * Here we check for restrictions when sending frames.
+ * N.B.: IPv4 over internal co-processor interfaces is not allowed.
+ */
 #define        IP_CHECK_RESTRICTIONS(_ifp, _ipobf)                             \
        (((_ipobf).nocell && IFNET_IS_CELLULAR(_ifp)) ||                \
         ((_ipobf).noexpensive && IFNET_IS_EXPENSIVE(_ifp)) ||          \
+          (IFNET_IS_INTCOPROC(_ifp)) ||                                        \
         (!(_ipobf).awdl_unrestricted && IFNET_IS_AWDL_RESTRICTED(_ifp)))
 
        if (ip_output_measure)
@@ -434,10 +444,10 @@ ipfw_tags_done:
                }
        }
 #endif /* IPSEC */
-       
+
        VERIFY(ro != NULL);
 
-       if (ip_doscopedroute && (flags & IP_OUTARGS)) {
+       if (flags & IP_OUTARGS) {
                /*
                 * In the forwarding case, only the ifscope value is used,
                 * as source interface selection doesn't take place.
@@ -484,7 +494,7 @@ ipfw_tags_done:
                adv->code = FADV_SUCCESS;
                ipoa->ipoa_retflags = 0;
        }
-       
+
 #if IPSEC
        if (ipsec_bypass == 0 && !(flags & IP_NOIPSEC)) {
                so = ipsec_getsocket(m);
@@ -580,7 +590,12 @@ loopit:
        if (!(flags & (IP_FORWARDING|IP_RAWOUTPUT))) {
                ip->ip_vhl = IP_MAKE_VHL(IPVERSION, hlen >> 2);
                ip->ip_off &= IP_DF;
-               ip->ip_id = ip_randomid();
+               if (rfc6864 && IP_OFF_IS_ATOMIC(ip->ip_off)) {
+                       // Per RFC6864, value of ip_id is undefined for atomic ip packets
+                       ip->ip_id = 0;
+               } else {
+                       ip->ip_id = ip_randomid();
+               }
                OSAddAtomic(1, &ipstat.ips_localout);
        } else {
                hlen = IP_VHL_HL(ip->ip_vhl) << 2;
@@ -657,6 +672,7 @@ loopit:
                        if (ia == NULL) {
                                OSAddAtomic(1, &ipstat.ips_noroute);
                                error = ENETUNREACH;
+                               /* XXX IPv6 APN fallback notification?? */
                                goto bad;
                        }
                }
@@ -704,11 +720,11 @@ loopit:
 
                        /*
                         * If the source address belongs to a restricted
-                        * interface and the caller forbids our using 
+                        * interface and the caller forbids our using
                         * interfaces of such type, pretend that there is no
                         * route.
                         */
-                       if (ia0 != NULL && 
+                       if (ia0 != NULL &&
                            IP_CHECK_RESTRICTIONS(ia0->ifa_ifp, ipobf)) {
                                IFA_REMREF(ia0);
                                ia0 = NULL;
@@ -801,7 +817,7 @@ loopit:
                                rtalloc_scoped_ign(ro, ign, ifscope);
 
                        /*
-                        * If the route points to a cellular/expensive interface 
+                        * If the route points to a cellular/expensive interface
                         * and the caller forbids our using interfaces of such type,
                         * pretend that there is no route.
                         */
@@ -894,7 +910,7 @@ loopit:
        if (IN_MULTICAST(ntohl(pkt_dst.s_addr))) {
                struct ifnet *srcifp = NULL;
                struct in_multi *inm;
-               u_int32_t vif;
+               u_int32_t vif = 0;
                u_int8_t ttl = IP_DEFAULT_MULTICAST_TTL;
                u_int8_t loop = IP_DEFAULT_MULTICAST_LOOP;
 
@@ -1202,6 +1218,7 @@ sendit:
                                /* Check if the interface is allowed */
                                if (!necp_packet_is_allowed_over_interface(m, ifp)) {
                                        error = EHOSTUNREACH;
+                                       OSAddAtomic(1, &ipstat.ips_necp_policy_drop);
                                        goto bad;
                                }
                                goto skip_ipsec;
@@ -1209,6 +1226,7 @@ sendit:
                        case NECP_KERNEL_POLICY_RESULT_SOCKET_DIVERT:
                                /* Flow divert packets should be blocked at the IP layer */
                                error = EHOSTUNREACH;
+                               OSAddAtomic(1, &ipstat.ips_necp_policy_drop);
                                goto bad;
                        case NECP_KERNEL_POLICY_RESULT_IP_TUNNEL: {
                                /* Verify that the packet is being routed to the tunnel */
@@ -1217,6 +1235,7 @@ sendit:
                                        /* Check if the interface is allowed */
                                        if (!necp_packet_is_allowed_over_interface(m, ifp)) {
                                                error = EHOSTUNREACH;
+                                               OSAddAtomic(1, &ipstat.ips_necp_policy_drop);
                                                goto bad;
                                        }
                                        goto skip_ipsec;
@@ -1225,6 +1244,7 @@ sendit:
                                                /* Check if the interface is allowed */
                                                if (!necp_packet_is_allowed_over_interface(m, policy_ifp)) {
                                                        error = EHOSTUNREACH;
+                                                       OSAddAtomic(1, &ipstat.ips_necp_policy_drop);
                                                        goto bad;
                                                }
 
@@ -1234,10 +1254,10 @@ sendit:
                                                goto skip_ipsec;
                                        } else {
                                                error = ENETUNREACH;
+                                               OSAddAtomic(1, &ipstat.ips_necp_policy_drop);
                                                goto bad;
                                        }
                                }
-                               break;
                        }
                        default:
                                break;
@@ -1246,6 +1266,7 @@ sendit:
        /* Catch-all to check if the interface is allowed */
        if (!necp_packet_is_allowed_over_interface(m, ifp)) {
                error = EHOSTUNREACH;
+               OSAddAtomic(1, &ipstat.ips_necp_policy_drop);
                goto bad;
        }
 #endif /* NECP */
@@ -1320,7 +1341,7 @@ sendit:
        if (flags & IP_ROUTETOIF) {
                bzero(&ipsec_state.ro, sizeof (ipsec_state.ro));
        } else {
-               route_copyout(&ipsec_state.ro, ro, sizeof (ipsec_state.ro));
+               route_copyout((struct route *)&ipsec_state.ro, ro, sizeof (struct route));
        }
        ipsec_state.dst = SA(dst);
 
@@ -1343,6 +1364,11 @@ sendit:
            struct ip *, ip, struct ip6_hdr *, NULL);
 
        error = ipsec4_output(&ipsec_state, sp, flags);
+       if (ipsec_state.tunneled == 6) {
+               m0 = m = NULL;
+               error = 0;
+               goto bad;
+       }
 
        m0 = m = ipsec_state.m;
 
@@ -1363,10 +1389,10 @@ sendit:
                 */
                if (ipsec_state.tunneled) {
                        flags &= ~IP_ROUTETOIF;
-                       ro = &ipsec_state.ro;
+                       ro = (struct route *)&ipsec_state.ro;
                }
        } else {
-               ro = &ipsec_state.ro;
+               ro = (struct route *)&ipsec_state.ro;
        }
        dst = SIN(ipsec_state.dst);
        if (error) {
@@ -1696,7 +1722,7 @@ skip_ipsec:
                        ROUTE_RELEASE(ro_fwd);
                        bcopy(dst, &ro_fwd->ro_dst, sizeof (*dst));
 
-                       rtalloc_ign(ro_fwd, RTF_PRCLONING);
+                       rtalloc_ign(ro_fwd, RTF_PRCLONING, false);
 
                        if (ro_fwd->ro_rt == NULL) {
                                OSAddAtomic(1, &ipstat.ips_noroute);
@@ -1768,6 +1794,31 @@ pass:
                goto bad;
        }
 
+       if (ipoa != NULL) {
+               u_int8_t dscp = ip->ip_tos >> IPTOS_DSCP_SHIFT;
+
+               error = set_packet_qos(m, ifp,
+                   ipoa->ipoa_flags & IPOAF_QOSMARKING_ALLOWED ? TRUE : FALSE,
+                   ipoa->ipoa_sotc, ipoa->ipoa_netsvctype, &dscp);
+               if (error == 0) {
+                       ip->ip_tos &= IPTOS_ECN_MASK;
+                       ip->ip_tos |= dscp << IPTOS_DSCP_SHIFT;
+               } else {
+                       printf("%s if_dscp_for_mbuf() error %d\n", __func__, error);
+                       error = 0;
+               }
+       }
+
+       /*
+        * Some Wi-Fi AP implementations do not correctly handle multicast IP
+        * packets with DSCP bits set -- see radr://9331522 -- so as a
+        * workaround we clear the DSCP bits and set the service class to BE
+        */
+       if (IN_MULTICAST(ntohl(pkt_dst.s_addr)) && IFNET_IS_WIFI_INFRA(ifp)) {
+               ip->ip_tos &= IPTOS_ECN_MASK;
+               mbuf_set_service_class(m, MBUF_SC_BE);
+       }
+
        ip_output_checksum(ifp, m, (IP_VHL_HL(ip->ip_vhl) << 2),
            ip->ip_len, &sw_csum);
 
@@ -2220,7 +2271,8 @@ in_finalize_cksum(struct mbuf *m, uint32_t hoff, uint32_t csum_flags)
                ip_out_cksum_stats(ip->ip_p, len);
 
                /* RFC1122 4.1.3.4 */
-               if (csum == 0 && (m->m_pkthdr.csum_flags & CSUM_UDP))
+               if (csum == 0 &&
+                   (m->m_pkthdr.csum_flags & (CSUM_UDP|CSUM_ZERO_INVERT)))
                        csum = 0xffff;
 
                /* Insert the checksum in the ULP csum field */
@@ -2232,8 +2284,8 @@ in_finalize_cksum(struct mbuf *m, uint32_t hoff, uint32_t csum_flags)
                } else {
                        bcopy(&csum, (mtod(m, char *) + offset), sizeof (csum));
                }
-               m->m_pkthdr.csum_flags &=
-                   ~(CSUM_DELAY_DATA | CSUM_DATA_VALID | CSUM_PARTIAL);
+               m->m_pkthdr.csum_flags &= ~(CSUM_DELAY_DATA | CSUM_DATA_VALID |
+                   CSUM_PARTIAL | CSUM_ZERO_INVERT);
        }
 
        if (sw_csum & CSUM_DELAY_IP) {
@@ -2413,8 +2465,10 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt)
                        m->m_len = sopt->sopt_valsize;
                        error = sooptcopyin(sopt, mtod(m, char *),
                            m->m_len, m->m_len);
-                       if (error)
+                       if (error) {
+                               m_freem(m);
                                break;
+                       }
 
                        return (ip_pcbopts(sopt->sopt_name,
                            &inp->inp_options, m));
@@ -2428,6 +2482,7 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt)
                case IP_RECVIF:
                case IP_RECVTTL:
                case IP_RECVPKTINFO:
+               case IP_RECVTOS:
                        error = sooptcopyin(sopt, &optval, sizeof (optval),
                            sizeof (optval));
                        if (error)
@@ -2470,73 +2525,13 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt)
                        case IP_RECVPKTINFO:
                                OPTSET(INP_PKTINFO);
                                break;
-                       }
-                       break;
-#undef OPTSET
-
-#if CONFIG_FORCE_OUT_IFP
-               /*
-                * Apple private interface, similar to IP_BOUND_IF, except
-                * that the parameter is a NULL-terminated string containing
-                * the name of the network interface; an emptry string means
-                * unbind.  Applications are encouraged to use IP_BOUND_IF
-                * instead, as that is the current "official" API.
-                */
-               case IP_FORCE_OUT_IFP: {
-                       char ifname[IFNAMSIZ];
-                       unsigned int ifscope;
-
-                       /* This option is settable only for IPv4 */
-                       if (!(inp->inp_vflag & INP_IPV4)) {
-                               error = EINVAL;
-                               break;
-                       }
 
-                       /* Verify interface name parameter is sane */
-                       if (sopt->sopt_valsize > sizeof (ifname)) {
-                               error = EINVAL;
+                       case IP_RECVTOS:
+                               OPTSET(INP_RECVTOS);
                                break;
+ #undef OPTSET
                        }
-
-                       /* Copy the interface name */
-                       if (sopt->sopt_valsize != 0) {
-                               error = sooptcopyin(sopt, ifname,
-                                   sizeof (ifname), sopt->sopt_valsize);
-                               if (error)
-                                       break;
-                       }
-
-                       if (sopt->sopt_valsize == 0 || ifname[0] == '\0') {
-                               /* Unbind this socket from any interface */
-                               ifscope = IFSCOPE_NONE;
-                       } else {
-                               ifnet_t ifp;
-
-                               /* Verify name is NULL terminated */
-                               if (ifname[sopt->sopt_valsize - 1] != '\0') {
-                                       error = EINVAL;
-                                       break;
-                               }
-
-                               /* Bail out if given bogus interface name */
-                               if (ifnet_find_by_name(ifname, &ifp) != 0) {
-                                       error = ENXIO;
-                                       break;
-                               }
-
-                               /* Bind this socket to this interface */
-                               ifscope = ifp->if_index;
-
-                               /*
-                                * Won't actually free; since we don't release
-                                * this later, we should do it now.
-                                */
-                               ifnet_release(ifp);
-                       }
-                       error = inp_bindif(inp, ifscope, NULL);
-               }
-               break;
-#endif /* CONFIG_FORCE_OUT_IFP */
+                       break;
                /*
                 * Multicast socket options are processed by the in_mcast
                 * module.
@@ -2597,7 +2592,7 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt)
                        int priv;
                        struct mbuf *m;
                        int optname;
-                       
+
                        if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
                                break;
                        if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
@@ -2725,8 +2720,8 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt)
                case IP_RECVTTL:
                case IP_PORTRANGE:
                case IP_RECVPKTINFO:
+               case IP_RECVTOS:
                        switch (sopt->sopt_name) {
-
                        case IP_TOS:
                                optval = inp->inp_ip_tos;
                                break;
@@ -2769,6 +2764,10 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt)
                        case IP_RECVPKTINFO:
                                optval = OPTBIT(INP_PKTINFO);
                                break;
+
+                       case IP_RECVTOS:
+                               optval = OPTBIT(INP_RECVTOS);
+                               break;
                        }
                        error = sooptcopyout(sopt, &optval, sizeof (optval));
                        break;
@@ -2791,11 +2790,10 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt)
 
 #if TRAFFIC_MGT
                case IP_TRAFFIC_MGT_BACKGROUND: {
-                       unsigned background = (so->so_traffic_mgt_flags &
-                           TRAFFIC_MGT_SO_BACKGROUND) ? 1 : 0;
+                       unsigned background = (so->so_flags1 &
+                           SOF1_TRAFFIC_MGT_SO_BACKGROUND) ? 1 : 0;
                        return (sooptcopyout(sopt, &background,
                            sizeof (background)));
-                       break;
                }
 #endif /* TRAFFIC_MGT */
 
@@ -3109,7 +3107,7 @@ ip_mloopback(struct ifnet *srcifp, struct ifnet *origifp, struct mbuf *m,
         * interface itself is lo0, this will be overridden by if_loop.
         */
        if (hwcksum_rx) {
-               copym->m_pkthdr.csum_flags &= ~CSUM_PARTIAL;
+               copym->m_pkthdr.csum_flags &= ~(CSUM_PARTIAL|CSUM_ZERO_INVERT);
                copym->m_pkthdr.csum_flags |=
                    CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
                copym->m_pkthdr.csum_data = 0xffff;
@@ -3463,10 +3461,13 @@ ip_output_checksum(struct ifnet *ifp, struct mbuf *m, int hlen, int ip_len,
                /*
                 * Partial checksum offload, if non-IP fragment, and TCP only
                 * (no UDP support, as the hardware may not be able to convert
-                * +0 to -0 (0xffff) per RFC1122 4.1.3.4.)
+                * +0 to -0 (0xffff) per RFC1122 4.1.3.4. unless the interface
+                * supports "invert zero" capability.)
                 */
                if (hwcksum_tx && !tso &&
-                   (m->m_pkthdr.csum_flags & CSUM_TCP) &&
+                   ((m->m_pkthdr.csum_flags & CSUM_TCP) ||
+                   ((hwcap & CSUM_ZERO_INVERT) &&
+                   (m->m_pkthdr.csum_flags & CSUM_ZERO_INVERT))) &&
                    ip_len <= ifp->if_mtu) {
                        uint16_t start = sizeof (struct ip);
                        uint16_t ulpoff = m->m_pkthdr.csum_data & 0xffff;
@@ -3572,4 +3573,3 @@ sysctl_ip_output_getperf SYSCTL_HANDLER_ARGS
 
        return (SYSCTL_OUT(req, &net_perf, MIN(sizeof (net_perf), req->oldlen)));
 }
-