X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/ab86ba338a07a58a89f50cf7066a0f0e487ac0cc..4a3eedf9ecc9bbe3f3a5c6ce5e53ad199d639d32:/bsd/netinet/ip_output.c diff --git a/bsd/netinet/ip_output.c b/bsd/netinet/ip_output.c index ec096591d..db39fe174 100644 --- a/bsd/netinet/ip_output.c +++ b/bsd/netinet/ip_output.c @@ -1,16 +1,19 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2007 Apple Inc. All rights reserved. * - * @APPLE_LICENSE_HEADER_START@ - * - * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved. + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this - * file. + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER @@ -20,7 +23,7 @@ * Please see the License for the specific language governing rights and * limitations under the License. * - * @APPLE_LICENSE_HEADER_END@ + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* * Copyright (c) 1982, 1986, 1988, 1990, 1993 @@ -57,6 +60,12 @@ * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 * $FreeBSD: src/sys/netinet/ip_output.c,v 1.99.2.16 2001/07/19 06:37:26 kris Exp $ */ +/* + * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce + * support for mandatory and extensible security protections. This notice + * is included in support of clause 2.2 (b) of the Apple Public License, + * Version 2.0. + */ #define _IP_VHL @@ -68,6 +77,8 @@ #include #include #include +#include +#include #include #include @@ -79,26 +90,24 @@ #include #include +#include + +#if CONFIG_MACF_NET +#include +#endif + #include "faith.h" #include #include +#include #define DBG_LAYER_BEG NETDBG_CODE(DBG_NETIP, 1) #define DBG_LAYER_END NETDBG_CODE(DBG_NETIP, 3) #define DBG_FNC_IP_OUTPUT NETDBG_CODE(DBG_NETIP, (1 << 8) | 1) #define DBG_FNC_IPSEC4_OUTPUT NETDBG_CODE(DBG_NETIP, (2 << 8) | 1) - -#if vax -#include -#endif - -#if __FreeBSD__ -#include - -static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "internet multicast options"); -#endif +#define SWAP16(v) ((((v) & 0xff) << 8) | ((v) >> 8)) #if IPSEC #include @@ -111,6 +120,7 @@ static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "internet multicast options"); #endif /*IPSEC*/ #include +#include #if DUMMYNET #include @@ -123,42 +133,48 @@ static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "internet multicast options"); (ntohl(a.s_addr))&0xFF); #endif + u_short ip_id; -static struct mbuf *ip_insertoptions __P((struct mbuf *, struct mbuf *, int *)); -static struct ifnet *ip_multicast_if __P((struct in_addr *, int *)); -static void ip_mloopback - __P((struct ifnet *, struct mbuf *, struct sockaddr_in *, int)); -static int ip_getmoptions - __P((struct sockopt *, struct ip_moptions *)); -static int ip_pcbopts __P((int, struct mbuf **, struct mbuf *)); -static int ip_setmoptions - __P((struct sockopt *, struct ip_moptions **)); +static struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *); +static struct ifnet *ip_multicast_if(struct in_addr *, int *); +static void ip_mloopback(struct ifnet *, struct mbuf *, + struct sockaddr_in *, int); +static int ip_getmoptions(struct sockopt *, struct ip_moptions *); +static int ip_pcbopts(int, struct mbuf **, struct mbuf *); +static int ip_setmoptions(struct sockopt *, struct ip_moptions **); + +static void ip_out_cksum_stats(int, u_int32_t); int ip_createmoptions(struct ip_moptions **imop); int ip_addmembership(struct ip_moptions *imo, struct ip_mreq *mreq); int ip_dropmembership(struct ip_moptions *imo, struct ip_mreq *mreq); -int ip_optcopy __P((struct ip *, struct ip *)); -extern int (*fr_checkp) __P((struct ip *, int, struct ifnet *, int, struct mbuf **)); -#ifdef __APPLE__ -extern struct mbuf* m_dup(register struct mbuf *m, int how); -#endif +int ip_optcopy(struct ip *, struct ip *); +void in_delayed_cksum_offset(struct mbuf *, int ); +void in_cksum_offset(struct mbuf* , size_t ); -static u_long lo_dl_tag = 0; +extern int (*fr_checkp)(struct ip *, int, struct ifnet *, int, struct mbuf **); -void in_delayed_cksum(struct mbuf *m); -extern int apple_hwcksum_tx; extern u_long route_generation; extern struct protosw inetsw[]; extern struct ip_linklocal_stat ip_linklocal_stat; +extern lck_mtx_t *ip_mutex; /* temporary: for testing */ #if IPSEC extern int ipsec_bypass; #endif +static int ip_maxchainsent = 0; +SYSCTL_INT(_net_inet_ip, OID_AUTO, maxchainsent, CTLFLAG_RW, + &ip_maxchainsent, 0, "use dlil_output_list"); +#if DEBUG +static int forge_ce = 0; +SYSCTL_INT(_net_inet_ip, OID_AUTO, forge_ce, CTLFLAG_RW, + &forge_ce, 0, "Forge ECN CE"); +#endif /* DEBUG */ /* * IP output. The packet in mbuf chain m contains a skeletal IP * header (with len, off, ttl, proto, tos, src, dst). @@ -166,95 +182,188 @@ extern int ipsec_bypass; * The mbuf opt, if present, will not be freed. */ int -ip_output(m0, opt, ro, flags, imo) - struct mbuf *m0; - struct mbuf *opt; - struct route *ro; - int flags; - struct ip_moptions *imo; +ip_output( + struct mbuf *m0, + struct mbuf *opt, + struct route *ro, + int flags, + struct ip_moptions *imo, + struct ifnet *ifp) +{ + int error; + error = ip_output_list(m0, 0, opt, ro, flags, imo, ifp); + return error; +} + +/* + * Returns: 0 Success + * ENOMEM + * EADDRNOTAVAIL + * ENETUNREACH + * EHOSTUNREACH + * EACCES + * EMSGSIZE + * ENOBUFS + * ipsec4_getpolicybyaddr:??? [IPSEC 4th argument, contents modified] + * ipsec4_getpolicybysock:??? [IPSEC 4th argument, contents modified] + * key_spdacquire:??? [IPSEC] + * ipsec4_output:??? [IPSEC] + * :??? [firewall] + * ip_dn_io_ptr:??? [dummynet] + * dlil_output:??? [DLIL] + * dlil_output_list:??? [DLIL] + * + * Notes: The ipsec4_getpolicyby{addr|sock} function error returns are + * only used as the error return from this function where one of + * these functions fails to return a policy. + */ +int +ip_output_list( + struct mbuf *m0, + int packetchain, + struct mbuf *opt, + struct route *ro, + int flags, + struct ip_moptions *imo, +#if CONFIG_FORCE_OUT_IFP + struct ifnet *pdp_ifp +#else + __unused struct ifnet *unused_ifp +#endif + ) { struct ip *ip, *mhip; struct ifnet *ifp = NULL; struct mbuf *m = m0; int hlen = sizeof (struct ip); - int len, off, error = 0; + int len = 0, off, error = 0; struct sockaddr_in *dst = NULL; struct in_ifaddr *ia = NULL; int isbroadcast, sw_csum; + struct in_addr pkt_dst; #if IPSEC struct route iproute; struct socket *so = NULL; struct secpolicy *sp = NULL; #endif - u_int16_t divert_cookie; /* firewall cookie */ #if IPFIREWALL_FORWARD int fwd_rewrite_src = 0; #endif - struct ip_fw_chain *rule = NULL; - -#if IPDIVERT - /* Get and reset firewall cookie */ - divert_cookie = ip_divert_cookie; - ip_divert_cookie = 0; -#else - divert_cookie = 0; +#if IPFIREWALL + struct ip_fw_args args; #endif + int didfilter = 0; + ipfilter_t inject_filter_ref = 0; + struct m_tag *tag; + struct route saved_route; + struct mbuf * packetlist; + int pktcnt = 0; + KERNEL_DEBUG(DBG_FNC_IP_OUTPUT | DBG_FUNC_START, 0,0,0,0,0); -#if IPFIREWALL && DUMMYNET - /* - * dummynet packet are prepended a vestigial mbuf with - * m_type = MT_DUMMYNET and m_data pointing to the matching - * rule. - */ - if (m->m_type == MT_DUMMYNET) { - /* - * the packet was already tagged, so part of the - * processing was already done, and we need to go down. - * Get parameters from the header. - */ - rule = (struct ip_fw_chain *)(m->m_data) ; - opt = NULL ; - ro = & ( ((struct dn_pkt *)m)->ro ) ; - imo = NULL ; - dst = ((struct dn_pkt *)m)->dn_dst ; - ifp = ((struct dn_pkt *)m)->ifp ; - flags = ((struct dn_pkt *)m)->flags; - m0 = m = m->m_next ; -#if IPSEC - if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) { - so = ipsec_getsocket(m); - (void)ipsec_setsocket(m, NULL); - } + packetlist = m0; +#if IPFIREWALL + args.next_hop = NULL; + args.eh = NULL; + args.rule = NULL; + args.divert_rule = 0; /* divert cookie */ + + /* Grab info from mtags prepended to the chain */ +#if DUMMYNET + if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_DUMMYNET, NULL)) != NULL) { + struct dn_pkt_tag *dn_tag; + + dn_tag = (struct dn_pkt_tag *)(tag+1); + args.rule = dn_tag->rule; + opt = NULL; + saved_route = dn_tag->ro; + ro = &saved_route; + + imo = NULL; + dst = dn_tag->dn_dst; + ifp = dn_tag->ifp; + flags = dn_tag->flags; + + m_tag_delete(m0, tag); + } +#endif /* DUMMYNET */ + +#if IPDIVERT + if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_DIVERT, NULL)) != NULL) { + struct divert_tag *div_tag; + + div_tag = (struct divert_tag *)(tag+1); + args.divert_rule = div_tag->cookie; + + m_tag_delete(m0, tag); + } +#endif /* IPDIVERT */ + + if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_IPFORWARD, NULL)) != NULL) { + struct ip_fwd_tag *ipfwd_tag; + + ipfwd_tag = (struct ip_fwd_tag *)(tag+1); + args.next_hop = ipfwd_tag->next_hop; + + m_tag_delete(m0, tag); + } +#endif /* IPFIREWALL */ + + m = m0; + +#if DIAGNOSTIC + if ( !m || (m->m_flags & M_PKTHDR) != 0) + panic("ip_output no HDR"); + if (!ro) + panic("ip_output no route, proto = %d", + mtod(m, struct ip *)->ip_p); #endif + +#if IPFIREWALL + if (args.rule != NULL) { /* dummynet already saw us */ ip = mtod(m, struct ip *); hlen = IP_VHL_HL(ip->ip_vhl) << 2 ; - if (ro->ro_rt != NULL) + lck_mtx_lock(rt_mtx); + if (ro->ro_rt != NULL) ia = (struct in_ifaddr *)ro->ro_rt->rt_ifa; - goto sendit; - } else - rule = NULL ; + if (ia) + ifaref(&ia->ia_ifa); + lck_mtx_unlock(rt_mtx); +#if IPSEC + if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) { + so = ipsec_getsocket(m); + (void)ipsec_setsocket(m, NULL); + } #endif + goto sendit; + } +#endif /* IPFIREWALL */ + #if IPSEC if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) { so = ipsec_getsocket(m); (void)ipsec_setsocket(m, NULL); } #endif +loopit: + /* + * No need to proccess packet twice if we've + * already seen it + */ + inject_filter_ref = ipf_get_inject_filter(m); -#if DIAGNOSTIC - if ((m->m_flags & M_PKTHDR) == 0) - panic("ip_output no HDR"); - if (!ro) - panic("ip_output no route, proto = %d", - mtod(m, struct ip *)->ip_p); -#endif if (opt) { m = ip_insertoptions(m, opt, &len); hlen = len; } ip = mtod(m, struct ip *); +#if IPFIREWALL + pkt_dst = args.next_hop ? args.next_hop->sin_addr : ip->ip_dst; +#else + pkt_dst = ip->ip_dst; +#endif + /* * Fill in IP header. */ @@ -266,14 +375,24 @@ ip_output(m0, opt, ro, flags, imo) #else ip->ip_id = htons(ip_id++); #endif - ipstat.ips_localout++; + OSAddAtomic(1, (SInt32*)&ipstat.ips_localout); } else { hlen = IP_VHL_HL(ip->ip_vhl) << 2; } + +#if DEBUG + /* For debugging, we let the stack forge congestion */ + if (forge_ce != 0 && + ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_ECT1 || + (ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_ECT0)) { + ip->ip_tos = (ip->ip_tos & ~IPTOS_ECN_MASK) | IPTOS_ECN_CE; + forge_ce--; + } +#endif /* DEBUG */ KERNEL_DEBUG(DBG_LAYER_BEG, ip->ip_dst.s_addr, ip->ip_src.s_addr, ip->ip_p, ip->ip_off, ip->ip_len); - + dst = (struct sockaddr_in *)&ro->ro_dst; /* @@ -284,23 +403,30 @@ ip_output(m0, opt, ro, flags, imo) * cache with IPv6. */ - if (ro->ro_rt && (ro->ro_rt->generation_id != route_generation) && - ((flags & (IP_ROUTETOIF | IP_FORWARDING)) == 0) && (ip->ip_src.s_addr != INADDR_ANY) && - (ifa_foraddr(ip->ip_src.s_addr) == NULL)) { - error = EADDRNOTAVAIL; - goto bad; - } - if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 || - dst->sin_family != AF_INET || - dst->sin_addr.s_addr != ip->ip_dst.s_addr)) { - rtfree(ro->ro_rt); - ro->ro_rt = (struct rtentry *)0; + lck_mtx_lock(rt_mtx); + if (ro->ro_rt != NULL) { + if (ro->ro_rt->generation_id != route_generation && + ((flags & (IP_ROUTETOIF | IP_FORWARDING)) == 0) && + (ip->ip_src.s_addr != INADDR_ANY) && + (ifa_foraddr(ip->ip_src.s_addr) == 0)) { + error = EADDRNOTAVAIL; + lck_mtx_unlock(rt_mtx); + goto bad; + } + if ((ro->ro_rt->rt_flags & RTF_UP) == 0 || + dst->sin_family != AF_INET || + dst->sin_addr.s_addr != pkt_dst.s_addr) { + rtfree_locked(ro->ro_rt); + ro->ro_rt = NULL; + } + if (ro->ro_rt && ro->ro_rt->generation_id != route_generation) + ro->ro_rt->generation_id = route_generation; } - if (ro->ro_rt == 0) { + if (ro->ro_rt == NULL) { bzero(dst, sizeof(*dst)); dst->sin_family = AF_INET; dst->sin_len = sizeof(*dst); - dst->sin_addr = ip->ip_dst; + dst->sin_addr = pkt_dst; } /* * If routing to interface only, @@ -309,16 +435,35 @@ ip_output(m0, opt, ro, flags, imo) #define ifatoia(ifa) ((struct in_ifaddr *)(ifa)) #define sintosa(sin) ((struct sockaddr *)(sin)) if (flags & IP_ROUTETOIF) { - if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0 && - (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) { - ipstat.ips_noroute++; - error = ENETUNREACH; - goto bad; + if (ia) + ifafree(&ia->ia_ifa); + if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0) { + if ((ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) { + OSAddAtomic(1, (SInt32*)&ipstat.ips_noroute); + error = ENETUNREACH; + lck_mtx_unlock(rt_mtx); + goto bad; + } } ifp = ia->ia_ifp; ip->ip_ttl = 1; isbroadcast = in_broadcast(dst->sin_addr, ifp); } else { + +#if CONFIG_FORCE_OUT_IFP + /* Check if this packet should be forced out a specific interface */ + if (ro->ro_rt == 0 && pdp_ifp != NULL) { + pdp_context_route_locked(pdp_ifp, ro); + + if (ro->ro_rt == NULL) { + OSAddAtomic(1, (UInt32*)&ipstat.ips_noroute); + error = EHOSTUNREACH; + lck_mtx_unlock(rt_mtx); + goto bad; + } + } +#endif + /* * If this is the case, we probably don't want to allocate * a protocol-cloned route since we didn't get one from the @@ -328,14 +473,42 @@ ip_output(m0, opt, ro, flags, imo) * the link layer, as this is probably required in all cases * for correct operation (as it is for ARP). */ - if (ro->ro_rt == 0) - rtalloc_ign(ro, RTF_PRCLONING); + if (ro->ro_rt == 0) { - ipstat.ips_noroute++; + unsigned long ign = RTF_PRCLONING; + /* + * We make an exception here: if the destination + * address is INADDR_BROADCAST, allocate a protocol- + * cloned host route so that we end up with a route + * marked with the RTF_BROADCAST flag. Otherwise, + * we would end up referring to the default route, + * instead of creating a cloned host route entry. + * That would introduce inconsistencies between ULPs + * that allocate a route and those that don't. The + * RTF_BROADCAST route is important since we'd want + * to send out undirected IP broadcast packets using + * link-level broadcast address. + * + * This exception will no longer be necessary when + * the RTF_PRCLONING scheme is no longer present. + */ + if (dst->sin_addr.s_addr == INADDR_BROADCAST) + ign &= ~RTF_PRCLONING; + + rtalloc_ign_locked(ro, ign); + } + if (ro->ro_rt == 0) { + OSAddAtomic(1, (SInt32*)&ipstat.ips_noroute); error = EHOSTUNREACH; + lck_mtx_unlock(rt_mtx); goto bad; } + + if (ia) + ifafree(&ia->ia_ifa); ia = ifatoia(ro->ro_rt->rt_ifa); + if (ia) + ifaref(&ia->ia_ifa); ifp = ro->ro_rt->rt_ifp; ro->ro_rt->rt_use++; if (ro->ro_rt->rt_flags & RTF_GATEWAY) @@ -345,7 +518,8 @@ ip_output(m0, opt, ro, flags, imo) else isbroadcast = in_broadcast(dst->sin_addr, ifp); } - if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { + lck_mtx_unlock(rt_mtx); + if (IN_MULTICAST(ntohl(pkt_dst.s_addr))) { struct in_multi *inm; m->m_flags |= M_MCAST; @@ -363,10 +537,12 @@ ip_output(m0, opt, ro, flags, imo) if (imo->imo_multicast_ifp != NULL) { ifp = imo->imo_multicast_ifp; } +#if MROUTING if (imo->imo_multicast_vif != -1 && ((flags & IP_RAWOUTPUT) == 0 || ip->ip_src.s_addr == INADDR_ANY)) ip->ip_src.s_addr = ip_mcast_src(imo->imo_multicast_vif); +#endif /* MROUTING */ } else if ((flags & IP_RAWOUTPUT) == 0) ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL; /* @@ -374,7 +550,7 @@ ip_output(m0, opt, ro, flags, imo) */ if ((imo == NULL) || (imo->imo_multicast_vif == -1)) { if ((ifp->if_flags & IFF_MULTICAST) == 0) { - ipstat.ips_noroute++; + OSAddAtomic(1, (SInt32*)&ipstat.ips_noroute); error = ENETUNREACH; goto bad; } @@ -385,20 +561,23 @@ ip_output(m0, opt, ro, flags, imo) */ if (ip->ip_src.s_addr == INADDR_ANY) { register struct in_ifaddr *ia1; - + lck_mtx_lock(rt_mtx); TAILQ_FOREACH(ia1, &in_ifaddrhead, ia_link) if (ia1->ia_ifp == ifp) { ip->ip_src = IA_SIN(ia1)->sin_addr; break; } + lck_mtx_unlock(rt_mtx); if (ip->ip_src.s_addr == INADDR_ANY) { error = ENETUNREACH; goto bad; } } - IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm); + ifnet_lock_shared(ifp); + IN_LOOKUP_MULTI(pkt_dst, ifp, inm); + ifnet_lock_done(ifp); if (inm != NULL && (imo == NULL || imo->imo_multicast_loop)) { /* @@ -406,8 +585,53 @@ ip_output(m0, opt, ro, flags, imo) * on the outgoing interface, and the caller did not * forbid loopback, loop back a copy. */ + if (!TAILQ_EMPTY(&ipv4_filters)) { + struct ipfilter *filter; + int seen = (inject_filter_ref == 0); + struct ipf_pktopts *ippo = 0, ipf_pktopts; + + if (imo) { + ippo = &ipf_pktopts; + ipf_pktopts.ippo_mcast_ifnet = imo->imo_multicast_ifp; + ipf_pktopts.ippo_mcast_ttl = imo->imo_multicast_ttl; + ipf_pktopts.ippo_mcast_loop = imo->imo_multicast_loop; + } + + ipf_ref(); + + /* 4135317 - always pass network byte order to filter */ + HTONS(ip->ip_len); + HTONS(ip->ip_off); + + TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) { + if (seen == 0) { + if ((struct ipfilter *)inject_filter_ref == filter) + seen = 1; + } else if (filter->ipf_filter.ipf_output) { + errno_t result; + result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, ippo); + if (result == EJUSTRETURN) { + ipf_unref(); + goto done; + } + if (result != 0) { + ipf_unref(); + goto bad; + } + } + } + + /* set back to host byte order */ + ip = mtod(m, struct ip *); + NTOHS(ip->ip_len); + NTOHS(ip->ip_off); + + ipf_unref(); + didfilter = 1; + } ip_mloopback(ifp, m, dst, hlen); } +#if MROUTING else { /* * If we are acting as a multicast router, perform @@ -436,6 +660,7 @@ ip_output(m0, opt, ro, flags, imo) } } } +#endif /* MROUTING */ /* * Multicasts with a time-to-live of zero may be looped- @@ -469,15 +694,6 @@ ip_output(m0, opt, ro, flags, imo) #endif /* IPFIREWALL_FORWARD */ } #endif /* notdef */ - /* - * Verify that we have any chance at all of being able to queue - * the packet or packet fragments - */ - if ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >= - ifp->if_snd.ifq_maxlen) { - error = ENOBUFS; - goto bad; - } /* * Look for broadcast address and @@ -515,6 +731,42 @@ sendit: } } + if (!didfilter && !TAILQ_EMPTY(&ipv4_filters)) { + struct ipfilter *filter; + int seen = (inject_filter_ref == 0); + + ipf_ref(); + + /* 4135317 - always pass network byte order to filter */ + HTONS(ip->ip_len); + HTONS(ip->ip_off); + + TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) { + if (seen == 0) { + if ((struct ipfilter *)inject_filter_ref == filter) + seen = 1; + } else if (filter->ipf_filter.ipf_output) { + errno_t result; + result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, 0); + if (result == EJUSTRETURN) { + ipf_unref(); + goto done; + } + if (result != 0) { + ipf_unref(); + goto bad; + } + } + } + + /* set back to host byte order */ + ip = mtod(m, struct ip *); + NTOHS(ip->ip_len); + NTOHS(ip->ip_off); + + ipf_unref(); + } + #if IPSEC /* temporary for testing only: bypass ipsec alltogether */ @@ -523,6 +775,7 @@ sendit: KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_START, 0,0,0,0,0); + /* get SP for this packet */ if (so == NULL) sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, flags, &error); @@ -530,7 +783,7 @@ sendit: sp = ipsec4_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error); if (sp == NULL) { - ipsecstat.out_inval++; + IPSEC_STAT_INCREMENT(ipsecstat.out_inval); KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 0,0,0,0,0); goto bad; } @@ -540,10 +793,11 @@ sendit: /* check policy */ switch (sp->policy) { case IPSEC_POLICY_DISCARD: + case IPSEC_POLICY_GENERATE: /* * This packet is just discarded. */ - ipsecstat.out_polvio++; + IPSEC_STAT_INCREMENT(ipsecstat.out_polvio); KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 1,0,0,0,0); goto bad; @@ -592,7 +846,7 @@ sendit: HTONS(ip->ip_off); error = ipsec4_output(&state, sp, flags); - + m0 = m = state.m; if (flags & IP_ROUTETOIF) { @@ -641,13 +895,15 @@ sendit: #endif /* Check that there wasn't a route change and src is still valid */ - if (ro->ro_rt->generation_id != route_generation) { - if (ifa_foraddr(ip->ip_src.s_addr) == NULL && ((flags & (IP_ROUTETOIF | IP_FORWARDING)) == 0)) { + lck_mtx_lock(rt_mtx); + if (ro->ro_rt && ro->ro_rt->generation_id != route_generation) { + if (ifa_foraddr(ip->ip_src.s_addr) == 0 && ((flags & (IP_ROUTETOIF | IP_FORWARDING)) == 0)) { error = EADDRNOTAVAIL; + lck_mtx_unlock(rt_mtx); KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 5,0,0,0,0); goto bad; } - rtfree(ro->ro_rt); + rtfree_locked(ro->ro_rt); ro->ro_rt = NULL; } @@ -656,21 +912,61 @@ sendit: printf("ip_output: " "can't update route after IPsec processing\n"); error = EHOSTUNREACH; /*XXX*/ + lck_mtx_unlock(rt_mtx); KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 6,0,0,0,0); goto bad; } } else { + if (ia) + ifafree(&ia->ia_ifa); ia = ifatoia(ro->ro_rt->rt_ifa); + if (ia) + ifaref(&ia->ia_ifa); ifp = ro->ro_rt->rt_ifp; } + lck_mtx_unlock(rt_mtx); /* make it flipped, again. */ NTOHS(ip->ip_len); NTOHS(ip->ip_off); KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 7,0xff,0xff,0xff,0xff); + + /* Pass to filters again */ + if (!TAILQ_EMPTY(&ipv4_filters)) { + struct ipfilter *filter; + + ipf_ref(); + + /* 4135317 - always pass network byte order to filter */ + HTONS(ip->ip_len); + HTONS(ip->ip_off); + + TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) { + if (filter->ipf_filter.ipf_output) { + errno_t result; + result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, 0); + if (result == EJUSTRETURN) { + ipf_unref(); + goto done; + } + if (result != 0) { + ipf_unref(); + goto bad; + } + } + } + + /* set back to host byte order */ + ip = mtod(m, struct ip *); + NTOHS(ip->ip_len); + NTOHS(ip->ip_off); + + ipf_unref(); + } skip_ipsec: #endif /*IPSEC*/ +#if IPFIREWALL /* * IpHack's section. * - Xlate: translate packet's addr/port (NAT). @@ -681,19 +977,26 @@ skip_ipsec: if (fr_checkp) { struct mbuf *m1 = m; - if ((error = (*fr_checkp)(ip, hlen, ifp, 1, &m1)) || !m1) + if ((error = (*fr_checkp)(ip, hlen, ifp, 1, &m1)) || !m1) { goto done; + } ip = mtod(m0 = m = m1, struct ip *); } /* * Check with the firewall... + * but not if we are already being fwd'd from a firewall. */ - if (fw_enable && ip_fw_chk_ptr) { + if (fw_enable && IPFW_LOADED && !args.next_hop) { struct sockaddr_in *old = dst; - off = (*ip_fw_chk_ptr)(&ip, - hlen, ifp, &divert_cookie, &m, &rule, &dst); + args.m = m; + args.next_hop = dst; + args.oif = ifp; + off = ip_fw_chk_ptr(&args); + m = args.m; + dst = args.next_hop; + /* * On return we must do the following: * IP_FW_PORT_DENY_FLAG -> drop the pkt (XXX new) @@ -716,10 +1019,12 @@ skip_ipsec: goto done ; } ip = mtod(m, struct ip *); - if (off == 0 && dst == old) /* common case */ + + if (off == 0 && dst == old) {/* common case */ goto pass ; + } #if DUMMYNET - if ((off & IP_FW_PORT_DYNT_FLAG) != 0) { + if (DUMMYNET_LOADED && (off & IP_FW_PORT_DYNT_FLAG) != 0) { /* * pass the pkt to dummynet. Need to include * pipe number, m, ifp, ro, dst because these are @@ -729,11 +1034,15 @@ skip_ipsec: * XXX note: if the ifp or ro entry are deleted * while a pkt is in dummynet, we are in trouble! */ - error = dummynet_io(off & 0xffff, DN_TO_IP_OUT, m, - ifp,ro,dst,rule, flags); + args.ro = ro; + args.dst = dst; + args.flags = flags; + + error = ip_dn_io_ptr(m, off & 0xffff, DN_TO_IP_OUT, + &args); goto done; } -#endif +#endif /* DUMMYNET */ #if IPDIVERT if (off != 0 && (off & IP_FW_PORT_DYNT_FLAG) == 0) { struct mbuf *clone = NULL; @@ -756,8 +1065,7 @@ skip_ipsec: HTONS(ip->ip_off); /* Deliver packet to divert input routine */ - ip_divert_cookie = divert_cookie; - divert_packet(m, 0, off & 0xffff); + divert_packet(m, 0, off & 0xffff, args.divert_rule); /* If 'tee', continue with original packet */ if (clone != NULL) { @@ -780,7 +1088,7 @@ skip_ipsec: * And I'm babbling. */ if (off == 0 && old != dst) { - struct in_ifaddr *ia; + struct in_ifaddr *ia_fw; /* It's changed... */ /* There must be a better way to do this next line... */ @@ -803,31 +1111,64 @@ skip_ipsec: * as the packet runs through ip_input() as * it is done through a ISR. */ - TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) { + TAILQ_FOREACH(ia_fw, &in_ifaddrhead, ia_link) { /* * If the addr to forward to is one * of ours, we pretend to * be the destination for this packet. */ - if (IA_SIN(ia)->sin_addr.s_addr == + if (IA_SIN(ia_fw)->sin_addr.s_addr == dst->sin_addr.s_addr) break; } if (ia) { /* tell ip_input "dont filter" */ - ip_fw_fwd_addr = dst; + struct m_tag *fwd_tag; + struct ip_fwd_tag *ipfwd_tag; + + fwd_tag = m_tag_alloc(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_IPFORWARD, + sizeof(struct sockaddr_in), M_NOWAIT); + if (fwd_tag == NULL) { + error = ENOBUFS; + goto bad; + } + + ipfwd_tag = (struct ip_fwd_tag *)(fwd_tag+1); + ipfwd_tag->next_hop = args.next_hop; + + m_tag_prepend(m, fwd_tag); + if (m->m_pkthdr.rcvif == NULL) m->m_pkthdr.rcvif = ifunit("lo0"); - if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { + if ((~IF_HWASSIST_CSUM_FLAGS(m->m_pkthdr.rcvif->if_hwassist) & + m->m_pkthdr.csum_flags) == 0) { + if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { + m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; + m->m_pkthdr.csum_flags |= + CSUM_DATA_VALID | CSUM_PSEUDO_HDR; + m->m_pkthdr.csum_data = 0xffff; + } m->m_pkthdr.csum_flags |= - CSUM_DATA_VALID | CSUM_PSEUDO_HDR; - m->m_pkthdr.csum_data = 0xffff; + CSUM_IP_CHECKED | CSUM_IP_VALID; + } + else if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { + in_delayed_cksum(m); + m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; + ip->ip_sum = in_cksum(m, hlen); } - m->m_pkthdr.csum_flags |= - CSUM_IP_CHECKED | CSUM_IP_VALID; HTONS(ip->ip_len); HTONS(ip->ip_off); - ip_input(m); + + + /* we need to call dlil_output to run filters + * and resync to avoid recursion loops. + */ + if (lo_ifp) { + dlil_output(lo_ifp, PF_INET, m, 0, (struct sockaddr *)dst, 0); + } + else { + printf("ip_output: no loopback ifp for forwarding!!!\n"); + } goto done; } /* Some of the logic for this was @@ -839,15 +1180,17 @@ skip_ipsec: bcopy(dst, &ro_fwd->ro_dst, sizeof(*dst)); ro_fwd->ro_rt = 0; - rtalloc_ign(ro_fwd, RTF_PRCLONING); + lck_mtx_lock(rt_mtx); + rtalloc_ign_locked(ro_fwd, RTF_PRCLONING); if (ro_fwd->ro_rt == 0) { - ipstat.ips_noroute++; + OSAddAtomic(1, (SInt32*)&ipstat.ips_noroute); error = EHOSTUNREACH; + lck_mtx_unlock(rt_mtx); goto bad; } - ia = ifatoia(ro_fwd->ro_rt->rt_ifa); + ia_fw = ifatoia(ro_fwd->ro_rt->rt_ifa); ifp = ro_fwd->ro_rt->rt_ifp; ro_fwd->ro_rt->rt_use++; if (ro_fwd->ro_rt->rt_flags & RTF_GATEWAY) @@ -857,9 +1200,10 @@ skip_ipsec: (ro_fwd->ro_rt->rt_flags & RTF_BROADCAST); else isbroadcast = in_broadcast(dst->sin_addr, ifp); - rtfree(ro->ro_rt); + rtfree_locked(ro->ro_rt); ro->ro_rt = ro_fwd->ro_rt; dst = (struct sockaddr_in *)&ro_fwd->ro_dst; + lck_mtx_unlock(rt_mtx); /* * If we added a default src ip earlier, @@ -867,7 +1211,7 @@ skip_ipsec: * interface, do it again, from the new one. */ if (fwd_rewrite_src) - ip->ip_src = IA_SIN(ia)->sin_addr; + ip->ip_src = IA_SIN(ia_fw)->sin_addr; goto pass ; } #endif /* IPFIREWALL_FORWARD */ @@ -876,13 +1220,33 @@ skip_ipsec: * we have to drop the pkt */ m_freem(m); - error = EACCES; /* not sure this is the right error msg */ - goto done; + error = EACCES; /* not sure this is the right error msg */ + goto done; } +#endif /* IPFIREWALL */ pass: +#if __APPLE__ + /* Do not allow loopback address to wind up on a wire */ + if ((ifp->if_flags & IFF_LOOPBACK) == 0 && + ((ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || + (ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)) { + OSAddAtomic(1, (SInt32*)&ipstat.ips_badaddr); + m_freem(m); + /* + * Do not simply drop the packet just like a firewall -- we want the + * the application to feel the pain. + * Return ENETUNREACH like ip6_output does in some similar cases. + * This can startle the otherwise clueless process that specifies + * loopback as the source address. + */ + error = ENETUNREACH; + goto done; + } +#endif m->m_pkthdr.csum_flags |= CSUM_IP; - sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_hwassist; + sw_csum = m->m_pkthdr.csum_flags + & ~IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist); if ((ifp->if_hwassist & CSUM_TCP_SUM16) != 0) { /* @@ -904,6 +1268,9 @@ pass: /* let the software handle any UDP or TCP checksums */ sw_csum |= (CSUM_DELAY_DATA & m->m_pkthdr.csum_flags); } + } else if (apple_hwcksum_tx == 0) { + sw_csum |= (CSUM_DELAY_DATA | CSUM_DELAY_IP) & + m->m_pkthdr.csum_flags; } if (sw_csum & CSUM_DELAY_DATA) { @@ -911,8 +1278,13 @@ pass: sw_csum &= ~CSUM_DELAY_DATA; m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; } - - m->m_pkthdr.csum_flags &= ifp->if_hwassist; + + if (apple_hwcksum_tx != 0) { + m->m_pkthdr.csum_flags &= + IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist); + } else { + m->m_pkthdr.csum_flags = 0; + } /* * If small enough for interface, or the interface will take @@ -920,6 +1292,8 @@ pass: */ if ((u_short)ip->ip_len <= ifp->if_mtu || ifp->if_hwassist & CSUM_FRAGMENT) { + struct rtentry *rte; + HTONS(ip->ip_len); HTONS(ip->ip_off); ip->ip_sum = 0; @@ -940,14 +1314,39 @@ pass: if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) ipsec_delaux(m); #endif -#if __APPLE__ - error = dlil_output(ifptodlt(ifp, PF_INET), m, (void *) ro->ro_rt, - (struct sockaddr *)dst, 0); -#else - error = (*ifp->if_output)(ifp, m, - (struct sockaddr *)dst, ro->ro_rt); -#endif - goto done; + if (packetchain == 0) { + lck_mtx_lock(rt_mtx); + if ((rte = ro->ro_rt) != NULL) + rtref(rte); + lck_mtx_unlock(rt_mtx); + error = ifnet_output(ifp, PF_INET, m, rte, + (struct sockaddr *)dst); + if (rte != NULL) + rtfree(rte); + goto done; + } + else { /* packet chaining allows us to reuse the route for all packets */ + m = m->m_nextpkt; + if (m == NULL) { + if (pktcnt > ip_maxchainsent) + ip_maxchainsent = pktcnt; + lck_mtx_lock(rt_mtx); + if ((rte = ro->ro_rt) != NULL) + rtref(rte); + lck_mtx_unlock(rt_mtx); + //send + error = ifnet_output(ifp, PF_INET, packetlist, + rte, (struct sockaddr *)dst); + if (rte != NULL) + rtfree(rte); + pktcnt = 0; + goto done; + + } + m0 = m; + pktcnt++; + goto loopit; + } } /* * Too large for interface; fragment if possible. @@ -962,12 +1361,15 @@ pass: * them, there is no way for one to update all its * routes when the MTU is changed. */ - if ((ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST)) + + lck_mtx_lock(rt_mtx); + if (ro->ro_rt && (ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST)) && !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU) && (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) { ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu; } - ipstat.ips_cantfrag++; + lck_mtx_unlock(rt_mtx); + OSAddAtomic(1, (SInt32*)&ipstat.ips_cantfrag); goto bad; } len = (ifp->if_mtu - hlen) &~ 7; @@ -983,8 +1385,6 @@ pass: if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA && (ifp->if_hwassist & CSUM_IP_FRAGS) == 0) { in_delayed_cksum(m); - if (m == NULL) - return(ENOMEM); m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; } @@ -1001,10 +1401,10 @@ pass: m0 = m; mhlen = sizeof (struct ip); for (off = hlen + len; off < (u_short)ip->ip_len; off += len) { - MGETHDR(m, M_DONTWAIT, MT_HEADER); + MGETHDR(m, M_DONTWAIT, MT_HEADER); /* MAC-OK */ if (m == 0) { error = ENOBUFS; - ipstat.ips_odropped++; + OSAddAtomic(1, (SInt32*)&ipstat.ips_odropped); goto sendorfree; } m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG; @@ -1028,12 +1428,16 @@ pass: if (m->m_next == 0) { (void) m_free(m); error = ENOBUFS; /* ??? */ - ipstat.ips_odropped++; + OSAddAtomic(1, (SInt32*)&ipstat.ips_odropped); goto sendorfree; } m->m_pkthdr.len = mhlen + len; - m->m_pkthdr.rcvif = (struct ifnet *)0; + m->m_pkthdr.rcvif = 0; m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags; + m->m_pkthdr.socket_id = m0->m_pkthdr.socket_id; +#if CONFIG_MACF_NET + mac_netinet_fragment(m0, m); +#endif HTONS(mhip->ip_off); mhip->ip_sum = 0; if (sw_csum & CSUM_DELAY_IP) { @@ -1043,7 +1447,7 @@ pass: mnext = &m->m_nextpkt; nfrags++; } - ipstat.ips_ofragments += nfrags; + OSAddAtomic(nfrags, (SInt32*)&ipstat.ips_ofragments); /* set first/last markers for fragment chain */ m->m_flags |= M_LASTFRAG; @@ -1078,6 +1482,7 @@ sendorfree: ipsec_delaux(m); #endif if (error == 0) { + struct rtentry *rte; #ifndef __APPLE__ /* Record statistics for this interface address. */ if (ia != NULL) { @@ -1085,22 +1490,28 @@ sendorfree: ia->ia_ifa.if_obytes += m->m_pkthdr.len; } #endif - -#if __APPLE__ - error = dlil_output(ifptodlt(ifp, PF_INET), m, (void *) ro->ro_rt, - (struct sockaddr *)dst, 0); -#else - error = (*ifp->if_output)(ifp, m, - (struct sockaddr *)dst, ro->ro_rt); -#endif + if ((packetchain != 0) && (pktcnt > 0)) + panic("ip_output: mix of packet in packetlist is wrong=%p", packetlist); + lck_mtx_lock(rt_mtx); + if ((rte = ro->ro_rt) != NULL) + rtref(rte); + lck_mtx_unlock(rt_mtx); + error = ifnet_output(ifp, PF_INET, m, rte, + (struct sockaddr *)dst); + if (rte != NULL) + rtfree(rte); } else m_freem(m); } if (error == 0) - ipstat.ips_fragmented++; + OSAddAtomic(1, (SInt32*)&ipstat.ips_fragmented); } done: + if (ia) { + ifafree(&ia->ia_ifa); + ia = NULL; + } #if IPSEC if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) { if (ro == &iproute && ro->ro_rt) { @@ -1110,7 +1521,7 @@ done: if (sp != NULL) { KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP ip_output call free SP:%x\n", sp)); - key_freesp(sp); + key_freesp(sp, KEY_SADB_UNLOCKED); } } #endif /* IPSEC */ @@ -1122,33 +1533,211 @@ bad: goto done; } +static void +ip_out_cksum_stats(int proto, u_int32_t len) +{ + switch (proto) { + case IPPROTO_TCP: + tcp_out_cksum_stats(len); + break; + case IPPROTO_UDP: + udp_out_cksum_stats(len); + break; + default: + /* keep only TCP or UDP stats for now */ + break; + } +} + void -in_delayed_cksum(struct mbuf *m) +in_delayed_cksum_offset(struct mbuf *m0, int ip_offset) { struct ip *ip; - u_short csum, offset; - ip = mtod(m, struct ip *); + unsigned char buf[sizeof(struct ip)]; + u_short csum, offset, ip_len; + struct mbuf *m = m0; + + while (ip_offset >= m->m_len) { + ip_offset -= m->m_len; + m = m->m_next; + if (m == NULL) { + printf("in_delayed_cksum_withoffset failed - ip_offset wasn't in the packet\n"); + return; + } + } + + /* Sometimes the IP header is not contiguous, yes this can happen! */ + if (ip_offset + sizeof(struct ip) > m->m_len) { +#if DEBUG + printf("delayed m_pullup, m->len: %ld off: %d\n", + m->m_len, ip_offset); +#endif + m_copydata(m, ip_offset, sizeof(struct ip), (caddr_t) buf); + + ip = (struct ip *)buf; + } else { + ip = (struct ip*)(m->m_data + ip_offset); + } + + /* Gross */ + if (ip_offset) { + m->m_len -= ip_offset; + m->m_data += ip_offset; + } + offset = IP_VHL_HL(ip->ip_vhl) << 2 ; - csum = in_cksum_skip(m, ip->ip_len, offset); - if (m->m_pkthdr.csum_flags & CSUM_UDP && csum == 0) + + /* + * We could be in the context of an IP or interface filter; in the + * former case, ip_len would be in host (correct) order while for + * the latter it would be in network order. Because of this, we + * attempt to interpret the length field by comparing it against + * the actual packet length. If the comparison fails, byte swap + * the length and check again. If it still fails, then the packet + * is bogus and we give up. + */ + ip_len = ip->ip_len; + if (ip_len != (m0->m_pkthdr.len - ip_offset)) { + ip_len = SWAP16(ip_len); + if (ip_len != (m0->m_pkthdr.len - ip_offset)) { + printf("in_delayed_cksum_offset: ip_len %d (%d) " + "doesn't match actual length %d\n", ip->ip_len, + ip_len, (m0->m_pkthdr.len - ip_offset)); + return; + } + } + + csum = in_cksum_skip(m, ip_len, offset); + + /* Update stats */ + ip_out_cksum_stats(ip->ip_p, ip_len - offset); + + if (m0->m_pkthdr.csum_flags & CSUM_UDP && csum == 0) csum = 0xffff; - offset += m->m_pkthdr.csum_data & 0xFFFF; /* checksum offset */ + offset += m0->m_pkthdr.csum_data & 0xFFFF; /* checksum offset */ + + /* Gross */ + if (ip_offset) { + if (M_LEADINGSPACE(m) < ip_offset) + panic("in_delayed_cksum_offset - chain modified!\n"); + m->m_len += ip_offset; + m->m_data -= ip_offset; + } - if (offset > ip->ip_len) /* bogus offset */ + if (offset > ip_len) /* bogus offset */ return; - if (offset + sizeof(u_short) > m->m_len) { - printf("delayed m_pullup, m->len: %d off: %d p: %d\n", - m->m_len, offset, ip->ip_p); - /* - * XXX - * this shouldn't happen, but if it does, the - * correct behavior may be to insert the checksum - * in the existing chain instead of rearranging it. - */ - m = m_pullup(m, offset + sizeof(u_short)); + /* Insert the checksum in the existing chain */ + if (offset + ip_offset + sizeof(u_short) > m->m_len) { + char tmp[2]; + +#if DEBUG + printf("delayed m_copyback, m->len: %ld off: %d p: %d\n", + m->m_len, offset + ip_offset, ip->ip_p); +#endif + *(u_short *)tmp = csum; + m_copyback(m, offset + ip_offset, 2, tmp); + } else + *(u_short *)(m->m_data + offset + ip_offset) = csum; +} + +void +in_delayed_cksum(struct mbuf *m) +{ + in_delayed_cksum_offset(m, 0); +} + +void +in_cksum_offset(struct mbuf* m, size_t ip_offset) +{ + struct ip* ip = NULL; + int hlen = 0; + unsigned char buf[sizeof(struct ip)]; + int swapped = 0; + + while (ip_offset >= m->m_len) { + ip_offset -= m->m_len; + m = m->m_next; + if (m == NULL) { + printf("in_cksum_offset failed - ip_offset wasn't in the packet\n"); + return; + } + } + + /* Sometimes the IP header is not contiguous, yes this can happen! */ + if (ip_offset + sizeof(struct ip) > m->m_len) { + +#if DEBUG + printf("in_cksum_offset - delayed m_pullup, m->len: %ld off: %lu\n", + m->m_len, ip_offset); +#endif + m_copydata(m, ip_offset, sizeof(struct ip), (caddr_t) buf); + + ip = (struct ip *)buf; + ip->ip_sum = 0; + m_copyback(m, ip_offset + offsetof(struct ip, ip_sum), 2, (caddr_t)&ip->ip_sum); + } else { + ip = (struct ip*)(m->m_data + ip_offset); + ip->ip_sum = 0; + } + + /* Gross */ + if (ip_offset) { + m->m_len -= ip_offset; + m->m_data += ip_offset; + } + +#ifdef _IP_VHL + hlen = IP_VHL_HL(ip->ip_vhl) << 2; +#else + hlen = ip->ip_hl << 2; +#endif + /* + * We could be in the context of an IP or interface filter; in the + * former case, ip_len would be in host order while for the latter + * it would be in network (correct) order. Because of this, we + * attempt to interpret the length field by comparing it against + * the actual packet length. If the comparison fails, byte swap + * the length and check again. If it still fails, then the packet + * is bogus and we give up. + */ + if (ntohs(ip->ip_len) != (m->m_pkthdr.len - ip_offset)) { + ip->ip_len = SWAP16(ip->ip_len); + swapped = 1; + if (ntohs(ip->ip_len) != (m->m_pkthdr.len - ip_offset)) { + ip->ip_len = SWAP16(ip->ip_len); + printf("in_cksum_offset: ip_len %d (%d) " + "doesn't match actual length %lu\n", + ip->ip_len, SWAP16(ip->ip_len), + (m->m_pkthdr.len - ip_offset)); + return; + } + } + + ip->ip_sum = 0; + ip->ip_sum = in_cksum(m, hlen); + if (swapped) + ip->ip_len = SWAP16(ip->ip_len); + + /* Gross */ + if (ip_offset) { + if (M_LEADINGSPACE(m) < ip_offset) + panic("in_cksum_offset - chain modified!\n"); + m->m_len += ip_offset; + m->m_data -= ip_offset; + } + + /* Insert the checksum in the existing chain if IP header not contiguous */ + if (ip_offset + sizeof(struct ip) > m->m_len) { + char tmp[2]; + +#if DEBUG + printf("in_cksum_offset m_copyback, m->len: %lu off: %lu p: %d\n", + m->m_len, ip_offset + offsetof(struct ip, ip_sum), ip->ip_p); +#endif + *(u_short *)tmp = ip->ip_sum; + m_copyback(m, ip_offset + offsetof(struct ip, ip_sum), 2, tmp); } - *(u_short *)(m->m_data + offset) = csum; } /* @@ -1175,10 +1764,13 @@ ip_insertoptions(m, opt, phlen) if (p->ipopt_dst.s_addr) ip->ip_dst = p->ipopt_dst; if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) { - MGETHDR(n, M_DONTWAIT, MT_HEADER); + MGETHDR(n, M_DONTWAIT, MT_HEADER); /* MAC-OK */ if (n == 0) return (m); - n->m_pkthdr.rcvif = (struct ifnet *)0; + n->m_pkthdr.rcvif = 0; +#if CONFIG_MACF_NET + mac_mbuf_label_copy(m, n); +#endif n->m_pkthdr.len = m->m_pkthdr.len + optlen; m->m_len -= sizeof(struct ip); m->m_data += sizeof(struct ip); @@ -1349,6 +1941,61 @@ ip_ctloutput(so, sopt) break; #undef OPTSET +#if CONFIG_FORCE_OUT_IFP + case IP_FORCE_OUT_IFP: { + char ifname[IFNAMSIZ]; + ifnet_t ifp; + + /* Verify interface name parameter is sane */ + if (sopt->sopt_valsize > sizeof(ifname)) { + error = EINVAL; + break; + } + + /* Copy the interface name */ + if (sopt->sopt_valsize != 0) { + error = sooptcopyin(sopt, ifname, sizeof(ifname), sopt->sopt_valsize); + if (error) + break; + } + + if (sopt->sopt_valsize == 0 || ifname[0] == 0) { + // Set pdp_ifp to NULL + inp->pdp_ifp = NULL; + + // Flush the route + if (inp->inp_route.ro_rt) { + rtfree(inp->inp_route.ro_rt); + inp->inp_route.ro_rt = NULL; + } + + break; + } + + /* Verify name is NULL terminated */ + if (ifname[sopt->sopt_valsize - 1] != 0) { + error = EINVAL; + break; + } + + if (ifnet_find_by_name(ifname, &ifp) != 0) { + error = ENXIO; + break; + } + + /* Won't actually free. Since we don't release this later, we should do it now. */ + ifnet_release(ifp); + + /* This only works for point-to-point interfaces */ + if ((ifp->if_flags & IFF_POINTOPOINT) == 0) { + error = ENOTSUP; + break; + } + + inp->pdp_ifp = ifp; + } + break; +#endif case IP_MULTICAST_IF: case IP_MULTICAST_VIF: case IP_MULTICAST_TTL: @@ -1395,13 +2042,16 @@ ip_ctloutput(so, sopt) struct mbuf *m; int optname; + if (sopt->sopt_valsize > MCLBYTES) { + error = EMSGSIZE; + break; + } if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */ break; if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */ break; priv = (sopt->sopt_p != NULL && - suser(sopt->sopt_p->p_ucred, - &sopt->sopt_p->p_acflag) != 0) ? 0 : 1; + proc_suser(sopt->sopt_p) != 0) ? 0 : 1; if (m) { req = mtod(m, caddr_t); len = m->m_len; @@ -1413,6 +2063,23 @@ ip_ctloutput(so, sopt) } #endif /*IPSEC*/ +#if TRAFFIC_MGT + case IP_TRAFFIC_MGT_BACKGROUND: + { + unsigned background = 0; + error = sooptcopyin(sopt, &background, sizeof(background), sizeof(background)); + if (error) + break; + + if (background) + so->so_traffic_mgt_flags |= TRAFFIC_MGT_SO_BACKGROUND; + else + so->so_traffic_mgt_flags &= ~TRAFFIC_MGT_SO_BACKGROUND; + + break; + } +#endif /* TRAFFIC_MGT */ + default: error = ENOPROTOOPT; break; @@ -1522,6 +2189,15 @@ ip_ctloutput(so, sopt) } #endif /*IPSEC*/ +#if TRAFFIC_MGT + case IP_TRAFFIC_MGT_BACKGROUND: + { + unsigned background = so->so_traffic_mgt_flags; + return (sooptcopyout(sopt, &background, sizeof(background))); + break; + } +#endif /* TRAFFIC_MGT */ + default: error = ENOPROTOOPT; break; @@ -1537,10 +2213,10 @@ ip_ctloutput(so, sopt) * with destination address if source routed. */ static int -ip_pcbopts(optname, pcbopt, m) - int optname; - struct mbuf **pcbopt; - register struct mbuf *m; +ip_pcbopts( + __unused int optname, + struct mbuf **pcbopt, + register struct mbuf *m) { register int cnt, optlen; register u_char *cp; @@ -1659,9 +2335,13 @@ ip_multicast_if(a, ifindexp) *ifindexp = 0; if (ntohl(a->s_addr) >> 24 == 0) { ifindex = ntohl(a->s_addr) & 0xffffff; - if (ifindex < 0 || if_index < ifindex) + ifnet_head_lock_shared(); + if (ifindex < 0 || if_index < ifindex) { + ifnet_head_done(); return NULL; + } ifp = ifindex2ifnet[ifindex]; + ifnet_head_done(); if (ifindexp) *ifindexp = ifindex; } else { @@ -1685,7 +2365,6 @@ ip_setmoptions(sopt, imop) struct ifnet *ifp = NULL; struct ip_moptions *imo = *imop; int ifindex; - int s; if (imo == NULL) { /* @@ -1700,6 +2379,7 @@ ip_setmoptions(sopt, imop) switch (sopt->sopt_name) { /* store an index number for the vif you wanna use in the send */ +#if MROUTING case IP_MULTICAST_VIF: if (legal_vif_num == 0) { error = EOPNOTSUPP; @@ -1714,6 +2394,7 @@ ip_setmoptions(sopt, imop) } imo->imo_multicast_vif = i; break; +#endif /* MROUTING */ case IP_MULTICAST_IF: /* @@ -1736,10 +2417,8 @@ ip_setmoptions(sopt, imop) * IP address. Find the interface and confirm that * it supports multicasting. */ - s = splimp(); ifp = ip_multicast_if(&addr, &ifindex); if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { - splx(s); error = EADDRNOTAVAIL; break; } @@ -1748,7 +2427,6 @@ ip_setmoptions(sopt, imop) imo->imo_multicast_addr = addr; else imo->imo_multicast_addr.s_addr = INADDR_ANY; - splx(s); break; case IP_MULTICAST_TTL: @@ -1833,7 +2511,7 @@ ip_setmoptions(sopt, imop) * If all options have default values, no need to keep the mbuf. */ if (imo->imo_multicast_ifp == NULL && - imo->imo_multicast_vif == -1 && + imo->imo_multicast_vif == (u_long)-1 && imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL && imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP && imo->imo_num_memberships == 0) { @@ -1880,14 +2558,12 @@ ip_addmembership( struct sockaddr_in *dst; struct ifnet *ifp = NULL; int error = 0; - int s = 0; int i; if (!IN_MULTICAST(ntohl(mreq->imr_multiaddr.s_addr))) { error = EINVAL; return error; } - s = splimp(); /* * If no interface address was provided, use the interface of * the route to the given multicast address. @@ -1898,15 +2574,17 @@ ip_addmembership( dst->sin_len = sizeof(*dst); dst->sin_family = AF_INET; dst->sin_addr = mreq->imr_multiaddr; - rtalloc(&ro); + lck_mtx_lock(rt_mtx); + rtalloc_ign_locked(&ro, 0UL); if (ro.ro_rt != NULL) { ifp = ro.ro_rt->rt_ifp; - rtfree(ro.ro_rt); + rtfree_locked(ro.ro_rt); } else { /* If there's no default route, try using loopback */ mreq->imr_interface.s_addr = INADDR_LOOPBACK; } + lck_mtx_unlock(rt_mtx); } if (ifp == NULL) { @@ -1919,7 +2597,6 @@ ip_addmembership( */ if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { error = EADDRNOTAVAIL; - splx(s); return error; } /* @@ -1934,12 +2611,10 @@ ip_addmembership( } if (i < imo->imo_num_memberships) { error = EADDRINUSE; - splx(s); return error; } if (i == IP_MAX_MEMBERSHIPS) { error = ETOOMANYREFS; - splx(s); return error; } /* @@ -1949,11 +2624,9 @@ ip_addmembership( if ((imo->imo_membership[i] = in_addmulti(&mreq->imr_multiaddr, ifp)) == NULL) { error = ENOBUFS; - splx(s); return error; } ++imo->imo_num_memberships; - splx(s); return error; } @@ -1967,7 +2640,6 @@ ip_dropmembership( struct ip_mreq *mreq) { int error = 0; - int s = 0; struct ifnet* ifp = NULL; int i; @@ -1976,7 +2648,6 @@ ip_dropmembership( return error; } - s = splimp(); /* * If an interface address was specified, get a pointer * to its ifnet structure. @@ -1987,7 +2658,6 @@ ip_dropmembership( ifp = ip_multicast_if(&mreq->imr_interface, NULL); if (ifp == NULL) { error = EADDRNOTAVAIL; - splx(s); return error; } } @@ -2003,21 +2673,19 @@ ip_dropmembership( } if (i == imo->imo_num_memberships) { error = EADDRNOTAVAIL; - splx(s); return error; } /* * Give up the multicast address record to which the * membership points. */ - in_delmulti(imo->imo_membership[i]); + in_delmulti(&imo->imo_membership[i]); /* * Remove the gap in the membership array. */ for (++i; i < imo->imo_num_memberships; ++i) imo->imo_membership[i-1] = imo->imo_membership[i]; --imo->imo_num_memberships; - splx(s); return error; } @@ -2037,6 +2705,7 @@ ip_getmoptions(sopt, imo) error = 0; switch (sopt->sopt_name) { +#if MROUTING case IP_MULTICAST_VIF: if (imo != NULL) optval = imo->imo_multicast_vif; @@ -2044,6 +2713,7 @@ ip_getmoptions(sopt, imo) optval = -1; error = sooptcopyout(sopt, &optval, sizeof optval); break; +#endif /* MROUTING */ case IP_MULTICAST_IF: if (imo == NULL || imo->imo_multicast_ifp == NULL) @@ -2099,8 +2769,7 @@ ip_freemoptions(imo) if (imo != NULL) { for (i = 0; i < imo->imo_num_memberships; ++i) - if (imo->imo_membership[i] != NULL) - in_delmulti(imo->imo_membership[i]); + in_delmulti(&imo->imo_membership[i]); FREE(imo, M_IPMOPTS); } } @@ -2121,86 +2790,83 @@ ip_mloopback(ifp, m, dst, hlen) { register struct ip *ip; struct mbuf *copym; + int sw_csum = (apple_hwcksum_tx == 0); copym = m_copy(m, 0, M_COPYALL); if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen)) copym = m_pullup(copym, hlen); - if (copym != NULL) { - /* - * We don't bother to fragment if the IP length is greater - * than the interface's MTU. Can this possibly matter? - */ - ip = mtod(copym, struct ip *); - HTONS(ip->ip_len); - HTONS(ip->ip_off); - ip->ip_sum = 0; - ip->ip_sum = in_cksum(copym, hlen); - /* - * NB: - * It's not clear whether there are any lingering - * reentrancy problems in other areas which might - * be exposed by using ip_input directly (in - * particular, everything which modifies the packet - * in-place). Yet another option is using the - * protosw directly to deliver the looped back - * packet. For the moment, we'll err on the side - * of safety by using if_simloop(). - */ + + if (copym == NULL) + return; + + /* + * We don't bother to fragment if the IP length is greater + * than the interface's MTU. Can this possibly matter? + */ + ip = mtod(copym, struct ip *); + HTONS(ip->ip_len); + HTONS(ip->ip_off); + ip->ip_sum = 0; + ip->ip_sum = in_cksum(copym, hlen); + /* + * NB: + * It's not clear whether there are any lingering + * reentrancy problems in other areas which might + * be exposed by using ip_input directly (in + * particular, everything which modifies the packet + * in-place). Yet another option is using the + * protosw directly to deliver the looped back + * packet. For the moment, we'll err on the side + * of safety by using if_simloop(). + */ #if 1 /* XXX */ - if (dst->sin_family != AF_INET) { - printf("ip_mloopback: bad address family %d\n", - dst->sin_family); - dst->sin_family = AF_INET; - } + if (dst->sin_family != AF_INET) { + printf("ip_mloopback: bad address family %d\n", + dst->sin_family); + dst->sin_family = AF_INET; + } #endif - /* - * Mark checksum as valid or calculate checksum for loopback. - * - * This is done this way because we have to embed the ifp of - * the interface we will send the original copy of the packet - * out on in the mbuf. ip_input will check if_hwassist of the - * embedded ifp and ignore all csum_flags if if_hwassist is 0. - * The UDP checksum has not been calculated yet. - */ - if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { - if (ifp->if_hwassist) { - copym->m_pkthdr.csum_flags |= - CSUM_DATA_VALID | CSUM_PSEUDO_HDR | - CSUM_IP_CHECKED | CSUM_IP_VALID; - copym->m_pkthdr.csum_data = 0xffff; - } else { - NTOHS(ip->ip_len); - in_delayed_cksum(copym); - HTONS(ip->ip_len); - } - } - - - /* - * TedW: - * We need to send all loopback traffic down to dlil in case - * a filter has tapped-in. - */ - - if (lo_dl_tag == 0) - dlil_find_dltag(APPLE_IF_FAM_LOOPBACK, 0, PF_INET, &lo_dl_tag); - - /* - * Stuff the 'real' ifp into the pkthdr, to be used in matching - * in ip_input(); we need the loopback ifp/dl_tag passed as args - * to make the loopback driver compliant with the data link - * requirements. - */ - if (lo_dl_tag) { - copym->m_pkthdr.rcvif = ifp; - dlil_output(lo_dl_tag, copym, 0, (struct sockaddr *) dst, 0); + * Mark checksum as valid or calculate checksum for loopback. + * + * This is done this way because we have to embed the ifp of + * the interface we will send the original copy of the packet + * out on in the mbuf. ip_input will check if_hwassist of the + * embedded ifp and ignore all csum_flags if if_hwassist is 0. + * The UDP checksum has not been calculated yet. + */ + if (sw_csum || (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA)) { + if (!sw_csum && IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist)) { + copym->m_pkthdr.csum_flags |= + CSUM_DATA_VALID | CSUM_PSEUDO_HDR | + CSUM_IP_CHECKED | CSUM_IP_VALID; + copym->m_pkthdr.csum_data = 0xffff; } else { - printf("Warning: ip_output call to dlil_find_dltag failed!\n"); - m_freem(copym); + NTOHS(ip->ip_len); + in_delayed_cksum(copym); + HTONS(ip->ip_len); } + } -/* if_simloop(ifp, copym, (struct sockaddr *)dst, 0);*/ + /* + * TedW: + * We need to send all loopback traffic down to dlil in case + * a filter has tapped-in. + */ + + /* + * Stuff the 'real' ifp into the pkthdr, to be used in matching + * in ip_input(); we need the loopback ifp/dl_tag passed as args + * to make the loopback driver compliant with the data link + * requirements. + */ + if (lo_ifp) { + copym->m_pkthdr.rcvif = ifp; + dlil_output(lo_ifp, PF_INET, copym, 0, + (struct sockaddr *) dst, 0); + } else { + printf("Warning: ip_output call to dlil_find_dltag failed!\n"); + m_freem(copym); } }