X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/7b1edb791d9ca667b95988cb5638c4c88416cd17..5eebf7385fedb1517b66b53c28e5aa6bb0a2be50:/bsd/netinet/ip_output.c diff --git a/bsd/netinet/ip_output.c b/bsd/netinet/ip_output.c index ff9b2da85..377bf2b08 100644 --- a/bsd/netinet/ip_output.c +++ b/bsd/netinet/ip_output.c @@ -52,17 +52,11 @@ * SUCH DAMAGE. * * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 + * $FreeBSD: src/sys/netinet/ip_output.c,v 1.99.2.16 2001/07/19 06:37:26 kris Exp $ */ #define _IP_VHL -#if ISFB31 -#include "opt_ipfw.h" -#include "opt_ipdn.h" -#include "opt_ipdivert.h" -#include "opt_ipfilter.h" -#endif - #include #include #include @@ -78,51 +72,42 @@ #include #include #include -#if INET6 -#include -#include -#endif #include #include #include -#include +#include "faith.h" + +#include #include #define DBG_LAYER_BEG NETDBG_CODE(DBG_NETIP, 1) #define DBG_LAYER_END NETDBG_CODE(DBG_NETIP, 3) #define DBG_FNC_IP_OUTPUT NETDBG_CODE(DBG_NETIP, (1 << 8) | 1) +#define DBG_FNC_IPSEC4_OUTPUT NETDBG_CODE(DBG_NETIP, (2 << 8) | 1) -#ifdef vax +#if vax #include #endif -#if ISFB31 +#if __FreeBSD__ #include static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "internet multicast options"); #endif -//static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "internet multicast options"); - #if IPSEC #include #include +#if IPSEC_DEBUG #include - -#endif /*IPSEC*/ - -#if !defined(COMPAT_IPFW) || COMPAT_IPFW == 1 -#undef COMPAT_IPFW -#define COMPAT_IPFW 1 #else -#undef COMPAT_IPFW +#define KEYDEBUG(lev,arg) #endif +#endif /*IPSEC*/ -#if COMPAT_IPFW #include -#endif #if DUMMYNET #include @@ -138,6 +123,7 @@ static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "internet multicast options"); u_short ip_id; static struct mbuf *ip_insertoptions __P((struct mbuf *, struct mbuf *, int *)); +static struct ifnet *ip_multicast_if __P((struct in_addr *, int *)); static void ip_mloopback __P((struct ifnet *, struct mbuf *, struct sockaddr_in *, int)); static int ip_getmoptions @@ -145,14 +131,31 @@ 
static int ip_getmoptions static int ip_pcbopts __P((int, struct mbuf **, struct mbuf *)); static int ip_setmoptions __P((struct sockopt *, struct ip_moptions **)); + +int ip_createmoptions(struct ip_moptions **imop); +int ip_addmembership(struct ip_moptions *imo, struct ip_mreq *mreq); +int ip_dropmembership(struct ip_moptions *imo, struct ip_mreq *mreq); +int ip_optcopy __P((struct ip *, struct ip *)); +extern int (*fr_checkp) __P((struct ip *, int, struct ifnet *, int, struct mbuf **)); +#ifdef __APPLE__ +extern struct mbuf* m_dup(register struct mbuf *m, int how); +#endif + static u_long lo_dl_tag = 0; -static int ip_optcopy __P((struct ip *, struct ip *)); void in_delayed_cksum(struct mbuf *m); extern int apple_hwcksum_tx; +extern u_long route_generation; extern struct protosw inetsw[]; +extern struct ip_linklocal_stat ip_linklocal_stat; + +/* temporary: for testing */ +#if IPSEC +extern int ipsec_bypass; +#endif + /* * IP output. The packet in mbuf chain m contains a skeletal IP * header (with len, off, ttl, proto, tos, src, dst). 
@@ -168,58 +171,34 @@ ip_output(m0, opt, ro, flags, imo) struct ip_moptions *imo; { struct ip *ip, *mhip; - struct ifnet *ifp; - u_long dl_tag; + struct ifnet *ifp = NULL; struct mbuf *m = m0; int hlen = sizeof (struct ip); int len, off, error = 0; - struct sockaddr_in *dst; - struct in_ifaddr *ia; + struct sockaddr_in *dst = NULL; + struct in_ifaddr *ia = NULL; int isbroadcast, sw_csum; #if IPSEC struct route iproute; - struct socket *so; + struct socket *so = NULL; struct secpolicy *sp = NULL; #endif + u_int16_t divert_cookie; /* firewall cookie */ #if IPFIREWALL_FORWARD int fwd_rewrite_src = 0; #endif - - -#if !IPDIVERT /* dummy variable for the firewall code to play with */ - u_short ip_divert_cookie = 0 ; -#endif -#if COMPAT_IPFW - struct ip_fw_chain *rule = NULL ; + struct ip_fw_chain *rule = NULL; + +#if IPDIVERT + /* Get and reset firewall cookie */ + divert_cookie = ip_divert_cookie; + ip_divert_cookie = 0; +#else + divert_cookie = 0; #endif KERNEL_DEBUG(DBG_FNC_IP_OUTPUT | DBG_FUNC_START, 0,0,0,0,0); -#if IPSEC - /* - * NOTE: m->m_pkthdr is NULL cleared below just to prevent ipfw code - * from SEGV. - * ipfw code uses rcvif to determine incoming interface, and - * KAME uses rcvif for ipsec processing. - * ipfw may not be working right with KAME at this moment. - * We need more tests. - */ -#if DUMMYNET - if (m->m_type == MT_DUMMYNET) { - if (m->m_next != NULL) { - so = (struct socket *)m->m_next->m_pkthdr.rcvif; - m->m_next->m_pkthdr.rcvif = NULL; - } else - so = NULL; - } else -#endif - { - so = ipsec_getsocket(m); - ipsec_setsocket(m, NULL); - } -#endif /*IPSEC*/ - - #if IPFIREWALL && DUMMYNET /* * dummynet packet are prepended a vestigial mbuf with @@ -227,26 +206,39 @@ ip_output(m0, opt, ro, flags, imo) * rule. */ if (m->m_type == MT_DUMMYNET) { - struct mbuf *tmp_m = m ; /* * the packet was already tagged, so part of the * processing was already done, and we need to go down. 
- * opt, flags and imo have already been used, and now - * they are used to hold ifp and hlen and NULL, respectively. + * Get parameters from the header. */ - rule = (struct ip_fw_chain *)(m->m_data) ; - m = m->m_next ; - FREE(tmp_m, M_IPFW); + rule = (struct ip_fw_chain *)(m->m_data) ; + opt = NULL ; + ro = & ( ((struct dn_pkt *)m)->ro ) ; + imo = NULL ; + dst = ((struct dn_pkt *)m)->dn_dst ; + ifp = ((struct dn_pkt *)m)->ifp ; + flags = ((struct dn_pkt *)m)->flags; + m0 = m = m->m_next ; +#if IPSEC + if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) { + so = ipsec_getsocket(m); + (void)ipsec_setsocket(m, NULL); + } +#endif ip = mtod(m, struct ip *); - dst = (struct sockaddr_in *)&ro->ro_dst; - ifp = (struct ifnet *)opt; hlen = IP_VHL_HL(ip->ip_vhl) << 2 ; - opt = NULL ; - flags = 0 ; /* XXX is this correct ? */ + if (ro->ro_rt != NULL) + ia = (struct in_ifaddr *)ro->ro_rt->rt_ifa; goto sendit; } else rule = NULL ; #endif +#if IPSEC + if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) { + so = ipsec_getsocket(m); + (void)ipsec_setsocket(m, NULL); + } +#endif #if DIAGNOSTIC if ((m->m_flags & M_PKTHDR) == 0) @@ -266,7 +258,11 @@ ip_output(m0, opt, ro, flags, imo) if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) { ip->ip_vhl = IP_MAKE_VHL(IPVERSION, hlen >> 2); ip->ip_off &= IP_DF; +#if RANDOM_IP_ID + ip->ip_id = ip_randomid(); +#else ip->ip_id = htons(ip_id++); +#endif ipstat.ips_localout++; } else { hlen = IP_VHL_HL(ip->ip_vhl) << 2; @@ -276,17 +272,29 @@ ip_output(m0, opt, ro, flags, imo) ip->ip_src.s_addr, ip->ip_p, ip->ip_off, ip->ip_len); dst = (struct sockaddr_in *)&ro->ro_dst; + /* * If there is a cached route, * check that it is to the same destination * and is still up. If not, free it and try again. + * The address family should also be checked in case of sharing the + * cache with IPv6. 
*/ + + if (ro->ro_rt && (ro->ro_rt->generation_id != route_generation) && + ((flags & (IP_ROUTETOIF | IP_FORWARDING)) == 0) && (ip->ip_src.s_addr != INADDR_ANY) && + (ifa_foraddr(ip->ip_src.s_addr) == NULL)) { + error = EADDRNOTAVAIL; + goto bad; + } if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 || + dst->sin_family != AF_INET || dst->sin_addr.s_addr != ip->ip_dst.s_addr)) { - RTFREE(ro->ro_rt); + rtfree(ro->ro_rt); ro->ro_rt = (struct rtentry *)0; } if (ro->ro_rt == 0) { + bzero(dst, sizeof(*dst)); dst->sin_family = AF_INET; dst->sin_len = sizeof(*dst); dst->sin_addr = ip->ip_dst; @@ -305,7 +313,6 @@ ip_output(m0, opt, ro, flags, imo) goto bad; } ifp = ia->ia_ifp; - dl_tag = ia->ia_ifa.ifa_dlt; ip->ip_ttl = 1; isbroadcast = in_broadcast(dst->sin_addr, ifp); } else { @@ -327,7 +334,6 @@ ip_output(m0, opt, ro, flags, imo) } ia = ifatoia(ro->ro_rt->rt_ifa); ifp = ro->ro_rt->rt_ifp; - dl_tag = ro->ro_rt->rt_dlt; ro->ro_rt->rt_use++; if (ro->ro_rt->rt_flags & RTF_GATEWAY) dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway; @@ -350,14 +356,16 @@ ip_output(m0, opt, ro, flags, imo) * See if the caller provided any multicast options */ if (imo != NULL) { - ip->ip_ttl = imo->imo_multicast_ttl; - if (imo->imo_multicast_ifp != NULL) + if ((flags & IP_RAWOUTPUT) == 0) ip->ip_ttl = imo->imo_multicast_ttl; + if (imo->imo_multicast_ifp != NULL) { ifp = imo->imo_multicast_ifp; - if (imo->imo_multicast_vif != -1) + } + if (imo->imo_multicast_vif != -1 && + ((flags & IP_RAWOUTPUT) == 0 || ip->ip_src.s_addr == INADDR_ANY)) ip->ip_src.s_addr = - ip_mcast_src(imo->imo_multicast_vif); + ip_mcast_src(imo->imo_multicast_vif); } else - ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL; + if ((flags & IP_RAWOUTPUT) == 0) ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL; /* * Confirm that the outgoing interface supports multicast. 
*/ @@ -375,12 +383,16 @@ ip_output(m0, opt, ro, flags, imo) if (ip->ip_src.s_addr == INADDR_ANY) { register struct in_ifaddr *ia1; - for (ia1 = in_ifaddrhead.tqh_first; ia1; - ia1 = ia1->ia_link.tqe_next) + TAILQ_FOREACH(ia1, &in_ifaddrhead, ia_link) if (ia1->ia_ifp == ifp) { ip->ip_src = IA_SIN(ia1)->sin_addr; + break; } + if (ip->ip_src.s_addr == INADDR_ANY) { + error = ENETUNREACH; + goto bad; + } } IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm); @@ -489,6 +501,173 @@ ip_output(m0, opt, ro, flags, imo) } sendit: + /* + * Force IP TTL to 255 following draft-ietf-zeroconf-ipv4-linklocal.txt + */ + if (IN_LINKLOCAL(ntohl(ip->ip_src.s_addr)) || IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr))) { + ip_linklocal_stat.iplls_out_total++; + if (ip->ip_ttl != MAXTTL) { + ip_linklocal_stat.iplls_out_badttl++; + ip->ip_ttl = MAXTTL; + } + } + +#if IPSEC + /* temporary for testing only: bypass ipsec alltogether */ + + if (ipsec_bypass != 0 || (flags & IP_NOIPSEC) != 0) + goto skip_ipsec; + + KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_START, 0,0,0,0,0); + + /* get SP for this packet */ + if (so == NULL) + sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, flags, &error); + else + sp = ipsec4_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error); + + if (sp == NULL) { + ipsecstat.out_inval++; + KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 0,0,0,0,0); + goto bad; + } + + error = 0; + + /* check policy */ + switch (sp->policy) { + case IPSEC_POLICY_DISCARD: + /* + * This packet is just discarded. + */ + ipsecstat.out_polvio++; + KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 1,0,0,0,0); + goto bad; + + case IPSEC_POLICY_BYPASS: + case IPSEC_POLICY_NONE: + /* no need to do IPsec. 
*/ + KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 2,0,0,0,0); + goto skip_ipsec; + + case IPSEC_POLICY_IPSEC: + if (sp->req == NULL) { + /* acquire a policy */ + error = key_spdacquire(sp); + KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 3,0,0,0,0); + goto bad; + } + break; + + case IPSEC_POLICY_ENTRUST: + default: + printf("ip_output: Invalid policy found. %d\n", sp->policy); + } + { + struct ipsec_output_state state; + bzero(&state, sizeof(state)); + state.m = m; + if (flags & IP_ROUTETOIF) { + state.ro = &iproute; + bzero(&iproute, sizeof(iproute)); + } else + state.ro = ro; + state.dst = (struct sockaddr *)dst; + + ip->ip_sum = 0; + + /* + * XXX + * delayed checksums are not currently compatible with IPsec + */ + if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { + in_delayed_cksum(m); + m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; + } + + HTONS(ip->ip_len); + HTONS(ip->ip_off); + + error = ipsec4_output(&state, sp, flags); + + m0 = m = state.m; + + if (flags & IP_ROUTETOIF) { + /* + * if we have tunnel mode SA, we may need to ignore + * IP_ROUTETOIF. + */ + if (state.ro != &iproute || state.ro->ro_rt != NULL) { + flags &= ~IP_ROUTETOIF; + ro = state.ro; + } + } else + ro = state.ro; + + dst = (struct sockaddr_in *)state.dst; + if (error) { + /* mbuf is already reclaimed in ipsec4_output. 
*/ + m0 = NULL; + switch (error) { + case EHOSTUNREACH: + case ENETUNREACH: + case EMSGSIZE: + case ENOBUFS: + case ENOMEM: + break; + default: + printf("ip4_output (ipsec): error code %d\n", error); + /*fall through*/ + case ENOENT: + /* don't show these error codes to the user */ + error = 0; + break; + } + KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 4,0,0,0,0); + goto bad; + } + } + + /* be sure to update variables that are affected by ipsec4_output() */ + ip = mtod(m, struct ip *); + +#ifdef _IP_VHL + hlen = IP_VHL_HL(ip->ip_vhl) << 2; +#else + hlen = ip->ip_hl << 2; +#endif + /* Check that there wasn't a route change and src is still valid */ + + if (ro->ro_rt->generation_id != route_generation) { + if (ifa_foraddr(ip->ip_src.s_addr) == NULL && ((flags & (IP_ROUTETOIF | IP_FORWARDING)) == 0)) { + error = EADDRNOTAVAIL; + KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 5,0,0,0,0); + goto bad; + } + rtfree(ro->ro_rt); + ro->ro_rt = NULL; + } + + if (ro->ro_rt == NULL) { + if ((flags & IP_ROUTETOIF) == 0) { + printf("ip_output: " + "can't update route after IPsec processing\n"); + error = EHOSTUNREACH; /*XXX*/ + KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 6,0,0,0,0); + goto bad; + } + } else { + ia = ifatoia(ro->ro_rt->rt_ifa); + ifp = ro->ro_rt->rt_ifp; + } + + /* make it flipped, again. */ + NTOHS(ip->ip_len); + NTOHS(ip->ip_off); + KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 7,0xff,0xff,0xff,0xff); +skip_ipsec: +#endif /*IPSEC*/ + /* * IpHack's section. * - Xlate: translate packet's addr/port (NAT). @@ -496,25 +675,28 @@ sendit: * - Wrap: fake packet's addr/port * - Encapsulate: put it in another IP and send out. */ -#if COMPAT_IPFW - if (ip_nat_ptr && !(*ip_nat_ptr)(&ip, &m, ifp, IP_NAT_OUT)) { - error = EACCES; - goto done; + if (fr_checkp) { + struct mbuf *m1 = m; + + if ((error = (*fr_checkp)(ip, hlen, ifp, 1, &m1)) || !m1) + goto done; + ip = mtod(m0 = m = m1, struct ip *); } /* * Check with the firewall... 
*/ - if (ip_fw_chk_ptr) { + if (fw_enable && ip_fw_chk_ptr) { struct sockaddr_in *old = dst; off = (*ip_fw_chk_ptr)(&ip, - hlen, ifp, &ip_divert_cookie, &m, &rule, &dst); + hlen, ifp, &divert_cookie, &m, &rule, &dst); /* * On return we must do the following: - * m == NULL -> drop the pkt + * IP_FW_PORT_DENY_FLAG -> drop the pkt (XXX new) * 1<=off<= 0xffff -> DIVERT - * (off & 0x10000) -> send to a DUMMYNET pipe + * (off & IP_FW_PORT_DYNT_FLAG) -> send to a DUMMYNET pipe + * (off & IP_FW_PORT_TEE_FLAG) -> TEE the packet * dst != old -> IPFIREWALL_FORWARD * off==0, dst==old -> accept * If some of the above modules is not compiled in, then @@ -523,47 +705,63 @@ sendit: * unsupported rules), but better play safe and drop * packets in case of doubt. */ - if (!m) { /* firewall said to reject */ - error = EACCES; - goto done; + m0 = m; + if ( (off & IP_FW_PORT_DENY_FLAG) || m == NULL) { + if (m) + m_freem(m); + error = EACCES ; + goto done ; } + ip = mtod(m, struct ip *); if (off == 0 && dst == old) /* common case */ goto pass ; #if DUMMYNET - if (off & 0x10000) { + if ((off & IP_FW_PORT_DYNT_FLAG) != 0) { /* * pass the pkt to dummynet. Need to include - * pipe number, m, ifp, ro, hlen because these are + * pipe number, m, ifp, ro, dst because these are * not recomputed in the next pass. * All other parameters have been already used and * so they are not needed anymore. * XXX note: if the ifp or ro entry are deleted * while a pkt is in dummynet, we are in trouble! */ - dummynet_io(off & 0xffff, DN_TO_IP_OUT, m,ifp,ro,hlen,rule); - goto done; + error = dummynet_io(off & 0xffff, DN_TO_IP_OUT, m, + ifp,ro,dst,rule, flags); + goto done; } #endif #if IPDIVERT - if (off > 0 && off < 0x10000) { /* Divert packet */ - - /* - * delayed checksums are not currently compatible - * with divert sockets. 
- */ - if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { - in_delayed_cksum(m); - if (m == NULL) - return(ENOMEM); - m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; - } - - /* Restore packet header fields to original values */ - ip->ip_len = htons((u_short)ip->ip_len); - ip->ip_off = htons((u_short)ip->ip_off); - - ip_divert_port = off & 0xffff ; - (*ip_protox[IPPROTO_DIVERT]->pr_input)(m, 0); + if (off != 0 && (off & IP_FW_PORT_DYNT_FLAG) == 0) { + struct mbuf *clone = NULL; + + /* Clone packet if we're doing a 'tee' */ + if ((off & IP_FW_PORT_TEE_FLAG) != 0) + clone = m_dup(m, M_DONTWAIT); + /* + * XXX + * delayed checksums are not currently compatible + * with divert sockets. + */ + if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { + in_delayed_cksum(m); + m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; + } + + /* Restore packet header fields to original values */ + HTONS(ip->ip_len); + HTONS(ip->ip_off); + + /* Deliver packet to divert input routine */ + ip_divert_cookie = divert_cookie; + divert_packet(m, 0, off & 0xffff); + + /* If 'tee', continue with original packet */ + if (clone != NULL) { + m0 = m = clone; + ip = mtod(m, struct ip *); + goto pass; + } goto done; } #endif @@ -602,8 +800,7 @@ sendit: * as the packet runs through ip_input() as * it is done through a ISR. 
*/ - for (ia = TAILQ_FIRST(&in_ifaddrhead); ia; - ia = TAILQ_NEXT(ia, ia_link)) { + TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) { /* * If the addr to forward to is one * of ours, we pretend to @@ -618,18 +815,15 @@ sendit: ip_fw_fwd_addr = dst; if (m->m_pkthdr.rcvif == NULL) m->m_pkthdr.rcvif = ifunit("lo0"); - - if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { - m->m_pkthdr.csum_flags |= - CSUM_DATA_VALID | CSUM_PSEUDO_HDR; - m0->m_pkthdr.csum_data = 0xffff; - } - m->m_pkthdr.csum_flags |= - CSUM_IP_CHECKED | CSUM_IP_VALID; - ip->ip_len = htons((u_short)ip->ip_len); - ip->ip_off = htons((u_short)ip->ip_off); - - + if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { + m->m_pkthdr.csum_flags |= + CSUM_DATA_VALID | CSUM_PSEUDO_HDR; + m->m_pkthdr.csum_data = 0xffff; + } + m->m_pkthdr.csum_flags |= + CSUM_IP_CHECKED | CSUM_IP_VALID; + HTONS(ip->ip_len); + HTONS(ip->ip_off); ip_input(m); goto done; } @@ -652,7 +846,6 @@ sendit: ia = ifatoia(ro_fwd->ro_rt->rt_ifa); ifp = ro_fwd->ro_rt->rt_ifp; - dl_tag = ro->ro_rt->rt_dlt; ro_fwd->ro_rt->rt_use++; if (ro_fwd->ro_rt->rt_flags & RTF_GATEWAY) dst = (struct sockaddr_in *)ro_fwd->ro_rt->rt_gateway; @@ -661,7 +854,7 @@ sendit: (ro_fwd->ro_rt->rt_flags & RTF_BROADCAST); else isbroadcast = in_broadcast(dst->sin_addr, ifp); - RTFREE(ro->ro_rt); + rtfree(ro->ro_rt); ro->ro_rt = ro_fwd->ro_rt; dst = (struct sockaddr_in *)&ro_fwd->ro_dst; @@ -675,208 +868,100 @@ sendit: goto pass ; } #endif /* IPFIREWALL_FORWARD */ - /* - * if we get here, none of the above matches, and - * we have to drop the pkt - */ - m_freem(m); - error = EACCES; /* not sure this is the right error msg */ - goto done; - } -#endif /* COMPAT_IPFW */ - -pass: - -#if defined(PM) - /* - * Processing IP filter/NAT. - * Return TRUE iff this packet is discarded. - * Return FALSE iff this packet is accepted. 
- */ - - if (doNatFil && pm_out(ro->ro_rt->rt_ifp, ip, m)) - goto done; -#endif - -#if IPSEC - /* get SP for this packet */ - if (so == NULL) - sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, flags, &error); - else - sp = ipsec4_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error); - - if (sp == NULL) { - ipsecstat.out_inval++; - goto bad; - } - - error = 0; - - /* check policy */ - switch (sp->policy) { - case IPSEC_POLICY_DISCARD: - /* - * This packet is just discarded. - */ - ipsecstat.out_polvio++; - goto bad; - - case IPSEC_POLICY_BYPASS: - case IPSEC_POLICY_NONE: - /* no need to do IPsec. */ - goto skip_ipsec; - - case IPSEC_POLICY_IPSEC: - if (sp->req == NULL) { - /* XXX should be panic ? */ - printf("ip_output: No IPsec request specified.\n"); - error = EINVAL; - goto bad; - } - break; - - case IPSEC_POLICY_ENTRUST: - default: - printf("ip_output: Invalid policy found. %d\n", sp->policy); - } - - - { - struct ipsec_output_state state; - bzero(&state, sizeof(state)); - state.m = m; - if (flags & IP_ROUTETOIF) { - state.ro = &iproute; - bzero(&iproute, sizeof(iproute)); - } else - state.ro = ro; - state.dst = (struct sockaddr *)dst; - - ip->ip_sum = 0; - - /* - * delayed checksums are not currently compatible with IPsec - */ - if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { - in_delayed_cksum(m); - if (m == NULL) - return(ENOMEM); - m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; - } - - ip->ip_len = htons((u_short)ip->ip_len); - ip->ip_off = htons((u_short)ip->ip_off); - - error = ipsec4_output(&state, sp, flags); - - m = state.m; - if (flags & IP_ROUTETOIF) { - /* - * if we have tunnel mode SA, we may need to ignore - * IP_ROUTETOIF. - */ - if (state.ro != &iproute || state.ro->ro_rt != NULL) { - flags &= ~IP_ROUTETOIF; - ro = state.ro; - } - } else - ro = state.ro; - dst = (struct sockaddr_in *)state.dst; - if (error) { - /* mbuf is already reclaimed in ipsec4_output. 
*/ - m0 = NULL; - switch (error) { - case EHOSTUNREACH: - case ENETUNREACH: - case EMSGSIZE: - case ENOBUFS: - case ENOMEM: - break; - default: - printf("ip4_output (ipsec): error code %d\n", error); - /*fall through*/ - case ENOENT: - /* don't show these error codes to the user */ - error = 0; - break; - } - goto bad; - } - } - - /* be sure to update variables that are affected by ipsec4_output() */ - ip = mtod(m, struct ip *); -#ifdef _IP_VHL - hlen = IP_VHL_HL(ip->ip_vhl) << 2; -#else - hlen = ip->ip_hl << 2; -#endif - if (ro->ro_rt == NULL) { - if ((flags & IP_ROUTETOIF) == 0) { - printf("ip_output: " - "can't update route after IPsec processing\n"); - error = EHOSTUNREACH; /*XXX*/ - goto bad; - } - } else { - /* nobody uses ia beyond here */ - ifp = ro->ro_rt->rt_ifp; + /* + * if we get here, none of the above matches, and + * we have to drop the pkt + */ + m_freem(m); + error = EACCES; /* not sure this is the right error msg */ + goto done; } - /* make it flipped, again. */ - ip->ip_len = ntohs((u_short)ip->ip_len); - ip->ip_off = ntohs((u_short)ip->ip_off); -skip_ipsec: -#endif /*IPSEC*/ - - - sw_csum = m->m_pkthdr.csum_flags | CSUM_IP; - - - /* frames that can be checksumed by GMACE SUM16 HW: frame >64, no fragments, no UDP odd length */ - - if (apple_hwcksum_tx && (sw_csum & CSUM_DELAY_DATA) && (ifp->if_hwassist & CSUM_TCP_SUM16) - && (ip->ip_len > 50) && (ip->ip_len <= ifp->if_mtu) - && !((ip->ip_len & 0x1) && (sw_csum & CSUM_UDP)) ) { +pass: +#if __APPLE__ + /* Do not allow loopback address to wind up on a wire */ + if ((ifp->if_flags & IFF_LOOPBACK) == 0 && + ((ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || + (ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)) { + ipstat.ips_badaddr++; + m_freem(m); + /* + * Simply drop the packet just like a firewall -- we do not want the + * the application to feel the pain, not yet... 
+ * Returning ENETUNREACH like ip6_output does in some similar cases + * could startle the otherwise clueless process that specifies + * loopback as the source address. + */ + goto done; + } +#endif + m->m_pkthdr.csum_flags |= CSUM_IP; + sw_csum = m->m_pkthdr.csum_flags + & ~IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist); - /* Apple GMAC HW, expects STUFF_OFFSET << 16 | START_OFFSET */ - u_short offset = (IP_VHL_HL(ip->ip_vhl) << 2) +14 ; /* IP+Enet header length */ - u_short csumprev= m->m_pkthdr.csum_data & 0xFFFF; - m->m_pkthdr.csum_flags = CSUM_DATA_VALID | CSUM_TCP_SUM16; /* for GMAC */ - m->m_pkthdr.csum_data = (csumprev + offset) << 16 ; - m->m_pkthdr.csum_data += offset; + if ((ifp->if_hwassist & CSUM_TCP_SUM16) != 0) { + /* + * Special case code for GMACE + * frames that can be checksumed by GMACE SUM16 HW: + * frame >64, no fragments, no UDP + */ + if (apple_hwcksum_tx && (m->m_pkthdr.csum_flags & CSUM_TCP) + && (ip->ip_len > 50) && (ip->ip_len <= ifp->if_mtu)) { + /* Apple GMAC HW, expects STUFF_OFFSET << 16 | START_OFFSET */ + u_short offset = (IP_VHL_HL(ip->ip_vhl) << 2) +14 ; /* IP+Enet header length */ + u_short csumprev= m->m_pkthdr.csum_data & 0xFFFF; + m->m_pkthdr.csum_flags = CSUM_DATA_VALID | CSUM_TCP_SUM16; /* for GMAC */ + m->m_pkthdr.csum_data = (csumprev + offset) << 16 ; + m->m_pkthdr.csum_data += offset; sw_csum = CSUM_DELAY_IP; /* do IP hdr chksum in software */ - } - else { - if (ifp->if_hwassist & CSUM_TCP_SUM16) /* force SW checksuming */ - m->m_pkthdr.csum_flags = 0; - else { /* not Apple enet */ - m->m_pkthdr.csum_flags = sw_csum & ifp->if_hwassist; - sw_csum &= ~ifp->if_hwassist; } - - if (sw_csum & CSUM_DELAY_DATA) { /* perform TCP/UDP checksuming now */ - in_delayed_cksum(m); - if (m == NULL) - return(ENOMEM); - sw_csum &= ~CSUM_DELAY_DATA; + else { + /* let the software handle any UDP or TCP checksums */ + sw_csum |= (CSUM_DELAY_DATA & m->m_pkthdr.csum_flags); } - } + } + + if (sw_csum & CSUM_DELAY_DATA) { + in_delayed_cksum(m); + 
sw_csum &= ~CSUM_DELAY_DATA; + m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; + } + + m->m_pkthdr.csum_flags &= IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist); /* - * If small enough for interface, or the interface will take + * If small enough for interface, or the interface will take * care of the fragmentation for us, can just send directly. */ - if ((u_short)ip->ip_len <= ifp->if_mtu || - ifp->if_hwassist & CSUM_FRAGMENT) { - - ip->ip_len = htons((u_short)ip->ip_len); - ip->ip_off = htons((u_short)ip->ip_off); + if ((u_short)ip->ip_len <= ifp->if_mtu || + ifp->if_hwassist & CSUM_FRAGMENT) { + HTONS(ip->ip_len); + HTONS(ip->ip_off); ip->ip_sum = 0; - if (sw_csum & CSUM_DELAY_IP) + if (sw_csum & CSUM_DELAY_IP) { ip->ip_sum = in_cksum(m, hlen); - error = dlil_output(dl_tag, m, (void *) ro->ro_rt, + } + +#ifndef __APPLE__ + /* Record statistics for this interface address. */ + if (!(flags & IP_FORWARDING) && ia != NULL) { + ia->ia_ifa.if_opackets++; + ia->ia_ifa.if_obytes += m->m_pkthdr.len; + } +#endif + +#if IPSEC + /* clean ipsec history once it goes out of the node */ + if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) + ipsec_delaux(m); +#endif +#if __APPLE__ + error = dlil_output(ifptodlt(ifp, PF_INET), m, (void *) ro->ro_rt, (struct sockaddr *)dst, 0); +#else + error = (*ifp->if_output)(ifp, m, + (struct sockaddr *)dst, ro->ro_rt); +#endif goto done; } /* @@ -906,24 +991,23 @@ skip_ipsec: goto bad; } - /* - * if the interface will not calculate checksums on - * fragmented packets, then do it here. - */ - if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA && - (ifp->if_hwassist & CSUM_IP_FRAGS) == 0) { - in_delayed_cksum(m); + /* + * if the interface will not calculate checksums on + * fragmented packets, then do it here. 
+ */ + if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA && + (ifp->if_hwassist & CSUM_IP_FRAGS) == 0) { + in_delayed_cksum(m); if (m == NULL) return(ENOMEM); - m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; - } + m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; + } { int mhlen, firstlen = len; struct mbuf **mnext = &m->m_nextpkt; - int nfrags = 1; - + int nfrags = 1; /* * Loop through length of segment after first fragment, @@ -964,11 +1048,12 @@ skip_ipsec: } m->m_pkthdr.len = mhlen + len; m->m_pkthdr.rcvif = (struct ifnet *)0; - m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags; - mhip->ip_off = htons((u_short)mhip->ip_off); + m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags; + HTONS(mhip->ip_off); mhip->ip_sum = 0; - if (sw_csum & CSUM_DELAY_IP) + if (sw_csum & CSUM_DELAY_IP) { mhip->ip_sum = in_cksum(m, mhlen); + } *mnext = m; mnext = &m->m_nextpkt; nfrags++; @@ -976,7 +1061,8 @@ skip_ipsec: ipstat.ips_ofragments += nfrags; /* set first/last markers for fragment chain */ - m0->m_flags |= M_FRAG; + m->m_flags |= M_LASTFRAG; + m0->m_flags |= M_FIRSTFRAG | M_FRAG; m0->m_pkthdr.csum_data = nfrags; /* @@ -987,11 +1073,12 @@ skip_ipsec: m_adj(m, hlen + firstlen - (u_short)ip->ip_len); m->m_pkthdr.len = hlen + firstlen; ip->ip_len = htons((u_short)m->m_pkthdr.len); - ip->ip_off = htons((u_short)(ip->ip_off | IP_MF)); + ip->ip_off |= IP_MF; + HTONS(ip->ip_off); ip->ip_sum = 0; - if (sw_csum & CSUM_DELAY_IP) + if (sw_csum & CSUM_DELAY_IP) { ip->ip_sum = in_cksum(m, hlen); - + } sendorfree: KERNEL_DEBUG(DBG_LAYER_END, ip->ip_dst.s_addr, @@ -1000,10 +1087,28 @@ sendorfree: for (m = m0; m; m = m0) { m0 = m->m_nextpkt; m->m_nextpkt = 0; - if (error == 0) - error = dlil_output(dl_tag, m, (void *) ro->ro_rt, +#if IPSEC + /* clean ipsec history once it goes out of the node */ + if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) + ipsec_delaux(m); +#endif + if (error == 0) { +#ifndef __APPLE__ + /* Record statistics for this interface address. 
*/ + if (ia != NULL) { + ia->ia_ifa.if_opackets++; + ia->ia_ifa.if_obytes += m->m_pkthdr.len; + } +#endif + +#if __APPLE__ + error = dlil_output(ifptodlt(ifp, PF_INET), m, (void *) ro->ro_rt, (struct sockaddr *)dst, 0); - else +#else + error = (*ifp->if_output)(ifp, m, + (struct sockaddr *)dst, ro->ro_rt); +#endif + } else m_freem(m); } @@ -1012,8 +1117,9 @@ sendorfree: } done: #if IPSEC + if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) { if (ro == &iproute && ro->ro_rt) { - RTFREE(ro->ro_rt); + rtfree(ro->ro_rt); ro->ro_rt = NULL; } if (sp != NULL) { @@ -1021,6 +1127,7 @@ done: printf("DP ip_output call free SP:%x\n", sp)); key_freesp(sp); } + } #endif /* IPSEC */ KERNEL_DEBUG(DBG_FNC_IP_OUTPUT | DBG_FUNC_END, error,0,0,0,0); @@ -1030,41 +1137,33 @@ bad: goto done; } -extern u_short in_chksum_skip(struct mbuf *, int, int); - void in_delayed_cksum(struct mbuf *m) { - struct ip *ip; - u_short csum, csum2, offset; - - ip = mtod(m, struct ip *); - offset = IP_VHL_HL(ip->ip_vhl) << 2 ; - - csum = in_cksum_skip(m, ip->ip_len, offset); - - if ((m->m_pkthdr.csum_flags & CSUM_UDP) && csum == 0) + struct ip *ip; + u_short csum, offset; + ip = mtod(m, struct ip *); + offset = IP_VHL_HL(ip->ip_vhl) << 2 ; + csum = in_cksum_skip(m, ip->ip_len, offset); + if (m->m_pkthdr.csum_flags & CSUM_UDP && csum == 0) csum = 0xffff; - - offset += m->m_pkthdr.csum_data & 0xFFFF; /* checksum offset */ + offset += m->m_pkthdr.csum_data & 0xFFFF; /* checksum offset */ if (offset > ip->ip_len) /* bogus offset */ return; - if (offset + sizeof(u_short) > m->m_len) { - printf("delayed m_pullup, m->len: %d off: %d p: %d\n", - m->m_len, offset, ip->ip_p); - /* - * XXX - * this shouldn't happen, but if it does, the - * correct behavior may be to insert the checksum - * in the existing chain instead of rearranging it. 
- */ - if (m = m_pullup(m, offset + sizeof(u_short)) == 0) - return; - } - - *(u_short *)(m->m_data + offset) = csum; + if (offset + sizeof(u_short) > m->m_len) { + printf("delayed m_pullup, m->len: %d off: %d p: %d\n", + m->m_len, offset, ip->ip_p); + /* + * XXX + * this shouldn't happen, but if it does, the + * correct behavior may be to insert the checksum + * in the existing chain instead of rearranging it. + */ + m = m_pullup(m, offset + sizeof(u_short)); + } + *(u_short *)(m->m_data + offset) = csum; } /* @@ -1094,6 +1193,7 @@ ip_insertoptions(m, opt, phlen) MGETHDR(n, M_DONTWAIT, MT_HEADER); if (n == 0) return (m); + n->m_pkthdr.rcvif = (struct ifnet *)0; n->m_pkthdr.len = m->m_pkthdr.len + optlen; m->m_len -= sizeof(struct ip); m->m_data += sizeof(struct ip); @@ -1139,8 +1239,16 @@ ip_optcopy(ip, jp) *dp++ = IPOPT_NOP; optlen = 1; continue; - } else - optlen = cp[IPOPT_OLEN]; + } +#if DIAGNOSTIC + if (cnt < IPOPT_OLEN + sizeof(*cp)) + panic("malformed IPv4 option passed to ip_optcopy"); +#endif + optlen = cp[IPOPT_OLEN]; +#if DIAGNOSTIC + if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) + panic("malformed IPv4 option passed to ip_optcopy"); +#endif /* bogus lengths should have been caught by ip_dooptions */ if (optlen > cnt) optlen = cnt; @@ -1204,7 +1312,10 @@ ip_ctloutput(so, sopt) case IP_RECVRETOPTS: case IP_RECVDSTADDR: case IP_RECVIF: + case IP_RECVTTL: +#if defined(NFAITH) && NFAITH > 0 case IP_FAITH: +#endif error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) @@ -1240,9 +1351,15 @@ ip_ctloutput(so, sopt) OPTSET(INP_RECVIF); break; + case IP_RECVTTL: + OPTSET(INP_RECVTTL); + break; + +#if defined(NFAITH) && NFAITH > 0 case IP_FAITH: OPTSET(INP_FAITH); break; +#endif } break; #undef OPTSET @@ -1293,9 +1410,9 @@ ip_ctloutput(so, sopt) struct mbuf *m; int optname; - if (error = sooptgetm(sopt, &m)) /* XXX */ + if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */ break; - if (error = sooptmcopyin(sopt, m)) /* XXX */ + if 
((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */ break; priv = (sopt->sopt_p != NULL && suser(sopt->sopt_p->p_ucred, @@ -1336,8 +1453,11 @@ ip_ctloutput(so, sopt) case IP_RECVRETOPTS: case IP_RECVDSTADDR: case IP_RECVIF: + case IP_RECVTTL: case IP_PORTRANGE: +#if defined(NFAITH) && NFAITH > 0 case IP_FAITH: +#endif switch (sopt->sopt_name) { case IP_TOS: @@ -1366,6 +1486,10 @@ ip_ctloutput(so, sopt) optval = OPTBIT(INP_RECVIF); break; + case IP_RECVTTL: + optval = OPTBIT(INP_RECVTTL); + break; + case IP_PORTRANGE: if (inp->inp_flags & INP_HIGHPORT) optval = IP_PORTRANGE_HIGH; @@ -1375,9 +1499,11 @@ ip_ctloutput(so, sopt) optval = 0; break; +#if defined(NFAITH) && NFAITH > 0 case IP_FAITH: optval = OPTBIT(INP_FAITH); break; +#endif } error = sooptcopyout(sopt, &optval, sizeof optval); break; @@ -1395,23 +1521,16 @@ ip_ctloutput(so, sopt) case IP_IPSEC_POLICY: { struct mbuf *m = NULL; - size_t len = 0; caddr_t req = NULL; + size_t len = 0; - if (error = sooptgetm(sopt, &m)) /* XXX */ - break; - if (error = sooptmcopyin(sopt, m)) /* XXX */ - break; - if (m) { + if (m != 0) { req = mtod(m, caddr_t); len = m->m_len; } - error = ipsec4_get_policy(sotoinpcb(so), req, len, &m); if (error == 0) - error = sooptmcopyout(sopt, m); /* XXX */ - - /* if error, m_freem called at soopt_mcopyout(). */ + error = soopt_mcopyout(sopt, m); /* XXX */ if (error == 0) m_freem(m); break; @@ -1539,6 +1658,33 @@ bad: * transmission, and one (IP_MULTICAST_TTL) totally duplicates a * standard option (IP_TTL). */ + +/* + * following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as interface index. 
+ */ +static struct ifnet * +ip_multicast_if(a, ifindexp) + struct in_addr *a; + int *ifindexp; +{ + int ifindex; + struct ifnet *ifp; + + if (ifindexp) + *ifindexp = 0; + if (ntohl(a->s_addr) >> 24 == 0) { + ifindex = ntohl(a->s_addr) & 0xffffff; + if (ifindex < 0 || if_index < ifindex) + return NULL; + ifp = ifindex2ifnet[ifindex]; + if (ifindexp) + *ifindexp = ifindex; + } else { + INADDR_TO_IFP(*a, ifp); + } + return ifp; +} + /* * Set the IP multicast options in response to user setsockopt(). */ @@ -1551,10 +1697,9 @@ ip_setmoptions(sopt, imop) int i; struct in_addr addr; struct ip_mreq mreq; - struct ifnet *ifp; + struct ifnet *ifp = NULL; struct ip_moptions *imo = *imop; - struct route ro; - struct sockaddr_in *dst; + int ifindex; int s; if (imo == NULL) { @@ -1562,17 +1707,10 @@ ip_setmoptions(sopt, imop) * No multicast option buffer attached to the pcb; * allocate one and initialize to default values. */ - imo = (struct ip_moptions*) _MALLOC(sizeof(*imo), M_IPMOPTS, - M_WAITOK); - - if (imo == NULL) - return (ENOBUFS); - *imop = imo; - imo->imo_multicast_ifp = NULL; - imo->imo_multicast_vif = -1; - imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; - imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; - imo->imo_num_memberships = 0; + error = ip_createmoptions(imop); + if (error != 0) + return error; + imo = *imop; } switch (sopt->sopt_name) { @@ -1614,13 +1752,17 @@ ip_setmoptions(sopt, imop) * it supports multicasting. 
*/ s = splimp(); - INADDR_TO_IFP(addr, ifp); + ifp = ip_multicast_if(&addr, &ifindex); if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { splx(s); error = EADDRNOTAVAIL; break; } imo->imo_multicast_ifp = ifp; + if (ifindex) + imo->imo_multicast_addr = addr; + else + imo->imo_multicast_addr.s_addr = INADDR_ANY; splx(s); break; @@ -1681,76 +1823,8 @@ ip_setmoptions(sopt, imop) error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq); if (error) break; - - if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) { - error = EINVAL; - break; - } - s = splimp(); - /* - * If no interface address was provided, use the interface of - * the route to the given multicast address. - */ - if (mreq.imr_interface.s_addr == INADDR_ANY) { - bzero((caddr_t)&ro, sizeof(ro)); - dst = (struct sockaddr_in *)&ro.ro_dst; - dst->sin_len = sizeof(*dst); - dst->sin_family = AF_INET; - dst->sin_addr = mreq.imr_multiaddr; - rtalloc(&ro); - if (ro.ro_rt == NULL) { - error = EADDRNOTAVAIL; - splx(s); - break; - } - ifp = ro.ro_rt->rt_ifp; - rtfree(ro.ro_rt); - } - else { - INADDR_TO_IFP(mreq.imr_interface, ifp); - } - - /* - * See if we found an interface, and confirm that it - * supports multicast. - */ - if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { - error = EADDRNOTAVAIL; - splx(s); - break; - } - /* - * See if the membership already exists or if all the - * membership slots are full. - */ - for (i = 0; i < imo->imo_num_memberships; ++i) { - if (imo->imo_membership[i]->inm_ifp == ifp && - imo->imo_membership[i]->inm_addr.s_addr - == mreq.imr_multiaddr.s_addr) - break; - } - if (i < imo->imo_num_memberships) { - error = EADDRINUSE; - splx(s); - break; - } - if (i == IP_MAX_MEMBERSHIPS) { - error = ETOOMANYREFS; - splx(s); - break; - } - /* - * Everything looks good; add a new record to the multicast - * address list for the given interface. 
- */ - if ((imo->imo_membership[i] = - in_addmulti(&mreq.imr_multiaddr, ifp)) == NULL) { - error = ENOBUFS; - splx(s); - break; - } - ++imo->imo_num_memberships; - splx(s); + + error = ip_addmembership(imo, &mreq); break; case IP_DROP_MEMBERSHIP: @@ -1761,54 +1835,8 @@ ip_setmoptions(sopt, imop) error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq); if (error) break; - - if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) { - error = EINVAL; - break; - } - - s = splimp(); - /* - * If an interface address was specified, get a pointer - * to its ifnet structure. - */ - if (mreq.imr_interface.s_addr == INADDR_ANY) - ifp = NULL; - else { - INADDR_TO_IFP(mreq.imr_interface, ifp); - if (ifp == NULL) { - error = EADDRNOTAVAIL; - splx(s); - break; - } - } - /* - * Find the membership in the membership array. - */ - for (i = 0; i < imo->imo_num_memberships; ++i) { - if ((ifp == NULL || - imo->imo_membership[i]->inm_ifp == ifp) && - imo->imo_membership[i]->inm_addr.s_addr == - mreq.imr_multiaddr.s_addr) - break; - } - if (i == imo->imo_num_memberships) { - error = EADDRNOTAVAIL; - splx(s); - break; - } - /* - * Give up the multicast address record to which the - * membership points. - */ - in_delmulti(imo->imo_membership[i]); - /* - * Remove the gap in the membership array. - */ - for (++i; i < imo->imo_num_memberships; ++i) - imo->imo_membership[i-1] = imo->imo_membership[i]; - --imo->imo_num_memberships; - splx(s); + + error = ip_dropmembership(imo, &mreq); break; default: @@ -1831,6 +1859,184 @@ ip_setmoptions(sopt, imop) return (error); } +/* + * Allocate an ip_moptions structure, set it to default values, and return it through imop.
+ */ +__private_extern__ int +ip_createmoptions( + struct ip_moptions **imop) +{ + struct ip_moptions *imo; + imo = (struct ip_moptions*) _MALLOC(sizeof(*imo), M_IPMOPTS, + M_WAITOK); + + if (imo == NULL) + return (ENOBUFS); + *imop = imo; + imo->imo_multicast_ifp = NULL; + imo->imo_multicast_addr.s_addr = INADDR_ANY; + imo->imo_multicast_vif = -1; + imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; + imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; + imo->imo_num_memberships = 0; + + return 0; +} + +/* + * Add membership to an IPv4 multicast. + */ +__private_extern__ int +ip_addmembership( + struct ip_moptions *imo, + struct ip_mreq *mreq) +{ + struct route ro; + struct sockaddr_in *dst; + struct ifnet *ifp = NULL; + int error = 0; + int s = 0; + int i; + + if (!IN_MULTICAST(ntohl(mreq->imr_multiaddr.s_addr))) { + error = EINVAL; + return error; + } + s = splimp(); + /* + * If no interface address was provided, use the interface of + * the route to the given multicast address. + */ + if (mreq->imr_interface.s_addr == INADDR_ANY) { + bzero((caddr_t)&ro, sizeof(ro)); + dst = (struct sockaddr_in *)&ro.ro_dst; + dst->sin_len = sizeof(*dst); + dst->sin_family = AF_INET; + dst->sin_addr = mreq->imr_multiaddr; + rtalloc(&ro); + if (ro.ro_rt != NULL) { + ifp = ro.ro_rt->rt_ifp; + rtfree(ro.ro_rt); + } + else { + /* If there's no default route, try using loopback */ + mreq->imr_interface.s_addr = INADDR_LOOPBACK; + } + } + + if (ifp == NULL) { + ifp = ip_multicast_if(&mreq->imr_interface, NULL); + } + + /* + * See if we found an interface, and confirm that it + * supports multicast. + */ + if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { + error = EADDRNOTAVAIL; + splx(s); + return error; + } + /* + * See if the membership already exists or if all the + * membership slots are full. 
+ */ + for (i = 0; i < imo->imo_num_memberships; ++i) { + if (imo->imo_membership[i]->inm_ifp == ifp && + imo->imo_membership[i]->inm_addr.s_addr + == mreq->imr_multiaddr.s_addr) + break; + } + if (i < imo->imo_num_memberships) { + error = EADDRINUSE; + splx(s); + return error; + } + if (i == IP_MAX_MEMBERSHIPS) { + error = ETOOMANYREFS; + splx(s); + return error; + } + /* + * Everything looks good; add a new record to the multicast + * address list for the given interface. + */ + if ((imo->imo_membership[i] = + in_addmulti(&mreq->imr_multiaddr, ifp)) == NULL) { + error = ENOBUFS; + splx(s); + return error; + } + ++imo->imo_num_memberships; + splx(s); + + return error; +} + +/* + * Drop membership of an IPv4 multicast. + */ +__private_extern__ int +ip_dropmembership( + struct ip_moptions *imo, + struct ip_mreq *mreq) +{ + int error = 0; + int s = 0; + struct ifnet* ifp = NULL; + int i; + + if (!IN_MULTICAST(ntohl(mreq->imr_multiaddr.s_addr))) { + error = EINVAL; + return error; + } + + s = splimp(); + /* + * If an interface address was specified, get a pointer + * to its ifnet structure. + */ + if (mreq->imr_interface.s_addr == INADDR_ANY) + ifp = NULL; + else { + ifp = ip_multicast_if(&mreq->imr_interface, NULL); + if (ifp == NULL) { + error = EADDRNOTAVAIL; + splx(s); + return error; + } + } + /* + * Find the membership in the membership array. + */ + for (i = 0; i < imo->imo_num_memberships; ++i) { + if ((ifp == NULL || + imo->imo_membership[i]->inm_ifp == ifp) && + imo->imo_membership[i]->inm_addr.s_addr == + mreq->imr_multiaddr.s_addr) + break; + } + if (i == imo->imo_num_memberships) { + error = EADDRNOTAVAIL; + splx(s); + return error; + } + /* + * Give up the multicast address record to which the + * membership points. + */ + in_delmulti(imo->imo_membership[i]); + /* + * Remove the gap in the membership array. 
+ */ + for (++i; i < imo->imo_num_memberships; ++i) + imo->imo_membership[i-1] = imo->imo_membership[i]; + --imo->imo_num_memberships; + splx(s); + + return error; +} + /* * Return the IP multicast options in response to user getsockopt(). */ @@ -1857,7 +2063,10 @@ ip_getmoptions(sopt, imo) case IP_MULTICAST_IF: if (imo == NULL || imo->imo_multicast_ifp == NULL) addr.s_addr = INADDR_ANY; - else { + else if (imo->imo_multicast_addr.s_addr) { + /* return the value user has set */ + addr = imo->imo_multicast_addr; + } else { IFP_TO_IA(imo->imo_multicast_ifp, ia); addr.s_addr = (ia == NULL) ? INADDR_ANY : IA_SIN(ia)->sin_addr.s_addr; @@ -1905,7 +2114,8 @@ ip_freemoptions(imo) if (imo != NULL) { for (i = 0; i < imo->imo_num_memberships; ++i) - in_delmulti(imo->imo_membership[i]); + if (imo->imo_membership[i] != NULL) + in_delmulti(imo->imo_membership[i]); FREE(imo, M_IPMOPTS); } } @@ -1936,11 +2146,10 @@ ip_mloopback(ifp, m, dst, hlen) * than the interface's MTU. Can this possibly matter? */ ip = mtod(copym, struct ip *); - ip->ip_len = htons((u_short)ip->ip_len); - ip->ip_off = htons((u_short)ip->ip_off); + HTONS(ip->ip_len); + HTONS(ip->ip_off); ip->ip_sum = 0; ip->ip_sum = in_cksum(copym, hlen); - /* * NB: * It's not clear whether there are any lingering @@ -1961,24 +2170,27 @@ ip_mloopback(ifp, m, dst, hlen) #endif - /* - * Mark checksum as valid or calculate checksum for loopback. - * - * This is done this way because we have to embed the ifp of - * the interface we will send the original copy of the packet - * out on in the mbuf. ip_input will check if_hwassist of the - * embedded ifp and ignore all csum_flags if if_hwassist is 0. - * The UDP checksum has not been calculated yet. 
- */ - if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { - if (ifp->if_hwassist) { - copym->m_pkthdr.csum_flags |= - CSUM_DATA_VALID | CSUM_PSEUDO_HDR | - CSUM_IP_CHECKED | CSUM_IP_VALID; - copym->m_pkthdr.csum_data = 0xffff; - } else - in_delayed_cksum(copym); - } + /* + * Mark checksum as valid or calculate checksum for loopback. + * + * This is done this way because we have to embed the ifp of + * the interface we will send the original copy of the packet + * out on in the mbuf. ip_input will check if_hwassist of the + * embedded ifp and ignore all csum_flags if if_hwassist is 0. + * The UDP checksum has not been calculated yet. + */ + if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { + if (IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist)) { + copym->m_pkthdr.csum_flags |= + CSUM_DATA_VALID | CSUM_PSEUDO_HDR | + CSUM_IP_CHECKED | CSUM_IP_VALID; + copym->m_pkthdr.csum_data = 0xffff; + } else { + NTOHS(ip->ip_len); + in_delayed_cksum(copym); + HTONS(ip->ip_len); + } + } /* @@ -1996,8 +2208,8 @@ ip_mloopback(ifp, m, dst, hlen) * to make the loopback driver compliant with the data link * requirements. */ - if (lo_dl_tag) - { copym->m_pkthdr.rcvif = ifp; + if (lo_dl_tag) { + copym->m_pkthdr.rcvif = ifp; dlil_output(lo_dl_tag, copym, 0, (struct sockaddr *) dst, 0); } else { printf("Warning: ip_output call to dlil_find_dltag failed!\n");