X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/c910b4d9d2451126ae3917b931cd4390c11e1d52..316670eb35587141e969394ae8537d66b9211e80:/bsd/netinet/ip_icmp.c diff --git a/bsd/netinet/ip_icmp.c b/bsd/netinet/ip_icmp.c index 995ca8346..576067247 100644 --- a/bsd/netinet/ip_icmp.c +++ b/bsd/netinet/ip_icmp.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2008 Apple Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -69,12 +69,15 @@ #include #include #include +#include #include #include #include #include #include +#include + #include #include @@ -96,11 +99,6 @@ #if IPSEC #include #include -#endif - -#if defined(NFAITH) && NFAITH > 0 -#include "faith.h" -#include #endif /* XXX This one should go in sys/mbuf.h. It is used to avoid that @@ -114,56 +112,67 @@ #include #endif /* MAC_NET */ + /* * ICMP routines: error generation, receive packet processing, and * routines to turnaround packets back to the originator, and * host table maintenance routines. */ -static struct icmpstat icmpstat; -SYSCTL_STRUCT(_net_inet_icmp, ICMPCTL_STATS, stats, CTLFLAG_RD, +struct icmpstat icmpstat; +SYSCTL_STRUCT(_net_inet_icmp, ICMPCTL_STATS, stats, CTLFLAG_RD | CTLFLAG_LOCKED, &icmpstat, icmpstat, ""); static int icmpmaskrepl = 0; -SYSCTL_INT(_net_inet_icmp, ICMPCTL_MASKREPL, maskrepl, CTLFLAG_RW, +SYSCTL_INT(_net_inet_icmp, ICMPCTL_MASKREPL, maskrepl, CTLFLAG_RW | CTLFLAG_LOCKED, &icmpmaskrepl, 0, ""); static int icmptimestamp = 0; -SYSCTL_INT(_net_inet_icmp, ICMPCTL_TIMESTAMP, timestamp, CTLFLAG_RW, +SYSCTL_INT(_net_inet_icmp, ICMPCTL_TIMESTAMP, timestamp, CTLFLAG_RW | CTLFLAG_LOCKED, &icmptimestamp, 0, ""); static int drop_redirect = 0; -SYSCTL_INT(_net_inet_icmp, OID_AUTO, drop_redirect, CTLFLAG_RW, +SYSCTL_INT(_net_inet_icmp, OID_AUTO, drop_redirect, CTLFLAG_RW | CTLFLAG_LOCKED, &drop_redirect, 0, ""); static int log_redirect = 0; -SYSCTL_INT(_net_inet_icmp, OID_AUTO, log_redirect, CTLFLAG_RW, +SYSCTL_INT(_net_inet_icmp, OID_AUTO, log_redirect, CTLFLAG_RW | CTLFLAG_LOCKED, &log_redirect, 0, ""); #if ICMP_BANDLIM - + +/* Default values in case CONFIG_ICMP_BANDLIM is not defined in the MASTER file */ +#ifndef CONFIG_ICMP_BANDLIM +#if !CONFIG_EMBEDDED +#define CONFIG_ICMP_BANDLIM 250 +#else /* CONFIG_EMBEDDED */ +#define CONFIG_ICMP_BANDLIM 50 +#endif /* CONFIG_EMBEDDED */ +#endif /* CONFIG_ICMP_BANDLIM */ + /* * ICMP error-response bandwidth limiting sysctl. If not enabled, sysctl * variable content is -1 and read-only. */ -static int icmplim = 250; -SYSCTL_INT(_net_inet_icmp, ICMPCTL_ICMPLIM, icmplim, CTLFLAG_RW, +static int icmplim = CONFIG_ICMP_BANDLIM; +SYSCTL_INT(_net_inet_icmp, ICMPCTL_ICMPLIM, icmplim, CTLFLAG_RW | CTLFLAG_LOCKED, &icmplim, 0, ""); -#else + +#else /* ICMP_BANDLIM */ static int icmplim = -1; -SYSCTL_INT(_net_inet_icmp, ICMPCTL_ICMPLIM, icmplim, CTLFLAG_RD, +SYSCTL_INT(_net_inet_icmp, ICMPCTL_ICMPLIM, icmplim, CTLFLAG_RD | CTLFLAG_LOCKED, &icmplim, 0, ""); -#endif +#endif /* ICMP_BANDLIM */ /* * ICMP broadcast echo sysctl */ static int icmpbmcastecho = 1; -SYSCTL_INT(_net_inet_icmp, OID_AUTO, bmcastecho, CTLFLAG_RW, +SYSCTL_INT(_net_inet_icmp, OID_AUTO, bmcastecho, CTLFLAG_RW | CTLFLAG_LOCKED, &icmpbmcastecho, 0, ""); @@ -173,7 +182,6 @@ int icmpprintfs = 0; static void icmp_reflect(struct mbuf *); static void icmp_send(struct mbuf *, struct mbuf *); -static int ip_next_mtu(int, int); extern struct protosw inetsw[]; @@ -187,14 +195,19 @@ icmp_error( int type, int code, n_long dest, - struct ifnet *destifp) + u_int32_t nextmtu) { struct ip *oip = mtod(n, struct ip *), *nip; - unsigned oiplen = IP_VHL_HL(oip->ip_vhl) << 2; + unsigned oiplen; struct icmp *icp; struct mbuf *m; unsigned icmplen; + /* Expect 32-bit aligned data pointer on strict-align platforms */ + MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(n); + + oiplen = IP_VHL_HL(oip->ip_vhl) << 2; + #if ICMPPRINTFS if (icmpprintfs) printf("icmp_error(%p, %x, %d)\n", oip, type, code); @@ -210,7 +223,8 @@ icmp_error( goto freeit; if (oip->ip_p == IPPROTO_ICMP && type != ICMP_REDIRECT && n->m_len >= oiplen + ICMP_MINLEN && - !ICMP_INFOTYPE(((struct icmp *)((caddr_t)oip + oiplen))->icmp_type)) { + !ICMP_INFOTYPE(((struct icmp *)(void *)((caddr_t)oip + oiplen))-> + icmp_type)) { icmpstat.icps_oldicmp++; goto freeit; } @@ -257,8 +271,8 @@ icmp_error( icp->icmp_pptr = code; code = 0; } else if (type == ICMP_UNREACH && - code == ICMP_UNREACH_NEEDFRAG && destifp) { - icp->icmp_nextmtu = htons(destifp->if_mtu); + code == ICMP_UNREACH_NEEDFRAG && nextmtu != 0) { + icp->icmp_nextmtu = htons(nextmtu); } } @@ -269,9 +283,10 @@ icmp_error( /* * Convert fields to network representation. */ +#if BYTE_ORDER != BIG_ENDIAN HTONS(nip->ip_len); HTONS(nip->ip_off); - +#endif /* * Now, copy old ip header (without options) * in front of icmp message. @@ -309,12 +324,17 @@ icmp_input(struct mbuf *m, int hlen) { struct icmp *icp; struct ip *ip = mtod(m, struct ip *); - int icmplen = ip->ip_len; + int icmplen; int i; struct in_ifaddr *ia; void (*ctlfunc)(int, struct sockaddr *, void *); int code; + /* Expect 32-bit aligned data pointer on strict-align platforms */ + MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m); + + icmplen = ip->ip_len; + /* * Locate icmp structure in mbuf, and check * that not corrupted and of at least minimum length. @@ -350,21 +370,6 @@ icmp_input(struct mbuf *m, int hlen) m->m_len += hlen; m->m_data -= hlen; -#if defined(NFAITH) && 0 < NFAITH - if (m->m_pkthdr.rcvif && m->m_pkthdr.rcvif->if_type == IFT_FAITH) { - /* - * Deliver very specific ICMP type only. - */ - switch (icp->icmp_type) { - case ICMP_UNREACH: - case ICMP_TIMXCEED: - break; - default: - goto freeit; - } - } -#endif - #if ICMPPRINTFS if (icmpprintfs) printf("icmp_input, type %d code %d\n", icp->icmp_type, @@ -444,7 +449,11 @@ icmp_input(struct mbuf *m, int hlen) icmpstat.icps_badlen++; goto freeit; } + +#if BYTE_ORDER != BIG_ENDIAN NTOHS(icp->icmp_ip.ip_len); +#endif + /* Discard ICMP's in response to multicast packets */ if (IN_MULTICAST(ntohl(icp->icmp_ip.ip_dst.s_addr))) goto badcode; @@ -453,52 +462,7 @@ icmp_input(struct mbuf *m, int hlen) printf("deliver to protocol %d\n", icp->icmp_ip.ip_p); #endif icmpsrc.sin_addr = icp->icmp_ip.ip_dst; -#if 1 - /* - * MTU discovery: - * If we got a needfrag and there is a host route to the - * original destination, and the MTU is not locked, then - * set the MTU in the route to the suggested new value - * (if given) and then notify as usual. The ULPs will - * notice that the MTU has changed and adapt accordingly. - * If no new MTU was suggested, then we guess a new one - * less than the current value. If the new MTU is - * unreasonably small (defined by sysctl tcp_minmss), then - * we reset the MTU to the interface value and enable the - * lock bit, indicating that we are no longer doing MTU - * discovery. - */ - if (code == PRC_MSGSIZE) { - struct rtentry *rt; - int mtu; - - rt = rtalloc1((struct sockaddr *)&icmpsrc, 0, - RTF_CLONING | RTF_PRCLONING); - if (rt && (rt->rt_flags & RTF_HOST) - && !(rt->rt_rmx.rmx_locks & RTV_MTU)) { - mtu = ntohs(icp->icmp_nextmtu); - if (!mtu) - mtu = ip_next_mtu(rt->rt_rmx.rmx_mtu, - 1); -#if DEBUG_MTUDISC - printf("MTU for %s reduced to %d\n", - inet_ntop(AF_INET, &icmpsrc.sin_addr, ipv4str, - sizeof(ipv4str)), - mtu); -#endif - if (mtu < max(296, (tcp_minmss + sizeof(struct tcpiphdr)))) { - /* rt->rt_rmx.rmx_mtu = - rt->rt_ifp->if_mtu; */ - rt->rt_rmx.rmx_locks |= RTV_MTU; - } else if (rt->rt_rmx.rmx_mtu > mtu) { - rt->rt_rmx.rmx_mtu = mtu; - } - } - if (rt) - rtfree(rt); - } -#endif /* * XXX if the packet contains [IPv4 AH TCP], we can't make a * notification to TCP layer. @@ -552,7 +516,6 @@ icmp_input(struct mbuf *m, int hlen) goto reflect; case ICMP_MASKREQ: -#define satosin(sa) ((struct sockaddr_in *)(sa)) if (icmpmaskrepl == 0) break; /* @@ -575,8 +538,10 @@ icmp_input(struct mbuf *m, int hlen) (struct sockaddr *)&icmpdst, m->m_pkthdr.rcvif); if (ia == 0) break; + IFA_LOCK(&ia->ia_ifa); if (ia->ia_ifp == 0) { - ifafree(&ia->ia_ifa); + IFA_UNLOCK(&ia->ia_ifa); + IFA_REMREF(&ia->ia_ifa); ia = NULL; break; } @@ -588,7 +553,8 @@ icmp_input(struct mbuf *m, int hlen) else if (ia->ia_ifp->if_flags & IFF_POINTOPOINT) ip->ip_src = satosin(&ia->ia_dstaddr)->sin_addr; } - ifafree(&ia->ia_ifa); + IFA_UNLOCK(&ia->ia_ifa); + IFA_REMREF(&ia->ia_ifa); reflect: ip->ip_len += hlen; /* since ip_input deducts this */ icmpstat.icps_reflect++; @@ -598,7 +564,7 @@ reflect: case ICMP_REDIRECT: if (log_redirect) { - u_long src, dst, gw; + u_int32_t src, dst, gw; src = ntohl(ip->ip_src.s_addr); dst = ntohl(icp->icmp_ip.ip_dst.s_addr); @@ -698,16 +664,33 @@ icmp_reflect(struct mbuf *m) * or anonymous), use the address which corresponds * to the incoming interface. */ - lck_mtx_lock(rt_mtx); + lck_rw_lock_shared(in_ifaddr_rwlock); + TAILQ_FOREACH(ia, INADDR_HASH(t.s_addr), ia_hash) { + IFA_LOCK(&ia->ia_ifa); + if (t.s_addr == IA_SIN(ia)->sin_addr.s_addr) { + IFA_ADDREF_LOCKED(&ia->ia_ifa); + IFA_UNLOCK(&ia->ia_ifa); + goto match; + } + IFA_UNLOCK(&ia->ia_ifa); + } + /* + * Slow path; check for broadcast addresses. Find a source + * IP address to use when replying to the broadcast request; + * let IP handle the source interface selection work. + */ for (ia = in_ifaddrhead.tqh_first; ia; ia = ia->ia_link.tqe_next) { - if (t.s_addr == IA_SIN(ia)->sin_addr.s_addr) - break; + IFA_LOCK(&ia->ia_ifa); if (ia->ia_ifp && (ia->ia_ifp->if_flags & IFF_BROADCAST) && - t.s_addr == satosin(&ia->ia_broadaddr)->sin_addr.s_addr) + t.s_addr == satosin(&ia->ia_broadaddr)->sin_addr.s_addr) { + IFA_ADDREF_LOCKED(&ia->ia_ifa); + IFA_UNLOCK(&ia->ia_ifa); break; + } + IFA_UNLOCK(&ia->ia_ifa); } - if (ia) - ifaref(&ia->ia_ifa); +match: + lck_rw_done(in_ifaddr_rwlock); icmpdst.sin_addr = t; if ((ia == (struct in_ifaddr *)0) && m->m_pkthdr.rcvif) ia = (struct in_ifaddr *)ifaof_ifpforaddr( @@ -717,22 +700,25 @@ icmp_reflect(struct mbuf *m) * and was received on an interface with no IP address. */ if (ia == (struct in_ifaddr *)0) { + lck_rw_lock_shared(in_ifaddr_rwlock); ia = in_ifaddrhead.tqh_first; if (ia == (struct in_ifaddr *)0) {/* no address yet, bail out */ + lck_rw_done(in_ifaddr_rwlock); m_freem(m); - lck_mtx_unlock(rt_mtx); goto done; } - ifaref(&ia->ia_ifa); + IFA_ADDREF(&ia->ia_ifa); + lck_rw_done(in_ifaddr_rwlock); } - lck_mtx_unlock(rt_mtx); #if CONFIG_MACF_NET mac_netinet_icmp_reply(m); #endif + IFA_LOCK_SPIN(&ia->ia_ifa); t = IA_SIN(ia)->sin_addr; + IFA_UNLOCK(&ia->ia_ifa); ip->ip_src = t; ip->ip_ttl = ip_defttl; - ifafree(&ia->ia_ifa); + IFA_REMREF(&ia->ia_ifa); ia = NULL; if (optlen > 0) { @@ -825,10 +811,13 @@ icmp_send(struct mbuf *m, struct mbuf *opts) int hlen; struct icmp *icp; struct route ro; - struct ip_out_args ipoa = { IFSCOPE_NONE }; + struct ip_out_args ipoa = { IFSCOPE_NONE, { 0 }, + IPOAF_SELECT_SRCIF | IPOAF_BOUND_SRCADDR }; - if ((m->m_flags & M_PKTHDR) && m->m_pkthdr.rcvif != NULL) - ipoa.ipoa_ifscope = m->m_pkthdr.rcvif->if_index; + if ((m->m_flags & M_PKTHDR) && m->m_pkthdr.rcvif != NULL) { + ipoa.ipoa_boundif = m->m_pkthdr.rcvif->if_index; + ipoa.ipoa_flags |= IPOAF_BOUND_IF; + } hlen = IP_VHL_HL(ip->ip_vhl) << 2; m->m_data += hlen; @@ -853,17 +842,15 @@ icmp_send(struct mbuf *m, struct mbuf *opts) #endif bzero(&ro, sizeof ro); (void) ip_output(m, opts, &ro, IP_OUTARGS, NULL, &ipoa); - if (ro.ro_rt) { + if (ro.ro_rt) rtfree(ro.ro_rt); - ro.ro_rt = NULL; - } } n_time iptime(void) { struct timeval atv; - u_long t; + u_int32_t t; microtime(&atv); t = (atv.tv_sec % (24*60*60)) * 1000 + atv.tv_usec / 1000; @@ -876,7 +863,7 @@ iptime(void) * given current value MTU. If DIR is less than zero, a larger plateau * is returned; otherwise, a smaller value is returned. */ -static int +int ip_next_mtu(int mtu, int dir) { static int mtutab[] = { @@ -999,8 +986,8 @@ badport_bandlim(int which) #include extern struct domain inetdomain; -extern u_long rip_sendspace; -extern u_long rip_recvspace; +extern u_int32_t rip_sendspace; +extern u_int32_t rip_recvspace; extern struct inpcbinfo ripcbinfo; int rip_abort(struct socket *); @@ -1067,6 +1054,7 @@ icmp_dgram_ctloutput(struct socket *so, struct sockopt *sopt) case IP_RECVDSTADDR: case IP_RETOPTS: case IP_MULTICAST_IF: + case IP_MULTICAST_IFINDEX: case IP_MULTICAST_TTL: case IP_MULTICAST_LOOP: case IP_ADD_MEMBERSHIP: @@ -1075,18 +1063,16 @@ icmp_dgram_ctloutput(struct socket *so, struct sockopt *sopt) case IP_PORTRANGE: case IP_RECVIF: case IP_IPSEC_POLICY: -#if defined(NFAITH) && NFAITH > 0 - case IP_FAITH: -#endif case IP_STRIPHDR: case IP_RECVTTL: case IP_BOUND_IF: #if CONFIG_FORCE_OUT_IFP - case IP_FORCE_OUT_IFP: + case IP_FORCE_OUT_IFP: #endif + case IP_NO_IFT_CELLULAR: error = rip_ctloutput(so, sopt); break; - + default: error = EINVAL; break; @@ -1107,6 +1093,8 @@ icmp_dgram_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *n int icmplen; if ((inp->inp_flags & INP_HDRINCL) != 0) { + /* Expect 32-bit aligned data pointer on strict-align platforms */ + MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m); /* * This is not raw IP, we liberal only for fields TOS, id and TTL */ @@ -1123,7 +1111,7 @@ icmp_dgram_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *n if (hlen < 20 || hlen > 40 || ip->ip_len != m->m_pkthdr.len) goto bad; /* Bogus fragments can tie up peer resources */ - if (ip->ip_off != 0) + if ((ip->ip_off & ~IP_DF) != 0) goto bad; /* Allow only ICMP even for user provided IP header */ if (ip->ip_p != IPPROTO_ICMP) @@ -1131,28 +1119,33 @@ icmp_dgram_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *n /* To prevent spoofing, specified source address must be one of ours */ if (ip->ip_src.s_addr != INADDR_ANY) { socket_unlock(so, 0); - lck_mtx_lock(rt_mtx); + lck_rw_lock_shared(in_ifaddr_rwlock); if (TAILQ_EMPTY(&in_ifaddrhead)) { - lck_mtx_unlock(rt_mtx); + lck_rw_done(in_ifaddr_rwlock); socket_lock(so, 0); goto bad; } - TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) { - if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_src.s_addr) { - lck_mtx_unlock(rt_mtx); + TAILQ_FOREACH(ia, INADDR_HASH(ip->ip_src.s_addr), + ia_hash) { + IFA_LOCK(&ia->ia_ifa); + if (IA_SIN(ia)->sin_addr.s_addr == + ip->ip_src.s_addr) { + IFA_UNLOCK(&ia->ia_ifa); + lck_rw_done(in_ifaddr_rwlock); socket_lock(so, 0); goto ours; } + IFA_UNLOCK(&ia->ia_ifa); } - lck_mtx_unlock(rt_mtx); + lck_rw_done(in_ifaddr_rwlock); socket_lock(so, 0); goto bad; } ours: /* Do not trust we got a valid checksum */ ip->ip_sum = 0; - - icp = (struct icmp *)(((char *)m->m_data) + hlen); + + icp = (struct icmp *)(void *)(((char *)m->m_data) + hlen); icmplen = m->m_pkthdr.len - hlen; } else { if ((icmplen = m->m_pkthdr.len) < ICMP_MINLEN) {