/*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
+#include <sys/mcache.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
+#include <machine/endian.h>
+
#include <net/if.h>
#include <net/route.h>
#if IPSEC
#include <netinet6/ipsec.h>
#include <netkey/key.h>
-#endif
-
-#if defined(NFAITH) && NFAITH > 0
-#include "faith.h"
-#include <net/if_types.h>
#endif
/* XXX This one should go in sys/mbuf.h. It is used to avoid that
#include <security/mac_framework.h>
#endif /* MAC_NET */
+
/*
* ICMP routines: error generation, receive packet processing, and
* routines to turn packets around back to the originator, and
* host table maintenance routines.
*/
/*
 * ICMP statistics counters (fields such as icps_oldicmp and icps_reflect
 * are incremented elsewhere in this excerpt).  Exported read-only
 * (CTLFLAG_RD) under the _net_inet_icmp sysctl node as "stats".
 *
 * The change shown here drops the "static" qualifier from icmpstat and
 * ORs CTLFLAG_LOCKED into the flags of this and every following sysctl.
 * NOTE(review): the semantics of CTLFLAG_LOCKED are defined outside this
 * excerpt - confirm against the sysctl header.
 */
-static struct icmpstat icmpstat;
-SYSCTL_STRUCT(_net_inet_icmp, ICMPCTL_STATS, stats, CTLFLAG_RD,
+struct icmpstat icmpstat;
+SYSCTL_STRUCT(_net_inet_icmp, ICMPCTL_STATS, stats, CTLFLAG_RD | CTLFLAG_LOCKED,
&icmpstat, icmpstat, "");
/*
 * Read-write knob "maskrepl", default 0.  While it is 0, the
 * ICMP_MASKREQ case visible later in this excerpt breaks out before
 * building any reply.
 */
static int icmpmaskrepl = 0;
-SYSCTL_INT(_net_inet_icmp, ICMPCTL_MASKREPL, maskrepl, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_icmp, ICMPCTL_MASKREPL, maskrepl, CTLFLAG_RW | CTLFLAG_LOCKED,
&icmpmaskrepl, 0, "");
/*
 * Read-write knob "timestamp", default 0.
 * NOTE(review): its consumer is not visible in this excerpt; presumably
 * it gates replies to ICMP timestamp requests - verify.
 */
static int icmptimestamp = 0;
-SYSCTL_INT(_net_inet_icmp, ICMPCTL_TIMESTAMP, timestamp, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_icmp, ICMPCTL_TIMESTAMP, timestamp, CTLFLAG_RW | CTLFLAG_LOCKED,
&icmptimestamp, 0, "");
/*
 * Read-write knob "drop_redirect", default 0.
 * NOTE(review): its consumer is not visible in this excerpt; presumably
 * it makes the input path discard ICMP redirects - verify.
 */
static int drop_redirect = 0;
-SYSCTL_INT(_net_inet_icmp, OID_AUTO, drop_redirect, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_icmp, OID_AUTO, drop_redirect, CTLFLAG_RW | CTLFLAG_LOCKED,
&drop_redirect, 0, "");
/*
 * Read-write knob "log_redirect", default 0.  When non-zero, the
 * ICMP_REDIRECT case visible later in this excerpt enters a block that
 * extracts the src, dst and gw addresses in host byte order.
 */
static int log_redirect = 0;
-SYSCTL_INT(_net_inet_icmp, OID_AUTO, log_redirect, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_icmp, OID_AUTO, log_redirect, CTLFLAG_RW | CTLFLAG_LOCKED,
&log_redirect, 0, "");
#if ICMP_BANDLIM
-
+
+/* Default values in case CONFIG_ICMP_BANDLIM is not defined in the MASTER file */
+#ifndef CONFIG_ICMP_BANDLIM
+#if !CONFIG_EMBEDDED
+#define CONFIG_ICMP_BANDLIM 250
+#else /* CONFIG_EMBEDDED */
+#define CONFIG_ICMP_BANDLIM 50
+#endif /* CONFIG_EMBEDDED */
+#endif /* CONFIG_ICMP_BANDLIM */
+
/*
* ICMP error-response bandwidth limiting sysctl. If not enabled, sysctl
* variable content is -1 and read-only.
*/
/*
 * The old side of this change hard-coded an initial value of 250.  The
 * new side initializes from CONFIG_ICMP_BANDLIM, which the fallback above
 * sets to 250 when CONFIG_EMBEDDED is not set and to 50 when it is, and
 * only if the build has not already defined it.  In this branch the
 * variable is exported read-write (CTLFLAG_RW).
 */
-static int icmplim = 250;
-SYSCTL_INT(_net_inet_icmp, ICMPCTL_ICMPLIM, icmplim, CTLFLAG_RW,
+static int icmplim = CONFIG_ICMP_BANDLIM;
+SYSCTL_INT(_net_inet_icmp, ICMPCTL_ICMPLIM, icmplim, CTLFLAG_RW | CTLFLAG_LOCKED,
&icmplim, 0, "");
-#else
+
+#else /* ICMP_BANDLIM */
/*
 * Bandwidth limiting compiled out: the variable is fixed at -1 and the
 * sysctl is registered read-only (CTLFLAG_RD).
 */
static int icmplim = -1;
-SYSCTL_INT(_net_inet_icmp, ICMPCTL_ICMPLIM, icmplim, CTLFLAG_RD,
+SYSCTL_INT(_net_inet_icmp, ICMPCTL_ICMPLIM, icmplim, CTLFLAG_RD | CTLFLAG_LOCKED,
&icmplim, 0, "");
-#endif
+#endif /* ICMP_BANDLIM */
/*
* ICMP broadcast echo sysctl
*
* Read-write knob "bmcastecho", default 1.
* NOTE(review): its consumer is not visible in this excerpt; presumably it
* controls whether echo requests sent to broadcast/multicast addresses are
* answered - verify.
*/
static int icmpbmcastecho = 1;
-SYSCTL_INT(_net_inet_icmp, OID_AUTO, bmcastecho, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_icmp, OID_AUTO, bmcastecho, CTLFLAG_RW | CTLFLAG_LOCKED,
&icmpbmcastecho, 0, "");
/*
 * Forward declarations for file-local helpers.  The static prototype of
 * ip_next_mtu is removed by this change because its definition, visible
 * later in this excerpt, changes from "static int" to plain "int".
 */
static void icmp_reflect(struct mbuf *);
static void icmp_send(struct mbuf *, struct mbuf *);
-static int ip_next_mtu(int, int);
/* Protocol switch table defined outside this file. */
extern struct protosw inetsw[];
int type,
int code,
n_long dest,
- struct ifnet *destifp)
+ u_int32_t nextmtu)
{
struct ip *oip = mtod(n, struct ip *), *nip;
- unsigned oiplen = IP_VHL_HL(oip->ip_vhl) << 2;
+ unsigned oiplen;
struct icmp *icp;
struct mbuf *m;
unsigned icmplen;
+ /* Expect 32-bit aligned data pointer on strict-align platforms */
+ MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(n);
+
+ oiplen = IP_VHL_HL(oip->ip_vhl) << 2;
+
#if ICMPPRINTFS
if (icmpprintfs)
printf("icmp_error(%p, %x, %d)\n", oip, type, code);
goto freeit;
if (oip->ip_p == IPPROTO_ICMP && type != ICMP_REDIRECT &&
n->m_len >= oiplen + ICMP_MINLEN &&
- !ICMP_INFOTYPE(((struct icmp *)((caddr_t)oip + oiplen))->icmp_type)) {
+ !ICMP_INFOTYPE(((struct icmp *)(void *)((caddr_t)oip + oiplen))->
+ icmp_type)) {
icmpstat.icps_oldicmp++;
goto freeit;
}
icp->icmp_pptr = code;
code = 0;
} else if (type == ICMP_UNREACH &&
- code == ICMP_UNREACH_NEEDFRAG && destifp) {
- icp->icmp_nextmtu = htons(destifp->if_mtu);
+ code == ICMP_UNREACH_NEEDFRAG && nextmtu != 0) {
+ icp->icmp_nextmtu = htons(nextmtu);
}
}
/*
* Convert fields to network representation.
*/
+#if BYTE_ORDER != BIG_ENDIAN
HTONS(nip->ip_len);
HTONS(nip->ip_off);
-
+#endif
/*
* Now, copy old ip header (without options)
* in front of icmp message.
{
struct icmp *icp;
struct ip *ip = mtod(m, struct ip *);
- int icmplen = ip->ip_len;
+ int icmplen;
int i;
struct in_ifaddr *ia;
void (*ctlfunc)(int, struct sockaddr *, void *);
int code;
+ /* Expect 32-bit aligned data pointer on strict-align platforms */
+ MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
+
+ icmplen = ip->ip_len;
+
/*
* Locate icmp structure in mbuf, and check
* that not corrupted and of at least minimum length.
m->m_len += hlen;
m->m_data -= hlen;
-#if defined(NFAITH) && 0 < NFAITH
- if (m->m_pkthdr.rcvif && m->m_pkthdr.rcvif->if_type == IFT_FAITH) {
- /*
- * Deliver very specific ICMP type only.
- */
- switch (icp->icmp_type) {
- case ICMP_UNREACH:
- case ICMP_TIMXCEED:
- break;
- default:
- goto freeit;
- }
- }
-#endif
-
#if ICMPPRINTFS
if (icmpprintfs)
printf("icmp_input, type %d code %d\n", icp->icmp_type,
icmpstat.icps_badlen++;
goto freeit;
}
+
+#if BYTE_ORDER != BIG_ENDIAN
NTOHS(icp->icmp_ip.ip_len);
+#endif
+
/* Discard ICMP's in response to multicast packets */
if (IN_MULTICAST(ntohl(icp->icmp_ip.ip_dst.s_addr)))
goto badcode;
printf("deliver to protocol %d\n", icp->icmp_ip.ip_p);
#endif
icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
-#if 1
- /*
- * MTU discovery:
- * If we got a needfrag and there is a host route to the
- * original destination, and the MTU is not locked, then
- * set the MTU in the route to the suggested new value
- * (if given) and then notify as usual. The ULPs will
- * notice that the MTU has changed and adapt accordingly.
- * If no new MTU was suggested, then we guess a new one
- * less than the current value. If the new MTU is
- * unreasonably small (defined by sysctl tcp_minmss), then
- * we reset the MTU to the interface value and enable the
- * lock bit, indicating that we are no longer doing MTU
- * discovery.
- */
- if (code == PRC_MSGSIZE) {
- struct rtentry *rt;
- int mtu;
-
- rt = rtalloc1((struct sockaddr *)&icmpsrc, 0,
- RTF_CLONING | RTF_PRCLONING);
- if (rt && (rt->rt_flags & RTF_HOST)
- && !(rt->rt_rmx.rmx_locks & RTV_MTU)) {
- mtu = ntohs(icp->icmp_nextmtu);
- if (!mtu)
- mtu = ip_next_mtu(rt->rt_rmx.rmx_mtu,
- 1);
-#if DEBUG_MTUDISC
- printf("MTU for %s reduced to %d\n",
- inet_ntop(AF_INET, &icmpsrc.sin_addr, ipv4str,
- sizeof(ipv4str)),
- mtu);
-#endif
- if (mtu < max(296, (tcp_minmss + sizeof(struct tcpiphdr)))) {
- /* rt->rt_rmx.rmx_mtu =
- rt->rt_ifp->if_mtu; */
- rt->rt_rmx.rmx_locks |= RTV_MTU;
- } else if (rt->rt_rmx.rmx_mtu > mtu) {
- rt->rt_rmx.rmx_mtu = mtu;
- }
- }
- if (rt)
- rtfree(rt);
- }
-#endif
/*
* XXX if the packet contains [IPv4 AH TCP], we can't make a
* notification to TCP layer.
goto reflect;
case ICMP_MASKREQ:
-#define satosin(sa) ((struct sockaddr_in *)(sa))
if (icmpmaskrepl == 0)
break;
/*
(struct sockaddr *)&icmpdst, m->m_pkthdr.rcvif);
if (ia == 0)
break;
+ IFA_LOCK(&ia->ia_ifa);
if (ia->ia_ifp == 0) {
- ifafree(&ia->ia_ifa);
+ IFA_UNLOCK(&ia->ia_ifa);
+ IFA_REMREF(&ia->ia_ifa);
ia = NULL;
break;
}
else if (ia->ia_ifp->if_flags & IFF_POINTOPOINT)
ip->ip_src = satosin(&ia->ia_dstaddr)->sin_addr;
}
- ifafree(&ia->ia_ifa);
+ IFA_UNLOCK(&ia->ia_ifa);
+ IFA_REMREF(&ia->ia_ifa);
reflect:
ip->ip_len += hlen; /* since ip_input deducts this */
icmpstat.icps_reflect++;
case ICMP_REDIRECT:
if (log_redirect) {
- u_long src, dst, gw;
+ u_int32_t src, dst, gw;
src = ntohl(ip->ip_src.s_addr);
dst = ntohl(icp->icmp_ip.ip_dst.s_addr);
* or anonymous), use the address which corresponds
* to the incoming interface.
*/
- lck_mtx_lock(rt_mtx);
+ lck_rw_lock_shared(in_ifaddr_rwlock);
+ TAILQ_FOREACH(ia, INADDR_HASH(t.s_addr), ia_hash) {
+ IFA_LOCK(&ia->ia_ifa);
+ if (t.s_addr == IA_SIN(ia)->sin_addr.s_addr) {
+ IFA_ADDREF_LOCKED(&ia->ia_ifa);
+ IFA_UNLOCK(&ia->ia_ifa);
+ goto match;
+ }
+ IFA_UNLOCK(&ia->ia_ifa);
+ }
+ /*
+ * Slow path; check for broadcast addresses. Find a source
+ * IP address to use when replying to the broadcast request;
+ * let IP handle the source interface selection work.
+ */
for (ia = in_ifaddrhead.tqh_first; ia; ia = ia->ia_link.tqe_next) {
- if (t.s_addr == IA_SIN(ia)->sin_addr.s_addr)
- break;
+ IFA_LOCK(&ia->ia_ifa);
if (ia->ia_ifp && (ia->ia_ifp->if_flags & IFF_BROADCAST) &&
- t.s_addr == satosin(&ia->ia_broadaddr)->sin_addr.s_addr)
+ t.s_addr == satosin(&ia->ia_broadaddr)->sin_addr.s_addr) {
+ IFA_ADDREF_LOCKED(&ia->ia_ifa);
+ IFA_UNLOCK(&ia->ia_ifa);
break;
+ }
+ IFA_UNLOCK(&ia->ia_ifa);
}
- if (ia)
- ifaref(&ia->ia_ifa);
+match:
+ lck_rw_done(in_ifaddr_rwlock);
icmpdst.sin_addr = t;
if ((ia == (struct in_ifaddr *)0) && m->m_pkthdr.rcvif)
ia = (struct in_ifaddr *)ifaof_ifpforaddr(
* and was received on an interface with no IP address.
*/
if (ia == (struct in_ifaddr *)0) {
+ lck_rw_lock_shared(in_ifaddr_rwlock);
ia = in_ifaddrhead.tqh_first;
if (ia == (struct in_ifaddr *)0) {/* no address yet, bail out */
+ lck_rw_done(in_ifaddr_rwlock);
m_freem(m);
- lck_mtx_unlock(rt_mtx);
goto done;
}
- ifaref(&ia->ia_ifa);
+ IFA_ADDREF(&ia->ia_ifa);
+ lck_rw_done(in_ifaddr_rwlock);
}
- lck_mtx_unlock(rt_mtx);
#if CONFIG_MACF_NET
mac_netinet_icmp_reply(m);
#endif
+ IFA_LOCK_SPIN(&ia->ia_ifa);
t = IA_SIN(ia)->sin_addr;
+ IFA_UNLOCK(&ia->ia_ifa);
ip->ip_src = t;
ip->ip_ttl = ip_defttl;
- ifafree(&ia->ia_ifa);
+ IFA_REMREF(&ia->ia_ifa);
ia = NULL;
if (optlen > 0) {
int hlen;
struct icmp *icp;
struct route ro;
- struct ip_out_args ipoa = { IFSCOPE_NONE };
+ struct ip_out_args ipoa = { IFSCOPE_NONE, { 0 },
+ IPOAF_SELECT_SRCIF | IPOAF_BOUND_SRCADDR };
- if ((m->m_flags & M_PKTHDR) && m->m_pkthdr.rcvif != NULL)
- ipoa.ipoa_ifscope = m->m_pkthdr.rcvif->if_index;
+ if ((m->m_flags & M_PKTHDR) && m->m_pkthdr.rcvif != NULL) {
+ ipoa.ipoa_boundif = m->m_pkthdr.rcvif->if_index;
+ ipoa.ipoa_flags |= IPOAF_BOUND_IF;
+ }
hlen = IP_VHL_HL(ip->ip_vhl) << 2;
m->m_data += hlen;
#endif
bzero(&ro, sizeof ro);
(void) ip_output(m, opts, &ro, IP_OUTARGS, NULL, &ipoa);
- if (ro.ro_rt) {
+ if (ro.ro_rt)
rtfree(ro.ro_rt);
- ro.ro_rt = NULL;
- }
}
n_time
iptime(void)
{
struct timeval atv;
- u_long t;
+ u_int32_t t;
microtime(&atv);
t = (atv.tv_sec % (24*60*60)) * 1000 + atv.tv_usec / 1000;
* given current value MTU. If DIR is less than zero, a larger plateau
* is returned; otherwise, a smaller value is returned.
*/
-static int
+int
ip_next_mtu(int mtu, int dir)
{
static int mtutab[] = {
#include <netinet/in_pcb.h>
extern struct domain inetdomain;
-extern u_long rip_sendspace;
-extern u_long rip_recvspace;
+extern u_int32_t rip_sendspace;
+extern u_int32_t rip_recvspace;
extern struct inpcbinfo ripcbinfo;
int rip_abort(struct socket *);
case IP_RECVDSTADDR:
case IP_RETOPTS:
case IP_MULTICAST_IF:
+ case IP_MULTICAST_IFINDEX:
case IP_MULTICAST_TTL:
case IP_MULTICAST_LOOP:
case IP_ADD_MEMBERSHIP:
case IP_PORTRANGE:
case IP_RECVIF:
case IP_IPSEC_POLICY:
-#if defined(NFAITH) && NFAITH > 0
- case IP_FAITH:
-#endif
case IP_STRIPHDR:
case IP_RECVTTL:
case IP_BOUND_IF:
#if CONFIG_FORCE_OUT_IFP
- case IP_FORCE_OUT_IFP:
+ case IP_FORCE_OUT_IFP:
#endif
+ case IP_NO_IFT_CELLULAR:
error = rip_ctloutput(so, sopt);
break;
-
+
default:
error = EINVAL;
break;
int icmplen;
if ((inp->inp_flags & INP_HDRINCL) != 0) {
+ /* Expect 32-bit aligned data pointer on strict-align platforms */
+ MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
/*
* This is not raw IP; we are liberal only for the fields TOS, id and TTL
*/
if (hlen < 20 || hlen > 40 || ip->ip_len != m->m_pkthdr.len)
goto bad;
/* Bogus fragments can tie up peer resources */
- if (ip->ip_off != 0)
+ if ((ip->ip_off & ~IP_DF) != 0)
goto bad;
/* Allow only ICMP even for user provided IP header */
if (ip->ip_p != IPPROTO_ICMP)
/* To prevent spoofing, specified source address must be one of ours */
if (ip->ip_src.s_addr != INADDR_ANY) {
socket_unlock(so, 0);
- lck_mtx_lock(rt_mtx);
+ lck_rw_lock_shared(in_ifaddr_rwlock);
if (TAILQ_EMPTY(&in_ifaddrhead)) {
- lck_mtx_unlock(rt_mtx);
+ lck_rw_done(in_ifaddr_rwlock);
socket_lock(so, 0);
goto bad;
}
- TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
- if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_src.s_addr) {
- lck_mtx_unlock(rt_mtx);
+ TAILQ_FOREACH(ia, INADDR_HASH(ip->ip_src.s_addr),
+ ia_hash) {
+ IFA_LOCK(&ia->ia_ifa);
+ if (IA_SIN(ia)->sin_addr.s_addr ==
+ ip->ip_src.s_addr) {
+ IFA_UNLOCK(&ia->ia_ifa);
+ lck_rw_done(in_ifaddr_rwlock);
socket_lock(so, 0);
goto ours;
}
+ IFA_UNLOCK(&ia->ia_ifa);
}
- lck_mtx_unlock(rt_mtx);
+ lck_rw_done(in_ifaddr_rwlock);
socket_lock(so, 0);
goto bad;
}
ours:
/* Do not trust we got a valid checksum */
ip->ip_sum = 0;
-
- icp = (struct icmp *)(((char *)m->m_data) + hlen);
+
+ icp = (struct icmp *)(void *)(((char *)m->m_data) + hlen);
icmplen = m->m_pkthdr.len - hlen;
} else {
if ((icmplen = m->m_pkthdr.len) < ICMP_MINLEN) {