/*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
#include <sys/socketvar.h>
#include <kern/locks.h>
#include <sys/sysctl.h>
+#include <sys/mcache.h>
#include <machine/endian.h>
+#include <pexpert/pexpert.h>
#include <net/if.h>
#include <net/if_dl.h>
+#include <net/if_types.h>
#include <net/route.h>
+#include <net/ntstat.h>
+#include <net/net_osdep.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <security/mac_framework.h>
#endif
-#include "faith.h"
-
#include <net/dlil.h>
#include <sys/kdebug.h>
#include <libkern/OSAtomic.h>
#include <netinet/ip_fw.h>
#include <netinet/ip_divert.h>
+#include <mach/sdt.h>
#if DUMMYNET
#include <netinet/ip_dummynet.h>
(ntohl(a.s_addr))&0xFF);
#endif
-
u_short ip_id;
static struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *);
-static struct ifnet *ip_multicast_if(struct in_addr *, int *);
static void ip_mloopback(struct ifnet *, struct mbuf *,
struct sockaddr_in *, int);
-static int ip_getmoptions(struct sockopt *, struct ip_moptions *);
static int ip_pcbopts(int, struct mbuf **, struct mbuf *);
-static int ip_setmoptions(struct sockopt *, struct ip_moptions **);
+static void imo_trace(struct ip_moptions *, int);
static void ip_out_cksum_stats(int, u_int32_t);
static struct ifaddr *in_selectsrcif(struct ip *, struct route *, unsigned int);
-static void ip_bindif(struct inpcb *, unsigned int);
-int ip_createmoptions(struct ip_moptions **imop);
-int ip_addmembership(struct ip_moptions *imo, struct ip_mreq *mreq);
-int ip_dropmembership(struct ip_moptions *imo, struct ip_mreq *mreq);
int ip_optcopy(struct ip *, struct ip *);
void in_delayed_cksum_offset(struct mbuf *, int );
void in_cksum_offset(struct mbuf* , size_t );
-extern int (*fr_checkp)(struct ip *, int, struct ifnet *, int, struct mbuf **);
-
extern struct protosw inetsw[];
extern struct ip_linklocal_stat ip_linklocal_stat;
#endif
static int ip_maxchainsent = 0;
-SYSCTL_INT(_net_inet_ip, OID_AUTO, maxchainsent, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_ip, OID_AUTO, maxchainsent, CTLFLAG_RW | CTLFLAG_LOCKED,
&ip_maxchainsent, 0, "use dlil_output_list");
#if DEBUG
static int forge_ce = 0;
-SYSCTL_INT(_net_inet_ip, OID_AUTO, forge_ce, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_ip, OID_AUTO, forge_ce, CTLFLAG_RW | CTLFLAG_LOCKED,
&forge_ce, 0, "Forge ECN CE");
#endif /* DEBUG */
static int ip_select_srcif_debug = 0;
-SYSCTL_INT(_net_inet_ip, OID_AUTO, select_srcif_debug, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_ip, OID_AUTO, select_srcif_debug, CTLFLAG_RW | CTLFLAG_LOCKED,
&ip_select_srcif_debug, 0, "log source interface selection debug info");
+#define IMO_TRACE_HIST_SIZE 32 /* size of trace history */
+
+/* For gdb */
+__private_extern__ unsigned int imo_trace_hist_size = IMO_TRACE_HIST_SIZE;
+
+struct ip_moptions_dbg {
+ struct ip_moptions imo; /* embedded ip_moptions */
+ u_int16_t imo_refhold_cnt; /* # of IMO_ADDREF calls */
+ u_int16_t imo_refrele_cnt; /* # of IMO_REMREF calls */
+ /*
+ * Alloc and free callers, one ctrace_t record each.
+ */
+ ctrace_t imo_alloc;
+ ctrace_t imo_free;
+ /*
+ * Circular lists of IMO_ADDREF and IMO_REMREF callers; each list
+ * holds IMO_TRACE_HIST_SIZE entries.
+ * NOTE(review): presumably indexed by the matching counter above,
+ * modulo IMO_TRACE_HIST_SIZE -- confirm against imo_trace().
+ */
+ ctrace_t imo_refhold[IMO_TRACE_HIST_SIZE];
+ ctrace_t imo_refrele[IMO_TRACE_HIST_SIZE];
+};
+
+#if DEBUG
+static unsigned int imo_debug = 1; /* debugging (enabled) */
+#else
+static unsigned int imo_debug; /* debugging (disabled) */
+#endif /* !DEBUG */
+static unsigned int imo_size; /* size of zone element */
+static struct zone *imo_zone; /* zone for ip_moptions */
+
+#define IMO_ZONE_MAX 64 /* maximum elements in zone */
+#define IMO_ZONE_NAME "ip_moptions" /* zone name */
+
/*
* IP output. The packet in mbuf chain m contains a skeletal IP
* header (with len, off, ttl, proto, tos, src, dst).
* ipsec4_getpolicybysock:??? [IPSEC 4th argument, contents modified]
* key_spdacquire:??? [IPSEC]
* ipsec4_output:??? [IPSEC]
- * <fr_checkp>:??? [firewall]
* ip_dn_io_ptr:??? [dummynet]
* dlil_output:??? [DLIL]
* dlil_output_list:??? [DLIL]
struct route *ro,
int flags,
struct ip_moptions *imo,
- struct ip_out_args *ipoa
- )
+ struct ip_out_args *ipoa)
{
struct ip *ip;
struct ifnet *ifp = NULL;
- struct mbuf *m = m0, **mppn = NULL;
+ struct mbuf *m = m0, *prevnxt = NULL, **mppn = &prevnxt;
int hlen = sizeof (struct ip);
- int len = 0, off, error = 0;
+ int len = 0, error = 0;
struct sockaddr_in *dst = NULL;
struct in_ifaddr *ia = NULL, *src_ia = NULL;
int isbroadcast, sw_csum;
struct in_addr pkt_dst;
+ struct ipf_pktopts *ippo = NULL, ipf_pktopts;
#if IPSEC
- struct route iproute;
+ struct ipsec_output_state ipsec_state;
+ struct route *ipsec_saved_route = NULL;
struct socket *so = NULL;
struct secpolicy *sp = NULL;
#endif
int fwd_rewrite_src = 0;
#endif
#if IPFIREWALL
+ int off;
+ struct sockaddr_in *next_hop_from_ipfwd_tag = NULL;
+#endif
+#if IPFIREWALL || DUMMYNET
struct ip_fw_args args;
+ struct m_tag *tag;
#endif
int didfilter = 0;
ipfilter_t inject_filter_ref = 0;
- struct m_tag *tag;
+#if DUMMYNET
struct route saved_route;
struct ip_out_args saved_ipoa;
+ struct sockaddr_in dst_buf;
+#endif /* DUMMYNET */
struct mbuf * packetlist;
int pktcnt = 0, tso = 0;
- unsigned int ifscope;
- boolean_t select_srcif;
+ u_int32_t bytecnt = 0;
+ unsigned int ifscope = IFSCOPE_NONE;
+ unsigned int nocell = 0;
+ boolean_t select_srcif, srcbound;
+ struct flowadv *adv = NULL;
KERNEL_DEBUG(DBG_FNC_IP_OUTPUT | DBG_FUNC_START, 0,0,0,0,0);
+#if IPSEC
+ bzero(&ipsec_state, sizeof(ipsec_state));
+#endif /* IPSEC */
+
packetlist = m0;
-#if IPFIREWALL
- args.next_hop = NULL;
- args.eh = NULL;
- args.rule = NULL;
- args.divert_rule = 0; /* divert cookie */
- args.ipoa = NULL;
+#if IPFIREWALL || DUMMYNET
+ bzero(&args, sizeof(struct ip_fw_args));
if (SLIST_EMPTY(&m0->m_pkthdr.tags))
goto ipfw_tags_done;
struct dn_pkt_tag *dn_tag;
dn_tag = (struct dn_pkt_tag *)(tag+1);
- args.rule = dn_tag->rule;
+ args.fwa_ipfw_rule = dn_tag->dn_ipfw_rule;
+ args.fwa_pf_rule = dn_tag->dn_pf_rule;
opt = NULL;
- saved_route = dn_tag->ro;
+ saved_route = dn_tag->dn_ro;
ro = &saved_route;
imo = NULL;
- dst = dn_tag->dn_dst;
- ifp = dn_tag->ifp;
- flags = dn_tag->flags;
- saved_ipoa = dn_tag->ipoa;
- ipoa = &saved_ipoa;
+ bcopy(&dn_tag->dn_dst, &dst_buf, sizeof(dst_buf));
+ dst = &dst_buf;
+ ifp = dn_tag->dn_ifp;
+ flags = dn_tag->dn_flags;
+ if ((dn_tag->dn_flags & IP_OUTARGS)) {
+ saved_ipoa = dn_tag->dn_ipoa;
+ ipoa = &saved_ipoa;
+ }
m_tag_delete(m0, tag);
}
struct divert_tag *div_tag;
div_tag = (struct divert_tag *)(tag+1);
- args.divert_rule = div_tag->cookie;
+ args.fwa_divert_rule = div_tag->cookie;
m_tag_delete(m0, tag);
}
#endif /* IPDIVERT */
+#if IPFIREWALL
if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID,
KERNEL_TAG_TYPE_IPFORWARD, NULL)) != NULL) {
struct ip_fwd_tag *ipfwd_tag;
ipfwd_tag = (struct ip_fwd_tag *)(tag+1);
- args.next_hop = ipfwd_tag->next_hop;
+ next_hop_from_ipfwd_tag = ipfwd_tag->next_hop;
m_tag_delete(m0, tag);
}
-ipfw_tags_done:
#endif /* IPFIREWALL */
+ipfw_tags_done:
+#endif /* IPFIREWALL || DUMMYNET */
+
m = m0;
#if DIAGNOSTIC
mtod(m, struct ip *)->ip_p);
#endif
- /*
- * At present the IP_OUTARGS flag implies a request for IP to
- * perform source interface selection. In the forwarding case,
- * only the ifscope value is used, as source interface selection
- * doesn't take place.
- */
+ bzero(&ipf_pktopts, sizeof(struct ipf_pktopts));
+ ippo = &ipf_pktopts;
+
if (ip_doscopedroute && (flags & IP_OUTARGS)) {
- select_srcif = !(flags & IP_FORWARDING);
- ifscope = ipoa->ipoa_ifscope;
+ /*
+ * In the forwarding case, only the ifscope value is used,
+ * as source interface selection doesn't take place.
+ */
+ if ((select_srcif = (!(flags & IP_FORWARDING) &&
+ (ipoa->ipoa_flags & IPOAF_SELECT_SRCIF)))) {
+ ipf_pktopts.ippo_flags |= IPPOF_SELECT_SRCIF;
+ }
+
+ if ((ipoa->ipoa_flags & IPOAF_BOUND_IF) &&
+ ipoa->ipoa_boundif != IFSCOPE_NONE) {
+ ifscope = ipoa->ipoa_boundif;
+ ipf_pktopts.ippo_flags |=
+ (IPPOF_BOUND_IF | (ifscope << IPPOF_SHIFT_IFSCOPE));
+ }
+
+ if ((srcbound = (ipoa->ipoa_flags & IPOAF_BOUND_SRCADDR)))
+ ipf_pktopts.ippo_flags |= IPPOF_BOUND_SRCADDR;
} else {
select_srcif = FALSE;
+ srcbound = FALSE;
ifscope = IFSCOPE_NONE;
}
-#if IPFIREWALL
- if (args.rule != NULL) { /* dummynet already saw us */
+ if ((flags & IP_OUTARGS) && (ipoa->ipoa_flags & IPOAF_NO_CELLULAR)) {
+ nocell = 1;
+ ipf_pktopts.ippo_flags |= IPPOF_NO_IFT_CELLULAR;
+ }
+
+ if (flags & IP_OUTARGS) {
+ adv = &ipoa->ipoa_flowadv;
+ adv->code = FADV_SUCCESS;
+ }
+
+#if DUMMYNET
+ if (args.fwa_ipfw_rule != NULL || args.fwa_pf_rule != NULL) {
+ /* dummynet already saw us */
ip = mtod(m, struct ip *);
- hlen = IP_VHL_HL(ip->ip_vhl) << 2 ;
+ hlen = IP_VHL_HL(ip->ip_vhl) << 2;
+ pkt_dst = ip->ip_dst;
if (ro->ro_rt != NULL) {
RT_LOCK_SPIN(ro->ro_rt);
ia = (struct in_ifaddr *)ro->ro_rt->rt_ifa;
- if (ia)
- ifaref(&ia->ia_ifa);
+ if (ia) {
+ /* Become a regular mutex */
+ RT_CONVERT_LOCK(ro->ro_rt);
+ IFA_ADDREF(&ia->ia_ifa);
+ }
RT_UNLOCK(ro->ro_rt);
}
#if IPSEC
so = ipsec_getsocket(m);
(void)ipsec_setsocket(m, NULL);
}
-#endif
- goto sendit;
+#endif /* IPSEC */
+#if IPFIREWALL
+ if (args.fwa_ipfw_rule != NULL)
+ goto skip_ipsec;
+#endif /* #if IPFIREWALL */
+ if (args.fwa_pf_rule != NULL)
+ goto sendit;
}
-#endif /* IPFIREWALL */
+#endif /* DUMMYNET */
#if IPSEC
if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) {
if (opt) {
m = ip_insertoptions(m, opt, &len);
hlen = len;
+ /* Update the chain */
+ if (m != m0) {
+ if (m0 == packetlist)
+ packetlist = m;
+ m0 = m;
+ }
}
ip = mtod(m, struct ip *);
#if IPFIREWALL
- pkt_dst = args.next_hop ? args.next_hop->sin_addr : ip->ip_dst;
+ /*
+ * rdar://8542331
+ *
+ * When dealing with a packet chain, we need to reset "next_hop" because
+ * "dst" may have been changed to the gateway address below for the previous
+ * packet of the chain. This could cause the route to be inadvertently changed
+ * to the route to the gateway address (instead of the route to the destination).
+ */
+ args.fwa_next_hop = next_hop_from_ipfwd_tag;
+ pkt_dst = args.fwa_next_hop ? args.fwa_next_hop->sin_addr : ip->ip_dst;
#else
pkt_dst = ip->ip_dst;
#endif
+ /*
+ * We must not send if the packet is destined to network zero.
+ * RFC1122 3.2.1.3 (a) and (b).
+ */
+ if (IN_ZERONET(ntohl(pkt_dst.s_addr))) {
+ error = EHOSTUNREACH;
+ goto bad;
+ }
+
/*
* Fill in IP header.
*/
} else {
hlen = IP_VHL_HL(ip->ip_vhl) << 2;
}
-
+
#if DEBUG
/* For debugging, we let the stack forge congestion */
if (forge_ce != 0 &&
KERNEL_DEBUG(DBG_LAYER_BEG, ip->ip_dst.s_addr,
ip->ip_src.s_addr, ip->ip_p, ip->ip_off, ip->ip_len);
-
- dst = (struct sockaddr_in *)&ro->ro_dst;
+
+ dst = (struct sockaddr_in *)(void *)&ro->ro_dst;
/*
* If there is a cached route,
error = EADDRNOTAVAIL;
goto bad;
}
- ifafree(&src_ia->ia_ifa);
+ IFA_REMREF(&src_ia->ia_ifa);
}
/*
* Test rt_flags without holding rt_lock for performance
* If routing to interface only,
* short circuit routing lookup.
*/
-#define ifatoia(ifa) ((struct in_ifaddr *)(ifa))
-#define sintosa(sin) ((struct sockaddr *)(sin))
if (flags & IP_ROUTETOIF) {
if (ia)
- ifafree(&ia->ia_ifa);
+ IFA_REMREF(&ia->ia_ifa);
if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0) {
if ((ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) {
OSAddAtomic(1, &ipstat.ips_noroute);
ip->ip_ttl = 1;
isbroadcast = in_broadcast(dst->sin_addr, ifp);
} else if (IN_MULTICAST(ntohl(pkt_dst.s_addr)) &&
- imo != NULL && imo->imo_multicast_ifp != NULL) {
+ imo != NULL && (ifp = imo->imo_multicast_ifp) != NULL) {
/*
* Bypass the normal routing lookup for multicast
* packets if the interface is specified.
*/
- ifp = imo->imo_multicast_ifp;
isbroadcast = 0;
if (ia != NULL)
- ifafree(&ia->ia_ifa);
+ IFA_REMREF(&ia->ia_ifa);
/* Macro takes reference on ia */
IFP_TO_IA(ifp, ia);
ifa = in_selectsrcif(ip, ro, ifscope);
/*
- * If the source address is spoofed (in the case
- * of IP_RAWOUTPUT), or if this is destined for
- * local/loopback, just let it go out using the
- * interface of the route. Otherwise, there's no
- * interface having such an address, so bail out.
+ * If the source address belongs to a cellular interface
+ * and the caller forbids our using interfaces of such
+ * type, pretend that there is no source address.
*/
- if (ifa == NULL && !(flags & IP_RAWOUTPUT) &&
- ifscope != lo_ifp->if_index) {
+ if (nocell && ifa != NULL &&
+ ifa->ifa_ifp->if_type == IFT_CELLULAR) {
+ IFA_REMREF(ifa);
+ error = EADDRNOTAVAIL;
+ goto bad;
+ }
+
+ /*
+ * If the source address is spoofed (in the case of
+ * IP_RAWOUTPUT on an unbound socket), or if this
+ * is destined for local/loopback, just let it go out
+ * using the interface of the route. Otherwise,
+ * there's no interface having such an address,
+ * so bail out.
+ */
+ if (ifa == NULL && (!(flags & IP_RAWOUTPUT) ||
+ srcbound) && ifscope != lo_ifp->if_index) {
error = EADDRNOTAVAIL;
goto bad;
}
if (ifa != NULL) {
if (ifscope == IFSCOPE_NONE)
ifscope = ifa->ifa_ifp->if_index;
- ifafree(ifa);
+ IFA_REMREF(ifa);
cloneok = (!(flags & IP_RAWOUTPUT) &&
!(IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr))));
}
rtalloc_ign(ro, ign);
else
rtalloc_scoped_ign(ro, ign, ifscope);
+
+ /*
+ * If the route points to a cellular interface and the
+ * caller forbids our using interfaces of such type,
+ * pretend that there is no route.
+ */
+ if (nocell && ro->ro_rt != NULL) {
+ RT_LOCK_SPIN(ro->ro_rt);
+ if (ro->ro_rt->rt_ifp->if_type ==
+ IFT_CELLULAR) {
+ RT_UNLOCK(ro->ro_rt);
+ rtfree(ro->ro_rt);
+ ro->ro_rt = NULL;
+ } else {
+ RT_UNLOCK(ro->ro_rt);
+ }
+ }
}
if (ro->ro_rt == NULL) {
}
if (ia)
- ifafree(&ia->ia_ifa);
+ IFA_REMREF(&ia->ia_ifa);
RT_LOCK_SPIN(ro->ro_rt);
ia = ifatoia(ro->ro_rt->rt_ifa);
- if (ia)
- ifaref(&ia->ia_ifa);
+ if (ia) {
+ /* Become a regular mutex */
+ RT_CONVERT_LOCK(ro->ro_rt);
+ IFA_ADDREF(&ia->ia_ifa);
+ }
ifp = ro->ro_rt->rt_ifp;
ro->ro_rt->rt_use++;
- if (ro->ro_rt->rt_flags & RTF_GATEWAY)
- dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
+ if (ro->ro_rt->rt_flags & RTF_GATEWAY) {
+ dst = (struct sockaddr_in *)(void *)
+ ro->ro_rt->rt_gateway;
+ }
if (ro->ro_rt->rt_flags & RTF_HOST) {
isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST);
} else {
if (IN_MULTICAST(ntohl(pkt_dst.s_addr))) {
struct in_multi *inm;
+ u_int32_t vif;
+ u_int8_t ttl = IP_DEFAULT_MULTICAST_TTL;
+ u_int8_t loop = IP_DEFAULT_MULTICAST_LOOP;
m->m_flags |= M_MCAST;
/*
* still points to the address in "ro". (It may have been
* changed to point to a gateway address, above.)
*/
- dst = (struct sockaddr_in *)&ro->ro_dst;
+ dst = (struct sockaddr_in *)(void *)&ro->ro_dst;
/*
* See if the caller provided any multicast options
*/
if (imo != NULL) {
- if ((flags & IP_RAWOUTPUT) == 0) ip->ip_ttl = imo->imo_multicast_ttl;
- if (imo->imo_multicast_ifp != NULL) {
+ IMO_LOCK(imo);
+ vif = imo->imo_multicast_vif;
+ ttl = imo->imo_multicast_ttl;
+ loop = imo->imo_multicast_loop;
+ if ((flags & IP_RAWOUTPUT) == 0)
+ ip->ip_ttl = ttl;
+ if (imo->imo_multicast_ifp != NULL)
ifp = imo->imo_multicast_ifp;
- }
+ IMO_UNLOCK(imo);
#if MROUTING
- if (imo->imo_multicast_vif != -1 &&
- ((flags & IP_RAWOUTPUT) == 0 || ip->ip_src.s_addr == INADDR_ANY))
- ip->ip_src.s_addr =
- ip_mcast_src(imo->imo_multicast_vif);
+ if (vif != -1 && ((flags & IP_RAWOUTPUT) == 0 ||
+ ip->ip_src.s_addr == INADDR_ANY))
+ ip->ip_src.s_addr = ip_mcast_src(vif);
#endif /* MROUTING */
- } else
- if ((flags & IP_RAWOUTPUT) == 0) ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
+ } else if ((flags & IP_RAWOUTPUT) == 0) {
+ vif = -1;
+ ip->ip_ttl = ttl;
+ }
/*
* Confirm that the outgoing interface supports multicast.
*/
- if ((imo == NULL) || (imo->imo_multicast_vif == -1)) {
+ if (imo == NULL || vif == -1) {
if ((ifp->if_flags & IFF_MULTICAST) == 0) {
OSAddAtomic(1, &ipstat.ips_noroute);
error = ENETUNREACH;
if (ip->ip_src.s_addr == INADDR_ANY) {
struct in_ifaddr *ia1;
lck_rw_lock_shared(in_ifaddr_rwlock);
- TAILQ_FOREACH(ia1, &in_ifaddrhead, ia_link)
+ TAILQ_FOREACH(ia1, &in_ifaddrhead, ia_link) {
+ IFA_LOCK_SPIN(&ia1->ia_ifa);
if (ia1->ia_ifp == ifp) {
ip->ip_src = IA_SIN(ia1)->sin_addr;
+ IFA_UNLOCK(&ia1->ia_ifa);
break;
}
+ IFA_UNLOCK(&ia1->ia_ifa);
+ }
lck_rw_done(in_ifaddr_rwlock);
if (ip->ip_src.s_addr == INADDR_ANY) {
error = ENETUNREACH;
}
}
- ifnet_lock_shared(ifp);
- IN_LOOKUP_MULTI(pkt_dst, ifp, inm);
- ifnet_lock_done(ifp);
- if (inm != NULL &&
- (imo == NULL || imo->imo_multicast_loop)) {
+ in_multihead_lock_shared();
+ IN_LOOKUP_MULTI(&pkt_dst, ifp, inm);
+ in_multihead_lock_done();
+ if (inm != NULL && (imo == NULL || loop)) {
/*
* If we belong to the destination multicast group
* on the outgoing interface, and the caller did not
if (!TAILQ_EMPTY(&ipv4_filters)) {
struct ipfilter *filter;
int seen = (inject_filter_ref == 0);
- struct ipf_pktopts *ippo = 0, ipf_pktopts;
- if (imo) {
- ippo = &ipf_pktopts;
- ipf_pktopts.ippo_mcast_ifnet = imo->imo_multicast_ifp;
- ipf_pktopts.ippo_mcast_ttl = imo->imo_multicast_ttl;
- ipf_pktopts.ippo_mcast_loop = imo->imo_multicast_loop;
+ if (imo != NULL) {
+ ipf_pktopts.ippo_flags |= IPPOF_MCAST_OPTS;
+ ipf_pktopts.ippo_mcast_ifnet = ifp;
+ ipf_pktopts.ippo_mcast_ttl = ttl;
+ ipf_pktopts.ippo_mcast_loop = loop;
}
-
+
ipf_ref();
-
+
/* 4135317 - always pass network byte order to filter */
#if BYTE_ORDER != BIG_ENDIAN
result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, ippo);
if (result == EJUSTRETURN) {
ipf_unref();
+ INM_REMREF(inm);
goto done;
}
if (result != 0) {
ipf_unref();
+ INM_REMREF(inm);
goto bad;
}
}
}
-
+
/* set back to host byte order */
ip = mtod(m, struct ip *);
* as prescribed by rsvpd.
*/
if (!rsvp_on)
- imo = NULL;
+ imo = NULL;
if (ip_mforward(ip, ifp, m, imo) != 0) {
m_freem(m);
+ if (inm != NULL)
+ INM_REMREF(inm);
+ OSAddAtomic(1, &ipstat.ips_cantforward);
goto done;
}
}
}
#endif /* MROUTING */
-
+ if (inm != NULL)
+ INM_REMREF(inm);
/*
* Multicasts with a time-to-live of zero may be looped-
* back, above, but must not be transmitted on a network.
goto sendit;
}
-#ifndef notdef
/*
* If source address not specified yet, use address
* of outgoing interface.
*/
if (ip->ip_src.s_addr == INADDR_ANY) {
+ IFA_LOCK_SPIN(&ia->ia_ifa);
ip->ip_src = IA_SIN(ia)->sin_addr;
+ IFA_UNLOCK(&ia->ia_ifa);
#if IPFIREWALL_FORWARD
/* Keep note that we did this - if the firewall changes
* the next-hop, our interface may change, changing the
fwd_rewrite_src++;
#endif /* IPFIREWALL_FORWARD */
}
-#endif /* notdef */
/*
* Look for broadcast address and
sendit:
#if PF
/* Invoke outbound packet filter */
- if (pf_af_hook(ifp, mppn, &m, AF_INET, FALSE) != 0) {
- if (packetlist == m0) {
- packetlist = m;
- mppn = NULL;
- }
- if (m != NULL) {
- m0 = m;
- /* Next packet in the chain */
- goto loopit;
- } else if (packetlist != NULL) {
- /* No more packet; send down the chain */
- goto sendchain;
+ if (PF_IS_ENABLED) {
+ int rc;
+
+ m0 = m; /* Save for later */
+#if DUMMYNET
+ args.fwa_m = m;
+ args.fwa_next_hop = dst;
+ args.fwa_oif = ifp;
+ args.fwa_ro = ro;
+ args.fwa_dst = dst;
+ args.fwa_oflags = flags;
+ if (flags & IP_OUTARGS)
+ args.fwa_ipoa = ipoa;
+ rc = pf_af_hook(ifp, mppn, &m, AF_INET, FALSE, &args);
+#else /* DUMMYNET */
+ rc = pf_af_hook(ifp, mppn, &m, AF_INET, FALSE, NULL);
+#endif /* DUMMYNET */
+ if (rc != 0 || m == NULL) {
+ /* Move to the next packet */
+ m = *mppn;
+
+ /* Skip ahead if first packet in list got dropped */
+ if (packetlist == m0)
+ packetlist = m;
+
+ if (m != NULL) {
+ m0 = m;
+ /* Next packet in the chain */
+ goto loopit;
+ } else if (packetlist != NULL) {
+ /* No more packets; send down the chain */
+ goto sendchain;
+ }
+ /* Nothing left; we're done */
+ goto done;
}
- /* Nothing left; we're done */
- goto done;
+ m0 = m;
+ ip = mtod(m, struct ip *);
+ pkt_dst = ip->ip_dst;
+ hlen = IP_VHL_HL(ip->ip_vhl) << 2;
}
- m0 = m;
- ip = mtod(m, struct ip *);
- pkt_dst = ip->ip_dst;
- hlen = IP_VHL_HL(ip->ip_vhl) << 2;
#endif /* PF */
/*
* Force IP TTL to 255 following draft-ietf-zeroconf-ipv4-linklocal.txt
ip_linklocal_stat.iplls_out_total++;
if (ip->ip_ttl != MAXTTL) {
ip_linklocal_stat.iplls_out_badttl++;
- ip->ip_ttl = MAXTTL;
+ ip->ip_ttl = MAXTTL;
}
}
if (!didfilter && !TAILQ_EMPTY(&ipv4_filters)) {
struct ipfilter *filter;
int seen = (inject_filter_ref == 0);
-
+ ipf_pktopts.ippo_flags &= ~IPPOF_MCAST_OPTS;
+
/* Check that a TSO frame isn't passed to a filter.
* This could happen if a filter is inserted while
* TCP is sending the TSO packet.
}
ipf_ref();
-
+
/* 4135317 - always pass network byte order to filter */
#if BYTE_ORDER != BIG_ENDIAN
seen = 1;
} else if (filter->ipf_filter.ipf_output) {
errno_t result;
- result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, 0);
+ result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, ippo);
if (result == EJUSTRETURN) {
ipf_unref();
goto done;
}
}
}
-
+
/* set back to host byte order */
ip = mtod(m, struct ip *);
sp = ipsec4_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error);
if (sp == NULL) {
- IPSEC_STAT_INCREMENT(ipsecstat.out_inval);
+ IPSEC_STAT_INCREMENT(ipsecstat.out_inval);
KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 0,0,0,0,0);
goto bad;
}
/* no need to do IPsec. */
KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 2,0,0,0,0);
goto skip_ipsec;
-
+
case IPSEC_POLICY_IPSEC:
if (sp->req == NULL) {
/* acquire a policy */
printf("ip_output: Invalid policy found. %d\n", sp->policy);
}
{
- struct ipsec_output_state state;
- bzero(&state, sizeof(state));
- state.m = m;
+ ipsec_state.m = m;
if (flags & IP_ROUTETOIF) {
- state.ro = &iproute;
- bzero(&iproute, sizeof(iproute));
+ bzero(&ipsec_state.ro, sizeof(ipsec_state.ro));
} else
- state.ro = ro;
- state.dst = (struct sockaddr *)dst;
+ route_copyout(&ipsec_state.ro, ro, sizeof(ipsec_state.ro));
+ ipsec_state.dst = (struct sockaddr *)dst;
ip->ip_sum = 0;
HTONS(ip->ip_off);
#endif
- error = ipsec4_output(&state, sp, flags);
-
- m0 = m = state.m;
-
+ DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
+ struct ip *, ip, struct ifnet *, ifp,
+ struct ip *, ip, struct ip6_hdr *, NULL);
+
+ error = ipsec4_output(&ipsec_state, sp, flags);
+
+ m0 = m = ipsec_state.m;
+
if (flags & IP_ROUTETOIF) {
/*
* if we have tunnel mode SA, we may need to ignore
* IP_ROUTETOIF.
*/
- if (state.ro != &iproute || state.ro->ro_rt != NULL) {
+ if (ipsec_state.tunneled) {
flags &= ~IP_ROUTETOIF;
- ro = state.ro;
+ ipsec_saved_route = ro;
+ ro = &ipsec_state.ro;
}
- } else
- ro = state.ro;
-
- dst = (struct sockaddr_in *)state.dst;
+ } else {
+ ipsec_saved_route = ro;
+ ro = &ipsec_state.ro;
+ }
+ dst = (struct sockaddr_in *)(void *)ipsec_state.dst;
if (error) {
/* mbuf is already reclaimed in ipsec4_output. */
m0 = NULL;
/* be sure to update variables that are affected by ipsec4_output() */
ip = mtod(m, struct ip *);
-
+
#ifdef _IP_VHL
hlen = IP_VHL_HL(ip->ip_vhl) << 2;
#else
rtfree(ro->ro_rt);
ro->ro_rt = NULL;
if (src_ia != NULL)
- ifafree(&src_ia->ia_ifa);
+ IFA_REMREF(&src_ia->ia_ifa);
}
if (ro->ro_rt == NULL) {
}
} else {
if (ia)
- ifafree(&ia->ia_ifa);
+ IFA_REMREF(&ia->ia_ifa);
RT_LOCK_SPIN(ro->ro_rt);
ia = ifatoia(ro->ro_rt->rt_ifa);
- if (ia)
- ifaref(&ia->ia_ifa);
+ if (ia) {
+ /* Become a regular mutex */
+ RT_CONVERT_LOCK(ro->ro_rt);
+ IFA_ADDREF(&ia->ia_ifa);
+ }
ifp = ro->ro_rt->rt_ifp;
RT_UNLOCK(ro->ro_rt);
}
NTOHS(ip->ip_len);
NTOHS(ip->ip_off);
#endif
-
+
KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 7,0xff,0xff,0xff,0xff);
-
+
/* Pass to filters again */
if (!TAILQ_EMPTY(&ipv4_filters)) {
struct ipfilter *filter;
-
+
+ ipf_pktopts.ippo_flags &= ~IPPOF_MCAST_OPTS;
+
/* Check that a TSO frame isn't passed to a filter.
* This could happen if a filter is inserted while
* TCP is sending the TSO packet.
}
ipf_ref();
-
+
/* 4135317 - always pass network byte order to filter */
#if BYTE_ORDER != BIG_ENDIAN
TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) {
if (filter->ipf_filter.ipf_output) {
errno_t result;
- result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, 0);
+ result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, ippo);
if (result == EJUSTRETURN) {
ipf_unref();
goto done;
}
}
}
-
+
/* set back to host byte order */
ip = mtod(m, struct ip *);
#endif /*IPSEC*/
#if IPFIREWALL
- /*
- * IpHack's section.
- * - Xlate: translate packet's addr/port (NAT).
- * - Firewall: deny/allow/etc.
- * - Wrap: fake packet's addr/port <unimpl.>
- * - Encapsulate: put it in another IP and send out. <unimp.>
- */
- if (fr_checkp) {
- struct mbuf *m1 = m;
-
- if ((error = (*fr_checkp)(ip, hlen, ifp, 1, &m1)) || !m1) {
- goto done;
- }
- ip = mtod(m0 = m = m1, struct ip *);
- }
-
/*
* Check with the firewall...
* but not if we are already being fwd'd from a firewall.
*/
- if (fw_enable && IPFW_LOADED && !args.next_hop) {
+ if (fw_enable && IPFW_LOADED && !args.fwa_next_hop) {
struct sockaddr_in *old = dst;
- args.m = m;
- args.next_hop = dst;
- args.oif = ifp;
+ args.fwa_m = m;
+ args.fwa_next_hop = dst;
+ args.fwa_oif = ifp;
off = ip_fw_chk_ptr(&args);
- m = args.m;
- dst = args.next_hop;
+ m = args.fwa_m;
+ dst = args.fwa_next_hop;
/*
* On return we must do the following:
goto done ;
}
ip = mtod(m, struct ip *);
-
+
if (off == 0 && dst == old) {/* common case */
goto pass ;
}
#if DUMMYNET
- if (DUMMYNET_LOADED && (off & IP_FW_PORT_DYNT_FLAG) != 0) {
+ if (DUMMYNET_LOADED && (off & IP_FW_PORT_DYNT_FLAG) != 0) {
/*
* pass the pkt to dummynet. Need to include
* pipe number, m, ifp, ro, dst because these are
* XXX note: if the ifp or ro entry are deleted
* while a pkt is in dummynet, we are in trouble!
*/
- args.ro = ro;
- args.dst = dst;
- args.flags = flags;
+ args.fwa_ro = ro;
+ args.fwa_dst = dst;
+ args.fwa_oflags = flags;
if (flags & IP_OUTARGS)
- args.ipoa = ipoa;
+ args.fwa_ipoa = ipoa;
error = ip_dn_io_ptr(m, off & 0xffff, DN_TO_IP_OUT,
- &args);
+ &args, DN_CLIENT_IPFW);
goto done;
}
#endif /* DUMMYNET */
#endif
/* Deliver packet to divert input routine */
- divert_packet(m, 0, off & 0xffff, args.divert_rule);
+ divert_packet(m, 0, off & 0xffff, args.fwa_divert_rule);
/* If 'tee', continue with original packet */
if (clone != NULL) {
* of ours, we pretend to
* be the destination for this packet.
*/
+ IFA_LOCK_SPIN(&ia_fw->ia_ifa);
if (IA_SIN(ia_fw)->sin_addr.s_addr ==
- dst->sin_addr.s_addr)
+ dst->sin_addr.s_addr) {
+ IFA_UNLOCK(&ia_fw->ia_ifa);
break;
+ }
+ IFA_UNLOCK(&ia_fw->ia_ifa);
}
lck_rw_done(in_ifaddr_rwlock);
if (ia_fw) {
/* tell ip_input "dont filter" */
struct m_tag *fwd_tag;
struct ip_fwd_tag *ipfwd_tag;
-
- fwd_tag = m_tag_alloc(KERNEL_MODULE_TAG_ID,
+
+ fwd_tag = m_tag_create(KERNEL_MODULE_TAG_ID,
KERNEL_TAG_TYPE_IPFORWARD,
- sizeof (*ipfwd_tag), M_NOWAIT);
+ sizeof (*ipfwd_tag), M_NOWAIT, m);
if (fwd_tag == NULL) {
error = ENOBUFS;
goto bad;
}
-
+
ipfwd_tag = (struct ip_fwd_tag *)(fwd_tag+1);
- ipfwd_tag->next_hop = args.next_hop;
+ ipfwd_tag->next_hop = args.fwa_next_hop;
m_tag_prepend(m, fwd_tag);
if (m->m_pkthdr.rcvif == NULL)
- m->m_pkthdr.rcvif = ifunit("lo0");
+ m->m_pkthdr.rcvif = lo_ifp;
if ((~IF_HWASSIST_CSUM_FLAGS(m->m_pkthdr.rcvif->if_hwassist) &
m->m_pkthdr.csum_flags) == 0) {
if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
#if BYTE_ORDER != BIG_ENDIAN
HTONS(ip->ip_len);
HTONS(ip->ip_off);
-#endif
-
+#endif
+
/* we need to call dlil_output to run filters
* and resync to avoid recursion loops.
*/
if (lo_ifp) {
- dlil_output(lo_ifp, PF_INET, m, 0, (struct sockaddr *)dst, 0);
+ dlil_output(lo_ifp, PF_INET, m, 0,
+ (struct sockaddr *)dst, 0, adv);
}
else {
printf("ip_output: no loopback ifp for forwarding!!!\n");
RT_LOCK_SPIN(ro_fwd->ro_rt);
ia_fw = ifatoia(ro_fwd->ro_rt->rt_ifa);
- if (ia_fw != NULL)
- ifaref(&ia_fw->ia_ifa);
+ if (ia_fw != NULL) {
+ /* Become a regular mutex */
+ RT_CONVERT_LOCK(ro_fwd->ro_rt);
+ IFA_ADDREF(&ia_fw->ia_ifa);
+ }
ifp = ro_fwd->ro_rt->rt_ifp;
ro_fwd->ro_rt->rt_use++;
if (ro_fwd->ro_rt->rt_flags & RTF_GATEWAY)
- dst = (struct sockaddr_in *)ro_fwd->ro_rt->rt_gateway;
+ dst = (struct sockaddr_in *)(void *)ro_fwd->ro_rt->rt_gateway;
if (ro_fwd->ro_rt->rt_flags & RTF_HOST) {
isbroadcast =
(ro_fwd->ro_rt->rt_flags & RTF_BROADCAST);
RT_UNLOCK(ro_fwd->ro_rt);
rtfree(ro->ro_rt);
ro->ro_rt = ro_fwd->ro_rt;
- dst = (struct sockaddr_in *)&ro_fwd->ro_dst;
+ dst = (struct sockaddr_in *)(void *)&ro_fwd->ro_dst;
/*
* If we added a default src ip earlier,
* interface, do it again, from the new one.
*/
if (ia_fw != NULL) {
- if (fwd_rewrite_src)
+ if (fwd_rewrite_src) {
+ IFA_LOCK_SPIN(&ia_fw->ia_ifa);
ip->ip_src = IA_SIN(ia_fw)->sin_addr;
- ifafree(&ia_fw->ia_ifa);
+ IFA_UNLOCK(&ia_fw->ia_ifa);
+ }
+ IFA_REMREF(&ia_fw->ia_ifa);
}
goto pass ;
}
error = EACCES; /* not sure this is the right error msg */
goto done;
}
-#endif /* IPFIREWALL */
pass:
+#endif /* IPFIREWALL */
#if __APPLE__
/* Do not allow loopback address to wind up on a wire */
if ((ifp->if_flags & IFF_LOOPBACK) == 0 &&
#endif
m->m_pkthdr.csum_flags |= CSUM_IP;
tso = (ifp->if_hwassist & IFNET_TSO_IPV4) && (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4);
-
+
sw_csum = m->m_pkthdr.csum_flags
& ~IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);
/* Apple GMAC HW, expects STUFF_OFFSET << 16 | START_OFFSET */
u_short offset = (IP_VHL_HL(ip->ip_vhl) << 2) +14 ; /* IP+Enet header length */
u_short csumprev= m->m_pkthdr.csum_data & 0xFFFF;
- m->m_pkthdr.csum_flags = CSUM_DATA_VALID | CSUM_TCP_SUM16; /* for GMAC */
+ m->m_pkthdr.csum_flags = CSUM_DATA_VALID | CSUM_TCP_SUM16; /* for GMAC */
m->m_pkthdr.csum_data = (csumprev + offset) << 16 ;
m->m_pkthdr.csum_data += offset;
- sw_csum = CSUM_DELAY_IP; /* do IP hdr chksum in software */
- }
- else {
+ sw_csum = CSUM_DELAY_IP; /* do IP hdr chksum in software */
+ } else {
/* let the software handle any UDP or TCP checksums */
sw_csum |= (CSUM_DELAY_DATA & m->m_pkthdr.csum_flags);
}
sw_csum |= (CSUM_DELAY_DATA | CSUM_DELAY_IP) &
m->m_pkthdr.csum_flags;
}
-
+
if (sw_csum & CSUM_DELAY_DATA) {
in_delayed_cksum(m);
sw_csum &= ~CSUM_DELAY_DATA;
*/
if ((u_short)ip->ip_len <= ifp->if_mtu || tso ||
ifp->if_hwassist & CSUM_FRAGMENT) {
- if (tso)
+ if (tso)
m->m_pkthdr.csum_flags |= CSUM_TSO_IPV4;
-
+
#if BYTE_ORDER != BIG_ENDIAN
HTONS(ip->ip_len);
HTONS(ip->ip_off);
#endif
-
+
ip->ip_sum = 0;
if (sw_csum & CSUM_DELAY_IP) {
ip->ip_sum = in_cksum(m, hlen);
}
-
+
#ifndef __APPLE__
/* Record statistics for this interface address. */
if (!(flags & IP_FORWARDING) && ia != NULL) {
ipsec_delaux(m);
#endif
if (packetchain == 0) {
- error = ifnet_output(ifp, PF_INET, m, ro->ro_rt,
- (struct sockaddr *)dst);
+ if (ro->ro_rt && nstat_collect)
+ nstat_route_tx(ro->ro_rt, 1, m->m_pkthdr.len, 0);
+ error = dlil_output(ifp, PF_INET, m, ro->ro_rt,
+ (struct sockaddr *)dst, 0, adv);
goto done;
}
else { /* packet chaining allows us to reuse the route for all packets */
+ bytecnt += m->m_pkthdr.len;
mppn = &m->m_nextpkt;
m = m->m_nextpkt;
if (m == NULL) {
#endif /* PF */
if (pktcnt > ip_maxchainsent)
ip_maxchainsent = pktcnt;
+ if (ro->ro_rt && nstat_collect)
+ nstat_route_tx(ro->ro_rt, pktcnt, bytecnt, 0);
//send
- error = ifnet_output(ifp, PF_INET, packetlist,
- ro->ro_rt, (struct sockaddr *)dst);
+ error = dlil_output(ifp, PF_INET, packetlist,
+ ro->ro_rt, (struct sockaddr *)dst, 0, adv);
pktcnt = 0;
+ bytecnt = 0;
goto done;
-
+
}
m0 = m;
pktcnt++;
* Must be able to put at least 8 bytes per fragment.
*/
- if (ip->ip_off & IP_DF || (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4)) {
+ if (ip->ip_off & IP_DF || (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4) ||
+ pktcnt > 0) {
error = EMSGSIZE;
/*
* This case can happen if the user changed the MTU
- *
* of an interface after enabling IP on it. Because
* most netifs don't keep track of routes pointing to
* them, there is no way for one to update all its
* routes when the MTU is changed.
*/
- RT_LOCK_SPIN(ro->ro_rt);
- if (ro->ro_rt && (ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST))
- && !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU)
- && (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) {
- ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
+ if (ro->ro_rt) {
+ RT_LOCK_SPIN(ro->ro_rt);
+ if ((ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST))
+ && !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU)
+ && (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) {
+ ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
+ }
+ RT_UNLOCK(ro->ro_rt);
+ }
+ if (pktcnt > 0) {
+ m0 = packetlist;
}
- RT_UNLOCK(ro->ro_rt);
OSAddAtomic(1, &ipstat.ips_cantfrag);
goto bad;
}
#endif
if ((packetchain != 0) && (pktcnt > 0))
panic("ip_output: mix of packet in packetlist is wrong=%p", packetlist);
- error = ifnet_output(ifp, PF_INET, m, ro->ro_rt,
- (struct sockaddr *)dst);
+ if (ro->ro_rt && nstat_collect)
+ nstat_route_tx(ro->ro_rt, 1, m->m_pkthdr.len, 0);
+ error = dlil_output(ifp, PF_INET, m, ro->ro_rt,
+ (struct sockaddr *)dst, 0, adv);
} else
m_freem(m);
}
done:
if (ia) {
- ifafree(&ia->ia_ifa);
+ IFA_REMREF(&ia->ia_ifa);
ia = NULL;
}
#if IPSEC
if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) {
- if (ro == &iproute && ro->ro_rt) {
- rtfree(ro->ro_rt);
- ro->ro_rt = NULL;
- }
+ if (ipsec_state.ro.ro_rt)
+ rtfree(ipsec_state.ro.ro_rt);
if (sp != NULL) {
KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
printf("DP ip_output call free SP:%x\n", sp));
m->m_pkthdr.rcvif = 0;
m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
m->m_pkthdr.socket_id = m0->m_pkthdr.socket_id;
+
+ M_COPY_PFTAG(m, m0);
+ m_set_service_class(m, m0->m_pkthdr.svc);
+
#if CONFIG_MACF_NET
mac_netinet_fragment(m0, m);
#endif
#if BYTE_ORDER != BIG_ENDIAN
HTONS(ip->ip_off);
#endif
-
+
ip->ip_sum = 0;
if (sw_csum & CSUM_DELAY_IP) {
ip->ip_sum = in_cksum(m, hlen);
struct ip *ip;
unsigned char buf[sizeof(struct ip)];
u_short csum, offset, ip_len;
- struct mbuf *m = m0;
-
+
+ /* Save copy of first mbuf pointer and the ip_offset before modifying */
+ struct mbuf *m = m0;
+ int ip_offset_copy = ip_offset;
+
while (ip_offset >= m->m_len) {
ip_offset -= m->m_len;
m = m->m_next;
if (m == NULL) {
- printf("in_delayed_cksum_withoffset failed - ip_offset wasn't in the packet\n");
+ printf("in_delayed_cksum_withoffset failed - "
+ "ip_offset wasn't in the packet\n");
return;
}
}
-
- /* Sometimes the IP header is not contiguous, yes this can happen! */
- if (ip_offset + sizeof(struct ip) > m->m_len) {
-#if DEBUG
+
+ /*
+ * In case the IP header is not contiguous, or not 32-bit
+ * aligned, copy it to a local buffer.
+ */
+ if ((ip_offset + sizeof(struct ip) > m->m_len) ||
+ !IP_HDR_ALIGNED_P(mtod(m, caddr_t) + ip_offset)) {
+#if DEBUG
printf("delayed m_pullup, m->len: %d off: %d\n",
m->m_len, ip_offset);
#endif
m_copydata(m, ip_offset, sizeof(struct ip), (caddr_t) buf);
-
- ip = (struct ip *)buf;
+
+ ip = (struct ip *)(void *)buf;
} else {
- ip = (struct ip*)(m->m_data + ip_offset);
+ ip = (struct ip*)(void *)(m->m_data + ip_offset);
}
-
+
/* Gross */
if (ip_offset) {
m->m_len -= ip_offset;
m->m_data += ip_offset;
}
-
+
offset = IP_VHL_HL(ip->ip_vhl) << 2 ;
/*
* is bogus and we give up.
*/
ip_len = ip->ip_len;
- if (ip_len != (m0->m_pkthdr.len - ip_offset)) {
+ if (ip_len != (m0->m_pkthdr.len - ip_offset_copy)) {
ip_len = SWAP16(ip_len);
- if (ip_len != (m0->m_pkthdr.len - ip_offset)) {
+ if (ip_len != (m0->m_pkthdr.len - ip_offset_copy)) {
printf("in_delayed_cksum_offset: ip_len %d (%d) "
"doesn't match actual length %d\n", ip->ip_len,
- ip_len, (m0->m_pkthdr.len - ip_offset));
+ ip_len, (m0->m_pkthdr.len - ip_offset_copy));
return;
}
}
/* Insert the checksum in the existing chain */
if (offset + ip_offset + sizeof(u_short) > m->m_len) {
char tmp[2];
-
+
#if DEBUG
printf("delayed m_copyback, m->len: %d off: %d p: %d\n",
m->m_len, offset + ip_offset, ip->ip_p);
#endif
- *(u_short *)tmp = csum;
+ *(u_short *)(void *)tmp = csum;
m_copyback(m, offset + ip_offset, 2, tmp);
- } else
- *(u_short *)(m->m_data + offset + ip_offset) = csum;
+ } else if (IP_HDR_ALIGNED_P(mtod(m, caddr_t) + ip_offset)) {
+ *(u_short *)(void *)(m->m_data + offset + ip_offset) = csum;
+ } else {
+ bcopy(&csum, (m->m_data + offset + ip_offset), sizeof (csum));
+ }
}
void
int hlen = 0;
unsigned char buf[sizeof(struct ip)];
int swapped = 0;
-
+
+ /* Save copy of first mbuf pointer and the ip_offset before modifying */
+ struct mbuf* m0 = m;
+ size_t ip_offset_copy = ip_offset;
+
while (ip_offset >= m->m_len) {
ip_offset -= m->m_len;
m = m->m_next;
if (m == NULL) {
- printf("in_cksum_offset failed - ip_offset wasn't in the packet\n");
+ printf("in_cksum_offset failed - ip_offset wasn't "
+ "in the packet\n");
return;
}
}
-
- /* Sometimes the IP header is not contiguous, yes this can happen! */
- if (ip_offset + sizeof(struct ip) > m->m_len) {
+ /*
+ * In case the IP header is not contiguous, or not 32-bit
+ * aligned, copy it to a local buffer.
+ */
+ if ((ip_offset + sizeof(struct ip) > m->m_len) ||
+ !IP_HDR_ALIGNED_P(mtod(m, caddr_t) + ip_offset)) {
#if DEBUG
- printf("in_cksum_offset - delayed m_pullup, m->len: %d off: %lu\n",
- m->m_len, ip_offset);
-#endif
+ printf("in_cksum_offset - delayed m_pullup, m->len: %d "
+ "off: %lu\n", m->m_len, ip_offset);
+#endif
m_copydata(m, ip_offset, sizeof(struct ip), (caddr_t) buf);
- ip = (struct ip *)buf;
+ ip = (struct ip *)(void *)buf;
ip->ip_sum = 0;
- m_copyback(m, ip_offset + offsetof(struct ip, ip_sum), 2, (caddr_t)&ip->ip_sum);
+ m_copyback(m, ip_offset + offsetof(struct ip, ip_sum), 2,
+ (caddr_t)&ip->ip_sum);
} else {
- ip = (struct ip*)(m->m_data + ip_offset);
+ ip = (struct ip*)(void *)(m->m_data + ip_offset);
ip->ip_sum = 0;
}
-
+
/* Gross */
if (ip_offset) {
m->m_len -= ip_offset;
* the length and check again. If it still fails, then the packet
* is bogus and we give up.
*/
- if (ntohs(ip->ip_len) != (m->m_pkthdr.len - ip_offset)) {
+ if (ntohs(ip->ip_len) != (m0->m_pkthdr.len - ip_offset_copy)) {
ip->ip_len = SWAP16(ip->ip_len);
swapped = 1;
- if (ntohs(ip->ip_len) != (m->m_pkthdr.len - ip_offset)) {
+ if (ntohs(ip->ip_len) != (m0->m_pkthdr.len - ip_offset_copy)) {
ip->ip_len = SWAP16(ip->ip_len);
printf("in_cksum_offset: ip_len %d (%d) "
"doesn't match actual length %lu\n",
ip->ip_len, SWAP16(ip->ip_len),
- (m->m_pkthdr.len - ip_offset));
+ (m0->m_pkthdr.len - ip_offset_copy));
return;
}
}
m->m_data -= ip_offset;
}
- /* Insert the checksum in the existing chain if IP header not contiguous */
+ /*
+ * Insert the checksum in the existing chain if IP header not
+ * contiguous, or if it's not 32-bit aligned, i.e. all the cases
+ * where it was copied to a local buffer.
+ */
if (ip_offset + sizeof(struct ip) > m->m_len) {
char tmp[2];
#if DEBUG
- printf("in_cksum_offset m_copyback, m->len: %u off: %lu p: %d\n",
- m->m_len, ip_offset + offsetof(struct ip, ip_sum), ip->ip_p);
+ printf("in_cksum_offset m_copyback, m->len: %u off: %lu "
+ "p: %d\n", m->m_len,
+ ip_offset + offsetof(struct ip, ip_sum), ip->ip_p);
#endif
- *(u_short *)tmp = ip->ip_sum;
+ *(u_short *)(void *)tmp = ip->ip_sum;
m_copyback(m, ip_offset + offsetof(struct ip, ip_sum), 2, tmp);
+ } else if (!IP_HDR_ALIGNED_P(mtod(m, caddr_t) + ip_offset)) {
+ bcopy(&ip->ip_sum,
+ (m->m_data + ip_offset + offsetof(struct ip, ip_sum)),
+ sizeof (u_short));
}
}
case IP_RECVDSTADDR:
case IP_RECVIF:
case IP_RECVTTL:
-#if defined(NFAITH) && NFAITH > 0
- case IP_FAITH:
-#endif
+ case IP_RECVPKTINFO:
error = sooptcopyin(sopt, &optval, sizeof optval,
sizeof optval);
if (error)
OPTSET(INP_RECVTTL);
break;
-#if defined(NFAITH) && NFAITH > 0
- case IP_FAITH:
- OPTSET(INP_FAITH);
+ case IP_RECVPKTINFO:
+ OPTSET(INP_PKTINFO);
break;
-#endif
}
break;
#undef OPTSET
break;
}
- if (sopt->sopt_valsize == 0 || ifname[0] == NULL) {
+ if (sopt->sopt_valsize == 0 || ifname[0] == '\0') {
/* Unbind this socket from any interface */
ifscope = IFSCOPE_NONE;
} else {
ifnet_t ifp;
- /* Verify name is NULL terminated */
+ /* Verify name is NUL-terminated */
- if (ifname[sopt->sopt_valsize - 1] != NULL) {
+ if (ifname[sopt->sopt_valsize - 1] != '\0') {
error = EINVAL;
break;
}
*/
ifnet_release(ifp);
}
- ip_bindif(inp, ifscope);
+ error = inp_bindif(inp, ifscope);
}
break;
#endif
+ /*
+ * Multicast socket options are processed by the in_mcast
+ * module.
+ */
case IP_MULTICAST_IF:
+ case IP_MULTICAST_IFINDEX:
case IP_MULTICAST_VIF:
case IP_MULTICAST_TTL:
case IP_MULTICAST_LOOP:
case IP_ADD_MEMBERSHIP:
case IP_DROP_MEMBERSHIP:
- error = ip_setmoptions(sopt, &inp->inp_moptions);
+ case IP_ADD_SOURCE_MEMBERSHIP:
+ case IP_DROP_SOURCE_MEMBERSHIP:
+ case IP_BLOCK_SOURCE:
+ case IP_UNBLOCK_SOURCE:
+ case IP_MSFILTER:
+ case MCAST_JOIN_GROUP:
+ case MCAST_LEAVE_GROUP:
+ case MCAST_JOIN_SOURCE_GROUP:
+ case MCAST_LEAVE_SOURCE_GROUP:
+ case MCAST_BLOCK_SOURCE:
+ case MCAST_UNBLOCK_SOURCE:
+ error = inp_setmoptions(inp, sopt);
break;
case IP_PORTRANGE:
struct mbuf *m;
int optname;
- if (sopt->sopt_valsize > MCLBYTES) {
- error = EMSGSIZE;
- break;
- }
if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
break;
if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
if (error)
break;
- if (background)
- so->so_traffic_mgt_flags |= TRAFFIC_MGT_SO_BACKGROUND;
- else
- so->so_traffic_mgt_flags &= ~TRAFFIC_MGT_SO_BACKGROUND;
+ if (background) {
+ socket_set_traffic_mgt_flags_locked(so,
+ TRAFFIC_MGT_SO_BACKGROUND);
+ } else {
+ socket_clear_traffic_mgt_flags_locked(so,
+ TRAFFIC_MGT_SO_BACKGROUND);
+ }
break;
}
* on the destination address type (e.g. unicast, multicast,
* or broadcast if applicable) or whether or not the host is
* directly reachable. Note that in the multicast transmit
- * case, IP_MULTICAST_IF takes precedence over IP_BOUND_IF,
- * since the former practically bypasses the routing table;
- * in this case, IP_BOUND_IF sets the default interface used
- * for sending multicast packets in the absence of an explicit
- * transmit interface set via IP_MULTICAST_IF.
+ * case, IP_MULTICAST_{IF,IFINDEX} takes precedence over
+ * IP_BOUND_IF, since the former practically bypasses the
+ * routing table; in this case, IP_BOUND_IF sets the default
+ * interface used for sending multicast packets in the absence
+ * of an explicit multicast transmit interface.
*/
case IP_BOUND_IF:
/* This option is settable only for IPv4 */
if (error)
break;
- ip_bindif(inp, optval);
+ error = inp_bindif(inp, optval);
+ break;
+
+ case IP_NO_IFT_CELLULAR:
+ /* This option is settable only for IPv4 */
+ if (!(inp->inp_vflag & INP_IPV4)) {
+ error = EINVAL;
+ break;
+ }
+
+ error = sooptcopyin(sopt, &optval, sizeof (optval),
+ sizeof (optval));
+
+ if (error)
+ break;
+
+ error = inp_nocellular(inp, optval);
+ break;
+
+ case IP_OUT_IF:
+ /* This option is not settable */
+ error = EINVAL;
break;
default:
case IP_RECVIF:
case IP_RECVTTL:
case IP_PORTRANGE:
-#if defined(NFAITH) && NFAITH > 0
- case IP_FAITH:
-#endif
+ case IP_RECVPKTINFO:
switch (sopt->sopt_name) {
case IP_TOS:
optval = 0;
break;
-#if defined(NFAITH) && NFAITH > 0
- case IP_FAITH:
- optval = OPTBIT(INP_FAITH);
+ case IP_RECVPKTINFO:
+ optval = OPTBIT(INP_PKTINFO);
break;
-#endif
}
error = sooptcopyout(sopt, &optval, sizeof optval);
break;
case IP_MULTICAST_IF:
+ case IP_MULTICAST_IFINDEX:
case IP_MULTICAST_VIF:
case IP_MULTICAST_TTL:
case IP_MULTICAST_LOOP:
- case IP_ADD_MEMBERSHIP:
- case IP_DROP_MEMBERSHIP:
- error = ip_getmoptions(sopt, inp->inp_moptions);
+ case IP_MSFILTER:
+ error = inp_getmoptions(inp, sopt);
break;
#if IPSEC
#if TRAFFIC_MGT
case IP_TRAFFIC_MGT_BACKGROUND:
{
- unsigned background = so->so_traffic_mgt_flags;
+ unsigned background = (so->so_traffic_mgt_flags & TRAFFIC_MGT_SO_BACKGROUND);
return (sooptcopyout(sopt, &background, sizeof(background)));
break;
}
case IP_BOUND_IF:
if (inp->inp_flags & INP_BOUND_IF)
- optval = inp->inp_boundif;
+ optval = inp->inp_boundifp->if_index;
+ error = sooptcopyout(sopt, &optval, sizeof (optval));
+ break;
+
+ case IP_NO_IFT_CELLULAR:
+ optval = (inp->inp_flags & INP_NO_IFT_CELLULAR) ? 1 : 0;
+ error = sooptcopyout(sopt, &optval, sizeof (optval));
+ break;
+
+ case IP_OUT_IF:
+ optval = (inp->inp_last_outifp != NULL) ?
+ inp->inp_last_outifp->if_index : 0;
error = sooptcopyout(sopt, &optval, sizeof (optval));
break;
return (EINVAL);
}
-/*
- * XXX
- * The whole multicast option thing needs to be re-thought.
- * Several of these options are equally applicable to non-multicast
- * transmission, and one (IP_MULTICAST_TTL) totally duplicates a
- * standard option (IP_TTL).
- */
-
-/*
- * following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as interface index.
- */
-static struct ifnet *
-ip_multicast_if(a, ifindexp)
- struct in_addr *a;
- int *ifindexp;
+void
+ip_moptions_init(void)
{
- int ifindex;
- struct ifnet *ifp;
+ PE_parse_boot_argn("ifa_debug", &imo_debug, sizeof (imo_debug));
- if (ifindexp)
- *ifindexp = 0;
- if (ntohl(a->s_addr) >> 24 == 0) {
- ifindex = ntohl(a->s_addr) & 0xffffff;
- ifnet_head_lock_shared();
- if (ifindex < 0 || if_index < ifindex) {
- ifnet_head_done();
- return NULL;
- }
- ifp = ifindex2ifnet[ifindex];
- ifnet_head_done();
- if (ifindexp)
- *ifindexp = ifindex;
- } else {
- INADDR_TO_IFP(*a, ifp);
+ imo_size = (imo_debug == 0) ? sizeof (struct ip_moptions) :
+ sizeof (struct ip_moptions_dbg);
+
+ imo_zone = zinit(imo_size, IMO_ZONE_MAX * imo_size, 0,
+ IMO_ZONE_NAME);
+ if (imo_zone == NULL) {
+ panic("%s: failed allocating %s", __func__, IMO_ZONE_NAME);
+ /* NOTREACHED */
}
- return ifp;
+ zone_change(imo_zone, Z_EXPAND, TRUE);
}
-/*
- * Set the IP multicast options in response to user setsockopt().
- */
-static int
-ip_setmoptions(sopt, imop)
- struct sockopt *sopt;
- struct ip_moptions **imop;
+void
+imo_addref(struct ip_moptions *imo, int locked)
{
- int error = 0;
- int i;
- struct in_addr addr;
- struct ip_mreq mreq;
- struct ifnet *ifp = NULL;
- struct ip_moptions *imo = *imop;
- int ifindex;
-
- if (imo == NULL) {
- /*
- * No multicast option buffer attached to the pcb;
- * allocate one and initialize to default values.
- */
- error = ip_createmoptions(imop);
- if (error != 0)
- return error;
- imo = *imop;
- }
-
- switch (sopt->sopt_name) {
- /* store an index number for the vif you wanna use in the send */
-#if MROUTING
- case IP_MULTICAST_VIF:
- if (legal_vif_num == 0) {
- error = EOPNOTSUPP;
- break;
- }
- error = sooptcopyin(sopt, &i, sizeof i, sizeof i);
- if (error)
- break;
- if (!legal_vif_num(i) && (i != -1)) {
- error = EINVAL;
- break;
- }
- imo->imo_multicast_vif = i;
- break;
-#endif /* MROUTING */
-
- case IP_MULTICAST_IF:
- /*
- * Select the interface for outgoing multicast packets.
- */
- error = sooptcopyin(sopt, &addr, sizeof addr, sizeof addr);
- if (error)
- break;
- /*
- * INADDR_ANY is used to remove a previous selection.
- * When no interface is selected, a default one is
- * chosen every time a multicast packet is sent.
- */
- if (addr.s_addr == INADDR_ANY) {
- imo->imo_multicast_ifp = NULL;
- break;
- }
- /*
- * The selected interface is identified by its local
- * IP address. Find the interface and confirm that
- * it supports multicasting.
- */
- ifp = ip_multicast_if(&addr, &ifindex);
- if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
- error = EADDRNOTAVAIL;
- break;
- }
- imo->imo_multicast_ifp = ifp;
- if (ifindex)
- imo->imo_multicast_addr = addr;
- else
- imo->imo_multicast_addr.s_addr = INADDR_ANY;
- break;
-
- case IP_MULTICAST_TTL:
- /*
- * Set the IP time-to-live for outgoing multicast packets.
- * The original multicast API required a char argument,
- * which is inconsistent with the rest of the socket API.
- * We allow either a char or an int.
- */
- if (sopt->sopt_valsize == 1) {
- u_char ttl;
- error = sooptcopyin(sopt, &ttl, 1, 1);
- if (error)
- break;
- imo->imo_multicast_ttl = ttl;
- } else {
- u_int ttl;
- error = sooptcopyin(sopt, &ttl, sizeof ttl,
- sizeof ttl);
- if (error)
- break;
- if (ttl > 255)
- error = EINVAL;
- else
- imo->imo_multicast_ttl = ttl;
- }
- break;
-
- case IP_MULTICAST_LOOP:
- /*
- * Set the loopback flag for outgoing multicast packets.
- * Must be zero or one. The original multicast API required a
- * char argument, which is inconsistent with the rest
- * of the socket API. We allow either a char or an int.
- */
- if (sopt->sopt_valsize == 1) {
- u_char loop;
- error = sooptcopyin(sopt, &loop, 1, 1);
- if (error)
- break;
- imo->imo_multicast_loop = !!loop;
- } else {
- u_int loop;
- error = sooptcopyin(sopt, &loop, sizeof loop,
- sizeof loop);
- if (error)
- break;
- imo->imo_multicast_loop = !!loop;
- }
- break;
-
- case IP_ADD_MEMBERSHIP:
- /*
- * Add a multicast group membership.
- * Group must be a valid IP multicast address.
- */
- error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
- if (error)
- break;
-
- error = ip_addmembership(imo, &mreq);
- break;
-
- case IP_DROP_MEMBERSHIP:
- /*
- * Drop a multicast group membership.
- * Group must be a valid IP multicast address.
- */
- error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
- if (error)
- break;
-
- error = ip_dropmembership(imo, &mreq);
- break;
-
- default:
- error = EOPNOTSUPP;
- break;
- }
+ if (!locked)
+ IMO_LOCK(imo);
+ else
+ IMO_LOCK_ASSERT_HELD(imo);
- /*
- * If all options have default values, no need to keep the mbuf.
- */
- if (imo->imo_multicast_ifp == NULL &&
- imo->imo_multicast_vif == (u_int32_t)-1 &&
- imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL &&
- imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP &&
- imo->imo_num_memberships == 0) {
- FREE(*imop, M_IPMOPTS);
- *imop = NULL;
+ if (++imo->imo_refcnt == 0) {
+ panic("%s: imo %p wraparound refcnt\n", __func__, imo);
+ /* NOTREACHED */
+ } else if (imo->imo_trace != NULL) {
+ (*imo->imo_trace)(imo, TRUE);
}
- return (error);
+ if (!locked)
+ IMO_UNLOCK(imo);
}
-/*
- * Set the IP multicast options in response to user setsockopt().
- */
-__private_extern__ int
-ip_createmoptions(
- struct ip_moptions **imop)
-{
- struct ip_moptions *imo;
- imo = (struct ip_moptions*) _MALLOC(sizeof(*imo), M_IPMOPTS,
- M_WAITOK);
-
- if (imo == NULL)
- return (ENOBUFS);
- *imop = imo;
- imo->imo_multicast_ifp = NULL;
- imo->imo_multicast_addr.s_addr = INADDR_ANY;
- imo->imo_multicast_vif = -1;
- imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
- imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
- imo->imo_num_memberships = 0;
-
- return 0;
-}
-
-/*
- * Add membership to an IPv4 multicast.
- */
-__private_extern__ int
-ip_addmembership(
- struct ip_moptions *imo,
- struct ip_mreq *mreq)
+void
+imo_remref(struct ip_moptions *imo)
{
- struct route ro;
- struct sockaddr_in *dst;
- struct ifnet *ifp = NULL;
- int error = 0;
int i;
- bzero((caddr_t)&ro, sizeof(ro));
-
- if (!IN_MULTICAST(ntohl(mreq->imr_multiaddr.s_addr))) {
- error = EINVAL;
- goto done;
- }
- /*
- * If no interface address was provided, use the interface of
- * the route to the given multicast address.
- */
- if (mreq->imr_interface.s_addr == INADDR_ANY) {
- dst = (struct sockaddr_in *)&ro.ro_dst;
- dst->sin_len = sizeof(*dst);
- dst->sin_family = AF_INET;
- dst->sin_addr = mreq->imr_multiaddr;
- rtalloc_ign(&ro, 0);
- if (ro.ro_rt != NULL) {
- ifp = ro.ro_rt->rt_ifp;
- } else {
- /* If there's no default route, try using loopback */
- mreq->imr_interface.s_addr = htonl(INADDR_LOOPBACK);
- }
+ IMO_LOCK(imo);
+ if (imo->imo_refcnt == 0) {
+ panic("%s: imo %p negative refcnt", __func__, imo);
+ /* NOTREACHED */
+ } else if (imo->imo_trace != NULL) {
+ (*imo->imo_trace)(imo, FALSE);
}
- if (ifp == NULL) {
- ifp = ip_multicast_if(&mreq->imr_interface, NULL);
+ --imo->imo_refcnt;
+ if (imo->imo_refcnt > 0) {
+ IMO_UNLOCK(imo);
+ return;
}
- /*
- * See if we found an interface, and confirm that it
- * supports multicast.
- */
- if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
- error = EADDRNOTAVAIL;
- goto done;
- }
- /*
- * See if the membership already exists or if all the
- * membership slots are full.
- */
for (i = 0; i < imo->imo_num_memberships; ++i) {
- if (imo->imo_membership[i]->inm_ifp == ifp &&
- imo->imo_membership[i]->inm_addr.s_addr
- == mreq->imr_multiaddr.s_addr)
- break;
- }
- if (i < imo->imo_num_memberships) {
- error = EADDRINUSE;
- goto done;
- }
- if (i == IP_MAX_MEMBERSHIPS) {
- error = ETOOMANYREFS;
- goto done;
- }
- /*
- * Everything looks good; add a new record to the multicast
- * address list for the given interface.
- */
- if ((imo->imo_membership[i] =
- in_addmulti(&mreq->imr_multiaddr, ifp)) == NULL) {
- error = ENOBUFS;
- goto done;
- }
- ++imo->imo_num_memberships;
+ struct in_mfilter *imf;
-done:
- if (ro.ro_rt != NULL)
- rtfree(ro.ro_rt);
+ imf = imo->imo_mfilters ? &imo->imo_mfilters[i] : NULL;
+ if (imf != NULL)
+ imf_leave(imf);
- return error;
-}
+ (void) in_leavegroup(imo->imo_membership[i], imf);
-/*
- * Drop membership of an IPv4 multicast.
- */
-__private_extern__ int
-ip_dropmembership(
- struct ip_moptions *imo,
- struct ip_mreq *mreq)
-{
- int error = 0;
- struct ifnet* ifp = NULL;
- int i;
-
- if (!IN_MULTICAST(ntohl(mreq->imr_multiaddr.s_addr))) {
- error = EINVAL;
- return error;
- }
+ if (imf != NULL)
+ imf_purge(imf);
- /*
- * If an interface address was specified, get a pointer
- * to its ifnet structure.
- */
- if (mreq->imr_interface.s_addr == INADDR_ANY)
- ifp = NULL;
- else {
- ifp = ip_multicast_if(&mreq->imr_interface, NULL);
- if (ifp == NULL) {
- error = EADDRNOTAVAIL;
- return error;
- }
+ INM_REMREF(imo->imo_membership[i]);
+ imo->imo_membership[i] = NULL;
}
- /*
- * Find the membership in the membership array.
- */
- for (i = 0; i < imo->imo_num_memberships; ++i) {
- if ((ifp == NULL ||
- imo->imo_membership[i]->inm_ifp == ifp) &&
- imo->imo_membership[i]->inm_addr.s_addr ==
- mreq->imr_multiaddr.s_addr)
- break;
+ imo->imo_num_memberships = 0;
+ if (imo->imo_mfilters != NULL) {
+ FREE(imo->imo_mfilters, M_INMFILTER);
+ imo->imo_mfilters = NULL;
}
- if (i == imo->imo_num_memberships) {
- error = EADDRNOTAVAIL;
- return error;
+ if (imo->imo_membership != NULL) {
+ FREE(imo->imo_membership, M_IPMOPTS);
+ imo->imo_membership = NULL;
}
- /*
- * Give up the multicast address record to which the
- * membership points.
- */
- in_delmulti(&imo->imo_membership[i]);
- /*
- * Remove the gap in the membership array.
- */
- for (++i; i < imo->imo_num_memberships; ++i)
- imo->imo_membership[i-1] = imo->imo_membership[i];
- --imo->imo_num_memberships;
-
- return error;
-}
+ IMO_UNLOCK(imo);
-/*
- * Return the IP multicast options in response to user getsockopt().
- */
-static int
-ip_getmoptions(sopt, imo)
- struct sockopt *sopt;
- register struct ip_moptions *imo;
-{
- struct in_addr addr;
- struct in_ifaddr *ia;
- int error, optval;
- u_char coptval;
-
- error = 0;
- switch (sopt->sopt_name) {
-#if MROUTING
- case IP_MULTICAST_VIF:
- if (imo != NULL)
- optval = imo->imo_multicast_vif;
- else
- optval = -1;
- error = sooptcopyout(sopt, &optval, sizeof optval);
- break;
-#endif /* MROUTING */
-
- case IP_MULTICAST_IF:
- if (imo == NULL || imo->imo_multicast_ifp == NULL)
- addr.s_addr = INADDR_ANY;
- else if (imo->imo_multicast_addr.s_addr) {
- /* return the value user has set */
- addr = imo->imo_multicast_addr;
- } else {
- IFP_TO_IA(imo->imo_multicast_ifp, ia);
- addr.s_addr = (ia == NULL) ? INADDR_ANY
- : IA_SIN(ia)->sin_addr.s_addr;
- if (ia != NULL)
- ifafree(&ia->ia_ifa);
- }
- error = sooptcopyout(sopt, &addr, sizeof addr);
- break;
+ lck_mtx_destroy(&imo->imo_lock, ifa_mtx_grp);
- case IP_MULTICAST_TTL:
- if (imo == 0)
- optval = coptval = IP_DEFAULT_MULTICAST_TTL;
- else
- optval = coptval = imo->imo_multicast_ttl;
- if (sopt->sopt_valsize == 1)
- error = sooptcopyout(sopt, &coptval, 1);
- else
- error = sooptcopyout(sopt, &optval, sizeof optval);
- break;
-
- case IP_MULTICAST_LOOP:
- if (imo == 0)
- optval = coptval = IP_DEFAULT_MULTICAST_LOOP;
- else
- optval = coptval = imo->imo_multicast_loop;
- if (sopt->sopt_valsize == 1)
- error = sooptcopyout(sopt, &coptval, 1);
- else
- error = sooptcopyout(sopt, &optval, sizeof optval);
- break;
+ if (!(imo->imo_debug & IFD_ALLOC)) {
+ panic("%s: imo %p cannot be freed", __func__, imo);
+ /* NOTREACHED */
+ }
+ zfree(imo_zone, imo);
+}
- default:
- error = ENOPROTOOPT;
- break;
+static void
+imo_trace(struct ip_moptions *imo, int refhold)
+{
+ struct ip_moptions_dbg *imo_dbg = (struct ip_moptions_dbg *)imo;
+ ctrace_t *tr;
+ u_int32_t idx;
+ u_int16_t *cnt;
+
+ if (!(imo->imo_debug & IFD_DEBUG)) {
+ panic("%s: imo %p has no debug structure", __func__, imo);
+ /* NOTREACHED */
+ }
+ if (refhold) {
+ cnt = &imo_dbg->imo_refhold_cnt;
+ tr = imo_dbg->imo_refhold;
+ } else {
+ cnt = &imo_dbg->imo_refrele_cnt;
+ tr = imo_dbg->imo_refrele;
}
- return (error);
+
+ idx = atomic_add_16_ov(cnt, 1) % IMO_TRACE_HIST_SIZE;
+ ctrace_record(&tr[idx]);
}
-/*
- * Discard the IP multicast options.
- */
-void
-ip_freemoptions(imo)
- register struct ip_moptions *imo;
+struct ip_moptions *
+ip_allocmoptions(int how)
{
- register int i;
+ struct ip_moptions *imo;
+ imo = (how == M_WAITOK) ? zalloc(imo_zone) : zalloc_noblock(imo_zone);
if (imo != NULL) {
- for (i = 0; i < imo->imo_num_memberships; ++i)
- in_delmulti(&imo->imo_membership[i]);
- FREE(imo, M_IPMOPTS);
+ bzero(imo, imo_size);
+ lck_mtx_init(&imo->imo_lock, ifa_mtx_grp, ifa_mtx_attr);
+ imo->imo_debug |= IFD_ALLOC;
+ if (imo_debug != 0) {
+ imo->imo_debug |= IFD_DEBUG;
+ imo->imo_trace = imo_trace;
+ }
+ IMO_ADDREF(imo);
}
+
+ return (imo);
}
/*
if (lo_ifp) {
copym->m_pkthdr.rcvif = ifp;
dlil_output(lo_ifp, PF_INET, copym, 0,
- (struct sockaddr *) dst, 0);
+ (struct sockaddr *) dst, 0, NULL);
} else {
printf("Warning: ip_output call to dlil_find_dltag failed!\n");
m_freem(copym);
* without any locks based on the assumption that ip_output() is single-
* threaded per-pcb, i.e. for any given pcb there can only be one thread
* performing output at the IP layer.
+ *
+ * This routine is analogous to in6_selectroute() for IPv6.
*/
static struct ifaddr *
in_selectsrcif(struct ip *ip, struct route *ro, unsigned int ifscope)
struct in_addr src = ip->ip_src;
struct in_addr dst = ip->ip_dst;
struct ifnet *rt_ifp;
- char s_src[16], s_dst[16];
+ char s_src[MAX_IPv4_STR_LEN], s_dst[MAX_IPv4_STR_LEN];
if (ip_select_srcif_debug) {
(void) inet_ntop(AF_INET, &src.s_addr, s_src, sizeof (s_src));
*/
if (scope == IFSCOPE_NONE) {
scope = rt_ifp->if_index;
- if (scope != get_primary_ifscope() &&
+ if (scope != get_primary_ifscope(AF_INET) &&
ro->ro_rt->generation_id != route_generation)
- scope = get_primary_ifscope();
+ scope = get_primary_ifscope(AF_INET);
}
ifa = (struct ifaddr *)ifa_foraddr_scoped(src.s_addr, scope);
+ if (ifa == NULL && ip->ip_p != IPPROTO_UDP &&
+ ip->ip_p != IPPROTO_TCP && ipforwarding) {
+ /*
+ * If forwarding is enabled, and if the packet isn't
+ * TCP or UDP, check if the source address belongs
+ * to one of our own interfaces; if so, demote the
+ * interface scope and do a route lookup right below.
+ */
+ ifa = (struct ifaddr *)ifa_foraddr(src.s_addr);
+ if (ifa != NULL) {
+ IFA_REMREF(ifa);
+ ifa = NULL;
+ ifscope = IFSCOPE_NONE;
+ }
+ }
+
if (ip_select_srcif_debug && ifa != NULL) {
if (ro->ro_rt != NULL) {
- printf("%s->%s ifscope %d->%d ifa_if %s%d "
- "ro_if %s%d\n", s_src, s_dst, ifscope,
- scope, ifa->ifa_ifp->if_name,
- ifa->ifa_ifp->if_unit, rt_ifp->if_name,
- rt_ifp->if_unit);
+ printf("%s->%s ifscope %d->%d ifa_if %s "
+ "ro_if %s\n", s_src, s_dst, ifscope,
+ scope, if_name(ifa->ifa_ifp),
+ if_name(rt_ifp));
} else {
- printf("%s->%s ifscope %d->%d ifa_if %s%d\n",
+ printf("%s->%s ifscope %d->%d ifa_if %s\n",
s_src, s_dst, ifscope, scope,
- ifa->ifa_ifp->if_name,
- ifa->ifa_ifp->if_unit);
+ if_name(ifa->ifa_ifp));
}
}
}
if (ifa == NULL && ifscope == IFSCOPE_NONE) {
ifa = (struct ifaddr *)ifa_foraddr(src.s_addr);
+ /*
+ * If we have the IP address, but not the route, we don't
+ * really know whether or not it belongs to the correct
+ * interface (it could be shared across multiple interfaces.)
+ * The only way to find out is to do a route lookup.
+ */
+ if (ifa != NULL && ro->ro_rt == NULL) {
+ struct rtentry *rt;
+ struct sockaddr_in sin;
+ struct ifaddr *oifa = NULL;
+
+ bzero(&sin, sizeof (sin));
+ sin.sin_family = AF_INET;
+ sin.sin_len = sizeof (sin);
+ sin.sin_addr = dst;
+
+ lck_mtx_lock(rnh_lock);
+ if ((rt = rt_lookup(TRUE, (struct sockaddr *)&sin, NULL,
+ rt_tables[AF_INET], IFSCOPE_NONE)) != NULL) {
+ RT_LOCK(rt);
+ /*
+ * If the route uses a different interface,
+ * use that one instead. The IP address of
+ * the ifaddr that we pick up here is not
+ * relevant.
+ */
+ if (ifa->ifa_ifp != rt->rt_ifp) {
+ oifa = ifa;
+ ifa = rt->rt_ifa;
+ IFA_ADDREF(ifa);
+ RT_UNLOCK(rt);
+ } else {
+ RT_UNLOCK(rt);
+ }
+ rtfree_locked(rt);
+ }
+ lck_mtx_unlock(rnh_lock);
+
+ if (oifa != NULL) {
+ struct ifaddr *iifa;
+
+ /*
+ * See if the interface pointed to by the
+ * route is configured with the source IP
+ * address of the packet.
+ */
+ iifa = (struct ifaddr *)ifa_foraddr_scoped(
+ src.s_addr, ifa->ifa_ifp->if_index);
+
+ if (iifa != NULL) {
+ /*
+ * Found it; drop the original one
+ * as well as the route interface
+ * address, and use this instead.
+ */
+ IFA_REMREF(oifa);
+ IFA_REMREF(ifa);
+ ifa = iifa;
+ } else if (!ipforwarding ||
+ (rt->rt_flags & RTF_GATEWAY)) {
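+ /*
+ * This interface doesn't have that
+ * source IP address; drop the route
+ * interface address and just use the
+ * original one, and let the caller
+ * do a scoped route lookup.
+ *
+ * NOTE(review): the condition above reads
+ * rt->rt_flags after rtfree_locked(rt) and
+ * after rnh_lock has been dropped; confirm
+ * the route cannot be freed by then, or
+ * sample rt_flags while it is still held.
+ */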
+ IFA_REMREF(ifa);
+ ifa = oifa;
+ } else {
+ /*
+ * Forwarding is enabled and the source
+ * address belongs to one of our own
+ * interfaces which isn't the outgoing
+ * interface, and we have a route, and
+ * the destination is on a network that
+ * is directly attached (onlink); drop
+ * the original one and use the route
+ * interface address instead.
+ */
+ IFA_REMREF(oifa);
+ }
+ }
+ } else if (ifa != NULL && ro->ro_rt != NULL &&
+ !(ro->ro_rt->rt_flags & RTF_GATEWAY) &&
+ ifa->ifa_ifp != ro->ro_rt->rt_ifp && ipforwarding) {
+ /*
+ * Forwarding is enabled and the source address belongs
+ * to one of our own interfaces which isn't the same
+ * as the interface used by the known route; drop the
+ * original one and use the route interface address.
+ */
+ IFA_REMREF(ifa);
+ ifa = ro->ro_rt->rt_ifa;
+ IFA_ADDREF(ifa);
+ }
+
if (ip_select_srcif_debug && ifa != NULL) {
- printf("%s->%s ifscope %d ifa_if %s%d\n",
- s_src, s_dst, ifscope, ifa->ifa_ifp->if_name,
- ifa->ifa_ifp->if_unit);
+ printf("%s->%s ifscope %d ifa_if %s\n",
+ s_src, s_dst, ifscope, if_name(ifa->ifa_ifp));
}
}
!(ro->ro_rt->rt_flags & RTF_UP))) {
if (ip_select_srcif_debug) {
if (ifa != NULL) {
- printf("%s->%s ifscope %d ro_if %s%d != "
- "ifa_if %s%d (cached route cleared)\n",
- s_src, s_dst, ifscope, rt_ifp->if_name,
- rt_ifp->if_unit, ifa->ifa_ifp->if_name,
- ifa->ifa_ifp->if_unit);
+ printf("%s->%s ifscope %d ro_if %s != "
+ "ifa_if %s (cached route cleared)\n",
+ s_src, s_dst, ifscope, if_name(rt_ifp),
+ if_name(ifa->ifa_ifp));
} else {
- printf("%s->%s ifscope %d ro_if %s%d "
+ printf("%s->%s ifscope %d ro_if %s "
"(no ifa_if found)\n",
- s_src, s_dst, ifscope, rt_ifp->if_name,
- rt_ifp->if_unit);
+ s_src, s_dst, ifscope, if_name(rt_ifp));
}
}
*/
if (IN_LINKLOCAL(ntohl(dst.s_addr)) &&
!IN_LINKLOCAL(ntohl(src.s_addr)) && ifa != NULL) {
- ifafree(ifa);
+ IFA_REMREF(ifa);
ifa = NULL;
}
}
return (ifa);
}
-
-/*
- * Handler for setting IP_FORCE_OUT_IFP or IP_BOUND_IF socket option.
- */
-static void
-ip_bindif(struct inpcb *inp, unsigned int ifscope)
-{
- /*
- * A zero interface scope value indicates an "unbind".
- * Otherwise, take in whatever value the app desires;
- * the app may already know the scope (or force itself
- * to such a scope) ahead of time before the interface
- * gets attached. It doesn't matter either way; any
- * route lookup from this point on will require an
- * exact match for the embedded interface scope.
- */
- inp->inp_boundif = ifscope;
- if (inp->inp_boundif == IFSCOPE_NONE)
- inp->inp_flags &= ~INP_BOUND_IF;
- else
- inp->inp_flags |= INP_BOUND_IF;
-
- /* Blow away any cached route in the PCB */
- if (inp->inp_route.ro_rt != NULL) {
- rtfree(inp->inp_route.ro_rt);
- inp->inp_route.ro_rt = NULL;
- }
-}