* SUCH DAMAGE.
*
* @(#)ip_output.c 8.3 (Berkeley) 1/21/94
+ * $FreeBSD: src/sys/netinet/ip_output.c,v 1.99.2.16 2001/07/19 06:37:26 kris Exp $
*/
#define _IP_VHL
-#if ISFB31
-#include "opt_ipfw.h"
-#include "opt_ipdn.h"
-#include "opt_ipdivert.h"
-#include "opt_ipfilter.h"
-#endif
-
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
-#if INET6
-#include <netinet/ip6.h>
-#include <netinet6/ip6_var.h>
-#endif
#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
#include <netinet/ip_var.h>
-#include <net/dlil.h>
+#include "faith.h"
+
+#include <net/dlil.h>
#include <sys/kdebug.h>
#define DBG_LAYER_BEG NETDBG_CODE(DBG_NETIP, 1)
#define DBG_FNC_IP_OUTPUT NETDBG_CODE(DBG_NETIP, (1 << 8) | 1)
-#ifdef vax
+#if vax
#include <machine/mtpr.h>
#endif
-#if ISFB31
+#if __FreeBSD__
#include <machine/in_cksum.h>
static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "internet multicast options");
#endif
-//static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "internet multicast options");
-
#if IPSEC
#include <netinet6/ipsec.h>
#include <netkey/key.h>
+#if IPSEC_DEBUG
#include <netkey/key_debug.h>
-
-#endif /*IPSEC*/
-
-#if !defined(COMPAT_IPFW) || COMPAT_IPFW == 1
-#undef COMPAT_IPFW
-#define COMPAT_IPFW 1
#else
-#undef COMPAT_IPFW
+#define KEYDEBUG(lev,arg)
#endif
+#endif /*IPSEC*/
-#if COMPAT_IPFW
#include <netinet/ip_fw.h>
-#endif
#if DUMMYNET
#include <netinet/ip_dummynet.h>
u_short ip_id;
static struct mbuf *ip_insertoptions __P((struct mbuf *, struct mbuf *, int *));
+static struct ifnet *ip_multicast_if __P((struct in_addr *, int *));
static void ip_mloopback
__P((struct ifnet *, struct mbuf *, struct sockaddr_in *, int));
static int ip_getmoptions
static int ip_pcbopts __P((int, struct mbuf **, struct mbuf *));
static int ip_setmoptions
__P((struct sockopt *, struct ip_moptions **));
-static u_long lo_dl_tag = 0;
-#if IPFILTER_LKM || IPFILTER
int ip_optcopy __P((struct ip *, struct ip *));
extern int (*fr_checkp) __P((struct ip *, int, struct ifnet *, int, struct mbuf **));
-#else
-static int ip_optcopy __P((struct ip *, struct ip *));
+#ifdef __APPLE__
+extern struct mbuf* m_dup(register struct mbuf *m, int how);
#endif
+static u_long lo_dl_tag = 0;
+
+void in_delayed_cksum(struct mbuf *m);
+extern int apple_hwcksum_tx;
extern struct protosw inetsw[];
+extern struct ip_linklocal_stat ip_linklocal_stat;
+
+/* temporary: for testing */
+#if IPSEC
+extern int ipsec_bypass;
+#endif
+
/*
* IP output. The packet in mbuf chain m contains a skeletal IP
* header (with len, off, ttl, proto, tos, src, dst).
int hlen = sizeof (struct ip);
int len, off, error = 0;
struct sockaddr_in *dst;
- struct in_ifaddr *ia;
- int isbroadcast;
+ struct in_ifaddr *ia = NULL;
+ int isbroadcast, sw_csum;
#if IPSEC
struct route iproute;
- struct socket *so;
+ struct socket *so = NULL;
struct secpolicy *sp = NULL;
#endif
+ u_int16_t divert_cookie; /* firewall cookie */
#if IPFIREWALL_FORWARD
int fwd_rewrite_src = 0;
#endif
-
-
-#if !IPDIVERT /* dummy variable for the firewall code to play with */
- u_short ip_divert_cookie = 0 ;
-#endif
-#if COMPAT_IPFW
- struct ip_fw_chain *rule = NULL ;
+ struct ip_fw_chain *rule = NULL;
+
+#if IPDIVERT
+ /* Get and reset firewall cookie */
+ divert_cookie = ip_divert_cookie;
+ ip_divert_cookie = 0;
+#else
+ divert_cookie = 0;
#endif
KERNEL_DEBUG(DBG_FNC_IP_OUTPUT | DBG_FUNC_START, 0,0,0,0,0);
-#if IPSEC
- /*
- * NOTE: m->m_pkthdr is NULL cleared below just to prevent ipfw code
- * from SEGV.
- * ipfw code uses rcvif to determine incoming interface, and
- * KAME uses rcvif for ipsec processing.
- * ipfw may not be working right with KAME at this moment.
- * We need more tests.
- */
-#if DUMMYNET
- if (m->m_type == MT_DUMMYNET) {
- if (m->m_next != NULL) {
- so = (struct socket *)m->m_next->m_pkthdr.rcvif;
- m->m_next->m_pkthdr.rcvif = NULL;
- } else
- so = NULL;
- } else
-#endif
- {
- so = ipsec_getsocket(m);
- ipsec_setsocket(m, NULL);
- }
-#endif /*IPSEC*/
-
-
#if IPFIREWALL && DUMMYNET
/*
* dummynet packet are prepended a vestigial mbuf with
* rule.
*/
if (m->m_type == MT_DUMMYNET) {
- struct mbuf *tmp_m = m ;
/*
* the packet was already tagged, so part of the
* processing was already done, and we need to go down.
- * opt, flags and imo have already been used, and now
- * they are used to hold ifp and hlen and NULL, respectively.
+ * Get parameters from the header.
*/
- rule = (struct ip_fw_chain *)(m->m_data) ;
- m = m->m_next ;
- FREE(tmp_m, M_IPFW);
+ rule = (struct ip_fw_chain *)(m->m_data) ;
+ opt = NULL ;
+ ro = & ( ((struct dn_pkt *)m)->ro ) ;
+ imo = NULL ;
+ dst = ((struct dn_pkt *)m)->dn_dst ;
+ ifp = ((struct dn_pkt *)m)->ifp ;
+ flags = ((struct dn_pkt *)m)->flags ;
+ m0 = m = m->m_next ;
+#if IPSEC
+ if (ipsec_bypass == 0) {
+ so = ipsec_getsocket(m);
+ (void)ipsec_setsocket(m, NULL);
+ }
+#endif
ip = mtod(m, struct ip *);
- dst = (struct sockaddr_in *)&ro->ro_dst;
- ifp = (struct ifnet *)opt;
hlen = IP_VHL_HL(ip->ip_vhl) << 2 ;
- opt = NULL ;
- flags = 0 ; /* XXX is this correct ? */
+ if (ro->ro_rt != NULL)
+ ia = (struct in_ifaddr *)ro->ro_rt->rt_ifa;
goto sendit;
} else
rule = NULL ;
#endif
+#if IPSEC
+ if (ipsec_bypass == 0) {
+ so = ipsec_getsocket(m);
+ (void)ipsec_setsocket(m, NULL);
+ }
+#endif
#if DIAGNOSTIC
if ((m->m_flags & M_PKTHDR) == 0)
if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
ip->ip_vhl = IP_MAKE_VHL(IPVERSION, hlen >> 2);
ip->ip_off &= IP_DF;
+#if RANDOM_IP_ID
+ ip->ip_id = ip_randomid();
+#else
ip->ip_id = htons(ip_id++);
+#endif
ipstat.ips_localout++;
} else {
hlen = IP_VHL_HL(ip->ip_vhl) << 2;
*/
if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
- RTFREE(ro->ro_rt);
+ rtfree(ro->ro_rt);
ro->ro_rt = (struct rtentry *)0;
}
if (ro->ro_rt == 0) {
*/
if (imo != NULL) {
ip->ip_ttl = imo->imo_multicast_ttl;
- if (imo->imo_multicast_ifp != NULL)
+ if (imo->imo_multicast_ifp != NULL) {
ifp = imo->imo_multicast_ifp;
+ dl_tag = ifp->if_data.default_proto;
+ }
if (imo->imo_multicast_vif != -1)
ip->ip_src.s_addr =
ip_mcast_src(imo->imo_multicast_vif);
if (ip->ip_src.s_addr == INADDR_ANY) {
register struct in_ifaddr *ia1;
- for (ia1 = in_ifaddrhead.tqh_first; ia1;
- ia1 = ia1->ia_link.tqe_next)
+ TAILQ_FOREACH(ia1, &in_ifaddrhead, ia_link)
if (ia1->ia_ifp == ifp) {
ip->ip_src = IA_SIN(ia1)->sin_addr;
break;
}
sendit:
+ /*
+ * Force IP TTL to 255 following draft-ietf-zeroconf-ipv4-linklocal.txt
+ */
+ if (IN_LINKLOCAL(ntohl(ip->ip_src.s_addr)) || IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr))) {
+ ip_linklocal_stat.iplls_out_total++;
+ if (ip->ip_ttl != MAXTTL) {
+ ip_linklocal_stat.iplls_out_badttl++;
+ ip->ip_ttl = MAXTTL;
+ }
+ }
+
+#if IPSEC
+ /* temporary for testing only: bypass ipsec altogether */
+
+ if (ipsec_bypass != 0)
+ goto skip_ipsec;
+
+ /* get SP for this packet */
+ if (so == NULL)
+ sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, flags, &error);
+ else
+ sp = ipsec4_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error);
+
+ if (sp == NULL) {
+ ipsecstat.out_inval++;
+ goto bad;
+ }
+
+ error = 0;
+
+ /* check policy */
+ switch (sp->policy) {
+ case IPSEC_POLICY_DISCARD:
+ /*
+ * This packet is just discarded.
+ */
+ ipsecstat.out_polvio++;
+ goto bad;
+
+ case IPSEC_POLICY_BYPASS:
+ case IPSEC_POLICY_NONE:
+ /* no need to do IPsec. */
+ goto skip_ipsec;
+
+ case IPSEC_POLICY_IPSEC:
+ if (sp->req == NULL) {
+ /* acquire a policy */
+ error = key_spdacquire(sp);
+ goto bad;
+ }
+ break;
+
+ case IPSEC_POLICY_ENTRUST:
+ default:
+ printf("ip_output: Invalid policy found. %d\n", sp->policy);
+ }
+ {
+ struct ipsec_output_state state;
+ bzero(&state, sizeof(state));
+ state.m = m;
+ if (flags & IP_ROUTETOIF) {
+ state.ro = &iproute;
+ bzero(&iproute, sizeof(iproute));
+ } else
+ state.ro = ro;
+ state.dst = (struct sockaddr *)dst;
+
+ ip->ip_sum = 0;
+
+ /*
+ * XXX
+ * delayed checksums are not currently compatible with IPsec
+ */
+ if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
+ in_delayed_cksum(m);
+ m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
+ }
+
+ HTONS(ip->ip_len);
+ HTONS(ip->ip_off);
+
+ error = ipsec4_output(&state, sp, flags);
+
+ m = state.m;
+ if (flags & IP_ROUTETOIF) {
+ /*
+ * if we have tunnel mode SA, we may need to ignore
+ * IP_ROUTETOIF.
+ */
+ if (state.ro != &iproute || state.ro->ro_rt != NULL) {
+ flags &= ~IP_ROUTETOIF;
+ ro = state.ro;
+ }
+ } else
+ ro = state.ro;
+ dst = (struct sockaddr_in *)state.dst;
+ if (error) {
+ /* mbuf is already reclaimed in ipsec4_output. */
+ m0 = NULL;
+ switch (error) {
+ case EHOSTUNREACH:
+ case ENETUNREACH:
+ case EMSGSIZE:
+ case ENOBUFS:
+ case ENOMEM:
+ break;
+ default:
+ printf("ip4_output (ipsec): error code %d\n", error);
+ /*fall through*/
+ case ENOENT:
+ /* don't show these error codes to the user */
+ error = 0;
+ break;
+ }
+ goto bad;
+ }
+ }
+
+ /* be sure to update variables that are affected by ipsec4_output() */
+ ip = mtod(m, struct ip *);
+#ifdef _IP_VHL
+ hlen = IP_VHL_HL(ip->ip_vhl) << 2;
+#else
+ hlen = ip->ip_hl << 2;
+#endif
+ if (ro->ro_rt == NULL) {
+ if ((flags & IP_ROUTETOIF) == 0) {
+ printf("ip_output: "
+ "can't update route after IPsec processing\n");
+ error = EHOSTUNREACH; /*XXX*/
+ goto bad;
+ }
+ } else {
+ ia = ifatoia(ro->ro_rt->rt_ifa);
+ ifp = ro->ro_rt->rt_ifp;
+ dl_tag = ia->ia_ifa.ifa_dlt;
+ }
+
+ /* make it flipped, again. */
+ NTOHS(ip->ip_len);
+ NTOHS(ip->ip_off);
+skip_ipsec:
+#endif /*IPSEC*/
+
/*
* IpHack's section.
* - Xlate: translate packet's addr/port (NAT).
* - Wrap: fake packet's addr/port <unimpl.>
* - Encapsulate: put it in another IP and send out. <unimp.>
*/
-#if IPFILTER || IPFILTER_LKM
if (fr_checkp) {
struct mbuf *m1 = m;
goto done;
ip = mtod(m = m1, struct ip *);
}
-#endif
-
-#if COMPAT_IPFW
- if (ip_nat_ptr && !(*ip_nat_ptr)(&ip, &m, ifp, IP_NAT_OUT)) {
- error = EACCES;
- goto done;
- }
/*
* Check with the firewall...
*/
- if (ip_fw_chk_ptr) {
+ if (fw_enable && ip_fw_chk_ptr) {
struct sockaddr_in *old = dst;
off = (*ip_fw_chk_ptr)(&ip,
- hlen, ifp, &ip_divert_cookie, &m, &rule, &dst);
+ hlen, ifp, &divert_cookie, &m, &rule, &dst);
/*
* On return we must do the following:
- * m == NULL -> drop the pkt
+ * IP_FW_PORT_DENY_FLAG -> drop the pkt (XXX new)
* 1<=off<= 0xffff -> DIVERT
- * (off & 0x10000) -> send to a DUMMYNET pipe
+ * (off & IP_FW_PORT_DYNT_FLAG) -> send to a DUMMYNET pipe
+ * (off & IP_FW_PORT_TEE_FLAG) -> TEE the packet
* dst != old -> IPFIREWALL_FORWARD
* off==0, dst==old -> accept
* If some of the above modules is not compiled in, then
* unsupported rules), but better play safe and drop
* packets in case of doubt.
*/
- if (!m) { /* firewall said to reject */
- error = EACCES;
- goto done;
+ if ( (off & IP_FW_PORT_DENY_FLAG) || m == NULL) {
+ if (m)
+ m_freem(m);
+ error = EACCES ;
+ goto done ;
}
+ ip = mtod(m, struct ip *);
if (off == 0 && dst == old) /* common case */
goto pass ;
#if DUMMYNET
- if (off & 0x10000) {
+ if ((off & IP_FW_PORT_DYNT_FLAG) != 0) {
/*
* pass the pkt to dummynet. Need to include
- * pipe number, m, ifp, ro, hlen because these are
+ * pipe number, m, ifp, ro, dst because these are
* not recomputed in the next pass.
* All other parameters have been already used and
* so they are not needed anymore.
* XXX note: if the ifp or ro entry are deleted
* while a pkt is in dummynet, we are in trouble!
*/
- dummynet_io(off & 0xffff, DN_TO_IP_OUT, m,ifp,ro,hlen,rule);
- goto done;
+ error = dummynet_io(off & 0xffff, DN_TO_IP_OUT, m,
+ ifp,ro,dst,rule, flags);
+ goto done;
}
#endif
#if IPDIVERT
- if (off > 0 && off < 0x10000) { /* Divert packet */
- ip_divert_port = off & 0xffff ;
- (*ip_protox[IPPROTO_DIVERT]->pr_input)(m, 0);
+ if (off != 0 && (off & IP_FW_PORT_DYNT_FLAG) == 0) {
+ struct mbuf *clone = NULL;
+
+ /* Clone packet if we're doing a 'tee' */
+ if ((off & IP_FW_PORT_TEE_FLAG) != 0)
+ clone = m_dup(m, M_DONTWAIT);
+ /*
+ * XXX
+ * delayed checksums are not currently compatible
+ * with divert sockets.
+ */
+ if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
+ in_delayed_cksum(m);
+ m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
+ }
+
+ /* Restore packet header fields to original values */
+ HTONS(ip->ip_len);
+ HTONS(ip->ip_off);
+
+ /* Deliver packet to divert input routine */
+ ip_divert_cookie = divert_cookie;
+ divert_packet(m, 0, off & 0xffff);
+
+ /* If 'tee', continue with original packet */
+ if (clone != NULL) {
+ m = clone;
+ ip = mtod(m, struct ip *);
+ goto pass;
+ }
goto done;
}
#endif
* as the packet runs through ip_input() as
* it is done through a ISR.
*/
- for (ia = TAILQ_FIRST(&in_ifaddrhead); ia;
- ia = TAILQ_NEXT(ia, ia_link)) {
+ TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
/*
* If the addr to forward to is one
* of ours, we pretend to
ip_fw_fwd_addr = dst;
if (m->m_pkthdr.rcvif == NULL)
m->m_pkthdr.rcvif = ifunit("lo0");
- ip->ip_len = htons((u_short)ip->ip_len);
- ip->ip_off = htons((u_short)ip->ip_off);
- ip->ip_sum = 0;
-
- ip->ip_sum = in_cksum(m, hlen);
-
+ if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
+ m->m_pkthdr.csum_flags |=
+ CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
+ m0->m_pkthdr.csum_data = 0xffff;
+ }
+ m->m_pkthdr.csum_flags |=
+ CSUM_IP_CHECKED | CSUM_IP_VALID;
+ HTONS(ip->ip_len);
+ HTONS(ip->ip_off);
ip_input(m);
goto done;
}
ia = ifatoia(ro_fwd->ro_rt->rt_ifa);
ifp = ro_fwd->ro_rt->rt_ifp;
- dl_tag = ro->ro_rt->rt_dlt;
+ dl_tag = ro_fwd->ro_rt->rt_dlt;
ro_fwd->ro_rt->rt_use++;
if (ro_fwd->ro_rt->rt_flags & RTF_GATEWAY)
dst = (struct sockaddr_in *)ro_fwd->ro_rt->rt_gateway;
(ro_fwd->ro_rt->rt_flags & RTF_BROADCAST);
else
isbroadcast = in_broadcast(dst->sin_addr, ifp);
- RTFREE(ro->ro_rt);
+ rtfree(ro->ro_rt);
ro->ro_rt = ro_fwd->ro_rt;
dst = (struct sockaddr_in *)&ro_fwd->ro_dst;
error = EACCES; /* not sure this is the right error msg */
goto done;
}
-#endif /* COMPAT_IPFW */
pass:
+ m->m_pkthdr.csum_flags |= CSUM_IP;
+ sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_hwassist;
-#if defined(PM)
- /*
- * Processing IP filter/NAT.
- * Return TRUE iff this packet is discarded.
- * Return FALSE iff this packet is accepted.
- */
-
- if (doNatFil && pm_out(ro->ro_rt->rt_ifp, ip, m))
- goto done;
-#endif
-
-#if IPSEC
- /* get SP for this packet */
- if (so == NULL)
- sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, flags, &error);
- else
- sp = ipsec4_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error);
-
- if (sp == NULL) {
- ipsecstat.out_inval++;
- goto bad;
- }
-
- error = 0;
-
- /* check policy */
- switch (sp->policy) {
- case IPSEC_POLICY_DISCARD:
+ if ((ifp->if_hwassist & CSUM_TCP_SUM16) != 0) {
/*
- * This packet is just discarded.
+ * Special case code for GMACE
+ * frames that can be checksummed by GMACE SUM16 HW:
+ * frame >64, no fragments, no UDP
*/
- ipsecstat.out_polvio++;
- goto bad;
-
- case IPSEC_POLICY_BYPASS:
- case IPSEC_POLICY_NONE:
- /* no need to do IPsec. */
- goto skip_ipsec;
-
- case IPSEC_POLICY_IPSEC:
- if (sp->req == NULL) {
- /* XXX should be panic ? */
- printf("ip_output: No IPsec request specified.\n");
- error = EINVAL;
- goto bad;
+ if (apple_hwcksum_tx && (m->m_pkthdr.csum_flags & CSUM_TCP)
+ && (ip->ip_len > 50) && (ip->ip_len <= ifp->if_mtu)) {
+ /* Apple GMAC HW, expects STUFF_OFFSET << 16 | START_OFFSET */
+ u_short offset = (IP_VHL_HL(ip->ip_vhl) << 2) +14 ; /* IP+Enet header length */
+ u_short csumprev= m->m_pkthdr.csum_data & 0xFFFF;
+ m->m_pkthdr.csum_flags = CSUM_DATA_VALID | CSUM_TCP_SUM16; /* for GMAC */
+ m->m_pkthdr.csum_data = (csumprev + offset) << 16 ;
+ m->m_pkthdr.csum_data += offset;
+ sw_csum = CSUM_DELAY_IP; /* do IP hdr chksum in software */
}
- break;
-
- case IPSEC_POLICY_ENTRUST:
- default:
- printf("ip_output: Invalid policy found. %d\n", sp->policy);
- }
-
- ip->ip_len = htons((u_short)ip->ip_len);
- ip->ip_off = htons((u_short)ip->ip_off);
- ip->ip_sum = 0;
-
- {
- struct ipsec_output_state state;
- bzero(&state, sizeof(state));
- state.m = m;
- if (flags & IP_ROUTETOIF) {
- state.ro = &iproute;
- bzero(&iproute, sizeof(iproute));
- } else
- state.ro = ro;
- state.dst = (struct sockaddr *)dst;
-
- error = ipsec4_output(&state, sp, flags);
-
- m = state.m;
- if (flags & IP_ROUTETOIF) {
- /*
- * if we have tunnel mode SA, we may need to ignore
- * IP_ROUTETOIF.
- */
- if (state.ro != &iproute || state.ro->ro_rt != NULL) {
- flags &= ~IP_ROUTETOIF;
- ro = state.ro;
- }
- } else
- ro = state.ro;
- dst = (struct sockaddr_in *)state.dst;
- if (error) {
- /* mbuf is already reclaimed in ipsec4_output. */
- m0 = NULL;
- switch (error) {
- case EHOSTUNREACH:
- case ENETUNREACH:
- case EMSGSIZE:
- case ENOBUFS:
- case ENOMEM:
- break;
- default:
- printf("ip4_output (ipsec): error code %d\n", error);
- /*fall through*/
- case ENOENT:
- /* don't show these error codes to the user */
- error = 0;
- break;
+ else {
+ /* let the software handle any UDP or TCP checksums */
+ sw_csum |= (CSUM_DELAY_DATA & m->m_pkthdr.csum_flags);
}
- goto bad;
}
- }
-
- /* be sure to update variables that are affected by ipsec4_output() */
- ip = mtod(m, struct ip *);
-#ifdef _IP_VHL
- hlen = IP_VHL_HL(ip->ip_vhl) << 2;
-#else
- hlen = ip->ip_hl << 2;
-#endif
- if (ro->ro_rt == NULL) {
- if ((flags & IP_ROUTETOIF) == 0) {
- printf("ip_output: "
- "can't update route after IPsec processing\n");
- error = EHOSTUNREACH; /*XXX*/
- goto bad;
- }
- } else {
- /* nobody uses ia beyond here */
- ifp = ro->ro_rt->rt_ifp;
+
+ if (sw_csum & CSUM_DELAY_DATA) {
+ in_delayed_cksum(m);
+ sw_csum &= ~CSUM_DELAY_DATA;
+ m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
}
-
- /* make it flipped, again. */
- ip->ip_len = ntohs((u_short)ip->ip_len);
- ip->ip_off = ntohs((u_short)ip->ip_off);
-skip_ipsec:
-#endif /*IPSEC*/
+
+ m->m_pkthdr.csum_flags &= ifp->if_hwassist;
/*
- * If small enough for interface, can just send directly.
+ * If small enough for interface, or the interface will take
+ * care of the fragmentation for us, can just send directly.
*/
- if ((u_short)ip->ip_len <= ifp->if_mtu) {
- ip->ip_len = htons((u_short)ip->ip_len);
- ip->ip_off = htons((u_short)ip->ip_off);
+ if ((u_short)ip->ip_len <= ifp->if_mtu ||
+ ifp->if_hwassist & CSUM_FRAGMENT) {
+ HTONS(ip->ip_len);
+ HTONS(ip->ip_off);
ip->ip_sum = 0;
- ip->ip_sum = in_cksum(m, hlen);
+ if (sw_csum & CSUM_DELAY_IP) {
+ ip->ip_sum = in_cksum(m, hlen);
+ }
+
+#ifndef __APPLE__
+ /* Record statistics for this interface address. */
+ if (!(flags & IP_FORWARDING) && ia != NULL) {
+ ia->ia_ifa.if_opackets++;
+ ia->ia_ifa.if_obytes += m->m_pkthdr.len;
+ }
+#endif
+
+#if IPSEC
+ /* clean ipsec history once it goes out of the node */
+ if (ipsec_bypass == 0)
+ ipsec_delaux(m);
+#endif
+#if __APPLE__
error = dlil_output(dl_tag, m, (void *) ro->ro_rt,
(struct sockaddr *)dst, 0);
+#else
+ error = (*ifp->if_output)(ifp, m,
+ (struct sockaddr *)dst, ro->ro_rt);
+#endif
goto done;
}
/*
goto bad;
}
+ /*
+ * if the interface will not calculate checksums on
+ * fragmented packets, then do it here.
+ */
+ if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA &&
+ (ifp->if_hwassist & CSUM_IP_FRAGS) == 0) {
+ in_delayed_cksum(m);
+ if (m == NULL)
+ return(ENOMEM);
+ m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
+ }
+
+
{
int mhlen, firstlen = len;
struct mbuf **mnext = &m->m_nextpkt;
+ int nfrags = 1;
/*
* Loop through length of segment after first fragment,
ipstat.ips_odropped++;
goto sendorfree;
}
- m->m_flags |= (m0->m_flags & M_MCAST);
+ m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG;
m->m_data += max_linkhdr;
mhip = mtod(m, struct ip *);
*mhip = *ip;
}
m->m_pkthdr.len = mhlen + len;
m->m_pkthdr.rcvif = (struct ifnet *)0;
- mhip->ip_off = htons((u_short)mhip->ip_off);
+ m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
+ HTONS(mhip->ip_off);
mhip->ip_sum = 0;
- mhip->ip_sum = in_cksum(m, mhlen);
+ if (sw_csum & CSUM_DELAY_IP) {
+ mhip->ip_sum = in_cksum(m, mhlen);
+ }
*mnext = m;
mnext = &m->m_nextpkt;
- ipstat.ips_ofragments++;
+ nfrags++;
}
+ ipstat.ips_ofragments += nfrags;
+
+ /* set first/last markers for fragment chain */
+ m->m_flags |= M_LASTFRAG;
+ m0->m_flags |= M_FIRSTFRAG | M_FRAG;
+ m0->m_pkthdr.csum_data = nfrags;
+
/*
* Update first fragment by trimming what's been copied out
* and updating header, then send each fragment (in order).
m_adj(m, hlen + firstlen - (u_short)ip->ip_len);
m->m_pkthdr.len = hlen + firstlen;
ip->ip_len = htons((u_short)m->m_pkthdr.len);
- ip->ip_off = htons((u_short)(ip->ip_off | IP_MF));
+ ip->ip_off |= IP_MF;
+ HTONS(ip->ip_off);
ip->ip_sum = 0;
- ip->ip_sum = in_cksum(m, hlen);
-
+ if (sw_csum & CSUM_DELAY_IP) {
+ ip->ip_sum = in_cksum(m, hlen);
+ }
sendorfree:
KERNEL_DEBUG(DBG_LAYER_END, ip->ip_dst.s_addr,
for (m = m0; m; m = m0) {
m0 = m->m_nextpkt;
m->m_nextpkt = 0;
- if (error == 0)
+#if IPSEC
+ /* clean ipsec history once it goes out of the node */
+ if (ipsec_bypass == 0)
+ ipsec_delaux(m);
+#endif
+ if (error == 0) {
+#ifndef __APPLE__
+ /* Record statistics for this interface address. */
+ if (ia != NULL) {
+ ia->ia_ifa.if_opackets++;
+ ia->ia_ifa.if_obytes += m->m_pkthdr.len;
+ }
+#endif
+
+#if __APPLE__
error = dlil_output(dl_tag, m, (void *) ro->ro_rt,
(struct sockaddr *)dst, 0);
- else
+#else
+ error = (*ifp->if_output)(ifp, m,
+ (struct sockaddr *)dst, ro->ro_rt);
+#endif
+ } else
m_freem(m);
}
}
done:
#if IPSEC
+ if (ipsec_bypass == 0) {
if (ro == &iproute && ro->ro_rt) {
- RTFREE(ro->ro_rt);
+ rtfree(ro->ro_rt);
ro->ro_rt = NULL;
}
if (sp != NULL) {
printf("DP ip_output call free SP:%x\n", sp));
key_freesp(sp);
}
+ }
#endif /* IPSEC */
KERNEL_DEBUG(DBG_FNC_IP_OUTPUT | DBG_FUNC_END, error,0,0,0,0);
goto done;
}
+/*
+ * Compute a deferred transport-layer checksum in software and store it
+ * in the packet.
+ *
+ * m	mbuf chain whose data starts with the IP header.  The checksum is
+ *	taken with in_cksum_skip() over ip_len bytes, skipping the IP
+ *	header; m_pkthdr.csum_data (low 16 bits) gives the offset of the
+ *	checksum field relative to the end of the IP header.
+ *
+ * NOTE(review): ip_len is used as-is; the callers visible in this file
+ * convert it with NTOHS() before calling, so it is presumably expected
+ * in host byte order -- confirm for any new caller.
+ *
+ * The function never replaces or frees the chain, so the caller's
+ * pointer to m stays valid on return.  It does not touch csum_flags;
+ * clearing CSUM_DELAY_DATA is left to the caller.
+ */
+void
+in_delayed_cksum(struct mbuf *m)
+{
+	struct ip *ip;
+	u_short csum, offset;
+
+	ip = mtod(m, struct ip *);
+	offset = IP_VHL_HL(ip->ip_vhl) << 2;
+	csum = in_cksum_skip(m, ip->ip_len, offset);
+	/* For CSUM_UDP packets a computed 0 is stored as 0xffff instead. */
+	if (m->m_pkthdr.csum_flags & CSUM_UDP && csum == 0)
+		csum = 0xffff;
+	offset += m->m_pkthdr.csum_data & 0xFFFF;	/* checksum offset */
+
+	if (offset > ip->ip_len)	/* bogus offset */
+		return;
+
+	if (offset + sizeof(u_short) > m->m_len) {
+		/*
+		 * The checksum field does not lie entirely within the
+		 * first mbuf.  Write it through the existing chain rather
+		 * than calling m_pullup(): m_pullup() frees the chain and
+		 * returns NULL on failure, and may return a different head
+		 * on success.  Because m is passed by value, either outcome
+		 * would leave the caller holding a stale pointer, and the
+		 * NULL case would be dereferenced by the store below.
+		 */
+		m_copyback(m, offset, sizeof(csum), (caddr_t)&csum);
+		return;
+	}
+	*(u_short *)(m->m_data + offset) = csum;
+}
+
+
/*
* Insert IP options into preformed packet.
* Adjust IP destination as required for IP source routing,
MGETHDR(n, M_DONTWAIT, MT_HEADER);
if (n == 0)
return (m);
+ n->m_pkthdr.rcvif = (struct ifnet *)0;
n->m_pkthdr.len = m->m_pkthdr.len + optlen;
m->m_len -= sizeof(struct ip);
m->m_data += sizeof(struct ip);
* Copy options from ip to jp,
* omitting those not copied during fragmentation.
*/
-#if !IPFILTER && !IPFILTER_LKM
-static
-#endif
int
ip_optcopy(ip, jp)
struct ip *ip, *jp;
*dp++ = IPOPT_NOP;
optlen = 1;
continue;
- } else
- optlen = cp[IPOPT_OLEN];
+ }
+#if DIAGNOSTIC
+ if (cnt < IPOPT_OLEN + sizeof(*cp))
+ panic("malformed IPv4 option passed to ip_optcopy");
+#endif
+ optlen = cp[IPOPT_OLEN];
+#if DIAGNOSTIC
+ if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
+ panic("malformed IPv4 option passed to ip_optcopy");
+#endif
/* bogus lengths should have been caught by ip_dooptions */
if (optlen > cnt)
optlen = cnt;
case IP_RECVRETOPTS:
case IP_RECVDSTADDR:
case IP_RECVIF:
+#if defined(NFAITH) && NFAITH > 0
case IP_FAITH:
+#endif
error = sooptcopyin(sopt, &optval, sizeof optval,
sizeof optval);
if (error)
OPTSET(INP_RECVIF);
break;
+#if defined(NFAITH) && NFAITH > 0
case IP_FAITH:
OPTSET(INP_FAITH);
break;
+#endif
}
break;
#undef OPTSET
struct mbuf *m;
int optname;
- if (error = sooptgetm(sopt, &m)) /* XXX */
+ if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
break;
- if (error = sooptmcopyin(sopt, m)) /* XXX */
+ if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
break;
priv = (sopt->sopt_p != NULL &&
suser(sopt->sopt_p->p_ucred,
case IP_RECVDSTADDR:
case IP_RECVIF:
case IP_PORTRANGE:
+#if defined(NFAITH) && NFAITH > 0
case IP_FAITH:
+#endif
switch (sopt->sopt_name) {
case IP_TOS:
optval = 0;
break;
+#if defined(NFAITH) && NFAITH > 0
case IP_FAITH:
optval = OPTBIT(INP_FAITH);
break;
+#endif
}
error = sooptcopyout(sopt, &optval, sizeof optval);
break;
case IP_IPSEC_POLICY:
{
struct mbuf *m = NULL;
- size_t len = 0;
caddr_t req = NULL;
+ size_t len = 0;
- if (error = sooptgetm(sopt, &m)) /* XXX */
- break;
- if (error = sooptmcopyin(sopt, m)) /* XXX */
- break;
- if (m) {
+ if (m != 0) {
req = mtod(m, caddr_t);
len = m->m_len;
}
-
error = ipsec4_get_policy(sotoinpcb(so), req, len, &m);
if (error == 0)
- error = sooptmcopyout(sopt, m); /* XXX */
-
- /* if error, m_freem called at soopt_mcopyout(). */
+ error = soopt_mcopyout(sopt, m); /* XXX */
if (error == 0)
m_freem(m);
break;
* transmission, and one (IP_MULTICAST_TTL) totally duplicates a
* standard option (IP_TTL).
*/
+
+/*
+ * Map an address supplied for a multicast option to an interface.
+ *
+ * Following RFC1724 section 3.3, an address in 0.0.0.0/8 is interpreted
+ * as an interface index: the low 24 bits index ifindex2ifnet[], and an
+ * index greater than if_index yields NULL.  Any other address is
+ * resolved with INADDR_TO_IFP().
+ *
+ * a		address to resolve (network byte order, read via ntohl()).
+ * ifindexp	optional; when non-NULL it receives the interface index
+ *		that was used, or 0 when the address was not in index form
+ *		or the index was out of range.
+ *
+ * Returns the matching ifnet, or NULL when none was found.
+ */
+static struct ifnet *
+ip_multicast_if(a, ifindexp)
+	struct in_addr *a;
+	int *ifindexp;
+{
+	u_int32_t hostaddr = ntohl(a->s_addr);
+	struct ifnet *found;
+	int idx;
+
+	if (ifindexp != NULL)
+		*ifindexp = 0;
+
+	/* Top octet non-zero: an ordinary interface address. */
+	if ((hostaddr >> 24) != 0) {
+		INADDR_TO_IFP(*a, found);
+		return found;
+	}
+
+	/* 0.0.0.0/8: the remaining 24 bits carry the interface index. */
+	idx = hostaddr & 0xffffff;
+	if (idx < 0 || idx > if_index)
+		return NULL;
+
+	if (ifindexp != NULL)
+		*ifindexp = idx;
+	found = ifindex2ifnet[idx];
+	return found;
+}
+
+
/*
* Set the IP multicast options in response to user setsockopt().
*/
int i;
struct in_addr addr;
struct ip_mreq mreq;
- struct ifnet *ifp;
+ struct ifnet *ifp = NULL;
struct ip_moptions *imo = *imop;
struct route ro;
struct sockaddr_in *dst;
+ int ifindex;
int s;
if (imo == NULL) {
return (ENOBUFS);
*imop = imo;
imo->imo_multicast_ifp = NULL;
+ imo->imo_multicast_addr.s_addr = INADDR_ANY;
imo->imo_multicast_vif = -1;
imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
* it supports multicasting.
*/
s = splimp();
- INADDR_TO_IFP(addr, ifp);
+ ifp = ip_multicast_if(&addr, &ifindex);
if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
splx(s);
error = EADDRNOTAVAIL;
break;
}
imo->imo_multicast_ifp = ifp;
+ if (ifindex)
+ imo->imo_multicast_addr = addr;
+ else
+ imo->imo_multicast_addr.s_addr = INADDR_ANY;
splx(s);
break;
dst->sin_family = AF_INET;
dst->sin_addr = mreq.imr_multiaddr;
rtalloc(&ro);
- if (ro.ro_rt == NULL) {
- error = EADDRNOTAVAIL;
- splx(s);
- break;
+ if (ro.ro_rt != NULL) {
+ ifp = ro.ro_rt->rt_ifp;
+ rtfree(ro.ro_rt);
+ }
+ else {
+ /* If there's no default route, try using loopback */
+ mreq.imr_interface.s_addr = INADDR_LOOPBACK;
}
- ifp = ro.ro_rt->rt_ifp;
- rtfree(ro.ro_rt);
}
- else {
- INADDR_TO_IFP(mreq.imr_interface, ifp);
+
+ if (ifp == NULL) {
+ ifp = ip_multicast_if(&mreq.imr_interface, NULL);
}
/*
if (mreq.imr_interface.s_addr == INADDR_ANY)
ifp = NULL;
else {
- INADDR_TO_IFP(mreq.imr_interface, ifp);
+ ifp = ip_multicast_if(&mreq.imr_interface, NULL);
if (ifp == NULL) {
error = EADDRNOTAVAIL;
splx(s);
case IP_MULTICAST_IF:
if (imo == NULL || imo->imo_multicast_ifp == NULL)
addr.s_addr = INADDR_ANY;
- else {
+ else if (imo->imo_multicast_addr.s_addr) {
+ /* return the value user has set */
+ addr = imo->imo_multicast_addr;
+ } else {
IFP_TO_IA(imo->imo_multicast_ifp, ia);
addr.s_addr = (ia == NULL) ? INADDR_ANY
: IA_SIN(ia)->sin_addr.s_addr;
if (imo != NULL) {
for (i = 0; i < imo->imo_num_memberships; ++i)
- in_delmulti(imo->imo_membership[i]);
+ if (imo->imo_membership[i] != NULL)
+ in_delmulti(imo->imo_membership[i]);
FREE(imo, M_IPMOPTS);
}
}
* than the interface's MTU. Can this possibly matter?
*/
ip = mtod(copym, struct ip *);
- ip->ip_len = htons((u_short)ip->ip_len);
- ip->ip_off = htons((u_short)ip->ip_off);
+ HTONS(ip->ip_len);
+ HTONS(ip->ip_off);
ip->ip_sum = 0;
ip->ip_sum = in_cksum(copym, hlen);
-
/*
* NB:
* It's not clear whether there are any lingering
}
#endif
+
+ /*
+ * Mark checksum as valid or calculate checksum for loopback.
+ *
+ * This is done this way because we have to embed the ifp of
+ * the interface we will send the original copy of the packet
+ * out on in the mbuf. ip_input will check if_hwassist of the
+ * embedded ifp and ignore all csum_flags if if_hwassist is 0.
+ * The UDP checksum has not been calculated yet.
+ */
+ if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
+ if (ifp->if_hwassist) {
+ copym->m_pkthdr.csum_flags |=
+ CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
+ CSUM_IP_CHECKED | CSUM_IP_VALID;
+ copym->m_pkthdr.csum_data = 0xffff;
+ } else {
+ NTOHS(ip->ip_len);
+ in_delayed_cksum(copym);
+ HTONS(ip->ip_len);
+ }
+ }
+
+
/*
* TedW:
* We need to send all loopback traffic down to dlil in case
* to make the loopback driver compliant with the data link
* requirements.
*/
- if (lo_dl_tag)
- { copym->m_pkthdr.rcvif = ifp;
+ if (lo_dl_tag) {
+ copym->m_pkthdr.rcvif = ifp;
dlil_output(lo_dl_tag, copym, 0, (struct sockaddr *) dst, 0);
} else {
printf("Warning: ip_output call to dlil_find_dltag failed!\n");