2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
20 * @APPLE_LICENSE_HEADER_END@
23 * Copyright (c) 1982, 1986, 1988, 1990, 1993
24 * The Regents of the University of California. All rights reserved.
26 * Redistribution and use in source and binary forms, with or without
27 * modification, are permitted provided that the following conditions
29 * 1. Redistributions of source code must retain the above copyright
30 * notice, this list of conditions and the following disclaimer.
31 * 2. Redistributions in binary form must reproduce the above copyright
32 * notice, this list of conditions and the following disclaimer in the
33 * documentation and/or other materials provided with the distribution.
34 * 3. All advertising materials mentioning features or use of this software
35 * must display the following acknowledgement:
36 * This product includes software developed by the University of
37 * California, Berkeley and its contributors.
38 * 4. Neither the name of the University nor the names of its contributors
39 * may be used to endorse or promote products derived from this software
40 * without specific prior written permission.
42 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
43 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
44 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
45 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
46 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
47 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
48 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
49 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
50 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
51 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
54 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94
55 * $FreeBSD: src/sys/netinet/ip_output.c,v 1.99.2.16 2001/07/19 06:37:26 kris Exp $
60 #include <sys/param.h>
61 #include <sys/systm.h>
62 #include <sys/kernel.h>
63 #include <sys/malloc.h>
65 #include <sys/protosw.h>
66 #include <sys/socket.h>
67 #include <sys/socketvar.h>
70 #include <net/route.h>
72 #include <netinet/in.h>
73 #include <netinet/in_systm.h>
74 #include <netinet/ip.h>
75 #include <netinet/in_pcb.h>
76 #include <netinet/in_var.h>
77 #include <netinet/ip_var.h>
82 #include <sys/kdebug.h>
84 #define DBG_LAYER_BEG NETDBG_CODE(DBG_NETIP, 1)
85 #define DBG_LAYER_END NETDBG_CODE(DBG_NETIP, 3)
86 #define DBG_FNC_IP_OUTPUT NETDBG_CODE(DBG_NETIP, (1 << 8) | 1)
87 #define DBG_FNC_IPSEC4_OUTPUT NETDBG_CODE(DBG_NETIP, (2 << 8) | 1)
91 #include <machine/mtpr.h>
95 #include <machine/in_cksum.h>
97 static MALLOC_DEFINE(M_IPMOPTS
, "ip_moptions", "internet multicast options");
101 #include <netinet6/ipsec.h>
102 #include <netkey/key.h>
104 #include <netkey/key_debug.h>
106 #define KEYDEBUG(lev,arg)
110 #include <netinet/ip_fw.h>
113 #include <netinet/ip_dummynet.h>
116 #if IPFIREWALL_FORWARD_DEBUG
/*
 * Debug helper: print the IPv4 address held in a struct in_addr
 * (network byte order) as a dotted quad using printf().
 *
 * The expansion is a single expression without a trailing semicolon, so
 * "print_ip(x);" is exactly one statement and is safe as an unbraced
 * if/else body.  Each octet is cast to unsigned long so the argument
 * type matches the %lu conversion regardless of the width of ntohl()'s
 * return type.  The argument is evaluated four times; pass an lvalue
 * without side effects.
 */
#define print_ip(a)	printf("%lu.%lu.%lu.%lu",			\
	    (unsigned long)((ntohl((a).s_addr) >> 24) & 0xFF),		\
	    (unsigned long)((ntohl((a).s_addr) >> 16) & 0xFF),		\
	    (unsigned long)((ntohl((a).s_addr) >> 8) & 0xFF),		\
	    (unsigned long)(ntohl((a).s_addr) & 0xFF))
125 static struct mbuf
*ip_insertoptions
__P((struct mbuf
*, struct mbuf
*, int *));
126 static struct ifnet
*ip_multicast_if
__P((struct in_addr
*, int *));
127 static void ip_mloopback
128 __P((struct ifnet
*, struct mbuf
*, struct sockaddr_in
*, int));
129 static int ip_getmoptions
130 __P((struct sockopt
*, struct ip_moptions
*));
131 static int ip_pcbopts
__P((int, struct mbuf
**, struct mbuf
*));
132 static int ip_setmoptions
133 __P((struct sockopt
*, struct ip_moptions
**));
135 int ip_createmoptions(struct ip_moptions
**imop
);
136 int ip_addmembership(struct ip_moptions
*imo
, struct ip_mreq
*mreq
);
137 int ip_dropmembership(struct ip_moptions
*imo
, struct ip_mreq
*mreq
);
138 int ip_optcopy
__P((struct ip
*, struct ip
*));
139 extern int (*fr_checkp
) __P((struct ip
*, int, struct ifnet
*, int, struct mbuf
**));
141 extern struct mbuf
* m_dup(register struct mbuf
*m
, int how
);
144 static u_long lo_dl_tag
= 0;
146 void in_delayed_cksum(struct mbuf
*m
);
147 extern int apple_hwcksum_tx
;
148 extern u_long route_generation
;
150 extern struct protosw inetsw
[];
152 extern struct ip_linklocal_stat ip_linklocal_stat
;
154 /* temporary: for testing */
156 extern int ipsec_bypass
;
160 * IP output. The packet in mbuf chain m contains a skeletal IP
161 * header (with len, off, ttl, proto, tos, src, dst).
162 * The mbuf chain containing the packet will be freed.
163 * The mbuf opt, if present, will not be freed.
166 ip_output(m0
, opt
, ro
, flags
, imo
)
171 struct ip_moptions
*imo
;
173 struct ip
*ip
, *mhip
;
174 struct ifnet
*ifp
= NULL
;
176 int hlen
= sizeof (struct ip
);
177 int len
, off
, error
= 0;
178 struct sockaddr_in
*dst
= NULL
;
179 struct in_ifaddr
*ia
= NULL
;
180 int isbroadcast
, sw_csum
;
182 struct route iproute
;
183 struct socket
*so
= NULL
;
184 struct secpolicy
*sp
= NULL
;
186 u_int16_t divert_cookie
; /* firewall cookie */
187 #if IPFIREWALL_FORWARD
188 int fwd_rewrite_src
= 0;
190 struct ip_fw_chain
*rule
= NULL
;
193 /* Get and reset firewall cookie */
194 divert_cookie
= ip_divert_cookie
;
195 ip_divert_cookie
= 0;
200 KERNEL_DEBUG(DBG_FNC_IP_OUTPUT
| DBG_FUNC_START
, 0,0,0,0,0);
202 #if IPFIREWALL && DUMMYNET
204 * dummynet packets have a vestigial mbuf prepended, with
205 * m_type = MT_DUMMYNET and m_data pointing to the matching
208 if (m
->m_type
== MT_DUMMYNET
) {
210 * the packet was already tagged, so part of the
211 * processing was already done, and we need to go down.
212 * Get parameters from the header.
214 rule
= (struct ip_fw_chain
*)(m
->m_data
) ;
216 ro
= & ( ((struct dn_pkt
*)m
)->ro
) ;
218 dst
= ((struct dn_pkt
*)m
)->dn_dst
;
219 ifp
= ((struct dn_pkt
*)m
)->ifp
;
220 flags
= ((struct dn_pkt
*)m
)->flags
;
223 if (ipsec_bypass
== 0 && (flags
& IP_NOIPSEC
) == 0) {
224 so
= ipsec_getsocket(m
);
225 (void)ipsec_setsocket(m
, NULL
);
228 ip
= mtod(m
, struct ip
*);
229 hlen
= IP_VHL_HL(ip
->ip_vhl
) << 2 ;
230 if (ro
->ro_rt
!= NULL
)
231 ia
= (struct in_ifaddr
*)ro
->ro_rt
->rt_ifa
;
237 if (ipsec_bypass
== 0 && (flags
& IP_NOIPSEC
) == 0) {
238 so
= ipsec_getsocket(m
);
239 (void)ipsec_setsocket(m
, NULL
);
244 if ((m
->m_flags
& M_PKTHDR
) == 0)
245 panic("ip_output no HDR");
247 panic("ip_output no route, proto = %d",
248 mtod(m
, struct ip
*)->ip_p
);
251 m
= ip_insertoptions(m
, opt
, &len
);
254 ip
= mtod(m
, struct ip
*);
258 if ((flags
& (IP_FORWARDING
|IP_RAWOUTPUT
)) == 0) {
259 ip
->ip_vhl
= IP_MAKE_VHL(IPVERSION
, hlen
>> 2);
262 ip
->ip_id
= ip_randomid();
264 ip
->ip_id
= htons(ip_id
++);
266 ipstat
.ips_localout
++;
268 hlen
= IP_VHL_HL(ip
->ip_vhl
) << 2;
271 KERNEL_DEBUG(DBG_LAYER_BEG
, ip
->ip_dst
.s_addr
,
272 ip
->ip_src
.s_addr
, ip
->ip_p
, ip
->ip_off
, ip
->ip_len
);
274 dst
= (struct sockaddr_in
*)&ro
->ro_dst
;
277 * If there is a cached route,
278 * check that it is to the same destination
279 * and is still up. If not, free it and try again.
280 * The address family should also be checked in case of sharing the
284 if (ro
->ro_rt
&& (ro
->ro_rt
->generation_id
!= route_generation
) &&
285 ((flags
& (IP_ROUTETOIF
| IP_FORWARDING
)) == 0) && (ip
->ip_src
.s_addr
!= INADDR_ANY
) &&
286 (ifa_foraddr(ip
->ip_src
.s_addr
) == NULL
)) {
287 error
= EADDRNOTAVAIL
;
290 if (ro
->ro_rt
&& ((ro
->ro_rt
->rt_flags
& RTF_UP
) == 0 ||
291 dst
->sin_family
!= AF_INET
||
292 dst
->sin_addr
.s_addr
!= ip
->ip_dst
.s_addr
)) {
294 ro
->ro_rt
= (struct rtentry
*)0;
296 if (ro
->ro_rt
== 0) {
297 bzero(dst
, sizeof(*dst
));
298 dst
->sin_family
= AF_INET
;
299 dst
->sin_len
= sizeof(*dst
);
300 dst
->sin_addr
= ip
->ip_dst
;
303 * If routing to interface only,
304 * short circuit routing lookup.
306 #define ifatoia(ifa) ((struct in_ifaddr *)(ifa))
307 #define sintosa(sin) ((struct sockaddr *)(sin))
308 if (flags
& IP_ROUTETOIF
) {
309 if ((ia
= ifatoia(ifa_ifwithdstaddr(sintosa(dst
)))) == 0 &&
310 (ia
= ifatoia(ifa_ifwithnet(sintosa(dst
)))) == 0) {
311 ipstat
.ips_noroute
++;
317 isbroadcast
= in_broadcast(dst
->sin_addr
, ifp
);
320 * If this is the case, we probably don't want to allocate
321 * a protocol-cloned route since we didn't get one from the
322 * ULP. This lets TCP do its thing, while not burdening
323 * forwarding or ICMP with the overhead of cloning a route.
324 * Of course, we still want to do any cloning requested by
325 * the link layer, as this is probably required in all cases
326 * for correct operation (as it is for ARP).
329 rtalloc_ign(ro
, RTF_PRCLONING
);
330 if (ro
->ro_rt
== 0) {
331 ipstat
.ips_noroute
++;
332 error
= EHOSTUNREACH
;
335 ia
= ifatoia(ro
->ro_rt
->rt_ifa
);
336 ifp
= ro
->ro_rt
->rt_ifp
;
338 if (ro
->ro_rt
->rt_flags
& RTF_GATEWAY
)
339 dst
= (struct sockaddr_in
*)ro
->ro_rt
->rt_gateway
;
340 if (ro
->ro_rt
->rt_flags
& RTF_HOST
)
341 isbroadcast
= (ro
->ro_rt
->rt_flags
& RTF_BROADCAST
);
343 isbroadcast
= in_broadcast(dst
->sin_addr
, ifp
);
345 if (IN_MULTICAST(ntohl(ip
->ip_dst
.s_addr
))) {
346 struct in_multi
*inm
;
348 m
->m_flags
|= M_MCAST
;
350 * IP destination address is multicast. Make sure "dst"
351 * still points to the address in "ro". (It may have been
352 * changed to point to a gateway address, above.)
354 dst
= (struct sockaddr_in
*)&ro
->ro_dst
;
356 * See if the caller provided any multicast options
359 if ((flags
& IP_RAWOUTPUT
) == 0) ip
->ip_ttl
= imo
->imo_multicast_ttl
;
360 if (imo
->imo_multicast_ifp
!= NULL
) {
361 ifp
= imo
->imo_multicast_ifp
;
363 if (imo
->imo_multicast_vif
!= -1 &&
364 ((flags
& IP_RAWOUTPUT
) == 0 || ip
->ip_src
.s_addr
== INADDR_ANY
))
366 ip_mcast_src(imo
->imo_multicast_vif
);
368 if ((flags
& IP_RAWOUTPUT
) == 0) ip
->ip_ttl
= IP_DEFAULT_MULTICAST_TTL
;
370 * Confirm that the outgoing interface supports multicast.
372 if ((imo
== NULL
) || (imo
->imo_multicast_vif
== -1)) {
373 if ((ifp
->if_flags
& IFF_MULTICAST
) == 0) {
374 ipstat
.ips_noroute
++;
380 * If source address not specified yet, use address
381 * of outgoing interface.
383 if (ip
->ip_src
.s_addr
== INADDR_ANY
) {
384 register struct in_ifaddr
*ia1
;
386 TAILQ_FOREACH(ia1
, &in_ifaddrhead
, ia_link
)
387 if (ia1
->ia_ifp
== ifp
) {
388 ip
->ip_src
= IA_SIN(ia1
)->sin_addr
;
392 if (ip
->ip_src
.s_addr
== INADDR_ANY
) {
398 IN_LOOKUP_MULTI(ip
->ip_dst
, ifp
, inm
);
400 (imo
== NULL
|| imo
->imo_multicast_loop
)) {
402 * If we belong to the destination multicast group
403 * on the outgoing interface, and the caller did not
404 * forbid loopback, loop back a copy.
406 ip_mloopback(ifp
, m
, dst
, hlen
);
410 * If we are acting as a multicast router, perform
411 * multicast forwarding as if the packet had just
412 * arrived on the interface to which we are about
413 * to send. The multicast forwarding function
414 * recursively calls this function, using the
415 * IP_FORWARDING flag to prevent infinite recursion.
417 * Multicasts that are looped back by ip_mloopback(),
418 * above, will be forwarded by the ip_input() routine,
421 if (ip_mrouter
&& (flags
& IP_FORWARDING
) == 0) {
423 * Check if rsvp daemon is running. If not, don't
424 * set ip_moptions. This ensures that the packet
425 * is multicast and not just sent down one link
426 * as prescribed by rsvpd.
430 if (ip_mforward(ip
, ifp
, m
, imo
) != 0) {
438 * Multicasts with a time-to-live of zero may be looped-
439 * back, above, but must not be transmitted on a network.
440 * Also, multicasts addressed to the loopback interface
441 * are not sent -- the above call to ip_mloopback() will
442 * loop back a copy if this host actually belongs to the
443 * destination group on the loopback interface.
445 if (ip
->ip_ttl
== 0 || ifp
->if_flags
& IFF_LOOPBACK
) {
454 * If source address not specified yet, use address
455 * of outgoing interface.
457 if (ip
->ip_src
.s_addr
== INADDR_ANY
) {
458 ip
->ip_src
= IA_SIN(ia
)->sin_addr
;
459 #if IPFIREWALL_FORWARD
460 /* Keep note that we did this - if the firewall changes
461 * the next-hop, our interface may change, changing the
462 * default source IP. It's a shame so much effort happens
466 #endif /* IPFIREWALL_FORWARD */
470 * Verify that we have any chance at all of being able to queue
471 * the packet or packet fragments
473 if ((ifp
->if_snd
.ifq_len
+ ip
->ip_len
/ ifp
->if_mtu
+ 1) >=
474 ifp
->if_snd
.ifq_maxlen
) {
480 * Look for broadcast address and
481 * verify user is allowed to send
485 if ((ifp
->if_flags
& IFF_BROADCAST
) == 0) {
486 error
= EADDRNOTAVAIL
;
489 if ((flags
& IP_ALLOWBROADCAST
) == 0) {
493 /* don't allow broadcast messages to be fragmented */
494 if ((u_short
)ip
->ip_len
> ifp
->if_mtu
) {
498 m
->m_flags
|= M_BCAST
;
500 m
->m_flags
&= ~M_BCAST
;
505 * Force IP TTL to 255 following draft-ietf-zeroconf-ipv4-linklocal.txt
507 if (IN_LINKLOCAL(ntohl(ip
->ip_src
.s_addr
)) || IN_LINKLOCAL(ntohl(ip
->ip_dst
.s_addr
))) {
508 ip_linklocal_stat
.iplls_out_total
++;
509 if (ip
->ip_ttl
!= MAXTTL
) {
510 ip_linklocal_stat
.iplls_out_badttl
++;
516 /* temporary for testing only: bypass ipsec altogether */
518 if (ipsec_bypass
!= 0 || (flags
& IP_NOIPSEC
) != 0)
521 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT
| DBG_FUNC_START
, 0,0,0,0,0);
523 /* get SP for this packet */
525 sp
= ipsec4_getpolicybyaddr(m
, IPSEC_DIR_OUTBOUND
, flags
, &error
);
527 sp
= ipsec4_getpolicybysock(m
, IPSEC_DIR_OUTBOUND
, so
, &error
);
530 ipsecstat
.out_inval
++;
531 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT
| DBG_FUNC_END
, 0,0,0,0,0);
538 switch (sp
->policy
) {
539 case IPSEC_POLICY_DISCARD
:
541 * This packet is just discarded.
543 ipsecstat
.out_polvio
++;
544 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT
| DBG_FUNC_END
, 1,0,0,0,0);
547 case IPSEC_POLICY_BYPASS
:
548 case IPSEC_POLICY_NONE
:
549 /* no need to do IPsec. */
550 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT
| DBG_FUNC_END
, 2,0,0,0,0);
553 case IPSEC_POLICY_IPSEC
:
554 if (sp
->req
== NULL
) {
555 /* acquire a policy */
556 error
= key_spdacquire(sp
);
557 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT
| DBG_FUNC_END
, 3,0,0,0,0);
562 case IPSEC_POLICY_ENTRUST
:
564 printf("ip_output: Invalid policy found. %d\n", sp
->policy
);
567 struct ipsec_output_state state
;
568 bzero(&state
, sizeof(state
));
570 if (flags
& IP_ROUTETOIF
) {
572 bzero(&iproute
, sizeof(iproute
));
575 state
.dst
= (struct sockaddr
*)dst
;
581 * delayed checksums are not currently compatible with IPsec
583 if (m
->m_pkthdr
.csum_flags
& CSUM_DELAY_DATA
) {
585 m
->m_pkthdr
.csum_flags
&= ~CSUM_DELAY_DATA
;
591 error
= ipsec4_output(&state
, sp
, flags
);
595 if (flags
& IP_ROUTETOIF
) {
597 * if we have tunnel mode SA, we may need to ignore
600 if (state
.ro
!= &iproute
|| state
.ro
->ro_rt
!= NULL
) {
601 flags
&= ~IP_ROUTETOIF
;
607 dst
= (struct sockaddr_in
*)state
.dst
;
609 /* mbuf is already reclaimed in ipsec4_output. */
619 printf("ip4_output (ipsec): error code %d\n", error
);
622 /* don't show these error codes to the user */
626 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT
| DBG_FUNC_END
, 4,0,0,0,0);
631 /* be sure to update variables that are affected by ipsec4_output() */
632 ip
= mtod(m
, struct ip
*);
635 hlen
= IP_VHL_HL(ip
->ip_vhl
) << 2;
637 hlen
= ip
->ip_hl
<< 2;
639 /* Check that there wasn't a route change and src is still valid */
641 if (ro
->ro_rt
->generation_id
!= route_generation
) {
642 if (ifa_foraddr(ip
->ip_src
.s_addr
) == NULL
&& ((flags
& (IP_ROUTETOIF
| IP_FORWARDING
)) == 0)) {
643 error
= EADDRNOTAVAIL
;
644 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT
| DBG_FUNC_END
, 5,0,0,0,0);
651 if (ro
->ro_rt
== NULL
) {
652 if ((flags
& IP_ROUTETOIF
) == 0) {
654 "can't update route after IPsec processing\n");
655 error
= EHOSTUNREACH
; /*XXX*/
656 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT
| DBG_FUNC_END
, 6,0,0,0,0);
660 ia
= ifatoia(ro
->ro_rt
->rt_ifa
);
661 ifp
= ro
->ro_rt
->rt_ifp
;
664 /* make it flipped, again. */
667 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT
| DBG_FUNC_END
, 7,0xff,0xff,0xff,0xff);
673 * - Xlate: translate packet's addr/port (NAT).
674 * - Firewall: deny/allow/etc.
675 * - Wrap: fake packet's addr/port <unimpl.>
676 * - Encapsulate: put it in another IP and send out. <unimp.>
681 if ((error
= (*fr_checkp
)(ip
, hlen
, ifp
, 1, &m1
)) || !m1
)
683 ip
= mtod(m0
= m
= m1
, struct ip
*);
687 * Check with the firewall...
689 if (fw_enable
&& ip_fw_chk_ptr
) {
690 struct sockaddr_in
*old
= dst
;
692 off
= (*ip_fw_chk_ptr
)(&ip
,
693 hlen
, ifp
, &divert_cookie
, &m
, &rule
, &dst
);
695 * On return we must do the following:
696 * IP_FW_PORT_DENY_FLAG -> drop the pkt (XXX new)
697 * 1<=off<= 0xffff -> DIVERT
698 * (off & IP_FW_PORT_DYNT_FLAG) -> send to a DUMMYNET pipe
699 * (off & IP_FW_PORT_TEE_FLAG) -> TEE the packet
700 * dst != old -> IPFIREWALL_FORWARD
701 * off==0, dst==old -> accept
702 * If some of the above modules are not compiled in, then
703 * we shouldn't have to check the corresponding condition
704 * (because the ipfw control socket should not accept
705 * unsupported rules), but better play safe and drop
706 * packets in case of doubt.
709 if ( (off
& IP_FW_PORT_DENY_FLAG
) || m
== NULL
) {
715 ip
= mtod(m
, struct ip
*);
716 if (off
== 0 && dst
== old
) /* common case */
719 if ((off
& IP_FW_PORT_DYNT_FLAG
) != 0) {
721 * pass the pkt to dummynet. Need to include
722 * pipe number, m, ifp, ro, dst because these are
723 * not recomputed in the next pass.
724 * All other parameters have been already used and
725 * so they are not needed anymore.
726 * XXX note: if the ifp or ro entry are deleted
727 * while a pkt is in dummynet, we are in trouble!
729 error
= dummynet_io(off
& 0xffff, DN_TO_IP_OUT
, m
,
730 ifp
,ro
,dst
,rule
, flags
);
735 if (off
!= 0 && (off
& IP_FW_PORT_DYNT_FLAG
) == 0) {
736 struct mbuf
*clone
= NULL
;
738 /* Clone packet if we're doing a 'tee' */
739 if ((off
& IP_FW_PORT_TEE_FLAG
) != 0)
740 clone
= m_dup(m
, M_DONTWAIT
);
743 * delayed checksums are not currently compatible
744 * with divert sockets.
746 if (m
->m_pkthdr
.csum_flags
& CSUM_DELAY_DATA
) {
748 m
->m_pkthdr
.csum_flags
&= ~CSUM_DELAY_DATA
;
751 /* Restore packet header fields to original values */
755 /* Deliver packet to divert input routine */
756 ip_divert_cookie
= divert_cookie
;
757 divert_packet(m
, 0, off
& 0xffff);
759 /* If 'tee', continue with original packet */
762 ip
= mtod(m
, struct ip
*);
769 #if IPFIREWALL_FORWARD
770 /* Here we check dst to make sure it's directly reachable on the
771 * interface we previously thought it was.
772 * If it isn't (which may be likely in some situations) we have
773 * to re-route it (ie, find a route for the next-hop and the
774 * associated interface) and set them here. This is nested
775 * forwarding which in most cases is undesirable, except where
776 * such control is nigh impossible. So we do it here.
779 if (off
== 0 && old
!= dst
) {
780 struct in_ifaddr
*ia
;
782 /* It's changed... */
783 /* There must be a better way to do this next line... */
784 static struct route sro_fwd
, *ro_fwd
= &sro_fwd
;
785 #if IPFIREWALL_FORWARD_DEBUG
786 printf("IPFIREWALL_FORWARD: New dst ip: ");
787 print_ip(dst
->sin_addr
);
791 * We need to figure out if we have been forwarded
792 * to a local socket. If so then we should somehow
793 * "loop back" to ip_input, and get directed to the
794 * PCB as if we had received this packet. This is
795 * because it may be difficult to identify the packets
796 * you want to forward until they are being output
797 * and have selected an interface. (e.g. locally
798 * initiated packets) If we used the loopback interface,
799 * we would not be able to control what happens
800 * as the packet runs through ip_input() as
801 * it is done through a ISR.
803 TAILQ_FOREACH(ia
, &in_ifaddrhead
, ia_link
) {
805 * If the addr to forward to is one
806 * of ours, we pretend to
807 * be the destination for this packet.
809 if (IA_SIN(ia
)->sin_addr
.s_addr
==
810 dst
->sin_addr
.s_addr
)
814 /* tell ip_input "dont filter" */
815 ip_fw_fwd_addr
= dst
;
816 if (m
->m_pkthdr
.rcvif
== NULL
)
817 m
->m_pkthdr
.rcvif
= ifunit("lo0");
818 if (m
->m_pkthdr
.csum_flags
& CSUM_DELAY_DATA
) {
819 m
->m_pkthdr
.csum_flags
|=
820 CSUM_DATA_VALID
| CSUM_PSEUDO_HDR
;
821 m
->m_pkthdr
.csum_data
= 0xffff;
823 m
->m_pkthdr
.csum_flags
|=
824 CSUM_IP_CHECKED
| CSUM_IP_VALID
;
830 /* Some of the logic for this was
833 * This rewrites the cached route in a local PCB.
834 * Is this what we want to do?
836 bcopy(dst
, &ro_fwd
->ro_dst
, sizeof(*dst
));
839 rtalloc_ign(ro_fwd
, RTF_PRCLONING
);
841 if (ro_fwd
->ro_rt
== 0) {
842 ipstat
.ips_noroute
++;
843 error
= EHOSTUNREACH
;
847 ia
= ifatoia(ro_fwd
->ro_rt
->rt_ifa
);
848 ifp
= ro_fwd
->ro_rt
->rt_ifp
;
849 ro_fwd
->ro_rt
->rt_use
++;
850 if (ro_fwd
->ro_rt
->rt_flags
& RTF_GATEWAY
)
851 dst
= (struct sockaddr_in
*)ro_fwd
->ro_rt
->rt_gateway
;
852 if (ro_fwd
->ro_rt
->rt_flags
& RTF_HOST
)
854 (ro_fwd
->ro_rt
->rt_flags
& RTF_BROADCAST
);
856 isbroadcast
= in_broadcast(dst
->sin_addr
, ifp
);
858 ro
->ro_rt
= ro_fwd
->ro_rt
;
859 dst
= (struct sockaddr_in
*)&ro_fwd
->ro_dst
;
862 * If we added a default src ip earlier,
863 * which would have been gotten from the-then
864 * interface, do it again, from the new one.
867 ip
->ip_src
= IA_SIN(ia
)->sin_addr
;
870 #endif /* IPFIREWALL_FORWARD */
872 * if we get here, none of the above matches, and
873 * we have to drop the pkt
876 error
= EACCES
; /* not sure this is the right error msg */
882 /* Do not allow loopback address to wind up on a wire */
883 if ((ifp
->if_flags
& IFF_LOOPBACK
) == 0 &&
884 ((ntohl(ip
->ip_src
.s_addr
) >> IN_CLASSA_NSHIFT
) == IN_LOOPBACKNET
||
885 (ntohl(ip
->ip_dst
.s_addr
) >> IN_CLASSA_NSHIFT
) == IN_LOOPBACKNET
)) {
886 ipstat
.ips_badaddr
++;
889 * Simply drop the packet just like a firewall -- we do not want the
890 * application to feel the pain, not yet...
891 * Returning ENETUNREACH like ip6_output does in some similar cases
892 * could startle the otherwise clueless process that specifies
893 * loopback as the source address.
898 m
->m_pkthdr
.csum_flags
|= CSUM_IP
;
899 sw_csum
= m
->m_pkthdr
.csum_flags
900 & ~IF_HWASSIST_CSUM_FLAGS(ifp
->if_hwassist
);
902 if ((ifp
->if_hwassist
& CSUM_TCP_SUM16
) != 0) {
904 * Special case code for GMACE
905 * frames that can be checksummed by GMACE SUM16 HW:
906 * frame >64, no fragments, no UDP
908 if (apple_hwcksum_tx
&& (m
->m_pkthdr
.csum_flags
& CSUM_TCP
)
909 && (ip
->ip_len
> 50) && (ip
->ip_len
<= ifp
->if_mtu
)) {
910 /* Apple GMAC HW, expects STUFF_OFFSET << 16 | START_OFFSET */
911 u_short offset
= (IP_VHL_HL(ip
->ip_vhl
) << 2) +14 ; /* IP+Enet header length */
912 u_short csumprev
= m
->m_pkthdr
.csum_data
& 0xFFFF;
913 m
->m_pkthdr
.csum_flags
= CSUM_DATA_VALID
| CSUM_TCP_SUM16
; /* for GMAC */
914 m
->m_pkthdr
.csum_data
= (csumprev
+ offset
) << 16 ;
915 m
->m_pkthdr
.csum_data
+= offset
;
916 sw_csum
= CSUM_DELAY_IP
; /* do IP hdr chksum in software */
919 /* let the software handle any UDP or TCP checksums */
920 sw_csum
|= (CSUM_DELAY_DATA
& m
->m_pkthdr
.csum_flags
);
924 if (sw_csum
& CSUM_DELAY_DATA
) {
926 sw_csum
&= ~CSUM_DELAY_DATA
;
927 m
->m_pkthdr
.csum_flags
&= ~CSUM_DELAY_DATA
;
930 m
->m_pkthdr
.csum_flags
&= IF_HWASSIST_CSUM_FLAGS(ifp
->if_hwassist
);
933 * If small enough for interface, or the interface will take
934 * care of the fragmentation for us, can just send directly.
936 if ((u_short
)ip
->ip_len
<= ifp
->if_mtu
||
937 ifp
->if_hwassist
& CSUM_FRAGMENT
) {
941 if (sw_csum
& CSUM_DELAY_IP
) {
942 ip
->ip_sum
= in_cksum(m
, hlen
);
946 /* Record statistics for this interface address. */
947 if (!(flags
& IP_FORWARDING
) && ia
!= NULL
) {
948 ia
->ia_ifa
.if_opackets
++;
949 ia
->ia_ifa
.if_obytes
+= m
->m_pkthdr
.len
;
954 /* clean ipsec history once it goes out of the node */
955 if (ipsec_bypass
== 0 && (flags
& IP_NOIPSEC
) == 0)
959 error
= dlil_output(ifptodlt(ifp
, PF_INET
), m
, (void *) ro
->ro_rt
,
960 (struct sockaddr
*)dst
, 0);
962 error
= (*ifp
->if_output
)(ifp
, m
,
963 (struct sockaddr
*)dst
, ro
->ro_rt
);
968 * Too large for interface; fragment if possible.
969 * Must be able to put at least 8 bytes per fragment.
971 if (ip
->ip_off
& IP_DF
) {
974 * This case can happen if the user changed the MTU
975 * of an interface after enabling IP on it. Because
976 * most netifs don't keep track of routes pointing to
977 * them, there is no way for one to update all its
978 * routes when the MTU is changed.
980 if ((ro
->ro_rt
->rt_flags
& (RTF_UP
| RTF_HOST
))
981 && !(ro
->ro_rt
->rt_rmx
.rmx_locks
& RTV_MTU
)
982 && (ro
->ro_rt
->rt_rmx
.rmx_mtu
> ifp
->if_mtu
)) {
983 ro
->ro_rt
->rt_rmx
.rmx_mtu
= ifp
->if_mtu
;
985 ipstat
.ips_cantfrag
++;
988 len
= (ifp
->if_mtu
- hlen
) &~ 7;
995 * if the interface will not calculate checksums on
996 * fragmented packets, then do it here.
998 if (m
->m_pkthdr
.csum_flags
& CSUM_DELAY_DATA
&&
999 (ifp
->if_hwassist
& CSUM_IP_FRAGS
) == 0) {
1000 in_delayed_cksum(m
);
1003 m
->m_pkthdr
.csum_flags
&= ~CSUM_DELAY_DATA
;
1008 int mhlen
, firstlen
= len
;
1009 struct mbuf
**mnext
= &m
->m_nextpkt
;
1013 * Loop through length of segment after first fragment,
1014 * make new header and copy data of each part and link onto chain.
1017 mhlen
= sizeof (struct ip
);
1018 for (off
= hlen
+ len
; off
< (u_short
)ip
->ip_len
; off
+= len
) {
1019 MGETHDR(m
, M_DONTWAIT
, MT_HEADER
);
1022 ipstat
.ips_odropped
++;
1025 m
->m_flags
|= (m0
->m_flags
& M_MCAST
) | M_FRAG
;
1026 m
->m_data
+= max_linkhdr
;
1027 mhip
= mtod(m
, struct ip
*);
1029 if (hlen
> sizeof (struct ip
)) {
1030 mhlen
= ip_optcopy(ip
, mhip
) + sizeof (struct ip
);
1031 mhip
->ip_vhl
= IP_MAKE_VHL(IPVERSION
, mhlen
>> 2);
1034 mhip
->ip_off
= ((off
- hlen
) >> 3) + (ip
->ip_off
& ~IP_MF
);
1035 if (ip
->ip_off
& IP_MF
)
1036 mhip
->ip_off
|= IP_MF
;
1037 if (off
+ len
>= (u_short
)ip
->ip_len
)
1038 len
= (u_short
)ip
->ip_len
- off
;
1040 mhip
->ip_off
|= IP_MF
;
1041 mhip
->ip_len
= htons((u_short
)(len
+ mhlen
));
1042 m
->m_next
= m_copy(m0
, off
, len
);
1043 if (m
->m_next
== 0) {
1045 error
= ENOBUFS
; /* ??? */
1046 ipstat
.ips_odropped
++;
1049 m
->m_pkthdr
.len
= mhlen
+ len
;
1050 m
->m_pkthdr
.rcvif
= (struct ifnet
*)0;
1051 m
->m_pkthdr
.csum_flags
= m0
->m_pkthdr
.csum_flags
;
1052 HTONS(mhip
->ip_off
);
1054 if (sw_csum
& CSUM_DELAY_IP
) {
1055 mhip
->ip_sum
= in_cksum(m
, mhlen
);
1058 mnext
= &m
->m_nextpkt
;
1061 ipstat
.ips_ofragments
+= nfrags
;
1063 /* set first/last markers for fragment chain */
1064 m
->m_flags
|= M_LASTFRAG
;
1065 m0
->m_flags
|= M_FIRSTFRAG
| M_FRAG
;
1066 m0
->m_pkthdr
.csum_data
= nfrags
;
1069 * Update first fragment by trimming what's been copied out
1070 * and updating header, then send each fragment (in order).
1073 m_adj(m
, hlen
+ firstlen
- (u_short
)ip
->ip_len
);
1074 m
->m_pkthdr
.len
= hlen
+ firstlen
;
1075 ip
->ip_len
= htons((u_short
)m
->m_pkthdr
.len
);
1076 ip
->ip_off
|= IP_MF
;
1079 if (sw_csum
& CSUM_DELAY_IP
) {
1080 ip
->ip_sum
= in_cksum(m
, hlen
);
1084 KERNEL_DEBUG(DBG_LAYER_END
, ip
->ip_dst
.s_addr
,
1085 ip
->ip_src
.s_addr
, ip
->ip_p
, ip
->ip_off
, ip
->ip_len
);
1087 for (m
= m0
; m
; m
= m0
) {
1091 /* clean ipsec history once it goes out of the node */
1092 if (ipsec_bypass
== 0 && (flags
& IP_NOIPSEC
) == 0)
1097 /* Record statistics for this interface address. */
1099 ia
->ia_ifa
.if_opackets
++;
1100 ia
->ia_ifa
.if_obytes
+= m
->m_pkthdr
.len
;
1105 error
= dlil_output(ifptodlt(ifp
, PF_INET
), m
, (void *) ro
->ro_rt
,
1106 (struct sockaddr
*)dst
, 0);
1108 error
= (*ifp
->if_output
)(ifp
, m
,
1109 (struct sockaddr
*)dst
, ro
->ro_rt
);
1116 ipstat
.ips_fragmented
++;
1120 if (ipsec_bypass
== 0 && (flags
& IP_NOIPSEC
) == 0) {
1121 if (ro
== &iproute
&& ro
->ro_rt
) {
1126 KEYDEBUG(KEYDEBUG_IPSEC_STAMP
,
1127 printf("DP ip_output call free SP:%x\n", sp
));
1133 KERNEL_DEBUG(DBG_FNC_IP_OUTPUT
| DBG_FUNC_END
, error
,0,0,0,0);
1141 in_delayed_cksum(struct mbuf
*m
)
1144 u_short csum
, offset
;
1145 ip
= mtod(m
, struct ip
*);
1146 offset
= IP_VHL_HL(ip
->ip_vhl
) << 2 ;
1147 csum
= in_cksum_skip(m
, ip
->ip_len
, offset
);
1148 if (m
->m_pkthdr
.csum_flags
& CSUM_UDP
&& csum
== 0)
1150 offset
+= m
->m_pkthdr
.csum_data
& 0xFFFF; /* checksum offset */
1152 if (offset
> ip
->ip_len
) /* bogus offset */
1155 if (offset
+ sizeof(u_short
) > m
->m_len
) {
1156 printf("delayed m_pullup, m->len: %d off: %d p: %d\n",
1157 m
->m_len
, offset
, ip
->ip_p
);
1160 * this shouldn't happen, but if it does, the
1161 * correct behavior may be to insert the checksum
1162 * in the existing chain instead of rearranging it.
1164 m
= m_pullup(m
, offset
+ sizeof(u_short
));
1166 *(u_short
*)(m
->m_data
+ offset
) = csum
;
1170 * Insert IP options into preformed packet.
1171 * Adjust IP destination as required for IP source routing,
1172 * as indicated by a non-zero in_addr at the start of the options.
1174 * XXX This routine assumes that the packet has no options in place.
1176 static struct mbuf
*
1177 ip_insertoptions(m
, opt
, phlen
)
1178 register struct mbuf
*m
;
1182 register struct ipoption
*p
= mtod(opt
, struct ipoption
*);
1184 register struct ip
*ip
= mtod(m
, struct ip
*);
1187 optlen
= opt
->m_len
- sizeof(p
->ipopt_dst
);
1188 if (optlen
+ (u_short
)ip
->ip_len
> IP_MAXPACKET
)
1189 return (m
); /* XXX should fail */
1190 if (p
->ipopt_dst
.s_addr
)
1191 ip
->ip_dst
= p
->ipopt_dst
;
1192 if (m
->m_flags
& M_EXT
|| m
->m_data
- optlen
< m
->m_pktdat
) {
1193 MGETHDR(n
, M_DONTWAIT
, MT_HEADER
);
1196 n
->m_pkthdr
.rcvif
= (struct ifnet
*)0;
1197 n
->m_pkthdr
.len
= m
->m_pkthdr
.len
+ optlen
;
1198 m
->m_len
-= sizeof(struct ip
);
1199 m
->m_data
+= sizeof(struct ip
);
1202 m
->m_len
= optlen
+ sizeof(struct ip
);
1203 m
->m_data
+= max_linkhdr
;
1204 (void)memcpy(mtod(m
, void *), ip
, sizeof(struct ip
));
1206 m
->m_data
-= optlen
;
1208 m
->m_pkthdr
.len
+= optlen
;
1209 ovbcopy((caddr_t
)ip
, mtod(m
, caddr_t
), sizeof(struct ip
));
1211 ip
= mtod(m
, struct ip
*);
1212 bcopy(p
->ipopt_list
, ip
+ 1, optlen
);
1213 *phlen
= sizeof(struct ip
) + optlen
;
1214 ip
->ip_vhl
= IP_MAKE_VHL(IPVERSION
, *phlen
>> 2);
1215 ip
->ip_len
+= optlen
;
1220 * Copy options from ip to jp,
1221 * omitting those not copied during fragmentation.
1227 register u_char
*cp
, *dp
;
1228 int opt
, optlen
, cnt
;
1230 cp
= (u_char
*)(ip
+ 1);
1231 dp
= (u_char
*)(jp
+ 1);
1232 cnt
= (IP_VHL_HL(ip
->ip_vhl
) << 2) - sizeof (struct ip
);
1233 for (; cnt
> 0; cnt
-= optlen
, cp
+= optlen
) {
1235 if (opt
== IPOPT_EOL
)
1237 if (opt
== IPOPT_NOP
) {
1238 /* Preserve for IP mcast tunnel's LSRR alignment. */
1244 if (cnt
< IPOPT_OLEN
+ sizeof(*cp
))
1245 panic("malformed IPv4 option passed to ip_optcopy");
1247 optlen
= cp
[IPOPT_OLEN
];
1249 if (optlen
< IPOPT_OLEN
+ sizeof(*cp
) || optlen
> cnt
)
1250 panic("malformed IPv4 option passed to ip_optcopy");
1252 /* bogus lengths should have been caught by ip_dooptions */
1255 if (IPOPT_COPIED(opt
)) {
1256 bcopy(cp
, dp
, optlen
);
1260 for (optlen
= dp
- (u_char
*)(jp
+1); optlen
& 0x3; optlen
++)
1266 * IP socket option processing.
1269 ip_ctloutput(so
, sopt
)
1271 struct sockopt
*sopt
;
1273 struct inpcb
*inp
= sotoinpcb(so
);
1277 if (sopt
->sopt_level
!= IPPROTO_IP
) {
1281 switch (sopt
->sopt_dir
) {
1283 switch (sopt
->sopt_name
) {
1290 if (sopt
->sopt_valsize
> MLEN
) {
1294 MGET(m
, sopt
->sopt_p
? M_WAIT
: M_DONTWAIT
, MT_HEADER
);
1299 m
->m_len
= sopt
->sopt_valsize
;
1300 error
= sooptcopyin(sopt
, mtod(m
, char *), m
->m_len
,
1305 return (ip_pcbopts(sopt
->sopt_name
, &inp
->inp_options
,
1312 case IP_RECVRETOPTS
:
1313 case IP_RECVDSTADDR
:
1316 #if defined(NFAITH) && NFAITH > 0
1319 error
= sooptcopyin(sopt
, &optval
, sizeof optval
,
1324 switch (sopt
->sopt_name
) {
1326 inp
->inp_ip_tos
= optval
;
1330 inp
->inp_ip_ttl
= optval
;
1332 #define OPTSET(bit) \
1334 inp->inp_flags |= bit; \
1336 inp->inp_flags &= ~bit;
1339 OPTSET(INP_RECVOPTS
);
1342 case IP_RECVRETOPTS
:
1343 OPTSET(INP_RECVRETOPTS
);
1346 case IP_RECVDSTADDR
:
1347 OPTSET(INP_RECVDSTADDR
);
1355 OPTSET(INP_RECVTTL
);
1358 #if defined(NFAITH) && NFAITH > 0
1367 case IP_MULTICAST_IF
:
1368 case IP_MULTICAST_VIF
:
1369 case IP_MULTICAST_TTL
:
1370 case IP_MULTICAST_LOOP
:
1371 case IP_ADD_MEMBERSHIP
:
1372 case IP_DROP_MEMBERSHIP
:
1373 error
= ip_setmoptions(sopt
, &inp
->inp_moptions
);
1377 error
= sooptcopyin(sopt
, &optval
, sizeof optval
,
1383 case IP_PORTRANGE_DEFAULT
:
1384 inp
->inp_flags
&= ~(INP_LOWPORT
);
1385 inp
->inp_flags
&= ~(INP_HIGHPORT
);
1388 case IP_PORTRANGE_HIGH
:
1389 inp
->inp_flags
&= ~(INP_LOWPORT
);
1390 inp
->inp_flags
|= INP_HIGHPORT
;
1393 case IP_PORTRANGE_LOW
:
1394 inp
->inp_flags
&= ~(INP_HIGHPORT
);
1395 inp
->inp_flags
|= INP_LOWPORT
;
1405 case IP_IPSEC_POLICY
:
1413 if ((error
= soopt_getm(sopt
, &m
)) != 0) /* XXX */
1415 if ((error
= soopt_mcopyin(sopt
, m
)) != 0) /* XXX */
1417 priv
= (sopt
->sopt_p
!= NULL
&&
1418 suser(sopt
->sopt_p
->p_ucred
,
1419 &sopt
->sopt_p
->p_acflag
) != 0) ? 0 : 1;
1421 req
= mtod(m
, caddr_t
);
1424 optname
= sopt
->sopt_name
;
1425 error
= ipsec4_set_policy(inp
, optname
, req
, len
, priv
);
1432 error
= ENOPROTOOPT
;
1438 switch (sopt
->sopt_name
) {
1441 if (inp
->inp_options
)
1442 error
= sooptcopyout(sopt
,
1443 mtod(inp
->inp_options
,
1445 inp
->inp_options
->m_len
);
1447 sopt
->sopt_valsize
= 0;
1453 case IP_RECVRETOPTS
:
1454 case IP_RECVDSTADDR
:
1458 #if defined(NFAITH) && NFAITH > 0
1461 switch (sopt
->sopt_name
) {
1464 optval
= inp
->inp_ip_tos
;
1468 optval
= inp
->inp_ip_ttl
;
1471 #define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0)
1474 optval
= OPTBIT(INP_RECVOPTS
);
1477 case IP_RECVRETOPTS
:
1478 optval
= OPTBIT(INP_RECVRETOPTS
);
1481 case IP_RECVDSTADDR
:
1482 optval
= OPTBIT(INP_RECVDSTADDR
);
1486 optval
= OPTBIT(INP_RECVIF
);
1490 optval
= OPTBIT(INP_RECVTTL
);
1494 if (inp
->inp_flags
& INP_HIGHPORT
)
1495 optval
= IP_PORTRANGE_HIGH
;
1496 else if (inp
->inp_flags
& INP_LOWPORT
)
1497 optval
= IP_PORTRANGE_LOW
;
1502 #if defined(NFAITH) && NFAITH > 0
1504 optval
= OPTBIT(INP_FAITH
);
1508 error
= sooptcopyout(sopt
, &optval
, sizeof optval
);
1511 case IP_MULTICAST_IF
:
1512 case IP_MULTICAST_VIF
:
1513 case IP_MULTICAST_TTL
:
1514 case IP_MULTICAST_LOOP
:
1515 case IP_ADD_MEMBERSHIP
:
1516 case IP_DROP_MEMBERSHIP
:
1517 error
= ip_getmoptions(sopt
, inp
->inp_moptions
);
1521 case IP_IPSEC_POLICY
:
1523 struct mbuf
*m
= NULL
;
1528 req
= mtod(m
, caddr_t
);
1531 error
= ipsec4_get_policy(sotoinpcb(so
), req
, len
, &m
);
1533 error
= soopt_mcopyout(sopt
, m
); /* XXX */
1541 error
= ENOPROTOOPT
;
1550 * Set up IP options in pcb for insertion in output packets.
1551 * Store in mbuf with pointer in pcbopt, adding pseudo-option
1552 * with destination address if source routed.
1555 ip_pcbopts(optname
, pcbopt
, m
)
1557 struct mbuf
**pcbopt
;
1558 register struct mbuf
*m
;
1560 register int cnt
, optlen
;
1561 register u_char
*cp
;
1564 /* turn off any old options */
1566 (void)m_free(*pcbopt
);
1568 if (m
== (struct mbuf
*)0 || m
->m_len
== 0) {
1570 * Only turning off any previous options.
1578 if (m
->m_len
% sizeof(int32_t))
1582 * IP first-hop destination address will be stored before
1583 * actual options; move other options back
1584 * and clear it when none present.
1586 if (m
->m_data
+ m
->m_len
+ sizeof(struct in_addr
) >= &m
->m_dat
[MLEN
])
1589 m
->m_len
+= sizeof(struct in_addr
);
1590 cp
= mtod(m
, u_char
*) + sizeof(struct in_addr
);
1591 ovbcopy(mtod(m
, caddr_t
), (caddr_t
)cp
, (unsigned)cnt
);
1592 bzero(mtod(m
, caddr_t
), sizeof(struct in_addr
));
1594 for (; cnt
> 0; cnt
-= optlen
, cp
+= optlen
) {
1595 opt
= cp
[IPOPT_OPTVAL
];
1596 if (opt
== IPOPT_EOL
)
1598 if (opt
== IPOPT_NOP
)
1601 if (cnt
< IPOPT_OLEN
+ sizeof(*cp
))
1603 optlen
= cp
[IPOPT_OLEN
];
1604 if (optlen
< IPOPT_OLEN
+ sizeof(*cp
) || optlen
> cnt
)
1615 * user process specifies route as:
1617 * D must be our final destination (but we can't
1618 * check that since we may not have connected yet).
1619 * A is first hop destination, which doesn't appear in
1620 * actual IP option, but is stored before the options.
1622 if (optlen
< IPOPT_MINOFF
- 1 + sizeof(struct in_addr
))
1624 m
->m_len
-= sizeof(struct in_addr
);
1625 cnt
-= sizeof(struct in_addr
);
1626 optlen
-= sizeof(struct in_addr
);
1627 cp
[IPOPT_OLEN
] = optlen
;
1629 * Move first hop before start of options.
1631 bcopy((caddr_t
)&cp
[IPOPT_OFFSET
+1], mtod(m
, caddr_t
),
1632 sizeof(struct in_addr
));
1634 * Then copy rest of options back
1635 * to close up the deleted entry.
1637 ovbcopy((caddr_t
)(&cp
[IPOPT_OFFSET
+1] +
1638 sizeof(struct in_addr
)),
1639 (caddr_t
)&cp
[IPOPT_OFFSET
+1],
1640 (unsigned)cnt
+ sizeof(struct in_addr
));
1644 if (m
->m_len
> MAX_IPOPTLEN
+ sizeof(struct in_addr
))
1656 * The whole multicast option thing needs to be re-thought.
1657 * Several of these options are equally applicable to non-multicast
1658 * transmission, and one (IP_MULTICAST_TTL) totally duplicates a
1659 * standard option (IP_TTL).
1663 * following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as interface index.
1665 static struct ifnet
*
1666 ip_multicast_if(a
, ifindexp
)
1675 if (ntohl(a
->s_addr
) >> 24 == 0) {
1676 ifindex
= ntohl(a
->s_addr
) & 0xffffff;
1677 if (ifindex
< 0 || if_index
< ifindex
)
1679 ifp
= ifindex2ifnet
[ifindex
];
1681 *ifindexp
= ifindex
;
1683 INADDR_TO_IFP(*a
, ifp
);
1689 * Set the IP multicast options in response to user setsockopt().
1692 ip_setmoptions(sopt
, imop
)
1693 struct sockopt
*sopt
;
1694 struct ip_moptions
**imop
;
1698 struct in_addr addr
;
1699 struct ip_mreq mreq
;
1700 struct ifnet
*ifp
= NULL
;
1701 struct ip_moptions
*imo
= *imop
;
1707 * No multicast option buffer attached to the pcb;
1708 * allocate one and initialize to default values.
1710 error
= ip_createmoptions(imop
);
1716 switch (sopt
->sopt_name
) {
1717 /* store an index number for the vif you wanna use in the send */
1718 case IP_MULTICAST_VIF
:
1719 if (legal_vif_num
== 0) {
1723 error
= sooptcopyin(sopt
, &i
, sizeof i
, sizeof i
);
1726 if (!legal_vif_num(i
) && (i
!= -1)) {
1730 imo
->imo_multicast_vif
= i
;
1733 case IP_MULTICAST_IF
:
1735 * Select the interface for outgoing multicast packets.
1737 error
= sooptcopyin(sopt
, &addr
, sizeof addr
, sizeof addr
);
1741 * INADDR_ANY is used to remove a previous selection.
1742 * When no interface is selected, a default one is
1743 * chosen every time a multicast packet is sent.
1745 if (addr
.s_addr
== INADDR_ANY
) {
1746 imo
->imo_multicast_ifp
= NULL
;
1750 * The selected interface is identified by its local
1751 * IP address. Find the interface and confirm that
1752 * it supports multicasting.
1755 ifp
= ip_multicast_if(&addr
, &ifindex
);
1756 if (ifp
== NULL
|| (ifp
->if_flags
& IFF_MULTICAST
) == 0) {
1758 error
= EADDRNOTAVAIL
;
1761 imo
->imo_multicast_ifp
= ifp
;
1763 imo
->imo_multicast_addr
= addr
;
1765 imo
->imo_multicast_addr
.s_addr
= INADDR_ANY
;
1769 case IP_MULTICAST_TTL
:
1771 * Set the IP time-to-live for outgoing multicast packets.
1772 * The original multicast API required a char argument,
1773 * which is inconsistent with the rest of the socket API.
1774 * We allow either a char or an int.
1776 if (sopt
->sopt_valsize
== 1) {
1778 error
= sooptcopyin(sopt
, &ttl
, 1, 1);
1781 imo
->imo_multicast_ttl
= ttl
;
1784 error
= sooptcopyin(sopt
, &ttl
, sizeof ttl
,
1791 imo
->imo_multicast_ttl
= ttl
;
1795 case IP_MULTICAST_LOOP
:
1797 * Set the loopback flag for outgoing multicast packets.
1798 * Must be zero or one. The original multicast API required a
1799 * char argument, which is inconsistent with the rest
1800 * of the socket API. We allow either a char or an int.
1802 if (sopt
->sopt_valsize
== 1) {
1804 error
= sooptcopyin(sopt
, &loop
, 1, 1);
1807 imo
->imo_multicast_loop
= !!loop
;
1810 error
= sooptcopyin(sopt
, &loop
, sizeof loop
,
1814 imo
->imo_multicast_loop
= !!loop
;
1818 case IP_ADD_MEMBERSHIP
:
1820 * Add a multicast group membership.
1821 * Group must be a valid IP multicast address.
1823 error
= sooptcopyin(sopt
, &mreq
, sizeof mreq
, sizeof mreq
);
1827 error
= ip_addmembership(imo
, &mreq
);
1830 case IP_DROP_MEMBERSHIP
:
1832 * Drop a multicast group membership.
1833 * Group must be a valid IP multicast address.
1835 error
= sooptcopyin(sopt
, &mreq
, sizeof mreq
, sizeof mreq
);
1839 error
= ip_dropmembership(imo
, &mreq
);
1848 * If all options have default values, no need to keep the mbuf.
1850 if (imo
->imo_multicast_ifp
== NULL
&&
1851 imo
->imo_multicast_vif
== -1 &&
1852 imo
->imo_multicast_ttl
== IP_DEFAULT_MULTICAST_TTL
&&
1853 imo
->imo_multicast_loop
== IP_DEFAULT_MULTICAST_LOOP
&&
1854 imo
->imo_num_memberships
== 0) {
1855 FREE(*imop
, M_IPMOPTS
);
1863 * Set the IP multicast options in response to user setsockopt().
1865 __private_extern__
int
1867 struct ip_moptions
**imop
)
1869 struct ip_moptions
*imo
;
1870 imo
= (struct ip_moptions
*) _MALLOC(sizeof(*imo
), M_IPMOPTS
,
1876 imo
->imo_multicast_ifp
= NULL
;
1877 imo
->imo_multicast_addr
.s_addr
= INADDR_ANY
;
1878 imo
->imo_multicast_vif
= -1;
1879 imo
->imo_multicast_ttl
= IP_DEFAULT_MULTICAST_TTL
;
1880 imo
->imo_multicast_loop
= IP_DEFAULT_MULTICAST_LOOP
;
1881 imo
->imo_num_memberships
= 0;
1887 * Add membership to an IPv4 multicast.
1889 __private_extern__
int
1891 struct ip_moptions
*imo
,
1892 struct ip_mreq
*mreq
)
1895 struct sockaddr_in
*dst
;
1896 struct ifnet
*ifp
= NULL
;
1901 if (!IN_MULTICAST(ntohl(mreq
->imr_multiaddr
.s_addr
))) {
1907 * If no interface address was provided, use the interface of
1908 * the route to the given multicast address.
1910 if (mreq
->imr_interface
.s_addr
== INADDR_ANY
) {
1911 bzero((caddr_t
)&ro
, sizeof(ro
));
1912 dst
= (struct sockaddr_in
*)&ro
.ro_dst
;
1913 dst
->sin_len
= sizeof(*dst
);
1914 dst
->sin_family
= AF_INET
;
1915 dst
->sin_addr
= mreq
->imr_multiaddr
;
1917 if (ro
.ro_rt
!= NULL
) {
1918 ifp
= ro
.ro_rt
->rt_ifp
;
1922 /* If there's no default route, try using loopback */
1923 mreq
->imr_interface
.s_addr
= INADDR_LOOPBACK
;
1928 ifp
= ip_multicast_if(&mreq
->imr_interface
, NULL
);
1932 * See if we found an interface, and confirm that it
1933 * supports multicast.
1935 if (ifp
== NULL
|| (ifp
->if_flags
& IFF_MULTICAST
) == 0) {
1936 error
= EADDRNOTAVAIL
;
1941 * See if the membership already exists or if all the
1942 * membership slots are full.
1944 for (i
= 0; i
< imo
->imo_num_memberships
; ++i
) {
1945 if (imo
->imo_membership
[i
]->inm_ifp
== ifp
&&
1946 imo
->imo_membership
[i
]->inm_addr
.s_addr
1947 == mreq
->imr_multiaddr
.s_addr
)
1950 if (i
< imo
->imo_num_memberships
) {
1955 if (i
== IP_MAX_MEMBERSHIPS
) {
1956 error
= ETOOMANYREFS
;
1961 * Everything looks good; add a new record to the multicast
1962 * address list for the given interface.
1964 if ((imo
->imo_membership
[i
] =
1965 in_addmulti(&mreq
->imr_multiaddr
, ifp
)) == NULL
) {
1970 ++imo
->imo_num_memberships
;
1977 * Drop membership of an IPv4 multicast.
1979 __private_extern__
int
1981 struct ip_moptions
*imo
,
1982 struct ip_mreq
*mreq
)
1986 struct ifnet
* ifp
= NULL
;
1989 if (!IN_MULTICAST(ntohl(mreq
->imr_multiaddr
.s_addr
))) {
1996 * If an interface address was specified, get a pointer
1997 * to its ifnet structure.
1999 if (mreq
->imr_interface
.s_addr
== INADDR_ANY
)
2002 ifp
= ip_multicast_if(&mreq
->imr_interface
, NULL
);
2004 error
= EADDRNOTAVAIL
;
2010 * Find the membership in the membership array.
2012 for (i
= 0; i
< imo
->imo_num_memberships
; ++i
) {
2014 imo
->imo_membership
[i
]->inm_ifp
== ifp
) &&
2015 imo
->imo_membership
[i
]->inm_addr
.s_addr
==
2016 mreq
->imr_multiaddr
.s_addr
)
2019 if (i
== imo
->imo_num_memberships
) {
2020 error
= EADDRNOTAVAIL
;
2025 * Give up the multicast address record to which the
2026 * membership points.
2028 in_delmulti(imo
->imo_membership
[i
]);
2030 * Remove the gap in the membership array.
2032 for (++i
; i
< imo
->imo_num_memberships
; ++i
)
2033 imo
->imo_membership
[i
-1] = imo
->imo_membership
[i
];
2034 --imo
->imo_num_memberships
;
2041 * Return the IP multicast options in response to user getsockopt().
2044 ip_getmoptions(sopt
, imo
)
2045 struct sockopt
*sopt
;
2046 register struct ip_moptions
*imo
;
2048 struct in_addr addr
;
2049 struct in_ifaddr
*ia
;
2054 switch (sopt
->sopt_name
) {
2055 case IP_MULTICAST_VIF
:
2057 optval
= imo
->imo_multicast_vif
;
2060 error
= sooptcopyout(sopt
, &optval
, sizeof optval
);
2063 case IP_MULTICAST_IF
:
2064 if (imo
== NULL
|| imo
->imo_multicast_ifp
== NULL
)
2065 addr
.s_addr
= INADDR_ANY
;
2066 else if (imo
->imo_multicast_addr
.s_addr
) {
2067 /* return the value user has set */
2068 addr
= imo
->imo_multicast_addr
;
2070 IFP_TO_IA(imo
->imo_multicast_ifp
, ia
);
2071 addr
.s_addr
= (ia
== NULL
) ? INADDR_ANY
2072 : IA_SIN(ia
)->sin_addr
.s_addr
;
2074 error
= sooptcopyout(sopt
, &addr
, sizeof addr
);
2077 case IP_MULTICAST_TTL
:
2079 optval
= coptval
= IP_DEFAULT_MULTICAST_TTL
;
2081 optval
= coptval
= imo
->imo_multicast_ttl
;
2082 if (sopt
->sopt_valsize
== 1)
2083 error
= sooptcopyout(sopt
, &coptval
, 1);
2085 error
= sooptcopyout(sopt
, &optval
, sizeof optval
);
2088 case IP_MULTICAST_LOOP
:
2090 optval
= coptval
= IP_DEFAULT_MULTICAST_LOOP
;
2092 optval
= coptval
= imo
->imo_multicast_loop
;
2093 if (sopt
->sopt_valsize
== 1)
2094 error
= sooptcopyout(sopt
, &coptval
, 1);
2096 error
= sooptcopyout(sopt
, &optval
, sizeof optval
);
2100 error
= ENOPROTOOPT
;
2107 * Discard the IP multicast options.
2110 ip_freemoptions(imo
)
2111 register struct ip_moptions
*imo
;
2116 for (i
= 0; i
< imo
->imo_num_memberships
; ++i
)
2117 if (imo
->imo_membership
[i
] != NULL
)
2118 in_delmulti(imo
->imo_membership
[i
]);
2119 FREE(imo
, M_IPMOPTS
);
2124 * Routine called from ip_output() to loop back a copy of an IP multicast
2125 * packet to the input queue of a specified interface. Note that this
2126 * calls the output routine of the loopback "driver", but with an interface
2127 * pointer that might NOT be a loopback interface -- evil, but easier than
2128 * replicating that code here.
2131 ip_mloopback(ifp
, m
, dst
, hlen
)
2133 register struct mbuf
*m
;
2134 register struct sockaddr_in
*dst
;
2137 register struct ip
*ip
;
2140 copym
= m_copy(m
, 0, M_COPYALL
);
2141 if (copym
!= NULL
&& (copym
->m_flags
& M_EXT
|| copym
->m_len
< hlen
))
2142 copym
= m_pullup(copym
, hlen
);
2143 if (copym
!= NULL
) {
2145 * We don't bother to fragment if the IP length is greater
2146 * than the interface's MTU. Can this possibly matter?
2148 ip
= mtod(copym
, struct ip
*);
2152 ip
->ip_sum
= in_cksum(copym
, hlen
);
2155 * It's not clear whether there are any lingering
2156 * reentrancy problems in other areas which might
2157 * be exposed by using ip_input directly (in
2158 * particular, everything which modifies the packet
2159 * in-place). Yet another option is using the
2160 * protosw directly to deliver the looped back
2161 * packet. For the moment, we'll err on the side
2162 * of safety by using if_simloop().
2165 if (dst
->sin_family
!= AF_INET
) {
2166 printf("ip_mloopback: bad address family %d\n",
2168 dst
->sin_family
= AF_INET
;
2174 * Mark checksum as valid or calculate checksum for loopback.
2176 * This is done this way because we have to embed the ifp of
2177 * the interface we will send the original copy of the packet
2178 * out on in the mbuf. ip_input will check if_hwassist of the
2179 * embedded ifp and ignore all csum_flags if if_hwassist is 0.
2180 * The UDP checksum has not been calculated yet.
2182 if (copym
->m_pkthdr
.csum_flags
& CSUM_DELAY_DATA
) {
2183 if (IF_HWASSIST_CSUM_FLAGS(ifp
->if_hwassist
)) {
2184 copym
->m_pkthdr
.csum_flags
|=
2185 CSUM_DATA_VALID
| CSUM_PSEUDO_HDR
|
2186 CSUM_IP_CHECKED
| CSUM_IP_VALID
;
2187 copym
->m_pkthdr
.csum_data
= 0xffff;
2190 in_delayed_cksum(copym
);
2198 * We need to send all loopback traffic down to dlil in case
2199 * a filter has tapped-in.
2203 dlil_find_dltag(APPLE_IF_FAM_LOOPBACK
, 0, PF_INET
, &lo_dl_tag
);
2206 * Stuff the 'real' ifp into the pkthdr, to be used in matching
2207 * in ip_input(); we need the loopback ifp/dl_tag passed as args
2208 * to make the loopback driver compliant with the data link
2212 copym
->m_pkthdr
.rcvif
= ifp
;
2213 dlil_output(lo_dl_tag
, copym
, 0, (struct sockaddr
*) dst
, 0);
2215 printf("Warning: ip_output call to dlil_find_dltag failed!\n");
2219 /* if_simloop(ifp, copym, (struct sockaddr *)dst, 0);*/