2 * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 * Copyright (c) 1982, 1986, 1988, 1990, 1993
30 * The Regents of the University of California. All rights reserved.
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94
61 * $FreeBSD: src/sys/netinet/ip_output.c,v 1.99.2.16 2001/07/19 06:37:26 kris Exp $
64 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
65 * support for mandatory and extensible security protections. This notice
66 * is included in support of clause 2.2 (b) of the Apple Public License,
72 #include <sys/param.h>
73 #include <sys/systm.h>
74 #include <sys/kernel.h>
75 #include <sys/malloc.h>
77 #include <sys/protosw.h>
78 #include <sys/socket.h>
79 #include <sys/socketvar.h>
80 #include <kern/locks.h>
81 #include <sys/sysctl.h>
83 #include <machine/endian.h>
86 #include <net/if_dl.h>
87 #include <net/route.h>
89 #include <netinet/in.h>
90 #include <netinet/in_systm.h>
91 #include <netinet/ip.h>
92 #include <netinet/in_pcb.h>
93 #include <netinet/in_var.h>
94 #include <netinet/ip_var.h>
96 #include <netinet/kpi_ipfilter_var.h>
99 #include <security/mac_framework.h>
104 #include <net/dlil.h>
105 #include <sys/kdebug.h>
106 #include <libkern/OSAtomic.h>
/*
 * Trace codes built with NETDBG_CODE() under the DBG_NETIP class.  In this
 * file DBG_LAYER_BEG, DBG_FNC_IP_OUTPUT and DBG_FNC_IPSEC4_OUTPUT are passed
 * to KERNEL_DEBUG(); the two DBG_FNC_* codes are OR'ed with
 * DBG_FUNC_START / DBG_FUNC_END at the call sites.
 */
108 #define DBG_LAYER_BEG NETDBG_CODE(DBG_NETIP, 1)
109 #define DBG_LAYER_END NETDBG_CODE(DBG_NETIP, 3)
110 #define DBG_FNC_IP_OUTPUT NETDBG_CODE(DBG_NETIP, (1 << 8) | 1)
111 #define DBG_FNC_IPSEC4_OUTPUT NETDBG_CODE(DBG_NETIP, (2 << 8) | 1)
/*
 * Exchange the low byte and the next byte of v.  The argument is evaluated
 * twice, so it must not have side effects.  The result is only a clean
 * 16-bit byte swap when v is an unsigned value that fits in 16 bits: any
 * higher bits of v leak into the result through (v) >> 8.
 */
113 #define SWAP16(v) ((((v) & 0xff) << 8) | ((v) >> 8))
116 #include <netinet6/ipsec.h>
117 #include <netkey/key.h>
119 #include <netkey/key_debug.h>
121 #define KEYDEBUG(lev,arg)
125 #include <netinet/ip_fw.h>
126 #include <netinet/ip_divert.h>
129 #include <netinet/ip_dummynet.h>
133 #include <net/pfvar.h>
136 #if IPFIREWALL_FORWARD_DEBUG
137 #define print_ip(a) printf("%ld.%ld.%ld.%ld",(ntohl(a.s_addr)>>24)&0xFF,\
138 (ntohl(a.s_addr)>>16)&0xFF,\
139 (ntohl(a.s_addr)>>8)&0xFF,\
140 (ntohl(a.s_addr))&0xFF);
/*
 * Prototypes for routines with internal linkage (static).  Of these,
 * ip_insertoptions(), in_selectsrcif() and ip_mloopback() are called from
 * the output path visible in this part of the file.
 */
146 static struct mbuf
*ip_insertoptions(struct mbuf
*, struct mbuf
*, int *);
147 static struct ifnet
*ip_multicast_if(struct in_addr
*, int *);
148 static void ip_mloopback(struct ifnet
*, struct mbuf
*,
149 struct sockaddr_in
*, int);
150 static int ip_getmoptions(struct sockopt
*, struct ip_moptions
*);
151 static int ip_pcbopts(int, struct mbuf
**, struct mbuf
*);
152 static int ip_setmoptions(struct sockopt
*, struct ip_moptions
**);
154 static void ip_out_cksum_stats(int, u_int32_t
);
155 static struct ifaddr
*in_selectsrcif(struct ip
*, struct route
*, unsigned int);
156 static void ip_bindif(struct inpcb
*, unsigned int);
/*
 * Prototypes declared without static, i.e. with external linkage.
 */
158 int ip_createmoptions(struct ip_moptions
**imop
);
159 int ip_addmembership(struct ip_moptions
*imo
, struct ip_mreq
*mreq
);
160 int ip_dropmembership(struct ip_moptions
*imo
, struct ip_mreq
*mreq
);
161 int ip_optcopy(struct ip
*, struct ip
*);
162 void in_delayed_cksum_offset(struct mbuf
*, int );
163 void in_cksum_offset(struct mbuf
* , size_t );
/*
 * Symbols defined outside this file.
 *
 * fr_checkp is a function pointer; the output path calls it as
 * (*fr_checkp)(ip, hlen, ifp, 1, &m1) and treats a non-zero return or a
 * NULL m1 as a reason to stop processing the packet.
 */
165 extern int (*fr_checkp
)(struct ip
*, int, struct ifnet
*, int, struct mbuf
**);
167 extern struct protosw inetsw
[];
/* Link-local counters; iplls_out_total and iplls_out_badttl are bumped in the output path. */
169 extern struct ip_linklocal_stat ip_linklocal_stat
;
170 extern lck_mtx_t
*ip_mutex
;
/*
 * When ipsec_bypass is non-zero the output path skips its IPsec handling
 * (it is tested together with the IP_NOIPSEC flag).
 */
172 /* temporary: for testing */
174 extern int ipsec_bypass
;
/*
 * File-local integer tunables, each registered with SYSCTL_INT under
 * _net_inet_ip with CTLFLAG_RW (readable and writable) and an initial
 * value of 0.
 */
/* Sysctl "maxchainsent"; description "use dlil_output_list".  Its consumer is not visible in this part of the file. */
177 static int ip_maxchainsent
= 0;
178 SYSCTL_INT(_net_inet_ip
, OID_AUTO
, maxchainsent
, CTLFLAG_RW
,
179 &ip_maxchainsent
, 0, "use dlil_output_list");
/*
 * Sysctl "forge_ce"; description "Forge ECN CE".
 * NOTE(review): presumably gates the debug-only code in the output path
 * that rewrites ECT0/ECT1 in ip_tos to CE -- confirm against the full file.
 */
181 static int forge_ce
= 0;
182 SYSCTL_INT(_net_inet_ip
, OID_AUTO
, forge_ce
, CTLFLAG_RW
,
183 &forge_ce
, 0, "Forge ECN CE");
/* Sysctl "select_srcif_debug"; description "log source interface selection debug info". */
186 static int ip_select_srcif_debug
= 0;
187 SYSCTL_INT(_net_inet_ip
, OID_AUTO
, select_srcif_debug
, CTLFLAG_RW
,
188 &ip_select_srcif_debug
, 0, "log source interface selection debug info");
191 * IP output. The packet in mbuf chain m contains a skeletal IP
192 * header (with len, off, ttl, proto, tos, src, dst).
193 * The mbuf chain containing the packet will be freed.
194 * The mbuf opt, if present, will not be freed.
202 struct ip_moptions
*imo
,
203 struct ip_out_args
*ipoa
)
206 error
= ip_output_list(m0
, 0, opt
, ro
, flags
, imo
, ipoa
);
219 * ipsec4_getpolicybyaddr:??? [IPSEC 4th argument, contents modified]
220 * ipsec4_getpolicybysock:??? [IPSEC 4th argument, contents modified]
221 * key_spdacquire:??? [IPSEC]
222 * ipsec4_output:??? [IPSEC]
223 * <fr_checkp>:??? [firewall]
224 * ip_dn_io_ptr:??? [dummynet]
225 * dlil_output:??? [DLIL]
226 * dlil_output_list:??? [DLIL]
228 * Notes: The ipsec4_getpolicyby{addr|sock} function error returns are
229 * only used as the error return from this function where one of
230 * these functions fails to return a policy.
239 struct ip_moptions
*imo
,
240 struct ip_out_args
*ipoa
244 struct ifnet
*ifp
= NULL
;
245 struct mbuf
*m
= m0
, **mppn
= NULL
;
246 int hlen
= sizeof (struct ip
);
247 int len
= 0, off
, error
= 0;
248 struct sockaddr_in
*dst
= NULL
;
249 struct in_ifaddr
*ia
= NULL
, *src_ia
= NULL
;
250 int isbroadcast
, sw_csum
;
251 struct in_addr pkt_dst
;
253 struct route iproute
;
254 struct socket
*so
= NULL
;
255 struct secpolicy
*sp
= NULL
;
257 #if IPFIREWALL_FORWARD
258 int fwd_rewrite_src
= 0;
261 struct ip_fw_args args
;
264 ipfilter_t inject_filter_ref
= 0;
266 struct route saved_route
;
267 struct ip_out_args saved_ipoa
;
268 struct mbuf
* packetlist
;
269 int pktcnt
= 0, tso
= 0;
270 unsigned int ifscope
;
271 boolean_t select_srcif
;
273 KERNEL_DEBUG(DBG_FNC_IP_OUTPUT
| DBG_FUNC_START
, 0,0,0,0,0);
277 args
.next_hop
= NULL
;
280 args
.divert_rule
= 0; /* divert cookie */
283 if (SLIST_EMPTY(&m0
->m_pkthdr
.tags
))
286 /* Grab info from mtags prepended to the chain */
288 if ((tag
= m_tag_locate(m0
, KERNEL_MODULE_TAG_ID
,
289 KERNEL_TAG_TYPE_DUMMYNET
, NULL
)) != NULL
) {
290 struct dn_pkt_tag
*dn_tag
;
292 dn_tag
= (struct dn_pkt_tag
*)(tag
+1);
293 args
.rule
= dn_tag
->rule
;
295 saved_route
= dn_tag
->ro
;
299 dst
= dn_tag
->dn_dst
;
301 flags
= dn_tag
->flags
;
302 saved_ipoa
= dn_tag
->ipoa
;
305 m_tag_delete(m0
, tag
);
307 #endif /* DUMMYNET */
310 if ((tag
= m_tag_locate(m0
, KERNEL_MODULE_TAG_ID
,
311 KERNEL_TAG_TYPE_DIVERT
, NULL
)) != NULL
) {
312 struct divert_tag
*div_tag
;
314 div_tag
= (struct divert_tag
*)(tag
+1);
315 args
.divert_rule
= div_tag
->cookie
;
317 m_tag_delete(m0
, tag
);
319 #endif /* IPDIVERT */
321 if ((tag
= m_tag_locate(m0
, KERNEL_MODULE_TAG_ID
,
322 KERNEL_TAG_TYPE_IPFORWARD
, NULL
)) != NULL
) {
323 struct ip_fwd_tag
*ipfwd_tag
;
325 ipfwd_tag
= (struct ip_fwd_tag
*)(tag
+1);
326 args
.next_hop
= ipfwd_tag
->next_hop
;
328 m_tag_delete(m0
, tag
);
331 #endif /* IPFIREWALL */
336 if ( !m
|| (m
->m_flags
& M_PKTHDR
) != 0)
337 panic("ip_output no HDR");
339 panic("ip_output no route, proto = %d",
340 mtod(m
, struct ip
*)->ip_p
);
344 * At present the IP_OUTARGS flag implies a request for IP to
345 * perform source interface selection. In the forwarding case,
346 * only the ifscope value is used, as source interface selection
347 * doesn't take place.
349 if (ip_doscopedroute
&& (flags
& IP_OUTARGS
)) {
350 select_srcif
= !(flags
& IP_FORWARDING
);
351 ifscope
= ipoa
->ipoa_ifscope
;
353 select_srcif
= FALSE
;
354 ifscope
= IFSCOPE_NONE
;
358 if (args
.rule
!= NULL
) { /* dummynet already saw us */
359 ip
= mtod(m
, struct ip
*);
360 hlen
= IP_VHL_HL(ip
->ip_vhl
) << 2 ;
361 if (ro
->ro_rt
!= NULL
) {
362 RT_LOCK_SPIN(ro
->ro_rt
);
363 ia
= (struct in_ifaddr
*)ro
->ro_rt
->rt_ifa
;
366 RT_UNLOCK(ro
->ro_rt
);
369 if (ipsec_bypass
== 0 && (flags
& IP_NOIPSEC
) == 0) {
370 so
= ipsec_getsocket(m
);
371 (void)ipsec_setsocket(m
, NULL
);
376 #endif /* IPFIREWALL */
379 if (ipsec_bypass
== 0 && (flags
& IP_NOIPSEC
) == 0) {
380 so
= ipsec_getsocket(m
);
381 (void)ipsec_setsocket(m
, NULL
);
386 * No need to process packet twice if we've
389 if (!SLIST_EMPTY(&m
->m_pkthdr
.tags
))
390 inject_filter_ref
= ipf_get_inject_filter(m
);
392 inject_filter_ref
= 0;
395 m
= ip_insertoptions(m
, opt
, &len
);
398 ip
= mtod(m
, struct ip
*);
400 pkt_dst
= args
.next_hop
? args
.next_hop
->sin_addr
: ip
->ip_dst
;
402 pkt_dst
= ip
->ip_dst
;
408 if ((flags
& (IP_FORWARDING
|IP_RAWOUTPUT
)) == 0) {
409 ip
->ip_vhl
= IP_MAKE_VHL(IPVERSION
, hlen
>> 2);
412 ip
->ip_id
= ip_randomid();
414 ip
->ip_id
= htons(ip_id
++);
416 OSAddAtomic(1, &ipstat
.ips_localout
);
418 hlen
= IP_VHL_HL(ip
->ip_vhl
) << 2;
422 /* For debugging, we let the stack forge congestion */
424 ((ip
->ip_tos
& IPTOS_ECN_MASK
) == IPTOS_ECN_ECT1
||
425 (ip
->ip_tos
& IPTOS_ECN_MASK
) == IPTOS_ECN_ECT0
)) {
426 ip
->ip_tos
= (ip
->ip_tos
& ~IPTOS_ECN_MASK
) | IPTOS_ECN_CE
;
431 KERNEL_DEBUG(DBG_LAYER_BEG
, ip
->ip_dst
.s_addr
,
432 ip
->ip_src
.s_addr
, ip
->ip_p
, ip
->ip_off
, ip
->ip_len
);
434 dst
= (struct sockaddr_in
*)&ro
->ro_dst
;
437 * If there is a cached route,
438 * check that it is to the same destination
439 * and is still up. If not, free it and try again.
440 * The address family should also be checked in case of sharing the
444 if (ro
->ro_rt
!= NULL
) {
445 if (ro
->ro_rt
->generation_id
!= route_generation
&&
446 ((flags
& (IP_ROUTETOIF
| IP_FORWARDING
)) == 0) &&
447 (ip
->ip_src
.s_addr
!= INADDR_ANY
)) {
448 src_ia
= ifa_foraddr(ip
->ip_src
.s_addr
);
449 if (src_ia
== NULL
) {
450 error
= EADDRNOTAVAIL
;
453 ifafree(&src_ia
->ia_ifa
);
456 * Test rt_flags without holding rt_lock for performance
457 * reasons; if the route is down it will hopefully be
458 * caught by the layer below (since it uses this route
459 * as a hint) or during the next transmit.
461 if ((ro
->ro_rt
->rt_flags
& RTF_UP
) == 0 ||
462 dst
->sin_family
!= AF_INET
||
463 dst
->sin_addr
.s_addr
!= pkt_dst
.s_addr
) {
468 * If we're doing source interface selection, we may not
469 * want to use this route; only synch up the generation
472 if (!select_srcif
&& ro
->ro_rt
!= NULL
&&
473 ro
->ro_rt
->generation_id
!= route_generation
)
474 ro
->ro_rt
->generation_id
= route_generation
;
476 if (ro
->ro_rt
== NULL
) {
477 bzero(dst
, sizeof(*dst
));
478 dst
->sin_family
= AF_INET
;
479 dst
->sin_len
= sizeof(*dst
);
480 dst
->sin_addr
= pkt_dst
;
483 * If routing to interface only,
484 * short circuit routing lookup.
486 #define ifatoia(ifa) ((struct in_ifaddr *)(ifa))
487 #define sintosa(sin) ((struct sockaddr *)(sin))
488 if (flags
& IP_ROUTETOIF
) {
490 ifafree(&ia
->ia_ifa
);
491 if ((ia
= ifatoia(ifa_ifwithdstaddr(sintosa(dst
)))) == 0) {
492 if ((ia
= ifatoia(ifa_ifwithnet(sintosa(dst
)))) == 0) {
493 OSAddAtomic(1, &ipstat
.ips_noroute
);
500 isbroadcast
= in_broadcast(dst
->sin_addr
, ifp
);
501 } else if (IN_MULTICAST(ntohl(pkt_dst
.s_addr
)) &&
502 imo
!= NULL
&& imo
->imo_multicast_ifp
!= NULL
) {
504 * Bypass the normal routing lookup for multicast
505 * packets if the interface is specified.
507 ifp
= imo
->imo_multicast_ifp
;
510 ifafree(&ia
->ia_ifa
);
512 /* Macro takes reference on ia */
515 boolean_t cloneok
= FALSE
;
517 * Perform source interface selection; the source IP address
518 * must belong to one of the addresses of the interface used
519 * by the route. For performance reasons, do this only if
520 * there is no route, or if the routing table has changed,
521 * or if we haven't done source interface selection on this
522 * route (for this PCB instance) before.
524 if (select_srcif
&& ip
->ip_src
.s_addr
!= INADDR_ANY
&&
525 (ro
->ro_rt
== NULL
|| !(ro
->ro_rt
->rt_flags
& RTF_UP
) ||
526 ro
->ro_rt
->generation_id
!= route_generation
||
527 !(ro
->ro_flags
& ROF_SRCIF_SELECTED
))) {
530 /* Find the source interface */
531 ifa
= in_selectsrcif(ip
, ro
, ifscope
);
534 * If the source address is spoofed (in the case
535 * of IP_RAWOUTPUT), or if this is destined for
536 * local/loopback, just let it go out using the
537 * interface of the route. Otherwise, there's no
538 * interface having such an address, so bail out.
540 if (ifa
== NULL
&& !(flags
& IP_RAWOUTPUT
) &&
541 ifscope
!= lo_ifp
->if_index
) {
542 error
= EADDRNOTAVAIL
;
547 * If the caller didn't explicitly specify the scope,
548 * pick it up from the source interface. If the cached
549 * route was wrong and was blown away as part of source
550 * interface selection, don't mask out RTF_PRCLONING
551 * since that route may have been allocated by the ULP,
552 * unless the IP header was created by the caller or
553 * the destination is IPv4 LLA. The check for the
554 * latter is needed because IPv4 LLAs are never scoped
555 * in the current implementation, and we don't want to
556 * replace the resolved IPv4 LLA route with one whose
557 * gateway points to that of the default gateway on
558 * the primary interface of the system.
561 if (ifscope
== IFSCOPE_NONE
)
562 ifscope
= ifa
->ifa_ifp
->if_index
;
564 cloneok
= (!(flags
& IP_RAWOUTPUT
) &&
565 !(IN_LINKLOCAL(ntohl(ip
->ip_dst
.s_addr
))));
570 * If this is the case, we probably don't want to allocate
571 * a protocol-cloned route since we didn't get one from the
572 * ULP. This lets TCP do its thing, while not burdening
573 * forwarding or ICMP with the overhead of cloning a route.
574 * Of course, we still want to do any cloning requested by
575 * the link layer, as this is probably required in all cases
576 * for correct operation (as it is for ARP).
578 if (ro
->ro_rt
== NULL
) {
579 unsigned long ign
= RTF_PRCLONING
;
581 * We make an exception here: if the destination
582 * address is INADDR_BROADCAST, allocate a protocol-
583 * cloned host route so that we end up with a route
584 * marked with the RTF_BROADCAST flag. Otherwise,
585 * we would end up referring to the default route,
586 * instead of creating a cloned host route entry.
587 * That would introduce inconsistencies between ULPs
588 * that allocate a route and those that don't. The
589 * RTF_BROADCAST route is important since we'd want
590 * to send out undirected IP broadcast packets using
591 * link-level broadcast address. Another exception
592 * is for ULP-created routes that got blown away by
593 * source interface selection (see above).
595 * These exceptions will no longer be necessary when
596 * the RTF_PRCLONING scheme is no longer present.
598 if (cloneok
|| dst
->sin_addr
.s_addr
== INADDR_BROADCAST
)
599 ign
&= ~RTF_PRCLONING
;
602 * Loosen the route lookup criteria if the ifscope
603 * corresponds to the loopback interface; this is
604 * needed to support Application Layer Gateways
605 * listening on loopback, in conjunction with packet
606 * filter redirection rules. The final source IP
607 * address will be rewritten by the packet filter
608 * prior to the RFC1122 loopback check below.
610 if (ifscope
== lo_ifp
->if_index
)
611 rtalloc_ign(ro
, ign
);
613 rtalloc_scoped_ign(ro
, ign
, ifscope
);
616 if (ro
->ro_rt
== NULL
) {
617 OSAddAtomic(1, &ipstat
.ips_noroute
);
618 error
= EHOSTUNREACH
;
623 ifafree(&ia
->ia_ifa
);
624 RT_LOCK_SPIN(ro
->ro_rt
);
625 ia
= ifatoia(ro
->ro_rt
->rt_ifa
);
628 ifp
= ro
->ro_rt
->rt_ifp
;
630 if (ro
->ro_rt
->rt_flags
& RTF_GATEWAY
)
631 dst
= (struct sockaddr_in
*)ro
->ro_rt
->rt_gateway
;
632 if (ro
->ro_rt
->rt_flags
& RTF_HOST
) {
633 isbroadcast
= (ro
->ro_rt
->rt_flags
& RTF_BROADCAST
);
635 /* Become a regular mutex */
636 RT_CONVERT_LOCK(ro
->ro_rt
);
637 isbroadcast
= in_broadcast(dst
->sin_addr
, ifp
);
639 RT_UNLOCK(ro
->ro_rt
);
642 if (IN_MULTICAST(ntohl(pkt_dst
.s_addr
))) {
643 struct in_multi
*inm
;
645 m
->m_flags
|= M_MCAST
;
647 * IP destination address is multicast. Make sure "dst"
648 * still points to the address in "ro". (It may have been
649 * changed to point to a gateway address, above.)
651 dst
= (struct sockaddr_in
*)&ro
->ro_dst
;
653 * See if the caller provided any multicast options
656 if ((flags
& IP_RAWOUTPUT
) == 0) ip
->ip_ttl
= imo
->imo_multicast_ttl
;
657 if (imo
->imo_multicast_ifp
!= NULL
) {
658 ifp
= imo
->imo_multicast_ifp
;
661 if (imo
->imo_multicast_vif
!= -1 &&
662 ((flags
& IP_RAWOUTPUT
) == 0 || ip
->ip_src
.s_addr
== INADDR_ANY
))
664 ip_mcast_src(imo
->imo_multicast_vif
);
665 #endif /* MROUTING */
667 if ((flags
& IP_RAWOUTPUT
) == 0) ip
->ip_ttl
= IP_DEFAULT_MULTICAST_TTL
;
669 * Confirm that the outgoing interface supports multicast.
671 if ((imo
== NULL
) || (imo
->imo_multicast_vif
== -1)) {
672 if ((ifp
->if_flags
& IFF_MULTICAST
) == 0) {
673 OSAddAtomic(1, &ipstat
.ips_noroute
);
679 * If source address not specified yet, use address
680 * of outgoing interface.
682 if (ip
->ip_src
.s_addr
== INADDR_ANY
) {
683 struct in_ifaddr
*ia1
;
684 lck_rw_lock_shared(in_ifaddr_rwlock
);
685 TAILQ_FOREACH(ia1
, &in_ifaddrhead
, ia_link
)
686 if (ia1
->ia_ifp
== ifp
) {
687 ip
->ip_src
= IA_SIN(ia1
)->sin_addr
;
690 lck_rw_done(in_ifaddr_rwlock
);
691 if (ip
->ip_src
.s_addr
== INADDR_ANY
) {
697 ifnet_lock_shared(ifp
);
698 IN_LOOKUP_MULTI(pkt_dst
, ifp
, inm
);
699 ifnet_lock_done(ifp
);
701 (imo
== NULL
|| imo
->imo_multicast_loop
)) {
703 * If we belong to the destination multicast group
704 * on the outgoing interface, and the caller did not
705 * forbid loopback, loop back a copy.
707 if (!TAILQ_EMPTY(&ipv4_filters
)) {
708 struct ipfilter
*filter
;
709 int seen
= (inject_filter_ref
== 0);
710 struct ipf_pktopts
*ippo
= 0, ipf_pktopts
;
714 ipf_pktopts
.ippo_mcast_ifnet
= imo
->imo_multicast_ifp
;
715 ipf_pktopts
.ippo_mcast_ttl
= imo
->imo_multicast_ttl
;
716 ipf_pktopts
.ippo_mcast_loop
= imo
->imo_multicast_loop
;
721 /* 4135317 - always pass network byte order to filter */
723 #if BYTE_ORDER != BIG_ENDIAN
728 TAILQ_FOREACH(filter
, &ipv4_filters
, ipf_link
) {
730 if ((struct ipfilter
*)inject_filter_ref
== filter
)
732 } else if (filter
->ipf_filter
.ipf_output
) {
734 result
= filter
->ipf_filter
.ipf_output(filter
->ipf_filter
.cookie
, (mbuf_t
*)&m
, ippo
);
735 if (result
== EJUSTRETURN
) {
746 /* set back to host byte order */
747 ip
= mtod(m
, struct ip
*);
749 #if BYTE_ORDER != BIG_ENDIAN
757 ip_mloopback(ifp
, m
, dst
, hlen
);
762 * If we are acting as a multicast router, perform
763 * multicast forwarding as if the packet had just
764 * arrived on the interface to which we are about
765 * to send. The multicast forwarding function
766 * recursively calls this function, using the
767 * IP_FORWARDING flag to prevent infinite recursion.
769 * Multicasts that are looped back by ip_mloopback(),
770 * above, will be forwarded by the ip_input() routine,
773 if (ip_mrouter
&& (flags
& IP_FORWARDING
) == 0) {
775 * Check if rsvp daemon is running. If not, don't
776 * set ip_moptions. This ensures that the packet
777 * is multicast and not just sent down one link
778 * as prescribed by rsvpd.
782 if (ip_mforward(ip
, ifp
, m
, imo
) != 0) {
788 #endif /* MROUTING */
791 * Multicasts with a time-to-live of zero may be looped-
792 * back, above, but must not be transmitted on a network.
793 * Also, multicasts addressed to the loopback interface
794 * are not sent -- the above call to ip_mloopback() will
795 * loop back a copy if this host actually belongs to the
796 * destination group on the loopback interface.
798 if (ip
->ip_ttl
== 0 || ifp
->if_flags
& IFF_LOOPBACK
) {
807 * If source address not specified yet, use address
808 * of outgoing interface.
810 if (ip
->ip_src
.s_addr
== INADDR_ANY
) {
811 ip
->ip_src
= IA_SIN(ia
)->sin_addr
;
812 #if IPFIREWALL_FORWARD
813 /* Keep note that we did this - if the firewall changes
814 * the next-hop, our interface may change, changing the
815 * default source IP. It's a shame so much effort happens
819 #endif /* IPFIREWALL_FORWARD */
824 * Look for broadcast address and
825 * verify user is allowed to send
829 if ((ifp
->if_flags
& IFF_BROADCAST
) == 0) {
830 error
= EADDRNOTAVAIL
;
833 if ((flags
& IP_ALLOWBROADCAST
) == 0) {
837 /* don't allow broadcast messages to be fragmented */
838 if ((u_short
)ip
->ip_len
> ifp
->if_mtu
) {
842 m
->m_flags
|= M_BCAST
;
844 m
->m_flags
&= ~M_BCAST
;
849 /* Invoke outbound packet filter */
850 if (pf_af_hook(ifp
, mppn
, &m
, AF_INET
, FALSE
) != 0) {
851 if (packetlist
== m0
) {
857 /* Next packet in the chain */
859 } else if (packetlist
!= NULL
) {
860 /* No more packet; send down the chain */
863 /* Nothing left; we're done */
867 ip
= mtod(m
, struct ip
*);
868 pkt_dst
= ip
->ip_dst
;
869 hlen
= IP_VHL_HL(ip
->ip_vhl
) << 2;
872 * Force IP TTL to 255 following draft-ietf-zeroconf-ipv4-linklocal.txt
874 if (IN_LINKLOCAL(ntohl(ip
->ip_src
.s_addr
)) || IN_LINKLOCAL(ntohl(ip
->ip_dst
.s_addr
))) {
875 ip_linklocal_stat
.iplls_out_total
++;
876 if (ip
->ip_ttl
!= MAXTTL
) {
877 ip_linklocal_stat
.iplls_out_badttl
++;
882 if (!didfilter
&& !TAILQ_EMPTY(&ipv4_filters
)) {
883 struct ipfilter
*filter
;
884 int seen
= (inject_filter_ref
== 0);
886 /* Check that a TSO frame isn't passed to a filter.
887 * This could happen if a filter is inserted while
888 * TCP is sending the TSO packet.
890 if (m
->m_pkthdr
.csum_flags
& CSUM_TSO_IPV4
) {
897 /* 4135317 - always pass network byte order to filter */
899 #if BYTE_ORDER != BIG_ENDIAN
904 TAILQ_FOREACH(filter
, &ipv4_filters
, ipf_link
) {
906 if ((struct ipfilter
*)inject_filter_ref
== filter
)
908 } else if (filter
->ipf_filter
.ipf_output
) {
910 result
= filter
->ipf_filter
.ipf_output(filter
->ipf_filter
.cookie
, (mbuf_t
*)&m
, 0);
911 if (result
== EJUSTRETURN
) {
922 /* set back to host byte order */
923 ip
= mtod(m
, struct ip
*);
925 #if BYTE_ORDER != BIG_ENDIAN
934 /* temporary for testing only: bypass ipsec altogether */
936 if (ipsec_bypass
!= 0 || (flags
& IP_NOIPSEC
) != 0)
939 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT
| DBG_FUNC_START
, 0,0,0,0,0);
942 /* get SP for this packet */
944 sp
= ipsec4_getpolicybyaddr(m
, IPSEC_DIR_OUTBOUND
, flags
, &error
);
946 sp
= ipsec4_getpolicybysock(m
, IPSEC_DIR_OUTBOUND
, so
, &error
);
949 IPSEC_STAT_INCREMENT(ipsecstat
.out_inval
);
950 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT
| DBG_FUNC_END
, 0,0,0,0,0);
957 switch (sp
->policy
) {
958 case IPSEC_POLICY_DISCARD
:
959 case IPSEC_POLICY_GENERATE
:
961 * This packet is just discarded.
963 IPSEC_STAT_INCREMENT(ipsecstat
.out_polvio
);
964 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT
| DBG_FUNC_END
, 1,0,0,0,0);
967 case IPSEC_POLICY_BYPASS
:
968 case IPSEC_POLICY_NONE
:
969 /* no need to do IPsec. */
970 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT
| DBG_FUNC_END
, 2,0,0,0,0);
973 case IPSEC_POLICY_IPSEC
:
974 if (sp
->req
== NULL
) {
975 /* acquire a policy */
976 error
= key_spdacquire(sp
);
977 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT
| DBG_FUNC_END
, 3,0,0,0,0);
982 case IPSEC_POLICY_ENTRUST
:
984 printf("ip_output: Invalid policy found. %d\n", sp
->policy
);
987 struct ipsec_output_state state
;
988 bzero(&state
, sizeof(state
));
990 if (flags
& IP_ROUTETOIF
) {
992 bzero(&iproute
, sizeof(iproute
));
995 state
.dst
= (struct sockaddr
*)dst
;
1001 * delayed checksums are not currently compatible with IPsec
1003 if (m
->m_pkthdr
.csum_flags
& CSUM_DELAY_DATA
) {
1004 in_delayed_cksum(m
);
1005 m
->m_pkthdr
.csum_flags
&= ~CSUM_DELAY_DATA
;
1009 #if BYTE_ORDER != BIG_ENDIAN
1014 error
= ipsec4_output(&state
, sp
, flags
);
1018 if (flags
& IP_ROUTETOIF
) {
1020 * if we have tunnel mode SA, we may need to ignore
1023 if (state
.ro
!= &iproute
|| state
.ro
->ro_rt
!= NULL
) {
1024 flags
&= ~IP_ROUTETOIF
;
1030 dst
= (struct sockaddr_in
*)state
.dst
;
1032 /* mbuf is already reclaimed in ipsec4_output. */
1042 printf("ip4_output (ipsec): error code %d\n", error
);
1045 /* don't show these error codes to the user */
1049 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT
| DBG_FUNC_END
, 4,0,0,0,0);
1054 /* be sure to update variables that are affected by ipsec4_output() */
1055 ip
= mtod(m
, struct ip
*);
1058 hlen
= IP_VHL_HL(ip
->ip_vhl
) << 2;
1060 hlen
= ip
->ip_hl
<< 2;
1062 /* Check that there wasn't a route change and src is still valid */
1063 if (ro
->ro_rt
!= NULL
&& ro
->ro_rt
->generation_id
!= route_generation
) {
1064 if ((src_ia
= ifa_foraddr(ip
->ip_src
.s_addr
)) == NULL
&&
1065 ((flags
& (IP_ROUTETOIF
| IP_FORWARDING
)) == 0)) {
1066 error
= EADDRNOTAVAIL
;
1067 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT
| DBG_FUNC_END
,
1074 ifafree(&src_ia
->ia_ifa
);
1077 if (ro
->ro_rt
== NULL
) {
1078 if ((flags
& IP_ROUTETOIF
) == 0) {
1079 printf("ip_output: can't update route after "
1080 "IPsec processing\n");
1081 error
= EHOSTUNREACH
; /*XXX*/
1082 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT
| DBG_FUNC_END
,
1088 ifafree(&ia
->ia_ifa
);
1089 RT_LOCK_SPIN(ro
->ro_rt
);
1090 ia
= ifatoia(ro
->ro_rt
->rt_ifa
);
1092 ifaref(&ia
->ia_ifa
);
1093 ifp
= ro
->ro_rt
->rt_ifp
;
1094 RT_UNLOCK(ro
->ro_rt
);
1097 /* make it flipped, again. */
1099 #if BYTE_ORDER != BIG_ENDIAN
1104 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT
| DBG_FUNC_END
, 7,0xff,0xff,0xff,0xff);
1106 /* Pass to filters again */
1107 if (!TAILQ_EMPTY(&ipv4_filters
)) {
1108 struct ipfilter
*filter
;
1110 /* Check that a TSO frame isn't passed to a filter.
1111 * This could happen if a filter is inserted while
1112 * TCP is sending the TSO packet.
1114 if (m
->m_pkthdr
.csum_flags
& CSUM_TSO_IPV4
) {
1121 /* 4135317 - always pass network byte order to filter */
1123 #if BYTE_ORDER != BIG_ENDIAN
1128 TAILQ_FOREACH(filter
, &ipv4_filters
, ipf_link
) {
1129 if (filter
->ipf_filter
.ipf_output
) {
1131 result
= filter
->ipf_filter
.ipf_output(filter
->ipf_filter
.cookie
, (mbuf_t
*)&m
, 0);
1132 if (result
== EJUSTRETURN
) {
1143 /* set back to host byte order */
1144 ip
= mtod(m
, struct ip
*);
1146 #if BYTE_ORDER != BIG_ENDIAN
1159 * - Xlate: translate packet's addr/port (NAT).
1160 * - Firewall: deny/allow/etc.
1161 * - Wrap: fake packet's addr/port <unimpl.>
1162 * - Encapsulate: put it in another IP and send out. <unimp.>
1165 struct mbuf
*m1
= m
;
1167 if ((error
= (*fr_checkp
)(ip
, hlen
, ifp
, 1, &m1
)) || !m1
) {
1170 ip
= mtod(m0
= m
= m1
, struct ip
*);
1174 * Check with the firewall...
1175 * but not if we are already being fwd'd from a firewall.
1177 if (fw_enable
&& IPFW_LOADED
&& !args
.next_hop
) {
1178 struct sockaddr_in
*old
= dst
;
1181 args
.next_hop
= dst
;
1183 off
= ip_fw_chk_ptr(&args
);
1185 dst
= args
.next_hop
;
1188 * On return we must do the following:
1189 * IP_FW_PORT_DENY_FLAG -> drop the pkt (XXX new)
1190 * 1<=off<= 0xffff -> DIVERT
1191 * (off & IP_FW_PORT_DYNT_FLAG) -> send to a DUMMYNET pipe
1192 * (off & IP_FW_PORT_TEE_FLAG) -> TEE the packet
1193 * dst != old -> IPFIREWALL_FORWARD
1194 * off==0, dst==old -> accept
1195 * If some of the above modules is not compiled in, then
1196 * we shouldn't have to check the corresponding condition
1197 * (because the ipfw control socket should not accept
1198 * unsupported rules), but better play safe and drop
1199 * packets in case of doubt.
1202 if ( (off
& IP_FW_PORT_DENY_FLAG
) || m
== NULL
) {
1208 ip
= mtod(m
, struct ip
*);
1210 if (off
== 0 && dst
== old
) {/* common case */
1214 if (DUMMYNET_LOADED
&& (off
& IP_FW_PORT_DYNT_FLAG
) != 0) {
1216 * pass the pkt to dummynet. Need to include
1217 * pipe number, m, ifp, ro, dst because these are
1218 * not recomputed in the next pass.
1219 * All other parameters have been already used and
1220 * so they are not needed anymore.
1221 * XXX note: if the ifp or ro entry are deleted
1222 * while a pkt is in dummynet, we are in trouble!
1227 if (flags
& IP_OUTARGS
)
1230 error
= ip_dn_io_ptr(m
, off
& 0xffff, DN_TO_IP_OUT
,
1234 #endif /* DUMMYNET */
1236 if (off
!= 0 && (off
& IP_FW_PORT_DYNT_FLAG
) == 0) {
1237 struct mbuf
*clone
= NULL
;
1239 /* Clone packet if we're doing a 'tee' */
1240 if ((off
& IP_FW_PORT_TEE_FLAG
) != 0)
1241 clone
= m_dup(m
, M_DONTWAIT
);
1244 * delayed checksums are not currently compatible
1245 * with divert sockets.
1247 if (m
->m_pkthdr
.csum_flags
& CSUM_DELAY_DATA
) {
1248 in_delayed_cksum(m
);
1249 m
->m_pkthdr
.csum_flags
&= ~CSUM_DELAY_DATA
;
1252 /* Restore packet header fields to original values */
1254 #if BYTE_ORDER != BIG_ENDIAN
1259 /* Deliver packet to divert input routine */
1260 divert_packet(m
, 0, off
& 0xffff, args
.divert_rule
);
1262 /* If 'tee', continue with original packet */
1263 if (clone
!= NULL
) {
1265 ip
= mtod(m
, struct ip
*);
1272 #if IPFIREWALL_FORWARD
1273 /* Here we check dst to make sure it's directly reachable on the
1274 * interface we previously thought it was.
1275 * If it isn't (which may be likely in some situations) we have
1276 * to re-route it (ie, find a route for the next-hop and the
1277 * associated interface) and set them here. This is nested
1278 * forwarding which in most cases is undesirable, except where
1279 * such control is nigh impossible. So we do it here.
1282 if (off
== 0 && old
!= dst
) {
1283 struct in_ifaddr
*ia_fw
;
1285 /* It's changed... */
1286 /* There must be a better way to do this next line... */
1287 static struct route sro_fwd
, *ro_fwd
= &sro_fwd
;
1288 #if IPFIREWALL_FORWARD_DEBUG
1289 printf("IPFIREWALL_FORWARD: New dst ip: ");
1290 print_ip(dst
->sin_addr
);
1294 * We need to figure out if we have been forwarded
1295 * to a local socket. If so then we should somehow
1296 * "loop back" to ip_input, and get directed to the
1297 * PCB as if we had received this packet. This is
1298 * because it may be difficult to identify the packets
1299 * you want to forward until they are being output
1300 * and have selected an interface. (e.g. locally
1301 * initiated packets) If we used the loopback interface,
1302 * we would not be able to control what happens
1303 * as the packet runs through ip_input() as
1304 * it is done through a ISR.
1306 lck_rw_lock_shared(in_ifaddr_rwlock
);
1307 TAILQ_FOREACH(ia_fw
, &in_ifaddrhead
, ia_link
) {
1309 * If the addr to forward to is one
1310 * of ours, we pretend to
1311 * be the destination for this packet.
1313 if (IA_SIN(ia_fw
)->sin_addr
.s_addr
==
1314 dst
->sin_addr
.s_addr
)
1317 lck_rw_done(in_ifaddr_rwlock
);
1319 /* tell ip_input "dont filter" */
1320 struct m_tag
*fwd_tag
;
1321 struct ip_fwd_tag
*ipfwd_tag
;
1323 fwd_tag
= m_tag_alloc(KERNEL_MODULE_TAG_ID
,
1324 KERNEL_TAG_TYPE_IPFORWARD
,
1325 sizeof (*ipfwd_tag
), M_NOWAIT
);
1326 if (fwd_tag
== NULL
) {
1331 ipfwd_tag
= (struct ip_fwd_tag
*)(fwd_tag
+1);
1332 ipfwd_tag
->next_hop
= args
.next_hop
;
1334 m_tag_prepend(m
, fwd_tag
);
1336 if (m
->m_pkthdr
.rcvif
== NULL
)
1337 m
->m_pkthdr
.rcvif
= ifunit("lo0");
1338 if ((~IF_HWASSIST_CSUM_FLAGS(m
->m_pkthdr
.rcvif
->if_hwassist
) &
1339 m
->m_pkthdr
.csum_flags
) == 0) {
1340 if (m
->m_pkthdr
.csum_flags
& CSUM_DELAY_DATA
) {
1341 m
->m_pkthdr
.csum_flags
&= ~CSUM_DELAY_DATA
;
1342 m
->m_pkthdr
.csum_flags
|=
1343 CSUM_DATA_VALID
| CSUM_PSEUDO_HDR
;
1344 m
->m_pkthdr
.csum_data
= 0xffff;
1346 m
->m_pkthdr
.csum_flags
|=
1347 CSUM_IP_CHECKED
| CSUM_IP_VALID
;
1349 else if (m
->m_pkthdr
.csum_flags
& CSUM_DELAY_DATA
) {
1350 in_delayed_cksum(m
);
1351 m
->m_pkthdr
.csum_flags
&= ~CSUM_DELAY_DATA
;
1352 ip
->ip_sum
= in_cksum(m
, hlen
);
1355 #if BYTE_ORDER != BIG_ENDIAN
1360 /* we need to call dlil_output to run filters
1361 * and resync to avoid recursion loops.
1364 dlil_output(lo_ifp
, PF_INET
, m
, 0, (struct sockaddr
*)dst
, 0);
1367 printf("ip_output: no loopback ifp for forwarding!!!\n");
1371 /* Some of the logic for this was
1372 * nicked from above.
1374 * This rewrites the cached route in a local PCB.
1375 * Is this what we want to do?
1377 bcopy(dst
, &ro_fwd
->ro_dst
, sizeof(*dst
));
1379 ro_fwd
->ro_rt
= NULL
;
1380 rtalloc_ign(ro_fwd
, RTF_PRCLONING
);
1382 if (ro_fwd
->ro_rt
== NULL
) {
1383 OSAddAtomic(1, &ipstat
.ips_noroute
);
1384 error
= EHOSTUNREACH
;
1388 RT_LOCK_SPIN(ro_fwd
->ro_rt
);
1389 ia_fw
= ifatoia(ro_fwd
->ro_rt
->rt_ifa
);
1391 ifaref(&ia_fw
->ia_ifa
);
1392 ifp
= ro_fwd
->ro_rt
->rt_ifp
;
1393 ro_fwd
->ro_rt
->rt_use
++;
1394 if (ro_fwd
->ro_rt
->rt_flags
& RTF_GATEWAY
)
1395 dst
= (struct sockaddr_in
*)ro_fwd
->ro_rt
->rt_gateway
;
1396 if (ro_fwd
->ro_rt
->rt_flags
& RTF_HOST
) {
1398 (ro_fwd
->ro_rt
->rt_flags
& RTF_BROADCAST
);
1400 /* Become a regular mutex */
1401 RT_CONVERT_LOCK(ro_fwd
->ro_rt
);
1402 isbroadcast
= in_broadcast(dst
->sin_addr
, ifp
);
1404 RT_UNLOCK(ro_fwd
->ro_rt
);
1406 ro
->ro_rt
= ro_fwd
->ro_rt
;
1407 dst
= (struct sockaddr_in
*)&ro_fwd
->ro_dst
;
1410 * If we added a default src ip earlier,
1411 * which would have been gotten from the-then
1412 * interface, do it again, from the new one.
1414 if (ia_fw
!= NULL
) {
1415 if (fwd_rewrite_src
)
1416 ip
->ip_src
= IA_SIN(ia_fw
)->sin_addr
;
1417 ifafree(&ia_fw
->ia_ifa
);
1421 #endif /* IPFIREWALL_FORWARD */
1423 * if we get here, none of the above matches, and
1424 * we have to drop the pkt
1427 error
= EACCES
; /* not sure this is the right error msg */
1430 #endif /* IPFIREWALL */
1434 /* Do not allow loopback address to wind up on a wire */
1435 if ((ifp
->if_flags
& IFF_LOOPBACK
) == 0 &&
1436 ((ntohl(ip
->ip_src
.s_addr
) >> IN_CLASSA_NSHIFT
) == IN_LOOPBACKNET
||
1437 (ntohl(ip
->ip_dst
.s_addr
) >> IN_CLASSA_NSHIFT
) == IN_LOOPBACKNET
)) {
1438 OSAddAtomic(1, &ipstat
.ips_badaddr
);
1441 * Do not simply drop the packet just like a firewall -- we want the
1442 * application to feel the pain.
1443 * Return ENETUNREACH like ip6_output does in some similar cases.
1444 * This can startle the otherwise clueless process that specifies
1445 * loopback as the source address.
1447 error
= ENETUNREACH
;
1451 m
->m_pkthdr
.csum_flags
|= CSUM_IP
;
1452 tso
= (ifp
->if_hwassist
& IFNET_TSO_IPV4
) && (m
->m_pkthdr
.csum_flags
& CSUM_TSO_IPV4
);
1454 sw_csum
= m
->m_pkthdr
.csum_flags
1455 & ~IF_HWASSIST_CSUM_FLAGS(ifp
->if_hwassist
);
1457 if ((ifp
->if_hwassist
& CSUM_TCP_SUM16
) != 0) {
1459 * Special case code for GMACE
1460 * frames that can be checksummed by GMACE SUM16 HW:
1461 * frame >64, no fragments, no UDP
1463 if (apple_hwcksum_tx
&& (m
->m_pkthdr
.csum_flags
& CSUM_TCP
)
1464 && (ip
->ip_len
> 50) && (ip
->ip_len
<= ifp
->if_mtu
)) {
1465 /* Apple GMAC HW, expects STUFF_OFFSET << 16 | START_OFFSET */
1466 u_short offset
= (IP_VHL_HL(ip
->ip_vhl
) << 2) +14 ; /* IP+Enet header length */
1467 u_short csumprev
= m
->m_pkthdr
.csum_data
& 0xFFFF;
1468 m
->m_pkthdr
.csum_flags
= CSUM_DATA_VALID
| CSUM_TCP_SUM16
; /* for GMAC */
1469 m
->m_pkthdr
.csum_data
= (csumprev
+ offset
) << 16 ;
1470 m
->m_pkthdr
.csum_data
+= offset
;
1471 sw_csum
= CSUM_DELAY_IP
; /* do IP hdr chksum in software */
1474 /* let the software handle any UDP or TCP checksums */
1475 sw_csum
|= (CSUM_DELAY_DATA
& m
->m_pkthdr
.csum_flags
);
1477 } else if (apple_hwcksum_tx
== 0) {
1478 sw_csum
|= (CSUM_DELAY_DATA
| CSUM_DELAY_IP
) &
1479 m
->m_pkthdr
.csum_flags
;
1482 if (sw_csum
& CSUM_DELAY_DATA
) {
1483 in_delayed_cksum(m
);
1484 sw_csum
&= ~CSUM_DELAY_DATA
;
1485 m
->m_pkthdr
.csum_flags
&= ~CSUM_DELAY_DATA
;
1488 if (apple_hwcksum_tx
!= 0) {
1489 m
->m_pkthdr
.csum_flags
&=
1490 IF_HWASSIST_CSUM_FLAGS(ifp
->if_hwassist
);
1492 m
->m_pkthdr
.csum_flags
= 0;
1496 * If small enough for interface, or the interface will take
1497 * care of the fragmentation for us, can just send directly.
1499 if ((u_short
)ip
->ip_len
<= ifp
->if_mtu
|| tso
||
1500 ifp
->if_hwassist
& CSUM_FRAGMENT
) {
1502 m
->m_pkthdr
.csum_flags
|= CSUM_TSO_IPV4
;
1505 #if BYTE_ORDER != BIG_ENDIAN
1511 if (sw_csum
& CSUM_DELAY_IP
) {
1512 ip
->ip_sum
= in_cksum(m
, hlen
);
1516 /* Record statistics for this interface address. */
1517 if (!(flags
& IP_FORWARDING
) && ia
!= NULL
) {
1518 ia
->ia_ifa
.if_opackets
++;
1519 ia
->ia_ifa
.if_obytes
+= m
->m_pkthdr
.len
;
1524 /* clean ipsec history once it goes out of the node */
1525 if (ipsec_bypass
== 0 && (flags
& IP_NOIPSEC
) == 0)
1528 if (packetchain
== 0) {
1529 error
= ifnet_output(ifp
, PF_INET
, m
, ro
->ro_rt
,
1530 (struct sockaddr
*)dst
);
1533 else { /* packet chaining allows us to reuse the route for all packets */
1534 mppn
= &m
->m_nextpkt
;
1540 if (pktcnt
> ip_maxchainsent
)
1541 ip_maxchainsent
= pktcnt
;
1543 error
= ifnet_output(ifp
, PF_INET
, packetlist
,
1544 ro
->ro_rt
, (struct sockaddr
*)dst
);
1555 * Too large for interface; fragment if possible.
1556 * Must be able to put at least 8 bytes per fragment.
1559 if (ip
->ip_off
& IP_DF
|| (m
->m_pkthdr
.csum_flags
& CSUM_TSO_IPV4
)) {
1562 * This case can happen if the user changed the MTU
1564 * of an interface after enabling IP on it. Because
1565 * most netifs don't keep track of routes pointing to
1566 * them, there is no way for one to update all its
1567 * routes when the MTU is changed.
1569 RT_LOCK_SPIN(ro
->ro_rt
);
1570 if (ro
->ro_rt
&& (ro
->ro_rt
->rt_flags
& (RTF_UP
| RTF_HOST
))
1571 && !(ro
->ro_rt
->rt_rmx
.rmx_locks
& RTV_MTU
)
1572 && (ro
->ro_rt
->rt_rmx
.rmx_mtu
> ifp
->if_mtu
)) {
1573 ro
->ro_rt
->rt_rmx
.rmx_mtu
= ifp
->if_mtu
;
1575 RT_UNLOCK(ro
->ro_rt
);
1576 OSAddAtomic(1, &ipstat
.ips_cantfrag
);
1580 error
= ip_fragment(m
, ifp
, ifp
->if_mtu
, sw_csum
);
1586 KERNEL_DEBUG(DBG_LAYER_END
, ip
->ip_dst
.s_addr
,
1587 ip
->ip_src
.s_addr
, ip
->ip_p
, ip
->ip_off
, ip
->ip_len
);
1589 for (m
= m0
; m
; m
= m0
) {
1593 /* clean ipsec history once it goes out of the node */
1594 if (ipsec_bypass
== 0 && (flags
& IP_NOIPSEC
) == 0)
1599 /* Record statistics for this interface address. */
1601 ia
->ia_ifa
.if_opackets
++;
1602 ia
->ia_ifa
.if_obytes
+= m
->m_pkthdr
.len
;
1605 if ((packetchain
!= 0) && (pktcnt
> 0))
1606 panic("ip_output: mix of packet in packetlist is wrong=%p", packetlist
);
1607 error
= ifnet_output(ifp
, PF_INET
, m
, ro
->ro_rt
,
1608 (struct sockaddr
*)dst
);
1614 OSAddAtomic(1, &ipstat
.ips_fragmented
);
1618 ifafree(&ia
->ia_ifa
);
1622 if (ipsec_bypass
== 0 && (flags
& IP_NOIPSEC
) == 0) {
1623 if (ro
== &iproute
&& ro
->ro_rt
) {
1628 KEYDEBUG(KEYDEBUG_IPSEC_STAMP
,
1629 printf("DP ip_output call free SP:%x\n", sp
));
1630 key_freesp(sp
, KEY_SADB_UNLOCKED
);
1635 KERNEL_DEBUG(DBG_FNC_IP_OUTPUT
| DBG_FUNC_END
, error
,0,0,0,0);
/*
 * ip_fragment: split the IP packet held in mbuf chain m into fragments
 * sized for mtu.
 *
 * m        packet to fragment; its struct ip header is read via mtod().
 * ifp      interface whose if_hwassist flags decide whether the delayed
 *          data checksum has to be computed here (CSUM_IP_FRAGS clear).
 * mtu      size limit used to derive the per-fragment payload length.
 * sw_csum  checksum flags; when CSUM_DELAY_IP is set the IP header
 *          checksum of every fragment is computed with in_cksum().
 *
 * Each additional fragment gets its own header mbuf (MGETHDR) followed by
 * a copy of the payload range made with m_copy(); mnext tracks the
 * m_nextpkt link of the most recent fragment.  The first fragment is the
 * original packet trimmed with m_adj().
 *
 * NOTE(review): this listing is a lossy extraction (original line numbers
 * are fused into the text and some lines are absent), so the failure and
 * return paths are not visible here -- confirm against the full source.
 */
1643 ip_fragment(struct mbuf
*m
, struct ifnet
*ifp
, unsigned long mtu
, int sw_csum
)
1645 struct ip
*ip
, *mhip
;
1646 int len
, hlen
, mhlen
, firstlen
, off
, error
= 0;
1647 struct mbuf
**mnext
= &m
->m_nextpkt
, *m0
;
1650 ip
= mtod(m
, struct ip
*);
1652 hlen
= IP_VHL_HL(ip
->ip_vhl
) << 2;
1654 hlen
= ip
->ip_hl
<< 2;
/* Payload bytes per fragment: room left after the header, rounded down to a multiple of 8. */
1657 firstlen
= len
= (mtu
- hlen
) &~ 7;
1664 * if the interface will not calculate checksums on
1665 * fragmented packets, then do it here.
1667 if (m
->m_pkthdr
.csum_flags
& CSUM_DELAY_DATA
&&
1668 (ifp
->if_hwassist
& CSUM_IP_FRAGS
) == 0) {
1669 in_delayed_cksum(m
);
1670 m
->m_pkthdr
.csum_flags
&= ~CSUM_DELAY_DATA
;
1674 * Loop through length of segment after first fragment,
1675 * make new header and copy data of each part and link onto chain.
1678 mhlen
= sizeof (struct ip
);
/* One pass per additional fragment; off is the byte offset of that fragment's data within the original packet. */
1679 for (off
= hlen
+ len
; off
< (u_short
)ip
->ip_len
; off
+= len
) {
1680 MGETHDR(m
, M_DONTWAIT
, MT_HEADER
); /* MAC-OK */
1683 OSAddAtomic(1, &ipstat
.ips_odropped
);
1686 m
->m_flags
|= (m0
->m_flags
& M_MCAST
) | M_FRAG
;
1687 m
->m_data
+= max_linkhdr
;
1688 mhip
= mtod(m
, struct ip
*);
1690 if (hlen
> sizeof (struct ip
)) {
1691 mhlen
= ip_optcopy(ip
, mhip
) + sizeof (struct ip
);
1692 mhip
->ip_vhl
= IP_MAKE_VHL(IPVERSION
, mhlen
>> 2);
/* ip_off counts in 8-byte units, hence the shift by 3. */
1695 mhip
->ip_off
= ((off
- hlen
) >> 3) + (ip
->ip_off
& ~IP_MF
);
1696 if (ip
->ip_off
& IP_MF
)
1697 mhip
->ip_off
|= IP_MF
;
1698 if (off
+ len
>= (u_short
)ip
->ip_len
)
1699 len
= (u_short
)ip
->ip_len
- off
;
1701 mhip
->ip_off
|= IP_MF
;
1702 mhip
->ip_len
= htons((u_short
)(len
+ mhlen
));
1703 m
->m_next
= m_copy(m0
, off
, len
);
1704 if (m
->m_next
== 0) {
1706 error
= ENOBUFS
; /* ??? */
1707 OSAddAtomic(1, &ipstat
.ips_odropped
);
1710 m
->m_pkthdr
.len
= mhlen
+ len
;
1711 m
->m_pkthdr
.rcvif
= 0;
1712 m
->m_pkthdr
.csum_flags
= m0
->m_pkthdr
.csum_flags
;
1713 m
->m_pkthdr
.socket_id
= m0
->m_pkthdr
.socket_id
;
1715 mac_netinet_fragment(m0
, m
);
1718 #if BYTE_ORDER != BIG_ENDIAN
1719 HTONS(mhip
->ip_off
);
1723 if (sw_csum
& CSUM_DELAY_IP
) {
1724 mhip
->ip_sum
= in_cksum(m
, mhlen
);
1727 mnext
= &m
->m_nextpkt
;
1730 OSAddAtomic(nfrags
, &ipstat
.ips_ofragments
);
1732 /* set first/last markers for fragment chain */
1733 m
->m_flags
|= M_LASTFRAG
;
1734 m0
->m_flags
|= M_FIRSTFRAG
| M_FRAG
;
1735 m0
->m_pkthdr
.csum_data
= nfrags
;
1738 * Update first fragment by trimming what's been copied out
1739 * and updating header, then send each fragment (in order).
1742 m_adj(m
, hlen
+ firstlen
- (u_short
)ip
->ip_len
);
1743 m
->m_pkthdr
.len
= hlen
+ firstlen
;
1744 ip
->ip_len
= htons((u_short
)m
->m_pkthdr
.len
);
1745 ip
->ip_off
|= IP_MF
;
1747 #if BYTE_ORDER != BIG_ENDIAN
1752 if (sw_csum
& CSUM_DELAY_IP
) {
1753 ip
->ip_sum
= in_cksum(m
, hlen
);
/*
 * ip_out_cksum_stats: pass len on to tcp_out_cksum_stats() or
 * udp_out_cksum_stats(); per the remark below, only TCP and UDP
 * statistics are kept for now.
 *
 * NOTE(review): the dispatch on proto is not visible in this extraction --
 * presumably a switch on the IP protocol number; confirm against the full
 * source.
 */
1763 ip_out_cksum_stats(int proto
, u_int32_t len
)
1767 tcp_out_cksum_stats(len
);
1770 udp_out_cksum_stats(len
);
1773 /* keep only TCP or UDP stats for now */
/*
 * in_delayed_cksum_offset: compute the deferred data checksum for the IP
 * packet that begins ip_offset bytes into mbuf chain m0 and store it in
 * the packet.
 *
 * m0         head of the chain; m0->m_pkthdr supplies len, csum_flags and
 *            csum_data.
 * ip_offset  byte offset of the IP header within the chain.
 *
 * The checksum comes from in_cksum_skip() over ip_len bytes, skipping the
 * IP header length.  It is stored at (IP header length + low 16 bits of
 * m0->m_pkthdr.csum_data) past the IP header: written directly when that
 * location lies within the current mbuf's m_len, otherwise through
 * m_copyback().
 *
 * NOTE(review): several lines (early returns, braces, some declarations)
 * are absent from this extraction; confirm control flow against the full
 * source.
 */
1779 in_delayed_cksum_offset(struct mbuf
*m0
, int ip_offset
)
1782 unsigned char buf
[sizeof(struct ip
)];
1783 u_short csum
, offset
, ip_len
;
1784 struct mbuf
*m
= m0
;
/* Reduce ip_offset by each mbuf's length until it falls inside the current mbuf. */
1786 while (ip_offset
>= m
->m_len
) {
1787 ip_offset
-= m
->m_len
;
1790 printf("in_delayed_cksum_withoffset failed - ip_offset wasn't in the packet\n");
1795 /* Sometimes the IP header is not contiguous, yes this can happen! */
1796 if (ip_offset
+ sizeof(struct ip
) > m
->m_len
) {
1798 printf("delayed m_pullup, m->len: %d off: %d\n",
1799 m
->m_len
, ip_offset
);
1801 m_copydata(m
, ip_offset
, sizeof(struct ip
), (caddr_t
) buf
);
1803 ip
= (struct ip
*)buf
;
1805 ip
= (struct ip
*)(m
->m_data
+ ip_offset
);
/* Temporarily hide the leading ip_offset bytes so the mbuf starts at the IP header; this is undone further down. */
1810 m
->m_len
-= ip_offset
;
1811 m
->m_data
+= ip_offset
;
1814 offset
= IP_VHL_HL(ip
->ip_vhl
) << 2 ;
1817 * We could be in the context of an IP or interface filter; in the
1818 * former case, ip_len would be in host (correct) order while for
1819 * the latter it would be in network order. Because of this, we
1820 * attempt to interpret the length field by comparing it against
1821 * the actual packet length. If the comparison fails, byte swap
1822 * the length and check again. If it still fails, then the packet
1823 * is bogus and we give up.
1825 ip_len
= ip
->ip_len
;
1826 if (ip_len
!= (m0
->m_pkthdr
.len
- ip_offset
)) {
1827 ip_len
= SWAP16(ip_len
);
1828 if (ip_len
!= (m0
->m_pkthdr
.len
- ip_offset
)) {
1829 printf("in_delayed_cksum_offset: ip_len %d (%d) "
1830 "doesn't match actual length %d\n", ip
->ip_len
,
1831 ip_len
, (m0
->m_pkthdr
.len
- ip_offset
));
1836 csum
= in_cksum_skip(m
, ip_len
, offset
);
1839 ip_out_cksum_stats(ip
->ip_p
, ip_len
- offset
);
1841 if (m0
->m_pkthdr
.csum_flags
& CSUM_UDP
&& csum
== 0)
1843 offset
+= m0
->m_pkthdr
.csum_data
& 0xFFFF; /* checksum offset */
/* Undo the temporary m_len/m_data adjustment made before the checksum was computed. */
1847 if (M_LEADINGSPACE(m
) < ip_offset
)
1848 panic("in_delayed_cksum_offset - chain modified!\n");
1849 m
->m_len
+= ip_offset
;
1850 m
->m_data
-= ip_offset
;
1853 if (offset
> ip_len
) /* bogus offset */
1856 /* Insert the checksum in the existing chain */
1857 if (offset
+ ip_offset
+ sizeof(u_short
) > m
->m_len
) {
1861 printf("delayed m_copyback, m->len: %d off: %d p: %d\n",
1862 m
->m_len
, offset
+ ip_offset
, ip
->ip_p
);
1864 *(u_short
*)tmp
= csum
;
1865 m_copyback(m
, offset
+ ip_offset
, 2, tmp
);
1867 *(u_short
*)(m
->m_data
+ offset
+ ip_offset
) = csum
;
/*
 * in_delayed_cksum: convenience wrapper that calls
 * in_delayed_cksum_offset() on m with an ip_offset of 0.
 */
1871 in_delayed_cksum(struct mbuf
*m
)
1873 in_delayed_cksum_offset(m
, 0);
/*
 * in_cksum_offset: recompute the IP header checksum (ip_sum) for the IP
 * header located ip_offset bytes into mbuf chain m.
 *
 * m          mbuf chain; m->m_pkthdr.len is compared against ip_len.
 * ip_offset  byte offset of the IP header within the chain.
 *
 * When the header is not contiguous in the current mbuf it is copied into
 * a local buffer with m_copydata(), and the resulting ip_sum is written
 * back with m_copyback().  ip_len is byte-swapped with SWAP16() when
 * ntohs(ip_len) does not match the remaining packet length; if it still
 * does not match after the swap, the mismatch is logged with printf().
 *
 * NOTE(review): several lines (early returns, braces, some declarations)
 * are absent from this extraction; confirm control flow against the full
 * source.
 */
1877 in_cksum_offset(struct mbuf
* m
, size_t ip_offset
)
1879 struct ip
* ip
= NULL
;
1881 unsigned char buf
[sizeof(struct ip
)];
/* Reduce ip_offset by each mbuf's length until it falls inside the current mbuf. */
1884 while (ip_offset
>= m
->m_len
) {
1885 ip_offset
-= m
->m_len
;
1888 printf("in_cksum_offset failed - ip_offset wasn't in the packet\n");
1893 /* Sometimes the IP header is not contiguous, yes this can happen! */
1894 if (ip_offset
+ sizeof(struct ip
) > m
->m_len
) {
1897 printf("in_cksum_offset - delayed m_pullup, m->len: %d off: %lu\n",
1898 m
->m_len
, ip_offset
);
1900 m_copydata(m
, ip_offset
, sizeof(struct ip
), (caddr_t
) buf
);
1902 ip
= (struct ip
*)buf
;
1904 m_copyback(m
, ip_offset
+ offsetof(struct ip
, ip_sum
), 2, (caddr_t
)&ip
->ip_sum
);
1906 ip
= (struct ip
*)(m
->m_data
+ ip_offset
);
/* Temporarily hide the leading ip_offset bytes so the mbuf starts at the IP header; this is undone further down. */
1912 m
->m_len
-= ip_offset
;
1913 m
->m_data
+= ip_offset
;
1917 hlen
= IP_VHL_HL(ip
->ip_vhl
) << 2;
1919 hlen
= ip
->ip_hl
<< 2;
1922 * We could be in the context of an IP or interface filter; in the
1923 * former case, ip_len would be in host order while for the latter
1924 * it would be in network (correct) order. Because of this, we
1925 * attempt to interpret the length field by comparing it against
1926 * the actual packet length. If the comparison fails, byte swap
1927 * the length and check again. If it still fails, then the packet
1928 * is bogus and we give up.
1930 if (ntohs(ip
->ip_len
) != (m
->m_pkthdr
.len
- ip_offset
)) {
1931 ip
->ip_len
= SWAP16(ip
->ip_len
);
1933 if (ntohs(ip
->ip_len
) != (m
->m_pkthdr
.len
- ip_offset
)) {
1934 ip
->ip_len
= SWAP16(ip
->ip_len
);
1935 printf("in_cksum_offset: ip_len %d (%d) "
1936 "doesn't match actual length %lu\n",
1937 ip
->ip_len
, SWAP16(ip
->ip_len
),
1938 (m
->m_pkthdr
.len
- ip_offset
));
1944 ip
->ip_sum
= in_cksum(m
, hlen
);
1946 ip
->ip_len
= SWAP16(ip
->ip_len
);
/* Undo the temporary m_len/m_data adjustment made before the checksum was computed. */
1950 if (M_LEADINGSPACE(m
) < ip_offset
)
1951 panic("in_cksum_offset - chain modified!\n");
1952 m
->m_len
+= ip_offset
;
1953 m
->m_data
-= ip_offset
;
1956 /* Insert the checksum in the existing chain if IP header not contiguous */
1957 if (ip_offset
+ sizeof(struct ip
) > m
->m_len
) {
1961 printf("in_cksum_offset m_copyback, m->len: %u off: %lu p: %d\n",
1962 m
->m_len
, ip_offset
+ offsetof(struct ip
, ip_sum
), ip
->ip_p
);
1964 *(u_short
*)tmp
= ip
->ip_sum
;
1965 m_copyback(m
, ip_offset
+ offsetof(struct ip
, ip_sum
), 2, tmp
);
1970 * Insert IP options into preformed packet.
1971 * Adjust IP destination as required for IP source routing,
1972 * as indicated by a non-zero in_addr at the start of the options.
1974 * XXX This routine assumes that the packet has no options in place.
/*
 * ip_insertoptions: place the IP options carried in mbuf opt directly
 * after the IP header of packet m and store the resulting header length
 * in *phlen.
 *
 * m      packet whose data begins with a struct ip.
 * opt    mbuf holding a struct ipoption: a leading ipopt_dst address
 *        followed by the option bytes (ipopt_list).
 * phlen  out: set to sizeof(struct ip) + optlen.
 *
 * A non-zero ipopt_dst replaces ip_dst.  If adding the options would push
 * ip_len past IP_MAXPACKET, m is returned unchanged.  On the normal path
 * ip_vhl is rebuilt from the new header length and ip_len grows by
 * optlen.
 *
 * NOTE(review): some declarations and the final return are absent from
 * this extraction; confirm against the full source.
 */
1976 static struct mbuf
*
1977 ip_insertoptions(m
, opt
, phlen
)
1978 register struct mbuf
*m
;
1982 register struct ipoption
*p
= mtod(opt
, struct ipoption
*);
1984 register struct ip
*ip
= mtod(m
, struct ip
*);
1987 optlen
= opt
->m_len
- sizeof(p
->ipopt_dst
);
1988 if (optlen
+ (u_short
)ip
->ip_len
> IP_MAXPACKET
)
1989 return (m
); /* XXX should fail */
1990 if (p
->ipopt_dst
.s_addr
)
1991 ip
->ip_dst
= p
->ipopt_dst
;
/* Taken when m uses external storage (M_EXT) or has fewer than optlen bytes of room in front of its data: a new header mbuf is obtained with MGETHDR. */
1992 if (m
->m_flags
& M_EXT
|| m
->m_data
- optlen
< m
->m_pktdat
) {
1993 MGETHDR(n
, M_DONTWAIT
, MT_HEADER
); /* MAC-OK */
1996 n
->m_pkthdr
.rcvif
= 0;
1998 mac_mbuf_label_copy(m
, n
);
2000 n
->m_pkthdr
.len
= m
->m_pkthdr
.len
+ optlen
;
2001 m
->m_len
-= sizeof(struct ip
);
2002 m
->m_data
+= sizeof(struct ip
);
2005 m
->m_len
= optlen
+ sizeof(struct ip
);
2006 m
->m_data
+= max_linkhdr
;
2007 (void)memcpy(mtod(m
, void *), ip
, sizeof(struct ip
));
2009 m
->m_data
-= optlen
;
2011 m
->m_pkthdr
.len
+= optlen
;
2012 ovbcopy((caddr_t
)ip
, mtod(m
, caddr_t
), sizeof(struct ip
));
2014 ip
= mtod(m
, struct ip
*);
2015 bcopy(p
->ipopt_list
, ip
+ 1, optlen
);
2016 *phlen
= sizeof(struct ip
) + optlen
;
2017 ip
->ip_vhl
= IP_MAKE_VHL(IPVERSION
, *phlen
>> 2);
2018 ip
->ip_len
+= optlen
;
2023 * Copy options from ip to jp,
2024 * omitting those not copied during fragmentation.
/*
 * Body of ip_optcopy: walk the option bytes that follow the header at ip
 * (cnt = header length minus sizeof(struct ip)) and copy every option for
 * which IPOPT_COPIED(opt) is true into the space after jp.
 *
 * An option whose length byte is too small or exceeds the remaining count
 * triggers a panic.  The closing loop advances optlen until the copied
 * length is a multiple of 4.
 *
 * NOTE(review): the function signature and several statements are absent
 * from this extraction; the name is taken from the panic strings.
 */
2030 register u_char
*cp
, *dp
;
2031 int opt
, optlen
, cnt
;
2033 cp
= (u_char
*)(ip
+ 1);
2034 dp
= (u_char
*)(jp
+ 1);
2035 cnt
= (IP_VHL_HL(ip
->ip_vhl
) << 2) - sizeof (struct ip
);
2036 for (; cnt
> 0; cnt
-= optlen
, cp
+= optlen
) {
2038 if (opt
== IPOPT_EOL
)
2040 if (opt
== IPOPT_NOP
) {
2041 /* Preserve for IP mcast tunnel's LSRR alignment. */
2047 if (cnt
< IPOPT_OLEN
+ sizeof(*cp
))
2048 panic("malformed IPv4 option passed to ip_optcopy");
2050 optlen
= cp
[IPOPT_OLEN
];
2052 if (optlen
< IPOPT_OLEN
+ sizeof(*cp
) || optlen
> cnt
)
2053 panic("malformed IPv4 option passed to ip_optcopy");
2055 /* bogus lengths should have been caught by ip_dooptions */
2058 if (IPOPT_COPIED(opt
)) {
2059 bcopy(cp
, dp
, optlen
);
2063 for (optlen
= dp
- (u_char
*)(jp
+1); optlen
& 0x3; optlen
++)
2069 * IP socket option processing.
/*
 * ip_ctloutput: process a socket option request (sopt) for socket so.
 *
 * so    socket; its inpcb is obtained with sotoinpcb().
 * sopt  request descriptor; sopt_level, sopt_dir, sopt_name, sopt_valsize
 *       and sopt_p are read here.
 *
 * sopt_level is first compared against IPPROTO_IP, then the function
 * switches on sopt_dir and, inside each direction, on sopt_name.  Values
 * arrive through sooptcopyin() / soopt_mcopyin() and leave through
 * sooptcopyout() / soopt_mcopyout().  Requests update or report fields of
 * the inpcb (inp_ip_tos, inp_ip_ttl, inp_flags, inp_options,
 * inp_moptions, inp_boundif).  Option names that no case handles yield
 * ENOPROTOOPT.
 *
 * NOTE(review): many case labels, breaks and declarations are absent from
 * this extraction; confirm the exact option-to-action mapping against the
 * full source.
 */
2072 ip_ctloutput(so
, sopt
)
2074 struct sockopt
*sopt
;
2076 struct inpcb
*inp
= sotoinpcb(so
);
2080 if (sopt
->sopt_level
!= IPPROTO_IP
) {
2084 switch (sopt
->sopt_dir
) {
/* First sopt_name switch: option values are copied in from the caller (sooptcopyin). */
2086 switch (sopt
->sopt_name
) {
2093 if (sopt
->sopt_valsize
> MLEN
) {
2097 MGET(m
, sopt
->sopt_p
!= kernproc
? M_WAIT
: M_DONTWAIT
,
2103 m
->m_len
= sopt
->sopt_valsize
;
2104 error
= sooptcopyin(sopt
, mtod(m
, char *), m
->m_len
,
2109 return (ip_pcbopts(sopt
->sopt_name
, &inp
->inp_options
,
2116 case IP_RECVRETOPTS
:
2117 case IP_RECVDSTADDR
:
2120 #if defined(NFAITH) && NFAITH > 0
2123 error
= sooptcopyin(sopt
, &optval
, sizeof optval
,
2128 switch (sopt
->sopt_name
) {
2130 inp
->inp_ip_tos
= optval
;
2134 inp
->inp_ip_ttl
= optval
;
2136 #define OPTSET(bit) \
2138 inp->inp_flags |= bit; \
2140 inp->inp_flags &= ~bit;
2143 OPTSET(INP_RECVOPTS
);
2146 case IP_RECVRETOPTS
:
2147 OPTSET(INP_RECVRETOPTS
);
2150 case IP_RECVDSTADDR
:
2151 OPTSET(INP_RECVDSTADDR
);
2159 OPTSET(INP_RECVTTL
);
2162 #if defined(NFAITH) && NFAITH > 0
2171 #if CONFIG_FORCE_OUT_IFP
2173 * Apple private interface, similar to IP_BOUND_IF, except
2174 * that the parameter is a NULL-terminated string containing
2175 * the name of the network interface; an emptry string means
2176 * unbind. Applications are encouraged to use IP_BOUND_IF
2177 * instead, as that is the current "official" API.
2179 case IP_FORCE_OUT_IFP
: {
2180 char ifname
[IFNAMSIZ
];
2181 unsigned int ifscope
;
2183 /* This option is settable only for IPv4 */
2184 if (!(inp
->inp_vflag
& INP_IPV4
)) {
2189 /* Verify interface name parameter is sane */
2190 if (sopt
->sopt_valsize
> sizeof(ifname
)) {
2195 /* Copy the interface name */
2196 if (sopt
->sopt_valsize
!= 0) {
2197 error
= sooptcopyin(sopt
, ifname
,
2198 sizeof (ifname
), sopt
->sopt_valsize
);
/* NOTE(review): ifname[] elements are chars, yet here and a few lines below they are compared against NULL (a pointer constant); a character constant would state the intent. */
2203 if (sopt
->sopt_valsize
== 0 || ifname
[0] == NULL
) {
2204 /* Unbind this socket from any interface */
2205 ifscope
= IFSCOPE_NONE
;
2209 /* Verify name is NULL terminated */
2210 if (ifname
[sopt
->sopt_valsize
- 1] != NULL
) {
2215 /* Bail out if given bogus interface name */
2216 if (ifnet_find_by_name(ifname
, &ifp
) != 0) {
2221 /* Bind this socket to this interface */
2222 ifscope
= ifp
->if_index
;
2225 * Won't actually free; since we don't release
2226 * this later, we should do it now.
2230 ip_bindif(inp
, ifscope
);
2234 case IP_MULTICAST_IF
:
2235 case IP_MULTICAST_VIF
:
2236 case IP_MULTICAST_TTL
:
2237 case IP_MULTICAST_LOOP
:
2238 case IP_ADD_MEMBERSHIP
:
2239 case IP_DROP_MEMBERSHIP
:
2240 error
= ip_setmoptions(sopt
, &inp
->inp_moptions
);
2244 error
= sooptcopyin(sopt
, &optval
, sizeof optval
,
2250 case IP_PORTRANGE_DEFAULT
:
2251 inp
->inp_flags
&= ~(INP_LOWPORT
);
2252 inp
->inp_flags
&= ~(INP_HIGHPORT
);
2255 case IP_PORTRANGE_HIGH
:
2256 inp
->inp_flags
&= ~(INP_LOWPORT
);
2257 inp
->inp_flags
|= INP_HIGHPORT
;
2260 case IP_PORTRANGE_LOW
:
2261 inp
->inp_flags
&= ~(INP_HIGHPORT
);
2262 inp
->inp_flags
|= INP_LOWPORT
;
2272 case IP_IPSEC_POLICY
:
2280 if (sopt
->sopt_valsize
> MCLBYTES
) {
2284 if ((error
= soopt_getm(sopt
, &m
)) != 0) /* XXX */
2286 if ((error
= soopt_mcopyin(sopt
, m
)) != 0) /* XXX */
2288 priv
= (proc_suser(sopt
->sopt_p
) == 0);
2290 req
= mtod(m
, caddr_t
);
2293 optname
= sopt
->sopt_name
;
2294 error
= ipsec4_set_policy(inp
, optname
, req
, len
, priv
);
2301 case IP_TRAFFIC_MGT_BACKGROUND
:
2303 unsigned background
= 0;
2304 error
= sooptcopyin(sopt
, &background
, sizeof(background
), sizeof(background
));
2309 socket_set_traffic_mgt_flags(so
,
2310 TRAFFIC_MGT_SO_BACKGROUND
|
2311 TRAFFIC_MGT_SO_BG_REGULATE
);
2313 socket_clear_traffic_mgt_flags(so
,
2314 TRAFFIC_MGT_SO_BACKGROUND
|
2315 TRAFFIC_MGT_SO_BG_REGULATE
);
2320 #endif /* TRAFFIC_MGT */
2323 * On a multihomed system, scoped routing can be used to
2324 * restrict the source interface used for sending packets.
2325 * The socket option IP_BOUND_IF binds a particular AF_INET
2326 * socket to an interface such that data sent on the socket
2327 * is restricted to that interface. This is unlike the
2328 * SO_DONTROUTE option where the routing table is bypassed;
2329 * therefore it allows for a greater flexibility and control
2330 * over the system behavior, and does not place any restriction
2331 * on the destination address type (e.g. unicast, multicast,
2332 * or broadcast if applicable) or whether or not the host is
2333 * directly reachable. Note that in the multicast transmit
2334 * case, IP_MULTICAST_IF takes precedence over IP_BOUND_IF,
2335 * since the former practically bypasses the routing table;
2336 * in this case, IP_BOUND_IF sets the default interface used
2337 * for sending multicast packets in the absence of an explicit
2338 * transmit interface set via IP_MULTICAST_IF.
2341 /* This option is settable only for IPv4 */
2342 if (!(inp
->inp_vflag
& INP_IPV4
)) {
2347 error
= sooptcopyin(sopt
, &optval
, sizeof (optval
),
2353 ip_bindif(inp
, optval
);
2357 error
= ENOPROTOOPT
;
/* Second sopt_name switch: option values are copied out to the caller (sooptcopyout). */
2363 switch (sopt
->sopt_name
) {
2366 if (inp
->inp_options
)
2367 error
= sooptcopyout(sopt
,
2368 mtod(inp
->inp_options
,
2370 inp
->inp_options
->m_len
);
2372 sopt
->sopt_valsize
= 0;
2378 case IP_RECVRETOPTS
:
2379 case IP_RECVDSTADDR
:
2383 #if defined(NFAITH) && NFAITH > 0
2386 switch (sopt
->sopt_name
) {
2389 optval
= inp
->inp_ip_tos
;
2393 optval
= inp
->inp_ip_ttl
;
2396 #define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0)
2399 optval
= OPTBIT(INP_RECVOPTS
);
2402 case IP_RECVRETOPTS
:
2403 optval
= OPTBIT(INP_RECVRETOPTS
);
2406 case IP_RECVDSTADDR
:
2407 optval
= OPTBIT(INP_RECVDSTADDR
);
2411 optval
= OPTBIT(INP_RECVIF
);
2415 optval
= OPTBIT(INP_RECVTTL
);
2419 if (inp
->inp_flags
& INP_HIGHPORT
)
2420 optval
= IP_PORTRANGE_HIGH
;
2421 else if (inp
->inp_flags
& INP_LOWPORT
)
2422 optval
= IP_PORTRANGE_LOW
;
2427 #if defined(NFAITH) && NFAITH > 0
2429 optval
= OPTBIT(INP_FAITH
);
2433 error
= sooptcopyout(sopt
, &optval
, sizeof optval
);
2436 case IP_MULTICAST_IF
:
2437 case IP_MULTICAST_VIF
:
2438 case IP_MULTICAST_TTL
:
2439 case IP_MULTICAST_LOOP
:
2440 case IP_ADD_MEMBERSHIP
:
2441 case IP_DROP_MEMBERSHIP
:
2442 error
= ip_getmoptions(sopt
, inp
->inp_moptions
);
2446 case IP_IPSEC_POLICY
:
2448 struct mbuf
*m
= NULL
;
2453 req
= mtod(m
, caddr_t
);
2456 error
= ipsec4_get_policy(sotoinpcb(so
), req
, len
, &m
);
2458 error
= soopt_mcopyout(sopt
, m
); /* XXX */
2466 case IP_TRAFFIC_MGT_BACKGROUND
:
2468 unsigned background
= so
->so_traffic_mgt_flags
;
2469 return (sooptcopyout(sopt
, &background
, sizeof(background
)));
2472 #endif /* TRAFFIC_MGT */
2475 if (inp
->inp_flags
& INP_BOUND_IF
)
2476 optval
= inp
->inp_boundif
;
2477 error
= sooptcopyout(sopt
, &optval
, sizeof (optval
));
2481 error
= ENOPROTOOPT
;
2490 * Set up IP options in pcb for insertion in output packets.
2491 * Store in mbuf with pointer in pcbopt, adding pseudo-option
2492 * with destination address if source routed.
/*
 * Parameters and body of ip_pcbopts: validate the IP options held in mbuf
 * m and prepare them for storage in the per-pcb options slot *pcbopt.
 *
 * optname  unused (marked __unused).
 * pcbopt   address of the pcb's option mbuf pointer; any previous mbuf is
 *          released with m_free().
 * m        new options; a null pointer or an m_len of 0 only clears the
 *          previous options.
 *
 * m_len must be a multiple of sizeof(int32_t).  The options are shifted up
 * by one struct in_addr and the vacated front slot is zeroed; for a
 * source-route option the first-hop address is moved into that slot and
 * the option is shortened accordingly.  The final check compares m_len
 * against MAX_IPOPTLEN + sizeof(struct in_addr).
 *
 * NOTE(review): the line carrying the function name, the case labels and
 * the error/return paths are absent from this extraction; the name is
 * taken from the ip_pcbopts() call made in ip_ctloutput.
 */
2496 __unused
int optname
,
2497 struct mbuf
**pcbopt
,
2498 register struct mbuf
*m
)
2500 register int cnt
, optlen
;
2501 register u_char
*cp
;
2504 /* turn off any old options */
2506 (void)m_free(*pcbopt
);
2508 if (m
== (struct mbuf
*)0 || m
->m_len
== 0) {
2510 * Only turning off any previous options.
2518 if (m
->m_len
% sizeof(int32_t))
2522 * IP first-hop destination address will be stored before
2523 * actual options; move other options back
2524 * and clear it when none present.
2526 if (m
->m_data
+ m
->m_len
+ sizeof(struct in_addr
) >= &m
->m_dat
[MLEN
])
/* Grow the data by one struct in_addr and shift the options up, leaving a zeroed slot at the front. */
2529 m
->m_len
+= sizeof(struct in_addr
);
2530 cp
= mtod(m
, u_char
*) + sizeof(struct in_addr
);
2531 ovbcopy(mtod(m
, caddr_t
), (caddr_t
)cp
, (unsigned)cnt
);
2532 bzero(mtod(m
, caddr_t
), sizeof(struct in_addr
));
2534 for (; cnt
> 0; cnt
-= optlen
, cp
+= optlen
) {
2535 opt
= cp
[IPOPT_OPTVAL
];
2536 if (opt
== IPOPT_EOL
)
2538 if (opt
== IPOPT_NOP
)
2541 if (cnt
< IPOPT_OLEN
+ sizeof(*cp
))
2543 optlen
= cp
[IPOPT_OLEN
];
2544 if (optlen
< IPOPT_OLEN
+ sizeof(*cp
) || optlen
> cnt
)
2555 * user process specifies route as:
2557 * D must be our final destination (but we can't
2558 * check that since we may not have connected yet).
2559 * A is first hop destination, which doesn't appear in
2560 * actual IP option, but is stored before the options.
2562 if (optlen
< IPOPT_MINOFF
- 1 + sizeof(struct in_addr
))
2564 m
->m_len
-= sizeof(struct in_addr
);
2565 cnt
-= sizeof(struct in_addr
);
2566 optlen
-= sizeof(struct in_addr
);
2567 cp
[IPOPT_OLEN
] = optlen
;
2569 * Move first hop before start of options.
2571 bcopy((caddr_t
)&cp
[IPOPT_OFFSET
+1], mtod(m
, caddr_t
),
2572 sizeof(struct in_addr
));
2574 * Then copy rest of options back
2575 * to close up the deleted entry.
2577 ovbcopy((caddr_t
)(&cp
[IPOPT_OFFSET
+1] +
2578 sizeof(struct in_addr
)),
2579 (caddr_t
)&cp
[IPOPT_OFFSET
+1],
2580 (unsigned)cnt
+ sizeof(struct in_addr
));
2584 if (m
->m_len
> MAX_IPOPTLEN
+ sizeof(struct in_addr
))
2596 * The whole multicast option thing needs to be re-thought.
2597 * Several of these options are equally applicable to non-multicast
2598 * transmission, and one (IP_MULTICAST_TTL) totally duplicates a
2599 * standard option (IP_TTL).
2603 * following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as interface index.
2605 static struct ifnet
*
2606 ip_multicast_if(a
, ifindexp
)
2615 if (ntohl(a
->s_addr
) >> 24 == 0) {
2616 ifindex
= ntohl(a
->s_addr
) & 0xffffff;
2617 ifnet_head_lock_shared();
2618 if (ifindex
< 0 || if_index
< ifindex
) {
2622 ifp
= ifindex2ifnet
[ifindex
];
2625 *ifindexp
= ifindex
;
2627 INADDR_TO_IFP(*a
, ifp
);
2633 * Set the IP multicast options in response to user setsockopt().
2636 ip_setmoptions(sopt
, imop
)
2637 struct sockopt
*sopt
;
2638 struct ip_moptions
**imop
;
2641 struct in_addr addr
;
2642 struct ip_mreq mreq
;
2643 struct ifnet
*ifp
= NULL
;
2644 struct ip_moptions
*imo
= *imop
;
2649 * No multicast option buffer attached to the pcb;
2650 * allocate one and initialize to default values.
2652 error
= ip_createmoptions(imop
);
2658 switch (sopt
->sopt_name
) {
2659 /* store an index number for the vif you wanna use in the send */
2661 case IP_MULTICAST_VIF
:
2664 if (legal_vif_num
== 0) {
2668 error
= sooptcopyin(sopt
, &i
, sizeof i
, sizeof i
);
2671 if (!legal_vif_num(i
) && (i
!= -1)) {
2675 imo
->imo_multicast_vif
= i
;
2678 #endif /* MROUTING */
2680 case IP_MULTICAST_IF
:
2682 * Select the interface for outgoing multicast packets.
2684 error
= sooptcopyin(sopt
, &addr
, sizeof addr
, sizeof addr
);
2688 * INADDR_ANY is used to remove a previous selection.
2689 * When no interface is selected, a default one is
2690 * chosen every time a multicast packet is sent.
2692 if (addr
.s_addr
== INADDR_ANY
) {
2693 imo
->imo_multicast_ifp
= NULL
;
2697 * The selected interface is identified by its local
2698 * IP address. Find the interface and confirm that
2699 * it supports multicasting.
2701 ifp
= ip_multicast_if(&addr
, &ifindex
);
2702 if (ifp
== NULL
|| (ifp
->if_flags
& IFF_MULTICAST
) == 0) {
2703 error
= EADDRNOTAVAIL
;
2706 imo
->imo_multicast_ifp
= ifp
;
2708 imo
->imo_multicast_addr
= addr
;
2710 imo
->imo_multicast_addr
.s_addr
= INADDR_ANY
;
2713 case IP_MULTICAST_TTL
:
2715 * Set the IP time-to-live for outgoing multicast packets.
2716 * The original multicast API required a char argument,
2717 * which is inconsistent with the rest of the socket API.
2718 * We allow either a char or an int.
2720 if (sopt
->sopt_valsize
== 1) {
2722 error
= sooptcopyin(sopt
, &ttl
, 1, 1);
2725 imo
->imo_multicast_ttl
= ttl
;
2728 error
= sooptcopyin(sopt
, &ttl
, sizeof ttl
,
2735 imo
->imo_multicast_ttl
= ttl
;
2739 case IP_MULTICAST_LOOP
:
2741 * Set the loopback flag for outgoing multicast packets.
2742 * Must be zero or one. The original multicast API required a
2743 * char argument, which is inconsistent with the rest
2744 * of the socket API. We allow either a char or an int.
2746 if (sopt
->sopt_valsize
== 1) {
2748 error
= sooptcopyin(sopt
, &loop
, 1, 1);
2751 imo
->imo_multicast_loop
= !!loop
;
2754 error
= sooptcopyin(sopt
, &loop
, sizeof loop
,
2758 imo
->imo_multicast_loop
= !!loop
;
2762 case IP_ADD_MEMBERSHIP
:
2764 * Add a multicast group membership.
2765 * Group must be a valid IP multicast address.
2767 error
= sooptcopyin(sopt
, &mreq
, sizeof mreq
, sizeof mreq
);
2771 error
= ip_addmembership(imo
, &mreq
);
2774 case IP_DROP_MEMBERSHIP
:
2776 * Drop a multicast group membership.
2777 * Group must be a valid IP multicast address.
2779 error
= sooptcopyin(sopt
, &mreq
, sizeof mreq
, sizeof mreq
);
2783 error
= ip_dropmembership(imo
, &mreq
);
2792 * If all options have default values, no need to keep the mbuf.
2794 if (imo
->imo_multicast_ifp
== NULL
&&
2795 imo
->imo_multicast_vif
== (u_int32_t
)-1 &&
2796 imo
->imo_multicast_ttl
== IP_DEFAULT_MULTICAST_TTL
&&
2797 imo
->imo_multicast_loop
== IP_DEFAULT_MULTICAST_LOOP
&&
2798 imo
->imo_num_memberships
== 0) {
2799 FREE(*imop
, M_IPMOPTS
);
2807 * Set the IP multicast options in response to user setsockopt().
2809 __private_extern__
int
2811 struct ip_moptions
**imop
)
2813 struct ip_moptions
*imo
;
2814 imo
= (struct ip_moptions
*) _MALLOC(sizeof(*imo
), M_IPMOPTS
,
2820 imo
->imo_multicast_ifp
= NULL
;
2821 imo
->imo_multicast_addr
.s_addr
= INADDR_ANY
;
2822 imo
->imo_multicast_vif
= -1;
2823 imo
->imo_multicast_ttl
= IP_DEFAULT_MULTICAST_TTL
;
2824 imo
->imo_multicast_loop
= IP_DEFAULT_MULTICAST_LOOP
;
2825 imo
->imo_num_memberships
= 0;
2831 * Add membership to an IPv4 multicast.
2833 __private_extern__
int
2835 struct ip_moptions
*imo
,
2836 struct ip_mreq
*mreq
)
2839 struct sockaddr_in
*dst
;
2840 struct ifnet
*ifp
= NULL
;
2844 bzero((caddr_t
)&ro
, sizeof(ro
));
2846 if (!IN_MULTICAST(ntohl(mreq
->imr_multiaddr
.s_addr
))) {
2851 * If no interface address was provided, use the interface of
2852 * the route to the given multicast address.
2854 if (mreq
->imr_interface
.s_addr
== INADDR_ANY
) {
2855 dst
= (struct sockaddr_in
*)&ro
.ro_dst
;
2856 dst
->sin_len
= sizeof(*dst
);
2857 dst
->sin_family
= AF_INET
;
2858 dst
->sin_addr
= mreq
->imr_multiaddr
;
2859 rtalloc_ign(&ro
, 0);
2860 if (ro
.ro_rt
!= NULL
) {
2861 ifp
= ro
.ro_rt
->rt_ifp
;
2863 /* If there's no default route, try using loopback */
2864 mreq
->imr_interface
.s_addr
= htonl(INADDR_LOOPBACK
);
2869 ifp
= ip_multicast_if(&mreq
->imr_interface
, NULL
);
2873 * See if we found an interface, and confirm that it
2874 * supports multicast.
2876 if (ifp
== NULL
|| (ifp
->if_flags
& IFF_MULTICAST
) == 0) {
2877 error
= EADDRNOTAVAIL
;
2881 * See if the membership already exists or if all the
2882 * membership slots are full.
2884 for (i
= 0; i
< imo
->imo_num_memberships
; ++i
) {
2885 if (imo
->imo_membership
[i
]->inm_ifp
== ifp
&&
2886 imo
->imo_membership
[i
]->inm_addr
.s_addr
2887 == mreq
->imr_multiaddr
.s_addr
)
2890 if (i
< imo
->imo_num_memberships
) {
2894 if (i
== IP_MAX_MEMBERSHIPS
) {
2895 error
= ETOOMANYREFS
;
2899 * Everything looks good; add a new record to the multicast
2900 * address list for the given interface.
2902 if ((imo
->imo_membership
[i
] =
2903 in_addmulti(&mreq
->imr_multiaddr
, ifp
)) == NULL
) {
2907 ++imo
->imo_num_memberships
;
2910 if (ro
.ro_rt
!= NULL
)
2917 * Drop membership of an IPv4 multicast.
2919 __private_extern__
int
2921 struct ip_moptions
*imo
,
2922 struct ip_mreq
*mreq
)
2925 struct ifnet
* ifp
= NULL
;
2928 if (!IN_MULTICAST(ntohl(mreq
->imr_multiaddr
.s_addr
))) {
2934 * If an interface address was specified, get a pointer
2935 * to its ifnet structure.
2937 if (mreq
->imr_interface
.s_addr
== INADDR_ANY
)
2940 ifp
= ip_multicast_if(&mreq
->imr_interface
, NULL
);
2942 error
= EADDRNOTAVAIL
;
2947 * Find the membership in the membership array.
2949 for (i
= 0; i
< imo
->imo_num_memberships
; ++i
) {
2951 imo
->imo_membership
[i
]->inm_ifp
== ifp
) &&
2952 imo
->imo_membership
[i
]->inm_addr
.s_addr
==
2953 mreq
->imr_multiaddr
.s_addr
)
2956 if (i
== imo
->imo_num_memberships
) {
2957 error
= EADDRNOTAVAIL
;
2961 * Give up the multicast address record to which the
2962 * membership points.
2964 in_delmulti(&imo
->imo_membership
[i
]);
2966 * Remove the gap in the membership array.
2968 for (++i
; i
< imo
->imo_num_memberships
; ++i
)
2969 imo
->imo_membership
[i
-1] = imo
->imo_membership
[i
];
2970 --imo
->imo_num_memberships
;
2976 * Return the IP multicast options in response to user getsockopt().
2979 ip_getmoptions(sopt
, imo
)
2980 struct sockopt
*sopt
;
2981 register struct ip_moptions
*imo
;
2983 struct in_addr addr
;
2984 struct in_ifaddr
*ia
;
2989 switch (sopt
->sopt_name
) {
2991 case IP_MULTICAST_VIF
:
2993 optval
= imo
->imo_multicast_vif
;
2996 error
= sooptcopyout(sopt
, &optval
, sizeof optval
);
2998 #endif /* MROUTING */
3000 case IP_MULTICAST_IF
:
3001 if (imo
== NULL
|| imo
->imo_multicast_ifp
== NULL
)
3002 addr
.s_addr
= INADDR_ANY
;
3003 else if (imo
->imo_multicast_addr
.s_addr
) {
3004 /* return the value user has set */
3005 addr
= imo
->imo_multicast_addr
;
3007 IFP_TO_IA(imo
->imo_multicast_ifp
, ia
);
3008 addr
.s_addr
= (ia
== NULL
) ? INADDR_ANY
3009 : IA_SIN(ia
)->sin_addr
.s_addr
;
3011 ifafree(&ia
->ia_ifa
);
3013 error
= sooptcopyout(sopt
, &addr
, sizeof addr
);
3016 case IP_MULTICAST_TTL
:
3018 optval
= coptval
= IP_DEFAULT_MULTICAST_TTL
;
3020 optval
= coptval
= imo
->imo_multicast_ttl
;
3021 if (sopt
->sopt_valsize
== 1)
3022 error
= sooptcopyout(sopt
, &coptval
, 1);
3024 error
= sooptcopyout(sopt
, &optval
, sizeof optval
);
3027 case IP_MULTICAST_LOOP
:
3029 optval
= coptval
= IP_DEFAULT_MULTICAST_LOOP
;
3031 optval
= coptval
= imo
->imo_multicast_loop
;
3032 if (sopt
->sopt_valsize
== 1)
3033 error
= sooptcopyout(sopt
, &coptval
, 1);
3035 error
= sooptcopyout(sopt
, &optval
, sizeof optval
);
3039 error
= ENOPROTOOPT
;
3046 * Discard the IP multicast options.
3049 ip_freemoptions(imo
)
3050 register struct ip_moptions
*imo
;
3055 for (i
= 0; i
< imo
->imo_num_memberships
; ++i
)
3056 in_delmulti(&imo
->imo_membership
[i
]);
3057 FREE(imo
, M_IPMOPTS
);
3062 * Routine called from ip_output() to loop back a copy of an IP multicast
3063 * packet to the input queue of a specified interface. Note that this
3064 * calls the output routine of the loopback "driver", but with an interface
3065 * pointer that might NOT be a loopback interface -- evil, but easier than
3066 * replicating that code here.
3069 ip_mloopback(ifp
, m
, dst
, hlen
)
3071 register struct mbuf
*m
;
3072 register struct sockaddr_in
*dst
;
3075 register struct ip
*ip
;
3077 int sw_csum
= (apple_hwcksum_tx
== 0);
3079 copym
= m_copy(m
, 0, M_COPYALL
);
3080 if (copym
!= NULL
&& (copym
->m_flags
& M_EXT
|| copym
->m_len
< hlen
))
3081 copym
= m_pullup(copym
, hlen
);
3087 * We don't bother to fragment if the IP length is greater
3088 * than the interface's MTU. Can this possibly matter?
3090 ip
= mtod(copym
, struct ip
*);
3092 #if BYTE_ORDER != BIG_ENDIAN
3098 ip
->ip_sum
= in_cksum(copym
, hlen
);
3101 * It's not clear whether there are any lingering
3102 * reentrancy problems in other areas which might
3103 * be exposed by using ip_input directly (in
3104 * particular, everything which modifies the packet
3105 * in-place). Yet another option is using the
3106 * protosw directly to deliver the looped back
3107 * packet. For the moment, we'll err on the side
3108 * of safety by using if_simloop().
3111 if (dst
->sin_family
!= AF_INET
) {
3112 printf("ip_mloopback: bad address family %d\n",
3114 dst
->sin_family
= AF_INET
;
3119 * Mark checksum as valid or calculate checksum for loopback.
3121 * This is done this way because we have to embed the ifp of
3122 * the interface we will send the original copy of the packet
3123 * out on in the mbuf. ip_input will check if_hwassist of the
3124 * embedded ifp and ignore all csum_flags if if_hwassist is 0.
3125 * The UDP checksum has not been calculated yet.
3127 if (sw_csum
|| (copym
->m_pkthdr
.csum_flags
& CSUM_DELAY_DATA
)) {
3128 if (!sw_csum
&& IF_HWASSIST_CSUM_FLAGS(ifp
->if_hwassist
)) {
3129 copym
->m_pkthdr
.csum_flags
|=
3130 CSUM_DATA_VALID
| CSUM_PSEUDO_HDR
|
3131 CSUM_IP_CHECKED
| CSUM_IP_VALID
;
3132 copym
->m_pkthdr
.csum_data
= 0xffff;
3135 #if BYTE_ORDER != BIG_ENDIAN
3139 in_delayed_cksum(copym
);
3141 #if BYTE_ORDER != BIG_ENDIAN
3150 * We need to send all loopback traffic down to dlil in case
3151 * a filter has tapped-in.
3155 * Stuff the 'real' ifp into the pkthdr, to be used in matching
3156 * in ip_input(); we need the loopback ifp/dl_tag passed as args
3157 * to make the loopback driver compliant with the data link
3161 copym
->m_pkthdr
.rcvif
= ifp
;
3162 dlil_output(lo_ifp
, PF_INET
, copym
, 0,
3163 (struct sockaddr
*) dst
, 0);
3165 printf("Warning: ip_output call to dlil_find_dltag failed!\n");
3171 * Given a source IP address (and route, if available), determine the best
3172 * interface to send the packet from. Checking for (and updating) the
3173 * ROF_SRCIF_SELECTED flag in the pcb-supplied route placeholder is done
3174 * without any locks based on the assumption that ip_output() is single-
3175 * threaded per-pcb, i.e. for any given pcb there can only be one thread
3176 * performing output at the IP layer.
3178 static struct ifaddr
*
3179 in_selectsrcif(struct ip
*ip
, struct route
*ro
, unsigned int ifscope
)
3181 struct ifaddr
*ifa
= NULL
;
3182 struct in_addr src
= ip
->ip_src
;
3183 struct in_addr dst
= ip
->ip_dst
;
3184 struct ifnet
*rt_ifp
;
3185 char s_src
[MAX_IPv4_STR_LEN
], s_dst
[MAX_IPv4_STR_LEN
];
3187 if (ip_select_srcif_debug
) {
3188 (void) inet_ntop(AF_INET
, &src
.s_addr
, s_src
, sizeof (s_src
));
3189 (void) inet_ntop(AF_INET
, &dst
.s_addr
, s_dst
, sizeof (s_dst
));
3192 if (ro
->ro_rt
!= NULL
)
3195 rt_ifp
= (ro
->ro_rt
!= NULL
) ? ro
->ro_rt
->rt_ifp
: NULL
;
3198 * Given the source IP address, find a suitable source interface
3199 * to use for transmission; if the caller has specified a scope,
3200 * optimize the search by looking at the addresses only for that
3201 * interface. This is still suboptimal, however, as we need to
3202 * traverse the per-interface list.
3204 if (ifscope
!= IFSCOPE_NONE
|| ro
->ro_rt
!= NULL
) {
3205 unsigned int scope
= ifscope
;
3208 * If no scope is specified and the route is stale (pointing
3209 * to a defunct interface) use the current primary interface;
3210 * this happens when switching between interfaces configured
3211 * with the same IP address. Otherwise pick up the scope
3212 * information from the route; the ULP may have looked up a
3213 * correct route and we just need to verify it here and mark
3214 * it with the ROF_SRCIF_SELECTED flag below.
3216 if (scope
== IFSCOPE_NONE
) {
3217 scope
= rt_ifp
->if_index
;
3218 if (scope
!= get_primary_ifscope() &&
3219 ro
->ro_rt
->generation_id
!= route_generation
)
3220 scope
= get_primary_ifscope();
3223 ifa
= (struct ifaddr
*)ifa_foraddr_scoped(src
.s_addr
, scope
);
3225 if (ifa
== NULL
&& ip
->ip_p
!= IPPROTO_UDP
&&
3226 ip
->ip_p
!= IPPROTO_TCP
&& ipforwarding
) {
3228 * If forwarding is enabled, and if the packet isn't
3229 * TCP or UDP, check if the source address belongs
3230 * to one of our own interfaces; if so, demote the
3231 * interface scope and do a route lookup right below.
3233 ifa
= (struct ifaddr
*)ifa_foraddr(src
.s_addr
);
3237 ifscope
= IFSCOPE_NONE
;
3241 if (ip_select_srcif_debug
&& ifa
!= NULL
) {
3242 if (ro
->ro_rt
!= NULL
) {
3243 printf("%s->%s ifscope %d->%d ifa_if %s%d "
3244 "ro_if %s%d\n", s_src
, s_dst
, ifscope
,
3245 scope
, ifa
->ifa_ifp
->if_name
,
3246 ifa
->ifa_ifp
->if_unit
, rt_ifp
->if_name
,
3249 printf("%s->%s ifscope %d->%d ifa_if %s%d\n",
3250 s_src
, s_dst
, ifscope
, scope
,
3251 ifa
->ifa_ifp
->if_name
,
3252 ifa
->ifa_ifp
->if_unit
);
3258 * Slow path; search for an interface having the corresponding source
3259 * IP address if the scope was not specified by the caller, and:
3261 * 1) There currently isn't any route, or,
3262 * 2) The interface used by the route does not own that source
3263 * IP address; in this case, the route will get blown away
3264 * and we'll do a more specific scoped search using the newly
3267 if (ifa
== NULL
&& ifscope
== IFSCOPE_NONE
) {
3268 ifa
= (struct ifaddr
*)ifa_foraddr(src
.s_addr
);
3271 * If we have the IP address, but not the route, we don't
3272 * really know whether or not it belongs to the correct
3273 * interface (it could be shared across multiple interfaces.)
3274 * The only way to find out is to do a route lookup.
3276 if (ifa
!= NULL
&& ro
->ro_rt
== NULL
) {
3278 struct sockaddr_in sin
;
3279 struct ifaddr
*oifa
= NULL
;
3281 bzero(&sin
, sizeof (sin
));
3282 sin
.sin_family
= AF_INET
;
3283 sin
.sin_len
= sizeof (sin
);
3286 lck_mtx_lock(rnh_lock
);
3287 if ((rt
= rt_lookup(TRUE
, (struct sockaddr
*)&sin
, NULL
,
3288 rt_tables
[AF_INET
], IFSCOPE_NONE
)) != NULL
) {
3291 * If the route uses a different interface,
3292 * use that one instead. The IP address of
3293 * the ifaddr that we pick up here is not
3296 if (ifa
->ifa_ifp
!= rt
->rt_ifp
) {
3306 lck_mtx_unlock(rnh_lock
);
3309 struct ifaddr
*iifa
;
3312 * See if the interface pointed to by the
3313 * route is configured with the source IP
3314 * address of the packet.
3316 iifa
= (struct ifaddr
*)ifa_foraddr_scoped(
3317 src
.s_addr
, ifa
->ifa_ifp
->if_index
);
3321 * Found it; drop the original one
3322 * as well as the route interface
3323 * address, and use this instead.
3328 } else if (!ipforwarding
||
3329 (rt
->rt_flags
& RTF_GATEWAY
)) {
3331 * This interface doesn't have that
3332 * source IP address; drop the route
3333 * interface address and just use the
3334 * original one, and let the caller
3335 * do a scoped route lookup.
3341 * Forwarding is enabled and the source
3342 * address belongs to one of our own
3343 * interfaces which isn't the outgoing
3344 * interface, and we have a route, and
3345 * the destination is on a network that
3346 * is directly attached (onlink); drop
3347 * the original one and use the route
3348 * interface address instead.
3353 } else if (ifa
!= NULL
&& ro
->ro_rt
!= NULL
&&
3354 !(ro
->ro_rt
->rt_flags
& RTF_GATEWAY
) &&
3355 ifa
->ifa_ifp
!= ro
->ro_rt
->rt_ifp
&& ipforwarding
) {
3357 * Forwarding is enabled and the source address belongs
3358 * to one of our own interfaces which isn't the same
3359 * as the interface used by the known route; drop the
3360 * original one and use the route interface address.
3363 ifa
= ro
->ro_rt
->rt_ifa
;
3367 if (ip_select_srcif_debug
&& ifa
!= NULL
) {
3368 printf("%s->%s ifscope %d ifa_if %s%d\n",
3369 s_src
, s_dst
, ifscope
, ifa
->ifa_ifp
->if_name
,
3370 ifa
->ifa_ifp
->if_unit
);
3374 if (ro
->ro_rt
!= NULL
)
3375 RT_LOCK_ASSERT_HELD(ro
->ro_rt
);
3377 * If there is a non-loopback route with the wrong interface, or if
3378 * there is no interface configured with such an address, blow it
3379 * away. Except for local/loopback, we look for one with a matching
3380 * interface scope/index.
3382 if (ro
->ro_rt
!= NULL
&&
3383 (ifa
== NULL
|| (ifa
->ifa_ifp
!= rt_ifp
&& rt_ifp
!= lo_ifp
) ||
3384 !(ro
->ro_rt
->rt_flags
& RTF_UP
))) {
3385 if (ip_select_srcif_debug
) {
3387 printf("%s->%s ifscope %d ro_if %s%d != "
3388 "ifa_if %s%d (cached route cleared)\n",
3389 s_src
, s_dst
, ifscope
, rt_ifp
->if_name
,
3390 rt_ifp
->if_unit
, ifa
->ifa_ifp
->if_name
,
3391 ifa
->ifa_ifp
->if_unit
);
3393 printf("%s->%s ifscope %d ro_if %s%d "
3394 "(no ifa_if found)\n",
3395 s_src
, s_dst
, ifscope
, rt_ifp
->if_name
,
3400 RT_UNLOCK(ro
->ro_rt
);
3403 ro
->ro_flags
&= ~ROF_SRCIF_SELECTED
;
3406 * If the destination is IPv4 LLA and the route's interface
3407 * doesn't match the source interface, then the source IP
3408 * address is wrong; it most likely belongs to the primary
3409 * interface associated with the IPv4 LL subnet. Drop the
3410 * packet rather than letting it go out and return an error
3411 * to the ULP. This actually applies not only to IPv4 LL
3412 * but other shared subnets; for now we explicitly test only
3413 * for the former case and save the latter for future.
3415 if (IN_LINKLOCAL(ntohl(dst
.s_addr
)) &&
3416 !IN_LINKLOCAL(ntohl(src
.s_addr
)) && ifa
!= NULL
) {
3422 if (ip_select_srcif_debug
&& ifa
== NULL
) {
3423 printf("%s->%s ifscope %d (neither ro_if/ifa_if found)\n",
3424 s_src
, s_dst
, ifscope
);
3428 * If there is a route, mark it accordingly. If there isn't one,
3429 * we'll get here again during the next transmit (possibly with a
3430 * route) and the flag will get set at that point. For IPv4 LLA
3431 * destination, mark it only if the route has been fully resolved;
3432 * otherwise we want to come back here again when the route points
3433 * to the interface over which the ARP reply arrives on.
3435 if (ro
->ro_rt
!= NULL
&& (!IN_LINKLOCAL(ntohl(dst
.s_addr
)) ||
3436 (ro
->ro_rt
->rt_gateway
->sa_family
== AF_LINK
&&
3437 SDL(ro
->ro_rt
->rt_gateway
)->sdl_alen
!= 0))) {
3438 ro
->ro_flags
|= ROF_SRCIF_SELECTED
;
3439 ro
->ro_rt
->generation_id
= route_generation
;
3442 if (ro
->ro_rt
!= NULL
)
3443 RT_UNLOCK(ro
->ro_rt
);
3449 * Handler for setting IP_FORCE_OUT_IFP or IP_BOUND_IF socket option.
3452 ip_bindif(struct inpcb
*inp
, unsigned int ifscope
)
3455 * A zero interface scope value indicates an "unbind".
3456 * Otherwise, take in whatever value the app desires;
3457 * the app may already know the scope (or force itself
3458 * to such a scope) ahead of time before the interface
3459 * gets attached. It doesn't matter either way; any
3460 * route lookup from this point on will require an
3461 * exact match for the embedded interface scope.
3463 inp
->inp_boundif
= ifscope
;
3464 if (inp
->inp_boundif
== IFSCOPE_NONE
)
3465 inp
->inp_flags
&= ~INP_BOUND_IF
;
3467 inp
->inp_flags
|= INP_BOUND_IF
;
3469 /* Blow away any cached route in the PCB */
3470 if (inp
->inp_route
.ro_rt
!= NULL
) {
3471 rtfree(inp
->inp_route
.ro_rt
);
3472 inp
->inp_route
.ro_rt
= NULL
;