2 * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 * Copyright (c) 1982, 1986, 1988, 1990, 1993
30 * The Regents of the University of California. All rights reserved.
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94
61 * $FreeBSD: src/sys/netinet/ip_output.c,v 1.99.2.16 2001/07/19 06:37:26 kris Exp $
64 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
65 * support for mandatory and extensible security protections. This notice
66 * is included in support of clause 2.2 (b) of the Apple Public License,
72 #include <sys/param.h>
73 #include <sys/systm.h>
74 #include <sys/kernel.h>
75 #include <sys/malloc.h>
77 #include <sys/protosw.h>
78 #include <sys/socket.h>
79 #include <sys/socketvar.h>
80 #include <kern/locks.h>
81 #include <sys/sysctl.h>
82 #include <sys/mcache.h>
84 #include <machine/endian.h>
85 #include <pexpert/pexpert.h>
88 #include <net/if_dl.h>
89 #include <net/if_types.h>
90 #include <net/route.h>
91 #include <net/ntstat.h>
92 #include <net/net_osdep.h>
94 #include <netinet/in.h>
95 #include <netinet/in_systm.h>
96 #include <netinet/ip.h>
97 #include <netinet/in_pcb.h>
98 #include <netinet/in_var.h>
99 #include <netinet/ip_var.h>
101 #include <netinet/kpi_ipfilter_var.h>
104 #include <security/mac_framework.h>
109 #include <net/dlil.h>
110 #include <sys/kdebug.h>
111 #include <libkern/OSAtomic.h>
/*
 * kdebug trace codes in the DBG_NETIP class. DBG_LAYER_BEG,
 * DBG_FNC_IP_OUTPUT and DBG_FNC_IPSEC4_OUTPUT are passed to the
 * KERNEL_DEBUG() calls made on the output path in this file.
 */
113 #define DBG_LAYER_BEG NETDBG_CODE(DBG_NETIP, 1)
114 #define DBG_LAYER_END NETDBG_CODE(DBG_NETIP, 3)
115 #define DBG_FNC_IP_OUTPUT NETDBG_CODE(DBG_NETIP, (1 << 8) | 1)
116 #define DBG_FNC_IPSEC4_OUTPUT NETDBG_CODE(DBG_NETIP, (2 << 8) | 1)
/*
 * Exchange the low and high bytes of a 16-bit quantity. The high byte is
 * obtained with an unmasked right shift, so the argument is expected to
 * fit in 16 bits. The argument is evaluated twice; do not pass an
 * expression with side effects.
 */
118 #define SWAP16(v) ((((v) & 0xff) << 8) | ((v) >> 8))
121 #include <netinet6/ipsec.h>
122 #include <netkey/key.h>
124 #include <netkey/key_debug.h>
126 #define KEYDEBUG(lev,arg)
130 #include <netinet/ip_fw.h>
131 #include <netinet/ip_divert.h>
132 #include <mach/sdt.h>
135 #include <netinet/ip_dummynet.h>
139 #include <net/pfvar.h>
142 #if IPFIREWALL_FORWARD_DEBUG
143 #define print_ip(a) printf("%ld.%ld.%ld.%ld",(ntohl(a.s_addr)>>24)&0xFF,\
144 (ntohl(a.s_addr)>>16)&0xFF,\
145 (ntohl(a.s_addr)>>8)&0xFF,\
146 (ntohl(a.s_addr))&0xFF);
/*
 * Forward declarations. The prototypes marked static (ip_insertoptions,
 * ip_mloopback, ip_pcbopts, imo_trace, ip_out_cksum_stats, in_selectsrcif)
 * have internal linkage; ip_optcopy, in_delayed_cksum_offset and
 * in_cksum_offset are declared without static.
 */
152 static struct mbuf
*ip_insertoptions(struct mbuf
*, struct mbuf
*, int *);
153 static void ip_mloopback(struct ifnet
*, struct mbuf
*,
154 struct sockaddr_in
*, int);
155 static int ip_pcbopts(int, struct mbuf
**, struct mbuf
*);
156 static void imo_trace(struct ip_moptions
*, int);
158 static void ip_out_cksum_stats(int, u_int32_t
);
159 static struct ifaddr
*in_selectsrcif(struct ip
*, struct route
*, unsigned int);
161 int ip_optcopy(struct ip
*, struct ip
*);
162 void in_delayed_cksum_offset(struct mbuf
*, int );
163 void in_cksum_offset(struct mbuf
* , size_t );
165 extern int (*fr_checkp
)(struct ip
*, int, struct ifnet
*, int, struct mbuf
**);
167 extern struct protosw inetsw
[];
169 extern struct ip_linklocal_stat ip_linklocal_stat
;
170 extern lck_mtx_t
*ip_mutex
;
172 /* temporary: for testing */
174 extern int ipsec_bypass
;
/*
 * ip_maxchainsent: integer initialised to 0 and exported read-write as
 * the "maxchainsent" node under _net_inet_ip ("use dlil_output_list").
 */
177 static int ip_maxchainsent
= 0;
178 SYSCTL_INT(_net_inet_ip
, OID_AUTO
, maxchainsent
, CTLFLAG_RW
| CTLFLAG_LOCKED
,
179 &ip_maxchainsent
, 0, "use dlil_output_list");
/*
 * forge_ce: debugging knob, off (0) by default, exported read-write as
 * "forge_ce" under _net_inet_ip.
 * NOTE(review): presumably this gates the code that rewrites ECT0/ECT1
 * packets to ECN CE on output -- confirm against the full source.
 */
181 static int forge_ce
= 0;
182 SYSCTL_INT(_net_inet_ip
, OID_AUTO
, forge_ce
, CTLFLAG_RW
| CTLFLAG_LOCKED
,
183 &forge_ce
, 0, "Forge ECN CE");
/*
 * ip_select_srcif_debug: off (0) by default, exported read-write as
 * "select_srcif_debug" under _net_inet_ip; per its description it enables
 * logging of source interface selection debug info.
 */
186 static int ip_select_srcif_debug
= 0;
187 SYSCTL_INT(_net_inet_ip
, OID_AUTO
, select_srcif_debug
, CTLFLAG_RW
| CTLFLAG_LOCKED
,
188 &ip_select_srcif_debug
, 0, "log source interface selection debug info");
190 #define IMO_TRACE_HIST_SIZE 32 /* size of trace history */
193 __private_extern__
unsigned int imo_trace_hist_size
= IMO_TRACE_HIST_SIZE
;
195 struct ip_moptions_dbg
{
196 struct ip_moptions imo
; /* ip_moptions */
197 u_int16_t imo_refhold_cnt
; /* # of IMO_ADDREF */
198 u_int16_t imo_refrele_cnt
; /* # of IMO_REMREF */
200 * Alloc and free callers.
205 * Circular lists of IMO_ADDREF and IMO_REMREF callers.
207 ctrace_t imo_refhold
[IMO_TRACE_HIST_SIZE
];
208 ctrace_t imo_refrele
[IMO_TRACE_HIST_SIZE
];
212 static unsigned int imo_debug
= 1; /* debugging (enabled) */
214 static unsigned int imo_debug
; /* debugging (disabled) */
216 static unsigned int imo_size
; /* size of zone element */
217 static struct zone
*imo_zone
; /* zone for ip_moptions */
219 #define IMO_ZONE_MAX 64 /* maximum elements in zone */
220 #define IMO_ZONE_NAME "ip_moptions" /* zone name */
223 * IP output. The packet in mbuf chain m contains a skeletal IP
224 * header (with len, off, ttl, proto, tos, src, dst).
225 * The mbuf chain containing the packet will be freed.
226 * The mbuf opt, if present, will not be freed.
234 struct ip_moptions
*imo
,
235 struct ip_out_args
*ipoa
)
238 error
= ip_output_list(m0
, 0, opt
, ro
, flags
, imo
, ipoa
);
251 * ipsec4_getpolicybyaddr:??? [IPSEC 4th argument, contents modified]
252 * ipsec4_getpolicybysock:??? [IPSEC 4th argument, contents modified]
253 * key_spdacquire:??? [IPSEC]
254 * ipsec4_output:??? [IPSEC]
255 * <fr_checkp>:??? [firewall]
256 * ip_dn_io_ptr:??? [dummynet]
257 * dlil_output:??? [DLIL]
258 * dlil_output_list:??? [DLIL]
260 * Notes: The ipsec4_getpolicyby{addr|sock} function error returns are
261 * only used as the error return from this function where one of
262 * these functions fails to return a policy.
271 struct ip_moptions
*imo
,
272 struct ip_out_args
*ipoa
276 struct ifnet
*ifp
= NULL
;
277 struct mbuf
*m
= m0
, **mppn
= NULL
;
278 int hlen
= sizeof (struct ip
);
279 int len
= 0, error
= 0;
280 struct sockaddr_in
*dst
= NULL
;
281 struct in_ifaddr
*ia
= NULL
, *src_ia
= NULL
;
282 int isbroadcast
, sw_csum
;
283 struct in_addr pkt_dst
;
284 struct ipf_pktopts
*ippo
= NULL
, ipf_pktopts
;
286 struct ipsec_output_state ipsec_state
;
287 struct route
*ipsec_saved_route
= NULL
;
288 struct socket
*so
= NULL
;
289 struct secpolicy
*sp
= NULL
;
291 #if IPFIREWALL_FORWARD
292 int fwd_rewrite_src
= 0;
296 struct ip_fw_args args
;
298 struct sockaddr_in
*next_hop_from_ipfwd_tag
= NULL
;
301 ipfilter_t inject_filter_ref
= 0;
303 struct route saved_route
;
304 struct ip_out_args saved_ipoa
;
305 struct sockaddr_in dst_buf
;
306 #endif /* DUMMYNET */
307 struct mbuf
* packetlist
;
308 int pktcnt
= 0, tso
= 0;
309 u_int32_t bytecnt
= 0;
310 unsigned int ifscope
;
312 boolean_t select_srcif
;
313 KERNEL_DEBUG(DBG_FNC_IP_OUTPUT
| DBG_FUNC_START
, 0,0,0,0,0);
316 bzero(&ipsec_state
, sizeof(ipsec_state
));
321 args
.next_hop
= NULL
;
324 args
.divert_rule
= 0; /* divert cookie */
327 if (SLIST_EMPTY(&m0
->m_pkthdr
.tags
))
330 /* Grab info from mtags prepended to the chain */
332 if ((tag
= m_tag_locate(m0
, KERNEL_MODULE_TAG_ID
,
333 KERNEL_TAG_TYPE_DUMMYNET
, NULL
)) != NULL
) {
334 struct dn_pkt_tag
*dn_tag
;
336 dn_tag
= (struct dn_pkt_tag
*)(tag
+1);
337 args
.rule
= dn_tag
->rule
;
339 saved_route
= dn_tag
->ro
;
343 bcopy(&dn_tag
->dn_dst
, &dst_buf
, sizeof(dst_buf
));
346 flags
= dn_tag
->flags
;
347 saved_ipoa
= dn_tag
->ipoa
;
350 m_tag_delete(m0
, tag
);
352 #endif /* DUMMYNET */
355 if ((tag
= m_tag_locate(m0
, KERNEL_MODULE_TAG_ID
,
356 KERNEL_TAG_TYPE_DIVERT
, NULL
)) != NULL
) {
357 struct divert_tag
*div_tag
;
359 div_tag
= (struct divert_tag
*)(tag
+1);
360 args
.divert_rule
= div_tag
->cookie
;
362 m_tag_delete(m0
, tag
);
364 #endif /* IPDIVERT */
366 if ((tag
= m_tag_locate(m0
, KERNEL_MODULE_TAG_ID
,
367 KERNEL_TAG_TYPE_IPFORWARD
, NULL
)) != NULL
) {
368 struct ip_fwd_tag
*ipfwd_tag
;
370 ipfwd_tag
= (struct ip_fwd_tag
*)(tag
+1);
371 next_hop_from_ipfwd_tag
= ipfwd_tag
->next_hop
;
373 m_tag_delete(m0
, tag
);
376 #endif /* IPFIREWALL */
381 if ( !m
|| (m
->m_flags
& M_PKTHDR
) != 0)
382 panic("ip_output no HDR");
384 panic("ip_output no route, proto = %d",
385 mtod(m
, struct ip
*)->ip_p
);
388 bzero(&ipf_pktopts
, sizeof(struct ipf_pktopts
));
392 * At present the IP_OUTARGS flag implies a request for IP to
393 * perform source interface selection. In the forwarding case,
394 * only the ifscope value is used, as source interface selection
395 * doesn't take place.
397 if (ip_doscopedroute
&& (flags
& IP_OUTARGS
)) {
398 select_srcif
= !(flags
& IP_FORWARDING
);
399 ifscope
= ipoa
->ipoa_boundif
;
400 ipf_pktopts
.ippo_flags
= IPPOF_BOUND_IF
;
401 ipf_pktopts
.ippo_flags
|= (ifscope
<< IPPOF_SHIFT_IFSCOPE
);
403 select_srcif
= FALSE
;
404 ifscope
= IFSCOPE_NONE
;
407 if (flags
& IP_OUTARGS
) {
408 nocell
= ipoa
->ipoa_nocell
;
410 ipf_pktopts
.ippo_flags
|= IPPOF_NO_IFT_CELLULAR
;
416 if (args
.rule
!= NULL
) { /* dummynet already saw us */
417 ip
= mtod(m
, struct ip
*);
418 hlen
= IP_VHL_HL(ip
->ip_vhl
) << 2 ;
419 if (ro
->ro_rt
!= NULL
) {
420 RT_LOCK_SPIN(ro
->ro_rt
);
421 ia
= (struct in_ifaddr
*)ro
->ro_rt
->rt_ifa
;
423 /* Become a regular mutex */
424 RT_CONVERT_LOCK(ro
->ro_rt
);
425 IFA_ADDREF(&ia
->ia_ifa
);
427 RT_UNLOCK(ro
->ro_rt
);
430 if (ipsec_bypass
== 0 && (flags
& IP_NOIPSEC
) == 0) {
431 so
= ipsec_getsocket(m
);
432 (void)ipsec_setsocket(m
, NULL
);
437 #endif /* IPFIREWALL */
440 if (ipsec_bypass
== 0 && (flags
& IP_NOIPSEC
) == 0) {
441 so
= ipsec_getsocket(m
);
442 (void)ipsec_setsocket(m
, NULL
);
447 * No need to process packet twice if we've
450 if (!SLIST_EMPTY(&m
->m_pkthdr
.tags
))
451 inject_filter_ref
= ipf_get_inject_filter(m
);
453 inject_filter_ref
= 0;
456 m
= ip_insertoptions(m
, opt
, &len
);
459 ip
= mtod(m
, struct ip
*);
464 * When dealing with a packet chain, we need to reset "next_hop" because
465 * "dst" may have been changed to the gateway address below for the previous
466 * packet of the chain. This could cause the route to be inadvertently changed
467 * to the route to the gateway address (instead of the route to the destination).
469 args
.next_hop
= next_hop_from_ipfwd_tag
;
470 pkt_dst
= args
.next_hop
? args
.next_hop
->sin_addr
: ip
->ip_dst
;
472 pkt_dst
= ip
->ip_dst
;
476 * We must not send if the packet is destined to network zero.
477 * RFC1122 3.2.1.3 (a) and (b).
479 if (IN_ZERONET(ntohl(pkt_dst
.s_addr
))) {
480 error
= EHOSTUNREACH
;
487 if ((flags
& (IP_FORWARDING
|IP_RAWOUTPUT
)) == 0) {
488 ip
->ip_vhl
= IP_MAKE_VHL(IPVERSION
, hlen
>> 2);
491 ip
->ip_id
= ip_randomid();
493 ip
->ip_id
= htons(ip_id
++);
495 OSAddAtomic(1, &ipstat
.ips_localout
);
497 hlen
= IP_VHL_HL(ip
->ip_vhl
) << 2;
501 /* For debugging, we let the stack forge congestion */
503 ((ip
->ip_tos
& IPTOS_ECN_MASK
) == IPTOS_ECN_ECT1
||
504 (ip
->ip_tos
& IPTOS_ECN_MASK
) == IPTOS_ECN_ECT0
)) {
505 ip
->ip_tos
= (ip
->ip_tos
& ~IPTOS_ECN_MASK
) | IPTOS_ECN_CE
;
510 KERNEL_DEBUG(DBG_LAYER_BEG
, ip
->ip_dst
.s_addr
,
511 ip
->ip_src
.s_addr
, ip
->ip_p
, ip
->ip_off
, ip
->ip_len
);
513 dst
= (struct sockaddr_in
*)&ro
->ro_dst
;
516 * If there is a cached route,
517 * check that it is to the same destination
518 * and is still up. If not, free it and try again.
519 * The address family should also be checked in case of sharing the
523 if (ro
->ro_rt
!= NULL
) {
524 if (ro
->ro_rt
->generation_id
!= route_generation
&&
525 ((flags
& (IP_ROUTETOIF
| IP_FORWARDING
)) == 0) &&
526 (ip
->ip_src
.s_addr
!= INADDR_ANY
)) {
527 src_ia
= ifa_foraddr(ip
->ip_src
.s_addr
);
528 if (src_ia
== NULL
) {
529 error
= EADDRNOTAVAIL
;
532 IFA_REMREF(&src_ia
->ia_ifa
);
535 * Test rt_flags without holding rt_lock for performance
536 * reasons; if the route is down it will hopefully be
537 * caught by the layer below (since it uses this route
538 * as a hint) or during the next transmit.
540 if ((ro
->ro_rt
->rt_flags
& RTF_UP
) == 0 ||
541 dst
->sin_family
!= AF_INET
||
542 dst
->sin_addr
.s_addr
!= pkt_dst
.s_addr
) {
547 * If we're doing source interface selection, we may not
548 * want to use this route; only synch up the generation
551 if (!select_srcif
&& ro
->ro_rt
!= NULL
&&
552 ro
->ro_rt
->generation_id
!= route_generation
)
553 ro
->ro_rt
->generation_id
= route_generation
;
555 if (ro
->ro_rt
== NULL
) {
556 bzero(dst
, sizeof(*dst
));
557 dst
->sin_family
= AF_INET
;
558 dst
->sin_len
= sizeof(*dst
);
559 dst
->sin_addr
= pkt_dst
;
562 * If routing to interface only,
563 * short circuit routing lookup.
565 #define ifatoia(ifa) ((struct in_ifaddr *)(ifa))
566 #define sintosa(sin) ((struct sockaddr *)(sin))
567 if (flags
& IP_ROUTETOIF
) {
569 IFA_REMREF(&ia
->ia_ifa
);
570 if ((ia
= ifatoia(ifa_ifwithdstaddr(sintosa(dst
)))) == 0) {
571 if ((ia
= ifatoia(ifa_ifwithnet(sintosa(dst
)))) == 0) {
572 OSAddAtomic(1, &ipstat
.ips_noroute
);
579 isbroadcast
= in_broadcast(dst
->sin_addr
, ifp
);
580 } else if (IN_MULTICAST(ntohl(pkt_dst
.s_addr
)) &&
581 imo
!= NULL
&& (ifp
= imo
->imo_multicast_ifp
) != NULL
) {
583 * Bypass the normal routing lookup for multicast
584 * packets if the interface is specified.
588 IFA_REMREF(&ia
->ia_ifa
);
590 /* Macro takes reference on ia */
593 boolean_t cloneok
= FALSE
;
595 * Perform source interface selection; the source IP address
596 * must belong to one of the addresses of the interface used
597 * by the route. For performance reasons, do this only if
598 * there is no route, or if the routing table has changed,
599 * or if we haven't done source interface selection on this
600 * route (for this PCB instance) before.
602 if (select_srcif
&& ip
->ip_src
.s_addr
!= INADDR_ANY
&&
603 (ro
->ro_rt
== NULL
|| !(ro
->ro_rt
->rt_flags
& RTF_UP
) ||
604 ro
->ro_rt
->generation_id
!= route_generation
||
605 !(ro
->ro_flags
& ROF_SRCIF_SELECTED
))) {
608 /* Find the source interface */
609 ifa
= in_selectsrcif(ip
, ro
, ifscope
);
612 * If the source address belongs to a cellular interface
613 * and the caller forbids our using interfaces of such
614 * type, pretend that there is no source address.
616 if (nocell
&& ifa
!= NULL
&&
617 ifa
->ifa_ifp
->if_type
== IFT_CELLULAR
) {
619 error
= EADDRNOTAVAIL
;
624 * If the source address is spoofed (in the case
625 * of IP_RAWOUTPUT), or if this is destined for
626 * local/loopback, just let it go out using the
627 * interface of the route. Otherwise, there's no
628 * interface having such an address, so bail out.
630 if (ifa
== NULL
&& !(flags
& IP_RAWOUTPUT
) &&
631 ifscope
!= lo_ifp
->if_index
) {
632 error
= EADDRNOTAVAIL
;
637 * If the caller didn't explicitly specify the scope,
638 * pick it up from the source interface. If the cached
639 * route was wrong and was blown away as part of source
640 * interface selection, don't mask out RTF_PRCLONING
641 * since that route may have been allocated by the ULP,
642 * unless the IP header was created by the caller or
643 * the destination is IPv4 LLA. The check for the
644 * latter is needed because IPv4 LLAs are never scoped
645 * in the current implementation, and we don't want to
646 * replace the resolved IPv4 LLA route with one whose
647 * gateway points to that of the default gateway on
648 * the primary interface of the system.
651 if (ifscope
== IFSCOPE_NONE
)
652 ifscope
= ifa
->ifa_ifp
->if_index
;
654 cloneok
= (!(flags
& IP_RAWOUTPUT
) &&
655 !(IN_LINKLOCAL(ntohl(ip
->ip_dst
.s_addr
))));
660 * If this is the case, we probably don't want to allocate
661 * a protocol-cloned route since we didn't get one from the
662 * ULP. This lets TCP do its thing, while not burdening
663 * forwarding or ICMP with the overhead of cloning a route.
664 * Of course, we still want to do any cloning requested by
665 * the link layer, as this is probably required in all cases
666 * for correct operation (as it is for ARP).
668 if (ro
->ro_rt
== NULL
) {
669 unsigned long ign
= RTF_PRCLONING
;
671 * We make an exception here: if the destination
672 * address is INADDR_BROADCAST, allocate a protocol-
673 * cloned host route so that we end up with a route
674 * marked with the RTF_BROADCAST flag. Otherwise,
675 * we would end up referring to the default route,
676 * instead of creating a cloned host route entry.
677 * That would introduce inconsistencies between ULPs
678 * that allocate a route and those that don't. The
679 * RTF_BROADCAST route is important since we'd want
680 * to send out undirected IP broadcast packets using
681 * link-level broadcast address. Another exception
682 * is for ULP-created routes that got blown away by
683 * source interface selection (see above).
685 * These exceptions will no longer be necessary when
686 * the RTF_PRCLONING scheme is no longer present.
688 if (cloneok
|| dst
->sin_addr
.s_addr
== INADDR_BROADCAST
)
689 ign
&= ~RTF_PRCLONING
;
692 * Loosen the route lookup criteria if the ifscope
693 * corresponds to the loopback interface; this is
694 * needed to support Application Layer Gateways
695 * listening on loopback, in conjunction with packet
696 * filter redirection rules. The final source IP
697 * address will be rewritten by the packet filter
698 * prior to the RFC1122 loopback check below.
700 if (ifscope
== lo_ifp
->if_index
)
701 rtalloc_ign(ro
, ign
);
703 rtalloc_scoped_ign(ro
, ign
, ifscope
);
706 * If the route points to a cellular interface and the
707 * caller forbids our using interfaces of such type,
708 * pretend that there is no route.
710 if (nocell
&& ro
->ro_rt
!= NULL
) {
711 RT_LOCK_SPIN(ro
->ro_rt
);
712 if (ro
->ro_rt
->rt_ifp
->if_type
==
714 RT_UNLOCK(ro
->ro_rt
);
718 RT_UNLOCK(ro
->ro_rt
);
723 if (ro
->ro_rt
== NULL
) {
724 OSAddAtomic(1, &ipstat
.ips_noroute
);
725 error
= EHOSTUNREACH
;
730 IFA_REMREF(&ia
->ia_ifa
);
731 RT_LOCK_SPIN(ro
->ro_rt
);
732 ia
= ifatoia(ro
->ro_rt
->rt_ifa
);
734 /* Become a regular mutex */
735 RT_CONVERT_LOCK(ro
->ro_rt
);
736 IFA_ADDREF(&ia
->ia_ifa
);
738 ifp
= ro
->ro_rt
->rt_ifp
;
740 if (ro
->ro_rt
->rt_flags
& RTF_GATEWAY
)
741 dst
= (struct sockaddr_in
*)ro
->ro_rt
->rt_gateway
;
742 if (ro
->ro_rt
->rt_flags
& RTF_HOST
) {
743 isbroadcast
= (ro
->ro_rt
->rt_flags
& RTF_BROADCAST
);
745 /* Become a regular mutex */
746 RT_CONVERT_LOCK(ro
->ro_rt
);
747 isbroadcast
= in_broadcast(dst
->sin_addr
, ifp
);
749 RT_UNLOCK(ro
->ro_rt
);
752 if (IN_MULTICAST(ntohl(pkt_dst
.s_addr
))) {
753 struct in_multi
*inm
;
755 u_int8_t ttl
= IP_DEFAULT_MULTICAST_TTL
;
756 u_int8_t loop
= IP_DEFAULT_MULTICAST_LOOP
;
758 m
->m_flags
|= M_MCAST
;
760 * IP destination address is multicast. Make sure "dst"
761 * still points to the address in "ro". (It may have been
762 * changed to point to a gateway address, above.)
764 dst
= (struct sockaddr_in
*)&ro
->ro_dst
;
766 * See if the caller provided any multicast options
770 vif
= imo
->imo_multicast_vif
;
771 ttl
= imo
->imo_multicast_ttl
;
772 loop
= imo
->imo_multicast_loop
;
773 if ((flags
& IP_RAWOUTPUT
) == 0)
775 if (imo
->imo_multicast_ifp
!= NULL
)
776 ifp
= imo
->imo_multicast_ifp
;
779 if (vif
!= -1 && ((flags
& IP_RAWOUTPUT
) == 0 ||
780 ip
->ip_src
.s_addr
== INADDR_ANY
))
781 ip
->ip_src
.s_addr
= ip_mcast_src(vif
);
782 #endif /* MROUTING */
783 } else if ((flags
& IP_RAWOUTPUT
) == 0) {
788 * Confirm that the outgoing interface supports multicast.
790 if (imo
== NULL
|| vif
== -1) {
791 if ((ifp
->if_flags
& IFF_MULTICAST
) == 0) {
792 OSAddAtomic(1, &ipstat
.ips_noroute
);
798 * If source address not specified yet, use address
799 * of outgoing interface.
801 if (ip
->ip_src
.s_addr
== INADDR_ANY
) {
802 struct in_ifaddr
*ia1
;
803 lck_rw_lock_shared(in_ifaddr_rwlock
);
804 TAILQ_FOREACH(ia1
, &in_ifaddrhead
, ia_link
) {
805 IFA_LOCK_SPIN(&ia1
->ia_ifa
);
806 if (ia1
->ia_ifp
== ifp
) {
807 ip
->ip_src
= IA_SIN(ia1
)->sin_addr
;
808 IFA_UNLOCK(&ia1
->ia_ifa
);
811 IFA_UNLOCK(&ia1
->ia_ifa
);
813 lck_rw_done(in_ifaddr_rwlock
);
814 if (ip
->ip_src
.s_addr
== INADDR_ANY
) {
820 in_multihead_lock_shared();
821 IN_LOOKUP_MULTI(&pkt_dst
, ifp
, inm
);
822 in_multihead_lock_done();
823 if (inm
!= NULL
&& (imo
== NULL
|| loop
)) {
825 * If we belong to the destination multicast group
826 * on the outgoing interface, and the caller did not
827 * forbid loopback, loop back a copy.
829 if (!TAILQ_EMPTY(&ipv4_filters
)) {
830 struct ipfilter
*filter
;
831 int seen
= (inject_filter_ref
== 0);
834 ipf_pktopts
.ippo_flags
|= IPPOF_MCAST_OPTS
;
835 ipf_pktopts
.ippo_mcast_ifnet
= ifp
;
836 ipf_pktopts
.ippo_mcast_ttl
= ttl
;
837 ipf_pktopts
.ippo_mcast_loop
= loop
;
842 /* 4135317 - always pass network byte order to filter */
844 #if BYTE_ORDER != BIG_ENDIAN
849 TAILQ_FOREACH(filter
, &ipv4_filters
, ipf_link
) {
851 if ((struct ipfilter
*)inject_filter_ref
== filter
)
853 } else if (filter
->ipf_filter
.ipf_output
) {
855 result
= filter
->ipf_filter
.ipf_output(filter
->ipf_filter
.cookie
, (mbuf_t
*)&m
, ippo
);
856 if (result
== EJUSTRETURN
) {
869 /* set back to host byte order */
870 ip
= mtod(m
, struct ip
*);
872 #if BYTE_ORDER != BIG_ENDIAN
880 ip_mloopback(ifp
, m
, dst
, hlen
);
885 * If we are acting as a multicast router, perform
886 * multicast forwarding as if the packet had just
887 * arrived on the interface to which we are about
888 * to send. The multicast forwarding function
889 * recursively calls this function, using the
890 * IP_FORWARDING flag to prevent infinite recursion.
892 * Multicasts that are looped back by ip_mloopback(),
893 * above, will be forwarded by the ip_input() routine,
896 if (ip_mrouter
&& (flags
& IP_FORWARDING
) == 0) {
898 * Check if rsvp daemon is running. If not, don't
899 * set ip_moptions. This ensures that the packet
900 * is multicast and not just sent down one link
901 * as prescribed by rsvpd.
905 if (ip_mforward(ip
, ifp
, m
, imo
) != 0) {
913 #endif /* MROUTING */
917 * Multicasts with a time-to-live of zero may be looped-
918 * back, above, but must not be transmitted on a network.
919 * Also, multicasts addressed to the loopback interface
920 * are not sent -- the above call to ip_mloopback() will
921 * loop back a copy if this host actually belongs to the
922 * destination group on the loopback interface.
924 if (ip
->ip_ttl
== 0 || ifp
->if_flags
& IFF_LOOPBACK
) {
933 * If source address not specified yet, use address
934 * of outgoing interface.
936 if (ip
->ip_src
.s_addr
== INADDR_ANY
) {
937 IFA_LOCK_SPIN(&ia
->ia_ifa
);
938 ip
->ip_src
= IA_SIN(ia
)->sin_addr
;
939 IFA_UNLOCK(&ia
->ia_ifa
);
940 #if IPFIREWALL_FORWARD
941 /* Keep note that we did this - if the firewall changes
942 * the next-hop, our interface may change, changing the
943 * default source IP. It's a shame so much effort happens
947 #endif /* IPFIREWALL_FORWARD */
952 * Look for broadcast address and
953 * verify user is allowed to send
957 if ((ifp
->if_flags
& IFF_BROADCAST
) == 0) {
958 error
= EADDRNOTAVAIL
;
961 if ((flags
& IP_ALLOWBROADCAST
) == 0) {
965 /* don't allow broadcast messages to be fragmented */
966 if ((u_short
)ip
->ip_len
> ifp
->if_mtu
) {
970 m
->m_flags
|= M_BCAST
;
972 m
->m_flags
&= ~M_BCAST
;
977 /* Invoke outbound packet filter */
978 if ( PF_IS_ENABLED
) {
980 rc
= pf_af_hook(ifp
, mppn
, &m
, AF_INET
, FALSE
);
982 if (packetlist
== m0
) {
988 /* Next packet in the chain */
990 } else if (packetlist
!= NULL
) {
991 /* No more packets; send down the chain */
994 /* Nothing left; we're done */
998 ip
= mtod(m
, struct ip
*);
999 pkt_dst
= ip
->ip_dst
;
1000 hlen
= IP_VHL_HL(ip
->ip_vhl
) << 2;
1004 * Force IP TTL to 255 following draft-ietf-zeroconf-ipv4-linklocal.txt
1006 if (IN_LINKLOCAL(ntohl(ip
->ip_src
.s_addr
)) || IN_LINKLOCAL(ntohl(ip
->ip_dst
.s_addr
))) {
1007 ip_linklocal_stat
.iplls_out_total
++;
1008 if (ip
->ip_ttl
!= MAXTTL
) {
1009 ip_linklocal_stat
.iplls_out_badttl
++;
1010 ip
->ip_ttl
= MAXTTL
;
1014 if (!didfilter
&& !TAILQ_EMPTY(&ipv4_filters
)) {
1015 struct ipfilter
*filter
;
1016 int seen
= (inject_filter_ref
== 0);
1017 ipf_pktopts
.ippo_flags
&= ~IPPOF_MCAST_OPTS
;
1019 /* Check that a TSO frame isn't passed to a filter.
1020 * This could happen if a filter is inserted while
1021 * TCP is sending the TSO packet.
1023 if (m
->m_pkthdr
.csum_flags
& CSUM_TSO_IPV4
) {
1030 /* 4135317 - always pass network byte order to filter */
1032 #if BYTE_ORDER != BIG_ENDIAN
1037 TAILQ_FOREACH(filter
, &ipv4_filters
, ipf_link
) {
1039 if ((struct ipfilter
*)inject_filter_ref
== filter
)
1041 } else if (filter
->ipf_filter
.ipf_output
) {
1043 result
= filter
->ipf_filter
.ipf_output(filter
->ipf_filter
.cookie
, (mbuf_t
*)&m
, ippo
);
1044 if (result
== EJUSTRETURN
) {
1055 /* set back to host byte order */
1056 ip
= mtod(m
, struct ip
*);
1058 #if BYTE_ORDER != BIG_ENDIAN
1067 /* temporary for testing only: bypass ipsec altogether */
1069 if (ipsec_bypass
!= 0 || (flags
& IP_NOIPSEC
) != 0)
1072 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT
| DBG_FUNC_START
, 0,0,0,0,0);
1075 /* get SP for this packet */
1077 sp
= ipsec4_getpolicybyaddr(m
, IPSEC_DIR_OUTBOUND
, flags
, &error
);
1079 sp
= ipsec4_getpolicybysock(m
, IPSEC_DIR_OUTBOUND
, so
, &error
);
1082 IPSEC_STAT_INCREMENT(ipsecstat
.out_inval
);
1083 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT
| DBG_FUNC_END
, 0,0,0,0,0);
1090 switch (sp
->policy
) {
1091 case IPSEC_POLICY_DISCARD
:
1092 case IPSEC_POLICY_GENERATE
:
1094 * This packet is just discarded.
1096 IPSEC_STAT_INCREMENT(ipsecstat
.out_polvio
);
1097 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT
| DBG_FUNC_END
, 1,0,0,0,0);
1100 case IPSEC_POLICY_BYPASS
:
1101 case IPSEC_POLICY_NONE
:
1102 /* no need to do IPsec. */
1103 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT
| DBG_FUNC_END
, 2,0,0,0,0);
1106 case IPSEC_POLICY_IPSEC
:
1107 if (sp
->req
== NULL
) {
1108 /* acquire a policy */
1109 error
= key_spdacquire(sp
);
1110 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT
| DBG_FUNC_END
, 3,0,0,0,0);
1115 case IPSEC_POLICY_ENTRUST
:
1117 printf("ip_output: Invalid policy found. %d\n", sp
->policy
);
1121 if (flags
& IP_ROUTETOIF
) {
1122 bzero(&ipsec_state
.ro
, sizeof(ipsec_state
.ro
));
1124 route_copyout(&ipsec_state
.ro
, ro
, sizeof(ipsec_state
.ro
));
1125 ipsec_state
.dst
= (struct sockaddr
*)dst
;
1131 * delayed checksums are not currently compatible with IPsec
1133 if (m
->m_pkthdr
.csum_flags
& CSUM_DELAY_DATA
) {
1134 in_delayed_cksum(m
);
1135 m
->m_pkthdr
.csum_flags
&= ~CSUM_DELAY_DATA
;
1139 #if BYTE_ORDER != BIG_ENDIAN
1144 DTRACE_IP6(send
, struct mbuf
*, m
, struct inpcb
*, NULL
,
1145 struct ip
*, ip
, struct ifnet
*, ifp
,
1146 struct ip
*, ip
, struct ip6_hdr
*, NULL
);
1148 error
= ipsec4_output(&ipsec_state
, sp
, flags
);
1150 m0
= m
= ipsec_state
.m
;
1152 if (flags
& IP_ROUTETOIF
) {
1154 * if we have tunnel mode SA, we may need to ignore
1157 if (ipsec_state
.tunneled
) {
1158 flags
&= ~IP_ROUTETOIF
;
1159 ipsec_saved_route
= ro
;
1160 ro
= &ipsec_state
.ro
;
1163 ipsec_saved_route
= ro
;
1164 ro
= &ipsec_state
.ro
;
1166 dst
= (struct sockaddr_in
*)ipsec_state
.dst
;
1168 /* mbuf is already reclaimed in ipsec4_output. */
1178 printf("ip4_output (ipsec): error code %d\n", error
);
1181 /* don't show these error codes to the user */
1185 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT
| DBG_FUNC_END
, 4,0,0,0,0);
1190 /* be sure to update variables that are affected by ipsec4_output() */
1191 ip
= mtod(m
, struct ip
*);
1194 hlen
= IP_VHL_HL(ip
->ip_vhl
) << 2;
1196 hlen
= ip
->ip_hl
<< 2;
1198 /* Check that there wasn't a route change and src is still valid */
1199 if (ro
->ro_rt
!= NULL
&& ro
->ro_rt
->generation_id
!= route_generation
) {
1200 if ((src_ia
= ifa_foraddr(ip
->ip_src
.s_addr
)) == NULL
&&
1201 ((flags
& (IP_ROUTETOIF
| IP_FORWARDING
)) == 0)) {
1202 error
= EADDRNOTAVAIL
;
1203 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT
| DBG_FUNC_END
,
1210 IFA_REMREF(&src_ia
->ia_ifa
);
1213 if (ro
->ro_rt
== NULL
) {
1214 if ((flags
& IP_ROUTETOIF
) == 0) {
1215 printf("ip_output: can't update route after "
1216 "IPsec processing\n");
1217 error
= EHOSTUNREACH
; /*XXX*/
1218 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT
| DBG_FUNC_END
,
1224 IFA_REMREF(&ia
->ia_ifa
);
1225 RT_LOCK_SPIN(ro
->ro_rt
);
1226 ia
= ifatoia(ro
->ro_rt
->rt_ifa
);
1228 /* Become a regular mutex */
1229 RT_CONVERT_LOCK(ro
->ro_rt
);
1230 IFA_ADDREF(&ia
->ia_ifa
);
1232 ifp
= ro
->ro_rt
->rt_ifp
;
1233 RT_UNLOCK(ro
->ro_rt
);
1236 /* make it flipped, again. */
1238 #if BYTE_ORDER != BIG_ENDIAN
1243 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT
| DBG_FUNC_END
, 7,0xff,0xff,0xff,0xff);
1245 /* Pass to filters again */
1246 if (!TAILQ_EMPTY(&ipv4_filters
)) {
1247 struct ipfilter
*filter
;
1249 ipf_pktopts
.ippo_flags
&= ~IPPOF_MCAST_OPTS
;
1251 /* Check that a TSO frame isn't passed to a filter.
1252 * This could happen if a filter is inserted while
1253 * TCP is sending the TSO packet.
1255 if (m
->m_pkthdr
.csum_flags
& CSUM_TSO_IPV4
) {
1262 /* 4135317 - always pass network byte order to filter */
1264 #if BYTE_ORDER != BIG_ENDIAN
1269 TAILQ_FOREACH(filter
, &ipv4_filters
, ipf_link
) {
1270 if (filter
->ipf_filter
.ipf_output
) {
1272 result
= filter
->ipf_filter
.ipf_output(filter
->ipf_filter
.cookie
, (mbuf_t
*)&m
, ippo
);
1273 if (result
== EJUSTRETURN
) {
1284 /* set back to host byte order */
1285 ip
= mtod(m
, struct ip
*);
1287 #if BYTE_ORDER != BIG_ENDIAN
1300 * - Xlate: translate packet's addr/port (NAT).
1301 * - Firewall: deny/allow/etc.
1302 * - Wrap: fake packet's addr/port <unimpl.>
1303 * - Encapsulate: put it in another IP and send out. <unimp.>
1306 struct mbuf
*m1
= m
;
1308 if ((error
= (*fr_checkp
)(ip
, hlen
, ifp
, 1, &m1
)) || !m1
) {
1311 ip
= mtod(m0
= m
= m1
, struct ip
*);
1315 * Check with the firewall...
1316 * but not if we are already being fwd'd from a firewall.
1318 if (fw_enable
&& IPFW_LOADED
&& !args
.next_hop
) {
1319 struct sockaddr_in
*old
= dst
;
1322 args
.next_hop
= dst
;
1324 off
= ip_fw_chk_ptr(&args
);
1326 dst
= args
.next_hop
;
1329 * On return we must do the following:
1330 * IP_FW_PORT_DENY_FLAG -> drop the pkt (XXX new)
1331 * 1<=off<= 0xffff -> DIVERT
1332 * (off & IP_FW_PORT_DYNT_FLAG) -> send to a DUMMYNET pipe
1333 * (off & IP_FW_PORT_TEE_FLAG) -> TEE the packet
1334 * dst != old -> IPFIREWALL_FORWARD
1335 * off==0, dst==old -> accept
1336 * If some of the above modules are not compiled in, then
1337 * we shouldn't have to check the corresponding condition
1338 * (because the ipfw control socket should not accept
1339 * unsupported rules), but better play safe and drop
1340 * packets in case of doubt.
1343 if ( (off
& IP_FW_PORT_DENY_FLAG
) || m
== NULL
) {
1349 ip
= mtod(m
, struct ip
*);
1351 if (off
== 0 && dst
== old
) {/* common case */
1355 if (DUMMYNET_LOADED
&& (off
& IP_FW_PORT_DYNT_FLAG
) != 0) {
1357 * pass the pkt to dummynet. Need to include
1358 * pipe number, m, ifp, ro, dst because these are
1359 * not recomputed in the next pass.
1360 * All other parameters have been already used and
1361 * so they are not needed anymore.
1362 * XXX note: if the ifp or ro entry are deleted
1363 * while a pkt is in dummynet, we are in trouble!
1368 if (flags
& IP_OUTARGS
)
1371 error
= ip_dn_io_ptr(m
, off
& 0xffff, DN_TO_IP_OUT
,
1375 #endif /* DUMMYNET */
1377 if (off
!= 0 && (off
& IP_FW_PORT_DYNT_FLAG
) == 0) {
1378 struct mbuf
*clone
= NULL
;
1380 /* Clone packet if we're doing a 'tee' */
1381 if ((off
& IP_FW_PORT_TEE_FLAG
) != 0)
1382 clone
= m_dup(m
, M_DONTWAIT
);
1385 * delayed checksums are not currently compatible
1386 * with divert sockets.
1388 if (m
->m_pkthdr
.csum_flags
& CSUM_DELAY_DATA
) {
1389 in_delayed_cksum(m
);
1390 m
->m_pkthdr
.csum_flags
&= ~CSUM_DELAY_DATA
;
1393 /* Restore packet header fields to original values */
1395 #if BYTE_ORDER != BIG_ENDIAN
1400 /* Deliver packet to divert input routine */
1401 divert_packet(m
, 0, off
& 0xffff, args
.divert_rule
);
1403 /* If 'tee', continue with original packet */
1404 if (clone
!= NULL
) {
1406 ip
= mtod(m
, struct ip
*);
1413 #if IPFIREWALL_FORWARD
1414 /* Here we check dst to make sure it's directly reachable on the
1415 * interface we previously thought it was.
1416 * If it isn't (which may be likely in some situations) we have
1417 * to re-route it (ie, find a route for the next-hop and the
1418 * associated interface) and set them here. This is nested
1419 * forwarding which in most cases is undesirable, except where
1420 * such control is nigh impossible. So we do it here.
1423 if (off
== 0 && old
!= dst
) {
1424 struct in_ifaddr
*ia_fw
;
1426 /* It's changed... */
1427 /* There must be a better way to do this next line... */
1428 static struct route sro_fwd
, *ro_fwd
= &sro_fwd
;
1429 #if IPFIREWALL_FORWARD_DEBUG
1430 printf("IPFIREWALL_FORWARD: New dst ip: ");
1431 print_ip(dst
->sin_addr
);
1435 * We need to figure out if we have been forwarded
1436 * to a local socket. If so then we should somehow
1437 * "loop back" to ip_input, and get directed to the
1438 * PCB as if we had received this packet. This is
1439 * because it may be difficult to identify the packets
1440 * you want to forward until they are being output
1441 * and have selected an interface. (e.g. locally
1442 * initiated packets) If we used the loopback interface,
1443 * we would not be able to control what happens
1444 * as the packet runs through ip_input() as
1445 * it is done through an ISR.
1447 lck_rw_lock_shared(in_ifaddr_rwlock
);
1448 TAILQ_FOREACH(ia_fw
, &in_ifaddrhead
, ia_link
) {
1450 * If the addr to forward to is one
1451 * of ours, we pretend to
1452 * be the destination for this packet.
1454 IFA_LOCK_SPIN(&ia_fw
->ia_ifa
);
1455 if (IA_SIN(ia_fw
)->sin_addr
.s_addr
==
1456 dst
->sin_addr
.s_addr
) {
1457 IFA_UNLOCK(&ia_fw
->ia_ifa
);
1460 IFA_UNLOCK(&ia_fw
->ia_ifa
);
1462 lck_rw_done(in_ifaddr_rwlock
);
1464 /* tell ip_input "dont filter" */
1465 struct m_tag
*fwd_tag
;
1466 struct ip_fwd_tag
*ipfwd_tag
;
1468 fwd_tag
= m_tag_create(KERNEL_MODULE_TAG_ID
,
1469 KERNEL_TAG_TYPE_IPFORWARD
,
1470 sizeof (*ipfwd_tag
), M_NOWAIT
, m
);
1471 if (fwd_tag
== NULL
) {
1476 ipfwd_tag
= (struct ip_fwd_tag
*)(fwd_tag
+1);
1477 ipfwd_tag
->next_hop
= args
.next_hop
;
1479 m_tag_prepend(m
, fwd_tag
);
1481 if (m
->m_pkthdr
.rcvif
== NULL
)
1482 m
->m_pkthdr
.rcvif
= lo_ifp
;
1483 if ((~IF_HWASSIST_CSUM_FLAGS(m
->m_pkthdr
.rcvif
->if_hwassist
) &
1484 m
->m_pkthdr
.csum_flags
) == 0) {
1485 if (m
->m_pkthdr
.csum_flags
& CSUM_DELAY_DATA
) {
1486 m
->m_pkthdr
.csum_flags
&= ~CSUM_DELAY_DATA
;
1487 m
->m_pkthdr
.csum_flags
|=
1488 CSUM_DATA_VALID
| CSUM_PSEUDO_HDR
;
1489 m
->m_pkthdr
.csum_data
= 0xffff;
1491 m
->m_pkthdr
.csum_flags
|=
1492 CSUM_IP_CHECKED
| CSUM_IP_VALID
;
1494 else if (m
->m_pkthdr
.csum_flags
& CSUM_DELAY_DATA
) {
1495 in_delayed_cksum(m
);
1496 m
->m_pkthdr
.csum_flags
&= ~CSUM_DELAY_DATA
;
1497 ip
->ip_sum
= in_cksum(m
, hlen
);
1500 #if BYTE_ORDER != BIG_ENDIAN
1505 /* we need to call dlil_output to run filters
1506 * and resync to avoid recursion loops.
1509 dlil_output(lo_ifp
, PF_INET
, m
, 0, (struct sockaddr
*)dst
, 0);
1512 printf("ip_output: no loopback ifp for forwarding!!!\n");
1516 /* Some of the logic for this was
1517 * nicked from above.
1519 * This rewrites the cached route in a local PCB.
1520 * Is this what we want to do?
1522 bcopy(dst
, &ro_fwd
->ro_dst
, sizeof(*dst
));
1524 ro_fwd
->ro_rt
= NULL
;
1525 rtalloc_ign(ro_fwd
, RTF_PRCLONING
);
1527 if (ro_fwd
->ro_rt
== NULL
) {
1528 OSAddAtomic(1, &ipstat
.ips_noroute
);
1529 error
= EHOSTUNREACH
;
1533 RT_LOCK_SPIN(ro_fwd
->ro_rt
);
1534 ia_fw
= ifatoia(ro_fwd
->ro_rt
->rt_ifa
);
1535 if (ia_fw
!= NULL
) {
1536 /* Become a regular mutex */
1537 RT_CONVERT_LOCK(ro_fwd
->ro_rt
);
1538 IFA_ADDREF(&ia_fw
->ia_ifa
);
1540 ifp
= ro_fwd
->ro_rt
->rt_ifp
;
1541 ro_fwd
->ro_rt
->rt_use
++;
1542 if (ro_fwd
->ro_rt
->rt_flags
& RTF_GATEWAY
)
1543 dst
= (struct sockaddr_in
*)ro_fwd
->ro_rt
->rt_gateway
;
1544 if (ro_fwd
->ro_rt
->rt_flags
& RTF_HOST
) {
1546 (ro_fwd
->ro_rt
->rt_flags
& RTF_BROADCAST
);
1548 /* Become a regular mutex */
1549 RT_CONVERT_LOCK(ro_fwd
->ro_rt
);
1550 isbroadcast
= in_broadcast(dst
->sin_addr
, ifp
);
1552 RT_UNLOCK(ro_fwd
->ro_rt
);
1554 ro
->ro_rt
= ro_fwd
->ro_rt
;
1555 dst
= (struct sockaddr_in
*)&ro_fwd
->ro_dst
;
1558 * If we added a default src ip earlier,
1559 * which would have been gotten from the-then
1560 * interface, do it again, from the new one.
1562 if (ia_fw
!= NULL
) {
1563 if (fwd_rewrite_src
) {
1564 IFA_LOCK_SPIN(&ia_fw
->ia_ifa
);
1565 ip
->ip_src
= IA_SIN(ia_fw
)->sin_addr
;
1566 IFA_UNLOCK(&ia_fw
->ia_ifa
);
1568 IFA_REMREF(&ia_fw
->ia_ifa
);
1572 #endif /* IPFIREWALL_FORWARD */
1574 * if we get here, none of the above matches, and
1575 * we have to drop the pkt
1578 error
= EACCES
; /* not sure this is the right error msg */
1583 #endif /* IPFIREWALL */
1585 /* Do not allow loopback address to wind up on a wire */
1586 if ((ifp
->if_flags
& IFF_LOOPBACK
) == 0 &&
1587 ((ntohl(ip
->ip_src
.s_addr
) >> IN_CLASSA_NSHIFT
) == IN_LOOPBACKNET
||
1588 (ntohl(ip
->ip_dst
.s_addr
) >> IN_CLASSA_NSHIFT
) == IN_LOOPBACKNET
)) {
1589 OSAddAtomic(1, &ipstat
.ips_badaddr
);
1592 * Do not simply drop the packet just like a firewall -- we want the
1593 * application to feel the pain.
1594 * Return ENETUNREACH like ip6_output does in some similar cases.
1595 * This can startle the otherwise clueless process that specifies
1596 * loopback as the source address.
1598 error
= ENETUNREACH
;
1602 m
->m_pkthdr
.csum_flags
|= CSUM_IP
;
1603 tso
= (ifp
->if_hwassist
& IFNET_TSO_IPV4
) && (m
->m_pkthdr
.csum_flags
& CSUM_TSO_IPV4
);
1605 sw_csum
= m
->m_pkthdr
.csum_flags
1606 & ~IF_HWASSIST_CSUM_FLAGS(ifp
->if_hwassist
);
1608 if ((ifp
->if_hwassist
& CSUM_TCP_SUM16
) != 0) {
1610 * Special case code for GMACE
1611 * frames that can be checksummed by GMACE SUM16 HW:
1612 * frame >64, no fragments, no UDP
1614 if (apple_hwcksum_tx
&& (m
->m_pkthdr
.csum_flags
& CSUM_TCP
)
1615 && (ip
->ip_len
> 50) && (ip
->ip_len
<= ifp
->if_mtu
)) {
1616 /* Apple GMAC HW, expects STUFF_OFFSET << 16 | START_OFFSET */
1617 u_short offset
= (IP_VHL_HL(ip
->ip_vhl
) << 2) +14 ; /* IP+Enet header length */
1618 u_short csumprev
= m
->m_pkthdr
.csum_data
& 0xFFFF;
1619 m
->m_pkthdr
.csum_flags
= CSUM_DATA_VALID
| CSUM_TCP_SUM16
; /* for GMAC */
1620 m
->m_pkthdr
.csum_data
= (csumprev
+ offset
) << 16 ;
1621 m
->m_pkthdr
.csum_data
+= offset
;
1622 sw_csum
= CSUM_DELAY_IP
; /* do IP hdr chksum in software */
1625 /* let the software handle any UDP or TCP checksums */
1626 sw_csum
|= (CSUM_DELAY_DATA
& m
->m_pkthdr
.csum_flags
);
1628 } else if (apple_hwcksum_tx
== 0) {
1629 sw_csum
|= (CSUM_DELAY_DATA
| CSUM_DELAY_IP
) &
1630 m
->m_pkthdr
.csum_flags
;
1633 if (sw_csum
& CSUM_DELAY_DATA
) {
1634 in_delayed_cksum(m
);
1635 sw_csum
&= ~CSUM_DELAY_DATA
;
1636 m
->m_pkthdr
.csum_flags
&= ~CSUM_DELAY_DATA
;
1639 if (apple_hwcksum_tx
!= 0) {
1640 m
->m_pkthdr
.csum_flags
&=
1641 IF_HWASSIST_CSUM_FLAGS(ifp
->if_hwassist
);
1643 m
->m_pkthdr
.csum_flags
= 0;
1647 * If small enough for interface, or the interface will take
1648 * care of the fragmentation for us, can just send directly.
1650 if ((u_short
)ip
->ip_len
<= ifp
->if_mtu
|| tso
||
1651 ifp
->if_hwassist
& CSUM_FRAGMENT
) {
1653 m
->m_pkthdr
.csum_flags
|= CSUM_TSO_IPV4
;
1656 #if BYTE_ORDER != BIG_ENDIAN
1662 if (sw_csum
& CSUM_DELAY_IP
) {
1663 ip
->ip_sum
= in_cksum(m
, hlen
);
1667 /* Record statistics for this interface address. */
1668 if (!(flags
& IP_FORWARDING
) && ia
!= NULL
) {
1669 ia
->ia_ifa
.if_opackets
++;
1670 ia
->ia_ifa
.if_obytes
+= m
->m_pkthdr
.len
;
1675 /* clean ipsec history once it goes out of the node */
1676 if (ipsec_bypass
== 0 && (flags
& IP_NOIPSEC
) == 0)
1679 if (packetchain
== 0) {
1680 if (ro
->ro_rt
&& nstat_collect
)
1681 nstat_route_tx(ro
->ro_rt
, 1, m
->m_pkthdr
.len
, 0);
1682 error
= ifnet_output(ifp
, PF_INET
, m
, ro
->ro_rt
,
1683 (struct sockaddr
*)dst
);
1686 else { /* packet chaining allows us to reuse the route for all packets */
1687 bytecnt
+= m
->m_pkthdr
.len
;
1688 mppn
= &m
->m_nextpkt
;
1694 if (pktcnt
> ip_maxchainsent
)
1695 ip_maxchainsent
= pktcnt
;
1696 if (ro
->ro_rt
&& nstat_collect
)
1697 nstat_route_tx(ro
->ro_rt
, pktcnt
, bytecnt
, 0);
1699 error
= ifnet_output(ifp
, PF_INET
, packetlist
,
1700 ro
->ro_rt
, (struct sockaddr
*)dst
);
1712 * Too large for interface; fragment if possible.
1713 * Must be able to put at least 8 bytes per fragment.
1716 if (ip
->ip_off
& IP_DF
|| (m
->m_pkthdr
.csum_flags
& CSUM_TSO_IPV4
) ||
1720 * This case can happen if the user changed the MTU
1721 * of an interface after enabling IP on it. Because
1722 * most netifs don't keep track of routes pointing to
1723 * them, there is no way for one to update all its
1724 * routes when the MTU is changed.
1727 RT_LOCK_SPIN(ro
->ro_rt
);
1728 if ((ro
->ro_rt
->rt_flags
& (RTF_UP
| RTF_HOST
))
1729 && !(ro
->ro_rt
->rt_rmx
.rmx_locks
& RTV_MTU
)
1730 && (ro
->ro_rt
->rt_rmx
.rmx_mtu
> ifp
->if_mtu
)) {
1731 ro
->ro_rt
->rt_rmx
.rmx_mtu
= ifp
->if_mtu
;
1733 RT_UNLOCK(ro
->ro_rt
);
1738 OSAddAtomic(1, &ipstat
.ips_cantfrag
);
1742 error
= ip_fragment(m
, ifp
, ifp
->if_mtu
, sw_csum
);
1748 KERNEL_DEBUG(DBG_LAYER_END
, ip
->ip_dst
.s_addr
,
1749 ip
->ip_src
.s_addr
, ip
->ip_p
, ip
->ip_off
, ip
->ip_len
);
1751 for (m
= m0
; m
; m
= m0
) {
1755 /* clean ipsec history once it goes out of the node */
1756 if (ipsec_bypass
== 0 && (flags
& IP_NOIPSEC
) == 0)
1761 /* Record statistics for this interface address. */
1763 ia
->ia_ifa
.if_opackets
++;
1764 ia
->ia_ifa
.if_obytes
+= m
->m_pkthdr
.len
;
1767 if ((packetchain
!= 0) && (pktcnt
> 0))
1768 panic("ip_output: mix of packet in packetlist is wrong=%p", packetlist
);
1769 if (ro
->ro_rt
&& nstat_collect
)
1770 nstat_route_tx(ro
->ro_rt
, 1, m
->m_pkthdr
.len
, 0);
1771 error
= ifnet_output(ifp
, PF_INET
, m
, ro
->ro_rt
,
1772 (struct sockaddr
*)dst
);
1778 OSAddAtomic(1, &ipstat
.ips_fragmented
);
1782 IFA_REMREF(&ia
->ia_ifa
);
1786 if (ipsec_bypass
== 0 && (flags
& IP_NOIPSEC
) == 0) {
1787 if (ipsec_state
.ro
.ro_rt
)
1788 rtfree(ipsec_state
.ro
.ro_rt
);
1790 KEYDEBUG(KEYDEBUG_IPSEC_STAMP
,
1791 printf("DP ip_output call free SP:%x\n", sp
));
1792 key_freesp(sp
, KEY_SADB_UNLOCKED
);
1797 KERNEL_DEBUG(DBG_FNC_IP_OUTPUT
| DBG_FUNC_END
, error
,0,0,0,0);
1805 ip_fragment(struct mbuf
*m
, struct ifnet
*ifp
, unsigned long mtu
, int sw_csum
)
1807 struct ip
*ip
, *mhip
;
1808 int len
, hlen
, mhlen
, firstlen
, off
, error
= 0;
1809 struct mbuf
**mnext
= &m
->m_nextpkt
, *m0
;
1812 ip
= mtod(m
, struct ip
*);
1814 hlen
= IP_VHL_HL(ip
->ip_vhl
) << 2;
1816 hlen
= ip
->ip_hl
<< 2;
1819 firstlen
= len
= (mtu
- hlen
) &~ 7;
1826 * if the interface will not calculate checksums on
1827 * fragmented packets, then do it here.
1829 if (m
->m_pkthdr
.csum_flags
& CSUM_DELAY_DATA
&&
1830 (ifp
->if_hwassist
& CSUM_IP_FRAGS
) == 0) {
1831 in_delayed_cksum(m
);
1832 m
->m_pkthdr
.csum_flags
&= ~CSUM_DELAY_DATA
;
1836 * Loop through length of segment after first fragment,
1837 * make new header and copy data of each part and link onto chain.
1840 mhlen
= sizeof (struct ip
);
1841 for (off
= hlen
+ len
; off
< (u_short
)ip
->ip_len
; off
+= len
) {
1842 MGETHDR(m
, M_DONTWAIT
, MT_HEADER
); /* MAC-OK */
1845 OSAddAtomic(1, &ipstat
.ips_odropped
);
1848 m
->m_flags
|= (m0
->m_flags
& M_MCAST
) | M_FRAG
;
1849 m
->m_data
+= max_linkhdr
;
1850 mhip
= mtod(m
, struct ip
*);
1852 if (hlen
> sizeof (struct ip
)) {
1853 mhlen
= ip_optcopy(ip
, mhip
) + sizeof (struct ip
);
1854 mhip
->ip_vhl
= IP_MAKE_VHL(IPVERSION
, mhlen
>> 2);
1857 mhip
->ip_off
= ((off
- hlen
) >> 3) + (ip
->ip_off
& ~IP_MF
);
1858 if (ip
->ip_off
& IP_MF
)
1859 mhip
->ip_off
|= IP_MF
;
1860 if (off
+ len
>= (u_short
)ip
->ip_len
)
1861 len
= (u_short
)ip
->ip_len
- off
;
1863 mhip
->ip_off
|= IP_MF
;
1864 mhip
->ip_len
= htons((u_short
)(len
+ mhlen
));
1865 m
->m_next
= m_copy(m0
, off
, len
);
1866 if (m
->m_next
== 0) {
1868 error
= ENOBUFS
; /* ??? */
1869 OSAddAtomic(1, &ipstat
.ips_odropped
);
1872 m
->m_pkthdr
.len
= mhlen
+ len
;
1873 m
->m_pkthdr
.rcvif
= 0;
1874 m
->m_pkthdr
.csum_flags
= m0
->m_pkthdr
.csum_flags
;
1875 m
->m_pkthdr
.socket_id
= m0
->m_pkthdr
.socket_id
;
1877 mac_netinet_fragment(m0
, m
);
1880 #if BYTE_ORDER != BIG_ENDIAN
1881 HTONS(mhip
->ip_off
);
1885 if (sw_csum
& CSUM_DELAY_IP
) {
1886 mhip
->ip_sum
= in_cksum(m
, mhlen
);
1889 mnext
= &m
->m_nextpkt
;
1892 OSAddAtomic(nfrags
, &ipstat
.ips_ofragments
);
1894 /* set first/last markers for fragment chain */
1895 m
->m_flags
|= M_LASTFRAG
;
1896 m0
->m_flags
|= M_FIRSTFRAG
| M_FRAG
;
1897 m0
->m_pkthdr
.csum_data
= nfrags
;
1900 * Update first fragment by trimming what's been copied out
1901 * and updating header, then send each fragment (in order).
1904 m_adj(m
, hlen
+ firstlen
- (u_short
)ip
->ip_len
);
1905 m
->m_pkthdr
.len
= hlen
+ firstlen
;
1906 ip
->ip_len
= htons((u_short
)m
->m_pkthdr
.len
);
1907 ip
->ip_off
|= IP_MF
;
1909 #if BYTE_ORDER != BIG_ENDIAN
1914 if (sw_csum
& CSUM_DELAY_IP
) {
1915 ip
->ip_sum
= in_cksum(m
, hlen
);
1925 ip_out_cksum_stats(int proto
, u_int32_t len
)
1929 tcp_out_cksum_stats(len
);
1932 udp_out_cksum_stats(len
);
1935 /* keep only TCP or UDP stats for now */
1941 in_delayed_cksum_offset(struct mbuf
*m0
, int ip_offset
)
1944 unsigned char buf
[sizeof(struct ip
)];
1945 u_short csum
, offset
, ip_len
;
1947 /* Save copy of first mbuf pointer and the ip_offset before modifying */
1948 struct mbuf
*m
= m0
;
1949 int ip_offset_copy
= ip_offset
;
1951 while (ip_offset
>= m
->m_len
) {
1952 ip_offset
-= m
->m_len
;
1955 printf("in_delayed_cksum_withoffset failed - ip_offset wasn't in the packet\n");
1960 /* Sometimes the IP header is not contiguous, yes this can happen! */
1961 if (ip_offset
+ sizeof(struct ip
) > m
->m_len
) {
1963 printf("delayed m_pullup, m->len: %d off: %d\n",
1964 m
->m_len
, ip_offset
);
1966 m_copydata(m
, ip_offset
, sizeof(struct ip
), (caddr_t
) buf
);
1968 ip
= (struct ip
*)buf
;
1970 ip
= (struct ip
*)(m
->m_data
+ ip_offset
);
1975 m
->m_len
-= ip_offset
;
1976 m
->m_data
+= ip_offset
;
1979 offset
= IP_VHL_HL(ip
->ip_vhl
) << 2 ;
1982 * We could be in the context of an IP or interface filter; in the
1983 * former case, ip_len would be in host (correct) order while for
1984 * the latter it would be in network order. Because of this, we
1985 * attempt to interpret the length field by comparing it against
1986 * the actual packet length. If the comparison fails, byte swap
1987 * the length and check again. If it still fails, then the packet
1988 * is bogus and we give up.
1990 ip_len
= ip
->ip_len
;
1991 if (ip_len
!= (m0
->m_pkthdr
.len
- ip_offset_copy
)) {
1992 ip_len
= SWAP16(ip_len
);
1993 if (ip_len
!= (m0
->m_pkthdr
.len
- ip_offset_copy
)) {
1994 printf("in_delayed_cksum_offset: ip_len %d (%d) "
1995 "doesn't match actual length %d\n", ip
->ip_len
,
1996 ip_len
, (m0
->m_pkthdr
.len
- ip_offset_copy
));
2001 csum
= in_cksum_skip(m
, ip_len
, offset
);
2004 ip_out_cksum_stats(ip
->ip_p
, ip_len
- offset
);
2006 if (m0
->m_pkthdr
.csum_flags
& CSUM_UDP
&& csum
== 0)
2008 offset
+= m0
->m_pkthdr
.csum_data
& 0xFFFF; /* checksum offset */
2012 if (M_LEADINGSPACE(m
) < ip_offset
)
2013 panic("in_delayed_cksum_offset - chain modified!\n");
2014 m
->m_len
+= ip_offset
;
2015 m
->m_data
-= ip_offset
;
2018 if (offset
> ip_len
) /* bogus offset */
2021 /* Insert the checksum in the existing chain */
2022 if (offset
+ ip_offset
+ sizeof(u_short
) > m
->m_len
) {
2026 printf("delayed m_copyback, m->len: %d off: %d p: %d\n",
2027 m
->m_len
, offset
+ ip_offset
, ip
->ip_p
);
2029 *(u_short
*)tmp
= csum
;
2030 m_copyback(m
, offset
+ ip_offset
, 2, tmp
);
2032 *(u_short
*)(m
->m_data
+ offset
+ ip_offset
) = csum
;
2036 in_delayed_cksum(struct mbuf
*m
)
2038 in_delayed_cksum_offset(m
, 0);
2042 in_cksum_offset(struct mbuf
* m
, size_t ip_offset
)
2044 struct ip
* ip
= NULL
;
2046 unsigned char buf
[sizeof(struct ip
)];
2049 /* Save copy of first mbuf pointer and the ip_offset before modifying */
2050 struct mbuf
* m0
= m
;
2051 size_t ip_offset_copy
= ip_offset
;
2053 while (ip_offset
>= m
->m_len
) {
2054 ip_offset
-= m
->m_len
;
2057 printf("in_cksum_offset failed - ip_offset wasn't in the packet\n");
2062 /* Sometimes the IP header is not contiguous, yes this can happen! */
2063 if (ip_offset
+ sizeof(struct ip
) > m
->m_len
) {
2066 printf("in_cksum_offset - delayed m_pullup, m->len: %d off: %lu\n",
2067 m
->m_len
, ip_offset
);
2069 m_copydata(m
, ip_offset
, sizeof(struct ip
), (caddr_t
) buf
);
2071 ip
= (struct ip
*)buf
;
2073 m_copyback(m
, ip_offset
+ offsetof(struct ip
, ip_sum
), 2, (caddr_t
)&ip
->ip_sum
);
2075 ip
= (struct ip
*)(m
->m_data
+ ip_offset
);
2081 m
->m_len
-= ip_offset
;
2082 m
->m_data
+= ip_offset
;
2086 hlen
= IP_VHL_HL(ip
->ip_vhl
) << 2;
2088 hlen
= ip
->ip_hl
<< 2;
2091 * We could be in the context of an IP or interface filter; in the
2092 * former case, ip_len would be in host order while for the latter
2093 * it would be in network (correct) order. Because of this, we
2094 * attempt to interpret the length field by comparing it against
2095 * the actual packet length. If the comparison fails, byte swap
2096 * the length and check again. If it still fails, then the packet
2097 * is bogus and we give up.
2099 if (ntohs(ip
->ip_len
) != (m0
->m_pkthdr
.len
- ip_offset_copy
)) {
2100 ip
->ip_len
= SWAP16(ip
->ip_len
);
2102 if (ntohs(ip
->ip_len
) != (m0
->m_pkthdr
.len
- ip_offset_copy
)) {
2103 ip
->ip_len
= SWAP16(ip
->ip_len
);
2104 printf("in_cksum_offset: ip_len %d (%d) "
2105 "doesn't match actual length %lu\n",
2106 ip
->ip_len
, SWAP16(ip
->ip_len
),
2107 (m0
->m_pkthdr
.len
- ip_offset_copy
));
2113 ip
->ip_sum
= in_cksum(m
, hlen
);
2115 ip
->ip_len
= SWAP16(ip
->ip_len
);
2119 if (M_LEADINGSPACE(m
) < ip_offset
)
2120 panic("in_cksum_offset - chain modified!\n");
2121 m
->m_len
+= ip_offset
;
2122 m
->m_data
-= ip_offset
;
2125 /* Insert the checksum in the existing chain if IP header not contiguous */
2126 if (ip_offset
+ sizeof(struct ip
) > m
->m_len
) {
2130 printf("in_cksum_offset m_copyback, m->len: %u off: %lu p: %d\n",
2131 m
->m_len
, ip_offset
+ offsetof(struct ip
, ip_sum
), ip
->ip_p
);
2133 *(u_short
*)tmp
= ip
->ip_sum
;
2134 m_copyback(m
, ip_offset
+ offsetof(struct ip
, ip_sum
), 2, tmp
);
2139 * Insert IP options into preformed packet.
2140 * Adjust IP destination as required for IP source routing,
2141 * as indicated by a non-zero in_addr at the start of the options.
2143 * XXX This routine assumes that the packet has no options in place.
2145 static struct mbuf
*
2146 ip_insertoptions(m
, opt
, phlen
)
2147 register struct mbuf
*m
;
2151 register struct ipoption
*p
= mtod(opt
, struct ipoption
*);
2153 register struct ip
*ip
= mtod(m
, struct ip
*);
2156 optlen
= opt
->m_len
- sizeof(p
->ipopt_dst
);
2157 if (optlen
+ (u_short
)ip
->ip_len
> IP_MAXPACKET
)
2158 return (m
); /* XXX should fail */
2159 if (p
->ipopt_dst
.s_addr
)
2160 ip
->ip_dst
= p
->ipopt_dst
;
2161 if (m
->m_flags
& M_EXT
|| m
->m_data
- optlen
< m
->m_pktdat
) {
2162 MGETHDR(n
, M_DONTWAIT
, MT_HEADER
); /* MAC-OK */
2165 n
->m_pkthdr
.rcvif
= 0;
2167 mac_mbuf_label_copy(m
, n
);
2169 n
->m_pkthdr
.len
= m
->m_pkthdr
.len
+ optlen
;
2170 m
->m_len
-= sizeof(struct ip
);
2171 m
->m_data
+= sizeof(struct ip
);
2174 m
->m_len
= optlen
+ sizeof(struct ip
);
2175 m
->m_data
+= max_linkhdr
;
2176 (void)memcpy(mtod(m
, void *), ip
, sizeof(struct ip
));
2178 m
->m_data
-= optlen
;
2180 m
->m_pkthdr
.len
+= optlen
;
2181 ovbcopy((caddr_t
)ip
, mtod(m
, caddr_t
), sizeof(struct ip
));
2183 ip
= mtod(m
, struct ip
*);
2184 bcopy(p
->ipopt_list
, ip
+ 1, optlen
);
2185 *phlen
= sizeof(struct ip
) + optlen
;
2186 ip
->ip_vhl
= IP_MAKE_VHL(IPVERSION
, *phlen
>> 2);
2187 ip
->ip_len
+= optlen
;
2192 * Copy options from ip to jp,
2193 * omitting those not copied during fragmentation.
2199 register u_char
*cp
, *dp
;
2200 int opt
, optlen
, cnt
;
2202 cp
= (u_char
*)(ip
+ 1);
2203 dp
= (u_char
*)(jp
+ 1);
2204 cnt
= (IP_VHL_HL(ip
->ip_vhl
) << 2) - sizeof (struct ip
);
2205 for (; cnt
> 0; cnt
-= optlen
, cp
+= optlen
) {
2207 if (opt
== IPOPT_EOL
)
2209 if (opt
== IPOPT_NOP
) {
2210 /* Preserve for IP mcast tunnel's LSRR alignment. */
2216 if (cnt
< IPOPT_OLEN
+ sizeof(*cp
))
2217 panic("malformed IPv4 option passed to ip_optcopy");
2219 optlen
= cp
[IPOPT_OLEN
];
2221 if (optlen
< IPOPT_OLEN
+ sizeof(*cp
) || optlen
> cnt
)
2222 panic("malformed IPv4 option passed to ip_optcopy");
2224 /* bogus lengths should have been caught by ip_dooptions */
2227 if (IPOPT_COPIED(opt
)) {
2228 bcopy(cp
, dp
, optlen
);
2232 for (optlen
= dp
- (u_char
*)(jp
+1); optlen
& 0x3; optlen
++)
2238 * IP socket option processing.
2241 ip_ctloutput(so
, sopt
)
2243 struct sockopt
*sopt
;
2245 struct inpcb
*inp
= sotoinpcb(so
);
2249 if (sopt
->sopt_level
!= IPPROTO_IP
) {
2253 switch (sopt
->sopt_dir
) {
2255 switch (sopt
->sopt_name
) {
2262 if (sopt
->sopt_valsize
> MLEN
) {
2266 MGET(m
, sopt
->sopt_p
!= kernproc
? M_WAIT
: M_DONTWAIT
,
2272 m
->m_len
= sopt
->sopt_valsize
;
2273 error
= sooptcopyin(sopt
, mtod(m
, char *), m
->m_len
,
2278 return (ip_pcbopts(sopt
->sopt_name
, &inp
->inp_options
,
2285 case IP_RECVRETOPTS
:
2286 case IP_RECVDSTADDR
:
2289 #if defined(NFAITH) && NFAITH > 0
2292 case IP_RECVPKTINFO
:
2293 error
= sooptcopyin(sopt
, &optval
, sizeof optval
,
2298 switch (sopt
->sopt_name
) {
2300 inp
->inp_ip_tos
= optval
;
2304 inp
->inp_ip_ttl
= optval
;
2306 #define OPTSET(bit) \
2308 inp->inp_flags |= bit; \
2310 inp->inp_flags &= ~bit;
2313 OPTSET(INP_RECVOPTS
);
2316 case IP_RECVRETOPTS
:
2317 OPTSET(INP_RECVRETOPTS
);
2320 case IP_RECVDSTADDR
:
2321 OPTSET(INP_RECVDSTADDR
);
2329 OPTSET(INP_RECVTTL
);
2332 #if defined(NFAITH) && NFAITH > 0
2337 case IP_RECVPKTINFO
:
2338 OPTSET(INP_PKTINFO
);
2344 #if CONFIG_FORCE_OUT_IFP
2346 * Apple private interface, similar to IP_BOUND_IF, except
2347 * that the parameter is a NULL-terminated string containing
2348 * the name of the network interface; an empty string means
2349 * unbind. Applications are encouraged to use IP_BOUND_IF
2350 * instead, as that is the current "official" API.
2352 case IP_FORCE_OUT_IFP
: {
2353 char ifname
[IFNAMSIZ
];
2354 unsigned int ifscope
;
2356 /* This option is settable only for IPv4 */
2357 if (!(inp
->inp_vflag
& INP_IPV4
)) {
2362 /* Verify interface name parameter is sane */
2363 if (sopt
->sopt_valsize
> sizeof(ifname
)) {
2368 /* Copy the interface name */
2369 if (sopt
->sopt_valsize
!= 0) {
2370 error
= sooptcopyin(sopt
, ifname
,
2371 sizeof (ifname
), sopt
->sopt_valsize
);
2376 if (sopt
->sopt_valsize
== 0 || ifname
[0] == '\0') {
2377 /* Unbind this socket from any interface */
2378 ifscope
= IFSCOPE_NONE
;
2382 /* Verify name is NULL terminated */
2383 if (ifname
[sopt
->sopt_valsize
- 1] != '\0') {
2388 /* Bail out if given bogus interface name */
2389 if (ifnet_find_by_name(ifname
, &ifp
) != 0) {
2394 /* Bind this socket to this interface */
2395 ifscope
= ifp
->if_index
;
2398 * Won't actually free; since we don't release
2399 * this later, we should do it now.
2403 inp_bindif(inp
, ifscope
);
2408 * Multicast socket options are processed by the in_mcast
2411 case IP_MULTICAST_IF
:
2412 case IP_MULTICAST_IFINDEX
:
2413 case IP_MULTICAST_VIF
:
2414 case IP_MULTICAST_TTL
:
2415 case IP_MULTICAST_LOOP
:
2416 case IP_ADD_MEMBERSHIP
:
2417 case IP_DROP_MEMBERSHIP
:
2418 case IP_ADD_SOURCE_MEMBERSHIP
:
2419 case IP_DROP_SOURCE_MEMBERSHIP
:
2420 case IP_BLOCK_SOURCE
:
2421 case IP_UNBLOCK_SOURCE
:
2423 case MCAST_JOIN_GROUP
:
2424 case MCAST_LEAVE_GROUP
:
2425 case MCAST_JOIN_SOURCE_GROUP
:
2426 case MCAST_LEAVE_SOURCE_GROUP
:
2427 case MCAST_BLOCK_SOURCE
:
2428 case MCAST_UNBLOCK_SOURCE
:
2429 error
= inp_setmoptions(inp
, sopt
);
2433 error
= sooptcopyin(sopt
, &optval
, sizeof optval
,
2439 case IP_PORTRANGE_DEFAULT
:
2440 inp
->inp_flags
&= ~(INP_LOWPORT
);
2441 inp
->inp_flags
&= ~(INP_HIGHPORT
);
2444 case IP_PORTRANGE_HIGH
:
2445 inp
->inp_flags
&= ~(INP_LOWPORT
);
2446 inp
->inp_flags
|= INP_HIGHPORT
;
2449 case IP_PORTRANGE_LOW
:
2450 inp
->inp_flags
&= ~(INP_HIGHPORT
);
2451 inp
->inp_flags
|= INP_LOWPORT
;
2461 case IP_IPSEC_POLICY
:
2469 if ((error
= soopt_getm(sopt
, &m
)) != 0) /* XXX */
2471 if ((error
= soopt_mcopyin(sopt
, m
)) != 0) /* XXX */
2473 priv
= (proc_suser(sopt
->sopt_p
) == 0);
2475 req
= mtod(m
, caddr_t
);
2478 optname
= sopt
->sopt_name
;
2479 error
= ipsec4_set_policy(inp
, optname
, req
, len
, priv
);
2486 case IP_TRAFFIC_MGT_BACKGROUND
:
2488 unsigned background
= 0;
2489 error
= sooptcopyin(sopt
, &background
, sizeof(background
), sizeof(background
));
2494 socket_set_traffic_mgt_flags_locked(so
,
2495 TRAFFIC_MGT_SO_BACKGROUND
);
2497 socket_clear_traffic_mgt_flags_locked(so
,
2498 TRAFFIC_MGT_SO_BACKGROUND
);
2503 #endif /* TRAFFIC_MGT */
2506 * On a multihomed system, scoped routing can be used to
2507 * restrict the source interface used for sending packets.
2508 * The socket option IP_BOUND_IF binds a particular AF_INET
2509 * socket to an interface such that data sent on the socket
2510 * is restricted to that interface. This is unlike the
2511 * SO_DONTROUTE option where the routing table is bypassed;
2512 * therefore it allows for a greater flexibility and control
2513 * over the system behavior, and does not place any restriction
2514 * on the destination address type (e.g. unicast, multicast,
2515 * or broadcast if applicable) or whether or not the host is
2516 * directly reachable. Note that in the multicast transmit
2517 * case, IP_MULTICAST_{IF,IFINDEX} takes precedence over
2518 * IP_BOUND_IF, since the former practically bypasses the
2519 * routing table; in this case, IP_BOUND_IF sets the default
2520 * interface used for sending multicast packets in the absence
2521 * of an explicit multicast transmit interface.
2524 /* This option is settable only for IPv4 */
2525 if (!(inp
->inp_vflag
& INP_IPV4
)) {
2530 error
= sooptcopyin(sopt
, &optval
, sizeof (optval
),
2536 inp_bindif(inp
, optval
);
2539 case IP_NO_IFT_CELLULAR
:
2540 /* This option is settable only for IPv4 */
2541 if (!(inp
->inp_vflag
& INP_IPV4
)) {
2546 error
= sooptcopyin(sopt
, &optval
, sizeof (optval
),
2552 error
= inp_nocellular(inp
, optval
);
2556 /* This option is not settable */
2561 error
= ENOPROTOOPT
;
2567 switch (sopt
->sopt_name
) {
2570 if (inp
->inp_options
)
2571 error
= sooptcopyout(sopt
,
2572 mtod(inp
->inp_options
,
2574 inp
->inp_options
->m_len
);
2576 sopt
->sopt_valsize
= 0;
2582 case IP_RECVRETOPTS
:
2583 case IP_RECVDSTADDR
:
2587 #if defined(NFAITH) && NFAITH > 0
2590 case IP_RECVPKTINFO
:
2591 switch (sopt
->sopt_name
) {
2594 optval
= inp
->inp_ip_tos
;
2598 optval
= inp
->inp_ip_ttl
;
2601 #define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0)
2604 optval
= OPTBIT(INP_RECVOPTS
);
2607 case IP_RECVRETOPTS
:
2608 optval
= OPTBIT(INP_RECVRETOPTS
);
2611 case IP_RECVDSTADDR
:
2612 optval
= OPTBIT(INP_RECVDSTADDR
);
2616 optval
= OPTBIT(INP_RECVIF
);
2620 optval
= OPTBIT(INP_RECVTTL
);
2624 if (inp
->inp_flags
& INP_HIGHPORT
)
2625 optval
= IP_PORTRANGE_HIGH
;
2626 else if (inp
->inp_flags
& INP_LOWPORT
)
2627 optval
= IP_PORTRANGE_LOW
;
2632 #if defined(NFAITH) && NFAITH > 0
2634 optval
= OPTBIT(INP_FAITH
);
2637 case IP_RECVPKTINFO
:
2638 optval
= OPTBIT(INP_PKTINFO
);
2641 error
= sooptcopyout(sopt
, &optval
, sizeof optval
);
2644 case IP_MULTICAST_IF
:
2645 case IP_MULTICAST_IFINDEX
:
2646 case IP_MULTICAST_VIF
:
2647 case IP_MULTICAST_TTL
:
2648 case IP_MULTICAST_LOOP
:
2650 error
= inp_getmoptions(inp
, sopt
);
2654 case IP_IPSEC_POLICY
:
2656 struct mbuf
*m
= NULL
;
2661 req
= mtod(m
, caddr_t
);
2664 error
= ipsec4_get_policy(sotoinpcb(so
), req
, len
, &m
);
2666 error
= soopt_mcopyout(sopt
, m
); /* XXX */
2674 case IP_TRAFFIC_MGT_BACKGROUND
:
2676 unsigned background
= (so
->so_traffic_mgt_flags
& TRAFFIC_MGT_SO_BACKGROUND
);
2677 return (sooptcopyout(sopt
, &background
, sizeof(background
)));
2680 #endif /* TRAFFIC_MGT */
2683 if (inp
->inp_flags
& INP_BOUND_IF
)
2684 optval
= inp
->inp_boundif
;
2685 error
= sooptcopyout(sopt
, &optval
, sizeof (optval
));
2688 case IP_NO_IFT_CELLULAR
:
2689 optval
= (inp
->inp_flags
& INP_NO_IFT_CELLULAR
) ? 1 : 0;
2690 error
= sooptcopyout(sopt
, &optval
, sizeof (optval
));
2694 optval
= inp
->inp_last_outif
;
2695 error
= sooptcopyout(sopt
, &optval
, sizeof (optval
));
2699 error
= ENOPROTOOPT
;
2708 * Set up IP options in pcb for insertion in output packets.
2709 * Store in mbuf with pointer in pcbopt, adding pseudo-option
2710 * with destination address if source routed.
2714 __unused
int optname
,
2715 struct mbuf
**pcbopt
,
2716 register struct mbuf
*m
)
2718 register int cnt
, optlen
;
2719 register u_char
*cp
;
2722 /* turn off any old options */
2724 (void)m_free(*pcbopt
);
2726 if (m
== (struct mbuf
*)0 || m
->m_len
== 0) {
2728 * Only turning off any previous options.
2736 if (m
->m_len
% sizeof(int32_t))
2740 * IP first-hop destination address will be stored before
2741 * actual options; move other options back
2742 * and clear it when none present.
2744 if (m
->m_data
+ m
->m_len
+ sizeof(struct in_addr
) >= &m
->m_dat
[MLEN
])
2747 m
->m_len
+= sizeof(struct in_addr
);
2748 cp
= mtod(m
, u_char
*) + sizeof(struct in_addr
);
2749 ovbcopy(mtod(m
, caddr_t
), (caddr_t
)cp
, (unsigned)cnt
);
2750 bzero(mtod(m
, caddr_t
), sizeof(struct in_addr
));
2752 for (; cnt
> 0; cnt
-= optlen
, cp
+= optlen
) {
2753 opt
= cp
[IPOPT_OPTVAL
];
2754 if (opt
== IPOPT_EOL
)
2756 if (opt
== IPOPT_NOP
)
2759 if (cnt
< IPOPT_OLEN
+ sizeof(*cp
))
2761 optlen
= cp
[IPOPT_OLEN
];
2762 if (optlen
< IPOPT_OLEN
+ sizeof(*cp
) || optlen
> cnt
)
2773 * user process specifies route as:
2775 * D must be our final destination (but we can't
2776 * check that since we may not have connected yet).
2777 * A is first hop destination, which doesn't appear in
2778 * actual IP option, but is stored before the options.
2780 if (optlen
< IPOPT_MINOFF
- 1 + sizeof(struct in_addr
))
2782 m
->m_len
-= sizeof(struct in_addr
);
2783 cnt
-= sizeof(struct in_addr
);
2784 optlen
-= sizeof(struct in_addr
);
2785 cp
[IPOPT_OLEN
] = optlen
;
2787 * Move first hop before start of options.
2789 bcopy((caddr_t
)&cp
[IPOPT_OFFSET
+1], mtod(m
, caddr_t
),
2790 sizeof(struct in_addr
));
2792 * Then copy rest of options back
2793 * to close up the deleted entry.
2795 ovbcopy((caddr_t
)(&cp
[IPOPT_OFFSET
+1] +
2796 sizeof(struct in_addr
)),
2797 (caddr_t
)&cp
[IPOPT_OFFSET
+1],
2798 (unsigned)cnt
+ sizeof(struct in_addr
));
2802 if (m
->m_len
> MAX_IPOPTLEN
+ sizeof(struct in_addr
))
2813 ip_moptions_init(void)
2815 PE_parse_boot_argn("ifa_debug", &imo_debug
, sizeof (imo_debug
));
2817 imo_size
= (imo_debug
== 0) ? sizeof (struct ip_moptions
) :
2818 sizeof (struct ip_moptions_dbg
);
2820 imo_zone
= zinit(imo_size
, IMO_ZONE_MAX
* imo_size
, 0,
2822 if (imo_zone
== NULL
) {
2823 panic("%s: failed allocating %s", __func__
, IMO_ZONE_NAME
);
2826 zone_change(imo_zone
, Z_EXPAND
, TRUE
);
2830 imo_addref(struct ip_moptions
*imo
, int locked
)
2835 IMO_LOCK_ASSERT_HELD(imo
);
2837 if (++imo
->imo_refcnt
== 0) {
2838 panic("%s: imo %p wraparound refcnt\n", __func__
, imo
);
2840 } else if (imo
->imo_trace
!= NULL
) {
2841 (*imo
->imo_trace
)(imo
, TRUE
);
2849 imo_remref(struct ip_moptions
*imo
)
2854 if (imo
->imo_refcnt
== 0) {
2855 panic("%s: imo %p negative refcnt", __func__
, imo
);
2857 } else if (imo
->imo_trace
!= NULL
) {
2858 (*imo
->imo_trace
)(imo
, FALSE
);
2862 if (imo
->imo_refcnt
> 0) {
2867 for (i
= 0; i
< imo
->imo_num_memberships
; ++i
) {
2868 struct in_mfilter
*imf
;
2870 imf
= imo
->imo_mfilters
? &imo
->imo_mfilters
[i
] : NULL
;
2874 (void) in_leavegroup(imo
->imo_membership
[i
], imf
);
2879 INM_REMREF(imo
->imo_membership
[i
]);
2880 imo
->imo_membership
[i
] = NULL
;
2882 imo
->imo_num_memberships
= 0;
2883 if (imo
->imo_mfilters
!= NULL
) {
2884 FREE(imo
->imo_mfilters
, M_INMFILTER
);
2885 imo
->imo_mfilters
= NULL
;
2887 if (imo
->imo_membership
!= NULL
) {
2888 FREE(imo
->imo_membership
, M_IPMOPTS
);
2889 imo
->imo_membership
= NULL
;
2893 lck_mtx_destroy(&imo
->imo_lock
, ifa_mtx_grp
);
2895 if (!(imo
->imo_debug
& IFD_ALLOC
)) {
2896 panic("%s: imo %p cannot be freed", __func__
, imo
);
2899 zfree(imo_zone
, imo
);
2903 imo_trace(struct ip_moptions
*imo
, int refhold
)
2905 struct ip_moptions_dbg
*imo_dbg
= (struct ip_moptions_dbg
*)imo
;
2910 if (!(imo
->imo_debug
& IFD_DEBUG
)) {
2911 panic("%s: imo %p has no debug structure", __func__
, imo
);
2915 cnt
= &imo_dbg
->imo_refhold_cnt
;
2916 tr
= imo_dbg
->imo_refhold
;
2918 cnt
= &imo_dbg
->imo_refrele_cnt
;
2919 tr
= imo_dbg
->imo_refrele
;
2922 idx
= atomic_add_16_ov(cnt
, 1) % IMO_TRACE_HIST_SIZE
;
2923 ctrace_record(&tr
[idx
]);
2926 struct ip_moptions
*
2927 ip_allocmoptions(int how
)
2929 struct ip_moptions
*imo
;
2931 imo
= (how
== M_WAITOK
) ? zalloc(imo_zone
) : zalloc_noblock(imo_zone
);
2933 bzero(imo
, imo_size
);
2934 lck_mtx_init(&imo
->imo_lock
, ifa_mtx_grp
, ifa_mtx_attr
);
2935 imo
->imo_debug
|= IFD_ALLOC
;
2936 if (imo_debug
!= 0) {
2937 imo
->imo_debug
|= IFD_DEBUG
;
2938 imo
->imo_trace
= imo_trace
;
2947 * Routine called from ip_output() to loop back a copy of an IP multicast
2948 * packet to the input queue of a specified interface. Note that this
2949 * calls the output routine of the loopback "driver", but with an interface
2950 * pointer that might NOT be a loopback interface -- evil, but easier than
2951 * replicating that code here.
2954 ip_mloopback(ifp
, m
, dst
, hlen
)
2956 register struct mbuf
*m
;
2957 register struct sockaddr_in
*dst
;
2960 register struct ip
*ip
;
2962 int sw_csum
= (apple_hwcksum_tx
== 0);
2964 copym
= m_copy(m
, 0, M_COPYALL
);
2965 if (copym
!= NULL
&& (copym
->m_flags
& M_EXT
|| copym
->m_len
< hlen
))
2966 copym
= m_pullup(copym
, hlen
);
2972 * We don't bother to fragment if the IP length is greater
2973 * than the interface's MTU. Can this possibly matter?
2975 ip
= mtod(copym
, struct ip
*);
2977 #if BYTE_ORDER != BIG_ENDIAN
2983 ip
->ip_sum
= in_cksum(copym
, hlen
);
2986 * It's not clear whether there are any lingering
2987 * reentrancy problems in other areas which might
2988 * be exposed by using ip_input directly (in
2989 * particular, everything which modifies the packet
2990 * in-place). Yet another option is using the
2991 * protosw directly to deliver the looped back
2992 * packet. For the moment, we'll err on the side
2993 * of safety by using if_simloop().
2996 if (dst
->sin_family
!= AF_INET
) {
2997 printf("ip_mloopback: bad address family %d\n",
2999 dst
->sin_family
= AF_INET
;
3004 * Mark checksum as valid or calculate checksum for loopback.
3006 * This is done this way because we have to embed the ifp of
3007 * the interface we will send the original copy of the packet
3008 * out on in the mbuf. ip_input will check if_hwassist of the
3009 * embedded ifp and ignore all csum_flags if if_hwassist is 0.
3010 * The UDP checksum has not been calculated yet.
3012 if (sw_csum
|| (copym
->m_pkthdr
.csum_flags
& CSUM_DELAY_DATA
)) {
3013 if (!sw_csum
&& IF_HWASSIST_CSUM_FLAGS(ifp
->if_hwassist
)) {
3014 copym
->m_pkthdr
.csum_flags
|=
3015 CSUM_DATA_VALID
| CSUM_PSEUDO_HDR
|
3016 CSUM_IP_CHECKED
| CSUM_IP_VALID
;
3017 copym
->m_pkthdr
.csum_data
= 0xffff;
3020 #if BYTE_ORDER != BIG_ENDIAN
3024 in_delayed_cksum(copym
);
3026 #if BYTE_ORDER != BIG_ENDIAN
3035 * We need to send all loopback traffic down to dlil in case
3036 * a filter has tapped-in.
3040 * Stuff the 'real' ifp into the pkthdr, to be used in matching
3041 * in ip_input(); we need the loopback ifp/dl_tag passed as args
3042 * to make the loopback driver compliant with the data link
3046 copym
->m_pkthdr
.rcvif
= ifp
;
3047 dlil_output(lo_ifp
, PF_INET
, copym
, 0,
3048 (struct sockaddr
*) dst
, 0);
3050 printf("Warning: ip_output call to dlil_find_dltag failed!\n");
3056 * Given a source IP address (and route, if available), determine the best
3057 * interface to send the packet from. Checking for (and updating) the
3058 * ROF_SRCIF_SELECTED flag in the pcb-supplied route placeholder is done
3059 * without any locks based on the assumption that ip_output() is single-
3060 * threaded per-pcb, i.e. for any given pcb there can only be one thread
3061 * performing output at the IP layer.
3063 * This routine is analogous to in6_selectroute() for IPv6.
3065 static struct ifaddr
*
3066 in_selectsrcif(struct ip
*ip
, struct route
*ro
, unsigned int ifscope
)
3068 struct ifaddr
*ifa
= NULL
;
3069 struct in_addr src
= ip
->ip_src
;
3070 struct in_addr dst
= ip
->ip_dst
;
3071 struct ifnet
*rt_ifp
;
3072 char s_src
[MAX_IPv4_STR_LEN
], s_dst
[MAX_IPv4_STR_LEN
];
3074 if (ip_select_srcif_debug
) {
3075 (void) inet_ntop(AF_INET
, &src
.s_addr
, s_src
, sizeof (s_src
));
3076 (void) inet_ntop(AF_INET
, &dst
.s_addr
, s_dst
, sizeof (s_dst
));
3079 if (ro
->ro_rt
!= NULL
)
3082 rt_ifp
= (ro
->ro_rt
!= NULL
) ? ro
->ro_rt
->rt_ifp
: NULL
;
3085 * Given the source IP address, find a suitable source interface
3086 * to use for transmission; if the caller has specified a scope,
3087 * optimize the search by looking at the addresses only for that
3088 * interface. This is still suboptimal, however, as we need to
3089 * traverse the per-interface list.
3091 if (ifscope
!= IFSCOPE_NONE
|| ro
->ro_rt
!= NULL
) {
3092 unsigned int scope
= ifscope
;
3095 * If no scope is specified and the route is stale (pointing
3096 * to a defunct interface) use the current primary interface;
3097 * this happens when switching between interfaces configured
3098 * with the same IP address. Otherwise pick up the scope
3099 * information from the route; the ULP may have looked up a
3100 * correct route and we just need to verify it here and mark
3101 * it with the ROF_SRCIF_SELECTED flag below.
3103 if (scope
== IFSCOPE_NONE
) {
3104 scope
= rt_ifp
->if_index
;
3105 if (scope
!= get_primary_ifscope(AF_INET
) &&
3106 ro
->ro_rt
->generation_id
!= route_generation
)
3107 scope
= get_primary_ifscope(AF_INET
);
3110 ifa
= (struct ifaddr
*)ifa_foraddr_scoped(src
.s_addr
, scope
);
3112 if (ifa
== NULL
&& ip
->ip_p
!= IPPROTO_UDP
&&
3113 ip
->ip_p
!= IPPROTO_TCP
&& ipforwarding
) {
3115 * If forwarding is enabled, and if the packet isn't
3116 * TCP or UDP, check if the source address belongs
3117 * to one of our own interfaces; if so, demote the
3118 * interface scope and do a route lookup right below.
3120 ifa
= (struct ifaddr
*)ifa_foraddr(src
.s_addr
);
3124 ifscope
= IFSCOPE_NONE
;
3128 if (ip_select_srcif_debug
&& ifa
!= NULL
) {
3129 if (ro
->ro_rt
!= NULL
) {
3130 printf("%s->%s ifscope %d->%d ifa_if %s "
3131 "ro_if %s\n", s_src
, s_dst
, ifscope
,
3132 scope
, if_name(ifa
->ifa_ifp
),
3135 printf("%s->%s ifscope %d->%d ifa_if %s\n",
3136 s_src
, s_dst
, ifscope
, scope
,
3137 if_name(ifa
->ifa_ifp
));
3143 * Slow path; search for an interface having the corresponding source
3144 * IP address if the scope was not specified by the caller, and:
3146 * 1) There currently isn't any route, or,
3147 * 2) The interface used by the route does not own that source
3148 * IP address; in this case, the route will get blown away
3149 * and we'll do a more specific scoped search using the newly
3152 if (ifa
== NULL
&& ifscope
== IFSCOPE_NONE
) {
3153 ifa
= (struct ifaddr
*)ifa_foraddr(src
.s_addr
);
3156 * If we have the IP address, but not the route, we don't
3157 * really know whether or not it belongs to the correct
3158 * interface (it could be shared across multiple interfaces.)
3159 * The only way to find out is to do a route lookup.
3161 if (ifa
!= NULL
&& ro
->ro_rt
== NULL
) {
3163 struct sockaddr_in sin
;
3164 struct ifaddr
*oifa
= NULL
;
3166 bzero(&sin
, sizeof (sin
));
3167 sin
.sin_family
= AF_INET
;
3168 sin
.sin_len
= sizeof (sin
);
3171 lck_mtx_lock(rnh_lock
);
3172 if ((rt
= rt_lookup(TRUE
, (struct sockaddr
*)&sin
, NULL
,
3173 rt_tables
[AF_INET
], IFSCOPE_NONE
)) != NULL
) {
3176 * If the route uses a different interface,
3177 * use that one instead. The IP address of
3178 * the ifaddr that we pick up here is not
3181 if (ifa
->ifa_ifp
!= rt
->rt_ifp
) {
3191 lck_mtx_unlock(rnh_lock
);
3194 struct ifaddr
*iifa
;
3197 * See if the interface pointed to by the
3198 * route is configured with the source IP
3199 * address of the packet.
3201 iifa
= (struct ifaddr
*)ifa_foraddr_scoped(
3202 src
.s_addr
, ifa
->ifa_ifp
->if_index
);
3206 * Found it; drop the original one
3207 * as well as the route interface
3208 * address, and use this instead.
3213 } else if (!ipforwarding
||
3214 (rt
->rt_flags
& RTF_GATEWAY
)) {
3216 * This interface doesn't have that
3217 * source IP address; drop the route
3218 * interface address and just use the
3219 * original one, and let the caller
3220 * do a scoped route lookup.
3226 * Forwarding is enabled and the source
3227 * address belongs to one of our own
3228 * interfaces which isn't the outgoing
3229 * interface, and we have a route, and
3230 * the destination is on a network that
3231 * is directly attached (onlink); drop
3232 * the original one and use the route
3233 * interface address instead.
3238 } else if (ifa
!= NULL
&& ro
->ro_rt
!= NULL
&&
3239 !(ro
->ro_rt
->rt_flags
& RTF_GATEWAY
) &&
3240 ifa
->ifa_ifp
!= ro
->ro_rt
->rt_ifp
&& ipforwarding
) {
3242 * Forwarding is enabled and the source address belongs
3243 * to one of our own interfaces which isn't the same
3244 * as the interface used by the known route; drop the
3245 * original one and use the route interface address.
3248 ifa
= ro
->ro_rt
->rt_ifa
;
3252 if (ip_select_srcif_debug
&& ifa
!= NULL
) {
3253 printf("%s->%s ifscope %d ifa_if %s\n",
3254 s_src
, s_dst
, ifscope
, if_name(ifa
->ifa_ifp
));
3258 if (ro
->ro_rt
!= NULL
)
3259 RT_LOCK_ASSERT_HELD(ro
->ro_rt
);
3261 * If there is a non-loopback route with the wrong interface, or if
3262 * there is no interface configured with such an address, blow it
3263 * away. Except for local/loopback, we look for one with a matching
3264 * interface scope/index.
3266 if (ro
->ro_rt
!= NULL
&&
3267 (ifa
== NULL
|| (ifa
->ifa_ifp
!= rt_ifp
&& rt_ifp
!= lo_ifp
) ||
3268 !(ro
->ro_rt
->rt_flags
& RTF_UP
))) {
3269 if (ip_select_srcif_debug
) {
3271 printf("%s->%s ifscope %d ro_if %s != "
3272 "ifa_if %s (cached route cleared)\n",
3273 s_src
, s_dst
, ifscope
, if_name(rt_ifp
),
3274 if_name(ifa
->ifa_ifp
));
3276 printf("%s->%s ifscope %d ro_if %s "
3277 "(no ifa_if found)\n",
3278 s_src
, s_dst
, ifscope
, if_name(rt_ifp
));
3282 RT_UNLOCK(ro
->ro_rt
);
3285 ro
->ro_flags
&= ~ROF_SRCIF_SELECTED
;
3288 * If the destination is IPv4 LLA and the route's interface
3289 * doesn't match the source interface, then the source IP
3290 * address is wrong; it most likely belongs to the primary
3291 * interface associated with the IPv4 LL subnet. Drop the
3292 * packet rather than letting it go out and return an error
3293 * to the ULP. This actually applies not only to IPv4 LL
3294 * but other shared subnets; for now we explicitly test only
3295 * for the former case and save the latter for future.
3297 if (IN_LINKLOCAL(ntohl(dst
.s_addr
)) &&
3298 !IN_LINKLOCAL(ntohl(src
.s_addr
)) && ifa
!= NULL
) {
3304 if (ip_select_srcif_debug
&& ifa
== NULL
) {
3305 printf("%s->%s ifscope %d (neither ro_if/ifa_if found)\n",
3306 s_src
, s_dst
, ifscope
);
3310 * If there is a route, mark it accordingly. If there isn't one,
3311 * we'll get here again during the next transmit (possibly with a
3312 * route) and the flag will get set at that point. For IPv4 LLA
3313 * destination, mark it only if the route has been fully resolved;
3314 * otherwise we want to come back here again when the route points
3315 * to the interface over which the ARP reply arrives on.
3317 if (ro
->ro_rt
!= NULL
&& (!IN_LINKLOCAL(ntohl(dst
.s_addr
)) ||
3318 (ro
->ro_rt
->rt_gateway
->sa_family
== AF_LINK
&&
3319 SDL(ro
->ro_rt
->rt_gateway
)->sdl_alen
!= 0))) {
3320 ro
->ro_flags
|= ROF_SRCIF_SELECTED
;
3321 ro
->ro_rt
->generation_id
= route_generation
;
3324 if (ro
->ro_rt
!= NULL
)
3325 RT_UNLOCK(ro
->ro_rt
);