2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 * Copyright (c) 1982, 1986, 1988, 1990, 1993
30 * The Regents of the University of California. All rights reserved.
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94
61 * $FreeBSD: src/sys/netinet/ip_output.c,v 1.99.2.16 2001/07/19 06:37:26 kris Exp $
64 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
65 * support for mandatory and extensible security protections. This notice
66 * is included in support of clause 2.2 (b) of the Apple Public License,
72 #include <sys/param.h>
73 #include <sys/systm.h>
74 #include <sys/kernel.h>
75 #include <sys/malloc.h>
77 #include <sys/protosw.h>
78 #include <sys/socket.h>
79 #include <sys/socketvar.h>
80 #include <kern/locks.h>
81 #include <sys/sysctl.h>
82 #include <sys/mcache.h>
84 #include <machine/endian.h>
85 #include <pexpert/pexpert.h>
88 #include <net/if_dl.h>
89 #include <net/if_types.h>
90 #include <net/route.h>
91 #include <net/ntstat.h>
92 #include <net/net_osdep.h>
94 #include <netinet/in.h>
95 #include <netinet/in_systm.h>
96 #include <netinet/ip.h>
97 #include <netinet/in_pcb.h>
98 #include <netinet/in_var.h>
99 #include <netinet/ip_var.h>
101 #include <netinet/kpi_ipfilter_var.h>
104 #include <security/mac_framework.h>
107 #include <net/dlil.h>
108 #include <sys/kdebug.h>
109 #include <libkern/OSAtomic.h>
/*
 * Trace codes handed to KERNEL_DEBUG() by the IP output path.  Each one is
 * built with NETDBG_CODE() on the DBG_NETIP class.
 */
#define DBG_LAYER_BEG		NETDBG_CODE(DBG_NETIP, 1)
#define DBG_LAYER_END		NETDBG_CODE(DBG_NETIP, 3)
#define DBG_FNC_IP_OUTPUT	NETDBG_CODE(DBG_NETIP, (1 << 8) | 1)
#define DBG_FNC_IPSEC4_OUTPUT	NETDBG_CODE(DBG_NETIP, (2 << 8) | 1)

/*
 * SWAP16(v): combine (v >> 8) with the low byte of v moved up by 8 bits.
 * For a value that fits in 16 bits this exchanges its two bytes.
 * The argument is expanded twice, so it must not have side effects.
 */
#define SWAP16(v)		(((v) >> 8) | (((v) & 0xff) << 8))
119 #include <netinet6/ipsec.h>
120 #include <netkey/key.h>
122 #include <netkey/key_debug.h>
124 #define KEYDEBUG(lev,arg)
128 #include <netinet/ip_fw.h>
129 #include <netinet/ip_divert.h>
130 #include <mach/sdt.h>
133 #include <netinet/ip_dummynet.h>
137 #include <net/pfvar.h>
140 #if IPFIREWALL_FORWARD_DEBUG
/*
 * print_ip(a): debug helper that printf()s the IPv4 address held in
 * (a).s_addr as a dotted quad, most significant octet first, with no
 * trailing newline.  The address is passed through ntohl() before the
 * octets are extracted.
 *
 * Every octet is cast to unsigned int and printed with %u so that the
 * conversion specifier agrees with the argument type; ntohl() yields a
 * 32-bit value, which must not be consumed by a "long" conversion.
 *
 * The argument is expanded four times, so it must be free of side effects.
 * The expansion deliberately keeps its historical trailing ';' so existing
 * call sites continue to compile unchanged.
 */
#define print_ip(a) printf("%u.%u.%u.%u",				\
	(unsigned int)((ntohl((a).s_addr) >> 24) & 0xFF),		\
	(unsigned int)((ntohl((a).s_addr) >> 16) & 0xFF),		\
	(unsigned int)((ntohl((a).s_addr) >> 8) & 0xFF),		\
	(unsigned int)(ntohl((a).s_addr) & 0xFF));
/*
 * Forward declarations.
 *
 * The static routines are private to this file.  Parameter names are
 * given only where a call site in this file shows how the argument is
 * used; the remaining prototypes are left unnamed.
 */
static struct mbuf *ip_insertoptions(struct mbuf *m, struct mbuf *opt,
    int *phlen);
static void ip_mloopback(struct ifnet *ifp, struct mbuf *m,
    struct sockaddr_in *dst, int hlen);
static int ip_pcbopts(int, struct mbuf **, struct mbuf *);
static void imo_trace(struct ip_moptions *, int);

static void ip_out_cksum_stats(int, u_int32_t);
static struct ifaddr *in_selectsrcif(struct ip *ip, struct route *ro,
    unsigned int ifscope);

/* Non-static routines declared here. */
int ip_optcopy(struct ip *, struct ip *);
void in_delayed_cksum_offset(struct mbuf *, int);
void in_cksum_offset(struct mbuf *, size_t);
162 extern struct protosw inetsw
[];
164 extern struct ip_linklocal_stat ip_linklocal_stat
;
165 extern lck_mtx_t
*ip_mutex
;
167 /* temporary: for testing */
169 extern int ipsec_bypass
;
172 static int ip_maxchainsent
= 0;
173 SYSCTL_INT(_net_inet_ip
, OID_AUTO
, maxchainsent
, CTLFLAG_RW
| CTLFLAG_LOCKED
,
174 &ip_maxchainsent
, 0, "use dlil_output_list");
176 static int forge_ce
= 0;
177 SYSCTL_INT(_net_inet_ip
, OID_AUTO
, forge_ce
, CTLFLAG_RW
| CTLFLAG_LOCKED
,
178 &forge_ce
, 0, "Forge ECN CE");
181 static int ip_select_srcif_debug
= 0;
182 SYSCTL_INT(_net_inet_ip
, OID_AUTO
, select_srcif_debug
, CTLFLAG_RW
| CTLFLAG_LOCKED
,
183 &ip_select_srcif_debug
, 0, "log source interface selection debug info");
185 #define IMO_TRACE_HIST_SIZE 32 /* size of trace history */
188 __private_extern__
unsigned int imo_trace_hist_size
= IMO_TRACE_HIST_SIZE
;
190 struct ip_moptions_dbg
{
191 struct ip_moptions imo
; /* ip_moptions */
192 u_int16_t imo_refhold_cnt
; /* # of IMO_ADDREF */
193 u_int16_t imo_refrele_cnt
; /* # of IMO_REMREF */
195 * Alloc and free callers.
200 * Circular lists of IMO_ADDREF and IMO_REMREF callers.
202 ctrace_t imo_refhold
[IMO_TRACE_HIST_SIZE
];
203 ctrace_t imo_refrele
[IMO_TRACE_HIST_SIZE
];
207 static unsigned int imo_debug
= 1; /* debugging (enabled) */
209 static unsigned int imo_debug
; /* debugging (disabled) */
211 static unsigned int imo_size
; /* size of zone element */
212 static struct zone
*imo_zone
; /* zone for ip_moptions */
214 #define IMO_ZONE_MAX 64 /* maximum elements in zone */
215 #define IMO_ZONE_NAME "ip_moptions" /* zone name */
218 * IP output. The packet in mbuf chain m contains a skeletal IP
219 * header (with len, off, ttl, proto, tos, src, dst).
220 * The mbuf chain containing the packet will be freed.
221 * The mbuf opt, if present, will not be freed.
229 struct ip_moptions
*imo
,
230 struct ip_out_args
*ipoa
)
233 error
= ip_output_list(m0
, 0, opt
, ro
, flags
, imo
, ipoa
);
246 * ipsec4_getpolicybyaddr:??? [IPSEC 4th argument, contents modified]
247 * ipsec4_getpolicybysock:??? [IPSEC 4th argument, contents modified]
248 * key_spdacquire:??? [IPSEC]
249 * ipsec4_output:??? [IPSEC]
250 * ip_dn_io_ptr:??? [dummynet]
251 * dlil_output:??? [DLIL]
252 * dlil_output_list:??? [DLIL]
254 * Notes: The ipsec4_getpolicyby{addr|sock} function error returns are
255 * only used as the error return from this function where one of
256 * these functions fails to return a policy.
265 struct ip_moptions
*imo
,
266 struct ip_out_args
*ipoa
)
269 struct ifnet
*ifp
= NULL
;
270 struct mbuf
*m
= m0
, *prevnxt
= NULL
, **mppn
= &prevnxt
;
271 int hlen
= sizeof (struct ip
);
272 int len
= 0, error
= 0;
273 struct sockaddr_in
*dst
= NULL
;
274 struct in_ifaddr
*ia
= NULL
, *src_ia
= NULL
;
275 int isbroadcast
, sw_csum
;
276 struct in_addr pkt_dst
;
277 struct ipf_pktopts
*ippo
= NULL
, ipf_pktopts
;
279 struct ipsec_output_state ipsec_state
;
280 struct route
*ipsec_saved_route
= NULL
;
281 struct socket
*so
= NULL
;
282 struct secpolicy
*sp
= NULL
;
284 #if IPFIREWALL_FORWARD
285 int fwd_rewrite_src
= 0;
289 struct sockaddr_in
*next_hop_from_ipfwd_tag
= NULL
;
291 #if IPFIREWALL || DUMMYNET
292 struct ip_fw_args args
;
296 ipfilter_t inject_filter_ref
= 0;
298 struct route saved_route
;
299 struct ip_out_args saved_ipoa
;
300 struct sockaddr_in dst_buf
;
301 #endif /* DUMMYNET */
302 struct mbuf
* packetlist
;
303 int pktcnt
= 0, tso
= 0;
304 u_int32_t bytecnt
= 0;
305 unsigned int ifscope
= IFSCOPE_NONE
;
306 unsigned int nocell
= 0;
307 boolean_t select_srcif
, srcbound
;
308 struct flowadv
*adv
= NULL
;
310 KERNEL_DEBUG(DBG_FNC_IP_OUTPUT
| DBG_FUNC_START
, 0,0,0,0,0);
313 bzero(&ipsec_state
, sizeof(ipsec_state
));
317 #if IPFIREWALL || DUMMYNET
318 bzero(&args
, sizeof(struct ip_fw_args
));
320 if (SLIST_EMPTY(&m0
->m_pkthdr
.tags
))
323 /* Grab info from mtags prepended to the chain */
325 if ((tag
= m_tag_locate(m0
, KERNEL_MODULE_TAG_ID
,
326 KERNEL_TAG_TYPE_DUMMYNET
, NULL
)) != NULL
) {
327 struct dn_pkt_tag
*dn_tag
;
329 dn_tag
= (struct dn_pkt_tag
*)(tag
+1);
330 args
.fwa_ipfw_rule
= dn_tag
->dn_ipfw_rule
;
331 args
.fwa_pf_rule
= dn_tag
->dn_pf_rule
;
333 saved_route
= dn_tag
->dn_ro
;
337 bcopy(&dn_tag
->dn_dst
, &dst_buf
, sizeof(dst_buf
));
339 ifp
= dn_tag
->dn_ifp
;
340 flags
= dn_tag
->dn_flags
;
341 if ((dn_tag
->dn_flags
& IP_OUTARGS
)) {
342 saved_ipoa
= dn_tag
->dn_ipoa
;
346 m_tag_delete(m0
, tag
);
348 #endif /* DUMMYNET */
351 if ((tag
= m_tag_locate(m0
, KERNEL_MODULE_TAG_ID
,
352 KERNEL_TAG_TYPE_DIVERT
, NULL
)) != NULL
) {
353 struct divert_tag
*div_tag
;
355 div_tag
= (struct divert_tag
*)(tag
+1);
356 args
.fwa_divert_rule
= div_tag
->cookie
;
358 m_tag_delete(m0
, tag
);
360 #endif /* IPDIVERT */
363 if ((tag
= m_tag_locate(m0
, KERNEL_MODULE_TAG_ID
,
364 KERNEL_TAG_TYPE_IPFORWARD
, NULL
)) != NULL
) {
365 struct ip_fwd_tag
*ipfwd_tag
;
367 ipfwd_tag
= (struct ip_fwd_tag
*)(tag
+1);
368 next_hop_from_ipfwd_tag
= ipfwd_tag
->next_hop
;
370 m_tag_delete(m0
, tag
);
372 #endif /* IPFIREWALL */
375 #endif /* IPFIREWALL || DUMMYNET */
380 if ( !m
|| (m
->m_flags
& M_PKTHDR
) != 0)
381 panic("ip_output no HDR");
383 panic("ip_output no route, proto = %d",
384 mtod(m
, struct ip
*)->ip_p
);
387 bzero(&ipf_pktopts
, sizeof(struct ipf_pktopts
));
390 if (ip_doscopedroute
&& (flags
& IP_OUTARGS
)) {
392 * In the forwarding case, only the ifscope value is used,
393 * as source interface selection doesn't take place.
395 if ((select_srcif
= (!(flags
& IP_FORWARDING
) &&
396 (ipoa
->ipoa_flags
& IPOAF_SELECT_SRCIF
)))) {
397 ipf_pktopts
.ippo_flags
|= IPPOF_SELECT_SRCIF
;
400 if ((ipoa
->ipoa_flags
& IPOAF_BOUND_IF
) &&
401 ipoa
->ipoa_boundif
!= IFSCOPE_NONE
) {
402 ifscope
= ipoa
->ipoa_boundif
;
403 ipf_pktopts
.ippo_flags
|=
404 (IPPOF_BOUND_IF
| (ifscope
<< IPPOF_SHIFT_IFSCOPE
));
407 if ((srcbound
= (ipoa
->ipoa_flags
& IPOAF_BOUND_SRCADDR
)))
408 ipf_pktopts
.ippo_flags
|= IPPOF_BOUND_SRCADDR
;
410 select_srcif
= FALSE
;
412 ifscope
= IFSCOPE_NONE
;
415 if ((flags
& IP_OUTARGS
) && (ipoa
->ipoa_flags
& IPOAF_NO_CELLULAR
)) {
417 ipf_pktopts
.ippo_flags
|= IPPOF_NO_IFT_CELLULAR
;
420 if (flags
& IP_OUTARGS
) {
421 adv
= &ipoa
->ipoa_flowadv
;
422 adv
->code
= FADV_SUCCESS
;
426 if (args
.fwa_ipfw_rule
!= NULL
|| args
.fwa_pf_rule
!= NULL
) {
427 /* dummynet already saw us */
428 ip
= mtod(m
, struct ip
*);
429 hlen
= IP_VHL_HL(ip
->ip_vhl
) << 2;
430 pkt_dst
= ip
->ip_dst
;
431 if (ro
->ro_rt
!= NULL
) {
432 RT_LOCK_SPIN(ro
->ro_rt
);
433 ia
= (struct in_ifaddr
*)ro
->ro_rt
->rt_ifa
;
435 /* Become a regular mutex */
436 RT_CONVERT_LOCK(ro
->ro_rt
);
437 IFA_ADDREF(&ia
->ia_ifa
);
439 RT_UNLOCK(ro
->ro_rt
);
442 if (ipsec_bypass
== 0 && (flags
& IP_NOIPSEC
) == 0) {
443 so
= ipsec_getsocket(m
);
444 (void)ipsec_setsocket(m
, NULL
);
448 if (args
.fwa_ipfw_rule
!= NULL
)
450 #endif /* #if IPFIREWALL */
451 if (args
.fwa_pf_rule
!= NULL
)
454 #endif /* DUMMYNET */
457 if (ipsec_bypass
== 0 && (flags
& IP_NOIPSEC
) == 0) {
458 so
= ipsec_getsocket(m
);
459 (void)ipsec_setsocket(m
, NULL
);
464 * No need to process packet twice if we've
467 if (!SLIST_EMPTY(&m
->m_pkthdr
.tags
))
468 inject_filter_ref
= ipf_get_inject_filter(m
);
470 inject_filter_ref
= 0;
473 m
= ip_insertoptions(m
, opt
, &len
);
475 /* Update the chain */
477 if (m0
== packetlist
)
482 ip
= mtod(m
, struct ip
*);
487 * When dealing with a packet chain, we need to reset "next_hop" because
488 * "dst" may have been changed to the gateway address below for the previous
489 * packet of the chain. This could cause the route to be inadvertently changed
490 * to the route to the gateway address (instead of the route to the destination).
492 args
.fwa_next_hop
= next_hop_from_ipfwd_tag
;
493 pkt_dst
= args
.fwa_next_hop
? args
.fwa_next_hop
->sin_addr
: ip
->ip_dst
;
495 pkt_dst
= ip
->ip_dst
;
499 * We must not send if the packet is destined to network zero.
500 * RFC1122 3.2.1.3 (a) and (b).
502 if (IN_ZERONET(ntohl(pkt_dst
.s_addr
))) {
503 error
= EHOSTUNREACH
;
510 if ((flags
& (IP_FORWARDING
|IP_RAWOUTPUT
)) == 0) {
511 ip
->ip_vhl
= IP_MAKE_VHL(IPVERSION
, hlen
>> 2);
514 ip
->ip_id
= ip_randomid();
516 ip
->ip_id
= htons(ip_id
++);
518 OSAddAtomic(1, &ipstat
.ips_localout
);
520 hlen
= IP_VHL_HL(ip
->ip_vhl
) << 2;
524 /* For debugging, we let the stack forge congestion */
526 ((ip
->ip_tos
& IPTOS_ECN_MASK
) == IPTOS_ECN_ECT1
||
527 (ip
->ip_tos
& IPTOS_ECN_MASK
) == IPTOS_ECN_ECT0
)) {
528 ip
->ip_tos
= (ip
->ip_tos
& ~IPTOS_ECN_MASK
) | IPTOS_ECN_CE
;
533 KERNEL_DEBUG(DBG_LAYER_BEG
, ip
->ip_dst
.s_addr
,
534 ip
->ip_src
.s_addr
, ip
->ip_p
, ip
->ip_off
, ip
->ip_len
);
536 dst
= (struct sockaddr_in
*)(void *)&ro
->ro_dst
;
539 * If there is a cached route,
540 * check that it is to the same destination
541 * and is still up. If not, free it and try again.
542 * The address family should also be checked in case of sharing the
546 if (ro
->ro_rt
!= NULL
) {
547 if (ro
->ro_rt
->generation_id
!= route_generation
&&
548 ((flags
& (IP_ROUTETOIF
| IP_FORWARDING
)) == 0) &&
549 (ip
->ip_src
.s_addr
!= INADDR_ANY
)) {
550 src_ia
= ifa_foraddr(ip
->ip_src
.s_addr
);
551 if (src_ia
== NULL
) {
552 error
= EADDRNOTAVAIL
;
555 IFA_REMREF(&src_ia
->ia_ifa
);
558 * Test rt_flags without holding rt_lock for performance
559 * reasons; if the route is down it will hopefully be
560 * caught by the layer below (since it uses this route
561 * as a hint) or during the next transmit.
563 if ((ro
->ro_rt
->rt_flags
& RTF_UP
) == 0 ||
564 dst
->sin_family
!= AF_INET
||
565 dst
->sin_addr
.s_addr
!= pkt_dst
.s_addr
) {
570 * If we're doing source interface selection, we may not
571 * want to use this route; only synch up the generation
574 if (!select_srcif
&& ro
->ro_rt
!= NULL
&&
575 ro
->ro_rt
->generation_id
!= route_generation
)
576 ro
->ro_rt
->generation_id
= route_generation
;
578 if (ro
->ro_rt
== NULL
) {
579 bzero(dst
, sizeof(*dst
));
580 dst
->sin_family
= AF_INET
;
581 dst
->sin_len
= sizeof(*dst
);
582 dst
->sin_addr
= pkt_dst
;
585 * If routing to interface only,
586 * short circuit routing lookup.
588 if (flags
& IP_ROUTETOIF
) {
590 IFA_REMREF(&ia
->ia_ifa
);
591 if ((ia
= ifatoia(ifa_ifwithdstaddr(sintosa(dst
)))) == 0) {
592 if ((ia
= ifatoia(ifa_ifwithnet(sintosa(dst
)))) == 0) {
593 OSAddAtomic(1, &ipstat
.ips_noroute
);
600 isbroadcast
= in_broadcast(dst
->sin_addr
, ifp
);
601 } else if (IN_MULTICAST(ntohl(pkt_dst
.s_addr
)) &&
602 imo
!= NULL
&& (ifp
= imo
->imo_multicast_ifp
) != NULL
) {
604 * Bypass the normal routing lookup for multicast
605 * packets if the interface is specified.
609 IFA_REMREF(&ia
->ia_ifa
);
611 /* Macro takes reference on ia */
614 boolean_t cloneok
= FALSE
;
616 * Perform source interface selection; the source IP address
617 * must belong to one of the addresses of the interface used
618 * by the route. For performance reasons, do this only if
619 * there is no route, or if the routing table has changed,
620 * or if we haven't done source interface selection on this
621 * route (for this PCB instance) before.
623 if (select_srcif
&& ip
->ip_src
.s_addr
!= INADDR_ANY
&&
624 (ro
->ro_rt
== NULL
|| !(ro
->ro_rt
->rt_flags
& RTF_UP
) ||
625 ro
->ro_rt
->generation_id
!= route_generation
||
626 !(ro
->ro_flags
& ROF_SRCIF_SELECTED
))) {
629 /* Find the source interface */
630 ifa
= in_selectsrcif(ip
, ro
, ifscope
);
633 * If the source address belongs to a cellular interface
634 * and the caller forbids our using interfaces of such
635 * type, pretend that there is no source address.
637 if (nocell
&& ifa
!= NULL
&&
638 ifa
->ifa_ifp
->if_type
== IFT_CELLULAR
) {
640 error
= EADDRNOTAVAIL
;
645 * If the source address is spoofed (in the case of
646 * IP_RAWOUTPUT on an unbounded socket), or if this
647 * is destined for local/loopback, just let it go out
648 * using the interface of the route. Otherwise,
649 * there's no interface having such an address,
652 if (ifa
== NULL
&& (!(flags
& IP_RAWOUTPUT
) ||
653 srcbound
) && ifscope
!= lo_ifp
->if_index
) {
654 error
= EADDRNOTAVAIL
;
659 * If the caller didn't explicitly specify the scope,
660 * pick it up from the source interface. If the cached
661 * route was wrong and was blown away as part of source
662 * interface selection, don't mask out RTF_PRCLONING
663 * since that route may have been allocated by the ULP,
664 * unless the IP header was created by the caller or
665 * the destination is IPv4 LLA. The check for the
666 * latter is needed because IPv4 LLAs are never scoped
667 * in the current implementation, and we don't want to
668 * replace the resolved IPv4 LLA route with one whose
669 * gateway points to that of the default gateway on
670 * the primary interface of the system.
673 if (ifscope
== IFSCOPE_NONE
)
674 ifscope
= ifa
->ifa_ifp
->if_index
;
676 cloneok
= (!(flags
& IP_RAWOUTPUT
) &&
677 !(IN_LINKLOCAL(ntohl(ip
->ip_dst
.s_addr
))));
682 * If this is the case, we probably don't want to allocate
683 * a protocol-cloned route since we didn't get one from the
684 * ULP. This lets TCP do its thing, while not burdening
685 * forwarding or ICMP with the overhead of cloning a route.
686 * Of course, we still want to do any cloning requested by
687 * the link layer, as this is probably required in all cases
688 * for correct operation (as it is for ARP).
690 if (ro
->ro_rt
== NULL
) {
691 unsigned long ign
= RTF_PRCLONING
;
693 * We make an exception here: if the destination
694 * address is INADDR_BROADCAST, allocate a protocol-
695 * cloned host route so that we end up with a route
696 * marked with the RTF_BROADCAST flag. Otherwise,
697 * we would end up referring to the default route,
698 * instead of creating a cloned host route entry.
699 * That would introduce inconsistencies between ULPs
700 * that allocate a route and those that don't. The
701 * RTF_BROADCAST route is important since we'd want
702 * to send out undirected IP broadcast packets using
703 * link-level broadcast address. Another exception
704 * is for ULP-created routes that got blown away by
705 * source interface selection (see above).
707 * These exceptions will no longer be necessary when
708 * the RTF_PRCLONING scheme is no longer present.
710 if (cloneok
|| dst
->sin_addr
.s_addr
== INADDR_BROADCAST
)
711 ign
&= ~RTF_PRCLONING
;
714 * Loosen the route lookup criteria if the ifscope
715 * corresponds to the loopback interface; this is
716 * needed to support Application Layer Gateways
717 * listening on loopback, in conjunction with packet
718 * filter redirection rules. The final source IP
719 * address will be rewritten by the packet filter
720 * prior to the RFC1122 loopback check below.
722 if (ifscope
== lo_ifp
->if_index
)
723 rtalloc_ign(ro
, ign
);
725 rtalloc_scoped_ign(ro
, ign
, ifscope
);
728 * If the route points to a cellular interface and the
729 * caller forbids our using interfaces of such type,
730 * pretend that there is no route.
732 if (nocell
&& ro
->ro_rt
!= NULL
) {
733 RT_LOCK_SPIN(ro
->ro_rt
);
734 if (ro
->ro_rt
->rt_ifp
->if_type
==
736 RT_UNLOCK(ro
->ro_rt
);
740 RT_UNLOCK(ro
->ro_rt
);
745 if (ro
->ro_rt
== NULL
) {
746 OSAddAtomic(1, &ipstat
.ips_noroute
);
747 error
= EHOSTUNREACH
;
752 IFA_REMREF(&ia
->ia_ifa
);
753 RT_LOCK_SPIN(ro
->ro_rt
);
754 ia
= ifatoia(ro
->ro_rt
->rt_ifa
);
756 /* Become a regular mutex */
757 RT_CONVERT_LOCK(ro
->ro_rt
);
758 IFA_ADDREF(&ia
->ia_ifa
);
760 ifp
= ro
->ro_rt
->rt_ifp
;
762 if (ro
->ro_rt
->rt_flags
& RTF_GATEWAY
) {
763 dst
= (struct sockaddr_in
*)(void *)
764 ro
->ro_rt
->rt_gateway
;
766 if (ro
->ro_rt
->rt_flags
& RTF_HOST
) {
767 isbroadcast
= (ro
->ro_rt
->rt_flags
& RTF_BROADCAST
);
769 /* Become a regular mutex */
770 RT_CONVERT_LOCK(ro
->ro_rt
);
771 isbroadcast
= in_broadcast(dst
->sin_addr
, ifp
);
773 RT_UNLOCK(ro
->ro_rt
);
776 if (IN_MULTICAST(ntohl(pkt_dst
.s_addr
))) {
777 struct in_multi
*inm
;
779 u_int8_t ttl
= IP_DEFAULT_MULTICAST_TTL
;
780 u_int8_t loop
= IP_DEFAULT_MULTICAST_LOOP
;
782 m
->m_flags
|= M_MCAST
;
784 * IP destination address is multicast. Make sure "dst"
785 * still points to the address in "ro". (It may have been
786 * changed to point to a gateway address, above.)
788 dst
= (struct sockaddr_in
*)(void *)&ro
->ro_dst
;
790 * See if the caller provided any multicast options
794 vif
= imo
->imo_multicast_vif
;
795 ttl
= imo
->imo_multicast_ttl
;
796 loop
= imo
->imo_multicast_loop
;
797 if ((flags
& IP_RAWOUTPUT
) == 0)
799 if (imo
->imo_multicast_ifp
!= NULL
)
800 ifp
= imo
->imo_multicast_ifp
;
803 if (vif
!= -1 && ((flags
& IP_RAWOUTPUT
) == 0 ||
804 ip
->ip_src
.s_addr
== INADDR_ANY
))
805 ip
->ip_src
.s_addr
= ip_mcast_src(vif
);
806 #endif /* MROUTING */
807 } else if ((flags
& IP_RAWOUTPUT
) == 0) {
812 * Confirm that the outgoing interface supports multicast.
814 if (imo
== NULL
|| vif
== -1) {
815 if ((ifp
->if_flags
& IFF_MULTICAST
) == 0) {
816 OSAddAtomic(1, &ipstat
.ips_noroute
);
822 * If source address not specified yet, use address
823 * of outgoing interface.
825 if (ip
->ip_src
.s_addr
== INADDR_ANY
) {
826 struct in_ifaddr
*ia1
;
827 lck_rw_lock_shared(in_ifaddr_rwlock
);
828 TAILQ_FOREACH(ia1
, &in_ifaddrhead
, ia_link
) {
829 IFA_LOCK_SPIN(&ia1
->ia_ifa
);
830 if (ia1
->ia_ifp
== ifp
) {
831 ip
->ip_src
= IA_SIN(ia1
)->sin_addr
;
832 IFA_UNLOCK(&ia1
->ia_ifa
);
835 IFA_UNLOCK(&ia1
->ia_ifa
);
837 lck_rw_done(in_ifaddr_rwlock
);
838 if (ip
->ip_src
.s_addr
== INADDR_ANY
) {
844 in_multihead_lock_shared();
845 IN_LOOKUP_MULTI(&pkt_dst
, ifp
, inm
);
846 in_multihead_lock_done();
847 if (inm
!= NULL
&& (imo
== NULL
|| loop
)) {
849 * If we belong to the destination multicast group
850 * on the outgoing interface, and the caller did not
851 * forbid loopback, loop back a copy.
853 if (!TAILQ_EMPTY(&ipv4_filters
)) {
854 struct ipfilter
*filter
;
855 int seen
= (inject_filter_ref
== 0);
858 ipf_pktopts
.ippo_flags
|= IPPOF_MCAST_OPTS
;
859 ipf_pktopts
.ippo_mcast_ifnet
= ifp
;
860 ipf_pktopts
.ippo_mcast_ttl
= ttl
;
861 ipf_pktopts
.ippo_mcast_loop
= loop
;
866 /* 4135317 - always pass network byte order to filter */
868 #if BYTE_ORDER != BIG_ENDIAN
873 TAILQ_FOREACH(filter
, &ipv4_filters
, ipf_link
) {
875 if ((struct ipfilter
*)inject_filter_ref
== filter
)
877 } else if (filter
->ipf_filter
.ipf_output
) {
879 result
= filter
->ipf_filter
.ipf_output(filter
->ipf_filter
.cookie
, (mbuf_t
*)&m
, ippo
);
880 if (result
== EJUSTRETURN
) {
893 /* set back to host byte order */
894 ip
= mtod(m
, struct ip
*);
896 #if BYTE_ORDER != BIG_ENDIAN
904 ip_mloopback(ifp
, m
, dst
, hlen
);
909 * If we are acting as a multicast router, perform
910 * multicast forwarding as if the packet had just
911 * arrived on the interface to which we are about
912 * to send. The multicast forwarding function
913 * recursively calls this function, using the
914 * IP_FORWARDING flag to prevent infinite recursion.
916 * Multicasts that are looped back by ip_mloopback(),
917 * above, will be forwarded by the ip_input() routine,
920 if (ip_mrouter
&& (flags
& IP_FORWARDING
) == 0) {
922 * Check if rsvp daemon is running. If not, don't
923 * set ip_moptions. This ensures that the packet
924 * is multicast and not just sent down one link
925 * as prescribed by rsvpd.
929 if (ip_mforward(ip
, ifp
, m
, imo
) != 0) {
933 OSAddAtomic(1, &ipstat
.ips_cantforward
);
938 #endif /* MROUTING */
942 * Multicasts with a time-to-live of zero may be looped-
943 * back, above, but must not be transmitted on a network.
944 * Also, multicasts addressed to the loopback interface
945 * are not sent -- the above call to ip_mloopback() will
946 * loop back a copy if this host actually belongs to the
947 * destination group on the loopback interface.
949 if (ip
->ip_ttl
== 0 || ifp
->if_flags
& IFF_LOOPBACK
) {
957 * If source address not specified yet, use address
958 * of outgoing interface.
960 if (ip
->ip_src
.s_addr
== INADDR_ANY
) {
961 IFA_LOCK_SPIN(&ia
->ia_ifa
);
962 ip
->ip_src
= IA_SIN(ia
)->sin_addr
;
963 IFA_UNLOCK(&ia
->ia_ifa
);
964 #if IPFIREWALL_FORWARD
965 /* Keep note that we did this - if the firewall changes
966 * the next-hop, our interface may change, changing the
967 * default source IP. It's a shame so much effort happens
971 #endif /* IPFIREWALL_FORWARD */
975 * Look for broadcast address and
976 * verify user is allowed to send
980 if ((ifp
->if_flags
& IFF_BROADCAST
) == 0) {
981 error
= EADDRNOTAVAIL
;
984 if ((flags
& IP_ALLOWBROADCAST
) == 0) {
988 /* don't allow broadcast messages to be fragmented */
989 if ((u_short
)ip
->ip_len
> ifp
->if_mtu
) {
993 m
->m_flags
|= M_BCAST
;
995 m
->m_flags
&= ~M_BCAST
;
1000 /* Invoke outbound packet filter */
1001 if (PF_IS_ENABLED
) {
1004 m0
= m
; /* Save for later */
1007 args
.fwa_next_hop
= dst
;
1011 args
.fwa_oflags
= flags
;
1012 if (flags
& IP_OUTARGS
)
1013 args
.fwa_ipoa
= ipoa
;
1014 rc
= pf_af_hook(ifp
, mppn
, &m
, AF_INET
, FALSE
, &args
);
1015 #else /* DUMMYNET */
1016 rc
= pf_af_hook(ifp
, mppn
, &m
, AF_INET
, FALSE
, NULL
);
1017 #endif /* DUMMYNET */
1018 if (rc
!= 0 || m
== NULL
) {
1019 /* Move to the next packet */
1022 /* Skip ahead if first packet in list got dropped */
1023 if (packetlist
== m0
)
1028 /* Next packet in the chain */
1030 } else if (packetlist
!= NULL
) {
1031 /* No more packet; send down the chain */
1034 /* Nothing left; we're done */
1038 ip
= mtod(m
, struct ip
*);
1039 pkt_dst
= ip
->ip_dst
;
1040 hlen
= IP_VHL_HL(ip
->ip_vhl
) << 2;
1044 * Force IP TTL to 255 following draft-ietf-zeroconf-ipv4-linklocal.txt
1046 if (IN_LINKLOCAL(ntohl(ip
->ip_src
.s_addr
)) || IN_LINKLOCAL(ntohl(ip
->ip_dst
.s_addr
))) {
1047 ip_linklocal_stat
.iplls_out_total
++;
1048 if (ip
->ip_ttl
!= MAXTTL
) {
1049 ip_linklocal_stat
.iplls_out_badttl
++;
1050 ip
->ip_ttl
= MAXTTL
;
1054 if (!didfilter
&& !TAILQ_EMPTY(&ipv4_filters
)) {
1055 struct ipfilter
*filter
;
1056 int seen
= (inject_filter_ref
== 0);
1057 ipf_pktopts
.ippo_flags
&= ~IPPOF_MCAST_OPTS
;
1059 /* Check that a TSO frame isn't passed to a filter.
1060 * This could happen if a filter is inserted while
1061 * TCP is sending the TSO packet.
1063 if (m
->m_pkthdr
.csum_flags
& CSUM_TSO_IPV4
) {
1070 /* 4135317 - always pass network byte order to filter */
1072 #if BYTE_ORDER != BIG_ENDIAN
1077 TAILQ_FOREACH(filter
, &ipv4_filters
, ipf_link
) {
1079 if ((struct ipfilter
*)inject_filter_ref
== filter
)
1081 } else if (filter
->ipf_filter
.ipf_output
) {
1083 result
= filter
->ipf_filter
.ipf_output(filter
->ipf_filter
.cookie
, (mbuf_t
*)&m
, ippo
);
1084 if (result
== EJUSTRETURN
) {
1095 /* set back to host byte order */
1096 ip
= mtod(m
, struct ip
*);
1098 #if BYTE_ORDER != BIG_ENDIAN
1107 /* temporary for testing only: bypass ipsec altogether */
1109 if (ipsec_bypass
!= 0 || (flags
& IP_NOIPSEC
) != 0)
1112 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT
| DBG_FUNC_START
, 0,0,0,0,0);
1115 /* get SP for this packet */
1117 sp
= ipsec4_getpolicybyaddr(m
, IPSEC_DIR_OUTBOUND
, flags
, &error
);
1119 sp
= ipsec4_getpolicybysock(m
, IPSEC_DIR_OUTBOUND
, so
, &error
);
1122 IPSEC_STAT_INCREMENT(ipsecstat
.out_inval
);
1123 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT
| DBG_FUNC_END
, 0,0,0,0,0);
1130 switch (sp
->policy
) {
1131 case IPSEC_POLICY_DISCARD
:
1132 case IPSEC_POLICY_GENERATE
:
1134 * This packet is just discarded.
1136 IPSEC_STAT_INCREMENT(ipsecstat
.out_polvio
);
1137 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT
| DBG_FUNC_END
, 1,0,0,0,0);
1140 case IPSEC_POLICY_BYPASS
:
1141 case IPSEC_POLICY_NONE
:
1142 /* no need to do IPsec. */
1143 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT
| DBG_FUNC_END
, 2,0,0,0,0);
1146 case IPSEC_POLICY_IPSEC
:
1147 if (sp
->req
== NULL
) {
1148 /* acquire a policy */
1149 error
= key_spdacquire(sp
);
1150 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT
| DBG_FUNC_END
, 3,0,0,0,0);
1155 case IPSEC_POLICY_ENTRUST
:
1157 printf("ip_output: Invalid policy found. %d\n", sp
->policy
);
1161 if (flags
& IP_ROUTETOIF
) {
1162 bzero(&ipsec_state
.ro
, sizeof(ipsec_state
.ro
));
1164 route_copyout(&ipsec_state
.ro
, ro
, sizeof(ipsec_state
.ro
));
1165 ipsec_state
.dst
= (struct sockaddr
*)dst
;
1171 * delayed checksums are not currently compatible with IPsec
1173 if (m
->m_pkthdr
.csum_flags
& CSUM_DELAY_DATA
) {
1174 in_delayed_cksum(m
);
1175 m
->m_pkthdr
.csum_flags
&= ~CSUM_DELAY_DATA
;
1179 #if BYTE_ORDER != BIG_ENDIAN
1184 DTRACE_IP6(send
, struct mbuf
*, m
, struct inpcb
*, NULL
,
1185 struct ip
*, ip
, struct ifnet
*, ifp
,
1186 struct ip
*, ip
, struct ip6_hdr
*, NULL
);
1188 error
= ipsec4_output(&ipsec_state
, sp
, flags
);
1190 m0
= m
= ipsec_state
.m
;
1192 if (flags
& IP_ROUTETOIF
) {
1194 * if we have tunnel mode SA, we may need to ignore
1197 if (ipsec_state
.tunneled
) {
1198 flags
&= ~IP_ROUTETOIF
;
1199 ipsec_saved_route
= ro
;
1200 ro
= &ipsec_state
.ro
;
1203 ipsec_saved_route
= ro
;
1204 ro
= &ipsec_state
.ro
;
1206 dst
= (struct sockaddr_in
*)(void *)ipsec_state
.dst
;
1208 /* mbuf is already reclaimed in ipsec4_output. */
1218 printf("ip4_output (ipsec): error code %d\n", error
);
1221 /* don't show these error codes to the user */
1225 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT
| DBG_FUNC_END
, 4,0,0,0,0);
1230 /* be sure to update variables that are affected by ipsec4_output() */
1231 ip
= mtod(m
, struct ip
*);
1234 hlen
= IP_VHL_HL(ip
->ip_vhl
) << 2;
1236 hlen
= ip
->ip_hl
<< 2;
1238 /* Check that there wasn't a route change and src is still valid */
1239 if (ro
->ro_rt
!= NULL
&& ro
->ro_rt
->generation_id
!= route_generation
) {
1240 if ((src_ia
= ifa_foraddr(ip
->ip_src
.s_addr
)) == NULL
&&
1241 ((flags
& (IP_ROUTETOIF
| IP_FORWARDING
)) == 0)) {
1242 error
= EADDRNOTAVAIL
;
1243 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT
| DBG_FUNC_END
,
1250 IFA_REMREF(&src_ia
->ia_ifa
);
1253 if (ro
->ro_rt
== NULL
) {
1254 if ((flags
& IP_ROUTETOIF
) == 0) {
1255 printf("ip_output: can't update route after "
1256 "IPsec processing\n");
1257 error
= EHOSTUNREACH
; /*XXX*/
1258 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT
| DBG_FUNC_END
,
1264 IFA_REMREF(&ia
->ia_ifa
);
1265 RT_LOCK_SPIN(ro
->ro_rt
);
1266 ia
= ifatoia(ro
->ro_rt
->rt_ifa
);
1268 /* Become a regular mutex */
1269 RT_CONVERT_LOCK(ro
->ro_rt
);
1270 IFA_ADDREF(&ia
->ia_ifa
);
1272 ifp
= ro
->ro_rt
->rt_ifp
;
1273 RT_UNLOCK(ro
->ro_rt
);
1276 /* make it flipped, again. */
1278 #if BYTE_ORDER != BIG_ENDIAN
1283 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT
| DBG_FUNC_END
, 7,0xff,0xff,0xff,0xff);
1285 /* Pass to filters again */
1286 if (!TAILQ_EMPTY(&ipv4_filters
)) {
1287 struct ipfilter
*filter
;
1289 ipf_pktopts
.ippo_flags
&= ~IPPOF_MCAST_OPTS
;
1291 /* Check that a TSO frame isn't passed to a filter.
1292 * This could happen if a filter is inserted while
1293 * TCP is sending the TSO packet.
1295 if (m
->m_pkthdr
.csum_flags
& CSUM_TSO_IPV4
) {
1302 /* 4135317 - always pass network byte order to filter */
1304 #if BYTE_ORDER != BIG_ENDIAN
1309 TAILQ_FOREACH(filter
, &ipv4_filters
, ipf_link
) {
1310 if (filter
->ipf_filter
.ipf_output
) {
1312 result
= filter
->ipf_filter
.ipf_output(filter
->ipf_filter
.cookie
, (mbuf_t
*)&m
, ippo
);
1313 if (result
== EJUSTRETURN
) {
1324 /* set back to host byte order */
1325 ip
= mtod(m
, struct ip
*);
1327 #if BYTE_ORDER != BIG_ENDIAN
1339 * Check with the firewall...
1340 * but not if we are already being fwd'd from a firewall.
1342 if (fw_enable
&& IPFW_LOADED
&& !args
.fwa_next_hop
) {
1343 struct sockaddr_in
*old
= dst
;
1346 args
.fwa_next_hop
= dst
;
1348 off
= ip_fw_chk_ptr(&args
);
1350 dst
= args
.fwa_next_hop
;
1353 * On return we must do the following:
1354 * IP_FW_PORT_DENY_FLAG -> drop the pkt (XXX new)
1355 * 1<=off<= 0xffff -> DIVERT
1356 * (off & IP_FW_PORT_DYNT_FLAG) -> send to a DUMMYNET pipe
1357 * (off & IP_FW_PORT_TEE_FLAG) -> TEE the packet
1358 * dst != old -> IPFIREWALL_FORWARD
1359 * off==0, dst==old -> accept
1360 * If some of the above modules is not compiled in, then
1361 * we should't have to check the corresponding condition
1362 * (because the ipfw control socket should not accept
1363 * unsupported rules), but better play safe and drop
1364 * packets in case of doubt.
1367 if ( (off
& IP_FW_PORT_DENY_FLAG
) || m
== NULL
) {
1373 ip
= mtod(m
, struct ip
*);
1375 if (off
== 0 && dst
== old
) {/* common case */
1379 if (DUMMYNET_LOADED
&& (off
& IP_FW_PORT_DYNT_FLAG
) != 0) {
1381 * pass the pkt to dummynet. Need to include
1382 * pipe number, m, ifp, ro, dst because these are
1383 * not recomputed in the next pass.
1384 * All other parameters have been already used and
1385 * so they are not needed anymore.
1386 * XXX note: if the ifp or ro entry are deleted
1387 * while a pkt is in dummynet, we are in trouble!
1391 args
.fwa_oflags
= flags
;
1392 if (flags
& IP_OUTARGS
)
1393 args
.fwa_ipoa
= ipoa
;
1395 error
= ip_dn_io_ptr(m
, off
& 0xffff, DN_TO_IP_OUT
,
1396 &args
, DN_CLIENT_IPFW
);
1399 #endif /* DUMMYNET */
1401 if (off
!= 0 && (off
& IP_FW_PORT_DYNT_FLAG
) == 0) {
1402 struct mbuf
*clone
= NULL
;
1404 /* Clone packet if we're doing a 'tee' */
1405 if ((off
& IP_FW_PORT_TEE_FLAG
) != 0)
1406 clone
= m_dup(m
, M_DONTWAIT
);
1409 * delayed checksums are not currently compatible
1410 * with divert sockets.
1412 if (m
->m_pkthdr
.csum_flags
& CSUM_DELAY_DATA
) {
1413 in_delayed_cksum(m
);
1414 m
->m_pkthdr
.csum_flags
&= ~CSUM_DELAY_DATA
;
1417 /* Restore packet header fields to original values */
1419 #if BYTE_ORDER != BIG_ENDIAN
1424 /* Deliver packet to divert input routine */
1425 divert_packet(m
, 0, off
& 0xffff, args
.fwa_divert_rule
);
1427 /* If 'tee', continue with original packet */
1428 if (clone
!= NULL
) {
1430 ip
= mtod(m
, struct ip
*);
1437 #if IPFIREWALL_FORWARD
1438 /* Here we check dst to make sure it's directly reachable on the
1439 * interface we previously thought it was.
1440 * If it isn't (which may be likely in some situations) we have
1441 * to re-route it (ie, find a route for the next-hop and the
1442 * associated interface) and set them here. This is nested
1443 * forwarding which in most cases is undesirable, except where
1444 * such control is nigh impossible. So we do it here.
1447 if (off
== 0 && old
!= dst
) {
1448 struct in_ifaddr
*ia_fw
;
1450 /* It's changed... */
1451 /* There must be a better way to do this next line... */
1452 static struct route sro_fwd
, *ro_fwd
= &sro_fwd
;
1453 #if IPFIREWALL_FORWARD_DEBUG
1454 printf("IPFIREWALL_FORWARD: New dst ip: ");
1455 print_ip(dst
->sin_addr
);
1459 * We need to figure out if we have been forwarded
1460 * to a local socket. If so then we should somehow
1461 * "loop back" to ip_input, and get directed to the
1462 * PCB as if we had received this packet. This is
1463 * because it may be dificult to identify the packets
1464 * you want to forward until they are being output
1465 * and have selected an interface. (e.g. locally
1466 * initiated packets) If we used the loopback inteface,
1467 * we would not be able to control what happens
1468 * as the packet runs through ip_input() as
1469 * it is done through a ISR.
1471 lck_rw_lock_shared(in_ifaddr_rwlock
);
1472 TAILQ_FOREACH(ia_fw
, &in_ifaddrhead
, ia_link
) {
1474 * If the addr to forward to is one
1475 * of ours, we pretend to
1476 * be the destination for this packet.
1478 IFA_LOCK_SPIN(&ia_fw
->ia_ifa
);
1479 if (IA_SIN(ia_fw
)->sin_addr
.s_addr
==
1480 dst
->sin_addr
.s_addr
) {
1481 IFA_UNLOCK(&ia_fw
->ia_ifa
);
1484 IFA_UNLOCK(&ia_fw
->ia_ifa
);
1486 lck_rw_done(in_ifaddr_rwlock
);
1488 /* tell ip_input "dont filter" */
1489 struct m_tag
*fwd_tag
;
1490 struct ip_fwd_tag
*ipfwd_tag
;
1492 fwd_tag
= m_tag_create(KERNEL_MODULE_TAG_ID
,
1493 KERNEL_TAG_TYPE_IPFORWARD
,
1494 sizeof (*ipfwd_tag
), M_NOWAIT
, m
);
1495 if (fwd_tag
== NULL
) {
1500 ipfwd_tag
= (struct ip_fwd_tag
*)(fwd_tag
+1);
1501 ipfwd_tag
->next_hop
= args
.fwa_next_hop
;
1503 m_tag_prepend(m
, fwd_tag
);
1505 if (m
->m_pkthdr
.rcvif
== NULL
)
1506 m
->m_pkthdr
.rcvif
= lo_ifp
;
1507 if ((~IF_HWASSIST_CSUM_FLAGS(m
->m_pkthdr
.rcvif
->if_hwassist
) &
1508 m
->m_pkthdr
.csum_flags
) == 0) {
1509 if (m
->m_pkthdr
.csum_flags
& CSUM_DELAY_DATA
) {
1510 m
->m_pkthdr
.csum_flags
&= ~CSUM_DELAY_DATA
;
1511 m
->m_pkthdr
.csum_flags
|=
1512 CSUM_DATA_VALID
| CSUM_PSEUDO_HDR
;
1513 m
->m_pkthdr
.csum_data
= 0xffff;
1515 m
->m_pkthdr
.csum_flags
|=
1516 CSUM_IP_CHECKED
| CSUM_IP_VALID
;
1518 else if (m
->m_pkthdr
.csum_flags
& CSUM_DELAY_DATA
) {
1519 in_delayed_cksum(m
);
1520 m
->m_pkthdr
.csum_flags
&= ~CSUM_DELAY_DATA
;
1521 ip
->ip_sum
= in_cksum(m
, hlen
);
1524 #if BYTE_ORDER != BIG_ENDIAN
1529 /* we need to call dlil_output to run filters
1530 * and resync to avoid recursion loops.
1533 dlil_output(lo_ifp
, PF_INET
, m
, 0,
1534 (struct sockaddr
*)dst
, 0, adv
);
1537 printf("ip_output: no loopback ifp for forwarding!!!\n");
1541 /* Some of the logic for this was
1542 * nicked from above.
1544 * This rewrites the cached route in a local PCB.
1545 * Is this what we want to do?
1547 bcopy(dst
, &ro_fwd
->ro_dst
, sizeof(*dst
));
1549 ro_fwd
->ro_rt
= NULL
;
1550 rtalloc_ign(ro_fwd
, RTF_PRCLONING
);
1552 if (ro_fwd
->ro_rt
== NULL
) {
1553 OSAddAtomic(1, &ipstat
.ips_noroute
);
1554 error
= EHOSTUNREACH
;
1558 RT_LOCK_SPIN(ro_fwd
->ro_rt
);
1559 ia_fw
= ifatoia(ro_fwd
->ro_rt
->rt_ifa
);
1560 if (ia_fw
!= NULL
) {
1561 /* Become a regular mutex */
1562 RT_CONVERT_LOCK(ro_fwd
->ro_rt
);
1563 IFA_ADDREF(&ia_fw
->ia_ifa
);
1565 ifp
= ro_fwd
->ro_rt
->rt_ifp
;
1566 ro_fwd
->ro_rt
->rt_use
++;
1567 if (ro_fwd
->ro_rt
->rt_flags
& RTF_GATEWAY
)
1568 dst
= (struct sockaddr_in
*)(void *)ro_fwd
->ro_rt
->rt_gateway
;
1569 if (ro_fwd
->ro_rt
->rt_flags
& RTF_HOST
) {
1571 (ro_fwd
->ro_rt
->rt_flags
& RTF_BROADCAST
);
1573 /* Become a regular mutex */
1574 RT_CONVERT_LOCK(ro_fwd
->ro_rt
);
1575 isbroadcast
= in_broadcast(dst
->sin_addr
, ifp
);
1577 RT_UNLOCK(ro_fwd
->ro_rt
);
1579 ro
->ro_rt
= ro_fwd
->ro_rt
;
1580 dst
= (struct sockaddr_in
*)(void *)&ro_fwd
->ro_dst
;
1583 * If we added a default src ip earlier,
1584 * which would have been gotten from the-then
1585 * interface, do it again, from the new one.
1587 if (ia_fw
!= NULL
) {
1588 if (fwd_rewrite_src
) {
1589 IFA_LOCK_SPIN(&ia_fw
->ia_ifa
);
1590 ip
->ip_src
= IA_SIN(ia_fw
)->sin_addr
;
1591 IFA_UNLOCK(&ia_fw
->ia_ifa
);
1593 IFA_REMREF(&ia_fw
->ia_ifa
);
1597 #endif /* IPFIREWALL_FORWARD */
1599 * if we get here, none of the above matches, and
1600 * we have to drop the pkt
1603 error
= EACCES
; /* not sure this is the right error msg */
1608 #endif /* IPFIREWALL */
1610 /* Do not allow loopback address to wind up on a wire */
1611 if ((ifp
->if_flags
& IFF_LOOPBACK
) == 0 &&
1612 ((ntohl(ip
->ip_src
.s_addr
) >> IN_CLASSA_NSHIFT
) == IN_LOOPBACKNET
||
1613 (ntohl(ip
->ip_dst
.s_addr
) >> IN_CLASSA_NSHIFT
) == IN_LOOPBACKNET
)) {
1614 OSAddAtomic(1, &ipstat
.ips_badaddr
);
1617 * Do not simply drop the packet just like a firewall -- we want the
1618 * the application to feel the pain.
1619 * Return ENETUNREACH like ip6_output does in some similar cases.
1620 * This can startle the otherwise clueless process that specifies
1621 * loopback as the source address.
1623 error
= ENETUNREACH
;
1627 m
->m_pkthdr
.csum_flags
|= CSUM_IP
;
1628 tso
= (ifp
->if_hwassist
& IFNET_TSO_IPV4
) && (m
->m_pkthdr
.csum_flags
& CSUM_TSO_IPV4
);
1630 sw_csum
= m
->m_pkthdr
.csum_flags
1631 & ~IF_HWASSIST_CSUM_FLAGS(ifp
->if_hwassist
);
1633 if ((ifp
->if_hwassist
& CSUM_TCP_SUM16
) != 0) {
1635 * Special case code for GMACE
1636 * frames that can be checksumed by GMACE SUM16 HW:
1637 * frame >64, no fragments, no UDP
1639 if (apple_hwcksum_tx
&& (m
->m_pkthdr
.csum_flags
& CSUM_TCP
)
1640 && (ip
->ip_len
> 50) && (ip
->ip_len
<= ifp
->if_mtu
)) {
1641 /* Apple GMAC HW, expects STUFF_OFFSET << 16 | START_OFFSET */
1642 u_short offset
= (IP_VHL_HL(ip
->ip_vhl
) << 2) +14 ; /* IP+Enet header length */
1643 u_short csumprev
= m
->m_pkthdr
.csum_data
& 0xFFFF;
1644 m
->m_pkthdr
.csum_flags
= CSUM_DATA_VALID
| CSUM_TCP_SUM16
; /* for GMAC */
1645 m
->m_pkthdr
.csum_data
= (csumprev
+ offset
) << 16 ;
1646 m
->m_pkthdr
.csum_data
+= offset
;
1647 sw_csum
= CSUM_DELAY_IP
; /* do IP hdr chksum in software */
1649 /* let the software handle any UDP or TCP checksums */
1650 sw_csum
|= (CSUM_DELAY_DATA
& m
->m_pkthdr
.csum_flags
);
1652 } else if (apple_hwcksum_tx
== 0) {
1653 sw_csum
|= (CSUM_DELAY_DATA
| CSUM_DELAY_IP
) &
1654 m
->m_pkthdr
.csum_flags
;
1657 if (sw_csum
& CSUM_DELAY_DATA
) {
1658 in_delayed_cksum(m
);
1659 sw_csum
&= ~CSUM_DELAY_DATA
;
1660 m
->m_pkthdr
.csum_flags
&= ~CSUM_DELAY_DATA
;
1663 if (apple_hwcksum_tx
!= 0) {
1664 m
->m_pkthdr
.csum_flags
&=
1665 IF_HWASSIST_CSUM_FLAGS(ifp
->if_hwassist
);
1667 m
->m_pkthdr
.csum_flags
= 0;
1671 * If small enough for interface, or the interface will take
1672 * care of the fragmentation for us, can just send directly.
1674 if ((u_short
)ip
->ip_len
<= ifp
->if_mtu
|| tso
||
1675 ifp
->if_hwassist
& CSUM_FRAGMENT
) {
1677 m
->m_pkthdr
.csum_flags
|= CSUM_TSO_IPV4
;
1680 #if BYTE_ORDER != BIG_ENDIAN
1686 if (sw_csum
& CSUM_DELAY_IP
) {
1687 ip
->ip_sum
= in_cksum(m
, hlen
);
1691 /* Record statistics for this interface address. */
1692 if (!(flags
& IP_FORWARDING
) && ia
!= NULL
) {
1693 ia
->ia_ifa
.if_opackets
++;
1694 ia
->ia_ifa
.if_obytes
+= m
->m_pkthdr
.len
;
1699 /* clean ipsec history once it goes out of the node */
1700 if (ipsec_bypass
== 0 && (flags
& IP_NOIPSEC
) == 0)
1703 if (packetchain
== 0) {
1704 if (ro
->ro_rt
&& nstat_collect
)
1705 nstat_route_tx(ro
->ro_rt
, 1, m
->m_pkthdr
.len
, 0);
1706 error
= dlil_output(ifp
, PF_INET
, m
, ro
->ro_rt
,
1707 (struct sockaddr
*)dst
, 0, adv
);
1710 else { /* packet chaining allows us to reuse the route for all packets */
1711 bytecnt
+= m
->m_pkthdr
.len
;
1712 mppn
= &m
->m_nextpkt
;
1718 if (pktcnt
> ip_maxchainsent
)
1719 ip_maxchainsent
= pktcnt
;
1720 if (ro
->ro_rt
&& nstat_collect
)
1721 nstat_route_tx(ro
->ro_rt
, pktcnt
, bytecnt
, 0);
1723 error
= dlil_output(ifp
, PF_INET
, packetlist
,
1724 ro
->ro_rt
, (struct sockaddr
*)dst
, 0, adv
);
1736 * Too large for interface; fragment if possible.
1737 * Must be able to put at least 8 bytes per fragment.
1740 if (ip
->ip_off
& IP_DF
|| (m
->m_pkthdr
.csum_flags
& CSUM_TSO_IPV4
) ||
1744 * This case can happen if the user changed the MTU
1745 * of an interface after enabling IP on it. Because
1746 * most netifs don't keep track of routes pointing to
1747 * them, there is no way for one to update all its
1748 * routes when the MTU is changed.
1751 RT_LOCK_SPIN(ro
->ro_rt
);
1752 if ((ro
->ro_rt
->rt_flags
& (RTF_UP
| RTF_HOST
))
1753 && !(ro
->ro_rt
->rt_rmx
.rmx_locks
& RTV_MTU
)
1754 && (ro
->ro_rt
->rt_rmx
.rmx_mtu
> ifp
->if_mtu
)) {
1755 ro
->ro_rt
->rt_rmx
.rmx_mtu
= ifp
->if_mtu
;
1757 RT_UNLOCK(ro
->ro_rt
);
1762 OSAddAtomic(1, &ipstat
.ips_cantfrag
);
1766 error
= ip_fragment(m
, ifp
, ifp
->if_mtu
, sw_csum
);
1772 KERNEL_DEBUG(DBG_LAYER_END
, ip
->ip_dst
.s_addr
,
1773 ip
->ip_src
.s_addr
, ip
->ip_p
, ip
->ip_off
, ip
->ip_len
);
1775 for (m
= m0
; m
; m
= m0
) {
1779 /* clean ipsec history once it goes out of the node */
1780 if (ipsec_bypass
== 0 && (flags
& IP_NOIPSEC
) == 0)
1785 /* Record statistics for this interface address. */
1787 ia
->ia_ifa
.if_opackets
++;
1788 ia
->ia_ifa
.if_obytes
+= m
->m_pkthdr
.len
;
1791 if ((packetchain
!= 0) && (pktcnt
> 0))
1792 panic("ip_output: mix of packet in packetlist is wrong=%p", packetlist
);
1793 if (ro
->ro_rt
&& nstat_collect
)
1794 nstat_route_tx(ro
->ro_rt
, 1, m
->m_pkthdr
.len
, 0);
1795 error
= dlil_output(ifp
, PF_INET
, m
, ro
->ro_rt
,
1796 (struct sockaddr
*)dst
, 0, adv
);
1802 OSAddAtomic(1, &ipstat
.ips_fragmented
);
1806 IFA_REMREF(&ia
->ia_ifa
);
1810 if (ipsec_bypass
== 0 && (flags
& IP_NOIPSEC
) == 0) {
1811 if (ipsec_state
.ro
.ro_rt
)
1812 rtfree(ipsec_state
.ro
.ro_rt
);
1814 KEYDEBUG(KEYDEBUG_IPSEC_STAMP
,
1815 printf("DP ip_output call free SP:%x\n", sp
));
1816 key_freesp(sp
, KEY_SADB_UNLOCKED
);
1821 KERNEL_DEBUG(DBG_FNC_IP_OUTPUT
| DBG_FUNC_END
, error
,0,0,0,0);
1829 ip_fragment(struct mbuf
*m
, struct ifnet
*ifp
, unsigned long mtu
, int sw_csum
)
1831 struct ip
*ip
, *mhip
;
1832 int len
, hlen
, mhlen
, firstlen
, off
, error
= 0;
1833 struct mbuf
**mnext
= &m
->m_nextpkt
, *m0
;
1836 ip
= mtod(m
, struct ip
*);
1838 hlen
= IP_VHL_HL(ip
->ip_vhl
) << 2;
1840 hlen
= ip
->ip_hl
<< 2;
1843 firstlen
= len
= (mtu
- hlen
) &~ 7;
1850 * if the interface will not calculate checksums on
1851 * fragmented packets, then do it here.
1853 if (m
->m_pkthdr
.csum_flags
& CSUM_DELAY_DATA
&&
1854 (ifp
->if_hwassist
& CSUM_IP_FRAGS
) == 0) {
1855 in_delayed_cksum(m
);
1856 m
->m_pkthdr
.csum_flags
&= ~CSUM_DELAY_DATA
;
1860 * Loop through length of segment after first fragment,
1861 * make new header and copy data of each part and link onto chain.
1864 mhlen
= sizeof (struct ip
);
1865 for (off
= hlen
+ len
; off
< (u_short
)ip
->ip_len
; off
+= len
) {
1866 MGETHDR(m
, M_DONTWAIT
, MT_HEADER
); /* MAC-OK */
1869 OSAddAtomic(1, &ipstat
.ips_odropped
);
1872 m
->m_flags
|= (m0
->m_flags
& M_MCAST
) | M_FRAG
;
1873 m
->m_data
+= max_linkhdr
;
1874 mhip
= mtod(m
, struct ip
*);
1876 if (hlen
> sizeof (struct ip
)) {
1877 mhlen
= ip_optcopy(ip
, mhip
) + sizeof (struct ip
);
1878 mhip
->ip_vhl
= IP_MAKE_VHL(IPVERSION
, mhlen
>> 2);
1881 mhip
->ip_off
= ((off
- hlen
) >> 3) + (ip
->ip_off
& ~IP_MF
);
1882 if (ip
->ip_off
& IP_MF
)
1883 mhip
->ip_off
|= IP_MF
;
1884 if (off
+ len
>= (u_short
)ip
->ip_len
)
1885 len
= (u_short
)ip
->ip_len
- off
;
1887 mhip
->ip_off
|= IP_MF
;
1888 mhip
->ip_len
= htons((u_short
)(len
+ mhlen
));
1889 m
->m_next
= m_copy(m0
, off
, len
);
1890 if (m
->m_next
== 0) {
1892 error
= ENOBUFS
; /* ??? */
1893 OSAddAtomic(1, &ipstat
.ips_odropped
);
1896 m
->m_pkthdr
.len
= mhlen
+ len
;
1897 m
->m_pkthdr
.rcvif
= 0;
1898 m
->m_pkthdr
.csum_flags
= m0
->m_pkthdr
.csum_flags
;
1899 m
->m_pkthdr
.socket_id
= m0
->m_pkthdr
.socket_id
;
1901 M_COPY_PFTAG(m
, m0
);
1902 m_set_service_class(m
, m0
->m_pkthdr
.svc
);
1905 mac_netinet_fragment(m0
, m
);
1908 #if BYTE_ORDER != BIG_ENDIAN
1909 HTONS(mhip
->ip_off
);
1913 if (sw_csum
& CSUM_DELAY_IP
) {
1914 mhip
->ip_sum
= in_cksum(m
, mhlen
);
1917 mnext
= &m
->m_nextpkt
;
1920 OSAddAtomic(nfrags
, &ipstat
.ips_ofragments
);
1922 /* set first/last markers for fragment chain */
1923 m
->m_flags
|= M_LASTFRAG
;
1924 m0
->m_flags
|= M_FIRSTFRAG
| M_FRAG
;
1925 m0
->m_pkthdr
.csum_data
= nfrags
;
1928 * Update first fragment by trimming what's been copied out
1929 * and updating header, then send each fragment (in order).
1932 m_adj(m
, hlen
+ firstlen
- (u_short
)ip
->ip_len
);
1933 m
->m_pkthdr
.len
= hlen
+ firstlen
;
1934 ip
->ip_len
= htons((u_short
)m
->m_pkthdr
.len
);
1935 ip
->ip_off
|= IP_MF
;
1937 #if BYTE_ORDER != BIG_ENDIAN
1942 if (sw_csum
& CSUM_DELAY_IP
) {
1943 ip
->ip_sum
= in_cksum(m
, hlen
);
1953 ip_out_cksum_stats(int proto
, u_int32_t len
)
1957 tcp_out_cksum_stats(len
);
1960 udp_out_cksum_stats(len
);
1963 /* keep only TCP or UDP stats for now */
1969 in_delayed_cksum_offset(struct mbuf
*m0
, int ip_offset
)
1972 unsigned char buf
[sizeof(struct ip
)];
1973 u_short csum
, offset
, ip_len
;
1975 /* Save copy of first mbuf pointer and the ip_offset before modifying */
1976 struct mbuf
*m
= m0
;
1977 int ip_offset_copy
= ip_offset
;
1979 while (ip_offset
>= m
->m_len
) {
1980 ip_offset
-= m
->m_len
;
1983 printf("in_delayed_cksum_withoffset failed - "
1984 "ip_offset wasn't in the packet\n");
1990 * In case the IP header is not contiguous, or not 32-bit
1991 * aligned, copy it to a local buffer.
1993 if ((ip_offset
+ sizeof(struct ip
) > m
->m_len
) ||
1994 !IP_HDR_ALIGNED_P(mtod(m
, caddr_t
) + ip_offset
)) {
1996 printf("delayed m_pullup, m->len: %d off: %d\n",
1997 m
->m_len
, ip_offset
);
1999 m_copydata(m
, ip_offset
, sizeof(struct ip
), (caddr_t
) buf
);
2001 ip
= (struct ip
*)(void *)buf
;
2003 ip
= (struct ip
*)(void *)(m
->m_data
+ ip_offset
);
2008 m
->m_len
-= ip_offset
;
2009 m
->m_data
+= ip_offset
;
2012 offset
= IP_VHL_HL(ip
->ip_vhl
) << 2 ;
2015 * We could be in the context of an IP or interface filter; in the
2016 * former case, ip_len would be in host (correct) order while for
2017 * the latter it would be in network order. Because of this, we
2018 * attempt to interpret the length field by comparing it against
2019 * the actual packet length. If the comparison fails, byte swap
2020 * the length and check again. If it still fails, then the packet
2021 * is bogus and we give up.
2023 ip_len
= ip
->ip_len
;
2024 if (ip_len
!= (m0
->m_pkthdr
.len
- ip_offset_copy
)) {
2025 ip_len
= SWAP16(ip_len
);
2026 if (ip_len
!= (m0
->m_pkthdr
.len
- ip_offset_copy
)) {
2027 printf("in_delayed_cksum_offset: ip_len %d (%d) "
2028 "doesn't match actual length %d\n", ip
->ip_len
,
2029 ip_len
, (m0
->m_pkthdr
.len
- ip_offset_copy
));
2034 csum
= in_cksum_skip(m
, ip_len
, offset
);
2037 ip_out_cksum_stats(ip
->ip_p
, ip_len
- offset
);
2039 if (m0
->m_pkthdr
.csum_flags
& CSUM_UDP
&& csum
== 0)
2041 offset
+= m0
->m_pkthdr
.csum_data
& 0xFFFF; /* checksum offset */
2045 if (M_LEADINGSPACE(m
) < ip_offset
)
2046 panic("in_delayed_cksum_offset - chain modified!\n");
2047 m
->m_len
+= ip_offset
;
2048 m
->m_data
-= ip_offset
;
2051 if (offset
> ip_len
) /* bogus offset */
2054 /* Insert the checksum in the existing chain */
2055 if (offset
+ ip_offset
+ sizeof(u_short
) > m
->m_len
) {
2059 printf("delayed m_copyback, m->len: %d off: %d p: %d\n",
2060 m
->m_len
, offset
+ ip_offset
, ip
->ip_p
);
2062 *(u_short
*)(void *)tmp
= csum
;
2063 m_copyback(m
, offset
+ ip_offset
, 2, tmp
);
2064 } else if (IP_HDR_ALIGNED_P(mtod(m
, caddr_t
) + ip_offset
)) {
2065 *(u_short
*)(void *)(m
->m_data
+ offset
+ ip_offset
) = csum
;
2067 bcopy(&csum
, (m
->m_data
+ offset
+ ip_offset
), sizeof (csum
));
2072 in_delayed_cksum(struct mbuf
*m
)
2074 in_delayed_cksum_offset(m
, 0);
2078 in_cksum_offset(struct mbuf
* m
, size_t ip_offset
)
2080 struct ip
* ip
= NULL
;
2082 unsigned char buf
[sizeof(struct ip
)];
2085 /* Save copy of first mbuf pointer and the ip_offset before modifying */
2086 struct mbuf
* m0
= m
;
2087 size_t ip_offset_copy
= ip_offset
;
2089 while (ip_offset
>= m
->m_len
) {
2090 ip_offset
-= m
->m_len
;
2093 printf("in_cksum_offset failed - ip_offset wasn't "
2100 * In case the IP header is not contiguous, or not 32-bit
2101 * aligned, copy it to a local buffer.
2103 if ((ip_offset
+ sizeof(struct ip
) > m
->m_len
) ||
2104 !IP_HDR_ALIGNED_P(mtod(m
, caddr_t
) + ip_offset
)) {
2106 printf("in_cksum_offset - delayed m_pullup, m->len: %d "
2107 "off: %lu\n", m
->m_len
, ip_offset
);
2109 m_copydata(m
, ip_offset
, sizeof(struct ip
), (caddr_t
) buf
);
2111 ip
= (struct ip
*)(void *)buf
;
2113 m_copyback(m
, ip_offset
+ offsetof(struct ip
, ip_sum
), 2,
2114 (caddr_t
)&ip
->ip_sum
);
2116 ip
= (struct ip
*)(void *)(m
->m_data
+ ip_offset
);
2122 m
->m_len
-= ip_offset
;
2123 m
->m_data
+= ip_offset
;
2127 hlen
= IP_VHL_HL(ip
->ip_vhl
) << 2;
2129 hlen
= ip
->ip_hl
<< 2;
2132 * We could be in the context of an IP or interface filter; in the
2133 * former case, ip_len would be in host order while for the latter
2134 * it would be in network (correct) order. Because of this, we
2135 * attempt to interpret the length field by comparing it against
2136 * the actual packet length. If the comparison fails, byte swap
2137 * the length and check again. If it still fails, then the packet
2138 * is bogus and we give up.
2140 if (ntohs(ip
->ip_len
) != (m0
->m_pkthdr
.len
- ip_offset_copy
)) {
2141 ip
->ip_len
= SWAP16(ip
->ip_len
);
2143 if (ntohs(ip
->ip_len
) != (m0
->m_pkthdr
.len
- ip_offset_copy
)) {
2144 ip
->ip_len
= SWAP16(ip
->ip_len
);
2145 printf("in_cksum_offset: ip_len %d (%d) "
2146 "doesn't match actual length %lu\n",
2147 ip
->ip_len
, SWAP16(ip
->ip_len
),
2148 (m0
->m_pkthdr
.len
- ip_offset_copy
));
2154 ip
->ip_sum
= in_cksum(m
, hlen
);
2156 ip
->ip_len
= SWAP16(ip
->ip_len
);
2160 if (M_LEADINGSPACE(m
) < ip_offset
)
2161 panic("in_cksum_offset - chain modified!\n");
2162 m
->m_len
+= ip_offset
;
2163 m
->m_data
-= ip_offset
;
2167 * Insert the checksum in the existing chain if IP header not
2168 * contiguous, or if it's not 32-bit aligned, i.e. all the cases
2169 * where it was copied to a local buffer.
2171 if (ip_offset
+ sizeof(struct ip
) > m
->m_len
) {
2175 printf("in_cksum_offset m_copyback, m->len: %u off: %lu "
2176 "p: %d\n", m
->m_len
,
2177 ip_offset
+ offsetof(struct ip
, ip_sum
), ip
->ip_p
);
2179 *(u_short
*)(void *)tmp
= ip
->ip_sum
;
2180 m_copyback(m
, ip_offset
+ offsetof(struct ip
, ip_sum
), 2, tmp
);
2181 } else if (!IP_HDR_ALIGNED_P(mtod(m
, caddr_t
) + ip_offset
)) {
2183 (m
->m_data
+ ip_offset
+ offsetof(struct ip
, ip_sum
)),
2189 * Insert IP options into preformed packet.
2190 * Adjust IP destination as required for IP source routing,
2191 * as indicated by a non-zero in_addr at the start of the options.
2193 * XXX This routine assumes that the packet has no options in place.
2195 static struct mbuf
*
2196 ip_insertoptions(m
, opt
, phlen
)
2197 register struct mbuf
*m
;
2201 register struct ipoption
*p
= mtod(opt
, struct ipoption
*);
2203 register struct ip
*ip
= mtod(m
, struct ip
*);
2206 optlen
= opt
->m_len
- sizeof(p
->ipopt_dst
);
2207 if (optlen
+ (u_short
)ip
->ip_len
> IP_MAXPACKET
)
2208 return (m
); /* XXX should fail */
2209 if (p
->ipopt_dst
.s_addr
)
2210 ip
->ip_dst
= p
->ipopt_dst
;
2211 if (m
->m_flags
& M_EXT
|| m
->m_data
- optlen
< m
->m_pktdat
) {
2212 MGETHDR(n
, M_DONTWAIT
, MT_HEADER
); /* MAC-OK */
2215 n
->m_pkthdr
.rcvif
= 0;
2217 mac_mbuf_label_copy(m
, n
);
2219 n
->m_pkthdr
.len
= m
->m_pkthdr
.len
+ optlen
;
2220 m
->m_len
-= sizeof(struct ip
);
2221 m
->m_data
+= sizeof(struct ip
);
2224 m
->m_len
= optlen
+ sizeof(struct ip
);
2225 m
->m_data
+= max_linkhdr
;
2226 (void)memcpy(mtod(m
, void *), ip
, sizeof(struct ip
));
2228 m
->m_data
-= optlen
;
2230 m
->m_pkthdr
.len
+= optlen
;
2231 ovbcopy((caddr_t
)ip
, mtod(m
, caddr_t
), sizeof(struct ip
));
2233 ip
= mtod(m
, struct ip
*);
2234 bcopy(p
->ipopt_list
, ip
+ 1, optlen
);
2235 *phlen
= sizeof(struct ip
) + optlen
;
2236 ip
->ip_vhl
= IP_MAKE_VHL(IPVERSION
, *phlen
>> 2);
2237 ip
->ip_len
+= optlen
;
2242 * Copy options from ip to jp,
2243 * omitting those not copied during fragmentation.
2249 register u_char
*cp
, *dp
;
2250 int opt
, optlen
, cnt
;
2252 cp
= (u_char
*)(ip
+ 1);
2253 dp
= (u_char
*)(jp
+ 1);
2254 cnt
= (IP_VHL_HL(ip
->ip_vhl
) << 2) - sizeof (struct ip
);
2255 for (; cnt
> 0; cnt
-= optlen
, cp
+= optlen
) {
2257 if (opt
== IPOPT_EOL
)
2259 if (opt
== IPOPT_NOP
) {
2260 /* Preserve for IP mcast tunnel's LSRR alignment. */
2266 if (cnt
< IPOPT_OLEN
+ sizeof(*cp
))
2267 panic("malformed IPv4 option passed to ip_optcopy");
2269 optlen
= cp
[IPOPT_OLEN
];
2271 if (optlen
< IPOPT_OLEN
+ sizeof(*cp
) || optlen
> cnt
)
2272 panic("malformed IPv4 option passed to ip_optcopy");
2274 /* bogus lengths should have been caught by ip_dooptions */
2277 if (IPOPT_COPIED(opt
)) {
2278 bcopy(cp
, dp
, optlen
);
2282 for (optlen
= dp
- (u_char
*)(jp
+1); optlen
& 0x3; optlen
++)
2288 * IP socket option processing.
2291 ip_ctloutput(so
, sopt
)
2293 struct sockopt
*sopt
;
2295 struct inpcb
*inp
= sotoinpcb(so
);
2299 if (sopt
->sopt_level
!= IPPROTO_IP
) {
2303 switch (sopt
->sopt_dir
) {
2305 switch (sopt
->sopt_name
) {
2312 if (sopt
->sopt_valsize
> MLEN
) {
2316 MGET(m
, sopt
->sopt_p
!= kernproc
? M_WAIT
: M_DONTWAIT
,
2322 m
->m_len
= sopt
->sopt_valsize
;
2323 error
= sooptcopyin(sopt
, mtod(m
, char *), m
->m_len
,
2328 return (ip_pcbopts(sopt
->sopt_name
, &inp
->inp_options
,
2335 case IP_RECVRETOPTS
:
2336 case IP_RECVDSTADDR
:
2339 case IP_RECVPKTINFO
:
2340 error
= sooptcopyin(sopt
, &optval
, sizeof optval
,
2345 switch (sopt
->sopt_name
) {
2347 inp
->inp_ip_tos
= optval
;
2351 inp
->inp_ip_ttl
= optval
;
2353 #define OPTSET(bit) \
2355 inp->inp_flags |= bit; \
2357 inp->inp_flags &= ~bit;
2360 OPTSET(INP_RECVOPTS
);
2363 case IP_RECVRETOPTS
:
2364 OPTSET(INP_RECVRETOPTS
);
2367 case IP_RECVDSTADDR
:
2368 OPTSET(INP_RECVDSTADDR
);
2376 OPTSET(INP_RECVTTL
);
2379 case IP_RECVPKTINFO
:
2380 OPTSET(INP_PKTINFO
);
2386 #if CONFIG_FORCE_OUT_IFP
2388 * Apple private interface, similar to IP_BOUND_IF, except
2389 * that the parameter is a NULL-terminated string containing
2390 * the name of the network interface; an emptry string means
2391 * unbind. Applications are encouraged to use IP_BOUND_IF
2392 * instead, as that is the current "official" API.
2394 case IP_FORCE_OUT_IFP
: {
2395 char ifname
[IFNAMSIZ
];
2396 unsigned int ifscope
;
2398 /* This option is settable only for IPv4 */
2399 if (!(inp
->inp_vflag
& INP_IPV4
)) {
2404 /* Verify interface name parameter is sane */
2405 if (sopt
->sopt_valsize
> sizeof(ifname
)) {
2410 /* Copy the interface name */
2411 if (sopt
->sopt_valsize
!= 0) {
2412 error
= sooptcopyin(sopt
, ifname
,
2413 sizeof (ifname
), sopt
->sopt_valsize
);
2418 if (sopt
->sopt_valsize
== 0 || ifname
[0] == '\0') {
2419 /* Unbind this socket from any interface */
2420 ifscope
= IFSCOPE_NONE
;
2424 /* Verify name is NULL terminated */
2425 if (ifname
[sopt
->sopt_valsize
- 1] != '\0') {
2430 /* Bail out if given bogus interface name */
2431 if (ifnet_find_by_name(ifname
, &ifp
) != 0) {
2436 /* Bind this socket to this interface */
2437 ifscope
= ifp
->if_index
;
2440 * Won't actually free; since we don't release
2441 * this later, we should do it now.
2445 error
= inp_bindif(inp
, ifscope
);
2450 * Multicast socket options are processed by the in_mcast
2453 case IP_MULTICAST_IF
:
2454 case IP_MULTICAST_IFINDEX
:
2455 case IP_MULTICAST_VIF
:
2456 case IP_MULTICAST_TTL
:
2457 case IP_MULTICAST_LOOP
:
2458 case IP_ADD_MEMBERSHIP
:
2459 case IP_DROP_MEMBERSHIP
:
2460 case IP_ADD_SOURCE_MEMBERSHIP
:
2461 case IP_DROP_SOURCE_MEMBERSHIP
:
2462 case IP_BLOCK_SOURCE
:
2463 case IP_UNBLOCK_SOURCE
:
2465 case MCAST_JOIN_GROUP
:
2466 case MCAST_LEAVE_GROUP
:
2467 case MCAST_JOIN_SOURCE_GROUP
:
2468 case MCAST_LEAVE_SOURCE_GROUP
:
2469 case MCAST_BLOCK_SOURCE
:
2470 case MCAST_UNBLOCK_SOURCE
:
2471 error
= inp_setmoptions(inp
, sopt
);
2475 error
= sooptcopyin(sopt
, &optval
, sizeof optval
,
2481 case IP_PORTRANGE_DEFAULT
:
2482 inp
->inp_flags
&= ~(INP_LOWPORT
);
2483 inp
->inp_flags
&= ~(INP_HIGHPORT
);
2486 case IP_PORTRANGE_HIGH
:
2487 inp
->inp_flags
&= ~(INP_LOWPORT
);
2488 inp
->inp_flags
|= INP_HIGHPORT
;
2491 case IP_PORTRANGE_LOW
:
2492 inp
->inp_flags
&= ~(INP_HIGHPORT
);
2493 inp
->inp_flags
|= INP_LOWPORT
;
2503 case IP_IPSEC_POLICY
:
2511 if ((error
= soopt_getm(sopt
, &m
)) != 0) /* XXX */
2513 if ((error
= soopt_mcopyin(sopt
, m
)) != 0) /* XXX */
2515 priv
= (proc_suser(sopt
->sopt_p
) == 0);
2517 req
= mtod(m
, caddr_t
);
2520 optname
= sopt
->sopt_name
;
2521 error
= ipsec4_set_policy(inp
, optname
, req
, len
, priv
);
2528 case IP_TRAFFIC_MGT_BACKGROUND
:
2530 unsigned background
= 0;
2531 error
= sooptcopyin(sopt
, &background
, sizeof(background
), sizeof(background
));
2536 socket_set_traffic_mgt_flags_locked(so
,
2537 TRAFFIC_MGT_SO_BACKGROUND
);
2539 socket_clear_traffic_mgt_flags_locked(so
,
2540 TRAFFIC_MGT_SO_BACKGROUND
);
2545 #endif /* TRAFFIC_MGT */
2548 * On a multihomed system, scoped routing can be used to
2549 * restrict the source interface used for sending packets.
2550 * The socket option IP_BOUND_IF binds a particular AF_INET
2551 * socket to an interface such that data sent on the socket
2552 * is restricted to that interface. This is unlike the
2553 * SO_DONTROUTE option where the routing table is bypassed;
2554 * therefore it allows for a greater flexibility and control
2555 * over the system behavior, and does not place any restriction
2556 * on the destination address type (e.g. unicast, multicast,
2557 * or broadcast if applicable) or whether or not the host is
2558 * directly reachable. Note that in the multicast transmit
2559 * case, IP_MULTICAST_{IF,IFINDEX} takes precedence over
2560 * IP_BOUND_IF, since the former practically bypasses the
2561 * routing table; in this case, IP_BOUND_IF sets the default
2562 * interface used for sending multicast packets in the absence
2563 * of an explicit multicast transmit interface.
2566 /* This option is settable only for IPv4 */
2567 if (!(inp
->inp_vflag
& INP_IPV4
)) {
2572 error
= sooptcopyin(sopt
, &optval
, sizeof (optval
),
2578 error
= inp_bindif(inp
, optval
);
2581 case IP_NO_IFT_CELLULAR
:
2582 /* This option is settable only for IPv4 */
2583 if (!(inp
->inp_vflag
& INP_IPV4
)) {
2588 error
= sooptcopyin(sopt
, &optval
, sizeof (optval
),
2594 error
= inp_nocellular(inp
, optval
);
2598 /* This option is not settable */
2603 error
= ENOPROTOOPT
;
2609 switch (sopt
->sopt_name
) {
2612 if (inp
->inp_options
)
2613 error
= sooptcopyout(sopt
,
2614 mtod(inp
->inp_options
,
2616 inp
->inp_options
->m_len
);
2618 sopt
->sopt_valsize
= 0;
2624 case IP_RECVRETOPTS
:
2625 case IP_RECVDSTADDR
:
2629 case IP_RECVPKTINFO
:
2630 switch (sopt
->sopt_name
) {
2633 optval
= inp
->inp_ip_tos
;
2637 optval
= inp
->inp_ip_ttl
;
2640 #define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0)
2643 optval
= OPTBIT(INP_RECVOPTS
);
2646 case IP_RECVRETOPTS
:
2647 optval
= OPTBIT(INP_RECVRETOPTS
);
2650 case IP_RECVDSTADDR
:
2651 optval
= OPTBIT(INP_RECVDSTADDR
);
2655 optval
= OPTBIT(INP_RECVIF
);
2659 optval
= OPTBIT(INP_RECVTTL
);
2663 if (inp
->inp_flags
& INP_HIGHPORT
)
2664 optval
= IP_PORTRANGE_HIGH
;
2665 else if (inp
->inp_flags
& INP_LOWPORT
)
2666 optval
= IP_PORTRANGE_LOW
;
2671 case IP_RECVPKTINFO
:
2672 optval
= OPTBIT(INP_PKTINFO
);
2675 error
= sooptcopyout(sopt
, &optval
, sizeof optval
);
2678 case IP_MULTICAST_IF
:
2679 case IP_MULTICAST_IFINDEX
:
2680 case IP_MULTICAST_VIF
:
2681 case IP_MULTICAST_TTL
:
2682 case IP_MULTICAST_LOOP
:
2684 error
= inp_getmoptions(inp
, sopt
);
2688 case IP_IPSEC_POLICY
:
2690 struct mbuf
*m
= NULL
;
2695 req
= mtod(m
, caddr_t
);
2698 error
= ipsec4_get_policy(sotoinpcb(so
), req
, len
, &m
);
2700 error
= soopt_mcopyout(sopt
, m
); /* XXX */
2708 case IP_TRAFFIC_MGT_BACKGROUND
:
2710 unsigned background
= (so
->so_traffic_mgt_flags
& TRAFFIC_MGT_SO_BACKGROUND
);
2711 return (sooptcopyout(sopt
, &background
, sizeof(background
)));
2714 #endif /* TRAFFIC_MGT */
2717 if (inp
->inp_flags
& INP_BOUND_IF
)
2718 optval
= inp
->inp_boundifp
->if_index
;
2719 error
= sooptcopyout(sopt
, &optval
, sizeof (optval
));
2722 case IP_NO_IFT_CELLULAR
:
2723 optval
= (inp
->inp_flags
& INP_NO_IFT_CELLULAR
) ? 1 : 0;
2724 error
= sooptcopyout(sopt
, &optval
, sizeof (optval
));
2728 optval
= (inp
->inp_last_outifp
!= NULL
) ?
2729 inp
->inp_last_outifp
->if_index
: 0;
2730 error
= sooptcopyout(sopt
, &optval
, sizeof (optval
));
2734 error
= ENOPROTOOPT
;
2743 * Set up IP options in pcb for insertion in output packets.
2744 * Store in mbuf with pointer in pcbopt, adding pseudo-option
2745 * with destination address if source routed.
2749 __unused
int optname
,
2750 struct mbuf
**pcbopt
,
2751 register struct mbuf
*m
)
2753 register int cnt
, optlen
;
2754 register u_char
*cp
;
2757 /* turn off any old options */
2759 (void)m_free(*pcbopt
);
2761 if (m
== (struct mbuf
*)0 || m
->m_len
== 0) {
2763 * Only turning off any previous options.
2771 if (m
->m_len
% sizeof(int32_t))
2775 * IP first-hop destination address will be stored before
2776 * actual options; move other options back
2777 * and clear it when none present.
2779 if (m
->m_data
+ m
->m_len
+ sizeof(struct in_addr
) >= &m
->m_dat
[MLEN
])
2782 m
->m_len
+= sizeof(struct in_addr
);
2783 cp
= mtod(m
, u_char
*) + sizeof(struct in_addr
);
2784 ovbcopy(mtod(m
, caddr_t
), (caddr_t
)cp
, (unsigned)cnt
);
2785 bzero(mtod(m
, caddr_t
), sizeof(struct in_addr
));
2787 for (; cnt
> 0; cnt
-= optlen
, cp
+= optlen
) {
2788 opt
= cp
[IPOPT_OPTVAL
];
2789 if (opt
== IPOPT_EOL
)
2791 if (opt
== IPOPT_NOP
)
2794 if (cnt
< IPOPT_OLEN
+ sizeof(*cp
))
2796 optlen
= cp
[IPOPT_OLEN
];
2797 if (optlen
< IPOPT_OLEN
+ sizeof(*cp
) || optlen
> cnt
)
2808 * user process specifies route as:
2810 * D must be our final destination (but we can't
2811 * check that since we may not have connected yet).
2812 * A is first hop destination, which doesn't appear in
2813 * actual IP option, but is stored before the options.
2815 if (optlen
< IPOPT_MINOFF
- 1 + sizeof(struct in_addr
))
2817 m
->m_len
-= sizeof(struct in_addr
);
2818 cnt
-= sizeof(struct in_addr
);
2819 optlen
-= sizeof(struct in_addr
);
2820 cp
[IPOPT_OLEN
] = optlen
;
2822 * Move first hop before start of options.
2824 bcopy((caddr_t
)&cp
[IPOPT_OFFSET
+1], mtod(m
, caddr_t
),
2825 sizeof(struct in_addr
));
2827 * Then copy rest of options back
2828 * to close up the deleted entry.
2830 ovbcopy((caddr_t
)(&cp
[IPOPT_OFFSET
+1] +
2831 sizeof(struct in_addr
)),
2832 (caddr_t
)&cp
[IPOPT_OFFSET
+1],
2833 (unsigned)cnt
+ sizeof(struct in_addr
));
2837 if (m
->m_len
> MAX_IPOPTLEN
+ sizeof(struct in_addr
))
2848 ip_moptions_init(void)
2850 PE_parse_boot_argn("ifa_debug", &imo_debug
, sizeof (imo_debug
));
2852 imo_size
= (imo_debug
== 0) ? sizeof (struct ip_moptions
) :
2853 sizeof (struct ip_moptions_dbg
);
2855 imo_zone
= zinit(imo_size
, IMO_ZONE_MAX
* imo_size
, 0,
2857 if (imo_zone
== NULL
) {
2858 panic("%s: failed allocating %s", __func__
, IMO_ZONE_NAME
);
2861 zone_change(imo_zone
, Z_EXPAND
, TRUE
);
2865 imo_addref(struct ip_moptions
*imo
, int locked
)
2870 IMO_LOCK_ASSERT_HELD(imo
);
2872 if (++imo
->imo_refcnt
== 0) {
2873 panic("%s: imo %p wraparound refcnt\n", __func__
, imo
);
2875 } else if (imo
->imo_trace
!= NULL
) {
2876 (*imo
->imo_trace
)(imo
, TRUE
);
2884 imo_remref(struct ip_moptions
*imo
)
2889 if (imo
->imo_refcnt
== 0) {
2890 panic("%s: imo %p negative refcnt", __func__
, imo
);
2892 } else if (imo
->imo_trace
!= NULL
) {
2893 (*imo
->imo_trace
)(imo
, FALSE
);
2897 if (imo
->imo_refcnt
> 0) {
2902 for (i
= 0; i
< imo
->imo_num_memberships
; ++i
) {
2903 struct in_mfilter
*imf
;
2905 imf
= imo
->imo_mfilters
? &imo
->imo_mfilters
[i
] : NULL
;
2909 (void) in_leavegroup(imo
->imo_membership
[i
], imf
);
2914 INM_REMREF(imo
->imo_membership
[i
]);
2915 imo
->imo_membership
[i
] = NULL
;
2917 imo
->imo_num_memberships
= 0;
2918 if (imo
->imo_mfilters
!= NULL
) {
2919 FREE(imo
->imo_mfilters
, M_INMFILTER
);
2920 imo
->imo_mfilters
= NULL
;
2922 if (imo
->imo_membership
!= NULL
) {
2923 FREE(imo
->imo_membership
, M_IPMOPTS
);
2924 imo
->imo_membership
= NULL
;
2928 lck_mtx_destroy(&imo
->imo_lock
, ifa_mtx_grp
);
2930 if (!(imo
->imo_debug
& IFD_ALLOC
)) {
2931 panic("%s: imo %p cannot be freed", __func__
, imo
);
2934 zfree(imo_zone
, imo
);
2938 imo_trace(struct ip_moptions
*imo
, int refhold
)
2940 struct ip_moptions_dbg
*imo_dbg
= (struct ip_moptions_dbg
*)imo
;
2945 if (!(imo
->imo_debug
& IFD_DEBUG
)) {
2946 panic("%s: imo %p has no debug structure", __func__
, imo
);
2950 cnt
= &imo_dbg
->imo_refhold_cnt
;
2951 tr
= imo_dbg
->imo_refhold
;
2953 cnt
= &imo_dbg
->imo_refrele_cnt
;
2954 tr
= imo_dbg
->imo_refrele
;
2957 idx
= atomic_add_16_ov(cnt
, 1) % IMO_TRACE_HIST_SIZE
;
2958 ctrace_record(&tr
[idx
]);
2961 struct ip_moptions
*
2962 ip_allocmoptions(int how
)
2964 struct ip_moptions
*imo
;
2966 imo
= (how
== M_WAITOK
) ? zalloc(imo_zone
) : zalloc_noblock(imo_zone
);
2968 bzero(imo
, imo_size
);
2969 lck_mtx_init(&imo
->imo_lock
, ifa_mtx_grp
, ifa_mtx_attr
);
2970 imo
->imo_debug
|= IFD_ALLOC
;
2971 if (imo_debug
!= 0) {
2972 imo
->imo_debug
|= IFD_DEBUG
;
2973 imo
->imo_trace
= imo_trace
;
2982 * Routine called from ip_output() to loop back a copy of an IP multicast
2983 * packet to the input queue of a specified interface. Note that this
2984 * calls the output routine of the loopback "driver", but with an interface
2985 * pointer that might NOT be a loopback interface -- evil, but easier than
2986 * replicating that code here.
2989 ip_mloopback(ifp
, m
, dst
, hlen
)
2991 register struct mbuf
*m
;
2992 register struct sockaddr_in
*dst
;
2995 register struct ip
*ip
;
2997 int sw_csum
= (apple_hwcksum_tx
== 0);
2999 copym
= m_copy(m
, 0, M_COPYALL
);
3000 if (copym
!= NULL
&& (copym
->m_flags
& M_EXT
|| copym
->m_len
< hlen
))
3001 copym
= m_pullup(copym
, hlen
);
3007 * We don't bother to fragment if the IP length is greater
3008 * than the interface's MTU. Can this possibly matter?
3010 ip
= mtod(copym
, struct ip
*);
3012 #if BYTE_ORDER != BIG_ENDIAN
3018 ip
->ip_sum
= in_cksum(copym
, hlen
);
3021 * It's not clear whether there are any lingering
3022 * reentrancy problems in other areas which might
3023 * be exposed by using ip_input directly (in
3024 * particular, everything which modifies the packet
3025 * in-place). Yet another option is using the
3026 * protosw directly to deliver the looped back
3027 * packet. For the moment, we'll err on the side
3028 * of safety by using if_simloop().
3031 if (dst
->sin_family
!= AF_INET
) {
3032 printf("ip_mloopback: bad address family %d\n",
3034 dst
->sin_family
= AF_INET
;
3039 * Mark checksum as valid or calculate checksum for loopback.
3041 * This is done this way because we have to embed the ifp of
3042 * the interface we will send the original copy of the packet
3043 * out on in the mbuf. ip_input will check if_hwassist of the
3044 * embedded ifp and ignore all csum_flags if if_hwassist is 0.
3045 * The UDP checksum has not been calculated yet.
3047 if (sw_csum
|| (copym
->m_pkthdr
.csum_flags
& CSUM_DELAY_DATA
)) {
3048 if (!sw_csum
&& IF_HWASSIST_CSUM_FLAGS(ifp
->if_hwassist
)) {
3049 copym
->m_pkthdr
.csum_flags
|=
3050 CSUM_DATA_VALID
| CSUM_PSEUDO_HDR
|
3051 CSUM_IP_CHECKED
| CSUM_IP_VALID
;
3052 copym
->m_pkthdr
.csum_data
= 0xffff;
3055 #if BYTE_ORDER != BIG_ENDIAN
3059 in_delayed_cksum(copym
);
3061 #if BYTE_ORDER != BIG_ENDIAN
3070 * We need to send all loopback traffic down to dlil in case
3071 * a filter has tapped-in.
3075 * Stuff the 'real' ifp into the pkthdr, to be used in matching
3076 * in ip_input(); we need the loopback ifp/dl_tag passed as args
3077 * to make the loopback driver compliant with the data link
3081 copym
->m_pkthdr
.rcvif
= ifp
;
3082 dlil_output(lo_ifp
, PF_INET
, copym
, 0,
3083 (struct sockaddr
*) dst
, 0, NULL
);
3085 printf("Warning: ip_output call to dlil_find_dltag failed!\n");
3091 * Given a source IP address (and route, if available), determine the best
3092 * interface to send the packet from. Checking for (and updating) the
3093 * ROF_SRCIF_SELECTED flag in the pcb-supplied route placeholder is done
3094 * without any locks based on the assumption that ip_output() is single-
3095 * threaded per-pcb, i.e. for any given pcb there can only be one thread
3096 * performing output at the IP layer.
3098 * This routine is analogous to in6_selectroute() for IPv6.
3100 static struct ifaddr
*
3101 in_selectsrcif(struct ip
*ip
, struct route
*ro
, unsigned int ifscope
)
3103 struct ifaddr
*ifa
= NULL
;
3104 struct in_addr src
= ip
->ip_src
;
3105 struct in_addr dst
= ip
->ip_dst
;
3106 struct ifnet
*rt_ifp
;
3107 char s_src
[MAX_IPv4_STR_LEN
], s_dst
[MAX_IPv4_STR_LEN
];
3109 if (ip_select_srcif_debug
) {
3110 (void) inet_ntop(AF_INET
, &src
.s_addr
, s_src
, sizeof (s_src
));
3111 (void) inet_ntop(AF_INET
, &dst
.s_addr
, s_dst
, sizeof (s_dst
));
3114 if (ro
->ro_rt
!= NULL
)
3117 rt_ifp
= (ro
->ro_rt
!= NULL
) ? ro
->ro_rt
->rt_ifp
: NULL
;
3120 * Given the source IP address, find a suitable source interface
3121 * to use for transmission; if the caller has specified a scope,
3122 * optimize the search by looking at the addresses only for that
3123 * interface. This is still suboptimal, however, as we need to
3124 * traverse the per-interface list.
3126 if (ifscope
!= IFSCOPE_NONE
|| ro
->ro_rt
!= NULL
) {
3127 unsigned int scope
= ifscope
;
3130 * If no scope is specified and the route is stale (pointing
3131 * to a defunct interface) use the current primary interface;
3132 * this happens when switching between interfaces configured
3133 * with the same IP address. Otherwise pick up the scope
3134 * information from the route; the ULP may have looked up a
3135 * correct route and we just need to verify it here and mark
3136 * it with the ROF_SRCIF_SELECTED flag below.
3138 if (scope
== IFSCOPE_NONE
) {
3139 scope
= rt_ifp
->if_index
;
3140 if (scope
!= get_primary_ifscope(AF_INET
) &&
3141 ro
->ro_rt
->generation_id
!= route_generation
)
3142 scope
= get_primary_ifscope(AF_INET
);
3145 ifa
= (struct ifaddr
*)ifa_foraddr_scoped(src
.s_addr
, scope
);
3147 if (ifa
== NULL
&& ip
->ip_p
!= IPPROTO_UDP
&&
3148 ip
->ip_p
!= IPPROTO_TCP
&& ipforwarding
) {
3150 * If forwarding is enabled, and if the packet isn't
3151 * TCP or UDP, check if the source address belongs
3152 * to one of our own interfaces; if so, demote the
3153 * interface scope and do a route lookup right below.
3155 ifa
= (struct ifaddr
*)ifa_foraddr(src
.s_addr
);
3159 ifscope
= IFSCOPE_NONE
;
3163 if (ip_select_srcif_debug
&& ifa
!= NULL
) {
3164 if (ro
->ro_rt
!= NULL
) {
3165 printf("%s->%s ifscope %d->%d ifa_if %s "
3166 "ro_if %s\n", s_src
, s_dst
, ifscope
,
3167 scope
, if_name(ifa
->ifa_ifp
),
3170 printf("%s->%s ifscope %d->%d ifa_if %s\n",
3171 s_src
, s_dst
, ifscope
, scope
,
3172 if_name(ifa
->ifa_ifp
));
3178 * Slow path; search for an interface having the corresponding source
3179 * IP address if the scope was not specified by the caller, and:
3181 * 1) There currently isn't any route, or,
3182 * 2) The interface used by the route does not own that source
3183 * IP address; in this case, the route will get blown away
3184 * and we'll do a more specific scoped search using the newly
3187 if (ifa
== NULL
&& ifscope
== IFSCOPE_NONE
) {
3188 ifa
= (struct ifaddr
*)ifa_foraddr(src
.s_addr
);
3191 * If we have the IP address, but not the route, we don't
3192 * really know whether or not it belongs to the correct
3193 * interface (it could be shared across multiple interfaces.)
3194 * The only way to find out is to do a route lookup.
3196 if (ifa
!= NULL
&& ro
->ro_rt
== NULL
) {
3198 struct sockaddr_in sin
;
3199 struct ifaddr
*oifa
= NULL
;
3201 bzero(&sin
, sizeof (sin
));
3202 sin
.sin_family
= AF_INET
;
3203 sin
.sin_len
= sizeof (sin
);
3206 lck_mtx_lock(rnh_lock
);
3207 if ((rt
= rt_lookup(TRUE
, (struct sockaddr
*)&sin
, NULL
,
3208 rt_tables
[AF_INET
], IFSCOPE_NONE
)) != NULL
) {
3211 * If the route uses a different interface,
3212 * use that one instead. The IP address of
3213 * the ifaddr that we pick up here is not
3216 if (ifa
->ifa_ifp
!= rt
->rt_ifp
) {
3226 lck_mtx_unlock(rnh_lock
);
3229 struct ifaddr
*iifa
;
3232 * See if the interface pointed to by the
3233 * route is configured with the source IP
3234 * address of the packet.
3236 iifa
= (struct ifaddr
*)ifa_foraddr_scoped(
3237 src
.s_addr
, ifa
->ifa_ifp
->if_index
);
3241 * Found it; drop the original one
3242 * as well as the route interface
3243 * address, and use this instead.
3248 } else if (!ipforwarding
||
3249 (rt
->rt_flags
& RTF_GATEWAY
)) {
3251 * This interface doesn't have that
3252 * source IP address; drop the route
3253 * interface address and just use the
3254 * original one, and let the caller
3255 * do a scoped route lookup.
3261 * Forwarding is enabled and the source
3262 * address belongs to one of our own
3263 * interfaces which isn't the outgoing
3264 * interface, and we have a route, and
3265 * the destination is on a network that
3266 * is directly attached (onlink); drop
3267 * the original one and use the route
3268 * interface address instead.
3273 } else if (ifa
!= NULL
&& ro
->ro_rt
!= NULL
&&
3274 !(ro
->ro_rt
->rt_flags
& RTF_GATEWAY
) &&
3275 ifa
->ifa_ifp
!= ro
->ro_rt
->rt_ifp
&& ipforwarding
) {
3277 * Forwarding is enabled and the source address belongs
3278 * to one of our own interfaces which isn't the same
3279 * as the interface used by the known route; drop the
3280 * original one and use the route interface address.
3283 ifa
= ro
->ro_rt
->rt_ifa
;
3287 if (ip_select_srcif_debug
&& ifa
!= NULL
) {
3288 printf("%s->%s ifscope %d ifa_if %s\n",
3289 s_src
, s_dst
, ifscope
, if_name(ifa
->ifa_ifp
));
3293 if (ro
->ro_rt
!= NULL
)
3294 RT_LOCK_ASSERT_HELD(ro
->ro_rt
);
3296 * If there is a non-loopback route with the wrong interface, or if
3297 * there is no interface configured with such an address, blow it
3298 * away. Except for local/loopback, we look for one with a matching
3299 * interface scope/index.
3301 if (ro
->ro_rt
!= NULL
&&
3302 (ifa
== NULL
|| (ifa
->ifa_ifp
!= rt_ifp
&& rt_ifp
!= lo_ifp
) ||
3303 !(ro
->ro_rt
->rt_flags
& RTF_UP
))) {
3304 if (ip_select_srcif_debug
) {
3306 printf("%s->%s ifscope %d ro_if %s != "
3307 "ifa_if %s (cached route cleared)\n",
3308 s_src
, s_dst
, ifscope
, if_name(rt_ifp
),
3309 if_name(ifa
->ifa_ifp
));
3311 printf("%s->%s ifscope %d ro_if %s "
3312 "(no ifa_if found)\n",
3313 s_src
, s_dst
, ifscope
, if_name(rt_ifp
));
3317 RT_UNLOCK(ro
->ro_rt
);
3320 ro
->ro_flags
&= ~ROF_SRCIF_SELECTED
;
3323 * If the destination is IPv4 LLA and the route's interface
3324 * doesn't match the source interface, then the source IP
3325 * address is wrong; it most likely belongs to the primary
3326 * interface associated with the IPv4 LL subnet. Drop the
3327 * packet rather than letting it go out and return an error
3328 * to the ULP. This actually applies not only to IPv4 LL
3329 * but other shared subnets; for now we explicitly test only
3330 * for the former case and save the latter for future.
3332 if (IN_LINKLOCAL(ntohl(dst
.s_addr
)) &&
3333 !IN_LINKLOCAL(ntohl(src
.s_addr
)) && ifa
!= NULL
) {
3339 if (ip_select_srcif_debug
&& ifa
== NULL
) {
3340 printf("%s->%s ifscope %d (neither ro_if/ifa_if found)\n",
3341 s_src
, s_dst
, ifscope
);
3345 * If there is a route, mark it accordingly. If there isn't one,
3346 * we'll get here again during the next transmit (possibly with a
3347 * route) and the flag will get set at that point. For IPv4 LLA
3348 * destination, mark it only if the route has been fully resolved;
3349 * otherwise we want to come back here again when the route points
3350 * to the interface over which the ARP reply arrives on.
3352 if (ro
->ro_rt
!= NULL
&& (!IN_LINKLOCAL(ntohl(dst
.s_addr
)) ||
3353 (ro
->ro_rt
->rt_gateway
->sa_family
== AF_LINK
&&
3354 SDL(ro
->ro_rt
->rt_gateway
)->sdl_alen
!= 0))) {
3355 ro
->ro_flags
|= ROF_SRCIF_SELECTED
;
3356 ro
->ro_rt
->generation_id
= route_generation
;
3359 if (ro
->ro_rt
!= NULL
)
3360 RT_UNLOCK(ro
->ro_rt
);