2 * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 * Copyright (c) 1982, 1986, 1988, 1990, 1993
30 * The Regents of the University of California. All rights reserved.
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94
63 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
64 * support for mandatory and extensible security protections. This notice
65 * is included in support of clause 2.2 (b) of the Apple Public License,
71 #include <sys/param.h>
72 #include <sys/systm.h>
73 #include <sys/kernel.h>
74 #include <sys/malloc.h>
76 #include <sys/protosw.h>
77 #include <sys/socket.h>
78 #include <sys/socketvar.h>
79 #include <kern/locks.h>
80 #include <sys/sysctl.h>
81 #include <sys/mcache.h>
82 #include <sys/kdebug.h>
84 #include <machine/endian.h>
85 #include <pexpert/pexpert.h>
88 #include <libkern/OSAtomic.h>
89 #include <libkern/OSByteOrder.h>
92 #include <net/if_dl.h>
93 #include <net/if_types.h>
94 #include <net/route.h>
95 #include <net/ntstat.h>
96 #include <net/net_osdep.h>
99 #include <netinet/in.h>
100 #include <netinet/in_systm.h>
101 #include <netinet/ip.h>
102 #include <netinet/in_pcb.h>
103 #include <netinet/in_var.h>
104 #include <netinet/ip_var.h>
105 #include <netinet/kpi_ipfilter_var.h>
108 #include <security/mac_framework.h>
109 #endif /* CONFIG_MACF_NET */
111 #define DBG_LAYER_BEG NETDBG_CODE(DBG_NETIP, 1)
112 #define DBG_LAYER_END NETDBG_CODE(DBG_NETIP, 3)
113 #define DBG_FNC_IP_OUTPUT NETDBG_CODE(DBG_NETIP, (1 << 8) | 1)
114 #define DBG_FNC_IPSEC4_OUTPUT NETDBG_CODE(DBG_NETIP, (2 << 8) | 1)
117 #include <netinet6/ipsec.h>
118 #include <netkey/key.h>
120 #include <netkey/key_debug.h>
122 #define KEYDEBUG(lev, arg)
127 #include <netinet/ip_fw.h>
129 #include <netinet/ip_divert.h>
130 #endif /* IPDIVERT */
131 #endif /* IPFIREWALL */
134 #include <netinet/ip_dummynet.h>
138 #include <net/pfvar.h>
#if IPFIREWALL_FORWARD && IPFIREWALL_FORWARD_DEBUG
/*
 * print_ip(a): debug helper that printf()s the address stored in a.s_addr
 * as four dot-separated decimal octets, most significant octet first, with
 * no trailing newline.
 *
 * a.s_addr is passed through ntohl() and each octet is obtained by shifting
 * and masking with 0xFF.  ntohl() returns a 32-bit unsigned quantity, so the
 * octets are cast to unsigned int and printed with %u; a %ld conversion does
 * not match that argument type.
 *
 * The argument is expanded four times, so it must be free of side effects.
 * The expansion still ends in a semicolon so that existing call sites keep
 * compiling unchanged.
 */
#define	print_ip(a)							\
	printf("%u.%u.%u.%u",						\
	    (unsigned int)((ntohl((a).s_addr) >> 24) & 0xFF),		\
	    (unsigned int)((ntohl((a).s_addr) >> 16) & 0xFF),		\
	    (unsigned int)((ntohl((a).s_addr) >> 8) & 0xFF),		\
	    (unsigned int)((ntohl((a).s_addr)) & 0xFF));
#endif /* IPFIREWALL_FORWARD && IPFIREWALL_FORWARD_DEBUG */
/*
 * Forward declarations of helpers with internal linkage (file-local).
 */
static void ip_out_cksum_stats(int, u_int32_t);
static struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *);
static int ip_optcopy(struct ip *, struct ip *);
static int ip_pcbopts(int, struct mbuf **, struct mbuf *);
static void imo_trace(struct ip_moptions *, int);
static void ip_mloopback(struct ifnet *, struct ifnet *, struct mbuf *,
    struct sockaddr_in *, int);
static struct ifaddr *in_selectsrcif(struct ip *, struct route *,
    unsigned int);
160 extern struct ip_linklocal_stat ip_linklocal_stat
;
162 /* temporary: for testing */
164 extern int ipsec_bypass
;
167 static int ip_maxchainsent
= 0;
168 SYSCTL_INT(_net_inet_ip
, OID_AUTO
, maxchainsent
,
169 CTLFLAG_RW
| CTLFLAG_LOCKED
, &ip_maxchainsent
, 0,
170 "use dlil_output_list");
172 static int forge_ce
= 0;
173 SYSCTL_INT(_net_inet_ip
, OID_AUTO
, forge_ce
,
174 CTLFLAG_RW
| CTLFLAG_LOCKED
, &forge_ce
, 0,
178 static int ip_select_srcif_debug
= 0;
179 SYSCTL_INT(_net_inet_ip
, OID_AUTO
, select_srcif_debug
,
180 CTLFLAG_RW
| CTLFLAG_LOCKED
, &ip_select_srcif_debug
, 0,
181 "log source interface selection debug info");
183 #define IMO_TRACE_HIST_SIZE 32 /* size of trace history */
186 __private_extern__
unsigned int imo_trace_hist_size
= IMO_TRACE_HIST_SIZE
;
188 struct ip_moptions_dbg
{
189 struct ip_moptions imo
; /* ip_moptions */
190 u_int16_t imo_refhold_cnt
; /* # of IMO_ADDREF */
191 u_int16_t imo_refrele_cnt
; /* # of IMO_REMREF */
193 * Alloc and free callers.
198 * Circular lists of IMO_ADDREF and IMO_REMREF callers.
200 ctrace_t imo_refhold
[IMO_TRACE_HIST_SIZE
];
201 ctrace_t imo_refrele
[IMO_TRACE_HIST_SIZE
];
205 static unsigned int imo_debug
= 1; /* debugging (enabled) */
207 static unsigned int imo_debug
; /* debugging (disabled) */
209 static unsigned int imo_size
; /* size of zone element */
210 static struct zone
*imo_zone
; /* zone for ip_moptions */
212 #define IMO_ZONE_MAX 64 /* maximum elements in zone */
213 #define IMO_ZONE_NAME "ip_moptions" /* zone name */
216 * IP output. The packet in mbuf chain m contains a skeletal IP
217 * header (with len, off, ttl, proto, tos, src, dst).
218 * The mbuf chain containing the packet will be freed.
219 * The mbuf opt, if present, will not be freed.
/*
 * Convenience entry point for IP output.
 *
 * Forwards m0, opt, ro, flags, imo and ipoa unchanged to ip_output_list(),
 * supplying 0 for that function's packetchain argument, and hands back
 * whatever ip_output_list() returns.
 */
int
ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro, int flags,
    struct ip_moptions *imo, struct ip_out_args *ipoa)
{
	int result;

	result = ip_output_list(m0, 0, opt, ro, flags, imo, ipoa);
	return (result);
}
229 * IP output. The packet in mbuf chain m contains a skeletal IP
230 * header (with len, off, ttl, proto, tos, src, dst).
231 * The mbuf chain containing the packet will be freed.
232 * The mbuf opt, if present, will not be freed.
234 * Route ro MUST be non-NULL; if ro->ro_rt is valid, route lookup would be
235 * skipped and ro->ro_rt would be used. Otherwise the result of route
236 * lookup is stored in ro->ro_rt.
238 * In the IP forwarding case, the packet will arrive with options already
239 * inserted, so must have a NULL opt pointer.
242 ip_output_list(struct mbuf
*m0
, int packetchain
, struct mbuf
*opt
,
243 struct route
*ro
, int flags
, struct ip_moptions
*imo
,
244 struct ip_out_args
*ipoa
)
247 struct ifnet
*ifp
= NULL
; /* not refcnt'd */
248 struct mbuf
*m
= m0
, *prevnxt
= NULL
, **mppn
= &prevnxt
;
249 int hlen
= sizeof (struct ip
);
250 int len
= 0, error
= 0;
251 struct sockaddr_in
*dst
= NULL
;
252 struct in_ifaddr
*ia
= NULL
, *src_ia
= NULL
;
253 struct in_addr pkt_dst
;
254 struct ipf_pktopts
*ippo
= NULL
;
255 ipfilter_t inject_filter_ref
= NULL
;
256 struct mbuf
*packetlist
;
257 uint32_t sw_csum
, pktcnt
= 0, scnt
= 0, bytecnt
= 0;
258 unsigned int ifscope
= IFSCOPE_NONE
;
259 struct flowadv
*adv
= NULL
;
261 struct socket
*so
= NULL
;
262 struct secpolicy
*sp
= NULL
;
266 struct sockaddr_in
*next_hop_from_ipfwd_tag
= NULL
;
267 #endif /* IPFIREWALL */
268 #if IPFIREWALL || DUMMYNET
270 #endif /* IPFIREWALL || DUMMYNET */
272 struct ip_out_args saved_ipoa
;
273 struct sockaddr_in dst_buf
;
274 #endif /* DUMMYNET */
277 struct ipsec_output_state ipsec_state
;
279 #if IPFIREWALL || DUMMYNET
280 struct ip_fw_args args
;
281 #endif /* IPFIREWALL || DUMMYNET */
282 #if IPFIREWALL_FORWARD
283 struct route sro_fwd
;
284 #endif /* IPFIREWALL_FORWARD */
286 struct route saved_route
;
287 #endif /* DUMMYNET */
288 struct ipf_pktopts ipf_pktopts
;
290 #define ipsec_state ipobz.ipsec_state
291 #define args ipobz.args
292 #define sro_fwd ipobz.sro_fwd
293 #define saved_route ipobz.saved_route
294 #define ipf_pktopts ipobz.ipf_pktopts
297 boolean_t select_srcif
: 1; /* set once */
298 boolean_t srcbound
: 1; /* set once */
299 boolean_t nocell
: 1; /* set once */
300 boolean_t isbroadcast
: 1;
301 boolean_t didfilter
: 1;
302 #if IPFIREWALL_FORWARD
303 boolean_t fwd_rewrite_src
: 1;
304 #endif /* IPFIREWALL_FORWARD */
307 } ipobf
= { .raw
= 0 };
309 KERNEL_DEBUG(DBG_FNC_IP_OUTPUT
| DBG_FUNC_START
, 0, 0, 0, 0, 0);
311 VERIFY(m0
->m_flags
& M_PKTHDR
);
314 /* zero out {ipsec_state, args, sro_fwd, saved_route, ipf_pktopts} */
315 bzero(&ipobz
, sizeof (ipobz
));
318 #if IPFIREWALL || DUMMYNET
319 if (SLIST_EMPTY(&m0
->m_pkthdr
.tags
))
322 /* Grab info from mtags prepended to the chain */
324 if ((tag
= m_tag_locate(m0
, KERNEL_MODULE_TAG_ID
,
325 KERNEL_TAG_TYPE_DUMMYNET
, NULL
)) != NULL
) {
326 struct dn_pkt_tag
*dn_tag
;
328 dn_tag
= (struct dn_pkt_tag
*)(tag
+1);
329 args
.fwa_ipfw_rule
= dn_tag
->dn_ipfw_rule
;
330 args
.fwa_pf_rule
= dn_tag
->dn_pf_rule
;
332 saved_route
= dn_tag
->dn_ro
;
336 bcopy(&dn_tag
->dn_dst
, &dst_buf
, sizeof (dst_buf
));
338 ifp
= dn_tag
->dn_ifp
;
339 flags
= dn_tag
->dn_flags
;
340 if ((dn_tag
->dn_flags
& IP_OUTARGS
)) {
341 saved_ipoa
= dn_tag
->dn_ipoa
;
345 m_tag_delete(m0
, tag
);
347 #endif /* DUMMYNET */
350 if ((tag
= m_tag_locate(m0
, KERNEL_MODULE_TAG_ID
,
351 KERNEL_TAG_TYPE_DIVERT
, NULL
)) != NULL
) {
352 struct divert_tag
*div_tag
;
354 div_tag
= (struct divert_tag
*)(tag
+1);
355 args
.fwa_divert_rule
= div_tag
->cookie
;
357 m_tag_delete(m0
, tag
);
359 #endif /* IPDIVERT */
362 if ((tag
= m_tag_locate(m0
, KERNEL_MODULE_TAG_ID
,
363 KERNEL_TAG_TYPE_IPFORWARD
, NULL
)) != NULL
) {
364 struct ip_fwd_tag
*ipfwd_tag
;
366 ipfwd_tag
= (struct ip_fwd_tag
*)(tag
+1);
367 next_hop_from_ipfwd_tag
= ipfwd_tag
->next_hop
;
369 m_tag_delete(m0
, tag
);
371 #endif /* IPFIREWALL */
374 #endif /* IPFIREWALL || DUMMYNET */
377 m
->m_pkthdr
.pkt_flags
&= ~(PKTF_LOOP
|PKTF_IFAINFO
);
380 if (ipsec_bypass
== 0 && !(flags
& IP_NOIPSEC
)) {
381 /* If packet is bound to an interface, check bound policies */
382 if ((flags
& IP_OUTARGS
) && (ipoa
!= NULL
) &&
383 (ipoa
->ipoa_flags
& IPOAF_BOUND_IF
) &&
384 ipoa
->ipoa_boundif
!= IFSCOPE_NONE
) {
385 if (ipsec4_getpolicybyinterface(m
, IPSEC_DIR_OUTBOUND
,
386 &flags
, ipoa
, &sp
) != 0)
394 if (ip_doscopedroute
&& (flags
& IP_OUTARGS
)) {
396 * In the forwarding case, only the ifscope value is used,
397 * as source interface selection doesn't take place.
399 if ((ipobf
.select_srcif
= (!(flags
& IP_FORWARDING
) &&
400 (ipoa
->ipoa_flags
& IPOAF_SELECT_SRCIF
)))) {
401 ipf_pktopts
.ippo_flags
|= IPPOF_SELECT_SRCIF
;
404 if ((ipoa
->ipoa_flags
& IPOAF_BOUND_IF
) &&
405 ipoa
->ipoa_boundif
!= IFSCOPE_NONE
) {
406 ifscope
= ipoa
->ipoa_boundif
;
407 ipf_pktopts
.ippo_flags
|=
408 (IPPOF_BOUND_IF
| (ifscope
<< IPPOF_SHIFT_IFSCOPE
));
411 /* double negation needed for bool bit field */
412 ipobf
.srcbound
= !!(ipoa
->ipoa_flags
& IPOAF_BOUND_SRCADDR
);
414 ipf_pktopts
.ippo_flags
|= IPPOF_BOUND_SRCADDR
;
416 ipobf
.select_srcif
= FALSE
;
417 ipobf
.srcbound
= FALSE
;
418 ifscope
= IFSCOPE_NONE
;
419 if (flags
& IP_OUTARGS
) {
420 ipoa
->ipoa_boundif
= IFSCOPE_NONE
;
421 ipoa
->ipoa_flags
&= ~(IPOAF_SELECT_SRCIF
|
422 IPOAF_BOUND_IF
| IPOAF_BOUND_SRCADDR
);
426 if ((flags
& IP_OUTARGS
) && (ipoa
->ipoa_flags
& IPOAF_NO_CELLULAR
)) {
428 ipf_pktopts
.ippo_flags
|= IPPOF_NO_IFT_CELLULAR
;
431 if (flags
& IP_OUTARGS
) {
432 adv
= &ipoa
->ipoa_flowadv
;
433 adv
->code
= FADV_SUCCESS
;
434 ipoa
->ipoa_retflags
= 0;
438 if (args
.fwa_ipfw_rule
!= NULL
|| args
.fwa_pf_rule
!= NULL
) {
439 /* dummynet already saw us */
440 ip
= mtod(m
, struct ip
*);
441 hlen
= IP_VHL_HL(ip
->ip_vhl
) << 2;
442 pkt_dst
= ip
->ip_dst
;
443 if (ro
->ro_rt
!= NULL
) {
444 RT_LOCK_SPIN(ro
->ro_rt
);
445 ia
= (struct in_ifaddr
*)ro
->ro_rt
->rt_ifa
;
447 /* Become a regular mutex */
448 RT_CONVERT_LOCK(ro
->ro_rt
);
449 IFA_ADDREF(&ia
->ia_ifa
);
451 RT_UNLOCK(ro
->ro_rt
);
454 if (ipsec_bypass
== 0 && !(flags
& IP_NOIPSEC
)) {
455 so
= ipsec_getsocket(m
);
456 (void) ipsec_setsocket(m
, NULL
);
460 if (args
.fwa_ipfw_rule
!= NULL
)
462 #endif /* IPFIREWALL */
463 if (args
.fwa_pf_rule
!= NULL
)
466 #endif /* DUMMYNET */
469 if (ipsec_bypass
== 0 && !(flags
& IP_NOIPSEC
)) {
470 so
= ipsec_getsocket(m
);
471 (void) ipsec_setsocket(m
, NULL
);
476 ipobf
.isbroadcast
= FALSE
;
477 ipobf
.didfilter
= FALSE
;
478 #if IPFIREWALL_FORWARD
479 ipobf
.fwd_rewrite_src
= FALSE
;
480 #endif /* IPFIREWALL_FORWARD */
482 VERIFY(m
->m_flags
& M_PKTHDR
);
484 * No need to process packet twice if we've already seen it.
486 if (!SLIST_EMPTY(&m
->m_pkthdr
.tags
))
487 inject_filter_ref
= ipf_get_inject_filter(m
);
489 inject_filter_ref
= NULL
;
492 m
= ip_insertoptions(m
, opt
, &len
);
494 /* Update the chain */
496 if (m0
== packetlist
)
501 ip
= mtod(m
, struct ip
*);
507 * When dealing with a packet chain, we need to reset "next_hop"
508 * because "dst" may have been changed to the gateway address below
509 * for the previous packet of the chain. This could cause the route
510 * to be inadvertently changed to the route to the gateway address
511 * (instead of the route to the destination).
513 args
.fwa_next_hop
= next_hop_from_ipfwd_tag
;
514 pkt_dst
= args
.fwa_next_hop
? args
.fwa_next_hop
->sin_addr
: ip
->ip_dst
;
515 #else /* !IPFIREWALL */
516 pkt_dst
= ip
->ip_dst
;
517 #endif /* !IPFIREWALL */
520 * We must not send if the packet is destined to network zero.
521 * RFC1122 3.2.1.3 (a) and (b).
523 if (IN_ZERONET(ntohl(pkt_dst
.s_addr
))) {
524 error
= EHOSTUNREACH
;
531 if (!(flags
& (IP_FORWARDING
|IP_RAWOUTPUT
))) {
532 ip
->ip_vhl
= IP_MAKE_VHL(IPVERSION
, hlen
>> 2);
534 ip
->ip_id
= ip_randomid();
535 OSAddAtomic(1, &ipstat
.ips_localout
);
537 hlen
= IP_VHL_HL(ip
->ip_vhl
) << 2;
541 /* For debugging, we let the stack forge congestion */
543 ((ip
->ip_tos
& IPTOS_ECN_MASK
) == IPTOS_ECN_ECT1
||
544 (ip
->ip_tos
& IPTOS_ECN_MASK
) == IPTOS_ECN_ECT0
)) {
545 ip
->ip_tos
= (ip
->ip_tos
& ~IPTOS_ECN_MASK
) | IPTOS_ECN_CE
;
550 KERNEL_DEBUG(DBG_LAYER_BEG
, ip
->ip_dst
.s_addr
, ip
->ip_src
.s_addr
,
551 ip
->ip_p
, ip
->ip_off
, ip
->ip_len
);
553 dst
= SIN(&ro
->ro_dst
);
556 * If there is a cached route,
557 * check that it is to the same destination
558 * and is still up. If not, free it and try again.
559 * The address family should also be checked in case of sharing the
563 if (ro
->ro_rt
!= NULL
) {
564 if (ROUTE_UNUSABLE(ro
) && ip
->ip_src
.s_addr
!= INADDR_ANY
&&
565 !(flags
& (IP_ROUTETOIF
| IP_FORWARDING
))) {
566 src_ia
= ifa_foraddr(ip
->ip_src
.s_addr
);
567 if (src_ia
== NULL
) {
568 error
= EADDRNOTAVAIL
;
571 IFA_REMREF(&src_ia
->ia_ifa
);
575 * Test rt_flags without holding rt_lock for performance
576 * reasons; if the route is down it will hopefully be
577 * caught by the layer below (since it uses this route
578 * as a hint) or during the next transmit.
580 if (ROUTE_UNUSABLE(ro
) || dst
->sin_family
!= AF_INET
||
581 dst
->sin_addr
.s_addr
!= pkt_dst
.s_addr
)
585 * If we're doing source interface selection, we may not
586 * want to use this route; only synch up the generation
589 if (!ipobf
.select_srcif
&& ro
->ro_rt
!= NULL
&&
590 RT_GENID_OUTOFSYNC(ro
->ro_rt
))
591 RT_GENID_SYNC(ro
->ro_rt
);
593 if (ro
->ro_rt
== NULL
) {
594 bzero(dst
, sizeof (*dst
));
595 dst
->sin_family
= AF_INET
;
596 dst
->sin_len
= sizeof (*dst
);
597 dst
->sin_addr
= pkt_dst
;
600 * If routing to interface only,
601 * short circuit routing lookup.
603 if (flags
& IP_ROUTETOIF
) {
605 IFA_REMREF(&ia
->ia_ifa
);
606 if ((ia
= ifatoia(ifa_ifwithdstaddr(sintosa(dst
)))) == NULL
) {
607 ia
= ifatoia(ifa_ifwithnet(sintosa(dst
)));
609 OSAddAtomic(1, &ipstat
.ips_noroute
);
616 ipobf
.isbroadcast
= in_broadcast(dst
->sin_addr
, ifp
);
618 * For consistency with other cases below. Loopback
619 * multicast case is handled separately by ip_mloopback().
621 if ((ifp
->if_flags
& IFF_LOOPBACK
) &&
622 !IN_MULTICAST(ntohl(pkt_dst
.s_addr
))) {
623 m
->m_pkthdr
.rcvif
= ifp
;
624 ip_setsrcifaddr_info(m
, ifp
->if_index
, NULL
);
625 ip_setdstifaddr_info(m
, ifp
->if_index
, NULL
);
627 } else if (IN_MULTICAST(ntohl(pkt_dst
.s_addr
)) &&
628 imo
!= NULL
&& (ifp
= imo
->imo_multicast_ifp
) != NULL
) {
630 * Bypass the normal routing lookup for multicast
631 * packets if the interface is specified.
633 ipobf
.isbroadcast
= FALSE
;
635 IFA_REMREF(&ia
->ia_ifa
);
637 /* Macro takes reference on ia */
640 struct ifaddr
*ia0
= NULL
;
641 boolean_t cloneok
= FALSE
;
643 * Perform source interface selection; the source IP address
644 * must belong to one of the addresses of the interface used
645 * by the route. For performance reasons, do this only if
646 * there is no route, or if the routing table has changed,
647 * or if we haven't done source interface selection on this
648 * route (for this PCB instance) before.
650 if (ipobf
.select_srcif
&&
651 ip
->ip_src
.s_addr
!= INADDR_ANY
&& (ROUTE_UNUSABLE(ro
) ||
652 !(ro
->ro_flags
& ROF_SRCIF_SELECTED
))) {
653 /* Find the source interface */
654 ia0
= in_selectsrcif(ip
, ro
, ifscope
);
657 * If the source address belongs to a cellular interface
658 * and the caller forbids our using interfaces of such
659 * type, pretend that there is no route.
661 if (ipobf
.nocell
&& ia0
!= NULL
&&
662 IFNET_IS_CELLULAR(ia0
->ifa_ifp
)) {
665 error
= EHOSTUNREACH
;
666 if (flags
& IP_OUTARGS
)
667 ipoa
->ipoa_retflags
|= IPOARF_IFDENIED
;
672 * If the source address is spoofed (in the case of
673 * IP_RAWOUTPUT on an unbounded socket), or if this
674 * is destined for local/loopback, just let it go out
675 * using the interface of the route. Otherwise,
676 * there's no interface having such an address,
679 if (ia0
== NULL
&& (!(flags
& IP_RAWOUTPUT
) ||
680 ipobf
.srcbound
) && ifscope
!= lo_ifp
->if_index
) {
681 error
= EADDRNOTAVAIL
;
686 * If the caller didn't explicitly specify the scope,
687 * pick it up from the source interface. If the cached
688 * route was wrong and was blown away as part of source
689 * interface selection, don't mask out RTF_PRCLONING
690 * since that route may have been allocated by the ULP,
691 * unless the IP header was created by the caller or
692 * the destination is IPv4 LLA. The check for the
693 * latter is needed because IPv4 LLAs are never scoped
694 * in the current implementation, and we don't want to
695 * replace the resolved IPv4 LLA route with one whose
696 * gateway points to that of the default gateway on
697 * the primary interface of the system.
700 if (ifscope
== IFSCOPE_NONE
)
701 ifscope
= ia0
->ifa_ifp
->if_index
;
702 cloneok
= (!(flags
& IP_RAWOUTPUT
) &&
703 !(IN_LINKLOCAL(ntohl(ip
->ip_dst
.s_addr
))));
708 * If this is the case, we probably don't want to allocate
709 * a protocol-cloned route since we didn't get one from the
710 * ULP. This lets TCP do its thing, while not burdening
711 * forwarding or ICMP with the overhead of cloning a route.
712 * Of course, we still want to do any cloning requested by
713 * the link layer, as this is probably required in all cases
714 * for correct operation (as it is for ARP).
716 if (ro
->ro_rt
== NULL
) {
717 unsigned long ign
= RTF_PRCLONING
;
719 * We make an exception here: if the destination
720 * address is INADDR_BROADCAST, allocate a protocol-
721 * cloned host route so that we end up with a route
722 * marked with the RTF_BROADCAST flag. Otherwise,
723 * we would end up referring to the default route,
724 * instead of creating a cloned host route entry.
725 * That would introduce inconsistencies between ULPs
726 * that allocate a route and those that don't. The
727 * RTF_BROADCAST route is important since we'd want
728 * to send out undirected IP broadcast packets using
729 * link-level broadcast address. Another exception
730 * is for ULP-created routes that got blown away by
731 * source interface selection (see above).
733 * These exceptions will no longer be necessary when
734 * the RTF_PRCLONING scheme is no longer present.
736 if (cloneok
|| dst
->sin_addr
.s_addr
== INADDR_BROADCAST
)
737 ign
&= ~RTF_PRCLONING
;
740 * Loosen the route lookup criteria if the ifscope
741 * corresponds to the loopback interface; this is
742 * needed to support Application Layer Gateways
743 * listening on loopback, in conjunction with packet
744 * filter redirection rules. The final source IP
745 * address will be rewritten by the packet filter
746 * prior to the RFC1122 loopback check below.
748 if (ifscope
== lo_ifp
->if_index
)
749 rtalloc_ign(ro
, ign
);
751 rtalloc_scoped_ign(ro
, ign
, ifscope
);
754 * If the route points to a cellular interface and the
755 * caller forbids our using interfaces of such type,
756 * pretend that there is no route.
758 if (ipobf
.nocell
&& ro
->ro_rt
!= NULL
) {
759 RT_LOCK_SPIN(ro
->ro_rt
);
760 if (IFNET_IS_CELLULAR(ro
->ro_rt
->rt_ifp
)) {
761 RT_UNLOCK(ro
->ro_rt
);
763 if (flags
& IP_OUTARGS
) {
764 ipoa
->ipoa_retflags
|=
768 RT_UNLOCK(ro
->ro_rt
);
773 if (ro
->ro_rt
== NULL
) {
774 OSAddAtomic(1, &ipstat
.ips_noroute
);
775 error
= EHOSTUNREACH
;
784 IFA_REMREF(&ia
->ia_ifa
);
785 RT_LOCK_SPIN(ro
->ro_rt
);
786 ia
= ifatoia(ro
->ro_rt
->rt_ifa
);
788 /* Become a regular mutex */
789 RT_CONVERT_LOCK(ro
->ro_rt
);
790 IFA_ADDREF(&ia
->ia_ifa
);
793 * Note: ia_ifp may not be the same as rt_ifp; the latter
794 * is what we use for determining outbound i/f, mtu, etc.
796 ifp
= ro
->ro_rt
->rt_ifp
;
798 if (ro
->ro_rt
->rt_flags
& RTF_GATEWAY
) {
799 dst
= SIN(ro
->ro_rt
->rt_gateway
);
801 if (ro
->ro_rt
->rt_flags
& RTF_HOST
) {
802 /* double negation needed for bool bit field */
804 !!(ro
->ro_rt
->rt_flags
& RTF_BROADCAST
);
806 /* Become a regular mutex */
807 RT_CONVERT_LOCK(ro
->ro_rt
);
808 ipobf
.isbroadcast
= in_broadcast(dst
->sin_addr
, ifp
);
811 * For consistency with IPv6, as well as to ensure that
812 * IP_RECVIF is set correctly for packets that are sent
813 * to one of the local addresses. ia (rt_ifa) would have
814 * been fixed up by rt_setif for local routes. This
815 * would make it appear as if the packet arrives on the
816 * interface which owns the local address. Loopback
817 * multicast case is handled separately by ip_mloopback().
819 if (ia
!= NULL
&& (ifp
->if_flags
& IFF_LOOPBACK
) &&
820 !IN_MULTICAST(ntohl(pkt_dst
.s_addr
))) {
823 m
->m_pkthdr
.rcvif
= ia
->ia_ifa
.ifa_ifp
;
826 srcidx
= ia0
->ifa_ifp
->if_index
;
827 else if ((ro
->ro_flags
& ROF_SRCIF_SELECTED
) &&
828 ro
->ro_srcia
!= NULL
)
829 srcidx
= ro
->ro_srcia
->ifa_ifp
->if_index
;
833 ip_setsrcifaddr_info(m
, srcidx
, NULL
);
834 ip_setdstifaddr_info(m
, 0, ia
);
836 RT_UNLOCK(ro
->ro_rt
);
843 if (IN_MULTICAST(ntohl(pkt_dst
.s_addr
))) {
844 struct ifnet
*srcifp
= NULL
;
845 struct in_multi
*inm
;
847 u_int8_t ttl
= IP_DEFAULT_MULTICAST_TTL
;
848 u_int8_t loop
= IP_DEFAULT_MULTICAST_LOOP
;
850 m
->m_flags
|= M_MCAST
;
852 * IP destination address is multicast. Make sure "dst"
853 * still points to the address in "ro". (It may have been
854 * changed to point to a gateway address, above.)
856 dst
= SIN(&ro
->ro_dst
);
858 * See if the caller provided any multicast options
862 vif
= imo
->imo_multicast_vif
;
863 ttl
= imo
->imo_multicast_ttl
;
864 loop
= imo
->imo_multicast_loop
;
865 if (!(flags
& IP_RAWOUTPUT
))
867 if (imo
->imo_multicast_ifp
!= NULL
)
868 ifp
= imo
->imo_multicast_ifp
;
871 if (vif
!= -1 && (!(flags
& IP_RAWOUTPUT
) ||
872 ip
->ip_src
.s_addr
== INADDR_ANY
))
873 ip
->ip_src
.s_addr
= ip_mcast_src(vif
);
874 #endif /* MROUTING */
875 } else if (!(flags
& IP_RAWOUTPUT
)) {
880 * Confirm that the outgoing interface supports multicast.
882 if (imo
== NULL
|| vif
== -1) {
883 if (!(ifp
->if_flags
& IFF_MULTICAST
)) {
884 OSAddAtomic(1, &ipstat
.ips_noroute
);
890 * If source address not specified yet, use address
891 * of outgoing interface.
893 if (ip
->ip_src
.s_addr
== INADDR_ANY
) {
894 struct in_ifaddr
*ia1
;
895 lck_rw_lock_shared(in_ifaddr_rwlock
);
896 TAILQ_FOREACH(ia1
, &in_ifaddrhead
, ia_link
) {
897 IFA_LOCK_SPIN(&ia1
->ia_ifa
);
898 if (ia1
->ia_ifp
== ifp
) {
899 ip
->ip_src
= IA_SIN(ia1
)->sin_addr
;
901 IFA_UNLOCK(&ia1
->ia_ifa
);
904 IFA_UNLOCK(&ia1
->ia_ifa
);
906 lck_rw_done(in_ifaddr_rwlock
);
907 if (ip
->ip_src
.s_addr
== INADDR_ANY
) {
913 in_multihead_lock_shared();
914 IN_LOOKUP_MULTI(&pkt_dst
, ifp
, inm
);
915 in_multihead_lock_done();
916 if (inm
!= NULL
&& (imo
== NULL
|| loop
)) {
918 * If we belong to the destination multicast group
919 * on the outgoing interface, and the caller did not
920 * forbid loopback, loop back a copy.
922 if (!TAILQ_EMPTY(&ipv4_filters
)) {
923 struct ipfilter
*filter
;
924 int seen
= (inject_filter_ref
== NULL
);
927 ipf_pktopts
.ippo_flags
|=
929 ipf_pktopts
.ippo_mcast_ifnet
= ifp
;
930 ipf_pktopts
.ippo_mcast_ttl
= ttl
;
931 ipf_pktopts
.ippo_mcast_loop
= loop
;
937 * 4135317 - always pass network byte
940 #if BYTE_ORDER != BIG_ENDIAN
944 TAILQ_FOREACH(filter
, &ipv4_filters
, ipf_link
) {
946 if ((struct ipfilter
*)
947 inject_filter_ref
== filter
)
949 } else if (filter
->ipf_filter
.
950 ipf_output
!= NULL
) {
952 result
= filter
->ipf_filter
.
956 if (result
== EJUSTRETURN
) {
969 /* set back to host byte order */
970 ip
= mtod(m
, struct ip
*);
971 #if BYTE_ORDER != BIG_ENDIAN
976 ipobf
.didfilter
= TRUE
;
978 ip_mloopback(srcifp
, ifp
, m
, dst
, hlen
);
983 * If we are acting as a multicast router, perform
984 * multicast forwarding as if the packet had just
985 * arrived on the interface to which we are about
986 * to send. The multicast forwarding function
987 * recursively calls this function, using the
988 * IP_FORWARDING flag to prevent infinite recursion.
990 * Multicasts that are looped back by ip_mloopback(),
991 * above, will be forwarded by the ip_input() routine,
994 if (ip_mrouter
&& !(flags
& IP_FORWARDING
)) {
996 * Check if rsvp daemon is running. If not,
997 * don't set ip_moptions. This ensures that
998 * the packet is multicast and not just sent
999 * down one link as prescribed by rsvpd.
1003 if (ip_mforward(ip
, ifp
, m
, imo
) != 0) {
1007 OSAddAtomic(1, &ipstat
.ips_cantforward
);
1012 #endif /* MROUTING */
1016 * Multicasts with a time-to-live of zero may be looped-
1017 * back, above, but must not be transmitted on a network.
1018 * Also, multicasts addressed to the loopback interface
1019 * are not sent -- the above call to ip_mloopback() will
1020 * loop back a copy if this host actually belongs to the
1021 * destination group on the loopback interface.
1023 if (ip
->ip_ttl
== 0 || ifp
->if_flags
& IFF_LOOPBACK
) {
1031 * If source address not specified yet, use address
1032 * of outgoing interface.
1034 if (ip
->ip_src
.s_addr
== INADDR_ANY
) {
1035 IFA_LOCK_SPIN(&ia
->ia_ifa
);
1036 ip
->ip_src
= IA_SIN(ia
)->sin_addr
;
1037 IFA_UNLOCK(&ia
->ia_ifa
);
1038 #if IPFIREWALL_FORWARD
1040 * Keep note that we did this - if the firewall changes
1041 * the next-hop, our interface may change, changing the
1042 * default source IP. It's a shame so much effort happens
1045 ipobf
.fwd_rewrite_src
= TRUE
;
1046 #endif /* IPFIREWALL_FORWARD */
1050 * Look for broadcast address and
1051 * verify user is allowed to send
1054 if (ipobf
.isbroadcast
) {
1055 if (!(ifp
->if_flags
& IFF_BROADCAST
)) {
1056 error
= EADDRNOTAVAIL
;
1059 if (!(flags
& IP_ALLOWBROADCAST
)) {
1063 /* don't allow broadcast messages to be fragmented */
1064 if ((u_short
)ip
->ip_len
> ifp
->if_mtu
) {
1068 m
->m_flags
|= M_BCAST
;
1070 m
->m_flags
&= ~M_BCAST
;
1075 /* Invoke outbound packet filter */
1076 if (PF_IS_ENABLED
) {
1079 m0
= m
; /* Save for later */
1082 args
.fwa_next_hop
= dst
;
1086 args
.fwa_oflags
= flags
;
1087 if (flags
& IP_OUTARGS
)
1088 args
.fwa_ipoa
= ipoa
;
1089 rc
= pf_af_hook(ifp
, mppn
, &m
, AF_INET
, FALSE
, &args
);
1090 #else /* DUMMYNET */
1091 rc
= pf_af_hook(ifp
, mppn
, &m
, AF_INET
, FALSE
, NULL
);
1092 #endif /* DUMMYNET */
1093 if (rc
!= 0 || m
== NULL
) {
1094 /* Move to the next packet */
1097 /* Skip ahead if first packet in list got dropped */
1098 if (packetlist
== m0
)
1103 /* Next packet in the chain */
1105 } else if (packetlist
!= NULL
) {
1106 /* No more packet; send down the chain */
1109 /* Nothing left; we're done */
1113 ip
= mtod(m
, struct ip
*);
1114 pkt_dst
= ip
->ip_dst
;
1115 hlen
= IP_VHL_HL(ip
->ip_vhl
) << 2;
1119 * Force IP TTL to 255 following draft-ietf-zeroconf-ipv4-linklocal.txt
1121 if (IN_LINKLOCAL(ntohl(ip
->ip_src
.s_addr
)) ||
1122 IN_LINKLOCAL(ntohl(ip
->ip_dst
.s_addr
))) {
1123 ip_linklocal_stat
.iplls_out_total
++;
1124 if (ip
->ip_ttl
!= MAXTTL
) {
1125 ip_linklocal_stat
.iplls_out_badttl
++;
1126 ip
->ip_ttl
= MAXTTL
;
1130 if (!ipobf
.didfilter
&& !TAILQ_EMPTY(&ipv4_filters
)) {
1131 struct ipfilter
*filter
;
1132 int seen
= (inject_filter_ref
== NULL
);
1133 ipf_pktopts
.ippo_flags
&= ~IPPOF_MCAST_OPTS
;
1136 * Check that a TSO frame isn't passed to a filter.
1137 * This could happen if a filter is inserted while
1138 * TCP is sending the TSO packet.
1140 if (m
->m_pkthdr
.csum_flags
& CSUM_TSO_IPV4
) {
1147 /* 4135317 - always pass network byte order to filter */
1148 #if BYTE_ORDER != BIG_ENDIAN
1152 TAILQ_FOREACH(filter
, &ipv4_filters
, ipf_link
) {
1154 if ((struct ipfilter
*)inject_filter_ref
==
1157 } else if (filter
->ipf_filter
.ipf_output
) {
1159 result
= filter
->ipf_filter
.
1160 ipf_output(filter
->ipf_filter
.cookie
,
1161 (mbuf_t
*)&m
, ippo
);
1162 if (result
== EJUSTRETURN
) {
1172 /* set back to host byte order */
1173 ip
= mtod(m
, struct ip
*);
1174 #if BYTE_ORDER != BIG_ENDIAN
1182 /* temporary for testing only: bypass ipsec altogether */
1184 if (ipsec_bypass
!= 0 || (flags
& IP_NOIPSEC
))
1187 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT
| DBG_FUNC_START
, 0, 0, 0, 0, 0);
1189 /* May have been set above if packet was bound */
1191 /* get SP for this packet */
1193 sp
= ipsec4_getpolicybyaddr(m
, IPSEC_DIR_OUTBOUND
,
1196 sp
= ipsec4_getpolicybysock(m
, IPSEC_DIR_OUTBOUND
,
1200 IPSEC_STAT_INCREMENT(ipsecstat
.out_inval
);
1201 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT
| DBG_FUNC_END
,
1210 switch (sp
->policy
) {
1211 case IPSEC_POLICY_DISCARD
:
1212 case IPSEC_POLICY_GENERATE
:
1214 * This packet is just discarded.
1216 IPSEC_STAT_INCREMENT(ipsecstat
.out_polvio
);
1217 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT
| DBG_FUNC_END
,
1221 case IPSEC_POLICY_BYPASS
:
1222 case IPSEC_POLICY_NONE
:
1223 /* no need to do IPsec. */
1224 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT
| DBG_FUNC_END
,
1228 case IPSEC_POLICY_IPSEC
:
1229 if (sp
->req
== NULL
) {
1230 /* acquire a policy */
1231 error
= key_spdacquire(sp
);
1232 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT
| DBG_FUNC_END
,
1237 /* Verify the redirect to ipsec interface */
1238 if (sp
->ipsec_if
== ifp
) {
1239 /* Set policy for mbuf */
1240 m
->m_pkthdr
.ipsec_policy
= sp
->id
;
1247 case IPSEC_POLICY_ENTRUST
:
1249 printf("ip_output: Invalid policy found. %d\n", sp
->policy
);
1253 if (flags
& IP_ROUTETOIF
) {
1254 bzero(&ipsec_state
.ro
, sizeof (ipsec_state
.ro
));
1256 route_copyout(&ipsec_state
.ro
, ro
, sizeof (ipsec_state
.ro
));
1258 ipsec_state
.dst
= SA(dst
);
1264 * delayed checksums are not currently compatible with IPsec
1266 if (m
->m_pkthdr
.csum_flags
& CSUM_DELAY_DATA
)
1267 in_delayed_cksum(m
);
1269 #if BYTE_ORDER != BIG_ENDIAN
1274 DTRACE_IP6(send
, struct mbuf
*, m
, struct inpcb
*, NULL
,
1275 struct ip
*, ip
, struct ifnet
*, ifp
,
1276 struct ip
*, ip
, struct ip6_hdr
*, NULL
);
1278 error
= ipsec4_output(&ipsec_state
, sp
, flags
);
1280 m0
= m
= ipsec_state
.m
;
1284 * If we're about to use the route in ipsec_state
1285 * and this came from dummynet, cleaup now.
1287 if (ro
== &saved_route
&&
1288 (!(flags
& IP_ROUTETOIF
) || ipsec_state
.tunneled
))
1290 #endif /* DUMMYNET */
1292 if (flags
& IP_ROUTETOIF
) {
1294 * if we have tunnel mode SA, we may need to ignore
1297 if (ipsec_state
.tunneled
) {
1298 flags
&= ~IP_ROUTETOIF
;
1299 ro
= &ipsec_state
.ro
;
1302 ro
= &ipsec_state
.ro
;
1304 dst
= SIN(ipsec_state
.dst
);
1306 /* mbuf is already reclaimed in ipsec4_output. */
1316 printf("ip4_output (ipsec): error code %d\n", error
);
1319 /* don't show these error codes to the user */
1323 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT
| DBG_FUNC_END
,
1329 /* be sure to update variables that are affected by ipsec4_output() */
1330 ip
= mtod(m
, struct ip
*);
1333 hlen
= IP_VHL_HL(ip
->ip_vhl
) << 2;
1334 #else /* !_IP_VHL */
1335 hlen
= ip
->ip_hl
<< 2;
1336 #endif /* !_IP_VHL */
1337 /* Check that there wasn't a route change and src is still valid */
1338 if (ROUTE_UNUSABLE(ro
)) {
1340 VERIFY(src_ia
== NULL
);
1341 if (ip
->ip_src
.s_addr
!= INADDR_ANY
&&
1342 !(flags
& (IP_ROUTETOIF
| IP_FORWARDING
)) &&
1343 (src_ia
= ifa_foraddr(ip
->ip_src
.s_addr
)) == NULL
) {
1344 error
= EADDRNOTAVAIL
;
1345 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT
| DBG_FUNC_END
,
1349 if (src_ia
!= NULL
) {
1350 IFA_REMREF(&src_ia
->ia_ifa
);
1355 if (ro
->ro_rt
== NULL
) {
1356 if (!(flags
& IP_ROUTETOIF
)) {
1357 printf("%s: can't update route after "
1358 "IPsec processing\n", __func__
);
1359 error
= EHOSTUNREACH
; /* XXX */
1360 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT
| DBG_FUNC_END
,
1366 IFA_REMREF(&ia
->ia_ifa
);
1367 RT_LOCK_SPIN(ro
->ro_rt
);
1368 ia
= ifatoia(ro
->ro_rt
->rt_ifa
);
1370 /* Become a regular mutex */
1371 RT_CONVERT_LOCK(ro
->ro_rt
);
1372 IFA_ADDREF(&ia
->ia_ifa
);
1374 ifp
= ro
->ro_rt
->rt_ifp
;
1375 RT_UNLOCK(ro
->ro_rt
);
1378 /* make it flipped, again. */
1379 #if BYTE_ORDER != BIG_ENDIAN
1383 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT
| DBG_FUNC_END
,
1384 7, 0xff, 0xff, 0xff, 0xff);
1386 /* Pass to filters again */
1387 if (!TAILQ_EMPTY(&ipv4_filters
)) {
1388 struct ipfilter
*filter
;
1390 ipf_pktopts
.ippo_flags
&= ~IPPOF_MCAST_OPTS
;
1393 * Check that a TSO frame isn't passed to a filter.
1394 * This could happen if a filter is inserted while
1395 * TCP is sending the TSO packet.
1397 if (m
->m_pkthdr
.csum_flags
& CSUM_TSO_IPV4
) {
1404 /* 4135317 - always pass network byte order to filter */
1405 #if BYTE_ORDER != BIG_ENDIAN
1409 TAILQ_FOREACH(filter
, &ipv4_filters
, ipf_link
) {
1410 if (filter
->ipf_filter
.ipf_output
) {
1412 result
= filter
->ipf_filter
.
1413 ipf_output(filter
->ipf_filter
.cookie
,
1414 (mbuf_t
*)&m
, ippo
);
1415 if (result
== EJUSTRETURN
) {
1425 /* set back to host byte order */
1426 ip
= mtod(m
, struct ip
*);
1427 #if BYTE_ORDER != BIG_ENDIAN
1438 * Check with the firewall...
1439 * but not if we are already being fwd'd from a firewall.
1441 if (fw_enable
&& IPFW_LOADED
&& !args
.fwa_next_hop
) {
1442 struct sockaddr_in
*old
= dst
;
1445 args
.fwa_next_hop
= dst
;
1447 ipfwoff
= ip_fw_chk_ptr(&args
);
1449 dst
= args
.fwa_next_hop
;
1452 * On return we must do the following:
1453 * IP_FW_PORT_DENY_FLAG -> drop the pkt (XXX new)
1454 * 1<=off<= 0xffff -> DIVERT
1455 * (off & IP_FW_PORT_DYNT_FLAG) -> send to a DUMMYNET pipe
1456 * (off & IP_FW_PORT_TEE_FLAG) -> TEE the packet
1457 * dst != old -> IPFIREWALL_FORWARD
1458 * off==0, dst==old -> accept
1459 * If some of the above modules is not compiled in, then
1460 * we should't have to check the corresponding condition
1461 * (because the ipfw control socket should not accept
1462 * unsupported rules), but better play safe and drop
1463 * packets in case of doubt.
1466 if ((ipfwoff
& IP_FW_PORT_DENY_FLAG
) || m
== NULL
) {
1472 ip
= mtod(m
, struct ip
*);
1474 if (ipfwoff
== 0 && dst
== old
) { /* common case */
1478 if (DUMMYNET_LOADED
&& (ipfwoff
& IP_FW_PORT_DYNT_FLAG
) != 0) {
1480 * pass the pkt to dummynet. Need to include
1481 * pipe number, m, ifp, ro, dst because these are
1482 * not recomputed in the next pass.
1483 * All other parameters have been already used and
1484 * so they are not needed anymore.
1485 * XXX note: if the ifp or ro entry are deleted
1486 * while a pkt is in dummynet, we are in trouble!
1490 args
.fwa_oflags
= flags
;
1491 if (flags
& IP_OUTARGS
)
1492 args
.fwa_ipoa
= ipoa
;
1494 error
= ip_dn_io_ptr(m
, ipfwoff
& 0xffff, DN_TO_IP_OUT
,
1495 &args
, DN_CLIENT_IPFW
);
1498 #endif /* DUMMYNET */
1500 if (ipfwoff
!= 0 && (ipfwoff
& IP_FW_PORT_DYNT_FLAG
) == 0) {
1501 struct mbuf
*clone
= NULL
;
1503 /* Clone packet if we're doing a 'tee' */
1504 if ((ipfwoff
& IP_FW_PORT_TEE_FLAG
) != 0)
1505 clone
= m_dup(m
, M_DONTWAIT
);
1508 * delayed checksums are not currently compatible
1509 * with divert sockets.
1511 if (m
->m_pkthdr
.csum_flags
& CSUM_DELAY_DATA
)
1512 in_delayed_cksum(m
);
1514 /* Restore packet header fields to original values */
1516 #if BYTE_ORDER != BIG_ENDIAN
1521 /* Deliver packet to divert input routine */
1522 divert_packet(m
, 0, ipfwoff
& 0xffff,
1523 args
.fwa_divert_rule
);
1525 /* If 'tee', continue with original packet */
1526 if (clone
!= NULL
) {
1528 ip
= mtod(m
, struct ip
*);
1533 #endif /* IPDIVERT */
1534 #if IPFIREWALL_FORWARD
1536 * Here we check dst to make sure it's directly reachable on
1537 * the interface we previously thought it was.
1538 * If it isn't (which may be likely in some situations) we have
1539 * to re-route it (ie, find a route for the next-hop and the
1540 * associated interface) and set them here. This is nested
1541 * forwarding which in most cases is undesirable, except where
1542 * such control is nigh impossible. So we do it here.
1545 if (ipfwoff
== 0 && old
!= dst
) {
1546 struct in_ifaddr
*ia_fw
;
1547 struct route
*ro_fwd
= &sro_fwd
;
1549 #if IPFIREWALL_FORWARD_DEBUG
1550 printf("IPFIREWALL_FORWARD: New dst ip: ");
1551 print_ip(dst
->sin_addr
);
1553 #endif /* IPFIREWALL_FORWARD_DEBUG */
1555 * We need to figure out if we have been forwarded
1556 * to a local socket. If so then we should somehow
1557 * "loop back" to ip_input, and get directed to the
1558 * PCB as if we had received this packet. This is
1559 * because it may be dificult to identify the packets
1560 * you want to forward until they are being output
1561 * and have selected an interface. (e.g. locally
1562 * initiated packets) If we used the loopback inteface,
1563 * we would not be able to control what happens
1564 * as the packet runs through ip_input() as
1565 * it is done through a ISR.
1567 lck_rw_lock_shared(in_ifaddr_rwlock
);
1568 TAILQ_FOREACH(ia_fw
, &in_ifaddrhead
, ia_link
) {
1570 * If the addr to forward to is one
1571 * of ours, we pretend to
1572 * be the destination for this packet.
1574 IFA_LOCK_SPIN(&ia_fw
->ia_ifa
);
1575 if (IA_SIN(ia_fw
)->sin_addr
.s_addr
==
1576 dst
->sin_addr
.s_addr
) {
1577 IFA_UNLOCK(&ia_fw
->ia_ifa
);
1580 IFA_UNLOCK(&ia_fw
->ia_ifa
);
1582 lck_rw_done(in_ifaddr_rwlock
);
1584 /* tell ip_input "dont filter" */
1585 struct m_tag
*fwd_tag
;
1586 struct ip_fwd_tag
*ipfwd_tag
;
1588 fwd_tag
= m_tag_create(KERNEL_MODULE_TAG_ID
,
1589 KERNEL_TAG_TYPE_IPFORWARD
,
1590 sizeof (*ipfwd_tag
), M_NOWAIT
, m
);
1591 if (fwd_tag
== NULL
) {
1596 ipfwd_tag
= (struct ip_fwd_tag
*)(fwd_tag
+1);
1597 ipfwd_tag
->next_hop
= args
.fwa_next_hop
;
1599 m_tag_prepend(m
, fwd_tag
);
1601 if (m
->m_pkthdr
.rcvif
== NULL
)
1602 m
->m_pkthdr
.rcvif
= lo_ifp
;
1604 #if BYTE_ORDER != BIG_ENDIAN
1608 mbuf_outbound_finalize(m
, PF_INET
, 0);
1611 * we need to call dlil_output to run filters
1612 * and resync to avoid recursion loops.
1615 dlil_output(lo_ifp
, PF_INET
, m
, NULL
,
1618 printf("%s: no loopback ifp for "
1619 "forwarding!!!\n", __func__
);
1624 * Some of the logic for this was nicked from above.
1626 * This rewrites the cached route in a local PCB.
1627 * Is this what we want to do?
1629 ROUTE_RELEASE(ro_fwd
);
1630 bcopy(dst
, &ro_fwd
->ro_dst
, sizeof (*dst
));
1632 rtalloc_ign(ro_fwd
, RTF_PRCLONING
);
1634 if (ro_fwd
->ro_rt
== NULL
) {
1635 OSAddAtomic(1, &ipstat
.ips_noroute
);
1636 error
= EHOSTUNREACH
;
1640 RT_LOCK_SPIN(ro_fwd
->ro_rt
);
1641 ia_fw
= ifatoia(ro_fwd
->ro_rt
->rt_ifa
);
1642 if (ia_fw
!= NULL
) {
1643 /* Become a regular mutex */
1644 RT_CONVERT_LOCK(ro_fwd
->ro_rt
);
1645 IFA_ADDREF(&ia_fw
->ia_ifa
);
1647 ifp
= ro_fwd
->ro_rt
->rt_ifp
;
1648 ro_fwd
->ro_rt
->rt_use
++;
1649 if (ro_fwd
->ro_rt
->rt_flags
& RTF_GATEWAY
)
1650 dst
= SIN(ro_fwd
->ro_rt
->rt_gateway
);
1651 if (ro_fwd
->ro_rt
->rt_flags
& RTF_HOST
) {
1652 /* double negation needed for bool bit field */
1654 !!(ro_fwd
->ro_rt
->rt_flags
& RTF_BROADCAST
);
1656 /* Become a regular mutex */
1657 RT_CONVERT_LOCK(ro_fwd
->ro_rt
);
1659 in_broadcast(dst
->sin_addr
, ifp
);
1661 RT_UNLOCK(ro_fwd
->ro_rt
);
1663 ro
->ro_rt
= ro_fwd
->ro_rt
;
1664 ro_fwd
->ro_rt
= NULL
;
1665 dst
= SIN(&ro_fwd
->ro_dst
);
1668 * If we added a default src ip earlier,
1669 * which would have been gotten from the-then
1670 * interface, do it again, from the new one.
1672 if (ia_fw
!= NULL
) {
1673 if (ipobf
.fwd_rewrite_src
) {
1674 IFA_LOCK_SPIN(&ia_fw
->ia_ifa
);
1675 ip
->ip_src
= IA_SIN(ia_fw
)->sin_addr
;
1676 IFA_UNLOCK(&ia_fw
->ia_ifa
);
1678 IFA_REMREF(&ia_fw
->ia_ifa
);
1682 #endif /* IPFIREWALL_FORWARD */
1684 * if we get here, none of the above matches, and
1685 * we have to drop the pkt
1688 error
= EACCES
; /* not sure this is the right error msg */
1693 #endif /* IPFIREWALL */
1695 /* 127/8 must not appear on wire - RFC1122 */
1696 if (!(ifp
->if_flags
& IFF_LOOPBACK
) &&
1697 ((ntohl(ip
->ip_src
.s_addr
) >> IN_CLASSA_NSHIFT
) == IN_LOOPBACKNET
||
1698 (ntohl(ip
->ip_dst
.s_addr
) >> IN_CLASSA_NSHIFT
) == IN_LOOPBACKNET
)) {
1699 OSAddAtomic(1, &ipstat
.ips_badaddr
);
1701 error
= EADDRNOTAVAIL
;
1705 ip_output_checksum(ifp
, m
, (IP_VHL_HL(ip
->ip_vhl
) << 2),
1706 ip
->ip_len
, &sw_csum
);
1709 * If small enough for interface, or the interface will take
1710 * care of the fragmentation for us, can just send directly.
1712 if ((u_short
)ip
->ip_len
<= ifp
->if_mtu
|| TSO_IPV4_OK(ifp
, m
) ||
1713 (!(ip
->ip_off
& IP_DF
) && (ifp
->if_hwassist
& CSUM_FRAGMENT
))) {
1714 #if BYTE_ORDER != BIG_ENDIAN
1720 if (sw_csum
& CSUM_DELAY_IP
) {
1721 ip
->ip_sum
= ip_cksum_hdr_out(m
, hlen
);
1722 sw_csum
&= ~CSUM_DELAY_IP
;
1723 m
->m_pkthdr
.csum_flags
&= ~CSUM_DELAY_IP
;
1727 /* clean ipsec history once it goes out of the node */
1728 if (ipsec_bypass
== 0 && !(flags
& IP_NOIPSEC
))
1731 if ((m
->m_pkthdr
.csum_flags
& CSUM_TSO_IPV4
) &&
1732 (m
->m_pkthdr
.tso_segsz
> 0))
1733 scnt
+= m
->m_pkthdr
.len
/ m
->m_pkthdr
.tso_segsz
;
1737 if (packetchain
== 0) {
1738 if (ro
->ro_rt
!= NULL
&& nstat_collect
)
1739 nstat_route_tx(ro
->ro_rt
, scnt
,
1740 m
->m_pkthdr
.len
, 0);
1742 error
= dlil_output(ifp
, PF_INET
, m
, ro
->ro_rt
,
1748 * packet chaining allows us to reuse the
1749 * route for all packets
1751 bytecnt
+= m
->m_pkthdr
.len
;
1752 mppn
= &m
->m_nextpkt
;
1758 if (pktcnt
> ip_maxchainsent
)
1759 ip_maxchainsent
= pktcnt
;
1760 if (ro
->ro_rt
!= NULL
&& nstat_collect
)
1761 nstat_route_tx(ro
->ro_rt
, scnt
,
1764 error
= dlil_output(ifp
, PF_INET
, packetlist
,
1765 ro
->ro_rt
, SA(dst
), 0, adv
);
1778 * Too large for interface; fragment if possible.
1779 * Must be able to put at least 8 bytes per fragment.
1780 * Balk when DF bit is set or the interface didn't support TSO.
1782 if ((ip
->ip_off
& IP_DF
) || pktcnt
> 0 ||
1783 (m
->m_pkthdr
.csum_flags
& CSUM_TSO_IPV4
)) {
1786 * This case can happen if the user changed the MTU
1787 * of an interface after enabling IP on it. Because
1788 * most netifs don't keep track of routes pointing to
1789 * them, there is no way for one to update all its
1790 * routes when the MTU is changed.
1793 RT_LOCK_SPIN(ro
->ro_rt
);
1794 if ((ro
->ro_rt
->rt_flags
& (RTF_UP
| RTF_HOST
)) &&
1795 !(ro
->ro_rt
->rt_rmx
.rmx_locks
& RTV_MTU
) &&
1796 (ro
->ro_rt
->rt_rmx
.rmx_mtu
> ifp
->if_mtu
)) {
1797 ro
->ro_rt
->rt_rmx
.rmx_mtu
= ifp
->if_mtu
;
1799 RT_UNLOCK(ro
->ro_rt
);
1804 OSAddAtomic(1, &ipstat
.ips_cantfrag
);
1808 error
= ip_fragment(m
, ifp
, ifp
->if_mtu
, sw_csum
);
1814 KERNEL_DEBUG(DBG_LAYER_END
, ip
->ip_dst
.s_addr
,
1815 ip
->ip_src
.s_addr
, ip
->ip_p
, ip
->ip_off
, ip
->ip_len
);
1817 for (m
= m0
; m
; m
= m0
) {
1821 /* clean ipsec history once it goes out of the node */
1822 if (ipsec_bypass
== 0 && !(flags
& IP_NOIPSEC
))
1826 if ((packetchain
!= 0) && (pktcnt
> 0)) {
1827 panic("%s: mix of packet in packetlist is "
1828 "wrong=%p", __func__
, packetlist
);
1831 if (ro
->ro_rt
!= NULL
&& nstat_collect
) {
1832 nstat_route_tx(ro
->ro_rt
, 1,
1833 m
->m_pkthdr
.len
, 0);
1835 error
= dlil_output(ifp
, PF_INET
, m
, ro
->ro_rt
,
1843 OSAddAtomic(1, &ipstat
.ips_fragmented
);
1847 IFA_REMREF(&ia
->ia_ifa
);
1851 ROUTE_RELEASE(&ipsec_state
.ro
);
1853 KEYDEBUG(KEYDEBUG_IPSEC_STAMP
,
1854 printf("DP ip_output call free SP:%x\n", sp
));
1855 key_freesp(sp
, KEY_SADB_UNLOCKED
);
1859 ROUTE_RELEASE(&saved_route
);
1860 #endif /* DUMMYNET */
1861 #if IPFIREWALL_FORWARD
1862 ROUTE_RELEASE(&sro_fwd
);
1863 #endif /* IPFIREWALL_FORWARD */
1865 KERNEL_DEBUG(DBG_FNC_IP_OUTPUT
| DBG_FUNC_END
, error
, 0, 0, 0, 0);
1879 ip_fragment(struct mbuf
*m
, struct ifnet
*ifp
, unsigned long mtu
, int sw_csum
)
1881 struct ip
*ip
, *mhip
;
1882 int len
, hlen
, mhlen
, firstlen
, off
, error
= 0;
1883 struct mbuf
**mnext
= &m
->m_nextpkt
, *m0
;
1886 ip
= mtod(m
, struct ip
*);
1888 hlen
= IP_VHL_HL(ip
->ip_vhl
) << 2;
1889 #else /* !_IP_VHL */
1890 hlen
= ip
->ip_hl
<< 2;
1891 #endif /* !_IP_VHL */
1893 firstlen
= len
= (mtu
- hlen
) &~ 7;
1900 * if the interface will not calculate checksums on
1901 * fragmented packets, then do it here.
1903 if ((m
->m_pkthdr
.csum_flags
& CSUM_DELAY_DATA
) &&
1904 !(ifp
->if_hwassist
& CSUM_IP_FRAGS
))
1905 in_delayed_cksum(m
);
1908 * Loop through length of segment after first fragment,
1909 * make new header and copy data of each part and link onto chain.
1912 mhlen
= sizeof (struct ip
);
1913 for (off
= hlen
+ len
; off
< (u_short
)ip
->ip_len
; off
+= len
) {
1914 MGETHDR(m
, M_DONTWAIT
, MT_HEADER
); /* MAC-OK */
1917 OSAddAtomic(1, &ipstat
.ips_odropped
);
1920 m
->m_flags
|= (m0
->m_flags
& M_MCAST
) | M_FRAG
;
1921 m
->m_data
+= max_linkhdr
;
1922 mhip
= mtod(m
, struct ip
*);
1924 if (hlen
> sizeof (struct ip
)) {
1925 mhlen
= ip_optcopy(ip
, mhip
) + sizeof (struct ip
);
1926 mhip
->ip_vhl
= IP_MAKE_VHL(IPVERSION
, mhlen
>> 2);
1929 mhip
->ip_off
= ((off
- hlen
) >> 3) + (ip
->ip_off
& ~IP_MF
);
1930 if (ip
->ip_off
& IP_MF
)
1931 mhip
->ip_off
|= IP_MF
;
1932 if (off
+ len
>= (u_short
)ip
->ip_len
)
1933 len
= (u_short
)ip
->ip_len
- off
;
1935 mhip
->ip_off
|= IP_MF
;
1936 mhip
->ip_len
= htons((u_short
)(len
+ mhlen
));
1937 m
->m_next
= m_copy(m0
, off
, len
);
1938 if (m
->m_next
== NULL
) {
1940 error
= ENOBUFS
; /* ??? */
1941 OSAddAtomic(1, &ipstat
.ips_odropped
);
1944 m
->m_pkthdr
.len
= mhlen
+ len
;
1945 m
->m_pkthdr
.rcvif
= NULL
;
1946 m
->m_pkthdr
.csum_flags
= m0
->m_pkthdr
.csum_flags
;
1948 M_COPY_CLASSIFIER(m
, m0
);
1949 M_COPY_PFTAG(m
, m0
);
1952 mac_netinet_fragment(m0
, m
);
1953 #endif /* CONFIG_MACF_NET */
1955 #if BYTE_ORDER != BIG_ENDIAN
1956 HTONS(mhip
->ip_off
);
1960 if (sw_csum
& CSUM_DELAY_IP
) {
1961 mhip
->ip_sum
= ip_cksum_hdr_out(m
, mhlen
);
1962 m
->m_pkthdr
.csum_flags
&= ~CSUM_DELAY_IP
;
1965 mnext
= &m
->m_nextpkt
;
1968 OSAddAtomic(nfrags
, &ipstat
.ips_ofragments
);
1970 /* set first/last markers for fragment chain */
1971 m
->m_flags
|= M_LASTFRAG
;
1972 m0
->m_flags
|= M_FIRSTFRAG
| M_FRAG
;
1973 m0
->m_pkthdr
.csum_data
= nfrags
;
1976 * Update first fragment by trimming what's been copied out
1977 * and updating header, then send each fragment (in order).
1980 m_adj(m
, hlen
+ firstlen
- (u_short
)ip
->ip_len
);
1981 m
->m_pkthdr
.len
= hlen
+ firstlen
;
1982 ip
->ip_len
= htons((u_short
)m
->m_pkthdr
.len
);
1983 ip
->ip_off
|= IP_MF
;
1985 #if BYTE_ORDER != BIG_ENDIAN
1990 if (sw_csum
& CSUM_DELAY_IP
) {
1991 ip
->ip_sum
= ip_cksum_hdr_out(m
, hlen
);
1992 m
->m_pkthdr
.csum_flags
&= ~CSUM_DELAY_IP
;
2002 ip_out_cksum_stats(int proto
, u_int32_t len
)
2006 tcp_out_cksum_stats(len
);
2009 udp_out_cksum_stats(len
);
2012 /* keep only TCP or UDP stats for now */
2018 * Process a delayed payload checksum calculation (outbound path.)
2020 * hoff is the number of bytes beyond the mbuf data pointer which
2021 * points to the IP header.
2023 * Returns a bitmask representing all the work done in software.
2026 in_finalize_cksum(struct mbuf
*m
, uint32_t hoff
, uint32_t csum_flags
)
2028 unsigned char buf
[15 << 2] __attribute__((aligned(8)));
2030 uint32_t offset
, _hlen
, mlen
, hlen
, len
, sw_csum
;
2031 uint16_t csum
, ip_len
;
2033 _CASSERT(sizeof (csum
) == sizeof (uint16_t));
2034 VERIFY(m
->m_flags
& M_PKTHDR
);
2036 sw_csum
= (csum_flags
& m
->m_pkthdr
.csum_flags
);
2038 if ((sw_csum
&= (CSUM_DELAY_IP
| CSUM_DELAY_DATA
)) == 0)
2041 mlen
= m
->m_pkthdr
.len
; /* total mbuf len */
2043 /* sanity check (need at least simple IP header) */
2044 if (mlen
< (hoff
+ sizeof (*ip
))) {
2045 panic("%s: mbuf %p pkt len (%u) < hoff+ip_hdr "
2046 "(%u+%u)\n", __func__
, m
, mlen
, hoff
,
2047 (uint32_t)sizeof (*ip
));
2052 * In case the IP header is not contiguous, or not 32-bit aligned,
2053 * or if we're computing the IP header checksum, copy it to a local
2054 * buffer. Copy only the simple IP header here (IP options case
2055 * is handled below.)
2057 if ((sw_csum
& CSUM_DELAY_IP
) || (hoff
+ sizeof (*ip
)) > m
->m_len
||
2058 !IP_HDR_ALIGNED_P(mtod(m
, caddr_t
) + hoff
)) {
2059 m_copydata(m
, hoff
, sizeof (*ip
), (caddr_t
)buf
);
2060 ip
= (struct ip
*)(void *)buf
;
2061 _hlen
= sizeof (*ip
);
2063 ip
= (struct ip
*)(void *)(m
->m_data
+ hoff
);
2067 hlen
= IP_VHL_HL(ip
->ip_vhl
) << 2; /* IP header len */
2070 if (mlen
< (hoff
+ hlen
)) {
2071 panic("%s: mbuf %p pkt too short (%d) for IP header (%u), "
2072 "hoff %u", __func__
, m
, mlen
, hlen
, hoff
);
2077 * We could be in the context of an IP or interface filter; in the
2078 * former case, ip_len would be in host (correct) order while for
2079 * the latter it would be in network order. Because of this, we
2080 * attempt to interpret the length field by comparing it against
2081 * the actual packet length. If the comparison fails, byte swap
2082 * the length and check again. If it still fails, use the actual
2083 * packet length. This also covers the trailing bytes case.
2085 ip_len
= ip
->ip_len
;
2086 if (ip_len
!= (mlen
- hoff
)) {
2087 ip_len
= OSSwapInt16(ip_len
);
2088 if (ip_len
!= (mlen
- hoff
)) {
2089 printf("%s: mbuf 0x%llx proto %d IP len %d (%x) "
2090 "[swapped %d (%x)] doesn't match actual packet "
2091 "length; %d is used instead\n", __func__
,
2092 (uint64_t)VM_KERNEL_ADDRPERM(m
), ip
->ip_p
,
2093 ip
->ip_len
, ip
->ip_len
, ip_len
, ip_len
,
2095 ip_len
= mlen
- hoff
;
2099 len
= ip_len
- hlen
; /* csum span */
2101 if (sw_csum
& CSUM_DELAY_DATA
) {
2105 * offset is added to the lower 16-bit value of csum_data,
2106 * which is expected to contain the ULP offset; therefore
2107 * CSUM_PARTIAL offset adjustment must be undone.
2109 if ((m
->m_pkthdr
.csum_flags
& (CSUM_PARTIAL
|CSUM_DATA_VALID
)) ==
2110 (CSUM_PARTIAL
|CSUM_DATA_VALID
)) {
2112 * Get back the original ULP offset (this will
2113 * undo the CSUM_PARTIAL logic in ip_output.)
2115 m
->m_pkthdr
.csum_data
= (m
->m_pkthdr
.csum_tx_stuff
-
2116 m
->m_pkthdr
.csum_tx_start
);
2119 ulpoff
= (m
->m_pkthdr
.csum_data
& 0xffff); /* ULP csum offset */
2120 offset
= hoff
+ hlen
; /* ULP header */
2122 if (mlen
< (ulpoff
+ sizeof (csum
))) {
2123 panic("%s: mbuf %p pkt len (%u) proto %d invalid ULP "
2124 "cksum offset (%u) cksum flags 0x%x\n", __func__
,
2125 m
, mlen
, ip
->ip_p
, ulpoff
, m
->m_pkthdr
.csum_flags
);
2129 csum
= inet_cksum(m
, 0, offset
, len
);
2132 ip_out_cksum_stats(ip
->ip_p
, len
);
2134 /* RFC1122 4.1.3.4 */
2135 if (csum
== 0 && (m
->m_pkthdr
.csum_flags
& CSUM_UDP
))
2138 /* Insert the checksum in the ULP csum field */
2140 if (offset
+ sizeof (csum
) > m
->m_len
) {
2141 m_copyback(m
, offset
, sizeof (csum
), &csum
);
2142 } else if (IP_HDR_ALIGNED_P(mtod(m
, char *) + hoff
)) {
2143 *(uint16_t *)(void *)(mtod(m
, char *) + offset
) = csum
;
2145 bcopy(&csum
, (mtod(m
, char *) + offset
), sizeof (csum
));
2147 m
->m_pkthdr
.csum_flags
&=
2148 ~(CSUM_DELAY_DATA
| CSUM_DATA_VALID
| CSUM_PARTIAL
);
2151 if (sw_csum
& CSUM_DELAY_IP
) {
2152 /* IP header must be in the local buffer */
2153 VERIFY(_hlen
== sizeof (*ip
));
2154 if (_hlen
!= hlen
) {
2155 VERIFY(hlen
<= sizeof (buf
));
2156 m_copydata(m
, hoff
, hlen
, (caddr_t
)buf
);
2157 ip
= (struct ip
*)(void *)buf
;
2162 * Compute the IP header checksum as if the IP length
2163 * is the length which we believe is "correct"; see
2164 * how ip_len gets calculated above. Note that this
2165 * is done on the local copy and not on the real one.
2167 ip
->ip_len
= htons(ip_len
);
2169 csum
= in_cksum_hdr_opt(ip
);
2172 ipstat
.ips_snd_swcsum
++;
2173 ipstat
.ips_snd_swcsum_bytes
+= hlen
;
2176 * Insert only the checksum in the existing IP header
2177 * csum field; all other fields are left unchanged.
2179 offset
= hoff
+ offsetof(struct ip
, ip_sum
);
2180 if (offset
+ sizeof (csum
) > m
->m_len
) {
2181 m_copyback(m
, offset
, sizeof (csum
), &csum
);
2182 } else if (IP_HDR_ALIGNED_P(mtod(m
, char *) + hoff
)) {
2183 *(uint16_t *)(void *)(mtod(m
, char *) + offset
) = csum
;
2185 bcopy(&csum
, (mtod(m
, char *) + offset
), sizeof (csum
));
2187 m
->m_pkthdr
.csum_flags
&= ~CSUM_DELAY_IP
;
2195 * Insert IP options into preformed packet.
2196 * Adjust IP destination as required for IP source routing,
2197 * as indicated by a non-zero in_addr at the start of the options.
2199 * XXX This routine assumes that the packet has no options in place.
2201 static struct mbuf
*
2202 ip_insertoptions(struct mbuf
*m
, struct mbuf
*opt
, int *phlen
)
2204 struct ipoption
*p
= mtod(opt
, struct ipoption
*);
2206 struct ip
*ip
= mtod(m
, struct ip
*);
2209 optlen
= opt
->m_len
- sizeof (p
->ipopt_dst
);
2210 if (optlen
+ (u_short
)ip
->ip_len
> IP_MAXPACKET
)
2211 return (m
); /* XXX should fail */
2212 if (p
->ipopt_dst
.s_addr
)
2213 ip
->ip_dst
= p
->ipopt_dst
;
2214 if (m
->m_flags
& M_EXT
|| m
->m_data
- optlen
< m
->m_pktdat
) {
2215 MGETHDR(n
, M_DONTWAIT
, MT_HEADER
); /* MAC-OK */
2218 n
->m_pkthdr
.rcvif
= 0;
2220 mac_mbuf_label_copy(m
, n
);
2221 #endif /* CONFIG_MACF_NET */
2222 n
->m_pkthdr
.len
= m
->m_pkthdr
.len
+ optlen
;
2223 m
->m_len
-= sizeof (struct ip
);
2224 m
->m_data
+= sizeof (struct ip
);
2227 m
->m_len
= optlen
+ sizeof (struct ip
);
2228 m
->m_data
+= max_linkhdr
;
2229 (void) memcpy(mtod(m
, void *), ip
, sizeof (struct ip
));
2231 m
->m_data
-= optlen
;
2233 m
->m_pkthdr
.len
+= optlen
;
2234 ovbcopy((caddr_t
)ip
, mtod(m
, caddr_t
), sizeof (struct ip
));
2236 ip
= mtod(m
, struct ip
*);
2237 bcopy(p
->ipopt_list
, ip
+ 1, optlen
);
2238 *phlen
= sizeof (struct ip
) + optlen
;
2239 ip
->ip_vhl
= IP_MAKE_VHL(IPVERSION
, *phlen
>> 2);
2240 ip
->ip_len
+= optlen
;
2245 * Copy options from ip to jp,
2246 * omitting those not copied during fragmentation.
2249 ip_optcopy(struct ip
*ip
, struct ip
*jp
)
2252 int opt
, optlen
, cnt
;
2254 cp
= (u_char
*)(ip
+ 1);
2255 dp
= (u_char
*)(jp
+ 1);
2256 cnt
= (IP_VHL_HL(ip
->ip_vhl
) << 2) - sizeof (struct ip
);
2257 for (; cnt
> 0; cnt
-= optlen
, cp
+= optlen
) {
2259 if (opt
== IPOPT_EOL
)
2261 if (opt
== IPOPT_NOP
) {
2262 /* Preserve for IP mcast tunnel's LSRR alignment. */
2268 if (cnt
< IPOPT_OLEN
+ sizeof (*cp
)) {
2269 panic("malformed IPv4 option passed to ip_optcopy");
2273 optlen
= cp
[IPOPT_OLEN
];
2275 if (optlen
< IPOPT_OLEN
+ sizeof (*cp
) || optlen
> cnt
) {
2276 panic("malformed IPv4 option passed to ip_optcopy");
2280 /* bogus lengths should have been caught by ip_dooptions */
2283 if (IPOPT_COPIED(opt
)) {
2284 bcopy(cp
, dp
, optlen
);
2288 for (optlen
= dp
- (u_char
*)(jp
+1); optlen
& 0x3; optlen
++)
2294 * IP socket option processing.
2297 ip_ctloutput(struct socket
*so
, struct sockopt
*sopt
)
2299 struct inpcb
*inp
= sotoinpcb(so
);
2303 if (sopt
->sopt_level
!= IPPROTO_IP
)
2306 switch (sopt
->sopt_dir
) {
2308 switch (sopt
->sopt_name
) {
2315 if (sopt
->sopt_valsize
> MLEN
) {
2319 MGET(m
, sopt
->sopt_p
!= kernproc
? M_WAIT
: M_DONTWAIT
,
2325 m
->m_len
= sopt
->sopt_valsize
;
2326 error
= sooptcopyin(sopt
, mtod(m
, char *),
2327 m
->m_len
, m
->m_len
);
2331 return (ip_pcbopts(sopt
->sopt_name
,
2332 &inp
->inp_options
, m
));
2338 case IP_RECVRETOPTS
:
2339 case IP_RECVDSTADDR
:
2342 case IP_RECVPKTINFO
:
2343 error
= sooptcopyin(sopt
, &optval
, sizeof (optval
),
2348 switch (sopt
->sopt_name
) {
2350 inp
->inp_ip_tos
= optval
;
2354 inp
->inp_ip_ttl
= optval
;
2356 #define OPTSET(bit) \
2358 inp->inp_flags |= bit; \
2360 inp->inp_flags &= ~bit;
2363 OPTSET(INP_RECVOPTS
);
2366 case IP_RECVRETOPTS
:
2367 OPTSET(INP_RECVRETOPTS
);
2370 case IP_RECVDSTADDR
:
2371 OPTSET(INP_RECVDSTADDR
);
2379 OPTSET(INP_RECVTTL
);
2382 case IP_RECVPKTINFO
:
2383 OPTSET(INP_PKTINFO
);
2389 #if CONFIG_FORCE_OUT_IFP
2391 * Apple private interface, similar to IP_BOUND_IF, except
2392 * that the parameter is a NULL-terminated string containing
2393 * the name of the network interface; an emptry string means
2394 * unbind. Applications are encouraged to use IP_BOUND_IF
2395 * instead, as that is the current "official" API.
2397 case IP_FORCE_OUT_IFP
: {
2398 char ifname
[IFNAMSIZ
];
2399 unsigned int ifscope
;
2401 /* This option is settable only for IPv4 */
2402 if (!(inp
->inp_vflag
& INP_IPV4
)) {
2407 /* Verify interface name parameter is sane */
2408 if (sopt
->sopt_valsize
> sizeof (ifname
)) {
2413 /* Copy the interface name */
2414 if (sopt
->sopt_valsize
!= 0) {
2415 error
= sooptcopyin(sopt
, ifname
,
2416 sizeof (ifname
), sopt
->sopt_valsize
);
2421 if (sopt
->sopt_valsize
== 0 || ifname
[0] == '\0') {
2422 /* Unbind this socket from any interface */
2423 ifscope
= IFSCOPE_NONE
;
2427 /* Verify name is NULL terminated */
2428 if (ifname
[sopt
->sopt_valsize
- 1] != '\0') {
2433 /* Bail out if given bogus interface name */
2434 if (ifnet_find_by_name(ifname
, &ifp
) != 0) {
2439 /* Bind this socket to this interface */
2440 ifscope
= ifp
->if_index
;
2443 * Won't actually free; since we don't release
2444 * this later, we should do it now.
2448 error
= inp_bindif(inp
, ifscope
, NULL
);
2451 #endif /* CONFIG_FORCE_OUT_IFP */
2453 * Multicast socket options are processed by the in_mcast
2456 case IP_MULTICAST_IF
:
2457 case IP_MULTICAST_IFINDEX
:
2458 case IP_MULTICAST_VIF
:
2459 case IP_MULTICAST_TTL
:
2460 case IP_MULTICAST_LOOP
:
2461 case IP_ADD_MEMBERSHIP
:
2462 case IP_DROP_MEMBERSHIP
:
2463 case IP_ADD_SOURCE_MEMBERSHIP
:
2464 case IP_DROP_SOURCE_MEMBERSHIP
:
2465 case IP_BLOCK_SOURCE
:
2466 case IP_UNBLOCK_SOURCE
:
2468 case MCAST_JOIN_GROUP
:
2469 case MCAST_LEAVE_GROUP
:
2470 case MCAST_JOIN_SOURCE_GROUP
:
2471 case MCAST_LEAVE_SOURCE_GROUP
:
2472 case MCAST_BLOCK_SOURCE
:
2473 case MCAST_UNBLOCK_SOURCE
:
2474 error
= inp_setmoptions(inp
, sopt
);
2478 error
= sooptcopyin(sopt
, &optval
, sizeof (optval
),
2484 case IP_PORTRANGE_DEFAULT
:
2485 inp
->inp_flags
&= ~(INP_LOWPORT
);
2486 inp
->inp_flags
&= ~(INP_HIGHPORT
);
2489 case IP_PORTRANGE_HIGH
:
2490 inp
->inp_flags
&= ~(INP_LOWPORT
);
2491 inp
->inp_flags
|= INP_HIGHPORT
;
2494 case IP_PORTRANGE_LOW
:
2495 inp
->inp_flags
&= ~(INP_HIGHPORT
);
2496 inp
->inp_flags
|= INP_LOWPORT
;
2506 case IP_IPSEC_POLICY
: {
2513 if ((error
= soopt_getm(sopt
, &m
)) != 0) /* XXX */
2515 if ((error
= soopt_mcopyin(sopt
, m
)) != 0) /* XXX */
2517 priv
= (proc_suser(sopt
->sopt_p
) == 0);
2519 req
= mtod(m
, caddr_t
);
2522 optname
= sopt
->sopt_name
;
2523 error
= ipsec4_set_policy(inp
, optname
, req
, len
, priv
);
2530 case IP_TRAFFIC_MGT_BACKGROUND
: {
2531 unsigned background
= 0;
2533 error
= sooptcopyin(sopt
, &background
,
2534 sizeof (background
), sizeof (background
));
2539 socket_set_traffic_mgt_flags_locked(so
,
2540 TRAFFIC_MGT_SO_BACKGROUND
);
2542 socket_clear_traffic_mgt_flags_locked(so
,
2543 TRAFFIC_MGT_SO_BACKGROUND
);
2548 #endif /* TRAFFIC_MGT */
2551 * On a multihomed system, scoped routing can be used to
2552 * restrict the source interface used for sending packets.
2553 * The socket option IP_BOUND_IF binds a particular AF_INET
2554 * socket to an interface such that data sent on the socket
2555 * is restricted to that interface. This is unlike the
2556 * SO_DONTROUTE option where the routing table is bypassed;
2557 * therefore it allows for a greater flexibility and control
2558 * over the system behavior, and does not place any restriction
2559 * on the destination address type (e.g. unicast, multicast,
2560 * or broadcast if applicable) or whether or not the host is
2561 * directly reachable. Note that in the multicast transmit
2562 * case, IP_MULTICAST_{IF,IFINDEX} takes precedence over
2563 * IP_BOUND_IF, since the former practically bypasses the
2564 * routing table; in this case, IP_BOUND_IF sets the default
2565 * interface used for sending multicast packets in the absence
2566 * of an explicit multicast transmit interface.
2569 /* This option is settable only for IPv4 */
2570 if (!(inp
->inp_vflag
& INP_IPV4
)) {
2575 error
= sooptcopyin(sopt
, &optval
, sizeof (optval
),
2581 error
= inp_bindif(inp
, optval
, NULL
);
2584 case IP_NO_IFT_CELLULAR
:
2585 /* This option is settable only for IPv4 */
2586 if (!(inp
->inp_vflag
& INP_IPV4
)) {
2591 error
= sooptcopyin(sopt
, &optval
, sizeof (optval
),
2597 /* once set, it cannot be unset */
2598 if (!optval
&& (inp
->inp_flags
& INP_NO_IFT_CELLULAR
)) {
2603 error
= so_set_restrictions(so
,
2604 SO_RESTRICT_DENY_CELLULAR
);
2608 /* This option is not settable */
2613 error
= ENOPROTOOPT
;
2619 switch (sopt
->sopt_name
) {
2622 if (inp
->inp_options
) {
2623 error
= sooptcopyout(sopt
,
2624 mtod(inp
->inp_options
, char *),
2625 inp
->inp_options
->m_len
);
2627 sopt
->sopt_valsize
= 0;
2634 case IP_RECVRETOPTS
:
2635 case IP_RECVDSTADDR
:
2639 case IP_RECVPKTINFO
:
2640 switch (sopt
->sopt_name
) {
2643 optval
= inp
->inp_ip_tos
;
2647 optval
= inp
->inp_ip_ttl
;
2650 #define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0)
2653 optval
= OPTBIT(INP_RECVOPTS
);
2656 case IP_RECVRETOPTS
:
2657 optval
= OPTBIT(INP_RECVRETOPTS
);
2660 case IP_RECVDSTADDR
:
2661 optval
= OPTBIT(INP_RECVDSTADDR
);
2665 optval
= OPTBIT(INP_RECVIF
);
2669 optval
= OPTBIT(INP_RECVTTL
);
2673 if (inp
->inp_flags
& INP_HIGHPORT
)
2674 optval
= IP_PORTRANGE_HIGH
;
2675 else if (inp
->inp_flags
& INP_LOWPORT
)
2676 optval
= IP_PORTRANGE_LOW
;
2681 case IP_RECVPKTINFO
:
2682 optval
= OPTBIT(INP_PKTINFO
);
2685 error
= sooptcopyout(sopt
, &optval
, sizeof (optval
));
2688 case IP_MULTICAST_IF
:
2689 case IP_MULTICAST_IFINDEX
:
2690 case IP_MULTICAST_VIF
:
2691 case IP_MULTICAST_TTL
:
2692 case IP_MULTICAST_LOOP
:
2694 error
= inp_getmoptions(inp
, sopt
);
2698 case IP_IPSEC_POLICY
: {
2699 struct mbuf
*m
= NULL
;
2704 req
= mtod(m
, caddr_t
);
2707 error
= ipsec4_get_policy(sotoinpcb(so
), req
, len
, &m
);
2709 error
= soopt_mcopyout(sopt
, m
); /* XXX */
2717 case IP_TRAFFIC_MGT_BACKGROUND
: {
2718 unsigned background
= (so
->so_traffic_mgt_flags
&
2719 TRAFFIC_MGT_SO_BACKGROUND
) ? 1 : 0;
2720 return (sooptcopyout(sopt
, &background
,
2721 sizeof (background
)));
2724 #endif /* TRAFFIC_MGT */
2727 if (inp
->inp_flags
& INP_BOUND_IF
)
2728 optval
= inp
->inp_boundifp
->if_index
;
2729 error
= sooptcopyout(sopt
, &optval
, sizeof (optval
));
2732 case IP_NO_IFT_CELLULAR
:
2733 optval
= (inp
->inp_flags
& INP_NO_IFT_CELLULAR
) ? 1 : 0;
2734 error
= sooptcopyout(sopt
, &optval
, sizeof (optval
));
2738 optval
= (inp
->inp_last_outifp
!= NULL
) ?
2739 inp
->inp_last_outifp
->if_index
: 0;
2740 error
= sooptcopyout(sopt
, &optval
, sizeof (optval
));
2744 error
= ENOPROTOOPT
;
2753 * Set up IP options in pcb for insertion in output packets.
2754 * Store in mbuf with pointer in pcbopt, adding pseudo-option
2755 * with destination address if source routed.
2758 ip_pcbopts(int optname
, struct mbuf
**pcbopt
, struct mbuf
*m
)
2760 #pragma unused(optname)
2765 /* turn off any old options */
2767 (void) m_free(*pcbopt
);
2769 if (m
== (struct mbuf
*)0 || m
->m_len
== 0) {
2771 * Only turning off any previous options.
2778 if (m
->m_len
% sizeof (int32_t))
2782 * IP first-hop destination address will be stored before
2783 * actual options; move other options back
2784 * and clear it when none present.
2786 if (m
->m_data
+ m
->m_len
+ sizeof (struct in_addr
) >= &m
->m_dat
[MLEN
])
2789 m
->m_len
+= sizeof (struct in_addr
);
2790 cp
= mtod(m
, u_char
*) + sizeof (struct in_addr
);
2791 ovbcopy(mtod(m
, caddr_t
), (caddr_t
)cp
, (unsigned)cnt
);
2792 bzero(mtod(m
, caddr_t
), sizeof (struct in_addr
));
2794 for (; cnt
> 0; cnt
-= optlen
, cp
+= optlen
) {
2795 opt
= cp
[IPOPT_OPTVAL
];
2796 if (opt
== IPOPT_EOL
)
2798 if (opt
== IPOPT_NOP
)
2801 if (cnt
< IPOPT_OLEN
+ sizeof (*cp
))
2803 optlen
= cp
[IPOPT_OLEN
];
2804 if (optlen
< IPOPT_OLEN
+ sizeof (*cp
) || optlen
> cnt
)
2815 * user process specifies route as:
2817 * D must be our final destination (but we can't
2818 * check that since we may not have connected yet).
2819 * A is first hop destination, which doesn't appear in
2820 * actual IP option, but is stored before the options.
2822 if (optlen
< IPOPT_MINOFF
- 1 + sizeof (struct in_addr
))
2824 m
->m_len
-= sizeof (struct in_addr
);
2825 cnt
-= sizeof (struct in_addr
);
2826 optlen
-= sizeof (struct in_addr
);
2827 cp
[IPOPT_OLEN
] = optlen
;
2829 * Move first hop before start of options.
2831 bcopy((caddr_t
)&cp
[IPOPT_OFFSET
+1], mtod(m
, caddr_t
),
2832 sizeof (struct in_addr
));
2834 * Then copy rest of options back
2835 * to close up the deleted entry.
2837 ovbcopy((caddr_t
)(&cp
[IPOPT_OFFSET
+1] +
2838 sizeof (struct in_addr
)),
2839 (caddr_t
)&cp
[IPOPT_OFFSET
+1],
2840 (unsigned)cnt
+ sizeof (struct in_addr
));
2844 if (m
->m_len
> MAX_IPOPTLEN
+ sizeof (struct in_addr
))
2855 ip_moptions_init(void)
2857 PE_parse_boot_argn("ifa_debug", &imo_debug
, sizeof (imo_debug
));
2859 imo_size
= (imo_debug
== 0) ? sizeof (struct ip_moptions
) :
2860 sizeof (struct ip_moptions_dbg
);
2862 imo_zone
= zinit(imo_size
, IMO_ZONE_MAX
* imo_size
, 0,
2864 if (imo_zone
== NULL
) {
2865 panic("%s: failed allocating %s", __func__
, IMO_ZONE_NAME
);
2868 zone_change(imo_zone
, Z_EXPAND
, TRUE
);
2872 imo_addref(struct ip_moptions
*imo
, int locked
)
2877 IMO_LOCK_ASSERT_HELD(imo
);
2879 if (++imo
->imo_refcnt
== 0) {
2880 panic("%s: imo %p wraparound refcnt\n", __func__
, imo
);
2882 } else if (imo
->imo_trace
!= NULL
) {
2883 (*imo
->imo_trace
)(imo
, TRUE
);
2891 imo_remref(struct ip_moptions
*imo
)
2896 if (imo
->imo_refcnt
== 0) {
2897 panic("%s: imo %p negative refcnt", __func__
, imo
);
2899 } else if (imo
->imo_trace
!= NULL
) {
2900 (*imo
->imo_trace
)(imo
, FALSE
);
2904 if (imo
->imo_refcnt
> 0) {
2909 for (i
= 0; i
< imo
->imo_num_memberships
; ++i
) {
2910 struct in_mfilter
*imf
;
2912 imf
= imo
->imo_mfilters
? &imo
->imo_mfilters
[i
] : NULL
;
2916 (void) in_leavegroup(imo
->imo_membership
[i
], imf
);
2921 INM_REMREF(imo
->imo_membership
[i
]);
2922 imo
->imo_membership
[i
] = NULL
;
2924 imo
->imo_num_memberships
= 0;
2925 if (imo
->imo_mfilters
!= NULL
) {
2926 FREE(imo
->imo_mfilters
, M_INMFILTER
);
2927 imo
->imo_mfilters
= NULL
;
2929 if (imo
->imo_membership
!= NULL
) {
2930 FREE(imo
->imo_membership
, M_IPMOPTS
);
2931 imo
->imo_membership
= NULL
;
2935 lck_mtx_destroy(&imo
->imo_lock
, ifa_mtx_grp
);
2937 if (!(imo
->imo_debug
& IFD_ALLOC
)) {
2938 panic("%s: imo %p cannot be freed", __func__
, imo
);
2941 zfree(imo_zone
, imo
);
2945 imo_trace(struct ip_moptions
*imo
, int refhold
)
2947 struct ip_moptions_dbg
*imo_dbg
= (struct ip_moptions_dbg
*)imo
;
2952 if (!(imo
->imo_debug
& IFD_DEBUG
)) {
2953 panic("%s: imo %p has no debug structure", __func__
, imo
);
2957 cnt
= &imo_dbg
->imo_refhold_cnt
;
2958 tr
= imo_dbg
->imo_refhold
;
2960 cnt
= &imo_dbg
->imo_refrele_cnt
;
2961 tr
= imo_dbg
->imo_refrele
;
2964 idx
= atomic_add_16_ov(cnt
, 1) % IMO_TRACE_HIST_SIZE
;
2965 ctrace_record(&tr
[idx
]);
2968 struct ip_moptions
*
2969 ip_allocmoptions(int how
)
2971 struct ip_moptions
*imo
;
2973 imo
= (how
== M_WAITOK
) ? zalloc(imo_zone
) : zalloc_noblock(imo_zone
);
2975 bzero(imo
, imo_size
);
2976 lck_mtx_init(&imo
->imo_lock
, ifa_mtx_grp
, ifa_mtx_attr
);
2977 imo
->imo_debug
|= IFD_ALLOC
;
2978 if (imo_debug
!= 0) {
2979 imo
->imo_debug
|= IFD_DEBUG
;
2980 imo
->imo_trace
= imo_trace
;
2989 * Routine called from ip_output() to loop back a copy of an IP multicast
2990 * packet to the input queue of a specified interface. Note that this
2991 * calls the output routine of the loopback "driver", but with an interface
2992 * pointer that might NOT be a loopback interface -- evil, but easier than
2993 * replicating that code here.
2996 ip_mloopback(struct ifnet
*srcifp
, struct ifnet
*origifp
, struct mbuf
*m
,
2997 struct sockaddr_in
*dst
, int hlen
)
3006 * Copy the packet header as it's needed for the checksum
3007 * Make sure to deep-copy IP header portion in case the data
3008 * is in an mbuf cluster, so that we can safely override the IP
3009 * header portion later.
3011 copym
= m_copym_mode(m
, 0, M_COPYALL
, M_DONTWAIT
, M_COPYM_COPY_HDR
);
3012 if (copym
!= NULL
&& ((copym
->m_flags
& M_EXT
) || copym
->m_len
< hlen
))
3013 copym
= m_pullup(copym
, hlen
);
3019 * We don't bother to fragment if the IP length is greater
3020 * than the interface's MTU. Can this possibly matter?
3022 ip
= mtod(copym
, struct ip
*);
3023 #if BYTE_ORDER != BIG_ENDIAN
3028 ip
->ip_sum
= ip_cksum_hdr_out(copym
, hlen
);
3031 * Mark checksum as valid unless receive checksum offload is
3032 * disabled; if so, compute checksum in software. If the
3033 * interface itself is lo0, this will be overridden by if_loop.
3036 copym
->m_pkthdr
.csum_flags
&= ~CSUM_PARTIAL
;
3037 copym
->m_pkthdr
.csum_flags
|=
3038 CSUM_DATA_VALID
| CSUM_PSEUDO_HDR
;
3039 copym
->m_pkthdr
.csum_data
= 0xffff;
3040 } else if (copym
->m_pkthdr
.csum_flags
& CSUM_DELAY_DATA
) {
3041 #if BYTE_ORDER != BIG_ENDIAN
3044 in_delayed_cksum(copym
);
3045 #if BYTE_ORDER != BIG_ENDIAN
3051 * Stuff the 'real' ifp into the pkthdr, to be used in matching
3052 * in ip_input(); we need the loopback ifp/dl_tag passed as args
3053 * to make the loopback driver compliant with the data link
3056 copym
->m_pkthdr
.rcvif
= origifp
;
3059 * Also record the source interface (which owns the source address).
3060 * This is basically a stripped down version of ifa_foraddr().
3062 if (srcifp
== NULL
) {
3063 struct in_ifaddr
*ia
;
3065 lck_rw_lock_shared(in_ifaddr_rwlock
);
3066 TAILQ_FOREACH(ia
, INADDR_HASH(ip
->ip_src
.s_addr
), ia_hash
) {
3067 IFA_LOCK_SPIN(&ia
->ia_ifa
);
3068 if (IA_SIN(ia
)->sin_addr
.s_addr
== ip
->ip_src
.s_addr
) {
3069 srcifp
= ia
->ia_ifp
;
3070 IFA_UNLOCK(&ia
->ia_ifa
);
3073 IFA_UNLOCK(&ia
->ia_ifa
);
3075 lck_rw_done(in_ifaddr_rwlock
);
3078 ip_setsrcifaddr_info(copym
, srcifp
->if_index
, NULL
);
3079 ip_setdstifaddr_info(copym
, origifp
->if_index
, NULL
);
3081 dlil_output(lo_ifp
, PF_INET
, copym
, NULL
, SA(dst
), 0, NULL
);
3085 * Given a source IP address (and route, if available), determine the best
3086 * interface to send the packet from. Checking for (and updating) the
3087 * ROF_SRCIF_SELECTED flag in the pcb-supplied route placeholder is done
3088 * without any locks based on the assumption that ip_output() is single-
3089 * threaded per-pcb, i.e. for any given pcb there can only be one thread
3090 * performing output at the IP layer.
3092 * This routine is analogous to in6_selectroute() for IPv6.
3094 static struct ifaddr
*
3095 in_selectsrcif(struct ip
*ip
, struct route
*ro
, unsigned int ifscope
)
3097 struct ifaddr
*ifa
= NULL
;
3098 struct in_addr src
= ip
->ip_src
;
3099 struct in_addr dst
= ip
->ip_dst
;
3100 struct ifnet
*rt_ifp
;
3101 char s_src
[MAX_IPv4_STR_LEN
], s_dst
[MAX_IPv4_STR_LEN
];
3103 VERIFY(src
.s_addr
!= INADDR_ANY
);
3105 if (ip_select_srcif_debug
) {
3106 (void) inet_ntop(AF_INET
, &src
.s_addr
, s_src
, sizeof (s_src
));
3107 (void) inet_ntop(AF_INET
, &dst
.s_addr
, s_dst
, sizeof (s_dst
));
3110 if (ro
->ro_rt
!= NULL
)
3113 rt_ifp
= (ro
->ro_rt
!= NULL
) ? ro
->ro_rt
->rt_ifp
: NULL
;
3116 * Given the source IP address, find a suitable source interface
3117 * to use for transmission; if the caller has specified a scope,
3118 * optimize the search by looking at the addresses only for that
3119 * interface. This is still suboptimal, however, as we need to
3120 * traverse the per-interface list.
3122 if (ifscope
!= IFSCOPE_NONE
|| ro
->ro_rt
!= NULL
) {
3123 unsigned int scope
= ifscope
;
3126 * If no scope is specified and the route is stale (pointing
3127 * to a defunct interface) use the current primary interface;
3128 * this happens when switching between interfaces configured
3129 * with the same IP address. Otherwise pick up the scope
3130 * information from the route; the ULP may have looked up a
3131 * correct route and we just need to verify it here and mark
3132 * it with the ROF_SRCIF_SELECTED flag below.
3134 if (scope
== IFSCOPE_NONE
) {
3135 scope
= rt_ifp
->if_index
;
3136 if (scope
!= get_primary_ifscope(AF_INET
) &&
3138 scope
= get_primary_ifscope(AF_INET
);
3141 ifa
= (struct ifaddr
*)ifa_foraddr_scoped(src
.s_addr
, scope
);
3143 if (ifa
== NULL
&& ip
->ip_p
!= IPPROTO_UDP
&&
3144 ip
->ip_p
!= IPPROTO_TCP
&& ipforwarding
) {
3146 * If forwarding is enabled, and if the packet isn't
3147 * TCP or UDP, check if the source address belongs
3148 * to one of our own interfaces; if so, demote the
3149 * interface scope and do a route lookup right below.
3151 ifa
= (struct ifaddr
*)ifa_foraddr(src
.s_addr
);
3155 ifscope
= IFSCOPE_NONE
;
3159 if (ip_select_srcif_debug
&& ifa
!= NULL
) {
3160 if (ro
->ro_rt
!= NULL
) {
3161 printf("%s->%s ifscope %d->%d ifa_if %s "
3162 "ro_if %s\n", s_src
, s_dst
, ifscope
,
3163 scope
, if_name(ifa
->ifa_ifp
),
3166 printf("%s->%s ifscope %d->%d ifa_if %s\n",
3167 s_src
, s_dst
, ifscope
, scope
,
3168 if_name(ifa
->ifa_ifp
));
3174 * Slow path; search for an interface having the corresponding source
3175 * IP address if the scope was not specified by the caller, and:
3177 * 1) There currently isn't any route, or,
3178 * 2) The interface used by the route does not own that source
3179 * IP address; in this case, the route will get blown away
3180 * and we'll do a more specific scoped search using the newly
3183 if (ifa
== NULL
&& ifscope
== IFSCOPE_NONE
) {
3184 ifa
= (struct ifaddr
*)ifa_foraddr(src
.s_addr
);
3187 * If we have the IP address, but not the route, we don't
3188 * really know whether or not it belongs to the correct
3189 * interface (it could be shared across multiple interfaces.)
3190 * The only way to find out is to do a route lookup.
3192 if (ifa
!= NULL
&& ro
->ro_rt
== NULL
) {
3194 struct sockaddr_in sin
;
3195 struct ifaddr
*oifa
= NULL
;
3197 bzero(&sin
, sizeof (sin
));
3198 sin
.sin_family
= AF_INET
;
3199 sin
.sin_len
= sizeof (sin
);
3202 lck_mtx_lock(rnh_lock
);
3203 if ((rt
= rt_lookup(TRUE
, SA(&sin
), NULL
,
3204 rt_tables
[AF_INET
], IFSCOPE_NONE
)) != NULL
) {
3207 * If the route uses a different interface,
3208 * use that one instead. The IP address of
3209 * the ifaddr that we pick up here is not
3212 if (ifa
->ifa_ifp
!= rt
->rt_ifp
) {
3222 lck_mtx_unlock(rnh_lock
);
3225 struct ifaddr
*iifa
;
3228 * See if the interface pointed to by the
3229 * route is configured with the source IP
3230 * address of the packet.
3232 iifa
= (struct ifaddr
*)ifa_foraddr_scoped(
3233 src
.s_addr
, ifa
->ifa_ifp
->if_index
);
3237 * Found it; drop the original one
3238 * as well as the route interface
3239 * address, and use this instead.
3244 } else if (!ipforwarding
||
3245 (rt
->rt_flags
& RTF_GATEWAY
)) {
3247 * This interface doesn't have that
3248 * source IP address; drop the route
3249 * interface address and just use the
3250 * original one, and let the caller
3251 * do a scoped route lookup.
3257 * Forwarding is enabled and the source
3258 * address belongs to one of our own
3259 * interfaces which isn't the outgoing
3260 * interface, and we have a route, and
3261 * the destination is on a network that
3262 * is directly attached (onlink); drop
3263 * the original one and use the route
3264 * interface address instead.
3269 } else if (ifa
!= NULL
&& ro
->ro_rt
!= NULL
&&
3270 !(ro
->ro_rt
->rt_flags
& RTF_GATEWAY
) &&
3271 ifa
->ifa_ifp
!= ro
->ro_rt
->rt_ifp
&& ipforwarding
) {
3273 * Forwarding is enabled and the source address belongs
3274 * to one of our own interfaces which isn't the same
3275 * as the interface used by the known route; drop the
3276 * original one and use the route interface address.
3279 ifa
= ro
->ro_rt
->rt_ifa
;
3283 if (ip_select_srcif_debug
&& ifa
!= NULL
) {
3284 printf("%s->%s ifscope %d ifa_if %s\n",
3285 s_src
, s_dst
, ifscope
, if_name(ifa
->ifa_ifp
));
3289 if (ro
->ro_rt
!= NULL
)
3290 RT_LOCK_ASSERT_HELD(ro
->ro_rt
);
3292 * If there is a non-loopback route with the wrong interface, or if
3293 * there is no interface configured with such an address, blow it
3294 * away. Except for local/loopback, we look for one with a matching
3295 * interface scope/index.
3297 if (ro
->ro_rt
!= NULL
&&
3298 (ifa
== NULL
|| (ifa
->ifa_ifp
!= rt_ifp
&& rt_ifp
!= lo_ifp
) ||
3299 !(ro
->ro_rt
->rt_flags
& RTF_UP
))) {
3300 if (ip_select_srcif_debug
) {
3302 printf("%s->%s ifscope %d ro_if %s != "
3303 "ifa_if %s (cached route cleared)\n",
3304 s_src
, s_dst
, ifscope
, if_name(rt_ifp
),
3305 if_name(ifa
->ifa_ifp
));
3307 printf("%s->%s ifscope %d ro_if %s "
3308 "(no ifa_if found)\n",
3309 s_src
, s_dst
, ifscope
, if_name(rt_ifp
));
3313 RT_UNLOCK(ro
->ro_rt
);
3317 * If the destination is IPv4 LLA and the route's interface
3318 * doesn't match the source interface, then the source IP
3319 * address is wrong; it most likely belongs to the primary
3320 * interface associated with the IPv4 LL subnet. Drop the
3321 * packet rather than letting it go out and return an error
3322 * to the ULP. This actually applies not only to IPv4 LL
3323 * but other shared subnets; for now we explicitly test only
3324 * for the former case and save the latter for future.
3326 if (IN_LINKLOCAL(ntohl(dst
.s_addr
)) &&
3327 !IN_LINKLOCAL(ntohl(src
.s_addr
)) && ifa
!= NULL
) {
3333 if (ip_select_srcif_debug
&& ifa
== NULL
) {
3334 printf("%s->%s ifscope %d (neither ro_if/ifa_if found)\n",
3335 s_src
, s_dst
, ifscope
);
3339 * If there is a route, mark it accordingly. If there isn't one,
3340 * we'll get here again during the next transmit (possibly with a
3341 * route) and the flag will get set at that point. For IPv4 LLA
3342 * destination, mark it only if the route has been fully resolved;
3343 * otherwise we want to come back here again when the route points
3344 * to the interface over which the ARP reply arrives on.
3346 if (ro
->ro_rt
!= NULL
&& (!IN_LINKLOCAL(ntohl(dst
.s_addr
)) ||
3347 (ro
->ro_rt
->rt_gateway
->sa_family
== AF_LINK
&&
3348 SDL(ro
->ro_rt
->rt_gateway
)->sdl_alen
!= 0))) {
3350 IFA_ADDREF(ifa
); /* for route */
3351 if (ro
->ro_srcia
!= NULL
)
3352 IFA_REMREF(ro
->ro_srcia
);
3354 ro
->ro_flags
|= ROF_SRCIF_SELECTED
;
3355 RT_GENID_SYNC(ro
->ro_rt
);
3358 if (ro
->ro_rt
!= NULL
)
3359 RT_UNLOCK(ro
->ro_rt
);
3365 ip_output_checksum(struct ifnet
*ifp
, struct mbuf
*m
, int hlen
, int ip_len
,
3368 int tso
= TSO_IPV4_OK(ifp
, m
);
3369 uint32_t hwcap
= ifp
->if_hwassist
;
3371 m
->m_pkthdr
.csum_flags
|= CSUM_IP
;
3374 /* do all in software; hardware checksum offload is disabled */
3375 *sw_csum
= (CSUM_DELAY_DATA
| CSUM_DELAY_IP
) &
3376 m
->m_pkthdr
.csum_flags
;
3378 /* do in software what the hardware cannot */
3379 *sw_csum
= m
->m_pkthdr
.csum_flags
&
3380 ~IF_HWASSIST_CSUM_FLAGS(hwcap
);
3383 if (hlen
!= sizeof (struct ip
)) {
3384 *sw_csum
|= ((CSUM_DELAY_DATA
| CSUM_DELAY_IP
) &
3385 m
->m_pkthdr
.csum_flags
);
3386 } else if (!(*sw_csum
& CSUM_DELAY_DATA
) && (hwcap
& CSUM_PARTIAL
)) {
3388 * Partial checksum offload, if non-IP fragment, and TCP only
3389 * (no UDP support, as the hardware may not be able to convert
3390 * +0 to -0 (0xffff) per RFC1122 4.1.3.4.)
3392 if (hwcksum_tx
&& !tso
&&
3393 (m
->m_pkthdr
.csum_flags
& CSUM_TCP
) &&
3394 ip_len
<= ifp
->if_mtu
) {
3395 uint16_t start
= sizeof (struct ip
);
3396 uint16_t ulpoff
= m
->m_pkthdr
.csum_data
& 0xffff;
3397 m
->m_pkthdr
.csum_flags
|=
3398 (CSUM_DATA_VALID
| CSUM_PARTIAL
);
3399 m
->m_pkthdr
.csum_tx_stuff
= (ulpoff
+ start
);
3400 m
->m_pkthdr
.csum_tx_start
= start
;
3401 /* do IP hdr chksum in software */
3402 *sw_csum
= CSUM_DELAY_IP
;
3404 *sw_csum
|= (CSUM_DELAY_DATA
& m
->m_pkthdr
.csum_flags
);
3408 if (*sw_csum
& CSUM_DELAY_DATA
) {
3409 in_delayed_cksum(m
);
3410 *sw_csum
&= ~CSUM_DELAY_DATA
;
3415 * Drop off bits that aren't supported by hardware;
3416 * also make sure to preserve non-checksum related bits.
3418 m
->m_pkthdr
.csum_flags
=
3419 ((m
->m_pkthdr
.csum_flags
&
3420 (IF_HWASSIST_CSUM_FLAGS(hwcap
) | CSUM_DATA_VALID
)) |
3421 (m
->m_pkthdr
.csum_flags
& ~IF_HWASSIST_CSUM_MASK
));
3423 /* drop all bits; hardware checksum offload is disabled */
3424 m
->m_pkthdr
.csum_flags
= 0;
3429 * GRE protocol output for PPP/PPTP
3432 ip_gre_output(struct mbuf
*m
)
3437 bzero(&ro
, sizeof (ro
));
3439 error
= ip_output(m
, NULL
, &ro
, 0, NULL
, NULL
);