]> git.saurik.com Git - apple/xnu.git/blame - bsd/netinet/ip_output.c
xnu-6153.61.1.tar.gz
[apple/xnu.git] / bsd / netinet / ip_output.c
CommitLineData
1c79356b 1/*
cb323159 2 * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
5d5c5d0d 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
39236c6e 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
39236c6e 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
39236c6e 17 *
2d21ac55
A
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
39236c6e 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * Copyright (c) 1982, 1986, 1988, 1990, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94
61 */
2d21ac55
A
62/*
63 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
64 * support for mandatory and extensible security protections. This notice
65 * is included in support of clause 2.2 (b) of the Apple Public License,
66 * Version 2.0.
67 */
1c79356b 68
0a7de745 69#define _IP_VHL
1c79356b 70
1c79356b
A
71#include <sys/param.h>
72#include <sys/systm.h>
73#include <sys/kernel.h>
74#include <sys/malloc.h>
75#include <sys/mbuf.h>
76#include <sys/protosw.h>
77#include <sys/socket.h>
78#include <sys/socketvar.h>
91447636
A
79#include <kern/locks.h>
80#include <sys/sysctl.h>
6d2010ae 81#include <sys/mcache.h>
39236c6e 82#include <sys/kdebug.h>
1c79356b 83
b0d623f7 84#include <machine/endian.h>
6d2010ae 85#include <pexpert/pexpert.h>
39236c6e
A
86#include <mach/sdt.h>
87
88#include <libkern/OSAtomic.h>
89#include <libkern/OSByteOrder.h>
b0d623f7 90
1c79356b 91#include <net/if.h>
c910b4d9 92#include <net/if_dl.h>
6d2010ae 93#include <net/if_types.h>
1c79356b 94#include <net/route.h>
6d2010ae
A
95#include <net/ntstat.h>
96#include <net/net_osdep.h>
39236c6e 97#include <net/dlil.h>
3e170ce0 98#include <net/net_perf.h>
1c79356b
A
99
100#include <netinet/in.h>
101#include <netinet/in_systm.h>
102#include <netinet/ip.h>
1c79356b
A
103#include <netinet/in_pcb.h>
104#include <netinet/in_var.h>
105#include <netinet/ip_var.h>
91447636 106#include <netinet/kpi_ipfilter_var.h>
39037602 107#include <netinet/in_tclass.h>
d9a64523
A
108#include <netinet/udp.h>
109
110#include <netinet6/nd6.h>
91447636 111
2d21ac55
A
112#if CONFIG_MACF_NET
113#include <security/mac_framework.h>
39236c6e 114#endif /* CONFIG_MACF_NET */
1c79356b 115
0a7de745
A
/*
 * Trace codes used by this file, all built with NETDBG_CODE() in the
 * DBG_NETIP class.  ip_output_list() logs DBG_FNC_IP_OUTPUT (combined with
 * DBG_FUNC_START) on entry and DBG_LAYER_BEG once the IP header has been
 * filled in.
 */
#define DBG_LAYER_BEG           NETDBG_CODE(DBG_NETIP, 1)
#define DBG_LAYER_END           NETDBG_CODE(DBG_NETIP, 3)
#define DBG_FNC_IP_OUTPUT       NETDBG_CODE(DBG_NETIP, (1 << 8) | 1)
#define DBG_FNC_IPSEC4_OUTPUT   NETDBG_CODE(DBG_NETIP, (2 << 8) | 1)
1c79356b 120
1c79356b
A
121#if IPSEC
122#include <netinet6/ipsec.h>
123#include <netkey/key.h>
9bccf70c 124#if IPSEC_DEBUG
1c79356b 125#include <netkey/key_debug.h>
1c79356b 126#else
0a7de745 127#define KEYDEBUG(lev, arg)
1c79356b 128#endif
39236c6e 129#endif /* IPSEC */
1c79356b 130
fe8ab488
A
131#if NECP
132#include <net/necp.h>
133#endif /* NECP */
134
39236c6e 135#if IPFIREWALL
1c79356b 136#include <netinet/ip_fw.h>
39236c6e 137#if IPDIVERT
91447636 138#include <netinet/ip_divert.h>
39236c6e
A
139#endif /* IPDIVERT */
140#endif /* IPFIREWALL */
1c79356b
A
141
142#if DUMMYNET
143#include <netinet/ip_dummynet.h>
144#endif
145
b0d623f7
A
146#if PF
147#include <net/pfvar.h>
148#endif /* PF */
149
#if IPFIREWALL_FORWARD && IPFIREWALL_FORWARD_DEBUG
/*
 * Debug helper: print the network-byte-order IPv4 address stored in the
 * s_addr member of `a' in dotted-quad form, without a trailing newline.
 *
 * Each octet is cast to unsigned int and printed with %u so that every
 * argument matches its conversion specifier regardless of the width of
 * ntohl()'s return type; the former "%ld" was only correct where that type
 * happened to be long.  The macro argument is parenthesized so that any
 * expression yielding an object with an s_addr member may be passed.
 *
 * The expansion deliberately keeps its trailing semicolon so that existing
 * call sites continue to expand exactly as before.
 */
#define print_ip(a)                                                     \
	printf("%u.%u.%u.%u",                                           \
	    (unsigned int)((ntohl((a).s_addr) >> 24) & 0xFF),           \
	    (unsigned int)((ntohl((a).s_addr) >> 16) & 0xFF),           \
	    (unsigned int)((ntohl((a).s_addr) >> 8) & 0xFF),            \
	    (unsigned int)((ntohl((a).s_addr)) & 0xFF));
#endif /* IPFIREWALL_FORWARD && IPFIREWALL_FORWARD_DEBUG */
1c79356b
A
157
158u_short ip_id;
159
3e170ce0
A
160static int sysctl_reset_ip_output_stats SYSCTL_HANDLER_ARGS;
161static int sysctl_ip_output_measure_bins SYSCTL_HANDLER_ARGS;
162static int sysctl_ip_output_getperf SYSCTL_HANDLER_ARGS;
2d21ac55 163static void ip_out_cksum_stats(int, u_int32_t);
39236c6e
A
164static struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *);
165static int ip_optcopy(struct ip *, struct ip *);
166static int ip_pcbopts(int, struct mbuf **, struct mbuf *);
167static void imo_trace(struct ip_moptions *, int);
168static void ip_mloopback(struct ifnet *, struct ifnet *, struct mbuf *,
169 struct sockaddr_in *, int);
c910b4d9 170static struct ifaddr *in_selectsrcif(struct ip *, struct route *, unsigned int);
2d21ac55 171
9bccf70c
A
172extern struct ip_linklocal_stat ip_linklocal_stat;
173
174/* temporary: for testing */
175#if IPSEC
176extern int ipsec_bypass;
177#endif
178
39236c6e
A
179static int ip_maxchainsent = 0;
180SYSCTL_INT(_net_inet_ip, OID_AUTO, maxchainsent,
0a7de745
A
181 CTLFLAG_RW | CTLFLAG_LOCKED, &ip_maxchainsent, 0,
182 "use dlil_output_list");
2d21ac55
A
183#if DEBUG
184static int forge_ce = 0;
39236c6e 185SYSCTL_INT(_net_inet_ip, OID_AUTO, forge_ce,
0a7de745
A
186 CTLFLAG_RW | CTLFLAG_LOCKED, &forge_ce, 0,
187 "Forge ECN CE");
2d21ac55 188#endif /* DEBUG */
c910b4d9
A
189
190static int ip_select_srcif_debug = 0;
39236c6e 191SYSCTL_INT(_net_inet_ip, OID_AUTO, select_srcif_debug,
0a7de745
A
192 CTLFLAG_RW | CTLFLAG_LOCKED, &ip_select_srcif_debug, 0,
193 "log source interface selection debug info");
c910b4d9 194
3e170ce0
A
195static int ip_output_measure = 0;
196SYSCTL_PROC(_net_inet_ip, OID_AUTO, output_perf,
0a7de745
A
197 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
198 &ip_output_measure, 0, sysctl_reset_ip_output_stats, "I",
199 "Do time measurement");
3e170ce0
A
200
201static uint64_t ip_output_measure_bins = 0;
202SYSCTL_PROC(_net_inet_ip, OID_AUTO, output_perf_bins,
0a7de745
A
203 CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &ip_output_measure_bins, 0,
204 sysctl_ip_output_measure_bins, "I",
205 "bins for chaining performance data histogram");
3e170ce0
A
206
207static net_perf_t net_perf;
208SYSCTL_PROC(_net_inet_ip, OID_AUTO, output_perf_data,
0a7de745
A
209 CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
210 0, 0, sysctl_ip_output_getperf, "S,net_perf",
211 "IP output performance data (struct net_perf, net/net_perf.h)");
3e170ce0 212
5ba3f43e
A
213__private_extern__ int rfc6864 = 1;
214SYSCTL_INT(_net_inet_ip, OID_AUTO, rfc6864, CTLFLAG_RW | CTLFLAG_LOCKED,
0a7de745 215 &rfc6864, 0, "updated ip id field behavior");
5ba3f43e 216
0a7de745 217#define IMO_TRACE_HIST_SIZE 32 /* size of trace history */
6d2010ae
A
218
219/* For gdb */
220__private_extern__ unsigned int imo_trace_hist_size = IMO_TRACE_HIST_SIZE;
221
222struct ip_moptions_dbg {
0a7de745
A
223 struct ip_moptions imo; /* ip_moptions */
224 u_int16_t imo_refhold_cnt; /* # of IMO_ADDREF */
225 u_int16_t imo_refrele_cnt; /* # of IMO_REMREF */
6d2010ae
A
226 /*
227 * Alloc and free callers.
228 */
0a7de745
A
229 ctrace_t imo_alloc;
230 ctrace_t imo_free;
6d2010ae
A
231 /*
232 * Circular lists of IMO_ADDREF and IMO_REMREF callers.
233 */
0a7de745
A
234 ctrace_t imo_refhold[IMO_TRACE_HIST_SIZE];
235 ctrace_t imo_refrele[IMO_TRACE_HIST_SIZE];
6d2010ae
A
236};
237
/* imo_debug starts out enabled on DEBUG builds and disabled otherwise. */
#if !DEBUG
static unsigned int imo_debug;          /* debugging (disabled) */
#else
static unsigned int imo_debug = 1;      /* debugging (enabled) */
#endif /* DEBUG */

static unsigned int imo_size;           /* size of zone element */
static struct zone *imo_zone;           /* zone for ip_moptions */

#define IMO_ZONE_MAX            64              /* maximum elements in zone */
#define IMO_ZONE_NAME           "ip_moptions"   /* zone name */
6d2010ae 248
1c79356b
A
/*
 * IP output entry point.
 *
 * m0 heads an mbuf chain that carries a skeletal IP header (len, off, ttl,
 * proto, tos, src and dst already set).  That chain will be freed.  The
 * mbuf opt, if present, will not be freed.
 *
 * This is a thin wrapper: all arguments are handed unchanged to
 * ip_output_list() with its packetchain argument set to 0, and whatever
 * that function returns is returned to the caller.
 */
int
ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro, int flags,
    struct ip_moptions *imo, struct ip_out_args *ipoa)
{
	int error;

	error = ip_output_list(m0, 0, opt, ro, flags, imo, ipoa);
	return error;
}
261
2d21ac55 262/*
39236c6e
A
263 * IP output. The packet in mbuf chain m contains a skeletal IP
264 * header (with len, off, ttl, proto, tos, src, dst).
265 * The mbuf chain containing the packet will be freed.
266 * The mbuf opt, if present, will not be freed.
267 *
268 * Route ro MUST be non-NULL; if ro->ro_rt is valid, route lookup would be
269 * skipped and ro->ro_rt would be used. Otherwise the result of route
270 * lookup is stored in ro->ro_rt.
2d21ac55 271 *
39236c6e
A
272 * In the IP forwarding case, the packet will arrive with options already
273 * inserted, so must have a NULL opt pointer.
2d21ac55 274 */
91447636 275int
39236c6e
A
276ip_output_list(struct mbuf *m0, int packetchain, struct mbuf *opt,
277 struct route *ro, int flags, struct ip_moptions *imo,
278 struct ip_out_args *ipoa)
1c79356b 279{
b0d623f7 280 struct ip *ip;
0a7de745 281 struct ifnet *ifp = NULL; /* not refcnt'd */
316670eb 282 struct mbuf *m = m0, *prevnxt = NULL, **mppn = &prevnxt;
0a7de745 283 int hlen = sizeof(struct ip);
6d2010ae 284 int len = 0, error = 0;
55e303ae 285 struct sockaddr_in *dst = NULL;
b0d623f7 286 struct in_ifaddr *ia = NULL, *src_ia = NULL;
91447636 287 struct in_addr pkt_dst;
39236c6e
A
288 struct ipf_pktopts *ippo = NULL;
289 ipfilter_t inject_filter_ref = NULL;
290 struct mbuf *packetlist;
291 uint32_t sw_csum, pktcnt = 0, scnt = 0, bytecnt = 0;
3e170ce0 292 uint32_t packets_processed = 0;
39236c6e
A
293 unsigned int ifscope = IFSCOPE_NONE;
294 struct flowadv *adv = NULL;
3e170ce0 295 struct timeval start_tv;
1c79356b 296#if IPSEC
9bccf70c 297 struct socket *so = NULL;
1c79356b 298 struct secpolicy *sp = NULL;
39236c6e 299#endif /* IPSEC */
fe8ab488
A
300#if NECP
301 necp_kernel_policy_result necp_result = 0;
302 necp_kernel_policy_result_parameter necp_result_parameter;
303 necp_kernel_policy_id necp_matched_policy_id = 0;
304#endif /* NECP */
4a3eedf9 305#if IPFIREWALL
39236c6e 306 int ipfwoff;
316670eb 307 struct sockaddr_in *next_hop_from_ipfwd_tag = NULL;
39236c6e 308#endif /* IPFIREWALL */
316670eb 309#if IPFIREWALL || DUMMYNET
39236c6e
A
310 struct m_tag *tag;
311#endif /* IPFIREWALL || DUMMYNET */
6d2010ae 312#if DUMMYNET
c910b4d9 313 struct ip_out_args saved_ipoa;
6d2010ae
A
314 struct sockaddr_in dst_buf;
315#endif /* DUMMYNET */
39236c6e 316 struct {
ebb1b9f4 317#if IPSEC
39236c6e 318 struct ipsec_output_state ipsec_state;
ebb1b9f4 319#endif /* IPSEC */
fe8ab488
A
320#if NECP
321 struct route necp_route;
322#endif /* NECP */
39236c6e
A
323#if IPFIREWALL || DUMMYNET
324 struct ip_fw_args args;
325#endif /* IPFIREWALL || DUMMYNET */
326#if IPFIREWALL_FORWARD
327 struct route sro_fwd;
328#endif /* IPFIREWALL_FORWARD */
329#if DUMMYNET
330 struct route saved_route;
331#endif /* DUMMYNET */
332 struct ipf_pktopts ipf_pktopts;
333 } ipobz;
0a7de745
A
334#define ipsec_state ipobz.ipsec_state
335#define necp_route ipobz.necp_route
336#define args ipobz.args
337#define sro_fwd ipobz.sro_fwd
338#define saved_route ipobz.saved_route
339#define ipf_pktopts ipobz.ipf_pktopts
39236c6e
A
340 union {
341 struct {
0a7de745
A
342 boolean_t select_srcif : 1; /* set once */
343 boolean_t srcbound : 1; /* set once */
344 boolean_t nocell : 1; /* set once */
39236c6e
A
345 boolean_t isbroadcast : 1;
346 boolean_t didfilter : 1;
0a7de745 347 boolean_t noexpensive : 1; /* set once */
cb323159 348 boolean_t noconstrained : 1; /* set once */
0a7de745 349 boolean_t awdl_unrestricted : 1; /* set once */
39236c6e
A
350#if IPFIREWALL_FORWARD
351 boolean_t fwd_rewrite_src : 1;
352#endif /* IPFIREWALL_FORWARD */
353 };
354 uint32_t raw;
355 } ipobf = { .raw = 0 };
356
d9a64523
A
357 int interface_mtu = 0;
358
39037602
A
359/*
360 * Here we check for restrictions when sending frames.
361 * N.B.: IPv4 over internal co-processor interfaces is not allowed.
362 */
0a7de745
A
363#define IP_CHECK_RESTRICTIONS(_ifp, _ipobf) \
364 (((_ipobf).nocell && IFNET_IS_CELLULAR(_ifp)) || \
365 ((_ipobf).noexpensive && IFNET_IS_EXPENSIVE(_ifp)) || \
cb323159
A
366 ((_ipobf).noconstrained && IFNET_IS_CONSTRAINED(_ifp)) || \
367 (IFNET_IS_INTCOPROC(_ifp)) || \
fe8ab488
A
368 (!(_ipobf).awdl_unrestricted && IFNET_IS_AWDL_RESTRICTED(_ifp)))
369
0a7de745 370 if (ip_output_measure) {
3e170ce0 371 net_perf_start_time(&net_perf, &start_tv);
0a7de745 372 }
39236c6e 373 KERNEL_DEBUG(DBG_FNC_IP_OUTPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);
ebb1b9f4 374
39236c6e 375 VERIFY(m0->m_flags & M_PKTHDR);
91447636 376 packetlist = m0;
b0d623f7 377
39236c6e 378 /* zero out {ipsec_state, necp_route, args, sro_fwd, saved_route, ipf_pktopts} */
0a7de745 379 bzero(&ipobz, sizeof(ipobz));
39236c6e
A
380 ippo = &ipf_pktopts;
381
382#if IPFIREWALL || DUMMYNET
0a7de745 383 if (SLIST_EMPTY(&m0->m_pkthdr.tags)) {
b0d623f7 384 goto ipfw_tags_done;
0a7de745 385 }
b0d623f7 386
91447636
A
387 /* Grab info from mtags prepended to the chain */
388#if DUMMYNET
b0d623f7
A
389 if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID,
390 KERNEL_TAG_TYPE_DUMMYNET, NULL)) != NULL) {
0a7de745 391 struct dn_pkt_tag *dn_tag;
b0d623f7 392
0a7de745 393 dn_tag = (struct dn_pkt_tag *)(tag + 1);
316670eb
A
394 args.fwa_ipfw_rule = dn_tag->dn_ipfw_rule;
395 args.fwa_pf_rule = dn_tag->dn_pf_rule;
91447636 396 opt = NULL;
316670eb 397 saved_route = dn_tag->dn_ro;
2d21ac55 398 ro = &saved_route;
b0d623f7 399
91447636 400 imo = NULL;
0a7de745 401 bcopy(&dn_tag->dn_dst, &dst_buf, sizeof(dst_buf));
6d2010ae 402 dst = &dst_buf;
316670eb
A
403 ifp = dn_tag->dn_ifp;
404 flags = dn_tag->dn_flags;
405 if ((dn_tag->dn_flags & IP_OUTARGS)) {
406 saved_ipoa = dn_tag->dn_ipoa;
407 ipoa = &saved_ipoa;
408 }
b0d623f7 409
91447636
A
410 m_tag_delete(m0, tag);
411 }
412#endif /* DUMMYNET */
413
2d21ac55 414#if IPDIVERT
b0d623f7
A
415 if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID,
416 KERNEL_TAG_TYPE_DIVERT, NULL)) != NULL) {
0a7de745 417 struct divert_tag *div_tag;
b0d623f7 418
0a7de745 419 div_tag = (struct divert_tag *)(tag + 1);
316670eb 420 args.fwa_divert_rule = div_tag->cookie;
91447636
A
421
422 m_tag_delete(m0, tag);
423 }
2d21ac55 424#endif /* IPDIVERT */
2d21ac55 425
316670eb 426#if IPFIREWALL
b0d623f7
A
427 if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID,
428 KERNEL_TAG_TYPE_IPFORWARD, NULL)) != NULL) {
0a7de745 429 struct ip_fwd_tag *ipfwd_tag;
b0d623f7 430
0a7de745 431 ipfwd_tag = (struct ip_fwd_tag *)(tag + 1);
6d2010ae 432 next_hop_from_ipfwd_tag = ipfwd_tag->next_hop;
316670eb 433
91447636
A
434 m_tag_delete(m0, tag);
435 }
4a3eedf9 436#endif /* IPFIREWALL */
91447636 437
316670eb
A
438ipfw_tags_done:
439#endif /* IPFIREWALL || DUMMYNET */
440
91447636 441 m = m0;
0a7de745 442 m->m_pkthdr.pkt_flags &= ~(PKTF_LOOP | PKTF_IFAINFO);
b0d623f7 443
39236c6e
A
444#if IPSEC
445 if (ipsec_bypass == 0 && !(flags & IP_NOIPSEC)) {
446 /* If packet is bound to an interface, check bound policies */
447 if ((flags & IP_OUTARGS) && (ipoa != NULL) &&
0a7de745
A
448 (ipoa->ipoa_flags & IPOAF_BOUND_IF) &&
449 ipoa->ipoa_boundif != IFSCOPE_NONE) {
39236c6e 450 if (ipsec4_getpolicybyinterface(m, IPSEC_DIR_OUTBOUND,
0a7de745 451 &flags, ipoa, &sp) != 0) {
39236c6e 452 goto bad;
0a7de745 453 }
39236c6e
A
454 }
455 }
456#endif /* IPSEC */
39037602 457
39236c6e 458 VERIFY(ro != NULL);
6d2010ae 459
39037602 460 if (flags & IP_OUTARGS) {
316670eb
A
461 /*
462 * In the forwarding case, only the ifscope value is used,
463 * as source interface selection doesn't take place.
464 */
39236c6e 465 if ((ipobf.select_srcif = (!(flags & IP_FORWARDING) &&
316670eb
A
466 (ipoa->ipoa_flags & IPOAF_SELECT_SRCIF)))) {
467 ipf_pktopts.ippo_flags |= IPPOF_SELECT_SRCIF;
468 }
469
470 if ((ipoa->ipoa_flags & IPOAF_BOUND_IF) &&
471 ipoa->ipoa_boundif != IFSCOPE_NONE) {
472 ifscope = ipoa->ipoa_boundif;
473 ipf_pktopts.ippo_flags |=
474 (IPPOF_BOUND_IF | (ifscope << IPPOF_SHIFT_IFSCOPE));
475 }
476
39236c6e
A
477 /* double negation needed for bool bit field */
478 ipobf.srcbound = !!(ipoa->ipoa_flags & IPOAF_BOUND_SRCADDR);
0a7de745 479 if (ipobf.srcbound) {
316670eb 480 ipf_pktopts.ippo_flags |= IPPOF_BOUND_SRCADDR;
0a7de745 481 }
c910b4d9 482 } else {
39236c6e
A
483 ipobf.select_srcif = FALSE;
484 ipobf.srcbound = FALSE;
c910b4d9 485 ifscope = IFSCOPE_NONE;
39236c6e
A
486 if (flags & IP_OUTARGS) {
487 ipoa->ipoa_boundif = IFSCOPE_NONE;
488 ipoa->ipoa_flags &= ~(IPOAF_SELECT_SRCIF |
489 IPOAF_BOUND_IF | IPOAF_BOUND_SRCADDR);
490 }
c910b4d9
A
491 }
492
6d2010ae 493 if (flags & IP_OUTARGS) {
fe8ab488
A
494 if (ipoa->ipoa_flags & IPOAF_NO_CELLULAR) {
495 ipobf.nocell = TRUE;
496 ipf_pktopts.ippo_flags |= IPPOF_NO_IFT_CELLULAR;
497 }
498 if (ipoa->ipoa_flags & IPOAF_NO_EXPENSIVE) {
499 ipobf.noexpensive = TRUE;
500 ipf_pktopts.ippo_flags |= IPPOF_NO_IFF_EXPENSIVE;
501 }
cb323159
A
502 if (ipoa->ipoa_flags & IPOAF_NO_CONSTRAINED) {
503 ipobf.noconstrained = TRUE;
504 ipf_pktopts.ippo_flags |= IPPOF_NO_IFF_CONSTRAINED;
505 }
0a7de745 506 if (ipoa->ipoa_flags & IPOAF_AWDL_UNRESTRICTED) {
fe8ab488 507 ipobf.awdl_unrestricted = TRUE;
0a7de745 508 }
316670eb
A
509 adv = &ipoa->ipoa_flowadv;
510 adv->code = FADV_SUCCESS;
39236c6e 511 ipoa->ipoa_retflags = 0;
6d2010ae 512 }
39037602 513
fe8ab488
A
514#if IPSEC
515 if (ipsec_bypass == 0 && !(flags & IP_NOIPSEC)) {
516 so = ipsec_getsocket(m);
517 if (so != NULL) {
518 (void) ipsec_setsocket(m, NULL);
519 }
520 }
521#endif /* IPSEC */
6d2010ae 522
316670eb
A
523#if DUMMYNET
524 if (args.fwa_ipfw_rule != NULL || args.fwa_pf_rule != NULL) {
525 /* dummynet already saw us */
b0d623f7 526 ip = mtod(m, struct ip *);
316670eb
A
527 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
528 pkt_dst = ip->ip_dst;
b0d623f7
A
529 if (ro->ro_rt != NULL) {
530 RT_LOCK_SPIN(ro->ro_rt);
531 ia = (struct in_ifaddr *)ro->ro_rt->rt_ifa;
6d2010ae
A
532 if (ia) {
533 /* Become a regular mutex */
534 RT_CONVERT_LOCK(ro->ro_rt);
535 IFA_ADDREF(&ia->ia_ifa);
536 }
b0d623f7
A
537 RT_UNLOCK(ro->ro_rt);
538 }
fe8ab488 539
39236c6e 540#if IPFIREWALL
0a7de745 541 if (args.fwa_ipfw_rule != NULL) {
316670eb 542 goto skip_ipsec;
0a7de745 543 }
39236c6e 544#endif /* IPFIREWALL */
0a7de745 545 if (args.fwa_pf_rule != NULL) {
316670eb 546 goto sendit;
0a7de745 547 }
91447636 548 }
316670eb 549#endif /* DUMMYNET */
91447636 550
39236c6e 551loopit:
3e170ce0 552 packets_processed++;
39236c6e
A
553 ipobf.isbroadcast = FALSE;
554 ipobf.didfilter = FALSE;
555#if IPFIREWALL_FORWARD
556 ipobf.fwd_rewrite_src = FALSE;
557#endif /* IPFIREWALL_FORWARD */
558
559 VERIFY(m->m_flags & M_PKTHDR);
91447636 560 /*
39236c6e 561 * No need to process packet twice if we've already seen it.
91447636 562 */
0a7de745 563 if (!SLIST_EMPTY(&m->m_pkthdr.tags)) {
b0d623f7 564 inject_filter_ref = ipf_get_inject_filter(m);
0a7de745 565 } else {
39236c6e 566 inject_filter_ref = NULL;
0a7de745 567 }
1c79356b 568
1c79356b
A
569 if (opt) {
570 m = ip_insertoptions(m, opt, &len);
571 hlen = len;
316670eb
A
572 /* Update the chain */
573 if (m != m0) {
0a7de745 574 if (m0 == packetlist) {
316670eb 575 packetlist = m;
0a7de745 576 }
316670eb
A
577 m0 = m;
578 }
1c79356b
A
579 }
580 ip = mtod(m, struct ip *);
39236c6e 581
4a3eedf9 582#if IPFIREWALL
6d2010ae
A
583 /*
584 * rdar://8542331
585 *
39236c6e
A
586 * When dealing with a packet chain, we need to reset "next_hop"
587 * because "dst" may have been changed to the gateway address below
588 * for the previous packet of the chain. This could cause the route
589 * to be inadvertently changed to the route to the gateway address
590 * (instead of the route to the destination).
6d2010ae 591 */
316670eb
A
592 args.fwa_next_hop = next_hop_from_ipfwd_tag;
593 pkt_dst = args.fwa_next_hop ? args.fwa_next_hop->sin_addr : ip->ip_dst;
39236c6e 594#else /* !IPFIREWALL */
4a3eedf9 595 pkt_dst = ip->ip_dst;
39236c6e 596#endif /* !IPFIREWALL */
91447636 597
6d2010ae
A
598 /*
599 * We must not send if the packet is destined to network zero.
600 * RFC1122 3.2.1.3 (a) and (b).
601 */
602 if (IN_ZERONET(ntohl(pkt_dst.s_addr))) {
603 error = EHOSTUNREACH;
604 goto bad;
605 }
606
1c79356b
A
607 /*
608 * Fill in IP header.
609 */
0a7de745 610 if (!(flags & (IP_FORWARDING | IP_RAWOUTPUT))) {
1c79356b
A
611 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, hlen >> 2);
612 ip->ip_off &= IP_DF;
5ba3f43e
A
613 if (rfc6864 && IP_OFF_IS_ATOMIC(ip->ip_off)) {
614 // Per RFC6864, value of ip_id is undefined for atomic ip packets
615 ip->ip_id = 0;
616 } else {
617 ip->ip_id = ip_randomid();
618 }
b0d623f7 619 OSAddAtomic(1, &ipstat.ips_localout);
1c79356b
A
620 } else {
621 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
622 }
316670eb 623
2d21ac55
A
624#if DEBUG
625 /* For debugging, we let the stack forge congestion */
626 if (forge_ce != 0 &&
39236c6e
A
627 ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_ECT1 ||
628 (ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_ECT0)) {
2d21ac55
A
629 ip->ip_tos = (ip->ip_tos & ~IPTOS_ECN_MASK) | IPTOS_ECN_CE;
630 forge_ce--;
631 }
632#endif /* DEBUG */
1c79356b 633
39236c6e
A
634 KERNEL_DEBUG(DBG_LAYER_BEG, ip->ip_dst.s_addr, ip->ip_src.s_addr,
635 ip->ip_p, ip->ip_off, ip->ip_len);
316670eb 636
39236c6e 637 dst = SIN(&ro->ro_dst);
55e303ae 638
1c79356b
A
639 /*
640 * If there is a cached route,
641 * check that it is to the same destination
642 * and is still up. If not, free it and try again.
55e303ae
A
643 * The address family should also be checked in case of sharing the
644 * cache with IPv6.
1c79356b 645 */
55e303ae 646
2d21ac55 647 if (ro->ro_rt != NULL) {
39236c6e
A
648 if (ROUTE_UNUSABLE(ro) && ip->ip_src.s_addr != INADDR_ANY &&
649 !(flags & (IP_ROUTETOIF | IP_FORWARDING))) {
b0d623f7
A
650 src_ia = ifa_foraddr(ip->ip_src.s_addr);
651 if (src_ia == NULL) {
652 error = EADDRNOTAVAIL;
653 goto bad;
654 }
6d2010ae 655 IFA_REMREF(&src_ia->ia_ifa);
39236c6e 656 src_ia = NULL;
91447636 657 }
b0d623f7
A
658 /*
659 * Test rt_flags without holding rt_lock for performance
660 * reasons; if the route is down it will hopefully be
661 * caught by the layer below (since it uses this route
662 * as a hint) or during the next transmit.
663 */
39236c6e 664 if (ROUTE_UNUSABLE(ro) || dst->sin_family != AF_INET ||
0a7de745 665 dst->sin_addr.s_addr != pkt_dst.s_addr) {
39236c6e 666 ROUTE_RELEASE(ro);
0a7de745 667 }
39236c6e 668
c910b4d9
A
669 /*
670 * If we're doing source interface selection, we may not
671 * want to use this route; only synch up the generation
672 * count otherwise.
673 */
39236c6e 674 if (!ipobf.select_srcif && ro->ro_rt != NULL &&
0a7de745 675 RT_GENID_OUTOFSYNC(ro->ro_rt)) {
39236c6e 676 RT_GENID_SYNC(ro->ro_rt);
0a7de745 677 }
ab86ba33 678 }
2d21ac55 679 if (ro->ro_rt == NULL) {
0a7de745 680 bzero(dst, sizeof(*dst));
1c79356b 681 dst->sin_family = AF_INET;
0a7de745 682 dst->sin_len = sizeof(*dst);
91447636 683 dst->sin_addr = pkt_dst;
1c79356b
A
684 }
685 /*
686 * If routing to interface only,
687 * short circuit routing lookup.
688 */
1c79356b 689 if (flags & IP_ROUTETOIF) {
0a7de745 690 if (ia != NULL) {
6d2010ae 691 IFA_REMREF(&ia->ia_ifa);
0a7de745 692 }
39236c6e
A
693 if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == NULL) {
694 ia = ifatoia(ifa_ifwithnet(sintosa(dst)));
695 if (ia == NULL) {
b0d623f7 696 OSAddAtomic(1, &ipstat.ips_noroute);
91447636 697 error = ENETUNREACH;
39037602 698 /* XXX IPv6 APN fallback notification?? */
91447636
A
699 goto bad;
700 }
1c79356b
A
701 }
702 ifp = ia->ia_ifp;
1c79356b 703 ip->ip_ttl = 1;
39236c6e
A
704 ipobf.isbroadcast = in_broadcast(dst->sin_addr, ifp);
705 /*
706 * For consistency with other cases below. Loopback
707 * multicast case is handled separately by ip_mloopback().
708 */
709 if ((ifp->if_flags & IFF_LOOPBACK) &&
710 !IN_MULTICAST(ntohl(pkt_dst.s_addr))) {
711 m->m_pkthdr.rcvif = ifp;
712 ip_setsrcifaddr_info(m, ifp->if_index, NULL);
713 ip_setdstifaddr_info(m, ifp->if_index, NULL);
714 }
c910b4d9 715 } else if (IN_MULTICAST(ntohl(pkt_dst.s_addr)) &&
6d2010ae 716 imo != NULL && (ifp = imo->imo_multicast_ifp) != NULL) {
c910b4d9
A
717 /*
718 * Bypass the normal routing lookup for multicast
719 * packets if the interface is specified.
720 */
39236c6e 721 ipobf.isbroadcast = FALSE;
0a7de745 722 if (ia != NULL) {
6d2010ae 723 IFA_REMREF(&ia->ia_ifa);
0a7de745 724 }
c910b4d9 725
b0d623f7
A
726 /* Macro takes reference on ia */
727 IFP_TO_IA(ifp, ia);
1c79356b 728 } else {
39236c6e 729 struct ifaddr *ia0 = NULL;
c910b4d9
A
730 boolean_t cloneok = FALSE;
731 /*
732 * Perform source interface selection; the source IP address
733 * must belong to one of the addresses of the interface used
734 * by the route. For performance reasons, do this only if
735 * there is no route, or if the routing table has changed,
736 * or if we haven't done source interface selection on this
737 * route (for this PCB instance) before.
738 */
39236c6e
A
739 if (ipobf.select_srcif &&
740 ip->ip_src.s_addr != INADDR_ANY && (ROUTE_UNUSABLE(ro) ||
c910b4d9 741 !(ro->ro_flags & ROF_SRCIF_SELECTED))) {
c910b4d9 742 /* Find the source interface */
39236c6e 743 ia0 = in_selectsrcif(ip, ro, ifscope);
c910b4d9 744
6d2010ae 745 /*
fe8ab488 746 * If the source address belongs to a restricted
39037602 747 * interface and the caller forbids our using
fe8ab488
A
748 * interfaces of such type, pretend that there is no
749 * route.
6d2010ae 750 */
39037602 751 if (ia0 != NULL &&
fe8ab488 752 IP_CHECK_RESTRICTIONS(ia0->ifa_ifp, ipobf)) {
39236c6e
A
753 IFA_REMREF(ia0);
754 ia0 = NULL;
755 error = EHOSTUNREACH;
0a7de745 756 if (flags & IP_OUTARGS) {
39236c6e 757 ipoa->ipoa_retflags |= IPOARF_IFDENIED;
0a7de745 758 }
6d2010ae
A
759 goto bad;
760 }
761
c910b4d9 762 /*
316670eb
A
763 * If the source address is spoofed (in the case of
764 * IP_RAWOUTPUT on an unbounded socket), or if this
765 * is destined for local/loopback, just let it go out
766 * using the interface of the route. Otherwise,
767 * there's no interface having such an address,
768 * so bail out.
c910b4d9 769 */
39236c6e
A
770 if (ia0 == NULL && (!(flags & IP_RAWOUTPUT) ||
771 ipobf.srcbound) && ifscope != lo_ifp->if_index) {
c910b4d9 772 error = EADDRNOTAVAIL;
2d21ac55
A
773 goto bad;
774 }
c910b4d9
A
775
776 /*
777 * If the caller didn't explicitly specify the scope,
778 * pick it up from the source interface. If the cached
779 * route was wrong and was blown away as part of source
780 * interface selection, don't mask out RTF_PRCLONING
781 * since that route may have been allocated by the ULP,
782 * unless the IP header was created by the caller or
783 * the destination is IPv4 LLA. The check for the
784 * latter is needed because IPv4 LLAs are never scoped
785 * in the current implementation, and we don't want to
786 * replace the resolved IPv4 LLA route with one whose
787 * gateway points to that of the default gateway on
788 * the primary interface of the system.
789 */
39236c6e 790 if (ia0 != NULL) {
0a7de745 791 if (ifscope == IFSCOPE_NONE) {
39236c6e 792 ifscope = ia0->ifa_ifp->if_index;
0a7de745 793 }
c910b4d9
A
794 cloneok = (!(flags & IP_RAWOUTPUT) &&
795 !(IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr))));
796 }
2d21ac55 797 }
c910b4d9 798
1c79356b
A
799 /*
800 * If this is the case, we probably don't want to allocate
801 * a protocol-cloned route since we didn't get one from the
802 * ULP. This lets TCP do its thing, while not burdening
803 * forwarding or ICMP with the overhead of cloning a route.
804 * Of course, we still want to do any cloning requested by
805 * the link layer, as this is probably required in all cases
806 * for correct operation (as it is for ARP).
807 */
c910b4d9 808 if (ro->ro_rt == NULL) {
2d21ac55
A
809 unsigned long ign = RTF_PRCLONING;
810 /*
811 * We make an exception here: if the destination
812 * address is INADDR_BROADCAST, allocate a protocol-
813 * cloned host route so that we end up with a route
814 * marked with the RTF_BROADCAST flag. Otherwise,
815 * we would end up referring to the default route,
816 * instead of creating a cloned host route entry.
817 * That would introduce inconsistencies between ULPs
818 * that allocate a route and those that don't. The
819 * RTF_BROADCAST route is important since we'd want
820 * to send out undirected IP broadcast packets using
c910b4d9
A
821 * link-level broadcast address. Another exception
822 * is for ULP-created routes that got blown away by
823 * source interface selection (see above).
2d21ac55 824 *
c910b4d9 825 * These exceptions will no longer be necessary when
2d21ac55
A
826 * the RTF_PRCLONING scheme is no longer present.
827 */
0a7de745 828 if (cloneok || dst->sin_addr.s_addr == INADDR_BROADCAST) {
2d21ac55 829 ign &= ~RTF_PRCLONING;
0a7de745 830 }
2d21ac55 831
b0d623f7
A
832 /*
833 * Loosen the route lookup criteria if the ifscope
834 * corresponds to the loopback interface; this is
835 * needed to support Application Layer Gateways
836 * listening on loopback, in conjunction with packet
837 * filter redirection rules. The final source IP
838 * address will be rewritten by the packet filter
839 * prior to the RFC1122 loopback check below.
840 */
0a7de745 841 if (ifscope == lo_ifp->if_index) {
b0d623f7 842 rtalloc_ign(ro, ign);
0a7de745 843 } else {
b0d623f7 844 rtalloc_scoped_ign(ro, ign, ifscope);
0a7de745 845 }
6d2010ae
A
846
847 /*
39037602 848 * If the route points to a cellular/expensive interface
fe8ab488 849 * and the caller forbids our using interfaces of such type,
6d2010ae
A
850 * pretend that there is no route.
851 */
fe8ab488 852 if (ro->ro_rt != NULL) {
6d2010ae 853 RT_LOCK_SPIN(ro->ro_rt);
fe8ab488
A
854 if (IP_CHECK_RESTRICTIONS(ro->ro_rt->rt_ifp,
855 ipobf)) {
6d2010ae 856 RT_UNLOCK(ro->ro_rt);
39236c6e
A
857 ROUTE_RELEASE(ro);
858 if (flags & IP_OUTARGS) {
859 ipoa->ipoa_retflags |=
860 IPOARF_IFDENIED;
861 }
6d2010ae
A
862 } else {
863 RT_UNLOCK(ro->ro_rt);
864 }
865 }
2d21ac55 866 }
c910b4d9
A
867
868 if (ro->ro_rt == NULL) {
b0d623f7 869 OSAddAtomic(1, &ipstat.ips_noroute);
1c79356b 870 error = EHOSTUNREACH;
39236c6e
A
871 if (ia0 != NULL) {
872 IFA_REMREF(ia0);
873 ia0 = NULL;
874 }
1c79356b
A
875 goto bad;
876 }
c910b4d9 877
0a7de745 878 if (ia != NULL) {
6d2010ae 879 IFA_REMREF(&ia->ia_ifa);
0a7de745 880 }
b0d623f7 881 RT_LOCK_SPIN(ro->ro_rt);
1c79356b 882 ia = ifatoia(ro->ro_rt->rt_ifa);
39236c6e 883 if (ia != NULL) {
6d2010ae
A
884 /* Become a regular mutex */
885 RT_CONVERT_LOCK(ro->ro_rt);
886 IFA_ADDREF(&ia->ia_ifa);
887 }
39236c6e
A
888 /*
889 * Note: ia_ifp may not be the same as rt_ifp; the latter
890 * is what we use for determining outbound i/f, mtu, etc.
891 */
1c79356b 892 ifp = ro->ro_rt->rt_ifp;
1c79356b 893 ro->ro_rt->rt_use++;
316670eb 894 if (ro->ro_rt->rt_flags & RTF_GATEWAY) {
39236c6e 895 dst = SIN(ro->ro_rt->rt_gateway);
316670eb 896 }
7e4a7d39 897 if (ro->ro_rt->rt_flags & RTF_HOST) {
39236c6e
A
898 /* double negation needed for bool bit field */
899 ipobf.isbroadcast =
900 !!(ro->ro_rt->rt_flags & RTF_BROADCAST);
7e4a7d39
A
901 } else {
902 /* Become a regular mutex */
903 RT_CONVERT_LOCK(ro->ro_rt);
39236c6e
A
904 ipobf.isbroadcast = in_broadcast(dst->sin_addr, ifp);
905 }
906 /*
907 * For consistency with IPv6, as well as to ensure that
908 * IP_RECVIF is set correctly for packets that are sent
909 * to one of the local addresses. ia (rt_ifa) would have
910 * been fixed up by rt_setif for local routes. This
911 * would make it appear as if the packet arrives on the
912 * interface which owns the local address. Loopback
913 * multicast case is handled separately by ip_mloopback().
914 */
915 if (ia != NULL && (ifp->if_flags & IFF_LOOPBACK) &&
916 !IN_MULTICAST(ntohl(pkt_dst.s_addr))) {
917 uint32_t srcidx;
918
919 m->m_pkthdr.rcvif = ia->ia_ifa.ifa_ifp;
920
0a7de745 921 if (ia0 != NULL) {
39236c6e 922 srcidx = ia0->ifa_ifp->if_index;
0a7de745
A
923 } else if ((ro->ro_flags & ROF_SRCIF_SELECTED) &&
924 ro->ro_srcia != NULL) {
39236c6e 925 srcidx = ro->ro_srcia->ifa_ifp->if_index;
0a7de745 926 } else {
39236c6e 927 srcidx = 0;
0a7de745 928 }
39236c6e
A
929
930 ip_setsrcifaddr_info(m, srcidx, NULL);
931 ip_setdstifaddr_info(m, 0, ia);
7e4a7d39 932 }
b0d623f7 933 RT_UNLOCK(ro->ro_rt);
39236c6e
A
934 if (ia0 != NULL) {
935 IFA_REMREF(ia0);
936 ia0 = NULL;
937 }
1c79356b 938 }
b0d623f7 939
91447636 940 if (IN_MULTICAST(ntohl(pkt_dst.s_addr))) {
39236c6e 941 struct ifnet *srcifp = NULL;
1c79356b 942 struct in_multi *inm;
5ba3f43e 943 u_int32_t vif = 0;
6d2010ae
A
944 u_int8_t ttl = IP_DEFAULT_MULTICAST_TTL;
945 u_int8_t loop = IP_DEFAULT_MULTICAST_LOOP;
1c79356b
A
946
947 m->m_flags |= M_MCAST;
948 /*
949 * IP destination address is multicast. Make sure "dst"
950 * still points to the address in "ro". (It may have been
951 * changed to point to a gateway address, above.)
952 */
39236c6e 953 dst = SIN(&ro->ro_dst);
1c79356b
A
954 /*
955 * See if the caller provided any multicast options
956 */
957 if (imo != NULL) {
6d2010ae
A
958 IMO_LOCK(imo);
959 vif = imo->imo_multicast_vif;
960 ttl = imo->imo_multicast_ttl;
961 loop = imo->imo_multicast_loop;
0a7de745 962 if (!(flags & IP_RAWOUTPUT)) {
6d2010ae 963 ip->ip_ttl = ttl;
0a7de745
A
964 }
965 if (imo->imo_multicast_ifp != NULL) {
1c79356b 966 ifp = imo->imo_multicast_ifp;
0a7de745 967 }
6d2010ae 968 IMO_UNLOCK(imo);
39236c6e 969 } else if (!(flags & IP_RAWOUTPUT)) {
6d2010ae
A
970 vif = -1;
971 ip->ip_ttl = ttl;
972 }
1c79356b
A
973 /*
974 * Confirm that the outgoing interface supports multicast.
975 */
6d2010ae 976 if (imo == NULL || vif == -1) {
39236c6e 977 if (!(ifp->if_flags & IFF_MULTICAST)) {
b0d623f7 978 OSAddAtomic(1, &ipstat.ips_noroute);
1c79356b
A
979 error = ENETUNREACH;
980 goto bad;
981 }
982 }
983 /*
984 * If source address not specified yet, use address
985 * of outgoing interface.
986 */
987 if (ip->ip_src.s_addr == INADDR_ANY) {
b0d623f7
A
988 struct in_ifaddr *ia1;
989 lck_rw_lock_shared(in_ifaddr_rwlock);
6d2010ae
A
990 TAILQ_FOREACH(ia1, &in_ifaddrhead, ia_link) {
991 IFA_LOCK_SPIN(&ia1->ia_ifa);
1c79356b
A
992 if (ia1->ia_ifp == ifp) {
993 ip->ip_src = IA_SIN(ia1)->sin_addr;
39236c6e 994 srcifp = ifp;
6d2010ae 995 IFA_UNLOCK(&ia1->ia_ifa);
1c79356b
A
996 break;
997 }
6d2010ae
A
998 IFA_UNLOCK(&ia1->ia_ifa);
999 }
b0d623f7 1000 lck_rw_done(in_ifaddr_rwlock);
55e303ae
A
1001 if (ip->ip_src.s_addr == INADDR_ANY) {
1002 error = ENETUNREACH;
1003 goto bad;
1004 }
1c79356b
A
1005 }
1006
6d2010ae
A
1007 in_multihead_lock_shared();
1008 IN_LOOKUP_MULTI(&pkt_dst, ifp, inm);
1009 in_multihead_lock_done();
1010 if (inm != NULL && (imo == NULL || loop)) {
1c79356b
A
1011 /*
1012 * If we belong to the destination multicast group
1013 * on the outgoing interface, and the caller did not
1014 * forbid loopback, loop back a copy.
1015 */
cb323159
A
1016 if (!TAILQ_EMPTY(&ipv4_filters)
1017#if NECP
1018 && !necp_packet_should_skip_filters(m)
1019#endif // NECP
1020 ) {
0a7de745 1021 struct ipfilter *filter;
39236c6e 1022 int seen = (inject_filter_ref == NULL);
91447636 1023
6d2010ae 1024 if (imo != NULL) {
39236c6e
A
1025 ipf_pktopts.ippo_flags |=
1026 IPPOF_MCAST_OPTS;
6d2010ae
A
1027 ipf_pktopts.ippo_mcast_ifnet = ifp;
1028 ipf_pktopts.ippo_mcast_ttl = ttl;
1029 ipf_pktopts.ippo_mcast_loop = loop;
91447636 1030 }
6d2010ae 1031
91447636 1032 ipf_ref();
6d2010ae 1033
39236c6e
A
1034 /*
1035 * 4135317 - always pass network byte
1036 * order to filter
1037 */
b0d623f7 1038#if BYTE_ORDER != BIG_ENDIAN
0c530ab8
A
1039 HTONS(ip->ip_len);
1040 HTONS(ip->ip_off);
b0d623f7 1041#endif
91447636
A
1042 TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) {
1043 if (seen == 0) {
39236c6e 1044 if ((struct ipfilter *)
0a7de745 1045 inject_filter_ref == filter) {
91447636 1046 seen = 1;
0a7de745 1047 }
39236c6e
A
1048 } else if (filter->ipf_filter.
1049 ipf_output != NULL) {
91447636 1050 errno_t result;
39236c6e
A
1051 result = filter->ipf_filter.
1052 ipf_output(filter->
1053 ipf_filter.cookie,
1054 (mbuf_t *)&m, ippo);
91447636
A
1055 if (result == EJUSTRETURN) {
1056 ipf_unref();
6d2010ae 1057 INM_REMREF(inm);
91447636
A
1058 goto done;
1059 }
1060 if (result != 0) {
1061 ipf_unref();
6d2010ae 1062 INM_REMREF(inm);
91447636
A
1063 goto bad;
1064 }
1065 }
1066 }
6d2010ae 1067
0c530ab8 1068 /* set back to host byte order */
6601e61a 1069 ip = mtod(m, struct ip *);
b0d623f7 1070#if BYTE_ORDER != BIG_ENDIAN
0c530ab8
A
1071 NTOHS(ip->ip_len);
1072 NTOHS(ip->ip_off);
b0d623f7 1073#endif
91447636 1074 ipf_unref();
39236c6e 1075 ipobf.didfilter = TRUE;
91447636 1076 }
39236c6e 1077 ip_mloopback(srcifp, ifp, m, dst, hlen);
1c79356b 1078 }
0a7de745 1079 if (inm != NULL) {
6d2010ae 1080 INM_REMREF(inm);
0a7de745 1081 }
1c79356b
A
1082 /*
1083 * Multicasts with a time-to-live of zero may be looped-
1084 * back, above, but must not be transmitted on a network.
1085 * Also, multicasts addressed to the loopback interface
1086 * are not sent -- the above call to ip_mloopback() will
1087 * loop back a copy if this host actually belongs to the
1088 * destination group on the loopback interface.
1089 */
1090 if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) {
1091 m_freem(m);
1092 goto done;
1093 }
1094
1095 goto sendit;
1096 }
1c79356b
A
1097 /*
1098 * If source address not specified yet, use address
1099 * of outgoing interface.
1100 */
1101 if (ip->ip_src.s_addr == INADDR_ANY) {
6d2010ae 1102 IFA_LOCK_SPIN(&ia->ia_ifa);
1c79356b 1103 ip->ip_src = IA_SIN(ia)->sin_addr;
6d2010ae 1104 IFA_UNLOCK(&ia->ia_ifa);
1c79356b 1105#if IPFIREWALL_FORWARD
39236c6e
A
1106 /*
1107 * Keep note that we did this - if the firewall changes
1c79356b
A
1108 * the next-hop, our interface may change, changing the
1109 * default source IP. It's a shame so much effort happens
39236c6e 1110 * twice. Oh well.
1c79356b 1111 */
39236c6e 1112 ipobf.fwd_rewrite_src = TRUE;
1c79356b
A
1113#endif /* IPFIREWALL_FORWARD */
1114 }
1c79356b
A
1115
1116 /*
1117 * Look for broadcast address and
1118 * verify user is allowed to send
1119 * such a packet.
1120 */
39236c6e
A
1121 if (ipobf.isbroadcast) {
1122 if (!(ifp->if_flags & IFF_BROADCAST)) {
1c79356b
A
1123 error = EADDRNOTAVAIL;
1124 goto bad;
1125 }
39236c6e 1126 if (!(flags & IP_ALLOWBROADCAST)) {
1c79356b
A
1127 error = EACCES;
1128 goto bad;
1129 }
1130 /* don't allow broadcast messages to be fragmented */
1131 if ((u_short)ip->ip_len > ifp->if_mtu) {
1132 error = EMSGSIZE;
1133 goto bad;
1134 }
1135 m->m_flags |= M_BCAST;
1136 } else {
1137 m->m_flags &= ~M_BCAST;
1138 }
1139
1140sendit:
b0d623f7
A
1141#if PF
1142 /* Invoke outbound packet filter */
316670eb 1143 if (PF_IS_ENABLED) {
6d2010ae 1144 int rc;
316670eb 1145
39236c6e 1146 m0 = m; /* Save for later */
316670eb
A
1147#if DUMMYNET
1148 args.fwa_m = m;
1149 args.fwa_next_hop = dst;
1150 args.fwa_oif = ifp;
1151 args.fwa_ro = ro;
1152 args.fwa_dst = dst;
1153 args.fwa_oflags = flags;
0a7de745 1154 if (flags & IP_OUTARGS) {
316670eb 1155 args.fwa_ipoa = ipoa;
0a7de745 1156 }
316670eb
A
1157 rc = pf_af_hook(ifp, mppn, &m, AF_INET, FALSE, &args);
1158#else /* DUMMYNET */
1159 rc = pf_af_hook(ifp, mppn, &m, AF_INET, FALSE, NULL);
1160#endif /* DUMMYNET */
1161 if (rc != 0 || m == NULL) {
1162 /* Move to the next packet */
1163 m = *mppn;
1164
1165 /* Skip ahead if first packet in list got dropped */
0a7de745 1166 if (packetlist == m0) {
6d2010ae 1167 packetlist = m;
0a7de745 1168 }
316670eb 1169
6d2010ae
A
1170 if (m != NULL) {
1171 m0 = m;
1172 /* Next packet in the chain */
1173 goto loopit;
1174 } else if (packetlist != NULL) {
1175 /* No more packet; send down the chain */
1176 goto sendchain;
1177 }
1178 /* Nothing left; we're done */
1179 goto done;
b0d623f7 1180 }
6d2010ae
A
1181 m0 = m;
1182 ip = mtod(m, struct ip *);
1183 pkt_dst = ip->ip_dst;
1184 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
b0d623f7 1185 }
b0d623f7 1186#endif /* PF */
39236c6e
A
1187 /*
1188 * Force IP TTL to 255 following draft-ietf-zeroconf-ipv4-linklocal.txt
1189 */
1190 if (IN_LINKLOCAL(ntohl(ip->ip_src.s_addr)) ||
1191 IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr))) {
9bccf70c
A
1192 ip_linklocal_stat.iplls_out_total++;
1193 if (ip->ip_ttl != MAXTTL) {
1194 ip_linklocal_stat.iplls_out_badttl++;
316670eb 1195 ip->ip_ttl = MAXTTL;
9bccf70c 1196 }
39236c6e 1197 }
9bccf70c 1198
cb323159
A
1199 if (!ipobf.didfilter &&
1200 !TAILQ_EMPTY(&ipv4_filters)
1201#if NECP
1202 && !necp_packet_should_skip_filters(m)
1203#endif // NECP
1204 ) {
0a7de745 1205 struct ipfilter *filter;
39236c6e 1206 int seen = (inject_filter_ref == NULL);
6d2010ae
A
1207 ipf_pktopts.ippo_flags &= ~IPPOF_MCAST_OPTS;
1208
39236c6e
A
1209 /*
1210 * Check that a TSO frame isn't passed to a filter.
b0d623f7
A
1211 * This could happen if a filter is inserted while
1212 * TCP is sending the TSO packet.
1213 */
1214 if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4) {
1215 error = EMSGSIZE;
1216 goto bad;
1217 }
1218
91447636 1219 ipf_ref();
316670eb 1220
0c530ab8 1221 /* 4135317 - always pass network byte order to filter */
b0d623f7 1222#if BYTE_ORDER != BIG_ENDIAN
0c530ab8
A
1223 HTONS(ip->ip_len);
1224 HTONS(ip->ip_off);
b0d623f7 1225#endif
91447636
A
1226 TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) {
1227 if (seen == 0) {
39236c6e 1228 if ((struct ipfilter *)inject_filter_ref ==
0a7de745 1229 filter) {
91447636 1230 seen = 1;
0a7de745 1231 }
91447636
A
1232 } else if (filter->ipf_filter.ipf_output) {
1233 errno_t result;
39236c6e
A
1234 result = filter->ipf_filter.
1235 ipf_output(filter->ipf_filter.cookie,
1236 (mbuf_t *)&m, ippo);
91447636
A
1237 if (result == EJUSTRETURN) {
1238 ipf_unref();
1239 goto done;
1240 }
1241 if (result != 0) {
1242 ipf_unref();
91447636
A
1243 goto bad;
1244 }
1245 }
1246 }
0c530ab8 1247 /* set back to host byte order */
6601e61a 1248 ip = mtod(m, struct ip *);
b0d623f7 1249#if BYTE_ORDER != BIG_ENDIAN
0c530ab8
A
1250 NTOHS(ip->ip_len);
1251 NTOHS(ip->ip_off);
b0d623f7 1252#endif
91447636 1253 ipf_unref();
91447636
A
1254 }
1255
fe8ab488
A
1256#if NECP
1257 /* Process Network Extension Policy. Will Pass, Drop, or Rebind packet. */
0a7de745 1258 necp_matched_policy_id = necp_ip_output_find_policy_match(m,
cb323159 1259 flags, (flags & IP_OUTARGS) ? ipoa : NULL, ro ? ro->ro_rt : NULL, &necp_result, &necp_result_parameter);
fe8ab488
A
1260 if (necp_matched_policy_id) {
1261 necp_mark_packet_from_ip(m, necp_matched_policy_id);
1262 switch (necp_result) {
0a7de745
A
1263 case NECP_KERNEL_POLICY_RESULT_PASS:
1264 /* Check if the interface is allowed */
1265 if (!necp_packet_is_allowed_over_interface(m, ifp)) {
1266 error = EHOSTUNREACH;
1267 OSAddAtomic(1, &ipstat.ips_necp_policy_drop);
1268 goto bad;
1269 }
1270 goto skip_ipsec;
1271 case NECP_KERNEL_POLICY_RESULT_DROP:
1272 case NECP_KERNEL_POLICY_RESULT_SOCKET_DIVERT:
1273 /* Flow divert packets should be blocked at the IP layer */
1274 error = EHOSTUNREACH;
1275 OSAddAtomic(1, &ipstat.ips_necp_policy_drop);
1276 goto bad;
1277 case NECP_KERNEL_POLICY_RESULT_IP_TUNNEL: {
1278 /* Verify that the packet is being routed to the tunnel */
1279 struct ifnet *policy_ifp = necp_get_ifnet_from_result_parameter(&necp_result_parameter);
1280 if (policy_ifp == ifp) {
3e170ce0
A
1281 /* Check if the interface is allowed */
1282 if (!necp_packet_is_allowed_over_interface(m, ifp)) {
1283 error = EHOSTUNREACH;
5ba3f43e 1284 OSAddAtomic(1, &ipstat.ips_necp_policy_drop);
3e170ce0
A
1285 goto bad;
1286 }
fe8ab488 1287 goto skip_ipsec;
0a7de745
A
1288 } else {
1289 if (necp_packet_can_rebind_to_ifnet(m, policy_ifp, &necp_route, AF_INET)) {
3e170ce0 1290 /* Check if the interface is allowed */
0a7de745 1291 if (!necp_packet_is_allowed_over_interface(m, policy_ifp)) {
3e170ce0 1292 error = EHOSTUNREACH;
5ba3f43e 1293 OSAddAtomic(1, &ipstat.ips_necp_policy_drop);
3e170ce0
A
1294 goto bad;
1295 }
0a7de745
A
1296
1297 /* Set ifp to the tunnel interface, since it is compatible with the packet */
1298 ifp = policy_ifp;
1299 ro = &necp_route;
fe8ab488
A
1300 goto skip_ipsec;
1301 } else {
0a7de745
A
1302 error = ENETUNREACH;
1303 OSAddAtomic(1, &ipstat.ips_necp_policy_drop);
1304 goto bad;
fe8ab488 1305 }
fe8ab488 1306 }
0a7de745
A
1307 }
1308 default:
1309 break;
fe8ab488
A
1310 }
1311 }
3e170ce0
A
1312 /* Catch-all to check if the interface is allowed */
1313 if (!necp_packet_is_allowed_over_interface(m, ifp)) {
1314 error = EHOSTUNREACH;
5ba3f43e 1315 OSAddAtomic(1, &ipstat.ips_necp_policy_drop);
3e170ce0
A
1316 goto bad;
1317 }
fe8ab488 1318#endif /* NECP */
3e170ce0 1319
9bccf70c 1320#if IPSEC
0a7de745 1321 if (ipsec_bypass != 0 || (flags & IP_NOIPSEC)) {
9bccf70c 1322 goto skip_ipsec;
0a7de745 1323 }
9bccf70c 1324
39236c6e 1325 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);
9bccf70c
A
1326
1327 if (sp == NULL) {
39236c6e 1328 /* get SP for this packet */
fe8ab488 1329 if (so != NULL) {
39236c6e 1330 sp = ipsec4_getpolicybysock(m, IPSEC_DIR_OUTBOUND,
0a7de745 1331 so, &error);
fe8ab488
A
1332 } else {
1333 sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND,
0a7de745 1334 flags, &error);
fe8ab488 1335 }
39236c6e
A
1336 if (sp == NULL) {
1337 IPSEC_STAT_INCREMENT(ipsecstat.out_inval);
1338 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END,
0a7de745 1339 0, 0, 0, 0, 0);
39236c6e
A
1340 goto bad;
1341 }
9bccf70c
A
1342 }
1343
1344 error = 0;
1345
1346 /* check policy */
1347 switch (sp->policy) {
1348 case IPSEC_POLICY_DISCARD:
2d21ac55 1349 case IPSEC_POLICY_GENERATE:
9bccf70c
A
1350 /*
1351 * This packet is just discarded.
1352 */
2d21ac55 1353 IPSEC_STAT_INCREMENT(ipsecstat.out_polvio);
39236c6e
A
1354 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END,
1355 1, 0, 0, 0, 0);
9bccf70c
A
1356 goto bad;
1357
1358 case IPSEC_POLICY_BYPASS:
1359 case IPSEC_POLICY_NONE:
1360 /* no need to do IPsec. */
39236c6e
A
1361 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END,
1362 2, 0, 0, 0, 0);
9bccf70c 1363 goto skip_ipsec;
316670eb 1364
9bccf70c
A
1365 case IPSEC_POLICY_IPSEC:
1366 if (sp->req == NULL) {
1367 /* acquire a policy */
1368 error = key_spdacquire(sp);
39236c6e
A
1369 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END,
1370 3, 0, 0, 0, 0);
1371 goto bad;
1372 }
1373 if (sp->ipsec_if) {
1374 /* Verify the redirect to ipsec interface */
1375 if (sp->ipsec_if == ifp) {
39236c6e
A
1376 goto skip_ipsec;
1377 }
9bccf70c
A
1378 goto bad;
1379 }
1380 break;
1381
1382 case IPSEC_POLICY_ENTRUST:
1383 default:
1384 printf("ip_output: Invalid policy found. %d\n", sp->policy);
1385 }
39236c6e 1386 {
0a7de745
A
1387 ipsec_state.m = m;
1388 if (flags & IP_ROUTETOIF) {
1389 bzero(&ipsec_state.ro, sizeof(ipsec_state.ro));
1390 } else {
1391 route_copyout((struct route *)&ipsec_state.ro, ro, sizeof(struct route));
1392 }
1393 ipsec_state.dst = SA(dst);
9bccf70c 1394
0a7de745 1395 ip->ip_sum = 0;
9bccf70c 1396
0a7de745
A
1397 /*
1398 * XXX
1399 * delayed checksums are not currently compatible with IPsec
1400 */
1401 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
1402 in_delayed_cksum(m);
1403 }
b0d623f7
A
1404
1405#if BYTE_ORDER != BIG_ENDIAN
0a7de745
A
1406 HTONS(ip->ip_len);
1407 HTONS(ip->ip_off);
b0d623f7 1408#endif
9bccf70c 1409
0a7de745
A
1410 DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
1411 struct ip *, ip, struct ifnet *, ifp,
1412 struct ip *, ip, struct ip6_hdr *, NULL);
6d2010ae 1413
0a7de745
A
1414 error = ipsec4_output(&ipsec_state, sp, flags);
1415 if (ipsec_state.tunneled == 6) {
1416 m0 = m = NULL;
1417 error = 0;
1418 goto bad;
1419 }
316670eb 1420
0a7de745 1421 m0 = m = ipsec_state.m;
316670eb 1422
39236c6e 1423#if DUMMYNET
9bccf70c 1424 /*
0a7de745
A
1425 * If we're about to use the route in ipsec_state
1426 * and this came from dummynet, clean up now.
9bccf70c 1427 */
0a7de745
A
1428 if (ro == &saved_route &&
1429 (!(flags & IP_ROUTETOIF) || ipsec_state.tunneled)) {
1430 ROUTE_RELEASE(ro);
1431 }
1432#endif /* DUMMYNET */
1433
1434 if (flags & IP_ROUTETOIF) {
1435 /*
1436 * if we have tunnel mode SA, we may need to ignore
1437 * IP_ROUTETOIF.
1438 */
1439 if (ipsec_state.tunneled) {
1440 flags &= ~IP_ROUTETOIF;
1441 ro = (struct route *)&ipsec_state.ro;
1442 }
1443 } else {
5c9f4661 1444 ro = (struct route *)&ipsec_state.ro;
9bccf70c 1445 }
0a7de745
A
1446 dst = SIN(ipsec_state.dst);
1447 if (error) {
1448 /* mbuf is already reclaimed in ipsec4_output. */
1449 m0 = NULL;
1450 switch (error) {
1451 case EHOSTUNREACH:
1452 case ENETUNREACH:
1453 case EMSGSIZE:
1454 case ENOBUFS:
1455 case ENOMEM:
1456 break;
1457 default:
1458 printf("ip4_output (ipsec): error code %d\n", error);
39236c6e 1459 /* FALLTHRU */
0a7de745
A
1460 case ENOENT:
1461 /* don't show these error codes to the user */
1462 error = 0;
1463 break;
1464 }
1465 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END,
1466 4, 0, 0, 0, 0);
1467 goto bad;
9bccf70c 1468 }
39236c6e 1469 }
9bccf70c
A
1470
1471 /* be sure to update variables that are affected by ipsec4_output() */
1472 ip = mtod(m, struct ip *);
316670eb 1473
9bccf70c
A
1474#ifdef _IP_VHL
1475 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
39236c6e 1476#else /* !_IP_VHL */
9bccf70c 1477 hlen = ip->ip_hl << 2;
39236c6e 1478#endif /* !_IP_VHL */
55e303ae 1479 /* Check that there wasn't a route change and src is still valid */
39236c6e
A
1480 if (ROUTE_UNUSABLE(ro)) {
1481 ROUTE_RELEASE(ro);
1482 VERIFY(src_ia == NULL);
1483 if (ip->ip_src.s_addr != INADDR_ANY &&
1484 !(flags & (IP_ROUTETOIF | IP_FORWARDING)) &&
1485 (src_ia = ifa_foraddr(ip->ip_src.s_addr)) == NULL) {
b0d623f7
A
1486 error = EADDRNOTAVAIL;
1487 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END,
39236c6e 1488 5, 0, 0, 0, 0);
55e303ae
A
1489 goto bad;
1490 }
39236c6e 1491 if (src_ia != NULL) {
6d2010ae 1492 IFA_REMREF(&src_ia->ia_ifa);
39236c6e
A
1493 src_ia = NULL;
1494 }
55e303ae
A
1495 }
1496
9bccf70c 1497 if (ro->ro_rt == NULL) {
39236c6e
A
1498 if (!(flags & IP_ROUTETOIF)) {
1499 printf("%s: can't update route after "
1500 "IPsec processing\n", __func__);
0a7de745 1501 error = EHOSTUNREACH; /* XXX */
b0d623f7 1502 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END,
39236c6e 1503 6, 0, 0, 0, 0);
9bccf70c
A
1504 goto bad;
1505 }
1506 } else {
0a7de745 1507 if (ia != NULL) {
6d2010ae 1508 IFA_REMREF(&ia->ia_ifa);
0a7de745 1509 }
b0d623f7 1510 RT_LOCK_SPIN(ro->ro_rt);
9bccf70c 1511 ia = ifatoia(ro->ro_rt->rt_ifa);
39236c6e 1512 if (ia != NULL) {
6d2010ae
A
1513 /* Become a regular mutex */
1514 RT_CONVERT_LOCK(ro->ro_rt);
1515 IFA_ADDREF(&ia->ia_ifa);
1516 }
9bccf70c 1517 ifp = ro->ro_rt->rt_ifp;
b0d623f7 1518 RT_UNLOCK(ro->ro_rt);
9bccf70c
A
1519 }
1520
1521 /* make it flipped, again. */
b0d623f7 1522#if BYTE_ORDER != BIG_ENDIAN
9bccf70c
A
1523 NTOHS(ip->ip_len);
1524 NTOHS(ip->ip_off);
b0d623f7 1525#endif
39236c6e
A
1526 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END,
1527 7, 0xff, 0xff, 0xff, 0xff);
316670eb 1528
91447636 1529 /* Pass to filters again */
cb323159
A
1530 if (!TAILQ_EMPTY(&ipv4_filters)
1531#if NECP
1532 && !necp_packet_should_skip_filters(m)
1533#endif // NECP
1534 ) {
0a7de745 1535 struct ipfilter *filter;
316670eb 1536
6d2010ae
A
1537 ipf_pktopts.ippo_flags &= ~IPPOF_MCAST_OPTS;
1538
39236c6e
A
1539 /*
1540 * Check that a TSO frame isn't passed to a filter.
b0d623f7
A
1541 * This could happen if a filter is inserted while
1542 * TCP is sending the TSO packet.
1543 */
1544 if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4) {
1545 error = EMSGSIZE;
1546 goto bad;
1547 }
1548
91447636 1549 ipf_ref();
316670eb 1550
0c530ab8 1551 /* 4135317 - always pass network byte order to filter */
b0d623f7 1552#if BYTE_ORDER != BIG_ENDIAN
0c530ab8
A
1553 HTONS(ip->ip_len);
1554 HTONS(ip->ip_off);
b0d623f7 1555#endif
91447636
A
1556 TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) {
1557 if (filter->ipf_filter.ipf_output) {
1558 errno_t result;
39236c6e
A
1559 result = filter->ipf_filter.
1560 ipf_output(filter->ipf_filter.cookie,
1561 (mbuf_t *)&m, ippo);
91447636
A
1562 if (result == EJUSTRETURN) {
1563 ipf_unref();
1564 goto done;
1565 }
1566 if (result != 0) {
1567 ipf_unref();
91447636
A
1568 goto bad;
1569 }
1570 }
1571 }
0c530ab8 1572 /* set back to host byte order */
6601e61a 1573 ip = mtod(m, struct ip *);
b0d623f7 1574#if BYTE_ORDER != BIG_ENDIAN
0c530ab8
A
1575 NTOHS(ip->ip_len);
1576 NTOHS(ip->ip_off);
b0d623f7 1577#endif
91447636 1578 ipf_unref();
91447636 1579 }
9bccf70c 1580skip_ipsec:
39236c6e 1581#endif /* IPSEC */
9bccf70c 1582
2d21ac55 1583#if IPFIREWALL
1c79356b
A
1584 /*
1585 * Check with the firewall...
91447636 1586 * but not if we are already being fwd'd from a firewall.
1c79356b 1587 */
316670eb 1588 if (fw_enable && IPFW_LOADED && !args.fwa_next_hop) {
1c79356b
A
1589 struct sockaddr_in *old = dst;
1590
316670eb
A
1591 args.fwa_m = m;
1592 args.fwa_next_hop = dst;
1593 args.fwa_oif = ifp;
39236c6e 1594 ipfwoff = ip_fw_chk_ptr(&args);
316670eb
A
1595 m = args.fwa_m;
1596 dst = args.fwa_next_hop;
91447636 1597
39236c6e
A
1598 /*
1599 * On return we must do the following:
1600 * IP_FW_PORT_DENY_FLAG -> drop the pkt (XXX new)
1601 * 1<=off<= 0xffff -> DIVERT
1602 * (off & IP_FW_PORT_DYNT_FLAG) -> send to a DUMMYNET pipe
1603 * (off & IP_FW_PORT_TEE_FLAG) -> TEE the packet
1604 * dst != old -> IPFIREWALL_FORWARD
1605 * off==0, dst==old -> accept
1606 * If some of the above modules is not compiled in, then
1607 * we shouldn't have to check the corresponding condition
1608 * (because the ipfw control socket should not accept
1609 * unsupported rules), but better play safe and drop
1610 * packets in case of doubt.
1611 */
55e303ae 1612 m0 = m;
39236c6e 1613 if ((ipfwoff & IP_FW_PORT_DENY_FLAG) || m == NULL) {
0a7de745 1614 if (m) {
9bccf70c 1615 m_freem(m);
0a7de745 1616 }
39236c6e
A
1617 error = EACCES;
1618 goto done;
1c79356b 1619 }
9bccf70c 1620 ip = mtod(m, struct ip *);
316670eb 1621
0a7de745 1622 if (ipfwoff == 0 && dst == old) { /* common case */
39236c6e 1623 goto pass;
3a60a9f5 1624 }
1c79356b 1625#if DUMMYNET
39236c6e 1626 if (DUMMYNET_LOADED && (ipfwoff & IP_FW_PORT_DYNT_FLAG) != 0) {
c910b4d9
A
1627 /*
1628 * pass the pkt to dummynet. Need to include
1629 * pipe number, m, ifp, ro, dst because these are
1630 * not recomputed in the next pass.
1631 * All other parameters have been already used and
1632 * so they are not needed anymore.
1633 * XXX note: if the ifp or ro entry are deleted
1634 * while a pkt is in dummynet, we are in trouble!
1635 */
316670eb
A
1636 args.fwa_ro = ro;
1637 args.fwa_dst = dst;
1638 args.fwa_oflags = flags;
0a7de745 1639 if (flags & IP_OUTARGS) {
316670eb 1640 args.fwa_ipoa = ipoa;
0a7de745 1641 }
c910b4d9 1642
39236c6e 1643 error = ip_dn_io_ptr(m, ipfwoff & 0xffff, DN_TO_IP_OUT,
316670eb 1644 &args, DN_CLIENT_IPFW);
c910b4d9 1645 goto done;
1c79356b 1646 }
91447636 1647#endif /* DUMMYNET */
1c79356b 1648#if IPDIVERT
39236c6e 1649 if (ipfwoff != 0 && (ipfwoff & IP_FW_PORT_DYNT_FLAG) == 0) {
9bccf70c
A
1650 struct mbuf *clone = NULL;
1651
1652 /* Clone packet if we're doing a 'tee' */
0a7de745 1653 if ((ipfwoff & IP_FW_PORT_TEE_FLAG) != 0) {
9bccf70c 1654 clone = m_dup(m, M_DONTWAIT);
0a7de745 1655 }
9bccf70c
A
1656 /*
1657 * XXX
1658 * delayed checksums are not currently compatible
1659 * with divert sockets.
1660 */
0a7de745 1661 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
9bccf70c 1662 in_delayed_cksum(m);
0a7de745 1663 }
9bccf70c
A
1664
1665 /* Restore packet header fields to original values */
b0d623f7
A
1666
1667#if BYTE_ORDER != BIG_ENDIAN
9bccf70c
A
1668 HTONS(ip->ip_len);
1669 HTONS(ip->ip_off);
b0d623f7 1670#endif
9bccf70c
A
1671
1672 /* Deliver packet to divert input routine */
39236c6e
A
1673 divert_packet(m, 0, ipfwoff & 0xffff,
1674 args.fwa_divert_rule);
9bccf70c
A
1675
1676 /* If 'tee', continue with original packet */
1677 if (clone != NULL) {
55e303ae 1678 m0 = m = clone;
9bccf70c
A
1679 ip = mtod(m, struct ip *);
1680 goto pass;
1681 }
1c79356b
A
1682 goto done;
1683 }
39236c6e 1684#endif /* IPDIVERT */
1c79356b 1685#if IPFIREWALL_FORWARD
39236c6e
A
1686 /*
1687 * Here we check dst to make sure it's directly reachable on
1688 * the interface we previously thought it was.
1c79356b
A
1689 * If it isn't (which may be likely in some situations) we have
1690 * to re-route it (ie, find a route for the next-hop and the
1691 * associated interface) and set them here. This is nested
1692 * forwarding which in most cases is undesirable, except where
1693 * such control is nigh impossible. So we do it here.
1694 * And I'm babbling.
1695 */
39236c6e 1696 if (ipfwoff == 0 && old != dst) {
91447636 1697 struct in_ifaddr *ia_fw;
39236c6e 1698 struct route *ro_fwd = &sro_fwd;
1c79356b 1699
1c79356b
A
1700#if IPFIREWALL_FORWARD_DEBUG
1701 printf("IPFIREWALL_FORWARD: New dst ip: ");
1702 print_ip(dst->sin_addr);
1703 printf("\n");
39236c6e 1704#endif /* IPFIREWALL_FORWARD_DEBUG */
1c79356b
A
1705 /*
1706 * We need to figure out if we have been forwarded
39236c6e 1707 * to a local socket. If so then we should somehow
1c79356b
A
1708 * "loop back" to ip_input, and get directed to the
1709 * PCB as if we had received this packet. This is
1710 * because it may be difficult to identify the packets
1711 * you want to forward until they are being output
1712 * and have selected an interface. (e.g. locally
1713 * initiated packets) If we used the loopback interface,
39236c6e 1714 * we would not be able to control what happens
1c79356b
A
1715 * as the packet runs through ip_input() as
1716 * it is done through a ISR.
1717 */
b0d623f7 1718 lck_rw_lock_shared(in_ifaddr_rwlock);
91447636 1719 TAILQ_FOREACH(ia_fw, &in_ifaddrhead, ia_link) {
1c79356b
A
1720 /*
1721 * If the addr to forward to is one
1722 * of ours, we pretend to
1723 * be the destination for this packet.
1724 */
6d2010ae 1725 IFA_LOCK_SPIN(&ia_fw->ia_ifa);
91447636 1726 if (IA_SIN(ia_fw)->sin_addr.s_addr ==
6d2010ae
A
1727 dst->sin_addr.s_addr) {
1728 IFA_UNLOCK(&ia_fw->ia_ifa);
1c79356b 1729 break;
6d2010ae
A
1730 }
1731 IFA_UNLOCK(&ia_fw->ia_ifa);
1c79356b 1732 }
b0d623f7
A
1733 lck_rw_done(in_ifaddr_rwlock);
1734 if (ia_fw) {
1c79356b 1735 /* tell ip_input "dont filter" */
0a7de745
A
1736 struct m_tag *fwd_tag;
1737 struct ip_fwd_tag *ipfwd_tag;
6d2010ae
A
1738
1739 fwd_tag = m_tag_create(KERNEL_MODULE_TAG_ID,
b0d623f7 1740 KERNEL_TAG_TYPE_IPFORWARD,
0a7de745 1741 sizeof(*ipfwd_tag), M_NOWAIT, m);
91447636
A
1742 if (fwd_tag == NULL) {
1743 error = ENOBUFS;
1744 goto bad;
1745 }
6d2010ae 1746
0a7de745 1747 ipfwd_tag = (struct ip_fwd_tag *)(fwd_tag + 1);
316670eb 1748 ipfwd_tag->next_hop = args.fwa_next_hop;
91447636
A
1749
1750 m_tag_prepend(m, fwd_tag);
1751
0a7de745 1752 if (m->m_pkthdr.rcvif == NULL) {
6d2010ae 1753 m->m_pkthdr.rcvif = lo_ifp;
0a7de745 1754 }
b0d623f7
A
1755
1756#if BYTE_ORDER != BIG_ENDIAN
9bccf70c
A
1757 HTONS(ip->ip_len);
1758 HTONS(ip->ip_off);
316670eb 1759#endif
39236c6e 1760 mbuf_outbound_finalize(m, PF_INET, 0);
316670eb 1761
39236c6e
A
1762 /*
1763 * we need to call dlil_output to run filters
1764 * and resync to avoid recursion loops.
91447636
A
1765 */
1766 if (lo_ifp) {
39236c6e
A
1767 dlil_output(lo_ifp, PF_INET, m, NULL,
1768 SA(dst), 0, adv);
1769 } else {
1770 printf("%s: no loopback ifp for "
1771 "forwarding!!!\n", __func__);
91447636 1772 }
1c79356b
A
1773 goto done;
1774 }
39236c6e
A
1775 /*
1776 * Some of the logic for this was nicked from above.
1c79356b
A
1777 *
1778 * This rewrites the cached route in a local PCB.
1779 * Is this what we want to do?
1780 */
39236c6e 1781 ROUTE_RELEASE(ro_fwd);
0a7de745 1782 bcopy(dst, &ro_fwd->ro_dst, sizeof(*dst));
1c79356b 1783
39037602 1784 rtalloc_ign(ro_fwd, RTF_PRCLONING, false);
1c79356b 1785
b0d623f7
A
1786 if (ro_fwd->ro_rt == NULL) {
1787 OSAddAtomic(1, &ipstat.ips_noroute);
1c79356b
A
1788 error = EHOSTUNREACH;
1789 goto bad;
1790 }
1791
b0d623f7 1792 RT_LOCK_SPIN(ro_fwd->ro_rt);
91447636 1793 ia_fw = ifatoia(ro_fwd->ro_rt->rt_ifa);
6d2010ae
A
1794 if (ia_fw != NULL) {
1795 /* Become a regular mutex */
1796 RT_CONVERT_LOCK(ro_fwd->ro_rt);
1797 IFA_ADDREF(&ia_fw->ia_ifa);
1798 }
1c79356b 1799 ifp = ro_fwd->ro_rt->rt_ifp;
1c79356b 1800 ro_fwd->ro_rt->rt_use++;
0a7de745 1801 if (ro_fwd->ro_rt->rt_flags & RTF_GATEWAY) {
39236c6e 1802 dst = SIN(ro_fwd->ro_rt->rt_gateway);
0a7de745 1803 }
7e4a7d39 1804 if (ro_fwd->ro_rt->rt_flags & RTF_HOST) {
39236c6e
A
1805 /* double negation needed for bool bit field */
1806 ipobf.isbroadcast =
1807 !!(ro_fwd->ro_rt->rt_flags & RTF_BROADCAST);
7e4a7d39
A
1808 } else {
1809 /* Become a regular mutex */
1810 RT_CONVERT_LOCK(ro_fwd->ro_rt);
39236c6e
A
1811 ipobf.isbroadcast =
1812 in_broadcast(dst->sin_addr, ifp);
7e4a7d39 1813 }
b0d623f7 1814 RT_UNLOCK(ro_fwd->ro_rt);
39236c6e 1815 ROUTE_RELEASE(ro);
1c79356b 1816 ro->ro_rt = ro_fwd->ro_rt;
39236c6e
A
1817 ro_fwd->ro_rt = NULL;
1818 dst = SIN(&ro_fwd->ro_dst);
1c79356b
A
1819
1820 /*
1821 * If we added a default src ip earlier,
1822 * which would have been gotten from the-then
1823 * interface, do it again, from the new one.
1824 */
b0d623f7 1825 if (ia_fw != NULL) {
39236c6e 1826 if (ipobf.fwd_rewrite_src) {
6d2010ae 1827 IFA_LOCK_SPIN(&ia_fw->ia_ifa);
b0d623f7 1828 ip->ip_src = IA_SIN(ia_fw)->sin_addr;
6d2010ae
A
1829 IFA_UNLOCK(&ia_fw->ia_ifa);
1830 }
1831 IFA_REMREF(&ia_fw->ia_ifa);
b0d623f7 1832 }
39236c6e 1833 goto pass;
1c79356b
A
1834 }
1835#endif /* IPFIREWALL_FORWARD */
39236c6e
A
1836 /*
1837 * if we get here, none of the above matches, and
1838 * we have to drop the pkt
1839 */
1c79356b 1840 m_freem(m);
91447636 1841 error = EACCES; /* not sure this is the right error msg */
91447636 1842 goto done;
1c79356b 1843 }
1c79356b
A
1844
1845pass:
6d2010ae 1846#endif /* IPFIREWALL */
39236c6e
A
1847
1848 /* 127/8 must not appear on wire - RFC1122 */
1849 if (!(ifp->if_flags & IFF_LOOPBACK) &&
1850 ((ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
1851 (ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)) {
b0d623f7 1852 OSAddAtomic(1, &ipstat.ips_badaddr);
39236c6e 1853 error = EADDRNOTAVAIL;
fe8ab488 1854 goto bad;
e5568f75 1855 }
316670eb 1856
39037602
A
1857 if (ipoa != NULL) {
1858 u_int8_t dscp = ip->ip_tos >> IPTOS_DSCP_SHIFT;
1859
1860 error = set_packet_qos(m, ifp,
1861 ipoa->ipoa_flags & IPOAF_QOSMARKING_ALLOWED ? TRUE : FALSE,
1862 ipoa->ipoa_sotc, ipoa->ipoa_netsvctype, &dscp);
1863 if (error == 0) {
1864 ip->ip_tos &= IPTOS_ECN_MASK;
1865 ip->ip_tos |= dscp << IPTOS_DSCP_SHIFT;
1866 } else {
1867 printf("%s if_dscp_for_mbuf() error %d\n", __func__, error);
1868 error = 0;
1869 }
1870 }
1871
39236c6e
A
1872 ip_output_checksum(ifp, m, (IP_VHL_HL(ip->ip_vhl) << 2),
1873 ip->ip_len, &sw_csum);
0b4e3aa0 1874
d9a64523
A
1875 interface_mtu = ifp->if_mtu;
1876
1877 if (INTF_ADJUST_MTU_FOR_CLAT46(ifp)) {
1878 interface_mtu = IN6_LINKMTU(ifp);
1879 /* Further adjust the size for CLAT46 expansion */
1880 interface_mtu -= CLAT46_HDR_EXPANSION_OVERHD;
1881 }
1882
1c79356b 1883 /*
9bccf70c 1884 * If small enough for interface, or the interface will take
0b4e3aa0 1885 * care of the fragmentation for us, can just send directly.
1c79356b 1886 */
d9a64523 1887 if ((u_short)ip->ip_len <= interface_mtu || TSO_IPV4_OK(ifp, m) ||
39236c6e 1888 (!(ip->ip_off & IP_DF) && (ifp->if_hwassist & CSUM_FRAGMENT))) {
b0d623f7 1889#if BYTE_ORDER != BIG_ENDIAN
9bccf70c
A
1890 HTONS(ip->ip_len);
1891 HTONS(ip->ip_off);
b0d623f7 1892#endif
316670eb 1893
1c79356b 1894 ip->ip_sum = 0;
9bccf70c 1895 if (sw_csum & CSUM_DELAY_IP) {
39236c6e
A
1896 ip->ip_sum = ip_cksum_hdr_out(m, hlen);
1897 sw_csum &= ~CSUM_DELAY_IP;
1898 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_IP;
9bccf70c 1899 }
316670eb 1900
9bccf70c
A
1901#if IPSEC
1902 /* clean ipsec history once it goes out of the node */
0a7de745 1903 if (ipsec_bypass == 0 && !(flags & IP_NOIPSEC)) {
9bccf70c 1904 ipsec_delaux(m);
0a7de745 1905 }
39236c6e
A
1906#endif /* IPSEC */
1907 if ((m->m_pkthdr.csum_flags & CSUM_TSO_IPV4) &&
0a7de745 1908 (m->m_pkthdr.tso_segsz > 0)) {
39236c6e 1909 scnt += m->m_pkthdr.len / m->m_pkthdr.tso_segsz;
0a7de745 1910 } else {
39236c6e 1911 scnt++;
0a7de745 1912 }
39236c6e 1913
91447636 1914 if (packetchain == 0) {
0a7de745 1915 if (ro->ro_rt != NULL && nstat_collect) {
39236c6e
A
1916 nstat_route_tx(ro->ro_rt, scnt,
1917 m->m_pkthdr.len, 0);
0a7de745 1918 }
39236c6e 1919
316670eb 1920 error = dlil_output(ifp, PF_INET, m, ro->ro_rt,
39236c6e 1921 SA(dst), 0, adv);
fe8ab488
A
1922 if (dlil_verbose && error) {
1923 printf("dlil_output error on interface %s: %d\n",
0a7de745 1924 ifp->if_xname, error);
fe8ab488 1925 }
39236c6e 1926 scnt = 0;
2d21ac55 1927 goto done;
39236c6e
A
1928 } else {
1929 /*
1930 * packet chaining allows us to reuse the
1931 * route for all packets
1932 */
6d2010ae 1933 bytecnt += m->m_pkthdr.len;
b0d623f7 1934 mppn = &m->m_nextpkt;
91447636
A
1935 m = m->m_nextpkt;
1936 if (m == NULL) {
b0d623f7
A
1937#if PF
1938sendchain:
1939#endif /* PF */
0a7de745 1940 if (pktcnt > ip_maxchainsent) {
91447636 1941 ip_maxchainsent = pktcnt;
0a7de745
A
1942 }
1943 if (ro->ro_rt != NULL && nstat_collect) {
39236c6e
A
1944 nstat_route_tx(ro->ro_rt, scnt,
1945 bytecnt, 0);
0a7de745 1946 }
39236c6e 1947
316670eb 1948 error = dlil_output(ifp, PF_INET, packetlist,
39236c6e 1949 ro->ro_rt, SA(dst), 0, adv);
fe8ab488
A
1950 if (dlil_verbose && error) {
1951 printf("dlil_output error on interface %s: %d\n",
0a7de745 1952 ifp->if_xname, error);
fe8ab488 1953 }
91447636 1954 pktcnt = 0;
39236c6e 1955 scnt = 0;
6d2010ae 1956 bytecnt = 0;
91447636 1957 goto done;
91447636
A
1958 }
1959 m0 = m;
1960 pktcnt++;
1961 goto loopit;
1962 }
1c79356b 1963 }
d9a64523
A
1964
1965 VERIFY(interface_mtu != 0);
1c79356b
A
1966 /*
1967 * Too large for interface; fragment if possible.
1968 * Must be able to put at least 8 bytes per fragment.
39236c6e 1969 * Balk when DF bit is set or the interface didn't support TSO.
1c79356b 1970 */
39236c6e
A
1971 if ((ip->ip_off & IP_DF) || pktcnt > 0 ||
1972 (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4)) {
1c79356b
A
1973 error = EMSGSIZE;
1974 /*
1975 * This case can happen if the user changed the MTU
1976 * of an interface after enabling IP on it. Because
1977 * most netifs don't keep track of routes pointing to
1978 * them, there is no way for one to update all its
1979 * routes when the MTU is changed.
1980 */
6d2010ae
A
1981 if (ro->ro_rt) {
1982 RT_LOCK_SPIN(ro->ro_rt);
39236c6e
A
1983 if ((ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST)) &&
1984 !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU) &&
d9a64523
A
1985 (ro->ro_rt->rt_rmx.rmx_mtu > interface_mtu)) {
1986 ro->ro_rt->rt_rmx.rmx_mtu = interface_mtu;
6d2010ae
A
1987 }
1988 RT_UNLOCK(ro->ro_rt);
1989 }
1990 if (pktcnt > 0) {
1991 m0 = packetlist;
1c79356b 1992 }
b0d623f7 1993 OSAddAtomic(1, &ipstat.ips_cantfrag);
1c79356b
A
1994 goto bad;
1995 }
b0d623f7 1996
d9a64523
A
1997 /*
1998 * XXX Only TCP seems to be passing a list of packets here.
1999 * The following issue is limited to UDP datagrams with 0 checksum.
2000 * For now limit it to the case when single packet is passed down.
2001 */
2002 if (packetchain == 0 && IS_INTF_CLAT46(ifp)) {
2003 /*
2004 * If it is a UDP packet that has checksum set to 0
2005 * and is also not being offloaded, compute a full checksum
2006 * and update the UDP checksum.
2007 */
2008 if (ip->ip_p == IPPROTO_UDP &&
2009 !(m->m_pkthdr.csum_flags & (CSUM_UDP | CSUM_PARTIAL))) {
2010 struct udphdr *uh = NULL;
2011
0a7de745
A
2012 if (m->m_len < hlen + sizeof(struct udphdr)) {
2013 m = m_pullup(m, hlen + sizeof(struct udphdr));
d9a64523 2014 if (m == NULL) {
0a7de745 2015 error = ENOBUFS;
d9a64523
A
2016 m0 = m;
2017 goto bad;
2018 }
2019 m0 = m;
2020 ip = mtod(m, struct ip *);
2021 }
2022 /*
2023 * Get UDP header and if checksum is 0, then compute the full
2024 * checksum.
2025 */
2026 uh = (struct udphdr *)(void *)((caddr_t)ip + hlen);
2027 if (uh->uh_sum == 0) {
2028 uh->uh_sum = inet_cksum(m, IPPROTO_UDP, hlen,
2029 ip->ip_len - hlen);
0a7de745 2030 if (uh->uh_sum == 0) {
d9a64523 2031 uh->uh_sum = 0xffff;
0a7de745 2032 }
d9a64523
A
2033 }
2034 }
2035 }
2036
2037 error = ip_fragment(m, ifp, interface_mtu, sw_csum);
b0d623f7
A
2038 if (error != 0) {
2039 m0 = m = NULL;
1c79356b
A
2040 goto bad;
2041 }
2042
39236c6e
A
2043 KERNEL_DEBUG(DBG_LAYER_END, ip->ip_dst.s_addr,
2044 ip->ip_src.s_addr, ip->ip_p, ip->ip_off, ip->ip_len);
b0d623f7
A
2045
2046 for (m = m0; m; m = m0) {
2047 m0 = m->m_nextpkt;
2048 m->m_nextpkt = 0;
2049#if IPSEC
2050 /* clean ipsec history once it goes out of the node */
0a7de745 2051 if (ipsec_bypass == 0 && !(flags & IP_NOIPSEC)) {
b0d623f7 2052 ipsec_delaux(m);
0a7de745 2053 }
39236c6e 2054#endif /* IPSEC */
b0d623f7 2055 if (error == 0) {
39236c6e
A
2056 if ((packetchain != 0) && (pktcnt > 0)) {
2057 panic("%s: mix of packet in packetlist is "
2058 "wrong=%p", __func__, packetlist);
2059 /* NOTREACHED */
2060 }
2061 if (ro->ro_rt != NULL && nstat_collect) {
2062 nstat_route_tx(ro->ro_rt, 1,
2063 m->m_pkthdr.len, 0);
b0d623f7 2064 }
316670eb 2065 error = dlil_output(ifp, PF_INET, m, ro->ro_rt,
39236c6e 2066 SA(dst), 0, adv);
fe8ab488
A
2067 if (dlil_verbose && error) {
2068 printf("dlil_output error on interface %s: %d\n",
0a7de745 2069 ifp->if_xname, error);
fe8ab488 2070 }
39236c6e 2071 } else {
b0d623f7 2072 m_freem(m);
39236c6e 2073 }
b0d623f7
A
2074 }
2075
0a7de745 2076 if (error == 0) {
b0d623f7 2077 OSAddAtomic(1, &ipstat.ips_fragmented);
0a7de745 2078 }
b0d623f7
A
2079
2080done:
39236c6e 2081 if (ia != NULL) {
6d2010ae 2082 IFA_REMREF(&ia->ia_ifa);
b0d623f7
A
2083 ia = NULL;
2084 }
2085#if IPSEC
39236c6e 2086 ROUTE_RELEASE(&ipsec_state.ro);
b0d623f7
A
2087 if (sp != NULL) {
2088 KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
39236c6e 2089 printf("DP ip_output call free SP:%x\n", sp));
b0d623f7
A
2090 key_freesp(sp, KEY_SADB_UNLOCKED);
2091 }
b0d623f7 2092#endif /* IPSEC */
fe8ab488
A
2093#if NECP
2094 ROUTE_RELEASE(&necp_route);
2095#endif /* NECP */
39236c6e
A
2096#if DUMMYNET
2097 ROUTE_RELEASE(&saved_route);
2098#endif /* DUMMYNET */
2099#if IPFIREWALL_FORWARD
2100 ROUTE_RELEASE(&sro_fwd);
2101#endif /* IPFIREWALL_FORWARD */
b0d623f7 2102
39236c6e 2103 KERNEL_DEBUG(DBG_FNC_IP_OUTPUT | DBG_FUNC_END, error, 0, 0, 0, 0);
3e170ce0
A
2104 if (ip_output_measure) {
2105 net_perf_measure_time(&net_perf, &start_tv, packets_processed);
2106 net_perf_histogram(&net_perf, packets_processed);
2107 }
0a7de745 2108 return error;
b0d623f7 2109bad:
0a7de745 2110 if (pktcnt > 0) {
fe8ab488 2111 m0 = packetlist;
0a7de745 2112 }
fe8ab488 2113 m_freem_list(m0);
b0d623f7 2114 goto done;
39236c6e
A
2115
2116#undef ipsec_state
2117#undef args
2118#undef sro_fwd
2119#undef saved_route
2120#undef ipf_pktopts
fe8ab488 2121#undef IP_CHECK_RESTRICTIONS
b0d623f7
A
2122}
2123
2124int
2125ip_fragment(struct mbuf *m, struct ifnet *ifp, unsigned long mtu, int sw_csum)
2126{
2127 struct ip *ip, *mhip;
2128 int len, hlen, mhlen, firstlen, off, error = 0;
2129 struct mbuf **mnext = &m->m_nextpkt, *m0;
2130 int nfrags = 1;
2131
2132 ip = mtod(m, struct ip *);
2133#ifdef _IP_VHL
2134 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
39236c6e 2135#else /* !_IP_VHL */
b0d623f7 2136 hlen = ip->ip_hl << 2;
39236c6e 2137#endif /* !_IP_VHL */
b0d623f7 2138
d9a64523
A
2139#ifdef INET6
2140 /*
2141 * We need to adjust the fragment sizes to account
2142 * for IPv6 fragment header if it needs to be translated
2143 * from IPv4 to IPv6.
2144 */
0a7de745 2145 if (IS_INTF_CLAT46(ifp)) {
d9a64523 2146 mtu -= sizeof(struct ip6_frag);
0a7de745 2147 }
d9a64523
A
2148
2149#endif
0a7de745 2150 firstlen = len = (mtu - hlen) & ~7;
b0d623f7
A
2151 if (len < 8) {
2152 m_freem(m);
0a7de745 2153 return EMSGSIZE;
b0d623f7
A
2154 }
2155
9bccf70c
A
2156 /*
2157 * if the interface will not calculate checksums on
2158 * fragmented packets, then do it here.
2159 */
39236c6e 2160 if ((m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) &&
0a7de745 2161 !(ifp->if_hwassist & CSUM_IP_FRAGS)) {
9bccf70c 2162 in_delayed_cksum(m);
0a7de745 2163 }
0b4e3aa0 2164
1c79356b
A
2165 /*
2166 * Loop through length of segment after first fragment,
2167 * make new header and copy data of each part and link onto chain.
2168 */
2169 m0 = m;
0a7de745 2170 mhlen = sizeof(struct ip);
1c79356b 2171 for (off = hlen + len; off < (u_short)ip->ip_len; off += len) {
0a7de745 2172 MGETHDR(m, M_DONTWAIT, MT_HEADER); /* MAC-OK */
39236c6e 2173 if (m == NULL) {
1c79356b 2174 error = ENOBUFS;
b0d623f7 2175 OSAddAtomic(1, &ipstat.ips_odropped);
1c79356b
A
2176 goto sendorfree;
2177 }
0b4e3aa0 2178 m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG;
1c79356b
A
2179 m->m_data += max_linkhdr;
2180 mhip = mtod(m, struct ip *);
2181 *mhip = *ip;
0a7de745
A
2182 if (hlen > sizeof(struct ip)) {
2183 mhlen = ip_optcopy(ip, mhip) + sizeof(struct ip);
1c79356b
A
2184 mhip->ip_vhl = IP_MAKE_VHL(IPVERSION, mhlen >> 2);
2185 }
2186 m->m_len = mhlen;
2187 mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF);
0a7de745 2188 if (ip->ip_off & IP_MF) {
1c79356b 2189 mhip->ip_off |= IP_MF;
0a7de745
A
2190 }
2191 if (off + len >= (u_short)ip->ip_len) {
1c79356b 2192 len = (u_short)ip->ip_len - off;
0a7de745 2193 } else {
1c79356b 2194 mhip->ip_off |= IP_MF;
0a7de745 2195 }
1c79356b
A
2196 mhip->ip_len = htons((u_short)(len + mhlen));
2197 m->m_next = m_copy(m0, off, len);
39236c6e 2198 if (m->m_next == NULL) {
1c79356b 2199 (void) m_free(m);
0a7de745 2200 error = ENOBUFS; /* ??? */
b0d623f7 2201 OSAddAtomic(1, &ipstat.ips_odropped);
1c79356b
A
2202 goto sendorfree;
2203 }
2204 m->m_pkthdr.len = mhlen + len;
39236c6e 2205 m->m_pkthdr.rcvif = NULL;
9bccf70c 2206 m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
316670eb 2207
39236c6e 2208 M_COPY_CLASSIFIER(m, m0);
316670eb 2209 M_COPY_PFTAG(m, m0);
316670eb 2210
2d21ac55
A
2211#if CONFIG_MACF_NET
2212 mac_netinet_fragment(m0, m);
39236c6e 2213#endif /* CONFIG_MACF_NET */
b0d623f7
A
2214
2215#if BYTE_ORDER != BIG_ENDIAN
9bccf70c 2216 HTONS(mhip->ip_off);
b0d623f7
A
2217#endif
2218
1c79356b 2219 mhip->ip_sum = 0;
9bccf70c 2220 if (sw_csum & CSUM_DELAY_IP) {
39236c6e
A
2221 mhip->ip_sum = ip_cksum_hdr_out(m, mhlen);
2222 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_IP;
9bccf70c 2223 }
1c79356b
A
2224 *mnext = m;
2225 mnext = &m->m_nextpkt;
0b4e3aa0 2226 nfrags++;
1c79356b 2227 }
b0d623f7 2228 OSAddAtomic(nfrags, &ipstat.ips_ofragments);
0b4e3aa0
A
2229
2230 /* set first/last markers for fragment chain */
9bccf70c
A
2231 m->m_flags |= M_LASTFRAG;
2232 m0->m_flags |= M_FIRSTFRAG | M_FRAG;
0b4e3aa0
A
2233 m0->m_pkthdr.csum_data = nfrags;
2234
1c79356b
A
2235 /*
2236 * Update first fragment by trimming what's been copied out
2237 * and updating header, then send each fragment (in order).
2238 */
2239 m = m0;
2240 m_adj(m, hlen + firstlen - (u_short)ip->ip_len);
2241 m->m_pkthdr.len = hlen + firstlen;
2242 ip->ip_len = htons((u_short)m->m_pkthdr.len);
9bccf70c 2243 ip->ip_off |= IP_MF;
b0d623f7
A
2244
2245#if BYTE_ORDER != BIG_ENDIAN
9bccf70c 2246 HTONS(ip->ip_off);
b0d623f7 2247#endif
316670eb 2248
1c79356b 2249 ip->ip_sum = 0;
9bccf70c 2250 if (sw_csum & CSUM_DELAY_IP) {
39236c6e
A
2251 ip->ip_sum = ip_cksum_hdr_out(m, hlen);
2252 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_IP;
9bccf70c 2253 }
1c79356b 2254sendorfree:
0a7de745 2255 if (error) {
b0d623f7 2256 m_freem_list(m0);
0a7de745 2257 }
1c79356b 2258
0a7de745 2259 return error;
1c79356b
A
2260}
2261
2d21ac55
A
2262static void
2263ip_out_cksum_stats(int proto, u_int32_t len)
2264{
2265 switch (proto) {
2266 case IPPROTO_TCP:
2267 tcp_out_cksum_stats(len);
2268 break;
2269 case IPPROTO_UDP:
2270 udp_out_cksum_stats(len);
2271 break;
2272 default:
2273 /* keep only TCP or UDP stats for now */
2274 break;
2275 }
2276}
2277
39236c6e
A
2278/*
2279 * Process a delayed payload checksum calculation (outbound path.)
2280 *
2281 * hoff is the number of bytes beyond the mbuf data pointer which
2282 * points to the IP header.
2283 *
2284 * Returns a bitmask representing all the work done in software.
2285 */
2286uint32_t
2287in_finalize_cksum(struct mbuf *m, uint32_t hoff, uint32_t csum_flags)
0b4e3aa0 2288{
39236c6e 2289 unsigned char buf[15 << 2] __attribute__((aligned(8)));
9bccf70c 2290 struct ip *ip;
39236c6e
A
2291 uint32_t offset, _hlen, mlen, hlen, len, sw_csum;
2292 uint16_t csum, ip_len;
6d2010ae 2293
0a7de745 2294 _CASSERT(sizeof(csum) == sizeof(uint16_t));
39236c6e 2295 VERIFY(m->m_flags & M_PKTHDR);
6d2010ae 2296
39236c6e
A
2297 sw_csum = (csum_flags & m->m_pkthdr.csum_flags);
2298
0a7de745 2299 if ((sw_csum &= (CSUM_DELAY_IP | CSUM_DELAY_DATA)) == 0) {
39236c6e 2300 goto done;
0a7de745 2301 }
39236c6e 2302
0a7de745 2303 mlen = m->m_pkthdr.len; /* total mbuf len */
39236c6e
A
2304
2305 /* sanity check (need at least simple IP header) */
0a7de745 2306 if (mlen < (hoff + sizeof(*ip))) {
39236c6e
A
2307 panic("%s: mbuf %p pkt len (%u) < hoff+ip_hdr "
2308 "(%u+%u)\n", __func__, m, mlen, hoff,
0a7de745 2309 (uint32_t)sizeof(*ip));
39236c6e 2310 /* NOTREACHED */
91447636 2311 }
316670eb
A
2312
2313 /*
39236c6e
A
2314 * In case the IP header is not contiguous, or not 32-bit aligned,
2315 * or if we're computing the IP header checksum, copy it to a local
2316 * buffer. Copy only the simple IP header here (IP options case
2317 * is handled below.)
316670eb 2318 */
0a7de745 2319 if ((sw_csum & CSUM_DELAY_IP) || (hoff + sizeof(*ip)) > m->m_len ||
39236c6e 2320 !IP_HDR_ALIGNED_P(mtod(m, caddr_t) + hoff)) {
0a7de745 2321 m_copydata(m, hoff, sizeof(*ip), (caddr_t)buf);
316670eb 2322 ip = (struct ip *)(void *)buf;
0a7de745 2323 _hlen = sizeof(*ip);
8f6c56a5 2324 } else {
39236c6e
A
2325 ip = (struct ip *)(void *)(m->m_data + hoff);
2326 _hlen = 0;
91447636 2327 }
316670eb 2328
0a7de745 2329 hlen = IP_VHL_HL(ip->ip_vhl) << 2; /* IP header len */
316670eb 2330
39236c6e
A
2331 /* sanity check */
2332 if (mlen < (hoff + hlen)) {
2333 panic("%s: mbuf %p pkt too short (%d) for IP header (%u), "
2334 "hoff %u", __func__, m, mlen, hlen, hoff);
2335 /* NOTREACHED */
2336 }
8f6c56a5
A
2337
2338 /*
2339 * We could be in the context of an IP or interface filter; in the
2340 * former case, ip_len would be in host (correct) order while for
2341 * the latter it would be in network order. Because of this, we
2342 * attempt to interpret the length field by comparing it against
2343 * the actual packet length. If the comparison fails, byte swap
39236c6e
A
2344 * the length and check again. If it still fails, use the actual
2345 * packet length. This also covers the trailing bytes case.
8f6c56a5
A
2346 */
2347 ip_len = ip->ip_len;
39236c6e
A
2348 if (ip_len != (mlen - hoff)) {
2349 ip_len = OSSwapInt16(ip_len);
2350 if (ip_len != (mlen - hoff)) {
2351 printf("%s: mbuf 0x%llx proto %d IP len %d (%x) "
2352 "[swapped %d (%x)] doesn't match actual packet "
2353 "length; %d is used instead\n", __func__,
2354 (uint64_t)VM_KERNEL_ADDRPERM(m), ip->ip_p,
2355 ip->ip_len, ip->ip_len, ip_len, ip_len,
2356 (mlen - hoff));
2357 ip_len = mlen - hoff;
8f6c56a5
A
2358 }
2359 }
2360
0a7de745 2361 len = ip_len - hlen; /* csum span */
8f6c56a5 2362
39236c6e
A
2363 if (sw_csum & CSUM_DELAY_DATA) {
2364 uint16_t ulpoff;
2d21ac55 2365
39236c6e
A
2366 /*
2367 * offset is added to the lower 16-bit value of csum_data,
2368 * which is expected to contain the ULP offset; therefore
2369 * CSUM_PARTIAL offset adjustment must be undone.
2370 */
0a7de745
A
2371 if ((m->m_pkthdr.csum_flags & (CSUM_PARTIAL | CSUM_DATA_VALID)) ==
2372 (CSUM_PARTIAL | CSUM_DATA_VALID)) {
39236c6e
A
2373 /*
2374 * Get back the original ULP offset (this will
2375 * undo the CSUM_PARTIAL logic in ip_output.)
2376 */
2377 m->m_pkthdr.csum_data = (m->m_pkthdr.csum_tx_stuff -
2378 m->m_pkthdr.csum_tx_start);
2379 }
8f6c56a5 2380
39236c6e 2381 ulpoff = (m->m_pkthdr.csum_data & 0xffff); /* ULP csum offset */
0a7de745 2382 offset = hoff + hlen; /* ULP header */
0b4e3aa0 2383
0a7de745 2384 if (mlen < (ulpoff + sizeof(csum))) {
39236c6e
A
2385 panic("%s: mbuf %p pkt len (%u) proto %d invalid ULP "
2386 "cksum offset (%u) cksum flags 0x%x\n", __func__,
2387 m, mlen, ip->ip_p, ulpoff, m->m_pkthdr.csum_flags);
2388 /* NOTREACHED */
2389 }
0b4e3aa0 2390
39236c6e 2391 csum = inet_cksum(m, 0, offset, len);
316670eb 2392
39236c6e
A
2393 /* Update stats */
2394 ip_out_cksum_stats(ip->ip_p, len);
91447636 2395
39236c6e 2396 /* RFC1122 4.1.3.4 */
5ba3f43e 2397 if (csum == 0 &&
0a7de745 2398 (m->m_pkthdr.csum_flags & (CSUM_UDP | CSUM_ZERO_INVERT))) {
39236c6e 2399 csum = 0xffff;
0a7de745 2400 }
91447636 2401
39236c6e
A
2402 /* Insert the checksum in the ULP csum field */
2403 offset += ulpoff;
0a7de745
A
2404 if (offset + sizeof(csum) > m->m_len) {
2405 m_copyback(m, offset, sizeof(csum), &csum);
39236c6e
A
2406 } else if (IP_HDR_ALIGNED_P(mtod(m, char *) + hoff)) {
2407 *(uint16_t *)(void *)(mtod(m, char *) + offset) = csum;
2408 } else {
0a7de745 2409 bcopy(&csum, (mtod(m, char *) + offset), sizeof(csum));
91447636 2410 }
5ba3f43e
A
2411 m->m_pkthdr.csum_flags &= ~(CSUM_DELAY_DATA | CSUM_DATA_VALID |
2412 CSUM_PARTIAL | CSUM_ZERO_INVERT);
91447636 2413 }
8f6c56a5 2414
39236c6e
A
2415 if (sw_csum & CSUM_DELAY_IP) {
2416 /* IP header must be in the local buffer */
0a7de745 2417 VERIFY(_hlen == sizeof(*ip));
39236c6e 2418 if (_hlen != hlen) {
0a7de745 2419 VERIFY(hlen <= sizeof(buf));
39236c6e
A
2420 m_copydata(m, hoff, hlen, (caddr_t)buf);
2421 ip = (struct ip *)(void *)buf;
2422 _hlen = hlen;
2423 }
8f6c56a5 2424
39236c6e
A
2425 /*
2426 * Compute the IP header checksum as if the IP length
2427 * is the length which we believe is "correct"; see
2428 * how ip_len gets calculated above. Note that this
2429 * is done on the local copy and not on the real one.
2430 */
2431 ip->ip_len = htons(ip_len);
8f6c56a5 2432 ip->ip_sum = 0;
39236c6e 2433 csum = in_cksum_hdr_opt(ip);
316670eb 2434
39236c6e
A
2435 /* Update stats */
2436 ipstat.ips_snd_swcsum++;
2437 ipstat.ips_snd_swcsum_bytes += hlen;
91447636 2438
39236c6e
A
2439 /*
2440 * Insert only the checksum in the existing IP header
2441 * csum field; all other fields are left unchanged.
2442 */
2443 offset = hoff + offsetof(struct ip, ip_sum);
0a7de745
A
2444 if (offset + sizeof(csum) > m->m_len) {
2445 m_copyback(m, offset, sizeof(csum), &csum);
39236c6e
A
2446 } else if (IP_HDR_ALIGNED_P(mtod(m, char *) + hoff)) {
2447 *(uint16_t *)(void *)(mtod(m, char *) + offset) = csum;
2448 } else {
0a7de745 2449 bcopy(&csum, (mtod(m, char *) + offset), sizeof(csum));
8f6c56a5 2450 }
39236c6e 2451 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_IP;
8f6c56a5
A
2452 }
2453
39236c6e 2454done:
0a7de745 2455 return sw_csum;
0b4e3aa0
A
2456}
2457
1c79356b
A
2458/*
2459 * Insert IP options into preformed packet.
2460 * Adjust IP destination as required for IP source routing,
2461 * as indicated by a non-zero in_addr at the start of the options.
2462 *
2463 * XXX This routine assumes that the packet has no options in place.
2464 */
2465static struct mbuf *
39236c6e 2466ip_insertoptions(struct mbuf *m, struct mbuf *opt, int *phlen)
1c79356b 2467{
39236c6e 2468 struct ipoption *p = mtod(opt, struct ipoption *);
1c79356b 2469 struct mbuf *n;
39236c6e 2470 struct ip *ip = mtod(m, struct ip *);
1c79356b
A
2471 unsigned optlen;
2472
0a7de745
A
2473 optlen = opt->m_len - sizeof(p->ipopt_dst);
2474 if (optlen + (u_short)ip->ip_len > IP_MAXPACKET) {
2475 return m; /* XXX should fail */
2476 }
2477 if (p->ipopt_dst.s_addr) {
1c79356b 2478 ip->ip_dst = p->ipopt_dst;
0a7de745 2479 }
1c79356b 2480 if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
0a7de745
A
2481 MGETHDR(n, M_DONTWAIT, MT_HEADER); /* MAC-OK */
2482 if (n == NULL) {
2483 return m;
2484 }
91447636 2485 n->m_pkthdr.rcvif = 0;
2d21ac55
A
2486#if CONFIG_MACF_NET
2487 mac_mbuf_label_copy(m, n);
39236c6e 2488#endif /* CONFIG_MACF_NET */
1c79356b 2489 n->m_pkthdr.len = m->m_pkthdr.len + optlen;
0a7de745
A
2490 m->m_len -= sizeof(struct ip);
2491 m->m_data += sizeof(struct ip);
1c79356b
A
2492 n->m_next = m;
2493 m = n;
0a7de745 2494 m->m_len = optlen + sizeof(struct ip);
1c79356b 2495 m->m_data += max_linkhdr;
0a7de745 2496 (void) memcpy(mtod(m, void *), ip, sizeof(struct ip));
1c79356b
A
2497 } else {
2498 m->m_data -= optlen;
2499 m->m_len += optlen;
2500 m->m_pkthdr.len += optlen;
0a7de745 2501 ovbcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
1c79356b
A
2502 }
2503 ip = mtod(m, struct ip *);
2504 bcopy(p->ipopt_list, ip + 1, optlen);
0a7de745 2505 *phlen = sizeof(struct ip) + optlen;
1c79356b
A
2506 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, *phlen >> 2);
2507 ip->ip_len += optlen;
0a7de745 2508 return m;
1c79356b
A
2509}
2510
2511/*
2512 * Copy options from ip to jp,
2513 * omitting those not copied during fragmentation.
2514 */
39236c6e
A
2515static int
2516ip_optcopy(struct ip *ip, struct ip *jp)
1c79356b 2517{
39236c6e 2518 u_char *cp, *dp;
1c79356b
A
2519 int opt, optlen, cnt;
2520
2521 cp = (u_char *)(ip + 1);
2522 dp = (u_char *)(jp + 1);
0a7de745 2523 cnt = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof(struct ip);
1c79356b
A
2524 for (; cnt > 0; cnt -= optlen, cp += optlen) {
2525 opt = cp[0];
0a7de745 2526 if (opt == IPOPT_EOL) {
1c79356b 2527 break;
0a7de745 2528 }
1c79356b
A
2529 if (opt == IPOPT_NOP) {
2530 /* Preserve for IP mcast tunnel's LSRR alignment. */
2531 *dp++ = IPOPT_NOP;
2532 optlen = 1;
2533 continue;
9bccf70c
A
2534 }
2535#if DIAGNOSTIC
0a7de745 2536 if (cnt < IPOPT_OLEN + sizeof(*cp)) {
9bccf70c 2537 panic("malformed IPv4 option passed to ip_optcopy");
39236c6e
A
2538 /* NOTREACHED */
2539 }
9bccf70c
A
2540#endif
2541 optlen = cp[IPOPT_OLEN];
2542#if DIAGNOSTIC
0a7de745 2543 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) {
9bccf70c 2544 panic("malformed IPv4 option passed to ip_optcopy");
39236c6e
A
2545 /* NOTREACHED */
2546 }
9bccf70c 2547#endif
1c79356b 2548 /* bogus lengths should have been caught by ip_dooptions */
0a7de745 2549 if (optlen > cnt) {
1c79356b 2550 optlen = cnt;
0a7de745 2551 }
1c79356b
A
2552 if (IPOPT_COPIED(opt)) {
2553 bcopy(cp, dp, optlen);
2554 dp += optlen;
2555 }
2556 }
0a7de745 2557 for (optlen = dp - (u_char *)(jp + 1); optlen & 0x3; optlen++) {
1c79356b 2558 *dp++ = IPOPT_EOL;
0a7de745
A
2559 }
2560 return optlen;
1c79356b
A
2561}
2562
2563/*
2564 * IP socket option processing.
2565 */
2566int
39236c6e 2567ip_ctloutput(struct socket *so, struct sockopt *sopt)
1c79356b 2568{
0a7de745
A
2569 struct inpcb *inp = sotoinpcb(so);
2570 int error, optval;
cb323159 2571 lck_mtx_t *mutex_held = NULL;
1c79356b
A
2572
2573 error = optval = 0;
0a7de745
A
2574 if (sopt->sopt_level != IPPROTO_IP) {
2575 return EINVAL;
2576 }
1c79356b
A
2577
2578 switch (sopt->sopt_dir) {
2579 case SOPT_SET:
cb323159
A
2580 mutex_held = socket_getlock(so, PR_F_WILLUNLOCK);
2581 /*
2582 * Wait if we are in the middle of ip_output
2583 * as we unlocked the socket there and don't
2584 * want to overwrite the IP options
2585 */
2586 if (inp->inp_sndinprog_cnt > 0) {
2587 inp->inp_sndingprog_waiters++;
2588
2589 while (inp->inp_sndinprog_cnt > 0) {
2590 msleep(&inp->inp_sndinprog_cnt, mutex_held,
2591 PSOCK | PCATCH, "inp_sndinprog_cnt", NULL);
2592 }
2593 inp->inp_sndingprog_waiters--;
2594 }
1c79356b 2595 switch (sopt->sopt_name) {
1c79356b
A
2596#ifdef notyet
2597 case IP_RETOPTS:
2598#endif
39236c6e 2599 case IP_OPTIONS: {
1c79356b 2600 struct mbuf *m;
39236c6e 2601
1c79356b
A
2602 if (sopt->sopt_valsize > MLEN) {
2603 error = EMSGSIZE;
2604 break;
2605 }
b0d623f7
A
2606 MGET(m, sopt->sopt_p != kernproc ? M_WAIT : M_DONTWAIT,
2607 MT_HEADER);
39236c6e 2608 if (m == NULL) {
1c79356b
A
2609 error = ENOBUFS;
2610 break;
2611 }
2612 m->m_len = sopt->sopt_valsize;
39236c6e
A
2613 error = sooptcopyin(sopt, mtod(m, char *),
2614 m->m_len, m->m_len);
5ba3f43e
A
2615 if (error) {
2616 m_freem(m);
1c79356b 2617 break;
5ba3f43e 2618 }
39236c6e 2619
0a7de745
A
2620 return ip_pcbopts(sopt->sopt_name,
2621 &inp->inp_options, m);
1c79356b
A
2622 }
2623
2624 case IP_TOS:
2625 case IP_TTL:
2626 case IP_RECVOPTS:
2627 case IP_RECVRETOPTS:
2628 case IP_RECVDSTADDR:
2629 case IP_RECVIF:
55e303ae 2630 case IP_RECVTTL:
6d2010ae 2631 case IP_RECVPKTINFO:
813fb2f6 2632 case IP_RECVTOS:
0a7de745
A
2633 error = sooptcopyin(sopt, &optval, sizeof(optval),
2634 sizeof(optval));
2635 if (error) {
1c79356b 2636 break;
0a7de745 2637 }
1c79356b
A
2638
2639 switch (sopt->sopt_name) {
2640 case IP_TOS:
2641 inp->inp_ip_tos = optval;
2642 break;
2643
2644 case IP_TTL:
2645 inp->inp_ip_ttl = optval;
2646 break;
0a7de745 2647#define OPTSET(bit) \
1c79356b 2648 if (optval) \
0a7de745 2649 inp->inp_flags |= bit; \
1c79356b 2650 else \
0a7de745 2651 inp->inp_flags &= ~bit;
1c79356b
A
2652
2653 case IP_RECVOPTS:
2654 OPTSET(INP_RECVOPTS);
2655 break;
2656
2657 case IP_RECVRETOPTS:
2658 OPTSET(INP_RECVRETOPTS);
2659 break;
2660
2661 case IP_RECVDSTADDR:
2662 OPTSET(INP_RECVDSTADDR);
2663 break;
2664
2665 case IP_RECVIF:
2666 OPTSET(INP_RECVIF);
2667 break;
2668
55e303ae
A
2669 case IP_RECVTTL:
2670 OPTSET(INP_RECVTTL);
2671 break;
2672
6d2010ae
A
2673 case IP_RECVPKTINFO:
2674 OPTSET(INP_PKTINFO);
2675 break;
813fb2f6
A
2676
2677 case IP_RECVTOS:
2678 OPTSET(INP_RECVTOS);
2679 break;
2680 #undef OPTSET
1c79356b
A
2681 }
2682 break;
6d2010ae
A
2683 /*
2684 * Multicast socket options are processed by the in_mcast
2685 * module.
2686 */
1c79356b 2687 case IP_MULTICAST_IF:
6d2010ae 2688 case IP_MULTICAST_IFINDEX:
1c79356b
A
2689 case IP_MULTICAST_VIF:
2690 case IP_MULTICAST_TTL:
2691 case IP_MULTICAST_LOOP:
2692 case IP_ADD_MEMBERSHIP:
2693 case IP_DROP_MEMBERSHIP:
6d2010ae
A
2694 case IP_ADD_SOURCE_MEMBERSHIP:
2695 case IP_DROP_SOURCE_MEMBERSHIP:
2696 case IP_BLOCK_SOURCE:
2697 case IP_UNBLOCK_SOURCE:
2698 case IP_MSFILTER:
2699 case MCAST_JOIN_GROUP:
2700 case MCAST_LEAVE_GROUP:
2701 case MCAST_JOIN_SOURCE_GROUP:
2702 case MCAST_LEAVE_SOURCE_GROUP:
2703 case MCAST_BLOCK_SOURCE:
2704 case MCAST_UNBLOCK_SOURCE:
2705 error = inp_setmoptions(inp, sopt);
1c79356b
A
2706 break;
2707
2708 case IP_PORTRANGE:
0a7de745
A
2709 error = sooptcopyin(sopt, &optval, sizeof(optval),
2710 sizeof(optval));
2711 if (error) {
1c79356b 2712 break;
0a7de745 2713 }
1c79356b
A
2714
2715 switch (optval) {
2716 case IP_PORTRANGE_DEFAULT:
2717 inp->inp_flags &= ~(INP_LOWPORT);
2718 inp->inp_flags &= ~(INP_HIGHPORT);
2719 break;
2720
2721 case IP_PORTRANGE_HIGH:
2722 inp->inp_flags &= ~(INP_LOWPORT);
2723 inp->inp_flags |= INP_HIGHPORT;
2724 break;
2725
2726 case IP_PORTRANGE_LOW:
2727 inp->inp_flags &= ~(INP_HIGHPORT);
2728 inp->inp_flags |= INP_LOWPORT;
2729 break;
2730
2731 default:
2732 error = EINVAL;
2733 break;
2734 }
2735 break;
2736
2737#if IPSEC
39236c6e 2738 case IP_IPSEC_POLICY: {
1c79356b
A
2739 caddr_t req = NULL;
2740 size_t len = 0;
2741 int priv;
2742 struct mbuf *m;
2743 int optname;
39037602 2744
0a7de745 2745 if ((error = soopt_getm(sopt, &m)) != 0) { /* XXX */
1c79356b 2746 break;
0a7de745
A
2747 }
2748 if ((error = soopt_mcopyin(sopt, m)) != 0) { /* XXX */
1c79356b 2749 break;
0a7de745 2750 }
b0d623f7 2751 priv = (proc_suser(sopt->sopt_p) == 0);
1c79356b
A
2752 if (m) {
2753 req = mtod(m, caddr_t);
2754 len = m->m_len;
2755 }
2756 optname = sopt->sopt_name;
2757 error = ipsec4_set_policy(inp, optname, req, len, priv);
2758 m_freem(m);
2759 break;
2760 }
39236c6e 2761#endif /* IPSEC */
1c79356b 2762
2d21ac55 2763#if TRAFFIC_MGT
39236c6e
A
2764 case IP_TRAFFIC_MGT_BACKGROUND: {
2765 unsigned background = 0;
2766
2767 error = sooptcopyin(sopt, &background,
0a7de745
A
2768 sizeof(background), sizeof(background));
2769 if (error) {
2d21ac55 2770 break;
0a7de745 2771 }
2d21ac55 2772
d1ecb069 2773 if (background) {
6d2010ae
A
2774 socket_set_traffic_mgt_flags_locked(so,
2775 TRAFFIC_MGT_SO_BACKGROUND);
d1ecb069 2776 } else {
6d2010ae
A
2777 socket_clear_traffic_mgt_flags_locked(so,
2778 TRAFFIC_MGT_SO_BACKGROUND);
d1ecb069 2779 }
2d21ac55
A
2780
2781 break;
2782 }
2783#endif /* TRAFFIC_MGT */
2784
c910b4d9
A
2785 /*
2786 * On a multihomed system, scoped routing can be used to
2787 * restrict the source interface used for sending packets.
2788 * The socket option IP_BOUND_IF binds a particular AF_INET
2789 * socket to an interface such that data sent on the socket
2790 * is restricted to that interface. This is unlike the
2791 * SO_DONTROUTE option where the routing table is bypassed;
2792 * therefore it allows for a greater flexibility and control
2793 * over the system behavior, and does not place any restriction
2794 * on the destination address type (e.g. unicast, multicast,
2795 * or broadcast if applicable) or whether or not the host is
2796 * directly reachable. Note that in the multicast transmit
6d2010ae
A
2797 * case, IP_MULTICAST_{IF,IFINDEX} takes precedence over
2798 * IP_BOUND_IF, since the former practically bypasses the
2799 * routing table; in this case, IP_BOUND_IF sets the default
2800 * interface used for sending multicast packets in the absence
2801 * of an explicit multicast transmit interface.
c910b4d9
A
2802 */
2803 case IP_BOUND_IF:
2804 /* This option is settable only for IPv4 */
2805 if (!(inp->inp_vflag & INP_IPV4)) {
2806 error = EINVAL;
2807 break;
2808 }
2809
0a7de745
A
2810 error = sooptcopyin(sopt, &optval, sizeof(optval),
2811 sizeof(optval));
c910b4d9 2812
0a7de745 2813 if (error) {
c910b4d9 2814 break;
0a7de745 2815 }
c910b4d9 2816
39236c6e 2817 error = inp_bindif(inp, optval, NULL);
6d2010ae
A
2818 break;
2819
2820 case IP_NO_IFT_CELLULAR:
2821 /* This option is settable only for IPv4 */
2822 if (!(inp->inp_vflag & INP_IPV4)) {
2823 error = EINVAL;
2824 break;
2825 }
2826
0a7de745
A
2827 error = sooptcopyin(sopt, &optval, sizeof(optval),
2828 sizeof(optval));
6d2010ae 2829
0a7de745 2830 if (error) {
6d2010ae 2831 break;
0a7de745 2832 }
6d2010ae 2833
39236c6e 2834 /* once set, it cannot be unset */
fe8ab488 2835 if (!optval && INP_NO_CELLULAR(inp)) {
39236c6e
A
2836 error = EINVAL;
2837 break;
2838 }
2839
2840 error = so_set_restrictions(so,
2841 SO_RESTRICT_DENY_CELLULAR);
6d2010ae
A
2842 break;
2843
2844 case IP_OUT_IF:
2845 /* This option is not settable */
2846 error = EINVAL;
c910b4d9
A
2847 break;
2848
1c79356b
A
2849 default:
2850 error = ENOPROTOOPT;
2851 break;
2852 }
2853 break;
2854
2855 case SOPT_GET:
2856 switch (sopt->sopt_name) {
2857 case IP_OPTIONS:
2858 case IP_RETOPTS:
39236c6e
A
2859 if (inp->inp_options) {
2860 error = sooptcopyout(sopt,
2861 mtod(inp->inp_options, char *),
2862 inp->inp_options->m_len);
2863 } else {
1c79356b 2864 sopt->sopt_valsize = 0;
39236c6e 2865 }
1c79356b
A
2866 break;
2867
2868 case IP_TOS:
2869 case IP_TTL:
2870 case IP_RECVOPTS:
2871 case IP_RECVRETOPTS:
2872 case IP_RECVDSTADDR:
2873 case IP_RECVIF:
55e303ae 2874 case IP_RECVTTL:
1c79356b 2875 case IP_PORTRANGE:
6d2010ae 2876 case IP_RECVPKTINFO:
813fb2f6 2877 case IP_RECVTOS:
1c79356b 2878 switch (sopt->sopt_name) {
1c79356b
A
2879 case IP_TOS:
2880 optval = inp->inp_ip_tos;
2881 break;
2882
2883 case IP_TTL:
2884 optval = inp->inp_ip_ttl;
2885 break;
2886
0a7de745 2887#define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0)
1c79356b
A
2888
2889 case IP_RECVOPTS:
2890 optval = OPTBIT(INP_RECVOPTS);
2891 break;
2892
2893 case IP_RECVRETOPTS:
2894 optval = OPTBIT(INP_RECVRETOPTS);
2895 break;
2896
2897 case IP_RECVDSTADDR:
2898 optval = OPTBIT(INP_RECVDSTADDR);
2899 break;
2900
2901 case IP_RECVIF:
2902 optval = OPTBIT(INP_RECVIF);
2903 break;
2904
55e303ae
A
2905 case IP_RECVTTL:
2906 optval = OPTBIT(INP_RECVTTL);
2907 break;
2908
1c79356b 2909 case IP_PORTRANGE:
0a7de745 2910 if (inp->inp_flags & INP_HIGHPORT) {
1c79356b 2911 optval = IP_PORTRANGE_HIGH;
0a7de745 2912 } else if (inp->inp_flags & INP_LOWPORT) {
1c79356b 2913 optval = IP_PORTRANGE_LOW;
0a7de745 2914 } else {
1c79356b 2915 optval = 0;
0a7de745 2916 }
1c79356b
A
2917 break;
2918
6d2010ae
A
2919 case IP_RECVPKTINFO:
2920 optval = OPTBIT(INP_PKTINFO);
2921 break;
813fb2f6
A
2922
2923 case IP_RECVTOS:
2924 optval = OPTBIT(INP_RECVTOS);
2925 break;
1c79356b 2926 }
0a7de745 2927 error = sooptcopyout(sopt, &optval, sizeof(optval));
1c79356b
A
2928 break;
2929
2930 case IP_MULTICAST_IF:
6d2010ae 2931 case IP_MULTICAST_IFINDEX:
1c79356b
A
2932 case IP_MULTICAST_VIF:
2933 case IP_MULTICAST_TTL:
2934 case IP_MULTICAST_LOOP:
6d2010ae
A
2935 case IP_MSFILTER:
2936 error = inp_getmoptions(inp, sopt);
1c79356b
A
2937 break;
2938
2939#if IPSEC
39236c6e 2940 case IP_IPSEC_POLICY: {
fe8ab488 2941 error = 0; /* This option is no longer supported */
1c79356b
A
2942 break;
2943 }
39236c6e 2944#endif /* IPSEC */
1c79356b 2945
2d21ac55 2946#if TRAFFIC_MGT
39236c6e 2947 case IP_TRAFFIC_MGT_BACKGROUND: {
39037602
A
2948 unsigned background = (so->so_flags1 &
2949 SOF1_TRAFFIC_MGT_SO_BACKGROUND) ? 1 : 0;
0a7de745
A
2950 return sooptcopyout(sopt, &background,
2951 sizeof(background));
2d21ac55
A
2952 }
2953#endif /* TRAFFIC_MGT */
2954
c910b4d9 2955 case IP_BOUND_IF:
0a7de745 2956 if (inp->inp_flags & INP_BOUND_IF) {
316670eb 2957 optval = inp->inp_boundifp->if_index;
0a7de745
A
2958 }
2959 error = sooptcopyout(sopt, &optval, sizeof(optval));
c910b4d9
A
2960 break;
2961
6d2010ae 2962 case IP_NO_IFT_CELLULAR:
fe8ab488 2963 optval = INP_NO_CELLULAR(inp) ? 1 : 0;
0a7de745 2964 error = sooptcopyout(sopt, &optval, sizeof(optval));
6d2010ae
A
2965 break;
2966
2967 case IP_OUT_IF:
316670eb
A
2968 optval = (inp->inp_last_outifp != NULL) ?
2969 inp->inp_last_outifp->if_index : 0;
0a7de745 2970 error = sooptcopyout(sopt, &optval, sizeof(optval));
6d2010ae
A
2971 break;
2972
1c79356b
A
2973 default:
2974 error = ENOPROTOOPT;
2975 break;
2976 }
2977 break;
2978 }
0a7de745 2979 return error;
1c79356b
A
2980}
2981
2982/*
2983 * Set up IP options in pcb for insertion in output packets.
2984 * Store in mbuf with pointer in pcbopt, adding pseudo-option
2985 * with destination address if source routed.
2986 */
2987static int
39236c6e 2988ip_pcbopts(int optname, struct mbuf **pcbopt, struct mbuf *m)
1c79356b 2989{
39236c6e
A
2990#pragma unused(optname)
2991 int cnt, optlen;
2992 u_char *cp;
1c79356b
A
2993 u_char opt;
2994
2995 /* turn off any old options */
0a7de745 2996 if (*pcbopt) {
39236c6e 2997 (void) m_free(*pcbopt);
0a7de745 2998 }
1c79356b
A
2999 *pcbopt = 0;
3000 if (m == (struct mbuf *)0 || m->m_len == 0) {
3001 /*
3002 * Only turning off any previous options.
3003 */
0a7de745 3004 if (m) {
39236c6e 3005 (void) m_free(m);
0a7de745
A
3006 }
3007 return 0;
1c79356b
A
3008 }
3009
0a7de745 3010 if (m->m_len % sizeof(int32_t)) {
1c79356b 3011 goto bad;
0a7de745 3012 }
39236c6e 3013
1c79356b
A
3014 /*
3015 * IP first-hop destination address will be stored before
3016 * actual options; move other options back
3017 * and clear it when none present.
3018 */
0a7de745 3019 if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN]) {
1c79356b 3020 goto bad;
0a7de745 3021 }
1c79356b 3022 cnt = m->m_len;
0a7de745
A
3023 m->m_len += sizeof(struct in_addr);
3024 cp = mtod(m, u_char *) + sizeof(struct in_addr);
1c79356b 3025 ovbcopy(mtod(m, caddr_t), (caddr_t)cp, (unsigned)cnt);
0a7de745 3026 bzero(mtod(m, caddr_t), sizeof(struct in_addr));
1c79356b
A
3027
3028 for (; cnt > 0; cnt -= optlen, cp += optlen) {
3029 opt = cp[IPOPT_OPTVAL];
0a7de745 3030 if (opt == IPOPT_EOL) {
1c79356b 3031 break;
0a7de745
A
3032 }
3033 if (opt == IPOPT_NOP) {
1c79356b 3034 optlen = 1;
0a7de745
A
3035 } else {
3036 if (cnt < IPOPT_OLEN + sizeof(*cp)) {
1c79356b 3037 goto bad;
0a7de745 3038 }
1c79356b 3039 optlen = cp[IPOPT_OLEN];
0a7de745 3040 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) {
1c79356b 3041 goto bad;
0a7de745 3042 }
1c79356b
A
3043 }
3044 switch (opt) {
1c79356b
A
3045 default:
3046 break;
3047
3048 case IPOPT_LSRR:
3049 case IPOPT_SSRR:
3050 /*
3051 * user process specifies route as:
3052 * ->A->B->C->D
3053 * D must be our final destination (but we can't
3054 * check that since we may not have connected yet).
3055 * A is first hop destination, which doesn't appear in
3056 * actual IP option, but is stored before the options.
3057 */
0a7de745 3058 if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr)) {
1c79356b 3059 goto bad;
0a7de745
A
3060 }
3061 m->m_len -= sizeof(struct in_addr);
3062 cnt -= sizeof(struct in_addr);
3063 optlen -= sizeof(struct in_addr);
1c79356b
A
3064 cp[IPOPT_OLEN] = optlen;
3065 /*
3066 * Move first hop before start of options.
3067 */
0a7de745
A
3068 bcopy((caddr_t)&cp[IPOPT_OFFSET + 1], mtod(m, caddr_t),
3069 sizeof(struct in_addr));
1c79356b
A
3070 /*
3071 * Then copy rest of options back
3072 * to close up the deleted entry.
3073 */
0a7de745
A
3074 ovbcopy((caddr_t)(&cp[IPOPT_OFFSET + 1] +
3075 sizeof(struct in_addr)),
3076 (caddr_t)&cp[IPOPT_OFFSET + 1],
94ff46dc 3077 (unsigned)cnt - (IPOPT_MINOFF - 1));
1c79356b
A
3078 break;
3079 }
3080 }
0a7de745 3081 if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr)) {
1c79356b 3082 goto bad;
0a7de745 3083 }
1c79356b 3084 *pcbopt = m;
0a7de745 3085 return 0;
1c79356b
A
3086
3087bad:
39236c6e 3088 (void) m_free(m);
0a7de745 3089 return EINVAL;
1c79356b
A
3090}
3091
6d2010ae
A
3092void
3093ip_moptions_init(void)
9bccf70c 3094{
0a7de745 3095 PE_parse_boot_argn("ifa_debug", &imo_debug, sizeof(imo_debug));
9bccf70c 3096
0a7de745
A
3097 imo_size = (imo_debug == 0) ? sizeof(struct ip_moptions) :
3098 sizeof(struct ip_moptions_dbg);
6d2010ae
A
3099
3100 imo_zone = zinit(imo_size, IMO_ZONE_MAX * imo_size, 0,
3101 IMO_ZONE_NAME);
3102 if (imo_zone == NULL) {
3103 panic("%s: failed allocating %s", __func__, IMO_ZONE_NAME);
3104 /* NOTREACHED */
9bccf70c 3105 }
6d2010ae 3106 zone_change(imo_zone, Z_EXPAND, TRUE);
9bccf70c
A
3107}
3108
6d2010ae
A
3109void
3110imo_addref(struct ip_moptions *imo, int locked)
1c79356b 3111{
0a7de745 3112 if (!locked) {
6d2010ae 3113 IMO_LOCK(imo);
0a7de745 3114 } else {
6d2010ae 3115 IMO_LOCK_ASSERT_HELD(imo);
0a7de745 3116 }
1c79356b 3117
6d2010ae
A
3118 if (++imo->imo_refcnt == 0) {
3119 panic("%s: imo %p wraparound refcnt\n", __func__, imo);
3120 /* NOTREACHED */
3121 } else if (imo->imo_trace != NULL) {
3122 (*imo->imo_trace)(imo, TRUE);
1c79356b
A
3123 }
3124
0a7de745 3125 if (!locked) {
6d2010ae 3126 IMO_UNLOCK(imo);
0a7de745 3127 }
1c79356b
A
3128}
3129
6d2010ae
A
3130void
3131imo_remref(struct ip_moptions *imo)
55e303ae 3132{
55e303ae 3133 int i;
b0d623f7 3134
6d2010ae
A
3135 IMO_LOCK(imo);
3136 if (imo->imo_refcnt == 0) {
3137 panic("%s: imo %p negative refcnt", __func__, imo);
3138 /* NOTREACHED */
3139 } else if (imo->imo_trace != NULL) {
3140 (*imo->imo_trace)(imo, FALSE);
55e303ae 3141 }
b0d623f7 3142
6d2010ae
A
3143 --imo->imo_refcnt;
3144 if (imo->imo_refcnt > 0) {
3145 IMO_UNLOCK(imo);
3146 return;
55e303ae
A
3147 }
3148
55e303ae 3149 for (i = 0; i < imo->imo_num_memberships; ++i) {
6d2010ae 3150 struct in_mfilter *imf;
b0d623f7 3151
6d2010ae 3152 imf = imo->imo_mfilters ? &imo->imo_mfilters[i] : NULL;
0a7de745 3153 if (imf != NULL) {
6d2010ae 3154 imf_leave(imf);
0a7de745 3155 }
b0d623f7 3156
6d2010ae 3157 (void) in_leavegroup(imo->imo_membership[i], imf);
55e303ae 3158
0a7de745 3159 if (imf != NULL) {
6d2010ae 3160 imf_purge(imf);
0a7de745 3161 }
55e303ae 3162
6d2010ae
A
3163 INM_REMREF(imo->imo_membership[i]);
3164 imo->imo_membership[i] = NULL;
55e303ae 3165 }
6d2010ae
A
3166 imo->imo_num_memberships = 0;
3167 if (imo->imo_mfilters != NULL) {
3168 FREE(imo->imo_mfilters, M_INMFILTER);
3169 imo->imo_mfilters = NULL;
55e303ae 3170 }
6d2010ae
A
3171 if (imo->imo_membership != NULL) {
3172 FREE(imo->imo_membership, M_IPMOPTS);
3173 imo->imo_membership = NULL;
55e303ae 3174 }
6d2010ae 3175 IMO_UNLOCK(imo);
1c79356b 3176
6d2010ae 3177 lck_mtx_destroy(&imo->imo_lock, ifa_mtx_grp);
1c79356b 3178
6d2010ae
A
3179 if (!(imo->imo_debug & IFD_ALLOC)) {
3180 panic("%s: imo %p cannot be freed", __func__, imo);
3181 /* NOTREACHED */
3182 }
3183 zfree(imo_zone, imo);
3184}
1c79356b 3185
6d2010ae
A
3186static void
3187imo_trace(struct ip_moptions *imo, int refhold)
3188{
3189 struct ip_moptions_dbg *imo_dbg = (struct ip_moptions_dbg *)imo;
3190 ctrace_t *tr;
3191 u_int32_t idx;
3192 u_int16_t *cnt;
3193
3194 if (!(imo->imo_debug & IFD_DEBUG)) {
3195 panic("%s: imo %p has no debug structure", __func__, imo);
3196 /* NOTREACHED */
3197 }
3198 if (refhold) {
3199 cnt = &imo_dbg->imo_refhold_cnt;
3200 tr = imo_dbg->imo_refhold;
3201 } else {
3202 cnt = &imo_dbg->imo_refrele_cnt;
3203 tr = imo_dbg->imo_refrele;
1c79356b 3204 }
6d2010ae
A
3205
3206 idx = atomic_add_16_ov(cnt, 1) % IMO_TRACE_HIST_SIZE;
3207 ctrace_record(&tr[idx]);
1c79356b
A
3208}
3209
6d2010ae
A
3210struct ip_moptions *
3211ip_allocmoptions(int how)
1c79356b 3212{
6d2010ae 3213 struct ip_moptions *imo;
1c79356b 3214
6d2010ae 3215 imo = (how == M_WAITOK) ? zalloc(imo_zone) : zalloc_noblock(imo_zone);
1c79356b 3216 if (imo != NULL) {
6d2010ae
A
3217 bzero(imo, imo_size);
3218 lck_mtx_init(&imo->imo_lock, ifa_mtx_grp, ifa_mtx_attr);
3219 imo->imo_debug |= IFD_ALLOC;
3220 if (imo_debug != 0) {
3221 imo->imo_debug |= IFD_DEBUG;
3222 imo->imo_trace = imo_trace;
3223 }
3224 IMO_ADDREF(imo);
1c79356b 3225 }
6d2010ae 3226
0a7de745 3227 return imo;
1c79356b
A
3228}
3229
3230/*
3231 * Routine called from ip_output() to loop back a copy of an IP multicast
3232 * packet to the input queue of a specified interface. Note that this
3233 * calls the output routine of the loopback "driver", but with an interface
3234 * pointer that might NOT be a loopback interface -- evil, but easier than
3235 * replicating that code here.
3236 */
3237static void
39236c6e
A
3238ip_mloopback(struct ifnet *srcifp, struct ifnet *origifp, struct mbuf *m,
3239 struct sockaddr_in *dst, int hlen)
1c79356b 3240{
1c79356b 3241 struct mbuf *copym;
39236c6e 3242 struct ip *ip;
1c79356b 3243
0a7de745 3244 if (lo_ifp == NULL) {
39236c6e 3245 return;
0a7de745 3246 }
39236c6e
A
3247
3248 /*
3249 * Copy the packet header as it's needed for the checksum
3250 * Make sure to deep-copy IP header portion in case the data
3251 * is in an mbuf cluster, so that we can safely override the IP
3252 * header portion later.
3253 */
3254 copym = m_copym_mode(m, 0, M_COPYALL, M_DONTWAIT, M_COPYM_COPY_HDR);
0a7de745 3255 if (copym != NULL && ((copym->m_flags & M_EXT) || copym->m_len < hlen)) {
1c79356b 3256 copym = m_pullup(copym, hlen);
0a7de745 3257 }
2d21ac55 3258
0a7de745 3259 if (copym == NULL) {
2d21ac55 3260 return;
0a7de745 3261 }
2d21ac55
A
3262
3263 /*
3264 * We don't bother to fragment if the IP length is greater
3265 * than the interface's MTU. Can this possibly matter?
3266 */
3267 ip = mtod(copym, struct ip *);
b0d623f7 3268#if BYTE_ORDER != BIG_ENDIAN
2d21ac55
A
3269 HTONS(ip->ip_len);
3270 HTONS(ip->ip_off);
b0d623f7 3271#endif
2d21ac55 3272 ip->ip_sum = 0;
39236c6e
A
3273 ip->ip_sum = ip_cksum_hdr_out(copym, hlen);
3274
2d21ac55 3275 /*
39236c6e
A
3276 * Mark checksum as valid unless receive checksum offload is
3277 * disabled; if so, compute checksum in software. If the
3278 * interface itself is lo0, this will be overridden by if_loop.
2d21ac55 3279 */
39236c6e 3280 if (hwcksum_rx) {
0a7de745 3281 copym->m_pkthdr.csum_flags &= ~(CSUM_PARTIAL | CSUM_ZERO_INVERT);
39236c6e
A
3282 copym->m_pkthdr.csum_flags |=
3283 CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
3284 copym->m_pkthdr.csum_data = 0xffff;
3285 } else if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
b0d623f7 3286#if BYTE_ORDER != BIG_ENDIAN
39236c6e 3287 NTOHS(ip->ip_len);
b0d623f7 3288#endif
39236c6e 3289 in_delayed_cksum(copym);
b0d623f7 3290#if BYTE_ORDER != BIG_ENDIAN
39236c6e 3291 HTONS(ip->ip_len);
b0d623f7 3292#endif
39236c6e 3293 }
1c79356b 3294
2d21ac55 3295 /*
39236c6e
A
3296 * Stuff the 'real' ifp into the pkthdr, to be used in matching
3297 * in ip_input(); we need the loopback ifp/dl_tag passed as args
3298 * to make the loopback driver compliant with the data link
3299 * requirements.
2d21ac55 3300 */
39236c6e 3301 copym->m_pkthdr.rcvif = origifp;
2d21ac55
A
3302
3303 /*
39236c6e
A
3304 * Also record the source interface (which owns the source address).
3305 * This is basically a stripped down version of ifa_foraddr().
2d21ac55 3306 */
39236c6e
A
3307 if (srcifp == NULL) {
3308 struct in_ifaddr *ia;
3309
3310 lck_rw_lock_shared(in_ifaddr_rwlock);
3311 TAILQ_FOREACH(ia, INADDR_HASH(ip->ip_src.s_addr), ia_hash) {
3312 IFA_LOCK_SPIN(&ia->ia_ifa);
3313 if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_src.s_addr) {
3314 srcifp = ia->ia_ifp;
3315 IFA_UNLOCK(&ia->ia_ifa);
3316 break;
3317 }
3318 IFA_UNLOCK(&ia->ia_ifa);
3319 }
3320 lck_rw_done(in_ifaddr_rwlock);
1c79356b 3321 }
0a7de745 3322 if (srcifp != NULL) {
39236c6e 3323 ip_setsrcifaddr_info(copym, srcifp->if_index, NULL);
0a7de745 3324 }
39236c6e
A
3325 ip_setdstifaddr_info(copym, origifp->if_index, NULL);
3326
3327 dlil_output(lo_ifp, PF_INET, copym, NULL, SA(dst), 0, NULL);
1c79356b 3328}
c910b4d9
A
3329
3330/*
3331 * Given a source IP address (and route, if available), determine the best
b0d623f7
A
3332 * interface to send the packet from. Checking for (and updating) the
3333 * ROF_SRCIF_SELECTED flag in the pcb-supplied route placeholder is done
3334 * without any locks based on the assumption that ip_output() is single-
3335 * threaded per-pcb, i.e. for any given pcb there can only be one thread
3336 * performing output at the IP layer.
6d2010ae
A
3337 *
3338 * This routine is analogous to in6_selectroute() for IPv6.
c910b4d9
A
3339 */
3340static struct ifaddr *
3341in_selectsrcif(struct ip *ip, struct route *ro, unsigned int ifscope)
3342{
3343 struct ifaddr *ifa = NULL;
b0d623f7
A
3344 struct in_addr src = ip->ip_src;
3345 struct in_addr dst = ip->ip_dst;
c910b4d9 3346 struct ifnet *rt_ifp;
0b4c1975 3347 char s_src[MAX_IPv4_STR_LEN], s_dst[MAX_IPv4_STR_LEN];
c910b4d9 3348
39236c6e
A
3349 VERIFY(src.s_addr != INADDR_ANY);
3350
c910b4d9 3351 if (ip_select_srcif_debug) {
0a7de745
A
3352 (void) inet_ntop(AF_INET, &src.s_addr, s_src, sizeof(s_src));
3353 (void) inet_ntop(AF_INET, &dst.s_addr, s_dst, sizeof(s_dst));
c910b4d9
A
3354 }
3355
0a7de745 3356 if (ro->ro_rt != NULL) {
b0d623f7 3357 RT_LOCK(ro->ro_rt);
0a7de745 3358 }
c910b4d9 3359
c910b4d9
A
3360 rt_ifp = (ro->ro_rt != NULL) ? ro->ro_rt->rt_ifp : NULL;
3361
3362 /*
3363 * Given the source IP address, find a suitable source interface
3364 * to use for transmission; if the caller has specified a scope,
3365 * optimize the search by looking at the addresses only for that
3366 * interface. This is still suboptimal, however, as we need to
3367 * traverse the per-interface list.
3368 */
3369 if (ifscope != IFSCOPE_NONE || ro->ro_rt != NULL) {
3370 unsigned int scope = ifscope;
3371
3372 /*
3373 * If no scope is specified and the route is stale (pointing
3374 * to a defunct interface) use the current primary interface;
3375 * this happens when switching between interfaces configured
3376 * with the same IP address. Otherwise pick up the scope
3377 * information from the route; the ULP may have looked up a
3378 * correct route and we just need to verify it here and mark
3379 * it with the ROF_SRCIF_SELECTED flag below.
3380 */
3381 if (scope == IFSCOPE_NONE) {
3382 scope = rt_ifp->if_index;
6d2010ae 3383 if (scope != get_primary_ifscope(AF_INET) &&
0a7de745 3384 ROUTE_UNUSABLE(ro)) {
6d2010ae 3385 scope = get_primary_ifscope(AF_INET);
0a7de745 3386 }
c910b4d9
A
3387 }
3388
b0d623f7 3389 ifa = (struct ifaddr *)ifa_foraddr_scoped(src.s_addr, scope);
c910b4d9 3390
0b4c1975
A
3391 if (ifa == NULL && ip->ip_p != IPPROTO_UDP &&
3392 ip->ip_p != IPPROTO_TCP && ipforwarding) {
3393 /*
3394 * If forwarding is enabled, and if the packet isn't
3395 * TCP or UDP, check if the source address belongs
3396 * to one of our own interfaces; if so, demote the
3397 * interface scope and do a route lookup right below.
3398 */
3399 ifa = (struct ifaddr *)ifa_foraddr(src.s_addr);
3400 if (ifa != NULL) {
6d2010ae 3401 IFA_REMREF(ifa);
0b4c1975
A
3402 ifa = NULL;
3403 ifscope = IFSCOPE_NONE;
3404 }
3405 }
3406
c910b4d9
A
3407 if (ip_select_srcif_debug && ifa != NULL) {
3408 if (ro->ro_rt != NULL) {
6d2010ae
A
3409 printf("%s->%s ifscope %d->%d ifa_if %s "
3410 "ro_if %s\n", s_src, s_dst, ifscope,
3411 scope, if_name(ifa->ifa_ifp),
3412 if_name(rt_ifp));
c910b4d9 3413 } else {
6d2010ae 3414 printf("%s->%s ifscope %d->%d ifa_if %s\n",
b0d623f7 3415 s_src, s_dst, ifscope, scope,
6d2010ae 3416 if_name(ifa->ifa_ifp));
c910b4d9
A
3417 }
3418 }
3419 }
3420
3421 /*
3422 * Slow path; search for an interface having the corresponding source
3423 * IP address if the scope was not specified by the caller, and:
3424 *
3425 * 1) There currently isn't any route, or,
3426 * 2) The interface used by the route does not own that source
3427 * IP address; in this case, the route will get blown away
3428 * and we'll do a more specific scoped search using the newly
3429 * found interface.
3430 */
3431 if (ifa == NULL && ifscope == IFSCOPE_NONE) {
b0d623f7 3432 ifa = (struct ifaddr *)ifa_foraddr(src.s_addr);
c910b4d9 3433
0b4c1975
A
3434 /*
3435 * If we have the IP address, but not the route, we don't
3436 * really know whether or not it belongs to the correct
3437 * interface (it could be shared across multiple interfaces.)
3438 * The only way to find out is to do a route lookup.
3439 */
3440 if (ifa != NULL && ro->ro_rt == NULL) {
3441 struct rtentry *rt;
3442 struct sockaddr_in sin;
3443 struct ifaddr *oifa = NULL;
3444
0a7de745 3445 bzero(&sin, sizeof(sin));
0b4c1975 3446 sin.sin_family = AF_INET;
0a7de745 3447 sin.sin_len = sizeof(sin);
0b4c1975
A
3448 sin.sin_addr = dst;
3449
3450 lck_mtx_lock(rnh_lock);
39236c6e 3451 if ((rt = rt_lookup(TRUE, SA(&sin), NULL,
0b4c1975
A
3452 rt_tables[AF_INET], IFSCOPE_NONE)) != NULL) {
3453 RT_LOCK(rt);
3454 /*
3455 * If the route uses a different interface,
3456 * use that one instead. The IP address of
3457 * the ifaddr that we pick up here is not
3458 * relevant.
3459 */
3460 if (ifa->ifa_ifp != rt->rt_ifp) {
3461 oifa = ifa;
3462 ifa = rt->rt_ifa;
6d2010ae 3463 IFA_ADDREF(ifa);
0b4c1975
A
3464 RT_UNLOCK(rt);
3465 } else {
3466 RT_UNLOCK(rt);
3467 }
3468 rtfree_locked(rt);
3469 }
3470 lck_mtx_unlock(rnh_lock);
3471
3472 if (oifa != NULL) {
3473 struct ifaddr *iifa;
3474
3475 /*
3476 * See if the interface pointed to by the
3477 * route is configured with the source IP
3478 * address of the packet.
3479 */
3480 iifa = (struct ifaddr *)ifa_foraddr_scoped(
0a7de745 3481 src.s_addr, ifa->ifa_ifp->if_index);
0b4c1975
A
3482
3483 if (iifa != NULL) {
3484 /*
3485 * Found it; drop the original one
3486 * as well as the route interface
3487 * address, and use this instead.
3488 */
6d2010ae
A
3489 IFA_REMREF(oifa);
3490 IFA_REMREF(ifa);
0b4c1975
A
3491 ifa = iifa;
3492 } else if (!ipforwarding ||
3493 (rt->rt_flags & RTF_GATEWAY)) {
3494 /*
3495 * This interface doesn't have that
3496 * source IP address; drop the route
3497 * interface address and just use the
3498 * original one, and let the caller
3499 * do a scoped route lookup.
3500 */
6d2010ae 3501 IFA_REMREF(ifa);
0b4c1975
A
3502 ifa = oifa;
3503 } else {
3504 /*
3505 * Forwarding is enabled and the source
3506 * address belongs to one of our own
3507 * interfaces which isn't the outgoing
3508 * interface, and we have a route, and
3509 * the destination is on a network that
3510 * is directly attached (onlink); drop
3511 * the original one and use the route
3512 * interface address instead.
3513 */
6d2010ae 3514 IFA_REMREF(oifa);
0b4c1975
A
3515 }
3516 }
3517 } else if (ifa != NULL && ro->ro_rt != NULL &&
3518 !(ro->ro_rt->rt_flags & RTF_GATEWAY) &&
3519 ifa->ifa_ifp != ro->ro_rt->rt_ifp && ipforwarding) {
3520 /*
3521 * Forwarding is enabled and the source address belongs
3522 * to one of our own interfaces which isn't the same
3523 * as the interface used by the known route; drop the
3524 * original one and use the route interface address.
3525 */
6d2010ae 3526 IFA_REMREF(ifa);
0b4c1975 3527 ifa = ro->ro_rt->rt_ifa;
6d2010ae 3528 IFA_ADDREF(ifa);
0b4c1975
A
3529 }
3530
c910b4d9 3531 if (ip_select_srcif_debug && ifa != NULL) {
6d2010ae
A
3532 printf("%s->%s ifscope %d ifa_if %s\n",
3533 s_src, s_dst, ifscope, if_name(ifa->ifa_ifp));
c910b4d9
A
3534 }
3535 }
3536
0a7de745 3537 if (ro->ro_rt != NULL) {
b0d623f7 3538 RT_LOCK_ASSERT_HELD(ro->ro_rt);
0a7de745 3539 }
c910b4d9
A
3540 /*
3541 * If there is a non-loopback route with the wrong interface, or if
3542 * there is no interface configured with such an address, blow it
3543 * away. Except for local/loopback, we look for one with a matching
3544 * interface scope/index.
3545 */
3546 if (ro->ro_rt != NULL &&
3547 (ifa == NULL || (ifa->ifa_ifp != rt_ifp && rt_ifp != lo_ifp) ||
3548 !(ro->ro_rt->rt_flags & RTF_UP))) {
3549 if (ip_select_srcif_debug) {
3550 if (ifa != NULL) {
6d2010ae
A
3551 printf("%s->%s ifscope %d ro_if %s != "
3552 "ifa_if %s (cached route cleared)\n",
3553 s_src, s_dst, ifscope, if_name(rt_ifp),
3554 if_name(ifa->ifa_ifp));
c910b4d9 3555 } else {
6d2010ae 3556 printf("%s->%s ifscope %d ro_if %s "
c910b4d9 3557 "(no ifa_if found)\n",
6d2010ae 3558 s_src, s_dst, ifscope, if_name(rt_ifp));
c910b4d9
A
3559 }
3560 }
3561
b0d623f7 3562 RT_UNLOCK(ro->ro_rt);
39236c6e 3563 ROUTE_RELEASE(ro);
c910b4d9
A
3564
3565 /*
3566 * If the destination is IPv4 LLA and the route's interface
3567 * doesn't match the source interface, then the source IP
3568 * address is wrong; it most likely belongs to the primary
3569 * interface associated with the IPv4 LL subnet. Drop the
3570 * packet rather than letting it go out and return an error
3571 * to the ULP. This actually applies not only to IPv4 LL
3572 * but other shared subnets; for now we explicitly test only
3573 * for the former case and save the latter for future.
3574 */
b0d623f7
A
3575 if (IN_LINKLOCAL(ntohl(dst.s_addr)) &&
3576 !IN_LINKLOCAL(ntohl(src.s_addr)) && ifa != NULL) {
6d2010ae 3577 IFA_REMREF(ifa);
c910b4d9
A
3578 ifa = NULL;
3579 }
3580 }
3581
3582 if (ip_select_srcif_debug && ifa == NULL) {
3583 printf("%s->%s ifscope %d (neither ro_if/ifa_if found)\n",
b0d623f7 3584 s_src, s_dst, ifscope);
c910b4d9
A
3585 }
3586
3587 /*
3588 * If there is a route, mark it accordingly. If there isn't one,
3589 * we'll get here again during the next transmit (possibly with a
3590 * route) and the flag will get set at that point. For IPv4 LLA
3591 * destination, mark it only if the route has been fully resolved;
3592 * otherwise we want to come back here again when the route points
3593 * to the interface over which the ARP reply arrives on.
3594 */
b0d623f7 3595 if (ro->ro_rt != NULL && (!IN_LINKLOCAL(ntohl(dst.s_addr)) ||
c910b4d9
A
3596 (ro->ro_rt->rt_gateway->sa_family == AF_LINK &&
3597 SDL(ro->ro_rt->rt_gateway)->sdl_alen != 0))) {
0a7de745
A
3598 if (ifa != NULL) {
3599 IFA_ADDREF(ifa); /* for route */
3600 }
3601 if (ro->ro_srcia != NULL) {
39236c6e 3602 IFA_REMREF(ro->ro_srcia);
0a7de745 3603 }
39236c6e 3604 ro->ro_srcia = ifa;
c910b4d9 3605 ro->ro_flags |= ROF_SRCIF_SELECTED;
39236c6e 3606 RT_GENID_SYNC(ro->ro_rt);
c910b4d9
A
3607 }
3608
0a7de745 3609 if (ro->ro_rt != NULL) {
b0d623f7 3610 RT_UNLOCK(ro->ro_rt);
0a7de745 3611 }
b0d623f7 3612
0a7de745 3613 return ifa;
c910b4d9 3614}
39236c6e 3615
d9a64523
A
3616/*
3617 * @brief Given outgoing interface it determines what checksum needs
0a7de745
A
3618 * to be computed in software and what needs to be offloaded to the
3619 * interface.
d9a64523
A
3620 *
3621 * @param ifp Pointer to the outgoing interface
3622 * @param m Pointer to the packet
3623 * @param hlen IP header length
3624 * @param ip_len Total packet size i.e. headers + data payload
3625 * @param sw_csum Pointer to a software checksum flag set
3626 *
3627 * @return void
3628 */
39236c6e
A
3629void
3630ip_output_checksum(struct ifnet *ifp, struct mbuf *m, int hlen, int ip_len,
3631 uint32_t *sw_csum)
3632{
3633 int tso = TSO_IPV4_OK(ifp, m);
3634 uint32_t hwcap = ifp->if_hwassist;
3635
3636 m->m_pkthdr.csum_flags |= CSUM_IP;
3637
3638 if (!hwcksum_tx) {
3639 /* do all in software; hardware checksum offload is disabled */
3640 *sw_csum = (CSUM_DELAY_DATA | CSUM_DELAY_IP) &
3641 m->m_pkthdr.csum_flags;
3642 } else {
3643 /* do in software what the hardware cannot */
3644 *sw_csum = m->m_pkthdr.csum_flags &
3645 ~IF_HWASSIST_CSUM_FLAGS(hwcap);
3646 }
3647
0a7de745 3648 if (hlen != sizeof(struct ip)) {
39236c6e
A
3649 *sw_csum |= ((CSUM_DELAY_DATA | CSUM_DELAY_IP) &
3650 m->m_pkthdr.csum_flags);
3651 } else if (!(*sw_csum & CSUM_DELAY_DATA) && (hwcap & CSUM_PARTIAL)) {
d9a64523
A
3652 int interface_mtu = ifp->if_mtu;
3653
3654 if (INTF_ADJUST_MTU_FOR_CLAT46(ifp)) {
3655 interface_mtu = IN6_LINKMTU(ifp);
3656 /* Further adjust the size for CLAT46 expansion */
3657 interface_mtu -= CLAT46_HDR_EXPANSION_OVERHD;
3658 }
3659
39236c6e
A
3660 /*
3661 * Partial checksum offload, if non-IP fragment, and TCP only
3662 * (no UDP support, as the hardware may not be able to convert
5ba3f43e
A
3663 * +0 to -0 (0xffff) per RFC1122 4.1.3.4. unless the interface
3664 * supports "invert zero" capability.)
39236c6e
A
3665 */
3666 if (hwcksum_tx && !tso &&
5ba3f43e
A
3667 ((m->m_pkthdr.csum_flags & CSUM_TCP) ||
3668 ((hwcap & CSUM_ZERO_INVERT) &&
3669 (m->m_pkthdr.csum_flags & CSUM_ZERO_INVERT))) &&
d9a64523 3670 ip_len <= interface_mtu) {
0a7de745 3671 uint16_t start = sizeof(struct ip);
39236c6e
A
3672 uint16_t ulpoff = m->m_pkthdr.csum_data & 0xffff;
3673 m->m_pkthdr.csum_flags |=
3674 (CSUM_DATA_VALID | CSUM_PARTIAL);
3675 m->m_pkthdr.csum_tx_stuff = (ulpoff + start);
3676 m->m_pkthdr.csum_tx_start = start;
3677 /* do IP hdr chksum in software */
3678 *sw_csum = CSUM_DELAY_IP;
3679 } else {
3680 *sw_csum |= (CSUM_DELAY_DATA & m->m_pkthdr.csum_flags);
3681 }
3682 }
3683
3684 if (*sw_csum & CSUM_DELAY_DATA) {
3685 in_delayed_cksum(m);
3686 *sw_csum &= ~CSUM_DELAY_DATA;
3687 }
3688
3689 if (hwcksum_tx) {
3690 /*
3691 * Drop off bits that aren't supported by hardware;
3692 * also make sure to preserve non-checksum related bits.
3693 */
3694 m->m_pkthdr.csum_flags =
3695 ((m->m_pkthdr.csum_flags &
3696 (IF_HWASSIST_CSUM_FLAGS(hwcap) | CSUM_DATA_VALID)) |
3697 (m->m_pkthdr.csum_flags & ~IF_HWASSIST_CSUM_MASK));
3698 } else {
3699 /* drop all bits; hardware checksum offload is disabled */
3700 m->m_pkthdr.csum_flags = 0;
3701 }
3702}
3703
3704/*
3705 * GRE protocol output for PPP/PPTP
3706 */
3707int
3708ip_gre_output(struct mbuf *m)
3709{
3710 struct route ro;
3711 int error;
3712
0a7de745 3713 bzero(&ro, sizeof(ro));
39236c6e
A
3714
3715 error = ip_output(m, NULL, &ro, 0, NULL, NULL);
3716
3717 ROUTE_RELEASE(&ro);
3718
0a7de745 3719 return error;
39236c6e 3720}
3e170ce0
A
3721
3722static int
3723sysctl_reset_ip_output_stats SYSCTL_HANDLER_ARGS
3724{
3725#pragma unused(arg1, arg2)
3726 int error, i;
3727
3728 i = ip_output_measure;
3729 error = sysctl_handle_int(oidp, &i, 0, req);
0a7de745 3730 if (error || req->newptr == USER_ADDR_NULL) {
3e170ce0 3731 goto done;
0a7de745 3732 }
3e170ce0
A
3733 /* impose bounds */
3734 if (i < 0 || i > 1) {
3735 error = EINVAL;
3736 goto done;
3737 }
3738 if (ip_output_measure != i && i == 1) {
3739 net_perf_initialize(&net_perf, ip_output_measure_bins);
3740 }
3741 ip_output_measure = i;
3742done:
0a7de745 3743 return error;
3e170ce0
A
3744}
3745
3746static int
3747sysctl_ip_output_measure_bins SYSCTL_HANDLER_ARGS
3748{
3749#pragma unused(arg1, arg2)
3750 int error;
3751 uint64_t i;
3752
3753 i = ip_output_measure_bins;
3754 error = sysctl_handle_quad(oidp, &i, 0, req);
0a7de745 3755 if (error || req->newptr == USER_ADDR_NULL) {
3e170ce0 3756 goto done;
0a7de745 3757 }
3e170ce0
A
3758 /* validate data */
3759 if (!net_perf_validate_bins(i)) {
3760 error = EINVAL;
3761 goto done;
3762 }
3763 ip_output_measure_bins = i;
3764done:
0a7de745 3765 return error;
3e170ce0
A
3766}
3767
3768static int
3769sysctl_ip_output_getperf SYSCTL_HANDLER_ARGS
3770{
3771#pragma unused(oidp, arg1, arg2)
0a7de745
A
3772 if (req->oldptr == USER_ADDR_NULL) {
3773 req->oldlen = (size_t)sizeof(struct ipstat);
3774 }
3e170ce0 3775
0a7de745 3776 return SYSCTL_OUT(req, &net_perf, MIN(sizeof(net_perf), req->oldlen));
3e170ce0 3777}