]> git.saurik.com Git - apple/xnu.git/blame - bsd/netinet/ip_output.c
xnu-2050.7.9.tar.gz
[apple/xnu.git] / bsd / netinet / ip_output.c
CommitLineData
1c79356b 1/*
316670eb 2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
5d5c5d0d 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * Copyright (c) 1982, 1986, 1988, 1990, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94
9bccf70c 61 * $FreeBSD: src/sys/netinet/ip_output.c,v 1.99.2.16 2001/07/19 06:37:26 kris Exp $
1c79356b 62 */
2d21ac55
A
63/*
64 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
65 * support for mandatory and extensible security protections. This notice
66 * is included in support of clause 2.2 (b) of the Apple Public License,
67 * Version 2.0.
68 */
1c79356b
A
69
70#define _IP_VHL
71
1c79356b
A
72#include <sys/param.h>
73#include <sys/systm.h>
74#include <sys/kernel.h>
75#include <sys/malloc.h>
76#include <sys/mbuf.h>
77#include <sys/protosw.h>
78#include <sys/socket.h>
79#include <sys/socketvar.h>
91447636
A
80#include <kern/locks.h>
81#include <sys/sysctl.h>
6d2010ae 82#include <sys/mcache.h>
1c79356b 83
b0d623f7 84#include <machine/endian.h>
6d2010ae 85#include <pexpert/pexpert.h>
b0d623f7 86
1c79356b 87#include <net/if.h>
c910b4d9 88#include <net/if_dl.h>
6d2010ae 89#include <net/if_types.h>
1c79356b 90#include <net/route.h>
6d2010ae
A
91#include <net/ntstat.h>
92#include <net/net_osdep.h>
1c79356b
A
93
94#include <netinet/in.h>
95#include <netinet/in_systm.h>
96#include <netinet/ip.h>
1c79356b
A
97#include <netinet/in_pcb.h>
98#include <netinet/in_var.h>
99#include <netinet/ip_var.h>
1c79356b 100
91447636
A
101#include <netinet/kpi_ipfilter_var.h>
102
2d21ac55
A
103#if CONFIG_MACF_NET
104#include <security/mac_framework.h>
105#endif
106
9bccf70c 107#include <net/dlil.h>
1c79356b 108#include <sys/kdebug.h>
2d21ac55 109#include <libkern/OSAtomic.h>
1c79356b
A
110
/*
 * kdebug trace codes used by this file; all are built with NETDBG_CODE()
 * in the DBG_NETIP class.
 */
#define	DBG_LAYER_BEG		NETDBG_CODE(DBG_NETIP, 1)
#define	DBG_LAYER_END		NETDBG_CODE(DBG_NETIP, 3)
#define	DBG_FNC_IP_OUTPUT	NETDBG_CODE(DBG_NETIP, (1 << 8) | 1)
#define	DBG_FNC_IPSEC4_OUTPUT	NETDBG_CODE(DBG_NETIP, (2 << 8) | 1)

/*
 * Exchange the two low-order bytes of v: the value shifted right by 8 is
 * OR-ed with the low byte shifted left by 8.  Note that v is evaluated
 * twice and that bits above bit 15 of v are not masked off.
 */
#define	SWAP16(v)	(((v) >> 8) | (((v) & 0xff) << 8))
1c79356b 117
1c79356b
A
118#if IPSEC
119#include <netinet6/ipsec.h>
120#include <netkey/key.h>
9bccf70c 121#if IPSEC_DEBUG
1c79356b 122#include <netkey/key_debug.h>
1c79356b 123#else
9bccf70c 124#define KEYDEBUG(lev,arg)
1c79356b 125#endif
9bccf70c 126#endif /*IPSEC*/
1c79356b 127
1c79356b 128#include <netinet/ip_fw.h>
91447636 129#include <netinet/ip_divert.h>
6d2010ae 130#include <mach/sdt.h>
1c79356b
A
131
132#if DUMMYNET
133#include <netinet/ip_dummynet.h>
134#endif
135
b0d623f7
A
136#if PF
137#include <net/pfvar.h>
138#endif /* PF */
139
1c79356b
A
140#if IPFIREWALL_FORWARD_DEBUG
/*
 * Print the IPv4 address held in a (an object with an s_addr member in
 * network byte order) in dotted-decimal form, without a trailing newline.
 *
 * Each octet is cast to unsigned int and printed with %u so that the
 * conversion specifiers match the argument types; ntohl() yields a 32-bit
 * value, which does not match the previous %ld specifier.
 * The argument is parenthesised so that expressions such as *p can be
 * passed; it is evaluated four times, so it must be free of side effects.
 *
 * NOTE(review): the trailing ';' inside the macro is kept on purpose,
 * because call sites outside this view may have been written to rely on it.
 */
#define print_ip(a)	printf("%u.%u.%u.%u",				\
	(unsigned int)((ntohl((a).s_addr) >> 24) & 0xFF),		\
	(unsigned int)((ntohl((a).s_addr) >> 16) & 0xFF),		\
	(unsigned int)((ntohl((a).s_addr) >> 8) & 0xFF),		\
	(unsigned int)(ntohl((a).s_addr) & 0xFF));
145#endif
146
147u_short ip_id;
148
91447636 149static struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *);
91447636
A
150static void ip_mloopback(struct ifnet *, struct mbuf *,
151 struct sockaddr_in *, int);
91447636 152static int ip_pcbopts(int, struct mbuf **, struct mbuf *);
6d2010ae 153static void imo_trace(struct ip_moptions *, int);
9bccf70c 154
2d21ac55 155static void ip_out_cksum_stats(int, u_int32_t);
c910b4d9 156static struct ifaddr *in_selectsrcif(struct ip *, struct route *, unsigned int);
2d21ac55 157
91447636 158int ip_optcopy(struct ip *, struct ip *);
2d21ac55
A
159void in_delayed_cksum_offset(struct mbuf *, int );
160void in_cksum_offset(struct mbuf* , size_t );
161
1c79356b
A
162extern struct protosw inetsw[];
163
9bccf70c 164extern struct ip_linklocal_stat ip_linklocal_stat;
91447636 165extern lck_mtx_t *ip_mutex;
9bccf70c
A
166
167/* temporary: for testing */
168#if IPSEC
169extern int ipsec_bypass;
170#endif
171
91447636 172static int ip_maxchainsent = 0;
6d2010ae 173SYSCTL_INT(_net_inet_ip, OID_AUTO, maxchainsent, CTLFLAG_RW | CTLFLAG_LOCKED,
91447636 174 &ip_maxchainsent, 0, "use dlil_output_list");
2d21ac55
A
175#if DEBUG
176static int forge_ce = 0;
6d2010ae 177SYSCTL_INT(_net_inet_ip, OID_AUTO, forge_ce, CTLFLAG_RW | CTLFLAG_LOCKED,
2d21ac55
A
178 &forge_ce, 0, "Forge ECN CE");
179#endif /* DEBUG */
c910b4d9
A
180
181static int ip_select_srcif_debug = 0;
6d2010ae 182SYSCTL_INT(_net_inet_ip, OID_AUTO, select_srcif_debug, CTLFLAG_RW | CTLFLAG_LOCKED,
c910b4d9
A
183 &ip_select_srcif_debug, 0, "log source interface selection debug info");
184
6d2010ae
A
185#define IMO_TRACE_HIST_SIZE 32 /* size of trace history */
186
187/* For gdb */
188__private_extern__ unsigned int imo_trace_hist_size = IMO_TRACE_HIST_SIZE;
189
190struct ip_moptions_dbg {
191 struct ip_moptions imo; /* ip_moptions */
192 u_int16_t imo_refhold_cnt; /* # of IMO_ADDREF */
193 u_int16_t imo_refrele_cnt; /* # of IMO_REMREF */
194 /*
195 * Alloc and free callers.
196 */
197 ctrace_t imo_alloc;
198 ctrace_t imo_free;
199 /*
200 * Circular lists of IMO_ADDREF and IMO_REMREF callers.
201 */
202 ctrace_t imo_refhold[IMO_TRACE_HIST_SIZE];
203 ctrace_t imo_refrele[IMO_TRACE_HIST_SIZE];
204};
205
206#if DEBUG
207static unsigned int imo_debug = 1; /* debugging (enabled) */
208#else
209static unsigned int imo_debug; /* debugging (disabled) */
210#endif /* !DEBUG */
211static unsigned int imo_size; /* size of zone element */
212static struct zone *imo_zone; /* zone for ip_moptions */
213
214#define IMO_ZONE_MAX 64 /* maximum elements in zone */
215#define IMO_ZONE_NAME "ip_moptions" /* zone name */
216
1c79356b
A
/*
 * IP output for a single packet.
 *
 * The mbuf chain m0 holds the packet, which starts with a skeletal IP
 * header (len, off, ttl, proto, tos, src and dst filled in).  The chain
 * holding the packet will be freed; the options mbuf opt, if present,
 * will not be freed.
 *
 * This is a thin front end to ip_output_list(): every argument is passed
 * through unchanged, with 0 supplied for its packetchain parameter.
 *
 * Returns whatever ip_output_list() returns.
 */
int
ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro, int flags,
    struct ip_moptions *imo, struct ip_out_args *ipoa)
{
	return (ip_output_list(m0, 0, opt, ro, flags, imo, ipoa));
}
236
2d21ac55
A
237/*
238 * Returns: 0 Success
239 * ENOMEM
240 * EADDRNOTAVAIL
241 * ENETUNREACH
242 * EHOSTUNREACH
243 * EACCES
244 * EMSGSIZE
245 * ENOBUFS
246 * ipsec4_getpolicybyaddr:??? [IPSEC 4th argument, contents modified]
247 * ipsec4_getpolicybysock:??? [IPSEC 4th argument, contents modified]
248 * key_spdacquire:??? [IPSEC]
249 * ipsec4_output:??? [IPSEC]
2d21ac55
A
250 * ip_dn_io_ptr:??? [dummynet]
251 * dlil_output:??? [DLIL]
252 * dlil_output_list:??? [DLIL]
253 *
254 * Notes: The ipsec4_getpolicyby{addr|sock} function error returns are
255 * only used as the error return from this function where one of
256 * these functions fails to return a policy.
257 */
91447636
A
258int
259ip_output_list(
260 struct mbuf *m0,
261 int packetchain,
262 struct mbuf *opt,
263 struct route *ro,
264 int flags,
2d21ac55 265 struct ip_moptions *imo,
316670eb 266 struct ip_out_args *ipoa)
1c79356b 267{
b0d623f7 268 struct ip *ip;
55e303ae 269 struct ifnet *ifp = NULL;
316670eb 270 struct mbuf *m = m0, *prevnxt = NULL, **mppn = &prevnxt;
1c79356b 271 int hlen = sizeof (struct ip);
6d2010ae 272 int len = 0, error = 0;
55e303ae 273 struct sockaddr_in *dst = NULL;
b0d623f7 274 struct in_ifaddr *ia = NULL, *src_ia = NULL;
0b4e3aa0 275 int isbroadcast, sw_csum;
91447636 276 struct in_addr pkt_dst;
6d2010ae 277 struct ipf_pktopts *ippo = NULL, ipf_pktopts;
1c79356b 278#if IPSEC
ebb1b9f4
A
279 struct ipsec_output_state ipsec_state;
280 struct route *ipsec_saved_route = NULL;
9bccf70c 281 struct socket *so = NULL;
1c79356b
A
282 struct secpolicy *sp = NULL;
283#endif
284#if IPFIREWALL_FORWARD
285 int fwd_rewrite_src = 0;
286#endif
4a3eedf9 287#if IPFIREWALL
6d2010ae 288 int off;
316670eb
A
289 struct sockaddr_in *next_hop_from_ipfwd_tag = NULL;
290#endif
291#if IPFIREWALL || DUMMYNET
91447636 292 struct ip_fw_args args;
6d2010ae 293 struct m_tag *tag;
4a3eedf9 294#endif
91447636
A
295 int didfilter = 0;
296 ipfilter_t inject_filter_ref = 0;
6d2010ae 297#if DUMMYNET
2d21ac55 298 struct route saved_route;
c910b4d9 299 struct ip_out_args saved_ipoa;
6d2010ae
A
300 struct sockaddr_in dst_buf;
301#endif /* DUMMYNET */
91447636 302 struct mbuf * packetlist;
b0d623f7 303 int pktcnt = 0, tso = 0;
6d2010ae 304 u_int32_t bytecnt = 0;
316670eb
A
305 unsigned int ifscope = IFSCOPE_NONE;
306 unsigned int nocell = 0;
307 boolean_t select_srcif, srcbound;
308 struct flowadv *adv = NULL;
309
1c79356b
A
310 KERNEL_DEBUG(DBG_FNC_IP_OUTPUT | DBG_FUNC_START, 0,0,0,0,0);
311
ebb1b9f4
A
312#if IPSEC
313 bzero(&ipsec_state, sizeof(ipsec_state));
314#endif /* IPSEC */
315
91447636 316 packetlist = m0;
316670eb
A
317#if IPFIREWALL || DUMMYNET
318 bzero(&args, sizeof(struct ip_fw_args));
b0d623f7
A
319
320 if (SLIST_EMPTY(&m0->m_pkthdr.tags))
321 goto ipfw_tags_done;
322
91447636
A
323 /* Grab info from mtags prepended to the chain */
324#if DUMMYNET
b0d623f7
A
325 if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID,
326 KERNEL_TAG_TYPE_DUMMYNET, NULL)) != NULL) {
91447636 327 struct dn_pkt_tag *dn_tag;
b0d623f7 328
91447636 329 dn_tag = (struct dn_pkt_tag *)(tag+1);
316670eb
A
330 args.fwa_ipfw_rule = dn_tag->dn_ipfw_rule;
331 args.fwa_pf_rule = dn_tag->dn_pf_rule;
91447636 332 opt = NULL;
316670eb 333 saved_route = dn_tag->dn_ro;
2d21ac55 334 ro = &saved_route;
b0d623f7 335
91447636 336 imo = NULL;
6d2010ae
A
337 bcopy(&dn_tag->dn_dst, &dst_buf, sizeof(dst_buf));
338 dst = &dst_buf;
316670eb
A
339 ifp = dn_tag->dn_ifp;
340 flags = dn_tag->dn_flags;
341 if ((dn_tag->dn_flags & IP_OUTARGS)) {
342 saved_ipoa = dn_tag->dn_ipoa;
343 ipoa = &saved_ipoa;
344 }
b0d623f7 345
91447636
A
346 m_tag_delete(m0, tag);
347 }
348#endif /* DUMMYNET */
349
2d21ac55 350#if IPDIVERT
b0d623f7
A
351 if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID,
352 KERNEL_TAG_TYPE_DIVERT, NULL)) != NULL) {
91447636 353 struct divert_tag *div_tag;
b0d623f7 354
91447636 355 div_tag = (struct divert_tag *)(tag+1);
316670eb 356 args.fwa_divert_rule = div_tag->cookie;
91447636
A
357
358 m_tag_delete(m0, tag);
359 }
2d21ac55 360#endif /* IPDIVERT */
2d21ac55 361
316670eb 362#if IPFIREWALL
b0d623f7
A
363 if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID,
364 KERNEL_TAG_TYPE_IPFORWARD, NULL)) != NULL) {
91447636 365 struct ip_fwd_tag *ipfwd_tag;
b0d623f7 366
91447636 367 ipfwd_tag = (struct ip_fwd_tag *)(tag+1);
6d2010ae 368 next_hop_from_ipfwd_tag = ipfwd_tag->next_hop;
316670eb 369
91447636
A
370 m_tag_delete(m0, tag);
371 }
4a3eedf9 372#endif /* IPFIREWALL */
91447636 373
316670eb
A
374ipfw_tags_done:
375#endif /* IPFIREWALL || DUMMYNET */
376
91447636 377 m = m0;
b0d623f7 378
91447636
A
379#if DIAGNOSTIC
380 if ( !m || (m->m_flags & M_PKTHDR) != 0)
381 panic("ip_output no HDR");
382 if (!ro)
383 panic("ip_output no route, proto = %d",
384 mtod(m, struct ip *)->ip_p);
9bccf70c 385#endif
91447636 386
6d2010ae
A
387 bzero(&ipf_pktopts, sizeof(struct ipf_pktopts));
388 ippo = &ipf_pktopts;
389
b0d623f7 390 if (ip_doscopedroute && (flags & IP_OUTARGS)) {
316670eb
A
391 /*
392 * In the forwarding case, only the ifscope value is used,
393 * as source interface selection doesn't take place.
394 */
395 if ((select_srcif = (!(flags & IP_FORWARDING) &&
396 (ipoa->ipoa_flags & IPOAF_SELECT_SRCIF)))) {
397 ipf_pktopts.ippo_flags |= IPPOF_SELECT_SRCIF;
398 }
399
400 if ((ipoa->ipoa_flags & IPOAF_BOUND_IF) &&
401 ipoa->ipoa_boundif != IFSCOPE_NONE) {
402 ifscope = ipoa->ipoa_boundif;
403 ipf_pktopts.ippo_flags |=
404 (IPPOF_BOUND_IF | (ifscope << IPPOF_SHIFT_IFSCOPE));
405 }
406
407 if ((srcbound = (ipoa->ipoa_flags & IPOAF_BOUND_SRCADDR)))
408 ipf_pktopts.ippo_flags |= IPPOF_BOUND_SRCADDR;
c910b4d9
A
409 } else {
410 select_srcif = FALSE;
316670eb 411 srcbound = FALSE;
c910b4d9
A
412 ifscope = IFSCOPE_NONE;
413 }
414
316670eb
A
415 if ((flags & IP_OUTARGS) && (ipoa->ipoa_flags & IPOAF_NO_CELLULAR)) {
416 nocell = 1;
417 ipf_pktopts.ippo_flags |= IPPOF_NO_IFT_CELLULAR;
418 }
419
6d2010ae 420 if (flags & IP_OUTARGS) {
316670eb
A
421 adv = &ipoa->ipoa_flowadv;
422 adv->code = FADV_SUCCESS;
6d2010ae
A
423 }
424
316670eb
A
425#if DUMMYNET
426 if (args.fwa_ipfw_rule != NULL || args.fwa_pf_rule != NULL) {
427 /* dummynet already saw us */
b0d623f7 428 ip = mtod(m, struct ip *);
316670eb
A
429 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
430 pkt_dst = ip->ip_dst;
b0d623f7
A
431 if (ro->ro_rt != NULL) {
432 RT_LOCK_SPIN(ro->ro_rt);
433 ia = (struct in_ifaddr *)ro->ro_rt->rt_ifa;
6d2010ae
A
434 if (ia) {
435 /* Become a regular mutex */
436 RT_CONVERT_LOCK(ro->ro_rt);
437 IFA_ADDREF(&ia->ia_ifa);
438 }
b0d623f7
A
439 RT_UNLOCK(ro->ro_rt);
440 }
91447636 441#if IPSEC
b0d623f7
A
442 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) {
443 so = ipsec_getsocket(m);
444 (void)ipsec_setsocket(m, NULL);
2d21ac55 445 }
316670eb
A
446#endif /* IPSEC */
447#if IPFIREWALL
448 if (args.fwa_ipfw_rule != NULL)
449 goto skip_ipsec;
450#endif /* #if IPFIREWALL */
451 if (args.fwa_pf_rule != NULL)
452 goto sendit;
91447636 453 }
316670eb 454#endif /* DUMMYNET */
91447636 455
9bccf70c 456#if IPSEC
55e303ae 457 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) {
9bccf70c
A
458 so = ipsec_getsocket(m);
459 (void)ipsec_setsocket(m, NULL);
460 }
461#endif
91447636
A
462loopit:
463 /*
464 * No need to process packet twice if we've
465 * already seen it
466 */
b0d623f7
A
467 if (!SLIST_EMPTY(&m->m_pkthdr.tags))
468 inject_filter_ref = ipf_get_inject_filter(m);
469 else
470 inject_filter_ref = 0;
1c79356b 471
1c79356b
A
472 if (opt) {
473 m = ip_insertoptions(m, opt, &len);
474 hlen = len;
316670eb
A
475 /* Update the chain */
476 if (m != m0) {
477 if (m0 == packetlist)
478 packetlist = m;
479 m0 = m;
480 }
1c79356b
A
481 }
482 ip = mtod(m, struct ip *);
4a3eedf9 483#if IPFIREWALL
6d2010ae
A
484 /*
485 * rdar://8542331
486 *
487 * When dealing with a packet chain, we need to reset "next_hop" because
488 * "dst" may have been changed to the gateway address below for the previous
489 * packet of the chain. This could cause the route to be inadvertently changed
490 * to the route to the gateway address (instead of the route to the destination).
491 */
316670eb
A
492 args.fwa_next_hop = next_hop_from_ipfwd_tag;
493 pkt_dst = args.fwa_next_hop ? args.fwa_next_hop->sin_addr : ip->ip_dst;
4a3eedf9
A
494#else
495 pkt_dst = ip->ip_dst;
496#endif
91447636 497
6d2010ae
A
498 /*
499 * We must not send if the packet is destined to network zero.
500 * RFC1122 3.2.1.3 (a) and (b).
501 */
502 if (IN_ZERONET(ntohl(pkt_dst.s_addr))) {
503 error = EHOSTUNREACH;
504 goto bad;
505 }
506
1c79356b
A
507 /*
508 * Fill in IP header.
509 */
510 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
511 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, hlen >> 2);
512 ip->ip_off &= IP_DF;
9bccf70c
A
513#if RANDOM_IP_ID
514 ip->ip_id = ip_randomid();
515#else
1c79356b 516 ip->ip_id = htons(ip_id++);
9bccf70c 517#endif
b0d623f7 518 OSAddAtomic(1, &ipstat.ips_localout);
1c79356b
A
519 } else {
520 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
521 }
316670eb 522
2d21ac55
A
523#if DEBUG
524 /* For debugging, we let the stack forge congestion */
525 if (forge_ce != 0 &&
526 ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_ECT1 ||
527 (ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_ECT0)) {
528 ip->ip_tos = (ip->ip_tos & ~IPTOS_ECN_MASK) | IPTOS_ECN_CE;
529 forge_ce--;
530 }
531#endif /* DEBUG */
1c79356b
A
532
533 KERNEL_DEBUG(DBG_LAYER_BEG, ip->ip_dst.s_addr,
534 ip->ip_src.s_addr, ip->ip_p, ip->ip_off, ip->ip_len);
316670eb
A
535
536 dst = (struct sockaddr_in *)(void *)&ro->ro_dst;
55e303ae 537
1c79356b
A
538 /*
539 * If there is a cached route,
540 * check that it is to the same destination
541 * and is still up. If not, free it and try again.
55e303ae
A
542 * The address family should also be checked in case of sharing the
543 * cache with IPv6.
1c79356b 544 */
55e303ae 545
2d21ac55
A
546 if (ro->ro_rt != NULL) {
547 if (ro->ro_rt->generation_id != route_generation &&
548 ((flags & (IP_ROUTETOIF | IP_FORWARDING)) == 0) &&
b0d623f7
A
549 (ip->ip_src.s_addr != INADDR_ANY)) {
550 src_ia = ifa_foraddr(ip->ip_src.s_addr);
551 if (src_ia == NULL) {
552 error = EADDRNOTAVAIL;
553 goto bad;
554 }
6d2010ae 555 IFA_REMREF(&src_ia->ia_ifa);
91447636 556 }
b0d623f7
A
557 /*
558 * Test rt_flags without holding rt_lock for performance
559 * reasons; if the route is down it will hopefully be
560 * caught by the layer below (since it uses this route
561 * as a hint) or during the next transmit.
562 */
2d21ac55
A
563 if ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
564 dst->sin_family != AF_INET ||
565 dst->sin_addr.s_addr != pkt_dst.s_addr) {
b0d623f7 566 rtfree(ro->ro_rt);
2d21ac55
A
567 ro->ro_rt = NULL;
568 }
c910b4d9
A
569 /*
570 * If we're doing source interface selection, we may not
571 * want to use this route; only synch up the generation
572 * count otherwise.
573 */
574 if (!select_srcif && ro->ro_rt != NULL &&
575 ro->ro_rt->generation_id != route_generation)
2d21ac55 576 ro->ro_rt->generation_id = route_generation;
ab86ba33 577 }
2d21ac55 578 if (ro->ro_rt == NULL) {
55e303ae 579 bzero(dst, sizeof(*dst));
1c79356b
A
580 dst->sin_family = AF_INET;
581 dst->sin_len = sizeof(*dst);
91447636 582 dst->sin_addr = pkt_dst;
1c79356b
A
583 }
584 /*
585 * If routing to interface only,
586 * short circuit routing lookup.
587 */
1c79356b 588 if (flags & IP_ROUTETOIF) {
91447636 589 if (ia)
6d2010ae 590 IFA_REMREF(&ia->ia_ifa);
91447636
A
591 if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0) {
592 if ((ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) {
b0d623f7 593 OSAddAtomic(1, &ipstat.ips_noroute);
91447636
A
594 error = ENETUNREACH;
595 goto bad;
596 }
1c79356b
A
597 }
598 ifp = ia->ia_ifp;
1c79356b
A
599 ip->ip_ttl = 1;
600 isbroadcast = in_broadcast(dst->sin_addr, ifp);
c910b4d9 601 } else if (IN_MULTICAST(ntohl(pkt_dst.s_addr)) &&
6d2010ae 602 imo != NULL && (ifp = imo->imo_multicast_ifp) != NULL) {
c910b4d9
A
603 /*
604 * Bypass the normal routing lookup for multicast
605 * packets if the interface is specified.
606 */
c910b4d9
A
607 isbroadcast = 0;
608 if (ia != NULL)
6d2010ae 609 IFA_REMREF(&ia->ia_ifa);
c910b4d9 610
b0d623f7
A
611 /* Macro takes reference on ia */
612 IFP_TO_IA(ifp, ia);
1c79356b 613 } else {
c910b4d9
A
614 boolean_t cloneok = FALSE;
615 /*
616 * Perform source interface selection; the source IP address
617 * must belong to one of the addresses of the interface used
618 * by the route. For performance reasons, do this only if
619 * there is no route, or if the routing table has changed,
620 * or if we haven't done source interface selection on this
621 * route (for this PCB instance) before.
622 */
623 if (select_srcif && ip->ip_src.s_addr != INADDR_ANY &&
b0d623f7 624 (ro->ro_rt == NULL || !(ro->ro_rt->rt_flags & RTF_UP) ||
c910b4d9
A
625 ro->ro_rt->generation_id != route_generation ||
626 !(ro->ro_flags & ROF_SRCIF_SELECTED))) {
627 struct ifaddr *ifa;
2d21ac55 628
c910b4d9
A
629 /* Find the source interface */
630 ifa = in_selectsrcif(ip, ro, ifscope);
631
6d2010ae
A
632 /*
633 * If the source address belongs to a cellular interface
634 * and the caller forbids our using interfaces of such
635 * type, pretend that there is no source address.
636 */
637 if (nocell && ifa != NULL &&
638 ifa->ifa_ifp->if_type == IFT_CELLULAR) {
639 IFA_REMREF(ifa);
640 error = EADDRNOTAVAIL;
641 goto bad;
642 }
643
c910b4d9 644 /*
316670eb
A
645 * If the source address is spoofed (in the case of
646 * IP_RAWOUTPUT on an unbounded socket), or if this
647 * is destined for local/loopback, just let it go out
648 * using the interface of the route. Otherwise,
649 * there's no interface having such an address,
650 * so bail out.
c910b4d9 651 */
316670eb
A
652 if (ifa == NULL && (!(flags & IP_RAWOUTPUT) ||
653 srcbound) && ifscope != lo_ifp->if_index) {
c910b4d9 654 error = EADDRNOTAVAIL;
2d21ac55
A
655 goto bad;
656 }
c910b4d9
A
657
658 /*
659 * If the caller didn't explicitly specify the scope,
660 * pick it up from the source interface. If the cached
661 * route was wrong and was blown away as part of source
662 * interface selection, don't mask out RTF_PRCLONING
663 * since that route may have been allocated by the ULP,
664 * unless the IP header was created by the caller or
665 * the destination is IPv4 LLA. The check for the
666 * latter is needed because IPv4 LLAs are never scoped
667 * in the current implementation, and we don't want to
668 * replace the resolved IPv4 LLA route with one whose
669 * gateway points to that of the default gateway on
670 * the primary interface of the system.
671 */
672 if (ifa != NULL) {
673 if (ifscope == IFSCOPE_NONE)
674 ifscope = ifa->ifa_ifp->if_index;
6d2010ae 675 IFA_REMREF(ifa);
c910b4d9
A
676 cloneok = (!(flags & IP_RAWOUTPUT) &&
677 !(IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr))));
678 }
2d21ac55 679 }
c910b4d9 680
1c79356b
A
681 /*
682 * If this is the case, we probably don't want to allocate
683 * a protocol-cloned route since we didn't get one from the
684 * ULP. This lets TCP do its thing, while not burdening
685 * forwarding or ICMP with the overhead of cloning a route.
686 * Of course, we still want to do any cloning requested by
687 * the link layer, as this is probably required in all cases
688 * for correct operation (as it is for ARP).
689 */
c910b4d9 690 if (ro->ro_rt == NULL) {
2d21ac55
A
691 unsigned long ign = RTF_PRCLONING;
692 /*
693 * We make an exception here: if the destination
694 * address is INADDR_BROADCAST, allocate a protocol-
695 * cloned host route so that we end up with a route
696 * marked with the RTF_BROADCAST flag. Otherwise,
697 * we would end up referring to the default route,
698 * instead of creating a cloned host route entry.
699 * That would introduce inconsistencies between ULPs
700 * that allocate a route and those that don't. The
701 * RTF_BROADCAST route is important since we'd want
702 * to send out undirected IP broadcast packets using
c910b4d9
A
703 * link-level broadcast address. Another exception
704 * is for ULP-created routes that got blown away by
705 * source interface selection (see above).
2d21ac55 706 *
c910b4d9 707 * These exceptions will no longer be necessary when
2d21ac55
A
708 * the RTF_PRCLONING scheme is no longer present.
709 */
c910b4d9 710 if (cloneok || dst->sin_addr.s_addr == INADDR_BROADCAST)
2d21ac55
A
711 ign &= ~RTF_PRCLONING;
712
b0d623f7
A
713 /*
714 * Loosen the route lookup criteria if the ifscope
715 * corresponds to the loopback interface; this is
716 * needed to support Application Layer Gateways
717 * listening on loopback, in conjunction with packet
718 * filter redirection rules. The final source IP
719 * address will be rewritten by the packet filter
720 * prior to the RFC1122 loopback check below.
721 */
722 if (ifscope == lo_ifp->if_index)
723 rtalloc_ign(ro, ign);
724 else
725 rtalloc_scoped_ign(ro, ign, ifscope);
6d2010ae
A
726
727 /*
728 * If the route points to a cellular interface and the
729 * caller forbids our using interfaces of such type,
730 * pretend that there is no route.
731 */
732 if (nocell && ro->ro_rt != NULL) {
733 RT_LOCK_SPIN(ro->ro_rt);
734 if (ro->ro_rt->rt_ifp->if_type ==
735 IFT_CELLULAR) {
736 RT_UNLOCK(ro->ro_rt);
737 rtfree(ro->ro_rt);
738 ro->ro_rt = NULL;
739 } else {
740 RT_UNLOCK(ro->ro_rt);
741 }
742 }
2d21ac55 743 }
c910b4d9
A
744
745 if (ro->ro_rt == NULL) {
b0d623f7 746 OSAddAtomic(1, &ipstat.ips_noroute);
1c79356b
A
747 error = EHOSTUNREACH;
748 goto bad;
749 }
c910b4d9 750
91447636 751 if (ia)
6d2010ae 752 IFA_REMREF(&ia->ia_ifa);
b0d623f7 753 RT_LOCK_SPIN(ro->ro_rt);
1c79356b 754 ia = ifatoia(ro->ro_rt->rt_ifa);
6d2010ae
A
755 if (ia) {
756 /* Become a regular mutex */
757 RT_CONVERT_LOCK(ro->ro_rt);
758 IFA_ADDREF(&ia->ia_ifa);
759 }
1c79356b 760 ifp = ro->ro_rt->rt_ifp;
1c79356b 761 ro->ro_rt->rt_use++;
316670eb
A
762 if (ro->ro_rt->rt_flags & RTF_GATEWAY) {
763 dst = (struct sockaddr_in *)(void *)
764 ro->ro_rt->rt_gateway;
765 }
7e4a7d39 766 if (ro->ro_rt->rt_flags & RTF_HOST) {
1c79356b 767 isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST);
7e4a7d39
A
768 } else {
769 /* Become a regular mutex */
770 RT_CONVERT_LOCK(ro->ro_rt);
1c79356b 771 isbroadcast = in_broadcast(dst->sin_addr, ifp);
7e4a7d39 772 }
b0d623f7 773 RT_UNLOCK(ro->ro_rt);
1c79356b 774 }
b0d623f7 775
91447636 776 if (IN_MULTICAST(ntohl(pkt_dst.s_addr))) {
1c79356b 777 struct in_multi *inm;
6d2010ae
A
778 u_int32_t vif;
779 u_int8_t ttl = IP_DEFAULT_MULTICAST_TTL;
780 u_int8_t loop = IP_DEFAULT_MULTICAST_LOOP;
1c79356b
A
781
782 m->m_flags |= M_MCAST;
783 /*
784 * IP destination address is multicast. Make sure "dst"
785 * still points to the address in "ro". (It may have been
786 * changed to point to a gateway address, above.)
787 */
316670eb 788 dst = (struct sockaddr_in *)(void *)&ro->ro_dst;
1c79356b
A
789 /*
790 * See if the caller provided any multicast options
791 */
792 if (imo != NULL) {
6d2010ae
A
793 IMO_LOCK(imo);
794 vif = imo->imo_multicast_vif;
795 ttl = imo->imo_multicast_ttl;
796 loop = imo->imo_multicast_loop;
797 if ((flags & IP_RAWOUTPUT) == 0)
798 ip->ip_ttl = ttl;
799 if (imo->imo_multicast_ifp != NULL)
1c79356b 800 ifp = imo->imo_multicast_ifp;
6d2010ae 801 IMO_UNLOCK(imo);
2d21ac55 802#if MROUTING
6d2010ae
A
803 if (vif != -1 && ((flags & IP_RAWOUTPUT) == 0 ||
804 ip->ip_src.s_addr == INADDR_ANY))
805 ip->ip_src.s_addr = ip_mcast_src(vif);
2d21ac55 806#endif /* MROUTING */
6d2010ae
A
807 } else if ((flags & IP_RAWOUTPUT) == 0) {
808 vif = -1;
809 ip->ip_ttl = ttl;
810 }
1c79356b
A
811 /*
812 * Confirm that the outgoing interface supports multicast.
813 */
6d2010ae 814 if (imo == NULL || vif == -1) {
1c79356b 815 if ((ifp->if_flags & IFF_MULTICAST) == 0) {
b0d623f7 816 OSAddAtomic(1, &ipstat.ips_noroute);
1c79356b
A
817 error = ENETUNREACH;
818 goto bad;
819 }
820 }
821 /*
822 * If source address not specified yet, use address
823 * of outgoing interface.
824 */
825 if (ip->ip_src.s_addr == INADDR_ANY) {
b0d623f7
A
826 struct in_ifaddr *ia1;
827 lck_rw_lock_shared(in_ifaddr_rwlock);
6d2010ae
A
828 TAILQ_FOREACH(ia1, &in_ifaddrhead, ia_link) {
829 IFA_LOCK_SPIN(&ia1->ia_ifa);
1c79356b
A
830 if (ia1->ia_ifp == ifp) {
831 ip->ip_src = IA_SIN(ia1)->sin_addr;
6d2010ae 832 IFA_UNLOCK(&ia1->ia_ifa);
1c79356b
A
833 break;
834 }
6d2010ae
A
835 IFA_UNLOCK(&ia1->ia_ifa);
836 }
b0d623f7 837 lck_rw_done(in_ifaddr_rwlock);
55e303ae
A
838 if (ip->ip_src.s_addr == INADDR_ANY) {
839 error = ENETUNREACH;
840 goto bad;
841 }
1c79356b
A
842 }
843
6d2010ae
A
844 in_multihead_lock_shared();
845 IN_LOOKUP_MULTI(&pkt_dst, ifp, inm);
846 in_multihead_lock_done();
847 if (inm != NULL && (imo == NULL || loop)) {
1c79356b
A
848 /*
849 * If we belong to the destination multicast group
850 * on the outgoing interface, and the caller did not
851 * forbid loopback, loop back a copy.
852 */
91447636
A
853 if (!TAILQ_EMPTY(&ipv4_filters)) {
854 struct ipfilter *filter;
855 int seen = (inject_filter_ref == 0);
91447636 856
6d2010ae
A
857 if (imo != NULL) {
858 ipf_pktopts.ippo_flags |= IPPOF_MCAST_OPTS;
859 ipf_pktopts.ippo_mcast_ifnet = ifp;
860 ipf_pktopts.ippo_mcast_ttl = ttl;
861 ipf_pktopts.ippo_mcast_loop = loop;
91447636 862 }
6d2010ae 863
91447636 864 ipf_ref();
6d2010ae 865
0c530ab8 866 /* 4135317 - always pass network byte order to filter */
b0d623f7
A
867
868#if BYTE_ORDER != BIG_ENDIAN
0c530ab8
A
869 HTONS(ip->ip_len);
870 HTONS(ip->ip_off);
b0d623f7
A
871#endif
872
91447636
A
873 TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) {
874 if (seen == 0) {
875 if ((struct ipfilter *)inject_filter_ref == filter)
876 seen = 1;
877 } else if (filter->ipf_filter.ipf_output) {
878 errno_t result;
879 result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, ippo);
880 if (result == EJUSTRETURN) {
881 ipf_unref();
6d2010ae 882 INM_REMREF(inm);
91447636
A
883 goto done;
884 }
885 if (result != 0) {
886 ipf_unref();
6d2010ae 887 INM_REMREF(inm);
91447636
A
888 goto bad;
889 }
890 }
891 }
6d2010ae 892
0c530ab8 893 /* set back to host byte order */
6601e61a 894 ip = mtod(m, struct ip *);
b0d623f7
A
895
896#if BYTE_ORDER != BIG_ENDIAN
0c530ab8
A
897 NTOHS(ip->ip_len);
898 NTOHS(ip->ip_off);
b0d623f7
A
899#endif
900
91447636
A
901 ipf_unref();
902 didfilter = 1;
903 }
1c79356b
A
904 ip_mloopback(ifp, m, dst, hlen);
905 }
2d21ac55 906#if MROUTING
1c79356b
A
907 else {
908 /*
909 * If we are acting as a multicast router, perform
910 * multicast forwarding as if the packet had just
911 * arrived on the interface to which we are about
912 * to send. The multicast forwarding function
913 * recursively calls this function, using the
914 * IP_FORWARDING flag to prevent infinite recursion.
915 *
916 * Multicasts that are looped back by ip_mloopback(),
917 * above, will be forwarded by the ip_input() routine,
918 * if necessary.
919 */
920 if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
921 /*
922 * Check if rsvp daemon is running. If not, don't
923 * set ip_moptions. This ensures that the packet
924 * is multicast and not just sent down one link
925 * as prescribed by rsvpd.
926 */
927 if (!rsvp_on)
6d2010ae 928 imo = NULL;
1c79356b
A
929 if (ip_mforward(ip, ifp, m, imo) != 0) {
930 m_freem(m);
6d2010ae
A
931 if (inm != NULL)
932 INM_REMREF(inm);
316670eb 933 OSAddAtomic(1, &ipstat.ips_cantforward);
1c79356b
A
934 goto done;
935 }
936 }
937 }
2d21ac55 938#endif /* MROUTING */
6d2010ae
A
939 if (inm != NULL)
940 INM_REMREF(inm);
1c79356b
A
941 /*
942 * Multicasts with a time-to-live of zero may be looped-
943 * back, above, but must not be transmitted on a network.
944 * Also, multicasts addressed to the loopback interface
945 * are not sent -- the above call to ip_mloopback() will
946 * loop back a copy if this host actually belongs to the
947 * destination group on the loopback interface.
948 */
949 if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) {
950 m_freem(m);
951 goto done;
952 }
953
954 goto sendit;
955 }
1c79356b
A
956 /*
957 * If source address not specified yet, use address
958 * of outgoing interface.
959 */
960 if (ip->ip_src.s_addr == INADDR_ANY) {
6d2010ae 961 IFA_LOCK_SPIN(&ia->ia_ifa);
1c79356b 962 ip->ip_src = IA_SIN(ia)->sin_addr;
6d2010ae 963 IFA_UNLOCK(&ia->ia_ifa);
1c79356b
A
964#if IPFIREWALL_FORWARD
965 /* Keep note that we did this - if the firewall changes
966 * the next-hop, our interface may change, changing the
967 * default source IP. It's a shame so much effort happens
968 * twice. Oh well.
969 */
970 fwd_rewrite_src++;
971#endif /* IPFIREWALL_FORWARD */
972 }
1c79356b
A
973
974 /*
975 * Look for broadcast address and
976 * verify user is allowed to send
977 * such a packet.
978 */
979 if (isbroadcast) {
980 if ((ifp->if_flags & IFF_BROADCAST) == 0) {
981 error = EADDRNOTAVAIL;
982 goto bad;
983 }
984 if ((flags & IP_ALLOWBROADCAST) == 0) {
985 error = EACCES;
986 goto bad;
987 }
988 /* don't allow broadcast messages to be fragmented */
989 if ((u_short)ip->ip_len > ifp->if_mtu) {
990 error = EMSGSIZE;
991 goto bad;
992 }
993 m->m_flags |= M_BCAST;
994 } else {
995 m->m_flags &= ~M_BCAST;
996 }
997
998sendit:
b0d623f7
A
999#if PF
1000 /* Invoke outbound packet filter */
316670eb 1001 if (PF_IS_ENABLED) {
6d2010ae 1002 int rc;
316670eb
A
1003
1004 m0 = m; /* Save for later */
1005#if DUMMYNET
1006 args.fwa_m = m;
1007 args.fwa_next_hop = dst;
1008 args.fwa_oif = ifp;
1009 args.fwa_ro = ro;
1010 args.fwa_dst = dst;
1011 args.fwa_oflags = flags;
1012 if (flags & IP_OUTARGS)
1013 args.fwa_ipoa = ipoa;
1014 rc = pf_af_hook(ifp, mppn, &m, AF_INET, FALSE, &args);
1015#else /* DUMMYNET */
1016 rc = pf_af_hook(ifp, mppn, &m, AF_INET, FALSE, NULL);
1017#endif /* DUMMYNET */
1018 if (rc != 0 || m == NULL) {
1019 /* Move to the next packet */
1020 m = *mppn;
1021
1022 /* Skip ahead if first packet in list got dropped */
1023 if (packetlist == m0)
6d2010ae 1024 packetlist = m;
316670eb 1025
6d2010ae
A
1026 if (m != NULL) {
1027 m0 = m;
1028 /* Next packet in the chain */
1029 goto loopit;
1030 } else if (packetlist != NULL) {
1031 /* No more packet; send down the chain */
1032 goto sendchain;
1033 }
1034 /* Nothing left; we're done */
1035 goto done;
b0d623f7 1036 }
6d2010ae
A
1037 m0 = m;
1038 ip = mtod(m, struct ip *);
1039 pkt_dst = ip->ip_dst;
1040 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
b0d623f7 1041 }
b0d623f7 1042#endif /* PF */
9bccf70c
A
1043 /*
1044 * Force IP TTL to 255 following draft-ietf-zeroconf-ipv4-linklocal.txt
1045 */
1046 if (IN_LINKLOCAL(ntohl(ip->ip_src.s_addr)) || IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr))) {
1047 ip_linklocal_stat.iplls_out_total++;
1048 if (ip->ip_ttl != MAXTTL) {
1049 ip_linklocal_stat.iplls_out_badttl++;
316670eb 1050 ip->ip_ttl = MAXTTL;
9bccf70c
A
1051 }
1052 }
1053
91447636
A
1054 if (!didfilter && !TAILQ_EMPTY(&ipv4_filters)) {
1055 struct ipfilter *filter;
1056 int seen = (inject_filter_ref == 0);
6d2010ae
A
1057 ipf_pktopts.ippo_flags &= ~IPPOF_MCAST_OPTS;
1058
b0d623f7
A
1059 /* Check that a TSO frame isn't passed to a filter.
1060 * This could happen if a filter is inserted while
1061 * TCP is sending the TSO packet.
1062 */
1063 if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4) {
1064 error = EMSGSIZE;
1065 goto bad;
1066 }
1067
91447636 1068 ipf_ref();
316670eb 1069
0c530ab8 1070 /* 4135317 - always pass network byte order to filter */
b0d623f7
A
1071
1072#if BYTE_ORDER != BIG_ENDIAN
0c530ab8
A
1073 HTONS(ip->ip_len);
1074 HTONS(ip->ip_off);
b0d623f7
A
1075#endif
1076
91447636
A
1077 TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) {
1078 if (seen == 0) {
1079 if ((struct ipfilter *)inject_filter_ref == filter)
1080 seen = 1;
1081 } else if (filter->ipf_filter.ipf_output) {
1082 errno_t result;
6d2010ae 1083 result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, ippo);
91447636
A
1084 if (result == EJUSTRETURN) {
1085 ipf_unref();
1086 goto done;
1087 }
1088 if (result != 0) {
1089 ipf_unref();
91447636
A
1090 goto bad;
1091 }
1092 }
1093 }
316670eb 1094
0c530ab8 1095 /* set back to host byte order */
6601e61a 1096 ip = mtod(m, struct ip *);
b0d623f7
A
1097
1098#if BYTE_ORDER != BIG_ENDIAN
0c530ab8
A
1099 NTOHS(ip->ip_len);
1100 NTOHS(ip->ip_off);
b0d623f7
A
1101#endif
1102
91447636 1103 ipf_unref();
91447636
A
1104 }
1105
9bccf70c
A
1106#if IPSEC
1107 /* temporary for testing only: bypass ipsec altogether */
1108
55e303ae 1109 if (ipsec_bypass != 0 || (flags & IP_NOIPSEC) != 0)
9bccf70c
A
1110 goto skip_ipsec;
1111
55e303ae
A
1112 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_START, 0,0,0,0,0);
1113
91447636 1114
9bccf70c
A
1115 /* get SP for this packet */
1116 if (so == NULL)
1117 sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, flags, &error);
1118 else
1119 sp = ipsec4_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error);
1120
1121 if (sp == NULL) {
316670eb 1122 IPSEC_STAT_INCREMENT(ipsecstat.out_inval);
55e303ae 1123 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 0,0,0,0,0);
9bccf70c
A
1124 goto bad;
1125 }
1126
1127 error = 0;
1128
1129 /* check policy */
1130 switch (sp->policy) {
1131 case IPSEC_POLICY_DISCARD:
2d21ac55 1132 case IPSEC_POLICY_GENERATE:
9bccf70c
A
1133 /*
1134 * This packet is just discarded.
1135 */
2d21ac55 1136 IPSEC_STAT_INCREMENT(ipsecstat.out_polvio);
55e303ae 1137 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 1,0,0,0,0);
9bccf70c
A
1138 goto bad;
1139
1140 case IPSEC_POLICY_BYPASS:
1141 case IPSEC_POLICY_NONE:
1142 /* no need to do IPsec. */
55e303ae 1143 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 2,0,0,0,0);
9bccf70c 1144 goto skip_ipsec;
316670eb 1145
9bccf70c
A
1146 case IPSEC_POLICY_IPSEC:
1147 if (sp->req == NULL) {
1148 /* acquire a policy */
1149 error = key_spdacquire(sp);
55e303ae 1150 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 3,0,0,0,0);
9bccf70c
A
1151 goto bad;
1152 }
1153 break;
1154
1155 case IPSEC_POLICY_ENTRUST:
1156 default:
1157 printf("ip_output: Invalid policy found. %d\n", sp->policy);
1158 }
1159 {
ebb1b9f4 1160 ipsec_state.m = m;
9bccf70c 1161 if (flags & IP_ROUTETOIF) {
ebb1b9f4 1162 bzero(&ipsec_state.ro, sizeof(ipsec_state.ro));
9bccf70c 1163 } else
ebb1b9f4
A
1164 route_copyout(&ipsec_state.ro, ro, sizeof(ipsec_state.ro));
1165 ipsec_state.dst = (struct sockaddr *)dst;
9bccf70c
A
1166
1167 ip->ip_sum = 0;
1168
1169 /*
1170 * XXX
1171 * delayed checksums are not currently compatible with IPsec
1172 */
1173 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
1174 in_delayed_cksum(m);
1175 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1176 }
1177
b0d623f7
A
1178
1179#if BYTE_ORDER != BIG_ENDIAN
9bccf70c
A
1180 HTONS(ip->ip_len);
1181 HTONS(ip->ip_off);
b0d623f7 1182#endif
9bccf70c 1183
6d2010ae
A
1184 DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
1185 struct ip *, ip, struct ifnet *, ifp,
1186 struct ip *, ip, struct ip6_hdr *, NULL);
1187
ebb1b9f4 1188 error = ipsec4_output(&ipsec_state, sp, flags);
316670eb 1189
ebb1b9f4 1190 m0 = m = ipsec_state.m;
316670eb 1191
9bccf70c
A
1192 if (flags & IP_ROUTETOIF) {
1193 /*
1194 * if we have tunnel mode SA, we may need to ignore
1195 * IP_ROUTETOIF.
1196 */
ebb1b9f4 1197 if (ipsec_state.tunneled) {
9bccf70c 1198 flags &= ~IP_ROUTETOIF;
ebb1b9f4
A
1199 ipsec_saved_route = ro;
1200 ro = &ipsec_state.ro;
9bccf70c 1201 }
ebb1b9f4
A
1202 } else {
1203 ipsec_saved_route = ro;
1204 ro = &ipsec_state.ro;
1205 }
316670eb 1206 dst = (struct sockaddr_in *)(void *)ipsec_state.dst;
9bccf70c
A
1207 if (error) {
1208 /* mbuf is already reclaimed in ipsec4_output. */
1209 m0 = NULL;
1210 switch (error) {
1211 case EHOSTUNREACH:
1212 case ENETUNREACH:
1213 case EMSGSIZE:
1214 case ENOBUFS:
1215 case ENOMEM:
1216 break;
1217 default:
1218 printf("ip4_output (ipsec): error code %d\n", error);
1219 /*fall through*/
1220 case ENOENT:
1221 /* don't show these error codes to the user */
1222 error = 0;
1223 break;
1224 }
55e303ae 1225 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 4,0,0,0,0);
9bccf70c
A
1226 goto bad;
1227 }
1228 }
1229
1230 /* be sure to update variables that are affected by ipsec4_output() */
1231 ip = mtod(m, struct ip *);
316670eb 1232
9bccf70c
A
1233#ifdef _IP_VHL
1234 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
1235#else
1236 hlen = ip->ip_hl << 2;
1237#endif
55e303ae 1238 /* Check that there wasn't a route change and src is still valid */
b0d623f7
A
1239 if (ro->ro_rt != NULL && ro->ro_rt->generation_id != route_generation) {
1240 if ((src_ia = ifa_foraddr(ip->ip_src.s_addr)) == NULL &&
1241 ((flags & (IP_ROUTETOIF | IP_FORWARDING)) == 0)) {
1242 error = EADDRNOTAVAIL;
1243 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END,
1244 5,0,0,0,0);
55e303ae
A
1245 goto bad;
1246 }
b0d623f7 1247 rtfree(ro->ro_rt);
55e303ae 1248 ro->ro_rt = NULL;
b0d623f7 1249 if (src_ia != NULL)
6d2010ae 1250 IFA_REMREF(&src_ia->ia_ifa);
55e303ae
A
1251 }
1252
9bccf70c
A
1253 if (ro->ro_rt == NULL) {
1254 if ((flags & IP_ROUTETOIF) == 0) {
b0d623f7
A
1255 printf("ip_output: can't update route after "
1256 "IPsec processing\n");
1257 error = EHOSTUNREACH; /*XXX*/
1258 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END,
1259 6,0,0,0,0);
9bccf70c
A
1260 goto bad;
1261 }
1262 } else {
91447636 1263 if (ia)
6d2010ae 1264 IFA_REMREF(&ia->ia_ifa);
b0d623f7 1265 RT_LOCK_SPIN(ro->ro_rt);
9bccf70c 1266 ia = ifatoia(ro->ro_rt->rt_ifa);
6d2010ae
A
1267 if (ia) {
1268 /* Become a regular mutex */
1269 RT_CONVERT_LOCK(ro->ro_rt);
1270 IFA_ADDREF(&ia->ia_ifa);
1271 }
9bccf70c 1272 ifp = ro->ro_rt->rt_ifp;
b0d623f7 1273 RT_UNLOCK(ro->ro_rt);
9bccf70c
A
1274 }
1275
1276 /* make it flipped, again. */
b0d623f7
A
1277
1278#if BYTE_ORDER != BIG_ENDIAN
9bccf70c
A
1279 NTOHS(ip->ip_len);
1280 NTOHS(ip->ip_off);
b0d623f7 1281#endif
316670eb 1282
55e303ae 1283 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 7,0xff,0xff,0xff,0xff);
316670eb 1284
91447636
A
1285 /* Pass to filters again */
1286 if (!TAILQ_EMPTY(&ipv4_filters)) {
1287 struct ipfilter *filter;
316670eb 1288
6d2010ae
A
1289 ipf_pktopts.ippo_flags &= ~IPPOF_MCAST_OPTS;
1290
b0d623f7
A
1291 /* Check that a TSO frame isn't passed to a filter.
1292 * This could happen if a filter is inserted while
1293 * TCP is sending the TSO packet.
1294 */
1295 if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4) {
1296 error = EMSGSIZE;
1297 goto bad;
1298 }
1299
91447636 1300 ipf_ref();
316670eb 1301
0c530ab8 1302 /* 4135317 - always pass network byte order to filter */
b0d623f7
A
1303
1304#if BYTE_ORDER != BIG_ENDIAN
0c530ab8
A
1305 HTONS(ip->ip_len);
1306 HTONS(ip->ip_off);
b0d623f7
A
1307#endif
1308
91447636
A
1309 TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) {
1310 if (filter->ipf_filter.ipf_output) {
1311 errno_t result;
6d2010ae 1312 result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, ippo);
91447636
A
1313 if (result == EJUSTRETURN) {
1314 ipf_unref();
1315 goto done;
1316 }
1317 if (result != 0) {
1318 ipf_unref();
91447636
A
1319 goto bad;
1320 }
1321 }
1322 }
316670eb 1323
0c530ab8 1324 /* set back to host byte order */
6601e61a 1325 ip = mtod(m, struct ip *);
b0d623f7
A
1326
1327#if BYTE_ORDER != BIG_ENDIAN
0c530ab8
A
1328 NTOHS(ip->ip_len);
1329 NTOHS(ip->ip_off);
b0d623f7
A
1330#endif
1331
91447636 1332 ipf_unref();
91447636 1333 }
9bccf70c
A
1334skip_ipsec:
1335#endif /*IPSEC*/
1336
2d21ac55 1337#if IPFIREWALL
1c79356b
A
1338 /*
1339 * Check with the firewall...
91447636 1340 * but not if we are already being fwd'd from a firewall.
1c79356b 1341 */
316670eb 1342 if (fw_enable && IPFW_LOADED && !args.fwa_next_hop) {
1c79356b
A
1343 struct sockaddr_in *old = dst;
1344
316670eb
A
1345 args.fwa_m = m;
1346 args.fwa_next_hop = dst;
1347 args.fwa_oif = ifp;
91447636 1348 off = ip_fw_chk_ptr(&args);
316670eb
A
1349 m = args.fwa_m;
1350 dst = args.fwa_next_hop;
91447636 1351
1c79356b
A
1352 /*
1353 * On return we must do the following:
9bccf70c 1354 * IP_FW_PORT_DENY_FLAG -> drop the pkt (XXX new)
1c79356b 1355 * 1<=off<= 0xffff -> DIVERT
9bccf70c
A
1356 * (off & IP_FW_PORT_DYNT_FLAG) -> send to a DUMMYNET pipe
1357 * (off & IP_FW_PORT_TEE_FLAG) -> TEE the packet
1c79356b
A
1358 * dst != old -> IPFIREWALL_FORWARD
1359 * off==0, dst==old -> accept
1360 * If some of the above modules is not compiled in, then
1361 * we shouldn't have to check the corresponding condition
1362 * (because the ipfw control socket should not accept
1363 * unsupported rules), but better play safe and drop
1364 * packets in case of doubt.
1365 */
55e303ae 1366 m0 = m;
9bccf70c
A
1367 if ( (off & IP_FW_PORT_DENY_FLAG) || m == NULL) {
1368 if (m)
1369 m_freem(m);
1370 error = EACCES ;
1371 goto done ;
1c79356b 1372 }
9bccf70c 1373 ip = mtod(m, struct ip *);
316670eb 1374
3a60a9f5 1375 if (off == 0 && dst == old) {/* common case */
1c79356b 1376 goto pass ;
3a60a9f5 1377 }
1c79356b 1378#if DUMMYNET
316670eb 1379 if (DUMMYNET_LOADED && (off & IP_FW_PORT_DYNT_FLAG) != 0) {
c910b4d9
A
1380 /*
1381 * pass the pkt to dummynet. Need to include
1382 * pipe number, m, ifp, ro, dst because these are
1383 * not recomputed in the next pass.
1384 * All other parameters have been already used and
1385 * so they are not needed anymore.
1386 * XXX note: if the ifp or ro entry are deleted
1387 * while a pkt is in dummynet, we are in trouble!
1388 */
316670eb
A
1389 args.fwa_ro = ro;
1390 args.fwa_dst = dst;
1391 args.fwa_oflags = flags;
c910b4d9 1392 if (flags & IP_OUTARGS)
316670eb 1393 args.fwa_ipoa = ipoa;
c910b4d9
A
1394
1395 error = ip_dn_io_ptr(m, off & 0xffff, DN_TO_IP_OUT,
316670eb 1396 &args, DN_CLIENT_IPFW);
c910b4d9 1397 goto done;
1c79356b 1398 }
91447636 1399#endif /* DUMMYNET */
1c79356b 1400#if IPDIVERT
9bccf70c
A
1401 if (off != 0 && (off & IP_FW_PORT_DYNT_FLAG) == 0) {
1402 struct mbuf *clone = NULL;
1403
1404 /* Clone packet if we're doing a 'tee' */
1405 if ((off & IP_FW_PORT_TEE_FLAG) != 0)
1406 clone = m_dup(m, M_DONTWAIT);
1407 /*
1408 * XXX
1409 * delayed checksums are not currently compatible
1410 * with divert sockets.
1411 */
1412 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
1413 in_delayed_cksum(m);
1414 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1415 }
1416
1417 /* Restore packet header fields to original values */
b0d623f7
A
1418
1419#if BYTE_ORDER != BIG_ENDIAN
9bccf70c
A
1420 HTONS(ip->ip_len);
1421 HTONS(ip->ip_off);
b0d623f7 1422#endif
9bccf70c
A
1423
1424 /* Deliver packet to divert input routine */
316670eb 1425 divert_packet(m, 0, off & 0xffff, args.fwa_divert_rule);
9bccf70c
A
1426
1427 /* If 'tee', continue with original packet */
1428 if (clone != NULL) {
55e303ae 1429 m0 = m = clone;
9bccf70c
A
1430 ip = mtod(m, struct ip *);
1431 goto pass;
1432 }
1c79356b
A
1433 goto done;
1434 }
1435#endif
1436
1437#if IPFIREWALL_FORWARD
1438 /* Here we check dst to make sure it's directly reachable on the
1439 * interface we previously thought it was.
1440 * If it isn't (which may be likely in some situations) we have
1441 * to re-route it (ie, find a route for the next-hop and the
1442 * associated interface) and set them here. This is nested
1443 * forwarding which in most cases is undesirable, except where
1444 * such control is nigh impossible. So we do it here.
1445 * And I'm babbling.
1446 */
1447 if (off == 0 && old != dst) {
91447636 1448 struct in_ifaddr *ia_fw;
1c79356b
A
1449
1450 /* It's changed... */
1451 /* There must be a better way to do this next line... */
1452 static struct route sro_fwd, *ro_fwd = &sro_fwd;
1453#if IPFIREWALL_FORWARD_DEBUG
1454 printf("IPFIREWALL_FORWARD: New dst ip: ");
1455 print_ip(dst->sin_addr);
1456 printf("\n");
1457#endif
1458 /*
1459 * We need to figure out if we have been forwarded
1460 * to a local socket. If so then we should somehow
1461 * "loop back" to ip_input, and get directed to the
1462 * PCB as if we had received this packet. This is
1463 * because it may be difficult to identify the packets
1464 * you want to forward until they are being output
1465 * and have selected an interface. (e.g. locally
1466 * initiated packets) If we used the loopback interface,
1467 * we would not be able to control what happens
1468 * as the packet runs through ip_input() as
1469 * it is done through a ISR.
1470 */
b0d623f7 1471 lck_rw_lock_shared(in_ifaddr_rwlock);
91447636 1472 TAILQ_FOREACH(ia_fw, &in_ifaddrhead, ia_link) {
1c79356b
A
1473 /*
1474 * If the addr to forward to is one
1475 * of ours, we pretend to
1476 * be the destination for this packet.
1477 */
6d2010ae 1478 IFA_LOCK_SPIN(&ia_fw->ia_ifa);
91447636 1479 if (IA_SIN(ia_fw)->sin_addr.s_addr ==
6d2010ae
A
1480 dst->sin_addr.s_addr) {
1481 IFA_UNLOCK(&ia_fw->ia_ifa);
1c79356b 1482 break;
6d2010ae
A
1483 }
1484 IFA_UNLOCK(&ia_fw->ia_ifa);
1c79356b 1485 }
b0d623f7
A
1486 lck_rw_done(in_ifaddr_rwlock);
1487 if (ia_fw) {
1c79356b 1488 /* tell ip_input "dont filter" */
91447636
A
1489 struct m_tag *fwd_tag;
1490 struct ip_fwd_tag *ipfwd_tag;
6d2010ae
A
1491
1492 fwd_tag = m_tag_create(KERNEL_MODULE_TAG_ID,
b0d623f7 1493 KERNEL_TAG_TYPE_IPFORWARD,
6d2010ae 1494 sizeof (*ipfwd_tag), M_NOWAIT, m);
91447636
A
1495 if (fwd_tag == NULL) {
1496 error = ENOBUFS;
1497 goto bad;
1498 }
6d2010ae 1499
91447636 1500 ipfwd_tag = (struct ip_fwd_tag *)(fwd_tag+1);
316670eb 1501 ipfwd_tag->next_hop = args.fwa_next_hop;
91447636
A
1502
1503 m_tag_prepend(m, fwd_tag);
1504
1c79356b 1505 if (m->m_pkthdr.rcvif == NULL)
6d2010ae 1506 m->m_pkthdr.rcvif = lo_ifp;
91447636
A
1507 if ((~IF_HWASSIST_CSUM_FLAGS(m->m_pkthdr.rcvif->if_hwassist) &
1508 m->m_pkthdr.csum_flags) == 0) {
1509 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
1510 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1511 m->m_pkthdr.csum_flags |=
1512 CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1513 m->m_pkthdr.csum_data = 0xffff;
1514 }
9bccf70c 1515 m->m_pkthdr.csum_flags |=
91447636
A
1516 CSUM_IP_CHECKED | CSUM_IP_VALID;
1517 }
1518 else if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
1519 in_delayed_cksum(m);
1520 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1521 ip->ip_sum = in_cksum(m, hlen);
9bccf70c 1522 }
b0d623f7
A
1523
1524#if BYTE_ORDER != BIG_ENDIAN
9bccf70c
A
1525 HTONS(ip->ip_len);
1526 HTONS(ip->ip_off);
316670eb
A
1527#endif
1528
91447636
A
1529 /* we need to call dlil_output to run filters
1530 * and resync to avoid recursion loops.
1531 */
1532 if (lo_ifp) {
316670eb
A
1533 dlil_output(lo_ifp, PF_INET, m, 0,
1534 (struct sockaddr *)dst, 0, adv);
91447636
A
1535 }
1536 else {
1537 printf("ip_output: no loopback ifp for forwarding!!!\n");
1538 }
1c79356b
A
1539 goto done;
1540 }
1541 /* Some of the logic for this was
1542 * nicked from above.
1543 *
1544 * This rewrites the cached route in a local PCB.
1545 * Is this what we want to do?
1546 */
1547 bcopy(dst, &ro_fwd->ro_dst, sizeof(*dst));
1548
b0d623f7
A
1549 ro_fwd->ro_rt = NULL;
1550 rtalloc_ign(ro_fwd, RTF_PRCLONING);
1c79356b 1551
b0d623f7
A
1552 if (ro_fwd->ro_rt == NULL) {
1553 OSAddAtomic(1, &ipstat.ips_noroute);
1c79356b
A
1554 error = EHOSTUNREACH;
1555 goto bad;
1556 }
1557
b0d623f7 1558 RT_LOCK_SPIN(ro_fwd->ro_rt);
91447636 1559 ia_fw = ifatoia(ro_fwd->ro_rt->rt_ifa);
6d2010ae
A
1560 if (ia_fw != NULL) {
1561 /* Become a regular mutex */
1562 RT_CONVERT_LOCK(ro_fwd->ro_rt);
1563 IFA_ADDREF(&ia_fw->ia_ifa);
1564 }
1c79356b 1565 ifp = ro_fwd->ro_rt->rt_ifp;
1c79356b
A
1566 ro_fwd->ro_rt->rt_use++;
1567 if (ro_fwd->ro_rt->rt_flags & RTF_GATEWAY)
316670eb 1568 dst = (struct sockaddr_in *)(void *)ro_fwd->ro_rt->rt_gateway;
7e4a7d39 1569 if (ro_fwd->ro_rt->rt_flags & RTF_HOST) {
1c79356b
A
1570 isbroadcast =
1571 (ro_fwd->ro_rt->rt_flags & RTF_BROADCAST);
7e4a7d39
A
1572 } else {
1573 /* Become a regular mutex */
1574 RT_CONVERT_LOCK(ro_fwd->ro_rt);
1c79356b 1575 isbroadcast = in_broadcast(dst->sin_addr, ifp);
7e4a7d39 1576 }
b0d623f7
A
1577 RT_UNLOCK(ro_fwd->ro_rt);
1578 rtfree(ro->ro_rt);
1c79356b 1579 ro->ro_rt = ro_fwd->ro_rt;
316670eb 1580 dst = (struct sockaddr_in *)(void *)&ro_fwd->ro_dst;
1c79356b
A
1581
1582 /*
1583 * If we added a default src ip earlier,
1584 * which would have been gotten from the-then
1585 * interface, do it again, from the new one.
1586 */
b0d623f7 1587 if (ia_fw != NULL) {
6d2010ae
A
1588 if (fwd_rewrite_src) {
1589 IFA_LOCK_SPIN(&ia_fw->ia_ifa);
b0d623f7 1590 ip->ip_src = IA_SIN(ia_fw)->sin_addr;
6d2010ae
A
1591 IFA_UNLOCK(&ia_fw->ia_ifa);
1592 }
1593 IFA_REMREF(&ia_fw->ia_ifa);
b0d623f7 1594 }
1c79356b
A
1595 goto pass ;
1596 }
1597#endif /* IPFIREWALL_FORWARD */
1598 /*
1599 * if we get here, none of the above matches, and
1600 * we have to drop the pkt
1601 */
1602 m_freem(m);
91447636 1603 error = EACCES; /* not sure this is the right error msg */
91447636 1604 goto done;
1c79356b 1605 }
1c79356b
A
1606
1607pass:
6d2010ae 1608#endif /* IPFIREWALL */
e5568f75
A
1609#if __APPLE__
1610 /* Do not allow loopback address to wind up on a wire */
1611 if ((ifp->if_flags & IFF_LOOPBACK) == 0 &&
1612 ((ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
1613 (ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)) {
b0d623f7 1614 OSAddAtomic(1, &ipstat.ips_badaddr);
e5568f75 1615 m_freem(m);
91447636
A
1616 /*
1617 * Do not simply drop the packet just like a firewall -- we want the
1618 * application to feel the pain.
1619 * Return ENETUNREACH like ip6_output does in some similar cases.
1620 * This can startle the otherwise clueless process that specifies
e5568f75
A
1621 * loopback as the source address.
1622 */
91447636 1623 error = ENETUNREACH;
e5568f75
A
1624 goto done;
1625 }
1626#endif
9bccf70c 1627 m->m_pkthdr.csum_flags |= CSUM_IP;
b0d623f7 1628 tso = (ifp->if_hwassist & IFNET_TSO_IPV4) && (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4);
316670eb 1629
4a249263
A
1630 sw_csum = m->m_pkthdr.csum_flags
1631 & ~IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);
1c79356b 1632
9bccf70c 1633 if ((ifp->if_hwassist & CSUM_TCP_SUM16) != 0) {
1c79356b 1634 /*
9bccf70c
A
1635 * Special case code for GMACE
1636 * frames that can be checksumed by GMACE SUM16 HW:
1637 * frame >64, no fragments, no UDP
1c79356b 1638 */
9bccf70c
A
1639 if (apple_hwcksum_tx && (m->m_pkthdr.csum_flags & CSUM_TCP)
1640 && (ip->ip_len > 50) && (ip->ip_len <= ifp->if_mtu)) {
1641 /* Apple GMAC HW, expects STUFF_OFFSET << 16 | START_OFFSET */
1642 u_short offset = (IP_VHL_HL(ip->ip_vhl) << 2) +14 ; /* IP+Enet header length */
1643 u_short csumprev= m->m_pkthdr.csum_data & 0xFFFF;
316670eb 1644 m->m_pkthdr.csum_flags = CSUM_DATA_VALID | CSUM_TCP_SUM16; /* for GMAC */
9bccf70c
A
1645 m->m_pkthdr.csum_data = (csumprev + offset) << 16 ;
1646 m->m_pkthdr.csum_data += offset;
316670eb
A
1647 sw_csum = CSUM_DELAY_IP; /* do IP hdr chksum in software */
1648 } else {
9bccf70c
A
1649 /* let the software handle any UDP or TCP checksums */
1650 sw_csum |= (CSUM_DELAY_DATA & m->m_pkthdr.csum_flags);
1c79356b 1651 }
2d21ac55
A
1652 } else if (apple_hwcksum_tx == 0) {
1653 sw_csum |= (CSUM_DELAY_DATA | CSUM_DELAY_IP) &
1654 m->m_pkthdr.csum_flags;
1c79356b 1655 }
316670eb 1656
9bccf70c
A
1657 if (sw_csum & CSUM_DELAY_DATA) {
1658 in_delayed_cksum(m);
1659 sw_csum &= ~CSUM_DELAY_DATA;
1660 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
0b4e3aa0 1661 }
2d21ac55
A
1662
1663 if (apple_hwcksum_tx != 0) {
1664 m->m_pkthdr.csum_flags &=
1665 IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);
1666 } else {
1667 m->m_pkthdr.csum_flags = 0;
1668 }
0b4e3aa0 1669
1c79356b 1670 /*
9bccf70c 1671 * If small enough for interface, or the interface will take
0b4e3aa0 1672 * care of the fragmentation for us, can just send directly.
1c79356b 1673 */
b0d623f7 1674 if ((u_short)ip->ip_len <= ifp->if_mtu || tso ||
9bccf70c 1675 ifp->if_hwassist & CSUM_FRAGMENT) {
316670eb 1676 if (tso)
b0d623f7 1677 m->m_pkthdr.csum_flags |= CSUM_TSO_IPV4;
316670eb 1678
2d21ac55 1679
b0d623f7 1680#if BYTE_ORDER != BIG_ENDIAN
9bccf70c
A
1681 HTONS(ip->ip_len);
1682 HTONS(ip->ip_off);
b0d623f7 1683#endif
316670eb 1684
1c79356b 1685 ip->ip_sum = 0;
9bccf70c 1686 if (sw_csum & CSUM_DELAY_IP) {
0b4e3aa0 1687 ip->ip_sum = in_cksum(m, hlen);
9bccf70c 1688 }
316670eb 1689
9bccf70c
A
1690#ifndef __APPLE__
1691 /* Record statistics for this interface address. */
1692 if (!(flags & IP_FORWARDING) && ia != NULL) {
1693 ia->ia_ifa.if_opackets++;
1694 ia->ia_ifa.if_obytes += m->m_pkthdr.len;
1695 }
1696#endif
1697
1698#if IPSEC
1699 /* clean ipsec history once it goes out of the node */
55e303ae 1700 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0)
9bccf70c
A
1701 ipsec_delaux(m);
1702#endif
91447636 1703 if (packetchain == 0) {
6d2010ae
A
1704 if (ro->ro_rt && nstat_collect)
1705 nstat_route_tx(ro->ro_rt, 1, m->m_pkthdr.len, 0);
316670eb
A
1706 error = dlil_output(ifp, PF_INET, m, ro->ro_rt,
1707 (struct sockaddr *)dst, 0, adv);
2d21ac55 1708 goto done;
91447636
A
1709 }
1710 else { /* packet chaining allows us to reuse the route for all packets */
6d2010ae 1711 bytecnt += m->m_pkthdr.len;
b0d623f7 1712 mppn = &m->m_nextpkt;
91447636
A
1713 m = m->m_nextpkt;
1714 if (m == NULL) {
b0d623f7
A
1715#if PF
1716sendchain:
1717#endif /* PF */
91447636
A
1718 if (pktcnt > ip_maxchainsent)
1719 ip_maxchainsent = pktcnt;
6d2010ae
A
1720 if (ro->ro_rt && nstat_collect)
1721 nstat_route_tx(ro->ro_rt, pktcnt, bytecnt, 0);
91447636 1722 //send
316670eb
A
1723 error = dlil_output(ifp, PF_INET, packetlist,
1724 ro->ro_rt, (struct sockaddr *)dst, 0, adv);
91447636 1725 pktcnt = 0;
6d2010ae 1726 bytecnt = 0;
91447636 1727 goto done;
316670eb 1728
91447636
A
1729 }
1730 m0 = m;
1731 pktcnt++;
1732 goto loopit;
1733 }
1c79356b
A
1734 }
1735 /*
1736 * Too large for interface; fragment if possible.
1737 * Must be able to put at least 8 bytes per fragment.
1738 */
b0d623f7 1739
6d2010ae
A
1740 if (ip->ip_off & IP_DF || (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4) ||
1741 pktcnt > 0) {
1c79356b
A
1742 error = EMSGSIZE;
1743 /*
1744 * This case can happen if the user changed the MTU
1745 * of an interface after enabling IP on it. Because
1746 * most netifs don't keep track of routes pointing to
1747 * them, there is no way for one to update all its
1748 * routes when the MTU is changed.
1749 */
6d2010ae
A
1750 if (ro->ro_rt) {
1751 RT_LOCK_SPIN(ro->ro_rt);
1752 if ((ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST))
1753 && !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU)
1754 && (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) {
1755 ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
1756 }
1757 RT_UNLOCK(ro->ro_rt);
1758 }
1759 if (pktcnt > 0) {
1760 m0 = packetlist;
1c79356b 1761 }
b0d623f7 1762 OSAddAtomic(1, &ipstat.ips_cantfrag);
1c79356b
A
1763 goto bad;
1764 }
b0d623f7
A
1765
1766 error = ip_fragment(m, ifp, ifp->if_mtu, sw_csum);
1767 if (error != 0) {
1768 m0 = m = NULL;
1c79356b
A
1769 goto bad;
1770 }
1771
b0d623f7
A
1772 KERNEL_DEBUG(DBG_LAYER_END, ip->ip_dst.s_addr,
1773 ip->ip_src.s_addr, ip->ip_p, ip->ip_off, ip->ip_len);
1774
1775 for (m = m0; m; m = m0) {
1776 m0 = m->m_nextpkt;
1777 m->m_nextpkt = 0;
1778#if IPSEC
1779 /* clean ipsec history once it goes out of the node */
1780 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0)
1781 ipsec_delaux(m);
1782#endif
1783 if (error == 0) {
1784#ifndef __APPLE__
1785 /* Record statistics for this interface address. */
1786 if (ia != NULL) {
1787 ia->ia_ifa.if_opackets++;
1788 ia->ia_ifa.if_obytes += m->m_pkthdr.len;
1789 }
1790#endif
1791 if ((packetchain != 0) && (pktcnt > 0))
1792 panic("ip_output: mix of packet in packetlist is wrong=%p", packetlist);
6d2010ae
A
1793 if (ro->ro_rt && nstat_collect)
1794 nstat_route_tx(ro->ro_rt, 1, m->m_pkthdr.len, 0);
316670eb
A
1795 error = dlil_output(ifp, PF_INET, m, ro->ro_rt,
1796 (struct sockaddr *)dst, 0, adv);
b0d623f7
A
1797 } else
1798 m_freem(m);
1799 }
1800
1801 if (error == 0)
1802 OSAddAtomic(1, &ipstat.ips_fragmented);
1803
1804done:
1805 if (ia) {
6d2010ae 1806 IFA_REMREF(&ia->ia_ifa);
b0d623f7
A
1807 ia = NULL;
1808 }
1809#if IPSEC
1810 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) {
ebb1b9f4
A
1811 if (ipsec_state.ro.ro_rt)
1812 rtfree(ipsec_state.ro.ro_rt);
b0d623f7
A
1813 if (sp != NULL) {
1814 KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
1815 printf("DP ip_output call free SP:%x\n", sp));
1816 key_freesp(sp, KEY_SADB_UNLOCKED);
1817 }
1818 }
1819#endif /* IPSEC */
1820
1821 KERNEL_DEBUG(DBG_FNC_IP_OUTPUT | DBG_FUNC_END, error,0,0,0,0);
1822 return (error);
1823bad:
1824 m_freem(m0);
1825 goto done;
1826}
1827
1828int
1829ip_fragment(struct mbuf *m, struct ifnet *ifp, unsigned long mtu, int sw_csum)
1830{
1831 struct ip *ip, *mhip;
1832 int len, hlen, mhlen, firstlen, off, error = 0;
1833 struct mbuf **mnext = &m->m_nextpkt, *m0;
1834 int nfrags = 1;
1835
1836 ip = mtod(m, struct ip *);
1837#ifdef _IP_VHL
1838 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
1839#else
1840 hlen = ip->ip_hl << 2;
1841#endif
1842
1843 firstlen = len = (mtu - hlen) &~ 7;
1844 if (len < 8) {
1845 m_freem(m);
1846 return (EMSGSIZE);
1847 }
1848
9bccf70c
A
1849 /*
1850 * if the interface will not calculate checksums on
1851 * fragmented packets, then do it here.
1852 */
1853 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA &&
1854 (ifp->if_hwassist & CSUM_IP_FRAGS) == 0) {
1855 in_delayed_cksum(m);
9bccf70c
A
1856 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1857 }
0b4e3aa0 1858
1c79356b
A
1859 /*
1860 * Loop through length of segment after first fragment,
1861 * make new header and copy data of each part and link onto chain.
1862 */
1863 m0 = m;
1864 mhlen = sizeof (struct ip);
1865 for (off = hlen + len; off < (u_short)ip->ip_len; off += len) {
2d21ac55 1866 MGETHDR(m, M_DONTWAIT, MT_HEADER); /* MAC-OK */
1c79356b
A
1867 if (m == 0) {
1868 error = ENOBUFS;
b0d623f7 1869 OSAddAtomic(1, &ipstat.ips_odropped);
1c79356b
A
1870 goto sendorfree;
1871 }
0b4e3aa0 1872 m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG;
1c79356b
A
1873 m->m_data += max_linkhdr;
1874 mhip = mtod(m, struct ip *);
1875 *mhip = *ip;
1876 if (hlen > sizeof (struct ip)) {
1877 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
1878 mhip->ip_vhl = IP_MAKE_VHL(IPVERSION, mhlen >> 2);
1879 }
1880 m->m_len = mhlen;
1881 mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF);
1882 if (ip->ip_off & IP_MF)
1883 mhip->ip_off |= IP_MF;
1884 if (off + len >= (u_short)ip->ip_len)
1885 len = (u_short)ip->ip_len - off;
1886 else
1887 mhip->ip_off |= IP_MF;
1888 mhip->ip_len = htons((u_short)(len + mhlen));
1889 m->m_next = m_copy(m0, off, len);
1890 if (m->m_next == 0) {
1891 (void) m_free(m);
1892 error = ENOBUFS; /* ??? */
b0d623f7 1893 OSAddAtomic(1, &ipstat.ips_odropped);
1c79356b
A
1894 goto sendorfree;
1895 }
1896 m->m_pkthdr.len = mhlen + len;
91447636 1897 m->m_pkthdr.rcvif = 0;
9bccf70c 1898 m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
91447636 1899 m->m_pkthdr.socket_id = m0->m_pkthdr.socket_id;
316670eb
A
1900
1901 M_COPY_PFTAG(m, m0);
1902 m_set_service_class(m, m0->m_pkthdr.svc);
1903
2d21ac55
A
1904#if CONFIG_MACF_NET
1905 mac_netinet_fragment(m0, m);
1906#endif
b0d623f7
A
1907
1908#if BYTE_ORDER != BIG_ENDIAN
9bccf70c 1909 HTONS(mhip->ip_off);
b0d623f7
A
1910#endif
1911
1c79356b 1912 mhip->ip_sum = 0;
9bccf70c 1913 if (sw_csum & CSUM_DELAY_IP) {
0b4e3aa0 1914 mhip->ip_sum = in_cksum(m, mhlen);
9bccf70c 1915 }
1c79356b
A
1916 *mnext = m;
1917 mnext = &m->m_nextpkt;
0b4e3aa0 1918 nfrags++;
1c79356b 1919 }
b0d623f7 1920 OSAddAtomic(nfrags, &ipstat.ips_ofragments);
0b4e3aa0
A
1921
1922 /* set first/last markers for fragment chain */
9bccf70c
A
1923 m->m_flags |= M_LASTFRAG;
1924 m0->m_flags |= M_FIRSTFRAG | M_FRAG;
0b4e3aa0
A
1925 m0->m_pkthdr.csum_data = nfrags;
1926
1c79356b
A
1927 /*
1928 * Update first fragment by trimming what's been copied out
1929 * and updating header, then send each fragment (in order).
1930 */
1931 m = m0;
1932 m_adj(m, hlen + firstlen - (u_short)ip->ip_len);
1933 m->m_pkthdr.len = hlen + firstlen;
1934 ip->ip_len = htons((u_short)m->m_pkthdr.len);
9bccf70c 1935 ip->ip_off |= IP_MF;
b0d623f7
A
1936
1937#if BYTE_ORDER != BIG_ENDIAN
9bccf70c 1938 HTONS(ip->ip_off);
b0d623f7 1939#endif
316670eb 1940
1c79356b 1941 ip->ip_sum = 0;
9bccf70c 1942 if (sw_csum & CSUM_DELAY_IP) {
0b4e3aa0 1943 ip->ip_sum = in_cksum(m, hlen);
9bccf70c 1944 }
1c79356b 1945sendorfree:
b0d623f7
A
1946 if (error)
1947 m_freem_list(m0);
1c79356b 1948
1c79356b 1949 return (error);
1c79356b
A
1950}
1951
2d21ac55
A
1952static void
1953ip_out_cksum_stats(int proto, u_int32_t len)
1954{
1955 switch (proto) {
1956 case IPPROTO_TCP:
1957 tcp_out_cksum_stats(len);
1958 break;
1959 case IPPROTO_UDP:
1960 udp_out_cksum_stats(len);
1961 break;
1962 default:
1963 /* keep only TCP or UDP stats for now */
1964 break;
1965 }
1966}
1967
0b4e3aa0 1968void
8f6c56a5 1969in_delayed_cksum_offset(struct mbuf *m0, int ip_offset)
0b4e3aa0 1970{
9bccf70c 1971 struct ip *ip;
8f6c56a5
A
1972 unsigned char buf[sizeof(struct ip)];
1973 u_short csum, offset, ip_len;
6d2010ae
A
1974
1975 /* Save copy of first mbuf pointer and the ip_offset before modifying */
1976 struct mbuf *m = m0;
1977 int ip_offset_copy = ip_offset;
1978
8f6c56a5 1979 while (ip_offset >= m->m_len) {
91447636
A
1980 ip_offset -= m->m_len;
1981 m = m->m_next;
8f6c56a5 1982 if (m == NULL) {
316670eb
A
1983 printf("in_delayed_cksum_withoffset failed - "
1984 "ip_offset wasn't in the packet\n");
91447636
A
1985 return;
1986 }
1987 }
316670eb
A
1988
1989 /*
1990 * In case the IP header is not contiguous, or not 32-bit
1991 * aligned, copy it to a local buffer.
1992 */
1993 if ((ip_offset + sizeof(struct ip) > m->m_len) ||
1994 !IP_HDR_ALIGNED_P(mtod(m, caddr_t) + ip_offset)) {
1995#if DEBUG
b0d623f7 1996 printf("delayed m_pullup, m->len: %d off: %d\n",
743b1565 1997 m->m_len, ip_offset);
8f6c56a5 1998#endif
2d21ac55 1999 m_copydata(m, ip_offset, sizeof(struct ip), (caddr_t) buf);
316670eb
A
2000
2001 ip = (struct ip *)(void *)buf;
8f6c56a5 2002 } else {
316670eb 2003 ip = (struct ip*)(void *)(m->m_data + ip_offset);
91447636 2004 }
316670eb 2005
91447636
A
2006 /* Gross */
2007 if (ip_offset) {
2008 m->m_len -= ip_offset;
2009 m->m_data += ip_offset;
2010 }
316670eb 2011
9bccf70c 2012 offset = IP_VHL_HL(ip->ip_vhl) << 2 ;
8f6c56a5
A
2013
2014 /*
2015 * We could be in the context of an IP or interface filter; in the
2016 * former case, ip_len would be in host (correct) order while for
2017 * the latter it would be in network order. Because of this, we
2018 * attempt to interpret the length field by comparing it against
2019 * the actual packet length. If the comparison fails, byte swap
2020 * the length and check again. If it still fails, then the packet
2021 * is bogus and we give up.
2022 */
2023 ip_len = ip->ip_len;
6d2010ae 2024 if (ip_len != (m0->m_pkthdr.len - ip_offset_copy)) {
8f6c56a5 2025 ip_len = SWAP16(ip_len);
6d2010ae 2026 if (ip_len != (m0->m_pkthdr.len - ip_offset_copy)) {
8f6c56a5
A
2027 printf("in_delayed_cksum_offset: ip_len %d (%d) "
2028 "doesn't match actual length %d\n", ip->ip_len,
6d2010ae 2029 ip_len, (m0->m_pkthdr.len - ip_offset_copy));
8f6c56a5
A
2030 return;
2031 }
2032 }
2033
2034 csum = in_cksum_skip(m, ip_len, offset);
2035
2d21ac55
A
2036 /* Update stats */
2037 ip_out_cksum_stats(ip->ip_p, ip_len - offset);
2038
8f6c56a5 2039 if (m0->m_pkthdr.csum_flags & CSUM_UDP && csum == 0)
0b4e3aa0 2040 csum = 0xffff;
8f6c56a5
A
2041 offset += m0->m_pkthdr.csum_data & 0xFFFF; /* checksum offset */
2042
91447636
A
2043 /* Gross */
2044 if (ip_offset) {
2045 if (M_LEADINGSPACE(m) < ip_offset)
8f6c56a5 2046 panic("in_delayed_cksum_offset - chain modified!\n");
91447636
A
2047 m->m_len += ip_offset;
2048 m->m_data -= ip_offset;
2049 }
0b4e3aa0 2050
8f6c56a5 2051 if (offset > ip_len) /* bogus offset */
0b4e3aa0
A
2052 return;
2053
8f6c56a5 2054 /* Insert the checksum in the existing chain */
91447636 2055 if (offset + ip_offset + sizeof(u_short) > m->m_len) {
8f6c56a5 2056 char tmp[2];
316670eb 2057
8f6c56a5 2058#if DEBUG
b0d623f7 2059 printf("delayed m_copyback, m->len: %d off: %d p: %d\n",
91447636 2060 m->m_len, offset + ip_offset, ip->ip_p);
8f6c56a5 2061#endif
316670eb 2062 *(u_short *)(void *)tmp = csum;
8f6c56a5 2063 m_copyback(m, offset + ip_offset, 2, tmp);
316670eb
A
2064 } else if (IP_HDR_ALIGNED_P(mtod(m, caddr_t) + ip_offset)) {
2065 *(u_short *)(void *)(m->m_data + offset + ip_offset) = csum;
2066 } else {
2067 bcopy(&csum, (m->m_data + offset + ip_offset), sizeof (csum));
2068 }
91447636
A
2069}
2070
/*
 * Convenience wrapper: run in_delayed_cksum_offset() on a chain whose
 * IP header starts at the very beginning of the first mbuf (offset 0).
 */
void
in_delayed_cksum(struct mbuf *m)
{
	in_delayed_cksum_offset(m, 0);
}
2076
2077void
2078in_cksum_offset(struct mbuf* m, size_t ip_offset)
2079{
2080 struct ip* ip = NULL;
2081 int hlen = 0;
8f6c56a5
A
2082 unsigned char buf[sizeof(struct ip)];
2083 int swapped = 0;
6d2010ae
A
2084
2085 /* Save copy of first mbuf pointer and the ip_offset before modifying */
2086 struct mbuf* m0 = m;
2087 size_t ip_offset_copy = ip_offset;
316670eb 2088
8f6c56a5 2089 while (ip_offset >= m->m_len) {
91447636
A
2090 ip_offset -= m->m_len;
2091 m = m->m_next;
8f6c56a5 2092 if (m == NULL) {
316670eb
A
2093 printf("in_cksum_offset failed - ip_offset wasn't "
2094 "in the packet\n");
91447636
A
2095 return;
2096 }
2097 }
8f6c56a5 2098
316670eb
A
2099 /*
2100 * In case the IP header is not contiguous, or not 32-bit
2101 * aligned, copy it to a local buffer.
2102 */
2103 if ((ip_offset + sizeof(struct ip) > m->m_len) ||
2104 !IP_HDR_ALIGNED_P(mtod(m, caddr_t) + ip_offset)) {
8f6c56a5 2105#if DEBUG
316670eb
A
2106 printf("in_cksum_offset - delayed m_pullup, m->len: %d "
2107 "off: %lu\n", m->m_len, ip_offset);
2108#endif
2d21ac55 2109 m_copydata(m, ip_offset, sizeof(struct ip), (caddr_t) buf);
8f6c56a5 2110
316670eb 2111 ip = (struct ip *)(void *)buf;
8f6c56a5 2112 ip->ip_sum = 0;
316670eb
A
2113 m_copyback(m, ip_offset + offsetof(struct ip, ip_sum), 2,
2114 (caddr_t)&ip->ip_sum);
8f6c56a5 2115 } else {
316670eb 2116 ip = (struct ip*)(void *)(m->m_data + ip_offset);
8f6c56a5 2117 ip->ip_sum = 0;
91447636 2118 }
316670eb 2119
91447636
A
2120 /* Gross */
2121 if (ip_offset) {
2122 m->m_len -= ip_offset;
2123 m->m_data += ip_offset;
2124 }
2125
91447636
A
2126#ifdef _IP_VHL
2127 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
2128#else
2129 hlen = ip->ip_hl << 2;
2130#endif
8f6c56a5
A
2131 /*
2132 * We could be in the context of an IP or interface filter; in the
2133 * former case, ip_len would be in host order while for the latter
2134 * it would be in network (correct) order. Because of this, we
2135 * attempt to interpret the length field by comparing it against
2136 * the actual packet length. If the comparison fails, byte swap
2137 * the length and check again. If it still fails, then the packet
2138 * is bogus and we give up.
2139 */
6d2010ae 2140 if (ntohs(ip->ip_len) != (m0->m_pkthdr.len - ip_offset_copy)) {
8f6c56a5
A
2141 ip->ip_len = SWAP16(ip->ip_len);
2142 swapped = 1;
6d2010ae 2143 if (ntohs(ip->ip_len) != (m0->m_pkthdr.len - ip_offset_copy)) {
8f6c56a5
A
2144 ip->ip_len = SWAP16(ip->ip_len);
2145 printf("in_cksum_offset: ip_len %d (%d) "
2d21ac55 2146 "doesn't match actual length %lu\n",
8f6c56a5 2147 ip->ip_len, SWAP16(ip->ip_len),
6d2010ae 2148 (m0->m_pkthdr.len - ip_offset_copy));
8f6c56a5
A
2149 return;
2150 }
2151 }
2152
91447636
A
2153 ip->ip_sum = 0;
2154 ip->ip_sum = in_cksum(m, hlen);
8f6c56a5
A
2155 if (swapped)
2156 ip->ip_len = SWAP16(ip->ip_len);
2157
91447636
A
2158 /* Gross */
2159 if (ip_offset) {
2160 if (M_LEADINGSPACE(m) < ip_offset)
2161 panic("in_cksum_offset - chain modified!\n");
2162 m->m_len += ip_offset;
2163 m->m_data -= ip_offset;
9bccf70c 2164 }
8f6c56a5 2165
316670eb
A
2166 /*
2167 * Insert the checksum in the existing chain if IP header not
2168 * contiguous, or if it's not 32-bit aligned, i.e. all the cases
2169 * where it was copied to a local buffer.
2170 */
8f6c56a5
A
2171 if (ip_offset + sizeof(struct ip) > m->m_len) {
2172 char tmp[2];
2173
2174#if DEBUG
316670eb
A
2175 printf("in_cksum_offset m_copyback, m->len: %u off: %lu "
2176 "p: %d\n", m->m_len,
2177 ip_offset + offsetof(struct ip, ip_sum), ip->ip_p);
8f6c56a5 2178#endif
316670eb 2179 *(u_short *)(void *)tmp = ip->ip_sum;
8f6c56a5 2180 m_copyback(m, ip_offset + offsetof(struct ip, ip_sum), 2, tmp);
316670eb
A
2181 } else if (!IP_HDR_ALIGNED_P(mtod(m, caddr_t) + ip_offset)) {
2182 bcopy(&ip->ip_sum,
2183 (m->m_data + ip_offset + offsetof(struct ip, ip_sum)),
2184 sizeof (u_short));
8f6c56a5 2185 }
0b4e3aa0
A
2186}
2187
1c79356b
A
2188/*
2189 * Insert IP options into preformed packet.
2190 * Adjust IP destination as required for IP source routing,
2191 * as indicated by a non-zero in_addr at the start of the options.
2192 *
2193 * XXX This routine assumes that the packet has no options in place.
2194 */
2195static struct mbuf *
2196ip_insertoptions(m, opt, phlen)
2197 register struct mbuf *m;
2198 struct mbuf *opt;
2199 int *phlen;
2200{
2201 register struct ipoption *p = mtod(opt, struct ipoption *);
2202 struct mbuf *n;
2203 register struct ip *ip = mtod(m, struct ip *);
2204 unsigned optlen;
2205
2206 optlen = opt->m_len - sizeof(p->ipopt_dst);
2207 if (optlen + (u_short)ip->ip_len > IP_MAXPACKET)
2208 return (m); /* XXX should fail */
2209 if (p->ipopt_dst.s_addr)
2210 ip->ip_dst = p->ipopt_dst;
2211 if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
2d21ac55 2212 MGETHDR(n, M_DONTWAIT, MT_HEADER); /* MAC-OK */
1c79356b
A
2213 if (n == 0)
2214 return (m);
91447636 2215 n->m_pkthdr.rcvif = 0;
2d21ac55
A
2216#if CONFIG_MACF_NET
2217 mac_mbuf_label_copy(m, n);
2218#endif
1c79356b
A
2219 n->m_pkthdr.len = m->m_pkthdr.len + optlen;
2220 m->m_len -= sizeof(struct ip);
2221 m->m_data += sizeof(struct ip);
2222 n->m_next = m;
2223 m = n;
2224 m->m_len = optlen + sizeof(struct ip);
2225 m->m_data += max_linkhdr;
2226 (void)memcpy(mtod(m, void *), ip, sizeof(struct ip));
2227 } else {
2228 m->m_data -= optlen;
2229 m->m_len += optlen;
2230 m->m_pkthdr.len += optlen;
2231 ovbcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
2232 }
2233 ip = mtod(m, struct ip *);
2234 bcopy(p->ipopt_list, ip + 1, optlen);
2235 *phlen = sizeof(struct ip) + optlen;
2236 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, *phlen >> 2);
2237 ip->ip_len += optlen;
2238 return (m);
2239}
2240
2241/*
2242 * Copy options from ip to jp,
2243 * omitting those not copied during fragmentation.
2244 */
1c79356b
A
2245int
2246ip_optcopy(ip, jp)
2247 struct ip *ip, *jp;
2248{
2249 register u_char *cp, *dp;
2250 int opt, optlen, cnt;
2251
2252 cp = (u_char *)(ip + 1);
2253 dp = (u_char *)(jp + 1);
2254 cnt = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof (struct ip);
2255 for (; cnt > 0; cnt -= optlen, cp += optlen) {
2256 opt = cp[0];
2257 if (opt == IPOPT_EOL)
2258 break;
2259 if (opt == IPOPT_NOP) {
2260 /* Preserve for IP mcast tunnel's LSRR alignment. */
2261 *dp++ = IPOPT_NOP;
2262 optlen = 1;
2263 continue;
9bccf70c
A
2264 }
2265#if DIAGNOSTIC
2266 if (cnt < IPOPT_OLEN + sizeof(*cp))
2267 panic("malformed IPv4 option passed to ip_optcopy");
2268#endif
2269 optlen = cp[IPOPT_OLEN];
2270#if DIAGNOSTIC
2271 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
2272 panic("malformed IPv4 option passed to ip_optcopy");
2273#endif
1c79356b
A
2274 /* bogus lengths should have been caught by ip_dooptions */
2275 if (optlen > cnt)
2276 optlen = cnt;
2277 if (IPOPT_COPIED(opt)) {
2278 bcopy(cp, dp, optlen);
2279 dp += optlen;
2280 }
2281 }
2282 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
2283 *dp++ = IPOPT_EOL;
2284 return (optlen);
2285}
2286
2287/*
2288 * IP socket option processing.
2289 */
2290int
2291ip_ctloutput(so, sopt)
2292 struct socket *so;
2293 struct sockopt *sopt;
2294{
2295 struct inpcb *inp = sotoinpcb(so);
2296 int error, optval;
2297
2298 error = optval = 0;
2299 if (sopt->sopt_level != IPPROTO_IP) {
2300 return (EINVAL);
2301 }
2302
2303 switch (sopt->sopt_dir) {
2304 case SOPT_SET:
2305 switch (sopt->sopt_name) {
2306 case IP_OPTIONS:
2307#ifdef notyet
2308 case IP_RETOPTS:
2309#endif
2310 {
2311 struct mbuf *m;
2312 if (sopt->sopt_valsize > MLEN) {
2313 error = EMSGSIZE;
2314 break;
2315 }
b0d623f7
A
2316 MGET(m, sopt->sopt_p != kernproc ? M_WAIT : M_DONTWAIT,
2317 MT_HEADER);
1c79356b
A
2318 if (m == 0) {
2319 error = ENOBUFS;
2320 break;
2321 }
2322 m->m_len = sopt->sopt_valsize;
2323 error = sooptcopyin(sopt, mtod(m, char *), m->m_len,
2324 m->m_len);
2325 if (error)
2326 break;
2327
2328 return (ip_pcbopts(sopt->sopt_name, &inp->inp_options,
2329 m));
2330 }
2331
2332 case IP_TOS:
2333 case IP_TTL:
2334 case IP_RECVOPTS:
2335 case IP_RECVRETOPTS:
2336 case IP_RECVDSTADDR:
2337 case IP_RECVIF:
55e303ae 2338 case IP_RECVTTL:
6d2010ae 2339 case IP_RECVPKTINFO:
1c79356b
A
2340 error = sooptcopyin(sopt, &optval, sizeof optval,
2341 sizeof optval);
2342 if (error)
2343 break;
2344
2345 switch (sopt->sopt_name) {
2346 case IP_TOS:
2347 inp->inp_ip_tos = optval;
2348 break;
2349
2350 case IP_TTL:
2351 inp->inp_ip_ttl = optval;
2352 break;
2353#define OPTSET(bit) \
2354 if (optval) \
2355 inp->inp_flags |= bit; \
2356 else \
2357 inp->inp_flags &= ~bit;
2358
2359 case IP_RECVOPTS:
2360 OPTSET(INP_RECVOPTS);
2361 break;
2362
2363 case IP_RECVRETOPTS:
2364 OPTSET(INP_RECVRETOPTS);
2365 break;
2366
2367 case IP_RECVDSTADDR:
2368 OPTSET(INP_RECVDSTADDR);
2369 break;
2370
2371 case IP_RECVIF:
2372 OPTSET(INP_RECVIF);
2373 break;
2374
55e303ae
A
2375 case IP_RECVTTL:
2376 OPTSET(INP_RECVTTL);
2377 break;
2378
6d2010ae
A
2379 case IP_RECVPKTINFO:
2380 OPTSET(INP_PKTINFO);
2381 break;
1c79356b
A
2382 }
2383 break;
2384#undef OPTSET
2385
c910b4d9
A
2386#if CONFIG_FORCE_OUT_IFP
2387 /*
2388 * Apple private interface, similar to IP_BOUND_IF, except
2389 * that the parameter is a NULL-terminated string containing
2390 * the name of the network interface; an emptry string means
2391 * unbind. Applications are encouraged to use IP_BOUND_IF
2392 * instead, as that is the current "official" API.
2393 */
2d21ac55 2394 case IP_FORCE_OUT_IFP: {
c910b4d9
A
2395 char ifname[IFNAMSIZ];
2396 unsigned int ifscope;
2397
2398 /* This option is settable only for IPv4 */
2399 if (!(inp->inp_vflag & INP_IPV4)) {
2400 error = EINVAL;
2401 break;
2402 }
2403
2d21ac55
A
2404 /* Verify interface name parameter is sane */
2405 if (sopt->sopt_valsize > sizeof(ifname)) {
2406 error = EINVAL;
2407 break;
2408 }
c910b4d9 2409
2d21ac55
A
2410 /* Copy the interface name */
2411 if (sopt->sopt_valsize != 0) {
c910b4d9
A
2412 error = sooptcopyin(sopt, ifname,
2413 sizeof (ifname), sopt->sopt_valsize);
2d21ac55
A
2414 if (error)
2415 break;
2416 }
c910b4d9 2417
6d2010ae 2418 if (sopt->sopt_valsize == 0 || ifname[0] == '\0') {
c910b4d9
A
2419 /* Unbind this socket from any interface */
2420 ifscope = IFSCOPE_NONE;
2421 } else {
2422 ifnet_t ifp;
2423
2424 /* Verify name is NULL terminated */
6d2010ae 2425 if (ifname[sopt->sopt_valsize - 1] != '\0') {
c910b4d9
A
2426 error = EINVAL;
2427 break;
2d21ac55 2428 }
c910b4d9
A
2429
2430 /* Bail out if given bogus interface name */
2431 if (ifnet_find_by_name(ifname, &ifp) != 0) {
2432 error = ENXIO;
2433 break;
2434 }
2435
2436 /* Bind this socket to this interface */
2437 ifscope = ifp->if_index;
2438
2439 /*
2440 * Won't actually free; since we don't release
2441 * this later, we should do it now.
2442 */
2443 ifnet_release(ifp);
2d21ac55 2444 }
316670eb 2445 error = inp_bindif(inp, ifscope);
2d21ac55
A
2446 }
2447 break;
2448#endif
6d2010ae
A
2449 /*
2450 * Multicast socket options are processed by the in_mcast
2451 * module.
2452 */
1c79356b 2453 case IP_MULTICAST_IF:
6d2010ae 2454 case IP_MULTICAST_IFINDEX:
1c79356b
A
2455 case IP_MULTICAST_VIF:
2456 case IP_MULTICAST_TTL:
2457 case IP_MULTICAST_LOOP:
2458 case IP_ADD_MEMBERSHIP:
2459 case IP_DROP_MEMBERSHIP:
6d2010ae
A
2460 case IP_ADD_SOURCE_MEMBERSHIP:
2461 case IP_DROP_SOURCE_MEMBERSHIP:
2462 case IP_BLOCK_SOURCE:
2463 case IP_UNBLOCK_SOURCE:
2464 case IP_MSFILTER:
2465 case MCAST_JOIN_GROUP:
2466 case MCAST_LEAVE_GROUP:
2467 case MCAST_JOIN_SOURCE_GROUP:
2468 case MCAST_LEAVE_SOURCE_GROUP:
2469 case MCAST_BLOCK_SOURCE:
2470 case MCAST_UNBLOCK_SOURCE:
2471 error = inp_setmoptions(inp, sopt);
1c79356b
A
2472 break;
2473
2474 case IP_PORTRANGE:
2475 error = sooptcopyin(sopt, &optval, sizeof optval,
2476 sizeof optval);
2477 if (error)
2478 break;
2479
2480 switch (optval) {
2481 case IP_PORTRANGE_DEFAULT:
2482 inp->inp_flags &= ~(INP_LOWPORT);
2483 inp->inp_flags &= ~(INP_HIGHPORT);
2484 break;
2485
2486 case IP_PORTRANGE_HIGH:
2487 inp->inp_flags &= ~(INP_LOWPORT);
2488 inp->inp_flags |= INP_HIGHPORT;
2489 break;
2490
2491 case IP_PORTRANGE_LOW:
2492 inp->inp_flags &= ~(INP_HIGHPORT);
2493 inp->inp_flags |= INP_LOWPORT;
2494 break;
2495
2496 default:
2497 error = EINVAL;
2498 break;
2499 }
2500 break;
2501
2502#if IPSEC
2503 case IP_IPSEC_POLICY:
2504 {
2505 caddr_t req = NULL;
2506 size_t len = 0;
2507 int priv;
2508 struct mbuf *m;
2509 int optname;
2510
9bccf70c 2511 if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
1c79356b 2512 break;
9bccf70c 2513 if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
1c79356b 2514 break;
b0d623f7 2515 priv = (proc_suser(sopt->sopt_p) == 0);
1c79356b
A
2516 if (m) {
2517 req = mtod(m, caddr_t);
2518 len = m->m_len;
2519 }
2520 optname = sopt->sopt_name;
2521 error = ipsec4_set_policy(inp, optname, req, len, priv);
2522 m_freem(m);
2523 break;
2524 }
2525#endif /*IPSEC*/
2526
2d21ac55
A
2527#if TRAFFIC_MGT
2528 case IP_TRAFFIC_MGT_BACKGROUND:
2529 {
2530 unsigned background = 0;
2531 error = sooptcopyin(sopt, &background, sizeof(background), sizeof(background));
2532 if (error)
2533 break;
2534
d1ecb069 2535 if (background) {
6d2010ae
A
2536 socket_set_traffic_mgt_flags_locked(so,
2537 TRAFFIC_MGT_SO_BACKGROUND);
d1ecb069 2538 } else {
6d2010ae
A
2539 socket_clear_traffic_mgt_flags_locked(so,
2540 TRAFFIC_MGT_SO_BACKGROUND);
d1ecb069 2541 }
2d21ac55
A
2542
2543 break;
2544 }
2545#endif /* TRAFFIC_MGT */
2546
c910b4d9
A
2547 /*
2548 * On a multihomed system, scoped routing can be used to
2549 * restrict the source interface used for sending packets.
2550 * The socket option IP_BOUND_IF binds a particular AF_INET
2551 * socket to an interface such that data sent on the socket
2552 * is restricted to that interface. This is unlike the
2553 * SO_DONTROUTE option where the routing table is bypassed;
2554 * therefore it allows for a greater flexibility and control
2555 * over the system behavior, and does not place any restriction
2556 * on the destination address type (e.g. unicast, multicast,
2557 * or broadcast if applicable) or whether or not the host is
2558 * directly reachable. Note that in the multicast transmit
6d2010ae
A
2559 * case, IP_MULTICAST_{IF,IFINDEX} takes precedence over
2560 * IP_BOUND_IF, since the former practically bypasses the
2561 * routing table; in this case, IP_BOUND_IF sets the default
2562 * interface used for sending multicast packets in the absence
2563 * of an explicit multicast transmit interface.
c910b4d9
A
2564 */
2565 case IP_BOUND_IF:
2566 /* This option is settable only for IPv4 */
2567 if (!(inp->inp_vflag & INP_IPV4)) {
2568 error = EINVAL;
2569 break;
2570 }
2571
2572 error = sooptcopyin(sopt, &optval, sizeof (optval),
2573 sizeof (optval));
2574
2575 if (error)
2576 break;
2577
316670eb 2578 error = inp_bindif(inp, optval);
6d2010ae
A
2579 break;
2580
2581 case IP_NO_IFT_CELLULAR:
2582 /* This option is settable only for IPv4 */
2583 if (!(inp->inp_vflag & INP_IPV4)) {
2584 error = EINVAL;
2585 break;
2586 }
2587
2588 error = sooptcopyin(sopt, &optval, sizeof (optval),
2589 sizeof (optval));
2590
2591 if (error)
2592 break;
2593
2594 error = inp_nocellular(inp, optval);
2595 break;
2596
2597 case IP_OUT_IF:
2598 /* This option is not settable */
2599 error = EINVAL;
c910b4d9
A
2600 break;
2601
1c79356b
A
2602 default:
2603 error = ENOPROTOOPT;
2604 break;
2605 }
2606 break;
2607
2608 case SOPT_GET:
2609 switch (sopt->sopt_name) {
2610 case IP_OPTIONS:
2611 case IP_RETOPTS:
2612 if (inp->inp_options)
2613 error = sooptcopyout(sopt,
2614 mtod(inp->inp_options,
2615 char *),
2616 inp->inp_options->m_len);
2617 else
2618 sopt->sopt_valsize = 0;
2619 break;
2620
2621 case IP_TOS:
2622 case IP_TTL:
2623 case IP_RECVOPTS:
2624 case IP_RECVRETOPTS:
2625 case IP_RECVDSTADDR:
2626 case IP_RECVIF:
55e303ae 2627 case IP_RECVTTL:
1c79356b 2628 case IP_PORTRANGE:
6d2010ae 2629 case IP_RECVPKTINFO:
1c79356b
A
2630 switch (sopt->sopt_name) {
2631
2632 case IP_TOS:
2633 optval = inp->inp_ip_tos;
2634 break;
2635
2636 case IP_TTL:
2637 optval = inp->inp_ip_ttl;
2638 break;
2639
2640#define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0)
2641
2642 case IP_RECVOPTS:
2643 optval = OPTBIT(INP_RECVOPTS);
2644 break;
2645
2646 case IP_RECVRETOPTS:
2647 optval = OPTBIT(INP_RECVRETOPTS);
2648 break;
2649
2650 case IP_RECVDSTADDR:
2651 optval = OPTBIT(INP_RECVDSTADDR);
2652 break;
2653
2654 case IP_RECVIF:
2655 optval = OPTBIT(INP_RECVIF);
2656 break;
2657
55e303ae
A
2658 case IP_RECVTTL:
2659 optval = OPTBIT(INP_RECVTTL);
2660 break;
2661
1c79356b
A
2662 case IP_PORTRANGE:
2663 if (inp->inp_flags & INP_HIGHPORT)
2664 optval = IP_PORTRANGE_HIGH;
2665 else if (inp->inp_flags & INP_LOWPORT)
2666 optval = IP_PORTRANGE_LOW;
2667 else
2668 optval = 0;
2669 break;
2670
6d2010ae
A
2671 case IP_RECVPKTINFO:
2672 optval = OPTBIT(INP_PKTINFO);
2673 break;
1c79356b
A
2674 }
2675 error = sooptcopyout(sopt, &optval, sizeof optval);
2676 break;
2677
2678 case IP_MULTICAST_IF:
6d2010ae 2679 case IP_MULTICAST_IFINDEX:
1c79356b
A
2680 case IP_MULTICAST_VIF:
2681 case IP_MULTICAST_TTL:
2682 case IP_MULTICAST_LOOP:
6d2010ae
A
2683 case IP_MSFILTER:
2684 error = inp_getmoptions(inp, sopt);
1c79356b
A
2685 break;
2686
2687#if IPSEC
2688 case IP_IPSEC_POLICY:
2689 {
2690 struct mbuf *m = NULL;
1c79356b 2691 caddr_t req = NULL;
9bccf70c 2692 size_t len = 0;
1c79356b 2693
9bccf70c 2694 if (m != 0) {
1c79356b
A
2695 req = mtod(m, caddr_t);
2696 len = m->m_len;
2697 }
1c79356b
A
2698 error = ipsec4_get_policy(sotoinpcb(so), req, len, &m);
2699 if (error == 0)
9bccf70c 2700 error = soopt_mcopyout(sopt, m); /* XXX */
1c79356b
A
2701 if (error == 0)
2702 m_freem(m);
2703 break;
2704 }
2705#endif /*IPSEC*/
2706
2d21ac55
A
2707#if TRAFFIC_MGT
2708 case IP_TRAFFIC_MGT_BACKGROUND:
2709 {
6d2010ae 2710 unsigned background = (so->so_traffic_mgt_flags & TRAFFIC_MGT_SO_BACKGROUND);
2d21ac55
A
2711 return (sooptcopyout(sopt, &background, sizeof(background)));
2712 break;
2713 }
2714#endif /* TRAFFIC_MGT */
2715
c910b4d9
A
2716 case IP_BOUND_IF:
2717 if (inp->inp_flags & INP_BOUND_IF)
316670eb 2718 optval = inp->inp_boundifp->if_index;
c910b4d9
A
2719 error = sooptcopyout(sopt, &optval, sizeof (optval));
2720 break;
2721
6d2010ae
A
2722 case IP_NO_IFT_CELLULAR:
2723 optval = (inp->inp_flags & INP_NO_IFT_CELLULAR) ? 1 : 0;
2724 error = sooptcopyout(sopt, &optval, sizeof (optval));
2725 break;
2726
2727 case IP_OUT_IF:
316670eb
A
2728 optval = (inp->inp_last_outifp != NULL) ?
2729 inp->inp_last_outifp->if_index : 0;
6d2010ae
A
2730 error = sooptcopyout(sopt, &optval, sizeof (optval));
2731 break;
2732
1c79356b
A
2733 default:
2734 error = ENOPROTOOPT;
2735 break;
2736 }
2737 break;
2738 }
2739 return (error);
2740}
2741
2742/*
2743 * Set up IP options in pcb for insertion in output packets.
2744 * Store in mbuf with pointer in pcbopt, adding pseudo-option
2745 * with destination address if source routed.
2746 */
2747static int
2d21ac55
A
2748ip_pcbopts(
2749 __unused int optname,
2750 struct mbuf **pcbopt,
2751 register struct mbuf *m)
1c79356b
A
2752{
2753 register int cnt, optlen;
2754 register u_char *cp;
2755 u_char opt;
2756
2757 /* turn off any old options */
2758 if (*pcbopt)
2759 (void)m_free(*pcbopt);
2760 *pcbopt = 0;
2761 if (m == (struct mbuf *)0 || m->m_len == 0) {
2762 /*
2763 * Only turning off any previous options.
2764 */
2765 if (m)
2766 (void)m_free(m);
2767 return (0);
2768 }
2769
2770#ifndef vax
2771 if (m->m_len % sizeof(int32_t))
2772 goto bad;
2773#endif
2774 /*
2775 * IP first-hop destination address will be stored before
2776 * actual options; move other options back
2777 * and clear it when none present.
2778 */
2779 if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
2780 goto bad;
2781 cnt = m->m_len;
2782 m->m_len += sizeof(struct in_addr);
2783 cp = mtod(m, u_char *) + sizeof(struct in_addr);
2784 ovbcopy(mtod(m, caddr_t), (caddr_t)cp, (unsigned)cnt);
2785 bzero(mtod(m, caddr_t), sizeof(struct in_addr));
2786
2787 for (; cnt > 0; cnt -= optlen, cp += optlen) {
2788 opt = cp[IPOPT_OPTVAL];
2789 if (opt == IPOPT_EOL)
2790 break;
2791 if (opt == IPOPT_NOP)
2792 optlen = 1;
2793 else {
2794 if (cnt < IPOPT_OLEN + sizeof(*cp))
2795 goto bad;
2796 optlen = cp[IPOPT_OLEN];
2797 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
2798 goto bad;
2799 }
2800 switch (opt) {
2801
2802 default:
2803 break;
2804
2805 case IPOPT_LSRR:
2806 case IPOPT_SSRR:
2807 /*
2808 * user process specifies route as:
2809 * ->A->B->C->D
2810 * D must be our final destination (but we can't
2811 * check that since we may not have connected yet).
2812 * A is first hop destination, which doesn't appear in
2813 * actual IP option, but is stored before the options.
2814 */
2815 if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
2816 goto bad;
2817 m->m_len -= sizeof(struct in_addr);
2818 cnt -= sizeof(struct in_addr);
2819 optlen -= sizeof(struct in_addr);
2820 cp[IPOPT_OLEN] = optlen;
2821 /*
2822 * Move first hop before start of options.
2823 */
2824 bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
2825 sizeof(struct in_addr));
2826 /*
2827 * Then copy rest of options back
2828 * to close up the deleted entry.
2829 */
2830 ovbcopy((caddr_t)(&cp[IPOPT_OFFSET+1] +
2831 sizeof(struct in_addr)),
2832 (caddr_t)&cp[IPOPT_OFFSET+1],
2833 (unsigned)cnt + sizeof(struct in_addr));
2834 break;
2835 }
2836 }
2837 if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
2838 goto bad;
2839 *pcbopt = m;
2840 return (0);
2841
2842bad:
2843 (void)m_free(m);
2844 return (EINVAL);
2845}
2846
6d2010ae
A
2847void
2848ip_moptions_init(void)
9bccf70c 2849{
6d2010ae 2850 PE_parse_boot_argn("ifa_debug", &imo_debug, sizeof (imo_debug));
9bccf70c 2851
6d2010ae
A
2852 imo_size = (imo_debug == 0) ? sizeof (struct ip_moptions) :
2853 sizeof (struct ip_moptions_dbg);
2854
2855 imo_zone = zinit(imo_size, IMO_ZONE_MAX * imo_size, 0,
2856 IMO_ZONE_NAME);
2857 if (imo_zone == NULL) {
2858 panic("%s: failed allocating %s", __func__, IMO_ZONE_NAME);
2859 /* NOTREACHED */
9bccf70c 2860 }
6d2010ae 2861 zone_change(imo_zone, Z_EXPAND, TRUE);
9bccf70c
A
2862}
2863
6d2010ae
A
2864void
2865imo_addref(struct ip_moptions *imo, int locked)
1c79356b 2866{
6d2010ae
A
2867 if (!locked)
2868 IMO_LOCK(imo);
2869 else
2870 IMO_LOCK_ASSERT_HELD(imo);
1c79356b 2871
6d2010ae
A
2872 if (++imo->imo_refcnt == 0) {
2873 panic("%s: imo %p wraparound refcnt\n", __func__, imo);
2874 /* NOTREACHED */
2875 } else if (imo->imo_trace != NULL) {
2876 (*imo->imo_trace)(imo, TRUE);
1c79356b
A
2877 }
2878
6d2010ae
A
2879 if (!locked)
2880 IMO_UNLOCK(imo);
1c79356b
A
2881}
2882
6d2010ae
A
2883void
2884imo_remref(struct ip_moptions *imo)
55e303ae 2885{
55e303ae 2886 int i;
b0d623f7 2887
6d2010ae
A
2888 IMO_LOCK(imo);
2889 if (imo->imo_refcnt == 0) {
2890 panic("%s: imo %p negative refcnt", __func__, imo);
2891 /* NOTREACHED */
2892 } else if (imo->imo_trace != NULL) {
2893 (*imo->imo_trace)(imo, FALSE);
55e303ae 2894 }
b0d623f7 2895
6d2010ae
A
2896 --imo->imo_refcnt;
2897 if (imo->imo_refcnt > 0) {
2898 IMO_UNLOCK(imo);
2899 return;
55e303ae
A
2900 }
2901
55e303ae 2902 for (i = 0; i < imo->imo_num_memberships; ++i) {
6d2010ae 2903 struct in_mfilter *imf;
b0d623f7 2904
6d2010ae
A
2905 imf = imo->imo_mfilters ? &imo->imo_mfilters[i] : NULL;
2906 if (imf != NULL)
2907 imf_leave(imf);
b0d623f7 2908
6d2010ae 2909 (void) in_leavegroup(imo->imo_membership[i], imf);
55e303ae 2910
6d2010ae
A
2911 if (imf != NULL)
2912 imf_purge(imf);
55e303ae 2913
6d2010ae
A
2914 INM_REMREF(imo->imo_membership[i]);
2915 imo->imo_membership[i] = NULL;
55e303ae 2916 }
6d2010ae
A
2917 imo->imo_num_memberships = 0;
2918 if (imo->imo_mfilters != NULL) {
2919 FREE(imo->imo_mfilters, M_INMFILTER);
2920 imo->imo_mfilters = NULL;
55e303ae 2921 }
6d2010ae
A
2922 if (imo->imo_membership != NULL) {
2923 FREE(imo->imo_membership, M_IPMOPTS);
2924 imo->imo_membership = NULL;
55e303ae 2925 }
6d2010ae 2926 IMO_UNLOCK(imo);
1c79356b 2927
6d2010ae 2928 lck_mtx_destroy(&imo->imo_lock, ifa_mtx_grp);
1c79356b 2929
6d2010ae
A
2930 if (!(imo->imo_debug & IFD_ALLOC)) {
2931 panic("%s: imo %p cannot be freed", __func__, imo);
2932 /* NOTREACHED */
2933 }
2934 zfree(imo_zone, imo);
2935}
1c79356b 2936
6d2010ae
A
2937static void
2938imo_trace(struct ip_moptions *imo, int refhold)
2939{
2940 struct ip_moptions_dbg *imo_dbg = (struct ip_moptions_dbg *)imo;
2941 ctrace_t *tr;
2942 u_int32_t idx;
2943 u_int16_t *cnt;
2944
2945 if (!(imo->imo_debug & IFD_DEBUG)) {
2946 panic("%s: imo %p has no debug structure", __func__, imo);
2947 /* NOTREACHED */
2948 }
2949 if (refhold) {
2950 cnt = &imo_dbg->imo_refhold_cnt;
2951 tr = imo_dbg->imo_refhold;
2952 } else {
2953 cnt = &imo_dbg->imo_refrele_cnt;
2954 tr = imo_dbg->imo_refrele;
1c79356b 2955 }
6d2010ae
A
2956
2957 idx = atomic_add_16_ov(cnt, 1) % IMO_TRACE_HIST_SIZE;
2958 ctrace_record(&tr[idx]);
1c79356b
A
2959}
2960
6d2010ae
A
2961struct ip_moptions *
2962ip_allocmoptions(int how)
1c79356b 2963{
6d2010ae 2964 struct ip_moptions *imo;
1c79356b 2965
6d2010ae 2966 imo = (how == M_WAITOK) ? zalloc(imo_zone) : zalloc_noblock(imo_zone);
1c79356b 2967 if (imo != NULL) {
6d2010ae
A
2968 bzero(imo, imo_size);
2969 lck_mtx_init(&imo->imo_lock, ifa_mtx_grp, ifa_mtx_attr);
2970 imo->imo_debug |= IFD_ALLOC;
2971 if (imo_debug != 0) {
2972 imo->imo_debug |= IFD_DEBUG;
2973 imo->imo_trace = imo_trace;
2974 }
2975 IMO_ADDREF(imo);
1c79356b 2976 }
6d2010ae
A
2977
2978 return (imo);
1c79356b
A
2979}
2980
2981/*
2982 * Routine called from ip_output() to loop back a copy of an IP multicast
2983 * packet to the input queue of a specified interface. Note that this
2984 * calls the output routine of the loopback "driver", but with an interface
2985 * pointer that might NOT be a loopback interface -- evil, but easier than
2986 * replicating that code here.
2987 */
2988static void
2989ip_mloopback(ifp, m, dst, hlen)
2990 struct ifnet *ifp;
2991 register struct mbuf *m;
2992 register struct sockaddr_in *dst;
2993 int hlen;
2994{
2995 register struct ip *ip;
2996 struct mbuf *copym;
2d21ac55 2997 int sw_csum = (apple_hwcksum_tx == 0);
1c79356b
A
2998
2999 copym = m_copy(m, 0, M_COPYALL);
3000 if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen))
3001 copym = m_pullup(copym, hlen);
2d21ac55
A
3002
3003 if (copym == NULL)
3004 return;
3005
3006 /*
3007 * We don't bother to fragment if the IP length is greater
3008 * than the interface's MTU. Can this possibly matter?
3009 */
3010 ip = mtod(copym, struct ip *);
b0d623f7
A
3011
3012#if BYTE_ORDER != BIG_ENDIAN
2d21ac55
A
3013 HTONS(ip->ip_len);
3014 HTONS(ip->ip_off);
b0d623f7
A
3015#endif
3016
2d21ac55
A
3017 ip->ip_sum = 0;
3018 ip->ip_sum = in_cksum(copym, hlen);
3019 /*
3020 * NB:
3021 * It's not clear whether there are any lingering
3022 * reentrancy problems in other areas which might
3023 * be exposed by using ip_input directly (in
3024 * particular, everything which modifies the packet
3025 * in-place). Yet another option is using the
3026 * protosw directly to deliver the looped back
3027 * packet. For the moment, we'll err on the side
3028 * of safety by using if_simloop().
3029 */
1c79356b 3030#if 1 /* XXX */
2d21ac55
A
3031 if (dst->sin_family != AF_INET) {
3032 printf("ip_mloopback: bad address family %d\n",
3033 dst->sin_family);
3034 dst->sin_family = AF_INET;
3035 }
1c79356b
A
3036#endif
3037
9bccf70c 3038 /*
2d21ac55
A
3039 * Mark checksum as valid or calculate checksum for loopback.
3040 *
3041 * This is done this way because we have to embed the ifp of
3042 * the interface we will send the original copy of the packet
3043 * out on in the mbuf. ip_input will check if_hwassist of the
3044 * embedded ifp and ignore all csum_flags if if_hwassist is 0.
3045 * The UDP checksum has not been calculated yet.
3046 */
3047 if (sw_csum || (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA)) {
3048 if (!sw_csum && IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist)) {
3049 copym->m_pkthdr.csum_flags |=
3050 CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
3051 CSUM_IP_CHECKED | CSUM_IP_VALID;
3052 copym->m_pkthdr.csum_data = 0xffff;
1c79356b 3053 } else {
b0d623f7
A
3054
3055#if BYTE_ORDER != BIG_ENDIAN
2d21ac55 3056 NTOHS(ip->ip_len);
b0d623f7
A
3057#endif
3058
2d21ac55 3059 in_delayed_cksum(copym);
b0d623f7
A
3060
3061#if BYTE_ORDER != BIG_ENDIAN
2d21ac55 3062 HTONS(ip->ip_len);
b0d623f7
A
3063#endif
3064
1c79356b 3065 }
2d21ac55 3066 }
1c79356b 3067
2d21ac55
A
3068 /*
3069 * TedW:
3070 * We need to send all loopback traffic down to dlil in case
3071 * a filter has tapped-in.
3072 */
3073
3074 /*
3075 * Stuff the 'real' ifp into the pkthdr, to be used in matching
3076 * in ip_input(); we need the loopback ifp/dl_tag passed as args
3077 * to make the loopback driver compliant with the data link
3078 * requirements.
3079 */
3080 if (lo_ifp) {
3081 copym->m_pkthdr.rcvif = ifp;
3082 dlil_output(lo_ifp, PF_INET, copym, 0,
316670eb 3083 (struct sockaddr *) dst, 0, NULL);
2d21ac55
A
3084 } else {
3085 printf("Warning: ip_output call to dlil_find_dltag failed!\n");
3086 m_freem(copym);
1c79356b
A
3087 }
3088}
c910b4d9
A
3089
3090/*
3091 * Given a source IP address (and route, if available), determine the best
b0d623f7
A
3092 * interface to send the packet from. Checking for (and updating) the
3093 * ROF_SRCIF_SELECTED flag in the pcb-supplied route placeholder is done
3094 * without any locks based on the assumption that ip_output() is single-
3095 * threaded per-pcb, i.e. for any given pcb there can only be one thread
3096 * performing output at the IP layer.
6d2010ae
A
3097 *
3098 * This routine is analogous to in6_selectroute() for IPv6.
c910b4d9
A
3099 */
3100static struct ifaddr *
3101in_selectsrcif(struct ip *ip, struct route *ro, unsigned int ifscope)
3102{
3103 struct ifaddr *ifa = NULL;
b0d623f7
A
3104 struct in_addr src = ip->ip_src;
3105 struct in_addr dst = ip->ip_dst;
c910b4d9 3106 struct ifnet *rt_ifp;
0b4c1975 3107 char s_src[MAX_IPv4_STR_LEN], s_dst[MAX_IPv4_STR_LEN];
c910b4d9
A
3108
3109 if (ip_select_srcif_debug) {
b0d623f7
A
3110 (void) inet_ntop(AF_INET, &src.s_addr, s_src, sizeof (s_src));
3111 (void) inet_ntop(AF_INET, &dst.s_addr, s_dst, sizeof (s_dst));
c910b4d9
A
3112 }
3113
b0d623f7
A
3114 if (ro->ro_rt != NULL)
3115 RT_LOCK(ro->ro_rt);
c910b4d9 3116
c910b4d9
A
3117 rt_ifp = (ro->ro_rt != NULL) ? ro->ro_rt->rt_ifp : NULL;
3118
3119 /*
3120 * Given the source IP address, find a suitable source interface
3121 * to use for transmission; if the caller has specified a scope,
3122 * optimize the search by looking at the addresses only for that
3123 * interface. This is still suboptimal, however, as we need to
3124 * traverse the per-interface list.
3125 */
3126 if (ifscope != IFSCOPE_NONE || ro->ro_rt != NULL) {
3127 unsigned int scope = ifscope;
3128
3129 /*
3130 * If no scope is specified and the route is stale (pointing
3131 * to a defunct interface) use the current primary interface;
3132 * this happens when switching between interfaces configured
3133 * with the same IP address. Otherwise pick up the scope
3134 * information from the route; the ULP may have looked up a
3135 * correct route and we just need to verify it here and mark
3136 * it with the ROF_SRCIF_SELECTED flag below.
3137 */
3138 if (scope == IFSCOPE_NONE) {
3139 scope = rt_ifp->if_index;
6d2010ae 3140 if (scope != get_primary_ifscope(AF_INET) &&
c910b4d9 3141 ro->ro_rt->generation_id != route_generation)
6d2010ae 3142 scope = get_primary_ifscope(AF_INET);
c910b4d9
A
3143 }
3144
b0d623f7 3145 ifa = (struct ifaddr *)ifa_foraddr_scoped(src.s_addr, scope);
c910b4d9 3146
0b4c1975
A
3147 if (ifa == NULL && ip->ip_p != IPPROTO_UDP &&
3148 ip->ip_p != IPPROTO_TCP && ipforwarding) {
3149 /*
3150 * If forwarding is enabled, and if the packet isn't
3151 * TCP or UDP, check if the source address belongs
3152 * to one of our own interfaces; if so, demote the
3153 * interface scope and do a route lookup right below.
3154 */
3155 ifa = (struct ifaddr *)ifa_foraddr(src.s_addr);
3156 if (ifa != NULL) {
6d2010ae 3157 IFA_REMREF(ifa);
0b4c1975
A
3158 ifa = NULL;
3159 ifscope = IFSCOPE_NONE;
3160 }
3161 }
3162
c910b4d9
A
3163 if (ip_select_srcif_debug && ifa != NULL) {
3164 if (ro->ro_rt != NULL) {
6d2010ae
A
3165 printf("%s->%s ifscope %d->%d ifa_if %s "
3166 "ro_if %s\n", s_src, s_dst, ifscope,
3167 scope, if_name(ifa->ifa_ifp),
3168 if_name(rt_ifp));
c910b4d9 3169 } else {
6d2010ae 3170 printf("%s->%s ifscope %d->%d ifa_if %s\n",
b0d623f7 3171 s_src, s_dst, ifscope, scope,
6d2010ae 3172 if_name(ifa->ifa_ifp));
c910b4d9
A
3173 }
3174 }
3175 }
3176
3177 /*
3178 * Slow path; search for an interface having the corresponding source
3179 * IP address if the scope was not specified by the caller, and:
3180 *
3181 * 1) There currently isn't any route, or,
3182 * 2) The interface used by the route does not own that source
3183 * IP address; in this case, the route will get blown away
3184 * and we'll do a more specific scoped search using the newly
3185 * found interface.
3186 */
3187 if (ifa == NULL && ifscope == IFSCOPE_NONE) {
b0d623f7 3188 ifa = (struct ifaddr *)ifa_foraddr(src.s_addr);
c910b4d9 3189
0b4c1975
A
3190 /*
3191 * If we have the IP address, but not the route, we don't
3192 * really know whether or not it belongs to the correct
3193 * interface (it could be shared across multiple interfaces.)
3194 * The only way to find out is to do a route lookup.
3195 */
3196 if (ifa != NULL && ro->ro_rt == NULL) {
3197 struct rtentry *rt;
3198 struct sockaddr_in sin;
3199 struct ifaddr *oifa = NULL;
3200
3201 bzero(&sin, sizeof (sin));
3202 sin.sin_family = AF_INET;
3203 sin.sin_len = sizeof (sin);
3204 sin.sin_addr = dst;
3205
3206 lck_mtx_lock(rnh_lock);
3207 if ((rt = rt_lookup(TRUE, (struct sockaddr *)&sin, NULL,
3208 rt_tables[AF_INET], IFSCOPE_NONE)) != NULL) {
3209 RT_LOCK(rt);
3210 /*
3211 * If the route uses a different interface,
3212 * use that one instead. The IP address of
3213 * the ifaddr that we pick up here is not
3214 * relevant.
3215 */
3216 if (ifa->ifa_ifp != rt->rt_ifp) {
3217 oifa = ifa;
3218 ifa = rt->rt_ifa;
6d2010ae 3219 IFA_ADDREF(ifa);
0b4c1975
A
3220 RT_UNLOCK(rt);
3221 } else {
3222 RT_UNLOCK(rt);
3223 }
3224 rtfree_locked(rt);
3225 }
3226 lck_mtx_unlock(rnh_lock);
3227
3228 if (oifa != NULL) {
3229 struct ifaddr *iifa;
3230
3231 /*
3232 * See if the interface pointed to by the
3233 * route is configured with the source IP
3234 * address of the packet.
3235 */
3236 iifa = (struct ifaddr *)ifa_foraddr_scoped(
3237 src.s_addr, ifa->ifa_ifp->if_index);
3238
3239 if (iifa != NULL) {
3240 /*
3241 * Found it; drop the original one
3242 * as well as the route interface
3243 * address, and use this instead.
3244 */
6d2010ae
A
3245 IFA_REMREF(oifa);
3246 IFA_REMREF(ifa);
0b4c1975
A
3247 ifa = iifa;
3248 } else if (!ipforwarding ||
3249 (rt->rt_flags & RTF_GATEWAY)) {
3250 /*
3251 * This interface doesn't have that
3252 * source IP address; drop the route
3253 * interface address and just use the
3254 * original one, and let the caller
3255 * do a scoped route lookup.
3256 */
6d2010ae 3257 IFA_REMREF(ifa);
0b4c1975
A
3258 ifa = oifa;
3259 } else {
3260 /*
3261 * Forwarding is enabled and the source
3262 * address belongs to one of our own
3263 * interfaces which isn't the outgoing
3264 * interface, and we have a route, and
3265 * the destination is on a network that
3266 * is directly attached (onlink); drop
3267 * the original one and use the route
3268 * interface address instead.
3269 */
6d2010ae 3270 IFA_REMREF(oifa);
0b4c1975
A
3271 }
3272 }
3273 } else if (ifa != NULL && ro->ro_rt != NULL &&
3274 !(ro->ro_rt->rt_flags & RTF_GATEWAY) &&
3275 ifa->ifa_ifp != ro->ro_rt->rt_ifp && ipforwarding) {
3276 /*
3277 * Forwarding is enabled and the source address belongs
3278 * to one of our own interfaces which isn't the same
3279 * as the interface used by the known route; drop the
3280 * original one and use the route interface address.
3281 */
6d2010ae 3282 IFA_REMREF(ifa);
0b4c1975 3283 ifa = ro->ro_rt->rt_ifa;
6d2010ae 3284 IFA_ADDREF(ifa);
0b4c1975
A
3285 }
3286
c910b4d9 3287 if (ip_select_srcif_debug && ifa != NULL) {
6d2010ae
A
3288 printf("%s->%s ifscope %d ifa_if %s\n",
3289 s_src, s_dst, ifscope, if_name(ifa->ifa_ifp));
c910b4d9
A
3290 }
3291 }
3292
b0d623f7
A
3293 if (ro->ro_rt != NULL)
3294 RT_LOCK_ASSERT_HELD(ro->ro_rt);
c910b4d9
A
3295 /*
3296 * If there is a non-loopback route with the wrong interface, or if
3297 * there is no interface configured with such an address, blow it
3298 * away. Except for local/loopback, we look for one with a matching
3299 * interface scope/index.
3300 */
3301 if (ro->ro_rt != NULL &&
3302 (ifa == NULL || (ifa->ifa_ifp != rt_ifp && rt_ifp != lo_ifp) ||
3303 !(ro->ro_rt->rt_flags & RTF_UP))) {
3304 if (ip_select_srcif_debug) {
3305 if (ifa != NULL) {
6d2010ae
A
3306 printf("%s->%s ifscope %d ro_if %s != "
3307 "ifa_if %s (cached route cleared)\n",
3308 s_src, s_dst, ifscope, if_name(rt_ifp),
3309 if_name(ifa->ifa_ifp));
c910b4d9 3310 } else {
6d2010ae 3311 printf("%s->%s ifscope %d ro_if %s "
c910b4d9 3312 "(no ifa_if found)\n",
6d2010ae 3313 s_src, s_dst, ifscope, if_name(rt_ifp));
c910b4d9
A
3314 }
3315 }
3316
b0d623f7
A
3317 RT_UNLOCK(ro->ro_rt);
3318 rtfree(ro->ro_rt);
c910b4d9
A
3319 ro->ro_rt = NULL;
3320 ro->ro_flags &= ~ROF_SRCIF_SELECTED;
3321
3322 /*
3323 * If the destination is IPv4 LLA and the route's interface
3324 * doesn't match the source interface, then the source IP
3325 * address is wrong; it most likely belongs to the primary
3326 * interface associated with the IPv4 LL subnet. Drop the
3327 * packet rather than letting it go out and return an error
3328 * to the ULP. This actually applies not only to IPv4 LL
3329 * but other shared subnets; for now we explicitly test only
3330 * for the former case and save the latter for future.
3331 */
b0d623f7
A
3332 if (IN_LINKLOCAL(ntohl(dst.s_addr)) &&
3333 !IN_LINKLOCAL(ntohl(src.s_addr)) && ifa != NULL) {
6d2010ae 3334 IFA_REMREF(ifa);
c910b4d9
A
3335 ifa = NULL;
3336 }
3337 }
3338
3339 if (ip_select_srcif_debug && ifa == NULL) {
3340 printf("%s->%s ifscope %d (neither ro_if/ifa_if found)\n",
b0d623f7 3341 s_src, s_dst, ifscope);
c910b4d9
A
3342 }
3343
3344 /*
3345 * If there is a route, mark it accordingly. If there isn't one,
3346 * we'll get here again during the next transmit (possibly with a
3347 * route) and the flag will get set at that point. For IPv4 LLA
3348 * destination, mark it only if the route has been fully resolved;
3349 * otherwise we want to come back here again when the route points
3350 * to the interface over which the ARP reply arrives on.
3351 */
b0d623f7 3352 if (ro->ro_rt != NULL && (!IN_LINKLOCAL(ntohl(dst.s_addr)) ||
c910b4d9
A
3353 (ro->ro_rt->rt_gateway->sa_family == AF_LINK &&
3354 SDL(ro->ro_rt->rt_gateway)->sdl_alen != 0))) {
3355 ro->ro_flags |= ROF_SRCIF_SELECTED;
3356 ro->ro_rt->generation_id = route_generation;
3357 }
3358
b0d623f7
A
3359 if (ro->ro_rt != NULL)
3360 RT_UNLOCK(ro->ro_rt);
3361
c910b4d9
A
3362 return (ifa);
3363}