]> git.saurik.com Git - apple/xnu.git/blame - bsd/netinet/ip_output.c
xnu-1504.15.3.tar.gz
[apple/xnu.git] / bsd / netinet / ip_output.c
CommitLineData
1c79356b 1/*
c910b4d9 2 * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
5d5c5d0d 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * Copyright (c) 1982, 1986, 1988, 1990, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94
9bccf70c 61 * $FreeBSD: src/sys/netinet/ip_output.c,v 1.99.2.16 2001/07/19 06:37:26 kris Exp $
1c79356b 62 */
2d21ac55
A
63/*
64 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
65 * support for mandatory and extensible security protections. This notice
66 * is included in support of clause 2.2 (b) of the Apple Public License,
67 * Version 2.0.
68 */
1c79356b
A
69
70#define _IP_VHL
71
1c79356b
A
72#include <sys/param.h>
73#include <sys/systm.h>
74#include <sys/kernel.h>
75#include <sys/malloc.h>
76#include <sys/mbuf.h>
77#include <sys/protosw.h>
78#include <sys/socket.h>
79#include <sys/socketvar.h>
91447636
A
80#include <kern/locks.h>
81#include <sys/sysctl.h>
1c79356b 82
b0d623f7
A
83#include <machine/endian.h>
84
1c79356b 85#include <net/if.h>
c910b4d9 86#include <net/if_dl.h>
1c79356b
A
87#include <net/route.h>
88
89#include <netinet/in.h>
90#include <netinet/in_systm.h>
91#include <netinet/ip.h>
1c79356b
A
92#include <netinet/in_pcb.h>
93#include <netinet/in_var.h>
94#include <netinet/ip_var.h>
1c79356b 95
91447636
A
96#include <netinet/kpi_ipfilter_var.h>
97
2d21ac55
A
98#if CONFIG_MACF_NET
99#include <security/mac_framework.h>
100#endif
101
9bccf70c
A
102#include "faith.h"
103
104#include <net/dlil.h>
1c79356b 105#include <sys/kdebug.h>
2d21ac55 106#include <libkern/OSAtomic.h>
1c79356b
A
107
/*
 * Trace codes handed to KERNEL_DEBUG() by the output path in this file,
 * all built with NETDBG_CODE() under the DBG_NETIP class:
 *   DBG_LAYER_BEG         - emitted by ip_output_list() after the IP header
 *                           has been filled in (logs dst, src, proto, off, len).
 *   DBG_LAYER_END         - NOTE(review): its use is not visible in this part
 *                           of the file; presumably the matching end marker.
 *   DBG_FNC_IP_OUTPUT     - combined with DBG_FUNC_START on entry to
 *                           ip_output_list().
 *   DBG_FNC_IPSEC4_OUTPUT - combined with DBG_FUNC_START / DBG_FUNC_END around
 *                           the IPsec policy handling in ip_output_list().
 */
108#define DBG_LAYER_BEG NETDBG_CODE(DBG_NETIP, 1)
109#define DBG_LAYER_END NETDBG_CODE(DBG_NETIP, 3)
110#define DBG_FNC_IP_OUTPUT NETDBG_CODE(DBG_NETIP, (1 << 8) | 1)
55e303ae 111#define DBG_FNC_IPSEC4_OUTPUT NETDBG_CODE(DBG_NETIP, (2 << 8) | 1)
1c79356b 112
/*
 * SWAP16(v): exchange the two low-order bytes of v.
 *
 * Both halves are masked to 8 bits, so the result always fits in 16 bits
 * even when v is held in a wider integer type; without the mask on the
 * high-byte term, bits 16 and above of v would leak into the result.
 * For operands that already fit in 16 bits the value is unchanged from the
 * unmasked form.
 *
 * The argument is evaluated twice: do not pass expressions with side effects.
 */
#define SWAP16(v)	((((v) & 0xff) << 8) | (((v) >> 8) & 0xff))
1c79356b 114
1c79356b
A
115#if IPSEC
116#include <netinet6/ipsec.h>
117#include <netkey/key.h>
9bccf70c 118#if IPSEC_DEBUG
1c79356b 119#include <netkey/key_debug.h>
1c79356b 120#else
9bccf70c 121#define KEYDEBUG(lev,arg)
1c79356b 122#endif
9bccf70c 123#endif /*IPSEC*/
1c79356b 124
1c79356b 125#include <netinet/ip_fw.h>
91447636 126#include <netinet/ip_divert.h>
1c79356b
A
127
128#if DUMMYNET
129#include <netinet/ip_dummynet.h>
130#endif
131
b0d623f7
A
132#if PF
133#include <net/pfvar.h>
134#endif /* PF */
135
1c79356b
A
#if IPFIREWALL_FORWARD_DEBUG
/*
 * print_ip(a): debug helper that prints the IPv4 address held in the
 * struct in_addr `a` in dotted-quad form (most significant byte first,
 * after converting s_addr with ntohl()).
 *
 * Each octet is cast to unsigned long so that it matches the %lu
 * conversion regardless of the width of the type ntohl() yields; the
 * macro argument is parenthesized so any lvalue expression may be passed.
 *
 * The expansion deliberately keeps its trailing semicolon so that existing
 * call sites keep compiling unchanged.  `a` is evaluated four times.
 */
#define print_ip(a)	printf("%lu.%lu.%lu.%lu",			\
	    (unsigned long)((ntohl((a).s_addr) >> 24) & 0xFF),		\
	    (unsigned long)((ntohl((a).s_addr) >> 16) & 0xFF),		\
	    (unsigned long)((ntohl((a).s_addr) >> 8) & 0xFF),		\
	    (unsigned long)((ntohl((a).s_addr)) & 0xFF));
#endif
142
91447636 143
1c79356b
A
/*
 * Global IP identification counter.  When RANDOM_IP_ID is not configured,
 * ip_output_list() stamps each packet whose header it fills in (neither
 * IP_FORWARDING nor IP_RAWOUTPUT set) with htons(ip_id++); the increment
 * there is a plain post-increment, not an atomic operation.
 */
144u_short ip_id;
145
91447636
A
146static struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *);
147static struct ifnet *ip_multicast_if(struct in_addr *, int *);
148static void ip_mloopback(struct ifnet *, struct mbuf *,
149 struct sockaddr_in *, int);
150static int ip_getmoptions(struct sockopt *, struct ip_moptions *);
151static int ip_pcbopts(int, struct mbuf **, struct mbuf *);
152static int ip_setmoptions(struct sockopt *, struct ip_moptions **);
9bccf70c 153
2d21ac55 154static void ip_out_cksum_stats(int, u_int32_t);
c910b4d9
A
155static struct ifaddr *in_selectsrcif(struct ip *, struct route *, unsigned int);
156static void ip_bindif(struct inpcb *, unsigned int);
2d21ac55 157
55e303ae
A
158int ip_createmoptions(struct ip_moptions **imop);
159int ip_addmembership(struct ip_moptions *imo, struct ip_mreq *mreq);
160int ip_dropmembership(struct ip_moptions *imo, struct ip_mreq *mreq);
91447636 161int ip_optcopy(struct ip *, struct ip *);
2d21ac55
A
162void in_delayed_cksum_offset(struct mbuf *, int );
163void in_cksum_offset(struct mbuf* , size_t );
164
91447636 165extern int (*fr_checkp)(struct ip *, int, struct ifnet *, int, struct mbuf **);
9bccf70c 166
1c79356b
A
167extern struct protosw inetsw[];
168
9bccf70c 169extern struct ip_linklocal_stat ip_linklocal_stat;
91447636 170extern lck_mtx_t *ip_mutex;
9bccf70c
A
171
172/* temporary: for testing */
173#if IPSEC
174extern int ipsec_bypass;
175#endif
176
91447636
A
/*
 * Read-write integer tunable registered under _net_inet_ip as
 * "maxchainsent"; its sysctl description is "use dlil_output_list".
 * NOTE(review): the code that reads or updates it is not visible in this
 * part of the file.
 */
177static int ip_maxchainsent = 0;
178SYSCTL_INT(_net_inet_ip, OID_AUTO, maxchainsent, CTLFLAG_RW,
179 &ip_maxchainsent, 0, "use dlil_output_list");
2d21ac55
A
180#if DEBUG
/*
 * DEBUG builds only: while forge_ce is non-zero, ip_output_list() rewrites
 * the ECN field of an outgoing packet marked ECT(0) or ECT(1) to CE and
 * decrements forge_ce once for each packet it marks.
 */
181static int forge_ce = 0;
182SYSCTL_INT(_net_inet_ip, OID_AUTO, forge_ce, CTLFLAG_RW,
183 &forge_ce, 0, "Forge ECN CE");
184#endif /* DEBUG */
c910b4d9
A
185

/*
 * Read-write integer tunable registered under _net_inet_ip as
 * "select_srcif_debug"; per its description it controls logging of source
 * interface selection debug info.
 * NOTE(review): the consumer is not visible in this part of the file.
 */
186static int ip_select_srcif_debug = 0;
187SYSCTL_INT(_net_inet_ip, OID_AUTO, select_srcif_debug, CTLFLAG_RW,
188 &ip_select_srcif_debug, 0, "log source interface selection debug info");
189
1c79356b
A
/*
 * ip_output: single-packet entry point for IP output.
 *
 * The mbuf chain m0 carries a packet that begins with a skeletal IP header
 * (len, off, ttl, proto, tos, src and dst already set).  The chain holding
 * the packet is freed; the options mbuf `opt`, when supplied, is not.
 *
 * All work is delegated to ip_output_list(), called with a packetchain
 * argument of 0; every other argument is forwarded untouched.
 *
 * Returns: whatever ip_output_list() returns (0 on success, otherwise an
 *          errno value).
 */
int
ip_output(
	struct mbuf *m0,
	struct mbuf *opt,
	struct route *ro,
	int flags,
	struct ip_moptions *imo,
	struct ip_out_args *ipoa)
{
	return (ip_output_list(m0, 0, opt, ro, flags, imo, ipoa));
}
209
2d21ac55
A
210/*
211 * Returns: 0 Success
212 * ENOMEM
213 * EADDRNOTAVAIL
214 * ENETUNREACH
215 * EHOSTUNREACH
216 * EACCES
217 * EMSGSIZE
218 * ENOBUFS
219 * ipsec4_getpolicybyaddr:??? [IPSEC 4th argument, contents modified]
220 * ipsec4_getpolicybysock:??? [IPSEC 4th argument, contents modified]
221 * key_spdacquire:??? [IPSEC]
222 * ipsec4_output:??? [IPSEC]
223 * <fr_checkp>:??? [firewall]
224 * ip_dn_io_ptr:??? [dummynet]
225 * dlil_output:??? [DLIL]
226 * dlil_output_list:??? [DLIL]
227 *
228 * Notes: The ipsec4_getpolicyby{addr|sock} function error returns are
229 * only used as the error return from this function where one of
230 * these functions fails to return a policy.
231 */
91447636
A
232int
233ip_output_list(
234 struct mbuf *m0,
235 int packetchain,
236 struct mbuf *opt,
237 struct route *ro,
238 int flags,
2d21ac55 239 struct ip_moptions *imo,
c910b4d9 240 struct ip_out_args *ipoa
2d21ac55 241 )
1c79356b 242{
b0d623f7 243 struct ip *ip;
55e303ae 244 struct ifnet *ifp = NULL;
b0d623f7 245 struct mbuf *m = m0, **mppn = NULL;
1c79356b 246 int hlen = sizeof (struct ip);
2d21ac55 247 int len = 0, off, error = 0;
55e303ae 248 struct sockaddr_in *dst = NULL;
b0d623f7 249 struct in_ifaddr *ia = NULL, *src_ia = NULL;
0b4e3aa0 250 int isbroadcast, sw_csum;
91447636 251 struct in_addr pkt_dst;
1c79356b
A
252#if IPSEC
253 struct route iproute;
9bccf70c 254 struct socket *so = NULL;
1c79356b
A
255 struct secpolicy *sp = NULL;
256#endif
257#if IPFIREWALL_FORWARD
258 int fwd_rewrite_src = 0;
259#endif
4a3eedf9 260#if IPFIREWALL
91447636 261 struct ip_fw_args args;
4a3eedf9 262#endif
91447636
A
263 int didfilter = 0;
264 ipfilter_t inject_filter_ref = 0;
265 struct m_tag *tag;
2d21ac55 266 struct route saved_route;
c910b4d9 267 struct ip_out_args saved_ipoa;
91447636 268 struct mbuf * packetlist;
b0d623f7 269 int pktcnt = 0, tso = 0;
c910b4d9
A
270 unsigned int ifscope;
271 boolean_t select_srcif;
1c79356b
A
272
273 KERNEL_DEBUG(DBG_FNC_IP_OUTPUT | DBG_FUNC_START, 0,0,0,0,0);
274
91447636 275 packetlist = m0;
2d21ac55 276#if IPFIREWALL
4a3eedf9 277 args.next_hop = NULL;
91447636
A
278 args.eh = NULL;
279 args.rule = NULL;
91447636 280 args.divert_rule = 0; /* divert cookie */
c910b4d9 281 args.ipoa = NULL;
b0d623f7
A
282
283 if (SLIST_EMPTY(&m0->m_pkthdr.tags))
284 goto ipfw_tags_done;
285
91447636
A
286 /* Grab info from mtags prepended to the chain */
287#if DUMMYNET
b0d623f7
A
288 if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID,
289 KERNEL_TAG_TYPE_DUMMYNET, NULL)) != NULL) {
91447636 290 struct dn_pkt_tag *dn_tag;
b0d623f7 291
91447636
A
292 dn_tag = (struct dn_pkt_tag *)(tag+1);
293 args.rule = dn_tag->rule;
294 opt = NULL;
2d21ac55
A
295 saved_route = dn_tag->ro;
296 ro = &saved_route;
b0d623f7 297
91447636
A
298 imo = NULL;
299 dst = dn_tag->dn_dst;
300 ifp = dn_tag->ifp;
301 flags = dn_tag->flags;
c910b4d9
A
302 saved_ipoa = dn_tag->ipoa;
303 ipoa = &saved_ipoa;
b0d623f7 304
91447636
A
305 m_tag_delete(m0, tag);
306 }
307#endif /* DUMMYNET */
308
2d21ac55 309#if IPDIVERT
b0d623f7
A
310 if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID,
311 KERNEL_TAG_TYPE_DIVERT, NULL)) != NULL) {
91447636 312 struct divert_tag *div_tag;
b0d623f7 313
91447636
A
314 div_tag = (struct divert_tag *)(tag+1);
315 args.divert_rule = div_tag->cookie;
316
317 m_tag_delete(m0, tag);
318 }
2d21ac55 319#endif /* IPDIVERT */
2d21ac55 320
b0d623f7
A
321 if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID,
322 KERNEL_TAG_TYPE_IPFORWARD, NULL)) != NULL) {
91447636 323 struct ip_fwd_tag *ipfwd_tag;
b0d623f7 324
91447636
A
325 ipfwd_tag = (struct ip_fwd_tag *)(tag+1);
326 args.next_hop = ipfwd_tag->next_hop;
b0d623f7 327
91447636
A
328 m_tag_delete(m0, tag);
329 }
b0d623f7 330ipfw_tags_done:
4a3eedf9 331#endif /* IPFIREWALL */
91447636
A
332
333 m = m0;
b0d623f7 334
91447636
A
335#if DIAGNOSTIC
336 if ( !m || (m->m_flags & M_PKTHDR) != 0)
337 panic("ip_output no HDR");
338 if (!ro)
339 panic("ip_output no route, proto = %d",
340 mtod(m, struct ip *)->ip_p);
9bccf70c 341#endif
91447636 342
c910b4d9 343 /*
c910b4d9 344 * At present the IP_OUTARGS flag implies a request for IP to
b0d623f7
A
345 * perform source interface selection. In the forwarding case,
346 * only the ifscope value is used, as source interface selection
347 * doesn't take place.
c910b4d9 348 */
b0d623f7
A
349 if (ip_doscopedroute && (flags & IP_OUTARGS)) {
350 select_srcif = !(flags & IP_FORWARDING);
c910b4d9
A
351 ifscope = ipoa->ipoa_ifscope;
352 } else {
353 select_srcif = FALSE;
354 ifscope = IFSCOPE_NONE;
355 }
356
2d21ac55 357#if IPFIREWALL
91447636 358 if (args.rule != NULL) { /* dummynet already saw us */
b0d623f7
A
359 ip = mtod(m, struct ip *);
360 hlen = IP_VHL_HL(ip->ip_vhl) << 2 ;
361 if (ro->ro_rt != NULL) {
362 RT_LOCK_SPIN(ro->ro_rt);
363 ia = (struct in_ifaddr *)ro->ro_rt->rt_ifa;
364 if (ia)
365 ifaref(&ia->ia_ifa);
366 RT_UNLOCK(ro->ro_rt);
367 }
91447636 368#if IPSEC
b0d623f7
A
369 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) {
370 so = ipsec_getsocket(m);
371 (void)ipsec_setsocket(m, NULL);
2d21ac55 372 }
1c79356b 373#endif
b0d623f7 374 goto sendit;
91447636 375 }
2d21ac55 376#endif /* IPFIREWALL */
91447636 377
9bccf70c 378#if IPSEC
55e303ae 379 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) {
9bccf70c
A
380 so = ipsec_getsocket(m);
381 (void)ipsec_setsocket(m, NULL);
382 }
383#endif
91447636
A
384loopit:
385 /*
386 * No need to process packet twice if we've
387 * already seen it
388 */
b0d623f7
A
389 if (!SLIST_EMPTY(&m->m_pkthdr.tags))
390 inject_filter_ref = ipf_get_inject_filter(m);
391 else
392 inject_filter_ref = 0;
1c79356b 393
1c79356b
A
394 if (opt) {
395 m = ip_insertoptions(m, opt, &len);
396 hlen = len;
397 }
398 ip = mtod(m, struct ip *);
4a3eedf9 399#if IPFIREWALL
91447636 400 pkt_dst = args.next_hop ? args.next_hop->sin_addr : ip->ip_dst;
4a3eedf9
A
401#else
402 pkt_dst = ip->ip_dst;
403#endif
91447636 404
1c79356b
A
405 /*
406 * Fill in IP header.
407 */
408 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
409 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, hlen >> 2);
410 ip->ip_off &= IP_DF;
9bccf70c
A
411#if RANDOM_IP_ID
412 ip->ip_id = ip_randomid();
413#else
1c79356b 414 ip->ip_id = htons(ip_id++);
9bccf70c 415#endif
b0d623f7 416 OSAddAtomic(1, &ipstat.ips_localout);
1c79356b
A
417 } else {
418 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
419 }
2d21ac55
A
420
421#if DEBUG
422 /* For debugging, we let the stack forge congestion */
423 if (forge_ce != 0 &&
424 ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_ECT1 ||
425 (ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_ECT0)) {
426 ip->ip_tos = (ip->ip_tos & ~IPTOS_ECN_MASK) | IPTOS_ECN_CE;
427 forge_ce--;
428 }
429#endif /* DEBUG */
1c79356b
A
430
431 KERNEL_DEBUG(DBG_LAYER_BEG, ip->ip_dst.s_addr,
432 ip->ip_src.s_addr, ip->ip_p, ip->ip_off, ip->ip_len);
2d21ac55 433
1c79356b 434 dst = (struct sockaddr_in *)&ro->ro_dst;
55e303ae 435
1c79356b
A
436 /*
437 * If there is a cached route,
438 * check that it is to the same destination
439 * and is still up. If not, free it and try again.
55e303ae
A
440 * The address family should also be checked in case of sharing the
441 * cache with IPv6.
1c79356b 442 */
55e303ae 443
2d21ac55
A
444 if (ro->ro_rt != NULL) {
445 if (ro->ro_rt->generation_id != route_generation &&
446 ((flags & (IP_ROUTETOIF | IP_FORWARDING)) == 0) &&
b0d623f7
A
447 (ip->ip_src.s_addr != INADDR_ANY)) {
448 src_ia = ifa_foraddr(ip->ip_src.s_addr);
449 if (src_ia == NULL) {
450 error = EADDRNOTAVAIL;
451 goto bad;
452 }
453 ifafree(&src_ia->ia_ifa);
91447636 454 }
b0d623f7
A
455 /*
456 * Test rt_flags without holding rt_lock for performance
457 * reasons; if the route is down it will hopefully be
458 * caught by the layer below (since it uses this route
459 * as a hint) or during the next transmit.
460 */
2d21ac55
A
461 if ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
462 dst->sin_family != AF_INET ||
463 dst->sin_addr.s_addr != pkt_dst.s_addr) {
b0d623f7 464 rtfree(ro->ro_rt);
2d21ac55
A
465 ro->ro_rt = NULL;
466 }
c910b4d9
A
467 /*
468 * If we're doing source interface selection, we may not
469 * want to use this route; only synch up the generation
470 * count otherwise.
471 */
472 if (!select_srcif && ro->ro_rt != NULL &&
473 ro->ro_rt->generation_id != route_generation)
2d21ac55 474 ro->ro_rt->generation_id = route_generation;
ab86ba33 475 }
2d21ac55 476 if (ro->ro_rt == NULL) {
55e303ae 477 bzero(dst, sizeof(*dst));
1c79356b
A
478 dst->sin_family = AF_INET;
479 dst->sin_len = sizeof(*dst);
91447636 480 dst->sin_addr = pkt_dst;
1c79356b
A
481 }
482 /*
483 * If routing to interface only,
484 * short circuit routing lookup.
485 */
486#define ifatoia(ifa) ((struct in_ifaddr *)(ifa))
487#define sintosa(sin) ((struct sockaddr *)(sin))
488 if (flags & IP_ROUTETOIF) {
91447636
A
489 if (ia)
490 ifafree(&ia->ia_ifa);
491 if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0) {
492 if ((ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) {
b0d623f7 493 OSAddAtomic(1, &ipstat.ips_noroute);
91447636
A
494 error = ENETUNREACH;
495 goto bad;
496 }
1c79356b
A
497 }
498 ifp = ia->ia_ifp;
1c79356b
A
499 ip->ip_ttl = 1;
500 isbroadcast = in_broadcast(dst->sin_addr, ifp);
c910b4d9
A
501 } else if (IN_MULTICAST(ntohl(pkt_dst.s_addr)) &&
502 imo != NULL && imo->imo_multicast_ifp != NULL) {
503 /*
504 * Bypass the normal routing lookup for multicast
505 * packets if the interface is specified.
506 */
507 ifp = imo->imo_multicast_ifp;
508 isbroadcast = 0;
509 if (ia != NULL)
510 ifafree(&ia->ia_ifa);
511
b0d623f7
A
512 /* Macro takes reference on ia */
513 IFP_TO_IA(ifp, ia);
1c79356b 514 } else {
c910b4d9
A
515 boolean_t cloneok = FALSE;
516 /*
517 * Perform source interface selection; the source IP address
518 * must belong to one of the addresses of the interface used
519 * by the route. For performance reasons, do this only if
520 * there is no route, or if the routing table has changed,
521 * or if we haven't done source interface selection on this
522 * route (for this PCB instance) before.
523 */
524 if (select_srcif && ip->ip_src.s_addr != INADDR_ANY &&
b0d623f7 525 (ro->ro_rt == NULL || !(ro->ro_rt->rt_flags & RTF_UP) ||
c910b4d9
A
526 ro->ro_rt->generation_id != route_generation ||
527 !(ro->ro_flags & ROF_SRCIF_SELECTED))) {
528 struct ifaddr *ifa;
2d21ac55 529
c910b4d9
A
530 /* Find the source interface */
531 ifa = in_selectsrcif(ip, ro, ifscope);
532
533 /*
534 * If the source address is spoofed (in the case
535 * of IP_RAWOUTPUT), or if this is destined for
536 * local/loopback, just let it go out using the
537 * interface of the route. Otherwise, there's no
538 * interface having such an address, so bail out.
539 */
540 if (ifa == NULL && !(flags & IP_RAWOUTPUT) &&
541 ifscope != lo_ifp->if_index) {
542 error = EADDRNOTAVAIL;
2d21ac55
A
543 goto bad;
544 }
c910b4d9
A
545
546 /*
547 * If the caller didn't explicitly specify the scope,
548 * pick it up from the source interface. If the cached
549 * route was wrong and was blown away as part of source
550 * interface selection, don't mask out RTF_PRCLONING
551 * since that route may have been allocated by the ULP,
552 * unless the IP header was created by the caller or
553 * the destination is IPv4 LLA. The check for the
554 * latter is needed because IPv4 LLAs are never scoped
555 * in the current implementation, and we don't want to
556 * replace the resolved IPv4 LLA route with one whose
557 * gateway points to that of the default gateway on
558 * the primary interface of the system.
559 */
560 if (ifa != NULL) {
561 if (ifscope == IFSCOPE_NONE)
562 ifscope = ifa->ifa_ifp->if_index;
563 ifafree(ifa);
564 cloneok = (!(flags & IP_RAWOUTPUT) &&
565 !(IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr))));
566 }
2d21ac55 567 }
c910b4d9 568
1c79356b
A
569 /*
570 * If this is the case, we probably don't want to allocate
571 * a protocol-cloned route since we didn't get one from the
572 * ULP. This lets TCP do its thing, while not burdening
573 * forwarding or ICMP with the overhead of cloning a route.
574 * Of course, we still want to do any cloning requested by
575 * the link layer, as this is probably required in all cases
576 * for correct operation (as it is for ARP).
577 */
c910b4d9 578 if (ro->ro_rt == NULL) {
2d21ac55
A
579 unsigned long ign = RTF_PRCLONING;
580 /*
581 * We make an exception here: if the destination
582 * address is INADDR_BROADCAST, allocate a protocol-
583 * cloned host route so that we end up with a route
584 * marked with the RTF_BROADCAST flag. Otherwise,
585 * we would end up referring to the default route,
586 * instead of creating a cloned host route entry.
587 * That would introduce inconsistencies between ULPs
588 * that allocate a route and those that don't. The
589 * RTF_BROADCAST route is important since we'd want
590 * to send out undirected IP broadcast packets using
c910b4d9
A
591 * link-level broadcast address. Another exception
592 * is for ULP-created routes that got blown away by
593 * source interface selection (see above).
2d21ac55 594 *
c910b4d9 595 * These exceptions will no longer be necessary when
2d21ac55
A
596 * the RTF_PRCLONING scheme is no longer present.
597 */
c910b4d9 598 if (cloneok || dst->sin_addr.s_addr == INADDR_BROADCAST)
2d21ac55
A
599 ign &= ~RTF_PRCLONING;
600
b0d623f7
A
601 /*
602 * Loosen the route lookup criteria if the ifscope
603 * corresponds to the loopback interface; this is
604 * needed to support Application Layer Gateways
605 * listening on loopback, in conjunction with packet
606 * filter redirection rules. The final source IP
607 * address will be rewritten by the packet filter
608 * prior to the RFC1122 loopback check below.
609 */
610 if (ifscope == lo_ifp->if_index)
611 rtalloc_ign(ro, ign);
612 else
613 rtalloc_scoped_ign(ro, ign, ifscope);
2d21ac55 614 }
c910b4d9
A
615
616 if (ro->ro_rt == NULL) {
b0d623f7 617 OSAddAtomic(1, &ipstat.ips_noroute);
1c79356b
A
618 error = EHOSTUNREACH;
619 goto bad;
620 }
c910b4d9 621
91447636
A
622 if (ia)
623 ifafree(&ia->ia_ifa);
b0d623f7 624 RT_LOCK_SPIN(ro->ro_rt);
1c79356b 625 ia = ifatoia(ro->ro_rt->rt_ifa);
91447636
A
626 if (ia)
627 ifaref(&ia->ia_ifa);
1c79356b 628 ifp = ro->ro_rt->rt_ifp;
1c79356b
A
629 ro->ro_rt->rt_use++;
630 if (ro->ro_rt->rt_flags & RTF_GATEWAY)
631 dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
7e4a7d39 632 if (ro->ro_rt->rt_flags & RTF_HOST) {
1c79356b 633 isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST);
7e4a7d39
A
634 } else {
635 /* Become a regular mutex */
636 RT_CONVERT_LOCK(ro->ro_rt);
1c79356b 637 isbroadcast = in_broadcast(dst->sin_addr, ifp);
7e4a7d39 638 }
b0d623f7 639 RT_UNLOCK(ro->ro_rt);
1c79356b 640 }
b0d623f7 641
91447636 642 if (IN_MULTICAST(ntohl(pkt_dst.s_addr))) {
1c79356b
A
643 struct in_multi *inm;
644
645 m->m_flags |= M_MCAST;
646 /*
647 * IP destination address is multicast. Make sure "dst"
648 * still points to the address in "ro". (It may have been
649 * changed to point to a gateway address, above.)
650 */
651 dst = (struct sockaddr_in *)&ro->ro_dst;
652 /*
653 * See if the caller provided any multicast options
654 */
655 if (imo != NULL) {
55e303ae 656 if ((flags & IP_RAWOUTPUT) == 0) ip->ip_ttl = imo->imo_multicast_ttl;
fa4905b1 657 if (imo->imo_multicast_ifp != NULL) {
1c79356b 658 ifp = imo->imo_multicast_ifp;
fa4905b1 659 }
2d21ac55 660#if MROUTING
55e303ae
A
661 if (imo->imo_multicast_vif != -1 &&
662 ((flags & IP_RAWOUTPUT) == 0 || ip->ip_src.s_addr == INADDR_ANY))
1c79356b 663 ip->ip_src.s_addr =
55e303ae 664 ip_mcast_src(imo->imo_multicast_vif);
2d21ac55 665#endif /* MROUTING */
1c79356b 666 } else
55e303ae 667 if ((flags & IP_RAWOUTPUT) == 0) ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
1c79356b
A
668 /*
669 * Confirm that the outgoing interface supports multicast.
670 */
671 if ((imo == NULL) || (imo->imo_multicast_vif == -1)) {
672 if ((ifp->if_flags & IFF_MULTICAST) == 0) {
b0d623f7 673 OSAddAtomic(1, &ipstat.ips_noroute);
1c79356b
A
674 error = ENETUNREACH;
675 goto bad;
676 }
677 }
678 /*
679 * If source address not specified yet, use address
680 * of outgoing interface.
681 */
682 if (ip->ip_src.s_addr == INADDR_ANY) {
b0d623f7
A
683 struct in_ifaddr *ia1;
684 lck_rw_lock_shared(in_ifaddr_rwlock);
9bccf70c 685 TAILQ_FOREACH(ia1, &in_ifaddrhead, ia_link)
1c79356b
A
686 if (ia1->ia_ifp == ifp) {
687 ip->ip_src = IA_SIN(ia1)->sin_addr;
688 break;
689 }
b0d623f7 690 lck_rw_done(in_ifaddr_rwlock);
55e303ae
A
691 if (ip->ip_src.s_addr == INADDR_ANY) {
692 error = ENETUNREACH;
693 goto bad;
694 }
1c79356b
A
695 }
696
91447636
A
697 ifnet_lock_shared(ifp);
698 IN_LOOKUP_MULTI(pkt_dst, ifp, inm);
699 ifnet_lock_done(ifp);
1c79356b
A
700 if (inm != NULL &&
701 (imo == NULL || imo->imo_multicast_loop)) {
702 /*
703 * If we belong to the destination multicast group
704 * on the outgoing interface, and the caller did not
705 * forbid loopback, loop back a copy.
706 */
91447636
A
707 if (!TAILQ_EMPTY(&ipv4_filters)) {
708 struct ipfilter *filter;
709 int seen = (inject_filter_ref == 0);
710 struct ipf_pktopts *ippo = 0, ipf_pktopts;
711
712 if (imo) {
713 ippo = &ipf_pktopts;
714 ipf_pktopts.ippo_mcast_ifnet = imo->imo_multicast_ifp;
715 ipf_pktopts.ippo_mcast_ttl = imo->imo_multicast_ttl;
716 ipf_pktopts.ippo_mcast_loop = imo->imo_multicast_loop;
717 }
718
91447636 719 ipf_ref();
0c530ab8
A
720
721 /* 4135317 - always pass network byte order to filter */
b0d623f7
A
722
723#if BYTE_ORDER != BIG_ENDIAN
0c530ab8
A
724 HTONS(ip->ip_len);
725 HTONS(ip->ip_off);
b0d623f7
A
726#endif
727
91447636
A
728 TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) {
729 if (seen == 0) {
730 if ((struct ipfilter *)inject_filter_ref == filter)
731 seen = 1;
732 } else if (filter->ipf_filter.ipf_output) {
733 errno_t result;
734 result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, ippo);
735 if (result == EJUSTRETURN) {
736 ipf_unref();
737 goto done;
738 }
739 if (result != 0) {
740 ipf_unref();
91447636
A
741 goto bad;
742 }
743 }
744 }
0c530ab8
A
745
746 /* set back to host byte order */
6601e61a 747 ip = mtod(m, struct ip *);
b0d623f7
A
748
749#if BYTE_ORDER != BIG_ENDIAN
0c530ab8
A
750 NTOHS(ip->ip_len);
751 NTOHS(ip->ip_off);
b0d623f7
A
752#endif
753
91447636
A
754 ipf_unref();
755 didfilter = 1;
756 }
1c79356b
A
757 ip_mloopback(ifp, m, dst, hlen);
758 }
2d21ac55 759#if MROUTING
1c79356b
A
760 else {
761 /*
762 * If we are acting as a multicast router, perform
763 * multicast forwarding as if the packet had just
764 * arrived on the interface to which we are about
765 * to send. The multicast forwarding function
766 * recursively calls this function, using the
767 * IP_FORWARDING flag to prevent infinite recursion.
768 *
769 * Multicasts that are looped back by ip_mloopback(),
770 * above, will be forwarded by the ip_input() routine,
771 * if necessary.
772 */
773 if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
774 /*
775 * Check if rsvp daemon is running. If not, don't
776 * set ip_moptions. This ensures that the packet
777 * is multicast and not just sent down one link
778 * as prescribed by rsvpd.
779 */
780 if (!rsvp_on)
781 imo = NULL;
782 if (ip_mforward(ip, ifp, m, imo) != 0) {
783 m_freem(m);
784 goto done;
785 }
786 }
787 }
2d21ac55 788#endif /* MROUTING */
1c79356b
A
789
790 /*
791 * Multicasts with a time-to-live of zero may be looped-
792 * back, above, but must not be transmitted on a network.
793 * Also, multicasts addressed to the loopback interface
794 * are not sent -- the above call to ip_mloopback() will
795 * loop back a copy if this host actually belongs to the
796 * destination group on the loopback interface.
797 */
798 if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) {
799 m_freem(m);
800 goto done;
801 }
802
803 goto sendit;
804 }
805#ifndef notdef
806 /*
807 * If source address not specified yet, use address
808 * of outgoing interface.
809 */
810 if (ip->ip_src.s_addr == INADDR_ANY) {
811 ip->ip_src = IA_SIN(ia)->sin_addr;
812#if IPFIREWALL_FORWARD
813 /* Keep note that we did this - if the firewall changes
814 * the next-hop, our interface may change, changing the
815 * default source IP. It's a shame so much effort happens
816 * twice. Oh well.
817 */
818 fwd_rewrite_src++;
819#endif /* IPFIREWALL_FORWARD */
820 }
821#endif /* notdef */
1c79356b
A
822
823 /*
824 * Look for broadcast address and
825 * verify user is allowed to send
826 * such a packet.
827 */
828 if (isbroadcast) {
829 if ((ifp->if_flags & IFF_BROADCAST) == 0) {
830 error = EADDRNOTAVAIL;
831 goto bad;
832 }
833 if ((flags & IP_ALLOWBROADCAST) == 0) {
834 error = EACCES;
835 goto bad;
836 }
837 /* don't allow broadcast messages to be fragmented */
838 if ((u_short)ip->ip_len > ifp->if_mtu) {
839 error = EMSGSIZE;
840 goto bad;
841 }
842 m->m_flags |= M_BCAST;
843 } else {
844 m->m_flags &= ~M_BCAST;
845 }
846
847sendit:
b0d623f7
A
848#if PF
849 /* Invoke outbound packet filter */
850 if (pf_af_hook(ifp, mppn, &m, AF_INET, FALSE) != 0) {
851 if (packetlist == m0) {
852 packetlist = m;
853 mppn = NULL;
854 }
855 if (m != NULL) {
856 m0 = m;
857 /* Next packet in the chain */
858 goto loopit;
859 } else if (packetlist != NULL) {
860 /* No more packet; send down the chain */
861 goto sendchain;
862 }
863 /* Nothing left; we're done */
864 goto done;
865 }
866 m0 = m;
867 ip = mtod(m, struct ip *);
868 pkt_dst = ip->ip_dst;
869 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
870#endif /* PF */
9bccf70c
A
871 /*
872 * Force IP TTL to 255 following draft-ietf-zeroconf-ipv4-linklocal.txt
873 */
874 if (IN_LINKLOCAL(ntohl(ip->ip_src.s_addr)) || IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr))) {
875 ip_linklocal_stat.iplls_out_total++;
876 if (ip->ip_ttl != MAXTTL) {
877 ip_linklocal_stat.iplls_out_badttl++;
878 ip->ip_ttl = MAXTTL;
879 }
880 }
881
91447636
A
882 if (!didfilter && !TAILQ_EMPTY(&ipv4_filters)) {
883 struct ipfilter *filter;
884 int seen = (inject_filter_ref == 0);
b0d623f7
A
885
886 /* Check that a TSO frame isn't passed to a filter.
887 * This could happen if a filter is inserted while
888 * TCP is sending the TSO packet.
889 */
890 if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4) {
891 error = EMSGSIZE;
892 goto bad;
893 }
894
91447636 895 ipf_ref();
0c530ab8
A
896
897 /* 4135317 - always pass network byte order to filter */
b0d623f7
A
898
899#if BYTE_ORDER != BIG_ENDIAN
0c530ab8
A
900 HTONS(ip->ip_len);
901 HTONS(ip->ip_off);
b0d623f7
A
902#endif
903
91447636
A
904 TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) {
905 if (seen == 0) {
906 if ((struct ipfilter *)inject_filter_ref == filter)
907 seen = 1;
908 } else if (filter->ipf_filter.ipf_output) {
909 errno_t result;
910 result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, 0);
911 if (result == EJUSTRETURN) {
912 ipf_unref();
913 goto done;
914 }
915 if (result != 0) {
916 ipf_unref();
91447636
A
917 goto bad;
918 }
919 }
920 }
0c530ab8
A
921
922 /* set back to host byte order */
6601e61a 923 ip = mtod(m, struct ip *);
b0d623f7
A
924
925#if BYTE_ORDER != BIG_ENDIAN
0c530ab8
A
926 NTOHS(ip->ip_len);
927 NTOHS(ip->ip_off);
b0d623f7
A
928#endif
929
91447636 930 ipf_unref();
91447636
A
931 }
932
9bccf70c
A
933#if IPSEC
934 /* temporary for testing only: bypass ipsec altogether */
935
55e303ae 936 if (ipsec_bypass != 0 || (flags & IP_NOIPSEC) != 0)
9bccf70c
A
937 goto skip_ipsec;
938
55e303ae
A
939 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_START, 0,0,0,0,0);
940
91447636 941
9bccf70c
A
942 /* get SP for this packet */
943 if (so == NULL)
944 sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, flags, &error);
945 else
946 sp = ipsec4_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error);
947
948 if (sp == NULL) {
2d21ac55 949 IPSEC_STAT_INCREMENT(ipsecstat.out_inval);
55e303ae 950 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 0,0,0,0,0);
9bccf70c
A
951 goto bad;
952 }
953
954 error = 0;
955
956 /* check policy */
957 switch (sp->policy) {
958 case IPSEC_POLICY_DISCARD:
2d21ac55 959 case IPSEC_POLICY_GENERATE:
9bccf70c
A
960 /*
961 * This packet is just discarded.
962 */
2d21ac55 963 IPSEC_STAT_INCREMENT(ipsecstat.out_polvio);
55e303ae 964 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 1,0,0,0,0);
9bccf70c
A
965 goto bad;
966
967 case IPSEC_POLICY_BYPASS:
968 case IPSEC_POLICY_NONE:
969 /* no need to do IPsec. */
55e303ae 970 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 2,0,0,0,0);
9bccf70c
A
971 goto skip_ipsec;
972
973 case IPSEC_POLICY_IPSEC:
974 if (sp->req == NULL) {
975 /* acquire a policy */
976 error = key_spdacquire(sp);
55e303ae 977 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 3,0,0,0,0);
9bccf70c
A
978 goto bad;
979 }
980 break;
981
982 case IPSEC_POLICY_ENTRUST:
983 default:
984 printf("ip_output: Invalid policy found. %d\n", sp->policy);
985 }
986 {
987 struct ipsec_output_state state;
988 bzero(&state, sizeof(state));
989 state.m = m;
990 if (flags & IP_ROUTETOIF) {
991 state.ro = &iproute;
992 bzero(&iproute, sizeof(iproute));
993 } else
994 state.ro = ro;
995 state.dst = (struct sockaddr *)dst;
996
997 ip->ip_sum = 0;
998
999 /*
1000 * XXX
1001 * delayed checksums are not currently compatible with IPsec
1002 */
1003 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
1004 in_delayed_cksum(m);
1005 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1006 }
1007
b0d623f7
A
1008
1009#if BYTE_ORDER != BIG_ENDIAN
9bccf70c
A
1010 HTONS(ip->ip_len);
1011 HTONS(ip->ip_off);
b0d623f7 1012#endif
9bccf70c
A
1013
1014 error = ipsec4_output(&state, sp, flags);
91447636 1015
55e303ae
A
1016 m0 = m = state.m;
1017
9bccf70c
A
1018 if (flags & IP_ROUTETOIF) {
1019 /*
1020 * if we have tunnel mode SA, we may need to ignore
1021 * IP_ROUTETOIF.
1022 */
1023 if (state.ro != &iproute || state.ro->ro_rt != NULL) {
1024 flags &= ~IP_ROUTETOIF;
1025 ro = state.ro;
1026 }
1027 } else
1028 ro = state.ro;
55e303ae 1029
9bccf70c
A
1030 dst = (struct sockaddr_in *)state.dst;
1031 if (error) {
1032 /* mbuf is already reclaimed in ipsec4_output. */
1033 m0 = NULL;
1034 switch (error) {
1035 case EHOSTUNREACH:
1036 case ENETUNREACH:
1037 case EMSGSIZE:
1038 case ENOBUFS:
1039 case ENOMEM:
1040 break;
1041 default:
1042 printf("ip4_output (ipsec): error code %d\n", error);
1043 /*fall through*/
1044 case ENOENT:
1045 /* don't show these error codes to the user */
1046 error = 0;
1047 break;
1048 }
55e303ae 1049 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 4,0,0,0,0);
9bccf70c
A
1050 goto bad;
1051 }
1052 }
1053
1054 /* be sure to update variables that are affected by ipsec4_output() */
1055 ip = mtod(m, struct ip *);
55e303ae 1056
9bccf70c
A
1057#ifdef _IP_VHL
1058 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
1059#else
1060 hlen = ip->ip_hl << 2;
1061#endif
55e303ae 1062 /* Check that there wasn't a route change and src is still valid */
b0d623f7
A
1063 if (ro->ro_rt != NULL && ro->ro_rt->generation_id != route_generation) {
1064 if ((src_ia = ifa_foraddr(ip->ip_src.s_addr)) == NULL &&
1065 ((flags & (IP_ROUTETOIF | IP_FORWARDING)) == 0)) {
1066 error = EADDRNOTAVAIL;
1067 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END,
1068 5,0,0,0,0);
55e303ae
A
1069 goto bad;
1070 }
b0d623f7 1071 rtfree(ro->ro_rt);
55e303ae 1072 ro->ro_rt = NULL;
b0d623f7
A
1073 if (src_ia != NULL)
1074 ifafree(&src_ia->ia_ifa);
55e303ae
A
1075 }
1076
9bccf70c
A
1077 if (ro->ro_rt == NULL) {
1078 if ((flags & IP_ROUTETOIF) == 0) {
b0d623f7
A
1079 printf("ip_output: can't update route after "
1080 "IPsec processing\n");
1081 error = EHOSTUNREACH; /*XXX*/
1082 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END,
1083 6,0,0,0,0);
9bccf70c
A
1084 goto bad;
1085 }
1086 } else {
91447636
A
1087 if (ia)
1088 ifafree(&ia->ia_ifa);
b0d623f7 1089 RT_LOCK_SPIN(ro->ro_rt);
9bccf70c 1090 ia = ifatoia(ro->ro_rt->rt_ifa);
91447636
A
1091 if (ia)
1092 ifaref(&ia->ia_ifa);
9bccf70c 1093 ifp = ro->ro_rt->rt_ifp;
b0d623f7 1094 RT_UNLOCK(ro->ro_rt);
9bccf70c
A
1095 }
1096
1097 /* make it flipped, again. */
b0d623f7
A
1098
1099#if BYTE_ORDER != BIG_ENDIAN
9bccf70c
A
1100 NTOHS(ip->ip_len);
1101 NTOHS(ip->ip_off);
b0d623f7
A
1102#endif
1103
55e303ae 1104 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 7,0xff,0xff,0xff,0xff);
91447636
A
1105
1106 /* Pass to filters again */
1107 if (!TAILQ_EMPTY(&ipv4_filters)) {
1108 struct ipfilter *filter;
1109
b0d623f7
A
1110 /* Check that a TSO frame isn't passed to a filter.
1111 * This could happen if a filter is inserted while
1112 * TCP is sending the TSO packet.
1113 */
1114 if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4) {
1115 error = EMSGSIZE;
1116 goto bad;
1117 }
1118
91447636 1119 ipf_ref();
0c530ab8
A
1120
1121 /* 4135317 - always pass network byte order to filter */
b0d623f7
A
1122
1123#if BYTE_ORDER != BIG_ENDIAN
0c530ab8
A
1124 HTONS(ip->ip_len);
1125 HTONS(ip->ip_off);
b0d623f7
A
1126#endif
1127
91447636
A
1128 TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) {
1129 if (filter->ipf_filter.ipf_output) {
1130 errno_t result;
1131 result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, 0);
1132 if (result == EJUSTRETURN) {
1133 ipf_unref();
1134 goto done;
1135 }
1136 if (result != 0) {
1137 ipf_unref();
91447636
A
1138 goto bad;
1139 }
1140 }
1141 }
0c530ab8
A
1142
1143 /* set back to host byte order */
6601e61a 1144 ip = mtod(m, struct ip *);
b0d623f7
A
1145
1146#if BYTE_ORDER != BIG_ENDIAN
0c530ab8
A
1147 NTOHS(ip->ip_len);
1148 NTOHS(ip->ip_off);
b0d623f7
A
1149#endif
1150
91447636 1151 ipf_unref();
91447636 1152 }
9bccf70c
A
1153skip_ipsec:
1154#endif /*IPSEC*/
1155
2d21ac55 1156#if IPFIREWALL
1c79356b
A
1157 /*
1158 * IpHack's section.
1159 * - Xlate: translate packet's addr/port (NAT).
1160 * - Firewall: deny/allow/etc.
1161 * - Wrap: fake packet's addr/port <unimpl.>
1162 * - Encapsulate: put it in another IP and send out. <unimp.>
1163 */
9bccf70c
A
1164 if (fr_checkp) {
1165 struct mbuf *m1 = m;
1166
91447636 1167 if ((error = (*fr_checkp)(ip, hlen, ifp, 1, &m1)) || !m1) {
9bccf70c 1168 goto done;
91447636 1169 }
55e303ae 1170 ip = mtod(m0 = m = m1, struct ip *);
1c79356b
A
1171 }
1172
1173 /*
1174 * Check with the firewall...
91447636 1175 * but not if we are already being fwd'd from a firewall.
1c79356b 1176 */
91447636 1177 if (fw_enable && IPFW_LOADED && !args.next_hop) {
1c79356b
A
1178 struct sockaddr_in *old = dst;
1179
91447636
A
1180 args.m = m;
1181 args.next_hop = dst;
1182 args.oif = ifp;
1183 off = ip_fw_chk_ptr(&args);
1184 m = args.m;
1185 dst = args.next_hop;
1186
1c79356b
A
1187 /*
1188 * On return we must do the following:
9bccf70c 1189 * IP_FW_PORT_DENY_FLAG -> drop the pkt (XXX new)
1c79356b 1190 * 1<=off<= 0xffff -> DIVERT
9bccf70c
A
1191 * (off & IP_FW_PORT_DYNT_FLAG) -> send to a DUMMYNET pipe
1192 * (off & IP_FW_PORT_TEE_FLAG) -> TEE the packet
1c79356b
A
1193 * dst != old -> IPFIREWALL_FORWARD
1194 * off==0, dst==old -> accept
1195 * If some of the above modules is not compiled in, then
1196 * we should't have to check the corresponding condition
1197 * (because the ipfw control socket should not accept
1198 * unsupported rules), but better play safe and drop
1199 * packets in case of doubt.
1200 */
55e303ae 1201 m0 = m;
9bccf70c
A
1202 if ( (off & IP_FW_PORT_DENY_FLAG) || m == NULL) {
1203 if (m)
1204 m_freem(m);
1205 error = EACCES ;
1206 goto done ;
1c79356b 1207 }
9bccf70c 1208 ip = mtod(m, struct ip *);
2d21ac55 1209
3a60a9f5 1210 if (off == 0 && dst == old) {/* common case */
1c79356b 1211 goto pass ;
3a60a9f5 1212 }
1c79356b 1213#if DUMMYNET
91447636 1214 if (DUMMYNET_LOADED && (off & IP_FW_PORT_DYNT_FLAG) != 0) {
c910b4d9
A
1215 /*
1216 * pass the pkt to dummynet. Need to include
1217 * pipe number, m, ifp, ro, dst because these are
1218 * not recomputed in the next pass.
1219 * All other parameters have been already used and
1220 * so they are not needed anymore.
1221 * XXX note: if the ifp or ro entry are deleted
1222 * while a pkt is in dummynet, we are in trouble!
1223 */
1224 args.ro = ro;
1225 args.dst = dst;
1226 args.flags = flags;
1227 if (flags & IP_OUTARGS)
1228 args.ipoa = ipoa;
1229
1230 error = ip_dn_io_ptr(m, off & 0xffff, DN_TO_IP_OUT,
1231 &args);
1232 goto done;
1c79356b 1233 }
91447636 1234#endif /* DUMMYNET */
1c79356b 1235#if IPDIVERT
9bccf70c
A
1236 if (off != 0 && (off & IP_FW_PORT_DYNT_FLAG) == 0) {
1237 struct mbuf *clone = NULL;
1238
1239 /* Clone packet if we're doing a 'tee' */
1240 if ((off & IP_FW_PORT_TEE_FLAG) != 0)
1241 clone = m_dup(m, M_DONTWAIT);
1242 /*
1243 * XXX
1244 * delayed checksums are not currently compatible
1245 * with divert sockets.
1246 */
1247 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
1248 in_delayed_cksum(m);
1249 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1250 }
1251
1252 /* Restore packet header fields to original values */
b0d623f7
A
1253
1254#if BYTE_ORDER != BIG_ENDIAN
9bccf70c
A
1255 HTONS(ip->ip_len);
1256 HTONS(ip->ip_off);
b0d623f7 1257#endif
9bccf70c
A
1258
1259 /* Deliver packet to divert input routine */
91447636 1260 divert_packet(m, 0, off & 0xffff, args.divert_rule);
9bccf70c
A
1261
1262 /* If 'tee', continue with original packet */
1263 if (clone != NULL) {
55e303ae 1264 m0 = m = clone;
9bccf70c
A
1265 ip = mtod(m, struct ip *);
1266 goto pass;
1267 }
1c79356b
A
1268 goto done;
1269 }
1270#endif
1271
1272#if IPFIREWALL_FORWARD
1273 /* Here we check dst to make sure it's directly reachable on the
1274 * interface we previously thought it was.
1275 * If it isn't (which may be likely in some situations) we have
1276 * to re-route it (ie, find a route for the next-hop and the
1277 * associated interface) and set them here. This is nested
1278 * forwarding which in most cases is undesirable, except where
1279 * such control is nigh impossible. So we do it here.
1280 * And I'm babbling.
1281 */
1282 if (off == 0 && old != dst) {
91447636 1283 struct in_ifaddr *ia_fw;
1c79356b
A
1284
1285 /* It's changed... */
1286 /* There must be a better way to do this next line... */
1287 static struct route sro_fwd, *ro_fwd = &sro_fwd;
1288#if IPFIREWALL_FORWARD_DEBUG
1289 printf("IPFIREWALL_FORWARD: New dst ip: ");
1290 print_ip(dst->sin_addr);
1291 printf("\n");
1292#endif
1293 /*
1294 * We need to figure out if we have been forwarded
1295 * to a local socket. If so then we should somehow
1296 * "loop back" to ip_input, and get directed to the
1297 * PCB as if we had received this packet. This is
1298 * because it may be difficult to identify the packets
1299 * you want to forward until they are being output
1300 * and have selected an interface. (e.g. locally
1301 * initiated packets) If we used the loopback interface,
1302 * we would not be able to control what happens
1303 * as the packet runs through ip_input() as
1304 * it is done through a ISR.
1305 */
b0d623f7 1306 lck_rw_lock_shared(in_ifaddr_rwlock);
91447636 1307 TAILQ_FOREACH(ia_fw, &in_ifaddrhead, ia_link) {
1c79356b
A
1308 /*
1309 * If the addr to forward to is one
1310 * of ours, we pretend to
1311 * be the destination for this packet.
1312 */
91447636 1313 if (IA_SIN(ia_fw)->sin_addr.s_addr ==
1c79356b
A
1314 dst->sin_addr.s_addr)
1315 break;
1316 }
b0d623f7
A
1317 lck_rw_done(in_ifaddr_rwlock);
1318 if (ia_fw) {
1c79356b 1319 /* tell ip_input "dont filter" */
91447636
A
1320 struct m_tag *fwd_tag;
1321 struct ip_fwd_tag *ipfwd_tag;
1322
b0d623f7
A
1323 fwd_tag = m_tag_alloc(KERNEL_MODULE_TAG_ID,
1324 KERNEL_TAG_TYPE_IPFORWARD,
1325 sizeof (*ipfwd_tag), M_NOWAIT);
91447636
A
1326 if (fwd_tag == NULL) {
1327 error = ENOBUFS;
1328 goto bad;
1329 }
1330
1331 ipfwd_tag = (struct ip_fwd_tag *)(fwd_tag+1);
1332 ipfwd_tag->next_hop = args.next_hop;
1333
1334 m_tag_prepend(m, fwd_tag);
1335
1c79356b
A
1336 if (m->m_pkthdr.rcvif == NULL)
1337 m->m_pkthdr.rcvif = ifunit("lo0");
91447636
A
1338 if ((~IF_HWASSIST_CSUM_FLAGS(m->m_pkthdr.rcvif->if_hwassist) &
1339 m->m_pkthdr.csum_flags) == 0) {
1340 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
1341 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1342 m->m_pkthdr.csum_flags |=
1343 CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1344 m->m_pkthdr.csum_data = 0xffff;
1345 }
9bccf70c 1346 m->m_pkthdr.csum_flags |=
91447636
A
1347 CSUM_IP_CHECKED | CSUM_IP_VALID;
1348 }
1349 else if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
1350 in_delayed_cksum(m);
1351 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1352 ip->ip_sum = in_cksum(m, hlen);
9bccf70c 1353 }
b0d623f7
A
1354
1355#if BYTE_ORDER != BIG_ENDIAN
9bccf70c
A
1356 HTONS(ip->ip_len);
1357 HTONS(ip->ip_off);
b0d623f7 1358#endif
91447636
A
1359
1360 /* we need to call dlil_output to run filters
1361 * and resync to avoid recursion loops.
1362 */
1363 if (lo_ifp) {
1364 dlil_output(lo_ifp, PF_INET, m, 0, (struct sockaddr *)dst, 0);
1365 }
1366 else {
1367 printf("ip_output: no loopback ifp for forwarding!!!\n");
1368 }
1c79356b
A
1369 goto done;
1370 }
1371 /* Some of the logic for this was
1372 * nicked from above.
1373 *
1374 * This rewrites the cached route in a local PCB.
1375 * Is this what we want to do?
1376 */
1377 bcopy(dst, &ro_fwd->ro_dst, sizeof(*dst));
1378
b0d623f7
A
1379 ro_fwd->ro_rt = NULL;
1380 rtalloc_ign(ro_fwd, RTF_PRCLONING);
1c79356b 1381
b0d623f7
A
1382 if (ro_fwd->ro_rt == NULL) {
1383 OSAddAtomic(1, &ipstat.ips_noroute);
1c79356b
A
1384 error = EHOSTUNREACH;
1385 goto bad;
1386 }
1387
b0d623f7 1388 RT_LOCK_SPIN(ro_fwd->ro_rt);
91447636 1389 ia_fw = ifatoia(ro_fwd->ro_rt->rt_ifa);
b0d623f7
A
1390 if (ia_fw != NULL)
1391 ifaref(&ia_fw->ia_ifa);
1c79356b 1392 ifp = ro_fwd->ro_rt->rt_ifp;
1c79356b
A
1393 ro_fwd->ro_rt->rt_use++;
1394 if (ro_fwd->ro_rt->rt_flags & RTF_GATEWAY)
1395 dst = (struct sockaddr_in *)ro_fwd->ro_rt->rt_gateway;
7e4a7d39 1396 if (ro_fwd->ro_rt->rt_flags & RTF_HOST) {
1c79356b
A
1397 isbroadcast =
1398 (ro_fwd->ro_rt->rt_flags & RTF_BROADCAST);
7e4a7d39
A
1399 } else {
1400 /* Become a regular mutex */
1401 RT_CONVERT_LOCK(ro_fwd->ro_rt);
1c79356b 1402 isbroadcast = in_broadcast(dst->sin_addr, ifp);
7e4a7d39 1403 }
b0d623f7
A
1404 RT_UNLOCK(ro_fwd->ro_rt);
1405 rtfree(ro->ro_rt);
1c79356b
A
1406 ro->ro_rt = ro_fwd->ro_rt;
1407 dst = (struct sockaddr_in *)&ro_fwd->ro_dst;
1408
1409 /*
1410 * If we added a default src ip earlier,
1411 * which would have been gotten from the-then
1412 * interface, do it again, from the new one.
1413 */
b0d623f7
A
1414 if (ia_fw != NULL) {
1415 if (fwd_rewrite_src)
1416 ip->ip_src = IA_SIN(ia_fw)->sin_addr;
1417 ifafree(&ia_fw->ia_ifa);
1418 }
1c79356b
A
1419 goto pass ;
1420 }
1421#endif /* IPFIREWALL_FORWARD */
1422 /*
1423 * if we get here, none of the above matches, and
1424 * we have to drop the pkt
1425 */
1426 m_freem(m);
91447636 1427 error = EACCES; /* not sure this is the right error msg */
91447636 1428 goto done;
1c79356b 1429 }
2d21ac55 1430#endif /* IPFIREWALL */
1c79356b
A
1431
1432pass:
e5568f75
A
1433#if __APPLE__
1434 /* Do not allow loopback address to wind up on a wire */
1435 if ((ifp->if_flags & IFF_LOOPBACK) == 0 &&
1436 ((ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
1437 (ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)) {
b0d623f7 1438 OSAddAtomic(1, &ipstat.ips_badaddr);
e5568f75 1439 m_freem(m);
91447636
A
1440 /*
1441 * Do not simply drop the packet just like a firewall -- we want the
1442 * application to feel the pain.
1443 * Return ENETUNREACH like ip6_output does in some similar cases.
1444 * This can startle the otherwise clueless process that specifies
e5568f75
A
1445 * loopback as the source address.
1446 */
91447636 1447 error = ENETUNREACH;
e5568f75
A
1448 goto done;
1449 }
1450#endif
9bccf70c 1451 m->m_pkthdr.csum_flags |= CSUM_IP;
b0d623f7
A
1452 tso = (ifp->if_hwassist & IFNET_TSO_IPV4) && (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4);
1453
4a249263
A
1454 sw_csum = m->m_pkthdr.csum_flags
1455 & ~IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);
1c79356b 1456
9bccf70c 1457 if ((ifp->if_hwassist & CSUM_TCP_SUM16) != 0) {
1c79356b 1458 /*
9bccf70c
A
1459 * Special case code for GMACE
1460 * frames that can be checksumed by GMACE SUM16 HW:
1461 * frame >64, no fragments, no UDP
1c79356b 1462 */
9bccf70c
A
1463 if (apple_hwcksum_tx && (m->m_pkthdr.csum_flags & CSUM_TCP)
1464 && (ip->ip_len > 50) && (ip->ip_len <= ifp->if_mtu)) {
1465 /* Apple GMAC HW, expects STUFF_OFFSET << 16 | START_OFFSET */
1466 u_short offset = (IP_VHL_HL(ip->ip_vhl) << 2) +14 ; /* IP+Enet header length */
1467 u_short csumprev= m->m_pkthdr.csum_data & 0xFFFF;
1468 m->m_pkthdr.csum_flags = CSUM_DATA_VALID | CSUM_TCP_SUM16; /* for GMAC */
1469 m->m_pkthdr.csum_data = (csumprev + offset) << 16 ;
1470 m->m_pkthdr.csum_data += offset;
1471 sw_csum = CSUM_DELAY_IP; /* do IP hdr chksum in software */
1c79356b 1472 }
9bccf70c
A
1473 else {
1474 /* let the software handle any UDP or TCP checksums */
1475 sw_csum |= (CSUM_DELAY_DATA & m->m_pkthdr.csum_flags);
1c79356b 1476 }
2d21ac55
A
1477 } else if (apple_hwcksum_tx == 0) {
1478 sw_csum |= (CSUM_DELAY_DATA | CSUM_DELAY_IP) &
1479 m->m_pkthdr.csum_flags;
1c79356b 1480 }
9bccf70c
A
1481
1482 if (sw_csum & CSUM_DELAY_DATA) {
1483 in_delayed_cksum(m);
1484 sw_csum &= ~CSUM_DELAY_DATA;
1485 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
0b4e3aa0 1486 }
2d21ac55
A
1487
1488 if (apple_hwcksum_tx != 0) {
1489 m->m_pkthdr.csum_flags &=
1490 IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);
1491 } else {
1492 m->m_pkthdr.csum_flags = 0;
1493 }
0b4e3aa0 1494
1c79356b 1495 /*
9bccf70c 1496 * If small enough for interface, or the interface will take
0b4e3aa0 1497 * care of the fragmentation for us, can just send directly.
1c79356b 1498 */
b0d623f7 1499 if ((u_short)ip->ip_len <= ifp->if_mtu || tso ||
9bccf70c 1500 ifp->if_hwassist & CSUM_FRAGMENT) {
b0d623f7
A
1501 if (tso)
1502 m->m_pkthdr.csum_flags |= CSUM_TSO_IPV4;
1503
2d21ac55 1504
b0d623f7 1505#if BYTE_ORDER != BIG_ENDIAN
9bccf70c
A
1506 HTONS(ip->ip_len);
1507 HTONS(ip->ip_off);
b0d623f7
A
1508#endif
1509
1c79356b 1510 ip->ip_sum = 0;
9bccf70c 1511 if (sw_csum & CSUM_DELAY_IP) {
0b4e3aa0 1512 ip->ip_sum = in_cksum(m, hlen);
9bccf70c
A
1513 }
1514
1515#ifndef __APPLE__
1516 /* Record statistics for this interface address. */
1517 if (!(flags & IP_FORWARDING) && ia != NULL) {
1518 ia->ia_ifa.if_opackets++;
1519 ia->ia_ifa.if_obytes += m->m_pkthdr.len;
1520 }
1521#endif
1522
1523#if IPSEC
1524 /* clean ipsec history once it goes out of the node */
55e303ae 1525 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0)
9bccf70c
A
1526 ipsec_delaux(m);
1527#endif
91447636 1528 if (packetchain == 0) {
b0d623f7 1529 error = ifnet_output(ifp, PF_INET, m, ro->ro_rt,
2d21ac55 1530 (struct sockaddr *)dst);
2d21ac55 1531 goto done;
91447636
A
1532 }
1533 else { /* packet chaining allows us to reuse the route for all packets */
b0d623f7 1534 mppn = &m->m_nextpkt;
91447636
A
1535 m = m->m_nextpkt;
1536 if (m == NULL) {
b0d623f7
A
1537#if PF
1538sendchain:
1539#endif /* PF */
91447636
A
1540 if (pktcnt > ip_maxchainsent)
1541 ip_maxchainsent = pktcnt;
1542 //send
2d21ac55 1543 error = ifnet_output(ifp, PF_INET, packetlist,
b0d623f7 1544 ro->ro_rt, (struct sockaddr *)dst);
91447636
A
1545 pktcnt = 0;
1546 goto done;
1547
1548 }
1549 m0 = m;
1550 pktcnt++;
1551 goto loopit;
1552 }
1c79356b
A
1553 }
1554 /*
1555 * Too large for interface; fragment if possible.
1556 * Must be able to put at least 8 bytes per fragment.
1557 */
b0d623f7
A
1558
1559 if (ip->ip_off & IP_DF || (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4)) {
1c79356b
A
1560 error = EMSGSIZE;
1561 /*
1562 * This case can happen if the user changed the MTU
b0d623f7 1563 *
1c79356b
A
1564 * of an interface after enabling IP on it. Because
1565 * most netifs don't keep track of routes pointing to
1566 * them, there is no way for one to update all its
1567 * routes when the MTU is changed.
1568 */
b0d623f7 1569 RT_LOCK_SPIN(ro->ro_rt);
8f6c56a5 1570 if (ro->ro_rt && (ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST))
1c79356b
A
1571 && !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU)
1572 && (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) {
1573 ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
1574 }
b0d623f7
A
1575 RT_UNLOCK(ro->ro_rt);
1576 OSAddAtomic(1, &ipstat.ips_cantfrag);
1c79356b
A
1577 goto bad;
1578 }
b0d623f7
A
1579
1580 error = ip_fragment(m, ifp, ifp->if_mtu, sw_csum);
1581 if (error != 0) {
1582 m0 = m = NULL;
1c79356b
A
1583 goto bad;
1584 }
1585
b0d623f7
A
1586 KERNEL_DEBUG(DBG_LAYER_END, ip->ip_dst.s_addr,
1587 ip->ip_src.s_addr, ip->ip_p, ip->ip_off, ip->ip_len);
1588
1589 for (m = m0; m; m = m0) {
1590 m0 = m->m_nextpkt;
1591 m->m_nextpkt = 0;
1592#if IPSEC
1593 /* clean ipsec history once it goes out of the node */
1594 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0)
1595 ipsec_delaux(m);
1596#endif
1597 if (error == 0) {
1598#ifndef __APPLE__
1599 /* Record statistics for this interface address. */
1600 if (ia != NULL) {
1601 ia->ia_ifa.if_opackets++;
1602 ia->ia_ifa.if_obytes += m->m_pkthdr.len;
1603 }
1604#endif
1605 if ((packetchain != 0) && (pktcnt > 0))
1606 panic("ip_output: mix of packet in packetlist is wrong=%p", packetlist);
1607 error = ifnet_output(ifp, PF_INET, m, ro->ro_rt,
1608 (struct sockaddr *)dst);
1609 } else
1610 m_freem(m);
1611 }
1612
1613 if (error == 0)
1614 OSAddAtomic(1, &ipstat.ips_fragmented);
1615
1616done:
1617 if (ia) {
1618 ifafree(&ia->ia_ifa);
1619 ia = NULL;
1620 }
1621#if IPSEC
1622 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) {
1623 if (ro == &iproute && ro->ro_rt) {
1624 rtfree(ro->ro_rt);
1625 ro->ro_rt = NULL;
1626 }
1627 if (sp != NULL) {
1628 KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
1629 printf("DP ip_output call free SP:%x\n", sp));
1630 key_freesp(sp, KEY_SADB_UNLOCKED);
1631 }
1632 }
1633#endif /* IPSEC */
1634
1635 KERNEL_DEBUG(DBG_FNC_IP_OUTPUT | DBG_FUNC_END, error,0,0,0,0);
1636 return (error);
1637bad:
1638 m_freem(m0);
1639 goto done;
1640}
1641
1642int
1643ip_fragment(struct mbuf *m, struct ifnet *ifp, unsigned long mtu, int sw_csum)
1644{
1645 struct ip *ip, *mhip;
1646 int len, hlen, mhlen, firstlen, off, error = 0;
1647 struct mbuf **mnext = &m->m_nextpkt, *m0;
1648 int nfrags = 1;
1649
1650 ip = mtod(m, struct ip *);
1651#ifdef _IP_VHL
1652 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
1653#else
1654 hlen = ip->ip_hl << 2;
1655#endif
1656
1657 firstlen = len = (mtu - hlen) &~ 7;
1658 if (len < 8) {
1659 m_freem(m);
1660 return (EMSGSIZE);
1661 }
1662
9bccf70c
A
1663 /*
1664 * if the interface will not calculate checksums on
1665 * fragmented packets, then do it here.
1666 */
1667 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA &&
1668 (ifp->if_hwassist & CSUM_IP_FRAGS) == 0) {
1669 in_delayed_cksum(m);
9bccf70c
A
1670 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1671 }
0b4e3aa0 1672
1c79356b
A
1673 /*
1674 * Loop through length of segment after first fragment,
1675 * make new header and copy data of each part and link onto chain.
1676 */
1677 m0 = m;
1678 mhlen = sizeof (struct ip);
1679 for (off = hlen + len; off < (u_short)ip->ip_len; off += len) {
2d21ac55 1680 MGETHDR(m, M_DONTWAIT, MT_HEADER); /* MAC-OK */
1c79356b
A
1681 if (m == 0) {
1682 error = ENOBUFS;
b0d623f7 1683 OSAddAtomic(1, &ipstat.ips_odropped);
1c79356b
A
1684 goto sendorfree;
1685 }
0b4e3aa0 1686 m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG;
1c79356b
A
1687 m->m_data += max_linkhdr;
1688 mhip = mtod(m, struct ip *);
1689 *mhip = *ip;
1690 if (hlen > sizeof (struct ip)) {
1691 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
1692 mhip->ip_vhl = IP_MAKE_VHL(IPVERSION, mhlen >> 2);
1693 }
1694 m->m_len = mhlen;
1695 mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF);
1696 if (ip->ip_off & IP_MF)
1697 mhip->ip_off |= IP_MF;
1698 if (off + len >= (u_short)ip->ip_len)
1699 len = (u_short)ip->ip_len - off;
1700 else
1701 mhip->ip_off |= IP_MF;
1702 mhip->ip_len = htons((u_short)(len + mhlen));
1703 m->m_next = m_copy(m0, off, len);
1704 if (m->m_next == 0) {
1705 (void) m_free(m);
1706 error = ENOBUFS; /* ??? */
b0d623f7 1707 OSAddAtomic(1, &ipstat.ips_odropped);
1c79356b
A
1708 goto sendorfree;
1709 }
1710 m->m_pkthdr.len = mhlen + len;
91447636 1711 m->m_pkthdr.rcvif = 0;
9bccf70c 1712 m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
91447636 1713 m->m_pkthdr.socket_id = m0->m_pkthdr.socket_id;
2d21ac55
A
1714#if CONFIG_MACF_NET
1715 mac_netinet_fragment(m0, m);
1716#endif
b0d623f7
A
1717
1718#if BYTE_ORDER != BIG_ENDIAN
9bccf70c 1719 HTONS(mhip->ip_off);
b0d623f7
A
1720#endif
1721
1c79356b 1722 mhip->ip_sum = 0;
9bccf70c 1723 if (sw_csum & CSUM_DELAY_IP) {
0b4e3aa0 1724 mhip->ip_sum = in_cksum(m, mhlen);
9bccf70c 1725 }
1c79356b
A
1726 *mnext = m;
1727 mnext = &m->m_nextpkt;
0b4e3aa0 1728 nfrags++;
1c79356b 1729 }
b0d623f7 1730 OSAddAtomic(nfrags, &ipstat.ips_ofragments);
0b4e3aa0
A
1731
1732 /* set first/last markers for fragment chain */
9bccf70c
A
1733 m->m_flags |= M_LASTFRAG;
1734 m0->m_flags |= M_FIRSTFRAG | M_FRAG;
0b4e3aa0
A
1735 m0->m_pkthdr.csum_data = nfrags;
1736
1c79356b
A
1737 /*
1738 * Update first fragment by trimming what's been copied out
1739 * and updating header, then send each fragment (in order).
1740 */
1741 m = m0;
1742 m_adj(m, hlen + firstlen - (u_short)ip->ip_len);
1743 m->m_pkthdr.len = hlen + firstlen;
1744 ip->ip_len = htons((u_short)m->m_pkthdr.len);
9bccf70c 1745 ip->ip_off |= IP_MF;
b0d623f7
A
1746
1747#if BYTE_ORDER != BIG_ENDIAN
9bccf70c 1748 HTONS(ip->ip_off);
b0d623f7
A
1749#endif
1750
1c79356b 1751 ip->ip_sum = 0;
9bccf70c 1752 if (sw_csum & CSUM_DELAY_IP) {
0b4e3aa0 1753 ip->ip_sum = in_cksum(m, hlen);
9bccf70c 1754 }
1c79356b 1755sendorfree:
b0d623f7
A
1756 if (error)
1757 m_freem_list(m0);
1c79356b 1758
1c79356b 1759 return (error);
1c79356b
A
1760}
1761
2d21ac55
A
/*
 * Forward the length of a software-checksummed payload to the output
 * checksum statistics routine of its transport protocol.  Protocols other
 * than TCP and UDP are ignored.
 */
static void
ip_out_cksum_stats(int proto, u_int32_t len)
{
	if (proto == IPPROTO_TCP) {
		tcp_out_cksum_stats(len);
	} else if (proto == IPPROTO_UDP) {
		udp_out_cksum_stats(len);
	}
	/* keep only TCP or UDP stats for now */
}
1777
0b4e3aa0 1778void
8f6c56a5 1779in_delayed_cksum_offset(struct mbuf *m0, int ip_offset)
0b4e3aa0 1780{
9bccf70c 1781 struct ip *ip;
8f6c56a5
A
1782 unsigned char buf[sizeof(struct ip)];
1783 u_short csum, offset, ip_len;
1784 struct mbuf *m = m0;
91447636 1785
8f6c56a5 1786 while (ip_offset >= m->m_len) {
91447636
A
1787 ip_offset -= m->m_len;
1788 m = m->m_next;
8f6c56a5 1789 if (m == NULL) {
2d21ac55 1790 printf("in_delayed_cksum_withoffset failed - ip_offset wasn't in the packet\n");
91447636
A
1791 return;
1792 }
1793 }
1794
8f6c56a5 1795 /* Sometimes the IP header is not contiguous, yes this can happen! */
91447636 1796 if (ip_offset + sizeof(struct ip) > m->m_len) {
8f6c56a5 1797#if DEBUG
b0d623f7 1798 printf("delayed m_pullup, m->len: %d off: %d\n",
743b1565 1799 m->m_len, ip_offset);
8f6c56a5 1800#endif
2d21ac55 1801 m_copydata(m, ip_offset, sizeof(struct ip), (caddr_t) buf);
8f6c56a5
A
1802
1803 ip = (struct ip *)buf;
1804 } else {
1805 ip = (struct ip*)(m->m_data + ip_offset);
91447636
A
1806 }
1807
1808 /* Gross */
1809 if (ip_offset) {
1810 m->m_len -= ip_offset;
1811 m->m_data += ip_offset;
1812 }
1813
9bccf70c 1814 offset = IP_VHL_HL(ip->ip_vhl) << 2 ;
8f6c56a5
A
1815
1816 /*
1817 * We could be in the context of an IP or interface filter; in the
1818 * former case, ip_len would be in host (correct) order while for
1819 * the latter it would be in network order. Because of this, we
1820 * attempt to interpret the length field by comparing it against
1821 * the actual packet length. If the comparison fails, byte swap
1822 * the length and check again. If it still fails, then the packet
1823 * is bogus and we give up.
1824 */
1825 ip_len = ip->ip_len;
1826 if (ip_len != (m0->m_pkthdr.len - ip_offset)) {
1827 ip_len = SWAP16(ip_len);
1828 if (ip_len != (m0->m_pkthdr.len - ip_offset)) {
1829 printf("in_delayed_cksum_offset: ip_len %d (%d) "
1830 "doesn't match actual length %d\n", ip->ip_len,
1831 ip_len, (m0->m_pkthdr.len - ip_offset));
1832 return;
1833 }
1834 }
1835
1836 csum = in_cksum_skip(m, ip_len, offset);
1837
2d21ac55
A
1838 /* Update stats */
1839 ip_out_cksum_stats(ip->ip_p, ip_len - offset);
1840
8f6c56a5 1841 if (m0->m_pkthdr.csum_flags & CSUM_UDP && csum == 0)
0b4e3aa0 1842 csum = 0xffff;
8f6c56a5
A
1843 offset += m0->m_pkthdr.csum_data & 0xFFFF; /* checksum offset */
1844
91447636
A
1845 /* Gross */
1846 if (ip_offset) {
1847 if (M_LEADINGSPACE(m) < ip_offset)
8f6c56a5 1848 panic("in_delayed_cksum_offset - chain modified!\n");
91447636
A
1849 m->m_len += ip_offset;
1850 m->m_data -= ip_offset;
1851 }
0b4e3aa0 1852
8f6c56a5 1853 if (offset > ip_len) /* bogus offset */
0b4e3aa0
A
1854 return;
1855
8f6c56a5 1856 /* Insert the checksum in the existing chain */
91447636 1857 if (offset + ip_offset + sizeof(u_short) > m->m_len) {
8f6c56a5
A
1858 char tmp[2];
1859
1860#if DEBUG
b0d623f7 1861 printf("delayed m_copyback, m->len: %d off: %d p: %d\n",
91447636 1862 m->m_len, offset + ip_offset, ip->ip_p);
8f6c56a5
A
1863#endif
1864 *(u_short *)tmp = csum;
1865 m_copyback(m, offset + ip_offset, 2, tmp);
1866 } else
1867 *(u_short *)(m->m_data + offset + ip_offset) = csum;
91447636
A
1868}
1869
/*
 * Finish a delayed checksum for a packet whose IP header sits at the very
 * start of the mbuf chain; shorthand for in_delayed_cksum_offset() with an
 * offset of zero.
 */
void
in_delayed_cksum(struct mbuf *m)
{
	const int ip_hdr_at_head = 0;

	in_delayed_cksum_offset(m, ip_hdr_at_head);
}
1875
1876void
1877in_cksum_offset(struct mbuf* m, size_t ip_offset)
1878{
1879 struct ip* ip = NULL;
1880 int hlen = 0;
8f6c56a5
A
1881 unsigned char buf[sizeof(struct ip)];
1882 int swapped = 0;
91447636 1883
8f6c56a5 1884 while (ip_offset >= m->m_len) {
91447636
A
1885 ip_offset -= m->m_len;
1886 m = m->m_next;
8f6c56a5 1887 if (m == NULL) {
91447636
A
1888 printf("in_cksum_offset failed - ip_offset wasn't in the packet\n");
1889 return;
1890 }
1891 }
1892
8f6c56a5 1893 /* Sometimes the IP header is not contiguous, yes this can happen! */
91447636 1894 if (ip_offset + sizeof(struct ip) > m->m_len) {
8f6c56a5
A
1895
1896#if DEBUG
b0d623f7 1897 printf("in_cksum_offset - delayed m_pullup, m->len: %d off: %lu\n",
91447636 1898 m->m_len, ip_offset);
8f6c56a5 1899#endif
2d21ac55 1900 m_copydata(m, ip_offset, sizeof(struct ip), (caddr_t) buf);
8f6c56a5
A
1901
1902 ip = (struct ip *)buf;
1903 ip->ip_sum = 0;
1904 m_copyback(m, ip_offset + offsetof(struct ip, ip_sum), 2, (caddr_t)&ip->ip_sum);
1905 } else {
1906 ip = (struct ip*)(m->m_data + ip_offset);
1907 ip->ip_sum = 0;
91447636
A
1908 }
1909
1910 /* Gross */
1911 if (ip_offset) {
1912 m->m_len -= ip_offset;
1913 m->m_data += ip_offset;
1914 }
1915
91447636
A
1916#ifdef _IP_VHL
1917 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
1918#else
1919 hlen = ip->ip_hl << 2;
1920#endif
8f6c56a5
A
1921 /*
1922 * We could be in the context of an IP or interface filter; in the
1923 * former case, ip_len would be in host order while for the latter
1924 * it would be in network (correct) order. Because of this, we
1925 * attempt to interpret the length field by comparing it against
1926 * the actual packet length. If the comparison fails, byte swap
1927 * the length and check again. If it still fails, then the packet
1928 * is bogus and we give up.
1929 */
1930 if (ntohs(ip->ip_len) != (m->m_pkthdr.len - ip_offset)) {
1931 ip->ip_len = SWAP16(ip->ip_len);
1932 swapped = 1;
1933 if (ntohs(ip->ip_len) != (m->m_pkthdr.len - ip_offset)) {
1934 ip->ip_len = SWAP16(ip->ip_len);
1935 printf("in_cksum_offset: ip_len %d (%d) "
2d21ac55 1936 "doesn't match actual length %lu\n",
8f6c56a5
A
1937 ip->ip_len, SWAP16(ip->ip_len),
1938 (m->m_pkthdr.len - ip_offset));
1939 return;
1940 }
1941 }
1942
91447636
A
1943 ip->ip_sum = 0;
1944 ip->ip_sum = in_cksum(m, hlen);
8f6c56a5
A
1945 if (swapped)
1946 ip->ip_len = SWAP16(ip->ip_len);
1947
91447636
A
1948 /* Gross */
1949 if (ip_offset) {
1950 if (M_LEADINGSPACE(m) < ip_offset)
1951 panic("in_cksum_offset - chain modified!\n");
1952 m->m_len += ip_offset;
1953 m->m_data -= ip_offset;
9bccf70c 1954 }
8f6c56a5
A
1955
1956 /* Insert the checksum in the existing chain if IP header not contiguous */
1957 if (ip_offset + sizeof(struct ip) > m->m_len) {
1958 char tmp[2];
1959
1960#if DEBUG
b0d623f7 1961 printf("in_cksum_offset m_copyback, m->len: %u off: %lu p: %d\n",
8f6c56a5
A
1962 m->m_len, ip_offset + offsetof(struct ip, ip_sum), ip->ip_p);
1963#endif
1964 *(u_short *)tmp = ip->ip_sum;
1965 m_copyback(m, ip_offset + offsetof(struct ip, ip_sum), 2, tmp);
1966 }
0b4e3aa0
A
1967}
1968
1c79356b
A
1969/*
1970 * Insert IP options into preformed packet.
1971 * Adjust IP destination as required for IP source routing,
1972 * as indicated by a non-zero in_addr at the start of the options.
1973 *
1974 * XXX This routine assumes that the packet has no options in place.
1975 */
1976static struct mbuf *
1977ip_insertoptions(m, opt, phlen)
1978 register struct mbuf *m;
1979 struct mbuf *opt;
1980 int *phlen;
1981{
1982 register struct ipoption *p = mtod(opt, struct ipoption *);
1983 struct mbuf *n;
1984 register struct ip *ip = mtod(m, struct ip *);
1985 unsigned optlen;
1986
1987 optlen = opt->m_len - sizeof(p->ipopt_dst);
1988 if (optlen + (u_short)ip->ip_len > IP_MAXPACKET)
1989 return (m); /* XXX should fail */
1990 if (p->ipopt_dst.s_addr)
1991 ip->ip_dst = p->ipopt_dst;
1992 if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
2d21ac55 1993 MGETHDR(n, M_DONTWAIT, MT_HEADER); /* MAC-OK */
1c79356b
A
1994 if (n == 0)
1995 return (m);
91447636 1996 n->m_pkthdr.rcvif = 0;
2d21ac55
A
1997#if CONFIG_MACF_NET
1998 mac_mbuf_label_copy(m, n);
1999#endif
1c79356b
A
2000 n->m_pkthdr.len = m->m_pkthdr.len + optlen;
2001 m->m_len -= sizeof(struct ip);
2002 m->m_data += sizeof(struct ip);
2003 n->m_next = m;
2004 m = n;
2005 m->m_len = optlen + sizeof(struct ip);
2006 m->m_data += max_linkhdr;
2007 (void)memcpy(mtod(m, void *), ip, sizeof(struct ip));
2008 } else {
2009 m->m_data -= optlen;
2010 m->m_len += optlen;
2011 m->m_pkthdr.len += optlen;
2012 ovbcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
2013 }
2014 ip = mtod(m, struct ip *);
2015 bcopy(p->ipopt_list, ip + 1, optlen);
2016 *phlen = sizeof(struct ip) + optlen;
2017 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, *phlen >> 2);
2018 ip->ip_len += optlen;
2019 return (m);
2020}
2021
2022/*
2023 * Copy options from ip to jp,
2024 * omitting those not copied during fragmentation.
2025 */
1c79356b
A
2026int
2027ip_optcopy(ip, jp)
2028 struct ip *ip, *jp;
2029{
2030 register u_char *cp, *dp;
2031 int opt, optlen, cnt;
2032
2033 cp = (u_char *)(ip + 1);
2034 dp = (u_char *)(jp + 1);
2035 cnt = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof (struct ip);
2036 for (; cnt > 0; cnt -= optlen, cp += optlen) {
2037 opt = cp[0];
2038 if (opt == IPOPT_EOL)
2039 break;
2040 if (opt == IPOPT_NOP) {
2041 /* Preserve for IP mcast tunnel's LSRR alignment. */
2042 *dp++ = IPOPT_NOP;
2043 optlen = 1;
2044 continue;
9bccf70c
A
2045 }
2046#if DIAGNOSTIC
2047 if (cnt < IPOPT_OLEN + sizeof(*cp))
2048 panic("malformed IPv4 option passed to ip_optcopy");
2049#endif
2050 optlen = cp[IPOPT_OLEN];
2051#if DIAGNOSTIC
2052 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
2053 panic("malformed IPv4 option passed to ip_optcopy");
2054#endif
1c79356b
A
2055 /* bogus lengths should have been caught by ip_dooptions */
2056 if (optlen > cnt)
2057 optlen = cnt;
2058 if (IPOPT_COPIED(opt)) {
2059 bcopy(cp, dp, optlen);
2060 dp += optlen;
2061 }
2062 }
2063 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
2064 *dp++ = IPOPT_EOL;
2065 return (optlen);
2066}
2067
2068/*
2069 * IP socket option processing.
2070 */
2071int
2072ip_ctloutput(so, sopt)
2073 struct socket *so;
2074 struct sockopt *sopt;
2075{
2076 struct inpcb *inp = sotoinpcb(so);
2077 int error, optval;
2078
2079 error = optval = 0;
2080 if (sopt->sopt_level != IPPROTO_IP) {
2081 return (EINVAL);
2082 }
2083
2084 switch (sopt->sopt_dir) {
2085 case SOPT_SET:
2086 switch (sopt->sopt_name) {
2087 case IP_OPTIONS:
2088#ifdef notyet
2089 case IP_RETOPTS:
2090#endif
2091 {
2092 struct mbuf *m;
2093 if (sopt->sopt_valsize > MLEN) {
2094 error = EMSGSIZE;
2095 break;
2096 }
b0d623f7
A
2097 MGET(m, sopt->sopt_p != kernproc ? M_WAIT : M_DONTWAIT,
2098 MT_HEADER);
1c79356b
A
2099 if (m == 0) {
2100 error = ENOBUFS;
2101 break;
2102 }
2103 m->m_len = sopt->sopt_valsize;
2104 error = sooptcopyin(sopt, mtod(m, char *), m->m_len,
2105 m->m_len);
2106 if (error)
2107 break;
2108
2109 return (ip_pcbopts(sopt->sopt_name, &inp->inp_options,
2110 m));
2111 }
2112
2113 case IP_TOS:
2114 case IP_TTL:
2115 case IP_RECVOPTS:
2116 case IP_RECVRETOPTS:
2117 case IP_RECVDSTADDR:
2118 case IP_RECVIF:
55e303ae 2119 case IP_RECVTTL:
9bccf70c 2120#if defined(NFAITH) && NFAITH > 0
1c79356b 2121 case IP_FAITH:
9bccf70c 2122#endif
1c79356b
A
2123 error = sooptcopyin(sopt, &optval, sizeof optval,
2124 sizeof optval);
2125 if (error)
2126 break;
2127
2128 switch (sopt->sopt_name) {
2129 case IP_TOS:
2130 inp->inp_ip_tos = optval;
2131 break;
2132
2133 case IP_TTL:
2134 inp->inp_ip_ttl = optval;
2135 break;
2136#define OPTSET(bit) \
2137 if (optval) \
2138 inp->inp_flags |= bit; \
2139 else \
2140 inp->inp_flags &= ~bit;
2141
2142 case IP_RECVOPTS:
2143 OPTSET(INP_RECVOPTS);
2144 break;
2145
2146 case IP_RECVRETOPTS:
2147 OPTSET(INP_RECVRETOPTS);
2148 break;
2149
2150 case IP_RECVDSTADDR:
2151 OPTSET(INP_RECVDSTADDR);
2152 break;
2153
2154 case IP_RECVIF:
2155 OPTSET(INP_RECVIF);
2156 break;
2157
55e303ae
A
2158 case IP_RECVTTL:
2159 OPTSET(INP_RECVTTL);
2160 break;
2161
9bccf70c 2162#if defined(NFAITH) && NFAITH > 0
1c79356b
A
2163 case IP_FAITH:
2164 OPTSET(INP_FAITH);
2165 break;
9bccf70c 2166#endif
1c79356b
A
2167 }
2168 break;
2169#undef OPTSET
2170
c910b4d9
A
2171#if CONFIG_FORCE_OUT_IFP
2172 /*
2173 * Apple private interface, similar to IP_BOUND_IF, except
2174 * that the parameter is a NULL-terminated string containing
2175 * the name of the network interface; an emptry string means
2176 * unbind. Applications are encouraged to use IP_BOUND_IF
2177 * instead, as that is the current "official" API.
2178 */
2d21ac55 2179 case IP_FORCE_OUT_IFP: {
c910b4d9
A
2180 char ifname[IFNAMSIZ];
2181 unsigned int ifscope;
2182
2183 /* This option is settable only for IPv4 */
2184 if (!(inp->inp_vflag & INP_IPV4)) {
2185 error = EINVAL;
2186 break;
2187 }
2188
2d21ac55
A
2189 /* Verify interface name parameter is sane */
2190 if (sopt->sopt_valsize > sizeof(ifname)) {
2191 error = EINVAL;
2192 break;
2193 }
c910b4d9 2194
2d21ac55
A
2195 /* Copy the interface name */
2196 if (sopt->sopt_valsize != 0) {
c910b4d9
A
2197 error = sooptcopyin(sopt, ifname,
2198 sizeof (ifname), sopt->sopt_valsize);
2d21ac55
A
2199 if (error)
2200 break;
2201 }
c910b4d9
A
2202
2203 if (sopt->sopt_valsize == 0 || ifname[0] == NULL) {
2204 /* Unbind this socket from any interface */
2205 ifscope = IFSCOPE_NONE;
2206 } else {
2207 ifnet_t ifp;
2208
2209 /* Verify name is NULL terminated */
2210 if (ifname[sopt->sopt_valsize - 1] != NULL) {
2211 error = EINVAL;
2212 break;
2d21ac55 2213 }
c910b4d9
A
2214
2215 /* Bail out if given bogus interface name */
2216 if (ifnet_find_by_name(ifname, &ifp) != 0) {
2217 error = ENXIO;
2218 break;
2219 }
2220
2221 /* Bind this socket to this interface */
2222 ifscope = ifp->if_index;
2223
2224 /*
2225 * Won't actually free; since we don't release
2226 * this later, we should do it now.
2227 */
2228 ifnet_release(ifp);
2d21ac55 2229 }
c910b4d9 2230 ip_bindif(inp, ifscope);
2d21ac55
A
2231 }
2232 break;
2233#endif
1c79356b
A
2234 case IP_MULTICAST_IF:
2235 case IP_MULTICAST_VIF:
2236 case IP_MULTICAST_TTL:
2237 case IP_MULTICAST_LOOP:
2238 case IP_ADD_MEMBERSHIP:
2239 case IP_DROP_MEMBERSHIP:
2240 error = ip_setmoptions(sopt, &inp->inp_moptions);
2241 break;
2242
2243 case IP_PORTRANGE:
2244 error = sooptcopyin(sopt, &optval, sizeof optval,
2245 sizeof optval);
2246 if (error)
2247 break;
2248
2249 switch (optval) {
2250 case IP_PORTRANGE_DEFAULT:
2251 inp->inp_flags &= ~(INP_LOWPORT);
2252 inp->inp_flags &= ~(INP_HIGHPORT);
2253 break;
2254
2255 case IP_PORTRANGE_HIGH:
2256 inp->inp_flags &= ~(INP_LOWPORT);
2257 inp->inp_flags |= INP_HIGHPORT;
2258 break;
2259
2260 case IP_PORTRANGE_LOW:
2261 inp->inp_flags &= ~(INP_HIGHPORT);
2262 inp->inp_flags |= INP_LOWPORT;
2263 break;
2264
2265 default:
2266 error = EINVAL;
2267 break;
2268 }
2269 break;
2270
2271#if IPSEC
2272 case IP_IPSEC_POLICY:
2273 {
2274 caddr_t req = NULL;
2275 size_t len = 0;
2276 int priv;
2277 struct mbuf *m;
2278 int optname;
2279
91447636
A
2280 if (sopt->sopt_valsize > MCLBYTES) {
2281 error = EMSGSIZE;
2282 break;
2283 }
9bccf70c 2284 if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
1c79356b 2285 break;
9bccf70c 2286 if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
1c79356b 2287 break;
b0d623f7 2288 priv = (proc_suser(sopt->sopt_p) == 0);
1c79356b
A
2289 if (m) {
2290 req = mtod(m, caddr_t);
2291 len = m->m_len;
2292 }
2293 optname = sopt->sopt_name;
2294 error = ipsec4_set_policy(inp, optname, req, len, priv);
2295 m_freem(m);
2296 break;
2297 }
2298#endif /*IPSEC*/
2299
2d21ac55
A
2300#if TRAFFIC_MGT
2301 case IP_TRAFFIC_MGT_BACKGROUND:
2302 {
2303 unsigned background = 0;
2304 error = sooptcopyin(sopt, &background, sizeof(background), sizeof(background));
2305 if (error)
2306 break;
2307
d1ecb069
A
2308 if (background) {
2309 socket_set_traffic_mgt_flags(so,
2310 TRAFFIC_MGT_SO_BACKGROUND |
2311 TRAFFIC_MGT_SO_BG_REGULATE);
2312 } else {
2313 socket_clear_traffic_mgt_flags(so,
2314 TRAFFIC_MGT_SO_BACKGROUND |
2315 TRAFFIC_MGT_SO_BG_REGULATE);
2316 }
2d21ac55
A
2317
2318 break;
2319 }
2320#endif /* TRAFFIC_MGT */
2321
c910b4d9
A
2322 /*
2323 * On a multihomed system, scoped routing can be used to
2324 * restrict the source interface used for sending packets.
2325 * The socket option IP_BOUND_IF binds a particular AF_INET
2326 * socket to an interface such that data sent on the socket
2327 * is restricted to that interface. This is unlike the
2328 * SO_DONTROUTE option where the routing table is bypassed;
2329 * therefore it allows for a greater flexibility and control
2330 * over the system behavior, and does not place any restriction
2331 * on the destination address type (e.g. unicast, multicast,
2332 * or broadcast if applicable) or whether or not the host is
2333 * directly reachable. Note that in the multicast transmit
2334 * case, IP_MULTICAST_IF takes precedence over IP_BOUND_IF,
2335 * since the former practically bypasses the routing table;
2336 * in this case, IP_BOUND_IF sets the default interface used
2337 * for sending multicast packets in the absence of an explicit
2338 * transmit interface set via IP_MULTICAST_IF.
2339 */
2340 case IP_BOUND_IF:
2341 /* This option is settable only for IPv4 */
2342 if (!(inp->inp_vflag & INP_IPV4)) {
2343 error = EINVAL;
2344 break;
2345 }
2346
2347 error = sooptcopyin(sopt, &optval, sizeof (optval),
2348 sizeof (optval));
2349
2350 if (error)
2351 break;
2352
2353 ip_bindif(inp, optval);
2354 break;
2355
1c79356b
A
2356 default:
2357 error = ENOPROTOOPT;
2358 break;
2359 }
2360 break;
2361
2362 case SOPT_GET:
2363 switch (sopt->sopt_name) {
2364 case IP_OPTIONS:
2365 case IP_RETOPTS:
2366 if (inp->inp_options)
2367 error = sooptcopyout(sopt,
2368 mtod(inp->inp_options,
2369 char *),
2370 inp->inp_options->m_len);
2371 else
2372 sopt->sopt_valsize = 0;
2373 break;
2374
2375 case IP_TOS:
2376 case IP_TTL:
2377 case IP_RECVOPTS:
2378 case IP_RECVRETOPTS:
2379 case IP_RECVDSTADDR:
2380 case IP_RECVIF:
55e303ae 2381 case IP_RECVTTL:
1c79356b 2382 case IP_PORTRANGE:
9bccf70c 2383#if defined(NFAITH) && NFAITH > 0
1c79356b 2384 case IP_FAITH:
9bccf70c 2385#endif
1c79356b
A
2386 switch (sopt->sopt_name) {
2387
2388 case IP_TOS:
2389 optval = inp->inp_ip_tos;
2390 break;
2391
2392 case IP_TTL:
2393 optval = inp->inp_ip_ttl;
2394 break;
2395
2396#define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0)
2397
2398 case IP_RECVOPTS:
2399 optval = OPTBIT(INP_RECVOPTS);
2400 break;
2401
2402 case IP_RECVRETOPTS:
2403 optval = OPTBIT(INP_RECVRETOPTS);
2404 break;
2405
2406 case IP_RECVDSTADDR:
2407 optval = OPTBIT(INP_RECVDSTADDR);
2408 break;
2409
2410 case IP_RECVIF:
2411 optval = OPTBIT(INP_RECVIF);
2412 break;
2413
55e303ae
A
2414 case IP_RECVTTL:
2415 optval = OPTBIT(INP_RECVTTL);
2416 break;
2417
1c79356b
A
2418 case IP_PORTRANGE:
2419 if (inp->inp_flags & INP_HIGHPORT)
2420 optval = IP_PORTRANGE_HIGH;
2421 else if (inp->inp_flags & INP_LOWPORT)
2422 optval = IP_PORTRANGE_LOW;
2423 else
2424 optval = 0;
2425 break;
2426
9bccf70c 2427#if defined(NFAITH) && NFAITH > 0
1c79356b
A
2428 case IP_FAITH:
2429 optval = OPTBIT(INP_FAITH);
2430 break;
9bccf70c 2431#endif
1c79356b
A
2432 }
2433 error = sooptcopyout(sopt, &optval, sizeof optval);
2434 break;
2435
2436 case IP_MULTICAST_IF:
2437 case IP_MULTICAST_VIF:
2438 case IP_MULTICAST_TTL:
2439 case IP_MULTICAST_LOOP:
2440 case IP_ADD_MEMBERSHIP:
2441 case IP_DROP_MEMBERSHIP:
2442 error = ip_getmoptions(sopt, inp->inp_moptions);
2443 break;
2444
2445#if IPSEC
2446 case IP_IPSEC_POLICY:
2447 {
2448 struct mbuf *m = NULL;
1c79356b 2449 caddr_t req = NULL;
9bccf70c 2450 size_t len = 0;
1c79356b 2451
9bccf70c 2452 if (m != 0) {
1c79356b
A
2453 req = mtod(m, caddr_t);
2454 len = m->m_len;
2455 }
1c79356b
A
2456 error = ipsec4_get_policy(sotoinpcb(so), req, len, &m);
2457 if (error == 0)
9bccf70c 2458 error = soopt_mcopyout(sopt, m); /* XXX */
1c79356b
A
2459 if (error == 0)
2460 m_freem(m);
2461 break;
2462 }
2463#endif /*IPSEC*/
2464
2d21ac55
A
2465#if TRAFFIC_MGT
2466 case IP_TRAFFIC_MGT_BACKGROUND:
2467 {
2468 unsigned background = so->so_traffic_mgt_flags;
2469 return (sooptcopyout(sopt, &background, sizeof(background)));
2470 break;
2471 }
2472#endif /* TRAFFIC_MGT */
2473
c910b4d9
A
2474 case IP_BOUND_IF:
2475 if (inp->inp_flags & INP_BOUND_IF)
2476 optval = inp->inp_boundif;
2477 error = sooptcopyout(sopt, &optval, sizeof (optval));
2478 break;
2479
1c79356b
A
2480 default:
2481 error = ENOPROTOOPT;
2482 break;
2483 }
2484 break;
2485 }
2486 return (error);
2487}
2488
2489/*
2490 * Set up IP options in pcb for insertion in output packets.
2491 * Store in mbuf with pointer in pcbopt, adding pseudo-option
2492 * with destination address if source routed.
2493 */
2494static int
2d21ac55
A
2495ip_pcbopts(
2496 __unused int optname,
2497 struct mbuf **pcbopt,
2498 register struct mbuf *m)
1c79356b
A
2499{
2500 register int cnt, optlen;
2501 register u_char *cp;
2502 u_char opt;
2503
2504 /* turn off any old options */
2505 if (*pcbopt)
2506 (void)m_free(*pcbopt);
2507 *pcbopt = 0;
2508 if (m == (struct mbuf *)0 || m->m_len == 0) {
2509 /*
2510 * Only turning off any previous options.
2511 */
2512 if (m)
2513 (void)m_free(m);
2514 return (0);
2515 }
2516
2517#ifndef vax
2518 if (m->m_len % sizeof(int32_t))
2519 goto bad;
2520#endif
2521 /*
2522 * IP first-hop destination address will be stored before
2523 * actual options; move other options back
2524 * and clear it when none present.
2525 */
2526 if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
2527 goto bad;
2528 cnt = m->m_len;
2529 m->m_len += sizeof(struct in_addr);
2530 cp = mtod(m, u_char *) + sizeof(struct in_addr);
2531 ovbcopy(mtod(m, caddr_t), (caddr_t)cp, (unsigned)cnt);
2532 bzero(mtod(m, caddr_t), sizeof(struct in_addr));
2533
2534 for (; cnt > 0; cnt -= optlen, cp += optlen) {
2535 opt = cp[IPOPT_OPTVAL];
2536 if (opt == IPOPT_EOL)
2537 break;
2538 if (opt == IPOPT_NOP)
2539 optlen = 1;
2540 else {
2541 if (cnt < IPOPT_OLEN + sizeof(*cp))
2542 goto bad;
2543 optlen = cp[IPOPT_OLEN];
2544 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
2545 goto bad;
2546 }
2547 switch (opt) {
2548
2549 default:
2550 break;
2551
2552 case IPOPT_LSRR:
2553 case IPOPT_SSRR:
2554 /*
2555 * user process specifies route as:
2556 * ->A->B->C->D
2557 * D must be our final destination (but we can't
2558 * check that since we may not have connected yet).
2559 * A is first hop destination, which doesn't appear in
2560 * actual IP option, but is stored before the options.
2561 */
2562 if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
2563 goto bad;
2564 m->m_len -= sizeof(struct in_addr);
2565 cnt -= sizeof(struct in_addr);
2566 optlen -= sizeof(struct in_addr);
2567 cp[IPOPT_OLEN] = optlen;
2568 /*
2569 * Move first hop before start of options.
2570 */
2571 bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
2572 sizeof(struct in_addr));
2573 /*
2574 * Then copy rest of options back
2575 * to close up the deleted entry.
2576 */
2577 ovbcopy((caddr_t)(&cp[IPOPT_OFFSET+1] +
2578 sizeof(struct in_addr)),
2579 (caddr_t)&cp[IPOPT_OFFSET+1],
2580 (unsigned)cnt + sizeof(struct in_addr));
2581 break;
2582 }
2583 }
2584 if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
2585 goto bad;
2586 *pcbopt = m;
2587 return (0);
2588
2589bad:
2590 (void)m_free(m);
2591 return (EINVAL);
2592}
2593
2594/*
2595 * XXX
2596 * The whole multicast option thing needs to be re-thought.
2597 * Several of these options are equally applicable to non-multicast
2598 * transmission, and one (IP_MULTICAST_TTL) totally duplicates a
2599 * standard option (IP_TTL).
2600 */
9bccf70c
A
2601
2602/*
2603 * following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as interface index.
2604 */
2605static struct ifnet *
2606ip_multicast_if(a, ifindexp)
2607 struct in_addr *a;
2608 int *ifindexp;
2609{
2610 int ifindex;
2611 struct ifnet *ifp;
2612
2613 if (ifindexp)
2614 *ifindexp = 0;
2615 if (ntohl(a->s_addr) >> 24 == 0) {
2616 ifindex = ntohl(a->s_addr) & 0xffffff;
91447636
A
2617 ifnet_head_lock_shared();
2618 if (ifindex < 0 || if_index < ifindex) {
2619 ifnet_head_done();
9bccf70c 2620 return NULL;
91447636 2621 }
9bccf70c 2622 ifp = ifindex2ifnet[ifindex];
91447636 2623 ifnet_head_done();
9bccf70c
A
2624 if (ifindexp)
2625 *ifindexp = ifindex;
2626 } else {
2627 INADDR_TO_IFP(*a, ifp);
2628 }
2629 return ifp;
2630}
2631
1c79356b
A
2632/*
2633 * Set the IP multicast options in response to user setsockopt().
2634 */
2635static int
2636ip_setmoptions(sopt, imop)
2637 struct sockopt *sopt;
2638 struct ip_moptions **imop;
2639{
2640 int error = 0;
1c79356b
A
2641 struct in_addr addr;
2642 struct ip_mreq mreq;
9bccf70c 2643 struct ifnet *ifp = NULL;
1c79356b 2644 struct ip_moptions *imo = *imop;
9bccf70c 2645 int ifindex;
1c79356b
A
2646
2647 if (imo == NULL) {
2648 /*
2649 * No multicast option buffer attached to the pcb;
2650 * allocate one and initialize to default values.
2651 */
55e303ae
A
2652 error = ip_createmoptions(imop);
2653 if (error != 0)
2654 return error;
2655 imo = *imop;
1c79356b
A
2656 }
2657
2658 switch (sopt->sopt_name) {
2659 /* store an index number for the vif you wanna use in the send */
2d21ac55 2660#if MROUTING
b7266188
A
2661 case IP_MULTICAST_VIF:
2662 {
2663 int i;
2664 if (legal_vif_num == 0) {
2665 error = EOPNOTSUPP;
2666 break;
2667 }
2668 error = sooptcopyin(sopt, &i, sizeof i, sizeof i);
2669 if (error)
2670 break;
2671 if (!legal_vif_num(i) && (i != -1)) {
2672 error = EINVAL;
2673 break;
2674 }
2675 imo->imo_multicast_vif = i;
1c79356b
A
2676 break;
2677 }
2d21ac55 2678#endif /* MROUTING */
1c79356b
A
2679
2680 case IP_MULTICAST_IF:
2681 /*
2682 * Select the interface for outgoing multicast packets.
2683 */
2684 error = sooptcopyin(sopt, &addr, sizeof addr, sizeof addr);
2685 if (error)
2686 break;
2687 /*
2688 * INADDR_ANY is used to remove a previous selection.
2689 * When no interface is selected, a default one is
2690 * chosen every time a multicast packet is sent.
2691 */
2692 if (addr.s_addr == INADDR_ANY) {
2693 imo->imo_multicast_ifp = NULL;
2694 break;
2695 }
2696 /*
2697 * The selected interface is identified by its local
2698 * IP address. Find the interface and confirm that
2699 * it supports multicasting.
2700 */
9bccf70c 2701 ifp = ip_multicast_if(&addr, &ifindex);
1c79356b 2702 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1c79356b
A
2703 error = EADDRNOTAVAIL;
2704 break;
2705 }
2706 imo->imo_multicast_ifp = ifp;
9bccf70c
A
2707 if (ifindex)
2708 imo->imo_multicast_addr = addr;
2709 else
2710 imo->imo_multicast_addr.s_addr = INADDR_ANY;
1c79356b
A
2711 break;
2712
2713 case IP_MULTICAST_TTL:
2714 /*
2715 * Set the IP time-to-live for outgoing multicast packets.
2716 * The original multicast API required a char argument,
2717 * which is inconsistent with the rest of the socket API.
2718 * We allow either a char or an int.
2719 */
2720 if (sopt->sopt_valsize == 1) {
2721 u_char ttl;
2722 error = sooptcopyin(sopt, &ttl, 1, 1);
2723 if (error)
2724 break;
2725 imo->imo_multicast_ttl = ttl;
2726 } else {
2727 u_int ttl;
2728 error = sooptcopyin(sopt, &ttl, sizeof ttl,
2729 sizeof ttl);
2730 if (error)
2731 break;
2732 if (ttl > 255)
2733 error = EINVAL;
2734 else
2735 imo->imo_multicast_ttl = ttl;
2736 }
2737 break;
2738
2739 case IP_MULTICAST_LOOP:
2740 /*
2741 * Set the loopback flag for outgoing multicast packets.
2742 * Must be zero or one. The original multicast API required a
2743 * char argument, which is inconsistent with the rest
2744 * of the socket API. We allow either a char or an int.
2745 */
2746 if (sopt->sopt_valsize == 1) {
2747 u_char loop;
2748 error = sooptcopyin(sopt, &loop, 1, 1);
2749 if (error)
2750 break;
2751 imo->imo_multicast_loop = !!loop;
2752 } else {
2753 u_int loop;
2754 error = sooptcopyin(sopt, &loop, sizeof loop,
2755 sizeof loop);
2756 if (error)
2757 break;
2758 imo->imo_multicast_loop = !!loop;
2759 }
2760 break;
2761
2762 case IP_ADD_MEMBERSHIP:
2763 /*
2764 * Add a multicast group membership.
2765 * Group must be a valid IP multicast address.
2766 */
2767 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
2768 if (error)
2769 break;
9bccf70c 2770
55e303ae 2771 error = ip_addmembership(imo, &mreq);
1c79356b
A
2772 break;
2773
2774 case IP_DROP_MEMBERSHIP:
2775 /*
2776 * Drop a multicast group membership.
2777 * Group must be a valid IP multicast address.
2778 */
2779 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
2780 if (error)
2781 break;
55e303ae
A
2782
2783 error = ip_dropmembership(imo, &mreq);
1c79356b
A
2784 break;
2785
2786 default:
2787 error = EOPNOTSUPP;
2788 break;
2789 }
2790
2791 /*
2792 * If all options have default values, no need to keep the mbuf.
2793 */
2794 if (imo->imo_multicast_ifp == NULL &&
b0d623f7 2795 imo->imo_multicast_vif == (u_int32_t)-1 &&
1c79356b
A
2796 imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL &&
2797 imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP &&
2798 imo->imo_num_memberships == 0) {
2799 FREE(*imop, M_IPMOPTS);
2800 *imop = NULL;
2801 }
2802
2803 return (error);
2804}
2805
55e303ae
A
2806/*
2807 * Set the IP multicast options in response to user setsockopt().
2808 */
2809__private_extern__ int
2810ip_createmoptions(
2811 struct ip_moptions **imop)
2812{
2813 struct ip_moptions *imo;
2814 imo = (struct ip_moptions*) _MALLOC(sizeof(*imo), M_IPMOPTS,
2815 M_WAITOK);
2816
2817 if (imo == NULL)
2818 return (ENOBUFS);
2819 *imop = imo;
2820 imo->imo_multicast_ifp = NULL;
2821 imo->imo_multicast_addr.s_addr = INADDR_ANY;
2822 imo->imo_multicast_vif = -1;
2823 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
2824 imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
2825 imo->imo_num_memberships = 0;
2826
2827 return 0;
2828}
2829
2830/*
2831 * Add membership to an IPv4 multicast.
2832 */
2833__private_extern__ int
2834ip_addmembership(
2835 struct ip_moptions *imo,
2836 struct ip_mreq *mreq)
2837{
2838 struct route ro;
2839 struct sockaddr_in *dst;
2840 struct ifnet *ifp = NULL;
2841 int error = 0;
55e303ae 2842 int i;
b0d623f7
A
2843
2844 bzero((caddr_t)&ro, sizeof(ro));
2845
55e303ae
A
2846 if (!IN_MULTICAST(ntohl(mreq->imr_multiaddr.s_addr))) {
2847 error = EINVAL;
b0d623f7 2848 goto done;
55e303ae 2849 }
55e303ae
A
2850 /*
2851 * If no interface address was provided, use the interface of
2852 * the route to the given multicast address.
2853 */
2854 if (mreq->imr_interface.s_addr == INADDR_ANY) {
55e303ae
A
2855 dst = (struct sockaddr_in *)&ro.ro_dst;
2856 dst->sin_len = sizeof(*dst);
2857 dst->sin_family = AF_INET;
2858 dst->sin_addr = mreq->imr_multiaddr;
b0d623f7 2859 rtalloc_ign(&ro, 0);
55e303ae
A
2860 if (ro.ro_rt != NULL) {
2861 ifp = ro.ro_rt->rt_ifp;
b0d623f7 2862 } else {
55e303ae 2863 /* If there's no default route, try using loopback */
b0d623f7 2864 mreq->imr_interface.s_addr = htonl(INADDR_LOOPBACK);
55e303ae
A
2865 }
2866 }
b0d623f7 2867
55e303ae
A
2868 if (ifp == NULL) {
2869 ifp = ip_multicast_if(&mreq->imr_interface, NULL);
2870 }
2871
2872 /*
2873 * See if we found an interface, and confirm that it
2874 * supports multicast.
2875 */
2876 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
2877 error = EADDRNOTAVAIL;
b0d623f7 2878 goto done;
55e303ae
A
2879 }
2880 /*
2881 * See if the membership already exists or if all the
2882 * membership slots are full.
2883 */
2884 for (i = 0; i < imo->imo_num_memberships; ++i) {
2885 if (imo->imo_membership[i]->inm_ifp == ifp &&
2886 imo->imo_membership[i]->inm_addr.s_addr
2887 == mreq->imr_multiaddr.s_addr)
2888 break;
2889 }
2890 if (i < imo->imo_num_memberships) {
2891 error = EADDRINUSE;
b0d623f7 2892 goto done;
55e303ae
A
2893 }
2894 if (i == IP_MAX_MEMBERSHIPS) {
2895 error = ETOOMANYREFS;
b0d623f7 2896 goto done;
55e303ae
A
2897 }
2898 /*
2899 * Everything looks good; add a new record to the multicast
2900 * address list for the given interface.
2901 */
2902 if ((imo->imo_membership[i] =
2903 in_addmulti(&mreq->imr_multiaddr, ifp)) == NULL) {
2904 error = ENOBUFS;
b0d623f7 2905 goto done;
55e303ae
A
2906 }
2907 ++imo->imo_num_memberships;
b0d623f7
A
2908
2909done:
2910 if (ro.ro_rt != NULL)
2911 rtfree(ro.ro_rt);
2912
55e303ae
A
2913 return error;
2914}
2915
2916/*
2917 * Drop membership of an IPv4 multicast.
2918 */
2919__private_extern__ int
2920ip_dropmembership(
2921 struct ip_moptions *imo,
2922 struct ip_mreq *mreq)
2923{
2924 int error = 0;
55e303ae
A
2925 struct ifnet* ifp = NULL;
2926 int i;
2927
2928 if (!IN_MULTICAST(ntohl(mreq->imr_multiaddr.s_addr))) {
2929 error = EINVAL;
2930 return error;
2931 }
2932
55e303ae
A
2933 /*
2934 * If an interface address was specified, get a pointer
2935 * to its ifnet structure.
2936 */
2937 if (mreq->imr_interface.s_addr == INADDR_ANY)
2938 ifp = NULL;
2939 else {
2940 ifp = ip_multicast_if(&mreq->imr_interface, NULL);
2941 if (ifp == NULL) {
2942 error = EADDRNOTAVAIL;
55e303ae
A
2943 return error;
2944 }
2945 }
2946 /*
2947 * Find the membership in the membership array.
2948 */
2949 for (i = 0; i < imo->imo_num_memberships; ++i) {
2950 if ((ifp == NULL ||
2951 imo->imo_membership[i]->inm_ifp == ifp) &&
2952 imo->imo_membership[i]->inm_addr.s_addr ==
2953 mreq->imr_multiaddr.s_addr)
2954 break;
2955 }
2956 if (i == imo->imo_num_memberships) {
2957 error = EADDRNOTAVAIL;
55e303ae
A
2958 return error;
2959 }
2960 /*
2961 * Give up the multicast address record to which the
2962 * membership points.
2963 */
91447636 2964 in_delmulti(&imo->imo_membership[i]);
55e303ae
A
2965 /*
2966 * Remove the gap in the membership array.
2967 */
2968 for (++i; i < imo->imo_num_memberships; ++i)
2969 imo->imo_membership[i-1] = imo->imo_membership[i];
2970 --imo->imo_num_memberships;
55e303ae
A
2971
2972 return error;
2973}
2974
1c79356b
A
2975/*
2976 * Return the IP multicast options in response to user getsockopt().
2977 */
2978static int
2979ip_getmoptions(sopt, imo)
2980 struct sockopt *sopt;
2981 register struct ip_moptions *imo;
2982{
2983 struct in_addr addr;
2984 struct in_ifaddr *ia;
2985 int error, optval;
2986 u_char coptval;
2987
2988 error = 0;
2989 switch (sopt->sopt_name) {
2d21ac55 2990#if MROUTING
1c79356b
A
2991 case IP_MULTICAST_VIF:
2992 if (imo != NULL)
2993 optval = imo->imo_multicast_vif;
2994 else
2995 optval = -1;
2996 error = sooptcopyout(sopt, &optval, sizeof optval);
2997 break;
2d21ac55 2998#endif /* MROUTING */
1c79356b
A
2999
3000 case IP_MULTICAST_IF:
3001 if (imo == NULL || imo->imo_multicast_ifp == NULL)
3002 addr.s_addr = INADDR_ANY;
9bccf70c
A
3003 else if (imo->imo_multicast_addr.s_addr) {
3004 /* return the value user has set */
3005 addr = imo->imo_multicast_addr;
3006 } else {
1c79356b
A
3007 IFP_TO_IA(imo->imo_multicast_ifp, ia);
3008 addr.s_addr = (ia == NULL) ? INADDR_ANY
3009 : IA_SIN(ia)->sin_addr.s_addr;
b0d623f7
A
3010 if (ia != NULL)
3011 ifafree(&ia->ia_ifa);
1c79356b
A
3012 }
3013 error = sooptcopyout(sopt, &addr, sizeof addr);
3014 break;
3015
3016 case IP_MULTICAST_TTL:
3017 if (imo == 0)
3018 optval = coptval = IP_DEFAULT_MULTICAST_TTL;
3019 else
3020 optval = coptval = imo->imo_multicast_ttl;
3021 if (sopt->sopt_valsize == 1)
3022 error = sooptcopyout(sopt, &coptval, 1);
3023 else
3024 error = sooptcopyout(sopt, &optval, sizeof optval);
3025 break;
3026
3027 case IP_MULTICAST_LOOP:
3028 if (imo == 0)
3029 optval = coptval = IP_DEFAULT_MULTICAST_LOOP;
3030 else
3031 optval = coptval = imo->imo_multicast_loop;
3032 if (sopt->sopt_valsize == 1)
3033 error = sooptcopyout(sopt, &coptval, 1);
3034 else
3035 error = sooptcopyout(sopt, &optval, sizeof optval);
3036 break;
3037
3038 default:
3039 error = ENOPROTOOPT;
3040 break;
3041 }
3042 return (error);
3043}
3044
3045/*
3046 * Discard the IP multicast options.
3047 */
3048void
3049ip_freemoptions(imo)
3050 register struct ip_moptions *imo;
3051{
3052 register int i;
3053
3054 if (imo != NULL) {
3055 for (i = 0; i < imo->imo_num_memberships; ++i)
91447636 3056 in_delmulti(&imo->imo_membership[i]);
1c79356b
A
3057 FREE(imo, M_IPMOPTS);
3058 }
3059}
3060
3061/*
3062 * Routine called from ip_output() to loop back a copy of an IP multicast
3063 * packet to the input queue of a specified interface. Note that this
3064 * calls the output routine of the loopback "driver", but with an interface
3065 * pointer that might NOT be a loopback interface -- evil, but easier than
3066 * replicating that code here.
3067 */
3068static void
3069ip_mloopback(ifp, m, dst, hlen)
3070 struct ifnet *ifp;
3071 register struct mbuf *m;
3072 register struct sockaddr_in *dst;
3073 int hlen;
3074{
3075 register struct ip *ip;
3076 struct mbuf *copym;
2d21ac55 3077 int sw_csum = (apple_hwcksum_tx == 0);
1c79356b
A
3078
3079 copym = m_copy(m, 0, M_COPYALL);
3080 if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen))
3081 copym = m_pullup(copym, hlen);
2d21ac55
A
3082
3083 if (copym == NULL)
3084 return;
3085
3086 /*
3087 * We don't bother to fragment if the IP length is greater
3088 * than the interface's MTU. Can this possibly matter?
3089 */
3090 ip = mtod(copym, struct ip *);
b0d623f7
A
3091
3092#if BYTE_ORDER != BIG_ENDIAN
2d21ac55
A
3093 HTONS(ip->ip_len);
3094 HTONS(ip->ip_off);
b0d623f7
A
3095#endif
3096
2d21ac55
A
3097 ip->ip_sum = 0;
3098 ip->ip_sum = in_cksum(copym, hlen);
3099 /*
3100 * NB:
3101 * It's not clear whether there are any lingering
3102 * reentrancy problems in other areas which might
3103 * be exposed by using ip_input directly (in
3104 * particular, everything which modifies the packet
3105 * in-place). Yet another option is using the
3106 * protosw directly to deliver the looped back
3107 * packet. For the moment, we'll err on the side
3108 * of safety by using if_simloop().
3109 */
1c79356b 3110#if 1 /* XXX */
2d21ac55
A
3111 if (dst->sin_family != AF_INET) {
3112 printf("ip_mloopback: bad address family %d\n",
3113 dst->sin_family);
3114 dst->sin_family = AF_INET;
3115 }
1c79356b
A
3116#endif
3117
9bccf70c 3118 /*
2d21ac55
A
3119 * Mark checksum as valid or calculate checksum for loopback.
3120 *
3121 * This is done this way because we have to embed the ifp of
3122 * the interface we will send the original copy of the packet
3123 * out on in the mbuf. ip_input will check if_hwassist of the
3124 * embedded ifp and ignore all csum_flags if if_hwassist is 0.
3125 * The UDP checksum has not been calculated yet.
3126 */
3127 if (sw_csum || (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA)) {
3128 if (!sw_csum && IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist)) {
3129 copym->m_pkthdr.csum_flags |=
3130 CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
3131 CSUM_IP_CHECKED | CSUM_IP_VALID;
3132 copym->m_pkthdr.csum_data = 0xffff;
1c79356b 3133 } else {
b0d623f7
A
3134
3135#if BYTE_ORDER != BIG_ENDIAN
2d21ac55 3136 NTOHS(ip->ip_len);
b0d623f7
A
3137#endif
3138
2d21ac55 3139 in_delayed_cksum(copym);
b0d623f7
A
3140
3141#if BYTE_ORDER != BIG_ENDIAN
2d21ac55 3142 HTONS(ip->ip_len);
b0d623f7
A
3143#endif
3144
1c79356b 3145 }
2d21ac55 3146 }
1c79356b 3147
2d21ac55
A
3148 /*
3149 * TedW:
3150 * We need to send all loopback traffic down to dlil in case
3151 * a filter has tapped-in.
3152 */
3153
3154 /*
3155 * Stuff the 'real' ifp into the pkthdr, to be used in matching
3156 * in ip_input(); we need the loopback ifp/dl_tag passed as args
3157 * to make the loopback driver compliant with the data link
3158 * requirements.
3159 */
3160 if (lo_ifp) {
3161 copym->m_pkthdr.rcvif = ifp;
3162 dlil_output(lo_ifp, PF_INET, copym, 0,
3163 (struct sockaddr *) dst, 0);
3164 } else {
3165 printf("Warning: ip_output call to dlil_find_dltag failed!\n");
3166 m_freem(copym);
1c79356b
A
3167 }
3168}
c910b4d9
A
3169
3170/*
3171 * Given a source IP address (and route, if available), determine the best
b0d623f7
A
3172 * interface to send the packet from. Checking for (and updating) the
3173 * ROF_SRCIF_SELECTED flag in the pcb-supplied route placeholder is done
3174 * without any locks based on the assumption that ip_output() is single-
3175 * threaded per-pcb, i.e. for any given pcb there can only be one thread
3176 * performing output at the IP layer.
c910b4d9
A
3177 */
3178static struct ifaddr *
3179in_selectsrcif(struct ip *ip, struct route *ro, unsigned int ifscope)
3180{
3181 struct ifaddr *ifa = NULL;
b0d623f7
A
3182 struct in_addr src = ip->ip_src;
3183 struct in_addr dst = ip->ip_dst;
c910b4d9 3184 struct ifnet *rt_ifp;
0b4c1975 3185 char s_src[MAX_IPv4_STR_LEN], s_dst[MAX_IPv4_STR_LEN];
c910b4d9
A
3186
3187 if (ip_select_srcif_debug) {
b0d623f7
A
3188 (void) inet_ntop(AF_INET, &src.s_addr, s_src, sizeof (s_src));
3189 (void) inet_ntop(AF_INET, &dst.s_addr, s_dst, sizeof (s_dst));
c910b4d9
A
3190 }
3191
b0d623f7
A
3192 if (ro->ro_rt != NULL)
3193 RT_LOCK(ro->ro_rt);
c910b4d9 3194
c910b4d9
A
3195 rt_ifp = (ro->ro_rt != NULL) ? ro->ro_rt->rt_ifp : NULL;
3196
3197 /*
3198 * Given the source IP address, find a suitable source interface
3199 * to use for transmission; if the caller has specified a scope,
3200 * optimize the search by looking at the addresses only for that
3201 * interface. This is still suboptimal, however, as we need to
3202 * traverse the per-interface list.
3203 */
3204 if (ifscope != IFSCOPE_NONE || ro->ro_rt != NULL) {
3205 unsigned int scope = ifscope;
3206
3207 /*
3208 * If no scope is specified and the route is stale (pointing
3209 * to a defunct interface) use the current primary interface;
3210 * this happens when switching between interfaces configured
3211 * with the same IP address. Otherwise pick up the scope
3212 * information from the route; the ULP may have looked up a
3213 * correct route and we just need to verify it here and mark
3214 * it with the ROF_SRCIF_SELECTED flag below.
3215 */
3216 if (scope == IFSCOPE_NONE) {
3217 scope = rt_ifp->if_index;
3218 if (scope != get_primary_ifscope() &&
3219 ro->ro_rt->generation_id != route_generation)
3220 scope = get_primary_ifscope();
3221 }
3222
b0d623f7 3223 ifa = (struct ifaddr *)ifa_foraddr_scoped(src.s_addr, scope);
c910b4d9 3224
0b4c1975
A
3225 if (ifa == NULL && ip->ip_p != IPPROTO_UDP &&
3226 ip->ip_p != IPPROTO_TCP && ipforwarding) {
3227 /*
3228 * If forwarding is enabled, and if the packet isn't
3229 * TCP or UDP, check if the source address belongs
3230 * to one of our own interfaces; if so, demote the
3231 * interface scope and do a route lookup right below.
3232 */
3233 ifa = (struct ifaddr *)ifa_foraddr(src.s_addr);
3234 if (ifa != NULL) {
3235 ifafree(ifa);
3236 ifa = NULL;
3237 ifscope = IFSCOPE_NONE;
3238 }
3239 }
3240
c910b4d9
A
3241 if (ip_select_srcif_debug && ifa != NULL) {
3242 if (ro->ro_rt != NULL) {
3243 printf("%s->%s ifscope %d->%d ifa_if %s%d "
b0d623f7 3244 "ro_if %s%d\n", s_src, s_dst, ifscope,
c910b4d9
A
3245 scope, ifa->ifa_ifp->if_name,
3246 ifa->ifa_ifp->if_unit, rt_ifp->if_name,
3247 rt_ifp->if_unit);
3248 } else {
3249 printf("%s->%s ifscope %d->%d ifa_if %s%d\n",
b0d623f7 3250 s_src, s_dst, ifscope, scope,
c910b4d9
A
3251 ifa->ifa_ifp->if_name,
3252 ifa->ifa_ifp->if_unit);
3253 }
3254 }
3255 }
3256
3257 /*
3258 * Slow path; search for an interface having the corresponding source
3259 * IP address if the scope was not specified by the caller, and:
3260 *
3261 * 1) There currently isn't any route, or,
3262 * 2) The interface used by the route does not own that source
3263 * IP address; in this case, the route will get blown away
3264 * and we'll do a more specific scoped search using the newly
3265 * found interface.
3266 */
3267 if (ifa == NULL && ifscope == IFSCOPE_NONE) {
b0d623f7 3268 ifa = (struct ifaddr *)ifa_foraddr(src.s_addr);
c910b4d9 3269
0b4c1975
A
3270 /*
3271 * If we have the IP address, but not the route, we don't
3272 * really know whether or not it belongs to the correct
3273 * interface (it could be shared across multiple interfaces.)
3274 * The only way to find out is to do a route lookup.
3275 */
3276 if (ifa != NULL && ro->ro_rt == NULL) {
3277 struct rtentry *rt;
3278 struct sockaddr_in sin;
3279 struct ifaddr *oifa = NULL;
3280
3281 bzero(&sin, sizeof (sin));
3282 sin.sin_family = AF_INET;
3283 sin.sin_len = sizeof (sin);
3284 sin.sin_addr = dst;
3285
3286 lck_mtx_lock(rnh_lock);
3287 if ((rt = rt_lookup(TRUE, (struct sockaddr *)&sin, NULL,
3288 rt_tables[AF_INET], IFSCOPE_NONE)) != NULL) {
3289 RT_LOCK(rt);
3290 /*
3291 * If the route uses a different interface,
3292 * use that one instead. The IP address of
3293 * the ifaddr that we pick up here is not
3294 * relevant.
3295 */
3296 if (ifa->ifa_ifp != rt->rt_ifp) {
3297 oifa = ifa;
3298 ifa = rt->rt_ifa;
3299 ifaref(ifa);
3300 RT_UNLOCK(rt);
3301 } else {
3302 RT_UNLOCK(rt);
3303 }
3304 rtfree_locked(rt);
3305 }
3306 lck_mtx_unlock(rnh_lock);
3307
3308 if (oifa != NULL) {
3309 struct ifaddr *iifa;
3310
3311 /*
3312 * See if the interface pointed to by the
3313 * route is configured with the source IP
3314 * address of the packet.
3315 */
3316 iifa = (struct ifaddr *)ifa_foraddr_scoped(
3317 src.s_addr, ifa->ifa_ifp->if_index);
3318
3319 if (iifa != NULL) {
3320 /*
3321 * Found it; drop the original one
3322 * as well as the route interface
3323 * address, and use this instead.
3324 */
3325 ifafree(oifa);
3326 ifafree(ifa);
3327 ifa = iifa;
3328 } else if (!ipforwarding ||
3329 (rt->rt_flags & RTF_GATEWAY)) {
3330 /*
3331 * This interface doesn't have that
3332 * source IP address; drop the route
3333 * interface address and just use the
3334 * original one, and let the caller
3335 * do a scoped route lookup.
3336 */
3337 ifafree(ifa);
3338 ifa = oifa;
3339 } else {
3340 /*
3341 * Forwarding is enabled and the source
3342 * address belongs to one of our own
3343 * interfaces which isn't the outgoing
3344 * interface, and we have a route, and
3345 * the destination is on a network that
3346 * is directly attached (onlink); drop
3347 * the original one and use the route
3348 * interface address instead.
3349 */
3350 ifafree(oifa);
3351 }
3352 }
3353 } else if (ifa != NULL && ro->ro_rt != NULL &&
3354 !(ro->ro_rt->rt_flags & RTF_GATEWAY) &&
3355 ifa->ifa_ifp != ro->ro_rt->rt_ifp && ipforwarding) {
3356 /*
3357 * Forwarding is enabled and the source address belongs
3358 * to one of our own interfaces which isn't the same
3359 * as the interface used by the known route; drop the
3360 * original one and use the route interface address.
3361 */
3362 ifafree(ifa);
3363 ifa = ro->ro_rt->rt_ifa;
3364 ifaref(ifa);
3365 }
3366
c910b4d9
A
3367 if (ip_select_srcif_debug && ifa != NULL) {
3368 printf("%s->%s ifscope %d ifa_if %s%d\n",
b0d623f7 3369 s_src, s_dst, ifscope, ifa->ifa_ifp->if_name,
c910b4d9
A
3370 ifa->ifa_ifp->if_unit);
3371 }
3372 }
3373
b0d623f7
A
3374 if (ro->ro_rt != NULL)
3375 RT_LOCK_ASSERT_HELD(ro->ro_rt);
c910b4d9
A
3376 /*
3377 * If there is a non-loopback route with the wrong interface, or if
3378 * there is no interface configured with such an address, blow it
3379 * away. Except for local/loopback, we look for one with a matching
3380 * interface scope/index.
3381 */
3382 if (ro->ro_rt != NULL &&
3383 (ifa == NULL || (ifa->ifa_ifp != rt_ifp && rt_ifp != lo_ifp) ||
3384 !(ro->ro_rt->rt_flags & RTF_UP))) {
3385 if (ip_select_srcif_debug) {
3386 if (ifa != NULL) {
3387 printf("%s->%s ifscope %d ro_if %s%d != "
3388 "ifa_if %s%d (cached route cleared)\n",
b0d623f7 3389 s_src, s_dst, ifscope, rt_ifp->if_name,
c910b4d9
A
3390 rt_ifp->if_unit, ifa->ifa_ifp->if_name,
3391 ifa->ifa_ifp->if_unit);
3392 } else {
3393 printf("%s->%s ifscope %d ro_if %s%d "
3394 "(no ifa_if found)\n",
b0d623f7 3395 s_src, s_dst, ifscope, rt_ifp->if_name,
c910b4d9
A
3396 rt_ifp->if_unit);
3397 }
3398 }
3399
b0d623f7
A
3400 RT_UNLOCK(ro->ro_rt);
3401 rtfree(ro->ro_rt);
c910b4d9
A
3402 ro->ro_rt = NULL;
3403 ro->ro_flags &= ~ROF_SRCIF_SELECTED;
3404
3405 /*
3406 * If the destination is IPv4 LLA and the route's interface
3407 * doesn't match the source interface, then the source IP
3408 * address is wrong; it most likely belongs to the primary
3409 * interface associated with the IPv4 LL subnet. Drop the
3410 * packet rather than letting it go out and return an error
3411 * to the ULP. This actually applies not only to IPv4 LL
3412 * but other shared subnets; for now we explicitly test only
3413 * for the former case and save the latter for future.
3414 */
b0d623f7
A
3415 if (IN_LINKLOCAL(ntohl(dst.s_addr)) &&
3416 !IN_LINKLOCAL(ntohl(src.s_addr)) && ifa != NULL) {
c910b4d9
A
3417 ifafree(ifa);
3418 ifa = NULL;
3419 }
3420 }
3421
3422 if (ip_select_srcif_debug && ifa == NULL) {
3423 printf("%s->%s ifscope %d (neither ro_if/ifa_if found)\n",
b0d623f7 3424 s_src, s_dst, ifscope);
c910b4d9
A
3425 }
3426
3427 /*
3428 * If there is a route, mark it accordingly. If there isn't one,
3429 * we'll get here again during the next transmit (possibly with a
3430 * route) and the flag will get set at that point. For IPv4 LLA
3431 * destination, mark it only if the route has been fully resolved;
3432 * otherwise we want to come back here again when the route points
3433 * to the interface over which the ARP reply arrives on.
3434 */
b0d623f7 3435 if (ro->ro_rt != NULL && (!IN_LINKLOCAL(ntohl(dst.s_addr)) ||
c910b4d9
A
3436 (ro->ro_rt->rt_gateway->sa_family == AF_LINK &&
3437 SDL(ro->ro_rt->rt_gateway)->sdl_alen != 0))) {
3438 ro->ro_flags |= ROF_SRCIF_SELECTED;
3439 ro->ro_rt->generation_id = route_generation;
3440 }
3441
b0d623f7
A
3442 if (ro->ro_rt != NULL)
3443 RT_UNLOCK(ro->ro_rt);
3444
c910b4d9
A
3445 return (ifa);
3446}
3447
3448/*
3449 * Handler for setting IP_FORCE_OUT_IFP or IP_BOUND_IF socket option.
3450 */
3451static void
3452ip_bindif(struct inpcb *inp, unsigned int ifscope)
3453{
3454 /*
3455 * A zero interface scope value indicates an "unbind".
3456 * Otherwise, take in whatever value the app desires;
3457 * the app may already know the scope (or force itself
3458 * to such a scope) ahead of time before the interface
3459 * gets attached. It doesn't matter either way; any
3460 * route lookup from this point on will require an
3461 * exact match for the embedded interface scope.
3462 */
3463 inp->inp_boundif = ifscope;
3464 if (inp->inp_boundif == IFSCOPE_NONE)
3465 inp->inp_flags &= ~INP_BOUND_IF;
3466 else
3467 inp->inp_flags |= INP_BOUND_IF;
3468
c910b4d9
A
3469 /* Blow away any cached route in the PCB */
3470 if (inp->inp_route.ro_rt != NULL) {
b0d623f7 3471 rtfree(inp->inp_route.ro_rt);
c910b4d9
A
3472 inp->inp_route.ro_rt = NULL;
3473 }
c910b4d9 3474}