]> git.saurik.com Git - apple/xnu.git/blame - bsd/netinet/ip_output.c
xnu-1228.3.13.tar.gz
[apple/xnu.git] / bsd / netinet / ip_output.c
CommitLineData
1c79356b 1/*
2d21ac55 2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
5d5c5d0d 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * Copyright (c) 1982, 1986, 1988, 1990, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94
9bccf70c 61 * $FreeBSD: src/sys/netinet/ip_output.c,v 1.99.2.16 2001/07/19 06:37:26 kris Exp $
1c79356b 62 */
2d21ac55
A
63/*
64 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
65 * support for mandatory and extensible security protections. This notice
66 * is included in support of clause 2.2 (b) of the Apple Public License,
67 * Version 2.0.
68 */
1c79356b
A
69
70#define _IP_VHL
71
1c79356b
A
72#include <sys/param.h>
73#include <sys/systm.h>
74#include <sys/kernel.h>
75#include <sys/malloc.h>
76#include <sys/mbuf.h>
77#include <sys/protosw.h>
78#include <sys/socket.h>
79#include <sys/socketvar.h>
91447636
A
80#include <kern/locks.h>
81#include <sys/sysctl.h>
1c79356b
A
82
83#include <net/if.h>
84#include <net/route.h>
85
86#include <netinet/in.h>
87#include <netinet/in_systm.h>
88#include <netinet/ip.h>
1c79356b
A
89#include <netinet/in_pcb.h>
90#include <netinet/in_var.h>
91#include <netinet/ip_var.h>
1c79356b 92
91447636
A
93#include <netinet/kpi_ipfilter_var.h>
94
2d21ac55
A
95#if CONFIG_MACF_NET
96#include <security/mac_framework.h>
97#endif
98
9bccf70c
A
99#include "faith.h"
100
101#include <net/dlil.h>
1c79356b 102#include <sys/kdebug.h>
2d21ac55 103#include <libkern/OSAtomic.h>
1c79356b
A
104
/*
 * Trace codes handed to KERNEL_DEBUG() by the output path in this file:
 *   DBG_LAYER_BEG          logged by ip_output_list() once the IP header is
 *                          filled in (dst, src, proto, off, len as arguments)
 *   DBG_FNC_IP_OUTPUT      logged with DBG_FUNC_START on entry to
 *                          ip_output_list()
 *   DBG_FNC_IPSEC4_OUTPUT  logged with DBG_FUNC_START / DBG_FUNC_END around
 *                          the IPsec section of ip_output_list()
 * NOTE(review): no use of DBG_LAYER_END appears in the visible part of the
 * file -- confirm where it is emitted.
 */
105#define DBG_LAYER_BEG NETDBG_CODE(DBG_NETIP, 1)
106#define DBG_LAYER_END NETDBG_CODE(DBG_NETIP, 3)
107#define DBG_FNC_IP_OUTPUT NETDBG_CODE(DBG_NETIP, (1 << 8) | 1)
55e303ae 108#define DBG_FNC_IPSEC4_OUTPUT NETDBG_CODE(DBG_NETIP, (2 << 8) | 1)
1c79356b 109
/*
 * SWAP16(v): exchange the two low-order bytes of v.
 *
 * Both halves are masked to 8 bits, so the result always fits in 16 bits
 * even when v is wider than 16 bits or is a negative signed value (whose
 * right shift would otherwise drag extra high bits into the result).
 * For operands already in the range 0..0xffff the value is unchanged from
 * the unmasked form.
 *
 * v is evaluated twice; do not pass an expression with side effects.
 */
#define SWAP16(v)	((((v) & 0xff) << 8) | (((v) >> 8) & 0xff))
1c79356b 111
1c79356b
A
112#if IPSEC
113#include <netinet6/ipsec.h>
114#include <netkey/key.h>
9bccf70c 115#if IPSEC_DEBUG
1c79356b 116#include <netkey/key_debug.h>
1c79356b 117#else
9bccf70c 118#define KEYDEBUG(lev,arg)
1c79356b 119#endif
9bccf70c 120#endif /*IPSEC*/
1c79356b 121
1c79356b 122#include <netinet/ip_fw.h>
91447636 123#include <netinet/ip_divert.h>
1c79356b
A
124
125#if DUMMYNET
126#include <netinet/ip_dummynet.h>
127#endif
128
129#if IPFIREWALL_FORWARD_DEBUG
/*
 * print_ip(a): printf() the address held in a.s_addr as a dotted quad.
 * a.s_addr is taken to be in network byte order (it is run through
 * ntohl() before the octets are extracted).
 *
 * - The argument is parenthesised, so expressions such as *p or arr[i]
 *   expand correctly.
 * - Each octet is cast to long to match the "%ld" conversions; ntohl()
 *   yields a 32-bit value, which does not match "%ld" where long is wider.
 * - There is no trailing semicolon: the macro expands to a single
 *   expression (the printf() call), so "print_ip(x);" stays one statement
 *   and is safe in an unbraced if/else.
 *
 * a is evaluated four times; do not pass an expression with side effects.
 */
#define print_ip(a)	printf("%ld.%ld.%ld.%ld",			\
	    (long)((ntohl((a).s_addr) >> 24) & 0xFF),			\
	    (long)((ntohl((a).s_addr) >> 16) & 0xFF),			\
	    (long)((ntohl((a).s_addr) >> 8) & 0xFF),			\
	    (long)(ntohl((a).s_addr) & 0xFF))
134#endif
135
91447636 136
1c79356b
A
/*
 * Source of IPv4 identification values.  When RANDOM_IP_ID is not set,
 * ip_output_list() stamps a header with htons(ip_id++) if neither
 * IP_FORWARDING nor IP_RAWOUTPUT is set in its flags.
 * NOTE(review): that increment is a plain ++ and ip_output_list() takes no
 * lock before it in the visible code -- confirm whether callers serialize.
 */
137u_short ip_id;
138
91447636
A
/* Forward declarations of helpers private to this file (all static). */
139static struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *);
140static struct ifnet *ip_multicast_if(struct in_addr *, int *);
141static void ip_mloopback(struct ifnet *, struct mbuf *,
142 struct sockaddr_in *, int);
143static int ip_getmoptions(struct sockopt *, struct ip_moptions *);
144static int ip_pcbopts(int, struct mbuf **, struct mbuf *);
145static int ip_setmoptions(struct sockopt *, struct ip_moptions **);
9bccf70c 146
2d21ac55
A
147static void ip_out_cksum_stats(int, u_int32_t);
148
55e303ae
A
/* Non-static routines that are prototyped here in the .c file. */
149int ip_createmoptions(struct ip_moptions **imop);
150int ip_addmembership(struct ip_moptions *imo, struct ip_mreq *mreq);
151int ip_dropmembership(struct ip_moptions *imo, struct ip_mreq *mreq);
91447636 152int ip_optcopy(struct ip *, struct ip *);
2d21ac55
A
153void in_delayed_cksum_offset(struct mbuf *, int );
154void in_cksum_offset(struct mbuf* , size_t );
155
/*
 * Optional packet-check hook.  ip_output_list() calls it only when the
 * pointer is non-NULL, passing the IP header, header length, outgoing
 * interface, the constant 1, and the address of the mbuf pointer (which
 * the hook may replace or set to NULL).
 */
91447636 156extern int (*fr_checkp)(struct ip *, int, struct ifnet *, int, struct mbuf **);
9bccf70c 157
/*
 * Compared with a cached route's generation_id in ip_output_list() to
 * notice a route change since the route was cached.
 */
55e303ae 158extern u_long route_generation;
1c79356b
A
159
160extern struct protosw inetsw[];
161
/* Link-local output counters; ip_output_list() bumps iplls_out_total and iplls_out_badttl. */
9bccf70c 162extern struct ip_linklocal_stat ip_linklocal_stat;
91447636 163extern lck_mtx_t *ip_mutex;
9bccf70c
A
164
165/* temporary: for testing */
166#if IPSEC
167extern int ipsec_bypass;
168#endif
169
91447636
A
/*
 * Read-write integer sysctl "maxchainsent" registered under _net_inet_ip,
 * initial value 0, description "use dlil_output_list".
 * NOTE(review): the code that reads ip_maxchainsent is not in the visible
 * part of the file -- confirm its exact meaning there.
 */
170static int ip_maxchainsent = 0;
171SYSCTL_INT(_net_inet_ip, OID_AUTO, maxchainsent, CTLFLAG_RW,
172 &ip_maxchainsent, 0, "use dlil_output_list");
2d21ac55
A
173#if DEBUG
/*
 * DEBUG builds only: read-write integer sysctl "forge_ce".  While it is
 * non-zero, ip_output_list() rewrites the ECN bits of each packet whose
 * ECN field is ECT0 or ECT1 to CE and then decrements forge_ce, so a
 * positive value is the number of such packets still to be marked.
 */
174static int forge_ce = 0;
175SYSCTL_INT(_net_inet_ip, OID_AUTO, forge_ce, CTLFLAG_RW,
176 &forge_ce, 0, "Forge ECN CE");
177#endif /* DEBUG */
1c79356b
A
/*
 * ip_output: send a single IP packet.
 *
 * m0 is an mbuf chain holding the packet, prefixed by a skeletal IP
 * header (len, off, ttl, proto, tos, src, dst).  The chain is freed by
 * the output path; the options mbuf opt, if one is supplied, is not
 * freed.
 *
 * This is a thin wrapper: every argument is forwarded unchanged to
 * ip_output_list(), with 0 supplied for its packetchain parameter, and
 * that function's return value is returned as-is.
 */
int
ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro, int flags,
    struct ip_moptions *imo, struct ifnet *ifp)
{
	return (ip_output_list(m0, 0, opt, ro, flags, imo, ifp));
}
197
2d21ac55
A
198/*
199 * Returns: 0 Success
200 * ENOMEM
201 * EADDRNOTAVAIL
202 * ENETUNREACH
203 * EHOSTUNREACH
204 * EACCES
205 * EMSGSIZE
206 * ENOBUFS
207 * ipsec4_getpolicybyaddr:??? [IPSEC 4th argument, contents modified]
208 * ipsec4_getpolicybysock:??? [IPSEC 4th argument, contents modified]
209 * key_spdacquire:??? [IPSEC]
210 * ipsec4_output:??? [IPSEC]
211 * <fr_checkp>:??? [firewall]
212 * ip_dn_io_ptr:??? [dummynet]
213 * dlil_output:??? [DLIL]
214 * dlil_output_list:??? [DLIL]
215 *
216 * Notes: The ipsec4_getpolicyby{addr|sock} function error returns are
217 * only used as the error return from this function where one of
218 * these functions fails to return a policy.
219 */
91447636
A
220int
221ip_output_list(
222 struct mbuf *m0,
223 int packetchain,
224 struct mbuf *opt,
225 struct route *ro,
226 int flags,
2d21ac55
A
227 struct ip_moptions *imo,
228#if CONFIG_FORCE_OUT_IFP
229 struct ifnet *pdp_ifp
230#else
231 __unused struct ifnet *unused_ifp
232#endif
233 )
1c79356b
A
234{
235 struct ip *ip, *mhip;
55e303ae 236 struct ifnet *ifp = NULL;
1c79356b
A
237 struct mbuf *m = m0;
238 int hlen = sizeof (struct ip);
2d21ac55 239 int len = 0, off, error = 0;
55e303ae 240 struct sockaddr_in *dst = NULL;
9bccf70c 241 struct in_ifaddr *ia = NULL;
0b4e3aa0 242 int isbroadcast, sw_csum;
91447636 243 struct in_addr pkt_dst;
1c79356b
A
244#if IPSEC
245 struct route iproute;
9bccf70c 246 struct socket *so = NULL;
1c79356b
A
247 struct secpolicy *sp = NULL;
248#endif
249#if IPFIREWALL_FORWARD
250 int fwd_rewrite_src = 0;
251#endif
4a3eedf9 252#if IPFIREWALL
91447636 253 struct ip_fw_args args;
4a3eedf9 254#endif
91447636
A
255 int didfilter = 0;
256 ipfilter_t inject_filter_ref = 0;
257 struct m_tag *tag;
2d21ac55 258 struct route saved_route;
91447636
A
259 struct mbuf * packetlist;
260 int pktcnt = 0;
261
1c79356b
A
262
263 KERNEL_DEBUG(DBG_FNC_IP_OUTPUT | DBG_FUNC_START, 0,0,0,0,0);
264
91447636 265 packetlist = m0;
2d21ac55 266#if IPFIREWALL
4a3eedf9 267 args.next_hop = NULL;
91447636
A
268 args.eh = NULL;
269 args.rule = NULL;
91447636
A
270 args.divert_rule = 0; /* divert cookie */
271
272 /* Grab info from mtags prepended to the chain */
273#if DUMMYNET
274 if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_DUMMYNET, NULL)) != NULL) {
275 struct dn_pkt_tag *dn_tag;
276
277 dn_tag = (struct dn_pkt_tag *)(tag+1);
278 args.rule = dn_tag->rule;
279 opt = NULL;
2d21ac55
A
280 saved_route = dn_tag->ro;
281 ro = &saved_route;
91447636
A
282
283 imo = NULL;
284 dst = dn_tag->dn_dst;
285 ifp = dn_tag->ifp;
286 flags = dn_tag->flags;
287
288 m_tag_delete(m0, tag);
289 }
290#endif /* DUMMYNET */
291
2d21ac55 292#if IPDIVERT
91447636
A
293 if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_DIVERT, NULL)) != NULL) {
294 struct divert_tag *div_tag;
295
296 div_tag = (struct divert_tag *)(tag+1);
297 args.divert_rule = div_tag->cookie;
298
299 m_tag_delete(m0, tag);
300 }
2d21ac55 301#endif /* IPDIVERT */
2d21ac55 302
91447636
A
303 if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_IPFORWARD, NULL)) != NULL) {
304 struct ip_fwd_tag *ipfwd_tag;
305
306 ipfwd_tag = (struct ip_fwd_tag *)(tag+1);
307 args.next_hop = ipfwd_tag->next_hop;
308
309 m_tag_delete(m0, tag);
310 }
4a3eedf9 311#endif /* IPFIREWALL */
91447636
A
312
313 m = m0;
314
315#if DIAGNOSTIC
316 if ( !m || (m->m_flags & M_PKTHDR) != 0)
317 panic("ip_output no HDR");
318 if (!ro)
319 panic("ip_output no route, proto = %d",
320 mtod(m, struct ip *)->ip_p);
9bccf70c 321#endif
91447636 322
2d21ac55 323#if IPFIREWALL
91447636 324 if (args.rule != NULL) { /* dummynet already saw us */
1c79356b 325 ip = mtod(m, struct ip *);
1c79356b 326 hlen = IP_VHL_HL(ip->ip_vhl) << 2 ;
2d21ac55
A
327 lck_mtx_lock(rt_mtx);
328 if (ro->ro_rt != NULL)
9bccf70c 329 ia = (struct in_ifaddr *)ro->ro_rt->rt_ifa;
2d21ac55
A
330 if (ia)
331 ifaref(&ia->ia_ifa);
332 lck_mtx_unlock(rt_mtx);
91447636 333#if IPSEC
2d21ac55
A
334 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) {
335 so = ipsec_getsocket(m);
336 (void)ipsec_setsocket(m, NULL);
337 }
1c79356b 338#endif
91447636
A
339 goto sendit;
340 }
2d21ac55 341#endif /* IPFIREWALL */
91447636 342
9bccf70c 343#if IPSEC
55e303ae 344 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) {
9bccf70c
A
345 so = ipsec_getsocket(m);
346 (void)ipsec_setsocket(m, NULL);
347 }
348#endif
91447636
A
349loopit:
350 /*
351 * No need to process packet twice if we've
352 * already seen it
353 */
354 inject_filter_ref = ipf_get_inject_filter(m);
1c79356b 355
1c79356b
A
356 if (opt) {
357 m = ip_insertoptions(m, opt, &len);
358 hlen = len;
359 }
360 ip = mtod(m, struct ip *);
4a3eedf9 361#if IPFIREWALL
91447636 362 pkt_dst = args.next_hop ? args.next_hop->sin_addr : ip->ip_dst;
4a3eedf9
A
363#else
364 pkt_dst = ip->ip_dst;
365#endif
91447636 366
1c79356b
A
367 /*
368 * Fill in IP header.
369 */
370 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
371 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, hlen >> 2);
372 ip->ip_off &= IP_DF;
9bccf70c
A
373#if RANDOM_IP_ID
374 ip->ip_id = ip_randomid();
375#else
1c79356b 376 ip->ip_id = htons(ip_id++);
9bccf70c 377#endif
2d21ac55 378 OSAddAtomic(1, (SInt32*)&ipstat.ips_localout);
1c79356b
A
379 } else {
380 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
381 }
2d21ac55
A
382
383#if DEBUG
384 /* For debugging, we let the stack forge congestion */
385 if (forge_ce != 0 &&
386 ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_ECT1 ||
387 (ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_ECT0)) {
388 ip->ip_tos = (ip->ip_tos & ~IPTOS_ECN_MASK) | IPTOS_ECN_CE;
389 forge_ce--;
390 }
391#endif /* DEBUG */
1c79356b
A
392
393 KERNEL_DEBUG(DBG_LAYER_BEG, ip->ip_dst.s_addr,
394 ip->ip_src.s_addr, ip->ip_p, ip->ip_off, ip->ip_len);
2d21ac55 395
1c79356b 396 dst = (struct sockaddr_in *)&ro->ro_dst;
55e303ae 397
1c79356b
A
398 /*
399 * If there is a cached route,
400 * check that it is to the same destination
401 * and is still up. If not, free it and try again.
55e303ae
A
402 * The address family should also be checked in case of sharing the
403 * cache with IPv6.
1c79356b 404 */
55e303ae 405
2d21ac55
A
406 lck_mtx_lock(rt_mtx);
407 if (ro->ro_rt != NULL) {
408 if (ro->ro_rt->generation_id != route_generation &&
409 ((flags & (IP_ROUTETOIF | IP_FORWARDING)) == 0) &&
410 (ip->ip_src.s_addr != INADDR_ANY) &&
411 (ifa_foraddr(ip->ip_src.s_addr) == 0)) {
91447636 412 error = EADDRNOTAVAIL;
2d21ac55 413 lck_mtx_unlock(rt_mtx);
91447636
A
414 goto bad;
415 }
2d21ac55
A
416 if ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
417 dst->sin_family != AF_INET ||
418 dst->sin_addr.s_addr != pkt_dst.s_addr) {
419 rtfree_locked(ro->ro_rt);
420 ro->ro_rt = NULL;
421 }
422 if (ro->ro_rt && ro->ro_rt->generation_id != route_generation)
423 ro->ro_rt->generation_id = route_generation;
ab86ba33 424 }
2d21ac55 425 if (ro->ro_rt == NULL) {
55e303ae 426 bzero(dst, sizeof(*dst));
1c79356b
A
427 dst->sin_family = AF_INET;
428 dst->sin_len = sizeof(*dst);
91447636 429 dst->sin_addr = pkt_dst;
1c79356b
A
430 }
431 /*
432 * If routing to interface only,
433 * short circuit routing lookup.
434 */
435#define ifatoia(ifa) ((struct in_ifaddr *)(ifa))
436#define sintosa(sin) ((struct sockaddr *)(sin))
437 if (flags & IP_ROUTETOIF) {
91447636
A
438 if (ia)
439 ifafree(&ia->ia_ifa);
440 if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0) {
441 if ((ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) {
2d21ac55 442 OSAddAtomic(1, (SInt32*)&ipstat.ips_noroute);
91447636 443 error = ENETUNREACH;
2d21ac55 444 lck_mtx_unlock(rt_mtx);
91447636
A
445 goto bad;
446 }
1c79356b
A
447 }
448 ifp = ia->ia_ifp;
1c79356b
A
449 ip->ip_ttl = 1;
450 isbroadcast = in_broadcast(dst->sin_addr, ifp);
451 } else {
2d21ac55
A
452
453#if CONFIG_FORCE_OUT_IFP
454 /* Check if this packet should be forced out a specific interface */
455 if (ro->ro_rt == 0 && pdp_ifp != NULL) {
456 pdp_context_route_locked(pdp_ifp, ro);
457
458 if (ro->ro_rt == NULL) {
459 OSAddAtomic(1, (UInt32*)&ipstat.ips_noroute);
460 error = EHOSTUNREACH;
461 lck_mtx_unlock(rt_mtx);
462 goto bad;
463 }
464 }
465#endif
466
1c79356b
A
467 /*
468 * If this is the case, we probably don't want to allocate
469 * a protocol-cloned route since we didn't get one from the
470 * ULP. This lets TCP do its thing, while not burdening
471 * forwarding or ICMP with the overhead of cloning a route.
472 * Of course, we still want to do any cloning requested by
473 * the link layer, as this is probably required in all cases
474 * for correct operation (as it is for ARP).
475 */
2d21ac55
A
476
477 if (ro->ro_rt == 0) {
478 unsigned long ign = RTF_PRCLONING;
479 /*
480 * We make an exception here: if the destination
481 * address is INADDR_BROADCAST, allocate a protocol-
482 * cloned host route so that we end up with a route
483 * marked with the RTF_BROADCAST flag. Otherwise,
484 * we would end up referring to the default route,
485 * instead of creating a cloned host route entry.
486 * That would introduce inconsistencies between ULPs
487 * that allocate a route and those that don't. The
488 * RTF_BROADCAST route is important since we'd want
489 * to send out undirected IP broadcast packets using
490 * link-level broadcast address.
491 *
492 * This exception will no longer be necessary when
493 * the RTF_PRCLONING scheme is no longer present.
494 */
495 if (dst->sin_addr.s_addr == INADDR_BROADCAST)
496 ign &= ~RTF_PRCLONING;
497
498 rtalloc_ign_locked(ro, ign);
499 }
1c79356b 500 if (ro->ro_rt == 0) {
2d21ac55 501 OSAddAtomic(1, (SInt32*)&ipstat.ips_noroute);
1c79356b 502 error = EHOSTUNREACH;
2d21ac55 503 lck_mtx_unlock(rt_mtx);
1c79356b
A
504 goto bad;
505 }
2d21ac55 506
91447636
A
507 if (ia)
508 ifafree(&ia->ia_ifa);
1c79356b 509 ia = ifatoia(ro->ro_rt->rt_ifa);
91447636
A
510 if (ia)
511 ifaref(&ia->ia_ifa);
1c79356b 512 ifp = ro->ro_rt->rt_ifp;
1c79356b
A
513 ro->ro_rt->rt_use++;
514 if (ro->ro_rt->rt_flags & RTF_GATEWAY)
515 dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
516 if (ro->ro_rt->rt_flags & RTF_HOST)
517 isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST);
518 else
519 isbroadcast = in_broadcast(dst->sin_addr, ifp);
520 }
2d21ac55 521 lck_mtx_unlock(rt_mtx);
91447636 522 if (IN_MULTICAST(ntohl(pkt_dst.s_addr))) {
1c79356b
A
523 struct in_multi *inm;
524
525 m->m_flags |= M_MCAST;
526 /*
527 * IP destination address is multicast. Make sure "dst"
528 * still points to the address in "ro". (It may have been
529 * changed to point to a gateway address, above.)
530 */
531 dst = (struct sockaddr_in *)&ro->ro_dst;
532 /*
533 * See if the caller provided any multicast options
534 */
535 if (imo != NULL) {
55e303ae 536 if ((flags & IP_RAWOUTPUT) == 0) ip->ip_ttl = imo->imo_multicast_ttl;
fa4905b1 537 if (imo->imo_multicast_ifp != NULL) {
1c79356b 538 ifp = imo->imo_multicast_ifp;
fa4905b1 539 }
2d21ac55 540#if MROUTING
55e303ae
A
541 if (imo->imo_multicast_vif != -1 &&
542 ((flags & IP_RAWOUTPUT) == 0 || ip->ip_src.s_addr == INADDR_ANY))
1c79356b 543 ip->ip_src.s_addr =
55e303ae 544 ip_mcast_src(imo->imo_multicast_vif);
2d21ac55 545#endif /* MROUTING */
1c79356b 546 } else
55e303ae 547 if ((flags & IP_RAWOUTPUT) == 0) ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
1c79356b
A
548 /*
549 * Confirm that the outgoing interface supports multicast.
550 */
551 if ((imo == NULL) || (imo->imo_multicast_vif == -1)) {
552 if ((ifp->if_flags & IFF_MULTICAST) == 0) {
2d21ac55 553 OSAddAtomic(1, (SInt32*)&ipstat.ips_noroute);
1c79356b
A
554 error = ENETUNREACH;
555 goto bad;
556 }
557 }
558 /*
559 * If source address not specified yet, use address
560 * of outgoing interface.
561 */
562 if (ip->ip_src.s_addr == INADDR_ANY) {
563 register struct in_ifaddr *ia1;
2d21ac55 564 lck_mtx_lock(rt_mtx);
9bccf70c 565 TAILQ_FOREACH(ia1, &in_ifaddrhead, ia_link)
1c79356b
A
566 if (ia1->ia_ifp == ifp) {
567 ip->ip_src = IA_SIN(ia1)->sin_addr;
55e303ae 568
1c79356b
A
569 break;
570 }
2d21ac55 571 lck_mtx_unlock(rt_mtx);
55e303ae
A
572 if (ip->ip_src.s_addr == INADDR_ANY) {
573 error = ENETUNREACH;
574 goto bad;
575 }
1c79356b
A
576 }
577
91447636
A
578 ifnet_lock_shared(ifp);
579 IN_LOOKUP_MULTI(pkt_dst, ifp, inm);
580 ifnet_lock_done(ifp);
1c79356b
A
581 if (inm != NULL &&
582 (imo == NULL || imo->imo_multicast_loop)) {
583 /*
584 * If we belong to the destination multicast group
585 * on the outgoing interface, and the caller did not
586 * forbid loopback, loop back a copy.
587 */
91447636
A
588 if (!TAILQ_EMPTY(&ipv4_filters)) {
589 struct ipfilter *filter;
590 int seen = (inject_filter_ref == 0);
591 struct ipf_pktopts *ippo = 0, ipf_pktopts;
592
593 if (imo) {
594 ippo = &ipf_pktopts;
595 ipf_pktopts.ippo_mcast_ifnet = imo->imo_multicast_ifp;
596 ipf_pktopts.ippo_mcast_ttl = imo->imo_multicast_ttl;
597 ipf_pktopts.ippo_mcast_loop = imo->imo_multicast_loop;
598 }
599
91447636 600 ipf_ref();
0c530ab8
A
601
602 /* 4135317 - always pass network byte order to filter */
603 HTONS(ip->ip_len);
604 HTONS(ip->ip_off);
605
91447636
A
606 TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) {
607 if (seen == 0) {
608 if ((struct ipfilter *)inject_filter_ref == filter)
609 seen = 1;
610 } else if (filter->ipf_filter.ipf_output) {
611 errno_t result;
612 result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, ippo);
613 if (result == EJUSTRETURN) {
614 ipf_unref();
615 goto done;
616 }
617 if (result != 0) {
618 ipf_unref();
91447636
A
619 goto bad;
620 }
621 }
622 }
0c530ab8
A
623
624 /* set back to host byte order */
6601e61a 625 ip = mtod(m, struct ip *);
0c530ab8
A
626 NTOHS(ip->ip_len);
627 NTOHS(ip->ip_off);
628
91447636
A
629 ipf_unref();
630 didfilter = 1;
631 }
1c79356b
A
632 ip_mloopback(ifp, m, dst, hlen);
633 }
2d21ac55 634#if MROUTING
1c79356b
A
635 else {
636 /*
637 * If we are acting as a multicast router, perform
638 * multicast forwarding as if the packet had just
639 * arrived on the interface to which we are about
640 * to send. The multicast forwarding function
641 * recursively calls this function, using the
642 * IP_FORWARDING flag to prevent infinite recursion.
643 *
644 * Multicasts that are looped back by ip_mloopback(),
645 * above, will be forwarded by the ip_input() routine,
646 * if necessary.
647 */
648 if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
649 /*
650 * Check if rsvp daemon is running. If not, don't
651 * set ip_moptions. This ensures that the packet
652 * is multicast and not just sent down one link
653 * as prescribed by rsvpd.
654 */
655 if (!rsvp_on)
656 imo = NULL;
657 if (ip_mforward(ip, ifp, m, imo) != 0) {
658 m_freem(m);
659 goto done;
660 }
661 }
662 }
2d21ac55 663#endif /* MROUTING */
1c79356b
A
664
665 /*
666 * Multicasts with a time-to-live of zero may be looped-
667 * back, above, but must not be transmitted on a network.
668 * Also, multicasts addressed to the loopback interface
669 * are not sent -- the above call to ip_mloopback() will
670 * loop back a copy if this host actually belongs to the
671 * destination group on the loopback interface.
672 */
673 if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) {
674 m_freem(m);
675 goto done;
676 }
677
678 goto sendit;
679 }
680#ifndef notdef
681 /*
682 * If source address not specified yet, use address
683 * of outgoing interface.
684 */
685 if (ip->ip_src.s_addr == INADDR_ANY) {
686 ip->ip_src = IA_SIN(ia)->sin_addr;
687#if IPFIREWALL_FORWARD
688 /* Keep note that we did this - if the firewall changes
689 * the next-hop, our interface may change, changing the
690 * default source IP. It's a shame so much effort happens
691 * twice. Oh well.
692 */
693 fwd_rewrite_src++;
694#endif /* IPFIREWALL_FORWARD */
695 }
696#endif /* notdef */
1c79356b
A
697
698 /*
699 * Look for broadcast address and
700 * verify user is allowed to send
701 * such a packet.
702 */
703 if (isbroadcast) {
704 if ((ifp->if_flags & IFF_BROADCAST) == 0) {
705 error = EADDRNOTAVAIL;
706 goto bad;
707 }
708 if ((flags & IP_ALLOWBROADCAST) == 0) {
709 error = EACCES;
710 goto bad;
711 }
712 /* don't allow broadcast messages to be fragmented */
713 if ((u_short)ip->ip_len > ifp->if_mtu) {
714 error = EMSGSIZE;
715 goto bad;
716 }
717 m->m_flags |= M_BCAST;
718 } else {
719 m->m_flags &= ~M_BCAST;
720 }
721
722sendit:
9bccf70c
A
723 /*
724 * Force IP TTL to 255 following draft-ietf-zeroconf-ipv4-linklocal.txt
725 */
726 if (IN_LINKLOCAL(ntohl(ip->ip_src.s_addr)) || IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr))) {
727 ip_linklocal_stat.iplls_out_total++;
728 if (ip->ip_ttl != MAXTTL) {
729 ip_linklocal_stat.iplls_out_badttl++;
730 ip->ip_ttl = MAXTTL;
731 }
732 }
733
91447636
A
734 if (!didfilter && !TAILQ_EMPTY(&ipv4_filters)) {
735 struct ipfilter *filter;
736 int seen = (inject_filter_ref == 0);
737
91447636 738 ipf_ref();
0c530ab8
A
739
740 /* 4135317 - always pass network byte order to filter */
741 HTONS(ip->ip_len);
742 HTONS(ip->ip_off);
743
91447636
A
744 TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) {
745 if (seen == 0) {
746 if ((struct ipfilter *)inject_filter_ref == filter)
747 seen = 1;
748 } else if (filter->ipf_filter.ipf_output) {
749 errno_t result;
750 result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, 0);
751 if (result == EJUSTRETURN) {
752 ipf_unref();
753 goto done;
754 }
755 if (result != 0) {
756 ipf_unref();
91447636
A
757 goto bad;
758 }
759 }
760 }
0c530ab8
A
761
762 /* set back to host byte order */
6601e61a 763 ip = mtod(m, struct ip *);
0c530ab8
A
764 NTOHS(ip->ip_len);
765 NTOHS(ip->ip_off);
766
91447636 767 ipf_unref();
91447636
A
768 }
769
9bccf70c
A
770#if IPSEC
771 /* temporary for testing only: bypass ipsec altogether */
772
55e303ae 773 if (ipsec_bypass != 0 || (flags & IP_NOIPSEC) != 0)
9bccf70c
A
774 goto skip_ipsec;
775
55e303ae
A
776 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_START, 0,0,0,0,0);
777
91447636 778
9bccf70c
A
779 /* get SP for this packet */
780 if (so == NULL)
781 sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, flags, &error);
782 else
783 sp = ipsec4_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error);
784
785 if (sp == NULL) {
2d21ac55 786 IPSEC_STAT_INCREMENT(ipsecstat.out_inval);
55e303ae 787 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 0,0,0,0,0);
9bccf70c
A
788 goto bad;
789 }
790
791 error = 0;
792
793 /* check policy */
794 switch (sp->policy) {
795 case IPSEC_POLICY_DISCARD:
2d21ac55 796 case IPSEC_POLICY_GENERATE:
9bccf70c
A
797 /*
798 * This packet is just discarded.
799 */
2d21ac55 800 IPSEC_STAT_INCREMENT(ipsecstat.out_polvio);
55e303ae 801 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 1,0,0,0,0);
9bccf70c
A
802 goto bad;
803
804 case IPSEC_POLICY_BYPASS:
805 case IPSEC_POLICY_NONE:
806 /* no need to do IPsec. */
55e303ae 807 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 2,0,0,0,0);
9bccf70c
A
808 goto skip_ipsec;
809
810 case IPSEC_POLICY_IPSEC:
811 if (sp->req == NULL) {
812 /* acquire a policy */
813 error = key_spdacquire(sp);
55e303ae 814 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 3,0,0,0,0);
9bccf70c
A
815 goto bad;
816 }
817 break;
818
819 case IPSEC_POLICY_ENTRUST:
820 default:
821 printf("ip_output: Invalid policy found. %d\n", sp->policy);
822 }
823 {
824 struct ipsec_output_state state;
825 bzero(&state, sizeof(state));
826 state.m = m;
827 if (flags & IP_ROUTETOIF) {
828 state.ro = &iproute;
829 bzero(&iproute, sizeof(iproute));
830 } else
831 state.ro = ro;
832 state.dst = (struct sockaddr *)dst;
833
834 ip->ip_sum = 0;
835
836 /*
837 * XXX
838 * delayed checksums are not currently compatible with IPsec
839 */
840 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
841 in_delayed_cksum(m);
842 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
843 }
844
845 HTONS(ip->ip_len);
846 HTONS(ip->ip_off);
847
848 error = ipsec4_output(&state, sp, flags);
91447636 849
55e303ae
A
850 m0 = m = state.m;
851
9bccf70c
A
852 if (flags & IP_ROUTETOIF) {
853 /*
854 * if we have tunnel mode SA, we may need to ignore
855 * IP_ROUTETOIF.
856 */
857 if (state.ro != &iproute || state.ro->ro_rt != NULL) {
858 flags &= ~IP_ROUTETOIF;
859 ro = state.ro;
860 }
861 } else
862 ro = state.ro;
55e303ae 863
9bccf70c
A
864 dst = (struct sockaddr_in *)state.dst;
865 if (error) {
866 /* mbuf is already reclaimed in ipsec4_output. */
867 m0 = NULL;
868 switch (error) {
869 case EHOSTUNREACH:
870 case ENETUNREACH:
871 case EMSGSIZE:
872 case ENOBUFS:
873 case ENOMEM:
874 break;
875 default:
876 printf("ip4_output (ipsec): error code %d\n", error);
877 /*fall through*/
878 case ENOENT:
879 /* don't show these error codes to the user */
880 error = 0;
881 break;
882 }
55e303ae 883 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 4,0,0,0,0);
9bccf70c
A
884 goto bad;
885 }
886 }
887
888 /* be sure to update variables that are affected by ipsec4_output() */
889 ip = mtod(m, struct ip *);
55e303ae 890
9bccf70c
A
891#ifdef _IP_VHL
892 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
893#else
894 hlen = ip->ip_hl << 2;
895#endif
55e303ae
A
896 /* Check that there wasn't a route change and src is still valid */
897
2d21ac55 898 lck_mtx_lock(rt_mtx);
8f6c56a5 899 if (ro->ro_rt && ro->ro_rt->generation_id != route_generation) {
91447636 900 if (ifa_foraddr(ip->ip_src.s_addr) == 0 && ((flags & (IP_ROUTETOIF | IP_FORWARDING)) == 0)) {
55e303ae 901 error = EADDRNOTAVAIL;
2d21ac55 902 lck_mtx_unlock(rt_mtx);
55e303ae
A
903 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 5,0,0,0,0);
904 goto bad;
905 }
2d21ac55 906 rtfree_locked(ro->ro_rt);
55e303ae
A
907 ro->ro_rt = NULL;
908 }
909
9bccf70c
A
910 if (ro->ro_rt == NULL) {
911 if ((flags & IP_ROUTETOIF) == 0) {
912 printf("ip_output: "
913 "can't update route after IPsec processing\n");
55e303ae 914 error = EHOSTUNREACH; /*XXX*/
2d21ac55 915 lck_mtx_unlock(rt_mtx);
55e303ae 916 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 6,0,0,0,0);
9bccf70c
A
917 goto bad;
918 }
919 } else {
91447636
A
920 if (ia)
921 ifafree(&ia->ia_ifa);
9bccf70c 922 ia = ifatoia(ro->ro_rt->rt_ifa);
91447636
A
923 if (ia)
924 ifaref(&ia->ia_ifa);
9bccf70c 925 ifp = ro->ro_rt->rt_ifp;
9bccf70c 926 }
2d21ac55 927 lck_mtx_unlock(rt_mtx);
9bccf70c
A
928
929 /* make it flipped, again. */
930 NTOHS(ip->ip_len);
931 NTOHS(ip->ip_off);
55e303ae 932 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 7,0xff,0xff,0xff,0xff);
91447636
A
933
934 /* Pass to filters again */
935 if (!TAILQ_EMPTY(&ipv4_filters)) {
936 struct ipfilter *filter;
937
91447636 938 ipf_ref();
0c530ab8
A
939
940 /* 4135317 - always pass network byte order to filter */
941 HTONS(ip->ip_len);
942 HTONS(ip->ip_off);
943
91447636
A
944 TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) {
945 if (filter->ipf_filter.ipf_output) {
946 errno_t result;
947 result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, 0);
948 if (result == EJUSTRETURN) {
949 ipf_unref();
950 goto done;
951 }
952 if (result != 0) {
953 ipf_unref();
91447636
A
954 goto bad;
955 }
956 }
957 }
0c530ab8
A
958
959 /* set back to host byte order */
6601e61a 960 ip = mtod(m, struct ip *);
0c530ab8
A
961 NTOHS(ip->ip_len);
962 NTOHS(ip->ip_off);
963
91447636 964 ipf_unref();
91447636 965 }
9bccf70c
A
966skip_ipsec:
967#endif /*IPSEC*/
968
2d21ac55 969#if IPFIREWALL
1c79356b
A
970 /*
971 * IpHack's section.
972 * - Xlate: translate packet's addr/port (NAT).
973 * - Firewall: deny/allow/etc.
974 * - Wrap: fake packet's addr/port <unimpl.>
975 * - Encapsulate: put it in another IP and send out. <unimp.>
976 */
9bccf70c
A
977 if (fr_checkp) {
978 struct mbuf *m1 = m;
979
91447636 980 if ((error = (*fr_checkp)(ip, hlen, ifp, 1, &m1)) || !m1) {
9bccf70c 981 goto done;
91447636 982 }
55e303ae 983 ip = mtod(m0 = m = m1, struct ip *);
1c79356b
A
984 }
985
986 /*
987 * Check with the firewall...
91447636 988 * but not if we are already being fwd'd from a firewall.
1c79356b 989 */
91447636 990 if (fw_enable && IPFW_LOADED && !args.next_hop) {
1c79356b
A
991 struct sockaddr_in *old = dst;
992
91447636
A
993 args.m = m;
994 args.next_hop = dst;
995 args.oif = ifp;
996 off = ip_fw_chk_ptr(&args);
997 m = args.m;
998 dst = args.next_hop;
999
1c79356b
A
1000 /*
1001 * On return we must do the following:
9bccf70c 1002 * IP_FW_PORT_DENY_FLAG -> drop the pkt (XXX new)
1c79356b 1003 * 1<=off<= 0xffff -> DIVERT
9bccf70c
A
1004 * (off & IP_FW_PORT_DYNT_FLAG) -> send to a DUMMYNET pipe
1005 * (off & IP_FW_PORT_TEE_FLAG) -> TEE the packet
1c79356b
A
1006 * dst != old -> IPFIREWALL_FORWARD
1007 * off==0, dst==old -> accept
1008 * If some of the above modules is not compiled in, then
1009 * we should't have to check the corresponding condition
1010 * (because the ipfw control socket should not accept
1011 * unsupported rules), but better play safe and drop
1012 * packets in case of doubt.
1013 */
55e303ae 1014 m0 = m;
9bccf70c
A
1015 if ( (off & IP_FW_PORT_DENY_FLAG) || m == NULL) {
1016 if (m)
1017 m_freem(m);
1018 error = EACCES ;
1019 goto done ;
1c79356b 1020 }
9bccf70c 1021 ip = mtod(m, struct ip *);
2d21ac55 1022
3a60a9f5 1023 if (off == 0 && dst == old) {/* common case */
1c79356b 1024 goto pass ;
3a60a9f5 1025 }
1c79356b 1026#if DUMMYNET
91447636 1027 if (DUMMYNET_LOADED && (off & IP_FW_PORT_DYNT_FLAG) != 0) {
1c79356b
A
1028 /*
1029 * pass the pkt to dummynet. Need to include
9bccf70c 1030 * pipe number, m, ifp, ro, dst because these are
1c79356b
A
1031 * not recomputed in the next pass.
1032 * All other parameters have been already used and
1033 * so they are not needed anymore.
1034 * XXX note: if the ifp or ro entry are deleted
1035 * while a pkt is in dummynet, we are in trouble!
1036 */
91447636
A
1037 args.ro = ro;
1038 args.dst = dst;
1039 args.flags = flags;
1040
91447636
A
1041 error = ip_dn_io_ptr(m, off & 0xffff, DN_TO_IP_OUT,
1042 &args);
9bccf70c 1043 goto done;
1c79356b 1044 }
91447636 1045#endif /* DUMMYNET */
1c79356b 1046#if IPDIVERT
9bccf70c
A
1047 if (off != 0 && (off & IP_FW_PORT_DYNT_FLAG) == 0) {
1048 struct mbuf *clone = NULL;
1049
1050 /* Clone packet if we're doing a 'tee' */
1051 if ((off & IP_FW_PORT_TEE_FLAG) != 0)
1052 clone = m_dup(m, M_DONTWAIT);
1053 /*
1054 * XXX
1055 * delayed checksums are not currently compatible
1056 * with divert sockets.
1057 */
1058 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
1059 in_delayed_cksum(m);
1060 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1061 }
1062
1063 /* Restore packet header fields to original values */
1064 HTONS(ip->ip_len);
1065 HTONS(ip->ip_off);
1066
1067 /* Deliver packet to divert input routine */
91447636 1068 divert_packet(m, 0, off & 0xffff, args.divert_rule);
9bccf70c
A
1069
1070 /* If 'tee', continue with original packet */
1071 if (clone != NULL) {
55e303ae 1072 m0 = m = clone;
9bccf70c
A
1073 ip = mtod(m, struct ip *);
1074 goto pass;
1075 }
1c79356b
A
1076 goto done;
1077 }
1078#endif
1079
1080#if IPFIREWALL_FORWARD
1081 /* Here we check dst to make sure it's directly reachable on the
1082 * interface we previously thought it was.
1083 * If it isn't (which may be likely in some situations) we have
1084 * to re-route it (ie, find a route for the next-hop and the
1085 * associated interface) and set them here. This is nested
1086 * forwarding which in most cases is undesirable, except where
1087 * such control is nigh impossible. So we do it here.
1088 * And I'm babbling.
1089 */
1090 if (off == 0 && old != dst) {
91447636 1091 struct in_ifaddr *ia_fw;
1c79356b
A
1092
1093 /* It's changed... */
1094 /* There must be a better way to do this next line... */
1095 static struct route sro_fwd, *ro_fwd = &sro_fwd;
1096#if IPFIREWALL_FORWARD_DEBUG
1097 printf("IPFIREWALL_FORWARD: New dst ip: ");
1098 print_ip(dst->sin_addr);
1099 printf("\n");
1100#endif
1101 /*
1102 * We need to figure out if we have been forwarded
1103 * to a local socket. If so then we should somehow
1104 * "loop back" to ip_input, and get directed to the
1105 * PCB as if we had received this packet. This is
1106 * because it may be dificult to identify the packets
1107 * you want to forward until they are being output
1108 * and have selected an interface. (e.g. locally
1109 * initiated packets) If we used the loopback inteface,
1110 * we would not be able to control what happens
1111 * as the packet runs through ip_input() as
1112 * it is done through a ISR.
1113 */
91447636 1114 TAILQ_FOREACH(ia_fw, &in_ifaddrhead, ia_link) {
1c79356b
A
1115 /*
1116 * If the addr to forward to is one
1117 * of ours, we pretend to
1118 * be the destination for this packet.
1119 */
91447636 1120 if (IA_SIN(ia_fw)->sin_addr.s_addr ==
1c79356b
A
1121 dst->sin_addr.s_addr)
1122 break;
1123 }
1124 if (ia) {
1125 /* tell ip_input "dont filter" */
91447636
A
1126 struct m_tag *fwd_tag;
1127 struct ip_fwd_tag *ipfwd_tag;
1128
1129 fwd_tag = m_tag_alloc(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_IPFORWARD,
1130 sizeof(struct sockaddr_in), M_NOWAIT);
1131 if (fwd_tag == NULL) {
1132 error = ENOBUFS;
1133 goto bad;
1134 }
1135
1136 ipfwd_tag = (struct ip_fwd_tag *)(fwd_tag+1);
1137 ipfwd_tag->next_hop = args.next_hop;
1138
1139 m_tag_prepend(m, fwd_tag);
1140
1c79356b
A
1141 if (m->m_pkthdr.rcvif == NULL)
1142 m->m_pkthdr.rcvif = ifunit("lo0");
91447636
A
1143 if ((~IF_HWASSIST_CSUM_FLAGS(m->m_pkthdr.rcvif->if_hwassist) &
1144 m->m_pkthdr.csum_flags) == 0) {
1145 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
1146 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1147 m->m_pkthdr.csum_flags |=
1148 CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1149 m->m_pkthdr.csum_data = 0xffff;
1150 }
9bccf70c 1151 m->m_pkthdr.csum_flags |=
91447636
A
1152 CSUM_IP_CHECKED | CSUM_IP_VALID;
1153 }
1154 else if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
1155 in_delayed_cksum(m);
1156 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1157 ip->ip_sum = in_cksum(m, hlen);
9bccf70c 1158 }
9bccf70c
A
1159 HTONS(ip->ip_len);
1160 HTONS(ip->ip_off);
91447636 1161
91447636
A
1162
1163 /* we need to call dlil_output to run filters
1164 * and resync to avoid recursion loops.
1165 */
1166 if (lo_ifp) {
1167 dlil_output(lo_ifp, PF_INET, m, 0, (struct sockaddr *)dst, 0);
1168 }
1169 else {
1170 printf("ip_output: no loopback ifp for forwarding!!!\n");
1171 }
1c79356b
A
1172 goto done;
1173 }
1174 /* Some of the logic for this was
1175 * nicked from above.
1176 *
1177 * This rewrites the cached route in a local PCB.
1178 * Is this what we want to do?
1179 */
1180 bcopy(dst, &ro_fwd->ro_dst, sizeof(*dst));
1181
1182 ro_fwd->ro_rt = 0;
2d21ac55
A
1183 lck_mtx_lock(rt_mtx);
1184 rtalloc_ign_locked(ro_fwd, RTF_PRCLONING);
1c79356b
A
1185
1186 if (ro_fwd->ro_rt == 0) {
2d21ac55 1187 OSAddAtomic(1, (SInt32*)&ipstat.ips_noroute);
1c79356b 1188 error = EHOSTUNREACH;
2d21ac55 1189 lck_mtx_unlock(rt_mtx);
1c79356b
A
1190 goto bad;
1191 }
1192
91447636 1193 ia_fw = ifatoia(ro_fwd->ro_rt->rt_ifa);
1c79356b 1194 ifp = ro_fwd->ro_rt->rt_ifp;
1c79356b
A
1195 ro_fwd->ro_rt->rt_use++;
1196 if (ro_fwd->ro_rt->rt_flags & RTF_GATEWAY)
1197 dst = (struct sockaddr_in *)ro_fwd->ro_rt->rt_gateway;
1198 if (ro_fwd->ro_rt->rt_flags & RTF_HOST)
1199 isbroadcast =
1200 (ro_fwd->ro_rt->rt_flags & RTF_BROADCAST);
1201 else
1202 isbroadcast = in_broadcast(dst->sin_addr, ifp);
2d21ac55 1203 rtfree_locked(ro->ro_rt);
1c79356b
A
1204 ro->ro_rt = ro_fwd->ro_rt;
1205 dst = (struct sockaddr_in *)&ro_fwd->ro_dst;
2d21ac55 1206 lck_mtx_unlock(rt_mtx);
1c79356b
A
1207
1208 /*
1209 * If we added a default src ip earlier,
1210 * which would have been gotten from the-then
1211 * interface, do it again, from the new one.
1212 */
1213 if (fwd_rewrite_src)
91447636 1214 ip->ip_src = IA_SIN(ia_fw)->sin_addr;
1c79356b
A
1215 goto pass ;
1216 }
1217#endif /* IPFIREWALL_FORWARD */
1218 /*
1219 * if we get here, none of the above matches, and
1220 * we have to drop the pkt
1221 */
1222 m_freem(m);
91447636 1223 error = EACCES; /* not sure this is the right error msg */
91447636 1224 goto done;
1c79356b 1225 }
2d21ac55 1226#endif /* IPFIREWALL */
1c79356b
A
1227
1228pass:
e5568f75
A
1229#if __APPLE__
1230 /* Do not allow loopback address to wind up on a wire */
1231 if ((ifp->if_flags & IFF_LOOPBACK) == 0 &&
1232 ((ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
1233 (ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)) {
2d21ac55 1234 OSAddAtomic(1, (SInt32*)&ipstat.ips_badaddr);
e5568f75 1235 m_freem(m);
91447636
A
1236 /*
1237 * Do not simply drop the packet just like a firewall -- we want the
1238 * the application to feel the pain.
1239 * Return ENETUNREACH like ip6_output does in some similar cases.
1240 * This can startle the otherwise clueless process that specifies
e5568f75
A
1241 * loopback as the source address.
1242 */
91447636 1243 error = ENETUNREACH;
e5568f75
A
1244 goto done;
1245 }
1246#endif
9bccf70c 1247 m->m_pkthdr.csum_flags |= CSUM_IP;
4a249263
A
1248 sw_csum = m->m_pkthdr.csum_flags
1249 & ~IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);
1c79356b 1250
9bccf70c 1251 if ((ifp->if_hwassist & CSUM_TCP_SUM16) != 0) {
1c79356b 1252 /*
9bccf70c
A
1253 * Special case code for GMACE
1254 * frames that can be checksumed by GMACE SUM16 HW:
1255 * frame >64, no fragments, no UDP
1c79356b 1256 */
9bccf70c
A
1257 if (apple_hwcksum_tx && (m->m_pkthdr.csum_flags & CSUM_TCP)
1258 && (ip->ip_len > 50) && (ip->ip_len <= ifp->if_mtu)) {
1259 /* Apple GMAC HW, expects STUFF_OFFSET << 16 | START_OFFSET */
1260 u_short offset = (IP_VHL_HL(ip->ip_vhl) << 2) +14 ; /* IP+Enet header length */
1261 u_short csumprev= m->m_pkthdr.csum_data & 0xFFFF;
1262 m->m_pkthdr.csum_flags = CSUM_DATA_VALID | CSUM_TCP_SUM16; /* for GMAC */
1263 m->m_pkthdr.csum_data = (csumprev + offset) << 16 ;
1264 m->m_pkthdr.csum_data += offset;
1265 sw_csum = CSUM_DELAY_IP; /* do IP hdr chksum in software */
1c79356b 1266 }
9bccf70c
A
1267 else {
1268 /* let the software handle any UDP or TCP checksums */
1269 sw_csum |= (CSUM_DELAY_DATA & m->m_pkthdr.csum_flags);
1c79356b 1270 }
2d21ac55
A
1271 } else if (apple_hwcksum_tx == 0) {
1272 sw_csum |= (CSUM_DELAY_DATA | CSUM_DELAY_IP) &
1273 m->m_pkthdr.csum_flags;
1c79356b 1274 }
9bccf70c
A
1275
1276 if (sw_csum & CSUM_DELAY_DATA) {
1277 in_delayed_cksum(m);
1278 sw_csum &= ~CSUM_DELAY_DATA;
1279 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
0b4e3aa0 1280 }
2d21ac55
A
1281
1282 if (apple_hwcksum_tx != 0) {
1283 m->m_pkthdr.csum_flags &=
1284 IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);
1285 } else {
1286 m->m_pkthdr.csum_flags = 0;
1287 }
0b4e3aa0 1288
1c79356b 1289 /*
9bccf70c 1290 * If small enough for interface, or the interface will take
0b4e3aa0 1291 * care of the fragmentation for us, can just send directly.
1c79356b 1292 */
9bccf70c
A
1293 if ((u_short)ip->ip_len <= ifp->if_mtu ||
1294 ifp->if_hwassist & CSUM_FRAGMENT) {
2d21ac55
A
1295 struct rtentry *rte;
1296
9bccf70c
A
1297 HTONS(ip->ip_len);
1298 HTONS(ip->ip_off);
1c79356b 1299 ip->ip_sum = 0;
9bccf70c 1300 if (sw_csum & CSUM_DELAY_IP) {
0b4e3aa0 1301 ip->ip_sum = in_cksum(m, hlen);
9bccf70c
A
1302 }
1303
1304#ifndef __APPLE__
1305 /* Record statistics for this interface address. */
1306 if (!(flags & IP_FORWARDING) && ia != NULL) {
1307 ia->ia_ifa.if_opackets++;
1308 ia->ia_ifa.if_obytes += m->m_pkthdr.len;
1309 }
1310#endif
1311
1312#if IPSEC
1313 /* clean ipsec history once it goes out of the node */
55e303ae 1314 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0)
9bccf70c
A
1315 ipsec_delaux(m);
1316#endif
91447636 1317 if (packetchain == 0) {
2d21ac55
A
1318 lck_mtx_lock(rt_mtx);
1319 if ((rte = ro->ro_rt) != NULL)
1320 rtref(rte);
1321 lck_mtx_unlock(rt_mtx);
1322 error = ifnet_output(ifp, PF_INET, m, rte,
1323 (struct sockaddr *)dst);
1324 if (rte != NULL)
1325 rtfree(rte);
1326 goto done;
91447636
A
1327 }
1328 else { /* packet chaining allows us to reuse the route for all packets */
1329 m = m->m_nextpkt;
1330 if (m == NULL) {
1331 if (pktcnt > ip_maxchainsent)
1332 ip_maxchainsent = pktcnt;
2d21ac55
A
1333 lck_mtx_lock(rt_mtx);
1334 if ((rte = ro->ro_rt) != NULL)
1335 rtref(rte);
1336 lck_mtx_unlock(rt_mtx);
91447636 1337 //send
2d21ac55
A
1338 error = ifnet_output(ifp, PF_INET, packetlist,
1339 rte, (struct sockaddr *)dst);
1340 if (rte != NULL)
1341 rtfree(rte);
91447636
A
1342 pktcnt = 0;
1343 goto done;
1344
1345 }
1346 m0 = m;
1347 pktcnt++;
1348 goto loopit;
1349 }
1c79356b
A
1350 }
1351 /*
1352 * Too large for interface; fragment if possible.
1353 * Must be able to put at least 8 bytes per fragment.
1354 */
1355 if (ip->ip_off & IP_DF) {
1356 error = EMSGSIZE;
1357 /*
1358 * This case can happen if the user changed the MTU
1359 * of an interface after enabling IP on it. Because
1360 * most netifs don't keep track of routes pointing to
1361 * them, there is no way for one to update all its
1362 * routes when the MTU is changed.
1363 */
2d21ac55
A
1364
1365 lck_mtx_lock(rt_mtx);
8f6c56a5 1366 if (ro->ro_rt && (ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST))
1c79356b
A
1367 && !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU)
1368 && (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) {
1369 ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
1370 }
2d21ac55
A
1371 lck_mtx_unlock(rt_mtx);
1372 OSAddAtomic(1, (SInt32*)&ipstat.ips_cantfrag);
1c79356b
A
1373 goto bad;
1374 }
1375 len = (ifp->if_mtu - hlen) &~ 7;
1376 if (len < 8) {
1377 error = EMSGSIZE;
1378 goto bad;
1379 }
1380
9bccf70c
A
1381 /*
1382 * if the interface will not calculate checksums on
1383 * fragmented packets, then do it here.
1384 */
1385 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA &&
1386 (ifp->if_hwassist & CSUM_IP_FRAGS) == 0) {
1387 in_delayed_cksum(m);
9bccf70c
A
1388 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1389 }
0b4e3aa0
A
1390
1391
1c79356b
A
1392 {
1393 int mhlen, firstlen = len;
1394 struct mbuf **mnext = &m->m_nextpkt;
9bccf70c 1395 int nfrags = 1;
1c79356b
A
1396
1397 /*
1398 * Loop through length of segment after first fragment,
1399 * make new header and copy data of each part and link onto chain.
1400 */
1401 m0 = m;
1402 mhlen = sizeof (struct ip);
1403 for (off = hlen + len; off < (u_short)ip->ip_len; off += len) {
2d21ac55 1404 MGETHDR(m, M_DONTWAIT, MT_HEADER); /* MAC-OK */
1c79356b
A
1405 if (m == 0) {
1406 error = ENOBUFS;
2d21ac55 1407 OSAddAtomic(1, (SInt32*)&ipstat.ips_odropped);
1c79356b
A
1408 goto sendorfree;
1409 }
0b4e3aa0 1410 m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG;
1c79356b
A
1411 m->m_data += max_linkhdr;
1412 mhip = mtod(m, struct ip *);
1413 *mhip = *ip;
1414 if (hlen > sizeof (struct ip)) {
1415 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
1416 mhip->ip_vhl = IP_MAKE_VHL(IPVERSION, mhlen >> 2);
1417 }
1418 m->m_len = mhlen;
1419 mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF);
1420 if (ip->ip_off & IP_MF)
1421 mhip->ip_off |= IP_MF;
1422 if (off + len >= (u_short)ip->ip_len)
1423 len = (u_short)ip->ip_len - off;
1424 else
1425 mhip->ip_off |= IP_MF;
1426 mhip->ip_len = htons((u_short)(len + mhlen));
1427 m->m_next = m_copy(m0, off, len);
1428 if (m->m_next == 0) {
1429 (void) m_free(m);
1430 error = ENOBUFS; /* ??? */
2d21ac55 1431 OSAddAtomic(1, (SInt32*)&ipstat.ips_odropped);
1c79356b
A
1432 goto sendorfree;
1433 }
1434 m->m_pkthdr.len = mhlen + len;
91447636 1435 m->m_pkthdr.rcvif = 0;
9bccf70c 1436 m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
91447636 1437 m->m_pkthdr.socket_id = m0->m_pkthdr.socket_id;
2d21ac55
A
1438#if CONFIG_MACF_NET
1439 mac_netinet_fragment(m0, m);
1440#endif
9bccf70c 1441 HTONS(mhip->ip_off);
1c79356b 1442 mhip->ip_sum = 0;
9bccf70c 1443 if (sw_csum & CSUM_DELAY_IP) {
0b4e3aa0 1444 mhip->ip_sum = in_cksum(m, mhlen);
9bccf70c 1445 }
1c79356b
A
1446 *mnext = m;
1447 mnext = &m->m_nextpkt;
0b4e3aa0 1448 nfrags++;
1c79356b 1449 }
2d21ac55 1450 OSAddAtomic(nfrags, (SInt32*)&ipstat.ips_ofragments);
0b4e3aa0
A
1451
1452 /* set first/last markers for fragment chain */
9bccf70c
A
1453 m->m_flags |= M_LASTFRAG;
1454 m0->m_flags |= M_FIRSTFRAG | M_FRAG;
0b4e3aa0
A
1455 m0->m_pkthdr.csum_data = nfrags;
1456
1c79356b
A
1457 /*
1458 * Update first fragment by trimming what's been copied out
1459 * and updating header, then send each fragment (in order).
1460 */
1461 m = m0;
1462 m_adj(m, hlen + firstlen - (u_short)ip->ip_len);
1463 m->m_pkthdr.len = hlen + firstlen;
1464 ip->ip_len = htons((u_short)m->m_pkthdr.len);
9bccf70c
A
1465 ip->ip_off |= IP_MF;
1466 HTONS(ip->ip_off);
1c79356b 1467 ip->ip_sum = 0;
9bccf70c 1468 if (sw_csum & CSUM_DELAY_IP) {
0b4e3aa0 1469 ip->ip_sum = in_cksum(m, hlen);
9bccf70c 1470 }
1c79356b
A
1471sendorfree:
1472
1473 KERNEL_DEBUG(DBG_LAYER_END, ip->ip_dst.s_addr,
1474 ip->ip_src.s_addr, ip->ip_p, ip->ip_off, ip->ip_len);
1475
1476 for (m = m0; m; m = m0) {
1477 m0 = m->m_nextpkt;
1478 m->m_nextpkt = 0;
9bccf70c
A
1479#if IPSEC
1480 /* clean ipsec history once it goes out of the node */
55e303ae 1481 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0)
9bccf70c
A
1482 ipsec_delaux(m);
1483#endif
1484 if (error == 0) {
2d21ac55 1485 struct rtentry *rte;
9bccf70c
A
1486#ifndef __APPLE__
1487 /* Record statistics for this interface address. */
1488 if (ia != NULL) {
1489 ia->ia_ifa.if_opackets++;
1490 ia->ia_ifa.if_obytes += m->m_pkthdr.len;
1491 }
1492#endif
2d21ac55
A
1493 if ((packetchain != 0) && (pktcnt > 0))
1494 panic("ip_output: mix of packet in packetlist is wrong=%p", packetlist);
1495 lck_mtx_lock(rt_mtx);
1496 if ((rte = ro->ro_rt) != NULL)
1497 rtref(rte);
1498 lck_mtx_unlock(rt_mtx);
1499 error = ifnet_output(ifp, PF_INET, m, rte,
1500 (struct sockaddr *)dst);
1501 if (rte != NULL)
1502 rtfree(rte);
9bccf70c 1503 } else
1c79356b
A
1504 m_freem(m);
1505 }
1506
1507 if (error == 0)
2d21ac55 1508 OSAddAtomic(1, (SInt32*)&ipstat.ips_fragmented);
1c79356b
A
1509 }
1510done:
91447636
A
1511 if (ia) {
1512 ifafree(&ia->ia_ifa);
1513 ia = NULL;
1514 }
1c79356b 1515#if IPSEC
55e303ae 1516 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) {
1c79356b 1517 if (ro == &iproute && ro->ro_rt) {
9bccf70c 1518 rtfree(ro->ro_rt);
1c79356b
A
1519 ro->ro_rt = NULL;
1520 }
1521 if (sp != NULL) {
1522 KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
1523 printf("DP ip_output call free SP:%x\n", sp));
2d21ac55 1524 key_freesp(sp, KEY_SADB_UNLOCKED);
1c79356b 1525 }
9bccf70c 1526 }
1c79356b
A
1527#endif /* IPSEC */
1528
1529 KERNEL_DEBUG(DBG_FNC_IP_OUTPUT | DBG_FUNC_END, error,0,0,0,0);
1530 return (error);
1531bad:
1532 m_freem(m0);
1533 goto done;
1534}
1535
2d21ac55
A
1536static void
1537ip_out_cksum_stats(int proto, u_int32_t len)
1538{
1539 switch (proto) {
1540 case IPPROTO_TCP:
1541 tcp_out_cksum_stats(len);
1542 break;
1543 case IPPROTO_UDP:
1544 udp_out_cksum_stats(len);
1545 break;
1546 default:
1547 /* keep only TCP or UDP stats for now */
1548 break;
1549 }
1550}
1551
0b4e3aa0 1552void
8f6c56a5 1553in_delayed_cksum_offset(struct mbuf *m0, int ip_offset)
0b4e3aa0 1554{
9bccf70c 1555 struct ip *ip;
8f6c56a5
A
1556 unsigned char buf[sizeof(struct ip)];
1557 u_short csum, offset, ip_len;
1558 struct mbuf *m = m0;
91447636 1559
8f6c56a5 1560 while (ip_offset >= m->m_len) {
91447636
A
1561 ip_offset -= m->m_len;
1562 m = m->m_next;
8f6c56a5 1563 if (m == NULL) {
2d21ac55 1564 printf("in_delayed_cksum_withoffset failed - ip_offset wasn't in the packet\n");
91447636
A
1565 return;
1566 }
1567 }
1568
8f6c56a5 1569 /* Sometimes the IP header is not contiguous, yes this can happen! */
91447636 1570 if (ip_offset + sizeof(struct ip) > m->m_len) {
8f6c56a5 1571#if DEBUG
2d21ac55 1572 printf("delayed m_pullup, m->len: %ld off: %d\n",
743b1565 1573 m->m_len, ip_offset);
8f6c56a5 1574#endif
2d21ac55 1575 m_copydata(m, ip_offset, sizeof(struct ip), (caddr_t) buf);
8f6c56a5
A
1576
1577 ip = (struct ip *)buf;
1578 } else {
1579 ip = (struct ip*)(m->m_data + ip_offset);
91447636
A
1580 }
1581
1582 /* Gross */
1583 if (ip_offset) {
1584 m->m_len -= ip_offset;
1585 m->m_data += ip_offset;
1586 }
1587
9bccf70c 1588 offset = IP_VHL_HL(ip->ip_vhl) << 2 ;
8f6c56a5
A
1589
1590 /*
1591 * We could be in the context of an IP or interface filter; in the
1592 * former case, ip_len would be in host (correct) order while for
1593 * the latter it would be in network order. Because of this, we
1594 * attempt to interpret the length field by comparing it against
1595 * the actual packet length. If the comparison fails, byte swap
1596 * the length and check again. If it still fails, then the packet
1597 * is bogus and we give up.
1598 */
1599 ip_len = ip->ip_len;
1600 if (ip_len != (m0->m_pkthdr.len - ip_offset)) {
1601 ip_len = SWAP16(ip_len);
1602 if (ip_len != (m0->m_pkthdr.len - ip_offset)) {
1603 printf("in_delayed_cksum_offset: ip_len %d (%d) "
1604 "doesn't match actual length %d\n", ip->ip_len,
1605 ip_len, (m0->m_pkthdr.len - ip_offset));
1606 return;
1607 }
1608 }
1609
1610 csum = in_cksum_skip(m, ip_len, offset);
1611
2d21ac55
A
1612 /* Update stats */
1613 ip_out_cksum_stats(ip->ip_p, ip_len - offset);
1614
8f6c56a5 1615 if (m0->m_pkthdr.csum_flags & CSUM_UDP && csum == 0)
0b4e3aa0 1616 csum = 0xffff;
8f6c56a5
A
1617 offset += m0->m_pkthdr.csum_data & 0xFFFF; /* checksum offset */
1618
91447636
A
1619 /* Gross */
1620 if (ip_offset) {
1621 if (M_LEADINGSPACE(m) < ip_offset)
8f6c56a5 1622 panic("in_delayed_cksum_offset - chain modified!\n");
91447636
A
1623 m->m_len += ip_offset;
1624 m->m_data -= ip_offset;
1625 }
0b4e3aa0 1626
8f6c56a5 1627 if (offset > ip_len) /* bogus offset */
0b4e3aa0
A
1628 return;
1629
8f6c56a5 1630 /* Insert the checksum in the existing chain */
91447636 1631 if (offset + ip_offset + sizeof(u_short) > m->m_len) {
8f6c56a5
A
1632 char tmp[2];
1633
1634#if DEBUG
2d21ac55 1635 printf("delayed m_copyback, m->len: %ld off: %d p: %d\n",
91447636 1636 m->m_len, offset + ip_offset, ip->ip_p);
8f6c56a5
A
1637#endif
1638 *(u_short *)tmp = csum;
1639 m_copyback(m, offset + ip_offset, 2, tmp);
1640 } else
1641 *(u_short *)(m->m_data + offset + ip_offset) = csum;
91447636
A
1642}
1643
/*
 * Convenience form of in_delayed_cksum_offset() for a packet whose IP
 * header is located at offset 0 of the mbuf chain `m'.
 */
void
in_delayed_cksum(struct mbuf *m)
{
	in_delayed_cksum_offset(m, 0);
}
1649
1650void
1651in_cksum_offset(struct mbuf* m, size_t ip_offset)
1652{
1653 struct ip* ip = NULL;
1654 int hlen = 0;
8f6c56a5
A
1655 unsigned char buf[sizeof(struct ip)];
1656 int swapped = 0;
91447636 1657
8f6c56a5 1658 while (ip_offset >= m->m_len) {
91447636
A
1659 ip_offset -= m->m_len;
1660 m = m->m_next;
8f6c56a5 1661 if (m == NULL) {
91447636
A
1662 printf("in_cksum_offset failed - ip_offset wasn't in the packet\n");
1663 return;
1664 }
1665 }
1666
8f6c56a5 1667 /* Sometimes the IP header is not contiguous, yes this can happen! */
91447636 1668 if (ip_offset + sizeof(struct ip) > m->m_len) {
8f6c56a5
A
1669
1670#if DEBUG
2d21ac55 1671 printf("in_cksum_offset - delayed m_pullup, m->len: %ld off: %lu\n",
91447636 1672 m->m_len, ip_offset);
8f6c56a5 1673#endif
2d21ac55 1674 m_copydata(m, ip_offset, sizeof(struct ip), (caddr_t) buf);
8f6c56a5
A
1675
1676 ip = (struct ip *)buf;
1677 ip->ip_sum = 0;
1678 m_copyback(m, ip_offset + offsetof(struct ip, ip_sum), 2, (caddr_t)&ip->ip_sum);
1679 } else {
1680 ip = (struct ip*)(m->m_data + ip_offset);
1681 ip->ip_sum = 0;
91447636
A
1682 }
1683
1684 /* Gross */
1685 if (ip_offset) {
1686 m->m_len -= ip_offset;
1687 m->m_data += ip_offset;
1688 }
1689
91447636
A
1690#ifdef _IP_VHL
1691 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
1692#else
1693 hlen = ip->ip_hl << 2;
1694#endif
8f6c56a5
A
1695 /*
1696 * We could be in the context of an IP or interface filter; in the
1697 * former case, ip_len would be in host order while for the latter
1698 * it would be in network (correct) order. Because of this, we
1699 * attempt to interpret the length field by comparing it against
1700 * the actual packet length. If the comparison fails, byte swap
1701 * the length and check again. If it still fails, then the packet
1702 * is bogus and we give up.
1703 */
1704 if (ntohs(ip->ip_len) != (m->m_pkthdr.len - ip_offset)) {
1705 ip->ip_len = SWAP16(ip->ip_len);
1706 swapped = 1;
1707 if (ntohs(ip->ip_len) != (m->m_pkthdr.len - ip_offset)) {
1708 ip->ip_len = SWAP16(ip->ip_len);
1709 printf("in_cksum_offset: ip_len %d (%d) "
2d21ac55 1710 "doesn't match actual length %lu\n",
8f6c56a5
A
1711 ip->ip_len, SWAP16(ip->ip_len),
1712 (m->m_pkthdr.len - ip_offset));
1713 return;
1714 }
1715 }
1716
91447636
A
1717 ip->ip_sum = 0;
1718 ip->ip_sum = in_cksum(m, hlen);
8f6c56a5
A
1719 if (swapped)
1720 ip->ip_len = SWAP16(ip->ip_len);
1721
91447636
A
1722 /* Gross */
1723 if (ip_offset) {
1724 if (M_LEADINGSPACE(m) < ip_offset)
1725 panic("in_cksum_offset - chain modified!\n");
1726 m->m_len += ip_offset;
1727 m->m_data -= ip_offset;
9bccf70c 1728 }
8f6c56a5
A
1729
1730 /* Insert the checksum in the existing chain if IP header not contiguous */
1731 if (ip_offset + sizeof(struct ip) > m->m_len) {
1732 char tmp[2];
1733
1734#if DEBUG
2d21ac55 1735 printf("in_cksum_offset m_copyback, m->len: %lu off: %lu p: %d\n",
8f6c56a5
A
1736 m->m_len, ip_offset + offsetof(struct ip, ip_sum), ip->ip_p);
1737#endif
1738 *(u_short *)tmp = ip->ip_sum;
1739 m_copyback(m, ip_offset + offsetof(struct ip, ip_sum), 2, tmp);
1740 }
0b4e3aa0
A
1741}
1742
1c79356b
A
1743/*
1744 * Insert IP options into preformed packet.
1745 * Adjust IP destination as required for IP source routing,
1746 * as indicated by a non-zero in_addr at the start of the options.
1747 *
1748 * XXX This routine assumes that the packet has no options in place.
1749 */
1750static struct mbuf *
1751ip_insertoptions(m, opt, phlen)
1752 register struct mbuf *m;
1753 struct mbuf *opt;
1754 int *phlen;
1755{
1756 register struct ipoption *p = mtod(opt, struct ipoption *);
1757 struct mbuf *n;
1758 register struct ip *ip = mtod(m, struct ip *);
1759 unsigned optlen;
1760
1761 optlen = opt->m_len - sizeof(p->ipopt_dst);
1762 if (optlen + (u_short)ip->ip_len > IP_MAXPACKET)
1763 return (m); /* XXX should fail */
1764 if (p->ipopt_dst.s_addr)
1765 ip->ip_dst = p->ipopt_dst;
1766 if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
2d21ac55 1767 MGETHDR(n, M_DONTWAIT, MT_HEADER); /* MAC-OK */
1c79356b
A
1768 if (n == 0)
1769 return (m);
91447636 1770 n->m_pkthdr.rcvif = 0;
2d21ac55
A
1771#if CONFIG_MACF_NET
1772 mac_mbuf_label_copy(m, n);
1773#endif
1c79356b
A
1774 n->m_pkthdr.len = m->m_pkthdr.len + optlen;
1775 m->m_len -= sizeof(struct ip);
1776 m->m_data += sizeof(struct ip);
1777 n->m_next = m;
1778 m = n;
1779 m->m_len = optlen + sizeof(struct ip);
1780 m->m_data += max_linkhdr;
1781 (void)memcpy(mtod(m, void *), ip, sizeof(struct ip));
1782 } else {
1783 m->m_data -= optlen;
1784 m->m_len += optlen;
1785 m->m_pkthdr.len += optlen;
1786 ovbcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
1787 }
1788 ip = mtod(m, struct ip *);
1789 bcopy(p->ipopt_list, ip + 1, optlen);
1790 *phlen = sizeof(struct ip) + optlen;
1791 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, *phlen >> 2);
1792 ip->ip_len += optlen;
1793 return (m);
1794}
1795
1796/*
1797 * Copy options from ip to jp,
1798 * omitting those not copied during fragmentation.
1799 */
1c79356b
A
1800int
1801ip_optcopy(ip, jp)
1802 struct ip *ip, *jp;
1803{
1804 register u_char *cp, *dp;
1805 int opt, optlen, cnt;
1806
1807 cp = (u_char *)(ip + 1);
1808 dp = (u_char *)(jp + 1);
1809 cnt = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof (struct ip);
1810 for (; cnt > 0; cnt -= optlen, cp += optlen) {
1811 opt = cp[0];
1812 if (opt == IPOPT_EOL)
1813 break;
1814 if (opt == IPOPT_NOP) {
1815 /* Preserve for IP mcast tunnel's LSRR alignment. */
1816 *dp++ = IPOPT_NOP;
1817 optlen = 1;
1818 continue;
9bccf70c
A
1819 }
1820#if DIAGNOSTIC
1821 if (cnt < IPOPT_OLEN + sizeof(*cp))
1822 panic("malformed IPv4 option passed to ip_optcopy");
1823#endif
1824 optlen = cp[IPOPT_OLEN];
1825#if DIAGNOSTIC
1826 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
1827 panic("malformed IPv4 option passed to ip_optcopy");
1828#endif
1c79356b
A
1829 /* bogus lengths should have been caught by ip_dooptions */
1830 if (optlen > cnt)
1831 optlen = cnt;
1832 if (IPOPT_COPIED(opt)) {
1833 bcopy(cp, dp, optlen);
1834 dp += optlen;
1835 }
1836 }
1837 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
1838 *dp++ = IPOPT_EOL;
1839 return (optlen);
1840}
1841
1842/*
1843 * IP socket option processing.
1844 */
1845int
1846ip_ctloutput(so, sopt)
1847 struct socket *so;
1848 struct sockopt *sopt;
1849{
1850 struct inpcb *inp = sotoinpcb(so);
1851 int error, optval;
1852
1853 error = optval = 0;
1854 if (sopt->sopt_level != IPPROTO_IP) {
1855 return (EINVAL);
1856 }
1857
1858 switch (sopt->sopt_dir) {
1859 case SOPT_SET:
1860 switch (sopt->sopt_name) {
1861 case IP_OPTIONS:
1862#ifdef notyet
1863 case IP_RETOPTS:
1864#endif
1865 {
1866 struct mbuf *m;
1867 if (sopt->sopt_valsize > MLEN) {
1868 error = EMSGSIZE;
1869 break;
1870 }
1871 MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_HEADER);
1872 if (m == 0) {
1873 error = ENOBUFS;
1874 break;
1875 }
1876 m->m_len = sopt->sopt_valsize;
1877 error = sooptcopyin(sopt, mtod(m, char *), m->m_len,
1878 m->m_len);
1879 if (error)
1880 break;
1881
1882 return (ip_pcbopts(sopt->sopt_name, &inp->inp_options,
1883 m));
1884 }
1885
1886 case IP_TOS:
1887 case IP_TTL:
1888 case IP_RECVOPTS:
1889 case IP_RECVRETOPTS:
1890 case IP_RECVDSTADDR:
1891 case IP_RECVIF:
55e303ae 1892 case IP_RECVTTL:
9bccf70c 1893#if defined(NFAITH) && NFAITH > 0
1c79356b 1894 case IP_FAITH:
9bccf70c 1895#endif
1c79356b
A
1896 error = sooptcopyin(sopt, &optval, sizeof optval,
1897 sizeof optval);
1898 if (error)
1899 break;
1900
1901 switch (sopt->sopt_name) {
1902 case IP_TOS:
1903 inp->inp_ip_tos = optval;
1904 break;
1905
1906 case IP_TTL:
1907 inp->inp_ip_ttl = optval;
1908 break;
1909#define OPTSET(bit) \
1910 if (optval) \
1911 inp->inp_flags |= bit; \
1912 else \
1913 inp->inp_flags &= ~bit;
1914
1915 case IP_RECVOPTS:
1916 OPTSET(INP_RECVOPTS);
1917 break;
1918
1919 case IP_RECVRETOPTS:
1920 OPTSET(INP_RECVRETOPTS);
1921 break;
1922
1923 case IP_RECVDSTADDR:
1924 OPTSET(INP_RECVDSTADDR);
1925 break;
1926
1927 case IP_RECVIF:
1928 OPTSET(INP_RECVIF);
1929 break;
1930
55e303ae
A
1931 case IP_RECVTTL:
1932 OPTSET(INP_RECVTTL);
1933 break;
1934
9bccf70c 1935#if defined(NFAITH) && NFAITH > 0
1c79356b
A
1936 case IP_FAITH:
1937 OPTSET(INP_FAITH);
1938 break;
9bccf70c 1939#endif
1c79356b
A
1940 }
1941 break;
1942#undef OPTSET
1943
2d21ac55
A
1944#if CONFIG_FORCE_OUT_IFP
1945 case IP_FORCE_OUT_IFP: {
1946 char ifname[IFNAMSIZ];
1947 ifnet_t ifp;
1948
1949 /* Verify interface name parameter is sane */
1950 if (sopt->sopt_valsize > sizeof(ifname)) {
1951 error = EINVAL;
1952 break;
1953 }
1954
1955 /* Copy the interface name */
1956 if (sopt->sopt_valsize != 0) {
1957 error = sooptcopyin(sopt, ifname, sizeof(ifname), sopt->sopt_valsize);
1958 if (error)
1959 break;
1960 }
1961
1962 if (sopt->sopt_valsize == 0 || ifname[0] == 0) {
1963 // Set pdp_ifp to NULL
1964 inp->pdp_ifp = NULL;
1965
1966 // Flush the route
1967 if (inp->inp_route.ro_rt) {
1968 rtfree(inp->inp_route.ro_rt);
1969 inp->inp_route.ro_rt = NULL;
1970 }
1971
1972 break;
1973 }
1974
1975 /* Verify name is NULL terminated */
1976 if (ifname[sopt->sopt_valsize - 1] != 0) {
1977 error = EINVAL;
1978 break;
1979 }
1980
1981 if (ifnet_find_by_name(ifname, &ifp) != 0) {
1982 error = ENXIO;
1983 break;
1984 }
1985
1986 /* Won't actually free. Since we don't release this later, we should do it now. */
1987 ifnet_release(ifp);
1988
1989 /* This only works for point-to-point interfaces */
1990 if ((ifp->if_flags & IFF_POINTOPOINT) == 0) {
1991 error = ENOTSUP;
1992 break;
1993 }
1994
1995 inp->pdp_ifp = ifp;
1996 }
1997 break;
1998#endif
1c79356b
A
1999 case IP_MULTICAST_IF:
2000 case IP_MULTICAST_VIF:
2001 case IP_MULTICAST_TTL:
2002 case IP_MULTICAST_LOOP:
2003 case IP_ADD_MEMBERSHIP:
2004 case IP_DROP_MEMBERSHIP:
2005 error = ip_setmoptions(sopt, &inp->inp_moptions);
2006 break;
2007
2008 case IP_PORTRANGE:
2009 error = sooptcopyin(sopt, &optval, sizeof optval,
2010 sizeof optval);
2011 if (error)
2012 break;
2013
2014 switch (optval) {
2015 case IP_PORTRANGE_DEFAULT:
2016 inp->inp_flags &= ~(INP_LOWPORT);
2017 inp->inp_flags &= ~(INP_HIGHPORT);
2018 break;
2019
2020 case IP_PORTRANGE_HIGH:
2021 inp->inp_flags &= ~(INP_LOWPORT);
2022 inp->inp_flags |= INP_HIGHPORT;
2023 break;
2024
2025 case IP_PORTRANGE_LOW:
2026 inp->inp_flags &= ~(INP_HIGHPORT);
2027 inp->inp_flags |= INP_LOWPORT;
2028 break;
2029
2030 default:
2031 error = EINVAL;
2032 break;
2033 }
2034 break;
2035
2036#if IPSEC
2037 case IP_IPSEC_POLICY:
2038 {
2039 caddr_t req = NULL;
2040 size_t len = 0;
2041 int priv;
2042 struct mbuf *m;
2043 int optname;
2044
91447636
A
2045 if (sopt->sopt_valsize > MCLBYTES) {
2046 error = EMSGSIZE;
2047 break;
2048 }
9bccf70c 2049 if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
1c79356b 2050 break;
9bccf70c 2051 if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
1c79356b
A
2052 break;
2053 priv = (sopt->sopt_p != NULL &&
91447636 2054 proc_suser(sopt->sopt_p) != 0) ? 0 : 1;
1c79356b
A
2055 if (m) {
2056 req = mtod(m, caddr_t);
2057 len = m->m_len;
2058 }
2059 optname = sopt->sopt_name;
2060 error = ipsec4_set_policy(inp, optname, req, len, priv);
2061 m_freem(m);
2062 break;
2063 }
2064#endif /*IPSEC*/
2065
2d21ac55
A
2066#if TRAFFIC_MGT
2067 case IP_TRAFFIC_MGT_BACKGROUND:
2068 {
2069 unsigned background = 0;
2070 error = sooptcopyin(sopt, &background, sizeof(background), sizeof(background));
2071 if (error)
2072 break;
2073
2074 if (background)
2075 so->so_traffic_mgt_flags |= TRAFFIC_MGT_SO_BACKGROUND;
2076 else
2077 so->so_traffic_mgt_flags &= ~TRAFFIC_MGT_SO_BACKGROUND;
2078
2079 break;
2080 }
2081#endif /* TRAFFIC_MGT */
2082
1c79356b
A
2083 default:
2084 error = ENOPROTOOPT;
2085 break;
2086 }
2087 break;
2088
2089 case SOPT_GET:
2090 switch (sopt->sopt_name) {
2091 case IP_OPTIONS:
2092 case IP_RETOPTS:
2093 if (inp->inp_options)
2094 error = sooptcopyout(sopt,
2095 mtod(inp->inp_options,
2096 char *),
2097 inp->inp_options->m_len);
2098 else
2099 sopt->sopt_valsize = 0;
2100 break;
2101
2102 case IP_TOS:
2103 case IP_TTL:
2104 case IP_RECVOPTS:
2105 case IP_RECVRETOPTS:
2106 case IP_RECVDSTADDR:
2107 case IP_RECVIF:
55e303ae 2108 case IP_RECVTTL:
1c79356b 2109 case IP_PORTRANGE:
9bccf70c 2110#if defined(NFAITH) && NFAITH > 0
1c79356b 2111 case IP_FAITH:
9bccf70c 2112#endif
1c79356b
A
2113 switch (sopt->sopt_name) {
2114
2115 case IP_TOS:
2116 optval = inp->inp_ip_tos;
2117 break;
2118
2119 case IP_TTL:
2120 optval = inp->inp_ip_ttl;
2121 break;
2122
2123#define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0)
2124
2125 case IP_RECVOPTS:
2126 optval = OPTBIT(INP_RECVOPTS);
2127 break;
2128
2129 case IP_RECVRETOPTS:
2130 optval = OPTBIT(INP_RECVRETOPTS);
2131 break;
2132
2133 case IP_RECVDSTADDR:
2134 optval = OPTBIT(INP_RECVDSTADDR);
2135 break;
2136
2137 case IP_RECVIF:
2138 optval = OPTBIT(INP_RECVIF);
2139 break;
2140
55e303ae
A
2141 case IP_RECVTTL:
2142 optval = OPTBIT(INP_RECVTTL);
2143 break;
2144
1c79356b
A
2145 case IP_PORTRANGE:
2146 if (inp->inp_flags & INP_HIGHPORT)
2147 optval = IP_PORTRANGE_HIGH;
2148 else if (inp->inp_flags & INP_LOWPORT)
2149 optval = IP_PORTRANGE_LOW;
2150 else
2151 optval = 0;
2152 break;
2153
9bccf70c 2154#if defined(NFAITH) && NFAITH > 0
1c79356b
A
2155 case IP_FAITH:
2156 optval = OPTBIT(INP_FAITH);
2157 break;
9bccf70c 2158#endif
1c79356b
A
2159 }
2160 error = sooptcopyout(sopt, &optval, sizeof optval);
2161 break;
2162
2163 case IP_MULTICAST_IF:
2164 case IP_MULTICAST_VIF:
2165 case IP_MULTICAST_TTL:
2166 case IP_MULTICAST_LOOP:
2167 case IP_ADD_MEMBERSHIP:
2168 case IP_DROP_MEMBERSHIP:
2169 error = ip_getmoptions(sopt, inp->inp_moptions);
2170 break;
2171
2172#if IPSEC
2173 case IP_IPSEC_POLICY:
2174 {
2175 struct mbuf *m = NULL;
1c79356b 2176 caddr_t req = NULL;
9bccf70c 2177 size_t len = 0;
1c79356b 2178
9bccf70c 2179 if (m != 0) {
1c79356b
A
2180 req = mtod(m, caddr_t);
2181 len = m->m_len;
2182 }
1c79356b
A
2183 error = ipsec4_get_policy(sotoinpcb(so), req, len, &m);
2184 if (error == 0)
9bccf70c 2185 error = soopt_mcopyout(sopt, m); /* XXX */
1c79356b
A
2186 if (error == 0)
2187 m_freem(m);
2188 break;
2189 }
2190#endif /*IPSEC*/
2191
2d21ac55
A
2192#if TRAFFIC_MGT
2193 case IP_TRAFFIC_MGT_BACKGROUND:
2194 {
2195 unsigned background = so->so_traffic_mgt_flags;
2196 return (sooptcopyout(sopt, &background, sizeof(background)));
2197 break;
2198 }
2199#endif /* TRAFFIC_MGT */
2200
1c79356b
A
2201 default:
2202 error = ENOPROTOOPT;
2203 break;
2204 }
2205 break;
2206 }
2207 return (error);
2208}
2209
2210/*
2211 * Set up IP options in pcb for insertion in output packets.
2212 * Store in mbuf with pointer in pcbopt, adding pseudo-option
2213 * with destination address if source routed.
2214 */
2215static int
2d21ac55
A
2216ip_pcbopts(
2217 __unused int optname,
2218 struct mbuf **pcbopt,
2219 register struct mbuf *m)
1c79356b
A
2220{
2221 register int cnt, optlen;
2222 register u_char *cp;
2223 u_char opt;
2224
2225 /* turn off any old options */
2226 if (*pcbopt)
2227 (void)m_free(*pcbopt);
2228 *pcbopt = 0;
2229 if (m == (struct mbuf *)0 || m->m_len == 0) {
2230 /*
2231 * Only turning off any previous options.
2232 */
2233 if (m)
2234 (void)m_free(m);
2235 return (0);
2236 }
2237
2238#ifndef vax
2239 if (m->m_len % sizeof(int32_t))
2240 goto bad;
2241#endif
2242 /*
2243 * IP first-hop destination address will be stored before
2244 * actual options; move other options back
2245 * and clear it when none present.
2246 */
2247 if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
2248 goto bad;
2249 cnt = m->m_len;
2250 m->m_len += sizeof(struct in_addr);
2251 cp = mtod(m, u_char *) + sizeof(struct in_addr);
2252 ovbcopy(mtod(m, caddr_t), (caddr_t)cp, (unsigned)cnt);
2253 bzero(mtod(m, caddr_t), sizeof(struct in_addr));
2254
2255 for (; cnt > 0; cnt -= optlen, cp += optlen) {
2256 opt = cp[IPOPT_OPTVAL];
2257 if (opt == IPOPT_EOL)
2258 break;
2259 if (opt == IPOPT_NOP)
2260 optlen = 1;
2261 else {
2262 if (cnt < IPOPT_OLEN + sizeof(*cp))
2263 goto bad;
2264 optlen = cp[IPOPT_OLEN];
2265 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
2266 goto bad;
2267 }
2268 switch (opt) {
2269
2270 default:
2271 break;
2272
2273 case IPOPT_LSRR:
2274 case IPOPT_SSRR:
2275 /*
2276 * user process specifies route as:
2277 * ->A->B->C->D
2278 * D must be our final destination (but we can't
2279 * check that since we may not have connected yet).
2280 * A is first hop destination, which doesn't appear in
2281 * actual IP option, but is stored before the options.
2282 */
2283 if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
2284 goto bad;
2285 m->m_len -= sizeof(struct in_addr);
2286 cnt -= sizeof(struct in_addr);
2287 optlen -= sizeof(struct in_addr);
2288 cp[IPOPT_OLEN] = optlen;
2289 /*
2290 * Move first hop before start of options.
2291 */
2292 bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
2293 sizeof(struct in_addr));
2294 /*
2295 * Then copy rest of options back
2296 * to close up the deleted entry.
2297 */
2298 ovbcopy((caddr_t)(&cp[IPOPT_OFFSET+1] +
2299 sizeof(struct in_addr)),
2300 (caddr_t)&cp[IPOPT_OFFSET+1],
2301 (unsigned)cnt + sizeof(struct in_addr));
2302 break;
2303 }
2304 }
2305 if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
2306 goto bad;
2307 *pcbopt = m;
2308 return (0);
2309
2310bad:
2311 (void)m_free(m);
2312 return (EINVAL);
2313}
2314
2315/*
2316 * XXX
2317 * The whole multicast option thing needs to be re-thought.
2318 * Several of these options are equally applicable to non-multicast
2319 * transmission, and one (IP_MULTICAST_TTL) totally duplicates a
2320 * standard option (IP_TTL).
2321 */
9bccf70c
A
2322
2323/*
2324 * following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as interface index.
2325 */
2326static struct ifnet *
2327ip_multicast_if(a, ifindexp)
2328 struct in_addr *a;
2329 int *ifindexp;
2330{
2331 int ifindex;
2332 struct ifnet *ifp;
2333
2334 if (ifindexp)
2335 *ifindexp = 0;
2336 if (ntohl(a->s_addr) >> 24 == 0) {
2337 ifindex = ntohl(a->s_addr) & 0xffffff;
91447636
A
2338 ifnet_head_lock_shared();
2339 if (ifindex < 0 || if_index < ifindex) {
2340 ifnet_head_done();
9bccf70c 2341 return NULL;
91447636 2342 }
9bccf70c 2343 ifp = ifindex2ifnet[ifindex];
91447636 2344 ifnet_head_done();
9bccf70c
A
2345 if (ifindexp)
2346 *ifindexp = ifindex;
2347 } else {
2348 INADDR_TO_IFP(*a, ifp);
2349 }
2350 return ifp;
2351}
2352
1c79356b
A
2353/*
2354 * Set the IP multicast options in response to user setsockopt().
2355 */
2356static int
2357ip_setmoptions(sopt, imop)
2358 struct sockopt *sopt;
2359 struct ip_moptions **imop;
2360{
2361 int error = 0;
2362 int i;
2363 struct in_addr addr;
2364 struct ip_mreq mreq;
9bccf70c 2365 struct ifnet *ifp = NULL;
1c79356b 2366 struct ip_moptions *imo = *imop;
9bccf70c 2367 int ifindex;
1c79356b
A
2368
2369 if (imo == NULL) {
2370 /*
2371 * No multicast option buffer attached to the pcb;
2372 * allocate one and initialize to default values.
2373 */
55e303ae
A
2374 error = ip_createmoptions(imop);
2375 if (error != 0)
2376 return error;
2377 imo = *imop;
1c79356b
A
2378 }
2379
2380 switch (sopt->sopt_name) {
2381 /* store an index number for the vif you wanna use in the send */
2d21ac55 2382#if MROUTING
1c79356b
A
2383 case IP_MULTICAST_VIF:
2384 if (legal_vif_num == 0) {
2385 error = EOPNOTSUPP;
2386 break;
2387 }
2388 error = sooptcopyin(sopt, &i, sizeof i, sizeof i);
2389 if (error)
2390 break;
2391 if (!legal_vif_num(i) && (i != -1)) {
2392 error = EINVAL;
2393 break;
2394 }
2395 imo->imo_multicast_vif = i;
2396 break;
2d21ac55 2397#endif /* MROUTING */
1c79356b
A
2398
2399 case IP_MULTICAST_IF:
2400 /*
2401 * Select the interface for outgoing multicast packets.
2402 */
2403 error = sooptcopyin(sopt, &addr, sizeof addr, sizeof addr);
2404 if (error)
2405 break;
2406 /*
2407 * INADDR_ANY is used to remove a previous selection.
2408 * When no interface is selected, a default one is
2409 * chosen every time a multicast packet is sent.
2410 */
2411 if (addr.s_addr == INADDR_ANY) {
2412 imo->imo_multicast_ifp = NULL;
2413 break;
2414 }
2415 /*
2416 * The selected interface is identified by its local
2417 * IP address. Find the interface and confirm that
2418 * it supports multicasting.
2419 */
9bccf70c 2420 ifp = ip_multicast_if(&addr, &ifindex);
1c79356b 2421 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1c79356b
A
2422 error = EADDRNOTAVAIL;
2423 break;
2424 }
2425 imo->imo_multicast_ifp = ifp;
9bccf70c
A
2426 if (ifindex)
2427 imo->imo_multicast_addr = addr;
2428 else
2429 imo->imo_multicast_addr.s_addr = INADDR_ANY;
1c79356b
A
2430 break;
2431
2432 case IP_MULTICAST_TTL:
2433 /*
2434 * Set the IP time-to-live for outgoing multicast packets.
2435 * The original multicast API required a char argument,
2436 * which is inconsistent with the rest of the socket API.
2437 * We allow either a char or an int.
2438 */
2439 if (sopt->sopt_valsize == 1) {
2440 u_char ttl;
2441 error = sooptcopyin(sopt, &ttl, 1, 1);
2442 if (error)
2443 break;
2444 imo->imo_multicast_ttl = ttl;
2445 } else {
2446 u_int ttl;
2447 error = sooptcopyin(sopt, &ttl, sizeof ttl,
2448 sizeof ttl);
2449 if (error)
2450 break;
2451 if (ttl > 255)
2452 error = EINVAL;
2453 else
2454 imo->imo_multicast_ttl = ttl;
2455 }
2456 break;
2457
2458 case IP_MULTICAST_LOOP:
2459 /*
2460 * Set the loopback flag for outgoing multicast packets.
2461 * Must be zero or one. The original multicast API required a
2462 * char argument, which is inconsistent with the rest
2463 * of the socket API. We allow either a char or an int.
2464 */
2465 if (sopt->sopt_valsize == 1) {
2466 u_char loop;
2467 error = sooptcopyin(sopt, &loop, 1, 1);
2468 if (error)
2469 break;
2470 imo->imo_multicast_loop = !!loop;
2471 } else {
2472 u_int loop;
2473 error = sooptcopyin(sopt, &loop, sizeof loop,
2474 sizeof loop);
2475 if (error)
2476 break;
2477 imo->imo_multicast_loop = !!loop;
2478 }
2479 break;
2480
2481 case IP_ADD_MEMBERSHIP:
2482 /*
2483 * Add a multicast group membership.
2484 * Group must be a valid IP multicast address.
2485 */
2486 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
2487 if (error)
2488 break;
9bccf70c 2489
55e303ae 2490 error = ip_addmembership(imo, &mreq);
1c79356b
A
2491 break;
2492
2493 case IP_DROP_MEMBERSHIP:
2494 /*
2495 * Drop a multicast group membership.
2496 * Group must be a valid IP multicast address.
2497 */
2498 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
2499 if (error)
2500 break;
55e303ae
A
2501
2502 error = ip_dropmembership(imo, &mreq);
1c79356b
A
2503 break;
2504
2505 default:
2506 error = EOPNOTSUPP;
2507 break;
2508 }
2509
2510 /*
2511 * If all options have default values, no need to keep the mbuf.
2512 */
2513 if (imo->imo_multicast_ifp == NULL &&
2d21ac55 2514 imo->imo_multicast_vif == (u_long)-1 &&
1c79356b
A
2515 imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL &&
2516 imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP &&
2517 imo->imo_num_memberships == 0) {
2518 FREE(*imop, M_IPMOPTS);
2519 *imop = NULL;
2520 }
2521
2522 return (error);
2523}
2524
55e303ae
A
2525/*
2526 * Set the IP multicast options in response to user setsockopt().
2527 */
2528__private_extern__ int
2529ip_createmoptions(
2530 struct ip_moptions **imop)
2531{
2532 struct ip_moptions *imo;
2533 imo = (struct ip_moptions*) _MALLOC(sizeof(*imo), M_IPMOPTS,
2534 M_WAITOK);
2535
2536 if (imo == NULL)
2537 return (ENOBUFS);
2538 *imop = imo;
2539 imo->imo_multicast_ifp = NULL;
2540 imo->imo_multicast_addr.s_addr = INADDR_ANY;
2541 imo->imo_multicast_vif = -1;
2542 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
2543 imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
2544 imo->imo_num_memberships = 0;
2545
2546 return 0;
2547}
2548
2549/*
2550 * Add membership to an IPv4 multicast.
2551 */
2552__private_extern__ int
2553ip_addmembership(
2554 struct ip_moptions *imo,
2555 struct ip_mreq *mreq)
2556{
2557 struct route ro;
2558 struct sockaddr_in *dst;
2559 struct ifnet *ifp = NULL;
2560 int error = 0;
55e303ae
A
2561 int i;
2562
2563 if (!IN_MULTICAST(ntohl(mreq->imr_multiaddr.s_addr))) {
2564 error = EINVAL;
2565 return error;
2566 }
55e303ae
A
2567 /*
2568 * If no interface address was provided, use the interface of
2569 * the route to the given multicast address.
2570 */
2571 if (mreq->imr_interface.s_addr == INADDR_ANY) {
2572 bzero((caddr_t)&ro, sizeof(ro));
2573 dst = (struct sockaddr_in *)&ro.ro_dst;
2574 dst->sin_len = sizeof(*dst);
2575 dst->sin_family = AF_INET;
2576 dst->sin_addr = mreq->imr_multiaddr;
2d21ac55
A
2577 lck_mtx_lock(rt_mtx);
2578 rtalloc_ign_locked(&ro, 0UL);
55e303ae
A
2579 if (ro.ro_rt != NULL) {
2580 ifp = ro.ro_rt->rt_ifp;
2d21ac55 2581 rtfree_locked(ro.ro_rt);
55e303ae
A
2582 }
2583 else {
2584 /* If there's no default route, try using loopback */
2585 mreq->imr_interface.s_addr = INADDR_LOOPBACK;
2586 }
2d21ac55 2587 lck_mtx_unlock(rt_mtx);
55e303ae
A
2588 }
2589
2590 if (ifp == NULL) {
2591 ifp = ip_multicast_if(&mreq->imr_interface, NULL);
2592 }
2593
2594 /*
2595 * See if we found an interface, and confirm that it
2596 * supports multicast.
2597 */
2598 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
2599 error = EADDRNOTAVAIL;
55e303ae
A
2600 return error;
2601 }
2602 /*
2603 * See if the membership already exists or if all the
2604 * membership slots are full.
2605 */
2606 for (i = 0; i < imo->imo_num_memberships; ++i) {
2607 if (imo->imo_membership[i]->inm_ifp == ifp &&
2608 imo->imo_membership[i]->inm_addr.s_addr
2609 == mreq->imr_multiaddr.s_addr)
2610 break;
2611 }
2612 if (i < imo->imo_num_memberships) {
2613 error = EADDRINUSE;
55e303ae
A
2614 return error;
2615 }
2616 if (i == IP_MAX_MEMBERSHIPS) {
2617 error = ETOOMANYREFS;
55e303ae
A
2618 return error;
2619 }
2620 /*
2621 * Everything looks good; add a new record to the multicast
2622 * address list for the given interface.
2623 */
2624 if ((imo->imo_membership[i] =
2625 in_addmulti(&mreq->imr_multiaddr, ifp)) == NULL) {
2626 error = ENOBUFS;
55e303ae
A
2627 return error;
2628 }
2629 ++imo->imo_num_memberships;
55e303ae
A
2630
2631 return error;
2632}
2633
2634/*
2635 * Drop membership of an IPv4 multicast.
2636 */
2637__private_extern__ int
2638ip_dropmembership(
2639 struct ip_moptions *imo,
2640 struct ip_mreq *mreq)
2641{
2642 int error = 0;
55e303ae
A
2643 struct ifnet* ifp = NULL;
2644 int i;
2645
2646 if (!IN_MULTICAST(ntohl(mreq->imr_multiaddr.s_addr))) {
2647 error = EINVAL;
2648 return error;
2649 }
2650
55e303ae
A
2651 /*
2652 * If an interface address was specified, get a pointer
2653 * to its ifnet structure.
2654 */
2655 if (mreq->imr_interface.s_addr == INADDR_ANY)
2656 ifp = NULL;
2657 else {
2658 ifp = ip_multicast_if(&mreq->imr_interface, NULL);
2659 if (ifp == NULL) {
2660 error = EADDRNOTAVAIL;
55e303ae
A
2661 return error;
2662 }
2663 }
2664 /*
2665 * Find the membership in the membership array.
2666 */
2667 for (i = 0; i < imo->imo_num_memberships; ++i) {
2668 if ((ifp == NULL ||
2669 imo->imo_membership[i]->inm_ifp == ifp) &&
2670 imo->imo_membership[i]->inm_addr.s_addr ==
2671 mreq->imr_multiaddr.s_addr)
2672 break;
2673 }
2674 if (i == imo->imo_num_memberships) {
2675 error = EADDRNOTAVAIL;
55e303ae
A
2676 return error;
2677 }
2678 /*
2679 * Give up the multicast address record to which the
2680 * membership points.
2681 */
91447636 2682 in_delmulti(&imo->imo_membership[i]);
55e303ae
A
2683 /*
2684 * Remove the gap in the membership array.
2685 */
2686 for (++i; i < imo->imo_num_memberships; ++i)
2687 imo->imo_membership[i-1] = imo->imo_membership[i];
2688 --imo->imo_num_memberships;
55e303ae
A
2689
2690 return error;
2691}
2692
1c79356b
A
2693/*
2694 * Return the IP multicast options in response to user getsockopt().
2695 */
2696static int
2697ip_getmoptions(sopt, imo)
2698 struct sockopt *sopt;
2699 register struct ip_moptions *imo;
2700{
2701 struct in_addr addr;
2702 struct in_ifaddr *ia;
2703 int error, optval;
2704 u_char coptval;
2705
2706 error = 0;
2707 switch (sopt->sopt_name) {
2d21ac55 2708#if MROUTING
1c79356b
A
2709 case IP_MULTICAST_VIF:
2710 if (imo != NULL)
2711 optval = imo->imo_multicast_vif;
2712 else
2713 optval = -1;
2714 error = sooptcopyout(sopt, &optval, sizeof optval);
2715 break;
2d21ac55 2716#endif /* MROUTING */
1c79356b
A
2717
2718 case IP_MULTICAST_IF:
2719 if (imo == NULL || imo->imo_multicast_ifp == NULL)
2720 addr.s_addr = INADDR_ANY;
9bccf70c
A
2721 else if (imo->imo_multicast_addr.s_addr) {
2722 /* return the value user has set */
2723 addr = imo->imo_multicast_addr;
2724 } else {
1c79356b
A
2725 IFP_TO_IA(imo->imo_multicast_ifp, ia);
2726 addr.s_addr = (ia == NULL) ? INADDR_ANY
2727 : IA_SIN(ia)->sin_addr.s_addr;
2728 }
2729 error = sooptcopyout(sopt, &addr, sizeof addr);
2730 break;
2731
2732 case IP_MULTICAST_TTL:
2733 if (imo == 0)
2734 optval = coptval = IP_DEFAULT_MULTICAST_TTL;
2735 else
2736 optval = coptval = imo->imo_multicast_ttl;
2737 if (sopt->sopt_valsize == 1)
2738 error = sooptcopyout(sopt, &coptval, 1);
2739 else
2740 error = sooptcopyout(sopt, &optval, sizeof optval);
2741 break;
2742
2743 case IP_MULTICAST_LOOP:
2744 if (imo == 0)
2745 optval = coptval = IP_DEFAULT_MULTICAST_LOOP;
2746 else
2747 optval = coptval = imo->imo_multicast_loop;
2748 if (sopt->sopt_valsize == 1)
2749 error = sooptcopyout(sopt, &coptval, 1);
2750 else
2751 error = sooptcopyout(sopt, &optval, sizeof optval);
2752 break;
2753
2754 default:
2755 error = ENOPROTOOPT;
2756 break;
2757 }
2758 return (error);
2759}
2760
2761/*
2762 * Discard the IP multicast options.
2763 */
2764void
2765ip_freemoptions(imo)
2766 register struct ip_moptions *imo;
2767{
2768 register int i;
2769
2770 if (imo != NULL) {
2771 for (i = 0; i < imo->imo_num_memberships; ++i)
91447636 2772 in_delmulti(&imo->imo_membership[i]);
1c79356b
A
2773 FREE(imo, M_IPMOPTS);
2774 }
2775}
2776
2777/*
2778 * Routine called from ip_output() to loop back a copy of an IP multicast
2779 * packet to the input queue of a specified interface. Note that this
2780 * calls the output routine of the loopback "driver", but with an interface
2781 * pointer that might NOT be a loopback interface -- evil, but easier than
2782 * replicating that code here.
2783 */
2784static void
2785ip_mloopback(ifp, m, dst, hlen)
2786 struct ifnet *ifp;
2787 register struct mbuf *m;
2788 register struct sockaddr_in *dst;
2789 int hlen;
2790{
2791 register struct ip *ip;
2792 struct mbuf *copym;
2d21ac55 2793 int sw_csum = (apple_hwcksum_tx == 0);
1c79356b
A
2794
2795 copym = m_copy(m, 0, M_COPYALL);
2796 if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen))
2797 copym = m_pullup(copym, hlen);
2d21ac55
A
2798
2799 if (copym == NULL)
2800 return;
2801
2802 /*
2803 * We don't bother to fragment if the IP length is greater
2804 * than the interface's MTU. Can this possibly matter?
2805 */
2806 ip = mtod(copym, struct ip *);
2807 HTONS(ip->ip_len);
2808 HTONS(ip->ip_off);
2809 ip->ip_sum = 0;
2810 ip->ip_sum = in_cksum(copym, hlen);
2811 /*
2812 * NB:
2813 * It's not clear whether there are any lingering
2814 * reentrancy problems in other areas which might
2815 * be exposed by using ip_input directly (in
2816 * particular, everything which modifies the packet
2817 * in-place). Yet another option is using the
2818 * protosw directly to deliver the looped back
2819 * packet. For the moment, we'll err on the side
2820 * of safety by using if_simloop().
2821 */
1c79356b 2822#if 1 /* XXX */
2d21ac55
A
2823 if (dst->sin_family != AF_INET) {
2824 printf("ip_mloopback: bad address family %d\n",
2825 dst->sin_family);
2826 dst->sin_family = AF_INET;
2827 }
1c79356b
A
2828#endif
2829
9bccf70c 2830 /*
2d21ac55
A
2831 * Mark checksum as valid or calculate checksum for loopback.
2832 *
2833 * This is done this way because we have to embed the ifp of
2834 * the interface we will send the original copy of the packet
2835 * out on in the mbuf. ip_input will check if_hwassist of the
2836 * embedded ifp and ignore all csum_flags if if_hwassist is 0.
2837 * The UDP checksum has not been calculated yet.
2838 */
2839 if (sw_csum || (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA)) {
2840 if (!sw_csum && IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist)) {
2841 copym->m_pkthdr.csum_flags |=
2842 CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
2843 CSUM_IP_CHECKED | CSUM_IP_VALID;
2844 copym->m_pkthdr.csum_data = 0xffff;
1c79356b 2845 } else {
2d21ac55
A
2846 NTOHS(ip->ip_len);
2847 in_delayed_cksum(copym);
2848 HTONS(ip->ip_len);
1c79356b 2849 }
2d21ac55 2850 }
1c79356b 2851
2d21ac55
A
2852 /*
2853 * TedW:
2854 * We need to send all loopback traffic down to dlil in case
2855 * a filter has tapped-in.
2856 */
2857
2858 /*
2859 * Stuff the 'real' ifp into the pkthdr, to be used in matching
2860 * in ip_input(); we need the loopback ifp/dl_tag passed as args
2861 * to make the loopback driver compliant with the data link
2862 * requirements.
2863 */
2864 if (lo_ifp) {
2865 copym->m_pkthdr.rcvif = ifp;
2866 dlil_output(lo_ifp, PF_INET, copym, 0,
2867 (struct sockaddr *) dst, 0);
2868 } else {
2869 printf("Warning: ip_output call to dlil_find_dltag failed!\n");
2870 m_freem(copym);
1c79356b
A
2871 }
2872}