]> git.saurik.com Git - apple/xnu.git/blame - bsd/netinet/ip_output.c
xnu-792.25.20.tar.gz
[apple/xnu.git] / bsd / netinet / ip_output.c
CommitLineData
1c79356b 1/*
5d5c5d0d
A
2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
3 *
6601e61a 4 * @APPLE_LICENSE_HEADER_START@
1c79356b 5 *
6601e61a
A
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
8f6c56a5 11 *
6601e61a
A
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
6601e61a
A
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
8f6c56a5 19 *
6601e61a 20 * @APPLE_LICENSE_HEADER_END@
1c79356b
A
21 */
22/*
23 * Copyright (c) 1982, 1986, 1988, 1990, 1993
24 * The Regents of the University of California. All rights reserved.
25 *
26 * Redistribution and use in source and binary forms, with or without
27 * modification, are permitted provided that the following conditions
28 * are met:
29 * 1. Redistributions of source code must retain the above copyright
30 * notice, this list of conditions and the following disclaimer.
31 * 2. Redistributions in binary form must reproduce the above copyright
32 * notice, this list of conditions and the following disclaimer in the
33 * documentation and/or other materials provided with the distribution.
34 * 3. All advertising materials mentioning features or use of this software
35 * must display the following acknowledgement:
36 * This product includes software developed by the University of
37 * California, Berkeley and its contributors.
38 * 4. Neither the name of the University nor the names of its contributors
39 * may be used to endorse or promote products derived from this software
40 * without specific prior written permission.
41 *
42 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
43 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
44 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
45 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
46 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
47 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
48 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
49 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
50 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
51 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
52 * SUCH DAMAGE.
53 *
54 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94
9bccf70c 55 * $FreeBSD: src/sys/netinet/ip_output.c,v 1.99.2.16 2001/07/19 06:37:26 kris Exp $
1c79356b
A
56 */
57
58#define _IP_VHL
59
1c79356b
A
60#include <sys/param.h>
61#include <sys/systm.h>
62#include <sys/kernel.h>
63#include <sys/malloc.h>
64#include <sys/mbuf.h>
65#include <sys/protosw.h>
66#include <sys/socket.h>
67#include <sys/socketvar.h>
91447636
A
68#include <kern/locks.h>
69#include <sys/sysctl.h>
1c79356b
A
70
71#include <net/if.h>
72#include <net/route.h>
73
74#include <netinet/in.h>
75#include <netinet/in_systm.h>
76#include <netinet/ip.h>
1c79356b
A
77#include <netinet/in_pcb.h>
78#include <netinet/in_var.h>
79#include <netinet/ip_var.h>
1c79356b 80
91447636
A
81#include <netinet/kpi_ipfilter_var.h>
82
9bccf70c
A
83#include "faith.h"
84
85#include <net/dlil.h>
1c79356b
A
86#include <sys/kdebug.h>
87
88#define DBG_LAYER_BEG NETDBG_CODE(DBG_NETIP, 1)
89#define DBG_LAYER_END NETDBG_CODE(DBG_NETIP, 3)
90#define DBG_FNC_IP_OUTPUT NETDBG_CODE(DBG_NETIP, (1 << 8) | 1)
55e303ae 91#define DBG_FNC_IPSEC4_OUTPUT NETDBG_CODE(DBG_NETIP, (2 << 8) | 1)
1c79356b 92
8f6c56a5 93#define SWAP16(v) ((((v) & 0xff) << 8) | ((v) >> 8))
1c79356b 94
1c79356b
A
95#if IPSEC
96#include <netinet6/ipsec.h>
97#include <netkey/key.h>
9bccf70c 98#if IPSEC_DEBUG
1c79356b 99#include <netkey/key_debug.h>
1c79356b 100#else
9bccf70c 101#define KEYDEBUG(lev,arg)
1c79356b 102#endif
9bccf70c 103#endif /*IPSEC*/
1c79356b 104
1c79356b 105#include <netinet/ip_fw.h>
91447636 106#include <netinet/ip_divert.h>
1c79356b
A
107
108#if DUMMYNET
109#include <netinet/ip_dummynet.h>
110#endif
111
112#if IPFIREWALL_FORWARD_DEBUG
113#define print_ip(a) printf("%ld.%ld.%ld.%ld",(ntohl(a.s_addr)>>24)&0xFF,\
114 (ntohl(a.s_addr)>>16)&0xFF,\
115 (ntohl(a.s_addr)>>8)&0xFF,\
116 (ntohl(a.s_addr))&0xFF);
117#endif
118
91447636
A
119#if IPSEC
120extern lck_mtx_t *sadb_mutex;
121#endif
122
1c79356b
A
123u_short ip_id;
124
91447636
A
125static struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *);
126static struct ifnet *ip_multicast_if(struct in_addr *, int *);
127static void ip_mloopback(struct ifnet *, struct mbuf *,
128 struct sockaddr_in *, int);
129static int ip_getmoptions(struct sockopt *, struct ip_moptions *);
130static int ip_pcbopts(int, struct mbuf **, struct mbuf *);
131static int ip_setmoptions(struct sockopt *, struct ip_moptions **);
9bccf70c 132
55e303ae
A
133int ip_createmoptions(struct ip_moptions **imop);
134int ip_addmembership(struct ip_moptions *imo, struct ip_mreq *mreq);
135int ip_dropmembership(struct ip_moptions *imo, struct ip_mreq *mreq);
91447636
A
136int ip_optcopy(struct ip *, struct ip *);
137extern int (*fr_checkp)(struct ip *, int, struct ifnet *, int, struct mbuf **);
9bccf70c
A
138#ifdef __APPLE__
139extern struct mbuf* m_dup(register struct mbuf *m, int how);
140#endif
141
0b4e3aa0 142extern int apple_hwcksum_tx;
55e303ae 143extern u_long route_generation;
1c79356b
A
144
145extern struct protosw inetsw[];
146
9bccf70c 147extern struct ip_linklocal_stat ip_linklocal_stat;
91447636 148extern lck_mtx_t *ip_mutex;
9bccf70c
A
149
150/* temporary: for testing */
151#if IPSEC
152extern int ipsec_bypass;
153#endif
154
91447636
A
/*
 * Integer tunable, initially 0, registered below as a read-write
 * (CTLFLAG_RW) sysctl named "maxchainsent" under the _net_inet_ip node.
 * NOTE(review): the sysctl description string is "use dlil_output_list";
 * how the value is consumed is not visible in this part of the file --
 * confirm against the rest of ip_output_list().
 */
155static int ip_maxchainsent = 0;
156SYSCTL_INT(_net_inet_ip, OID_AUTO, maxchainsent, CTLFLAG_RW,
157 &ip_maxchainsent, 0, "use dlil_output_list");
1c79356b
A
/*
 * ip_output -- transmit one IP packet.
 *
 * The mbuf chain m0 carries a skeletal IP header (len, off, ttl, proto,
 * tos, src, dst already filled in).  The chain holding the packet is
 * freed; the options mbuf opt, when supplied, is not freed.
 *
 * This is a thin front end: it forwards every argument unchanged to
 * ip_output_list(), passing 0 for the packetchain argument, and returns
 * whatever error code that routine reports.
 */
int
ip_output(
	struct mbuf *m0,
	struct mbuf *opt,
	struct route *ro,
	int flags,
	struct ip_moptions *imo)
{
	return ip_output_list(m0, 0, opt, ro, flags, imo);
}
176
177int
178ip_output_list(
179 struct mbuf *m0,
180 int packetchain,
181 struct mbuf *opt,
182 struct route *ro,
183 int flags,
184 struct ip_moptions *imo)
1c79356b
A
185{
186 struct ip *ip, *mhip;
55e303ae 187 struct ifnet *ifp = NULL;
1c79356b
A
188 struct mbuf *m = m0;
189 int hlen = sizeof (struct ip);
190 int len, off, error = 0;
55e303ae 191 struct sockaddr_in *dst = NULL;
9bccf70c 192 struct in_ifaddr *ia = NULL;
0b4e3aa0 193 int isbroadcast, sw_csum;
91447636 194 struct in_addr pkt_dst;
1c79356b
A
195#if IPSEC
196 struct route iproute;
9bccf70c 197 struct socket *so = NULL;
1c79356b
A
198 struct secpolicy *sp = NULL;
199#endif
200#if IPFIREWALL_FORWARD
201 int fwd_rewrite_src = 0;
202#endif
91447636
A
203 struct ip_fw_args args;
204 int didfilter = 0;
205 ipfilter_t inject_filter_ref = 0;
206 struct m_tag *tag;
207 struct route dn_route;
208 struct mbuf * packetlist;
209 int pktcnt = 0;
210
211 lck_mtx_lock(ip_mutex);
1c79356b
A
212
213 KERNEL_DEBUG(DBG_FNC_IP_OUTPUT | DBG_FUNC_START, 0,0,0,0,0);
214
91447636
A
215 packetlist = m0;
216 args.eh = NULL;
217 args.rule = NULL;
218 args.next_hop = NULL;
219 args.divert_rule = 0; /* divert cookie */
220
221 /* Grab info from mtags prepended to the chain */
222#if DUMMYNET
223 if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_DUMMYNET, NULL)) != NULL) {
224 struct dn_pkt_tag *dn_tag;
225
226 dn_tag = (struct dn_pkt_tag *)(tag+1);
227 args.rule = dn_tag->rule;
228 opt = NULL;
229 dn_route = dn_tag->ro;
230 ro = &dn_route;
231
232 imo = NULL;
233 dst = dn_tag->dn_dst;
234 ifp = dn_tag->ifp;
235 flags = dn_tag->flags;
236
237 m_tag_delete(m0, tag);
238 }
239#endif /* DUMMYNET */
240
241 if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_DIVERT, NULL)) != NULL) {
242 struct divert_tag *div_tag;
243
244 div_tag = (struct divert_tag *)(tag+1);
245 args.divert_rule = div_tag->cookie;
246
247 m_tag_delete(m0, tag);
248 }
249 if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_IPFORWARD, NULL)) != NULL) {
250 struct ip_fwd_tag *ipfwd_tag;
251
252 ipfwd_tag = (struct ip_fwd_tag *)(tag+1);
253 args.next_hop = ipfwd_tag->next_hop;
254
255 m_tag_delete(m0, tag);
256 }
257
258 m = m0;
259
260#if DIAGNOSTIC
261 if ( !m || (m->m_flags & M_PKTHDR) != 0)
262 panic("ip_output no HDR");
263 if (!ro)
264 panic("ip_output no route, proto = %d",
265 mtod(m, struct ip *)->ip_p);
9bccf70c 266#endif
91447636
A
267
268 if (args.rule != NULL) { /* dummynet already saw us */
1c79356b 269 ip = mtod(m, struct ip *);
1c79356b 270 hlen = IP_VHL_HL(ip->ip_vhl) << 2 ;
9bccf70c
A
271 if (ro->ro_rt != NULL)
272 ia = (struct in_ifaddr *)ro->ro_rt->rt_ifa;
91447636
A
273 if (ia)
274 ifaref(&ia->ia_ifa);
275#if IPSEC
276 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) {
277 so = ipsec_getsocket(m);
278 (void)ipsec_setsocket(m, NULL);
279 }
1c79356b 280#endif
91447636
A
281 goto sendit;
282 }
283
9bccf70c 284#if IPSEC
55e303ae 285 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) {
9bccf70c
A
286 so = ipsec_getsocket(m);
287 (void)ipsec_setsocket(m, NULL);
288 }
289#endif
91447636
A
290loopit:
291 /*
292 * No need to process packet twice if we've
293 * already seen it
294 */
295 inject_filter_ref = ipf_get_inject_filter(m);
1c79356b 296
1c79356b
A
297 if (opt) {
298 m = ip_insertoptions(m, opt, &len);
299 hlen = len;
300 }
301 ip = mtod(m, struct ip *);
91447636
A
302 pkt_dst = args.next_hop ? args.next_hop->sin_addr : ip->ip_dst;
303
1c79356b
A
304 /*
305 * Fill in IP header.
306 */
307 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
308 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, hlen >> 2);
309 ip->ip_off &= IP_DF;
9bccf70c
A
310#if RANDOM_IP_ID
311 ip->ip_id = ip_randomid();
312#else
1c79356b 313 ip->ip_id = htons(ip_id++);
9bccf70c 314#endif
1c79356b
A
315 ipstat.ips_localout++;
316 } else {
317 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
318 }
319
320 KERNEL_DEBUG(DBG_LAYER_BEG, ip->ip_dst.s_addr,
321 ip->ip_src.s_addr, ip->ip_p, ip->ip_off, ip->ip_len);
322
323 dst = (struct sockaddr_in *)&ro->ro_dst;
55e303ae 324
1c79356b
A
325 /*
326 * If there is a cached route,
327 * check that it is to the same destination
328 * and is still up. If not, free it and try again.
55e303ae
A
329 * The address family should also be checked in case of sharing the
330 * cache with IPv6.
1c79356b 331 */
55e303ae 332
91447636
A
333 {
334 if (ro->ro_rt && (ro->ro_rt->generation_id != route_generation) &&
335 ((flags & (IP_ROUTETOIF | IP_FORWARDING)) == 0) && (ip->ip_src.s_addr != INADDR_ANY) &&
336 (ifa_foraddr(ip->ip_src.s_addr) == 0)) {
337 error = EADDRNOTAVAIL;
338 goto bad;
339 }
ab86ba33 340 }
1c79356b 341 if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
55e303ae 342 dst->sin_family != AF_INET ||
91447636 343 dst->sin_addr.s_addr != pkt_dst.s_addr)) {
9bccf70c 344 rtfree(ro->ro_rt);
1c79356b
A
345 ro->ro_rt = (struct rtentry *)0;
346 }
347 if (ro->ro_rt == 0) {
55e303ae 348 bzero(dst, sizeof(*dst));
1c79356b
A
349 dst->sin_family = AF_INET;
350 dst->sin_len = sizeof(*dst);
91447636 351 dst->sin_addr = pkt_dst;
1c79356b
A
352 }
353 /*
354 * If routing to interface only,
355 * short circuit routing lookup.
356 */
357#define ifatoia(ifa) ((struct in_ifaddr *)(ifa))
358#define sintosa(sin) ((struct sockaddr *)(sin))
359 if (flags & IP_ROUTETOIF) {
91447636
A
360 if (ia)
361 ifafree(&ia->ia_ifa);
362 if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0) {
363 if ((ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) {
364 ipstat.ips_noroute++;
365 error = ENETUNREACH;
366 goto bad;
367 }
1c79356b
A
368 }
369 ifp = ia->ia_ifp;
1c79356b
A
370 ip->ip_ttl = 1;
371 isbroadcast = in_broadcast(dst->sin_addr, ifp);
372 } else {
373 /*
374 * If this is the case, we probably don't want to allocate
375 * a protocol-cloned route since we didn't get one from the
376 * ULP. This lets TCP do its thing, while not burdening
377 * forwarding or ICMP with the overhead of cloning a route.
378 * Of course, we still want to do any cloning requested by
379 * the link layer, as this is probably required in all cases
380 * for correct operation (as it is for ARP).
381 */
382 if (ro->ro_rt == 0)
383 rtalloc_ign(ro, RTF_PRCLONING);
384 if (ro->ro_rt == 0) {
385 ipstat.ips_noroute++;
386 error = EHOSTUNREACH;
387 goto bad;
388 }
91447636
A
389 if (ia)
390 ifafree(&ia->ia_ifa);
1c79356b 391 ia = ifatoia(ro->ro_rt->rt_ifa);
91447636
A
392 if (ia)
393 ifaref(&ia->ia_ifa);
1c79356b 394 ifp = ro->ro_rt->rt_ifp;
1c79356b
A
395 ro->ro_rt->rt_use++;
396 if (ro->ro_rt->rt_flags & RTF_GATEWAY)
397 dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
398 if (ro->ro_rt->rt_flags & RTF_HOST)
399 isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST);
400 else
401 isbroadcast = in_broadcast(dst->sin_addr, ifp);
402 }
91447636 403 if (IN_MULTICAST(ntohl(pkt_dst.s_addr))) {
1c79356b
A
404 struct in_multi *inm;
405
406 m->m_flags |= M_MCAST;
407 /*
408 * IP destination address is multicast. Make sure "dst"
409 * still points to the address in "ro". (It may have been
410 * changed to point to a gateway address, above.)
411 */
412 dst = (struct sockaddr_in *)&ro->ro_dst;
413 /*
414 * See if the caller provided any multicast options
415 */
416 if (imo != NULL) {
55e303ae 417 if ((flags & IP_RAWOUTPUT) == 0) ip->ip_ttl = imo->imo_multicast_ttl;
fa4905b1 418 if (imo->imo_multicast_ifp != NULL) {
1c79356b 419 ifp = imo->imo_multicast_ifp;
fa4905b1 420 }
55e303ae
A
421 if (imo->imo_multicast_vif != -1 &&
422 ((flags & IP_RAWOUTPUT) == 0 || ip->ip_src.s_addr == INADDR_ANY))
1c79356b 423 ip->ip_src.s_addr =
55e303ae 424 ip_mcast_src(imo->imo_multicast_vif);
1c79356b 425 } else
55e303ae 426 if ((flags & IP_RAWOUTPUT) == 0) ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
1c79356b
A
427 /*
428 * Confirm that the outgoing interface supports multicast.
429 */
430 if ((imo == NULL) || (imo->imo_multicast_vif == -1)) {
431 if ((ifp->if_flags & IFF_MULTICAST) == 0) {
432 ipstat.ips_noroute++;
433 error = ENETUNREACH;
434 goto bad;
435 }
436 }
437 /*
438 * If source address not specified yet, use address
439 * of outgoing interface.
440 */
441 if (ip->ip_src.s_addr == INADDR_ANY) {
442 register struct in_ifaddr *ia1;
443
9bccf70c 444 TAILQ_FOREACH(ia1, &in_ifaddrhead, ia_link)
1c79356b
A
445 if (ia1->ia_ifp == ifp) {
446 ip->ip_src = IA_SIN(ia1)->sin_addr;
55e303ae 447
1c79356b
A
448 break;
449 }
55e303ae
A
450 if (ip->ip_src.s_addr == INADDR_ANY) {
451 error = ENETUNREACH;
452 goto bad;
453 }
1c79356b
A
454 }
455
91447636
A
456 ifnet_lock_shared(ifp);
457 IN_LOOKUP_MULTI(pkt_dst, ifp, inm);
458 ifnet_lock_done(ifp);
1c79356b
A
459 if (inm != NULL &&
460 (imo == NULL || imo->imo_multicast_loop)) {
461 /*
462 * If we belong to the destination multicast group
463 * on the outgoing interface, and the caller did not
464 * forbid loopback, loop back a copy.
465 */
91447636
A
466 if (!TAILQ_EMPTY(&ipv4_filters)) {
467 struct ipfilter *filter;
468 int seen = (inject_filter_ref == 0);
469 struct ipf_pktopts *ippo = 0, ipf_pktopts;
470
471 if (imo) {
472 ippo = &ipf_pktopts;
473 ipf_pktopts.ippo_mcast_ifnet = imo->imo_multicast_ifp;
474 ipf_pktopts.ippo_mcast_ttl = imo->imo_multicast_ttl;
475 ipf_pktopts.ippo_mcast_loop = imo->imo_multicast_loop;
476 }
477
478 lck_mtx_unlock(ip_mutex);
479 ipf_ref();
0c530ab8
A
480
481 /* 4135317 - always pass network byte order to filter */
482 HTONS(ip->ip_len);
483 HTONS(ip->ip_off);
484
91447636
A
485 TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) {
486 if (seen == 0) {
487 if ((struct ipfilter *)inject_filter_ref == filter)
488 seen = 1;
489 } else if (filter->ipf_filter.ipf_output) {
490 errno_t result;
491 result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, ippo);
492 if (result == EJUSTRETURN) {
493 ipf_unref();
494 goto done;
495 }
496 if (result != 0) {
497 ipf_unref();
498 lck_mtx_lock(ip_mutex);
499 goto bad;
500 }
501 }
502 }
0c530ab8
A
503
504 /* set back to host byte order */
6601e61a 505 ip = mtod(m, struct ip *);
0c530ab8
A
506 NTOHS(ip->ip_len);
507 NTOHS(ip->ip_off);
508
509 lck_mtx_lock(ip_mutex);
91447636
A
510 ipf_unref();
511 didfilter = 1;
512 }
1c79356b
A
513 ip_mloopback(ifp, m, dst, hlen);
514 }
515 else {
516 /*
517 * If we are acting as a multicast router, perform
518 * multicast forwarding as if the packet had just
519 * arrived on the interface to which we are about
520 * to send. The multicast forwarding function
521 * recursively calls this function, using the
522 * IP_FORWARDING flag to prevent infinite recursion.
523 *
524 * Multicasts that are looped back by ip_mloopback(),
525 * above, will be forwarded by the ip_input() routine,
526 * if necessary.
527 */
528 if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
529 /*
530 * Check if rsvp daemon is running. If not, don't
531 * set ip_moptions. This ensures that the packet
532 * is multicast and not just sent down one link
533 * as prescribed by rsvpd.
534 */
535 if (!rsvp_on)
536 imo = NULL;
537 if (ip_mforward(ip, ifp, m, imo) != 0) {
538 m_freem(m);
91447636 539 lck_mtx_unlock(ip_mutex);
1c79356b
A
540 goto done;
541 }
542 }
543 }
544
545 /*
546 * Multicasts with a time-to-live of zero may be looped-
547 * back, above, but must not be transmitted on a network.
548 * Also, multicasts addressed to the loopback interface
549 * are not sent -- the above call to ip_mloopback() will
550 * loop back a copy if this host actually belongs to the
551 * destination group on the loopback interface.
552 */
553 if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) {
554 m_freem(m);
91447636 555 lck_mtx_unlock(ip_mutex);
1c79356b
A
556 goto done;
557 }
558
559 goto sendit;
560 }
561#ifndef notdef
562 /*
563 * If source address not specified yet, use address
564 * of outgoing interface.
565 */
566 if (ip->ip_src.s_addr == INADDR_ANY) {
567 ip->ip_src = IA_SIN(ia)->sin_addr;
568#if IPFIREWALL_FORWARD
569 /* Keep note that we did this - if the firewall changes
570 * the next-hop, our interface may change, changing the
571 * default source IP. It's a shame so much effort happens
572 * twice. Oh well.
573 */
574 fwd_rewrite_src++;
575#endif /* IPFIREWALL_FORWARD */
576 }
577#endif /* notdef */
1c79356b
A
578
579 /*
580 * Look for broadcast address and
581 * verify user is allowed to send
582 * such a packet.
583 */
584 if (isbroadcast) {
585 if ((ifp->if_flags & IFF_BROADCAST) == 0) {
586 error = EADDRNOTAVAIL;
587 goto bad;
588 }
589 if ((flags & IP_ALLOWBROADCAST) == 0) {
590 error = EACCES;
591 goto bad;
592 }
593 /* don't allow broadcast messages to be fragmented */
594 if ((u_short)ip->ip_len > ifp->if_mtu) {
595 error = EMSGSIZE;
596 goto bad;
597 }
598 m->m_flags |= M_BCAST;
599 } else {
600 m->m_flags &= ~M_BCAST;
601 }
602
603sendit:
9bccf70c
A
604 /*
605 * Force IP TTL to 255 following draft-ietf-zeroconf-ipv4-linklocal.txt
606 */
607 if (IN_LINKLOCAL(ntohl(ip->ip_src.s_addr)) || IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr))) {
608 ip_linklocal_stat.iplls_out_total++;
609 if (ip->ip_ttl != MAXTTL) {
610 ip_linklocal_stat.iplls_out_badttl++;
611 ip->ip_ttl = MAXTTL;
612 }
613 }
614
91447636
A
615injectit:
616 if (!didfilter && !TAILQ_EMPTY(&ipv4_filters)) {
617 struct ipfilter *filter;
618 int seen = (inject_filter_ref == 0);
619
620 lck_mtx_unlock(ip_mutex);
621 ipf_ref();
0c530ab8
A
622
623 /* 4135317 - always pass network byte order to filter */
624 HTONS(ip->ip_len);
625 HTONS(ip->ip_off);
626
91447636
A
627 TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) {
628 if (seen == 0) {
629 if ((struct ipfilter *)inject_filter_ref == filter)
630 seen = 1;
631 } else if (filter->ipf_filter.ipf_output) {
632 errno_t result;
633 result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, 0);
634 if (result == EJUSTRETURN) {
635 ipf_unref();
636 goto done;
637 }
638 if (result != 0) {
639 ipf_unref();
640 lck_mtx_lock(ip_mutex);
641 goto bad;
642 }
643 }
644 }
0c530ab8
A
645
646 /* set back to host byte order */
6601e61a 647 ip = mtod(m, struct ip *);
0c530ab8
A
648 NTOHS(ip->ip_len);
649 NTOHS(ip->ip_off);
650
91447636
A
651 ipf_unref();
652 lck_mtx_lock(ip_mutex);
653 }
654
9bccf70c
A
655#if IPSEC
656 /* temporary for testing only: bypass ipsec altogether */
657
55e303ae 658 if (ipsec_bypass != 0 || (flags & IP_NOIPSEC) != 0)
9bccf70c
A
659 goto skip_ipsec;
660
55e303ae
A
661 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_START, 0,0,0,0,0);
662
91447636
A
663 lck_mtx_lock(sadb_mutex);
664
9bccf70c
A
665 /* get SP for this packet */
666 if (so == NULL)
667 sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, flags, &error);
668 else
669 sp = ipsec4_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error);
670
671 if (sp == NULL) {
672 ipsecstat.out_inval++;
55e303ae 673 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 0,0,0,0,0);
91447636 674 lck_mtx_unlock(sadb_mutex);
9bccf70c
A
675 goto bad;
676 }
677
678 error = 0;
679
680 /* check policy */
681 switch (sp->policy) {
682 case IPSEC_POLICY_DISCARD:
683 /*
684 * This packet is just discarded.
685 */
686 ipsecstat.out_polvio++;
55e303ae 687 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 1,0,0,0,0);
91447636 688 lck_mtx_unlock(sadb_mutex);
9bccf70c
A
689 goto bad;
690
691 case IPSEC_POLICY_BYPASS:
692 case IPSEC_POLICY_NONE:
693 /* no need to do IPsec. */
55e303ae 694 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 2,0,0,0,0);
91447636 695 lck_mtx_unlock(sadb_mutex);
9bccf70c
A
696 goto skip_ipsec;
697
698 case IPSEC_POLICY_IPSEC:
699 if (sp->req == NULL) {
700 /* acquire a policy */
701 error = key_spdacquire(sp);
55e303ae 702 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 3,0,0,0,0);
91447636 703 lck_mtx_unlock(sadb_mutex);
9bccf70c
A
704 goto bad;
705 }
706 break;
707
708 case IPSEC_POLICY_ENTRUST:
709 default:
710 printf("ip_output: Invalid policy found. %d\n", sp->policy);
711 }
712 {
713 struct ipsec_output_state state;
714 bzero(&state, sizeof(state));
715 state.m = m;
716 if (flags & IP_ROUTETOIF) {
717 state.ro = &iproute;
718 bzero(&iproute, sizeof(iproute));
719 } else
720 state.ro = ro;
721 state.dst = (struct sockaddr *)dst;
722
723 ip->ip_sum = 0;
724
725 /*
726 * XXX
727 * delayed checksums are not currently compatible with IPsec
728 */
729 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
730 in_delayed_cksum(m);
731 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
732 }
733
734 HTONS(ip->ip_len);
735 HTONS(ip->ip_off);
736
91447636 737 lck_mtx_unlock(ip_mutex);
9bccf70c 738 error = ipsec4_output(&state, sp, flags);
91447636
A
739 lck_mtx_unlock(sadb_mutex);
740 lck_mtx_lock(ip_mutex);
741
55e303ae
A
742 m0 = m = state.m;
743
9bccf70c
A
744 if (flags & IP_ROUTETOIF) {
745 /*
746 * if we have tunnel mode SA, we may need to ignore
747 * IP_ROUTETOIF.
748 */
749 if (state.ro != &iproute || state.ro->ro_rt != NULL) {
750 flags &= ~IP_ROUTETOIF;
751 ro = state.ro;
752 }
753 } else
754 ro = state.ro;
55e303ae 755
9bccf70c
A
756 dst = (struct sockaddr_in *)state.dst;
757 if (error) {
758 /* mbuf is already reclaimed in ipsec4_output. */
759 m0 = NULL;
760 switch (error) {
761 case EHOSTUNREACH:
762 case ENETUNREACH:
763 case EMSGSIZE:
764 case ENOBUFS:
765 case ENOMEM:
766 break;
767 default:
768 printf("ip4_output (ipsec): error code %d\n", error);
769 /*fall through*/
770 case ENOENT:
771 /* don't show these error codes to the user */
772 error = 0;
773 break;
774 }
55e303ae 775 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 4,0,0,0,0);
9bccf70c
A
776 goto bad;
777 }
778 }
779
780 /* be sure to update variables that are affected by ipsec4_output() */
781 ip = mtod(m, struct ip *);
55e303ae 782
9bccf70c
A
783#ifdef _IP_VHL
784 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
785#else
786 hlen = ip->ip_hl << 2;
787#endif
55e303ae
A
788 /* Check that there wasn't a route change and src is still valid */
789
8f6c56a5 790 if (ro->ro_rt && ro->ro_rt->generation_id != route_generation) {
91447636 791 if (ifa_foraddr(ip->ip_src.s_addr) == 0 && ((flags & (IP_ROUTETOIF | IP_FORWARDING)) == 0)) {
55e303ae
A
792 error = EADDRNOTAVAIL;
793 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 5,0,0,0,0);
794 goto bad;
795 }
796 rtfree(ro->ro_rt);
797 ro->ro_rt = NULL;
798 }
799
9bccf70c
A
800 if (ro->ro_rt == NULL) {
801 if ((flags & IP_ROUTETOIF) == 0) {
802 printf("ip_output: "
803 "can't update route after IPsec processing\n");
55e303ae
A
804 error = EHOSTUNREACH; /*XXX*/
805 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 6,0,0,0,0);
9bccf70c
A
806 goto bad;
807 }
808 } else {
91447636
A
809 if (ia)
810 ifafree(&ia->ia_ifa);
9bccf70c 811 ia = ifatoia(ro->ro_rt->rt_ifa);
91447636
A
812 if (ia)
813 ifaref(&ia->ia_ifa);
9bccf70c 814 ifp = ro->ro_rt->rt_ifp;
9bccf70c
A
815 }
816
817 /* make it flipped, again. */
818 NTOHS(ip->ip_len);
819 NTOHS(ip->ip_off);
55e303ae 820 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 7,0xff,0xff,0xff,0xff);
91447636
A
821
822 /* Pass to filters again */
823 if (!TAILQ_EMPTY(&ipv4_filters)) {
824 struct ipfilter *filter;
825
826 lck_mtx_unlock(ip_mutex);
827 ipf_ref();
0c530ab8
A
828
829 /* 4135317 - always pass network byte order to filter */
830 HTONS(ip->ip_len);
831 HTONS(ip->ip_off);
832
91447636
A
833 TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) {
834 if (filter->ipf_filter.ipf_output) {
835 errno_t result;
836 result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, 0);
837 if (result == EJUSTRETURN) {
838 ipf_unref();
839 goto done;
840 }
841 if (result != 0) {
842 ipf_unref();
843 lck_mtx_lock(ip_mutex);
844 goto bad;
845 }
846 }
847 }
0c530ab8
A
848
849 /* set back to host byte order */
6601e61a 850 ip = mtod(m, struct ip *);
0c530ab8
A
851 NTOHS(ip->ip_len);
852 NTOHS(ip->ip_off);
853
91447636
A
854 ipf_unref();
855 lck_mtx_lock(ip_mutex);
856 }
9bccf70c
A
857skip_ipsec:
858#endif /*IPSEC*/
859
1c79356b
A
860 /*
861 * IpHack's section.
862 * - Xlate: translate packet's addr/port (NAT).
863 * - Firewall: deny/allow/etc.
864 * - Wrap: fake packet's addr/port <unimpl.>
865 * - Encapsulate: put it in another IP and send out. <unimp.>
866 */
9bccf70c
A
867 if (fr_checkp) {
868 struct mbuf *m1 = m;
869
91447636
A
870 if ((error = (*fr_checkp)(ip, hlen, ifp, 1, &m1)) || !m1) {
871 lck_mtx_unlock(ip_mutex);
9bccf70c 872 goto done;
91447636 873 }
55e303ae 874 ip = mtod(m0 = m = m1, struct ip *);
1c79356b
A
875 }
876
877 /*
878 * Check with the firewall...
91447636 879 * but not if we are already being fwd'd from a firewall.
1c79356b 880 */
91447636 881 if (fw_enable && IPFW_LOADED && !args.next_hop) {
1c79356b
A
882 struct sockaddr_in *old = dst;
883
91447636
A
884 args.m = m;
885 args.next_hop = dst;
886 args.oif = ifp;
3a60a9f5 887 lck_mtx_unlock(ip_mutex);
91447636
A
888 off = ip_fw_chk_ptr(&args);
889 m = args.m;
890 dst = args.next_hop;
891
1c79356b
A
892 /*
893 * On return we must do the following:
9bccf70c 894 * IP_FW_PORT_DENY_FLAG -> drop the pkt (XXX new)
1c79356b 895 * 1<=off<= 0xffff -> DIVERT
9bccf70c
A
896 * (off & IP_FW_PORT_DYNT_FLAG) -> send to a DUMMYNET pipe
897 * (off & IP_FW_PORT_TEE_FLAG) -> TEE the packet
1c79356b
A
898 * dst != old -> IPFIREWALL_FORWARD
899 * off==0, dst==old -> accept
900 * If some of the above modules is not compiled in, then
901 * we shouldn't have to check the corresponding condition
902 * (because the ipfw control socket should not accept
903 * unsupported rules), but better play safe and drop
904 * packets in case of doubt.
905 */
55e303ae 906 m0 = m;
9bccf70c
A
907 if ( (off & IP_FW_PORT_DENY_FLAG) || m == NULL) {
908 if (m)
909 m_freem(m);
910 error = EACCES ;
911 goto done ;
1c79356b 912 }
9bccf70c 913 ip = mtod(m, struct ip *);
3a60a9f5
A
914 if (off == 0 && dst == old) {/* common case */
915 lck_mtx_lock(ip_mutex);
1c79356b 916 goto pass ;
3a60a9f5 917 }
1c79356b 918#if DUMMYNET
91447636 919 if (DUMMYNET_LOADED && (off & IP_FW_PORT_DYNT_FLAG) != 0) {
1c79356b
A
920 /*
921 * pass the pkt to dummynet. Need to include
9bccf70c 922 * pipe number, m, ifp, ro, dst because these are
1c79356b
A
923 * not recomputed in the next pass.
924 * All other parameters have been already used and
925 * so they are not needed anymore.
926 * XXX note: if the ifp or ro entry are deleted
927 * while a pkt is in dummynet, we are in trouble!
928 */
91447636
A
929 args.ro = ro;
930 args.dst = dst;
931 args.flags = flags;
932
91447636
A
933 error = ip_dn_io_ptr(m, off & 0xffff, DN_TO_IP_OUT,
934 &args);
9bccf70c 935 goto done;
1c79356b 936 }
91447636 937#endif /* DUMMYNET */
3a60a9f5 938 lck_mtx_lock(ip_mutex);
1c79356b 939#if IPDIVERT
9bccf70c
A
940 if (off != 0 && (off & IP_FW_PORT_DYNT_FLAG) == 0) {
941 struct mbuf *clone = NULL;
942
943 /* Clone packet if we're doing a 'tee' */
944 if ((off & IP_FW_PORT_TEE_FLAG) != 0)
945 clone = m_dup(m, M_DONTWAIT);
946 /*
947 * XXX
948 * delayed checksums are not currently compatible
949 * with divert sockets.
950 */
951 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
952 in_delayed_cksum(m);
953 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
954 }
955
956 /* Restore packet header fields to original values */
957 HTONS(ip->ip_len);
958 HTONS(ip->ip_off);
959
960 /* Deliver packet to divert input routine */
91447636 961 divert_packet(m, 0, off & 0xffff, args.divert_rule);
9bccf70c
A
962
963 /* If 'tee', continue with original packet */
964 if (clone != NULL) {
55e303ae 965 m0 = m = clone;
9bccf70c
A
966 ip = mtod(m, struct ip *);
967 goto pass;
968 }
91447636 969 lck_mtx_unlock(ip_mutex);
1c79356b
A
970 goto done;
971 }
972#endif
973
974#if IPFIREWALL_FORWARD
975 /* Here we check dst to make sure it's directly reachable on the
976 * interface we previously thought it was.
977 * If it isn't (which may be likely in some situations) we have
978 * to re-route it (ie, find a route for the next-hop and the
979 * associated interface) and set them here. This is nested
980 * forwarding which in most cases is undesirable, except where
981 * such control is nigh impossible. So we do it here.
982 * And I'm babbling.
983 */
984 if (off == 0 && old != dst) {
91447636 985 struct in_ifaddr *ia_fw;
1c79356b
A
986
987 /* It's changed... */
988 /* There must be a better way to do this next line... */
989 static struct route sro_fwd, *ro_fwd = &sro_fwd;
990#if IPFIREWALL_FORWARD_DEBUG
991 printf("IPFIREWALL_FORWARD: New dst ip: ");
992 print_ip(dst->sin_addr);
993 printf("\n");
994#endif
995 /*
996 * We need to figure out if we have been forwarded
997 * to a local socket. If so then we should somehow
998 * "loop back" to ip_input, and get directed to the
999 * PCB as if we had received this packet. This is
1000 * because it may be difficult to identify the packets
1001 * you want to forward until they are being output
1002 * and have selected an interface. (e.g. locally
1003 * initiated packets) If we used the loopback inteface,
1004 * we would not be able to control what happens
1005 * as the packet runs through ip_input() as
1006 * it is done through a ISR.
1007 */
91447636 1008 TAILQ_FOREACH(ia_fw, &in_ifaddrhead, ia_link) {
1c79356b
A
1009 /*
1010 * If the addr to forward to is one
1011 * of ours, we pretend to
1012 * be the destination for this packet.
1013 */
91447636 1014 if (IA_SIN(ia_fw)->sin_addr.s_addr ==
1c79356b
A
1015 dst->sin_addr.s_addr)
1016 break;
1017 }
1018 if (ia) {
1019 /* tell ip_input "dont filter" */
91447636
A
1020 struct m_tag *fwd_tag;
1021 struct ip_fwd_tag *ipfwd_tag;
1022
1023 fwd_tag = m_tag_alloc(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_IPFORWARD,
1024 sizeof(struct sockaddr_in), M_NOWAIT);
1025 if (fwd_tag == NULL) {
1026 error = ENOBUFS;
1027 goto bad;
1028 }
1029
1030 ipfwd_tag = (struct ip_fwd_tag *)(fwd_tag+1);
1031 ipfwd_tag->next_hop = args.next_hop;
1032
1033 m_tag_prepend(m, fwd_tag);
1034
1c79356b
A
1035 if (m->m_pkthdr.rcvif == NULL)
1036 m->m_pkthdr.rcvif = ifunit("lo0");
91447636
A
1037 if ((~IF_HWASSIST_CSUM_FLAGS(m->m_pkthdr.rcvif->if_hwassist) &
1038 m->m_pkthdr.csum_flags) == 0) {
1039 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
1040 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1041 m->m_pkthdr.csum_flags |=
1042 CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1043 m->m_pkthdr.csum_data = 0xffff;
1044 }
9bccf70c 1045 m->m_pkthdr.csum_flags |=
91447636
A
1046 CSUM_IP_CHECKED | CSUM_IP_VALID;
1047 }
1048 else if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
1049 in_delayed_cksum(m);
1050 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1051 ip->ip_sum = in_cksum(m, hlen);
9bccf70c 1052 }
9bccf70c
A
1053 HTONS(ip->ip_len);
1054 HTONS(ip->ip_off);
91447636
A
1055
1056 lck_mtx_unlock(ip_mutex);
1057
1058 /* we need to call dlil_output to run filters
1059 * and resync to avoid recursion loops.
1060 */
1061 if (lo_ifp) {
1062 dlil_output(lo_ifp, PF_INET, m, 0, (struct sockaddr *)dst, 0);
1063 }
1064 else {
1065 printf("ip_output: no loopback ifp for forwarding!!!\n");
1066 }
1c79356b
A
1067 goto done;
1068 }
1069 /* Some of the logic for this was
1070 * nicked from above.
1071 *
1072 * This rewrites the cached route in a local PCB.
1073 * Is this what we want to do?
1074 */
1075 bcopy(dst, &ro_fwd->ro_dst, sizeof(*dst));
1076
1077 ro_fwd->ro_rt = 0;
1078 rtalloc_ign(ro_fwd, RTF_PRCLONING);
1079
1080 if (ro_fwd->ro_rt == 0) {
1081 ipstat.ips_noroute++;
1082 error = EHOSTUNREACH;
1083 goto bad;
1084 }
1085
91447636 1086 ia_fw = ifatoia(ro_fwd->ro_rt->rt_ifa);
1c79356b 1087 ifp = ro_fwd->ro_rt->rt_ifp;
1c79356b
A
1088 ro_fwd->ro_rt->rt_use++;
1089 if (ro_fwd->ro_rt->rt_flags & RTF_GATEWAY)
1090 dst = (struct sockaddr_in *)ro_fwd->ro_rt->rt_gateway;
1091 if (ro_fwd->ro_rt->rt_flags & RTF_HOST)
1092 isbroadcast =
1093 (ro_fwd->ro_rt->rt_flags & RTF_BROADCAST);
1094 else
1095 isbroadcast = in_broadcast(dst->sin_addr, ifp);
9bccf70c 1096 rtfree(ro->ro_rt);
1c79356b
A
1097 ro->ro_rt = ro_fwd->ro_rt;
1098 dst = (struct sockaddr_in *)&ro_fwd->ro_dst;
1099
1100 /*
1101 * If we added a default src ip earlier,
1102 * which would have been gotten from the-then
1103 * interface, do it again, from the new one.
1104 */
1105 if (fwd_rewrite_src)
91447636 1106 ip->ip_src = IA_SIN(ia_fw)->sin_addr;
1c79356b
A
1107 goto pass ;
1108 }
1109#endif /* IPFIREWALL_FORWARD */
1110 /*
1111 * if we get here, none of the above matches, and
1112 * we have to drop the pkt
1113 */
1114 m_freem(m);
91447636
A
1115 error = EACCES; /* not sure this is the right error msg */
1116 lck_mtx_unlock(ip_mutex);
1117 goto done;
1c79356b 1118 }
1c79356b
A
1119
1120pass:
e5568f75
A
1121#if __APPLE__
1122 /* Do not allow loopback address to wind up on a wire */
1123 if ((ifp->if_flags & IFF_LOOPBACK) == 0 &&
1124 ((ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
1125 (ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)) {
1126 ipstat.ips_badaddr++;
1127 m_freem(m);
91447636
A
1128 /*
1129 * Do not simply drop the packet just like a firewall -- we want the
1130 * application to feel the pain.
1131 * Return ENETUNREACH like ip6_output does in some similar cases.
1132 * This can startle the otherwise clueless process that specifies
e5568f75
A
1133 * loopback as the source address.
1134 */
91447636
A
1135 error = ENETUNREACH;
1136 lck_mtx_unlock(ip_mutex);
e5568f75
A
1137 goto done;
1138 }
1139#endif
9bccf70c 1140 m->m_pkthdr.csum_flags |= CSUM_IP;
4a249263
A
1141 sw_csum = m->m_pkthdr.csum_flags
1142 & ~IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);
1c79356b 1143
9bccf70c 1144 if ((ifp->if_hwassist & CSUM_TCP_SUM16) != 0) {
1c79356b 1145 /*
9bccf70c
A
1146 * Special case code for GMACE
1147 * frames that can be checksumed by GMACE SUM16 HW:
1148 * frame >64, no fragments, no UDP
1c79356b 1149 */
9bccf70c
A
1150 if (apple_hwcksum_tx && (m->m_pkthdr.csum_flags & CSUM_TCP)
1151 && (ip->ip_len > 50) && (ip->ip_len <= ifp->if_mtu)) {
1152 /* Apple GMAC HW, expects STUFF_OFFSET << 16 | START_OFFSET */
1153 u_short offset = (IP_VHL_HL(ip->ip_vhl) << 2) +14 ; /* IP+Enet header length */
1154 u_short csumprev= m->m_pkthdr.csum_data & 0xFFFF;
1155 m->m_pkthdr.csum_flags = CSUM_DATA_VALID | CSUM_TCP_SUM16; /* for GMAC */
1156 m->m_pkthdr.csum_data = (csumprev + offset) << 16 ;
1157 m->m_pkthdr.csum_data += offset;
1158 sw_csum = CSUM_DELAY_IP; /* do IP hdr chksum in software */
1c79356b 1159 }
9bccf70c
A
1160 else {
1161 /* let the software handle any UDP or TCP checksums */
1162 sw_csum |= (CSUM_DELAY_DATA & m->m_pkthdr.csum_flags);
1c79356b 1163 }
1c79356b 1164 }
9bccf70c
A
1165
1166 if (sw_csum & CSUM_DELAY_DATA) {
1167 in_delayed_cksum(m);
1168 sw_csum &= ~CSUM_DELAY_DATA;
1169 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
0b4e3aa0 1170 }
9bccf70c 1171
4a249263 1172 m->m_pkthdr.csum_flags &= IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);
0b4e3aa0 1173
1c79356b 1174 /*
9bccf70c 1175 * If small enough for interface, or the interface will take
0b4e3aa0 1176 * care of the fragmentation for us, can just send directly.
1c79356b 1177 */
9bccf70c
A
1178 if ((u_short)ip->ip_len <= ifp->if_mtu ||
1179 ifp->if_hwassist & CSUM_FRAGMENT) {
1180 HTONS(ip->ip_len);
1181 HTONS(ip->ip_off);
1c79356b 1182 ip->ip_sum = 0;
9bccf70c 1183 if (sw_csum & CSUM_DELAY_IP) {
0b4e3aa0 1184 ip->ip_sum = in_cksum(m, hlen);
9bccf70c
A
1185 }
1186
1187#ifndef __APPLE__
1188 /* Record statistics for this interface address. */
1189 if (!(flags & IP_FORWARDING) && ia != NULL) {
1190 ia->ia_ifa.if_opackets++;
1191 ia->ia_ifa.if_obytes += m->m_pkthdr.len;
1192 }
1193#endif
1194
1195#if IPSEC
1196 /* clean ipsec history once it goes out of the node */
55e303ae 1197 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0)
9bccf70c
A
1198 ipsec_delaux(m);
1199#endif
91447636
A
1200 if (packetchain == 0) {
1201 lck_mtx_unlock(ip_mutex);
1202 error = dlil_output(ifp, PF_INET, m, (void *) ro->ro_rt,
1c79356b 1203 (struct sockaddr *)dst, 0);
91447636
A
1204 goto done;
1205 }
1206 else { /* packet chaining allows us to reuse the route for all packets */
1207 m = m->m_nextpkt;
1208 if (m == NULL) {
1209 if (pktcnt > ip_maxchainsent)
1210 ip_maxchainsent = pktcnt;
1211 //send
1212 lck_mtx_unlock(ip_mutex);
1213 error = dlil_output_list(ifp, PF_INET, packetlist, (void *) ro->ro_rt,
1214 (struct sockaddr *)dst, 0);
1215 pktcnt = 0;
1216 goto done;
1217
1218 }
1219 m0 = m;
1220 pktcnt++;
1221 goto loopit;
1222 }
1c79356b
A
1223 }
1224 /*
1225 * Too large for interface; fragment if possible.
1226 * Must be able to put at least 8 bytes per fragment.
1227 */
1228 if (ip->ip_off & IP_DF) {
1229 error = EMSGSIZE;
1230 /*
1231 * This case can happen if the user changed the MTU
1232 * of an interface after enabling IP on it. Because
1233 * most netifs don't keep track of routes pointing to
1234 * them, there is no way for one to update all its
1235 * routes when the MTU is changed.
1236 */
8f6c56a5 1237 if (ro->ro_rt && (ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST))
1c79356b
A
1238 && !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU)
1239 && (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) {
1240 ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
1241 }
1242 ipstat.ips_cantfrag++;
1243 goto bad;
1244 }
1245 len = (ifp->if_mtu - hlen) &~ 7;
1246 if (len < 8) {
1247 error = EMSGSIZE;
1248 goto bad;
1249 }
1250
9bccf70c
A
1251 /*
1252 * if the interface will not calculate checksums on
1253 * fragmented packets, then do it here.
1254 */
1255 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA &&
1256 (ifp->if_hwassist & CSUM_IP_FRAGS) == 0) {
1257 in_delayed_cksum(m);
91447636
A
1258 if (m == NULL) {
1259 lck_mtx_unlock(ip_mutex);
0b4e3aa0 1260 return(ENOMEM);
91447636 1261 }
9bccf70c
A
1262 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1263 }
0b4e3aa0
A
1264
1265
1c79356b
A
1266 {
1267 int mhlen, firstlen = len;
1268 struct mbuf **mnext = &m->m_nextpkt;
9bccf70c 1269 int nfrags = 1;
1c79356b
A
1270
1271 /*
1272 * Loop through length of segment after first fragment,
1273 * make new header and copy data of each part and link onto chain.
1274 */
1275 m0 = m;
1276 mhlen = sizeof (struct ip);
1277 for (off = hlen + len; off < (u_short)ip->ip_len; off += len) {
1278 MGETHDR(m, M_DONTWAIT, MT_HEADER);
1279 if (m == 0) {
1280 error = ENOBUFS;
1281 ipstat.ips_odropped++;
1282 goto sendorfree;
1283 }
0b4e3aa0 1284 m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG;
1c79356b
A
1285 m->m_data += max_linkhdr;
1286 mhip = mtod(m, struct ip *);
1287 *mhip = *ip;
1288 if (hlen > sizeof (struct ip)) {
1289 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
1290 mhip->ip_vhl = IP_MAKE_VHL(IPVERSION, mhlen >> 2);
1291 }
1292 m->m_len = mhlen;
1293 mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF);
1294 if (ip->ip_off & IP_MF)
1295 mhip->ip_off |= IP_MF;
1296 if (off + len >= (u_short)ip->ip_len)
1297 len = (u_short)ip->ip_len - off;
1298 else
1299 mhip->ip_off |= IP_MF;
1300 mhip->ip_len = htons((u_short)(len + mhlen));
1301 m->m_next = m_copy(m0, off, len);
1302 if (m->m_next == 0) {
1303 (void) m_free(m);
1304 error = ENOBUFS; /* ??? */
1305 ipstat.ips_odropped++;
1306 goto sendorfree;
1307 }
1308 m->m_pkthdr.len = mhlen + len;
91447636 1309 m->m_pkthdr.rcvif = 0;
9bccf70c 1310 m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
91447636 1311 m->m_pkthdr.socket_id = m0->m_pkthdr.socket_id;
9bccf70c 1312 HTONS(mhip->ip_off);
1c79356b 1313 mhip->ip_sum = 0;
9bccf70c 1314 if (sw_csum & CSUM_DELAY_IP) {
0b4e3aa0 1315 mhip->ip_sum = in_cksum(m, mhlen);
9bccf70c 1316 }
1c79356b
A
1317 *mnext = m;
1318 mnext = &m->m_nextpkt;
0b4e3aa0 1319 nfrags++;
1c79356b 1320 }
0b4e3aa0
A
1321 ipstat.ips_ofragments += nfrags;
1322
1323 /* set first/last markers for fragment chain */
9bccf70c
A
1324 m->m_flags |= M_LASTFRAG;
1325 m0->m_flags |= M_FIRSTFRAG | M_FRAG;
0b4e3aa0
A
1326 m0->m_pkthdr.csum_data = nfrags;
1327
1c79356b
A
1328 /*
1329 * Update first fragment by trimming what's been copied out
1330 * and updating header, then send each fragment (in order).
1331 */
1332 m = m0;
1333 m_adj(m, hlen + firstlen - (u_short)ip->ip_len);
1334 m->m_pkthdr.len = hlen + firstlen;
1335 ip->ip_len = htons((u_short)m->m_pkthdr.len);
9bccf70c
A
1336 ip->ip_off |= IP_MF;
1337 HTONS(ip->ip_off);
1c79356b 1338 ip->ip_sum = 0;
9bccf70c 1339 if (sw_csum & CSUM_DELAY_IP) {
0b4e3aa0 1340 ip->ip_sum = in_cksum(m, hlen);
9bccf70c 1341 }
1c79356b
A
1342sendorfree:
1343
1344 KERNEL_DEBUG(DBG_LAYER_END, ip->ip_dst.s_addr,
1345 ip->ip_src.s_addr, ip->ip_p, ip->ip_off, ip->ip_len);
1346
91447636 1347 lck_mtx_unlock(ip_mutex);
1c79356b
A
1348 for (m = m0; m; m = m0) {
1349 m0 = m->m_nextpkt;
1350 m->m_nextpkt = 0;
9bccf70c
A
1351#if IPSEC
1352 /* clean ipsec history once it goes out of the node */
55e303ae 1353 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0)
9bccf70c
A
1354 ipsec_delaux(m);
1355#endif
1356 if (error == 0) {
1357#ifndef __APPLE__
1358 /* Record statistics for this interface address. */
1359 if (ia != NULL) {
1360 ia->ia_ifa.if_opackets++;
1361 ia->ia_ifa.if_obytes += m->m_pkthdr.len;
1362 }
1363#endif
91447636
A
1364 if ((packetchain != 0) && (pktcnt > 0))
1365 panic("ip_output: mix of packet in packetlist is wrong=%x", packetlist);
1366 error = dlil_output(ifp, PF_INET, m, (void *) ro->ro_rt,
1c79356b 1367 (struct sockaddr *)dst, 0);
9bccf70c 1368 } else
1c79356b
A
1369 m_freem(m);
1370 }
1371
1372 if (error == 0)
1373 ipstat.ips_fragmented++;
1374 }
1375done:
91447636
A
1376 if (ia) {
1377 ifafree(&ia->ia_ifa);
1378 ia = NULL;
1379 }
1c79356b 1380#if IPSEC
55e303ae 1381 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) {
1c79356b 1382 if (ro == &iproute && ro->ro_rt) {
9bccf70c 1383 rtfree(ro->ro_rt);
1c79356b
A
1384 ro->ro_rt = NULL;
1385 }
1386 if (sp != NULL) {
1387 KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
1388 printf("DP ip_output call free SP:%x\n", sp));
91447636 1389 lck_mtx_lock(sadb_mutex);
1c79356b 1390 key_freesp(sp);
91447636 1391 lck_mtx_unlock(sadb_mutex);
1c79356b 1392 }
9bccf70c 1393 }
1c79356b
A
1394#endif /* IPSEC */
1395
1396 KERNEL_DEBUG(DBG_FNC_IP_OUTPUT | DBG_FUNC_END, error,0,0,0,0);
1397 return (error);
1398bad:
1399 m_freem(m0);
91447636 1400 lck_mtx_unlock(ip_mutex);
1c79356b
A
1401 goto done;
1402}
1403
0b4e3aa0 1404void
8f6c56a5 1405in_delayed_cksum_offset(struct mbuf *m0, int ip_offset)
0b4e3aa0 1406{
9bccf70c 1407 struct ip *ip;
8f6c56a5
A
1408 unsigned char buf[sizeof(struct ip)];
1409 u_short csum, offset, ip_len;
1410 struct mbuf *m = m0;
91447636 1411
8f6c56a5 1412 while (ip_offset >= m->m_len) {
91447636
A
1413 ip_offset -= m->m_len;
1414 m = m->m_next;
8f6c56a5
A
1415 if (m == NULL) {
1416 printf("in_delayed_cksum_offset failed - ip_offset wasn't in the packet\n");
91447636
A
1417 return;
1418 }
1419 }
1420
8f6c56a5 1421 /* Sometimes the IP header is not contiguous, yes this can happen! */
91447636 1422 if (ip_offset + sizeof(struct ip) > m->m_len) {
8f6c56a5 1423#if DEBUG
743b1565
A
1424 printf("delayed m_pullup, m->len: %d off: %d\n",
1425 m->m_len, ip_offset);
8f6c56a5
A
1426#endif
1427 m_copydata(m, ip_offset, sizeof(struct ip), buf);
1428
1429 ip = (struct ip *)buf;
1430 } else {
1431 ip = (struct ip*)(m->m_data + ip_offset);
91447636
A
1432 }
1433
1434 /* Gross */
1435 if (ip_offset) {
1436 m->m_len -= ip_offset;
1437 m->m_data += ip_offset;
1438 }
1439
9bccf70c 1440 offset = IP_VHL_HL(ip->ip_vhl) << 2 ;
8f6c56a5
A
1441
1442 /*
1443 * We could be in the context of an IP or interface filter; in the
1444 * former case, ip_len would be in host (correct) order while for
1445 * the latter it would be in network order. Because of this, we
1446 * attempt to interpret the length field by comparing it against
1447 * the actual packet length. If the comparison fails, byte swap
1448 * the length and check again. If it still fails, then the packet
1449 * is bogus and we give up.
1450 */
1451 ip_len = ip->ip_len;
1452 if (ip_len != (m0->m_pkthdr.len - ip_offset)) {
1453 ip_len = SWAP16(ip_len);
1454 if (ip_len != (m0->m_pkthdr.len - ip_offset)) {
1455 printf("in_delayed_cksum_offset: ip_len %d (%d) "
1456 "doesn't match actual length %d\n", ip->ip_len,
1457 ip_len, (m0->m_pkthdr.len - ip_offset));
1458 return;
1459 }
1460 }
1461
1462 csum = in_cksum_skip(m, ip_len, offset);
1463
1464 if (m0->m_pkthdr.csum_flags & CSUM_UDP && csum == 0)
0b4e3aa0 1465 csum = 0xffff;
8f6c56a5
A
1466 offset += m0->m_pkthdr.csum_data & 0xFFFF; /* checksum offset */
1467
91447636
A
1468 /* Gross */
1469 if (ip_offset) {
1470 if (M_LEADINGSPACE(m) < ip_offset)
8f6c56a5 1471 panic("in_delayed_cksum_offset - chain modified!\n");
91447636
A
1472 m->m_len += ip_offset;
1473 m->m_data -= ip_offset;
1474 }
0b4e3aa0 1475
8f6c56a5 1476 if (offset > ip_len) /* bogus offset */
0b4e3aa0
A
1477 return;
1478
8f6c56a5 1479 /* Insert the checksum in the existing chain */
91447636 1480 if (offset + ip_offset + sizeof(u_short) > m->m_len) {
8f6c56a5
A
1481 char tmp[2];
1482
1483#if DEBUG
1484 printf("delayed m_copyback, m->len: %d off: %d p: %d\n",
91447636 1485 m->m_len, offset + ip_offset, ip->ip_p);
8f6c56a5
A
1486#endif
1487 *(u_short *)tmp = csum;
1488 m_copyback(m, offset + ip_offset, 2, tmp);
1489 } else
1490 *(u_short *)(m->m_data + offset + ip_offset) = csum;
91447636
A
1491}
1492
/*
 * Delayed-checksum entry point for a chain whose IP header is the very
 * first byte of the packet: forwards to in_delayed_cksum_offset() with an
 * offset of zero.
 */
void
in_delayed_cksum(struct mbuf *m)
{
	const int ip_hdr_offset = 0;	/* IP header starts the packet */

	in_delayed_cksum_offset(m, ip_hdr_offset);
}
1498
1499void
1500in_cksum_offset(struct mbuf* m, size_t ip_offset)
1501{
1502 struct ip* ip = NULL;
1503 int hlen = 0;
8f6c56a5
A
1504 unsigned char buf[sizeof(struct ip)];
1505 int swapped = 0;
91447636 1506
8f6c56a5 1507 while (ip_offset >= m->m_len) {
91447636
A
1508 ip_offset -= m->m_len;
1509 m = m->m_next;
8f6c56a5 1510 if (m == NULL) {
91447636
A
1511 printf("in_cksum_offset failed - ip_offset wasn't in the packet\n");
1512 return;
1513 }
1514 }
1515
8f6c56a5 1516 /* Sometimes the IP header is not contiguous, yes this can happen! */
91447636 1517 if (ip_offset + sizeof(struct ip) > m->m_len) {
8f6c56a5
A
1518
1519#if DEBUG
91447636
A
1520 printf("in_cksum_offset - delayed m_pullup, m->len: %d off: %d\n",
1521 m->m_len, ip_offset);
8f6c56a5
A
1522#endif
1523 m_copydata(m, ip_offset, sizeof(struct ip), buf);
1524
1525 ip = (struct ip *)buf;
1526 ip->ip_sum = 0;
1527 m_copyback(m, ip_offset + offsetof(struct ip, ip_sum), 2, (caddr_t)&ip->ip_sum);
1528 } else {
1529 ip = (struct ip*)(m->m_data + ip_offset);
1530 ip->ip_sum = 0;
91447636
A
1531 }
1532
1533 /* Gross */
1534 if (ip_offset) {
1535 m->m_len -= ip_offset;
1536 m->m_data += ip_offset;
1537 }
1538
91447636
A
1539#ifdef _IP_VHL
1540 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
1541#else
1542 hlen = ip->ip_hl << 2;
1543#endif
8f6c56a5
A
1544 /*
1545 * We could be in the context of an IP or interface filter; in the
1546 * former case, ip_len would be in host order while for the latter
1547 * it would be in network (correct) order. Because of this, we
1548 * attempt to interpret the length field by comparing it against
1549 * the actual packet length. If the comparison fails, byte swap
1550 * the length and check again. If it still fails, then the packet
1551 * is bogus and we give up.
1552 */
1553 if (ntohs(ip->ip_len) != (m->m_pkthdr.len - ip_offset)) {
1554 ip->ip_len = SWAP16(ip->ip_len);
1555 swapped = 1;
1556 if (ntohs(ip->ip_len) != (m->m_pkthdr.len - ip_offset)) {
1557 ip->ip_len = SWAP16(ip->ip_len);
1558 printf("in_cksum_offset: ip_len %d (%d) "
1559 "doesn't match actual length %d\n",
1560 ip->ip_len, SWAP16(ip->ip_len),
1561 (m->m_pkthdr.len - ip_offset));
1562 return;
1563 }
1564 }
1565
91447636
A
1566 ip->ip_sum = 0;
1567 ip->ip_sum = in_cksum(m, hlen);
8f6c56a5
A
1568 if (swapped)
1569 ip->ip_len = SWAP16(ip->ip_len);
1570
91447636
A
1571 /* Gross */
1572 if (ip_offset) {
1573 if (M_LEADINGSPACE(m) < ip_offset)
1574 panic("in_cksum_offset - chain modified!\n");
1575 m->m_len += ip_offset;
1576 m->m_data -= ip_offset;
9bccf70c 1577 }
8f6c56a5
A
1578
1579 /* Insert the checksum in the existing chain if IP header not contiguous */
1580 if (ip_offset + sizeof(struct ip) > m->m_len) {
1581 char tmp[2];
1582
1583#if DEBUG
1584 printf("in_cksum_offset m_copyback, m->len: %d off: %d p: %d\n",
1585 m->m_len, ip_offset + offsetof(struct ip, ip_sum), ip->ip_p);
1586#endif
1587 *(u_short *)tmp = ip->ip_sum;
1588 m_copyback(m, ip_offset + offsetof(struct ip, ip_sum), 2, tmp);
1589 }
0b4e3aa0
A
1590}
1591
1c79356b
A
1592/*
1593 * Insert IP options into preformed packet.
1594 * Adjust IP destination as required for IP source routing,
1595 * as indicated by a non-zero in_addr at the start of the options.
1596 *
1597 * XXX This routine assumes that the packet has no options in place.
1598 */
1599static struct mbuf *
1600ip_insertoptions(m, opt, phlen)
1601 register struct mbuf *m;
1602 struct mbuf *opt;
1603 int *phlen;
1604{
1605 register struct ipoption *p = mtod(opt, struct ipoption *);
1606 struct mbuf *n;
1607 register struct ip *ip = mtod(m, struct ip *);
1608 unsigned optlen;
1609
1610 optlen = opt->m_len - sizeof(p->ipopt_dst);
1611 if (optlen + (u_short)ip->ip_len > IP_MAXPACKET)
1612 return (m); /* XXX should fail */
1613 if (p->ipopt_dst.s_addr)
1614 ip->ip_dst = p->ipopt_dst;
1615 if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
1616 MGETHDR(n, M_DONTWAIT, MT_HEADER);
1617 if (n == 0)
1618 return (m);
91447636 1619 n->m_pkthdr.rcvif = 0;
1c79356b
A
1620 n->m_pkthdr.len = m->m_pkthdr.len + optlen;
1621 m->m_len -= sizeof(struct ip);
1622 m->m_data += sizeof(struct ip);
1623 n->m_next = m;
1624 m = n;
1625 m->m_len = optlen + sizeof(struct ip);
1626 m->m_data += max_linkhdr;
1627 (void)memcpy(mtod(m, void *), ip, sizeof(struct ip));
1628 } else {
1629 m->m_data -= optlen;
1630 m->m_len += optlen;
1631 m->m_pkthdr.len += optlen;
1632 ovbcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
1633 }
1634 ip = mtod(m, struct ip *);
1635 bcopy(p->ipopt_list, ip + 1, optlen);
1636 *phlen = sizeof(struct ip) + optlen;
1637 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, *phlen >> 2);
1638 ip->ip_len += optlen;
1639 return (m);
1640}
1641
1642/*
1643 * Copy options from ip to jp,
1644 * omitting those not copied during fragmentation.
1645 */
1c79356b
A
1646int
1647ip_optcopy(ip, jp)
1648 struct ip *ip, *jp;
1649{
1650 register u_char *cp, *dp;
1651 int opt, optlen, cnt;
1652
1653 cp = (u_char *)(ip + 1);
1654 dp = (u_char *)(jp + 1);
1655 cnt = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof (struct ip);
1656 for (; cnt > 0; cnt -= optlen, cp += optlen) {
1657 opt = cp[0];
1658 if (opt == IPOPT_EOL)
1659 break;
1660 if (opt == IPOPT_NOP) {
1661 /* Preserve for IP mcast tunnel's LSRR alignment. */
1662 *dp++ = IPOPT_NOP;
1663 optlen = 1;
1664 continue;
9bccf70c
A
1665 }
1666#if DIAGNOSTIC
1667 if (cnt < IPOPT_OLEN + sizeof(*cp))
1668 panic("malformed IPv4 option passed to ip_optcopy");
1669#endif
1670 optlen = cp[IPOPT_OLEN];
1671#if DIAGNOSTIC
1672 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
1673 panic("malformed IPv4 option passed to ip_optcopy");
1674#endif
1c79356b
A
1675 /* bogus lengths should have been caught by ip_dooptions */
1676 if (optlen > cnt)
1677 optlen = cnt;
1678 if (IPOPT_COPIED(opt)) {
1679 bcopy(cp, dp, optlen);
1680 dp += optlen;
1681 }
1682 }
1683 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
1684 *dp++ = IPOPT_EOL;
1685 return (optlen);
1686}
1687
1688/*
1689 * IP socket option processing.
1690 */
1691int
1692ip_ctloutput(so, sopt)
1693 struct socket *so;
1694 struct sockopt *sopt;
1695{
1696 struct inpcb *inp = sotoinpcb(so);
1697 int error, optval;
1698
1699 error = optval = 0;
1700 if (sopt->sopt_level != IPPROTO_IP) {
1701 return (EINVAL);
1702 }
1703
1704 switch (sopt->sopt_dir) {
1705 case SOPT_SET:
1706 switch (sopt->sopt_name) {
1707 case IP_OPTIONS:
1708#ifdef notyet
1709 case IP_RETOPTS:
1710#endif
1711 {
1712 struct mbuf *m;
1713 if (sopt->sopt_valsize > MLEN) {
1714 error = EMSGSIZE;
1715 break;
1716 }
1717 MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_HEADER);
1718 if (m == 0) {
1719 error = ENOBUFS;
1720 break;
1721 }
1722 m->m_len = sopt->sopt_valsize;
1723 error = sooptcopyin(sopt, mtod(m, char *), m->m_len,
1724 m->m_len);
1725 if (error)
1726 break;
1727
1728 return (ip_pcbopts(sopt->sopt_name, &inp->inp_options,
1729 m));
1730 }
1731
1732 case IP_TOS:
1733 case IP_TTL:
1734 case IP_RECVOPTS:
1735 case IP_RECVRETOPTS:
1736 case IP_RECVDSTADDR:
1737 case IP_RECVIF:
55e303ae 1738 case IP_RECVTTL:
9bccf70c 1739#if defined(NFAITH) && NFAITH > 0
1c79356b 1740 case IP_FAITH:
9bccf70c 1741#endif
1c79356b
A
1742 error = sooptcopyin(sopt, &optval, sizeof optval,
1743 sizeof optval);
1744 if (error)
1745 break;
1746
1747 switch (sopt->sopt_name) {
1748 case IP_TOS:
1749 inp->inp_ip_tos = optval;
1750 break;
1751
1752 case IP_TTL:
1753 inp->inp_ip_ttl = optval;
1754 break;
1755#define OPTSET(bit) \
1756 if (optval) \
1757 inp->inp_flags |= bit; \
1758 else \
1759 inp->inp_flags &= ~bit;
1760
1761 case IP_RECVOPTS:
1762 OPTSET(INP_RECVOPTS);
1763 break;
1764
1765 case IP_RECVRETOPTS:
1766 OPTSET(INP_RECVRETOPTS);
1767 break;
1768
1769 case IP_RECVDSTADDR:
1770 OPTSET(INP_RECVDSTADDR);
1771 break;
1772
1773 case IP_RECVIF:
1774 OPTSET(INP_RECVIF);
1775 break;
1776
55e303ae
A
1777 case IP_RECVTTL:
1778 OPTSET(INP_RECVTTL);
1779 break;
1780
9bccf70c 1781#if defined(NFAITH) && NFAITH > 0
1c79356b
A
1782 case IP_FAITH:
1783 OPTSET(INP_FAITH);
1784 break;
9bccf70c 1785#endif
1c79356b
A
1786 }
1787 break;
1788#undef OPTSET
1789
1790 case IP_MULTICAST_IF:
1791 case IP_MULTICAST_VIF:
1792 case IP_MULTICAST_TTL:
1793 case IP_MULTICAST_LOOP:
1794 case IP_ADD_MEMBERSHIP:
1795 case IP_DROP_MEMBERSHIP:
1796 error = ip_setmoptions(sopt, &inp->inp_moptions);
1797 break;
1798
1799 case IP_PORTRANGE:
1800 error = sooptcopyin(sopt, &optval, sizeof optval,
1801 sizeof optval);
1802 if (error)
1803 break;
1804
1805 switch (optval) {
1806 case IP_PORTRANGE_DEFAULT:
1807 inp->inp_flags &= ~(INP_LOWPORT);
1808 inp->inp_flags &= ~(INP_HIGHPORT);
1809 break;
1810
1811 case IP_PORTRANGE_HIGH:
1812 inp->inp_flags &= ~(INP_LOWPORT);
1813 inp->inp_flags |= INP_HIGHPORT;
1814 break;
1815
1816 case IP_PORTRANGE_LOW:
1817 inp->inp_flags &= ~(INP_HIGHPORT);
1818 inp->inp_flags |= INP_LOWPORT;
1819 break;
1820
1821 default:
1822 error = EINVAL;
1823 break;
1824 }
1825 break;
1826
1827#if IPSEC
1828 case IP_IPSEC_POLICY:
1829 {
1830 caddr_t req = NULL;
1831 size_t len = 0;
1832 int priv;
1833 struct mbuf *m;
1834 int optname;
1835
91447636
A
1836 if (sopt->sopt_valsize > MCLBYTES) {
1837 error = EMSGSIZE;
1838 break;
1839 }
9bccf70c 1840 if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
1c79356b 1841 break;
9bccf70c 1842 if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
1c79356b
A
1843 break;
1844 priv = (sopt->sopt_p != NULL &&
91447636 1845 proc_suser(sopt->sopt_p) != 0) ? 0 : 1;
1c79356b
A
1846 if (m) {
1847 req = mtod(m, caddr_t);
1848 len = m->m_len;
1849 }
1850 optname = sopt->sopt_name;
91447636 1851 lck_mtx_lock(sadb_mutex);
1c79356b 1852 error = ipsec4_set_policy(inp, optname, req, len, priv);
91447636 1853 lck_mtx_unlock(sadb_mutex);
1c79356b
A
1854 m_freem(m);
1855 break;
1856 }
1857#endif /*IPSEC*/
1858
1859 default:
1860 error = ENOPROTOOPT;
1861 break;
1862 }
1863 break;
1864
1865 case SOPT_GET:
1866 switch (sopt->sopt_name) {
1867 case IP_OPTIONS:
1868 case IP_RETOPTS:
1869 if (inp->inp_options)
1870 error = sooptcopyout(sopt,
1871 mtod(inp->inp_options,
1872 char *),
1873 inp->inp_options->m_len);
1874 else
1875 sopt->sopt_valsize = 0;
1876 break;
1877
1878 case IP_TOS:
1879 case IP_TTL:
1880 case IP_RECVOPTS:
1881 case IP_RECVRETOPTS:
1882 case IP_RECVDSTADDR:
1883 case IP_RECVIF:
55e303ae 1884 case IP_RECVTTL:
1c79356b 1885 case IP_PORTRANGE:
9bccf70c 1886#if defined(NFAITH) && NFAITH > 0
1c79356b 1887 case IP_FAITH:
9bccf70c 1888#endif
1c79356b
A
1889 switch (sopt->sopt_name) {
1890
1891 case IP_TOS:
1892 optval = inp->inp_ip_tos;
1893 break;
1894
1895 case IP_TTL:
1896 optval = inp->inp_ip_ttl;
1897 break;
1898
1899#define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0)
1900
1901 case IP_RECVOPTS:
1902 optval = OPTBIT(INP_RECVOPTS);
1903 break;
1904
1905 case IP_RECVRETOPTS:
1906 optval = OPTBIT(INP_RECVRETOPTS);
1907 break;
1908
1909 case IP_RECVDSTADDR:
1910 optval = OPTBIT(INP_RECVDSTADDR);
1911 break;
1912
1913 case IP_RECVIF:
1914 optval = OPTBIT(INP_RECVIF);
1915 break;
1916
55e303ae
A
1917 case IP_RECVTTL:
1918 optval = OPTBIT(INP_RECVTTL);
1919 break;
1920
1c79356b
A
1921 case IP_PORTRANGE:
1922 if (inp->inp_flags & INP_HIGHPORT)
1923 optval = IP_PORTRANGE_HIGH;
1924 else if (inp->inp_flags & INP_LOWPORT)
1925 optval = IP_PORTRANGE_LOW;
1926 else
1927 optval = 0;
1928 break;
1929
9bccf70c 1930#if defined(NFAITH) && NFAITH > 0
1c79356b
A
1931 case IP_FAITH:
1932 optval = OPTBIT(INP_FAITH);
1933 break;
9bccf70c 1934#endif
1c79356b
A
1935 }
1936 error = sooptcopyout(sopt, &optval, sizeof optval);
1937 break;
1938
1939 case IP_MULTICAST_IF:
1940 case IP_MULTICAST_VIF:
1941 case IP_MULTICAST_TTL:
1942 case IP_MULTICAST_LOOP:
1943 case IP_ADD_MEMBERSHIP:
1944 case IP_DROP_MEMBERSHIP:
1945 error = ip_getmoptions(sopt, inp->inp_moptions);
1946 break;
1947
1948#if IPSEC
1949 case IP_IPSEC_POLICY:
1950 {
1951 struct mbuf *m = NULL;
1c79356b 1952 caddr_t req = NULL;
9bccf70c 1953 size_t len = 0;
1c79356b 1954
9bccf70c 1955 if (m != 0) {
1c79356b
A
1956 req = mtod(m, caddr_t);
1957 len = m->m_len;
1958 }
91447636 1959 lck_mtx_lock(sadb_mutex);
1c79356b 1960 error = ipsec4_get_policy(sotoinpcb(so), req, len, &m);
91447636 1961 lck_mtx_unlock(sadb_mutex);
1c79356b 1962 if (error == 0)
9bccf70c 1963 error = soopt_mcopyout(sopt, m); /* XXX */
1c79356b
A
1964 if (error == 0)
1965 m_freem(m);
1966 break;
1967 }
1968#endif /*IPSEC*/
1969
1970 default:
1971 error = ENOPROTOOPT;
1972 break;
1973 }
1974 break;
1975 }
1976 return (error);
1977}
1978
1979/*
1980 * Set up IP options in pcb for insertion in output packets.
1981 * Store in mbuf with pointer in pcbopt, adding pseudo-option
1982 * with destination address if source routed.
1983 */
1984static int
1985ip_pcbopts(optname, pcbopt, m)
1986 int optname;
1987 struct mbuf **pcbopt;
1988 register struct mbuf *m;
1989{
1990 register int cnt, optlen;
1991 register u_char *cp;
1992 u_char opt;
1993
1994 /* turn off any old options */
1995 if (*pcbopt)
1996 (void)m_free(*pcbopt);
1997 *pcbopt = 0;
1998 if (m == (struct mbuf *)0 || m->m_len == 0) {
1999 /*
2000 * Only turning off any previous options.
2001 */
2002 if (m)
2003 (void)m_free(m);
2004 return (0);
2005 }
2006
2007#ifndef vax
2008 if (m->m_len % sizeof(int32_t))
2009 goto bad;
2010#endif
2011 /*
2012 * IP first-hop destination address will be stored before
2013 * actual options; move other options back
2014 * and clear it when none present.
2015 */
2016 if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
2017 goto bad;
2018 cnt = m->m_len;
2019 m->m_len += sizeof(struct in_addr);
2020 cp = mtod(m, u_char *) + sizeof(struct in_addr);
2021 ovbcopy(mtod(m, caddr_t), (caddr_t)cp, (unsigned)cnt);
2022 bzero(mtod(m, caddr_t), sizeof(struct in_addr));
2023
2024 for (; cnt > 0; cnt -= optlen, cp += optlen) {
2025 opt = cp[IPOPT_OPTVAL];
2026 if (opt == IPOPT_EOL)
2027 break;
2028 if (opt == IPOPT_NOP)
2029 optlen = 1;
2030 else {
2031 if (cnt < IPOPT_OLEN + sizeof(*cp))
2032 goto bad;
2033 optlen = cp[IPOPT_OLEN];
2034 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
2035 goto bad;
2036 }
2037 switch (opt) {
2038
2039 default:
2040 break;
2041
2042 case IPOPT_LSRR:
2043 case IPOPT_SSRR:
2044 /*
2045 * user process specifies route as:
2046 * ->A->B->C->D
2047 * D must be our final destination (but we can't
2048 * check that since we may not have connected yet).
2049 * A is first hop destination, which doesn't appear in
2050 * actual IP option, but is stored before the options.
2051 */
2052 if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
2053 goto bad;
2054 m->m_len -= sizeof(struct in_addr);
2055 cnt -= sizeof(struct in_addr);
2056 optlen -= sizeof(struct in_addr);
2057 cp[IPOPT_OLEN] = optlen;
2058 /*
2059 * Move first hop before start of options.
2060 */
2061 bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
2062 sizeof(struct in_addr));
2063 /*
2064 * Then copy rest of options back
2065 * to close up the deleted entry.
2066 */
2067 ovbcopy((caddr_t)(&cp[IPOPT_OFFSET+1] +
2068 sizeof(struct in_addr)),
2069 (caddr_t)&cp[IPOPT_OFFSET+1],
2070 (unsigned)cnt + sizeof(struct in_addr));
2071 break;
2072 }
2073 }
2074 if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
2075 goto bad;
2076 *pcbopt = m;
2077 return (0);
2078
2079bad:
2080 (void)m_free(m);
2081 return (EINVAL);
2082}
2083
2084/*
2085 * XXX
2086 * The whole multicast option thing needs to be re-thought.
2087 * Several of these options are equally applicable to non-multicast
2088 * transmission, and one (IP_MULTICAST_TTL) totally duplicates a
2089 * standard option (IP_TTL).
2090 */
9bccf70c
A
2091
2092/*
2093 * following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as interface index.
2094 */
2095static struct ifnet *
2096ip_multicast_if(a, ifindexp)
2097 struct in_addr *a;
2098 int *ifindexp;
2099{
2100 int ifindex;
2101 struct ifnet *ifp;
2102
2103 if (ifindexp)
2104 *ifindexp = 0;
2105 if (ntohl(a->s_addr) >> 24 == 0) {
2106 ifindex = ntohl(a->s_addr) & 0xffffff;
91447636
A
2107 ifnet_head_lock_shared();
2108 if (ifindex < 0 || if_index < ifindex) {
2109 ifnet_head_done();
9bccf70c 2110 return NULL;
91447636 2111 }
9bccf70c 2112 ifp = ifindex2ifnet[ifindex];
91447636 2113 ifnet_head_done();
9bccf70c
A
2114 if (ifindexp)
2115 *ifindexp = ifindex;
2116 } else {
2117 INADDR_TO_IFP(*a, ifp);
2118 }
2119 return ifp;
2120}
2121
1c79356b
A
2122/*
2123 * Set the IP multicast options in response to user setsockopt().
2124 */
2125static int
2126ip_setmoptions(sopt, imop)
2127 struct sockopt *sopt;
2128 struct ip_moptions **imop;
2129{
2130 int error = 0;
2131 int i;
2132 struct in_addr addr;
2133 struct ip_mreq mreq;
9bccf70c 2134 struct ifnet *ifp = NULL;
1c79356b 2135 struct ip_moptions *imo = *imop;
9bccf70c 2136 int ifindex;
1c79356b
A
2137
2138 if (imo == NULL) {
2139 /*
2140 * No multicast option buffer attached to the pcb;
2141 * allocate one and initialize to default values.
2142 */
55e303ae
A
2143 error = ip_createmoptions(imop);
2144 if (error != 0)
2145 return error;
2146 imo = *imop;
1c79356b
A
2147 }
2148
2149 switch (sopt->sopt_name) {
2150 /* store an index number for the vif you wanna use in the send */
2151 case IP_MULTICAST_VIF:
2152 if (legal_vif_num == 0) {
2153 error = EOPNOTSUPP;
2154 break;
2155 }
2156 error = sooptcopyin(sopt, &i, sizeof i, sizeof i);
2157 if (error)
2158 break;
2159 if (!legal_vif_num(i) && (i != -1)) {
2160 error = EINVAL;
2161 break;
2162 }
2163 imo->imo_multicast_vif = i;
2164 break;
2165
2166 case IP_MULTICAST_IF:
2167 /*
2168 * Select the interface for outgoing multicast packets.
2169 */
2170 error = sooptcopyin(sopt, &addr, sizeof addr, sizeof addr);
2171 if (error)
2172 break;
2173 /*
2174 * INADDR_ANY is used to remove a previous selection.
2175 * When no interface is selected, a default one is
2176 * chosen every time a multicast packet is sent.
2177 */
2178 if (addr.s_addr == INADDR_ANY) {
2179 imo->imo_multicast_ifp = NULL;
2180 break;
2181 }
2182 /*
2183 * The selected interface is identified by its local
2184 * IP address. Find the interface and confirm that
2185 * it supports multicasting.
2186 */
9bccf70c 2187 ifp = ip_multicast_if(&addr, &ifindex);
1c79356b 2188 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1c79356b
A
2189 error = EADDRNOTAVAIL;
2190 break;
2191 }
2192 imo->imo_multicast_ifp = ifp;
9bccf70c
A
2193 if (ifindex)
2194 imo->imo_multicast_addr = addr;
2195 else
2196 imo->imo_multicast_addr.s_addr = INADDR_ANY;
1c79356b
A
2197 break;
2198
2199 case IP_MULTICAST_TTL:
2200 /*
2201 * Set the IP time-to-live for outgoing multicast packets.
2202 * The original multicast API required a char argument,
2203 * which is inconsistent with the rest of the socket API.
2204 * We allow either a char or an int.
2205 */
2206 if (sopt->sopt_valsize == 1) {
2207 u_char ttl;
2208 error = sooptcopyin(sopt, &ttl, 1, 1);
2209 if (error)
2210 break;
2211 imo->imo_multicast_ttl = ttl;
2212 } else {
2213 u_int ttl;
2214 error = sooptcopyin(sopt, &ttl, sizeof ttl,
2215 sizeof ttl);
2216 if (error)
2217 break;
2218 if (ttl > 255)
2219 error = EINVAL;
2220 else
2221 imo->imo_multicast_ttl = ttl;
2222 }
2223 break;
2224
2225 case IP_MULTICAST_LOOP:
2226 /*
2227 * Set the loopback flag for outgoing multicast packets.
2228 * Must be zero or one. The original multicast API required a
2229 * char argument, which is inconsistent with the rest
2230 * of the socket API. We allow either a char or an int.
2231 */
2232 if (sopt->sopt_valsize == 1) {
2233 u_char loop;
2234 error = sooptcopyin(sopt, &loop, 1, 1);
2235 if (error)
2236 break;
2237 imo->imo_multicast_loop = !!loop;
2238 } else {
2239 u_int loop;
2240 error = sooptcopyin(sopt, &loop, sizeof loop,
2241 sizeof loop);
2242 if (error)
2243 break;
2244 imo->imo_multicast_loop = !!loop;
2245 }
2246 break;
2247
2248 case IP_ADD_MEMBERSHIP:
2249 /*
2250 * Add a multicast group membership.
2251 * Group must be a valid IP multicast address.
2252 */
2253 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
2254 if (error)
2255 break;
9bccf70c 2256
55e303ae 2257 error = ip_addmembership(imo, &mreq);
1c79356b
A
2258 break;
2259
2260 case IP_DROP_MEMBERSHIP:
2261 /*
2262 * Drop a multicast group membership.
2263 * Group must be a valid IP multicast address.
2264 */
2265 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
2266 if (error)
2267 break;
55e303ae
A
2268
2269 error = ip_dropmembership(imo, &mreq);
1c79356b
A
2270 break;
2271
2272 default:
2273 error = EOPNOTSUPP;
2274 break;
2275 }
2276
2277 /*
2278 * If all options have default values, no need to keep the mbuf.
2279 */
2280 if (imo->imo_multicast_ifp == NULL &&
2281 imo->imo_multicast_vif == -1 &&
2282 imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL &&
2283 imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP &&
2284 imo->imo_num_memberships == 0) {
2285 FREE(*imop, M_IPMOPTS);
2286 *imop = NULL;
2287 }
2288
2289 return (error);
2290}
2291
55e303ae
A
2292/*
2293 * Set the IP multicast options in response to user setsockopt().
2294 */
2295__private_extern__ int
2296ip_createmoptions(
2297 struct ip_moptions **imop)
2298{
2299 struct ip_moptions *imo;
2300 imo = (struct ip_moptions*) _MALLOC(sizeof(*imo), M_IPMOPTS,
2301 M_WAITOK);
2302
2303 if (imo == NULL)
2304 return (ENOBUFS);
2305 *imop = imo;
2306 imo->imo_multicast_ifp = NULL;
2307 imo->imo_multicast_addr.s_addr = INADDR_ANY;
2308 imo->imo_multicast_vif = -1;
2309 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
2310 imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
2311 imo->imo_num_memberships = 0;
2312
2313 return 0;
2314}
2315
2316/*
2317 * Add membership to an IPv4 multicast.
2318 */
2319__private_extern__ int
2320ip_addmembership(
2321 struct ip_moptions *imo,
2322 struct ip_mreq *mreq)
2323{
2324 struct route ro;
2325 struct sockaddr_in *dst;
2326 struct ifnet *ifp = NULL;
2327 int error = 0;
55e303ae
A
2328 int i;
2329
2330 if (!IN_MULTICAST(ntohl(mreq->imr_multiaddr.s_addr))) {
2331 error = EINVAL;
2332 return error;
2333 }
55e303ae
A
2334 /*
2335 * If no interface address was provided, use the interface of
2336 * the route to the given multicast address.
2337 */
2338 if (mreq->imr_interface.s_addr == INADDR_ANY) {
2339 bzero((caddr_t)&ro, sizeof(ro));
2340 dst = (struct sockaddr_in *)&ro.ro_dst;
2341 dst->sin_len = sizeof(*dst);
2342 dst->sin_family = AF_INET;
2343 dst->sin_addr = mreq->imr_multiaddr;
2344 rtalloc(&ro);
2345 if (ro.ro_rt != NULL) {
2346 ifp = ro.ro_rt->rt_ifp;
2347 rtfree(ro.ro_rt);
2348 }
2349 else {
2350 /* If there's no default route, try using loopback */
2351 mreq->imr_interface.s_addr = INADDR_LOOPBACK;
2352 }
2353 }
2354
2355 if (ifp == NULL) {
2356 ifp = ip_multicast_if(&mreq->imr_interface, NULL);
2357 }
2358
2359 /*
2360 * See if we found an interface, and confirm that it
2361 * supports multicast.
2362 */
2363 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
2364 error = EADDRNOTAVAIL;
55e303ae
A
2365 return error;
2366 }
2367 /*
2368 * See if the membership already exists or if all the
2369 * membership slots are full.
2370 */
2371 for (i = 0; i < imo->imo_num_memberships; ++i) {
2372 if (imo->imo_membership[i]->inm_ifp == ifp &&
2373 imo->imo_membership[i]->inm_addr.s_addr
2374 == mreq->imr_multiaddr.s_addr)
2375 break;
2376 }
2377 if (i < imo->imo_num_memberships) {
2378 error = EADDRINUSE;
55e303ae
A
2379 return error;
2380 }
2381 if (i == IP_MAX_MEMBERSHIPS) {
2382 error = ETOOMANYREFS;
55e303ae
A
2383 return error;
2384 }
2385 /*
2386 * Everything looks good; add a new record to the multicast
2387 * address list for the given interface.
2388 */
2389 if ((imo->imo_membership[i] =
2390 in_addmulti(&mreq->imr_multiaddr, ifp)) == NULL) {
2391 error = ENOBUFS;
55e303ae
A
2392 return error;
2393 }
2394 ++imo->imo_num_memberships;
55e303ae
A
2395
2396 return error;
2397}
2398
2399/*
2400 * Drop membership of an IPv4 multicast.
2401 */
2402__private_extern__ int
2403ip_dropmembership(
2404 struct ip_moptions *imo,
2405 struct ip_mreq *mreq)
2406{
2407 int error = 0;
55e303ae
A
2408 struct ifnet* ifp = NULL;
2409 int i;
2410
2411 if (!IN_MULTICAST(ntohl(mreq->imr_multiaddr.s_addr))) {
2412 error = EINVAL;
2413 return error;
2414 }
2415
55e303ae
A
2416 /*
2417 * If an interface address was specified, get a pointer
2418 * to its ifnet structure.
2419 */
2420 if (mreq->imr_interface.s_addr == INADDR_ANY)
2421 ifp = NULL;
2422 else {
2423 ifp = ip_multicast_if(&mreq->imr_interface, NULL);
2424 if (ifp == NULL) {
2425 error = EADDRNOTAVAIL;
55e303ae
A
2426 return error;
2427 }
2428 }
2429 /*
2430 * Find the membership in the membership array.
2431 */
2432 for (i = 0; i < imo->imo_num_memberships; ++i) {
2433 if ((ifp == NULL ||
2434 imo->imo_membership[i]->inm_ifp == ifp) &&
2435 imo->imo_membership[i]->inm_addr.s_addr ==
2436 mreq->imr_multiaddr.s_addr)
2437 break;
2438 }
2439 if (i == imo->imo_num_memberships) {
2440 error = EADDRNOTAVAIL;
55e303ae
A
2441 return error;
2442 }
2443 /*
2444 * Give up the multicast address record to which the
2445 * membership points.
2446 */
91447636 2447 in_delmulti(&imo->imo_membership[i]);
55e303ae
A
2448 /*
2449 * Remove the gap in the membership array.
2450 */
2451 for (++i; i < imo->imo_num_memberships; ++i)
2452 imo->imo_membership[i-1] = imo->imo_membership[i];
2453 --imo->imo_num_memberships;
55e303ae
A
2454
2455 return error;
2456}
2457
1c79356b
A
2458/*
2459 * Return the IP multicast options in response to user getsockopt().
2460 */
2461static int
2462ip_getmoptions(sopt, imo)
2463 struct sockopt *sopt;
2464 register struct ip_moptions *imo;
2465{
2466 struct in_addr addr;
2467 struct in_ifaddr *ia;
2468 int error, optval;
2469 u_char coptval;
2470
2471 error = 0;
2472 switch (sopt->sopt_name) {
2473 case IP_MULTICAST_VIF:
2474 if (imo != NULL)
2475 optval = imo->imo_multicast_vif;
2476 else
2477 optval = -1;
2478 error = sooptcopyout(sopt, &optval, sizeof optval);
2479 break;
2480
2481 case IP_MULTICAST_IF:
2482 if (imo == NULL || imo->imo_multicast_ifp == NULL)
2483 addr.s_addr = INADDR_ANY;
9bccf70c
A
2484 else if (imo->imo_multicast_addr.s_addr) {
2485 /* return the value user has set */
2486 addr = imo->imo_multicast_addr;
2487 } else {
1c79356b
A
2488 IFP_TO_IA(imo->imo_multicast_ifp, ia);
2489 addr.s_addr = (ia == NULL) ? INADDR_ANY
2490 : IA_SIN(ia)->sin_addr.s_addr;
2491 }
2492 error = sooptcopyout(sopt, &addr, sizeof addr);
2493 break;
2494
2495 case IP_MULTICAST_TTL:
2496 if (imo == 0)
2497 optval = coptval = IP_DEFAULT_MULTICAST_TTL;
2498 else
2499 optval = coptval = imo->imo_multicast_ttl;
2500 if (sopt->sopt_valsize == 1)
2501 error = sooptcopyout(sopt, &coptval, 1);
2502 else
2503 error = sooptcopyout(sopt, &optval, sizeof optval);
2504 break;
2505
2506 case IP_MULTICAST_LOOP:
2507 if (imo == 0)
2508 optval = coptval = IP_DEFAULT_MULTICAST_LOOP;
2509 else
2510 optval = coptval = imo->imo_multicast_loop;
2511 if (sopt->sopt_valsize == 1)
2512 error = sooptcopyout(sopt, &coptval, 1);
2513 else
2514 error = sooptcopyout(sopt, &optval, sizeof optval);
2515 break;
2516
2517 default:
2518 error = ENOPROTOOPT;
2519 break;
2520 }
2521 return (error);
2522}
2523
2524/*
2525 * Discard the IP multicast options.
2526 */
2527void
2528ip_freemoptions(imo)
2529 register struct ip_moptions *imo;
2530{
2531 register int i;
2532
2533 if (imo != NULL) {
2534 for (i = 0; i < imo->imo_num_memberships; ++i)
91447636 2535 in_delmulti(&imo->imo_membership[i]);
1c79356b
A
2536 FREE(imo, M_IPMOPTS);
2537 }
2538}
2539
2540/*
2541 * Routine called from ip_output() to loop back a copy of an IP multicast
2542 * packet to the input queue of a specified interface. Note that this
2543 * calls the output routine of the loopback "driver", but with an interface
2544 * pointer that might NOT be a loopback interface -- evil, but easier than
2545 * replicating that code here.
2546 */
2547static void
2548ip_mloopback(ifp, m, dst, hlen)
2549 struct ifnet *ifp;
2550 register struct mbuf *m;
2551 register struct sockaddr_in *dst;
2552 int hlen;
2553{
2554 register struct ip *ip;
2555 struct mbuf *copym;
2556
2557 copym = m_copy(m, 0, M_COPYALL);
2558 if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen))
2559 copym = m_pullup(copym, hlen);
2560 if (copym != NULL) {
2561 /*
2562 * We don't bother to fragment if the IP length is greater
2563 * than the interface's MTU. Can this possibly matter?
2564 */
2565 ip = mtod(copym, struct ip *);
9bccf70c
A
2566 HTONS(ip->ip_len);
2567 HTONS(ip->ip_off);
1c79356b
A
2568 ip->ip_sum = 0;
2569 ip->ip_sum = in_cksum(copym, hlen);
1c79356b
A
2570 /*
2571 * NB:
2572 * It's not clear whether there are any lingering
2573 * reentrancy problems in other areas which might
2574 * be exposed by using ip_input directly (in
2575 * particular, everything which modifies the packet
2576 * in-place). Yet another option is using the
2577 * protosw directly to deliver the looped back
2578 * packet. For the moment, we'll err on the side
2579 * of safety by using if_simloop().
2580 */
2581#if 1 /* XXX */
2582 if (dst->sin_family != AF_INET) {
2583 printf("ip_mloopback: bad address family %d\n",
2584 dst->sin_family);
2585 dst->sin_family = AF_INET;
2586 }
2587#endif
2588
0b4e3aa0 2589
9bccf70c
A
2590 /*
2591 * Mark checksum as valid or calculate checksum for loopback.
2592 *
2593 * This is done this way because we have to embed the ifp of
2594 * the interface we will send the original copy of the packet
2595 * out on in the mbuf. ip_input will check if_hwassist of the
2596 * embedded ifp and ignore all csum_flags if if_hwassist is 0.
2597 * The UDP checksum has not been calculated yet.
2598 */
2599 if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
4a249263 2600 if (IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist)) {
9bccf70c
A
2601 copym->m_pkthdr.csum_flags |=
2602 CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
2603 CSUM_IP_CHECKED | CSUM_IP_VALID;
2604 copym->m_pkthdr.csum_data = 0xffff;
2605 } else {
2606 NTOHS(ip->ip_len);
2607 in_delayed_cksum(copym);
2608 HTONS(ip->ip_len);
2609 }
2610 }
0b4e3aa0
A
2611
2612
1c79356b
A
2613 /*
2614 * TedW:
2615 * We need to send all loopback traffic down to dlil in case
2616 * a filter has tapped-in.
2617 */
2618
1c79356b
A
2619 /*
2620 * Stuff the 'real' ifp into the pkthdr, to be used in matching
2621 * in ip_input(); we need the loopback ifp/dl_tag passed as args
2622 * to make the loopback driver compliant with the data link
2623 * requirements.
2624 */
91447636 2625 if (lo_ifp) {
9bccf70c 2626 copym->m_pkthdr.rcvif = ifp;
91447636 2627 dlil_output(lo_ifp, PF_INET, copym, 0, (struct sockaddr *) dst, 0);
1c79356b
A
2628 } else {
2629 printf("Warning: ip_output call to dlil_find_dltag failed!\n");
2630 m_freem(copym);
2631 }
2632
2633/* if_simloop(ifp, copym, (struct sockaddr *)dst, 0);*/
2634 }
2635}