]> git.saurik.com Git - apple/xnu.git/blob - bsd/netinet/ip_output.c
xnu-1228.5.18.tar.gz
[apple/xnu.git] / bsd / netinet / ip_output.c
1 /*
2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1982, 1986, 1988, 1990, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94
61 * $FreeBSD: src/sys/netinet/ip_output.c,v 1.99.2.16 2001/07/19 06:37:26 kris Exp $
62 */
63 /*
64 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
65 * support for mandatory and extensible security protections. This notice
66 * is included in support of clause 2.2 (b) of the Apple Public License,
67 * Version 2.0.
68 */
69
70 #define _IP_VHL
71
72 #include <sys/param.h>
73 #include <sys/systm.h>
74 #include <sys/kernel.h>
75 #include <sys/malloc.h>
76 #include <sys/mbuf.h>
77 #include <sys/protosw.h>
78 #include <sys/socket.h>
79 #include <sys/socketvar.h>
80 #include <kern/locks.h>
81 #include <sys/sysctl.h>
82
83 #include <net/if.h>
84 #include <net/route.h>
85
86 #include <netinet/in.h>
87 #include <netinet/in_systm.h>
88 #include <netinet/ip.h>
89 #include <netinet/in_pcb.h>
90 #include <netinet/in_var.h>
91 #include <netinet/ip_var.h>
92
93 #include <netinet/kpi_ipfilter_var.h>
94
95 #if CONFIG_MACF_NET
96 #include <security/mac_framework.h>
97 #endif
98
99 #include "faith.h"
100
101 #include <net/dlil.h>
102 #include <sys/kdebug.h>
103 #include <libkern/OSAtomic.h>
104
105 #define DBG_LAYER_BEG NETDBG_CODE(DBG_NETIP, 1)
106 #define DBG_LAYER_END NETDBG_CODE(DBG_NETIP, 3)
107 #define DBG_FNC_IP_OUTPUT NETDBG_CODE(DBG_NETIP, (1 << 8) | 1)
108 #define DBG_FNC_IPSEC4_OUTPUT NETDBG_CODE(DBG_NETIP, (2 << 8) | 1)
109
/*
 * Exchange the two bytes of a 16-bit quantity.
 *
 * Both halves are masked, so the result always fits in 16 bits even when
 * v carries bits above bit 15 (for example after integer promotion or
 * sign extension of a short).  For a genuine unsigned 16-bit input the
 * result is the same as an unmasked swap.
 *
 * NOTE: v is evaluated twice; do not pass an expression with side effects.
 */
#define SWAP16(v) ((((v) & 0xff) << 8) | (((v) >> 8) & 0xff))
111
112 #if IPSEC
113 #include <netinet6/ipsec.h>
114 #include <netkey/key.h>
115 #if IPSEC_DEBUG
116 #include <netkey/key_debug.h>
117 #else
118 #define KEYDEBUG(lev,arg)
119 #endif
120 #endif /*IPSEC*/
121
122 #include <netinet/ip_fw.h>
123 #include <netinet/ip_divert.h>
124
125 #if DUMMYNET
126 #include <netinet/ip_dummynet.h>
127 #endif
128
#if IPFIREWALL_FORWARD_DEBUG
/*
 * Debug helper: print the IPv4 address stored in a struct in_addr (network
 * byte order) as a dotted quad, most significant octet first.
 *
 * - The argument is parenthesized so any expression yielding a
 *   struct in_addr can be passed.
 * - Each octet is cast to long so it matches the %ld conversions.
 * - The expansion does not end in a semicolon; the caller supplies it, so
 *   the macro behaves like a single expression statement (safe inside an
 *   unbraced if/else).
 *
 * NOTE: a is evaluated four times; do not pass an expression with side
 * effects.
 */
#define print_ip(a) printf("%ld.%ld.%ld.%ld", \
	(long)((ntohl((a).s_addr) >> 24) & 0xFF), \
	(long)((ntohl((a).s_addr) >> 16) & 0xFF), \
	(long)((ntohl((a).s_addr) >> 8) & 0xFF), \
	(long)(ntohl((a).s_addr) & 0xFF))
#endif
135
136
137 u_short ip_id; /* IP header ID counter; ip_output_list() stores htons(ip_id++) in locally built headers when RANDOM_IP_ID is not set */
138
139 static struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *);
140 static struct ifnet *ip_multicast_if(struct in_addr *, int *);
141 static void ip_mloopback(struct ifnet *, struct mbuf *,
142 struct sockaddr_in *, int);
143 static int ip_getmoptions(struct sockopt *, struct ip_moptions *);
144 static int ip_pcbopts(int, struct mbuf **, struct mbuf *);
145 static int ip_setmoptions(struct sockopt *, struct ip_moptions **);
146
147 static void ip_out_cksum_stats(int, u_int32_t);
148
149 int ip_createmoptions(struct ip_moptions **imop);
150 int ip_addmembership(struct ip_moptions *imo, struct ip_mreq *mreq);
151 int ip_dropmembership(struct ip_moptions *imo, struct ip_mreq *mreq);
152 int ip_optcopy(struct ip *, struct ip *);
153 void in_delayed_cksum_offset(struct mbuf *, int );
154 void in_cksum_offset(struct mbuf* , size_t );
155
156 extern int (*fr_checkp)(struct ip *, int, struct ifnet *, int, struct mbuf **);
157
158 extern u_long route_generation;
159
160 extern struct protosw inetsw[];
161
162 extern struct ip_linklocal_stat ip_linklocal_stat;
163 extern lck_mtx_t *ip_mutex;
164
165 /* temporary: for testing -- when nonzero, ip_output_list() skips its IPsec processing */
166 #if IPSEC
167 extern int ipsec_bypass;
168 #endif
169
170 static int ip_maxchainsent = 0; /* read-write sysctl "maxchainsent" under _net_inet_ip; presumably tracks packet chains handed to dlil_output_list() -- use is outside the visible code, TODO confirm */
171 SYSCTL_INT(_net_inet_ip, OID_AUTO, maxchainsent, CTLFLAG_RW,
172 &ip_maxchainsent, 0, "use dlil_output_list");
173 #if DEBUG
174 static int forge_ce = 0; /* DEBUG only: while nonzero, ip_output_list() rewrites the ECN field of ECT(0)/ECT(1) packets to CE and decrements this counter once per forged packet */
175 SYSCTL_INT(_net_inet_ip, OID_AUTO, forge_ce, CTLFLAG_RW,
176 &forge_ce, 0, "Forge ECN CE");
177 #endif /* DEBUG */
/*
 * ip_output: send a single IP packet.
 *
 * Thin entry point that hands its arguments, unchanged, to
 * ip_output_list() with the packetchain argument set to 0.
 *
 * The mbuf chain m0 holds the packet and begins with a skeletal IP
 * header (with len, off, ttl, proto, tos, src, dst).  The chain will be
 * freed.  The options mbuf opt, if present, will not be freed.
 *
 * Returns the value produced by ip_output_list(): 0 on success,
 * otherwise an errno value.
 */
int
ip_output(
	struct mbuf *m0,
	struct mbuf *opt,
	struct route *ro,
	int flags,
	struct ip_moptions *imo,
	struct ifnet *ifp)
{
	return (ip_output_list(m0, 0, opt, ro, flags, imo, ifp));
}
197
198 /*
199 * Returns: 0 Success
200 * ENOMEM
201 * EADDRNOTAVAIL
202 * ENETUNREACH
203 * EHOSTUNREACH
204 * EACCES
205 * EMSGSIZE
206 * ENOBUFS
207 * ipsec4_getpolicybyaddr:??? [IPSEC 4th argument, contents modified]
208 * ipsec4_getpolicybysock:??? [IPSEC 4th argument, contents modified]
209 * key_spdacquire:??? [IPSEC]
210 * ipsec4_output:??? [IPSEC]
211 * <fr_checkp>:??? [firewall]
212 * ip_dn_io_ptr:??? [dummynet]
213 * dlil_output:??? [DLIL]
214 * dlil_output_list:??? [DLIL]
215 *
216 * Notes: The ipsec4_getpolicyby{addr|sock} function error returns are
217 * only used as the error return from this function where one of
218 * these functions fails to return a policy.
219 */
220 int
221 ip_output_list(
222 struct mbuf *m0,
223 int packetchain,
224 struct mbuf *opt,
225 struct route *ro,
226 int flags,
227 struct ip_moptions *imo,
228 #if CONFIG_FORCE_OUT_IFP
229 struct ifnet *pdp_ifp
230 #else
231 __unused struct ifnet *unused_ifp
232 #endif
233 )
234 {
235 struct ip *ip, *mhip;
236 struct ifnet *ifp = NULL;
237 struct mbuf *m = m0;
238 int hlen = sizeof (struct ip);
239 int len = 0, off, error = 0;
240 struct sockaddr_in *dst = NULL;
241 struct in_ifaddr *ia = NULL;
242 int isbroadcast, sw_csum;
243 struct in_addr pkt_dst;
244 #if IPSEC
245 struct route iproute;
246 struct socket *so = NULL;
247 struct secpolicy *sp = NULL;
248 #endif
249 #if IPFIREWALL_FORWARD
250 int fwd_rewrite_src = 0;
251 #endif
252 #if IPFIREWALL
253 struct ip_fw_args args;
254 #endif
255 int didfilter = 0;
256 ipfilter_t inject_filter_ref = 0;
257 struct m_tag *tag;
258 struct route saved_route;
259 struct mbuf * packetlist;
260 int pktcnt = 0;
261
262
263 KERNEL_DEBUG(DBG_FNC_IP_OUTPUT | DBG_FUNC_START, 0,0,0,0,0);
264
265 packetlist = m0;
266 #if IPFIREWALL
267 args.next_hop = NULL;
268 args.eh = NULL;
269 args.rule = NULL;
270 args.divert_rule = 0; /* divert cookie */
271
272 /* Grab info from mtags prepended to the chain */
273 #if DUMMYNET
274 if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_DUMMYNET, NULL)) != NULL) {
275 struct dn_pkt_tag *dn_tag;
276
277 dn_tag = (struct dn_pkt_tag *)(tag+1);
278 args.rule = dn_tag->rule;
279 opt = NULL;
280 saved_route = dn_tag->ro;
281 ro = &saved_route;
282
283 imo = NULL;
284 dst = dn_tag->dn_dst;
285 ifp = dn_tag->ifp;
286 flags = dn_tag->flags;
287
288 m_tag_delete(m0, tag);
289 }
290 #endif /* DUMMYNET */
291
292 #if IPDIVERT
293 if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_DIVERT, NULL)) != NULL) {
294 struct divert_tag *div_tag;
295
296 div_tag = (struct divert_tag *)(tag+1);
297 args.divert_rule = div_tag->cookie;
298
299 m_tag_delete(m0, tag);
300 }
301 #endif /* IPDIVERT */
302
303 if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_IPFORWARD, NULL)) != NULL) {
304 struct ip_fwd_tag *ipfwd_tag;
305
306 ipfwd_tag = (struct ip_fwd_tag *)(tag+1);
307 args.next_hop = ipfwd_tag->next_hop;
308
309 m_tag_delete(m0, tag);
310 }
311 #endif /* IPFIREWALL */
312
313 m = m0;
314
315 #if DIAGNOSTIC
316 if ( !m || (m->m_flags & M_PKTHDR) != 0)
317 panic("ip_output no HDR");
318 if (!ro)
319 panic("ip_output no route, proto = %d",
320 mtod(m, struct ip *)->ip_p);
321 #endif
322
323 #if IPFIREWALL
324 if (args.rule != NULL) { /* dummynet already saw us */
325 ip = mtod(m, struct ip *);
326 hlen = IP_VHL_HL(ip->ip_vhl) << 2 ;
327 lck_mtx_lock(rt_mtx);
328 if (ro->ro_rt != NULL)
329 ia = (struct in_ifaddr *)ro->ro_rt->rt_ifa;
330 if (ia)
331 ifaref(&ia->ia_ifa);
332 lck_mtx_unlock(rt_mtx);
333 #if IPSEC
334 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) {
335 so = ipsec_getsocket(m);
336 (void)ipsec_setsocket(m, NULL);
337 }
338 #endif
339 goto sendit;
340 }
341 #endif /* IPFIREWALL */
342
343 #if IPSEC
344 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) {
345 so = ipsec_getsocket(m);
346 (void)ipsec_setsocket(m, NULL);
347 }
348 #endif
349 loopit:
350 /*
351 * No need to process packet twice if we've
352 * already seen it
353 */
354 inject_filter_ref = ipf_get_inject_filter(m);
355
356 if (opt) {
357 m = ip_insertoptions(m, opt, &len);
358 hlen = len;
359 }
360 ip = mtod(m, struct ip *);
361 #if IPFIREWALL
362 pkt_dst = args.next_hop ? args.next_hop->sin_addr : ip->ip_dst;
363 #else
364 pkt_dst = ip->ip_dst;
365 #endif
366
367 /*
368 * Fill in IP header.
369 */
370 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
371 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, hlen >> 2);
372 ip->ip_off &= IP_DF;
373 #if RANDOM_IP_ID
374 ip->ip_id = ip_randomid();
375 #else
376 ip->ip_id = htons(ip_id++);
377 #endif
378 OSAddAtomic(1, (SInt32*)&ipstat.ips_localout);
379 } else {
380 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
381 }
382
383 #if DEBUG
384 /* For debugging, we let the stack forge congestion */
385 if (forge_ce != 0 &&
386 ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_ECT1 ||
387 (ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_ECT0)) {
388 ip->ip_tos = (ip->ip_tos & ~IPTOS_ECN_MASK) | IPTOS_ECN_CE;
389 forge_ce--;
390 }
391 #endif /* DEBUG */
392
393 KERNEL_DEBUG(DBG_LAYER_BEG, ip->ip_dst.s_addr,
394 ip->ip_src.s_addr, ip->ip_p, ip->ip_off, ip->ip_len);
395
396 dst = (struct sockaddr_in *)&ro->ro_dst;
397
398 /*
399 * If there is a cached route,
400 * check that it is to the same destination
401 * and is still up. If not, free it and try again.
402 * The address family should also be checked in case of sharing the
403 * cache with IPv6.
404 */
405
406 lck_mtx_lock(rt_mtx);
407 if (ro->ro_rt != NULL) {
408 if (ro->ro_rt->generation_id != route_generation &&
409 ((flags & (IP_ROUTETOIF | IP_FORWARDING)) == 0) &&
410 (ip->ip_src.s_addr != INADDR_ANY) &&
411 (ifa_foraddr(ip->ip_src.s_addr) == 0)) {
412 error = EADDRNOTAVAIL;
413 lck_mtx_unlock(rt_mtx);
414 goto bad;
415 }
416 if ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
417 dst->sin_family != AF_INET ||
418 dst->sin_addr.s_addr != pkt_dst.s_addr) {
419 rtfree_locked(ro->ro_rt);
420 ro->ro_rt = NULL;
421 }
422 if (ro->ro_rt && ro->ro_rt->generation_id != route_generation)
423 ro->ro_rt->generation_id = route_generation;
424 }
425 if (ro->ro_rt == NULL) {
426 bzero(dst, sizeof(*dst));
427 dst->sin_family = AF_INET;
428 dst->sin_len = sizeof(*dst);
429 dst->sin_addr = pkt_dst;
430 }
431 /*
432 * If routing to interface only,
433 * short circuit routing lookup.
434 */
435 #define ifatoia(ifa) ((struct in_ifaddr *)(ifa))
436 #define sintosa(sin) ((struct sockaddr *)(sin))
437 if (flags & IP_ROUTETOIF) {
438 if (ia)
439 ifafree(&ia->ia_ifa);
440 if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0) {
441 if ((ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) {
442 OSAddAtomic(1, (SInt32*)&ipstat.ips_noroute);
443 error = ENETUNREACH;
444 lck_mtx_unlock(rt_mtx);
445 goto bad;
446 }
447 }
448 ifp = ia->ia_ifp;
449 ip->ip_ttl = 1;
450 isbroadcast = in_broadcast(dst->sin_addr, ifp);
451 } else {
452
453 #if CONFIG_FORCE_OUT_IFP
454 /* Check if this packet should be forced out a specific interface */
455 if (ro->ro_rt == 0 && pdp_ifp != NULL) {
456 pdp_context_route_locked(pdp_ifp, ro);
457
458 if (ro->ro_rt == NULL) {
459 OSAddAtomic(1, (UInt32*)&ipstat.ips_noroute);
460 error = EHOSTUNREACH;
461 lck_mtx_unlock(rt_mtx);
462 goto bad;
463 }
464 }
465 #endif
466
467 /*
468 * If this is the case, we probably don't want to allocate
469 * a protocol-cloned route since we didn't get one from the
470 * ULP. This lets TCP do its thing, while not burdening
471 * forwarding or ICMP with the overhead of cloning a route.
472 * Of course, we still want to do any cloning requested by
473 * the link layer, as this is probably required in all cases
474 * for correct operation (as it is for ARP).
475 */
476
477 if (ro->ro_rt == 0) {
478 unsigned long ign = RTF_PRCLONING;
479 /*
480 * We make an exception here: if the destination
481 * address is INADDR_BROADCAST, allocate a protocol-
482 * cloned host route so that we end up with a route
483 * marked with the RTF_BROADCAST flag. Otherwise,
484 * we would end up referring to the default route,
485 * instead of creating a cloned host route entry.
486 * That would introduce inconsistencies between ULPs
487 * that allocate a route and those that don't. The
488 * RTF_BROADCAST route is important since we'd want
489 * to send out undirected IP broadcast packets using
490 * link-level broadcast address.
491 *
492 * This exception will no longer be necessary when
493 * the RTF_PRCLONING scheme is no longer present.
494 */
495 if (dst->sin_addr.s_addr == INADDR_BROADCAST)
496 ign &= ~RTF_PRCLONING;
497
498 rtalloc_ign_locked(ro, ign);
499 }
500 if (ro->ro_rt == 0) {
501 OSAddAtomic(1, (SInt32*)&ipstat.ips_noroute);
502 error = EHOSTUNREACH;
503 lck_mtx_unlock(rt_mtx);
504 goto bad;
505 }
506
507 if (ia)
508 ifafree(&ia->ia_ifa);
509 ia = ifatoia(ro->ro_rt->rt_ifa);
510 if (ia)
511 ifaref(&ia->ia_ifa);
512 ifp = ro->ro_rt->rt_ifp;
513 ro->ro_rt->rt_use++;
514 if (ro->ro_rt->rt_flags & RTF_GATEWAY)
515 dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
516 if (ro->ro_rt->rt_flags & RTF_HOST)
517 isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST);
518 else
519 isbroadcast = in_broadcast(dst->sin_addr, ifp);
520 }
521 lck_mtx_unlock(rt_mtx);
522 if (IN_MULTICAST(ntohl(pkt_dst.s_addr))) {
523 struct in_multi *inm;
524
525 m->m_flags |= M_MCAST;
526 /*
527 * IP destination address is multicast. Make sure "dst"
528 * still points to the address in "ro". (It may have been
529 * changed to point to a gateway address, above.)
530 */
531 dst = (struct sockaddr_in *)&ro->ro_dst;
532 /*
533 * See if the caller provided any multicast options
534 */
535 if (imo != NULL) {
536 if ((flags & IP_RAWOUTPUT) == 0) ip->ip_ttl = imo->imo_multicast_ttl;
537 if (imo->imo_multicast_ifp != NULL) {
538 ifp = imo->imo_multicast_ifp;
539 }
540 #if MROUTING
541 if (imo->imo_multicast_vif != -1 &&
542 ((flags & IP_RAWOUTPUT) == 0 || ip->ip_src.s_addr == INADDR_ANY))
543 ip->ip_src.s_addr =
544 ip_mcast_src(imo->imo_multicast_vif);
545 #endif /* MROUTING */
546 } else
547 if ((flags & IP_RAWOUTPUT) == 0) ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
548 /*
549 * Confirm that the outgoing interface supports multicast.
550 */
551 if ((imo == NULL) || (imo->imo_multicast_vif == -1)) {
552 if ((ifp->if_flags & IFF_MULTICAST) == 0) {
553 OSAddAtomic(1, (SInt32*)&ipstat.ips_noroute);
554 error = ENETUNREACH;
555 goto bad;
556 }
557 }
558 /*
559 * If source address not specified yet, use address
560 * of outgoing interface.
561 */
562 if (ip->ip_src.s_addr == INADDR_ANY) {
563 register struct in_ifaddr *ia1;
564 lck_mtx_lock(rt_mtx);
565 TAILQ_FOREACH(ia1, &in_ifaddrhead, ia_link)
566 if (ia1->ia_ifp == ifp) {
567 ip->ip_src = IA_SIN(ia1)->sin_addr;
568
569 break;
570 }
571 lck_mtx_unlock(rt_mtx);
572 if (ip->ip_src.s_addr == INADDR_ANY) {
573 error = ENETUNREACH;
574 goto bad;
575 }
576 }
577
578 ifnet_lock_shared(ifp);
579 IN_LOOKUP_MULTI(pkt_dst, ifp, inm);
580 ifnet_lock_done(ifp);
581 if (inm != NULL &&
582 (imo == NULL || imo->imo_multicast_loop)) {
583 /*
584 * If we belong to the destination multicast group
585 * on the outgoing interface, and the caller did not
586 * forbid loopback, loop back a copy.
587 */
588 if (!TAILQ_EMPTY(&ipv4_filters)) {
589 struct ipfilter *filter;
590 int seen = (inject_filter_ref == 0);
591 struct ipf_pktopts *ippo = 0, ipf_pktopts;
592
593 if (imo) {
594 ippo = &ipf_pktopts;
595 ipf_pktopts.ippo_mcast_ifnet = imo->imo_multicast_ifp;
596 ipf_pktopts.ippo_mcast_ttl = imo->imo_multicast_ttl;
597 ipf_pktopts.ippo_mcast_loop = imo->imo_multicast_loop;
598 }
599
600 ipf_ref();
601
602 /* 4135317 - always pass network byte order to filter */
603 HTONS(ip->ip_len);
604 HTONS(ip->ip_off);
605
606 TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) {
607 if (seen == 0) {
608 if ((struct ipfilter *)inject_filter_ref == filter)
609 seen = 1;
610 } else if (filter->ipf_filter.ipf_output) {
611 errno_t result;
612 result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, ippo);
613 if (result == EJUSTRETURN) {
614 ipf_unref();
615 goto done;
616 }
617 if (result != 0) {
618 ipf_unref();
619 goto bad;
620 }
621 }
622 }
623
624 /* set back to host byte order */
625 ip = mtod(m, struct ip *);
626 NTOHS(ip->ip_len);
627 NTOHS(ip->ip_off);
628
629 ipf_unref();
630 didfilter = 1;
631 }
632 ip_mloopback(ifp, m, dst, hlen);
633 }
634 #if MROUTING
635 else {
636 /*
637 * If we are acting as a multicast router, perform
638 * multicast forwarding as if the packet had just
639 * arrived on the interface to which we are about
640 * to send. The multicast forwarding function
641 * recursively calls this function, using the
642 * IP_FORWARDING flag to prevent infinite recursion.
643 *
644 * Multicasts that are looped back by ip_mloopback(),
645 * above, will be forwarded by the ip_input() routine,
646 * if necessary.
647 */
648 if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
649 /*
650 * Check if rsvp daemon is running. If not, don't
651 * set ip_moptions. This ensures that the packet
652 * is multicast and not just sent down one link
653 * as prescribed by rsvpd.
654 */
655 if (!rsvp_on)
656 imo = NULL;
657 if (ip_mforward(ip, ifp, m, imo) != 0) {
658 m_freem(m);
659 goto done;
660 }
661 }
662 }
663 #endif /* MROUTING */
664
665 /*
666 * Multicasts with a time-to-live of zero may be looped-
667 * back, above, but must not be transmitted on a network.
668 * Also, multicasts addressed to the loopback interface
669 * are not sent -- the above call to ip_mloopback() will
670 * loop back a copy if this host actually belongs to the
671 * destination group on the loopback interface.
672 */
673 if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) {
674 m_freem(m);
675 goto done;
676 }
677
678 goto sendit;
679 }
680 #ifndef notdef
681 /*
682 * If source address not specified yet, use address
683 * of outgoing interface.
684 */
685 if (ip->ip_src.s_addr == INADDR_ANY) {
686 ip->ip_src = IA_SIN(ia)->sin_addr;
687 #if IPFIREWALL_FORWARD
688 /* Keep note that we did this - if the firewall changes
689 * the next-hop, our interface may change, changing the
690 * default source IP. It's a shame so much effort happens
691 * twice. Oh well.
692 */
693 fwd_rewrite_src++;
694 #endif /* IPFIREWALL_FORWARD */
695 }
696 #endif /* notdef */
697
698 /*
699 * Look for broadcast address and
700 * verify user is allowed to send
701 * such a packet.
702 */
703 if (isbroadcast) {
704 if ((ifp->if_flags & IFF_BROADCAST) == 0) {
705 error = EADDRNOTAVAIL;
706 goto bad;
707 }
708 if ((flags & IP_ALLOWBROADCAST) == 0) {
709 error = EACCES;
710 goto bad;
711 }
712 /* don't allow broadcast messages to be fragmented */
713 if ((u_short)ip->ip_len > ifp->if_mtu) {
714 error = EMSGSIZE;
715 goto bad;
716 }
717 m->m_flags |= M_BCAST;
718 } else {
719 m->m_flags &= ~M_BCAST;
720 }
721
722 sendit:
723 /*
724 * Force IP TTL to 255 following draft-ietf-zeroconf-ipv4-linklocal.txt
725 */
726 if (IN_LINKLOCAL(ntohl(ip->ip_src.s_addr)) || IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr))) {
727 ip_linklocal_stat.iplls_out_total++;
728 if (ip->ip_ttl != MAXTTL) {
729 ip_linklocal_stat.iplls_out_badttl++;
730 ip->ip_ttl = MAXTTL;
731 }
732 }
733
734 if (!didfilter && !TAILQ_EMPTY(&ipv4_filters)) {
735 struct ipfilter *filter;
736 int seen = (inject_filter_ref == 0);
737
738 ipf_ref();
739
740 /* 4135317 - always pass network byte order to filter */
741 HTONS(ip->ip_len);
742 HTONS(ip->ip_off);
743
744 TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) {
745 if (seen == 0) {
746 if ((struct ipfilter *)inject_filter_ref == filter)
747 seen = 1;
748 } else if (filter->ipf_filter.ipf_output) {
749 errno_t result;
750 result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, 0);
751 if (result == EJUSTRETURN) {
752 ipf_unref();
753 goto done;
754 }
755 if (result != 0) {
756 ipf_unref();
757 goto bad;
758 }
759 }
760 }
761
762 /* set back to host byte order */
763 ip = mtod(m, struct ip *);
764 NTOHS(ip->ip_len);
765 NTOHS(ip->ip_off);
766
767 ipf_unref();
768 }
769
770 #if IPSEC
771 /* temporary for testing only: bypass ipsec altogether */
772
773 if (ipsec_bypass != 0 || (flags & IP_NOIPSEC) != 0)
774 goto skip_ipsec;
775
776 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_START, 0,0,0,0,0);
777
778
779 /* get SP for this packet */
780 if (so == NULL)
781 sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, flags, &error);
782 else
783 sp = ipsec4_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error);
784
785 if (sp == NULL) {
786 IPSEC_STAT_INCREMENT(ipsecstat.out_inval);
787 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 0,0,0,0,0);
788 goto bad;
789 }
790
791 error = 0;
792
793 /* check policy */
794 switch (sp->policy) {
795 case IPSEC_POLICY_DISCARD:
796 case IPSEC_POLICY_GENERATE:
797 /*
798 * This packet is just discarded.
799 */
800 IPSEC_STAT_INCREMENT(ipsecstat.out_polvio);
801 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 1,0,0,0,0);
802 goto bad;
803
804 case IPSEC_POLICY_BYPASS:
805 case IPSEC_POLICY_NONE:
806 /* no need to do IPsec. */
807 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 2,0,0,0,0);
808 goto skip_ipsec;
809
810 case IPSEC_POLICY_IPSEC:
811 if (sp->req == NULL) {
812 /* acquire a policy */
813 error = key_spdacquire(sp);
814 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 3,0,0,0,0);
815 goto bad;
816 }
817 break;
818
819 case IPSEC_POLICY_ENTRUST:
820 default:
821 printf("ip_output: Invalid policy found. %d\n", sp->policy);
822 }
823 {
824 struct ipsec_output_state state;
825 bzero(&state, sizeof(state));
826 state.m = m;
827 if (flags & IP_ROUTETOIF) {
828 state.ro = &iproute;
829 bzero(&iproute, sizeof(iproute));
830 } else
831 state.ro = ro;
832 state.dst = (struct sockaddr *)dst;
833
834 ip->ip_sum = 0;
835
836 /*
837 * XXX
838 * delayed checksums are not currently compatible with IPsec
839 */
840 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
841 in_delayed_cksum(m);
842 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
843 }
844
845 HTONS(ip->ip_len);
846 HTONS(ip->ip_off);
847
848 error = ipsec4_output(&state, sp, flags);
849
850 m0 = m = state.m;
851
852 if (flags & IP_ROUTETOIF) {
853 /*
854 * if we have tunnel mode SA, we may need to ignore
855 * IP_ROUTETOIF.
856 */
857 if (state.ro != &iproute || state.ro->ro_rt != NULL) {
858 flags &= ~IP_ROUTETOIF;
859 ro = state.ro;
860 }
861 } else
862 ro = state.ro;
863
864 dst = (struct sockaddr_in *)state.dst;
865 if (error) {
866 /* mbuf is already reclaimed in ipsec4_output. */
867 m0 = NULL;
868 switch (error) {
869 case EHOSTUNREACH:
870 case ENETUNREACH:
871 case EMSGSIZE:
872 case ENOBUFS:
873 case ENOMEM:
874 break;
875 default:
876 printf("ip4_output (ipsec): error code %d\n", error);
877 /*fall through*/
878 case ENOENT:
879 /* don't show these error codes to the user */
880 error = 0;
881 break;
882 }
883 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 4,0,0,0,0);
884 goto bad;
885 }
886 }
887
888 /* be sure to update variables that are affected by ipsec4_output() */
889 ip = mtod(m, struct ip *);
890
891 #ifdef _IP_VHL
892 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
893 #else
894 hlen = ip->ip_hl << 2;
895 #endif
896 /* Check that there wasn't a route change and src is still valid */
897
898 lck_mtx_lock(rt_mtx);
899 if (ro->ro_rt && ro->ro_rt->generation_id != route_generation) {
900 if (ifa_foraddr(ip->ip_src.s_addr) == 0 && ((flags & (IP_ROUTETOIF | IP_FORWARDING)) == 0)) {
901 error = EADDRNOTAVAIL;
902 lck_mtx_unlock(rt_mtx);
903 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 5,0,0,0,0);
904 goto bad;
905 }
906 rtfree_locked(ro->ro_rt);
907 ro->ro_rt = NULL;
908 }
909
910 if (ro->ro_rt == NULL) {
911 if ((flags & IP_ROUTETOIF) == 0) {
912 printf("ip_output: "
913 "can't update route after IPsec processing\n");
914 error = EHOSTUNREACH; /*XXX*/
915 lck_mtx_unlock(rt_mtx);
916 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 6,0,0,0,0);
917 goto bad;
918 }
919 } else {
920 if (ia)
921 ifafree(&ia->ia_ifa);
922 ia = ifatoia(ro->ro_rt->rt_ifa);
923 if (ia)
924 ifaref(&ia->ia_ifa);
925 ifp = ro->ro_rt->rt_ifp;
926 }
927 lck_mtx_unlock(rt_mtx);
928
929 /* make it flipped, again. */
930 NTOHS(ip->ip_len);
931 NTOHS(ip->ip_off);
932 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 7,0xff,0xff,0xff,0xff);
933
934 /* Pass to filters again */
935 if (!TAILQ_EMPTY(&ipv4_filters)) {
936 struct ipfilter *filter;
937
938 ipf_ref();
939
940 /* 4135317 - always pass network byte order to filter */
941 HTONS(ip->ip_len);
942 HTONS(ip->ip_off);
943
944 TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) {
945 if (filter->ipf_filter.ipf_output) {
946 errno_t result;
947 result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, 0);
948 if (result == EJUSTRETURN) {
949 ipf_unref();
950 goto done;
951 }
952 if (result != 0) {
953 ipf_unref();
954 goto bad;
955 }
956 }
957 }
958
959 /* set back to host byte order */
960 ip = mtod(m, struct ip *);
961 NTOHS(ip->ip_len);
962 NTOHS(ip->ip_off);
963
964 ipf_unref();
965 }
966 skip_ipsec:
967 #endif /*IPSEC*/
968
969 #if IPFIREWALL
970 /*
971 * IpHack's section.
972 * - Xlate: translate packet's addr/port (NAT).
973 * - Firewall: deny/allow/etc.
974 * - Wrap: fake packet's addr/port <unimpl.>
975 * - Encapsulate: put it in another IP and send out. <unimp.>
976 */
977 if (fr_checkp) {
978 struct mbuf *m1 = m;
979
980 if ((error = (*fr_checkp)(ip, hlen, ifp, 1, &m1)) || !m1) {
981 goto done;
982 }
983 ip = mtod(m0 = m = m1, struct ip *);
984 }
985
986 /*
987 * Check with the firewall...
988 * but not if we are already being fwd'd from a firewall.
989 */
990 if (fw_enable && IPFW_LOADED && !args.next_hop) {
991 struct sockaddr_in *old = dst;
992
993 args.m = m;
994 args.next_hop = dst;
995 args.oif = ifp;
996 off = ip_fw_chk_ptr(&args);
997 m = args.m;
998 dst = args.next_hop;
999
1000 /*
1001 * On return we must do the following:
1002 * IP_FW_PORT_DENY_FLAG -> drop the pkt (XXX new)
1003 * 1<=off<= 0xffff -> DIVERT
1004 * (off & IP_FW_PORT_DYNT_FLAG) -> send to a DUMMYNET pipe
1005 * (off & IP_FW_PORT_TEE_FLAG) -> TEE the packet
1006 * dst != old -> IPFIREWALL_FORWARD
1007 * off==0, dst==old -> accept
1008 * If some of the above modules is not compiled in, then
1009 * we shouldn't have to check the corresponding condition
1010 * (because the ipfw control socket should not accept
1011 * unsupported rules), but better play safe and drop
1012 * packets in case of doubt.
1013 */
1014 m0 = m;
1015 if ( (off & IP_FW_PORT_DENY_FLAG) || m == NULL) {
1016 if (m)
1017 m_freem(m);
1018 error = EACCES ;
1019 goto done ;
1020 }
1021 ip = mtod(m, struct ip *);
1022
1023 if (off == 0 && dst == old) {/* common case */
1024 goto pass ;
1025 }
1026 #if DUMMYNET
1027 if (DUMMYNET_LOADED && (off & IP_FW_PORT_DYNT_FLAG) != 0) {
1028 /*
1029 * pass the pkt to dummynet. Need to include
1030 * pipe number, m, ifp, ro, dst because these are
1031 * not recomputed in the next pass.
1032 * All other parameters have been already used and
1033 * so they are not needed anymore.
1034 * XXX note: if the ifp or ro entry are deleted
1035 * while a pkt is in dummynet, we are in trouble!
1036 */
1037 args.ro = ro;
1038 args.dst = dst;
1039 args.flags = flags;
1040
1041 error = ip_dn_io_ptr(m, off & 0xffff, DN_TO_IP_OUT,
1042 &args);
1043 goto done;
1044 }
1045 #endif /* DUMMYNET */
1046 #if IPDIVERT
1047 if (off != 0 && (off & IP_FW_PORT_DYNT_FLAG) == 0) {
1048 struct mbuf *clone = NULL;
1049
1050 /* Clone packet if we're doing a 'tee' */
1051 if ((off & IP_FW_PORT_TEE_FLAG) != 0)
1052 clone = m_dup(m, M_DONTWAIT);
1053 /*
1054 * XXX
1055 * delayed checksums are not currently compatible
1056 * with divert sockets.
1057 */
1058 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
1059 in_delayed_cksum(m);
1060 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1061 }
1062
1063 /* Restore packet header fields to original values */
1064 HTONS(ip->ip_len);
1065 HTONS(ip->ip_off);
1066
1067 /* Deliver packet to divert input routine */
1068 divert_packet(m, 0, off & 0xffff, args.divert_rule);
1069
1070 /* If 'tee', continue with original packet */
1071 if (clone != NULL) {
1072 m0 = m = clone;
1073 ip = mtod(m, struct ip *);
1074 goto pass;
1075 }
1076 goto done;
1077 }
1078 #endif
1079
1080 #if IPFIREWALL_FORWARD
1081 /* Here we check dst to make sure it's directly reachable on the
1082 * interface we previously thought it was.
1083 * If it isn't (which may be likely in some situations) we have
1084 * to re-route it (ie, find a route for the next-hop and the
1085 * associated interface) and set them here. This is nested
1086 * forwarding which in most cases is undesirable, except where
1087 * such control is nigh impossible. So we do it here.
1088 * And I'm babbling.
1089 */
1090 if (off == 0 && old != dst) {
1091 struct in_ifaddr *ia_fw;
1092
1093 /* It's changed... */
1094 /* There must be a better way to do this next line... */
1095 static struct route sro_fwd, *ro_fwd = &sro_fwd;
1096 #if IPFIREWALL_FORWARD_DEBUG
1097 printf("IPFIREWALL_FORWARD: New dst ip: ");
1098 print_ip(dst->sin_addr);
1099 printf("\n");
1100 #endif
1101 /*
1102 * We need to figure out if we have been forwarded
1103 * to a local socket. If so then we should somehow
1104 * "loop back" to ip_input, and get directed to the
1105 * PCB as if we had received this packet. This is
1106 * because it may be difficult to identify the packets
1107 * you want to forward until they are being output
1108 * and have selected an interface. (e.g. locally
1109 * initiated packets) If we used the loopback interface,
1110 * we would not be able to control what happens
1111 * as the packet runs through ip_input() as
1112 * it is done through a ISR.
1113 */
1114 TAILQ_FOREACH(ia_fw, &in_ifaddrhead, ia_link) {
1115 /*
1116 * If the addr to forward to is one
1117 * of ours, we pretend to
1118 * be the destination for this packet.
1119 */
1120 if (IA_SIN(ia_fw)->sin_addr.s_addr ==
1121 dst->sin_addr.s_addr)
1122 break;
1123 }
1124 if (ia) {
1125 /* tell ip_input "dont filter" */
1126 struct m_tag *fwd_tag;
1127 struct ip_fwd_tag *ipfwd_tag;
1128
1129 fwd_tag = m_tag_alloc(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_IPFORWARD,
1130 sizeof(struct sockaddr_in), M_NOWAIT);
1131 if (fwd_tag == NULL) {
1132 error = ENOBUFS;
1133 goto bad;
1134 }
1135
1136 ipfwd_tag = (struct ip_fwd_tag *)(fwd_tag+1);
1137 ipfwd_tag->next_hop = args.next_hop;
1138
1139 m_tag_prepend(m, fwd_tag);
1140
1141 if (m->m_pkthdr.rcvif == NULL)
1142 m->m_pkthdr.rcvif = ifunit("lo0");
1143 if ((~IF_HWASSIST_CSUM_FLAGS(m->m_pkthdr.rcvif->if_hwassist) &
1144 m->m_pkthdr.csum_flags) == 0) {
1145 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
1146 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1147 m->m_pkthdr.csum_flags |=
1148 CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1149 m->m_pkthdr.csum_data = 0xffff;
1150 }
1151 m->m_pkthdr.csum_flags |=
1152 CSUM_IP_CHECKED | CSUM_IP_VALID;
1153 }
1154 else if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
1155 in_delayed_cksum(m);
1156 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1157 ip->ip_sum = in_cksum(m, hlen);
1158 }
1159 HTONS(ip->ip_len);
1160 HTONS(ip->ip_off);
1161
1162
1163 /* we need to call dlil_output to run filters
1164 * and resync to avoid recursion loops.
1165 */
1166 if (lo_ifp) {
1167 dlil_output(lo_ifp, PF_INET, m, 0, (struct sockaddr *)dst, 0);
1168 }
1169 else {
1170 printf("ip_output: no loopback ifp for forwarding!!!\n");
1171 }
1172 goto done;
1173 }
1174 /* Some of the logic for this was
1175 * nicked from above.
1176 *
1177 * This rewrites the cached route in a local PCB.
1178 * Is this what we want to do?
1179 */
1180 bcopy(dst, &ro_fwd->ro_dst, sizeof(*dst));
1181
1182 ro_fwd->ro_rt = 0;
1183 lck_mtx_lock(rt_mtx);
1184 rtalloc_ign_locked(ro_fwd, RTF_PRCLONING);
1185
1186 if (ro_fwd->ro_rt == 0) {
1187 OSAddAtomic(1, (SInt32*)&ipstat.ips_noroute);
1188 error = EHOSTUNREACH;
1189 lck_mtx_unlock(rt_mtx);
1190 goto bad;
1191 }
1192
1193 ia_fw = ifatoia(ro_fwd->ro_rt->rt_ifa);
1194 ifp = ro_fwd->ro_rt->rt_ifp;
1195 ro_fwd->ro_rt->rt_use++;
1196 if (ro_fwd->ro_rt->rt_flags & RTF_GATEWAY)
1197 dst = (struct sockaddr_in *)ro_fwd->ro_rt->rt_gateway;
1198 if (ro_fwd->ro_rt->rt_flags & RTF_HOST)
1199 isbroadcast =
1200 (ro_fwd->ro_rt->rt_flags & RTF_BROADCAST);
1201 else
1202 isbroadcast = in_broadcast(dst->sin_addr, ifp);
1203 rtfree_locked(ro->ro_rt);
1204 ro->ro_rt = ro_fwd->ro_rt;
1205 dst = (struct sockaddr_in *)&ro_fwd->ro_dst;
1206 lck_mtx_unlock(rt_mtx);
1207
1208 /*
1209 * If we added a default src ip earlier,
1210 * which would have been gotten from the-then
1211 * interface, do it again, from the new one.
1212 */
1213 if (fwd_rewrite_src)
1214 ip->ip_src = IA_SIN(ia_fw)->sin_addr;
1215 goto pass ;
1216 }
1217 #endif /* IPFIREWALL_FORWARD */
1218 /*
1219 * if we get here, none of the above matches, and
1220 * we have to drop the pkt
1221 */
1222 m_freem(m);
1223 error = EACCES; /* not sure this is the right error msg */
1224 goto done;
1225 }
1226 #endif /* IPFIREWALL */
1227
1228 pass:
1229 #if __APPLE__
1230 /* Do not allow loopback address to wind up on a wire */
1231 if ((ifp->if_flags & IFF_LOOPBACK) == 0 &&
1232 ((ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
1233 (ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)) {
1234 OSAddAtomic(1, (SInt32*)&ipstat.ips_badaddr);
1235 m_freem(m);
1236 /*
1237 * Do not simply drop the packet just like a firewall -- we want the
1238 * application to feel the pain.
1239 * Return ENETUNREACH like ip6_output does in some similar cases.
1240 * This can startle the otherwise clueless process that specifies
1241 * loopback as the source address.
1242 */
1243 error = ENETUNREACH;
1244 goto done;
1245 }
1246 #endif
1247 m->m_pkthdr.csum_flags |= CSUM_IP;
1248 sw_csum = m->m_pkthdr.csum_flags
1249 & ~IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);
1250
1251 if ((ifp->if_hwassist & CSUM_TCP_SUM16) != 0) {
1252 /*
1253 * Special case code for GMACE
1254 * frames that can be checksumed by GMACE SUM16 HW:
1255 * frame >64, no fragments, no UDP
1256 */
1257 if (apple_hwcksum_tx && (m->m_pkthdr.csum_flags & CSUM_TCP)
1258 && (ip->ip_len > 50) && (ip->ip_len <= ifp->if_mtu)) {
1259 /* Apple GMAC HW, expects STUFF_OFFSET << 16 | START_OFFSET */
1260 u_short offset = (IP_VHL_HL(ip->ip_vhl) << 2) +14 ; /* IP+Enet header length */
1261 u_short csumprev= m->m_pkthdr.csum_data & 0xFFFF;
1262 m->m_pkthdr.csum_flags = CSUM_DATA_VALID | CSUM_TCP_SUM16; /* for GMAC */
1263 m->m_pkthdr.csum_data = (csumprev + offset) << 16 ;
1264 m->m_pkthdr.csum_data += offset;
1265 sw_csum = CSUM_DELAY_IP; /* do IP hdr chksum in software */
1266 }
1267 else {
1268 /* let the software handle any UDP or TCP checksums */
1269 sw_csum |= (CSUM_DELAY_DATA & m->m_pkthdr.csum_flags);
1270 }
1271 } else if (apple_hwcksum_tx == 0) {
1272 sw_csum |= (CSUM_DELAY_DATA | CSUM_DELAY_IP) &
1273 m->m_pkthdr.csum_flags;
1274 }
1275
1276 if (sw_csum & CSUM_DELAY_DATA) {
1277 in_delayed_cksum(m);
1278 sw_csum &= ~CSUM_DELAY_DATA;
1279 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1280 }
1281
1282 if (apple_hwcksum_tx != 0) {
1283 m->m_pkthdr.csum_flags &=
1284 IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);
1285 } else {
1286 m->m_pkthdr.csum_flags = 0;
1287 }
1288
1289 /*
1290 * If small enough for interface, or the interface will take
1291 * care of the fragmentation for us, can just send directly.
1292 */
1293 if ((u_short)ip->ip_len <= ifp->if_mtu ||
1294 ifp->if_hwassist & CSUM_FRAGMENT) {
1295 struct rtentry *rte;
1296
1297 HTONS(ip->ip_len);
1298 HTONS(ip->ip_off);
1299 ip->ip_sum = 0;
1300 if (sw_csum & CSUM_DELAY_IP) {
1301 ip->ip_sum = in_cksum(m, hlen);
1302 }
1303
1304 #ifndef __APPLE__
1305 /* Record statistics for this interface address. */
1306 if (!(flags & IP_FORWARDING) && ia != NULL) {
1307 ia->ia_ifa.if_opackets++;
1308 ia->ia_ifa.if_obytes += m->m_pkthdr.len;
1309 }
1310 #endif
1311
1312 #if IPSEC
1313 /* clean ipsec history once it goes out of the node */
1314 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0)
1315 ipsec_delaux(m);
1316 #endif
1317 if (packetchain == 0) {
1318 lck_mtx_lock(rt_mtx);
1319 if ((rte = ro->ro_rt) != NULL)
1320 rtref(rte);
1321 lck_mtx_unlock(rt_mtx);
1322 error = ifnet_output(ifp, PF_INET, m, rte,
1323 (struct sockaddr *)dst);
1324 if (rte != NULL)
1325 rtfree(rte);
1326 goto done;
1327 }
1328 else { /* packet chaining allows us to reuse the route for all packets */
1329 m = m->m_nextpkt;
1330 if (m == NULL) {
1331 if (pktcnt > ip_maxchainsent)
1332 ip_maxchainsent = pktcnt;
1333 lck_mtx_lock(rt_mtx);
1334 if ((rte = ro->ro_rt) != NULL)
1335 rtref(rte);
1336 lck_mtx_unlock(rt_mtx);
1337 //send
1338 error = ifnet_output(ifp, PF_INET, packetlist,
1339 rte, (struct sockaddr *)dst);
1340 if (rte != NULL)
1341 rtfree(rte);
1342 pktcnt = 0;
1343 goto done;
1344
1345 }
1346 m0 = m;
1347 pktcnt++;
1348 goto loopit;
1349 }
1350 }
1351 /*
1352 * Too large for interface; fragment if possible.
1353 * Must be able to put at least 8 bytes per fragment.
1354 */
1355 if (ip->ip_off & IP_DF) {
1356 error = EMSGSIZE;
1357 /*
1358 * This case can happen if the user changed the MTU
1359 * of an interface after enabling IP on it. Because
1360 * most netifs don't keep track of routes pointing to
1361 * them, there is no way for one to update all its
1362 * routes when the MTU is changed.
1363 */
1364
1365 lck_mtx_lock(rt_mtx);
1366 if (ro->ro_rt && (ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST))
1367 && !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU)
1368 && (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) {
1369 ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
1370 }
1371 lck_mtx_unlock(rt_mtx);
1372 OSAddAtomic(1, (SInt32*)&ipstat.ips_cantfrag);
1373 goto bad;
1374 }
1375 len = (ifp->if_mtu - hlen) &~ 7;
1376 if (len < 8) {
1377 error = EMSGSIZE;
1378 goto bad;
1379 }
1380
1381 /*
1382 * if the interface will not calculate checksums on
1383 * fragmented packets, then do it here.
1384 */
1385 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA &&
1386 (ifp->if_hwassist & CSUM_IP_FRAGS) == 0) {
1387 in_delayed_cksum(m);
1388 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1389 }
1390
1391
1392 {
1393 int mhlen, firstlen = len;
1394 struct mbuf **mnext = &m->m_nextpkt;
1395 int nfrags = 1;
1396
1397 /*
1398 * Loop through length of segment after first fragment,
1399 * make new header and copy data of each part and link onto chain.
1400 */
1401 m0 = m;
1402 mhlen = sizeof (struct ip);
1403 for (off = hlen + len; off < (u_short)ip->ip_len; off += len) {
1404 MGETHDR(m, M_DONTWAIT, MT_HEADER); /* MAC-OK */
1405 if (m == 0) {
1406 error = ENOBUFS;
1407 OSAddAtomic(1, (SInt32*)&ipstat.ips_odropped);
1408 goto sendorfree;
1409 }
1410 m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG;
1411 m->m_data += max_linkhdr;
1412 mhip = mtod(m, struct ip *);
1413 *mhip = *ip;
1414 if (hlen > sizeof (struct ip)) {
1415 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
1416 mhip->ip_vhl = IP_MAKE_VHL(IPVERSION, mhlen >> 2);
1417 }
1418 m->m_len = mhlen;
1419 mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF);
1420 if (ip->ip_off & IP_MF)
1421 mhip->ip_off |= IP_MF;
1422 if (off + len >= (u_short)ip->ip_len)
1423 len = (u_short)ip->ip_len - off;
1424 else
1425 mhip->ip_off |= IP_MF;
1426 mhip->ip_len = htons((u_short)(len + mhlen));
1427 m->m_next = m_copy(m0, off, len);
1428 if (m->m_next == 0) {
1429 (void) m_free(m);
1430 error = ENOBUFS; /* ??? */
1431 OSAddAtomic(1, (SInt32*)&ipstat.ips_odropped);
1432 goto sendorfree;
1433 }
1434 m->m_pkthdr.len = mhlen + len;
1435 m->m_pkthdr.rcvif = 0;
1436 m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
1437 m->m_pkthdr.socket_id = m0->m_pkthdr.socket_id;
1438 #if CONFIG_MACF_NET
1439 mac_netinet_fragment(m0, m);
1440 #endif
1441 HTONS(mhip->ip_off);
1442 mhip->ip_sum = 0;
1443 if (sw_csum & CSUM_DELAY_IP) {
1444 mhip->ip_sum = in_cksum(m, mhlen);
1445 }
1446 *mnext = m;
1447 mnext = &m->m_nextpkt;
1448 nfrags++;
1449 }
1450 OSAddAtomic(nfrags, (SInt32*)&ipstat.ips_ofragments);
1451
1452 /* set first/last markers for fragment chain */
1453 m->m_flags |= M_LASTFRAG;
1454 m0->m_flags |= M_FIRSTFRAG | M_FRAG;
1455 m0->m_pkthdr.csum_data = nfrags;
1456
1457 /*
1458 * Update first fragment by trimming what's been copied out
1459 * and updating header, then send each fragment (in order).
1460 */
1461 m = m0;
1462 m_adj(m, hlen + firstlen - (u_short)ip->ip_len);
1463 m->m_pkthdr.len = hlen + firstlen;
1464 ip->ip_len = htons((u_short)m->m_pkthdr.len);
1465 ip->ip_off |= IP_MF;
1466 HTONS(ip->ip_off);
1467 ip->ip_sum = 0;
1468 if (sw_csum & CSUM_DELAY_IP) {
1469 ip->ip_sum = in_cksum(m, hlen);
1470 }
1471 sendorfree:
1472
1473 KERNEL_DEBUG(DBG_LAYER_END, ip->ip_dst.s_addr,
1474 ip->ip_src.s_addr, ip->ip_p, ip->ip_off, ip->ip_len);
1475
1476 for (m = m0; m; m = m0) {
1477 m0 = m->m_nextpkt;
1478 m->m_nextpkt = 0;
1479 #if IPSEC
1480 /* clean ipsec history once it goes out of the node */
1481 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0)
1482 ipsec_delaux(m);
1483 #endif
1484 if (error == 0) {
1485 struct rtentry *rte;
1486 #ifndef __APPLE__
1487 /* Record statistics for this interface address. */
1488 if (ia != NULL) {
1489 ia->ia_ifa.if_opackets++;
1490 ia->ia_ifa.if_obytes += m->m_pkthdr.len;
1491 }
1492 #endif
1493 if ((packetchain != 0) && (pktcnt > 0))
1494 panic("ip_output: mix of packet in packetlist is wrong=%p", packetlist);
1495 lck_mtx_lock(rt_mtx);
1496 if ((rte = ro->ro_rt) != NULL)
1497 rtref(rte);
1498 lck_mtx_unlock(rt_mtx);
1499 error = ifnet_output(ifp, PF_INET, m, rte,
1500 (struct sockaddr *)dst);
1501 if (rte != NULL)
1502 rtfree(rte);
1503 } else
1504 m_freem(m);
1505 }
1506
1507 if (error == 0)
1508 OSAddAtomic(1, (SInt32*)&ipstat.ips_fragmented);
1509 }
1510 done:
1511 if (ia) {
1512 ifafree(&ia->ia_ifa);
1513 ia = NULL;
1514 }
1515 #if IPSEC
1516 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) {
1517 if (ro == &iproute && ro->ro_rt) {
1518 rtfree(ro->ro_rt);
1519 ro->ro_rt = NULL;
1520 }
1521 if (sp != NULL) {
1522 KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
1523 printf("DP ip_output call free SP:%x\n", sp));
1524 key_freesp(sp, KEY_SADB_UNLOCKED);
1525 }
1526 }
1527 #endif /* IPSEC */
1528
1529 KERNEL_DEBUG(DBG_FNC_IP_OUTPUT | DBG_FUNC_END, error,0,0,0,0);
1530 return (error);
1531 bad:
1532 m_freem(m0);
1533 goto done;
1534 }
1535
1536 static void
1537 ip_out_cksum_stats(int proto, u_int32_t len)
1538 {
1539 switch (proto) {
1540 case IPPROTO_TCP:
1541 tcp_out_cksum_stats(len);
1542 break;
1543 case IPPROTO_UDP:
1544 udp_out_cksum_stats(len);
1545 break;
1546 default:
1547 /* keep only TCP or UDP stats for now */
1548 break;
1549 }
1550 }
1551
1552 void
1553 in_delayed_cksum_offset(struct mbuf *m0, int ip_offset)
1554 {
1555 struct ip *ip;
1556 unsigned char buf[sizeof(struct ip)];
1557 u_short csum, offset, ip_len;
1558 struct mbuf *m = m0;
1559
1560 while (ip_offset >= m->m_len) {
1561 ip_offset -= m->m_len;
1562 m = m->m_next;
1563 if (m == NULL) {
1564 printf("in_delayed_cksum_withoffset failed - ip_offset wasn't in the packet\n");
1565 return;
1566 }
1567 }
1568
1569 /* Sometimes the IP header is not contiguous, yes this can happen! */
1570 if (ip_offset + sizeof(struct ip) > m->m_len) {
1571 #if DEBUG
1572 printf("delayed m_pullup, m->len: %ld off: %d\n",
1573 m->m_len, ip_offset);
1574 #endif
1575 m_copydata(m, ip_offset, sizeof(struct ip), (caddr_t) buf);
1576
1577 ip = (struct ip *)buf;
1578 } else {
1579 ip = (struct ip*)(m->m_data + ip_offset);
1580 }
1581
1582 /* Gross */
1583 if (ip_offset) {
1584 m->m_len -= ip_offset;
1585 m->m_data += ip_offset;
1586 }
1587
1588 offset = IP_VHL_HL(ip->ip_vhl) << 2 ;
1589
1590 /*
1591 * We could be in the context of an IP or interface filter; in the
1592 * former case, ip_len would be in host (correct) order while for
1593 * the latter it would be in network order. Because of this, we
1594 * attempt to interpret the length field by comparing it against
1595 * the actual packet length. If the comparison fails, byte swap
1596 * the length and check again. If it still fails, then the packet
1597 * is bogus and we give up.
1598 */
1599 ip_len = ip->ip_len;
1600 if (ip_len != (m0->m_pkthdr.len - ip_offset)) {
1601 ip_len = SWAP16(ip_len);
1602 if (ip_len != (m0->m_pkthdr.len - ip_offset)) {
1603 printf("in_delayed_cksum_offset: ip_len %d (%d) "
1604 "doesn't match actual length %d\n", ip->ip_len,
1605 ip_len, (m0->m_pkthdr.len - ip_offset));
1606 return;
1607 }
1608 }
1609
1610 csum = in_cksum_skip(m, ip_len, offset);
1611
1612 /* Update stats */
1613 ip_out_cksum_stats(ip->ip_p, ip_len - offset);
1614
1615 if (m0->m_pkthdr.csum_flags & CSUM_UDP && csum == 0)
1616 csum = 0xffff;
1617 offset += m0->m_pkthdr.csum_data & 0xFFFF; /* checksum offset */
1618
1619 /* Gross */
1620 if (ip_offset) {
1621 if (M_LEADINGSPACE(m) < ip_offset)
1622 panic("in_delayed_cksum_offset - chain modified!\n");
1623 m->m_len += ip_offset;
1624 m->m_data -= ip_offset;
1625 }
1626
1627 if (offset > ip_len) /* bogus offset */
1628 return;
1629
1630 /* Insert the checksum in the existing chain */
1631 if (offset + ip_offset + sizeof(u_short) > m->m_len) {
1632 char tmp[2];
1633
1634 #if DEBUG
1635 printf("delayed m_copyback, m->len: %ld off: %d p: %d\n",
1636 m->m_len, offset + ip_offset, ip->ip_p);
1637 #endif
1638 *(u_short *)tmp = csum;
1639 m_copyback(m, offset + ip_offset, 2, tmp);
1640 } else
1641 *(u_short *)(m->m_data + offset + ip_offset) = csum;
1642 }
1643
/*
 * Shorthand for in_delayed_cksum_offset() on a chain whose IP header is
 * located at offset zero.
 */
void
in_delayed_cksum(struct mbuf *m)
{
	const int ip_hdr_offset = 0;	/* IP header starts the chain */

	in_delayed_cksum_offset(m, ip_hdr_offset);
}
1649
1650 void
1651 in_cksum_offset(struct mbuf* m, size_t ip_offset)
1652 {
1653 struct ip* ip = NULL;
1654 int hlen = 0;
1655 unsigned char buf[sizeof(struct ip)];
1656 int swapped = 0;
1657
1658 while (ip_offset >= m->m_len) {
1659 ip_offset -= m->m_len;
1660 m = m->m_next;
1661 if (m == NULL) {
1662 printf("in_cksum_offset failed - ip_offset wasn't in the packet\n");
1663 return;
1664 }
1665 }
1666
1667 /* Sometimes the IP header is not contiguous, yes this can happen! */
1668 if (ip_offset + sizeof(struct ip) > m->m_len) {
1669
1670 #if DEBUG
1671 printf("in_cksum_offset - delayed m_pullup, m->len: %ld off: %lu\n",
1672 m->m_len, ip_offset);
1673 #endif
1674 m_copydata(m, ip_offset, sizeof(struct ip), (caddr_t) buf);
1675
1676 ip = (struct ip *)buf;
1677 ip->ip_sum = 0;
1678 m_copyback(m, ip_offset + offsetof(struct ip, ip_sum), 2, (caddr_t)&ip->ip_sum);
1679 } else {
1680 ip = (struct ip*)(m->m_data + ip_offset);
1681 ip->ip_sum = 0;
1682 }
1683
1684 /* Gross */
1685 if (ip_offset) {
1686 m->m_len -= ip_offset;
1687 m->m_data += ip_offset;
1688 }
1689
1690 #ifdef _IP_VHL
1691 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
1692 #else
1693 hlen = ip->ip_hl << 2;
1694 #endif
1695 /*
1696 * We could be in the context of an IP or interface filter; in the
1697 * former case, ip_len would be in host order while for the latter
1698 * it would be in network (correct) order. Because of this, we
1699 * attempt to interpret the length field by comparing it against
1700 * the actual packet length. If the comparison fails, byte swap
1701 * the length and check again. If it still fails, then the packet
1702 * is bogus and we give up.
1703 */
1704 if (ntohs(ip->ip_len) != (m->m_pkthdr.len - ip_offset)) {
1705 ip->ip_len = SWAP16(ip->ip_len);
1706 swapped = 1;
1707 if (ntohs(ip->ip_len) != (m->m_pkthdr.len - ip_offset)) {
1708 ip->ip_len = SWAP16(ip->ip_len);
1709 printf("in_cksum_offset: ip_len %d (%d) "
1710 "doesn't match actual length %lu\n",
1711 ip->ip_len, SWAP16(ip->ip_len),
1712 (m->m_pkthdr.len - ip_offset));
1713 return;
1714 }
1715 }
1716
1717 ip->ip_sum = 0;
1718 ip->ip_sum = in_cksum(m, hlen);
1719 if (swapped)
1720 ip->ip_len = SWAP16(ip->ip_len);
1721
1722 /* Gross */
1723 if (ip_offset) {
1724 if (M_LEADINGSPACE(m) < ip_offset)
1725 panic("in_cksum_offset - chain modified!\n");
1726 m->m_len += ip_offset;
1727 m->m_data -= ip_offset;
1728 }
1729
1730 /* Insert the checksum in the existing chain if IP header not contiguous */
1731 if (ip_offset + sizeof(struct ip) > m->m_len) {
1732 char tmp[2];
1733
1734 #if DEBUG
1735 printf("in_cksum_offset m_copyback, m->len: %lu off: %lu p: %d\n",
1736 m->m_len, ip_offset + offsetof(struct ip, ip_sum), ip->ip_p);
1737 #endif
1738 *(u_short *)tmp = ip->ip_sum;
1739 m_copyback(m, ip_offset + offsetof(struct ip, ip_sum), 2, tmp);
1740 }
1741 }
1742
1743 /*
1744 * Insert IP options into preformed packet.
1745 * Adjust IP destination as required for IP source routing,
1746 * as indicated by a non-zero in_addr at the start of the options.
1747 *
1748 * XXX This routine assumes that the packet has no options in place.
1749 */
1750 static struct mbuf *
1751 ip_insertoptions(m, opt, phlen)
1752 register struct mbuf *m;
1753 struct mbuf *opt;
1754 int *phlen;
1755 {
1756 register struct ipoption *p = mtod(opt, struct ipoption *);
1757 struct mbuf *n;
1758 register struct ip *ip = mtod(m, struct ip *);
1759 unsigned optlen;
1760
1761 optlen = opt->m_len - sizeof(p->ipopt_dst);
1762 if (optlen + (u_short)ip->ip_len > IP_MAXPACKET)
1763 return (m); /* XXX should fail */
1764 if (p->ipopt_dst.s_addr)
1765 ip->ip_dst = p->ipopt_dst;
1766 if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
1767 MGETHDR(n, M_DONTWAIT, MT_HEADER); /* MAC-OK */
1768 if (n == 0)
1769 return (m);
1770 n->m_pkthdr.rcvif = 0;
1771 #if CONFIG_MACF_NET
1772 mac_mbuf_label_copy(m, n);
1773 #endif
1774 n->m_pkthdr.len = m->m_pkthdr.len + optlen;
1775 m->m_len -= sizeof(struct ip);
1776 m->m_data += sizeof(struct ip);
1777 n->m_next = m;
1778 m = n;
1779 m->m_len = optlen + sizeof(struct ip);
1780 m->m_data += max_linkhdr;
1781 (void)memcpy(mtod(m, void *), ip, sizeof(struct ip));
1782 } else {
1783 m->m_data -= optlen;
1784 m->m_len += optlen;
1785 m->m_pkthdr.len += optlen;
1786 ovbcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
1787 }
1788 ip = mtod(m, struct ip *);
1789 bcopy(p->ipopt_list, ip + 1, optlen);
1790 *phlen = sizeof(struct ip) + optlen;
1791 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, *phlen >> 2);
1792 ip->ip_len += optlen;
1793 return (m);
1794 }
1795
1796 /*
1797 * Copy options from ip to jp,
1798 * omitting those not copied during fragmentation.
1799 */
1800 int
1801 ip_optcopy(ip, jp)
1802 struct ip *ip, *jp;
1803 {
1804 register u_char *cp, *dp;
1805 int opt, optlen, cnt;
1806
1807 cp = (u_char *)(ip + 1);
1808 dp = (u_char *)(jp + 1);
1809 cnt = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof (struct ip);
1810 for (; cnt > 0; cnt -= optlen, cp += optlen) {
1811 opt = cp[0];
1812 if (opt == IPOPT_EOL)
1813 break;
1814 if (opt == IPOPT_NOP) {
1815 /* Preserve for IP mcast tunnel's LSRR alignment. */
1816 *dp++ = IPOPT_NOP;
1817 optlen = 1;
1818 continue;
1819 }
1820 #if DIAGNOSTIC
1821 if (cnt < IPOPT_OLEN + sizeof(*cp))
1822 panic("malformed IPv4 option passed to ip_optcopy");
1823 #endif
1824 optlen = cp[IPOPT_OLEN];
1825 #if DIAGNOSTIC
1826 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
1827 panic("malformed IPv4 option passed to ip_optcopy");
1828 #endif
1829 /* bogus lengths should have been caught by ip_dooptions */
1830 if (optlen > cnt)
1831 optlen = cnt;
1832 if (IPOPT_COPIED(opt)) {
1833 bcopy(cp, dp, optlen);
1834 dp += optlen;
1835 }
1836 }
1837 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
1838 *dp++ = IPOPT_EOL;
1839 return (optlen);
1840 }
1841
1842 /*
1843 * IP socket option processing.
1844 */
1845 int
1846 ip_ctloutput(so, sopt)
1847 struct socket *so;
1848 struct sockopt *sopt;
1849 {
1850 struct inpcb *inp = sotoinpcb(so);
1851 int error, optval;
1852
1853 error = optval = 0;
1854 if (sopt->sopt_level != IPPROTO_IP) {
1855 return (EINVAL);
1856 }
1857
1858 switch (sopt->sopt_dir) {
1859 case SOPT_SET:
1860 switch (sopt->sopt_name) {
1861 case IP_OPTIONS:
1862 #ifdef notyet
1863 case IP_RETOPTS:
1864 #endif
1865 {
1866 struct mbuf *m;
1867 if (sopt->sopt_valsize > MLEN) {
1868 error = EMSGSIZE;
1869 break;
1870 }
1871 MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_HEADER);
1872 if (m == 0) {
1873 error = ENOBUFS;
1874 break;
1875 }
1876 m->m_len = sopt->sopt_valsize;
1877 error = sooptcopyin(sopt, mtod(m, char *), m->m_len,
1878 m->m_len);
1879 if (error)
1880 break;
1881
1882 return (ip_pcbopts(sopt->sopt_name, &inp->inp_options,
1883 m));
1884 }
1885
1886 case IP_TOS:
1887 case IP_TTL:
1888 case IP_RECVOPTS:
1889 case IP_RECVRETOPTS:
1890 case IP_RECVDSTADDR:
1891 case IP_RECVIF:
1892 case IP_RECVTTL:
1893 #if defined(NFAITH) && NFAITH > 0
1894 case IP_FAITH:
1895 #endif
1896 error = sooptcopyin(sopt, &optval, sizeof optval,
1897 sizeof optval);
1898 if (error)
1899 break;
1900
1901 switch (sopt->sopt_name) {
1902 case IP_TOS:
1903 inp->inp_ip_tos = optval;
1904 break;
1905
1906 case IP_TTL:
1907 inp->inp_ip_ttl = optval;
1908 break;
1909 #define OPTSET(bit) \
1910 if (optval) \
1911 inp->inp_flags |= bit; \
1912 else \
1913 inp->inp_flags &= ~bit;
1914
1915 case IP_RECVOPTS:
1916 OPTSET(INP_RECVOPTS);
1917 break;
1918
1919 case IP_RECVRETOPTS:
1920 OPTSET(INP_RECVRETOPTS);
1921 break;
1922
1923 case IP_RECVDSTADDR:
1924 OPTSET(INP_RECVDSTADDR);
1925 break;
1926
1927 case IP_RECVIF:
1928 OPTSET(INP_RECVIF);
1929 break;
1930
1931 case IP_RECVTTL:
1932 OPTSET(INP_RECVTTL);
1933 break;
1934
1935 #if defined(NFAITH) && NFAITH > 0
1936 case IP_FAITH:
1937 OPTSET(INP_FAITH);
1938 break;
1939 #endif
1940 }
1941 break;
1942 #undef OPTSET
1943
1944 #if CONFIG_FORCE_OUT_IFP
1945 case IP_FORCE_OUT_IFP: {
1946 char ifname[IFNAMSIZ];
1947 ifnet_t ifp;
1948
1949 /* Verify interface name parameter is sane */
1950 if (sopt->sopt_valsize > sizeof(ifname)) {
1951 error = EINVAL;
1952 break;
1953 }
1954
1955 /* Copy the interface name */
1956 if (sopt->sopt_valsize != 0) {
1957 error = sooptcopyin(sopt, ifname, sizeof(ifname), sopt->sopt_valsize);
1958 if (error)
1959 break;
1960 }
1961
1962 if (sopt->sopt_valsize == 0 || ifname[0] == 0) {
1963 // Set pdp_ifp to NULL
1964 inp->pdp_ifp = NULL;
1965
1966 // Flush the route
1967 if (inp->inp_route.ro_rt) {
1968 rtfree(inp->inp_route.ro_rt);
1969 inp->inp_route.ro_rt = NULL;
1970 }
1971
1972 break;
1973 }
1974
1975 /* Verify name is NULL terminated */
1976 if (ifname[sopt->sopt_valsize - 1] != 0) {
1977 error = EINVAL;
1978 break;
1979 }
1980
1981 if (ifnet_find_by_name(ifname, &ifp) != 0) {
1982 error = ENXIO;
1983 break;
1984 }
1985
1986 /* Won't actually free. Since we don't release this later, we should do it now. */
1987 ifnet_release(ifp);
1988
1989 /* This only works for point-to-point interfaces */
1990 if ((ifp->if_flags & IFF_POINTOPOINT) == 0) {
1991 error = ENOTSUP;
1992 break;
1993 }
1994
1995 inp->pdp_ifp = ifp;
1996 }
1997 break;
1998 #endif
1999 case IP_MULTICAST_IF:
2000 case IP_MULTICAST_VIF:
2001 case IP_MULTICAST_TTL:
2002 case IP_MULTICAST_LOOP:
2003 case IP_ADD_MEMBERSHIP:
2004 case IP_DROP_MEMBERSHIP:
2005 error = ip_setmoptions(sopt, &inp->inp_moptions);
2006 break;
2007
2008 case IP_PORTRANGE:
2009 error = sooptcopyin(sopt, &optval, sizeof optval,
2010 sizeof optval);
2011 if (error)
2012 break;
2013
2014 switch (optval) {
2015 case IP_PORTRANGE_DEFAULT:
2016 inp->inp_flags &= ~(INP_LOWPORT);
2017 inp->inp_flags &= ~(INP_HIGHPORT);
2018 break;
2019
2020 case IP_PORTRANGE_HIGH:
2021 inp->inp_flags &= ~(INP_LOWPORT);
2022 inp->inp_flags |= INP_HIGHPORT;
2023 break;
2024
2025 case IP_PORTRANGE_LOW:
2026 inp->inp_flags &= ~(INP_HIGHPORT);
2027 inp->inp_flags |= INP_LOWPORT;
2028 break;
2029
2030 default:
2031 error = EINVAL;
2032 break;
2033 }
2034 break;
2035
2036 #if IPSEC
2037 case IP_IPSEC_POLICY:
2038 {
2039 caddr_t req = NULL;
2040 size_t len = 0;
2041 int priv;
2042 struct mbuf *m;
2043 int optname;
2044
2045 if (sopt->sopt_valsize > MCLBYTES) {
2046 error = EMSGSIZE;
2047 break;
2048 }
2049 if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
2050 break;
2051 if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
2052 break;
2053 priv = (sopt->sopt_p != NULL &&
2054 proc_suser(sopt->sopt_p) != 0) ? 0 : 1;
2055 if (m) {
2056 req = mtod(m, caddr_t);
2057 len = m->m_len;
2058 }
2059 optname = sopt->sopt_name;
2060 error = ipsec4_set_policy(inp, optname, req, len, priv);
2061 m_freem(m);
2062 break;
2063 }
2064 #endif /*IPSEC*/
2065
2066 #if TRAFFIC_MGT
2067 case IP_TRAFFIC_MGT_BACKGROUND:
2068 {
2069 unsigned background = 0;
2070 error = sooptcopyin(sopt, &background, sizeof(background), sizeof(background));
2071 if (error)
2072 break;
2073
2074 if (background)
2075 so->so_traffic_mgt_flags |= TRAFFIC_MGT_SO_BACKGROUND;
2076 else
2077 so->so_traffic_mgt_flags &= ~TRAFFIC_MGT_SO_BACKGROUND;
2078
2079 break;
2080 }
2081 #endif /* TRAFFIC_MGT */
2082
2083 default:
2084 error = ENOPROTOOPT;
2085 break;
2086 }
2087 break;
2088
2089 case SOPT_GET:
2090 switch (sopt->sopt_name) {
2091 case IP_OPTIONS:
2092 case IP_RETOPTS:
2093 if (inp->inp_options)
2094 error = sooptcopyout(sopt,
2095 mtod(inp->inp_options,
2096 char *),
2097 inp->inp_options->m_len);
2098 else
2099 sopt->sopt_valsize = 0;
2100 break;
2101
2102 case IP_TOS:
2103 case IP_TTL:
2104 case IP_RECVOPTS:
2105 case IP_RECVRETOPTS:
2106 case IP_RECVDSTADDR:
2107 case IP_RECVIF:
2108 case IP_RECVTTL:
2109 case IP_PORTRANGE:
2110 #if defined(NFAITH) && NFAITH > 0
2111 case IP_FAITH:
2112 #endif
2113 switch (sopt->sopt_name) {
2114
2115 case IP_TOS:
2116 optval = inp->inp_ip_tos;
2117 break;
2118
2119 case IP_TTL:
2120 optval = inp->inp_ip_ttl;
2121 break;
2122
2123 #define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0)
2124
2125 case IP_RECVOPTS:
2126 optval = OPTBIT(INP_RECVOPTS);
2127 break;
2128
2129 case IP_RECVRETOPTS:
2130 optval = OPTBIT(INP_RECVRETOPTS);
2131 break;
2132
2133 case IP_RECVDSTADDR:
2134 optval = OPTBIT(INP_RECVDSTADDR);
2135 break;
2136
2137 case IP_RECVIF:
2138 optval = OPTBIT(INP_RECVIF);
2139 break;
2140
2141 case IP_RECVTTL:
2142 optval = OPTBIT(INP_RECVTTL);
2143 break;
2144
2145 case IP_PORTRANGE:
2146 if (inp->inp_flags & INP_HIGHPORT)
2147 optval = IP_PORTRANGE_HIGH;
2148 else if (inp->inp_flags & INP_LOWPORT)
2149 optval = IP_PORTRANGE_LOW;
2150 else
2151 optval = 0;
2152 break;
2153
2154 #if defined(NFAITH) && NFAITH > 0
2155 case IP_FAITH:
2156 optval = OPTBIT(INP_FAITH);
2157 break;
2158 #endif
2159 }
2160 error = sooptcopyout(sopt, &optval, sizeof optval);
2161 break;
2162
2163 case IP_MULTICAST_IF:
2164 case IP_MULTICAST_VIF:
2165 case IP_MULTICAST_TTL:
2166 case IP_MULTICAST_LOOP:
2167 case IP_ADD_MEMBERSHIP:
2168 case IP_DROP_MEMBERSHIP:
2169 error = ip_getmoptions(sopt, inp->inp_moptions);
2170 break;
2171
2172 #if IPSEC
2173 case IP_IPSEC_POLICY:
2174 {
2175 struct mbuf *m = NULL;
2176 caddr_t req = NULL;
2177 size_t len = 0;
2178
2179 if (m != 0) {
2180 req = mtod(m, caddr_t);
2181 len = m->m_len;
2182 }
2183 error = ipsec4_get_policy(sotoinpcb(so), req, len, &m);
2184 if (error == 0)
2185 error = soopt_mcopyout(sopt, m); /* XXX */
2186 if (error == 0)
2187 m_freem(m);
2188 break;
2189 }
2190 #endif /*IPSEC*/
2191
2192 #if TRAFFIC_MGT
2193 case IP_TRAFFIC_MGT_BACKGROUND:
2194 {
2195 unsigned background = so->so_traffic_mgt_flags;
2196 return (sooptcopyout(sopt, &background, sizeof(background)));
2197 break;
2198 }
2199 #endif /* TRAFFIC_MGT */
2200
2201 default:
2202 error = ENOPROTOOPT;
2203 break;
2204 }
2205 break;
2206 }
2207 return (error);
2208 }
2209
2210 /*
2211 * Set up IP options in pcb for insertion in output packets.
2212 * Store in mbuf with pointer in pcbopt, adding pseudo-option
2213 * with destination address if source routed.
2214 */
2215 static int
2216 ip_pcbopts(
2217 __unused int optname,
2218 struct mbuf **pcbopt,
2219 register struct mbuf *m)
2220 {
2221 register int cnt, optlen;
2222 register u_char *cp;
2223 u_char opt;
2224
2225 /* turn off any old options */
2226 if (*pcbopt)
2227 (void)m_free(*pcbopt);
2228 *pcbopt = 0;
2229 if (m == (struct mbuf *)0 || m->m_len == 0) {
2230 /*
2231 * Only turning off any previous options.
2232 */
2233 if (m)
2234 (void)m_free(m);
2235 return (0);
2236 }
2237
2238 #ifndef vax
2239 if (m->m_len % sizeof(int32_t))
2240 goto bad;
2241 #endif
2242 /*
2243 * IP first-hop destination address will be stored before
2244 * actual options; move other options back
2245 * and clear it when none present.
2246 */
2247 if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
2248 goto bad;
2249 cnt = m->m_len;
2250 m->m_len += sizeof(struct in_addr);
2251 cp = mtod(m, u_char *) + sizeof(struct in_addr);
2252 ovbcopy(mtod(m, caddr_t), (caddr_t)cp, (unsigned)cnt);
2253 bzero(mtod(m, caddr_t), sizeof(struct in_addr));
2254
2255 for (; cnt > 0; cnt -= optlen, cp += optlen) {
2256 opt = cp[IPOPT_OPTVAL];
2257 if (opt == IPOPT_EOL)
2258 break;
2259 if (opt == IPOPT_NOP)
2260 optlen = 1;
2261 else {
2262 if (cnt < IPOPT_OLEN + sizeof(*cp))
2263 goto bad;
2264 optlen = cp[IPOPT_OLEN];
2265 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
2266 goto bad;
2267 }
2268 switch (opt) {
2269
2270 default:
2271 break;
2272
2273 case IPOPT_LSRR:
2274 case IPOPT_SSRR:
2275 /*
2276 * user process specifies route as:
2277 * ->A->B->C->D
2278 * D must be our final destination (but we can't
2279 * check that since we may not have connected yet).
2280 * A is first hop destination, which doesn't appear in
2281 * actual IP option, but is stored before the options.
2282 */
2283 if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
2284 goto bad;
2285 m->m_len -= sizeof(struct in_addr);
2286 cnt -= sizeof(struct in_addr);
2287 optlen -= sizeof(struct in_addr);
2288 cp[IPOPT_OLEN] = optlen;
2289 /*
2290 * Move first hop before start of options.
2291 */
2292 bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
2293 sizeof(struct in_addr));
2294 /*
2295 * Then copy rest of options back
2296 * to close up the deleted entry.
2297 */
2298 ovbcopy((caddr_t)(&cp[IPOPT_OFFSET+1] +
2299 sizeof(struct in_addr)),
2300 (caddr_t)&cp[IPOPT_OFFSET+1],
2301 (unsigned)cnt + sizeof(struct in_addr));
2302 break;
2303 }
2304 }
2305 if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
2306 goto bad;
2307 *pcbopt = m;
2308 return (0);
2309
2310 bad:
2311 (void)m_free(m);
2312 return (EINVAL);
2313 }
2314
2315 /*
2316 * XXX
2317 * The whole multicast option thing needs to be re-thought.
2318 * Several of these options are equally applicable to non-multicast
2319 * transmission, and one (IP_MULTICAST_TTL) totally duplicates a
2320 * standard option (IP_TTL).
2321 */
2322
2323 /*
2324 * following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as interface index.
2325 */
2326 static struct ifnet *
2327 ip_multicast_if(a, ifindexp)
2328 struct in_addr *a;
2329 int *ifindexp;
2330 {
2331 int ifindex;
2332 struct ifnet *ifp;
2333
2334 if (ifindexp)
2335 *ifindexp = 0;
2336 if (ntohl(a->s_addr) >> 24 == 0) {
2337 ifindex = ntohl(a->s_addr) & 0xffffff;
2338 ifnet_head_lock_shared();
2339 if (ifindex < 0 || if_index < ifindex) {
2340 ifnet_head_done();
2341 return NULL;
2342 }
2343 ifp = ifindex2ifnet[ifindex];
2344 ifnet_head_done();
2345 if (ifindexp)
2346 *ifindexp = ifindex;
2347 } else {
2348 INADDR_TO_IFP(*a, ifp);
2349 }
2350 return ifp;
2351 }
2352
2353 /*
2354 * Set the IP multicast options in response to user setsockopt().
2355 */
2356 static int
2357 ip_setmoptions(sopt, imop)
2358 struct sockopt *sopt;
2359 struct ip_moptions **imop;
2360 {
2361 int error = 0;
2362 int i;
2363 struct in_addr addr;
2364 struct ip_mreq mreq;
2365 struct ifnet *ifp = NULL;
2366 struct ip_moptions *imo = *imop;
2367 int ifindex;
2368
2369 if (imo == NULL) {
2370 /*
2371 * No multicast option buffer attached to the pcb;
2372 * allocate one and initialize to default values.
2373 */
2374 error = ip_createmoptions(imop);
2375 if (error != 0)
2376 return error;
2377 imo = *imop;
2378 }
2379
2380 switch (sopt->sopt_name) {
2381 /* store an index number for the vif you wanna use in the send */
2382 #if MROUTING
2383 case IP_MULTICAST_VIF:
2384 if (legal_vif_num == 0) {
2385 error = EOPNOTSUPP;
2386 break;
2387 }
2388 error = sooptcopyin(sopt, &i, sizeof i, sizeof i);
2389 if (error)
2390 break;
2391 if (!legal_vif_num(i) && (i != -1)) {
2392 error = EINVAL;
2393 break;
2394 }
2395 imo->imo_multicast_vif = i;
2396 break;
2397 #endif /* MROUTING */
2398
2399 case IP_MULTICAST_IF:
2400 /*
2401 * Select the interface for outgoing multicast packets.
2402 */
2403 error = sooptcopyin(sopt, &addr, sizeof addr, sizeof addr);
2404 if (error)
2405 break;
2406 /*
2407 * INADDR_ANY is used to remove a previous selection.
2408 * When no interface is selected, a default one is
2409 * chosen every time a multicast packet is sent.
2410 */
2411 if (addr.s_addr == INADDR_ANY) {
2412 imo->imo_multicast_ifp = NULL;
2413 break;
2414 }
2415 /*
2416 * The selected interface is identified by its local
2417 * IP address. Find the interface and confirm that
2418 * it supports multicasting.
2419 */
2420 ifp = ip_multicast_if(&addr, &ifindex);
2421 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
2422 error = EADDRNOTAVAIL;
2423 break;
2424 }
2425 imo->imo_multicast_ifp = ifp;
2426 if (ifindex)
2427 imo->imo_multicast_addr = addr;
2428 else
2429 imo->imo_multicast_addr.s_addr = INADDR_ANY;
2430 break;
2431
2432 case IP_MULTICAST_TTL:
2433 /*
2434 * Set the IP time-to-live for outgoing multicast packets.
2435 * The original multicast API required a char argument,
2436 * which is inconsistent with the rest of the socket API.
2437 * We allow either a char or an int.
2438 */
2439 if (sopt->sopt_valsize == 1) {
2440 u_char ttl;
2441 error = sooptcopyin(sopt, &ttl, 1, 1);
2442 if (error)
2443 break;
2444 imo->imo_multicast_ttl = ttl;
2445 } else {
2446 u_int ttl;
2447 error = sooptcopyin(sopt, &ttl, sizeof ttl,
2448 sizeof ttl);
2449 if (error)
2450 break;
2451 if (ttl > 255)
2452 error = EINVAL;
2453 else
2454 imo->imo_multicast_ttl = ttl;
2455 }
2456 break;
2457
2458 case IP_MULTICAST_LOOP:
2459 /*
2460 * Set the loopback flag for outgoing multicast packets.
2461 * Must be zero or one. The original multicast API required a
2462 * char argument, which is inconsistent with the rest
2463 * of the socket API. We allow either a char or an int.
2464 */
2465 if (sopt->sopt_valsize == 1) {
2466 u_char loop;
2467 error = sooptcopyin(sopt, &loop, 1, 1);
2468 if (error)
2469 break;
2470 imo->imo_multicast_loop = !!loop;
2471 } else {
2472 u_int loop;
2473 error = sooptcopyin(sopt, &loop, sizeof loop,
2474 sizeof loop);
2475 if (error)
2476 break;
2477 imo->imo_multicast_loop = !!loop;
2478 }
2479 break;
2480
2481 case IP_ADD_MEMBERSHIP:
2482 /*
2483 * Add a multicast group membership.
2484 * Group must be a valid IP multicast address.
2485 */
2486 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
2487 if (error)
2488 break;
2489
2490 error = ip_addmembership(imo, &mreq);
2491 break;
2492
2493 case IP_DROP_MEMBERSHIP:
2494 /*
2495 * Drop a multicast group membership.
2496 * Group must be a valid IP multicast address.
2497 */
2498 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
2499 if (error)
2500 break;
2501
2502 error = ip_dropmembership(imo, &mreq);
2503 break;
2504
2505 default:
2506 error = EOPNOTSUPP;
2507 break;
2508 }
2509
2510 /*
2511 * If all options have default values, no need to keep the mbuf.
2512 */
2513 if (imo->imo_multicast_ifp == NULL &&
2514 imo->imo_multicast_vif == (u_long)-1 &&
2515 imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL &&
2516 imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP &&
2517 imo->imo_num_memberships == 0) {
2518 FREE(*imop, M_IPMOPTS);
2519 *imop = NULL;
2520 }
2521
2522 return (error);
2523 }
2524
2525 /*
2526 * Set the IP multicast options in response to user setsockopt().
2527 */
2528 __private_extern__ int
2529 ip_createmoptions(
2530 struct ip_moptions **imop)
2531 {
2532 struct ip_moptions *imo;
2533 imo = (struct ip_moptions*) _MALLOC(sizeof(*imo), M_IPMOPTS,
2534 M_WAITOK);
2535
2536 if (imo == NULL)
2537 return (ENOBUFS);
2538 *imop = imo;
2539 imo->imo_multicast_ifp = NULL;
2540 imo->imo_multicast_addr.s_addr = INADDR_ANY;
2541 imo->imo_multicast_vif = -1;
2542 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
2543 imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
2544 imo->imo_num_memberships = 0;
2545
2546 return 0;
2547 }
2548
2549 /*
2550 * Add membership to an IPv4 multicast.
2551 */
2552 __private_extern__ int
2553 ip_addmembership(
2554 struct ip_moptions *imo,
2555 struct ip_mreq *mreq)
2556 {
2557 struct route ro;
2558 struct sockaddr_in *dst;
2559 struct ifnet *ifp = NULL;
2560 int error = 0;
2561 int i;
2562
2563 if (!IN_MULTICAST(ntohl(mreq->imr_multiaddr.s_addr))) {
2564 error = EINVAL;
2565 return error;
2566 }
2567 /*
2568 * If no interface address was provided, use the interface of
2569 * the route to the given multicast address.
2570 */
2571 if (mreq->imr_interface.s_addr == INADDR_ANY) {
2572 bzero((caddr_t)&ro, sizeof(ro));
2573 dst = (struct sockaddr_in *)&ro.ro_dst;
2574 dst->sin_len = sizeof(*dst);
2575 dst->sin_family = AF_INET;
2576 dst->sin_addr = mreq->imr_multiaddr;
2577 lck_mtx_lock(rt_mtx);
2578 rtalloc_ign_locked(&ro, 0UL);
2579 if (ro.ro_rt != NULL) {
2580 ifp = ro.ro_rt->rt_ifp;
2581 rtfree_locked(ro.ro_rt);
2582 }
2583 else {
2584 /* If there's no default route, try using loopback */
2585 mreq->imr_interface.s_addr = INADDR_LOOPBACK;
2586 }
2587 lck_mtx_unlock(rt_mtx);
2588 }
2589
2590 if (ifp == NULL) {
2591 ifp = ip_multicast_if(&mreq->imr_interface, NULL);
2592 }
2593
2594 /*
2595 * See if we found an interface, and confirm that it
2596 * supports multicast.
2597 */
2598 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
2599 error = EADDRNOTAVAIL;
2600 return error;
2601 }
2602 /*
2603 * See if the membership already exists or if all the
2604 * membership slots are full.
2605 */
2606 for (i = 0; i < imo->imo_num_memberships; ++i) {
2607 if (imo->imo_membership[i]->inm_ifp == ifp &&
2608 imo->imo_membership[i]->inm_addr.s_addr
2609 == mreq->imr_multiaddr.s_addr)
2610 break;
2611 }
2612 if (i < imo->imo_num_memberships) {
2613 error = EADDRINUSE;
2614 return error;
2615 }
2616 if (i == IP_MAX_MEMBERSHIPS) {
2617 error = ETOOMANYREFS;
2618 return error;
2619 }
2620 /*
2621 * Everything looks good; add a new record to the multicast
2622 * address list for the given interface.
2623 */
2624 if ((imo->imo_membership[i] =
2625 in_addmulti(&mreq->imr_multiaddr, ifp)) == NULL) {
2626 error = ENOBUFS;
2627 return error;
2628 }
2629 ++imo->imo_num_memberships;
2630
2631 return error;
2632 }
2633
2634 /*
2635 * Drop membership of an IPv4 multicast.
2636 */
2637 __private_extern__ int
2638 ip_dropmembership(
2639 struct ip_moptions *imo,
2640 struct ip_mreq *mreq)
2641 {
2642 int error = 0;
2643 struct ifnet* ifp = NULL;
2644 int i;
2645
2646 if (!IN_MULTICAST(ntohl(mreq->imr_multiaddr.s_addr))) {
2647 error = EINVAL;
2648 return error;
2649 }
2650
2651 /*
2652 * If an interface address was specified, get a pointer
2653 * to its ifnet structure.
2654 */
2655 if (mreq->imr_interface.s_addr == INADDR_ANY)
2656 ifp = NULL;
2657 else {
2658 ifp = ip_multicast_if(&mreq->imr_interface, NULL);
2659 if (ifp == NULL) {
2660 error = EADDRNOTAVAIL;
2661 return error;
2662 }
2663 }
2664 /*
2665 * Find the membership in the membership array.
2666 */
2667 for (i = 0; i < imo->imo_num_memberships; ++i) {
2668 if ((ifp == NULL ||
2669 imo->imo_membership[i]->inm_ifp == ifp) &&
2670 imo->imo_membership[i]->inm_addr.s_addr ==
2671 mreq->imr_multiaddr.s_addr)
2672 break;
2673 }
2674 if (i == imo->imo_num_memberships) {
2675 error = EADDRNOTAVAIL;
2676 return error;
2677 }
2678 /*
2679 * Give up the multicast address record to which the
2680 * membership points.
2681 */
2682 in_delmulti(&imo->imo_membership[i]);
2683 /*
2684 * Remove the gap in the membership array.
2685 */
2686 for (++i; i < imo->imo_num_memberships; ++i)
2687 imo->imo_membership[i-1] = imo->imo_membership[i];
2688 --imo->imo_num_memberships;
2689
2690 return error;
2691 }
2692
2693 /*
2694 * Return the IP multicast options in response to user getsockopt().
2695 */
2696 static int
2697 ip_getmoptions(sopt, imo)
2698 struct sockopt *sopt;
2699 register struct ip_moptions *imo;
2700 {
2701 struct in_addr addr;
2702 struct in_ifaddr *ia;
2703 int error, optval;
2704 u_char coptval;
2705
2706 error = 0;
2707 switch (sopt->sopt_name) {
2708 #if MROUTING
2709 case IP_MULTICAST_VIF:
2710 if (imo != NULL)
2711 optval = imo->imo_multicast_vif;
2712 else
2713 optval = -1;
2714 error = sooptcopyout(sopt, &optval, sizeof optval);
2715 break;
2716 #endif /* MROUTING */
2717
2718 case IP_MULTICAST_IF:
2719 if (imo == NULL || imo->imo_multicast_ifp == NULL)
2720 addr.s_addr = INADDR_ANY;
2721 else if (imo->imo_multicast_addr.s_addr) {
2722 /* return the value user has set */
2723 addr = imo->imo_multicast_addr;
2724 } else {
2725 IFP_TO_IA(imo->imo_multicast_ifp, ia);
2726 addr.s_addr = (ia == NULL) ? INADDR_ANY
2727 : IA_SIN(ia)->sin_addr.s_addr;
2728 }
2729 error = sooptcopyout(sopt, &addr, sizeof addr);
2730 break;
2731
2732 case IP_MULTICAST_TTL:
2733 if (imo == 0)
2734 optval = coptval = IP_DEFAULT_MULTICAST_TTL;
2735 else
2736 optval = coptval = imo->imo_multicast_ttl;
2737 if (sopt->sopt_valsize == 1)
2738 error = sooptcopyout(sopt, &coptval, 1);
2739 else
2740 error = sooptcopyout(sopt, &optval, sizeof optval);
2741 break;
2742
2743 case IP_MULTICAST_LOOP:
2744 if (imo == 0)
2745 optval = coptval = IP_DEFAULT_MULTICAST_LOOP;
2746 else
2747 optval = coptval = imo->imo_multicast_loop;
2748 if (sopt->sopt_valsize == 1)
2749 error = sooptcopyout(sopt, &coptval, 1);
2750 else
2751 error = sooptcopyout(sopt, &optval, sizeof optval);
2752 break;
2753
2754 default:
2755 error = ENOPROTOOPT;
2756 break;
2757 }
2758 return (error);
2759 }
2760
2761 /*
2762 * Discard the IP multicast options.
2763 */
2764 void
2765 ip_freemoptions(imo)
2766 register struct ip_moptions *imo;
2767 {
2768 register int i;
2769
2770 if (imo != NULL) {
2771 for (i = 0; i < imo->imo_num_memberships; ++i)
2772 in_delmulti(&imo->imo_membership[i]);
2773 FREE(imo, M_IPMOPTS);
2774 }
2775 }
2776
2777 /*
2778 * Routine called from ip_output() to loop back a copy of an IP multicast
2779 * packet to the input queue of a specified interface. Note that this
2780 * calls the output routine of the loopback "driver", but with an interface
2781 * pointer that might NOT be a loopback interface -- evil, but easier than
2782 * replicating that code here.
2783 */
2784 static void
2785 ip_mloopback(ifp, m, dst, hlen)
2786 struct ifnet *ifp;
2787 register struct mbuf *m;
2788 register struct sockaddr_in *dst;
2789 int hlen;
2790 {
2791 register struct ip *ip;
2792 struct mbuf *copym;
2793 int sw_csum = (apple_hwcksum_tx == 0);
2794
2795 copym = m_copy(m, 0, M_COPYALL);
2796 if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen))
2797 copym = m_pullup(copym, hlen);
2798
2799 if (copym == NULL)
2800 return;
2801
2802 /*
2803 * We don't bother to fragment if the IP length is greater
2804 * than the interface's MTU. Can this possibly matter?
2805 */
2806 ip = mtod(copym, struct ip *);
2807 HTONS(ip->ip_len);
2808 HTONS(ip->ip_off);
2809 ip->ip_sum = 0;
2810 ip->ip_sum = in_cksum(copym, hlen);
2811 /*
2812 * NB:
2813 * It's not clear whether there are any lingering
2814 * reentrancy problems in other areas which might
2815 * be exposed by using ip_input directly (in
2816 * particular, everything which modifies the packet
2817 * in-place). Yet another option is using the
2818 * protosw directly to deliver the looped back
2819 * packet. For the moment, we'll err on the side
2820 * of safety by using if_simloop().
2821 */
2822 #if 1 /* XXX */
2823 if (dst->sin_family != AF_INET) {
2824 printf("ip_mloopback: bad address family %d\n",
2825 dst->sin_family);
2826 dst->sin_family = AF_INET;
2827 }
2828 #endif
2829
2830 /*
2831 * Mark checksum as valid or calculate checksum for loopback.
2832 *
2833 * This is done this way because we have to embed the ifp of
2834 * the interface we will send the original copy of the packet
2835 * out on in the mbuf. ip_input will check if_hwassist of the
2836 * embedded ifp and ignore all csum_flags if if_hwassist is 0.
2837 * The UDP checksum has not been calculated yet.
2838 */
2839 if (sw_csum || (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA)) {
2840 if (!sw_csum && IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist)) {
2841 copym->m_pkthdr.csum_flags |=
2842 CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
2843 CSUM_IP_CHECKED | CSUM_IP_VALID;
2844 copym->m_pkthdr.csum_data = 0xffff;
2845 } else {
2846 NTOHS(ip->ip_len);
2847 in_delayed_cksum(copym);
2848 HTONS(ip->ip_len);
2849 }
2850 }
2851
2852 /*
2853 * TedW:
2854 * We need to send all loopback traffic down to dlil in case
2855 * a filter has tapped-in.
2856 */
2857
2858 /*
2859 * Stuff the 'real' ifp into the pkthdr, to be used in matching
2860 * in ip_input(); we need the loopback ifp/dl_tag passed as args
2861 * to make the loopback driver compliant with the data link
2862 * requirements.
2863 */
2864 if (lo_ifp) {
2865 copym->m_pkthdr.rcvif = ifp;
2866 dlil_output(lo_ifp, PF_INET, copym, 0,
2867 (struct sockaddr *) dst, 0);
2868 } else {
2869 printf("Warning: ip_output call to dlil_find_dltag failed!\n");
2870 m_freem(copym);
2871 }
2872 }