]> git.saurik.com Git - apple/xnu.git/blob - bsd/netinet/ip_output.c
c065797e73441f6bc823ffbbb070fb48abb4a16e
[apple/xnu.git] / bsd / netinet / ip_output.c
1 /*
2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1982, 1986, 1988, 1990, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94
61 * $FreeBSD: src/sys/netinet/ip_output.c,v 1.99.2.16 2001/07/19 06:37:26 kris Exp $
62 */
63 /*
64 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
65 * support for mandatory and extensible security protections. This notice
66 * is included in support of clause 2.2 (b) of the Apple Public License,
67 * Version 2.0.
68 */
69
70 #define _IP_VHL
71
72 #include <sys/param.h>
73 #include <sys/systm.h>
74 #include <sys/kernel.h>
75 #include <sys/malloc.h>
76 #include <sys/mbuf.h>
77 #include <sys/protosw.h>
78 #include <sys/socket.h>
79 #include <sys/socketvar.h>
80 #include <kern/locks.h>
81 #include <sys/sysctl.h>
82
83 #include <net/if.h>
84 #include <net/route.h>
85
86 #include <netinet/in.h>
87 #include <netinet/in_systm.h>
88 #include <netinet/ip.h>
89 #include <netinet/in_pcb.h>
90 #include <netinet/in_var.h>
91 #include <netinet/ip_var.h>
92
93 #include <netinet/kpi_ipfilter_var.h>
94
95 #if CONFIG_MACF_NET
96 #include <security/mac_framework.h>
97 #endif
98
99 #include "faith.h"
100
101 #include <net/dlil.h>
102 #include <sys/kdebug.h>
103 #include <libkern/OSAtomic.h>
104
/*
 * kdebug trace codes for this file, each built with NETDBG_CODE() from
 * DBG_NETIP and an event number.  They are passed to KERNEL_DEBUG() by
 * ip_output_list().
 */
#define DBG_LAYER_BEG		NETDBG_CODE(DBG_NETIP, 1)
#define DBG_LAYER_END		NETDBG_CODE(DBG_NETIP, 3)
#define DBG_FNC_IP_OUTPUT	NETDBG_CODE(DBG_NETIP, (1 << 8) | 1)
#define DBG_FNC_IPSEC4_OUTPUT	NETDBG_CODE(DBG_NETIP, (2 << 8) | 1)

/*
 * Exchange the two low-order bytes of v.
 *
 * Both halves are masked, so the result is always in the range 0..0xffff
 * even when v is wider than 16 bits (previously the unmasked "v >> 8"
 * let bits above bit 15 leak into the result).  For 16-bit operands the
 * value is unchanged from the earlier definition.
 *
 * NOTE: v is evaluated twice; do not pass an expression with side effects.
 */
#define SWAP16(v)	((((v) & 0xff) << 8) | (((v) >> 8) & 0xff))
111
112 #if IPSEC
113 #include <netinet6/ipsec.h>
114 #include <netkey/key.h>
115 #if IPSEC_DEBUG
116 #include <netkey/key_debug.h>
117 #else
118 #define KEYDEBUG(lev,arg)
119 #endif
120 #endif /*IPSEC*/
121
122 #include <netinet/ip_fw.h>
123 #include <netinet/ip_divert.h>
124
125 #if DUMMYNET
126 #include <netinet/ip_dummynet.h>
127 #endif
128
#if IPFIREWALL_FORWARD_DEBUG
/*
 * Debug helper: print the IPv4 address held in the struct in_addr `a'
 * as a dotted quad, without a trailing newline.
 *
 *  - The argument is parenthesised and evaluated exactly once.
 *  - Each octet is cast to long so that it matches the "%ld" conversion;
 *    ntohl() yields a 32-bit unsigned value, which is not what "%ld"
 *    consumes where long is 64 bits wide.
 *  - The body is wrapped in do { } while (0) and carries no trailing
 *    semicolon, so "print_ip(x);" is a single statement and is safe in
 *    an unbraced if/else.
 */
#define print_ip(a) do {						\
	u_int32_t print_ip_haddr_ = ntohl((a).s_addr);			\
	printf("%ld.%ld.%ld.%ld",					\
	    (long)((print_ip_haddr_ >> 24) & 0xFF),			\
	    (long)((print_ip_haddr_ >> 16) & 0xFF),			\
	    (long)((print_ip_haddr_ >> 8) & 0xFF),			\
	    (long)(print_ip_haddr_ & 0xFF));				\
} while (0)
#endif
135
136
137 u_short ip_id;
138
139 static struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *);
140 static struct ifnet *ip_multicast_if(struct in_addr *, int *);
141 static void ip_mloopback(struct ifnet *, struct mbuf *,
142 struct sockaddr_in *, int);
143 static int ip_getmoptions(struct sockopt *, struct ip_moptions *);
144 static int ip_pcbopts(int, struct mbuf **, struct mbuf *);
145 static int ip_setmoptions(struct sockopt *, struct ip_moptions **);
146
147 static void ip_out_cksum_stats(int, u_int32_t);
148
149 int ip_createmoptions(struct ip_moptions **imop);
150 int ip_addmembership(struct ip_moptions *imo, struct ip_mreq *mreq);
151 int ip_dropmembership(struct ip_moptions *imo, struct ip_mreq *mreq);
152 int ip_optcopy(struct ip *, struct ip *);
153 void in_delayed_cksum_offset(struct mbuf *, int );
154 void in_cksum_offset(struct mbuf* , size_t );
155
156 extern int (*fr_checkp)(struct ip *, int, struct ifnet *, int, struct mbuf **);
157
158 extern u_long route_generation;
159
160 extern struct protosw inetsw[];
161
162 extern struct ip_linklocal_stat ip_linklocal_stat;
163 extern lck_mtx_t *ip_mutex;
164
165 /* temporary: for testing */
166 #if IPSEC
167 extern int ipsec_bypass;
168 #endif
169
170 static int ip_maxchainsent = 0;
171 SYSCTL_INT(_net_inet_ip, OID_AUTO, maxchainsent, CTLFLAG_RW,
172 &ip_maxchainsent, 0, "use dlil_output_list");
173 #if DEBUG
174 static int forge_ce = 0;
175 SYSCTL_INT(_net_inet_ip, OID_AUTO, forge_ce, CTLFLAG_RW,
176 &forge_ce, 0, "Forge ECN CE");
177 #endif /* DEBUG */
/*
 * ip_output: transmit one IP packet.
 *
 * Convenience entry point that hands its arguments straight to
 * ip_output_list(), passing 0 for that routine's packetchain argument,
 * and returns whatever ip_output_list() returns.
 *
 *	m0	mbuf chain holding the packet; it must begin with a skeletal
 *		IP header (len, off, ttl, proto, tos, src, dst).  The chain
 *		is freed.
 *	opt	optional mbuf of IP options; if present it is not freed.
 *	ro, flags, imo, ifp
 *		forwarded unchanged to ip_output_list().
 */
int
ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro, int flags,
    struct ip_moptions *imo, struct ifnet *ifp)
{
	return (ip_output_list(m0, 0, opt, ro, flags, imo, ifp));
}
197
198 /*
199 * Returns: 0 Success
200 * ENOMEM
201 * EADDRNOTAVAIL
202 * ENETUNREACH
203 * EHOSTUNREACH
204 * EACCES
205 * EMSGSIZE
206 * ENOBUFS
207 * ipsec4_getpolicybyaddr:??? [IPSEC 4th argument, contents modified]
208 * ipsec4_getpolicybysock:??? [IPSEC 4th argument, contents modified]
209 * key_spdacquire:??? [IPSEC]
210 * ipsec4_output:??? [IPSEC]
211 * <fr_checkp>:??? [firewall]
212 * ip_dn_io_ptr:??? [dummynet]
213 * dlil_output:??? [DLIL]
214 * dlil_output_list:??? [DLIL]
215 *
216 * Notes: The ipsec4_getpolicyby{addr|sock} function error returns are
217 * only used as the error return from this function where one of
218 * these functions fails to return a policy.
219 */
220 int
221 ip_output_list(
222 struct mbuf *m0,
223 int packetchain,
224 struct mbuf *opt,
225 struct route *ro,
226 int flags,
227 struct ip_moptions *imo,
228 #if CONFIG_FORCE_OUT_IFP
229 struct ifnet *pdp_ifp
230 #else
231 __unused struct ifnet *unused_ifp
232 #endif
233 )
234 {
235 struct ip *ip, *mhip;
236 struct ifnet *ifp = NULL;
237 struct mbuf *m = m0;
238 int hlen = sizeof (struct ip);
239 int len = 0, off, error = 0;
240 struct sockaddr_in *dst = NULL;
241 struct in_ifaddr *ia = NULL;
242 int isbroadcast, sw_csum;
243 struct in_addr pkt_dst;
244 #if IPSEC
245 struct route iproute;
246 struct socket *so = NULL;
247 struct secpolicy *sp = NULL;
248 #endif
249 #if IPFIREWALL_FORWARD
250 int fwd_rewrite_src = 0;
251 #endif
252 struct ip_fw_args args;
253 int didfilter = 0;
254 ipfilter_t inject_filter_ref = 0;
255 struct m_tag *tag;
256 struct route saved_route;
257 struct mbuf * packetlist;
258 int pktcnt = 0;
259
260
261 KERNEL_DEBUG(DBG_FNC_IP_OUTPUT | DBG_FUNC_START, 0,0,0,0,0);
262
263 packetlist = m0;
264 args.next_hop = NULL;
265 #if IPFIREWALL
266 args.eh = NULL;
267 args.rule = NULL;
268 args.divert_rule = 0; /* divert cookie */
269
270 /* Grab info from mtags prepended to the chain */
271 #if DUMMYNET
272 if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_DUMMYNET, NULL)) != NULL) {
273 struct dn_pkt_tag *dn_tag;
274
275 dn_tag = (struct dn_pkt_tag *)(tag+1);
276 args.rule = dn_tag->rule;
277 opt = NULL;
278 saved_route = dn_tag->ro;
279 ro = &saved_route;
280
281 imo = NULL;
282 dst = dn_tag->dn_dst;
283 ifp = dn_tag->ifp;
284 flags = dn_tag->flags;
285
286 m_tag_delete(m0, tag);
287 }
288 #endif /* DUMMYNET */
289
290 #if IPDIVERT
291 if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_DIVERT, NULL)) != NULL) {
292 struct divert_tag *div_tag;
293
294 div_tag = (struct divert_tag *)(tag+1);
295 args.divert_rule = div_tag->cookie;
296
297 m_tag_delete(m0, tag);
298 }
299 #endif /* IPDIVERT */
300 #endif /* IPFIREWALL */
301
302 if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_IPFORWARD, NULL)) != NULL) {
303 struct ip_fwd_tag *ipfwd_tag;
304
305 ipfwd_tag = (struct ip_fwd_tag *)(tag+1);
306 args.next_hop = ipfwd_tag->next_hop;
307
308 m_tag_delete(m0, tag);
309 }
310
311 m = m0;
312
313 #if DIAGNOSTIC
314 if ( !m || (m->m_flags & M_PKTHDR) != 0)
315 panic("ip_output no HDR");
316 if (!ro)
317 panic("ip_output no route, proto = %d",
318 mtod(m, struct ip *)->ip_p);
319 #endif
320
321 #if IPFIREWALL
322 if (args.rule != NULL) { /* dummynet already saw us */
323 ip = mtod(m, struct ip *);
324 hlen = IP_VHL_HL(ip->ip_vhl) << 2 ;
325 lck_mtx_lock(rt_mtx);
326 if (ro->ro_rt != NULL)
327 ia = (struct in_ifaddr *)ro->ro_rt->rt_ifa;
328 if (ia)
329 ifaref(&ia->ia_ifa);
330 lck_mtx_unlock(rt_mtx);
331 #if IPSEC
332 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) {
333 so = ipsec_getsocket(m);
334 (void)ipsec_setsocket(m, NULL);
335 }
336 #endif
337 goto sendit;
338 }
339 #endif /* IPFIREWALL */
340
341 #if IPSEC
342 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) {
343 so = ipsec_getsocket(m);
344 (void)ipsec_setsocket(m, NULL);
345 }
346 #endif
347 loopit:
348 /*
349 * No need to process packet twice if we've
350 * already seen it
351 */
352 inject_filter_ref = ipf_get_inject_filter(m);
353
354 if (opt) {
355 m = ip_insertoptions(m, opt, &len);
356 hlen = len;
357 }
358 ip = mtod(m, struct ip *);
359 pkt_dst = args.next_hop ? args.next_hop->sin_addr : ip->ip_dst;
360
361 /*
362 * Fill in IP header.
363 */
364 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
365 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, hlen >> 2);
366 ip->ip_off &= IP_DF;
367 #if RANDOM_IP_ID
368 ip->ip_id = ip_randomid();
369 #else
370 ip->ip_id = htons(ip_id++);
371 #endif
372 OSAddAtomic(1, (SInt32*)&ipstat.ips_localout);
373 } else {
374 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
375 }
376
377 #if DEBUG
378 /* For debugging, we let the stack forge congestion */
379 if (forge_ce != 0 &&
380 ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_ECT1 ||
381 (ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_ECT0)) {
382 ip->ip_tos = (ip->ip_tos & ~IPTOS_ECN_MASK) | IPTOS_ECN_CE;
383 forge_ce--;
384 }
385 #endif /* DEBUG */
386
387 KERNEL_DEBUG(DBG_LAYER_BEG, ip->ip_dst.s_addr,
388 ip->ip_src.s_addr, ip->ip_p, ip->ip_off, ip->ip_len);
389
390 dst = (struct sockaddr_in *)&ro->ro_dst;
391
392 /*
393 * If there is a cached route,
394 * check that it is to the same destination
395 * and is still up. If not, free it and try again.
396 * The address family should also be checked in case of sharing the
397 * cache with IPv6.
398 */
399
400 lck_mtx_lock(rt_mtx);
401 if (ro->ro_rt != NULL) {
402 if (ro->ro_rt->generation_id != route_generation &&
403 ((flags & (IP_ROUTETOIF | IP_FORWARDING)) == 0) &&
404 (ip->ip_src.s_addr != INADDR_ANY) &&
405 (ifa_foraddr(ip->ip_src.s_addr) == 0)) {
406 error = EADDRNOTAVAIL;
407 lck_mtx_unlock(rt_mtx);
408 goto bad;
409 }
410 if ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
411 dst->sin_family != AF_INET ||
412 dst->sin_addr.s_addr != pkt_dst.s_addr) {
413 rtfree_locked(ro->ro_rt);
414 ro->ro_rt = NULL;
415 }
416 if (ro->ro_rt && ro->ro_rt->generation_id != route_generation)
417 ro->ro_rt->generation_id = route_generation;
418 }
419 if (ro->ro_rt == NULL) {
420 bzero(dst, sizeof(*dst));
421 dst->sin_family = AF_INET;
422 dst->sin_len = sizeof(*dst);
423 dst->sin_addr = pkt_dst;
424 }
425 /*
426 * If routing to interface only,
427 * short circuit routing lookup.
428 */
429 #define ifatoia(ifa) ((struct in_ifaddr *)(ifa))
430 #define sintosa(sin) ((struct sockaddr *)(sin))
431 if (flags & IP_ROUTETOIF) {
432 if (ia)
433 ifafree(&ia->ia_ifa);
434 if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0) {
435 if ((ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) {
436 OSAddAtomic(1, (SInt32*)&ipstat.ips_noroute);
437 error = ENETUNREACH;
438 lck_mtx_unlock(rt_mtx);
439 goto bad;
440 }
441 }
442 ifp = ia->ia_ifp;
443 ip->ip_ttl = 1;
444 isbroadcast = in_broadcast(dst->sin_addr, ifp);
445 } else {
446
447 #if CONFIG_FORCE_OUT_IFP
448 /* Check if this packet should be forced out a specific interface */
449 if (ro->ro_rt == 0 && pdp_ifp != NULL) {
450 pdp_context_route_locked(pdp_ifp, ro);
451
452 if (ro->ro_rt == NULL) {
453 OSAddAtomic(1, (UInt32*)&ipstat.ips_noroute);
454 error = EHOSTUNREACH;
455 lck_mtx_unlock(rt_mtx);
456 goto bad;
457 }
458 }
459 #endif
460
461 /*
462 * If this is the case, we probably don't want to allocate
463 * a protocol-cloned route since we didn't get one from the
464 * ULP. This lets TCP do its thing, while not burdening
465 * forwarding or ICMP with the overhead of cloning a route.
466 * Of course, we still want to do any cloning requested by
467 * the link layer, as this is probably required in all cases
468 * for correct operation (as it is for ARP).
469 */
470
471 if (ro->ro_rt == 0) {
472 unsigned long ign = RTF_PRCLONING;
473 /*
474 * We make an exception here: if the destination
475 * address is INADDR_BROADCAST, allocate a protocol-
476 * cloned host route so that we end up with a route
477 * marked with the RTF_BROADCAST flag. Otherwise,
478 * we would end up referring to the default route,
479 * instead of creating a cloned host route entry.
480 * That would introduce inconsistencies between ULPs
481 * that allocate a route and those that don't. The
482 * RTF_BROADCAST route is important since we'd want
483 * to send out undirected IP broadcast packets using
484 * link-level broadcast address.
485 *
486 * This exception will no longer be necessary when
487 * the RTF_PRCLONING scheme is no longer present.
488 */
489 if (dst->sin_addr.s_addr == INADDR_BROADCAST)
490 ign &= ~RTF_PRCLONING;
491
492 rtalloc_ign_locked(ro, ign);
493 }
494 if (ro->ro_rt == 0) {
495 OSAddAtomic(1, (SInt32*)&ipstat.ips_noroute);
496 error = EHOSTUNREACH;
497 lck_mtx_unlock(rt_mtx);
498 goto bad;
499 }
500
501 if (ia)
502 ifafree(&ia->ia_ifa);
503 ia = ifatoia(ro->ro_rt->rt_ifa);
504 if (ia)
505 ifaref(&ia->ia_ifa);
506 ifp = ro->ro_rt->rt_ifp;
507 ro->ro_rt->rt_use++;
508 if (ro->ro_rt->rt_flags & RTF_GATEWAY)
509 dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
510 if (ro->ro_rt->rt_flags & RTF_HOST)
511 isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST);
512 else
513 isbroadcast = in_broadcast(dst->sin_addr, ifp);
514 }
515 lck_mtx_unlock(rt_mtx);
516 if (IN_MULTICAST(ntohl(pkt_dst.s_addr))) {
517 struct in_multi *inm;
518
519 m->m_flags |= M_MCAST;
520 /*
521 * IP destination address is multicast. Make sure "dst"
522 * still points to the address in "ro". (It may have been
523 * changed to point to a gateway address, above.)
524 */
525 dst = (struct sockaddr_in *)&ro->ro_dst;
526 /*
527 * See if the caller provided any multicast options
528 */
529 if (imo != NULL) {
530 if ((flags & IP_RAWOUTPUT) == 0) ip->ip_ttl = imo->imo_multicast_ttl;
531 if (imo->imo_multicast_ifp != NULL) {
532 ifp = imo->imo_multicast_ifp;
533 }
534 #if MROUTING
535 if (imo->imo_multicast_vif != -1 &&
536 ((flags & IP_RAWOUTPUT) == 0 || ip->ip_src.s_addr == INADDR_ANY))
537 ip->ip_src.s_addr =
538 ip_mcast_src(imo->imo_multicast_vif);
539 #endif /* MROUTING */
540 } else
541 if ((flags & IP_RAWOUTPUT) == 0) ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
542 /*
543 * Confirm that the outgoing interface supports multicast.
544 */
545 if ((imo == NULL) || (imo->imo_multicast_vif == -1)) {
546 if ((ifp->if_flags & IFF_MULTICAST) == 0) {
547 OSAddAtomic(1, (SInt32*)&ipstat.ips_noroute);
548 error = ENETUNREACH;
549 goto bad;
550 }
551 }
552 /*
553 * If source address not specified yet, use address
554 * of outgoing interface.
555 */
556 if (ip->ip_src.s_addr == INADDR_ANY) {
557 register struct in_ifaddr *ia1;
558 lck_mtx_lock(rt_mtx);
559 TAILQ_FOREACH(ia1, &in_ifaddrhead, ia_link)
560 if (ia1->ia_ifp == ifp) {
561 ip->ip_src = IA_SIN(ia1)->sin_addr;
562
563 break;
564 }
565 lck_mtx_unlock(rt_mtx);
566 if (ip->ip_src.s_addr == INADDR_ANY) {
567 error = ENETUNREACH;
568 goto bad;
569 }
570 }
571
572 ifnet_lock_shared(ifp);
573 IN_LOOKUP_MULTI(pkt_dst, ifp, inm);
574 ifnet_lock_done(ifp);
575 if (inm != NULL &&
576 (imo == NULL || imo->imo_multicast_loop)) {
577 /*
578 * If we belong to the destination multicast group
579 * on the outgoing interface, and the caller did not
580 * forbid loopback, loop back a copy.
581 */
582 if (!TAILQ_EMPTY(&ipv4_filters)) {
583 struct ipfilter *filter;
584 int seen = (inject_filter_ref == 0);
585 struct ipf_pktopts *ippo = 0, ipf_pktopts;
586
587 if (imo) {
588 ippo = &ipf_pktopts;
589 ipf_pktopts.ippo_mcast_ifnet = imo->imo_multicast_ifp;
590 ipf_pktopts.ippo_mcast_ttl = imo->imo_multicast_ttl;
591 ipf_pktopts.ippo_mcast_loop = imo->imo_multicast_loop;
592 }
593
594 ipf_ref();
595
596 /* 4135317 - always pass network byte order to filter */
597 HTONS(ip->ip_len);
598 HTONS(ip->ip_off);
599
600 TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) {
601 if (seen == 0) {
602 if ((struct ipfilter *)inject_filter_ref == filter)
603 seen = 1;
604 } else if (filter->ipf_filter.ipf_output) {
605 errno_t result;
606 result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, ippo);
607 if (result == EJUSTRETURN) {
608 ipf_unref();
609 goto done;
610 }
611 if (result != 0) {
612 ipf_unref();
613 goto bad;
614 }
615 }
616 }
617
618 /* set back to host byte order */
619 ip = mtod(m, struct ip *);
620 NTOHS(ip->ip_len);
621 NTOHS(ip->ip_off);
622
623 ipf_unref();
624 didfilter = 1;
625 }
626 ip_mloopback(ifp, m, dst, hlen);
627 }
628 #if MROUTING
629 else {
630 /*
631 * If we are acting as a multicast router, perform
632 * multicast forwarding as if the packet had just
633 * arrived on the interface to which we are about
634 * to send. The multicast forwarding function
635 * recursively calls this function, using the
636 * IP_FORWARDING flag to prevent infinite recursion.
637 *
638 * Multicasts that are looped back by ip_mloopback(),
639 * above, will be forwarded by the ip_input() routine,
640 * if necessary.
641 */
642 if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
643 /*
644 * Check if rsvp daemon is running. If not, don't
645 * set ip_moptions. This ensures that the packet
646 * is multicast and not just sent down one link
647 * as prescribed by rsvpd.
648 */
649 if (!rsvp_on)
650 imo = NULL;
651 if (ip_mforward(ip, ifp, m, imo) != 0) {
652 m_freem(m);
653 goto done;
654 }
655 }
656 }
657 #endif /* MROUTING */
658
659 /*
660 * Multicasts with a time-to-live of zero may be looped-
661 * back, above, but must not be transmitted on a network.
662 * Also, multicasts addressed to the loopback interface
663 * are not sent -- the above call to ip_mloopback() will
664 * loop back a copy if this host actually belongs to the
665 * destination group on the loopback interface.
666 */
667 if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) {
668 m_freem(m);
669 goto done;
670 }
671
672 goto sendit;
673 }
674 #ifndef notdef
675 /*
676 * If source address not specified yet, use address
677 * of outgoing interface.
678 */
679 if (ip->ip_src.s_addr == INADDR_ANY) {
680 ip->ip_src = IA_SIN(ia)->sin_addr;
681 #if IPFIREWALL_FORWARD
682 /* Keep note that we did this - if the firewall changes
683 * the next-hop, our interface may change, changing the
684 * default source IP. It's a shame so much effort happens
685 * twice. Oh well.
686 */
687 fwd_rewrite_src++;
688 #endif /* IPFIREWALL_FORWARD */
689 }
690 #endif /* notdef */
691
692 /*
693 * Look for broadcast address and
694 * verify user is allowed to send
695 * such a packet.
696 */
697 if (isbroadcast) {
698 if ((ifp->if_flags & IFF_BROADCAST) == 0) {
699 error = EADDRNOTAVAIL;
700 goto bad;
701 }
702 if ((flags & IP_ALLOWBROADCAST) == 0) {
703 error = EACCES;
704 goto bad;
705 }
706 /* don't allow broadcast messages to be fragmented */
707 if ((u_short)ip->ip_len > ifp->if_mtu) {
708 error = EMSGSIZE;
709 goto bad;
710 }
711 m->m_flags |= M_BCAST;
712 } else {
713 m->m_flags &= ~M_BCAST;
714 }
715
716 sendit:
717 /*
718 * Force IP TTL to 255 following draft-ietf-zeroconf-ipv4-linklocal.txt
719 */
720 if (IN_LINKLOCAL(ntohl(ip->ip_src.s_addr)) || IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr))) {
721 ip_linklocal_stat.iplls_out_total++;
722 if (ip->ip_ttl != MAXTTL) {
723 ip_linklocal_stat.iplls_out_badttl++;
724 ip->ip_ttl = MAXTTL;
725 }
726 }
727
728 if (!didfilter && !TAILQ_EMPTY(&ipv4_filters)) {
729 struct ipfilter *filter;
730 int seen = (inject_filter_ref == 0);
731
732 ipf_ref();
733
734 /* 4135317 - always pass network byte order to filter */
735 HTONS(ip->ip_len);
736 HTONS(ip->ip_off);
737
738 TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) {
739 if (seen == 0) {
740 if ((struct ipfilter *)inject_filter_ref == filter)
741 seen = 1;
742 } else if (filter->ipf_filter.ipf_output) {
743 errno_t result;
744 result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, 0);
745 if (result == EJUSTRETURN) {
746 ipf_unref();
747 goto done;
748 }
749 if (result != 0) {
750 ipf_unref();
751 goto bad;
752 }
753 }
754 }
755
756 /* set back to host byte order */
757 ip = mtod(m, struct ip *);
758 NTOHS(ip->ip_len);
759 NTOHS(ip->ip_off);
760
761 ipf_unref();
762 }
763
764 #if IPSEC
765 /* temporary for testing only: bypass ipsec altogether */
766
767 if (ipsec_bypass != 0 || (flags & IP_NOIPSEC) != 0)
768 goto skip_ipsec;
769
770 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_START, 0,0,0,0,0);
771
772
773 /* get SP for this packet */
774 if (so == NULL)
775 sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, flags, &error);
776 else
777 sp = ipsec4_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error);
778
779 if (sp == NULL) {
780 IPSEC_STAT_INCREMENT(ipsecstat.out_inval);
781 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 0,0,0,0,0);
782 goto bad;
783 }
784
785 error = 0;
786
787 /* check policy */
788 switch (sp->policy) {
789 case IPSEC_POLICY_DISCARD:
790 case IPSEC_POLICY_GENERATE:
791 /*
792 * This packet is just discarded.
793 */
794 IPSEC_STAT_INCREMENT(ipsecstat.out_polvio);
795 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 1,0,0,0,0);
796 goto bad;
797
798 case IPSEC_POLICY_BYPASS:
799 case IPSEC_POLICY_NONE:
800 /* no need to do IPsec. */
801 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 2,0,0,0,0);
802 goto skip_ipsec;
803
804 case IPSEC_POLICY_IPSEC:
805 if (sp->req == NULL) {
806 /* acquire a policy */
807 error = key_spdacquire(sp);
808 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 3,0,0,0,0);
809 goto bad;
810 }
811 break;
812
813 case IPSEC_POLICY_ENTRUST:
814 default:
815 printf("ip_output: Invalid policy found. %d\n", sp->policy);
816 }
817 {
818 struct ipsec_output_state state;
819 bzero(&state, sizeof(state));
820 state.m = m;
821 if (flags & IP_ROUTETOIF) {
822 state.ro = &iproute;
823 bzero(&iproute, sizeof(iproute));
824 } else
825 state.ro = ro;
826 state.dst = (struct sockaddr *)dst;
827
828 ip->ip_sum = 0;
829
830 /*
831 * XXX
832 * delayed checksums are not currently compatible with IPsec
833 */
834 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
835 in_delayed_cksum(m);
836 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
837 }
838
839 HTONS(ip->ip_len);
840 HTONS(ip->ip_off);
841
842 error = ipsec4_output(&state, sp, flags);
843
844 m0 = m = state.m;
845
846 if (flags & IP_ROUTETOIF) {
847 /*
848 * if we have tunnel mode SA, we may need to ignore
849 * IP_ROUTETOIF.
850 */
851 if (state.ro != &iproute || state.ro->ro_rt != NULL) {
852 flags &= ~IP_ROUTETOIF;
853 ro = state.ro;
854 }
855 } else
856 ro = state.ro;
857
858 dst = (struct sockaddr_in *)state.dst;
859 if (error) {
860 /* mbuf is already reclaimed in ipsec4_output. */
861 m0 = NULL;
862 switch (error) {
863 case EHOSTUNREACH:
864 case ENETUNREACH:
865 case EMSGSIZE:
866 case ENOBUFS:
867 case ENOMEM:
868 break;
869 default:
870 printf("ip4_output (ipsec): error code %d\n", error);
871 /*fall through*/
872 case ENOENT:
873 /* don't show these error codes to the user */
874 error = 0;
875 break;
876 }
877 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 4,0,0,0,0);
878 goto bad;
879 }
880 }
881
882 /* be sure to update variables that are affected by ipsec4_output() */
883 ip = mtod(m, struct ip *);
884
885 #ifdef _IP_VHL
886 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
887 #else
888 hlen = ip->ip_hl << 2;
889 #endif
890 /* Check that there wasn't a route change and src is still valid */
891
892 lck_mtx_lock(rt_mtx);
893 if (ro->ro_rt && ro->ro_rt->generation_id != route_generation) {
894 if (ifa_foraddr(ip->ip_src.s_addr) == 0 && ((flags & (IP_ROUTETOIF | IP_FORWARDING)) == 0)) {
895 error = EADDRNOTAVAIL;
896 lck_mtx_unlock(rt_mtx);
897 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 5,0,0,0,0);
898 goto bad;
899 }
900 rtfree_locked(ro->ro_rt);
901 ro->ro_rt = NULL;
902 }
903
904 if (ro->ro_rt == NULL) {
905 if ((flags & IP_ROUTETOIF) == 0) {
906 printf("ip_output: "
907 "can't update route after IPsec processing\n");
908 error = EHOSTUNREACH; /*XXX*/
909 lck_mtx_unlock(rt_mtx);
910 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 6,0,0,0,0);
911 goto bad;
912 }
913 } else {
914 if (ia)
915 ifafree(&ia->ia_ifa);
916 ia = ifatoia(ro->ro_rt->rt_ifa);
917 if (ia)
918 ifaref(&ia->ia_ifa);
919 ifp = ro->ro_rt->rt_ifp;
920 }
921 lck_mtx_unlock(rt_mtx);
922
923 /* make it flipped, again. */
924 NTOHS(ip->ip_len);
925 NTOHS(ip->ip_off);
926 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 7,0xff,0xff,0xff,0xff);
927
928 /* Pass to filters again */
929 if (!TAILQ_EMPTY(&ipv4_filters)) {
930 struct ipfilter *filter;
931
932 ipf_ref();
933
934 /* 4135317 - always pass network byte order to filter */
935 HTONS(ip->ip_len);
936 HTONS(ip->ip_off);
937
938 TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) {
939 if (filter->ipf_filter.ipf_output) {
940 errno_t result;
941 result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, 0);
942 if (result == EJUSTRETURN) {
943 ipf_unref();
944 goto done;
945 }
946 if (result != 0) {
947 ipf_unref();
948 goto bad;
949 }
950 }
951 }
952
953 /* set back to host byte order */
954 ip = mtod(m, struct ip *);
955 NTOHS(ip->ip_len);
956 NTOHS(ip->ip_off);
957
958 ipf_unref();
959 }
960 skip_ipsec:
961 #endif /*IPSEC*/
962
963 #if IPFIREWALL
964 /*
965 * IpHack's section.
966 * - Xlate: translate packet's addr/port (NAT).
967 * - Firewall: deny/allow/etc.
968 * - Wrap: fake packet's addr/port <unimpl.>
969 * - Encapsulate: put it in another IP and send out. <unimp.>
970 */
971 if (fr_checkp) {
972 struct mbuf *m1 = m;
973
974 if ((error = (*fr_checkp)(ip, hlen, ifp, 1, &m1)) || !m1) {
975 goto done;
976 }
977 ip = mtod(m0 = m = m1, struct ip *);
978 }
979
980 /*
981 * Check with the firewall...
982 * but not if we are already being fwd'd from a firewall.
983 */
984 if (fw_enable && IPFW_LOADED && !args.next_hop) {
985 struct sockaddr_in *old = dst;
986
987 args.m = m;
988 args.next_hop = dst;
989 args.oif = ifp;
990 off = ip_fw_chk_ptr(&args);
991 m = args.m;
992 dst = args.next_hop;
993
994 /*
995 * On return we must do the following:
996 * IP_FW_PORT_DENY_FLAG -> drop the pkt (XXX new)
997 * 1<=off<= 0xffff -> DIVERT
998 * (off & IP_FW_PORT_DYNT_FLAG) -> send to a DUMMYNET pipe
999 * (off & IP_FW_PORT_TEE_FLAG) -> TEE the packet
1000 * dst != old -> IPFIREWALL_FORWARD
1001 * off==0, dst==old -> accept
1002 * If any of the above modules is not compiled in, then
1003 * we shouldn't have to check the corresponding condition
1004 * (because the ipfw control socket should not accept
1005 * unsupported rules), but better play safe and drop
1006 * packets in case of doubt.
1007 */
1008 m0 = m;
1009 if ( (off & IP_FW_PORT_DENY_FLAG) || m == NULL) {
1010 if (m)
1011 m_freem(m);
1012 error = EACCES ;
1013 goto done ;
1014 }
1015 ip = mtod(m, struct ip *);
1016
1017 if (off == 0 && dst == old) {/* common case */
1018 goto pass ;
1019 }
1020 #if DUMMYNET
1021 if (DUMMYNET_LOADED && (off & IP_FW_PORT_DYNT_FLAG) != 0) {
1022 /*
1023 * pass the pkt to dummynet. Need to include
1024 * pipe number, m, ifp, ro, dst because these are
1025 * not recomputed in the next pass.
1026 * All other parameters have been already used and
1027 * so they are not needed anymore.
1028 * XXX note: if the ifp or ro entry are deleted
1029 * while a pkt is in dummynet, we are in trouble!
1030 */
1031 args.ro = ro;
1032 args.dst = dst;
1033 args.flags = flags;
1034
1035 error = ip_dn_io_ptr(m, off & 0xffff, DN_TO_IP_OUT,
1036 &args);
1037 goto done;
1038 }
1039 #endif /* DUMMYNET */
1040 #if IPDIVERT
1041 if (off != 0 && (off & IP_FW_PORT_DYNT_FLAG) == 0) {
1042 struct mbuf *clone = NULL;
1043
1044 /* Clone packet if we're doing a 'tee' */
1045 if ((off & IP_FW_PORT_TEE_FLAG) != 0)
1046 clone = m_dup(m, M_DONTWAIT);
1047 /*
1048 * XXX
1049 * delayed checksums are not currently compatible
1050 * with divert sockets.
1051 */
1052 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
1053 in_delayed_cksum(m);
1054 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1055 }
1056
1057 /* Restore packet header fields to original values */
1058 HTONS(ip->ip_len);
1059 HTONS(ip->ip_off);
1060
1061 /* Deliver packet to divert input routine */
1062 divert_packet(m, 0, off & 0xffff, args.divert_rule);
1063
1064 /* If 'tee', continue with original packet */
1065 if (clone != NULL) {
1066 m0 = m = clone;
1067 ip = mtod(m, struct ip *);
1068 goto pass;
1069 }
1070 goto done;
1071 }
1072 #endif
1073
1074 #if IPFIREWALL_FORWARD
1075 /* Here we check dst to make sure it's directly reachable on the
1076 * interface we previously thought it was.
1077 * If it isn't (which may be likely in some situations) we have
1078 * to re-route it (ie, find a route for the next-hop and the
1079 * associated interface) and set them here. This is nested
1080 * forwarding which in most cases is undesirable, except where
1081 * such control is nigh impossible. So we do it here.
1082 * And I'm babbling.
1083 */
1084 if (off == 0 && old != dst) {
1085 struct in_ifaddr *ia_fw;
1086
1087 /* It's changed... */
1088 /* There must be a better way to do this next line... */
1089 static struct route sro_fwd, *ro_fwd = &sro_fwd;
1090 #if IPFIREWALL_FORWARD_DEBUG
1091 printf("IPFIREWALL_FORWARD: New dst ip: ");
1092 print_ip(dst->sin_addr);
1093 printf("\n");
1094 #endif
1095 /*
1096 * We need to figure out if we have been forwarded
1097 * to a local socket. If so then we should somehow
1098 * "loop back" to ip_input, and get directed to the
1099 * PCB as if we had received this packet. This is
1100 * because it may be dificult to identify the packets
1101 * you want to forward until they are being output
1102 * and have selected an interface. (e.g. locally
1103 * initiated packets) If we used the loopback inteface,
1104 * we would not be able to control what happens
1105 * as the packet runs through ip_input() as
1106 * it is done through a ISR.
1107 */
1108 TAILQ_FOREACH(ia_fw, &in_ifaddrhead, ia_link) {
1109 /*
1110 * If the addr to forward to is one
1111 * of ours, we pretend to
1112 * be the destination for this packet.
1113 */
1114 if (IA_SIN(ia_fw)->sin_addr.s_addr ==
1115 dst->sin_addr.s_addr)
1116 break;
1117 }
1118 if (ia) {
1119 /* tell ip_input "dont filter" */
1120 struct m_tag *fwd_tag;
1121 struct ip_fwd_tag *ipfwd_tag;
1122
1123 fwd_tag = m_tag_alloc(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_IPFORWARD,
1124 sizeof(struct sockaddr_in), M_NOWAIT);
1125 if (fwd_tag == NULL) {
1126 error = ENOBUFS;
1127 goto bad;
1128 }
1129
1130 ipfwd_tag = (struct ip_fwd_tag *)(fwd_tag+1);
1131 ipfwd_tag->next_hop = args.next_hop;
1132
1133 m_tag_prepend(m, fwd_tag);
1134
1135 if (m->m_pkthdr.rcvif == NULL)
1136 m->m_pkthdr.rcvif = ifunit("lo0");
1137 if ((~IF_HWASSIST_CSUM_FLAGS(m->m_pkthdr.rcvif->if_hwassist) &
1138 m->m_pkthdr.csum_flags) == 0) {
1139 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
1140 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1141 m->m_pkthdr.csum_flags |=
1142 CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1143 m->m_pkthdr.csum_data = 0xffff;
1144 }
1145 m->m_pkthdr.csum_flags |=
1146 CSUM_IP_CHECKED | CSUM_IP_VALID;
1147 }
1148 else if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
1149 in_delayed_cksum(m);
1150 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1151 ip->ip_sum = in_cksum(m, hlen);
1152 }
1153 HTONS(ip->ip_len);
1154 HTONS(ip->ip_off);
1155
1156
1157 /* we need to call dlil_output to run filters
1158 * and resync to avoid recursion loops.
1159 */
1160 if (lo_ifp) {
1161 dlil_output(lo_ifp, PF_INET, m, 0, (struct sockaddr *)dst, 0);
1162 }
1163 else {
1164 printf("ip_output: no loopback ifp for forwarding!!!\n");
1165 }
1166 goto done;
1167 }
1168 /* Some of the logic for this was
1169 * nicked from above.
1170 *
1171 * This rewrites the cached route in a local PCB.
1172 * Is this what we want to do?
1173 */
1174 bcopy(dst, &ro_fwd->ro_dst, sizeof(*dst));
1175
1176 ro_fwd->ro_rt = 0;
1177 lck_mtx_lock(rt_mtx);
1178 rtalloc_ign_locked(ro_fwd, RTF_PRCLONING);
1179
1180 if (ro_fwd->ro_rt == 0) {
1181 OSAddAtomic(1, (SInt32*)&ipstat.ips_noroute);
1182 error = EHOSTUNREACH;
1183 lck_mtx_unlock(rt_mtx);
1184 goto bad;
1185 }
1186
1187 ia_fw = ifatoia(ro_fwd->ro_rt->rt_ifa);
1188 ifp = ro_fwd->ro_rt->rt_ifp;
1189 ro_fwd->ro_rt->rt_use++;
1190 if (ro_fwd->ro_rt->rt_flags & RTF_GATEWAY)
1191 dst = (struct sockaddr_in *)ro_fwd->ro_rt->rt_gateway;
1192 if (ro_fwd->ro_rt->rt_flags & RTF_HOST)
1193 isbroadcast =
1194 (ro_fwd->ro_rt->rt_flags & RTF_BROADCAST);
1195 else
1196 isbroadcast = in_broadcast(dst->sin_addr, ifp);
1197 rtfree_locked(ro->ro_rt);
1198 ro->ro_rt = ro_fwd->ro_rt;
1199 dst = (struct sockaddr_in *)&ro_fwd->ro_dst;
1200 lck_mtx_unlock(rt_mtx);
1201
1202 /*
1203 * If we added a default src ip earlier,
1204 * which would have been gotten from the-then
1205 * interface, do it again, from the new one.
1206 */
1207 if (fwd_rewrite_src)
1208 ip->ip_src = IA_SIN(ia_fw)->sin_addr;
1209 goto pass ;
1210 }
1211 #endif /* IPFIREWALL_FORWARD */
1212 /*
1213 * if we get here, none of the above matches, and
1214 * we have to drop the pkt
1215 */
1216 m_freem(m);
1217 error = EACCES; /* not sure this is the right error msg */
1218 goto done;
1219 }
1220 #endif /* IPFIREWALL */
1221
1222 pass:
1223 #if __APPLE__
1224 /* Do not allow loopback address to wind up on a wire */
1225 if ((ifp->if_flags & IFF_LOOPBACK) == 0 &&
1226 ((ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
1227 (ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)) {
1228 OSAddAtomic(1, (SInt32*)&ipstat.ips_badaddr);
1229 m_freem(m);
1230 /*
1231 * Do not simply drop the packet just like a firewall -- we want the
1232 * the application to feel the pain.
1233 * Return ENETUNREACH like ip6_output does in some similar cases.
1234 * This can startle the otherwise clueless process that specifies
1235 * loopback as the source address.
1236 */
1237 error = ENETUNREACH;
1238 goto done;
1239 }
1240 #endif
1241 m->m_pkthdr.csum_flags |= CSUM_IP;
1242 sw_csum = m->m_pkthdr.csum_flags
1243 & ~IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);
1244
1245 if ((ifp->if_hwassist & CSUM_TCP_SUM16) != 0) {
1246 /*
1247 * Special case code for GMACE
1248 * frames that can be checksumed by GMACE SUM16 HW:
1249 * frame >64, no fragments, no UDP
1250 */
1251 if (apple_hwcksum_tx && (m->m_pkthdr.csum_flags & CSUM_TCP)
1252 && (ip->ip_len > 50) && (ip->ip_len <= ifp->if_mtu)) {
1253 /* Apple GMAC HW, expects STUFF_OFFSET << 16 | START_OFFSET */
1254 u_short offset = (IP_VHL_HL(ip->ip_vhl) << 2) +14 ; /* IP+Enet header length */
1255 u_short csumprev= m->m_pkthdr.csum_data & 0xFFFF;
1256 m->m_pkthdr.csum_flags = CSUM_DATA_VALID | CSUM_TCP_SUM16; /* for GMAC */
1257 m->m_pkthdr.csum_data = (csumprev + offset) << 16 ;
1258 m->m_pkthdr.csum_data += offset;
1259 sw_csum = CSUM_DELAY_IP; /* do IP hdr chksum in software */
1260 }
1261 else {
1262 /* let the software handle any UDP or TCP checksums */
1263 sw_csum |= (CSUM_DELAY_DATA & m->m_pkthdr.csum_flags);
1264 }
1265 } else if (apple_hwcksum_tx == 0) {
1266 sw_csum |= (CSUM_DELAY_DATA | CSUM_DELAY_IP) &
1267 m->m_pkthdr.csum_flags;
1268 }
1269
1270 if (sw_csum & CSUM_DELAY_DATA) {
1271 in_delayed_cksum(m);
1272 sw_csum &= ~CSUM_DELAY_DATA;
1273 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1274 }
1275
1276 if (apple_hwcksum_tx != 0) {
1277 m->m_pkthdr.csum_flags &=
1278 IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);
1279 } else {
1280 m->m_pkthdr.csum_flags = 0;
1281 }
1282
1283 /*
1284 * If small enough for interface, or the interface will take
1285 * care of the fragmentation for us, can just send directly.
1286 */
1287 if ((u_short)ip->ip_len <= ifp->if_mtu ||
1288 ifp->if_hwassist & CSUM_FRAGMENT) {
1289 struct rtentry *rte;
1290
1291 HTONS(ip->ip_len);
1292 HTONS(ip->ip_off);
1293 ip->ip_sum = 0;
1294 if (sw_csum & CSUM_DELAY_IP) {
1295 ip->ip_sum = in_cksum(m, hlen);
1296 }
1297
1298 #ifndef __APPLE__
1299 /* Record statistics for this interface address. */
1300 if (!(flags & IP_FORWARDING) && ia != NULL) {
1301 ia->ia_ifa.if_opackets++;
1302 ia->ia_ifa.if_obytes += m->m_pkthdr.len;
1303 }
1304 #endif
1305
1306 #if IPSEC
1307 /* clean ipsec history once it goes out of the node */
1308 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0)
1309 ipsec_delaux(m);
1310 #endif
1311 if (packetchain == 0) {
1312 lck_mtx_lock(rt_mtx);
1313 if ((rte = ro->ro_rt) != NULL)
1314 rtref(rte);
1315 lck_mtx_unlock(rt_mtx);
1316 error = ifnet_output(ifp, PF_INET, m, rte,
1317 (struct sockaddr *)dst);
1318 if (rte != NULL)
1319 rtfree(rte);
1320 goto done;
1321 }
1322 else { /* packet chaining allows us to reuse the route for all packets */
1323 m = m->m_nextpkt;
1324 if (m == NULL) {
1325 if (pktcnt > ip_maxchainsent)
1326 ip_maxchainsent = pktcnt;
1327 lck_mtx_lock(rt_mtx);
1328 if ((rte = ro->ro_rt) != NULL)
1329 rtref(rte);
1330 lck_mtx_unlock(rt_mtx);
1331 //send
1332 error = ifnet_output(ifp, PF_INET, packetlist,
1333 rte, (struct sockaddr *)dst);
1334 if (rte != NULL)
1335 rtfree(rte);
1336 pktcnt = 0;
1337 goto done;
1338
1339 }
1340 m0 = m;
1341 pktcnt++;
1342 goto loopit;
1343 }
1344 }
1345 /*
1346 * Too large for interface; fragment if possible.
1347 * Must be able to put at least 8 bytes per fragment.
1348 */
1349 if (ip->ip_off & IP_DF) {
1350 error = EMSGSIZE;
1351 /*
1352 * This case can happen if the user changed the MTU
1353 * of an interface after enabling IP on it. Because
1354 * most netifs don't keep track of routes pointing to
1355 * them, there is no way for one to update all its
1356 * routes when the MTU is changed.
1357 */
1358
1359 lck_mtx_lock(rt_mtx);
1360 if (ro->ro_rt && (ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST))
1361 && !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU)
1362 && (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) {
1363 ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
1364 }
1365 lck_mtx_unlock(rt_mtx);
1366 OSAddAtomic(1, (SInt32*)&ipstat.ips_cantfrag);
1367 goto bad;
1368 }
1369 len = (ifp->if_mtu - hlen) &~ 7;
1370 if (len < 8) {
1371 error = EMSGSIZE;
1372 goto bad;
1373 }
1374
1375 /*
1376 * if the interface will not calculate checksums on
1377 * fragmented packets, then do it here.
1378 */
1379 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA &&
1380 (ifp->if_hwassist & CSUM_IP_FRAGS) == 0) {
1381 in_delayed_cksum(m);
1382 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1383 }
1384
1385
1386 {
1387 int mhlen, firstlen = len;
1388 struct mbuf **mnext = &m->m_nextpkt;
1389 int nfrags = 1;
1390
1391 /*
1392 * Loop through length of segment after first fragment,
1393 * make new header and copy data of each part and link onto chain.
1394 */
1395 m0 = m;
1396 mhlen = sizeof (struct ip);
1397 for (off = hlen + len; off < (u_short)ip->ip_len; off += len) {
1398 MGETHDR(m, M_DONTWAIT, MT_HEADER); /* MAC-OK */
1399 if (m == 0) {
1400 error = ENOBUFS;
1401 OSAddAtomic(1, (SInt32*)&ipstat.ips_odropped);
1402 goto sendorfree;
1403 }
1404 m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG;
1405 m->m_data += max_linkhdr;
1406 mhip = mtod(m, struct ip *);
1407 *mhip = *ip;
1408 if (hlen > sizeof (struct ip)) {
1409 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
1410 mhip->ip_vhl = IP_MAKE_VHL(IPVERSION, mhlen >> 2);
1411 }
1412 m->m_len = mhlen;
1413 mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF);
1414 if (ip->ip_off & IP_MF)
1415 mhip->ip_off |= IP_MF;
1416 if (off + len >= (u_short)ip->ip_len)
1417 len = (u_short)ip->ip_len - off;
1418 else
1419 mhip->ip_off |= IP_MF;
1420 mhip->ip_len = htons((u_short)(len + mhlen));
1421 m->m_next = m_copy(m0, off, len);
1422 if (m->m_next == 0) {
1423 (void) m_free(m);
1424 error = ENOBUFS; /* ??? */
1425 OSAddAtomic(1, (SInt32*)&ipstat.ips_odropped);
1426 goto sendorfree;
1427 }
1428 m->m_pkthdr.len = mhlen + len;
1429 m->m_pkthdr.rcvif = 0;
1430 m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
1431 m->m_pkthdr.socket_id = m0->m_pkthdr.socket_id;
1432 #if CONFIG_MACF_NET
1433 mac_netinet_fragment(m0, m);
1434 #endif
1435 HTONS(mhip->ip_off);
1436 mhip->ip_sum = 0;
1437 if (sw_csum & CSUM_DELAY_IP) {
1438 mhip->ip_sum = in_cksum(m, mhlen);
1439 }
1440 *mnext = m;
1441 mnext = &m->m_nextpkt;
1442 nfrags++;
1443 }
1444 OSAddAtomic(nfrags, (SInt32*)&ipstat.ips_ofragments);
1445
1446 /* set first/last markers for fragment chain */
1447 m->m_flags |= M_LASTFRAG;
1448 m0->m_flags |= M_FIRSTFRAG | M_FRAG;
1449 m0->m_pkthdr.csum_data = nfrags;
1450
1451 /*
1452 * Update first fragment by trimming what's been copied out
1453 * and updating header, then send each fragment (in order).
1454 */
1455 m = m0;
1456 m_adj(m, hlen + firstlen - (u_short)ip->ip_len);
1457 m->m_pkthdr.len = hlen + firstlen;
1458 ip->ip_len = htons((u_short)m->m_pkthdr.len);
1459 ip->ip_off |= IP_MF;
1460 HTONS(ip->ip_off);
1461 ip->ip_sum = 0;
1462 if (sw_csum & CSUM_DELAY_IP) {
1463 ip->ip_sum = in_cksum(m, hlen);
1464 }
1465 sendorfree:
1466
1467 KERNEL_DEBUG(DBG_LAYER_END, ip->ip_dst.s_addr,
1468 ip->ip_src.s_addr, ip->ip_p, ip->ip_off, ip->ip_len);
1469
1470 for (m = m0; m; m = m0) {
1471 m0 = m->m_nextpkt;
1472 m->m_nextpkt = 0;
1473 #if IPSEC
1474 /* clean ipsec history once it goes out of the node */
1475 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0)
1476 ipsec_delaux(m);
1477 #endif
1478 if (error == 0) {
1479 struct rtentry *rte;
1480 #ifndef __APPLE__
1481 /* Record statistics for this interface address. */
1482 if (ia != NULL) {
1483 ia->ia_ifa.if_opackets++;
1484 ia->ia_ifa.if_obytes += m->m_pkthdr.len;
1485 }
1486 #endif
1487 if ((packetchain != 0) && (pktcnt > 0))
1488 panic("ip_output: mix of packet in packetlist is wrong=%p", packetlist);
1489 lck_mtx_lock(rt_mtx);
1490 if ((rte = ro->ro_rt) != NULL)
1491 rtref(rte);
1492 lck_mtx_unlock(rt_mtx);
1493 error = ifnet_output(ifp, PF_INET, m, rte,
1494 (struct sockaddr *)dst);
1495 if (rte != NULL)
1496 rtfree(rte);
1497 } else
1498 m_freem(m);
1499 }
1500
1501 if (error == 0)
1502 OSAddAtomic(1, (SInt32*)&ipstat.ips_fragmented);
1503 }
1504 done:
1505 if (ia) {
1506 ifafree(&ia->ia_ifa);
1507 ia = NULL;
1508 }
1509 #if IPSEC
1510 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) {
1511 if (ro == &iproute && ro->ro_rt) {
1512 rtfree(ro->ro_rt);
1513 ro->ro_rt = NULL;
1514 }
1515 if (sp != NULL) {
1516 KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
1517 printf("DP ip_output call free SP:%x\n", sp));
1518 key_freesp(sp, KEY_SADB_UNLOCKED);
1519 }
1520 }
1521 #endif /* IPSEC */
1522
1523 KERNEL_DEBUG(DBG_FNC_IP_OUTPUT | DBG_FUNC_END, error,0,0,0,0);
1524 return (error);
1525 bad:
1526 m_freem(m0);
1527 goto done;
1528 }
1529
/*
 * Hand the byte count `len' to the output-checksum statistics routine of
 * the transport protocol `proto'.  Only TCP and UDP have such a routine;
 * any other protocol number is ignored.
 */
static void
ip_out_cksum_stats(int proto, u_int32_t len)
{
	if (proto == IPPROTO_TCP) {
		tcp_out_cksum_stats(len);
		return;
	}
	if (proto == IPPROTO_UDP) {
		udp_out_cksum_stats(len);
		return;
	}
	/* keep only TCP or UDP stats for now */
}
1545
1546 void
1547 in_delayed_cksum_offset(struct mbuf *m0, int ip_offset)
1548 {
1549 struct ip *ip;
1550 unsigned char buf[sizeof(struct ip)];
1551 u_short csum, offset, ip_len;
1552 struct mbuf *m = m0;
1553
1554 while (ip_offset >= m->m_len) {
1555 ip_offset -= m->m_len;
1556 m = m->m_next;
1557 if (m == NULL) {
1558 printf("in_delayed_cksum_withoffset failed - ip_offset wasn't in the packet\n");
1559 return;
1560 }
1561 }
1562
1563 /* Sometimes the IP header is not contiguous, yes this can happen! */
1564 if (ip_offset + sizeof(struct ip) > m->m_len) {
1565 #if DEBUG
1566 printf("delayed m_pullup, m->len: %ld off: %d\n",
1567 m->m_len, ip_offset);
1568 #endif
1569 m_copydata(m, ip_offset, sizeof(struct ip), (caddr_t) buf);
1570
1571 ip = (struct ip *)buf;
1572 } else {
1573 ip = (struct ip*)(m->m_data + ip_offset);
1574 }
1575
1576 /* Gross */
1577 if (ip_offset) {
1578 m->m_len -= ip_offset;
1579 m->m_data += ip_offset;
1580 }
1581
1582 offset = IP_VHL_HL(ip->ip_vhl) << 2 ;
1583
1584 /*
1585 * We could be in the context of an IP or interface filter; in the
1586 * former case, ip_len would be in host (correct) order while for
1587 * the latter it would be in network order. Because of this, we
1588 * attempt to interpret the length field by comparing it against
1589 * the actual packet length. If the comparison fails, byte swap
1590 * the length and check again. If it still fails, then the packet
1591 * is bogus and we give up.
1592 */
1593 ip_len = ip->ip_len;
1594 if (ip_len != (m0->m_pkthdr.len - ip_offset)) {
1595 ip_len = SWAP16(ip_len);
1596 if (ip_len != (m0->m_pkthdr.len - ip_offset)) {
1597 printf("in_delayed_cksum_offset: ip_len %d (%d) "
1598 "doesn't match actual length %d\n", ip->ip_len,
1599 ip_len, (m0->m_pkthdr.len - ip_offset));
1600 return;
1601 }
1602 }
1603
1604 csum = in_cksum_skip(m, ip_len, offset);
1605
1606 /* Update stats */
1607 ip_out_cksum_stats(ip->ip_p, ip_len - offset);
1608
1609 if (m0->m_pkthdr.csum_flags & CSUM_UDP && csum == 0)
1610 csum = 0xffff;
1611 offset += m0->m_pkthdr.csum_data & 0xFFFF; /* checksum offset */
1612
1613 /* Gross */
1614 if (ip_offset) {
1615 if (M_LEADINGSPACE(m) < ip_offset)
1616 panic("in_delayed_cksum_offset - chain modified!\n");
1617 m->m_len += ip_offset;
1618 m->m_data -= ip_offset;
1619 }
1620
1621 if (offset > ip_len) /* bogus offset */
1622 return;
1623
1624 /* Insert the checksum in the existing chain */
1625 if (offset + ip_offset + sizeof(u_short) > m->m_len) {
1626 char tmp[2];
1627
1628 #if DEBUG
1629 printf("delayed m_copyback, m->len: %ld off: %d p: %d\n",
1630 m->m_len, offset + ip_offset, ip->ip_p);
1631 #endif
1632 *(u_short *)tmp = csum;
1633 m_copyback(m, offset + ip_offset, 2, tmp);
1634 } else
1635 *(u_short *)(m->m_data + offset + ip_offset) = csum;
1636 }
1637
/*
 * Convenience wrapper around in_delayed_cksum_offset() for a packet whose
 * IP header sits at the very beginning of the mbuf chain.
 */
void
in_delayed_cksum(struct mbuf *m)
{
	const int ip_hdr_offset = 0;	/* IP header starts the packet */

	in_delayed_cksum_offset(m, ip_hdr_offset);
}
1643
1644 void
1645 in_cksum_offset(struct mbuf* m, size_t ip_offset)
1646 {
1647 struct ip* ip = NULL;
1648 int hlen = 0;
1649 unsigned char buf[sizeof(struct ip)];
1650 int swapped = 0;
1651
1652 while (ip_offset >= m->m_len) {
1653 ip_offset -= m->m_len;
1654 m = m->m_next;
1655 if (m == NULL) {
1656 printf("in_cksum_offset failed - ip_offset wasn't in the packet\n");
1657 return;
1658 }
1659 }
1660
1661 /* Sometimes the IP header is not contiguous, yes this can happen! */
1662 if (ip_offset + sizeof(struct ip) > m->m_len) {
1663
1664 #if DEBUG
1665 printf("in_cksum_offset - delayed m_pullup, m->len: %ld off: %lu\n",
1666 m->m_len, ip_offset);
1667 #endif
1668 m_copydata(m, ip_offset, sizeof(struct ip), (caddr_t) buf);
1669
1670 ip = (struct ip *)buf;
1671 ip->ip_sum = 0;
1672 m_copyback(m, ip_offset + offsetof(struct ip, ip_sum), 2, (caddr_t)&ip->ip_sum);
1673 } else {
1674 ip = (struct ip*)(m->m_data + ip_offset);
1675 ip->ip_sum = 0;
1676 }
1677
1678 /* Gross */
1679 if (ip_offset) {
1680 m->m_len -= ip_offset;
1681 m->m_data += ip_offset;
1682 }
1683
1684 #ifdef _IP_VHL
1685 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
1686 #else
1687 hlen = ip->ip_hl << 2;
1688 #endif
1689 /*
1690 * We could be in the context of an IP or interface filter; in the
1691 * former case, ip_len would be in host order while for the latter
1692 * it would be in network (correct) order. Because of this, we
1693 * attempt to interpret the length field by comparing it against
1694 * the actual packet length. If the comparison fails, byte swap
1695 * the length and check again. If it still fails, then the packet
1696 * is bogus and we give up.
1697 */
1698 if (ntohs(ip->ip_len) != (m->m_pkthdr.len - ip_offset)) {
1699 ip->ip_len = SWAP16(ip->ip_len);
1700 swapped = 1;
1701 if (ntohs(ip->ip_len) != (m->m_pkthdr.len - ip_offset)) {
1702 ip->ip_len = SWAP16(ip->ip_len);
1703 printf("in_cksum_offset: ip_len %d (%d) "
1704 "doesn't match actual length %lu\n",
1705 ip->ip_len, SWAP16(ip->ip_len),
1706 (m->m_pkthdr.len - ip_offset));
1707 return;
1708 }
1709 }
1710
1711 ip->ip_sum = 0;
1712 ip->ip_sum = in_cksum(m, hlen);
1713 if (swapped)
1714 ip->ip_len = SWAP16(ip->ip_len);
1715
1716 /* Gross */
1717 if (ip_offset) {
1718 if (M_LEADINGSPACE(m) < ip_offset)
1719 panic("in_cksum_offset - chain modified!\n");
1720 m->m_len += ip_offset;
1721 m->m_data -= ip_offset;
1722 }
1723
1724 /* Insert the checksum in the existing chain if IP header not contiguous */
1725 if (ip_offset + sizeof(struct ip) > m->m_len) {
1726 char tmp[2];
1727
1728 #if DEBUG
1729 printf("in_cksum_offset m_copyback, m->len: %lu off: %lu p: %d\n",
1730 m->m_len, ip_offset + offsetof(struct ip, ip_sum), ip->ip_p);
1731 #endif
1732 *(u_short *)tmp = ip->ip_sum;
1733 m_copyback(m, ip_offset + offsetof(struct ip, ip_sum), 2, tmp);
1734 }
1735 }
1736
1737 /*
1738 * Insert IP options into preformed packet.
1739 * Adjust IP destination as required for IP source routing,
1740 * as indicated by a non-zero in_addr at the start of the options.
1741 *
1742 * XXX This routine assumes that the packet has no options in place.
1743 */
1744 static struct mbuf *
1745 ip_insertoptions(m, opt, phlen)
1746 register struct mbuf *m;
1747 struct mbuf *opt;
1748 int *phlen;
1749 {
1750 register struct ipoption *p = mtod(opt, struct ipoption *);
1751 struct mbuf *n;
1752 register struct ip *ip = mtod(m, struct ip *);
1753 unsigned optlen;
1754
1755 optlen = opt->m_len - sizeof(p->ipopt_dst);
1756 if (optlen + (u_short)ip->ip_len > IP_MAXPACKET)
1757 return (m); /* XXX should fail */
1758 if (p->ipopt_dst.s_addr)
1759 ip->ip_dst = p->ipopt_dst;
1760 if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
1761 MGETHDR(n, M_DONTWAIT, MT_HEADER); /* MAC-OK */
1762 if (n == 0)
1763 return (m);
1764 n->m_pkthdr.rcvif = 0;
1765 #if CONFIG_MACF_NET
1766 mac_mbuf_label_copy(m, n);
1767 #endif
1768 n->m_pkthdr.len = m->m_pkthdr.len + optlen;
1769 m->m_len -= sizeof(struct ip);
1770 m->m_data += sizeof(struct ip);
1771 n->m_next = m;
1772 m = n;
1773 m->m_len = optlen + sizeof(struct ip);
1774 m->m_data += max_linkhdr;
1775 (void)memcpy(mtod(m, void *), ip, sizeof(struct ip));
1776 } else {
1777 m->m_data -= optlen;
1778 m->m_len += optlen;
1779 m->m_pkthdr.len += optlen;
1780 ovbcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
1781 }
1782 ip = mtod(m, struct ip *);
1783 bcopy(p->ipopt_list, ip + 1, optlen);
1784 *phlen = sizeof(struct ip) + optlen;
1785 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, *phlen >> 2);
1786 ip->ip_len += optlen;
1787 return (m);
1788 }
1789
1790 /*
1791 * Copy options from ip to jp,
1792 * omitting those not copied during fragmentation.
1793 */
1794 int
1795 ip_optcopy(ip, jp)
1796 struct ip *ip, *jp;
1797 {
1798 register u_char *cp, *dp;
1799 int opt, optlen, cnt;
1800
1801 cp = (u_char *)(ip + 1);
1802 dp = (u_char *)(jp + 1);
1803 cnt = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof (struct ip);
1804 for (; cnt > 0; cnt -= optlen, cp += optlen) {
1805 opt = cp[0];
1806 if (opt == IPOPT_EOL)
1807 break;
1808 if (opt == IPOPT_NOP) {
1809 /* Preserve for IP mcast tunnel's LSRR alignment. */
1810 *dp++ = IPOPT_NOP;
1811 optlen = 1;
1812 continue;
1813 }
1814 #if DIAGNOSTIC
1815 if (cnt < IPOPT_OLEN + sizeof(*cp))
1816 panic("malformed IPv4 option passed to ip_optcopy");
1817 #endif
1818 optlen = cp[IPOPT_OLEN];
1819 #if DIAGNOSTIC
1820 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
1821 panic("malformed IPv4 option passed to ip_optcopy");
1822 #endif
1823 /* bogus lengths should have been caught by ip_dooptions */
1824 if (optlen > cnt)
1825 optlen = cnt;
1826 if (IPOPT_COPIED(opt)) {
1827 bcopy(cp, dp, optlen);
1828 dp += optlen;
1829 }
1830 }
1831 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
1832 *dp++ = IPOPT_EOL;
1833 return (optlen);
1834 }
1835
1836 /*
1837 * IP socket option processing.
1838 */
1839 int
1840 ip_ctloutput(so, sopt)
1841 struct socket *so;
1842 struct sockopt *sopt;
1843 {
1844 struct inpcb *inp = sotoinpcb(so);
1845 int error, optval;
1846
1847 error = optval = 0;
1848 if (sopt->sopt_level != IPPROTO_IP) {
1849 return (EINVAL);
1850 }
1851
1852 switch (sopt->sopt_dir) {
1853 case SOPT_SET:
1854 switch (sopt->sopt_name) {
1855 case IP_OPTIONS:
1856 #ifdef notyet
1857 case IP_RETOPTS:
1858 #endif
1859 {
1860 struct mbuf *m;
1861 if (sopt->sopt_valsize > MLEN) {
1862 error = EMSGSIZE;
1863 break;
1864 }
1865 MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_HEADER);
1866 if (m == 0) {
1867 error = ENOBUFS;
1868 break;
1869 }
1870 m->m_len = sopt->sopt_valsize;
1871 error = sooptcopyin(sopt, mtod(m, char *), m->m_len,
1872 m->m_len);
1873 if (error)
1874 break;
1875
1876 return (ip_pcbopts(sopt->sopt_name, &inp->inp_options,
1877 m));
1878 }
1879
1880 case IP_TOS:
1881 case IP_TTL:
1882 case IP_RECVOPTS:
1883 case IP_RECVRETOPTS:
1884 case IP_RECVDSTADDR:
1885 case IP_RECVIF:
1886 case IP_RECVTTL:
1887 #if defined(NFAITH) && NFAITH > 0
1888 case IP_FAITH:
1889 #endif
1890 error = sooptcopyin(sopt, &optval, sizeof optval,
1891 sizeof optval);
1892 if (error)
1893 break;
1894
1895 switch (sopt->sopt_name) {
1896 case IP_TOS:
1897 inp->inp_ip_tos = optval;
1898 break;
1899
1900 case IP_TTL:
1901 inp->inp_ip_ttl = optval;
1902 break;
1903 #define OPTSET(bit) \
1904 if (optval) \
1905 inp->inp_flags |= bit; \
1906 else \
1907 inp->inp_flags &= ~bit;
1908
1909 case IP_RECVOPTS:
1910 OPTSET(INP_RECVOPTS);
1911 break;
1912
1913 case IP_RECVRETOPTS:
1914 OPTSET(INP_RECVRETOPTS);
1915 break;
1916
1917 case IP_RECVDSTADDR:
1918 OPTSET(INP_RECVDSTADDR);
1919 break;
1920
1921 case IP_RECVIF:
1922 OPTSET(INP_RECVIF);
1923 break;
1924
1925 case IP_RECVTTL:
1926 OPTSET(INP_RECVTTL);
1927 break;
1928
1929 #if defined(NFAITH) && NFAITH > 0
1930 case IP_FAITH:
1931 OPTSET(INP_FAITH);
1932 break;
1933 #endif
1934 }
1935 break;
1936 #undef OPTSET
1937
1938 #if CONFIG_FORCE_OUT_IFP
1939 case IP_FORCE_OUT_IFP: {
1940 char ifname[IFNAMSIZ];
1941 ifnet_t ifp;
1942
1943 /* Verify interface name parameter is sane */
1944 if (sopt->sopt_valsize > sizeof(ifname)) {
1945 error = EINVAL;
1946 break;
1947 }
1948
1949 /* Copy the interface name */
1950 if (sopt->sopt_valsize != 0) {
1951 error = sooptcopyin(sopt, ifname, sizeof(ifname), sopt->sopt_valsize);
1952 if (error)
1953 break;
1954 }
1955
1956 if (sopt->sopt_valsize == 0 || ifname[0] == 0) {
1957 // Set pdp_ifp to NULL
1958 inp->pdp_ifp = NULL;
1959
1960 // Flush the route
1961 if (inp->inp_route.ro_rt) {
1962 rtfree(inp->inp_route.ro_rt);
1963 inp->inp_route.ro_rt = NULL;
1964 }
1965
1966 break;
1967 }
1968
1969 /* Verify name is NULL terminated */
1970 if (ifname[sopt->sopt_valsize - 1] != 0) {
1971 error = EINVAL;
1972 break;
1973 }
1974
1975 if (ifnet_find_by_name(ifname, &ifp) != 0) {
1976 error = ENXIO;
1977 break;
1978 }
1979
1980 /* Won't actually free. Since we don't release this later, we should do it now. */
1981 ifnet_release(ifp);
1982
1983 /* This only works for point-to-point interfaces */
1984 if ((ifp->if_flags & IFF_POINTOPOINT) == 0) {
1985 error = ENOTSUP;
1986 break;
1987 }
1988
1989 inp->pdp_ifp = ifp;
1990 }
1991 break;
1992 #endif
1993 case IP_MULTICAST_IF:
1994 case IP_MULTICAST_VIF:
1995 case IP_MULTICAST_TTL:
1996 case IP_MULTICAST_LOOP:
1997 case IP_ADD_MEMBERSHIP:
1998 case IP_DROP_MEMBERSHIP:
1999 error = ip_setmoptions(sopt, &inp->inp_moptions);
2000 break;
2001
2002 case IP_PORTRANGE:
2003 error = sooptcopyin(sopt, &optval, sizeof optval,
2004 sizeof optval);
2005 if (error)
2006 break;
2007
2008 switch (optval) {
2009 case IP_PORTRANGE_DEFAULT:
2010 inp->inp_flags &= ~(INP_LOWPORT);
2011 inp->inp_flags &= ~(INP_HIGHPORT);
2012 break;
2013
2014 case IP_PORTRANGE_HIGH:
2015 inp->inp_flags &= ~(INP_LOWPORT);
2016 inp->inp_flags |= INP_HIGHPORT;
2017 break;
2018
2019 case IP_PORTRANGE_LOW:
2020 inp->inp_flags &= ~(INP_HIGHPORT);
2021 inp->inp_flags |= INP_LOWPORT;
2022 break;
2023
2024 default:
2025 error = EINVAL;
2026 break;
2027 }
2028 break;
2029
2030 #if IPSEC
2031 case IP_IPSEC_POLICY:
2032 {
2033 caddr_t req = NULL;
2034 size_t len = 0;
2035 int priv;
2036 struct mbuf *m;
2037 int optname;
2038
2039 if (sopt->sopt_valsize > MCLBYTES) {
2040 error = EMSGSIZE;
2041 break;
2042 }
2043 if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
2044 break;
2045 if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
2046 break;
2047 priv = (sopt->sopt_p != NULL &&
2048 proc_suser(sopt->sopt_p) != 0) ? 0 : 1;
2049 if (m) {
2050 req = mtod(m, caddr_t);
2051 len = m->m_len;
2052 }
2053 optname = sopt->sopt_name;
2054 error = ipsec4_set_policy(inp, optname, req, len, priv);
2055 m_freem(m);
2056 break;
2057 }
2058 #endif /*IPSEC*/
2059
2060 #if TRAFFIC_MGT
2061 case IP_TRAFFIC_MGT_BACKGROUND:
2062 {
2063 unsigned background = 0;
2064 error = sooptcopyin(sopt, &background, sizeof(background), sizeof(background));
2065 if (error)
2066 break;
2067
2068 if (background)
2069 so->so_traffic_mgt_flags |= TRAFFIC_MGT_SO_BACKGROUND;
2070 else
2071 so->so_traffic_mgt_flags &= ~TRAFFIC_MGT_SO_BACKGROUND;
2072
2073 break;
2074 }
2075 #endif /* TRAFFIC_MGT */
2076
2077 default:
2078 error = ENOPROTOOPT;
2079 break;
2080 }
2081 break;
2082
2083 case SOPT_GET:
2084 switch (sopt->sopt_name) {
2085 case IP_OPTIONS:
2086 case IP_RETOPTS:
2087 if (inp->inp_options)
2088 error = sooptcopyout(sopt,
2089 mtod(inp->inp_options,
2090 char *),
2091 inp->inp_options->m_len);
2092 else
2093 sopt->sopt_valsize = 0;
2094 break;
2095
2096 case IP_TOS:
2097 case IP_TTL:
2098 case IP_RECVOPTS:
2099 case IP_RECVRETOPTS:
2100 case IP_RECVDSTADDR:
2101 case IP_RECVIF:
2102 case IP_RECVTTL:
2103 case IP_PORTRANGE:
2104 #if defined(NFAITH) && NFAITH > 0
2105 case IP_FAITH:
2106 #endif
2107 switch (sopt->sopt_name) {
2108
2109 case IP_TOS:
2110 optval = inp->inp_ip_tos;
2111 break;
2112
2113 case IP_TTL:
2114 optval = inp->inp_ip_ttl;
2115 break;
2116
2117 #define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0)
2118
2119 case IP_RECVOPTS:
2120 optval = OPTBIT(INP_RECVOPTS);
2121 break;
2122
2123 case IP_RECVRETOPTS:
2124 optval = OPTBIT(INP_RECVRETOPTS);
2125 break;
2126
2127 case IP_RECVDSTADDR:
2128 optval = OPTBIT(INP_RECVDSTADDR);
2129 break;
2130
2131 case IP_RECVIF:
2132 optval = OPTBIT(INP_RECVIF);
2133 break;
2134
2135 case IP_RECVTTL:
2136 optval = OPTBIT(INP_RECVTTL);
2137 break;
2138
2139 case IP_PORTRANGE:
2140 if (inp->inp_flags & INP_HIGHPORT)
2141 optval = IP_PORTRANGE_HIGH;
2142 else if (inp->inp_flags & INP_LOWPORT)
2143 optval = IP_PORTRANGE_LOW;
2144 else
2145 optval = 0;
2146 break;
2147
2148 #if defined(NFAITH) && NFAITH > 0
2149 case IP_FAITH:
2150 optval = OPTBIT(INP_FAITH);
2151 break;
2152 #endif
2153 }
2154 error = sooptcopyout(sopt, &optval, sizeof optval);
2155 break;
2156
2157 case IP_MULTICAST_IF:
2158 case IP_MULTICAST_VIF:
2159 case IP_MULTICAST_TTL:
2160 case IP_MULTICAST_LOOP:
2161 case IP_ADD_MEMBERSHIP:
2162 case IP_DROP_MEMBERSHIP:
2163 error = ip_getmoptions(sopt, inp->inp_moptions);
2164 break;
2165
2166 #if IPSEC
2167 case IP_IPSEC_POLICY:
2168 {
2169 struct mbuf *m = NULL;
2170 caddr_t req = NULL;
2171 size_t len = 0;
2172
2173 if (m != 0) {
2174 req = mtod(m, caddr_t);
2175 len = m->m_len;
2176 }
2177 error = ipsec4_get_policy(sotoinpcb(so), req, len, &m);
2178 if (error == 0)
2179 error = soopt_mcopyout(sopt, m); /* XXX */
2180 if (error == 0)
2181 m_freem(m);
2182 break;
2183 }
2184 #endif /*IPSEC*/
2185
2186 #if TRAFFIC_MGT
2187 case IP_TRAFFIC_MGT_BACKGROUND:
2188 {
2189 unsigned background = so->so_traffic_mgt_flags;
2190 return (sooptcopyout(sopt, &background, sizeof(background)));
2191 break;
2192 }
2193 #endif /* TRAFFIC_MGT */
2194
2195 default:
2196 error = ENOPROTOOPT;
2197 break;
2198 }
2199 break;
2200 }
2201 return (error);
2202 }
2203
2204 /*
2205 * Set up IP options in pcb for insertion in output packets.
2206 * Store in mbuf with pointer in pcbopt, adding pseudo-option
2207 * with destination address if source routed.
2208 */
2209 static int
2210 ip_pcbopts(
2211 __unused int optname,
2212 struct mbuf **pcbopt,
2213 register struct mbuf *m)
2214 {
2215 register int cnt, optlen;
2216 register u_char *cp;
2217 u_char opt;
2218
2219 /* turn off any old options */
2220 if (*pcbopt)
2221 (void)m_free(*pcbopt);
2222 *pcbopt = 0;
2223 if (m == (struct mbuf *)0 || m->m_len == 0) {
2224 /*
2225 * Only turning off any previous options.
2226 */
2227 if (m)
2228 (void)m_free(m);
2229 return (0);
2230 }
2231
2232 #ifndef vax
2233 if (m->m_len % sizeof(int32_t))
2234 goto bad;
2235 #endif
2236 /*
2237 * IP first-hop destination address will be stored before
2238 * actual options; move other options back
2239 * and clear it when none present.
2240 */
2241 if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
2242 goto bad;
2243 cnt = m->m_len;
2244 m->m_len += sizeof(struct in_addr);
2245 cp = mtod(m, u_char *) + sizeof(struct in_addr);
2246 ovbcopy(mtod(m, caddr_t), (caddr_t)cp, (unsigned)cnt);
2247 bzero(mtod(m, caddr_t), sizeof(struct in_addr));
2248
2249 for (; cnt > 0; cnt -= optlen, cp += optlen) {
2250 opt = cp[IPOPT_OPTVAL];
2251 if (opt == IPOPT_EOL)
2252 break;
2253 if (opt == IPOPT_NOP)
2254 optlen = 1;
2255 else {
2256 if (cnt < IPOPT_OLEN + sizeof(*cp))
2257 goto bad;
2258 optlen = cp[IPOPT_OLEN];
2259 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
2260 goto bad;
2261 }
2262 switch (opt) {
2263
2264 default:
2265 break;
2266
2267 case IPOPT_LSRR:
2268 case IPOPT_SSRR:
2269 /*
2270 * user process specifies route as:
2271 * ->A->B->C->D
2272 * D must be our final destination (but we can't
2273 * check that since we may not have connected yet).
2274 * A is first hop destination, which doesn't appear in
2275 * actual IP option, but is stored before the options.
2276 */
2277 if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
2278 goto bad;
2279 m->m_len -= sizeof(struct in_addr);
2280 cnt -= sizeof(struct in_addr);
2281 optlen -= sizeof(struct in_addr);
2282 cp[IPOPT_OLEN] = optlen;
2283 /*
2284 * Move first hop before start of options.
2285 */
2286 bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
2287 sizeof(struct in_addr));
2288 /*
2289 * Then copy rest of options back
2290 * to close up the deleted entry.
2291 */
2292 ovbcopy((caddr_t)(&cp[IPOPT_OFFSET+1] +
2293 sizeof(struct in_addr)),
2294 (caddr_t)&cp[IPOPT_OFFSET+1],
2295 (unsigned)cnt + sizeof(struct in_addr));
2296 break;
2297 }
2298 }
2299 if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
2300 goto bad;
2301 *pcbopt = m;
2302 return (0);
2303
2304 bad:
2305 (void)m_free(m);
2306 return (EINVAL);
2307 }
2308
2309 /*
2310 * XXX
2311 * The whole multicast option thing needs to be re-thought.
2312 * Several of these options are equally applicable to non-multicast
2313 * transmission, and one (IP_MULTICAST_TTL) totally duplicates a
2314 * standard option (IP_TTL).
2315 */
2316
2317 /*
2318 * following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as interface index.
2319 */
2320 static struct ifnet *
2321 ip_multicast_if(a, ifindexp)
2322 struct in_addr *a;
2323 int *ifindexp;
2324 {
2325 int ifindex;
2326 struct ifnet *ifp;
2327
2328 if (ifindexp)
2329 *ifindexp = 0;
2330 if (ntohl(a->s_addr) >> 24 == 0) {
2331 ifindex = ntohl(a->s_addr) & 0xffffff;
2332 ifnet_head_lock_shared();
2333 if (ifindex < 0 || if_index < ifindex) {
2334 ifnet_head_done();
2335 return NULL;
2336 }
2337 ifp = ifindex2ifnet[ifindex];
2338 ifnet_head_done();
2339 if (ifindexp)
2340 *ifindexp = ifindex;
2341 } else {
2342 INADDR_TO_IFP(*a, ifp);
2343 }
2344 return ifp;
2345 }
2346
2347 /*
2348 * Set the IP multicast options in response to user setsockopt().
2349 */
2350 static int
2351 ip_setmoptions(sopt, imop)
2352 struct sockopt *sopt;
2353 struct ip_moptions **imop;
2354 {
2355 int error = 0;
2356 int i;
2357 struct in_addr addr;
2358 struct ip_mreq mreq;
2359 struct ifnet *ifp = NULL;
2360 struct ip_moptions *imo = *imop;
2361 int ifindex;
2362
2363 if (imo == NULL) {
2364 /*
2365 * No multicast option buffer attached to the pcb;
2366 * allocate one and initialize to default values.
2367 */
2368 error = ip_createmoptions(imop);
2369 if (error != 0)
2370 return error;
2371 imo = *imop;
2372 }
2373
2374 switch (sopt->sopt_name) {
2375 /* store an index number for the vif you wanna use in the send */
2376 #if MROUTING
2377 case IP_MULTICAST_VIF:
2378 if (legal_vif_num == 0) {
2379 error = EOPNOTSUPP;
2380 break;
2381 }
2382 error = sooptcopyin(sopt, &i, sizeof i, sizeof i);
2383 if (error)
2384 break;
2385 if (!legal_vif_num(i) && (i != -1)) {
2386 error = EINVAL;
2387 break;
2388 }
2389 imo->imo_multicast_vif = i;
2390 break;
2391 #endif /* MROUTING */
2392
2393 case IP_MULTICAST_IF:
2394 /*
2395 * Select the interface for outgoing multicast packets.
2396 */
2397 error = sooptcopyin(sopt, &addr, sizeof addr, sizeof addr);
2398 if (error)
2399 break;
2400 /*
2401 * INADDR_ANY is used to remove a previous selection.
2402 * When no interface is selected, a default one is
2403 * chosen every time a multicast packet is sent.
2404 */
2405 if (addr.s_addr == INADDR_ANY) {
2406 imo->imo_multicast_ifp = NULL;
2407 break;
2408 }
2409 /*
2410 * The selected interface is identified by its local
2411 * IP address. Find the interface and confirm that
2412 * it supports multicasting.
2413 */
2414 ifp = ip_multicast_if(&addr, &ifindex);
2415 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
2416 error = EADDRNOTAVAIL;
2417 break;
2418 }
2419 imo->imo_multicast_ifp = ifp;
2420 if (ifindex)
2421 imo->imo_multicast_addr = addr;
2422 else
2423 imo->imo_multicast_addr.s_addr = INADDR_ANY;
2424 break;
2425
2426 case IP_MULTICAST_TTL:
2427 /*
2428 * Set the IP time-to-live for outgoing multicast packets.
2429 * The original multicast API required a char argument,
2430 * which is inconsistent with the rest of the socket API.
2431 * We allow either a char or an int.
2432 */
2433 if (sopt->sopt_valsize == 1) {
2434 u_char ttl;
2435 error = sooptcopyin(sopt, &ttl, 1, 1);
2436 if (error)
2437 break;
2438 imo->imo_multicast_ttl = ttl;
2439 } else {
2440 u_int ttl;
2441 error = sooptcopyin(sopt, &ttl, sizeof ttl,
2442 sizeof ttl);
2443 if (error)
2444 break;
2445 if (ttl > 255)
2446 error = EINVAL;
2447 else
2448 imo->imo_multicast_ttl = ttl;
2449 }
2450 break;
2451
2452 case IP_MULTICAST_LOOP:
2453 /*
2454 * Set the loopback flag for outgoing multicast packets.
2455 * Must be zero or one. The original multicast API required a
2456 * char argument, which is inconsistent with the rest
2457 * of the socket API. We allow either a char or an int.
2458 */
2459 if (sopt->sopt_valsize == 1) {
2460 u_char loop;
2461 error = sooptcopyin(sopt, &loop, 1, 1);
2462 if (error)
2463 break;
2464 imo->imo_multicast_loop = !!loop;
2465 } else {
2466 u_int loop;
2467 error = sooptcopyin(sopt, &loop, sizeof loop,
2468 sizeof loop);
2469 if (error)
2470 break;
2471 imo->imo_multicast_loop = !!loop;
2472 }
2473 break;
2474
2475 case IP_ADD_MEMBERSHIP:
2476 /*
2477 * Add a multicast group membership.
2478 * Group must be a valid IP multicast address.
2479 */
2480 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
2481 if (error)
2482 break;
2483
2484 error = ip_addmembership(imo, &mreq);
2485 break;
2486
2487 case IP_DROP_MEMBERSHIP:
2488 /*
2489 * Drop a multicast group membership.
2490 * Group must be a valid IP multicast address.
2491 */
2492 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
2493 if (error)
2494 break;
2495
2496 error = ip_dropmembership(imo, &mreq);
2497 break;
2498
2499 default:
2500 error = EOPNOTSUPP;
2501 break;
2502 }
2503
2504 /*
2505 * If all options have default values, no need to keep the mbuf.
2506 */
2507 if (imo->imo_multicast_ifp == NULL &&
2508 imo->imo_multicast_vif == (u_long)-1 &&
2509 imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL &&
2510 imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP &&
2511 imo->imo_num_memberships == 0) {
2512 FREE(*imop, M_IPMOPTS);
2513 *imop = NULL;
2514 }
2515
2516 return (error);
2517 }
2518
2519 /*
2520 * Set the IP multicast options in response to user setsockopt().
2521 */
2522 __private_extern__ int
2523 ip_createmoptions(
2524 struct ip_moptions **imop)
2525 {
2526 struct ip_moptions *imo;
2527 imo = (struct ip_moptions*) _MALLOC(sizeof(*imo), M_IPMOPTS,
2528 M_WAITOK);
2529
2530 if (imo == NULL)
2531 return (ENOBUFS);
2532 *imop = imo;
2533 imo->imo_multicast_ifp = NULL;
2534 imo->imo_multicast_addr.s_addr = INADDR_ANY;
2535 imo->imo_multicast_vif = -1;
2536 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
2537 imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
2538 imo->imo_num_memberships = 0;
2539
2540 return 0;
2541 }
2542
2543 /*
2544 * Add membership to an IPv4 multicast.
2545 */
2546 __private_extern__ int
2547 ip_addmembership(
2548 struct ip_moptions *imo,
2549 struct ip_mreq *mreq)
2550 {
2551 struct route ro;
2552 struct sockaddr_in *dst;
2553 struct ifnet *ifp = NULL;
2554 int error = 0;
2555 int i;
2556
2557 if (!IN_MULTICAST(ntohl(mreq->imr_multiaddr.s_addr))) {
2558 error = EINVAL;
2559 return error;
2560 }
2561 /*
2562 * If no interface address was provided, use the interface of
2563 * the route to the given multicast address.
2564 */
2565 if (mreq->imr_interface.s_addr == INADDR_ANY) {
2566 bzero((caddr_t)&ro, sizeof(ro));
2567 dst = (struct sockaddr_in *)&ro.ro_dst;
2568 dst->sin_len = sizeof(*dst);
2569 dst->sin_family = AF_INET;
2570 dst->sin_addr = mreq->imr_multiaddr;
2571 lck_mtx_lock(rt_mtx);
2572 rtalloc_ign_locked(&ro, 0UL);
2573 if (ro.ro_rt != NULL) {
2574 ifp = ro.ro_rt->rt_ifp;
2575 rtfree_locked(ro.ro_rt);
2576 }
2577 else {
2578 /* If there's no default route, try using loopback */
2579 mreq->imr_interface.s_addr = INADDR_LOOPBACK;
2580 }
2581 lck_mtx_unlock(rt_mtx);
2582 }
2583
2584 if (ifp == NULL) {
2585 ifp = ip_multicast_if(&mreq->imr_interface, NULL);
2586 }
2587
2588 /*
2589 * See if we found an interface, and confirm that it
2590 * supports multicast.
2591 */
2592 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
2593 error = EADDRNOTAVAIL;
2594 return error;
2595 }
2596 /*
2597 * See if the membership already exists or if all the
2598 * membership slots are full.
2599 */
2600 for (i = 0; i < imo->imo_num_memberships; ++i) {
2601 if (imo->imo_membership[i]->inm_ifp == ifp &&
2602 imo->imo_membership[i]->inm_addr.s_addr
2603 == mreq->imr_multiaddr.s_addr)
2604 break;
2605 }
2606 if (i < imo->imo_num_memberships) {
2607 error = EADDRINUSE;
2608 return error;
2609 }
2610 if (i == IP_MAX_MEMBERSHIPS) {
2611 error = ETOOMANYREFS;
2612 return error;
2613 }
2614 /*
2615 * Everything looks good; add a new record to the multicast
2616 * address list for the given interface.
2617 */
2618 if ((imo->imo_membership[i] =
2619 in_addmulti(&mreq->imr_multiaddr, ifp)) == NULL) {
2620 error = ENOBUFS;
2621 return error;
2622 }
2623 ++imo->imo_num_memberships;
2624
2625 return error;
2626 }
2627
2628 /*
2629 * Drop membership of an IPv4 multicast.
2630 */
2631 __private_extern__ int
2632 ip_dropmembership(
2633 struct ip_moptions *imo,
2634 struct ip_mreq *mreq)
2635 {
2636 int error = 0;
2637 struct ifnet* ifp = NULL;
2638 int i;
2639
2640 if (!IN_MULTICAST(ntohl(mreq->imr_multiaddr.s_addr))) {
2641 error = EINVAL;
2642 return error;
2643 }
2644
2645 /*
2646 * If an interface address was specified, get a pointer
2647 * to its ifnet structure.
2648 */
2649 if (mreq->imr_interface.s_addr == INADDR_ANY)
2650 ifp = NULL;
2651 else {
2652 ifp = ip_multicast_if(&mreq->imr_interface, NULL);
2653 if (ifp == NULL) {
2654 error = EADDRNOTAVAIL;
2655 return error;
2656 }
2657 }
2658 /*
2659 * Find the membership in the membership array.
2660 */
2661 for (i = 0; i < imo->imo_num_memberships; ++i) {
2662 if ((ifp == NULL ||
2663 imo->imo_membership[i]->inm_ifp == ifp) &&
2664 imo->imo_membership[i]->inm_addr.s_addr ==
2665 mreq->imr_multiaddr.s_addr)
2666 break;
2667 }
2668 if (i == imo->imo_num_memberships) {
2669 error = EADDRNOTAVAIL;
2670 return error;
2671 }
2672 /*
2673 * Give up the multicast address record to which the
2674 * membership points.
2675 */
2676 in_delmulti(&imo->imo_membership[i]);
2677 /*
2678 * Remove the gap in the membership array.
2679 */
2680 for (++i; i < imo->imo_num_memberships; ++i)
2681 imo->imo_membership[i-1] = imo->imo_membership[i];
2682 --imo->imo_num_memberships;
2683
2684 return error;
2685 }
2686
2687 /*
2688 * Return the IP multicast options in response to user getsockopt().
2689 */
2690 static int
2691 ip_getmoptions(sopt, imo)
2692 struct sockopt *sopt;
2693 register struct ip_moptions *imo;
2694 {
2695 struct in_addr addr;
2696 struct in_ifaddr *ia;
2697 int error, optval;
2698 u_char coptval;
2699
2700 error = 0;
2701 switch (sopt->sopt_name) {
2702 #if MROUTING
2703 case IP_MULTICAST_VIF:
2704 if (imo != NULL)
2705 optval = imo->imo_multicast_vif;
2706 else
2707 optval = -1;
2708 error = sooptcopyout(sopt, &optval, sizeof optval);
2709 break;
2710 #endif /* MROUTING */
2711
2712 case IP_MULTICAST_IF:
2713 if (imo == NULL || imo->imo_multicast_ifp == NULL)
2714 addr.s_addr = INADDR_ANY;
2715 else if (imo->imo_multicast_addr.s_addr) {
2716 /* return the value user has set */
2717 addr = imo->imo_multicast_addr;
2718 } else {
2719 IFP_TO_IA(imo->imo_multicast_ifp, ia);
2720 addr.s_addr = (ia == NULL) ? INADDR_ANY
2721 : IA_SIN(ia)->sin_addr.s_addr;
2722 }
2723 error = sooptcopyout(sopt, &addr, sizeof addr);
2724 break;
2725
2726 case IP_MULTICAST_TTL:
2727 if (imo == 0)
2728 optval = coptval = IP_DEFAULT_MULTICAST_TTL;
2729 else
2730 optval = coptval = imo->imo_multicast_ttl;
2731 if (sopt->sopt_valsize == 1)
2732 error = sooptcopyout(sopt, &coptval, 1);
2733 else
2734 error = sooptcopyout(sopt, &optval, sizeof optval);
2735 break;
2736
2737 case IP_MULTICAST_LOOP:
2738 if (imo == 0)
2739 optval = coptval = IP_DEFAULT_MULTICAST_LOOP;
2740 else
2741 optval = coptval = imo->imo_multicast_loop;
2742 if (sopt->sopt_valsize == 1)
2743 error = sooptcopyout(sopt, &coptval, 1);
2744 else
2745 error = sooptcopyout(sopt, &optval, sizeof optval);
2746 break;
2747
2748 default:
2749 error = ENOPROTOOPT;
2750 break;
2751 }
2752 return (error);
2753 }
2754
2755 /*
2756 * Discard the IP multicast options.
2757 */
2758 void
2759 ip_freemoptions(imo)
2760 register struct ip_moptions *imo;
2761 {
2762 register int i;
2763
2764 if (imo != NULL) {
2765 for (i = 0; i < imo->imo_num_memberships; ++i)
2766 in_delmulti(&imo->imo_membership[i]);
2767 FREE(imo, M_IPMOPTS);
2768 }
2769 }
2770
2771 /*
2772 * Routine called from ip_output() to loop back a copy of an IP multicast
2773 * packet to the input queue of a specified interface. Note that this
2774 * calls the output routine of the loopback "driver", but with an interface
2775 * pointer that might NOT be a loopback interface -- evil, but easier than
2776 * replicating that code here.
2777 */
2778 static void
2779 ip_mloopback(ifp, m, dst, hlen)
2780 struct ifnet *ifp;
2781 register struct mbuf *m;
2782 register struct sockaddr_in *dst;
2783 int hlen;
2784 {
2785 register struct ip *ip;
2786 struct mbuf *copym;
2787 int sw_csum = (apple_hwcksum_tx == 0);
2788
2789 copym = m_copy(m, 0, M_COPYALL);
2790 if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen))
2791 copym = m_pullup(copym, hlen);
2792
2793 if (copym == NULL)
2794 return;
2795
2796 /*
2797 * We don't bother to fragment if the IP length is greater
2798 * than the interface's MTU. Can this possibly matter?
2799 */
2800 ip = mtod(copym, struct ip *);
2801 HTONS(ip->ip_len);
2802 HTONS(ip->ip_off);
2803 ip->ip_sum = 0;
2804 ip->ip_sum = in_cksum(copym, hlen);
2805 /*
2806 * NB:
2807 * It's not clear whether there are any lingering
2808 * reentrancy problems in other areas which might
2809 * be exposed by using ip_input directly (in
2810 * particular, everything which modifies the packet
2811 * in-place). Yet another option is using the
2812 * protosw directly to deliver the looped back
2813 * packet. For the moment, we'll err on the side
2814 * of safety by using if_simloop().
2815 */
2816 #if 1 /* XXX */
2817 if (dst->sin_family != AF_INET) {
2818 printf("ip_mloopback: bad address family %d\n",
2819 dst->sin_family);
2820 dst->sin_family = AF_INET;
2821 }
2822 #endif
2823
2824 /*
2825 * Mark checksum as valid or calculate checksum for loopback.
2826 *
2827 * This is done this way because we have to embed the ifp of
2828 * the interface we will send the original copy of the packet
2829 * out on in the mbuf. ip_input will check if_hwassist of the
2830 * embedded ifp and ignore all csum_flags if if_hwassist is 0.
2831 * The UDP checksum has not been calculated yet.
2832 */
2833 if (sw_csum || (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA)) {
2834 if (!sw_csum && IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist)) {
2835 copym->m_pkthdr.csum_flags |=
2836 CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
2837 CSUM_IP_CHECKED | CSUM_IP_VALID;
2838 copym->m_pkthdr.csum_data = 0xffff;
2839 } else {
2840 NTOHS(ip->ip_len);
2841 in_delayed_cksum(copym);
2842 HTONS(ip->ip_len);
2843 }
2844 }
2845
2846 /*
2847 * TedW:
2848 * We need to send all loopback traffic down to dlil in case
2849 * a filter has tapped-in.
2850 */
2851
2852 /*
2853 * Stuff the 'real' ifp into the pkthdr, to be used in matching
2854 * in ip_input(); we need the loopback ifp/dl_tag passed as args
2855 * to make the loopback driver compliant with the data link
2856 * requirements.
2857 */
2858 if (lo_ifp) {
2859 copym->m_pkthdr.rcvif = ifp;
2860 dlil_output(lo_ifp, PF_INET, copym, 0,
2861 (struct sockaddr *) dst, 0);
2862 } else {
2863 printf("Warning: ip_output call to dlil_find_dltag failed!\n");
2864 m_freem(copym);
2865 }
2866 }