]> git.saurik.com Git - apple/xnu.git/blob - bsd/netinet/ip_output.c
047b6b7ceb3dc00b02fb169ba7ebe0065985931a
[apple/xnu.git] / bsd / netinet / ip_output.c
1 /*
2 * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1982, 1986, 1988, 1990, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94
61 * $FreeBSD: src/sys/netinet/ip_output.c,v 1.99.2.16 2001/07/19 06:37:26 kris Exp $
62 */
63 /*
64 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
65 * support for mandatory and extensible security protections. This notice
66 * is included in support of clause 2.2 (b) of the Apple Public License,
67 * Version 2.0.
68 */
69
70 #define _IP_VHL
71
72 #include <sys/param.h>
73 #include <sys/systm.h>
74 #include <sys/kernel.h>
75 #include <sys/malloc.h>
76 #include <sys/mbuf.h>
77 #include <sys/protosw.h>
78 #include <sys/socket.h>
79 #include <sys/socketvar.h>
80 #include <kern/locks.h>
81 #include <sys/sysctl.h>
82
83 #include <net/if.h>
84 #include <net/if_dl.h>
85 #include <net/route.h>
86
87 #include <netinet/in.h>
88 #include <netinet/in_systm.h>
89 #include <netinet/ip.h>
90 #include <netinet/in_pcb.h>
91 #include <netinet/in_var.h>
92 #include <netinet/ip_var.h>
93
94 #include <netinet/kpi_ipfilter_var.h>
95
96 #if CONFIG_MACF_NET
97 #include <security/mac_framework.h>
98 #endif
99
100 #include "faith.h"
101
102 #include <net/dlil.h>
103 #include <sys/kdebug.h>
104 #include <libkern/OSAtomic.h>
105
/*
 * kdebug trace codes for this file, built with NETDBG_CODE() in the
 * DBG_NETIP class.  The codes are handed to KERNEL_DEBUG() by the output
 * path (e.g. DBG_FNC_IP_OUTPUT | DBG_FUNC_START on entry to
 * ip_output_list()).
 */
#define DBG_LAYER_BEG		NETDBG_CODE(DBG_NETIP, 1)
#define DBG_LAYER_END		NETDBG_CODE(DBG_NETIP, 3)
#define DBG_FNC_IP_OUTPUT	NETDBG_CODE(DBG_NETIP, (1 << 8) | 1)
#define DBG_FNC_IPSEC4_OUTPUT	NETDBG_CODE(DBG_NETIP, (2 << 8) | 1)

/*
 * Exchange the two low-order bytes of v and yield the 16-bit result.
 *
 * Both halves are masked to 8 bits, so bits above bit 15 of the argument
 * never reach the result; for arguments that already fit in 16 bits the
 * value is the same as an unmasked swap.  The argument is evaluated twice,
 * so it must not have side effects.
 */
#define SWAP16(v)	((((v) & 0xff) << 8) | (((v) >> 8) & 0xff))
112
113 #if IPSEC
114 #include <netinet6/ipsec.h>
115 #include <netkey/key.h>
116 #if IPSEC_DEBUG
117 #include <netkey/key_debug.h>
118 #else
119 #define KEYDEBUG(lev,arg)
120 #endif
121 #endif /*IPSEC*/
122
123 #include <netinet/ip_fw.h>
124 #include <netinet/ip_divert.h>
125
126 #if DUMMYNET
127 #include <netinet/ip_dummynet.h>
128 #endif
129
#if IPFIREWALL_FORWARD_DEBUG
/*
 * Debug helper: print an IPv4 address in dotted-quad form.
 *
 * 'a' is an object with an s_addr member holding the address in network
 * byte order (it is converted with ntohl() before the octets are
 * extracted).  Each octet is cast to unsigned int so that it matches the
 * %u conversions regardless of the width of ntohl()'s return type.
 *
 * The argument is expanded four times, so it must not have side effects.
 * The expansion intentionally keeps its trailing ';' so that existing call
 * sites keep expanding to the same statement sequence.
 */
#define print_ip(a)	printf("%u.%u.%u.%u",				\
	(unsigned int)((ntohl((a).s_addr) >> 24) & 0xFF),		\
	(unsigned int)((ntohl((a).s_addr) >> 16) & 0xFF),		\
	(unsigned int)((ntohl((a).s_addr) >> 8) & 0xFF),		\
	(unsigned int)(ntohl((a).s_addr) & 0xFF));
#endif
136
137
138 u_short ip_id;
139
140 static struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *);
141 static struct ifnet *ip_multicast_if(struct in_addr *, int *);
142 static void ip_mloopback(struct ifnet *, struct mbuf *,
143 struct sockaddr_in *, int);
144 static int ip_getmoptions(struct sockopt *, struct ip_moptions *);
145 static int ip_pcbopts(int, struct mbuf **, struct mbuf *);
146 static int ip_setmoptions(struct sockopt *, struct ip_moptions **);
147
148 static void ip_out_cksum_stats(int, u_int32_t);
149 static struct ifaddr *in_selectsrcif(struct ip *, struct route *, unsigned int);
150 static void ip_bindif(struct inpcb *, unsigned int);
151
152 int ip_createmoptions(struct ip_moptions **imop);
153 int ip_addmembership(struct ip_moptions *imo, struct ip_mreq *mreq);
154 int ip_dropmembership(struct ip_moptions *imo, struct ip_mreq *mreq);
155 int ip_optcopy(struct ip *, struct ip *);
156 void in_delayed_cksum_offset(struct mbuf *, int );
157 void in_cksum_offset(struct mbuf* , size_t );
158
159 extern int (*fr_checkp)(struct ip *, int, struct ifnet *, int, struct mbuf **);
160
161 extern u_long route_generation;
162
163 extern struct protosw inetsw[];
164
165 extern struct ip_linklocal_stat ip_linklocal_stat;
166 extern lck_mtx_t *ip_mutex;
167
168 /* temporary: for testing */
169 #if IPSEC
170 extern int ipsec_bypass;
171 #endif
172
173 static int ip_maxchainsent = 0;
174 SYSCTL_INT(_net_inet_ip, OID_AUTO, maxchainsent, CTLFLAG_RW,
175 &ip_maxchainsent, 0, "use dlil_output_list");
176 #if DEBUG
177 static int forge_ce = 0;
178 SYSCTL_INT(_net_inet_ip, OID_AUTO, forge_ce, CTLFLAG_RW,
179 &forge_ce, 0, "Forge ECN CE");
180 #endif /* DEBUG */
181
182 static int ip_select_srcif_debug = 0;
183 SYSCTL_INT(_net_inet_ip, OID_AUTO, select_srcif_debug, CTLFLAG_RW,
184 &ip_select_srcif_debug, 0, "log source interface selection debug info");
185
/*
 * ip_output: transmit one IP packet.
 *
 * m0 is an mbuf chain holding the packet, which begins with a skeletal IP
 * header (len, off, ttl, proto, tos, src and dst filled in).  The chain is
 * always consumed (freed); the option mbuf 'opt', when supplied, is left
 * for the caller to free.
 *
 * This is a convenience entry point: every argument is forwarded as-is to
 * ip_output_list() with the 'packetchain' argument set to 0, and that
 * routine's return value is passed straight back to the caller.
 */
int
ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro, int flags,
    struct ip_moptions *imo, struct ip_out_args *ipoa)
{
	return (ip_output_list(m0, 0, opt, ro, flags, imo, ipoa));
}
205
206 /*
207 * Returns: 0 Success
208 * ENOMEM
209 * EADDRNOTAVAIL
210 * ENETUNREACH
211 * EHOSTUNREACH
212 * EACCES
213 * EMSGSIZE
214 * ENOBUFS
215 * ipsec4_getpolicybyaddr:??? [IPSEC 4th argument, contents modified]
216 * ipsec4_getpolicybysock:??? [IPSEC 4th argument, contents modified]
217 * key_spdacquire:??? [IPSEC]
218 * ipsec4_output:??? [IPSEC]
219 * <fr_checkp>:??? [firewall]
220 * ip_dn_io_ptr:??? [dummynet]
221 * dlil_output:??? [DLIL]
222 * dlil_output_list:??? [DLIL]
223 *
224 * Notes: The ipsec4_getpolicyby{addr|sock} function error returns are
225 * only used as the error return from this function where one of
226 * these functions fails to return a policy.
227 */
228 int
229 ip_output_list(
230 struct mbuf *m0,
231 int packetchain,
232 struct mbuf *opt,
233 struct route *ro,
234 int flags,
235 struct ip_moptions *imo,
236 struct ip_out_args *ipoa
237 )
238 {
239 struct ip *ip, *mhip;
240 struct ifnet *ifp = NULL;
241 struct mbuf *m = m0;
242 int hlen = sizeof (struct ip);
243 int len = 0, off, error = 0;
244 struct sockaddr_in *dst = NULL;
245 struct in_ifaddr *ia = NULL;
246 int isbroadcast, sw_csum;
247 struct in_addr pkt_dst;
248 #if IPSEC
249 struct route iproute;
250 struct socket *so = NULL;
251 struct secpolicy *sp = NULL;
252 #endif
253 #if IPFIREWALL_FORWARD
254 int fwd_rewrite_src = 0;
255 #endif
256 #if IPFIREWALL
257 struct ip_fw_args args;
258 #endif
259 int didfilter = 0;
260 ipfilter_t inject_filter_ref = 0;
261 struct m_tag *tag;
262 struct route saved_route;
263 struct ip_out_args saved_ipoa;
264 struct mbuf * packetlist;
265 int pktcnt = 0;
266 unsigned int ifscope;
267 boolean_t select_srcif;
268
269 KERNEL_DEBUG(DBG_FNC_IP_OUTPUT | DBG_FUNC_START, 0,0,0,0,0);
270
271 packetlist = m0;
272 #if IPFIREWALL
273 args.next_hop = NULL;
274 args.eh = NULL;
275 args.rule = NULL;
276 args.divert_rule = 0; /* divert cookie */
277 args.ipoa = NULL;
278
279 /* Grab info from mtags prepended to the chain */
280 #if DUMMYNET
281 if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_DUMMYNET, NULL)) != NULL) {
282 struct dn_pkt_tag *dn_tag;
283
284 dn_tag = (struct dn_pkt_tag *)(tag+1);
285 args.rule = dn_tag->rule;
286 opt = NULL;
287 saved_route = dn_tag->ro;
288 ro = &saved_route;
289
290 imo = NULL;
291 dst = dn_tag->dn_dst;
292 ifp = dn_tag->ifp;
293 flags = dn_tag->flags;
294 saved_ipoa = dn_tag->ipoa;
295 ipoa = &saved_ipoa;
296
297 m_tag_delete(m0, tag);
298 }
299 #endif /* DUMMYNET */
300
301 #if IPDIVERT
302 if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_DIVERT, NULL)) != NULL) {
303 struct divert_tag *div_tag;
304
305 div_tag = (struct divert_tag *)(tag+1);
306 args.divert_rule = div_tag->cookie;
307
308 m_tag_delete(m0, tag);
309 }
310 #endif /* IPDIVERT */
311
312 if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_IPFORWARD, NULL)) != NULL) {
313 struct ip_fwd_tag *ipfwd_tag;
314
315 ipfwd_tag = (struct ip_fwd_tag *)(tag+1);
316 args.next_hop = ipfwd_tag->next_hop;
317
318 m_tag_delete(m0, tag);
319 }
320 #endif /* IPFIREWALL */
321
322 m = m0;
323
324 #if DIAGNOSTIC
325 if ( !m || (m->m_flags & M_PKTHDR) != 0)
326 panic("ip_output no HDR");
327 if (!ro)
328 panic("ip_output no route, proto = %d",
329 mtod(m, struct ip *)->ip_p);
330 #endif
331
332 /*
333 * Do not perform source interface selection when forwarding.
334 * At present the IP_OUTARGS flag implies a request for IP to
335 * perform source interface selection.
336 */
337 if (ip_doscopedroute &&
338 (flags & (IP_OUTARGS | IP_FORWARDING)) == IP_OUTARGS) {
339 select_srcif = TRUE;
340 ifscope = ipoa->ipoa_ifscope;
341 } else {
342 select_srcif = FALSE;
343 ifscope = IFSCOPE_NONE;
344 }
345
346 #if IPFIREWALL
347 if (args.rule != NULL) { /* dummynet already saw us */
348 ip = mtod(m, struct ip *);
349 hlen = IP_VHL_HL(ip->ip_vhl) << 2 ;
350 lck_mtx_lock(rt_mtx);
351 if (ro->ro_rt != NULL)
352 ia = (struct in_ifaddr *)ro->ro_rt->rt_ifa;
353 if (ia)
354 ifaref(&ia->ia_ifa);
355 lck_mtx_unlock(rt_mtx);
356 #if IPSEC
357 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) {
358 so = ipsec_getsocket(m);
359 (void)ipsec_setsocket(m, NULL);
360 }
361 #endif
362 goto sendit;
363 }
364 #endif /* IPFIREWALL */
365
366 #if IPSEC
367 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) {
368 so = ipsec_getsocket(m);
369 (void)ipsec_setsocket(m, NULL);
370 }
371 #endif
372 loopit:
373 /*
374 * No need to process packet twice if we've
375 * already seen it
376 */
377 inject_filter_ref = ipf_get_inject_filter(m);
378
379 if (opt) {
380 m = ip_insertoptions(m, opt, &len);
381 hlen = len;
382 }
383 ip = mtod(m, struct ip *);
384 #if IPFIREWALL
385 pkt_dst = args.next_hop ? args.next_hop->sin_addr : ip->ip_dst;
386 #else
387 pkt_dst = ip->ip_dst;
388 #endif
389
390 /*
391 * Fill in IP header.
392 */
393 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
394 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, hlen >> 2);
395 ip->ip_off &= IP_DF;
396 #if RANDOM_IP_ID
397 ip->ip_id = ip_randomid();
398 #else
399 ip->ip_id = htons(ip_id++);
400 #endif
401 OSAddAtomic(1, (SInt32*)&ipstat.ips_localout);
402 } else {
403 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
404 }
405
406 #if DEBUG
407 /* For debugging, we let the stack forge congestion */
408 if (forge_ce != 0 &&
409 ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_ECT1 ||
410 (ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_ECT0)) {
411 ip->ip_tos = (ip->ip_tos & ~IPTOS_ECN_MASK) | IPTOS_ECN_CE;
412 forge_ce--;
413 }
414 #endif /* DEBUG */
415
416 KERNEL_DEBUG(DBG_LAYER_BEG, ip->ip_dst.s_addr,
417 ip->ip_src.s_addr, ip->ip_p, ip->ip_off, ip->ip_len);
418
419 dst = (struct sockaddr_in *)&ro->ro_dst;
420
421 /*
422 * If there is a cached route,
423 * check that it is to the same destination
424 * and is still up. If not, free it and try again.
425 * The address family should also be checked in case of sharing the
426 * cache with IPv6.
427 */
428
429 lck_mtx_lock(rt_mtx);
430 if (ro->ro_rt != NULL) {
431 if (ro->ro_rt->generation_id != route_generation &&
432 ((flags & (IP_ROUTETOIF | IP_FORWARDING)) == 0) &&
433 (ip->ip_src.s_addr != INADDR_ANY) &&
434 (ifa_foraddr(ip->ip_src.s_addr) == 0)) {
435 error = EADDRNOTAVAIL;
436 lck_mtx_unlock(rt_mtx);
437 goto bad;
438 }
439 if ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
440 dst->sin_family != AF_INET ||
441 dst->sin_addr.s_addr != pkt_dst.s_addr) {
442 rtfree_locked(ro->ro_rt);
443 ro->ro_rt = NULL;
444 }
445 /*
446 * If we're doing source interface selection, we may not
447 * want to use this route; only synch up the generation
448 * count otherwise.
449 */
450 if (!select_srcif && ro->ro_rt != NULL &&
451 ro->ro_rt->generation_id != route_generation)
452 ro->ro_rt->generation_id = route_generation;
453 }
454 if (ro->ro_rt == NULL) {
455 bzero(dst, sizeof(*dst));
456 dst->sin_family = AF_INET;
457 dst->sin_len = sizeof(*dst);
458 dst->sin_addr = pkt_dst;
459 }
460 /*
461 * If routing to interface only,
462 * short circuit routing lookup.
463 */
464 #define ifatoia(ifa) ((struct in_ifaddr *)(ifa))
465 #define sintosa(sin) ((struct sockaddr *)(sin))
466 if (flags & IP_ROUTETOIF) {
467 if (ia)
468 ifafree(&ia->ia_ifa);
469 if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0) {
470 if ((ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) {
471 OSAddAtomic(1, (SInt32*)&ipstat.ips_noroute);
472 error = ENETUNREACH;
473 lck_mtx_unlock(rt_mtx);
474 goto bad;
475 }
476 }
477 ifp = ia->ia_ifp;
478 ip->ip_ttl = 1;
479 isbroadcast = in_broadcast(dst->sin_addr, ifp);
480 } else if (IN_MULTICAST(ntohl(pkt_dst.s_addr)) &&
481 imo != NULL && imo->imo_multicast_ifp != NULL) {
482 /*
483 * Bypass the normal routing lookup for multicast
484 * packets if the interface is specified.
485 */
486 ifp = imo->imo_multicast_ifp;
487 isbroadcast = 0;
488 if (ia != NULL)
489 ifafree(&ia->ia_ifa);
490
491 /* Could use IFP_TO_IA instead but rt_mtx is already held */
492 for (ia = TAILQ_FIRST(&in_ifaddrhead);
493 ia != NULL && ia->ia_ifp != ifp;
494 ia = TAILQ_NEXT(ia, ia_link))
495 continue;
496
497 if (ia != NULL)
498 ifaref(&ia->ia_ifa);
499 } else {
500 boolean_t cloneok = FALSE;
501 /*
502 * Perform source interface selection; the source IP address
503 * must belong to one of the addresses of the interface used
504 * by the route. For performance reasons, do this only if
505 * there is no route, or if the routing table has changed,
506 * or if we haven't done source interface selection on this
507 * route (for this PCB instance) before.
508 */
509 if (select_srcif && ip->ip_src.s_addr != INADDR_ANY &&
510 (ro->ro_rt == NULL ||
511 ro->ro_rt->generation_id != route_generation ||
512 !(ro->ro_flags & ROF_SRCIF_SELECTED))) {
513 struct ifaddr *ifa;
514
515 /* Find the source interface */
516 ifa = in_selectsrcif(ip, ro, ifscope);
517
518 /*
519 * If the source address is spoofed (in the case
520 * of IP_RAWOUTPUT), or if this is destined for
521 * local/loopback, just let it go out using the
522 * interface of the route. Otherwise, there's no
523 * interface having such an address, so bail out.
524 */
525 if (ifa == NULL && !(flags & IP_RAWOUTPUT) &&
526 ifscope != lo_ifp->if_index) {
527 error = EADDRNOTAVAIL;
528 lck_mtx_unlock(rt_mtx);
529 goto bad;
530 }
531
532 /*
533 * If the caller didn't explicitly specify the scope,
534 * pick it up from the source interface. If the cached
535 * route was wrong and was blown away as part of source
536 * interface selection, don't mask out RTF_PRCLONING
537 * since that route may have been allocated by the ULP,
538 * unless the IP header was created by the caller or
539 * the destination is IPv4 LLA. The check for the
540 * latter is needed because IPv4 LLAs are never scoped
541 * in the current implementation, and we don't want to
542 * replace the resolved IPv4 LLA route with one whose
543 * gateway points to that of the default gateway on
544 * the primary interface of the system.
545 */
546 if (ifa != NULL) {
547 if (ifscope == IFSCOPE_NONE)
548 ifscope = ifa->ifa_ifp->if_index;
549 ifafree(ifa);
550 cloneok = (!(flags & IP_RAWOUTPUT) &&
551 !(IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr))));
552 }
553 }
554
555 /*
556 * If this is the case, we probably don't want to allocate
557 * a protocol-cloned route since we didn't get one from the
558 * ULP. This lets TCP do its thing, while not burdening
559 * forwarding or ICMP with the overhead of cloning a route.
560 * Of course, we still want to do any cloning requested by
561 * the link layer, as this is probably required in all cases
562 * for correct operation (as it is for ARP).
563 */
564 if (ro->ro_rt == NULL) {
565 unsigned long ign = RTF_PRCLONING;
566 /*
567 * We make an exception here: if the destination
568 * address is INADDR_BROADCAST, allocate a protocol-
569 * cloned host route so that we end up with a route
570 * marked with the RTF_BROADCAST flag. Otherwise,
571 * we would end up referring to the default route,
572 * instead of creating a cloned host route entry.
573 * That would introduce inconsistencies between ULPs
574 * that allocate a route and those that don't. The
575 * RTF_BROADCAST route is important since we'd want
576 * to send out undirected IP broadcast packets using
577 * link-level broadcast address. Another exception
578 * is for ULP-created routes that got blown away by
579 * source interface selection (see above).
580 *
581 * These exceptions will no longer be necessary when
582 * the RTF_PRCLONING scheme is no longer present.
583 */
584 if (cloneok || dst->sin_addr.s_addr == INADDR_BROADCAST)
585 ign &= ~RTF_PRCLONING;
586
587 rtalloc_scoped_ign_locked(ro, ign, ifscope);
588 }
589
590 if (ro->ro_rt == NULL) {
591 OSAddAtomic(1, (SInt32*)&ipstat.ips_noroute);
592 error = EHOSTUNREACH;
593 lck_mtx_unlock(rt_mtx);
594 goto bad;
595 }
596
597 if (ia)
598 ifafree(&ia->ia_ifa);
599 ia = ifatoia(ro->ro_rt->rt_ifa);
600 if (ia)
601 ifaref(&ia->ia_ifa);
602 ifp = ro->ro_rt->rt_ifp;
603 ro->ro_rt->rt_use++;
604 if (ro->ro_rt->rt_flags & RTF_GATEWAY)
605 dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
606 if (ro->ro_rt->rt_flags & RTF_HOST)
607 isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST);
608 else
609 isbroadcast = in_broadcast(dst->sin_addr, ifp);
610 }
611 lck_mtx_unlock(rt_mtx);
612 if (IN_MULTICAST(ntohl(pkt_dst.s_addr))) {
613 struct in_multi *inm;
614
615 m->m_flags |= M_MCAST;
616 /*
617 * IP destination address is multicast. Make sure "dst"
618 * still points to the address in "ro". (It may have been
619 * changed to point to a gateway address, above.)
620 */
621 dst = (struct sockaddr_in *)&ro->ro_dst;
622 /*
623 * See if the caller provided any multicast options
624 */
625 if (imo != NULL) {
626 if ((flags & IP_RAWOUTPUT) == 0) ip->ip_ttl = imo->imo_multicast_ttl;
627 if (imo->imo_multicast_ifp != NULL) {
628 ifp = imo->imo_multicast_ifp;
629 }
630 #if MROUTING
631 if (imo->imo_multicast_vif != -1 &&
632 ((flags & IP_RAWOUTPUT) == 0 || ip->ip_src.s_addr == INADDR_ANY))
633 ip->ip_src.s_addr =
634 ip_mcast_src(imo->imo_multicast_vif);
635 #endif /* MROUTING */
636 } else
637 if ((flags & IP_RAWOUTPUT) == 0) ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
638 /*
639 * Confirm that the outgoing interface supports multicast.
640 */
641 if ((imo == NULL) || (imo->imo_multicast_vif == -1)) {
642 if ((ifp->if_flags & IFF_MULTICAST) == 0) {
643 OSAddAtomic(1, (SInt32*)&ipstat.ips_noroute);
644 error = ENETUNREACH;
645 goto bad;
646 }
647 }
648 /*
649 * If source address not specified yet, use address
650 * of outgoing interface.
651 */
652 if (ip->ip_src.s_addr == INADDR_ANY) {
653 register struct in_ifaddr *ia1;
654 lck_mtx_lock(rt_mtx);
655 TAILQ_FOREACH(ia1, &in_ifaddrhead, ia_link)
656 if (ia1->ia_ifp == ifp) {
657 ip->ip_src = IA_SIN(ia1)->sin_addr;
658
659 break;
660 }
661 lck_mtx_unlock(rt_mtx);
662 if (ip->ip_src.s_addr == INADDR_ANY) {
663 error = ENETUNREACH;
664 goto bad;
665 }
666 }
667
668 ifnet_lock_shared(ifp);
669 IN_LOOKUP_MULTI(pkt_dst, ifp, inm);
670 ifnet_lock_done(ifp);
671 if (inm != NULL &&
672 (imo == NULL || imo->imo_multicast_loop)) {
673 /*
674 * If we belong to the destination multicast group
675 * on the outgoing interface, and the caller did not
676 * forbid loopback, loop back a copy.
677 */
678 if (!TAILQ_EMPTY(&ipv4_filters)) {
679 struct ipfilter *filter;
680 int seen = (inject_filter_ref == 0);
681 struct ipf_pktopts *ippo = 0, ipf_pktopts;
682
683 if (imo) {
684 ippo = &ipf_pktopts;
685 ipf_pktopts.ippo_mcast_ifnet = imo->imo_multicast_ifp;
686 ipf_pktopts.ippo_mcast_ttl = imo->imo_multicast_ttl;
687 ipf_pktopts.ippo_mcast_loop = imo->imo_multicast_loop;
688 }
689
690 ipf_ref();
691
692 /* 4135317 - always pass network byte order to filter */
693 HTONS(ip->ip_len);
694 HTONS(ip->ip_off);
695
696 TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) {
697 if (seen == 0) {
698 if ((struct ipfilter *)inject_filter_ref == filter)
699 seen = 1;
700 } else if (filter->ipf_filter.ipf_output) {
701 errno_t result;
702 result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, ippo);
703 if (result == EJUSTRETURN) {
704 ipf_unref();
705 goto done;
706 }
707 if (result != 0) {
708 ipf_unref();
709 goto bad;
710 }
711 }
712 }
713
714 /* set back to host byte order */
715 ip = mtod(m, struct ip *);
716 NTOHS(ip->ip_len);
717 NTOHS(ip->ip_off);
718
719 ipf_unref();
720 didfilter = 1;
721 }
722 ip_mloopback(ifp, m, dst, hlen);
723 }
724 #if MROUTING
725 else {
726 /*
727 * If we are acting as a multicast router, perform
728 * multicast forwarding as if the packet had just
729 * arrived on the interface to which we are about
730 * to send. The multicast forwarding function
731 * recursively calls this function, using the
732 * IP_FORWARDING flag to prevent infinite recursion.
733 *
734 * Multicasts that are looped back by ip_mloopback(),
735 * above, will be forwarded by the ip_input() routine,
736 * if necessary.
737 */
738 if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
739 /*
740 * Check if rsvp daemon is running. If not, don't
741 * set ip_moptions. This ensures that the packet
742 * is multicast and not just sent down one link
743 * as prescribed by rsvpd.
744 */
745 if (!rsvp_on)
746 imo = NULL;
747 if (ip_mforward(ip, ifp, m, imo) != 0) {
748 m_freem(m);
749 goto done;
750 }
751 }
752 }
753 #endif /* MROUTING */
754
755 /*
756 * Multicasts with a time-to-live of zero may be looped-
757 * back, above, but must not be transmitted on a network.
758 * Also, multicasts addressed to the loopback interface
759 * are not sent -- the above call to ip_mloopback() will
760 * loop back a copy if this host actually belongs to the
761 * destination group on the loopback interface.
762 */
763 if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) {
764 m_freem(m);
765 goto done;
766 }
767
768 goto sendit;
769 }
770 #ifndef notdef
771 /*
772 * If source address not specified yet, use address
773 * of outgoing interface.
774 */
775 if (ip->ip_src.s_addr == INADDR_ANY) {
776 ip->ip_src = IA_SIN(ia)->sin_addr;
777 #if IPFIREWALL_FORWARD
778 /* Keep note that we did this - if the firewall changes
779 * the next-hop, our interface may change, changing the
780 * default source IP. It's a shame so much effort happens
781 * twice. Oh well.
782 */
783 fwd_rewrite_src++;
784 #endif /* IPFIREWALL_FORWARD */
785 }
786 #endif /* notdef */
787
788 /*
789 * Look for broadcast address and
790 * verify user is allowed to send
791 * such a packet.
792 */
793 if (isbroadcast) {
794 if ((ifp->if_flags & IFF_BROADCAST) == 0) {
795 error = EADDRNOTAVAIL;
796 goto bad;
797 }
798 if ((flags & IP_ALLOWBROADCAST) == 0) {
799 error = EACCES;
800 goto bad;
801 }
802 /* don't allow broadcast messages to be fragmented */
803 if ((u_short)ip->ip_len > ifp->if_mtu) {
804 error = EMSGSIZE;
805 goto bad;
806 }
807 m->m_flags |= M_BCAST;
808 } else {
809 m->m_flags &= ~M_BCAST;
810 }
811
812 sendit:
813 /*
814 * Force IP TTL to 255 following draft-ietf-zeroconf-ipv4-linklocal.txt
815 */
816 if (IN_LINKLOCAL(ntohl(ip->ip_src.s_addr)) || IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr))) {
817 ip_linklocal_stat.iplls_out_total++;
818 if (ip->ip_ttl != MAXTTL) {
819 ip_linklocal_stat.iplls_out_badttl++;
820 ip->ip_ttl = MAXTTL;
821 }
822 }
823
824 if (!didfilter && !TAILQ_EMPTY(&ipv4_filters)) {
825 struct ipfilter *filter;
826 int seen = (inject_filter_ref == 0);
827
828 ipf_ref();
829
830 /* 4135317 - always pass network byte order to filter */
831 HTONS(ip->ip_len);
832 HTONS(ip->ip_off);
833
834 TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) {
835 if (seen == 0) {
836 if ((struct ipfilter *)inject_filter_ref == filter)
837 seen = 1;
838 } else if (filter->ipf_filter.ipf_output) {
839 errno_t result;
840 result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, 0);
841 if (result == EJUSTRETURN) {
842 ipf_unref();
843 goto done;
844 }
845 if (result != 0) {
846 ipf_unref();
847 goto bad;
848 }
849 }
850 }
851
852 /* set back to host byte order */
853 ip = mtod(m, struct ip *);
854 NTOHS(ip->ip_len);
855 NTOHS(ip->ip_off);
856
857 ipf_unref();
858 }
859
860 #if IPSEC
861 /* temporary for testing only: bypass ipsec altogether */
862
863 if (ipsec_bypass != 0 || (flags & IP_NOIPSEC) != 0)
864 goto skip_ipsec;
865
866 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_START, 0,0,0,0,0);
867
868
869 /* get SP for this packet */
870 if (so == NULL)
871 sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, flags, &error);
872 else
873 sp = ipsec4_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error);
874
875 if (sp == NULL) {
876 IPSEC_STAT_INCREMENT(ipsecstat.out_inval);
877 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 0,0,0,0,0);
878 goto bad;
879 }
880
881 error = 0;
882
883 /* check policy */
884 switch (sp->policy) {
885 case IPSEC_POLICY_DISCARD:
886 case IPSEC_POLICY_GENERATE:
887 /*
888 * This packet is just discarded.
889 */
890 IPSEC_STAT_INCREMENT(ipsecstat.out_polvio);
891 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 1,0,0,0,0);
892 goto bad;
893
894 case IPSEC_POLICY_BYPASS:
895 case IPSEC_POLICY_NONE:
896 /* no need to do IPsec. */
897 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 2,0,0,0,0);
898 goto skip_ipsec;
899
900 case IPSEC_POLICY_IPSEC:
901 if (sp->req == NULL) {
902 /* acquire a policy */
903 error = key_spdacquire(sp);
904 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 3,0,0,0,0);
905 goto bad;
906 }
907 break;
908
909 case IPSEC_POLICY_ENTRUST:
910 default:
911 printf("ip_output: Invalid policy found. %d\n", sp->policy);
912 }
913 {
914 struct ipsec_output_state state;
915 bzero(&state, sizeof(state));
916 state.m = m;
917 if (flags & IP_ROUTETOIF) {
918 state.ro = &iproute;
919 bzero(&iproute, sizeof(iproute));
920 } else
921 state.ro = ro;
922 state.dst = (struct sockaddr *)dst;
923
924 ip->ip_sum = 0;
925
926 /*
927 * XXX
928 * delayed checksums are not currently compatible with IPsec
929 */
930 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
931 in_delayed_cksum(m);
932 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
933 }
934
935 HTONS(ip->ip_len);
936 HTONS(ip->ip_off);
937
938 error = ipsec4_output(&state, sp, flags);
939
940 m0 = m = state.m;
941
942 if (flags & IP_ROUTETOIF) {
943 /*
944 * if we have tunnel mode SA, we may need to ignore
945 * IP_ROUTETOIF.
946 */
947 if (state.ro != &iproute || state.ro->ro_rt != NULL) {
948 flags &= ~IP_ROUTETOIF;
949 ro = state.ro;
950 }
951 } else
952 ro = state.ro;
953
954 dst = (struct sockaddr_in *)state.dst;
955 if (error) {
956 /* mbuf is already reclaimed in ipsec4_output. */
957 m0 = NULL;
958 switch (error) {
959 case EHOSTUNREACH:
960 case ENETUNREACH:
961 case EMSGSIZE:
962 case ENOBUFS:
963 case ENOMEM:
964 break;
965 default:
966 printf("ip4_output (ipsec): error code %d\n", error);
967 /*fall through*/
968 case ENOENT:
969 /* don't show these error codes to the user */
970 error = 0;
971 break;
972 }
973 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 4,0,0,0,0);
974 goto bad;
975 }
976 }
977
978 /* be sure to update variables that are affected by ipsec4_output() */
979 ip = mtod(m, struct ip *);
980
981 #ifdef _IP_VHL
982 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
983 #else
984 hlen = ip->ip_hl << 2;
985 #endif
986 /* Check that there wasn't a route change and src is still valid */
987
988 lck_mtx_lock(rt_mtx);
989 if (ro->ro_rt && ro->ro_rt->generation_id != route_generation) {
990 if (ifa_foraddr(ip->ip_src.s_addr) == 0 && ((flags & (IP_ROUTETOIF | IP_FORWARDING)) == 0)) {
991 error = EADDRNOTAVAIL;
992 lck_mtx_unlock(rt_mtx);
993 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 5,0,0,0,0);
994 goto bad;
995 }
996 rtfree_locked(ro->ro_rt);
997 ro->ro_rt = NULL;
998 }
999
1000 if (ro->ro_rt == NULL) {
1001 if ((flags & IP_ROUTETOIF) == 0) {
1002 printf("ip_output: "
1003 "can't update route after IPsec processing\n");
1004 error = EHOSTUNREACH; /*XXX*/
1005 lck_mtx_unlock(rt_mtx);
1006 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 6,0,0,0,0);
1007 goto bad;
1008 }
1009 } else {
1010 if (ia)
1011 ifafree(&ia->ia_ifa);
1012 ia = ifatoia(ro->ro_rt->rt_ifa);
1013 if (ia)
1014 ifaref(&ia->ia_ifa);
1015 ifp = ro->ro_rt->rt_ifp;
1016 }
1017 lck_mtx_unlock(rt_mtx);
1018
1019 /* make it flipped, again. */
1020 NTOHS(ip->ip_len);
1021 NTOHS(ip->ip_off);
1022 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 7,0xff,0xff,0xff,0xff);
1023
1024 /* Pass to filters again */
1025 if (!TAILQ_EMPTY(&ipv4_filters)) {
1026 struct ipfilter *filter;
1027
1028 ipf_ref();
1029
1030 /* 4135317 - always pass network byte order to filter */
1031 HTONS(ip->ip_len);
1032 HTONS(ip->ip_off);
1033
1034 TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) {
1035 if (filter->ipf_filter.ipf_output) {
1036 errno_t result;
1037 result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, 0);
1038 if (result == EJUSTRETURN) {
1039 ipf_unref();
1040 goto done;
1041 }
1042 if (result != 0) {
1043 ipf_unref();
1044 goto bad;
1045 }
1046 }
1047 }
1048
1049 /* set back to host byte order */
1050 ip = mtod(m, struct ip *);
1051 NTOHS(ip->ip_len);
1052 NTOHS(ip->ip_off);
1053
1054 ipf_unref();
1055 }
1056 skip_ipsec:
1057 #endif /*IPSEC*/
1058
1059 #if IPFIREWALL
1060 /*
1061 * IpHack's section.
1062 * - Xlate: translate packet's addr/port (NAT).
1063 * - Firewall: deny/allow/etc.
1064 * - Wrap: fake packet's addr/port <unimpl.>
1065 * - Encapsulate: put it in another IP and send out. <unimp.>
1066 */
1067 if (fr_checkp) {
1068 struct mbuf *m1 = m;
1069
1070 if ((error = (*fr_checkp)(ip, hlen, ifp, 1, &m1)) || !m1) {
1071 goto done;
1072 }
1073 ip = mtod(m0 = m = m1, struct ip *);
1074 }
1075
1076 /*
1077 * Check with the firewall...
1078 * but not if we are already being fwd'd from a firewall.
1079 */
1080 if (fw_enable && IPFW_LOADED && !args.next_hop) {
1081 struct sockaddr_in *old = dst;
1082
1083 args.m = m;
1084 args.next_hop = dst;
1085 args.oif = ifp;
1086 off = ip_fw_chk_ptr(&args);
1087 m = args.m;
1088 dst = args.next_hop;
1089
1090 /*
1091 * On return we must do the following:
1092 * IP_FW_PORT_DENY_FLAG -> drop the pkt (XXX new)
1093 * 1<=off<= 0xffff -> DIVERT
1094 * (off & IP_FW_PORT_DYNT_FLAG) -> send to a DUMMYNET pipe
1095 * (off & IP_FW_PORT_TEE_FLAG) -> TEE the packet
1096 * dst != old -> IPFIREWALL_FORWARD
1097 * off==0, dst==old -> accept
1098 * If some of the above modules is not compiled in, then
1099 * we shouldn't have to check the corresponding condition
1100 * (because the ipfw control socket should not accept
1101 * unsupported rules), but better play safe and drop
1102 * packets in case of doubt.
1103 */
1104 m0 = m;
1105 if ( (off & IP_FW_PORT_DENY_FLAG) || m == NULL) {
1106 if (m)
1107 m_freem(m);
1108 error = EACCES ;
1109 goto done ;
1110 }
1111 ip = mtod(m, struct ip *);
1112
1113 if (off == 0 && dst == old) {/* common case */
1114 goto pass ;
1115 }
1116 #if DUMMYNET
1117 if (DUMMYNET_LOADED && (off & IP_FW_PORT_DYNT_FLAG) != 0) {
1118 /*
1119 * pass the pkt to dummynet. Need to include
1120 * pipe number, m, ifp, ro, dst because these are
1121 * not recomputed in the next pass.
1122 * All other parameters have been already used and
1123 * so they are not needed anymore.
1124 * XXX note: if the ifp or ro entry are deleted
1125 * while a pkt is in dummynet, we are in trouble!
1126 */
1127 args.ro = ro;
1128 args.dst = dst;
1129 args.flags = flags;
1130 if (flags & IP_OUTARGS)
1131 args.ipoa = ipoa;
1132
1133 error = ip_dn_io_ptr(m, off & 0xffff, DN_TO_IP_OUT,
1134 &args);
1135 goto done;
1136 }
1137 #endif /* DUMMYNET */
1138 #if IPDIVERT
1139 if (off != 0 && (off & IP_FW_PORT_DYNT_FLAG) == 0) {
1140 struct mbuf *clone = NULL;
1141
1142 /* Clone packet if we're doing a 'tee' */
1143 if ((off & IP_FW_PORT_TEE_FLAG) != 0)
1144 clone = m_dup(m, M_DONTWAIT);
1145 /*
1146 * XXX
1147 * delayed checksums are not currently compatible
1148 * with divert sockets.
1149 */
1150 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
1151 in_delayed_cksum(m);
1152 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1153 }
1154
1155 /* Restore packet header fields to original values */
1156 HTONS(ip->ip_len);
1157 HTONS(ip->ip_off);
1158
1159 /* Deliver packet to divert input routine */
1160 divert_packet(m, 0, off & 0xffff, args.divert_rule);
1161
1162 /* If 'tee', continue with original packet */
1163 if (clone != NULL) {
1164 m0 = m = clone;
1165 ip = mtod(m, struct ip *);
1166 goto pass;
1167 }
1168 goto done;
1169 }
1170 #endif
1171
1172 #if IPFIREWALL_FORWARD
1173 /* Here we check dst to make sure it's directly reachable on the
1174 * interface we previously thought it was.
1175 * If it isn't (which may be likely in some situations) we have
1176 * to re-route it (ie, find a route for the next-hop and the
1177 * associated interface) and set them here. This is nested
1178 * forwarding which in most cases is undesirable, except where
1179 * such control is nigh impossible. So we do it here.
1180 * And I'm babbling.
1181 */
1182 if (off == 0 && old != dst) {
1183 struct in_ifaddr *ia_fw;
1184
1185 /* It's changed... */
1186 /* There must be a better way to do this next line... */
1187 static struct route sro_fwd, *ro_fwd = &sro_fwd;
1188 #if IPFIREWALL_FORWARD_DEBUG
1189 printf("IPFIREWALL_FORWARD: New dst ip: ");
1190 print_ip(dst->sin_addr);
1191 printf("\n");
1192 #endif
1193 /*
1194 * We need to figure out if we have been forwarded
1195 * to a local socket. If so then we should somehow
1196 * "loop back" to ip_input, and get directed to the
1197 * PCB as if we had received this packet. This is
1198 * because it may be difficult to identify the packets
1199 * you want to forward until they are being output
1200 * and have selected an interface. (e.g. locally
1201 * initiated packets) If we used the loopback interface,
1202 * we would not be able to control what happens
1203 * as the packet runs through ip_input() as
1204 * it is done through a ISR.
1205 */
1206 TAILQ_FOREACH(ia_fw, &in_ifaddrhead, ia_link) {
1207 /*
1208 * If the addr to forward to is one
1209 * of ours, we pretend to
1210 * be the destination for this packet.
1211 */
1212 if (IA_SIN(ia_fw)->sin_addr.s_addr ==
1213 dst->sin_addr.s_addr)
1214 break;
1215 }
1216 if (ia) {
1217 /* tell ip_input "dont filter" */
1218 struct m_tag *fwd_tag;
1219 struct ip_fwd_tag *ipfwd_tag;
1220
1221 fwd_tag = m_tag_alloc(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_IPFORWARD,
1222 sizeof(struct sockaddr_in), M_NOWAIT);
1223 if (fwd_tag == NULL) {
1224 error = ENOBUFS;
1225 goto bad;
1226 }
1227
1228 ipfwd_tag = (struct ip_fwd_tag *)(fwd_tag+1);
1229 ipfwd_tag->next_hop = args.next_hop;
1230
1231 m_tag_prepend(m, fwd_tag);
1232
1233 if (m->m_pkthdr.rcvif == NULL)
1234 m->m_pkthdr.rcvif = ifunit("lo0");
1235 if ((~IF_HWASSIST_CSUM_FLAGS(m->m_pkthdr.rcvif->if_hwassist) &
1236 m->m_pkthdr.csum_flags) == 0) {
1237 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
1238 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1239 m->m_pkthdr.csum_flags |=
1240 CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1241 m->m_pkthdr.csum_data = 0xffff;
1242 }
1243 m->m_pkthdr.csum_flags |=
1244 CSUM_IP_CHECKED | CSUM_IP_VALID;
1245 }
1246 else if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
1247 in_delayed_cksum(m);
1248 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1249 ip->ip_sum = in_cksum(m, hlen);
1250 }
1251 HTONS(ip->ip_len);
1252 HTONS(ip->ip_off);
1253
1254
1255 /* we need to call dlil_output to run filters
1256 * and resync to avoid recursion loops.
1257 */
1258 if (lo_ifp) {
1259 dlil_output(lo_ifp, PF_INET, m, 0, (struct sockaddr *)dst, 0);
1260 }
1261 else {
1262 printf("ip_output: no loopback ifp for forwarding!!!\n");
1263 }
1264 goto done;
1265 }
1266 /* Some of the logic for this was
1267 * nicked from above.
1268 *
1269 * This rewrites the cached route in a local PCB.
1270 * Is this what we want to do?
1271 */
1272 bcopy(dst, &ro_fwd->ro_dst, sizeof(*dst));
1273
1274 ro_fwd->ro_rt = 0;
1275 lck_mtx_lock(rt_mtx);
1276 rtalloc_ign_locked(ro_fwd, RTF_PRCLONING);
1277
1278 if (ro_fwd->ro_rt == 0) {
1279 OSAddAtomic(1, (SInt32*)&ipstat.ips_noroute);
1280 error = EHOSTUNREACH;
1281 lck_mtx_unlock(rt_mtx);
1282 goto bad;
1283 }
1284
1285 ia_fw = ifatoia(ro_fwd->ro_rt->rt_ifa);
1286 ifp = ro_fwd->ro_rt->rt_ifp;
1287 ro_fwd->ro_rt->rt_use++;
1288 if (ro_fwd->ro_rt->rt_flags & RTF_GATEWAY)
1289 dst = (struct sockaddr_in *)ro_fwd->ro_rt->rt_gateway;
1290 if (ro_fwd->ro_rt->rt_flags & RTF_HOST)
1291 isbroadcast =
1292 (ro_fwd->ro_rt->rt_flags & RTF_BROADCAST);
1293 else
1294 isbroadcast = in_broadcast(dst->sin_addr, ifp);
1295 rtfree_locked(ro->ro_rt);
1296 ro->ro_rt = ro_fwd->ro_rt;
1297 dst = (struct sockaddr_in *)&ro_fwd->ro_dst;
1298 lck_mtx_unlock(rt_mtx);
1299
1300 /*
1301 * If we added a default src ip earlier,
1302 * which would have been gotten from the-then
1303 * interface, do it again, from the new one.
1304 */
1305 if (fwd_rewrite_src)
1306 ip->ip_src = IA_SIN(ia_fw)->sin_addr;
1307 goto pass ;
1308 }
1309 #endif /* IPFIREWALL_FORWARD */
1310 /*
1311 * if we get here, none of the above matches, and
1312 * we have to drop the pkt
1313 */
1314 m_freem(m);
1315 error = EACCES; /* not sure this is the right error msg */
1316 goto done;
1317 }
1318 #endif /* IPFIREWALL */
1319
1320 pass:
1321 #if __APPLE__
1322 /* Do not allow loopback address to wind up on a wire */
1323 if ((ifp->if_flags & IFF_LOOPBACK) == 0 &&
1324 ((ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
1325 (ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)) {
1326 OSAddAtomic(1, (SInt32*)&ipstat.ips_badaddr);
1327 m_freem(m);
1328 /*
1329 * Do not simply drop the packet just like a firewall -- we want the
1330 * application to feel the pain.
1331 * Return ENETUNREACH like ip6_output does in some similar cases.
1332 * This can startle the otherwise clueless process that specifies
1333 * loopback as the source address.
1334 */
1335 error = ENETUNREACH;
1336 goto done;
1337 }
1338 #endif
1339 m->m_pkthdr.csum_flags |= CSUM_IP;
1340 sw_csum = m->m_pkthdr.csum_flags
1341 & ~IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);
1342
1343 if ((ifp->if_hwassist & CSUM_TCP_SUM16) != 0) {
1344 /*
1345 * Special case code for GMACE
1346 * frames that can be checksumed by GMACE SUM16 HW:
1347 * frame >64, no fragments, no UDP
1348 */
1349 if (apple_hwcksum_tx && (m->m_pkthdr.csum_flags & CSUM_TCP)
1350 && (ip->ip_len > 50) && (ip->ip_len <= ifp->if_mtu)) {
1351 /* Apple GMAC HW, expects STUFF_OFFSET << 16 | START_OFFSET */
1352 u_short offset = (IP_VHL_HL(ip->ip_vhl) << 2) +14 ; /* IP+Enet header length */
1353 u_short csumprev= m->m_pkthdr.csum_data & 0xFFFF;
1354 m->m_pkthdr.csum_flags = CSUM_DATA_VALID | CSUM_TCP_SUM16; /* for GMAC */
1355 m->m_pkthdr.csum_data = (csumprev + offset) << 16 ;
1356 m->m_pkthdr.csum_data += offset;
1357 sw_csum = CSUM_DELAY_IP; /* do IP hdr chksum in software */
1358 }
1359 else {
1360 /* let the software handle any UDP or TCP checksums */
1361 sw_csum |= (CSUM_DELAY_DATA & m->m_pkthdr.csum_flags);
1362 }
1363 } else if (apple_hwcksum_tx == 0) {
1364 sw_csum |= (CSUM_DELAY_DATA | CSUM_DELAY_IP) &
1365 m->m_pkthdr.csum_flags;
1366 }
1367
1368 if (sw_csum & CSUM_DELAY_DATA) {
1369 in_delayed_cksum(m);
1370 sw_csum &= ~CSUM_DELAY_DATA;
1371 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1372 }
1373
1374 if (apple_hwcksum_tx != 0) {
1375 m->m_pkthdr.csum_flags &=
1376 IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);
1377 } else {
1378 m->m_pkthdr.csum_flags = 0;
1379 }
1380
1381 /*
1382 * If small enough for interface, or the interface will take
1383 * care of the fragmentation for us, can just send directly.
1384 */
1385 if ((u_short)ip->ip_len <= ifp->if_mtu ||
1386 ifp->if_hwassist & CSUM_FRAGMENT) {
1387 struct rtentry *rte;
1388
1389 HTONS(ip->ip_len);
1390 HTONS(ip->ip_off);
1391 ip->ip_sum = 0;
1392 if (sw_csum & CSUM_DELAY_IP) {
1393 ip->ip_sum = in_cksum(m, hlen);
1394 }
1395
1396 #ifndef __APPLE__
1397 /* Record statistics for this interface address. */
1398 if (!(flags & IP_FORWARDING) && ia != NULL) {
1399 ia->ia_ifa.if_opackets++;
1400 ia->ia_ifa.if_obytes += m->m_pkthdr.len;
1401 }
1402 #endif
1403
1404 #if IPSEC
1405 /* clean ipsec history once it goes out of the node */
1406 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0)
1407 ipsec_delaux(m);
1408 #endif
1409 if (packetchain == 0) {
1410 lck_mtx_lock(rt_mtx);
1411 if ((rte = ro->ro_rt) != NULL)
1412 rtref(rte);
1413 lck_mtx_unlock(rt_mtx);
1414 error = ifnet_output(ifp, PF_INET, m, rte,
1415 (struct sockaddr *)dst);
1416 if (rte != NULL)
1417 rtfree(rte);
1418 goto done;
1419 }
1420 else { /* packet chaining allows us to reuse the route for all packets */
1421 m = m->m_nextpkt;
1422 if (m == NULL) {
1423 if (pktcnt > ip_maxchainsent)
1424 ip_maxchainsent = pktcnt;
1425 lck_mtx_lock(rt_mtx);
1426 if ((rte = ro->ro_rt) != NULL)
1427 rtref(rte);
1428 lck_mtx_unlock(rt_mtx);
1429 //send
1430 error = ifnet_output(ifp, PF_INET, packetlist,
1431 rte, (struct sockaddr *)dst);
1432 if (rte != NULL)
1433 rtfree(rte);
1434 pktcnt = 0;
1435 goto done;
1436
1437 }
1438 m0 = m;
1439 pktcnt++;
1440 goto loopit;
1441 }
1442 }
1443 /*
1444 * Too large for interface; fragment if possible.
1445 * Must be able to put at least 8 bytes per fragment.
1446 */
1447 if (ip->ip_off & IP_DF) {
1448 error = EMSGSIZE;
1449 /*
1450 * This case can happen if the user changed the MTU
1451 * of an interface after enabling IP on it. Because
1452 * most netifs don't keep track of routes pointing to
1453 * them, there is no way for one to update all its
1454 * routes when the MTU is changed.
1455 */
1456
1457 lck_mtx_lock(rt_mtx);
1458 if (ro->ro_rt && (ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST))
1459 && !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU)
1460 && (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) {
1461 ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
1462 }
1463 lck_mtx_unlock(rt_mtx);
1464 OSAddAtomic(1, (SInt32*)&ipstat.ips_cantfrag);
1465 goto bad;
1466 }
1467 len = (ifp->if_mtu - hlen) &~ 7;
1468 if (len < 8) {
1469 error = EMSGSIZE;
1470 goto bad;
1471 }
1472
1473 /*
1474 * if the interface will not calculate checksums on
1475 * fragmented packets, then do it here.
1476 */
1477 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA &&
1478 (ifp->if_hwassist & CSUM_IP_FRAGS) == 0) {
1479 in_delayed_cksum(m);
1480 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1481 }
1482
1483
1484 {
1485 int mhlen, firstlen = len;
1486 struct mbuf **mnext = &m->m_nextpkt;
1487 int nfrags = 1;
1488
1489 /*
1490 * Loop through length of segment after first fragment,
1491 * make new header and copy data of each part and link onto chain.
1492 */
1493 m0 = m;
1494 mhlen = sizeof (struct ip);
1495 for (off = hlen + len; off < (u_short)ip->ip_len; off += len) {
1496 MGETHDR(m, M_DONTWAIT, MT_HEADER); /* MAC-OK */
1497 if (m == 0) {
1498 error = ENOBUFS;
1499 OSAddAtomic(1, (SInt32*)&ipstat.ips_odropped);
1500 goto sendorfree;
1501 }
1502 m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG;
1503 m->m_data += max_linkhdr;
1504 mhip = mtod(m, struct ip *);
1505 *mhip = *ip;
1506 if (hlen > sizeof (struct ip)) {
1507 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
1508 mhip->ip_vhl = IP_MAKE_VHL(IPVERSION, mhlen >> 2);
1509 }
1510 m->m_len = mhlen;
1511 mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF);
1512 if (ip->ip_off & IP_MF)
1513 mhip->ip_off |= IP_MF;
1514 if (off + len >= (u_short)ip->ip_len)
1515 len = (u_short)ip->ip_len - off;
1516 else
1517 mhip->ip_off |= IP_MF;
1518 mhip->ip_len = htons((u_short)(len + mhlen));
1519 m->m_next = m_copy(m0, off, len);
1520 if (m->m_next == 0) {
1521 (void) m_free(m);
1522 error = ENOBUFS; /* ??? */
1523 OSAddAtomic(1, (SInt32*)&ipstat.ips_odropped);
1524 goto sendorfree;
1525 }
1526 m->m_pkthdr.len = mhlen + len;
1527 m->m_pkthdr.rcvif = 0;
1528 m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
1529 m->m_pkthdr.socket_id = m0->m_pkthdr.socket_id;
1530 #if CONFIG_MACF_NET
1531 mac_netinet_fragment(m0, m);
1532 #endif
1533 HTONS(mhip->ip_off);
1534 mhip->ip_sum = 0;
1535 if (sw_csum & CSUM_DELAY_IP) {
1536 mhip->ip_sum = in_cksum(m, mhlen);
1537 }
1538 *mnext = m;
1539 mnext = &m->m_nextpkt;
1540 nfrags++;
1541 }
1542 OSAddAtomic(nfrags, (SInt32*)&ipstat.ips_ofragments);
1543
1544 /* set first/last markers for fragment chain */
1545 m->m_flags |= M_LASTFRAG;
1546 m0->m_flags |= M_FIRSTFRAG | M_FRAG;
1547 m0->m_pkthdr.csum_data = nfrags;
1548
1549 /*
1550 * Update first fragment by trimming what's been copied out
1551 * and updating header, then send each fragment (in order).
1552 */
1553 m = m0;
1554 m_adj(m, hlen + firstlen - (u_short)ip->ip_len);
1555 m->m_pkthdr.len = hlen + firstlen;
1556 ip->ip_len = htons((u_short)m->m_pkthdr.len);
1557 ip->ip_off |= IP_MF;
1558 HTONS(ip->ip_off);
1559 ip->ip_sum = 0;
1560 if (sw_csum & CSUM_DELAY_IP) {
1561 ip->ip_sum = in_cksum(m, hlen);
1562 }
1563 sendorfree:
1564
1565 KERNEL_DEBUG(DBG_LAYER_END, ip->ip_dst.s_addr,
1566 ip->ip_src.s_addr, ip->ip_p, ip->ip_off, ip->ip_len);
1567
1568 for (m = m0; m; m = m0) {
1569 m0 = m->m_nextpkt;
1570 m->m_nextpkt = 0;
1571 #if IPSEC
1572 /* clean ipsec history once it goes out of the node */
1573 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0)
1574 ipsec_delaux(m);
1575 #endif
1576 if (error == 0) {
1577 struct rtentry *rte;
1578 #ifndef __APPLE__
1579 /* Record statistics for this interface address. */
1580 if (ia != NULL) {
1581 ia->ia_ifa.if_opackets++;
1582 ia->ia_ifa.if_obytes += m->m_pkthdr.len;
1583 }
1584 #endif
1585 if ((packetchain != 0) && (pktcnt > 0))
1586 panic("ip_output: mix of packet in packetlist is wrong=%p", packetlist);
1587 lck_mtx_lock(rt_mtx);
1588 if ((rte = ro->ro_rt) != NULL)
1589 rtref(rte);
1590 lck_mtx_unlock(rt_mtx);
1591 error = ifnet_output(ifp, PF_INET, m, rte,
1592 (struct sockaddr *)dst);
1593 if (rte != NULL)
1594 rtfree(rte);
1595 } else
1596 m_freem(m);
1597 }
1598
1599 if (error == 0)
1600 OSAddAtomic(1, (SInt32*)&ipstat.ips_fragmented);
1601 }
1602 done:
1603 if (ia) {
1604 ifafree(&ia->ia_ifa);
1605 ia = NULL;
1606 }
1607 #if IPSEC
1608 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) {
1609 if (ro == &iproute && ro->ro_rt) {
1610 rtfree(ro->ro_rt);
1611 ro->ro_rt = NULL;
1612 }
1613 if (sp != NULL) {
1614 KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
1615 printf("DP ip_output call free SP:%x\n", sp));
1616 key_freesp(sp, KEY_SADB_UNLOCKED);
1617 }
1618 }
1619 #endif /* IPSEC */
1620
1621 KERNEL_DEBUG(DBG_FNC_IP_OUTPUT | DBG_FUNC_END, error,0,0,0,0);
1622 return (error);
1623 bad:
1624 m_freem(m0);
1625 goto done;
1626 }
1627
/*
 * Hand `len' to the outbound checksum statistics routine that matches
 * the IP protocol number `proto'.  Only TCP and UDP have such a routine;
 * any other protocol is silently ignored.
 */
static void
ip_out_cksum_stats(int proto, u_int32_t len)
{
	if (proto == IPPROTO_TCP) {
		tcp_out_cksum_stats(len);
		return;
	}

	if (proto == IPPROTO_UDP) {
		udp_out_cksum_stats(len);
		return;
	}

	/* keep only TCP or UDP stats for now */
}
1643
1644 void
1645 in_delayed_cksum_offset(struct mbuf *m0, int ip_offset)
1646 {
1647 struct ip *ip;
1648 unsigned char buf[sizeof(struct ip)];
1649 u_short csum, offset, ip_len;
1650 struct mbuf *m = m0;
1651
1652 while (ip_offset >= m->m_len) {
1653 ip_offset -= m->m_len;
1654 m = m->m_next;
1655 if (m == NULL) {
1656 printf("in_delayed_cksum_withoffset failed - ip_offset wasn't in the packet\n");
1657 return;
1658 }
1659 }
1660
1661 /* Sometimes the IP header is not contiguous, yes this can happen! */
1662 if (ip_offset + sizeof(struct ip) > m->m_len) {
1663 #if DEBUG
1664 printf("delayed m_pullup, m->len: %ld off: %d\n",
1665 m->m_len, ip_offset);
1666 #endif
1667 m_copydata(m, ip_offset, sizeof(struct ip), (caddr_t) buf);
1668
1669 ip = (struct ip *)buf;
1670 } else {
1671 ip = (struct ip*)(m->m_data + ip_offset);
1672 }
1673
1674 /* Gross */
1675 if (ip_offset) {
1676 m->m_len -= ip_offset;
1677 m->m_data += ip_offset;
1678 }
1679
1680 offset = IP_VHL_HL(ip->ip_vhl) << 2 ;
1681
1682 /*
1683 * We could be in the context of an IP or interface filter; in the
1684 * former case, ip_len would be in host (correct) order while for
1685 * the latter it would be in network order. Because of this, we
1686 * attempt to interpret the length field by comparing it against
1687 * the actual packet length. If the comparison fails, byte swap
1688 * the length and check again. If it still fails, then the packet
1689 * is bogus and we give up.
1690 */
1691 ip_len = ip->ip_len;
1692 if (ip_len != (m0->m_pkthdr.len - ip_offset)) {
1693 ip_len = SWAP16(ip_len);
1694 if (ip_len != (m0->m_pkthdr.len - ip_offset)) {
1695 printf("in_delayed_cksum_offset: ip_len %d (%d) "
1696 "doesn't match actual length %d\n", ip->ip_len,
1697 ip_len, (m0->m_pkthdr.len - ip_offset));
1698 return;
1699 }
1700 }
1701
1702 csum = in_cksum_skip(m, ip_len, offset);
1703
1704 /* Update stats */
1705 ip_out_cksum_stats(ip->ip_p, ip_len - offset);
1706
1707 if (m0->m_pkthdr.csum_flags & CSUM_UDP && csum == 0)
1708 csum = 0xffff;
1709 offset += m0->m_pkthdr.csum_data & 0xFFFF; /* checksum offset */
1710
1711 /* Gross */
1712 if (ip_offset) {
1713 if (M_LEADINGSPACE(m) < ip_offset)
1714 panic("in_delayed_cksum_offset - chain modified!\n");
1715 m->m_len += ip_offset;
1716 m->m_data -= ip_offset;
1717 }
1718
1719 if (offset > ip_len) /* bogus offset */
1720 return;
1721
1722 /* Insert the checksum in the existing chain */
1723 if (offset + ip_offset + sizeof(u_short) > m->m_len) {
1724 char tmp[2];
1725
1726 #if DEBUG
1727 printf("delayed m_copyback, m->len: %ld off: %d p: %d\n",
1728 m->m_len, offset + ip_offset, ip->ip_p);
1729 #endif
1730 *(u_short *)tmp = csum;
1731 m_copyback(m, offset + ip_offset, 2, tmp);
1732 } else
1733 *(u_short *)(m->m_data + offset + ip_offset) = csum;
1734 }
1735
/*
 * Convenience form of in_delayed_cksum_offset() for a packet whose IP
 * header is at offset zero of the mbuf chain `m'.
 */
void
in_delayed_cksum(struct mbuf *m)
{
	const int ip_hdr_offset = 0;	/* IP header starts the chain */

	in_delayed_cksum_offset(m, ip_hdr_offset);
}
1741
1742 void
1743 in_cksum_offset(struct mbuf* m, size_t ip_offset)
1744 {
1745 struct ip* ip = NULL;
1746 int hlen = 0;
1747 unsigned char buf[sizeof(struct ip)];
1748 int swapped = 0;
1749
1750 while (ip_offset >= m->m_len) {
1751 ip_offset -= m->m_len;
1752 m = m->m_next;
1753 if (m == NULL) {
1754 printf("in_cksum_offset failed - ip_offset wasn't in the packet\n");
1755 return;
1756 }
1757 }
1758
1759 /* Sometimes the IP header is not contiguous, yes this can happen! */
1760 if (ip_offset + sizeof(struct ip) > m->m_len) {
1761
1762 #if DEBUG
1763 printf("in_cksum_offset - delayed m_pullup, m->len: %ld off: %lu\n",
1764 m->m_len, ip_offset);
1765 #endif
1766 m_copydata(m, ip_offset, sizeof(struct ip), (caddr_t) buf);
1767
1768 ip = (struct ip *)buf;
1769 ip->ip_sum = 0;
1770 m_copyback(m, ip_offset + offsetof(struct ip, ip_sum), 2, (caddr_t)&ip->ip_sum);
1771 } else {
1772 ip = (struct ip*)(m->m_data + ip_offset);
1773 ip->ip_sum = 0;
1774 }
1775
1776 /* Gross */
1777 if (ip_offset) {
1778 m->m_len -= ip_offset;
1779 m->m_data += ip_offset;
1780 }
1781
1782 #ifdef _IP_VHL
1783 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
1784 #else
1785 hlen = ip->ip_hl << 2;
1786 #endif
1787 /*
1788 * We could be in the context of an IP or interface filter; in the
1789 * former case, ip_len would be in host order while for the latter
1790 * it would be in network (correct) order. Because of this, we
1791 * attempt to interpret the length field by comparing it against
1792 * the actual packet length. If the comparison fails, byte swap
1793 * the length and check again. If it still fails, then the packet
1794 * is bogus and we give up.
1795 */
1796 if (ntohs(ip->ip_len) != (m->m_pkthdr.len - ip_offset)) {
1797 ip->ip_len = SWAP16(ip->ip_len);
1798 swapped = 1;
1799 if (ntohs(ip->ip_len) != (m->m_pkthdr.len - ip_offset)) {
1800 ip->ip_len = SWAP16(ip->ip_len);
1801 printf("in_cksum_offset: ip_len %d (%d) "
1802 "doesn't match actual length %lu\n",
1803 ip->ip_len, SWAP16(ip->ip_len),
1804 (m->m_pkthdr.len - ip_offset));
1805 return;
1806 }
1807 }
1808
1809 ip->ip_sum = 0;
1810 ip->ip_sum = in_cksum(m, hlen);
1811 if (swapped)
1812 ip->ip_len = SWAP16(ip->ip_len);
1813
1814 /* Gross */
1815 if (ip_offset) {
1816 if (M_LEADINGSPACE(m) < ip_offset)
1817 panic("in_cksum_offset - chain modified!\n");
1818 m->m_len += ip_offset;
1819 m->m_data -= ip_offset;
1820 }
1821
1822 /* Insert the checksum in the existing chain if IP header not contiguous */
1823 if (ip_offset + sizeof(struct ip) > m->m_len) {
1824 char tmp[2];
1825
1826 #if DEBUG
1827 printf("in_cksum_offset m_copyback, m->len: %lu off: %lu p: %d\n",
1828 m->m_len, ip_offset + offsetof(struct ip, ip_sum), ip->ip_p);
1829 #endif
1830 *(u_short *)tmp = ip->ip_sum;
1831 m_copyback(m, ip_offset + offsetof(struct ip, ip_sum), 2, tmp);
1832 }
1833 }
1834
1835 /*
1836 * Insert IP options into preformed packet.
1837 * Adjust IP destination as required for IP source routing,
1838 * as indicated by a non-zero in_addr at the start of the options.
1839 *
1840 * XXX This routine assumes that the packet has no options in place.
1841 */
1842 static struct mbuf *
1843 ip_insertoptions(m, opt, phlen)
1844 register struct mbuf *m;
1845 struct mbuf *opt;
1846 int *phlen;
1847 {
1848 register struct ipoption *p = mtod(opt, struct ipoption *);
1849 struct mbuf *n;
1850 register struct ip *ip = mtod(m, struct ip *);
1851 unsigned optlen;
1852
1853 optlen = opt->m_len - sizeof(p->ipopt_dst);
1854 if (optlen + (u_short)ip->ip_len > IP_MAXPACKET)
1855 return (m); /* XXX should fail */
1856 if (p->ipopt_dst.s_addr)
1857 ip->ip_dst = p->ipopt_dst;
1858 if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
1859 MGETHDR(n, M_DONTWAIT, MT_HEADER); /* MAC-OK */
1860 if (n == 0)
1861 return (m);
1862 n->m_pkthdr.rcvif = 0;
1863 #if CONFIG_MACF_NET
1864 mac_mbuf_label_copy(m, n);
1865 #endif
1866 n->m_pkthdr.len = m->m_pkthdr.len + optlen;
1867 m->m_len -= sizeof(struct ip);
1868 m->m_data += sizeof(struct ip);
1869 n->m_next = m;
1870 m = n;
1871 m->m_len = optlen + sizeof(struct ip);
1872 m->m_data += max_linkhdr;
1873 (void)memcpy(mtod(m, void *), ip, sizeof(struct ip));
1874 } else {
1875 m->m_data -= optlen;
1876 m->m_len += optlen;
1877 m->m_pkthdr.len += optlen;
1878 ovbcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
1879 }
1880 ip = mtod(m, struct ip *);
1881 bcopy(p->ipopt_list, ip + 1, optlen);
1882 *phlen = sizeof(struct ip) + optlen;
1883 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, *phlen >> 2);
1884 ip->ip_len += optlen;
1885 return (m);
1886 }
1887
1888 /*
1889 * Copy options from ip to jp,
1890 * omitting those not copied during fragmentation.
1891 */
1892 int
1893 ip_optcopy(ip, jp)
1894 struct ip *ip, *jp;
1895 {
1896 register u_char *cp, *dp;
1897 int opt, optlen, cnt;
1898
1899 cp = (u_char *)(ip + 1);
1900 dp = (u_char *)(jp + 1);
1901 cnt = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof (struct ip);
1902 for (; cnt > 0; cnt -= optlen, cp += optlen) {
1903 opt = cp[0];
1904 if (opt == IPOPT_EOL)
1905 break;
1906 if (opt == IPOPT_NOP) {
1907 /* Preserve for IP mcast tunnel's LSRR alignment. */
1908 *dp++ = IPOPT_NOP;
1909 optlen = 1;
1910 continue;
1911 }
1912 #if DIAGNOSTIC
1913 if (cnt < IPOPT_OLEN + sizeof(*cp))
1914 panic("malformed IPv4 option passed to ip_optcopy");
1915 #endif
1916 optlen = cp[IPOPT_OLEN];
1917 #if DIAGNOSTIC
1918 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
1919 panic("malformed IPv4 option passed to ip_optcopy");
1920 #endif
1921 /* bogus lengths should have been caught by ip_dooptions */
1922 if (optlen > cnt)
1923 optlen = cnt;
1924 if (IPOPT_COPIED(opt)) {
1925 bcopy(cp, dp, optlen);
1926 dp += optlen;
1927 }
1928 }
1929 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
1930 *dp++ = IPOPT_EOL;
1931 return (optlen);
1932 }
1933
1934 /*
1935 * IP socket option processing.
1936 */
1937 int
1938 ip_ctloutput(so, sopt)
1939 struct socket *so;
1940 struct sockopt *sopt;
1941 {
1942 struct inpcb *inp = sotoinpcb(so);
1943 int error, optval;
1944
1945 error = optval = 0;
1946 if (sopt->sopt_level != IPPROTO_IP) {
1947 return (EINVAL);
1948 }
1949
1950 switch (sopt->sopt_dir) {
1951 case SOPT_SET:
1952 switch (sopt->sopt_name) {
1953 case IP_OPTIONS:
1954 #ifdef notyet
1955 case IP_RETOPTS:
1956 #endif
1957 {
1958 struct mbuf *m;
1959 if (sopt->sopt_valsize > MLEN) {
1960 error = EMSGSIZE;
1961 break;
1962 }
1963 MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_HEADER);
1964 if (m == 0) {
1965 error = ENOBUFS;
1966 break;
1967 }
1968 m->m_len = sopt->sopt_valsize;
1969 error = sooptcopyin(sopt, mtod(m, char *), m->m_len,
1970 m->m_len);
1971 if (error)
1972 break;
1973
1974 return (ip_pcbopts(sopt->sopt_name, &inp->inp_options,
1975 m));
1976 }
1977
1978 case IP_TOS:
1979 case IP_TTL:
1980 case IP_RECVOPTS:
1981 case IP_RECVRETOPTS:
1982 case IP_RECVDSTADDR:
1983 case IP_RECVIF:
1984 case IP_RECVTTL:
1985 #if defined(NFAITH) && NFAITH > 0
1986 case IP_FAITH:
1987 #endif
1988 error = sooptcopyin(sopt, &optval, sizeof optval,
1989 sizeof optval);
1990 if (error)
1991 break;
1992
1993 switch (sopt->sopt_name) {
1994 case IP_TOS:
1995 inp->inp_ip_tos = optval;
1996 break;
1997
1998 case IP_TTL:
1999 inp->inp_ip_ttl = optval;
2000 break;
2001 #define OPTSET(bit) \
2002 if (optval) \
2003 inp->inp_flags |= bit; \
2004 else \
2005 inp->inp_flags &= ~bit;
2006
2007 case IP_RECVOPTS:
2008 OPTSET(INP_RECVOPTS);
2009 break;
2010
2011 case IP_RECVRETOPTS:
2012 OPTSET(INP_RECVRETOPTS);
2013 break;
2014
2015 case IP_RECVDSTADDR:
2016 OPTSET(INP_RECVDSTADDR);
2017 break;
2018
2019 case IP_RECVIF:
2020 OPTSET(INP_RECVIF);
2021 break;
2022
2023 case IP_RECVTTL:
2024 OPTSET(INP_RECVTTL);
2025 break;
2026
2027 #if defined(NFAITH) && NFAITH > 0
2028 case IP_FAITH:
2029 OPTSET(INP_FAITH);
2030 break;
2031 #endif
2032 }
2033 break;
2034 #undef OPTSET
2035
2036 #if CONFIG_FORCE_OUT_IFP
2037 /*
2038 * Apple private interface, similar to IP_BOUND_IF, except
2039 * that the parameter is a NULL-terminated string containing
2040 * the name of the network interface; an empty string means
2041 * unbind. Applications are encouraged to use IP_BOUND_IF
2042 * instead, as that is the current "official" API.
2043 */
2044 case IP_FORCE_OUT_IFP: {
2045 char ifname[IFNAMSIZ];
2046 unsigned int ifscope;
2047
2048 /* This option is settable only for IPv4 */
2049 if (!(inp->inp_vflag & INP_IPV4)) {
2050 error = EINVAL;
2051 break;
2052 }
2053
2054 /* Verify interface name parameter is sane */
2055 if (sopt->sopt_valsize > sizeof(ifname)) {
2056 error = EINVAL;
2057 break;
2058 }
2059
2060 /* Copy the interface name */
2061 if (sopt->sopt_valsize != 0) {
2062 error = sooptcopyin(sopt, ifname,
2063 sizeof (ifname), sopt->sopt_valsize);
2064 if (error)
2065 break;
2066 }
2067
2068 if (sopt->sopt_valsize == 0 || ifname[0] == NULL) {
2069 /* Unbind this socket from any interface */
2070 ifscope = IFSCOPE_NONE;
2071 } else {
2072 ifnet_t ifp;
2073
2074 /* Verify name is NULL terminated */
2075 if (ifname[sopt->sopt_valsize - 1] != NULL) {
2076 error = EINVAL;
2077 break;
2078 }
2079
2080 /* Bail out if given bogus interface name */
2081 if (ifnet_find_by_name(ifname, &ifp) != 0) {
2082 error = ENXIO;
2083 break;
2084 }
2085
2086 /* Bind this socket to this interface */
2087 ifscope = ifp->if_index;
2088
2089 /*
2090 * Won't actually free; since we don't release
2091 * this later, we should do it now.
2092 */
2093 ifnet_release(ifp);
2094 }
2095 ip_bindif(inp, ifscope);
2096 }
2097 break;
2098 #endif
2099 case IP_MULTICAST_IF:
2100 case IP_MULTICAST_VIF:
2101 case IP_MULTICAST_TTL:
2102 case IP_MULTICAST_LOOP:
2103 case IP_ADD_MEMBERSHIP:
2104 case IP_DROP_MEMBERSHIP:
2105 error = ip_setmoptions(sopt, &inp->inp_moptions);
2106 break;
2107
2108 case IP_PORTRANGE:
2109 error = sooptcopyin(sopt, &optval, sizeof optval,
2110 sizeof optval);
2111 if (error)
2112 break;
2113
2114 switch (optval) {
2115 case IP_PORTRANGE_DEFAULT:
2116 inp->inp_flags &= ~(INP_LOWPORT);
2117 inp->inp_flags &= ~(INP_HIGHPORT);
2118 break;
2119
2120 case IP_PORTRANGE_HIGH:
2121 inp->inp_flags &= ~(INP_LOWPORT);
2122 inp->inp_flags |= INP_HIGHPORT;
2123 break;
2124
2125 case IP_PORTRANGE_LOW:
2126 inp->inp_flags &= ~(INP_HIGHPORT);
2127 inp->inp_flags |= INP_LOWPORT;
2128 break;
2129
2130 default:
2131 error = EINVAL;
2132 break;
2133 }
2134 break;
2135
2136 #if IPSEC
2137 case IP_IPSEC_POLICY:
2138 {
2139 caddr_t req = NULL;
2140 size_t len = 0;
2141 int priv;
2142 struct mbuf *m;
2143 int optname;
2144
2145 if (sopt->sopt_valsize > MCLBYTES) {
2146 error = EMSGSIZE;
2147 break;
2148 }
2149 if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
2150 break;
2151 if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
2152 break;
2153 priv = (sopt->sopt_p != NULL &&
2154 proc_suser(sopt->sopt_p) != 0) ? 0 : 1;
2155 if (m) {
2156 req = mtod(m, caddr_t);
2157 len = m->m_len;
2158 }
2159 optname = sopt->sopt_name;
2160 error = ipsec4_set_policy(inp, optname, req, len, priv);
2161 m_freem(m);
2162 break;
2163 }
2164 #endif /*IPSEC*/
2165
2166 #if TRAFFIC_MGT
2167 case IP_TRAFFIC_MGT_BACKGROUND:
2168 {
2169 unsigned background = 0;
2170 error = sooptcopyin(sopt, &background, sizeof(background), sizeof(background));
2171 if (error)
2172 break;
2173
2174 if (background)
2175 so->so_traffic_mgt_flags |= TRAFFIC_MGT_SO_BACKGROUND;
2176 else
2177 so->so_traffic_mgt_flags &= ~TRAFFIC_MGT_SO_BACKGROUND;
2178
2179 break;
2180 }
2181 #endif /* TRAFFIC_MGT */
2182
2183 /*
2184 * On a multihomed system, scoped routing can be used to
2185 * restrict the source interface used for sending packets.
2186 * The socket option IP_BOUND_IF binds a particular AF_INET
2187 * socket to an interface such that data sent on the socket
2188 * is restricted to that interface. This is unlike the
2189 * SO_DONTROUTE option where the routing table is bypassed;
2190 * therefore it allows for a greater flexibility and control
2191 * over the system behavior, and does not place any restriction
2192 * on the destination address type (e.g. unicast, multicast,
2193 * or broadcast if applicable) or whether or not the host is
2194 * directly reachable. Note that in the multicast transmit
2195 * case, IP_MULTICAST_IF takes precedence over IP_BOUND_IF,
2196 * since the former practically bypasses the routing table;
2197 * in this case, IP_BOUND_IF sets the default interface used
2198 * for sending multicast packets in the absence of an explicit
2199 * transmit interface set via IP_MULTICAST_IF.
2200 */
2201 case IP_BOUND_IF:
2202 /* This option is settable only for IPv4 */
2203 if (!(inp->inp_vflag & INP_IPV4)) {
2204 error = EINVAL;
2205 break;
2206 }
2207
2208 error = sooptcopyin(sopt, &optval, sizeof (optval),
2209 sizeof (optval));
2210
2211 if (error)
2212 break;
2213
2214 ip_bindif(inp, optval);
2215 break;
2216
2217 default:
2218 error = ENOPROTOOPT;
2219 break;
2220 }
2221 break;
2222
2223 case SOPT_GET:
2224 switch (sopt->sopt_name) {
2225 case IP_OPTIONS:
2226 case IP_RETOPTS:
2227 if (inp->inp_options)
2228 error = sooptcopyout(sopt,
2229 mtod(inp->inp_options,
2230 char *),
2231 inp->inp_options->m_len);
2232 else
2233 sopt->sopt_valsize = 0;
2234 break;
2235
2236 case IP_TOS:
2237 case IP_TTL:
2238 case IP_RECVOPTS:
2239 case IP_RECVRETOPTS:
2240 case IP_RECVDSTADDR:
2241 case IP_RECVIF:
2242 case IP_RECVTTL:
2243 case IP_PORTRANGE:
2244 #if defined(NFAITH) && NFAITH > 0
2245 case IP_FAITH:
2246 #endif
2247 switch (sopt->sopt_name) {
2248
2249 case IP_TOS:
2250 optval = inp->inp_ip_tos;
2251 break;
2252
2253 case IP_TTL:
2254 optval = inp->inp_ip_ttl;
2255 break;
2256
2257 #define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0)
2258
2259 case IP_RECVOPTS:
2260 optval = OPTBIT(INP_RECVOPTS);
2261 break;
2262
2263 case IP_RECVRETOPTS:
2264 optval = OPTBIT(INP_RECVRETOPTS);
2265 break;
2266
2267 case IP_RECVDSTADDR:
2268 optval = OPTBIT(INP_RECVDSTADDR);
2269 break;
2270
2271 case IP_RECVIF:
2272 optval = OPTBIT(INP_RECVIF);
2273 break;
2274
2275 case IP_RECVTTL:
2276 optval = OPTBIT(INP_RECVTTL);
2277 break;
2278
2279 case IP_PORTRANGE:
2280 if (inp->inp_flags & INP_HIGHPORT)
2281 optval = IP_PORTRANGE_HIGH;
2282 else if (inp->inp_flags & INP_LOWPORT)
2283 optval = IP_PORTRANGE_LOW;
2284 else
2285 optval = 0;
2286 break;
2287
2288 #if defined(NFAITH) && NFAITH > 0
2289 case IP_FAITH:
2290 optval = OPTBIT(INP_FAITH);
2291 break;
2292 #endif
2293 }
2294 error = sooptcopyout(sopt, &optval, sizeof optval);
2295 break;
2296
2297 case IP_MULTICAST_IF:
2298 case IP_MULTICAST_VIF:
2299 case IP_MULTICAST_TTL:
2300 case IP_MULTICAST_LOOP:
2301 case IP_ADD_MEMBERSHIP:
2302 case IP_DROP_MEMBERSHIP:
2303 error = ip_getmoptions(sopt, inp->inp_moptions);
2304 break;
2305
2306 #if IPSEC
2307 case IP_IPSEC_POLICY:
2308 {
2309 struct mbuf *m = NULL;
2310 caddr_t req = NULL;
2311 size_t len = 0;
2312
2313 if (m != 0) {
2314 req = mtod(m, caddr_t);
2315 len = m->m_len;
2316 }
2317 error = ipsec4_get_policy(sotoinpcb(so), req, len, &m);
2318 if (error == 0)
2319 error = soopt_mcopyout(sopt, m); /* XXX */
2320 if (error == 0)
2321 m_freem(m);
2322 break;
2323 }
2324 #endif /*IPSEC*/
2325
2326 #if TRAFFIC_MGT
2327 case IP_TRAFFIC_MGT_BACKGROUND:
2328 {
2329 unsigned background = so->so_traffic_mgt_flags;
2330 return (sooptcopyout(sopt, &background, sizeof(background)));
2331 break;
2332 }
2333 #endif /* TRAFFIC_MGT */
2334
2335 case IP_BOUND_IF:
2336 if (inp->inp_flags & INP_BOUND_IF)
2337 optval = inp->inp_boundif;
2338 error = sooptcopyout(sopt, &optval, sizeof (optval));
2339 break;
2340
2341 default:
2342 error = ENOPROTOOPT;
2343 break;
2344 }
2345 break;
2346 }
2347 return (error);
2348 }
2349
2350 /*
2351 * Set up IP options in pcb for insertion in output packets.
2352 * Store in mbuf with pointer in pcbopt, adding pseudo-option
2353 * with destination address if source routed.
2354 */
2355 static int
2356 ip_pcbopts(
2357 __unused int optname,
2358 struct mbuf **pcbopt,
2359 register struct mbuf *m)
2360 {
2361 register int cnt, optlen;
2362 register u_char *cp;
2363 u_char opt;
2364
2365 /* turn off any old options */
2366 if (*pcbopt)
2367 (void)m_free(*pcbopt);
2368 *pcbopt = 0;
2369 if (m == (struct mbuf *)0 || m->m_len == 0) {
2370 /*
2371 * Only turning off any previous options.
2372 */
2373 if (m)
2374 (void)m_free(m);
2375 return (0);
2376 }
2377
2378 #ifndef vax
2379 if (m->m_len % sizeof(int32_t))
2380 goto bad;
2381 #endif
2382 /*
2383 * IP first-hop destination address will be stored before
2384 * actual options; move other options back
2385 * and clear it when none present.
2386 */
2387 if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
2388 goto bad;
2389 cnt = m->m_len;
2390 m->m_len += sizeof(struct in_addr);
2391 cp = mtod(m, u_char *) + sizeof(struct in_addr);
2392 ovbcopy(mtod(m, caddr_t), (caddr_t)cp, (unsigned)cnt);
2393 bzero(mtod(m, caddr_t), sizeof(struct in_addr));
2394
2395 for (; cnt > 0; cnt -= optlen, cp += optlen) {
2396 opt = cp[IPOPT_OPTVAL];
2397 if (opt == IPOPT_EOL)
2398 break;
2399 if (opt == IPOPT_NOP)
2400 optlen = 1;
2401 else {
2402 if (cnt < IPOPT_OLEN + sizeof(*cp))
2403 goto bad;
2404 optlen = cp[IPOPT_OLEN];
2405 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
2406 goto bad;
2407 }
2408 switch (opt) {
2409
2410 default:
2411 break;
2412
2413 case IPOPT_LSRR:
2414 case IPOPT_SSRR:
2415 /*
2416 * user process specifies route as:
2417 * ->A->B->C->D
2418 * D must be our final destination (but we can't
2419 * check that since we may not have connected yet).
2420 * A is first hop destination, which doesn't appear in
2421 * actual IP option, but is stored before the options.
2422 */
2423 if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
2424 goto bad;
2425 m->m_len -= sizeof(struct in_addr);
2426 cnt -= sizeof(struct in_addr);
2427 optlen -= sizeof(struct in_addr);
2428 cp[IPOPT_OLEN] = optlen;
2429 /*
2430 * Move first hop before start of options.
2431 */
2432 bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
2433 sizeof(struct in_addr));
2434 /*
2435 * Then copy rest of options back
2436 * to close up the deleted entry.
2437 */
2438 ovbcopy((caddr_t)(&cp[IPOPT_OFFSET+1] +
2439 sizeof(struct in_addr)),
2440 (caddr_t)&cp[IPOPT_OFFSET+1],
2441 (unsigned)cnt + sizeof(struct in_addr));
2442 break;
2443 }
2444 }
2445 if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
2446 goto bad;
2447 *pcbopt = m;
2448 return (0);
2449
2450 bad:
2451 (void)m_free(m);
2452 return (EINVAL);
2453 }
2454
2455 /*
2456 * XXX
2457 * The whole multicast option thing needs to be re-thought.
2458 * Several of these options are equally applicable to non-multicast
2459 * transmission, and one (IP_MULTICAST_TTL) totally duplicates a
2460 * standard option (IP_TTL).
2461 */
2462
2463 /*
2464 * following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as interface index.
2465 */
2466 static struct ifnet *
2467 ip_multicast_if(a, ifindexp)
2468 struct in_addr *a;
2469 int *ifindexp;
2470 {
2471 int ifindex;
2472 struct ifnet *ifp;
2473
2474 if (ifindexp)
2475 *ifindexp = 0;
2476 if (ntohl(a->s_addr) >> 24 == 0) {
2477 ifindex = ntohl(a->s_addr) & 0xffffff;
2478 ifnet_head_lock_shared();
2479 if (ifindex < 0 || if_index < ifindex) {
2480 ifnet_head_done();
2481 return NULL;
2482 }
2483 ifp = ifindex2ifnet[ifindex];
2484 ifnet_head_done();
2485 if (ifindexp)
2486 *ifindexp = ifindex;
2487 } else {
2488 INADDR_TO_IFP(*a, ifp);
2489 }
2490 return ifp;
2491 }
2492
2493 /*
2494 * Set the IP multicast options in response to user setsockopt().
2495 */
2496 static int
2497 ip_setmoptions(sopt, imop)
2498 struct sockopt *sopt;
2499 struct ip_moptions **imop;
2500 {
2501 int error = 0;
2502 int i;
2503 struct in_addr addr;
2504 struct ip_mreq mreq;
2505 struct ifnet *ifp = NULL;
2506 struct ip_moptions *imo = *imop;
2507 int ifindex;
2508
2509 if (imo == NULL) {
2510 /*
2511 * No multicast option buffer attached to the pcb;
2512 * allocate one and initialize to default values.
2513 */
2514 error = ip_createmoptions(imop);
2515 if (error != 0)
2516 return error;
2517 imo = *imop;
2518 }
2519
2520 switch (sopt->sopt_name) {
2521 /* store an index number for the vif you wanna use in the send */
2522 #if MROUTING
2523 case IP_MULTICAST_VIF:
2524 if (legal_vif_num == 0) {
2525 error = EOPNOTSUPP;
2526 break;
2527 }
2528 error = sooptcopyin(sopt, &i, sizeof i, sizeof i);
2529 if (error)
2530 break;
2531 if (!legal_vif_num(i) && (i != -1)) {
2532 error = EINVAL;
2533 break;
2534 }
2535 imo->imo_multicast_vif = i;
2536 break;
2537 #endif /* MROUTING */
2538
2539 case IP_MULTICAST_IF:
2540 /*
2541 * Select the interface for outgoing multicast packets.
2542 */
2543 error = sooptcopyin(sopt, &addr, sizeof addr, sizeof addr);
2544 if (error)
2545 break;
2546 /*
2547 * INADDR_ANY is used to remove a previous selection.
2548 * When no interface is selected, a default one is
2549 * chosen every time a multicast packet is sent.
2550 */
2551 if (addr.s_addr == INADDR_ANY) {
2552 imo->imo_multicast_ifp = NULL;
2553 break;
2554 }
2555 /*
2556 * The selected interface is identified by its local
2557 * IP address. Find the interface and confirm that
2558 * it supports multicasting.
2559 */
2560 ifp = ip_multicast_if(&addr, &ifindex);
2561 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
2562 error = EADDRNOTAVAIL;
2563 break;
2564 }
2565 imo->imo_multicast_ifp = ifp;
2566 if (ifindex)
2567 imo->imo_multicast_addr = addr;
2568 else
2569 imo->imo_multicast_addr.s_addr = INADDR_ANY;
2570 break;
2571
2572 case IP_MULTICAST_TTL:
2573 /*
2574 * Set the IP time-to-live for outgoing multicast packets.
2575 * The original multicast API required a char argument,
2576 * which is inconsistent with the rest of the socket API.
2577 * We allow either a char or an int.
2578 */
2579 if (sopt->sopt_valsize == 1) {
2580 u_char ttl;
2581 error = sooptcopyin(sopt, &ttl, 1, 1);
2582 if (error)
2583 break;
2584 imo->imo_multicast_ttl = ttl;
2585 } else {
2586 u_int ttl;
2587 error = sooptcopyin(sopt, &ttl, sizeof ttl,
2588 sizeof ttl);
2589 if (error)
2590 break;
2591 if (ttl > 255)
2592 error = EINVAL;
2593 else
2594 imo->imo_multicast_ttl = ttl;
2595 }
2596 break;
2597
2598 case IP_MULTICAST_LOOP:
2599 /*
2600 * Set the loopback flag for outgoing multicast packets.
2601 * Must be zero or one. The original multicast API required a
2602 * char argument, which is inconsistent with the rest
2603 * of the socket API. We allow either a char or an int.
2604 */
2605 if (sopt->sopt_valsize == 1) {
2606 u_char loop;
2607 error = sooptcopyin(sopt, &loop, 1, 1);
2608 if (error)
2609 break;
2610 imo->imo_multicast_loop = !!loop;
2611 } else {
2612 u_int loop;
2613 error = sooptcopyin(sopt, &loop, sizeof loop,
2614 sizeof loop);
2615 if (error)
2616 break;
2617 imo->imo_multicast_loop = !!loop;
2618 }
2619 break;
2620
2621 case IP_ADD_MEMBERSHIP:
2622 /*
2623 * Add a multicast group membership.
2624 * Group must be a valid IP multicast address.
2625 */
2626 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
2627 if (error)
2628 break;
2629
2630 error = ip_addmembership(imo, &mreq);
2631 break;
2632
2633 case IP_DROP_MEMBERSHIP:
2634 /*
2635 * Drop a multicast group membership.
2636 * Group must be a valid IP multicast address.
2637 */
2638 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
2639 if (error)
2640 break;
2641
2642 error = ip_dropmembership(imo, &mreq);
2643 break;
2644
2645 default:
2646 error = EOPNOTSUPP;
2647 break;
2648 }
2649
2650 /*
2651 * If all options have default values, no need to keep the mbuf.
2652 */
2653 if (imo->imo_multicast_ifp == NULL &&
2654 imo->imo_multicast_vif == (u_long)-1 &&
2655 imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL &&
2656 imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP &&
2657 imo->imo_num_memberships == 0) {
2658 FREE(*imop, M_IPMOPTS);
2659 *imop = NULL;
2660 }
2661
2662 return (error);
2663 }
2664
2665 /*
2666 * Set the IP multicast options in response to user setsockopt().
2667 */
2668 __private_extern__ int
2669 ip_createmoptions(
2670 struct ip_moptions **imop)
2671 {
2672 struct ip_moptions *imo;
2673 imo = (struct ip_moptions*) _MALLOC(sizeof(*imo), M_IPMOPTS,
2674 M_WAITOK);
2675
2676 if (imo == NULL)
2677 return (ENOBUFS);
2678 *imop = imo;
2679 imo->imo_multicast_ifp = NULL;
2680 imo->imo_multicast_addr.s_addr = INADDR_ANY;
2681 imo->imo_multicast_vif = -1;
2682 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
2683 imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
2684 imo->imo_num_memberships = 0;
2685
2686 return 0;
2687 }
2688
2689 /*
2690 * Add membership to an IPv4 multicast.
2691 */
2692 __private_extern__ int
2693 ip_addmembership(
2694 struct ip_moptions *imo,
2695 struct ip_mreq *mreq)
2696 {
2697 struct route ro;
2698 struct sockaddr_in *dst;
2699 struct ifnet *ifp = NULL;
2700 int error = 0;
2701 int i;
2702
2703 if (!IN_MULTICAST(ntohl(mreq->imr_multiaddr.s_addr))) {
2704 error = EINVAL;
2705 return error;
2706 }
2707 /*
2708 * If no interface address was provided, use the interface of
2709 * the route to the given multicast address.
2710 */
2711 if (mreq->imr_interface.s_addr == INADDR_ANY) {
2712 bzero((caddr_t)&ro, sizeof(ro));
2713 dst = (struct sockaddr_in *)&ro.ro_dst;
2714 dst->sin_len = sizeof(*dst);
2715 dst->sin_family = AF_INET;
2716 dst->sin_addr = mreq->imr_multiaddr;
2717 lck_mtx_lock(rt_mtx);
2718 rtalloc_ign_locked(&ro, 0UL);
2719 if (ro.ro_rt != NULL) {
2720 ifp = ro.ro_rt->rt_ifp;
2721 rtfree_locked(ro.ro_rt);
2722 }
2723 else {
2724 /* If there's no default route, try using loopback */
2725 mreq->imr_interface.s_addr = INADDR_LOOPBACK;
2726 }
2727 lck_mtx_unlock(rt_mtx);
2728 }
2729
2730 if (ifp == NULL) {
2731 ifp = ip_multicast_if(&mreq->imr_interface, NULL);
2732 }
2733
2734 /*
2735 * See if we found an interface, and confirm that it
2736 * supports multicast.
2737 */
2738 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
2739 error = EADDRNOTAVAIL;
2740 return error;
2741 }
2742 /*
2743 * See if the membership already exists or if all the
2744 * membership slots are full.
2745 */
2746 for (i = 0; i < imo->imo_num_memberships; ++i) {
2747 if (imo->imo_membership[i]->inm_ifp == ifp &&
2748 imo->imo_membership[i]->inm_addr.s_addr
2749 == mreq->imr_multiaddr.s_addr)
2750 break;
2751 }
2752 if (i < imo->imo_num_memberships) {
2753 error = EADDRINUSE;
2754 return error;
2755 }
2756 if (i == IP_MAX_MEMBERSHIPS) {
2757 error = ETOOMANYREFS;
2758 return error;
2759 }
2760 /*
2761 * Everything looks good; add a new record to the multicast
2762 * address list for the given interface.
2763 */
2764 if ((imo->imo_membership[i] =
2765 in_addmulti(&mreq->imr_multiaddr, ifp)) == NULL) {
2766 error = ENOBUFS;
2767 return error;
2768 }
2769 ++imo->imo_num_memberships;
2770
2771 return error;
2772 }
2773
2774 /*
2775 * Drop membership of an IPv4 multicast.
2776 */
2777 __private_extern__ int
2778 ip_dropmembership(
2779 struct ip_moptions *imo,
2780 struct ip_mreq *mreq)
2781 {
2782 int error = 0;
2783 struct ifnet* ifp = NULL;
2784 int i;
2785
2786 if (!IN_MULTICAST(ntohl(mreq->imr_multiaddr.s_addr))) {
2787 error = EINVAL;
2788 return error;
2789 }
2790
2791 /*
2792 * If an interface address was specified, get a pointer
2793 * to its ifnet structure.
2794 */
2795 if (mreq->imr_interface.s_addr == INADDR_ANY)
2796 ifp = NULL;
2797 else {
2798 ifp = ip_multicast_if(&mreq->imr_interface, NULL);
2799 if (ifp == NULL) {
2800 error = EADDRNOTAVAIL;
2801 return error;
2802 }
2803 }
2804 /*
2805 * Find the membership in the membership array.
2806 */
2807 for (i = 0; i < imo->imo_num_memberships; ++i) {
2808 if ((ifp == NULL ||
2809 imo->imo_membership[i]->inm_ifp == ifp) &&
2810 imo->imo_membership[i]->inm_addr.s_addr ==
2811 mreq->imr_multiaddr.s_addr)
2812 break;
2813 }
2814 if (i == imo->imo_num_memberships) {
2815 error = EADDRNOTAVAIL;
2816 return error;
2817 }
2818 /*
2819 * Give up the multicast address record to which the
2820 * membership points.
2821 */
2822 in_delmulti(&imo->imo_membership[i]);
2823 /*
2824 * Remove the gap in the membership array.
2825 */
2826 for (++i; i < imo->imo_num_memberships; ++i)
2827 imo->imo_membership[i-1] = imo->imo_membership[i];
2828 --imo->imo_num_memberships;
2829
2830 return error;
2831 }
2832
2833 /*
2834 * Return the IP multicast options in response to user getsockopt().
2835 */
2836 static int
2837 ip_getmoptions(sopt, imo)
2838 struct sockopt *sopt;
2839 register struct ip_moptions *imo;
2840 {
2841 struct in_addr addr;
2842 struct in_ifaddr *ia;
2843 int error, optval;
2844 u_char coptval;
2845
2846 error = 0;
2847 switch (sopt->sopt_name) {
2848 #if MROUTING
2849 case IP_MULTICAST_VIF:
2850 if (imo != NULL)
2851 optval = imo->imo_multicast_vif;
2852 else
2853 optval = -1;
2854 error = sooptcopyout(sopt, &optval, sizeof optval);
2855 break;
2856 #endif /* MROUTING */
2857
2858 case IP_MULTICAST_IF:
2859 if (imo == NULL || imo->imo_multicast_ifp == NULL)
2860 addr.s_addr = INADDR_ANY;
2861 else if (imo->imo_multicast_addr.s_addr) {
2862 /* return the value user has set */
2863 addr = imo->imo_multicast_addr;
2864 } else {
2865 IFP_TO_IA(imo->imo_multicast_ifp, ia);
2866 addr.s_addr = (ia == NULL) ? INADDR_ANY
2867 : IA_SIN(ia)->sin_addr.s_addr;
2868 }
2869 error = sooptcopyout(sopt, &addr, sizeof addr);
2870 break;
2871
2872 case IP_MULTICAST_TTL:
2873 if (imo == 0)
2874 optval = coptval = IP_DEFAULT_MULTICAST_TTL;
2875 else
2876 optval = coptval = imo->imo_multicast_ttl;
2877 if (sopt->sopt_valsize == 1)
2878 error = sooptcopyout(sopt, &coptval, 1);
2879 else
2880 error = sooptcopyout(sopt, &optval, sizeof optval);
2881 break;
2882
2883 case IP_MULTICAST_LOOP:
2884 if (imo == 0)
2885 optval = coptval = IP_DEFAULT_MULTICAST_LOOP;
2886 else
2887 optval = coptval = imo->imo_multicast_loop;
2888 if (sopt->sopt_valsize == 1)
2889 error = sooptcopyout(sopt, &coptval, 1);
2890 else
2891 error = sooptcopyout(sopt, &optval, sizeof optval);
2892 break;
2893
2894 default:
2895 error = ENOPROTOOPT;
2896 break;
2897 }
2898 return (error);
2899 }
2900
2901 /*
2902 * Discard the IP multicast options.
2903 */
2904 void
2905 ip_freemoptions(imo)
2906 register struct ip_moptions *imo;
2907 {
2908 register int i;
2909
2910 if (imo != NULL) {
2911 for (i = 0; i < imo->imo_num_memberships; ++i)
2912 in_delmulti(&imo->imo_membership[i]);
2913 FREE(imo, M_IPMOPTS);
2914 }
2915 }
2916
2917 /*
2918 * Routine called from ip_output() to loop back a copy of an IP multicast
2919 * packet to the input queue of a specified interface. Note that this
2920 * calls the output routine of the loopback "driver", but with an interface
2921 * pointer that might NOT be a loopback interface -- evil, but easier than
2922 * replicating that code here.
2923 */
2924 static void
2925 ip_mloopback(ifp, m, dst, hlen)
2926 struct ifnet *ifp;
2927 register struct mbuf *m;
2928 register struct sockaddr_in *dst;
2929 int hlen;
2930 {
2931 register struct ip *ip;
2932 struct mbuf *copym;
2933 int sw_csum = (apple_hwcksum_tx == 0);
2934
2935 copym = m_copy(m, 0, M_COPYALL);
2936 if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen))
2937 copym = m_pullup(copym, hlen);
2938
2939 if (copym == NULL)
2940 return;
2941
2942 /*
2943 * We don't bother to fragment if the IP length is greater
2944 * than the interface's MTU. Can this possibly matter?
2945 */
2946 ip = mtod(copym, struct ip *);
2947 HTONS(ip->ip_len);
2948 HTONS(ip->ip_off);
2949 ip->ip_sum = 0;
2950 ip->ip_sum = in_cksum(copym, hlen);
2951 /*
2952 * NB:
2953 * It's not clear whether there are any lingering
2954 * reentrancy problems in other areas which might
2955 * be exposed by using ip_input directly (in
2956 * particular, everything which modifies the packet
2957 * in-place). Yet another option is using the
2958 * protosw directly to deliver the looped back
2959 * packet. For the moment, we'll err on the side
2960 * of safety by using if_simloop().
2961 */
2962 #if 1 /* XXX */
2963 if (dst->sin_family != AF_INET) {
2964 printf("ip_mloopback: bad address family %d\n",
2965 dst->sin_family);
2966 dst->sin_family = AF_INET;
2967 }
2968 #endif
2969
2970 /*
2971 * Mark checksum as valid or calculate checksum for loopback.
2972 *
2973 * This is done this way because we have to embed the ifp of
2974 * the interface we will send the original copy of the packet
2975 * out on in the mbuf. ip_input will check if_hwassist of the
2976 * embedded ifp and ignore all csum_flags if if_hwassist is 0.
2977 * The UDP checksum has not been calculated yet.
2978 */
2979 if (sw_csum || (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA)) {
2980 if (!sw_csum && IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist)) {
2981 copym->m_pkthdr.csum_flags |=
2982 CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
2983 CSUM_IP_CHECKED | CSUM_IP_VALID;
2984 copym->m_pkthdr.csum_data = 0xffff;
2985 } else {
2986 NTOHS(ip->ip_len);
2987 in_delayed_cksum(copym);
2988 HTONS(ip->ip_len);
2989 }
2990 }
2991
2992 /*
2993 * TedW:
2994 * We need to send all loopback traffic down to dlil in case
2995 * a filter has tapped-in.
2996 */
2997
2998 /*
2999 * Stuff the 'real' ifp into the pkthdr, to be used in matching
3000 * in ip_input(); we need the loopback ifp/dl_tag passed as args
3001 * to make the loopback driver compliant with the data link
3002 * requirements.
3003 */
3004 if (lo_ifp) {
3005 copym->m_pkthdr.rcvif = ifp;
3006 dlil_output(lo_ifp, PF_INET, copym, 0,
3007 (struct sockaddr *) dst, 0);
3008 } else {
3009 printf("Warning: ip_output call to dlil_find_dltag failed!\n");
3010 m_freem(copym);
3011 }
3012 }
3013
3014 /*
3015 * Given a source IP address (and route, if available), determine the best
3016 * interface to send the packet from.
3017 */
3018 static struct ifaddr *
3019 in_selectsrcif(struct ip *ip, struct route *ro, unsigned int ifscope)
3020 {
3021 struct ifaddr *ifa = NULL;
3022 struct sockaddr src = { sizeof (struct sockaddr_in), AF_INET, { 0, } };
3023 struct ifnet *rt_ifp;
3024 char ip_src[16], ip_dst[16];
3025
3026 if (ip_select_srcif_debug) {
3027 (void) inet_ntop(AF_INET, &ip->ip_src.s_addr, ip_src,
3028 sizeof (ip_src));
3029 (void) inet_ntop(AF_INET, &ip->ip_dst.s_addr, ip_dst,
3030 sizeof (ip_dst));
3031 }
3032
3033 lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED);
3034
3035 ((struct sockaddr_in *)&src)->sin_addr.s_addr = ip->ip_src.s_addr;
3036 rt_ifp = (ro->ro_rt != NULL) ? ro->ro_rt->rt_ifp : NULL;
3037
3038 /*
3039 * Given the source IP address, find a suitable source interface
3040 * to use for transmission; if the caller has specified a scope,
3041 * optimize the search by looking at the addresses only for that
3042 * interface. This is still suboptimal, however, as we need to
3043 * traverse the per-interface list.
3044 */
3045 if (ifscope != IFSCOPE_NONE || ro->ro_rt != NULL) {
3046 unsigned int scope = ifscope;
3047
3048 /*
3049 * If no scope is specified and the route is stale (pointing
3050 * to a defunct interface) use the current primary interface;
3051 * this happens when switching between interfaces configured
3052 * with the same IP address. Otherwise pick up the scope
3053 * information from the route; the ULP may have looked up a
3054 * correct route and we just need to verify it here and mark
3055 * it with the ROF_SRCIF_SELECTED flag below.
3056 */
3057 if (scope == IFSCOPE_NONE) {
3058 scope = rt_ifp->if_index;
3059 if (scope != get_primary_ifscope() &&
3060 ro->ro_rt->generation_id != route_generation)
3061 scope = get_primary_ifscope();
3062 }
3063
3064 ifa = ifa_ifwithaddr_scoped(&src, scope);
3065
3066 if (ip_select_srcif_debug && ifa != NULL) {
3067 if (ro->ro_rt != NULL) {
3068 printf("%s->%s ifscope %d->%d ifa_if %s%d "
3069 "ro_if %s%d\n", ip_src, ip_dst, ifscope,
3070 scope, ifa->ifa_ifp->if_name,
3071 ifa->ifa_ifp->if_unit, rt_ifp->if_name,
3072 rt_ifp->if_unit);
3073 } else {
3074 printf("%s->%s ifscope %d->%d ifa_if %s%d\n",
3075 ip_src, ip_dst, ifscope, scope,
3076 ifa->ifa_ifp->if_name,
3077 ifa->ifa_ifp->if_unit);
3078 }
3079 }
3080 }
3081
3082 /*
3083 * Slow path; search for an interface having the corresponding source
3084 * IP address if the scope was not specified by the caller, and:
3085 *
3086 * 1) There currently isn't any route, or,
3087 * 2) The interface used by the route does not own that source
3088 * IP address; in this case, the route will get blown away
3089 * and we'll do a more specific scoped search using the newly
3090 * found interface.
3091 */
3092 if (ifa == NULL && ifscope == IFSCOPE_NONE) {
3093 ifa = ifa_ifwithaddr(&src);
3094
3095 if (ip_select_srcif_debug && ifa != NULL) {
3096 printf("%s->%s ifscope %d ifa_if %s%d\n",
3097 ip_src, ip_dst, ifscope, ifa->ifa_ifp->if_name,
3098 ifa->ifa_ifp->if_unit);
3099 }
3100 }
3101
3102 /*
3103 * If there is a non-loopback route with the wrong interface, or if
3104 * there is no interface configured with such an address, blow it
3105 * away. Except for local/loopback, we look for one with a matching
3106 * interface scope/index.
3107 */
3108 if (ro->ro_rt != NULL &&
3109 (ifa == NULL || (ifa->ifa_ifp != rt_ifp && rt_ifp != lo_ifp) ||
3110 !(ro->ro_rt->rt_flags & RTF_UP))) {
3111 if (ip_select_srcif_debug) {
3112 if (ifa != NULL) {
3113 printf("%s->%s ifscope %d ro_if %s%d != "
3114 "ifa_if %s%d (cached route cleared)\n",
3115 ip_src, ip_dst, ifscope, rt_ifp->if_name,
3116 rt_ifp->if_unit, ifa->ifa_ifp->if_name,
3117 ifa->ifa_ifp->if_unit);
3118 } else {
3119 printf("%s->%s ifscope %d ro_if %s%d "
3120 "(no ifa_if found)\n",
3121 ip_src, ip_dst, ifscope, rt_ifp->if_name,
3122 rt_ifp->if_unit);
3123 }
3124 }
3125
3126 rtfree_locked(ro->ro_rt);
3127 ro->ro_rt = NULL;
3128 ro->ro_flags &= ~ROF_SRCIF_SELECTED;
3129
3130 /*
3131 * If the destination is IPv4 LLA and the route's interface
3132 * doesn't match the source interface, then the source IP
3133 * address is wrong; it most likely belongs to the primary
3134 * interface associated with the IPv4 LL subnet. Drop the
3135 * packet rather than letting it go out and return an error
3136 * to the ULP. This actually applies not only to IPv4 LL
3137 * but other shared subnets; for now we explicitly test only
3138 * for the former case and save the latter for future.
3139 */
3140 if (IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr)) &&
3141 !IN_LINKLOCAL(ntohl(ip->ip_src.s_addr)) && ifa != NULL) {
3142 ifafree(ifa);
3143 ifa = NULL;
3144 }
3145 }
3146
3147 if (ip_select_srcif_debug && ifa == NULL) {
3148 printf("%s->%s ifscope %d (neither ro_if/ifa_if found)\n",
3149 ip_src, ip_dst, ifscope);
3150 }
3151
3152 /*
3153 * If there is a route, mark it accordingly. If there isn't one,
3154 * we'll get here again during the next transmit (possibly with a
3155 * route) and the flag will get set at that point. For IPv4 LLA
3156 * destination, mark it only if the route has been fully resolved;
3157 * otherwise we want to come back here again when the route points
3158 * to the interface over which the ARP reply arrives on.
3159 */
3160 if (ro->ro_rt != NULL && (!IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr)) ||
3161 (ro->ro_rt->rt_gateway->sa_family == AF_LINK &&
3162 SDL(ro->ro_rt->rt_gateway)->sdl_alen != 0))) {
3163 ro->ro_flags |= ROF_SRCIF_SELECTED;
3164 ro->ro_rt->generation_id = route_generation;
3165 }
3166
3167 return (ifa);
3168 }
3169
3170 /*
3171 * Handler for setting IP_FORCE_OUT_IFP or IP_BOUND_IF socket option.
3172 */
3173 static void
3174 ip_bindif(struct inpcb *inp, unsigned int ifscope)
3175 {
3176 /*
3177 * A zero interface scope value indicates an "unbind".
3178 * Otherwise, take in whatever value the app desires;
3179 * the app may already know the scope (or force itself
3180 * to such a scope) ahead of time before the interface
3181 * gets attached. It doesn't matter either way; any
3182 * route lookup from this point on will require an
3183 * exact match for the embedded interface scope.
3184 */
3185 inp->inp_boundif = ifscope;
3186 if (inp->inp_boundif == IFSCOPE_NONE)
3187 inp->inp_flags &= ~INP_BOUND_IF;
3188 else
3189 inp->inp_flags |= INP_BOUND_IF;
3190
3191 lck_mtx_lock(rt_mtx);
3192 /* Blow away any cached route in the PCB */
3193 if (inp->inp_route.ro_rt != NULL) {
3194 rtfree_locked(inp->inp_route.ro_rt);
3195 inp->inp_route.ro_rt = NULL;
3196 }
3197 lck_mtx_unlock(rt_mtx);
3198 }