]> git.saurik.com Git - apple/xnu.git/blob - bsd/netinet/ip_output.c
xnu-1504.9.17.tar.gz
[apple/xnu.git] / bsd / netinet / ip_output.c
1 /*
2 * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1982, 1986, 1988, 1990, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94
61 * $FreeBSD: src/sys/netinet/ip_output.c,v 1.99.2.16 2001/07/19 06:37:26 kris Exp $
62 */
63 /*
64 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
65 * support for mandatory and extensible security protections. This notice
66 * is included in support of clause 2.2 (b) of the Apple Public License,
67 * Version 2.0.
68 */
69
70 #define _IP_VHL
71
72 #include <sys/param.h>
73 #include <sys/systm.h>
74 #include <sys/kernel.h>
75 #include <sys/malloc.h>
76 #include <sys/mbuf.h>
77 #include <sys/protosw.h>
78 #include <sys/socket.h>
79 #include <sys/socketvar.h>
80 #include <kern/locks.h>
81 #include <sys/sysctl.h>
82
83 #include <machine/endian.h>
84
85 #include <net/if.h>
86 #include <net/if_dl.h>
87 #include <net/route.h>
88
89 #include <netinet/in.h>
90 #include <netinet/in_systm.h>
91 #include <netinet/ip.h>
92 #include <netinet/in_pcb.h>
93 #include <netinet/in_var.h>
94 #include <netinet/ip_var.h>
95
96 #include <netinet/kpi_ipfilter_var.h>
97
98 #if CONFIG_MACF_NET
99 #include <security/mac_framework.h>
100 #endif
101
102 #include "faith.h"
103
104 #include <net/dlil.h>
105 #include <sys/kdebug.h>
106 #include <libkern/OSAtomic.h>
107
/*
 * kdebug trace codes for this file, all built with NETDBG_CODE() in the
 * DBG_NETIP class.  In the code visible below they are passed to
 * KERNEL_DEBUG(): DBG_FNC_IP_OUTPUT at entry to ip_output_list(),
 * DBG_LAYER_BEG once the IP header has been filled in, and
 * DBG_FNC_IPSEC4_OUTPUT around the IPsec policy/output section.
 */
108 #define DBG_LAYER_BEG NETDBG_CODE(DBG_NETIP, 1)
109 #define DBG_LAYER_END NETDBG_CODE(DBG_NETIP, 3)
110 #define DBG_FNC_IP_OUTPUT NETDBG_CODE(DBG_NETIP, (1 << 8) | 1)
111 #define DBG_FNC_IPSEC4_OUTPUT NETDBG_CODE(DBG_NETIP, (2 << 8) | 1)
112
/*
 * SWAP16(v): move the low byte of v into bits 8-15 and OR in v shifted
 * right by 8.  This exchanges the two bytes of v only when v has no bits
 * set above bit 15; the result is not truncated to 16 bits.  v is
 * evaluated twice, so it must be free of side effects.
 */
113 #define SWAP16(v) ((((v) & 0xff) << 8) | ((v) >> 8))
114
115 #if IPSEC
116 #include <netinet6/ipsec.h>
117 #include <netkey/key.h>
118 #if IPSEC_DEBUG
119 #include <netkey/key_debug.h>
120 #else
121 #define KEYDEBUG(lev,arg)
122 #endif
123 #endif /*IPSEC*/
124
125 #include <netinet/ip_fw.h>
126 #include <netinet/ip_divert.h>
127
128 #if DUMMYNET
129 #include <netinet/ip_dummynet.h>
130 #endif
131
132 #if PF
133 #include <net/pfvar.h>
134 #endif /* PF */
135
136 #if IPFIREWALL_FORWARD_DEBUG
/*
 * Debug-only helper (this definition sits under IPFIREWALL_FORWARD_DEBUG):
 * print the IPv4 address held in `a' -- an object with an s_addr member in
 * network byte order -- in dotted-decimal form.
 *
 * Each octet is cast to long so the argument type matches the %ld
 * conversion; ntohl() yields a 32-bit unsigned value, which is not a long
 * on LP64 targets.  `a' is parenthesized and evaluated four times, so it
 * must be free of side effects.  The expansion keeps its historical
 * trailing semicolon so that existing call sites expand as before.
 */
#define print_ip(a)	printf("%ld.%ld.%ld.%ld",			\
	(long)((ntohl((a).s_addr) >> 24) & 0xFF),			\
	(long)((ntohl((a).s_addr) >> 16) & 0xFF),			\
	(long)((ntohl((a).s_addr) >> 8) & 0xFF),			\
	(long)(ntohl((a).s_addr) & 0xFF));
141 #endif
142
143
/*
 * Counter used for the IP identification field: when RANDOM_IP_ID is not
 * configured, ip_output_list() stores htons(ip_id++) into ip->ip_id.
 */
144 u_short ip_id;
145
/*
 * Forward declarations of file-local helpers.  Of these, the part of
 * ip_output_list() visible in this chunk calls ip_insertoptions(),
 * ip_mloopback() and in_selectsrcif(); the definitions are presumably
 * later in this file -- TODO confirm.
 */
146 static struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *);
147 static struct ifnet *ip_multicast_if(struct in_addr *, int *);
148 static void ip_mloopback(struct ifnet *, struct mbuf *,
149 struct sockaddr_in *, int);
150 static int ip_getmoptions(struct sockopt *, struct ip_moptions *);
151 static int ip_pcbopts(int, struct mbuf **, struct mbuf *);
152 static int ip_setmoptions(struct sockopt *, struct ip_moptions **);
153
154 static void ip_out_cksum_stats(int, u_int32_t);
155 static struct ifaddr *in_selectsrcif(struct ip *, struct route *, unsigned int);
156 static void ip_bindif(struct inpcb *, unsigned int);
157
/*
 * Prototypes with external linkage declared locally in this file.
 * NOTE(review): whether a shared header also declares these is not
 * visible from here -- confirm before changing a signature.
 */
158 int ip_createmoptions(struct ip_moptions **imop);
159 int ip_addmembership(struct ip_moptions *imo, struct ip_mreq *mreq);
160 int ip_dropmembership(struct ip_moptions *imo, struct ip_mreq *mreq);
161 int ip_optcopy(struct ip *, struct ip *);
162 void in_delayed_cksum_offset(struct mbuf *, int );
163 void in_cksum_offset(struct mbuf* , size_t );
164
/* Function pointer defined elsewhere; not referenced in the visible part of this file. */
165 extern int (*fr_checkp)(struct ip *, int, struct ifnet *, int, struct mbuf **);
166
167 extern struct protosw inetsw[];
168
/* Link-local counters; ip_output_list() bumps iplls_out_total / iplls_out_badttl. */
169 extern struct ip_linklocal_stat ip_linklocal_stat;
170 extern lck_mtx_t *ip_mutex;
171
/*
 * When ipsec_bypass is non-zero, ip_output_list() skips its IPsec socket
 * lookup and policy processing (it jumps to skip_ipsec).
 */
172 /* temporary: for testing */
173 #if IPSEC
174 extern int ipsec_bypass;
175 #endif
176
/*
 * Tunables registered with SYSCTL_INT under the _net_inet_ip node, each
 * read-write (CTLFLAG_RW) and initialised to 0.
 */

/*
 * "maxchainsent": described by its sysctl string as "use dlil_output_list".
 * NOTE(review): the code that reads or updates it is outside the visible
 * chunk -- confirm its exact meaning there.
 */
177 static int ip_maxchainsent = 0;
178 SYSCTL_INT(_net_inet_ip, OID_AUTO, maxchainsent, CTLFLAG_RW,
179 &ip_maxchainsent, 0, "use dlil_output_list");
180 #if DEBUG
/*
 * "forge_ce" (DEBUG builds only): while non-zero, ip_output_list() rewrites
 * the ECN bits of an outgoing ECT(0)/ECT(1) packet to CE and decrements
 * this counter, so the value is the number of packets still to be marked.
 */
181 static int forge_ce = 0;
182 SYSCTL_INT(_net_inet_ip, OID_AUTO, forge_ce, CTLFLAG_RW,
183 &forge_ce, 0, "Forge ECN CE");
184 #endif /* DEBUG */
185
/*
 * "select_srcif_debug": per its sysctl string, enables logging of source
 * interface selection debug info.  Not read in the visible part of the file.
 */
186 static int ip_select_srcif_debug = 0;
187 SYSCTL_INT(_net_inet_ip, OID_AUTO, select_srcif_debug, CTLFLAG_RW,
188 &ip_select_srcif_debug, 0, "log source interface selection debug info");
189
/*
 * ip_output: send one IP packet.
 *
 * The mbuf chain m0 carries a skeletal IP header in which len, off, ttl,
 * proto, tos, src and dst have already been filled in.  The chain holding
 * the packet is freed; the options mbuf `opt', when one is supplied, is
 * not freed.
 *
 * This is a thin wrapper: every argument is handed unchanged to
 * ip_output_list() with a packetchain argument of 0, and that function's
 * return value is passed straight back to the caller.
 */
int
ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro, int flags,
    struct ip_moptions *imo, struct ip_out_args *ipoa)
{
	return (ip_output_list(m0, 0, opt, ro, flags, imo, ipoa));
}
209
210 /*
211 * Returns: 0 Success
212 * ENOMEM
213 * EADDRNOTAVAIL
214 * ENETUNREACH
215 * EHOSTUNREACH
216 * EACCES
217 * EMSGSIZE
218 * ENOBUFS
219 * ipsec4_getpolicybyaddr:??? [IPSEC 4th argument, contents modified]
220 * ipsec4_getpolicybysock:??? [IPSEC 4th argument, contents modified]
221 * key_spdacquire:??? [IPSEC]
222 * ipsec4_output:??? [IPSEC]
223 * <fr_checkp>:??? [firewall]
224 * ip_dn_io_ptr:??? [dummynet]
225 * dlil_output:??? [DLIL]
226 * dlil_output_list:??? [DLIL]
227 *
228 * Notes: The ipsec4_getpolicyby{addr|sock} function error returns are
229 * only used as the error return from this function where one of
230 * these functions fails to return a policy.
231 */
232 int
233 ip_output_list(
234 struct mbuf *m0,
235 int packetchain,
236 struct mbuf *opt,
237 struct route *ro,
238 int flags,
239 struct ip_moptions *imo,
240 struct ip_out_args *ipoa
241 )
242 {
243 struct ip *ip;
244 struct ifnet *ifp = NULL;
245 struct mbuf *m = m0, **mppn = NULL;
246 int hlen = sizeof (struct ip);
247 int len = 0, off, error = 0;
248 struct sockaddr_in *dst = NULL;
249 struct in_ifaddr *ia = NULL, *src_ia = NULL;
250 int isbroadcast, sw_csum;
251 struct in_addr pkt_dst;
252 #if IPSEC
253 struct route iproute;
254 struct socket *so = NULL;
255 struct secpolicy *sp = NULL;
256 #endif
257 #if IPFIREWALL_FORWARD
258 int fwd_rewrite_src = 0;
259 #endif
260 #if IPFIREWALL
261 struct ip_fw_args args;
262 #endif
263 int didfilter = 0;
264 ipfilter_t inject_filter_ref = 0;
265 struct m_tag *tag;
266 struct route saved_route;
267 struct ip_out_args saved_ipoa;
268 struct mbuf * packetlist;
269 int pktcnt = 0, tso = 0;
270 unsigned int ifscope;
271 boolean_t select_srcif;
272
273 KERNEL_DEBUG(DBG_FNC_IP_OUTPUT | DBG_FUNC_START, 0,0,0,0,0);
274
275 packetlist = m0;
276 #if IPFIREWALL
277 args.next_hop = NULL;
278 args.eh = NULL;
279 args.rule = NULL;
280 args.divert_rule = 0; /* divert cookie */
281 args.ipoa = NULL;
282
283 if (SLIST_EMPTY(&m0->m_pkthdr.tags))
284 goto ipfw_tags_done;
285
286 /* Grab info from mtags prepended to the chain */
287 #if DUMMYNET
288 if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID,
289 KERNEL_TAG_TYPE_DUMMYNET, NULL)) != NULL) {
290 struct dn_pkt_tag *dn_tag;
291
292 dn_tag = (struct dn_pkt_tag *)(tag+1);
293 args.rule = dn_tag->rule;
294 opt = NULL;
295 saved_route = dn_tag->ro;
296 ro = &saved_route;
297
298 imo = NULL;
299 dst = dn_tag->dn_dst;
300 ifp = dn_tag->ifp;
301 flags = dn_tag->flags;
302 saved_ipoa = dn_tag->ipoa;
303 ipoa = &saved_ipoa;
304
305 m_tag_delete(m0, tag);
306 }
307 #endif /* DUMMYNET */
308
309 #if IPDIVERT
310 if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID,
311 KERNEL_TAG_TYPE_DIVERT, NULL)) != NULL) {
312 struct divert_tag *div_tag;
313
314 div_tag = (struct divert_tag *)(tag+1);
315 args.divert_rule = div_tag->cookie;
316
317 m_tag_delete(m0, tag);
318 }
319 #endif /* IPDIVERT */
320
321 if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID,
322 KERNEL_TAG_TYPE_IPFORWARD, NULL)) != NULL) {
323 struct ip_fwd_tag *ipfwd_tag;
324
325 ipfwd_tag = (struct ip_fwd_tag *)(tag+1);
326 args.next_hop = ipfwd_tag->next_hop;
327
328 m_tag_delete(m0, tag);
329 }
330 ipfw_tags_done:
331 #endif /* IPFIREWALL */
332
333 m = m0;
334
335 #if DIAGNOSTIC
336 if ( !m || (m->m_flags & M_PKTHDR) != 0)
337 panic("ip_output no HDR");
338 if (!ro)
339 panic("ip_output no route, proto = %d",
340 mtod(m, struct ip *)->ip_p);
341 #endif
342
343 /*
344 * At present the IP_OUTARGS flag implies a request for IP to
345 * perform source interface selection. In the forwarding case,
346 * only the ifscope value is used, as source interface selection
347 * doesn't take place.
348 */
349 if (ip_doscopedroute && (flags & IP_OUTARGS)) {
350 select_srcif = !(flags & IP_FORWARDING);
351 ifscope = ipoa->ipoa_ifscope;
352 } else {
353 select_srcif = FALSE;
354 ifscope = IFSCOPE_NONE;
355 }
356
357 #if IPFIREWALL
358 if (args.rule != NULL) { /* dummynet already saw us */
359 ip = mtod(m, struct ip *);
360 hlen = IP_VHL_HL(ip->ip_vhl) << 2 ;
361 if (ro->ro_rt != NULL) {
362 RT_LOCK_SPIN(ro->ro_rt);
363 ia = (struct in_ifaddr *)ro->ro_rt->rt_ifa;
364 if (ia)
365 ifaref(&ia->ia_ifa);
366 RT_UNLOCK(ro->ro_rt);
367 }
368 #if IPSEC
369 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) {
370 so = ipsec_getsocket(m);
371 (void)ipsec_setsocket(m, NULL);
372 }
373 #endif
374 goto sendit;
375 }
376 #endif /* IPFIREWALL */
377
378 #if IPSEC
379 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) {
380 so = ipsec_getsocket(m);
381 (void)ipsec_setsocket(m, NULL);
382 }
383 #endif
384 loopit:
385 /*
386 * No need to process packet twice if we've
387 * already seen it
388 */
389 if (!SLIST_EMPTY(&m->m_pkthdr.tags))
390 inject_filter_ref = ipf_get_inject_filter(m);
391 else
392 inject_filter_ref = 0;
393
394 if (opt) {
395 m = ip_insertoptions(m, opt, &len);
396 hlen = len;
397 }
398 ip = mtod(m, struct ip *);
399 #if IPFIREWALL
400 pkt_dst = args.next_hop ? args.next_hop->sin_addr : ip->ip_dst;
401 #else
402 pkt_dst = ip->ip_dst;
403 #endif
404
405 /*
406 * Fill in IP header.
407 */
408 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
409 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, hlen >> 2);
410 ip->ip_off &= IP_DF;
411 #if RANDOM_IP_ID
412 ip->ip_id = ip_randomid();
413 #else
414 ip->ip_id = htons(ip_id++);
415 #endif
416 OSAddAtomic(1, &ipstat.ips_localout);
417 } else {
418 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
419 }
420
421 #if DEBUG
422 /* For debugging, we let the stack forge congestion */
423 if (forge_ce != 0 &&
424 ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_ECT1 ||
425 (ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_ECT0)) {
426 ip->ip_tos = (ip->ip_tos & ~IPTOS_ECN_MASK) | IPTOS_ECN_CE;
427 forge_ce--;
428 }
429 #endif /* DEBUG */
430
431 KERNEL_DEBUG(DBG_LAYER_BEG, ip->ip_dst.s_addr,
432 ip->ip_src.s_addr, ip->ip_p, ip->ip_off, ip->ip_len);
433
434 dst = (struct sockaddr_in *)&ro->ro_dst;
435
436 /*
437 * If there is a cached route,
438 * check that it is to the same destination
439 * and is still up. If not, free it and try again.
440 * The address family should also be checked in case of sharing the
441 * cache with IPv6.
442 */
443
444 if (ro->ro_rt != NULL) {
445 if (ro->ro_rt->generation_id != route_generation &&
446 ((flags & (IP_ROUTETOIF | IP_FORWARDING)) == 0) &&
447 (ip->ip_src.s_addr != INADDR_ANY)) {
448 src_ia = ifa_foraddr(ip->ip_src.s_addr);
449 if (src_ia == NULL) {
450 error = EADDRNOTAVAIL;
451 goto bad;
452 }
453 ifafree(&src_ia->ia_ifa);
454 }
455 /*
456 * Test rt_flags without holding rt_lock for performance
457 * reasons; if the route is down it will hopefully be
458 * caught by the layer below (since it uses this route
459 * as a hint) or during the next transmit.
460 */
461 if ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
462 dst->sin_family != AF_INET ||
463 dst->sin_addr.s_addr != pkt_dst.s_addr) {
464 rtfree(ro->ro_rt);
465 ro->ro_rt = NULL;
466 }
467 /*
468 * If we're doing source interface selection, we may not
469 * want to use this route; only synch up the generation
470 * count otherwise.
471 */
472 if (!select_srcif && ro->ro_rt != NULL &&
473 ro->ro_rt->generation_id != route_generation)
474 ro->ro_rt->generation_id = route_generation;
475 }
476 if (ro->ro_rt == NULL) {
477 bzero(dst, sizeof(*dst));
478 dst->sin_family = AF_INET;
479 dst->sin_len = sizeof(*dst);
480 dst->sin_addr = pkt_dst;
481 }
482 /*
483 * If routing to interface only,
484 * short circuit routing lookup.
485 */
486 #define ifatoia(ifa) ((struct in_ifaddr *)(ifa))
487 #define sintosa(sin) ((struct sockaddr *)(sin))
488 if (flags & IP_ROUTETOIF) {
489 if (ia)
490 ifafree(&ia->ia_ifa);
491 if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0) {
492 if ((ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) {
493 OSAddAtomic(1, &ipstat.ips_noroute);
494 error = ENETUNREACH;
495 goto bad;
496 }
497 }
498 ifp = ia->ia_ifp;
499 ip->ip_ttl = 1;
500 isbroadcast = in_broadcast(dst->sin_addr, ifp);
501 } else if (IN_MULTICAST(ntohl(pkt_dst.s_addr)) &&
502 imo != NULL && imo->imo_multicast_ifp != NULL) {
503 /*
504 * Bypass the normal routing lookup for multicast
505 * packets if the interface is specified.
506 */
507 ifp = imo->imo_multicast_ifp;
508 isbroadcast = 0;
509 if (ia != NULL)
510 ifafree(&ia->ia_ifa);
511
512 /* Macro takes reference on ia */
513 IFP_TO_IA(ifp, ia);
514 } else {
515 boolean_t cloneok = FALSE;
516 /*
517 * Perform source interface selection; the source IP address
518 * must belong to one of the addresses of the interface used
519 * by the route. For performance reasons, do this only if
520 * there is no route, or if the routing table has changed,
521 * or if we haven't done source interface selection on this
522 * route (for this PCB instance) before.
523 */
524 if (select_srcif && ip->ip_src.s_addr != INADDR_ANY &&
525 (ro->ro_rt == NULL || !(ro->ro_rt->rt_flags & RTF_UP) ||
526 ro->ro_rt->generation_id != route_generation ||
527 !(ro->ro_flags & ROF_SRCIF_SELECTED))) {
528 struct ifaddr *ifa;
529
530 /* Find the source interface */
531 ifa = in_selectsrcif(ip, ro, ifscope);
532
533 /*
534 * If the source address is spoofed (in the case
535 * of IP_RAWOUTPUT), or if this is destined for
536 * local/loopback, just let it go out using the
537 * interface of the route. Otherwise, there's no
538 * interface having such an address, so bail out.
539 */
540 if (ifa == NULL && !(flags & IP_RAWOUTPUT) &&
541 ifscope != lo_ifp->if_index) {
542 error = EADDRNOTAVAIL;
543 goto bad;
544 }
545
546 /*
547 * If the caller didn't explicitly specify the scope,
548 * pick it up from the source interface. If the cached
549 * route was wrong and was blown away as part of source
550 * interface selection, don't mask out RTF_PRCLONING
551 * since that route may have been allocated by the ULP,
552 * unless the IP header was created by the caller or
553 * the destination is IPv4 LLA. The check for the
554 * latter is needed because IPv4 LLAs are never scoped
555 * in the current implementation, and we don't want to
556 * replace the resolved IPv4 LLA route with one whose
557 * gateway points to that of the default gateway on
558 * the primary interface of the system.
559 */
560 if (ifa != NULL) {
561 if (ifscope == IFSCOPE_NONE)
562 ifscope = ifa->ifa_ifp->if_index;
563 ifafree(ifa);
564 cloneok = (!(flags & IP_RAWOUTPUT) &&
565 !(IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr))));
566 }
567 }
568
569 /*
570 * If this is the case, we probably don't want to allocate
571 * a protocol-cloned route since we didn't get one from the
572 * ULP. This lets TCP do its thing, while not burdening
573 * forwarding or ICMP with the overhead of cloning a route.
574 * Of course, we still want to do any cloning requested by
575 * the link layer, as this is probably required in all cases
576 * for correct operation (as it is for ARP).
577 */
578 if (ro->ro_rt == NULL) {
579 unsigned long ign = RTF_PRCLONING;
580 /*
581 * We make an exception here: if the destination
582 * address is INADDR_BROADCAST, allocate a protocol-
583 * cloned host route so that we end up with a route
584 * marked with the RTF_BROADCAST flag. Otherwise,
585 * we would end up referring to the default route,
586 * instead of creating a cloned host route entry.
587 * That would introduce inconsistencies between ULPs
588 * that allocate a route and those that don't. The
589 * RTF_BROADCAST route is important since we'd want
590 * to send out undirected IP broadcast packets using
591 * link-level broadcast address. Another exception
592 * is for ULP-created routes that got blown away by
593 * source interface selection (see above).
594 *
595 * These exceptions will no longer be necessary when
596 * the RTF_PRCLONING scheme is no longer present.
597 */
598 if (cloneok || dst->sin_addr.s_addr == INADDR_BROADCAST)
599 ign &= ~RTF_PRCLONING;
600
601 /*
602 * Loosen the route lookup criteria if the ifscope
603 * corresponds to the loopback interface; this is
604 * needed to support Application Layer Gateways
605 * listening on loopback, in conjunction with packet
606 * filter redirection rules. The final source IP
607 * address will be rewritten by the packet filter
608 * prior to the RFC1122 loopback check below.
609 */
610 if (ifscope == lo_ifp->if_index)
611 rtalloc_ign(ro, ign);
612 else
613 rtalloc_scoped_ign(ro, ign, ifscope);
614 }
615
616 if (ro->ro_rt == NULL) {
617 OSAddAtomic(1, &ipstat.ips_noroute);
618 error = EHOSTUNREACH;
619 goto bad;
620 }
621
622 if (ia)
623 ifafree(&ia->ia_ifa);
624 RT_LOCK_SPIN(ro->ro_rt);
625 ia = ifatoia(ro->ro_rt->rt_ifa);
626 if (ia)
627 ifaref(&ia->ia_ifa);
628 ifp = ro->ro_rt->rt_ifp;
629 ro->ro_rt->rt_use++;
630 if (ro->ro_rt->rt_flags & RTF_GATEWAY)
631 dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
632 if (ro->ro_rt->rt_flags & RTF_HOST) {
633 isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST);
634 } else {
635 /* Become a regular mutex */
636 RT_CONVERT_LOCK(ro->ro_rt);
637 isbroadcast = in_broadcast(dst->sin_addr, ifp);
638 }
639 RT_UNLOCK(ro->ro_rt);
640 }
641
642 if (IN_MULTICAST(ntohl(pkt_dst.s_addr))) {
643 struct in_multi *inm;
644
645 m->m_flags |= M_MCAST;
646 /*
647 * IP destination address is multicast. Make sure "dst"
648 * still points to the address in "ro". (It may have been
649 * changed to point to a gateway address, above.)
650 */
651 dst = (struct sockaddr_in *)&ro->ro_dst;
652 /*
653 * See if the caller provided any multicast options
654 */
655 if (imo != NULL) {
656 if ((flags & IP_RAWOUTPUT) == 0) ip->ip_ttl = imo->imo_multicast_ttl;
657 if (imo->imo_multicast_ifp != NULL) {
658 ifp = imo->imo_multicast_ifp;
659 }
660 #if MROUTING
661 if (imo->imo_multicast_vif != -1 &&
662 ((flags & IP_RAWOUTPUT) == 0 || ip->ip_src.s_addr == INADDR_ANY))
663 ip->ip_src.s_addr =
664 ip_mcast_src(imo->imo_multicast_vif);
665 #endif /* MROUTING */
666 } else
667 if ((flags & IP_RAWOUTPUT) == 0) ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
668 /*
669 * Confirm that the outgoing interface supports multicast.
670 */
671 if ((imo == NULL) || (imo->imo_multicast_vif == -1)) {
672 if ((ifp->if_flags & IFF_MULTICAST) == 0) {
673 OSAddAtomic(1, &ipstat.ips_noroute);
674 error = ENETUNREACH;
675 goto bad;
676 }
677 }
678 /*
679 * If source address not specified yet, use address
680 * of outgoing interface.
681 */
682 if (ip->ip_src.s_addr == INADDR_ANY) {
683 struct in_ifaddr *ia1;
684 lck_rw_lock_shared(in_ifaddr_rwlock);
685 TAILQ_FOREACH(ia1, &in_ifaddrhead, ia_link)
686 if (ia1->ia_ifp == ifp) {
687 ip->ip_src = IA_SIN(ia1)->sin_addr;
688 break;
689 }
690 lck_rw_done(in_ifaddr_rwlock);
691 if (ip->ip_src.s_addr == INADDR_ANY) {
692 error = ENETUNREACH;
693 goto bad;
694 }
695 }
696
697 ifnet_lock_shared(ifp);
698 IN_LOOKUP_MULTI(pkt_dst, ifp, inm);
699 ifnet_lock_done(ifp);
700 if (inm != NULL &&
701 (imo == NULL || imo->imo_multicast_loop)) {
702 /*
703 * If we belong to the destination multicast group
704 * on the outgoing interface, and the caller did not
705 * forbid loopback, loop back a copy.
706 */
707 if (!TAILQ_EMPTY(&ipv4_filters)) {
708 struct ipfilter *filter;
709 int seen = (inject_filter_ref == 0);
710 struct ipf_pktopts *ippo = 0, ipf_pktopts;
711
712 if (imo) {
713 ippo = &ipf_pktopts;
714 ipf_pktopts.ippo_mcast_ifnet = imo->imo_multicast_ifp;
715 ipf_pktopts.ippo_mcast_ttl = imo->imo_multicast_ttl;
716 ipf_pktopts.ippo_mcast_loop = imo->imo_multicast_loop;
717 }
718
719 ipf_ref();
720
721 /* 4135317 - always pass network byte order to filter */
722
723 #if BYTE_ORDER != BIG_ENDIAN
724 HTONS(ip->ip_len);
725 HTONS(ip->ip_off);
726 #endif
727
728 TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) {
729 if (seen == 0) {
730 if ((struct ipfilter *)inject_filter_ref == filter)
731 seen = 1;
732 } else if (filter->ipf_filter.ipf_output) {
733 errno_t result;
734 result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, ippo);
735 if (result == EJUSTRETURN) {
736 ipf_unref();
737 goto done;
738 }
739 if (result != 0) {
740 ipf_unref();
741 goto bad;
742 }
743 }
744 }
745
746 /* set back to host byte order */
747 ip = mtod(m, struct ip *);
748
749 #if BYTE_ORDER != BIG_ENDIAN
750 NTOHS(ip->ip_len);
751 NTOHS(ip->ip_off);
752 #endif
753
754 ipf_unref();
755 didfilter = 1;
756 }
757 ip_mloopback(ifp, m, dst, hlen);
758 }
759 #if MROUTING
760 else {
761 /*
762 * If we are acting as a multicast router, perform
763 * multicast forwarding as if the packet had just
764 * arrived on the interface to which we are about
765 * to send. The multicast forwarding function
766 * recursively calls this function, using the
767 * IP_FORWARDING flag to prevent infinite recursion.
768 *
769 * Multicasts that are looped back by ip_mloopback(),
770 * above, will be forwarded by the ip_input() routine,
771 * if necessary.
772 */
773 if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
774 /*
775 * Check if rsvp daemon is running. If not, don't
776 * set ip_moptions. This ensures that the packet
777 * is multicast and not just sent down one link
778 * as prescribed by rsvpd.
779 */
780 if (!rsvp_on)
781 imo = NULL;
782 if (ip_mforward(ip, ifp, m, imo) != 0) {
783 m_freem(m);
784 goto done;
785 }
786 }
787 }
788 #endif /* MROUTING */
789
790 /*
791 * Multicasts with a time-to-live of zero may be looped-
792 * back, above, but must not be transmitted on a network.
793 * Also, multicasts addressed to the loopback interface
794 * are not sent -- the above call to ip_mloopback() will
795 * loop back a copy if this host actually belongs to the
796 * destination group on the loopback interface.
797 */
798 if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) {
799 m_freem(m);
800 goto done;
801 }
802
803 goto sendit;
804 }
805 #ifndef notdef
806 /*
807 * If source address not specified yet, use address
808 * of outgoing interface.
809 */
810 if (ip->ip_src.s_addr == INADDR_ANY) {
811 ip->ip_src = IA_SIN(ia)->sin_addr;
812 #if IPFIREWALL_FORWARD
813 /* Keep note that we did this - if the firewall changes
814 * the next-hop, our interface may change, changing the
815 * default source IP. It's a shame so much effort happens
816 * twice. Oh well.
817 */
818 fwd_rewrite_src++;
819 #endif /* IPFIREWALL_FORWARD */
820 }
821 #endif /* notdef */
822
823 /*
824 * Look for broadcast address and
825 * verify user is allowed to send
826 * such a packet.
827 */
828 if (isbroadcast) {
829 if ((ifp->if_flags & IFF_BROADCAST) == 0) {
830 error = EADDRNOTAVAIL;
831 goto bad;
832 }
833 if ((flags & IP_ALLOWBROADCAST) == 0) {
834 error = EACCES;
835 goto bad;
836 }
837 /* don't allow broadcast messages to be fragmented */
838 if ((u_short)ip->ip_len > ifp->if_mtu) {
839 error = EMSGSIZE;
840 goto bad;
841 }
842 m->m_flags |= M_BCAST;
843 } else {
844 m->m_flags &= ~M_BCAST;
845 }
846
847 sendit:
848 #if PF
849 /* Invoke outbound packet filter */
850 if (pf_af_hook(ifp, mppn, &m, AF_INET, FALSE) != 0) {
851 if (packetlist == m0) {
852 packetlist = m;
853 mppn = NULL;
854 }
855 if (m != NULL) {
856 m0 = m;
857 /* Next packet in the chain */
858 goto loopit;
859 } else if (packetlist != NULL) {
860 /* No more packet; send down the chain */
861 goto sendchain;
862 }
863 /* Nothing left; we're done */
864 goto done;
865 }
866 m0 = m;
867 ip = mtod(m, struct ip *);
868 pkt_dst = ip->ip_dst;
869 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
870 #endif /* PF */
871 /*
872 * Force IP TTL to 255 following draft-ietf-zeroconf-ipv4-linklocal.txt
873 */
874 if (IN_LINKLOCAL(ntohl(ip->ip_src.s_addr)) || IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr))) {
875 ip_linklocal_stat.iplls_out_total++;
876 if (ip->ip_ttl != MAXTTL) {
877 ip_linklocal_stat.iplls_out_badttl++;
878 ip->ip_ttl = MAXTTL;
879 }
880 }
881
882 if (!didfilter && !TAILQ_EMPTY(&ipv4_filters)) {
883 struct ipfilter *filter;
884 int seen = (inject_filter_ref == 0);
885
886 /* Check that a TSO frame isn't passed to a filter.
887 * This could happen if a filter is inserted while
888 * TCP is sending the TSO packet.
889 */
890 if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4) {
891 error = EMSGSIZE;
892 goto bad;
893 }
894
895 ipf_ref();
896
897 /* 4135317 - always pass network byte order to filter */
898
899 #if BYTE_ORDER != BIG_ENDIAN
900 HTONS(ip->ip_len);
901 HTONS(ip->ip_off);
902 #endif
903
904 TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) {
905 if (seen == 0) {
906 if ((struct ipfilter *)inject_filter_ref == filter)
907 seen = 1;
908 } else if (filter->ipf_filter.ipf_output) {
909 errno_t result;
910 result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, 0);
911 if (result == EJUSTRETURN) {
912 ipf_unref();
913 goto done;
914 }
915 if (result != 0) {
916 ipf_unref();
917 goto bad;
918 }
919 }
920 }
921
922 /* set back to host byte order */
923 ip = mtod(m, struct ip *);
924
925 #if BYTE_ORDER != BIG_ENDIAN
926 NTOHS(ip->ip_len);
927 NTOHS(ip->ip_off);
928 #endif
929
930 ipf_unref();
931 }
932
933 #if IPSEC
934 /* temporary for testing only: bypass ipsec altogether */
935
936 if (ipsec_bypass != 0 || (flags & IP_NOIPSEC) != 0)
937 goto skip_ipsec;
938
939 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_START, 0,0,0,0,0);
940
941
942 /* get SP for this packet */
943 if (so == NULL)
944 sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, flags, &error);
945 else
946 sp = ipsec4_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error);
947
948 if (sp == NULL) {
949 IPSEC_STAT_INCREMENT(ipsecstat.out_inval);
950 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 0,0,0,0,0);
951 goto bad;
952 }
953
954 error = 0;
955
956 /* check policy */
957 switch (sp->policy) {
958 case IPSEC_POLICY_DISCARD:
959 case IPSEC_POLICY_GENERATE:
960 /*
961 * This packet is just discarded.
962 */
963 IPSEC_STAT_INCREMENT(ipsecstat.out_polvio);
964 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 1,0,0,0,0);
965 goto bad;
966
967 case IPSEC_POLICY_BYPASS:
968 case IPSEC_POLICY_NONE:
969 /* no need to do IPsec. */
970 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 2,0,0,0,0);
971 goto skip_ipsec;
972
973 case IPSEC_POLICY_IPSEC:
974 if (sp->req == NULL) {
975 /* acquire a policy */
976 error = key_spdacquire(sp);
977 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 3,0,0,0,0);
978 goto bad;
979 }
980 break;
981
982 case IPSEC_POLICY_ENTRUST:
983 default:
984 printf("ip_output: Invalid policy found. %d\n", sp->policy);
985 }
986 {
987 struct ipsec_output_state state;
988 bzero(&state, sizeof(state));
989 state.m = m;
990 if (flags & IP_ROUTETOIF) {
991 state.ro = &iproute;
992 bzero(&iproute, sizeof(iproute));
993 } else
994 state.ro = ro;
995 state.dst = (struct sockaddr *)dst;
996
997 ip->ip_sum = 0;
998
999 /*
1000 * XXX
1001 * delayed checksums are not currently compatible with IPsec
1002 */
1003 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
1004 in_delayed_cksum(m);
1005 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1006 }
1007
1008
1009 #if BYTE_ORDER != BIG_ENDIAN
1010 HTONS(ip->ip_len);
1011 HTONS(ip->ip_off);
1012 #endif
1013
1014 error = ipsec4_output(&state, sp, flags);
1015
1016 m0 = m = state.m;
1017
1018 if (flags & IP_ROUTETOIF) {
1019 /*
1020 * if we have tunnel mode SA, we may need to ignore
1021 * IP_ROUTETOIF.
1022 */
1023 if (state.ro != &iproute || state.ro->ro_rt != NULL) {
1024 flags &= ~IP_ROUTETOIF;
1025 ro = state.ro;
1026 }
1027 } else
1028 ro = state.ro;
1029
1030 dst = (struct sockaddr_in *)state.dst;
1031 if (error) {
1032 /* mbuf is already reclaimed in ipsec4_output. */
1033 m0 = NULL;
1034 switch (error) {
1035 case EHOSTUNREACH:
1036 case ENETUNREACH:
1037 case EMSGSIZE:
1038 case ENOBUFS:
1039 case ENOMEM:
1040 break;
1041 default:
1042 printf("ip4_output (ipsec): error code %d\n", error);
1043 /*fall through*/
1044 case ENOENT:
1045 /* don't show these error codes to the user */
1046 error = 0;
1047 break;
1048 }
1049 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 4,0,0,0,0);
1050 goto bad;
1051 }
1052 }
1053
1054 /* be sure to update variables that are affected by ipsec4_output() */
1055 ip = mtod(m, struct ip *);
1056
1057 #ifdef _IP_VHL
1058 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
1059 #else
1060 hlen = ip->ip_hl << 2;
1061 #endif
1062 /* Check that there wasn't a route change and src is still valid */
1063 if (ro->ro_rt != NULL && ro->ro_rt->generation_id != route_generation) {
1064 if ((src_ia = ifa_foraddr(ip->ip_src.s_addr)) == NULL &&
1065 ((flags & (IP_ROUTETOIF | IP_FORWARDING)) == 0)) {
1066 error = EADDRNOTAVAIL;
1067 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END,
1068 5,0,0,0,0);
1069 goto bad;
1070 }
1071 rtfree(ro->ro_rt);
1072 ro->ro_rt = NULL;
1073 if (src_ia != NULL)
1074 ifafree(&src_ia->ia_ifa);
1075 }
1076
1077 if (ro->ro_rt == NULL) {
1078 if ((flags & IP_ROUTETOIF) == 0) {
1079 printf("ip_output: can't update route after "
1080 "IPsec processing\n");
1081 error = EHOSTUNREACH; /*XXX*/
1082 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END,
1083 6,0,0,0,0);
1084 goto bad;
1085 }
1086 } else {
1087 if (ia)
1088 ifafree(&ia->ia_ifa);
1089 RT_LOCK_SPIN(ro->ro_rt);
1090 ia = ifatoia(ro->ro_rt->rt_ifa);
1091 if (ia)
1092 ifaref(&ia->ia_ifa);
1093 ifp = ro->ro_rt->rt_ifp;
1094 RT_UNLOCK(ro->ro_rt);
1095 }
1096
1097 /* make it flipped, again. */
1098
1099 #if BYTE_ORDER != BIG_ENDIAN
1100 NTOHS(ip->ip_len);
1101 NTOHS(ip->ip_off);
1102 #endif
1103
1104 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 7,0xff,0xff,0xff,0xff);
1105
1106 /* Pass to filters again */
1107 if (!TAILQ_EMPTY(&ipv4_filters)) {
1108 struct ipfilter *filter;
1109
1110 /* Check that a TSO frame isn't passed to a filter.
1111 * This could happen if a filter is inserted while
1112 * TCP is sending the TSO packet.
1113 */
1114 if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4) {
1115 error = EMSGSIZE;
1116 goto bad;
1117 }
1118
1119 ipf_ref();
1120
1121 /* 4135317 - always pass network byte order to filter */
1122
1123 #if BYTE_ORDER != BIG_ENDIAN
1124 HTONS(ip->ip_len);
1125 HTONS(ip->ip_off);
1126 #endif
1127
1128 TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) {
1129 if (filter->ipf_filter.ipf_output) {
1130 errno_t result;
1131 result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, 0);
1132 if (result == EJUSTRETURN) {
1133 ipf_unref();
1134 goto done;
1135 }
1136 if (result != 0) {
1137 ipf_unref();
1138 goto bad;
1139 }
1140 }
1141 }
1142
1143 /* set back to host byte order */
1144 ip = mtod(m, struct ip *);
1145
1146 #if BYTE_ORDER != BIG_ENDIAN
1147 NTOHS(ip->ip_len);
1148 NTOHS(ip->ip_off);
1149 #endif
1150
1151 ipf_unref();
1152 }
1153 skip_ipsec:
1154 #endif /*IPSEC*/
1155
1156 #if IPFIREWALL
1157 /*
1158 * IpHack's section.
1159 * - Xlate: translate packet's addr/port (NAT).
1160 * - Firewall: deny/allow/etc.
1161 * - Wrap: fake packet's addr/port <unimpl.>
1162 * - Encapsulate: put it in another IP and send out. <unimp.>
1163 */
1164 if (fr_checkp) {
1165 struct mbuf *m1 = m;
1166
1167 if ((error = (*fr_checkp)(ip, hlen, ifp, 1, &m1)) || !m1) {
1168 goto done;
1169 }
1170 ip = mtod(m0 = m = m1, struct ip *);
1171 }
1172
1173 /*
1174 * Check with the firewall...
1175 * but not if we are already being fwd'd from a firewall.
1176 */
1177 if (fw_enable && IPFW_LOADED && !args.next_hop) {
1178 struct sockaddr_in *old = dst;
1179
1180 args.m = m;
1181 args.next_hop = dst;
1182 args.oif = ifp;
1183 off = ip_fw_chk_ptr(&args);
1184 m = args.m;
1185 dst = args.next_hop;
1186
1187 /*
1188 * On return we must do the following:
1189 * IP_FW_PORT_DENY_FLAG -> drop the pkt (XXX new)
1190 * 1<=off<= 0xffff -> DIVERT
1191 * (off & IP_FW_PORT_DYNT_FLAG) -> send to a DUMMYNET pipe
1192 * (off & IP_FW_PORT_TEE_FLAG) -> TEE the packet
1193 * dst != old -> IPFIREWALL_FORWARD
1194 * off==0, dst==old -> accept
1195 * If some of the above modules is not compiled in, then
1196 * we shouldn't have to check the corresponding condition
1197 * (because the ipfw control socket should not accept
1198 * unsupported rules), but better play safe and drop
1199 * packets in case of doubt.
1200 */
1201 m0 = m;
1202 if ( (off & IP_FW_PORT_DENY_FLAG) || m == NULL) {
1203 if (m)
1204 m_freem(m);
1205 error = EACCES ;
1206 goto done ;
1207 }
1208 ip = mtod(m, struct ip *);
1209
1210 if (off == 0 && dst == old) {/* common case */
1211 goto pass ;
1212 }
1213 #if DUMMYNET
1214 if (DUMMYNET_LOADED && (off & IP_FW_PORT_DYNT_FLAG) != 0) {
1215 /*
1216 * pass the pkt to dummynet. Need to include
1217 * pipe number, m, ifp, ro, dst because these are
1218 * not recomputed in the next pass.
1219 * All other parameters have been already used and
1220 * so they are not needed anymore.
1221 * XXX note: if the ifp or ro entry are deleted
1222 * while a pkt is in dummynet, we are in trouble!
1223 */
1224 args.ro = ro;
1225 args.dst = dst;
1226 args.flags = flags;
1227 if (flags & IP_OUTARGS)
1228 args.ipoa = ipoa;
1229
1230 error = ip_dn_io_ptr(m, off & 0xffff, DN_TO_IP_OUT,
1231 &args);
1232 goto done;
1233 }
1234 #endif /* DUMMYNET */
1235 #if IPDIVERT
1236 if (off != 0 && (off & IP_FW_PORT_DYNT_FLAG) == 0) {
1237 struct mbuf *clone = NULL;
1238
1239 /* Clone packet if we're doing a 'tee' */
1240 if ((off & IP_FW_PORT_TEE_FLAG) != 0)
1241 clone = m_dup(m, M_DONTWAIT);
1242 /*
1243 * XXX
1244 * delayed checksums are not currently compatible
1245 * with divert sockets.
1246 */
1247 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
1248 in_delayed_cksum(m);
1249 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1250 }
1251
1252 /* Restore packet header fields to original values */
1253
1254 #if BYTE_ORDER != BIG_ENDIAN
1255 HTONS(ip->ip_len);
1256 HTONS(ip->ip_off);
1257 #endif
1258
1259 /* Deliver packet to divert input routine */
1260 divert_packet(m, 0, off & 0xffff, args.divert_rule);
1261
1262 /* If 'tee', continue with original packet */
1263 if (clone != NULL) {
1264 m0 = m = clone;
1265 ip = mtod(m, struct ip *);
1266 goto pass;
1267 }
1268 goto done;
1269 }
1270 #endif
1271
1272 #if IPFIREWALL_FORWARD
1273 /* Here we check dst to make sure it's directly reachable on the
1274 * interface we previously thought it was.
1275 * If it isn't (which may be likely in some situations) we have
1276 * to re-route it (ie, find a route for the next-hop and the
1277 * associated interface) and set them here. This is nested
1278 * forwarding which in most cases is undesirable, except where
1279 * such control is nigh impossible. So we do it here.
1280 * And I'm babbling.
1281 */
1282 if (off == 0 && old != dst) {
1283 struct in_ifaddr *ia_fw;
1284
1285 /* It's changed... */
1286 /* There must be a better way to do this next line... */
1287 static struct route sro_fwd, *ro_fwd = &sro_fwd;
1288 #if IPFIREWALL_FORWARD_DEBUG
1289 printf("IPFIREWALL_FORWARD: New dst ip: ");
1290 print_ip(dst->sin_addr);
1291 printf("\n");
1292 #endif
1293 /*
1294 * We need to figure out if we have been forwarded
1295 * to a local socket. If so then we should somehow
1296 * "loop back" to ip_input, and get directed to the
1297 * PCB as if we had received this packet. This is
1298 * because it may be difficult to identify the packets
1299 * you want to forward until they are being output
1300 * and have selected an interface. (e.g. locally
1301 * initiated packets) If we used the loopback interface,
1302 * we would not be able to control what happens
1303 * as the packet runs through ip_input() as
1304 * it is done through a ISR.
1305 */
1306 lck_rw_lock_shared(in_ifaddr_rwlock);
1307 TAILQ_FOREACH(ia_fw, &in_ifaddrhead, ia_link) {
1308 /*
1309 * If the addr to forward to is one
1310 * of ours, we pretend to
1311 * be the destination for this packet.
1312 */
1313 if (IA_SIN(ia_fw)->sin_addr.s_addr ==
1314 dst->sin_addr.s_addr)
1315 break;
1316 }
1317 lck_rw_done(in_ifaddr_rwlock);
1318 if (ia_fw) {
1319 /* tell ip_input "dont filter" */
1320 struct m_tag *fwd_tag;
1321 struct ip_fwd_tag *ipfwd_tag;
1322
1323 fwd_tag = m_tag_alloc(KERNEL_MODULE_TAG_ID,
1324 KERNEL_TAG_TYPE_IPFORWARD,
1325 sizeof (*ipfwd_tag), M_NOWAIT);
1326 if (fwd_tag == NULL) {
1327 error = ENOBUFS;
1328 goto bad;
1329 }
1330
1331 ipfwd_tag = (struct ip_fwd_tag *)(fwd_tag+1);
1332 ipfwd_tag->next_hop = args.next_hop;
1333
1334 m_tag_prepend(m, fwd_tag);
1335
1336 if (m->m_pkthdr.rcvif == NULL)
1337 m->m_pkthdr.rcvif = ifunit("lo0");
1338 if ((~IF_HWASSIST_CSUM_FLAGS(m->m_pkthdr.rcvif->if_hwassist) &
1339 m->m_pkthdr.csum_flags) == 0) {
1340 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
1341 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1342 m->m_pkthdr.csum_flags |=
1343 CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1344 m->m_pkthdr.csum_data = 0xffff;
1345 }
1346 m->m_pkthdr.csum_flags |=
1347 CSUM_IP_CHECKED | CSUM_IP_VALID;
1348 }
1349 else if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
1350 in_delayed_cksum(m);
1351 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1352 ip->ip_sum = in_cksum(m, hlen);
1353 }
1354
1355 #if BYTE_ORDER != BIG_ENDIAN
1356 HTONS(ip->ip_len);
1357 HTONS(ip->ip_off);
1358 #endif
1359
1360 /* we need to call dlil_output to run filters
1361 * and resync to avoid recursion loops.
1362 */
1363 if (lo_ifp) {
1364 dlil_output(lo_ifp, PF_INET, m, 0, (struct sockaddr *)dst, 0);
1365 }
1366 else {
1367 printf("ip_output: no loopback ifp for forwarding!!!\n");
1368 }
1369 goto done;
1370 }
1371 /* Some of the logic for this was
1372 * nicked from above.
1373 *
1374 * This rewrites the cached route in a local PCB.
1375 * Is this what we want to do?
1376 */
1377 bcopy(dst, &ro_fwd->ro_dst, sizeof(*dst));
1378
1379 ro_fwd->ro_rt = NULL;
1380 rtalloc_ign(ro_fwd, RTF_PRCLONING);
1381
1382 if (ro_fwd->ro_rt == NULL) {
1383 OSAddAtomic(1, &ipstat.ips_noroute);
1384 error = EHOSTUNREACH;
1385 goto bad;
1386 }
1387
1388 RT_LOCK_SPIN(ro_fwd->ro_rt);
1389 ia_fw = ifatoia(ro_fwd->ro_rt->rt_ifa);
1390 if (ia_fw != NULL)
1391 ifaref(&ia_fw->ia_ifa);
1392 ifp = ro_fwd->ro_rt->rt_ifp;
1393 ro_fwd->ro_rt->rt_use++;
1394 if (ro_fwd->ro_rt->rt_flags & RTF_GATEWAY)
1395 dst = (struct sockaddr_in *)ro_fwd->ro_rt->rt_gateway;
1396 if (ro_fwd->ro_rt->rt_flags & RTF_HOST) {
1397 isbroadcast =
1398 (ro_fwd->ro_rt->rt_flags & RTF_BROADCAST);
1399 } else {
1400 /* Become a regular mutex */
1401 RT_CONVERT_LOCK(ro_fwd->ro_rt);
1402 isbroadcast = in_broadcast(dst->sin_addr, ifp);
1403 }
1404 RT_UNLOCK(ro_fwd->ro_rt);
1405 rtfree(ro->ro_rt);
1406 ro->ro_rt = ro_fwd->ro_rt;
1407 dst = (struct sockaddr_in *)&ro_fwd->ro_dst;
1408
1409 /*
1410 * If we added a default src ip earlier,
1411 * which would have been gotten from the-then
1412 * interface, do it again, from the new one.
1413 */
1414 if (ia_fw != NULL) {
1415 if (fwd_rewrite_src)
1416 ip->ip_src = IA_SIN(ia_fw)->sin_addr;
1417 ifafree(&ia_fw->ia_ifa);
1418 }
1419 goto pass ;
1420 }
1421 #endif /* IPFIREWALL_FORWARD */
1422 /*
1423 * if we get here, none of the above matches, and
1424 * we have to drop the pkt
1425 */
1426 m_freem(m);
1427 error = EACCES; /* not sure this is the right error msg */
1428 goto done;
1429 }
1430 #endif /* IPFIREWALL */
1431
1432 pass:
1433 #if __APPLE__
1434 /* Do not allow loopback address to wind up on a wire */
1435 if ((ifp->if_flags & IFF_LOOPBACK) == 0 &&
1436 ((ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
1437 (ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)) {
1438 OSAddAtomic(1, &ipstat.ips_badaddr);
1439 m_freem(m);
1440 /*
1441 * Do not simply drop the packet just like a firewall -- we want the
1442 * application to feel the pain.
1443 * Return ENETUNREACH like ip6_output does in some similar cases.
1444 * This can startle the otherwise clueless process that specifies
1445 * loopback as the source address.
1446 */
1447 error = ENETUNREACH;
1448 goto done;
1449 }
1450 #endif
1451 m->m_pkthdr.csum_flags |= CSUM_IP;
1452 tso = (ifp->if_hwassist & IFNET_TSO_IPV4) && (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4);
1453
1454 sw_csum = m->m_pkthdr.csum_flags
1455 & ~IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);
1456
1457 if ((ifp->if_hwassist & CSUM_TCP_SUM16) != 0) {
1458 /*
1459 * Special case code for GMACE
1460 * frames that can be checksumed by GMACE SUM16 HW:
1461 * frame >64, no fragments, no UDP
1462 */
1463 if (apple_hwcksum_tx && (m->m_pkthdr.csum_flags & CSUM_TCP)
1464 && (ip->ip_len > 50) && (ip->ip_len <= ifp->if_mtu)) {
1465 /* Apple GMAC HW, expects STUFF_OFFSET << 16 | START_OFFSET */
1466 u_short offset = (IP_VHL_HL(ip->ip_vhl) << 2) +14 ; /* IP+Enet header length */
1467 u_short csumprev= m->m_pkthdr.csum_data & 0xFFFF;
1468 m->m_pkthdr.csum_flags = CSUM_DATA_VALID | CSUM_TCP_SUM16; /* for GMAC */
1469 m->m_pkthdr.csum_data = (csumprev + offset) << 16 ;
1470 m->m_pkthdr.csum_data += offset;
1471 sw_csum = CSUM_DELAY_IP; /* do IP hdr chksum in software */
1472 }
1473 else {
1474 /* let the software handle any UDP or TCP checksums */
1475 sw_csum |= (CSUM_DELAY_DATA & m->m_pkthdr.csum_flags);
1476 }
1477 } else if (apple_hwcksum_tx == 0) {
1478 sw_csum |= (CSUM_DELAY_DATA | CSUM_DELAY_IP) &
1479 m->m_pkthdr.csum_flags;
1480 }
1481
1482 if (sw_csum & CSUM_DELAY_DATA) {
1483 in_delayed_cksum(m);
1484 sw_csum &= ~CSUM_DELAY_DATA;
1485 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1486 }
1487
1488 if (apple_hwcksum_tx != 0) {
1489 m->m_pkthdr.csum_flags &=
1490 IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);
1491 } else {
1492 m->m_pkthdr.csum_flags = 0;
1493 }
1494
1495 /*
1496 * If small enough for interface, or the interface will take
1497 * care of the fragmentation for us, can just send directly.
1498 */
1499 if ((u_short)ip->ip_len <= ifp->if_mtu || tso ||
1500 ifp->if_hwassist & CSUM_FRAGMENT) {
1501 if (tso)
1502 m->m_pkthdr.csum_flags |= CSUM_TSO_IPV4;
1503
1504
1505 #if BYTE_ORDER != BIG_ENDIAN
1506 HTONS(ip->ip_len);
1507 HTONS(ip->ip_off);
1508 #endif
1509
1510 ip->ip_sum = 0;
1511 if (sw_csum & CSUM_DELAY_IP) {
1512 ip->ip_sum = in_cksum(m, hlen);
1513 }
1514
1515 #ifndef __APPLE__
1516 /* Record statistics for this interface address. */
1517 if (!(flags & IP_FORWARDING) && ia != NULL) {
1518 ia->ia_ifa.if_opackets++;
1519 ia->ia_ifa.if_obytes += m->m_pkthdr.len;
1520 }
1521 #endif
1522
1523 #if IPSEC
1524 /* clean ipsec history once it goes out of the node */
1525 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0)
1526 ipsec_delaux(m);
1527 #endif
1528 if (packetchain == 0) {
1529 error = ifnet_output(ifp, PF_INET, m, ro->ro_rt,
1530 (struct sockaddr *)dst);
1531 goto done;
1532 }
1533 else { /* packet chaining allows us to reuse the route for all packets */
1534 mppn = &m->m_nextpkt;
1535 m = m->m_nextpkt;
1536 if (m == NULL) {
1537 #if PF
1538 sendchain:
1539 #endif /* PF */
1540 if (pktcnt > ip_maxchainsent)
1541 ip_maxchainsent = pktcnt;
1542 //send
1543 error = ifnet_output(ifp, PF_INET, packetlist,
1544 ro->ro_rt, (struct sockaddr *)dst);
1545 pktcnt = 0;
1546 goto done;
1547
1548 }
1549 m0 = m;
1550 pktcnt++;
1551 goto loopit;
1552 }
1553 }
1554 /*
1555 * Too large for interface; fragment if possible.
1556 * Must be able to put at least 8 bytes per fragment.
1557 */
1558
1559 if (ip->ip_off & IP_DF || (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4)) {
1560 error = EMSGSIZE;
1561 /*
1562 * This case can happen if the user changed the MTU
1563 *
1564 * of an interface after enabling IP on it. Because
1565 * most netifs don't keep track of routes pointing to
1566 * them, there is no way for one to update all its
1567 * routes when the MTU is changed.
1568 */
1569 RT_LOCK_SPIN(ro->ro_rt);
1570 if (ro->ro_rt && (ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST))
1571 && !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU)
1572 && (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) {
1573 ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
1574 }
1575 RT_UNLOCK(ro->ro_rt);
1576 OSAddAtomic(1, &ipstat.ips_cantfrag);
1577 goto bad;
1578 }
1579
1580 error = ip_fragment(m, ifp, ifp->if_mtu, sw_csum);
1581 if (error != 0) {
1582 m0 = m = NULL;
1583 goto bad;
1584 }
1585
1586 KERNEL_DEBUG(DBG_LAYER_END, ip->ip_dst.s_addr,
1587 ip->ip_src.s_addr, ip->ip_p, ip->ip_off, ip->ip_len);
1588
1589 for (m = m0; m; m = m0) {
1590 m0 = m->m_nextpkt;
1591 m->m_nextpkt = 0;
1592 #if IPSEC
1593 /* clean ipsec history once it goes out of the node */
1594 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0)
1595 ipsec_delaux(m);
1596 #endif
1597 if (error == 0) {
1598 #ifndef __APPLE__
1599 /* Record statistics for this interface address. */
1600 if (ia != NULL) {
1601 ia->ia_ifa.if_opackets++;
1602 ia->ia_ifa.if_obytes += m->m_pkthdr.len;
1603 }
1604 #endif
1605 if ((packetchain != 0) && (pktcnt > 0))
1606 panic("ip_output: mix of packet in packetlist is wrong=%p", packetlist);
1607 error = ifnet_output(ifp, PF_INET, m, ro->ro_rt,
1608 (struct sockaddr *)dst);
1609 } else
1610 m_freem(m);
1611 }
1612
1613 if (error == 0)
1614 OSAddAtomic(1, &ipstat.ips_fragmented);
1615
1616 done:
1617 if (ia) {
1618 ifafree(&ia->ia_ifa);
1619 ia = NULL;
1620 }
1621 #if IPSEC
1622 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) {
1623 if (ro == &iproute && ro->ro_rt) {
1624 rtfree(ro->ro_rt);
1625 ro->ro_rt = NULL;
1626 }
1627 if (sp != NULL) {
1628 KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
1629 printf("DP ip_output call free SP:%x\n", sp));
1630 key_freesp(sp, KEY_SADB_UNLOCKED);
1631 }
1632 }
1633 #endif /* IPSEC */
1634
1635 KERNEL_DEBUG(DBG_FNC_IP_OUTPUT | DBG_FUNC_END, error,0,0,0,0);
1636 return (error);
1637 bad:
1638 m_freem(m0);
1639 goto done;
1640 }
1641
1642 int
1643 ip_fragment(struct mbuf *m, struct ifnet *ifp, unsigned long mtu, int sw_csum)
1644 {
1645 struct ip *ip, *mhip;
1646 int len, hlen, mhlen, firstlen, off, error = 0;
1647 struct mbuf **mnext = &m->m_nextpkt, *m0;
1648 int nfrags = 1;
1649
1650 ip = mtod(m, struct ip *);
1651 #ifdef _IP_VHL
1652 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
1653 #else
1654 hlen = ip->ip_hl << 2;
1655 #endif
1656
1657 firstlen = len = (mtu - hlen) &~ 7;
1658 if (len < 8) {
1659 m_freem(m);
1660 return (EMSGSIZE);
1661 }
1662
1663 /*
1664 * if the interface will not calculate checksums on
1665 * fragmented packets, then do it here.
1666 */
1667 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA &&
1668 (ifp->if_hwassist & CSUM_IP_FRAGS) == 0) {
1669 in_delayed_cksum(m);
1670 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1671 }
1672
1673 /*
1674 * Loop through length of segment after first fragment,
1675 * make new header and copy data of each part and link onto chain.
1676 */
1677 m0 = m;
1678 mhlen = sizeof (struct ip);
1679 for (off = hlen + len; off < (u_short)ip->ip_len; off += len) {
1680 MGETHDR(m, M_DONTWAIT, MT_HEADER); /* MAC-OK */
1681 if (m == 0) {
1682 error = ENOBUFS;
1683 OSAddAtomic(1, &ipstat.ips_odropped);
1684 goto sendorfree;
1685 }
1686 m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG;
1687 m->m_data += max_linkhdr;
1688 mhip = mtod(m, struct ip *);
1689 *mhip = *ip;
1690 if (hlen > sizeof (struct ip)) {
1691 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
1692 mhip->ip_vhl = IP_MAKE_VHL(IPVERSION, mhlen >> 2);
1693 }
1694 m->m_len = mhlen;
1695 mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF);
1696 if (ip->ip_off & IP_MF)
1697 mhip->ip_off |= IP_MF;
1698 if (off + len >= (u_short)ip->ip_len)
1699 len = (u_short)ip->ip_len - off;
1700 else
1701 mhip->ip_off |= IP_MF;
1702 mhip->ip_len = htons((u_short)(len + mhlen));
1703 m->m_next = m_copy(m0, off, len);
1704 if (m->m_next == 0) {
1705 (void) m_free(m);
1706 error = ENOBUFS; /* ??? */
1707 OSAddAtomic(1, &ipstat.ips_odropped);
1708 goto sendorfree;
1709 }
1710 m->m_pkthdr.len = mhlen + len;
1711 m->m_pkthdr.rcvif = 0;
1712 m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
1713 m->m_pkthdr.socket_id = m0->m_pkthdr.socket_id;
1714 #if CONFIG_MACF_NET
1715 mac_netinet_fragment(m0, m);
1716 #endif
1717
1718 #if BYTE_ORDER != BIG_ENDIAN
1719 HTONS(mhip->ip_off);
1720 #endif
1721
1722 mhip->ip_sum = 0;
1723 if (sw_csum & CSUM_DELAY_IP) {
1724 mhip->ip_sum = in_cksum(m, mhlen);
1725 }
1726 *mnext = m;
1727 mnext = &m->m_nextpkt;
1728 nfrags++;
1729 }
1730 OSAddAtomic(nfrags, &ipstat.ips_ofragments);
1731
1732 /* set first/last markers for fragment chain */
1733 m->m_flags |= M_LASTFRAG;
1734 m0->m_flags |= M_FIRSTFRAG | M_FRAG;
1735 m0->m_pkthdr.csum_data = nfrags;
1736
1737 /*
1738 * Update first fragment by trimming what's been copied out
1739 * and updating header, then send each fragment (in order).
1740 */
1741 m = m0;
1742 m_adj(m, hlen + firstlen - (u_short)ip->ip_len);
1743 m->m_pkthdr.len = hlen + firstlen;
1744 ip->ip_len = htons((u_short)m->m_pkthdr.len);
1745 ip->ip_off |= IP_MF;
1746
1747 #if BYTE_ORDER != BIG_ENDIAN
1748 HTONS(ip->ip_off);
1749 #endif
1750
1751 ip->ip_sum = 0;
1752 if (sw_csum & CSUM_DELAY_IP) {
1753 ip->ip_sum = in_cksum(m, hlen);
1754 }
1755 sendorfree:
1756 if (error)
1757 m_freem_list(m0);
1758
1759 return (error);
1760 }
1761
1762 static void
1763 ip_out_cksum_stats(int proto, u_int32_t len)
1764 {
1765 switch (proto) {
1766 case IPPROTO_TCP:
1767 tcp_out_cksum_stats(len);
1768 break;
1769 case IPPROTO_UDP:
1770 udp_out_cksum_stats(len);
1771 break;
1772 default:
1773 /* keep only TCP or UDP stats for now */
1774 break;
1775 }
1776 }
1777
1778 void
1779 in_delayed_cksum_offset(struct mbuf *m0, int ip_offset)
1780 {
1781 struct ip *ip;
1782 unsigned char buf[sizeof(struct ip)];
1783 u_short csum, offset, ip_len;
1784 struct mbuf *m = m0;
1785
1786 while (ip_offset >= m->m_len) {
1787 ip_offset -= m->m_len;
1788 m = m->m_next;
1789 if (m == NULL) {
1790 printf("in_delayed_cksum_withoffset failed - ip_offset wasn't in the packet\n");
1791 return;
1792 }
1793 }
1794
1795 /* Sometimes the IP header is not contiguous, yes this can happen! */
1796 if (ip_offset + sizeof(struct ip) > m->m_len) {
1797 #if DEBUG
1798 printf("delayed m_pullup, m->len: %d off: %d\n",
1799 m->m_len, ip_offset);
1800 #endif
1801 m_copydata(m, ip_offset, sizeof(struct ip), (caddr_t) buf);
1802
1803 ip = (struct ip *)buf;
1804 } else {
1805 ip = (struct ip*)(m->m_data + ip_offset);
1806 }
1807
1808 /* Gross */
1809 if (ip_offset) {
1810 m->m_len -= ip_offset;
1811 m->m_data += ip_offset;
1812 }
1813
1814 offset = IP_VHL_HL(ip->ip_vhl) << 2 ;
1815
1816 /*
1817 * We could be in the context of an IP or interface filter; in the
1818 * former case, ip_len would be in host (correct) order while for
1819 * the latter it would be in network order. Because of this, we
1820 * attempt to interpret the length field by comparing it against
1821 * the actual packet length. If the comparison fails, byte swap
1822 * the length and check again. If it still fails, then the packet
1823 * is bogus and we give up.
1824 */
1825 ip_len = ip->ip_len;
1826 if (ip_len != (m0->m_pkthdr.len - ip_offset)) {
1827 ip_len = SWAP16(ip_len);
1828 if (ip_len != (m0->m_pkthdr.len - ip_offset)) {
1829 printf("in_delayed_cksum_offset: ip_len %d (%d) "
1830 "doesn't match actual length %d\n", ip->ip_len,
1831 ip_len, (m0->m_pkthdr.len - ip_offset));
1832 return;
1833 }
1834 }
1835
1836 csum = in_cksum_skip(m, ip_len, offset);
1837
1838 /* Update stats */
1839 ip_out_cksum_stats(ip->ip_p, ip_len - offset);
1840
1841 if (m0->m_pkthdr.csum_flags & CSUM_UDP && csum == 0)
1842 csum = 0xffff;
1843 offset += m0->m_pkthdr.csum_data & 0xFFFF; /* checksum offset */
1844
1845 /* Gross */
1846 if (ip_offset) {
1847 if (M_LEADINGSPACE(m) < ip_offset)
1848 panic("in_delayed_cksum_offset - chain modified!\n");
1849 m->m_len += ip_offset;
1850 m->m_data -= ip_offset;
1851 }
1852
1853 if (offset > ip_len) /* bogus offset */
1854 return;
1855
1856 /* Insert the checksum in the existing chain */
1857 if (offset + ip_offset + sizeof(u_short) > m->m_len) {
1858 char tmp[2];
1859
1860 #if DEBUG
1861 printf("delayed m_copyback, m->len: %d off: %d p: %d\n",
1862 m->m_len, offset + ip_offset, ip->ip_p);
1863 #endif
1864 *(u_short *)tmp = csum;
1865 m_copyback(m, offset + ip_offset, 2, tmp);
1866 } else
1867 *(u_short *)(m->m_data + offset + ip_offset) = csum;
1868 }
1869
/*
 * Convenience form of in_delayed_cksum_offset() for a chain whose IP
 * header is at offset zero.
 */
void
in_delayed_cksum(struct mbuf *m)
{
	const int ip_hdr_offset = 0;

	in_delayed_cksum_offset(m, ip_hdr_offset);
}
1875
1876 void
1877 in_cksum_offset(struct mbuf* m, size_t ip_offset)
1878 {
1879 struct ip* ip = NULL;
1880 int hlen = 0;
1881 unsigned char buf[sizeof(struct ip)];
1882 int swapped = 0;
1883
1884 while (ip_offset >= m->m_len) {
1885 ip_offset -= m->m_len;
1886 m = m->m_next;
1887 if (m == NULL) {
1888 printf("in_cksum_offset failed - ip_offset wasn't in the packet\n");
1889 return;
1890 }
1891 }
1892
1893 /* Sometimes the IP header is not contiguous, yes this can happen! */
1894 if (ip_offset + sizeof(struct ip) > m->m_len) {
1895
1896 #if DEBUG
1897 printf("in_cksum_offset - delayed m_pullup, m->len: %d off: %lu\n",
1898 m->m_len, ip_offset);
1899 #endif
1900 m_copydata(m, ip_offset, sizeof(struct ip), (caddr_t) buf);
1901
1902 ip = (struct ip *)buf;
1903 ip->ip_sum = 0;
1904 m_copyback(m, ip_offset + offsetof(struct ip, ip_sum), 2, (caddr_t)&ip->ip_sum);
1905 } else {
1906 ip = (struct ip*)(m->m_data + ip_offset);
1907 ip->ip_sum = 0;
1908 }
1909
1910 /* Gross */
1911 if (ip_offset) {
1912 m->m_len -= ip_offset;
1913 m->m_data += ip_offset;
1914 }
1915
1916 #ifdef _IP_VHL
1917 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
1918 #else
1919 hlen = ip->ip_hl << 2;
1920 #endif
1921 /*
1922 * We could be in the context of an IP or interface filter; in the
1923 * former case, ip_len would be in host order while for the latter
1924 * it would be in network (correct) order. Because of this, we
1925 * attempt to interpret the length field by comparing it against
1926 * the actual packet length. If the comparison fails, byte swap
1927 * the length and check again. If it still fails, then the packet
1928 * is bogus and we give up.
1929 */
1930 if (ntohs(ip->ip_len) != (m->m_pkthdr.len - ip_offset)) {
1931 ip->ip_len = SWAP16(ip->ip_len);
1932 swapped = 1;
1933 if (ntohs(ip->ip_len) != (m->m_pkthdr.len - ip_offset)) {
1934 ip->ip_len = SWAP16(ip->ip_len);
1935 printf("in_cksum_offset: ip_len %d (%d) "
1936 "doesn't match actual length %lu\n",
1937 ip->ip_len, SWAP16(ip->ip_len),
1938 (m->m_pkthdr.len - ip_offset));
1939 return;
1940 }
1941 }
1942
1943 ip->ip_sum = 0;
1944 ip->ip_sum = in_cksum(m, hlen);
1945 if (swapped)
1946 ip->ip_len = SWAP16(ip->ip_len);
1947
1948 /* Gross */
1949 if (ip_offset) {
1950 if (M_LEADINGSPACE(m) < ip_offset)
1951 panic("in_cksum_offset - chain modified!\n");
1952 m->m_len += ip_offset;
1953 m->m_data -= ip_offset;
1954 }
1955
1956 /* Insert the checksum in the existing chain if IP header not contiguous */
1957 if (ip_offset + sizeof(struct ip) > m->m_len) {
1958 char tmp[2];
1959
1960 #if DEBUG
1961 printf("in_cksum_offset m_copyback, m->len: %u off: %lu p: %d\n",
1962 m->m_len, ip_offset + offsetof(struct ip, ip_sum), ip->ip_p);
1963 #endif
1964 *(u_short *)tmp = ip->ip_sum;
1965 m_copyback(m, ip_offset + offsetof(struct ip, ip_sum), 2, tmp);
1966 }
1967 }
1968
1969 /*
1970 * Insert IP options into preformed packet.
1971 * Adjust IP destination as required for IP source routing,
1972 * as indicated by a non-zero in_addr at the start of the options.
1973 *
1974 * XXX This routine assumes that the packet has no options in place.
1975 */
1976 static struct mbuf *
1977 ip_insertoptions(m, opt, phlen)
1978 register struct mbuf *m;
1979 struct mbuf *opt;
1980 int *phlen;
1981 {
1982 register struct ipoption *p = mtod(opt, struct ipoption *);
1983 struct mbuf *n;
1984 register struct ip *ip = mtod(m, struct ip *);
1985 unsigned optlen;
1986
1987 optlen = opt->m_len - sizeof(p->ipopt_dst);
1988 if (optlen + (u_short)ip->ip_len > IP_MAXPACKET)
1989 return (m); /* XXX should fail */
1990 if (p->ipopt_dst.s_addr)
1991 ip->ip_dst = p->ipopt_dst;
1992 if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
1993 MGETHDR(n, M_DONTWAIT, MT_HEADER); /* MAC-OK */
1994 if (n == 0)
1995 return (m);
1996 n->m_pkthdr.rcvif = 0;
1997 #if CONFIG_MACF_NET
1998 mac_mbuf_label_copy(m, n);
1999 #endif
2000 n->m_pkthdr.len = m->m_pkthdr.len + optlen;
2001 m->m_len -= sizeof(struct ip);
2002 m->m_data += sizeof(struct ip);
2003 n->m_next = m;
2004 m = n;
2005 m->m_len = optlen + sizeof(struct ip);
2006 m->m_data += max_linkhdr;
2007 (void)memcpy(mtod(m, void *), ip, sizeof(struct ip));
2008 } else {
2009 m->m_data -= optlen;
2010 m->m_len += optlen;
2011 m->m_pkthdr.len += optlen;
2012 ovbcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
2013 }
2014 ip = mtod(m, struct ip *);
2015 bcopy(p->ipopt_list, ip + 1, optlen);
2016 *phlen = sizeof(struct ip) + optlen;
2017 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, *phlen >> 2);
2018 ip->ip_len += optlen;
2019 return (m);
2020 }
2021
2022 /*
2023 * Copy options from ip to jp,
2024 * omitting those not copied during fragmentation.
2025 */
2026 int
2027 ip_optcopy(ip, jp)
2028 struct ip *ip, *jp;
2029 {
2030 register u_char *cp, *dp;
2031 int opt, optlen, cnt;
2032
2033 cp = (u_char *)(ip + 1);
2034 dp = (u_char *)(jp + 1);
2035 cnt = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof (struct ip);
2036 for (; cnt > 0; cnt -= optlen, cp += optlen) {
2037 opt = cp[0];
2038 if (opt == IPOPT_EOL)
2039 break;
2040 if (opt == IPOPT_NOP) {
2041 /* Preserve for IP mcast tunnel's LSRR alignment. */
2042 *dp++ = IPOPT_NOP;
2043 optlen = 1;
2044 continue;
2045 }
2046 #if DIAGNOSTIC
2047 if (cnt < IPOPT_OLEN + sizeof(*cp))
2048 panic("malformed IPv4 option passed to ip_optcopy");
2049 #endif
2050 optlen = cp[IPOPT_OLEN];
2051 #if DIAGNOSTIC
2052 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
2053 panic("malformed IPv4 option passed to ip_optcopy");
2054 #endif
2055 /* bogus lengths should have been caught by ip_dooptions */
2056 if (optlen > cnt)
2057 optlen = cnt;
2058 if (IPOPT_COPIED(opt)) {
2059 bcopy(cp, dp, optlen);
2060 dp += optlen;
2061 }
2062 }
2063 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
2064 *dp++ = IPOPT_EOL;
2065 return (optlen);
2066 }
2067
2068 /*
2069 * IP socket option processing.
2070 */
2071 int
2072 ip_ctloutput(so, sopt)
2073 struct socket *so;
2074 struct sockopt *sopt;
2075 {
2076 struct inpcb *inp = sotoinpcb(so);
2077 int error, optval;
2078
2079 error = optval = 0;
2080 if (sopt->sopt_level != IPPROTO_IP) {
2081 return (EINVAL);
2082 }
2083
2084 switch (sopt->sopt_dir) {
2085 case SOPT_SET:
2086 switch (sopt->sopt_name) {
2087 case IP_OPTIONS:
2088 #ifdef notyet
2089 case IP_RETOPTS:
2090 #endif
2091 {
2092 struct mbuf *m;
2093 if (sopt->sopt_valsize > MLEN) {
2094 error = EMSGSIZE;
2095 break;
2096 }
2097 MGET(m, sopt->sopt_p != kernproc ? M_WAIT : M_DONTWAIT,
2098 MT_HEADER);
2099 if (m == 0) {
2100 error = ENOBUFS;
2101 break;
2102 }
2103 m->m_len = sopt->sopt_valsize;
2104 error = sooptcopyin(sopt, mtod(m, char *), m->m_len,
2105 m->m_len);
2106 if (error)
2107 break;
2108
2109 return (ip_pcbopts(sopt->sopt_name, &inp->inp_options,
2110 m));
2111 }
2112
2113 case IP_TOS:
2114 case IP_TTL:
2115 case IP_RECVOPTS:
2116 case IP_RECVRETOPTS:
2117 case IP_RECVDSTADDR:
2118 case IP_RECVIF:
2119 case IP_RECVTTL:
2120 #if defined(NFAITH) && NFAITH > 0
2121 case IP_FAITH:
2122 #endif
2123 error = sooptcopyin(sopt, &optval, sizeof optval,
2124 sizeof optval);
2125 if (error)
2126 break;
2127
2128 switch (sopt->sopt_name) {
2129 case IP_TOS:
2130 inp->inp_ip_tos = optval;
2131 break;
2132
2133 case IP_TTL:
2134 inp->inp_ip_ttl = optval;
2135 break;
2136 #define OPTSET(bit) \
2137 if (optval) \
2138 inp->inp_flags |= bit; \
2139 else \
2140 inp->inp_flags &= ~bit;
2141
2142 case IP_RECVOPTS:
2143 OPTSET(INP_RECVOPTS);
2144 break;
2145
2146 case IP_RECVRETOPTS:
2147 OPTSET(INP_RECVRETOPTS);
2148 break;
2149
2150 case IP_RECVDSTADDR:
2151 OPTSET(INP_RECVDSTADDR);
2152 break;
2153
2154 case IP_RECVIF:
2155 OPTSET(INP_RECVIF);
2156 break;
2157
2158 case IP_RECVTTL:
2159 OPTSET(INP_RECVTTL);
2160 break;
2161
2162 #if defined(NFAITH) && NFAITH > 0
2163 case IP_FAITH:
2164 OPTSET(INP_FAITH);
2165 break;
2166 #endif
2167 }
2168 break;
2169 #undef OPTSET
2170
2171 #if CONFIG_FORCE_OUT_IFP
2172 /*
2173 * Apple private interface, similar to IP_BOUND_IF, except
2174 * that the parameter is a NULL-terminated string containing
2175 * the name of the network interface; an empty string means
2176 * unbind. Applications are encouraged to use IP_BOUND_IF
2177 * instead, as that is the current "official" API.
2178 */
2179 case IP_FORCE_OUT_IFP: {
2180 char ifname[IFNAMSIZ];
2181 unsigned int ifscope;
2182
2183 /* This option is settable only for IPv4 */
2184 if (!(inp->inp_vflag & INP_IPV4)) {
2185 error = EINVAL;
2186 break;
2187 }
2188
2189 /* Verify interface name parameter is sane */
2190 if (sopt->sopt_valsize > sizeof(ifname)) {
2191 error = EINVAL;
2192 break;
2193 }
2194
2195 /* Copy the interface name */
2196 if (sopt->sopt_valsize != 0) {
2197 error = sooptcopyin(sopt, ifname,
2198 sizeof (ifname), sopt->sopt_valsize);
2199 if (error)
2200 break;
2201 }
2202
2203 if (sopt->sopt_valsize == 0 || ifname[0] == NULL) {
2204 /* Unbind this socket from any interface */
2205 ifscope = IFSCOPE_NONE;
2206 } else {
2207 ifnet_t ifp;
2208
2209 /* Verify name is NULL terminated */
2210 if (ifname[sopt->sopt_valsize - 1] != NULL) {
2211 error = EINVAL;
2212 break;
2213 }
2214
2215 /* Bail out if given bogus interface name */
2216 if (ifnet_find_by_name(ifname, &ifp) != 0) {
2217 error = ENXIO;
2218 break;
2219 }
2220
2221 /* Bind this socket to this interface */
2222 ifscope = ifp->if_index;
2223
2224 /*
2225 * Won't actually free; since we don't release
2226 * this later, we should do it now.
2227 */
2228 ifnet_release(ifp);
2229 }
2230 ip_bindif(inp, ifscope);
2231 }
2232 break;
2233 #endif
2234 case IP_MULTICAST_IF:
2235 case IP_MULTICAST_VIF:
2236 case IP_MULTICAST_TTL:
2237 case IP_MULTICAST_LOOP:
2238 case IP_ADD_MEMBERSHIP:
2239 case IP_DROP_MEMBERSHIP:
2240 error = ip_setmoptions(sopt, &inp->inp_moptions);
2241 break;
2242
2243 case IP_PORTRANGE:
2244 error = sooptcopyin(sopt, &optval, sizeof optval,
2245 sizeof optval);
2246 if (error)
2247 break;
2248
2249 switch (optval) {
2250 case IP_PORTRANGE_DEFAULT:
2251 inp->inp_flags &= ~(INP_LOWPORT);
2252 inp->inp_flags &= ~(INP_HIGHPORT);
2253 break;
2254
2255 case IP_PORTRANGE_HIGH:
2256 inp->inp_flags &= ~(INP_LOWPORT);
2257 inp->inp_flags |= INP_HIGHPORT;
2258 break;
2259
2260 case IP_PORTRANGE_LOW:
2261 inp->inp_flags &= ~(INP_HIGHPORT);
2262 inp->inp_flags |= INP_LOWPORT;
2263 break;
2264
2265 default:
2266 error = EINVAL;
2267 break;
2268 }
2269 break;
2270
2271 #if IPSEC
2272 case IP_IPSEC_POLICY:
2273 {
2274 caddr_t req = NULL;
2275 size_t len = 0;
2276 int priv;
2277 struct mbuf *m;
2278 int optname;
2279
2280 if (sopt->sopt_valsize > MCLBYTES) {
2281 error = EMSGSIZE;
2282 break;
2283 }
2284 if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
2285 break;
2286 if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
2287 break;
2288 priv = (proc_suser(sopt->sopt_p) == 0);
2289 if (m) {
2290 req = mtod(m, caddr_t);
2291 len = m->m_len;
2292 }
2293 optname = sopt->sopt_name;
2294 error = ipsec4_set_policy(inp, optname, req, len, priv);
2295 m_freem(m);
2296 break;
2297 }
2298 #endif /*IPSEC*/
2299
2300 #if TRAFFIC_MGT
2301 case IP_TRAFFIC_MGT_BACKGROUND:
2302 {
2303 unsigned background = 0;
2304 error = sooptcopyin(sopt, &background, sizeof(background), sizeof(background));
2305 if (error)
2306 break;
2307
2308 if (background) {
2309 socket_set_traffic_mgt_flags(so,
2310 TRAFFIC_MGT_SO_BACKGROUND |
2311 TRAFFIC_MGT_SO_BG_REGULATE);
2312 } else {
2313 socket_clear_traffic_mgt_flags(so,
2314 TRAFFIC_MGT_SO_BACKGROUND |
2315 TRAFFIC_MGT_SO_BG_REGULATE);
2316 }
2317
2318 break;
2319 }
2320 #endif /* TRAFFIC_MGT */
2321
2322 /*
2323 * On a multihomed system, scoped routing can be used to
2324 * restrict the source interface used for sending packets.
2325 * The socket option IP_BOUND_IF binds a particular AF_INET
2326 * socket to an interface such that data sent on the socket
2327 * is restricted to that interface. This is unlike the
2328 * SO_DONTROUTE option where the routing table is bypassed;
2329 * therefore it allows for a greater flexibility and control
2330 * over the system behavior, and does not place any restriction
2331 * on the destination address type (e.g. unicast, multicast,
2332 * or broadcast if applicable) or whether or not the host is
2333 * directly reachable. Note that in the multicast transmit
2334 * case, IP_MULTICAST_IF takes precedence over IP_BOUND_IF,
2335 * since the former practically bypasses the routing table;
2336 * in this case, IP_BOUND_IF sets the default interface used
2337 * for sending multicast packets in the absence of an explicit
2338 * transmit interface set via IP_MULTICAST_IF.
2339 */
2340 case IP_BOUND_IF:
2341 /* This option is settable only for IPv4 */
2342 if (!(inp->inp_vflag & INP_IPV4)) {
2343 error = EINVAL;
2344 break;
2345 }
2346
2347 error = sooptcopyin(sopt, &optval, sizeof (optval),
2348 sizeof (optval));
2349
2350 if (error)
2351 break;
2352
2353 ip_bindif(inp, optval);
2354 break;
2355
2356 default:
2357 error = ENOPROTOOPT;
2358 break;
2359 }
2360 break;
2361
2362 case SOPT_GET:
2363 switch (sopt->sopt_name) {
2364 case IP_OPTIONS:
2365 case IP_RETOPTS:
2366 if (inp->inp_options)
2367 error = sooptcopyout(sopt,
2368 mtod(inp->inp_options,
2369 char *),
2370 inp->inp_options->m_len);
2371 else
2372 sopt->sopt_valsize = 0;
2373 break;
2374
2375 case IP_TOS:
2376 case IP_TTL:
2377 case IP_RECVOPTS:
2378 case IP_RECVRETOPTS:
2379 case IP_RECVDSTADDR:
2380 case IP_RECVIF:
2381 case IP_RECVTTL:
2382 case IP_PORTRANGE:
2383 #if defined(NFAITH) && NFAITH > 0
2384 case IP_FAITH:
2385 #endif
2386 switch (sopt->sopt_name) {
2387
2388 case IP_TOS:
2389 optval = inp->inp_ip_tos;
2390 break;
2391
2392 case IP_TTL:
2393 optval = inp->inp_ip_ttl;
2394 break;
2395
2396 #define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0)
2397
2398 case IP_RECVOPTS:
2399 optval = OPTBIT(INP_RECVOPTS);
2400 break;
2401
2402 case IP_RECVRETOPTS:
2403 optval = OPTBIT(INP_RECVRETOPTS);
2404 break;
2405
2406 case IP_RECVDSTADDR:
2407 optval = OPTBIT(INP_RECVDSTADDR);
2408 break;
2409
2410 case IP_RECVIF:
2411 optval = OPTBIT(INP_RECVIF);
2412 break;
2413
2414 case IP_RECVTTL:
2415 optval = OPTBIT(INP_RECVTTL);
2416 break;
2417
2418 case IP_PORTRANGE:
2419 if (inp->inp_flags & INP_HIGHPORT)
2420 optval = IP_PORTRANGE_HIGH;
2421 else if (inp->inp_flags & INP_LOWPORT)
2422 optval = IP_PORTRANGE_LOW;
2423 else
2424 optval = 0;
2425 break;
2426
2427 #if defined(NFAITH) && NFAITH > 0
2428 case IP_FAITH:
2429 optval = OPTBIT(INP_FAITH);
2430 break;
2431 #endif
2432 }
2433 error = sooptcopyout(sopt, &optval, sizeof optval);
2434 break;
2435
2436 case IP_MULTICAST_IF:
2437 case IP_MULTICAST_VIF:
2438 case IP_MULTICAST_TTL:
2439 case IP_MULTICAST_LOOP:
2440 case IP_ADD_MEMBERSHIP:
2441 case IP_DROP_MEMBERSHIP:
2442 error = ip_getmoptions(sopt, inp->inp_moptions);
2443 break;
2444
2445 #if IPSEC
2446 case IP_IPSEC_POLICY:
2447 {
2448 struct mbuf *m = NULL;
2449 caddr_t req = NULL;
2450 size_t len = 0;
2451
2452 if (m != 0) {
2453 req = mtod(m, caddr_t);
2454 len = m->m_len;
2455 }
2456 error = ipsec4_get_policy(sotoinpcb(so), req, len, &m);
2457 if (error == 0)
2458 error = soopt_mcopyout(sopt, m); /* XXX */
2459 if (error == 0)
2460 m_freem(m);
2461 break;
2462 }
2463 #endif /*IPSEC*/
2464
2465 #if TRAFFIC_MGT
2466 case IP_TRAFFIC_MGT_BACKGROUND:
2467 {
2468 unsigned background = so->so_traffic_mgt_flags;
2469 return (sooptcopyout(sopt, &background, sizeof(background)));
2470 break;
2471 }
2472 #endif /* TRAFFIC_MGT */
2473
2474 case IP_BOUND_IF:
2475 if (inp->inp_flags & INP_BOUND_IF)
2476 optval = inp->inp_boundif;
2477 error = sooptcopyout(sopt, &optval, sizeof (optval));
2478 break;
2479
2480 default:
2481 error = ENOPROTOOPT;
2482 break;
2483 }
2484 break;
2485 }
2486 return (error);
2487 }
2488
2489 /*
2490 * Set up IP options in pcb for insertion in output packets.
2491 * Store in mbuf with pointer in pcbopt, adding pseudo-option
2492 * with destination address if source routed.
2493 */
2494 static int
2495 ip_pcbopts(
2496 __unused int optname,
2497 struct mbuf **pcbopt,
2498 register struct mbuf *m)
2499 {
2500 register int cnt, optlen;
2501 register u_char *cp;
2502 u_char opt;
2503
2504 /* turn off any old options */
2505 if (*pcbopt)
2506 (void)m_free(*pcbopt);
2507 *pcbopt = 0;
2508 if (m == (struct mbuf *)0 || m->m_len == 0) {
2509 /*
2510 * Only turning off any previous options.
2511 */
2512 if (m)
2513 (void)m_free(m);
2514 return (0);
2515 }
2516
2517 #ifndef vax
2518 if (m->m_len % sizeof(int32_t))
2519 goto bad;
2520 #endif
2521 /*
2522 * IP first-hop destination address will be stored before
2523 * actual options; move other options back
2524 * and clear it when none present.
2525 */
2526 if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
2527 goto bad;
2528 cnt = m->m_len;
2529 m->m_len += sizeof(struct in_addr);
2530 cp = mtod(m, u_char *) + sizeof(struct in_addr);
2531 ovbcopy(mtod(m, caddr_t), (caddr_t)cp, (unsigned)cnt);
2532 bzero(mtod(m, caddr_t), sizeof(struct in_addr));
2533
2534 for (; cnt > 0; cnt -= optlen, cp += optlen) {
2535 opt = cp[IPOPT_OPTVAL];
2536 if (opt == IPOPT_EOL)
2537 break;
2538 if (opt == IPOPT_NOP)
2539 optlen = 1;
2540 else {
2541 if (cnt < IPOPT_OLEN + sizeof(*cp))
2542 goto bad;
2543 optlen = cp[IPOPT_OLEN];
2544 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
2545 goto bad;
2546 }
2547 switch (opt) {
2548
2549 default:
2550 break;
2551
2552 case IPOPT_LSRR:
2553 case IPOPT_SSRR:
2554 /*
2555 * user process specifies route as:
2556 * ->A->B->C->D
2557 * D must be our final destination (but we can't
2558 * check that since we may not have connected yet).
2559 * A is first hop destination, which doesn't appear in
2560 * actual IP option, but is stored before the options.
2561 */
2562 if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
2563 goto bad;
2564 m->m_len -= sizeof(struct in_addr);
2565 cnt -= sizeof(struct in_addr);
2566 optlen -= sizeof(struct in_addr);
2567 cp[IPOPT_OLEN] = optlen;
2568 /*
2569 * Move first hop before start of options.
2570 */
2571 bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
2572 sizeof(struct in_addr));
2573 /*
2574 * Then copy rest of options back
2575 * to close up the deleted entry.
2576 */
2577 ovbcopy((caddr_t)(&cp[IPOPT_OFFSET+1] +
2578 sizeof(struct in_addr)),
2579 (caddr_t)&cp[IPOPT_OFFSET+1],
2580 (unsigned)cnt + sizeof(struct in_addr));
2581 break;
2582 }
2583 }
2584 if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
2585 goto bad;
2586 *pcbopt = m;
2587 return (0);
2588
2589 bad:
2590 (void)m_free(m);
2591 return (EINVAL);
2592 }
2593
2594 /*
2595 * XXX
2596 * The whole multicast option thing needs to be re-thought.
2597 * Several of these options are equally applicable to non-multicast
2598 * transmission, and one (IP_MULTICAST_TTL) totally duplicates a
2599 * standard option (IP_TTL).
2600 */
2601
2602 /*
2603 * following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as interface index.
2604 */
2605 static struct ifnet *
2606 ip_multicast_if(a, ifindexp)
2607 struct in_addr *a;
2608 int *ifindexp;
2609 {
2610 int ifindex;
2611 struct ifnet *ifp;
2612
2613 if (ifindexp)
2614 *ifindexp = 0;
2615 if (ntohl(a->s_addr) >> 24 == 0) {
2616 ifindex = ntohl(a->s_addr) & 0xffffff;
2617 ifnet_head_lock_shared();
2618 if (ifindex < 0 || if_index < ifindex) {
2619 ifnet_head_done();
2620 return NULL;
2621 }
2622 ifp = ifindex2ifnet[ifindex];
2623 ifnet_head_done();
2624 if (ifindexp)
2625 *ifindexp = ifindex;
2626 } else {
2627 INADDR_TO_IFP(*a, ifp);
2628 }
2629 return ifp;
2630 }
2631
2632 /*
2633 * Set the IP multicast options in response to user setsockopt().
2634 */
2635 static int
2636 ip_setmoptions(sopt, imop)
2637 struct sockopt *sopt;
2638 struct ip_moptions **imop;
2639 {
2640 int error = 0;
2641 struct in_addr addr;
2642 struct ip_mreq mreq;
2643 struct ifnet *ifp = NULL;
2644 struct ip_moptions *imo = *imop;
2645 int ifindex;
2646
2647 if (imo == NULL) {
2648 /*
2649 * No multicast option buffer attached to the pcb;
2650 * allocate one and initialize to default values.
2651 */
2652 error = ip_createmoptions(imop);
2653 if (error != 0)
2654 return error;
2655 imo = *imop;
2656 }
2657
2658 switch (sopt->sopt_name) {
2659 /* store an index number for the vif you wanna use in the send */
2660 #if MROUTING
2661 case IP_MULTICAST_VIF:
2662 {
2663 int i;
2664 if (legal_vif_num == 0) {
2665 error = EOPNOTSUPP;
2666 break;
2667 }
2668 error = sooptcopyin(sopt, &i, sizeof i, sizeof i);
2669 if (error)
2670 break;
2671 if (!legal_vif_num(i) && (i != -1)) {
2672 error = EINVAL;
2673 break;
2674 }
2675 imo->imo_multicast_vif = i;
2676 break;
2677 }
2678 #endif /* MROUTING */
2679
2680 case IP_MULTICAST_IF:
2681 /*
2682 * Select the interface for outgoing multicast packets.
2683 */
2684 error = sooptcopyin(sopt, &addr, sizeof addr, sizeof addr);
2685 if (error)
2686 break;
2687 /*
2688 * INADDR_ANY is used to remove a previous selection.
2689 * When no interface is selected, a default one is
2690 * chosen every time a multicast packet is sent.
2691 */
2692 if (addr.s_addr == INADDR_ANY) {
2693 imo->imo_multicast_ifp = NULL;
2694 break;
2695 }
2696 /*
2697 * The selected interface is identified by its local
2698 * IP address. Find the interface and confirm that
2699 * it supports multicasting.
2700 */
2701 ifp = ip_multicast_if(&addr, &ifindex);
2702 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
2703 error = EADDRNOTAVAIL;
2704 break;
2705 }
2706 imo->imo_multicast_ifp = ifp;
2707 if (ifindex)
2708 imo->imo_multicast_addr = addr;
2709 else
2710 imo->imo_multicast_addr.s_addr = INADDR_ANY;
2711 break;
2712
2713 case IP_MULTICAST_TTL:
2714 /*
2715 * Set the IP time-to-live for outgoing multicast packets.
2716 * The original multicast API required a char argument,
2717 * which is inconsistent with the rest of the socket API.
2718 * We allow either a char or an int.
2719 */
2720 if (sopt->sopt_valsize == 1) {
2721 u_char ttl;
2722 error = sooptcopyin(sopt, &ttl, 1, 1);
2723 if (error)
2724 break;
2725 imo->imo_multicast_ttl = ttl;
2726 } else {
2727 u_int ttl;
2728 error = sooptcopyin(sopt, &ttl, sizeof ttl,
2729 sizeof ttl);
2730 if (error)
2731 break;
2732 if (ttl > 255)
2733 error = EINVAL;
2734 else
2735 imo->imo_multicast_ttl = ttl;
2736 }
2737 break;
2738
2739 case IP_MULTICAST_LOOP:
2740 /*
2741 * Set the loopback flag for outgoing multicast packets.
2742 * Must be zero or one. The original multicast API required a
2743 * char argument, which is inconsistent with the rest
2744 * of the socket API. We allow either a char or an int.
2745 */
2746 if (sopt->sopt_valsize == 1) {
2747 u_char loop;
2748 error = sooptcopyin(sopt, &loop, 1, 1);
2749 if (error)
2750 break;
2751 imo->imo_multicast_loop = !!loop;
2752 } else {
2753 u_int loop;
2754 error = sooptcopyin(sopt, &loop, sizeof loop,
2755 sizeof loop);
2756 if (error)
2757 break;
2758 imo->imo_multicast_loop = !!loop;
2759 }
2760 break;
2761
2762 case IP_ADD_MEMBERSHIP:
2763 /*
2764 * Add a multicast group membership.
2765 * Group must be a valid IP multicast address.
2766 */
2767 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
2768 if (error)
2769 break;
2770
2771 error = ip_addmembership(imo, &mreq);
2772 break;
2773
2774 case IP_DROP_MEMBERSHIP:
2775 /*
2776 * Drop a multicast group membership.
2777 * Group must be a valid IP multicast address.
2778 */
2779 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
2780 if (error)
2781 break;
2782
2783 error = ip_dropmembership(imo, &mreq);
2784 break;
2785
2786 default:
2787 error = EOPNOTSUPP;
2788 break;
2789 }
2790
2791 /*
2792 * If all options have default values, no need to keep the mbuf.
2793 */
2794 if (imo->imo_multicast_ifp == NULL &&
2795 imo->imo_multicast_vif == (u_int32_t)-1 &&
2796 imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL &&
2797 imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP &&
2798 imo->imo_num_memberships == 0) {
2799 FREE(*imop, M_IPMOPTS);
2800 *imop = NULL;
2801 }
2802
2803 return (error);
2804 }
2805
2806 /*
2807 * Set the IP multicast options in response to user setsockopt().
2808 */
2809 __private_extern__ int
2810 ip_createmoptions(
2811 struct ip_moptions **imop)
2812 {
2813 struct ip_moptions *imo;
2814 imo = (struct ip_moptions*) _MALLOC(sizeof(*imo), M_IPMOPTS,
2815 M_WAITOK);
2816
2817 if (imo == NULL)
2818 return (ENOBUFS);
2819 *imop = imo;
2820 imo->imo_multicast_ifp = NULL;
2821 imo->imo_multicast_addr.s_addr = INADDR_ANY;
2822 imo->imo_multicast_vif = -1;
2823 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
2824 imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
2825 imo->imo_num_memberships = 0;
2826
2827 return 0;
2828 }
2829
2830 /*
2831 * Add membership to an IPv4 multicast.
2832 */
2833 __private_extern__ int
2834 ip_addmembership(
2835 struct ip_moptions *imo,
2836 struct ip_mreq *mreq)
2837 {
2838 struct route ro;
2839 struct sockaddr_in *dst;
2840 struct ifnet *ifp = NULL;
2841 int error = 0;
2842 int i;
2843
2844 bzero((caddr_t)&ro, sizeof(ro));
2845
2846 if (!IN_MULTICAST(ntohl(mreq->imr_multiaddr.s_addr))) {
2847 error = EINVAL;
2848 goto done;
2849 }
2850 /*
2851 * If no interface address was provided, use the interface of
2852 * the route to the given multicast address.
2853 */
2854 if (mreq->imr_interface.s_addr == INADDR_ANY) {
2855 dst = (struct sockaddr_in *)&ro.ro_dst;
2856 dst->sin_len = sizeof(*dst);
2857 dst->sin_family = AF_INET;
2858 dst->sin_addr = mreq->imr_multiaddr;
2859 rtalloc_ign(&ro, 0);
2860 if (ro.ro_rt != NULL) {
2861 ifp = ro.ro_rt->rt_ifp;
2862 } else {
2863 /* If there's no default route, try using loopback */
2864 mreq->imr_interface.s_addr = htonl(INADDR_LOOPBACK);
2865 }
2866 }
2867
2868 if (ifp == NULL) {
2869 ifp = ip_multicast_if(&mreq->imr_interface, NULL);
2870 }
2871
2872 /*
2873 * See if we found an interface, and confirm that it
2874 * supports multicast.
2875 */
2876 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
2877 error = EADDRNOTAVAIL;
2878 goto done;
2879 }
2880 /*
2881 * See if the membership already exists or if all the
2882 * membership slots are full.
2883 */
2884 for (i = 0; i < imo->imo_num_memberships; ++i) {
2885 if (imo->imo_membership[i]->inm_ifp == ifp &&
2886 imo->imo_membership[i]->inm_addr.s_addr
2887 == mreq->imr_multiaddr.s_addr)
2888 break;
2889 }
2890 if (i < imo->imo_num_memberships) {
2891 error = EADDRINUSE;
2892 goto done;
2893 }
2894 if (i == IP_MAX_MEMBERSHIPS) {
2895 error = ETOOMANYREFS;
2896 goto done;
2897 }
2898 /*
2899 * Everything looks good; add a new record to the multicast
2900 * address list for the given interface.
2901 */
2902 if ((imo->imo_membership[i] =
2903 in_addmulti(&mreq->imr_multiaddr, ifp)) == NULL) {
2904 error = ENOBUFS;
2905 goto done;
2906 }
2907 ++imo->imo_num_memberships;
2908
2909 done:
2910 if (ro.ro_rt != NULL)
2911 rtfree(ro.ro_rt);
2912
2913 return error;
2914 }
2915
2916 /*
2917 * Drop membership of an IPv4 multicast.
2918 */
2919 __private_extern__ int
2920 ip_dropmembership(
2921 struct ip_moptions *imo,
2922 struct ip_mreq *mreq)
2923 {
2924 int error = 0;
2925 struct ifnet* ifp = NULL;
2926 int i;
2927
2928 if (!IN_MULTICAST(ntohl(mreq->imr_multiaddr.s_addr))) {
2929 error = EINVAL;
2930 return error;
2931 }
2932
2933 /*
2934 * If an interface address was specified, get a pointer
2935 * to its ifnet structure.
2936 */
2937 if (mreq->imr_interface.s_addr == INADDR_ANY)
2938 ifp = NULL;
2939 else {
2940 ifp = ip_multicast_if(&mreq->imr_interface, NULL);
2941 if (ifp == NULL) {
2942 error = EADDRNOTAVAIL;
2943 return error;
2944 }
2945 }
2946 /*
2947 * Find the membership in the membership array.
2948 */
2949 for (i = 0; i < imo->imo_num_memberships; ++i) {
2950 if ((ifp == NULL ||
2951 imo->imo_membership[i]->inm_ifp == ifp) &&
2952 imo->imo_membership[i]->inm_addr.s_addr ==
2953 mreq->imr_multiaddr.s_addr)
2954 break;
2955 }
2956 if (i == imo->imo_num_memberships) {
2957 error = EADDRNOTAVAIL;
2958 return error;
2959 }
2960 /*
2961 * Give up the multicast address record to which the
2962 * membership points.
2963 */
2964 in_delmulti(&imo->imo_membership[i]);
2965 /*
2966 * Remove the gap in the membership array.
2967 */
2968 for (++i; i < imo->imo_num_memberships; ++i)
2969 imo->imo_membership[i-1] = imo->imo_membership[i];
2970 --imo->imo_num_memberships;
2971
2972 return error;
2973 }
2974
2975 /*
2976 * Return the IP multicast options in response to user getsockopt().
2977 */
2978 static int
2979 ip_getmoptions(sopt, imo)
2980 struct sockopt *sopt;
2981 register struct ip_moptions *imo;
2982 {
2983 struct in_addr addr;
2984 struct in_ifaddr *ia;
2985 int error, optval;
2986 u_char coptval;
2987
2988 error = 0;
2989 switch (sopt->sopt_name) {
2990 #if MROUTING
2991 case IP_MULTICAST_VIF:
2992 if (imo != NULL)
2993 optval = imo->imo_multicast_vif;
2994 else
2995 optval = -1;
2996 error = sooptcopyout(sopt, &optval, sizeof optval);
2997 break;
2998 #endif /* MROUTING */
2999
3000 case IP_MULTICAST_IF:
3001 if (imo == NULL || imo->imo_multicast_ifp == NULL)
3002 addr.s_addr = INADDR_ANY;
3003 else if (imo->imo_multicast_addr.s_addr) {
3004 /* return the value user has set */
3005 addr = imo->imo_multicast_addr;
3006 } else {
3007 IFP_TO_IA(imo->imo_multicast_ifp, ia);
3008 addr.s_addr = (ia == NULL) ? INADDR_ANY
3009 : IA_SIN(ia)->sin_addr.s_addr;
3010 if (ia != NULL)
3011 ifafree(&ia->ia_ifa);
3012 }
3013 error = sooptcopyout(sopt, &addr, sizeof addr);
3014 break;
3015
3016 case IP_MULTICAST_TTL:
3017 if (imo == 0)
3018 optval = coptval = IP_DEFAULT_MULTICAST_TTL;
3019 else
3020 optval = coptval = imo->imo_multicast_ttl;
3021 if (sopt->sopt_valsize == 1)
3022 error = sooptcopyout(sopt, &coptval, 1);
3023 else
3024 error = sooptcopyout(sopt, &optval, sizeof optval);
3025 break;
3026
3027 case IP_MULTICAST_LOOP:
3028 if (imo == 0)
3029 optval = coptval = IP_DEFAULT_MULTICAST_LOOP;
3030 else
3031 optval = coptval = imo->imo_multicast_loop;
3032 if (sopt->sopt_valsize == 1)
3033 error = sooptcopyout(sopt, &coptval, 1);
3034 else
3035 error = sooptcopyout(sopt, &optval, sizeof optval);
3036 break;
3037
3038 default:
3039 error = ENOPROTOOPT;
3040 break;
3041 }
3042 return (error);
3043 }
3044
3045 /*
3046 * Discard the IP multicast options.
3047 */
3048 void
3049 ip_freemoptions(imo)
3050 register struct ip_moptions *imo;
3051 {
3052 register int i;
3053
3054 if (imo != NULL) {
3055 for (i = 0; i < imo->imo_num_memberships; ++i)
3056 in_delmulti(&imo->imo_membership[i]);
3057 FREE(imo, M_IPMOPTS);
3058 }
3059 }
3060
3061 /*
3062 * Routine called from ip_output() to loop back a copy of an IP multicast
3063 * packet to the input queue of a specified interface. Note that this
3064 * calls the output routine of the loopback "driver", but with an interface
3065 * pointer that might NOT be a loopback interface -- evil, but easier than
3066 * replicating that code here.
3067 */
3068 static void
3069 ip_mloopback(ifp, m, dst, hlen)
3070 struct ifnet *ifp;
3071 register struct mbuf *m;
3072 register struct sockaddr_in *dst;
3073 int hlen;
3074 {
3075 register struct ip *ip;
3076 struct mbuf *copym;
3077 int sw_csum = (apple_hwcksum_tx == 0);
3078
3079 copym = m_copy(m, 0, M_COPYALL);
3080 if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen))
3081 copym = m_pullup(copym, hlen);
3082
3083 if (copym == NULL)
3084 return;
3085
3086 /*
3087 * We don't bother to fragment if the IP length is greater
3088 * than the interface's MTU. Can this possibly matter?
3089 */
3090 ip = mtod(copym, struct ip *);
3091
3092 #if BYTE_ORDER != BIG_ENDIAN
3093 HTONS(ip->ip_len);
3094 HTONS(ip->ip_off);
3095 #endif
3096
3097 ip->ip_sum = 0;
3098 ip->ip_sum = in_cksum(copym, hlen);
3099 /*
3100 * NB:
3101 * It's not clear whether there are any lingering
3102 * reentrancy problems in other areas which might
3103 * be exposed by using ip_input directly (in
3104 * particular, everything which modifies the packet
3105 * in-place). Yet another option is using the
3106 * protosw directly to deliver the looped back
3107 * packet. For the moment, we'll err on the side
3108 * of safety by using if_simloop().
3109 */
3110 #if 1 /* XXX */
3111 if (dst->sin_family != AF_INET) {
3112 printf("ip_mloopback: bad address family %d\n",
3113 dst->sin_family);
3114 dst->sin_family = AF_INET;
3115 }
3116 #endif
3117
3118 /*
3119 * Mark checksum as valid or calculate checksum for loopback.
3120 *
3121 * This is done this way because we have to embed the ifp of
3122 * the interface we will send the original copy of the packet
3123 * out on in the mbuf. ip_input will check if_hwassist of the
3124 * embedded ifp and ignore all csum_flags if if_hwassist is 0.
3125 * The UDP checksum has not been calculated yet.
3126 */
3127 if (sw_csum || (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA)) {
3128 if (!sw_csum && IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist)) {
3129 copym->m_pkthdr.csum_flags |=
3130 CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
3131 CSUM_IP_CHECKED | CSUM_IP_VALID;
3132 copym->m_pkthdr.csum_data = 0xffff;
3133 } else {
3134
3135 #if BYTE_ORDER != BIG_ENDIAN
3136 NTOHS(ip->ip_len);
3137 #endif
3138
3139 in_delayed_cksum(copym);
3140
3141 #if BYTE_ORDER != BIG_ENDIAN
3142 HTONS(ip->ip_len);
3143 #endif
3144
3145 }
3146 }
3147
3148 /*
3149 * TedW:
3150 * We need to send all loopback traffic down to dlil in case
3151 * a filter has tapped-in.
3152 */
3153
3154 /*
3155 * Stuff the 'real' ifp into the pkthdr, to be used in matching
3156 * in ip_input(); we need the loopback ifp/dl_tag passed as args
3157 * to make the loopback driver compliant with the data link
3158 * requirements.
3159 */
3160 if (lo_ifp) {
3161 copym->m_pkthdr.rcvif = ifp;
3162 dlil_output(lo_ifp, PF_INET, copym, 0,
3163 (struct sockaddr *) dst, 0);
3164 } else {
3165 printf("Warning: ip_output call to dlil_find_dltag failed!\n");
3166 m_freem(copym);
3167 }
3168 }
3169
3170 /*
3171 * Given a source IP address (and route, if available), determine the best
3172 * interface to send the packet from. Checking for (and updating) the
3173 * ROF_SRCIF_SELECTED flag in the pcb-supplied route placeholder is done
3174 * without any locks based on the assumption that ip_output() is single-
3175 * threaded per-pcb, i.e. for any given pcb there can only be one thread
3176 * performing output at the IP layer.
3177 */
3178 static struct ifaddr *
3179 in_selectsrcif(struct ip *ip, struct route *ro, unsigned int ifscope)
3180 {
3181 struct ifaddr *ifa = NULL;
3182 struct in_addr src = ip->ip_src;
3183 struct in_addr dst = ip->ip_dst;
3184 struct ifnet *rt_ifp;
3185 char s_src[MAX_IPv4_STR_LEN], s_dst[MAX_IPv4_STR_LEN];
3186
3187 if (ip_select_srcif_debug) {
3188 (void) inet_ntop(AF_INET, &src.s_addr, s_src, sizeof (s_src));
3189 (void) inet_ntop(AF_INET, &dst.s_addr, s_dst, sizeof (s_dst));
3190 }
3191
3192 if (ro->ro_rt != NULL)
3193 RT_LOCK(ro->ro_rt);
3194
3195 rt_ifp = (ro->ro_rt != NULL) ? ro->ro_rt->rt_ifp : NULL;
3196
3197 /*
3198 * Given the source IP address, find a suitable source interface
3199 * to use for transmission; if the caller has specified a scope,
3200 * optimize the search by looking at the addresses only for that
3201 * interface. This is still suboptimal, however, as we need to
3202 * traverse the per-interface list.
3203 */
3204 if (ifscope != IFSCOPE_NONE || ro->ro_rt != NULL) {
3205 unsigned int scope = ifscope;
3206
3207 /*
3208 * If no scope is specified and the route is stale (pointing
3209 * to a defunct interface) use the current primary interface;
3210 * this happens when switching between interfaces configured
3211 * with the same IP address. Otherwise pick up the scope
3212 * information from the route; the ULP may have looked up a
3213 * correct route and we just need to verify it here and mark
3214 * it with the ROF_SRCIF_SELECTED flag below.
3215 */
3216 if (scope == IFSCOPE_NONE) {
3217 scope = rt_ifp->if_index;
3218 if (scope != get_primary_ifscope() &&
3219 ro->ro_rt->generation_id != route_generation)
3220 scope = get_primary_ifscope();
3221 }
3222
3223 ifa = (struct ifaddr *)ifa_foraddr_scoped(src.s_addr, scope);
3224
3225 if (ifa == NULL && ip->ip_p != IPPROTO_UDP &&
3226 ip->ip_p != IPPROTO_TCP && ipforwarding) {
3227 /*
3228 * If forwarding is enabled, and if the packet isn't
3229 * TCP or UDP, check if the source address belongs
3230 * to one of our own interfaces; if so, demote the
3231 * interface scope and do a route lookup right below.
3232 */
3233 ifa = (struct ifaddr *)ifa_foraddr(src.s_addr);
3234 if (ifa != NULL) {
3235 ifafree(ifa);
3236 ifa = NULL;
3237 ifscope = IFSCOPE_NONE;
3238 }
3239 }
3240
3241 if (ip_select_srcif_debug && ifa != NULL) {
3242 if (ro->ro_rt != NULL) {
3243 printf("%s->%s ifscope %d->%d ifa_if %s%d "
3244 "ro_if %s%d\n", s_src, s_dst, ifscope,
3245 scope, ifa->ifa_ifp->if_name,
3246 ifa->ifa_ifp->if_unit, rt_ifp->if_name,
3247 rt_ifp->if_unit);
3248 } else {
3249 printf("%s->%s ifscope %d->%d ifa_if %s%d\n",
3250 s_src, s_dst, ifscope, scope,
3251 ifa->ifa_ifp->if_name,
3252 ifa->ifa_ifp->if_unit);
3253 }
3254 }
3255 }
3256
3257 /*
3258 * Slow path; search for an interface having the corresponding source
3259 * IP address if the scope was not specified by the caller, and:
3260 *
3261 * 1) There currently isn't any route, or,
3262 * 2) The interface used by the route does not own that source
3263 * IP address; in this case, the route will get blown away
3264 * and we'll do a more specific scoped search using the newly
3265 * found interface.
3266 */
3267 if (ifa == NULL && ifscope == IFSCOPE_NONE) {
3268 ifa = (struct ifaddr *)ifa_foraddr(src.s_addr);
3269
3270 /*
3271 * If we have the IP address, but not the route, we don't
3272 * really know whether or not it belongs to the correct
3273 * interface (it could be shared across multiple interfaces.)
3274 * The only way to find out is to do a route lookup.
3275 */
3276 if (ifa != NULL && ro->ro_rt == NULL) {
3277 struct rtentry *rt;
3278 struct sockaddr_in sin;
3279 struct ifaddr *oifa = NULL;
3280
3281 bzero(&sin, sizeof (sin));
3282 sin.sin_family = AF_INET;
3283 sin.sin_len = sizeof (sin);
3284 sin.sin_addr = dst;
3285
3286 lck_mtx_lock(rnh_lock);
3287 if ((rt = rt_lookup(TRUE, (struct sockaddr *)&sin, NULL,
3288 rt_tables[AF_INET], IFSCOPE_NONE)) != NULL) {
3289 RT_LOCK(rt);
3290 /*
3291 * If the route uses a different interface,
3292 * use that one instead. The IP address of
3293 * the ifaddr that we pick up here is not
3294 * relevant.
3295 */
3296 if (ifa->ifa_ifp != rt->rt_ifp) {
3297 oifa = ifa;
3298 ifa = rt->rt_ifa;
3299 ifaref(ifa);
3300 RT_UNLOCK(rt);
3301 } else {
3302 RT_UNLOCK(rt);
3303 }
3304 rtfree_locked(rt);
3305 }
3306 lck_mtx_unlock(rnh_lock);
3307
3308 if (oifa != NULL) {
3309 struct ifaddr *iifa;
3310
3311 /*
3312 * See if the interface pointed to by the
3313 * route is configured with the source IP
3314 * address of the packet.
3315 */
3316 iifa = (struct ifaddr *)ifa_foraddr_scoped(
3317 src.s_addr, ifa->ifa_ifp->if_index);
3318
3319 if (iifa != NULL) {
3320 /*
3321 * Found it; drop the original one
3322 * as well as the route interface
3323 * address, and use this instead.
3324 */
3325 ifafree(oifa);
3326 ifafree(ifa);
3327 ifa = iifa;
3328 } else if (!ipforwarding ||
3329 (rt->rt_flags & RTF_GATEWAY)) {
3330 /*
3331 * This interface doesn't have that
3332 * source IP address; drop the route
3333 * interface address and just use the
3334 * original one, and let the caller
3335 * do a scoped route lookup.
3336 */
3337 ifafree(ifa);
3338 ifa = oifa;
3339 } else {
3340 /*
3341 * Forwarding is enabled and the source
3342 * address belongs to one of our own
3343 * interfaces which isn't the outgoing
3344 * interface, and we have a route, and
3345 * the destination is on a network that
3346 * is directly attached (onlink); drop
3347 * the original one and use the route
3348 * interface address instead.
3349 */
3350 ifafree(oifa);
3351 }
3352 }
3353 } else if (ifa != NULL && ro->ro_rt != NULL &&
3354 !(ro->ro_rt->rt_flags & RTF_GATEWAY) &&
3355 ifa->ifa_ifp != ro->ro_rt->rt_ifp && ipforwarding) {
3356 /*
3357 * Forwarding is enabled and the source address belongs
3358 * to one of our own interfaces which isn't the same
3359 * as the interface used by the known route; drop the
3360 * original one and use the route interface address.
3361 */
3362 ifafree(ifa);
3363 ifa = ro->ro_rt->rt_ifa;
3364 ifaref(ifa);
3365 }
3366
3367 if (ip_select_srcif_debug && ifa != NULL) {
3368 printf("%s->%s ifscope %d ifa_if %s%d\n",
3369 s_src, s_dst, ifscope, ifa->ifa_ifp->if_name,
3370 ifa->ifa_ifp->if_unit);
3371 }
3372 }
3373
3374 if (ro->ro_rt != NULL)
3375 RT_LOCK_ASSERT_HELD(ro->ro_rt);
3376 /*
3377 * If there is a non-loopback route with the wrong interface, or if
3378 * there is no interface configured with such an address, blow it
3379 * away. Except for local/loopback, we look for one with a matching
3380 * interface scope/index.
3381 */
3382 if (ro->ro_rt != NULL &&
3383 (ifa == NULL || (ifa->ifa_ifp != rt_ifp && rt_ifp != lo_ifp) ||
3384 !(ro->ro_rt->rt_flags & RTF_UP))) {
3385 if (ip_select_srcif_debug) {
3386 if (ifa != NULL) {
3387 printf("%s->%s ifscope %d ro_if %s%d != "
3388 "ifa_if %s%d (cached route cleared)\n",
3389 s_src, s_dst, ifscope, rt_ifp->if_name,
3390 rt_ifp->if_unit, ifa->ifa_ifp->if_name,
3391 ifa->ifa_ifp->if_unit);
3392 } else {
3393 printf("%s->%s ifscope %d ro_if %s%d "
3394 "(no ifa_if found)\n",
3395 s_src, s_dst, ifscope, rt_ifp->if_name,
3396 rt_ifp->if_unit);
3397 }
3398 }
3399
3400 RT_UNLOCK(ro->ro_rt);
3401 rtfree(ro->ro_rt);
3402 ro->ro_rt = NULL;
3403 ro->ro_flags &= ~ROF_SRCIF_SELECTED;
3404
3405 /*
3406 * If the destination is IPv4 LLA and the route's interface
3407 * doesn't match the source interface, then the source IP
3408 * address is wrong; it most likely belongs to the primary
3409 * interface associated with the IPv4 LL subnet. Drop the
3410 * packet rather than letting it go out and return an error
3411 * to the ULP. This actually applies not only to IPv4 LL
3412 * but other shared subnets; for now we explicitly test only
3413 * for the former case and save the latter for future.
3414 */
3415 if (IN_LINKLOCAL(ntohl(dst.s_addr)) &&
3416 !IN_LINKLOCAL(ntohl(src.s_addr)) && ifa != NULL) {
3417 ifafree(ifa);
3418 ifa = NULL;
3419 }
3420 }
3421
3422 if (ip_select_srcif_debug && ifa == NULL) {
3423 printf("%s->%s ifscope %d (neither ro_if/ifa_if found)\n",
3424 s_src, s_dst, ifscope);
3425 }
3426
3427 /*
3428 * If there is a route, mark it accordingly. If there isn't one,
3429 * we'll get here again during the next transmit (possibly with a
3430 * route) and the flag will get set at that point. For IPv4 LLA
3431 * destination, mark it only if the route has been fully resolved;
3432 * otherwise we want to come back here again when the route points
3433 * to the interface over which the ARP reply arrives on.
3434 */
3435 if (ro->ro_rt != NULL && (!IN_LINKLOCAL(ntohl(dst.s_addr)) ||
3436 (ro->ro_rt->rt_gateway->sa_family == AF_LINK &&
3437 SDL(ro->ro_rt->rt_gateway)->sdl_alen != 0))) {
3438 ro->ro_flags |= ROF_SRCIF_SELECTED;
3439 ro->ro_rt->generation_id = route_generation;
3440 }
3441
3442 if (ro->ro_rt != NULL)
3443 RT_UNLOCK(ro->ro_rt);
3444
3445 return (ifa);
3446 }
3447
3448 /*
3449 * Handler for setting IP_FORCE_OUT_IFP or IP_BOUND_IF socket option.
3450 */
3451 static void
3452 ip_bindif(struct inpcb *inp, unsigned int ifscope)
3453 {
3454 /*
3455 * A zero interface scope value indicates an "unbind".
3456 * Otherwise, take in whatever value the app desires;
3457 * the app may already know the scope (or force itself
3458 * to such a scope) ahead of time before the interface
3459 * gets attached. It doesn't matter either way; any
3460 * route lookup from this point on will require an
3461 * exact match for the embedded interface scope.
3462 */
3463 inp->inp_boundif = ifscope;
3464 if (inp->inp_boundif == IFSCOPE_NONE)
3465 inp->inp_flags &= ~INP_BOUND_IF;
3466 else
3467 inp->inp_flags |= INP_BOUND_IF;
3468
3469 /* Blow away any cached route in the PCB */
3470 if (inp->inp_route.ro_rt != NULL) {
3471 rtfree(inp->inp_route.ro_rt);
3472 inp->inp_route.ro_rt = NULL;
3473 }
3474 }