]> git.saurik.com Git - apple/xnu.git/blob - bsd/netinet/ip_output.c
2a1fef6d446d9b6e0c1c4f1e764e1e417b87bc33
[apple/xnu.git] / bsd / netinet / ip_output.c
1 /*
2 * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1982, 1986, 1988, 1990, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94
61 * $FreeBSD: src/sys/netinet/ip_output.c,v 1.99.2.16 2001/07/19 06:37:26 kris Exp $
62 */
63 /*
64 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
65 * support for mandatory and extensible security protections. This notice
66 * is included in support of clause 2.2 (b) of the Apple Public License,
67 * Version 2.0.
68 */
69
70 #define _IP_VHL
71
72 #include <sys/param.h>
73 #include <sys/systm.h>
74 #include <sys/kernel.h>
75 #include <sys/malloc.h>
76 #include <sys/mbuf.h>
77 #include <sys/protosw.h>
78 #include <sys/socket.h>
79 #include <sys/socketvar.h>
80 #include <kern/locks.h>
81 #include <sys/sysctl.h>
82
83 #include <machine/endian.h>
84
85 #include <net/if.h>
86 #include <net/if_dl.h>
87 #include <net/route.h>
88
89 #include <netinet/in.h>
90 #include <netinet/in_systm.h>
91 #include <netinet/ip.h>
92 #include <netinet/in_pcb.h>
93 #include <netinet/in_var.h>
94 #include <netinet/ip_var.h>
95
96 #include <netinet/kpi_ipfilter_var.h>
97
98 #if CONFIG_MACF_NET
99 #include <security/mac_framework.h>
100 #endif
101
102 #include "faith.h"
103
104 #include <net/dlil.h>
105 #include <sys/kdebug.h>
106 #include <libkern/OSAtomic.h>
107
/*
 * kdebug trace codes for the IP output path.  They are built with
 * NETDBG_CODE() in the DBG_NETIP class and handed to KERNEL_DEBUG().
 */
#define DBG_LAYER_BEG		NETDBG_CODE(DBG_NETIP, 1)
#define DBG_LAYER_END		NETDBG_CODE(DBG_NETIP, 3)
#define DBG_FNC_IP_OUTPUT	NETDBG_CODE(DBG_NETIP, (1 << 8) | 1)
#define DBG_FNC_IPSEC4_OUTPUT	NETDBG_CODE(DBG_NETIP, (2 << 8) | 1)

/*
 * Exchange the two low-order bytes of v.
 *
 * Both halves are masked to 8 bits, so an argument wider than 16 bits
 * cannot leak bits 16 and above into the low byte of the result.  For any
 * value that fits in 16 bits the result is the same as the unmasked form.
 *
 * v is evaluated twice; do not pass an expression with side effects.
 */
#define SWAP16(v)	((((v) & 0xff) << 8) | (((v) >> 8) & 0xff))
114
115 #if IPSEC
116 #include <netinet6/ipsec.h>
117 #include <netkey/key.h>
118 #if IPSEC_DEBUG
119 #include <netkey/key_debug.h>
120 #else
121 #define KEYDEBUG(lev,arg)
122 #endif
123 #endif /*IPSEC*/
124
125 #include <netinet/ip_fw.h>
126 #include <netinet/ip_divert.h>
127
128 #if DUMMYNET
129 #include <netinet/ip_dummynet.h>
130 #endif
131
132 #if PF
133 #include <net/pfvar.h>
134 #endif /* PF */
135
136 #if IPFIREWALL_FORWARD_DEBUG
/*
 * Debug helper: print the IPv4 address held in a (an object with an
 * s_addr member in network byte order) in dotted-quad form via printf().
 *
 * Each octet is masked to 0..255 and passed as unsigned int to match the
 * %u conversions; ntohl() yields a 32-bit quantity, so a "long" conversion
 * would not match the argument where long is wider than 32 bits.
 *
 * The argument is evaluated four times.  The trailing semicolon is part
 * of the expansion, as it always has been, so existing call sites keep
 * expanding to a complete statement.
 */
#define print_ip(a)	printf("%u.%u.%u.%u",				\
		(unsigned int)((ntohl((a).s_addr) >> 24) & 0xFF),	\
		(unsigned int)((ntohl((a).s_addr) >> 16) & 0xFF),	\
		(unsigned int)((ntohl((a).s_addr) >> 8) & 0xFF),	\
		(unsigned int)(ntohl((a).s_addr) & 0xFF));
141 #endif
142
143
144 u_short ip_id;
145
146 static struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *);
147 static struct ifnet *ip_multicast_if(struct in_addr *, int *);
148 static void ip_mloopback(struct ifnet *, struct mbuf *,
149 struct sockaddr_in *, int);
150 static int ip_getmoptions(struct sockopt *, struct ip_moptions *);
151 static int ip_pcbopts(int, struct mbuf **, struct mbuf *);
152 static int ip_setmoptions(struct sockopt *, struct ip_moptions **);
153
154 static void ip_out_cksum_stats(int, u_int32_t);
155 static struct ifaddr *in_selectsrcif(struct ip *, struct route *, unsigned int);
156 static void ip_bindif(struct inpcb *, unsigned int);
157
158 int ip_createmoptions(struct ip_moptions **imop);
159 int ip_addmembership(struct ip_moptions *imo, struct ip_mreq *mreq);
160 int ip_dropmembership(struct ip_moptions *imo, struct ip_mreq *mreq);
161 int ip_optcopy(struct ip *, struct ip *);
162 void in_delayed_cksum_offset(struct mbuf *, int );
163 void in_cksum_offset(struct mbuf* , size_t );
164
165 extern int (*fr_checkp)(struct ip *, int, struct ifnet *, int, struct mbuf **);
166
167 extern struct protosw inetsw[];
168
169 extern struct ip_linklocal_stat ip_linklocal_stat;
170 extern lck_mtx_t *ip_mutex;
171
172 /* temporary: for testing */
173 #if IPSEC
174 extern int ipsec_bypass;
175 #endif
176
177 static int ip_maxchainsent = 0;
178 SYSCTL_INT(_net_inet_ip, OID_AUTO, maxchainsent, CTLFLAG_RW,
179 &ip_maxchainsent, 0, "use dlil_output_list");
180 #if DEBUG
181 static int forge_ce = 0;
182 SYSCTL_INT(_net_inet_ip, OID_AUTO, forge_ce, CTLFLAG_RW,
183 &forge_ce, 0, "Forge ECN CE");
184 #endif /* DEBUG */
185
186 static int ip_select_srcif_debug = 0;
187 SYSCTL_INT(_net_inet_ip, OID_AUTO, select_srcif_debug, CTLFLAG_RW,
188 &ip_select_srcif_debug, 0, "log source interface selection debug info");
189
/*
 * IP output for a single packet.  The packet in mbuf chain m0 contains a
 * skeletal IP header (with len, off, ttl, proto, tos, src, dst).
 * The mbuf chain containing the packet will be freed.
 * The mbuf opt, if present, will not be freed.
 *
 * This is a thin wrapper: every argument is handed to ip_output_list()
 * with a packetchain value of 0, and that routine's return value is
 * passed back to the caller unchanged.
 */
int
ip_output(
	struct mbuf *m0,
	struct mbuf *opt,
	struct route *ro,
	int flags,
	struct ip_moptions *imo,
	struct ip_out_args *ipoa)
{
	return (ip_output_list(m0, 0, opt, ro, flags, imo, ipoa));
}
209
210 /*
211 * Returns: 0 Success
212 * ENOMEM
213 * EADDRNOTAVAIL
214 * ENETUNREACH
215 * EHOSTUNREACH
216 * EACCES
217 * EMSGSIZE
218 * ENOBUFS
219 * ipsec4_getpolicybyaddr:??? [IPSEC 4th argument, contents modified]
220 * ipsec4_getpolicybysock:??? [IPSEC 4th argument, contents modified]
221 * key_spdacquire:??? [IPSEC]
222 * ipsec4_output:??? [IPSEC]
223 * <fr_checkp>:??? [firewall]
224 * ip_dn_io_ptr:??? [dummynet]
225 * dlil_output:??? [DLIL]
226 * dlil_output_list:??? [DLIL]
227 *
228 * Notes: The ipsec4_getpolicyby{addr|sock} function error returns are
229 * only used as the error return from this function where one of
230 * these functions fails to return a policy.
231 */
232 int
233 ip_output_list(
234 struct mbuf *m0,
235 int packetchain,
236 struct mbuf *opt,
237 struct route *ro,
238 int flags,
239 struct ip_moptions *imo,
240 struct ip_out_args *ipoa
241 )
242 {
243 struct ip *ip;
244 struct ifnet *ifp = NULL;
245 struct mbuf *m = m0, **mppn = NULL;
246 int hlen = sizeof (struct ip);
247 int len = 0, off, error = 0;
248 struct sockaddr_in *dst = NULL;
249 struct in_ifaddr *ia = NULL, *src_ia = NULL;
250 int isbroadcast, sw_csum;
251 struct in_addr pkt_dst;
252 #if IPSEC
253 struct route iproute;
254 struct socket *so = NULL;
255 struct secpolicy *sp = NULL;
256 #endif
257 #if IPFIREWALL_FORWARD
258 int fwd_rewrite_src = 0;
259 #endif
260 #if IPFIREWALL
261 struct ip_fw_args args;
262 #endif
263 int didfilter = 0;
264 ipfilter_t inject_filter_ref = 0;
265 struct m_tag *tag;
266 struct route saved_route;
267 struct ip_out_args saved_ipoa;
268 struct mbuf * packetlist;
269 int pktcnt = 0, tso = 0;
270 unsigned int ifscope;
271 boolean_t select_srcif;
272
273 KERNEL_DEBUG(DBG_FNC_IP_OUTPUT | DBG_FUNC_START, 0,0,0,0,0);
274
275 packetlist = m0;
276 #if IPFIREWALL
277 args.next_hop = NULL;
278 args.eh = NULL;
279 args.rule = NULL;
280 args.divert_rule = 0; /* divert cookie */
281 args.ipoa = NULL;
282
283 if (SLIST_EMPTY(&m0->m_pkthdr.tags))
284 goto ipfw_tags_done;
285
286 /* Grab info from mtags prepended to the chain */
287 #if DUMMYNET
288 if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID,
289 KERNEL_TAG_TYPE_DUMMYNET, NULL)) != NULL) {
290 struct dn_pkt_tag *dn_tag;
291
292 dn_tag = (struct dn_pkt_tag *)(tag+1);
293 args.rule = dn_tag->rule;
294 opt = NULL;
295 saved_route = dn_tag->ro;
296 ro = &saved_route;
297
298 imo = NULL;
299 dst = dn_tag->dn_dst;
300 ifp = dn_tag->ifp;
301 flags = dn_tag->flags;
302 saved_ipoa = dn_tag->ipoa;
303 ipoa = &saved_ipoa;
304
305 m_tag_delete(m0, tag);
306 }
307 #endif /* DUMMYNET */
308
309 #if IPDIVERT
310 if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID,
311 KERNEL_TAG_TYPE_DIVERT, NULL)) != NULL) {
312 struct divert_tag *div_tag;
313
314 div_tag = (struct divert_tag *)(tag+1);
315 args.divert_rule = div_tag->cookie;
316
317 m_tag_delete(m0, tag);
318 }
319 #endif /* IPDIVERT */
320
321 if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID,
322 KERNEL_TAG_TYPE_IPFORWARD, NULL)) != NULL) {
323 struct ip_fwd_tag *ipfwd_tag;
324
325 ipfwd_tag = (struct ip_fwd_tag *)(tag+1);
326 args.next_hop = ipfwd_tag->next_hop;
327
328 m_tag_delete(m0, tag);
329 }
330 ipfw_tags_done:
331 #endif /* IPFIREWALL */
332
333 m = m0;
334
335 #if DIAGNOSTIC
336 if ( !m || (m->m_flags & M_PKTHDR) != 0)
337 panic("ip_output no HDR");
338 if (!ro)
339 panic("ip_output no route, proto = %d",
340 mtod(m, struct ip *)->ip_p);
341 #endif
342
343 /*
344 * At present the IP_OUTARGS flag implies a request for IP to
345 * perform source interface selection. In the forwarding case,
346 * only the ifscope value is used, as source interface selection
347 * doesn't take place.
348 */
349 if (ip_doscopedroute && (flags & IP_OUTARGS)) {
350 select_srcif = !(flags & IP_FORWARDING);
351 ifscope = ipoa->ipoa_ifscope;
352 } else {
353 select_srcif = FALSE;
354 ifscope = IFSCOPE_NONE;
355 }
356
357 #if IPFIREWALL
358 if (args.rule != NULL) { /* dummynet already saw us */
359 ip = mtod(m, struct ip *);
360 hlen = IP_VHL_HL(ip->ip_vhl) << 2 ;
361 if (ro->ro_rt != NULL) {
362 RT_LOCK_SPIN(ro->ro_rt);
363 ia = (struct in_ifaddr *)ro->ro_rt->rt_ifa;
364 if (ia)
365 ifaref(&ia->ia_ifa);
366 RT_UNLOCK(ro->ro_rt);
367 }
368 #if IPSEC
369 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) {
370 so = ipsec_getsocket(m);
371 (void)ipsec_setsocket(m, NULL);
372 }
373 #endif
374 goto sendit;
375 }
376 #endif /* IPFIREWALL */
377
378 #if IPSEC
379 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) {
380 so = ipsec_getsocket(m);
381 (void)ipsec_setsocket(m, NULL);
382 }
383 #endif
384 loopit:
385 /*
386 * No need to process packet twice if we've
387 * already seen it
388 */
389 if (!SLIST_EMPTY(&m->m_pkthdr.tags))
390 inject_filter_ref = ipf_get_inject_filter(m);
391 else
392 inject_filter_ref = 0;
393
394 if (opt) {
395 m = ip_insertoptions(m, opt, &len);
396 hlen = len;
397 }
398 ip = mtod(m, struct ip *);
399 #if IPFIREWALL
400 pkt_dst = args.next_hop ? args.next_hop->sin_addr : ip->ip_dst;
401 #else
402 pkt_dst = ip->ip_dst;
403 #endif
404
405 /*
406 * Fill in IP header.
407 */
408 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
409 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, hlen >> 2);
410 ip->ip_off &= IP_DF;
411 #if RANDOM_IP_ID
412 ip->ip_id = ip_randomid();
413 #else
414 ip->ip_id = htons(ip_id++);
415 #endif
416 OSAddAtomic(1, &ipstat.ips_localout);
417 } else {
418 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
419 }
420
421 #if DEBUG
422 /* For debugging, we let the stack forge congestion */
423 if (forge_ce != 0 &&
424 ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_ECT1 ||
425 (ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_ECT0)) {
426 ip->ip_tos = (ip->ip_tos & ~IPTOS_ECN_MASK) | IPTOS_ECN_CE;
427 forge_ce--;
428 }
429 #endif /* DEBUG */
430
431 KERNEL_DEBUG(DBG_LAYER_BEG, ip->ip_dst.s_addr,
432 ip->ip_src.s_addr, ip->ip_p, ip->ip_off, ip->ip_len);
433
434 dst = (struct sockaddr_in *)&ro->ro_dst;
435
436 /*
437 * If there is a cached route,
438 * check that it is to the same destination
439 * and is still up. If not, free it and try again.
440 * The address family should also be checked in case of sharing the
441 * cache with IPv6.
442 */
443
444 if (ro->ro_rt != NULL) {
445 if (ro->ro_rt->generation_id != route_generation &&
446 ((flags & (IP_ROUTETOIF | IP_FORWARDING)) == 0) &&
447 (ip->ip_src.s_addr != INADDR_ANY)) {
448 src_ia = ifa_foraddr(ip->ip_src.s_addr);
449 if (src_ia == NULL) {
450 error = EADDRNOTAVAIL;
451 goto bad;
452 }
453 ifafree(&src_ia->ia_ifa);
454 }
455 /*
456 * Test rt_flags without holding rt_lock for performance
457 * reasons; if the route is down it will hopefully be
458 * caught by the layer below (since it uses this route
459 * as a hint) or during the next transmit.
460 */
461 if ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
462 dst->sin_family != AF_INET ||
463 dst->sin_addr.s_addr != pkt_dst.s_addr) {
464 rtfree(ro->ro_rt);
465 ro->ro_rt = NULL;
466 }
467 /*
468 * If we're doing source interface selection, we may not
469 * want to use this route; only synch up the generation
470 * count otherwise.
471 */
472 if (!select_srcif && ro->ro_rt != NULL &&
473 ro->ro_rt->generation_id != route_generation)
474 ro->ro_rt->generation_id = route_generation;
475 }
476 if (ro->ro_rt == NULL) {
477 bzero(dst, sizeof(*dst));
478 dst->sin_family = AF_INET;
479 dst->sin_len = sizeof(*dst);
480 dst->sin_addr = pkt_dst;
481 }
482 /*
483 * If routing to interface only,
484 * short circuit routing lookup.
485 */
486 #define ifatoia(ifa) ((struct in_ifaddr *)(ifa))
487 #define sintosa(sin) ((struct sockaddr *)(sin))
488 if (flags & IP_ROUTETOIF) {
489 if (ia)
490 ifafree(&ia->ia_ifa);
491 if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0) {
492 if ((ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) {
493 OSAddAtomic(1, &ipstat.ips_noroute);
494 error = ENETUNREACH;
495 goto bad;
496 }
497 }
498 ifp = ia->ia_ifp;
499 ip->ip_ttl = 1;
500 isbroadcast = in_broadcast(dst->sin_addr, ifp);
501 } else if (IN_MULTICAST(ntohl(pkt_dst.s_addr)) &&
502 imo != NULL && imo->imo_multicast_ifp != NULL) {
503 /*
504 * Bypass the normal routing lookup for multicast
505 * packets if the interface is specified.
506 */
507 ifp = imo->imo_multicast_ifp;
508 isbroadcast = 0;
509 if (ia != NULL)
510 ifafree(&ia->ia_ifa);
511
512 /* Macro takes reference on ia */
513 IFP_TO_IA(ifp, ia);
514 } else {
515 boolean_t cloneok = FALSE;
516 /*
517 * Perform source interface selection; the source IP address
518 * must belong to one of the addresses of the interface used
519 * by the route. For performance reasons, do this only if
520 * there is no route, or if the routing table has changed,
521 * or if we haven't done source interface selection on this
522 * route (for this PCB instance) before.
523 */
524 if (select_srcif && ip->ip_src.s_addr != INADDR_ANY &&
525 (ro->ro_rt == NULL || !(ro->ro_rt->rt_flags & RTF_UP) ||
526 ro->ro_rt->generation_id != route_generation ||
527 !(ro->ro_flags & ROF_SRCIF_SELECTED))) {
528 struct ifaddr *ifa;
529
530 /* Find the source interface */
531 ifa = in_selectsrcif(ip, ro, ifscope);
532
533 /*
534 * If the source address is spoofed (in the case
535 * of IP_RAWOUTPUT), or if this is destined for
536 * local/loopback, just let it go out using the
537 * interface of the route. Otherwise, there's no
538 * interface having such an address, so bail out.
539 */
540 if (ifa == NULL && !(flags & IP_RAWOUTPUT) &&
541 ifscope != lo_ifp->if_index) {
542 error = EADDRNOTAVAIL;
543 goto bad;
544 }
545
546 /*
547 * If the caller didn't explicitly specify the scope,
548 * pick it up from the source interface. If the cached
549 * route was wrong and was blown away as part of source
550 * interface selection, don't mask out RTF_PRCLONING
551 * since that route may have been allocated by the ULP,
552 * unless the IP header was created by the caller or
553 * the destination is IPv4 LLA. The check for the
554 * latter is needed because IPv4 LLAs are never scoped
555 * in the current implementation, and we don't want to
556 * replace the resolved IPv4 LLA route with one whose
557 * gateway points to that of the default gateway on
558 * the primary interface of the system.
559 */
560 if (ifa != NULL) {
561 if (ifscope == IFSCOPE_NONE)
562 ifscope = ifa->ifa_ifp->if_index;
563 ifafree(ifa);
564 cloneok = (!(flags & IP_RAWOUTPUT) &&
565 !(IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr))));
566 }
567 }
568
569 /*
570 * If this is the case, we probably don't want to allocate
571 * a protocol-cloned route since we didn't get one from the
572 * ULP. This lets TCP do its thing, while not burdening
573 * forwarding or ICMP with the overhead of cloning a route.
574 * Of course, we still want to do any cloning requested by
575 * the link layer, as this is probably required in all cases
576 * for correct operation (as it is for ARP).
577 */
578 if (ro->ro_rt == NULL) {
579 unsigned long ign = RTF_PRCLONING;
580 /*
581 * We make an exception here: if the destination
582 * address is INADDR_BROADCAST, allocate a protocol-
583 * cloned host route so that we end up with a route
584 * marked with the RTF_BROADCAST flag. Otherwise,
585 * we would end up referring to the default route,
586 * instead of creating a cloned host route entry.
587 * That would introduce inconsistencies between ULPs
588 * that allocate a route and those that don't. The
589 * RTF_BROADCAST route is important since we'd want
590 * to send out undirected IP broadcast packets using
591 * link-level broadcast address. Another exception
592 * is for ULP-created routes that got blown away by
593 * source interface selection (see above).
594 *
595 * These exceptions will no longer be necessary when
596 * the RTF_PRCLONING scheme is no longer present.
597 */
598 if (cloneok || dst->sin_addr.s_addr == INADDR_BROADCAST)
599 ign &= ~RTF_PRCLONING;
600
601 /*
602 * Loosen the route lookup criteria if the ifscope
603 * corresponds to the loopback interface; this is
604 * needed to support Application Layer Gateways
605 * listening on loopback, in conjunction with packet
606 * filter redirection rules. The final source IP
607 * address will be rewritten by the packet filter
608 * prior to the RFC1122 loopback check below.
609 */
610 if (ifscope == lo_ifp->if_index)
611 rtalloc_ign(ro, ign);
612 else
613 rtalloc_scoped_ign(ro, ign, ifscope);
614 }
615
616 if (ro->ro_rt == NULL) {
617 OSAddAtomic(1, &ipstat.ips_noroute);
618 error = EHOSTUNREACH;
619 goto bad;
620 }
621
622 if (ia)
623 ifafree(&ia->ia_ifa);
624 RT_LOCK_SPIN(ro->ro_rt);
625 ia = ifatoia(ro->ro_rt->rt_ifa);
626 if (ia)
627 ifaref(&ia->ia_ifa);
628 ifp = ro->ro_rt->rt_ifp;
629 ro->ro_rt->rt_use++;
630 if (ro->ro_rt->rt_flags & RTF_GATEWAY)
631 dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
632 if (ro->ro_rt->rt_flags & RTF_HOST)
633 isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST);
634 else
635 isbroadcast = in_broadcast(dst->sin_addr, ifp);
636 RT_UNLOCK(ro->ro_rt);
637 }
638
639 if (IN_MULTICAST(ntohl(pkt_dst.s_addr))) {
640 struct in_multi *inm;
641
642 m->m_flags |= M_MCAST;
643 /*
644 * IP destination address is multicast. Make sure "dst"
645 * still points to the address in "ro". (It may have been
646 * changed to point to a gateway address, above.)
647 */
648 dst = (struct sockaddr_in *)&ro->ro_dst;
649 /*
650 * See if the caller provided any multicast options
651 */
652 if (imo != NULL) {
653 if ((flags & IP_RAWOUTPUT) == 0) ip->ip_ttl = imo->imo_multicast_ttl;
654 if (imo->imo_multicast_ifp != NULL) {
655 ifp = imo->imo_multicast_ifp;
656 }
657 #if MROUTING
658 if (imo->imo_multicast_vif != -1 &&
659 ((flags & IP_RAWOUTPUT) == 0 || ip->ip_src.s_addr == INADDR_ANY))
660 ip->ip_src.s_addr =
661 ip_mcast_src(imo->imo_multicast_vif);
662 #endif /* MROUTING */
663 } else
664 if ((flags & IP_RAWOUTPUT) == 0) ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
665 /*
666 * Confirm that the outgoing interface supports multicast.
667 */
668 if ((imo == NULL) || (imo->imo_multicast_vif == -1)) {
669 if ((ifp->if_flags & IFF_MULTICAST) == 0) {
670 OSAddAtomic(1, &ipstat.ips_noroute);
671 error = ENETUNREACH;
672 goto bad;
673 }
674 }
675 /*
676 * If source address not specified yet, use address
677 * of outgoing interface.
678 */
679 if (ip->ip_src.s_addr == INADDR_ANY) {
680 struct in_ifaddr *ia1;
681 lck_rw_lock_shared(in_ifaddr_rwlock);
682 TAILQ_FOREACH(ia1, &in_ifaddrhead, ia_link)
683 if (ia1->ia_ifp == ifp) {
684 ip->ip_src = IA_SIN(ia1)->sin_addr;
685 break;
686 }
687 lck_rw_done(in_ifaddr_rwlock);
688 if (ip->ip_src.s_addr == INADDR_ANY) {
689 error = ENETUNREACH;
690 goto bad;
691 }
692 }
693
694 ifnet_lock_shared(ifp);
695 IN_LOOKUP_MULTI(pkt_dst, ifp, inm);
696 ifnet_lock_done(ifp);
697 if (inm != NULL &&
698 (imo == NULL || imo->imo_multicast_loop)) {
699 /*
700 * If we belong to the destination multicast group
701 * on the outgoing interface, and the caller did not
702 * forbid loopback, loop back a copy.
703 */
704 if (!TAILQ_EMPTY(&ipv4_filters)) {
705 struct ipfilter *filter;
706 int seen = (inject_filter_ref == 0);
707 struct ipf_pktopts *ippo = 0, ipf_pktopts;
708
709 if (imo) {
710 ippo = &ipf_pktopts;
711 ipf_pktopts.ippo_mcast_ifnet = imo->imo_multicast_ifp;
712 ipf_pktopts.ippo_mcast_ttl = imo->imo_multicast_ttl;
713 ipf_pktopts.ippo_mcast_loop = imo->imo_multicast_loop;
714 }
715
716 ipf_ref();
717
718 /* 4135317 - always pass network byte order to filter */
719
720 #if BYTE_ORDER != BIG_ENDIAN
721 HTONS(ip->ip_len);
722 HTONS(ip->ip_off);
723 #endif
724
725 TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) {
726 if (seen == 0) {
727 if ((struct ipfilter *)inject_filter_ref == filter)
728 seen = 1;
729 } else if (filter->ipf_filter.ipf_output) {
730 errno_t result;
731 result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, ippo);
732 if (result == EJUSTRETURN) {
733 ipf_unref();
734 goto done;
735 }
736 if (result != 0) {
737 ipf_unref();
738 goto bad;
739 }
740 }
741 }
742
743 /* set back to host byte order */
744 ip = mtod(m, struct ip *);
745
746 #if BYTE_ORDER != BIG_ENDIAN
747 NTOHS(ip->ip_len);
748 NTOHS(ip->ip_off);
749 #endif
750
751 ipf_unref();
752 didfilter = 1;
753 }
754 ip_mloopback(ifp, m, dst, hlen);
755 }
756 #if MROUTING
757 else {
758 /*
759 * If we are acting as a multicast router, perform
760 * multicast forwarding as if the packet had just
761 * arrived on the interface to which we are about
762 * to send. The multicast forwarding function
763 * recursively calls this function, using the
764 * IP_FORWARDING flag to prevent infinite recursion.
765 *
766 * Multicasts that are looped back by ip_mloopback(),
767 * above, will be forwarded by the ip_input() routine,
768 * if necessary.
769 */
770 if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
771 /*
772 * Check if rsvp daemon is running. If not, don't
773 * set ip_moptions. This ensures that the packet
774 * is multicast and not just sent down one link
775 * as prescribed by rsvpd.
776 */
777 if (!rsvp_on)
778 imo = NULL;
779 if (ip_mforward(ip, ifp, m, imo) != 0) {
780 m_freem(m);
781 goto done;
782 }
783 }
784 }
785 #endif /* MROUTING */
786
787 /*
788 * Multicasts with a time-to-live of zero may be looped-
789 * back, above, but must not be transmitted on a network.
790 * Also, multicasts addressed to the loopback interface
791 * are not sent -- the above call to ip_mloopback() will
792 * loop back a copy if this host actually belongs to the
793 * destination group on the loopback interface.
794 */
795 if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) {
796 m_freem(m);
797 goto done;
798 }
799
800 goto sendit;
801 }
802 #ifndef notdef
803 /*
804 * If source address not specified yet, use address
805 * of outgoing interface.
806 */
807 if (ip->ip_src.s_addr == INADDR_ANY) {
808 ip->ip_src = IA_SIN(ia)->sin_addr;
809 #if IPFIREWALL_FORWARD
810 /* Keep note that we did this - if the firewall changes
811 * the next-hop, our interface may change, changing the
812 * default source IP. It's a shame so much effort happens
813 * twice. Oh well.
814 */
815 fwd_rewrite_src++;
816 #endif /* IPFIREWALL_FORWARD */
817 }
818 #endif /* notdef */
819
820 /*
821 * Look for broadcast address and
822 * verify user is allowed to send
823 * such a packet.
824 */
825 if (isbroadcast) {
826 if ((ifp->if_flags & IFF_BROADCAST) == 0) {
827 error = EADDRNOTAVAIL;
828 goto bad;
829 }
830 if ((flags & IP_ALLOWBROADCAST) == 0) {
831 error = EACCES;
832 goto bad;
833 }
834 /* don't allow broadcast messages to be fragmented */
835 if ((u_short)ip->ip_len > ifp->if_mtu) {
836 error = EMSGSIZE;
837 goto bad;
838 }
839 m->m_flags |= M_BCAST;
840 } else {
841 m->m_flags &= ~M_BCAST;
842 }
843
844 sendit:
845 #if PF
846 /* Invoke outbound packet filter */
847 if (pf_af_hook(ifp, mppn, &m, AF_INET, FALSE) != 0) {
848 if (packetlist == m0) {
849 packetlist = m;
850 mppn = NULL;
851 }
852 if (m != NULL) {
853 m0 = m;
854 /* Next packet in the chain */
855 goto loopit;
856 } else if (packetlist != NULL) {
857 /* No more packet; send down the chain */
858 goto sendchain;
859 }
860 /* Nothing left; we're done */
861 goto done;
862 }
863 m0 = m;
864 ip = mtod(m, struct ip *);
865 pkt_dst = ip->ip_dst;
866 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
867 #endif /* PF */
868 /*
869 * Force IP TTL to 255 following draft-ietf-zeroconf-ipv4-linklocal.txt
870 */
871 if (IN_LINKLOCAL(ntohl(ip->ip_src.s_addr)) || IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr))) {
872 ip_linklocal_stat.iplls_out_total++;
873 if (ip->ip_ttl != MAXTTL) {
874 ip_linklocal_stat.iplls_out_badttl++;
875 ip->ip_ttl = MAXTTL;
876 }
877 }
878
879 if (!didfilter && !TAILQ_EMPTY(&ipv4_filters)) {
880 struct ipfilter *filter;
881 int seen = (inject_filter_ref == 0);
882
883 /* Check that a TSO frame isn't passed to a filter.
884 * This could happen if a filter is inserted while
885 * TCP is sending the TSO packet.
886 */
887 if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4) {
888 error = EMSGSIZE;
889 goto bad;
890 }
891
892 ipf_ref();
893
894 /* 4135317 - always pass network byte order to filter */
895
896 #if BYTE_ORDER != BIG_ENDIAN
897 HTONS(ip->ip_len);
898 HTONS(ip->ip_off);
899 #endif
900
901 TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) {
902 if (seen == 0) {
903 if ((struct ipfilter *)inject_filter_ref == filter)
904 seen = 1;
905 } else if (filter->ipf_filter.ipf_output) {
906 errno_t result;
907 result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, 0);
908 if (result == EJUSTRETURN) {
909 ipf_unref();
910 goto done;
911 }
912 if (result != 0) {
913 ipf_unref();
914 goto bad;
915 }
916 }
917 }
918
919 /* set back to host byte order */
920 ip = mtod(m, struct ip *);
921
922 #if BYTE_ORDER != BIG_ENDIAN
923 NTOHS(ip->ip_len);
924 NTOHS(ip->ip_off);
925 #endif
926
927 ipf_unref();
928 }
929
930 #if IPSEC
931 /* temporary for testing only: bypass ipsec altogether */
932
933 if (ipsec_bypass != 0 || (flags & IP_NOIPSEC) != 0)
934 goto skip_ipsec;
935
936 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_START, 0,0,0,0,0);
937
938
939 /* get SP for this packet */
940 if (so == NULL)
941 sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, flags, &error);
942 else
943 sp = ipsec4_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error);
944
945 if (sp == NULL) {
946 IPSEC_STAT_INCREMENT(ipsecstat.out_inval);
947 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 0,0,0,0,0);
948 goto bad;
949 }
950
951 error = 0;
952
953 /* check policy */
954 switch (sp->policy) {
955 case IPSEC_POLICY_DISCARD:
956 case IPSEC_POLICY_GENERATE:
957 /*
958 * This packet is just discarded.
959 */
960 IPSEC_STAT_INCREMENT(ipsecstat.out_polvio);
961 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 1,0,0,0,0);
962 goto bad;
963
964 case IPSEC_POLICY_BYPASS:
965 case IPSEC_POLICY_NONE:
966 /* no need to do IPsec. */
967 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 2,0,0,0,0);
968 goto skip_ipsec;
969
970 case IPSEC_POLICY_IPSEC:
971 if (sp->req == NULL) {
972 /* acquire a policy */
973 error = key_spdacquire(sp);
974 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 3,0,0,0,0);
975 goto bad;
976 }
977 break;
978
979 case IPSEC_POLICY_ENTRUST:
980 default:
981 printf("ip_output: Invalid policy found. %d\n", sp->policy);
982 }
983 {
984 struct ipsec_output_state state;
985 bzero(&state, sizeof(state));
986 state.m = m;
987 if (flags & IP_ROUTETOIF) {
988 state.ro = &iproute;
989 bzero(&iproute, sizeof(iproute));
990 } else
991 state.ro = ro;
992 state.dst = (struct sockaddr *)dst;
993
994 ip->ip_sum = 0;
995
996 /*
997 * XXX
998 * delayed checksums are not currently compatible with IPsec
999 */
1000 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
1001 in_delayed_cksum(m);
1002 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1003 }
1004
1005
1006 #if BYTE_ORDER != BIG_ENDIAN
1007 HTONS(ip->ip_len);
1008 HTONS(ip->ip_off);
1009 #endif
1010
1011 error = ipsec4_output(&state, sp, flags);
1012
1013 m0 = m = state.m;
1014
1015 if (flags & IP_ROUTETOIF) {
1016 /*
1017 * if we have tunnel mode SA, we may need to ignore
1018 * IP_ROUTETOIF.
1019 */
1020 if (state.ro != &iproute || state.ro->ro_rt != NULL) {
1021 flags &= ~IP_ROUTETOIF;
1022 ro = state.ro;
1023 }
1024 } else
1025 ro = state.ro;
1026
1027 dst = (struct sockaddr_in *)state.dst;
1028 if (error) {
1029 /* mbuf is already reclaimed in ipsec4_output. */
1030 m0 = NULL;
1031 switch (error) {
1032 case EHOSTUNREACH:
1033 case ENETUNREACH:
1034 case EMSGSIZE:
1035 case ENOBUFS:
1036 case ENOMEM:
1037 break;
1038 default:
1039 printf("ip4_output (ipsec): error code %d\n", error);
1040 /*fall through*/
1041 case ENOENT:
1042 /* don't show these error codes to the user */
1043 error = 0;
1044 break;
1045 }
1046 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 4,0,0,0,0);
1047 goto bad;
1048 }
1049 }
1050
1051 /* be sure to update variables that are affected by ipsec4_output() */
1052 ip = mtod(m, struct ip *);
1053
1054 #ifdef _IP_VHL
1055 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
1056 #else
1057 hlen = ip->ip_hl << 2;
1058 #endif
1059 /* Check that there wasn't a route change and src is still valid */
1060 if (ro->ro_rt != NULL && ro->ro_rt->generation_id != route_generation) {
1061 if ((src_ia = ifa_foraddr(ip->ip_src.s_addr)) == NULL &&
1062 ((flags & (IP_ROUTETOIF | IP_FORWARDING)) == 0)) {
1063 error = EADDRNOTAVAIL;
1064 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END,
1065 5,0,0,0,0);
1066 goto bad;
1067 }
1068 rtfree(ro->ro_rt);
1069 ro->ro_rt = NULL;
1070 if (src_ia != NULL)
1071 ifafree(&src_ia->ia_ifa);
1072 }
1073
1074 if (ro->ro_rt == NULL) {
1075 if ((flags & IP_ROUTETOIF) == 0) {
1076 printf("ip_output: can't update route after "
1077 "IPsec processing\n");
1078 error = EHOSTUNREACH; /*XXX*/
1079 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END,
1080 6,0,0,0,0);
1081 goto bad;
1082 }
1083 } else {
1084 if (ia)
1085 ifafree(&ia->ia_ifa);
1086 RT_LOCK_SPIN(ro->ro_rt);
1087 ia = ifatoia(ro->ro_rt->rt_ifa);
1088 if (ia)
1089 ifaref(&ia->ia_ifa);
1090 ifp = ro->ro_rt->rt_ifp;
1091 RT_UNLOCK(ro->ro_rt);
1092 }
1093
1094 /* make it flipped, again. */
1095
1096 #if BYTE_ORDER != BIG_ENDIAN
1097 NTOHS(ip->ip_len);
1098 NTOHS(ip->ip_off);
1099 #endif
1100
1101 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 7,0xff,0xff,0xff,0xff);
1102
1103 /* Pass to filters again */
1104 if (!TAILQ_EMPTY(&ipv4_filters)) {
1105 struct ipfilter *filter;
1106
1107 /* Check that a TSO frame isn't passed to a filter.
1108 * This could happen if a filter is inserted while
1109 * TCP is sending the TSO packet.
1110 */
1111 if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4) {
1112 error = EMSGSIZE;
1113 goto bad;
1114 }
1115
1116 ipf_ref();
1117
1118 /* 4135317 - always pass network byte order to filter */
1119
1120 #if BYTE_ORDER != BIG_ENDIAN
1121 HTONS(ip->ip_len);
1122 HTONS(ip->ip_off);
1123 #endif
1124
1125 TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) {
1126 if (filter->ipf_filter.ipf_output) {
1127 errno_t result;
1128 result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, 0);
1129 if (result == EJUSTRETURN) {
1130 ipf_unref();
1131 goto done;
1132 }
1133 if (result != 0) {
1134 ipf_unref();
1135 goto bad;
1136 }
1137 }
1138 }
1139
1140 /* set back to host byte order */
1141 ip = mtod(m, struct ip *);
1142
1143 #if BYTE_ORDER != BIG_ENDIAN
1144 NTOHS(ip->ip_len);
1145 NTOHS(ip->ip_off);
1146 #endif
1147
1148 ipf_unref();
1149 }
1150 skip_ipsec:
1151 #endif /*IPSEC*/
1152
1153 #if IPFIREWALL
1154 /*
1155 * IpHack's section.
1156 * - Xlate: translate packet's addr/port (NAT).
1157 * - Firewall: deny/allow/etc.
1158 * - Wrap: fake packet's addr/port <unimpl.>
1159 * - Encapsulate: put it in another IP and send out. <unimp.>
1160 */
1161 if (fr_checkp) {
1162 struct mbuf *m1 = m;
1163
1164 if ((error = (*fr_checkp)(ip, hlen, ifp, 1, &m1)) || !m1) {
1165 goto done;
1166 }
1167 ip = mtod(m0 = m = m1, struct ip *);
1168 }
1169
1170 /*
1171 * Check with the firewall...
1172 * but not if we are already being fwd'd from a firewall.
1173 */
1174 if (fw_enable && IPFW_LOADED && !args.next_hop) {
1175 struct sockaddr_in *old = dst;
1176
1177 args.m = m;
1178 args.next_hop = dst;
1179 args.oif = ifp;
1180 off = ip_fw_chk_ptr(&args);
1181 m = args.m;
1182 dst = args.next_hop;
1183
1184 /*
1185 * On return we must do the following:
1186 * IP_FW_PORT_DENY_FLAG -> drop the pkt (XXX new)
1187 * 1<=off<= 0xffff -> DIVERT
1188 * (off & IP_FW_PORT_DYNT_FLAG) -> send to a DUMMYNET pipe
1189 * (off & IP_FW_PORT_TEE_FLAG) -> TEE the packet
1190 * dst != old -> IPFIREWALL_FORWARD
1191 * off==0, dst==old -> accept
1192 * If some of the above modules is not compiled in, then
1193 * we shouldn't have to check the corresponding condition
1194 * (because the ipfw control socket should not accept
1195 * unsupported rules), but better play safe and drop
1196 * packets in case of doubt.
1197 */
1198 m0 = m;
1199 if ( (off & IP_FW_PORT_DENY_FLAG) || m == NULL) {
1200 if (m)
1201 m_freem(m);
1202 error = EACCES ;
1203 goto done ;
1204 }
1205 ip = mtod(m, struct ip *);
1206
1207 if (off == 0 && dst == old) {/* common case */
1208 goto pass ;
1209 }
1210 #if DUMMYNET
1211 if (DUMMYNET_LOADED && (off & IP_FW_PORT_DYNT_FLAG) != 0) {
1212 /*
1213 * pass the pkt to dummynet. Need to include
1214 * pipe number, m, ifp, ro, dst because these are
1215 * not recomputed in the next pass.
1216 * All other parameters have been already used and
1217 * so they are not needed anymore.
1218 * XXX note: if the ifp or ro entry are deleted
1219 * while a pkt is in dummynet, we are in trouble!
1220 */
1221 args.ro = ro;
1222 args.dst = dst;
1223 args.flags = flags;
1224 if (flags & IP_OUTARGS)
1225 args.ipoa = ipoa;
1226
1227 error = ip_dn_io_ptr(m, off & 0xffff, DN_TO_IP_OUT,
1228 &args);
1229 goto done;
1230 }
1231 #endif /* DUMMYNET */
1232 #if IPDIVERT
1233 if (off != 0 && (off & IP_FW_PORT_DYNT_FLAG) == 0) {
1234 struct mbuf *clone = NULL;
1235
1236 /* Clone packet if we're doing a 'tee' */
1237 if ((off & IP_FW_PORT_TEE_FLAG) != 0)
1238 clone = m_dup(m, M_DONTWAIT);
1239 /*
1240 * XXX
1241 * delayed checksums are not currently compatible
1242 * with divert sockets.
1243 */
1244 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
1245 in_delayed_cksum(m);
1246 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1247 }
1248
1249 /* Restore packet header fields to original values */
1250
1251 #if BYTE_ORDER != BIG_ENDIAN
1252 HTONS(ip->ip_len);
1253 HTONS(ip->ip_off);
1254 #endif
1255
1256 /* Deliver packet to divert input routine */
1257 divert_packet(m, 0, off & 0xffff, args.divert_rule);
1258
1259 /* If 'tee', continue with original packet */
1260 if (clone != NULL) {
1261 m0 = m = clone;
1262 ip = mtod(m, struct ip *);
1263 goto pass;
1264 }
1265 goto done;
1266 }
1267 #endif
1268
1269 #if IPFIREWALL_FORWARD
1270 /* Here we check dst to make sure it's directly reachable on the
1271 * interface we previously thought it was.
1272 * If it isn't (which may be likely in some situations) we have
1273 * to re-route it (ie, find a route for the next-hop and the
1274 * associated interface) and set them here. This is nested
1275 * forwarding which in most cases is undesirable, except where
1276 * such control is nigh impossible. So we do it here.
1277 * And I'm babbling.
1278 */
1279 if (off == 0 && old != dst) {
1280 struct in_ifaddr *ia_fw;
1281
1282 /* It's changed... */
1283 /* There must be a better way to do this next line... */
1284 static struct route sro_fwd, *ro_fwd = &sro_fwd;
1285 #if IPFIREWALL_FORWARD_DEBUG
1286 printf("IPFIREWALL_FORWARD: New dst ip: ");
1287 print_ip(dst->sin_addr);
1288 printf("\n");
1289 #endif
1290 /*
1291 * We need to figure out if we have been forwarded
1292 * to a local socket. If so then we should somehow
1293 * "loop back" to ip_input, and get directed to the
1294 * PCB as if we had received this packet. This is
1295 * because it may be difficult to identify the packets
1296 * you want to forward until they are being output
1297 * and have selected an interface. (e.g. locally
1298 * initiated packets) If we used the loopback interface,
1299 * we would not be able to control what happens
1300 * as the packet runs through ip_input() as
1301 * it is done through a ISR.
1302 */
1303 lck_rw_lock_shared(in_ifaddr_rwlock);
1304 TAILQ_FOREACH(ia_fw, &in_ifaddrhead, ia_link) {
1305 /*
1306 * If the addr to forward to is one
1307 * of ours, we pretend to
1308 * be the destination for this packet.
1309 */
1310 if (IA_SIN(ia_fw)->sin_addr.s_addr ==
1311 dst->sin_addr.s_addr)
1312 break;
1313 }
1314 lck_rw_done(in_ifaddr_rwlock);
1315 if (ia_fw) {
1316 /* tell ip_input "dont filter" */
1317 struct m_tag *fwd_tag;
1318 struct ip_fwd_tag *ipfwd_tag;
1319
1320 fwd_tag = m_tag_alloc(KERNEL_MODULE_TAG_ID,
1321 KERNEL_TAG_TYPE_IPFORWARD,
1322 sizeof (*ipfwd_tag), M_NOWAIT);
1323 if (fwd_tag == NULL) {
1324 error = ENOBUFS;
1325 goto bad;
1326 }
1327
1328 ipfwd_tag = (struct ip_fwd_tag *)(fwd_tag+1);
1329 ipfwd_tag->next_hop = args.next_hop;
1330
1331 m_tag_prepend(m, fwd_tag);
1332
1333 if (m->m_pkthdr.rcvif == NULL)
1334 m->m_pkthdr.rcvif = ifunit("lo0");
1335 if ((~IF_HWASSIST_CSUM_FLAGS(m->m_pkthdr.rcvif->if_hwassist) &
1336 m->m_pkthdr.csum_flags) == 0) {
1337 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
1338 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1339 m->m_pkthdr.csum_flags |=
1340 CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1341 m->m_pkthdr.csum_data = 0xffff;
1342 }
1343 m->m_pkthdr.csum_flags |=
1344 CSUM_IP_CHECKED | CSUM_IP_VALID;
1345 }
1346 else if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
1347 in_delayed_cksum(m);
1348 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1349 ip->ip_sum = in_cksum(m, hlen);
1350 }
1351
1352 #if BYTE_ORDER != BIG_ENDIAN
1353 HTONS(ip->ip_len);
1354 HTONS(ip->ip_off);
1355 #endif
1356
1357 /* we need to call dlil_output to run filters
1358 * and resync to avoid recursion loops.
1359 */
1360 if (lo_ifp) {
1361 dlil_output(lo_ifp, PF_INET, m, 0, (struct sockaddr *)dst, 0);
1362 }
1363 else {
1364 printf("ip_output: no loopback ifp for forwarding!!!\n");
1365 }
1366 goto done;
1367 }
1368 /* Some of the logic for this was
1369 * nicked from above.
1370 *
1371 * This rewrites the cached route in a local PCB.
1372 * Is this what we want to do?
1373 */
1374 bcopy(dst, &ro_fwd->ro_dst, sizeof(*dst));
1375
1376 ro_fwd->ro_rt = NULL;
1377 rtalloc_ign(ro_fwd, RTF_PRCLONING);
1378
1379 if (ro_fwd->ro_rt == NULL) {
1380 OSAddAtomic(1, &ipstat.ips_noroute);
1381 error = EHOSTUNREACH;
1382 goto bad;
1383 }
1384
1385 RT_LOCK_SPIN(ro_fwd->ro_rt);
1386 ia_fw = ifatoia(ro_fwd->ro_rt->rt_ifa);
1387 if (ia_fw != NULL)
1388 ifaref(&ia_fw->ia_ifa);
1389 ifp = ro_fwd->ro_rt->rt_ifp;
1390 ro_fwd->ro_rt->rt_use++;
1391 if (ro_fwd->ro_rt->rt_flags & RTF_GATEWAY)
1392 dst = (struct sockaddr_in *)ro_fwd->ro_rt->rt_gateway;
1393 if (ro_fwd->ro_rt->rt_flags & RTF_HOST)
1394 isbroadcast =
1395 (ro_fwd->ro_rt->rt_flags & RTF_BROADCAST);
1396 else
1397 isbroadcast = in_broadcast(dst->sin_addr, ifp);
1398 RT_UNLOCK(ro_fwd->ro_rt);
1399 rtfree(ro->ro_rt);
1400 ro->ro_rt = ro_fwd->ro_rt;
1401 dst = (struct sockaddr_in *)&ro_fwd->ro_dst;
1402
1403 /*
1404 * If we added a default src ip earlier,
1405 * which would have been gotten from the-then
1406 * interface, do it again, from the new one.
1407 */
1408 if (ia_fw != NULL) {
1409 if (fwd_rewrite_src)
1410 ip->ip_src = IA_SIN(ia_fw)->sin_addr;
1411 ifafree(&ia_fw->ia_ifa);
1412 }
1413 goto pass ;
1414 }
1415 #endif /* IPFIREWALL_FORWARD */
1416 /*
1417 * if we get here, none of the above matches, and
1418 * we have to drop the pkt
1419 */
1420 m_freem(m);
1421 error = EACCES; /* not sure this is the right error msg */
1422 goto done;
1423 }
1424 #endif /* IPFIREWALL */
1425
1426 pass:
1427 #if __APPLE__
1428 /* Do not allow loopback address to wind up on a wire */
1429 if ((ifp->if_flags & IFF_LOOPBACK) == 0 &&
1430 ((ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
1431 (ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)) {
1432 OSAddAtomic(1, &ipstat.ips_badaddr);
1433 m_freem(m);
1434 /*
1435 * Do not simply drop the packet just like a firewall -- we want the
1436 * application to feel the pain.
1437 * Return ENETUNREACH like ip6_output does in some similar cases.
1438 * This can startle the otherwise clueless process that specifies
1439 * loopback as the source address.
1440 */
1441 error = ENETUNREACH;
1442 goto done;
1443 }
1444 #endif
1445 m->m_pkthdr.csum_flags |= CSUM_IP;
1446 tso = (ifp->if_hwassist & IFNET_TSO_IPV4) && (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4);
1447
1448 sw_csum = m->m_pkthdr.csum_flags
1449 & ~IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);
1450
1451 if ((ifp->if_hwassist & CSUM_TCP_SUM16) != 0) {
1452 /*
1453 * Special case code for GMACE
1454 * frames that can be checksumed by GMACE SUM16 HW:
1455 * frame >64, no fragments, no UDP
1456 */
1457 if (apple_hwcksum_tx && (m->m_pkthdr.csum_flags & CSUM_TCP)
1458 && (ip->ip_len > 50) && (ip->ip_len <= ifp->if_mtu)) {
1459 /* Apple GMAC HW, expects STUFF_OFFSET << 16 | START_OFFSET */
1460 u_short offset = (IP_VHL_HL(ip->ip_vhl) << 2) +14 ; /* IP+Enet header length */
1461 u_short csumprev= m->m_pkthdr.csum_data & 0xFFFF;
1462 m->m_pkthdr.csum_flags = CSUM_DATA_VALID | CSUM_TCP_SUM16; /* for GMAC */
1463 m->m_pkthdr.csum_data = (csumprev + offset) << 16 ;
1464 m->m_pkthdr.csum_data += offset;
1465 sw_csum = CSUM_DELAY_IP; /* do IP hdr chksum in software */
1466 }
1467 else {
1468 /* let the software handle any UDP or TCP checksums */
1469 sw_csum |= (CSUM_DELAY_DATA & m->m_pkthdr.csum_flags);
1470 }
1471 } else if (apple_hwcksum_tx == 0) {
1472 sw_csum |= (CSUM_DELAY_DATA | CSUM_DELAY_IP) &
1473 m->m_pkthdr.csum_flags;
1474 }
1475
1476 if (sw_csum & CSUM_DELAY_DATA) {
1477 in_delayed_cksum(m);
1478 sw_csum &= ~CSUM_DELAY_DATA;
1479 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1480 }
1481
1482 if (apple_hwcksum_tx != 0) {
1483 m->m_pkthdr.csum_flags &=
1484 IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);
1485 } else {
1486 m->m_pkthdr.csum_flags = 0;
1487 }
1488
1489 /*
1490 * If small enough for interface, or the interface will take
1491 * care of the fragmentation for us, can just send directly.
1492 */
1493 if ((u_short)ip->ip_len <= ifp->if_mtu || tso ||
1494 ifp->if_hwassist & CSUM_FRAGMENT) {
1495 if (tso)
1496 m->m_pkthdr.csum_flags |= CSUM_TSO_IPV4;
1497
1498
1499 #if BYTE_ORDER != BIG_ENDIAN
1500 HTONS(ip->ip_len);
1501 HTONS(ip->ip_off);
1502 #endif
1503
1504 ip->ip_sum = 0;
1505 if (sw_csum & CSUM_DELAY_IP) {
1506 ip->ip_sum = in_cksum(m, hlen);
1507 }
1508
1509 #ifndef __APPLE__
1510 /* Record statistics for this interface address. */
1511 if (!(flags & IP_FORWARDING) && ia != NULL) {
1512 ia->ia_ifa.if_opackets++;
1513 ia->ia_ifa.if_obytes += m->m_pkthdr.len;
1514 }
1515 #endif
1516
1517 #if IPSEC
1518 /* clean ipsec history once it goes out of the node */
1519 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0)
1520 ipsec_delaux(m);
1521 #endif
1522 if (packetchain == 0) {
1523 error = ifnet_output(ifp, PF_INET, m, ro->ro_rt,
1524 (struct sockaddr *)dst);
1525 goto done;
1526 }
1527 else { /* packet chaining allows us to reuse the route for all packets */
1528 mppn = &m->m_nextpkt;
1529 m = m->m_nextpkt;
1530 if (m == NULL) {
1531 #if PF
1532 sendchain:
1533 #endif /* PF */
1534 if (pktcnt > ip_maxchainsent)
1535 ip_maxchainsent = pktcnt;
1536 //send
1537 error = ifnet_output(ifp, PF_INET, packetlist,
1538 ro->ro_rt, (struct sockaddr *)dst);
1539 pktcnt = 0;
1540 goto done;
1541
1542 }
1543 m0 = m;
1544 pktcnt++;
1545 goto loopit;
1546 }
1547 }
1548 /*
1549 * Too large for interface; fragment if possible.
1550 * Must be able to put at least 8 bytes per fragment.
1551 */
1552
1553 if (ip->ip_off & IP_DF || (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4)) {
1554 error = EMSGSIZE;
1555 /*
1556 * This case can happen if the user changed the MTU
1557 *
1558 * of an interface after enabling IP on it. Because
1559 * most netifs don't keep track of routes pointing to
1560 * them, there is no way for one to update all its
1561 * routes when the MTU is changed.
1562 */
1563 RT_LOCK_SPIN(ro->ro_rt);
1564 if (ro->ro_rt && (ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST))
1565 && !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU)
1566 && (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) {
1567 ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
1568 }
1569 RT_UNLOCK(ro->ro_rt);
1570 OSAddAtomic(1, &ipstat.ips_cantfrag);
1571 goto bad;
1572 }
1573
1574 error = ip_fragment(m, ifp, ifp->if_mtu, sw_csum);
1575 if (error != 0) {
1576 m0 = m = NULL;
1577 goto bad;
1578 }
1579
1580 KERNEL_DEBUG(DBG_LAYER_END, ip->ip_dst.s_addr,
1581 ip->ip_src.s_addr, ip->ip_p, ip->ip_off, ip->ip_len);
1582
1583 for (m = m0; m; m = m0) {
1584 m0 = m->m_nextpkt;
1585 m->m_nextpkt = 0;
1586 #if IPSEC
1587 /* clean ipsec history once it goes out of the node */
1588 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0)
1589 ipsec_delaux(m);
1590 #endif
1591 if (error == 0) {
1592 #ifndef __APPLE__
1593 /* Record statistics for this interface address. */
1594 if (ia != NULL) {
1595 ia->ia_ifa.if_opackets++;
1596 ia->ia_ifa.if_obytes += m->m_pkthdr.len;
1597 }
1598 #endif
1599 if ((packetchain != 0) && (pktcnt > 0))
1600 panic("ip_output: mix of packet in packetlist is wrong=%p", packetlist);
1601 error = ifnet_output(ifp, PF_INET, m, ro->ro_rt,
1602 (struct sockaddr *)dst);
1603 } else
1604 m_freem(m);
1605 }
1606
1607 if (error == 0)
1608 OSAddAtomic(1, &ipstat.ips_fragmented);
1609
1610 done:
1611 if (ia) {
1612 ifafree(&ia->ia_ifa);
1613 ia = NULL;
1614 }
1615 #if IPSEC
1616 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) {
1617 if (ro == &iproute && ro->ro_rt) {
1618 rtfree(ro->ro_rt);
1619 ro->ro_rt = NULL;
1620 }
1621 if (sp != NULL) {
1622 KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
1623 printf("DP ip_output call free SP:%x\n", sp));
1624 key_freesp(sp, KEY_SADB_UNLOCKED);
1625 }
1626 }
1627 #endif /* IPSEC */
1628
1629 KERNEL_DEBUG(DBG_FNC_IP_OUTPUT | DBG_FUNC_END, error,0,0,0,0);
1630 return (error);
1631 bad:
1632 m_freem(m0);
1633 goto done;
1634 }
1635
1636 int
1637 ip_fragment(struct mbuf *m, struct ifnet *ifp, unsigned long mtu, int sw_csum)
1638 {
1639 struct ip *ip, *mhip;
1640 int len, hlen, mhlen, firstlen, off, error = 0;
1641 struct mbuf **mnext = &m->m_nextpkt, *m0;
1642 int nfrags = 1;
1643
1644 ip = mtod(m, struct ip *);
1645 #ifdef _IP_VHL
1646 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
1647 #else
1648 hlen = ip->ip_hl << 2;
1649 #endif
1650
1651 firstlen = len = (mtu - hlen) &~ 7;
1652 if (len < 8) {
1653 m_freem(m);
1654 return (EMSGSIZE);
1655 }
1656
1657 /*
1658 * if the interface will not calculate checksums on
1659 * fragmented packets, then do it here.
1660 */
1661 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA &&
1662 (ifp->if_hwassist & CSUM_IP_FRAGS) == 0) {
1663 in_delayed_cksum(m);
1664 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1665 }
1666
1667 /*
1668 * Loop through length of segment after first fragment,
1669 * make new header and copy data of each part and link onto chain.
1670 */
1671 m0 = m;
1672 mhlen = sizeof (struct ip);
1673 for (off = hlen + len; off < (u_short)ip->ip_len; off += len) {
1674 MGETHDR(m, M_DONTWAIT, MT_HEADER); /* MAC-OK */
1675 if (m == 0) {
1676 error = ENOBUFS;
1677 OSAddAtomic(1, &ipstat.ips_odropped);
1678 goto sendorfree;
1679 }
1680 m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG;
1681 m->m_data += max_linkhdr;
1682 mhip = mtod(m, struct ip *);
1683 *mhip = *ip;
1684 if (hlen > sizeof (struct ip)) {
1685 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
1686 mhip->ip_vhl = IP_MAKE_VHL(IPVERSION, mhlen >> 2);
1687 }
1688 m->m_len = mhlen;
1689 mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF);
1690 if (ip->ip_off & IP_MF)
1691 mhip->ip_off |= IP_MF;
1692 if (off + len >= (u_short)ip->ip_len)
1693 len = (u_short)ip->ip_len - off;
1694 else
1695 mhip->ip_off |= IP_MF;
1696 mhip->ip_len = htons((u_short)(len + mhlen));
1697 m->m_next = m_copy(m0, off, len);
1698 if (m->m_next == 0) {
1699 (void) m_free(m);
1700 error = ENOBUFS; /* ??? */
1701 OSAddAtomic(1, &ipstat.ips_odropped);
1702 goto sendorfree;
1703 }
1704 m->m_pkthdr.len = mhlen + len;
1705 m->m_pkthdr.rcvif = 0;
1706 m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
1707 m->m_pkthdr.socket_id = m0->m_pkthdr.socket_id;
1708 #if CONFIG_MACF_NET
1709 mac_netinet_fragment(m0, m);
1710 #endif
1711
1712 #if BYTE_ORDER != BIG_ENDIAN
1713 HTONS(mhip->ip_off);
1714 #endif
1715
1716 mhip->ip_sum = 0;
1717 if (sw_csum & CSUM_DELAY_IP) {
1718 mhip->ip_sum = in_cksum(m, mhlen);
1719 }
1720 *mnext = m;
1721 mnext = &m->m_nextpkt;
1722 nfrags++;
1723 }
1724 OSAddAtomic(nfrags, &ipstat.ips_ofragments);
1725
1726 /* set first/last markers for fragment chain */
1727 m->m_flags |= M_LASTFRAG;
1728 m0->m_flags |= M_FIRSTFRAG | M_FRAG;
1729 m0->m_pkthdr.csum_data = nfrags;
1730
1731 /*
1732 * Update first fragment by trimming what's been copied out
1733 * and updating header, then send each fragment (in order).
1734 */
1735 m = m0;
1736 m_adj(m, hlen + firstlen - (u_short)ip->ip_len);
1737 m->m_pkthdr.len = hlen + firstlen;
1738 ip->ip_len = htons((u_short)m->m_pkthdr.len);
1739 ip->ip_off |= IP_MF;
1740
1741 #if BYTE_ORDER != BIG_ENDIAN
1742 HTONS(ip->ip_off);
1743 #endif
1744
1745 ip->ip_sum = 0;
1746 if (sw_csum & CSUM_DELAY_IP) {
1747 ip->ip_sum = in_cksum(m, hlen);
1748 }
1749 sendorfree:
1750 if (error)
1751 m_freem_list(m0);
1752
1753 return (error);
1754 }
1755
1756 static void
1757 ip_out_cksum_stats(int proto, u_int32_t len)
1758 {
1759 switch (proto) {
1760 case IPPROTO_TCP:
1761 tcp_out_cksum_stats(len);
1762 break;
1763 case IPPROTO_UDP:
1764 udp_out_cksum_stats(len);
1765 break;
1766 default:
1767 /* keep only TCP or UDP stats for now */
1768 break;
1769 }
1770 }
1771
1772 void
1773 in_delayed_cksum_offset(struct mbuf *m0, int ip_offset)
1774 {
1775 struct ip *ip;
1776 unsigned char buf[sizeof(struct ip)];
1777 u_short csum, offset, ip_len;
1778 struct mbuf *m = m0;
1779
1780 while (ip_offset >= m->m_len) {
1781 ip_offset -= m->m_len;
1782 m = m->m_next;
1783 if (m == NULL) {
1784 printf("in_delayed_cksum_withoffset failed - ip_offset wasn't in the packet\n");
1785 return;
1786 }
1787 }
1788
1789 /* Sometimes the IP header is not contiguous, yes this can happen! */
1790 if (ip_offset + sizeof(struct ip) > m->m_len) {
1791 #if DEBUG
1792 printf("delayed m_pullup, m->len: %d off: %d\n",
1793 m->m_len, ip_offset);
1794 #endif
1795 m_copydata(m, ip_offset, sizeof(struct ip), (caddr_t) buf);
1796
1797 ip = (struct ip *)buf;
1798 } else {
1799 ip = (struct ip*)(m->m_data + ip_offset);
1800 }
1801
1802 /* Gross */
1803 if (ip_offset) {
1804 m->m_len -= ip_offset;
1805 m->m_data += ip_offset;
1806 }
1807
1808 offset = IP_VHL_HL(ip->ip_vhl) << 2 ;
1809
1810 /*
1811 * We could be in the context of an IP or interface filter; in the
1812 * former case, ip_len would be in host (correct) order while for
1813 * the latter it would be in network order. Because of this, we
1814 * attempt to interpret the length field by comparing it against
1815 * the actual packet length. If the comparison fails, byte swap
1816 * the length and check again. If it still fails, then the packet
1817 * is bogus and we give up.
1818 */
1819 ip_len = ip->ip_len;
1820 if (ip_len != (m0->m_pkthdr.len - ip_offset)) {
1821 ip_len = SWAP16(ip_len);
1822 if (ip_len != (m0->m_pkthdr.len - ip_offset)) {
1823 printf("in_delayed_cksum_offset: ip_len %d (%d) "
1824 "doesn't match actual length %d\n", ip->ip_len,
1825 ip_len, (m0->m_pkthdr.len - ip_offset));
1826 return;
1827 }
1828 }
1829
1830 csum = in_cksum_skip(m, ip_len, offset);
1831
1832 /* Update stats */
1833 ip_out_cksum_stats(ip->ip_p, ip_len - offset);
1834
1835 if (m0->m_pkthdr.csum_flags & CSUM_UDP && csum == 0)
1836 csum = 0xffff;
1837 offset += m0->m_pkthdr.csum_data & 0xFFFF; /* checksum offset */
1838
1839 /* Gross */
1840 if (ip_offset) {
1841 if (M_LEADINGSPACE(m) < ip_offset)
1842 panic("in_delayed_cksum_offset - chain modified!\n");
1843 m->m_len += ip_offset;
1844 m->m_data -= ip_offset;
1845 }
1846
1847 if (offset > ip_len) /* bogus offset */
1848 return;
1849
1850 /* Insert the checksum in the existing chain */
1851 if (offset + ip_offset + sizeof(u_short) > m->m_len) {
1852 char tmp[2];
1853
1854 #if DEBUG
1855 printf("delayed m_copyback, m->len: %d off: %d p: %d\n",
1856 m->m_len, offset + ip_offset, ip->ip_p);
1857 #endif
1858 *(u_short *)tmp = csum;
1859 m_copyback(m, offset + ip_offset, 2, tmp);
1860 } else
1861 *(u_short *)(m->m_data + offset + ip_offset) = csum;
1862 }
1863
/*
 * Finalize the delayed checksum of a packet whose IP header is at the
 * very beginning of the mbuf chain `m'.
 */
void
in_delayed_cksum(struct mbuf *m)
{
	const int ip_hdr_offset = 0;	/* header starts the chain */

	in_delayed_cksum_offset(m, ip_hdr_offset);
}
1869
1870 void
1871 in_cksum_offset(struct mbuf* m, size_t ip_offset)
1872 {
1873 struct ip* ip = NULL;
1874 int hlen = 0;
1875 unsigned char buf[sizeof(struct ip)];
1876 int swapped = 0;
1877
1878 while (ip_offset >= m->m_len) {
1879 ip_offset -= m->m_len;
1880 m = m->m_next;
1881 if (m == NULL) {
1882 printf("in_cksum_offset failed - ip_offset wasn't in the packet\n");
1883 return;
1884 }
1885 }
1886
1887 /* Sometimes the IP header is not contiguous, yes this can happen! */
1888 if (ip_offset + sizeof(struct ip) > m->m_len) {
1889
1890 #if DEBUG
1891 printf("in_cksum_offset - delayed m_pullup, m->len: %d off: %lu\n",
1892 m->m_len, ip_offset);
1893 #endif
1894 m_copydata(m, ip_offset, sizeof(struct ip), (caddr_t) buf);
1895
1896 ip = (struct ip *)buf;
1897 ip->ip_sum = 0;
1898 m_copyback(m, ip_offset + offsetof(struct ip, ip_sum), 2, (caddr_t)&ip->ip_sum);
1899 } else {
1900 ip = (struct ip*)(m->m_data + ip_offset);
1901 ip->ip_sum = 0;
1902 }
1903
1904 /* Gross */
1905 if (ip_offset) {
1906 m->m_len -= ip_offset;
1907 m->m_data += ip_offset;
1908 }
1909
1910 #ifdef _IP_VHL
1911 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
1912 #else
1913 hlen = ip->ip_hl << 2;
1914 #endif
1915 /*
1916 * We could be in the context of an IP or interface filter; in the
1917 * former case, ip_len would be in host order while for the latter
1918 * it would be in network (correct) order. Because of this, we
1919 * attempt to interpret the length field by comparing it against
1920 * the actual packet length. If the comparison fails, byte swap
1921 * the length and check again. If it still fails, then the packet
1922 * is bogus and we give up.
1923 */
1924 if (ntohs(ip->ip_len) != (m->m_pkthdr.len - ip_offset)) {
1925 ip->ip_len = SWAP16(ip->ip_len);
1926 swapped = 1;
1927 if (ntohs(ip->ip_len) != (m->m_pkthdr.len - ip_offset)) {
1928 ip->ip_len = SWAP16(ip->ip_len);
1929 printf("in_cksum_offset: ip_len %d (%d) "
1930 "doesn't match actual length %lu\n",
1931 ip->ip_len, SWAP16(ip->ip_len),
1932 (m->m_pkthdr.len - ip_offset));
1933 return;
1934 }
1935 }
1936
1937 ip->ip_sum = 0;
1938 ip->ip_sum = in_cksum(m, hlen);
1939 if (swapped)
1940 ip->ip_len = SWAP16(ip->ip_len);
1941
1942 /* Gross */
1943 if (ip_offset) {
1944 if (M_LEADINGSPACE(m) < ip_offset)
1945 panic("in_cksum_offset - chain modified!\n");
1946 m->m_len += ip_offset;
1947 m->m_data -= ip_offset;
1948 }
1949
1950 /* Insert the checksum in the existing chain if IP header not contiguous */
1951 if (ip_offset + sizeof(struct ip) > m->m_len) {
1952 char tmp[2];
1953
1954 #if DEBUG
1955 printf("in_cksum_offset m_copyback, m->len: %u off: %lu p: %d\n",
1956 m->m_len, ip_offset + offsetof(struct ip, ip_sum), ip->ip_p);
1957 #endif
1958 *(u_short *)tmp = ip->ip_sum;
1959 m_copyback(m, ip_offset + offsetof(struct ip, ip_sum), 2, tmp);
1960 }
1961 }
1962
1963 /*
1964 * Insert IP options into preformed packet.
1965 * Adjust IP destination as required for IP source routing,
1966 * as indicated by a non-zero in_addr at the start of the options.
1967 *
1968 * XXX This routine assumes that the packet has no options in place.
1969 */
1970 static struct mbuf *
1971 ip_insertoptions(m, opt, phlen)
1972 register struct mbuf *m;
1973 struct mbuf *opt;
1974 int *phlen;
1975 {
1976 register struct ipoption *p = mtod(opt, struct ipoption *);
1977 struct mbuf *n;
1978 register struct ip *ip = mtod(m, struct ip *);
1979 unsigned optlen;
1980
1981 optlen = opt->m_len - sizeof(p->ipopt_dst);
1982 if (optlen + (u_short)ip->ip_len > IP_MAXPACKET)
1983 return (m); /* XXX should fail */
1984 if (p->ipopt_dst.s_addr)
1985 ip->ip_dst = p->ipopt_dst;
1986 if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
1987 MGETHDR(n, M_DONTWAIT, MT_HEADER); /* MAC-OK */
1988 if (n == 0)
1989 return (m);
1990 n->m_pkthdr.rcvif = 0;
1991 #if CONFIG_MACF_NET
1992 mac_mbuf_label_copy(m, n);
1993 #endif
1994 n->m_pkthdr.len = m->m_pkthdr.len + optlen;
1995 m->m_len -= sizeof(struct ip);
1996 m->m_data += sizeof(struct ip);
1997 n->m_next = m;
1998 m = n;
1999 m->m_len = optlen + sizeof(struct ip);
2000 m->m_data += max_linkhdr;
2001 (void)memcpy(mtod(m, void *), ip, sizeof(struct ip));
2002 } else {
2003 m->m_data -= optlen;
2004 m->m_len += optlen;
2005 m->m_pkthdr.len += optlen;
2006 ovbcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
2007 }
2008 ip = mtod(m, struct ip *);
2009 bcopy(p->ipopt_list, ip + 1, optlen);
2010 *phlen = sizeof(struct ip) + optlen;
2011 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, *phlen >> 2);
2012 ip->ip_len += optlen;
2013 return (m);
2014 }
2015
2016 /*
2017 * Copy options from ip to jp,
2018 * omitting those not copied during fragmentation.
2019 */
2020 int
2021 ip_optcopy(ip, jp)
2022 struct ip *ip, *jp;
2023 {
2024 register u_char *cp, *dp;
2025 int opt, optlen, cnt;
2026
2027 cp = (u_char *)(ip + 1);
2028 dp = (u_char *)(jp + 1);
2029 cnt = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof (struct ip);
2030 for (; cnt > 0; cnt -= optlen, cp += optlen) {
2031 opt = cp[0];
2032 if (opt == IPOPT_EOL)
2033 break;
2034 if (opt == IPOPT_NOP) {
2035 /* Preserve for IP mcast tunnel's LSRR alignment. */
2036 *dp++ = IPOPT_NOP;
2037 optlen = 1;
2038 continue;
2039 }
2040 #if DIAGNOSTIC
2041 if (cnt < IPOPT_OLEN + sizeof(*cp))
2042 panic("malformed IPv4 option passed to ip_optcopy");
2043 #endif
2044 optlen = cp[IPOPT_OLEN];
2045 #if DIAGNOSTIC
2046 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
2047 panic("malformed IPv4 option passed to ip_optcopy");
2048 #endif
2049 /* bogus lengths should have been caught by ip_dooptions */
2050 if (optlen > cnt)
2051 optlen = cnt;
2052 if (IPOPT_COPIED(opt)) {
2053 bcopy(cp, dp, optlen);
2054 dp += optlen;
2055 }
2056 }
2057 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
2058 *dp++ = IPOPT_EOL;
2059 return (optlen);
2060 }
2061
2062 /*
2063 * IP socket option processing.
2064 */
2065 int
2066 ip_ctloutput(so, sopt)
2067 struct socket *so;
2068 struct sockopt *sopt;
2069 {
2070 struct inpcb *inp = sotoinpcb(so);
2071 int error, optval;
2072
2073 error = optval = 0;
2074 if (sopt->sopt_level != IPPROTO_IP) {
2075 return (EINVAL);
2076 }
2077
2078 switch (sopt->sopt_dir) {
2079 case SOPT_SET:
2080 switch (sopt->sopt_name) {
2081 case IP_OPTIONS:
2082 #ifdef notyet
2083 case IP_RETOPTS:
2084 #endif
2085 {
2086 struct mbuf *m;
2087 if (sopt->sopt_valsize > MLEN) {
2088 error = EMSGSIZE;
2089 break;
2090 }
2091 MGET(m, sopt->sopt_p != kernproc ? M_WAIT : M_DONTWAIT,
2092 MT_HEADER);
2093 if (m == 0) {
2094 error = ENOBUFS;
2095 break;
2096 }
2097 m->m_len = sopt->sopt_valsize;
2098 error = sooptcopyin(sopt, mtod(m, char *), m->m_len,
2099 m->m_len);
2100 if (error)
2101 break;
2102
2103 return (ip_pcbopts(sopt->sopt_name, &inp->inp_options,
2104 m));
2105 }
2106
2107 case IP_TOS:
2108 case IP_TTL:
2109 case IP_RECVOPTS:
2110 case IP_RECVRETOPTS:
2111 case IP_RECVDSTADDR:
2112 case IP_RECVIF:
2113 case IP_RECVTTL:
2114 #if defined(NFAITH) && NFAITH > 0
2115 case IP_FAITH:
2116 #endif
2117 error = sooptcopyin(sopt, &optval, sizeof optval,
2118 sizeof optval);
2119 if (error)
2120 break;
2121
2122 switch (sopt->sopt_name) {
2123 case IP_TOS:
2124 inp->inp_ip_tos = optval;
2125 break;
2126
2127 case IP_TTL:
2128 inp->inp_ip_ttl = optval;
2129 break;
2130 #define OPTSET(bit) \
2131 if (optval) \
2132 inp->inp_flags |= bit; \
2133 else \
2134 inp->inp_flags &= ~bit;
2135
2136 case IP_RECVOPTS:
2137 OPTSET(INP_RECVOPTS);
2138 break;
2139
2140 case IP_RECVRETOPTS:
2141 OPTSET(INP_RECVRETOPTS);
2142 break;
2143
2144 case IP_RECVDSTADDR:
2145 OPTSET(INP_RECVDSTADDR);
2146 break;
2147
2148 case IP_RECVIF:
2149 OPTSET(INP_RECVIF);
2150 break;
2151
2152 case IP_RECVTTL:
2153 OPTSET(INP_RECVTTL);
2154 break;
2155
2156 #if defined(NFAITH) && NFAITH > 0
2157 case IP_FAITH:
2158 OPTSET(INP_FAITH);
2159 break;
2160 #endif
2161 }
2162 break;
2163 #undef OPTSET
2164
2165 #if CONFIG_FORCE_OUT_IFP
2166 /*
2167 * Apple private interface, similar to IP_BOUND_IF, except
2168 * that the parameter is a NULL-terminated string containing
2169 * the name of the network interface; an empty string means
2170 * unbind. Applications are encouraged to use IP_BOUND_IF
2171 * instead, as that is the current "official" API.
2172 */
2173 case IP_FORCE_OUT_IFP: {
2174 char ifname[IFNAMSIZ];
2175 unsigned int ifscope;
2176
2177 /* This option is settable only for IPv4 */
2178 if (!(inp->inp_vflag & INP_IPV4)) {
2179 error = EINVAL;
2180 break;
2181 }
2182
2183 /* Verify interface name parameter is sane */
2184 if (sopt->sopt_valsize > sizeof(ifname)) {
2185 error = EINVAL;
2186 break;
2187 }
2188
2189 /* Copy the interface name */
2190 if (sopt->sopt_valsize != 0) {
2191 error = sooptcopyin(sopt, ifname,
2192 sizeof (ifname), sopt->sopt_valsize);
2193 if (error)
2194 break;
2195 }
2196
2197 if (sopt->sopt_valsize == 0 || ifname[0] == NULL) {
2198 /* Unbind this socket from any interface */
2199 ifscope = IFSCOPE_NONE;
2200 } else {
2201 ifnet_t ifp;
2202
2203 /* Verify name is NULL terminated */
2204 if (ifname[sopt->sopt_valsize - 1] != NULL) {
2205 error = EINVAL;
2206 break;
2207 }
2208
2209 /* Bail out if given bogus interface name */
2210 if (ifnet_find_by_name(ifname, &ifp) != 0) {
2211 error = ENXIO;
2212 break;
2213 }
2214
2215 /* Bind this socket to this interface */
2216 ifscope = ifp->if_index;
2217
2218 /*
2219 * Won't actually free; since we don't release
2220 * this later, we should do it now.
2221 */
2222 ifnet_release(ifp);
2223 }
2224 ip_bindif(inp, ifscope);
2225 }
2226 break;
2227 #endif
2228 case IP_MULTICAST_IF:
2229 case IP_MULTICAST_VIF:
2230 case IP_MULTICAST_TTL:
2231 case IP_MULTICAST_LOOP:
2232 case IP_ADD_MEMBERSHIP:
2233 case IP_DROP_MEMBERSHIP:
2234 error = ip_setmoptions(sopt, &inp->inp_moptions);
2235 break;
2236
2237 case IP_PORTRANGE:
2238 error = sooptcopyin(sopt, &optval, sizeof optval,
2239 sizeof optval);
2240 if (error)
2241 break;
2242
2243 switch (optval) {
2244 case IP_PORTRANGE_DEFAULT:
2245 inp->inp_flags &= ~(INP_LOWPORT);
2246 inp->inp_flags &= ~(INP_HIGHPORT);
2247 break;
2248
2249 case IP_PORTRANGE_HIGH:
2250 inp->inp_flags &= ~(INP_LOWPORT);
2251 inp->inp_flags |= INP_HIGHPORT;
2252 break;
2253
2254 case IP_PORTRANGE_LOW:
2255 inp->inp_flags &= ~(INP_HIGHPORT);
2256 inp->inp_flags |= INP_LOWPORT;
2257 break;
2258
2259 default:
2260 error = EINVAL;
2261 break;
2262 }
2263 break;
2264
2265 #if IPSEC
2266 case IP_IPSEC_POLICY:
2267 {
2268 caddr_t req = NULL;
2269 size_t len = 0;
2270 int priv;
2271 struct mbuf *m;
2272 int optname;
2273
2274 if (sopt->sopt_valsize > MCLBYTES) {
2275 error = EMSGSIZE;
2276 break;
2277 }
2278 if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
2279 break;
2280 if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
2281 break;
2282 priv = (proc_suser(sopt->sopt_p) == 0);
2283 if (m) {
2284 req = mtod(m, caddr_t);
2285 len = m->m_len;
2286 }
2287 optname = sopt->sopt_name;
2288 error = ipsec4_set_policy(inp, optname, req, len, priv);
2289 m_freem(m);
2290 break;
2291 }
2292 #endif /*IPSEC*/
2293
2294 #if TRAFFIC_MGT
2295 case IP_TRAFFIC_MGT_BACKGROUND:
2296 {
2297 unsigned background = 0;
2298 error = sooptcopyin(sopt, &background, sizeof(background), sizeof(background));
2299 if (error)
2300 break;
2301
2302 if (background)
2303 so->so_traffic_mgt_flags |= TRAFFIC_MGT_SO_BACKGROUND;
2304 else
2305 so->so_traffic_mgt_flags &= ~TRAFFIC_MGT_SO_BACKGROUND;
2306
2307 break;
2308 }
2309 #endif /* TRAFFIC_MGT */
2310
2311 /*
2312 * On a multihomed system, scoped routing can be used to
2313 * restrict the source interface used for sending packets.
2314 * The socket option IP_BOUND_IF binds a particular AF_INET
2315 * socket to an interface such that data sent on the socket
2316 * is restricted to that interface. This is unlike the
2317 * SO_DONTROUTE option where the routing table is bypassed;
2318 * therefore it allows for a greater flexibility and control
2319 * over the system behavior, and does not place any restriction
2320 * on the destination address type (e.g. unicast, multicast,
2321 * or broadcast if applicable) or whether or not the host is
2322 * directly reachable. Note that in the multicast transmit
2323 * case, IP_MULTICAST_IF takes precedence over IP_BOUND_IF,
2324 * since the former practically bypasses the routing table;
2325 * in this case, IP_BOUND_IF sets the default interface used
2326 * for sending multicast packets in the absence of an explicit
2327 * transmit interface set via IP_MULTICAST_IF.
2328 */
2329 case IP_BOUND_IF:
2330 /* This option is settable only for IPv4 */
2331 if (!(inp->inp_vflag & INP_IPV4)) {
2332 error = EINVAL;
2333 break;
2334 }
2335
2336 error = sooptcopyin(sopt, &optval, sizeof (optval),
2337 sizeof (optval));
2338
2339 if (error)
2340 break;
2341
2342 ip_bindif(inp, optval);
2343 break;
2344
2345 default:
2346 error = ENOPROTOOPT;
2347 break;
2348 }
2349 break;
2350
2351 case SOPT_GET:
2352 switch (sopt->sopt_name) {
2353 case IP_OPTIONS:
2354 case IP_RETOPTS:
2355 if (inp->inp_options)
2356 error = sooptcopyout(sopt,
2357 mtod(inp->inp_options,
2358 char *),
2359 inp->inp_options->m_len);
2360 else
2361 sopt->sopt_valsize = 0;
2362 break;
2363
2364 case IP_TOS:
2365 case IP_TTL:
2366 case IP_RECVOPTS:
2367 case IP_RECVRETOPTS:
2368 case IP_RECVDSTADDR:
2369 case IP_RECVIF:
2370 case IP_RECVTTL:
2371 case IP_PORTRANGE:
2372 #if defined(NFAITH) && NFAITH > 0
2373 case IP_FAITH:
2374 #endif
2375 switch (sopt->sopt_name) {
2376
2377 case IP_TOS:
2378 optval = inp->inp_ip_tos;
2379 break;
2380
2381 case IP_TTL:
2382 optval = inp->inp_ip_ttl;
2383 break;
2384
2385 #define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0)
2386
2387 case IP_RECVOPTS:
2388 optval = OPTBIT(INP_RECVOPTS);
2389 break;
2390
2391 case IP_RECVRETOPTS:
2392 optval = OPTBIT(INP_RECVRETOPTS);
2393 break;
2394
2395 case IP_RECVDSTADDR:
2396 optval = OPTBIT(INP_RECVDSTADDR);
2397 break;
2398
2399 case IP_RECVIF:
2400 optval = OPTBIT(INP_RECVIF);
2401 break;
2402
2403 case IP_RECVTTL:
2404 optval = OPTBIT(INP_RECVTTL);
2405 break;
2406
2407 case IP_PORTRANGE:
2408 if (inp->inp_flags & INP_HIGHPORT)
2409 optval = IP_PORTRANGE_HIGH;
2410 else if (inp->inp_flags & INP_LOWPORT)
2411 optval = IP_PORTRANGE_LOW;
2412 else
2413 optval = 0;
2414 break;
2415
2416 #if defined(NFAITH) && NFAITH > 0
2417 case IP_FAITH:
2418 optval = OPTBIT(INP_FAITH);
2419 break;
2420 #endif
2421 }
2422 error = sooptcopyout(sopt, &optval, sizeof optval);
2423 break;
2424
2425 case IP_MULTICAST_IF:
2426 case IP_MULTICAST_VIF:
2427 case IP_MULTICAST_TTL:
2428 case IP_MULTICAST_LOOP:
2429 case IP_ADD_MEMBERSHIP:
2430 case IP_DROP_MEMBERSHIP:
2431 error = ip_getmoptions(sopt, inp->inp_moptions);
2432 break;
2433
2434 #if IPSEC
2435 case IP_IPSEC_POLICY:
2436 {
2437 struct mbuf *m = NULL;
2438 caddr_t req = NULL;
2439 size_t len = 0;
2440
2441 if (m != 0) {
2442 req = mtod(m, caddr_t);
2443 len = m->m_len;
2444 }
2445 error = ipsec4_get_policy(sotoinpcb(so), req, len, &m);
2446 if (error == 0)
2447 error = soopt_mcopyout(sopt, m); /* XXX */
2448 if (error == 0)
2449 m_freem(m);
2450 break;
2451 }
2452 #endif /*IPSEC*/
2453
2454 #if TRAFFIC_MGT
2455 case IP_TRAFFIC_MGT_BACKGROUND:
2456 {
2457 unsigned background = so->so_traffic_mgt_flags;
2458 return (sooptcopyout(sopt, &background, sizeof(background)));
2459 break;
2460 }
2461 #endif /* TRAFFIC_MGT */
2462
2463 case IP_BOUND_IF:
2464 if (inp->inp_flags & INP_BOUND_IF)
2465 optval = inp->inp_boundif;
2466 error = sooptcopyout(sopt, &optval, sizeof (optval));
2467 break;
2468
2469 default:
2470 error = ENOPROTOOPT;
2471 break;
2472 }
2473 break;
2474 }
2475 return (error);
2476 }
2477
2478 /*
2479 * Set up IP options in pcb for insertion in output packets.
2480 * Store in mbuf with pointer in pcbopt, adding pseudo-option
2481 * with destination address if source routed.
2482 */
2483 static int
2484 ip_pcbopts(
2485 __unused int optname,
2486 struct mbuf **pcbopt,
2487 register struct mbuf *m)
2488 {
2489 register int cnt, optlen;
2490 register u_char *cp;
2491 u_char opt;
2492
2493 /* turn off any old options */
2494 if (*pcbopt)
2495 (void)m_free(*pcbopt);
2496 *pcbopt = 0;
2497 if (m == (struct mbuf *)0 || m->m_len == 0) {
2498 /*
2499 * Only turning off any previous options.
2500 */
2501 if (m)
2502 (void)m_free(m);
2503 return (0);
2504 }
2505
2506 #ifndef vax
2507 if (m->m_len % sizeof(int32_t))
2508 goto bad;
2509 #endif
2510 /*
2511 * IP first-hop destination address will be stored before
2512 * actual options; move other options back
2513 * and clear it when none present.
2514 */
2515 if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
2516 goto bad;
2517 cnt = m->m_len;
2518 m->m_len += sizeof(struct in_addr);
2519 cp = mtod(m, u_char *) + sizeof(struct in_addr);
2520 ovbcopy(mtod(m, caddr_t), (caddr_t)cp, (unsigned)cnt);
2521 bzero(mtod(m, caddr_t), sizeof(struct in_addr));
2522
2523 for (; cnt > 0; cnt -= optlen, cp += optlen) {
2524 opt = cp[IPOPT_OPTVAL];
2525 if (opt == IPOPT_EOL)
2526 break;
2527 if (opt == IPOPT_NOP)
2528 optlen = 1;
2529 else {
2530 if (cnt < IPOPT_OLEN + sizeof(*cp))
2531 goto bad;
2532 optlen = cp[IPOPT_OLEN];
2533 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
2534 goto bad;
2535 }
2536 switch (opt) {
2537
2538 default:
2539 break;
2540
2541 case IPOPT_LSRR:
2542 case IPOPT_SSRR:
2543 /*
2544 * user process specifies route as:
2545 * ->A->B->C->D
2546 * D must be our final destination (but we can't
2547 * check that since we may not have connected yet).
2548 * A is first hop destination, which doesn't appear in
2549 * actual IP option, but is stored before the options.
2550 */
2551 if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
2552 goto bad;
2553 m->m_len -= sizeof(struct in_addr);
2554 cnt -= sizeof(struct in_addr);
2555 optlen -= sizeof(struct in_addr);
2556 cp[IPOPT_OLEN] = optlen;
2557 /*
2558 * Move first hop before start of options.
2559 */
2560 bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
2561 sizeof(struct in_addr));
2562 /*
2563 * Then copy rest of options back
2564 * to close up the deleted entry.
2565 */
2566 ovbcopy((caddr_t)(&cp[IPOPT_OFFSET+1] +
2567 sizeof(struct in_addr)),
2568 (caddr_t)&cp[IPOPT_OFFSET+1],
2569 (unsigned)cnt + sizeof(struct in_addr));
2570 break;
2571 }
2572 }
2573 if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
2574 goto bad;
2575 *pcbopt = m;
2576 return (0);
2577
2578 bad:
2579 (void)m_free(m);
2580 return (EINVAL);
2581 }
2582
2583 /*
2584 * XXX
2585 * The whole multicast option thing needs to be re-thought.
2586 * Several of these options are equally applicable to non-multicast
2587 * transmission, and one (IP_MULTICAST_TTL) totally duplicates a
2588 * standard option (IP_TTL).
2589 */
2590
2591 /*
2592 * following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as interface index.
2593 */
2594 static struct ifnet *
2595 ip_multicast_if(a, ifindexp)
2596 struct in_addr *a;
2597 int *ifindexp;
2598 {
2599 int ifindex;
2600 struct ifnet *ifp;
2601
2602 if (ifindexp)
2603 *ifindexp = 0;
2604 if (ntohl(a->s_addr) >> 24 == 0) {
2605 ifindex = ntohl(a->s_addr) & 0xffffff;
2606 ifnet_head_lock_shared();
2607 if (ifindex < 0 || if_index < ifindex) {
2608 ifnet_head_done();
2609 return NULL;
2610 }
2611 ifp = ifindex2ifnet[ifindex];
2612 ifnet_head_done();
2613 if (ifindexp)
2614 *ifindexp = ifindex;
2615 } else {
2616 INADDR_TO_IFP(*a, ifp);
2617 }
2618 return ifp;
2619 }
2620
2621 /*
2622 * Set the IP multicast options in response to user setsockopt().
2623 */
2624 static int
2625 ip_setmoptions(sopt, imop)
2626 struct sockopt *sopt;
2627 struct ip_moptions **imop;
2628 {
2629 int error = 0;
2630 int i;
2631 struct in_addr addr;
2632 struct ip_mreq mreq;
2633 struct ifnet *ifp = NULL;
2634 struct ip_moptions *imo = *imop;
2635 int ifindex;
2636
2637 if (imo == NULL) {
2638 /*
2639 * No multicast option buffer attached to the pcb;
2640 * allocate one and initialize to default values.
2641 */
2642 error = ip_createmoptions(imop);
2643 if (error != 0)
2644 return error;
2645 imo = *imop;
2646 }
2647
2648 switch (sopt->sopt_name) {
2649 /* store an index number for the vif you wanna use in the send */
2650 #if MROUTING
2651 case IP_MULTICAST_VIF:
2652 if (legal_vif_num == 0) {
2653 error = EOPNOTSUPP;
2654 break;
2655 }
2656 error = sooptcopyin(sopt, &i, sizeof i, sizeof i);
2657 if (error)
2658 break;
2659 if (!legal_vif_num(i) && (i != -1)) {
2660 error = EINVAL;
2661 break;
2662 }
2663 imo->imo_multicast_vif = i;
2664 break;
2665 #endif /* MROUTING */
2666
2667 case IP_MULTICAST_IF:
2668 /*
2669 * Select the interface for outgoing multicast packets.
2670 */
2671 error = sooptcopyin(sopt, &addr, sizeof addr, sizeof addr);
2672 if (error)
2673 break;
2674 /*
2675 * INADDR_ANY is used to remove a previous selection.
2676 * When no interface is selected, a default one is
2677 * chosen every time a multicast packet is sent.
2678 */
2679 if (addr.s_addr == INADDR_ANY) {
2680 imo->imo_multicast_ifp = NULL;
2681 break;
2682 }
2683 /*
2684 * The selected interface is identified by its local
2685 * IP address. Find the interface and confirm that
2686 * it supports multicasting.
2687 */
2688 ifp = ip_multicast_if(&addr, &ifindex);
2689 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
2690 error = EADDRNOTAVAIL;
2691 break;
2692 }
2693 imo->imo_multicast_ifp = ifp;
2694 if (ifindex)
2695 imo->imo_multicast_addr = addr;
2696 else
2697 imo->imo_multicast_addr.s_addr = INADDR_ANY;
2698 break;
2699
2700 case IP_MULTICAST_TTL:
2701 /*
2702 * Set the IP time-to-live for outgoing multicast packets.
2703 * The original multicast API required a char argument,
2704 * which is inconsistent with the rest of the socket API.
2705 * We allow either a char or an int.
2706 */
2707 if (sopt->sopt_valsize == 1) {
2708 u_char ttl;
2709 error = sooptcopyin(sopt, &ttl, 1, 1);
2710 if (error)
2711 break;
2712 imo->imo_multicast_ttl = ttl;
2713 } else {
2714 u_int ttl;
2715 error = sooptcopyin(sopt, &ttl, sizeof ttl,
2716 sizeof ttl);
2717 if (error)
2718 break;
2719 if (ttl > 255)
2720 error = EINVAL;
2721 else
2722 imo->imo_multicast_ttl = ttl;
2723 }
2724 break;
2725
2726 case IP_MULTICAST_LOOP:
2727 /*
2728 * Set the loopback flag for outgoing multicast packets.
2729 * Must be zero or one. The original multicast API required a
2730 * char argument, which is inconsistent with the rest
2731 * of the socket API. We allow either a char or an int.
2732 */
2733 if (sopt->sopt_valsize == 1) {
2734 u_char loop;
2735 error = sooptcopyin(sopt, &loop, 1, 1);
2736 if (error)
2737 break;
2738 imo->imo_multicast_loop = !!loop;
2739 } else {
2740 u_int loop;
2741 error = sooptcopyin(sopt, &loop, sizeof loop,
2742 sizeof loop);
2743 if (error)
2744 break;
2745 imo->imo_multicast_loop = !!loop;
2746 }
2747 break;
2748
2749 case IP_ADD_MEMBERSHIP:
2750 /*
2751 * Add a multicast group membership.
2752 * Group must be a valid IP multicast address.
2753 */
2754 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
2755 if (error)
2756 break;
2757
2758 error = ip_addmembership(imo, &mreq);
2759 break;
2760
2761 case IP_DROP_MEMBERSHIP:
2762 /*
2763 * Drop a multicast group membership.
2764 * Group must be a valid IP multicast address.
2765 */
2766 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
2767 if (error)
2768 break;
2769
2770 error = ip_dropmembership(imo, &mreq);
2771 break;
2772
2773 default:
2774 error = EOPNOTSUPP;
2775 break;
2776 }
2777
2778 /*
2779 * If all options have default values, no need to keep the mbuf.
2780 */
2781 if (imo->imo_multicast_ifp == NULL &&
2782 imo->imo_multicast_vif == (u_int32_t)-1 &&
2783 imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL &&
2784 imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP &&
2785 imo->imo_num_memberships == 0) {
2786 FREE(*imop, M_IPMOPTS);
2787 *imop = NULL;
2788 }
2789
2790 return (error);
2791 }
2792
2793 /*
2794 * Set the IP multicast options in response to user setsockopt().
2795 */
2796 __private_extern__ int
2797 ip_createmoptions(
2798 struct ip_moptions **imop)
2799 {
2800 struct ip_moptions *imo;
2801 imo = (struct ip_moptions*) _MALLOC(sizeof(*imo), M_IPMOPTS,
2802 M_WAITOK);
2803
2804 if (imo == NULL)
2805 return (ENOBUFS);
2806 *imop = imo;
2807 imo->imo_multicast_ifp = NULL;
2808 imo->imo_multicast_addr.s_addr = INADDR_ANY;
2809 imo->imo_multicast_vif = -1;
2810 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
2811 imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
2812 imo->imo_num_memberships = 0;
2813
2814 return 0;
2815 }
2816
2817 /*
2818 * Add membership to an IPv4 multicast.
2819 */
2820 __private_extern__ int
2821 ip_addmembership(
2822 struct ip_moptions *imo,
2823 struct ip_mreq *mreq)
2824 {
2825 struct route ro;
2826 struct sockaddr_in *dst;
2827 struct ifnet *ifp = NULL;
2828 int error = 0;
2829 int i;
2830
2831 bzero((caddr_t)&ro, sizeof(ro));
2832
2833 if (!IN_MULTICAST(ntohl(mreq->imr_multiaddr.s_addr))) {
2834 error = EINVAL;
2835 goto done;
2836 }
2837 /*
2838 * If no interface address was provided, use the interface of
2839 * the route to the given multicast address.
2840 */
2841 if (mreq->imr_interface.s_addr == INADDR_ANY) {
2842 dst = (struct sockaddr_in *)&ro.ro_dst;
2843 dst->sin_len = sizeof(*dst);
2844 dst->sin_family = AF_INET;
2845 dst->sin_addr = mreq->imr_multiaddr;
2846 rtalloc_ign(&ro, 0);
2847 if (ro.ro_rt != NULL) {
2848 ifp = ro.ro_rt->rt_ifp;
2849 } else {
2850 /* If there's no default route, try using loopback */
2851 mreq->imr_interface.s_addr = htonl(INADDR_LOOPBACK);
2852 }
2853 }
2854
2855 if (ifp == NULL) {
2856 ifp = ip_multicast_if(&mreq->imr_interface, NULL);
2857 }
2858
2859 /*
2860 * See if we found an interface, and confirm that it
2861 * supports multicast.
2862 */
2863 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
2864 error = EADDRNOTAVAIL;
2865 goto done;
2866 }
2867 /*
2868 * See if the membership already exists or if all the
2869 * membership slots are full.
2870 */
2871 for (i = 0; i < imo->imo_num_memberships; ++i) {
2872 if (imo->imo_membership[i]->inm_ifp == ifp &&
2873 imo->imo_membership[i]->inm_addr.s_addr
2874 == mreq->imr_multiaddr.s_addr)
2875 break;
2876 }
2877 if (i < imo->imo_num_memberships) {
2878 error = EADDRINUSE;
2879 goto done;
2880 }
2881 if (i == IP_MAX_MEMBERSHIPS) {
2882 error = ETOOMANYREFS;
2883 goto done;
2884 }
2885 /*
2886 * Everything looks good; add a new record to the multicast
2887 * address list for the given interface.
2888 */
2889 if ((imo->imo_membership[i] =
2890 in_addmulti(&mreq->imr_multiaddr, ifp)) == NULL) {
2891 error = ENOBUFS;
2892 goto done;
2893 }
2894 ++imo->imo_num_memberships;
2895
2896 done:
2897 if (ro.ro_rt != NULL)
2898 rtfree(ro.ro_rt);
2899
2900 return error;
2901 }
2902
2903 /*
2904 * Drop membership of an IPv4 multicast.
2905 */
2906 __private_extern__ int
2907 ip_dropmembership(
2908 struct ip_moptions *imo,
2909 struct ip_mreq *mreq)
2910 {
2911 int error = 0;
2912 struct ifnet* ifp = NULL;
2913 int i;
2914
2915 if (!IN_MULTICAST(ntohl(mreq->imr_multiaddr.s_addr))) {
2916 error = EINVAL;
2917 return error;
2918 }
2919
2920 /*
2921 * If an interface address was specified, get a pointer
2922 * to its ifnet structure.
2923 */
2924 if (mreq->imr_interface.s_addr == INADDR_ANY)
2925 ifp = NULL;
2926 else {
2927 ifp = ip_multicast_if(&mreq->imr_interface, NULL);
2928 if (ifp == NULL) {
2929 error = EADDRNOTAVAIL;
2930 return error;
2931 }
2932 }
2933 /*
2934 * Find the membership in the membership array.
2935 */
2936 for (i = 0; i < imo->imo_num_memberships; ++i) {
2937 if ((ifp == NULL ||
2938 imo->imo_membership[i]->inm_ifp == ifp) &&
2939 imo->imo_membership[i]->inm_addr.s_addr ==
2940 mreq->imr_multiaddr.s_addr)
2941 break;
2942 }
2943 if (i == imo->imo_num_memberships) {
2944 error = EADDRNOTAVAIL;
2945 return error;
2946 }
2947 /*
2948 * Give up the multicast address record to which the
2949 * membership points.
2950 */
2951 in_delmulti(&imo->imo_membership[i]);
2952 /*
2953 * Remove the gap in the membership array.
2954 */
2955 for (++i; i < imo->imo_num_memberships; ++i)
2956 imo->imo_membership[i-1] = imo->imo_membership[i];
2957 --imo->imo_num_memberships;
2958
2959 return error;
2960 }
2961
2962 /*
2963 * Return the IP multicast options in response to user getsockopt().
2964 */
2965 static int
2966 ip_getmoptions(sopt, imo)
2967 struct sockopt *sopt;
2968 register struct ip_moptions *imo;
2969 {
2970 struct in_addr addr;
2971 struct in_ifaddr *ia;
2972 int error, optval;
2973 u_char coptval;
2974
2975 error = 0;
2976 switch (sopt->sopt_name) {
2977 #if MROUTING
2978 case IP_MULTICAST_VIF:
2979 if (imo != NULL)
2980 optval = imo->imo_multicast_vif;
2981 else
2982 optval = -1;
2983 error = sooptcopyout(sopt, &optval, sizeof optval);
2984 break;
2985 #endif /* MROUTING */
2986
2987 case IP_MULTICAST_IF:
2988 if (imo == NULL || imo->imo_multicast_ifp == NULL)
2989 addr.s_addr = INADDR_ANY;
2990 else if (imo->imo_multicast_addr.s_addr) {
2991 /* return the value user has set */
2992 addr = imo->imo_multicast_addr;
2993 } else {
2994 IFP_TO_IA(imo->imo_multicast_ifp, ia);
2995 addr.s_addr = (ia == NULL) ? INADDR_ANY
2996 : IA_SIN(ia)->sin_addr.s_addr;
2997 if (ia != NULL)
2998 ifafree(&ia->ia_ifa);
2999 }
3000 error = sooptcopyout(sopt, &addr, sizeof addr);
3001 break;
3002
3003 case IP_MULTICAST_TTL:
3004 if (imo == 0)
3005 optval = coptval = IP_DEFAULT_MULTICAST_TTL;
3006 else
3007 optval = coptval = imo->imo_multicast_ttl;
3008 if (sopt->sopt_valsize == 1)
3009 error = sooptcopyout(sopt, &coptval, 1);
3010 else
3011 error = sooptcopyout(sopt, &optval, sizeof optval);
3012 break;
3013
3014 case IP_MULTICAST_LOOP:
3015 if (imo == 0)
3016 optval = coptval = IP_DEFAULT_MULTICAST_LOOP;
3017 else
3018 optval = coptval = imo->imo_multicast_loop;
3019 if (sopt->sopt_valsize == 1)
3020 error = sooptcopyout(sopt, &coptval, 1);
3021 else
3022 error = sooptcopyout(sopt, &optval, sizeof optval);
3023 break;
3024
3025 default:
3026 error = ENOPROTOOPT;
3027 break;
3028 }
3029 return (error);
3030 }
3031
3032 /*
3033 * Discard the IP multicast options.
3034 */
3035 void
3036 ip_freemoptions(imo)
3037 register struct ip_moptions *imo;
3038 {
3039 register int i;
3040
3041 if (imo != NULL) {
3042 for (i = 0; i < imo->imo_num_memberships; ++i)
3043 in_delmulti(&imo->imo_membership[i]);
3044 FREE(imo, M_IPMOPTS);
3045 }
3046 }
3047
3048 /*
3049 * Routine called from ip_output() to loop back a copy of an IP multicast
3050 * packet to the input queue of a specified interface. Note that this
3051 * calls the output routine of the loopback "driver", but with an interface
3052 * pointer that might NOT be a loopback interface -- evil, but easier than
3053 * replicating that code here.
3054 */
3055 static void
3056 ip_mloopback(ifp, m, dst, hlen)
3057 struct ifnet *ifp;
3058 register struct mbuf *m;
3059 register struct sockaddr_in *dst;
3060 int hlen;
3061 {
3062 register struct ip *ip;
3063 struct mbuf *copym;
3064 int sw_csum = (apple_hwcksum_tx == 0);
3065
3066 copym = m_copy(m, 0, M_COPYALL);
3067 if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen))
3068 copym = m_pullup(copym, hlen);
3069
3070 if (copym == NULL)
3071 return;
3072
3073 /*
3074 * We don't bother to fragment if the IP length is greater
3075 * than the interface's MTU. Can this possibly matter?
3076 */
3077 ip = mtod(copym, struct ip *);
3078
3079 #if BYTE_ORDER != BIG_ENDIAN
3080 HTONS(ip->ip_len);
3081 HTONS(ip->ip_off);
3082 #endif
3083
3084 ip->ip_sum = 0;
3085 ip->ip_sum = in_cksum(copym, hlen);
3086 /*
3087 * NB:
3088 * It's not clear whether there are any lingering
3089 * reentrancy problems in other areas which might
3090 * be exposed by using ip_input directly (in
3091 * particular, everything which modifies the packet
3092 * in-place). Yet another option is using the
3093 * protosw directly to deliver the looped back
3094 * packet. For the moment, we'll err on the side
3095 * of safety by using if_simloop().
3096 */
3097 #if 1 /* XXX */
3098 if (dst->sin_family != AF_INET) {
3099 printf("ip_mloopback: bad address family %d\n",
3100 dst->sin_family);
3101 dst->sin_family = AF_INET;
3102 }
3103 #endif
3104
3105 /*
3106 * Mark checksum as valid or calculate checksum for loopback.
3107 *
3108 * This is done this way because we have to embed the ifp of
3109 * the interface we will send the original copy of the packet
3110 * out on in the mbuf. ip_input will check if_hwassist of the
3111 * embedded ifp and ignore all csum_flags if if_hwassist is 0.
3112 * The UDP checksum has not been calculated yet.
3113 */
3114 if (sw_csum || (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA)) {
3115 if (!sw_csum && IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist)) {
3116 copym->m_pkthdr.csum_flags |=
3117 CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
3118 CSUM_IP_CHECKED | CSUM_IP_VALID;
3119 copym->m_pkthdr.csum_data = 0xffff;
3120 } else {
3121
3122 #if BYTE_ORDER != BIG_ENDIAN
3123 NTOHS(ip->ip_len);
3124 #endif
3125
3126 in_delayed_cksum(copym);
3127
3128 #if BYTE_ORDER != BIG_ENDIAN
3129 HTONS(ip->ip_len);
3130 #endif
3131
3132 }
3133 }
3134
3135 /*
3136 * TedW:
3137 * We need to send all loopback traffic down to dlil in case
3138 * a filter has tapped-in.
3139 */
3140
3141 /*
3142 * Stuff the 'real' ifp into the pkthdr, to be used in matching
3143 * in ip_input(); we need the loopback ifp/dl_tag passed as args
3144 * to make the loopback driver compliant with the data link
3145 * requirements.
3146 */
3147 if (lo_ifp) {
3148 copym->m_pkthdr.rcvif = ifp;
3149 dlil_output(lo_ifp, PF_INET, copym, 0,
3150 (struct sockaddr *) dst, 0);
3151 } else {
3152 printf("Warning: ip_output call to dlil_find_dltag failed!\n");
3153 m_freem(copym);
3154 }
3155 }
3156
3157 /*
3158 * Given a source IP address (and route, if available), determine the best
3159 * interface to send the packet from. Checking for (and updating) the
3160 * ROF_SRCIF_SELECTED flag in the pcb-supplied route placeholder is done
3161 * without any locks based on the assumption that ip_output() is single-
3162 * threaded per-pcb, i.e. for any given pcb there can only be one thread
3163 * performing output at the IP layer.
3164 */
3165 static struct ifaddr *
3166 in_selectsrcif(struct ip *ip, struct route *ro, unsigned int ifscope)
3167 {
3168 struct ifaddr *ifa = NULL;
3169 struct in_addr src = ip->ip_src;
3170 struct in_addr dst = ip->ip_dst;
3171 struct ifnet *rt_ifp;
3172 char s_src[16], s_dst[16];
3173
3174 if (ip_select_srcif_debug) {
3175 (void) inet_ntop(AF_INET, &src.s_addr, s_src, sizeof (s_src));
3176 (void) inet_ntop(AF_INET, &dst.s_addr, s_dst, sizeof (s_dst));
3177 }
3178
3179 if (ro->ro_rt != NULL)
3180 RT_LOCK(ro->ro_rt);
3181
3182 rt_ifp = (ro->ro_rt != NULL) ? ro->ro_rt->rt_ifp : NULL;
3183
3184 /*
3185 * Given the source IP address, find a suitable source interface
3186 * to use for transmission; if the caller has specified a scope,
3187 * optimize the search by looking at the addresses only for that
3188 * interface. This is still suboptimal, however, as we need to
3189 * traverse the per-interface list.
3190 */
3191 if (ifscope != IFSCOPE_NONE || ro->ro_rt != NULL) {
3192 unsigned int scope = ifscope;
3193
3194 /*
3195 * If no scope is specified and the route is stale (pointing
3196 * to a defunct interface) use the current primary interface;
3197 * this happens when switching between interfaces configured
3198 * with the same IP address. Otherwise pick up the scope
3199 * information from the route; the ULP may have looked up a
3200 * correct route and we just need to verify it here and mark
3201 * it with the ROF_SRCIF_SELECTED flag below.
3202 */
3203 if (scope == IFSCOPE_NONE) {
3204 scope = rt_ifp->if_index;
3205 if (scope != get_primary_ifscope() &&
3206 ro->ro_rt->generation_id != route_generation)
3207 scope = get_primary_ifscope();
3208 }
3209
3210 ifa = (struct ifaddr *)ifa_foraddr_scoped(src.s_addr, scope);
3211
3212 if (ip_select_srcif_debug && ifa != NULL) {
3213 if (ro->ro_rt != NULL) {
3214 printf("%s->%s ifscope %d->%d ifa_if %s%d "
3215 "ro_if %s%d\n", s_src, s_dst, ifscope,
3216 scope, ifa->ifa_ifp->if_name,
3217 ifa->ifa_ifp->if_unit, rt_ifp->if_name,
3218 rt_ifp->if_unit);
3219 } else {
3220 printf("%s->%s ifscope %d->%d ifa_if %s%d\n",
3221 s_src, s_dst, ifscope, scope,
3222 ifa->ifa_ifp->if_name,
3223 ifa->ifa_ifp->if_unit);
3224 }
3225 }
3226 }
3227
3228 /*
3229 * Slow path; search for an interface having the corresponding source
3230 * IP address if the scope was not specified by the caller, and:
3231 *
3232 * 1) There currently isn't any route, or,
3233 * 2) The interface used by the route does not own that source
3234 * IP address; in this case, the route will get blown away
3235 * and we'll do a more specific scoped search using the newly
3236 * found interface.
3237 */
3238 if (ifa == NULL && ifscope == IFSCOPE_NONE) {
3239 ifa = (struct ifaddr *)ifa_foraddr(src.s_addr);
3240
3241 if (ip_select_srcif_debug && ifa != NULL) {
3242 printf("%s->%s ifscope %d ifa_if %s%d\n",
3243 s_src, s_dst, ifscope, ifa->ifa_ifp->if_name,
3244 ifa->ifa_ifp->if_unit);
3245 }
3246 }
3247
3248 if (ro->ro_rt != NULL)
3249 RT_LOCK_ASSERT_HELD(ro->ro_rt);
3250 /*
3251 * If there is a non-loopback route with the wrong interface, or if
3252 * there is no interface configured with such an address, blow it
3253 * away. Except for local/loopback, we look for one with a matching
3254 * interface scope/index.
3255 */
3256 if (ro->ro_rt != NULL &&
3257 (ifa == NULL || (ifa->ifa_ifp != rt_ifp && rt_ifp != lo_ifp) ||
3258 !(ro->ro_rt->rt_flags & RTF_UP))) {
3259 if (ip_select_srcif_debug) {
3260 if (ifa != NULL) {
3261 printf("%s->%s ifscope %d ro_if %s%d != "
3262 "ifa_if %s%d (cached route cleared)\n",
3263 s_src, s_dst, ifscope, rt_ifp->if_name,
3264 rt_ifp->if_unit, ifa->ifa_ifp->if_name,
3265 ifa->ifa_ifp->if_unit);
3266 } else {
3267 printf("%s->%s ifscope %d ro_if %s%d "
3268 "(no ifa_if found)\n",
3269 s_src, s_dst, ifscope, rt_ifp->if_name,
3270 rt_ifp->if_unit);
3271 }
3272 }
3273
3274 RT_UNLOCK(ro->ro_rt);
3275 rtfree(ro->ro_rt);
3276 ro->ro_rt = NULL;
3277 ro->ro_flags &= ~ROF_SRCIF_SELECTED;
3278
3279 /*
3280 * If the destination is IPv4 LLA and the route's interface
3281 * doesn't match the source interface, then the source IP
3282 * address is wrong; it most likely belongs to the primary
3283 * interface associated with the IPv4 LL subnet. Drop the
3284 * packet rather than letting it go out and return an error
3285 * to the ULP. This actually applies not only to IPv4 LL
3286 * but other shared subnets; for now we explicitly test only
3287 * for the former case and save the latter for future.
3288 */
3289 if (IN_LINKLOCAL(ntohl(dst.s_addr)) &&
3290 !IN_LINKLOCAL(ntohl(src.s_addr)) && ifa != NULL) {
3291 ifafree(ifa);
3292 ifa = NULL;
3293 }
3294 }
3295
3296 if (ip_select_srcif_debug && ifa == NULL) {
3297 printf("%s->%s ifscope %d (neither ro_if/ifa_if found)\n",
3298 s_src, s_dst, ifscope);
3299 }
3300
3301 /*
3302 * If there is a route, mark it accordingly. If there isn't one,
3303 * we'll get here again during the next transmit (possibly with a
3304 * route) and the flag will get set at that point. For IPv4 LLA
3305 * destination, mark it only if the route has been fully resolved;
3306 * otherwise we want to come back here again when the route points
3307 * to the interface over which the ARP reply arrives on.
3308 */
3309 if (ro->ro_rt != NULL && (!IN_LINKLOCAL(ntohl(dst.s_addr)) ||
3310 (ro->ro_rt->rt_gateway->sa_family == AF_LINK &&
3311 SDL(ro->ro_rt->rt_gateway)->sdl_alen != 0))) {
3312 ro->ro_flags |= ROF_SRCIF_SELECTED;
3313 ro->ro_rt->generation_id = route_generation;
3314 }
3315
3316 if (ro->ro_rt != NULL)
3317 RT_UNLOCK(ro->ro_rt);
3318
3319 return (ifa);
3320 }
3321
3322 /*
3323 * Handler for setting IP_FORCE_OUT_IFP or IP_BOUND_IF socket option.
3324 */
3325 static void
3326 ip_bindif(struct inpcb *inp, unsigned int ifscope)
3327 {
3328 /*
3329 * A zero interface scope value indicates an "unbind".
3330 * Otherwise, take in whatever value the app desires;
3331 * the app may already know the scope (or force itself
3332 * to such a scope) ahead of time before the interface
3333 * gets attached. It doesn't matter either way; any
3334 * route lookup from this point on will require an
3335 * exact match for the embedded interface scope.
3336 */
3337 inp->inp_boundif = ifscope;
3338 if (inp->inp_boundif == IFSCOPE_NONE)
3339 inp->inp_flags &= ~INP_BOUND_IF;
3340 else
3341 inp->inp_flags |= INP_BOUND_IF;
3342
3343 /* Blow away any cached route in the PCB */
3344 if (inp->inp_route.ro_rt != NULL) {
3345 rtfree(inp->inp_route.ro_rt);
3346 inp->inp_route.ro_rt = NULL;
3347 }
3348 }