]> git.saurik.com Git - apple/xnu.git/blob - bsd/netinet/ip_output.c
a989e64e33da6224275720ec668870ec9deeac80
[apple/xnu.git] / bsd / netinet / ip_output.c
1 /*
2 * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1982, 1986, 1988, 1990, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94
61 * $FreeBSD: src/sys/netinet/ip_output.c,v 1.99.2.16 2001/07/19 06:37:26 kris Exp $
62 */
63 /*
64 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
65 * support for mandatory and extensible security protections. This notice
66 * is included in support of clause 2.2 (b) of the Apple Public License,
67 * Version 2.0.
68 */
69
70 #define _IP_VHL
71
72 #include <sys/param.h>
73 #include <sys/systm.h>
74 #include <sys/kernel.h>
75 #include <sys/malloc.h>
76 #include <sys/mbuf.h>
77 #include <sys/protosw.h>
78 #include <sys/socket.h>
79 #include <sys/socketvar.h>
80 #include <kern/locks.h>
81 #include <sys/sysctl.h>
82
83 #include <machine/endian.h>
84
85 #include <net/if.h>
86 #include <net/if_dl.h>
87 #include <net/route.h>
88
89 #include <netinet/in.h>
90 #include <netinet/in_systm.h>
91 #include <netinet/ip.h>
92 #include <netinet/in_pcb.h>
93 #include <netinet/in_var.h>
94 #include <netinet/ip_var.h>
95
96 #include <netinet/kpi_ipfilter_var.h>
97
98 #if CONFIG_MACF_NET
99 #include <security/mac_framework.h>
100 #endif
101
102 #include "faith.h"
103
104 #include <net/dlil.h>
105 #include <sys/kdebug.h>
106 #include <libkern/OSAtomic.h>
107
/*
 * kdebug trace codes for the IP output path; they are handed to
 * KERNEL_DEBUG() in this file.
 */
#define DBG_LAYER_BEG		NETDBG_CODE(DBG_NETIP, 1)
#define DBG_LAYER_END		NETDBG_CODE(DBG_NETIP, 3)
#define DBG_FNC_IP_OUTPUT	NETDBG_CODE(DBG_NETIP, (1 << 8) | 1)
#define DBG_FNC_IPSEC4_OUTPUT	NETDBG_CODE(DBG_NETIP, (2 << 8) | 1)

/*
 * Exchange the two low-order bytes of v and yield a value that always
 * fits in 16 bits.  Both halves are masked, so bits above bit 15 (a wider
 * argument, or sign extension from integer promotion of a negative value)
 * can no longer leak into the result.  For arguments in the range
 * 0..0xffff the result is unchanged from the unmasked form.
 *
 * NOTE: v is evaluated twice; do not pass an expression with side effects.
 */
#define SWAP16(v)	((((v) & 0xff) << 8) | (((v) >> 8) & 0xff))
114
115 #if IPSEC
116 #include <netinet6/ipsec.h>
117 #include <netkey/key.h>
118 #if IPSEC_DEBUG
119 #include <netkey/key_debug.h>
120 #else
121 #define KEYDEBUG(lev,arg)
122 #endif
123 #endif /*IPSEC*/
124
125 #include <netinet/ip_fw.h>
126 #include <netinet/ip_divert.h>
127
128 #if DUMMYNET
129 #include <netinet/ip_dummynet.h>
130 #endif
131
132 #if PF
133 #include <net/pfvar.h>
134 #endif /* PF */
135
#if IPFIREWALL_FORWARD_DEBUG
/*
 * Debug helper: print the IPv4 address held in the struct in_addr `a'
 * in dotted-quad notation (no trailing newline).
 *
 * Each octet is cast to unsigned int so that the argument type matches
 * the "%u" conversion; passing the 32-bit ntohl() result to "%ld" is a
 * format/argument mismatch on LP64 targets.  The argument is
 * parenthesised so that expressions such as *p expand correctly.
 *
 * The expansion deliberately keeps its trailing semicolon so that any
 * call site written against the previous expansion still compiles.
 * `a' is evaluated four times; do not pass an expression with side
 * effects.
 */
#define print_ip(a)	printf("%u.%u.%u.%u",				\
	    (unsigned int)((ntohl((a).s_addr) >> 24) & 0xFF),		\
	    (unsigned int)((ntohl((a).s_addr) >> 16) & 0xFF),		\
	    (unsigned int)((ntohl((a).s_addr) >> 8) & 0xFF),		\
	    (unsigned int)(ntohl((a).s_addr) & 0xFF));
#endif
142
143
/*
 * IP identification counter.  ip_output_list() stores htons(ip_id++) in
 * the header of packets that are neither forwarded nor raw output, when
 * RANDOM_IP_ID is not configured.
 */
144 u_short ip_id;
145
/*
 * File-local helpers.  Of these, ip_output_list() calls
 * ip_insertoptions() (it returns the mbuf to use and stores a length,
 * which the caller uses as the header length, through the int pointer),
 * ip_mloopback() (loops a copy of a multicast packet back) and
 * in_selectsrcif() (finds the source interface for a packet).
 */
146 static struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *);
147 static struct ifnet *ip_multicast_if(struct in_addr *, int *);
148 static void ip_mloopback(struct ifnet *, struct mbuf *,
149 struct sockaddr_in *, int);
150 static int ip_getmoptions(struct sockopt *, struct ip_moptions *);
151 static int ip_pcbopts(int, struct mbuf **, struct mbuf *);
152 static int ip_setmoptions(struct sockopt *, struct ip_moptions **);
153
154 static void ip_out_cksum_stats(int, u_int32_t);
155 static struct ifaddr *in_selectsrcif(struct ip *, struct route *, unsigned int);
156 static void ip_bindif(struct inpcb *, unsigned int);
157
/* Non-static routines declared here (external linkage). */
158 int ip_createmoptions(struct ip_moptions **imop);
159 int ip_addmembership(struct ip_moptions *imo, struct ip_mreq *mreq);
160 int ip_dropmembership(struct ip_moptions *imo, struct ip_mreq *mreq);
161 int ip_optcopy(struct ip *, struct ip *);
162 void in_delayed_cksum_offset(struct mbuf *, int );
163 void in_cksum_offset(struct mbuf* , size_t );
164
/*
 * Function pointer defined elsewhere.  NOTE(review): presumably the
 * firewall check hook referred to as <fr_checkp> in the
 * ip_output_list() return-value list -- confirm against its definition.
 */
165 extern int (*fr_checkp)(struct ip *, int, struct ifnet *, int, struct mbuf **);
166
/* Protocol switch table, defined elsewhere. */
167 extern struct protosw inetsw[];
168
/*
 * Link-local statistics; ip_output_list() bumps iplls_out_total and
 * iplls_out_badttl when the source or destination is IPv4 link-local.
 */
169 extern struct ip_linklocal_stat ip_linklocal_stat;
170 extern lck_mtx_t *ip_mutex;
171
/* When non-zero, ip_output_list() skips IPsec processing. */
172 /* temporary: for testing */
173 #if IPSEC
174 extern int ipsec_bypass;
175 #endif
176
/*
 * Read-write integer registered under the _net_inet_ip sysctl node as
 * "maxchainsent".  NOTE(review): its use is outside this part of the
 * file; the description string suggests it relates to dlil_output_list
 * -- confirm.
 */
177 static int ip_maxchainsent = 0;
178 SYSCTL_INT(_net_inet_ip, OID_AUTO, maxchainsent, CTLFLAG_RW,
179 &ip_maxchainsent, 0, "use dlil_output_list");
180 #if DEBUG
/*
 * DEBUG only: while non-zero, ip_output_list() rewrites the ECN field of
 * each outgoing ECT(0)/ECT(1) packet to CE and decrements this counter.
 */
181 static int forge_ce = 0;
182 SYSCTL_INT(_net_inet_ip, OID_AUTO, forge_ce, CTLFLAG_RW,
183 &forge_ce, 0, "Forge ECN CE");
184 #endif /* DEBUG */
185
/*
 * Read-write integer registered under the _net_inet_ip sysctl node as
 * "select_srcif_debug"; per its description it enables logging of source
 * interface selection debug info.
 */
186 static int ip_select_srcif_debug = 0;
187 SYSCTL_INT(_net_inet_ip, OID_AUTO, select_srcif_debug, CTLFLAG_RW,
188 &ip_select_srcif_debug, 0, "log source interface selection debug info");
189
/*
 * ip_output: hand one packet to the IP output path.
 *
 * Thin wrapper that forwards every argument to ip_output_list(), passing
 * 0 for its packetchain parameter, and returns that function's result
 * unchanged.
 *
 * The mbuf chain m0 carries the packet with a skeletal IP header (len,
 * off, ttl, proto, tos, src, dst) already in place.  The chain holding
 * the packet will be freed; the option mbuf opt, if supplied, will not
 * be.
 */
int
ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro, int flags,
    struct ip_moptions *imo, struct ip_out_args *ipoa)
{
	return (ip_output_list(m0, 0, opt, ro, flags, imo, ipoa));
}
209
210 /*
211 * Returns: 0 Success
212 * ENOMEM
213 * EADDRNOTAVAIL
214 * ENETUNREACH
215 * EHOSTUNREACH
216 * EACCES
217 * EMSGSIZE
218 * ENOBUFS
219 * ipsec4_getpolicybyaddr:??? [IPSEC 4th argument, contents modified]
220 * ipsec4_getpolicybysock:??? [IPSEC 4th argument, contents modified]
221 * key_spdacquire:??? [IPSEC]
222 * ipsec4_output:??? [IPSEC]
223 * <fr_checkp>:??? [firewall]
224 * ip_dn_io_ptr:??? [dummynet]
225 * dlil_output:??? [DLIL]
226 * dlil_output_list:??? [DLIL]
227 *
228 * Notes: The ipsec4_getpolicyby{addr|sock} function error returns are
229 * only used as the error return from this function where one of
230 * these functions fails to return a policy.
231 */
232 int
233 ip_output_list(
234 struct mbuf *m0,
235 int packetchain,
236 struct mbuf *opt,
237 struct route *ro,
238 int flags,
239 struct ip_moptions *imo,
240 struct ip_out_args *ipoa
241 )
242 {
243 struct ip *ip;
244 struct ifnet *ifp = NULL;
245 struct mbuf *m = m0, **mppn = NULL;
246 int hlen = sizeof (struct ip);
247 int len = 0, off, error = 0;
248 struct sockaddr_in *dst = NULL;
249 struct in_ifaddr *ia = NULL, *src_ia = NULL;
250 int isbroadcast, sw_csum;
251 struct in_addr pkt_dst;
252 #if IPSEC
253 struct route iproute;
254 struct socket *so = NULL;
255 struct secpolicy *sp = NULL;
256 #endif
257 #if IPFIREWALL_FORWARD
258 int fwd_rewrite_src = 0;
259 #endif
260 #if IPFIREWALL
261 struct ip_fw_args args;
262 #endif
263 int didfilter = 0;
264 ipfilter_t inject_filter_ref = 0;
265 struct m_tag *tag;
266 struct route saved_route;
267 struct ip_out_args saved_ipoa;
268 struct mbuf * packetlist;
269 int pktcnt = 0, tso = 0;
270 unsigned int ifscope;
271 boolean_t select_srcif;
272
273 KERNEL_DEBUG(DBG_FNC_IP_OUTPUT | DBG_FUNC_START, 0,0,0,0,0);
274
275 packetlist = m0;
276 #if IPFIREWALL
277 args.next_hop = NULL;
278 args.eh = NULL;
279 args.rule = NULL;
280 args.divert_rule = 0; /* divert cookie */
281 args.ipoa = NULL;
282
283 if (SLIST_EMPTY(&m0->m_pkthdr.tags))
284 goto ipfw_tags_done;
285
286 /* Grab info from mtags prepended to the chain */
287 #if DUMMYNET
288 if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID,
289 KERNEL_TAG_TYPE_DUMMYNET, NULL)) != NULL) {
290 struct dn_pkt_tag *dn_tag;
291
292 dn_tag = (struct dn_pkt_tag *)(tag+1);
293 args.rule = dn_tag->rule;
294 opt = NULL;
295 saved_route = dn_tag->ro;
296 ro = &saved_route;
297
298 imo = NULL;
299 dst = dn_tag->dn_dst;
300 ifp = dn_tag->ifp;
301 flags = dn_tag->flags;
302 saved_ipoa = dn_tag->ipoa;
303 ipoa = &saved_ipoa;
304
305 m_tag_delete(m0, tag);
306 }
307 #endif /* DUMMYNET */
308
309 #if IPDIVERT
310 if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID,
311 KERNEL_TAG_TYPE_DIVERT, NULL)) != NULL) {
312 struct divert_tag *div_tag;
313
314 div_tag = (struct divert_tag *)(tag+1);
315 args.divert_rule = div_tag->cookie;
316
317 m_tag_delete(m0, tag);
318 }
319 #endif /* IPDIVERT */
320
321 if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID,
322 KERNEL_TAG_TYPE_IPFORWARD, NULL)) != NULL) {
323 struct ip_fwd_tag *ipfwd_tag;
324
325 ipfwd_tag = (struct ip_fwd_tag *)(tag+1);
326 args.next_hop = ipfwd_tag->next_hop;
327
328 m_tag_delete(m0, tag);
329 }
330 ipfw_tags_done:
331 #endif /* IPFIREWALL */
332
333 m = m0;
334
335 #if DIAGNOSTIC
336 if ( !m || (m->m_flags & M_PKTHDR) != 0)
337 panic("ip_output no HDR");
338 if (!ro)
339 panic("ip_output no route, proto = %d",
340 mtod(m, struct ip *)->ip_p);
341 #endif
342
343 /*
344 * At present the IP_OUTARGS flag implies a request for IP to
345 * perform source interface selection. In the forwarding case,
346 * only the ifscope value is used, as source interface selection
347 * doesn't take place.
348 */
349 if (ip_doscopedroute && (flags & IP_OUTARGS)) {
350 select_srcif = !(flags & IP_FORWARDING);
351 ifscope = ipoa->ipoa_ifscope;
352 } else {
353 select_srcif = FALSE;
354 ifscope = IFSCOPE_NONE;
355 }
356
357 #if IPFIREWALL
358 if (args.rule != NULL) { /* dummynet already saw us */
359 ip = mtod(m, struct ip *);
360 hlen = IP_VHL_HL(ip->ip_vhl) << 2 ;
361 if (ro->ro_rt != NULL) {
362 RT_LOCK_SPIN(ro->ro_rt);
363 ia = (struct in_ifaddr *)ro->ro_rt->rt_ifa;
364 if (ia)
365 ifaref(&ia->ia_ifa);
366 RT_UNLOCK(ro->ro_rt);
367 }
368 #if IPSEC
369 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) {
370 so = ipsec_getsocket(m);
371 (void)ipsec_setsocket(m, NULL);
372 }
373 #endif
374 goto sendit;
375 }
376 #endif /* IPFIREWALL */
377
378 #if IPSEC
379 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) {
380 so = ipsec_getsocket(m);
381 (void)ipsec_setsocket(m, NULL);
382 }
383 #endif
384 loopit:
385 /*
386 * No need to process packet twice if we've
387 * already seen it
388 */
389 if (!SLIST_EMPTY(&m->m_pkthdr.tags))
390 inject_filter_ref = ipf_get_inject_filter(m);
391 else
392 inject_filter_ref = 0;
393
394 if (opt) {
395 m = ip_insertoptions(m, opt, &len);
396 hlen = len;
397 }
398 ip = mtod(m, struct ip *);
399 #if IPFIREWALL
400 pkt_dst = args.next_hop ? args.next_hop->sin_addr : ip->ip_dst;
401 #else
402 pkt_dst = ip->ip_dst;
403 #endif
404
405 /*
406 * Fill in IP header.
407 */
408 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
409 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, hlen >> 2);
410 ip->ip_off &= IP_DF;
411 #if RANDOM_IP_ID
412 ip->ip_id = ip_randomid();
413 #else
414 ip->ip_id = htons(ip_id++);
415 #endif
416 OSAddAtomic(1, &ipstat.ips_localout);
417 } else {
418 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
419 }
420
421 #if DEBUG
422 /* For debugging, we let the stack forge congestion */
423 if (forge_ce != 0 &&
424 ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_ECT1 ||
425 (ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_ECT0)) {
426 ip->ip_tos = (ip->ip_tos & ~IPTOS_ECN_MASK) | IPTOS_ECN_CE;
427 forge_ce--;
428 }
429 #endif /* DEBUG */
430
431 KERNEL_DEBUG(DBG_LAYER_BEG, ip->ip_dst.s_addr,
432 ip->ip_src.s_addr, ip->ip_p, ip->ip_off, ip->ip_len);
433
434 dst = (struct sockaddr_in *)&ro->ro_dst;
435
436 /*
437 * If there is a cached route,
438 * check that it is to the same destination
439 * and is still up. If not, free it and try again.
440 * The address family should also be checked in case of sharing the
441 * cache with IPv6.
442 */
443
444 if (ro->ro_rt != NULL) {
445 if (ro->ro_rt->generation_id != route_generation &&
446 ((flags & (IP_ROUTETOIF | IP_FORWARDING)) == 0) &&
447 (ip->ip_src.s_addr != INADDR_ANY)) {
448 src_ia = ifa_foraddr(ip->ip_src.s_addr);
449 if (src_ia == NULL) {
450 error = EADDRNOTAVAIL;
451 goto bad;
452 }
453 ifafree(&src_ia->ia_ifa);
454 }
455 /*
456 * Test rt_flags without holding rt_lock for performance
457 * reasons; if the route is down it will hopefully be
458 * caught by the layer below (since it uses this route
459 * as a hint) or during the next transmit.
460 */
461 if ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
462 dst->sin_family != AF_INET ||
463 dst->sin_addr.s_addr != pkt_dst.s_addr) {
464 rtfree(ro->ro_rt);
465 ro->ro_rt = NULL;
466 }
467 /*
468 * If we're doing source interface selection, we may not
469 * want to use this route; only synch up the generation
470 * count otherwise.
471 */
472 if (!select_srcif && ro->ro_rt != NULL &&
473 ro->ro_rt->generation_id != route_generation)
474 ro->ro_rt->generation_id = route_generation;
475 }
476 if (ro->ro_rt == NULL) {
477 bzero(dst, sizeof(*dst));
478 dst->sin_family = AF_INET;
479 dst->sin_len = sizeof(*dst);
480 dst->sin_addr = pkt_dst;
481 }
482 /*
483 * If routing to interface only,
484 * short circuit routing lookup.
485 */
486 #define ifatoia(ifa) ((struct in_ifaddr *)(ifa))
487 #define sintosa(sin) ((struct sockaddr *)(sin))
488 if (flags & IP_ROUTETOIF) {
489 if (ia)
490 ifafree(&ia->ia_ifa);
491 if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0) {
492 if ((ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) {
493 OSAddAtomic(1, &ipstat.ips_noroute);
494 error = ENETUNREACH;
495 goto bad;
496 }
497 }
498 ifp = ia->ia_ifp;
499 ip->ip_ttl = 1;
500 isbroadcast = in_broadcast(dst->sin_addr, ifp);
501 } else if (IN_MULTICAST(ntohl(pkt_dst.s_addr)) &&
502 imo != NULL && imo->imo_multicast_ifp != NULL) {
503 /*
504 * Bypass the normal routing lookup for multicast
505 * packets if the interface is specified.
506 */
507 ifp = imo->imo_multicast_ifp;
508 isbroadcast = 0;
509 if (ia != NULL)
510 ifafree(&ia->ia_ifa);
511
512 /* Macro takes reference on ia */
513 IFP_TO_IA(ifp, ia);
514 } else {
515 boolean_t cloneok = FALSE;
516 /*
517 * Perform source interface selection; the source IP address
518 * must belong to one of the addresses of the interface used
519 * by the route. For performance reasons, do this only if
520 * there is no route, or if the routing table has changed,
521 * or if we haven't done source interface selection on this
522 * route (for this PCB instance) before.
523 */
524 if (select_srcif && ip->ip_src.s_addr != INADDR_ANY &&
525 (ro->ro_rt == NULL || !(ro->ro_rt->rt_flags & RTF_UP) ||
526 ro->ro_rt->generation_id != route_generation ||
527 !(ro->ro_flags & ROF_SRCIF_SELECTED))) {
528 struct ifaddr *ifa;
529
530 /* Find the source interface */
531 ifa = in_selectsrcif(ip, ro, ifscope);
532
533 /*
534 * If the source address is spoofed (in the case
535 * of IP_RAWOUTPUT), or if this is destined for
536 * local/loopback, just let it go out using the
537 * interface of the route. Otherwise, there's no
538 * interface having such an address, so bail out.
539 */
540 if (ifa == NULL && !(flags & IP_RAWOUTPUT) &&
541 ifscope != lo_ifp->if_index) {
542 error = EADDRNOTAVAIL;
543 goto bad;
544 }
545
546 /*
547 * If the caller didn't explicitly specify the scope,
548 * pick it up from the source interface. If the cached
549 * route was wrong and was blown away as part of source
550 * interface selection, don't mask out RTF_PRCLONING
551 * since that route may have been allocated by the ULP,
552 * unless the IP header was created by the caller or
553 * the destination is IPv4 LLA. The check for the
554 * latter is needed because IPv4 LLAs are never scoped
555 * in the current implementation, and we don't want to
556 * replace the resolved IPv4 LLA route with one whose
557 * gateway points to that of the default gateway on
558 * the primary interface of the system.
559 */
560 if (ifa != NULL) {
561 if (ifscope == IFSCOPE_NONE)
562 ifscope = ifa->ifa_ifp->if_index;
563 ifafree(ifa);
564 cloneok = (!(flags & IP_RAWOUTPUT) &&
565 !(IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr))));
566 }
567 }
568
569 /*
570 * If this is the case, we probably don't want to allocate
571 * a protocol-cloned route since we didn't get one from the
572 * ULP. This lets TCP do its thing, while not burdening
573 * forwarding or ICMP with the overhead of cloning a route.
574 * Of course, we still want to do any cloning requested by
575 * the link layer, as this is probably required in all cases
576 * for correct operation (as it is for ARP).
577 */
578 if (ro->ro_rt == NULL) {
579 unsigned long ign = RTF_PRCLONING;
580 /*
581 * We make an exception here: if the destination
582 * address is INADDR_BROADCAST, allocate a protocol-
583 * cloned host route so that we end up with a route
584 * marked with the RTF_BROADCAST flag. Otherwise,
585 * we would end up referring to the default route,
586 * instead of creating a cloned host route entry.
587 * That would introduce inconsistencies between ULPs
588 * that allocate a route and those that don't. The
589 * RTF_BROADCAST route is important since we'd want
590 * to send out undirected IP broadcast packets using
591 * link-level broadcast address. Another exception
592 * is for ULP-created routes that got blown away by
593 * source interface selection (see above).
594 *
595 * These exceptions will no longer be necessary when
596 * the RTF_PRCLONING scheme is no longer present.
597 */
598 if (cloneok || dst->sin_addr.s_addr == INADDR_BROADCAST)
599 ign &= ~RTF_PRCLONING;
600
601 /*
602 * Loosen the route lookup criteria if the ifscope
603 * corresponds to the loopback interface; this is
604 * needed to support Application Layer Gateways
605 * listening on loopback, in conjunction with packet
606 * filter redirection rules. The final source IP
607 * address will be rewritten by the packet filter
608 * prior to the RFC1122 loopback check below.
609 */
610 if (ifscope == lo_ifp->if_index)
611 rtalloc_ign(ro, ign);
612 else
613 rtalloc_scoped_ign(ro, ign, ifscope);
614 }
615
616 if (ro->ro_rt == NULL) {
617 OSAddAtomic(1, &ipstat.ips_noroute);
618 error = EHOSTUNREACH;
619 goto bad;
620 }
621
622 if (ia)
623 ifafree(&ia->ia_ifa);
624 RT_LOCK_SPIN(ro->ro_rt);
625 ia = ifatoia(ro->ro_rt->rt_ifa);
626 if (ia)
627 ifaref(&ia->ia_ifa);
628 ifp = ro->ro_rt->rt_ifp;
629 ro->ro_rt->rt_use++;
630 if (ro->ro_rt->rt_flags & RTF_GATEWAY)
631 dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
632 if (ro->ro_rt->rt_flags & RTF_HOST) {
633 isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST);
634 } else {
635 /* Become a regular mutex */
636 RT_CONVERT_LOCK(ro->ro_rt);
637 isbroadcast = in_broadcast(dst->sin_addr, ifp);
638 }
639 RT_UNLOCK(ro->ro_rt);
640 }
641
642 if (IN_MULTICAST(ntohl(pkt_dst.s_addr))) {
643 struct in_multi *inm;
644
645 m->m_flags |= M_MCAST;
646 /*
647 * IP destination address is multicast. Make sure "dst"
648 * still points to the address in "ro". (It may have been
649 * changed to point to a gateway address, above.)
650 */
651 dst = (struct sockaddr_in *)&ro->ro_dst;
652 /*
653 * See if the caller provided any multicast options
654 */
655 if (imo != NULL) {
656 if ((flags & IP_RAWOUTPUT) == 0) ip->ip_ttl = imo->imo_multicast_ttl;
657 if (imo->imo_multicast_ifp != NULL) {
658 ifp = imo->imo_multicast_ifp;
659 }
660 #if MROUTING
661 if (imo->imo_multicast_vif != -1 &&
662 ((flags & IP_RAWOUTPUT) == 0 || ip->ip_src.s_addr == INADDR_ANY))
663 ip->ip_src.s_addr =
664 ip_mcast_src(imo->imo_multicast_vif);
665 #endif /* MROUTING */
666 } else
667 if ((flags & IP_RAWOUTPUT) == 0) ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
668 /*
669 * Confirm that the outgoing interface supports multicast.
670 */
671 if ((imo == NULL) || (imo->imo_multicast_vif == -1)) {
672 if ((ifp->if_flags & IFF_MULTICAST) == 0) {
673 OSAddAtomic(1, &ipstat.ips_noroute);
674 error = ENETUNREACH;
675 goto bad;
676 }
677 }
678 /*
679 * If source address not specified yet, use address
680 * of outgoing interface.
681 */
682 if (ip->ip_src.s_addr == INADDR_ANY) {
683 struct in_ifaddr *ia1;
684 lck_rw_lock_shared(in_ifaddr_rwlock);
685 TAILQ_FOREACH(ia1, &in_ifaddrhead, ia_link)
686 if (ia1->ia_ifp == ifp) {
687 ip->ip_src = IA_SIN(ia1)->sin_addr;
688 break;
689 }
690 lck_rw_done(in_ifaddr_rwlock);
691 if (ip->ip_src.s_addr == INADDR_ANY) {
692 error = ENETUNREACH;
693 goto bad;
694 }
695 }
696
697 ifnet_lock_shared(ifp);
698 IN_LOOKUP_MULTI(pkt_dst, ifp, inm);
699 ifnet_lock_done(ifp);
700 if (inm != NULL &&
701 (imo == NULL || imo->imo_multicast_loop)) {
702 /*
703 * If we belong to the destination multicast group
704 * on the outgoing interface, and the caller did not
705 * forbid loopback, loop back a copy.
706 */
707 if (!TAILQ_EMPTY(&ipv4_filters)) {
708 struct ipfilter *filter;
709 int seen = (inject_filter_ref == 0);
710 struct ipf_pktopts *ippo = 0, ipf_pktopts;
711
712 if (imo) {
713 ippo = &ipf_pktopts;
714 ipf_pktopts.ippo_mcast_ifnet = imo->imo_multicast_ifp;
715 ipf_pktopts.ippo_mcast_ttl = imo->imo_multicast_ttl;
716 ipf_pktopts.ippo_mcast_loop = imo->imo_multicast_loop;
717 }
718
719 ipf_ref();
720
721 /* 4135317 - always pass network byte order to filter */
722
723 #if BYTE_ORDER != BIG_ENDIAN
724 HTONS(ip->ip_len);
725 HTONS(ip->ip_off);
726 #endif
727
728 TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) {
729 if (seen == 0) {
730 if ((struct ipfilter *)inject_filter_ref == filter)
731 seen = 1;
732 } else if (filter->ipf_filter.ipf_output) {
733 errno_t result;
734 result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, ippo);
735 if (result == EJUSTRETURN) {
736 ipf_unref();
737 goto done;
738 }
739 if (result != 0) {
740 ipf_unref();
741 goto bad;
742 }
743 }
744 }
745
746 /* set back to host byte order */
747 ip = mtod(m, struct ip *);
748
749 #if BYTE_ORDER != BIG_ENDIAN
750 NTOHS(ip->ip_len);
751 NTOHS(ip->ip_off);
752 #endif
753
754 ipf_unref();
755 didfilter = 1;
756 }
757 ip_mloopback(ifp, m, dst, hlen);
758 }
759 #if MROUTING
760 else {
761 /*
762 * If we are acting as a multicast router, perform
763 * multicast forwarding as if the packet had just
764 * arrived on the interface to which we are about
765 * to send. The multicast forwarding function
766 * recursively calls this function, using the
767 * IP_FORWARDING flag to prevent infinite recursion.
768 *
769 * Multicasts that are looped back by ip_mloopback(),
770 * above, will be forwarded by the ip_input() routine,
771 * if necessary.
772 */
773 if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
774 /*
775 * Check if rsvp daemon is running. If not, don't
776 * set ip_moptions. This ensures that the packet
777 * is multicast and not just sent down one link
778 * as prescribed by rsvpd.
779 */
780 if (!rsvp_on)
781 imo = NULL;
782 if (ip_mforward(ip, ifp, m, imo) != 0) {
783 m_freem(m);
784 goto done;
785 }
786 }
787 }
788 #endif /* MROUTING */
789
790 /*
791 * Multicasts with a time-to-live of zero may be looped-
792 * back, above, but must not be transmitted on a network.
793 * Also, multicasts addressed to the loopback interface
794 * are not sent -- the above call to ip_mloopback() will
795 * loop back a copy if this host actually belongs to the
796 * destination group on the loopback interface.
797 */
798 if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) {
799 m_freem(m);
800 goto done;
801 }
802
803 goto sendit;
804 }
805 #ifndef notdef
806 /*
807 * If source address not specified yet, use address
808 * of outgoing interface.
809 */
810 if (ip->ip_src.s_addr == INADDR_ANY) {
811 ip->ip_src = IA_SIN(ia)->sin_addr;
812 #if IPFIREWALL_FORWARD
813 /* Keep note that we did this - if the firewall changes
814 * the next-hop, our interface may change, changing the
815 * default source IP. It's a shame so much effort happens
816 * twice. Oh well.
817 */
818 fwd_rewrite_src++;
819 #endif /* IPFIREWALL_FORWARD */
820 }
821 #endif /* notdef */
822
823 /*
824 * Look for broadcast address and
825 * verify user is allowed to send
826 * such a packet.
827 */
828 if (isbroadcast) {
829 if ((ifp->if_flags & IFF_BROADCAST) == 0) {
830 error = EADDRNOTAVAIL;
831 goto bad;
832 }
833 if ((flags & IP_ALLOWBROADCAST) == 0) {
834 error = EACCES;
835 goto bad;
836 }
837 /* don't allow broadcast messages to be fragmented */
838 if ((u_short)ip->ip_len > ifp->if_mtu) {
839 error = EMSGSIZE;
840 goto bad;
841 }
842 m->m_flags |= M_BCAST;
843 } else {
844 m->m_flags &= ~M_BCAST;
845 }
846
847 sendit:
848 #if PF
849 /* Invoke outbound packet filter */
850 if (pf_af_hook(ifp, mppn, &m, AF_INET, FALSE) != 0) {
851 if (packetlist == m0) {
852 packetlist = m;
853 mppn = NULL;
854 }
855 if (m != NULL) {
856 m0 = m;
857 /* Next packet in the chain */
858 goto loopit;
859 } else if (packetlist != NULL) {
860 /* No more packet; send down the chain */
861 goto sendchain;
862 }
863 /* Nothing left; we're done */
864 goto done;
865 }
866 m0 = m;
867 ip = mtod(m, struct ip *);
868 pkt_dst = ip->ip_dst;
869 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
870 #endif /* PF */
871 /*
872 * Force IP TTL to 255 following draft-ietf-zeroconf-ipv4-linklocal.txt
873 */
874 if (IN_LINKLOCAL(ntohl(ip->ip_src.s_addr)) || IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr))) {
875 ip_linklocal_stat.iplls_out_total++;
876 if (ip->ip_ttl != MAXTTL) {
877 ip_linklocal_stat.iplls_out_badttl++;
878 ip->ip_ttl = MAXTTL;
879 }
880 }
881
882 if (!didfilter && !TAILQ_EMPTY(&ipv4_filters)) {
883 struct ipfilter *filter;
884 int seen = (inject_filter_ref == 0);
885
886 /* Check that a TSO frame isn't passed to a filter.
887 * This could happen if a filter is inserted while
888 * TCP is sending the TSO packet.
889 */
890 if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4) {
891 error = EMSGSIZE;
892 goto bad;
893 }
894
895 ipf_ref();
896
897 /* 4135317 - always pass network byte order to filter */
898
899 #if BYTE_ORDER != BIG_ENDIAN
900 HTONS(ip->ip_len);
901 HTONS(ip->ip_off);
902 #endif
903
904 TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) {
905 if (seen == 0) {
906 if ((struct ipfilter *)inject_filter_ref == filter)
907 seen = 1;
908 } else if (filter->ipf_filter.ipf_output) {
909 errno_t result;
910 result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, 0);
911 if (result == EJUSTRETURN) {
912 ipf_unref();
913 goto done;
914 }
915 if (result != 0) {
916 ipf_unref();
917 goto bad;
918 }
919 }
920 }
921
922 /* set back to host byte order */
923 ip = mtod(m, struct ip *);
924
925 #if BYTE_ORDER != BIG_ENDIAN
926 NTOHS(ip->ip_len);
927 NTOHS(ip->ip_off);
928 #endif
929
930 ipf_unref();
931 }
932
933 #if IPSEC
934 /* temporary for testing only: bypass ipsec altogether */
935
936 if (ipsec_bypass != 0 || (flags & IP_NOIPSEC) != 0)
937 goto skip_ipsec;
938
939 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_START, 0,0,0,0,0);
940
941
942 /* get SP for this packet */
943 if (so == NULL)
944 sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, flags, &error);
945 else
946 sp = ipsec4_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error);
947
948 if (sp == NULL) {
949 IPSEC_STAT_INCREMENT(ipsecstat.out_inval);
950 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 0,0,0,0,0);
951 goto bad;
952 }
953
954 error = 0;
955
956 /* check policy */
957 switch (sp->policy) {
958 case IPSEC_POLICY_DISCARD:
959 case IPSEC_POLICY_GENERATE:
960 /*
961 * This packet is just discarded.
962 */
963 IPSEC_STAT_INCREMENT(ipsecstat.out_polvio);
964 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 1,0,0,0,0);
965 goto bad;
966
967 case IPSEC_POLICY_BYPASS:
968 case IPSEC_POLICY_NONE:
969 /* no need to do IPsec. */
970 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 2,0,0,0,0);
971 goto skip_ipsec;
972
973 case IPSEC_POLICY_IPSEC:
974 if (sp->req == NULL) {
975 /* acquire a policy */
976 error = key_spdacquire(sp);
977 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 3,0,0,0,0);
978 goto bad;
979 }
980 break;
981
982 case IPSEC_POLICY_ENTRUST:
983 default:
984 printf("ip_output: Invalid policy found. %d\n", sp->policy);
985 }
986 {
987 struct ipsec_output_state state;
988 bzero(&state, sizeof(state));
989 state.m = m;
990 if (flags & IP_ROUTETOIF) {
991 state.ro = &iproute;
992 bzero(&iproute, sizeof(iproute));
993 } else
994 state.ro = ro;
995 state.dst = (struct sockaddr *)dst;
996
997 ip->ip_sum = 0;
998
999 /*
1000 * XXX
1001 * delayed checksums are not currently compatible with IPsec
1002 */
1003 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
1004 in_delayed_cksum(m);
1005 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1006 }
1007
1008
1009 #if BYTE_ORDER != BIG_ENDIAN
1010 HTONS(ip->ip_len);
1011 HTONS(ip->ip_off);
1012 #endif
1013
1014 error = ipsec4_output(&state, sp, flags);
1015
1016 m0 = m = state.m;
1017
1018 if (flags & IP_ROUTETOIF) {
1019 /*
1020 * if we have tunnel mode SA, we may need to ignore
1021 * IP_ROUTETOIF.
1022 */
1023 if (state.ro != &iproute || state.ro->ro_rt != NULL) {
1024 flags &= ~IP_ROUTETOIF;
1025 ro = state.ro;
1026 }
1027 } else
1028 ro = state.ro;
1029
1030 dst = (struct sockaddr_in *)state.dst;
1031 if (error) {
1032 /* mbuf is already reclaimed in ipsec4_output. */
1033 m0 = NULL;
1034 switch (error) {
1035 case EHOSTUNREACH:
1036 case ENETUNREACH:
1037 case EMSGSIZE:
1038 case ENOBUFS:
1039 case ENOMEM:
1040 break;
1041 default:
1042 printf("ip4_output (ipsec): error code %d\n", error);
1043 /*fall through*/
1044 case ENOENT:
1045 /* don't show these error codes to the user */
1046 error = 0;
1047 break;
1048 }
1049 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 4,0,0,0,0);
1050 goto bad;
1051 }
1052 }
1053
1054 /* be sure to update variables that are affected by ipsec4_output() */
1055 ip = mtod(m, struct ip *);
1056
1057 #ifdef _IP_VHL
1058 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
1059 #else
1060 hlen = ip->ip_hl << 2;
1061 #endif
1062 /* Check that there wasn't a route change and src is still valid */
1063 if (ro->ro_rt != NULL && ro->ro_rt->generation_id != route_generation) {
1064 if ((src_ia = ifa_foraddr(ip->ip_src.s_addr)) == NULL &&
1065 ((flags & (IP_ROUTETOIF | IP_FORWARDING)) == 0)) {
1066 error = EADDRNOTAVAIL;
1067 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END,
1068 5,0,0,0,0);
1069 goto bad;
1070 }
1071 rtfree(ro->ro_rt);
1072 ro->ro_rt = NULL;
1073 if (src_ia != NULL)
1074 ifafree(&src_ia->ia_ifa);
1075 }
1076
1077 if (ro->ro_rt == NULL) {
1078 if ((flags & IP_ROUTETOIF) == 0) {
1079 printf("ip_output: can't update route after "
1080 "IPsec processing\n");
1081 error = EHOSTUNREACH; /*XXX*/
1082 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END,
1083 6,0,0,0,0);
1084 goto bad;
1085 }
1086 } else {
1087 if (ia)
1088 ifafree(&ia->ia_ifa);
1089 RT_LOCK_SPIN(ro->ro_rt);
1090 ia = ifatoia(ro->ro_rt->rt_ifa);
1091 if (ia)
1092 ifaref(&ia->ia_ifa);
1093 ifp = ro->ro_rt->rt_ifp;
1094 RT_UNLOCK(ro->ro_rt);
1095 }
1096
1097 /* make it flipped, again. */
1098
1099 #if BYTE_ORDER != BIG_ENDIAN
1100 NTOHS(ip->ip_len);
1101 NTOHS(ip->ip_off);
1102 #endif
1103
1104 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 7,0xff,0xff,0xff,0xff);
1105
1106 /* Pass to filters again */
1107 if (!TAILQ_EMPTY(&ipv4_filters)) {
1108 struct ipfilter *filter;
1109
1110 /* Check that a TSO frame isn't passed to a filter.
1111 * This could happen if a filter is inserted while
1112 * TCP is sending the TSO packet.
1113 */
1114 if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4) {
1115 error = EMSGSIZE;
1116 goto bad;
1117 }
1118
1119 ipf_ref();
1120
1121 /* 4135317 - always pass network byte order to filter */
1122
1123 #if BYTE_ORDER != BIG_ENDIAN
1124 HTONS(ip->ip_len);
1125 HTONS(ip->ip_off);
1126 #endif
1127
1128 TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) {
1129 if (filter->ipf_filter.ipf_output) {
1130 errno_t result;
1131 result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, 0);
1132 if (result == EJUSTRETURN) {
1133 ipf_unref();
1134 goto done;
1135 }
1136 if (result != 0) {
1137 ipf_unref();
1138 goto bad;
1139 }
1140 }
1141 }
1142
1143 /* set back to host byte order */
1144 ip = mtod(m, struct ip *);
1145
1146 #if BYTE_ORDER != BIG_ENDIAN
1147 NTOHS(ip->ip_len);
1148 NTOHS(ip->ip_off);
1149 #endif
1150
1151 ipf_unref();
1152 }
1153 skip_ipsec:
1154 #endif /*IPSEC*/
1155
1156 #if IPFIREWALL
1157 /*
1158 * IpHack's section.
1159 * - Xlate: translate packet's addr/port (NAT).
1160 * - Firewall: deny/allow/etc.
1161 * - Wrap: fake packet's addr/port <unimpl.>
1162 * - Encapsulate: put it in another IP and send out. <unimp.>
1163 */
1164 if (fr_checkp) {
1165 struct mbuf *m1 = m;
1166
1167 if ((error = (*fr_checkp)(ip, hlen, ifp, 1, &m1)) || !m1) {
1168 goto done;
1169 }
1170 ip = mtod(m0 = m = m1, struct ip *);
1171 }
1172
1173 /*
1174 * Check with the firewall...
1175 * but not if we are already being fwd'd from a firewall.
1176 */
1177 if (fw_enable && IPFW_LOADED && !args.next_hop) {
1178 struct sockaddr_in *old = dst;
1179
1180 args.m = m;
1181 args.next_hop = dst;
1182 args.oif = ifp;
1183 off = ip_fw_chk_ptr(&args);
1184 m = args.m;
1185 dst = args.next_hop;
1186
1187 /*
1188 * On return we must do the following:
1189 * IP_FW_PORT_DENY_FLAG -> drop the pkt (XXX new)
1190 * 1<=off<= 0xffff -> DIVERT
1191 * (off & IP_FW_PORT_DYNT_FLAG) -> send to a DUMMYNET pipe
1192 * (off & IP_FW_PORT_TEE_FLAG) -> TEE the packet
1193 * dst != old -> IPFIREWALL_FORWARD
1194 * off==0, dst==old -> accept
1195 * If some of the above modules is not compiled in, then
1196 * we shouldn't have to check the corresponding condition
1197 * (because the ipfw control socket should not accept
1198 * unsupported rules), but better play safe and drop
1199 * packets in case of doubt.
1200 */
1201 m0 = m;
1202 if ( (off & IP_FW_PORT_DENY_FLAG) || m == NULL) {
1203 if (m)
1204 m_freem(m);
1205 error = EACCES ;
1206 goto done ;
1207 }
1208 ip = mtod(m, struct ip *);
1209
1210 if (off == 0 && dst == old) {/* common case */
1211 goto pass ;
1212 }
1213 #if DUMMYNET
1214 if (DUMMYNET_LOADED && (off & IP_FW_PORT_DYNT_FLAG) != 0) {
1215 /*
1216 * pass the pkt to dummynet. Need to include
1217 * pipe number, m, ifp, ro, dst because these are
1218 * not recomputed in the next pass.
1219 * All other parameters have been already used and
1220 * so they are not needed anymore.
1221 * XXX note: if the ifp or ro entry are deleted
1222 * while a pkt is in dummynet, we are in trouble!
1223 */
1224 args.ro = ro;
1225 args.dst = dst;
1226 args.flags = flags;
1227 if (flags & IP_OUTARGS)
1228 args.ipoa = ipoa;
1229
1230 error = ip_dn_io_ptr(m, off & 0xffff, DN_TO_IP_OUT,
1231 &args);
1232 goto done;
1233 }
1234 #endif /* DUMMYNET */
1235 #if IPDIVERT
1236 if (off != 0 && (off & IP_FW_PORT_DYNT_FLAG) == 0) {
1237 struct mbuf *clone = NULL;
1238
1239 /* Clone packet if we're doing a 'tee' */
1240 if ((off & IP_FW_PORT_TEE_FLAG) != 0)
1241 clone = m_dup(m, M_DONTWAIT);
1242 /*
1243 * XXX
1244 * delayed checksums are not currently compatible
1245 * with divert sockets.
1246 */
1247 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
1248 in_delayed_cksum(m);
1249 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1250 }
1251
1252 /* Restore packet header fields to original values */
1253
1254 #if BYTE_ORDER != BIG_ENDIAN
1255 HTONS(ip->ip_len);
1256 HTONS(ip->ip_off);
1257 #endif
1258
1259 /* Deliver packet to divert input routine */
1260 divert_packet(m, 0, off & 0xffff, args.divert_rule);
1261
1262 /* If 'tee', continue with original packet */
1263 if (clone != NULL) {
1264 m0 = m = clone;
1265 ip = mtod(m, struct ip *);
1266 goto pass;
1267 }
1268 goto done;
1269 }
1270 #endif
1271
1272 #if IPFIREWALL_FORWARD
1273 /* Here we check dst to make sure it's directly reachable on the
1274 * interface we previously thought it was.
1275 * If it isn't (which may be likely in some situations) we have
1276 * to re-route it (ie, find a route for the next-hop and the
1277 * associated interface) and set them here. This is nested
1278 * forwarding which in most cases is undesirable, except where
1279 * such control is nigh impossible. So we do it here.
1280 * And I'm babbling.
1281 */
1282 if (off == 0 && old != dst) {
1283 struct in_ifaddr *ia_fw;
1284
1285 /* It's changed... */
1286 /* There must be a better way to do this next line... */
1287 static struct route sro_fwd, *ro_fwd = &sro_fwd;
1288 #if IPFIREWALL_FORWARD_DEBUG
1289 printf("IPFIREWALL_FORWARD: New dst ip: ");
1290 print_ip(dst->sin_addr);
1291 printf("\n");
1292 #endif
1293 /*
1294 * We need to figure out if we have been forwarded
1295 * to a local socket. If so then we should somehow
1296 * "loop back" to ip_input, and get directed to the
1297 * PCB as if we had received this packet. This is
1298 * because it may be difficult to identify the packets
1299 * you want to forward until they are being output
1300 * and have selected an interface. (e.g. locally
1301 * initiated packets) If we used the loopback interface,
1302 * we would not be able to control what happens
1303 * as the packet runs through ip_input() as
1304 * it is done through a ISR.
1305 */
1306 lck_rw_lock_shared(in_ifaddr_rwlock);
1307 TAILQ_FOREACH(ia_fw, &in_ifaddrhead, ia_link) {
1308 /*
1309 * If the addr to forward to is one
1310 * of ours, we pretend to
1311 * be the destination for this packet.
1312 */
1313 if (IA_SIN(ia_fw)->sin_addr.s_addr ==
1314 dst->sin_addr.s_addr)
1315 break;
1316 }
1317 lck_rw_done(in_ifaddr_rwlock);
1318 if (ia_fw) {
1319 /* tell ip_input "dont filter" */
1320 struct m_tag *fwd_tag;
1321 struct ip_fwd_tag *ipfwd_tag;
1322
1323 fwd_tag = m_tag_alloc(KERNEL_MODULE_TAG_ID,
1324 KERNEL_TAG_TYPE_IPFORWARD,
1325 sizeof (*ipfwd_tag), M_NOWAIT);
1326 if (fwd_tag == NULL) {
1327 error = ENOBUFS;
1328 goto bad;
1329 }
1330
1331 ipfwd_tag = (struct ip_fwd_tag *)(fwd_tag+1);
1332 ipfwd_tag->next_hop = args.next_hop;
1333
1334 m_tag_prepend(m, fwd_tag);
1335
1336 if (m->m_pkthdr.rcvif == NULL)
1337 m->m_pkthdr.rcvif = ifunit("lo0");
1338 if ((~IF_HWASSIST_CSUM_FLAGS(m->m_pkthdr.rcvif->if_hwassist) &
1339 m->m_pkthdr.csum_flags) == 0) {
1340 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
1341 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1342 m->m_pkthdr.csum_flags |=
1343 CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1344 m->m_pkthdr.csum_data = 0xffff;
1345 }
1346 m->m_pkthdr.csum_flags |=
1347 CSUM_IP_CHECKED | CSUM_IP_VALID;
1348 }
1349 else if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
1350 in_delayed_cksum(m);
1351 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1352 ip->ip_sum = in_cksum(m, hlen);
1353 }
1354
1355 #if BYTE_ORDER != BIG_ENDIAN
1356 HTONS(ip->ip_len);
1357 HTONS(ip->ip_off);
1358 #endif
1359
1360 /* we need to call dlil_output to run filters
1361 * and resync to avoid recursion loops.
1362 */
1363 if (lo_ifp) {
1364 dlil_output(lo_ifp, PF_INET, m, 0, (struct sockaddr *)dst, 0);
1365 }
1366 else {
1367 printf("ip_output: no loopback ifp for forwarding!!!\n");
1368 }
1369 goto done;
1370 }
1371 /* Some of the logic for this was
1372 * nicked from above.
1373 *
1374 * This rewrites the cached route in a local PCB.
1375 * Is this what we want to do?
1376 */
1377 bcopy(dst, &ro_fwd->ro_dst, sizeof(*dst));
1378
1379 ro_fwd->ro_rt = NULL;
1380 rtalloc_ign(ro_fwd, RTF_PRCLONING);
1381
1382 if (ro_fwd->ro_rt == NULL) {
1383 OSAddAtomic(1, &ipstat.ips_noroute);
1384 error = EHOSTUNREACH;
1385 goto bad;
1386 }
1387
1388 RT_LOCK_SPIN(ro_fwd->ro_rt);
1389 ia_fw = ifatoia(ro_fwd->ro_rt->rt_ifa);
1390 if (ia_fw != NULL)
1391 ifaref(&ia_fw->ia_ifa);
1392 ifp = ro_fwd->ro_rt->rt_ifp;
1393 ro_fwd->ro_rt->rt_use++;
1394 if (ro_fwd->ro_rt->rt_flags & RTF_GATEWAY)
1395 dst = (struct sockaddr_in *)ro_fwd->ro_rt->rt_gateway;
1396 if (ro_fwd->ro_rt->rt_flags & RTF_HOST) {
1397 isbroadcast =
1398 (ro_fwd->ro_rt->rt_flags & RTF_BROADCAST);
1399 } else {
1400 /* Become a regular mutex */
1401 RT_CONVERT_LOCK(ro_fwd->ro_rt);
1402 isbroadcast = in_broadcast(dst->sin_addr, ifp);
1403 }
1404 RT_UNLOCK(ro_fwd->ro_rt);
1405 rtfree(ro->ro_rt);
1406 ro->ro_rt = ro_fwd->ro_rt;
1407 dst = (struct sockaddr_in *)&ro_fwd->ro_dst;
1408
1409 /*
1410 * If we added a default src ip earlier,
1411 * which would have been gotten from the-then
1412 * interface, do it again, from the new one.
1413 */
1414 if (ia_fw != NULL) {
1415 if (fwd_rewrite_src)
1416 ip->ip_src = IA_SIN(ia_fw)->sin_addr;
1417 ifafree(&ia_fw->ia_ifa);
1418 }
1419 goto pass ;
1420 }
1421 #endif /* IPFIREWALL_FORWARD */
1422 /*
1423 * if we get here, none of the above matches, and
1424 * we have to drop the pkt
1425 */
1426 m_freem(m);
1427 error = EACCES; /* not sure this is the right error msg */
1428 goto done;
1429 }
1430 #endif /* IPFIREWALL */
1431
1432 pass:
1433 #if __APPLE__
1434 /* Do not allow loopback address to wind up on a wire */
1435 if ((ifp->if_flags & IFF_LOOPBACK) == 0 &&
1436 ((ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
1437 (ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)) {
1438 OSAddAtomic(1, &ipstat.ips_badaddr);
1439 m_freem(m);
1440 /*
1441 * Do not simply drop the packet just like a firewall -- we want
1442 * the application to feel the pain.
1443 * Return ENETUNREACH like ip6_output does in some similar cases.
1444 * This can startle the otherwise clueless process that specifies
1445 * loopback as the source address.
1446 */
1447 error = ENETUNREACH;
1448 goto done;
1449 }
1450 #endif
1451 m->m_pkthdr.csum_flags |= CSUM_IP;
1452 tso = (ifp->if_hwassist & IFNET_TSO_IPV4) && (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4);
1453
1454 sw_csum = m->m_pkthdr.csum_flags
1455 & ~IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);
1456
1457 if ((ifp->if_hwassist & CSUM_TCP_SUM16) != 0) {
1458 /*
1459 * Special case code for GMACE
1460 * frames that can be checksumed by GMACE SUM16 HW:
1461 * frame >64, no fragments, no UDP
1462 */
1463 if (apple_hwcksum_tx && (m->m_pkthdr.csum_flags & CSUM_TCP)
1464 && (ip->ip_len > 50) && (ip->ip_len <= ifp->if_mtu)) {
1465 /* Apple GMAC HW, expects STUFF_OFFSET << 16 | START_OFFSET */
1466 u_short offset = (IP_VHL_HL(ip->ip_vhl) << 2) +14 ; /* IP+Enet header length */
1467 u_short csumprev= m->m_pkthdr.csum_data & 0xFFFF;
1468 m->m_pkthdr.csum_flags = CSUM_DATA_VALID | CSUM_TCP_SUM16; /* for GMAC */
1469 m->m_pkthdr.csum_data = (csumprev + offset) << 16 ;
1470 m->m_pkthdr.csum_data += offset;
1471 sw_csum = CSUM_DELAY_IP; /* do IP hdr chksum in software */
1472 }
1473 else {
1474 /* let the software handle any UDP or TCP checksums */
1475 sw_csum |= (CSUM_DELAY_DATA & m->m_pkthdr.csum_flags);
1476 }
1477 } else if (apple_hwcksum_tx == 0) {
1478 sw_csum |= (CSUM_DELAY_DATA | CSUM_DELAY_IP) &
1479 m->m_pkthdr.csum_flags;
1480 }
1481
1482 if (sw_csum & CSUM_DELAY_DATA) {
1483 in_delayed_cksum(m);
1484 sw_csum &= ~CSUM_DELAY_DATA;
1485 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1486 }
1487
1488 if (apple_hwcksum_tx != 0) {
1489 m->m_pkthdr.csum_flags &=
1490 IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);
1491 } else {
1492 m->m_pkthdr.csum_flags = 0;
1493 }
1494
1495 /*
1496 * If small enough for interface, or the interface will take
1497 * care of the fragmentation for us, can just send directly.
1498 */
1499 if ((u_short)ip->ip_len <= ifp->if_mtu || tso ||
1500 ifp->if_hwassist & CSUM_FRAGMENT) {
1501 if (tso)
1502 m->m_pkthdr.csum_flags |= CSUM_TSO_IPV4;
1503
1504
1505 #if BYTE_ORDER != BIG_ENDIAN
1506 HTONS(ip->ip_len);
1507 HTONS(ip->ip_off);
1508 #endif
1509
1510 ip->ip_sum = 0;
1511 if (sw_csum & CSUM_DELAY_IP) {
1512 ip->ip_sum = in_cksum(m, hlen);
1513 }
1514
1515 #ifndef __APPLE__
1516 /* Record statistics for this interface address. */
1517 if (!(flags & IP_FORWARDING) && ia != NULL) {
1518 ia->ia_ifa.if_opackets++;
1519 ia->ia_ifa.if_obytes += m->m_pkthdr.len;
1520 }
1521 #endif
1522
1523 #if IPSEC
1524 /* clean ipsec history once it goes out of the node */
1525 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0)
1526 ipsec_delaux(m);
1527 #endif
1528 if (packetchain == 0) {
1529 error = ifnet_output(ifp, PF_INET, m, ro->ro_rt,
1530 (struct sockaddr *)dst);
1531 goto done;
1532 }
1533 else { /* packet chaining allows us to reuse the route for all packets */
1534 mppn = &m->m_nextpkt;
1535 m = m->m_nextpkt;
1536 if (m == NULL) {
1537 #if PF
1538 sendchain:
1539 #endif /* PF */
1540 if (pktcnt > ip_maxchainsent)
1541 ip_maxchainsent = pktcnt;
1542 //send
1543 error = ifnet_output(ifp, PF_INET, packetlist,
1544 ro->ro_rt, (struct sockaddr *)dst);
1545 pktcnt = 0;
1546 goto done;
1547
1548 }
1549 m0 = m;
1550 pktcnt++;
1551 goto loopit;
1552 }
1553 }
1554 /*
1555 * Too large for interface; fragment if possible.
1556 * Must be able to put at least 8 bytes per fragment.
1557 */
1558
1559 if (ip->ip_off & IP_DF || (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4)) {
1560 error = EMSGSIZE;
1561 /*
1562 * This case can happen if the user changed the MTU
1563 *
1564 * of an interface after enabling IP on it. Because
1565 * most netifs don't keep track of routes pointing to
1566 * them, there is no way for one to update all its
1567 * routes when the MTU is changed.
1568 */
1569 RT_LOCK_SPIN(ro->ro_rt);
1570 if (ro->ro_rt && (ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST))
1571 && !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU)
1572 && (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) {
1573 ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
1574 }
1575 RT_UNLOCK(ro->ro_rt);
1576 OSAddAtomic(1, &ipstat.ips_cantfrag);
1577 goto bad;
1578 }
1579
1580 error = ip_fragment(m, ifp, ifp->if_mtu, sw_csum);
1581 if (error != 0) {
1582 m0 = m = NULL;
1583 goto bad;
1584 }
1585
1586 KERNEL_DEBUG(DBG_LAYER_END, ip->ip_dst.s_addr,
1587 ip->ip_src.s_addr, ip->ip_p, ip->ip_off, ip->ip_len);
1588
1589 for (m = m0; m; m = m0) {
1590 m0 = m->m_nextpkt;
1591 m->m_nextpkt = 0;
1592 #if IPSEC
1593 /* clean ipsec history once it goes out of the node */
1594 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0)
1595 ipsec_delaux(m);
1596 #endif
1597 if (error == 0) {
1598 #ifndef __APPLE__
1599 /* Record statistics for this interface address. */
1600 if (ia != NULL) {
1601 ia->ia_ifa.if_opackets++;
1602 ia->ia_ifa.if_obytes += m->m_pkthdr.len;
1603 }
1604 #endif
1605 if ((packetchain != 0) && (pktcnt > 0))
1606 panic("ip_output: mix of packet in packetlist is wrong=%p", packetlist);
1607 error = ifnet_output(ifp, PF_INET, m, ro->ro_rt,
1608 (struct sockaddr *)dst);
1609 } else
1610 m_freem(m);
1611 }
1612
1613 if (error == 0)
1614 OSAddAtomic(1, &ipstat.ips_fragmented);
1615
1616 done:
1617 if (ia) {
1618 ifafree(&ia->ia_ifa);
1619 ia = NULL;
1620 }
1621 #if IPSEC
1622 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) {
1623 if (ro == &iproute && ro->ro_rt) {
1624 rtfree(ro->ro_rt);
1625 ro->ro_rt = NULL;
1626 }
1627 if (sp != NULL) {
1628 KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
1629 printf("DP ip_output call free SP:%x\n", sp));
1630 key_freesp(sp, KEY_SADB_UNLOCKED);
1631 }
1632 }
1633 #endif /* IPSEC */
1634
1635 KERNEL_DEBUG(DBG_FNC_IP_OUTPUT | DBG_FUNC_END, error,0,0,0,0);
1636 return (error);
1637 bad:
1638 m_freem(m0);
1639 goto done;
1640 }
1641
1642 int
1643 ip_fragment(struct mbuf *m, struct ifnet *ifp, unsigned long mtu, int sw_csum)
1644 {
1645 struct ip *ip, *mhip;
1646 int len, hlen, mhlen, firstlen, off, error = 0;
1647 struct mbuf **mnext = &m->m_nextpkt, *m0;
1648 int nfrags = 1;
1649
1650 ip = mtod(m, struct ip *);
1651 #ifdef _IP_VHL
1652 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
1653 #else
1654 hlen = ip->ip_hl << 2;
1655 #endif
1656
1657 firstlen = len = (mtu - hlen) &~ 7;
1658 if (len < 8) {
1659 m_freem(m);
1660 return (EMSGSIZE);
1661 }
1662
1663 /*
1664 * if the interface will not calculate checksums on
1665 * fragmented packets, then do it here.
1666 */
1667 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA &&
1668 (ifp->if_hwassist & CSUM_IP_FRAGS) == 0) {
1669 in_delayed_cksum(m);
1670 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1671 }
1672
1673 /*
1674 * Loop through length of segment after first fragment,
1675 * make new header and copy data of each part and link onto chain.
1676 */
1677 m0 = m;
1678 mhlen = sizeof (struct ip);
1679 for (off = hlen + len; off < (u_short)ip->ip_len; off += len) {
1680 MGETHDR(m, M_DONTWAIT, MT_HEADER); /* MAC-OK */
1681 if (m == 0) {
1682 error = ENOBUFS;
1683 OSAddAtomic(1, &ipstat.ips_odropped);
1684 goto sendorfree;
1685 }
1686 m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG;
1687 m->m_data += max_linkhdr;
1688 mhip = mtod(m, struct ip *);
1689 *mhip = *ip;
1690 if (hlen > sizeof (struct ip)) {
1691 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
1692 mhip->ip_vhl = IP_MAKE_VHL(IPVERSION, mhlen >> 2);
1693 }
1694 m->m_len = mhlen;
1695 mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF);
1696 if (ip->ip_off & IP_MF)
1697 mhip->ip_off |= IP_MF;
1698 if (off + len >= (u_short)ip->ip_len)
1699 len = (u_short)ip->ip_len - off;
1700 else
1701 mhip->ip_off |= IP_MF;
1702 mhip->ip_len = htons((u_short)(len + mhlen));
1703 m->m_next = m_copy(m0, off, len);
1704 if (m->m_next == 0) {
1705 (void) m_free(m);
1706 error = ENOBUFS; /* ??? */
1707 OSAddAtomic(1, &ipstat.ips_odropped);
1708 goto sendorfree;
1709 }
1710 m->m_pkthdr.len = mhlen + len;
1711 m->m_pkthdr.rcvif = 0;
1712 m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
1713 m->m_pkthdr.socket_id = m0->m_pkthdr.socket_id;
1714 #if CONFIG_MACF_NET
1715 mac_netinet_fragment(m0, m);
1716 #endif
1717
1718 #if BYTE_ORDER != BIG_ENDIAN
1719 HTONS(mhip->ip_off);
1720 #endif
1721
1722 mhip->ip_sum = 0;
1723 if (sw_csum & CSUM_DELAY_IP) {
1724 mhip->ip_sum = in_cksum(m, mhlen);
1725 }
1726 *mnext = m;
1727 mnext = &m->m_nextpkt;
1728 nfrags++;
1729 }
1730 OSAddAtomic(nfrags, &ipstat.ips_ofragments);
1731
1732 /* set first/last markers for fragment chain */
1733 m->m_flags |= M_LASTFRAG;
1734 m0->m_flags |= M_FIRSTFRAG | M_FRAG;
1735 m0->m_pkthdr.csum_data = nfrags;
1736
1737 /*
1738 * Update first fragment by trimming what's been copied out
1739 * and updating header, then send each fragment (in order).
1740 */
1741 m = m0;
1742 m_adj(m, hlen + firstlen - (u_short)ip->ip_len);
1743 m->m_pkthdr.len = hlen + firstlen;
1744 ip->ip_len = htons((u_short)m->m_pkthdr.len);
1745 ip->ip_off |= IP_MF;
1746
1747 #if BYTE_ORDER != BIG_ENDIAN
1748 HTONS(ip->ip_off);
1749 #endif
1750
1751 ip->ip_sum = 0;
1752 if (sw_csum & CSUM_DELAY_IP) {
1753 ip->ip_sum = in_cksum(m, hlen);
1754 }
1755 sendorfree:
1756 if (error)
1757 m_freem_list(m0);
1758
1759 return (error);
1760 }
1761
1762 static void
1763 ip_out_cksum_stats(int proto, u_int32_t len)
1764 {
1765 switch (proto) {
1766 case IPPROTO_TCP:
1767 tcp_out_cksum_stats(len);
1768 break;
1769 case IPPROTO_UDP:
1770 udp_out_cksum_stats(len);
1771 break;
1772 default:
1773 /* keep only TCP or UDP stats for now */
1774 break;
1775 }
1776 }
1777
1778 void
1779 in_delayed_cksum_offset(struct mbuf *m0, int ip_offset)
1780 {
1781 struct ip *ip;
1782 unsigned char buf[sizeof(struct ip)];
1783 u_short csum, offset, ip_len;
1784 struct mbuf *m = m0;
1785
1786 while (ip_offset >= m->m_len) {
1787 ip_offset -= m->m_len;
1788 m = m->m_next;
1789 if (m == NULL) {
1790 printf("in_delayed_cksum_withoffset failed - ip_offset wasn't in the packet\n");
1791 return;
1792 }
1793 }
1794
1795 /* Sometimes the IP header is not contiguous, yes this can happen! */
1796 if (ip_offset + sizeof(struct ip) > m->m_len) {
1797 #if DEBUG
1798 printf("delayed m_pullup, m->len: %d off: %d\n",
1799 m->m_len, ip_offset);
1800 #endif
1801 m_copydata(m, ip_offset, sizeof(struct ip), (caddr_t) buf);
1802
1803 ip = (struct ip *)buf;
1804 } else {
1805 ip = (struct ip*)(m->m_data + ip_offset);
1806 }
1807
1808 /* Gross */
1809 if (ip_offset) {
1810 m->m_len -= ip_offset;
1811 m->m_data += ip_offset;
1812 }
1813
1814 offset = IP_VHL_HL(ip->ip_vhl) << 2 ;
1815
1816 /*
1817 * We could be in the context of an IP or interface filter; in the
1818 * former case, ip_len would be in host (correct) order while for
1819 * the latter it would be in network order. Because of this, we
1820 * attempt to interpret the length field by comparing it against
1821 * the actual packet length. If the comparison fails, byte swap
1822 * the length and check again. If it still fails, then the packet
1823 * is bogus and we give up.
1824 */
1825 ip_len = ip->ip_len;
1826 if (ip_len != (m0->m_pkthdr.len - ip_offset)) {
1827 ip_len = SWAP16(ip_len);
1828 if (ip_len != (m0->m_pkthdr.len - ip_offset)) {
1829 printf("in_delayed_cksum_offset: ip_len %d (%d) "
1830 "doesn't match actual length %d\n", ip->ip_len,
1831 ip_len, (m0->m_pkthdr.len - ip_offset));
1832 return;
1833 }
1834 }
1835
1836 csum = in_cksum_skip(m, ip_len, offset);
1837
1838 /* Update stats */
1839 ip_out_cksum_stats(ip->ip_p, ip_len - offset);
1840
1841 if (m0->m_pkthdr.csum_flags & CSUM_UDP && csum == 0)
1842 csum = 0xffff;
1843 offset += m0->m_pkthdr.csum_data & 0xFFFF; /* checksum offset */
1844
1845 /* Gross */
1846 if (ip_offset) {
1847 if (M_LEADINGSPACE(m) < ip_offset)
1848 panic("in_delayed_cksum_offset - chain modified!\n");
1849 m->m_len += ip_offset;
1850 m->m_data -= ip_offset;
1851 }
1852
1853 if (offset > ip_len) /* bogus offset */
1854 return;
1855
1856 /* Insert the checksum in the existing chain */
1857 if (offset + ip_offset + sizeof(u_short) > m->m_len) {
1858 char tmp[2];
1859
1860 #if DEBUG
1861 printf("delayed m_copyback, m->len: %d off: %d p: %d\n",
1862 m->m_len, offset + ip_offset, ip->ip_p);
1863 #endif
1864 *(u_short *)tmp = csum;
1865 m_copyback(m, offset + ip_offset, 2, tmp);
1866 } else
1867 *(u_short *)(m->m_data + offset + ip_offset) = csum;
1868 }
1869
/*
 * Shorthand for in_delayed_cksum_offset() with an IP header offset of zero,
 * i.e. for a chain that begins with the IP header.
 */
void
in_delayed_cksum(struct mbuf *m)
{
	const int ip_hdr_offset = 0;

	in_delayed_cksum_offset(m, ip_hdr_offset);
}
1875
1876 void
1877 in_cksum_offset(struct mbuf* m, size_t ip_offset)
1878 {
1879 struct ip* ip = NULL;
1880 int hlen = 0;
1881 unsigned char buf[sizeof(struct ip)];
1882 int swapped = 0;
1883
1884 while (ip_offset >= m->m_len) {
1885 ip_offset -= m->m_len;
1886 m = m->m_next;
1887 if (m == NULL) {
1888 printf("in_cksum_offset failed - ip_offset wasn't in the packet\n");
1889 return;
1890 }
1891 }
1892
1893 /* Sometimes the IP header is not contiguous, yes this can happen! */
1894 if (ip_offset + sizeof(struct ip) > m->m_len) {
1895
1896 #if DEBUG
1897 printf("in_cksum_offset - delayed m_pullup, m->len: %d off: %lu\n",
1898 m->m_len, ip_offset);
1899 #endif
1900 m_copydata(m, ip_offset, sizeof(struct ip), (caddr_t) buf);
1901
1902 ip = (struct ip *)buf;
1903 ip->ip_sum = 0;
1904 m_copyback(m, ip_offset + offsetof(struct ip, ip_sum), 2, (caddr_t)&ip->ip_sum);
1905 } else {
1906 ip = (struct ip*)(m->m_data + ip_offset);
1907 ip->ip_sum = 0;
1908 }
1909
1910 /* Gross */
1911 if (ip_offset) {
1912 m->m_len -= ip_offset;
1913 m->m_data += ip_offset;
1914 }
1915
1916 #ifdef _IP_VHL
1917 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
1918 #else
1919 hlen = ip->ip_hl << 2;
1920 #endif
1921 /*
1922 * We could be in the context of an IP or interface filter; in the
1923 * former case, ip_len would be in host order while for the latter
1924 * it would be in network (correct) order. Because of this, we
1925 * attempt to interpret the length field by comparing it against
1926 * the actual packet length. If the comparison fails, byte swap
1927 * the length and check again. If it still fails, then the packet
1928 * is bogus and we give up.
1929 */
1930 if (ntohs(ip->ip_len) != (m->m_pkthdr.len - ip_offset)) {
1931 ip->ip_len = SWAP16(ip->ip_len);
1932 swapped = 1;
1933 if (ntohs(ip->ip_len) != (m->m_pkthdr.len - ip_offset)) {
1934 ip->ip_len = SWAP16(ip->ip_len);
1935 printf("in_cksum_offset: ip_len %d (%d) "
1936 "doesn't match actual length %lu\n",
1937 ip->ip_len, SWAP16(ip->ip_len),
1938 (m->m_pkthdr.len - ip_offset));
1939 return;
1940 }
1941 }
1942
1943 ip->ip_sum = 0;
1944 ip->ip_sum = in_cksum(m, hlen);
1945 if (swapped)
1946 ip->ip_len = SWAP16(ip->ip_len);
1947
1948 /* Gross */
1949 if (ip_offset) {
1950 if (M_LEADINGSPACE(m) < ip_offset)
1951 panic("in_cksum_offset - chain modified!\n");
1952 m->m_len += ip_offset;
1953 m->m_data -= ip_offset;
1954 }
1955
1956 /* Insert the checksum in the existing chain if IP header not contiguous */
1957 if (ip_offset + sizeof(struct ip) > m->m_len) {
1958 char tmp[2];
1959
1960 #if DEBUG
1961 printf("in_cksum_offset m_copyback, m->len: %u off: %lu p: %d\n",
1962 m->m_len, ip_offset + offsetof(struct ip, ip_sum), ip->ip_p);
1963 #endif
1964 *(u_short *)tmp = ip->ip_sum;
1965 m_copyback(m, ip_offset + offsetof(struct ip, ip_sum), 2, tmp);
1966 }
1967 }
1968
1969 /*
1970 * Insert IP options into preformed packet.
1971 * Adjust IP destination as required for IP source routing,
1972 * as indicated by a non-zero in_addr at the start of the options.
1973 *
1974 * XXX This routine assumes that the packet has no options in place.
1975 */
1976 static struct mbuf *
1977 ip_insertoptions(m, opt, phlen)
1978 register struct mbuf *m;
1979 struct mbuf *opt;
1980 int *phlen;
1981 {
1982 register struct ipoption *p = mtod(opt, struct ipoption *);
1983 struct mbuf *n;
1984 register struct ip *ip = mtod(m, struct ip *);
1985 unsigned optlen;
1986
1987 optlen = opt->m_len - sizeof(p->ipopt_dst);
1988 if (optlen + (u_short)ip->ip_len > IP_MAXPACKET)
1989 return (m); /* XXX should fail */
1990 if (p->ipopt_dst.s_addr)
1991 ip->ip_dst = p->ipopt_dst;
1992 if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
1993 MGETHDR(n, M_DONTWAIT, MT_HEADER); /* MAC-OK */
1994 if (n == 0)
1995 return (m);
1996 n->m_pkthdr.rcvif = 0;
1997 #if CONFIG_MACF_NET
1998 mac_mbuf_label_copy(m, n);
1999 #endif
2000 n->m_pkthdr.len = m->m_pkthdr.len + optlen;
2001 m->m_len -= sizeof(struct ip);
2002 m->m_data += sizeof(struct ip);
2003 n->m_next = m;
2004 m = n;
2005 m->m_len = optlen + sizeof(struct ip);
2006 m->m_data += max_linkhdr;
2007 (void)memcpy(mtod(m, void *), ip, sizeof(struct ip));
2008 } else {
2009 m->m_data -= optlen;
2010 m->m_len += optlen;
2011 m->m_pkthdr.len += optlen;
2012 ovbcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
2013 }
2014 ip = mtod(m, struct ip *);
2015 bcopy(p->ipopt_list, ip + 1, optlen);
2016 *phlen = sizeof(struct ip) + optlen;
2017 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, *phlen >> 2);
2018 ip->ip_len += optlen;
2019 return (m);
2020 }
2021
2022 /*
2023 * Copy options from ip to jp,
2024 * omitting those not copied during fragmentation.
2025 */
2026 int
2027 ip_optcopy(ip, jp)
2028 struct ip *ip, *jp;
2029 {
2030 register u_char *cp, *dp;
2031 int opt, optlen, cnt;
2032
2033 cp = (u_char *)(ip + 1);
2034 dp = (u_char *)(jp + 1);
2035 cnt = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof (struct ip);
2036 for (; cnt > 0; cnt -= optlen, cp += optlen) {
2037 opt = cp[0];
2038 if (opt == IPOPT_EOL)
2039 break;
2040 if (opt == IPOPT_NOP) {
2041 /* Preserve for IP mcast tunnel's LSRR alignment. */
2042 *dp++ = IPOPT_NOP;
2043 optlen = 1;
2044 continue;
2045 }
2046 #if DIAGNOSTIC
2047 if (cnt < IPOPT_OLEN + sizeof(*cp))
2048 panic("malformed IPv4 option passed to ip_optcopy");
2049 #endif
2050 optlen = cp[IPOPT_OLEN];
2051 #if DIAGNOSTIC
2052 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
2053 panic("malformed IPv4 option passed to ip_optcopy");
2054 #endif
2055 /* bogus lengths should have been caught by ip_dooptions */
2056 if (optlen > cnt)
2057 optlen = cnt;
2058 if (IPOPT_COPIED(opt)) {
2059 bcopy(cp, dp, optlen);
2060 dp += optlen;
2061 }
2062 }
2063 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
2064 *dp++ = IPOPT_EOL;
2065 return (optlen);
2066 }
2067
2068 /*
2069 * IP socket option processing.
2070 */
2071 int
2072 ip_ctloutput(so, sopt)
2073 struct socket *so;
2074 struct sockopt *sopt;
2075 {
2076 struct inpcb *inp = sotoinpcb(so);
2077 int error, optval;
2078
2079 error = optval = 0;
2080 if (sopt->sopt_level != IPPROTO_IP) {
2081 return (EINVAL);
2082 }
2083
2084 switch (sopt->sopt_dir) {
2085 case SOPT_SET:
2086 switch (sopt->sopt_name) {
2087 case IP_OPTIONS:
2088 #ifdef notyet
2089 case IP_RETOPTS:
2090 #endif
2091 {
2092 struct mbuf *m;
2093 if (sopt->sopt_valsize > MLEN) {
2094 error = EMSGSIZE;
2095 break;
2096 }
2097 MGET(m, sopt->sopt_p != kernproc ? M_WAIT : M_DONTWAIT,
2098 MT_HEADER);
2099 if (m == 0) {
2100 error = ENOBUFS;
2101 break;
2102 }
2103 m->m_len = sopt->sopt_valsize;
2104 error = sooptcopyin(sopt, mtod(m, char *), m->m_len,
2105 m->m_len);
2106 if (error)
2107 break;
2108
2109 return (ip_pcbopts(sopt->sopt_name, &inp->inp_options,
2110 m));
2111 }
2112
2113 case IP_TOS:
2114 case IP_TTL:
2115 case IP_RECVOPTS:
2116 case IP_RECVRETOPTS:
2117 case IP_RECVDSTADDR:
2118 case IP_RECVIF:
2119 case IP_RECVTTL:
2120 #if defined(NFAITH) && NFAITH > 0
2121 case IP_FAITH:
2122 #endif
2123 error = sooptcopyin(sopt, &optval, sizeof optval,
2124 sizeof optval);
2125 if (error)
2126 break;
2127
2128 switch (sopt->sopt_name) {
2129 case IP_TOS:
2130 inp->inp_ip_tos = optval;
2131 break;
2132
2133 case IP_TTL:
2134 inp->inp_ip_ttl = optval;
2135 break;
2136 #define OPTSET(bit) \
2137 if (optval) \
2138 inp->inp_flags |= bit; \
2139 else \
2140 inp->inp_flags &= ~bit;
2141
2142 case IP_RECVOPTS:
2143 OPTSET(INP_RECVOPTS);
2144 break;
2145
2146 case IP_RECVRETOPTS:
2147 OPTSET(INP_RECVRETOPTS);
2148 break;
2149
2150 case IP_RECVDSTADDR:
2151 OPTSET(INP_RECVDSTADDR);
2152 break;
2153
2154 case IP_RECVIF:
2155 OPTSET(INP_RECVIF);
2156 break;
2157
2158 case IP_RECVTTL:
2159 OPTSET(INP_RECVTTL);
2160 break;
2161
2162 #if defined(NFAITH) && NFAITH > 0
2163 case IP_FAITH:
2164 OPTSET(INP_FAITH);
2165 break;
2166 #endif
2167 }
2168 break;
2169 #undef OPTSET
2170
2171 #if CONFIG_FORCE_OUT_IFP
2172 /*
2173 * Apple private interface, similar to IP_BOUND_IF, except
2174 * that the parameter is a NULL-terminated string containing
2175 * the name of the network interface; an empty string means
2176 * unbind. Applications are encouraged to use IP_BOUND_IF
2177 * instead, as that is the current "official" API.
2178 */
2179 case IP_FORCE_OUT_IFP: {
2180 char ifname[IFNAMSIZ];
2181 unsigned int ifscope;
2182
2183 /* This option is settable only for IPv4 */
2184 if (!(inp->inp_vflag & INP_IPV4)) {
2185 error = EINVAL;
2186 break;
2187 }
2188
2189 /* Verify interface name parameter is sane */
2190 if (sopt->sopt_valsize > sizeof(ifname)) {
2191 error = EINVAL;
2192 break;
2193 }
2194
2195 /* Copy the interface name */
2196 if (sopt->sopt_valsize != 0) {
2197 error = sooptcopyin(sopt, ifname,
2198 sizeof (ifname), sopt->sopt_valsize);
2199 if (error)
2200 break;
2201 }
2202
2203 if (sopt->sopt_valsize == 0 || ifname[0] == NULL) {
2204 /* Unbind this socket from any interface */
2205 ifscope = IFSCOPE_NONE;
2206 } else {
2207 ifnet_t ifp;
2208
2209 /* Verify name is NULL terminated */
2210 if (ifname[sopt->sopt_valsize - 1] != NULL) {
2211 error = EINVAL;
2212 break;
2213 }
2214
2215 /* Bail out if given bogus interface name */
2216 if (ifnet_find_by_name(ifname, &ifp) != 0) {
2217 error = ENXIO;
2218 break;
2219 }
2220
2221 /* Bind this socket to this interface */
2222 ifscope = ifp->if_index;
2223
2224 /*
2225 * Won't actually free; since we don't release
2226 * this later, we should do it now.
2227 */
2228 ifnet_release(ifp);
2229 }
2230 ip_bindif(inp, ifscope);
2231 }
2232 break;
2233 #endif
2234 case IP_MULTICAST_IF:
2235 case IP_MULTICAST_VIF:
2236 case IP_MULTICAST_TTL:
2237 case IP_MULTICAST_LOOP:
2238 case IP_ADD_MEMBERSHIP:
2239 case IP_DROP_MEMBERSHIP:
2240 error = ip_setmoptions(sopt, &inp->inp_moptions);
2241 break;
2242
2243 case IP_PORTRANGE:
2244 error = sooptcopyin(sopt, &optval, sizeof optval,
2245 sizeof optval);
2246 if (error)
2247 break;
2248
2249 switch (optval) {
2250 case IP_PORTRANGE_DEFAULT:
2251 inp->inp_flags &= ~(INP_LOWPORT);
2252 inp->inp_flags &= ~(INP_HIGHPORT);
2253 break;
2254
2255 case IP_PORTRANGE_HIGH:
2256 inp->inp_flags &= ~(INP_LOWPORT);
2257 inp->inp_flags |= INP_HIGHPORT;
2258 break;
2259
2260 case IP_PORTRANGE_LOW:
2261 inp->inp_flags &= ~(INP_HIGHPORT);
2262 inp->inp_flags |= INP_LOWPORT;
2263 break;
2264
2265 default:
2266 error = EINVAL;
2267 break;
2268 }
2269 break;
2270
2271 #if IPSEC
2272 case IP_IPSEC_POLICY:
2273 {
2274 caddr_t req = NULL;
2275 size_t len = 0;
2276 int priv;
2277 struct mbuf *m;
2278 int optname;
2279
2280 if (sopt->sopt_valsize > MCLBYTES) {
2281 error = EMSGSIZE;
2282 break;
2283 }
2284 if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
2285 break;
2286 if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
2287 break;
2288 priv = (proc_suser(sopt->sopt_p) == 0);
2289 if (m) {
2290 req = mtod(m, caddr_t);
2291 len = m->m_len;
2292 }
2293 optname = sopt->sopt_name;
2294 error = ipsec4_set_policy(inp, optname, req, len, priv);
2295 m_freem(m);
2296 break;
2297 }
2298 #endif /*IPSEC*/
2299
2300 #if TRAFFIC_MGT
2301 case IP_TRAFFIC_MGT_BACKGROUND:
2302 {
2303 unsigned background = 0;
2304 error = sooptcopyin(sopt, &background, sizeof(background), sizeof(background));
2305 if (error)
2306 break;
2307
2308 if (background)
2309 so->so_traffic_mgt_flags |= TRAFFIC_MGT_SO_BACKGROUND;
2310 else
2311 so->so_traffic_mgt_flags &= ~TRAFFIC_MGT_SO_BACKGROUND;
2312
2313 break;
2314 }
2315 #endif /* TRAFFIC_MGT */
2316
2317 /*
2318 * On a multihomed system, scoped routing can be used to
2319 * restrict the source interface used for sending packets.
2320 * The socket option IP_BOUND_IF binds a particular AF_INET
2321 * socket to an interface such that data sent on the socket
2322 * is restricted to that interface. This is unlike the
2323 * SO_DONTROUTE option where the routing table is bypassed;
2324 * therefore it allows for a greater flexibility and control
2325 * over the system behavior, and does not place any restriction
2326 * on the destination address type (e.g. unicast, multicast,
2327 * or broadcast if applicable) or whether or not the host is
2328 * directly reachable. Note that in the multicast transmit
2329 * case, IP_MULTICAST_IF takes precedence over IP_BOUND_IF,
2330 * since the former practically bypasses the routing table;
2331 * in this case, IP_BOUND_IF sets the default interface used
2332 * for sending multicast packets in the absence of an explicit
2333 * transmit interface set via IP_MULTICAST_IF.
2334 */
2335 case IP_BOUND_IF:
2336 /* This option is settable only for IPv4 */
2337 if (!(inp->inp_vflag & INP_IPV4)) {
2338 error = EINVAL;
2339 break;
2340 }
2341
2342 error = sooptcopyin(sopt, &optval, sizeof (optval),
2343 sizeof (optval));
2344
2345 if (error)
2346 break;
2347
2348 ip_bindif(inp, optval);
2349 break;
2350
2351 default:
2352 error = ENOPROTOOPT;
2353 break;
2354 }
2355 break;
2356
2357 case SOPT_GET:
2358 switch (sopt->sopt_name) {
2359 case IP_OPTIONS:
2360 case IP_RETOPTS:
2361 if (inp->inp_options)
2362 error = sooptcopyout(sopt,
2363 mtod(inp->inp_options,
2364 char *),
2365 inp->inp_options->m_len);
2366 else
2367 sopt->sopt_valsize = 0;
2368 break;
2369
2370 case IP_TOS:
2371 case IP_TTL:
2372 case IP_RECVOPTS:
2373 case IP_RECVRETOPTS:
2374 case IP_RECVDSTADDR:
2375 case IP_RECVIF:
2376 case IP_RECVTTL:
2377 case IP_PORTRANGE:
2378 #if defined(NFAITH) && NFAITH > 0
2379 case IP_FAITH:
2380 #endif
2381 switch (sopt->sopt_name) {
2382
2383 case IP_TOS:
2384 optval = inp->inp_ip_tos;
2385 break;
2386
2387 case IP_TTL:
2388 optval = inp->inp_ip_ttl;
2389 break;
2390
2391 #define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0)
2392
2393 case IP_RECVOPTS:
2394 optval = OPTBIT(INP_RECVOPTS);
2395 break;
2396
2397 case IP_RECVRETOPTS:
2398 optval = OPTBIT(INP_RECVRETOPTS);
2399 break;
2400
2401 case IP_RECVDSTADDR:
2402 optval = OPTBIT(INP_RECVDSTADDR);
2403 break;
2404
2405 case IP_RECVIF:
2406 optval = OPTBIT(INP_RECVIF);
2407 break;
2408
2409 case IP_RECVTTL:
2410 optval = OPTBIT(INP_RECVTTL);
2411 break;
2412
2413 case IP_PORTRANGE:
2414 if (inp->inp_flags & INP_HIGHPORT)
2415 optval = IP_PORTRANGE_HIGH;
2416 else if (inp->inp_flags & INP_LOWPORT)
2417 optval = IP_PORTRANGE_LOW;
2418 else
2419 optval = 0;
2420 break;
2421
2422 #if defined(NFAITH) && NFAITH > 0
2423 case IP_FAITH:
2424 optval = OPTBIT(INP_FAITH);
2425 break;
2426 #endif
2427 }
2428 error = sooptcopyout(sopt, &optval, sizeof optval);
2429 break;
2430
2431 case IP_MULTICAST_IF:
2432 case IP_MULTICAST_VIF:
2433 case IP_MULTICAST_TTL:
2434 case IP_MULTICAST_LOOP:
2435 case IP_ADD_MEMBERSHIP:
2436 case IP_DROP_MEMBERSHIP:
2437 error = ip_getmoptions(sopt, inp->inp_moptions);
2438 break;
2439
2440 #if IPSEC
2441 case IP_IPSEC_POLICY:
2442 {
2443 struct mbuf *m = NULL;
2444 caddr_t req = NULL;
2445 size_t len = 0;
2446
2447 if (m != 0) {
2448 req = mtod(m, caddr_t);
2449 len = m->m_len;
2450 }
2451 error = ipsec4_get_policy(sotoinpcb(so), req, len, &m);
2452 if (error == 0)
2453 error = soopt_mcopyout(sopt, m); /* XXX */
2454 if (error == 0)
2455 m_freem(m);
2456 break;
2457 }
2458 #endif /*IPSEC*/
2459
2460 #if TRAFFIC_MGT
2461 case IP_TRAFFIC_MGT_BACKGROUND:
2462 {
2463 unsigned background = so->so_traffic_mgt_flags;
2464 return (sooptcopyout(sopt, &background, sizeof(background)));
2465 break;
2466 }
2467 #endif /* TRAFFIC_MGT */
2468
2469 case IP_BOUND_IF:
2470 if (inp->inp_flags & INP_BOUND_IF)
2471 optval = inp->inp_boundif;
2472 error = sooptcopyout(sopt, &optval, sizeof (optval));
2473 break;
2474
2475 default:
2476 error = ENOPROTOOPT;
2477 break;
2478 }
2479 break;
2480 }
2481 return (error);
2482 }
2483
2484 /*
2485 * Set up IP options in pcb for insertion in output packets.
2486 * Store in mbuf with pointer in pcbopt, adding pseudo-option
2487 * with destination address if source routed.
2488 */
2489 static int
2490 ip_pcbopts(
2491 __unused int optname,
2492 struct mbuf **pcbopt,
2493 register struct mbuf *m)
2494 {
2495 register int cnt, optlen;
2496 register u_char *cp;
2497 u_char opt;
2498
2499 /* turn off any old options */
2500 if (*pcbopt)
2501 (void)m_free(*pcbopt);
2502 *pcbopt = 0;
2503 if (m == (struct mbuf *)0 || m->m_len == 0) {
2504 /*
2505 * Only turning off any previous options.
2506 */
2507 if (m)
2508 (void)m_free(m);
2509 return (0);
2510 }
2511
2512 #ifndef vax
2513 if (m->m_len % sizeof(int32_t))
2514 goto bad;
2515 #endif
2516 /*
2517 * IP first-hop destination address will be stored before
2518 * actual options; move other options back
2519 * and clear it when none present.
2520 */
2521 if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
2522 goto bad;
2523 cnt = m->m_len;
2524 m->m_len += sizeof(struct in_addr);
2525 cp = mtod(m, u_char *) + sizeof(struct in_addr);
2526 ovbcopy(mtod(m, caddr_t), (caddr_t)cp, (unsigned)cnt);
2527 bzero(mtod(m, caddr_t), sizeof(struct in_addr));
2528
2529 for (; cnt > 0; cnt -= optlen, cp += optlen) {
2530 opt = cp[IPOPT_OPTVAL];
2531 if (opt == IPOPT_EOL)
2532 break;
2533 if (opt == IPOPT_NOP)
2534 optlen = 1;
2535 else {
2536 if (cnt < IPOPT_OLEN + sizeof(*cp))
2537 goto bad;
2538 optlen = cp[IPOPT_OLEN];
2539 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
2540 goto bad;
2541 }
2542 switch (opt) {
2543
2544 default:
2545 break;
2546
2547 case IPOPT_LSRR:
2548 case IPOPT_SSRR:
2549 /*
2550 * user process specifies route as:
2551 * ->A->B->C->D
2552 * D must be our final destination (but we can't
2553 * check that since we may not have connected yet).
2554 * A is first hop destination, which doesn't appear in
2555 * actual IP option, but is stored before the options.
2556 */
2557 if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
2558 goto bad;
2559 m->m_len -= sizeof(struct in_addr);
2560 cnt -= sizeof(struct in_addr);
2561 optlen -= sizeof(struct in_addr);
2562 cp[IPOPT_OLEN] = optlen;
2563 /*
2564 * Move first hop before start of options.
2565 */
2566 bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
2567 sizeof(struct in_addr));
2568 /*
2569 * Then copy rest of options back
2570 * to close up the deleted entry.
2571 */
2572 ovbcopy((caddr_t)(&cp[IPOPT_OFFSET+1] +
2573 sizeof(struct in_addr)),
2574 (caddr_t)&cp[IPOPT_OFFSET+1],
2575 (unsigned)cnt + sizeof(struct in_addr));
2576 break;
2577 }
2578 }
2579 if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
2580 goto bad;
2581 *pcbopt = m;
2582 return (0);
2583
2584 bad:
2585 (void)m_free(m);
2586 return (EINVAL);
2587 }
2588
2589 /*
2590 * XXX
2591 * The whole multicast option thing needs to be re-thought.
2592 * Several of these options are equally applicable to non-multicast
2593 * transmission, and one (IP_MULTICAST_TTL) totally duplicates a
2594 * standard option (IP_TTL).
2595 */
2596
2597 /*
2598 * following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as interface index.
2599 */
2600 static struct ifnet *
2601 ip_multicast_if(a, ifindexp)
2602 struct in_addr *a;
2603 int *ifindexp;
2604 {
2605 int ifindex;
2606 struct ifnet *ifp;
2607
2608 if (ifindexp)
2609 *ifindexp = 0;
2610 if (ntohl(a->s_addr) >> 24 == 0) {
2611 ifindex = ntohl(a->s_addr) & 0xffffff;
2612 ifnet_head_lock_shared();
2613 if (ifindex < 0 || if_index < ifindex) {
2614 ifnet_head_done();
2615 return NULL;
2616 }
2617 ifp = ifindex2ifnet[ifindex];
2618 ifnet_head_done();
2619 if (ifindexp)
2620 *ifindexp = ifindex;
2621 } else {
2622 INADDR_TO_IFP(*a, ifp);
2623 }
2624 return ifp;
2625 }
2626
2627 /*
2628 * Set the IP multicast options in response to user setsockopt().
2629 */
2630 static int
2631 ip_setmoptions(sopt, imop)
2632 struct sockopt *sopt;
2633 struct ip_moptions **imop;
2634 {
2635 int error = 0;
2636 struct in_addr addr;
2637 struct ip_mreq mreq;
2638 struct ifnet *ifp = NULL;
2639 struct ip_moptions *imo = *imop;
2640 int ifindex;
2641
2642 if (imo == NULL) {
2643 /*
2644 * No multicast option buffer attached to the pcb;
2645 * allocate one and initialize to default values.
2646 */
2647 error = ip_createmoptions(imop);
2648 if (error != 0)
2649 return error;
2650 imo = *imop;
2651 }
2652
2653 switch (sopt->sopt_name) {
2654 /* store an index number for the vif you wanna use in the send */
2655 #if MROUTING
2656 case IP_MULTICAST_VIF:
2657 {
2658 int i;
2659 if (legal_vif_num == 0) {
2660 error = EOPNOTSUPP;
2661 break;
2662 }
2663 error = sooptcopyin(sopt, &i, sizeof i, sizeof i);
2664 if (error)
2665 break;
2666 if (!legal_vif_num(i) && (i != -1)) {
2667 error = EINVAL;
2668 break;
2669 }
2670 imo->imo_multicast_vif = i;
2671 break;
2672 }
2673 #endif /* MROUTING */
2674
2675 case IP_MULTICAST_IF:
2676 /*
2677 * Select the interface for outgoing multicast packets.
2678 */
2679 error = sooptcopyin(sopt, &addr, sizeof addr, sizeof addr);
2680 if (error)
2681 break;
2682 /*
2683 * INADDR_ANY is used to remove a previous selection.
2684 * When no interface is selected, a default one is
2685 * chosen every time a multicast packet is sent.
2686 */
2687 if (addr.s_addr == INADDR_ANY) {
2688 imo->imo_multicast_ifp = NULL;
2689 break;
2690 }
2691 /*
2692 * The selected interface is identified by its local
2693 * IP address. Find the interface and confirm that
2694 * it supports multicasting.
2695 */
2696 ifp = ip_multicast_if(&addr, &ifindex);
2697 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
2698 error = EADDRNOTAVAIL;
2699 break;
2700 }
2701 imo->imo_multicast_ifp = ifp;
2702 if (ifindex)
2703 imo->imo_multicast_addr = addr;
2704 else
2705 imo->imo_multicast_addr.s_addr = INADDR_ANY;
2706 break;
2707
2708 case IP_MULTICAST_TTL:
2709 /*
2710 * Set the IP time-to-live for outgoing multicast packets.
2711 * The original multicast API required a char argument,
2712 * which is inconsistent with the rest of the socket API.
2713 * We allow either a char or an int.
2714 */
2715 if (sopt->sopt_valsize == 1) {
2716 u_char ttl;
2717 error = sooptcopyin(sopt, &ttl, 1, 1);
2718 if (error)
2719 break;
2720 imo->imo_multicast_ttl = ttl;
2721 } else {
2722 u_int ttl;
2723 error = sooptcopyin(sopt, &ttl, sizeof ttl,
2724 sizeof ttl);
2725 if (error)
2726 break;
2727 if (ttl > 255)
2728 error = EINVAL;
2729 else
2730 imo->imo_multicast_ttl = ttl;
2731 }
2732 break;
2733
2734 case IP_MULTICAST_LOOP:
2735 /*
2736 * Set the loopback flag for outgoing multicast packets.
2737 * Must be zero or one. The original multicast API required a
2738 * char argument, which is inconsistent with the rest
2739 * of the socket API. We allow either a char or an int.
2740 */
2741 if (sopt->sopt_valsize == 1) {
2742 u_char loop;
2743 error = sooptcopyin(sopt, &loop, 1, 1);
2744 if (error)
2745 break;
2746 imo->imo_multicast_loop = !!loop;
2747 } else {
2748 u_int loop;
2749 error = sooptcopyin(sopt, &loop, sizeof loop,
2750 sizeof loop);
2751 if (error)
2752 break;
2753 imo->imo_multicast_loop = !!loop;
2754 }
2755 break;
2756
2757 case IP_ADD_MEMBERSHIP:
2758 /*
2759 * Add a multicast group membership.
2760 * Group must be a valid IP multicast address.
2761 */
2762 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
2763 if (error)
2764 break;
2765
2766 error = ip_addmembership(imo, &mreq);
2767 break;
2768
2769 case IP_DROP_MEMBERSHIP:
2770 /*
2771 * Drop a multicast group membership.
2772 * Group must be a valid IP multicast address.
2773 */
2774 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
2775 if (error)
2776 break;
2777
2778 error = ip_dropmembership(imo, &mreq);
2779 break;
2780
2781 default:
2782 error = EOPNOTSUPP;
2783 break;
2784 }
2785
2786 /*
2787 * If all options have default values, no need to keep the mbuf.
2788 */
2789 if (imo->imo_multicast_ifp == NULL &&
2790 imo->imo_multicast_vif == (u_int32_t)-1 &&
2791 imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL &&
2792 imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP &&
2793 imo->imo_num_memberships == 0) {
2794 FREE(*imop, M_IPMOPTS);
2795 *imop = NULL;
2796 }
2797
2798 return (error);
2799 }
2800
2801 /*
2802 * Set the IP multicast options in response to user setsockopt().
2803 */
2804 __private_extern__ int
2805 ip_createmoptions(
2806 struct ip_moptions **imop)
2807 {
2808 struct ip_moptions *imo;
2809 imo = (struct ip_moptions*) _MALLOC(sizeof(*imo), M_IPMOPTS,
2810 M_WAITOK);
2811
2812 if (imo == NULL)
2813 return (ENOBUFS);
2814 *imop = imo;
2815 imo->imo_multicast_ifp = NULL;
2816 imo->imo_multicast_addr.s_addr = INADDR_ANY;
2817 imo->imo_multicast_vif = -1;
2818 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
2819 imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
2820 imo->imo_num_memberships = 0;
2821
2822 return 0;
2823 }
2824
2825 /*
2826 * Add membership to an IPv4 multicast.
2827 */
2828 __private_extern__ int
2829 ip_addmembership(
2830 struct ip_moptions *imo,
2831 struct ip_mreq *mreq)
2832 {
2833 struct route ro;
2834 struct sockaddr_in *dst;
2835 struct ifnet *ifp = NULL;
2836 int error = 0;
2837 int i;
2838
2839 bzero((caddr_t)&ro, sizeof(ro));
2840
2841 if (!IN_MULTICAST(ntohl(mreq->imr_multiaddr.s_addr))) {
2842 error = EINVAL;
2843 goto done;
2844 }
2845 /*
2846 * If no interface address was provided, use the interface of
2847 * the route to the given multicast address.
2848 */
2849 if (mreq->imr_interface.s_addr == INADDR_ANY) {
2850 dst = (struct sockaddr_in *)&ro.ro_dst;
2851 dst->sin_len = sizeof(*dst);
2852 dst->sin_family = AF_INET;
2853 dst->sin_addr = mreq->imr_multiaddr;
2854 rtalloc_ign(&ro, 0);
2855 if (ro.ro_rt != NULL) {
2856 ifp = ro.ro_rt->rt_ifp;
2857 } else {
2858 /* If there's no default route, try using loopback */
2859 mreq->imr_interface.s_addr = htonl(INADDR_LOOPBACK);
2860 }
2861 }
2862
2863 if (ifp == NULL) {
2864 ifp = ip_multicast_if(&mreq->imr_interface, NULL);
2865 }
2866
2867 /*
2868 * See if we found an interface, and confirm that it
2869 * supports multicast.
2870 */
2871 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
2872 error = EADDRNOTAVAIL;
2873 goto done;
2874 }
2875 /*
2876 * See if the membership already exists or if all the
2877 * membership slots are full.
2878 */
2879 for (i = 0; i < imo->imo_num_memberships; ++i) {
2880 if (imo->imo_membership[i]->inm_ifp == ifp &&
2881 imo->imo_membership[i]->inm_addr.s_addr
2882 == mreq->imr_multiaddr.s_addr)
2883 break;
2884 }
2885 if (i < imo->imo_num_memberships) {
2886 error = EADDRINUSE;
2887 goto done;
2888 }
2889 if (i == IP_MAX_MEMBERSHIPS) {
2890 error = ETOOMANYREFS;
2891 goto done;
2892 }
2893 /*
2894 * Everything looks good; add a new record to the multicast
2895 * address list for the given interface.
2896 */
2897 if ((imo->imo_membership[i] =
2898 in_addmulti(&mreq->imr_multiaddr, ifp)) == NULL) {
2899 error = ENOBUFS;
2900 goto done;
2901 }
2902 ++imo->imo_num_memberships;
2903
2904 done:
2905 if (ro.ro_rt != NULL)
2906 rtfree(ro.ro_rt);
2907
2908 return error;
2909 }
2910
2911 /*
2912 * Drop membership of an IPv4 multicast.
2913 */
2914 __private_extern__ int
2915 ip_dropmembership(
2916 struct ip_moptions *imo,
2917 struct ip_mreq *mreq)
2918 {
2919 int error = 0;
2920 struct ifnet* ifp = NULL;
2921 int i;
2922
2923 if (!IN_MULTICAST(ntohl(mreq->imr_multiaddr.s_addr))) {
2924 error = EINVAL;
2925 return error;
2926 }
2927
2928 /*
2929 * If an interface address was specified, get a pointer
2930 * to its ifnet structure.
2931 */
2932 if (mreq->imr_interface.s_addr == INADDR_ANY)
2933 ifp = NULL;
2934 else {
2935 ifp = ip_multicast_if(&mreq->imr_interface, NULL);
2936 if (ifp == NULL) {
2937 error = EADDRNOTAVAIL;
2938 return error;
2939 }
2940 }
2941 /*
2942 * Find the membership in the membership array.
2943 */
2944 for (i = 0; i < imo->imo_num_memberships; ++i) {
2945 if ((ifp == NULL ||
2946 imo->imo_membership[i]->inm_ifp == ifp) &&
2947 imo->imo_membership[i]->inm_addr.s_addr ==
2948 mreq->imr_multiaddr.s_addr)
2949 break;
2950 }
2951 if (i == imo->imo_num_memberships) {
2952 error = EADDRNOTAVAIL;
2953 return error;
2954 }
2955 /*
2956 * Give up the multicast address record to which the
2957 * membership points.
2958 */
2959 in_delmulti(&imo->imo_membership[i]);
2960 /*
2961 * Remove the gap in the membership array.
2962 */
2963 for (++i; i < imo->imo_num_memberships; ++i)
2964 imo->imo_membership[i-1] = imo->imo_membership[i];
2965 --imo->imo_num_memberships;
2966
2967 return error;
2968 }
2969
2970 /*
2971 * Return the IP multicast options in response to user getsockopt().
2972 */
2973 static int
2974 ip_getmoptions(sopt, imo)
2975 struct sockopt *sopt;
2976 register struct ip_moptions *imo;
2977 {
2978 struct in_addr addr;
2979 struct in_ifaddr *ia;
2980 int error, optval;
2981 u_char coptval;
2982
2983 error = 0;
2984 switch (sopt->sopt_name) {
2985 #if MROUTING
2986 case IP_MULTICAST_VIF:
2987 if (imo != NULL)
2988 optval = imo->imo_multicast_vif;
2989 else
2990 optval = -1;
2991 error = sooptcopyout(sopt, &optval, sizeof optval);
2992 break;
2993 #endif /* MROUTING */
2994
2995 case IP_MULTICAST_IF:
2996 if (imo == NULL || imo->imo_multicast_ifp == NULL)
2997 addr.s_addr = INADDR_ANY;
2998 else if (imo->imo_multicast_addr.s_addr) {
2999 /* return the value user has set */
3000 addr = imo->imo_multicast_addr;
3001 } else {
3002 IFP_TO_IA(imo->imo_multicast_ifp, ia);
3003 addr.s_addr = (ia == NULL) ? INADDR_ANY
3004 : IA_SIN(ia)->sin_addr.s_addr;
3005 if (ia != NULL)
3006 ifafree(&ia->ia_ifa);
3007 }
3008 error = sooptcopyout(sopt, &addr, sizeof addr);
3009 break;
3010
3011 case IP_MULTICAST_TTL:
3012 if (imo == 0)
3013 optval = coptval = IP_DEFAULT_MULTICAST_TTL;
3014 else
3015 optval = coptval = imo->imo_multicast_ttl;
3016 if (sopt->sopt_valsize == 1)
3017 error = sooptcopyout(sopt, &coptval, 1);
3018 else
3019 error = sooptcopyout(sopt, &optval, sizeof optval);
3020 break;
3021
3022 case IP_MULTICAST_LOOP:
3023 if (imo == 0)
3024 optval = coptval = IP_DEFAULT_MULTICAST_LOOP;
3025 else
3026 optval = coptval = imo->imo_multicast_loop;
3027 if (sopt->sopt_valsize == 1)
3028 error = sooptcopyout(sopt, &coptval, 1);
3029 else
3030 error = sooptcopyout(sopt, &optval, sizeof optval);
3031 break;
3032
3033 default:
3034 error = ENOPROTOOPT;
3035 break;
3036 }
3037 return (error);
3038 }
3039
3040 /*
3041 * Discard the IP multicast options.
3042 */
3043 void
3044 ip_freemoptions(imo)
3045 register struct ip_moptions *imo;
3046 {
3047 register int i;
3048
3049 if (imo != NULL) {
3050 for (i = 0; i < imo->imo_num_memberships; ++i)
3051 in_delmulti(&imo->imo_membership[i]);
3052 FREE(imo, M_IPMOPTS);
3053 }
3054 }
3055
3056 /*
3057 * Routine called from ip_output() to loop back a copy of an IP multicast
3058 * packet to the input queue of a specified interface. Note that this
3059 * calls the output routine of the loopback "driver", but with an interface
3060 * pointer that might NOT be a loopback interface -- evil, but easier than
3061 * replicating that code here.
3062 */
3063 static void
3064 ip_mloopback(ifp, m, dst, hlen)
3065 struct ifnet *ifp;
3066 register struct mbuf *m;
3067 register struct sockaddr_in *dst;
3068 int hlen;
3069 {
3070 register struct ip *ip;
3071 struct mbuf *copym;
3072 int sw_csum = (apple_hwcksum_tx == 0);
3073
3074 copym = m_copy(m, 0, M_COPYALL);
3075 if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen))
3076 copym = m_pullup(copym, hlen);
3077
3078 if (copym == NULL)
3079 return;
3080
3081 /*
3082 * We don't bother to fragment if the IP length is greater
3083 * than the interface's MTU. Can this possibly matter?
3084 */
3085 ip = mtod(copym, struct ip *);
3086
3087 #if BYTE_ORDER != BIG_ENDIAN
3088 HTONS(ip->ip_len);
3089 HTONS(ip->ip_off);
3090 #endif
3091
3092 ip->ip_sum = 0;
3093 ip->ip_sum = in_cksum(copym, hlen);
3094 /*
3095 * NB:
3096 * It's not clear whether there are any lingering
3097 * reentrancy problems in other areas which might
3098 * be exposed by using ip_input directly (in
3099 * particular, everything which modifies the packet
3100 * in-place). Yet another option is using the
3101 * protosw directly to deliver the looped back
3102 * packet. For the moment, we'll err on the side
3103 * of safety by using if_simloop().
3104 */
3105 #if 1 /* XXX */
3106 if (dst->sin_family != AF_INET) {
3107 printf("ip_mloopback: bad address family %d\n",
3108 dst->sin_family);
3109 dst->sin_family = AF_INET;
3110 }
3111 #endif
3112
3113 /*
3114 * Mark checksum as valid or calculate checksum for loopback.
3115 *
3116 * This is done this way because we have to embed the ifp of
3117 * the interface we will send the original copy of the packet
3118 * out on in the mbuf. ip_input will check if_hwassist of the
3119 * embedded ifp and ignore all csum_flags if if_hwassist is 0.
3120 * The UDP checksum has not been calculated yet.
3121 */
3122 if (sw_csum || (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA)) {
3123 if (!sw_csum && IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist)) {
3124 copym->m_pkthdr.csum_flags |=
3125 CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
3126 CSUM_IP_CHECKED | CSUM_IP_VALID;
3127 copym->m_pkthdr.csum_data = 0xffff;
3128 } else {
3129
3130 #if BYTE_ORDER != BIG_ENDIAN
3131 NTOHS(ip->ip_len);
3132 #endif
3133
3134 in_delayed_cksum(copym);
3135
3136 #if BYTE_ORDER != BIG_ENDIAN
3137 HTONS(ip->ip_len);
3138 #endif
3139
3140 }
3141 }
3142
3143 /*
3144 * TedW:
3145 * We need to send all loopback traffic down to dlil in case
3146 * a filter has tapped-in.
3147 */
3148
3149 /*
3150 * Stuff the 'real' ifp into the pkthdr, to be used in matching
3151 * in ip_input(); we need the loopback ifp/dl_tag passed as args
3152 * to make the loopback driver compliant with the data link
3153 * requirements.
3154 */
3155 if (lo_ifp) {
3156 copym->m_pkthdr.rcvif = ifp;
3157 dlil_output(lo_ifp, PF_INET, copym, 0,
3158 (struct sockaddr *) dst, 0);
3159 } else {
3160 printf("Warning: ip_output call to dlil_find_dltag failed!\n");
3161 m_freem(copym);
3162 }
3163 }
3164
3165 /*
3166 * Given a source IP address (and route, if available), determine the best
3167 * interface to send the packet from. Checking for (and updating) the
3168 * ROF_SRCIF_SELECTED flag in the pcb-supplied route placeholder is done
3169 * without any locks based on the assumption that ip_output() is single-
3170 * threaded per-pcb, i.e. for any given pcb there can only be one thread
3171 * performing output at the IP layer.
3172 */
3173 static struct ifaddr *
3174 in_selectsrcif(struct ip *ip, struct route *ro, unsigned int ifscope)
3175 {
3176 struct ifaddr *ifa = NULL;
3177 struct in_addr src = ip->ip_src;
3178 struct in_addr dst = ip->ip_dst;
3179 struct ifnet *rt_ifp;
3180 char s_src[16], s_dst[16];
3181
3182 if (ip_select_srcif_debug) {
3183 (void) inet_ntop(AF_INET, &src.s_addr, s_src, sizeof (s_src));
3184 (void) inet_ntop(AF_INET, &dst.s_addr, s_dst, sizeof (s_dst));
3185 }
3186
3187 if (ro->ro_rt != NULL)
3188 RT_LOCK(ro->ro_rt);
3189
3190 rt_ifp = (ro->ro_rt != NULL) ? ro->ro_rt->rt_ifp : NULL;
3191
3192 /*
3193 * Given the source IP address, find a suitable source interface
3194 * to use for transmission; if the caller has specified a scope,
3195 * optimize the search by looking at the addresses only for that
3196 * interface. This is still suboptimal, however, as we need to
3197 * traverse the per-interface list.
3198 */
3199 if (ifscope != IFSCOPE_NONE || ro->ro_rt != NULL) {
3200 unsigned int scope = ifscope;
3201
3202 /*
3203 * If no scope is specified and the route is stale (pointing
3204 * to a defunct interface) use the current primary interface;
3205 * this happens when switching between interfaces configured
3206 * with the same IP address. Otherwise pick up the scope
3207 * information from the route; the ULP may have looked up a
3208 * correct route and we just need to verify it here and mark
3209 * it with the ROF_SRCIF_SELECTED flag below.
3210 */
3211 if (scope == IFSCOPE_NONE) {
3212 scope = rt_ifp->if_index;
3213 if (scope != get_primary_ifscope() &&
3214 ro->ro_rt->generation_id != route_generation)
3215 scope = get_primary_ifscope();
3216 }
3217
3218 ifa = (struct ifaddr *)ifa_foraddr_scoped(src.s_addr, scope);
3219
3220 if (ip_select_srcif_debug && ifa != NULL) {
3221 if (ro->ro_rt != NULL) {
3222 printf("%s->%s ifscope %d->%d ifa_if %s%d "
3223 "ro_if %s%d\n", s_src, s_dst, ifscope,
3224 scope, ifa->ifa_ifp->if_name,
3225 ifa->ifa_ifp->if_unit, rt_ifp->if_name,
3226 rt_ifp->if_unit);
3227 } else {
3228 printf("%s->%s ifscope %d->%d ifa_if %s%d\n",
3229 s_src, s_dst, ifscope, scope,
3230 ifa->ifa_ifp->if_name,
3231 ifa->ifa_ifp->if_unit);
3232 }
3233 }
3234 }
3235
3236 /*
3237 * Slow path; search for an interface having the corresponding source
3238 * IP address if the scope was not specified by the caller, and:
3239 *
3240 * 1) There currently isn't any route, or,
3241 * 2) The interface used by the route does not own that source
3242 * IP address; in this case, the route will get blown away
3243 * and we'll do a more specific scoped search using the newly
3244 * found interface.
3245 */
3246 if (ifa == NULL && ifscope == IFSCOPE_NONE) {
3247 ifa = (struct ifaddr *)ifa_foraddr(src.s_addr);
3248
3249 if (ip_select_srcif_debug && ifa != NULL) {
3250 printf("%s->%s ifscope %d ifa_if %s%d\n",
3251 s_src, s_dst, ifscope, ifa->ifa_ifp->if_name,
3252 ifa->ifa_ifp->if_unit);
3253 }
3254 }
3255
3256 if (ro->ro_rt != NULL)
3257 RT_LOCK_ASSERT_HELD(ro->ro_rt);
3258 /*
3259 * If there is a non-loopback route with the wrong interface, or if
3260 * there is no interface configured with such an address, blow it
3261 * away. Except for local/loopback, we look for one with a matching
3262 * interface scope/index.
3263 */
3264 if (ro->ro_rt != NULL &&
3265 (ifa == NULL || (ifa->ifa_ifp != rt_ifp && rt_ifp != lo_ifp) ||
3266 !(ro->ro_rt->rt_flags & RTF_UP))) {
3267 if (ip_select_srcif_debug) {
3268 if (ifa != NULL) {
3269 printf("%s->%s ifscope %d ro_if %s%d != "
3270 "ifa_if %s%d (cached route cleared)\n",
3271 s_src, s_dst, ifscope, rt_ifp->if_name,
3272 rt_ifp->if_unit, ifa->ifa_ifp->if_name,
3273 ifa->ifa_ifp->if_unit);
3274 } else {
3275 printf("%s->%s ifscope %d ro_if %s%d "
3276 "(no ifa_if found)\n",
3277 s_src, s_dst, ifscope, rt_ifp->if_name,
3278 rt_ifp->if_unit);
3279 }
3280 }
3281
3282 RT_UNLOCK(ro->ro_rt);
3283 rtfree(ro->ro_rt);
3284 ro->ro_rt = NULL;
3285 ro->ro_flags &= ~ROF_SRCIF_SELECTED;
3286
3287 /*
3288 * If the destination is IPv4 LLA and the route's interface
3289 * doesn't match the source interface, then the source IP
3290 * address is wrong; it most likely belongs to the primary
3291 * interface associated with the IPv4 LL subnet. Drop the
3292 * packet rather than letting it go out and return an error
3293 * to the ULP. This actually applies not only to IPv4 LL
3294 * but other shared subnets; for now we explicitly test only
3295 * for the former case and save the latter for future.
3296 */
3297 if (IN_LINKLOCAL(ntohl(dst.s_addr)) &&
3298 !IN_LINKLOCAL(ntohl(src.s_addr)) && ifa != NULL) {
3299 ifafree(ifa);
3300 ifa = NULL;
3301 }
3302 }
3303
3304 if (ip_select_srcif_debug && ifa == NULL) {
3305 printf("%s->%s ifscope %d (neither ro_if/ifa_if found)\n",
3306 s_src, s_dst, ifscope);
3307 }
3308
3309 /*
3310 * If there is a route, mark it accordingly. If there isn't one,
3311 * we'll get here again during the next transmit (possibly with a
3312 * route) and the flag will get set at that point. For IPv4 LLA
3313 * destination, mark it only if the route has been fully resolved;
3314 * otherwise we want to come back here again when the route points
3315 * to the interface over which the ARP reply arrives on.
3316 */
3317 if (ro->ro_rt != NULL && (!IN_LINKLOCAL(ntohl(dst.s_addr)) ||
3318 (ro->ro_rt->rt_gateway->sa_family == AF_LINK &&
3319 SDL(ro->ro_rt->rt_gateway)->sdl_alen != 0))) {
3320 ro->ro_flags |= ROF_SRCIF_SELECTED;
3321 ro->ro_rt->generation_id = route_generation;
3322 }
3323
3324 if (ro->ro_rt != NULL)
3325 RT_UNLOCK(ro->ro_rt);
3326
3327 return (ifa);
3328 }
3329
3330 /*
3331 * Handler for setting IP_FORCE_OUT_IFP or IP_BOUND_IF socket option.
3332 */
3333 static void
3334 ip_bindif(struct inpcb *inp, unsigned int ifscope)
3335 {
3336 /*
3337 * A zero interface scope value indicates an "unbind".
3338 * Otherwise, take in whatever value the app desires;
3339 * the app may already know the scope (or force itself
3340 * to such a scope) ahead of time before the interface
3341 * gets attached. It doesn't matter either way; any
3342 * route lookup from this point on will require an
3343 * exact match for the embedded interface scope.
3344 */
3345 inp->inp_boundif = ifscope;
3346 if (inp->inp_boundif == IFSCOPE_NONE)
3347 inp->inp_flags &= ~INP_BOUND_IF;
3348 else
3349 inp->inp_flags |= INP_BOUND_IF;
3350
3351 /* Blow away any cached route in the PCB */
3352 if (inp->inp_route.ro_rt != NULL) {
3353 rtfree(inp->inp_route.ro_rt);
3354 inp->inp_route.ro_rt = NULL;
3355 }
3356 }