]> git.saurik.com Git - apple/xnu.git/blob - bsd/netinet/ip_output.c
xnu-123.5.tar.gz
[apple/xnu.git] / bsd / netinet / ip_output.c
1 /*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /*
23 * Copyright (c) 1982, 1986, 1988, 1990, 1993
24 * The Regents of the University of California. All rights reserved.
25 *
26 * Redistribution and use in source and binary forms, with or without
27 * modification, are permitted provided that the following conditions
28 * are met:
29 * 1. Redistributions of source code must retain the above copyright
30 * notice, this list of conditions and the following disclaimer.
31 * 2. Redistributions in binary form must reproduce the above copyright
32 * notice, this list of conditions and the following disclaimer in the
33 * documentation and/or other materials provided with the distribution.
34 * 3. All advertising materials mentioning features or use of this software
35 * must display the following acknowledgement:
36 * This product includes software developed by the University of
37 * California, Berkeley and its contributors.
38 * 4. Neither the name of the University nor the names of its contributors
39 * may be used to endorse or promote products derived from this software
40 * without specific prior written permission.
41 *
42 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
43 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
44 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
45 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
46 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
47 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
48 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
49 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
50 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
51 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
52 * SUCH DAMAGE.
53 *
54 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94
55 */
56
57 #define _IP_VHL
58
59 #if ISFB31
60 #include "opt_ipfw.h"
61 #include "opt_ipdn.h"
62 #include "opt_ipdivert.h"
63 #include "opt_ipfilter.h"
64 #endif
65
66 #include <sys/param.h>
67 #include <sys/systm.h>
68 #include <sys/kernel.h>
69 #include <sys/malloc.h>
70 #include <sys/mbuf.h>
71 #include <sys/protosw.h>
72 #include <sys/socket.h>
73 #include <sys/socketvar.h>
74
75 #include <net/if.h>
76 #include <net/route.h>
77
78 #include <netinet/in.h>
79 #include <netinet/in_systm.h>
80 #include <netinet/ip.h>
81 #if INET6
82 #include <netinet/ip6.h>
83 #include <netinet6/ip6_var.h>
84 #endif
85 #include <netinet/in_pcb.h>
86 #include <netinet/in_var.h>
87 #include <netinet/ip_var.h>
88 #include <net/dlil.h>
89
90 #include <sys/kdebug.h>
91
92 #define DBG_LAYER_BEG NETDBG_CODE(DBG_NETIP, 1)
93 #define DBG_LAYER_END NETDBG_CODE(DBG_NETIP, 3)
94 #define DBG_FNC_IP_OUTPUT NETDBG_CODE(DBG_NETIP, (1 << 8) | 1)
95
96
97 #ifdef vax
98 #include <machine/mtpr.h>
99 #endif
100
101 #if ISFB31
102 #include <machine/in_cksum.h>
103
104 static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "internet multicast options");
105 #endif
106
107 //static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "internet multicast options");
108
109 #if IPSEC
110 #include <netinet6/ipsec.h>
111 #include <netkey/key.h>
112 #include <netkey/key_debug.h>
113
114 #endif /*IPSEC*/
115
116 #if !defined(COMPAT_IPFW) || COMPAT_IPFW == 1
117 #undef COMPAT_IPFW
118 #define COMPAT_IPFW 1
119 #else
120 #undef COMPAT_IPFW
121 #endif
122
123 #if COMPAT_IPFW
124 #include <netinet/ip_fw.h>
125 #endif
126
127 #if DUMMYNET
128 #include <netinet/ip_dummynet.h>
129 #endif
130
131 #if IPFIREWALL_FORWARD_DEBUG
132 #define print_ip(a) printf("%ld.%ld.%ld.%ld",(ntohl(a.s_addr)>>24)&0xFF,\
133 (ntohl(a.s_addr)>>16)&0xFF,\
134 (ntohl(a.s_addr)>>8)&0xFF,\
135 (ntohl(a.s_addr))&0xFF);
136 #endif
137
138 u_short ip_id;
139
140 static struct mbuf *ip_insertoptions __P((struct mbuf *, struct mbuf *, int *));
141 static void ip_mloopback
142 __P((struct ifnet *, struct mbuf *, struct sockaddr_in *, int));
143 static int ip_getmoptions
144 __P((struct sockopt *, struct ip_moptions *));
145 static int ip_pcbopts __P((int, struct mbuf **, struct mbuf *));
146 static int ip_setmoptions
147 __P((struct sockopt *, struct ip_moptions **));
148 static u_long lo_dl_tag = 0;
149
150 #if IPFILTER_LKM || IPFILTER
151 int ip_optcopy __P((struct ip *, struct ip *));
152 extern int (*fr_checkp) __P((struct ip *, int, struct ifnet *, int, struct mbuf **));
153 #else
154 static int ip_optcopy __P((struct ip *, struct ip *));
155 #endif
156
157
158 extern struct protosw inetsw[];
159
160 /*
161 * IP output. The packet in mbuf chain m contains a skeletal IP
162 * header (with len, off, ttl, proto, tos, src, dst).
163 * The mbuf chain containing the packet will be freed.
164 * The mbuf opt, if present, will not be freed.
165 */
166 int
167 ip_output(m0, opt, ro, flags, imo)
168 struct mbuf *m0;
169 struct mbuf *opt;
170 struct route *ro;
171 int flags;
172 struct ip_moptions *imo;
173 {
174 struct ip *ip, *mhip;
175 struct ifnet *ifp;
176 u_long dl_tag;
177 struct mbuf *m = m0;
178 int hlen = sizeof (struct ip);
179 int len, off, error = 0;
180 struct sockaddr_in *dst;
181 struct in_ifaddr *ia;
182 int isbroadcast;
183 #if IPSEC
184 struct route iproute;
185 struct socket *so;
186 struct secpolicy *sp = NULL;
187 #endif
188 #if IPFIREWALL_FORWARD
189 int fwd_rewrite_src = 0;
190 #endif
191
192
193 #if !IPDIVERT /* dummy variable for the firewall code to play with */
194 u_short ip_divert_cookie = 0 ;
195 #endif
196 #if COMPAT_IPFW
197 struct ip_fw_chain *rule = NULL ;
198 #endif
199
200 KERNEL_DEBUG(DBG_FNC_IP_OUTPUT | DBG_FUNC_START, 0,0,0,0,0);
201
202 #if IPSEC
203 /*
204 * NOTE: m->m_pkthdr is NULL cleared below just to prevent ipfw code
205 * from SEGV.
206 * ipfw code uses rcvif to determine incoming interface, and
207 * KAME uses rcvif for ipsec processing.
208 * ipfw may not be working right with KAME at this moment.
209 * We need more tests.
210 */
211 #if DUMMYNET
212 if (m->m_type == MT_DUMMYNET) {
213 if (m->m_next != NULL) {
214 so = (struct socket *)m->m_next->m_pkthdr.rcvif;
215 m->m_next->m_pkthdr.rcvif = NULL;
216 } else
217 so = NULL;
218 } else
219 #endif
220 {
221 so = ipsec_getsocket(m);
222 ipsec_setsocket(m, NULL);
223 }
224 #endif /*IPSEC*/
225
226
227 #if IPFIREWALL && DUMMYNET
228 /*
229 * dummynet packet are prepended a vestigial mbuf with
230 * m_type = MT_DUMMYNET and m_data pointing to the matching
231 * rule.
232 */
233 if (m->m_type == MT_DUMMYNET) {
234 struct mbuf *tmp_m = m ;
235 /*
236 * the packet was already tagged, so part of the
237 * processing was already done, and we need to go down.
238 * opt, flags and imo have already been used, and now
239 * they are used to hold ifp and hlen and NULL, respectively.
240 */
241 rule = (struct ip_fw_chain *)(m->m_data) ;
242 m = m->m_next ;
243 FREE(tmp_m, M_IPFW);
244 ip = mtod(m, struct ip *);
245 dst = (struct sockaddr_in *)&ro->ro_dst;
246 ifp = (struct ifnet *)opt;
247 hlen = IP_VHL_HL(ip->ip_vhl) << 2 ;
248 opt = NULL ;
249 flags = 0 ; /* XXX is this correct ? */
250 goto sendit;
251 } else
252 rule = NULL ;
253 #endif
254
255 #if DIAGNOSTIC
256 if ((m->m_flags & M_PKTHDR) == 0)
257 panic("ip_output no HDR");
258 if (!ro)
259 panic("ip_output no route, proto = %d",
260 mtod(m, struct ip *)->ip_p);
261 #endif
262 if (opt) {
263 m = ip_insertoptions(m, opt, &len);
264 hlen = len;
265 }
266 ip = mtod(m, struct ip *);
267 /*
268 * Fill in IP header.
269 */
270 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
271 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, hlen >> 2);
272 ip->ip_off &= IP_DF;
273 ip->ip_id = htons(ip_id++);
274 ipstat.ips_localout++;
275 } else {
276 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
277 }
278
279 KERNEL_DEBUG(DBG_LAYER_BEG, ip->ip_dst.s_addr,
280 ip->ip_src.s_addr, ip->ip_p, ip->ip_off, ip->ip_len);
281
282 dst = (struct sockaddr_in *)&ro->ro_dst;
283 /*
284 * If there is a cached route,
285 * check that it is to the same destination
286 * and is still up. If not, free it and try again.
287 */
288 if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
289 dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
290 RTFREE(ro->ro_rt);
291 ro->ro_rt = (struct rtentry *)0;
292 }
293 if (ro->ro_rt == 0) {
294 dst->sin_family = AF_INET;
295 dst->sin_len = sizeof(*dst);
296 dst->sin_addr = ip->ip_dst;
297 }
298 /*
299 * If routing to interface only,
300 * short circuit routing lookup.
301 */
302 #define ifatoia(ifa) ((struct in_ifaddr *)(ifa))
303 #define sintosa(sin) ((struct sockaddr *)(sin))
304 if (flags & IP_ROUTETOIF) {
305 if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0 &&
306 (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) {
307 ipstat.ips_noroute++;
308 error = ENETUNREACH;
309 goto bad;
310 }
311 ifp = ia->ia_ifp;
312 dl_tag = ia->ia_ifa.ifa_dlt;
313 ip->ip_ttl = 1;
314 isbroadcast = in_broadcast(dst->sin_addr, ifp);
315 } else {
316 /*
317 * If this is the case, we probably don't want to allocate
318 * a protocol-cloned route since we didn't get one from the
319 * ULP. This lets TCP do its thing, while not burdening
320 * forwarding or ICMP with the overhead of cloning a route.
321 * Of course, we still want to do any cloning requested by
322 * the link layer, as this is probably required in all cases
323 * for correct operation (as it is for ARP).
324 */
325 if (ro->ro_rt == 0)
326 rtalloc_ign(ro, RTF_PRCLONING);
327 if (ro->ro_rt == 0) {
328 ipstat.ips_noroute++;
329 error = EHOSTUNREACH;
330 goto bad;
331 }
332 ia = ifatoia(ro->ro_rt->rt_ifa);
333 ifp = ro->ro_rt->rt_ifp;
334 dl_tag = ro->ro_rt->rt_dlt;
335 ro->ro_rt->rt_use++;
336 if (ro->ro_rt->rt_flags & RTF_GATEWAY)
337 dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
338 if (ro->ro_rt->rt_flags & RTF_HOST)
339 isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST);
340 else
341 isbroadcast = in_broadcast(dst->sin_addr, ifp);
342 }
343 if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
344 struct in_multi *inm;
345
346 m->m_flags |= M_MCAST;
347 /*
348 * IP destination address is multicast. Make sure "dst"
349 * still points to the address in "ro". (It may have been
350 * changed to point to a gateway address, above.)
351 */
352 dst = (struct sockaddr_in *)&ro->ro_dst;
353 /*
354 * See if the caller provided any multicast options
355 */
356 if (imo != NULL) {
357 ip->ip_ttl = imo->imo_multicast_ttl;
358 if (imo->imo_multicast_ifp != NULL)
359 ifp = imo->imo_multicast_ifp;
360 if (imo->imo_multicast_vif != -1)
361 ip->ip_src.s_addr =
362 ip_mcast_src(imo->imo_multicast_vif);
363 } else
364 ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
365 /*
366 * Confirm that the outgoing interface supports multicast.
367 */
368 if ((imo == NULL) || (imo->imo_multicast_vif == -1)) {
369 if ((ifp->if_flags & IFF_MULTICAST) == 0) {
370 ipstat.ips_noroute++;
371 error = ENETUNREACH;
372 goto bad;
373 }
374 }
375 /*
376 * If source address not specified yet, use address
377 * of outgoing interface.
378 */
379 if (ip->ip_src.s_addr == INADDR_ANY) {
380 register struct in_ifaddr *ia1;
381
382 for (ia1 = in_ifaddrhead.tqh_first; ia1;
383 ia1 = ia1->ia_link.tqe_next)
384 if (ia1->ia_ifp == ifp) {
385 ip->ip_src = IA_SIN(ia1)->sin_addr;
386 break;
387 }
388 }
389
390 IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm);
391 if (inm != NULL &&
392 (imo == NULL || imo->imo_multicast_loop)) {
393 /*
394 * If we belong to the destination multicast group
395 * on the outgoing interface, and the caller did not
396 * forbid loopback, loop back a copy.
397 */
398 ip_mloopback(ifp, m, dst, hlen);
399 }
400 else {
401 /*
402 * If we are acting as a multicast router, perform
403 * multicast forwarding as if the packet had just
404 * arrived on the interface to which we are about
405 * to send. The multicast forwarding function
406 * recursively calls this function, using the
407 * IP_FORWARDING flag to prevent infinite recursion.
408 *
409 * Multicasts that are looped back by ip_mloopback(),
410 * above, will be forwarded by the ip_input() routine,
411 * if necessary.
412 */
413 if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
414 /*
415 * Check if rsvp daemon is running. If not, don't
416 * set ip_moptions. This ensures that the packet
417 * is multicast and not just sent down one link
418 * as prescribed by rsvpd.
419 */
420 if (!rsvp_on)
421 imo = NULL;
422 if (ip_mforward(ip, ifp, m, imo) != 0) {
423 m_freem(m);
424 goto done;
425 }
426 }
427 }
428
429 /*
430 * Multicasts with a time-to-live of zero may be looped-
431 * back, above, but must not be transmitted on a network.
432 * Also, multicasts addressed to the loopback interface
433 * are not sent -- the above call to ip_mloopback() will
434 * loop back a copy if this host actually belongs to the
435 * destination group on the loopback interface.
436 */
437 if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) {
438 m_freem(m);
439 goto done;
440 }
441
442 goto sendit;
443 }
444 #ifndef notdef
445 /*
446 * If source address not specified yet, use address
447 * of outgoing interface.
448 */
449 if (ip->ip_src.s_addr == INADDR_ANY) {
450 ip->ip_src = IA_SIN(ia)->sin_addr;
451 #if IPFIREWALL_FORWARD
452 /* Keep note that we did this - if the firewall changes
453 * the next-hop, our interface may change, changing the
454 * default source IP. It's a shame so much effort happens
455 * twice. Oh well.
456 */
457 fwd_rewrite_src++;
458 #endif /* IPFIREWALL_FORWARD */
459 }
460 #endif /* notdef */
461 /*
462 * Verify that we have any chance at all of being able to queue
463 * the packet or packet fragments
464 */
465 if ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >=
466 ifp->if_snd.ifq_maxlen) {
467 error = ENOBUFS;
468 goto bad;
469 }
470
471 /*
472 * Look for broadcast address and
473 * and verify user is allowed to send
474 * such a packet.
475 */
476 if (isbroadcast) {
477 if ((ifp->if_flags & IFF_BROADCAST) == 0) {
478 error = EADDRNOTAVAIL;
479 goto bad;
480 }
481 if ((flags & IP_ALLOWBROADCAST) == 0) {
482 error = EACCES;
483 goto bad;
484 }
485 /* don't allow broadcast messages to be fragmented */
486 if ((u_short)ip->ip_len > ifp->if_mtu) {
487 error = EMSGSIZE;
488 goto bad;
489 }
490 m->m_flags |= M_BCAST;
491 } else {
492 m->m_flags &= ~M_BCAST;
493 }
494
495 sendit:
496 /*
497 * IpHack's section.
498 * - Xlate: translate packet's addr/port (NAT).
499 * - Firewall: deny/allow/etc.
500 * - Wrap: fake packet's addr/port <unimpl.>
501 * - Encapsulate: put it in another IP and send out. <unimp.>
502 */
503 #if IPFILTER || IPFILTER_LKM
504 if (fr_checkp) {
505 struct mbuf *m1 = m;
506
507 if ((error = (*fr_checkp)(ip, hlen, ifp, 1, &m1)) || !m1)
508 goto done;
509 ip = mtod(m = m1, struct ip *);
510 }
511 #endif
512
513 #if COMPAT_IPFW
514 if (ip_nat_ptr && !(*ip_nat_ptr)(&ip, &m, ifp, IP_NAT_OUT)) {
515 error = EACCES;
516 goto done;
517 }
518
519 /*
520 * Check with the firewall...
521 */
522 if (ip_fw_chk_ptr) {
523 struct sockaddr_in *old = dst;
524
525 off = (*ip_fw_chk_ptr)(&ip,
526 hlen, ifp, &ip_divert_cookie, &m, &rule, &dst);
527 /*
528 * On return we must do the following:
529 * m == NULL -> drop the pkt
530 * 1<=off<= 0xffff -> DIVERT
531 * (off & 0x10000) -> send to a DUMMYNET pipe
532 * dst != old -> IPFIREWALL_FORWARD
533 * off==0, dst==old -> accept
534 * If some of the above modules is not compiled in, then
535 * we should't have to check the corresponding condition
536 * (because the ipfw control socket should not accept
537 * unsupported rules), but better play safe and drop
538 * packets in case of doubt.
539 */
540 if (!m) { /* firewall said to reject */
541 error = EACCES;
542 goto done;
543 }
544 if (off == 0 && dst == old) /* common case */
545 goto pass ;
546 #if DUMMYNET
547 if (off & 0x10000) {
548 /*
549 * pass the pkt to dummynet. Need to include
550 * pipe number, m, ifp, ro, hlen because these are
551 * not recomputed in the next pass.
552 * All other parameters have been already used and
553 * so they are not needed anymore.
554 * XXX note: if the ifp or ro entry are deleted
555 * while a pkt is in dummynet, we are in trouble!
556 */
557 dummynet_io(off & 0xffff, DN_TO_IP_OUT, m,ifp,ro,hlen,rule);
558 goto done;
559 }
560 #endif
561 #if IPDIVERT
562 if (off > 0 && off < 0x10000) { /* Divert packet */
563 ip_divert_port = off & 0xffff ;
564 (*ip_protox[IPPROTO_DIVERT]->pr_input)(m, 0);
565 goto done;
566 }
567 #endif
568
569 #if IPFIREWALL_FORWARD
570 /* Here we check dst to make sure it's directly reachable on the
571 * interface we previously thought it was.
572 * If it isn't (which may be likely in some situations) we have
573 * to re-route it (ie, find a route for the next-hop and the
574 * associated interface) and set them here. This is nested
575 * forwarding which in most cases is undesirable, except where
576 * such control is nigh impossible. So we do it here.
577 * And I'm babbling.
578 */
579 if (off == 0 && old != dst) {
580 struct in_ifaddr *ia;
581
582 /* It's changed... */
583 /* There must be a better way to do this next line... */
584 static struct route sro_fwd, *ro_fwd = &sro_fwd;
585 #if IPFIREWALL_FORWARD_DEBUG
586 printf("IPFIREWALL_FORWARD: New dst ip: ");
587 print_ip(dst->sin_addr);
588 printf("\n");
589 #endif
590 /*
591 * We need to figure out if we have been forwarded
592 * to a local socket. If so then we should somehow
593 * "loop back" to ip_input, and get directed to the
594 * PCB as if we had received this packet. This is
595 * because it may be dificult to identify the packets
596 * you want to forward until they are being output
597 * and have selected an interface. (e.g. locally
598 * initiated packets) If we used the loopback inteface,
599 * we would not be able to control what happens
600 * as the packet runs through ip_input() as
601 * it is done through a ISR.
602 */
603 for (ia = TAILQ_FIRST(&in_ifaddrhead); ia;
604 ia = TAILQ_NEXT(ia, ia_link)) {
605 /*
606 * If the addr to forward to is one
607 * of ours, we pretend to
608 * be the destination for this packet.
609 */
610 if (IA_SIN(ia)->sin_addr.s_addr ==
611 dst->sin_addr.s_addr)
612 break;
613 }
614 if (ia) {
615 /* tell ip_input "dont filter" */
616 ip_fw_fwd_addr = dst;
617 if (m->m_pkthdr.rcvif == NULL)
618 m->m_pkthdr.rcvif = ifunit("lo0");
619 ip->ip_len = htons((u_short)ip->ip_len);
620 ip->ip_off = htons((u_short)ip->ip_off);
621 ip->ip_sum = 0;
622
623 ip->ip_sum = in_cksum(m, hlen);
624
625 ip_input(m);
626 goto done;
627 }
628 /* Some of the logic for this was
629 * nicked from above.
630 *
631 * This rewrites the cached route in a local PCB.
632 * Is this what we want to do?
633 */
634 bcopy(dst, &ro_fwd->ro_dst, sizeof(*dst));
635
636 ro_fwd->ro_rt = 0;
637 rtalloc_ign(ro_fwd, RTF_PRCLONING);
638
639 if (ro_fwd->ro_rt == 0) {
640 ipstat.ips_noroute++;
641 error = EHOSTUNREACH;
642 goto bad;
643 }
644
645 ia = ifatoia(ro_fwd->ro_rt->rt_ifa);
646 ifp = ro_fwd->ro_rt->rt_ifp;
647 dl_tag = ro->ro_rt->rt_dlt;
648 ro_fwd->ro_rt->rt_use++;
649 if (ro_fwd->ro_rt->rt_flags & RTF_GATEWAY)
650 dst = (struct sockaddr_in *)ro_fwd->ro_rt->rt_gateway;
651 if (ro_fwd->ro_rt->rt_flags & RTF_HOST)
652 isbroadcast =
653 (ro_fwd->ro_rt->rt_flags & RTF_BROADCAST);
654 else
655 isbroadcast = in_broadcast(dst->sin_addr, ifp);
656 RTFREE(ro->ro_rt);
657 ro->ro_rt = ro_fwd->ro_rt;
658 dst = (struct sockaddr_in *)&ro_fwd->ro_dst;
659
660 /*
661 * If we added a default src ip earlier,
662 * which would have been gotten from the-then
663 * interface, do it again, from the new one.
664 */
665 if (fwd_rewrite_src)
666 ip->ip_src = IA_SIN(ia)->sin_addr;
667 goto pass ;
668 }
669 #endif /* IPFIREWALL_FORWARD */
670 /*
671 * if we get here, none of the above matches, and
672 * we have to drop the pkt
673 */
674 m_freem(m);
675 error = EACCES; /* not sure this is the right error msg */
676 goto done;
677 }
678 #endif /* COMPAT_IPFW */
679
680 pass:
681
682 #if defined(PM)
683 /*
684 * Processing IP filter/NAT.
685 * Return TRUE iff this packet is discarded.
686 * Return FALSE iff this packet is accepted.
687 */
688
689 if (doNatFil && pm_out(ro->ro_rt->rt_ifp, ip, m))
690 goto done;
691 #endif
692
693 #if IPSEC
694 /* get SP for this packet */
695 if (so == NULL)
696 sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, flags, &error);
697 else
698 sp = ipsec4_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error);
699
700 if (sp == NULL) {
701 ipsecstat.out_inval++;
702 goto bad;
703 }
704
705 error = 0;
706
707 /* check policy */
708 switch (sp->policy) {
709 case IPSEC_POLICY_DISCARD:
710 /*
711 * This packet is just discarded.
712 */
713 ipsecstat.out_polvio++;
714 goto bad;
715
716 case IPSEC_POLICY_BYPASS:
717 case IPSEC_POLICY_NONE:
718 /* no need to do IPsec. */
719 goto skip_ipsec;
720
721 case IPSEC_POLICY_IPSEC:
722 if (sp->req == NULL) {
723 /* XXX should be panic ? */
724 printf("ip_output: No IPsec request specified.\n");
725 error = EINVAL;
726 goto bad;
727 }
728 break;
729
730 case IPSEC_POLICY_ENTRUST:
731 default:
732 printf("ip_output: Invalid policy found. %d\n", sp->policy);
733 }
734
735 ip->ip_len = htons((u_short)ip->ip_len);
736 ip->ip_off = htons((u_short)ip->ip_off);
737 ip->ip_sum = 0;
738
739 {
740 struct ipsec_output_state state;
741 bzero(&state, sizeof(state));
742 state.m = m;
743 if (flags & IP_ROUTETOIF) {
744 state.ro = &iproute;
745 bzero(&iproute, sizeof(iproute));
746 } else
747 state.ro = ro;
748 state.dst = (struct sockaddr *)dst;
749
750 error = ipsec4_output(&state, sp, flags);
751
752 m = state.m;
753 if (flags & IP_ROUTETOIF) {
754 /*
755 * if we have tunnel mode SA, we may need to ignore
756 * IP_ROUTETOIF.
757 */
758 if (state.ro != &iproute || state.ro->ro_rt != NULL) {
759 flags &= ~IP_ROUTETOIF;
760 ro = state.ro;
761 }
762 } else
763 ro = state.ro;
764 dst = (struct sockaddr_in *)state.dst;
765 if (error) {
766 /* mbuf is already reclaimed in ipsec4_output. */
767 m0 = NULL;
768 switch (error) {
769 case EHOSTUNREACH:
770 case ENETUNREACH:
771 case EMSGSIZE:
772 case ENOBUFS:
773 case ENOMEM:
774 break;
775 default:
776 printf("ip4_output (ipsec): error code %d\n", error);
777 /*fall through*/
778 case ENOENT:
779 /* don't show these error codes to the user */
780 error = 0;
781 break;
782 }
783 goto bad;
784 }
785 }
786
787 /* be sure to update variables that are affected by ipsec4_output() */
788 ip = mtod(m, struct ip *);
789 #ifdef _IP_VHL
790 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
791 #else
792 hlen = ip->ip_hl << 2;
793 #endif
794 if (ro->ro_rt == NULL) {
795 if ((flags & IP_ROUTETOIF) == 0) {
796 printf("ip_output: "
797 "can't update route after IPsec processing\n");
798 error = EHOSTUNREACH; /*XXX*/
799 goto bad;
800 }
801 } else {
802 /* nobody uses ia beyond here */
803 ifp = ro->ro_rt->rt_ifp;
804 }
805
806 /* make it flipped, again. */
807 ip->ip_len = ntohs((u_short)ip->ip_len);
808 ip->ip_off = ntohs((u_short)ip->ip_off);
809 skip_ipsec:
810 #endif /*IPSEC*/
811
812 /*
813 * If small enough for interface, can just send directly.
814 */
815 if ((u_short)ip->ip_len <= ifp->if_mtu) {
816 ip->ip_len = htons((u_short)ip->ip_len);
817 ip->ip_off = htons((u_short)ip->ip_off);
818 ip->ip_sum = 0;
819 ip->ip_sum = in_cksum(m, hlen);
820 error = dlil_output(dl_tag, m, (void *) ro->ro_rt,
821 (struct sockaddr *)dst, 0);
822 goto done;
823 }
824 /*
825 * Too large for interface; fragment if possible.
826 * Must be able to put at least 8 bytes per fragment.
827 */
828 if (ip->ip_off & IP_DF) {
829 error = EMSGSIZE;
830 /*
831 * This case can happen if the user changed the MTU
832 * of an interface after enabling IP on it. Because
833 * most netifs don't keep track of routes pointing to
834 * them, there is no way for one to update all its
835 * routes when the MTU is changed.
836 */
837 if ((ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST))
838 && !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU)
839 && (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) {
840 ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
841 }
842 ipstat.ips_cantfrag++;
843 goto bad;
844 }
845 len = (ifp->if_mtu - hlen) &~ 7;
846 if (len < 8) {
847 error = EMSGSIZE;
848 goto bad;
849 }
850
851 {
852 int mhlen, firstlen = len;
853 struct mbuf **mnext = &m->m_nextpkt;
854
855 /*
856 * Loop through length of segment after first fragment,
857 * make new header and copy data of each part and link onto chain.
858 */
859 m0 = m;
860 mhlen = sizeof (struct ip);
861 for (off = hlen + len; off < (u_short)ip->ip_len; off += len) {
862 MGETHDR(m, M_DONTWAIT, MT_HEADER);
863 if (m == 0) {
864 error = ENOBUFS;
865 ipstat.ips_odropped++;
866 goto sendorfree;
867 }
868 m->m_flags |= (m0->m_flags & M_MCAST);
869 m->m_data += max_linkhdr;
870 mhip = mtod(m, struct ip *);
871 *mhip = *ip;
872 if (hlen > sizeof (struct ip)) {
873 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
874 mhip->ip_vhl = IP_MAKE_VHL(IPVERSION, mhlen >> 2);
875 }
876 m->m_len = mhlen;
877 mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF);
878 if (ip->ip_off & IP_MF)
879 mhip->ip_off |= IP_MF;
880 if (off + len >= (u_short)ip->ip_len)
881 len = (u_short)ip->ip_len - off;
882 else
883 mhip->ip_off |= IP_MF;
884 mhip->ip_len = htons((u_short)(len + mhlen));
885 m->m_next = m_copy(m0, off, len);
886 if (m->m_next == 0) {
887 (void) m_free(m);
888 error = ENOBUFS; /* ??? */
889 ipstat.ips_odropped++;
890 goto sendorfree;
891 }
892 m->m_pkthdr.len = mhlen + len;
893 m->m_pkthdr.rcvif = (struct ifnet *)0;
894 mhip->ip_off = htons((u_short)mhip->ip_off);
895 mhip->ip_sum = 0;
896 mhip->ip_sum = in_cksum(m, mhlen);
897 *mnext = m;
898 mnext = &m->m_nextpkt;
899 ipstat.ips_ofragments++;
900 }
901 /*
902 * Update first fragment by trimming what's been copied out
903 * and updating header, then send each fragment (in order).
904 */
905 m = m0;
906 m_adj(m, hlen + firstlen - (u_short)ip->ip_len);
907 m->m_pkthdr.len = hlen + firstlen;
908 ip->ip_len = htons((u_short)m->m_pkthdr.len);
909 ip->ip_off = htons((u_short)(ip->ip_off | IP_MF));
910 ip->ip_sum = 0;
911 ip->ip_sum = in_cksum(m, hlen);
912
913 sendorfree:
914
915 KERNEL_DEBUG(DBG_LAYER_END, ip->ip_dst.s_addr,
916 ip->ip_src.s_addr, ip->ip_p, ip->ip_off, ip->ip_len);
917
918 for (m = m0; m; m = m0) {
919 m0 = m->m_nextpkt;
920 m->m_nextpkt = 0;
921 if (error == 0)
922 error = dlil_output(dl_tag, m, (void *) ro->ro_rt,
923 (struct sockaddr *)dst, 0);
924 else
925 m_freem(m);
926 }
927
928 if (error == 0)
929 ipstat.ips_fragmented++;
930 }
931 done:
932 #if IPSEC
933 if (ro == &iproute && ro->ro_rt) {
934 RTFREE(ro->ro_rt);
935 ro->ro_rt = NULL;
936 }
937 if (sp != NULL) {
938 KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
939 printf("DP ip_output call free SP:%x\n", sp));
940 key_freesp(sp);
941 }
942 #endif /* IPSEC */
943
944 KERNEL_DEBUG(DBG_FNC_IP_OUTPUT | DBG_FUNC_END, error,0,0,0,0);
945 return (error);
946 bad:
947 m_freem(m0);
948 goto done;
949 }
950
951 /*
952 * Insert IP options into preformed packet.
953 * Adjust IP destination as required for IP source routing,
954 * as indicated by a non-zero in_addr at the start of the options.
955 *
956 * XXX This routine assumes that the packet has no options in place.
957 */
958 static struct mbuf *
959 ip_insertoptions(m, opt, phlen)
960 register struct mbuf *m;
961 struct mbuf *opt;
962 int *phlen;
963 {
964 register struct ipoption *p = mtod(opt, struct ipoption *);
965 struct mbuf *n;
966 register struct ip *ip = mtod(m, struct ip *);
967 unsigned optlen;
968
969 optlen = opt->m_len - sizeof(p->ipopt_dst);
970 if (optlen + (u_short)ip->ip_len > IP_MAXPACKET)
971 return (m); /* XXX should fail */
972 if (p->ipopt_dst.s_addr)
973 ip->ip_dst = p->ipopt_dst;
974 if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
975 MGETHDR(n, M_DONTWAIT, MT_HEADER);
976 if (n == 0)
977 return (m);
978 n->m_pkthdr.len = m->m_pkthdr.len + optlen;
979 m->m_len -= sizeof(struct ip);
980 m->m_data += sizeof(struct ip);
981 n->m_next = m;
982 m = n;
983 m->m_len = optlen + sizeof(struct ip);
984 m->m_data += max_linkhdr;
985 (void)memcpy(mtod(m, void *), ip, sizeof(struct ip));
986 } else {
987 m->m_data -= optlen;
988 m->m_len += optlen;
989 m->m_pkthdr.len += optlen;
990 ovbcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
991 }
992 ip = mtod(m, struct ip *);
993 bcopy(p->ipopt_list, ip + 1, optlen);
994 *phlen = sizeof(struct ip) + optlen;
995 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, *phlen >> 2);
996 ip->ip_len += optlen;
997 return (m);
998 }
999
1000 /*
1001 * Copy options from ip to jp,
1002 * omitting those not copied during fragmentation.
1003 */
1004 #if !IPFILTER && !IPFILTER_LKM
1005 static
1006 #endif
1007 int
1008 ip_optcopy(ip, jp)
1009 struct ip *ip, *jp;
1010 {
1011 register u_char *cp, *dp;
1012 int opt, optlen, cnt;
1013
1014 cp = (u_char *)(ip + 1);
1015 dp = (u_char *)(jp + 1);
1016 cnt = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof (struct ip);
1017 for (; cnt > 0; cnt -= optlen, cp += optlen) {
1018 opt = cp[0];
1019 if (opt == IPOPT_EOL)
1020 break;
1021 if (opt == IPOPT_NOP) {
1022 /* Preserve for IP mcast tunnel's LSRR alignment. */
1023 *dp++ = IPOPT_NOP;
1024 optlen = 1;
1025 continue;
1026 } else
1027 optlen = cp[IPOPT_OLEN];
1028 /* bogus lengths should have been caught by ip_dooptions */
1029 if (optlen > cnt)
1030 optlen = cnt;
1031 if (IPOPT_COPIED(opt)) {
1032 bcopy(cp, dp, optlen);
1033 dp += optlen;
1034 }
1035 }
1036 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
1037 *dp++ = IPOPT_EOL;
1038 return (optlen);
1039 }
1040
1041 /*
1042 * IP socket option processing.
1043 */
1044 int
1045 ip_ctloutput(so, sopt)
1046 struct socket *so;
1047 struct sockopt *sopt;
1048 {
1049 struct inpcb *inp = sotoinpcb(so);
1050 int error, optval;
1051
1052 error = optval = 0;
1053 if (sopt->sopt_level != IPPROTO_IP) {
1054 return (EINVAL);
1055 }
1056
1057 switch (sopt->sopt_dir) {
1058 case SOPT_SET:
1059 switch (sopt->sopt_name) {
1060 case IP_OPTIONS:
1061 #ifdef notyet
1062 case IP_RETOPTS:
1063 #endif
1064 {
1065 struct mbuf *m;
1066 if (sopt->sopt_valsize > MLEN) {
1067 error = EMSGSIZE;
1068 break;
1069 }
1070 MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_HEADER);
1071 if (m == 0) {
1072 error = ENOBUFS;
1073 break;
1074 }
1075 m->m_len = sopt->sopt_valsize;
1076 error = sooptcopyin(sopt, mtod(m, char *), m->m_len,
1077 m->m_len);
1078 if (error)
1079 break;
1080
1081 return (ip_pcbopts(sopt->sopt_name, &inp->inp_options,
1082 m));
1083 }
1084
1085 case IP_TOS:
1086 case IP_TTL:
1087 case IP_RECVOPTS:
1088 case IP_RECVRETOPTS:
1089 case IP_RECVDSTADDR:
1090 case IP_RECVIF:
1091 case IP_FAITH:
1092 error = sooptcopyin(sopt, &optval, sizeof optval,
1093 sizeof optval);
1094 if (error)
1095 break;
1096
1097 switch (sopt->sopt_name) {
1098 case IP_TOS:
1099 inp->inp_ip_tos = optval;
1100 break;
1101
1102 case IP_TTL:
1103 inp->inp_ip_ttl = optval;
1104 break;
1105 #define OPTSET(bit) \
1106 if (optval) \
1107 inp->inp_flags |= bit; \
1108 else \
1109 inp->inp_flags &= ~bit;
1110
1111 case IP_RECVOPTS:
1112 OPTSET(INP_RECVOPTS);
1113 break;
1114
1115 case IP_RECVRETOPTS:
1116 OPTSET(INP_RECVRETOPTS);
1117 break;
1118
1119 case IP_RECVDSTADDR:
1120 OPTSET(INP_RECVDSTADDR);
1121 break;
1122
1123 case IP_RECVIF:
1124 OPTSET(INP_RECVIF);
1125 break;
1126
1127 case IP_FAITH:
1128 OPTSET(INP_FAITH);
1129 break;
1130 }
1131 break;
1132 #undef OPTSET
1133
1134 case IP_MULTICAST_IF:
1135 case IP_MULTICAST_VIF:
1136 case IP_MULTICAST_TTL:
1137 case IP_MULTICAST_LOOP:
1138 case IP_ADD_MEMBERSHIP:
1139 case IP_DROP_MEMBERSHIP:
1140 error = ip_setmoptions(sopt, &inp->inp_moptions);
1141 break;
1142
1143 case IP_PORTRANGE:
1144 error = sooptcopyin(sopt, &optval, sizeof optval,
1145 sizeof optval);
1146 if (error)
1147 break;
1148
1149 switch (optval) {
1150 case IP_PORTRANGE_DEFAULT:
1151 inp->inp_flags &= ~(INP_LOWPORT);
1152 inp->inp_flags &= ~(INP_HIGHPORT);
1153 break;
1154
1155 case IP_PORTRANGE_HIGH:
1156 inp->inp_flags &= ~(INP_LOWPORT);
1157 inp->inp_flags |= INP_HIGHPORT;
1158 break;
1159
1160 case IP_PORTRANGE_LOW:
1161 inp->inp_flags &= ~(INP_HIGHPORT);
1162 inp->inp_flags |= INP_LOWPORT;
1163 break;
1164
1165 default:
1166 error = EINVAL;
1167 break;
1168 }
1169 break;
1170
1171 #if IPSEC
1172 case IP_IPSEC_POLICY:
1173 {
1174 caddr_t req = NULL;
1175 size_t len = 0;
1176 int priv;
1177 struct mbuf *m;
1178 int optname;
1179
1180 if (error = sooptgetm(sopt, &m)) /* XXX */
1181 break;
1182 if (error = sooptmcopyin(sopt, m)) /* XXX */
1183 break;
1184 priv = (sopt->sopt_p != NULL &&
1185 suser(sopt->sopt_p->p_ucred,
1186 &sopt->sopt_p->p_acflag) != 0) ? 0 : 1;
1187 if (m) {
1188 req = mtod(m, caddr_t);
1189 len = m->m_len;
1190 }
1191 optname = sopt->sopt_name;
1192 error = ipsec4_set_policy(inp, optname, req, len, priv);
1193 m_freem(m);
1194 break;
1195 }
1196 #endif /*IPSEC*/
1197
1198 default:
1199 error = ENOPROTOOPT;
1200 break;
1201 }
1202 break;
1203
1204 case SOPT_GET:
1205 switch (sopt->sopt_name) {
1206 case IP_OPTIONS:
1207 case IP_RETOPTS:
1208 if (inp->inp_options)
1209 error = sooptcopyout(sopt,
1210 mtod(inp->inp_options,
1211 char *),
1212 inp->inp_options->m_len);
1213 else
1214 sopt->sopt_valsize = 0;
1215 break;
1216
1217 case IP_TOS:
1218 case IP_TTL:
1219 case IP_RECVOPTS:
1220 case IP_RECVRETOPTS:
1221 case IP_RECVDSTADDR:
1222 case IP_RECVIF:
1223 case IP_PORTRANGE:
1224 case IP_FAITH:
1225 switch (sopt->sopt_name) {
1226
1227 case IP_TOS:
1228 optval = inp->inp_ip_tos;
1229 break;
1230
1231 case IP_TTL:
1232 optval = inp->inp_ip_ttl;
1233 break;
1234
1235 #define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0)
1236
1237 case IP_RECVOPTS:
1238 optval = OPTBIT(INP_RECVOPTS);
1239 break;
1240
1241 case IP_RECVRETOPTS:
1242 optval = OPTBIT(INP_RECVRETOPTS);
1243 break;
1244
1245 case IP_RECVDSTADDR:
1246 optval = OPTBIT(INP_RECVDSTADDR);
1247 break;
1248
1249 case IP_RECVIF:
1250 optval = OPTBIT(INP_RECVIF);
1251 break;
1252
1253 case IP_PORTRANGE:
1254 if (inp->inp_flags & INP_HIGHPORT)
1255 optval = IP_PORTRANGE_HIGH;
1256 else if (inp->inp_flags & INP_LOWPORT)
1257 optval = IP_PORTRANGE_LOW;
1258 else
1259 optval = 0;
1260 break;
1261
1262 case IP_FAITH:
1263 optval = OPTBIT(INP_FAITH);
1264 break;
1265 }
1266 error = sooptcopyout(sopt, &optval, sizeof optval);
1267 break;
1268
1269 case IP_MULTICAST_IF:
1270 case IP_MULTICAST_VIF:
1271 case IP_MULTICAST_TTL:
1272 case IP_MULTICAST_LOOP:
1273 case IP_ADD_MEMBERSHIP:
1274 case IP_DROP_MEMBERSHIP:
1275 error = ip_getmoptions(sopt, inp->inp_moptions);
1276 break;
1277
1278 #if IPSEC
1279 case IP_IPSEC_POLICY:
1280 {
1281 struct mbuf *m = NULL;
1282 size_t len = 0;
1283 caddr_t req = NULL;
1284
1285 if (error = sooptgetm(sopt, &m)) /* XXX */
1286 break;
1287 if (error = sooptmcopyin(sopt, m)) /* XXX */
1288 break;
1289 if (m) {
1290 req = mtod(m, caddr_t);
1291 len = m->m_len;
1292 }
1293
1294 error = ipsec4_get_policy(sotoinpcb(so), req, len, &m);
1295 if (error == 0)
1296 error = sooptmcopyout(sopt, m); /* XXX */
1297
1298 /* if error, m_freem called at soopt_mcopyout(). */
1299 if (error == 0)
1300 m_freem(m);
1301 break;
1302 }
1303 #endif /*IPSEC*/
1304
1305 default:
1306 error = ENOPROTOOPT;
1307 break;
1308 }
1309 break;
1310 }
1311 return (error);
1312 }
1313
1314 /*
1315 * Set up IP options in pcb for insertion in output packets.
1316 * Store in mbuf with pointer in pcbopt, adding pseudo-option
1317 * with destination address if source routed.
1318 */
1319 static int
1320 ip_pcbopts(optname, pcbopt, m)
1321 int optname;
1322 struct mbuf **pcbopt;
1323 register struct mbuf *m;
1324 {
1325 register int cnt, optlen;
1326 register u_char *cp;
1327 u_char opt;
1328
1329 /* turn off any old options */
1330 if (*pcbopt)
1331 (void)m_free(*pcbopt);
1332 *pcbopt = 0;
1333 if (m == (struct mbuf *)0 || m->m_len == 0) {
1334 /*
1335 * Only turning off any previous options.
1336 */
1337 if (m)
1338 (void)m_free(m);
1339 return (0);
1340 }
1341
1342 #ifndef vax
1343 if (m->m_len % sizeof(int32_t))
1344 goto bad;
1345 #endif
1346 /*
1347 * IP first-hop destination address will be stored before
1348 * actual options; move other options back
1349 * and clear it when none present.
1350 */
1351 if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
1352 goto bad;
1353 cnt = m->m_len;
1354 m->m_len += sizeof(struct in_addr);
1355 cp = mtod(m, u_char *) + sizeof(struct in_addr);
1356 ovbcopy(mtod(m, caddr_t), (caddr_t)cp, (unsigned)cnt);
1357 bzero(mtod(m, caddr_t), sizeof(struct in_addr));
1358
1359 for (; cnt > 0; cnt -= optlen, cp += optlen) {
1360 opt = cp[IPOPT_OPTVAL];
1361 if (opt == IPOPT_EOL)
1362 break;
1363 if (opt == IPOPT_NOP)
1364 optlen = 1;
1365 else {
1366 if (cnt < IPOPT_OLEN + sizeof(*cp))
1367 goto bad;
1368 optlen = cp[IPOPT_OLEN];
1369 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
1370 goto bad;
1371 }
1372 switch (opt) {
1373
1374 default:
1375 break;
1376
1377 case IPOPT_LSRR:
1378 case IPOPT_SSRR:
1379 /*
1380 * user process specifies route as:
1381 * ->A->B->C->D
1382 * D must be our final destination (but we can't
1383 * check that since we may not have connected yet).
1384 * A is first hop destination, which doesn't appear in
1385 * actual IP option, but is stored before the options.
1386 */
1387 if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
1388 goto bad;
1389 m->m_len -= sizeof(struct in_addr);
1390 cnt -= sizeof(struct in_addr);
1391 optlen -= sizeof(struct in_addr);
1392 cp[IPOPT_OLEN] = optlen;
1393 /*
1394 * Move first hop before start of options.
1395 */
1396 bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
1397 sizeof(struct in_addr));
1398 /*
1399 * Then copy rest of options back
1400 * to close up the deleted entry.
1401 */
1402 ovbcopy((caddr_t)(&cp[IPOPT_OFFSET+1] +
1403 sizeof(struct in_addr)),
1404 (caddr_t)&cp[IPOPT_OFFSET+1],
1405 (unsigned)cnt + sizeof(struct in_addr));
1406 break;
1407 }
1408 }
1409 if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
1410 goto bad;
1411 *pcbopt = m;
1412 return (0);
1413
1414 bad:
1415 (void)m_free(m);
1416 return (EINVAL);
1417 }
1418
1419 /*
1420 * XXX
1421 * The whole multicast option thing needs to be re-thought.
1422 * Several of these options are equally applicable to non-multicast
1423 * transmission, and one (IP_MULTICAST_TTL) totally duplicates a
1424 * standard option (IP_TTL).
1425 */
1426 /*
1427 * Set the IP multicast options in response to user setsockopt().
1428 */
1429 static int
1430 ip_setmoptions(sopt, imop)
1431 struct sockopt *sopt;
1432 struct ip_moptions **imop;
1433 {
1434 int error = 0;
1435 int i;
1436 struct in_addr addr;
1437 struct ip_mreq mreq;
1438 struct ifnet *ifp;
1439 struct ip_moptions *imo = *imop;
1440 struct route ro;
1441 struct sockaddr_in *dst;
1442 int s;
1443
1444 if (imo == NULL) {
1445 /*
1446 * No multicast option buffer attached to the pcb;
1447 * allocate one and initialize to default values.
1448 */
1449 imo = (struct ip_moptions*) _MALLOC(sizeof(*imo), M_IPMOPTS,
1450 M_WAITOK);
1451
1452 if (imo == NULL)
1453 return (ENOBUFS);
1454 *imop = imo;
1455 imo->imo_multicast_ifp = NULL;
1456 imo->imo_multicast_vif = -1;
1457 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1458 imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
1459 imo->imo_num_memberships = 0;
1460 }
1461
1462 switch (sopt->sopt_name) {
1463 /* store an index number for the vif you wanna use in the send */
1464 case IP_MULTICAST_VIF:
1465 if (legal_vif_num == 0) {
1466 error = EOPNOTSUPP;
1467 break;
1468 }
1469 error = sooptcopyin(sopt, &i, sizeof i, sizeof i);
1470 if (error)
1471 break;
1472 if (!legal_vif_num(i) && (i != -1)) {
1473 error = EINVAL;
1474 break;
1475 }
1476 imo->imo_multicast_vif = i;
1477 break;
1478
1479 case IP_MULTICAST_IF:
1480 /*
1481 * Select the interface for outgoing multicast packets.
1482 */
1483 error = sooptcopyin(sopt, &addr, sizeof addr, sizeof addr);
1484 if (error)
1485 break;
1486 /*
1487 * INADDR_ANY is used to remove a previous selection.
1488 * When no interface is selected, a default one is
1489 * chosen every time a multicast packet is sent.
1490 */
1491 if (addr.s_addr == INADDR_ANY) {
1492 imo->imo_multicast_ifp = NULL;
1493 break;
1494 }
1495 /*
1496 * The selected interface is identified by its local
1497 * IP address. Find the interface and confirm that
1498 * it supports multicasting.
1499 */
1500 s = splimp();
1501 INADDR_TO_IFP(addr, ifp);
1502 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1503 splx(s);
1504 error = EADDRNOTAVAIL;
1505 break;
1506 }
1507 imo->imo_multicast_ifp = ifp;
1508 splx(s);
1509 break;
1510
1511 case IP_MULTICAST_TTL:
1512 /*
1513 * Set the IP time-to-live for outgoing multicast packets.
1514 * The original multicast API required a char argument,
1515 * which is inconsistent with the rest of the socket API.
1516 * We allow either a char or an int.
1517 */
1518 if (sopt->sopt_valsize == 1) {
1519 u_char ttl;
1520 error = sooptcopyin(sopt, &ttl, 1, 1);
1521 if (error)
1522 break;
1523 imo->imo_multicast_ttl = ttl;
1524 } else {
1525 u_int ttl;
1526 error = sooptcopyin(sopt, &ttl, sizeof ttl,
1527 sizeof ttl);
1528 if (error)
1529 break;
1530 if (ttl > 255)
1531 error = EINVAL;
1532 else
1533 imo->imo_multicast_ttl = ttl;
1534 }
1535 break;
1536
1537 case IP_MULTICAST_LOOP:
1538 /*
1539 * Set the loopback flag for outgoing multicast packets.
1540 * Must be zero or one. The original multicast API required a
1541 * char argument, which is inconsistent with the rest
1542 * of the socket API. We allow either a char or an int.
1543 */
1544 if (sopt->sopt_valsize == 1) {
1545 u_char loop;
1546 error = sooptcopyin(sopt, &loop, 1, 1);
1547 if (error)
1548 break;
1549 imo->imo_multicast_loop = !!loop;
1550 } else {
1551 u_int loop;
1552 error = sooptcopyin(sopt, &loop, sizeof loop,
1553 sizeof loop);
1554 if (error)
1555 break;
1556 imo->imo_multicast_loop = !!loop;
1557 }
1558 break;
1559
1560 case IP_ADD_MEMBERSHIP:
1561 /*
1562 * Add a multicast group membership.
1563 * Group must be a valid IP multicast address.
1564 */
1565 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
1566 if (error)
1567 break;
1568
1569 if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) {
1570 error = EINVAL;
1571 break;
1572 }
1573 s = splimp();
1574 /*
1575 * If no interface address was provided, use the interface of
1576 * the route to the given multicast address.
1577 */
1578 if (mreq.imr_interface.s_addr == INADDR_ANY) {
1579 bzero((caddr_t)&ro, sizeof(ro));
1580 dst = (struct sockaddr_in *)&ro.ro_dst;
1581 dst->sin_len = sizeof(*dst);
1582 dst->sin_family = AF_INET;
1583 dst->sin_addr = mreq.imr_multiaddr;
1584 rtalloc(&ro);
1585 if (ro.ro_rt == NULL) {
1586 error = EADDRNOTAVAIL;
1587 splx(s);
1588 break;
1589 }
1590 ifp = ro.ro_rt->rt_ifp;
1591 rtfree(ro.ro_rt);
1592 }
1593 else {
1594 INADDR_TO_IFP(mreq.imr_interface, ifp);
1595 }
1596
1597 /*
1598 * See if we found an interface, and confirm that it
1599 * supports multicast.
1600 */
1601 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1602 error = EADDRNOTAVAIL;
1603 splx(s);
1604 break;
1605 }
1606 /*
1607 * See if the membership already exists or if all the
1608 * membership slots are full.
1609 */
1610 for (i = 0; i < imo->imo_num_memberships; ++i) {
1611 if (imo->imo_membership[i]->inm_ifp == ifp &&
1612 imo->imo_membership[i]->inm_addr.s_addr
1613 == mreq.imr_multiaddr.s_addr)
1614 break;
1615 }
1616 if (i < imo->imo_num_memberships) {
1617 error = EADDRINUSE;
1618 splx(s);
1619 break;
1620 }
1621 if (i == IP_MAX_MEMBERSHIPS) {
1622 error = ETOOMANYREFS;
1623 splx(s);
1624 break;
1625 }
1626 /*
1627 * Everything looks good; add a new record to the multicast
1628 * address list for the given interface.
1629 */
1630 if ((imo->imo_membership[i] =
1631 in_addmulti(&mreq.imr_multiaddr, ifp)) == NULL) {
1632 error = ENOBUFS;
1633 splx(s);
1634 break;
1635 }
1636 ++imo->imo_num_memberships;
1637 splx(s);
1638 break;
1639
1640 case IP_DROP_MEMBERSHIP:
1641 /*
1642 * Drop a multicast group membership.
1643 * Group must be a valid IP multicast address.
1644 */
1645 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
1646 if (error)
1647 break;
1648
1649 if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) {
1650 error = EINVAL;
1651 break;
1652 }
1653
1654 s = splimp();
1655 /*
1656 * If an interface address was specified, get a pointer
1657 * to its ifnet structure.
1658 */
1659 if (mreq.imr_interface.s_addr == INADDR_ANY)
1660 ifp = NULL;
1661 else {
1662 INADDR_TO_IFP(mreq.imr_interface, ifp);
1663 if (ifp == NULL) {
1664 error = EADDRNOTAVAIL;
1665 splx(s);
1666 break;
1667 }
1668 }
1669 /*
1670 * Find the membership in the membership array.
1671 */
1672 for (i = 0; i < imo->imo_num_memberships; ++i) {
1673 if ((ifp == NULL ||
1674 imo->imo_membership[i]->inm_ifp == ifp) &&
1675 imo->imo_membership[i]->inm_addr.s_addr ==
1676 mreq.imr_multiaddr.s_addr)
1677 break;
1678 }
1679 if (i == imo->imo_num_memberships) {
1680 error = EADDRNOTAVAIL;
1681 splx(s);
1682 break;
1683 }
1684 /*
1685 * Give up the multicast address record to which the
1686 * membership points.
1687 */
1688 in_delmulti(imo->imo_membership[i]);
1689 /*
1690 * Remove the gap in the membership array.
1691 */
1692 for (++i; i < imo->imo_num_memberships; ++i)
1693 imo->imo_membership[i-1] = imo->imo_membership[i];
1694 --imo->imo_num_memberships;
1695 splx(s);
1696 break;
1697
1698 default:
1699 error = EOPNOTSUPP;
1700 break;
1701 }
1702
1703 /*
1704 * If all options have default values, no need to keep the mbuf.
1705 */
1706 if (imo->imo_multicast_ifp == NULL &&
1707 imo->imo_multicast_vif == -1 &&
1708 imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL &&
1709 imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP &&
1710 imo->imo_num_memberships == 0) {
1711 FREE(*imop, M_IPMOPTS);
1712 *imop = NULL;
1713 }
1714
1715 return (error);
1716 }
1717
1718 /*
1719 * Return the IP multicast options in response to user getsockopt().
1720 */
1721 static int
1722 ip_getmoptions(sopt, imo)
1723 struct sockopt *sopt;
1724 register struct ip_moptions *imo;
1725 {
1726 struct in_addr addr;
1727 struct in_ifaddr *ia;
1728 int error, optval;
1729 u_char coptval;
1730
1731 error = 0;
1732 switch (sopt->sopt_name) {
1733 case IP_MULTICAST_VIF:
1734 if (imo != NULL)
1735 optval = imo->imo_multicast_vif;
1736 else
1737 optval = -1;
1738 error = sooptcopyout(sopt, &optval, sizeof optval);
1739 break;
1740
1741 case IP_MULTICAST_IF:
1742 if (imo == NULL || imo->imo_multicast_ifp == NULL)
1743 addr.s_addr = INADDR_ANY;
1744 else {
1745 IFP_TO_IA(imo->imo_multicast_ifp, ia);
1746 addr.s_addr = (ia == NULL) ? INADDR_ANY
1747 : IA_SIN(ia)->sin_addr.s_addr;
1748 }
1749 error = sooptcopyout(sopt, &addr, sizeof addr);
1750 break;
1751
1752 case IP_MULTICAST_TTL:
1753 if (imo == 0)
1754 optval = coptval = IP_DEFAULT_MULTICAST_TTL;
1755 else
1756 optval = coptval = imo->imo_multicast_ttl;
1757 if (sopt->sopt_valsize == 1)
1758 error = sooptcopyout(sopt, &coptval, 1);
1759 else
1760 error = sooptcopyout(sopt, &optval, sizeof optval);
1761 break;
1762
1763 case IP_MULTICAST_LOOP:
1764 if (imo == 0)
1765 optval = coptval = IP_DEFAULT_MULTICAST_LOOP;
1766 else
1767 optval = coptval = imo->imo_multicast_loop;
1768 if (sopt->sopt_valsize == 1)
1769 error = sooptcopyout(sopt, &coptval, 1);
1770 else
1771 error = sooptcopyout(sopt, &optval, sizeof optval);
1772 break;
1773
1774 default:
1775 error = ENOPROTOOPT;
1776 break;
1777 }
1778 return (error);
1779 }
1780
1781 /*
1782 * Discard the IP multicast options.
1783 */
1784 void
1785 ip_freemoptions(imo)
1786 register struct ip_moptions *imo;
1787 {
1788 register int i;
1789
1790 if (imo != NULL) {
1791 for (i = 0; i < imo->imo_num_memberships; ++i)
1792 in_delmulti(imo->imo_membership[i]);
1793 FREE(imo, M_IPMOPTS);
1794 }
1795 }
1796
1797 /*
1798 * Routine called from ip_output() to loop back a copy of an IP multicast
1799 * packet to the input queue of a specified interface. Note that this
1800 * calls the output routine of the loopback "driver", but with an interface
1801 * pointer that might NOT be a loopback interface -- evil, but easier than
1802 * replicating that code here.
1803 */
1804 static void
1805 ip_mloopback(ifp, m, dst, hlen)
1806 struct ifnet *ifp;
1807 register struct mbuf *m;
1808 register struct sockaddr_in *dst;
1809 int hlen;
1810 {
1811 register struct ip *ip;
1812 struct mbuf *copym;
1813
1814 copym = m_copy(m, 0, M_COPYALL);
1815 if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen))
1816 copym = m_pullup(copym, hlen);
1817 if (copym != NULL) {
1818 /*
1819 * We don't bother to fragment if the IP length is greater
1820 * than the interface's MTU. Can this possibly matter?
1821 */
1822 ip = mtod(copym, struct ip *);
1823 ip->ip_len = htons((u_short)ip->ip_len);
1824 ip->ip_off = htons((u_short)ip->ip_off);
1825 ip->ip_sum = 0;
1826 ip->ip_sum = in_cksum(copym, hlen);
1827
1828 /*
1829 * NB:
1830 * It's not clear whether there are any lingering
1831 * reentrancy problems in other areas which might
1832 * be exposed by using ip_input directly (in
1833 * particular, everything which modifies the packet
1834 * in-place). Yet another option is using the
1835 * protosw directly to deliver the looped back
1836 * packet. For the moment, we'll err on the side
1837 * of safety by using if_simloop().
1838 */
1839 #if 1 /* XXX */
1840 if (dst->sin_family != AF_INET) {
1841 printf("ip_mloopback: bad address family %d\n",
1842 dst->sin_family);
1843 dst->sin_family = AF_INET;
1844 }
1845 #endif
1846
1847 /*
1848 * TedW:
1849 * We need to send all loopback traffic down to dlil in case
1850 * a filter has tapped-in.
1851 */
1852
1853 if (lo_dl_tag == 0)
1854 dlil_find_dltag(APPLE_IF_FAM_LOOPBACK, 0, PF_INET, &lo_dl_tag);
1855
1856 /*
1857 * Stuff the 'real' ifp into the pkthdr, to be used in matching
1858 * in ip_input(); we need the loopback ifp/dl_tag passed as args
1859 * to make the loopback driver compliant with the data link
1860 * requirements.
1861 */
1862 if (lo_dl_tag)
1863 { copym->m_pkthdr.rcvif = ifp;
1864 dlil_output(lo_dl_tag, copym, 0, (struct sockaddr *) dst, 0);
1865 } else {
1866 printf("Warning: ip_output call to dlil_find_dltag failed!\n");
1867 m_freem(copym);
1868 }
1869
1870 /* if_simloop(ifp, copym, (struct sockaddr *)dst, 0);*/
1871 }
1872 }