]> git.saurik.com Git - apple/xnu.git/blob - bsd/netinet/ip_output.c
xnu-201.tar.gz
[apple/xnu.git] / bsd / netinet / ip_output.c
1 /*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /*
23 * Copyright (c) 1982, 1986, 1988, 1990, 1993
24 * The Regents of the University of California. All rights reserved.
25 *
26 * Redistribution and use in source and binary forms, with or without
27 * modification, are permitted provided that the following conditions
28 * are met:
29 * 1. Redistributions of source code must retain the above copyright
30 * notice, this list of conditions and the following disclaimer.
31 * 2. Redistributions in binary form must reproduce the above copyright
32 * notice, this list of conditions and the following disclaimer in the
33 * documentation and/or other materials provided with the distribution.
34 * 3. All advertising materials mentioning features or use of this software
35 * must display the following acknowledgement:
36 * This product includes software developed by the University of
37 * California, Berkeley and its contributors.
38 * 4. Neither the name of the University nor the names of its contributors
39 * may be used to endorse or promote products derived from this software
40 * without specific prior written permission.
41 *
42 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
43 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
44 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
45 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
46 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
47 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
48 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
49 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
50 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
51 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
52 * SUCH DAMAGE.
53 *
54 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94
55 */
56
57 #define _IP_VHL
58
59 #if ISFB31
60 #include "opt_ipfw.h"
61 #include "opt_ipdn.h"
62 #include "opt_ipdivert.h"
63 #include "opt_ipfilter.h"
64 #endif
65
66 #include <sys/param.h>
67 #include <sys/systm.h>
68 #include <sys/kernel.h>
69 #include <sys/malloc.h>
70 #include <sys/mbuf.h>
71 #include <sys/protosw.h>
72 #include <sys/socket.h>
73 #include <sys/socketvar.h>
74
75 #include <net/if.h>
76 #include <net/route.h>
77
78 #include <netinet/in.h>
79 #include <netinet/in_systm.h>
80 #include <netinet/ip.h>
81 #if INET6
82 #include <netinet/ip6.h>
83 #include <netinet6/ip6_var.h>
84 #endif
85 #include <netinet/in_pcb.h>
86 #include <netinet/in_var.h>
87 #include <netinet/ip_var.h>
88 #include <net/dlil.h>
89
90 #include <sys/kdebug.h>
91
92 #define DBG_LAYER_BEG NETDBG_CODE(DBG_NETIP, 1)
93 #define DBG_LAYER_END NETDBG_CODE(DBG_NETIP, 3)
94 #define DBG_FNC_IP_OUTPUT NETDBG_CODE(DBG_NETIP, (1 << 8) | 1)
95
96
97 #ifdef vax
98 #include <machine/mtpr.h>
99 #endif
100
101 #if ISFB31
102 #include <machine/in_cksum.h>
103
104 static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "internet multicast options");
105 #endif
106
107 //static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "internet multicast options");
108
109 #if IPSEC
110 #include <netinet6/ipsec.h>
111 #include <netkey/key.h>
112 #include <netkey/key_debug.h>
113
114 #endif /*IPSEC*/
115
116 #if !defined(COMPAT_IPFW) || COMPAT_IPFW == 1
117 #undef COMPAT_IPFW
118 #define COMPAT_IPFW 1
119 #else
120 #undef COMPAT_IPFW
121 #endif
122
123 #if COMPAT_IPFW
124 #include <netinet/ip_fw.h>
125 #endif
126
127 #if DUMMYNET
128 #include <netinet/ip_dummynet.h>
129 #endif
130
131 #if IPFIREWALL_FORWARD_DEBUG
132 #define print_ip(a) printf("%ld.%ld.%ld.%ld",(ntohl(a.s_addr)>>24)&0xFF,\
133 (ntohl(a.s_addr)>>16)&0xFF,\
134 (ntohl(a.s_addr)>>8)&0xFF,\
135 (ntohl(a.s_addr))&0xFF);
136 #endif
137
138 u_short ip_id;
139
140 static struct mbuf *ip_insertoptions __P((struct mbuf *, struct mbuf *, int *));
141 static void ip_mloopback
142 __P((struct ifnet *, struct mbuf *, struct sockaddr_in *, int));
143 static int ip_getmoptions
144 __P((struct sockopt *, struct ip_moptions *));
145 static int ip_pcbopts __P((int, struct mbuf **, struct mbuf *));
146 static int ip_setmoptions
147 __P((struct sockopt *, struct ip_moptions **));
148 static u_long lo_dl_tag = 0;
149 static int ip_optcopy __P((struct ip *, struct ip *));
150
151 void in_delayed_cksum(struct mbuf *m);
152 extern int apple_hwcksum_tx;
153
154 extern struct protosw inetsw[];
155
156 /*
157 * IP output. The packet in mbuf chain m contains a skeletal IP
158 * header (with len, off, ttl, proto, tos, src, dst).
159 * The mbuf chain containing the packet will be freed.
160 * The mbuf opt, if present, will not be freed.
161 */
162 int
163 ip_output(m0, opt, ro, flags, imo)
164 struct mbuf *m0;
165 struct mbuf *opt;
166 struct route *ro;
167 int flags;
168 struct ip_moptions *imo;
169 {
170 struct ip *ip, *mhip;
171 struct ifnet *ifp;
172 u_long dl_tag;
173 struct mbuf *m = m0;
174 int hlen = sizeof (struct ip);
175 int len, off, error = 0;
176 struct sockaddr_in *dst;
177 struct in_ifaddr *ia;
178 int isbroadcast, sw_csum;
179 #if IPSEC
180 struct route iproute;
181 struct socket *so;
182 struct secpolicy *sp = NULL;
183 #endif
184 #if IPFIREWALL_FORWARD
185 int fwd_rewrite_src = 0;
186 #endif
187
188
189 #if !IPDIVERT /* dummy variable for the firewall code to play with */
190 u_short ip_divert_cookie = 0 ;
191 #endif
192 #if COMPAT_IPFW
193 struct ip_fw_chain *rule = NULL ;
194 #endif
195
196 KERNEL_DEBUG(DBG_FNC_IP_OUTPUT | DBG_FUNC_START, 0,0,0,0,0);
197
198 #if IPSEC
199 /*
200 * NOTE: m->m_pkthdr is NULL cleared below just to prevent ipfw code
201 * from SEGV.
202 * ipfw code uses rcvif to determine incoming interface, and
203 * KAME uses rcvif for ipsec processing.
204 * ipfw may not be working right with KAME at this moment.
205 * We need more tests.
206 */
207 #if DUMMYNET
208 if (m->m_type == MT_DUMMYNET) {
209 if (m->m_next != NULL) {
210 so = (struct socket *)m->m_next->m_pkthdr.rcvif;
211 m->m_next->m_pkthdr.rcvif = NULL;
212 } else
213 so = NULL;
214 } else
215 #endif
216 {
217 so = ipsec_getsocket(m);
218 ipsec_setsocket(m, NULL);
219 }
220 #endif /*IPSEC*/
221
222
223 #if IPFIREWALL && DUMMYNET
224 /*
225 * dummynet packet are prepended a vestigial mbuf with
226 * m_type = MT_DUMMYNET and m_data pointing to the matching
227 * rule.
228 */
229 if (m->m_type == MT_DUMMYNET) {
230 struct mbuf *tmp_m = m ;
231 /*
232 * the packet was already tagged, so part of the
233 * processing was already done, and we need to go down.
234 * opt, flags and imo have already been used, and now
235 * they are used to hold ifp and hlen and NULL, respectively.
236 */
237 rule = (struct ip_fw_chain *)(m->m_data) ;
238 m = m->m_next ;
239 FREE(tmp_m, M_IPFW);
240 ip = mtod(m, struct ip *);
241 dst = (struct sockaddr_in *)&ro->ro_dst;
242 ifp = (struct ifnet *)opt;
243 hlen = IP_VHL_HL(ip->ip_vhl) << 2 ;
244 opt = NULL ;
245 flags = 0 ; /* XXX is this correct ? */
246 goto sendit;
247 } else
248 rule = NULL ;
249 #endif
250
251 #if DIAGNOSTIC
252 if ((m->m_flags & M_PKTHDR) == 0)
253 panic("ip_output no HDR");
254 if (!ro)
255 panic("ip_output no route, proto = %d",
256 mtod(m, struct ip *)->ip_p);
257 #endif
258 if (opt) {
259 m = ip_insertoptions(m, opt, &len);
260 hlen = len;
261 }
262 ip = mtod(m, struct ip *);
263 /*
264 * Fill in IP header.
265 */
266 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
267 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, hlen >> 2);
268 ip->ip_off &= IP_DF;
269 ip->ip_id = htons(ip_id++);
270 ipstat.ips_localout++;
271 } else {
272 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
273 }
274
275 KERNEL_DEBUG(DBG_LAYER_BEG, ip->ip_dst.s_addr,
276 ip->ip_src.s_addr, ip->ip_p, ip->ip_off, ip->ip_len);
277
278 dst = (struct sockaddr_in *)&ro->ro_dst;
279 /*
280 * If there is a cached route,
281 * check that it is to the same destination
282 * and is still up. If not, free it and try again.
283 */
284 if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
285 dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
286 RTFREE(ro->ro_rt);
287 ro->ro_rt = (struct rtentry *)0;
288 }
289 if (ro->ro_rt == 0) {
290 dst->sin_family = AF_INET;
291 dst->sin_len = sizeof(*dst);
292 dst->sin_addr = ip->ip_dst;
293 }
294 /*
295 * If routing to interface only,
296 * short circuit routing lookup.
297 */
298 #define ifatoia(ifa) ((struct in_ifaddr *)(ifa))
299 #define sintosa(sin) ((struct sockaddr *)(sin))
300 if (flags & IP_ROUTETOIF) {
301 if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0 &&
302 (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) {
303 ipstat.ips_noroute++;
304 error = ENETUNREACH;
305 goto bad;
306 }
307 ifp = ia->ia_ifp;
308 dl_tag = ia->ia_ifa.ifa_dlt;
309 ip->ip_ttl = 1;
310 isbroadcast = in_broadcast(dst->sin_addr, ifp);
311 } else {
312 /*
313 * If this is the case, we probably don't want to allocate
314 * a protocol-cloned route since we didn't get one from the
315 * ULP. This lets TCP do its thing, while not burdening
316 * forwarding or ICMP with the overhead of cloning a route.
317 * Of course, we still want to do any cloning requested by
318 * the link layer, as this is probably required in all cases
319 * for correct operation (as it is for ARP).
320 */
321 if (ro->ro_rt == 0)
322 rtalloc_ign(ro, RTF_PRCLONING);
323 if (ro->ro_rt == 0) {
324 ipstat.ips_noroute++;
325 error = EHOSTUNREACH;
326 goto bad;
327 }
328 ia = ifatoia(ro->ro_rt->rt_ifa);
329 ifp = ro->ro_rt->rt_ifp;
330 dl_tag = ro->ro_rt->rt_dlt;
331 ro->ro_rt->rt_use++;
332 if (ro->ro_rt->rt_flags & RTF_GATEWAY)
333 dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
334 if (ro->ro_rt->rt_flags & RTF_HOST)
335 isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST);
336 else
337 isbroadcast = in_broadcast(dst->sin_addr, ifp);
338 }
339 if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
340 struct in_multi *inm;
341
342 m->m_flags |= M_MCAST;
343 /*
344 * IP destination address is multicast. Make sure "dst"
345 * still points to the address in "ro". (It may have been
346 * changed to point to a gateway address, above.)
347 */
348 dst = (struct sockaddr_in *)&ro->ro_dst;
349 /*
350 * See if the caller provided any multicast options
351 */
352 if (imo != NULL) {
353 ip->ip_ttl = imo->imo_multicast_ttl;
354 if (imo->imo_multicast_ifp != NULL)
355 ifp = imo->imo_multicast_ifp;
356 if (imo->imo_multicast_vif != -1)
357 ip->ip_src.s_addr =
358 ip_mcast_src(imo->imo_multicast_vif);
359 } else
360 ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
361 /*
362 * Confirm that the outgoing interface supports multicast.
363 */
364 if ((imo == NULL) || (imo->imo_multicast_vif == -1)) {
365 if ((ifp->if_flags & IFF_MULTICAST) == 0) {
366 ipstat.ips_noroute++;
367 error = ENETUNREACH;
368 goto bad;
369 }
370 }
371 /*
372 * If source address not specified yet, use address
373 * of outgoing interface.
374 */
375 if (ip->ip_src.s_addr == INADDR_ANY) {
376 register struct in_ifaddr *ia1;
377
378 for (ia1 = in_ifaddrhead.tqh_first; ia1;
379 ia1 = ia1->ia_link.tqe_next)
380 if (ia1->ia_ifp == ifp) {
381 ip->ip_src = IA_SIN(ia1)->sin_addr;
382 break;
383 }
384 }
385
386 IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm);
387 if (inm != NULL &&
388 (imo == NULL || imo->imo_multicast_loop)) {
389 /*
390 * If we belong to the destination multicast group
391 * on the outgoing interface, and the caller did not
392 * forbid loopback, loop back a copy.
393 */
394 ip_mloopback(ifp, m, dst, hlen);
395 }
396 else {
397 /*
398 * If we are acting as a multicast router, perform
399 * multicast forwarding as if the packet had just
400 * arrived on the interface to which we are about
401 * to send. The multicast forwarding function
402 * recursively calls this function, using the
403 * IP_FORWARDING flag to prevent infinite recursion.
404 *
405 * Multicasts that are looped back by ip_mloopback(),
406 * above, will be forwarded by the ip_input() routine,
407 * if necessary.
408 */
409 if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
410 /*
411 * Check if rsvp daemon is running. If not, don't
412 * set ip_moptions. This ensures that the packet
413 * is multicast and not just sent down one link
414 * as prescribed by rsvpd.
415 */
416 if (!rsvp_on)
417 imo = NULL;
418 if (ip_mforward(ip, ifp, m, imo) != 0) {
419 m_freem(m);
420 goto done;
421 }
422 }
423 }
424
425 /*
426 * Multicasts with a time-to-live of zero may be looped-
427 * back, above, but must not be transmitted on a network.
428 * Also, multicasts addressed to the loopback interface
429 * are not sent -- the above call to ip_mloopback() will
430 * loop back a copy if this host actually belongs to the
431 * destination group on the loopback interface.
432 */
433 if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) {
434 m_freem(m);
435 goto done;
436 }
437
438 goto sendit;
439 }
440 #ifndef notdef
441 /*
442 * If source address not specified yet, use address
443 * of outgoing interface.
444 */
445 if (ip->ip_src.s_addr == INADDR_ANY) {
446 ip->ip_src = IA_SIN(ia)->sin_addr;
447 #if IPFIREWALL_FORWARD
448 /* Keep note that we did this - if the firewall changes
449 * the next-hop, our interface may change, changing the
450 * default source IP. It's a shame so much effort happens
451 * twice. Oh well.
452 */
453 fwd_rewrite_src++;
454 #endif /* IPFIREWALL_FORWARD */
455 }
456 #endif /* notdef */
457 /*
458 * Verify that we have any chance at all of being able to queue
459 * the packet or packet fragments
460 */
461 if ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >=
462 ifp->if_snd.ifq_maxlen) {
463 error = ENOBUFS;
464 goto bad;
465 }
466
467 /*
468 * Look for broadcast address and
469 * and verify user is allowed to send
470 * such a packet.
471 */
472 if (isbroadcast) {
473 if ((ifp->if_flags & IFF_BROADCAST) == 0) {
474 error = EADDRNOTAVAIL;
475 goto bad;
476 }
477 if ((flags & IP_ALLOWBROADCAST) == 0) {
478 error = EACCES;
479 goto bad;
480 }
481 /* don't allow broadcast messages to be fragmented */
482 if ((u_short)ip->ip_len > ifp->if_mtu) {
483 error = EMSGSIZE;
484 goto bad;
485 }
486 m->m_flags |= M_BCAST;
487 } else {
488 m->m_flags &= ~M_BCAST;
489 }
490
491 sendit:
492 /*
493 * IpHack's section.
494 * - Xlate: translate packet's addr/port (NAT).
495 * - Firewall: deny/allow/etc.
496 * - Wrap: fake packet's addr/port <unimpl.>
497 * - Encapsulate: put it in another IP and send out. <unimp.>
498 */
499 #if COMPAT_IPFW
500 if (ip_nat_ptr && !(*ip_nat_ptr)(&ip, &m, ifp, IP_NAT_OUT)) {
501 error = EACCES;
502 goto done;
503 }
504
505 /*
506 * Check with the firewall...
507 */
508 if (ip_fw_chk_ptr) {
509 struct sockaddr_in *old = dst;
510
511 off = (*ip_fw_chk_ptr)(&ip,
512 hlen, ifp, &ip_divert_cookie, &m, &rule, &dst);
513 /*
514 * On return we must do the following:
515 * m == NULL -> drop the pkt
516 * 1<=off<= 0xffff -> DIVERT
517 * (off & 0x10000) -> send to a DUMMYNET pipe
518 * dst != old -> IPFIREWALL_FORWARD
519 * off==0, dst==old -> accept
520 * If some of the above modules is not compiled in, then
521 * we should't have to check the corresponding condition
522 * (because the ipfw control socket should not accept
523 * unsupported rules), but better play safe and drop
524 * packets in case of doubt.
525 */
526 if (!m) { /* firewall said to reject */
527 error = EACCES;
528 goto done;
529 }
530 if (off == 0 && dst == old) /* common case */
531 goto pass ;
532 #if DUMMYNET
533 if (off & 0x10000) {
534 /*
535 * pass the pkt to dummynet. Need to include
536 * pipe number, m, ifp, ro, hlen because these are
537 * not recomputed in the next pass.
538 * All other parameters have been already used and
539 * so they are not needed anymore.
540 * XXX note: if the ifp or ro entry are deleted
541 * while a pkt is in dummynet, we are in trouble!
542 */
543 dummynet_io(off & 0xffff, DN_TO_IP_OUT, m,ifp,ro,hlen,rule);
544 goto done;
545 }
546 #endif
547 #if IPDIVERT
548 if (off > 0 && off < 0x10000) { /* Divert packet */
549
550 /*
551 * delayed checksums are not currently compatible
552 * with divert sockets.
553 */
554 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
555 in_delayed_cksum(m);
556 if (m == NULL)
557 return(ENOMEM);
558 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
559 }
560
561 /* Restore packet header fields to original values */
562 ip->ip_len = htons((u_short)ip->ip_len);
563 ip->ip_off = htons((u_short)ip->ip_off);
564
565 ip_divert_port = off & 0xffff ;
566 (*ip_protox[IPPROTO_DIVERT]->pr_input)(m, 0);
567 goto done;
568 }
569 #endif
570
571 #if IPFIREWALL_FORWARD
572 /* Here we check dst to make sure it's directly reachable on the
573 * interface we previously thought it was.
574 * If it isn't (which may be likely in some situations) we have
575 * to re-route it (ie, find a route for the next-hop and the
576 * associated interface) and set them here. This is nested
577 * forwarding which in most cases is undesirable, except where
578 * such control is nigh impossible. So we do it here.
579 * And I'm babbling.
580 */
581 if (off == 0 && old != dst) {
582 struct in_ifaddr *ia;
583
584 /* It's changed... */
585 /* There must be a better way to do this next line... */
586 static struct route sro_fwd, *ro_fwd = &sro_fwd;
587 #if IPFIREWALL_FORWARD_DEBUG
588 printf("IPFIREWALL_FORWARD: New dst ip: ");
589 print_ip(dst->sin_addr);
590 printf("\n");
591 #endif
592 /*
593 * We need to figure out if we have been forwarded
594 * to a local socket. If so then we should somehow
595 * "loop back" to ip_input, and get directed to the
596 * PCB as if we had received this packet. This is
597 * because it may be dificult to identify the packets
598 * you want to forward until they are being output
599 * and have selected an interface. (e.g. locally
600 * initiated packets) If we used the loopback inteface,
601 * we would not be able to control what happens
602 * as the packet runs through ip_input() as
603 * it is done through a ISR.
604 */
605 for (ia = TAILQ_FIRST(&in_ifaddrhead); ia;
606 ia = TAILQ_NEXT(ia, ia_link)) {
607 /*
608 * If the addr to forward to is one
609 * of ours, we pretend to
610 * be the destination for this packet.
611 */
612 if (IA_SIN(ia)->sin_addr.s_addr ==
613 dst->sin_addr.s_addr)
614 break;
615 }
616 if (ia) {
617 /* tell ip_input "dont filter" */
618 ip_fw_fwd_addr = dst;
619 if (m->m_pkthdr.rcvif == NULL)
620 m->m_pkthdr.rcvif = ifunit("lo0");
621
622 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
623 m->m_pkthdr.csum_flags |=
624 CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
625 m0->m_pkthdr.csum_data = 0xffff;
626 }
627 m->m_pkthdr.csum_flags |=
628 CSUM_IP_CHECKED | CSUM_IP_VALID;
629 ip->ip_len = htons((u_short)ip->ip_len);
630 ip->ip_off = htons((u_short)ip->ip_off);
631
632
633 ip_input(m);
634 goto done;
635 }
636 /* Some of the logic for this was
637 * nicked from above.
638 *
639 * This rewrites the cached route in a local PCB.
640 * Is this what we want to do?
641 */
642 bcopy(dst, &ro_fwd->ro_dst, sizeof(*dst));
643
644 ro_fwd->ro_rt = 0;
645 rtalloc_ign(ro_fwd, RTF_PRCLONING);
646
647 if (ro_fwd->ro_rt == 0) {
648 ipstat.ips_noroute++;
649 error = EHOSTUNREACH;
650 goto bad;
651 }
652
653 ia = ifatoia(ro_fwd->ro_rt->rt_ifa);
654 ifp = ro_fwd->ro_rt->rt_ifp;
655 dl_tag = ro->ro_rt->rt_dlt;
656 ro_fwd->ro_rt->rt_use++;
657 if (ro_fwd->ro_rt->rt_flags & RTF_GATEWAY)
658 dst = (struct sockaddr_in *)ro_fwd->ro_rt->rt_gateway;
659 if (ro_fwd->ro_rt->rt_flags & RTF_HOST)
660 isbroadcast =
661 (ro_fwd->ro_rt->rt_flags & RTF_BROADCAST);
662 else
663 isbroadcast = in_broadcast(dst->sin_addr, ifp);
664 RTFREE(ro->ro_rt);
665 ro->ro_rt = ro_fwd->ro_rt;
666 dst = (struct sockaddr_in *)&ro_fwd->ro_dst;
667
668 /*
669 * If we added a default src ip earlier,
670 * which would have been gotten from the-then
671 * interface, do it again, from the new one.
672 */
673 if (fwd_rewrite_src)
674 ip->ip_src = IA_SIN(ia)->sin_addr;
675 goto pass ;
676 }
677 #endif /* IPFIREWALL_FORWARD */
678 /*
679 * if we get here, none of the above matches, and
680 * we have to drop the pkt
681 */
682 m_freem(m);
683 error = EACCES; /* not sure this is the right error msg */
684 goto done;
685 }
686 #endif /* COMPAT_IPFW */
687
688 pass:
689
690 #if defined(PM)
691 /*
692 * Processing IP filter/NAT.
693 * Return TRUE iff this packet is discarded.
694 * Return FALSE iff this packet is accepted.
695 */
696
697 if (doNatFil && pm_out(ro->ro_rt->rt_ifp, ip, m))
698 goto done;
699 #endif
700
701 #if IPSEC
702 /* get SP for this packet */
703 if (so == NULL)
704 sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, flags, &error);
705 else
706 sp = ipsec4_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error);
707
708 if (sp == NULL) {
709 ipsecstat.out_inval++;
710 goto bad;
711 }
712
713 error = 0;
714
715 /* check policy */
716 switch (sp->policy) {
717 case IPSEC_POLICY_DISCARD:
718 /*
719 * This packet is just discarded.
720 */
721 ipsecstat.out_polvio++;
722 goto bad;
723
724 case IPSEC_POLICY_BYPASS:
725 case IPSEC_POLICY_NONE:
726 /* no need to do IPsec. */
727 goto skip_ipsec;
728
729 case IPSEC_POLICY_IPSEC:
730 if (sp->req == NULL) {
731 /* XXX should be panic ? */
732 printf("ip_output: No IPsec request specified.\n");
733 error = EINVAL;
734 goto bad;
735 }
736 break;
737
738 case IPSEC_POLICY_ENTRUST:
739 default:
740 printf("ip_output: Invalid policy found. %d\n", sp->policy);
741 }
742
743
744 {
745 struct ipsec_output_state state;
746 bzero(&state, sizeof(state));
747 state.m = m;
748 if (flags & IP_ROUTETOIF) {
749 state.ro = &iproute;
750 bzero(&iproute, sizeof(iproute));
751 } else
752 state.ro = ro;
753 state.dst = (struct sockaddr *)dst;
754
755 ip->ip_sum = 0;
756
757 /*
758 * delayed checksums are not currently compatible with IPsec
759 */
760 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
761 in_delayed_cksum(m);
762 if (m == NULL)
763 return(ENOMEM);
764 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
765 }
766
767 ip->ip_len = htons((u_short)ip->ip_len);
768 ip->ip_off = htons((u_short)ip->ip_off);
769
770 error = ipsec4_output(&state, sp, flags);
771
772 m = state.m;
773 if (flags & IP_ROUTETOIF) {
774 /*
775 * if we have tunnel mode SA, we may need to ignore
776 * IP_ROUTETOIF.
777 */
778 if (state.ro != &iproute || state.ro->ro_rt != NULL) {
779 flags &= ~IP_ROUTETOIF;
780 ro = state.ro;
781 }
782 } else
783 ro = state.ro;
784 dst = (struct sockaddr_in *)state.dst;
785 if (error) {
786 /* mbuf is already reclaimed in ipsec4_output. */
787 m0 = NULL;
788 switch (error) {
789 case EHOSTUNREACH:
790 case ENETUNREACH:
791 case EMSGSIZE:
792 case ENOBUFS:
793 case ENOMEM:
794 break;
795 default:
796 printf("ip4_output (ipsec): error code %d\n", error);
797 /*fall through*/
798 case ENOENT:
799 /* don't show these error codes to the user */
800 error = 0;
801 break;
802 }
803 goto bad;
804 }
805 }
806
807 /* be sure to update variables that are affected by ipsec4_output() */
808 ip = mtod(m, struct ip *);
809 #ifdef _IP_VHL
810 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
811 #else
812 hlen = ip->ip_hl << 2;
813 #endif
814 if (ro->ro_rt == NULL) {
815 if ((flags & IP_ROUTETOIF) == 0) {
816 printf("ip_output: "
817 "can't update route after IPsec processing\n");
818 error = EHOSTUNREACH; /*XXX*/
819 goto bad;
820 }
821 } else {
822 /* nobody uses ia beyond here */
823 ifp = ro->ro_rt->rt_ifp;
824 }
825
826 /* make it flipped, again. */
827 ip->ip_len = ntohs((u_short)ip->ip_len);
828 ip->ip_off = ntohs((u_short)ip->ip_off);
829 skip_ipsec:
830 #endif /*IPSEC*/
831
832
833 sw_csum = m->m_pkthdr.csum_flags | CSUM_IP;
834
835
836 /* frames that can be checksumed by GMACE SUM16 HW: frame >64, no fragments, no UDP odd length */
837
838 if (apple_hwcksum_tx && (sw_csum & CSUM_DELAY_DATA) && (ifp->if_hwassist & CSUM_TCP_SUM16)
839 && (ip->ip_len > 50) && (ip->ip_len <= ifp->if_mtu)
840 && !((ip->ip_len & 0x1) && (sw_csum & CSUM_UDP)) ) {
841
842 /* Apple GMAC HW, expects STUFF_OFFSET << 16 | START_OFFSET */
843 u_short offset = (IP_VHL_HL(ip->ip_vhl) << 2) +14 ; /* IP+Enet header length */
844 u_short csumprev= m->m_pkthdr.csum_data & 0xFFFF;
845 m->m_pkthdr.csum_flags = CSUM_DATA_VALID | CSUM_TCP_SUM16; /* for GMAC */
846 m->m_pkthdr.csum_data = (csumprev + offset) << 16 ;
847 m->m_pkthdr.csum_data += offset;
848 sw_csum = CSUM_DELAY_IP; /* do IP hdr chksum in software */
849 }
850 else {
851 if (ifp->if_hwassist & CSUM_TCP_SUM16) /* force SW checksuming */
852 m->m_pkthdr.csum_flags = 0;
853 else { /* not Apple enet */
854 m->m_pkthdr.csum_flags = sw_csum & ifp->if_hwassist;
855 sw_csum &= ~ifp->if_hwassist;
856 }
857
858 if (sw_csum & CSUM_DELAY_DATA) { /* perform TCP/UDP checksuming now */
859 in_delayed_cksum(m);
860 if (m == NULL)
861 return(ENOMEM);
862 sw_csum &= ~CSUM_DELAY_DATA;
863 }
864 }
865
866 /*
867 * If small enough for interface, or the interface will take
868 * care of the fragmentation for us, can just send directly.
869 */
870 if ((u_short)ip->ip_len <= ifp->if_mtu ||
871 ifp->if_hwassist & CSUM_FRAGMENT) {
872
873 ip->ip_len = htons((u_short)ip->ip_len);
874 ip->ip_off = htons((u_short)ip->ip_off);
875 ip->ip_sum = 0;
876 if (sw_csum & CSUM_DELAY_IP)
877 ip->ip_sum = in_cksum(m, hlen);
878 error = dlil_output(dl_tag, m, (void *) ro->ro_rt,
879 (struct sockaddr *)dst, 0);
880 goto done;
881 }
882 /*
883 * Too large for interface; fragment if possible.
884 * Must be able to put at least 8 bytes per fragment.
885 */
886 if (ip->ip_off & IP_DF) {
887 error = EMSGSIZE;
888 /*
889 * This case can happen if the user changed the MTU
890 * of an interface after enabling IP on it. Because
891 * most netifs don't keep track of routes pointing to
892 * them, there is no way for one to update all its
893 * routes when the MTU is changed.
894 */
895 if ((ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST))
896 && !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU)
897 && (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) {
898 ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
899 }
900 ipstat.ips_cantfrag++;
901 goto bad;
902 }
903 len = (ifp->if_mtu - hlen) &~ 7;
904 if (len < 8) {
905 error = EMSGSIZE;
906 goto bad;
907 }
908
909 /*
910 * if the interface will not calculate checksums on
911 * fragmented packets, then do it here.
912 */
913 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA &&
914 (ifp->if_hwassist & CSUM_IP_FRAGS) == 0) {
915 in_delayed_cksum(m);
916 if (m == NULL)
917 return(ENOMEM);
918 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
919 }
920
921
922 {
923 int mhlen, firstlen = len;
924 struct mbuf **mnext = &m->m_nextpkt;
925 int nfrags = 1;
926
927
928 /*
929 * Loop through length of segment after first fragment,
930 * make new header and copy data of each part and link onto chain.
931 */
932 m0 = m;
933 mhlen = sizeof (struct ip);
934 for (off = hlen + len; off < (u_short)ip->ip_len; off += len) {
935 MGETHDR(m, M_DONTWAIT, MT_HEADER);
936 if (m == 0) {
937 error = ENOBUFS;
938 ipstat.ips_odropped++;
939 goto sendorfree;
940 }
941 m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG;
942 m->m_data += max_linkhdr;
943 mhip = mtod(m, struct ip *);
944 *mhip = *ip;
945 if (hlen > sizeof (struct ip)) {
946 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
947 mhip->ip_vhl = IP_MAKE_VHL(IPVERSION, mhlen >> 2);
948 }
949 m->m_len = mhlen;
950 mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF);
951 if (ip->ip_off & IP_MF)
952 mhip->ip_off |= IP_MF;
953 if (off + len >= (u_short)ip->ip_len)
954 len = (u_short)ip->ip_len - off;
955 else
956 mhip->ip_off |= IP_MF;
957 mhip->ip_len = htons((u_short)(len + mhlen));
958 m->m_next = m_copy(m0, off, len);
959 if (m->m_next == 0) {
960 (void) m_free(m);
961 error = ENOBUFS; /* ??? */
962 ipstat.ips_odropped++;
963 goto sendorfree;
964 }
965 m->m_pkthdr.len = mhlen + len;
966 m->m_pkthdr.rcvif = (struct ifnet *)0;
967 m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
968 mhip->ip_off = htons((u_short)mhip->ip_off);
969 mhip->ip_sum = 0;
970 if (sw_csum & CSUM_DELAY_IP)
971 mhip->ip_sum = in_cksum(m, mhlen);
972 *mnext = m;
973 mnext = &m->m_nextpkt;
974 nfrags++;
975 }
976 ipstat.ips_ofragments += nfrags;
977
978 /* set first/last markers for fragment chain */
979 m0->m_flags |= M_FRAG;
980 m0->m_pkthdr.csum_data = nfrags;
981
982 /*
983 * Update first fragment by trimming what's been copied out
984 * and updating header, then send each fragment (in order).
985 */
986 m = m0;
987 m_adj(m, hlen + firstlen - (u_short)ip->ip_len);
988 m->m_pkthdr.len = hlen + firstlen;
989 ip->ip_len = htons((u_short)m->m_pkthdr.len);
990 ip->ip_off = htons((u_short)(ip->ip_off | IP_MF));
991 ip->ip_sum = 0;
992 if (sw_csum & CSUM_DELAY_IP)
993 ip->ip_sum = in_cksum(m, hlen);
994
995 sendorfree:
996
997 KERNEL_DEBUG(DBG_LAYER_END, ip->ip_dst.s_addr,
998 ip->ip_src.s_addr, ip->ip_p, ip->ip_off, ip->ip_len);
999
1000 for (m = m0; m; m = m0) {
1001 m0 = m->m_nextpkt;
1002 m->m_nextpkt = 0;
1003 if (error == 0)
1004 error = dlil_output(dl_tag, m, (void *) ro->ro_rt,
1005 (struct sockaddr *)dst, 0);
1006 else
1007 m_freem(m);
1008 }
1009
1010 if (error == 0)
1011 ipstat.ips_fragmented++;
1012 }
1013 done:
1014 #if IPSEC
1015 if (ro == &iproute && ro->ro_rt) {
1016 RTFREE(ro->ro_rt);
1017 ro->ro_rt = NULL;
1018 }
1019 if (sp != NULL) {
1020 KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
1021 printf("DP ip_output call free SP:%x\n", sp));
1022 key_freesp(sp);
1023 }
1024 #endif /* IPSEC */
1025
1026 KERNEL_DEBUG(DBG_FNC_IP_OUTPUT | DBG_FUNC_END, error,0,0,0,0);
1027 return (error);
1028 bad:
1029 m_freem(m0);
1030 goto done;
1031 }
1032
1033 extern u_short in_chksum_skip(struct mbuf *, int, int);
1034
1035 void
1036 in_delayed_cksum(struct mbuf *m)
1037 {
1038 struct ip *ip;
1039 u_short csum, csum2, offset;
1040
1041 ip = mtod(m, struct ip *);
1042 offset = IP_VHL_HL(ip->ip_vhl) << 2 ;
1043
1044 csum = in_cksum_skip(m, ip->ip_len, offset);
1045
1046 if (csum == 0)
1047 csum = 0xffff;
1048
1049 offset += m->m_pkthdr.csum_data & 0xFFFF; /* checksum offset */
1050
1051 if (offset > ip->ip_len) /* bogus offset */
1052 return;
1053
1054 if (offset + sizeof(u_short) > m->m_len) {
1055 printf("delayed m_pullup, m->len: %d off: %d p: %d\n",
1056 m->m_len, offset, ip->ip_p);
1057 /*
1058 * XXX
1059 * this shouldn't happen, but if it does, the
1060 * correct behavior may be to insert the checksum
1061 * in the existing chain instead of rearranging it.
1062 */
1063 if (m = m_pullup(m, offset + sizeof(u_short)) == 0)
1064 return;
1065 }
1066
1067 *(u_short *)(m->m_data + offset) = csum;
1068 }
1069
1070 /*
1071 * Insert IP options into preformed packet.
1072 * Adjust IP destination as required for IP source routing,
1073 * as indicated by a non-zero in_addr at the start of the options.
1074 *
1075 * XXX This routine assumes that the packet has no options in place.
1076 */
1077 static struct mbuf *
1078 ip_insertoptions(m, opt, phlen)
1079 register struct mbuf *m;
1080 struct mbuf *opt;
1081 int *phlen;
1082 {
1083 register struct ipoption *p = mtod(opt, struct ipoption *);
1084 struct mbuf *n;
1085 register struct ip *ip = mtod(m, struct ip *);
1086 unsigned optlen;
1087
1088 optlen = opt->m_len - sizeof(p->ipopt_dst);
1089 if (optlen + (u_short)ip->ip_len > IP_MAXPACKET)
1090 return (m); /* XXX should fail */
1091 if (p->ipopt_dst.s_addr)
1092 ip->ip_dst = p->ipopt_dst;
1093 if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
1094 MGETHDR(n, M_DONTWAIT, MT_HEADER);
1095 if (n == 0)
1096 return (m);
1097 n->m_pkthdr.len = m->m_pkthdr.len + optlen;
1098 m->m_len -= sizeof(struct ip);
1099 m->m_data += sizeof(struct ip);
1100 n->m_next = m;
1101 m = n;
1102 m->m_len = optlen + sizeof(struct ip);
1103 m->m_data += max_linkhdr;
1104 (void)memcpy(mtod(m, void *), ip, sizeof(struct ip));
1105 } else {
1106 m->m_data -= optlen;
1107 m->m_len += optlen;
1108 m->m_pkthdr.len += optlen;
1109 ovbcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
1110 }
1111 ip = mtod(m, struct ip *);
1112 bcopy(p->ipopt_list, ip + 1, optlen);
1113 *phlen = sizeof(struct ip) + optlen;
1114 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, *phlen >> 2);
1115 ip->ip_len += optlen;
1116 return (m);
1117 }
1118
1119 /*
1120 * Copy options from ip to jp,
1121 * omitting those not copied during fragmentation.
1122 */
1123 int
1124 ip_optcopy(ip, jp)
1125 struct ip *ip, *jp;
1126 {
1127 register u_char *cp, *dp;
1128 int opt, optlen, cnt;
1129
1130 cp = (u_char *)(ip + 1);
1131 dp = (u_char *)(jp + 1);
1132 cnt = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof (struct ip);
1133 for (; cnt > 0; cnt -= optlen, cp += optlen) {
1134 opt = cp[0];
1135 if (opt == IPOPT_EOL)
1136 break;
1137 if (opt == IPOPT_NOP) {
1138 /* Preserve for IP mcast tunnel's LSRR alignment. */
1139 *dp++ = IPOPT_NOP;
1140 optlen = 1;
1141 continue;
1142 } else
1143 optlen = cp[IPOPT_OLEN];
1144 /* bogus lengths should have been caught by ip_dooptions */
1145 if (optlen > cnt)
1146 optlen = cnt;
1147 if (IPOPT_COPIED(opt)) {
1148 bcopy(cp, dp, optlen);
1149 dp += optlen;
1150 }
1151 }
1152 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
1153 *dp++ = IPOPT_EOL;
1154 return (optlen);
1155 }
1156
1157 /*
1158 * IP socket option processing.
1159 */
1160 int
1161 ip_ctloutput(so, sopt)
1162 struct socket *so;
1163 struct sockopt *sopt;
1164 {
1165 struct inpcb *inp = sotoinpcb(so);
1166 int error, optval;
1167
1168 error = optval = 0;
1169 if (sopt->sopt_level != IPPROTO_IP) {
1170 return (EINVAL);
1171 }
1172
1173 switch (sopt->sopt_dir) {
1174 case SOPT_SET:
1175 switch (sopt->sopt_name) {
1176 case IP_OPTIONS:
1177 #ifdef notyet
1178 case IP_RETOPTS:
1179 #endif
1180 {
1181 struct mbuf *m;
1182 if (sopt->sopt_valsize > MLEN) {
1183 error = EMSGSIZE;
1184 break;
1185 }
1186 MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_HEADER);
1187 if (m == 0) {
1188 error = ENOBUFS;
1189 break;
1190 }
1191 m->m_len = sopt->sopt_valsize;
1192 error = sooptcopyin(sopt, mtod(m, char *), m->m_len,
1193 m->m_len);
1194 if (error)
1195 break;
1196
1197 return (ip_pcbopts(sopt->sopt_name, &inp->inp_options,
1198 m));
1199 }
1200
1201 case IP_TOS:
1202 case IP_TTL:
1203 case IP_RECVOPTS:
1204 case IP_RECVRETOPTS:
1205 case IP_RECVDSTADDR:
1206 case IP_RECVIF:
1207 case IP_FAITH:
1208 error = sooptcopyin(sopt, &optval, sizeof optval,
1209 sizeof optval);
1210 if (error)
1211 break;
1212
1213 switch (sopt->sopt_name) {
1214 case IP_TOS:
1215 inp->inp_ip_tos = optval;
1216 break;
1217
1218 case IP_TTL:
1219 inp->inp_ip_ttl = optval;
1220 break;
1221 #define OPTSET(bit) \
1222 if (optval) \
1223 inp->inp_flags |= bit; \
1224 else \
1225 inp->inp_flags &= ~bit;
1226
1227 case IP_RECVOPTS:
1228 OPTSET(INP_RECVOPTS);
1229 break;
1230
1231 case IP_RECVRETOPTS:
1232 OPTSET(INP_RECVRETOPTS);
1233 break;
1234
1235 case IP_RECVDSTADDR:
1236 OPTSET(INP_RECVDSTADDR);
1237 break;
1238
1239 case IP_RECVIF:
1240 OPTSET(INP_RECVIF);
1241 break;
1242
1243 case IP_FAITH:
1244 OPTSET(INP_FAITH);
1245 break;
1246 }
1247 break;
1248 #undef OPTSET
1249
1250 case IP_MULTICAST_IF:
1251 case IP_MULTICAST_VIF:
1252 case IP_MULTICAST_TTL:
1253 case IP_MULTICAST_LOOP:
1254 case IP_ADD_MEMBERSHIP:
1255 case IP_DROP_MEMBERSHIP:
1256 error = ip_setmoptions(sopt, &inp->inp_moptions);
1257 break;
1258
1259 case IP_PORTRANGE:
1260 error = sooptcopyin(sopt, &optval, sizeof optval,
1261 sizeof optval);
1262 if (error)
1263 break;
1264
1265 switch (optval) {
1266 case IP_PORTRANGE_DEFAULT:
1267 inp->inp_flags &= ~(INP_LOWPORT);
1268 inp->inp_flags &= ~(INP_HIGHPORT);
1269 break;
1270
1271 case IP_PORTRANGE_HIGH:
1272 inp->inp_flags &= ~(INP_LOWPORT);
1273 inp->inp_flags |= INP_HIGHPORT;
1274 break;
1275
1276 case IP_PORTRANGE_LOW:
1277 inp->inp_flags &= ~(INP_HIGHPORT);
1278 inp->inp_flags |= INP_LOWPORT;
1279 break;
1280
1281 default:
1282 error = EINVAL;
1283 break;
1284 }
1285 break;
1286
1287 #if IPSEC
1288 case IP_IPSEC_POLICY:
1289 {
1290 caddr_t req = NULL;
1291 size_t len = 0;
1292 int priv;
1293 struct mbuf *m;
1294 int optname;
1295
1296 if (error = sooptgetm(sopt, &m)) /* XXX */
1297 break;
1298 if (error = sooptmcopyin(sopt, m)) /* XXX */
1299 break;
1300 priv = (sopt->sopt_p != NULL &&
1301 suser(sopt->sopt_p->p_ucred,
1302 &sopt->sopt_p->p_acflag) != 0) ? 0 : 1;
1303 if (m) {
1304 req = mtod(m, caddr_t);
1305 len = m->m_len;
1306 }
1307 optname = sopt->sopt_name;
1308 error = ipsec4_set_policy(inp, optname, req, len, priv);
1309 m_freem(m);
1310 break;
1311 }
1312 #endif /*IPSEC*/
1313
1314 default:
1315 error = ENOPROTOOPT;
1316 break;
1317 }
1318 break;
1319
1320 case SOPT_GET:
1321 switch (sopt->sopt_name) {
1322 case IP_OPTIONS:
1323 case IP_RETOPTS:
1324 if (inp->inp_options)
1325 error = sooptcopyout(sopt,
1326 mtod(inp->inp_options,
1327 char *),
1328 inp->inp_options->m_len);
1329 else
1330 sopt->sopt_valsize = 0;
1331 break;
1332
1333 case IP_TOS:
1334 case IP_TTL:
1335 case IP_RECVOPTS:
1336 case IP_RECVRETOPTS:
1337 case IP_RECVDSTADDR:
1338 case IP_RECVIF:
1339 case IP_PORTRANGE:
1340 case IP_FAITH:
1341 switch (sopt->sopt_name) {
1342
1343 case IP_TOS:
1344 optval = inp->inp_ip_tos;
1345 break;
1346
1347 case IP_TTL:
1348 optval = inp->inp_ip_ttl;
1349 break;
1350
1351 #define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0)
1352
1353 case IP_RECVOPTS:
1354 optval = OPTBIT(INP_RECVOPTS);
1355 break;
1356
1357 case IP_RECVRETOPTS:
1358 optval = OPTBIT(INP_RECVRETOPTS);
1359 break;
1360
1361 case IP_RECVDSTADDR:
1362 optval = OPTBIT(INP_RECVDSTADDR);
1363 break;
1364
1365 case IP_RECVIF:
1366 optval = OPTBIT(INP_RECVIF);
1367 break;
1368
1369 case IP_PORTRANGE:
1370 if (inp->inp_flags & INP_HIGHPORT)
1371 optval = IP_PORTRANGE_HIGH;
1372 else if (inp->inp_flags & INP_LOWPORT)
1373 optval = IP_PORTRANGE_LOW;
1374 else
1375 optval = 0;
1376 break;
1377
1378 case IP_FAITH:
1379 optval = OPTBIT(INP_FAITH);
1380 break;
1381 }
1382 error = sooptcopyout(sopt, &optval, sizeof optval);
1383 break;
1384
1385 case IP_MULTICAST_IF:
1386 case IP_MULTICAST_VIF:
1387 case IP_MULTICAST_TTL:
1388 case IP_MULTICAST_LOOP:
1389 case IP_ADD_MEMBERSHIP:
1390 case IP_DROP_MEMBERSHIP:
1391 error = ip_getmoptions(sopt, inp->inp_moptions);
1392 break;
1393
1394 #if IPSEC
1395 case IP_IPSEC_POLICY:
1396 {
1397 struct mbuf *m = NULL;
1398 size_t len = 0;
1399 caddr_t req = NULL;
1400
1401 if (error = sooptgetm(sopt, &m)) /* XXX */
1402 break;
1403 if (error = sooptmcopyin(sopt, m)) /* XXX */
1404 break;
1405 if (m) {
1406 req = mtod(m, caddr_t);
1407 len = m->m_len;
1408 }
1409
1410 error = ipsec4_get_policy(sotoinpcb(so), req, len, &m);
1411 if (error == 0)
1412 error = sooptmcopyout(sopt, m); /* XXX */
1413
1414 /* if error, m_freem called at soopt_mcopyout(). */
1415 if (error == 0)
1416 m_freem(m);
1417 break;
1418 }
1419 #endif /*IPSEC*/
1420
1421 default:
1422 error = ENOPROTOOPT;
1423 break;
1424 }
1425 break;
1426 }
1427 return (error);
1428 }
1429
1430 /*
1431 * Set up IP options in pcb for insertion in output packets.
1432 * Store in mbuf with pointer in pcbopt, adding pseudo-option
1433 * with destination address if source routed.
1434 */
1435 static int
1436 ip_pcbopts(optname, pcbopt, m)
1437 int optname;
1438 struct mbuf **pcbopt;
1439 register struct mbuf *m;
1440 {
1441 register int cnt, optlen;
1442 register u_char *cp;
1443 u_char opt;
1444
1445 /* turn off any old options */
1446 if (*pcbopt)
1447 (void)m_free(*pcbopt);
1448 *pcbopt = 0;
1449 if (m == (struct mbuf *)0 || m->m_len == 0) {
1450 /*
1451 * Only turning off any previous options.
1452 */
1453 if (m)
1454 (void)m_free(m);
1455 return (0);
1456 }
1457
1458 #ifndef vax
1459 if (m->m_len % sizeof(int32_t))
1460 goto bad;
1461 #endif
1462 /*
1463 * IP first-hop destination address will be stored before
1464 * actual options; move other options back
1465 * and clear it when none present.
1466 */
1467 if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
1468 goto bad;
1469 cnt = m->m_len;
1470 m->m_len += sizeof(struct in_addr);
1471 cp = mtod(m, u_char *) + sizeof(struct in_addr);
1472 ovbcopy(mtod(m, caddr_t), (caddr_t)cp, (unsigned)cnt);
1473 bzero(mtod(m, caddr_t), sizeof(struct in_addr));
1474
1475 for (; cnt > 0; cnt -= optlen, cp += optlen) {
1476 opt = cp[IPOPT_OPTVAL];
1477 if (opt == IPOPT_EOL)
1478 break;
1479 if (opt == IPOPT_NOP)
1480 optlen = 1;
1481 else {
1482 if (cnt < IPOPT_OLEN + sizeof(*cp))
1483 goto bad;
1484 optlen = cp[IPOPT_OLEN];
1485 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
1486 goto bad;
1487 }
1488 switch (opt) {
1489
1490 default:
1491 break;
1492
1493 case IPOPT_LSRR:
1494 case IPOPT_SSRR:
1495 /*
1496 * user process specifies route as:
1497 * ->A->B->C->D
1498 * D must be our final destination (but we can't
1499 * check that since we may not have connected yet).
1500 * A is first hop destination, which doesn't appear in
1501 * actual IP option, but is stored before the options.
1502 */
1503 if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
1504 goto bad;
1505 m->m_len -= sizeof(struct in_addr);
1506 cnt -= sizeof(struct in_addr);
1507 optlen -= sizeof(struct in_addr);
1508 cp[IPOPT_OLEN] = optlen;
1509 /*
1510 * Move first hop before start of options.
1511 */
1512 bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
1513 sizeof(struct in_addr));
1514 /*
1515 * Then copy rest of options back
1516 * to close up the deleted entry.
1517 */
1518 ovbcopy((caddr_t)(&cp[IPOPT_OFFSET+1] +
1519 sizeof(struct in_addr)),
1520 (caddr_t)&cp[IPOPT_OFFSET+1],
1521 (unsigned)cnt + sizeof(struct in_addr));
1522 break;
1523 }
1524 }
1525 if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
1526 goto bad;
1527 *pcbopt = m;
1528 return (0);
1529
1530 bad:
1531 (void)m_free(m);
1532 return (EINVAL);
1533 }
1534
1535 /*
1536 * XXX
1537 * The whole multicast option thing needs to be re-thought.
1538 * Several of these options are equally applicable to non-multicast
1539 * transmission, and one (IP_MULTICAST_TTL) totally duplicates a
1540 * standard option (IP_TTL).
1541 */
1542 /*
1543 * Set the IP multicast options in response to user setsockopt().
1544 */
1545 static int
1546 ip_setmoptions(sopt, imop)
1547 struct sockopt *sopt;
1548 struct ip_moptions **imop;
1549 {
1550 int error = 0;
1551 int i;
1552 struct in_addr addr;
1553 struct ip_mreq mreq;
1554 struct ifnet *ifp;
1555 struct ip_moptions *imo = *imop;
1556 struct route ro;
1557 struct sockaddr_in *dst;
1558 int s;
1559
1560 if (imo == NULL) {
1561 /*
1562 * No multicast option buffer attached to the pcb;
1563 * allocate one and initialize to default values.
1564 */
1565 imo = (struct ip_moptions*) _MALLOC(sizeof(*imo), M_IPMOPTS,
1566 M_WAITOK);
1567
1568 if (imo == NULL)
1569 return (ENOBUFS);
1570 *imop = imo;
1571 imo->imo_multicast_ifp = NULL;
1572 imo->imo_multicast_vif = -1;
1573 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1574 imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
1575 imo->imo_num_memberships = 0;
1576 }
1577
1578 switch (sopt->sopt_name) {
1579 /* store an index number for the vif you wanna use in the send */
1580 case IP_MULTICAST_VIF:
1581 if (legal_vif_num == 0) {
1582 error = EOPNOTSUPP;
1583 break;
1584 }
1585 error = sooptcopyin(sopt, &i, sizeof i, sizeof i);
1586 if (error)
1587 break;
1588 if (!legal_vif_num(i) && (i != -1)) {
1589 error = EINVAL;
1590 break;
1591 }
1592 imo->imo_multicast_vif = i;
1593 break;
1594
1595 case IP_MULTICAST_IF:
1596 /*
1597 * Select the interface for outgoing multicast packets.
1598 */
1599 error = sooptcopyin(sopt, &addr, sizeof addr, sizeof addr);
1600 if (error)
1601 break;
1602 /*
1603 * INADDR_ANY is used to remove a previous selection.
1604 * When no interface is selected, a default one is
1605 * chosen every time a multicast packet is sent.
1606 */
1607 if (addr.s_addr == INADDR_ANY) {
1608 imo->imo_multicast_ifp = NULL;
1609 break;
1610 }
1611 /*
1612 * The selected interface is identified by its local
1613 * IP address. Find the interface and confirm that
1614 * it supports multicasting.
1615 */
1616 s = splimp();
1617 INADDR_TO_IFP(addr, ifp);
1618 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1619 splx(s);
1620 error = EADDRNOTAVAIL;
1621 break;
1622 }
1623 imo->imo_multicast_ifp = ifp;
1624 splx(s);
1625 break;
1626
1627 case IP_MULTICAST_TTL:
1628 /*
1629 * Set the IP time-to-live for outgoing multicast packets.
1630 * The original multicast API required a char argument,
1631 * which is inconsistent with the rest of the socket API.
1632 * We allow either a char or an int.
1633 */
1634 if (sopt->sopt_valsize == 1) {
1635 u_char ttl;
1636 error = sooptcopyin(sopt, &ttl, 1, 1);
1637 if (error)
1638 break;
1639 imo->imo_multicast_ttl = ttl;
1640 } else {
1641 u_int ttl;
1642 error = sooptcopyin(sopt, &ttl, sizeof ttl,
1643 sizeof ttl);
1644 if (error)
1645 break;
1646 if (ttl > 255)
1647 error = EINVAL;
1648 else
1649 imo->imo_multicast_ttl = ttl;
1650 }
1651 break;
1652
1653 case IP_MULTICAST_LOOP:
1654 /*
1655 * Set the loopback flag for outgoing multicast packets.
1656 * Must be zero or one. The original multicast API required a
1657 * char argument, which is inconsistent with the rest
1658 * of the socket API. We allow either a char or an int.
1659 */
1660 if (sopt->sopt_valsize == 1) {
1661 u_char loop;
1662 error = sooptcopyin(sopt, &loop, 1, 1);
1663 if (error)
1664 break;
1665 imo->imo_multicast_loop = !!loop;
1666 } else {
1667 u_int loop;
1668 error = sooptcopyin(sopt, &loop, sizeof loop,
1669 sizeof loop);
1670 if (error)
1671 break;
1672 imo->imo_multicast_loop = !!loop;
1673 }
1674 break;
1675
1676 case IP_ADD_MEMBERSHIP:
1677 /*
1678 * Add a multicast group membership.
1679 * Group must be a valid IP multicast address.
1680 */
1681 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
1682 if (error)
1683 break;
1684
1685 if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) {
1686 error = EINVAL;
1687 break;
1688 }
1689 s = splimp();
1690 /*
1691 * If no interface address was provided, use the interface of
1692 * the route to the given multicast address.
1693 */
1694 if (mreq.imr_interface.s_addr == INADDR_ANY) {
1695 bzero((caddr_t)&ro, sizeof(ro));
1696 dst = (struct sockaddr_in *)&ro.ro_dst;
1697 dst->sin_len = sizeof(*dst);
1698 dst->sin_family = AF_INET;
1699 dst->sin_addr = mreq.imr_multiaddr;
1700 rtalloc(&ro);
1701 if (ro.ro_rt == NULL) {
1702 error = EADDRNOTAVAIL;
1703 splx(s);
1704 break;
1705 }
1706 ifp = ro.ro_rt->rt_ifp;
1707 rtfree(ro.ro_rt);
1708 }
1709 else {
1710 INADDR_TO_IFP(mreq.imr_interface, ifp);
1711 }
1712
1713 /*
1714 * See if we found an interface, and confirm that it
1715 * supports multicast.
1716 */
1717 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1718 error = EADDRNOTAVAIL;
1719 splx(s);
1720 break;
1721 }
1722 /*
1723 * See if the membership already exists or if all the
1724 * membership slots are full.
1725 */
1726 for (i = 0; i < imo->imo_num_memberships; ++i) {
1727 if (imo->imo_membership[i]->inm_ifp == ifp &&
1728 imo->imo_membership[i]->inm_addr.s_addr
1729 == mreq.imr_multiaddr.s_addr)
1730 break;
1731 }
1732 if (i < imo->imo_num_memberships) {
1733 error = EADDRINUSE;
1734 splx(s);
1735 break;
1736 }
1737 if (i == IP_MAX_MEMBERSHIPS) {
1738 error = ETOOMANYREFS;
1739 splx(s);
1740 break;
1741 }
1742 /*
1743 * Everything looks good; add a new record to the multicast
1744 * address list for the given interface.
1745 */
1746 if ((imo->imo_membership[i] =
1747 in_addmulti(&mreq.imr_multiaddr, ifp)) == NULL) {
1748 error = ENOBUFS;
1749 splx(s);
1750 break;
1751 }
1752 ++imo->imo_num_memberships;
1753 splx(s);
1754 break;
1755
1756 case IP_DROP_MEMBERSHIP:
1757 /*
1758 * Drop a multicast group membership.
1759 * Group must be a valid IP multicast address.
1760 */
1761 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
1762 if (error)
1763 break;
1764
1765 if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) {
1766 error = EINVAL;
1767 break;
1768 }
1769
1770 s = splimp();
1771 /*
1772 * If an interface address was specified, get a pointer
1773 * to its ifnet structure.
1774 */
1775 if (mreq.imr_interface.s_addr == INADDR_ANY)
1776 ifp = NULL;
1777 else {
1778 INADDR_TO_IFP(mreq.imr_interface, ifp);
1779 if (ifp == NULL) {
1780 error = EADDRNOTAVAIL;
1781 splx(s);
1782 break;
1783 }
1784 }
1785 /*
1786 * Find the membership in the membership array.
1787 */
1788 for (i = 0; i < imo->imo_num_memberships; ++i) {
1789 if ((ifp == NULL ||
1790 imo->imo_membership[i]->inm_ifp == ifp) &&
1791 imo->imo_membership[i]->inm_addr.s_addr ==
1792 mreq.imr_multiaddr.s_addr)
1793 break;
1794 }
1795 if (i == imo->imo_num_memberships) {
1796 error = EADDRNOTAVAIL;
1797 splx(s);
1798 break;
1799 }
1800 /*
1801 * Give up the multicast address record to which the
1802 * membership points.
1803 */
1804 in_delmulti(imo->imo_membership[i]);
1805 /*
1806 * Remove the gap in the membership array.
1807 */
1808 for (++i; i < imo->imo_num_memberships; ++i)
1809 imo->imo_membership[i-1] = imo->imo_membership[i];
1810 --imo->imo_num_memberships;
1811 splx(s);
1812 break;
1813
1814 default:
1815 error = EOPNOTSUPP;
1816 break;
1817 }
1818
1819 /*
1820 * If all options have default values, no need to keep the mbuf.
1821 */
1822 if (imo->imo_multicast_ifp == NULL &&
1823 imo->imo_multicast_vif == -1 &&
1824 imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL &&
1825 imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP &&
1826 imo->imo_num_memberships == 0) {
1827 FREE(*imop, M_IPMOPTS);
1828 *imop = NULL;
1829 }
1830
1831 return (error);
1832 }
1833
1834 /*
1835 * Return the IP multicast options in response to user getsockopt().
1836 */
1837 static int
1838 ip_getmoptions(sopt, imo)
1839 struct sockopt *sopt;
1840 register struct ip_moptions *imo;
1841 {
1842 struct in_addr addr;
1843 struct in_ifaddr *ia;
1844 int error, optval;
1845 u_char coptval;
1846
1847 error = 0;
1848 switch (sopt->sopt_name) {
1849 case IP_MULTICAST_VIF:
1850 if (imo != NULL)
1851 optval = imo->imo_multicast_vif;
1852 else
1853 optval = -1;
1854 error = sooptcopyout(sopt, &optval, sizeof optval);
1855 break;
1856
1857 case IP_MULTICAST_IF:
1858 if (imo == NULL || imo->imo_multicast_ifp == NULL)
1859 addr.s_addr = INADDR_ANY;
1860 else {
1861 IFP_TO_IA(imo->imo_multicast_ifp, ia);
1862 addr.s_addr = (ia == NULL) ? INADDR_ANY
1863 : IA_SIN(ia)->sin_addr.s_addr;
1864 }
1865 error = sooptcopyout(sopt, &addr, sizeof addr);
1866 break;
1867
1868 case IP_MULTICAST_TTL:
1869 if (imo == 0)
1870 optval = coptval = IP_DEFAULT_MULTICAST_TTL;
1871 else
1872 optval = coptval = imo->imo_multicast_ttl;
1873 if (sopt->sopt_valsize == 1)
1874 error = sooptcopyout(sopt, &coptval, 1);
1875 else
1876 error = sooptcopyout(sopt, &optval, sizeof optval);
1877 break;
1878
1879 case IP_MULTICAST_LOOP:
1880 if (imo == 0)
1881 optval = coptval = IP_DEFAULT_MULTICAST_LOOP;
1882 else
1883 optval = coptval = imo->imo_multicast_loop;
1884 if (sopt->sopt_valsize == 1)
1885 error = sooptcopyout(sopt, &coptval, 1);
1886 else
1887 error = sooptcopyout(sopt, &optval, sizeof optval);
1888 break;
1889
1890 default:
1891 error = ENOPROTOOPT;
1892 break;
1893 }
1894 return (error);
1895 }
1896
1897 /*
1898 * Discard the IP multicast options.
1899 */
1900 void
1901 ip_freemoptions(imo)
1902 register struct ip_moptions *imo;
1903 {
1904 register int i;
1905
1906 if (imo != NULL) {
1907 for (i = 0; i < imo->imo_num_memberships; ++i)
1908 in_delmulti(imo->imo_membership[i]);
1909 FREE(imo, M_IPMOPTS);
1910 }
1911 }
1912
1913 /*
1914 * Routine called from ip_output() to loop back a copy of an IP multicast
1915 * packet to the input queue of a specified interface. Note that this
1916 * calls the output routine of the loopback "driver", but with an interface
1917 * pointer that might NOT be a loopback interface -- evil, but easier than
1918 * replicating that code here.
1919 */
1920 static void
1921 ip_mloopback(ifp, m, dst, hlen)
1922 struct ifnet *ifp;
1923 register struct mbuf *m;
1924 register struct sockaddr_in *dst;
1925 int hlen;
1926 {
1927 register struct ip *ip;
1928 struct mbuf *copym;
1929
1930 copym = m_copy(m, 0, M_COPYALL);
1931 if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen))
1932 copym = m_pullup(copym, hlen);
1933 if (copym != NULL) {
1934 /*
1935 * We don't bother to fragment if the IP length is greater
1936 * than the interface's MTU. Can this possibly matter?
1937 */
1938 ip = mtod(copym, struct ip *);
1939 ip->ip_len = htons((u_short)ip->ip_len);
1940 ip->ip_off = htons((u_short)ip->ip_off);
1941 ip->ip_sum = 0;
1942 ip->ip_sum = in_cksum(copym, hlen);
1943
1944 /*
1945 * NB:
1946 * It's not clear whether there are any lingering
1947 * reentrancy problems in other areas which might
1948 * be exposed by using ip_input directly (in
1949 * particular, everything which modifies the packet
1950 * in-place). Yet another option is using the
1951 * protosw directly to deliver the looped back
1952 * packet. For the moment, we'll err on the side
1953 * of safety by using if_simloop().
1954 */
1955 #if 1 /* XXX */
1956 if (dst->sin_family != AF_INET) {
1957 printf("ip_mloopback: bad address family %d\n",
1958 dst->sin_family);
1959 dst->sin_family = AF_INET;
1960 }
1961 #endif
1962
1963
1964 /*
1965 * Mark checksum as valid or calculate checksum for loopback.
1966 *
1967 * This is done this way because we have to embed the ifp of
1968 * the interface we will send the original copy of the packet
1969 * out on in the mbuf. ip_input will check if_hwassist of the
1970 * embedded ifp and ignore all csum_flags if if_hwassist is 0.
1971 * The UDP checksum has not been calculated yet.
1972 */
1973 if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
1974 if (ifp->if_hwassist) {
1975 copym->m_pkthdr.csum_flags |=
1976 CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
1977 CSUM_IP_CHECKED | CSUM_IP_VALID;
1978 copym->m_pkthdr.csum_data = 0xffff;
1979 } else
1980 in_delayed_cksum(copym);
1981 }
1982
1983
1984 /*
1985 * TedW:
1986 * We need to send all loopback traffic down to dlil in case
1987 * a filter has tapped-in.
1988 */
1989
1990 if (lo_dl_tag == 0)
1991 dlil_find_dltag(APPLE_IF_FAM_LOOPBACK, 0, PF_INET, &lo_dl_tag);
1992
1993 /*
1994 * Stuff the 'real' ifp into the pkthdr, to be used in matching
1995 * in ip_input(); we need the loopback ifp/dl_tag passed as args
1996 * to make the loopback driver compliant with the data link
1997 * requirements.
1998 */
1999 if (lo_dl_tag)
2000 { copym->m_pkthdr.rcvif = ifp;
2001 dlil_output(lo_dl_tag, copym, 0, (struct sockaddr *) dst, 0);
2002 } else {
2003 printf("Warning: ip_output call to dlil_find_dltag failed!\n");
2004 m_freem(copym);
2005 }
2006
2007 /* if_simloop(ifp, copym, (struct sockaddr *)dst, 0);*/
2008 }
2009 }