]> git.saurik.com Git - apple/xnu.git/blob - bsd/netinet/ip_output.c
xnu-201.42.3.tar.gz
[apple/xnu.git] / bsd / netinet / ip_output.c
1 /*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /*
23 * Copyright (c) 1982, 1986, 1988, 1990, 1993
24 * The Regents of the University of California. All rights reserved.
25 *
26 * Redistribution and use in source and binary forms, with or without
27 * modification, are permitted provided that the following conditions
28 * are met:
29 * 1. Redistributions of source code must retain the above copyright
30 * notice, this list of conditions and the following disclaimer.
31 * 2. Redistributions in binary form must reproduce the above copyright
32 * notice, this list of conditions and the following disclaimer in the
33 * documentation and/or other materials provided with the distribution.
34 * 3. All advertising materials mentioning features or use of this software
35 * must display the following acknowledgement:
36 * This product includes software developed by the University of
37 * California, Berkeley and its contributors.
38 * 4. Neither the name of the University nor the names of its contributors
39 * may be used to endorse or promote products derived from this software
40 * without specific prior written permission.
41 *
42 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
43 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
44 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
45 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
46 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
47 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
48 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
49 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
50 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
51 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
52 * SUCH DAMAGE.
53 *
54 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94
55 */
56
57 #define _IP_VHL
58
59 #if ISFB31
60 #include "opt_ipfw.h"
61 #include "opt_ipdn.h"
62 #include "opt_ipdivert.h"
63 #include "opt_ipfilter.h"
64 #endif
65
66 #include <sys/param.h>
67 #include <sys/systm.h>
68 #include <sys/kernel.h>
69 #include <sys/malloc.h>
70 #include <sys/mbuf.h>
71 #include <sys/protosw.h>
72 #include <sys/socket.h>
73 #include <sys/socketvar.h>
74
75 #include <net/if.h>
76 #include <net/route.h>
77
78 #include <netinet/in.h>
79 #include <netinet/in_systm.h>
80 #include <netinet/ip.h>
81 #if INET6
82 #include <netinet/ip6.h>
83 #include <netinet6/ip6_var.h>
84 #endif
85 #include <netinet/in_pcb.h>
86 #include <netinet/in_var.h>
87 #include <netinet/ip_var.h>
88 #include <net/dlil.h>
89
90 #include <sys/kdebug.h>
91
92 #define DBG_LAYER_BEG NETDBG_CODE(DBG_NETIP, 1)
93 #define DBG_LAYER_END NETDBG_CODE(DBG_NETIP, 3)
94 #define DBG_FNC_IP_OUTPUT NETDBG_CODE(DBG_NETIP, (1 << 8) | 1)
95
96
97 #ifdef vax
98 #include <machine/mtpr.h>
99 #endif
100
101 #if ISFB31
102 #include <machine/in_cksum.h>
103
104 static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "internet multicast options");
105 #endif
106
107 //static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "internet multicast options");
108
109 #if IPSEC
110 #include <netinet6/ipsec.h>
111 #include <netkey/key.h>
112 #include <netkey/key_debug.h>
113
114 #endif /*IPSEC*/
115
116 #if !defined(COMPAT_IPFW) || COMPAT_IPFW == 1
117 #undef COMPAT_IPFW
118 #define COMPAT_IPFW 1
119 #else
120 #undef COMPAT_IPFW
121 #endif
122
123 #if COMPAT_IPFW
124 #include <netinet/ip_fw.h>
125 #endif
126
127 #if DUMMYNET
128 #include <netinet/ip_dummynet.h>
129 #endif
130
131 #if IPFIREWALL_FORWARD_DEBUG
132 #define print_ip(a) printf("%ld.%ld.%ld.%ld",(ntohl(a.s_addr)>>24)&0xFF,\
133 (ntohl(a.s_addr)>>16)&0xFF,\
134 (ntohl(a.s_addr)>>8)&0xFF,\
135 (ntohl(a.s_addr))&0xFF);
136 #endif
137
/*
 * Running counter for the IP identification field; ip_output() stamps
 * each locally originated datagram with htons(ip_id++).
 */
138 u_short ip_id;
139
/*
 * Prototypes for helpers private to this file (all declared static;
 * __P() wraps the argument list for pre-ANSI compilers).
 */
140 static struct mbuf *ip_insertoptions __P((struct mbuf *, struct mbuf *, int *));
141 static void ip_mloopback
142 __P((struct ifnet *, struct mbuf *, struct sockaddr_in *, int));
143 static int ip_getmoptions
144 __P((struct sockopt *, struct ip_moptions *));
145 static int ip_pcbopts __P((int, struct mbuf **, struct mbuf *));
146 static int ip_setmoptions
147 __P((struct sockopt *, struct ip_moptions **));
/* NOTE(review): presumably the dl_tag of the loopback interface -- not
 * referenced in the part of the file reviewed here; confirm usage. */
148 static u_long lo_dl_tag = 0;
149 static int ip_optcopy __P((struct ip *, struct ip *));
150
/* Computes a deferred transport checksum and stores it into the packet. */
151 void in_delayed_cksum(struct mbuf *m);
/* When non-zero, ip_output() may leave the TCP/UDP checksum to the
 * interface hardware (see the CSUM_TCP_SUM16 test there). */
152 extern int apple_hwcksum_tx;
153
154 extern struct protosw inetsw[];
155
156 /*
157 * IP output. The packet in mbuf chain m contains a skeletal IP
158 * header (with len, off, ttl, proto, tos, src, dst).
159 * The mbuf chain containing the packet will be freed.
160 * The mbuf opt, if present, will not be freed.
161 */
162 int
163 ip_output(m0, opt, ro, flags, imo)
164 struct mbuf *m0;
165 struct mbuf *opt;
166 struct route *ro;
167 int flags;
168 struct ip_moptions *imo;
169 {
170 struct ip *ip, *mhip;
171 struct ifnet *ifp;
172 u_long dl_tag;
173 struct mbuf *m = m0;
174 int hlen = sizeof (struct ip);
175 int len, off, error = 0;
176 struct sockaddr_in *dst;
177 struct in_ifaddr *ia;
178 int isbroadcast, sw_csum;
179 #if IPSEC
180 struct route iproute;
181 struct socket *so;
182 struct secpolicy *sp = NULL;
183 #endif
184 #if IPFIREWALL_FORWARD
185 int fwd_rewrite_src = 0;
186 #endif
187
188
189 #if !IPDIVERT /* dummy variable for the firewall code to play with */
190 u_short ip_divert_cookie = 0 ;
191 #endif
192 #if COMPAT_IPFW
193 struct ip_fw_chain *rule = NULL ;
194 #endif
195
196 KERNEL_DEBUG(DBG_FNC_IP_OUTPUT | DBG_FUNC_START, 0,0,0,0,0);
197
198 #if IPSEC
199 /*
200 * NOTE: m->m_pkthdr is NULL cleared below just to prevent ipfw code
201 * from SEGV.
202 * ipfw code uses rcvif to determine incoming interface, and
203 * KAME uses rcvif for ipsec processing.
204 * ipfw may not be working right with KAME at this moment.
205 * We need more tests.
206 */
207 #if DUMMYNET
208 if (m->m_type == MT_DUMMYNET) {
209 if (m->m_next != NULL) {
210 so = (struct socket *)m->m_next->m_pkthdr.rcvif;
211 m->m_next->m_pkthdr.rcvif = NULL;
212 } else
213 so = NULL;
214 } else
215 #endif
216 {
217 so = ipsec_getsocket(m);
218 ipsec_setsocket(m, NULL);
219 }
220 #endif /*IPSEC*/
221
222
223 #if IPFIREWALL && DUMMYNET
224 /*
225 * dummynet packet are prepended a vestigial mbuf with
226 * m_type = MT_DUMMYNET and m_data pointing to the matching
227 * rule.
228 */
229 if (m->m_type == MT_DUMMYNET) {
230 struct mbuf *tmp_m = m ;
231 /*
232 * the packet was already tagged, so part of the
233 * processing was already done, and we need to go down.
234 * opt, flags and imo have already been used, and now
235 * they are used to hold ifp and hlen and NULL, respectively.
236 */
237 rule = (struct ip_fw_chain *)(m->m_data) ;
238 m = m->m_next ;
239 FREE(tmp_m, M_IPFW);
240 ip = mtod(m, struct ip *);
241 dst = (struct sockaddr_in *)&ro->ro_dst;
242 ifp = (struct ifnet *)opt;
243 hlen = IP_VHL_HL(ip->ip_vhl) << 2 ;
244 opt = NULL ;
245 flags = 0 ; /* XXX is this correct ? */
246 goto sendit;
247 } else
248 rule = NULL ;
249 #endif
250
251 #if DIAGNOSTIC
252 if ((m->m_flags & M_PKTHDR) == 0)
253 panic("ip_output no HDR");
254 if (!ro)
255 panic("ip_output no route, proto = %d",
256 mtod(m, struct ip *)->ip_p);
257 #endif
258 if (opt) {
259 m = ip_insertoptions(m, opt, &len);
260 hlen = len;
261 }
262 ip = mtod(m, struct ip *);
263 /*
264 * Fill in IP header.
265 */
266 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
267 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, hlen >> 2);
268 ip->ip_off &= IP_DF;
269 ip->ip_id = htons(ip_id++);
270 ipstat.ips_localout++;
271 } else {
272 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
273 }
274
275 KERNEL_DEBUG(DBG_LAYER_BEG, ip->ip_dst.s_addr,
276 ip->ip_src.s_addr, ip->ip_p, ip->ip_off, ip->ip_len);
277
278 dst = (struct sockaddr_in *)&ro->ro_dst;
279 /*
280 * If there is a cached route,
281 * check that it is to the same destination
282 * and is still up. If not, free it and try again.
283 */
284 if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
285 dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
286 RTFREE(ro->ro_rt);
287 ro->ro_rt = (struct rtentry *)0;
288 }
289 if (ro->ro_rt == 0) {
290 dst->sin_family = AF_INET;
291 dst->sin_len = sizeof(*dst);
292 dst->sin_addr = ip->ip_dst;
293 }
294 /*
295 * If routing to interface only,
296 * short circuit routing lookup.
297 */
298 #define ifatoia(ifa) ((struct in_ifaddr *)(ifa))
299 #define sintosa(sin) ((struct sockaddr *)(sin))
300 if (flags & IP_ROUTETOIF) {
301 if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0 &&
302 (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) {
303 ipstat.ips_noroute++;
304 error = ENETUNREACH;
305 goto bad;
306 }
307 ifp = ia->ia_ifp;
308 dl_tag = ia->ia_ifa.ifa_dlt;
309 ip->ip_ttl = 1;
310 isbroadcast = in_broadcast(dst->sin_addr, ifp);
311 } else {
312 /*
313 * If this is the case, we probably don't want to allocate
314 * a protocol-cloned route since we didn't get one from the
315 * ULP. This lets TCP do its thing, while not burdening
316 * forwarding or ICMP with the overhead of cloning a route.
317 * Of course, we still want to do any cloning requested by
318 * the link layer, as this is probably required in all cases
319 * for correct operation (as it is for ARP).
320 */
321 if (ro->ro_rt == 0)
322 rtalloc_ign(ro, RTF_PRCLONING);
323 if (ro->ro_rt == 0) {
324 ipstat.ips_noroute++;
325 error = EHOSTUNREACH;
326 goto bad;
327 }
328 ia = ifatoia(ro->ro_rt->rt_ifa);
329 ifp = ro->ro_rt->rt_ifp;
330 dl_tag = ro->ro_rt->rt_dlt;
331 ro->ro_rt->rt_use++;
332 if (ro->ro_rt->rt_flags & RTF_GATEWAY)
333 dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
334 if (ro->ro_rt->rt_flags & RTF_HOST)
335 isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST);
336 else
337 isbroadcast = in_broadcast(dst->sin_addr, ifp);
338 }
339 if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
340 struct in_multi *inm;
341
342 m->m_flags |= M_MCAST;
343 /*
344 * IP destination address is multicast. Make sure "dst"
345 * still points to the address in "ro". (It may have been
346 * changed to point to a gateway address, above.)
347 */
348 dst = (struct sockaddr_in *)&ro->ro_dst;
349 /*
350 * See if the caller provided any multicast options
351 */
352 if (imo != NULL) {
353 ip->ip_ttl = imo->imo_multicast_ttl;
354 if (imo->imo_multicast_ifp != NULL) {
355 ifp = imo->imo_multicast_ifp;
356 dl_tag = ifp->if_data.default_proto;
357 }
358 if (imo->imo_multicast_vif != -1)
359 ip->ip_src.s_addr =
360 ip_mcast_src(imo->imo_multicast_vif);
361 } else
362 ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
363 /*
364 * Confirm that the outgoing interface supports multicast.
365 */
366 if ((imo == NULL) || (imo->imo_multicast_vif == -1)) {
367 if ((ifp->if_flags & IFF_MULTICAST) == 0) {
368 ipstat.ips_noroute++;
369 error = ENETUNREACH;
370 goto bad;
371 }
372 }
373 /*
374 * If source address not specified yet, use address
375 * of outgoing interface.
376 */
377 if (ip->ip_src.s_addr == INADDR_ANY) {
378 register struct in_ifaddr *ia1;
379
380 for (ia1 = in_ifaddrhead.tqh_first; ia1;
381 ia1 = ia1->ia_link.tqe_next)
382 if (ia1->ia_ifp == ifp) {
383 ip->ip_src = IA_SIN(ia1)->sin_addr;
384 break;
385 }
386 }
387
388 IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm);
389 if (inm != NULL &&
390 (imo == NULL || imo->imo_multicast_loop)) {
391 /*
392 * If we belong to the destination multicast group
393 * on the outgoing interface, and the caller did not
394 * forbid loopback, loop back a copy.
395 */
396 ip_mloopback(ifp, m, dst, hlen);
397 }
398 else {
399 /*
400 * If we are acting as a multicast router, perform
401 * multicast forwarding as if the packet had just
402 * arrived on the interface to which we are about
403 * to send. The multicast forwarding function
404 * recursively calls this function, using the
405 * IP_FORWARDING flag to prevent infinite recursion.
406 *
407 * Multicasts that are looped back by ip_mloopback(),
408 * above, will be forwarded by the ip_input() routine,
409 * if necessary.
410 */
411 if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
412 /*
413 * Check if rsvp daemon is running. If not, don't
414 * set ip_moptions. This ensures that the packet
415 * is multicast and not just sent down one link
416 * as prescribed by rsvpd.
417 */
418 if (!rsvp_on)
419 imo = NULL;
420 if (ip_mforward(ip, ifp, m, imo) != 0) {
421 m_freem(m);
422 goto done;
423 }
424 }
425 }
426
427 /*
428 * Multicasts with a time-to-live of zero may be looped-
429 * back, above, but must not be transmitted on a network.
430 * Also, multicasts addressed to the loopback interface
431 * are not sent -- the above call to ip_mloopback() will
432 * loop back a copy if this host actually belongs to the
433 * destination group on the loopback interface.
434 */
435 if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) {
436 m_freem(m);
437 goto done;
438 }
439
440 goto sendit;
441 }
442 #ifndef notdef
443 /*
444 * If source address not specified yet, use address
445 * of outgoing interface.
446 */
447 if (ip->ip_src.s_addr == INADDR_ANY) {
448 ip->ip_src = IA_SIN(ia)->sin_addr;
449 #if IPFIREWALL_FORWARD
450 /* Keep note that we did this - if the firewall changes
451 * the next-hop, our interface may change, changing the
452 * default source IP. It's a shame so much effort happens
453 * twice. Oh well.
454 */
455 fwd_rewrite_src++;
456 #endif /* IPFIREWALL_FORWARD */
457 }
458 #endif /* notdef */
459 /*
460 * Verify that we have any chance at all of being able to queue
461 * the packet or packet fragments
462 */
463 if ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >=
464 ifp->if_snd.ifq_maxlen) {
465 error = ENOBUFS;
466 goto bad;
467 }
468
469 /*
470 * Look for broadcast address and
471 * and verify user is allowed to send
472 * such a packet.
473 */
474 if (isbroadcast) {
475 if ((ifp->if_flags & IFF_BROADCAST) == 0) {
476 error = EADDRNOTAVAIL;
477 goto bad;
478 }
479 if ((flags & IP_ALLOWBROADCAST) == 0) {
480 error = EACCES;
481 goto bad;
482 }
483 /* don't allow broadcast messages to be fragmented */
484 if ((u_short)ip->ip_len > ifp->if_mtu) {
485 error = EMSGSIZE;
486 goto bad;
487 }
488 m->m_flags |= M_BCAST;
489 } else {
490 m->m_flags &= ~M_BCAST;
491 }
492
493 sendit:
494 /*
495 * IpHack's section.
496 * - Xlate: translate packet's addr/port (NAT).
497 * - Firewall: deny/allow/etc.
498 * - Wrap: fake packet's addr/port <unimpl.>
499 * - Encapsulate: put it in another IP and send out. <unimp.>
500 */
501 #if COMPAT_IPFW
502 if (ip_nat_ptr && !(*ip_nat_ptr)(&ip, &m, ifp, IP_NAT_OUT)) {
503 error = EACCES;
504 goto done;
505 }
506
507 /*
508 * Check with the firewall...
509 */
510 if (ip_fw_chk_ptr) {
511 struct sockaddr_in *old = dst;
512
513 off = (*ip_fw_chk_ptr)(&ip,
514 hlen, ifp, &ip_divert_cookie, &m, &rule, &dst);
515 /*
516 * On return we must do the following:
517 * m == NULL -> drop the pkt
518 * 1<=off<= 0xffff -> DIVERT
519 * (off & 0x10000) -> send to a DUMMYNET pipe
520 * dst != old -> IPFIREWALL_FORWARD
521 * off==0, dst==old -> accept
522 * If some of the above modules is not compiled in, then
523 * we should't have to check the corresponding condition
524 * (because the ipfw control socket should not accept
525 * unsupported rules), but better play safe and drop
526 * packets in case of doubt.
527 */
528 if (!m) { /* firewall said to reject */
529 error = EACCES;
530 goto done;
531 }
532 if (off == 0 && dst == old) /* common case */
533 goto pass ;
534 #if DUMMYNET
535 if (off & 0x10000) {
536 /*
537 * pass the pkt to dummynet. Need to include
538 * pipe number, m, ifp, ro, hlen because these are
539 * not recomputed in the next pass.
540 * All other parameters have been already used and
541 * so they are not needed anymore.
542 * XXX note: if the ifp or ro entry are deleted
543 * while a pkt is in dummynet, we are in trouble!
544 */
545 dummynet_io(off & 0xffff, DN_TO_IP_OUT, m,ifp,ro,hlen,rule);
546 goto done;
547 }
548 #endif
549 #if IPDIVERT
550 if (off > 0 && off < 0x10000) { /* Divert packet */
551
552 /*
553 * delayed checksums are not currently compatible
554 * with divert sockets.
555 */
556 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
557 in_delayed_cksum(m);
558 if (m == NULL)
559 return(ENOMEM);
560 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
561 }
562
563 /* Restore packet header fields to original values */
564 ip->ip_len = htons((u_short)ip->ip_len);
565 ip->ip_off = htons((u_short)ip->ip_off);
566
567 ip_divert_port = off & 0xffff ;
568 (*ip_protox[IPPROTO_DIVERT]->pr_input)(m, 0);
569 goto done;
570 }
571 #endif
572
573 #if IPFIREWALL_FORWARD
574 /* Here we check dst to make sure it's directly reachable on the
575 * interface we previously thought it was.
576 * If it isn't (which may be likely in some situations) we have
577 * to re-route it (ie, find a route for the next-hop and the
578 * associated interface) and set them here. This is nested
579 * forwarding which in most cases is undesirable, except where
580 * such control is nigh impossible. So we do it here.
581 * And I'm babbling.
582 */
583 if (off == 0 && old != dst) {
584 struct in_ifaddr *ia;
585
586 /* It's changed... */
587 /* There must be a better way to do this next line... */
588 static struct route sro_fwd, *ro_fwd = &sro_fwd;
589 #if IPFIREWALL_FORWARD_DEBUG
590 printf("IPFIREWALL_FORWARD: New dst ip: ");
591 print_ip(dst->sin_addr);
592 printf("\n");
593 #endif
594 /*
595 * We need to figure out if we have been forwarded
596 * to a local socket. If so then we should somehow
597 * "loop back" to ip_input, and get directed to the
598 * PCB as if we had received this packet. This is
599 * because it may be dificult to identify the packets
600 * you want to forward until they are being output
601 * and have selected an interface. (e.g. locally
602 * initiated packets) If we used the loopback inteface,
603 * we would not be able to control what happens
604 * as the packet runs through ip_input() as
605 * it is done through a ISR.
606 */
607 for (ia = TAILQ_FIRST(&in_ifaddrhead); ia;
608 ia = TAILQ_NEXT(ia, ia_link)) {
609 /*
610 * If the addr to forward to is one
611 * of ours, we pretend to
612 * be the destination for this packet.
613 */
614 if (IA_SIN(ia)->sin_addr.s_addr ==
615 dst->sin_addr.s_addr)
616 break;
617 }
618 if (ia) {
619 /* tell ip_input "dont filter" */
620 ip_fw_fwd_addr = dst;
621 if (m->m_pkthdr.rcvif == NULL)
622 m->m_pkthdr.rcvif = ifunit("lo0");
623
624 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
625 m->m_pkthdr.csum_flags |=
626 CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
627 m0->m_pkthdr.csum_data = 0xffff;
628 }
629 m->m_pkthdr.csum_flags |=
630 CSUM_IP_CHECKED | CSUM_IP_VALID;
631 ip->ip_len = htons((u_short)ip->ip_len);
632 ip->ip_off = htons((u_short)ip->ip_off);
633
634
635 ip_input(m);
636 goto done;
637 }
638 /* Some of the logic for this was
639 * nicked from above.
640 *
641 * This rewrites the cached route in a local PCB.
642 * Is this what we want to do?
643 */
644 bcopy(dst, &ro_fwd->ro_dst, sizeof(*dst));
645
646 ro_fwd->ro_rt = 0;
647 rtalloc_ign(ro_fwd, RTF_PRCLONING);
648
649 if (ro_fwd->ro_rt == 0) {
650 ipstat.ips_noroute++;
651 error = EHOSTUNREACH;
652 goto bad;
653 }
654
655 ia = ifatoia(ro_fwd->ro_rt->rt_ifa);
656 ifp = ro_fwd->ro_rt->rt_ifp;
657 dl_tag = ro->ro_rt->rt_dlt;
658 ro_fwd->ro_rt->rt_use++;
659 if (ro_fwd->ro_rt->rt_flags & RTF_GATEWAY)
660 dst = (struct sockaddr_in *)ro_fwd->ro_rt->rt_gateway;
661 if (ro_fwd->ro_rt->rt_flags & RTF_HOST)
662 isbroadcast =
663 (ro_fwd->ro_rt->rt_flags & RTF_BROADCAST);
664 else
665 isbroadcast = in_broadcast(dst->sin_addr, ifp);
666 RTFREE(ro->ro_rt);
667 ro->ro_rt = ro_fwd->ro_rt;
668 dst = (struct sockaddr_in *)&ro_fwd->ro_dst;
669
670 /*
671 * If we added a default src ip earlier,
672 * which would have been gotten from the-then
673 * interface, do it again, from the new one.
674 */
675 if (fwd_rewrite_src)
676 ip->ip_src = IA_SIN(ia)->sin_addr;
677 goto pass ;
678 }
679 #endif /* IPFIREWALL_FORWARD */
680 /*
681 * if we get here, none of the above matches, and
682 * we have to drop the pkt
683 */
684 m_freem(m);
685 error = EACCES; /* not sure this is the right error msg */
686 goto done;
687 }
688 #endif /* COMPAT_IPFW */
689
690 pass:
691
692 #if defined(PM)
693 /*
694 * Processing IP filter/NAT.
695 * Return TRUE iff this packet is discarded.
696 * Return FALSE iff this packet is accepted.
697 */
698
699 if (doNatFil && pm_out(ro->ro_rt->rt_ifp, ip, m))
700 goto done;
701 #endif
702
703 #if IPSEC
704 /* get SP for this packet */
705 if (so == NULL)
706 sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, flags, &error);
707 else
708 sp = ipsec4_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error);
709
710 if (sp == NULL) {
711 ipsecstat.out_inval++;
712 goto bad;
713 }
714
715 error = 0;
716
717 /* check policy */
718 switch (sp->policy) {
719 case IPSEC_POLICY_DISCARD:
720 /*
721 * This packet is just discarded.
722 */
723 ipsecstat.out_polvio++;
724 goto bad;
725
726 case IPSEC_POLICY_BYPASS:
727 case IPSEC_POLICY_NONE:
728 /* no need to do IPsec. */
729 goto skip_ipsec;
730
731 case IPSEC_POLICY_IPSEC:
732 if (sp->req == NULL) {
733 /* XXX should be panic ? */
734 printf("ip_output: No IPsec request specified.\n");
735 error = EINVAL;
736 goto bad;
737 }
738 break;
739
740 case IPSEC_POLICY_ENTRUST:
741 default:
742 printf("ip_output: Invalid policy found. %d\n", sp->policy);
743 }
744
745
746 {
747 struct ipsec_output_state state;
748 bzero(&state, sizeof(state));
749 state.m = m;
750 if (flags & IP_ROUTETOIF) {
751 state.ro = &iproute;
752 bzero(&iproute, sizeof(iproute));
753 } else
754 state.ro = ro;
755 state.dst = (struct sockaddr *)dst;
756
757 ip->ip_sum = 0;
758
759 /*
760 * delayed checksums are not currently compatible with IPsec
761 */
762 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
763 in_delayed_cksum(m);
764 if (m == NULL)
765 return(ENOMEM);
766 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
767 }
768
769 ip->ip_len = htons((u_short)ip->ip_len);
770 ip->ip_off = htons((u_short)ip->ip_off);
771
772 error = ipsec4_output(&state, sp, flags);
773
774 m = state.m;
775 if (flags & IP_ROUTETOIF) {
776 /*
777 * if we have tunnel mode SA, we may need to ignore
778 * IP_ROUTETOIF.
779 */
780 if (state.ro != &iproute || state.ro->ro_rt != NULL) {
781 flags &= ~IP_ROUTETOIF;
782 ro = state.ro;
783 }
784 } else
785 ro = state.ro;
786 dst = (struct sockaddr_in *)state.dst;
787 if (error) {
788 /* mbuf is already reclaimed in ipsec4_output. */
789 m0 = NULL;
790 switch (error) {
791 case EHOSTUNREACH:
792 case ENETUNREACH:
793 case EMSGSIZE:
794 case ENOBUFS:
795 case ENOMEM:
796 break;
797 default:
798 printf("ip4_output (ipsec): error code %d\n", error);
799 /*fall through*/
800 case ENOENT:
801 /* don't show these error codes to the user */
802 error = 0;
803 break;
804 }
805 goto bad;
806 }
807 }
808
809 /* be sure to update variables that are affected by ipsec4_output() */
810 ip = mtod(m, struct ip *);
811 #ifdef _IP_VHL
812 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
813 #else
814 hlen = ip->ip_hl << 2;
815 #endif
816 if (ro->ro_rt == NULL) {
817 if ((flags & IP_ROUTETOIF) == 0) {
818 printf("ip_output: "
819 "can't update route after IPsec processing\n");
820 error = EHOSTUNREACH; /*XXX*/
821 goto bad;
822 }
823 } else {
824 /* nobody uses ia beyond here */
825 ifp = ro->ro_rt->rt_ifp;
826 }
827
828 /* make it flipped, again. */
829 ip->ip_len = ntohs((u_short)ip->ip_len);
830 ip->ip_off = ntohs((u_short)ip->ip_off);
831 skip_ipsec:
832 #endif /*IPSEC*/
833
834
835 sw_csum = m->m_pkthdr.csum_flags | CSUM_IP;
836
837
838 /* frames that can be checksumed by GMACE SUM16 HW: frame >64, no fragments, no UDP odd length */
839
840 if (apple_hwcksum_tx && (sw_csum & CSUM_DELAY_DATA) && (ifp->if_hwassist & CSUM_TCP_SUM16)
841 && (ip->ip_len > 50) && (ip->ip_len <= ifp->if_mtu)
842 && !((ip->ip_len & 0x1) && (sw_csum & CSUM_UDP)) ) {
843
844 /* Apple GMAC HW, expects STUFF_OFFSET << 16 | START_OFFSET */
845 u_short offset = (IP_VHL_HL(ip->ip_vhl) << 2) +14 ; /* IP+Enet header length */
846 u_short csumprev= m->m_pkthdr.csum_data & 0xFFFF;
847 m->m_pkthdr.csum_flags = CSUM_DATA_VALID | CSUM_TCP_SUM16; /* for GMAC */
848 m->m_pkthdr.csum_data = (csumprev + offset) << 16 ;
849 m->m_pkthdr.csum_data += offset;
850 sw_csum = CSUM_DELAY_IP; /* do IP hdr chksum in software */
851 }
852 else {
853 if (ifp->if_hwassist & CSUM_TCP_SUM16) /* force SW checksuming */
854 m->m_pkthdr.csum_flags = 0;
855 else { /* not Apple enet */
856 m->m_pkthdr.csum_flags = sw_csum & ifp->if_hwassist;
857 sw_csum &= ~ifp->if_hwassist;
858 }
859
860 if (sw_csum & CSUM_DELAY_DATA) { /* perform TCP/UDP checksuming now */
861 in_delayed_cksum(m);
862 if (m == NULL)
863 return(ENOMEM);
864 sw_csum &= ~CSUM_DELAY_DATA;
865 }
866 }
867
868 /*
869 * If small enough for interface, or the interface will take
870 * care of the fragmentation for us, can just send directly.
871 */
872 if ((u_short)ip->ip_len <= ifp->if_mtu ||
873 ifp->if_hwassist & CSUM_FRAGMENT) {
874
875 ip->ip_len = htons((u_short)ip->ip_len);
876 ip->ip_off = htons((u_short)ip->ip_off);
877 ip->ip_sum = 0;
878 if (sw_csum & CSUM_DELAY_IP)
879 ip->ip_sum = in_cksum(m, hlen);
880 error = dlil_output(dl_tag, m, (void *) ro->ro_rt,
881 (struct sockaddr *)dst, 0);
882 goto done;
883 }
884 /*
885 * Too large for interface; fragment if possible.
886 * Must be able to put at least 8 bytes per fragment.
887 */
888 if (ip->ip_off & IP_DF) {
889 error = EMSGSIZE;
890 /*
891 * This case can happen if the user changed the MTU
892 * of an interface after enabling IP on it. Because
893 * most netifs don't keep track of routes pointing to
894 * them, there is no way for one to update all its
895 * routes when the MTU is changed.
896 */
897 if ((ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST))
898 && !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU)
899 && (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) {
900 ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
901 }
902 ipstat.ips_cantfrag++;
903 goto bad;
904 }
905 len = (ifp->if_mtu - hlen) &~ 7;
906 if (len < 8) {
907 error = EMSGSIZE;
908 goto bad;
909 }
910
911 /*
912 * if the interface will not calculate checksums on
913 * fragmented packets, then do it here.
914 */
915 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA &&
916 (ifp->if_hwassist & CSUM_IP_FRAGS) == 0) {
917 in_delayed_cksum(m);
918 if (m == NULL)
919 return(ENOMEM);
920 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
921 }
922
923
924 {
925 int mhlen, firstlen = len;
926 struct mbuf **mnext = &m->m_nextpkt;
927 int nfrags = 1;
928
929
930 /*
931 * Loop through length of segment after first fragment,
932 * make new header and copy data of each part and link onto chain.
933 */
934 m0 = m;
935 mhlen = sizeof (struct ip);
936 for (off = hlen + len; off < (u_short)ip->ip_len; off += len) {
937 MGETHDR(m, M_DONTWAIT, MT_HEADER);
938 if (m == 0) {
939 error = ENOBUFS;
940 ipstat.ips_odropped++;
941 goto sendorfree;
942 }
943 m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG;
944 m->m_data += max_linkhdr;
945 mhip = mtod(m, struct ip *);
946 *mhip = *ip;
947 if (hlen > sizeof (struct ip)) {
948 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
949 mhip->ip_vhl = IP_MAKE_VHL(IPVERSION, mhlen >> 2);
950 }
951 m->m_len = mhlen;
952 mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF);
953 if (ip->ip_off & IP_MF)
954 mhip->ip_off |= IP_MF;
955 if (off + len >= (u_short)ip->ip_len)
956 len = (u_short)ip->ip_len - off;
957 else
958 mhip->ip_off |= IP_MF;
959 mhip->ip_len = htons((u_short)(len + mhlen));
960 m->m_next = m_copy(m0, off, len);
961 if (m->m_next == 0) {
962 (void) m_free(m);
963 error = ENOBUFS; /* ??? */
964 ipstat.ips_odropped++;
965 goto sendorfree;
966 }
967 m->m_pkthdr.len = mhlen + len;
968 m->m_pkthdr.rcvif = (struct ifnet *)0;
969 m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
970 mhip->ip_off = htons((u_short)mhip->ip_off);
971 mhip->ip_sum = 0;
972 if (sw_csum & CSUM_DELAY_IP)
973 mhip->ip_sum = in_cksum(m, mhlen);
974 *mnext = m;
975 mnext = &m->m_nextpkt;
976 nfrags++;
977 }
978 ipstat.ips_ofragments += nfrags;
979
980 /* set first/last markers for fragment chain */
981 m0->m_flags |= M_FRAG;
982 m0->m_pkthdr.csum_data = nfrags;
983
984 /*
985 * Update first fragment by trimming what's been copied out
986 * and updating header, then send each fragment (in order).
987 */
988 m = m0;
989 m_adj(m, hlen + firstlen - (u_short)ip->ip_len);
990 m->m_pkthdr.len = hlen + firstlen;
991 ip->ip_len = htons((u_short)m->m_pkthdr.len);
992 ip->ip_off = htons((u_short)(ip->ip_off | IP_MF));
993 ip->ip_sum = 0;
994 if (sw_csum & CSUM_DELAY_IP)
995 ip->ip_sum = in_cksum(m, hlen);
996
997 sendorfree:
998
999 KERNEL_DEBUG(DBG_LAYER_END, ip->ip_dst.s_addr,
1000 ip->ip_src.s_addr, ip->ip_p, ip->ip_off, ip->ip_len);
1001
1002 for (m = m0; m; m = m0) {
1003 m0 = m->m_nextpkt;
1004 m->m_nextpkt = 0;
1005 if (error == 0)
1006 error = dlil_output(dl_tag, m, (void *) ro->ro_rt,
1007 (struct sockaddr *)dst, 0);
1008 else
1009 m_freem(m);
1010 }
1011
1012 if (error == 0)
1013 ipstat.ips_fragmented++;
1014 }
1015 done:
1016 #if IPSEC
1017 if (ro == &iproute && ro->ro_rt) {
1018 RTFREE(ro->ro_rt);
1019 ro->ro_rt = NULL;
1020 }
1021 if (sp != NULL) {
1022 KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
1023 printf("DP ip_output call free SP:%x\n", sp));
1024 key_freesp(sp);
1025 }
1026 #endif /* IPSEC */
1027
1028 KERNEL_DEBUG(DBG_FNC_IP_OUTPUT | DBG_FUNC_END, error,0,0,0,0);
1029 return (error);
1030 bad:
1031 m_freem(m0);
1032 goto done;
1033 }
1034
1035 extern u_short in_chksum_skip(struct mbuf *, int, int);
1036
1037 void
1038 in_delayed_cksum(struct mbuf *m)
1039 {
1040 struct ip *ip;
1041 u_short csum, csum2, offset;
1042
1043 ip = mtod(m, struct ip *);
1044 offset = IP_VHL_HL(ip->ip_vhl) << 2 ;
1045
1046 csum = in_cksum_skip(m, ip->ip_len, offset);
1047
1048 if ((m->m_pkthdr.csum_flags & CSUM_UDP) && csum == 0)
1049 csum = 0xffff;
1050
1051 offset += m->m_pkthdr.csum_data & 0xFFFF; /* checksum offset */
1052
1053 if (offset > ip->ip_len) /* bogus offset */
1054 return;
1055
1056 if (offset + sizeof(u_short) > m->m_len) {
1057 printf("delayed m_pullup, m->len: %d off: %d p: %d\n",
1058 m->m_len, offset, ip->ip_p);
1059 /*
1060 * XXX
1061 * this shouldn't happen, but if it does, the
1062 * correct behavior may be to insert the checksum
1063 * in the existing chain instead of rearranging it.
1064 */
1065 if (m = m_pullup(m, offset + sizeof(u_short)) == 0)
1066 return;
1067 }
1068
1069 *(u_short *)(m->m_data + offset) = csum;
1070 }
1071
1072 /*
1073 * Insert IP options into preformed packet.
1074 * Adjust IP destination as required for IP source routing,
1075 * as indicated by a non-zero in_addr at the start of the options.
1076 *
1077 * XXX This routine assumes that the packet has no options in place.
1078 */
1079 static struct mbuf *
1080 ip_insertoptions(m, opt, phlen)
1081 register struct mbuf *m;
1082 struct mbuf *opt;
1083 int *phlen;
1084 {
1085 register struct ipoption *p = mtod(opt, struct ipoption *);
1086 struct mbuf *n;
1087 register struct ip *ip = mtod(m, struct ip *);
1088 unsigned optlen;
1089
1090 optlen = opt->m_len - sizeof(p->ipopt_dst);
1091 if (optlen + (u_short)ip->ip_len > IP_MAXPACKET)
1092 return (m); /* XXX should fail */
1093 if (p->ipopt_dst.s_addr)
1094 ip->ip_dst = p->ipopt_dst;
1095 if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
1096 MGETHDR(n, M_DONTWAIT, MT_HEADER);
1097 if (n == 0)
1098 return (m);
1099 n->m_pkthdr.len = m->m_pkthdr.len + optlen;
1100 m->m_len -= sizeof(struct ip);
1101 m->m_data += sizeof(struct ip);
1102 n->m_next = m;
1103 m = n;
1104 m->m_len = optlen + sizeof(struct ip);
1105 m->m_data += max_linkhdr;
1106 (void)memcpy(mtod(m, void *), ip, sizeof(struct ip));
1107 } else {
1108 m->m_data -= optlen;
1109 m->m_len += optlen;
1110 m->m_pkthdr.len += optlen;
1111 ovbcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
1112 }
1113 ip = mtod(m, struct ip *);
1114 bcopy(p->ipopt_list, ip + 1, optlen);
1115 *phlen = sizeof(struct ip) + optlen;
1116 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, *phlen >> 2);
1117 ip->ip_len += optlen;
1118 return (m);
1119 }
1120
1121 /*
1122 * Copy options from ip to jp,
1123 * omitting those not copied during fragmentation.
1124 */
1125 int
1126 ip_optcopy(ip, jp)
1127 struct ip *ip, *jp;
1128 {
1129 register u_char *cp, *dp;
1130 int opt, optlen, cnt;
1131
1132 cp = (u_char *)(ip + 1);
1133 dp = (u_char *)(jp + 1);
1134 cnt = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof (struct ip);
1135 for (; cnt > 0; cnt -= optlen, cp += optlen) {
1136 opt = cp[0];
1137 if (opt == IPOPT_EOL)
1138 break;
1139 if (opt == IPOPT_NOP) {
1140 /* Preserve for IP mcast tunnel's LSRR alignment. */
1141 *dp++ = IPOPT_NOP;
1142 optlen = 1;
1143 continue;
1144 } else
1145 optlen = cp[IPOPT_OLEN];
1146 /* bogus lengths should have been caught by ip_dooptions */
1147 if (optlen > cnt)
1148 optlen = cnt;
1149 if (IPOPT_COPIED(opt)) {
1150 bcopy(cp, dp, optlen);
1151 dp += optlen;
1152 }
1153 }
1154 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
1155 *dp++ = IPOPT_EOL;
1156 return (optlen);
1157 }
1158
1159 /*
1160 * IP socket option processing.
1161 */
1162 int
1163 ip_ctloutput(so, sopt)
1164 struct socket *so;
1165 struct sockopt *sopt;
1166 {
1167 struct inpcb *inp = sotoinpcb(so);
1168 int error, optval;
1169
1170 error = optval = 0;
1171 if (sopt->sopt_level != IPPROTO_IP) {
1172 return (EINVAL);
1173 }
1174
1175 switch (sopt->sopt_dir) {
1176 case SOPT_SET:
1177 switch (sopt->sopt_name) {
1178 case IP_OPTIONS:
1179 #ifdef notyet
1180 case IP_RETOPTS:
1181 #endif
1182 {
1183 struct mbuf *m;
1184 if (sopt->sopt_valsize > MLEN) {
1185 error = EMSGSIZE;
1186 break;
1187 }
1188 MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_HEADER);
1189 if (m == 0) {
1190 error = ENOBUFS;
1191 break;
1192 }
1193 m->m_len = sopt->sopt_valsize;
1194 error = sooptcopyin(sopt, mtod(m, char *), m->m_len,
1195 m->m_len);
1196 if (error)
1197 break;
1198
1199 return (ip_pcbopts(sopt->sopt_name, &inp->inp_options,
1200 m));
1201 }
1202
1203 case IP_TOS:
1204 case IP_TTL:
1205 case IP_RECVOPTS:
1206 case IP_RECVRETOPTS:
1207 case IP_RECVDSTADDR:
1208 case IP_RECVIF:
1209 case IP_FAITH:
1210 error = sooptcopyin(sopt, &optval, sizeof optval,
1211 sizeof optval);
1212 if (error)
1213 break;
1214
1215 switch (sopt->sopt_name) {
1216 case IP_TOS:
1217 inp->inp_ip_tos = optval;
1218 break;
1219
1220 case IP_TTL:
1221 inp->inp_ip_ttl = optval;
1222 break;
1223 #define OPTSET(bit) \
1224 if (optval) \
1225 inp->inp_flags |= bit; \
1226 else \
1227 inp->inp_flags &= ~bit;
1228
1229 case IP_RECVOPTS:
1230 OPTSET(INP_RECVOPTS);
1231 break;
1232
1233 case IP_RECVRETOPTS:
1234 OPTSET(INP_RECVRETOPTS);
1235 break;
1236
1237 case IP_RECVDSTADDR:
1238 OPTSET(INP_RECVDSTADDR);
1239 break;
1240
1241 case IP_RECVIF:
1242 OPTSET(INP_RECVIF);
1243 break;
1244
1245 case IP_FAITH:
1246 OPTSET(INP_FAITH);
1247 break;
1248 }
1249 break;
1250 #undef OPTSET
1251
1252 case IP_MULTICAST_IF:
1253 case IP_MULTICAST_VIF:
1254 case IP_MULTICAST_TTL:
1255 case IP_MULTICAST_LOOP:
1256 case IP_ADD_MEMBERSHIP:
1257 case IP_DROP_MEMBERSHIP:
1258 error = ip_setmoptions(sopt, &inp->inp_moptions);
1259 break;
1260
1261 case IP_PORTRANGE:
1262 error = sooptcopyin(sopt, &optval, sizeof optval,
1263 sizeof optval);
1264 if (error)
1265 break;
1266
1267 switch (optval) {
1268 case IP_PORTRANGE_DEFAULT:
1269 inp->inp_flags &= ~(INP_LOWPORT);
1270 inp->inp_flags &= ~(INP_HIGHPORT);
1271 break;
1272
1273 case IP_PORTRANGE_HIGH:
1274 inp->inp_flags &= ~(INP_LOWPORT);
1275 inp->inp_flags |= INP_HIGHPORT;
1276 break;
1277
1278 case IP_PORTRANGE_LOW:
1279 inp->inp_flags &= ~(INP_HIGHPORT);
1280 inp->inp_flags |= INP_LOWPORT;
1281 break;
1282
1283 default:
1284 error = EINVAL;
1285 break;
1286 }
1287 break;
1288
1289 #if IPSEC
1290 case IP_IPSEC_POLICY:
1291 {
1292 caddr_t req = NULL;
1293 size_t len = 0;
1294 int priv;
1295 struct mbuf *m;
1296 int optname;
1297
1298 if (error = sooptgetm(sopt, &m)) /* XXX */
1299 break;
1300 if (error = sooptmcopyin(sopt, m)) /* XXX */
1301 break;
1302 priv = (sopt->sopt_p != NULL &&
1303 suser(sopt->sopt_p->p_ucred,
1304 &sopt->sopt_p->p_acflag) != 0) ? 0 : 1;
1305 if (m) {
1306 req = mtod(m, caddr_t);
1307 len = m->m_len;
1308 }
1309 optname = sopt->sopt_name;
1310 error = ipsec4_set_policy(inp, optname, req, len, priv);
1311 m_freem(m);
1312 break;
1313 }
1314 #endif /*IPSEC*/
1315
1316 default:
1317 error = ENOPROTOOPT;
1318 break;
1319 }
1320 break;
1321
1322 case SOPT_GET:
1323 switch (sopt->sopt_name) {
1324 case IP_OPTIONS:
1325 case IP_RETOPTS:
1326 if (inp->inp_options)
1327 error = sooptcopyout(sopt,
1328 mtod(inp->inp_options,
1329 char *),
1330 inp->inp_options->m_len);
1331 else
1332 sopt->sopt_valsize = 0;
1333 break;
1334
1335 case IP_TOS:
1336 case IP_TTL:
1337 case IP_RECVOPTS:
1338 case IP_RECVRETOPTS:
1339 case IP_RECVDSTADDR:
1340 case IP_RECVIF:
1341 case IP_PORTRANGE:
1342 case IP_FAITH:
1343 switch (sopt->sopt_name) {
1344
1345 case IP_TOS:
1346 optval = inp->inp_ip_tos;
1347 break;
1348
1349 case IP_TTL:
1350 optval = inp->inp_ip_ttl;
1351 break;
1352
1353 #define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0)
1354
1355 case IP_RECVOPTS:
1356 optval = OPTBIT(INP_RECVOPTS);
1357 break;
1358
1359 case IP_RECVRETOPTS:
1360 optval = OPTBIT(INP_RECVRETOPTS);
1361 break;
1362
1363 case IP_RECVDSTADDR:
1364 optval = OPTBIT(INP_RECVDSTADDR);
1365 break;
1366
1367 case IP_RECVIF:
1368 optval = OPTBIT(INP_RECVIF);
1369 break;
1370
1371 case IP_PORTRANGE:
1372 if (inp->inp_flags & INP_HIGHPORT)
1373 optval = IP_PORTRANGE_HIGH;
1374 else if (inp->inp_flags & INP_LOWPORT)
1375 optval = IP_PORTRANGE_LOW;
1376 else
1377 optval = 0;
1378 break;
1379
1380 case IP_FAITH:
1381 optval = OPTBIT(INP_FAITH);
1382 break;
1383 }
1384 error = sooptcopyout(sopt, &optval, sizeof optval);
1385 break;
1386
1387 case IP_MULTICAST_IF:
1388 case IP_MULTICAST_VIF:
1389 case IP_MULTICAST_TTL:
1390 case IP_MULTICAST_LOOP:
1391 case IP_ADD_MEMBERSHIP:
1392 case IP_DROP_MEMBERSHIP:
1393 error = ip_getmoptions(sopt, inp->inp_moptions);
1394 break;
1395
1396 #if IPSEC
1397 case IP_IPSEC_POLICY:
1398 {
1399 struct mbuf *m = NULL;
1400 size_t len = 0;
1401 caddr_t req = NULL;
1402
1403 if (error = sooptgetm(sopt, &m)) /* XXX */
1404 break;
1405 if (error = sooptmcopyin(sopt, m)) /* XXX */
1406 break;
1407 if (m) {
1408 req = mtod(m, caddr_t);
1409 len = m->m_len;
1410 }
1411
1412 error = ipsec4_get_policy(sotoinpcb(so), req, len, &m);
1413 if (error == 0)
1414 error = sooptmcopyout(sopt, m); /* XXX */
1415
1416 /* if error, m_freem called at soopt_mcopyout(). */
1417 if (error == 0)
1418 m_freem(m);
1419 break;
1420 }
1421 #endif /*IPSEC*/
1422
1423 default:
1424 error = ENOPROTOOPT;
1425 break;
1426 }
1427 break;
1428 }
1429 return (error);
1430 }
1431
1432 /*
1433 * Set up IP options in pcb for insertion in output packets.
1434 * Store in mbuf with pointer in pcbopt, adding pseudo-option
1435 * with destination address if source routed.
1436 */
1437 static int
1438 ip_pcbopts(optname, pcbopt, m)
1439 int optname;
1440 struct mbuf **pcbopt;
1441 register struct mbuf *m;
1442 {
1443 register int cnt, optlen;
1444 register u_char *cp;
1445 u_char opt;
1446
1447 /* turn off any old options */
1448 if (*pcbopt)
1449 (void)m_free(*pcbopt);
1450 *pcbopt = 0;
1451 if (m == (struct mbuf *)0 || m->m_len == 0) {
1452 /*
1453 * Only turning off any previous options.
1454 */
1455 if (m)
1456 (void)m_free(m);
1457 return (0);
1458 }
1459
1460 #ifndef vax
1461 if (m->m_len % sizeof(int32_t))
1462 goto bad;
1463 #endif
1464 /*
1465 * IP first-hop destination address will be stored before
1466 * actual options; move other options back
1467 * and clear it when none present.
1468 */
1469 if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
1470 goto bad;
1471 cnt = m->m_len;
1472 m->m_len += sizeof(struct in_addr);
1473 cp = mtod(m, u_char *) + sizeof(struct in_addr);
1474 ovbcopy(mtod(m, caddr_t), (caddr_t)cp, (unsigned)cnt);
1475 bzero(mtod(m, caddr_t), sizeof(struct in_addr));
1476
1477 for (; cnt > 0; cnt -= optlen, cp += optlen) {
1478 opt = cp[IPOPT_OPTVAL];
1479 if (opt == IPOPT_EOL)
1480 break;
1481 if (opt == IPOPT_NOP)
1482 optlen = 1;
1483 else {
1484 if (cnt < IPOPT_OLEN + sizeof(*cp))
1485 goto bad;
1486 optlen = cp[IPOPT_OLEN];
1487 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
1488 goto bad;
1489 }
1490 switch (opt) {
1491
1492 default:
1493 break;
1494
1495 case IPOPT_LSRR:
1496 case IPOPT_SSRR:
1497 /*
1498 * user process specifies route as:
1499 * ->A->B->C->D
1500 * D must be our final destination (but we can't
1501 * check that since we may not have connected yet).
1502 * A is first hop destination, which doesn't appear in
1503 * actual IP option, but is stored before the options.
1504 */
1505 if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
1506 goto bad;
1507 m->m_len -= sizeof(struct in_addr);
1508 cnt -= sizeof(struct in_addr);
1509 optlen -= sizeof(struct in_addr);
1510 cp[IPOPT_OLEN] = optlen;
1511 /*
1512 * Move first hop before start of options.
1513 */
1514 bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
1515 sizeof(struct in_addr));
1516 /*
1517 * Then copy rest of options back
1518 * to close up the deleted entry.
1519 */
1520 ovbcopy((caddr_t)(&cp[IPOPT_OFFSET+1] +
1521 sizeof(struct in_addr)),
1522 (caddr_t)&cp[IPOPT_OFFSET+1],
1523 (unsigned)cnt + sizeof(struct in_addr));
1524 break;
1525 }
1526 }
1527 if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
1528 goto bad;
1529 *pcbopt = m;
1530 return (0);
1531
1532 bad:
1533 (void)m_free(m);
1534 return (EINVAL);
1535 }
1536
1537 /*
1538 * XXX
1539 * The whole multicast option thing needs to be re-thought.
1540 * Several of these options are equally applicable to non-multicast
1541 * transmission, and one (IP_MULTICAST_TTL) totally duplicates a
1542 * standard option (IP_TTL).
1543 */
1544 /*
1545 * Set the IP multicast options in response to user setsockopt().
1546 */
1547 static int
1548 ip_setmoptions(sopt, imop)
1549 struct sockopt *sopt;
1550 struct ip_moptions **imop;
1551 {
1552 int error = 0;
1553 int i;
1554 struct in_addr addr;
1555 struct ip_mreq mreq;
1556 struct ifnet *ifp;
1557 struct ip_moptions *imo = *imop;
1558 struct route ro;
1559 struct sockaddr_in *dst;
1560 int s;
1561
1562 if (imo == NULL) {
1563 /*
1564 * No multicast option buffer attached to the pcb;
1565 * allocate one and initialize to default values.
1566 */
1567 imo = (struct ip_moptions*) _MALLOC(sizeof(*imo), M_IPMOPTS,
1568 M_WAITOK);
1569
1570 if (imo == NULL)
1571 return (ENOBUFS);
1572 *imop = imo;
1573 imo->imo_multicast_ifp = NULL;
1574 imo->imo_multicast_vif = -1;
1575 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1576 imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
1577 imo->imo_num_memberships = 0;
1578 }
1579
1580 switch (sopt->sopt_name) {
1581 /* store an index number for the vif you wanna use in the send */
1582 case IP_MULTICAST_VIF:
1583 if (legal_vif_num == 0) {
1584 error = EOPNOTSUPP;
1585 break;
1586 }
1587 error = sooptcopyin(sopt, &i, sizeof i, sizeof i);
1588 if (error)
1589 break;
1590 if (!legal_vif_num(i) && (i != -1)) {
1591 error = EINVAL;
1592 break;
1593 }
1594 imo->imo_multicast_vif = i;
1595 break;
1596
1597 case IP_MULTICAST_IF:
1598 /*
1599 * Select the interface for outgoing multicast packets.
1600 */
1601 error = sooptcopyin(sopt, &addr, sizeof addr, sizeof addr);
1602 if (error)
1603 break;
1604 /*
1605 * INADDR_ANY is used to remove a previous selection.
1606 * When no interface is selected, a default one is
1607 * chosen every time a multicast packet is sent.
1608 */
1609 if (addr.s_addr == INADDR_ANY) {
1610 imo->imo_multicast_ifp = NULL;
1611 break;
1612 }
1613 /*
1614 * The selected interface is identified by its local
1615 * IP address. Find the interface and confirm that
1616 * it supports multicasting.
1617 */
1618 s = splimp();
1619 INADDR_TO_IFP(addr, ifp);
1620 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1621 splx(s);
1622 error = EADDRNOTAVAIL;
1623 break;
1624 }
1625 imo->imo_multicast_ifp = ifp;
1626 splx(s);
1627 break;
1628
1629 case IP_MULTICAST_TTL:
1630 /*
1631 * Set the IP time-to-live for outgoing multicast packets.
1632 * The original multicast API required a char argument,
1633 * which is inconsistent with the rest of the socket API.
1634 * We allow either a char or an int.
1635 */
1636 if (sopt->sopt_valsize == 1) {
1637 u_char ttl;
1638 error = sooptcopyin(sopt, &ttl, 1, 1);
1639 if (error)
1640 break;
1641 imo->imo_multicast_ttl = ttl;
1642 } else {
1643 u_int ttl;
1644 error = sooptcopyin(sopt, &ttl, sizeof ttl,
1645 sizeof ttl);
1646 if (error)
1647 break;
1648 if (ttl > 255)
1649 error = EINVAL;
1650 else
1651 imo->imo_multicast_ttl = ttl;
1652 }
1653 break;
1654
1655 case IP_MULTICAST_LOOP:
1656 /*
1657 * Set the loopback flag for outgoing multicast packets.
1658 * Must be zero or one. The original multicast API required a
1659 * char argument, which is inconsistent with the rest
1660 * of the socket API. We allow either a char or an int.
1661 */
1662 if (sopt->sopt_valsize == 1) {
1663 u_char loop;
1664 error = sooptcopyin(sopt, &loop, 1, 1);
1665 if (error)
1666 break;
1667 imo->imo_multicast_loop = !!loop;
1668 } else {
1669 u_int loop;
1670 error = sooptcopyin(sopt, &loop, sizeof loop,
1671 sizeof loop);
1672 if (error)
1673 break;
1674 imo->imo_multicast_loop = !!loop;
1675 }
1676 break;
1677
1678 case IP_ADD_MEMBERSHIP:
1679 /*
1680 * Add a multicast group membership.
1681 * Group must be a valid IP multicast address.
1682 */
1683 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
1684 if (error)
1685 break;
1686
1687 if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) {
1688 error = EINVAL;
1689 break;
1690 }
1691 s = splimp();
1692 /*
1693 * If no interface address was provided, use the interface of
1694 * the route to the given multicast address.
1695 */
1696 if (mreq.imr_interface.s_addr == INADDR_ANY) {
1697 bzero((caddr_t)&ro, sizeof(ro));
1698 dst = (struct sockaddr_in *)&ro.ro_dst;
1699 dst->sin_len = sizeof(*dst);
1700 dst->sin_family = AF_INET;
1701 dst->sin_addr = mreq.imr_multiaddr;
1702 rtalloc(&ro);
1703 if (ro.ro_rt == NULL) {
1704 error = EADDRNOTAVAIL;
1705 splx(s);
1706 break;
1707 }
1708 ifp = ro.ro_rt->rt_ifp;
1709 rtfree(ro.ro_rt);
1710 }
1711 else {
1712 INADDR_TO_IFP(mreq.imr_interface, ifp);
1713 }
1714
1715 /*
1716 * See if we found an interface, and confirm that it
1717 * supports multicast.
1718 */
1719 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1720 error = EADDRNOTAVAIL;
1721 splx(s);
1722 break;
1723 }
1724 /*
1725 * See if the membership already exists or if all the
1726 * membership slots are full.
1727 */
1728 for (i = 0; i < imo->imo_num_memberships; ++i) {
1729 if (imo->imo_membership[i]->inm_ifp == ifp &&
1730 imo->imo_membership[i]->inm_addr.s_addr
1731 == mreq.imr_multiaddr.s_addr)
1732 break;
1733 }
1734 if (i < imo->imo_num_memberships) {
1735 error = EADDRINUSE;
1736 splx(s);
1737 break;
1738 }
1739 if (i == IP_MAX_MEMBERSHIPS) {
1740 error = ETOOMANYREFS;
1741 splx(s);
1742 break;
1743 }
1744 /*
1745 * Everything looks good; add a new record to the multicast
1746 * address list for the given interface.
1747 */
1748 if ((imo->imo_membership[i] =
1749 in_addmulti(&mreq.imr_multiaddr, ifp)) == NULL) {
1750 error = ENOBUFS;
1751 splx(s);
1752 break;
1753 }
1754 ++imo->imo_num_memberships;
1755 splx(s);
1756 break;
1757
1758 case IP_DROP_MEMBERSHIP:
1759 /*
1760 * Drop a multicast group membership.
1761 * Group must be a valid IP multicast address.
1762 */
1763 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
1764 if (error)
1765 break;
1766
1767 if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) {
1768 error = EINVAL;
1769 break;
1770 }
1771
1772 s = splimp();
1773 /*
1774 * If an interface address was specified, get a pointer
1775 * to its ifnet structure.
1776 */
1777 if (mreq.imr_interface.s_addr == INADDR_ANY)
1778 ifp = NULL;
1779 else {
1780 INADDR_TO_IFP(mreq.imr_interface, ifp);
1781 if (ifp == NULL) {
1782 error = EADDRNOTAVAIL;
1783 splx(s);
1784 break;
1785 }
1786 }
1787 /*
1788 * Find the membership in the membership array.
1789 */
1790 for (i = 0; i < imo->imo_num_memberships; ++i) {
1791 if ((ifp == NULL ||
1792 imo->imo_membership[i]->inm_ifp == ifp) &&
1793 imo->imo_membership[i]->inm_addr.s_addr ==
1794 mreq.imr_multiaddr.s_addr)
1795 break;
1796 }
1797 if (i == imo->imo_num_memberships) {
1798 error = EADDRNOTAVAIL;
1799 splx(s);
1800 break;
1801 }
1802 /*
1803 * Give up the multicast address record to which the
1804 * membership points.
1805 */
1806 in_delmulti(imo->imo_membership[i]);
1807 /*
1808 * Remove the gap in the membership array.
1809 */
1810 for (++i; i < imo->imo_num_memberships; ++i)
1811 imo->imo_membership[i-1] = imo->imo_membership[i];
1812 --imo->imo_num_memberships;
1813 splx(s);
1814 break;
1815
1816 default:
1817 error = EOPNOTSUPP;
1818 break;
1819 }
1820
1821 /*
1822 * If all options have default values, no need to keep the mbuf.
1823 */
1824 if (imo->imo_multicast_ifp == NULL &&
1825 imo->imo_multicast_vif == -1 &&
1826 imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL &&
1827 imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP &&
1828 imo->imo_num_memberships == 0) {
1829 FREE(*imop, M_IPMOPTS);
1830 *imop = NULL;
1831 }
1832
1833 return (error);
1834 }
1835
1836 /*
1837 * Return the IP multicast options in response to user getsockopt().
1838 */
1839 static int
1840 ip_getmoptions(sopt, imo)
1841 struct sockopt *sopt;
1842 register struct ip_moptions *imo;
1843 {
1844 struct in_addr addr;
1845 struct in_ifaddr *ia;
1846 int error, optval;
1847 u_char coptval;
1848
1849 error = 0;
1850 switch (sopt->sopt_name) {
1851 case IP_MULTICAST_VIF:
1852 if (imo != NULL)
1853 optval = imo->imo_multicast_vif;
1854 else
1855 optval = -1;
1856 error = sooptcopyout(sopt, &optval, sizeof optval);
1857 break;
1858
1859 case IP_MULTICAST_IF:
1860 if (imo == NULL || imo->imo_multicast_ifp == NULL)
1861 addr.s_addr = INADDR_ANY;
1862 else {
1863 IFP_TO_IA(imo->imo_multicast_ifp, ia);
1864 addr.s_addr = (ia == NULL) ? INADDR_ANY
1865 : IA_SIN(ia)->sin_addr.s_addr;
1866 }
1867 error = sooptcopyout(sopt, &addr, sizeof addr);
1868 break;
1869
1870 case IP_MULTICAST_TTL:
1871 if (imo == 0)
1872 optval = coptval = IP_DEFAULT_MULTICAST_TTL;
1873 else
1874 optval = coptval = imo->imo_multicast_ttl;
1875 if (sopt->sopt_valsize == 1)
1876 error = sooptcopyout(sopt, &coptval, 1);
1877 else
1878 error = sooptcopyout(sopt, &optval, sizeof optval);
1879 break;
1880
1881 case IP_MULTICAST_LOOP:
1882 if (imo == 0)
1883 optval = coptval = IP_DEFAULT_MULTICAST_LOOP;
1884 else
1885 optval = coptval = imo->imo_multicast_loop;
1886 if (sopt->sopt_valsize == 1)
1887 error = sooptcopyout(sopt, &coptval, 1);
1888 else
1889 error = sooptcopyout(sopt, &optval, sizeof optval);
1890 break;
1891
1892 default:
1893 error = ENOPROTOOPT;
1894 break;
1895 }
1896 return (error);
1897 }
1898
1899 /*
1900 * Discard the IP multicast options.
1901 */
1902 void
1903 ip_freemoptions(imo)
1904 register struct ip_moptions *imo;
1905 {
1906 register int i;
1907
1908 if (imo != NULL) {
1909 for (i = 0; i < imo->imo_num_memberships; ++i)
1910 in_delmulti(imo->imo_membership[i]);
1911 FREE(imo, M_IPMOPTS);
1912 }
1913 }
1914
1915 /*
1916 * Routine called from ip_output() to loop back a copy of an IP multicast
1917 * packet to the input queue of a specified interface. Note that this
1918 * calls the output routine of the loopback "driver", but with an interface
1919 * pointer that might NOT be a loopback interface -- evil, but easier than
1920 * replicating that code here.
1921 */
1922 static void
1923 ip_mloopback(ifp, m, dst, hlen)
1924 struct ifnet *ifp;
1925 register struct mbuf *m;
1926 register struct sockaddr_in *dst;
1927 int hlen;
1928 {
1929 register struct ip *ip;
1930 struct mbuf *copym;
1931
1932 copym = m_copy(m, 0, M_COPYALL);
1933 if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen))
1934 copym = m_pullup(copym, hlen);
1935 if (copym != NULL) {
1936 /*
1937 * We don't bother to fragment if the IP length is greater
1938 * than the interface's MTU. Can this possibly matter?
1939 */
1940 ip = mtod(copym, struct ip *);
1941 ip->ip_len = htons((u_short)ip->ip_len);
1942 ip->ip_off = htons((u_short)ip->ip_off);
1943 ip->ip_sum = 0;
1944 ip->ip_sum = in_cksum(copym, hlen);
1945
1946 /*
1947 * NB:
1948 * It's not clear whether there are any lingering
1949 * reentrancy problems in other areas which might
1950 * be exposed by using ip_input directly (in
1951 * particular, everything which modifies the packet
1952 * in-place). Yet another option is using the
1953 * protosw directly to deliver the looped back
1954 * packet. For the moment, we'll err on the side
1955 * of safety by using if_simloop().
1956 */
1957 #if 1 /* XXX */
1958 if (dst->sin_family != AF_INET) {
1959 printf("ip_mloopback: bad address family %d\n",
1960 dst->sin_family);
1961 dst->sin_family = AF_INET;
1962 }
1963 #endif
1964
1965
1966 /*
1967 * Mark checksum as valid or calculate checksum for loopback.
1968 *
1969 * This is done this way because we have to embed the ifp of
1970 * the interface we will send the original copy of the packet
1971 * out on in the mbuf. ip_input will check if_hwassist of the
1972 * embedded ifp and ignore all csum_flags if if_hwassist is 0.
1973 * The UDP checksum has not been calculated yet.
1974 */
1975 if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
1976 if (ifp->if_hwassist) {
1977 copym->m_pkthdr.csum_flags |=
1978 CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
1979 CSUM_IP_CHECKED | CSUM_IP_VALID;
1980 copym->m_pkthdr.csum_data = 0xffff;
1981 } else
1982 in_delayed_cksum(copym);
1983 }
1984
1985
1986 /*
1987 * TedW:
1988 * We need to send all loopback traffic down to dlil in case
1989 * a filter has tapped-in.
1990 */
1991
1992 if (lo_dl_tag == 0)
1993 dlil_find_dltag(APPLE_IF_FAM_LOOPBACK, 0, PF_INET, &lo_dl_tag);
1994
1995 /*
1996 * Stuff the 'real' ifp into the pkthdr, to be used in matching
1997 * in ip_input(); we need the loopback ifp/dl_tag passed as args
1998 * to make the loopback driver compliant with the data link
1999 * requirements.
2000 */
2001 if (lo_dl_tag)
2002 { copym->m_pkthdr.rcvif = ifp;
2003 dlil_output(lo_dl_tag, copym, 0, (struct sockaddr *) dst, 0);
2004 } else {
2005 printf("Warning: ip_output call to dlil_find_dltag failed!\n");
2006 m_freem(copym);
2007 }
2008
2009 /* if_simloop(ifp, copym, (struct sockaddr *)dst, 0);*/
2010 }
2011 }