]> git.saurik.com Git - apple/xnu.git/blob - bsd/netinet/ip_output.c
e365ce7f187e1914362a842ec7847599611890d3
[apple/xnu.git] / bsd / netinet / ip_output.c
1 /*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
7 *
8 * This file contains Original Code and/or Modifications of Original Code
9 * as defined in and that are subject to the Apple Public Source License
10 * Version 2.0 (the 'License'). You may not use this file except in
11 * compliance with the License. Please obtain a copy of the License at
12 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * file.
14 *
15 * The Original Code and all software distributed under the License are
16 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
17 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
20 * Please see the License for the specific language governing rights and
21 * limitations under the License.
22 *
23 * @APPLE_LICENSE_HEADER_END@
24 */
25 /*
26 * Copyright (c) 1982, 1986, 1988, 1990, 1993
27 * The Regents of the University of California. All rights reserved.
28 *
29 * Redistribution and use in source and binary forms, with or without
30 * modification, are permitted provided that the following conditions
31 * are met:
32 * 1. Redistributions of source code must retain the above copyright
33 * notice, this list of conditions and the following disclaimer.
34 * 2. Redistributions in binary form must reproduce the above copyright
35 * notice, this list of conditions and the following disclaimer in the
36 * documentation and/or other materials provided with the distribution.
37 * 3. All advertising materials mentioning features or use of this software
38 * must display the following acknowledgement:
39 * This product includes software developed by the University of
40 * California, Berkeley and its contributors.
41 * 4. Neither the name of the University nor the names of its contributors
42 * may be used to endorse or promote products derived from this software
43 * without specific prior written permission.
44 *
45 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
46 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
47 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
48 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
49 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
50 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
51 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
52 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
53 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
54 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
55 * SUCH DAMAGE.
56 *
57 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94
58 * $FreeBSD: src/sys/netinet/ip_output.c,v 1.99.2.16 2001/07/19 06:37:26 kris Exp $
59 */
60
61 #define _IP_VHL
62
63 #include <sys/param.h>
64 #include <sys/systm.h>
65 #include <sys/kernel.h>
66 #include <sys/malloc.h>
67 #include <sys/mbuf.h>
68 #include <sys/protosw.h>
69 #include <sys/socket.h>
70 #include <sys/socketvar.h>
71
72 #include <net/if.h>
73 #include <net/route.h>
74
75 #include <netinet/in.h>
76 #include <netinet/in_systm.h>
77 #include <netinet/ip.h>
78 #include <netinet/in_pcb.h>
79 #include <netinet/in_var.h>
80 #include <netinet/ip_var.h>
81
82 #include "faith.h"
83
84 #include <net/dlil.h>
85 #include <sys/kdebug.h>
86
87 #define DBG_LAYER_BEG NETDBG_CODE(DBG_NETIP, 1)
88 #define DBG_LAYER_END NETDBG_CODE(DBG_NETIP, 3)
89 #define DBG_FNC_IP_OUTPUT NETDBG_CODE(DBG_NETIP, (1 << 8) | 1)
90
91
92 #if vax
93 #include <machine/mtpr.h>
94 #endif
95
96 #if __FreeBSD__
97 #include <machine/in_cksum.h>
98
99 static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "internet multicast options");
100 #endif
101
102 #if IPSEC
103 #include <netinet6/ipsec.h>
104 #include <netkey/key.h>
105 #if IPSEC_DEBUG
106 #include <netkey/key_debug.h>
107 #else
108 #define KEYDEBUG(lev,arg)
109 #endif
110 #endif /*IPSEC*/
111
112 #include <netinet/ip_fw.h>
113
114 #if DUMMYNET
115 #include <netinet/ip_dummynet.h>
116 #endif
117
#if IPFIREWALL_FORWARD_DEBUG
/*
 * Debug helper: print an IPv4 address in dotted-quad form.
 *
 * "a" is an object with an s_addr member holding the address in network
 * byte order (the only visible caller passes dst->sin_addr).
 *
 * The macro is wrapped in do { } while (0) and carries no trailing
 * semicolon so that "print_ip(x);" is exactly one statement, even inside
 * an unbraced if/else.  The argument is evaluated once, and each octet is
 * cast to long so it always matches the %ld conversions regardless of the
 * type ntohl() returns.
 */
#define print_ip(a)	do {						\
	unsigned long print_ip_host = (unsigned long)ntohl((a).s_addr);	\
	printf("%ld.%ld.%ld.%ld",					\
	    (long)((print_ip_host >> 24) & 0xFF),			\
	    (long)((print_ip_host >> 16) & 0xFF),			\
	    (long)((print_ip_host >> 8) & 0xFF),			\
	    (long)(print_ip_host & 0xFF));				\
} while (0)
#endif
124
125 u_short ip_id;
126
127 static struct mbuf *ip_insertoptions __P((struct mbuf *, struct mbuf *, int *));
128 static struct ifnet *ip_multicast_if __P((struct in_addr *, int *));
129 static void ip_mloopback
130 __P((struct ifnet *, struct mbuf *, struct sockaddr_in *, int));
131 static int ip_getmoptions
132 __P((struct sockopt *, struct ip_moptions *));
133 static int ip_pcbopts __P((int, struct mbuf **, struct mbuf *));
134 static int ip_setmoptions
135 __P((struct sockopt *, struct ip_moptions **));
136
137 int ip_optcopy __P((struct ip *, struct ip *));
138 extern int (*fr_checkp) __P((struct ip *, int, struct ifnet *, int, struct mbuf **));
139 #ifdef __APPLE__
140 extern struct mbuf* m_dup(register struct mbuf *m, int how);
141 #endif
142
143 static u_long lo_dl_tag = 0;
144
145 void in_delayed_cksum(struct mbuf *m);
146 extern int apple_hwcksum_tx;
147
148 extern struct protosw inetsw[];
149
150 extern struct ip_linklocal_stat ip_linklocal_stat;
151
152 /* temporary: for testing */
153 #if IPSEC
154 extern int ipsec_bypass;
155 #endif
156
157 /*
158 * IP output. The packet in mbuf chain m contains a skeletal IP
159 * header (with len, off, ttl, proto, tos, src, dst).
160 * The mbuf chain containing the packet will be freed.
161 * The mbuf opt, if present, will not be freed.
162 */
163 int
164 ip_output(m0, opt, ro, flags, imo)
165 struct mbuf *m0;
166 struct mbuf *opt;
167 struct route *ro;
168 int flags;
169 struct ip_moptions *imo;
170 {
171 struct ip *ip, *mhip;
172 struct ifnet *ifp;
173 u_long dl_tag;
174 struct mbuf *m = m0;
175 int hlen = sizeof (struct ip);
176 int len, off, error = 0;
177 struct sockaddr_in *dst;
178 struct in_ifaddr *ia = NULL;
179 int isbroadcast, sw_csum;
180 #if IPSEC
181 struct route iproute;
182 struct socket *so = NULL;
183 struct secpolicy *sp = NULL;
184 #endif
185 u_int16_t divert_cookie; /* firewall cookie */
186 #if IPFIREWALL_FORWARD
187 int fwd_rewrite_src = 0;
188 #endif
189 struct ip_fw_chain *rule = NULL;
190
191 #if IPDIVERT
192 /* Get and reset firewall cookie */
193 divert_cookie = ip_divert_cookie;
194 ip_divert_cookie = 0;
195 #else
196 divert_cookie = 0;
197 #endif
198
199 KERNEL_DEBUG(DBG_FNC_IP_OUTPUT | DBG_FUNC_START, 0,0,0,0,0);
200
201 #if IPFIREWALL && DUMMYNET
202 /*
203 * dummynet packet are prepended a vestigial mbuf with
204 * m_type = MT_DUMMYNET and m_data pointing to the matching
205 * rule.
206 */
207 if (m->m_type == MT_DUMMYNET) {
208 /*
209 * the packet was already tagged, so part of the
210 * processing was already done, and we need to go down.
211 * Get parameters from the header.
212 */
213 rule = (struct ip_fw_chain *)(m->m_data) ;
214 opt = NULL ;
215 ro = & ( ((struct dn_pkt *)m)->ro ) ;
216 imo = NULL ;
217 dst = ((struct dn_pkt *)m)->dn_dst ;
218 ifp = ((struct dn_pkt *)m)->ifp ;
219 flags = ((struct dn_pkt *)m)->flags ;
220 m0 = m = m->m_next ;
221 #if IPSEC
222 if (ipsec_bypass == 0) {
223 so = ipsec_getsocket(m);
224 (void)ipsec_setsocket(m, NULL);
225 }
226 #endif
227 ip = mtod(m, struct ip *);
228 hlen = IP_VHL_HL(ip->ip_vhl) << 2 ;
229 if (ro->ro_rt != NULL)
230 ia = (struct in_ifaddr *)ro->ro_rt->rt_ifa;
231 goto sendit;
232 } else
233 rule = NULL ;
234 #endif
235 #if IPSEC
236 if (ipsec_bypass == 0) {
237 so = ipsec_getsocket(m);
238 (void)ipsec_setsocket(m, NULL);
239 }
240 #endif
241
242 #if DIAGNOSTIC
243 if ((m->m_flags & M_PKTHDR) == 0)
244 panic("ip_output no HDR");
245 if (!ro)
246 panic("ip_output no route, proto = %d",
247 mtod(m, struct ip *)->ip_p);
248 #endif
249 if (opt) {
250 m = ip_insertoptions(m, opt, &len);
251 hlen = len;
252 }
253 ip = mtod(m, struct ip *);
254 /*
255 * Fill in IP header.
256 */
257 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
258 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, hlen >> 2);
259 ip->ip_off &= IP_DF;
260 #if RANDOM_IP_ID
261 ip->ip_id = ip_randomid();
262 #else
263 ip->ip_id = htons(ip_id++);
264 #endif
265 ipstat.ips_localout++;
266 } else {
267 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
268 }
269
270 KERNEL_DEBUG(DBG_LAYER_BEG, ip->ip_dst.s_addr,
271 ip->ip_src.s_addr, ip->ip_p, ip->ip_off, ip->ip_len);
272
273 dst = (struct sockaddr_in *)&ro->ro_dst;
274 /*
275 * If there is a cached route,
276 * check that it is to the same destination
277 * and is still up. If not, free it and try again.
278 */
279 if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
280 dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
281 rtfree(ro->ro_rt);
282 ro->ro_rt = (struct rtentry *)0;
283 }
284 if (ro->ro_rt == 0) {
285 dst->sin_family = AF_INET;
286 dst->sin_len = sizeof(*dst);
287 dst->sin_addr = ip->ip_dst;
288 }
289 /*
290 * If routing to interface only,
291 * short circuit routing lookup.
292 */
293 #define ifatoia(ifa) ((struct in_ifaddr *)(ifa))
294 #define sintosa(sin) ((struct sockaddr *)(sin))
295 if (flags & IP_ROUTETOIF) {
296 if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0 &&
297 (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) {
298 ipstat.ips_noroute++;
299 error = ENETUNREACH;
300 goto bad;
301 }
302 ifp = ia->ia_ifp;
303 dl_tag = ia->ia_ifa.ifa_dlt;
304 ip->ip_ttl = 1;
305 isbroadcast = in_broadcast(dst->sin_addr, ifp);
306 } else {
307 /*
308 * If this is the case, we probably don't want to allocate
309 * a protocol-cloned route since we didn't get one from the
310 * ULP. This lets TCP do its thing, while not burdening
311 * forwarding or ICMP with the overhead of cloning a route.
312 * Of course, we still want to do any cloning requested by
313 * the link layer, as this is probably required in all cases
314 * for correct operation (as it is for ARP).
315 */
316 if (ro->ro_rt == 0)
317 rtalloc_ign(ro, RTF_PRCLONING);
318 if (ro->ro_rt == 0) {
319 ipstat.ips_noroute++;
320 error = EHOSTUNREACH;
321 goto bad;
322 }
323 ia = ifatoia(ro->ro_rt->rt_ifa);
324 ifp = ro->ro_rt->rt_ifp;
325 dl_tag = ro->ro_rt->rt_dlt;
326 ro->ro_rt->rt_use++;
327 if (ro->ro_rt->rt_flags & RTF_GATEWAY)
328 dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
329 if (ro->ro_rt->rt_flags & RTF_HOST)
330 isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST);
331 else
332 isbroadcast = in_broadcast(dst->sin_addr, ifp);
333 }
334 if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
335 struct in_multi *inm;
336
337 m->m_flags |= M_MCAST;
338 /*
339 * IP destination address is multicast. Make sure "dst"
340 * still points to the address in "ro". (It may have been
341 * changed to point to a gateway address, above.)
342 */
343 dst = (struct sockaddr_in *)&ro->ro_dst;
344 /*
345 * See if the caller provided any multicast options
346 */
347 if (imo != NULL) {
348 ip->ip_ttl = imo->imo_multicast_ttl;
349 if (imo->imo_multicast_ifp != NULL) {
350 ifp = imo->imo_multicast_ifp;
351 dl_tag = ifp->if_data.default_proto;
352 }
353 if (imo->imo_multicast_vif != -1)
354 ip->ip_src.s_addr =
355 ip_mcast_src(imo->imo_multicast_vif);
356 } else
357 ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
358 /*
359 * Confirm that the outgoing interface supports multicast.
360 */
361 if ((imo == NULL) || (imo->imo_multicast_vif == -1)) {
362 if ((ifp->if_flags & IFF_MULTICAST) == 0) {
363 ipstat.ips_noroute++;
364 error = ENETUNREACH;
365 goto bad;
366 }
367 }
368 /*
369 * If source address not specified yet, use address
370 * of outgoing interface.
371 */
372 if (ip->ip_src.s_addr == INADDR_ANY) {
373 register struct in_ifaddr *ia1;
374
375 TAILQ_FOREACH(ia1, &in_ifaddrhead, ia_link)
376 if (ia1->ia_ifp == ifp) {
377 ip->ip_src = IA_SIN(ia1)->sin_addr;
378 break;
379 }
380 }
381
382 IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm);
383 if (inm != NULL &&
384 (imo == NULL || imo->imo_multicast_loop)) {
385 /*
386 * If we belong to the destination multicast group
387 * on the outgoing interface, and the caller did not
388 * forbid loopback, loop back a copy.
389 */
390 ip_mloopback(ifp, m, dst, hlen);
391 }
392 else {
393 /*
394 * If we are acting as a multicast router, perform
395 * multicast forwarding as if the packet had just
396 * arrived on the interface to which we are about
397 * to send. The multicast forwarding function
398 * recursively calls this function, using the
399 * IP_FORWARDING flag to prevent infinite recursion.
400 *
401 * Multicasts that are looped back by ip_mloopback(),
402 * above, will be forwarded by the ip_input() routine,
403 * if necessary.
404 */
405 if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
406 /*
407 * Check if rsvp daemon is running. If not, don't
408 * set ip_moptions. This ensures that the packet
409 * is multicast and not just sent down one link
410 * as prescribed by rsvpd.
411 */
412 if (!rsvp_on)
413 imo = NULL;
414 if (ip_mforward(ip, ifp, m, imo) != 0) {
415 m_freem(m);
416 goto done;
417 }
418 }
419 }
420
421 /*
422 * Multicasts with a time-to-live of zero may be looped-
423 * back, above, but must not be transmitted on a network.
424 * Also, multicasts addressed to the loopback interface
425 * are not sent -- the above call to ip_mloopback() will
426 * loop back a copy if this host actually belongs to the
427 * destination group on the loopback interface.
428 */
429 if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) {
430 m_freem(m);
431 goto done;
432 }
433
434 goto sendit;
435 }
436 #ifndef notdef
437 /*
438 * If source address not specified yet, use address
439 * of outgoing interface.
440 */
441 if (ip->ip_src.s_addr == INADDR_ANY) {
442 ip->ip_src = IA_SIN(ia)->sin_addr;
443 #if IPFIREWALL_FORWARD
444 /* Keep note that we did this - if the firewall changes
445 * the next-hop, our interface may change, changing the
446 * default source IP. It's a shame so much effort happens
447 * twice. Oh well.
448 */
449 fwd_rewrite_src++;
450 #endif /* IPFIREWALL_FORWARD */
451 }
452 #endif /* notdef */
453 /*
454 * Verify that we have any chance at all of being able to queue
455 * the packet or packet fragments
456 */
457 if ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >=
458 ifp->if_snd.ifq_maxlen) {
459 error = ENOBUFS;
460 goto bad;
461 }
462
463 /*
464 * Look for broadcast address and
465 * and verify user is allowed to send
466 * such a packet.
467 */
468 if (isbroadcast) {
469 if ((ifp->if_flags & IFF_BROADCAST) == 0) {
470 error = EADDRNOTAVAIL;
471 goto bad;
472 }
473 if ((flags & IP_ALLOWBROADCAST) == 0) {
474 error = EACCES;
475 goto bad;
476 }
477 /* don't allow broadcast messages to be fragmented */
478 if ((u_short)ip->ip_len > ifp->if_mtu) {
479 error = EMSGSIZE;
480 goto bad;
481 }
482 m->m_flags |= M_BCAST;
483 } else {
484 m->m_flags &= ~M_BCAST;
485 }
486
487 sendit:
488 /*
489 * Force IP TTL to 255 following draft-ietf-zeroconf-ipv4-linklocal.txt
490 */
491 if (IN_LINKLOCAL(ntohl(ip->ip_src.s_addr)) || IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr))) {
492 ip_linklocal_stat.iplls_out_total++;
493 if (ip->ip_ttl != MAXTTL) {
494 ip_linklocal_stat.iplls_out_badttl++;
495 ip->ip_ttl = MAXTTL;
496 }
497 }
498
499 #if IPSEC
500 /* temporary for testing only: bypass ipsec alltogether */
501
502 if (ipsec_bypass != 0)
503 goto skip_ipsec;
504
505 /* get SP for this packet */
506 if (so == NULL)
507 sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, flags, &error);
508 else
509 sp = ipsec4_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error);
510
511 if (sp == NULL) {
512 ipsecstat.out_inval++;
513 goto bad;
514 }
515
516 error = 0;
517
518 /* check policy */
519 switch (sp->policy) {
520 case IPSEC_POLICY_DISCARD:
521 /*
522 * This packet is just discarded.
523 */
524 ipsecstat.out_polvio++;
525 goto bad;
526
527 case IPSEC_POLICY_BYPASS:
528 case IPSEC_POLICY_NONE:
529 /* no need to do IPsec. */
530 goto skip_ipsec;
531
532 case IPSEC_POLICY_IPSEC:
533 if (sp->req == NULL) {
534 /* acquire a policy */
535 error = key_spdacquire(sp);
536 goto bad;
537 }
538 break;
539
540 case IPSEC_POLICY_ENTRUST:
541 default:
542 printf("ip_output: Invalid policy found. %d\n", sp->policy);
543 }
544 {
545 struct ipsec_output_state state;
546 bzero(&state, sizeof(state));
547 state.m = m;
548 if (flags & IP_ROUTETOIF) {
549 state.ro = &iproute;
550 bzero(&iproute, sizeof(iproute));
551 } else
552 state.ro = ro;
553 state.dst = (struct sockaddr *)dst;
554
555 ip->ip_sum = 0;
556
557 /*
558 * XXX
559 * delayed checksums are not currently compatible with IPsec
560 */
561 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
562 in_delayed_cksum(m);
563 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
564 }
565
566 HTONS(ip->ip_len);
567 HTONS(ip->ip_off);
568
569 error = ipsec4_output(&state, sp, flags);
570
571 m = state.m;
572 if (flags & IP_ROUTETOIF) {
573 /*
574 * if we have tunnel mode SA, we may need to ignore
575 * IP_ROUTETOIF.
576 */
577 if (state.ro != &iproute || state.ro->ro_rt != NULL) {
578 flags &= ~IP_ROUTETOIF;
579 ro = state.ro;
580 }
581 } else
582 ro = state.ro;
583 dst = (struct sockaddr_in *)state.dst;
584 if (error) {
585 /* mbuf is already reclaimed in ipsec4_output. */
586 m0 = NULL;
587 switch (error) {
588 case EHOSTUNREACH:
589 case ENETUNREACH:
590 case EMSGSIZE:
591 case ENOBUFS:
592 case ENOMEM:
593 break;
594 default:
595 printf("ip4_output (ipsec): error code %d\n", error);
596 /*fall through*/
597 case ENOENT:
598 /* don't show these error codes to the user */
599 error = 0;
600 break;
601 }
602 goto bad;
603 }
604 }
605
606 /* be sure to update variables that are affected by ipsec4_output() */
607 ip = mtod(m, struct ip *);
608 #ifdef _IP_VHL
609 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
610 #else
611 hlen = ip->ip_hl << 2;
612 #endif
613 if (ro->ro_rt == NULL) {
614 if ((flags & IP_ROUTETOIF) == 0) {
615 printf("ip_output: "
616 "can't update route after IPsec processing\n");
617 error = EHOSTUNREACH; /*XXX*/
618 goto bad;
619 }
620 } else {
621 ia = ifatoia(ro->ro_rt->rt_ifa);
622 ifp = ro->ro_rt->rt_ifp;
623 dl_tag = ia->ia_ifa.ifa_dlt;
624 }
625
626 /* make it flipped, again. */
627 NTOHS(ip->ip_len);
628 NTOHS(ip->ip_off);
629 skip_ipsec:
630 #endif /*IPSEC*/
631
632 /*
633 * IpHack's section.
634 * - Xlate: translate packet's addr/port (NAT).
635 * - Firewall: deny/allow/etc.
636 * - Wrap: fake packet's addr/port <unimpl.>
637 * - Encapsulate: put it in another IP and send out. <unimp.>
638 */
639 if (fr_checkp) {
640 struct mbuf *m1 = m;
641
642 if ((error = (*fr_checkp)(ip, hlen, ifp, 1, &m1)) || !m1)
643 goto done;
644 ip = mtod(m = m1, struct ip *);
645 }
646
647 /*
648 * Check with the firewall...
649 */
650 if (fw_enable && ip_fw_chk_ptr) {
651 struct sockaddr_in *old = dst;
652
653 off = (*ip_fw_chk_ptr)(&ip,
654 hlen, ifp, &divert_cookie, &m, &rule, &dst);
655 /*
656 * On return we must do the following:
657 * IP_FW_PORT_DENY_FLAG -> drop the pkt (XXX new)
658 * 1<=off<= 0xffff -> DIVERT
659 * (off & IP_FW_PORT_DYNT_FLAG) -> send to a DUMMYNET pipe
660 * (off & IP_FW_PORT_TEE_FLAG) -> TEE the packet
661 * dst != old -> IPFIREWALL_FORWARD
662 * off==0, dst==old -> accept
663 * If some of the above modules is not compiled in, then
664 * we should't have to check the corresponding condition
665 * (because the ipfw control socket should not accept
666 * unsupported rules), but better play safe and drop
667 * packets in case of doubt.
668 */
669 if ( (off & IP_FW_PORT_DENY_FLAG) || m == NULL) {
670 if (m)
671 m_freem(m);
672 error = EACCES ;
673 goto done ;
674 }
675 ip = mtod(m, struct ip *);
676 if (off == 0 && dst == old) /* common case */
677 goto pass ;
678 #if DUMMYNET
679 if ((off & IP_FW_PORT_DYNT_FLAG) != 0) {
680 /*
681 * pass the pkt to dummynet. Need to include
682 * pipe number, m, ifp, ro, dst because these are
683 * not recomputed in the next pass.
684 * All other parameters have been already used and
685 * so they are not needed anymore.
686 * XXX note: if the ifp or ro entry are deleted
687 * while a pkt is in dummynet, we are in trouble!
688 */
689 error = dummynet_io(off & 0xffff, DN_TO_IP_OUT, m,
690 ifp,ro,dst,rule, flags);
691 goto done;
692 }
693 #endif
694 #if IPDIVERT
695 if (off != 0 && (off & IP_FW_PORT_DYNT_FLAG) == 0) {
696 struct mbuf *clone = NULL;
697
698 /* Clone packet if we're doing a 'tee' */
699 if ((off & IP_FW_PORT_TEE_FLAG) != 0)
700 clone = m_dup(m, M_DONTWAIT);
701 /*
702 * XXX
703 * delayed checksums are not currently compatible
704 * with divert sockets.
705 */
706 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
707 in_delayed_cksum(m);
708 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
709 }
710
711 /* Restore packet header fields to original values */
712 HTONS(ip->ip_len);
713 HTONS(ip->ip_off);
714
715 /* Deliver packet to divert input routine */
716 ip_divert_cookie = divert_cookie;
717 divert_packet(m, 0, off & 0xffff);
718
719 /* If 'tee', continue with original packet */
720 if (clone != NULL) {
721 m = clone;
722 ip = mtod(m, struct ip *);
723 goto pass;
724 }
725 goto done;
726 }
727 #endif
728
729 #if IPFIREWALL_FORWARD
730 /* Here we check dst to make sure it's directly reachable on the
731 * interface we previously thought it was.
732 * If it isn't (which may be likely in some situations) we have
733 * to re-route it (ie, find a route for the next-hop and the
734 * associated interface) and set them here. This is nested
735 * forwarding which in most cases is undesirable, except where
736 * such control is nigh impossible. So we do it here.
737 * And I'm babbling.
738 */
739 if (off == 0 && old != dst) {
740 struct in_ifaddr *ia;
741
742 /* It's changed... */
743 /* There must be a better way to do this next line... */
744 static struct route sro_fwd, *ro_fwd = &sro_fwd;
745 #if IPFIREWALL_FORWARD_DEBUG
746 printf("IPFIREWALL_FORWARD: New dst ip: ");
747 print_ip(dst->sin_addr);
748 printf("\n");
749 #endif
750 /*
751 * We need to figure out if we have been forwarded
752 * to a local socket. If so then we should somehow
753 * "loop back" to ip_input, and get directed to the
754 * PCB as if we had received this packet. This is
755 * because it may be dificult to identify the packets
756 * you want to forward until they are being output
757 * and have selected an interface. (e.g. locally
758 * initiated packets) If we used the loopback inteface,
759 * we would not be able to control what happens
760 * as the packet runs through ip_input() as
761 * it is done through a ISR.
762 */
763 TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
764 /*
765 * If the addr to forward to is one
766 * of ours, we pretend to
767 * be the destination for this packet.
768 */
769 if (IA_SIN(ia)->sin_addr.s_addr ==
770 dst->sin_addr.s_addr)
771 break;
772 }
773 if (ia) {
774 /* tell ip_input "dont filter" */
775 ip_fw_fwd_addr = dst;
776 if (m->m_pkthdr.rcvif == NULL)
777 m->m_pkthdr.rcvif = ifunit("lo0");
778 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
779 m->m_pkthdr.csum_flags |=
780 CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
781 m0->m_pkthdr.csum_data = 0xffff;
782 }
783 m->m_pkthdr.csum_flags |=
784 CSUM_IP_CHECKED | CSUM_IP_VALID;
785 HTONS(ip->ip_len);
786 HTONS(ip->ip_off);
787 ip_input(m);
788 goto done;
789 }
790 /* Some of the logic for this was
791 * nicked from above.
792 *
793 * This rewrites the cached route in a local PCB.
794 * Is this what we want to do?
795 */
796 bcopy(dst, &ro_fwd->ro_dst, sizeof(*dst));
797
798 ro_fwd->ro_rt = 0;
799 rtalloc_ign(ro_fwd, RTF_PRCLONING);
800
801 if (ro_fwd->ro_rt == 0) {
802 ipstat.ips_noroute++;
803 error = EHOSTUNREACH;
804 goto bad;
805 }
806
807 ia = ifatoia(ro_fwd->ro_rt->rt_ifa);
808 ifp = ro_fwd->ro_rt->rt_ifp;
809 dl_tag = ro_fwd->ro_rt->rt_dlt;
810 ro_fwd->ro_rt->rt_use++;
811 if (ro_fwd->ro_rt->rt_flags & RTF_GATEWAY)
812 dst = (struct sockaddr_in *)ro_fwd->ro_rt->rt_gateway;
813 if (ro_fwd->ro_rt->rt_flags & RTF_HOST)
814 isbroadcast =
815 (ro_fwd->ro_rt->rt_flags & RTF_BROADCAST);
816 else
817 isbroadcast = in_broadcast(dst->sin_addr, ifp);
818 rtfree(ro->ro_rt);
819 ro->ro_rt = ro_fwd->ro_rt;
820 dst = (struct sockaddr_in *)&ro_fwd->ro_dst;
821
822 /*
823 * If we added a default src ip earlier,
824 * which would have been gotten from the-then
825 * interface, do it again, from the new one.
826 */
827 if (fwd_rewrite_src)
828 ip->ip_src = IA_SIN(ia)->sin_addr;
829 goto pass ;
830 }
831 #endif /* IPFIREWALL_FORWARD */
832 /*
833 * if we get here, none of the above matches, and
834 * we have to drop the pkt
835 */
836 m_freem(m);
837 error = EACCES; /* not sure this is the right error msg */
838 goto done;
839 }
840
841 pass:
842 m->m_pkthdr.csum_flags |= CSUM_IP;
843 sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_hwassist;
844
845 if ((ifp->if_hwassist & CSUM_TCP_SUM16) != 0) {
846 /*
847 * Special case code for GMACE
848 * frames that can be checksumed by GMACE SUM16 HW:
849 * frame >64, no fragments, no UDP
850 */
851 if (apple_hwcksum_tx && (m->m_pkthdr.csum_flags & CSUM_TCP)
852 && (ip->ip_len > 50) && (ip->ip_len <= ifp->if_mtu)) {
853 /* Apple GMAC HW, expects STUFF_OFFSET << 16 | START_OFFSET */
854 u_short offset = (IP_VHL_HL(ip->ip_vhl) << 2) +14 ; /* IP+Enet header length */
855 u_short csumprev= m->m_pkthdr.csum_data & 0xFFFF;
856 m->m_pkthdr.csum_flags = CSUM_DATA_VALID | CSUM_TCP_SUM16; /* for GMAC */
857 m->m_pkthdr.csum_data = (csumprev + offset) << 16 ;
858 m->m_pkthdr.csum_data += offset;
859 sw_csum = CSUM_DELAY_IP; /* do IP hdr chksum in software */
860 }
861 else {
862 /* let the software handle any UDP or TCP checksums */
863 sw_csum |= (CSUM_DELAY_DATA & m->m_pkthdr.csum_flags);
864 }
865 }
866
867 if (sw_csum & CSUM_DELAY_DATA) {
868 in_delayed_cksum(m);
869 sw_csum &= ~CSUM_DELAY_DATA;
870 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
871 }
872
873 m->m_pkthdr.csum_flags &= ifp->if_hwassist;
874
875 /*
876 * If small enough for interface, or the interface will take
877 * care of the fragmentation for us, can just send directly.
878 */
879 if ((u_short)ip->ip_len <= ifp->if_mtu ||
880 ifp->if_hwassist & CSUM_FRAGMENT) {
881 HTONS(ip->ip_len);
882 HTONS(ip->ip_off);
883 ip->ip_sum = 0;
884 if (sw_csum & CSUM_DELAY_IP) {
885 ip->ip_sum = in_cksum(m, hlen);
886 }
887
888 #ifndef __APPLE__
889 /* Record statistics for this interface address. */
890 if (!(flags & IP_FORWARDING) && ia != NULL) {
891 ia->ia_ifa.if_opackets++;
892 ia->ia_ifa.if_obytes += m->m_pkthdr.len;
893 }
894 #endif
895
896 #if IPSEC
897 /* clean ipsec history once it goes out of the node */
898 if (ipsec_bypass == 0)
899 ipsec_delaux(m);
900 #endif
901 #if __APPLE__
902 error = dlil_output(dl_tag, m, (void *) ro->ro_rt,
903 (struct sockaddr *)dst, 0);
904 #else
905 error = (*ifp->if_output)(ifp, m,
906 (struct sockaddr *)dst, ro->ro_rt);
907 #endif
908 goto done;
909 }
910 /*
911 * Too large for interface; fragment if possible.
912 * Must be able to put at least 8 bytes per fragment.
913 */
914 if (ip->ip_off & IP_DF) {
915 error = EMSGSIZE;
916 /*
917 * This case can happen if the user changed the MTU
918 * of an interface after enabling IP on it. Because
919 * most netifs don't keep track of routes pointing to
920 * them, there is no way for one to update all its
921 * routes when the MTU is changed.
922 */
923 if ((ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST))
924 && !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU)
925 && (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) {
926 ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
927 }
928 ipstat.ips_cantfrag++;
929 goto bad;
930 }
931 len = (ifp->if_mtu - hlen) &~ 7;
932 if (len < 8) {
933 error = EMSGSIZE;
934 goto bad;
935 }
936
937 /*
938 * if the interface will not calculate checksums on
939 * fragmented packets, then do it here.
940 */
941 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA &&
942 (ifp->if_hwassist & CSUM_IP_FRAGS) == 0) {
943 in_delayed_cksum(m);
944 if (m == NULL)
945 return(ENOMEM);
946 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
947 }
948
949
950 {
951 int mhlen, firstlen = len;
952 struct mbuf **mnext = &m->m_nextpkt;
953 int nfrags = 1;
954
955 /*
956 * Loop through length of segment after first fragment,
957 * make new header and copy data of each part and link onto chain.
958 */
959 m0 = m;
960 mhlen = sizeof (struct ip);
961 for (off = hlen + len; off < (u_short)ip->ip_len; off += len) {
962 MGETHDR(m, M_DONTWAIT, MT_HEADER);
963 if (m == 0) {
964 error = ENOBUFS;
965 ipstat.ips_odropped++;
966 goto sendorfree;
967 }
968 m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG;
969 m->m_data += max_linkhdr;
970 mhip = mtod(m, struct ip *);
971 *mhip = *ip;
972 if (hlen > sizeof (struct ip)) {
973 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
974 mhip->ip_vhl = IP_MAKE_VHL(IPVERSION, mhlen >> 2);
975 }
976 m->m_len = mhlen;
977 mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF);
978 if (ip->ip_off & IP_MF)
979 mhip->ip_off |= IP_MF;
980 if (off + len >= (u_short)ip->ip_len)
981 len = (u_short)ip->ip_len - off;
982 else
983 mhip->ip_off |= IP_MF;
984 mhip->ip_len = htons((u_short)(len + mhlen));
985 m->m_next = m_copy(m0, off, len);
986 if (m->m_next == 0) {
987 (void) m_free(m);
988 error = ENOBUFS; /* ??? */
989 ipstat.ips_odropped++;
990 goto sendorfree;
991 }
992 m->m_pkthdr.len = mhlen + len;
993 m->m_pkthdr.rcvif = (struct ifnet *)0;
994 m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
995 HTONS(mhip->ip_off);
996 mhip->ip_sum = 0;
997 if (sw_csum & CSUM_DELAY_IP) {
998 mhip->ip_sum = in_cksum(m, mhlen);
999 }
1000 *mnext = m;
1001 mnext = &m->m_nextpkt;
1002 nfrags++;
1003 }
1004 ipstat.ips_ofragments += nfrags;
1005
1006 /* set first/last markers for fragment chain */
1007 m->m_flags |= M_LASTFRAG;
1008 m0->m_flags |= M_FIRSTFRAG | M_FRAG;
1009 m0->m_pkthdr.csum_data = nfrags;
1010
1011 /*
1012 * Update first fragment by trimming what's been copied out
1013 * and updating header, then send each fragment (in order).
1014 */
1015 m = m0;
1016 m_adj(m, hlen + firstlen - (u_short)ip->ip_len);
1017 m->m_pkthdr.len = hlen + firstlen;
1018 ip->ip_len = htons((u_short)m->m_pkthdr.len);
1019 ip->ip_off |= IP_MF;
1020 HTONS(ip->ip_off);
1021 ip->ip_sum = 0;
1022 if (sw_csum & CSUM_DELAY_IP) {
1023 ip->ip_sum = in_cksum(m, hlen);
1024 }
1025 sendorfree:
1026
1027 KERNEL_DEBUG(DBG_LAYER_END, ip->ip_dst.s_addr,
1028 ip->ip_src.s_addr, ip->ip_p, ip->ip_off, ip->ip_len);
1029
1030 for (m = m0; m; m = m0) {
1031 m0 = m->m_nextpkt;
1032 m->m_nextpkt = 0;
1033 #if IPSEC
1034 /* clean ipsec history once it goes out of the node */
1035 if (ipsec_bypass == 0)
1036 ipsec_delaux(m);
1037 #endif
1038 if (error == 0) {
1039 #ifndef __APPLE__
1040 /* Record statistics for this interface address. */
1041 if (ia != NULL) {
1042 ia->ia_ifa.if_opackets++;
1043 ia->ia_ifa.if_obytes += m->m_pkthdr.len;
1044 }
1045 #endif
1046
1047 #if __APPLE__
1048 error = dlil_output(dl_tag, m, (void *) ro->ro_rt,
1049 (struct sockaddr *)dst, 0);
1050 #else
1051 error = (*ifp->if_output)(ifp, m,
1052 (struct sockaddr *)dst, ro->ro_rt);
1053 #endif
1054 } else
1055 m_freem(m);
1056 }
1057
1058 if (error == 0)
1059 ipstat.ips_fragmented++;
1060 }
1061 done:
1062 #if IPSEC
1063 if (ipsec_bypass == 0) {
1064 if (ro == &iproute && ro->ro_rt) {
1065 rtfree(ro->ro_rt);
1066 ro->ro_rt = NULL;
1067 }
1068 if (sp != NULL) {
1069 KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
1070 printf("DP ip_output call free SP:%x\n", sp));
1071 key_freesp(sp);
1072 }
1073 }
1074 #endif /* IPSEC */
1075
1076 KERNEL_DEBUG(DBG_FNC_IP_OUTPUT | DBG_FUNC_END, error,0,0,0,0);
1077 return (error);
1078 bad:
1079 m_freem(m0);
1080 goto done;
1081 }
1082
1083 void
1084 in_delayed_cksum(struct mbuf *m)
1085 {
1086 struct ip *ip;
1087 u_short csum, offset;
1088 ip = mtod(m, struct ip *);
1089 offset = IP_VHL_HL(ip->ip_vhl) << 2 ;
1090 csum = in_cksum_skip(m, ip->ip_len, offset);
1091 if (m->m_pkthdr.csum_flags & CSUM_UDP && csum == 0)
1092 csum = 0xffff;
1093 offset += m->m_pkthdr.csum_data & 0xFFFF; /* checksum offset */
1094
1095 if (offset > ip->ip_len) /* bogus offset */
1096 return;
1097
1098 if (offset + sizeof(u_short) > m->m_len) {
1099 printf("delayed m_pullup, m->len: %d off: %d p: %d\n",
1100 m->m_len, offset, ip->ip_p);
1101 /*
1102 * XXX
1103 * this shouldn't happen, but if it does, the
1104 * correct behavior may be to insert the checksum
1105 * in the existing chain instead of rearranging it.
1106 */
1107 m = m_pullup(m, offset + sizeof(u_short));
1108 }
1109 *(u_short *)(m->m_data + offset) = csum;
1110 }
1111
1112 /*
1113 * Insert IP options into preformed packet.
1114 * Adjust IP destination as required for IP source routing,
1115 * as indicated by a non-zero in_addr at the start of the options.
1116 *
1117 * XXX This routine assumes that the packet has no options in place.
1118 */
1119 static struct mbuf *
1120 ip_insertoptions(m, opt, phlen)
1121 register struct mbuf *m;
1122 struct mbuf *opt;
1123 int *phlen;
1124 {
1125 register struct ipoption *p = mtod(opt, struct ipoption *);
1126 struct mbuf *n;
1127 register struct ip *ip = mtod(m, struct ip *);
1128 unsigned optlen;
1129
1130 optlen = opt->m_len - sizeof(p->ipopt_dst);
1131 if (optlen + (u_short)ip->ip_len > IP_MAXPACKET)
1132 return (m); /* XXX should fail */
1133 if (p->ipopt_dst.s_addr)
1134 ip->ip_dst = p->ipopt_dst;
1135 if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
1136 MGETHDR(n, M_DONTWAIT, MT_HEADER);
1137 if (n == 0)
1138 return (m);
1139 n->m_pkthdr.rcvif = (struct ifnet *)0;
1140 n->m_pkthdr.len = m->m_pkthdr.len + optlen;
1141 m->m_len -= sizeof(struct ip);
1142 m->m_data += sizeof(struct ip);
1143 n->m_next = m;
1144 m = n;
1145 m->m_len = optlen + sizeof(struct ip);
1146 m->m_data += max_linkhdr;
1147 (void)memcpy(mtod(m, void *), ip, sizeof(struct ip));
1148 } else {
1149 m->m_data -= optlen;
1150 m->m_len += optlen;
1151 m->m_pkthdr.len += optlen;
1152 ovbcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
1153 }
1154 ip = mtod(m, struct ip *);
1155 bcopy(p->ipopt_list, ip + 1, optlen);
1156 *phlen = sizeof(struct ip) + optlen;
1157 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, *phlen >> 2);
1158 ip->ip_len += optlen;
1159 return (m);
1160 }
1161
1162 /*
1163 * Copy options from ip to jp,
1164 * omitting those not copied during fragmentation.
1165 */
1166 int
1167 ip_optcopy(ip, jp)
1168 struct ip *ip, *jp;
1169 {
1170 register u_char *cp, *dp;
1171 int opt, optlen, cnt;
1172
1173 cp = (u_char *)(ip + 1);
1174 dp = (u_char *)(jp + 1);
1175 cnt = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof (struct ip);
1176 for (; cnt > 0; cnt -= optlen, cp += optlen) {
1177 opt = cp[0];
1178 if (opt == IPOPT_EOL)
1179 break;
1180 if (opt == IPOPT_NOP) {
1181 /* Preserve for IP mcast tunnel's LSRR alignment. */
1182 *dp++ = IPOPT_NOP;
1183 optlen = 1;
1184 continue;
1185 }
1186 #if DIAGNOSTIC
1187 if (cnt < IPOPT_OLEN + sizeof(*cp))
1188 panic("malformed IPv4 option passed to ip_optcopy");
1189 #endif
1190 optlen = cp[IPOPT_OLEN];
1191 #if DIAGNOSTIC
1192 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
1193 panic("malformed IPv4 option passed to ip_optcopy");
1194 #endif
1195 /* bogus lengths should have been caught by ip_dooptions */
1196 if (optlen > cnt)
1197 optlen = cnt;
1198 if (IPOPT_COPIED(opt)) {
1199 bcopy(cp, dp, optlen);
1200 dp += optlen;
1201 }
1202 }
1203 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
1204 *dp++ = IPOPT_EOL;
1205 return (optlen);
1206 }
1207
1208 /*
1209 * IP socket option processing.
1210 */
1211 int
1212 ip_ctloutput(so, sopt)
1213 struct socket *so;
1214 struct sockopt *sopt;
1215 {
1216 struct inpcb *inp = sotoinpcb(so);
1217 int error, optval;
1218
1219 error = optval = 0;
1220 if (sopt->sopt_level != IPPROTO_IP) {
1221 return (EINVAL);
1222 }
1223
1224 switch (sopt->sopt_dir) {
1225 case SOPT_SET:
1226 switch (sopt->sopt_name) {
1227 case IP_OPTIONS:
1228 #ifdef notyet
1229 case IP_RETOPTS:
1230 #endif
1231 {
1232 struct mbuf *m;
1233 if (sopt->sopt_valsize > MLEN) {
1234 error = EMSGSIZE;
1235 break;
1236 }
1237 MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_HEADER);
1238 if (m == 0) {
1239 error = ENOBUFS;
1240 break;
1241 }
1242 m->m_len = sopt->sopt_valsize;
1243 error = sooptcopyin(sopt, mtod(m, char *), m->m_len,
1244 m->m_len);
1245 if (error)
1246 break;
1247
1248 return (ip_pcbopts(sopt->sopt_name, &inp->inp_options,
1249 m));
1250 }
1251
1252 case IP_TOS:
1253 case IP_TTL:
1254 case IP_RECVOPTS:
1255 case IP_RECVRETOPTS:
1256 case IP_RECVDSTADDR:
1257 case IP_RECVIF:
1258 #if defined(NFAITH) && NFAITH > 0
1259 case IP_FAITH:
1260 #endif
1261 error = sooptcopyin(sopt, &optval, sizeof optval,
1262 sizeof optval);
1263 if (error)
1264 break;
1265
1266 switch (sopt->sopt_name) {
1267 case IP_TOS:
1268 inp->inp_ip_tos = optval;
1269 break;
1270
1271 case IP_TTL:
1272 inp->inp_ip_ttl = optval;
1273 break;
1274 #define OPTSET(bit) \
1275 if (optval) \
1276 inp->inp_flags |= bit; \
1277 else \
1278 inp->inp_flags &= ~bit;
1279
1280 case IP_RECVOPTS:
1281 OPTSET(INP_RECVOPTS);
1282 break;
1283
1284 case IP_RECVRETOPTS:
1285 OPTSET(INP_RECVRETOPTS);
1286 break;
1287
1288 case IP_RECVDSTADDR:
1289 OPTSET(INP_RECVDSTADDR);
1290 break;
1291
1292 case IP_RECVIF:
1293 OPTSET(INP_RECVIF);
1294 break;
1295
1296 #if defined(NFAITH) && NFAITH > 0
1297 case IP_FAITH:
1298 OPTSET(INP_FAITH);
1299 break;
1300 #endif
1301 }
1302 break;
1303 #undef OPTSET
1304
1305 case IP_MULTICAST_IF:
1306 case IP_MULTICAST_VIF:
1307 case IP_MULTICAST_TTL:
1308 case IP_MULTICAST_LOOP:
1309 case IP_ADD_MEMBERSHIP:
1310 case IP_DROP_MEMBERSHIP:
1311 error = ip_setmoptions(sopt, &inp->inp_moptions);
1312 break;
1313
1314 case IP_PORTRANGE:
1315 error = sooptcopyin(sopt, &optval, sizeof optval,
1316 sizeof optval);
1317 if (error)
1318 break;
1319
1320 switch (optval) {
1321 case IP_PORTRANGE_DEFAULT:
1322 inp->inp_flags &= ~(INP_LOWPORT);
1323 inp->inp_flags &= ~(INP_HIGHPORT);
1324 break;
1325
1326 case IP_PORTRANGE_HIGH:
1327 inp->inp_flags &= ~(INP_LOWPORT);
1328 inp->inp_flags |= INP_HIGHPORT;
1329 break;
1330
1331 case IP_PORTRANGE_LOW:
1332 inp->inp_flags &= ~(INP_HIGHPORT);
1333 inp->inp_flags |= INP_LOWPORT;
1334 break;
1335
1336 default:
1337 error = EINVAL;
1338 break;
1339 }
1340 break;
1341
1342 #if IPSEC
1343 case IP_IPSEC_POLICY:
1344 {
1345 caddr_t req = NULL;
1346 size_t len = 0;
1347 int priv;
1348 struct mbuf *m;
1349 int optname;
1350
1351 if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
1352 break;
1353 if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
1354 break;
1355 priv = (sopt->sopt_p != NULL &&
1356 suser(sopt->sopt_p->p_ucred,
1357 &sopt->sopt_p->p_acflag) != 0) ? 0 : 1;
1358 if (m) {
1359 req = mtod(m, caddr_t);
1360 len = m->m_len;
1361 }
1362 optname = sopt->sopt_name;
1363 error = ipsec4_set_policy(inp, optname, req, len, priv);
1364 m_freem(m);
1365 break;
1366 }
1367 #endif /*IPSEC*/
1368
1369 default:
1370 error = ENOPROTOOPT;
1371 break;
1372 }
1373 break;
1374
1375 case SOPT_GET:
1376 switch (sopt->sopt_name) {
1377 case IP_OPTIONS:
1378 case IP_RETOPTS:
1379 if (inp->inp_options)
1380 error = sooptcopyout(sopt,
1381 mtod(inp->inp_options,
1382 char *),
1383 inp->inp_options->m_len);
1384 else
1385 sopt->sopt_valsize = 0;
1386 break;
1387
1388 case IP_TOS:
1389 case IP_TTL:
1390 case IP_RECVOPTS:
1391 case IP_RECVRETOPTS:
1392 case IP_RECVDSTADDR:
1393 case IP_RECVIF:
1394 case IP_PORTRANGE:
1395 #if defined(NFAITH) && NFAITH > 0
1396 case IP_FAITH:
1397 #endif
1398 switch (sopt->sopt_name) {
1399
1400 case IP_TOS:
1401 optval = inp->inp_ip_tos;
1402 break;
1403
1404 case IP_TTL:
1405 optval = inp->inp_ip_ttl;
1406 break;
1407
1408 #define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0)
1409
1410 case IP_RECVOPTS:
1411 optval = OPTBIT(INP_RECVOPTS);
1412 break;
1413
1414 case IP_RECVRETOPTS:
1415 optval = OPTBIT(INP_RECVRETOPTS);
1416 break;
1417
1418 case IP_RECVDSTADDR:
1419 optval = OPTBIT(INP_RECVDSTADDR);
1420 break;
1421
1422 case IP_RECVIF:
1423 optval = OPTBIT(INP_RECVIF);
1424 break;
1425
1426 case IP_PORTRANGE:
1427 if (inp->inp_flags & INP_HIGHPORT)
1428 optval = IP_PORTRANGE_HIGH;
1429 else if (inp->inp_flags & INP_LOWPORT)
1430 optval = IP_PORTRANGE_LOW;
1431 else
1432 optval = 0;
1433 break;
1434
1435 #if defined(NFAITH) && NFAITH > 0
1436 case IP_FAITH:
1437 optval = OPTBIT(INP_FAITH);
1438 break;
1439 #endif
1440 }
1441 error = sooptcopyout(sopt, &optval, sizeof optval);
1442 break;
1443
1444 case IP_MULTICAST_IF:
1445 case IP_MULTICAST_VIF:
1446 case IP_MULTICAST_TTL:
1447 case IP_MULTICAST_LOOP:
1448 case IP_ADD_MEMBERSHIP:
1449 case IP_DROP_MEMBERSHIP:
1450 error = ip_getmoptions(sopt, inp->inp_moptions);
1451 break;
1452
1453 #if IPSEC
1454 case IP_IPSEC_POLICY:
1455 {
1456 struct mbuf *m = NULL;
1457 caddr_t req = NULL;
1458 size_t len = 0;
1459
1460 if (m != 0) {
1461 req = mtod(m, caddr_t);
1462 len = m->m_len;
1463 }
1464 error = ipsec4_get_policy(sotoinpcb(so), req, len, &m);
1465 if (error == 0)
1466 error = soopt_mcopyout(sopt, m); /* XXX */
1467 if (error == 0)
1468 m_freem(m);
1469 break;
1470 }
1471 #endif /*IPSEC*/
1472
1473 default:
1474 error = ENOPROTOOPT;
1475 break;
1476 }
1477 break;
1478 }
1479 return (error);
1480 }
1481
1482 /*
1483 * Set up IP options in pcb for insertion in output packets.
1484 * Store in mbuf with pointer in pcbopt, adding pseudo-option
1485 * with destination address if source routed.
1486 */
1487 static int
1488 ip_pcbopts(optname, pcbopt, m)
1489 int optname;
1490 struct mbuf **pcbopt;
1491 register struct mbuf *m;
1492 {
1493 register int cnt, optlen;
1494 register u_char *cp;
1495 u_char opt;
1496
1497 /* turn off any old options */
1498 if (*pcbopt)
1499 (void)m_free(*pcbopt);
1500 *pcbopt = 0;
1501 if (m == (struct mbuf *)0 || m->m_len == 0) {
1502 /*
1503 * Only turning off any previous options.
1504 */
1505 if (m)
1506 (void)m_free(m);
1507 return (0);
1508 }
1509
1510 #ifndef vax
1511 if (m->m_len % sizeof(int32_t))
1512 goto bad;
1513 #endif
1514 /*
1515 * IP first-hop destination address will be stored before
1516 * actual options; move other options back
1517 * and clear it when none present.
1518 */
1519 if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
1520 goto bad;
1521 cnt = m->m_len;
1522 m->m_len += sizeof(struct in_addr);
1523 cp = mtod(m, u_char *) + sizeof(struct in_addr);
1524 ovbcopy(mtod(m, caddr_t), (caddr_t)cp, (unsigned)cnt);
1525 bzero(mtod(m, caddr_t), sizeof(struct in_addr));
1526
1527 for (; cnt > 0; cnt -= optlen, cp += optlen) {
1528 opt = cp[IPOPT_OPTVAL];
1529 if (opt == IPOPT_EOL)
1530 break;
1531 if (opt == IPOPT_NOP)
1532 optlen = 1;
1533 else {
1534 if (cnt < IPOPT_OLEN + sizeof(*cp))
1535 goto bad;
1536 optlen = cp[IPOPT_OLEN];
1537 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
1538 goto bad;
1539 }
1540 switch (opt) {
1541
1542 default:
1543 break;
1544
1545 case IPOPT_LSRR:
1546 case IPOPT_SSRR:
1547 /*
1548 * user process specifies route as:
1549 * ->A->B->C->D
1550 * D must be our final destination (but we can't
1551 * check that since we may not have connected yet).
1552 * A is first hop destination, which doesn't appear in
1553 * actual IP option, but is stored before the options.
1554 */
1555 if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
1556 goto bad;
1557 m->m_len -= sizeof(struct in_addr);
1558 cnt -= sizeof(struct in_addr);
1559 optlen -= sizeof(struct in_addr);
1560 cp[IPOPT_OLEN] = optlen;
1561 /*
1562 * Move first hop before start of options.
1563 */
1564 bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
1565 sizeof(struct in_addr));
1566 /*
1567 * Then copy rest of options back
1568 * to close up the deleted entry.
1569 */
1570 ovbcopy((caddr_t)(&cp[IPOPT_OFFSET+1] +
1571 sizeof(struct in_addr)),
1572 (caddr_t)&cp[IPOPT_OFFSET+1],
1573 (unsigned)cnt + sizeof(struct in_addr));
1574 break;
1575 }
1576 }
1577 if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
1578 goto bad;
1579 *pcbopt = m;
1580 return (0);
1581
1582 bad:
1583 (void)m_free(m);
1584 return (EINVAL);
1585 }
1586
1587 /*
1588 * XXX
1589 * The whole multicast option thing needs to be re-thought.
1590 * Several of these options are equally applicable to non-multicast
1591 * transmission, and one (IP_MULTICAST_TTL) totally duplicates a
1592 * standard option (IP_TTL).
1593 */
1594
1595 /*
1596 * following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as interface index.
1597 */
1598 static struct ifnet *
1599 ip_multicast_if(a, ifindexp)
1600 struct in_addr *a;
1601 int *ifindexp;
1602 {
1603 int ifindex;
1604 struct ifnet *ifp;
1605
1606 if (ifindexp)
1607 *ifindexp = 0;
1608 if (ntohl(a->s_addr) >> 24 == 0) {
1609 ifindex = ntohl(a->s_addr) & 0xffffff;
1610 if (ifindex < 0 || if_index < ifindex)
1611 return NULL;
1612 ifp = ifindex2ifnet[ifindex];
1613 if (ifindexp)
1614 *ifindexp = ifindex;
1615 } else {
1616 INADDR_TO_IFP(*a, ifp);
1617 }
1618 return ifp;
1619 }
1620
1621 /*
1622 * Set the IP multicast options in response to user setsockopt().
1623 */
1624 static int
1625 ip_setmoptions(sopt, imop)
1626 struct sockopt *sopt;
1627 struct ip_moptions **imop;
1628 {
1629 int error = 0;
1630 int i;
1631 struct in_addr addr;
1632 struct ip_mreq mreq;
1633 struct ifnet *ifp = NULL;
1634 struct ip_moptions *imo = *imop;
1635 struct route ro;
1636 struct sockaddr_in *dst;
1637 int ifindex;
1638 int s;
1639
1640 if (imo == NULL) {
1641 /*
1642 * No multicast option buffer attached to the pcb;
1643 * allocate one and initialize to default values.
1644 */
1645 imo = (struct ip_moptions*) _MALLOC(sizeof(*imo), M_IPMOPTS,
1646 M_WAITOK);
1647
1648 if (imo == NULL)
1649 return (ENOBUFS);
1650 *imop = imo;
1651 imo->imo_multicast_ifp = NULL;
1652 imo->imo_multicast_addr.s_addr = INADDR_ANY;
1653 imo->imo_multicast_vif = -1;
1654 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1655 imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
1656 imo->imo_num_memberships = 0;
1657 }
1658
1659 switch (sopt->sopt_name) {
1660 /* store an index number for the vif you wanna use in the send */
1661 case IP_MULTICAST_VIF:
1662 if (legal_vif_num == 0) {
1663 error = EOPNOTSUPP;
1664 break;
1665 }
1666 error = sooptcopyin(sopt, &i, sizeof i, sizeof i);
1667 if (error)
1668 break;
1669 if (!legal_vif_num(i) && (i != -1)) {
1670 error = EINVAL;
1671 break;
1672 }
1673 imo->imo_multicast_vif = i;
1674 break;
1675
1676 case IP_MULTICAST_IF:
1677 /*
1678 * Select the interface for outgoing multicast packets.
1679 */
1680 error = sooptcopyin(sopt, &addr, sizeof addr, sizeof addr);
1681 if (error)
1682 break;
1683 /*
1684 * INADDR_ANY is used to remove a previous selection.
1685 * When no interface is selected, a default one is
1686 * chosen every time a multicast packet is sent.
1687 */
1688 if (addr.s_addr == INADDR_ANY) {
1689 imo->imo_multicast_ifp = NULL;
1690 break;
1691 }
1692 /*
1693 * The selected interface is identified by its local
1694 * IP address. Find the interface and confirm that
1695 * it supports multicasting.
1696 */
1697 s = splimp();
1698 ifp = ip_multicast_if(&addr, &ifindex);
1699 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1700 splx(s);
1701 error = EADDRNOTAVAIL;
1702 break;
1703 }
1704 imo->imo_multicast_ifp = ifp;
1705 if (ifindex)
1706 imo->imo_multicast_addr = addr;
1707 else
1708 imo->imo_multicast_addr.s_addr = INADDR_ANY;
1709 splx(s);
1710 break;
1711
1712 case IP_MULTICAST_TTL:
1713 /*
1714 * Set the IP time-to-live for outgoing multicast packets.
1715 * The original multicast API required a char argument,
1716 * which is inconsistent with the rest of the socket API.
1717 * We allow either a char or an int.
1718 */
1719 if (sopt->sopt_valsize == 1) {
1720 u_char ttl;
1721 error = sooptcopyin(sopt, &ttl, 1, 1);
1722 if (error)
1723 break;
1724 imo->imo_multicast_ttl = ttl;
1725 } else {
1726 u_int ttl;
1727 error = sooptcopyin(sopt, &ttl, sizeof ttl,
1728 sizeof ttl);
1729 if (error)
1730 break;
1731 if (ttl > 255)
1732 error = EINVAL;
1733 else
1734 imo->imo_multicast_ttl = ttl;
1735 }
1736 break;
1737
1738 case IP_MULTICAST_LOOP:
1739 /*
1740 * Set the loopback flag for outgoing multicast packets.
1741 * Must be zero or one. The original multicast API required a
1742 * char argument, which is inconsistent with the rest
1743 * of the socket API. We allow either a char or an int.
1744 */
1745 if (sopt->sopt_valsize == 1) {
1746 u_char loop;
1747 error = sooptcopyin(sopt, &loop, 1, 1);
1748 if (error)
1749 break;
1750 imo->imo_multicast_loop = !!loop;
1751 } else {
1752 u_int loop;
1753 error = sooptcopyin(sopt, &loop, sizeof loop,
1754 sizeof loop);
1755 if (error)
1756 break;
1757 imo->imo_multicast_loop = !!loop;
1758 }
1759 break;
1760
1761 case IP_ADD_MEMBERSHIP:
1762 /*
1763 * Add a multicast group membership.
1764 * Group must be a valid IP multicast address.
1765 */
1766 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
1767 if (error)
1768 break;
1769
1770 if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) {
1771 error = EINVAL;
1772 break;
1773 }
1774 s = splimp();
1775 /*
1776 * If no interface address was provided, use the interface of
1777 * the route to the given multicast address.
1778 */
1779 if (mreq.imr_interface.s_addr == INADDR_ANY) {
1780 bzero((caddr_t)&ro, sizeof(ro));
1781 dst = (struct sockaddr_in *)&ro.ro_dst;
1782 dst->sin_len = sizeof(*dst);
1783 dst->sin_family = AF_INET;
1784 dst->sin_addr = mreq.imr_multiaddr;
1785 rtalloc(&ro);
1786 if (ro.ro_rt != NULL) {
1787 ifp = ro.ro_rt->rt_ifp;
1788 rtfree(ro.ro_rt);
1789 }
1790 else {
1791 /* If there's no default route, try using loopback */
1792 mreq.imr_interface.s_addr = INADDR_LOOPBACK;
1793 }
1794 }
1795
1796 if (ifp == NULL) {
1797 ifp = ip_multicast_if(&mreq.imr_interface, NULL);
1798 }
1799
1800 /*
1801 * See if we found an interface, and confirm that it
1802 * supports multicast.
1803 */
1804 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1805 error = EADDRNOTAVAIL;
1806 splx(s);
1807 break;
1808 }
1809 /*
1810 * See if the membership already exists or if all the
1811 * membership slots are full.
1812 */
1813 for (i = 0; i < imo->imo_num_memberships; ++i) {
1814 if (imo->imo_membership[i]->inm_ifp == ifp &&
1815 imo->imo_membership[i]->inm_addr.s_addr
1816 == mreq.imr_multiaddr.s_addr)
1817 break;
1818 }
1819 if (i < imo->imo_num_memberships) {
1820 error = EADDRINUSE;
1821 splx(s);
1822 break;
1823 }
1824 if (i == IP_MAX_MEMBERSHIPS) {
1825 error = ETOOMANYREFS;
1826 splx(s);
1827 break;
1828 }
1829 /*
1830 * Everything looks good; add a new record to the multicast
1831 * address list for the given interface.
1832 */
1833 if ((imo->imo_membership[i] =
1834 in_addmulti(&mreq.imr_multiaddr, ifp)) == NULL) {
1835 error = ENOBUFS;
1836 splx(s);
1837 break;
1838 }
1839 ++imo->imo_num_memberships;
1840 splx(s);
1841 break;
1842
1843 case IP_DROP_MEMBERSHIP:
1844 /*
1845 * Drop a multicast group membership.
1846 * Group must be a valid IP multicast address.
1847 */
1848 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
1849 if (error)
1850 break;
1851
1852 if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) {
1853 error = EINVAL;
1854 break;
1855 }
1856
1857 s = splimp();
1858 /*
1859 * If an interface address was specified, get a pointer
1860 * to its ifnet structure.
1861 */
1862 if (mreq.imr_interface.s_addr == INADDR_ANY)
1863 ifp = NULL;
1864 else {
1865 ifp = ip_multicast_if(&mreq.imr_interface, NULL);
1866 if (ifp == NULL) {
1867 error = EADDRNOTAVAIL;
1868 splx(s);
1869 break;
1870 }
1871 }
1872 /*
1873 * Find the membership in the membership array.
1874 */
1875 for (i = 0; i < imo->imo_num_memberships; ++i) {
1876 if ((ifp == NULL ||
1877 imo->imo_membership[i]->inm_ifp == ifp) &&
1878 imo->imo_membership[i]->inm_addr.s_addr ==
1879 mreq.imr_multiaddr.s_addr)
1880 break;
1881 }
1882 if (i == imo->imo_num_memberships) {
1883 error = EADDRNOTAVAIL;
1884 splx(s);
1885 break;
1886 }
1887 /*
1888 * Give up the multicast address record to which the
1889 * membership points.
1890 */
1891 in_delmulti(imo->imo_membership[i]);
1892 /*
1893 * Remove the gap in the membership array.
1894 */
1895 for (++i; i < imo->imo_num_memberships; ++i)
1896 imo->imo_membership[i-1] = imo->imo_membership[i];
1897 --imo->imo_num_memberships;
1898 splx(s);
1899 break;
1900
1901 default:
1902 error = EOPNOTSUPP;
1903 break;
1904 }
1905
1906 /*
1907 * If all options have default values, no need to keep the mbuf.
1908 */
1909 if (imo->imo_multicast_ifp == NULL &&
1910 imo->imo_multicast_vif == -1 &&
1911 imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL &&
1912 imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP &&
1913 imo->imo_num_memberships == 0) {
1914 FREE(*imop, M_IPMOPTS);
1915 *imop = NULL;
1916 }
1917
1918 return (error);
1919 }
1920
1921 /*
1922 * Return the IP multicast options in response to user getsockopt().
1923 */
1924 static int
1925 ip_getmoptions(sopt, imo)
1926 struct sockopt *sopt;
1927 register struct ip_moptions *imo;
1928 {
1929 struct in_addr addr;
1930 struct in_ifaddr *ia;
1931 int error, optval;
1932 u_char coptval;
1933
1934 error = 0;
1935 switch (sopt->sopt_name) {
1936 case IP_MULTICAST_VIF:
1937 if (imo != NULL)
1938 optval = imo->imo_multicast_vif;
1939 else
1940 optval = -1;
1941 error = sooptcopyout(sopt, &optval, sizeof optval);
1942 break;
1943
1944 case IP_MULTICAST_IF:
1945 if (imo == NULL || imo->imo_multicast_ifp == NULL)
1946 addr.s_addr = INADDR_ANY;
1947 else if (imo->imo_multicast_addr.s_addr) {
1948 /* return the value user has set */
1949 addr = imo->imo_multicast_addr;
1950 } else {
1951 IFP_TO_IA(imo->imo_multicast_ifp, ia);
1952 addr.s_addr = (ia == NULL) ? INADDR_ANY
1953 : IA_SIN(ia)->sin_addr.s_addr;
1954 }
1955 error = sooptcopyout(sopt, &addr, sizeof addr);
1956 break;
1957
1958 case IP_MULTICAST_TTL:
1959 if (imo == 0)
1960 optval = coptval = IP_DEFAULT_MULTICAST_TTL;
1961 else
1962 optval = coptval = imo->imo_multicast_ttl;
1963 if (sopt->sopt_valsize == 1)
1964 error = sooptcopyout(sopt, &coptval, 1);
1965 else
1966 error = sooptcopyout(sopt, &optval, sizeof optval);
1967 break;
1968
1969 case IP_MULTICAST_LOOP:
1970 if (imo == 0)
1971 optval = coptval = IP_DEFAULT_MULTICAST_LOOP;
1972 else
1973 optval = coptval = imo->imo_multicast_loop;
1974 if (sopt->sopt_valsize == 1)
1975 error = sooptcopyout(sopt, &coptval, 1);
1976 else
1977 error = sooptcopyout(sopt, &optval, sizeof optval);
1978 break;
1979
1980 default:
1981 error = ENOPROTOOPT;
1982 break;
1983 }
1984 return (error);
1985 }
1986
1987 /*
1988 * Discard the IP multicast options.
1989 */
1990 void
1991 ip_freemoptions(imo)
1992 register struct ip_moptions *imo;
1993 {
1994 register int i;
1995
1996 if (imo != NULL) {
1997 for (i = 0; i < imo->imo_num_memberships; ++i)
1998 if (imo->imo_membership[i] != NULL)
1999 in_delmulti(imo->imo_membership[i]);
2000 FREE(imo, M_IPMOPTS);
2001 }
2002 }
2003
2004 /*
2005 * Routine called from ip_output() to loop back a copy of an IP multicast
2006 * packet to the input queue of a specified interface. Note that this
2007 * calls the output routine of the loopback "driver", but with an interface
2008 * pointer that might NOT be a loopback interface -- evil, but easier than
2009 * replicating that code here.
2010 */
2011 static void
2012 ip_mloopback(ifp, m, dst, hlen)
2013 struct ifnet *ifp;
2014 register struct mbuf *m;
2015 register struct sockaddr_in *dst;
2016 int hlen;
2017 {
2018 register struct ip *ip;
2019 struct mbuf *copym;
2020
2021 copym = m_copy(m, 0, M_COPYALL);
2022 if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen))
2023 copym = m_pullup(copym, hlen);
2024 if (copym != NULL) {
2025 /*
2026 * We don't bother to fragment if the IP length is greater
2027 * than the interface's MTU. Can this possibly matter?
2028 */
2029 ip = mtod(copym, struct ip *);
2030 HTONS(ip->ip_len);
2031 HTONS(ip->ip_off);
2032 ip->ip_sum = 0;
2033 ip->ip_sum = in_cksum(copym, hlen);
2034 /*
2035 * NB:
2036 * It's not clear whether there are any lingering
2037 * reentrancy problems in other areas which might
2038 * be exposed by using ip_input directly (in
2039 * particular, everything which modifies the packet
2040 * in-place). Yet another option is using the
2041 * protosw directly to deliver the looped back
2042 * packet. For the moment, we'll err on the side
2043 * of safety by using if_simloop().
2044 */
2045 #if 1 /* XXX */
2046 if (dst->sin_family != AF_INET) {
2047 printf("ip_mloopback: bad address family %d\n",
2048 dst->sin_family);
2049 dst->sin_family = AF_INET;
2050 }
2051 #endif
2052
2053
2054 /*
2055 * Mark checksum as valid or calculate checksum for loopback.
2056 *
2057 * This is done this way because we have to embed the ifp of
2058 * the interface we will send the original copy of the packet
2059 * out on in the mbuf. ip_input will check if_hwassist of the
2060 * embedded ifp and ignore all csum_flags if if_hwassist is 0.
2061 * The UDP checksum has not been calculated yet.
2062 */
2063 if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
2064 if (ifp->if_hwassist) {
2065 copym->m_pkthdr.csum_flags |=
2066 CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
2067 CSUM_IP_CHECKED | CSUM_IP_VALID;
2068 copym->m_pkthdr.csum_data = 0xffff;
2069 } else {
2070 NTOHS(ip->ip_len);
2071 in_delayed_cksum(copym);
2072 HTONS(ip->ip_len);
2073 }
2074 }
2075
2076
2077 /*
2078 * TedW:
2079 * We need to send all loopback traffic down to dlil in case
2080 * a filter has tapped-in.
2081 */
2082
2083 if (lo_dl_tag == 0)
2084 dlil_find_dltag(APPLE_IF_FAM_LOOPBACK, 0, PF_INET, &lo_dl_tag);
2085
2086 /*
2087 * Stuff the 'real' ifp into the pkthdr, to be used in matching
2088 * in ip_input(); we need the loopback ifp/dl_tag passed as args
2089 * to make the loopback driver compliant with the data link
2090 * requirements.
2091 */
2092 if (lo_dl_tag) {
2093 copym->m_pkthdr.rcvif = ifp;
2094 dlil_output(lo_dl_tag, copym, 0, (struct sockaddr *) dst, 0);
2095 } else {
2096 printf("Warning: ip_output call to dlil_find_dltag failed!\n");
2097 m_freem(copym);
2098 }
2099
2100 /* if_simloop(ifp, copym, (struct sockaddr *)dst, 0);*/
2101 }
2102 }