]> git.saurik.com Git - apple/xnu.git/blob - bsd/netinet/ip_output.c
e4dc272d826184e767db39824ad9157a46bfd200
[apple/xnu.git] / bsd / netinet / ip_output.c
1 /*
2 * Copyright (c) 2006 Apple Computer, Inc. All Rights Reserved.
3 *
4 * @APPLE_LICENSE_OSREFERENCE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the
10 * License may not be used to create, or enable the creation or
11 * redistribution of, unlawful or unlicensed copies of an Apple operating
12 * system, or to circumvent, violate, or enable the circumvention or
13 * violation of, any terms of an Apple operating system software license
14 * agreement.
15 *
16 * Please obtain a copy of the License at
17 * http://www.opensource.apple.com/apsl/ and read it before using this
18 * file.
19 *
20 * The Original Code and all software distributed under the License are
21 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
22 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
23 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
24 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
25 * Please see the License for the specific language governing rights and
26 * limitations under the License.
27 *
28 * @APPLE_LICENSE_OSREFERENCE_HEADER_END@
29 */
30 /*
31 * Copyright (c) 1982, 1986, 1988, 1990, 1993
32 * The Regents of the University of California. All rights reserved.
33 *
34 * Redistribution and use in source and binary forms, with or without
35 * modification, are permitted provided that the following conditions
36 * are met:
37 * 1. Redistributions of source code must retain the above copyright
38 * notice, this list of conditions and the following disclaimer.
39 * 2. Redistributions in binary form must reproduce the above copyright
40 * notice, this list of conditions and the following disclaimer in the
41 * documentation and/or other materials provided with the distribution.
42 * 3. All advertising materials mentioning features or use of this software
43 * must display the following acknowledgement:
44 * This product includes software developed by the University of
45 * California, Berkeley and its contributors.
46 * 4. Neither the name of the University nor the names of its contributors
47 * may be used to endorse or promote products derived from this software
48 * without specific prior written permission.
49 *
50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * SUCH DAMAGE.
61 *
62 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94
63 * $FreeBSD: src/sys/netinet/ip_output.c,v 1.99.2.16 2001/07/19 06:37:26 kris Exp $
64 */
65
66 #define _IP_VHL
67
68 #include <sys/param.h>
69 #include <sys/systm.h>
70 #include <sys/kernel.h>
71 #include <sys/malloc.h>
72 #include <sys/mbuf.h>
73 #include <sys/protosw.h>
74 #include <sys/socket.h>
75 #include <sys/socketvar.h>
76 #include <kern/locks.h>
77 #include <sys/sysctl.h>
78
79 #include <net/if.h>
80 #include <net/route.h>
81
82 #include <netinet/in.h>
83 #include <netinet/in_systm.h>
84 #include <netinet/ip.h>
85 #include <netinet/in_pcb.h>
86 #include <netinet/in_var.h>
87 #include <netinet/ip_var.h>
88
89 #include <netinet/kpi_ipfilter_var.h>
90
91 #include "faith.h"
92
93 #include <net/dlil.h>
94 #include <sys/kdebug.h>
95
/*
 * kdebug trace codes handed to KERNEL_DEBUG() by the IP output path.
 * All four are built with NETDBG_CODE() in the DBG_NETIP class.
 *   DBG_FNC_IP_OUTPUT     - emitted (with DBG_FUNC_START) on entry to
 *                           ip_output_list().
 *   DBG_LAYER_BEG         - emitted once the IP header has been filled in,
 *                           carrying dst, src, proto, off and len.
 *   DBG_FNC_IPSEC4_OUTPUT - brackets the IPsec policy lookup and output
 *                           processing (DBG_FUNC_START / DBG_FUNC_END).
 */
96 #define DBG_LAYER_BEG NETDBG_CODE(DBG_NETIP, 1)
97 #define DBG_LAYER_END NETDBG_CODE(DBG_NETIP, 3)
98 #define DBG_FNC_IP_OUTPUT NETDBG_CODE(DBG_NETIP, (1 << 8) | 1)
99 #define DBG_FNC_IPSEC4_OUTPUT NETDBG_CODE(DBG_NETIP, (2 << 8) | 1)
100
101
102 #if IPSEC
103 #include <netinet6/ipsec.h>
104 #include <netkey/key.h>
105 #if IPSEC_DEBUG
106 #include <netkey/key_debug.h>
107 #else
108 #define KEYDEBUG(lev,arg)
109 #endif
110 #endif /*IPSEC*/
111
112 #include <netinet/ip_fw.h>
113 #include <netinet/ip_divert.h>
114
115 #if DUMMYNET
116 #include <netinet/ip_dummynet.h>
117 #endif
118
119 #if IPFIREWALL_FORWARD_DEBUG
/*
 * print_ip(a): print the IPv4 address stored in the struct in_addr `a`
 * (s_addr in network byte order) in dotted-quad form via printf().
 *
 * - The argument is parenthesized, so an expression such as `*p` expands
 *   to `(*p).s_addr` rather than `*p.s_addr`.
 * - Each octet is cast to unsigned long to match its %lu conversion.
 * - The expansion has no trailing semicolon: the macro is one expression,
 *   so `print_ip(x);` is a single statement and is safe in an unbraced
 *   if/else.
 *
 * `a` is evaluated four times; do not pass an expression with side effects.
 */
#define print_ip(a) \
	printf("%lu.%lu.%lu.%lu", \
	    (unsigned long)((ntohl((a).s_addr) >> 24) & 0xFF), \
	    (unsigned long)((ntohl((a).s_addr) >> 16) & 0xFF), \
	    (unsigned long)((ntohl((a).s_addr) >> 8) & 0xFF), \
	    (unsigned long)(ntohl((a).s_addr) & 0xFF))
124 #endif
125
126 #if IPSEC
/*
 * Mutex held by ip_output_list() across the IPsec security-policy lookup
 * and the ipsec4_output() call.
 */
127 extern lck_mtx_t *sadb_mutex;
128 #endif
129
/*
 * Counter used to number outgoing datagrams: when RANDOM_IP_ID is not
 * configured, ip_output_list() stores htons(ip_id++) in the header of each
 * packet sent without IP_FORWARDING or IP_RAWOUTPUT.
 */
130 u_short ip_id;
131
/*
 * Prototypes for routines used by the IP output path.
 *
 * The static functions are private to this file.  Of these,
 * ip_output_list() calls:
 *   ip_insertoptions() - invoked when an options mbuf is supplied; its
 *                        return value replaces the packet mbuf and the
 *                        value stored through the int pointer becomes the
 *                        header length.
 *   ip_mloopback()     - invoked to loop back a copy of an outgoing
 *                        multicast packet.
 */
132 static struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *);
133 static struct ifnet *ip_multicast_if(struct in_addr *, int *);
134 static void ip_mloopback(struct ifnet *, struct mbuf *,
135 struct sockaddr_in *, int);
136 static int ip_getmoptions(struct sockopt *, struct ip_moptions *);
137 static int ip_pcbopts(int, struct mbuf **, struct mbuf *);
138 static int ip_setmoptions(struct sockopt *, struct ip_moptions **);
139
/* Non-static entry points declared here. */
140 int ip_createmoptions(struct ip_moptions **imop);
141 int ip_addmembership(struct ip_moptions *imo, struct ip_mreq *mreq);
142 int ip_dropmembership(struct ip_moptions *imo, struct ip_mreq *mreq);
143 int ip_optcopy(struct ip *, struct ip *);
/*
 * Optional hook: when non-NULL, ip_output_list() calls it with the IP
 * header, the header length, the outgoing interface, the constant 1 and a
 * pointer to the mbuf.  A non-zero return, or a NULL mbuf on return, ends
 * processing of the packet.
 */
144 extern int (*fr_checkp)(struct ip *, int, struct ifnet *, int, struct mbuf **);
145 #ifdef __APPLE__
/* Used by ip_output_list() to clone the packet for ipfw 'tee' rules. */
146 extern struct mbuf* m_dup(register struct mbuf *m, int how);
147 #endif
148
/*
 * Must be non-zero for ip_output_list() to take the GMAC SUM16 TCP
 * checksum-offload path.
 */
149 extern int apple_hwcksum_tx;
/*
 * Compared with a cached route's generation_id; on a mismatch
 * ip_output_list() re-validates the packet's source address.
 */
150 extern u_long route_generation;
151
152 extern struct protosw inetsw[];
153
/* Link-local output counters (iplls_out_total, iplls_out_badttl). */
154 extern struct ip_linklocal_stat ip_linklocal_stat;
/*
 * Taken on entry to ip_output_list() and released around the IP filter
 * callouts and the ipsec4_output() call.
 */
155 extern lck_mtx_t *ip_mutex;
156
157 /* temporary: for testing */
158 #if IPSEC
/* Non-zero makes ip_output_list() skip IPsec processing altogether. */
159 extern int ipsec_bypass;
160 #endif
161
/*
 * Exported read-write (CTLFLAG_RW) as sysctl "maxchainsent" under
 * _net_inet_ip; starts at 0.
 * NOTE(review): the code that reads or updates this value is not in the
 * part of ip_output_list() shown here -- presumably its packet-chain
 * (dlil_output_list) handling; confirm.
 */
162 static int ip_maxchainsent = 0;
163 SYSCTL_INT(_net_inet_ip, OID_AUTO, maxchainsent, CTLFLAG_RW,
164 &ip_maxchainsent, 0, "use dlil_output_list");
/*
 * ip_output: transmit a single IP packet.
 *
 * Thin entry point that hands its arguments to ip_output_list() with the
 * packetchain argument fixed at 0.
 *
 *   m0    mbuf chain holding the packet.  It begins with a skeletal IP
 *         header (len, off, ttl, proto, tos, src, dst) and is freed by
 *         the output path.
 *   opt   optional mbuf of IP options; it is not freed.
 *   ro, flags, imo
 *         passed through to ip_output_list() unchanged.
 *
 * Returns whatever ip_output_list() returns.
 */
int
ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro, int flags,
    struct ip_moptions *imo)
{
	return (ip_output_list(m0, 0, opt, ro, flags, imo));
}
183
184 int
185 ip_output_list(
186 struct mbuf *m0,
187 int packetchain,
188 struct mbuf *opt,
189 struct route *ro,
190 int flags,
191 struct ip_moptions *imo)
192 {
193 struct ip *ip, *mhip;
194 struct ifnet *ifp = NULL;
195 struct mbuf *m = m0;
196 int hlen = sizeof (struct ip);
197 int len, off, error = 0;
198 struct sockaddr_in *dst = NULL;
199 struct in_ifaddr *ia = NULL;
200 int isbroadcast, sw_csum;
201 struct in_addr pkt_dst;
202 #if IPSEC
203 struct route iproute;
204 struct socket *so = NULL;
205 struct secpolicy *sp = NULL;
206 #endif
207 #if IPFIREWALL_FORWARD
208 int fwd_rewrite_src = 0;
209 #endif
210 struct ip_fw_args args;
211 int didfilter = 0;
212 ipfilter_t inject_filter_ref = 0;
213 struct m_tag *tag;
214 struct route dn_route;
215 struct mbuf * packetlist;
216 int pktcnt = 0;
217
218 lck_mtx_lock(ip_mutex);
219
220 KERNEL_DEBUG(DBG_FNC_IP_OUTPUT | DBG_FUNC_START, 0,0,0,0,0);
221
222 packetlist = m0;
223 args.eh = NULL;
224 args.rule = NULL;
225 args.next_hop = NULL;
226 args.divert_rule = 0; /* divert cookie */
227
228 /* Grab info from mtags prepended to the chain */
229 #if DUMMYNET
230 if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_DUMMYNET, NULL)) != NULL) {
231 struct dn_pkt_tag *dn_tag;
232
233 dn_tag = (struct dn_pkt_tag *)(tag+1);
234 args.rule = dn_tag->rule;
235 opt = NULL;
236 dn_route = dn_tag->ro;
237 ro = &dn_route;
238
239 imo = NULL;
240 dst = dn_tag->dn_dst;
241 ifp = dn_tag->ifp;
242 flags = dn_tag->flags;
243
244 m_tag_delete(m0, tag);
245 }
246 #endif /* DUMMYNET */
247
248 if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_DIVERT, NULL)) != NULL) {
249 struct divert_tag *div_tag;
250
251 div_tag = (struct divert_tag *)(tag+1);
252 args.divert_rule = div_tag->cookie;
253
254 m_tag_delete(m0, tag);
255 }
256 if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_IPFORWARD, NULL)) != NULL) {
257 struct ip_fwd_tag *ipfwd_tag;
258
259 ipfwd_tag = (struct ip_fwd_tag *)(tag+1);
260 args.next_hop = ipfwd_tag->next_hop;
261
262 m_tag_delete(m0, tag);
263 }
264
265 m = m0;
266
267 #if DIAGNOSTIC
268 if ( !m || (m->m_flags & M_PKTHDR) != 0)
269 panic("ip_output no HDR");
270 if (!ro)
271 panic("ip_output no route, proto = %d",
272 mtod(m, struct ip *)->ip_p);
273 #endif
274
275 if (args.rule != NULL) { /* dummynet already saw us */
276 ip = mtod(m, struct ip *);
277 hlen = IP_VHL_HL(ip->ip_vhl) << 2 ;
278 if (ro->ro_rt != NULL)
279 ia = (struct in_ifaddr *)ro->ro_rt->rt_ifa;
280 if (ia)
281 ifaref(&ia->ia_ifa);
282 #if IPSEC
283 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) {
284 so = ipsec_getsocket(m);
285 (void)ipsec_setsocket(m, NULL);
286 }
287 #endif
288 goto sendit;
289 }
290
291 #if IPSEC
292 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) {
293 so = ipsec_getsocket(m);
294 (void)ipsec_setsocket(m, NULL);
295 }
296 #endif
297 loopit:
298 /*
299 * No need to proccess packet twice if we've
300 * already seen it
301 */
302 inject_filter_ref = ipf_get_inject_filter(m);
303
304 if (opt) {
305 m = ip_insertoptions(m, opt, &len);
306 hlen = len;
307 }
308 ip = mtod(m, struct ip *);
309 pkt_dst = args.next_hop ? args.next_hop->sin_addr : ip->ip_dst;
310
311 /*
312 * Fill in IP header.
313 */
314 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
315 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, hlen >> 2);
316 ip->ip_off &= IP_DF;
317 #if RANDOM_IP_ID
318 ip->ip_id = ip_randomid();
319 #else
320 ip->ip_id = htons(ip_id++);
321 #endif
322 ipstat.ips_localout++;
323 } else {
324 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
325 }
326
327 KERNEL_DEBUG(DBG_LAYER_BEG, ip->ip_dst.s_addr,
328 ip->ip_src.s_addr, ip->ip_p, ip->ip_off, ip->ip_len);
329
330 dst = (struct sockaddr_in *)&ro->ro_dst;
331
332 /*
333 * If there is a cached route,
334 * check that it is to the same destination
335 * and is still up. If not, free it and try again.
336 * The address family should also be checked in case of sharing the
337 * cache with IPv6.
338 */
339
340 {
341 if (ro->ro_rt && (ro->ro_rt->generation_id != route_generation) &&
342 ((flags & (IP_ROUTETOIF | IP_FORWARDING)) == 0) && (ip->ip_src.s_addr != INADDR_ANY) &&
343 (ifa_foraddr(ip->ip_src.s_addr) == 0)) {
344 error = EADDRNOTAVAIL;
345 goto bad;
346 }
347 }
348 if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
349 dst->sin_family != AF_INET ||
350 dst->sin_addr.s_addr != pkt_dst.s_addr)) {
351 rtfree(ro->ro_rt);
352 ro->ro_rt = (struct rtentry *)0;
353 }
354 if (ro->ro_rt == 0) {
355 bzero(dst, sizeof(*dst));
356 dst->sin_family = AF_INET;
357 dst->sin_len = sizeof(*dst);
358 dst->sin_addr = pkt_dst;
359 }
360 /*
361 * If routing to interface only,
362 * short circuit routing lookup.
363 */
364 #define ifatoia(ifa) ((struct in_ifaddr *)(ifa))
365 #define sintosa(sin) ((struct sockaddr *)(sin))
366 if (flags & IP_ROUTETOIF) {
367 if (ia)
368 ifafree(&ia->ia_ifa);
369 if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0) {
370 if ((ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) {
371 ipstat.ips_noroute++;
372 error = ENETUNREACH;
373 goto bad;
374 }
375 }
376 ifp = ia->ia_ifp;
377 ip->ip_ttl = 1;
378 isbroadcast = in_broadcast(dst->sin_addr, ifp);
379 } else {
380 /*
381 * If this is the case, we probably don't want to allocate
382 * a protocol-cloned route since we didn't get one from the
383 * ULP. This lets TCP do its thing, while not burdening
384 * forwarding or ICMP with the overhead of cloning a route.
385 * Of course, we still want to do any cloning requested by
386 * the link layer, as this is probably required in all cases
387 * for correct operation (as it is for ARP).
388 */
389 if (ro->ro_rt == 0)
390 rtalloc_ign(ro, RTF_PRCLONING);
391 if (ro->ro_rt == 0) {
392 ipstat.ips_noroute++;
393 error = EHOSTUNREACH;
394 goto bad;
395 }
396 if (ia)
397 ifafree(&ia->ia_ifa);
398 ia = ifatoia(ro->ro_rt->rt_ifa);
399 if (ia)
400 ifaref(&ia->ia_ifa);
401 ifp = ro->ro_rt->rt_ifp;
402 ro->ro_rt->rt_use++;
403 if (ro->ro_rt->rt_flags & RTF_GATEWAY)
404 dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
405 if (ro->ro_rt->rt_flags & RTF_HOST)
406 isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST);
407 else
408 isbroadcast = in_broadcast(dst->sin_addr, ifp);
409 }
410 if (IN_MULTICAST(ntohl(pkt_dst.s_addr))) {
411 struct in_multi *inm;
412
413 m->m_flags |= M_MCAST;
414 /*
415 * IP destination address is multicast. Make sure "dst"
416 * still points to the address in "ro". (It may have been
417 * changed to point to a gateway address, above.)
418 */
419 dst = (struct sockaddr_in *)&ro->ro_dst;
420 /*
421 * See if the caller provided any multicast options
422 */
423 if (imo != NULL) {
424 if ((flags & IP_RAWOUTPUT) == 0) ip->ip_ttl = imo->imo_multicast_ttl;
425 if (imo->imo_multicast_ifp != NULL) {
426 ifp = imo->imo_multicast_ifp;
427 }
428 if (imo->imo_multicast_vif != -1 &&
429 ((flags & IP_RAWOUTPUT) == 0 || ip->ip_src.s_addr == INADDR_ANY))
430 ip->ip_src.s_addr =
431 ip_mcast_src(imo->imo_multicast_vif);
432 } else
433 if ((flags & IP_RAWOUTPUT) == 0) ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
434 /*
435 * Confirm that the outgoing interface supports multicast.
436 */
437 if ((imo == NULL) || (imo->imo_multicast_vif == -1)) {
438 if ((ifp->if_flags & IFF_MULTICAST) == 0) {
439 ipstat.ips_noroute++;
440 error = ENETUNREACH;
441 goto bad;
442 }
443 }
444 /*
445 * If source address not specified yet, use address
446 * of outgoing interface.
447 */
448 if (ip->ip_src.s_addr == INADDR_ANY) {
449 register struct in_ifaddr *ia1;
450
451 TAILQ_FOREACH(ia1, &in_ifaddrhead, ia_link)
452 if (ia1->ia_ifp == ifp) {
453 ip->ip_src = IA_SIN(ia1)->sin_addr;
454
455 break;
456 }
457 if (ip->ip_src.s_addr == INADDR_ANY) {
458 error = ENETUNREACH;
459 goto bad;
460 }
461 }
462
463 ifnet_lock_shared(ifp);
464 IN_LOOKUP_MULTI(pkt_dst, ifp, inm);
465 ifnet_lock_done(ifp);
466 if (inm != NULL &&
467 (imo == NULL || imo->imo_multicast_loop)) {
468 /*
469 * If we belong to the destination multicast group
470 * on the outgoing interface, and the caller did not
471 * forbid loopback, loop back a copy.
472 */
473 if (!TAILQ_EMPTY(&ipv4_filters)) {
474 struct ipfilter *filter;
475 int seen = (inject_filter_ref == 0);
476 struct ipf_pktopts *ippo = 0, ipf_pktopts;
477
478 if (imo) {
479 ippo = &ipf_pktopts;
480 ipf_pktopts.ippo_mcast_ifnet = imo->imo_multicast_ifp;
481 ipf_pktopts.ippo_mcast_ttl = imo->imo_multicast_ttl;
482 ipf_pktopts.ippo_mcast_loop = imo->imo_multicast_loop;
483 }
484
485 lck_mtx_unlock(ip_mutex);
486 ipf_ref();
487 TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) {
488 if (seen == 0) {
489 if ((struct ipfilter *)inject_filter_ref == filter)
490 seen = 1;
491 } else if (filter->ipf_filter.ipf_output) {
492 errno_t result;
493 result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, ippo);
494 if (result == EJUSTRETURN) {
495 ipf_unref();
496 goto done;
497 }
498 if (result != 0) {
499 ipf_unref();
500 lck_mtx_lock(ip_mutex);
501 goto bad;
502 }
503 }
504 }
505 lck_mtx_lock(ip_mutex);
506 ipf_unref();
507 didfilter = 1;
508 }
509 ip_mloopback(ifp, m, dst, hlen);
510 }
511 else {
512 /*
513 * If we are acting as a multicast router, perform
514 * multicast forwarding as if the packet had just
515 * arrived on the interface to which we are about
516 * to send. The multicast forwarding function
517 * recursively calls this function, using the
518 * IP_FORWARDING flag to prevent infinite recursion.
519 *
520 * Multicasts that are looped back by ip_mloopback(),
521 * above, will be forwarded by the ip_input() routine,
522 * if necessary.
523 */
524 if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
525 /*
526 * Check if rsvp daemon is running. If not, don't
527 * set ip_moptions. This ensures that the packet
528 * is multicast and not just sent down one link
529 * as prescribed by rsvpd.
530 */
531 if (!rsvp_on)
532 imo = NULL;
533 if (ip_mforward(ip, ifp, m, imo) != 0) {
534 m_freem(m);
535 lck_mtx_unlock(ip_mutex);
536 goto done;
537 }
538 }
539 }
540
541 /*
542 * Multicasts with a time-to-live of zero may be looped-
543 * back, above, but must not be transmitted on a network.
544 * Also, multicasts addressed to the loopback interface
545 * are not sent -- the above call to ip_mloopback() will
546 * loop back a copy if this host actually belongs to the
547 * destination group on the loopback interface.
548 */
549 if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) {
550 m_freem(m);
551 lck_mtx_unlock(ip_mutex);
552 goto done;
553 }
554
555 goto sendit;
556 }
557 #ifndef notdef
558 /*
559 * If source address not specified yet, use address
560 * of outgoing interface.
561 */
562 if (ip->ip_src.s_addr == INADDR_ANY) {
563 ip->ip_src = IA_SIN(ia)->sin_addr;
564 #if IPFIREWALL_FORWARD
565 /* Keep note that we did this - if the firewall changes
566 * the next-hop, our interface may change, changing the
567 * default source IP. It's a shame so much effort happens
568 * twice. Oh well.
569 */
570 fwd_rewrite_src++;
571 #endif /* IPFIREWALL_FORWARD */
572 }
573 #endif /* notdef */
574
575 /*
576 * Look for broadcast address and
577 * and verify user is allowed to send
578 * such a packet.
579 */
580 if (isbroadcast) {
581 if ((ifp->if_flags & IFF_BROADCAST) == 0) {
582 error = EADDRNOTAVAIL;
583 goto bad;
584 }
585 if ((flags & IP_ALLOWBROADCAST) == 0) {
586 error = EACCES;
587 goto bad;
588 }
589 /* don't allow broadcast messages to be fragmented */
590 if ((u_short)ip->ip_len > ifp->if_mtu) {
591 error = EMSGSIZE;
592 goto bad;
593 }
594 m->m_flags |= M_BCAST;
595 } else {
596 m->m_flags &= ~M_BCAST;
597 }
598
599 sendit:
600 /*
601 * Force IP TTL to 255 following draft-ietf-zeroconf-ipv4-linklocal.txt
602 */
603 if (IN_LINKLOCAL(ntohl(ip->ip_src.s_addr)) || IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr))) {
604 ip_linklocal_stat.iplls_out_total++;
605 if (ip->ip_ttl != MAXTTL) {
606 ip_linklocal_stat.iplls_out_badttl++;
607 ip->ip_ttl = MAXTTL;
608 }
609 }
610
611 injectit:
612 if (!didfilter && !TAILQ_EMPTY(&ipv4_filters)) {
613 struct ipfilter *filter;
614 int seen = (inject_filter_ref == 0);
615
616 lck_mtx_unlock(ip_mutex);
617 ipf_ref();
618 TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) {
619 if (seen == 0) {
620 if ((struct ipfilter *)inject_filter_ref == filter)
621 seen = 1;
622 } else if (filter->ipf_filter.ipf_output) {
623 errno_t result;
624 result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, 0);
625 if (result == EJUSTRETURN) {
626 ipf_unref();
627 goto done;
628 }
629 if (result != 0) {
630 ipf_unref();
631 lck_mtx_lock(ip_mutex);
632 goto bad;
633 }
634 }
635 }
636 ipf_unref();
637 lck_mtx_lock(ip_mutex);
638 }
639
640 #if IPSEC
641 /* temporary for testing only: bypass ipsec alltogether */
642
643 if (ipsec_bypass != 0 || (flags & IP_NOIPSEC) != 0)
644 goto skip_ipsec;
645
646 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_START, 0,0,0,0,0);
647
648 lck_mtx_lock(sadb_mutex);
649
650 /* get SP for this packet */
651 if (so == NULL)
652 sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, flags, &error);
653 else
654 sp = ipsec4_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error);
655
656 if (sp == NULL) {
657 ipsecstat.out_inval++;
658 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 0,0,0,0,0);
659 lck_mtx_unlock(sadb_mutex);
660 goto bad;
661 }
662
663 error = 0;
664
665 /* check policy */
666 switch (sp->policy) {
667 case IPSEC_POLICY_DISCARD:
668 /*
669 * This packet is just discarded.
670 */
671 ipsecstat.out_polvio++;
672 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 1,0,0,0,0);
673 lck_mtx_unlock(sadb_mutex);
674 goto bad;
675
676 case IPSEC_POLICY_BYPASS:
677 case IPSEC_POLICY_NONE:
678 /* no need to do IPsec. */
679 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 2,0,0,0,0);
680 lck_mtx_unlock(sadb_mutex);
681 goto skip_ipsec;
682
683 case IPSEC_POLICY_IPSEC:
684 if (sp->req == NULL) {
685 /* acquire a policy */
686 error = key_spdacquire(sp);
687 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 3,0,0,0,0);
688 lck_mtx_unlock(sadb_mutex);
689 goto bad;
690 }
691 break;
692
693 case IPSEC_POLICY_ENTRUST:
694 default:
695 printf("ip_output: Invalid policy found. %d\n", sp->policy);
696 }
697 {
698 struct ipsec_output_state state;
699 bzero(&state, sizeof(state));
700 state.m = m;
701 if (flags & IP_ROUTETOIF) {
702 state.ro = &iproute;
703 bzero(&iproute, sizeof(iproute));
704 } else
705 state.ro = ro;
706 state.dst = (struct sockaddr *)dst;
707
708 ip->ip_sum = 0;
709
710 /*
711 * XXX
712 * delayed checksums are not currently compatible with IPsec
713 */
714 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
715 in_delayed_cksum(m);
716 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
717 }
718
719 HTONS(ip->ip_len);
720 HTONS(ip->ip_off);
721
722 lck_mtx_unlock(ip_mutex);
723 error = ipsec4_output(&state, sp, flags);
724 lck_mtx_unlock(sadb_mutex);
725 lck_mtx_lock(ip_mutex);
726
727 m0 = m = state.m;
728
729 if (flags & IP_ROUTETOIF) {
730 /*
731 * if we have tunnel mode SA, we may need to ignore
732 * IP_ROUTETOIF.
733 */
734 if (state.ro != &iproute || state.ro->ro_rt != NULL) {
735 flags &= ~IP_ROUTETOIF;
736 ro = state.ro;
737 }
738 } else
739 ro = state.ro;
740
741 dst = (struct sockaddr_in *)state.dst;
742 if (error) {
743 /* mbuf is already reclaimed in ipsec4_output. */
744 m0 = NULL;
745 switch (error) {
746 case EHOSTUNREACH:
747 case ENETUNREACH:
748 case EMSGSIZE:
749 case ENOBUFS:
750 case ENOMEM:
751 break;
752 default:
753 printf("ip4_output (ipsec): error code %d\n", error);
754 /*fall through*/
755 case ENOENT:
756 /* don't show these error codes to the user */
757 error = 0;
758 break;
759 }
760 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 4,0,0,0,0);
761 goto bad;
762 }
763 }
764
765 /* be sure to update variables that are affected by ipsec4_output() */
766 ip = mtod(m, struct ip *);
767
768 #ifdef _IP_VHL
769 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
770 #else
771 hlen = ip->ip_hl << 2;
772 #endif
773 /* Check that there wasn't a route change and src is still valid */
774
775 if (ro->ro_rt->generation_id != route_generation) {
776 if (ifa_foraddr(ip->ip_src.s_addr) == 0 && ((flags & (IP_ROUTETOIF | IP_FORWARDING)) == 0)) {
777 error = EADDRNOTAVAIL;
778 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 5,0,0,0,0);
779 goto bad;
780 }
781 rtfree(ro->ro_rt);
782 ro->ro_rt = NULL;
783 }
784
785 if (ro->ro_rt == NULL) {
786 if ((flags & IP_ROUTETOIF) == 0) {
787 printf("ip_output: "
788 "can't update route after IPsec processing\n");
789 error = EHOSTUNREACH; /*XXX*/
790 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 6,0,0,0,0);
791 goto bad;
792 }
793 } else {
794 if (ia)
795 ifafree(&ia->ia_ifa);
796 ia = ifatoia(ro->ro_rt->rt_ifa);
797 if (ia)
798 ifaref(&ia->ia_ifa);
799 ifp = ro->ro_rt->rt_ifp;
800 }
801
802 /* make it flipped, again. */
803 NTOHS(ip->ip_len);
804 NTOHS(ip->ip_off);
805 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 7,0xff,0xff,0xff,0xff);
806
807 /* Pass to filters again */
808 if (!TAILQ_EMPTY(&ipv4_filters)) {
809 struct ipfilter *filter;
810
811 lck_mtx_unlock(ip_mutex);
812 ipf_ref();
813 TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) {
814 if (filter->ipf_filter.ipf_output) {
815 errno_t result;
816 result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, 0);
817 if (result == EJUSTRETURN) {
818 ipf_unref();
819 goto done;
820 }
821 if (result != 0) {
822 ipf_unref();
823 lck_mtx_lock(ip_mutex);
824 goto bad;
825 }
826 }
827 }
828 ipf_unref();
829 lck_mtx_lock(ip_mutex);
830 }
831 skip_ipsec:
832 #endif /*IPSEC*/
833
834 /*
835 * IpHack's section.
836 * - Xlate: translate packet's addr/port (NAT).
837 * - Firewall: deny/allow/etc.
838 * - Wrap: fake packet's addr/port <unimpl.>
839 * - Encapsulate: put it in another IP and send out. <unimp.>
840 */
841 if (fr_checkp) {
842 struct mbuf *m1 = m;
843
844 if ((error = (*fr_checkp)(ip, hlen, ifp, 1, &m1)) || !m1) {
845 lck_mtx_unlock(ip_mutex);
846 goto done;
847 }
848 ip = mtod(m0 = m = m1, struct ip *);
849 }
850
851 /*
852 * Check with the firewall...
853 * but not if we are already being fwd'd from a firewall.
854 */
855 if (fw_enable && IPFW_LOADED && !args.next_hop) {
856 struct sockaddr_in *old = dst;
857
858 args.m = m;
859 args.next_hop = dst;
860 args.oif = ifp;
861 lck_mtx_unlock(ip_mutex);
862 off = ip_fw_chk_ptr(&args);
863 m = args.m;
864 dst = args.next_hop;
865
866 /*
867 * On return we must do the following:
868 * IP_FW_PORT_DENY_FLAG -> drop the pkt (XXX new)
869 * 1<=off<= 0xffff -> DIVERT
870 * (off & IP_FW_PORT_DYNT_FLAG) -> send to a DUMMYNET pipe
871 * (off & IP_FW_PORT_TEE_FLAG) -> TEE the packet
872 * dst != old -> IPFIREWALL_FORWARD
873 * off==0, dst==old -> accept
874 * If some of the above modules is not compiled in, then
875 * we should't have to check the corresponding condition
876 * (because the ipfw control socket should not accept
877 * unsupported rules), but better play safe and drop
878 * packets in case of doubt.
879 */
880 m0 = m;
881 if ( (off & IP_FW_PORT_DENY_FLAG) || m == NULL) {
882 if (m)
883 m_freem(m);
884 error = EACCES ;
885 goto done ;
886 }
887 ip = mtod(m, struct ip *);
888 if (off == 0 && dst == old) {/* common case */
889 lck_mtx_lock(ip_mutex);
890 goto pass ;
891 }
892 #if DUMMYNET
893 if (DUMMYNET_LOADED && (off & IP_FW_PORT_DYNT_FLAG) != 0) {
894 /*
895 * pass the pkt to dummynet. Need to include
896 * pipe number, m, ifp, ro, dst because these are
897 * not recomputed in the next pass.
898 * All other parameters have been already used and
899 * so they are not needed anymore.
900 * XXX note: if the ifp or ro entry are deleted
901 * while a pkt is in dummynet, we are in trouble!
902 */
903 args.ro = ro;
904 args.dst = dst;
905 args.flags = flags;
906
907 error = ip_dn_io_ptr(m, off & 0xffff, DN_TO_IP_OUT,
908 &args);
909 goto done;
910 }
911 #endif /* DUMMYNET */
912 lck_mtx_lock(ip_mutex);
913 #if IPDIVERT
914 if (off != 0 && (off & IP_FW_PORT_DYNT_FLAG) == 0) {
915 struct mbuf *clone = NULL;
916
917 /* Clone packet if we're doing a 'tee' */
918 if ((off & IP_FW_PORT_TEE_FLAG) != 0)
919 clone = m_dup(m, M_DONTWAIT);
920 /*
921 * XXX
922 * delayed checksums are not currently compatible
923 * with divert sockets.
924 */
925 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
926 in_delayed_cksum(m);
927 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
928 }
929
930 /* Restore packet header fields to original values */
931 HTONS(ip->ip_len);
932 HTONS(ip->ip_off);
933
934 /* Deliver packet to divert input routine */
935 divert_packet(m, 0, off & 0xffff, args.divert_rule);
936
937 /* If 'tee', continue with original packet */
938 if (clone != NULL) {
939 m0 = m = clone;
940 ip = mtod(m, struct ip *);
941 goto pass;
942 }
943 lck_mtx_unlock(ip_mutex);
944 goto done;
945 }
946 #endif
947
948 #if IPFIREWALL_FORWARD
949 /* Here we check dst to make sure it's directly reachable on the
950 * interface we previously thought it was.
951 * If it isn't (which may be likely in some situations) we have
952 * to re-route it (ie, find a route for the next-hop and the
953 * associated interface) and set them here. This is nested
954 * forwarding which in most cases is undesirable, except where
955 * such control is nigh impossible. So we do it here.
956 * And I'm babbling.
957 */
958 if (off == 0 && old != dst) {
959 struct in_ifaddr *ia_fw;
960
961 /* It's changed... */
962 /* There must be a better way to do this next line... */
963 static struct route sro_fwd, *ro_fwd = &sro_fwd;
964 #if IPFIREWALL_FORWARD_DEBUG
965 printf("IPFIREWALL_FORWARD: New dst ip: ");
966 print_ip(dst->sin_addr);
967 printf("\n");
968 #endif
969 /*
970 * We need to figure out if we have been forwarded
971 * to a local socket. If so then we should somehow
972 * "loop back" to ip_input, and get directed to the
973 * PCB as if we had received this packet. This is
974 * because it may be dificult to identify the packets
975 * you want to forward until they are being output
976 * and have selected an interface. (e.g. locally
977 * initiated packets) If we used the loopback inteface,
978 * we would not be able to control what happens
979 * as the packet runs through ip_input() as
980 * it is done through a ISR.
981 */
982 TAILQ_FOREACH(ia_fw, &in_ifaddrhead, ia_link) {
983 /*
984 * If the addr to forward to is one
985 * of ours, we pretend to
986 * be the destination for this packet.
987 */
988 if (IA_SIN(ia_fw)->sin_addr.s_addr ==
989 dst->sin_addr.s_addr)
990 break;
991 }
992 if (ia) {
993 /* tell ip_input "dont filter" */
994 struct m_tag *fwd_tag;
995 struct ip_fwd_tag *ipfwd_tag;
996
997 fwd_tag = m_tag_alloc(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_IPFORWARD,
998 sizeof(struct sockaddr_in), M_NOWAIT);
999 if (fwd_tag == NULL) {
1000 error = ENOBUFS;
1001 goto bad;
1002 }
1003
1004 ipfwd_tag = (struct ip_fwd_tag *)(fwd_tag+1);
1005 ipfwd_tag->next_hop = args.next_hop;
1006
1007 m_tag_prepend(m, fwd_tag);
1008
1009 if (m->m_pkthdr.rcvif == NULL)
1010 m->m_pkthdr.rcvif = ifunit("lo0");
1011 if ((~IF_HWASSIST_CSUM_FLAGS(m->m_pkthdr.rcvif->if_hwassist) &
1012 m->m_pkthdr.csum_flags) == 0) {
1013 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
1014 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1015 m->m_pkthdr.csum_flags |=
1016 CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1017 m->m_pkthdr.csum_data = 0xffff;
1018 }
1019 m->m_pkthdr.csum_flags |=
1020 CSUM_IP_CHECKED | CSUM_IP_VALID;
1021 }
1022 else if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
1023 in_delayed_cksum(m);
1024 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1025 ip->ip_sum = in_cksum(m, hlen);
1026 }
1027 HTONS(ip->ip_len);
1028 HTONS(ip->ip_off);
1029
1030 lck_mtx_unlock(ip_mutex);
1031
1032 /* we need to call dlil_output to run filters
1033 * and resync to avoid recursion loops.
1034 */
1035 if (lo_ifp) {
1036 dlil_output(lo_ifp, PF_INET, m, 0, (struct sockaddr *)dst, 0);
1037 }
1038 else {
1039 printf("ip_output: no loopback ifp for forwarding!!!\n");
1040 }
1041 goto done;
1042 }
1043 /* Some of the logic for this was
1044 * nicked from above.
1045 *
1046 * This rewrites the cached route in a local PCB.
1047 * Is this what we want to do?
1048 */
1049 bcopy(dst, &ro_fwd->ro_dst, sizeof(*dst));
1050
1051 ro_fwd->ro_rt = 0;
1052 rtalloc_ign(ro_fwd, RTF_PRCLONING);
1053
1054 if (ro_fwd->ro_rt == 0) {
1055 ipstat.ips_noroute++;
1056 error = EHOSTUNREACH;
1057 goto bad;
1058 }
1059
1060 ia_fw = ifatoia(ro_fwd->ro_rt->rt_ifa);
1061 ifp = ro_fwd->ro_rt->rt_ifp;
1062 ro_fwd->ro_rt->rt_use++;
1063 if (ro_fwd->ro_rt->rt_flags & RTF_GATEWAY)
1064 dst = (struct sockaddr_in *)ro_fwd->ro_rt->rt_gateway;
1065 if (ro_fwd->ro_rt->rt_flags & RTF_HOST)
1066 isbroadcast =
1067 (ro_fwd->ro_rt->rt_flags & RTF_BROADCAST);
1068 else
1069 isbroadcast = in_broadcast(dst->sin_addr, ifp);
1070 rtfree(ro->ro_rt);
1071 ro->ro_rt = ro_fwd->ro_rt;
1072 dst = (struct sockaddr_in *)&ro_fwd->ro_dst;
1073
1074 /*
1075 * If we added a default src ip earlier,
1076 * which would have been gotten from the-then
1077 * interface, do it again, from the new one.
1078 */
1079 if (fwd_rewrite_src)
1080 ip->ip_src = IA_SIN(ia_fw)->sin_addr;
1081 goto pass ;
1082 }
1083 #endif /* IPFIREWALL_FORWARD */
1084 /*
1085 * if we get here, none of the above matches, and
1086 * we have to drop the pkt
1087 */
1088 m_freem(m);
1089 error = EACCES; /* not sure this is the right error msg */
1090 lck_mtx_unlock(ip_mutex);
1091 goto done;
1092 }
1093
1094 pass:
1095 #if __APPLE__
1096 /* Do not allow loopback address to wind up on a wire */
1097 if ((ifp->if_flags & IFF_LOOPBACK) == 0 &&
1098 ((ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
1099 (ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)) {
1100 ipstat.ips_badaddr++;
1101 m_freem(m);
1102 /*
1103 * Do not simply drop the packet just like a firewall -- we want
1104 * the application to feel the pain.
1105 * Return ENETUNREACH like ip6_output does in some similar cases.
1106 * This can startle the otherwise clueless process that specifies
1107 * loopback as the source address.
1108 */
1109 error = ENETUNREACH;
1110 lck_mtx_unlock(ip_mutex);
1111 goto done;
1112 }
1113 #endif
1114 m->m_pkthdr.csum_flags |= CSUM_IP;
1115 sw_csum = m->m_pkthdr.csum_flags
1116 & ~IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);
1117
1118 if ((ifp->if_hwassist & CSUM_TCP_SUM16) != 0) {
1119 /*
1120 * Special case code for GMACE
1121 * frames that can be checksumed by GMACE SUM16 HW:
1122 * frame >64, no fragments, no UDP
1123 */
1124 if (apple_hwcksum_tx && (m->m_pkthdr.csum_flags & CSUM_TCP)
1125 && (ip->ip_len > 50) && (ip->ip_len <= ifp->if_mtu)) {
1126 /* Apple GMAC HW, expects STUFF_OFFSET << 16 | START_OFFSET */
1127 u_short offset = (IP_VHL_HL(ip->ip_vhl) << 2) +14 ; /* IP+Enet header length */
1128 u_short csumprev= m->m_pkthdr.csum_data & 0xFFFF;
1129 m->m_pkthdr.csum_flags = CSUM_DATA_VALID | CSUM_TCP_SUM16; /* for GMAC */
1130 m->m_pkthdr.csum_data = (csumprev + offset) << 16 ;
1131 m->m_pkthdr.csum_data += offset;
1132 sw_csum = CSUM_DELAY_IP; /* do IP hdr chksum in software */
1133 }
1134 else {
1135 /* let the software handle any UDP or TCP checksums */
1136 sw_csum |= (CSUM_DELAY_DATA & m->m_pkthdr.csum_flags);
1137 }
1138 }
1139
1140 if (sw_csum & CSUM_DELAY_DATA) {
1141 in_delayed_cksum(m);
1142 sw_csum &= ~CSUM_DELAY_DATA;
1143 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1144 }
1145
1146 m->m_pkthdr.csum_flags &= IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);
1147
1148 /*
1149 * If small enough for interface, or the interface will take
1150 * care of the fragmentation for us, can just send directly.
1151 */
1152 if ((u_short)ip->ip_len <= ifp->if_mtu ||
1153 ifp->if_hwassist & CSUM_FRAGMENT) {
1154 HTONS(ip->ip_len);
1155 HTONS(ip->ip_off);
1156 ip->ip_sum = 0;
1157 if (sw_csum & CSUM_DELAY_IP) {
1158 ip->ip_sum = in_cksum(m, hlen);
1159 }
1160
1161 #ifndef __APPLE__
1162 /* Record statistics for this interface address. */
1163 if (!(flags & IP_FORWARDING) && ia != NULL) {
1164 ia->ia_ifa.if_opackets++;
1165 ia->ia_ifa.if_obytes += m->m_pkthdr.len;
1166 }
1167 #endif
1168
1169 #if IPSEC
1170 /* clean ipsec history once it goes out of the node */
1171 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0)
1172 ipsec_delaux(m);
1173 #endif
1174 if (packetchain == 0) {
1175 lck_mtx_unlock(ip_mutex);
1176 error = dlil_output(ifp, PF_INET, m, (void *) ro->ro_rt,
1177 (struct sockaddr *)dst, 0);
1178 goto done;
1179 }
1180 else { /* packet chaining allows us to reuse the route for all packets */
1181 m = m->m_nextpkt;
1182 if (m == NULL) {
1183 if (pktcnt > ip_maxchainsent)
1184 ip_maxchainsent = pktcnt;
1185 //send
1186 lck_mtx_unlock(ip_mutex);
1187 error = dlil_output_list(ifp, PF_INET, packetlist, (void *) ro->ro_rt,
1188 (struct sockaddr *)dst, 0);
1189 pktcnt = 0;
1190 goto done;
1191
1192 }
1193 m0 = m;
1194 pktcnt++;
1195 goto loopit;
1196 }
1197 }
1198 /*
1199 * Too large for interface; fragment if possible.
1200 * Must be able to put at least 8 bytes per fragment.
1201 */
1202 if (ip->ip_off & IP_DF) {
1203 error = EMSGSIZE;
1204 /*
1205 * This case can happen if the user changed the MTU
1206 * of an interface after enabling IP on it. Because
1207 * most netifs don't keep track of routes pointing to
1208 * them, there is no way for one to update all its
1209 * routes when the MTU is changed.
1210 */
1211 if ((ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST))
1212 && !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU)
1213 && (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) {
1214 ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
1215 }
1216 ipstat.ips_cantfrag++;
1217 goto bad;
1218 }
1219 len = (ifp->if_mtu - hlen) &~ 7;
1220 if (len < 8) {
1221 error = EMSGSIZE;
1222 goto bad;
1223 }
1224
1225 /*
1226 * if the interface will not calculate checksums on
1227 * fragmented packets, then do it here.
1228 */
1229 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA &&
1230 (ifp->if_hwassist & CSUM_IP_FRAGS) == 0) {
1231 in_delayed_cksum(m);
1232 if (m == NULL) {
1233 lck_mtx_unlock(ip_mutex);
1234 return(ENOMEM);
1235 }
1236 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1237 }
1238
1239
1240 {
1241 int mhlen, firstlen = len;
1242 struct mbuf **mnext = &m->m_nextpkt;
1243 int nfrags = 1;
1244
1245 /*
1246 * Loop through length of segment after first fragment,
1247 * make new header and copy data of each part and link onto chain.
1248 */
1249 m0 = m;
1250 mhlen = sizeof (struct ip);
1251 for (off = hlen + len; off < (u_short)ip->ip_len; off += len) {
1252 MGETHDR(m, M_DONTWAIT, MT_HEADER);
1253 if (m == 0) {
1254 error = ENOBUFS;
1255 ipstat.ips_odropped++;
1256 goto sendorfree;
1257 }
1258 m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG;
1259 m->m_data += max_linkhdr;
1260 mhip = mtod(m, struct ip *);
1261 *mhip = *ip;
1262 if (hlen > sizeof (struct ip)) {
1263 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
1264 mhip->ip_vhl = IP_MAKE_VHL(IPVERSION, mhlen >> 2);
1265 }
1266 m->m_len = mhlen;
1267 mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF);
1268 if (ip->ip_off & IP_MF)
1269 mhip->ip_off |= IP_MF;
1270 if (off + len >= (u_short)ip->ip_len)
1271 len = (u_short)ip->ip_len - off;
1272 else
1273 mhip->ip_off |= IP_MF;
1274 mhip->ip_len = htons((u_short)(len + mhlen));
1275 m->m_next = m_copy(m0, off, len);
1276 if (m->m_next == 0) {
1277 (void) m_free(m);
1278 error = ENOBUFS; /* ??? */
1279 ipstat.ips_odropped++;
1280 goto sendorfree;
1281 }
1282 m->m_pkthdr.len = mhlen + len;
1283 m->m_pkthdr.rcvif = 0;
1284 m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
1285 m->m_pkthdr.socket_id = m0->m_pkthdr.socket_id;
1286 HTONS(mhip->ip_off);
1287 mhip->ip_sum = 0;
1288 if (sw_csum & CSUM_DELAY_IP) {
1289 mhip->ip_sum = in_cksum(m, mhlen);
1290 }
1291 *mnext = m;
1292 mnext = &m->m_nextpkt;
1293 nfrags++;
1294 }
1295 ipstat.ips_ofragments += nfrags;
1296
1297 /* set first/last markers for fragment chain */
1298 m->m_flags |= M_LASTFRAG;
1299 m0->m_flags |= M_FIRSTFRAG | M_FRAG;
1300 m0->m_pkthdr.csum_data = nfrags;
1301
1302 /*
1303 * Update first fragment by trimming what's been copied out
1304 * and updating header, then send each fragment (in order).
1305 */
1306 m = m0;
1307 m_adj(m, hlen + firstlen - (u_short)ip->ip_len);
1308 m->m_pkthdr.len = hlen + firstlen;
1309 ip->ip_len = htons((u_short)m->m_pkthdr.len);
1310 ip->ip_off |= IP_MF;
1311 HTONS(ip->ip_off);
1312 ip->ip_sum = 0;
1313 if (sw_csum & CSUM_DELAY_IP) {
1314 ip->ip_sum = in_cksum(m, hlen);
1315 }
1316 sendorfree:
1317
1318 KERNEL_DEBUG(DBG_LAYER_END, ip->ip_dst.s_addr,
1319 ip->ip_src.s_addr, ip->ip_p, ip->ip_off, ip->ip_len);
1320
1321 lck_mtx_unlock(ip_mutex);
1322 for (m = m0; m; m = m0) {
1323 m0 = m->m_nextpkt;
1324 m->m_nextpkt = 0;
1325 #if IPSEC
1326 /* clean ipsec history once it goes out of the node */
1327 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0)
1328 ipsec_delaux(m);
1329 #endif
1330 if (error == 0) {
1331 #ifndef __APPLE__
1332 /* Record statistics for this interface address. */
1333 if (ia != NULL) {
1334 ia->ia_ifa.if_opackets++;
1335 ia->ia_ifa.if_obytes += m->m_pkthdr.len;
1336 }
1337 #endif
1338 if ((packetchain != 0) && (pktcnt > 0))
1339 panic("ip_output: mix of packet in packetlist is wrong=%x", packetlist);
1340 error = dlil_output(ifp, PF_INET, m, (void *) ro->ro_rt,
1341 (struct sockaddr *)dst, 0);
1342 } else
1343 m_freem(m);
1344 }
1345
1346 if (error == 0)
1347 ipstat.ips_fragmented++;
1348 }
1349 done:
1350 if (ia) {
1351 ifafree(&ia->ia_ifa);
1352 ia = NULL;
1353 }
1354 #if IPSEC
1355 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) {
1356 if (ro == &iproute && ro->ro_rt) {
1357 rtfree(ro->ro_rt);
1358 ro->ro_rt = NULL;
1359 }
1360 if (sp != NULL) {
1361 KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
1362 printf("DP ip_output call free SP:%x\n", sp));
1363 lck_mtx_lock(sadb_mutex);
1364 key_freesp(sp);
1365 lck_mtx_unlock(sadb_mutex);
1366 }
1367 }
1368 #endif /* IPSEC */
1369
1370 KERNEL_DEBUG(DBG_FNC_IP_OUTPUT | DBG_FUNC_END, error,0,0,0,0);
1371 return (error);
1372 bad:
1373 m_freem(m0);
1374 lck_mtx_unlock(ip_mutex);
1375 goto done;
1376 }
1377
1378 void
1379 in_delayed_cksum_offset(struct mbuf *m, int ip_offset)
1380 {
1381 struct ip *ip;
1382 u_short csum, offset;
1383
1384 while (ip_offset > m->m_len) {
1385 ip_offset -= m->m_len;
1386 m = m->m_next;
1387 if (m) {
1388 printf("in_delayed_cksum_withoffset failed - ip_offset wasn't in the packet\n");
1389 return;
1390 }
1391 }
1392
1393 if (ip_offset + sizeof(struct ip) > m->m_len) {
1394 printf("delayed m_pullup, m->len: %d off: %d\n",
1395 m->m_len, ip_offset);
1396 /*
1397 * XXX
1398 * this shouldn't happen
1399 */
1400 m = m_pullup(m, ip_offset + sizeof(struct ip));
1401 }
1402
1403 /* Gross */
1404 if (ip_offset) {
1405 m->m_len -= ip_offset;
1406 m->m_data += ip_offset;
1407 }
1408
1409 ip = mtod(m, struct ip*);
1410 offset = IP_VHL_HL(ip->ip_vhl) << 2 ;
1411 csum = in_cksum_skip(m, ip->ip_len, offset);
1412 if (m->m_pkthdr.csum_flags & CSUM_UDP && csum == 0)
1413 csum = 0xffff;
1414 offset += m->m_pkthdr.csum_data & 0xFFFF; /* checksum offset */
1415
1416 /* Gross */
1417 if (ip_offset) {
1418 if (M_LEADINGSPACE(m) < ip_offset)
1419 panic("in_delayed_cksum_withoffset - chain modified!\n");
1420 m->m_len += ip_offset;
1421 m->m_data -= ip_offset;
1422 }
1423
1424 if (offset > ip->ip_len) /* bogus offset */
1425 return;
1426
1427 if (offset + ip_offset + sizeof(u_short) > m->m_len) {
1428 printf("delayed m_pullup, m->len: %d off: %d p: %d\n",
1429 m->m_len, offset + ip_offset, ip->ip_p);
1430 /*
1431 * XXX
1432 * this shouldn't happen, but if it does, the
1433 * correct behavior may be to insert the checksum
1434 * in the existing chain instead of rearranging it.
1435 */
1436 m = m_pullup(m, offset + ip_offset + sizeof(u_short));
1437 }
1438 *(u_short *)(m->m_data + offset + ip_offset) = csum;
1439 }
1440
/*
 * Delayed-checksum entry point for a packet whose IP header sits at the very
 * start of the mbuf chain: forwards to in_delayed_cksum_offset() with a zero
 * offset.
 */
void
in_delayed_cksum(struct mbuf *m)
{
	const int ip_hdr_offset = 0;	/* IP header is at the front of the chain */

	in_delayed_cksum_offset(m, ip_hdr_offset);
}
1446
1447 void
1448 in_cksum_offset(struct mbuf* m, size_t ip_offset)
1449 {
1450 struct ip* ip = NULL;
1451 int hlen = 0;
1452
1453 while (ip_offset > m->m_len) {
1454 ip_offset -= m->m_len;
1455 m = m->m_next;
1456 if (m) {
1457 printf("in_cksum_offset failed - ip_offset wasn't in the packet\n");
1458 return;
1459 }
1460 }
1461
1462 if (ip_offset + sizeof(struct ip) > m->m_len) {
1463 printf("in_cksum_offset - delayed m_pullup, m->len: %d off: %d\n",
1464 m->m_len, ip_offset);
1465 /*
1466 * XXX
1467 * this shouldn't happen
1468 */
1469 m = m_pullup(m, ip_offset + sizeof(struct ip));
1470 }
1471
1472 /* Gross */
1473 if (ip_offset) {
1474 m->m_len -= ip_offset;
1475 m->m_data += ip_offset;
1476 }
1477
1478 ip = mtod(m, struct ip*);
1479
1480 #ifdef _IP_VHL
1481 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
1482 #else
1483 hlen = ip->ip_hl << 2;
1484 #endif
1485
1486 ip->ip_sum = 0;
1487 ip->ip_sum = in_cksum(m, hlen);
1488
1489 /* Gross */
1490 if (ip_offset) {
1491 if (M_LEADINGSPACE(m) < ip_offset)
1492 panic("in_cksum_offset - chain modified!\n");
1493 m->m_len += ip_offset;
1494 m->m_data -= ip_offset;
1495 }
1496 }
1497
1498 /*
1499 * Insert IP options into preformed packet.
1500 * Adjust IP destination as required for IP source routing,
1501 * as indicated by a non-zero in_addr at the start of the options.
1502 *
1503 * XXX This routine assumes that the packet has no options in place.
1504 */
1505 static struct mbuf *
1506 ip_insertoptions(m, opt, phlen)
1507 register struct mbuf *m;
1508 struct mbuf *opt;
1509 int *phlen;
1510 {
1511 register struct ipoption *p = mtod(opt, struct ipoption *);
1512 struct mbuf *n;
1513 register struct ip *ip = mtod(m, struct ip *);
1514 unsigned optlen;
1515
1516 optlen = opt->m_len - sizeof(p->ipopt_dst);
1517 if (optlen + (u_short)ip->ip_len > IP_MAXPACKET)
1518 return (m); /* XXX should fail */
1519 if (p->ipopt_dst.s_addr)
1520 ip->ip_dst = p->ipopt_dst;
1521 if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
1522 MGETHDR(n, M_DONTWAIT, MT_HEADER);
1523 if (n == 0)
1524 return (m);
1525 n->m_pkthdr.rcvif = 0;
1526 n->m_pkthdr.len = m->m_pkthdr.len + optlen;
1527 m->m_len -= sizeof(struct ip);
1528 m->m_data += sizeof(struct ip);
1529 n->m_next = m;
1530 m = n;
1531 m->m_len = optlen + sizeof(struct ip);
1532 m->m_data += max_linkhdr;
1533 (void)memcpy(mtod(m, void *), ip, sizeof(struct ip));
1534 } else {
1535 m->m_data -= optlen;
1536 m->m_len += optlen;
1537 m->m_pkthdr.len += optlen;
1538 ovbcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
1539 }
1540 ip = mtod(m, struct ip *);
1541 bcopy(p->ipopt_list, ip + 1, optlen);
1542 *phlen = sizeof(struct ip) + optlen;
1543 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, *phlen >> 2);
1544 ip->ip_len += optlen;
1545 return (m);
1546 }
1547
1548 /*
1549 * Copy options from ip to jp,
1550 * omitting those not copied during fragmentation.
1551 */
1552 int
1553 ip_optcopy(ip, jp)
1554 struct ip *ip, *jp;
1555 {
1556 register u_char *cp, *dp;
1557 int opt, optlen, cnt;
1558
1559 cp = (u_char *)(ip + 1);
1560 dp = (u_char *)(jp + 1);
1561 cnt = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof (struct ip);
1562 for (; cnt > 0; cnt -= optlen, cp += optlen) {
1563 opt = cp[0];
1564 if (opt == IPOPT_EOL)
1565 break;
1566 if (opt == IPOPT_NOP) {
1567 /* Preserve for IP mcast tunnel's LSRR alignment. */
1568 *dp++ = IPOPT_NOP;
1569 optlen = 1;
1570 continue;
1571 }
1572 #if DIAGNOSTIC
1573 if (cnt < IPOPT_OLEN + sizeof(*cp))
1574 panic("malformed IPv4 option passed to ip_optcopy");
1575 #endif
1576 optlen = cp[IPOPT_OLEN];
1577 #if DIAGNOSTIC
1578 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
1579 panic("malformed IPv4 option passed to ip_optcopy");
1580 #endif
1581 /* bogus lengths should have been caught by ip_dooptions */
1582 if (optlen > cnt)
1583 optlen = cnt;
1584 if (IPOPT_COPIED(opt)) {
1585 bcopy(cp, dp, optlen);
1586 dp += optlen;
1587 }
1588 }
1589 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
1590 *dp++ = IPOPT_EOL;
1591 return (optlen);
1592 }
1593
1594 /*
1595 * IP socket option processing.
1596 */
1597 int
1598 ip_ctloutput(so, sopt)
1599 struct socket *so;
1600 struct sockopt *sopt;
1601 {
1602 struct inpcb *inp = sotoinpcb(so);
1603 int error, optval;
1604
1605 error = optval = 0;
1606 if (sopt->sopt_level != IPPROTO_IP) {
1607 return (EINVAL);
1608 }
1609
1610 switch (sopt->sopt_dir) {
1611 case SOPT_SET:
1612 switch (sopt->sopt_name) {
1613 case IP_OPTIONS:
1614 #ifdef notyet
1615 case IP_RETOPTS:
1616 #endif
1617 {
1618 struct mbuf *m;
1619 if (sopt->sopt_valsize > MLEN) {
1620 error = EMSGSIZE;
1621 break;
1622 }
1623 MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_HEADER);
1624 if (m == 0) {
1625 error = ENOBUFS;
1626 break;
1627 }
1628 m->m_len = sopt->sopt_valsize;
1629 error = sooptcopyin(sopt, mtod(m, char *), m->m_len,
1630 m->m_len);
1631 if (error)
1632 break;
1633
1634 return (ip_pcbopts(sopt->sopt_name, &inp->inp_options,
1635 m));
1636 }
1637
1638 case IP_TOS:
1639 case IP_TTL:
1640 case IP_RECVOPTS:
1641 case IP_RECVRETOPTS:
1642 case IP_RECVDSTADDR:
1643 case IP_RECVIF:
1644 case IP_RECVTTL:
1645 #if defined(NFAITH) && NFAITH > 0
1646 case IP_FAITH:
1647 #endif
1648 error = sooptcopyin(sopt, &optval, sizeof optval,
1649 sizeof optval);
1650 if (error)
1651 break;
1652
1653 switch (sopt->sopt_name) {
1654 case IP_TOS:
1655 inp->inp_ip_tos = optval;
1656 break;
1657
1658 case IP_TTL:
1659 inp->inp_ip_ttl = optval;
1660 break;
1661 #define OPTSET(bit) \
1662 if (optval) \
1663 inp->inp_flags |= bit; \
1664 else \
1665 inp->inp_flags &= ~bit;
1666
1667 case IP_RECVOPTS:
1668 OPTSET(INP_RECVOPTS);
1669 break;
1670
1671 case IP_RECVRETOPTS:
1672 OPTSET(INP_RECVRETOPTS);
1673 break;
1674
1675 case IP_RECVDSTADDR:
1676 OPTSET(INP_RECVDSTADDR);
1677 break;
1678
1679 case IP_RECVIF:
1680 OPTSET(INP_RECVIF);
1681 break;
1682
1683 case IP_RECVTTL:
1684 OPTSET(INP_RECVTTL);
1685 break;
1686
1687 #if defined(NFAITH) && NFAITH > 0
1688 case IP_FAITH:
1689 OPTSET(INP_FAITH);
1690 break;
1691 #endif
1692 }
1693 break;
1694 #undef OPTSET
1695
1696 case IP_MULTICAST_IF:
1697 case IP_MULTICAST_VIF:
1698 case IP_MULTICAST_TTL:
1699 case IP_MULTICAST_LOOP:
1700 case IP_ADD_MEMBERSHIP:
1701 case IP_DROP_MEMBERSHIP:
1702 error = ip_setmoptions(sopt, &inp->inp_moptions);
1703 break;
1704
1705 case IP_PORTRANGE:
1706 error = sooptcopyin(sopt, &optval, sizeof optval,
1707 sizeof optval);
1708 if (error)
1709 break;
1710
1711 switch (optval) {
1712 case IP_PORTRANGE_DEFAULT:
1713 inp->inp_flags &= ~(INP_LOWPORT);
1714 inp->inp_flags &= ~(INP_HIGHPORT);
1715 break;
1716
1717 case IP_PORTRANGE_HIGH:
1718 inp->inp_flags &= ~(INP_LOWPORT);
1719 inp->inp_flags |= INP_HIGHPORT;
1720 break;
1721
1722 case IP_PORTRANGE_LOW:
1723 inp->inp_flags &= ~(INP_HIGHPORT);
1724 inp->inp_flags |= INP_LOWPORT;
1725 break;
1726
1727 default:
1728 error = EINVAL;
1729 break;
1730 }
1731 break;
1732
1733 #if IPSEC
1734 case IP_IPSEC_POLICY:
1735 {
1736 caddr_t req = NULL;
1737 size_t len = 0;
1738 int priv;
1739 struct mbuf *m;
1740 int optname;
1741
1742 if (sopt->sopt_valsize > MCLBYTES) {
1743 error = EMSGSIZE;
1744 break;
1745 }
1746 if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
1747 break;
1748 if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
1749 break;
1750 priv = (sopt->sopt_p != NULL &&
1751 proc_suser(sopt->sopt_p) != 0) ? 0 : 1;
1752 if (m) {
1753 req = mtod(m, caddr_t);
1754 len = m->m_len;
1755 }
1756 optname = sopt->sopt_name;
1757 lck_mtx_lock(sadb_mutex);
1758 error = ipsec4_set_policy(inp, optname, req, len, priv);
1759 lck_mtx_unlock(sadb_mutex);
1760 m_freem(m);
1761 break;
1762 }
1763 #endif /*IPSEC*/
1764
1765 default:
1766 error = ENOPROTOOPT;
1767 break;
1768 }
1769 break;
1770
1771 case SOPT_GET:
1772 switch (sopt->sopt_name) {
1773 case IP_OPTIONS:
1774 case IP_RETOPTS:
1775 if (inp->inp_options)
1776 error = sooptcopyout(sopt,
1777 mtod(inp->inp_options,
1778 char *),
1779 inp->inp_options->m_len);
1780 else
1781 sopt->sopt_valsize = 0;
1782 break;
1783
1784 case IP_TOS:
1785 case IP_TTL:
1786 case IP_RECVOPTS:
1787 case IP_RECVRETOPTS:
1788 case IP_RECVDSTADDR:
1789 case IP_RECVIF:
1790 case IP_RECVTTL:
1791 case IP_PORTRANGE:
1792 #if defined(NFAITH) && NFAITH > 0
1793 case IP_FAITH:
1794 #endif
1795 switch (sopt->sopt_name) {
1796
1797 case IP_TOS:
1798 optval = inp->inp_ip_tos;
1799 break;
1800
1801 case IP_TTL:
1802 optval = inp->inp_ip_ttl;
1803 break;
1804
1805 #define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0)
1806
1807 case IP_RECVOPTS:
1808 optval = OPTBIT(INP_RECVOPTS);
1809 break;
1810
1811 case IP_RECVRETOPTS:
1812 optval = OPTBIT(INP_RECVRETOPTS);
1813 break;
1814
1815 case IP_RECVDSTADDR:
1816 optval = OPTBIT(INP_RECVDSTADDR);
1817 break;
1818
1819 case IP_RECVIF:
1820 optval = OPTBIT(INP_RECVIF);
1821 break;
1822
1823 case IP_RECVTTL:
1824 optval = OPTBIT(INP_RECVTTL);
1825 break;
1826
1827 case IP_PORTRANGE:
1828 if (inp->inp_flags & INP_HIGHPORT)
1829 optval = IP_PORTRANGE_HIGH;
1830 else if (inp->inp_flags & INP_LOWPORT)
1831 optval = IP_PORTRANGE_LOW;
1832 else
1833 optval = 0;
1834 break;
1835
1836 #if defined(NFAITH) && NFAITH > 0
1837 case IP_FAITH:
1838 optval = OPTBIT(INP_FAITH);
1839 break;
1840 #endif
1841 }
1842 error = sooptcopyout(sopt, &optval, sizeof optval);
1843 break;
1844
1845 case IP_MULTICAST_IF:
1846 case IP_MULTICAST_VIF:
1847 case IP_MULTICAST_TTL:
1848 case IP_MULTICAST_LOOP:
1849 case IP_ADD_MEMBERSHIP:
1850 case IP_DROP_MEMBERSHIP:
1851 error = ip_getmoptions(sopt, inp->inp_moptions);
1852 break;
1853
1854 #if IPSEC
1855 case IP_IPSEC_POLICY:
1856 {
1857 struct mbuf *m = NULL;
1858 caddr_t req = NULL;
1859 size_t len = 0;
1860
1861 if (m != 0) {
1862 req = mtod(m, caddr_t);
1863 len = m->m_len;
1864 }
1865 lck_mtx_lock(sadb_mutex);
1866 error = ipsec4_get_policy(sotoinpcb(so), req, len, &m);
1867 lck_mtx_unlock(sadb_mutex);
1868 if (error == 0)
1869 error = soopt_mcopyout(sopt, m); /* XXX */
1870 if (error == 0)
1871 m_freem(m);
1872 break;
1873 }
1874 #endif /*IPSEC*/
1875
1876 default:
1877 error = ENOPROTOOPT;
1878 break;
1879 }
1880 break;
1881 }
1882 return (error);
1883 }
1884
1885 /*
1886 * Set up IP options in pcb for insertion in output packets.
1887 * Store in mbuf with pointer in pcbopt, adding pseudo-option
1888 * with destination address if source routed.
1889 */
1890 static int
1891 ip_pcbopts(optname, pcbopt, m)
1892 int optname;
1893 struct mbuf **pcbopt;
1894 register struct mbuf *m;
1895 {
1896 register int cnt, optlen;
1897 register u_char *cp;
1898 u_char opt;
1899
1900 /* turn off any old options */
1901 if (*pcbopt)
1902 (void)m_free(*pcbopt);
1903 *pcbopt = 0;
1904 if (m == (struct mbuf *)0 || m->m_len == 0) {
1905 /*
1906 * Only turning off any previous options.
1907 */
1908 if (m)
1909 (void)m_free(m);
1910 return (0);
1911 }
1912
1913 #ifndef vax
1914 if (m->m_len % sizeof(int32_t))
1915 goto bad;
1916 #endif
1917 /*
1918 * IP first-hop destination address will be stored before
1919 * actual options; move other options back
1920 * and clear it when none present.
1921 */
1922 if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
1923 goto bad;
1924 cnt = m->m_len;
1925 m->m_len += sizeof(struct in_addr);
1926 cp = mtod(m, u_char *) + sizeof(struct in_addr);
1927 ovbcopy(mtod(m, caddr_t), (caddr_t)cp, (unsigned)cnt);
1928 bzero(mtod(m, caddr_t), sizeof(struct in_addr));
1929
1930 for (; cnt > 0; cnt -= optlen, cp += optlen) {
1931 opt = cp[IPOPT_OPTVAL];
1932 if (opt == IPOPT_EOL)
1933 break;
1934 if (opt == IPOPT_NOP)
1935 optlen = 1;
1936 else {
1937 if (cnt < IPOPT_OLEN + sizeof(*cp))
1938 goto bad;
1939 optlen = cp[IPOPT_OLEN];
1940 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
1941 goto bad;
1942 }
1943 switch (opt) {
1944
1945 default:
1946 break;
1947
1948 case IPOPT_LSRR:
1949 case IPOPT_SSRR:
1950 /*
1951 * user process specifies route as:
1952 * ->A->B->C->D
1953 * D must be our final destination (but we can't
1954 * check that since we may not have connected yet).
1955 * A is first hop destination, which doesn't appear in
1956 * actual IP option, but is stored before the options.
1957 */
1958 if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
1959 goto bad;
1960 m->m_len -= sizeof(struct in_addr);
1961 cnt -= sizeof(struct in_addr);
1962 optlen -= sizeof(struct in_addr);
1963 cp[IPOPT_OLEN] = optlen;
1964 /*
1965 * Move first hop before start of options.
1966 */
1967 bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
1968 sizeof(struct in_addr));
1969 /*
1970 * Then copy rest of options back
1971 * to close up the deleted entry.
1972 */
1973 ovbcopy((caddr_t)(&cp[IPOPT_OFFSET+1] +
1974 sizeof(struct in_addr)),
1975 (caddr_t)&cp[IPOPT_OFFSET+1],
1976 (unsigned)cnt + sizeof(struct in_addr));
1977 break;
1978 }
1979 }
1980 if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
1981 goto bad;
1982 *pcbopt = m;
1983 return (0);
1984
1985 bad:
1986 (void)m_free(m);
1987 return (EINVAL);
1988 }
1989
1990 /*
1991 * XXX
1992 * The whole multicast option thing needs to be re-thought.
1993 * Several of these options are equally applicable to non-multicast
1994 * transmission, and one (IP_MULTICAST_TTL) totally duplicates a
1995 * standard option (IP_TTL).
1996 */
1997
1998 /*
1999 * following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as interface index.
2000 */
2001 static struct ifnet *
2002 ip_multicast_if(a, ifindexp)
2003 struct in_addr *a;
2004 int *ifindexp;
2005 {
2006 int ifindex;
2007 struct ifnet *ifp;
2008
2009 if (ifindexp)
2010 *ifindexp = 0;
2011 if (ntohl(a->s_addr) >> 24 == 0) {
2012 ifindex = ntohl(a->s_addr) & 0xffffff;
2013 ifnet_head_lock_shared();
2014 if (ifindex < 0 || if_index < ifindex) {
2015 ifnet_head_done();
2016 return NULL;
2017 }
2018 ifp = ifindex2ifnet[ifindex];
2019 ifnet_head_done();
2020 if (ifindexp)
2021 *ifindexp = ifindex;
2022 } else {
2023 INADDR_TO_IFP(*a, ifp);
2024 }
2025 return ifp;
2026 }
2027
2028 /*
2029 * Set the IP multicast options in response to user setsockopt().
2030 */
2031 static int
2032 ip_setmoptions(sopt, imop)
2033 struct sockopt *sopt;
2034 struct ip_moptions **imop;
2035 {
2036 int error = 0;
2037 int i;
2038 struct in_addr addr;
2039 struct ip_mreq mreq;
2040 struct ifnet *ifp = NULL;
2041 struct ip_moptions *imo = *imop;
2042 int ifindex;
2043
2044 if (imo == NULL) {
2045 /*
2046 * No multicast option buffer attached to the pcb;
2047 * allocate one and initialize to default values.
2048 */
2049 error = ip_createmoptions(imop);
2050 if (error != 0)
2051 return error;
2052 imo = *imop;
2053 }
2054
2055 switch (sopt->sopt_name) {
2056 /* store an index number for the vif you wanna use in the send */
2057 case IP_MULTICAST_VIF:
2058 if (legal_vif_num == 0) {
2059 error = EOPNOTSUPP;
2060 break;
2061 }
2062 error = sooptcopyin(sopt, &i, sizeof i, sizeof i);
2063 if (error)
2064 break;
2065 if (!legal_vif_num(i) && (i != -1)) {
2066 error = EINVAL;
2067 break;
2068 }
2069 imo->imo_multicast_vif = i;
2070 break;
2071
2072 case IP_MULTICAST_IF:
2073 /*
2074 * Select the interface for outgoing multicast packets.
2075 */
2076 error = sooptcopyin(sopt, &addr, sizeof addr, sizeof addr);
2077 if (error)
2078 break;
2079 /*
2080 * INADDR_ANY is used to remove a previous selection.
2081 * When no interface is selected, a default one is
2082 * chosen every time a multicast packet is sent.
2083 */
2084 if (addr.s_addr == INADDR_ANY) {
2085 imo->imo_multicast_ifp = NULL;
2086 break;
2087 }
2088 /*
2089 * The selected interface is identified by its local
2090 * IP address. Find the interface and confirm that
2091 * it supports multicasting.
2092 */
2093 ifp = ip_multicast_if(&addr, &ifindex);
2094 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
2095 error = EADDRNOTAVAIL;
2096 break;
2097 }
2098 imo->imo_multicast_ifp = ifp;
2099 if (ifindex)
2100 imo->imo_multicast_addr = addr;
2101 else
2102 imo->imo_multicast_addr.s_addr = INADDR_ANY;
2103 break;
2104
2105 case IP_MULTICAST_TTL:
2106 /*
2107 * Set the IP time-to-live for outgoing multicast packets.
2108 * The original multicast API required a char argument,
2109 * which is inconsistent with the rest of the socket API.
2110 * We allow either a char or an int.
2111 */
2112 if (sopt->sopt_valsize == 1) {
2113 u_char ttl;
2114 error = sooptcopyin(sopt, &ttl, 1, 1);
2115 if (error)
2116 break;
2117 imo->imo_multicast_ttl = ttl;
2118 } else {
2119 u_int ttl;
2120 error = sooptcopyin(sopt, &ttl, sizeof ttl,
2121 sizeof ttl);
2122 if (error)
2123 break;
2124 if (ttl > 255)
2125 error = EINVAL;
2126 else
2127 imo->imo_multicast_ttl = ttl;
2128 }
2129 break;
2130
2131 case IP_MULTICAST_LOOP:
2132 /*
2133 * Set the loopback flag for outgoing multicast packets.
2134 * Must be zero or one. The original multicast API required a
2135 * char argument, which is inconsistent with the rest
2136 * of the socket API. We allow either a char or an int.
2137 */
2138 if (sopt->sopt_valsize == 1) {
2139 u_char loop;
2140 error = sooptcopyin(sopt, &loop, 1, 1);
2141 if (error)
2142 break;
2143 imo->imo_multicast_loop = !!loop;
2144 } else {
2145 u_int loop;
2146 error = sooptcopyin(sopt, &loop, sizeof loop,
2147 sizeof loop);
2148 if (error)
2149 break;
2150 imo->imo_multicast_loop = !!loop;
2151 }
2152 break;
2153
2154 case IP_ADD_MEMBERSHIP:
2155 /*
2156 * Add a multicast group membership.
2157 * Group must be a valid IP multicast address.
2158 */
2159 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
2160 if (error)
2161 break;
2162
2163 error = ip_addmembership(imo, &mreq);
2164 break;
2165
2166 case IP_DROP_MEMBERSHIP:
2167 /*
2168 * Drop a multicast group membership.
2169 * Group must be a valid IP multicast address.
2170 */
2171 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
2172 if (error)
2173 break;
2174
2175 error = ip_dropmembership(imo, &mreq);
2176 break;
2177
2178 default:
2179 error = EOPNOTSUPP;
2180 break;
2181 }
2182
2183 /*
2184 * If all options have default values, no need to keep the mbuf.
2185 */
2186 if (imo->imo_multicast_ifp == NULL &&
2187 imo->imo_multicast_vif == -1 &&
2188 imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL &&
2189 imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP &&
2190 imo->imo_num_memberships == 0) {
2191 FREE(*imop, M_IPMOPTS);
2192 *imop = NULL;
2193 }
2194
2195 return (error);
2196 }
2197
2198 /*
2199 * Set the IP multicast options in response to user setsockopt().
2200 */
2201 __private_extern__ int
2202 ip_createmoptions(
2203 struct ip_moptions **imop)
2204 {
2205 struct ip_moptions *imo;
2206 imo = (struct ip_moptions*) _MALLOC(sizeof(*imo), M_IPMOPTS,
2207 M_WAITOK);
2208
2209 if (imo == NULL)
2210 return (ENOBUFS);
2211 *imop = imo;
2212 imo->imo_multicast_ifp = NULL;
2213 imo->imo_multicast_addr.s_addr = INADDR_ANY;
2214 imo->imo_multicast_vif = -1;
2215 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
2216 imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
2217 imo->imo_num_memberships = 0;
2218
2219 return 0;
2220 }
2221
2222 /*
2223 * Add membership to an IPv4 multicast.
2224 */
2225 __private_extern__ int
2226 ip_addmembership(
2227 struct ip_moptions *imo,
2228 struct ip_mreq *mreq)
2229 {
2230 struct route ro;
2231 struct sockaddr_in *dst;
2232 struct ifnet *ifp = NULL;
2233 int error = 0;
2234 int i;
2235
2236 if (!IN_MULTICAST(ntohl(mreq->imr_multiaddr.s_addr))) {
2237 error = EINVAL;
2238 return error;
2239 }
2240 /*
2241 * If no interface address was provided, use the interface of
2242 * the route to the given multicast address.
2243 */
2244 if (mreq->imr_interface.s_addr == INADDR_ANY) {
2245 bzero((caddr_t)&ro, sizeof(ro));
2246 dst = (struct sockaddr_in *)&ro.ro_dst;
2247 dst->sin_len = sizeof(*dst);
2248 dst->sin_family = AF_INET;
2249 dst->sin_addr = mreq->imr_multiaddr;
2250 rtalloc(&ro);
2251 if (ro.ro_rt != NULL) {
2252 ifp = ro.ro_rt->rt_ifp;
2253 rtfree(ro.ro_rt);
2254 }
2255 else {
2256 /* If there's no default route, try using loopback */
2257 mreq->imr_interface.s_addr = INADDR_LOOPBACK;
2258 }
2259 }
2260
2261 if (ifp == NULL) {
2262 ifp = ip_multicast_if(&mreq->imr_interface, NULL);
2263 }
2264
2265 /*
2266 * See if we found an interface, and confirm that it
2267 * supports multicast.
2268 */
2269 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
2270 error = EADDRNOTAVAIL;
2271 return error;
2272 }
2273 /*
2274 * See if the membership already exists or if all the
2275 * membership slots are full.
2276 */
2277 for (i = 0; i < imo->imo_num_memberships; ++i) {
2278 if (imo->imo_membership[i]->inm_ifp == ifp &&
2279 imo->imo_membership[i]->inm_addr.s_addr
2280 == mreq->imr_multiaddr.s_addr)
2281 break;
2282 }
2283 if (i < imo->imo_num_memberships) {
2284 error = EADDRINUSE;
2285 return error;
2286 }
2287 if (i == IP_MAX_MEMBERSHIPS) {
2288 error = ETOOMANYREFS;
2289 return error;
2290 }
2291 /*
2292 * Everything looks good; add a new record to the multicast
2293 * address list for the given interface.
2294 */
2295 if ((imo->imo_membership[i] =
2296 in_addmulti(&mreq->imr_multiaddr, ifp)) == NULL) {
2297 error = ENOBUFS;
2298 return error;
2299 }
2300 ++imo->imo_num_memberships;
2301
2302 return error;
2303 }
2304
2305 /*
2306 * Drop membership of an IPv4 multicast.
2307 */
2308 __private_extern__ int
2309 ip_dropmembership(
2310 struct ip_moptions *imo,
2311 struct ip_mreq *mreq)
2312 {
2313 int error = 0;
2314 struct ifnet* ifp = NULL;
2315 int i;
2316
2317 if (!IN_MULTICAST(ntohl(mreq->imr_multiaddr.s_addr))) {
2318 error = EINVAL;
2319 return error;
2320 }
2321
2322 /*
2323 * If an interface address was specified, get a pointer
2324 * to its ifnet structure.
2325 */
2326 if (mreq->imr_interface.s_addr == INADDR_ANY)
2327 ifp = NULL;
2328 else {
2329 ifp = ip_multicast_if(&mreq->imr_interface, NULL);
2330 if (ifp == NULL) {
2331 error = EADDRNOTAVAIL;
2332 return error;
2333 }
2334 }
2335 /*
2336 * Find the membership in the membership array.
2337 */
2338 for (i = 0; i < imo->imo_num_memberships; ++i) {
2339 if ((ifp == NULL ||
2340 imo->imo_membership[i]->inm_ifp == ifp) &&
2341 imo->imo_membership[i]->inm_addr.s_addr ==
2342 mreq->imr_multiaddr.s_addr)
2343 break;
2344 }
2345 if (i == imo->imo_num_memberships) {
2346 error = EADDRNOTAVAIL;
2347 return error;
2348 }
2349 /*
2350 * Give up the multicast address record to which the
2351 * membership points.
2352 */
2353 in_delmulti(&imo->imo_membership[i]);
2354 /*
2355 * Remove the gap in the membership array.
2356 */
2357 for (++i; i < imo->imo_num_memberships; ++i)
2358 imo->imo_membership[i-1] = imo->imo_membership[i];
2359 --imo->imo_num_memberships;
2360
2361 return error;
2362 }
2363
2364 /*
2365 * Return the IP multicast options in response to user getsockopt().
2366 */
2367 static int
2368 ip_getmoptions(sopt, imo)
2369 struct sockopt *sopt;
2370 register struct ip_moptions *imo;
2371 {
2372 struct in_addr addr;
2373 struct in_ifaddr *ia;
2374 int error, optval;
2375 u_char coptval;
2376
2377 error = 0;
2378 switch (sopt->sopt_name) {
2379 case IP_MULTICAST_VIF:
2380 if (imo != NULL)
2381 optval = imo->imo_multicast_vif;
2382 else
2383 optval = -1;
2384 error = sooptcopyout(sopt, &optval, sizeof optval);
2385 break;
2386
2387 case IP_MULTICAST_IF:
2388 if (imo == NULL || imo->imo_multicast_ifp == NULL)
2389 addr.s_addr = INADDR_ANY;
2390 else if (imo->imo_multicast_addr.s_addr) {
2391 /* return the value user has set */
2392 addr = imo->imo_multicast_addr;
2393 } else {
2394 IFP_TO_IA(imo->imo_multicast_ifp, ia);
2395 addr.s_addr = (ia == NULL) ? INADDR_ANY
2396 : IA_SIN(ia)->sin_addr.s_addr;
2397 }
2398 error = sooptcopyout(sopt, &addr, sizeof addr);
2399 break;
2400
2401 case IP_MULTICAST_TTL:
2402 if (imo == 0)
2403 optval = coptval = IP_DEFAULT_MULTICAST_TTL;
2404 else
2405 optval = coptval = imo->imo_multicast_ttl;
2406 if (sopt->sopt_valsize == 1)
2407 error = sooptcopyout(sopt, &coptval, 1);
2408 else
2409 error = sooptcopyout(sopt, &optval, sizeof optval);
2410 break;
2411
2412 case IP_MULTICAST_LOOP:
2413 if (imo == 0)
2414 optval = coptval = IP_DEFAULT_MULTICAST_LOOP;
2415 else
2416 optval = coptval = imo->imo_multicast_loop;
2417 if (sopt->sopt_valsize == 1)
2418 error = sooptcopyout(sopt, &coptval, 1);
2419 else
2420 error = sooptcopyout(sopt, &optval, sizeof optval);
2421 break;
2422
2423 default:
2424 error = ENOPROTOOPT;
2425 break;
2426 }
2427 return (error);
2428 }
2429
2430 /*
2431 * Discard the IP multicast options.
2432 */
2433 void
2434 ip_freemoptions(imo)
2435 register struct ip_moptions *imo;
2436 {
2437 register int i;
2438
2439 if (imo != NULL) {
2440 for (i = 0; i < imo->imo_num_memberships; ++i)
2441 in_delmulti(&imo->imo_membership[i]);
2442 FREE(imo, M_IPMOPTS);
2443 }
2444 }
2445
2446 /*
2447 * Routine called from ip_output() to loop back a copy of an IP multicast
2448 * packet to the input queue of a specified interface. Note that this
2449 * calls the output routine of the loopback "driver", but with an interface
2450 * pointer that might NOT be a loopback interface -- evil, but easier than
2451 * replicating that code here.
2452 */
2453 static void
2454 ip_mloopback(ifp, m, dst, hlen)
2455 struct ifnet *ifp;
2456 register struct mbuf *m;
2457 register struct sockaddr_in *dst;
2458 int hlen;
2459 {
2460 register struct ip *ip;
2461 struct mbuf *copym;
2462
2463 copym = m_copy(m, 0, M_COPYALL);
2464 if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen))
2465 copym = m_pullup(copym, hlen);
2466 if (copym != NULL) {
2467 /*
2468 * We don't bother to fragment if the IP length is greater
2469 * than the interface's MTU. Can this possibly matter?
2470 */
2471 ip = mtod(copym, struct ip *);
2472 HTONS(ip->ip_len);
2473 HTONS(ip->ip_off);
2474 ip->ip_sum = 0;
2475 ip->ip_sum = in_cksum(copym, hlen);
2476 /*
2477 * NB:
2478 * It's not clear whether there are any lingering
2479 * reentrancy problems in other areas which might
2480 * be exposed by using ip_input directly (in
2481 * particular, everything which modifies the packet
2482 * in-place). Yet another option is using the
2483 * protosw directly to deliver the looped back
2484 * packet. For the moment, we'll err on the side
2485 * of safety by using if_simloop().
2486 */
2487 #if 1 /* XXX */
2488 if (dst->sin_family != AF_INET) {
2489 printf("ip_mloopback: bad address family %d\n",
2490 dst->sin_family);
2491 dst->sin_family = AF_INET;
2492 }
2493 #endif
2494
2495
2496 /*
2497 * Mark checksum as valid or calculate checksum for loopback.
2498 *
2499 * This is done this way because we have to embed the ifp of
2500 * the interface we will send the original copy of the packet
2501 * out on in the mbuf. ip_input will check if_hwassist of the
2502 * embedded ifp and ignore all csum_flags if if_hwassist is 0.
2503 * The UDP checksum has not been calculated yet.
2504 */
2505 if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
2506 if (IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist)) {
2507 copym->m_pkthdr.csum_flags |=
2508 CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
2509 CSUM_IP_CHECKED | CSUM_IP_VALID;
2510 copym->m_pkthdr.csum_data = 0xffff;
2511 } else {
2512 NTOHS(ip->ip_len);
2513 in_delayed_cksum(copym);
2514 HTONS(ip->ip_len);
2515 }
2516 }
2517
2518
2519 /*
2520 * TedW:
2521 * We need to send all loopback traffic down to dlil in case
2522 * a filter has tapped-in.
2523 */
2524
2525 /*
2526 * Stuff the 'real' ifp into the pkthdr, to be used in matching
2527 * in ip_input(); we need the loopback ifp/dl_tag passed as args
2528 * to make the loopback driver compliant with the data link
2529 * requirements.
2530 */
2531 if (lo_ifp) {
2532 copym->m_pkthdr.rcvif = ifp;
2533 dlil_output(lo_ifp, PF_INET, copym, 0, (struct sockaddr *) dst, 0);
2534 } else {
2535 printf("Warning: ip_output call to dlil_find_dltag failed!\n");
2536 m_freem(copym);
2537 }
2538
2539 /* if_simloop(ifp, copym, (struct sockaddr *)dst, 0);*/
2540 }
2541 }