]> git.saurik.com Git - apple/xnu.git/blob - bsd/netinet/ip_output.c
xnu-792.13.8.tar.gz
[apple/xnu.git] / bsd / netinet / ip_output.c
1 /*
2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_OSREFERENCE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the
10 * License may not be used to create, or enable the creation or
11 * redistribution of, unlawful or unlicensed copies of an Apple operating
12 * system, or to circumvent, violate, or enable the circumvention or
13 * violation of, any terms of an Apple operating system software license
14 * agreement.
15 *
16 * Please obtain a copy of the License at
17 * http://www.opensource.apple.com/apsl/ and read it before using this
18 * file.
19 *
20 * The Original Code and all software distributed under the License are
21 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
22 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
23 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
24 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
25 * Please see the License for the specific language governing rights and
26 * limitations under the License.
27 *
28 * @APPLE_LICENSE_OSREFERENCE_HEADER_END@
29 */
30 /*
31 * Copyright (c) 1982, 1986, 1988, 1990, 1993
32 * The Regents of the University of California. All rights reserved.
33 *
34 * Redistribution and use in source and binary forms, with or without
35 * modification, are permitted provided that the following conditions
36 * are met:
37 * 1. Redistributions of source code must retain the above copyright
38 * notice, this list of conditions and the following disclaimer.
39 * 2. Redistributions in binary form must reproduce the above copyright
40 * notice, this list of conditions and the following disclaimer in the
41 * documentation and/or other materials provided with the distribution.
42 * 3. All advertising materials mentioning features or use of this software
43 * must display the following acknowledgement:
44 * This product includes software developed by the University of
45 * California, Berkeley and its contributors.
46 * 4. Neither the name of the University nor the names of its contributors
47 * may be used to endorse or promote products derived from this software
48 * without specific prior written permission.
49 *
50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * SUCH DAMAGE.
61 *
62 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94
63 * $FreeBSD: src/sys/netinet/ip_output.c,v 1.99.2.16 2001/07/19 06:37:26 kris Exp $
64 */
65
66 #define _IP_VHL
67
68 #include <sys/param.h>
69 #include <sys/systm.h>
70 #include <sys/kernel.h>
71 #include <sys/malloc.h>
72 #include <sys/mbuf.h>
73 #include <sys/protosw.h>
74 #include <sys/socket.h>
75 #include <sys/socketvar.h>
76 #include <kern/locks.h>
77 #include <sys/sysctl.h>
78
79 #include <net/if.h>
80 #include <net/route.h>
81
82 #include <netinet/in.h>
83 #include <netinet/in_systm.h>
84 #include <netinet/ip.h>
85 #include <netinet/in_pcb.h>
86 #include <netinet/in_var.h>
87 #include <netinet/ip_var.h>
88
89 #include <netinet/kpi_ipfilter_var.h>
90
91 #include "faith.h"
92
93 #include <net/dlil.h>
94 #include <sys/kdebug.h>
95
96 #define DBG_LAYER_BEG NETDBG_CODE(DBG_NETIP, 1)
97 #define DBG_LAYER_END NETDBG_CODE(DBG_NETIP, 3)
98 #define DBG_FNC_IP_OUTPUT NETDBG_CODE(DBG_NETIP, (1 << 8) | 1)
99 #define DBG_FNC_IPSEC4_OUTPUT NETDBG_CODE(DBG_NETIP, (2 << 8) | 1)
100
101
102 #if IPSEC
103 #include <netinet6/ipsec.h>
104 #include <netkey/key.h>
105 #if IPSEC_DEBUG
106 #include <netkey/key_debug.h>
107 #else
108 #define KEYDEBUG(lev,arg)
109 #endif
110 #endif /*IPSEC*/
111
112 #include <netinet/ip_fw.h>
113 #include <netinet/ip_divert.h>
114
115 #if DUMMYNET
116 #include <netinet/ip_dummynet.h>
117 #endif
118
119 #if IPFIREWALL_FORWARD_DEBUG
/*
 * print_ip(a): debug helper that prints the IPv4 address stored in the
 * struct in_addr `a` (network byte order) as a dotted quad, with no
 * trailing newline.
 *
 * The expansion is wrapped in do { } while (0) and does not end in a
 * semicolon, so `print_ip(x);` is exactly one statement and can be used
 * as the body of an unbraced if/else.  The address is converted to host
 * order once, and every octet is cast to long so that the argument type
 * matches the %ld conversions in the format string.
 */
#define print_ip(a) do {							\
	unsigned long print_ip_host_ = (unsigned long)ntohl((a).s_addr);	\
	printf("%ld.%ld.%ld.%ld",						\
	    (long)((print_ip_host_ >> 24) & 0xFF),				\
	    (long)((print_ip_host_ >> 16) & 0xFF),				\
	    (long)((print_ip_host_ >> 8) & 0xFF),				\
	    (long)(print_ip_host_ & 0xFF));					\
} while (0)
124 #endif
125
126 #if IPSEC
127 extern lck_mtx_t *sadb_mutex;
128 #endif
129
/*
 * Counter for the IP identification field.  When RANDOM_IP_ID is not
 * configured, ip_output_list() stamps locally built headers with
 * htons(ip_id++).
 */
130 u_short ip_id;
131
/* Forward declarations for helpers that are private to this file. */
132 static struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *);
133 static struct ifnet *ip_multicast_if(struct in_addr *, int *);
134 static void ip_mloopback(struct ifnet *, struct mbuf *,
135 struct sockaddr_in *, int);
136 static int ip_getmoptions(struct sockopt *, struct ip_moptions *);
137 static int ip_pcbopts(int, struct mbuf **, struct mbuf *);
138 static int ip_setmoptions(struct sockopt *, struct ip_moptions **);
139
/* Non-static routines that are prototyped here instead of in a header. */
140 int ip_createmoptions(struct ip_moptions **imop);
141 int ip_addmembership(struct ip_moptions *imo, struct ip_mreq *mreq);
142 int ip_dropmembership(struct ip_moptions *imo, struct ip_mreq *mreq);
143 int ip_optcopy(struct ip *, struct ip *);
/*
 * Optional packet-filter hook.  ip_output_list() calls through it only
 * when the pointer is non-NULL.
 */
144 extern int (*fr_checkp)(struct ip *, int, struct ifnet *, int, struct mbuf **);
145 #ifdef __APPLE__
146 extern struct mbuf* m_dup(register struct mbuf *m, int how);
147 #endif
148
/*
 * apple_hwcksum_tx: tested by ip_output_list() before it takes the SUM16
 * hardware-checksum special case.
 * route_generation: compared against a cached route's generation_id in
 * ip_output_list() to notice that routing has changed.
 */
149 extern int apple_hwcksum_tx;
150 extern u_long route_generation;
151
152 extern struct protosw inetsw[];
153
/*
 * ip_linklocal_stat: counters bumped by ip_output_list() for packets with a
 * link-local source or destination.
 * ip_mutex: taken on entry to ip_output_list() and released around calls
 * out to the IP filters and the firewall.
 */
154 extern struct ip_linklocal_stat ip_linklocal_stat;
155 extern lck_mtx_t *ip_mutex;
156
157 /* temporary: for testing */
/* A non-zero ipsec_bypass makes ip_output_list() skip IPsec processing. */
158 #if IPSEC
159 extern int ipsec_bypass;
160 #endif
161
/*
 * Integer exported read/write through SYSCTL_INT under _net_inet_ip with
 * the name "maxchainsent".
 */
162 static int ip_maxchainsent = 0;
163 SYSCTL_INT(_net_inet_ip, OID_AUTO, maxchainsent, CTLFLAG_RW,
164 &ip_maxchainsent, 0, "use dlil_output_list");
/*
 * ip_output: send one IP packet.
 *
 * m0 is the mbuf chain holding the packet; it must begin with a skeletal
 * IP header (len, off, ttl, proto, tos, src and dst filled in).  The
 * packet's mbuf chain is freed by the output path.  The opt mbuf, when one
 * is supplied, is not freed and stays with the caller.
 *
 * This is a thin front end for ip_output_list(): it passes 0 for the
 * packetchain argument and returns that function's result unchanged.
 */
int
ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro, int flags,
    struct ip_moptions *imo)
{
	return ip_output_list(m0, 0, opt, ro, flags, imo);
}
183
184 int
185 ip_output_list(
186 struct mbuf *m0,
187 int packetchain,
188 struct mbuf *opt,
189 struct route *ro,
190 int flags,
191 struct ip_moptions *imo)
192 {
193 struct ip *ip, *mhip;
194 struct ifnet *ifp = NULL;
195 struct mbuf *m = m0;
196 int hlen = sizeof (struct ip);
197 int len, off, error = 0;
198 struct sockaddr_in *dst = NULL;
199 struct in_ifaddr *ia = NULL;
200 int isbroadcast, sw_csum;
201 struct in_addr pkt_dst;
202 #if IPSEC
203 struct route iproute;
204 struct socket *so = NULL;
205 struct secpolicy *sp = NULL;
206 #endif
207 #if IPFIREWALL_FORWARD
208 int fwd_rewrite_src = 0;
209 #endif
210 struct ip_fw_args args;
211 int didfilter = 0;
212 ipfilter_t inject_filter_ref = 0;
213 struct m_tag *tag;
214 struct route dn_route;
215 struct mbuf * packetlist;
216 int pktcnt = 0;
217
218 lck_mtx_lock(ip_mutex);
219
220 KERNEL_DEBUG(DBG_FNC_IP_OUTPUT | DBG_FUNC_START, 0,0,0,0,0);
221
222 packetlist = m0;
223 args.eh = NULL;
224 args.rule = NULL;
225 args.next_hop = NULL;
226 args.divert_rule = 0; /* divert cookie */
227
228 /* Grab info from mtags prepended to the chain */
229 #if DUMMYNET
230 if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_DUMMYNET, NULL)) != NULL) {
231 struct dn_pkt_tag *dn_tag;
232
233 dn_tag = (struct dn_pkt_tag *)(tag+1);
234 args.rule = dn_tag->rule;
235 opt = NULL;
236 dn_route = dn_tag->ro;
237 ro = &dn_route;
238
239 imo = NULL;
240 dst = dn_tag->dn_dst;
241 ifp = dn_tag->ifp;
242 flags = dn_tag->flags;
243
244 m_tag_delete(m0, tag);
245 }
246 #endif /* DUMMYNET */
247
248 if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_DIVERT, NULL)) != NULL) {
249 struct divert_tag *div_tag;
250
251 div_tag = (struct divert_tag *)(tag+1);
252 args.divert_rule = div_tag->cookie;
253
254 m_tag_delete(m0, tag);
255 }
256 if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_IPFORWARD, NULL)) != NULL) {
257 struct ip_fwd_tag *ipfwd_tag;
258
259 ipfwd_tag = (struct ip_fwd_tag *)(tag+1);
260 args.next_hop = ipfwd_tag->next_hop;
261
262 m_tag_delete(m0, tag);
263 }
264
265 m = m0;
266
267 #if DIAGNOSTIC
268 if ( !m || (m->m_flags & M_PKTHDR) != 0)
269 panic("ip_output no HDR");
270 if (!ro)
271 panic("ip_output no route, proto = %d",
272 mtod(m, struct ip *)->ip_p);
273 #endif
274
275 if (args.rule != NULL) { /* dummynet already saw us */
276 ip = mtod(m, struct ip *);
277 hlen = IP_VHL_HL(ip->ip_vhl) << 2 ;
278 if (ro->ro_rt != NULL)
279 ia = (struct in_ifaddr *)ro->ro_rt->rt_ifa;
280 if (ia)
281 ifaref(&ia->ia_ifa);
282 #if IPSEC
283 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) {
284 so = ipsec_getsocket(m);
285 (void)ipsec_setsocket(m, NULL);
286 }
287 #endif
288 goto sendit;
289 }
290
291 #if IPSEC
292 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) {
293 so = ipsec_getsocket(m);
294 (void)ipsec_setsocket(m, NULL);
295 }
296 #endif
297 loopit:
298 /*
299 * No need to process packet twice if we've
300 * already seen it
301 */
302 inject_filter_ref = ipf_get_inject_filter(m);
303
304 if (opt) {
305 m = ip_insertoptions(m, opt, &len);
306 hlen = len;
307 }
308 ip = mtod(m, struct ip *);
309 pkt_dst = args.next_hop ? args.next_hop->sin_addr : ip->ip_dst;
310
311 /*
312 * Fill in IP header.
313 */
314 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
315 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, hlen >> 2);
316 ip->ip_off &= IP_DF;
317 #if RANDOM_IP_ID
318 ip->ip_id = ip_randomid();
319 #else
320 ip->ip_id = htons(ip_id++);
321 #endif
322 ipstat.ips_localout++;
323 } else {
324 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
325 }
326
327 KERNEL_DEBUG(DBG_LAYER_BEG, ip->ip_dst.s_addr,
328 ip->ip_src.s_addr, ip->ip_p, ip->ip_off, ip->ip_len);
329
330 dst = (struct sockaddr_in *)&ro->ro_dst;
331
332 /*
333 * If there is a cached route,
334 * check that it is to the same destination
335 * and is still up. If not, free it and try again.
336 * The address family should also be checked in case of sharing the
337 * cache with IPv6.
338 */
339
340 {
341 if (ro->ro_rt && (ro->ro_rt->generation_id != route_generation) &&
342 ((flags & (IP_ROUTETOIF | IP_FORWARDING)) == 0) && (ip->ip_src.s_addr != INADDR_ANY) &&
343 (ifa_foraddr(ip->ip_src.s_addr) == 0)) {
344 error = EADDRNOTAVAIL;
345 goto bad;
346 }
347 }
348 if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
349 dst->sin_family != AF_INET ||
350 dst->sin_addr.s_addr != pkt_dst.s_addr)) {
351 rtfree(ro->ro_rt);
352 ro->ro_rt = (struct rtentry *)0;
353 }
354 if (ro->ro_rt == 0) {
355 bzero(dst, sizeof(*dst));
356 dst->sin_family = AF_INET;
357 dst->sin_len = sizeof(*dst);
358 dst->sin_addr = pkt_dst;
359 }
360 /*
361 * If routing to interface only,
362 * short circuit routing lookup.
363 */
364 #define ifatoia(ifa) ((struct in_ifaddr *)(ifa))
365 #define sintosa(sin) ((struct sockaddr *)(sin))
366 if (flags & IP_ROUTETOIF) {
367 if (ia)
368 ifafree(&ia->ia_ifa);
369 if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0) {
370 if ((ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) {
371 ipstat.ips_noroute++;
372 error = ENETUNREACH;
373 goto bad;
374 }
375 }
376 ifp = ia->ia_ifp;
377 ip->ip_ttl = 1;
378 isbroadcast = in_broadcast(dst->sin_addr, ifp);
379 } else {
380 /*
381 * If this is the case, we probably don't want to allocate
382 * a protocol-cloned route since we didn't get one from the
383 * ULP. This lets TCP do its thing, while not burdening
384 * forwarding or ICMP with the overhead of cloning a route.
385 * Of course, we still want to do any cloning requested by
386 * the link layer, as this is probably required in all cases
387 * for correct operation (as it is for ARP).
388 */
389 if (ro->ro_rt == 0)
390 rtalloc_ign(ro, RTF_PRCLONING);
391 if (ro->ro_rt == 0) {
392 ipstat.ips_noroute++;
393 error = EHOSTUNREACH;
394 goto bad;
395 }
396 if (ia)
397 ifafree(&ia->ia_ifa);
398 ia = ifatoia(ro->ro_rt->rt_ifa);
399 if (ia)
400 ifaref(&ia->ia_ifa);
401 ifp = ro->ro_rt->rt_ifp;
402 ro->ro_rt->rt_use++;
403 if (ro->ro_rt->rt_flags & RTF_GATEWAY)
404 dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
405 if (ro->ro_rt->rt_flags & RTF_HOST)
406 isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST);
407 else
408 isbroadcast = in_broadcast(dst->sin_addr, ifp);
409 }
410 if (IN_MULTICAST(ntohl(pkt_dst.s_addr))) {
411 struct in_multi *inm;
412
413 m->m_flags |= M_MCAST;
414 /*
415 * IP destination address is multicast. Make sure "dst"
416 * still points to the address in "ro". (It may have been
417 * changed to point to a gateway address, above.)
418 */
419 dst = (struct sockaddr_in *)&ro->ro_dst;
420 /*
421 * See if the caller provided any multicast options
422 */
423 if (imo != NULL) {
424 if ((flags & IP_RAWOUTPUT) == 0) ip->ip_ttl = imo->imo_multicast_ttl;
425 if (imo->imo_multicast_ifp != NULL) {
426 ifp = imo->imo_multicast_ifp;
427 }
428 if (imo->imo_multicast_vif != -1 &&
429 ((flags & IP_RAWOUTPUT) == 0 || ip->ip_src.s_addr == INADDR_ANY))
430 ip->ip_src.s_addr =
431 ip_mcast_src(imo->imo_multicast_vif);
432 } else
433 if ((flags & IP_RAWOUTPUT) == 0) ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
434 /*
435 * Confirm that the outgoing interface supports multicast.
436 */
437 if ((imo == NULL) || (imo->imo_multicast_vif == -1)) {
438 if ((ifp->if_flags & IFF_MULTICAST) == 0) {
439 ipstat.ips_noroute++;
440 error = ENETUNREACH;
441 goto bad;
442 }
443 }
444 /*
445 * If source address not specified yet, use address
446 * of outgoing interface.
447 */
448 if (ip->ip_src.s_addr == INADDR_ANY) {
449 register struct in_ifaddr *ia1;
450
451 TAILQ_FOREACH(ia1, &in_ifaddrhead, ia_link)
452 if (ia1->ia_ifp == ifp) {
453 ip->ip_src = IA_SIN(ia1)->sin_addr;
454
455 break;
456 }
457 if (ip->ip_src.s_addr == INADDR_ANY) {
458 error = ENETUNREACH;
459 goto bad;
460 }
461 }
462
463 ifnet_lock_shared(ifp);
464 IN_LOOKUP_MULTI(pkt_dst, ifp, inm);
465 ifnet_lock_done(ifp);
466 if (inm != NULL &&
467 (imo == NULL || imo->imo_multicast_loop)) {
468 /*
469 * If we belong to the destination multicast group
470 * on the outgoing interface, and the caller did not
471 * forbid loopback, loop back a copy.
472 */
473 if (!TAILQ_EMPTY(&ipv4_filters)) {
474 struct ipfilter *filter;
475 int seen = (inject_filter_ref == 0);
476 struct ipf_pktopts *ippo = 0, ipf_pktopts;
477
478 if (imo) {
479 ippo = &ipf_pktopts;
480 ipf_pktopts.ippo_mcast_ifnet = imo->imo_multicast_ifp;
481 ipf_pktopts.ippo_mcast_ttl = imo->imo_multicast_ttl;
482 ipf_pktopts.ippo_mcast_loop = imo->imo_multicast_loop;
483 }
484
485 lck_mtx_unlock(ip_mutex);
486 ipf_ref();
487
488 /* 4135317 - always pass network byte order to filter */
489 HTONS(ip->ip_len);
490 HTONS(ip->ip_off);
491
492 TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) {
493 if (seen == 0) {
494 if ((struct ipfilter *)inject_filter_ref == filter)
495 seen = 1;
496 } else if (filter->ipf_filter.ipf_output) {
497 errno_t result;
498 result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, ippo);
499 if (result == EJUSTRETURN) {
500 ipf_unref();
501 goto done;
502 }
503 if (result != 0) {
504 ipf_unref();
505 lck_mtx_lock(ip_mutex);
506 goto bad;
507 }
508 }
509 }
510
511 /* set back to host byte order */
512 NTOHS(ip->ip_len);
513 NTOHS(ip->ip_off);
514
515 lck_mtx_lock(ip_mutex);
516 ipf_unref();
517 didfilter = 1;
518 }
519 ip_mloopback(ifp, m, dst, hlen);
520 }
521 else {
522 /*
523 * If we are acting as a multicast router, perform
524 * multicast forwarding as if the packet had just
525 * arrived on the interface to which we are about
526 * to send. The multicast forwarding function
527 * recursively calls this function, using the
528 * IP_FORWARDING flag to prevent infinite recursion.
529 *
530 * Multicasts that are looped back by ip_mloopback(),
531 * above, will be forwarded by the ip_input() routine,
532 * if necessary.
533 */
534 if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
535 /*
536 * Check if rsvp daemon is running. If not, don't
537 * set ip_moptions. This ensures that the packet
538 * is multicast and not just sent down one link
539 * as prescribed by rsvpd.
540 */
541 if (!rsvp_on)
542 imo = NULL;
543 if (ip_mforward(ip, ifp, m, imo) != 0) {
544 m_freem(m);
545 lck_mtx_unlock(ip_mutex);
546 goto done;
547 }
548 }
549 }
550
551 /*
552 * Multicasts with a time-to-live of zero may be looped-
553 * back, above, but must not be transmitted on a network.
554 * Also, multicasts addressed to the loopback interface
555 * are not sent -- the above call to ip_mloopback() will
556 * loop back a copy if this host actually belongs to the
557 * destination group on the loopback interface.
558 */
559 if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) {
560 m_freem(m);
561 lck_mtx_unlock(ip_mutex);
562 goto done;
563 }
564
565 goto sendit;
566 }
567 #ifndef notdef
568 /*
569 * If source address not specified yet, use address
570 * of outgoing interface.
571 */
572 if (ip->ip_src.s_addr == INADDR_ANY) {
573 ip->ip_src = IA_SIN(ia)->sin_addr;
574 #if IPFIREWALL_FORWARD
575 /* Keep note that we did this - if the firewall changes
576 * the next-hop, our interface may change, changing the
577 * default source IP. It's a shame so much effort happens
578 * twice. Oh well.
579 */
580 fwd_rewrite_src++;
581 #endif /* IPFIREWALL_FORWARD */
582 }
583 #endif /* notdef */
584
585 /*
586 * Look for broadcast address and
587 * verify user is allowed to send
588 * such a packet.
589 */
590 if (isbroadcast) {
591 if ((ifp->if_flags & IFF_BROADCAST) == 0) {
592 error = EADDRNOTAVAIL;
593 goto bad;
594 }
595 if ((flags & IP_ALLOWBROADCAST) == 0) {
596 error = EACCES;
597 goto bad;
598 }
599 /* don't allow broadcast messages to be fragmented */
600 if ((u_short)ip->ip_len > ifp->if_mtu) {
601 error = EMSGSIZE;
602 goto bad;
603 }
604 m->m_flags |= M_BCAST;
605 } else {
606 m->m_flags &= ~M_BCAST;
607 }
608
609 sendit:
610 /*
611 * Force IP TTL to 255 following draft-ietf-zeroconf-ipv4-linklocal.txt
612 */
613 if (IN_LINKLOCAL(ntohl(ip->ip_src.s_addr)) || IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr))) {
614 ip_linklocal_stat.iplls_out_total++;
615 if (ip->ip_ttl != MAXTTL) {
616 ip_linklocal_stat.iplls_out_badttl++;
617 ip->ip_ttl = MAXTTL;
618 }
619 }
620
621 injectit:
622 if (!didfilter && !TAILQ_EMPTY(&ipv4_filters)) {
623 struct ipfilter *filter;
624 int seen = (inject_filter_ref == 0);
625
626 lck_mtx_unlock(ip_mutex);
627 ipf_ref();
628
629 /* 4135317 - always pass network byte order to filter */
630 HTONS(ip->ip_len);
631 HTONS(ip->ip_off);
632
633 TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) {
634 if (seen == 0) {
635 if ((struct ipfilter *)inject_filter_ref == filter)
636 seen = 1;
637 } else if (filter->ipf_filter.ipf_output) {
638 errno_t result;
639 result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, 0);
640 if (result == EJUSTRETURN) {
641 ipf_unref();
642 goto done;
643 }
644 if (result != 0) {
645 ipf_unref();
646 lck_mtx_lock(ip_mutex);
647 goto bad;
648 }
649 }
650 }
651
652 /* set back to host byte order */
653 NTOHS(ip->ip_len);
654 NTOHS(ip->ip_off);
655
656 ipf_unref();
657 lck_mtx_lock(ip_mutex);
658 }
659
660 #if IPSEC
661 /* temporary for testing only: bypass ipsec altogether */
662
663 if (ipsec_bypass != 0 || (flags & IP_NOIPSEC) != 0)
664 goto skip_ipsec;
665
666 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_START, 0,0,0,0,0);
667
668 lck_mtx_lock(sadb_mutex);
669
670 /* get SP for this packet */
671 if (so == NULL)
672 sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, flags, &error);
673 else
674 sp = ipsec4_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error);
675
676 if (sp == NULL) {
677 ipsecstat.out_inval++;
678 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 0,0,0,0,0);
679 lck_mtx_unlock(sadb_mutex);
680 goto bad;
681 }
682
683 error = 0;
684
685 /* check policy */
686 switch (sp->policy) {
687 case IPSEC_POLICY_DISCARD:
688 /*
689 * This packet is just discarded.
690 */
691 ipsecstat.out_polvio++;
692 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 1,0,0,0,0);
693 lck_mtx_unlock(sadb_mutex);
694 goto bad;
695
696 case IPSEC_POLICY_BYPASS:
697 case IPSEC_POLICY_NONE:
698 /* no need to do IPsec. */
699 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 2,0,0,0,0);
700 lck_mtx_unlock(sadb_mutex);
701 goto skip_ipsec;
702
703 case IPSEC_POLICY_IPSEC:
704 if (sp->req == NULL) {
705 /* acquire a policy */
706 error = key_spdacquire(sp);
707 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 3,0,0,0,0);
708 lck_mtx_unlock(sadb_mutex);
709 goto bad;
710 }
711 break;
712
713 case IPSEC_POLICY_ENTRUST:
714 default:
715 printf("ip_output: Invalid policy found. %d\n", sp->policy);
716 }
717 {
718 struct ipsec_output_state state;
719 bzero(&state, sizeof(state));
720 state.m = m;
721 if (flags & IP_ROUTETOIF) {
722 state.ro = &iproute;
723 bzero(&iproute, sizeof(iproute));
724 } else
725 state.ro = ro;
726 state.dst = (struct sockaddr *)dst;
727
728 ip->ip_sum = 0;
729
730 /*
731 * XXX
732 * delayed checksums are not currently compatible with IPsec
733 */
734 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
735 in_delayed_cksum(m);
736 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
737 }
738
739 HTONS(ip->ip_len);
740 HTONS(ip->ip_off);
741
742 lck_mtx_unlock(ip_mutex);
743 error = ipsec4_output(&state, sp, flags);
744 lck_mtx_unlock(sadb_mutex);
745 lck_mtx_lock(ip_mutex);
746
747 m0 = m = state.m;
748
749 if (flags & IP_ROUTETOIF) {
750 /*
751 * if we have tunnel mode SA, we may need to ignore
752 * IP_ROUTETOIF.
753 */
754 if (state.ro != &iproute || state.ro->ro_rt != NULL) {
755 flags &= ~IP_ROUTETOIF;
756 ro = state.ro;
757 }
758 } else
759 ro = state.ro;
760
761 dst = (struct sockaddr_in *)state.dst;
762 if (error) {
763 /* mbuf is already reclaimed in ipsec4_output. */
764 m0 = NULL;
765 switch (error) {
766 case EHOSTUNREACH:
767 case ENETUNREACH:
768 case EMSGSIZE:
769 case ENOBUFS:
770 case ENOMEM:
771 break;
772 default:
773 printf("ip4_output (ipsec): error code %d\n", error);
774 /*fall through*/
775 case ENOENT:
776 /* don't show these error codes to the user */
777 error = 0;
778 break;
779 }
780 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 4,0,0,0,0);
781 goto bad;
782 }
783 }
784
785 /* be sure to update variables that are affected by ipsec4_output() */
786 ip = mtod(m, struct ip *);
787
788 #ifdef _IP_VHL
789 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
790 #else
791 hlen = ip->ip_hl << 2;
792 #endif
793 /* Check that there wasn't a route change and src is still valid */
794
795 if (ro->ro_rt->generation_id != route_generation) {
796 if (ifa_foraddr(ip->ip_src.s_addr) == 0 && ((flags & (IP_ROUTETOIF | IP_FORWARDING)) == 0)) {
797 error = EADDRNOTAVAIL;
798 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 5,0,0,0,0);
799 goto bad;
800 }
801 rtfree(ro->ro_rt);
802 ro->ro_rt = NULL;
803 }
804
805 if (ro->ro_rt == NULL) {
806 if ((flags & IP_ROUTETOIF) == 0) {
807 printf("ip_output: "
808 "can't update route after IPsec processing\n");
809 error = EHOSTUNREACH; /*XXX*/
810 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 6,0,0,0,0);
811 goto bad;
812 }
813 } else {
814 if (ia)
815 ifafree(&ia->ia_ifa);
816 ia = ifatoia(ro->ro_rt->rt_ifa);
817 if (ia)
818 ifaref(&ia->ia_ifa);
819 ifp = ro->ro_rt->rt_ifp;
820 }
821
822 /* make it flipped, again. */
823 NTOHS(ip->ip_len);
824 NTOHS(ip->ip_off);
825 KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 7,0xff,0xff,0xff,0xff);
826
827 /* Pass to filters again */
828 if (!TAILQ_EMPTY(&ipv4_filters)) {
829 struct ipfilter *filter;
830
831 lck_mtx_unlock(ip_mutex);
832 ipf_ref();
833
834 /* 4135317 - always pass network byte order to filter */
835 HTONS(ip->ip_len);
836 HTONS(ip->ip_off);
837
838 TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) {
839 if (filter->ipf_filter.ipf_output) {
840 errno_t result;
841 result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, 0);
842 if (result == EJUSTRETURN) {
843 ipf_unref();
844 goto done;
845 }
846 if (result != 0) {
847 ipf_unref();
848 lck_mtx_lock(ip_mutex);
849 goto bad;
850 }
851 }
852 }
853
854 /* set back to host byte order */
855 NTOHS(ip->ip_len);
856 NTOHS(ip->ip_off);
857
858 ipf_unref();
859 lck_mtx_lock(ip_mutex);
860 }
861 skip_ipsec:
862 #endif /*IPSEC*/
863
864 /*
865 * IpHack's section.
866 * - Xlate: translate packet's addr/port (NAT).
867 * - Firewall: deny/allow/etc.
868 * - Wrap: fake packet's addr/port <unimpl.>
869 * - Encapsulate: put it in another IP and send out. <unimp.>
870 */
871 if (fr_checkp) {
872 struct mbuf *m1 = m;
873
874 if ((error = (*fr_checkp)(ip, hlen, ifp, 1, &m1)) || !m1) {
875 lck_mtx_unlock(ip_mutex);
876 goto done;
877 }
878 ip = mtod(m0 = m = m1, struct ip *);
879 }
880
881 /*
882 * Check with the firewall...
883 * but not if we are already being fwd'd from a firewall.
884 */
885 if (fw_enable && IPFW_LOADED && !args.next_hop) {
886 struct sockaddr_in *old = dst;
887
888 args.m = m;
889 args.next_hop = dst;
890 args.oif = ifp;
891 lck_mtx_unlock(ip_mutex);
892 off = ip_fw_chk_ptr(&args);
893 m = args.m;
894 dst = args.next_hop;
895
896 /*
897 * On return we must do the following:
898 * IP_FW_PORT_DENY_FLAG -> drop the pkt (XXX new)
899 * 1<=off<= 0xffff -> DIVERT
900 * (off & IP_FW_PORT_DYNT_FLAG) -> send to a DUMMYNET pipe
901 * (off & IP_FW_PORT_TEE_FLAG) -> TEE the packet
902 * dst != old -> IPFIREWALL_FORWARD
903 * off==0, dst==old -> accept
904 * If some of the above modules is not compiled in, then
905 * we shouldn't have to check the corresponding condition
906 * (because the ipfw control socket should not accept
907 * unsupported rules), but better play safe and drop
908 * packets in case of doubt.
909 */
910 m0 = m;
911 if ( (off & IP_FW_PORT_DENY_FLAG) || m == NULL) {
912 if (m)
913 m_freem(m);
914 error = EACCES ;
915 goto done ;
916 }
917 ip = mtod(m, struct ip *);
918 if (off == 0 && dst == old) {/* common case */
919 lck_mtx_lock(ip_mutex);
920 goto pass ;
921 }
922 #if DUMMYNET
923 if (DUMMYNET_LOADED && (off & IP_FW_PORT_DYNT_FLAG) != 0) {
924 /*
925 * pass the pkt to dummynet. Need to include
926 * pipe number, m, ifp, ro, dst because these are
927 * not recomputed in the next pass.
928 * All other parameters have been already used and
929 * so they are not needed anymore.
930 * XXX note: if the ifp or ro entry are deleted
931 * while a pkt is in dummynet, we are in trouble!
932 */
933 args.ro = ro;
934 args.dst = dst;
935 args.flags = flags;
936
937 error = ip_dn_io_ptr(m, off & 0xffff, DN_TO_IP_OUT,
938 &args);
939 goto done;
940 }
941 #endif /* DUMMYNET */
942 lck_mtx_lock(ip_mutex);
943 #if IPDIVERT
944 if (off != 0 && (off & IP_FW_PORT_DYNT_FLAG) == 0) {
945 struct mbuf *clone = NULL;
946
947 /* Clone packet if we're doing a 'tee' */
948 if ((off & IP_FW_PORT_TEE_FLAG) != 0)
949 clone = m_dup(m, M_DONTWAIT);
950 /*
951 * XXX
952 * delayed checksums are not currently compatible
953 * with divert sockets.
954 */
955 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
956 in_delayed_cksum(m);
957 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
958 }
959
960 /* Restore packet header fields to original values */
961 HTONS(ip->ip_len);
962 HTONS(ip->ip_off);
963
964 /* Deliver packet to divert input routine */
965 divert_packet(m, 0, off & 0xffff, args.divert_rule);
966
967 /* If 'tee', continue with original packet */
968 if (clone != NULL) {
969 m0 = m = clone;
970 ip = mtod(m, struct ip *);
971 goto pass;
972 }
973 lck_mtx_unlock(ip_mutex);
974 goto done;
975 }
976 #endif
977
978 #if IPFIREWALL_FORWARD
979 /* Here we check dst to make sure it's directly reachable on the
980 * interface we previously thought it was.
981 * If it isn't (which may be likely in some situations) we have
982 * to re-route it (ie, find a route for the next-hop and the
983 * associated interface) and set them here. This is nested
984 * forwarding which in most cases is undesirable, except where
985 * such control is nigh impossible. So we do it here.
986 * And I'm babbling.
987 */
988 if (off == 0 && old != dst) {
989 struct in_ifaddr *ia_fw;
990
991 /* It's changed... */
992 /* There must be a better way to do this next line... */
993 static struct route sro_fwd, *ro_fwd = &sro_fwd;
994 #if IPFIREWALL_FORWARD_DEBUG
995 printf("IPFIREWALL_FORWARD: New dst ip: ");
996 print_ip(dst->sin_addr);
997 printf("\n");
998 #endif
999 /*
1000 * We need to figure out if we have been forwarded
1001 * to a local socket. If so then we should somehow
1002 * "loop back" to ip_input, and get directed to the
1003 * PCB as if we had received this packet. This is
1004 * because it may be difficult to identify the packets
1005 * you want to forward until they are being output
1006 * and have selected an interface. (e.g. locally
1007 * initiated packets) If we used the loopback interface,
1008 * we would not be able to control what happens
1009 * as the packet runs through ip_input() as
1010 * it is done through a ISR.
1011 */
1012 TAILQ_FOREACH(ia_fw, &in_ifaddrhead, ia_link) {
1013 /*
1014 * If the addr to forward to is one
1015 * of ours, we pretend to
1016 * be the destination for this packet.
1017 */
1018 if (IA_SIN(ia_fw)->sin_addr.s_addr ==
1019 dst->sin_addr.s_addr)
1020 break;
1021 }
1022 if (ia) {
1023 /* tell ip_input "dont filter" */
1024 struct m_tag *fwd_tag;
1025 struct ip_fwd_tag *ipfwd_tag;
1026
1027 fwd_tag = m_tag_alloc(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_IPFORWARD,
1028 sizeof(struct sockaddr_in), M_NOWAIT);
1029 if (fwd_tag == NULL) {
1030 error = ENOBUFS;
1031 goto bad;
1032 }
1033
1034 ipfwd_tag = (struct ip_fwd_tag *)(fwd_tag+1);
1035 ipfwd_tag->next_hop = args.next_hop;
1036
1037 m_tag_prepend(m, fwd_tag);
1038
1039 if (m->m_pkthdr.rcvif == NULL)
1040 m->m_pkthdr.rcvif = ifunit("lo0");
1041 if ((~IF_HWASSIST_CSUM_FLAGS(m->m_pkthdr.rcvif->if_hwassist) &
1042 m->m_pkthdr.csum_flags) == 0) {
1043 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
1044 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1045 m->m_pkthdr.csum_flags |=
1046 CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1047 m->m_pkthdr.csum_data = 0xffff;
1048 }
1049 m->m_pkthdr.csum_flags |=
1050 CSUM_IP_CHECKED | CSUM_IP_VALID;
1051 }
1052 else if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
1053 in_delayed_cksum(m);
1054 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1055 ip->ip_sum = in_cksum(m, hlen);
1056 }
1057 HTONS(ip->ip_len);
1058 HTONS(ip->ip_off);
1059
1060 lck_mtx_unlock(ip_mutex);
1061
1062 /* we need to call dlil_output to run filters
1063 * and resync to avoid recursion loops.
1064 */
1065 if (lo_ifp) {
1066 dlil_output(lo_ifp, PF_INET, m, 0, (struct sockaddr *)dst, 0);
1067 }
1068 else {
1069 printf("ip_output: no loopback ifp for forwarding!!!\n");
1070 }
1071 goto done;
1072 }
1073 /* Some of the logic for this was
1074 * nicked from above.
1075 *
1076 * This rewrites the cached route in a local PCB.
1077 * Is this what we want to do?
1078 */
1079 bcopy(dst, &ro_fwd->ro_dst, sizeof(*dst));
1080
1081 ro_fwd->ro_rt = 0;
1082 rtalloc_ign(ro_fwd, RTF_PRCLONING);
1083
1084 if (ro_fwd->ro_rt == 0) {
1085 ipstat.ips_noroute++;
1086 error = EHOSTUNREACH;
1087 goto bad;
1088 }
1089
1090 ia_fw = ifatoia(ro_fwd->ro_rt->rt_ifa);
1091 ifp = ro_fwd->ro_rt->rt_ifp;
1092 ro_fwd->ro_rt->rt_use++;
1093 if (ro_fwd->ro_rt->rt_flags & RTF_GATEWAY)
1094 dst = (struct sockaddr_in *)ro_fwd->ro_rt->rt_gateway;
1095 if (ro_fwd->ro_rt->rt_flags & RTF_HOST)
1096 isbroadcast =
1097 (ro_fwd->ro_rt->rt_flags & RTF_BROADCAST);
1098 else
1099 isbroadcast = in_broadcast(dst->sin_addr, ifp);
1100 rtfree(ro->ro_rt);
1101 ro->ro_rt = ro_fwd->ro_rt;
1102 dst = (struct sockaddr_in *)&ro_fwd->ro_dst;
1103
1104 /*
1105 * If we added a default src ip earlier,
1106 * which would have been gotten from the-then
1107 * interface, do it again, from the new one.
1108 */
1109 if (fwd_rewrite_src)
1110 ip->ip_src = IA_SIN(ia_fw)->sin_addr;
1111 goto pass ;
1112 }
1113 #endif /* IPFIREWALL_FORWARD */
1114 /*
1115 * if we get here, none of the above matches, and
1116 * we have to drop the pkt
1117 */
1118 m_freem(m);
1119 error = EACCES; /* not sure this is the right error msg */
1120 lck_mtx_unlock(ip_mutex);
1121 goto done;
1122 }
1123
1124 pass:
1125 #if __APPLE__
1126 /* Do not allow loopback address to wind up on a wire */
1127 if ((ifp->if_flags & IFF_LOOPBACK) == 0 &&
1128 ((ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
1129 (ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)) {
1130 ipstat.ips_badaddr++;
1131 m_freem(m);
1132 /*
1133 * Do not simply drop the packet just like a firewall -- we want
1134 * the application to feel the pain.
1135 * Return ENETUNREACH like ip6_output does in some similar cases.
1136 * This can startle the otherwise clueless process that specifies
1137 * loopback as the source address.
1138 */
1139 error = ENETUNREACH;
1140 lck_mtx_unlock(ip_mutex);
1141 goto done;
1142 }
1143 #endif
1144 m->m_pkthdr.csum_flags |= CSUM_IP;
1145 sw_csum = m->m_pkthdr.csum_flags
1146 & ~IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);
1147
1148 if ((ifp->if_hwassist & CSUM_TCP_SUM16) != 0) {
1149 /*
1150 * Special case code for GMACE
1151 * frames that can be checksumed by GMACE SUM16 HW:
1152 * frame >64, no fragments, no UDP
1153 */
1154 if (apple_hwcksum_tx && (m->m_pkthdr.csum_flags & CSUM_TCP)
1155 && (ip->ip_len > 50) && (ip->ip_len <= ifp->if_mtu)) {
1156 /* Apple GMAC HW, expects STUFF_OFFSET << 16 | START_OFFSET */
1157 u_short offset = (IP_VHL_HL(ip->ip_vhl) << 2) +14 ; /* IP+Enet header length */
1158 u_short csumprev= m->m_pkthdr.csum_data & 0xFFFF;
1159 m->m_pkthdr.csum_flags = CSUM_DATA_VALID | CSUM_TCP_SUM16; /* for GMAC */
1160 m->m_pkthdr.csum_data = (csumprev + offset) << 16 ;
1161 m->m_pkthdr.csum_data += offset;
1162 sw_csum = CSUM_DELAY_IP; /* do IP hdr chksum in software */
1163 }
1164 else {
1165 /* let the software handle any UDP or TCP checksums */
1166 sw_csum |= (CSUM_DELAY_DATA & m->m_pkthdr.csum_flags);
1167 }
1168 }
1169
1170 if (sw_csum & CSUM_DELAY_DATA) {
1171 in_delayed_cksum(m);
1172 sw_csum &= ~CSUM_DELAY_DATA;
1173 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1174 }
1175
1176 m->m_pkthdr.csum_flags &= IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);
1177
1178 /*
1179 * If small enough for interface, or the interface will take
1180 * care of the fragmentation for us, can just send directly.
1181 */
1182 if ((u_short)ip->ip_len <= ifp->if_mtu ||
1183 ifp->if_hwassist & CSUM_FRAGMENT) {
1184 HTONS(ip->ip_len);
1185 HTONS(ip->ip_off);
1186 ip->ip_sum = 0;
1187 if (sw_csum & CSUM_DELAY_IP) {
1188 ip->ip_sum = in_cksum(m, hlen);
1189 }
1190
1191 #ifndef __APPLE__
1192 /* Record statistics for this interface address. */
1193 if (!(flags & IP_FORWARDING) && ia != NULL) {
1194 ia->ia_ifa.if_opackets++;
1195 ia->ia_ifa.if_obytes += m->m_pkthdr.len;
1196 }
1197 #endif
1198
1199 #if IPSEC
1200 /* clean ipsec history once it goes out of the node */
1201 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0)
1202 ipsec_delaux(m);
1203 #endif
1204 if (packetchain == 0) {
1205 lck_mtx_unlock(ip_mutex);
1206 error = dlil_output(ifp, PF_INET, m, (void *) ro->ro_rt,
1207 (struct sockaddr *)dst, 0);
1208 goto done;
1209 }
1210 else { /* packet chaining allows us to reuse the route for all packets */
1211 m = m->m_nextpkt;
1212 if (m == NULL) {
1213 if (pktcnt > ip_maxchainsent)
1214 ip_maxchainsent = pktcnt;
1215 //send
1216 lck_mtx_unlock(ip_mutex);
1217 error = dlil_output_list(ifp, PF_INET, packetlist, (void *) ro->ro_rt,
1218 (struct sockaddr *)dst, 0);
1219 pktcnt = 0;
1220 goto done;
1221
1222 }
1223 m0 = m;
1224 pktcnt++;
1225 goto loopit;
1226 }
1227 }
1228 /*
1229 * Too large for interface; fragment if possible.
1230 * Must be able to put at least 8 bytes per fragment.
1231 */
1232 if (ip->ip_off & IP_DF) {
1233 error = EMSGSIZE;
1234 /*
1235 * This case can happen if the user changed the MTU
1236 * of an interface after enabling IP on it. Because
1237 * most netifs don't keep track of routes pointing to
1238 * them, there is no way for one to update all its
1239 * routes when the MTU is changed.
1240 */
1241 if ((ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST))
1242 && !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU)
1243 && (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) {
1244 ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
1245 }
1246 ipstat.ips_cantfrag++;
1247 goto bad;
1248 }
1249 len = (ifp->if_mtu - hlen) &~ 7;
1250 if (len < 8) {
1251 error = EMSGSIZE;
1252 goto bad;
1253 }
1254
1255 /*
1256 * if the interface will not calculate checksums on
1257 * fragmented packets, then do it here.
1258 */
1259 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA &&
1260 (ifp->if_hwassist & CSUM_IP_FRAGS) == 0) {
1261 in_delayed_cksum(m);
1262 if (m == NULL) {
1263 lck_mtx_unlock(ip_mutex);
1264 return(ENOMEM);
1265 }
1266 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1267 }
1268
1269
1270 {
1271 int mhlen, firstlen = len;
1272 struct mbuf **mnext = &m->m_nextpkt;
1273 int nfrags = 1;
1274
1275 /*
1276 * Loop through length of segment after first fragment,
1277 * make new header and copy data of each part and link onto chain.
1278 */
1279 m0 = m;
1280 mhlen = sizeof (struct ip);
1281 for (off = hlen + len; off < (u_short)ip->ip_len; off += len) {
1282 MGETHDR(m, M_DONTWAIT, MT_HEADER);
1283 if (m == 0) {
1284 error = ENOBUFS;
1285 ipstat.ips_odropped++;
1286 goto sendorfree;
1287 }
1288 m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG;
1289 m->m_data += max_linkhdr;
1290 mhip = mtod(m, struct ip *);
1291 *mhip = *ip;
1292 if (hlen > sizeof (struct ip)) {
1293 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
1294 mhip->ip_vhl = IP_MAKE_VHL(IPVERSION, mhlen >> 2);
1295 }
1296 m->m_len = mhlen;
1297 mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF);
1298 if (ip->ip_off & IP_MF)
1299 mhip->ip_off |= IP_MF;
1300 if (off + len >= (u_short)ip->ip_len)
1301 len = (u_short)ip->ip_len - off;
1302 else
1303 mhip->ip_off |= IP_MF;
1304 mhip->ip_len = htons((u_short)(len + mhlen));
1305 m->m_next = m_copy(m0, off, len);
1306 if (m->m_next == 0) {
1307 (void) m_free(m);
1308 error = ENOBUFS; /* ??? */
1309 ipstat.ips_odropped++;
1310 goto sendorfree;
1311 }
1312 m->m_pkthdr.len = mhlen + len;
1313 m->m_pkthdr.rcvif = 0;
1314 m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
1315 m->m_pkthdr.socket_id = m0->m_pkthdr.socket_id;
1316 HTONS(mhip->ip_off);
1317 mhip->ip_sum = 0;
1318 if (sw_csum & CSUM_DELAY_IP) {
1319 mhip->ip_sum = in_cksum(m, mhlen);
1320 }
1321 *mnext = m;
1322 mnext = &m->m_nextpkt;
1323 nfrags++;
1324 }
1325 ipstat.ips_ofragments += nfrags;
1326
1327 /* set first/last markers for fragment chain */
1328 m->m_flags |= M_LASTFRAG;
1329 m0->m_flags |= M_FIRSTFRAG | M_FRAG;
1330 m0->m_pkthdr.csum_data = nfrags;
1331
1332 /*
1333 * Update first fragment by trimming what's been copied out
1334 * and updating header, then send each fragment (in order).
1335 */
1336 m = m0;
1337 m_adj(m, hlen + firstlen - (u_short)ip->ip_len);
1338 m->m_pkthdr.len = hlen + firstlen;
1339 ip->ip_len = htons((u_short)m->m_pkthdr.len);
1340 ip->ip_off |= IP_MF;
1341 HTONS(ip->ip_off);
1342 ip->ip_sum = 0;
1343 if (sw_csum & CSUM_DELAY_IP) {
1344 ip->ip_sum = in_cksum(m, hlen);
1345 }
1346 sendorfree:
1347
1348 KERNEL_DEBUG(DBG_LAYER_END, ip->ip_dst.s_addr,
1349 ip->ip_src.s_addr, ip->ip_p, ip->ip_off, ip->ip_len);
1350
1351 lck_mtx_unlock(ip_mutex);
1352 for (m = m0; m; m = m0) {
1353 m0 = m->m_nextpkt;
1354 m->m_nextpkt = 0;
1355 #if IPSEC
1356 /* clean ipsec history once it goes out of the node */
1357 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0)
1358 ipsec_delaux(m);
1359 #endif
1360 if (error == 0) {
1361 #ifndef __APPLE__
1362 /* Record statistics for this interface address. */
1363 if (ia != NULL) {
1364 ia->ia_ifa.if_opackets++;
1365 ia->ia_ifa.if_obytes += m->m_pkthdr.len;
1366 }
1367 #endif
1368 if ((packetchain != 0) && (pktcnt > 0))
1369 panic("ip_output: mix of packet in packetlist is wrong=%x", packetlist);
1370 error = dlil_output(ifp, PF_INET, m, (void *) ro->ro_rt,
1371 (struct sockaddr *)dst, 0);
1372 } else
1373 m_freem(m);
1374 }
1375
1376 if (error == 0)
1377 ipstat.ips_fragmented++;
1378 }
1379 done:
1380 if (ia) {
1381 ifafree(&ia->ia_ifa);
1382 ia = NULL;
1383 }
1384 #if IPSEC
1385 if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) {
1386 if (ro == &iproute && ro->ro_rt) {
1387 rtfree(ro->ro_rt);
1388 ro->ro_rt = NULL;
1389 }
1390 if (sp != NULL) {
1391 KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
1392 printf("DP ip_output call free SP:%x\n", sp));
1393 lck_mtx_lock(sadb_mutex);
1394 key_freesp(sp);
1395 lck_mtx_unlock(sadb_mutex);
1396 }
1397 }
1398 #endif /* IPSEC */
1399
1400 KERNEL_DEBUG(DBG_FNC_IP_OUTPUT | DBG_FUNC_END, error,0,0,0,0);
1401 return (error);
1402 bad:
1403 m_freem(m0);
1404 lck_mtx_unlock(ip_mutex);
1405 goto done;
1406 }
1407
1408 void
1409 in_delayed_cksum_offset(struct mbuf *m, int ip_offset)
1410 {
1411 struct ip *ip;
1412 u_short csum, offset;
1413
1414 while (ip_offset > m->m_len) {
1415 ip_offset -= m->m_len;
1416 m = m->m_next;
1417 if (m) {
1418 printf("in_delayed_cksum_withoffset failed - ip_offset wasn't in the packet\n");
1419 return;
1420 }
1421 }
1422
1423 if (ip_offset + sizeof(struct ip) > m->m_len) {
1424 printf("delayed m_pullup, m->len: %d off: %d\n",
1425 m->m_len, ip_offset);
1426 /*
1427 * XXX
1428 * this shouldn't happen
1429 */
1430 m = m_pullup(m, ip_offset + sizeof(struct ip));
1431 }
1432
1433 /* Gross */
1434 if (ip_offset) {
1435 m->m_len -= ip_offset;
1436 m->m_data += ip_offset;
1437 }
1438
1439 ip = mtod(m, struct ip*);
1440 offset = IP_VHL_HL(ip->ip_vhl) << 2 ;
1441 csum = in_cksum_skip(m, ip->ip_len, offset);
1442 if (m->m_pkthdr.csum_flags & CSUM_UDP && csum == 0)
1443 csum = 0xffff;
1444 offset += m->m_pkthdr.csum_data & 0xFFFF; /* checksum offset */
1445
1446 /* Gross */
1447 if (ip_offset) {
1448 if (M_LEADINGSPACE(m) < ip_offset)
1449 panic("in_delayed_cksum_withoffset - chain modified!\n");
1450 m->m_len += ip_offset;
1451 m->m_data -= ip_offset;
1452 }
1453
1454 if (offset > ip->ip_len) /* bogus offset */
1455 return;
1456
1457 if (offset + ip_offset + sizeof(u_short) > m->m_len) {
1458 printf("delayed m_pullup, m->len: %d off: %d p: %d\n",
1459 m->m_len, offset + ip_offset, ip->ip_p);
1460 /*
1461 * XXX
1462 * this shouldn't happen, but if it does, the
1463 * correct behavior may be to insert the checksum
1464 * in the existing chain instead of rearranging it.
1465 */
1466 m = m_pullup(m, offset + ip_offset + sizeof(u_short));
1467 }
1468 *(u_short *)(m->m_data + offset + ip_offset) = csum;
1469 }
1470
/*
 * Shorthand for in_delayed_cksum_offset() with an IP offset of zero, i.e.
 * for a chain whose data begins with the IP header.
 */
void
in_delayed_cksum(struct mbuf *m)
{
	const int ip_hdr_start = 0;	/* no bytes precede the IP header */

	in_delayed_cksum_offset(m, ip_hdr_start);
}
1476
1477 void
1478 in_cksum_offset(struct mbuf* m, size_t ip_offset)
1479 {
1480 struct ip* ip = NULL;
1481 int hlen = 0;
1482
1483 while (ip_offset > m->m_len) {
1484 ip_offset -= m->m_len;
1485 m = m->m_next;
1486 if (m) {
1487 printf("in_cksum_offset failed - ip_offset wasn't in the packet\n");
1488 return;
1489 }
1490 }
1491
1492 if (ip_offset + sizeof(struct ip) > m->m_len) {
1493 printf("in_cksum_offset - delayed m_pullup, m->len: %d off: %d\n",
1494 m->m_len, ip_offset);
1495 /*
1496 * XXX
1497 * this shouldn't happen
1498 */
1499 m = m_pullup(m, ip_offset + sizeof(struct ip));
1500 }
1501
1502 /* Gross */
1503 if (ip_offset) {
1504 m->m_len -= ip_offset;
1505 m->m_data += ip_offset;
1506 }
1507
1508 ip = mtod(m, struct ip*);
1509
1510 #ifdef _IP_VHL
1511 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
1512 #else
1513 hlen = ip->ip_hl << 2;
1514 #endif
1515
1516 ip->ip_sum = 0;
1517 ip->ip_sum = in_cksum(m, hlen);
1518
1519 /* Gross */
1520 if (ip_offset) {
1521 if (M_LEADINGSPACE(m) < ip_offset)
1522 panic("in_cksum_offset - chain modified!\n");
1523 m->m_len += ip_offset;
1524 m->m_data -= ip_offset;
1525 }
1526 }
1527
1528 /*
1529 * Insert IP options into preformed packet.
1530 * Adjust IP destination as required for IP source routing,
1531 * as indicated by a non-zero in_addr at the start of the options.
1532 *
1533 * XXX This routine assumes that the packet has no options in place.
1534 */
1535 static struct mbuf *
1536 ip_insertoptions(m, opt, phlen)
1537 register struct mbuf *m;
1538 struct mbuf *opt;
1539 int *phlen;
1540 {
1541 register struct ipoption *p = mtod(opt, struct ipoption *);
1542 struct mbuf *n;
1543 register struct ip *ip = mtod(m, struct ip *);
1544 unsigned optlen;
1545
1546 optlen = opt->m_len - sizeof(p->ipopt_dst);
1547 if (optlen + (u_short)ip->ip_len > IP_MAXPACKET)
1548 return (m); /* XXX should fail */
1549 if (p->ipopt_dst.s_addr)
1550 ip->ip_dst = p->ipopt_dst;
1551 if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
1552 MGETHDR(n, M_DONTWAIT, MT_HEADER);
1553 if (n == 0)
1554 return (m);
1555 n->m_pkthdr.rcvif = 0;
1556 n->m_pkthdr.len = m->m_pkthdr.len + optlen;
1557 m->m_len -= sizeof(struct ip);
1558 m->m_data += sizeof(struct ip);
1559 n->m_next = m;
1560 m = n;
1561 m->m_len = optlen + sizeof(struct ip);
1562 m->m_data += max_linkhdr;
1563 (void)memcpy(mtod(m, void *), ip, sizeof(struct ip));
1564 } else {
1565 m->m_data -= optlen;
1566 m->m_len += optlen;
1567 m->m_pkthdr.len += optlen;
1568 ovbcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
1569 }
1570 ip = mtod(m, struct ip *);
1571 bcopy(p->ipopt_list, ip + 1, optlen);
1572 *phlen = sizeof(struct ip) + optlen;
1573 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, *phlen >> 2);
1574 ip->ip_len += optlen;
1575 return (m);
1576 }
1577
1578 /*
1579 * Copy options from ip to jp,
1580 * omitting those not copied during fragmentation.
1581 */
1582 int
1583 ip_optcopy(ip, jp)
1584 struct ip *ip, *jp;
1585 {
1586 register u_char *cp, *dp;
1587 int opt, optlen, cnt;
1588
1589 cp = (u_char *)(ip + 1);
1590 dp = (u_char *)(jp + 1);
1591 cnt = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof (struct ip);
1592 for (; cnt > 0; cnt -= optlen, cp += optlen) {
1593 opt = cp[0];
1594 if (opt == IPOPT_EOL)
1595 break;
1596 if (opt == IPOPT_NOP) {
1597 /* Preserve for IP mcast tunnel's LSRR alignment. */
1598 *dp++ = IPOPT_NOP;
1599 optlen = 1;
1600 continue;
1601 }
1602 #if DIAGNOSTIC
1603 if (cnt < IPOPT_OLEN + sizeof(*cp))
1604 panic("malformed IPv4 option passed to ip_optcopy");
1605 #endif
1606 optlen = cp[IPOPT_OLEN];
1607 #if DIAGNOSTIC
1608 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
1609 panic("malformed IPv4 option passed to ip_optcopy");
1610 #endif
1611 /* bogus lengths should have been caught by ip_dooptions */
1612 if (optlen > cnt)
1613 optlen = cnt;
1614 if (IPOPT_COPIED(opt)) {
1615 bcopy(cp, dp, optlen);
1616 dp += optlen;
1617 }
1618 }
1619 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
1620 *dp++ = IPOPT_EOL;
1621 return (optlen);
1622 }
1623
1624 /*
1625 * IP socket option processing.
1626 */
1627 int
1628 ip_ctloutput(so, sopt)
1629 struct socket *so;
1630 struct sockopt *sopt;
1631 {
1632 struct inpcb *inp = sotoinpcb(so);
1633 int error, optval;
1634
1635 error = optval = 0;
1636 if (sopt->sopt_level != IPPROTO_IP) {
1637 return (EINVAL);
1638 }
1639
1640 switch (sopt->sopt_dir) {
1641 case SOPT_SET:
1642 switch (sopt->sopt_name) {
1643 case IP_OPTIONS:
1644 #ifdef notyet
1645 case IP_RETOPTS:
1646 #endif
1647 {
1648 struct mbuf *m;
1649 if (sopt->sopt_valsize > MLEN) {
1650 error = EMSGSIZE;
1651 break;
1652 }
1653 MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_HEADER);
1654 if (m == 0) {
1655 error = ENOBUFS;
1656 break;
1657 }
1658 m->m_len = sopt->sopt_valsize;
1659 error = sooptcopyin(sopt, mtod(m, char *), m->m_len,
1660 m->m_len);
1661 if (error)
1662 break;
1663
1664 return (ip_pcbopts(sopt->sopt_name, &inp->inp_options,
1665 m));
1666 }
1667
1668 case IP_TOS:
1669 case IP_TTL:
1670 case IP_RECVOPTS:
1671 case IP_RECVRETOPTS:
1672 case IP_RECVDSTADDR:
1673 case IP_RECVIF:
1674 case IP_RECVTTL:
1675 #if defined(NFAITH) && NFAITH > 0
1676 case IP_FAITH:
1677 #endif
1678 error = sooptcopyin(sopt, &optval, sizeof optval,
1679 sizeof optval);
1680 if (error)
1681 break;
1682
1683 switch (sopt->sopt_name) {
1684 case IP_TOS:
1685 inp->inp_ip_tos = optval;
1686 break;
1687
1688 case IP_TTL:
1689 inp->inp_ip_ttl = optval;
1690 break;
1691 #define OPTSET(bit) \
1692 if (optval) \
1693 inp->inp_flags |= bit; \
1694 else \
1695 inp->inp_flags &= ~bit;
1696
1697 case IP_RECVOPTS:
1698 OPTSET(INP_RECVOPTS);
1699 break;
1700
1701 case IP_RECVRETOPTS:
1702 OPTSET(INP_RECVRETOPTS);
1703 break;
1704
1705 case IP_RECVDSTADDR:
1706 OPTSET(INP_RECVDSTADDR);
1707 break;
1708
1709 case IP_RECVIF:
1710 OPTSET(INP_RECVIF);
1711 break;
1712
1713 case IP_RECVTTL:
1714 OPTSET(INP_RECVTTL);
1715 break;
1716
1717 #if defined(NFAITH) && NFAITH > 0
1718 case IP_FAITH:
1719 OPTSET(INP_FAITH);
1720 break;
1721 #endif
1722 }
1723 break;
1724 #undef OPTSET
1725
1726 case IP_MULTICAST_IF:
1727 case IP_MULTICAST_VIF:
1728 case IP_MULTICAST_TTL:
1729 case IP_MULTICAST_LOOP:
1730 case IP_ADD_MEMBERSHIP:
1731 case IP_DROP_MEMBERSHIP:
1732 error = ip_setmoptions(sopt, &inp->inp_moptions);
1733 break;
1734
1735 case IP_PORTRANGE:
1736 error = sooptcopyin(sopt, &optval, sizeof optval,
1737 sizeof optval);
1738 if (error)
1739 break;
1740
1741 switch (optval) {
1742 case IP_PORTRANGE_DEFAULT:
1743 inp->inp_flags &= ~(INP_LOWPORT);
1744 inp->inp_flags &= ~(INP_HIGHPORT);
1745 break;
1746
1747 case IP_PORTRANGE_HIGH:
1748 inp->inp_flags &= ~(INP_LOWPORT);
1749 inp->inp_flags |= INP_HIGHPORT;
1750 break;
1751
1752 case IP_PORTRANGE_LOW:
1753 inp->inp_flags &= ~(INP_HIGHPORT);
1754 inp->inp_flags |= INP_LOWPORT;
1755 break;
1756
1757 default:
1758 error = EINVAL;
1759 break;
1760 }
1761 break;
1762
1763 #if IPSEC
1764 case IP_IPSEC_POLICY:
1765 {
1766 caddr_t req = NULL;
1767 size_t len = 0;
1768 int priv;
1769 struct mbuf *m;
1770 int optname;
1771
1772 if (sopt->sopt_valsize > MCLBYTES) {
1773 error = EMSGSIZE;
1774 break;
1775 }
1776 if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
1777 break;
1778 if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
1779 break;
1780 priv = (sopt->sopt_p != NULL &&
1781 proc_suser(sopt->sopt_p) != 0) ? 0 : 1;
1782 if (m) {
1783 req = mtod(m, caddr_t);
1784 len = m->m_len;
1785 }
1786 optname = sopt->sopt_name;
1787 lck_mtx_lock(sadb_mutex);
1788 error = ipsec4_set_policy(inp, optname, req, len, priv);
1789 lck_mtx_unlock(sadb_mutex);
1790 m_freem(m);
1791 break;
1792 }
1793 #endif /*IPSEC*/
1794
1795 default:
1796 error = ENOPROTOOPT;
1797 break;
1798 }
1799 break;
1800
1801 case SOPT_GET:
1802 switch (sopt->sopt_name) {
1803 case IP_OPTIONS:
1804 case IP_RETOPTS:
1805 if (inp->inp_options)
1806 error = sooptcopyout(sopt,
1807 mtod(inp->inp_options,
1808 char *),
1809 inp->inp_options->m_len);
1810 else
1811 sopt->sopt_valsize = 0;
1812 break;
1813
1814 case IP_TOS:
1815 case IP_TTL:
1816 case IP_RECVOPTS:
1817 case IP_RECVRETOPTS:
1818 case IP_RECVDSTADDR:
1819 case IP_RECVIF:
1820 case IP_RECVTTL:
1821 case IP_PORTRANGE:
1822 #if defined(NFAITH) && NFAITH > 0
1823 case IP_FAITH:
1824 #endif
1825 switch (sopt->sopt_name) {
1826
1827 case IP_TOS:
1828 optval = inp->inp_ip_tos;
1829 break;
1830
1831 case IP_TTL:
1832 optval = inp->inp_ip_ttl;
1833 break;
1834
1835 #define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0)
1836
1837 case IP_RECVOPTS:
1838 optval = OPTBIT(INP_RECVOPTS);
1839 break;
1840
1841 case IP_RECVRETOPTS:
1842 optval = OPTBIT(INP_RECVRETOPTS);
1843 break;
1844
1845 case IP_RECVDSTADDR:
1846 optval = OPTBIT(INP_RECVDSTADDR);
1847 break;
1848
1849 case IP_RECVIF:
1850 optval = OPTBIT(INP_RECVIF);
1851 break;
1852
1853 case IP_RECVTTL:
1854 optval = OPTBIT(INP_RECVTTL);
1855 break;
1856
1857 case IP_PORTRANGE:
1858 if (inp->inp_flags & INP_HIGHPORT)
1859 optval = IP_PORTRANGE_HIGH;
1860 else if (inp->inp_flags & INP_LOWPORT)
1861 optval = IP_PORTRANGE_LOW;
1862 else
1863 optval = 0;
1864 break;
1865
1866 #if defined(NFAITH) && NFAITH > 0
1867 case IP_FAITH:
1868 optval = OPTBIT(INP_FAITH);
1869 break;
1870 #endif
1871 }
1872 error = sooptcopyout(sopt, &optval, sizeof optval);
1873 break;
1874
1875 case IP_MULTICAST_IF:
1876 case IP_MULTICAST_VIF:
1877 case IP_MULTICAST_TTL:
1878 case IP_MULTICAST_LOOP:
1879 case IP_ADD_MEMBERSHIP:
1880 case IP_DROP_MEMBERSHIP:
1881 error = ip_getmoptions(sopt, inp->inp_moptions);
1882 break;
1883
1884 #if IPSEC
1885 case IP_IPSEC_POLICY:
1886 {
1887 struct mbuf *m = NULL;
1888 caddr_t req = NULL;
1889 size_t len = 0;
1890
1891 if (m != 0) {
1892 req = mtod(m, caddr_t);
1893 len = m->m_len;
1894 }
1895 lck_mtx_lock(sadb_mutex);
1896 error = ipsec4_get_policy(sotoinpcb(so), req, len, &m);
1897 lck_mtx_unlock(sadb_mutex);
1898 if (error == 0)
1899 error = soopt_mcopyout(sopt, m); /* XXX */
1900 if (error == 0)
1901 m_freem(m);
1902 break;
1903 }
1904 #endif /*IPSEC*/
1905
1906 default:
1907 error = ENOPROTOOPT;
1908 break;
1909 }
1910 break;
1911 }
1912 return (error);
1913 }
1914
1915 /*
1916 * Set up IP options in pcb for insertion in output packets.
1917 * Store in mbuf with pointer in pcbopt, adding pseudo-option
1918 * with destination address if source routed.
1919 */
1920 static int
1921 ip_pcbopts(optname, pcbopt, m)
1922 int optname;
1923 struct mbuf **pcbopt;
1924 register struct mbuf *m;
1925 {
1926 register int cnt, optlen;
1927 register u_char *cp;
1928 u_char opt;
1929
1930 /* turn off any old options */
1931 if (*pcbopt)
1932 (void)m_free(*pcbopt);
1933 *pcbopt = 0;
1934 if (m == (struct mbuf *)0 || m->m_len == 0) {
1935 /*
1936 * Only turning off any previous options.
1937 */
1938 if (m)
1939 (void)m_free(m);
1940 return (0);
1941 }
1942
1943 #ifndef vax
1944 if (m->m_len % sizeof(int32_t))
1945 goto bad;
1946 #endif
1947 /*
1948 * IP first-hop destination address will be stored before
1949 * actual options; move other options back
1950 * and clear it when none present.
1951 */
1952 if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
1953 goto bad;
1954 cnt = m->m_len;
1955 m->m_len += sizeof(struct in_addr);
1956 cp = mtod(m, u_char *) + sizeof(struct in_addr);
1957 ovbcopy(mtod(m, caddr_t), (caddr_t)cp, (unsigned)cnt);
1958 bzero(mtod(m, caddr_t), sizeof(struct in_addr));
1959
1960 for (; cnt > 0; cnt -= optlen, cp += optlen) {
1961 opt = cp[IPOPT_OPTVAL];
1962 if (opt == IPOPT_EOL)
1963 break;
1964 if (opt == IPOPT_NOP)
1965 optlen = 1;
1966 else {
1967 if (cnt < IPOPT_OLEN + sizeof(*cp))
1968 goto bad;
1969 optlen = cp[IPOPT_OLEN];
1970 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
1971 goto bad;
1972 }
1973 switch (opt) {
1974
1975 default:
1976 break;
1977
1978 case IPOPT_LSRR:
1979 case IPOPT_SSRR:
1980 /*
1981 * user process specifies route as:
1982 * ->A->B->C->D
1983 * D must be our final destination (but we can't
1984 * check that since we may not have connected yet).
1985 * A is first hop destination, which doesn't appear in
1986 * actual IP option, but is stored before the options.
1987 */
1988 if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
1989 goto bad;
1990 m->m_len -= sizeof(struct in_addr);
1991 cnt -= sizeof(struct in_addr);
1992 optlen -= sizeof(struct in_addr);
1993 cp[IPOPT_OLEN] = optlen;
1994 /*
1995 * Move first hop before start of options.
1996 */
1997 bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
1998 sizeof(struct in_addr));
1999 /*
2000 * Then copy rest of options back
2001 * to close up the deleted entry.
2002 */
2003 ovbcopy((caddr_t)(&cp[IPOPT_OFFSET+1] +
2004 sizeof(struct in_addr)),
2005 (caddr_t)&cp[IPOPT_OFFSET+1],
2006 (unsigned)cnt + sizeof(struct in_addr));
2007 break;
2008 }
2009 }
2010 if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
2011 goto bad;
2012 *pcbopt = m;
2013 return (0);
2014
2015 bad:
2016 (void)m_free(m);
2017 return (EINVAL);
2018 }
2019
2020 /*
2021 * XXX
2022 * The whole multicast option thing needs to be re-thought.
2023 * Several of these options are equally applicable to non-multicast
2024 * transmission, and one (IP_MULTICAST_TTL) totally duplicates a
2025 * standard option (IP_TTL).
2026 */
2027
2028 /*
2029 * following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as interface index.
2030 */
2031 static struct ifnet *
2032 ip_multicast_if(a, ifindexp)
2033 struct in_addr *a;
2034 int *ifindexp;
2035 {
2036 int ifindex;
2037 struct ifnet *ifp;
2038
2039 if (ifindexp)
2040 *ifindexp = 0;
2041 if (ntohl(a->s_addr) >> 24 == 0) {
2042 ifindex = ntohl(a->s_addr) & 0xffffff;
2043 ifnet_head_lock_shared();
2044 if (ifindex < 0 || if_index < ifindex) {
2045 ifnet_head_done();
2046 return NULL;
2047 }
2048 ifp = ifindex2ifnet[ifindex];
2049 ifnet_head_done();
2050 if (ifindexp)
2051 *ifindexp = ifindex;
2052 } else {
2053 INADDR_TO_IFP(*a, ifp);
2054 }
2055 return ifp;
2056 }
2057
2058 /*
2059 * Set the IP multicast options in response to user setsockopt().
2060 */
2061 static int
2062 ip_setmoptions(sopt, imop)
2063 struct sockopt *sopt;
2064 struct ip_moptions **imop;
2065 {
2066 int error = 0;
2067 int i;
2068 struct in_addr addr;
2069 struct ip_mreq mreq;
2070 struct ifnet *ifp = NULL;
2071 struct ip_moptions *imo = *imop;
2072 int ifindex;
2073
2074 if (imo == NULL) {
2075 /*
2076 * No multicast option buffer attached to the pcb;
2077 * allocate one and initialize to default values.
2078 */
2079 error = ip_createmoptions(imop);
2080 if (error != 0)
2081 return error;
2082 imo = *imop;
2083 }
2084
2085 switch (sopt->sopt_name) {
2086 /* store an index number for the vif you wanna use in the send */
2087 case IP_MULTICAST_VIF:
2088 if (legal_vif_num == 0) {
2089 error = EOPNOTSUPP;
2090 break;
2091 }
2092 error = sooptcopyin(sopt, &i, sizeof i, sizeof i);
2093 if (error)
2094 break;
2095 if (!legal_vif_num(i) && (i != -1)) {
2096 error = EINVAL;
2097 break;
2098 }
2099 imo->imo_multicast_vif = i;
2100 break;
2101
2102 case IP_MULTICAST_IF:
2103 /*
2104 * Select the interface for outgoing multicast packets.
2105 */
2106 error = sooptcopyin(sopt, &addr, sizeof addr, sizeof addr);
2107 if (error)
2108 break;
2109 /*
2110 * INADDR_ANY is used to remove a previous selection.
2111 * When no interface is selected, a default one is
2112 * chosen every time a multicast packet is sent.
2113 */
2114 if (addr.s_addr == INADDR_ANY) {
2115 imo->imo_multicast_ifp = NULL;
2116 break;
2117 }
2118 /*
2119 * The selected interface is identified by its local
2120 * IP address. Find the interface and confirm that
2121 * it supports multicasting.
2122 */
2123 ifp = ip_multicast_if(&addr, &ifindex);
2124 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
2125 error = EADDRNOTAVAIL;
2126 break;
2127 }
2128 imo->imo_multicast_ifp = ifp;
2129 if (ifindex)
2130 imo->imo_multicast_addr = addr;
2131 else
2132 imo->imo_multicast_addr.s_addr = INADDR_ANY;
2133 break;
2134
2135 case IP_MULTICAST_TTL:
2136 /*
2137 * Set the IP time-to-live for outgoing multicast packets.
2138 * The original multicast API required a char argument,
2139 * which is inconsistent with the rest of the socket API.
2140 * We allow either a char or an int.
2141 */
2142 if (sopt->sopt_valsize == 1) {
2143 u_char ttl;
2144 error = sooptcopyin(sopt, &ttl, 1, 1);
2145 if (error)
2146 break;
2147 imo->imo_multicast_ttl = ttl;
2148 } else {
2149 u_int ttl;
2150 error = sooptcopyin(sopt, &ttl, sizeof ttl,
2151 sizeof ttl);
2152 if (error)
2153 break;
2154 if (ttl > 255)
2155 error = EINVAL;
2156 else
2157 imo->imo_multicast_ttl = ttl;
2158 }
2159 break;
2160
2161 case IP_MULTICAST_LOOP:
2162 /*
2163 * Set the loopback flag for outgoing multicast packets.
2164 * Must be zero or one. The original multicast API required a
2165 * char argument, which is inconsistent with the rest
2166 * of the socket API. We allow either a char or an int.
2167 */
2168 if (sopt->sopt_valsize == 1) {
2169 u_char loop;
2170 error = sooptcopyin(sopt, &loop, 1, 1);
2171 if (error)
2172 break;
2173 imo->imo_multicast_loop = !!loop;
2174 } else {
2175 u_int loop;
2176 error = sooptcopyin(sopt, &loop, sizeof loop,
2177 sizeof loop);
2178 if (error)
2179 break;
2180 imo->imo_multicast_loop = !!loop;
2181 }
2182 break;
2183
2184 case IP_ADD_MEMBERSHIP:
2185 /*
2186 * Add a multicast group membership.
2187 * Group must be a valid IP multicast address.
2188 */
2189 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
2190 if (error)
2191 break;
2192
2193 error = ip_addmembership(imo, &mreq);
2194 break;
2195
2196 case IP_DROP_MEMBERSHIP:
2197 /*
2198 * Drop a multicast group membership.
2199 * Group must be a valid IP multicast address.
2200 */
2201 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
2202 if (error)
2203 break;
2204
2205 error = ip_dropmembership(imo, &mreq);
2206 break;
2207
2208 default:
2209 error = EOPNOTSUPP;
2210 break;
2211 }
2212
2213 /*
2214 * If all options have default values, no need to keep the mbuf.
2215 */
2216 if (imo->imo_multicast_ifp == NULL &&
2217 imo->imo_multicast_vif == -1 &&
2218 imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL &&
2219 imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP &&
2220 imo->imo_num_memberships == 0) {
2221 FREE(*imop, M_IPMOPTS);
2222 *imop = NULL;
2223 }
2224
2225 return (error);
2226 }
2227
2228 /*
2229 * Set the IP multicast options in response to user setsockopt().
2230 */
2231 __private_extern__ int
2232 ip_createmoptions(
2233 struct ip_moptions **imop)
2234 {
2235 struct ip_moptions *imo;
2236 imo = (struct ip_moptions*) _MALLOC(sizeof(*imo), M_IPMOPTS,
2237 M_WAITOK);
2238
2239 if (imo == NULL)
2240 return (ENOBUFS);
2241 *imop = imo;
2242 imo->imo_multicast_ifp = NULL;
2243 imo->imo_multicast_addr.s_addr = INADDR_ANY;
2244 imo->imo_multicast_vif = -1;
2245 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
2246 imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
2247 imo->imo_num_memberships = 0;
2248
2249 return 0;
2250 }
2251
2252 /*
2253 * Add membership to an IPv4 multicast.
2254 */
2255 __private_extern__ int
2256 ip_addmembership(
2257 struct ip_moptions *imo,
2258 struct ip_mreq *mreq)
2259 {
2260 struct route ro;
2261 struct sockaddr_in *dst;
2262 struct ifnet *ifp = NULL;
2263 int error = 0;
2264 int i;
2265
2266 if (!IN_MULTICAST(ntohl(mreq->imr_multiaddr.s_addr))) {
2267 error = EINVAL;
2268 return error;
2269 }
2270 /*
2271 * If no interface address was provided, use the interface of
2272 * the route to the given multicast address.
2273 */
2274 if (mreq->imr_interface.s_addr == INADDR_ANY) {
2275 bzero((caddr_t)&ro, sizeof(ro));
2276 dst = (struct sockaddr_in *)&ro.ro_dst;
2277 dst->sin_len = sizeof(*dst);
2278 dst->sin_family = AF_INET;
2279 dst->sin_addr = mreq->imr_multiaddr;
2280 rtalloc(&ro);
2281 if (ro.ro_rt != NULL) {
2282 ifp = ro.ro_rt->rt_ifp;
2283 rtfree(ro.ro_rt);
2284 }
2285 else {
2286 /* If there's no default route, try using loopback */
2287 mreq->imr_interface.s_addr = INADDR_LOOPBACK;
2288 }
2289 }
2290
2291 if (ifp == NULL) {
2292 ifp = ip_multicast_if(&mreq->imr_interface, NULL);
2293 }
2294
2295 /*
2296 * See if we found an interface, and confirm that it
2297 * supports multicast.
2298 */
2299 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
2300 error = EADDRNOTAVAIL;
2301 return error;
2302 }
2303 /*
2304 * See if the membership already exists or if all the
2305 * membership slots are full.
2306 */
2307 for (i = 0; i < imo->imo_num_memberships; ++i) {
2308 if (imo->imo_membership[i]->inm_ifp == ifp &&
2309 imo->imo_membership[i]->inm_addr.s_addr
2310 == mreq->imr_multiaddr.s_addr)
2311 break;
2312 }
2313 if (i < imo->imo_num_memberships) {
2314 error = EADDRINUSE;
2315 return error;
2316 }
2317 if (i == IP_MAX_MEMBERSHIPS) {
2318 error = ETOOMANYREFS;
2319 return error;
2320 }
2321 /*
2322 * Everything looks good; add a new record to the multicast
2323 * address list for the given interface.
2324 */
2325 if ((imo->imo_membership[i] =
2326 in_addmulti(&mreq->imr_multiaddr, ifp)) == NULL) {
2327 error = ENOBUFS;
2328 return error;
2329 }
2330 ++imo->imo_num_memberships;
2331
2332 return error;
2333 }
2334
2335 /*
2336 * Drop membership of an IPv4 multicast.
2337 */
2338 __private_extern__ int
2339 ip_dropmembership(
2340 struct ip_moptions *imo,
2341 struct ip_mreq *mreq)
2342 {
2343 int error = 0;
2344 struct ifnet* ifp = NULL;
2345 int i;
2346
2347 if (!IN_MULTICAST(ntohl(mreq->imr_multiaddr.s_addr))) {
2348 error = EINVAL;
2349 return error;
2350 }
2351
2352 /*
2353 * If an interface address was specified, get a pointer
2354 * to its ifnet structure.
2355 */
2356 if (mreq->imr_interface.s_addr == INADDR_ANY)
2357 ifp = NULL;
2358 else {
2359 ifp = ip_multicast_if(&mreq->imr_interface, NULL);
2360 if (ifp == NULL) {
2361 error = EADDRNOTAVAIL;
2362 return error;
2363 }
2364 }
2365 /*
2366 * Find the membership in the membership array.
2367 */
2368 for (i = 0; i < imo->imo_num_memberships; ++i) {
2369 if ((ifp == NULL ||
2370 imo->imo_membership[i]->inm_ifp == ifp) &&
2371 imo->imo_membership[i]->inm_addr.s_addr ==
2372 mreq->imr_multiaddr.s_addr)
2373 break;
2374 }
2375 if (i == imo->imo_num_memberships) {
2376 error = EADDRNOTAVAIL;
2377 return error;
2378 }
2379 /*
2380 * Give up the multicast address record to which the
2381 * membership points.
2382 */
2383 in_delmulti(&imo->imo_membership[i]);
2384 /*
2385 * Remove the gap in the membership array.
2386 */
2387 for (++i; i < imo->imo_num_memberships; ++i)
2388 imo->imo_membership[i-1] = imo->imo_membership[i];
2389 --imo->imo_num_memberships;
2390
2391 return error;
2392 }
2393
2394 /*
2395 * Return the IP multicast options in response to user getsockopt().
2396 */
2397 static int
2398 ip_getmoptions(sopt, imo)
2399 struct sockopt *sopt;
2400 register struct ip_moptions *imo;
2401 {
2402 struct in_addr addr;
2403 struct in_ifaddr *ia;
2404 int error, optval;
2405 u_char coptval;
2406
2407 error = 0;
2408 switch (sopt->sopt_name) {
2409 case IP_MULTICAST_VIF:
2410 if (imo != NULL)
2411 optval = imo->imo_multicast_vif;
2412 else
2413 optval = -1;
2414 error = sooptcopyout(sopt, &optval, sizeof optval);
2415 break;
2416
2417 case IP_MULTICAST_IF:
2418 if (imo == NULL || imo->imo_multicast_ifp == NULL)
2419 addr.s_addr = INADDR_ANY;
2420 else if (imo->imo_multicast_addr.s_addr) {
2421 /* return the value user has set */
2422 addr = imo->imo_multicast_addr;
2423 } else {
2424 IFP_TO_IA(imo->imo_multicast_ifp, ia);
2425 addr.s_addr = (ia == NULL) ? INADDR_ANY
2426 : IA_SIN(ia)->sin_addr.s_addr;
2427 }
2428 error = sooptcopyout(sopt, &addr, sizeof addr);
2429 break;
2430
2431 case IP_MULTICAST_TTL:
2432 if (imo == 0)
2433 optval = coptval = IP_DEFAULT_MULTICAST_TTL;
2434 else
2435 optval = coptval = imo->imo_multicast_ttl;
2436 if (sopt->sopt_valsize == 1)
2437 error = sooptcopyout(sopt, &coptval, 1);
2438 else
2439 error = sooptcopyout(sopt, &optval, sizeof optval);
2440 break;
2441
2442 case IP_MULTICAST_LOOP:
2443 if (imo == 0)
2444 optval = coptval = IP_DEFAULT_MULTICAST_LOOP;
2445 else
2446 optval = coptval = imo->imo_multicast_loop;
2447 if (sopt->sopt_valsize == 1)
2448 error = sooptcopyout(sopt, &coptval, 1);
2449 else
2450 error = sooptcopyout(sopt, &optval, sizeof optval);
2451 break;
2452
2453 default:
2454 error = ENOPROTOOPT;
2455 break;
2456 }
2457 return (error);
2458 }
2459
2460 /*
2461 * Discard the IP multicast options.
2462 */
2463 void
2464 ip_freemoptions(imo)
2465 register struct ip_moptions *imo;
2466 {
2467 register int i;
2468
2469 if (imo != NULL) {
2470 for (i = 0; i < imo->imo_num_memberships; ++i)
2471 in_delmulti(&imo->imo_membership[i]);
2472 FREE(imo, M_IPMOPTS);
2473 }
2474 }
2475
2476 /*
2477 * Routine called from ip_output() to loop back a copy of an IP multicast
2478 * packet to the input queue of a specified interface. Note that this
2479 * calls the output routine of the loopback "driver", but with an interface
2480 * pointer that might NOT be a loopback interface -- evil, but easier than
2481 * replicating that code here.
2482 */
2483 static void
2484 ip_mloopback(ifp, m, dst, hlen)
2485 struct ifnet *ifp;
2486 register struct mbuf *m;
2487 register struct sockaddr_in *dst;
2488 int hlen;
2489 {
2490 register struct ip *ip;
2491 struct mbuf *copym;
2492
2493 copym = m_copy(m, 0, M_COPYALL);
2494 if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen))
2495 copym = m_pullup(copym, hlen);
2496 if (copym != NULL) {
2497 /*
2498 * We don't bother to fragment if the IP length is greater
2499 * than the interface's MTU. Can this possibly matter?
2500 */
2501 ip = mtod(copym, struct ip *);
2502 HTONS(ip->ip_len);
2503 HTONS(ip->ip_off);
2504 ip->ip_sum = 0;
2505 ip->ip_sum = in_cksum(copym, hlen);
2506 /*
2507 * NB:
2508 * It's not clear whether there are any lingering
2509 * reentrancy problems in other areas which might
2510 * be exposed by using ip_input directly (in
2511 * particular, everything which modifies the packet
2512 * in-place). Yet another option is using the
2513 * protosw directly to deliver the looped back
2514 * packet. For the moment, we'll err on the side
2515 * of safety by using if_simloop().
2516 */
2517 #if 1 /* XXX */
2518 if (dst->sin_family != AF_INET) {
2519 printf("ip_mloopback: bad address family %d\n",
2520 dst->sin_family);
2521 dst->sin_family = AF_INET;
2522 }
2523 #endif
2524
2525
2526 /*
2527 * Mark checksum as valid or calculate checksum for loopback.
2528 *
2529 * This is done this way because we have to embed the ifp of
2530 * the interface we will send the original copy of the packet
2531 * out on in the mbuf. ip_input will check if_hwassist of the
2532 * embedded ifp and ignore all csum_flags if if_hwassist is 0.
2533 * The UDP checksum has not been calculated yet.
2534 */
2535 if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
2536 if (IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist)) {
2537 copym->m_pkthdr.csum_flags |=
2538 CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
2539 CSUM_IP_CHECKED | CSUM_IP_VALID;
2540 copym->m_pkthdr.csum_data = 0xffff;
2541 } else {
2542 NTOHS(ip->ip_len);
2543 in_delayed_cksum(copym);
2544 HTONS(ip->ip_len);
2545 }
2546 }
2547
2548
2549 /*
2550 * TedW:
2551 * We need to send all loopback traffic down to dlil in case
2552 * a filter has tapped-in.
2553 */
2554
2555 /*
2556 * Stuff the 'real' ifp into the pkthdr, to be used in matching
2557 * in ip_input(); we need the loopback ifp/dl_tag passed as args
2558 * to make the loopback driver compliant with the data link
2559 * requirements.
2560 */
2561 if (lo_ifp) {
2562 copym->m_pkthdr.rcvif = ifp;
2563 dlil_output(lo_ifp, PF_INET, copym, 0, (struct sockaddr *) dst, 0);
2564 } else {
2565 printf("Warning: ip_output call to dlil_find_dltag failed!\n");
2566 m_freem(copym);
2567 }
2568
2569 /* if_simloop(ifp, copym, (struct sockaddr *)dst, 0);*/
2570 }
2571 }