]> git.saurik.com Git - apple/xnu.git/blame_incremental - bsd/netinet/ip_icmp.c
xnu-792.13.8.tar.gz
[apple/xnu.git] / bsd / netinet / ip_icmp.c
... / ...
CommitLineData
1/*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_OSREFERENCE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the
10 * License may not be used to create, or enable the creation or
11 * redistribution of, unlawful or unlicensed copies of an Apple operating
12 * system, or to circumvent, violate, or enable the circumvention or
13 * violation of, any terms of an Apple operating system software license
14 * agreement.
15 *
16 * Please obtain a copy of the License at
17 * http://www.opensource.apple.com/apsl/ and read it before using this
18 * file.
19 *
20 * The Original Code and all software distributed under the License are
21 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
22 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
23 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
24 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
25 * Please see the License for the specific language governing rights and
26 * limitations under the License.
27 *
28 * @APPLE_LICENSE_OSREFERENCE_HEADER_END@
29 */
30/*
31 * Copyright (c) 1982, 1986, 1988, 1993
32 * The Regents of the University of California. All rights reserved.
33 *
34 * Redistribution and use in source and binary forms, with or without
35 * modification, are permitted provided that the following conditions
36 * are met:
37 * 1. Redistributions of source code must retain the above copyright
38 * notice, this list of conditions and the following disclaimer.
39 * 2. Redistributions in binary form must reproduce the above copyright
40 * notice, this list of conditions and the following disclaimer in the
41 * documentation and/or other materials provided with the distribution.
42 * 3. All advertising materials mentioning features or use of this software
43 * must display the following acknowledgement:
44 * This product includes software developed by the University of
45 * California, Berkeley and its contributors.
46 * 4. Neither the name of the University nor the names of its contributors
47 * may be used to endorse or promote products derived from this software
48 * without specific prior written permission.
49 *
50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * SUCH DAMAGE.
61 *
62 * @(#)ip_icmp.c 8.2 (Berkeley) 1/4/94
63 */
64
65#include <sys/param.h>
66#include <sys/systm.h>
67#include <sys/mbuf.h>
68#include <sys/protosw.h>
69#include <sys/socket.h>
70#include <sys/time.h>
71#include <sys/kernel.h>
72#include <sys/sysctl.h>
73
74#include <net/if.h>
75#include <net/route.h>
76
77#define _IP_VHL
78#include <netinet/in.h>
79#include <netinet/in_systm.h>
80#include <netinet/in_var.h>
81#include <netinet/ip.h>
82#include <netinet/ip_icmp.h>
83#include <netinet/ip_var.h>
84#include <netinet/icmp_var.h>
85#include <netinet/tcp.h>
86#include <netinet/tcp_fsm.h>
87#include <netinet/tcp_seq.h>
88#include <netinet/tcp_timer.h>
89#include <netinet/tcp_var.h>
90#include <netinet/tcpip.h>
91
92#if IPSEC
93#include <netinet6/ipsec.h>
94#include <netkey/key.h>
95#endif
96
97#if defined(NFAITH) && NFAITH > 0
98#include "faith.h"
99#include <net/if_types.h>
100#endif
101
102 /* XXX This one should go in sys/mbuf.h. It is used to avoid that
103 * a firewall-generated packet loops forever through the firewall.
104 */
105#ifndef M_SKIP_FIREWALL
106#define M_SKIP_FIREWALL 0x4000
107#endif
108
109/*
110 * ICMP routines: error generation, receive packet processing, and
111 * routines to turnaround packets back to the originator, and
112 * host table maintenance routines.
113 */
114
/* ICMP statistics counters updated throughout this file; exported read-only via sysctl. */
115static struct icmpstat icmpstat;
116SYSCTL_STRUCT(_net_inet_icmp, ICMPCTL_STATS, stats, CTLFLAG_RD,
117 &icmpstat, icmpstat, "");
118
/* While zero (the default) icmp_input() does not answer ICMP_MASKREQ. */
119static int icmpmaskrepl = 0;
120SYSCTL_INT(_net_inet_icmp, ICMPCTL_MASKREPL, maskrepl, CTLFLAG_RW,
121 &icmpmaskrepl, 0, "");
122
/* While zero (the default) icmp_input() does not answer ICMP_TSTAMP. */
123static int icmptimestamp = 0;
124SYSCTL_INT(_net_inet_icmp, ICMPCTL_TIMESTAMP, timestamp, CTLFLAG_RW,
125 &icmptimestamp, 0, "");
126
/* When non-zero, icmp_input() ignores ICMP_REDIRECT instead of calling rtredirect(). */
127static int drop_redirect = 0;
128SYSCTL_INT(_net_inet_icmp, OID_AUTO, drop_redirect, CTLFLAG_RW,
129 &drop_redirect, 0, "");
130
/* When non-zero, icmp_input() printf()s source, destination and gateway of each redirect. */
131static int log_redirect = 0;
132SYSCTL_INT(_net_inet_icmp, OID_AUTO, log_redirect, CTLFLAG_RW,
133 &log_redirect, 0, "");
134
135#if ICMP_BANDLIM
136
137/*
138 * ICMP error-response bandwidth limiting sysctl. If not enabled, sysctl
139 * variable content is -1 and read-only.
140 */
141
/* Per-type packet budget that badport_bandlim() compares its counters against. */
142static int icmplim = 250;
143SYSCTL_INT(_net_inet_icmp, ICMPCTL_ICMPLIM, icmplim, CTLFLAG_RW,
144 &icmplim, 0, "");
145#else
146
147static int icmplim = -1;
148SYSCTL_INT(_net_inet_icmp, ICMPCTL_ICMPLIM, icmplim, CTLFLAG_RD,
149 &icmplim, 0, "");
150
151#endif
152
153/*
154 * ICMP broadcast echo sysctl
155 */
156
/*
 * When zero, icmp_input() does not reply to ICMP_ECHO or ICMP_TSTAMP
 * requests whose mbuf is flagged M_MCAST or M_BCAST.
 */
157static int icmpbmcastecho = 1;
158SYSCTL_INT(_net_inet_icmp, OID_AUTO, bmcastecho, CTLFLAG_RW,
159 &icmpbmcastecho, 0, "");
160
161
162#if ICMPPRINTFS
/* Set non-zero to enable the debugging printf()s guarded by ICMPPRINTFS. */
163int icmpprintfs = 0;
164#endif
165
166static void icmp_reflect(struct mbuf *);
167static void icmp_send(struct mbuf *, struct mbuf *);
168static int ip_next_mtu(int, int);
169
170extern struct protosw inetsw[];
171
172/*
173 * Generate an error packet of type error
174 * in response to bad packet ip.
175 */
176void
177icmp_error(
178 struct mbuf *n,
179 int type,
180 int code,
181 n_long dest,
182 struct ifnet *destifp)
183{
184 register struct ip *oip = mtod(n, struct ip *), *nip;
185 register unsigned oiplen = IP_VHL_HL(oip->ip_vhl) << 2;
186 register struct icmp *icp;
187 register struct mbuf *m;
188 unsigned icmplen;
189
190#if ICMPPRINTFS
191 if (icmpprintfs)
192 printf("icmp_error(%p, %x, %d)\n", oip, type, code);
193#endif
194 if (type != ICMP_REDIRECT)
195 icmpstat.icps_error++;
196 /*
197 * Don't send error if not the first fragment of message.
198 * Don't error if the old packet protocol was ICMP
199 * error message, only known informational types.
200 */
201 if (oip->ip_off &~ (IP_MF|IP_DF))
202 goto freeit;
203 if (oip->ip_p == IPPROTO_ICMP && type != ICMP_REDIRECT &&
204 n->m_len >= oiplen + ICMP_MINLEN &&
205 !ICMP_INFOTYPE(((struct icmp *)((caddr_t)oip + oiplen))->icmp_type)) {
206 icmpstat.icps_oldicmp++;
207 goto freeit;
208 }
209 /* Don't send error in response to a multicast or broadcast packet */
210 if (n->m_flags & (M_BCAST|M_MCAST))
211 goto freeit;
212 /*
213 * First, formulate icmp message
214 */
215 m = m_gethdr(M_DONTWAIT, MT_HEADER);
216 if (m == NULL)
217 goto freeit;
218
219 if (n->m_flags & M_SKIP_FIREWALL) {
220 /* set M_SKIP_FIREWALL to skip firewall check, since we're called from firewall */
221 m->m_flags |= M_SKIP_FIREWALL;
222 }
223
224 icmplen = min(oiplen + 8, oip->ip_len);
225 if (icmplen < sizeof(struct ip)) {
226 printf("icmp_error: bad length\n");
227 m_free(m);
228 goto freeit;
229 }
230 m->m_len = icmplen + ICMP_MINLEN;
231 MH_ALIGN(m, m->m_len);
232 icp = mtod(m, struct icmp *);
233 if ((u_int)type > ICMP_MAXTYPE)
234 panic("icmp_error");
235 icmpstat.icps_outhist[type]++;
236 icp->icmp_type = type;
237 if (type == ICMP_REDIRECT)
238 icp->icmp_gwaddr.s_addr = dest;
239 else {
240 icp->icmp_void = 0;
241 /*
242 * The following assignments assume an overlay with the
243 * zeroed icmp_void field.
244 */
245 if (type == ICMP_PARAMPROB) {
246 icp->icmp_pptr = code;
247 code = 0;
248 } else if (type == ICMP_UNREACH &&
249 code == ICMP_UNREACH_NEEDFRAG && destifp) {
250 icp->icmp_nextmtu = htons(destifp->if_mtu);
251 }
252 }
253
254 icp->icmp_code = code;
255 m_copydata(n, 0, icmplen, (caddr_t)&icp->icmp_ip);
256 nip = &icp->icmp_ip;
257
258 /*
259 * Convert fields to network representation.
260 */
261 HTONS(nip->ip_len);
262 HTONS(nip->ip_off);
263
264 /*
265 * Now, copy old ip header (without options)
266 * in front of icmp message.
267 */
268 if (m->m_data - sizeof(struct ip) < m->m_pktdat)
269 panic("icmp len");
270 m->m_data -= sizeof(struct ip);
271 m->m_len += sizeof(struct ip);
272 m->m_pkthdr.len = m->m_len;
273 m->m_pkthdr.rcvif = n->m_pkthdr.rcvif;
274 m->m_pkthdr.aux = NULL; /* for IPsec */
275 nip = mtod(m, struct ip *);
276 bcopy((caddr_t)oip, (caddr_t)nip, sizeof(struct ip));
277 nip->ip_len = m->m_len;
278 nip->ip_vhl = IP_VHL_BORING;
279 nip->ip_p = IPPROTO_ICMP;
280 nip->ip_tos = 0;
281 icmp_reflect(m);
282
283freeit:
284 m_freem(n);
285}
286
/*
 * Scratch socket addresses used by icmp_input() and icmp_reflect(). Only the
 * first two members are preset here (presumably sin_len and sin_family in
 * the BSD sockaddr_in layout); sin_addr is assigned before each use.
 * NOTE(review): these are file-scope statics shared by every caller --
 * confirm that callers are serialized.
 */
287static struct sockaddr_in icmpsrc = { sizeof (struct sockaddr_in), AF_INET };
288static struct sockaddr_in icmpdst = { sizeof (struct sockaddr_in), AF_INET };
289static struct sockaddr_in icmpgw = { sizeof (struct sockaddr_in), AF_INET };
290
291/*
292 * Process a received ICMP message.
293 */
/*
 * icmp_input: handle a received ICMP message held in mbuf chain m, whose
 * IP header is hlen bytes long.
 *
 * After length and checksum validation the message is dispatched on
 * icmp_type:
 *  - unreach / time-exceeded / param-problem / source-quench are mapped to
 *    a PRC_* code and passed to the pr_ctlinput routine of the protocol
 *    named in the quoted IP header (label "deliver");
 *  - echo, timestamp and mask requests are rewritten in place into the
 *    matching reply and sent through icmp_reflect() (label "reflect");
 *  - redirects are applied with rtredirect().
 * Every message that is neither reflected nor dropped is finally handed to
 * rip_input() (label "raw"); dropped messages are freed at "freeit".
 */
294void
295icmp_input(m, hlen)
296 register struct mbuf *m;
297 int hlen;
298{
299 register struct icmp *icp;
300 register struct ip *ip = mtod(m, struct ip *);
 /* ip_len is used as the ICMP length; the reflect path adds hlen back to it. */
301 int icmplen = ip->ip_len;
302 register int i;
303 struct in_ifaddr *ia;
304 void (*ctlfunc)(int, struct sockaddr *, void *);
305 int code;
306 char ipv4str[MAX_IPv4_STR_LEN];
307
308 /*
309 * Locate icmp structure in mbuf, and check
310 * that not corrupted and of at least minimum length.
311 */
312#if ICMPPRINTFS
313 if (icmpprintfs) {
314 char buf[MAX_IPv4_STR_LEN];
315
316 printf("icmp_input from %s to %s, len %d\n",
317 inet_ntop(AF_INET, &ip->ip_src, buf, sizeof(buf)),
318 inet_ntop(AF_INET, &ip->ip_dst, ipv4str, sizeof(ipv4str)),
319 icmplen);
320 }
321#endif
322 if (icmplen < ICMP_MINLEN) {
323 icmpstat.icps_tooshort++;
324 goto freeit;
325 }
 /* Make the IP header plus up to ICMP_ADVLENMIN bytes of ICMP contiguous. */
326 i = hlen + min(icmplen, ICMP_ADVLENMIN);
327 if (m->m_len < i && (m = m_pullup(m, i)) == 0) {
328 icmpstat.icps_tooshort++;
 /* m is 0 here, so there is nothing to free on this path. */
329 return;
330 }
331 ip = mtod(m, struct ip *);
 /* Step over the IP header so the checksum covers only the ICMP message. */
332 m->m_len -= hlen;
333 m->m_data += hlen;
334 icp = mtod(m, struct icmp *);
335 if (in_cksum(m, icmplen)) {
336 icmpstat.icps_checksum++;
337 goto freeit;
338 }
 /* Restore the mbuf so it starts at the IP header again. */
339 m->m_len += hlen;
340 m->m_data -= hlen;
341
342#if defined(NFAITH) && 0 < NFAITH
343 if (m->m_pkthdr.rcvif && m->m_pkthdr.rcvif->if_type == IFT_FAITH) {
344 /*
345 * Deliver very specific ICMP type only.
346 */
347 switch (icp->icmp_type) {
348 case ICMP_UNREACH:
349 case ICMP_TIMXCEED:
350 break;
351 default:
352 goto freeit;
353 }
354 }
355#endif
356
357#if ICMPPRINTFS
358 if (icmpprintfs)
359 printf("icmp_input, type %d code %d\n", icp->icmp_type,
360 icp->icmp_code);
361#endif
362
363 /*
364 * Message type specific processing.
365 */
 /* Unknown types are not counted in icps_inhist; they go straight to raw sockets. */
366 if (icp->icmp_type > ICMP_MAXTYPE)
367 goto raw;
368 icmpstat.icps_inhist[icp->icmp_type]++;
369 code = icp->icmp_code;
370 switch (icp->icmp_type) {
371
372 case ICMP_UNREACH:
 /* Translate the ICMP unreachable code into the PRC_* code given to pr_ctlinput. */
373 switch (code) {
374 case ICMP_UNREACH_NET:
375 case ICMP_UNREACH_HOST:
376 case ICMP_UNREACH_SRCFAIL:
377 case ICMP_UNREACH_NET_UNKNOWN:
378 case ICMP_UNREACH_HOST_UNKNOWN:
379 case ICMP_UNREACH_ISOLATED:
380 case ICMP_UNREACH_TOSNET:
381 case ICMP_UNREACH_TOSHOST:
382 case ICMP_UNREACH_HOST_PRECEDENCE:
383 case ICMP_UNREACH_PRECEDENCE_CUTOFF:
384 code = PRC_UNREACH_NET;
385 break;
386
387 case ICMP_UNREACH_NEEDFRAG:
388 code = PRC_MSGSIZE;
389 break;
390
391 /*
392 * RFC 1122, Sections 3.2.2.1 and 4.2.3.9.
393 * Treat subcodes 2,3 as immediate RST
394 */
395 case ICMP_UNREACH_PROTOCOL:
396 case ICMP_UNREACH_PORT:
397 code = PRC_UNREACH_PORT;
398 break;
399
400 case ICMP_UNREACH_NET_PROHIB:
401 case ICMP_UNREACH_HOST_PROHIB:
402 case ICMP_UNREACH_FILTER_PROHIB:
403 code = PRC_UNREACH_ADMIN_PROHIB;
404 break;
405
406 default:
407 goto badcode;
408 }
409 goto deliver;
410
411 case ICMP_TIMXCEED:
412 if (code > 1)
413 goto badcode;
414 code += PRC_TIMXCEED_INTRANS;
415 goto deliver;
416
417 case ICMP_PARAMPROB:
418 if (code > 1)
419 goto badcode;
420 code = PRC_PARAMPROB;
421 goto deliver;
422
423 case ICMP_SOURCEQUENCH:
424 if (code)
425 goto badcode;
426 code = PRC_QUENCH;
 /* Shared tail for all error types above; from here on "code" holds a PRC_* value. */
427 deliver:
428 /*
429 * Problem with datagram; advise higher level routines.
430 */
431 if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
432 IP_VHL_HL(icp->icmp_ip.ip_vhl) < (sizeof(struct ip) >> 2)) {
433 icmpstat.icps_badlen++;
434 goto freeit;
435 }
436 NTOHS(icp->icmp_ip.ip_len);
437 /* Discard ICMP's in response to multicast packets */
438 if (IN_MULTICAST(ntohl(icp->icmp_ip.ip_dst.s_addr)))
439 goto badcode;
440#if ICMPPRINTFS
441 if (icmpprintfs)
442 printf("deliver to protocol %d\n", icp->icmp_ip.ip_p);
443#endif
444 icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
445#if 1
446 /*
447 * MTU discovery:
448 * If we got a needfrag and there is a host route to the
449 * original destination, and the MTU is not locked, then
450 * set the MTU in the route to the suggested new value
451 * (if given) and then notify as usual. The ULPs will
452 * notice that the MTU has changed and adapt accordingly.
453 * If no new MTU was suggested, then we guess a new one
454 * less than the current value. If the new MTU is
455 * unreasonably small (defined by sysctl tcp_minmss), then
456 * we reset the MTU to the interface value and enable the
457 * lock bit, indicating that we are no longer doing MTU
458 * discovery.
459 */
460 if (code == PRC_MSGSIZE) {
461 struct rtentry *rt;
462 int mtu;
463
464 rt = rtalloc1((struct sockaddr *)&icmpsrc, 0,
465 RTF_CLONING | RTF_PRCLONING);
466 if (rt && (rt->rt_flags & RTF_HOST)
467 && !(rt->rt_rmx.rmx_locks & RTV_MTU)) {
468 mtu = ntohs(icp->icmp_nextmtu);
469 if (!mtu)
470 mtu = ip_next_mtu(rt->rt_rmx.rmx_mtu,
471 1);
472#if DEBUG_MTUDISC
473 printf("MTU for %s reduced to %d\n",
474 inet_ntop(AF_INET, &icmpsrc.sin_addr, ipv4str,
475 sizeof(ipv4str)),
476 mtu);
477#endif
 /*
  * NOTE: the reset of rmx_mtu to the interface value is commented
  * out below, so a too-small MTU only sets the lock bit here.
  */
478 if (mtu < max(296, (tcp_minmss + sizeof(struct tcpiphdr)))) {
479 /* rt->rt_rmx.rmx_mtu =
480 rt->rt_ifp->if_mtu; */
481 rt->rt_rmx.rmx_locks |= RTV_MTU;
482 } else if (rt->rt_rmx.rmx_mtu > mtu) {
483 rt->rt_rmx.rmx_mtu = mtu;
484 }
485 }
486 if (rt)
487 rtfree(rt);
488 }
489
490#endif
491 /*
492 * XXX if the packet contains [IPv4 AH TCP], we can't make a
493 * notification to TCP layer.
494 */
495 ctlfunc = ip_protox[icp->icmp_ip.ip_p]->pr_ctlinput;
496 if (ctlfunc)
497 (*ctlfunc)(code, (struct sockaddr *)&icmpsrc,
498 (void *)&icp->icmp_ip);
499 break;
500
501 badcode:
502 icmpstat.icps_badcode++;
503 break;
504
505 case ICMP_ECHO:
506 if (!icmpbmcastecho
507 && (m->m_flags & (M_MCAST | M_BCAST)) != 0) {
508 icmpstat.icps_bmcastecho++;
509 break;
510 }
 /* Turn the request into a reply in place, then send it back (rate limited if enabled). */
511 icp->icmp_type = ICMP_ECHOREPLY;
512#if ICMP_BANDLIM
513 if (badport_bandlim(BANDLIM_ICMP_ECHO) < 0)
514 goto freeit;
515 else
516#endif
517 goto reflect;
518
519 case ICMP_TSTAMP:
520
521 if (icmptimestamp == 0)
522 break;
523
524 if (!icmpbmcastecho
525 && (m->m_flags & (M_MCAST | M_BCAST)) != 0) {
526 icmpstat.icps_bmcasttstamp++;
527 break;
528 }
529 if (icmplen < ICMP_TSLEN) {
530 icmpstat.icps_badlen++;
531 break;
532 }
533 icp->icmp_type = ICMP_TSTAMPREPLY;
534 icp->icmp_rtime = iptime();
535 icp->icmp_ttime = icp->icmp_rtime; /* bogus, do later! */
536#if ICMP_BANDLIM
537 if (badport_bandlim(BANDLIM_ICMP_TSTAMP) < 0)
538 goto freeit;
539 else
540#endif
541 goto reflect;
542
543 case ICMP_MASKREQ:
 /* This macro stays defined for the rest of the file; icmp_reflect() also uses it. */
544#define satosin(sa) ((struct sockaddr_in *)(sa))
545 if (icmpmaskrepl == 0)
546 break;
547 /*
548 * We are not able to respond with all ones broadcast
549 * unless we receive it over a point-to-point interface.
550 */
551 if (icmplen < ICMP_MASKLEN)
552 break;
553 switch (ip->ip_dst.s_addr) {
554
555 case INADDR_BROADCAST:
556 case INADDR_ANY:
557 icmpdst.sin_addr = ip->ip_src;
558 break;
559
560 default:
561 icmpdst.sin_addr = ip->ip_dst;
562 }
563 ia = (struct in_ifaddr *)ifaof_ifpforaddr(
564 (struct sockaddr *)&icmpdst, m->m_pkthdr.rcvif);
565 if (ia == 0)
566 break;
567 if (ia->ia_ifp == 0) {
568 ifafree(&ia->ia_ifa);
569 ia = 0;
570 break;
571 }
572 icp->icmp_type = ICMP_MASKREPLY;
573 icp->icmp_mask = ia->ia_sockmask.sin_addr.s_addr;
574 if (ip->ip_src.s_addr == 0) {
575 if (ia->ia_ifp->if_flags & IFF_BROADCAST)
576 ip->ip_src = satosin(&ia->ia_broadaddr)->sin_addr;
577 else if (ia->ia_ifp->if_flags & IFF_POINTOPOINT)
578 ip->ip_src = satosin(&ia->ia_dstaddr)->sin_addr;
579 }
580 ifafree(&ia->ia_ifa);
 /* Common exit for echo, timestamp and mask replies; m is given to icmp_reflect(). */
581reflect:
582 ip->ip_len += hlen; /* since ip_input deducts this */
583 icmpstat.icps_reflect++;
584 icmpstat.icps_outhist[icp->icmp_type]++;
585 icmp_reflect(m);
586 return;
587
588 case ICMP_REDIRECT:
 /*
  * NOTE(review): the log below reads icp->icmp_ip and icp->icmp_gwaddr
  * before the length check further down has run -- confirm that is
  * acceptable for short messages.
  */
589 if (log_redirect) {
590 u_long src, dst, gw;
591
592 src = ntohl(ip->ip_src.s_addr);
593 dst = ntohl(icp->icmp_ip.ip_dst.s_addr);
594 gw = ntohl(icp->icmp_gwaddr.s_addr);
595 printf("icmp redirect from %d.%d.%d.%d: "
596 "%d.%d.%d.%d => %d.%d.%d.%d\n",
597 (int)(src >> 24), (int)((src >> 16) & 0xff),
598 (int)((src >> 8) & 0xff), (int)(src & 0xff),
599 (int)(dst >> 24), (int)((dst >> 16) & 0xff),
600 (int)((dst >> 8) & 0xff), (int)(dst & 0xff),
601 (int)(gw >> 24), (int)((gw >> 16) & 0xff),
602 (int)((gw >> 8) & 0xff), (int)(gw & 0xff));
603 }
604 if (drop_redirect)
605 break;
606 if (code > 3)
607 goto badcode;
608 if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
609 IP_VHL_HL(icp->icmp_ip.ip_vhl) < (sizeof(struct ip) >> 2)) {
610 icmpstat.icps_badlen++;
611 break;
612 }
613 /*
614 * Short circuit routing redirects to force
615 * immediate change in the kernel's routing
616 * tables. The message is also handed to anyone
617 * listening on a raw socket (e.g. the routing
618 * daemon for use in updating its tables).
619 */
620 icmpgw.sin_addr = ip->ip_src;
621 icmpdst.sin_addr = icp->icmp_gwaddr;
622#if ICMPPRINTFS
623 if (icmpprintfs) {
624 char buf[MAX_IPv4_STR_LEN];
625
626 printf("redirect dst %s to %s\n",
627 inet_ntop(AF_INET, &icp->icmp_ip.ip_dst, buf, sizeof(buf)),
628 inet_ntop(AF_INET, &icp->icmp_gwaddr, ipv4str,
629 sizeof(ipv4str)));
630 }
631#endif
632 icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
633 rtredirect((struct sockaddr *)&icmpsrc,
634 (struct sockaddr *)&icmpdst,
635 (struct sockaddr *)0, RTF_GATEWAY | RTF_HOST,
636 (struct sockaddr *)&icmpgw, (struct rtentry **)0);
637 pfctlinput(PRC_REDIRECT_HOST, (struct sockaddr *)&icmpsrc);
638#if IPSEC
639 key_sa_routechange((struct sockaddr *)&icmpsrc);
640#endif
641 break;
642
643 /*
644 * No kernel processing for the following;
645 * just fall through to send to raw listener.
646 */
647 case ICMP_ECHOREPLY:
648 case ICMP_ROUTERADVERT:
649 case ICMP_ROUTERSOLICIT:
650 case ICMP_TSTAMPREPLY:
651 case ICMP_IREQREPLY:
652 case ICMP_MASKREPLY:
653 default:
654 break;
655 }
656
 /* Everything that reaches this point is passed on to rip_input(). */
657raw:
658 rip_input(m, hlen);
659 return;
660
661freeit:
662 m_freem(m);
663}
664
665/*
666 * Reflect the ip packet back to the source
667 */
668static void
669icmp_reflect(m)
670 struct mbuf *m;
671{
672 register struct ip *ip = mtod(m, struct ip *);
673 register struct in_ifaddr *ia;
674 struct in_addr t;
675 struct mbuf *opts = 0;
676 int optlen = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof(struct ip);
677
678 if (!in_canforward(ip->ip_src) &&
679 ((ntohl(ip->ip_src.s_addr) & IN_CLASSA_NET) !=
680 (IN_LOOPBACKNET << IN_CLASSA_NSHIFT))) {
681 m_freem(m); /* Bad return address */
682 goto done; /* Ip_output() will check for broadcast */
683 }
684 t = ip->ip_dst;
685 ip->ip_dst = ip->ip_src;
686 /*
687 * If the incoming packet was addressed directly to us,
688 * use dst as the src for the reply. Otherwise (broadcast
689 * or anonymous), use the address which corresponds
690 * to the incoming interface.
691 */
692 lck_mtx_lock(rt_mtx);
693 for (ia = in_ifaddrhead.tqh_first; ia; ia = ia->ia_link.tqe_next) {
694 if (t.s_addr == IA_SIN(ia)->sin_addr.s_addr)
695 break;
696 if (ia->ia_ifp && (ia->ia_ifp->if_flags & IFF_BROADCAST) &&
697 t.s_addr == satosin(&ia->ia_broadaddr)->sin_addr.s_addr)
698 break;
699 }
700 if (ia)
701 ifaref(&ia->ia_ifa);
702 icmpdst.sin_addr = t;
703 if ((ia == (struct in_ifaddr *)0) && m->m_pkthdr.rcvif)
704 ia = (struct in_ifaddr *)ifaof_ifpforaddr(
705 (struct sockaddr *)&icmpdst, m->m_pkthdr.rcvif);
706 /*
707 * The following happens if the packet was not addressed to us,
708 * and was received on an interface with no IP address.
709 */
710 if (ia == (struct in_ifaddr *)0) {
711 ia = in_ifaddrhead.tqh_first;
712 if (ia == (struct in_ifaddr *)0) {/* no address yet, bail out */
713 m_freem(m);
714 lck_mtx_unlock(rt_mtx);
715 goto done;
716 }
717 ifaref(&ia->ia_ifa);
718 }
719 lck_mtx_unlock(rt_mtx);
720 t = IA_SIN(ia)->sin_addr;
721 ip->ip_src = t;
722 ip->ip_ttl = ip_defttl;
723 ifafree(&ia->ia_ifa);
724 ia = NULL;
725
726 if (optlen > 0) {
727 register u_char *cp;
728 int opt, cnt;
729 u_int len;
730
731 /*
732 * Retrieve any source routing from the incoming packet;
733 * add on any record-route or timestamp options.
734 */
735 cp = (u_char *) (ip + 1);
736 if ((opts = ip_srcroute()) == 0 &&
737 (opts = m_gethdr(M_DONTWAIT, MT_HEADER))) {
738 opts->m_len = sizeof(struct in_addr);
739 mtod(opts, struct in_addr *)->s_addr = 0;
740 }
741 if (opts) {
742#if ICMPPRINTFS
743 if (icmpprintfs)
744 printf("icmp_reflect optlen %d rt %d => ",
745 optlen, opts->m_len);
746#endif
747 for (cnt = optlen; cnt > 0; cnt -= len, cp += len) {
748 opt = cp[IPOPT_OPTVAL];
749 if (opt == IPOPT_EOL)
750 break;
751 if (opt == IPOPT_NOP)
752 len = 1;
753 else {
754 if (cnt < IPOPT_OLEN + sizeof(*cp))
755 break;
756 len = cp[IPOPT_OLEN];
757 if (len < IPOPT_OLEN + sizeof(*cp) ||
758 len > cnt)
759 break;
760 }
761 /*
762 * Should check for overflow, but it "can't happen"
763 */
764 if (opt == IPOPT_RR || opt == IPOPT_TS ||
765 opt == IPOPT_SECURITY) {
766 bcopy((caddr_t)cp,
767 mtod(opts, caddr_t) + opts->m_len, len);
768 opts->m_len += len;
769 }
770 }
771 /* Terminate & pad, if necessary */
772 cnt = opts->m_len % 4;
773 if (cnt) {
774 for (; cnt < 4; cnt++) {
775 *(mtod(opts, caddr_t) + opts->m_len) =
776 IPOPT_EOL;
777 opts->m_len++;
778 }
779 }
780#if ICMPPRINTFS
781 if (icmpprintfs)
782 printf("%d\n", opts->m_len);
783#endif
784 }
785 /*
786 * Now strip out original options by copying rest of first
787 * mbuf's data back, and adjust the IP length.
788 */
789 ip->ip_len -= optlen;
790 ip->ip_vhl = IP_VHL_BORING;
791 m->m_len -= optlen;
792 if (m->m_flags & M_PKTHDR)
793 m->m_pkthdr.len -= optlen;
794 optlen += sizeof(struct ip);
795 bcopy((caddr_t)ip + optlen, (caddr_t)(ip + 1),
796 (unsigned)(m->m_len - sizeof(struct ip)));
797 }
798 m->m_flags &= ~(M_BCAST|M_MCAST);
799 icmp_send(m, opts);
800done:
801 if (opts)
802 (void)m_free(opts);
803}
804
805/*
806 * Send an icmp packet back to the ip level,
807 * after supplying a checksum.
808 */
809static void
810icmp_send(m, opts)
811 register struct mbuf *m;
812 struct mbuf *opts;
813{
814 register struct ip *ip = mtod(m, struct ip *);
815 register int hlen;
816 register struct icmp *icp;
817 struct route ro;
818 char ipv4str[MAX_IPv4_STR_LEN];
819
820 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
821 m->m_data += hlen;
822 m->m_len -= hlen;
823 icp = mtod(m, struct icmp *);
824 icp->icmp_cksum = 0;
825 icp->icmp_cksum = in_cksum(m, ip->ip_len - hlen);
826 m->m_data -= hlen;
827 m->m_len += hlen;
828 m->m_pkthdr.rcvif = 0;
829 m->m_pkthdr.aux = NULL;
830 m->m_pkthdr.csum_data = 0;
831 m->m_pkthdr.csum_flags = 0;
832#if ICMPPRINTFS
833 if (icmpprintfs) {
834 char buf[MAX_IPv4_STR_LEN];
835
836 printf("icmp_send dst %s src %s\n",
837 inet_ntop(AF_INET, &ip->ip_dst, buf, sizeof(buf)),
838 inet_ntop(AF_INET, &ip->ip_src, ipv4str, sizeof(ipv4str)));
839 }
840#endif
841 bzero(&ro, sizeof ro);
842 (void) ip_output(m, opts, &ro, 0, NULL);
843 if (ro.ro_rt)
844 rtfree(ro.ro_rt);
845}
846
847n_time
848iptime()
849{
850 struct timeval atv;
851 u_long t;
852
853 microtime(&atv);
854 t = (atv.tv_sec % (24*60*60)) * 1000 + atv.tv_usec / 1000;
855 return (htonl(t));
856}
857
858#if 1
/*
 * Return the next larger or smaller MTU plateau (table from RFC 1191)
 * relative to the current value mtu.
 *
 *   dir <  0: return the smallest plateau above mtu, or 0 if mtu is
 *             already at or beyond the largest plateau.
 *   dir >= 0: return the largest plateau strictly below mtu, or 0 if
 *             there is none.
 *
 * A negative mtu matches no table entry; it yields 0 in either direction
 * instead of indexing past the end of the table.
 */
static int
ip_next_mtu(int mtu, int dir)
{
	static const int mtutab[] = {
		65535, 32000, 17914, 8166, 4352, 2002, 1492, 1006, 508, 296,
		68, 0
	};
	const int nplateaus = (int)(sizeof(mtutab) / sizeof(mtutab[0]));
	int i;

	/* Find the first (largest) plateau that does not exceed mtu. */
	for (i = 0; i < nplateaus; i++) {
		if (mtu >= mtutab[i])
			break;
	}

	if (dir < 0) {
		/* i == 0 means mtu is already >= the largest plateau. */
		return (i == 0) ? 0 : mtutab[i - 1];
	}

	/*
	 * i == nplateaus only happens for a negative mtu, which is below
	 * even the terminating 0 entry: there is no smaller plateau.
	 */
	if (i == nplateaus || mtutab[i] == 0)
		return 0;
	if (mtu > mtutab[i])
		return mtutab[i];
	/* mtu sits exactly on a plateau: step down to the next one. */
	return mtutab[i + 1];
}
896#endif
897
898#if ICMP_BANDLIM
899
900/*
901 * badport_bandlim() - check for ICMP bandwidth limit
902 *
903 * Return 0 if it is ok to send an ICMP error response, -1 if we have
904 * hit our bandwidth limit and it is not ok.
905 *
906 * If icmplim is <= 0, the feature is disabled and 0 is returned.
907 *
908 * For now we separate the TCP and UDP subsystems w/ different 'which'
909 * values. We may eventually remove this separation (and simplify the
910 * code further).
911 *
912 * Note that the printing of the error message is delayed so we can
913 * properly print the icmp error rate that the system was trying to do
914 * (i.e. 22000/100 pps, etc...). This can cause long delays in printing
915 * the 'final' error, but it doesn't make sense to solve the printing
916 * delay with more complex code.
917 */
918
919int
920badport_bandlim(int which)
921{
922 static struct timeval lticks[BANDLIM_MAX + 1];
923 static int lpackets[BANDLIM_MAX + 1];
924 struct timeval time;
925 int secs;
926
927 const char *bandlimittype[] = {
928 "Limiting icmp unreach response",
929 "Limiting icmp ping response",
930 "Limiting icmp tstamp response",
931 "Limiting closed port RST response",
932 "Limiting open port RST response"
933 };
934
935 /*
936 * Return ok status if feature disabled or argument out of
937 * ranage.
938 */
939
940 if (icmplim <= 0 || which > BANDLIM_MAX || which < 0)
941 return(0);
942
943 getmicrouptime(&time);
944
945 secs = time.tv_sec - lticks[which].tv_sec ;
946
947 /*
948 * reset stats when cumulative delta exceeds one second.
949 */
950
951 if ((secs > 1) || (secs == 1 && (lticks[which].tv_usec > time.tv_usec))) {
952 if (lpackets[which] > icmplim) {
953 printf("%s from %d to %d packets per second\n",
954 bandlimittype[which],
955 lpackets[which],
956 icmplim
957 );
958 }
959 lticks[which].tv_sec = time.tv_sec;
960 lticks[which].tv_usec = time.tv_usec;
961 lpackets[which] = 0;
962 }
963
964 /*
965 * bump packet count
966 */
967
968 if (++lpackets[which] > icmplim) {
969 return(-1);
970 }
971 return(0);
972}
973
974#endif
975
976#if __APPLE__
977
978/*
979 * Non-privileged ICMP socket operations
980 * - send ICMP echo request
981 * - all ICMP
982 * - limited socket options
983 */
984
985#include <netinet/ip_icmp.h>
986#include <netinet/in_pcb.h>
987
988extern struct domain inetdomain;
989extern u_long rip_sendspace;
990extern u_long rip_recvspace;
991extern struct inpcbinfo ripcbinfo;
992
993int rip_abort(struct socket *);
994int rip_bind(struct socket *, struct sockaddr *, struct proc *);
995int rip_connect(struct socket *, struct sockaddr *, struct proc *);
996int rip_detach(struct socket *);
997int rip_disconnect(struct socket *);
998int rip_shutdown(struct socket *);
999
1000__private_extern__ int icmp_dgram_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, struct mbuf *control, struct proc *p);
1001__private_extern__ int icmp_dgram_attach(struct socket *so, int proto, struct proc *p);
1002__private_extern__ int icmp_dgram_ctloutput(struct socket *so, struct sockopt *sopt);
1003
/*
 * User-request table for the non-privileged ICMP datagram socket.  Most
 * entries reuse the rip_* handlers declared above; attach and send are
 * replaced by icmp_dgram_attach() and icmp_dgram_send().
 * NOTE(review): the initializer is positional, so it must stay in step with
 * the member order of struct pr_usrreqs, which is declared outside this
 * file.
 */
1004__private_extern__ struct pr_usrreqs icmp_dgram_usrreqs = {
1005 rip_abort, pru_accept_notsupp, icmp_dgram_attach, rip_bind, rip_connect,
1006 pru_connect2_notsupp, in_control, rip_detach, rip_disconnect,
1007 pru_listen_notsupp, in_setpeeraddr, pru_rcvd_notsupp,
1008 pru_rcvoob_notsupp, icmp_dgram_send, pru_sense_null, rip_shutdown,
1009 in_setsockaddr, sosend, soreceive, pru_sopoll_notsupp
1010};
1011
1012/* Like rip_attach but without root privilege enforcement */
1013__private_extern__ int
1014icmp_dgram_attach(struct socket *so, int proto, struct proc *p)
1015{
1016 struct inpcb *inp;
1017 int error, s;
1018
1019 inp = sotoinpcb(so);
1020 if (inp)
1021 panic("icmp_dgram_attach");
1022
1023 error = soreserve(so, rip_sendspace, rip_recvspace);
1024 if (error)
1025 return error;
1026 s = splnet();
1027 error = in_pcballoc(so, &ripcbinfo, p);
1028 splx(s);
1029 if (error)
1030 return error;
1031 inp = (struct inpcb *)so->so_pcb;
1032 inp->inp_vflag |= INP_IPV4;
1033 inp->inp_ip_p = IPPROTO_ICMP;
1034 inp->inp_ip_ttl = ip_defttl;
1035 return 0;
1036}
1037
1038/*
1039 * Raw IP socket option processing.
1040 */
1041__private_extern__ int
1042icmp_dgram_ctloutput(struct socket *so, struct sockopt *sopt)
1043{
1044 struct inpcb *inp = sotoinpcb(so);
1045 int error, optval;
1046
1047 if (sopt->sopt_level != IPPROTO_IP)
1048 return (EINVAL);
1049
1050 switch (sopt->sopt_name) {
1051 case IP_OPTIONS:
1052 case IP_HDRINCL:
1053 case IP_TOS:
1054 case IP_TTL:
1055 case IP_RECVOPTS:
1056 case IP_RECVRETOPTS:
1057 case IP_RECVDSTADDR:
1058 case IP_RETOPTS:
1059 case IP_MULTICAST_IF:
1060 case IP_MULTICAST_TTL:
1061 case IP_MULTICAST_LOOP:
1062 case IP_ADD_MEMBERSHIP:
1063 case IP_DROP_MEMBERSHIP:
1064 case IP_MULTICAST_VIF:
1065 case IP_PORTRANGE:
1066 case IP_RECVIF:
1067 case IP_IPSEC_POLICY:
1068#if defined(NFAITH) && NFAITH > 0
1069 case IP_FAITH:
1070#endif
1071 case IP_STRIPHDR:
1072 case IP_RECVTTL:
1073 error = rip_ctloutput(so, sopt);
1074 break;
1075
1076 default:
1077 error = EINVAL;
1078 break;
1079 }
1080
1081 return (error);
1082}
1083
1084__private_extern__ int
1085icmp_dgram_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
1086 struct mbuf *control, struct proc *p)
1087{
1088 struct ip *ip;
1089 struct inpcb *inp = sotoinpcb(so);
1090 int hlen;
1091 struct icmp *icp;
1092 struct in_ifaddr *ia = NULL;
1093 int icmplen;
1094
1095 if ((inp->inp_flags & INP_HDRINCL) != 0) {
1096 /*
1097 * This is not raw IP, we liberal only for fields TOS, id and TTL
1098 */
1099 ip = mtod(m, struct ip *);
1100
1101 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
1102 /* Some sanity checks */
1103 if (m->m_pkthdr.len < hlen + ICMP_MINLEN) {
1104 goto bad;
1105 }
1106 /* Only IPv4 */
1107 if (IP_VHL_V(ip->ip_vhl) != 4)
1108 goto bad;
1109 if (hlen < 20 || hlen > 40 || ip->ip_len != m->m_pkthdr.len)
1110 goto bad;
1111 /* Bogus fragments can tie up peer resources */
1112 if (ip->ip_off != 0)
1113 goto bad;
1114 /* Allow only ICMP even for user provided IP header */
1115 if (ip->ip_p != IPPROTO_ICMP)
1116 goto bad;
1117 /* To prevent spoofing, specified source address must be one of ours */
1118 if (ip->ip_src.s_addr != INADDR_ANY) {
1119 socket_unlock(so, 0);
1120 lck_mtx_lock(rt_mtx);
1121 if (TAILQ_EMPTY(&in_ifaddrhead)) {
1122 lck_mtx_unlock(rt_mtx);
1123 socket_lock(so, 0);
1124 goto bad;
1125 }
1126 TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
1127 if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_src.s_addr) {
1128 lck_mtx_unlock(rt_mtx);
1129 socket_lock(so, 0);
1130 goto ours;
1131 }
1132 }
1133 lck_mtx_unlock(rt_mtx);
1134 socket_lock(so, 0);
1135 goto bad;
1136 }
1137ours:
1138 /* Do not trust we got a valid checksum */
1139 ip->ip_sum = 0;
1140
1141 icp = (struct icmp *)(((char *)m->m_data) + hlen);
1142 icmplen = m->m_pkthdr.len - hlen;
1143 } else {
1144 if ((icmplen = m->m_pkthdr.len) < ICMP_MINLEN) {
1145 goto bad;
1146 }
1147 icp = mtod(m, struct icmp *);
1148 }
1149 /*
1150 * Allow only to send request types with code 0
1151 */
1152 if (icp->icmp_code != 0)
1153 goto bad;
1154 switch (icp->icmp_type) {
1155 case ICMP_ECHO:
1156 break;
1157 case ICMP_TSTAMP:
1158 if (icmplen != 20)
1159 goto bad;
1160 break;
1161 case ICMP_MASKREQ:
1162 if (icmplen != 12)
1163 goto bad;
1164 break;
1165 default:
1166 goto bad;
1167 }
1168 return rip_send(so, flags, m, nam, control, p);
1169bad:
1170 m_freem(m);
1171 return EINVAL;
1172}
1173
1174#endif /* __APPLE__ */
1175