]> git.saurik.com Git - apple/xnu.git/blob - bsd/netinet/ip_icmp.c
269beae8d5e2d15b86c0f062fe0686d7be38976f
[apple/xnu.git] / bsd / netinet / ip_icmp.c
1 /*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23 /*
24 * Copyright (c) 1982, 1986, 1988, 1993
25 * The Regents of the University of California. All rights reserved.
26 *
27 * Redistribution and use in source and binary forms, with or without
28 * modification, are permitted provided that the following conditions
29 * are met:
30 * 1. Redistributions of source code must retain the above copyright
31 * notice, this list of conditions and the following disclaimer.
32 * 2. Redistributions in binary form must reproduce the above copyright
33 * notice, this list of conditions and the following disclaimer in the
34 * documentation and/or other materials provided with the distribution.
35 * 3. All advertising materials mentioning features or use of this software
36 * must display the following acknowledgement:
37 * This product includes software developed by the University of
38 * California, Berkeley and its contributors.
39 * 4. Neither the name of the University nor the names of its contributors
40 * may be used to endorse or promote products derived from this software
41 * without specific prior written permission.
42 *
43 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
44 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
45 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
46 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
47 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
48 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
49 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
50 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
51 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
52 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
53 * SUCH DAMAGE.
54 *
55 * @(#)ip_icmp.c 8.2 (Berkeley) 1/4/94
56 */
57
58 #include <sys/param.h>
59 #include <sys/systm.h>
60 #include <sys/mbuf.h>
61 #include <sys/protosw.h>
62 #include <sys/socket.h>
63 #include <sys/time.h>
64 #include <sys/kernel.h>
65 #include <sys/sysctl.h>
66
67 #include <net/if.h>
68 #include <net/route.h>
69
70 #define _IP_VHL
71 #include <netinet/in.h>
72 #include <netinet/in_systm.h>
73 #include <netinet/in_var.h>
74 #include <netinet/ip.h>
75 #include <netinet/ip_icmp.h>
76 #include <netinet/ip_var.h>
77 #include <netinet/icmp_var.h>
78 #include <netinet/tcp.h>
79 #include <netinet/tcp_fsm.h>
80 #include <netinet/tcp_seq.h>
81 #include <netinet/tcp_timer.h>
82 #include <netinet/tcp_var.h>
83 #include <netinet/tcpip.h>
84
85 #if IPSEC
86 #include <netinet6/ipsec.h>
87 #include <netkey/key.h>
88 #endif
89
90 #if defined(NFAITH) && NFAITH > 0
91 #include "faith.h"
92 #include <net/if_types.h>
93 #endif
94
95 /*
96 * ICMP routines: error generation, receive packet processing, and
97 * routines to turnaround packets back to the originator, and
98 * host table maintenance routines.
99 */
100
101 static struct icmpstat icmpstat;
102 SYSCTL_STRUCT(_net_inet_icmp, ICMPCTL_STATS, stats, CTLFLAG_RD,
103 &icmpstat, icmpstat, "");
104
105 static int icmpmaskrepl = 0;
106 SYSCTL_INT(_net_inet_icmp, ICMPCTL_MASKREPL, maskrepl, CTLFLAG_RW,
107 &icmpmaskrepl, 0, "");
108
109 static int icmptimestamp = 0;
110 SYSCTL_INT(_net_inet_icmp, ICMPCTL_TIMESTAMP, timestamp, CTLFLAG_RW,
111 &icmptimestamp, 0, "");
112
113 static int drop_redirect = 0;
114 SYSCTL_INT(_net_inet_icmp, OID_AUTO, drop_redirect, CTLFLAG_RW,
115 &drop_redirect, 0, "");
116
117 static int log_redirect = 0;
118 SYSCTL_INT(_net_inet_icmp, OID_AUTO, log_redirect, CTLFLAG_RW,
119 &log_redirect, 0, "");
120
121 #if ICMP_BANDLIM
122
123 /*
124 * ICMP error-response bandwidth limiting sysctl. If not enabled, sysctl
125 * variable content is -1 and read-only.
126 */
127
128 static int icmplim = 250;
129 SYSCTL_INT(_net_inet_icmp, ICMPCTL_ICMPLIM, icmplim, CTLFLAG_RW,
130 &icmplim, 0, "");
131 #else
132
133 static int icmplim = -1;
134 SYSCTL_INT(_net_inet_icmp, ICMPCTL_ICMPLIM, icmplim, CTLFLAG_RD,
135 &icmplim, 0, "");
136
137 #endif
138
139 /*
140 * ICMP broadcast echo sysctl
141 */
142
143 static int icmpbmcastecho = 1;
144 SYSCTL_INT(_net_inet_icmp, OID_AUTO, bmcastecho, CTLFLAG_RW,
145 &icmpbmcastecho, 0, "");
146
147
148 #if ICMPPRINTFS
149 int icmpprintfs = 0;
150 #endif
151
152 static void icmp_reflect(struct mbuf *);
153 static void icmp_send(struct mbuf *, struct mbuf *);
154 static int ip_next_mtu(int, int);
155
156 extern struct protosw inetsw[];
157
158 /*
159 * Generate an error packet of type error
160 * in response to bad packet ip.
161 */
162 void
163 icmp_error(
164 struct mbuf *n,
165 int type,
166 int code,
167 n_long dest,
168 struct ifnet *destifp)
169 {
170 register struct ip *oip = mtod(n, struct ip *), *nip;
171 register unsigned oiplen = IP_VHL_HL(oip->ip_vhl) << 2;
172 register struct icmp *icp;
173 register struct mbuf *m;
174 unsigned icmplen;
175
176 #if ICMPPRINTFS
177 if (icmpprintfs)
178 printf("icmp_error(%p, %x, %d)\n", oip, type, code);
179 #endif
180 if (type != ICMP_REDIRECT)
181 icmpstat.icps_error++;
182 /*
183 * Don't send error if not the first fragment of message.
184 * Don't error if the old packet protocol was ICMP
185 * error message, only known informational types.
186 */
187 if (oip->ip_off &~ (IP_MF|IP_DF))
188 goto freeit;
189 if (oip->ip_p == IPPROTO_ICMP && type != ICMP_REDIRECT &&
190 n->m_len >= oiplen + ICMP_MINLEN &&
191 !ICMP_INFOTYPE(((struct icmp *)((caddr_t)oip + oiplen))->icmp_type)) {
192 icmpstat.icps_oldicmp++;
193 goto freeit;
194 }
195 /* Don't send error in response to a multicast or broadcast packet */
196 if (n->m_flags & (M_BCAST|M_MCAST))
197 goto freeit;
198 /*
199 * First, formulate icmp message
200 */
201 m = m_gethdr(M_DONTWAIT, MT_HEADER);
202 if (m == NULL)
203 goto freeit;
204 icmplen = min(oiplen + 8, oip->ip_len);
205 if (icmplen < sizeof(struct ip)) {
206 printf("icmp_error: bad length\n");
207 m_free(m);
208 goto freeit;
209 }
210 m->m_len = icmplen + ICMP_MINLEN;
211 MH_ALIGN(m, m->m_len);
212 icp = mtod(m, struct icmp *);
213 if ((u_int)type > ICMP_MAXTYPE)
214 panic("icmp_error");
215 icmpstat.icps_outhist[type]++;
216 icp->icmp_type = type;
217 if (type == ICMP_REDIRECT)
218 icp->icmp_gwaddr.s_addr = dest;
219 else {
220 icp->icmp_void = 0;
221 /*
222 * The following assignments assume an overlay with the
223 * zeroed icmp_void field.
224 */
225 if (type == ICMP_PARAMPROB) {
226 icp->icmp_pptr = code;
227 code = 0;
228 } else if (type == ICMP_UNREACH &&
229 code == ICMP_UNREACH_NEEDFRAG && destifp) {
230 icp->icmp_nextmtu = htons(destifp->if_mtu);
231 }
232 }
233
234 icp->icmp_code = code;
235 m_copydata(n, 0, icmplen, (caddr_t)&icp->icmp_ip);
236 nip = &icp->icmp_ip;
237
238 /*
239 * Convert fields to network representation.
240 */
241 HTONS(nip->ip_len);
242 HTONS(nip->ip_off);
243
244 /*
245 * Now, copy old ip header (without options)
246 * in front of icmp message.
247 */
248 if (m->m_data - sizeof(struct ip) < m->m_pktdat)
249 panic("icmp len");
250 m->m_data -= sizeof(struct ip);
251 m->m_len += sizeof(struct ip);
252 m->m_pkthdr.len = m->m_len;
253 m->m_pkthdr.rcvif = n->m_pkthdr.rcvif;
254 m->m_pkthdr.aux = NULL; /* for IPsec */
255 nip = mtod(m, struct ip *);
256 bcopy((caddr_t)oip, (caddr_t)nip, sizeof(struct ip));
257 nip->ip_len = m->m_len;
258 nip->ip_vhl = IP_VHL_BORING;
259 nip->ip_p = IPPROTO_ICMP;
260 nip->ip_tos = 0;
261 icmp_reflect(m);
262
263 freeit:
264 m_freem(n);
265 }
266
267 static struct sockaddr_in icmpsrc = { sizeof (struct sockaddr_in), AF_INET };
268 static struct sockaddr_in icmpdst = { sizeof (struct sockaddr_in), AF_INET };
269 static struct sockaddr_in icmpgw = { sizeof (struct sockaddr_in), AF_INET };
270
271 /*
272 * Process a received ICMP message.
273 */
274 void
275 icmp_input(m, hlen)
276 register struct mbuf *m;
277 int hlen;
278 {
279 register struct icmp *icp;
280 register struct ip *ip = mtod(m, struct ip *);
281 int icmplen = ip->ip_len;
282 register int i;
283 struct in_ifaddr *ia;
284 void (*ctlfunc)(int, struct sockaddr *, void *);
285 int code;
286 char ipv4str[MAX_IPv4_STR_LEN];
287
288 /*
289 * Locate icmp structure in mbuf, and check
290 * that not corrupted and of at least minimum length.
291 */
292 #if ICMPPRINTFS
293 if (icmpprintfs) {
294 char buf[MAX_IPv4_STR_LEN];
295
296 printf("icmp_input from %s to %s, len %d\n",
297 inet_ntop(AF_INET, &ip->ip_src, buf, sizeof(buf)),
298 inet_ntop(AF_INET, &ip->ip_dst, ipv4str, sizeof(ipv4str)),
299 icmplen);
300 }
301 #endif
302 if (icmplen < ICMP_MINLEN) {
303 icmpstat.icps_tooshort++;
304 goto freeit;
305 }
306 i = hlen + min(icmplen, ICMP_ADVLENMIN);
307 if (m->m_len < i && (m = m_pullup(m, i)) == 0) {
308 icmpstat.icps_tooshort++;
309 return;
310 }
311 ip = mtod(m, struct ip *);
312 m->m_len -= hlen;
313 m->m_data += hlen;
314 icp = mtod(m, struct icmp *);
315 if (in_cksum(m, icmplen)) {
316 icmpstat.icps_checksum++;
317 goto freeit;
318 }
319 m->m_len += hlen;
320 m->m_data -= hlen;
321
322 #if defined(NFAITH) && 0 < NFAITH
323 if (m->m_pkthdr.rcvif && m->m_pkthdr.rcvif->if_type == IFT_FAITH) {
324 /*
325 * Deliver very specific ICMP type only.
326 */
327 switch (icp->icmp_type) {
328 case ICMP_UNREACH:
329 case ICMP_TIMXCEED:
330 break;
331 default:
332 goto freeit;
333 }
334 }
335 #endif
336
337 #if ICMPPRINTFS
338 if (icmpprintfs)
339 printf("icmp_input, type %d code %d\n", icp->icmp_type,
340 icp->icmp_code);
341 #endif
342
343 /*
344 * Message type specific processing.
345 */
346 if (icp->icmp_type > ICMP_MAXTYPE)
347 goto raw;
348 icmpstat.icps_inhist[icp->icmp_type]++;
349 code = icp->icmp_code;
350 switch (icp->icmp_type) {
351
352 case ICMP_UNREACH:
353 switch (code) {
354 case ICMP_UNREACH_NET:
355 case ICMP_UNREACH_HOST:
356 case ICMP_UNREACH_SRCFAIL:
357 case ICMP_UNREACH_NET_UNKNOWN:
358 case ICMP_UNREACH_HOST_UNKNOWN:
359 case ICMP_UNREACH_ISOLATED:
360 case ICMP_UNREACH_TOSNET:
361 case ICMP_UNREACH_TOSHOST:
362 case ICMP_UNREACH_HOST_PRECEDENCE:
363 case ICMP_UNREACH_PRECEDENCE_CUTOFF:
364 code = PRC_UNREACH_NET;
365 break;
366
367 case ICMP_UNREACH_NEEDFRAG:
368 code = PRC_MSGSIZE;
369 break;
370
371 /*
372 * RFC 1122, Sections 3.2.2.1 and 4.2.3.9.
373 * Treat subcodes 2,3 as immediate RST
374 */
375 case ICMP_UNREACH_PROTOCOL:
376 case ICMP_UNREACH_PORT:
377 code = PRC_UNREACH_PORT;
378 break;
379
380 case ICMP_UNREACH_NET_PROHIB:
381 case ICMP_UNREACH_HOST_PROHIB:
382 case ICMP_UNREACH_FILTER_PROHIB:
383 code = PRC_UNREACH_ADMIN_PROHIB;
384 break;
385
386 default:
387 goto badcode;
388 }
389 goto deliver;
390
391 case ICMP_TIMXCEED:
392 if (code > 1)
393 goto badcode;
394 code += PRC_TIMXCEED_INTRANS;
395 goto deliver;
396
397 case ICMP_PARAMPROB:
398 if (code > 1)
399 goto badcode;
400 code = PRC_PARAMPROB;
401 goto deliver;
402
403 case ICMP_SOURCEQUENCH:
404 if (code)
405 goto badcode;
406 code = PRC_QUENCH;
407 deliver:
408 /*
409 * Problem with datagram; advise higher level routines.
410 */
411 if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
412 IP_VHL_HL(icp->icmp_ip.ip_vhl) < (sizeof(struct ip) >> 2)) {
413 icmpstat.icps_badlen++;
414 goto freeit;
415 }
416 NTOHS(icp->icmp_ip.ip_len);
417 /* Discard ICMP's in response to multicast packets */
418 if (IN_MULTICAST(ntohl(icp->icmp_ip.ip_dst.s_addr)))
419 goto badcode;
420 #if ICMPPRINTFS
421 if (icmpprintfs)
422 printf("deliver to protocol %d\n", icp->icmp_ip.ip_p);
423 #endif
424 icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
425 #if 1
426 /*
427 * MTU discovery:
428 * If we got a needfrag and there is a host route to the
429 * original destination, and the MTU is not locked, then
430 * set the MTU in the route to the suggested new value
431 * (if given) and then notify as usual. The ULPs will
432 * notice that the MTU has changed and adapt accordingly.
433 * If no new MTU was suggested, then we guess a new one
434 * less than the current value. If the new MTU is
435 * unreasonably small (defined by sysctl tcp_minmss), then
436 * we reset the MTU to the interface value and enable the
437 * lock bit, indicating that we are no longer doing MTU
438 * discovery.
439 */
440 if (code == PRC_MSGSIZE) {
441 struct rtentry *rt;
442 int mtu;
443
444 rt = rtalloc1((struct sockaddr *)&icmpsrc, 0,
445 RTF_CLONING | RTF_PRCLONING);
446 if (rt && (rt->rt_flags & RTF_HOST)
447 && !(rt->rt_rmx.rmx_locks & RTV_MTU)) {
448 mtu = ntohs(icp->icmp_nextmtu);
449 if (!mtu)
450 mtu = ip_next_mtu(rt->rt_rmx.rmx_mtu,
451 1);
452 #if DEBUG_MTUDISC
453 printf("MTU for %s reduced to %d\n",
454 inet_ntop(AF_INET, &icmpsrc.sin_addr, ipv4str,
455 sizeof(ipv4str)),
456 mtu);
457 #endif
458 if (mtu < max(296, (tcp_minmss + sizeof(struct tcpiphdr)))) {
459 /* rt->rt_rmx.rmx_mtu =
460 rt->rt_ifp->if_mtu; */
461 rt->rt_rmx.rmx_locks |= RTV_MTU;
462 } else if (rt->rt_rmx.rmx_mtu > mtu) {
463 rt->rt_rmx.rmx_mtu = mtu;
464 }
465 }
466 if (rt)
467 rtfree(rt);
468 }
469
470 #endif
471 /*
472 * XXX if the packet contains [IPv4 AH TCP], we can't make a
473 * notification to TCP layer.
474 */
475 ctlfunc = ip_protox[icp->icmp_ip.ip_p]->pr_ctlinput;
476 if (ctlfunc)
477 (*ctlfunc)(code, (struct sockaddr *)&icmpsrc,
478 (void *)&icp->icmp_ip);
479 break;
480
481 badcode:
482 icmpstat.icps_badcode++;
483 break;
484
485 case ICMP_ECHO:
486 if (!icmpbmcastecho
487 && (m->m_flags & (M_MCAST | M_BCAST)) != 0) {
488 icmpstat.icps_bmcastecho++;
489 break;
490 }
491 icp->icmp_type = ICMP_ECHOREPLY;
492 #if ICMP_BANDLIM
493 if (badport_bandlim(BANDLIM_ICMP_ECHO) < 0)
494 goto freeit;
495 else
496 #endif
497 goto reflect;
498
499 case ICMP_TSTAMP:
500
501 if (icmptimestamp == 0)
502 break;
503
504 if (!icmpbmcastecho
505 && (m->m_flags & (M_MCAST | M_BCAST)) != 0) {
506 icmpstat.icps_bmcasttstamp++;
507 break;
508 }
509 if (icmplen < ICMP_TSLEN) {
510 icmpstat.icps_badlen++;
511 break;
512 }
513 icp->icmp_type = ICMP_TSTAMPREPLY;
514 icp->icmp_rtime = iptime();
515 icp->icmp_ttime = icp->icmp_rtime; /* bogus, do later! */
516 #if ICMP_BANDLIM
517 if (badport_bandlim(BANDLIM_ICMP_TSTAMP) < 0)
518 goto freeit;
519 else
520 #endif
521 goto reflect;
522
523 case ICMP_MASKREQ:
524 #define satosin(sa) ((struct sockaddr_in *)(sa))
525 if (icmpmaskrepl == 0)
526 break;
527 /*
528 * We are not able to respond with all ones broadcast
529 * unless we receive it over a point-to-point interface.
530 */
531 if (icmplen < ICMP_MASKLEN)
532 break;
533 switch (ip->ip_dst.s_addr) {
534
535 case INADDR_BROADCAST:
536 case INADDR_ANY:
537 icmpdst.sin_addr = ip->ip_src;
538 break;
539
540 default:
541 icmpdst.sin_addr = ip->ip_dst;
542 }
543 ia = (struct in_ifaddr *)ifaof_ifpforaddr(
544 (struct sockaddr *)&icmpdst, m->m_pkthdr.rcvif);
545 if (ia == 0)
546 break;
547 if (ia->ia_ifp == 0) {
548 ifafree(&ia->ia_ifa);
549 ia = 0;
550 break;
551 }
552 icp->icmp_type = ICMP_MASKREPLY;
553 icp->icmp_mask = ia->ia_sockmask.sin_addr.s_addr;
554 if (ip->ip_src.s_addr == 0) {
555 if (ia->ia_ifp->if_flags & IFF_BROADCAST)
556 ip->ip_src = satosin(&ia->ia_broadaddr)->sin_addr;
557 else if (ia->ia_ifp->if_flags & IFF_POINTOPOINT)
558 ip->ip_src = satosin(&ia->ia_dstaddr)->sin_addr;
559 }
560 ifafree(&ia->ia_ifa);
561 reflect:
562 ip->ip_len += hlen; /* since ip_input deducts this */
563 icmpstat.icps_reflect++;
564 icmpstat.icps_outhist[icp->icmp_type]++;
565 icmp_reflect(m);
566 return;
567
568 case ICMP_REDIRECT:
569 if (log_redirect) {
570 u_long src, dst, gw;
571
572 src = ntohl(ip->ip_src.s_addr);
573 dst = ntohl(icp->icmp_ip.ip_dst.s_addr);
574 gw = ntohl(icp->icmp_gwaddr.s_addr);
575 printf("icmp redirect from %d.%d.%d.%d: "
576 "%d.%d.%d.%d => %d.%d.%d.%d\n",
577 (int)(src >> 24), (int)((src >> 16) & 0xff),
578 (int)((src >> 8) & 0xff), (int)(src & 0xff),
579 (int)(dst >> 24), (int)((dst >> 16) & 0xff),
580 (int)((dst >> 8) & 0xff), (int)(dst & 0xff),
581 (int)(gw >> 24), (int)((gw >> 16) & 0xff),
582 (int)((gw >> 8) & 0xff), (int)(gw & 0xff));
583 }
584 if (drop_redirect)
585 break;
586 if (code > 3)
587 goto badcode;
588 if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
589 IP_VHL_HL(icp->icmp_ip.ip_vhl) < (sizeof(struct ip) >> 2)) {
590 icmpstat.icps_badlen++;
591 break;
592 }
593 /*
594 * Short circuit routing redirects to force
595 * immediate change in the kernel's routing
596 * tables. The message is also handed to anyone
597 * listening on a raw socket (e.g. the routing
598 * daemon for use in updating its tables).
599 */
600 icmpgw.sin_addr = ip->ip_src;
601 icmpdst.sin_addr = icp->icmp_gwaddr;
602 #if ICMPPRINTFS
603 if (icmpprintfs) {
604 char buf[MAX_IPv4_STR_LEN];
605
606 printf("redirect dst %s to %s\n",
607 inet_ntop(AF_INET, &icp->icmp_ip.ip_dst, buf, sizeof(buf)),
608 inet_ntop(AF_INET, &icp->icmp_gwaddr, ipv4str,
609 sizeof(ipv4str)));
610 }
611 #endif
612 icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
613 rtredirect((struct sockaddr *)&icmpsrc,
614 (struct sockaddr *)&icmpdst,
615 (struct sockaddr *)0, RTF_GATEWAY | RTF_HOST,
616 (struct sockaddr *)&icmpgw, (struct rtentry **)0);
617 pfctlinput(PRC_REDIRECT_HOST, (struct sockaddr *)&icmpsrc);
618 #if IPSEC
619 key_sa_routechange((struct sockaddr *)&icmpsrc);
620 #endif
621 break;
622
623 /*
624 * No kernel processing for the following;
625 * just fall through to send to raw listener.
626 */
627 case ICMP_ECHOREPLY:
628 case ICMP_ROUTERADVERT:
629 case ICMP_ROUTERSOLICIT:
630 case ICMP_TSTAMPREPLY:
631 case ICMP_IREQREPLY:
632 case ICMP_MASKREPLY:
633 default:
634 break;
635 }
636
637 raw:
638 rip_input(m, hlen);
639 return;
640
641 freeit:
642 m_freem(m);
643 }
644
645 /*
646 * Reflect the ip packet back to the source
647 */
648 static void
649 icmp_reflect(m)
650 struct mbuf *m;
651 {
652 register struct ip *ip = mtod(m, struct ip *);
653 register struct in_ifaddr *ia;
654 struct in_addr t;
655 struct mbuf *opts = 0;
656 int optlen = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof(struct ip);
657
658 if (!in_canforward(ip->ip_src) &&
659 ((ntohl(ip->ip_src.s_addr) & IN_CLASSA_NET) !=
660 (IN_LOOPBACKNET << IN_CLASSA_NSHIFT))) {
661 m_freem(m); /* Bad return address */
662 goto done; /* Ip_output() will check for broadcast */
663 }
664 t = ip->ip_dst;
665 ip->ip_dst = ip->ip_src;
666 /*
667 * If the incoming packet was addressed directly to us,
668 * use dst as the src for the reply. Otherwise (broadcast
669 * or anonymous), use the address which corresponds
670 * to the incoming interface.
671 */
672 lck_mtx_lock(rt_mtx);
673 for (ia = in_ifaddrhead.tqh_first; ia; ia = ia->ia_link.tqe_next) {
674 if (t.s_addr == IA_SIN(ia)->sin_addr.s_addr)
675 break;
676 if (ia->ia_ifp && (ia->ia_ifp->if_flags & IFF_BROADCAST) &&
677 t.s_addr == satosin(&ia->ia_broadaddr)->sin_addr.s_addr)
678 break;
679 }
680 if (ia)
681 ifaref(&ia->ia_ifa);
682 icmpdst.sin_addr = t;
683 if ((ia == (struct in_ifaddr *)0) && m->m_pkthdr.rcvif)
684 ia = (struct in_ifaddr *)ifaof_ifpforaddr(
685 (struct sockaddr *)&icmpdst, m->m_pkthdr.rcvif);
686 /*
687 * The following happens if the packet was not addressed to us,
688 * and was received on an interface with no IP address.
689 */
690 if (ia == (struct in_ifaddr *)0) {
691 ia = in_ifaddrhead.tqh_first;
692 if (ia == (struct in_ifaddr *)0) {/* no address yet, bail out */
693 m_freem(m);
694 lck_mtx_unlock(rt_mtx);
695 goto done;
696 }
697 ifaref(&ia->ia_ifa);
698 }
699 lck_mtx_unlock(rt_mtx);
700 t = IA_SIN(ia)->sin_addr;
701 ip->ip_src = t;
702 ip->ip_ttl = ip_defttl;
703 ifafree(&ia->ia_ifa);
704 ia = NULL;
705
706 if (optlen > 0) {
707 register u_char *cp;
708 int opt, cnt;
709 u_int len;
710
711 /*
712 * Retrieve any source routing from the incoming packet;
713 * add on any record-route or timestamp options.
714 */
715 cp = (u_char *) (ip + 1);
716 if ((opts = ip_srcroute()) == 0 &&
717 (opts = m_gethdr(M_DONTWAIT, MT_HEADER))) {
718 opts->m_len = sizeof(struct in_addr);
719 mtod(opts, struct in_addr *)->s_addr = 0;
720 }
721 if (opts) {
722 #if ICMPPRINTFS
723 if (icmpprintfs)
724 printf("icmp_reflect optlen %d rt %d => ",
725 optlen, opts->m_len);
726 #endif
727 for (cnt = optlen; cnt > 0; cnt -= len, cp += len) {
728 opt = cp[IPOPT_OPTVAL];
729 if (opt == IPOPT_EOL)
730 break;
731 if (opt == IPOPT_NOP)
732 len = 1;
733 else {
734 if (cnt < IPOPT_OLEN + sizeof(*cp))
735 break;
736 len = cp[IPOPT_OLEN];
737 if (len < IPOPT_OLEN + sizeof(*cp) ||
738 len > cnt)
739 break;
740 }
741 /*
742 * Should check for overflow, but it "can't happen"
743 */
744 if (opt == IPOPT_RR || opt == IPOPT_TS ||
745 opt == IPOPT_SECURITY) {
746 bcopy((caddr_t)cp,
747 mtod(opts, caddr_t) + opts->m_len, len);
748 opts->m_len += len;
749 }
750 }
751 /* Terminate & pad, if necessary */
752 cnt = opts->m_len % 4;
753 if (cnt) {
754 for (; cnt < 4; cnt++) {
755 *(mtod(opts, caddr_t) + opts->m_len) =
756 IPOPT_EOL;
757 opts->m_len++;
758 }
759 }
760 #if ICMPPRINTFS
761 if (icmpprintfs)
762 printf("%d\n", opts->m_len);
763 #endif
764 }
765 /*
766 * Now strip out original options by copying rest of first
767 * mbuf's data back, and adjust the IP length.
768 */
769 ip->ip_len -= optlen;
770 ip->ip_vhl = IP_VHL_BORING;
771 m->m_len -= optlen;
772 if (m->m_flags & M_PKTHDR)
773 m->m_pkthdr.len -= optlen;
774 optlen += sizeof(struct ip);
775 bcopy((caddr_t)ip + optlen, (caddr_t)(ip + 1),
776 (unsigned)(m->m_len - sizeof(struct ip)));
777 }
778 m->m_flags &= ~(M_BCAST|M_MCAST);
779 icmp_send(m, opts);
780 done:
781 if (opts)
782 (void)m_free(opts);
783 }
784
785 /*
786 * Send an icmp packet back to the ip level,
787 * after supplying a checksum.
788 */
789 static void
790 icmp_send(m, opts)
791 register struct mbuf *m;
792 struct mbuf *opts;
793 {
794 register struct ip *ip = mtod(m, struct ip *);
795 register int hlen;
796 register struct icmp *icp;
797 struct route ro;
798 char ipv4str[MAX_IPv4_STR_LEN];
799
800 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
801 m->m_data += hlen;
802 m->m_len -= hlen;
803 icp = mtod(m, struct icmp *);
804 icp->icmp_cksum = 0;
805 icp->icmp_cksum = in_cksum(m, ip->ip_len - hlen);
806 m->m_data -= hlen;
807 m->m_len += hlen;
808 m->m_pkthdr.rcvif = 0;
809 m->m_pkthdr.aux = NULL;
810 m->m_pkthdr.csum_data = 0;
811 m->m_pkthdr.csum_flags = 0;
812 #if ICMPPRINTFS
813 if (icmpprintfs) {
814 char buf[MAX_IPv4_STR_LEN];
815
816 printf("icmp_send dst %s src %s\n",
817 inet_ntop(AF_INET, &ip->ip_dst, buf, sizeof(buf)),
818 inet_ntop(AF_INET, &ip->ip_src, ipv4str, sizeof(ipv4str)));
819 }
820 #endif
821 bzero(&ro, sizeof ro);
822 (void) ip_output(m, opts, &ro, 0, NULL);
823 if (ro.ro_rt)
824 rtfree(ro.ro_rt);
825 }
826
827 n_time
828 iptime()
829 {
830 struct timeval atv;
831 u_long t;
832
833 microtime(&atv);
834 t = (atv.tv_sec % (24*60*60)) * 1000 + atv.tv_usec / 1000;
835 return (htonl(t));
836 }
837
838 #if 1
/*
 * Return the next larger or smaller MTU plateau (table from RFC 1191)
 * given current value MTU.  If DIR is less than zero, a larger plateau
 * is returned; otherwise, a smaller value is returned.
 *
 * Returns 0 when there is no plateau in the requested direction: MTU is
 * already at or above the largest entry (DIR < 0), or at or below the
 * smallest one (DIR >= 0).  A negative MTU also yields 0 instead of
 * indexing past the end of the table.
 */
static int
ip_next_mtu(int mtu, int dir)
{
	/* Descending plateau list; the trailing 0 is a sentinel. */
	static const int mtutab[] = {
		65535, 32000, 17914, 8166, 4352, 2002, 1492, 1006, 508, 296,
		68, 0
	};
	const int nentries = (int)(sizeof(mtutab) / sizeof(mtutab[0]));
	int i;

	/* Find the first plateau that does not exceed mtu. */
	for (i = 0; i < nentries; i++) {
		if (mtu >= mtutab[i])
			break;
	}

	if (dir < 0) {
		/* Larger plateau: the entry just before the match, if any. */
		return (i == 0) ? 0 : mtutab[i - 1];
	}

	/*
	 * Smaller plateau.  i == nentries - 1 is the sentinel and
	 * i == nentries means nothing matched (negative mtu); in both
	 * cases there is nothing smaller to offer.
	 */
	if (i >= nentries - 1)
		return 0;
	if (mtu > mtutab[i])
		return mtutab[i];
	return mtutab[i + 1];
}
876 #endif
877
878 #if ICMP_BANDLIM
879
880 /*
881 * badport_bandlim() - check for ICMP bandwidth limit
882 *
883 * Return 0 if it is ok to send an ICMP error response, -1 if we have
884 * hit our bandwidth limit and it is not ok.
885 *
886 * If icmplim is <= 0, the feature is disabled and 0 is returned.
887 *
888 * For now we separate the TCP and UDP subsystems w/ different 'which'
889 * values. We may eventually remove this separation (and simplify the
890 * code further).
891 *
892 * Note that the printing of the error message is delayed so we can
893 * properly print the icmp error rate that the system was trying to do
894 * (i.e. 22000/100 pps, etc...). This can cause long delays in printing
895 * the 'final' error, but it doesn't make sense to solve the printing
896 * delay with more complex code.
897 */
898
899 int
900 badport_bandlim(int which)
901 {
902 static struct timeval lticks[BANDLIM_MAX + 1];
903 static int lpackets[BANDLIM_MAX + 1];
904 struct timeval time;
905 int secs;
906
907 const char *bandlimittype[] = {
908 "Limiting icmp unreach response",
909 "Limiting icmp ping response",
910 "Limiting icmp tstamp response",
911 "Limiting closed port RST response",
912 "Limiting open port RST response"
913 };
914
915 /*
916 * Return ok status if feature disabled or argument out of
917 * ranage.
918 */
919
920 if (icmplim <= 0 || which > BANDLIM_MAX || which < 0)
921 return(0);
922
923 getmicrouptime(&time);
924
925 secs = time.tv_sec - lticks[which].tv_sec ;
926
927 /*
928 * reset stats when cumulative delta exceeds one second.
929 */
930
931 if ((secs > 1) || (secs == 1 && (lticks[which].tv_usec > time.tv_usec))) {
932 if (lpackets[which] > icmplim) {
933 printf("%s from %d to %d packets per second\n",
934 bandlimittype[which],
935 lpackets[which],
936 icmplim
937 );
938 }
939 lticks[which].tv_sec = time.tv_sec;
940 lticks[which].tv_usec = time.tv_usec;
941 lpackets[which] = 0;
942 }
943
944 /*
945 * bump packet count
946 */
947
948 if (++lpackets[which] > icmplim) {
949 return(-1);
950 }
951 return(0);
952 }
953
954 #endif
955
956 #if __APPLE__
957
958 /*
959 * Non-privileged ICMP socket operations
960 * - send ICMP echo request
961 * - all ICMP
962 * - limited socket options
963 */
964
965 #include <netinet/ip_icmp.h>
966 #include <netinet/in_pcb.h>
967
968 extern struct domain inetdomain;
969 extern u_long rip_sendspace;
970 extern u_long rip_recvspace;
971 extern struct inpcbinfo ripcbinfo;
972
973 int rip_abort(struct socket *);
974 int rip_bind(struct socket *, struct sockaddr *, struct proc *);
975 int rip_connect(struct socket *, struct sockaddr *, struct proc *);
976 int rip_detach(struct socket *);
977 int rip_disconnect(struct socket *);
978 int rip_shutdown(struct socket *);
979
980 __private_extern__ int icmp_dgram_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, struct mbuf *control, struct proc *p);
981 __private_extern__ int icmp_dgram_attach(struct socket *so, int proto, struct proc *p);
982 __private_extern__ int icmp_dgram_ctloutput(struct socket *so, struct sockopt *sopt);
983
984 __private_extern__ struct pr_usrreqs icmp_dgram_usrreqs = {
985 rip_abort, pru_accept_notsupp, icmp_dgram_attach, rip_bind, rip_connect,
986 pru_connect2_notsupp, in_control, rip_detach, rip_disconnect,
987 pru_listen_notsupp, in_setpeeraddr, pru_rcvd_notsupp,
988 pru_rcvoob_notsupp, icmp_dgram_send, pru_sense_null, rip_shutdown,
989 in_setsockaddr, sosend, soreceive, pru_sopoll_notsupp
990 };
991
992 /* Like rip_attach but without root privilege enforcement */
993 __private_extern__ int
994 icmp_dgram_attach(struct socket *so, int proto, struct proc *p)
995 {
996 struct inpcb *inp;
997 int error, s;
998
999 inp = sotoinpcb(so);
1000 if (inp)
1001 panic("icmp_dgram_attach");
1002
1003 error = soreserve(so, rip_sendspace, rip_recvspace);
1004 if (error)
1005 return error;
1006 s = splnet();
1007 error = in_pcballoc(so, &ripcbinfo, p);
1008 splx(s);
1009 if (error)
1010 return error;
1011 inp = (struct inpcb *)so->so_pcb;
1012 inp->inp_vflag |= INP_IPV4;
1013 inp->inp_ip_p = IPPROTO_ICMP;
1014 inp->inp_ip_ttl = ip_defttl;
1015 return 0;
1016 }
1017
1018 /*
1019 * Raw IP socket option processing.
1020 */
1021 __private_extern__ int
1022 icmp_dgram_ctloutput(struct socket *so, struct sockopt *sopt)
1023 {
1024 struct inpcb *inp = sotoinpcb(so);
1025 int error, optval;
1026
1027 if (sopt->sopt_level != IPPROTO_IP)
1028 return (EINVAL);
1029
1030 switch (sopt->sopt_name) {
1031 case IP_OPTIONS:
1032 case IP_HDRINCL:
1033 case IP_TOS:
1034 case IP_TTL:
1035 case IP_RECVOPTS:
1036 case IP_RECVRETOPTS:
1037 case IP_RECVDSTADDR:
1038 case IP_RETOPTS:
1039 case IP_MULTICAST_IF:
1040 case IP_MULTICAST_TTL:
1041 case IP_MULTICAST_LOOP:
1042 case IP_ADD_MEMBERSHIP:
1043 case IP_DROP_MEMBERSHIP:
1044 case IP_MULTICAST_VIF:
1045 case IP_PORTRANGE:
1046 case IP_RECVIF:
1047 case IP_IPSEC_POLICY:
1048 #if defined(NFAITH) && NFAITH > 0
1049 case IP_FAITH:
1050 #endif
1051 case IP_STRIPHDR:
1052 case IP_RECVTTL:
1053 error = rip_ctloutput(so, sopt);
1054 break;
1055
1056 default:
1057 error = EINVAL;
1058 break;
1059 }
1060
1061 return (error);
1062 }
1063
1064 __private_extern__ int
1065 icmp_dgram_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
1066 struct mbuf *control, struct proc *p)
1067 {
1068 struct ip *ip;
1069 struct inpcb *inp = sotoinpcb(so);
1070 int hlen;
1071 struct icmp *icp;
1072 struct in_ifaddr *ia = NULL;
1073 int icmplen;
1074
1075 if ((inp->inp_flags & INP_HDRINCL) != 0) {
1076 /*
1077 * This is not raw IP, we liberal only for fields TOS, id and TTL
1078 */
1079 ip = mtod(m, struct ip *);
1080
1081 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
1082 /* Some sanity checks */
1083 if (m->m_pkthdr.len < hlen + ICMP_MINLEN) {
1084 goto bad;
1085 }
1086 /* Only IPv4 */
1087 if (IP_VHL_V(ip->ip_vhl) != 4)
1088 goto bad;
1089 if (hlen < 20 || hlen > 40 || ip->ip_len != m->m_pkthdr.len)
1090 goto bad;
1091 /* Bogus fragments can tie up peer resources */
1092 if (ip->ip_off != 0)
1093 goto bad;
1094 /* Allow only ICMP even for user provided IP header */
1095 if (ip->ip_p != IPPROTO_ICMP)
1096 goto bad;
1097 /* To prevent spoofing, specified source address must be one of ours */
1098 if (ip->ip_src.s_addr != INADDR_ANY) {
1099 socket_unlock(so, 0);
1100 lck_mtx_lock(rt_mtx);
1101 if (TAILQ_EMPTY(&in_ifaddrhead)) {
1102 lck_mtx_unlock(rt_mtx);
1103 socket_lock(so, 0);
1104 goto bad;
1105 }
1106 TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
1107 if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_src.s_addr) {
1108 lck_mtx_unlock(rt_mtx);
1109 socket_lock(so, 0);
1110 goto ours;
1111 }
1112 }
1113 lck_mtx_unlock(rt_mtx);
1114 socket_lock(so, 0);
1115 goto bad;
1116 }
1117 ours:
1118 /* Do not trust we got a valid checksum */
1119 ip->ip_sum = 0;
1120
1121 icp = (struct icmp *)(((char *)m->m_data) + hlen);
1122 icmplen = m->m_pkthdr.len - hlen;
1123 } else {
1124 if ((icmplen = m->m_pkthdr.len) < ICMP_MINLEN) {
1125 goto bad;
1126 }
1127 icp = mtod(m, struct icmp *);
1128 }
1129 /*
1130 * Allow only to send request types with code 0
1131 */
1132 if (icp->icmp_code != 0)
1133 goto bad;
1134 switch (icp->icmp_type) {
1135 case ICMP_ECHO:
1136 break;
1137 case ICMP_TSTAMP:
1138 if (icmplen != 20)
1139 goto bad;
1140 break;
1141 case ICMP_MASKREQ:
1142 if (icmplen != 12)
1143 goto bad;
1144 break;
1145 default:
1146 goto bad;
1147 }
1148 return rip_send(so, flags, m, nam, control, p);
1149 bad:
1150 m_freem(m);
1151 return EINVAL;
1152 }
1153
1154 #endif /* __APPLE__ */
1155