/*
 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Copyright (c) 1982, 1986, 1988, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)raw_ip.c	8.7 (Berkeley) 5/15/95
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/proc.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <libkern/OSAtomic.h>
#include <kern/zalloc.h>

#include <pexpert/pexpert.h>

#include <net/if.h>
#include <net/route.h>

#define _IP_VHL
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
#include <netinet/ip_var.h>
#include <netinet/ip_mroute.h>

#if INET6
#include <netinet6/in6_pcb.h>
#endif /* INET6 */

#include <netinet/ip_fw.h>

#if IPSEC
#include <netinet6/ipsec.h>
#endif /*IPSEC*/

#if DUMMYNET
#include <netinet/ip_dummynet.h>
#endif

#if CONFIG_MACF_NET
#include <security/mac_framework.h>
#endif /* CONFIG_MACF_NET */

int load_ipfw(void);
int rip_detach(struct socket *);
int rip_abort(struct socket *);
int rip_disconnect(struct socket *);
int rip_bind(struct socket *, struct sockaddr *, struct proc *);
int rip_connect(struct socket *, struct sockaddr *, struct proc *);
int rip_shutdown(struct socket *);

#if IPSEC
extern int ipsec_bypass;
#endif

struct inpcbhead ripcb;
struct inpcbinfo ripcbinfo;

/* control hooks for ipfw and dummynet */
#if IPFIREWALL
ip_fw_ctl_t *ip_fw_ctl_ptr;
#if DUMMYNET
ip_dn_ctl_t *ip_dn_ctl_ptr;
#endif /* DUMMYNET */
#endif /* IPFIREWALL */

/*
 * Nominal space allocated to a raw ip socket.
 */
#define	RIPSNDQ		8192
#define	RIPRCVQ		8192
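/*
 * RIPSNDQ/RIPRCVQ seed the rip_sendspace/rip_recvspace defaults defined
 * further below; they are reserved per socket in rip_attach() and remain
 * tunable at run time through the net.inet.raw.maxdgram and
 * net.inet.raw.recvspace sysctls.
 */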

/*
 * Raw interface to IP protocol.
 */

/*
 * Initialize the raw connection block queue.
 */
void
rip_init(void)
{
	struct inpcbinfo *pcbinfo;

	LIST_INIT(&ripcb);
	ripcbinfo.listhead = &ripcb;
	/*
	 * XXX We don't use the hash list for raw IP, but it's easier
	 * to allocate a one entry hash list than it is to check all
	 * over the place for hashbase == NULL.
	 */
	ripcbinfo.hashbase = hashinit(1, M_PCB, &ripcbinfo.hashmask);
	ripcbinfo.porthashbase = hashinit(1, M_PCB, &ripcbinfo.porthashmask);

	ripcbinfo.ipi_zone = (void *) zinit(sizeof(struct inpcb),
	    (4096 * sizeof(struct inpcb)),
	    4096, "ripzone");

	pcbinfo = &ripcbinfo;
	/*
	 * allocate lock group attribute and group for raw IP pcb mutexes
	 */
	pcbinfo->mtx_grp_attr = lck_grp_attr_alloc_init();

	pcbinfo->mtx_grp = lck_grp_alloc_init("ripcb", pcbinfo->mtx_grp_attr);

	/*
	 * allocate the lock attribute for raw IP pcb mutexes
	 */
	pcbinfo->mtx_attr = lck_attr_alloc_init();

	if ((pcbinfo->mtx = lck_rw_alloc_init(pcbinfo->mtx_grp, pcbinfo->mtx_attr)) == NULL)
		return;	/* pretty much dead if this fails... */
}

static struct sockaddr_in ripsrc = { sizeof(ripsrc), AF_INET, 0, {0}, {0,0,0,0,0,0,0,0} };
/*
 * Set up the generic address and protocol structures for the
 * raw_input routine, then pass them along with the mbuf chain.
 */
void
rip_input(struct mbuf *m, int iphlen)
{
	register struct ip *ip = mtod(m, struct ip *);
	register struct inpcb *inp;
	struct inpcb *last = 0;
	struct mbuf *opts = 0;
	int skipit;

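	/*
	 * Deliver the packet to every matching raw PCB: the loop below hands
	 * an m_copy() to each matching socket found before the last one, while
	 * the original mbuf chain (after the IPsec/MAC checks that follow the
	 * loop) goes to the last match.  A PCB matches when its protocol,
	 * local address and foreign address are either wildcards or equal to
	 * the corresponding fields of this packet.
	 */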
	ripsrc.sin_addr = ip->ip_src;
	lck_rw_lock_shared(ripcbinfo.mtx);
	LIST_FOREACH(inp, &ripcb, inp_list) {
#if INET6
		if ((inp->inp_vflag & INP_IPV4) == 0)
			continue;
#endif
		if (inp->inp_ip_p && (inp->inp_ip_p != ip->ip_p))
			continue;
		if (inp->inp_laddr.s_addr &&
		    inp->inp_laddr.s_addr != ip->ip_dst.s_addr)
			continue;
		if (inp->inp_faddr.s_addr &&
		    inp->inp_faddr.s_addr != ip->ip_src.s_addr)
			continue;
		if (last) {
			struct mbuf *n = m_copy(m, 0, (int)M_COPYALL);

#if IPSEC
			/* check AH/ESP integrity. */
			skipit = 0;
			if (ipsec_bypass == 0 && n) {
				if (ipsec4_in_reject_so(n, last->inp_socket)) {
					m_freem(n);
					IPSEC_STAT_INCREMENT(ipsecstat.in_polvio);
					/* do not inject data to pcb */
					skipit = 1;
				}
			}
#endif /*IPSEC*/
#if CONFIG_MACF_NET
			if (n && skipit == 0) {
				if (mac_inpcb_check_deliver(last, n, AF_INET,
				    SOCK_RAW) != 0)
					skipit = 1;
			}
#endif
			if (n && skipit == 0) {
				int error = 0;
				if (last->inp_flags & INP_CONTROLOPTS ||
				    last->inp_socket->so_options & SO_TIMESTAMP)
					ip_savecontrol(last, &opts, ip, n);
				if (last->inp_flags & INP_STRIPHDR) {
					n->m_len -= iphlen;
					n->m_pkthdr.len -= iphlen;
					n->m_data += iphlen;
				}
				// ###LOCK need to lock that socket?
				if (sbappendaddr(&last->inp_socket->so_rcv,
				    (struct sockaddr *)&ripsrc, n,
				    opts, &error) != 0) {
					sorwakeup(last->inp_socket);
				} else {
					if (error) {
						/* should notify about lost packet */
						kprintf("rip_input can't append to socket\n");
					}
				}
				opts = 0;
			}
		}
		last = inp;
	}
	lck_rw_done(ripcbinfo.mtx);
#if IPSEC
	/* check AH/ESP integrity. */
	skipit = 0;
	if (ipsec_bypass == 0 && last) {
		if (ipsec4_in_reject_so(m, last->inp_socket)) {
			m_freem(m);
			IPSEC_STAT_INCREMENT(ipsecstat.in_polvio);
			OSAddAtomic(1, &ipstat.ips_delivered);
			/* do not inject data to pcb */
			skipit = 1;
		}
	}
#endif /*IPSEC*/
#if CONFIG_MACF_NET
	if (last && skipit == 0) {
		if (mac_inpcb_check_deliver(last, m, AF_INET, SOCK_RAW) != 0)
			skipit = 1;
	}
#endif
	if (skipit == 0) {
		if (last) {
			if (last->inp_flags & INP_CONTROLOPTS ||
			    last->inp_socket->so_options & SO_TIMESTAMP)
				ip_savecontrol(last, &opts, ip, m);
			if (last->inp_flags & INP_STRIPHDR) {
				m->m_len -= iphlen;
				m->m_pkthdr.len -= iphlen;
				m->m_data += iphlen;
			}
			if (sbappendaddr(&last->inp_socket->so_rcv,
			    (struct sockaddr *)&ripsrc, m, opts, NULL) != 0) {
				sorwakeup(last->inp_socket);
			} else {
				kprintf("rip_input(2) can't append to socket\n");
			}
		} else {
			m_freem(m);
			OSAddAtomic(1, &ipstat.ips_noproto);
			OSAddAtomic(-1, &ipstat.ips_delivered);
		}
	}
}
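
/*
 * For illustration only (not part of this file): a minimal user-space
 * sketch of the receive path that rip_input() feeds.  A privileged process
 * opens a raw socket bound to a protocol number; each datagram of that
 * protocol is then delivered with its IP header attached (unless
 * IP_STRIPHDR is set on the socket).
 *
 *	int s = socket(AF_INET, SOCK_RAW, IPPROTO_ICMP);
 *	char buf[2048];
 *	struct sockaddr_in from;
 *	socklen_t fromlen = sizeof (from);
 *	ssize_t n = recvfrom(s, buf, sizeof (buf), 0,
 *	    (struct sockaddr *)&from, &fromlen);
 *	// buf[0..n) holds the IP header followed by the ICMP message
 */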

/*
 * Generate IP header and pass packet to ip_output.
 * Tack on options the user may have set up with a control call.
 */
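/*
 * With IP_HDRINCL the caller supplies the entire IP header; per
 * long-standing BSD raw-socket convention ip_len and ip_off are then
 * expected in host byte order, which is why the sanity checks below compare
 * ip_len directly against m_pkthdr.len.  Without IP_HDRINCL the header is
 * built here from the PCB's cached ToS, TTL and protocol.
 */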
int
rip_output(struct mbuf *m, struct socket *so, u_int32_t dst)
{
	register struct ip *ip;
	register struct inpcb *inp = sotoinpcb(so);
	int flags = (so->so_options & SO_DONTROUTE) | IP_ALLOWBROADCAST;
	struct ip_out_args ipoa;
	int error = 0;

	/* If socket was bound to an ifindex, tell ip_output about it */
	ipoa.ipoa_ifscope = (inp->inp_flags & INP_BOUND_IF) ?
	    inp->inp_boundif : IFSCOPE_NONE;
	flags |= IP_OUTARGS;

	/*
	 * If the user handed us a complete IP packet, use it.
	 * Otherwise, allocate an mbuf for a header and fill it in.
	 */
	if ((inp->inp_flags & INP_HDRINCL) == 0) {
		if (m->m_pkthdr.len + sizeof(struct ip) > IP_MAXPACKET) {
			m_freem(m);
			return(EMSGSIZE);
		}
		M_PREPEND(m, sizeof(struct ip), M_WAIT);
		if (m == NULL)
			return ENOBUFS;
		ip = mtod(m, struct ip *);
		ip->ip_tos = inp->inp_ip_tos;
		ip->ip_off = 0;
		ip->ip_p = inp->inp_ip_p;
		ip->ip_len = m->m_pkthdr.len;
		ip->ip_src = inp->inp_laddr;
		ip->ip_dst.s_addr = dst;
		ip->ip_ttl = inp->inp_ip_ttl;
	} else {
		if (m->m_pkthdr.len > IP_MAXPACKET) {
			m_freem(m);
			return(EMSGSIZE);
		}
		ip = mtod(m, struct ip *);
		/*
		 * Don't allow both user-specified and setsockopt options,
		 * and don't allow packet length sizes that will crash.
		 */
		if (((IP_VHL_HL(ip->ip_vhl) != (sizeof (*ip) >> 2))
		    && inp->inp_options)
		    || (ip->ip_len > m->m_pkthdr.len)
		    || (ip->ip_len < (IP_VHL_HL(ip->ip_vhl) << 2))) {
			m_freem(m);
			return EINVAL;
		}
		if (ip->ip_id == 0)
#if RANDOM_IP_ID
			ip->ip_id = ip_randomid();
#else
			ip->ip_id = htons(ip_id++);
#endif
		/* XXX prevent ip_output from overwriting header fields */
		flags |= IP_RAWOUTPUT;
		OSAddAtomic(1, &ipstat.ips_rawout);
	}

#if IPSEC
	if (ipsec_bypass == 0 && ipsec_setsocket(m, so) != 0) {
		m_freem(m);
		return ENOBUFS;
	}
#endif /*IPSEC*/

	if (inp->inp_route.ro_rt != NULL &&
	    inp->inp_route.ro_rt->generation_id != route_generation) {
		rtfree(inp->inp_route.ro_rt);
		inp->inp_route.ro_rt = NULL;
	}

#if PKT_PRIORITY
	if (soisbackground(so))
		m_prio_background(m);
#endif /* PKT_PRIORITY */

#if CONFIG_MACF_NET
	mac_mbuf_label_associate_inpcb(inp, m);
#endif

	/*
	 * The domain lock is held across ip_output, so it is okay
	 * to pass the PCB cached route pointer directly to IP and
	 * the modules beneath it.
	 */
	error = ip_output(m, inp->inp_options, &inp->inp_route, flags,
	    inp->inp_moptions, &ipoa);

#if IFNET_ROUTE_REFCNT
	/*
	 * Always discard the cached route for an unconnected socket
	 * or if it is a non-unicast route.
	 */
	if (inp->inp_route.ro_rt != NULL &&
	    ((inp->inp_route.ro_rt->rt_flags & (RTF_MULTICAST|RTF_BROADCAST)) ||
	    inp->inp_socket == NULL ||
	    inp->inp_socket->so_state != SS_ISCONNECTED)) {
		rtfree(inp->inp_route.ro_rt);
		inp->inp_route.ro_rt = NULL;
	}
#endif /* IFNET_ROUTE_REFCNT */

	return (error);
}

#if IPFIREWALL
int
load_ipfw(void)
{
	kern_return_t err;

	ipfw_init();

#if DUMMYNET
	if (!DUMMYNET_LOADED)
		ip_dn_init();
#endif /* DUMMYNET */
	err = 0;

	return err == 0 && ip_fw_ctl_ptr == NULL ? -1 : err;
}
#endif /* IPFIREWALL */

/*
 * Raw IP socket option processing.
 */
int
rip_ctloutput(struct socket *so, struct sockopt *sopt)
{
	struct inpcb *inp = sotoinpcb(so);
	int error, optval;

	if (sopt->sopt_level != IPPROTO_IP)
		return (EINVAL);

	error = 0;

	switch (sopt->sopt_dir) {
	case SOPT_GET:
		switch (sopt->sopt_name) {
		case IP_HDRINCL:
			optval = inp->inp_flags & INP_HDRINCL;
			error = sooptcopyout(sopt, &optval, sizeof optval);
			break;

		case IP_STRIPHDR:
			optval = inp->inp_flags & INP_STRIPHDR;
			error = sooptcopyout(sopt, &optval, sizeof optval);
			break;

#if IPFIREWALL
		case IP_FW_ADD:
		case IP_FW_GET:
		case IP_OLD_FW_ADD:
		case IP_OLD_FW_GET:
			if (ip_fw_ctl_ptr == 0)
				error = load_ipfw();
			if (ip_fw_ctl_ptr && error == 0)
				error = ip_fw_ctl_ptr(sopt);
			else
				error = ENOPROTOOPT;
			break;
#endif /* IPFIREWALL */

#if DUMMYNET
		case IP_DUMMYNET_GET:
			if (DUMMYNET_LOADED)
				error = ip_dn_ctl_ptr(sopt);
			else
				error = ENOPROTOOPT;
			break;
#endif /* DUMMYNET */

#if MROUTING
		case MRT_INIT:
		case MRT_DONE:
		case MRT_ADD_VIF:
		case MRT_DEL_VIF:
		case MRT_ADD_MFC:
		case MRT_DEL_MFC:
		case MRT_VERSION:
		case MRT_ASSERT:
			error = ip_mrouter_get(so, sopt);
			break;
#endif /* MROUTING */

		default:
			error = ip_ctloutput(so, sopt);
			break;
		}
		break;

	case SOPT_SET:
		switch (sopt->sopt_name) {
		case IP_HDRINCL:
			error = sooptcopyin(sopt, &optval, sizeof optval,
			    sizeof optval);
			if (error)
				break;
			if (optval)
				inp->inp_flags |= INP_HDRINCL;
			else
				inp->inp_flags &= ~INP_HDRINCL;
			break;

		case IP_STRIPHDR:
			error = sooptcopyin(sopt, &optval, sizeof optval,
			    sizeof optval);
			if (error)
				break;
			if (optval)
				inp->inp_flags |= INP_STRIPHDR;
			else
				inp->inp_flags &= ~INP_STRIPHDR;
			break;

#if IPFIREWALL
		case IP_FW_ADD:
		case IP_FW_DEL:
		case IP_FW_FLUSH:
		case IP_FW_ZERO:
		case IP_FW_RESETLOG:
		case IP_OLD_FW_ADD:
		case IP_OLD_FW_DEL:
		case IP_OLD_FW_FLUSH:
		case IP_OLD_FW_ZERO:
		case IP_OLD_FW_RESETLOG:
			if (ip_fw_ctl_ptr == 0)
				error = load_ipfw();
			if (ip_fw_ctl_ptr && error == 0)
				error = ip_fw_ctl_ptr(sopt);
			else
				error = ENOPROTOOPT;
			break;
#endif /* IPFIREWALL */

#if DUMMYNET
		case IP_DUMMYNET_CONFIGURE:
		case IP_DUMMYNET_DEL:
		case IP_DUMMYNET_FLUSH:
			if (DUMMYNET_LOADED)
				error = ip_dn_ctl_ptr(sopt);
			else
				error = ENOPROTOOPT;
			break;
#endif

#if MROUTING
		case IP_RSVP_ON:
			error = ip_rsvp_init(so);
			break;

		case IP_RSVP_OFF:
			error = ip_rsvp_done();
			break;

		/* XXX - should be combined */
		case IP_RSVP_VIF_ON:
			error = ip_rsvp_vif_init(so, sopt);
			break;

		case IP_RSVP_VIF_OFF:
			error = ip_rsvp_vif_done(so, sopt);
			break;

		case MRT_INIT:
		case MRT_DONE:
		case MRT_ADD_VIF:
		case MRT_DEL_VIF:
		case MRT_ADD_MFC:
		case MRT_DEL_MFC:
		case MRT_VERSION:
		case MRT_ASSERT:
			error = ip_mrouter_set(so, sopt);
			break;
#endif /* MROUTING */

		default:
			error = ip_ctloutput(so, sopt);
			break;
		}
		break;
	}

	return (error);
}
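
/*
 * For illustration only (not part of this file): how a privileged
 * user-space program would exercise the IP_HDRINCL and IP_STRIPHDR options
 * handled above.  IP_HDRINCL makes rip_output() send the caller-supplied
 * IP header verbatim; IP_STRIPHDR makes rip_input() remove the IP header
 * before queueing data on the socket.
 *
 *	int s = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);
 *	int on = 1;
 *	if (setsockopt(s, IPPROTO_IP, IP_HDRINCL, &on, sizeof (on)) == -1)
 *		err(1, "IP_HDRINCL");
 *	if (setsockopt(s, IPPROTO_IP, IP_STRIPHDR, &on, sizeof (on)) == -1)
 *		err(1, "IP_STRIPHDR");
 */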

/*
 * This function exists solely to receive the PRC_IFDOWN messages which
 * are sent by if_down().  It looks for an ifaddr whose ifa_addr is sa,
 * and calls in_ifadown() to remove all routes corresponding to that address.
 * It also receives the PRC_IFUP messages from if_up() and reinstalls the
 * interface routes.
 */
void
rip_ctlinput(
	int cmd,
	struct sockaddr *sa,
	__unused void *vip)
{
	struct in_ifaddr *ia;
	struct ifnet *ifp;
	int err;
	int flags, done = 0;

	switch (cmd) {
	case PRC_IFDOWN:
		lck_rw_lock_shared(in_ifaddr_rwlock);
		for (ia = in_ifaddrhead.tqh_first; ia;
		    ia = ia->ia_link.tqe_next) {
			if (ia->ia_ifa.ifa_addr == sa
			    && (ia->ia_flags & IFA_ROUTE)) {
				done = 1;
				ifaref(&ia->ia_ifa);
				lck_rw_done(in_ifaddr_rwlock);
				lck_mtx_lock(rnh_lock);
				/*
				 * in_ifscrub kills the interface route.
				 */
				in_ifscrub(ia->ia_ifp, ia, 1);
				/*
				 * in_ifadown gets rid of all the rest of
				 * the routes.  This is not quite the right
				 * thing to do, but at least if we are running
				 * a routing process they will come back.
				 */
				in_ifadown(&ia->ia_ifa, 1);
				lck_mtx_unlock(rnh_lock);
				ifafree(&ia->ia_ifa);
				break;
			}
		}
		if (!done)
			lck_rw_done(in_ifaddr_rwlock);
		break;

	case PRC_IFUP:
		lck_rw_lock_shared(in_ifaddr_rwlock);
		for (ia = in_ifaddrhead.tqh_first; ia;
		    ia = ia->ia_link.tqe_next) {
			if (ia->ia_ifa.ifa_addr == sa)
				break;
		}
		if (ia == 0 || (ia->ia_flags & IFA_ROUTE)) {
			lck_rw_done(in_ifaddr_rwlock);
			return;
		}
		ifaref(&ia->ia_ifa);
		lck_rw_done(in_ifaddr_rwlock);

		flags = RTF_UP;
		ifp = ia->ia_ifa.ifa_ifp;

		if ((ifp->if_flags & IFF_LOOPBACK)
		    || (ifp->if_flags & IFF_POINTOPOINT))
			flags |= RTF_HOST;

		err = rtinit(&ia->ia_ifa, RTM_ADD, flags);
		if (err == 0)
			ia->ia_flags |= IFA_ROUTE;
		ifafree(&ia->ia_ifa);
		break;
	}
}

u_int32_t rip_sendspace = RIPSNDQ;
u_int32_t rip_recvspace = RIPRCVQ;

SYSCTL_INT(_net_inet_raw, OID_AUTO, maxdgram, CTLFLAG_RW,
    &rip_sendspace, 0, "Maximum outgoing raw IP datagram size");
SYSCTL_INT(_net_inet_raw, OID_AUTO, recvspace, CTLFLAG_RW,
    &rip_recvspace, 0, "Maximum incoming raw IP datagram size");

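/*
 * Attach a new raw IP protocol control block to a socket.  Raw sockets are
 * restricted to privileged (SS_PRIV) sockets; the requested protocol number
 * is recorded in the PCB so rip_input() can match incoming datagrams
 * against it.
 */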
static int
rip_attach(struct socket *so, int proto, struct proc *p)
{
	struct inpcb *inp;
	int error;

	inp = sotoinpcb(so);
	if (inp)
		panic("rip_attach");
	if ((so->so_state & SS_PRIV) == 0)
		return (EPERM);

	error = soreserve(so, rip_sendspace, rip_recvspace);
	if (error)
		return error;
	error = in_pcballoc(so, &ripcbinfo, p);
	if (error)
		return error;
	inp = (struct inpcb *)so->so_pcb;
	inp->inp_vflag |= INP_IPV4;
	inp->inp_ip_p = proto;
	inp->inp_ip_ttl = ip_defttl;
	return 0;
}

__private_extern__ int
rip_detach(struct socket *so)
{
	struct inpcb *inp;

	inp = sotoinpcb(so);
	if (inp == 0)
		panic("rip_detach");
#if MROUTING
	if (so == ip_mrouter)
		ip_mrouter_done();
	ip_rsvp_force_done(so);
	if (so == ip_rsvpd)
		ip_rsvp_done();
#endif /* MROUTING */
	in_pcbdetach(inp);
	return 0;
}

__private_extern__ int
rip_abort(struct socket *so)
{
	soisdisconnected(so);
	return rip_detach(so);
}

__private_extern__ int
rip_disconnect(struct socket *so)
{
	if ((so->so_state & SS_ISCONNECTED) == 0)
		return ENOTCONN;
	return rip_abort(so);
}

__private_extern__ int
rip_bind(struct socket *so, struct sockaddr *nam, __unused struct proc *p)
{
	struct inpcb *inp = sotoinpcb(so);
	struct sockaddr_in *addr = (struct sockaddr_in *)nam;
	struct ifaddr *ifa = NULL;

	if (nam->sa_len != sizeof(*addr))
		return EINVAL;

	if (TAILQ_EMPTY(&ifnet_head) || ((addr->sin_family != AF_INET) &&
	    (addr->sin_family != AF_IMPLINK)) ||
	    (addr->sin_addr.s_addr &&
	    (ifa = ifa_ifwithaddr((struct sockaddr *)addr)) == 0)) {
		return EADDRNOTAVAIL;
	} else if (ifa) {
		ifafree(ifa);
		ifa = NULL;
	}
	inp->inp_laddr = addr->sin_addr;
	return 0;
}

__private_extern__ int
rip_connect(struct socket *so, struct sockaddr *nam, __unused struct proc *p)
{
	struct inpcb *inp = sotoinpcb(so);
	struct sockaddr_in *addr = (struct sockaddr_in *)nam;

	if (nam->sa_len != sizeof(*addr))
		return EINVAL;
	if (TAILQ_EMPTY(&ifnet_head))
		return EADDRNOTAVAIL;
	if ((addr->sin_family != AF_INET) &&
	    (addr->sin_family != AF_IMPLINK))
		return EAFNOSUPPORT;
	inp->inp_faddr = addr->sin_addr;
	soisconnected(so);
	return 0;
}

__private_extern__ int
rip_shutdown(struct socket *so)
{
	socantsendmore(so);
	return 0;
}

__private_extern__ int
rip_send(struct socket *so, __unused int flags, struct mbuf *m, struct sockaddr *nam,
    __unused struct mbuf *control, __unused struct proc *p)
{
	struct inpcb *inp = sotoinpcb(so);
	register u_int32_t dst;

	if (so->so_state & SS_ISCONNECTED) {
		if (nam) {
			m_freem(m);
			return EISCONN;
		}
		dst = inp->inp_faddr.s_addr;
	} else {
		if (nam == NULL) {
			m_freem(m);
			return ENOTCONN;
		}
		dst = ((struct sockaddr_in *)nam)->sin_addr.s_addr;
	}
	return rip_output(m, so, dst);
}

/*
 * Note: rip_unlock is called by various protocols in place of the generic
 * socket_unlock; it handles the socket deallocation on the last reference.
 */
int
rip_unlock(struct socket *so, int refcount, void *debug)
{
	void *lr_saved;
	struct inpcb *inp = sotoinpcb(so);

	if (debug == NULL)
		lr_saved = __builtin_return_address(0);
	else
		lr_saved = debug;

	if (refcount) {
		if (so->so_usecount <= 0) {
			panic("rip_unlock: bad refcount so=%p val=%x lrh= %s\n",
			    so, so->so_usecount, solockhistory_nr(so));
			/* NOTREACHED */
		}
		so->so_usecount--;
		if (so->so_usecount == 0 && (inp->inp_wantcnt == WNT_STOPUSING)) {
			/* cleanup after last reference */
			lck_mtx_unlock(so->so_proto->pr_domain->dom_mtx);
			lck_rw_lock_exclusive(ripcbinfo.mtx);
			if (inp->inp_state != INPCB_STATE_DEAD) {
#if INET6
				if (INP_CHECK_SOCKAF(so, AF_INET6))
					in6_pcbdetach(inp);
				else
#endif /* INET6 */
				in_pcbdetach(inp);
			}
			in_pcbdispose(inp);
			lck_rw_done(ripcbinfo.mtx);
			return(0);
		}
	}
	so->unlock_lr[so->next_unlock_lr] = lr_saved;
	so->next_unlock_lr = (so->next_unlock_lr+1) % SO_LCKDBG_MAX;
	lck_mtx_unlock(so->so_proto->pr_domain->dom_mtx);
	return(0);
}

static int
rip_pcblist SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	int error, i, n;
	struct inpcb *inp, **inp_list;
	inp_gen_t gencnt;
	struct xinpgen xig;

	/*
	 * The process of preparing the TCB list is too time-consuming and
	 * resource-intensive to repeat twice on every request.
	 */
	lck_rw_lock_exclusive(ripcbinfo.mtx);
	if (req->oldptr == USER_ADDR_NULL) {
		n = ripcbinfo.ipi_count;
		req->oldidx = 2 * (sizeof xig)
		    + (n + n/8) * sizeof(struct xinpcb);
		lck_rw_done(ripcbinfo.mtx);
		return 0;
	}

	if (req->newptr != USER_ADDR_NULL) {
		lck_rw_done(ripcbinfo.mtx);
		return EPERM;
	}

	/*
	 * OK, now we're committed to doing something.
	 */
	gencnt = ripcbinfo.ipi_gencnt;
	n = ripcbinfo.ipi_count;

	bzero(&xig, sizeof(xig));
	xig.xig_len = sizeof xig;
	xig.xig_count = n;
	xig.xig_gen = gencnt;
	xig.xig_sogen = so_gencnt;
	error = SYSCTL_OUT(req, &xig, sizeof xig);
	if (error) {
		lck_rw_done(ripcbinfo.mtx);
		return error;
	}
	/*
	 * We are done if there is no pcb
	 */
	if (n == 0) {
		lck_rw_done(ripcbinfo.mtx);
		return 0;
	}

	inp_list = _MALLOC(n * sizeof *inp_list, M_TEMP, M_WAITOK);
	if (inp_list == 0) {
		lck_rw_done(ripcbinfo.mtx);
		return ENOMEM;
	}

	for (inp = ripcbinfo.listhead->lh_first, i = 0; inp && i < n;
	    inp = inp->inp_list.le_next) {
		if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD)
			inp_list[i++] = inp;
	}
	n = i;

	error = 0;
	for (i = 0; i < n; i++) {
		inp = inp_list[i];
		if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD) {
			struct xinpcb xi;

			bzero(&xi, sizeof(xi));
			xi.xi_len = sizeof xi;
			/* XXX should avoid extra copy */
			inpcb_to_compat(inp, &xi.xi_inp);
			if (inp->inp_socket)
				sotoxsocket(inp->inp_socket, &xi.xi_socket);
			error = SYSCTL_OUT(req, &xi, sizeof xi);
		}
	}
	if (!error) {
		/*
		 * Give the user an updated idea of our state.
		 * If the generation differs from what we told
		 * her before, she knows that something happened
		 * while we were processing this request, and it
		 * might be necessary to retry.
		 */
		bzero(&xig, sizeof(xig));
		xig.xig_len = sizeof xig;
		xig.xig_gen = ripcbinfo.ipi_gencnt;
		xig.xig_sogen = so_gencnt;
		xig.xig_count = ripcbinfo.ipi_count;
		error = SYSCTL_OUT(req, &xig, sizeof xig);
	}
	FREE(inp_list, M_TEMP);
	lck_rw_done(ripcbinfo.mtx);
	return error;
}

SYSCTL_PROC(_net_inet_raw, OID_AUTO/*XXX*/, pcblist, CTLFLAG_RD, 0, 0,
    rip_pcblist, "S,xinpcb", "List of active raw IP sockets");
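
/*
 * For illustration only (not part of this file): user-space tools such as
 * netstat read this list through sysctlbyname().  A minimal sketch:
 *
 *	size_t len = 0;
 *	if (sysctlbyname("net.inet.raw.pcblist", NULL, &len, NULL, 0) == 0) {
 *		char *buf = malloc(len);
 *		if (buf != NULL &&
 *		    sysctlbyname("net.inet.raw.pcblist", buf, &len, NULL, 0) == 0) {
 *			// buf holds a struct xinpgen header, then one
 *			// struct xinpcb per raw PCB, then a trailing
 *			// struct xinpgen with the final generation count.
 *		}
 *	}
 */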

#if !CONFIG_EMBEDDED

static int
rip_pcblist64 SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	int error, i, n;
	struct inpcb *inp, **inp_list;
	inp_gen_t gencnt;
	struct xinpgen xig;

	/*
	 * The process of preparing the TCB list is too time-consuming and
	 * resource-intensive to repeat twice on every request.
	 */
	lck_rw_lock_exclusive(ripcbinfo.mtx);
	if (req->oldptr == USER_ADDR_NULL) {
		n = ripcbinfo.ipi_count;
		req->oldidx = 2 * (sizeof xig)
		    + (n + n/8) * sizeof(struct xinpcb64);
		lck_rw_done(ripcbinfo.mtx);
		return 0;
	}

	if (req->newptr != USER_ADDR_NULL) {
		lck_rw_done(ripcbinfo.mtx);
		return EPERM;
	}

	/*
	 * OK, now we're committed to doing something.
	 */
	gencnt = ripcbinfo.ipi_gencnt;
	n = ripcbinfo.ipi_count;

	bzero(&xig, sizeof(xig));
	xig.xig_len = sizeof xig;
	xig.xig_count = n;
	xig.xig_gen = gencnt;
	xig.xig_sogen = so_gencnt;
	error = SYSCTL_OUT(req, &xig, sizeof xig);
	if (error) {
		lck_rw_done(ripcbinfo.mtx);
		return error;
	}
	/*
	 * We are done if there is no pcb
	 */
	if (n == 0) {
		lck_rw_done(ripcbinfo.mtx);
		return 0;
	}

	inp_list = _MALLOC(n * sizeof *inp_list, M_TEMP, M_WAITOK);
	if (inp_list == 0) {
		lck_rw_done(ripcbinfo.mtx);
		return ENOMEM;
	}

	for (inp = ripcbinfo.listhead->lh_first, i = 0; inp && i < n;
	    inp = inp->inp_list.le_next) {
		if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD)
			inp_list[i++] = inp;
	}
	n = i;

	error = 0;
	for (i = 0; i < n; i++) {
		inp = inp_list[i];
		if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD) {
			struct xinpcb64 xi;

			bzero(&xi, sizeof(xi));
			xi.xi_len = sizeof xi;
			inpcb_to_xinpcb64(inp, &xi);
			if (inp->inp_socket)
				sotoxsocket64(inp->inp_socket, &xi.xi_socket);
			error = SYSCTL_OUT(req, &xi, sizeof xi);
		}
	}
	if (!error) {
		/*
		 * Give the user an updated idea of our state.
		 * If the generation differs from what we told
		 * her before, she knows that something happened
		 * while we were processing this request, and it
		 * might be necessary to retry.
		 */
		bzero(&xig, sizeof(xig));
		xig.xig_len = sizeof xig;
		xig.xig_gen = ripcbinfo.ipi_gencnt;
		xig.xig_sogen = so_gencnt;
		xig.xig_count = ripcbinfo.ipi_count;
		error = SYSCTL_OUT(req, &xig, sizeof xig);
	}
	FREE(inp_list, M_TEMP);
	lck_rw_done(ripcbinfo.mtx);
	return error;
}

SYSCTL_PROC(_net_inet_raw, OID_AUTO, pcblist64, CTLFLAG_RD, 0, 0,
    rip_pcblist64, "S,xinpcb64", "List of active raw IP sockets");

#endif /* !CONFIG_EMBEDDED */

struct pr_usrreqs rip_usrreqs = {
	rip_abort, pru_accept_notsupp, rip_attach, rip_bind, rip_connect,
	pru_connect2_notsupp, in_control, rip_detach, rip_disconnect,
	pru_listen_notsupp, in_setpeeraddr, pru_rcvd_notsupp,
	pru_rcvoob_notsupp, rip_send, pru_sense_null, rip_shutdown,
	in_setsockaddr, sosend, soreceive, pru_sopoll_notsupp
};
/* DSEP Review Done pl-20051213-v02 @3253 */