/*
 * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Copyright (c) 1982, 1986, 1988, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)raw_ip.c	8.7 (Berkeley) 5/15/95
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/proc.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <libkern/OSAtomic.h>
#include <kern/zalloc.h>

#include <pexpert/pexpert.h>

#include <net/if.h>
#include <net/route.h>

#define _IP_VHL
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
#include <netinet/ip_var.h>
#include <netinet/ip_mroute.h>

#if INET6
#include <netinet6/in6_pcb.h>
#endif /* INET6 */

#include <netinet/ip_fw.h>

#if IPSEC
#include <netinet6/ipsec.h>
#endif /*IPSEC*/

#if DUMMYNET
#include <netinet/ip_dummynet.h>
#endif

#if CONFIG_MACF_NET
#include <security/mac_framework.h>
#endif /* CONFIG_MACF_NET */

int load_ipfw(void);
int rip_detach(struct socket *);
int rip_abort(struct socket *);
int rip_disconnect(struct socket *);
int rip_bind(struct socket *, struct sockaddr *, struct proc *);
int rip_connect(struct socket *, struct sockaddr *, struct proc *);
int rip_shutdown(struct socket *);

#if IPSEC
extern int ipsec_bypass;
#endif

struct inpcbhead ripcb;
struct inpcbinfo ripcbinfo;

/* control hooks for ipfw and dummynet */
#if IPFIREWALL
ip_fw_ctl_t *ip_fw_ctl_ptr;
#if DUMMYNET
ip_dn_ctl_t *ip_dn_ctl_ptr;
#endif /* DUMMYNET */
#endif /* IPFIREWALL */

/*
 * Nominal space allocated to a raw ip socket.
 */
#define	RIPSNDQ		8192
#define	RIPRCVQ		8192

/*
 * Raw interface to IP protocol.
 */

/*
 * Initialize raw connection block q.
 */
void
rip_init()
{
	struct inpcbinfo *pcbinfo;

	LIST_INIT(&ripcb);
	ripcbinfo.listhead = &ripcb;
	/*
	 * XXX We don't use the hash list for raw IP, but it's easier
	 * to allocate a one entry hash list than it is to check all
	 * over the place for hashbase == NULL.
	 */
	ripcbinfo.hashbase = hashinit(1, M_PCB, &ripcbinfo.hashmask);
	ripcbinfo.porthashbase = hashinit(1, M_PCB, &ripcbinfo.porthashmask);

	ripcbinfo.ipi_zone = (void *) zinit(sizeof(struct inpcb),
	    (4096 * sizeof(struct inpcb)), 4096, "ripzone");

	pcbinfo = &ripcbinfo;
	/*
	 * allocate lock group attribute and group for raw IP pcb mutexes
	 */
	pcbinfo->mtx_grp_attr = lck_grp_attr_alloc_init();

	pcbinfo->mtx_grp = lck_grp_alloc_init("ripcb", pcbinfo->mtx_grp_attr);

	/*
	 * allocate the lock attribute for raw IP pcb mutexes
	 */
	pcbinfo->mtx_attr = lck_attr_alloc_init();

	if ((pcbinfo->mtx = lck_rw_alloc_init(pcbinfo->mtx_grp, pcbinfo->mtx_attr)) == NULL)
		return;	/* pretty much dead if this fails... */

}

static struct sockaddr_in ripsrc = { sizeof(ripsrc), AF_INET, 0, {0}, {0,0,0,0,0,0,0,0,} };
/*
 * Setup generic address and protocol structures
 * for raw_input routine, then pass them along with
 * mbuf chain.
 */
void
rip_input(m, iphlen)
	struct mbuf *m;
	int iphlen;
{
	register struct ip *ip = mtod(m, struct ip *);
	register struct inpcb *inp;
	struct inpcb *last = 0;
	struct mbuf *opts = 0;
	int skipit = 0, ret = 0;

	ripsrc.sin_addr = ip->ip_src;
	lck_rw_lock_shared(ripcbinfo.mtx);
	LIST_FOREACH(inp, &ripcb, inp_list) {
#if INET6
		if ((inp->inp_vflag & INP_IPV4) == 0)
			continue;
#endif
		if (inp->inp_ip_p && (inp->inp_ip_p != ip->ip_p))
			continue;
		if (inp->inp_laddr.s_addr &&
		    inp->inp_laddr.s_addr != ip->ip_dst.s_addr)
			continue;
		if (inp->inp_faddr.s_addr &&
		    inp->inp_faddr.s_addr != ip->ip_src.s_addr)
			continue;
		if (last) {
			struct mbuf *n = m_copy(m, 0, (int)M_COPYALL);

			skipit = 0;
#if IPSEC
			/* check AH/ESP integrity. */
			if (ipsec_bypass == 0 && n) {
				if (ipsec4_in_reject_so(n, last->inp_socket)) {
					m_freem(n);
					IPSEC_STAT_INCREMENT(ipsecstat.in_polvio);
					/* do not inject data to pcb */
					skipit = 1;
				}
			}
#endif /*IPSEC*/
#if CONFIG_MACF_NET
			if (n && skipit == 0) {
				if (mac_inpcb_check_deliver(last, n, AF_INET,
				    SOCK_RAW) != 0) {
					m_freem(n);
					skipit = 1;
				}
			}
#endif
			if (n && skipit == 0) {
				int error = 0;
				if ((last->inp_flags & INP_CONTROLOPTS) != 0 ||
				    (last->inp_socket->so_options & SO_TIMESTAMP) != 0 ||
				    (last->inp_socket->so_options & SO_TIMESTAMP_MONOTONIC) != 0) {
					ret = ip_savecontrol(last, &opts, ip, n);
					if (ret != 0) {
						m_freem(n);
						m_freem(opts);
						last = inp;
						continue;
					}
				}
				if (last->inp_flags & INP_STRIPHDR) {
					n->m_len -= iphlen;
					n->m_pkthdr.len -= iphlen;
					n->m_data += iphlen;
				}
				so_recv_data_stat(last->inp_socket, m, 0);
				if (sbappendaddr(&last->inp_socket->so_rcv,
				    (struct sockaddr *)&ripsrc, n,
				    opts, &error) != 0) {
					sorwakeup(last->inp_socket);
				} else {
					if (error) {
						/* should notify about lost packet */
						kprintf("rip_input can't append to socket\n");
					}
				}
				opts = 0;
			}
		}
		last = inp;
	}

	skipit = 0;
#if IPSEC
	/* check AH/ESP integrity. */
	if (ipsec_bypass == 0 && last) {
		if (ipsec4_in_reject_so(m, last->inp_socket)) {
			m_freem(m);
			IPSEC_STAT_INCREMENT(ipsecstat.in_polvio);
			OSAddAtomic(1, &ipstat.ips_delivered);
			/* do not inject data to pcb */
			skipit = 1;
		}
	}
#endif /*IPSEC*/
#if CONFIG_MACF_NET
	if (last && skipit == 0) {
		if (mac_inpcb_check_deliver(last, m, AF_INET, SOCK_RAW) != 0) {
			skipit = 1;
			m_freem(m);
		}
	}
#endif
	if (skipit == 0) {
		if (last) {
			if ((last->inp_flags & INP_CONTROLOPTS) != 0 ||
			    (last->inp_socket->so_options & SO_TIMESTAMP) != 0 ||
			    (last->inp_socket->so_options & SO_TIMESTAMP_MONOTONIC) != 0) {
				ret = ip_savecontrol(last, &opts, ip, m);
				if (ret != 0) {
					m_freem(m);
					m_freem(opts);
					goto unlock;
				}
			}
			if (last->inp_flags & INP_STRIPHDR) {
				m->m_len -= iphlen;
				m->m_pkthdr.len -= iphlen;
				m->m_data += iphlen;
			}
			so_recv_data_stat(last->inp_socket, m, 0);
			if (sbappendaddr(&last->inp_socket->so_rcv,
			    (struct sockaddr *)&ripsrc, m, opts, NULL) != 0) {
				sorwakeup(last->inp_socket);
			} else {
				kprintf("rip_input(2) can't append to socket\n");
			}
		} else {
			m_freem(m);
			OSAddAtomic(1, &ipstat.ips_noproto);
			OSAddAtomic(-1, &ipstat.ips_delivered);
		}
	}
unlock:
	/*
	 * Keep the list locked because socket filter may force the socket lock
	 * to be released when calling sbappendaddr() -- see rdar://7627704
	 */
	lck_rw_done(ripcbinfo.mtx);
}
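
/*
 * Illustrative userspace sketch (an editorial addition, not part of the
 * original file): rip_input() above hands a copy of each matching datagram to
 * every raw PCB whose protocol, bound local address, and connected foreign
 * address match, and the datagram still begins with the IP header unless the
 * owner enabled IP_STRIPHDR.  A minimal receiver could look like the code
 * below; opening a raw socket requires privilege (rip_attach checks SS_PRIV).
 */
#if 0	/* example only; never built with the kernel */
#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/ip.h>

int
main(void)
{
	/* ICMP is a convenient protocol for a raw-socket receive test. */
	int s = socket(AF_INET, SOCK_RAW, IPPROTO_ICMP);
	if (s < 0)
		return 1;

	char buf[2048];
	struct sockaddr_in from;
	socklen_t fromlen = sizeof (from);
	ssize_t n = recvfrom(s, buf, sizeof (buf), 0,
	    (struct sockaddr *)&from, &fromlen);
	if (n > 0) {
		/* Without IP_STRIPHDR the buffer starts with the IP header. */
		struct ip *iph = (struct ip *)buf;
		unsigned int hlen = (unsigned int)iph->ip_hl << 2;
		printf("%zd bytes received, payload at offset %u\n", n, hlen);
	}
	close(s);
	return 0;
}
#endif	/* example */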

/*
 * Generate IP header and pass packet to ip_output.
 * Tack on options user may have setup with control call.
 */
int
rip_output(
	struct mbuf *m,
	struct socket *so,
	u_int32_t dst,
	struct mbuf *control)
{
	register struct ip *ip;
	register struct inpcb *inp = sotoinpcb(so);
	int flags = (so->so_options & SO_DONTROUTE) | IP_ALLOWBROADCAST;
	struct ip_out_args ipoa;
	struct ip_moptions *imo;
	int error = 0;
	mbuf_traffic_class_t mtc = MBUF_TC_UNSPEC;

	if (control != NULL) {
		mtc = mbuf_traffic_class_from_control(control);

		m_freem(control);
	}
	/* If socket was bound to an ifindex, tell ip_output about it */
	ipoa.ipoa_boundif = (inp->inp_flags & INP_BOUND_IF) ?
	    inp->inp_boundif : IFSCOPE_NONE;
	ipoa.ipoa_nocell = (inp->inp_flags & INP_NO_IFT_CELLULAR) ? 1 : 0;
	flags |= IP_OUTARGS;

	/*
	 * If the user handed us a complete IP packet, use it.
	 * Otherwise, allocate an mbuf for a header and fill it in.
	 */
	if ((inp->inp_flags & INP_HDRINCL) == 0) {
		if (m->m_pkthdr.len + sizeof(struct ip) > IP_MAXPACKET) {
			m_freem(m);
			return(EMSGSIZE);
		}
		M_PREPEND(m, sizeof(struct ip), M_WAIT);
		if (m == NULL)
			return ENOBUFS;
		ip = mtod(m, struct ip *);
		ip->ip_tos = inp->inp_ip_tos;
		ip->ip_off = 0;
		ip->ip_p = inp->inp_ip_p;
		ip->ip_len = m->m_pkthdr.len;
		ip->ip_src = inp->inp_laddr;
		ip->ip_dst.s_addr = dst;
		ip->ip_ttl = inp->inp_ip_ttl;
	} else {
		if (m->m_pkthdr.len > IP_MAXPACKET) {
			m_freem(m);
			return(EMSGSIZE);
		}
		ip = mtod(m, struct ip *);
		/* don't allow both user specified and setsockopt options,
		   and don't allow packet length sizes that will crash */
		if (((IP_VHL_HL(ip->ip_vhl) != (sizeof (*ip) >> 2))
		    && inp->inp_options)
		    || (ip->ip_len > m->m_pkthdr.len)
		    || (ip->ip_len < (IP_VHL_HL(ip->ip_vhl) << 2))) {
			m_freem(m);
			return EINVAL;
		}
		if (ip->ip_id == 0)
#if RANDOM_IP_ID
			ip->ip_id = ip_randomid();
#else
			ip->ip_id = htons(ip_id++);
#endif
		/* XXX prevent ip_output from overwriting header fields */
		flags |= IP_RAWOUTPUT;
		OSAddAtomic(1, &ipstat.ips_rawout);
	}

#if IPSEC
	if (ipsec_bypass == 0 && ipsec_setsocket(m, so) != 0) {
		m_freem(m);
		return ENOBUFS;
	}
#endif /*IPSEC*/

	if (inp->inp_route.ro_rt != NULL &&
	    inp->inp_route.ro_rt->generation_id != route_generation) {
		rtfree(inp->inp_route.ro_rt);
		inp->inp_route.ro_rt = NULL;
	}

	set_packet_tclass(m, so, mtc, 0);

#if CONFIG_MACF_NET
	mac_mbuf_label_associate_inpcb(inp, m);
#endif

	imo = inp->inp_moptions;
	if (imo != NULL)
		IMO_ADDREF(imo);
	/*
	 * The domain lock is held across ip_output, so it is okay
	 * to pass the PCB cached route pointer directly to IP and
	 * the modules beneath it.
	 */
	error = ip_output(m, inp->inp_options, &inp->inp_route, flags,
	    imo, &ipoa);

	if (imo != NULL)
		IMO_REMREF(imo);

	if (inp->inp_route.ro_rt != NULL) {
		struct rtentry *rt = inp->inp_route.ro_rt;
		unsigned int outif;

		if ((rt->rt_flags & (RTF_MULTICAST|RTF_BROADCAST)) ||
		    inp->inp_socket == NULL ||
		    !(inp->inp_socket->so_state & SS_ISCONNECTED)) {
			rt = NULL;	/* unusable */
		}
		/*
		 * Always discard the cached route for unconnected
		 * socket or if it is a multicast route.
		 */
		if (rt == NULL) {
			rtfree(inp->inp_route.ro_rt);
			inp->inp_route.ro_rt = NULL;
		}
		/*
		 * If this is a connected socket and the destination
		 * route is unicast, update outif with that of the route
		 * interface index used by IP.
		 */
		if (rt != NULL &&
		    (outif = rt->rt_ifp->if_index) != inp->inp_last_outif)
			inp->inp_last_outif = outif;
	}

	return (error);
}
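
/*
 * Illustrative userspace sketch (an editorial addition, not part of the
 * original file; the function name is invented for the example): with
 * IP_HDRINCL set, rip_output() takes the caller-supplied header through the
 * INP_HDRINCL branch above.  The sanity checks compare ip_len directly
 * against the mbuf length, so the length field is effectively expected in
 * host byte order, and an ip_id of zero lets the kernel choose an identifier.
 * Without IP_HDRINCL the kernel prepends a header from the PCB defaults.
 */
#if 0	/* example only; never built with the kernel */
#include <string.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/ip.h>

int
send_hdrincl(const char *dst)
{
	int s = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);
	int on = 1;
	struct {
		struct ip ip;
		char payload[8];	/* dummy payload for the sketch */
	} pkt;
	struct sockaddr_in to;
	ssize_t n;

	if (s < 0)
		return -1;
	/* Select the INP_HDRINCL path in rip_output(). */
	if (setsockopt(s, IPPROTO_IP, IP_HDRINCL, &on, sizeof (on)) < 0) {
		close(s);
		return -1;
	}

	memset(&pkt, 0, sizeof (pkt));
	pkt.ip.ip_v = 4;
	pkt.ip.ip_hl = sizeof (struct ip) >> 2;	/* 20-byte header, no options */
	pkt.ip.ip_ttl = 64;
	pkt.ip.ip_p = IPPROTO_RAW;		/* arbitrary payload protocol */
	pkt.ip.ip_len = sizeof (pkt);		/* host order; checked above */
	pkt.ip.ip_id = 0;			/* 0: kernel assigns an ID */
	pkt.ip.ip_dst.s_addr = inet_addr(dst);
	/* ip_src left unspecified; the IP layer typically fills it in,
	   along with the header checksum. */

	memset(&to, 0, sizeof (to));
	to.sin_family = AF_INET;
	to.sin_len = sizeof (to);
	to.sin_addr = pkt.ip.ip_dst;

	n = sendto(s, &pkt, sizeof (pkt), 0,
	    (struct sockaddr *)&to, sizeof (to));
	close(s);
	return (n < 0) ? -1 : 0;
}
#endif	/* example */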

#if IPFIREWALL
int
load_ipfw(void)
{
	kern_return_t err;

	ipfw_init();

#if DUMMYNET
	if (!DUMMYNET_LOADED)
		ip_dn_init();
#endif /* DUMMYNET */
	err = 0;

	return err == 0 && ip_fw_ctl_ptr == NULL ? -1 : err;
}
#endif /* IPFIREWALL */

/*
 * Raw IP socket option processing.
 */
int
rip_ctloutput(so, sopt)
	struct socket *so;
	struct sockopt *sopt;
{
	struct inpcb *inp = sotoinpcb(so);
	int error, optval;

	if (sopt->sopt_level != IPPROTO_IP)
		return (EINVAL);

	error = 0;

	switch (sopt->sopt_dir) {
	case SOPT_GET:
		switch (sopt->sopt_name) {
		case IP_HDRINCL:
			optval = inp->inp_flags & INP_HDRINCL;
			error = sooptcopyout(sopt, &optval, sizeof optval);
			break;

		case IP_STRIPHDR:
			optval = inp->inp_flags & INP_STRIPHDR;
			error = sooptcopyout(sopt, &optval, sizeof optval);
			break;

#if IPFIREWALL
		case IP_FW_ADD:
		case IP_FW_GET:
		case IP_OLD_FW_ADD:
		case IP_OLD_FW_GET:
			if (ip_fw_ctl_ptr == 0)
				error = load_ipfw();
			if (ip_fw_ctl_ptr && error == 0)
				error = ip_fw_ctl_ptr(sopt);
			else
				error = ENOPROTOOPT;
			break;
#endif /* IPFIREWALL */

#if DUMMYNET
		case IP_DUMMYNET_GET:
			if (DUMMYNET_LOADED)
				error = ip_dn_ctl_ptr(sopt);
			else
				error = ENOPROTOOPT;
			break;
#endif /* DUMMYNET */

#if MROUTING
		case MRT_INIT:
		case MRT_DONE:
		case MRT_ADD_VIF:
		case MRT_DEL_VIF:
		case MRT_ADD_MFC:
		case MRT_DEL_MFC:
		case MRT_VERSION:
		case MRT_ASSERT:
			error = ip_mrouter_get(so, sopt);
			break;
#endif /* MROUTING */

		default:
			error = ip_ctloutput(so, sopt);
			break;
		}
		break;

	case SOPT_SET:
		switch (sopt->sopt_name) {
		case IP_HDRINCL:
			error = sooptcopyin(sopt, &optval, sizeof optval,
			    sizeof optval);
			if (error)
				break;
			if (optval)
				inp->inp_flags |= INP_HDRINCL;
			else
				inp->inp_flags &= ~INP_HDRINCL;
			break;

		case IP_STRIPHDR:
			error = sooptcopyin(sopt, &optval, sizeof optval,
			    sizeof optval);
			if (error)
				break;
			if (optval)
				inp->inp_flags |= INP_STRIPHDR;
			else
				inp->inp_flags &= ~INP_STRIPHDR;
			break;


#if IPFIREWALL
		case IP_FW_ADD:
		case IP_FW_DEL:
		case IP_FW_FLUSH:
		case IP_FW_ZERO:
		case IP_FW_RESETLOG:
		case IP_OLD_FW_ADD:
		case IP_OLD_FW_DEL:
		case IP_OLD_FW_FLUSH:
		case IP_OLD_FW_ZERO:
		case IP_OLD_FW_RESETLOG:
			if (ip_fw_ctl_ptr == 0)
				error = load_ipfw();
			if (ip_fw_ctl_ptr && error == 0)
				error = ip_fw_ctl_ptr(sopt);
			else
				error = ENOPROTOOPT;
			break;
#endif /* IPFIREWALL */

#if DUMMYNET
		case IP_DUMMYNET_CONFIGURE:
		case IP_DUMMYNET_DEL:
		case IP_DUMMYNET_FLUSH:
			if (DUMMYNET_LOADED)
				error = ip_dn_ctl_ptr(sopt);
			else
				error = ENOPROTOOPT;
			break;
#endif

#if MROUTING
		case IP_RSVP_ON:
			error = ip_rsvp_init(so);
			break;

		case IP_RSVP_OFF:
			error = ip_rsvp_done();
			break;

			/* XXX - should be combined */
		case IP_RSVP_VIF_ON:
			error = ip_rsvp_vif_init(so, sopt);
			break;

		case IP_RSVP_VIF_OFF:
			error = ip_rsvp_vif_done(so, sopt);
			break;

		case MRT_INIT:
		case MRT_DONE:
		case MRT_ADD_VIF:
		case MRT_DEL_VIF:
		case MRT_ADD_MFC:
		case MRT_DEL_MFC:
		case MRT_VERSION:
		case MRT_ASSERT:
			error = ip_mrouter_set(so, sopt);
			break;
#endif /* MROUTING */

		default:
			error = ip_ctloutput(so, sopt);
			break;
		}
		break;
	}

	return (error);
}
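
/*
 * Illustrative userspace sketch (an editorial addition, not part of the
 * original file; the function name is invented for the example): IP_HDRINCL
 * and IP_STRIPHDR are plain flag toggles on the PCB, handled by the
 * SOPT_SET/SOPT_GET cases above.  IP_STRIPHDR makes rip_input() trim the IP
 * header before queueing received datagrams on the socket.
 */
#if 0	/* example only; never built with the kernel */
#include <sys/socket.h>
#include <netinet/in.h>

int
enable_striphdr(int raw_sock)
{
	int on = 1, val = 0;
	socklen_t len = sizeof (val);

	/* SOPT_SET path: sets INP_STRIPHDR on the inpcb. */
	if (setsockopt(raw_sock, IPPROTO_IP, IP_STRIPHDR, &on, sizeof (on)) < 0)
		return -1;
	/* SOPT_GET path: reads the flag back (nonzero when set). */
	if (getsockopt(raw_sock, IPPROTO_IP, IP_STRIPHDR, &val, &len) < 0)
		return -1;
	return val ? 0 : -1;
}
#endif	/* example */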

/*
 * This function exists solely to receive the PRC_IFDOWN messages which
 * are sent by if_down().  It looks for an ifaddr whose ifa_addr is sa,
 * and calls in_ifadown() to remove all routes corresponding to that address.
 * It also receives the PRC_IFUP messages from if_up() and reinstalls the
 * interface routes.
 */
void
rip_ctlinput(
	int cmd,
	struct sockaddr *sa,
	__unused void *vip)
{
	struct in_ifaddr *ia;
	struct ifnet *ifp;
	int err;
	int flags, done = 0;

	switch (cmd) {
	case PRC_IFDOWN:
		lck_rw_lock_shared(in_ifaddr_rwlock);
		for (ia = in_ifaddrhead.tqh_first; ia;
		     ia = ia->ia_link.tqe_next) {
			IFA_LOCK(&ia->ia_ifa);
			if (ia->ia_ifa.ifa_addr == sa &&
			    (ia->ia_flags & IFA_ROUTE)) {
				done = 1;
				IFA_ADDREF_LOCKED(&ia->ia_ifa);
				IFA_UNLOCK(&ia->ia_ifa);
				lck_rw_done(in_ifaddr_rwlock);
				lck_mtx_lock(rnh_lock);
				/*
				 * in_ifscrub kills the interface route.
				 */
				in_ifscrub(ia->ia_ifp, ia, 1);
				/*
				 * in_ifadown gets rid of all the rest of
				 * the routes.  This is not quite the right
				 * thing to do, but at least if we are running
				 * a routing process they will come back.
				 */
				in_ifadown(&ia->ia_ifa, 1);
				lck_mtx_unlock(rnh_lock);
				IFA_REMREF(&ia->ia_ifa);
				break;
			}
			IFA_UNLOCK(&ia->ia_ifa);
		}
		if (!done)
			lck_rw_done(in_ifaddr_rwlock);
		break;

	case PRC_IFUP:
		lck_rw_lock_shared(in_ifaddr_rwlock);
		for (ia = in_ifaddrhead.tqh_first; ia;
		     ia = ia->ia_link.tqe_next) {
			IFA_LOCK(&ia->ia_ifa);
			if (ia->ia_ifa.ifa_addr == sa) {
				/* keep it locked */
				break;
			}
			IFA_UNLOCK(&ia->ia_ifa);
		}
		if (ia == NULL || (ia->ia_flags & IFA_ROUTE) ||
		    (ia->ia_ifa.ifa_debug & IFD_NOTREADY)) {
			if (ia != NULL)
				IFA_UNLOCK(&ia->ia_ifa);
			lck_rw_done(in_ifaddr_rwlock);
			return;
		}
		IFA_ADDREF_LOCKED(&ia->ia_ifa);
		IFA_UNLOCK(&ia->ia_ifa);
		lck_rw_done(in_ifaddr_rwlock);

		flags = RTF_UP;
		ifp = ia->ia_ifa.ifa_ifp;

		if ((ifp->if_flags & IFF_LOOPBACK)
		    || (ifp->if_flags & IFF_POINTOPOINT))
			flags |= RTF_HOST;

		err = rtinit(&ia->ia_ifa, RTM_ADD, flags);
		if (err == 0) {
			IFA_LOCK_SPIN(&ia->ia_ifa);
			ia->ia_flags |= IFA_ROUTE;
			IFA_UNLOCK(&ia->ia_ifa);
		}
		IFA_REMREF(&ia->ia_ifa);
		break;
	}
}

u_int32_t rip_sendspace = RIPSNDQ;
u_int32_t rip_recvspace = RIPRCVQ;

SYSCTL_INT(_net_inet_raw, OID_AUTO, maxdgram, CTLFLAG_RW | CTLFLAG_LOCKED,
    &rip_sendspace, 0, "Maximum outgoing raw IP datagram size");
SYSCTL_INT(_net_inet_raw, OID_AUTO, recvspace, CTLFLAG_RW | CTLFLAG_LOCKED,
    &rip_recvspace, 0, "Maximum incoming raw IP datagram size");
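
/*
 * Illustrative userspace sketch (an editorial addition, not part of the
 * original file; the function name is invented for the example):
 * rip_sendspace and rip_recvspace seed soreserve() in rip_attach() below and
 * are exported as net.inet.raw.maxdgram and net.inet.raw.recvspace.
 */
#if 0	/* example only; never built with the kernel */
#include <stdio.h>
#include <sys/types.h>
#include <sys/sysctl.h>

int
show_raw_space(void)
{
	u_int32_t maxdgram = 0, recvspace = 0;
	size_t len = sizeof (maxdgram);

	if (sysctlbyname("net.inet.raw.maxdgram", &maxdgram, &len, NULL, 0) < 0)
		return -1;
	len = sizeof (recvspace);
	if (sysctlbyname("net.inet.raw.recvspace", &recvspace, &len, NULL, 0) < 0)
		return -1;
	printf("raw send space %u, receive space %u\n", maxdgram, recvspace);
	return 0;
}
#endif	/* example */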

static int
rip_attach(struct socket *so, int proto, struct proc *p)
{
	struct inpcb *inp;
	int error;

	inp = sotoinpcb(so);
	if (inp)
		panic("rip_attach");
	if ((so->so_state & SS_PRIV) == 0)
		return (EPERM);

	error = soreserve(so, rip_sendspace, rip_recvspace);
	if (error)
		return error;
	error = in_pcballoc(so, &ripcbinfo, p);
	if (error)
		return error;
	inp = (struct inpcb *)so->so_pcb;
	inp->inp_vflag |= INP_IPV4;
	inp->inp_ip_p = proto;
	inp->inp_ip_ttl = ip_defttl;
	return 0;
}

__private_extern__ int
rip_detach(struct socket *so)
{
	struct inpcb *inp;

	inp = sotoinpcb(so);
	if (inp == 0)
		panic("rip_detach");
#if MROUTING
	if (so == ip_mrouter)
		ip_mrouter_done();
	ip_rsvp_force_done(so);
	if (so == ip_rsvpd)
		ip_rsvp_done();
#endif /* MROUTING */
	in_pcbdetach(inp);
	return 0;
}

__private_extern__ int
rip_abort(struct socket *so)
{
	soisdisconnected(so);
	return rip_detach(so);
}

__private_extern__ int
rip_disconnect(struct socket *so)
{
	if ((so->so_state & SS_ISCONNECTED) == 0)
		return ENOTCONN;
	return rip_abort(so);
}

__private_extern__ int
rip_bind(struct socket *so, struct sockaddr *nam, __unused struct proc *p)
{
	struct inpcb *inp = sotoinpcb(so);
	struct sockaddr_in *addr = (struct sockaddr_in *)nam;
	struct ifaddr *ifa = NULL;
	unsigned int outif = 0;

	if (nam->sa_len != sizeof(*addr))
		return EINVAL;

	if (TAILQ_EMPTY(&ifnet_head) || ((addr->sin_family != AF_INET) &&
	    (addr->sin_family != AF_IMPLINK)) ||
	    (addr->sin_addr.s_addr &&
	     (ifa = ifa_ifwithaddr((struct sockaddr *)addr)) == 0)) {
		return EADDRNOTAVAIL;
	}
	else if (ifa) {
		IFA_LOCK(ifa);
		outif = ifa->ifa_ifp->if_index;
		IFA_UNLOCK(ifa);
		IFA_REMREF(ifa);
	}
	inp->inp_laddr = addr->sin_addr;
	inp->inp_last_outif = outif;
	return 0;
}

__private_extern__ int
rip_connect(struct socket *so, struct sockaddr *nam, __unused struct proc *p)
{
	struct inpcb *inp = sotoinpcb(so);
	struct sockaddr_in *addr = (struct sockaddr_in *)nam;

	if (nam->sa_len != sizeof(*addr))
		return EINVAL;
	if (TAILQ_EMPTY(&ifnet_head))
		return EADDRNOTAVAIL;
	if ((addr->sin_family != AF_INET) &&
	    (addr->sin_family != AF_IMPLINK))
		return EAFNOSUPPORT;
	inp->inp_faddr = addr->sin_addr;
	soisconnected(so);
	return 0;
}

__private_extern__ int
rip_shutdown(struct socket *so)
{
	socantsendmore(so);
	return 0;
}

__private_extern__ int
rip_send(struct socket *so, __unused int flags, struct mbuf *m, struct sockaddr *nam,
    struct mbuf *control, __unused struct proc *p)
{
	struct inpcb *inp = sotoinpcb(so);
	register u_int32_t dst;

	if (so->so_state & SS_ISCONNECTED) {
		if (nam) {
			m_freem(m);
			return EISCONN;
		}
		dst = inp->inp_faddr.s_addr;
	} else {
		if (nam == NULL) {
			m_freem(m);
			return ENOTCONN;
		}
		dst = ((struct sockaddr_in *)nam)->sin_addr.s_addr;
	}
	return rip_output(m, so, dst, control);
}

/*
 * Note: rip_unlock is called from different protocols instead of the generic
 * socket_unlock; it will handle the socket dealloc on the last reference.
 */
int
rip_unlock(struct socket *so, int refcount, void *debug)
{
	void *lr_saved;
	struct inpcb *inp = sotoinpcb(so);

	if (debug == NULL)
		lr_saved = __builtin_return_address(0);
	else
		lr_saved = debug;

	if (refcount) {
		if (so->so_usecount <= 0) {
			panic("rip_unlock: bad refcount so=%p val=%x lrh= %s\n",
			    so, so->so_usecount, solockhistory_nr(so));
			/* NOTREACHED */
		}
		so->so_usecount--;
		if (so->so_usecount == 0 && (inp->inp_wantcnt == WNT_STOPUSING)) {
			/* cleanup after last reference */
			lck_mtx_unlock(so->so_proto->pr_domain->dom_mtx);
			lck_rw_lock_exclusive(ripcbinfo.mtx);
			if (inp->inp_state != INPCB_STATE_DEAD) {
#if INET6
				if (INP_CHECK_SOCKAF(so, AF_INET6))
					in6_pcbdetach(inp);
				else
#endif /* INET6 */
					in_pcbdetach(inp);
			}
			in_pcbdispose(inp);
			lck_rw_done(ripcbinfo.mtx);
			return(0);
		}
	}
	so->unlock_lr[so->next_unlock_lr] = lr_saved;
	so->next_unlock_lr = (so->next_unlock_lr+1) % SO_LCKDBG_MAX;
	lck_mtx_unlock(so->so_proto->pr_domain->dom_mtx);
	return(0);
}

static int
rip_pcblist SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	int error, i, n;
	struct inpcb *inp, **inp_list;
	inp_gen_t gencnt;
	struct xinpgen xig;

	/*
	 * The process of preparing the TCB list is too time-consuming and
	 * resource-intensive to repeat twice on every request.
	 */
	lck_rw_lock_exclusive(ripcbinfo.mtx);
	if (req->oldptr == USER_ADDR_NULL) {
		n = ripcbinfo.ipi_count;
		req->oldidx = 2 * (sizeof xig)
			+ (n + n/8) * sizeof(struct xinpcb);
		lck_rw_done(ripcbinfo.mtx);
		return 0;
	}

	if (req->newptr != USER_ADDR_NULL) {
		lck_rw_done(ripcbinfo.mtx);
		return EPERM;
	}

	/*
	 * OK, now we're committed to doing something.
	 */
	gencnt = ripcbinfo.ipi_gencnt;
	n = ripcbinfo.ipi_count;

	bzero(&xig, sizeof(xig));
	xig.xig_len = sizeof xig;
	xig.xig_count = n;
	xig.xig_gen = gencnt;
	xig.xig_sogen = so_gencnt;
	error = SYSCTL_OUT(req, &xig, sizeof xig);
	if (error) {
		lck_rw_done(ripcbinfo.mtx);
		return error;
	}
	/*
	 * We are done if there is no pcb
	 */
	if (n == 0) {
		lck_rw_done(ripcbinfo.mtx);
		return 0;
	}

	inp_list = _MALLOC(n * sizeof *inp_list, M_TEMP, M_WAITOK);
	if (inp_list == 0) {
		lck_rw_done(ripcbinfo.mtx);
		return ENOMEM;
	}

	for (inp = ripcbinfo.listhead->lh_first, i = 0; inp && i < n;
	     inp = inp->inp_list.le_next) {
		if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD)
			inp_list[i++] = inp;
	}
	n = i;

	error = 0;
	for (i = 0; i < n; i++) {
		inp = inp_list[i];
		if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD) {
			struct xinpcb xi;

			bzero(&xi, sizeof(xi));
			xi.xi_len = sizeof xi;
			/* XXX should avoid extra copy */
			inpcb_to_compat(inp, &xi.xi_inp);
			if (inp->inp_socket)
				sotoxsocket(inp->inp_socket, &xi.xi_socket);
			error = SYSCTL_OUT(req, &xi, sizeof xi);
		}
	}
	if (!error) {
		/*
		 * Give the user an updated idea of our state.
		 * If the generation differs from what we told
		 * her before, she knows that something happened
		 * while we were processing this request, and it
		 * might be necessary to retry.
		 */
		bzero(&xig, sizeof(xig));
		xig.xig_len = sizeof xig;
		xig.xig_gen = ripcbinfo.ipi_gencnt;
		xig.xig_sogen = so_gencnt;
		xig.xig_count = ripcbinfo.ipi_count;
		error = SYSCTL_OUT(req, &xig, sizeof xig);
	}
	FREE(inp_list, M_TEMP);
	lck_rw_done(ripcbinfo.mtx);
	return error;
}

SYSCTL_PROC(_net_inet_raw, OID_AUTO/*XXX*/, pcblist, CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0,
    rip_pcblist, "S,xinpcb", "List of active raw IP sockets");
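
/*
 * Illustrative userspace sketch (an editorial addition, not part of the
 * original file; the function name and exact header needs are assumptions):
 * rip_pcblist() above emits a leading struct xinpgen, one struct xinpcb per
 * live PCB, and a trailing struct xinpgen whose generation count lets the
 * reader detect that the list changed while it was being copied out.  A
 * netstat-style consumer of net.inet.raw.pcblist walks the records by their
 * embedded length field:
 */
#if 0	/* example only; never built with the kernel */
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>

int
count_raw_pcbs(void)
{
	size_t len = 0;
	char *buf;
	struct xinpgen *first, *xig;
	int n = 0;

	/* Sizing call: hits the oldptr == USER_ADDR_NULL branch above. */
	if (sysctlbyname("net.inet.raw.pcblist", NULL, &len, NULL, 0) < 0)
		return -1;
	if ((buf = malloc(len)) == NULL)
		return -1;
	if (sysctlbyname("net.inet.raw.pcblist", buf, &len, NULL, 0) < 0) {
		free(buf);
		return -1;
	}

	/* Records larger than an xinpgen are xinpcb entries; the trailing
	   xinpgen terminates the walk. */
	first = (struct xinpgen *)buf;
	for (xig = (struct xinpgen *)((char *)first + first->xig_len);
	    xig->xig_len > sizeof (struct xinpgen);
	    xig = (struct xinpgen *)((char *)xig + xig->xig_len))
		n++;

	/* A changed generation count means the list moved underneath us. */
	printf("%d raw PCBs (gen %llu -> %llu)\n", n,
	    (unsigned long long)first->xig_gen,
	    (unsigned long long)xig->xig_gen);
	free(buf);
	return 0;
}
#endif	/* example */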

#if !CONFIG_EMBEDDED

static int
rip_pcblist64 SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	int error, i, n;
	struct inpcb *inp, **inp_list;
	inp_gen_t gencnt;
	struct xinpgen xig;

	/*
	 * The process of preparing the TCB list is too time-consuming and
	 * resource-intensive to repeat twice on every request.
	 */
	lck_rw_lock_exclusive(ripcbinfo.mtx);
	if (req->oldptr == USER_ADDR_NULL) {
		n = ripcbinfo.ipi_count;
		req->oldidx = 2 * (sizeof xig)
			+ (n + n/8) * sizeof(struct xinpcb64);
		lck_rw_done(ripcbinfo.mtx);
		return 0;
	}

	if (req->newptr != USER_ADDR_NULL) {
		lck_rw_done(ripcbinfo.mtx);
		return EPERM;
	}

	/*
	 * OK, now we're committed to doing something.
	 */
	gencnt = ripcbinfo.ipi_gencnt;
	n = ripcbinfo.ipi_count;

	bzero(&xig, sizeof(xig));
	xig.xig_len = sizeof xig;
	xig.xig_count = n;
	xig.xig_gen = gencnt;
	xig.xig_sogen = so_gencnt;
	error = SYSCTL_OUT(req, &xig, sizeof xig);
	if (error) {
		lck_rw_done(ripcbinfo.mtx);
		return error;
	}
	/*
	 * We are done if there is no pcb
	 */
	if (n == 0) {
		lck_rw_done(ripcbinfo.mtx);
		return 0;
	}

	inp_list = _MALLOC(n * sizeof *inp_list, M_TEMP, M_WAITOK);
	if (inp_list == 0) {
		lck_rw_done(ripcbinfo.mtx);
		return ENOMEM;
	}

	for (inp = ripcbinfo.listhead->lh_first, i = 0; inp && i < n;
	     inp = inp->inp_list.le_next) {
		if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD)
			inp_list[i++] = inp;
	}
	n = i;

	error = 0;
	for (i = 0; i < n; i++) {
		inp = inp_list[i];
		if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD) {
			struct xinpcb64 xi;

			bzero(&xi, sizeof(xi));
			xi.xi_len = sizeof xi;
			inpcb_to_xinpcb64(inp, &xi);
			if (inp->inp_socket)
				sotoxsocket64(inp->inp_socket, &xi.xi_socket);
			error = SYSCTL_OUT(req, &xi, sizeof xi);
		}
	}
	if (!error) {
		/*
		 * Give the user an updated idea of our state.
		 * If the generation differs from what we told
		 * her before, she knows that something happened
		 * while we were processing this request, and it
		 * might be necessary to retry.
		 */
		bzero(&xig, sizeof(xig));
		xig.xig_len = sizeof xig;
		xig.xig_gen = ripcbinfo.ipi_gencnt;
		xig.xig_sogen = so_gencnt;
		xig.xig_count = ripcbinfo.ipi_count;
		error = SYSCTL_OUT(req, &xig, sizeof xig);
	}
	FREE(inp_list, M_TEMP);
	lck_rw_done(ripcbinfo.mtx);
	return error;
}

SYSCTL_PROC(_net_inet_raw, OID_AUTO, pcblist64, CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0,
    rip_pcblist64, "S,xinpcb64", "List of active raw IP sockets");

#endif /* !CONFIG_EMBEDDED */


static int
rip_pcblist_n SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	int error = 0;

	error = get_pcblist_n(IPPROTO_IP, req, &ripcbinfo);

	return error;
}

SYSCTL_PROC(_net_inet_raw, OID_AUTO, pcblist_n, CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0,
    rip_pcblist_n, "S,xinpcb_n", "List of active raw IP sockets");

struct pr_usrreqs rip_usrreqs = {
	rip_abort, pru_accept_notsupp, rip_attach, rip_bind, rip_connect,
	pru_connect2_notsupp, in_control, rip_detach, rip_disconnect,
	pru_listen_notsupp, in_setpeeraddr, pru_rcvd_notsupp,
	pru_rcvoob_notsupp, rip_send, pru_sense_null, rip_shutdown,
	in_setsockaddr, sosend, soreceive, pru_sopoll_notsupp
};
/* DSEP Review Done pl-20051213-v02 @3253 */