/*
 * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Copyright (c) 1982, 1986, 1988, 1993
 *	The Regents of the University of California. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)raw_ip.c	8.7 (Berkeley) 5/15/95
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections. This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/mcache.h>
#include <sys/proc.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <libkern/OSAtomic.h>
#include <kern/zalloc.h>

#include <pexpert/pexpert.h>

#include <net/if.h>
#include <net/route.h>

#define _IP_VHL
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
#include <netinet/ip_var.h>

#if INET6
#include <netinet6/in6_pcb.h>
#endif /* INET6 */

#include <netinet/ip_fw.h>

#if IPSEC
#include <netinet6/ipsec.h>
#endif /* IPSEC */

#if DUMMYNET
#include <netinet/ip_dummynet.h>
#endif /* DUMMYNET */

#if CONFIG_MACF_NET
#include <security/mac_framework.h>
#endif /* CONFIG_MACF_NET */

int load_ipfw(void);
int rip_detach(struct socket *);
int rip_abort(struct socket *);
int rip_disconnect(struct socket *);
int rip_bind(struct socket *, struct sockaddr *, struct proc *);
int rip_connect(struct socket *, struct sockaddr *, struct proc *);
int rip_shutdown(struct socket *);

struct inpcbhead ripcb;
struct inpcbinfo ripcbinfo;

/* control hooks for ipfw and dummynet */
#if IPFIREWALL
ip_fw_ctl_t *ip_fw_ctl_ptr;
#endif /* IPFIREWALL */
#if DUMMYNET
ip_dn_ctl_t *ip_dn_ctl_ptr;
#endif /* DUMMYNET */

/*
 * Nominal space allocated to a raw ip socket.
 */
#define	RIPSNDQ		8192
#define	RIPRCVQ		8192

/*
 * Raw interface to IP protocol.
 */

/*
 * Initialize raw connection block q.
 */
void
rip_init(struct protosw *pp, struct domain *dp)
{
#pragma unused(dp)
	static int rip_initialized = 0;
	struct inpcbinfo *pcbinfo;

	VERIFY((pp->pr_flags & (PR_INITIALIZED|PR_ATTACHED)) == PR_ATTACHED);

	if (rip_initialized)
		return;
	rip_initialized = 1;

	LIST_INIT(&ripcb);
	ripcbinfo.ipi_listhead = &ripcb;
	/*
	 * XXX We don't use the hash list for raw IP, but it's easier
	 * to allocate a one entry hash list than it is to check all
	 * over the place for ipi_hashbase == NULL.
	 */
	ripcbinfo.ipi_hashbase = hashinit(1, M_PCB, &ripcbinfo.ipi_hashmask);
	ripcbinfo.ipi_porthashbase = hashinit(1, M_PCB, &ripcbinfo.ipi_porthashmask);

	ripcbinfo.ipi_zone = zinit(sizeof(struct inpcb),
	    (4096 * sizeof(struct inpcb)), 4096, "ripzone");

	pcbinfo = &ripcbinfo;
	/*
	 * allocate lock group attribute and group for raw IP PCB mutexes
	 */
	pcbinfo->ipi_lock_grp_attr = lck_grp_attr_alloc_init();
	pcbinfo->ipi_lock_grp = lck_grp_alloc_init("ripcb", pcbinfo->ipi_lock_grp_attr);

	/*
	 * allocate the lock attribute for raw IP PCB mutexes
	 */
	pcbinfo->ipi_lock_attr = lck_attr_alloc_init();
	if ((pcbinfo->ipi_lock = lck_rw_alloc_init(pcbinfo->ipi_lock_grp,
	    pcbinfo->ipi_lock_attr)) == NULL) {
		panic("%s: unable to allocate PCB lock\n", __func__);
		/* NOTREACHED */
	}

	in_pcbinfo_attach(&ripcbinfo);
}

static struct sockaddr_in ripsrc = { sizeof(ripsrc), AF_INET, 0, {0}, {0,0,0,0,0,0,0,0,} };

/*
 * Setup generic address and protocol structures
 * for raw_input routine, then pass them along with
 * mbuf chain.
 */
void
rip_input(struct mbuf *m, int iphlen)
{
	struct ip *ip = mtod(m, struct ip *);
	struct inpcb *inp;
	struct inpcb *last = 0;
	struct mbuf *opts = 0;
	int skipit = 0, ret = 0;
	struct ifnet *ifp = m->m_pkthdr.rcvif;

	/* Expect 32-bit aligned data pointer on strict-align platforms */
	MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);

	ripsrc.sin_addr = ip->ip_src;
	lck_rw_lock_shared(ripcbinfo.ipi_lock);
	LIST_FOREACH(inp, &ripcb, inp_list) {
#if INET6
		if ((inp->inp_vflag & INP_IPV4) == 0)
			continue;
#endif
		if (inp->inp_ip_p && (inp->inp_ip_p != ip->ip_p))
			continue;
		if (inp->inp_laddr.s_addr &&
		    inp->inp_laddr.s_addr != ip->ip_dst.s_addr)
			continue;
		if (inp->inp_faddr.s_addr &&
		    inp->inp_faddr.s_addr != ip->ip_src.s_addr)
			continue;
		if (inp_restricted_recv(inp, ifp))
			continue;
		if (last) {
			struct mbuf *n = m_copy(m, 0, (int)M_COPYALL);

			skipit = 0;

#if NECP
			if (n && !necp_socket_is_allowed_to_send_recv_v4(last, 0, 0, &ip->ip_dst, &ip->ip_src, ifp, NULL)) {
				m_freem(n);
				/* do not inject data to pcb */
				skipit = 1;
			}
#endif /* NECP */
#if CONFIG_MACF_NET
			if (n && skipit == 0) {
				if (mac_inpcb_check_deliver(last, n, AF_INET,
				    SOCK_RAW) != 0) {
					m_freem(n);
					skipit = 1;
				}
			}
#endif
			if (n && skipit == 0) {
				int error = 0;
				if ((last->inp_flags & INP_CONTROLOPTS) != 0 ||
				    (last->inp_socket->so_options & SO_TIMESTAMP) != 0 ||
				    (last->inp_socket->so_options & SO_TIMESTAMP_MONOTONIC) != 0) {
					ret = ip_savecontrol(last, &opts, ip, n);
					if (ret != 0) {
						m_freem(n);
						m_freem(opts);
						last = inp;
						continue;
					}
				}
				if (last->inp_flags & INP_STRIPHDR) {
					n->m_len -= iphlen;
					n->m_pkthdr.len -= iphlen;
					n->m_data += iphlen;
				}
				so_recv_data_stat(last->inp_socket, m, 0);
				if (sbappendaddr(&last->inp_socket->so_rcv,
				    (struct sockaddr *)&ripsrc, n,
				    opts, &error) != 0) {
					sorwakeup(last->inp_socket);
				} else {
					if (error) {
						/* should notify about lost packet */
						kprintf("rip_input can't append to socket\n");
					}
				}
				opts = 0;
			}
		}
		last = inp;
	}

	skipit = 0;
#if NECP
	if (last && !necp_socket_is_allowed_to_send_recv_v4(last, 0, 0, &ip->ip_dst, &ip->ip_src, ifp, NULL)) {
		m_freem(m);
		OSAddAtomic(1, &ipstat.ips_delivered);
		/* do not inject data to pcb */
		skipit = 1;
	}
#endif /* NECP */
#if CONFIG_MACF_NET
	if (last && skipit == 0) {
		if (mac_inpcb_check_deliver(last, m, AF_INET, SOCK_RAW) != 0) {
			skipit = 1;
			m_freem(m);
		}
	}
#endif
	if (skipit == 0) {
		if (last) {
			if ((last->inp_flags & INP_CONTROLOPTS) != 0 ||
			    (last->inp_socket->so_options & SO_TIMESTAMP) != 0 ||
			    (last->inp_socket->so_options & SO_TIMESTAMP_MONOTONIC) != 0) {
				ret = ip_savecontrol(last, &opts, ip, m);
				if (ret != 0) {
					m_freem(m);
					m_freem(opts);
					goto unlock;
				}
			}
			if (last->inp_flags & INP_STRIPHDR) {
				m->m_len -= iphlen;
				m->m_pkthdr.len -= iphlen;
				m->m_data += iphlen;
			}
			so_recv_data_stat(last->inp_socket, m, 0);
			if (sbappendaddr(&last->inp_socket->so_rcv,
			    (struct sockaddr *)&ripsrc, m, opts, NULL) != 0) {
				sorwakeup(last->inp_socket);
			} else {
				kprintf("rip_input(2) can't append to socket\n");
			}
		} else {
			m_freem(m);
			OSAddAtomic(1, &ipstat.ips_noproto);
			OSAddAtomic(-1, &ipstat.ips_delivered);
		}
	}
unlock:
	/*
	 * Keep the list locked because socket filter may force the socket lock
	 * to be released when calling sbappendaddr() -- see rdar://7627704
	 */
	lck_rw_done(ripcbinfo.ipi_lock);
}
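
/*
 * Illustrative only (not part of the original file): a minimal userspace
 * sketch of how the receive path above is exercised. Every PCB whose
 * protocol/address filters match gets its own copy of the datagram, and
 * unless IP_STRIPHDR is set the IP header is delivered with the data.
 * Guarded out so it can never affect the kernel build; only standard
 * POSIX socket calls are assumed.
 */
#if 0
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <stdio.h>

int
main(void)
{
	/* Raw sockets require privilege; see the SS_PRIV check in rip_attach(). */
	int s = socket(AF_INET, SOCK_RAW, IPPROTO_ICMP);
	if (s < 0)
		return (1);
	for (;;) {
		char buf[2048];
		struct sockaddr_in src;
		socklen_t slen = sizeof (src);
		/* Each matching datagram arrives with its IP header intact. */
		ssize_t n = recvfrom(s, buf, sizeof (buf), 0,
		    (struct sockaddr *)&src, &slen);
		if (n < 0)
			return (1);
		printf("%zd bytes from raw socket\n", n);
	}
}
#endif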

/*
 * Generate IP header and pass packet to ip_output.
 * Tack on options user may have setup with control call.
 */
int
rip_output(
	struct mbuf *m,
	struct socket *so,
	u_int32_t dst,
	struct mbuf *control)
{
	struct ip *ip;
	struct inpcb *inp = sotoinpcb(so);
	int flags = (so->so_options & SO_DONTROUTE) | IP_ALLOWBROADCAST;
	struct ip_out_args ipoa =
	    { IFSCOPE_NONE, { 0 }, IPOAF_SELECT_SRCIF, 0 };
	struct ip_moptions *imo;
	int error = 0;
	mbuf_svc_class_t msc = MBUF_SC_UNSPEC;

	if (control != NULL) {
		msc = mbuf_service_class_from_control(control);

		m_freem(control);
		control = NULL;
	}

	if (inp == NULL
#if NECP
	    || (necp_socket_should_use_flow_divert(inp))
#endif /* NECP */
	    ) {
		if (m != NULL)
			m_freem(m);
		VERIFY(control == NULL);
		return (inp == NULL ? EINVAL : EPROTOTYPE);
	}

	flags |= IP_OUTARGS;
	/* If socket was bound to an ifindex, tell ip_output about it */
	if (inp->inp_flags & INP_BOUND_IF) {
		ipoa.ipoa_boundif = inp->inp_boundifp->if_index;
		ipoa.ipoa_flags |= IPOAF_BOUND_IF;
	}
	if (INP_NO_CELLULAR(inp))
		ipoa.ipoa_flags |= IPOAF_NO_CELLULAR;
	if (INP_NO_EXPENSIVE(inp))
		ipoa.ipoa_flags |= IPOAF_NO_EXPENSIVE;
	if (INP_AWDL_UNRESTRICTED(inp))
		ipoa.ipoa_flags |= IPOAF_AWDL_UNRESTRICTED;

	if (inp->inp_flowhash == 0)
		inp->inp_flowhash = inp_calc_flowhash(inp);

	/*
	 * If the user handed us a complete IP packet, use it.
	 * Otherwise, allocate an mbuf for a header and fill it in.
	 */
	if ((inp->inp_flags & INP_HDRINCL) == 0) {
		if (m->m_pkthdr.len + sizeof(struct ip) > IP_MAXPACKET) {
			m_freem(m);
			return (EMSGSIZE);
		}
		M_PREPEND(m, sizeof(struct ip), M_WAIT);
		if (m == NULL)
			return (ENOBUFS);
		ip = mtod(m, struct ip *);
		ip->ip_tos = inp->inp_ip_tos;
		ip->ip_off = 0;
		ip->ip_p = inp->inp_ip_p;
		ip->ip_len = m->m_pkthdr.len;
		ip->ip_src = inp->inp_laddr;
		ip->ip_dst.s_addr = dst;
		ip->ip_ttl = inp->inp_ip_ttl;
	} else {
		if (m->m_pkthdr.len > IP_MAXPACKET) {
			m_freem(m);
			return (EMSGSIZE);
		}
		ip = mtod(m, struct ip *);
		/*
		 * Don't allow both user specified and setsockopt options,
		 * and don't allow packet length sizes that will crash.
		 */
		if (((IP_VHL_HL(ip->ip_vhl) != (sizeof (*ip) >> 2))
		    && inp->inp_options)
		    || (ip->ip_len > m->m_pkthdr.len)
		    || (ip->ip_len < (IP_VHL_HL(ip->ip_vhl) << 2))) {
			m_freem(m);
			return (EINVAL);
		}
		if (ip->ip_id == 0)
			ip->ip_id = ip_randomid();
		/* XXX prevent ip_output from overwriting header fields */
		flags |= IP_RAWOUTPUT;
		OSAddAtomic(1, &ipstat.ips_rawout);
	}

	if (inp->inp_laddr.s_addr != INADDR_ANY)
		ipoa.ipoa_flags |= IPOAF_BOUND_SRCADDR;

#if NECP
	{
		necp_kernel_policy_id policy_id;
		if (!necp_socket_is_allowed_to_send_recv_v4(inp, 0, 0, &ip->ip_src, &ip->ip_dst, NULL, &policy_id)) {
			m_freem(m);
			return (EHOSTUNREACH);
		}

		necp_mark_packet_from_socket(m, inp, policy_id);
	}
#endif /* NECP */

#if IPSEC
	if (inp->inp_sp != NULL && ipsec_setsocket(m, so) != 0) {
		m_freem(m);
		return (ENOBUFS);
	}
#endif /* IPSEC */

	if (ROUTE_UNUSABLE(&inp->inp_route))
		ROUTE_RELEASE(&inp->inp_route);

	set_packet_service_class(m, so, msc, 0);
	m->m_pkthdr.pkt_flowsrc = FLOWSRC_INPCB;
	m->m_pkthdr.pkt_flowid = inp->inp_flowhash;
	m->m_pkthdr.pkt_flags |= (PKTF_FLOW_ID | PKTF_FLOW_LOCALSRC |
	    PKTF_FLOW_RAWSOCK);
	m->m_pkthdr.pkt_proto = inp->inp_ip_p;

#if CONFIG_MACF_NET
	mac_mbuf_label_associate_inpcb(inp, m);
#endif

	imo = inp->inp_moptions;
	if (imo != NULL)
		IMO_ADDREF(imo);
	/*
	 * The domain lock is held across ip_output, so it is okay
	 * to pass the PCB cached route pointer directly to IP and
	 * the modules beneath it.
	 */
	error = ip_output(m, inp->inp_options, &inp->inp_route, flags,
	    imo, &ipoa);

	if (imo != NULL)
		IMO_REMREF(imo);

	if (inp->inp_route.ro_rt != NULL) {
		struct rtentry *rt = inp->inp_route.ro_rt;
		struct ifnet *outif;

		if ((rt->rt_flags & (RTF_MULTICAST|RTF_BROADCAST)) ||
		    inp->inp_socket == NULL ||
		    !(inp->inp_socket->so_state & SS_ISCONNECTED)) {
			rt = NULL;	/* unusable */
		}
		/*
		 * Always discard the cached route for unconnected
		 * socket or if it is a multicast route.
		 */
		if (rt == NULL)
			ROUTE_RELEASE(&inp->inp_route);

		/*
		 * If this is a connected socket and the destination
		 * route is unicast, update outif with that of the
		 * route interface used by IP.
		 */
		if (rt != NULL && (outif = rt->rt_ifp) != inp->inp_last_outifp)
			inp->inp_last_outifp = outif;
	} else {
		ROUTE_RELEASE(&inp->inp_route);
	}

	/*
	 * If output interface was cellular/expensive, and this socket is
	 * denied access to it, generate an event.
	 */
	if (error != 0 && (ipoa.ipoa_retflags & IPOARF_IFDENIED) &&
	    (INP_NO_CELLULAR(inp) || INP_NO_EXPENSIVE(inp)))
		soevent(so, (SO_FILT_HINT_LOCKED|SO_FILT_HINT_IFDENIED));

	return (error);
}
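
/*
 * Illustrative only (not part of the original file): a userspace sketch
 * of the INP_HDRINCL branch above, where the caller supplies the entire
 * IP header and rip_output() only validates it. Guarded out of the
 * build. Assumes the usual BSD-derived convention that ip_len is given
 * in host byte order for IP_HDRINCL sends; field names follow the
 * userspace struct ip (ip_hl/ip_v) rather than the kernel's ip_vhl.
 */
#if 0
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <string.h>

static int
send_hdrincl(in_addr_t src, in_addr_t dst)
{
	int s, on = 1;
	char pkt[sizeof (struct ip) + 8];	/* header + 8 payload bytes */
	struct ip *ip = (struct ip *)(void *)pkt;
	struct sockaddr_in sin;

	if ((s = socket(AF_INET, SOCK_RAW, IPPROTO_RAW)) < 0 ||
	    setsockopt(s, IPPROTO_IP, IP_HDRINCL, &on, sizeof (on)) < 0)
		return (-1);

	memset(pkt, 0, sizeof (pkt));
	ip->ip_hl = sizeof (struct ip) >> 2;	/* must match the checks above */
	ip->ip_v = IPVERSION;
	ip->ip_len = sizeof (pkt);	/* must not exceed what is sent */
	ip->ip_id = 0;			/* 0 lets the kernel call ip_randomid() */
	ip->ip_ttl = 64;
	ip->ip_p = IPPROTO_RAW;
	ip->ip_src.s_addr = src;
	ip->ip_dst.s_addr = dst;

	memset(&sin, 0, sizeof (sin));
	sin.sin_family = AF_INET;
	sin.sin_len = sizeof (sin);
	sin.sin_addr.s_addr = dst;

	return (sendto(s, pkt, sizeof (pkt), 0,
	    (struct sockaddr *)&sin, sizeof (sin)) < 0 ? -1 : 0);
}
#endif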

#if IPFIREWALL
int
load_ipfw(void)
{
	kern_return_t err;

	ipfw_init();

#if DUMMYNET
	if (!DUMMYNET_LOADED)
		ip_dn_init();
#endif /* DUMMYNET */
	err = 0;

	return (err == 0 && ip_fw_ctl_ptr == NULL ? -1 : err);
}
#endif /* IPFIREWALL */

/*
 * Raw IP socket option processing.
 */
int
rip_ctloutput(struct socket *so, struct sockopt *sopt)
{
	struct inpcb *inp = sotoinpcb(so);
	int error, optval;

	/* Allow <SOL_SOCKET,SO_FLUSH> at this level */
	if (sopt->sopt_level != IPPROTO_IP &&
	    !(sopt->sopt_level == SOL_SOCKET && sopt->sopt_name == SO_FLUSH))
		return (EINVAL);

	error = 0;

	switch (sopt->sopt_dir) {
	case SOPT_GET:
		switch (sopt->sopt_name) {
		case IP_HDRINCL:
			optval = inp->inp_flags & INP_HDRINCL;
			error = sooptcopyout(sopt, &optval, sizeof optval);
			break;

		case IP_STRIPHDR:
			optval = inp->inp_flags & INP_STRIPHDR;
			error = sooptcopyout(sopt, &optval, sizeof optval);
			break;

#if IPFIREWALL
		case IP_FW_ADD:
		case IP_FW_GET:
		case IP_OLD_FW_ADD:
		case IP_OLD_FW_GET:
			if (ip_fw_ctl_ptr == 0)
				error = load_ipfw();
			if (ip_fw_ctl_ptr && error == 0)
				error = ip_fw_ctl_ptr(sopt);
			else
				error = ENOPROTOOPT;
			break;
#endif /* IPFIREWALL */

#if DUMMYNET
		case IP_DUMMYNET_GET:
			if (!DUMMYNET_LOADED)
				ip_dn_init();
			if (DUMMYNET_LOADED)
				error = ip_dn_ctl_ptr(sopt);
			else
				error = ENOPROTOOPT;
			break;
#endif /* DUMMYNET */

		default:
			error = ip_ctloutput(so, sopt);
			break;
		}
		break;

	case SOPT_SET:
		switch (sopt->sopt_name) {
		case IP_HDRINCL:
			error = sooptcopyin(sopt, &optval, sizeof optval,
			    sizeof optval);
			if (error)
				break;
			if (optval)
				inp->inp_flags |= INP_HDRINCL;
			else
				inp->inp_flags &= ~INP_HDRINCL;
			break;

		case IP_STRIPHDR:
			error = sooptcopyin(sopt, &optval, sizeof optval,
			    sizeof optval);
			if (error)
				break;
			if (optval)
				inp->inp_flags |= INP_STRIPHDR;
			else
				inp->inp_flags &= ~INP_STRIPHDR;
			break;

#if IPFIREWALL
		case IP_FW_ADD:
		case IP_FW_DEL:
		case IP_FW_FLUSH:
		case IP_FW_ZERO:
		case IP_FW_RESETLOG:
		case IP_OLD_FW_ADD:
		case IP_OLD_FW_DEL:
		case IP_OLD_FW_FLUSH:
		case IP_OLD_FW_ZERO:
		case IP_OLD_FW_RESETLOG:
			if (ip_fw_ctl_ptr == 0)
				error = load_ipfw();
			if (ip_fw_ctl_ptr && error == 0)
				error = ip_fw_ctl_ptr(sopt);
			else
				error = ENOPROTOOPT;
			break;
#endif /* IPFIREWALL */

#if DUMMYNET
		case IP_DUMMYNET_CONFIGURE:
		case IP_DUMMYNET_DEL:
		case IP_DUMMYNET_FLUSH:
			if (!DUMMYNET_LOADED)
				ip_dn_init();
			if (DUMMYNET_LOADED)
				error = ip_dn_ctl_ptr(sopt);
			else
				error = ENOPROTOOPT;
			break;
#endif /* DUMMYNET */

		case SO_FLUSH:
			if ((error = sooptcopyin(sopt, &optval, sizeof (optval),
			    sizeof (optval))) != 0)
				break;

			error = inp_flush(inp, optval);
			break;

		default:
			error = ip_ctloutput(so, sopt);
			break;
		}
		break;
	}

	return (error);
}
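
/*
 * Illustrative only (not part of the original file): a userspace sketch
 * of the IP_STRIPHDR option handled above. When set, the INP_STRIPHDR
 * branches in rip_input() trim the IP header before the datagram is
 * queued on the socket. Guarded out of the build; IP_STRIPHDR is the
 * Darwin-specific option from <netinet/in.h>.
 */
#if 0
#include <sys/socket.h>
#include <netinet/in.h>

static int
strip_ip_header(int raw_sock)
{
	int on = 1;

	/* Subsequent reads return protocol payload only, no IP header. */
	return (setsockopt(raw_sock, IPPROTO_IP, IP_STRIPHDR,
	    &on, sizeof (on)));
}
#endif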

/*
 * This function exists solely to receive the PRC_IFDOWN messages which
 * are sent by if_down(). It looks for an ifaddr whose ifa_addr is sa,
 * and calls in_ifadown() to remove all routes corresponding to that address.
 * It also receives the PRC_IFUP messages from if_up() and reinstalls the
 * interface routes.
 */
void
rip_ctlinput(
	int cmd,
	struct sockaddr *sa,
	__unused void *vip)
{
	struct in_ifaddr *ia;
	struct ifnet *ifp;
	int err;
	int flags, done = 0;

	switch (cmd) {
	case PRC_IFDOWN:
		lck_rw_lock_shared(in_ifaddr_rwlock);
		for (ia = in_ifaddrhead.tqh_first; ia;
		    ia = ia->ia_link.tqe_next) {
			IFA_LOCK(&ia->ia_ifa);
			if (ia->ia_ifa.ifa_addr == sa &&
			    (ia->ia_flags & IFA_ROUTE)) {
				done = 1;
				IFA_ADDREF_LOCKED(&ia->ia_ifa);
				IFA_UNLOCK(&ia->ia_ifa);
				lck_rw_done(in_ifaddr_rwlock);
				lck_mtx_lock(rnh_lock);
				/*
				 * in_ifscrub kills the interface route.
				 */
				in_ifscrub(ia->ia_ifp, ia, 1);
				/*
				 * in_ifadown gets rid of all the rest of
				 * the routes. This is not quite the right
				 * thing to do, but at least if we are running
				 * a routing process they will come back.
				 */
				in_ifadown(&ia->ia_ifa, 1);
				lck_mtx_unlock(rnh_lock);
				IFA_REMREF(&ia->ia_ifa);
				break;
			}
			IFA_UNLOCK(&ia->ia_ifa);
		}
		if (!done)
			lck_rw_done(in_ifaddr_rwlock);
		break;

	case PRC_IFUP:
		lck_rw_lock_shared(in_ifaddr_rwlock);
		for (ia = in_ifaddrhead.tqh_first; ia;
		    ia = ia->ia_link.tqe_next) {
			IFA_LOCK(&ia->ia_ifa);
			if (ia->ia_ifa.ifa_addr == sa) {
				/* keep it locked */
				break;
			}
			IFA_UNLOCK(&ia->ia_ifa);
		}
		if (ia == NULL || (ia->ia_flags & IFA_ROUTE) ||
		    (ia->ia_ifa.ifa_debug & IFD_NOTREADY)) {
			if (ia != NULL)
				IFA_UNLOCK(&ia->ia_ifa);
			lck_rw_done(in_ifaddr_rwlock);
			return;
		}
		IFA_ADDREF_LOCKED(&ia->ia_ifa);
		IFA_UNLOCK(&ia->ia_ifa);
		lck_rw_done(in_ifaddr_rwlock);

		flags = RTF_UP;
		ifp = ia->ia_ifa.ifa_ifp;

		if ((ifp->if_flags & IFF_LOOPBACK)
		    || (ifp->if_flags & IFF_POINTOPOINT))
			flags |= RTF_HOST;

		err = rtinit(&ia->ia_ifa, RTM_ADD, flags);
		if (err == 0) {
			IFA_LOCK_SPIN(&ia->ia_ifa);
			ia->ia_flags |= IFA_ROUTE;
			IFA_UNLOCK(&ia->ia_ifa);
		}
		IFA_REMREF(&ia->ia_ifa);
		break;
	}
}

u_int32_t rip_sendspace = RIPSNDQ;
u_int32_t rip_recvspace = RIPRCVQ;

SYSCTL_INT(_net_inet_raw, OID_AUTO, maxdgram, CTLFLAG_RW | CTLFLAG_LOCKED,
    &rip_sendspace, 0, "Maximum outgoing raw IP datagram size");
SYSCTL_INT(_net_inet_raw, OID_AUTO, recvspace, CTLFLAG_RW | CTLFLAG_LOCKED,
    &rip_recvspace, 0, "Maximum incoming raw IP datagram size");
SYSCTL_UINT(_net_inet_raw, OID_AUTO, pcbcount, CTLFLAG_RD | CTLFLAG_LOCKED,
    &ripcbinfo.ipi_count, 0, "Number of active PCBs");
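
/*
 * Illustrative only (not part of the original file): the knobs above are
 * exported under "net.inet.raw". A minimal userspace sketch reading the
 * pcbcount OID, guarded out of the build.
 */
#if 0
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	u_int count = 0;
	size_t len = sizeof (count);

	/* Reads ripcbinfo.ipi_count as published by SYSCTL_UINT above. */
	if (sysctlbyname("net.inet.raw.pcbcount", &count, &len, NULL, 0) == -1)
		return (1);
	printf("active raw IP PCBs: %u\n", count);
	return (0);
}
#endif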

static int
rip_attach(struct socket *so, int proto, struct proc *p)
{
	struct inpcb *inp;
	int error;

	inp = sotoinpcb(so);
	if (inp)
		panic("rip_attach");
	if ((so->so_state & SS_PRIV) == 0)
		return (EPERM);

	error = soreserve(so, rip_sendspace, rip_recvspace);
	if (error)
		return (error);
	error = in_pcballoc(so, &ripcbinfo, p);
	if (error)
		return (error);
	inp = (struct inpcb *)so->so_pcb;
	inp->inp_vflag |= INP_IPV4;
	inp->inp_ip_p = proto;
	inp->inp_ip_ttl = ip_defttl;
	return (0);
}

__private_extern__ int
rip_detach(struct socket *so)
{
	struct inpcb *inp;

	inp = sotoinpcb(so);
	if (inp == 0)
		panic("rip_detach");
	in_pcbdetach(inp);
	return (0);
}

__private_extern__ int
rip_abort(struct socket *so)
{
	soisdisconnected(so);
	return (rip_detach(so));
}

__private_extern__ int
rip_disconnect(struct socket *so)
{
	if ((so->so_state & SS_ISCONNECTED) == 0)
		return (ENOTCONN);
	return (rip_abort(so));
}

__private_extern__ int
rip_bind(struct socket *so, struct sockaddr *nam, struct proc *p)
{
#pragma unused(p)
	struct inpcb *inp = sotoinpcb(so);
	struct sockaddr_in sin;
	struct ifaddr *ifa = NULL;
	struct ifnet *outif = NULL;

	if (inp == NULL
#if NECP
	    || (necp_socket_should_use_flow_divert(inp))
#endif /* NECP */
	    )
		return (inp == NULL ? EINVAL : EPROTOTYPE);

	if (nam->sa_len != sizeof (struct sockaddr_in))
		return (EINVAL);

	/* Sanitized local copy for interface address searches */
	bzero(&sin, sizeof (sin));
	sin.sin_family = AF_INET;
	sin.sin_len = sizeof (struct sockaddr_in);
	sin.sin_addr.s_addr = SIN(nam)->sin_addr.s_addr;

	if (TAILQ_EMPTY(&ifnet_head) ||
	    (sin.sin_family != AF_INET && sin.sin_family != AF_IMPLINK) ||
	    (sin.sin_addr.s_addr && (ifa = ifa_ifwithaddr(SA(&sin))) == 0)) {
		return (EADDRNOTAVAIL);
	} else if (ifa) {
		/*
		 * Opportunistically determine the outbound
		 * interface that may be used; this may not
		 * hold true if we end up using a route
		 * going over a different interface, e.g.
		 * when sending to a local address. This
		 * will get updated again after sending.
		 */
		IFA_LOCK(ifa);
		outif = ifa->ifa_ifp;
		IFA_UNLOCK(ifa);
		IFA_REMREF(ifa);
	}
	inp->inp_laddr = sin.sin_addr;
	inp->inp_last_outifp = outif;
	return (0);
}

__private_extern__ int
rip_connect(struct socket *so, struct sockaddr *nam, __unused struct proc *p)
{
	struct inpcb *inp = sotoinpcb(so);
	struct sockaddr_in *addr = (struct sockaddr_in *)(void *)nam;

	if (inp == NULL
#if NECP
	    || (necp_socket_should_use_flow_divert(inp))
#endif /* NECP */
	    )
		return (inp == NULL ? EINVAL : EPROTOTYPE);
	if (nam->sa_len != sizeof(*addr))
		return (EINVAL);
	if (TAILQ_EMPTY(&ifnet_head))
		return (EADDRNOTAVAIL);
	if ((addr->sin_family != AF_INET) &&
	    (addr->sin_family != AF_IMPLINK))
		return (EAFNOSUPPORT);
	inp->inp_faddr = addr->sin_addr;
	soisconnected(so);

	return (0);
}
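
/*
 * Illustrative only (not part of the original file): a userspace sketch
 * of rip_connect() above. Connecting pins inp_faddr, so rip_input()
 * delivers only datagrams from that peer and send(2) can be used
 * without a destination address. Guarded out of the build.
 */
#if 0
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <string.h>

static int
connect_raw_icmp(const char *peer)
{
	int s = socket(AF_INET, SOCK_RAW, IPPROTO_ICMP);
	struct sockaddr_in sin;

	if (s < 0)
		return (-1);
	memset(&sin, 0, sizeof (sin));
	sin.sin_family = AF_INET;
	sin.sin_len = sizeof (sin);
	sin.sin_addr.s_addr = inet_addr(peer);
	return (connect(s, (struct sockaddr *)&sin, sizeof (sin)));
}
#endif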

__private_extern__ int
rip_shutdown(struct socket *so)
{
	socantsendmore(so);
	return (0);
}

__private_extern__ int
rip_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
    struct mbuf *control, struct proc *p)
{
#pragma unused(flags, p)
	struct inpcb *inp = sotoinpcb(so);
	u_int32_t dst;
	int error = 0;

	if (inp == NULL
#if NECP
	    || (necp_socket_should_use_flow_divert(inp) && (error = EPROTOTYPE))
#endif /* NECP */
	    ) {
		if (inp == NULL)
			error = EINVAL;
		else
			error = EPROTOTYPE;
		goto bad;
	}

	if (so->so_state & SS_ISCONNECTED) {
		if (nam != NULL) {
			error = EISCONN;
			goto bad;
		}
		dst = inp->inp_faddr.s_addr;
	} else {
		if (nam == NULL) {
			error = ENOTCONN;
			goto bad;
		}
		dst = ((struct sockaddr_in *)(void *)nam)->sin_addr.s_addr;
	}
	return (rip_output(m, so, dst, control));

bad:
	VERIFY(error != 0);

	if (m != NULL)
		m_freem(m);
	if (control != NULL)
		m_freem(control);

	return (error);
}

/*
 * Note: rip_unlock is called from various protocols instead of the generic
 * socket_unlock; it handles the socket dealloc on last reference.
 */
int
rip_unlock(struct socket *so, int refcount, void *debug)
{
	void *lr_saved;
	struct inpcb *inp = sotoinpcb(so);

	if (debug == NULL)
		lr_saved = __builtin_return_address(0);
	else
		lr_saved = debug;

	if (refcount) {
		if (so->so_usecount <= 0) {
978 panic("rip_unlock: bad refoucnt so=%p val=%x lrh= %s\n",
			    so, so->so_usecount, solockhistory_nr(so));
			/* NOTREACHED */
		}
		so->so_usecount--;
		if (so->so_usecount == 0 && (inp->inp_wantcnt == WNT_STOPUSING)) {
			/* cleanup after last reference */
			lck_mtx_unlock(so->so_proto->pr_domain->dom_mtx);
			lck_rw_lock_exclusive(ripcbinfo.ipi_lock);
			if (inp->inp_state != INPCB_STATE_DEAD) {
#if INET6
				if (SOCK_CHECK_DOM(so, PF_INET6))
					in6_pcbdetach(inp);
				else
#endif /* INET6 */
					in_pcbdetach(inp);
			}
			in_pcbdispose(inp);
			lck_rw_done(ripcbinfo.ipi_lock);
			return (0);
		}
	}
	so->unlock_lr[so->next_unlock_lr] = lr_saved;
	so->next_unlock_lr = (so->next_unlock_lr+1) % SO_LCKDBG_MAX;
	lck_mtx_unlock(so->so_proto->pr_domain->dom_mtx);
	return (0);
}

static int
rip_pcblist SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	int error, i, n;
	struct inpcb *inp, **inp_list;
	inp_gen_t gencnt;
	struct xinpgen xig;

	/*
	 * The process of preparing the PCB list is too time-consuming and
	 * resource-intensive to repeat twice on every request.
	 */
	lck_rw_lock_exclusive(ripcbinfo.ipi_lock);
	if (req->oldptr == USER_ADDR_NULL) {
		n = ripcbinfo.ipi_count;
		req->oldidx = 2 * (sizeof xig)
		    + (n + n/8) * sizeof(struct xinpcb);
		lck_rw_done(ripcbinfo.ipi_lock);
		return (0);
	}

	if (req->newptr != USER_ADDR_NULL) {
		lck_rw_done(ripcbinfo.ipi_lock);
		return (EPERM);
	}

	/*
	 * OK, now we're committed to doing something.
	 */
	gencnt = ripcbinfo.ipi_gencnt;
	n = ripcbinfo.ipi_count;

	bzero(&xig, sizeof(xig));
	xig.xig_len = sizeof xig;
	xig.xig_count = n;
	xig.xig_gen = gencnt;
	xig.xig_sogen = so_gencnt;
	error = SYSCTL_OUT(req, &xig, sizeof xig);
	if (error) {
		lck_rw_done(ripcbinfo.ipi_lock);
		return (error);
	}
	/*
	 * We are done if there is no pcb
	 */
	if (n == 0) {
		lck_rw_done(ripcbinfo.ipi_lock);
		return (0);
	}

	inp_list = _MALLOC(n * sizeof *inp_list, M_TEMP, M_WAITOK);
	if (inp_list == 0) {
		lck_rw_done(ripcbinfo.ipi_lock);
		return (ENOMEM);
	}

	for (inp = ripcbinfo.ipi_listhead->lh_first, i = 0; inp && i < n;
	    inp = inp->inp_list.le_next) {
		if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD)
			inp_list[i++] = inp;
	}
	n = i;

	error = 0;
	for (i = 0; i < n; i++) {
		inp = inp_list[i];
		if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD) {
			struct xinpcb xi;

			bzero(&xi, sizeof(xi));
			xi.xi_len = sizeof xi;
			/* XXX should avoid extra copy */
			inpcb_to_compat(inp, &xi.xi_inp);
			if (inp->inp_socket)
				sotoxsocket(inp->inp_socket, &xi.xi_socket);
			error = SYSCTL_OUT(req, &xi, sizeof xi);
		}
	}
	if (!error) {
		/*
		 * Give the user an updated idea of our state.
		 * If the generation differs from what we told
		 * her before, she knows that something happened
		 * while we were processing this request, and it
		 * might be necessary to retry.
		 */
		bzero(&xig, sizeof(xig));
		xig.xig_len = sizeof xig;
		xig.xig_gen = ripcbinfo.ipi_gencnt;
		xig.xig_sogen = so_gencnt;
		xig.xig_count = ripcbinfo.ipi_count;
		error = SYSCTL_OUT(req, &xig, sizeof xig);
	}
	FREE(inp_list, M_TEMP);
	lck_rw_done(ripcbinfo.ipi_lock);
	return (error);
}

SYSCTL_PROC(_net_inet_raw, OID_AUTO/*XXX*/, pcblist,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0,
    rip_pcblist, "S,xinpcb", "List of active raw IP sockets");


static int
rip_pcblist64 SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	int error, i, n;
	struct inpcb *inp, **inp_list;
	inp_gen_t gencnt;
	struct xinpgen xig;

	/*
	 * The process of preparing the PCB list is too time-consuming and
	 * resource-intensive to repeat twice on every request.
	 */
	lck_rw_lock_exclusive(ripcbinfo.ipi_lock);
	if (req->oldptr == USER_ADDR_NULL) {
		n = ripcbinfo.ipi_count;
		req->oldidx = 2 * (sizeof xig)
		    + (n + n/8) * sizeof(struct xinpcb64);
		lck_rw_done(ripcbinfo.ipi_lock);
		return (0);
	}

	if (req->newptr != USER_ADDR_NULL) {
		lck_rw_done(ripcbinfo.ipi_lock);
		return (EPERM);
	}

	/*
	 * OK, now we're committed to doing something.
	 */
	gencnt = ripcbinfo.ipi_gencnt;
	n = ripcbinfo.ipi_count;

	bzero(&xig, sizeof(xig));
	xig.xig_len = sizeof xig;
	xig.xig_count = n;
	xig.xig_gen = gencnt;
	xig.xig_sogen = so_gencnt;
	error = SYSCTL_OUT(req, &xig, sizeof xig);
	if (error) {
		lck_rw_done(ripcbinfo.ipi_lock);
		return (error);
	}
	/*
	 * We are done if there is no pcb
	 */
	if (n == 0) {
		lck_rw_done(ripcbinfo.ipi_lock);
		return (0);
	}

	inp_list = _MALLOC(n * sizeof *inp_list, M_TEMP, M_WAITOK);
	if (inp_list == 0) {
		lck_rw_done(ripcbinfo.ipi_lock);
		return (ENOMEM);
	}

	for (inp = ripcbinfo.ipi_listhead->lh_first, i = 0; inp && i < n;
	    inp = inp->inp_list.le_next) {
		if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD)
			inp_list[i++] = inp;
	}
	n = i;

	error = 0;
	for (i = 0; i < n; i++) {
		inp = inp_list[i];
		if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD) {
			struct xinpcb64 xi;

			bzero(&xi, sizeof(xi));
			xi.xi_len = sizeof xi;
			inpcb_to_xinpcb64(inp, &xi);
			if (inp->inp_socket)
				sotoxsocket64(inp->inp_socket, &xi.xi_socket);
			error = SYSCTL_OUT(req, &xi, sizeof xi);
		}
	}
	if (!error) {
		/*
		 * Give the user an updated idea of our state.
		 * If the generation differs from what we told
		 * her before, she knows that something happened
		 * while we were processing this request, and it
		 * might be necessary to retry.
		 */
		bzero(&xig, sizeof(xig));
		xig.xig_len = sizeof xig;
		xig.xig_gen = ripcbinfo.ipi_gencnt;
		xig.xig_sogen = so_gencnt;
		xig.xig_count = ripcbinfo.ipi_count;
		error = SYSCTL_OUT(req, &xig, sizeof xig);
	}
	FREE(inp_list, M_TEMP);
	lck_rw_done(ripcbinfo.ipi_lock);
	return (error);
}

SYSCTL_PROC(_net_inet_raw, OID_AUTO, pcblist64,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0,
    rip_pcblist64, "S,xinpcb64", "List of active raw IP sockets");


static int
rip_pcblist_n SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	int error = 0;

	error = get_pcblist_n(IPPROTO_IP, req, &ripcbinfo);

	return (error);
}

SYSCTL_PROC(_net_inet_raw, OID_AUTO, pcblist_n,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0,
    rip_pcblist_n, "S,xinpcb_n", "List of active raw IP sockets");

struct pr_usrreqs rip_usrreqs = {
	.pru_abort = rip_abort,
	.pru_attach = rip_attach,
	.pru_bind = rip_bind,
	.pru_connect = rip_connect,
	.pru_control = in_control,
	.pru_detach = rip_detach,
	.pru_disconnect = rip_disconnect,
	.pru_peeraddr = in_getpeeraddr,
	.pru_send = rip_send,
	.pru_shutdown = rip_shutdown,
	.pru_sockaddr = in_getsockaddr,
	.pru_sosend = sosend,
	.pru_soreceive = soreceive,
};
/* DSEP Review Done pl-20051213-v02 @3253 */