2 * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 * Copyright (c) 1982, 1986, 1988, 1993
30 * The Regents of the University of California. All rights reserved.
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * @(#)raw_ip.c 8.7 (Berkeley) 5/15/95
63 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
64 * support for mandatory and extensible security protections. This notice
65 * is included in support of clause 2.2 (b) of the Apple Public License,
69 #include <sys/param.h>
70 #include <sys/systm.h>
71 #include <sys/kernel.h>
72 #include <sys/malloc.h>
74 #include <sys/mcache.h>
76 #include <sys/domain.h>
77 #include <sys/protosw.h>
78 #include <sys/socket.h>
79 #include <sys/socketvar.h>
80 #include <sys/sysctl.h>
81 #include <libkern/OSAtomic.h>
82 #include <kern/zalloc.h>
84 #include <pexpert/pexpert.h>
87 #include <net/route.h>
90 #include <netinet/in.h>
91 #include <netinet/in_systm.h>
92 #include <netinet/ip.h>
93 #include <netinet/in_pcb.h>
94 #include <netinet/in_var.h>
95 #include <netinet/ip_var.h>
96 #include <netinet/ip_mroute.h>
99 #include <netinet6/in6_pcb.h>
102 #include <netinet/ip_fw.h>
105 #include <netinet6/ipsec.h>
109 #include <netinet/ip_dummynet.h>
113 #include <security/mac_framework.h>
/*
 * Forward declarations of the raw IP user-request handlers. Each one is
 * defined later in this file and installed in the rip_usrreqs table.
 */
117 int rip_detach(struct socket
*);
118 int rip_abort(struct socket
*);
119 int rip_disconnect(struct socket
*);
120 int rip_bind(struct socket
*, struct sockaddr
*, struct proc
*);
121 int rip_connect(struct socket
*, struct sockaddr
*, struct proc
*);
122 int rip_shutdown(struct socket
*);
/*
 * ipsec_bypass is defined outside this file. The input and output paths
 * below only perform their IPsec calls while it is zero.
 */
125 extern int ipsec_bypass
;
/*
 * ripcb is the list of raw IP PCBs that the input path walks with
 * LIST_FOREACH. ripcbinfo is the matching pcbinfo; rip_init() points its
 * ipi_listhead at ripcb and fills in its hash tables, zone and locks.
 */
128 struct inpcbhead ripcb
;
129 struct inpcbinfo ripcbinfo
;
131 /* control hooks for ipfw and dummynet */
/*
 * Both hooks are function pointers. The socket option code in this file
 * forwards the struct sockopt to them: ip_fw_ctl_ptr is tested against
 * zero before it is called, and ip_dn_ctl_ptr is called after a
 * DUMMYNET_LOADED test.
 * NOTE(review): the conditional-compilation lines that open the two
 * endif lines below are not present in this view.
 */
133 ip_fw_ctl_t
*ip_fw_ctl_ptr
;
134 #endif /* IPFIREWALL */
136 ip_dn_ctl_t
*ip_dn_ctl_ptr
;
137 #endif /* DUMMYNET */
140 * Nominal space allocated to a raw ip socket.
146 * Raw interface to IP protocol.
150 * Initialize raw connection block q.
153 rip_init(struct protosw
*pp
, struct domain
*dp
)
156 static int rip_initialized
= 0;
157 struct inpcbinfo
*pcbinfo
;
159 VERIFY((pp
->pr_flags
& (PR_INITIALIZED
|PR_ATTACHED
)) == PR_ATTACHED
);
166 ripcbinfo
.ipi_listhead
= &ripcb
;
168 * XXX We don't use the hash list for raw IP, but it's easier
169 * to allocate a one entry hash list than it is to check all
170 * over the place for ipi_hashbase == NULL.
172 ripcbinfo
.ipi_hashbase
= hashinit(1, M_PCB
, &ripcbinfo
.ipi_hashmask
);
173 ripcbinfo
.ipi_porthashbase
= hashinit(1, M_PCB
, &ripcbinfo
.ipi_porthashmask
);
175 ripcbinfo
.ipi_zone
= zinit(sizeof(struct inpcb
),
176 (4096 * sizeof(struct inpcb
)), 4096, "ripzone");
178 pcbinfo
= &ripcbinfo
;
180 * allocate lock group attribute and group for raw IP pcb mutexes
182 pcbinfo
->ipi_lock_grp_attr
= lck_grp_attr_alloc_init();
183 pcbinfo
->ipi_lock_grp
= lck_grp_alloc_init("ripcb", pcbinfo
->ipi_lock_grp_attr
);
186 * allocate the lock attribute for raw IP pcb mutexes
188 pcbinfo
->ipi_lock_attr
= lck_attr_alloc_init();
189 if ((pcbinfo
->ipi_lock
= lck_rw_alloc_init(pcbinfo
->ipi_lock_grp
,
190 pcbinfo
->ipi_lock_attr
)) == NULL
) {
191 panic("%s: unable to allocate PCB lock\n", __func__
);
195 in_pcbinfo_attach(&ripcbinfo
);
/*
 * Source address passed to sbappendaddr() by the input path. It is
 * initialised once with its own size and AF_INET; the visible input code
 * overwrites sin_addr with ip_src of the packet before delivery.
 */
198 static struct sockaddr_in ripsrc
= { sizeof(ripsrc
), AF_INET
, 0, {0}, {0,0,0,0,0,0,0,0,} };
200 * Setup generic address and protocol structures
201 * for raw_input routine, then pass them along with
209 struct ip
*ip
= mtod(m
, struct ip
*);
211 struct inpcb
*last
= 0;
212 struct mbuf
*opts
= 0;
213 int skipit
= 0, ret
= 0;
214 struct ifnet
*ifp
= m
->m_pkthdr
.rcvif
;
216 /* Expect 32-bit aligned data pointer on strict-align platforms */
217 MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m
);
219 ripsrc
.sin_addr
= ip
->ip_src
;
220 lck_rw_lock_shared(ripcbinfo
.ipi_lock
);
221 LIST_FOREACH(inp
, &ripcb
, inp_list
) {
223 if ((inp
->inp_vflag
& INP_IPV4
) == 0)
226 if (inp
->inp_ip_p
&& (inp
->inp_ip_p
!= ip
->ip_p
))
228 if (inp
->inp_laddr
.s_addr
&&
229 inp
->inp_laddr
.s_addr
!= ip
->ip_dst
.s_addr
)
231 if (inp
->inp_faddr
.s_addr
&&
232 inp
->inp_faddr
.s_addr
!= ip
->ip_src
.s_addr
)
235 if (inp_restricted(inp
, ifp
))
238 if (ifp
!= NULL
&& IFNET_IS_CELLULAR(ifp
) &&
239 (inp
->inp_flags
& INP_NO_IFT_CELLULAR
))
243 struct mbuf
*n
= m_copy(m
, 0, (int)M_COPYALL
);
247 /* check AH/ESP integrity. */
248 if (ipsec_bypass
== 0 && n
) {
249 if (ipsec4_in_reject_so(n
, last
->inp_socket
)) {
251 IPSEC_STAT_INCREMENT(ipsecstat
.in_polvio
);
252 /* do not inject data to pcb */
258 if (n
&& skipit
== 0) {
259 if (mac_inpcb_check_deliver(last
, n
, AF_INET
,
266 if (n
&& skipit
== 0) {
268 if ((last
->inp_flags
& INP_CONTROLOPTS
) != 0 ||
269 (last
->inp_socket
->so_options
& SO_TIMESTAMP
) != 0 ||
270 (last
->inp_socket
->so_options
& SO_TIMESTAMP_MONOTONIC
) != 0) {
271 ret
= ip_savecontrol(last
, &opts
, ip
, n
);
279 if (last
->inp_flags
& INP_STRIPHDR
) {
281 n
->m_pkthdr
.len
-= iphlen
;
284 so_recv_data_stat(last
->inp_socket
, m
, 0);
285 if (sbappendaddr(&last
->inp_socket
->so_rcv
,
286 (struct sockaddr
*)&ripsrc
, n
,
287 opts
, &error
) != 0) {
288 sorwakeup(last
->inp_socket
);
291 /* should notify about lost packet */
292 kprintf("rip_input can't append to socket\n");
303 /* check AH/ESP integrity. */
304 if (ipsec_bypass
== 0 && last
) {
305 if (ipsec4_in_reject_so(m
, last
->inp_socket
)) {
307 IPSEC_STAT_INCREMENT(ipsecstat
.in_polvio
);
308 OSAddAtomic(1, &ipstat
.ips_delivered
);
309 /* do not inject data to pcb */
315 if (last
&& skipit
== 0) {
316 if (mac_inpcb_check_deliver(last
, m
, AF_INET
, SOCK_RAW
) != 0) {
324 if ((last
->inp_flags
& INP_CONTROLOPTS
) != 0 ||
325 (last
->inp_socket
->so_options
& SO_TIMESTAMP
) != 0 ||
326 (last
->inp_socket
->so_options
& SO_TIMESTAMP_MONOTONIC
) != 0) {
327 ret
= ip_savecontrol(last
, &opts
, ip
, m
);
334 if (last
->inp_flags
& INP_STRIPHDR
) {
336 m
->m_pkthdr
.len
-= iphlen
;
339 so_recv_data_stat(last
->inp_socket
, m
, 0);
340 if (sbappendaddr(&last
->inp_socket
->so_rcv
,
341 (struct sockaddr
*)&ripsrc
, m
, opts
, NULL
) != 0) {
342 sorwakeup(last
->inp_socket
);
344 kprintf("rip_input(2) can't append to socket\n");
348 OSAddAtomic(1, &ipstat
.ips_noproto
);
349 OSAddAtomic(-1, &ipstat
.ips_delivered
);
354 * Keep the list locked because socket filter may force the socket lock
355 * to be released when calling sbappendaddr() -- see rdar://7627704
357 lck_rw_done(ripcbinfo
.ipi_lock
);
361 * Generate IP header and pass packet to ip_output.
362 * Tack on options user may have setup with control call.
369 struct mbuf
*control
)
372 struct inpcb
*inp
= sotoinpcb(so
);
373 int flags
= (so
->so_options
& SO_DONTROUTE
) | IP_ALLOWBROADCAST
;
374 struct ip_out_args ipoa
=
375 { IFSCOPE_NONE
, { 0 }, IPOAF_SELECT_SRCIF
, 0 };
376 struct ip_moptions
*imo
;
378 mbuf_svc_class_t msc
= MBUF_SC_UNSPEC
;
380 if (control
!= NULL
) {
381 msc
= mbuf_service_class_from_control(control
);
387 if (inp
== NULL
|| (inp
->inp_flags2
& INP2_WANT_FLOW_DIVERT
)) {
390 VERIFY(control
== NULL
);
391 return (inp
== NULL
? EINVAL
: EPROTOTYPE
);
395 /* If socket was bound to an ifindex, tell ip_output about it */
396 if (inp
->inp_flags
& INP_BOUND_IF
) {
397 ipoa
.ipoa_boundif
= inp
->inp_boundifp
->if_index
;
398 ipoa
.ipoa_flags
|= IPOAF_BOUND_IF
;
400 if (inp
->inp_flags
& INP_NO_IFT_CELLULAR
)
401 ipoa
.ipoa_flags
|= IPOAF_NO_CELLULAR
;
403 if (inp
->inp_flowhash
== 0)
404 inp
->inp_flowhash
= inp_calc_flowhash(inp
);
407 * If the user handed us a complete IP packet, use it.
408 * Otherwise, allocate an mbuf for a header and fill it in.
410 if ((inp
->inp_flags
& INP_HDRINCL
) == 0) {
411 if (m
->m_pkthdr
.len
+ sizeof(struct ip
) > IP_MAXPACKET
) {
415 M_PREPEND(m
, sizeof(struct ip
), M_WAIT
);
418 ip
= mtod(m
, struct ip
*);
419 ip
->ip_tos
= inp
->inp_ip_tos
;
421 ip
->ip_p
= inp
->inp_ip_p
;
422 ip
->ip_len
= m
->m_pkthdr
.len
;
423 ip
->ip_src
= inp
->inp_laddr
;
424 ip
->ip_dst
.s_addr
= dst
;
425 ip
->ip_ttl
= inp
->inp_ip_ttl
;
427 if (m
->m_pkthdr
.len
> IP_MAXPACKET
) {
431 ip
= mtod(m
, struct ip
*);
432 /* don't allow both user specified and setsockopt options,
433 and don't allow packet length sizes that will crash */
434 if (((IP_VHL_HL(ip
->ip_vhl
) != (sizeof (*ip
) >> 2))
436 || (ip
->ip_len
> m
->m_pkthdr
.len
)
437 || (ip
->ip_len
< (IP_VHL_HL(ip
->ip_vhl
) << 2))) {
442 ip
->ip_id
= ip_randomid();
443 /* XXX prevent ip_output from overwriting header fields */
444 flags
|= IP_RAWOUTPUT
;
445 OSAddAtomic(1, &ipstat
.ips_rawout
);
448 if (inp
->inp_laddr
.s_addr
!= INADDR_ANY
)
449 ipoa
.ipoa_flags
|= IPOAF_BOUND_SRCADDR
;
452 if (ipsec_bypass
== 0 && ipsec_setsocket(m
, so
) != 0) {
458 if (ROUTE_UNUSABLE(&inp
->inp_route
))
459 ROUTE_RELEASE(&inp
->inp_route
);
461 set_packet_service_class(m
, so
, msc
, 0);
462 m
->m_pkthdr
.pkt_flowsrc
= FLOWSRC_INPCB
;
463 m
->m_pkthdr
.pkt_flowid
= inp
->inp_flowhash
;
464 m
->m_pkthdr
.pkt_flags
|= (PKTF_FLOW_ID
| PKTF_FLOW_LOCALSRC
|
466 m
->m_pkthdr
.pkt_proto
= inp
->inp_ip_p
;
469 mac_mbuf_label_associate_inpcb(inp
, m
);
472 imo
= inp
->inp_moptions
;
476 * The domain lock is held across ip_output, so it is okay
477 * to pass the PCB cached route pointer directly to IP and
478 * the modules beneath it.
480 error
= ip_output(m
, inp
->inp_options
, &inp
->inp_route
, flags
,
486 if (inp
->inp_route
.ro_rt
!= NULL
) {
487 struct rtentry
*rt
= inp
->inp_route
.ro_rt
;
490 if ((rt
->rt_flags
& (RTF_MULTICAST
|RTF_BROADCAST
)) ||
491 inp
->inp_socket
== NULL
||
492 !(inp
->inp_socket
->so_state
& SS_ISCONNECTED
)) {
493 rt
= NULL
; /* unusable */
496 * Always discard the cached route for unconnected
497 * socket or if it is a multicast route.
500 ROUTE_RELEASE(&inp
->inp_route
);
503 * If this is a connected socket and the destination
504 * route is unicast, update outif with that of the
505 * route interface used by IP.
507 if (rt
!= NULL
&& (outif
= rt
->rt_ifp
) != inp
->inp_last_outifp
)
508 inp
->inp_last_outifp
= outif
;
510 ROUTE_RELEASE(&inp
->inp_route
);
514 * If output interface was cellular, and this socket is denied
515 * access to it, generate an event.
517 if (error
!= 0 && (ipoa
.ipoa_retflags
& IPOARF_IFDENIED
) &&
518 (inp
->inp_flags
& INP_NO_IFT_CELLULAR
))
519 soevent(so
, (SO_FILT_HINT_LOCKED
|SO_FILT_HINT_IFDENIED
));
533 if (!DUMMYNET_LOADED
)
535 #endif /* DUMMYNET */
538 return err
== 0 && ip_fw_ctl_ptr
== NULL
? -1 : err
;
540 #endif /* IPFIREWALL */
543 * Raw IP socket option processing.
546 rip_ctloutput(so
, sopt
)
548 struct sockopt
*sopt
;
550 struct inpcb
*inp
= sotoinpcb(so
);
553 /* Allow <SOL_SOCKET,SO_FLUSH> at this level */
554 if (sopt
->sopt_level
!= IPPROTO_IP
&&
555 !(sopt
->sopt_level
== SOL_SOCKET
&& sopt
->sopt_name
== SO_FLUSH
))
560 switch (sopt
->sopt_dir
) {
562 switch (sopt
->sopt_name
) {
564 optval
= inp
->inp_flags
& INP_HDRINCL
;
565 error
= sooptcopyout(sopt
, &optval
, sizeof optval
);
569 optval
= inp
->inp_flags
& INP_STRIPHDR
;
570 error
= sooptcopyout(sopt
, &optval
, sizeof optval
);
578 if (ip_fw_ctl_ptr
== 0)
580 if (ip_fw_ctl_ptr
&& error
== 0)
581 error
= ip_fw_ctl_ptr(sopt
);
585 #endif /* IPFIREWALL */
588 case IP_DUMMYNET_GET
:
589 if (!DUMMYNET_LOADED
)
592 error
= ip_dn_ctl_ptr(sopt
);
596 #endif /* DUMMYNET */
607 error
= ip_mrouter_get(so
, sopt
);
609 #endif /* MROUTING */
612 error
= ip_ctloutput(so
, sopt
);
618 switch (sopt
->sopt_name
) {
620 error
= sooptcopyin(sopt
, &optval
, sizeof optval
,
625 inp
->inp_flags
|= INP_HDRINCL
;
627 inp
->inp_flags
&= ~INP_HDRINCL
;
631 error
= sooptcopyin(sopt
, &optval
, sizeof optval
,
636 inp
->inp_flags
|= INP_STRIPHDR
;
638 inp
->inp_flags
&= ~INP_STRIPHDR
;
649 case IP_OLD_FW_FLUSH
:
651 case IP_OLD_FW_RESETLOG
:
652 if (ip_fw_ctl_ptr
== 0)
654 if (ip_fw_ctl_ptr
&& error
== 0)
655 error
= ip_fw_ctl_ptr(sopt
);
659 #endif /* IPFIREWALL */
662 case IP_DUMMYNET_CONFIGURE
:
663 case IP_DUMMYNET_DEL
:
664 case IP_DUMMYNET_FLUSH
:
665 if (!DUMMYNET_LOADED
)
668 error
= ip_dn_ctl_ptr(sopt
);
670 error
= ENOPROTOOPT
;
676 error
= ip_rsvp_init(so
);
680 error
= ip_rsvp_done();
683 /* XXX - should be combined */
685 error
= ip_rsvp_vif_init(so
, sopt
);
688 case IP_RSVP_VIF_OFF
:
689 error
= ip_rsvp_vif_done(so
, sopt
);
700 error
= ip_mrouter_set(so
, sopt
);
702 #endif /* MROUTING */
705 if ((error
= sooptcopyin(sopt
, &optval
, sizeof (optval
),
706 sizeof (optval
))) != 0)
709 error
= inp_flush(inp
, optval
);
713 error
= ip_ctloutput(so
, sopt
);
723 * This function exists solely to receive the PRC_IFDOWN messages which
724 * are sent by if_down(). It looks for an ifaddr whose ifa_addr is sa,
725 * and calls in_ifadown() to remove all routes corresponding to that address.
726 * It also receives the PRC_IFUP messages from if_up() and reinstalls the
735 struct in_ifaddr
*ia
;
742 lck_rw_lock_shared(in_ifaddr_rwlock
);
743 for (ia
= in_ifaddrhead
.tqh_first
; ia
;
744 ia
= ia
->ia_link
.tqe_next
) {
745 IFA_LOCK(&ia
->ia_ifa
);
746 if (ia
->ia_ifa
.ifa_addr
== sa
&&
747 (ia
->ia_flags
& IFA_ROUTE
)) {
749 IFA_ADDREF_LOCKED(&ia
->ia_ifa
);
750 IFA_UNLOCK(&ia
->ia_ifa
);
751 lck_rw_done(in_ifaddr_rwlock
);
752 lck_mtx_lock(rnh_lock
);
754 * in_ifscrub kills the interface route.
756 in_ifscrub(ia
->ia_ifp
, ia
, 1);
758 * in_ifadown gets rid of all the rest of
759 * the routes. This is not quite the right
760 * thing to do, but at least if we are running
761 * a routing process they will come back.
763 in_ifadown(&ia
->ia_ifa
, 1);
764 lck_mtx_unlock(rnh_lock
);
765 IFA_REMREF(&ia
->ia_ifa
);
768 IFA_UNLOCK(&ia
->ia_ifa
);
771 lck_rw_done(in_ifaddr_rwlock
);
775 lck_rw_lock_shared(in_ifaddr_rwlock
);
776 for (ia
= in_ifaddrhead
.tqh_first
; ia
;
777 ia
= ia
->ia_link
.tqe_next
) {
778 IFA_LOCK(&ia
->ia_ifa
);
779 if (ia
->ia_ifa
.ifa_addr
== sa
) {
783 IFA_UNLOCK(&ia
->ia_ifa
);
785 if (ia
== NULL
|| (ia
->ia_flags
& IFA_ROUTE
) ||
786 (ia
->ia_ifa
.ifa_debug
& IFD_NOTREADY
)) {
788 IFA_UNLOCK(&ia
->ia_ifa
);
789 lck_rw_done(in_ifaddr_rwlock
);
792 IFA_ADDREF_LOCKED(&ia
->ia_ifa
);
793 IFA_UNLOCK(&ia
->ia_ifa
);
794 lck_rw_done(in_ifaddr_rwlock
);
797 ifp
= ia
->ia_ifa
.ifa_ifp
;
799 if ((ifp
->if_flags
& IFF_LOOPBACK
)
800 || (ifp
->if_flags
& IFF_POINTOPOINT
))
803 err
= rtinit(&ia
->ia_ifa
, RTM_ADD
, flags
);
805 IFA_LOCK_SPIN(&ia
->ia_ifa
);
806 ia
->ia_flags
|= IFA_ROUTE
;
807 IFA_UNLOCK(&ia
->ia_ifa
);
809 IFA_REMREF(&ia
->ia_ifa
);
/*
 * Send and receive space for raw IP sockets. rip_attach() passes both
 * values to soreserve(). They are registered read-write under the
 * _net_inet_raw node as maxdgram and recvspace; the PCB count held in
 * ripcbinfo.ipi_count is registered read-only as pcbcount.
 * NOTE(review): both variables are u_int32_t but are registered with
 * SYSCTL_INT rather than SYSCTL_UINT - confirm this is intended.
 */
814 u_int32_t rip_sendspace
= RIPSNDQ
;
815 u_int32_t rip_recvspace
= RIPRCVQ
;
817 SYSCTL_INT(_net_inet_raw
, OID_AUTO
, maxdgram
, CTLFLAG_RW
| CTLFLAG_LOCKED
,
818 &rip_sendspace
, 0, "Maximum outgoing raw IP datagram size");
819 SYSCTL_INT(_net_inet_raw
, OID_AUTO
, recvspace
, CTLFLAG_RW
| CTLFLAG_LOCKED
,
820 &rip_recvspace
, 0, "Maximum incoming raw IP datagram size");
821 SYSCTL_UINT(_net_inet_raw
, OID_AUTO
, pcbcount
, CTLFLAG_RD
| CTLFLAG_LOCKED
,
822 &ripcbinfo
.ipi_count
, 0, "Number of active PCBs");
/*
 * rip_attach: attach handler for raw IP sockets, installed as pru_attach
 * in rip_usrreqs.
 *
 * so    - socket being attached
 * proto - protocol number; stored in the new PCB as inp_ip_p
 * p     - calling process; forwarded to in_pcballoc()
 *
 * Visible steps, in order: test SS_PRIV in so_state, reserve buffer
 * space with soreserve() using rip_sendspace and rip_recvspace, allocate
 * a PCB on ripcbinfo with in_pcballoc(), then flag the PCB as IPv4 and
 * seed its protocol and its TTL (from ip_defttl).
 * NOTE(review): the statements taken when a check or call fails, and the
 * final return, are not present in this view.
 */
825 rip_attach(struct socket
*so
, int proto
, struct proc
*p
)
833 if ((so
->so_state
& SS_PRIV
) == 0)
836 error
= soreserve(so
, rip_sendspace
, rip_recvspace
);
839 error
= in_pcballoc(so
, &ripcbinfo
, p
);
842 inp
= (struct inpcb
*)so
->so_pcb
;
843 inp
->inp_vflag
|= INP_IPV4
;
844 inp
->inp_ip_p
= proto
;
845 inp
->inp_ip_ttl
= ip_defttl
;
/*
 * rip_detach: detach handler, installed as pru_detach in rip_usrreqs and
 * also called by rip_abort(), which returns its result.
 *
 * so - socket being detached
 *
 * The visible code compares so with ip_mrouter and calls
 * ip_rsvp_force_done(so).
 * NOTE(review): the rest of the body (including whatever is done with
 * the PCB and the return value) is not present in this view.
 */
849 __private_extern__
int
850 rip_detach(struct socket
*so
)
858 if (so
== ip_mrouter
)
860 ip_rsvp_force_done(so
);
863 #endif /* MROUTING */
/*
 * rip_abort: abort handler, installed as pru_abort in rip_usrreqs and
 * reused by rip_disconnect().
 *
 * so - socket to abort
 *
 * Marks the socket disconnected with soisdisconnected() and then returns
 * whatever rip_detach() returns for the same socket.
 */
868 __private_extern__
int
869 rip_abort(struct socket
*so
)
871 soisdisconnected(so
);
872 return rip_detach(so
);
/*
 * rip_disconnect: disconnect handler, installed as pru_disconnect in
 * rip_usrreqs.
 *
 * so - socket to disconnect
 *
 * Tests SS_ISCONNECTED in so_state; the last visible statement returns
 * the result of rip_abort(so).
 * NOTE(review): the statement guarded by the not-connected test is not
 * present in this view; presumably an error return - confirm.
 */
875 __private_extern__
int
876 rip_disconnect(struct socket
*so
)
878 if ((so
->so_state
& SS_ISCONNECTED
) == 0)
880 return rip_abort(so
);
/*
 * rip_bind: bind handler, installed as pru_bind in rip_usrreqs. Records
 * the local address of a raw IP socket.
 *
 * so  - socket to bind
 * nam - requested local address; its sa_len is compared with
 *       sizeof (struct sockaddr_in)
 * p   - calling process (no use of it is visible here)
 *
 * Visible results: EINVAL when the socket has no PCB, EPROTOTYPE when
 * the PCB has INP2_WANT_FLOW_DIVERT set, EADDRNOTAVAIL when the
 * interface list is empty or a non-zero address is not found by
 * ifa_ifwithaddr(). Otherwise the address is stored in inp_laddr and
 * outif (taken from the matched ifaddr) in inp_last_outifp.
 *
 * The lookup uses a zeroed local sockaddr_in that carries only the
 * address from nam.
 * NOTE(review): sin.sin_family is assigned AF_INET before the family
 * test, so that test inspects the local copy rather than the family
 * supplied in nam.
 * NOTE(review): the statement guarded by the sa_len test, the guard
 * around the outif assignment and the final return are not present in
 * this view.
 */
883 __private_extern__
int
884 rip_bind(struct socket
*so
, struct sockaddr
*nam
, struct proc
*p
)
887 struct inpcb
*inp
= sotoinpcb(so
);
888 struct sockaddr_in sin
;
889 struct ifaddr
*ifa
= NULL
;
890 struct ifnet
*outif
= NULL
;
892 if (inp
== NULL
|| (inp
->inp_flags2
& INP2_WANT_FLOW_DIVERT
))
893 return (inp
== NULL
? EINVAL
: EPROTOTYPE
);
895 if (nam
->sa_len
!= sizeof (struct sockaddr_in
))
898 /* Sanitized local copy for interface address searches */
899 bzero(&sin
, sizeof (sin
));
900 sin
.sin_family
= AF_INET
;
901 sin
.sin_len
= sizeof (struct sockaddr_in
);
902 sin
.sin_addr
.s_addr
= SIN(nam
)->sin_addr
.s_addr
;
904 if (TAILQ_EMPTY(&ifnet_head
) ||
905 (sin
.sin_family
!= AF_INET
&& sin
.sin_family
!= AF_IMPLINK
) ||
906 (sin
.sin_addr
.s_addr
&& (ifa
= ifa_ifwithaddr(SA(&sin
))) == 0)) {
907 return (EADDRNOTAVAIL
);
910 * Opportunistically determine the outbound
911 * interface that may be used; this may not
912 * hold true if we end up using a route
913 * going over a different interface, e.g.
914 * when sending to a local address. This
915 * will get updated again after sending.
918 outif
= ifa
->ifa_ifp
;
922 inp
->inp_laddr
= sin
.sin_addr
;
923 inp
->inp_last_outifp
= outif
;
/*
 * rip_connect: connect handler, installed as pru_connect in rip_usrreqs.
 * Records the foreign address of a raw IP socket.
 *
 * so  - socket to connect
 * nam - destination address, viewed as a struct sockaddr_in; its sa_len
 *       is compared with the size of that structure
 * p   - unused
 *
 * Visible results: EINVAL when the socket has no PCB, EPROTOTYPE when
 * the PCB has INP2_WANT_FLOW_DIVERT set, EADDRNOTAVAIL when the
 * interface list is empty. The family of nam is tested against AF_INET
 * and AF_IMPLINK, and the address is then stored in inp_faddr.
 * NOTE(review): the statements guarded by the sa_len and family tests,
 * and everything after the inp_faddr assignment, are not present in this
 * view.
 */
927 __private_extern__
int
928 rip_connect(struct socket
*so
, struct sockaddr
*nam
, __unused
struct proc
*p
)
930 struct inpcb
*inp
= sotoinpcb(so
);
931 struct sockaddr_in
*addr
= (struct sockaddr_in
*)(void *)nam
;
933 if (inp
== NULL
|| (inp
->inp_flags2
& INP2_WANT_FLOW_DIVERT
))
934 return (inp
== NULL
? EINVAL
: EPROTOTYPE
);
935 if (nam
->sa_len
!= sizeof(*addr
))
937 if (TAILQ_EMPTY(&ifnet_head
))
938 return EADDRNOTAVAIL
;
939 if ((addr
->sin_family
!= AF_INET
) &&
940 (addr
->sin_family
!= AF_IMPLINK
))
942 inp
->inp_faddr
= addr
->sin_addr
;
/*
 * rip_shutdown: shutdown handler, installed as pru_shutdown in
 * rip_usrreqs.
 *
 * so - socket being shut down
 *
 * NOTE(review): only the signature is present in this view; the body is
 * not visible.
 */
948 __private_extern__
int
949 rip_shutdown(struct socket
*so
)
/*
 * rip_send: send handler, installed as pru_send in rip_usrreqs. Picks
 * the destination address and hands the packet to rip_output().
 *
 * so      - sending socket
 * flags   - unused
 * m       - packet to send; passed through to rip_output()
 * nam     - destination address, read as a struct sockaddr_in
 * control - control mbuf; passed through to rip_output()
 * p       - unused
 *
 * Visible behaviour: error is set to EINVAL when the socket has no PCB
 * and to EPROTOTYPE when the PCB has INP2_WANT_FLOW_DIVERT set. dst is
 * assigned from inp_faddr inside the SS_ISCONNECTED branch and from nam
 * further down. The function returns the result of rip_output().
 * NOTE(review): the error exits and the checks that decide between the
 * two dst assignments are not present in this view.
 */
955 __private_extern__
int
956 rip_send(struct socket
*so
, int flags
, struct mbuf
*m
, struct sockaddr
*nam
,
957 struct mbuf
*control
, struct proc
*p
)
959 #pragma unused(flags, p)
960 struct inpcb
*inp
= sotoinpcb(so
);
964 if (inp
== NULL
|| (inp
->inp_flags2
& INP2_WANT_FLOW_DIVERT
)) {
965 error
= (inp
== NULL
? EINVAL
: EPROTOTYPE
);
969 if (so
->so_state
& SS_ISCONNECTED
) {
974 dst
= inp
->inp_faddr
.s_addr
;
980 dst
= ((struct sockaddr_in
*)(void *)nam
)->sin_addr
.s_addr
;
982 return (rip_output(m
, so
, dst
, control
));
995 /* Note: protocols call rip_unlock directly instead of the generic socket_unlock;
996 * it handles the socket dealloc on the last reference.
/*
 * rip_unlock: releases the domain mutex of a raw IP socket.
 *
 * so       - socket to unlock
 * refcount - NOTE(review): no use of it is visible in this view
 * debug    - NOTE(review): no use of it is visible in this view
 *
 * Visible behaviour: the return address of the caller is saved in
 * lr_saved. A use count of zero or less triggers a panic. When the use
 * count is zero and inp_wantcnt equals WNT_STOPUSING, the domain mutex
 * is dropped, ripcbinfo.ipi_lock is taken exclusively, the PCB state is
 * compared with INPCB_STATE_DEAD, and the lock is released again.
 * Otherwise lr_saved is recorded in the unlock_lr ring (indexed modulo
 * SO_LCKDBG_MAX) and the domain mutex is dropped.
 * NOTE(review): the panic format string misspells the word refcount; it
 * is a runtime string and is left unchanged here.
 * NOTE(review): the use-count decrement, the teardown calls inside the
 * last-reference branch and the return are not present in this view.
 */
999 rip_unlock(struct socket
*so
, int refcount
, void *debug
)
1002 struct inpcb
*inp
= sotoinpcb(so
);
1005 lr_saved
= __builtin_return_address(0);
1010 if (so
->so_usecount
<= 0) {
1011 panic("rip_unlock: bad refoucnt so=%p val=%x lrh= %s\n",
1012 so
, so
->so_usecount
, solockhistory_nr(so
));
1016 if (so
->so_usecount
== 0 && (inp
->inp_wantcnt
== WNT_STOPUSING
)) {
1017 /* cleanup after last reference */
1018 lck_mtx_unlock(so
->so_proto
->pr_domain
->dom_mtx
);
1019 lck_rw_lock_exclusive(ripcbinfo
.ipi_lock
);
1020 if (inp
->inp_state
!= INPCB_STATE_DEAD
) {
1022 if (SOCK_CHECK_DOM(so
, PF_INET6
))
1029 lck_rw_done(ripcbinfo
.ipi_lock
);
1033 so
->unlock_lr
[so
->next_unlock_lr
] = lr_saved
;
1034 so
->next_unlock_lr
= (so
->next_unlock_lr
+1) % SO_LCKDBG_MAX
;
1035 lck_mtx_unlock(so
->so_proto
->pr_domain
->dom_mtx
);
1040 rip_pcblist SYSCTL_HANDLER_ARGS
1042 #pragma unused(oidp, arg1, arg2)
1044 struct inpcb
*inp
, **inp_list
;
1049 * The process of preparing the PCB list is too time-consuming and
1050 * resource-intensive to repeat twice on every request.
1052 lck_rw_lock_exclusive(ripcbinfo
.ipi_lock
);
1053 if (req
->oldptr
== USER_ADDR_NULL
) {
1054 n
= ripcbinfo
.ipi_count
;
1055 req
->oldidx
= 2 * (sizeof xig
)
1056 + (n
+ n
/8) * sizeof(struct xinpcb
);
1057 lck_rw_done(ripcbinfo
.ipi_lock
);
1061 if (req
->newptr
!= USER_ADDR_NULL
) {
1062 lck_rw_done(ripcbinfo
.ipi_lock
);
1067 * OK, now we're committed to doing something.
1069 gencnt
= ripcbinfo
.ipi_gencnt
;
1070 n
= ripcbinfo
.ipi_count
;
1072 bzero(&xig
, sizeof(xig
));
1073 xig
.xig_len
= sizeof xig
;
1075 xig
.xig_gen
= gencnt
;
1076 xig
.xig_sogen
= so_gencnt
;
1077 error
= SYSCTL_OUT(req
, &xig
, sizeof xig
);
1079 lck_rw_done(ripcbinfo
.ipi_lock
);
1083 * We are done if there is no pcb
1086 lck_rw_done(ripcbinfo
.ipi_lock
);
1090 inp_list
= _MALLOC(n
* sizeof *inp_list
, M_TEMP
, M_WAITOK
);
1091 if (inp_list
== 0) {
1092 lck_rw_done(ripcbinfo
.ipi_lock
);
1096 for (inp
= ripcbinfo
.ipi_listhead
->lh_first
, i
= 0; inp
&& i
< n
;
1097 inp
= inp
->inp_list
.le_next
) {
1098 if (inp
->inp_gencnt
<= gencnt
&& inp
->inp_state
!= INPCB_STATE_DEAD
)
1099 inp_list
[i
++] = inp
;
1104 for (i
= 0; i
< n
; i
++) {
1106 if (inp
->inp_gencnt
<= gencnt
&& inp
->inp_state
!= INPCB_STATE_DEAD
) {
1109 bzero(&xi
, sizeof(xi
));
1110 xi
.xi_len
= sizeof xi
;
1111 /* XXX should avoid extra copy */
1112 inpcb_to_compat(inp
, &xi
.xi_inp
);
1113 if (inp
->inp_socket
)
1114 sotoxsocket(inp
->inp_socket
, &xi
.xi_socket
);
1115 error
= SYSCTL_OUT(req
, &xi
, sizeof xi
);
1120 * Give the user an updated idea of our state.
1121 * If the generation differs from what we told
1122 * her before, she knows that something happened
1123 * while we were processing this request, and it
1124 * might be necessary to retry.
1126 bzero(&xig
, sizeof(xig
));
1127 xig
.xig_len
= sizeof xig
;
1128 xig
.xig_gen
= ripcbinfo
.ipi_gencnt
;
1129 xig
.xig_sogen
= so_gencnt
;
1130 xig
.xig_count
= ripcbinfo
.ipi_count
;
1131 error
= SYSCTL_OUT(req
, &xig
, sizeof xig
);
1133 FREE(inp_list
, M_TEMP
);
1134 lck_rw_done(ripcbinfo
.ipi_lock
);
/*
 * Registers rip_pcblist as the read-only handler for the pcblist entry
 * under the _net_inet_raw node; the format string names struct xinpcb.
 */
1138 SYSCTL_PROC(_net_inet_raw
, OID_AUTO
/*XXX*/, pcblist
, CTLFLAG_RD
| CTLFLAG_LOCKED
, 0, 0,
1139 rip_pcblist
, "S,xinpcb", "List of active raw IP sockets");
1143 rip_pcblist64 SYSCTL_HANDLER_ARGS
1145 #pragma unused(oidp, arg1, arg2)
1147 struct inpcb
*inp
, **inp_list
;
1152 * The process of preparing the PCB list is too time-consuming and
1153 * resource-intensive to repeat twice on every request.
1155 lck_rw_lock_exclusive(ripcbinfo
.ipi_lock
);
1156 if (req
->oldptr
== USER_ADDR_NULL
) {
1157 n
= ripcbinfo
.ipi_count
;
1158 req
->oldidx
= 2 * (sizeof xig
)
1159 + (n
+ n
/8) * sizeof(struct xinpcb64
);
1160 lck_rw_done(ripcbinfo
.ipi_lock
);
1164 if (req
->newptr
!= USER_ADDR_NULL
) {
1165 lck_rw_done(ripcbinfo
.ipi_lock
);
1170 * OK, now we're committed to doing something.
1172 gencnt
= ripcbinfo
.ipi_gencnt
;
1173 n
= ripcbinfo
.ipi_count
;
1175 bzero(&xig
, sizeof(xig
));
1176 xig
.xig_len
= sizeof xig
;
1178 xig
.xig_gen
= gencnt
;
1179 xig
.xig_sogen
= so_gencnt
;
1180 error
= SYSCTL_OUT(req
, &xig
, sizeof xig
);
1182 lck_rw_done(ripcbinfo
.ipi_lock
);
1186 * We are done if there is no pcb
1189 lck_rw_done(ripcbinfo
.ipi_lock
);
1193 inp_list
= _MALLOC(n
* sizeof *inp_list
, M_TEMP
, M_WAITOK
);
1194 if (inp_list
== 0) {
1195 lck_rw_done(ripcbinfo
.ipi_lock
);
1199 for (inp
= ripcbinfo
.ipi_listhead
->lh_first
, i
= 0; inp
&& i
< n
;
1200 inp
= inp
->inp_list
.le_next
) {
1201 if (inp
->inp_gencnt
<= gencnt
&& inp
->inp_state
!= INPCB_STATE_DEAD
)
1202 inp_list
[i
++] = inp
;
1207 for (i
= 0; i
< n
; i
++) {
1209 if (inp
->inp_gencnt
<= gencnt
&& inp
->inp_state
!= INPCB_STATE_DEAD
) {
1212 bzero(&xi
, sizeof(xi
));
1213 xi
.xi_len
= sizeof xi
;
1214 inpcb_to_xinpcb64(inp
, &xi
);
1215 if (inp
->inp_socket
)
1216 sotoxsocket64(inp
->inp_socket
, &xi
.xi_socket
);
1217 error
= SYSCTL_OUT(req
, &xi
, sizeof xi
);
1222 * Give the user an updated idea of our state.
1223 * If the generation differs from what we told
1224 * her before, she knows that something happened
1225 * while we were processing this request, and it
1226 * might be necessary to retry.
1228 bzero(&xig
, sizeof(xig
));
1229 xig
.xig_len
= sizeof xig
;
1230 xig
.xig_gen
= ripcbinfo
.ipi_gencnt
;
1231 xig
.xig_sogen
= so_gencnt
;
1232 xig
.xig_count
= ripcbinfo
.ipi_count
;
1233 error
= SYSCTL_OUT(req
, &xig
, sizeof xig
);
1235 FREE(inp_list
, M_TEMP
);
1236 lck_rw_done(ripcbinfo
.ipi_lock
);
/*
 * Registers rip_pcblist64 as the read-only handler for the pcblist64
 * entry under the _net_inet_raw node; the format string names struct
 * xinpcb64.
 */
1240 SYSCTL_PROC(_net_inet_raw
, OID_AUTO
, pcblist64
, CTLFLAG_RD
| CTLFLAG_LOCKED
, 0, 0,
1241 rip_pcblist64
, "S,xinpcb64", "List of active raw IP sockets");
/*
 * rip_pcblist_n: sysctl handler behind the pcblist_n entry. The oidp,
 * arg1 and arg2 handler arguments are unused; the visible work is a
 * single call to get_pcblist_n() with IPPROTO_IP, the request and
 * ripcbinfo, whose result is stored in error.
 * NOTE(review): the declaration of error and the return statement are
 * not present in this view.
 */
1246 rip_pcblist_n SYSCTL_HANDLER_ARGS
1248 #pragma unused(oidp, arg1, arg2)
1251 error
= get_pcblist_n(IPPROTO_IP
, req
, &ripcbinfo
);
/*
 * Registers rip_pcblist_n as the read-only handler for the pcblist_n
 * entry under the _net_inet_raw node; the format string names struct
 * xinpcb_n.
 */
1256 SYSCTL_PROC(_net_inet_raw
, OID_AUTO
, pcblist_n
, CTLFLAG_RD
| CTLFLAG_LOCKED
, 0, 0,
1257 rip_pcblist_n
, "S,xinpcb_n", "List of active raw IP sockets");
/*
 * User-request dispatch table for raw IP sockets. The abort, attach,
 * bind, connect, detach, disconnect, send and shutdown slots point at
 * the rip_* handlers in this file. The control, peeraddr and sockaddr
 * slots use in_control, in_getpeeraddr and in_getsockaddr, and the
 * sosend and soreceive slots use the functions of the same name.
 */
1259 struct pr_usrreqs rip_usrreqs
= {
1260 .pru_abort
= rip_abort
,
1261 .pru_attach
= rip_attach
,
1262 .pru_bind
= rip_bind
,
1263 .pru_connect
= rip_connect
,
1264 .pru_control
= in_control
,
1265 .pru_detach
= rip_detach
,
1266 .pru_disconnect
= rip_disconnect
,
1267 .pru_peeraddr
= in_getpeeraddr
,
1268 .pru_send
= rip_send
,
1269 .pru_shutdown
= rip_shutdown
,
1270 .pru_sockaddr
= in_getsockaddr
,
1271 .pru_sosend
= sosend
,
1272 .pru_soreceive
= soreceive
,
1274 /* DSEP Review Done pl-20051213-v02 @3253 */