2 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 * Copyright (c) 1982, 1986, 1988, 1993
30 * The Regents of the University of California. All rights reserved.
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * @(#)raw_ip.c 8.7 (Berkeley) 5/15/95
63 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
64 * support for mandatory and extensible security protections. This notice
65 * is included in support of clause 2.2 (b) of the Apple Public License,
69 #include <sys/param.h>
70 #include <sys/systm.h>
71 #include <sys/kernel.h>
72 #include <sys/malloc.h>
74 #include <sys/mcache.h>
76 #include <sys/domain.h>
77 #include <sys/protosw.h>
78 #include <sys/socket.h>
79 #include <sys/socketvar.h>
80 #include <sys/sysctl.h>
81 #include <libkern/OSAtomic.h>
82 #include <kern/zalloc.h>
84 #include <pexpert/pexpert.h>
87 #include <net/net_api_stats.h>
88 #include <net/route.h>
89 #include <net/content_filter.h>
92 #include <netinet/in.h>
93 #include <netinet/in_systm.h>
94 #include <netinet/in_tclass.h>
95 #include <netinet/ip.h>
96 #include <netinet/in_pcb.h>
97 #include <netinet/in_var.h>
98 #include <netinet/ip_var.h>
100 #include <netinet6/in6_pcb.h>
104 #include <netinet6/ipsec.h>
108 #include <netinet/ip_dummynet.h>
109 #endif /* DUMMYNET */
111 int rip_detach(struct socket
*);
112 int rip_abort(struct socket
*);
113 int rip_disconnect(struct socket
*);
114 int rip_bind(struct socket
*, struct sockaddr
*, struct proc
*);
115 int rip_connect(struct socket
*, struct sockaddr
*, struct proc
*);
116 int rip_shutdown(struct socket
*);
118 struct inpcbhead ripcb
;
119 struct inpcbinfo ripcbinfo
;
121 /* control hooks for dummynet */
123 ip_dn_ctl_t
*ip_dn_ctl_ptr
;
124 #endif /* DUMMYNET */
127 * Nominal space allocated to a raw ip socket.
133 * Raw interface to IP protocol.
137 * Initialize raw connection block q.
140 rip_init(struct protosw
*pp
, struct domain
*dp
)
143 static int rip_initialized
= 0;
144 struct inpcbinfo
*pcbinfo
;
146 VERIFY((pp
->pr_flags
& (PR_INITIALIZED
| PR_ATTACHED
)) == PR_ATTACHED
);
148 if (rip_initialized
) {
154 ripcbinfo
.ipi_listhead
= &ripcb
;
156 * XXX We don't use the hash list for raw IP, but it's easier
157 * to allocate a one entry hash list than it is to check all
158 * over the place for ipi_hashbase == NULL.
160 ripcbinfo
.ipi_hashbase
= hashinit(1, M_PCB
, &ripcbinfo
.ipi_hashmask
);
161 ripcbinfo
.ipi_porthashbase
= hashinit(1, M_PCB
, &ripcbinfo
.ipi_porthashmask
);
163 ripcbinfo
.ipi_zone
= zone_create("ripzone", sizeof(struct inpcb
),
166 pcbinfo
= &ripcbinfo
;
168 * allocate lock group attribute and group for raw IP pcb mutexes
170 pcbinfo
->ipi_lock_grp_attr
= lck_grp_attr_alloc_init();
171 pcbinfo
->ipi_lock_grp
= lck_grp_alloc_init("ripcb", pcbinfo
->ipi_lock_grp_attr
);
174 * allocate the lock attribute for raw IP pcb mutexes
176 pcbinfo
->ipi_lock_attr
= lck_attr_alloc_init();
177 if ((pcbinfo
->ipi_lock
= lck_rw_alloc_init(pcbinfo
->ipi_lock_grp
,
178 pcbinfo
->ipi_lock_attr
)) == NULL
) {
179 panic("%s: unable to allocate PCB lock\n", __func__
);
183 in_pcbinfo_attach(&ripcbinfo
);
186 static struct sockaddr_in ripsrc
= {
187 .sin_len
= sizeof(ripsrc
),
188 .sin_family
= AF_INET
,
190 .sin_addr
= { .s_addr
= 0 },
191 .sin_zero
= {0, 0, 0, 0, 0, 0, 0, 0, }
195 * Set up generic address and protocol structures
196 * for raw_input routine, then pass them along with
200 rip_input(struct mbuf
*m
, int iphlen
)
202 struct ip
*ip
= mtod(m
, struct ip
*);
204 struct inpcb
*last
= 0;
205 struct mbuf
*opts
= 0;
206 int skipit
= 0, ret
= 0;
207 struct ifnet
*ifp
= m
->m_pkthdr
.rcvif
;
209 /* Expect 32-bit aligned data pointer on strict-align platforms */
210 MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m
);
212 ripsrc
.sin_addr
= ip
->ip_src
;
213 lck_rw_lock_shared(ripcbinfo
.ipi_lock
);
214 LIST_FOREACH(inp
, &ripcb
, inp_list
) {
215 if ((inp
->inp_vflag
& INP_IPV4
) == 0) {
218 if (inp
->inp_ip_p
&& (inp
->inp_ip_p
!= ip
->ip_p
)) {
221 if (inp
->inp_laddr
.s_addr
&&
222 inp
->inp_laddr
.s_addr
!= ip
->ip_dst
.s_addr
) {
225 if (inp
->inp_faddr
.s_addr
&&
226 inp
->inp_faddr
.s_addr
!= ip
->ip_src
.s_addr
) {
229 if (inp_restricted_recv(inp
, ifp
)) {
233 struct mbuf
*n
= m_copy(m
, 0, (int)M_COPYALL
);
238 if (n
&& !necp_socket_is_allowed_to_send_recv_v4(last
, 0, 0,
239 &ip
->ip_dst
, &ip
->ip_src
, ifp
, 0, NULL
, NULL
, NULL
, NULL
)) {
241 /* do not inject data to pcb */
245 if (n
&& skipit
== 0) {
247 if ((last
->inp_flags
& INP_CONTROLOPTS
) != 0 ||
249 /* Content Filter needs to see local address */
250 (last
->inp_socket
->so_cfil_db
!= NULL
) ||
252 (last
->inp_socket
->so_options
& SO_TIMESTAMP
) != 0 ||
253 (last
->inp_socket
->so_options
& SO_TIMESTAMP_MONOTONIC
) != 0 ||
254 (last
->inp_socket
->so_options
& SO_TIMESTAMP_CONTINUOUS
) != 0) {
255 ret
= ip_savecontrol(last
, &opts
, ip
, n
);
263 if (last
->inp_flags
& INP_STRIPHDR
266 * If socket is subject to Content Filter, delay stripping until reinject
268 && (last
->inp_socket
->so_cfil_db
== NULL
)
272 n
->m_pkthdr
.len
-= iphlen
;
275 so_recv_data_stat(last
->inp_socket
, m
, 0);
276 if (sbappendaddr(&last
->inp_socket
->so_rcv
,
277 (struct sockaddr
*)&ripsrc
, n
,
278 opts
, &error
) != 0) {
279 sorwakeup(last
->inp_socket
);
282 /* should notify about lost packet */
283 ipstat
.ips_raw_sappend_fail
++;
294 if (last
&& !necp_socket_is_allowed_to_send_recv_v4(last
, 0, 0,
295 &ip
->ip_dst
, &ip
->ip_src
, ifp
, 0, NULL
, NULL
, NULL
, NULL
)) {
297 OSAddAtomic(1, &ipstat
.ips_delivered
);
298 /* do not inject data to pcb */
304 if ((last
->inp_flags
& INP_CONTROLOPTS
) != 0 ||
306 /* Content Filter needs to see local address */
307 (last
->inp_socket
->so_cfil_db
!= NULL
) ||
309 (last
->inp_socket
->so_options
& SO_TIMESTAMP
) != 0 ||
310 (last
->inp_socket
->so_options
& SO_TIMESTAMP_MONOTONIC
) != 0 ||
311 (last
->inp_socket
->so_options
& SO_TIMESTAMP_CONTINUOUS
) != 0) {
312 ret
= ip_savecontrol(last
, &opts
, ip
, m
);
319 if (last
->inp_flags
& INP_STRIPHDR
322 * If socket is subject to Content Filter, delay stripping until reinject
324 && (last
->inp_socket
->so_cfil_db
== NULL
)
328 m
->m_pkthdr
.len
-= iphlen
;
331 so_recv_data_stat(last
->inp_socket
, m
, 0);
332 if (sbappendaddr(&last
->inp_socket
->so_rcv
,
333 (struct sockaddr
*)&ripsrc
, m
, opts
, NULL
) != 0) {
334 sorwakeup(last
->inp_socket
);
336 ipstat
.ips_raw_sappend_fail
++;
340 OSAddAtomic(1, &ipstat
.ips_noproto
);
341 OSAddAtomic(-1, &ipstat
.ips_delivered
);
346 * Keep the list locked because socket filter may force the socket lock
347 * to be released when calling sbappendaddr() -- see rdar://7627704
349 lck_rw_done(ripcbinfo
.ipi_lock
);
353 * Generate IP header and pass packet to ip_output.
354 * Tack on options user may have set up with control call.
361 struct mbuf
*control
)
364 struct inpcb
*inp
= sotoinpcb(so
);
365 int flags
= (so
->so_options
& SO_DONTROUTE
) | IP_ALLOWBROADCAST
;
366 int inp_flags
= inp
? inp
->inp_flags
: 0;
367 struct ip_out_args ipoa
;
368 struct ip_moptions
*imo
;
369 int tos
= IPTOS_UNSPEC
;
372 struct m_tag
*cfil_tag
= NULL
;
373 bool cfil_faddr_use
= false;
374 uint32_t cfil_so_state_change_cnt
= 0;
375 uint32_t cfil_so_options
= 0;
376 int cfil_inp_flags
= 0;
377 struct sockaddr
*cfil_faddr
= NULL
;
378 struct sockaddr_in
*cfil_sin
;
383 * If socket is subject to Content Filter and no addr is passed in,
384 * retrieve CFIL saved state from mbuf and use it if necessary.
386 if (so
->so_cfil_db
&& dst
== INADDR_ANY
) {
387 cfil_tag
= cfil_dgram_get_socket_state(m
, &cfil_so_state_change_cnt
, &cfil_so_options
, &cfil_faddr
, &cfil_inp_flags
);
389 cfil_sin
= SIN(cfil_faddr
);
390 flags
= (cfil_so_options
& SO_DONTROUTE
) | IP_ALLOWBROADCAST
;
391 inp_flags
= cfil_inp_flags
;
392 if (inp
&& inp
->inp_faddr
.s_addr
== INADDR_ANY
) {
394 * Socket is unconnected, simply use the saved faddr as 'addr' to go through
395 * the connect/disconnect logic.
397 dst
= cfil_sin
->sin_addr
.s_addr
;
398 } else if ((so
->so_state_change_cnt
!= cfil_so_state_change_cnt
) &&
399 (inp
->inp_fport
!= cfil_sin
->sin_port
||
400 inp
->inp_faddr
.s_addr
!= cfil_sin
->sin_addr
.s_addr
)) {
402 * Socket is connected but socket state and dest addr/port changed.
403 * We need to use the saved faddr and socket options.
405 cfil_faddr_use
= true;
407 m_tag_free(cfil_tag
);
412 if (so
->so_state
& SS_ISCONNECTED
) {
413 if (dst
!= INADDR_ANY
) {
417 if (control
!= NULL
) {
422 dst
= cfil_faddr_use
? cfil_sin
->sin_addr
.s_addr
: inp
->inp_faddr
.s_addr
;
424 if (dst
== INADDR_ANY
) {
428 if (control
!= NULL
) {
435 bzero(&ipoa
, sizeof(ipoa
));
436 ipoa
.ipoa_boundif
= IFSCOPE_NONE
;
437 ipoa
.ipoa_flags
= IPOAF_SELECT_SRCIF
;
439 int sotc
= SO_TC_UNSPEC
;
440 int netsvctype
= _NET_SERVICE_TYPE_UNSPEC
;
443 if (control
!= NULL
) {
444 tos
= so_tos_from_control(control
);
445 sotc
= so_tc_from_control(control
, &netsvctype
);
450 if (sotc
== SO_TC_UNSPEC
) {
451 sotc
= so
->so_traffic_class
;
452 netsvctype
= so
->so_netsvctype
;
457 || (necp_socket_should_use_flow_divert(inp
))
463 VERIFY(control
== NULL
);
464 return inp
== NULL
? EINVAL
: EPROTOTYPE
;
468 /* If socket was bound to an ifindex, tell ip_output about it */
469 if (inp
->inp_flags
& INP_BOUND_IF
) {
470 ipoa
.ipoa_boundif
= inp
->inp_boundifp
->if_index
;
471 ipoa
.ipoa_flags
|= IPOAF_BOUND_IF
;
473 if (INP_NO_CELLULAR(inp
)) {
474 ipoa
.ipoa_flags
|= IPOAF_NO_CELLULAR
;
476 if (INP_NO_EXPENSIVE(inp
)) {
477 ipoa
.ipoa_flags
|= IPOAF_NO_EXPENSIVE
;
479 if (INP_NO_CONSTRAINED(inp
)) {
480 ipoa
.ipoa_flags
|= IPOAF_NO_CONSTRAINED
;
482 if (INP_AWDL_UNRESTRICTED(inp
)) {
483 ipoa
.ipoa_flags
|= IPOAF_AWDL_UNRESTRICTED
;
485 ipoa
.ipoa_sotc
= sotc
;
486 ipoa
.ipoa_netsvctype
= netsvctype
;
488 if (inp
->inp_flowhash
== 0) {
489 inp
->inp_flowhash
= inp_calc_flowhash(inp
);
493 * If the user handed us a complete IP packet, use it.
494 * Otherwise, allocate an mbuf for a header and fill it in.
496 if ((inp_flags
& INP_HDRINCL
) == 0) {
497 if (m
->m_pkthdr
.len
+ sizeof(struct ip
) > IP_MAXPACKET
) {
501 M_PREPEND(m
, sizeof(struct ip
), M_WAIT
, 1);
505 ip
= mtod(m
, struct ip
*);
506 if (tos
!= IPTOS_UNSPEC
) {
507 ip
->ip_tos
= (uint8_t)(tos
& IPTOS_MASK
);
509 ip
->ip_tos
= inp
->inp_ip_tos
;
511 if (inp
->inp_flags2
& INP2_DONTFRAG
) {
516 ip
->ip_p
= inp
->inp_ip_p
;
517 ip
->ip_len
= (uint16_t)m
->m_pkthdr
.len
;
518 ip
->ip_src
= inp
->inp_laddr
;
519 ip
->ip_dst
.s_addr
= dst
;
520 ip
->ip_ttl
= inp
->inp_ip_ttl
;
522 if (m
->m_pkthdr
.len
> IP_MAXPACKET
) {
526 ip
= mtod(m
, struct ip
*);
527 /* don't allow both user specified and setsockopt options,
528 * and don't allow packet length sizes that will crash */
529 if (((IP_VHL_HL(ip
->ip_vhl
) != (sizeof(*ip
) >> 2))
531 || (ip
->ip_len
> m
->m_pkthdr
.len
)
532 || (ip
->ip_len
< (IP_VHL_HL(ip
->ip_vhl
) << 2))) {
536 if (ip
->ip_id
== 0 && !(rfc6864
&& IP_OFF_IS_ATOMIC(ntohs(ip
->ip_off
)))) {
537 ip
->ip_id
= ip_randomid();
539 /* XXX prevent ip_output from overwriting header fields */
540 flags
|= IP_RAWOUTPUT
;
541 OSAddAtomic(1, &ipstat
.ips_rawout
);
544 if (inp
->inp_laddr
.s_addr
!= INADDR_ANY
) {
545 ipoa
.ipoa_flags
|= IPOAF_BOUND_SRCADDR
;
550 necp_kernel_policy_id policy_id
;
551 necp_kernel_policy_id skip_policy_id
;
552 u_int32_t route_rule_id
;
553 u_int32_t pass_flags
;
556 * We need a route to perform NECP route rule checks
558 if ((net_qos_policy_restricted
!= 0 &&
559 ROUTE_UNUSABLE(&inp
->inp_route
))
564 struct sockaddr_in to
;
565 struct sockaddr_in from
;
566 struct in_addr laddr
= ip
->ip_src
;
568 ROUTE_RELEASE(&inp
->inp_route
);
570 bzero(&from
, sizeof(struct sockaddr_in
));
571 from
.sin_family
= AF_INET
;
572 from
.sin_len
= sizeof(struct sockaddr_in
);
573 from
.sin_addr
= laddr
;
575 bzero(&to
, sizeof(struct sockaddr_in
));
576 to
.sin_family
= AF_INET
;
577 to
.sin_len
= sizeof(struct sockaddr_in
);
578 to
.sin_addr
.s_addr
= ip
->ip_dst
.s_addr
;
580 if ((error
= in_pcbladdr(inp
, (struct sockaddr
*)&to
,
581 &laddr
, ipoa
.ipoa_boundif
, NULL
, 1)) != 0) {
582 printf("%s in_pcbladdr(%p) error %d\n",
583 __func__
, inp
, error
);
588 inp_update_necp_policy(inp
, (struct sockaddr
*)&from
,
589 (struct sockaddr
*)&to
, ipoa
.ipoa_boundif
);
590 inp
->inp_policyresult
.results
.qos_marking_gencount
= 0;
593 if (!necp_socket_is_allowed_to_send_recv_v4(inp
, 0, 0,
594 &ip
->ip_src
, &ip
->ip_dst
, NULL
, 0, &policy_id
, &route_rule_id
, &skip_policy_id
, &pass_flags
)) {
599 necp_mark_packet_from_socket(m
, inp
, policy_id
, route_rule_id
, skip_policy_id
, pass_flags
);
601 if (net_qos_policy_restricted
!= 0) {
602 struct ifnet
*rt_ifp
= NULL
;
604 if (inp
->inp_route
.ro_rt
!= NULL
) {
605 rt_ifp
= inp
->inp_route
.ro_rt
->rt_ifp
;
608 necp_socket_update_qos_marking(inp
, inp
->inp_route
.ro_rt
, route_rule_id
);
612 if ((so
->so_flags1
& SOF1_QOSMARKING_ALLOWED
)) {
613 ipoa
.ipoa_flags
|= IPOAF_QOSMARKING_ALLOWED
;
617 if (inp
->inp_sp
!= NULL
&& ipsec_setsocket(m
, so
) != 0) {
623 if (ROUTE_UNUSABLE(&inp
->inp_route
)) {
624 ROUTE_RELEASE(&inp
->inp_route
);
627 set_packet_service_class(m
, so
, sotc
, 0);
628 m
->m_pkthdr
.pkt_flowsrc
= FLOWSRC_INPCB
;
629 m
->m_pkthdr
.pkt_flowid
= inp
->inp_flowhash
;
630 m
->m_pkthdr
.pkt_flags
|= (PKTF_FLOW_ID
| PKTF_FLOW_LOCALSRC
|
632 m
->m_pkthdr
.pkt_proto
= inp
->inp_ip_p
;
633 m
->m_pkthdr
.tx_rawip_pid
= so
->last_pid
;
634 m
->m_pkthdr
.tx_rawip_e_pid
= so
->e_pid
;
635 if (so
->so_flags
& SOF_DELEGATED
) {
636 m
->m_pkthdr
.tx_rawip_e_pid
= so
->e_pid
;
638 m
->m_pkthdr
.tx_rawip_e_pid
= 0;
641 imo
= inp
->inp_moptions
;
646 * The domain lock is held across ip_output, so it is okay
647 * to pass the PCB cached route pointer directly to IP and
648 * the modules beneath it.
650 // TODO: PASS DOWN ROUTE RULE ID
651 error
= ip_output(m
, inp
->inp_options
, &inp
->inp_route
, flags
,
658 if (inp
->inp_route
.ro_rt
!= NULL
) {
659 struct rtentry
*rt
= inp
->inp_route
.ro_rt
;
662 if ((rt
->rt_flags
& (RTF_MULTICAST
| RTF_BROADCAST
)) ||
663 inp
->inp_socket
== NULL
||
665 /* Discard temporary route for cfil case */
668 !(inp
->inp_socket
->so_state
& SS_ISCONNECTED
)) {
669 rt
= NULL
; /* unusable */
672 * Always discard the cached route for unconnected
673 * socket or if it is a multicast route.
676 ROUTE_RELEASE(&inp
->inp_route
);
680 * If this is a connected socket and the destination
681 * route is unicast, update outif with that of the
682 * route interface used by IP.
685 (outif
= rt
->rt_ifp
) != inp
->inp_last_outifp
) {
686 inp
->inp_last_outifp
= outif
;
689 ROUTE_RELEASE(&inp
->inp_route
);
693 * If output interface was cellular/expensive/constrained, and this socket is
694 * denied access to it, generate an event.
696 if (error
!= 0 && (ipoa
.ipoa_retflags
& IPOARF_IFDENIED
) &&
697 (INP_NO_CELLULAR(inp
) || INP_NO_EXPENSIVE(inp
) || INP_NO_CONSTRAINED(inp
))) {
698 soevent(so
, (SO_FILT_HINT_LOCKED
| SO_FILT_HINT_IFDENIED
));
706 * Raw IP socket option processing.
709 rip_ctloutput(struct socket
*so
, struct sockopt
*sopt
)
711 struct inpcb
*inp
= sotoinpcb(so
);
714 /* Allow <SOL_SOCKET,SO_FLUSH> at this level */
715 if (sopt
->sopt_level
!= IPPROTO_IP
&&
716 !(sopt
->sopt_level
== SOL_SOCKET
&& sopt
->sopt_name
== SO_FLUSH
)) {
722 switch (sopt
->sopt_dir
) {
724 switch (sopt
->sopt_name
) {
726 optval
= inp
->inp_flags
& INP_HDRINCL
;
727 error
= sooptcopyout(sopt
, &optval
, sizeof optval
);
731 optval
= inp
->inp_flags
& INP_STRIPHDR
;
732 error
= sooptcopyout(sopt
, &optval
, sizeof optval
);
737 case IP_DUMMYNET_GET
:
738 if (!DUMMYNET_LOADED
) {
741 if (DUMMYNET_LOADED
) {
742 error
= ip_dn_ctl_ptr(sopt
);
747 #endif /* DUMMYNET */
750 error
= ip_ctloutput(so
, sopt
);
756 switch (sopt
->sopt_name
) {
758 error
= sooptcopyin(sopt
, &optval
, sizeof optval
,
764 inp
->inp_flags
|= INP_HDRINCL
;
766 inp
->inp_flags
&= ~INP_HDRINCL
;
771 error
= sooptcopyin(sopt
, &optval
, sizeof optval
,
777 inp
->inp_flags
|= INP_STRIPHDR
;
779 inp
->inp_flags
&= ~INP_STRIPHDR
;
785 case IP_DUMMYNET_CONFIGURE
:
786 case IP_DUMMYNET_DEL
:
787 case IP_DUMMYNET_FLUSH
:
788 if (!DUMMYNET_LOADED
) {
791 if (DUMMYNET_LOADED
) {
792 error
= ip_dn_ctl_ptr(sopt
);
797 #endif /* DUMMYNET */
800 if ((error
= sooptcopyin(sopt
, &optval
, sizeof(optval
),
801 sizeof(optval
))) != 0) {
805 error
= inp_flush(inp
, optval
);
809 error
= ip_ctloutput(so
, sopt
);
819 * This function exists solely to receive the PRC_IFDOWN messages which
820 * are sent by if_down(). It looks for an ifaddr whose ifa_addr is sa,
821 * and calls in_ifadown() to remove all routes corresponding to that address.
822 * It also receives the PRC_IFUP messages from if_up() and reinstalls the
830 __unused
struct ifnet
*ifp
)
832 struct in_ifaddr
*ia
= NULL
;
833 struct ifnet
*iaifp
= NULL
;
839 lck_rw_lock_shared(in_ifaddr_rwlock
);
840 for (ia
= in_ifaddrhead
.tqh_first
; ia
;
841 ia
= ia
->ia_link
.tqe_next
) {
842 IFA_LOCK(&ia
->ia_ifa
);
843 if (ia
->ia_ifa
.ifa_addr
== sa
&&
844 (ia
->ia_flags
& IFA_ROUTE
)) {
846 IFA_ADDREF_LOCKED(&ia
->ia_ifa
);
847 IFA_UNLOCK(&ia
->ia_ifa
);
848 lck_rw_done(in_ifaddr_rwlock
);
849 lck_mtx_lock(rnh_lock
);
851 * in_ifscrub kills the interface route.
853 in_ifscrub(ia
->ia_ifp
, ia
, 1);
855 * in_ifadown gets rid of all the rest of
856 * the routes. This is not quite the right
857 * thing to do, but at least if we are running
858 * a routing process they will come back.
860 in_ifadown(&ia
->ia_ifa
, 1);
861 lck_mtx_unlock(rnh_lock
);
862 IFA_REMREF(&ia
->ia_ifa
);
865 IFA_UNLOCK(&ia
->ia_ifa
);
868 lck_rw_done(in_ifaddr_rwlock
);
873 lck_rw_lock_shared(in_ifaddr_rwlock
);
874 for (ia
= in_ifaddrhead
.tqh_first
; ia
;
875 ia
= ia
->ia_link
.tqe_next
) {
876 IFA_LOCK(&ia
->ia_ifa
);
877 if (ia
->ia_ifa
.ifa_addr
== sa
) {
881 IFA_UNLOCK(&ia
->ia_ifa
);
883 if (ia
== NULL
|| (ia
->ia_flags
& IFA_ROUTE
) ||
884 (ia
->ia_ifa
.ifa_debug
& IFD_NOTREADY
)) {
886 IFA_UNLOCK(&ia
->ia_ifa
);
888 lck_rw_done(in_ifaddr_rwlock
);
891 IFA_ADDREF_LOCKED(&ia
->ia_ifa
);
892 IFA_UNLOCK(&ia
->ia_ifa
);
893 lck_rw_done(in_ifaddr_rwlock
);
896 iaifp
= ia
->ia_ifa
.ifa_ifp
;
898 if ((iaifp
->if_flags
& IFF_LOOPBACK
)
899 || (iaifp
->if_flags
& IFF_POINTOPOINT
)) {
903 err
= rtinit(&ia
->ia_ifa
, RTM_ADD
, flags
);
905 IFA_LOCK_SPIN(&ia
->ia_ifa
);
906 ia
->ia_flags
|= IFA_ROUTE
;
907 IFA_UNLOCK(&ia
->ia_ifa
);
909 IFA_REMREF(&ia
->ia_ifa
);
914 u_int32_t rip_sendspace
= RIPSNDQ
;
915 u_int32_t rip_recvspace
= RIPRCVQ
;
917 SYSCTL_INT(_net_inet_raw
, OID_AUTO
, maxdgram
, CTLFLAG_RW
| CTLFLAG_LOCKED
,
918 &rip_sendspace
, 0, "Maximum outgoing raw IP datagram size");
919 SYSCTL_INT(_net_inet_raw
, OID_AUTO
, recvspace
, CTLFLAG_RW
| CTLFLAG_LOCKED
,
920 &rip_recvspace
, 0, "Maximum incoming raw IP datagram size");
921 SYSCTL_UINT(_net_inet_raw
, OID_AUTO
, pcbcount
, CTLFLAG_RD
| CTLFLAG_LOCKED
,
922 &ripcbinfo
.ipi_count
, 0, "Number of active PCBs");
925 rip_attach(struct socket
*so
, int proto
, struct proc
*p
)
934 if ((so
->so_state
& SS_PRIV
) == 0) {
937 if (proto
> UINT8_MAX
) {
941 error
= soreserve(so
, rip_sendspace
, rip_recvspace
);
945 error
= in_pcballoc(so
, &ripcbinfo
, p
);
949 inp
= (struct inpcb
*)so
->so_pcb
;
950 inp
->inp_vflag
|= INP_IPV4
;
951 VERIFY(proto
<= UINT8_MAX
);
952 inp
->inp_ip_p
= (u_char
)proto
;
953 inp
->inp_ip_ttl
= (u_char
)ip_defttl
;
957 __private_extern__
int
958 rip_detach(struct socket
*so
)
970 __private_extern__
int
971 rip_abort(struct socket
*so
)
973 soisdisconnected(so
);
974 return rip_detach(so
);
977 __private_extern__
int
978 rip_disconnect(struct socket
*so
)
980 if ((so
->so_state
& SS_ISCONNECTED
) == 0) {
983 return rip_abort(so
);
986 __private_extern__
int
987 rip_bind(struct socket
*so
, struct sockaddr
*nam
, struct proc
*p
)
990 struct inpcb
*inp
= sotoinpcb(so
);
991 struct sockaddr_in sin
;
992 struct ifaddr
*ifa
= NULL
;
993 struct ifnet
*outif
= NULL
;
997 || (necp_socket_should_use_flow_divert(inp
))
1000 return inp
== NULL
? EINVAL
: EPROTOTYPE
;
1003 if (nam
->sa_len
!= sizeof(struct sockaddr_in
)) {
1007 /* Sanitized local copy for interface address searches */
1008 bzero(&sin
, sizeof(sin
));
1009 sin
.sin_family
= AF_INET
;
1010 sin
.sin_len
= sizeof(struct sockaddr_in
);
1011 sin
.sin_addr
.s_addr
= SIN(nam
)->sin_addr
.s_addr
;
1013 if (TAILQ_EMPTY(&ifnet_head
) ||
1014 (sin
.sin_family
!= AF_INET
&& sin
.sin_family
!= AF_IMPLINK
) ||
1015 (sin
.sin_addr
.s_addr
&& (ifa
= ifa_ifwithaddr(SA(&sin
))) == 0)) {
1016 return EADDRNOTAVAIL
;
1019 * Opportunistically determine the outbound
1020 * interface that may be used; this may not
1021 * hold true if we end up using a route
1022 * going over a different interface, e.g.
1023 * when sending to a local address. This
1024 * will get updated again after sending.
1027 outif
= ifa
->ifa_ifp
;
1031 inp
->inp_laddr
= sin
.sin_addr
;
1032 inp
->inp_last_outifp
= outif
;
1037 __private_extern__
int
1038 rip_connect(struct socket
*so
, struct sockaddr
*nam
, __unused
struct proc
*p
)
1040 struct inpcb
*inp
= sotoinpcb(so
);
1041 struct sockaddr_in
*addr
= (struct sockaddr_in
*)(void *)nam
;
1045 || (necp_socket_should_use_flow_divert(inp
))
1048 return inp
== NULL
? EINVAL
: EPROTOTYPE
;
1050 if (nam
->sa_len
!= sizeof(*addr
)) {
1053 if (TAILQ_EMPTY(&ifnet_head
)) {
1054 return EADDRNOTAVAIL
;
1056 if ((addr
->sin_family
!= AF_INET
) &&
1057 (addr
->sin_family
!= AF_IMPLINK
)) {
1058 return EAFNOSUPPORT
;
1061 if (!(so
->so_flags1
& SOF1_CONNECT_COUNTED
)) {
1062 so
->so_flags1
|= SOF1_CONNECT_COUNTED
;
1063 INC_ATOMIC_INT64_LIM(net_api_stats
.nas_socket_inet_dgram_connected
);
1066 inp
->inp_faddr
= addr
->sin_addr
;
1072 __private_extern__
int
1073 rip_shutdown(struct socket
*so
)
1079 __private_extern__
int
1080 rip_send(struct socket
*so
, int flags
, struct mbuf
*m
, struct sockaddr
*nam
,
1081 struct mbuf
*control
, struct proc
*p
)
1083 #pragma unused(flags, p)
1084 struct inpcb
*inp
= sotoinpcb(so
);
1085 u_int32_t dst
= INADDR_ANY
;
1090 || (necp_socket_should_use_flow_divert(inp
) && (error
= EPROTOTYPE
))
1102 dst
= ((struct sockaddr_in
*)(void *)nam
)->sin_addr
.s_addr
;
1104 return rip_output(m
, so
, dst
, control
);
1112 if (control
!= NULL
) {
1119 /* note: rip_unlock is called from different protos instead of the generic socket_unlock,
1120 * it will handle the socket dealloc on last reference
1123 rip_unlock(struct socket
*so
, int refcount
, void *debug
)
1126 struct inpcb
*inp
= sotoinpcb(so
);
1128 if (debug
== NULL
) {
1129 lr_saved
= __builtin_return_address(0);
1135 if (so
->so_usecount
<= 0) {
1136 panic("rip_unlock: bad refoucnt so=%p val=%x lrh= %s\n",
1137 so
, so
->so_usecount
, solockhistory_nr(so
));
1141 if (so
->so_usecount
== 0 && (inp
->inp_wantcnt
== WNT_STOPUSING
)) {
1142 /* cleanup after last reference */
1143 lck_mtx_unlock(so
->so_proto
->pr_domain
->dom_mtx
);
1144 lck_rw_lock_exclusive(ripcbinfo
.ipi_lock
);
1145 if (inp
->inp_state
!= INPCB_STATE_DEAD
) {
1146 if (SOCK_CHECK_DOM(so
, PF_INET6
)) {
1153 lck_rw_done(ripcbinfo
.ipi_lock
);
1157 so
->unlock_lr
[so
->next_unlock_lr
] = lr_saved
;
1158 so
->next_unlock_lr
= (so
->next_unlock_lr
+ 1) % SO_LCKDBG_MAX
;
1159 lck_mtx_unlock(so
->so_proto
->pr_domain
->dom_mtx
);
1164 rip_pcblist SYSCTL_HANDLER_ARGS
1166 #pragma unused(oidp, arg1, arg2)
1168 struct inpcb
*inp
, **inp_list
;
1173 * The process of preparing the PCB list is too time-consuming and
1174 * resource-intensive to repeat twice on every request.
1176 lck_rw_lock_exclusive(ripcbinfo
.ipi_lock
);
1177 if (req
->oldptr
== USER_ADDR_NULL
) {
1178 n
= ripcbinfo
.ipi_count
;
1179 req
->oldidx
= 2 * (sizeof xig
)
1180 + (n
+ n
/ 8) * sizeof(struct xinpcb
);
1181 lck_rw_done(ripcbinfo
.ipi_lock
);
1185 if (req
->newptr
!= USER_ADDR_NULL
) {
1186 lck_rw_done(ripcbinfo
.ipi_lock
);
1191 * OK, now we're committed to doing something.
1193 gencnt
= ripcbinfo
.ipi_gencnt
;
1194 n
= ripcbinfo
.ipi_count
;
1196 bzero(&xig
, sizeof(xig
));
1197 xig
.xig_len
= sizeof xig
;
1199 xig
.xig_gen
= gencnt
;
1200 xig
.xig_sogen
= so_gencnt
;
1201 error
= SYSCTL_OUT(req
, &xig
, sizeof xig
);
1203 lck_rw_done(ripcbinfo
.ipi_lock
);
1207 * We are done if there is no pcb
1210 lck_rw_done(ripcbinfo
.ipi_lock
);
1214 inp_list
= _MALLOC(n
* sizeof *inp_list
, M_TEMP
, M_WAITOK
);
1215 if (inp_list
== 0) {
1216 lck_rw_done(ripcbinfo
.ipi_lock
);
1220 for (inp
= ripcbinfo
.ipi_listhead
->lh_first
, i
= 0; inp
&& i
< n
;
1221 inp
= inp
->inp_list
.le_next
) {
1222 if (inp
->inp_gencnt
<= gencnt
&& inp
->inp_state
!= INPCB_STATE_DEAD
) {
1223 inp_list
[i
++] = inp
;
1229 for (i
= 0; i
< n
; i
++) {
1231 if (inp
->inp_gencnt
<= gencnt
&& inp
->inp_state
!= INPCB_STATE_DEAD
) {
1234 bzero(&xi
, sizeof(xi
));
1235 xi
.xi_len
= sizeof xi
;
1236 /* XXX should avoid extra copy */
1237 inpcb_to_compat(inp
, &xi
.xi_inp
);
1238 if (inp
->inp_socket
) {
1239 sotoxsocket(inp
->inp_socket
, &xi
.xi_socket
);
1241 error
= SYSCTL_OUT(req
, &xi
, sizeof xi
);
1246 * Give the user an updated idea of our state.
1247 * If the generation differs from what we told
1248 * her before, she knows that something happened
1249 * while we were processing this request, and it
1250 * might be necessary to retry.
1252 bzero(&xig
, sizeof(xig
));
1253 xig
.xig_len
= sizeof xig
;
1254 xig
.xig_gen
= ripcbinfo
.ipi_gencnt
;
1255 xig
.xig_sogen
= so_gencnt
;
1256 xig
.xig_count
= ripcbinfo
.ipi_count
;
1257 error
= SYSCTL_OUT(req
, &xig
, sizeof xig
);
1259 FREE(inp_list
, M_TEMP
);
1260 lck_rw_done(ripcbinfo
.ipi_lock
);
1264 SYSCTL_PROC(_net_inet_raw
, OID_AUTO
/*XXX*/, pcblist
,
1265 CTLTYPE_STRUCT
| CTLFLAG_RD
| CTLFLAG_LOCKED
, 0, 0,
1266 rip_pcblist
, "S,xinpcb", "List of active raw IP sockets");
1268 #if XNU_TARGET_OS_OSX
1271 rip_pcblist64 SYSCTL_HANDLER_ARGS
1273 #pragma unused(oidp, arg1, arg2)
1275 struct inpcb
*inp
, **inp_list
;
1280 * The process of preparing the PCB list is too time-consuming and
1281 * resource-intensive to repeat twice on every request.
1283 lck_rw_lock_exclusive(ripcbinfo
.ipi_lock
);
1284 if (req
->oldptr
== USER_ADDR_NULL
) {
1285 n
= ripcbinfo
.ipi_count
;
1286 req
->oldidx
= 2 * (sizeof xig
)
1287 + (n
+ n
/ 8) * sizeof(struct xinpcb64
);
1288 lck_rw_done(ripcbinfo
.ipi_lock
);
1292 if (req
->newptr
!= USER_ADDR_NULL
) {
1293 lck_rw_done(ripcbinfo
.ipi_lock
);
1298 * OK, now we're committed to doing something.
1300 gencnt
= ripcbinfo
.ipi_gencnt
;
1301 n
= ripcbinfo
.ipi_count
;
1303 bzero(&xig
, sizeof(xig
));
1304 xig
.xig_len
= sizeof xig
;
1306 xig
.xig_gen
= gencnt
;
1307 xig
.xig_sogen
= so_gencnt
;
1308 error
= SYSCTL_OUT(req
, &xig
, sizeof xig
);
1310 lck_rw_done(ripcbinfo
.ipi_lock
);
1314 * We are done if there is no pcb
1317 lck_rw_done(ripcbinfo
.ipi_lock
);
1321 inp_list
= _MALLOC(n
* sizeof *inp_list
, M_TEMP
, M_WAITOK
);
1322 if (inp_list
== 0) {
1323 lck_rw_done(ripcbinfo
.ipi_lock
);
1327 for (inp
= ripcbinfo
.ipi_listhead
->lh_first
, i
= 0; inp
&& i
< n
;
1328 inp
= inp
->inp_list
.le_next
) {
1329 if (inp
->inp_gencnt
<= gencnt
&& inp
->inp_state
!= INPCB_STATE_DEAD
) {
1330 inp_list
[i
++] = inp
;
1336 for (i
= 0; i
< n
; i
++) {
1338 if (inp
->inp_gencnt
<= gencnt
&& inp
->inp_state
!= INPCB_STATE_DEAD
) {
1341 bzero(&xi
, sizeof(xi
));
1342 xi
.xi_len
= sizeof xi
;
1343 inpcb_to_xinpcb64(inp
, &xi
);
1344 if (inp
->inp_socket
) {
1345 sotoxsocket64(inp
->inp_socket
, &xi
.xi_socket
);
1347 error
= SYSCTL_OUT(req
, &xi
, sizeof xi
);
1352 * Give the user an updated idea of our state.
1353 * If the generation differs from what we told
1354 * her before, she knows that something happened
1355 * while we were processing this request, and it
1356 * might be necessary to retry.
1358 bzero(&xig
, sizeof(xig
));
1359 xig
.xig_len
= sizeof xig
;
1360 xig
.xig_gen
= ripcbinfo
.ipi_gencnt
;
1361 xig
.xig_sogen
= so_gencnt
;
1362 xig
.xig_count
= ripcbinfo
.ipi_count
;
1363 error
= SYSCTL_OUT(req
, &xig
, sizeof xig
);
1365 FREE(inp_list
, M_TEMP
);
1366 lck_rw_done(ripcbinfo
.ipi_lock
);
1370 SYSCTL_PROC(_net_inet_raw
, OID_AUTO
, pcblist64
,
1371 CTLTYPE_STRUCT
| CTLFLAG_RD
| CTLFLAG_LOCKED
, 0, 0,
1372 rip_pcblist64
, "S,xinpcb64", "List of active raw IP sockets");
1374 #endif /* XNU_TARGET_OS_OSX */
1378 rip_pcblist_n SYSCTL_HANDLER_ARGS
1380 #pragma unused(oidp, arg1, arg2)
1383 error
= get_pcblist_n(IPPROTO_IP
, req
, &ripcbinfo
);
1388 SYSCTL_PROC(_net_inet_raw
, OID_AUTO
, pcblist_n
,
1389 CTLTYPE_STRUCT
| CTLFLAG_RD
| CTLFLAG_LOCKED
, 0, 0,
1390 rip_pcblist_n
, "S,xinpcb_n", "List of active raw IP sockets");
1392 struct pr_usrreqs rip_usrreqs
= {
1393 .pru_abort
= rip_abort
,
1394 .pru_attach
= rip_attach
,
1395 .pru_bind
= rip_bind
,
1396 .pru_connect
= rip_connect
,
1397 .pru_control
= in_control
,
1398 .pru_detach
= rip_detach
,
1399 .pru_disconnect
= rip_disconnect
,
1400 .pru_peeraddr
= in_getpeeraddr
,
1401 .pru_send
= rip_send
,
1402 .pru_shutdown
= rip_shutdown
,
1403 .pru_sockaddr
= in_getsockaddr
,
1404 .pru_sosend
= sosend
,
1405 .pru_soreceive
= soreceive
,
1407 /* DSEP Review Done pl-20051213-v02 @3253 */