2 * Copyright (c) 2010-2012 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 * Copyright (c) 1982, 1986, 1990, 1993
30 * The Regents of the University of California. All rights reserved.
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 #include <sys/types.h>
62 #include <sys/malloc.h>
63 #include <sys/socket.h>
64 #include <sys/socketvar.h>
65 #include <sys/protosw.h>
66 #include <sys/domain.h>
67 #include <sys/kernel.h>
68 #include <sys/sysctl.h>
69 #include <sys/dtrace.h>
70 #include <sys/kauth.h>
72 #include <net/route.h>
73 #include <net/if_var.h>
75 #include <netinet/in.h>
76 #include <netinet/in_pcb.h>
77 #include <netinet/ip_var.h>
79 #include <netinet/udp.h>
80 #include <netinet/udp_var.h>
82 #include <netinet/tcp.h>
83 #include <netinet/tcp_fsm.h>
84 #include <netinet/tcp_seq.h>
85 #include <netinet/tcp_timer.h>
86 #include <netinet/tcp_var.h>
/*
 * ROUNDUP64(x): x rounded up with P2ROUNDUP() to a multiple of
 * sizeof(u_int64_t).
 */
#define ROUNDUP64(x) P2ROUNDUP((x), sizeof(u_int64_t))

/*
 * ADVANCE64(p, n): the address ROUNDUP64(n) bytes past p, as a void *.
 * Used to step from one exported record to the next inside a single
 * buffer.  The whole expansion is parenthesized so that it stays one
 * operand when it appears inside a larger expression (e.g. under sizeof).
 */
#define ADVANCE64(p, n) ((void *)((char *)(p) + ROUNDUP64(n)))
/*
 * Forward declarations of the converters defined below.  Each one fills
 * an exported "_n" record (second argument) from a kernel object (first
 * argument).
 */
void sotoxsocket_n(struct socket *so, struct xsocket_n *xso);
void sbtoxsockbuf_n(struct sockbuf *sb, struct xsockbuf_n *xsb);
void sbtoxsockstat_n(struct socket *so, struct xsockstat_n *xst);
void inpcb_to_xinpcb_n(struct inpcb *inp, struct xinpcb_n *xinp);
void tcpcb_to_xtcpcb_n(struct tcpcb *tp, struct xtcpcb_n *xt);
103 __private_extern__
void
104 sotoxsocket_n(struct socket
*so
, struct xsocket_n
*xso
)
106 xso
->xso_len
= sizeof(struct xsocket_n
);
107 xso
->xso_kind
= XSO_SOCKET
;
110 xso
->xso_so
= (u_int64_t
)(uintptr_t)so
;
111 xso
->so_type
= so
->so_type
;
112 xso
->so_options
= so
->so_options
;
113 xso
->so_linger
= so
->so_linger
;
114 xso
->so_state
= so
->so_state
;
115 xso
->so_pcb
= (u_int64_t
)(uintptr_t)so
->so_pcb
;
117 xso
->xso_protocol
= so
->so_proto
->pr_protocol
;
118 xso
->xso_family
= so
->so_proto
->pr_domain
->dom_family
;
120 xso
->xso_protocol
= xso
->xso_family
= 0;
122 xso
->so_qlen
= so
->so_qlen
;
123 xso
->so_incqlen
= so
->so_incqlen
;
124 xso
->so_qlimit
= so
->so_qlimit
;
125 xso
->so_timeo
= so
->so_timeo
;
126 xso
->so_error
= so
->so_error
;
127 xso
->so_pgid
= so
->so_pgid
;
128 xso
->so_oobmark
= so
->so_oobmark
;
129 xso
->so_uid
= kauth_cred_getuid(so
->so_cred
);
133 __private_extern__
void
134 sbtoxsockbuf_n(struct sockbuf
*sb
, struct xsockbuf_n
*xsb
)
136 xsb
->xsb_len
= sizeof(struct xsockbuf_n
);
137 xsb
->xsb_kind
= (sb
->sb_flags
& SB_RECV
) ? XSO_RCVBUF
: XSO_SNDBUF
;
140 xsb
->sb_cc
= sb
->sb_cc
;
141 xsb
->sb_hiwat
= sb
->sb_hiwat
;
142 xsb
->sb_mbcnt
= sb
->sb_mbcnt
;
143 xsb
->sb_mbmax
= sb
->sb_mbmax
;
144 xsb
->sb_lowat
= sb
->sb_lowat
;
145 xsb
->sb_flags
= sb
->sb_flags
;
146 xsb
->sb_timeo
= (short)
147 (sb
->sb_timeo
.tv_sec
* hz
) + sb
->sb_timeo
.tv_usec
/ tick
;
148 if (xsb
->sb_timeo
== 0 && sb
->sb_timeo
.tv_usec
!= 0)
153 __private_extern__
void
154 sbtoxsockstat_n(struct socket
*so
, struct xsockstat_n
*xst
)
158 xst
->xst_len
= sizeof(struct xsockstat_n
);
159 xst
->xst_kind
= XSO_STATS
;
161 for (i
= 0; i
< SO_TC_STATS_MAX
; i
++) {
162 xst
->xst_tc_stats
[i
].rxpackets
= so
->so_tc_stats
[i
].rxpackets
;
163 xst
->xst_tc_stats
[i
].rxbytes
= so
->so_tc_stats
[i
].rxbytes
;
164 xst
->xst_tc_stats
[i
].txpackets
= so
->so_tc_stats
[i
].txpackets
;
165 xst
->xst_tc_stats
[i
].txbytes
= so
->so_tc_stats
[i
].txbytes
;
169 __private_extern__
void
170 inpcb_to_xinpcb_n(struct inpcb
*inp
, struct xinpcb_n
*xinp
)
172 xinp
->xi_len
= sizeof(struct xinpcb_n
);
173 xinp
->xi_kind
= XSO_INPCB
;
174 xinp
->xi_inpp
= (u_int64_t
)(uintptr_t)inp
;
175 xinp
->inp_fport
= inp
->inp_fport
;
176 xinp
->inp_lport
= inp
->inp_lport
;
177 xinp
->inp_ppcb
= (u_int64_t
)(uintptr_t)inp
->inp_ppcb
;
178 xinp
->inp_gencnt
= inp
->inp_gencnt
;
179 xinp
->inp_flags
= inp
->inp_flags
;
180 xinp
->inp_flow
= inp
->inp_flow
;
181 xinp
->inp_vflag
= inp
->inp_vflag
;
182 xinp
->inp_ip_ttl
= inp
->inp_ip_ttl
;
183 xinp
->inp_ip_p
= inp
->inp_ip_p
;
184 xinp
->inp_dependfaddr
.inp6_foreign
= inp
->inp_dependfaddr
.inp6_foreign
;
185 xinp
->inp_dependladdr
.inp6_local
= inp
->inp_dependladdr
.inp6_local
;
186 xinp
->inp_depend4
.inp4_ip_tos
= inp
->inp_depend4
.inp4_ip_tos
;
187 xinp
->inp_depend6
.inp6_hlim
= inp
->inp_depend6
.inp6_hlim
;
188 xinp
->inp_depend6
.inp6_cksum
= inp
->inp_depend6
.inp6_cksum
;
189 xinp
->inp_depend6
.inp6_ifindex
= inp
->inp_depend6
.inp6_ifindex
;
190 xinp
->inp_depend6
.inp6_hops
= inp
->inp_depend6
.inp6_hops
;
191 xinp
->inp_flowhash
= inp
->inp_flowhash
;
194 __private_extern__
void
195 tcpcb_to_xtcpcb_n(struct tcpcb
*tp
, struct xtcpcb_n
*xt
)
199 xt
->xt_len
= sizeof(struct xtcpcb_n
);
200 xt
->xt_kind
= XSO_TCPCB
;
202 xt
->t_segq
= (u_int32_t
)(uintptr_t)tp
->t_segq
.lh_first
;
203 xt
->t_dupacks
= tp
->t_dupacks
;
204 for (i
= 0; i
< TCPT_NTIMERS_EXT
; i
++)
205 xt
->t_timer
[i
] = tp
->t_timer
[i
];
206 xt
->t_state
= tp
->t_state
;
207 xt
->t_flags
= tp
->t_flags
;
208 xt
->t_force
= tp
->t_force
;
209 xt
->snd_una
= tp
->snd_una
;
210 xt
->snd_max
= tp
->snd_max
;
211 xt
->snd_nxt
= tp
->snd_nxt
;
212 xt
->snd_up
= tp
->snd_up
;
213 xt
->snd_wl1
= tp
->snd_wl1
;
214 xt
->snd_wl2
= tp
->snd_wl2
;
217 xt
->rcv_nxt
= tp
->rcv_nxt
;
218 xt
->rcv_adv
= tp
->rcv_adv
;
219 xt
->rcv_wnd
= tp
->rcv_wnd
;
220 xt
->rcv_up
= tp
->rcv_up
;
221 xt
->snd_wnd
= tp
->snd_wnd
;
222 xt
->snd_cwnd
= tp
->snd_cwnd
;
223 xt
->snd_ssthresh
= tp
->snd_ssthresh
;
224 xt
->t_maxopd
= tp
->t_maxopd
;
225 xt
->t_rcvtime
= tp
->t_rcvtime
;
226 xt
->t_starttime
= tp
->t_starttime
;
227 xt
->t_rtttime
= tp
->t_rtttime
;
228 xt
->t_rtseq
= tp
->t_rtseq
;
229 xt
->t_rxtcur
= tp
->t_rxtcur
;
230 xt
->t_maxseg
= tp
->t_maxseg
;
231 xt
->t_srtt
= tp
->t_srtt
;
232 xt
->t_rttvar
= tp
->t_rttvar
;
233 xt
->t_rxtshift
= tp
->t_rxtshift
;
234 xt
->t_rttmin
= tp
->t_rttmin
;
235 xt
->t_rttupdated
= tp
->t_rttupdated
;
236 xt
->max_sndwnd
= tp
->max_sndwnd
;
237 xt
->t_softerror
= tp
->t_softerror
;
238 xt
->t_oobflags
= tp
->t_oobflags
;
239 xt
->t_iobc
= tp
->t_iobc
;
240 xt
->snd_scale
= tp
->snd_scale
;
241 xt
->rcv_scale
= tp
->rcv_scale
;
242 xt
->request_r_scale
= tp
->request_r_scale
;
243 xt
->requested_s_scale
= tp
->requested_s_scale
;
244 xt
->ts_recent
= tp
->ts_recent
;
245 xt
->ts_recent_age
= tp
->ts_recent_age
;
246 xt
->last_ack_sent
= tp
->last_ack_sent
;
247 xt
->cc_send
= tp
->cc_send
;
248 xt
->cc_recv
= tp
->cc_recv
;
249 xt
->snd_recover
= tp
->snd_recover
;
250 xt
->snd_cwnd_prev
= tp
->snd_cwnd_prev
;
251 xt
->snd_ssthresh_prev
= tp
->snd_ssthresh_prev
;
252 xt
->t_badrxtwin
= tp
->t_badrxtwin
;
255 __private_extern__
int
256 get_pcblist_n(short proto
, struct sysctl_req
*req
, struct inpcbinfo
*pcbinfo
)
260 struct inpcb
*inp
, **inp_list
= NULL
;
264 size_t item_size
= ROUNDUP64(sizeof(struct xinpcb_n
)) +
265 ROUNDUP64(sizeof(struct xsocket_n
)) +
266 2 * ROUNDUP64(sizeof(struct xsockbuf_n
)) +
267 ROUNDUP64(sizeof(struct xsockstat_n
));
269 if (proto
== IPPROTO_TCP
)
270 item_size
+= ROUNDUP64(sizeof(struct xtcpcb_n
));
273 * The process of preparing the PCB list is too time-consuming and
274 * resource-intensive to repeat twice on every request.
276 lck_rw_lock_exclusive(pcbinfo
->mtx
);
277 if (req
->oldptr
== USER_ADDR_NULL
) {
278 n
= pcbinfo
->ipi_count
;
279 req
->oldidx
= 2 * (sizeof xig
)
280 + (n
+ n
/8) * item_size
;
284 if (req
->newptr
!= USER_ADDR_NULL
) {
290 * OK, now we're committed to doing something.
292 gencnt
= pcbinfo
->ipi_gencnt
;
293 n
= pcbinfo
->ipi_count
;
295 bzero(&xig
, sizeof(xig
));
296 xig
.xig_len
= sizeof xig
;
298 xig
.xig_gen
= gencnt
;
299 xig
.xig_sogen
= so_gencnt
;
300 error
= SYSCTL_OUT(req
, &xig
, sizeof xig
);
305 * We are done if there is no pcb
311 buf
= _MALLOC(item_size
, M_TEMP
, M_WAITOK
);
317 inp_list
= _MALLOC(n
* sizeof *inp_list
, M_TEMP
, M_WAITOK
);
323 for (inp
= pcbinfo
->listhead
->lh_first
, i
= 0; inp
&& i
< n
;
324 inp
= inp
->inp_list
.le_next
) {
325 if (inp
->inp_gencnt
<= gencnt
&& inp
->inp_state
!= INPCB_STATE_DEAD
)
331 for (i
= 0; i
< n
; i
++) {
333 if (inp
->inp_gencnt
<= gencnt
&& inp
->inp_state
!= INPCB_STATE_DEAD
) {
334 struct xinpcb_n
*xi
= (struct xinpcb_n
*)buf
;
335 struct xsocket_n
*xso
= (struct xsocket_n
*)ADVANCE64(xi
, sizeof(*xi
));
336 struct xsockbuf_n
*xsbrcv
= (struct xsockbuf_n
*)ADVANCE64(xso
, sizeof(*xso
));
337 struct xsockbuf_n
*xsbsnd
= (struct xsockbuf_n
*)ADVANCE64(xsbrcv
, sizeof(*xsbrcv
));
338 struct xsockstat_n
*xsostats
= (struct xsockstat_n
*)ADVANCE64(xsbsnd
, sizeof(*xsbsnd
));
340 bzero(buf
, item_size
);
342 inpcb_to_xinpcb_n(inp
, xi
);
343 sotoxsocket_n(inp
->inp_socket
, xso
);
344 sbtoxsockbuf_n(inp
->inp_socket
? &inp
->inp_socket
->so_rcv
: NULL
, xsbrcv
);
345 sbtoxsockbuf_n(inp
->inp_socket
? &inp
->inp_socket
->so_snd
: NULL
, xsbsnd
);
346 sbtoxsockstat_n(inp
->inp_socket
, xsostats
);
347 if (proto
== IPPROTO_TCP
) {
348 struct xtcpcb_n
*xt
= (struct xtcpcb_n
*)ADVANCE64(xsostats
, sizeof(*xsostats
));
351 * inp->inp_ppcb, can only be NULL on
352 * an initialization race window.
355 if (inp
->inp_ppcb
== NULL
)
358 tcpcb_to_xtcpcb_n((struct tcpcb
*)inp
->inp_ppcb
, xt
);
360 error
= SYSCTL_OUT(req
, buf
, item_size
);
365 * Give the user an updated idea of our state.
366 * If the generation differs from what we told
367 * her before, she knows that something happened
368 * while we were processing this request, and it
369 * might be necessary to retry.
371 bzero(&xig
, sizeof(xig
));
372 xig
.xig_len
= sizeof xig
;
373 xig
.xig_gen
= pcbinfo
->ipi_gencnt
;
374 xig
.xig_sogen
= so_gencnt
;
375 xig
.xig_count
= pcbinfo
->ipi_count
;
376 error
= SYSCTL_OUT(req
, &xig
, sizeof xig
);
379 lck_rw_done(pcbinfo
->mtx
);
381 FREE(inp_list
, M_TEMP
);
387 __private_extern__
void
388 inpcb_get_ports_used(unsigned int ifindex
, uint8_t *bitfield
, struct inpcbinfo
*pcbinfo
)
390 lck_rw_lock_shared(pcbinfo
->mtx
);
393 inp_gen_t gencnt
= pcbinfo
->ipi_gencnt
;
394 for (inp
= LIST_FIRST(pcbinfo
->listhead
); inp
; inp
= LIST_NEXT(inp
, inp_list
)) {
395 if (inp
->inp_gencnt
<= gencnt
&& inp
->inp_state
!= INPCB_STATE_DEAD
&&
396 (ifindex
== 0 || inp
->inp_last_outifp
== NULL
|| ifindex
== inp
->inp_last_outifp
->if_index
)) {
397 uint16_t port
= ntohs(inp
->inp_lport
);
398 bitfield
[port
/ 8] |= 1 << (port
& 0x7);
402 lck_rw_done(pcbinfo
->mtx
);
405 __private_extern__
uint32_t
406 inpcb_count_opportunistic(unsigned int ifindex
, struct inpcbinfo
*pcbinfo
,
409 uint32_t opportunistic
= 0;
411 lck_rw_lock_shared(pcbinfo
->mtx
);
414 inp_gen_t gencnt
= pcbinfo
->ipi_gencnt
;
415 for (inp
= LIST_FIRST(pcbinfo
->listhead
);
416 inp
; inp
= LIST_NEXT(inp
, inp_list
)) {
417 if (inp
->inp_gencnt
<= gencnt
&&
418 inp
->inp_state
!= INPCB_STATE_DEAD
&&
419 inp
->inp_socket
!= NULL
&&
420 so_get_opportunistic(inp
->inp_socket
) &&
421 inp
->inp_last_outifp
!= NULL
&&
422 ifindex
== inp
->inp_last_outifp
->if_index
) {
424 struct socket
*so
= inp
->inp_socket
;
425 if ((flags
& INPCB_OPPORTUNISTIC_SETCMD
) &&
426 (so
->so_state
& SS_ISCONNECTED
)) {
428 if (flags
& INPCB_OPPORTUNISTIC_THROTTLEON
) {
429 so
->so_flags
|= SOF_SUSPENDED
;
431 (SO_FILT_HINT_LOCKED
|
432 SO_FILT_HINT_SUSPEND
));
434 so
->so_flags
&= ~(SOF_SUSPENDED
);
436 (SO_FILT_HINT_LOCKED
|
437 SO_FILT_HINT_RESUME
));
439 SOTHROTTLELOG(("throttle[%d]: so %p [%d,%d] "
440 "%s\n", so
->last_pid
, so
, INP_SOCKAF(so
),
442 (so
->so_flags
& SOF_SUSPENDED
) ?
443 "SUSPENDED" : "RESUMED"));
444 socket_unlock(so
, 1);
449 lck_rw_done(pcbinfo
->mtx
);
451 return (opportunistic
);