1 /*
2 * Copyright (c) 1998-2019 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29 /*
30 * Copyright (c) 1982, 1986, 1988, 1990, 1993
31 * The Regents of the University of California. All rights reserved.
32 *
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
35 * are met:
36 * 1. Redistributions of source code must retain the above copyright
37 * notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 * notice, this list of conditions and the following disclaimer in the
40 * documentation and/or other materials provided with the distribution.
41 * 3. All advertising materials mentioning features or use of this software
42 * must display the following acknowledgement:
43 * This product includes software developed by the University of
44 * California, Berkeley and its contributors.
45 * 4. Neither the name of the University nor the names of its contributors
46 * may be used to endorse or promote products derived from this software
47 * without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 *
61 * @(#)uipc_socket2.c 8.1 (Berkeley) 6/10/93
62 */
63 /*
64 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
65 * support for mandatory and extensible security protections. This notice
66 * is included in support of clause 2.2 (b) of the Apple Public License,
67 * Version 2.0.
68 */
69
70 #include <sys/param.h>
71 #include <sys/systm.h>
72 #include <sys/domain.h>
73 #include <sys/kernel.h>
74 #include <sys/proc_internal.h>
75 #include <sys/kauth.h>
76 #include <sys/malloc.h>
77 #include <sys/mbuf.h>
78 #include <sys/mcache.h>
79 #include <sys/protosw.h>
80 #include <sys/stat.h>
81 #include <sys/socket.h>
82 #include <sys/socketvar.h>
83 #include <sys/signalvar.h>
84 #include <sys/sysctl.h>
85 #include <sys/syslog.h>
86 #include <sys/unpcb.h>
87 #include <sys/ev.h>
88 #include <kern/locks.h>
89 #include <net/route.h>
90 #include <net/content_filter.h>
91 #include <netinet/in.h>
92 #include <netinet/in_pcb.h>
93 #include <netinet/tcp_var.h>
94 #include <sys/kdebug.h>
95 #include <libkern/OSAtomic.h>
96
97 #if CONFIG_MACF
98 #include <security/mac_framework.h>
99 #endif
100
101 #include <mach/vm_param.h>
102
103 #if MPTCP
104 #include <netinet/mptcp_var.h>
105 #endif
106
107 #define DBG_FNC_SBDROP NETDBG_CODE(DBG_NETSOCK, 4)
108 #define DBG_FNC_SBAPPEND NETDBG_CODE(DBG_NETSOCK, 5)
109
110 SYSCTL_DECL(_kern_ipc);
111
112 __private_extern__ u_int32_t net_io_policy_throttle_best_effort = 0;
113 SYSCTL_INT(_kern_ipc, OID_AUTO, throttle_best_effort,
114 CTLFLAG_RW | CTLFLAG_LOCKED, &net_io_policy_throttle_best_effort, 0, "");
115
116 static inline void sbcompress(struct sockbuf *, struct mbuf *, struct mbuf *);
117 static struct socket *sonewconn_internal(struct socket *, int);
118 static int sbappendcontrol_internal(struct sockbuf *, struct mbuf *,
119 struct mbuf *);
120 static void soevent_ifdenied(struct socket *);
121
122 /*
123 * Primitive routines for operating on sockets and socket buffers
124 */
125 static int soqlimitcompat = 1;
126 static int soqlencomp = 0;
127
128 /*
129 * Based on the number of mbuf clusters configured, high_sb_max and sb_max can
130 * get scaled up or down to suit that memory configuration. high_sb_max is a
131 * higher limit on sb_max that is checked when sb_max gets set through sysctl.
132 */
133
134 u_int32_t sb_max = SB_MAX; /* XXX should be static */
135 u_int32_t high_sb_max = SB_MAX;
136
137 static u_int32_t sb_efficiency = 8; /* parameter for sbreserve() */
138 int32_t total_sbmb_cnt __attribute__((aligned(8))) = 0;
139 int32_t total_sbmb_cnt_floor __attribute__((aligned(8))) = 0;
140 int32_t total_sbmb_cnt_peak __attribute__((aligned(8))) = 0;
141 int64_t sbmb_limreached __attribute__((aligned(8))) = 0;
142
143 u_int32_t net_io_policy_log = 0; /* log socket policy changes */
144 #if CONFIG_PROC_UUID_POLICY
145 u_int32_t net_io_policy_uuid = 1; /* enable UUID socket policy */
146 #endif /* CONFIG_PROC_UUID_POLICY */
147
148 /*
149 * Procedures to manipulate state flags of socket
150 * and do appropriate wakeups. Normal sequence from the
151 * active (originating) side is that soisconnecting() is
152 * called during processing of connect() call,
153 * resulting in an eventual call to soisconnected() if/when the
154 * connection is established. When the connection is torn down
155 * soisdisconnecting() is called during processing of disconnect() call,
156 * and soisdisconnected() is called when the connection to the peer
157 * is totally severed. The semantics of these routines are such that
158 * connectionless protocols can call soisconnected() and soisdisconnected()
159 * only, bypassing the in-progress calls when setting up a ``connection''
160 * takes no time.
161 *
162 * From the passive side, a socket is created with
163 * two queues of sockets: so_incomp for connections in progress
164 * and so_comp for connections already made and awaiting user acceptance.
165 * As a protocol is preparing incoming connections, it creates a socket
166 * structure queued on so_incomp by calling sonewconn(). When the connection
167 * is established, soisconnected() is called, and transfers the
168 * socket structure to so_comp, making it available to accept().
169 *
170 * If a socket is closed with sockets on either
171 * so_incomp or so_comp, these sockets are dropped.
172 *
173 * If higher level protocols are implemented in
174 * the kernel, the wakeups done here will sometimes
175 * cause software-interrupt process scheduling.
176 */
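/*
 * Illustrative call sequence (not part of the original file), for a
 * connection-oriented protocol driving the transitions described above:
 *
 *	soisconnecting(so);	// connect()/handshake initiated
 *	...
 *	soisconnected(so);	// handshake complete; accept() may return so
 *	...
 *	soisdisconnecting(so);	// shutdown/close in progress
 *	...
 *	soisdisconnected(so);	// connection to the peer fully severed
 */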
177 void
178 soisconnecting(struct socket *so)
179 {
180 so->so_state &= ~(SS_ISCONNECTED | SS_ISDISCONNECTING);
181 so->so_state |= SS_ISCONNECTING;
182
183 sflt_notify(so, sock_evt_connecting, NULL);
184 }
185
186 void
187 soisconnected(struct socket *so)
188 {
189 /*
190	 * If the socket is subject to a content filter and is pending its
191	 * initial verdict, delay marking it as connected and do not present
192	 * the connected socket to the user just yet.
193 */
194 if (cfil_sock_connected_pending_verdict(so)) {
195 return;
196 }
197
198 so->so_state &= ~(SS_ISCONNECTING | SS_ISDISCONNECTING | SS_ISCONFIRMING);
199 so->so_state |= SS_ISCONNECTED;
200
201 soreserve_preconnect(so, 0);
202
203 sflt_notify(so, sock_evt_connected, NULL);
204
205 if (so->so_head != NULL && (so->so_state & SS_INCOMP)) {
206 struct socket *head = so->so_head;
207 int locked = 0;
208
209 /*
210 * Enforce lock order when the protocol has per socket locks
211 */
212 if (head->so_proto->pr_getlock != NULL) {
213 socket_lock(head, 1);
214 so_acquire_accept_list(head, so);
215 locked = 1;
216 }
217 if (so->so_head == head && (so->so_state & SS_INCOMP)) {
218 so->so_state &= ~SS_INCOMP;
219 so->so_state |= SS_COMP;
220 TAILQ_REMOVE(&head->so_incomp, so, so_list);
221 TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
222 head->so_incqlen--;
223
224 /*
225 * We have to release the accept list in
226 * case a socket callback calls sock_accept()
227 */
228 if (locked != 0) {
229 so_release_accept_list(head);
230 socket_unlock(so, 0);
231 }
232 postevent(head, 0, EV_RCONN);
233 sorwakeup(head);
234 wakeup_one((caddr_t)&head->so_timeo);
235
236 if (locked != 0) {
237 socket_unlock(head, 1);
238 socket_lock(so, 0);
239 }
240 } else if (locked != 0) {
241 so_release_accept_list(head);
242 socket_unlock(head, 1);
243 }
244 } else {
245 postevent(so, 0, EV_WCONN);
246 wakeup((caddr_t)&so->so_timeo);
247 sorwakeup(so);
248 sowwakeup(so);
249 soevent(so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_CONNECTED |
250 SO_FILT_HINT_CONNINFO_UPDATED);
251 }
252 }
253
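/*
 * Return TRUE if data can be written to the socket: it is either
 * already connected, the protocol does not require a connection,
 * or pre-connect data (SOF1_PRECONNECT_DATA) is permitted.
 */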
254 boolean_t
255 socanwrite(struct socket *so)
256 {
257 return (so->so_state & SS_ISCONNECTED) ||
258 !(so->so_proto->pr_flags & PR_CONNREQUIRED) ||
259 (so->so_flags1 & SOF1_PRECONNECT_DATA);
260 }
261
262 void
263 soisdisconnecting(struct socket *so)
264 {
265 so->so_state &= ~SS_ISCONNECTING;
266 so->so_state |= (SS_ISDISCONNECTING | SS_CANTRCVMORE | SS_CANTSENDMORE);
267 soevent(so, SO_FILT_HINT_LOCKED);
268 sflt_notify(so, sock_evt_disconnecting, NULL);
269 wakeup((caddr_t)&so->so_timeo);
270 sowwakeup(so);
271 sorwakeup(so);
272 }
273
274 void
275 soisdisconnected(struct socket *so)
276 {
277 so->so_state &= ~(SS_ISCONNECTING | SS_ISCONNECTED | SS_ISDISCONNECTING);
278 so->so_state |= (SS_CANTRCVMORE | SS_CANTSENDMORE | SS_ISDISCONNECTED);
279 soevent(so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_DISCONNECTED |
280 SO_FILT_HINT_CONNINFO_UPDATED);
281 sflt_notify(so, sock_evt_disconnected, NULL);
282 wakeup((caddr_t)&so->so_timeo);
283 sowwakeup(so);
284 sorwakeup(so);
285
286 #if CONTENT_FILTER
287 /* Notify content filters as soon as we cannot send/receive data */
288 cfil_sock_notify_shutdown(so, SHUT_RDWR);
289 #endif /* CONTENT_FILTER */
290 }
291
292 /*
293 * This function will issue a wakeup like soisdisconnected but it will not
294 * notify the socket filters. This will avoid unlocking the socket
295 * in the midst of closing it.
296 */
297 void
298 sodisconnectwakeup(struct socket *so)
299 {
300 so->so_state &= ~(SS_ISCONNECTING | SS_ISCONNECTED | SS_ISDISCONNECTING);
301 so->so_state |= (SS_CANTRCVMORE | SS_CANTSENDMORE | SS_ISDISCONNECTED);
302 soevent(so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_DISCONNECTED |
303 SO_FILT_HINT_CONNINFO_UPDATED);
304 wakeup((caddr_t)&so->so_timeo);
305 sowwakeup(so);
306 sorwakeup(so);
307
308 #if CONTENT_FILTER
309 /* Notify content filters as soon as we cannot send/receive data */
310 cfil_sock_notify_shutdown(so, SHUT_RDWR);
311 #endif /* CONTENT_FILTER */
312 }
313
314 /*
315 * When an attempt at a new connection is noted on a socket
316 * which accepts connections, sonewconn is called. If the
317 * connection is possible (subject to space constraints, etc.)
318	 * then we allocate a new structure, properly linked into the
319	 * data structure of the original socket, and return this.
320	 * Connstatus may be 0, SS_ISCONFIRMING, or SS_ISCONNECTED.
321 */
322 static struct socket *
323 sonewconn_internal(struct socket *head, int connstatus)
324 {
325 int so_qlen, error = 0;
326 struct socket *so;
327 lck_mtx_t *mutex_held;
328
329 if (head->so_proto->pr_getlock != NULL) {
330 mutex_held = (*head->so_proto->pr_getlock)(head, 0);
331 } else {
332 mutex_held = head->so_proto->pr_domain->dom_mtx;
333 }
334 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
335
336 if (!soqlencomp) {
337 /*
338 * This is the default case; so_qlen represents the
339 * sum of both incomplete and completed queues.
340 */
341 so_qlen = head->so_qlen;
342 } else {
343 /*
344 * When kern.ipc.soqlencomp is set to 1, so_qlen
345 * represents only the completed queue. Since we
346	 * cannot let the incomplete queue go unbounded
347 * (in case of SYN flood), we cap the incomplete
348 * queue length to at most somaxconn, and use that
349 * as so_qlen so that we fail immediately below.
350 */
351 so_qlen = head->so_qlen - head->so_incqlen;
352 if (head->so_incqlen > somaxconn) {
353 so_qlen = somaxconn;
354 }
355 }
356
357 if (so_qlen >=
358 (soqlimitcompat ? head->so_qlimit : (3 * head->so_qlimit / 2))) {
359 return (struct socket *)0;
360 }
361 so = soalloc(1, SOCK_DOM(head), head->so_type);
362 if (so == NULL) {
363 return (struct socket *)0;
364 }
365 /* check if head was closed during the soalloc */
366 if (head->so_proto == NULL) {
367 sodealloc(so);
368 return (struct socket *)0;
369 }
370
371 so->so_type = head->so_type;
372 so->so_options = head->so_options & ~SO_ACCEPTCONN;
373 so->so_linger = head->so_linger;
374 so->so_state = head->so_state | SS_NOFDREF;
375 so->so_proto = head->so_proto;
376 so->so_timeo = head->so_timeo;
377 so->so_pgid = head->so_pgid;
378 kauth_cred_ref(head->so_cred);
379 so->so_cred = head->so_cred;
380 so->last_pid = head->last_pid;
381 so->last_upid = head->last_upid;
382 memcpy(so->last_uuid, head->last_uuid, sizeof(so->last_uuid));
383 if (head->so_flags & SOF_DELEGATED) {
384 so->e_pid = head->e_pid;
385 so->e_upid = head->e_upid;
386 memcpy(so->e_uuid, head->e_uuid, sizeof(so->e_uuid));
387 }
388 /* inherit socket options stored in so_flags */
389 so->so_flags = head->so_flags &
390 (SOF_NOSIGPIPE | SOF_NOADDRAVAIL | SOF_REUSESHAREUID |
391 SOF_NOTIFYCONFLICT | SOF_BINDRANDOMPORT | SOF_NPX_SETOPTSHUT |
392 SOF_NODEFUNCT | SOF_PRIVILEGED_TRAFFIC_CLASS | SOF_NOTSENT_LOWAT |
393 SOF_USELRO | SOF_DELEGATED);
394 so->so_flags1 |= SOF1_INBOUND;
395 so->so_usecount = 1;
396 so->next_lock_lr = 0;
397 so->next_unlock_lr = 0;
398
399 so->so_rcv.sb_flags |= SB_RECV; /* XXX */
400 so->so_rcv.sb_so = so->so_snd.sb_so = so;
401 TAILQ_INIT(&so->so_evlist);
402
403 #if CONFIG_MACF_SOCKET
404 mac_socket_label_associate_accept(head, so);
405 #endif
406
407 /* inherit traffic management properties of listener */
408 so->so_flags1 |=
409 head->so_flags1 & (SOF1_TRAFFIC_MGT_SO_BACKGROUND | SOF1_TC_NET_SERV_TYPE |
410 SOF1_QOSMARKING_ALLOWED | SOF1_QOSMARKING_POLICY_OVERRIDE);
411 so->so_background_thread = head->so_background_thread;
412 so->so_traffic_class = head->so_traffic_class;
413 so->so_netsvctype = head->so_netsvctype;
414
415 if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat)) {
416 sodealloc(so);
417 return (struct socket *)0;
418 }
419 so->so_rcv.sb_flags |= (head->so_rcv.sb_flags & SB_USRSIZE);
420 so->so_snd.sb_flags |= (head->so_snd.sb_flags & SB_USRSIZE);
421
422 /*
423 * Must be done with head unlocked to avoid deadlock
424 * for protocol with per socket mutexes.
425 */
426 if (head->so_proto->pr_unlock) {
427 socket_unlock(head, 0);
428 }
429 if (((*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL) != 0) ||
430 error) {
431 sodealloc(so);
432 if (head->so_proto->pr_unlock) {
433 socket_lock(head, 0);
434 }
435 return (struct socket *)0;
436 }
437 if (head->so_proto->pr_unlock) {
438 socket_lock(head, 0);
439 /*
440 * Radar 7385998 Recheck that the head is still accepting
441 * to avoid race condition when head is getting closed.
442 */
443 if ((head->so_options & SO_ACCEPTCONN) == 0) {
444 so->so_state &= ~SS_NOFDREF;
445 soclose(so);
446 return (struct socket *)0;
447 }
448 }
449
450 if (so->so_proto->pr_copy_last_owner != NULL) {
451 (*so->so_proto->pr_copy_last_owner)(so, head);
452 }
453 atomic_add_32(&so->so_proto->pr_domain->dom_refs, 1);
454
455 /* Insert in head appropriate lists */
456 so_acquire_accept_list(head, NULL);
457
458 so->so_head = head;
459
460 /*
461 * Since this socket is going to be inserted into the incomp
462 * queue, it can be picked up by another thread in
463	 * tcp_dropdropablreq and dropped before it is fully set up.
464	 * To prevent this race, set the in-progress flag, which can be
465	 * cleared later.
466 */
467 so->so_flags |= SOF_INCOMP_INPROGRESS;
468
469 if (connstatus) {
470 TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
471 so->so_state |= SS_COMP;
472 } else {
473 TAILQ_INSERT_TAIL(&head->so_incomp, so, so_list);
474 so->so_state |= SS_INCOMP;
475 head->so_incqlen++;
476 }
477 head->so_qlen++;
478
479 so_release_accept_list(head);
480
481 /* Attach socket filters for this protocol */
482 sflt_initsock(so);
483
484 if (connstatus) {
485 so->so_state |= connstatus;
486 sorwakeup(head);
487 wakeup((caddr_t)&head->so_timeo);
488 }
489 return so;
490 }
491
492
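/*
 * Public wrapper around sonewconn_internal(): give the socket filters
 * attached to the listening socket a chance to reject the incoming
 * connection before the new socket is created.
 */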
493 struct socket *
494 sonewconn(struct socket *head, int connstatus, const struct sockaddr *from)
495 {
496 int error = sflt_connectin(head, from);
497 if (error) {
498 return NULL;
499 }
500
501 return sonewconn_internal(head, connstatus);
502 }
503
504 /*
505 * Socantsendmore indicates that no more data will be sent on the
506 * socket; it would normally be applied to a socket when the user
507	 * informs the system that no more data is to be sent, by the protocol
508	 * code (in the case of PRU_SHUTDOWN). Socantrcvmore indicates that no more data
509 * will be received, and will normally be applied to the socket by a
510 * protocol when it detects that the peer will send no more data.
511 * Data queued for reading in the socket may yet be read.
512 */
513
514 void
515 socantsendmore(struct socket *so)
516 {
517 so->so_state |= SS_CANTSENDMORE;
518 soevent(so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_CANTSENDMORE);
519 sflt_notify(so, sock_evt_cantsendmore, NULL);
520 sowwakeup(so);
521 }
522
523 void
524 socantrcvmore(struct socket *so)
525 {
526 so->so_state |= SS_CANTRCVMORE;
527 soevent(so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_CANTRCVMORE);
528 sflt_notify(so, sock_evt_cantrecvmore, NULL);
529 sorwakeup(so);
530 }
531
532 /*
533 * Wait for data to arrive at/drain from a socket buffer.
534 */
535 int
536 sbwait(struct sockbuf *sb)
537 {
538 boolean_t nointr = (sb->sb_flags & SB_NOINTR);
539 void *lr_saved = __builtin_return_address(0);
540 struct socket *so = sb->sb_so;
541 lck_mtx_t *mutex_held;
542 struct timespec ts;
543 int error = 0;
544
545 if (so == NULL) {
546 panic("%s: null so, sb=%p sb_flags=0x%x lr=%p\n",
547 __func__, sb, sb->sb_flags, lr_saved);
548 /* NOTREACHED */
549 } else if (so->so_usecount < 1) {
550 panic("%s: sb=%p sb_flags=0x%x sb_so=%p usecount=%d lr=%p "
551 "lrh= %s\n", __func__, sb, sb->sb_flags, so,
552 so->so_usecount, lr_saved, solockhistory_nr(so));
553 /* NOTREACHED */
554 }
555
556 if ((so->so_state & SS_DRAINING) || (so->so_flags & SOF_DEFUNCT)) {
557 error = EBADF;
558 if (so->so_flags & SOF_DEFUNCT) {
559 SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llx [%d,%d] "
560 "(%d)\n", __func__, proc_selfpid(),
561 proc_best_name(current_proc()),
562 (uint64_t)VM_KERNEL_ADDRPERM(so),
563 SOCK_DOM(so), SOCK_TYPE(so), error);
564 }
565 return error;
566 }
567
568 if (so->so_proto->pr_getlock != NULL) {
569 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
570 } else {
571 mutex_held = so->so_proto->pr_domain->dom_mtx;
572 }
573
574 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
575
576 ts.tv_sec = sb->sb_timeo.tv_sec;
577 ts.tv_nsec = sb->sb_timeo.tv_usec * 1000;
578
579 sb->sb_waiters++;
580 VERIFY(sb->sb_waiters != 0);
581
582 error = msleep((caddr_t)&sb->sb_cc, mutex_held,
583 nointr ? PSOCK : PSOCK | PCATCH,
584 nointr ? "sbwait_nointr" : "sbwait", &ts);
585
586 VERIFY(sb->sb_waiters != 0);
587 sb->sb_waiters--;
588
589 if (so->so_usecount < 1) {
590 panic("%s: 2 sb=%p sb_flags=0x%x sb_so=%p usecount=%d lr=%p "
591 "lrh= %s\n", __func__, sb, sb->sb_flags, so,
592 so->so_usecount, lr_saved, solockhistory_nr(so));
593 /* NOTREACHED */
594 }
595
596 if ((so->so_state & SS_DRAINING) || (so->so_flags & SOF_DEFUNCT)) {
597 error = EBADF;
598 if (so->so_flags & SOF_DEFUNCT) {
599 SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llx [%d,%d] "
600 "(%d)\n", __func__, proc_selfpid(),
601 proc_best_name(current_proc()),
602 (uint64_t)VM_KERNEL_ADDRPERM(so),
603 SOCK_DOM(so), SOCK_TYPE(so), error);
604 }
605 }
606
607 return error;
608 }
609
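/*
 * Wake up any threads sleeping in sbwait() on this socket buffer.
 */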
610 void
611 sbwakeup(struct sockbuf *sb)
612 {
613 if (sb->sb_waiters > 0) {
614 wakeup((caddr_t)&sb->sb_cc);
615 }
616 }
617
618 /*
619 * Wakeup processes waiting on a socket buffer.
620 * Do asynchronous notification via SIGIO
621 * if the socket has the SS_ASYNC flag set.
622 */
623 void
624 sowakeup(struct socket *so, struct sockbuf *sb, struct socket *so2)
625 {
626 if (so->so_flags & SOF_DEFUNCT) {
627 SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llx [%d,%d] si 0x%x, "
628 "fl 0x%x [%s]\n", __func__, proc_selfpid(),
629 proc_best_name(current_proc()),
630 (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so),
631 SOCK_TYPE(so), (uint32_t)sb->sb_sel.si_flags, sb->sb_flags,
632 (sb->sb_flags & SB_RECV) ? "rcv" : "snd");
633 }
634
635 sb->sb_flags &= ~SB_SEL;
636 selwakeup(&sb->sb_sel);
637 sbwakeup(sb);
638 if (so->so_state & SS_ASYNC) {
639 if (so->so_pgid < 0) {
640 gsignal(-so->so_pgid, SIGIO);
641 } else if (so->so_pgid > 0) {
642 proc_signal(so->so_pgid, SIGIO);
643 }
644 }
645 if (sb->sb_flags & SB_KNOTE) {
646 KNOTE(&sb->sb_sel.si_note, SO_FILT_HINT_LOCKED);
647 }
648 if (sb->sb_flags & SB_UPCALL) {
649 void (*sb_upcall)(struct socket *, void *, int);
650 caddr_t sb_upcallarg;
651 int lock = !(sb->sb_flags & SB_UPCALL_LOCK);
652
653 sb_upcall = sb->sb_upcall;
654 sb_upcallarg = sb->sb_upcallarg;
655 /* Let close know that we're about to do an upcall */
656 so->so_upcallusecount++;
657
658 if (lock) {
659 if (so2) {
660 struct unpcb *unp = sotounpcb(so2);
661 unp->unp_flags |= UNP_DONTDISCONNECT;
662 unp->rw_thrcount++;
663
664 socket_unlock(so2, 0);
665 }
666 socket_unlock(so, 0);
667 }
668 (*sb_upcall)(so, sb_upcallarg, M_DONTWAIT);
669 if (lock) {
670 if (so2 && so > so2) {
671 struct unpcb *unp;
672 socket_lock(so2, 0);
673
674 unp = sotounpcb(so2);
675 unp->rw_thrcount--;
676 if (unp->rw_thrcount == 0) {
677 unp->unp_flags &= ~UNP_DONTDISCONNECT;
678 wakeup(unp);
679 }
680 }
681
682 socket_lock(so, 0);
683
684 if (so2 && so < so2) {
685 struct unpcb *unp;
686 socket_lock(so2, 0);
687
688 unp = sotounpcb(so2);
689 unp->rw_thrcount--;
690 if (unp->rw_thrcount == 0) {
691 unp->unp_flags &= ~UNP_DONTDISCONNECT;
692 wakeup(unp);
693 }
694 }
695 }
696
697 so->so_upcallusecount--;
698 /* Tell close that it's safe to proceed */
699 if ((so->so_flags & SOF_CLOSEWAIT) &&
700 so->so_upcallusecount == 0) {
701 wakeup((caddr_t)&so->so_upcallusecount);
702 }
703 }
704 #if CONTENT_FILTER
705 /*
706 * Trap disconnection events for content filters
707 */
708 if ((so->so_flags & SOF_CONTENT_FILTER) != 0) {
709 if ((sb->sb_flags & SB_RECV)) {
710 if (so->so_state & (SS_CANTRCVMORE)) {
711 cfil_sock_notify_shutdown(so, SHUT_RD);
712 }
713 } else {
714 if (so->so_state & (SS_CANTSENDMORE)) {
715 cfil_sock_notify_shutdown(so, SHUT_WR);
716 }
717 }
718 }
719 #endif /* CONTENT_FILTER */
720 }
721
722 /*
723 * Socket buffer (struct sockbuf) utility routines.
724 *
725 * Each socket contains two socket buffers: one for sending data and
726 * one for receiving data. Each buffer contains a queue of mbufs,
727 * information about the number of mbufs and amount of data in the
728 * queue, and other fields allowing select() statements and notification
729 * on data availability to be implemented.
730 *
731 * Data stored in a socket buffer is maintained as a list of records.
732 * Each record is a list of mbufs chained together with the m_next
733 * field. Records are chained together with the m_nextpkt field. The upper
734 * level routine soreceive() expects the following conventions to be
735 * observed when placing information in the receive buffer:
736 *
737 * 1. If the protocol requires each message be preceded by the sender's
738 * name, then a record containing that name must be present before
739 * any associated data (mbuf's must be of type MT_SONAME).
740 * 2. If the protocol supports the exchange of ``access rights'' (really
741 * just additional data associated with the message), and there are
742 * ``rights'' to be received, then a record containing this data
743 * should be present (mbuf's must be of type MT_RIGHTS).
744 * 3. If a name or rights record exists, then it must be followed by
745 * a data record, perhaps of zero length.
746 *
747 * Before using a new socket structure it is first necessary to reserve
748 * buffer space to the socket, by calling sbreserve(). This should commit
749 * some of the available buffer space in the system buffer pool for the
750 * socket (currently, it does nothing but enforce limits). The space
751 * should be released by calling sbrelease() when the socket is destroyed.
752 */
753
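/*
 * Illustrative sketch (not part of the original file): a protocol
 * attach routine typically reserves both buffers in one call, with
 * per-protocol defaults standing in for sendspace/recvspace:
 *
 *	error = soreserve(so, sendspace, recvspace);
 *	if (error != 0)
 *		return (error);
 *
 * The space is released again via sbrelease() when the socket is
 * torn down, as noted above.
 */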
754 /*
755 * Returns: 0 Success
756 * ENOBUFS
757 */
758 int
759 soreserve(struct socket *so, u_int32_t sndcc, u_int32_t rcvcc)
760 {
761 /*
762 * We do not want to fail the creation of a socket
763	 * when kern.ipc.maxsockbuf is less than the
764	 * default socket buffer size of the protocol,
765	 * so force the buffer sizes to be at most the
766	 * limit enforced by sbreserve().
767 */
768 uint64_t maxcc = (uint64_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES);
769 if (sndcc > maxcc) {
770 sndcc = maxcc;
771 }
772 if (rcvcc > maxcc) {
773 rcvcc = maxcc;
774 }
775 if (sbreserve(&so->so_snd, sndcc) == 0) {
776 goto bad;
777 } else {
778 so->so_snd.sb_idealsize = sndcc;
779 }
780
781 if (sbreserve(&so->so_rcv, rcvcc) == 0) {
782 goto bad2;
783 } else {
784 so->so_rcv.sb_idealsize = rcvcc;
785 }
786
787 if (so->so_rcv.sb_lowat == 0) {
788 so->so_rcv.sb_lowat = 1;
789 }
790 if (so->so_snd.sb_lowat == 0) {
791 so->so_snd.sb_lowat = MCLBYTES;
792 }
793 if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat) {
794 so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
795 }
796 return 0;
797 bad2:
798 so->so_snd.sb_flags &= ~SB_SEL;
799 selthreadclear(&so->so_snd.sb_sel);
800 sbrelease(&so->so_snd);
801 bad:
802 return ENOBUFS;
803 }
804
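/*
 * Record the amount of data that may be buffered before the socket is
 * fully connected; the same preconnect high-water mark is applied to
 * both the send and receive buffers.
 */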
805 void
806 soreserve_preconnect(struct socket *so, unsigned int pre_cc)
807 {
808 /* As of now, same bytes for both preconnect read and write */
809 so->so_snd.sb_preconn_hiwat = pre_cc;
810 so->so_rcv.sb_preconn_hiwat = pre_cc;
811 }
812
813 /*
814 * Allot mbufs to a sockbuf.
815 * Attempt to scale mbmax so that mbcnt doesn't become limiting
816 * if buffering efficiency is near the normal case.
817 */
818 int
819 sbreserve(struct sockbuf *sb, u_int32_t cc)
820 {
821 if ((u_quad_t)cc > (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES)) {
822 return 0;
823 }
824 sb->sb_hiwat = cc;
825 sb->sb_mbmax = min(cc * sb_efficiency, sb_max);
826 if (sb->sb_lowat > sb->sb_hiwat) {
827 sb->sb_lowat = sb->sb_hiwat;
828 }
829 return 1;
830 }
831
832 /*
833 * Free mbufs held by a socket, and reserved mbuf space.
834 */
835 /* WARNING needs to do selthreadclear() before calling this */
836 void
837 sbrelease(struct sockbuf *sb)
838 {
839 sbflush(sb);
840 sb->sb_hiwat = 0;
841 sb->sb_mbmax = 0;
842 }
843
844 /*
845 * Routines to add and remove
846 * data from an mbuf queue.
847 *
848 * The routines sbappend() or sbappendrecord() are normally called to
849 * append new mbufs to a socket buffer, after checking that adequate
850 * space is available, comparing the function sbspace() with the amount
851 * of data to be added. sbappendrecord() differs from sbappend() in
852 * that data supplied is treated as the beginning of a new record.
853 * To place a sender's address, optional access rights, and data in a
854 * socket receive buffer, sbappendaddr() should be used. To place
855 * access rights and data in a socket receive buffer, sbappendrights()
856 * should be used. In either case, the new data begins a new record.
857 * Note that unlike sbappend() and sbappendrecord(), these routines check
858 * for the caller that there will be enough space to store the data.
859 * Each fails if there is not enough space, or if it cannot find mbufs
860 * to store additional information in.
861 *
862 * Reliable protocols may use the socket send buffer to hold data
863 * awaiting acknowledgement. Data is normally copied from a socket
864 * send buffer in a protocol with m_copy for output to a peer,
865	 * and the data is then removed from the socket buffer with sbdrop()
866	 * or sbdroprecord() when it is acknowledged by the peer.
867 */
868
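/*
 * Illustrative sketch (not part of the original file): a datagram
 * protocol's input path might use these routines roughly as follows,
 * with the socket already locked:
 *
 *	if (sbappendaddr(&so->so_rcv, from, m, opts, NULL) != 0)
 *		sorwakeup(so);
 *	// on failure sbappendaddr() has already freed the mbufs
 *
 * while a reliable protocol, on receiving an acknowledgement for
 * "acked" bytes of send data, would typically do:
 *
 *	sbdrop(&so->so_snd, acked);
 *	sowwakeup(so);
 */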
869 /*
870 * Append mbuf chain m to the last record in the
871 * socket buffer sb. The additional space associated
872 * the mbuf chain is recorded in sb. Empty mbufs are
873 * discarded and mbufs are compacted where possible.
874 */
875 int
876 sbappend(struct sockbuf *sb, struct mbuf *m)
877 {
878 struct socket *so = sb->sb_so;
879
880 if (m == NULL || (sb->sb_flags & SB_DROP)) {
881 if (m != NULL) {
882 m_freem(m);
883 }
884 return 0;
885 }
886
887 SBLASTRECORDCHK(sb, "sbappend 1");
888
889 if (sb->sb_lastrecord != NULL && (sb->sb_mbtail->m_flags & M_EOR)) {
890 return sbappendrecord(sb, m);
891 }
892
893 if (sb->sb_flags & SB_RECV && !(m && m->m_flags & M_SKIPCFIL)) {
894 int error = sflt_data_in(so, NULL, &m, NULL, 0);
895 SBLASTRECORDCHK(sb, "sbappend 2");
896
897 #if CONTENT_FILTER
898 if (error == 0) {
899 error = cfil_sock_data_in(so, NULL, m, NULL, 0);
900 }
901 #endif /* CONTENT_FILTER */
902
903 if (error != 0) {
904 if (error != EJUSTRETURN) {
905 m_freem(m);
906 }
907 return 0;
908 }
909 } else if (m) {
910 m->m_flags &= ~M_SKIPCFIL;
911 }
912
913 /* If this is the first record, it's also the last record */
914 if (sb->sb_lastrecord == NULL) {
915 sb->sb_lastrecord = m;
916 }
917
918 sbcompress(sb, m, sb->sb_mbtail);
919 SBLASTRECORDCHK(sb, "sbappend 3");
920 return 1;
921 }
922
923 /*
924 * Similar to sbappend, except that this is optimized for stream sockets.
925 */
926 int
927 sbappendstream(struct sockbuf *sb, struct mbuf *m)
928 {
929 struct socket *so = sb->sb_so;
930
931 if (m == NULL || (sb->sb_flags & SB_DROP)) {
932 if (m != NULL) {
933 m_freem(m);
934 }
935 return 0;
936 }
937
938 if (m->m_nextpkt != NULL || (sb->sb_mb != sb->sb_lastrecord)) {
939	 panic("sbappendstream: nextpkt %p || mb %p != lastrecord %p\n",
940 m->m_nextpkt, sb->sb_mb, sb->sb_lastrecord);
941 /* NOTREACHED */
942 }
943
944 SBLASTMBUFCHK(sb, __func__);
945
946 if (sb->sb_flags & SB_RECV && !(m && m->m_flags & M_SKIPCFIL)) {
947 int error = sflt_data_in(so, NULL, &m, NULL, 0);
948 SBLASTRECORDCHK(sb, "sbappendstream 1");
949
950 #if CONTENT_FILTER
951 if (error == 0) {
952 error = cfil_sock_data_in(so, NULL, m, NULL, 0);
953 }
954 #endif /* CONTENT_FILTER */
955
956 if (error != 0) {
957 if (error != EJUSTRETURN) {
958 m_freem(m);
959 }
960 return 0;
961 }
962 } else if (m) {
963 m->m_flags &= ~M_SKIPCFIL;
964 }
965
966 sbcompress(sb, m, sb->sb_mbtail);
967 sb->sb_lastrecord = sb->sb_mb;
968 SBLASTRECORDCHK(sb, "sbappendstream 2");
969 return 1;
970 }
971
972 #ifdef SOCKBUF_DEBUG
973 void
974 sbcheck(struct sockbuf *sb)
975 {
976 struct mbuf *m;
977 struct mbuf *n = 0;
978 u_int32_t len = 0, mbcnt = 0;
979 lck_mtx_t *mutex_held;
980
981 if (sb->sb_so->so_proto->pr_getlock != NULL) {
982 mutex_held = (*sb->sb_so->so_proto->pr_getlock)(sb->sb_so, 0);
983 } else {
984 mutex_held = sb->sb_so->so_proto->pr_domain->dom_mtx;
985 }
986
987 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
988
989 if (sbchecking == 0) {
990 return;
991 }
992
993 for (m = sb->sb_mb; m; m = n) {
994 n = m->m_nextpkt;
995 for (; m; m = m->m_next) {
996 len += m->m_len;
997 mbcnt += MSIZE;
998 /* XXX pretty sure this is bogus */
999 if (m->m_flags & M_EXT) {
1000 mbcnt += m->m_ext.ext_size;
1001 }
1002 }
1003 }
1004 if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
1005 panic("cc %ld != %ld || mbcnt %ld != %ld\n", len, sb->sb_cc,
1006 mbcnt, sb->sb_mbcnt);
1007 }
1008 }
1009 #endif
1010
1011 void
1012 sblastrecordchk(struct sockbuf *sb, const char *where)
1013 {
1014 struct mbuf *m = sb->sb_mb;
1015
1016 while (m && m->m_nextpkt) {
1017 m = m->m_nextpkt;
1018 }
1019
1020 if (m != sb->sb_lastrecord) {
1021 printf("sblastrecordchk: mb 0x%llx lastrecord 0x%llx "
1022 "last 0x%llx\n",
1023 (uint64_t)VM_KERNEL_ADDRPERM(sb->sb_mb),
1024 (uint64_t)VM_KERNEL_ADDRPERM(sb->sb_lastrecord),
1025 (uint64_t)VM_KERNEL_ADDRPERM(m));
1026 printf("packet chain:\n");
1027 for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
1028 printf("\t0x%llx\n", (uint64_t)VM_KERNEL_ADDRPERM(m));
1029 }
1030 panic("sblastrecordchk from %s", where);
1031 }
1032 }
1033
1034 void
1035 sblastmbufchk(struct sockbuf *sb, const char *where)
1036 {
1037 struct mbuf *m = sb->sb_mb;
1038 struct mbuf *n;
1039
1040 while (m && m->m_nextpkt) {
1041 m = m->m_nextpkt;
1042 }
1043
1044 while (m && m->m_next) {
1045 m = m->m_next;
1046 }
1047
1048 if (m != sb->sb_mbtail) {
1049 printf("sblastmbufchk: mb 0x%llx mbtail 0x%llx last 0x%llx\n",
1050 (uint64_t)VM_KERNEL_ADDRPERM(sb->sb_mb),
1051 (uint64_t)VM_KERNEL_ADDRPERM(sb->sb_mbtail),
1052 (uint64_t)VM_KERNEL_ADDRPERM(m));
1053 printf("packet tree:\n");
1054 for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
1055 printf("\t");
1056 for (n = m; n != NULL; n = n->m_next) {
1057 printf("0x%llx ",
1058 (uint64_t)VM_KERNEL_ADDRPERM(n));
1059 }
1060 printf("\n");
1061 }
1062 panic("sblastmbufchk from %s", where);
1063 }
1064 }
1065
1066 /*
1067 * Similar to sbappend, except the mbuf chain begins a new record.
1068 */
1069 int
1070 sbappendrecord(struct sockbuf *sb, struct mbuf *m0)
1071 {
1072 struct mbuf *m;
1073 int space = 0;
1074
1075 if (m0 == NULL || (sb->sb_flags & SB_DROP)) {
1076 if (m0 != NULL) {
1077 m_freem(m0);
1078 }
1079 return 0;
1080 }
1081
1082 for (m = m0; m != NULL; m = m->m_next) {
1083 space += m->m_len;
1084 }
1085
1086 if (space > sbspace(sb) && !(sb->sb_flags & SB_UNIX)) {
1087 m_freem(m0);
1088 return 0;
1089 }
1090
1091 if (sb->sb_flags & SB_RECV && !(m0 && m0->m_flags & M_SKIPCFIL)) {
1092 int error = sflt_data_in(sb->sb_so, NULL, &m0, NULL,
1093 sock_data_filt_flag_record);
1094
1095 #if CONTENT_FILTER
1096 if (error == 0) {
1097 error = cfil_sock_data_in(sb->sb_so, NULL, m0, NULL, 0);
1098 }
1099 #endif /* CONTENT_FILTER */
1100
1101 if (error != 0) {
1102 SBLASTRECORDCHK(sb, "sbappendrecord 1");
1103 if (error != EJUSTRETURN) {
1104 m_freem(m0);
1105 }
1106 return 0;
1107 }
1108 } else if (m0) {
1109 m0->m_flags &= ~M_SKIPCFIL;
1110 }
1111
1112 /*
1113 * Note this permits zero length records.
1114 */
1115 sballoc(sb, m0);
1116 SBLASTRECORDCHK(sb, "sbappendrecord 2");
1117 if (sb->sb_lastrecord != NULL) {
1118 sb->sb_lastrecord->m_nextpkt = m0;
1119 } else {
1120 sb->sb_mb = m0;
1121 }
1122 sb->sb_lastrecord = m0;
1123 sb->sb_mbtail = m0;
1124
1125 m = m0->m_next;
1126 m0->m_next = 0;
1127 if (m && (m0->m_flags & M_EOR)) {
1128 m0->m_flags &= ~M_EOR;
1129 m->m_flags |= M_EOR;
1130 }
1131 sbcompress(sb, m, m0);
1132 SBLASTRECORDCHK(sb, "sbappendrecord 3");
1133 return 1;
1134 }
1135
1136 /*
1137	 * Concatenate address (optional), control (optional) and data into a
1138	 * single mbuf chain. If sockbuf *sb is passed in, a space check will
1139	 * be performed.
1140 *
1141 * Returns: mbuf chain pointer if succeeded, NULL if failed
1142 */
1143 struct mbuf *
1144 sbconcat_mbufs(struct sockbuf *sb, struct sockaddr *asa, struct mbuf *m0, struct mbuf *control)
1145 {
1146 struct mbuf *m = NULL, *n = NULL;
1147 int space = 0;
1148
1149 if (m0 && (m0->m_flags & M_PKTHDR) == 0) {
1150 panic("sbconcat_mbufs");
1151 }
1152
1153 if (m0) {
1154 space += m0->m_pkthdr.len;
1155 }
1156 for (n = control; n; n = n->m_next) {
1157 space += n->m_len;
1158 if (n->m_next == 0) { /* keep pointer to last control buf */
1159 break;
1160 }
1161 }
1162
1163 if (asa != NULL) {
1164 if (asa->sa_len > MLEN) {
1165 return NULL;
1166 }
1167 space += asa->sa_len;
1168 }
1169
1170 if (sb != NULL && space > sbspace(sb)) {
1171 return NULL;
1172 }
1173
1174 if (n) {
1175 n->m_next = m0; /* concatenate data to control */
1176 } else {
1177 control = m0;
1178 }
1179
1180 if (asa != NULL) {
1181 MGET(m, M_DONTWAIT, MT_SONAME);
1182 if (m == 0) {
1183 if (n) {
1184 /* unchain control and data if necessary */
1185 n->m_next = NULL;
1186 }
1187 return NULL;
1188 }
1189 m->m_len = asa->sa_len;
1190 bcopy((caddr_t)asa, mtod(m, caddr_t), asa->sa_len);
1191
1192 m->m_next = control;
1193 } else {
1194 m = control;
1195 }
1196
1197 return m;
1198 }
1199
1200 /*
1201 * Queue mbuf chain to the receive queue of a socket.
1202	 * Parameter space is the total length of the mbuf chain.
1203	 * If it is non-zero, available sockbuf space will be checked.
1204 *
1205 * Returns: 0 Invalid mbuf chain
1206 * 1 Success
1207 */
1208 int
1209 sbappendchain(struct sockbuf *sb, struct mbuf *m, int space)
1210 {
1211 struct mbuf *n, *nlast;
1212
1213 if (m == NULL) {
1214 return 0;
1215 }
1216
1217 if (space != 0 && space > sbspace(sb)) {
1218 return 0;
1219 }
1220
1221 for (n = m; n->m_next != NULL; n = n->m_next) {
1222 sballoc(sb, n);
1223 }
1224 sballoc(sb, n);
1225 nlast = n;
1226
1227 if (sb->sb_lastrecord != NULL) {
1228 sb->sb_lastrecord->m_nextpkt = m;
1229 } else {
1230 sb->sb_mb = m;
1231 }
1232 sb->sb_lastrecord = m;
1233 sb->sb_mbtail = nlast;
1234
1235 SBLASTMBUFCHK(sb, __func__);
1236	 SBLASTRECORDCHK(sb, "sbappendaddr 2");
1237
1238 postevent(0, sb, EV_RWBYTES);
1239 return 1;
1240 }
1241
1242 /*
1243 * Returns: 0 Error: No space/out of mbufs/etc.
1244 * 1 Success
1245 *
1246 * Imputed: (*error_out) errno for error
1247 * ENOBUFS
1248 * sflt_data_in:??? [whatever a filter author chooses]
1249 */
1250 int
1251 sbappendaddr(struct sockbuf *sb, struct sockaddr *asa, struct mbuf *m0,
1252 struct mbuf *control, int *error_out)
1253 {
1254 int result = 0;
1255 boolean_t sb_unix = (sb->sb_flags & SB_UNIX);
1256 struct mbuf *mbuf_chain = NULL;
1257
1258 if (error_out) {
1259 *error_out = 0;
1260 }
1261
1262 if (m0 && (m0->m_flags & M_PKTHDR) == 0) {
1263 panic("sbappendaddrorfree");
1264 }
1265
1266 if (sb->sb_flags & SB_DROP) {
1267 if (m0 != NULL) {
1268 m_freem(m0);
1269 }
1270 if (control != NULL && !sb_unix) {
1271 m_freem(control);
1272 }
1273 if (error_out != NULL) {
1274 *error_out = EINVAL;
1275 }
1276 return 0;
1277 }
1278
1279 /* Call socket data in filters */
1280 if (sb->sb_flags & SB_RECV && !(m0 && m0->m_flags & M_SKIPCFIL)) {
1281 int error;
1282 error = sflt_data_in(sb->sb_so, asa, &m0, &control, 0);
1283 SBLASTRECORDCHK(sb, __func__);
1284
1285 #if CONTENT_FILTER
1286 if (error == 0) {
1287 error = cfil_sock_data_in(sb->sb_so, asa, m0, control,
1288 0);
1289 }
1290 #endif /* CONTENT_FILTER */
1291
1292 if (error) {
1293 if (error != EJUSTRETURN) {
1294 if (m0) {
1295 m_freem(m0);
1296 }
1297 if (control != NULL && !sb_unix) {
1298 m_freem(control);
1299 }
1300 if (error_out) {
1301 *error_out = error;
1302 }
1303 }
1304 return 0;
1305 }
1306 } else if (m0) {
1307 m0->m_flags &= ~M_SKIPCFIL;
1308 }
1309
1310 mbuf_chain = sbconcat_mbufs(sb, asa, m0, control);
1311	 SBLASTRECORDCHK(sb, "sbappendaddr 1");
1312 result = sbappendchain(sb, mbuf_chain, 0);
1313 if (result == 0) {
1314 if (m0) {
1315 m_freem(m0);
1316 }
1317 if (control != NULL && !sb_unix) {
1318 m_freem(control);
1319 }
1320 if (error_out) {
1321 *error_out = ENOBUFS;
1322 }
1323 }
1324
1325 return result;
1326 }
1327
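/*
 * Sanity-check a cmsghdr against the control mbuf that carries it:
 * the header must be non-NULL, claim at least sizeof(struct cmsghdr),
 * and end within the control mbuf's data area.
 */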
1328 inline boolean_t
1329 is_cmsg_valid(struct mbuf *control, struct cmsghdr *cmsg)
1330 {
1331 if (cmsg == NULL) {
1332 return FALSE;
1333 }
1334
1335 if (cmsg->cmsg_len < sizeof(struct cmsghdr)) {
1336 return FALSE;
1337 }
1338
1339 if ((uint8_t *)control->m_data >= (uint8_t *)cmsg + cmsg->cmsg_len) {
1340 return FALSE;
1341 }
1342
1343 if ((uint8_t *)control->m_data + control->m_len <
1344 (uint8_t *)cmsg + cmsg->cmsg_len) {
1345 return FALSE;
1346 }
1347
1348 return TRUE;
1349 }
1350
1351 static int
1352 sbappendcontrol_internal(struct sockbuf *sb, struct mbuf *m0,
1353 struct mbuf *control)
1354 {
1355 struct mbuf *m, *mlast, *n;
1356 int space = 0;
1357
1358 if (control == 0) {
1359 panic("sbappendcontrol");
1360 }
1361
1362 for (m = control;; m = m->m_next) {
1363 space += m->m_len;
1364 if (m->m_next == 0) {
1365 break;
1366 }
1367 }
1368 n = m; /* save pointer to last control buffer */
1369 for (m = m0; m; m = m->m_next) {
1370 space += m->m_len;
1371 }
1372 if (space > sbspace(sb) && !(sb->sb_flags & SB_UNIX)) {
1373 return 0;
1374 }
1375 n->m_next = m0; /* concatenate data to control */
1376 SBLASTRECORDCHK(sb, "sbappendcontrol 1");
1377
1378 for (m = control; m->m_next != NULL; m = m->m_next) {
1379 sballoc(sb, m);
1380 }
1381 sballoc(sb, m);
1382 mlast = m;
1383
1384 if (sb->sb_lastrecord != NULL) {
1385 sb->sb_lastrecord->m_nextpkt = control;
1386 } else {
1387 sb->sb_mb = control;
1388 }
1389 sb->sb_lastrecord = control;
1390 sb->sb_mbtail = mlast;
1391
1392 SBLASTMBUFCHK(sb, __func__);
1393 SBLASTRECORDCHK(sb, "sbappendcontrol 2");
1394
1395 postevent(0, sb, EV_RWBYTES);
1396 return 1;
1397 }
1398
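/*
 * Filtering wrapper around sbappendcontrol_internal(): run the socket
 * data-in filters (and content filters) first, then append the control
 * and data mbufs as a new record, freeing them if the append fails.
 */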
1399 int
1400 sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control,
1401 int *error_out)
1402 {
1403 int result = 0;
1404 boolean_t sb_unix = (sb->sb_flags & SB_UNIX);
1405
1406 if (error_out) {
1407 *error_out = 0;
1408 }
1409
1410 if (sb->sb_flags & SB_DROP) {
1411 if (m0 != NULL) {
1412 m_freem(m0);
1413 }
1414 if (control != NULL && !sb_unix) {
1415 m_freem(control);
1416 }
1417 if (error_out != NULL) {
1418 *error_out = EINVAL;
1419 }
1420 return 0;
1421 }
1422
1423 if (sb->sb_flags & SB_RECV && !(m0 && m0->m_flags & M_SKIPCFIL)) {
1424 int error;
1425
1426 error = sflt_data_in(sb->sb_so, NULL, &m0, &control, 0);
1427 SBLASTRECORDCHK(sb, __func__);
1428
1429 #if CONTENT_FILTER
1430 if (error == 0) {
1431 error = cfil_sock_data_in(sb->sb_so, NULL, m0, control,
1432 0);
1433 }
1434 #endif /* CONTENT_FILTER */
1435
1436 if (error) {
1437 if (error != EJUSTRETURN) {
1438 if (m0) {
1439 m_freem(m0);
1440 }
1441 if (control != NULL && !sb_unix) {
1442 m_freem(control);
1443 }
1444 if (error_out) {
1445 *error_out = error;
1446 }
1447 }
1448 return 0;
1449 }
1450 } else if (m0) {
1451 m0->m_flags &= ~M_SKIPCFIL;
1452 }
1453
1454 result = sbappendcontrol_internal(sb, m0, control);
1455 if (result == 0) {
1456 if (m0) {
1457 m_freem(m0);
1458 }
1459 if (control != NULL && !sb_unix) {
1460 m_freem(control);
1461 }
1462 if (error_out) {
1463 *error_out = ENOBUFS;
1464 }
1465 }
1466
1467 return result;
1468 }
1469
1470 /*
1471 * Append a contiguous TCP data blob with TCP sequence number as control data
1472 * as a new msg to the receive socket buffer.
1473 */
1474 int
1475 sbappendmsgstream_rcv(struct sockbuf *sb, struct mbuf *m, uint32_t seqnum,
1476 int unordered)
1477 {
1478 struct mbuf *m_eor = NULL;
1479 u_int32_t data_len = 0;
1480 int ret = 0;
1481 struct socket *so = sb->sb_so;
1482
1483 if (m == NULL) {
1484 return 0;
1485 }
1486
1487 VERIFY((m->m_flags & M_PKTHDR) && m_pktlen(m) > 0);
1488 VERIFY(so->so_msg_state != NULL);
1489 VERIFY(sb->sb_flags & SB_RECV);
1490
1491 /* Keep the TCP sequence number in the mbuf pkthdr */
1492 m->m_pkthdr.msg_seq = seqnum;
1493
1494 /* find last mbuf and set M_EOR */
1495 for (m_eor = m;; m_eor = m_eor->m_next) {
1496 /*
1497 * If the msg is unordered, we need to account for
1498 * these bytes in receive socket buffer size. Otherwise,
1499 * the receive window advertised will shrink because
1500 * of the additional unordered bytes added to the
1501 * receive buffer.
1502 */
1503 if (unordered) {
1504 m_eor->m_flags |= M_UNORDERED_DATA;
1505 data_len += m_eor->m_len;
1506 so->so_msg_state->msg_uno_bytes += m_eor->m_len;
1507 } else {
1508 m_eor->m_flags &= ~M_UNORDERED_DATA;
1509 }
1510 if (m_eor->m_next == NULL) {
1511 break;
1512 }
1513 }
1514
1515 /* set EOR flag at end of byte blob */
1516 m_eor->m_flags |= M_EOR;
1517
1518 /* expand the receive socket buffer to allow unordered data */
1519 if (unordered && !sbreserve(sb, sb->sb_hiwat + data_len)) {
1520 /*
1521 * Could not allocate memory for unordered data, it
1522 * means this packet will have to be delivered in order
1523 */
1524 printf("%s: could not reserve space for unordered data\n",
1525 __func__);
1526 }
1527
1528 if (!unordered && (sb->sb_mbtail != NULL) &&
1529 !(sb->sb_mbtail->m_flags & M_UNORDERED_DATA)) {
1530 sb->sb_mbtail->m_flags &= ~M_EOR;
1531 sbcompress(sb, m, sb->sb_mbtail);
1532 ret = 1;
1533 } else {
1534 ret = sbappendrecord(sb, m);
1535 }
1536 VERIFY(sb->sb_mbtail->m_flags & M_EOR);
1537 return ret;
1538 }
1539
1540 /*
1541	 * TCP streams have message-based out-of-order delivery support, or have
1542	 * Multipath TCP support, or are regular TCP sockets.
1543 */
1544 int
1545 sbappendstream_rcvdemux(struct socket *so, struct mbuf *m, uint32_t seqnum,
1546 int unordered)
1547 {
1548 int ret = 0;
1549
1550 if ((m != NULL) &&
1551 m_pktlen(m) <= 0 &&
1552 !((so->so_flags & SOF_MP_SUBFLOW) &&
1553 (m->m_flags & M_PKTHDR) &&
1554 (m->m_pkthdr.pkt_flags & PKTF_MPTCP_DFIN))) {
1555 m_freem(m);
1556 return ret;
1557 }
1558
1559 if (so->so_flags & SOF_ENABLE_MSGS) {
1560 ret = sbappendmsgstream_rcv(&so->so_rcv, m, seqnum, unordered);
1561 }
1562 #if MPTCP
1563 else if (so->so_flags & SOF_MP_SUBFLOW) {
1564 ret = sbappendmptcpstream_rcv(&so->so_rcv, m);
1565 }
1566 #endif /* MPTCP */
1567 else {
1568 ret = sbappendstream(&so->so_rcv, m);
1569 }
1570 return ret;
1571 }
1572
1573 #if MPTCP
1574 int
1575 sbappendmptcpstream_rcv(struct sockbuf *sb, struct mbuf *m)
1576 {
1577 struct socket *so = sb->sb_so;
1578
1579 VERIFY(m == NULL || (m->m_flags & M_PKTHDR));
1580	 /* SB_NOCOMPRESS must be set to prevent loss of M_PKTHDR data */
1581 VERIFY((sb->sb_flags & (SB_RECV | SB_NOCOMPRESS)) ==
1582 (SB_RECV | SB_NOCOMPRESS));
1583
1584 if (m == NULL || m_pktlen(m) == 0 || (sb->sb_flags & SB_DROP) ||
1585 (so->so_state & SS_CANTRCVMORE)) {
1586 if (m && (m->m_flags & M_PKTHDR) &&
1587 m_pktlen(m) == 0 &&
1588 (m->m_pkthdr.pkt_flags & PKTF_MPTCP_DFIN)) {
1589 mptcp_input(tptomptp(sototcpcb(so))->mpt_mpte, m);
1590 return 1;
1591 } else if (m != NULL) {
1592 m_freem(m);
1593 }
1594 return 0;
1595 }
1596 /* the socket is not closed, so SOF_MP_SUBFLOW must be set */
1597 VERIFY(so->so_flags & SOF_MP_SUBFLOW);
1598
1599 if (m->m_nextpkt != NULL || (sb->sb_mb != sb->sb_lastrecord)) {
1600	 panic("%s: nextpkt %p || mb %p != lastrecord %p\n", __func__,
1601 m->m_nextpkt, sb->sb_mb, sb->sb_lastrecord);
1602 /* NOTREACHED */
1603 }
1604
1605 SBLASTMBUFCHK(sb, __func__);
1606
1607 /* No filter support (SB_RECV) on mptcp subflow sockets */
1608
1609 sbcompress(sb, m, sb->sb_mbtail);
1610 sb->sb_lastrecord = sb->sb_mb;
1611 SBLASTRECORDCHK(sb, __func__);
1612 return 1;
1613 }
1614 #endif /* MPTCP */
1615
1616 /*
1617 * Append message to send socket buffer based on priority.
1618 */
1619 int
1620 sbappendmsg_snd(struct sockbuf *sb, struct mbuf *m)
1621 {
1622 struct socket *so = sb->sb_so;
1623 struct msg_priq *priq;
1624 int set_eor = 0;
1625
1626 VERIFY(so->so_msg_state != NULL);
1627
1628 if (m->m_nextpkt != NULL || (sb->sb_mb != sb->sb_lastrecord)) {
1629	 panic("sbappendmsg_snd: nextpkt %p || mb %p != lastrecord %p\n",
1630 m->m_nextpkt, sb->sb_mb, sb->sb_lastrecord);
1631 }
1632
1633 SBLASTMBUFCHK(sb, __func__);
1634
1635 if (m == NULL || (sb->sb_flags & SB_DROP) || so->so_msg_state == NULL) {
1636 if (m != NULL) {
1637 m_freem(m);
1638 }
1639 return 0;
1640 }
1641
1642 priq = &so->so_msg_state->msg_priq[m->m_pkthdr.msg_pri];
1643
1644	 /* note if we need to propagate M_EOR to the last mbuf */
1645 if (m->m_flags & M_EOR) {
1646 set_eor = 1;
1647
1648 /* Reset M_EOR from the first mbuf */
1649 m->m_flags &= ~(M_EOR);
1650 }
1651
1652 if (priq->msgq_head == NULL) {
1653 VERIFY(priq->msgq_tail == NULL && priq->msgq_lastmsg == NULL);
1654 priq->msgq_head = priq->msgq_lastmsg = m;
1655 } else {
1656 VERIFY(priq->msgq_tail->m_next == NULL);
1657
1658 /* Check if the last message has M_EOR flag set */
1659 if (priq->msgq_tail->m_flags & M_EOR) {
1660 /* Insert as a new message */
1661 priq->msgq_lastmsg->m_nextpkt = m;
1662
1663 /* move the lastmsg pointer */
1664 priq->msgq_lastmsg = m;
1665 } else {
1666 /* Append to the existing message */
1667 priq->msgq_tail->m_next = m;
1668 }
1669 }
1670
1671 /* Update accounting and the queue tail pointer */
1672
1673 while (m->m_next != NULL) {
1674 sballoc(sb, m);
1675 priq->msgq_bytes += m->m_len;
1676 m = m->m_next;
1677 }
1678 sballoc(sb, m);
1679 priq->msgq_bytes += m->m_len;
1680
1681 if (set_eor) {
1682 m->m_flags |= M_EOR;
1683
1684 /*
1685	 * Since user space cannot write a new msg
1686	 * without completing the previous one, we can
1687 * reset this flag to start sending again.
1688 */
1689 priq->msgq_flags &= ~(MSGQ_MSG_NOTDONE);
1690 }
1691
1692 priq->msgq_tail = m;
1693
1694 SBLASTRECORDCHK(sb, "sbappendstream 2");
1695 postevent(0, sb, EV_RWBYTES);
1696 return 1;
1697 }
1698
1699 /*
1700 * Pull data from priority queues to the serial snd queue
1701 * right before sending.
1702 */
1703 void
1704 sbpull_unordered_data(struct socket *so, int32_t off, int32_t len)
1705 {
1706 int32_t topull, i;
1707 struct msg_priq *priq = NULL;
1708
1709 VERIFY(so->so_msg_state != NULL);
1710
1711 topull = (off + len) - so->so_msg_state->msg_serial_bytes;
1712
1713 i = MSG_PRI_MAX;
1714 while (i >= MSG_PRI_MIN && topull > 0) {
1715 struct mbuf *m = NULL, *mqhead = NULL, *mend = NULL;
1716 priq = &so->so_msg_state->msg_priq[i];
1717 if ((priq->msgq_flags & MSGQ_MSG_NOTDONE) &&
1718 priq->msgq_head == NULL) {
1719 /*
1720 * We were in the middle of sending
1721 * a message and we have not seen the
1722 * end of it.
1723 */
1724 VERIFY(priq->msgq_lastmsg == NULL &&
1725 priq->msgq_tail == NULL);
1726 return;
1727 }
1728 if (priq->msgq_head != NULL) {
1729 int32_t bytes = 0, topull_tmp = topull;
1730 /*
1731 * We found a msg while scanning the priority
1732 * queue from high to low priority.
1733 */
1734 m = priq->msgq_head;
1735 mqhead = m;
1736 mend = m;
1737
1738 /*
1739 * Move bytes from the priority queue to the
1740 * serial queue. Compute the number of bytes
1741 * being added.
1742 */
1743 while (mqhead->m_next != NULL && topull_tmp > 0) {
1744 bytes += mqhead->m_len;
1745 topull_tmp -= mqhead->m_len;
1746 mend = mqhead;
1747 mqhead = mqhead->m_next;
1748 }
1749
1750 if (mqhead->m_next == NULL) {
1751 /*
1752 * If we have only one more mbuf left,
1753 * move the last mbuf of this message to
1754 * serial queue and set the head of the
1755 * queue to be the next message.
1756 */
1757 bytes += mqhead->m_len;
1758 mend = mqhead;
1759 mqhead = m->m_nextpkt;
1760 if (!(mend->m_flags & M_EOR)) {
1761 /*
1762 * We have not seen the end of
1763	 * this message, so we cannot
1764	 * pull any more.
1765 */
1766 priq->msgq_flags |= MSGQ_MSG_NOTDONE;
1767 } else {
1768 /* Reset M_EOR */
1769 mend->m_flags &= ~(M_EOR);
1770 }
1771 } else {
1772	 /* propagate the next msg pointer */
1773 mqhead->m_nextpkt = m->m_nextpkt;
1774 }
1775 priq->msgq_head = mqhead;
1776
1777 /*
1778 * if the lastmsg pointer points to
1779 * the mbuf that is being dequeued, update
1780 * it to point to the new head.
1781 */
1782 if (priq->msgq_lastmsg == m) {
1783 priq->msgq_lastmsg = priq->msgq_head;
1784 }
1785
1786 m->m_nextpkt = NULL;
1787 mend->m_next = NULL;
1788
1789 if (priq->msgq_head == NULL) {
1790 /* Moved all messages, update tail */
1791 priq->msgq_tail = NULL;
1792 VERIFY(priq->msgq_lastmsg == NULL);
1793 }
1794
1795 /* Move it to serial sb_mb queue */
1796 if (so->so_snd.sb_mb == NULL) {
1797 so->so_snd.sb_mb = m;
1798 } else {
1799 so->so_snd.sb_mbtail->m_next = m;
1800 }
1801
1802 priq->msgq_bytes -= bytes;
1803 VERIFY(priq->msgq_bytes >= 0);
1804 sbwakeup(&so->so_snd);
1805
1806 so->so_msg_state->msg_serial_bytes += bytes;
1807 so->so_snd.sb_mbtail = mend;
1808 so->so_snd.sb_lastrecord = so->so_snd.sb_mb;
1809
1810 topull =
1811 (off + len) - so->so_msg_state->msg_serial_bytes;
1812
1813 if (priq->msgq_flags & MSGQ_MSG_NOTDONE) {
1814 break;
1815 }
1816 } else {
1817 --i;
1818 }
1819 }
1820 sblastrecordchk(&so->so_snd, "sbpull_unordered_data");
1821 sblastmbufchk(&so->so_snd, "sbpull_unordered_data");
1822 }
1823
1824 /*
1825 * Compress mbuf chain m into the socket
1826 * buffer sb following mbuf n. If n
1827 * is null, the buffer is presumed empty.
1828 */
1829 static inline void
1830 sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
1831 {
1832 int eor = 0, compress = (!(sb->sb_flags & SB_NOCOMPRESS));
1833 struct mbuf *o;
1834
1835 if (m == NULL) {
1836 /* There is nothing to compress; just update the tail */
1837 for (; n->m_next != NULL; n = n->m_next) {
1838 ;
1839 }
1840 sb->sb_mbtail = n;
1841 goto done;
1842 }
1843
1844 while (m != NULL) {
1845 eor |= m->m_flags & M_EOR;
1846 if (compress && m->m_len == 0 && (eor == 0 ||
1847 (((o = m->m_next) || (o = n)) && o->m_type == m->m_type))) {
1848 if (sb->sb_lastrecord == m) {
1849 sb->sb_lastrecord = m->m_next;
1850 }
1851 m = m_free(m);
1852 continue;
1853 }
1854 if (compress && n != NULL && (n->m_flags & M_EOR) == 0 &&
1855 #ifndef __APPLE__
1856 M_WRITABLE(n) &&
1857 #endif
1858 m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
1859 m->m_len <= M_TRAILINGSPACE(n) &&
1860 n->m_type == m->m_type) {
1861 bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
1862 (unsigned)m->m_len);
1863 n->m_len += m->m_len;
1864 sb->sb_cc += m->m_len;
1865 if (m->m_type != MT_DATA && m->m_type != MT_HEADER &&
1866 m->m_type != MT_OOBDATA) {
1867 /* XXX: Probably don't need */
1868 sb->sb_ctl += m->m_len;
1869 }
1870
1871 /* update send byte count */
1872 if (sb->sb_flags & SB_SNDBYTE_CNT) {
1873 inp_incr_sndbytes_total(sb->sb_so,
1874 m->m_len);
1875 inp_incr_sndbytes_unsent(sb->sb_so,
1876 m->m_len);
1877 }
1878 m = m_free(m);
1879 continue;
1880 }
1881 if (n != NULL) {
1882 n->m_next = m;
1883 } else {
1884 sb->sb_mb = m;
1885 }
1886 sb->sb_mbtail = m;
1887 sballoc(sb, m);
1888 n = m;
1889 m->m_flags &= ~M_EOR;
1890 m = m->m_next;
1891 n->m_next = NULL;
1892 }
1893 if (eor != 0) {
1894 if (n != NULL) {
1895 n->m_flags |= eor;
1896 } else {
1897 printf("semi-panic: sbcompress\n");
1898 }
1899 }
1900 done:
1901 SBLASTMBUFCHK(sb, __func__);
1902 postevent(0, sb, EV_RWBYTES);
1903 }
1904
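/*
 * Panic unless the socket buffer is completely empty; used to verify
 * that all mbufs and accounting have been reclaimed (e.g. by sbflush()).
 */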
1905 void
1906 sb_empty_assert(struct sockbuf *sb, const char *where)
1907 {
1908 if (!(sb->sb_cc == 0 && sb->sb_mb == NULL && sb->sb_mbcnt == 0 &&
1909 sb->sb_mbtail == NULL && sb->sb_lastrecord == NULL)) {
1910 panic("%s: sb %p so %p cc %d mbcnt %d mb %p mbtail %p "
1911 "lastrecord %p\n", where, sb, sb->sb_so, sb->sb_cc,
1912 sb->sb_mbcnt, sb->sb_mb, sb->sb_mbtail,
1913 sb->sb_lastrecord);
1914 /* NOTREACHED */
1915 }
1916 }
1917
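/*
 * Free every mbuf queued on a message priority queue and reset the
 * queue's pointers and byte/flag accounting.
 */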
1918 static void
1919 sbflush_priq(struct msg_priq *priq)
1920 {
1921 struct mbuf *m;
1922 m = priq->msgq_head;
1923 if (m != NULL) {
1924 m_freem_list(m);
1925 }
1926 priq->msgq_head = priq->msgq_tail = priq->msgq_lastmsg = NULL;
1927 priq->msgq_bytes = priq->msgq_flags = 0;
1928 }
1929
1930 /*
1931 * Free all mbufs in a sockbuf.
1932 * Check that all resources are reclaimed.
1933 */
1934 void
1935 sbflush(struct sockbuf *sb)
1936 {
1937 void *lr_saved = __builtin_return_address(0);
1938 struct socket *so = sb->sb_so;
1939 u_int32_t i;
1940
1941 /* so_usecount may be 0 if we get here from sofreelastref() */
1942 if (so == NULL) {
1943 panic("%s: null so, sb=%p sb_flags=0x%x lr=%p\n",
1944 __func__, sb, sb->sb_flags, lr_saved);
1945 /* NOTREACHED */
1946 } else if (so->so_usecount < 0) {
1947 panic("%s: sb=%p sb_flags=0x%x sb_so=%p usecount=%d lr=%p "
1948 "lrh= %s\n", __func__, sb, sb->sb_flags, so,
1949 so->so_usecount, lr_saved, solockhistory_nr(so));
1950 /* NOTREACHED */
1951 }
1952
1953 /*
1954 * Obtain lock on the socket buffer (SB_LOCK). This is required
1955 * to prevent the socket buffer from being unexpectedly altered
1956 * while it is used by another thread in socket send/receive.
1957 *
1958 * sblock() must not fail here, hence the assertion.
1959 */
1960 (void) sblock(sb, SBL_WAIT | SBL_NOINTR | SBL_IGNDEFUNCT);
1961 VERIFY(sb->sb_flags & SB_LOCK);
1962
1963 while (sb->sb_mbcnt > 0) {
1964 /*
1965 * Don't call sbdrop(sb, 0) if the leading mbuf is non-empty:
1966 * we would loop forever. Panic instead.
1967 */
1968 if (!sb->sb_cc && (sb->sb_mb == NULL || sb->sb_mb->m_len)) {
1969 break;
1970 }
1971 sbdrop(sb, (int)sb->sb_cc);
1972 }
1973
1974 if (!(sb->sb_flags & SB_RECV) && (so->so_flags & SOF_ENABLE_MSGS)) {
1975 VERIFY(so->so_msg_state != NULL);
1976 for (i = MSG_PRI_MIN; i <= MSG_PRI_MAX; ++i) {
1977 sbflush_priq(&so->so_msg_state->msg_priq[i]);
1978 }
1979 so->so_msg_state->msg_serial_bytes = 0;
1980 so->so_msg_state->msg_uno_bytes = 0;
1981 }
1982
1983 sb_empty_assert(sb, __func__);
1984 postevent(0, sb, EV_RWBYTES);
1985
1986 sbunlock(sb, TRUE); /* keep socket locked */
1987 }
1988
1989 /*
1990 * Drop data from (the front of) a sockbuf.
1991 * Use m_freem_list to free the mbuf structures
1992 * under a single lock. This is done by pruning
1993 * the top of the chain from the body: we keep track
1994 * of how far into the chain we get, then zero the
1995 * two pertinent pointers, m_nextpkt and m_next.
1996 * The socket buffer is then updated to point at the
1997 * new top of the chain, and the pruned area is
1998 * released via m_freem_list.
1999 */
2000 void
2001 sbdrop(struct sockbuf *sb, int len)
2002 {
2003 struct mbuf *m, *free_list, *ml;
2004 struct mbuf *next, *last;
2005
2006 next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
2007 #if MPTCP
2008 if (m != NULL && len > 0 && !(sb->sb_flags & SB_RECV) &&
2009 ((sb->sb_so->so_flags & SOF_MP_SUBFLOW) ||
2010 (SOCK_CHECK_DOM(sb->sb_so, PF_MULTIPATH) &&
2011 SOCK_CHECK_PROTO(sb->sb_so, IPPROTO_TCP))) &&
2012 !(sb->sb_so->so_flags1 & SOF1_POST_FALLBACK_SYNC)) {
2013 mptcp_preproc_sbdrop(sb->sb_so, m, (unsigned int)len);
2014 }
2015 if (m != NULL && len > 0 && !(sb->sb_flags & SB_RECV) &&
2016 (sb->sb_so->so_flags & SOF_MP_SUBFLOW) &&
2017 (sb->sb_so->so_flags1 & SOF1_POST_FALLBACK_SYNC)) {
2018 mptcp_fallback_sbdrop(sb->sb_so, m, len);
2019 }
2020 #endif /* MPTCP */
2021 KERNEL_DEBUG((DBG_FNC_SBDROP | DBG_FUNC_START), sb, len, 0, 0, 0);
2022
2023 free_list = last = m;
2024 ml = (struct mbuf *)0;
2025
2026 while (len > 0) {
2027 if (m == NULL) {
2028 if (next == NULL) {
2029 /*
2030 * Temporarily replacing this panic with a printf
2031 * because it occurs occasionally when closing
2032 * a socket, where there is no harm in ignoring
2033 * it. This problem will be investigated
2034 * further.
2035 */
2036 /* panic("sbdrop"); */
2037 printf("sbdrop - count not zero\n");
2038 len = 0;
2039 /*
2040 * Zero the counts; if we have no mbufs,
2041 * we have no data (PR-2986815)
2042 */
2043 sb->sb_cc = 0;
2044 sb->sb_mbcnt = 0;
2045 if (!(sb->sb_flags & SB_RECV) &&
2046 (sb->sb_so->so_flags & SOF_ENABLE_MSGS)) {
2047 sb->sb_so->so_msg_state->
2048 msg_serial_bytes = 0;
2049 }
2050 break;
2051 }
2052 m = last = next;
2053 next = m->m_nextpkt;
2054 continue;
2055 }
2056 if (m->m_len > len) {
2057 m->m_len -= len;
2058 m->m_data += len;
2059 sb->sb_cc -= len;
2060 /* update the send byte count */
2061 if (sb->sb_flags & SB_SNDBYTE_CNT) {
2062 inp_decr_sndbytes_total(sb->sb_so, len);
2063 }
2064 if (m->m_type != MT_DATA && m->m_type != MT_HEADER &&
2065 m->m_type != MT_OOBDATA) {
2066 sb->sb_ctl -= len;
2067 }
2068 break;
2069 }
2070 len -= m->m_len;
2071 sbfree(sb, m);
2072
2073 ml = m;
2074 m = m->m_next;
2075 }
2076 while (m && m->m_len == 0) {
2077 sbfree(sb, m);
2078
2079 ml = m;
2080 m = m->m_next;
2081 }
2082 if (ml) {
2083 ml->m_next = (struct mbuf *)0;
2084 last->m_nextpkt = (struct mbuf *)0;
2085 m_freem_list(free_list);
2086 }
2087 if (m) {
2088 sb->sb_mb = m;
2089 m->m_nextpkt = next;
2090 } else {
2091 sb->sb_mb = next;
2092 }
2093
2094 /*
2095 * First part is an inline SB_EMPTY_FIXUP(). Second part
2096 * makes sure sb_lastrecord is up-to-date if we dropped
2097 * part of the last record.
2098 */
2099 m = sb->sb_mb;
2100 if (m == NULL) {
2101 sb->sb_mbtail = NULL;
2102 sb->sb_lastrecord = NULL;
2103 } else if (m->m_nextpkt == NULL) {
2104 sb->sb_lastrecord = m;
2105 }
2106
2107 #if CONTENT_FILTER
2108 cfil_sock_buf_update(sb);
2109 #endif /* CONTENT_FILTER */
2110
2111 postevent(0, sb, EV_RWBYTES);
2112
2113 KERNEL_DEBUG((DBG_FNC_SBDROP | DBG_FUNC_END), sb, 0, 0, 0, 0);
2114 }
2115
2116 /*
2117 * Drop a record off the front of a sockbuf
2118 * and move the next record to the front.
2119 */
2120 void
2121 sbdroprecord(struct sockbuf *sb)
2122 {
2123 struct mbuf *m, *mn;
2124
2125 m = sb->sb_mb;
2126 if (m) {
2127 sb->sb_mb = m->m_nextpkt;
2128 do {
2129 sbfree(sb, m);
2130 MFREE(m, mn);
2131 m = mn;
2132 } while (m);
2133 }
2134 SB_EMPTY_FIXUP(sb);
2135 postevent(0, sb, EV_RWBYTES);
2136 }
2137
2138 /*
2139 * Create a "control" mbuf containing the specified data
2140 * with the specified type for presentation on a socket buffer.
2141 */
2142 struct mbuf *
2143 sbcreatecontrol(caddr_t p, int size, int type, int level)
2144 {
2145 struct cmsghdr *cp;
2146 struct mbuf *m;
2147
2148 if (CMSG_SPACE((u_int)size) > MLEN) {
2149 return (struct mbuf *)NULL;
2150 }
2151 if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL) {
2152 return (struct mbuf *)NULL;
2153 }
2154 cp = mtod(m, struct cmsghdr *);
2155 VERIFY(IS_P2ALIGNED(cp, sizeof(u_int32_t)));
2156 /* XXX check size? */
2157 (void) memcpy(CMSG_DATA(cp), p, size);
2158 m->m_len = CMSG_SPACE(size);
2159 cp->cmsg_len = CMSG_LEN(size);
2160 cp->cmsg_level = level;
2161 cp->cmsg_type = type;
2162 return m;
2163 }
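/*
 * Minimal usage sketch for sbcreatecontrol() (hypothetical receive-path
 * snippet, not a call site from this file). A NULL return means the
 * mbuf allocation failed or the cmsg would not fit in a single mbuf;
 * the caller owns the returned mbuf and must queue or free it.
 *
 *	u_char ttl = 64;
 *	struct mbuf *ctl;
 *
 *	ctl = sbcreatecontrol((caddr_t)&ttl, sizeof(ttl),
 *	    IP_RECVTTL, IPPROTO_IP);
 *	if (ctl == NULL)
 *		return;
 */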
2164
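/*
 * Like sbcreatecontrol(), but appends the cmsghdr to an existing control
 * mbuf when it fits; otherwise a new control mbuf is chained via m_next.
 * Returns the location of the mbuf that received the new cmsghdr.
 */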
2165 struct mbuf **
2166 sbcreatecontrol_mbuf(caddr_t p, int size, int type, int level, struct mbuf **mp)
2167 {
2168 struct mbuf *m;
2169 struct cmsghdr *cp;
2170
2171 if (*mp == NULL) {
2172 *mp = sbcreatecontrol(p, size, type, level);
2173 return mp;
2174 }
2175
2176 if (CMSG_SPACE((u_int)size) + (*mp)->m_len > MLEN) {
2177 mp = &(*mp)->m_next;
2178 *mp = sbcreatecontrol(p, size, type, level);
2179 return mp;
2180 }
2181
2182 m = *mp;
2183
2184 cp = (struct cmsghdr *)(void *)(mtod(m, char *) + m->m_len);
2185 /* CMSG_SPACE ensures 32-bit alignment */
2186 VERIFY(IS_P2ALIGNED(cp, sizeof(u_int32_t)));
2187 m->m_len += CMSG_SPACE(size);
2188
2189 /* XXX check size? */
2190 (void) memcpy(CMSG_DATA(cp), p, size);
2191 cp->cmsg_len = CMSG_LEN(size);
2192 cp->cmsg_level = level;
2193 cp->cmsg_type = type;
2194
2195 return mp;
2196 }
2197
2198
2199 /*
2200 * Some routines that return EOPNOTSUPP for entry points that are not
2201 * supported by a protocol. Fill in as needed.
2202 */
2203 int
2204 pru_abort_notsupp(struct socket *so)
2205 {
2206 #pragma unused(so)
2207 return EOPNOTSUPP;
2208 }
2209
2210 int
2211 pru_accept_notsupp(struct socket *so, struct sockaddr **nam)
2212 {
2213 #pragma unused(so, nam)
2214 return EOPNOTSUPP;
2215 }
2216
2217 int
2218 pru_attach_notsupp(struct socket *so, int proto, struct proc *p)
2219 {
2220 #pragma unused(so, proto, p)
2221 return EOPNOTSUPP;
2222 }
2223
2224 int
2225 pru_bind_notsupp(struct socket *so, struct sockaddr *nam, struct proc *p)
2226 {
2227 #pragma unused(so, nam, p)
2228 return EOPNOTSUPP;
2229 }
2230
2231 int
2232 pru_connect_notsupp(struct socket *so, struct sockaddr *nam, struct proc *p)
2233 {
2234 #pragma unused(so, nam, p)
2235 return EOPNOTSUPP;
2236 }
2237
2238 int
2239 pru_connect2_notsupp(struct socket *so1, struct socket *so2)
2240 {
2241 #pragma unused(so1, so2)
2242 return EOPNOTSUPP;
2243 }
2244
2245 int
2246 pru_connectx_notsupp(struct socket *so, struct sockaddr *src,
2247 struct sockaddr *dst, struct proc *p, uint32_t ifscope,
2248 sae_associd_t aid, sae_connid_t *pcid, uint32_t flags, void *arg,
2249 uint32_t arglen, struct uio *uio, user_ssize_t *bytes_written)
2250 {
2251 #pragma unused(so, src, dst, p, ifscope, aid, pcid, flags, arg, arglen, uio, bytes_written)
2252 return EOPNOTSUPP;
2253 }
2254
2255 int
2256 pru_control_notsupp(struct socket *so, u_long cmd, caddr_t data,
2257 struct ifnet *ifp, struct proc *p)
2258 {
2259 #pragma unused(so, cmd, data, ifp, p)
2260 return EOPNOTSUPP;
2261 }
2262
2263 int
2264 pru_detach_notsupp(struct socket *so)
2265 {
2266 #pragma unused(so)
2267 return EOPNOTSUPP;
2268 }
2269
2270 int
2271 pru_disconnect_notsupp(struct socket *so)
2272 {
2273 #pragma unused(so)
2274 return EOPNOTSUPP;
2275 }
2276
2277 int
2278 pru_disconnectx_notsupp(struct socket *so, sae_associd_t aid, sae_connid_t cid)
2279 {
2280 #pragma unused(so, aid, cid)
2281 return EOPNOTSUPP;
2282 }
2283
2284 int
2285 pru_listen_notsupp(struct socket *so, struct proc *p)
2286 {
2287 #pragma unused(so, p)
2288 return EOPNOTSUPP;
2289 }
2290
2291 int
2292 pru_peeraddr_notsupp(struct socket *so, struct sockaddr **nam)
2293 {
2294 #pragma unused(so, nam)
2295 return EOPNOTSUPP;
2296 }
2297
2298 int
2299 pru_rcvd_notsupp(struct socket *so, int flags)
2300 {
2301 #pragma unused(so, flags)
2302 return EOPNOTSUPP;
2303 }
2304
2305 int
2306 pru_rcvoob_notsupp(struct socket *so, struct mbuf *m, int flags)
2307 {
2308 #pragma unused(so, m, flags)
2309 return EOPNOTSUPP;
2310 }
2311
2312 int
2313 pru_send_notsupp(struct socket *so, int flags, struct mbuf *m,
2314 struct sockaddr *addr, struct mbuf *control, struct proc *p)
2315 {
2316 #pragma unused(so, flags, m, addr, control, p)
2317 return EOPNOTSUPP;
2318 }
2319
2320 int
2321 pru_send_list_notsupp(struct socket *so, int flags, struct mbuf *m,
2322 struct sockaddr *addr, struct mbuf *control, struct proc *p)
2323 {
2324 #pragma unused(so, flags, m, addr, control, p)
2325 return EOPNOTSUPP;
2326 }
2327
2328 /*
2329 * This isn't really a ``null'' operation, but it's the default one
2330 * and doesn't do anything destructive.
2331 */
2332 int
2333 pru_sense_null(struct socket *so, void *ub, int isstat64)
2334 {
2335 if (isstat64 != 0) {
2336 struct stat64 *sb64;
2337
2338 sb64 = (struct stat64 *)ub;
2339 sb64->st_blksize = so->so_snd.sb_hiwat;
2340 } else {
2341 struct stat *sb;
2342
2343 sb = (struct stat *)ub;
2344 sb->st_blksize = so->so_snd.sb_hiwat;
2345 }
2346
2347 return 0;
2348 }
2349
2350
2351 int
2352 pru_sosend_notsupp(struct socket *so, struct sockaddr *addr, struct uio *uio,
2353 struct mbuf *top, struct mbuf *control, int flags)
2354 {
2355 #pragma unused(so, addr, uio, top, control, flags)
2356 return EOPNOTSUPP;
2357 }
2358
2359 int
2360 pru_sosend_list_notsupp(struct socket *so, struct uio **uio,
2361 u_int uiocnt, int flags)
2362 {
2363 #pragma unused(so, uio, uiocnt, flags)
2364 return EOPNOTSUPP;
2365 }
2366
2367 int
2368 pru_soreceive_notsupp(struct socket *so, struct sockaddr **paddr,
2369 struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
2370 {
2371 #pragma unused(so, paddr, uio, mp0, controlp, flagsp)
2372 return EOPNOTSUPP;
2373 }
2374
2375 int
2376 pru_soreceive_list_notsupp(struct socket *so,
2377 struct recv_msg_elem *recv_msg_array, u_int uiocnt, int *flagsp)
2378 {
2379 #pragma unused(so, recv_msg_array, uiocnt, flagsp)
2380 return EOPNOTSUPP;
2381 }
2382
2383 int
2384 pru_shutdown_notsupp(struct socket *so)
2385 {
2386 #pragma unused(so)
2387 return EOPNOTSUPP;
2388 }
2389
2390 int
2391 pru_sockaddr_notsupp(struct socket *so, struct sockaddr **nam)
2392 {
2393 #pragma unused(so, nam)
2394 return EOPNOTSUPP;
2395 }
2396
2397 int
2398 pru_sopoll_notsupp(struct socket *so, int events, kauth_cred_t cred, void *wql)
2399 {
2400 #pragma unused(so, events, cred, wql)
2401 return EOPNOTSUPP;
2402 }
2403
2404 int
2405 pru_socheckopt_null(struct socket *so, struct sockopt *sopt)
2406 {
2407 #pragma unused(so, sopt)
2408 /*
2409 * Allow all options for set/get by default.
2410 */
2411 return 0;
2412 }
2413
2414 static int
2415 pru_preconnect_null(struct socket *so)
2416 {
2417 #pragma unused(so)
2418 return 0;
2419 }
2420
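/*
 * Install the default handlers defined above for any pr_usrreqs entry
 * points that a protocol left NULL. Hypothetical usage sketch (the
 * foo_* names are made up, not from this file): a protocol implementing
 * only a few entry points can rely on pru_sanitize() for the rest.
 *
 *	static struct pr_usrreqs foo_usrreqs = {
 *		.pru_attach = foo_attach,
 *		.pru_detach = foo_detach,
 *		.pru_send = foo_send,
 *	};
 *	...
 *	pru_sanitize(&foo_usrreqs);
 */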
2421 void
2422 pru_sanitize(struct pr_usrreqs *pru)
2423 {
2424 #define DEFAULT(foo, bar) if ((foo) == NULL) (foo) = (bar)
2425 DEFAULT(pru->pru_abort, pru_abort_notsupp);
2426 DEFAULT(pru->pru_accept, pru_accept_notsupp);
2427 DEFAULT(pru->pru_attach, pru_attach_notsupp);
2428 DEFAULT(pru->pru_bind, pru_bind_notsupp);
2429 DEFAULT(pru->pru_connect, pru_connect_notsupp);
2430 DEFAULT(pru->pru_connect2, pru_connect2_notsupp);
2431 DEFAULT(pru->pru_connectx, pru_connectx_notsupp);
2432 DEFAULT(pru->pru_control, pru_control_notsupp);
2433 DEFAULT(pru->pru_detach, pru_detach_notsupp);
2434 DEFAULT(pru->pru_disconnect, pru_disconnect_notsupp);
2435 DEFAULT(pru->pru_disconnectx, pru_disconnectx_notsupp);
2436 DEFAULT(pru->pru_listen, pru_listen_notsupp);
2437 DEFAULT(pru->pru_peeraddr, pru_peeraddr_notsupp);
2438 DEFAULT(pru->pru_rcvd, pru_rcvd_notsupp);
2439 DEFAULT(pru->pru_rcvoob, pru_rcvoob_notsupp);
2440 DEFAULT(pru->pru_send, pru_send_notsupp);
2441 DEFAULT(pru->pru_send_list, pru_send_list_notsupp);
2442 DEFAULT(pru->pru_sense, pru_sense_null);
2443 DEFAULT(pru->pru_shutdown, pru_shutdown_notsupp);
2444 DEFAULT(pru->pru_sockaddr, pru_sockaddr_notsupp);
2445 DEFAULT(pru->pru_sopoll, pru_sopoll_notsupp);
2446 DEFAULT(pru->pru_soreceive, pru_soreceive_notsupp);
2447 DEFAULT(pru->pru_soreceive_list, pru_soreceive_list_notsupp);
2448 DEFAULT(pru->pru_sosend, pru_sosend_notsupp);
2449 DEFAULT(pru->pru_sosend_list, pru_sosend_list_notsupp);
2450 DEFAULT(pru->pru_socheckopt, pru_socheckopt_null);
2451 DEFAULT(pru->pru_preconnect, pru_preconnect_null);
2452 #undef DEFAULT
2453 }
2454
2455 /*
2456 * The following are macros on BSD and functions on Darwin
2457 */
2458
2459 /*
2460 * Do we need to notify the other side when I/O is possible?
2461 */
2462
2463 int
2464 sb_notify(struct sockbuf *sb)
2465 {
2466 return sb->sb_waiters > 0 ||
2467 (sb->sb_flags & (SB_SEL | SB_ASYNC | SB_UPCALL | SB_KNOTE));
2468 }
2469
2470 /*
2471 * How much space is there in a socket buffer (so->so_snd or so->so_rcv)?
2472 * This is problematic if the fields are unsigned, as the space might
2473 * still be negative (cc > hiwat or mbcnt > mbmax). We detect that
2474 * overflow and return 0.
2475 */
2476 int
2477 sbspace(struct sockbuf *sb)
2478 {
2479 int pending = 0;
2480 int space = imin((int)(sb->sb_hiwat - sb->sb_cc),
2481 (int)(sb->sb_mbmax - sb->sb_mbcnt));
2482
2483 if (sb->sb_preconn_hiwat != 0) {
2484 space = imin((int)(sb->sb_preconn_hiwat - sb->sb_cc), space);
2485 }
2486
2487 if (space < 0) {
2488 space = 0;
2489 }
2490
2491 /* Compensate for data being processed by content filters */
2492 #if CONTENT_FILTER
2493 pending = cfil_sock_data_space(sb);
2494 #endif /* CONTENT_FILTER */
2495 if (pending > space) {
2496 space = 0;
2497 } else {
2498 space -= pending;
2499 }
2500
2501 return space;
2502 }
2503
2504 /*
2505 * If this socket has priority queues, check if there is enough
2506 * space in the priority queue for this msg.
2507 */
2508 int
2509 msgq_sbspace(struct socket *so, struct mbuf *control)
2510 {
2511 int space = 0, error;
2512 u_int32_t msgpri = 0;
2513 VERIFY(so->so_type == SOCK_STREAM &&
2514 SOCK_PROTO(so) == IPPROTO_TCP);
2515 if (control != NULL) {
2516 error = tcp_get_msg_priority(control, &msgpri);
2517 if (error) {
2518 return 0;
2519 }
2520 } else {
2521 msgpri = MSG_PRI_0;
2522 }
2523 space = (so->so_snd.sb_idealsize / MSG_PRI_COUNT) -
2524 so->so_msg_state->msg_priq[msgpri].msgq_bytes;
2525 if (space < 0) {
2526 space = 0;
2527 }
2528 return space;
2529 }
2530
2531 /* do we have to send all at once on a socket? */
2532 int
2533 sosendallatonce(struct socket *so)
2534 {
2535 return so->so_proto->pr_flags & PR_ATOMIC;
2536 }
2537
2538 /* can we read something from so? */
2539 int
2540 soreadable(struct socket *so)
2541 {
2542 return so->so_rcv.sb_cc >= so->so_rcv.sb_lowat ||
2543 ((so->so_state & SS_CANTRCVMORE)
2544 #if CONTENT_FILTER
2545 && cfil_sock_data_pending(&so->so_rcv) == 0
2546 #endif /* CONTENT_FILTER */
2547 ) ||
2548 so->so_comp.tqh_first || so->so_error;
2549 }
2550
2551 /* can we write something to so? */
2552
2553 int
2554 sowriteable(struct socket *so)
2555 {
2556 if ((so->so_state & SS_CANTSENDMORE) ||
2557 so->so_error > 0) {
2558 return 1;
2559 }
2560 if (so_wait_for_if_feedback(so) || !socanwrite(so)) {
2561 return 0;
2562 }
2563 if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
2564 return 1;
2565 }
2566
2567 if (sbspace(&(so)->so_snd) >= (so)->so_snd.sb_lowat) {
2568 if (so->so_flags & SOF_NOTSENT_LOWAT) {
2569 if ((SOCK_DOM(so) == PF_INET6 ||
2570 SOCK_DOM(so) == PF_INET) &&
2571 so->so_type == SOCK_STREAM) {
2572 return tcp_notsent_lowat_check(so);
2573 }
2574 #if MPTCP
2575 else if ((SOCK_DOM(so) == PF_MULTIPATH) &&
2576 (SOCK_PROTO(so) == IPPROTO_TCP)) {
2577 return mptcp_notsent_lowat_check(so);
2578 }
2579 #endif
2580 else {
2581 return 1;
2582 }
2583 } else {
2584 return 1;
2585 }
2586 }
2587 return 0;
2588 }
2589
2590 /* adjust counters in sb reflecting allocation of m */
2591
2592 void
2593 sballoc(struct sockbuf *sb, struct mbuf *m)
2594 {
2595 u_int32_t cnt = 1;
2596 sb->sb_cc += m->m_len;
2597 if (m->m_type != MT_DATA && m->m_type != MT_HEADER &&
2598 m->m_type != MT_OOBDATA) {
2599 sb->sb_ctl += m->m_len;
2600 }
2601 sb->sb_mbcnt += MSIZE;
2602
2603 if (m->m_flags & M_EXT) {
2604 sb->sb_mbcnt += m->m_ext.ext_size;
2605 cnt += (m->m_ext.ext_size >> MSIZESHIFT);
2606 }
2607 OSAddAtomic(cnt, &total_sbmb_cnt);
2608 VERIFY(total_sbmb_cnt > 0);
2609 if (total_sbmb_cnt > total_sbmb_cnt_peak) {
2610 total_sbmb_cnt_peak = total_sbmb_cnt;
2611 }
2612
2613 /*
2614 * If data is being added to the send socket buffer,
2615 * update the send byte count
2616 */
2617 if (sb->sb_flags & SB_SNDBYTE_CNT) {
2618 inp_incr_sndbytes_total(sb->sb_so, m->m_len);
2619 inp_incr_sndbytes_unsent(sb->sb_so, m->m_len);
2620 }
2621 }
2622
2623 /* adjust counters in sb reflecting freeing of m */
2624 void
2625 sbfree(struct sockbuf *sb, struct mbuf *m)
2626 {
2627 int cnt = -1;
2628
2629 sb->sb_cc -= m->m_len;
2630 if (m->m_type != MT_DATA && m->m_type != MT_HEADER &&
2631 m->m_type != MT_OOBDATA) {
2632 sb->sb_ctl -= m->m_len;
2633 }
2634 sb->sb_mbcnt -= MSIZE;
2635 if (m->m_flags & M_EXT) {
2636 sb->sb_mbcnt -= m->m_ext.ext_size;
2637 cnt -= (m->m_ext.ext_size >> MSIZESHIFT);
2638 }
2639 OSAddAtomic(cnt, &total_sbmb_cnt);
2640 VERIFY(total_sbmb_cnt >= 0);
2641 if (total_sbmb_cnt < total_sbmb_cnt_floor) {
2642 total_sbmb_cnt_floor = total_sbmb_cnt;
2643 }
2644
2645 /*
2646 * If data is being removed from the send socket buffer,
2647 * update the send byte count
2648 */
2649 if (sb->sb_flags & SB_SNDBYTE_CNT) {
2650 inp_decr_sndbytes_total(sb->sb_so, m->m_len);
2651 }
2652 }
2653
2654 /*
2655 * Set lock on sockbuf sb; sleep if lock is already held.
2656 * Unless SB_NOINTR is set on sockbuf, sleep is interruptible.
2657 * Returns error without lock if sleep is interrupted.
2658 */
2659 int
2660 sblock(struct sockbuf *sb, uint32_t flags)
2661 {
2662 boolean_t nointr = ((sb->sb_flags & SB_NOINTR) || (flags & SBL_NOINTR));
2663 void *lr_saved = __builtin_return_address(0);
2664 struct socket *so = sb->sb_so;
2665 void * wchan;
2666 int error = 0;
2667 thread_t tp = current_thread();
2668
2669 VERIFY((flags & SBL_VALID) == flags);
2670
2671 /* so_usecount may be 0 if we get here from sofreelastref() */
2672 if (so == NULL) {
2673 panic("%s: null so, sb=%p sb_flags=0x%x lr=%p\n",
2674 __func__, sb, sb->sb_flags, lr_saved);
2675 /* NOTREACHED */
2676 } else if (so->so_usecount < 0) {
2677 panic("%s: sb=%p sb_flags=0x%x sb_so=%p usecount=%d lr=%p "
2678 "lrh= %s\n", __func__, sb, sb->sb_flags, so,
2679 so->so_usecount, lr_saved, solockhistory_nr(so));
2680 /* NOTREACHED */
2681 }
2682
2683 /*
2684 * The content filter thread must hold the sockbuf lock
2685 */
2686 if ((so->so_flags & SOF_CONTENT_FILTER) && sb->sb_cfil_thread == tp) {
2687 /*
2688 * Don't panic if we are defunct because SB_LOCK has
2689 * been cleared by sodefunct()
2690 */
2691 if (!(so->so_flags & SOF_DEFUNCT) && !(sb->sb_flags & SB_LOCK)) {
2692 panic("%s: SB_LOCK not held for %p\n",
2693 __func__, sb);
2694 }
2695
2696 /* Keep the sockbuf locked */
2697 return 0;
2698 }
2699
2700 if ((sb->sb_flags & SB_LOCK) && !(flags & SBL_WAIT)) {
2701 return EWOULDBLOCK;
2702 }
2703 /*
2704 * We may get here from sorflush(), in which case "sb" may not
2705 * point to the real socket buffer. Use the actual socket buffer
2706 * address from the socket instead.
2707 */
2708 wchan = (sb->sb_flags & SB_RECV) ?
2709 &so->so_rcv.sb_flags : &so->so_snd.sb_flags;
2710
2711 /*
2712 * A content filter thread has exclusive access to the sockbuf
2713 * until it clears sb_cfil_thread.
2714 */
2715 while ((sb->sb_flags & SB_LOCK) ||
2716 ((so->so_flags & SOF_CONTENT_FILTER) &&
2717 sb->sb_cfil_thread != NULL)) {
2718 lck_mtx_t *mutex_held;
2719
2720 /*
2721 * XXX: This code should be moved above, outside of this loop;
2722 * however, we may get here as part of sofreelastref(), and
2723 * at that time pr_getlock() may no longer be able to return
2724 * us the lock. This will be fixed in the future.
2725 */
2726 if (so->so_proto->pr_getlock != NULL) {
2727 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
2728 } else {
2729 mutex_held = so->so_proto->pr_domain->dom_mtx;
2730 }
2731
2732 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
2733
2734 sb->sb_wantlock++;
2735 VERIFY(sb->sb_wantlock != 0);
2736
2737 error = msleep(wchan, mutex_held,
2738 nointr ? PSOCK : PSOCK | PCATCH,
2739 nointr ? "sb_lock_nointr" : "sb_lock", NULL);
2740
2741 VERIFY(sb->sb_wantlock != 0);
2742 sb->sb_wantlock--;
2743
2744 if (error == 0 && (so->so_flags & SOF_DEFUNCT) &&
2745 !(flags & SBL_IGNDEFUNCT)) {
2746 error = EBADF;
2747 SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llx [%d,%d] "
2748 "(%d)\n", __func__, proc_selfpid(),
2749 proc_best_name(current_proc()),
2750 (uint64_t)VM_KERNEL_ADDRPERM(so),
2751 SOCK_DOM(so), SOCK_TYPE(so), error);
2752 }
2753
2754 if (error != 0) {
2755 return error;
2756 }
2757 }
2758 sb->sb_flags |= SB_LOCK;
2759 return 0;
2760 }
2761
2762 /*
2763 * Release lock on sockbuf sb
2764 */
2765 void
2766 sbunlock(struct sockbuf *sb, boolean_t keeplocked)
2767 {
2768 void *lr_saved = __builtin_return_address(0);
2769 struct socket *so = sb->sb_so;
2770 thread_t tp = current_thread();
2771
2772 /* so_usecount may be 0 if we get here from sofreelastref() */
2773 if (so == NULL) {
2774 panic("%s: null so, sb=%p sb_flags=0x%x lr=%p\n",
2775 __func__, sb, sb->sb_flags, lr_saved);
2776 /* NOTREACHED */
2777 } else if (so->so_usecount < 0) {
2778 panic("%s: sb=%p sb_flags=0x%x sb_so=%p usecount=%d lr=%p "
2779 "lrh= %s\n", __func__, sb, sb->sb_flags, so,
2780 so->so_usecount, lr_saved, solockhistory_nr(so));
2781 /* NOTREACHED */
2782 }
2783
2784 /*
2785 * The content filter thread must hold the sockbuf lock
2786 */
2787 if ((so->so_flags & SOF_CONTENT_FILTER) && sb->sb_cfil_thread == tp) {
2788 /*
2789 * Don't panic if we are defunct because SB_LOCK has
2790 * been cleared by sodefunct()
2791 */
2792 if (!(so->so_flags & SOF_DEFUNCT) &&
2793 !(sb->sb_flags & SB_LOCK) &&
2794 !(so->so_state & SS_DEFUNCT) &&
2795 !(so->so_flags1 & SOF1_DEFUNCTINPROG)) {
2796 panic("%s: SB_LOCK not held for %p\n",
2797 __func__, sb);
2798 }
2799 /* Keep the sockbuf locked and proceed */
2800 } else {
2801 VERIFY((sb->sb_flags & SB_LOCK) ||
2802 (so->so_state & SS_DEFUNCT) ||
2803 (so->so_flags1 & SOF1_DEFUNCTINPROG));
2804
2805 sb->sb_flags &= ~SB_LOCK;
2806
2807 if (sb->sb_wantlock > 0) {
2808 /*
2809 * We may get here from sorflush(), in which case "sb"
2810 * may not point to the real socket buffer. Use the
2811 * actual socket buffer address from the socket instead.
2812 */
2813 wakeup((sb->sb_flags & SB_RECV) ? &so->so_rcv.sb_flags :
2814 &so->so_snd.sb_flags);
2815 }
2816 }
2817
2818 if (!keeplocked) { /* unlock on exit */
2819 if (so->so_flags & SOF_MP_SUBFLOW || SOCK_DOM(so) == PF_MULTIPATH) {
2820 (*so->so_proto->pr_unlock)(so, 1, lr_saved);
2821 } else {
2822 lck_mtx_t *mutex_held;
2823
2824 if (so->so_proto->pr_getlock != NULL) {
2825 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
2826 } else {
2827 mutex_held = so->so_proto->pr_domain->dom_mtx;
2828 }
2829
2830 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
2831
2832 VERIFY(so->so_usecount > 0);
2833 so->so_usecount--;
2834 so->unlock_lr[so->next_unlock_lr] = lr_saved;
2835 so->next_unlock_lr = (so->next_unlock_lr + 1) % SO_LCKDBG_MAX;
2836 lck_mtx_unlock(mutex_held);
2837 }
2838 }
2839 }
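/*
 * Typical locking pattern (a sketch modeled on sbflush() above, not a
 * verbatim call site): the caller already holds the socket lock, takes
 * the sockbuf lock around its manipulation, then releases it while
 * keeping the socket locked.
 *
 *	int error = sblock(&so->so_rcv, SBL_WAIT);
 *	if (error == 0) {
 *		... inspect or drain so->so_rcv ...
 *		sbunlock(&so->so_rcv, TRUE);
 *	}
 */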
2840
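/*
 * Wake up anyone waiting for I/O on the receive (sorwakeup) or send
 * (sowwakeup) buffer, but only when sb_notify() says someone cares.
 */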
2841 void
2842 sorwakeup(struct socket *so)
2843 {
2844 if (sb_notify(&so->so_rcv)) {
2845 sowakeup(so, &so->so_rcv, NULL);
2846 }
2847 }
2848
2849 void
2850 sowwakeup(struct socket *so)
2851 {
2852 if (sb_notify(&so->so_snd)) {
2853 sowakeup(so, &so->so_snd, NULL);
2854 }
2855 }
2856
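/*
 * Deliver a socket event hint to attached knotes and to the socket's
 * event upcall; interface-denied hints may also generate a kernel event.
 */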
2857 void
2858 soevent(struct socket *so, long hint)
2859 {
2860 if (so->so_flags & SOF_KNOTE) {
2861 KNOTE(&so->so_klist, hint);
2862 }
2863
2864 soevupcall(so, hint);
2865
2866 /*
2867 * Don't post an event if this is a subflow socket or
2868 * the app has opted out of using the cellular interface
2869 */
2870 if ((hint & SO_FILT_HINT_IFDENIED) &&
2871 !(so->so_flags & SOF_MP_SUBFLOW) &&
2872 !(so->so_restrictions & SO_RESTRICT_DENY_CELLULAR) &&
2873 !(so->so_restrictions & SO_RESTRICT_DENY_EXPENSIVE) &&
2874 !(so->so_restrictions & SO_RESTRICT_DENY_CONSTRAINED)) {
2875 soevent_ifdenied(so);
2876 }
2877 }
2878
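/*
 * Invoke the socket's registered event callback with the subset of
 * hints it subscribed to via so_eventmask.
 */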
2879 void
2880 soevupcall(struct socket *so, u_int32_t hint)
2881 {
2882 if (so->so_event != NULL) {
2883 caddr_t so_eventarg = so->so_eventarg;
2884
2885 hint &= so->so_eventmask;
2886 if (hint != 0) {
2887 so->so_event(so, so_eventarg, hint);
2888 }
2889 }
2890 }
2891
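/*
 * Post a KEV_NETPOLICY_IFDENIED kernel event on behalf of the effective
 * owner of the socket, at most once until so_ifdenied_notifies is reset.
 */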
2892 static void
2893 soevent_ifdenied(struct socket *so)
2894 {
2895 struct kev_netpolicy_ifdenied ev_ifdenied;
2896
2897 bzero(&ev_ifdenied, sizeof(ev_ifdenied));
2898 /*
2899 * The event consumer is interested in the effective {upid,pid,uuid}
2900 * info, which can be different from those related to the process
2901 * that recently performed a system call on the socket, i.e. when the
2902 * socket is delegated.
2903 */
2904 if (so->so_flags & SOF_DELEGATED) {
2905 ev_ifdenied.ev_data.eupid = so->e_upid;
2906 ev_ifdenied.ev_data.epid = so->e_pid;
2907 uuid_copy(ev_ifdenied.ev_data.euuid, so->e_uuid);
2908 } else {
2909 ev_ifdenied.ev_data.eupid = so->last_upid;
2910 ev_ifdenied.ev_data.epid = so->last_pid;
2911 uuid_copy(ev_ifdenied.ev_data.euuid, so->last_uuid);
2912 }
2913
2914 if (++so->so_ifdenied_notifies > 1) {
2915 /*
2916 * Allow for at most one kernel event to be generated per
2917 * socket; so_ifdenied_notifies is reset upon changes in
2918 * the UUID policy. See comments in inp_update_policy.
2919 */
2920 if (net_io_policy_log) {
2921 uuid_string_t buf;
2922
2923 uuid_unparse(ev_ifdenied.ev_data.euuid, buf);
2924 log(LOG_DEBUG, "%s[%d]: so 0x%llx [%d,%d] epid %d "
2925 "euuid %s%s has %d redundant events supressed\n",
2926 __func__, so->last_pid,
2927 (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so),
2928 SOCK_TYPE(so), ev_ifdenied.ev_data.epid, buf,
2929 ((so->so_flags & SOF_DELEGATED) ?
2930 " [delegated]" : ""), so->so_ifdenied_notifies);
2931 }
2932 } else {
2933 if (net_io_policy_log) {
2934 uuid_string_t buf;
2935
2936 uuid_unparse(ev_ifdenied.ev_data.euuid, buf);
2937 log(LOG_DEBUG, "%s[%d]: so 0x%llx [%d,%d] epid %d "
2938 "euuid %s%s event posted\n", __func__,
2939 so->last_pid, (uint64_t)VM_KERNEL_ADDRPERM(so),
2940 SOCK_DOM(so), SOCK_TYPE(so),
2941 ev_ifdenied.ev_data.epid, buf,
2942 ((so->so_flags & SOF_DELEGATED) ?
2943 " [delegated]" : ""));
2944 }
2945 netpolicy_post_msg(KEV_NETPOLICY_IFDENIED, &ev_ifdenied.ev_data,
2946 sizeof(ev_ifdenied));
2947 }
2948 }
2949
2950 /*
2951 * Make a copy of a sockaddr in a malloced buffer of type M_SONAME.
2952 */
2953 struct sockaddr *
2954 dup_sockaddr(struct sockaddr *sa, int canwait)
2955 {
2956 struct sockaddr *sa2;
2957
2958 MALLOC(sa2, struct sockaddr *, sa->sa_len, M_SONAME,
2959 canwait ? M_WAITOK : M_NOWAIT);
2960 if (sa2) {
2961 bcopy(sa, sa2, sa->sa_len);
2962 }
2963 return sa2;
2964 }
2965
2966 /*
2967 * Create an external-format (``xsocket'') structure using the information
2968 * in the kernel-format socket structure pointed to by so. This is done
2969 * to reduce the spew of irrelevant information over this interface,
2970 * to isolate user code from changes in the kernel structure, and
2971 * potentially to provide information-hiding if we decide that
2972 * some of this information should be hidden from users.
2973 */
2974 void
2975 sotoxsocket(struct socket *so, struct xsocket *xso)
2976 {
2977 xso->xso_len = sizeof(*xso);
2978 xso->xso_so = (_XSOCKET_PTR(struct socket *))VM_KERNEL_ADDRPERM(so);
2979 xso->so_type = so->so_type;
2980 xso->so_options = (short)(so->so_options & 0xffff);
2981 xso->so_linger = so->so_linger;
2982 xso->so_state = so->so_state;
2983 xso->so_pcb = (_XSOCKET_PTR(caddr_t))VM_KERNEL_ADDRPERM(so->so_pcb);
2984 if (so->so_proto) {
2985 xso->xso_protocol = SOCK_PROTO(so);
2986 xso->xso_family = SOCK_DOM(so);
2987 } else {
2988 xso->xso_protocol = xso->xso_family = 0;
2989 }
2990 xso->so_qlen = so->so_qlen;
2991 xso->so_incqlen = so->so_incqlen;
2992 xso->so_qlimit = so->so_qlimit;
2993 xso->so_timeo = so->so_timeo;
2994 xso->so_error = so->so_error;
2995 xso->so_pgid = so->so_pgid;
2996 xso->so_oobmark = so->so_oobmark;
2997 sbtoxsockbuf(&so->so_snd, &xso->so_snd);
2998 sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
2999 xso->so_uid = kauth_cred_getuid(so->so_cred);
3000 }
3001
3002
3003 #if !CONFIG_EMBEDDED
3004
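/* Same as sotoxsocket(), but fills in the 64-bit xsocket64 structure */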
3005 void
3006 sotoxsocket64(struct socket *so, struct xsocket64 *xso)
3007 {
3008 xso->xso_len = sizeof(*xso);
3009 xso->xso_so = (u_int64_t)VM_KERNEL_ADDRPERM(so);
3010 xso->so_type = so->so_type;
3011 xso->so_options = (short)(so->so_options & 0xffff);
3012 xso->so_linger = so->so_linger;
3013 xso->so_state = so->so_state;
3014 xso->so_pcb = (u_int64_t)VM_KERNEL_ADDRPERM(so->so_pcb);
3015 if (so->so_proto) {
3016 xso->xso_protocol = SOCK_PROTO(so);
3017 xso->xso_family = SOCK_DOM(so);
3018 } else {
3019 xso->xso_protocol = xso->xso_family = 0;
3020 }
3021 xso->so_qlen = so->so_qlen;
3022 xso->so_incqlen = so->so_incqlen;
3023 xso->so_qlimit = so->so_qlimit;
3024 xso->so_timeo = so->so_timeo;
3025 xso->so_error = so->so_error;
3026 xso->so_pgid = so->so_pgid;
3027 xso->so_oobmark = so->so_oobmark;
3028 sbtoxsockbuf(&so->so_snd, &xso->so_snd);
3029 sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
3030 xso->so_uid = kauth_cred_getuid(so->so_cred);
3031 }
3032
3033 #endif /* !CONFIG_EMBEDDED */
3034
3035 /*
3036 * This does the same for sockbufs. Note that the xsockbuf structure,
3037 * since it is always embedded in a socket, includes neither a self
3038 * pointer nor a length. We make this entry point public in case
3039 * some other mechanism needs it.
3040 */
3041 void
3042 sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb)
3043 {
3044 xsb->sb_cc = sb->sb_cc;
3045 xsb->sb_hiwat = sb->sb_hiwat;
3046 xsb->sb_mbcnt = sb->sb_mbcnt;
3047 xsb->sb_mbmax = sb->sb_mbmax;
3048 xsb->sb_lowat = sb->sb_lowat;
3049 xsb->sb_flags = sb->sb_flags;
3050 xsb->sb_timeo = (short)
3051 (sb->sb_timeo.tv_sec * hz) + sb->sb_timeo.tv_usec / tick;
3052 if (xsb->sb_timeo == 0 && sb->sb_timeo.tv_usec != 0) {
3053 xsb->sb_timeo = 1;
3054 }
3055 }
3056
3057 /*
3058 * Based on the policy set by an all-knowing decision maker, throttle sockets
3059 * that have been marked as belonging to a "background" process.
3060 */
3061 inline int
3062 soisthrottled(struct socket *so)
3063 {
3064 return so->so_flags1 & SOF1_TRAFFIC_MGT_SO_BACKGROUND;
3065 }
3066
3067 inline int
3068 soisprivilegedtraffic(struct socket *so)
3069 {
3070 return (so->so_flags & SOF_PRIVILEGED_TRAFFIC_CLASS) ? 1 : 0;
3071 }
3072
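/* Is the socket's traffic marked background, by flag or by traffic class? */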
3073 inline int
3074 soissrcbackground(struct socket *so)
3075 {
3076 return (so->so_flags1 & SOF1_TRAFFIC_MGT_SO_BACKGROUND) ||
3077 IS_SO_TC_BACKGROUND(so->so_traffic_class);
3078 }
3079
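/* Does the socket's traffic class fall in the real-time range (SO_TC_AV..SO_TC_VO)? */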
3080 inline int
3081 soissrcrealtime(struct socket *so)
3082 {
3083 return so->so_traffic_class >= SO_TC_AV &&
3084 so->so_traffic_class <= SO_TC_VO;
3085 }
3086
3087 inline int
3088 soissrcbesteffort(struct socket *so)
3089 {
3090 return so->so_traffic_class == SO_TC_BE ||
3091 so->so_traffic_class == SO_TC_RD ||
3092 so->so_traffic_class == SO_TC_OAM;
3093 }
3094
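/* Clear the fast-open related pre-connect data flags on the socket */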
3095 void
3096 soclearfastopen(struct socket *so)
3097 {
3098 if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
3099 so->so_flags1 &= ~SOF1_PRECONNECT_DATA;
3100 }
3101
3102 if (so->so_flags1 & SOF1_DATA_IDEMPOTENT) {
3103 so->so_flags1 &= ~SOF1_DATA_IDEMPOTENT;
3104 }
3105 }
3106
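/* No-op socket event callback */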
3107 void
3108 sonullevent(struct socket *so, void *arg, uint32_t hint)
3109 {
3110 #pragma unused(so, arg, hint)
3111 }
3112
3113 /*
3114 * Here is the definition of some of the basic objects in the kern.ipc
3115 * branch of the MIB.
3116 */
3117 SYSCTL_NODE(_kern, KERN_IPC, ipc,
3118 CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY, 0, "IPC");
3119
3120 /* Check that the maximum socket buffer size is within a range */
3121
3122 static int
3123 sysctl_sb_max SYSCTL_HANDLER_ARGS
3124 {
3125 #pragma unused(oidp, arg1, arg2)
3126 u_int32_t new_value;
3127 int changed = 0;
3128 int error = sysctl_io_number(req, sb_max, sizeof(u_int32_t),
3129 &new_value, &changed);
3130 if (!error && changed) {
3131 if (new_value > LOW_SB_MAX && new_value <= high_sb_max) {
3132 sb_max = new_value;
3133 } else {
3134 error = ERANGE;
3135 }
3136 }
3137 return error;
3138 }
3139
3140 SYSCTL_PROC(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf,
3141 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
3142 &sb_max, 0, &sysctl_sb_max, "IU", "Maximum socket buffer size");
3143
3144 SYSCTL_INT(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor,
3145 CTLFLAG_RW | CTLFLAG_LOCKED, &sb_efficiency, 0, "");
3146
3147 SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters,
3148 CTLFLAG_RD | CTLFLAG_LOCKED, &nmbclusters, 0, "");
3149
3150 SYSCTL_INT(_kern_ipc, OID_AUTO, njcl,
3151 CTLFLAG_RD | CTLFLAG_LOCKED, &njcl, 0, "");
3152
3153 SYSCTL_INT(_kern_ipc, OID_AUTO, njclbytes,
3154 CTLFLAG_RD | CTLFLAG_LOCKED, &njclbytes, 0, "");
3155
3156 SYSCTL_INT(_kern_ipc, KIPC_SOQLIMITCOMPAT, soqlimitcompat,
3157 CTLFLAG_RW | CTLFLAG_LOCKED, &soqlimitcompat, 1,
3158 "Enable socket queue limit compatibility");
3159
3160 /*
3161 * Hack alert -- rdar://33572856
3162 * A loopback test we cannot change was failing because it sets
3163 * SO_SNDTIMEO to 5 seconds, which is also the value
3164 * of the minimum persist timer. Because of the persist timer,
3165 * the connection was not idle for 5 seconds, so SO_SNDTIMEO
3166 * did not trigger at 5 seconds, causing the test failure.
3167 * As a workaround we check the sysctl soqlencomp, which the test is
3168 * already setting to disable auto tuning of the receive buffer.
3169 */
3170
3171 extern u_int32_t tcp_do_autorcvbuf;
3172
3173 static int
3174 sysctl_soqlencomp SYSCTL_HANDLER_ARGS
3175 {
3176 #pragma unused(oidp, arg1, arg2)
3177 u_int32_t new_value;
3178 int changed = 0;
3179 int error = sysctl_io_number(req, soqlencomp, sizeof(u_int32_t),
3180 &new_value, &changed);
3181 if (!error && changed) {
3182 soqlencomp = new_value;
3183 if (new_value != 0) {
3184 tcp_do_autorcvbuf = 0;
3185 tcptv_persmin_val = 6 * TCP_RETRANSHZ;
3186 }
3187 }
3188 return error;
3189 }
3190 SYSCTL_PROC(_kern_ipc, OID_AUTO, soqlencomp,
3191 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
3192 &soqlencomp, 0, &sysctl_soqlencomp, "IU", "");
3193
3194 SYSCTL_INT(_kern_ipc, OID_AUTO, sbmb_cnt, CTLFLAG_RD | CTLFLAG_LOCKED,
3195 &total_sbmb_cnt, 0, "");
3196 SYSCTL_INT(_kern_ipc, OID_AUTO, sbmb_cnt_peak, CTLFLAG_RD | CTLFLAG_LOCKED,
3197 &total_sbmb_cnt_peak, 0, "");
3198 SYSCTL_INT(_kern_ipc, OID_AUTO, sbmb_cnt_floor, CTLFLAG_RD | CTLFLAG_LOCKED,
3199 &total_sbmb_cnt_floor, 0, "");
3200 SYSCTL_QUAD(_kern_ipc, OID_AUTO, sbmb_limreached, CTLFLAG_RD | CTLFLAG_LOCKED,
3201 &sbmb_limreached, "");
3202
3203
3204 SYSCTL_NODE(_kern_ipc, OID_AUTO, io_policy, CTLFLAG_RW, 0, "network IO policy");
3205
3206 SYSCTL_INT(_kern_ipc_io_policy, OID_AUTO, log, CTLFLAG_RW | CTLFLAG_LOCKED,
3207 &net_io_policy_log, 0, "");
3208
3209 #if CONFIG_PROC_UUID_POLICY
3210 SYSCTL_INT(_kern_ipc_io_policy, OID_AUTO, uuid, CTLFLAG_RW | CTLFLAG_LOCKED,
3211 &net_io_policy_uuid, 0, "");
3212 #endif /* CONFIG_PROC_UUID_POLICY */