bsd/kern/uipc_socket2.c (xnu-4903.270.47)
1 /*
2 * Copyright (c) 1998-2019 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29 /*
30 * Copyright (c) 1982, 1986, 1988, 1990, 1993
31 * The Regents of the University of California. All rights reserved.
32 *
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
35 * are met:
36 * 1. Redistributions of source code must retain the above copyright
37 * notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 * notice, this list of conditions and the following disclaimer in the
40 * documentation and/or other materials provided with the distribution.
41 * 3. All advertising materials mentioning features or use of this software
42 * must display the following acknowledgement:
43 * This product includes software developed by the University of
44 * California, Berkeley and its contributors.
45 * 4. Neither the name of the University nor the names of its contributors
46 * may be used to endorse or promote products derived from this software
47 * without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 *
61 * @(#)uipc_socket2.c 8.1 (Berkeley) 6/10/93
62 */
63 /*
64 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
65 * support for mandatory and extensible security protections. This notice
66 * is included in support of clause 2.2 (b) of the Apple Public License,
67 * Version 2.0.
68 */
69
70 #include <sys/param.h>
71 #include <sys/systm.h>
72 #include <sys/domain.h>
73 #include <sys/kernel.h>
74 #include <sys/proc_internal.h>
75 #include <sys/kauth.h>
76 #include <sys/malloc.h>
77 #include <sys/mbuf.h>
78 #include <sys/mcache.h>
79 #include <sys/protosw.h>
80 #include <sys/stat.h>
81 #include <sys/socket.h>
82 #include <sys/socketvar.h>
83 #include <sys/signalvar.h>
84 #include <sys/sysctl.h>
85 #include <sys/syslog.h>
86 #include <sys/ev.h>
87 #include <kern/locks.h>
88 #include <net/route.h>
89 #include <net/content_filter.h>
90 #include <netinet/in.h>
91 #include <netinet/in_pcb.h>
92 #include <netinet/tcp_var.h>
93 #include <sys/kdebug.h>
94 #include <libkern/OSAtomic.h>
95
96 #if CONFIG_MACF
97 #include <security/mac_framework.h>
98 #endif
99
100 #include <mach/vm_param.h>
101
102 #if MPTCP
103 #include <netinet/mptcp_var.h>
104 #endif
105
106 #define DBG_FNC_SBDROP NETDBG_CODE(DBG_NETSOCK, 4)
107 #define DBG_FNC_SBAPPEND NETDBG_CODE(DBG_NETSOCK, 5)
108
109 SYSCTL_DECL(_kern_ipc);
110
111 __private_extern__ u_int32_t net_io_policy_throttle_best_effort = 0;
112 SYSCTL_INT(_kern_ipc, OID_AUTO, throttle_best_effort,
113 CTLFLAG_RW | CTLFLAG_LOCKED, &net_io_policy_throttle_best_effort, 0, "");
114
115 static inline void sbcompress(struct sockbuf *, struct mbuf *, struct mbuf *);
116 static struct socket *sonewconn_internal(struct socket *, int);
117 static int sbappendcontrol_internal(struct sockbuf *, struct mbuf *,
118 struct mbuf *);
119 static void soevent_ifdenied(struct socket *);
120
121 /*
122 * Primitive routines for operating on sockets and socket buffers
123 */
124 static int soqlimitcompat = 1;
125 static int soqlencomp = 0;
126
127 /*
128 * Based on the number of mbuf clusters configured, high_sb_max and sb_max can
129 * get scaled up or down to suit that memory configuration. high_sb_max is a
130 * higher limit on sb_max that is checked when sb_max gets set through sysctl.
131 */
132
133 u_int32_t sb_max = SB_MAX; /* XXX should be static */
134 u_int32_t high_sb_max = SB_MAX;
135
136 static u_int32_t sb_efficiency = 8; /* parameter for sbreserve() */
137 int32_t total_sbmb_cnt __attribute__((aligned(8))) = 0;
138 int32_t total_sbmb_cnt_floor __attribute__((aligned(8))) = 0;
139 int32_t total_sbmb_cnt_peak __attribute__((aligned(8))) = 0;
140 int64_t sbmb_limreached __attribute__((aligned(8))) = 0;
141
142 u_int32_t net_io_policy_log = 0; /* log socket policy changes */
143 #if CONFIG_PROC_UUID_POLICY
144 u_int32_t net_io_policy_uuid = 1; /* enable UUID socket policy */
145 #endif /* CONFIG_PROC_UUID_POLICY */
146
147 /*
148 * Procedures to manipulate state flags of socket
149 * and do appropriate wakeups. Normal sequence from the
150 * active (originating) side is that soisconnecting() is
151 * called during processing of connect() call,
152 * resulting in an eventual call to soisconnected() if/when the
153 * connection is established. When the connection is torn down
154 * soisdisconnecting() is called during processing of disconnect() call,
155 * and soisdisconnected() is called when the connection to the peer
156 * is totally severed. The semantics of these routines are such that
157 * connectionless protocols can call soisconnected() and soisdisconnected()
158 * only, bypassing the in-progress calls when setting up a ``connection''
159 * takes no time.
160 *
161 * From the passive side, a socket is created with
162 * two queues of sockets: so_incomp for connections in progress
163 * and so_comp for connections already made and awaiting user acceptance.
164 * As a protocol is preparing incoming connections, it creates a socket
165 * structure queued on so_incomp by calling sonewconn(). When the connection
166 * is established, soisconnected() is called, and transfers the
167 * socket structure to so_comp, making it available to accept().
168 *
169 * If a socket is closed with sockets on either
170 * so_incomp or so_comp, these sockets are dropped.
171 *
172 * If higher level protocols are implemented in
173 * the kernel, the wakeups done here will sometimes
174 * cause software-interrupt process scheduling.
175 */
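/*
 * Illustrative call sequence for the active side described above
 * (schematic; the entry points are generic, not tied to any one protocol):
 *
 *	connect(2) -> protocol connect routine:
 *		soisconnecting(so);
 *		... handshake proceeds asynchronously ...
 *	protocol input path, handshake complete:
 *		soisconnected(so);       wakes readers/writers, posts events
 *	disconnect/close:
 *		soisdisconnecting(so);
 *		... teardown completes ...
 *		soisdisconnected(so);
 */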
176 void
177 soisconnecting(struct socket *so)
178 {
179 so->so_state &= ~(SS_ISCONNECTED | SS_ISDISCONNECTING);
180 so->so_state |= SS_ISCONNECTING;
181
182 sflt_notify(so, sock_evt_connecting, NULL);
183 }
184
185 void
186 soisconnected(struct socket *so)
187 {
188 so->so_state &= ~(SS_ISCONNECTING | SS_ISDISCONNECTING | SS_ISCONFIRMING);
189 so->so_state |= SS_ISCONNECTED;
190
191 soreserve_preconnect(so, 0);
192
193 sflt_notify(so, sock_evt_connected, NULL);
194
195 if (so->so_head != NULL && (so->so_state & SS_INCOMP)) {
196 struct socket *head = so->so_head;
197 int locked = 0;
198
199 /*
200 * Enforce lock order when the protocol has per socket locks
201 */
202 if (head->so_proto->pr_getlock != NULL) {
203 socket_lock(head, 1);
204 so_acquire_accept_list(head, so);
205 locked = 1;
206 }
207 if (so->so_head == head && (so->so_state & SS_INCOMP)) {
208 so->so_state &= ~SS_INCOMP;
209 so->so_state |= SS_COMP;
210 TAILQ_REMOVE(&head->so_incomp, so, so_list);
211 TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
212 head->so_incqlen--;
213
214 /*
215 * We have to release the accept list in
216 * case a socket callback calls sock_accept()
217 */
218 if (locked != 0) {
219 so_release_accept_list(head);
220 socket_unlock(so, 0);
221 }
222 postevent(head, 0, EV_RCONN);
223 sorwakeup(head);
224 wakeup_one((caddr_t)&head->so_timeo);
225
226 if (locked != 0) {
227 socket_unlock(head, 1);
228 socket_lock(so, 0);
229 }
230 } else if (locked != 0) {
231 so_release_accept_list(head);
232 socket_unlock(head, 1);
233 }
234 } else {
235 postevent(so, 0, EV_WCONN);
236 wakeup((caddr_t)&so->so_timeo);
237 sorwakeup(so);
238 sowwakeup(so);
239 soevent(so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_CONNECTED |
240 SO_FILT_HINT_CONNINFO_UPDATED);
241 }
242 }
243
244 boolean_t
245 socanwrite(struct socket *so)
246 {
247 return (so->so_state & SS_ISCONNECTED) ||
248 !(so->so_proto->pr_flags & PR_CONNREQUIRED) ||
249 (so->so_flags1 & SOF1_PRECONNECT_DATA);
250 }
251
252 void
253 soisdisconnecting(struct socket *so)
254 {
255 so->so_state &= ~SS_ISCONNECTING;
256 so->so_state |= (SS_ISDISCONNECTING | SS_CANTRCVMORE | SS_CANTSENDMORE);
257 soevent(so, SO_FILT_HINT_LOCKED);
258 sflt_notify(so, sock_evt_disconnecting, NULL);
259 wakeup((caddr_t)&so->so_timeo);
260 sowwakeup(so);
261 sorwakeup(so);
262 }
263
264 void
265 soisdisconnected(struct socket *so)
266 {
267 so->so_state &= ~(SS_ISCONNECTING | SS_ISCONNECTED | SS_ISDISCONNECTING);
268 so->so_state |= (SS_CANTRCVMORE | SS_CANTSENDMORE | SS_ISDISCONNECTED);
269 soevent(so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_DISCONNECTED |
270 SO_FILT_HINT_CONNINFO_UPDATED);
271 sflt_notify(so, sock_evt_disconnected, NULL);
272 wakeup((caddr_t)&so->so_timeo);
273 sowwakeup(so);
274 sorwakeup(so);
275
276 #if CONTENT_FILTER
277 /* Notify content filters as soon as we cannot send/receive data */
278 cfil_sock_notify_shutdown(so, SHUT_RDWR);
279 #endif /* CONTENT_FILTER */
280 }
281
282 /*
283 * This function will issue a wakeup like soisdisconnected but it will not
284 * notify the socket filters. This will avoid unlocking the socket
285 * in the midst of closing it.
286 */
287 void
288 sodisconnectwakeup(struct socket *so)
289 {
290 so->so_state &= ~(SS_ISCONNECTING | SS_ISCONNECTED | SS_ISDISCONNECTING);
291 so->so_state |= (SS_CANTRCVMORE | SS_CANTSENDMORE | SS_ISDISCONNECTED);
292 soevent(so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_DISCONNECTED |
293 SO_FILT_HINT_CONNINFO_UPDATED);
294 wakeup((caddr_t)&so->so_timeo);
295 sowwakeup(so);
296 sorwakeup(so);
297
298 #if CONTENT_FILTER
299 /* Notify content filters as soon as we cannot send/receive data */
300 cfil_sock_notify_shutdown(so, SHUT_RDWR);
301 #endif /* CONTENT_FILTER */
302 }
303
304 /*
305 * When an attempt at a new connection is noted on a socket
306 * which accepts connections, sonewconn is called. If the
307 * connection is possible (subject to space constraints, etc.)
308 * then we allocate a new structure, properly linked into the
309 * data structure of the original socket, and return this.
310 * Connstatus may be 0, or SS_ISCONFIRMING, or SS_ISCONNECTED.
311 */
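/*
 * Illustrative sketch (schematic, no specific protocol implied) of how a
 * listening protocol typically uses this:
 *
 *	so = sonewconn(head, 0, (const struct sockaddr *)&peer);
 *	if (so == NULL)
 *		... drop the request: queue full or allocation failed ...
 *	... attach protocol state to so ...
 *	later, when the handshake completes, soisconnected(so) moves the
 *	socket from so_incomp to so_comp, where accept(2) can claim it.
 */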
312 static struct socket *
313 sonewconn_internal(struct socket *head, int connstatus)
314 {
315 int so_qlen, error = 0;
316 struct socket *so;
317 lck_mtx_t *mutex_held;
318
319 if (head->so_proto->pr_getlock != NULL) {
320 mutex_held = (*head->so_proto->pr_getlock)(head, 0);
321 } else {
322 mutex_held = head->so_proto->pr_domain->dom_mtx;
323 }
324 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
325
326 if (!soqlencomp) {
327 /*
328 * This is the default case; so_qlen represents the
329 * sum of both incomplete and completed queues.
330 */
331 so_qlen = head->so_qlen;
332 } else {
333 /*
334 * When kern.ipc.soqlencomp is set to 1, so_qlen
335 * represents only the completed queue. Since we
336 * cannot let the incomplete queue go unbounded
337 * (in case of SYN flood), we cap the incomplete
338 * queue length to at most somaxconn, and use that
339 * as so_qlen so that we fail immediately below.
340 */
341 so_qlen = head->so_qlen - head->so_incqlen;
342 if (head->so_incqlen > somaxconn) {
343 so_qlen = somaxconn;
344 }
345 }
346
347 if (so_qlen >=
348 (soqlimitcompat ? head->so_qlimit : (3 * head->so_qlimit / 2))) {
349 return (struct socket *)0;
350 }
351 so = soalloc(1, SOCK_DOM(head), head->so_type);
352 if (so == NULL) {
353 return (struct socket *)0;
354 }
355 /* check if head was closed during the soalloc */
356 if (head->so_proto == NULL) {
357 sodealloc(so);
358 return (struct socket *)0;
359 }
360
361 so->so_type = head->so_type;
362 so->so_options = head->so_options & ~SO_ACCEPTCONN;
363 so->so_linger = head->so_linger;
364 so->so_state = head->so_state | SS_NOFDREF;
365 so->so_proto = head->so_proto;
366 so->so_timeo = head->so_timeo;
367 so->so_pgid = head->so_pgid;
368 kauth_cred_ref(head->so_cred);
369 so->so_cred = head->so_cred;
370 so->last_pid = head->last_pid;
371 so->last_upid = head->last_upid;
372 memcpy(so->last_uuid, head->last_uuid, sizeof(so->last_uuid));
373 if (head->so_flags & SOF_DELEGATED) {
374 so->e_pid = head->e_pid;
375 so->e_upid = head->e_upid;
376 memcpy(so->e_uuid, head->e_uuid, sizeof(so->e_uuid));
377 }
378 /* inherit socket options stored in so_flags */
379 so->so_flags = head->so_flags &
380 (SOF_NOSIGPIPE | SOF_NOADDRAVAIL | SOF_REUSESHAREUID |
381 SOF_NOTIFYCONFLICT | SOF_BINDRANDOMPORT | SOF_NPX_SETOPTSHUT |
382 SOF_NODEFUNCT | SOF_PRIVILEGED_TRAFFIC_CLASS | SOF_NOTSENT_LOWAT |
383 SOF_USELRO | SOF_DELEGATED);
384 so->so_usecount = 1;
385 so->next_lock_lr = 0;
386 so->next_unlock_lr = 0;
387
388 so->so_rcv.sb_flags |= SB_RECV; /* XXX */
389 so->so_rcv.sb_so = so->so_snd.sb_so = so;
390 TAILQ_INIT(&so->so_evlist);
391
392 #if CONFIG_MACF_SOCKET
393 mac_socket_label_associate_accept(head, so);
394 #endif
395
396 /* inherit traffic management properties of listener */
397 so->so_flags1 |=
398 head->so_flags1 & (SOF1_TRAFFIC_MGT_SO_BACKGROUND);
399 so->so_background_thread = head->so_background_thread;
400 so->so_traffic_class = head->so_traffic_class;
401
402 if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat)) {
403 sodealloc(so);
404 return (struct socket *)0;
405 }
406 so->so_rcv.sb_flags |= (head->so_rcv.sb_flags & SB_USRSIZE);
407 so->so_snd.sb_flags |= (head->so_snd.sb_flags & SB_USRSIZE);
408
409 /*
410 * Must be done with head unlocked to avoid deadlock
411 * for protocol with per socket mutexes.
412 */
413 if (head->so_proto->pr_unlock) {
414 socket_unlock(head, 0);
415 }
416 if (((*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL) != 0) ||
417 error) {
418 sodealloc(so);
419 if (head->so_proto->pr_unlock) {
420 socket_lock(head, 0);
421 }
422 return (struct socket *)0;
423 }
424 if (head->so_proto->pr_unlock) {
425 socket_lock(head, 0);
426 /*
427 * Radar 7385998 Recheck that the head is still accepting
428 * to avoid race condition when head is getting closed.
429 */
430 if ((head->so_options & SO_ACCEPTCONN) == 0) {
431 so->so_state &= ~SS_NOFDREF;
432 soclose(so);
433 return (struct socket *)0;
434 }
435 }
436
437 atomic_add_32(&so->so_proto->pr_domain->dom_refs, 1);
438
439 /* Insert in head appropriate lists */
440 so_acquire_accept_list(head, NULL);
441
442 so->so_head = head;
443
444 /*
445 * Since this socket is going to be inserted into the incomp
446 * queue, it can be picked up by another thread in
447 * tcp_dropdropablreq to get dropped before it is set up.
448 * To prevent this race, set the in-progress flag, which can be
449 * cleared later.
450 */
451 so->so_flags |= SOF_INCOMP_INPROGRESS;
452
453 if (connstatus) {
454 TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
455 so->so_state |= SS_COMP;
456 } else {
457 TAILQ_INSERT_TAIL(&head->so_incomp, so, so_list);
458 so->so_state |= SS_INCOMP;
459 head->so_incqlen++;
460 }
461 head->so_qlen++;
462
463 so_release_accept_list(head);
464
465 /* Attach socket filters for this protocol */
466 sflt_initsock(so);
467
468 if (connstatus) {
469 so->so_state |= connstatus;
470 sorwakeup(head);
471 wakeup((caddr_t)&head->so_timeo);
472 }
473 return so;
474 }
475
476
477 struct socket *
478 sonewconn(struct socket *head, int connstatus, const struct sockaddr *from)
479 {
480 int error = sflt_connectin(head, from);
481 if (error) {
482 return NULL;
483 }
484
485 return sonewconn_internal(head, connstatus);
486 }
487
488 /*
489 * Socantsendmore indicates that no more data will be sent on the
490 * socket; it would normally be applied to a socket when the user
491 * informs the system that no more data is to be sent, by the protocol
492 * code (in the case of PRU_SHUTDOWN). Socantrcvmore indicates that no more data
493 * will be received, and will normally be applied to the socket by a
494 * protocol when it detects that the peer will send no more data.
495 * Data queued for reading in the socket may yet be read.
496 */
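/*
 * Illustrative call sequence (schematic):
 *
 *	shutdown(s, SHUT_WR) -> protocol shutdown routine -> socantsendmore(so);
 *	peer's end-of-data observed by the protocol -> socantrcvmore(so);
 *	subsequent reads drain whatever is queued, then return 0.
 */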
497
498 void
499 socantsendmore(struct socket *so)
500 {
501 so->so_state |= SS_CANTSENDMORE;
502 soevent(so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_CANTSENDMORE);
503 sflt_notify(so, sock_evt_cantsendmore, NULL);
504 sowwakeup(so);
505 }
506
507 void
508 socantrcvmore(struct socket *so)
509 {
510 so->so_state |= SS_CANTRCVMORE;
511 soevent(so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_CANTRCVMORE);
512 sflt_notify(so, sock_evt_cantrecvmore, NULL);
513 sorwakeup(so);
514 }
515
516 /*
517 * Wait for data to arrive at/drain from a socket buffer.
518 */
519 int
520 sbwait(struct sockbuf *sb)
521 {
522 boolean_t nointr = (sb->sb_flags & SB_NOINTR);
523 void *lr_saved = __builtin_return_address(0);
524 struct socket *so = sb->sb_so;
525 lck_mtx_t *mutex_held;
526 struct timespec ts;
527 int error = 0;
528
529 if (so == NULL) {
530 panic("%s: null so, sb=%p sb_flags=0x%x lr=%p\n",
531 __func__, sb, sb->sb_flags, lr_saved);
532 /* NOTREACHED */
533 } else if (so->so_usecount < 1) {
534 panic("%s: sb=%p sb_flags=0x%x sb_so=%p usecount=%d lr=%p "
535 "lrh= %s\n", __func__, sb, sb->sb_flags, so,
536 so->so_usecount, lr_saved, solockhistory_nr(so));
537 /* NOTREACHED */
538 }
539
540 if ((so->so_state & SS_DRAINING) || (so->so_flags & SOF_DEFUNCT)) {
541 error = EBADF;
542 if (so->so_flags & SOF_DEFUNCT) {
543 SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llx [%d,%d] "
544 "(%d)\n", __func__, proc_selfpid(),
545 proc_best_name(current_proc()),
546 (uint64_t)VM_KERNEL_ADDRPERM(so),
547 SOCK_DOM(so), SOCK_TYPE(so), error);
548 }
549 return error;
550 }
551
552 if (so->so_proto->pr_getlock != NULL) {
553 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
554 } else {
555 mutex_held = so->so_proto->pr_domain->dom_mtx;
556 }
557
558 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
559
560 ts.tv_sec = sb->sb_timeo.tv_sec;
561 ts.tv_nsec = sb->sb_timeo.tv_usec * 1000;
562
563 sb->sb_waiters++;
564 VERIFY(sb->sb_waiters != 0);
565
566 error = msleep((caddr_t)&sb->sb_cc, mutex_held,
567 nointr ? PSOCK : PSOCK | PCATCH,
568 nointr ? "sbwait_nointr" : "sbwait", &ts);
569
570 VERIFY(sb->sb_waiters != 0);
571 sb->sb_waiters--;
572
573 if (so->so_usecount < 1) {
574 panic("%s: 2 sb=%p sb_flags=0x%x sb_so=%p usecount=%d lr=%p "
575 "lrh= %s\n", __func__, sb, sb->sb_flags, so,
576 so->so_usecount, lr_saved, solockhistory_nr(so));
577 /* NOTREACHED */
578 }
579
580 if ((so->so_state & SS_DRAINING) || (so->so_flags & SOF_DEFUNCT)) {
581 error = EBADF;
582 if (so->so_flags & SOF_DEFUNCT) {
583 SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llx [%d,%d] "
584 "(%d)\n", __func__, proc_selfpid(),
585 proc_best_name(current_proc()),
586 (uint64_t)VM_KERNEL_ADDRPERM(so),
587 SOCK_DOM(so), SOCK_TYPE(so), error);
588 }
589 }
590
591 return error;
592 }
593
594 void
595 sbwakeup(struct sockbuf *sb)
596 {
597 if (sb->sb_waiters > 0) {
598 wakeup((caddr_t)&sb->sb_cc);
599 }
600 }
601
602 /*
603 * Wakeup processes waiting on a socket buffer.
604 * Do asynchronous notification via SIGIO
605 * if the socket has the SS_ASYNC flag set.
606 */
607 void
608 sowakeup(struct socket *so, struct sockbuf *sb)
609 {
610 if (so->so_flags & SOF_DEFUNCT) {
611 SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llx [%d,%d] si 0x%x, "
612 "fl 0x%x [%s]\n", __func__, proc_selfpid(),
613 proc_best_name(current_proc()),
614 (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so),
615 SOCK_TYPE(so), (uint32_t)sb->sb_sel.si_flags, sb->sb_flags,
616 (sb->sb_flags & SB_RECV) ? "rcv" : "snd");
617 }
618
619 sb->sb_flags &= ~SB_SEL;
620 selwakeup(&sb->sb_sel);
621 sbwakeup(sb);
622 if (so->so_state & SS_ASYNC) {
623 if (so->so_pgid < 0) {
624 gsignal(-so->so_pgid, SIGIO);
625 } else if (so->so_pgid > 0) {
626 proc_signal(so->so_pgid, SIGIO);
627 }
628 }
629 if (sb->sb_flags & SB_KNOTE) {
630 KNOTE(&sb->sb_sel.si_note, SO_FILT_HINT_LOCKED);
631 }
632 if (sb->sb_flags & SB_UPCALL) {
633 void (*sb_upcall)(struct socket *, void *, int);
634 caddr_t sb_upcallarg;
635 int lock = !(sb->sb_flags & SB_UPCALL_LOCK);
636
637 sb_upcall = sb->sb_upcall;
638 sb_upcallarg = sb->sb_upcallarg;
639 /* Let close know that we're about to do an upcall */
640 so->so_upcallusecount++;
641
642 if (lock) {
643 socket_unlock(so, 0);
644 }
645 (*sb_upcall)(so, sb_upcallarg, M_DONTWAIT);
646 if (lock) {
647 socket_lock(so, 0);
648 }
649
650 so->so_upcallusecount--;
651 /* Tell close that it's safe to proceed */
652 if ((so->so_flags & SOF_CLOSEWAIT) &&
653 so->so_upcallusecount == 0) {
654 wakeup((caddr_t)&so->so_upcallusecount);
655 }
656 }
657 #if CONTENT_FILTER
658 /*
659 * Trap disconnection events for content filters
660 */
661 if ((so->so_flags & SOF_CONTENT_FILTER) != 0) {
662 if ((sb->sb_flags & SB_RECV)) {
663 if (so->so_state & (SS_CANTRCVMORE)) {
664 cfil_sock_notify_shutdown(so, SHUT_RD);
665 }
666 } else {
667 if (so->so_state & (SS_CANTSENDMORE)) {
668 cfil_sock_notify_shutdown(so, SHUT_WR);
669 }
670 }
671 }
672 #endif /* CONTENT_FILTER */
673 }
674
675 /*
676 * Socket buffer (struct sockbuf) utility routines.
677 *
678 * Each socket contains two socket buffers: one for sending data and
679 * one for receiving data. Each buffer contains a queue of mbufs,
680 * information about the number of mbufs and amount of data in the
681 * queue, and other fields allowing select() statements and notification
682 * on data availability to be implemented.
683 *
684 * Data stored in a socket buffer is maintained as a list of records.
685 * Each record is a list of mbufs chained together with the m_next
686 * field. Records are chained together with the m_nextpkt field. The upper
687 * level routine soreceive() expects the following conventions to be
688 * observed when placing information in the receive buffer:
689 *
690 * 1. If the protocol requires each message be preceded by the sender's
691 * name, then a record containing that name must be present before
692 * any associated data (mbuf's must be of type MT_SONAME).
693 * 2. If the protocol supports the exchange of ``access rights'' (really
694 * just additional data associated with the message), and there are
695 * ``rights'' to be received, then a record containing this data
696 * should be present (mbuf's must be of type MT_RIGHTS).
697 * 3. If a name or rights record exists, then it must be followed by
698 * a data record, perhaps of zero length.
699 *
700 * Before using a new socket structure it is first necessary to reserve
701 * buffer space to the socket, by calling sbreserve(). This should commit
702 * some of the available buffer space in the system buffer pool for the
703 * socket (currently, it does nothing but enforce limits). The space
704 * should be released by calling sbrelease() when the socket is destroyed.
705 */
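/*
 * Illustrative layout of a single receive-buffer record built under the
 * conventions above (schematic), e.g. for a datagram with a sender name:
 *
 *	sb_mb -> [MT_SONAME: sender address]
 *	            m_next -> [rights/control data, optional]
 *	            m_next -> [MT_DATA: payload ...]
 *	         m_nextpkt -> next record, if any
 *
 * Reservation is typically done once from a protocol's attach routine,
 * e.g. (the send/receive space values are protocol-specific):
 *
 *	error = soreserve(so, sendspace, recvspace);
 *	if (error != 0)
 *		return (error);
 */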
706
707 /*
708 * Returns: 0 Success
709 * ENOBUFS
710 */
711 int
712 soreserve(struct socket *so, u_int32_t sndcc, u_int32_t rcvcc)
713 {
714 /*
715 * We do not want to fail the creation of a socket
716 * when kern.ipc.maxsockbuf is less than the
717 * default socket buffer size of the protocol,
718 * so force the buffer sizes to be at most the
719 * limit enforced by sbreserve()
720 */
721 uint64_t maxcc = (uint64_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES);
722 if (sndcc > maxcc) {
723 sndcc = maxcc;
724 }
725 if (rcvcc > maxcc) {
726 rcvcc = maxcc;
727 }
728 if (sbreserve(&so->so_snd, sndcc) == 0) {
729 goto bad;
730 } else {
731 so->so_snd.sb_idealsize = sndcc;
732 }
733
734 if (sbreserve(&so->so_rcv, rcvcc) == 0) {
735 goto bad2;
736 } else {
737 so->so_rcv.sb_idealsize = rcvcc;
738 }
739
740 if (so->so_rcv.sb_lowat == 0) {
741 so->so_rcv.sb_lowat = 1;
742 }
743 if (so->so_snd.sb_lowat == 0) {
744 so->so_snd.sb_lowat = MCLBYTES;
745 }
746 if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat) {
747 so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
748 }
749 return 0;
750 bad2:
751 so->so_snd.sb_flags &= ~SB_SEL;
752 selthreadclear(&so->so_snd.sb_sel);
753 sbrelease(&so->so_snd);
754 bad:
755 return ENOBUFS;
756 }
757
758 void
759 soreserve_preconnect(struct socket *so, unsigned int pre_cc)
760 {
761 /* As of now, same bytes for both preconnect read and write */
762 so->so_snd.sb_preconn_hiwat = pre_cc;
763 so->so_rcv.sb_preconn_hiwat = pre_cc;
764 }
765
766 /*
767 * Allot mbufs to a sockbuf.
768 * Attempt to scale mbmax so that mbcnt doesn't become limiting
769 * if buffering efficiency is near the normal case.
770 */
771 int
772 sbreserve(struct sockbuf *sb, u_int32_t cc)
773 {
774 if ((u_quad_t)cc > (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES)) {
775 return 0;
776 }
777 sb->sb_hiwat = cc;
778 sb->sb_mbmax = min(cc * sb_efficiency, sb_max);
779 if (sb->sb_lowat > sb->sb_hiwat) {
780 sb->sb_lowat = sb->sb_hiwat;
781 }
782 return 1;
783 }
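/*
 * Worked example (illustrative): with the default sb_efficiency of 8,
 * reserving cc = 64 KiB yields sb_hiwat = 64 KiB and
 * sb_mbmax = min(8 * 64 KiB, sb_max) = 512 KiB (assuming sb_max is larger),
 * so mbuf accounting does not become the limiting factor when buffering
 * efficiency is near the normal case.
 */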
784
785 /*
786 * Free mbufs held by a socket, and reserved mbuf space.
787 */
788 /* WARNING needs to do selthreadclear() before calling this */
789 void
790 sbrelease(struct sockbuf *sb)
791 {
792 sbflush(sb);
793 sb->sb_hiwat = 0;
794 sb->sb_mbmax = 0;
795 }
796
797 /*
798 * Routines to add and remove
799 * data from an mbuf queue.
800 *
801 * The routines sbappend() or sbappendrecord() are normally called to
802 * append new mbufs to a socket buffer, after checking that adequate
803 * space is available, comparing the function sbspace() with the amount
804 * of data to be added. sbappendrecord() differs from sbappend() in
805 * that data supplied is treated as the beginning of a new record.
806 * To place a sender's address, optional access rights, and data in a
807 * socket receive buffer, sbappendaddr() should be used. To place
808 * access rights and data in a socket receive buffer, sbappendrights()
809 * should be used. In either case, the new data begins a new record.
810 * Note that unlike sbappend() and sbappendrecord(), these routines check
811 * for the caller that there will be enough space to store the data.
812 * Each fails if there is not enough space, or if it cannot find mbufs
813 * to store additional information in.
814 *
815 * Reliable protocols may use the socket send buffer to hold data
816 * awaiting acknowledgement. Data is normally copied from a socket
817 * send buffer in a protocol with m_copy for output to a peer,
818 * and then removing the data from the socket buffer with sbdrop()
819 * or sbdroprecord() when the data is acknowledged by the peer.
820 */
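/*
 * Illustrative sketch of a datagram input path using sbappendaddr()
 * (schematic; the variable names are placeholders):
 *
 *	if (sbappendaddr(&so->so_rcv, (struct sockaddr *)&from,
 *	    m, opts, &error) == 0) {
 *		... chain already freed or consumed by a filter ...
 *	} else {
 *		sorwakeup(so);
 *	}
 */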
821
822 /*
823 * Append mbuf chain m to the last record in the
824 * socket buffer sb. The additional space associated with
825 * the mbuf chain is recorded in sb. Empty mbufs are
826 * discarded and mbufs are compacted where possible.
827 */
828 int
829 sbappend(struct sockbuf *sb, struct mbuf *m)
830 {
831 struct socket *so = sb->sb_so;
832
833 if (m == NULL || (sb->sb_flags & SB_DROP)) {
834 if (m != NULL) {
835 m_freem(m);
836 }
837 return 0;
838 }
839
840 SBLASTRECORDCHK(sb, "sbappend 1");
841
842 if (sb->sb_lastrecord != NULL && (sb->sb_mbtail->m_flags & M_EOR)) {
843 return sbappendrecord(sb, m);
844 }
845
846 if (sb->sb_flags & SB_RECV && !(m && m->m_flags & M_SKIPCFIL)) {
847 int error = sflt_data_in(so, NULL, &m, NULL, 0);
848 SBLASTRECORDCHK(sb, "sbappend 2");
849
850 #if CONTENT_FILTER
851 if (error == 0) {
852 error = cfil_sock_data_in(so, NULL, m, NULL, 0);
853 }
854 #endif /* CONTENT_FILTER */
855
856 if (error != 0) {
857 if (error != EJUSTRETURN) {
858 m_freem(m);
859 }
860 return 0;
861 }
862 } else if (m) {
863 m->m_flags &= ~M_SKIPCFIL;
864 }
865
866 /* If this is the first record, it's also the last record */
867 if (sb->sb_lastrecord == NULL) {
868 sb->sb_lastrecord = m;
869 }
870
871 sbcompress(sb, m, sb->sb_mbtail);
872 SBLASTRECORDCHK(sb, "sbappend 3");
873 return 1;
874 }
875
876 /*
877 * Similar to sbappend, except that this is optimized for stream sockets.
878 */
879 int
880 sbappendstream(struct sockbuf *sb, struct mbuf *m)
881 {
882 struct socket *so = sb->sb_so;
883
884 if (m == NULL || (sb->sb_flags & SB_DROP)) {
885 if (m != NULL) {
886 m_freem(m);
887 }
888 return 0;
889 }
890
891 if (m->m_nextpkt != NULL || (sb->sb_mb != sb->sb_lastrecord)) {
892 panic("sbappendstream: nexpkt %p || mb %p != lastrecord %p\n",
893 m->m_nextpkt, sb->sb_mb, sb->sb_lastrecord);
894 /* NOTREACHED */
895 }
896
897 SBLASTMBUFCHK(sb, __func__);
898
899 if (sb->sb_flags & SB_RECV && !(m && m->m_flags & M_SKIPCFIL)) {
900 int error = sflt_data_in(so, NULL, &m, NULL, 0);
901 SBLASTRECORDCHK(sb, "sbappendstream 1");
902
903 #if CONTENT_FILTER
904 if (error == 0) {
905 error = cfil_sock_data_in(so, NULL, m, NULL, 0);
906 }
907 #endif /* CONTENT_FILTER */
908
909 if (error != 0) {
910 if (error != EJUSTRETURN) {
911 m_freem(m);
912 }
913 return 0;
914 }
915 } else if (m) {
916 m->m_flags &= ~M_SKIPCFIL;
917 }
918
919 sbcompress(sb, m, sb->sb_mbtail);
920 sb->sb_lastrecord = sb->sb_mb;
921 SBLASTRECORDCHK(sb, "sbappendstream 2");
922 return 1;
923 }
924
925 #ifdef SOCKBUF_DEBUG
926 void
927 sbcheck(struct sockbuf *sb)
928 {
929 struct mbuf *m;
930 struct mbuf *n = 0;
931 u_int32_t len = 0, mbcnt = 0;
932 lck_mtx_t *mutex_held;
933
934 if (sb->sb_so->so_proto->pr_getlock != NULL) {
935 mutex_held = (*sb->sb_so->so_proto->pr_getlock)(sb->sb_so, 0);
936 } else {
937 mutex_held = sb->sb_so->so_proto->pr_domain->dom_mtx;
938 }
939
940 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
941
942 if (sbchecking == 0) {
943 return;
944 }
945
946 for (m = sb->sb_mb; m; m = n) {
947 n = m->m_nextpkt;
948 for (; m; m = m->m_next) {
949 len += m->m_len;
950 mbcnt += MSIZE;
951 /* XXX pretty sure this is bogus */
952 if (m->m_flags & M_EXT) {
953 mbcnt += m->m_ext.ext_size;
954 }
955 }
956 }
957 if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
958 panic("cc %ld != %ld || mbcnt %ld != %ld\n", len, sb->sb_cc,
959 mbcnt, sb->sb_mbcnt);
960 }
961 }
962 #endif
963
964 void
965 sblastrecordchk(struct sockbuf *sb, const char *where)
966 {
967 struct mbuf *m = sb->sb_mb;
968
969 while (m && m->m_nextpkt) {
970 m = m->m_nextpkt;
971 }
972
973 if (m != sb->sb_lastrecord) {
974 printf("sblastrecordchk: mb 0x%llx lastrecord 0x%llx "
975 "last 0x%llx\n",
976 (uint64_t)VM_KERNEL_ADDRPERM(sb->sb_mb),
977 (uint64_t)VM_KERNEL_ADDRPERM(sb->sb_lastrecord),
978 (uint64_t)VM_KERNEL_ADDRPERM(m));
979 printf("packet chain:\n");
980 for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
981 printf("\t0x%llx\n", (uint64_t)VM_KERNEL_ADDRPERM(m));
982 }
983 panic("sblastrecordchk from %s", where);
984 }
985 }
986
987 void
988 sblastmbufchk(struct sockbuf *sb, const char *where)
989 {
990 struct mbuf *m = sb->sb_mb;
991 struct mbuf *n;
992
993 while (m && m->m_nextpkt) {
994 m = m->m_nextpkt;
995 }
996
997 while (m && m->m_next) {
998 m = m->m_next;
999 }
1000
1001 if (m != sb->sb_mbtail) {
1002 printf("sblastmbufchk: mb 0x%llx mbtail 0x%llx last 0x%llx\n",
1003 (uint64_t)VM_KERNEL_ADDRPERM(sb->sb_mb),
1004 (uint64_t)VM_KERNEL_ADDRPERM(sb->sb_mbtail),
1005 (uint64_t)VM_KERNEL_ADDRPERM(m));
1006 printf("packet tree:\n");
1007 for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
1008 printf("\t");
1009 for (n = m; n != NULL; n = n->m_next) {
1010 printf("0x%llx ",
1011 (uint64_t)VM_KERNEL_ADDRPERM(n));
1012 }
1013 printf("\n");
1014 }
1015 panic("sblastmbufchk from %s", where);
1016 }
1017 }
1018
1019 /*
1020 * Similar to sbappend, except the mbuf chain begins a new record.
1021 */
1022 int
1023 sbappendrecord(struct sockbuf *sb, struct mbuf *m0)
1024 {
1025 struct mbuf *m;
1026 int space = 0;
1027
1028 if (m0 == NULL || (sb->sb_flags & SB_DROP)) {
1029 if (m0 != NULL) {
1030 m_freem(m0);
1031 }
1032 return 0;
1033 }
1034
1035 for (m = m0; m != NULL; m = m->m_next) {
1036 space += m->m_len;
1037 }
1038
1039 if (space > sbspace(sb) && !(sb->sb_flags & SB_UNIX)) {
1040 m_freem(m0);
1041 return 0;
1042 }
1043
1044 if (sb->sb_flags & SB_RECV && !(m0 && m0->m_flags & M_SKIPCFIL)) {
1045 int error = sflt_data_in(sb->sb_so, NULL, &m0, NULL,
1046 sock_data_filt_flag_record);
1047
1048 #if CONTENT_FILTER
1049 if (error == 0) {
1050 error = cfil_sock_data_in(sb->sb_so, NULL, m0, NULL, 0);
1051 }
1052 #endif /* CONTENT_FILTER */
1053
1054 if (error != 0) {
1055 SBLASTRECORDCHK(sb, "sbappendrecord 1");
1056 if (error != EJUSTRETURN) {
1057 m_freem(m0);
1058 }
1059 return 0;
1060 }
1061 } else if (m0) {
1062 m0->m_flags &= ~M_SKIPCFIL;
1063 }
1064
1065 /*
1066 * Note this permits zero length records.
1067 */
1068 sballoc(sb, m0);
1069 SBLASTRECORDCHK(sb, "sbappendrecord 2");
1070 if (sb->sb_lastrecord != NULL) {
1071 sb->sb_lastrecord->m_nextpkt = m0;
1072 } else {
1073 sb->sb_mb = m0;
1074 }
1075 sb->sb_lastrecord = m0;
1076 sb->sb_mbtail = m0;
1077
1078 m = m0->m_next;
1079 m0->m_next = 0;
1080 if (m && (m0->m_flags & M_EOR)) {
1081 m0->m_flags &= ~M_EOR;
1082 m->m_flags |= M_EOR;
1083 }
1084 sbcompress(sb, m, m0);
1085 SBLASTRECORDCHK(sb, "sbappendrecord 3");
1086 return 1;
1087 }
1088
1089 /*
1090 * As above except that OOB data
1091 * is inserted at the beginning of the sockbuf,
1092 * but after any other OOB data.
1093 */
1094 int
1095 sbinsertoob(struct sockbuf *sb, struct mbuf *m0)
1096 {
1097 struct mbuf *m;
1098 struct mbuf **mp;
1099
1100 if (m0 == 0) {
1101 return 0;
1102 }
1103
1104 SBLASTRECORDCHK(sb, "sbinsertoob 1");
1105
1106 if ((sb->sb_flags & SB_RECV && !(m0->m_flags & M_SKIPCFIL)) != 0) {
1107 int error = sflt_data_in(sb->sb_so, NULL, &m0, NULL,
1108 sock_data_filt_flag_oob);
1109
1110 SBLASTRECORDCHK(sb, "sbinsertoob 2");
1111
1112 #if CONTENT_FILTER
1113 if (error == 0) {
1114 error = cfil_sock_data_in(sb->sb_so, NULL, m0, NULL, 0);
1115 }
1116 #endif /* CONTENT_FILTER */
1117
1118 if (error) {
1119 if (error != EJUSTRETURN) {
1120 m_freem(m0);
1121 }
1122 return 0;
1123 }
1124 } else if (m0) {
1125 m0->m_flags &= ~M_SKIPCFIL;
1126 }
1127
1128 for (mp = &sb->sb_mb; *mp; mp = &((*mp)->m_nextpkt)) {
1129 m = *mp;
1130 again:
1131 switch (m->m_type) {
1132 case MT_OOBDATA:
1133 continue; /* WANT next train */
1134
1135 case MT_CONTROL:
1136 m = m->m_next;
1137 if (m) {
1138 goto again; /* inspect THIS train further */
1139 }
1140 }
1141 break;
1142 }
1143 /*
1144 * Put the first mbuf on the queue.
1145 * Note this permits zero length records.
1146 */
1147 sballoc(sb, m0);
1148 m0->m_nextpkt = *mp;
1149 if (*mp == NULL) {
1150 /* m0 is actually the new tail */
1151 sb->sb_lastrecord = m0;
1152 }
1153 *mp = m0;
1154 m = m0->m_next;
1155 m0->m_next = 0;
1156 if (m && (m0->m_flags & M_EOR)) {
1157 m0->m_flags &= ~M_EOR;
1158 m->m_flags |= M_EOR;
1159 }
1160 sbcompress(sb, m, m0);
1161 SBLASTRECORDCHK(sb, "sbinsertoob 3");
1162 return 1;
1163 }
1164
1165 /*
1166 * Concatenate address (optional), control (optional) and data into one
1167 * single mbuf chain. If sockbuf *sb is passed in, a space check will be
1168 * performed.
1169 *
1170 * Returns: mbuf chain pointer if succeeded, NULL if failed
1171 */
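/*
 * Resulting chain (schematic), with optional pieces simply omitted:
 *
 *	[MT_SONAME: asa] -> m_next -> [control ...] -> m_next -> [m0 data ...]
 *
 * Callers such as sbappendaddr() hand the returned chain to
 * sbappendchain() so that it is queued as a single new record.
 */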
1172 struct mbuf *
1173 sbconcat_mbufs(struct sockbuf *sb, struct sockaddr *asa, struct mbuf *m0, struct mbuf *control)
1174 {
1175 struct mbuf *m = NULL, *n = NULL;
1176 int space = 0;
1177
1178 if (m0 && (m0->m_flags & M_PKTHDR) == 0) {
1179 panic("sbconcat_mbufs");
1180 }
1181
1182 if (m0) {
1183 space += m0->m_pkthdr.len;
1184 }
1185 for (n = control; n; n = n->m_next) {
1186 space += n->m_len;
1187 if (n->m_next == 0) { /* keep pointer to last control buf */
1188 break;
1189 }
1190 }
1191
1192 if (asa != NULL) {
1193 if (asa->sa_len > MLEN) {
1194 return NULL;
1195 }
1196 space += asa->sa_len;
1197 }
1198
1199 if (sb != NULL && space > sbspace(sb)) {
1200 return NULL;
1201 }
1202
1203 if (n) {
1204 n->m_next = m0; /* concatenate data to control */
1205 } else {
1206 control = m0;
1207 }
1208
1209 if (asa != NULL) {
1210 MGET(m, M_DONTWAIT, MT_SONAME);
1211 if (m == 0) {
1212 if (n) {
1213 /* unchain control and data if necessary */
1214 n->m_next = NULL;
1215 }
1216 return NULL;
1217 }
1218 m->m_len = asa->sa_len;
1219 bcopy((caddr_t)asa, mtod(m, caddr_t), asa->sa_len);
1220
1221 m->m_next = control;
1222 } else {
1223 m = control;
1224 }
1225
1226 return m;
1227 }
1228
1229 /*
1230 * Queue mbuf chain to the receive queue of a socket.
1231 * Parameter space is the total len of the mbuf chain.
1232 * If it is non-zero, available sockbuf space will be checked.
1233 *
1234 * Returns: 0 Invalid mbuf chain
1235 * 1 Success
1236 */
1237 int
1238 sbappendchain(struct sockbuf *sb, struct mbuf *m, int space)
1239 {
1240 struct mbuf *n, *nlast;
1241
1242 if (m == NULL) {
1243 return 0;
1244 }
1245
1246 if (space != 0 && space > sbspace(sb)) {
1247 return 0;
1248 }
1249
1250 for (n = m; n->m_next != NULL; n = n->m_next) {
1251 sballoc(sb, n);
1252 }
1253 sballoc(sb, n);
1254 nlast = n;
1255
1256 if (sb->sb_lastrecord != NULL) {
1257 sb->sb_lastrecord->m_nextpkt = m;
1258 } else {
1259 sb->sb_mb = m;
1260 }
1261 sb->sb_lastrecord = m;
1262 sb->sb_mbtail = nlast;
1263
1264 SBLASTMBUFCHK(sb, __func__);
1265 SBLASTRECORDCHK(sb, "sbappendadddr 2");
1266
1267 postevent(0, sb, EV_RWBYTES);
1268 return 1;
1269 }
1270
1271 /*
1272 * Returns: 0 Error: No space/out of mbufs/etc.
1273 * 1 Success
1274 *
1275 * Imputed: (*error_out) errno for error
1276 * ENOBUFS
1277 * sflt_data_in:??? [whatever a filter author chooses]
1278 */
1279 int
1280 sbappendaddr(struct sockbuf *sb, struct sockaddr *asa, struct mbuf *m0,
1281 struct mbuf *control, int *error_out)
1282 {
1283 int result = 0;
1284 boolean_t sb_unix = (sb->sb_flags & SB_UNIX);
1285 struct mbuf *mbuf_chain = NULL;
1286
1287 if (error_out) {
1288 *error_out = 0;
1289 }
1290
1291 if (m0 && (m0->m_flags & M_PKTHDR) == 0) {
1292 panic("sbappendaddrorfree");
1293 }
1294
1295 if (sb->sb_flags & SB_DROP) {
1296 if (m0 != NULL) {
1297 m_freem(m0);
1298 }
1299 if (control != NULL && !sb_unix) {
1300 m_freem(control);
1301 }
1302 if (error_out != NULL) {
1303 *error_out = EINVAL;
1304 }
1305 return 0;
1306 }
1307
1308 /* Call socket data in filters */
1309 if (sb->sb_flags & SB_RECV && !(m0 && m0->m_flags & M_SKIPCFIL)) {
1310 int error;
1311 error = sflt_data_in(sb->sb_so, asa, &m0, &control, 0);
1312 SBLASTRECORDCHK(sb, __func__);
1313
1314 #if CONTENT_FILTER
1315 if (error == 0) {
1316 error = cfil_sock_data_in(sb->sb_so, asa, m0, control,
1317 0);
1318 }
1319 #endif /* CONTENT_FILTER */
1320
1321 if (error) {
1322 if (error != EJUSTRETURN) {
1323 if (m0) {
1324 m_freem(m0);
1325 }
1326 if (control != NULL && !sb_unix) {
1327 m_freem(control);
1328 }
1329 if (error_out) {
1330 *error_out = error;
1331 }
1332 }
1333 return 0;
1334 }
1335 } else if (m0) {
1336 m0->m_flags &= ~M_SKIPCFIL;
1337 }
1338
1339 mbuf_chain = sbconcat_mbufs(sb, asa, m0, control);
1340 SBLASTRECORDCHK(sb, "sbappendadddr 1");
1341 result = sbappendchain(sb, mbuf_chain, 0);
1342 if (result == 0) {
1343 if (m0) {
1344 m_freem(m0);
1345 }
1346 if (control != NULL && !sb_unix) {
1347 m_freem(control);
1348 }
1349 if (error_out) {
1350 *error_out = ENOBUFS;
1351 }
1352 }
1353
1354 return result;
1355 }
1356
1357 inline boolean_t
1358 is_cmsg_valid(struct mbuf *control, struct cmsghdr *cmsg)
1359 {
1360 if (cmsg == NULL) {
1361 return FALSE;
1362 }
1363
1364 if (cmsg->cmsg_len < sizeof(struct cmsghdr)) {
1365 return FALSE;
1366 }
1367
1368 if ((uint8_t *)control->m_data >= (uint8_t *)cmsg + cmsg->cmsg_len) {
1369 return FALSE;
1370 }
1371
1372 if ((uint8_t *)control->m_data + control->m_len <
1373 (uint8_t *)cmsg + cmsg->cmsg_len) {
1374 return FALSE;
1375 }
1376
1377 return TRUE;
1378 }
1379
1380 static int
1381 sbappendcontrol_internal(struct sockbuf *sb, struct mbuf *m0,
1382 struct mbuf *control)
1383 {
1384 struct mbuf *m, *mlast, *n;
1385 int space = 0;
1386
1387 if (control == 0) {
1388 panic("sbappendcontrol");
1389 }
1390
1391 for (m = control;; m = m->m_next) {
1392 space += m->m_len;
1393 if (m->m_next == 0) {
1394 break;
1395 }
1396 }
1397 n = m; /* save pointer to last control buffer */
1398 for (m = m0; m; m = m->m_next) {
1399 space += m->m_len;
1400 }
1401 if (space > sbspace(sb) && !(sb->sb_flags & SB_UNIX)) {
1402 return 0;
1403 }
1404 n->m_next = m0; /* concatenate data to control */
1405 SBLASTRECORDCHK(sb, "sbappendcontrol 1");
1406
1407 for (m = control; m->m_next != NULL; m = m->m_next) {
1408 sballoc(sb, m);
1409 }
1410 sballoc(sb, m);
1411 mlast = m;
1412
1413 if (sb->sb_lastrecord != NULL) {
1414 sb->sb_lastrecord->m_nextpkt = control;
1415 } else {
1416 sb->sb_mb = control;
1417 }
1418 sb->sb_lastrecord = control;
1419 sb->sb_mbtail = mlast;
1420
1421 SBLASTMBUFCHK(sb, __func__);
1422 SBLASTRECORDCHK(sb, "sbappendcontrol 2");
1423
1424 postevent(0, sb, EV_RWBYTES);
1425 return 1;
1426 }
1427
1428 int
1429 sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control,
1430 int *error_out)
1431 {
1432 int result = 0;
1433 boolean_t sb_unix = (sb->sb_flags & SB_UNIX);
1434
1435 if (error_out) {
1436 *error_out = 0;
1437 }
1438
1439 if (sb->sb_flags & SB_DROP) {
1440 if (m0 != NULL) {
1441 m_freem(m0);
1442 }
1443 if (control != NULL && !sb_unix) {
1444 m_freem(control);
1445 }
1446 if (error_out != NULL) {
1447 *error_out = EINVAL;
1448 }
1449 return 0;
1450 }
1451
1452 if (sb->sb_flags & SB_RECV && !(m0 && m0->m_flags & M_SKIPCFIL)) {
1453 int error;
1454
1455 error = sflt_data_in(sb->sb_so, NULL, &m0, &control, 0);
1456 SBLASTRECORDCHK(sb, __func__);
1457
1458 #if CONTENT_FILTER
1459 if (error == 0) {
1460 error = cfil_sock_data_in(sb->sb_so, NULL, m0, control,
1461 0);
1462 }
1463 #endif /* CONTENT_FILTER */
1464
1465 if (error) {
1466 if (error != EJUSTRETURN) {
1467 if (m0) {
1468 m_freem(m0);
1469 }
1470 if (control != NULL && !sb_unix) {
1471 m_freem(control);
1472 }
1473 if (error_out) {
1474 *error_out = error;
1475 }
1476 }
1477 return 0;
1478 }
1479 } else if (m0) {
1480 m0->m_flags &= ~M_SKIPCFIL;
1481 }
1482
1483 result = sbappendcontrol_internal(sb, m0, control);
1484 if (result == 0) {
1485 if (m0) {
1486 m_freem(m0);
1487 }
1488 if (control != NULL && !sb_unix) {
1489 m_freem(control);
1490 }
1491 if (error_out) {
1492 *error_out = ENOBUFS;
1493 }
1494 }
1495
1496 return result;
1497 }
1498
1499 /*
1500 * Append a contiguous TCP data blob with TCP sequence number as control data
1501 * as a new msg to the receive socket buffer.
1502 */
1503 int
1504 sbappendmsgstream_rcv(struct sockbuf *sb, struct mbuf *m, uint32_t seqnum,
1505 int unordered)
1506 {
1507 struct mbuf *m_eor = NULL;
1508 u_int32_t data_len = 0;
1509 int ret = 0;
1510 struct socket *so = sb->sb_so;
1511
1512 if (m == NULL) {
1513 return 0;
1514 }
1515
1516 VERIFY((m->m_flags & M_PKTHDR) && m_pktlen(m) > 0);
1517 VERIFY(so->so_msg_state != NULL);
1518 VERIFY(sb->sb_flags & SB_RECV);
1519
1520 /* Keep the TCP sequence number in the mbuf pkthdr */
1521 m->m_pkthdr.msg_seq = seqnum;
1522
1523 /* find last mbuf and set M_EOR */
1524 for (m_eor = m;; m_eor = m_eor->m_next) {
1525 /*
1526 * If the msg is unordered, we need to account for
1527 * these bytes in receive socket buffer size. Otherwise,
1528 * the receive window advertised will shrink because
1529 * of the additional unordered bytes added to the
1530 * receive buffer.
1531 */
1532 if (unordered) {
1533 m_eor->m_flags |= M_UNORDERED_DATA;
1534 data_len += m_eor->m_len;
1535 so->so_msg_state->msg_uno_bytes += m_eor->m_len;
1536 } else {
1537 m_eor->m_flags &= ~M_UNORDERED_DATA;
1538 }
1539 if (m_eor->m_next == NULL) {
1540 break;
1541 }
1542 }
1543
1544 /* set EOR flag at end of byte blob */
1545 m_eor->m_flags |= M_EOR;
1546
1547 /* expand the receive socket buffer to allow unordered data */
1548 if (unordered && !sbreserve(sb, sb->sb_hiwat + data_len)) {
1549 /*
1550 * Could not allocate memory for unordered data, it
1551 * means this packet will have to be delivered in order
1552 */
1553 printf("%s: could not reserve space for unordered data\n",
1554 __func__);
1555 }
1556
1557 if (!unordered && (sb->sb_mbtail != NULL) &&
1558 !(sb->sb_mbtail->m_flags & M_UNORDERED_DATA)) {
1559 sb->sb_mbtail->m_flags &= ~M_EOR;
1560 sbcompress(sb, m, sb->sb_mbtail);
1561 ret = 1;
1562 } else {
1563 ret = sbappendrecord(sb, m);
1564 }
1565 VERIFY(sb->sb_mbtail->m_flags & M_EOR);
1566 return ret;
1567 }
1568
1569 /*
1570 * TCP streams either have message-based out-of-order delivery support, have
1571 * Multipath TCP support, or are regular TCP sockets
1572 */
1573 int
1574 sbappendstream_rcvdemux(struct socket *so, struct mbuf *m, uint32_t seqnum,
1575 int unordered)
1576 {
1577 int ret = 0;
1578
1579 if ((m != NULL) &&
1580 m_pktlen(m) <= 0 &&
1581 !((so->so_flags & SOF_MP_SUBFLOW) &&
1582 (m->m_flags & M_PKTHDR) &&
1583 (m->m_pkthdr.pkt_flags & PKTF_MPTCP_DFIN))) {
1584 m_freem(m);
1585 return ret;
1586 }
1587
1588 if (so->so_flags & SOF_ENABLE_MSGS) {
1589 ret = sbappendmsgstream_rcv(&so->so_rcv, m, seqnum, unordered);
1590 }
1591 #if MPTCP
1592 else if (so->so_flags & SOF_MP_SUBFLOW) {
1593 ret = sbappendmptcpstream_rcv(&so->so_rcv, m);
1594 }
1595 #endif /* MPTCP */
1596 else {
1597 ret = sbappendstream(&so->so_rcv, m);
1598 }
1599 return ret;
1600 }
1601
1602 #if MPTCP
1603 int
1604 sbappendmptcpstream_rcv(struct sockbuf *sb, struct mbuf *m)
1605 {
1606 struct socket *so = sb->sb_so;
1607
1608 VERIFY(m == NULL || (m->m_flags & M_PKTHDR));
1609 /* SB_NOCOMPRESS must be set to prevent loss of M_PKTHDR data */
1610 VERIFY((sb->sb_flags & (SB_RECV | SB_NOCOMPRESS)) ==
1611 (SB_RECV | SB_NOCOMPRESS));
1612
1613 if (m == NULL || m_pktlen(m) == 0 || (sb->sb_flags & SB_DROP) ||
1614 (so->so_state & SS_CANTRCVMORE)) {
1615 if (m && (m->m_flags & M_PKTHDR) &&
1616 m_pktlen(m) == 0 &&
1617 (m->m_pkthdr.pkt_flags & PKTF_MPTCP_DFIN)) {
1618 mptcp_input(tptomptp(sototcpcb(so))->mpt_mpte, m);
1619 return 1;
1620 } else if (m != NULL) {
1621 m_freem(m);
1622 }
1623 return 0;
1624 }
1625 /* the socket is not closed, so SOF_MP_SUBFLOW must be set */
1626 VERIFY(so->so_flags & SOF_MP_SUBFLOW);
1627
1628 if (m->m_nextpkt != NULL || (sb->sb_mb != sb->sb_lastrecord)) {
1629 panic("%s: nexpkt %p || mb %p != lastrecord %p\n", __func__,
1630 m->m_nextpkt, sb->sb_mb, sb->sb_lastrecord);
1631 /* NOTREACHED */
1632 }
1633
1634 SBLASTMBUFCHK(sb, __func__);
1635
1636 /* No filter support (SB_RECV) on mptcp subflow sockets */
1637
1638 sbcompress(sb, m, sb->sb_mbtail);
1639 sb->sb_lastrecord = sb->sb_mb;
1640 SBLASTRECORDCHK(sb, __func__);
1641 return 1;
1642 }
1643 #endif /* MPTCP */
1644
1645 /*
1646 * Append message to send socket buffer based on priority.
1647 */
1648 int
1649 sbappendmsg_snd(struct sockbuf *sb, struct mbuf *m)
1650 {
1651 struct socket *so = sb->sb_so;
1652 struct msg_priq *priq;
1653 int set_eor = 0;
1654
1655 VERIFY(so->so_msg_state != NULL);
1656
1657 if (m->m_nextpkt != NULL || (sb->sb_mb != sb->sb_lastrecord)) {
1658 panic("sbappendstream: nexpkt %p || mb %p != lastrecord %p\n",
1659 m->m_nextpkt, sb->sb_mb, sb->sb_lastrecord);
1660 }
1661
1662 SBLASTMBUFCHK(sb, __func__);
1663
1664 if (m == NULL || (sb->sb_flags & SB_DROP) || so->so_msg_state == NULL) {
1665 if (m != NULL) {
1666 m_freem(m);
1667 }
1668 return 0;
1669 }
1670
1671 priq = &so->so_msg_state->msg_priq[m->m_pkthdr.msg_pri];
1672
1673 /* note if we need to propagate M_EOR to the last mbuf */
1674 if (m->m_flags & M_EOR) {
1675 set_eor = 1;
1676
1677 /* Reset M_EOR from the first mbuf */
1678 m->m_flags &= ~(M_EOR);
1679 }
1680
1681 if (priq->msgq_head == NULL) {
1682 VERIFY(priq->msgq_tail == NULL && priq->msgq_lastmsg == NULL);
1683 priq->msgq_head = priq->msgq_lastmsg = m;
1684 } else {
1685 VERIFY(priq->msgq_tail->m_next == NULL);
1686
1687 /* Check if the last message has M_EOR flag set */
1688 if (priq->msgq_tail->m_flags & M_EOR) {
1689 /* Insert as a new message */
1690 priq->msgq_lastmsg->m_nextpkt = m;
1691
1692 /* move the lastmsg pointer */
1693 priq->msgq_lastmsg = m;
1694 } else {
1695 /* Append to the existing message */
1696 priq->msgq_tail->m_next = m;
1697 }
1698 }
1699
1700 /* Update accounting and the queue tail pointer */
1701
1702 while (m->m_next != NULL) {
1703 sballoc(sb, m);
1704 priq->msgq_bytes += m->m_len;
1705 m = m->m_next;
1706 }
1707 sballoc(sb, m);
1708 priq->msgq_bytes += m->m_len;
1709
1710 if (set_eor) {
1711 m->m_flags |= M_EOR;
1712
1713 /*
1714 * Since the user space can not write a new msg
1715 * without completing the previous one, we can
1716 * reset this flag to start sending again.
1717 */
1718 priq->msgq_flags &= ~(MSGQ_MSG_NOTDONE);
1719 }
1720
1721 priq->msgq_tail = m;
1722
1723 SBLASTRECORDCHK(sb, "sbappendstream 2");
1724 postevent(0, sb, EV_RWBYTES);
1725 return 1;
1726 }
1727
1728 /*
1729 * Pull data from priority queues to the serial snd queue
1730 * right before sending.
1731 */
1732 void
1733 sbpull_unordered_data(struct socket *so, int32_t off, int32_t len)
1734 {
1735 int32_t topull, i;
1736 struct msg_priq *priq = NULL;
1737
1738 VERIFY(so->so_msg_state != NULL);
1739
1740 topull = (off + len) - so->so_msg_state->msg_serial_bytes;
1741
1742 i = MSG_PRI_MAX;
1743 while (i >= MSG_PRI_MIN && topull > 0) {
1744 struct mbuf *m = NULL, *mqhead = NULL, *mend = NULL;
1745 priq = &so->so_msg_state->msg_priq[i];
1746 if ((priq->msgq_flags & MSGQ_MSG_NOTDONE) &&
1747 priq->msgq_head == NULL) {
1748 /*
1749 * We were in the middle of sending
1750 * a message and we have not seen the
1751 * end of it.
1752 */
1753 VERIFY(priq->msgq_lastmsg == NULL &&
1754 priq->msgq_tail == NULL);
1755 return;
1756 }
1757 if (priq->msgq_head != NULL) {
1758 int32_t bytes = 0, topull_tmp = topull;
1759 /*
1760 * We found a msg while scanning the priority
1761 * queue from high to low priority.
1762 */
1763 m = priq->msgq_head;
1764 mqhead = m;
1765 mend = m;
1766
1767 /*
1768 * Move bytes from the priority queue to the
1769 * serial queue. Compute the number of bytes
1770 * being added.
1771 */
1772 while (mqhead->m_next != NULL && topull_tmp > 0) {
1773 bytes += mqhead->m_len;
1774 topull_tmp -= mqhead->m_len;
1775 mend = mqhead;
1776 mqhead = mqhead->m_next;
1777 }
1778
1779 if (mqhead->m_next == NULL) {
1780 /*
1781 * If we have only one more mbuf left,
1782 * move the last mbuf of this message to
1783 * serial queue and set the head of the
1784 * queue to be the next message.
1785 */
1786 bytes += mqhead->m_len;
1787 mend = mqhead;
1788 mqhead = m->m_nextpkt;
1789 if (!(mend->m_flags & M_EOR)) {
1790 /*
1791 * We have not seen the end of
1792 * this message, so we can not
1793 * pull anymore.
1794 */
1795 priq->msgq_flags |= MSGQ_MSG_NOTDONE;
1796 } else {
1797 /* Reset M_EOR */
1798 mend->m_flags &= ~(M_EOR);
1799 }
1800 } else {
1801 /* propagate the next msg pointer */
1802 mqhead->m_nextpkt = m->m_nextpkt;
1803 }
1804 priq->msgq_head = mqhead;
1805
1806 /*
1807 * if the lastmsg pointer points to
1808 * the mbuf that is being dequeued, update
1809 * it to point to the new head.
1810 */
1811 if (priq->msgq_lastmsg == m) {
1812 priq->msgq_lastmsg = priq->msgq_head;
1813 }
1814
1815 m->m_nextpkt = NULL;
1816 mend->m_next = NULL;
1817
1818 if (priq->msgq_head == NULL) {
1819 /* Moved all messages, update tail */
1820 priq->msgq_tail = NULL;
1821 VERIFY(priq->msgq_lastmsg == NULL);
1822 }
1823
1824 /* Move it to serial sb_mb queue */
1825 if (so->so_snd.sb_mb == NULL) {
1826 so->so_snd.sb_mb = m;
1827 } else {
1828 so->so_snd.sb_mbtail->m_next = m;
1829 }
1830
1831 priq->msgq_bytes -= bytes;
1832 VERIFY(priq->msgq_bytes >= 0);
1833 sbwakeup(&so->so_snd);
1834
1835 so->so_msg_state->msg_serial_bytes += bytes;
1836 so->so_snd.sb_mbtail = mend;
1837 so->so_snd.sb_lastrecord = so->so_snd.sb_mb;
1838
1839 topull =
1840 (off + len) - so->so_msg_state->msg_serial_bytes;
1841
1842 if (priq->msgq_flags & MSGQ_MSG_NOTDONE) {
1843 break;
1844 }
1845 } else {
1846 --i;
1847 }
1848 }
1849 sblastrecordchk(&so->so_snd, "sbpull_unordered_data");
1850 sblastmbufchk(&so->so_snd, "sbpull_unordered_data");
1851 }
1852
1853 /*
1854 * Compress mbuf chain m into the socket
1855 * buffer sb following mbuf n. If n
1856 * is null, the buffer is presumed empty.
1857 */
1858 static inline void
1859 sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
1860 {
1861 int eor = 0, compress = (!(sb->sb_flags & SB_NOCOMPRESS));
1862 struct mbuf *o;
1863
1864 if (m == NULL) {
1865 /* There is nothing to compress; just update the tail */
1866 for (; n->m_next != NULL; n = n->m_next) {
1867 ;
1868 }
1869 sb->sb_mbtail = n;
1870 goto done;
1871 }
1872
1873 while (m != NULL) {
1874 eor |= m->m_flags & M_EOR;
1875 if (compress && m->m_len == 0 && (eor == 0 ||
1876 (((o = m->m_next) || (o = n)) && o->m_type == m->m_type))) {
1877 if (sb->sb_lastrecord == m) {
1878 sb->sb_lastrecord = m->m_next;
1879 }
1880 m = m_free(m);
1881 continue;
1882 }
1883 if (compress && n != NULL && (n->m_flags & M_EOR) == 0 &&
1884 #ifndef __APPLE__
1885 M_WRITABLE(n) &&
1886 #endif
1887 m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
1888 m->m_len <= M_TRAILINGSPACE(n) &&
1889 n->m_type == m->m_type) {
1890 bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
1891 (unsigned)m->m_len);
1892 n->m_len += m->m_len;
1893 sb->sb_cc += m->m_len;
1894 if (m->m_type != MT_DATA && m->m_type != MT_HEADER &&
1895 m->m_type != MT_OOBDATA) {
1896 /* XXX: Probably don't need */
1897 sb->sb_ctl += m->m_len;
1898 }
1899
1900 /* update send byte count */
1901 if (sb->sb_flags & SB_SNDBYTE_CNT) {
1902 inp_incr_sndbytes_total(sb->sb_so,
1903 m->m_len);
1904 inp_incr_sndbytes_unsent(sb->sb_so,
1905 m->m_len);
1906 }
1907 m = m_free(m);
1908 continue;
1909 }
1910 if (n != NULL) {
1911 n->m_next = m;
1912 } else {
1913 sb->sb_mb = m;
1914 }
1915 sb->sb_mbtail = m;
1916 sballoc(sb, m);
1917 n = m;
1918 m->m_flags &= ~M_EOR;
1919 m = m->m_next;
1920 n->m_next = NULL;
1921 }
1922 if (eor != 0) {
1923 if (n != NULL) {
1924 n->m_flags |= eor;
1925 } else {
1926 printf("semi-panic: sbcompress\n");
1927 }
1928 }
1929 done:
1930 SBLASTMBUFCHK(sb, __func__);
1931 postevent(0, sb, EV_RWBYTES);
1932 }
1933
1934 void
1935 sb_empty_assert(struct sockbuf *sb, const char *where)
1936 {
1937 if (!(sb->sb_cc == 0 && sb->sb_mb == NULL && sb->sb_mbcnt == 0 &&
1938 sb->sb_mbtail == NULL && sb->sb_lastrecord == NULL)) {
1939 panic("%s: sb %p so %p cc %d mbcnt %d mb %p mbtail %p "
1940 "lastrecord %p\n", where, sb, sb->sb_so, sb->sb_cc,
1941 sb->sb_mbcnt, sb->sb_mb, sb->sb_mbtail,
1942 sb->sb_lastrecord);
1943 /* NOTREACHED */
1944 }
1945 }
1946
1947 static void
1948 sbflush_priq(struct msg_priq *priq)
1949 {
1950 struct mbuf *m;
1951 m = priq->msgq_head;
1952 if (m != NULL) {
1953 m_freem_list(m);
1954 }
1955 priq->msgq_head = priq->msgq_tail = priq->msgq_lastmsg = NULL;
1956 priq->msgq_bytes = priq->msgq_flags = 0;
1957 }
1958
1959 /*
1960 * Free all mbufs in a sockbuf.
1961 * Check that all resources are reclaimed.
1962 */
1963 void
1964 sbflush(struct sockbuf *sb)
1965 {
1966 void *lr_saved = __builtin_return_address(0);
1967 struct socket *so = sb->sb_so;
1968 u_int32_t i;
1969
1970 /* so_usecount may be 0 if we get here from sofreelastref() */
1971 if (so == NULL) {
1972 panic("%s: null so, sb=%p sb_flags=0x%x lr=%p\n",
1973 __func__, sb, sb->sb_flags, lr_saved);
1974 /* NOTREACHED */
1975 } else if (so->so_usecount < 0) {
1976 panic("%s: sb=%p sb_flags=0x%x sb_so=%p usecount=%d lr=%p "
1977 "lrh= %s\n", __func__, sb, sb->sb_flags, so,
1978 so->so_usecount, lr_saved, solockhistory_nr(so));
1979 /* NOTREACHED */
1980 }
1981
1982 /*
1983 * Obtain lock on the socket buffer (SB_LOCK). This is required
1984 * to prevent the socket buffer from being unexpectedly altered
1985 * while it is used by another thread in socket send/receive.
1986 *
1987 * sblock() must not fail here, hence the assertion.
1988 */
1989 (void) sblock(sb, SBL_WAIT | SBL_NOINTR | SBL_IGNDEFUNCT);
1990 VERIFY(sb->sb_flags & SB_LOCK);
1991
1992 while (sb->sb_mbcnt > 0) {
1993 /*
1994 * Don't call sbdrop(sb, 0) if the leading mbuf is non-empty:
1995 * we would loop forever. Break out; sb_empty_assert() below
1996 */
1997 if (!sb->sb_cc && (sb->sb_mb == NULL || sb->sb_mb->m_len)) {
1998 break;
1999 }
2000 sbdrop(sb, (int)sb->sb_cc);
2001 }
2002
2003 if (!(sb->sb_flags & SB_RECV) && (so->so_flags & SOF_ENABLE_MSGS)) {
2004 VERIFY(so->so_msg_state != NULL);
2005 for (i = MSG_PRI_MIN; i <= MSG_PRI_MAX; ++i) {
2006 sbflush_priq(&so->so_msg_state->msg_priq[i]);
2007 }
2008 so->so_msg_state->msg_serial_bytes = 0;
2009 so->so_msg_state->msg_uno_bytes = 0;
2010 }
2011
2012 sb_empty_assert(sb, __func__);
2013 postevent(0, sb, EV_RWBYTES);
2014
2015 sbunlock(sb, TRUE); /* keep socket locked */
2016 }
2017
2018 /*
2019 * Drop data from (the front of) a sockbuf.
2020 * Use m_freem_list to free the mbuf structures
2021 * under a single lock... this is done by pruning
2022 * the top of the tree from the body by keeping track
2023 * of where we get to in the tree, then zeroing the
2024 * two pertinent pointers, m_nextpkt and m_next.
2025 * The socket buffer is then updated to point at the new
2026 * top of the tree and the pruned area is released via
2027 * m_freem_list.
2028 */
2029 void
2030 sbdrop(struct sockbuf *sb, int len)
2031 {
2032 struct mbuf *m, *free_list, *ml;
2033 struct mbuf *next, *last;
2034
2035 next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
2036 #if MPTCP
2037 if (m != NULL && len > 0 && !(sb->sb_flags & SB_RECV) &&
2038 ((sb->sb_so->so_flags & SOF_MP_SUBFLOW) ||
2039 (SOCK_CHECK_DOM(sb->sb_so, PF_MULTIPATH) &&
2040 SOCK_CHECK_PROTO(sb->sb_so, IPPROTO_TCP))) &&
2041 !(sb->sb_so->so_flags1 & SOF1_POST_FALLBACK_SYNC)) {
2042 mptcp_preproc_sbdrop(sb->sb_so, m, (unsigned int)len);
2043 }
2044 if (m != NULL && len > 0 && !(sb->sb_flags & SB_RECV) &&
2045 (sb->sb_so->so_flags & SOF_MP_SUBFLOW) &&
2046 (sb->sb_so->so_flags1 & SOF1_POST_FALLBACK_SYNC)) {
2047 mptcp_fallback_sbdrop(sb->sb_so, m, len);
2048 }
2049 #endif /* MPTCP */
2050 KERNEL_DEBUG((DBG_FNC_SBDROP | DBG_FUNC_START), sb, len, 0, 0, 0);
2051
2052 free_list = last = m;
2053 ml = (struct mbuf *)0;
2054
2055 while (len > 0) {
2056 if (m == NULL) {
2057 if (next == NULL) {
2058 /*
2059 * Temporarily replacing this panic with a printf
2060 * because it occurs occasionally when closing a
2061 * socket, in which case there is no harm in
2062 * ignoring it. This problem will be investigated
2063 * further.
2064 */
2065 /* panic("sbdrop"); */
2066 printf("sbdrop - count not zero\n");
2067 len = 0;
2068 /*
2069 * zero the counts. if we have no mbufs,
2070 * we have no data (PR-2986815)
2071 */
2072 sb->sb_cc = 0;
2073 sb->sb_mbcnt = 0;
2074 if (!(sb->sb_flags & SB_RECV) &&
2075 (sb->sb_so->so_flags & SOF_ENABLE_MSGS)) {
2076 sb->sb_so->so_msg_state->
2077 msg_serial_bytes = 0;
2078 }
2079 break;
2080 }
2081 m = last = next;
2082 next = m->m_nextpkt;
2083 continue;
2084 }
2085 if (m->m_len > len) {
2086 m->m_len -= len;
2087 m->m_data += len;
2088 sb->sb_cc -= len;
2089 /* update the send byte count */
2090 if (sb->sb_flags & SB_SNDBYTE_CNT) {
2091 inp_decr_sndbytes_total(sb->sb_so, len);
2092 }
2093 if (m->m_type != MT_DATA && m->m_type != MT_HEADER &&
2094 m->m_type != MT_OOBDATA) {
2095 sb->sb_ctl -= len;
2096 }
2097 break;
2098 }
2099 len -= m->m_len;
2100 sbfree(sb, m);
2101
2102 ml = m;
2103 m = m->m_next;
2104 }
2105 while (m && m->m_len == 0) {
2106 sbfree(sb, m);
2107
2108 ml = m;
2109 m = m->m_next;
2110 }
2111 if (ml) {
2112 ml->m_next = (struct mbuf *)0;
2113 last->m_nextpkt = (struct mbuf *)0;
2114 m_freem_list(free_list);
2115 }
2116 if (m) {
2117 sb->sb_mb = m;
2118 m->m_nextpkt = next;
2119 } else {
2120 sb->sb_mb = next;
2121 }
2122
2123 /*
2124 * First part is an inline SB_EMPTY_FIXUP(). Second part
2125 * makes sure sb_lastrecord is up-to-date if we dropped
2126 * part of the last record.
2127 */
2128 m = sb->sb_mb;
2129 if (m == NULL) {
2130 sb->sb_mbtail = NULL;
2131 sb->sb_lastrecord = NULL;
2132 } else if (m->m_nextpkt == NULL) {
2133 sb->sb_lastrecord = m;
2134 }
2135
2136 #if CONTENT_FILTER
2137 cfil_sock_buf_update(sb);
2138 #endif /* CONTENT_FILTER */
2139
2140 postevent(0, sb, EV_RWBYTES);
2141
2142 KERNEL_DEBUG((DBG_FNC_SBDROP | DBG_FUNC_END), sb, 0, 0, 0, 0);
2143 }
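
/*
 * Illustrative sketch (editorial, not part of the original source): the
 * typical caller is a transport protocol trimming acknowledged data from
 * the send buffer, roughly the way tcp_input() handles an ACK that covers
 * "acked" bytes:
 *
 *	if (acked > so->so_snd.sb_cc) {
 *		sbdrop(&so->so_snd, (int)so->so_snd.sb_cc);
 *	} else {
 *		sbdrop(&so->so_snd, acked);
 *	}
 */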
2144
2145 /*
2146 * Drop a record off the front of a sockbuf
2147 * and move the next record to the front.
2148 */
2149 void
2150 sbdroprecord(struct sockbuf *sb)
2151 {
2152 struct mbuf *m, *mn;
2153
2154 m = sb->sb_mb;
2155 if (m) {
2156 sb->sb_mb = m->m_nextpkt;
2157 do {
2158 sbfree(sb, m);
2159 MFREE(m, mn);
2160 m = mn;
2161 } while (m);
2162 }
2163 SB_EMPTY_FIXUP(sb);
2164 postevent(0, sb, EV_RWBYTES);
2165 }
2166
2167 /*
2168 * Create a "control" mbuf containing the specified data
2169 * with the specified type for presentation on a socket buffer.
2170 */
2171 struct mbuf *
2172 sbcreatecontrol(caddr_t p, int size, int type, int level)
2173 {
2174 struct cmsghdr *cp;
2175 struct mbuf *m;
2176
2177 if (CMSG_SPACE((u_int)size) > MLEN) {
2178 return (struct mbuf *)NULL;
2179 }
2180 if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL) {
2181 return (struct mbuf *)NULL;
2182 }
2183 cp = mtod(m, struct cmsghdr *);
2184 VERIFY(IS_P2ALIGNED(cp, sizeof(u_int32_t)));
2185 /* XXX check size? */
2186 (void) memcpy(CMSG_DATA(cp), p, size);
2187 m->m_len = CMSG_SPACE(size);
2188 cp->cmsg_len = CMSG_LEN(size);
2189 cp->cmsg_level = level;
2190 cp->cmsg_type = type;
2191 return m;
2192 }
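
/*
 * Illustrative sketch (editorial, not part of the original source): a
 * protocol handing a receive timestamp to the application might build the
 * control mbuf roughly like this, assuming "tv" is a filled-in timeval:
 *
 *	struct mbuf *ctl;
 *
 *	ctl = sbcreatecontrol((caddr_t)&tv, sizeof (tv),
 *	    SCM_TIMESTAMP, SOL_SOCKET);
 *	if (ctl == NULL)
 *		return (ENOBUFS);	// MLEN exceeded or no mbuf available
 */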
2193
2194 struct mbuf **
2195 sbcreatecontrol_mbuf(caddr_t p, int size, int type, int level, struct mbuf **mp)
2196 {
2197 struct mbuf *m;
2198 struct cmsghdr *cp;
2199
2200 if (*mp == NULL) {
2201 *mp = sbcreatecontrol(p, size, type, level);
2202 return mp;
2203 }
2204
2205 if (CMSG_SPACE((u_int)size) + (*mp)->m_len > MLEN) {
2206 mp = &(*mp)->m_next;
2207 *mp = sbcreatecontrol(p, size, type, level);
2208 return mp;
2209 }
2210
2211 m = *mp;
2212
2213 cp = (struct cmsghdr *)(void *)(mtod(m, char *) + m->m_len);
2214 /* CMSG_SPACE ensures 32-bit alignment */
2215 VERIFY(IS_P2ALIGNED(cp, sizeof(u_int32_t)));
2216 m->m_len += CMSG_SPACE(size);
2217
2218 /* XXX check size? */
2219 (void) memcpy(CMSG_DATA(cp), p, size);
2220 cp->cmsg_len = CMSG_LEN(size);
2221 cp->cmsg_level = level;
2222 cp->cmsg_type = type;
2223
2224 return mp;
2225 }
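
/*
 * Illustrative sketch (editorial, not part of the original source): unlike
 * sbcreatecontrol(), this variant packs additional cmsghdrs into the same
 * mbuf while they fit, so a caller building a list of ancillary data keeps
 * passing the returned cursor back in, roughly:
 *
 *	struct mbuf *list = NULL, **mp = &list;
 *
 *	mp = sbcreatecontrol_mbuf((caddr_t)&tv, sizeof (tv),
 *	    SCM_TIMESTAMP, SOL_SOCKET, mp);
 *	if (*mp == NULL)
 *		goto no_mbufs;
 *	mp = sbcreatecontrol_mbuf((caddr_t)&ttl, sizeof (ttl),
 *	    IP_RECVTTL, IPPROTO_IP, mp);
 *	if (*mp == NULL)
 *		goto no_mbufs;
 */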
2226
2227
2228 /*
2229 * Some routines that return EOPNOTSUPP for entry points that are not
2230 * supported by a protocol. Fill in as needed.
2231 */
2232 int
2233 pru_abort_notsupp(struct socket *so)
2234 {
2235 #pragma unused(so)
2236 return EOPNOTSUPP;
2237 }
2238
2239 int
2240 pru_accept_notsupp(struct socket *so, struct sockaddr **nam)
2241 {
2242 #pragma unused(so, nam)
2243 return EOPNOTSUPP;
2244 }
2245
2246 int
2247 pru_attach_notsupp(struct socket *so, int proto, struct proc *p)
2248 {
2249 #pragma unused(so, proto, p)
2250 return EOPNOTSUPP;
2251 }
2252
2253 int
2254 pru_bind_notsupp(struct socket *so, struct sockaddr *nam, struct proc *p)
2255 {
2256 #pragma unused(so, nam, p)
2257 return EOPNOTSUPP;
2258 }
2259
2260 int
2261 pru_connect_notsupp(struct socket *so, struct sockaddr *nam, struct proc *p)
2262 {
2263 #pragma unused(so, nam, p)
2264 return EOPNOTSUPP;
2265 }
2266
2267 int
2268 pru_connect2_notsupp(struct socket *so1, struct socket *so2)
2269 {
2270 #pragma unused(so1, so2)
2271 return EOPNOTSUPP;
2272 }
2273
2274 int
2275 pru_connectx_notsupp(struct socket *so, struct sockaddr *src,
2276 struct sockaddr *dst, struct proc *p, uint32_t ifscope,
2277 sae_associd_t aid, sae_connid_t *pcid, uint32_t flags, void *arg,
2278 uint32_t arglen, struct uio *uio, user_ssize_t *bytes_written)
2279 {
2280 #pragma unused(so, src, dst, p, ifscope, aid, pcid, flags, arg, arglen, uio, bytes_written)
2281 return EOPNOTSUPP;
2282 }
2283
2284 int
2285 pru_control_notsupp(struct socket *so, u_long cmd, caddr_t data,
2286 struct ifnet *ifp, struct proc *p)
2287 {
2288 #pragma unused(so, cmd, data, ifp, p)
2289 return EOPNOTSUPP;
2290 }
2291
2292 int
2293 pru_detach_notsupp(struct socket *so)
2294 {
2295 #pragma unused(so)
2296 return EOPNOTSUPP;
2297 }
2298
2299 int
2300 pru_disconnect_notsupp(struct socket *so)
2301 {
2302 #pragma unused(so)
2303 return EOPNOTSUPP;
2304 }
2305
2306 int
2307 pru_disconnectx_notsupp(struct socket *so, sae_associd_t aid, sae_connid_t cid)
2308 {
2309 #pragma unused(so, aid, cid)
2310 return EOPNOTSUPP;
2311 }
2312
2313 int
2314 pru_listen_notsupp(struct socket *so, struct proc *p)
2315 {
2316 #pragma unused(so, p)
2317 return EOPNOTSUPP;
2318 }
2319
2320 int
2321 pru_peeraddr_notsupp(struct socket *so, struct sockaddr **nam)
2322 {
2323 #pragma unused(so, nam)
2324 return EOPNOTSUPP;
2325 }
2326
2327 int
2328 pru_rcvd_notsupp(struct socket *so, int flags)
2329 {
2330 #pragma unused(so, flags)
2331 return EOPNOTSUPP;
2332 }
2333
2334 int
2335 pru_rcvoob_notsupp(struct socket *so, struct mbuf *m, int flags)
2336 {
2337 #pragma unused(so, m, flags)
2338 return EOPNOTSUPP;
2339 }
2340
2341 int
2342 pru_send_notsupp(struct socket *so, int flags, struct mbuf *m,
2343 struct sockaddr *addr, struct mbuf *control, struct proc *p)
2344 {
2345 #pragma unused(so, flags, m, addr, control, p)
2346 return EOPNOTSUPP;
2347 }
2348
2349 int
2350 pru_send_list_notsupp(struct socket *so, int flags, struct mbuf *m,
2351 struct sockaddr *addr, struct mbuf *control, struct proc *p)
2352 {
2353 #pragma unused(so, flags, m, addr, control, p)
2354 return EOPNOTSUPP;
2355 }
2356
2357 /*
2358 * This isn't really a ``null'' operation, but it's the default one
2359 * and doesn't do anything destructive.
2360 */
2361 int
2362 pru_sense_null(struct socket *so, void *ub, int isstat64)
2363 {
2364 if (isstat64 != 0) {
2365 struct stat64 *sb64;
2366
2367 sb64 = (struct stat64 *)ub;
2368 sb64->st_blksize = so->so_snd.sb_hiwat;
2369 } else {
2370 struct stat *sb;
2371
2372 sb = (struct stat *)ub;
2373 sb->st_blksize = so->so_snd.sb_hiwat;
2374 }
2375
2376 return 0;
2377 }
2378
2379
2380 int
2381 pru_sosend_notsupp(struct socket *so, struct sockaddr *addr, struct uio *uio,
2382 struct mbuf *top, struct mbuf *control, int flags)
2383 {
2384 #pragma unused(so, addr, uio, top, control, flags)
2385 return EOPNOTSUPP;
2386 }
2387
2388 int
2389 pru_sosend_list_notsupp(struct socket *so, struct uio **uio,
2390 u_int uiocnt, int flags)
2391 {
2392 #pragma unused(so, uio, uiocnt, flags)
2393 return EOPNOTSUPP;
2394 }
2395
2396 int
2397 pru_soreceive_notsupp(struct socket *so, struct sockaddr **paddr,
2398 struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
2399 {
2400 #pragma unused(so, paddr, uio, mp0, controlp, flagsp)
2401 return EOPNOTSUPP;
2402 }
2403
2404 int
2405 pru_soreceive_list_notsupp(struct socket *so,
2406 struct recv_msg_elem *recv_msg_array, u_int uiocnt, int *flagsp)
2407 {
2408 #pragma unused(so, recv_msg_array, uiocnt, flagsp)
2409 return EOPNOTSUPP;
2410 }
2411
2412 int
2413 pru_shutdown_notsupp(struct socket *so)
2414 {
2415 #pragma unused(so)
2416 return EOPNOTSUPP;
2417 }
2418
2419 int
2420 pru_sockaddr_notsupp(struct socket *so, struct sockaddr **nam)
2421 {
2422 #pragma unused(so, nam)
2423 return EOPNOTSUPP;
2424 }
2425
2426 int
2427 pru_sopoll_notsupp(struct socket *so, int events, kauth_cred_t cred, void *wql)
2428 {
2429 #pragma unused(so, events, cred, wql)
2430 return EOPNOTSUPP;
2431 }
2432
2433 int
2434 pru_socheckopt_null(struct socket *so, struct sockopt *sopt)
2435 {
2436 #pragma unused(so, sopt)
2437 /*
2438 * Allow all options for set/get by default.
2439 */
2440 return 0;
2441 }
2442
2443 static int
2444 pru_preconnect_null(struct socket *so)
2445 {
2446 #pragma unused(so)
2447 return 0;
2448 }
2449
2450 void
2451 pru_sanitize(struct pr_usrreqs *pru)
2452 {
2453 #define DEFAULT(foo, bar) if ((foo) == NULL) (foo) = (bar)
2454 DEFAULT(pru->pru_abort, pru_abort_notsupp);
2455 DEFAULT(pru->pru_accept, pru_accept_notsupp);
2456 DEFAULT(pru->pru_attach, pru_attach_notsupp);
2457 DEFAULT(pru->pru_bind, pru_bind_notsupp);
2458 DEFAULT(pru->pru_connect, pru_connect_notsupp);
2459 DEFAULT(pru->pru_connect2, pru_connect2_notsupp);
2460 DEFAULT(pru->pru_connectx, pru_connectx_notsupp);
2461 DEFAULT(pru->pru_control, pru_control_notsupp);
2462 DEFAULT(pru->pru_detach, pru_detach_notsupp);
2463 DEFAULT(pru->pru_disconnect, pru_disconnect_notsupp);
2464 DEFAULT(pru->pru_disconnectx, pru_disconnectx_notsupp);
2465 DEFAULT(pru->pru_listen, pru_listen_notsupp);
2466 DEFAULT(pru->pru_peeraddr, pru_peeraddr_notsupp);
2467 DEFAULT(pru->pru_rcvd, pru_rcvd_notsupp);
2468 DEFAULT(pru->pru_rcvoob, pru_rcvoob_notsupp);
2469 DEFAULT(pru->pru_send, pru_send_notsupp);
2470 DEFAULT(pru->pru_send_list, pru_send_list_notsupp);
2471 DEFAULT(pru->pru_sense, pru_sense_null);
2472 DEFAULT(pru->pru_shutdown, pru_shutdown_notsupp);
2473 DEFAULT(pru->pru_sockaddr, pru_sockaddr_notsupp);
2474 DEFAULT(pru->pru_sopoll, pru_sopoll_notsupp);
2475 DEFAULT(pru->pru_soreceive, pru_soreceive_notsupp);
2476 DEFAULT(pru->pru_soreceive_list, pru_soreceive_list_notsupp);
2477 DEFAULT(pru->pru_sosend, pru_sosend_notsupp);
2478 DEFAULT(pru->pru_sosend_list, pru_sosend_list_notsupp);
2479 DEFAULT(pru->pru_socheckopt, pru_socheckopt_null);
2480 DEFAULT(pru->pru_preconnect, pru_preconnect_null);
2481 #undef DEFAULT
2482 }
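
/*
 * Illustrative sketch (editorial, not part of the original source): a
 * protocol only supplies the entry points it implements and lets
 * pru_sanitize() fill every remaining slot with the matching
 * *_notsupp/_null stub.  "foo_*" below is a hypothetical protocol:
 *
 *	static struct pr_usrreqs foo_usrreqs = {
 *		.pru_attach =	foo_attach,
 *		.pru_detach =	foo_detach,
 *		.pru_send =	foo_send,
 *		.pru_soreceive = soreceive,
 *	};
 *	...
 *	pru_sanitize(&foo_usrreqs);
 */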
2483
2484 /*
2485 * The following are macros on BSD and functions on Darwin
2486 */
2487
2488 /*
2489 * Do we need to notify the other side when I/O is possible?
2490 */
2491
2492 int
2493 sb_notify(struct sockbuf *sb)
2494 {
2495 return sb->sb_waiters > 0 ||
2496 (sb->sb_flags & (SB_SEL | SB_ASYNC | SB_UPCALL | SB_KNOTE));
2497 }
2498
2499 /*
2500 * How much space is there in a socket buffer (so->so_snd or so->so_rcv)?
2501 * This is problematic if the fields are unsigned, as the space might
2502 * still be negative (cc > hiwat or mbcnt > mbmax). Should detect
2503 * overflow and return 0.
2504 */
2505 int
2506 sbspace(struct sockbuf *sb)
2507 {
2508 int pending = 0;
2509 int space = imin((int)(sb->sb_hiwat - sb->sb_cc),
2510 (int)(sb->sb_mbmax - sb->sb_mbcnt));
2511
2512 if (sb->sb_preconn_hiwat != 0) {
2513 space = imin((int)(sb->sb_preconn_hiwat - sb->sb_cc), space);
2514 }
2515
2516 if (space < 0) {
2517 space = 0;
2518 }
2519
2520 /* Compensate for data being processed by content filters */
2521 #if CONTENT_FILTER
2522 pending = cfil_sock_data_space(sb);
2523 #endif /* CONTENT_FILTER */
2524 if (pending > space) {
2525 space = 0;
2526 } else {
2527 space -= pending;
2528 }
2529
2530 return space;
2531 }
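
/*
 * Illustrative sketch (editorial, not part of the original source):
 * datagram input paths commonly consult sbspace() before queueing,
 * roughly:
 *
 *	if (sbspace(&so->so_rcv) < m->m_pkthdr.len) {
 *		m_freem(m);		// no room; drop the datagram
 *		return;
 *	}
 */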
2532
2533 /*
2534 * If this socket has priority queues, check if there is enough
2535 * space in the priority queue for this msg.
2536 */
2537 int
2538 msgq_sbspace(struct socket *so, struct mbuf *control)
2539 {
2540 int space = 0, error;
2541 u_int32_t msgpri = 0;
2542 VERIFY(so->so_type == SOCK_STREAM &&
2543 SOCK_PROTO(so) == IPPROTO_TCP);
2544 if (control != NULL) {
2545 error = tcp_get_msg_priority(control, &msgpri);
2546 if (error) {
2547 return 0;
2548 }
2549 } else {
2550 msgpri = MSG_PRI_0;
2551 }
2552 space = (so->so_snd.sb_idealsize / MSG_PRI_COUNT) -
2553 so->so_msg_state->msg_priq[msgpri].msgq_bytes;
2554 if (space < 0) {
2555 space = 0;
2556 }
2557 return space;
2558 }
2559
2560 /* do we have to send all at once on a socket? */
2561 int
2562 sosendallatonce(struct socket *so)
2563 {
2564 return so->so_proto->pr_flags & PR_ATOMIC;
2565 }
2566
2567 /* can we read something from so? */
2568 int
2569 soreadable(struct socket *so)
2570 {
2571 return so->so_rcv.sb_cc >= so->so_rcv.sb_lowat ||
2572 ((so->so_state & SS_CANTRCVMORE)
2573 #if CONTENT_FILTER
2574 && cfil_sock_data_pending(&so->so_rcv) == 0
2575 #endif /* CONTENT_FILTER */
2576 ) ||
2577 so->so_comp.tqh_first || so->so_error;
2578 }
2579
2580 /* can we write something to so? */
2581
2582 int
2583 sowriteable(struct socket *so)
2584 {
2585 if ((so->so_state & SS_CANTSENDMORE) ||
2586 so->so_error > 0) {
2587 return 1;
2588 }
2589 if (so_wait_for_if_feedback(so) || !socanwrite(so)) {
2590 return 0;
2591 }
2592 if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
2593 return 1;
2594 }
2595
2596 if (sbspace(&(so)->so_snd) >= (so)->so_snd.sb_lowat) {
2597 if (so->so_flags & SOF_NOTSENT_LOWAT) {
2598 if ((SOCK_DOM(so) == PF_INET6 ||
2599 SOCK_DOM(so) == PF_INET) &&
2600 so->so_type == SOCK_STREAM) {
2601 return tcp_notsent_lowat_check(so);
2602 }
2603 #if MPTCP
2604 else if ((SOCK_DOM(so) == PF_MULTIPATH) &&
2605 (SOCK_PROTO(so) == IPPROTO_TCP)) {
2606 return mptcp_notsent_lowat_check(so);
2607 }
2608 #endif
2609 else {
2610 return 1;
2611 }
2612 } else {
2613 return 1;
2614 }
2615 }
2616 return 0;
2617 }
2618
2619 /* adjust counters in sb reflecting allocation of m */
2620
2621 void
2622 sballoc(struct sockbuf *sb, struct mbuf *m)
2623 {
2624 u_int32_t cnt = 1;
2625 sb->sb_cc += m->m_len;
2626 if (m->m_type != MT_DATA && m->m_type != MT_HEADER &&
2627 m->m_type != MT_OOBDATA) {
2628 sb->sb_ctl += m->m_len;
2629 }
2630 sb->sb_mbcnt += MSIZE;
2631
2632 if (m->m_flags & M_EXT) {
2633 sb->sb_mbcnt += m->m_ext.ext_size;
2634 cnt += (m->m_ext.ext_size >> MSIZESHIFT);
2635 }
2636 OSAddAtomic(cnt, &total_sbmb_cnt);
2637 VERIFY(total_sbmb_cnt > 0);
2638 if (total_sbmb_cnt > total_sbmb_cnt_peak) {
2639 total_sbmb_cnt_peak = total_sbmb_cnt;
2640 }
2641
2642 /*
2643 * If data is being added to the send socket buffer,
2644 * update the send byte count
2645 */
2646 if (sb->sb_flags & SB_SNDBYTE_CNT) {
2647 inp_incr_sndbytes_total(sb->sb_so, m->m_len);
2648 inp_incr_sndbytes_unsent(sb->sb_so, m->m_len);
2649 }
2650 }
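
/*
 * Worked example (editorial, not part of the original source): appending a
 * 2 KB cluster mbuf that carries 1460 bytes of data bumps sb_cc by 1460,
 * sb_mbcnt by MSIZE plus the 2048-byte cluster, and total_sbmb_cnt by
 * 1 + (2048 >> MSIZESHIFT) mbuf-sized units (1 + 8 = 9 with the usual
 * 256-byte MSIZE).
 */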
2651
2652 /* adjust counters in sb reflecting freeing of m */
2653 void
2654 sbfree(struct sockbuf *sb, struct mbuf *m)
2655 {
2656 int cnt = -1;
2657
2658 sb->sb_cc -= m->m_len;
2659 if (m->m_type != MT_DATA && m->m_type != MT_HEADER &&
2660 m->m_type != MT_OOBDATA) {
2661 sb->sb_ctl -= m->m_len;
2662 }
2663 sb->sb_mbcnt -= MSIZE;
2664 if (m->m_flags & M_EXT) {
2665 sb->sb_mbcnt -= m->m_ext.ext_size;
2666 cnt -= (m->m_ext.ext_size >> MSIZESHIFT);
2667 }
2668 OSAddAtomic(cnt, &total_sbmb_cnt);
2669 VERIFY(total_sbmb_cnt >= 0);
2670 if (total_sbmb_cnt < total_sbmb_cnt_floor) {
2671 total_sbmb_cnt_floor = total_sbmb_cnt;
2672 }
2673
2674 /*
2675 * If data is being removed from the send socket buffer,
2676 * update the send byte count
2677 */
2678 if (sb->sb_flags & SB_SNDBYTE_CNT) {
2679 inp_decr_sndbytes_total(sb->sb_so, m->m_len);
2680 }
2681 }
2682
2683 /*
2684 * Set lock on sockbuf sb; sleep if lock is already held.
2685 * Unless SB_NOINTR is set on sockbuf, sleep is interruptible.
2686 * Returns error without lock if sleep is interrupted.
2687 */
2688 int
2689 sblock(struct sockbuf *sb, uint32_t flags)
2690 {
2691 boolean_t nointr = ((sb->sb_flags & SB_NOINTR) || (flags & SBL_NOINTR));
2692 void *lr_saved = __builtin_return_address(0);
2693 struct socket *so = sb->sb_so;
2694 void * wchan;
2695 int error = 0;
2696 thread_t tp = current_thread();
2697
2698 VERIFY((flags & SBL_VALID) == flags);
2699
2700 /* so_usecount may be 0 if we get here from sofreelastref() */
2701 if (so == NULL) {
2702 panic("%s: null so, sb=%p sb_flags=0x%x lr=%p\n",
2703 __func__, sb, sb->sb_flags, lr_saved);
2704 /* NOTREACHED */
2705 } else if (so->so_usecount < 0) {
2706 panic("%s: sb=%p sb_flags=0x%x sb_so=%p usecount=%d lr=%p "
2707 "lrh= %s\n", __func__, sb, sb->sb_flags, so,
2708 so->so_usecount, lr_saved, solockhistory_nr(so));
2709 /* NOTREACHED */
2710 }
2711
2712 /*
2713 * The content filter thread must hold the sockbuf lock
2714 */
2715 if ((so->so_flags & SOF_CONTENT_FILTER) && sb->sb_cfil_thread == tp) {
2716 /*
2717 * Don't panic if we are defunct because SB_LOCK has
2718 * been cleared by sodefunct()
2719 */
2720 if (!(so->so_flags & SOF_DEFUNCT) && !(sb->sb_flags & SB_LOCK)) {
2721 panic("%s: SB_LOCK not held for %p\n",
2722 __func__, sb);
2723 }
2724
2725 /* Keep the sockbuf locked */
2726 return 0;
2727 }
2728
2729 if ((sb->sb_flags & SB_LOCK) && !(flags & SBL_WAIT)) {
2730 return EWOULDBLOCK;
2731 }
2732 /*
2733 * We may get here from sorflush(), in which case "sb" may not
2734 * point to the real socket buffer. Use the actual socket buffer
2735 * address from the socket instead.
2736 */
2737 wchan = (sb->sb_flags & SB_RECV) ?
2738 &so->so_rcv.sb_flags : &so->so_snd.sb_flags;
2739
2740 /*
2741 * A content filter thread has exclusive access to the sockbuf
2742 * until it clears the sb_cfil_thread pointer
2743 */
2744 while ((sb->sb_flags & SB_LOCK) ||
2745 ((so->so_flags & SOF_CONTENT_FILTER) &&
2746 sb->sb_cfil_thread != NULL)) {
2747 lck_mtx_t *mutex_held;
2748
2749 /*
2750 * XXX: This code should be moved up above outside of this loop;
2751 * however, we may get here as part of sofreelastref(), and
2752 * at that time pr_getlock() may no longer be able to return
2753 * us the lock. This will be fixed in the future.
2754 */
2755 if (so->so_proto->pr_getlock != NULL) {
2756 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
2757 } else {
2758 mutex_held = so->so_proto->pr_domain->dom_mtx;
2759 }
2760
2761 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
2762
2763 sb->sb_wantlock++;
2764 VERIFY(sb->sb_wantlock != 0);
2765
2766 error = msleep(wchan, mutex_held,
2767 nointr ? PSOCK : PSOCK | PCATCH,
2768 nointr ? "sb_lock_nointr" : "sb_lock", NULL);
2769
2770 VERIFY(sb->sb_wantlock != 0);
2771 sb->sb_wantlock--;
2772
2773 if (error == 0 && (so->so_flags & SOF_DEFUNCT) &&
2774 !(flags & SBL_IGNDEFUNCT)) {
2775 error = EBADF;
2776 SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llx [%d,%d] "
2777 "(%d)\n", __func__, proc_selfpid(),
2778 proc_best_name(current_proc()),
2779 (uint64_t)VM_KERNEL_ADDRPERM(so),
2780 SOCK_DOM(so), SOCK_TYPE(so), error);
2781 }
2782
2783 if (error != 0) {
2784 return error;
2785 }
2786 }
2787 sb->sb_flags |= SB_LOCK;
2788 return 0;
2789 }
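
/*
 * Illustrative sketch (editorial, not part of the original source):
 * sblock() and sbunlock() bracket code that manipulates a socket buffer
 * across points where the socket lock may be dropped, e.g.:
 *
 *	error = sblock(&so->so_rcv, SBL_WAIT);
 *	if (error != 0)
 *		return (error);
 *	... consume data from so->so_rcv ...
 *	sbunlock(&so->so_rcv, TRUE);	// TRUE: keep the socket locked
 */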
2790
2791 /*
2792 * Release lock on sockbuf sb
2793 */
2794 void
2795 sbunlock(struct sockbuf *sb, boolean_t keeplocked)
2796 {
2797 void *lr_saved = __builtin_return_address(0);
2798 struct socket *so = sb->sb_so;
2799 thread_t tp = current_thread();
2800
2801 /* so_usecount may be 0 if we get here from sofreelastref() */
2802 if (so == NULL) {
2803 panic("%s: null so, sb=%p sb_flags=0x%x lr=%p\n",
2804 __func__, sb, sb->sb_flags, lr_saved);
2805 /* NOTREACHED */
2806 } else if (so->so_usecount < 0) {
2807 panic("%s: sb=%p sb_flags=0x%x sb_so=%p usecount=%d lr=%p "
2808 "lrh= %s\n", __func__, sb, sb->sb_flags, so,
2809 so->so_usecount, lr_saved, solockhistory_nr(so));
2810 /* NOTREACHED */
2811 }
2812
2813 /*
2814 * The content filter thread must hold the sockbuf lock
2815 */
2816 if ((so->so_flags & SOF_CONTENT_FILTER) && sb->sb_cfil_thread == tp) {
2817 /*
2818 * Don't panic if we are defunct because SB_LOCK has
2819 * been cleared by sodefunct()
2820 */
2821 if (!(so->so_flags & SOF_DEFUNCT) &&
2822 !(sb->sb_flags & SB_LOCK) &&
2823 !(so->so_state & SS_DEFUNCT) &&
2824 !(so->so_flags1 & SOF1_DEFUNCTINPROG)) {
2825 panic("%s: SB_LOCK not held for %p\n",
2826 __func__, sb);
2827 }
2828 /* Keep the sockbuf locked and proceed */
2829 } else {
2830 VERIFY((sb->sb_flags & SB_LOCK) ||
2831 (so->so_state & SS_DEFUNCT) ||
2832 (so->so_flags1 & SOF1_DEFUNCTINPROG));
2833
2834 sb->sb_flags &= ~SB_LOCK;
2835
2836 if (sb->sb_wantlock > 0) {
2837 /*
2838 * We may get here from sorflush(), in which case "sb"
2839 * may not point to the real socket buffer. Use the
2840 * actual socket buffer address from the socket instead.
2841 */
2842 wakeup((sb->sb_flags & SB_RECV) ? &so->so_rcv.sb_flags :
2843 &so->so_snd.sb_flags);
2844 }
2845 }
2846
2847 if (!keeplocked) { /* unlock on exit */
2848 lck_mtx_t *mutex_held;
2849
2850 if (so->so_proto->pr_getlock != NULL) {
2851 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
2852 } else {
2853 mutex_held = so->so_proto->pr_domain->dom_mtx;
2854 }
2855
2856 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
2857
2858 VERIFY(so->so_usecount > 0);
2859 so->so_usecount--;
2860 so->unlock_lr[so->next_unlock_lr] = lr_saved;
2861 so->next_unlock_lr = (so->next_unlock_lr + 1) % SO_LCKDBG_MAX;
2862 lck_mtx_unlock(mutex_held);
2863 }
2864 }
2865
2866 void
2867 sorwakeup(struct socket *so)
2868 {
2869 if (sb_notify(&so->so_rcv)) {
2870 sowakeup(so, &so->so_rcv);
2871 }
2872 }
2873
2874 void
2875 sowwakeup(struct socket *so)
2876 {
2877 if (sb_notify(&so->so_snd)) {
2878 sowakeup(so, &so->so_snd);
2879 }
2880 }
2881
2882 void
2883 soevent(struct socket *so, long hint)
2884 {
2885 if (so->so_flags & SOF_KNOTE) {
2886 KNOTE(&so->so_klist, hint);
2887 }
2888
2889 soevupcall(so, hint);
2890
2891 /*
2892 * Don't post an event if this is a subflow socket or
2893 * the app has opted out of using the cellular interface
2894 */
2895 if ((hint & SO_FILT_HINT_IFDENIED) &&
2896 !(so->so_flags & SOF_MP_SUBFLOW) &&
2897 !(so->so_restrictions & SO_RESTRICT_DENY_CELLULAR) &&
2898 !(so->so_restrictions & SO_RESTRICT_DENY_EXPENSIVE)) {
2899 soevent_ifdenied(so);
2900 }
2901 }
2902
2903 void
2904 soevupcall(struct socket *so, u_int32_t hint)
2905 {
2906 if (so->so_event != NULL) {
2907 caddr_t so_eventarg = so->so_eventarg;
2908
2909 hint &= so->so_eventmask;
2910 if (hint != 0) {
2911 so->so_event(so, so_eventarg, hint);
2912 }
2913 }
2914 }
2915
2916 static void
2917 soevent_ifdenied(struct socket *so)
2918 {
2919 struct kev_netpolicy_ifdenied ev_ifdenied;
2920
2921 bzero(&ev_ifdenied, sizeof(ev_ifdenied));
2922 /*
2923 * The event consumer is interested in the effective {upid,pid,uuid}
2924 * info, which can be different from that of the process that most
2925 * recently performed a system call on the socket, e.g. when the
2926 * socket is delegated.
2927 */
2928 if (so->so_flags & SOF_DELEGATED) {
2929 ev_ifdenied.ev_data.eupid = so->e_upid;
2930 ev_ifdenied.ev_data.epid = so->e_pid;
2931 uuid_copy(ev_ifdenied.ev_data.euuid, so->e_uuid);
2932 } else {
2933 ev_ifdenied.ev_data.eupid = so->last_upid;
2934 ev_ifdenied.ev_data.epid = so->last_pid;
2935 uuid_copy(ev_ifdenied.ev_data.euuid, so->last_uuid);
2936 }
2937
2938 if (++so->so_ifdenied_notifies > 1) {
2939 /*
2940 * Allow for at most one kernel event to be generated per
2941 * socket; so_ifdenied_notifies is reset upon changes in
2942 * the UUID policy. See comments in inp_update_policy.
2943 */
2944 if (net_io_policy_log) {
2945 uuid_string_t buf;
2946
2947 uuid_unparse(ev_ifdenied.ev_data.euuid, buf);
2948 log(LOG_DEBUG, "%s[%d]: so 0x%llx [%d,%d] epid %d "
2949 "euuid %s%s has %d redundant events supressed\n",
2950 __func__, so->last_pid,
2951 (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so),
2952 SOCK_TYPE(so), ev_ifdenied.ev_data.epid, buf,
2953 ((so->so_flags & SOF_DELEGATED) ?
2954 " [delegated]" : ""), so->so_ifdenied_notifies);
2955 }
2956 } else {
2957 if (net_io_policy_log) {
2958 uuid_string_t buf;
2959
2960 uuid_unparse(ev_ifdenied.ev_data.euuid, buf);
2961 log(LOG_DEBUG, "%s[%d]: so 0x%llx [%d,%d] epid %d "
2962 "euuid %s%s event posted\n", __func__,
2963 so->last_pid, (uint64_t)VM_KERNEL_ADDRPERM(so),
2964 SOCK_DOM(so), SOCK_TYPE(so),
2965 ev_ifdenied.ev_data.epid, buf,
2966 ((so->so_flags & SOF_DELEGATED) ?
2967 " [delegated]" : ""));
2968 }
2969 netpolicy_post_msg(KEV_NETPOLICY_IFDENIED, &ev_ifdenied.ev_data,
2970 sizeof(ev_ifdenied));
2971 }
2972 }
2973
2974 /*
2975 * Make a copy of a sockaddr in a malloced buffer of type M_SONAME.
2976 */
2977 struct sockaddr *
2978 dup_sockaddr(struct sockaddr *sa, int canwait)
2979 {
2980 struct sockaddr *sa2;
2981
2982 MALLOC(sa2, struct sockaddr *, sa->sa_len, M_SONAME,
2983 canwait ? M_WAITOK : M_NOWAIT);
2984 if (sa2) {
2985 bcopy(sa, sa2, sa->sa_len);
2986 }
2987 return sa2;
2988 }
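
/*
 * Illustrative sketch (editorial, not part of the original source): callers
 * that keep a peer address around duplicate it here and later release the
 * copy with the matching M_SONAME free, e.g.:
 *
 *	struct sockaddr *copy = dup_sockaddr(nam, 1);	// 1 == can wait
 *	if (copy == NULL)
 *		return (ENOMEM);
 *	...
 *	FREE(copy, M_SONAME);
 */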
2989
2990 /*
2991 * Create an external-format (``xsocket'') structure using the information
2992 * in the kernel-format socket structure pointed to by so. This is done
2993 * to reduce the spew of irrelevant information over this interface,
2994 * to isolate user code from changes in the kernel structure, and
2995 * potentially to provide information-hiding if we decide that
2996 * some of this information should be hidden from users.
2997 */
2998 void
2999 sotoxsocket(struct socket *so, struct xsocket *xso)
3000 {
3001 xso->xso_len = sizeof(*xso);
3002 xso->xso_so = (_XSOCKET_PTR(struct socket *))VM_KERNEL_ADDRPERM(so);
3003 xso->so_type = so->so_type;
3004 xso->so_options = (short)(so->so_options & 0xffff);
3005 xso->so_linger = so->so_linger;
3006 xso->so_state = so->so_state;
3007 xso->so_pcb = (_XSOCKET_PTR(caddr_t))VM_KERNEL_ADDRPERM(so->so_pcb);
3008 if (so->so_proto) {
3009 xso->xso_protocol = SOCK_PROTO(so);
3010 xso->xso_family = SOCK_DOM(so);
3011 } else {
3012 xso->xso_protocol = xso->xso_family = 0;
3013 }
3014 xso->so_qlen = so->so_qlen;
3015 xso->so_incqlen = so->so_incqlen;
3016 xso->so_qlimit = so->so_qlimit;
3017 xso->so_timeo = so->so_timeo;
3018 xso->so_error = so->so_error;
3019 xso->so_pgid = so->so_pgid;
3020 xso->so_oobmark = so->so_oobmark;
3021 sbtoxsockbuf(&so->so_snd, &xso->so_snd);
3022 sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
3023 xso->so_uid = kauth_cred_getuid(so->so_cred);
3024 }
3025
3026
3027 #if !CONFIG_EMBEDDED
3028
3029 void
3030 sotoxsocket64(struct socket *so, struct xsocket64 *xso)
3031 {
3032 xso->xso_len = sizeof(*xso);
3033 xso->xso_so = (u_int64_t)VM_KERNEL_ADDRPERM(so);
3034 xso->so_type = so->so_type;
3035 xso->so_options = (short)(so->so_options & 0xffff);
3036 xso->so_linger = so->so_linger;
3037 xso->so_state = so->so_state;
3038 xso->so_pcb = (u_int64_t)VM_KERNEL_ADDRPERM(so->so_pcb);
3039 if (so->so_proto) {
3040 xso->xso_protocol = SOCK_PROTO(so);
3041 xso->xso_family = SOCK_DOM(so);
3042 } else {
3043 xso->xso_protocol = xso->xso_family = 0;
3044 }
3045 xso->so_qlen = so->so_qlen;
3046 xso->so_incqlen = so->so_incqlen;
3047 xso->so_qlimit = so->so_qlimit;
3048 xso->so_timeo = so->so_timeo;
3049 xso->so_error = so->so_error;
3050 xso->so_pgid = so->so_pgid;
3051 xso->so_oobmark = so->so_oobmark;
3052 sbtoxsockbuf(&so->so_snd, &xso->so_snd);
3053 sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
3054 xso->so_uid = kauth_cred_getuid(so->so_cred);
3055 }
3056
3057 #endif /* !CONFIG_EMBEDDED */
3058
3059 /*
3060 * This does the same for sockbufs. Note that the xsockbuf structure,
3061 * since it is always embedded in a socket, does not include a self
3062 * pointer nor a length. We make this entry point public in case
3063 * some other mechanism needs it.
3064 */
3065 void
3066 sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb)
3067 {
3068 xsb->sb_cc = sb->sb_cc;
3069 xsb->sb_hiwat = sb->sb_hiwat;
3070 xsb->sb_mbcnt = sb->sb_mbcnt;
3071 xsb->sb_mbmax = sb->sb_mbmax;
3072 xsb->sb_lowat = sb->sb_lowat;
3073 xsb->sb_flags = sb->sb_flags;
3074 xsb->sb_timeo = (short)
3075 (sb->sb_timeo.tv_sec * hz) + sb->sb_timeo.tv_usec / tick;
3076 if (xsb->sb_timeo == 0 && sb->sb_timeo.tv_usec != 0) {
3077 xsb->sb_timeo = 1;
3078 }
3079 }
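
/*
 * Worked example (editorial, not part of the original source): the
 * conversion above exports sb_timeo in clock ticks. With the usual hz of
 * 100 (tick == 10000 us), a timeout of { 2 s, 500000 us } becomes
 * 2 * 100 + 500000 / 10000 == 250 ticks, and any nonzero sub-tick timeout
 * is rounded up to 1 so it is not mistaken for "no timeout".
 */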
3080
3081 /*
3082 * Based on the policy set by an all-knowing decision maker, throttle
3083 * sockets that have been marked as belonging to a "background" process.
3084 */
3085 inline int
3086 soisthrottled(struct socket *so)
3087 {
3088 return so->so_flags1 & SOF1_TRAFFIC_MGT_SO_BACKGROUND;
3089 }
3090
3091 inline int
3092 soisprivilegedtraffic(struct socket *so)
3093 {
3094 return (so->so_flags & SOF_PRIVILEGED_TRAFFIC_CLASS) ? 1 : 0;
3095 }
3096
3097 inline int
3098 soissrcbackground(struct socket *so)
3099 {
3100 return (so->so_flags1 & SOF1_TRAFFIC_MGT_SO_BACKGROUND) ||
3101 IS_SO_TC_BACKGROUND(so->so_traffic_class);
3102 }
3103
3104 inline int
3105 soissrcrealtime(struct socket *so)
3106 {
3107 return so->so_traffic_class >= SO_TC_AV &&
3108 so->so_traffic_class <= SO_TC_VO;
3109 }
3110
3111 inline int
3112 soissrcbesteffort(struct socket *so)
3113 {
3114 return so->so_traffic_class == SO_TC_BE ||
3115 so->so_traffic_class == SO_TC_RD ||
3116 so->so_traffic_class == SO_TC_OAM;
3117 }
3118
3119 void
3120 soclearfastopen(struct socket *so)
3121 {
3122 if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
3123 so->so_flags1 &= ~SOF1_PRECONNECT_DATA;
3124 }
3125
3126 if (so->so_flags1 & SOF1_DATA_IDEMPOTENT) {
3127 so->so_flags1 &= ~SOF1_DATA_IDEMPOTENT;
3128 }
3129 }
3130
3131 void
3132 sonullevent(struct socket *so, void *arg, uint32_t hint)
3133 {
3134 #pragma unused(so, arg, hint)
3135 }
3136
3137 /*
3138 * Here is the definition of some of the basic objects in the kern.ipc
3139 * branch of the MIB.
3140 */
3141 SYSCTL_NODE(_kern, KERN_IPC, ipc,
3142 CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY, 0, "IPC");
3143
3144 /* Check that the maximum socket buffer size is within a range */
3145
3146 static int
3147 sysctl_sb_max SYSCTL_HANDLER_ARGS
3148 {
3149 #pragma unused(oidp, arg1, arg2)
3150 u_int32_t new_value;
3151 int changed = 0;
3152 int error = sysctl_io_number(req, sb_max, sizeof(u_int32_t),
3153 &new_value, &changed);
3154 if (!error && changed) {
3155 if (new_value > LOW_SB_MAX && new_value <= high_sb_max) {
3156 sb_max = new_value;
3157 } else {
3158 error = ERANGE;
3159 }
3160 }
3161 return error;
3162 }
3163
3164 SYSCTL_PROC(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf,
3165 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
3166 &sb_max, 0, &sysctl_sb_max, "IU", "Maximum socket buffer size");
3167
3168 SYSCTL_INT(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor,
3169 CTLFLAG_RW | CTLFLAG_LOCKED, &sb_efficiency, 0, "");
3170
3171 SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters,
3172 CTLFLAG_RD | CTLFLAG_LOCKED, &nmbclusters, 0, "");
3173
3174 SYSCTL_INT(_kern_ipc, OID_AUTO, njcl,
3175 CTLFLAG_RD | CTLFLAG_LOCKED, &njcl, 0, "");
3176
3177 SYSCTL_INT(_kern_ipc, OID_AUTO, njclbytes,
3178 CTLFLAG_RD | CTLFLAG_LOCKED, &njclbytes, 0, "");
3179
3180 SYSCTL_INT(_kern_ipc, KIPC_SOQLIMITCOMPAT, soqlimitcompat,
3181 CTLFLAG_RW | CTLFLAG_LOCKED, &soqlimitcompat, 1,
3182 "Enable socket queue limit compatibility");
3183
3184 /*
3185 * Hack alert -- rdar://33572856
3186 * A loopback test we cannot change was failing because it sets
3187 * SO_SNDTIMEO to 5 seconds and that's also the value
3188 * of the minimum persist timer. Because of the persist timer,
3189 * the connection was not idle for 5 seconds and SO_SNDTIMEO
3190 * was not triggering at 5 seconds, causing the test failure.
3191 * As a workaround we check the sysctl soqlencomp, which the test is
3192 * already setting, and use it to disable auto tuning of the receive buffer.
3193 */
3194
3195 extern u_int32_t tcp_do_autorcvbuf;
3196
3197 static int
3198 sysctl_soqlencomp SYSCTL_HANDLER_ARGS
3199 {
3200 #pragma unused(oidp, arg1, arg2)
3201 u_int32_t new_value;
3202 int changed = 0;
3203 int error = sysctl_io_number(req, soqlencomp, sizeof(u_int32_t),
3204 &new_value, &changed);
3205 if (!error && changed) {
3206 soqlencomp = new_value;
3207 if (new_value != 0) {
3208 tcp_do_autorcvbuf = 0;
3209 tcptv_persmin_val = 6 * TCP_RETRANSHZ;
3210 }
3211 }
3212 return error;
3213 }
3214 SYSCTL_PROC(_kern_ipc, OID_AUTO, soqlencomp,
3215 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
3216 &soqlencomp, 0, &sysctl_soqlencomp, "IU", "");
3217
3218 SYSCTL_INT(_kern_ipc, OID_AUTO, sbmb_cnt, CTLFLAG_RD | CTLFLAG_LOCKED,
3219 &total_sbmb_cnt, 0, "");
3220 SYSCTL_INT(_kern_ipc, OID_AUTO, sbmb_cnt_peak, CTLFLAG_RD | CTLFLAG_LOCKED,
3221 &total_sbmb_cnt_peak, 0, "");
3222 SYSCTL_INT(_kern_ipc, OID_AUTO, sbmb_cnt_floor, CTLFLAG_RD | CTLFLAG_LOCKED,
3223 &total_sbmb_cnt_floor, 0, "");
3224 SYSCTL_QUAD(_kern_ipc, OID_AUTO, sbmb_limreached, CTLFLAG_RD | CTLFLAG_LOCKED,
3225 &sbmb_limreached, "");
3226
3227
3228 SYSCTL_NODE(_kern_ipc, OID_AUTO, io_policy, CTLFLAG_RW, 0, "network IO policy");
3229
3230 SYSCTL_INT(_kern_ipc_io_policy, OID_AUTO, log, CTLFLAG_RW | CTLFLAG_LOCKED,
3231 &net_io_policy_log, 0, "");
3232
3233 #if CONFIG_PROC_UUID_POLICY
3234 SYSCTL_INT(_kern_ipc_io_policy, OID_AUTO, uuid, CTLFLAG_RW | CTLFLAG_LOCKED,
3235 &net_io_policy_uuid, 0, "");
3236 #endif /* CONFIG_PROC_UUID_POLICY */