]> git.saurik.com Git - apple/xnu.git/blame - bsd/kern/uipc_socket2.c
xnu-1228.0.2.tar.gz
[apple/xnu.git] / bsd / kern / uipc_socket2.c
CommitLineData
1c79356b 1/*
2d21ac55 2 * Copyright (c) 1998-2007 Apple Inc. All rights reserved.
5d5c5d0d 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b 27 */
1c79356b
A
28/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29/*
30 * Copyright (c) 1982, 1986, 1988, 1990, 1993
31 * The Regents of the University of California. All rights reserved.
32 *
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
35 * are met:
36 * 1. Redistributions of source code must retain the above copyright
37 * notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 * notice, this list of conditions and the following disclaimer in the
40 * documentation and/or other materials provided with the distribution.
41 * 3. All advertising materials mentioning features or use of this software
42 * must display the following acknowledgement:
43 * This product includes software developed by the University of
44 * California, Berkeley and its contributors.
45 * 4. Neither the name of the University nor the names of its contributors
46 * may be used to endorse or promote products derived from this software
47 * without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 *
61 * @(#)uipc_socket2.c 8.1 (Berkeley) 6/10/93
9bccf70c 62 * $FreeBSD: src/sys/kern/uipc_socket2.c,v 1.55.2.9 2001/07/26 18:53:02 peter Exp $
1c79356b 63 */
2d21ac55
A
64/*
65 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
66 * support for mandatory and extensible security protections. This notice
67 * is included in support of clause 2.2 (b) of the Apple Public License,
68 * Version 2.0.
69 */
1c79356b
A
70
71#include <sys/param.h>
72#include <sys/systm.h>
73#include <sys/domain.h>
74#include <sys/kernel.h>
91447636
A
75#include <sys/proc_internal.h>
76#include <sys/kauth.h>
1c79356b
A
77#include <sys/malloc.h>
78#include <sys/mbuf.h>
79#include <sys/protosw.h>
80#include <sys/stat.h>
81#include <sys/socket.h>
82#include <sys/socketvar.h>
83#include <sys/signalvar.h>
84#include <sys/sysctl.h>
85#include <sys/ev.h>
91447636
A
86#include <kern/locks.h>
87#include <net/route.h>
88#include <netinet/in.h>
89#include <netinet/in_pcb.h>
fa4905b1 90#include <sys/kdebug.h>
2d21ac55
A
91#include <libkern/OSAtomic.h>
92
93#if CONFIG_MACF
94#include <security/mac_framework.h>
95#endif
96
97/* TODO: this should be in a header file somewhere */
98extern void postevent(struct socket *, struct sockbuf *, int);
fa4905b1
A
99
100#define DBG_FNC_SBDROP NETDBG_CODE(DBG_NETSOCK, 4)
101#define DBG_FNC_SBAPPEND NETDBG_CODE(DBG_NETSOCK, 5)
102
2d21ac55
A
103static inline void sbcompress(struct sockbuf *, struct mbuf *, struct mbuf *);
104static struct socket *sonewconn_internal(struct socket *, int);
105static int sbappendaddr_internal(struct sockbuf *, struct sockaddr *,
106 struct mbuf *, struct mbuf *);
107static int sbappendcontrol_internal(struct sockbuf *, struct mbuf *,
108 struct mbuf *);
fa4905b1 109
1c79356b
A
110/*
111 * Primitive routines for operating on sockets and socket buffers
112 */
2d21ac55
A
113static int soqlimitcompat = 1;
114static int soqlencomp = 0;
1c79356b
A
115
116u_long sb_max = SB_MAX; /* XXX should be static */
117
118static u_long sb_efficiency = 8; /* parameter for sbreserve() */
2d21ac55
A
119__private_extern__ unsigned int total_mb_cnt = 0;
120__private_extern__ unsigned int total_cl_cnt = 0;
121__private_extern__ int sbspace_factor = 8;
1c79356b 122
1c79356b
A
123/*
124 * Procedures to manipulate state flags of socket
125 * and do appropriate wakeups. Normal sequence from the
126 * active (originating) side is that soisconnecting() is
127 * called during processing of connect() call,
128 * resulting in an eventual call to soisconnected() if/when the
129 * connection is established. When the connection is torn down
9bccf70c 130 * soisdisconnecting() is called during processing of disconnect() call,
1c79356b
A
131 * and soisdisconnected() is called when the connection to the peer
132 * is totally severed. The semantics of these routines are such that
133 * connectionless protocols can call soisconnected() and soisdisconnected()
134 * only, bypassing the in-progress calls when setting up a ``connection''
135 * takes no time.
136 *
137 * From the passive side, a socket is created with
e3027f41
A
138 * two queues of sockets: so_incomp for connections in progress
139 * and so_comp for connections already made and awaiting user acceptance.
9bccf70c 140 * As a protocol is preparing incoming connections, it creates a socket
e3027f41 141 * structure queued on so_incomp by calling sonewconn(). When the connection
1c79356b 142 * is established, soisconnected() is called, and transfers the
e3027f41 143 * socket structure to so_comp, making it available to accept().
1c79356b 144 *
9bccf70c 145 * If a socket is closed with sockets on either
e3027f41 146 * so_incomp or so_comp, these sockets are dropped.
9bccf70c 147 *
1c79356b
A
148 * If higher level protocols are implemented in
149 * the kernel, the wakeups done here will sometimes
150 * cause software-interrupt process scheduling.
151 */
1c79356b 152void
2d21ac55 153soisconnecting(struct socket *so)
1c79356b
A
154{
155
156 so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
157 so->so_state |= SS_ISCONNECTING;
2d21ac55 158
91447636 159 sflt_notify(so, sock_evt_connecting, NULL);
1c79356b
A
160}
161
162void
2d21ac55 163soisconnected(struct socket *so)
9bccf70c
A
164{
165 struct socket *head = so->so_head;
1c79356b
A
166
167 so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
168 so->so_state |= SS_ISCONNECTED;
2d21ac55 169
91447636 170 sflt_notify(so, sock_evt_connected, NULL);
2d21ac55 171
1c79356b 172 if (head && (so->so_state & SS_INCOMP)) {
ff6e181a
A
173 so->so_state &= ~SS_INCOMP;
174 so->so_state |= SS_COMP;
175 if (head->so_proto->pr_getlock != NULL) {
176 socket_unlock(so, 0);
91447636 177 socket_lock(head, 1);
ff6e181a 178 }
91447636 179 postevent(head, 0, EV_RCONN);
1c79356b
A
180 TAILQ_REMOVE(&head->so_incomp, so, so_list);
181 head->so_incqlen--;
1c79356b 182 TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
1c79356b 183 sorwakeup(head);
91447636 184 wakeup_one((caddr_t)&head->so_timeo);
ff6e181a 185 if (head->so_proto->pr_getlock != NULL) {
91447636 186 socket_unlock(head, 1);
ff6e181a
A
187 socket_lock(so, 0);
188 }
1c79356b 189 } else {
91447636 190 postevent(so, 0, EV_WCONN);
1c79356b
A
191 wakeup((caddr_t)&so->so_timeo);
192 sorwakeup(so);
193 sowwakeup(so);
194 }
195}
196
197void
2d21ac55 198soisdisconnecting(struct socket *so)
9bccf70c 199{
1c79356b
A
200 so->so_state &= ~SS_ISCONNECTING;
201 so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
91447636 202 sflt_notify(so, sock_evt_disconnecting, NULL);
1c79356b
A
203 wakeup((caddr_t)&so->so_timeo);
204 sowwakeup(so);
205 sorwakeup(so);
206}
207
208void
2d21ac55 209soisdisconnected(struct socket *so)
9bccf70c 210{
1c79356b 211 so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
9bccf70c 212 so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
91447636 213 sflt_notify(so, sock_evt_disconnected, NULL);
1c79356b
A
214 wakeup((caddr_t)&so->so_timeo);
215 sowwakeup(so);
216 sorwakeup(so);
217}
218
1c79356b
A
219/*
220 * When an attempt at a new connection is noted on a socket
221 * which accepts connections, sonewconn is called. If the
222 * connection is possible (subject to space constraints, etc.)
223 * then we allocate a new structure, properly linked into the
224 * data structure of the original socket, and return this.
225 * Connstatus may be 0, or SS_ISCONFIRMING, or SS_ISCONNECTED.
226 */
91447636 227static struct socket *
2d21ac55 228sonewconn_internal(struct socket *head, int connstatus)
9bccf70c 229{
2d21ac55
A
230 int so_qlen, error = 0;
231 struct socket *so;
91447636
A
232 lck_mtx_t *mutex_held;
233
2d21ac55 234 if (head->so_proto->pr_getlock != NULL)
91447636 235 mutex_held = (*head->so_proto->pr_getlock)(head, 0);
2d21ac55 236 else
91447636
A
237 mutex_held = head->so_proto->pr_domain->dom_mtx;
238 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
1c79356b 239
2d21ac55
A
240 if (!soqlencomp) {
241 /*
242 * This is the default case; so_qlen represents the
243 * sum of both incomplete and completed queues.
244 */
245 so_qlen = head->so_qlen;
246 } else {
247 /*
248 * When kern.ipc.soqlencomp is set to 1, so_qlen
249 * represents only the completed queue. Since we
250 * cannot let the incomplete queue goes unbounded
251 * (in case of SYN flood), we cap the incomplete
252 * queue length to at most somaxconn, and use that
253 * as so_qlen so that we fail immediately below.
254 */
255 so_qlen = head->so_qlen - head->so_incqlen;
256 if (head->so_incqlen > somaxconn)
257 so_qlen = somaxconn;
258 }
259
260 if (so_qlen >=
261 (soqlimitcompat ? head->so_qlimit : (3 * head->so_qlimit / 2)))
1c79356b 262 return ((struct socket *)0);
2d21ac55
A
263 so = soalloc(M_NOWAIT, head->so_proto->pr_domain->dom_family,
264 head->so_type);
1c79356b
A
265 if (so == NULL)
266 return ((struct socket *)0);
9bccf70c
A
267 /* check if head was closed during the soalloc */
268 if (head->so_proto == NULL) {
2d21ac55
A
269 sodealloc(so);
270 return ((struct socket *)0);
1c79356b
A
271 }
272
273 so->so_head = head;
274 so->so_type = head->so_type;
275 so->so_options = head->so_options &~ SO_ACCEPTCONN;
276 so->so_linger = head->so_linger;
277 so->so_state = head->so_state | SS_NOFDREF;
278 so->so_proto = head->so_proto;
279 so->so_timeo = head->so_timeo;
280 so->so_pgid = head->so_pgid;
281 so->so_uid = head->so_uid;
2d21ac55 282 so->so_flags = head->so_flags & (SOF_REUSESHAREUID|SOF_NOTIFYCONFLICT); /* inherit SO_REUSESHAREUID and SO_NOTIFYCONFLICT ocket options */
91447636 283 so->so_usecount = 1;
0c530ab8
A
284 so->next_lock_lr = 0;
285 so->next_unlock_lr = 0;
1c79356b 286
13fec989
A
287#ifdef __APPLE__
288 so->so_rcv.sb_flags |= SB_RECV; /* XXX */
289 so->so_rcv.sb_so = so->so_snd.sb_so = so;
290 TAILQ_INIT(&so->so_evlist);
291#endif
292
2d21ac55
A
293#if CONFIG_MACF_SOCKET
294 mac_socket_label_associate_accept(head, so);
295#endif
296
91447636
A
297 if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat)) {
298 sflt_termsock(so);
9bccf70c
A
299 sodealloc(so);
300 return ((struct socket *)0);
301 }
302
91447636 303 /*
2d21ac55
A
304 * Must be done with head unlocked to avoid deadlock
305 * for protocol with per socket mutexes.
91447636 306 */
37839358
A
307 if (head->so_proto->pr_unlock)
308 socket_unlock(head, 0);
2d21ac55
A
309 if (((*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL) != 0) ||
310 error) {
91447636 311 sflt_termsock(so);
1c79356b 312 sodealloc(so);
37839358
A
313 if (head->so_proto->pr_unlock)
314 socket_lock(head, 0);
1c79356b
A
315 return ((struct socket *)0);
316 }
37839358
A
317 if (head->so_proto->pr_unlock)
318 socket_lock(head, 0);
9bccf70c 319#ifdef __APPLE__
1c79356b 320 so->so_proto->pr_domain->dom_refs++;
9bccf70c 321#endif
1c79356b
A
322
323 if (connstatus) {
324 TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
325 so->so_state |= SS_COMP;
326 } else {
327 TAILQ_INSERT_TAIL(&head->so_incomp, so, so_list);
328 so->so_state |= SS_INCOMP;
329 head->so_incqlen++;
330 }
331 head->so_qlen++;
91447636 332
13fec989 333#ifdef __APPLE__
0c530ab8
A
334 /* Attach socket filters for this protocol */
335 sflt_initsock(so);
9bccf70c 336#endif
2d21ac55 337
91447636
A
338 if (connstatus) {
339 so->so_state |= connstatus;
340 sorwakeup(head);
341 wakeup((caddr_t)&head->so_timeo);
342 }
1c79356b
A
343 return (so);
344}
345
91447636
A
346
347struct socket *
2d21ac55 348sonewconn(struct socket *head, int connstatus, const struct sockaddr *from)
91447636
A
349{
350 int error = 0;
2d21ac55
A
351 struct socket_filter_entry *filter;
352 int filtered = 0;
353
91447636 354 for (filter = head->so_filt; filter && (error == 0);
2d21ac55 355 filter = filter->sfe_next_onsocket) {
91447636
A
356 if (filter->sfe_filter->sf_filter.sf_connect_in) {
357 if (filtered == 0) {
358 filtered = 1;
359 sflt_use(head);
360 socket_unlock(head, 0);
361 }
2d21ac55
A
362 error = filter->sfe_filter->sf_filter.
363 sf_connect_in(filter->sfe_cookie, head, from);
91447636
A
364 }
365 }
366 if (filtered != 0) {
367 socket_lock(head, 0);
368 sflt_unuse(head);
369 }
2d21ac55 370
91447636 371 if (error) {
2d21ac55 372 return (NULL);
91447636 373 }
2d21ac55
A
374
375 return (sonewconn_internal(head, connstatus));
91447636
A
376}
377
1c79356b
A
378/*
379 * Socantsendmore indicates that no more data will be sent on the
380 * socket; it would normally be applied to a socket when the user
381 * informs the system that no more data is to be sent, by the protocol
382 * code (in case PRU_SHUTDOWN). Socantrcvmore indicates that no more data
383 * will be received, and will normally be applied to the socket by a
384 * protocol when it detects that the peer will send no more data.
385 * Data queued for reading in the socket may yet be read.
386 */
387
388void
2d21ac55 389socantsendmore(struct socket *so)
9bccf70c 390{
1c79356b 391 so->so_state |= SS_CANTSENDMORE;
91447636 392 sflt_notify(so, sock_evt_cantsendmore, NULL);
1c79356b
A
393 sowwakeup(so);
394}
395
396void
2d21ac55 397socantrcvmore(struct socket *so)
9bccf70c 398{
1c79356b 399 so->so_state |= SS_CANTRCVMORE;
91447636 400 sflt_notify(so, sock_evt_cantrecvmore, NULL);
1c79356b
A
401 sorwakeup(so);
402}
403
404/*
405 * Wait for data to arrive at/drain from a socket buffer.
2d21ac55
A
406 *
407 * Returns: 0 Success
408 * EBADF
409 * msleep:EINTR
1c79356b
A
410 */
411int
2d21ac55 412sbwait(struct sockbuf *sb)
1c79356b 413{
0c530ab8 414 int error = 0, lr_saved;
91447636
A
415 struct socket *so = sb->sb_so;
416 lck_mtx_t *mutex_held;
417 struct timespec ts;
418
0c530ab8 419 lr_saved = (unsigned int) __builtin_return_address(0);
2d21ac55
A
420
421 if (so->so_proto->pr_getlock != NULL)
91447636 422 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
2d21ac55 423 else
91447636 424 mutex_held = so->so_proto->pr_domain->dom_mtx;
1c79356b
A
425
426 sb->sb_flags |= SB_WAIT;
91447636
A
427
428 if (so->so_usecount < 1)
2d21ac55 429 panic("sbwait: so=%p refcount=%d\n", so, so->so_usecount);
91447636
A
430 ts.tv_sec = sb->sb_timeo.tv_sec;
431 ts.tv_nsec = sb->sb_timeo.tv_usec * 1000;
432 error = msleep((caddr_t)&sb->sb_cc, mutex_held,
2d21ac55 433 (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait", &ts);
91447636
A
434
435 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
436
437 if (so->so_usecount < 1)
2d21ac55 438 panic("sbwait: so=%p refcount=%d\n", so, so->so_usecount);
91447636
A
439
440 if ((so->so_state & SS_DRAINING)) {
441 error = EBADF;
442 }
443
444 return (error);
1c79356b
A
445}
446
447/*
448 * Lock a sockbuf already known to be locked;
449 * return any error returned from sleep (EINTR).
2d21ac55
A
450 *
451 * Returns: 0 Success
452 * EINTR
1c79356b
A
453 */
454int
2d21ac55 455sb_lock(struct sockbuf *sb)
1c79356b 456{
91447636 457 struct socket *so = sb->sb_so;
2d21ac55 458 lck_mtx_t *mutex_held;
0c530ab8 459 int error = 0;
2d21ac55 460
91447636 461 if (so == NULL)
2d21ac55 462 panic("sb_lock: null so back pointer sb=%p\n", sb);
1c79356b
A
463
464 while (sb->sb_flags & SB_LOCK) {
465 sb->sb_flags |= SB_WANT;
2d21ac55 466 if (so->so_proto->pr_getlock != NULL)
91447636
A
467 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
468 else
469 mutex_held = so->so_proto->pr_domain->dom_mtx;
470 if (so->so_usecount < 1)
2d21ac55
A
471 panic("sb_lock: so=%p refcount=%d\n", so,
472 so->so_usecount);
0c530ab8 473
91447636 474 error = msleep((caddr_t)&sb->sb_flags, mutex_held,
2d21ac55
A
475 (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH,
476 "sb_lock", 0);
91447636 477 if (so->so_usecount < 1)
2d21ac55
A
478 panic("sb_lock: 2 so=%p refcount=%d\n", so,
479 so->so_usecount);
480 if (error)
1c79356b
A
481 return (error);
482 }
483 sb->sb_flags |= SB_LOCK;
484 return (0);
485}
486
487/*
488 * Wakeup processes waiting on a socket buffer.
489 * Do asynchronous notification via SIGIO
490 * if the socket has the SS_ASYNC flag set.
491 */
492void
2d21ac55 493sowakeup(struct socket *so, struct sockbuf *sb)
1c79356b 494{
0b4e3aa0 495 sb->sb_flags &= ~SB_SEL;
1c79356b 496 selwakeup(&sb->sb_sel);
1c79356b
A
497 if (sb->sb_flags & SB_WAIT) {
498 sb->sb_flags &= ~SB_WAIT;
499 wakeup((caddr_t)&sb->sb_cc);
500 }
501 if (so->so_state & SS_ASYNC) {
502 if (so->so_pgid < 0)
503 gsignal(-so->so_pgid, SIGIO);
2d21ac55
A
504 else if (so->so_pgid > 0)
505 proc_signal(so->so_pgid, SIGIO);
1c79356b 506 }
91447636
A
507 if (sb->sb_flags & SB_KNOTE) {
508 KNOTE(&sb->sb_sel.si_note, SO_FILT_HINT_LOCKED);
509 }
510 if (sb->sb_flags & SB_UPCALL) {
2d21ac55
A
511 void (*so_upcall)(struct socket *, caddr_t, int);
512 caddr_t so_upcallarg;
513
514 so_upcall = so->so_upcall;
515 so_upcallarg = so->so_upcallarg;
516 /* Let close know that we're about to do an upcall */
517 so->so_flags |= SOF_UPCALLINUSE;
518
91447636 519 socket_unlock(so, 0);
2d21ac55 520 (*so_upcall)(so, so_upcallarg, M_DONTWAIT);
91447636 521 socket_lock(so, 0);
2d21ac55
A
522
523 so->so_flags &= ~SOF_UPCALLINUSE;
524 /* Tell close that it's safe to proceed */
525 if (so->so_flags & SOF_CLOSEWAIT)
526 wakeup((caddr_t)&so->so_upcall);
91447636 527 }
1c79356b
A
528}
529
530/*
531 * Socket buffer (struct sockbuf) utility routines.
532 *
533 * Each socket contains two socket buffers: one for sending data and
534 * one for receiving data. Each buffer contains a queue of mbufs,
535 * information about the number of mbufs and amount of data in the
536 * queue, and other fields allowing select() statements and notification
537 * on data availability to be implemented.
538 *
539 * Data stored in a socket buffer is maintained as a list of records.
540 * Each record is a list of mbufs chained together with the m_next
541 * field. Records are chained together with the m_nextpkt field. The upper
542 * level routine soreceive() expects the following conventions to be
543 * observed when placing information in the receive buffer:
544 *
545 * 1. If the protocol requires each message be preceded by the sender's
546 * name, then a record containing that name must be present before
547 * any associated data (mbuf's must be of type MT_SONAME).
548 * 2. If the protocol supports the exchange of ``access rights'' (really
549 * just additional data associated with the message), and there are
550 * ``rights'' to be received, then a record containing this data
551 * should be present (mbuf's must be of type MT_RIGHTS).
552 * 3. If a name or rights record exists, then it must be followed by
553 * a data record, perhaps of zero length.
554 *
555 * Before using a new socket structure it is first necessary to reserve
556 * buffer space to the socket, by calling sbreserve(). This should commit
557 * some of the available buffer space in the system buffer pool for the
558 * socket (currently, it does nothing but enforce limits). The space
559 * should be released by calling sbrelease() when the socket is destroyed.
560 */
561
2d21ac55
A
562/*
563 * Returns: 0 Success
564 * ENOBUFS
565 */
1c79356b 566int
2d21ac55 567soreserve(struct socket *so, u_long sndcc, u_long rcvcc)
1c79356b 568{
1c79356b
A
569
570 if (sbreserve(&so->so_snd, sndcc) == 0)
571 goto bad;
572 if (sbreserve(&so->so_rcv, rcvcc) == 0)
573 goto bad2;
574 if (so->so_rcv.sb_lowat == 0)
575 so->so_rcv.sb_lowat = 1;
576 if (so->so_snd.sb_lowat == 0)
577 so->so_snd.sb_lowat = MCLBYTES;
578 if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
579 so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
580 return (0);
581bad2:
9bccf70c 582#ifdef __APPLE__
0b4e3aa0 583 selthreadclear(&so->so_snd.sb_sel);
9bccf70c 584#endif
1c79356b
A
585 sbrelease(&so->so_snd);
586bad:
587 return (ENOBUFS);
588}
589
590/*
591 * Allot mbufs to a sockbuf.
592 * Attempt to scale mbmax so that mbcnt doesn't become limiting
593 * if buffering efficiency is near the normal case.
594 */
595int
2d21ac55 596sbreserve(struct sockbuf *sb, u_long cc)
1c79356b
A
597{
598 if ((u_quad_t)cc > (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES))
599 return (0);
600 sb->sb_hiwat = cc;
601 sb->sb_mbmax = min(cc * sb_efficiency, sb_max);
602 if (sb->sb_lowat > sb->sb_hiwat)
603 sb->sb_lowat = sb->sb_hiwat;
604 return (1);
605}
606
607/*
608 * Free mbufs held by a socket, and reserved mbuf space.
609 */
2d21ac55 610/* WARNING needs to do selthreadclear() before calling this */
1c79356b 611void
2d21ac55 612sbrelease(struct sockbuf *sb)
1c79356b 613{
1c79356b 614 sbflush(sb);
9bccf70c
A
615 sb->sb_hiwat = 0;
616 sb->sb_mbmax = 0;
1c79356b
A
617}
618
619/*
620 * Routines to add and remove
621 * data from an mbuf queue.
622 *
623 * The routines sbappend() or sbappendrecord() are normally called to
624 * append new mbufs to a socket buffer, after checking that adequate
625 * space is available, comparing the function sbspace() with the amount
626 * of data to be added. sbappendrecord() differs from sbappend() in
627 * that data supplied is treated as the beginning of a new record.
628 * To place a sender's address, optional access rights, and data in a
629 * socket receive buffer, sbappendaddr() should be used. To place
630 * access rights and data in a socket receive buffer, sbappendrights()
631 * should be used. In either case, the new data begins a new record.
632 * Note that unlike sbappend() and sbappendrecord(), these routines check
633 * for the caller that there will be enough space to store the data.
634 * Each fails if there is not enough space, or if it cannot find mbufs
635 * to store additional information in.
636 *
637 * Reliable protocols may use the socket send buffer to hold data
638 * awaiting acknowledgement. Data is normally copied from a socket
639 * send buffer in a protocol with m_copy for output to a peer,
640 * and then removing the data from the socket buffer with sbdrop()
641 * or sbdroprecord() when the data is acknowledged by the peer.
642 */
643
644/*
645 * Append mbuf chain m to the last record in the
646 * socket buffer sb. The additional space associated
647 * with the mbuf chain is recorded in sb. Empty mbufs are
648 * discarded and mbufs are compacted where possible.
649 */
91447636 650int
2d21ac55 651sbappend(struct sockbuf *sb, struct mbuf *m)
9bccf70c 652{
2d21ac55 653 struct socket *so = sb->sb_so;
1c79356b 654
2d21ac55
A
655 if (m == NULL || (sb->sb_flags & SB_DROP)) {
656 if (m != NULL)
657 m_freem(m);
658 return (0);
659 }
fa4905b1 660
2d21ac55 661 SBLASTRECORDCHK(sb, "sbappend 1");
fa4905b1 662
2d21ac55
A
663 if (sb->sb_lastrecord != NULL && (sb->sb_mbtail->m_flags & M_EOR))
664 return (sbappendrecord(sb, m));
665
666 if (sb->sb_flags & SB_RECV) {
667 int error = sflt_data_in(so, NULL, &m, NULL, 0, NULL);
668 SBLASTRECORDCHK(sb, "sbappend 2");
669 if (error != 0) {
670 if (error != EJUSTRETURN)
671 m_freem(m);
672 return (0);
91447636 673 }
91447636
A
674 }
675
2d21ac55
A
676 /* If this is the first record, it's also the last record */
677 if (sb->sb_lastrecord == NULL)
678 sb->sb_lastrecord = m;
fa4905b1 679
2d21ac55
A
680 sbcompress(sb, m, sb->sb_mbtail);
681 SBLASTRECORDCHK(sb, "sbappend 3");
682 return (1);
683}
684
685/*
686 * Similar to sbappend, except that this is optimized for stream sockets.
687 */
688int
689sbappendstream(struct sockbuf *sb, struct mbuf *m)
690{
691 struct socket *so = sb->sb_so;
692
693 if (m->m_nextpkt != NULL || (sb->sb_mb != sb->sb_lastrecord))
694 panic("sbappendstream: nexpkt %p || mb %p != lastrecord %p\n",
695 m->m_nextpkt, sb->sb_mb, sb->sb_lastrecord);
696
697 SBLASTMBUFCHK(sb, __func__);
698
699 if (m == NULL || (sb->sb_flags & SB_DROP)) {
700 if (m != NULL)
701 m_freem(m);
702 return (0);
703 }
704
705 if (sb->sb_flags & SB_RECV) {
706 int error = sflt_data_in(so, NULL, &m, NULL, 0, NULL);
707 SBLASTRECORDCHK(sb, "sbappendstream 1");
708 if (error != 0) {
709 if (error != EJUSTRETURN)
710 m_freem(m);
711 return (0);
712 }
713 }
714
715 sbcompress(sb, m, sb->sb_mbtail);
716 sb->sb_lastrecord = sb->sb_mb;
717 SBLASTRECORDCHK(sb, "sbappendstream 2");
718 return (1);
1c79356b
A
719}
720
#ifdef SOCKBUF_DEBUG
/*
 * Debug aid: recompute the byte count and mbuf storage of every mbuf
 * queued on `sb' and panic if either disagrees with the counters kept
 * in the sockbuf.  Does nothing beyond the lock assertion when
 * sbchecking is 0.
 */
void
sbcheck(struct sockbuf *sb)
{
	struct mbuf *rec, *nextrec = 0;
	struct mbuf *m;
	u_long len = 0, mbcnt = 0;
	lck_mtx_t *mutex_held;

	if (sb->sb_so->so_proto->pr_getlock != NULL)
		mutex_held = (*sb->sb_so->so_proto->pr_getlock)(sb->sb_so, 0);
	else
		mutex_held = sb->sb_so->so_proto->pr_domain->dom_mtx;

	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

	if (sbchecking == 0)
		return;

	/* Outer walk: records (m_nextpkt); inner walk: mbufs (m_next). */
	for (rec = sb->sb_mb; rec != NULL; rec = nextrec) {
		nextrec = rec->m_nextpkt;
		for (m = rec; m != NULL; m = m->m_next) {
			len += m->m_len;
			mbcnt += MSIZE;
			/* XXX pretty sure this is bogus */
			if (m->m_flags & M_EXT)
				mbcnt += m->m_ext.ext_size;
		}
	}

	if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
		panic("cc %ld != %ld || mbcnt %ld != %ld\n", len, sb->sb_cc,
		    mbcnt, sb->sb_mbcnt);
	}
}
#endif
756
2d21ac55
A
757void
758sblastrecordchk(struct sockbuf *sb, const char *where)
759{
760 struct mbuf *m = sb->sb_mb;
761
762 while (m && m->m_nextpkt)
763 m = m->m_nextpkt;
764
765 if (m != sb->sb_lastrecord) {
766 printf("sblastrecordchk: mb %p lastrecord %p last %p\n",
767 sb->sb_mb, sb->sb_lastrecord, m);
768 printf("packet chain:\n");
769 for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt)
770 printf("\t%p\n", m);
771 panic("sblastrecordchk from %s", where);
772 }
773}
774
775void
776sblastmbufchk(struct sockbuf *sb, const char *where)
777{
778 struct mbuf *m = sb->sb_mb;
779 struct mbuf *n;
780
781 while (m && m->m_nextpkt)
782 m = m->m_nextpkt;
783
784 while (m && m->m_next)
785 m = m->m_next;
786
787 if (m != sb->sb_mbtail) {
788 printf("sblastmbufchk: mb %p mbtail %p last %p\n",
789 sb->sb_mb, sb->sb_mbtail, m);
790 printf("packet tree:\n");
791 for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
792 printf("\t");
793 for (n = m; n != NULL; n = n->m_next)
794 printf("%p ", n);
795 printf("\n");
796 }
797 panic("sblastmbufchk from %s", where);
798 }
799}
800
1c79356b 801/*
2d21ac55 802 * Similar to sbappend, except the mbuf chain begins a new record.
1c79356b 803 */
91447636 804int
2d21ac55 805sbappendrecord(struct sockbuf *sb, struct mbuf *m0)
1c79356b 806{
2d21ac55
A
807 struct mbuf *m;
808 int space = 0;
9bccf70c 809
2d21ac55
A
810 if (m0 == NULL || (sb->sb_flags & SB_DROP)) {
811 if (m0 != NULL)
812 m_freem(m0);
813 return (0);
814 }
815
816 for (m = m0; m != NULL; m = m->m_next)
817 space += m->m_len;
818
819 if (space > sbspace(sb) && !(sb->sb_flags & SB_UNIX)) {
820 m_freem(m0);
821 return (0);
822 }
823
824 if (sb->sb_flags & SB_RECV) {
825 int error = sflt_data_in(sb->sb_so, NULL, &m0, NULL,
826 sock_data_filt_flag_record, NULL);
91447636 827 if (error != 0) {
2d21ac55 828 SBLASTRECORDCHK(sb, "sbappendrecord 1");
91447636
A
829 if (error != EJUSTRETURN)
830 m_freem(m0);
2d21ac55 831 return (0);
1c79356b 832 }
1c79356b 833 }
2d21ac55 834
1c79356b 835 /*
1c79356b
A
836 * Note this permits zero length records.
837 */
838 sballoc(sb, m0);
2d21ac55
A
839 SBLASTRECORDCHK(sb, "sbappendrecord 2");
840 if (sb->sb_lastrecord != NULL) {
841 sb->sb_lastrecord->m_nextpkt = m0;
842 } else {
1c79356b 843 sb->sb_mb = m0;
2d21ac55
A
844 }
845 sb->sb_lastrecord = m0;
846
1c79356b
A
847 m = m0->m_next;
848 m0->m_next = 0;
849 if (m && (m0->m_flags & M_EOR)) {
850 m0->m_flags &= ~M_EOR;
851 m->m_flags |= M_EOR;
852 }
2d21ac55
A
853 sbcompress(sb, m, m0);
854 SBLASTRECORDCHK(sb, "sbappendrecord 3");
855 return (1);
1c79356b
A
856}
857
858/*
859 * As above except that OOB data
860 * is inserted at the beginning of the sockbuf,
861 * but after any other OOB data.
862 */
91447636 863int
2d21ac55 864sbinsertoob(struct sockbuf *sb, struct mbuf *m0)
1c79356b 865{
91447636
A
866 struct mbuf *m;
867 struct mbuf **mp;
1c79356b
A
868
869 if (m0 == 0)
2d21ac55
A
870 return (0);
871
872 SBLASTRECORDCHK(sb, "sbinsertoob 1");
873
91447636
A
874 if ((sb->sb_flags & SB_RECV) != 0) {
875 int error = sflt_data_in(sb->sb_so, NULL, &m0, NULL,
2d21ac55
A
876 sock_data_filt_flag_oob, NULL);
877
878 SBLASTRECORDCHK(sb, "sbinsertoob 2");
91447636
A
879 if (error) {
880 if (error != EJUSTRETURN) {
881 m_freem(m0);
882 }
2d21ac55 883 return (0);
1c79356b 884 }
1c79356b 885 }
2d21ac55
A
886
887 for (mp = &sb->sb_mb; *mp; mp = &((*mp)->m_nextpkt)) {
888 m = *mp;
889again:
1c79356b
A
890 switch (m->m_type) {
891
892 case MT_OOBDATA:
893 continue; /* WANT next train */
894
895 case MT_CONTROL:
896 m = m->m_next;
897 if (m)
898 goto again; /* inspect THIS train further */
899 }
900 break;
901 }
902 /*
903 * Put the first mbuf on the queue.
904 * Note this permits zero length records.
905 */
906 sballoc(sb, m0);
907 m0->m_nextpkt = *mp;
2d21ac55
A
908 if (*mp == NULL) {
909 /* m0 is actually the new tail */
910 sb->sb_lastrecord = m0;
911 }
1c79356b
A
912 *mp = m0;
913 m = m0->m_next;
914 m0->m_next = 0;
915 if (m && (m0->m_flags & M_EOR)) {
916 m0->m_flags &= ~M_EOR;
917 m->m_flags |= M_EOR;
918 }
2d21ac55
A
919 sbcompress(sb, m, m0);
920 SBLASTRECORDCHK(sb, "sbinsertoob 3");
921 return (1);
1c79356b
A
922}
923
924/*
925 * Append address and data, and optionally, control (ancillary) data
926 * to the receive queue of a socket. If present,
927 * m0 must include a packet header with total length.
928 * Returns 0 if no space in sockbuf or insufficient mbufs.
2d21ac55
A
929 *
930 * Returns: 0 No space/out of mbufs
931 * 1 Success
1c79356b 932 */
91447636 933static int
2d21ac55
A
934sbappendaddr_internal(struct sockbuf *sb, struct sockaddr *asa,
935 struct mbuf *m0, struct mbuf *control)
1c79356b 936{
2d21ac55 937 struct mbuf *m, *n, *nlast;
1c79356b 938 int space = asa->sa_len;
1c79356b
A
939
940 if (m0 && (m0->m_flags & M_PKTHDR) == 0)
941 panic("sbappendaddr");
942
1c79356b
A
943 if (m0)
944 space += m0->m_pkthdr.len;
945 for (n = control; n; n = n->m_next) {
946 space += n->m_len;
947 if (n->m_next == 0) /* keep pointer to last control buf */
948 break;
949 }
950 if (space > sbspace(sb))
951 return (0);
952 if (asa->sa_len > MLEN)
953 return (0);
954 MGET(m, M_DONTWAIT, MT_SONAME);
955 if (m == 0)
956 return (0);
957 m->m_len = asa->sa_len;
958 bcopy((caddr_t)asa, mtod(m, caddr_t), asa->sa_len);
959 if (n)
960 n->m_next = m0; /* concatenate data to control */
961 else
962 control = m0;
963 m->m_next = control;
2d21ac55
A
964
965 SBLASTRECORDCHK(sb, "sbappendadddr 1");
966
967 for (n = m; n->m_next != NULL; n = n->m_next)
1c79356b 968 sballoc(sb, n);
2d21ac55
A
969 sballoc(sb, n);
970 nlast = n;
971
972 if (sb->sb_lastrecord != NULL) {
973 sb->sb_lastrecord->m_nextpkt = m;
974 } else {
1c79356b 975 sb->sb_mb = m;
2d21ac55
A
976 }
977 sb->sb_lastrecord = m;
978 sb->sb_mbtail = nlast;
979
980 SBLASTMBUFCHK(sb, __func__);
981 SBLASTRECORDCHK(sb, "sbappendadddr 2");
982
983 postevent(0, sb, EV_RWBYTES);
1c79356b
A
984 return (1);
985}
986
2d21ac55
A
987/*
988 * Returns: 0 Error: No space/out of mbufs/etc.
989 * 1 Success
990 *
991 * Imputed: (*error_out) errno for error
992 * ENOBUFS
993 * sflt_data_in:??? [whatever a filter author chooses]
994 */
1c79356b 995int
2d21ac55
A
996sbappendaddr(struct sockbuf *sb, struct sockaddr *asa, struct mbuf *m0,
997 struct mbuf *control, int *error_out)
91447636
A
998{
999 int result = 0;
2d21ac55
A
1000 boolean_t sb_unix = (sb->sb_flags & SB_UNIX);
1001
1002 if (error_out)
1003 *error_out = 0;
1004
91447636
A
1005 if (m0 && (m0->m_flags & M_PKTHDR) == 0)
1006 panic("sbappendaddrorfree");
2d21ac55
A
1007
1008 if (sb->sb_flags & SB_DROP) {
1009 if (m0 != NULL)
1010 m_freem(m0);
1011 if (control != NULL && !sb_unix)
1012 m_freem(control);
1013 if (error_out != NULL)
1014 *error_out = EINVAL;
1015 return (0);
1016 }
1017
91447636
A
1018 /* Call socket data in filters */
1019 if ((sb->sb_flags & SB_RECV) != 0) {
1020 int error;
cc9f6e38 1021 error = sflt_data_in(sb->sb_so, asa, &m0, &control, 0, NULL);
2d21ac55 1022 SBLASTRECORDCHK(sb, __func__);
91447636
A
1023 if (error) {
1024 if (error != EJUSTRETURN) {
2d21ac55
A
1025 if (m0)
1026 m_freem(m0);
1027 if (control != NULL && !sb_unix)
1028 m_freem(control);
1029 if (error_out)
1030 *error_out = error;
91447636 1031 }
2d21ac55 1032 return (0);
91447636
A
1033 }
1034 }
2d21ac55 1035
91447636
A
1036 result = sbappendaddr_internal(sb, asa, m0, control);
1037 if (result == 0) {
2d21ac55
A
1038 if (m0)
1039 m_freem(m0);
1040 if (control != NULL && !sb_unix)
1041 m_freem(control);
1042 if (error_out)
1043 *error_out = ENOBUFS;
91447636 1044 }
2d21ac55
A
1045
1046 return (result);
91447636
A
1047}
1048
1049static int
2d21ac55
A
1050sbappendcontrol_internal(struct sockbuf *sb, struct mbuf *m0,
1051 struct mbuf *control)
1c79356b 1052{
2d21ac55 1053 struct mbuf *m, *mlast, *n;
1c79356b 1054 int space = 0;
1c79356b
A
1055
1056 if (control == 0)
1057 panic("sbappendcontrol");
1058
1c79356b
A
1059 for (m = control; ; m = m->m_next) {
1060 space += m->m_len;
1061 if (m->m_next == 0)
1062 break;
1063 }
1064 n = m; /* save pointer to last control buffer */
1065 for (m = m0; m; m = m->m_next)
1066 space += m->m_len;
2d21ac55 1067 if (space > sbspace(sb) && !(sb->sb_flags & SB_UNIX))
1c79356b
A
1068 return (0);
1069 n->m_next = m0; /* concatenate data to control */
2d21ac55
A
1070
1071 SBLASTRECORDCHK(sb, "sbappendcontrol 1");
1072
1073 for (m = control; m->m_next != NULL; m = m->m_next)
1c79356b 1074 sballoc(sb, m);
2d21ac55
A
1075 sballoc(sb, m);
1076 mlast = m;
1077
1078 if (sb->sb_lastrecord != NULL) {
1079 sb->sb_lastrecord->m_nextpkt = control;
1080 } else {
1c79356b 1081 sb->sb_mb = control;
2d21ac55
A
1082 }
1083 sb->sb_lastrecord = control;
1084 sb->sb_mbtail = mlast;
1085
1086 SBLASTMBUFCHK(sb, __func__);
1087 SBLASTRECORDCHK(sb, "sbappendcontrol 2");
1088
1089 postevent(0, sb, EV_RWBYTES);
1c79356b
A
1090 return (1);
1091}
1092
91447636 1093int
2d21ac55
A
1094sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control,
1095 int *error_out)
91447636
A
1096{
1097 int result = 0;
2d21ac55
A
1098 boolean_t sb_unix = (sb->sb_flags & SB_UNIX);
1099
1100 if (error_out)
1101 *error_out = 0;
1102
1103 if (sb->sb_flags & SB_DROP) {
1104 if (m0 != NULL)
1105 m_freem(m0);
1106 if (control != NULL && !sb_unix)
1107 m_freem(control);
1108 if (error_out != NULL)
1109 *error_out = EINVAL;
1110 return (0);
1111 }
1112
91447636
A
1113 if (sb->sb_flags & SB_RECV) {
1114 int error;
2d21ac55 1115
cc9f6e38 1116 error = sflt_data_in(sb->sb_so, NULL, &m0, &control, 0, NULL);
2d21ac55 1117 SBLASTRECORDCHK(sb, __func__);
91447636
A
1118 if (error) {
1119 if (error != EJUSTRETURN) {
2d21ac55
A
1120 if (m0)
1121 m_freem(m0);
1122 if (control != NULL && !sb_unix)
1123 m_freem(control);
1124 if (error_out)
1125 *error_out = error;
91447636 1126 }
2d21ac55 1127 return (0);
91447636
A
1128 }
1129 }
2d21ac55 1130
91447636
A
1131 result = sbappendcontrol_internal(sb, m0, control);
1132 if (result == 0) {
2d21ac55
A
1133 if (m0)
1134 m_freem(m0);
1135 if (control != NULL && !sb_unix)
1136 m_freem(control);
1137 if (error_out)
1138 *error_out = ENOBUFS;
91447636 1139 }
2d21ac55
A
1140
1141 return (result);
91447636
A
1142}
1143
1c79356b
A
1144/*
1145 * Compress mbuf chain m into the socket
1146 * buffer sb following mbuf n. If n
1147 * is null, the buffer is presumed empty.
1148 */
2d21ac55
A
1149static inline void
1150sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
1151{
1152 int eor = 0;
1153 struct mbuf *o;
1154
1155 if (m == NULL) {
1156 /* There is nothing to compress; just update the tail */
1157 for (; n->m_next != NULL; n = n->m_next)
1158 ;
1159 sb->sb_mbtail = n;
1160 goto done;
1161 }
1c79356b
A
1162
1163 while (m) {
1164 eor |= m->m_flags & M_EOR;
2d21ac55
A
1165 if (m->m_len == 0 && (eor == 0 ||
1166 (((o = m->m_next) || (o = n)) && o->m_type == m->m_type))) {
1167 if (sb->sb_lastrecord == m)
1168 sb->sb_lastrecord = m->m_next;
1c79356b
A
1169 m = m_free(m);
1170 continue;
1171 }
9bccf70c
A
1172 if (n && (n->m_flags & M_EOR) == 0 &&
1173#ifndef __APPLE__
1174 M_WRITABLE(n) &&
1175#endif
1176 m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
1177 m->m_len <= M_TRAILINGSPACE(n) &&
1c79356b
A
1178 n->m_type == m->m_type) {
1179 bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
1180 (unsigned)m->m_len);
1181 n->m_len += m->m_len;
1182 sb->sb_cc += m->m_len;
2d21ac55
A
1183 if (m->m_type != MT_DATA && m->m_type != MT_HEADER &&
1184 m->m_type != MT_OOBDATA)
1185 /* XXX: Probably don't need.*/
1186 sb->sb_ctl += m->m_len;
1c79356b
A
1187 m = m_free(m);
1188 continue;
1189 }
1190 if (n)
1191 n->m_next = m;
1192 else
1193 sb->sb_mb = m;
2d21ac55 1194 sb->sb_mbtail = m;
1c79356b
A
1195 sballoc(sb, m);
1196 n = m;
1197 m->m_flags &= ~M_EOR;
1198 m = m->m_next;
1199 n->m_next = 0;
1200 }
1201 if (eor) {
1202 if (n)
1203 n->m_flags |= eor;
1204 else
1205 printf("semi-panic: sbcompress\n");
1206 }
2d21ac55
A
1207done:
1208 SBLASTMBUFCHK(sb, __func__);
1209 postevent(0, sb, EV_RWBYTES);
1210}
1211
1212void
1213sb_empty_assert(struct sockbuf *sb, const char *where)
1214{
1215 if (!(sb->sb_cc == 0 && sb->sb_mb == NULL && sb->sb_mbcnt == 0 &&
1216 sb->sb_mbtail == NULL && sb->sb_lastrecord == NULL)) {
1217 panic("%s: sb %p so %p cc %ld mbcnt %ld mb %p mbtail %p "
1218 "lastrecord %p\n", where, sb, sb->sb_so, sb->sb_cc,
1219 sb->sb_mbcnt, sb->sb_mb, sb->sb_mbtail, sb->sb_lastrecord);
1220 /* NOTREACHED */
1221 }
1c79356b
A
1222}
1223
1224/*
1225 * Free all mbufs in a sockbuf.
1226 * Check that all resources are reclaimed.
1227 */
1228void
2d21ac55 1229sbflush(struct sockbuf *sb)
1c79356b 1230{
91447636 1231 if (sb->sb_so == NULL)
2d21ac55
A
1232 panic("sbflush sb->sb_so already null sb=%p\n", sb);
1233 (void) sblock(sb, M_WAIT);
9bccf70c
A
1234 while (sb->sb_mbcnt) {
1235 /*
1236 * Don't call sbdrop(sb, 0) if the leading mbuf is non-empty:
1237 * we would loop forever. Panic instead.
1238 */
1239 if (!sb->sb_cc && (sb->sb_mb == NULL || sb->sb_mb->m_len))
1240 break;
1c79356b 1241 sbdrop(sb, (int)sb->sb_cc);
9bccf70c 1242 }
2d21ac55 1243 sb_empty_assert(sb, __func__);
1c79356b 1244 postevent(0, sb, EV_RWBYTES);
91447636
A
1245 sbunlock(sb, 1); /* keep socket locked */
1246
1c79356b
A
1247}
1248
1249/*
1250 * Drop data from (the front of) a sockbuf.
9bccf70c
A
1251 * use m_freem_list to free the mbuf structures
1252 * under a single lock... this is done by pruning
1253 * the top of the tree from the body by keeping track
1254 * of where we get to in the tree and then zeroing the
1255 * two pertinent pointers m_nextpkt and m_next
1256 * the socket buffer is then updated to point at the new
1257 * top of the tree and the pruned area is released via
1258 * m_freem_list.
1c79356b
A
1259 */
1260void
2d21ac55 1261sbdrop(struct sockbuf *sb, int len)
1c79356b 1262{
2d21ac55 1263 struct mbuf *m, *free_list, *ml;
fa4905b1 1264 struct mbuf *next, *last;
1c79356b 1265
fa4905b1
A
1266 KERNEL_DEBUG((DBG_FNC_SBDROP | DBG_FUNC_START), sb, len, 0, 0, 0);
1267
1c79356b 1268 next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
fa4905b1
A
1269 free_list = last = m;
1270 ml = (struct mbuf *)0;
1271
1c79356b
A
1272 while (len > 0) {
1273 if (m == 0) {
2d21ac55
A
1274 if (next == 0) {
1275 /*
1276 * temporarily replacing this panic with printf
1277 * because it occurs occasionally when closing
1278 * a socket when there is no harm in ignoring
1279 * it. This problem will be investigated
1280 * further.
1281 */
1282 /* panic("sbdrop"); */
1283 printf("sbdrop - count not zero\n");
1284 len = 0;
1285 /*
1286 * zero the counts. if we have no mbufs,
1287 * we have no data (PR-2986815)
1288 */
1289 sb->sb_cc = 0;
1290 sb->sb_mbcnt = 0;
1291 break;
1292 }
1293 m = last = next;
1294 next = m->m_nextpkt;
1295 continue;
1c79356b
A
1296 }
1297 if (m->m_len > len) {
1298 m->m_len -= len;
1299 m->m_data += len;
1300 sb->sb_cc -= len;
2d21ac55
A
1301 if (m->m_type != MT_DATA && m->m_type != MT_HEADER &&
1302 m->m_type != MT_OOBDATA)
1303 sb->sb_ctl -= len;
1c79356b
A
1304 break;
1305 }
1306 len -= m->m_len;
1307 sbfree(sb, m);
fa4905b1
A
1308
1309 ml = m;
1310 m = m->m_next;
1c79356b
A
1311 }
1312 while (m && m->m_len == 0) {
1313 sbfree(sb, m);
fa4905b1
A
1314
1315 ml = m;
1316 m = m->m_next;
1317 }
1318 if (ml) {
2d21ac55 1319 ml->m_next = (struct mbuf *)0;
fa4905b1 1320 last->m_nextpkt = (struct mbuf *)0;
2d21ac55 1321 m_freem_list(free_list);
1c79356b
A
1322 }
1323 if (m) {
1324 sb->sb_mb = m;
1325 m->m_nextpkt = next;
2d21ac55 1326 } else {
1c79356b 1327 sb->sb_mb = next;
2d21ac55
A
1328 }
1329
1330 /*
1331 * First part is an inline SB_EMPTY_FIXUP(). Second part
1332 * makes sure sb_lastrecord is up-to-date if we dropped
1333 * part of the last record.
1334 */
1335 m = sb->sb_mb;
1336 if (m == NULL) {
1337 sb->sb_mbtail = NULL;
1338 sb->sb_lastrecord = NULL;
1339 } else if (m->m_nextpkt == NULL) {
1340 sb->sb_lastrecord = m;
1341 }
fa4905b1 1342
1c79356b 1343 postevent(0, sb, EV_RWBYTES);
fa4905b1
A
1344
1345 KERNEL_DEBUG((DBG_FNC_SBDROP | DBG_FUNC_END), sb, 0, 0, 0, 0);
1c79356b
A
1346}
1347
1348/*
1349 * Drop a record off the front of a sockbuf
1350 * and move the next record to the front.
1351 */
1352void
2d21ac55 1353sbdroprecord(struct sockbuf *sb)
1c79356b 1354{
2d21ac55 1355 struct mbuf *m, *mn;
1c79356b
A
1356
1357 m = sb->sb_mb;
1358 if (m) {
1359 sb->sb_mb = m->m_nextpkt;
1360 do {
1361 sbfree(sb, m);
1362 MFREE(m, mn);
9bccf70c
A
1363 m = mn;
1364 } while (m);
1c79356b 1365 }
2d21ac55 1366 SB_EMPTY_FIXUP(sb);
1c79356b
A
1367 postevent(0, sb, EV_RWBYTES);
1368}
1369
1370/*
1371 * Create a "control" mbuf containing the specified data
1372 * with the specified type for presentation on a socket buffer.
1373 */
1374struct mbuf *
2d21ac55 1375sbcreatecontrol(caddr_t p, int size, int type, int level)
1c79356b 1376{
2d21ac55 1377 struct cmsghdr *cp;
1c79356b
A
1378 struct mbuf *m;
1379
9bccf70c 1380 if (CMSG_SPACE((u_int)size) > MLEN)
2d21ac55 1381 return ((struct mbuf *)NULL);
1c79356b 1382 if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL)
2d21ac55 1383 return ((struct mbuf *)NULL);
1c79356b
A
1384 cp = mtod(m, struct cmsghdr *);
1385 /* XXX check size? */
2d21ac55 1386 (void) memcpy(CMSG_DATA(cp), p, size);
9bccf70c
A
1387 m->m_len = CMSG_SPACE(size);
1388 cp->cmsg_len = CMSG_LEN(size);
1c79356b
A
1389 cp->cmsg_level = level;
1390 cp->cmsg_type = type;
1391 return (m);
1392}
1393
1394/*
1395 * Some routines that return EOPNOTSUPP for entry points that are not
1396 * supported by a protocol. Fill in as needed.
1397 */
1398int
2d21ac55 1399pru_abort_notsupp(__unused struct socket *so)
1c79356b 1400{
2d21ac55 1401 return (EOPNOTSUPP);
1c79356b
A
1402}
1403
1c79356b 1404int
2d21ac55 1405pru_accept_notsupp(__unused struct socket *so, __unused struct sockaddr **nam)
1c79356b 1406{
2d21ac55 1407 return (EOPNOTSUPP);
1c79356b
A
1408}
1409
1410int
2d21ac55
A
1411pru_attach_notsupp(__unused struct socket *so, __unused int proto,
1412 __unused struct proc *p)
1c79356b 1413{
2d21ac55 1414 return (EOPNOTSUPP);
1c79356b
A
1415}
1416
1417int
2d21ac55
A
1418pru_bind_notsupp(__unused struct socket *so, __unused struct sockaddr *nam,
1419 __unused struct proc *p)
1c79356b 1420{
2d21ac55 1421 return (EOPNOTSUPP);
1c79356b
A
1422}
1423
1424int
2d21ac55
A
1425pru_connect_notsupp(__unused struct socket *so, __unused struct sockaddr *nam,
1426 __unused struct proc *p)
1c79356b 1427{
2d21ac55 1428 return (EOPNOTSUPP);
1c79356b
A
1429}
1430
1431int
2d21ac55 1432pru_connect2_notsupp(__unused struct socket *so1, __unused struct socket *so2)
1c79356b 1433{
2d21ac55 1434 return (EOPNOTSUPP);
1c79356b
A
1435}
1436
1437int
2d21ac55
A
1438pru_control_notsupp(__unused struct socket *so, __unused u_long cmd,
1439 __unused caddr_t data, __unused struct ifnet *ifp, __unused struct proc *p)
1c79356b 1440{
2d21ac55 1441 return (EOPNOTSUPP);
1c79356b
A
1442}
1443
1444int
2d21ac55 1445pru_detach_notsupp(__unused struct socket *so)
1c79356b 1446{
2d21ac55 1447 return (EOPNOTSUPP);
1c79356b
A
1448}
1449
1450int
2d21ac55 1451pru_disconnect_notsupp(__unused struct socket *so)
1c79356b 1452{
2d21ac55 1453 return (EOPNOTSUPP);
1c79356b
A
1454}
1455
1456int
2d21ac55 1457pru_listen_notsupp(__unused struct socket *so, __unused struct proc *p)
1c79356b 1458{
2d21ac55 1459 return (EOPNOTSUPP);
1c79356b
A
1460}
1461
1462int
2d21ac55 1463pru_peeraddr_notsupp(__unused struct socket *so, __unused struct sockaddr **nam)
1c79356b 1464{
2d21ac55 1465 return (EOPNOTSUPP);
1c79356b
A
1466}
1467
1468int
2d21ac55 1469pru_rcvd_notsupp(__unused struct socket *so, __unused int flags)
1c79356b 1470{
2d21ac55 1471 return (EOPNOTSUPP);
1c79356b
A
1472}
1473
1474int
2d21ac55
A
1475pru_rcvoob_notsupp(__unused struct socket *so, __unused struct mbuf *m,
1476 __unused int flags)
1c79356b 1477{
2d21ac55 1478 return (EOPNOTSUPP);
1c79356b
A
1479}
1480
1481int
2d21ac55
A
1482pru_send_notsupp(__unused struct socket *so, __unused int flags,
1483 __unused struct mbuf *m, __unused struct sockaddr *addr,
1484 __unused struct mbuf *control, __unused struct proc *p)
1c79356b
A
1485
1486{
2d21ac55 1487 return (EOPNOTSUPP);
1c79356b
A
1488}
1489
1490
1491/*
1492 * This isn't really a ``null'' operation, but it's the default one
1493 * and doesn't do anything destructive.
1494 */
1495int
2d21ac55 1496pru_sense_null(struct socket *so, void *ub, int isstat64)
1c79356b 1497{
2d21ac55
A
1498 if (isstat64 != 0) {
1499 struct stat64 *sb64;
1c79356b 1500
2d21ac55
A
1501 sb64 = (struct stat64 *)ub;
1502 sb64->st_blksize = so->so_snd.sb_hiwat;
1503 } else {
1504 struct stat *sb;
1c79356b 1505
2d21ac55
A
1506 sb = (struct stat *)ub;
1507 sb->st_blksize = so->so_snd.sb_hiwat;
1508 }
1c79356b 1509
2d21ac55 1510 return (0);
1c79356b
A
1511}
1512
1c79356b
A
1513
1514int
2d21ac55
A
1515pru_sosend_notsupp(__unused struct socket *so, __unused struct sockaddr *addr,
1516 __unused struct uio *uio, __unused struct mbuf *top,
1517 __unused struct mbuf *control, __unused int flags)
1c79356b 1518
1c79356b 1519{
2d21ac55 1520 return (EOPNOTSUPP);
1c79356b
A
1521}
1522
1523int
2d21ac55
A
1524pru_soreceive_notsupp(__unused struct socket *so,
1525 __unused struct sockaddr **paddr,
1526 __unused struct uio *uio, __unused struct mbuf **mp0,
1527 __unused struct mbuf **controlp, __unused int *flagsp)
1c79356b 1528{
2d21ac55 1529 return (EOPNOTSUPP);
1c79356b
A
1530}
1531
2d21ac55
A
1532int
1533pru_shutdown_notsupp(__unused struct socket *so)
1c79356b 1534{
2d21ac55 1535 return (EOPNOTSUPP);
1c79356b
A
1536}
1537
2d21ac55
A
1538int
1539pru_sockaddr_notsupp(__unused struct socket *so, __unused struct sockaddr **nam)
1c79356b 1540{
2d21ac55 1541 return (EOPNOTSUPP);
1c79356b
A
1542}
1543
91447636
A
1544int
1545pru_sopoll_notsupp(__unused struct socket *so, __unused int events,
2d21ac55 1546 __unused kauth_cred_t cred, __unused void *wql)
1c79356b 1547{
2d21ac55 1548 return (EOPNOTSUPP);
1c79356b
A
1549}
1550
1551
9bccf70c
A
1552#ifdef __APPLE__
1553/*
1554 * The following are macros on BSD and functions on Darwin
1555 */
1c79356b 1556
0b4e3aa0
A
1557/*
1558 * Do we need to notify the other side when I/O is possible?
1559 */
1560
2d21ac55 1561int
0b4e3aa0
A
1562sb_notify(struct sockbuf *sb)
1563{
2d21ac55
A
1564 return ((sb->sb_flags &
1565 (SB_WAIT|SB_SEL|SB_ASYNC|SB_UPCALL|SB_KNOTE)) != 0);
0b4e3aa0
A
1566}
1567
1568/*
1569 * How much space is there in a socket buffer (so->so_snd or so->so_rcv)?
1570 * This is problematical if the fields are unsigned, as the space might
1571 * still be negative (cc > hiwat or mbcnt > mbmax). Should detect
1572 * overflow and return 0. Should use "lmin" but it doesn't exist now.
1573 */
1574long
1575sbspace(struct sockbuf *sb)
1576{
2d21ac55
A
1577 return ((long)imin((int)(sb->sb_hiwat - sb->sb_cc),
1578 (int)(sb->sb_mbmax - sb->sb_mbcnt)));
0b4e3aa0
A
1579}
1580
1581/* do we have to send all at once on a socket? */
1582int
1583sosendallatonce(struct socket *so)
1584{
2d21ac55 1585 return (so->so_proto->pr_flags & PR_ATOMIC);
0b4e3aa0
A
1586}
1587
1588/* can we read something from so? */
1589int
1590soreadable(struct socket *so)
1591{
2d21ac55
A
1592 return (so->so_rcv.sb_cc >= so->so_rcv.sb_lowat ||
1593 (so->so_state & SS_CANTRCVMORE) ||
1594 so->so_comp.tqh_first || so->so_error);
0b4e3aa0
A
1595}
1596
1597/* can we write something to so? */
1598
1599int
1600sowriteable(struct socket *so)
1601{
2d21ac55
A
1602 return ((sbspace(&(so)->so_snd) >= (long)(so)->so_snd.sb_lowat &&
1603 ((so->so_state&SS_ISCONNECTED) ||
1604 (so->so_proto->pr_flags&PR_CONNREQUIRED) == 0)) ||
1605 (so->so_state & SS_CANTSENDMORE) ||
1606 so->so_error);
0b4e3aa0
A
1607}
1608
1609/* adjust counters in sb reflecting allocation of m */
1610
1611void
1612sballoc(struct sockbuf *sb, struct mbuf *m)
1613{
2d21ac55 1614 int cnt = 1;
0b4e3aa0 1615 sb->sb_cc += m->m_len;
2d21ac55
A
1616 if (m->m_type != MT_DATA && m->m_type != MT_HEADER &&
1617 m->m_type != MT_OOBDATA)
1618 sb->sb_ctl += m->m_len;
0b4e3aa0 1619 sb->sb_mbcnt += MSIZE;
2d21ac55
A
1620
1621 if (m->m_flags & M_EXT) {
0b4e3aa0 1622 sb->sb_mbcnt += m->m_ext.ext_size;
2d21ac55
A
1623 cnt += m->m_ext.ext_size / MSIZE ;
1624 }
1625 OSAddAtomic(cnt, (SInt32*)&total_mb_cnt);
0b4e3aa0
A
1626}
1627
1628/* adjust counters in sb reflecting freeing of m */
1629void
1630sbfree(struct sockbuf *sb, struct mbuf *m)
1631{
2d21ac55
A
1632 int cnt = -1;
1633 sb->sb_cc -= m->m_len;
1634 if (m->m_type != MT_DATA && m->m_type != MT_HEADER &&
1635 m->m_type != MT_OOBDATA)
1636 sb->sb_ctl -= m->m_len;
0b4e3aa0 1637 sb->sb_mbcnt -= MSIZE;
2d21ac55 1638 if (m->m_flags & M_EXT) {
0b4e3aa0 1639 sb->sb_mbcnt -= m->m_ext.ext_size;
2d21ac55
A
1640 cnt -= m->m_ext.ext_size / MSIZE ;
1641 }
1642 OSAddAtomic(cnt, (SInt32*)&total_mb_cnt);
0b4e3aa0
A
1643}
1644
1645/*
1646 * Set lock on sockbuf sb; sleep if lock is already held.
1647 * Unless SB_NOINTR is set on sockbuf, sleep is interruptible.
1648 * Returns error without lock if sleep is interrupted.
2d21ac55
A
1649 *
1650 * Returns: 0 Success
1651 * EWOULDBLOCK
1652 * sb_lock:EINTR
0b4e3aa0
A
1653 */
1654int
1655sblock(struct sockbuf *sb, int wf)
1656{
6601e61a
A
1657 int error = 0;
1658
1659 if (sb->sb_flags & SB_LOCK)
1660 error = (wf == M_WAIT) ? sb_lock(sb) : EWOULDBLOCK;
1661 else
1662 sb->sb_flags |= SB_LOCK;
1663
1664 return (error);
0b4e3aa0
A
1665}
1666
1667/* release lock on sockbuf sb */
1668void
91447636 1669sbunlock(struct sockbuf *sb, int keeplocked)
0b4e3aa0 1670{
91447636 1671 struct socket *so = sb->sb_so;
0c530ab8 1672 int lr_saved;
91447636
A
1673 lck_mtx_t *mutex_held;
1674
0c530ab8 1675 lr_saved = (unsigned int) __builtin_return_address(0);
91447636 1676
2d21ac55 1677 sb->sb_flags &= ~SB_LOCK;
91447636 1678
2d21ac55
A
1679 if (sb->sb_flags & SB_WANT) {
1680 sb->sb_flags &= ~SB_WANT;
91447636 1681 if (so->so_usecount < 0)
2d21ac55
A
1682 panic("sbunlock: b4 wakeup so=%p ref=%d lr=%x "
1683 "sb_flags=%x\n", sb->sb_so, so->so_usecount,
1684 lr_saved, sb->sb_flags);
91447636 1685
2d21ac55
A
1686 wakeup((caddr_t)&(sb)->sb_flags);
1687 }
91447636 1688 if (keeplocked == 0) { /* unlock on exit */
0c530ab8
A
1689 if (so->so_proto->pr_getlock != NULL)
1690 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
1691 else
1692 mutex_held = so->so_proto->pr_domain->dom_mtx;
2d21ac55 1693
0c530ab8
A
1694 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
1695
91447636
A
1696 so->so_usecount--;
1697 if (so->so_usecount < 0)
2d21ac55
A
1698 panic("sbunlock: unlock on exit so=%p ref=%d lr=%x "
1699 "sb_flags=%x\n", so, so->so_usecount, lr_saved,
1700 sb->sb_flags);
1701 so->unlock_lr[so->next_unlock_lr] = (u_int32_t)lr_saved;
0c530ab8 1702 so->next_unlock_lr = (so->next_unlock_lr+1) % SO_LCKDBG_MAX;
91447636
A
1703 lck_mtx_unlock(mutex_held);
1704 }
0b4e3aa0
A
1705}
1706
1707void
2d21ac55 1708sorwakeup(struct socket *so)
0b4e3aa0 1709{
2d21ac55
A
1710 if (sb_notify(&so->so_rcv))
1711 sowakeup(so, &so->so_rcv);
0b4e3aa0
A
1712}
1713
1714void
2d21ac55 1715sowwakeup(struct socket *so)
0b4e3aa0 1716{
2d21ac55
A
1717 if (sb_notify(&so->so_snd))
1718 sowakeup(so, &so->so_snd);
0b4e3aa0 1719}
2d21ac55 1720#endif /* __APPLE__ */
0b4e3aa0 1721
1c79356b
A
1722/*
1723 * Make a copy of a sockaddr in a malloced buffer of type M_SONAME.
1724 */
1725struct sockaddr *
2d21ac55 1726dup_sockaddr(struct sockaddr *sa, int canwait)
1c79356b
A
1727{
1728 struct sockaddr *sa2;
1729
2d21ac55
A
1730 MALLOC(sa2, struct sockaddr *, sa->sa_len, M_SONAME,
1731 canwait ? M_WAITOK : M_NOWAIT);
1c79356b
A
1732 if (sa2)
1733 bcopy(sa, sa2, sa->sa_len);
2d21ac55 1734 return (sa2);
1c79356b
A
1735}
1736
1737/*
1738 * Create an external-format (``xsocket'') structure using the information
1739 * in the kernel-format socket structure pointed to by so. This is done
1740 * to reduce the spew of irrelevant information over this interface,
1741 * to isolate user code from changes in the kernel structure, and
1742 * potentially to provide information-hiding if we decide that
1743 * some of this information should be hidden from users.
1744 */
1745void
1746sotoxsocket(struct socket *so, struct xsocket *xso)
1747{
2d21ac55 1748 xso->xso_len = sizeof (*xso);
1c79356b
A
1749 xso->xso_so = so;
1750 xso->so_type = so->so_type;
1751 xso->so_options = so->so_options;
1752 xso->so_linger = so->so_linger;
1753 xso->so_state = so->so_state;
1754 xso->so_pcb = so->so_pcb;
91447636
A
1755 if (so->so_proto) {
1756 xso->xso_protocol = so->so_proto->pr_protocol;
1757 xso->xso_family = so->so_proto->pr_domain->dom_family;
2d21ac55 1758 } else {
91447636 1759 xso->xso_protocol = xso->xso_family = 0;
2d21ac55 1760 }
1c79356b
A
1761 xso->so_qlen = so->so_qlen;
1762 xso->so_incqlen = so->so_incqlen;
1763 xso->so_qlimit = so->so_qlimit;
1764 xso->so_timeo = so->so_timeo;
1765 xso->so_error = so->so_error;
1766 xso->so_pgid = so->so_pgid;
1767 xso->so_oobmark = so->so_oobmark;
1768 sbtoxsockbuf(&so->so_snd, &xso->so_snd);
1769 sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
1770 xso->so_uid = so->so_uid;
1771}
1772
1773/*
1774 * This does the same for sockbufs. Note that the xsockbuf structure,
1775 * since it is always embedded in a socket, does not include a self
1776 * pointer nor a length. We make this entry point public in case
1777 * some other mechanism needs it.
1778 */
1779void
1780sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb)
1781{
1782 xsb->sb_cc = sb->sb_cc;
1783 xsb->sb_hiwat = sb->sb_hiwat;
1784 xsb->sb_mbcnt = sb->sb_mbcnt;
1785 xsb->sb_mbmax = sb->sb_mbmax;
1786 xsb->sb_lowat = sb->sb_lowat;
1787 xsb->sb_flags = sb->sb_flags;
2d21ac55
A
1788 xsb->sb_timeo = (u_long)
1789 (sb->sb_timeo.tv_sec * hz) + sb->sb_timeo.tv_usec / tick;
91447636
A
1790 if (xsb->sb_timeo == 0 && sb->sb_timeo.tv_usec != 0)
1791 xsb->sb_timeo = 1;
1c79356b
A
1792}
1793
1794/*
1795 * Here is the definition of some of the basic objects in the kern.ipc
1796 * branch of the MIB.
1797 */
2d21ac55 1798SYSCTL_NODE(_kern, KERN_IPC, ipc, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "IPC");
1c79356b
A
1799
1800/* This takes the place of kern.maxsockbuf, which moved to kern.ipc. */
1801static int dummy;
1802SYSCTL_INT(_kern, KERN_DUMMY, dummy, CTLFLAG_RW, &dummy, 0, "");
1803
2d21ac55 1804SYSCTL_INT(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf, CTLFLAG_RW,
9bccf70c 1805 &sb_max, 0, "Maximum socket buffer size");
2d21ac55 1806SYSCTL_INT(_kern_ipc, OID_AUTO, maxsockets, CTLFLAG_RD,
9bccf70c 1807 &maxsockets, 0, "Maximum number of sockets avaliable");
1c79356b 1808SYSCTL_INT(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW,
2d21ac55
A
1809 &sb_efficiency, 0, "");
1810SYSCTL_INT(_kern_ipc, OID_AUTO, sbspace_factor, CTLFLAG_RW,
1811 &sbspace_factor, 0, "Ratio of mbuf/cluster use for socket layers");
1812SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters, CTLFLAG_RD,
1813 &nmbclusters, 0, "");
1814SYSCTL_INT(_kern_ipc, OID_AUTO, njcl, CTLFLAG_RD, &njcl, 0, "");
1815SYSCTL_INT(_kern_ipc, OID_AUTO, njclbytes, CTLFLAG_RD, &njclbytes, 0, "");
1816SYSCTL_INT(_kern_ipc, KIPC_SOQLIMITCOMPAT, soqlimitcompat, CTLFLAG_RW,
1817 &soqlimitcompat, 1, "Enable socket queue limit compatibility");
1818SYSCTL_INT(_kern_ipc, OID_AUTO, soqlencomp, CTLFLAG_RW,
1819 &soqlencomp, 0, "Listen backlog represents only complete queue");