]> git.saurik.com Git - apple/xnu.git/blame - bsd/kern/uipc_socket2.c
xnu-792.13.8.tar.gz
[apple/xnu.git] / bsd / kern / uipc_socket2.c
CommitLineData
1c79356b 1/*
5d5c5d0d
A
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
8ad349bb 4 * @APPLE_LICENSE_OSREFERENCE_HEADER_START@
1c79356b 5 *
8ad349bb
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the
10 * License may not be used to create, or enable the creation or
11 * redistribution of, unlawful or unlicensed copies of an Apple operating
12 * system, or to circumvent, violate, or enable the circumvention or
13 * violation of, any terms of an Apple operating system software license
14 * agreement.
15 *
16 * Please obtain a copy of the License at
17 * http://www.opensource.apple.com/apsl/ and read it before using this
18 * file.
19 *
20 * The Original Code and all software distributed under the License are
21 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
22 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
23 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
24 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
25 * Please see the License for the specific language governing rights and
26 * limitations under the License.
27 *
28 * @APPLE_LICENSE_OSREFERENCE_HEADER_END@
1c79356b
A
29 */
30/* Copyright (c) 1998, 1999 Apple Computer, Inc. All Rights Reserved */
31/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
32/*
33 * Copyright (c) 1982, 1986, 1988, 1990, 1993
34 * The Regents of the University of California. All rights reserved.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. All advertising materials mentioning features or use of this software
45 * must display the following acknowledgement:
46 * This product includes software developed by the University of
47 * California, Berkeley and its contributors.
48 * 4. Neither the name of the University nor the names of its contributors
49 * may be used to endorse or promote products derived from this software
50 * without specific prior written permission.
51 *
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62 * SUCH DAMAGE.
63 *
64 * @(#)uipc_socket2.c 8.1 (Berkeley) 6/10/93
9bccf70c 65 * $FreeBSD: src/sys/kern/uipc_socket2.c,v 1.55.2.9 2001/07/26 18:53:02 peter Exp $
1c79356b
A
66 */
67
68#include <sys/param.h>
69#include <sys/systm.h>
70#include <sys/domain.h>
71#include <sys/kernel.h>
91447636
A
72#include <sys/proc_internal.h>
73#include <sys/kauth.h>
1c79356b
A
74#include <sys/malloc.h>
75#include <sys/mbuf.h>
76#include <sys/protosw.h>
77#include <sys/stat.h>
78#include <sys/socket.h>
79#include <sys/socketvar.h>
80#include <sys/signalvar.h>
81#include <sys/sysctl.h>
82#include <sys/ev.h>
91447636
A
83#include <kern/locks.h>
84#include <net/route.h>
85#include <netinet/in.h>
86#include <netinet/in_pcb.h>
fa4905b1
A
87#include <sys/kdebug.h>
88
89#define DBG_FNC_SBDROP NETDBG_CODE(DBG_NETSOCK, 4)
90#define DBG_FNC_SBAPPEND NETDBG_CODE(DBG_NETSOCK, 5)
91
5d5c5d0d 92static int sbcompress(struct sockbuf *, struct mbuf *, struct mbuf *);
fa4905b1 93
1c79356b
A
94/*
95 * Primitive routines for operating on sockets and socket buffers
96 */
97
98u_long sb_max = SB_MAX; /* XXX should be static */
99
100static u_long sb_efficiency = 8; /* parameter for sbreserve() */
101
1c79356b
A
102/*
103 * Procedures to manipulate state flags of socket
104 * and do appropriate wakeups. Normal sequence from the
105 * active (originating) side is that soisconnecting() is
106 * called during processing of connect() call,
107 * resulting in an eventual call to soisconnected() if/when the
108 * connection is established. When the connection is torn down
9bccf70c 109 * soisdisconnecting() is called during processing of disconnect() call,
1c79356b
A
110 * and soisdisconnected() is called when the connection to the peer
111 * is totally severed. The semantics of these routines are such that
112 * connectionless protocols can call soisconnected() and soisdisconnected()
113 * only, bypassing the in-progress calls when setting up a ``connection''
114 * takes no time.
115 *
116 * From the passive side, a socket is created with
e3027f41
A
117 * two queues of sockets: so_incomp for connections in progress
118 * and so_comp for connections already made and awaiting user acceptance.
9bccf70c 119 * As a protocol is preparing incoming connections, it creates a socket
e3027f41 120 * structure queued on so_incomp by calling sonewconn(). When the connection
1c79356b 121 * is established, soisconnected() is called, and transfers the
e3027f41 122 * socket structure to so_comp, making it available to accept().
1c79356b 123 *
9bccf70c 124 * If a socket is closed with sockets on either
e3027f41 125 * so_incomp or so_comp, these sockets are dropped.
9bccf70c 126 *
1c79356b
A
127 * If higher level protocols are implemented in
128 * the kernel, the wakeups done here will sometimes
129 * cause software-interrupt process scheduling.
130 */
1c79356b
A
131void
132soisconnecting(so)
133 register struct socket *so;
134{
135
136 so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
137 so->so_state |= SS_ISCONNECTING;
91447636
A
138
139 sflt_notify(so, sock_evt_connecting, NULL);
1c79356b
A
140}
141
/*
 * Transition a socket to the fully-connected state and perform the
 * associated wakeups.  For a passively-accepted socket still sitting
 * on its listener's incomplete queue, this also moves it to the
 * listener's completed queue so accept() can return it.
 */
void
soisconnected(so)
	struct socket *so;
{
	struct socket *head = so->so_head;

	so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
	so->so_state |= SS_ISCONNECTED;

	sflt_notify(so, sock_evt_connected, NULL);

	if (head && (so->so_state & SS_INCOMP)) {
		/* Move from the listener's so_incomp queue to so_comp. */
		so->so_state &= ~SS_INCOMP;
		so->so_state |= SS_COMP;
		/*
		 * For protocols with per-socket locks, drop this socket's
		 * lock before taking the listener's so the two locks are
		 * never held at once in this direction.
		 */
		if (head->so_proto->pr_getlock != NULL) {
			socket_unlock(so, 0);
			socket_lock(head, 1);
		}
		postevent(head, 0, EV_RCONN);
		TAILQ_REMOVE(&head->so_incomp, so, so_list);
		head->so_incqlen--;
		TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
		sorwakeup(head);
		/* Wake one thread blocked in accept() on the listener. */
		wakeup_one((caddr_t)&head->so_timeo);
		if (head->so_proto->pr_getlock != NULL) {
			socket_unlock(head, 1);
			socket_lock(so, 0);
		}
	} else {
		/* Actively-connected (or orphaned) socket: wake waiters. */
		postevent(so, 0, EV_WCONN);
		wakeup((caddr_t)&so->so_timeo);
		sorwakeup(so);
		sowwakeup(so);
	}
}
177
178void
179soisdisconnecting(so)
180 register struct socket *so;
9bccf70c 181{
1c79356b
A
182 so->so_state &= ~SS_ISCONNECTING;
183 so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
91447636 184 sflt_notify(so, sock_evt_disconnecting, NULL);
1c79356b
A
185 wakeup((caddr_t)&so->so_timeo);
186 sowwakeup(so);
187 sorwakeup(so);
188}
189
190void
191soisdisconnected(so)
192 register struct socket *so;
9bccf70c 193{
1c79356b 194 so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
9bccf70c 195 so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
91447636 196 sflt_notify(so, sock_evt_disconnected, NULL);
1c79356b
A
197 wakeup((caddr_t)&so->so_timeo);
198 sowwakeup(so);
199 sorwakeup(so);
200}
201
/*
 * Return a random connection that hasn't been serviced yet and
 * is eligible for discard.  There is a one in qlen chance that
 * we will return a null, saying that there are no droppable
 * requests.  In this case, the protocol specific code should drop
 * the new request.  This ensures fairness.
 *
 * This may be used in conjunction with protocol specific queue
 * congestion routines.
 */
struct socket *
sodropablereq(head)
	register struct socket *head;
{
	struct socket *so, *sonext = NULL;
	unsigned int i, j, qlen;
	/* Static state: PRNG seed and per-second arrival-rate tracking. */
	static int rnd;
	static struct timeval old_runtime;
	static unsigned int cur_cnt, old_cnt;
	struct timeval tv;

	/*
	 * Roughly once a second, convert the running arrival count into
	 * an average-per-second (old_cnt) and restart the counter.
	 */
	microtime(&tv);
	if ((i = (tv.tv_sec - old_runtime.tv_sec)) != 0) {
		old_runtime = tv;
		old_cnt = cur_cnt / i;
		cur_cnt = 0;
	}

	so = TAILQ_FIRST(&head->so_incomp);
	if (!so)
		return (NULL);

	qlen = head->so_incqlen;
	if (++cur_cnt > qlen || old_cnt > qlen) {
		/*
		 * Under pressure: pick a pseudo-random index j in
		 * [0, qlen] and walk that far down the incomplete queue.
		 * j == qlen can walk off the end, yielding NULL (the
		 * deliberate 1-in-qlen "drop the new request" outcome).
		 */
		rnd = (314159 * rnd + 66329) & 0xffff;
		j = ((qlen + 1) * rnd) >> 16;
//###LD To clean up
		while (j-- && so) {
//			if (in_pcb_checkstate(so->so_pcb, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
			socket_lock(so, 1);
			sonext = TAILQ_NEXT(so, so_list);
//			in_pcb_check_state(so->so_pcb, WNT_RELEASE, 0);
			socket_unlock(so, 1);
			so = sonext;
		}
	}

//	if (in_pcb_checkstate(so->so_pcb, WNT_ACQUIRE, 0) == WNT_STOPUSING)
//		return (NULL);
//	else
	return (so);
}
254
/*
 * When an attempt at a new connection is noted on a socket
 * which accepts connections, sonewconn is called.  If the
 * connection is possible (subject to space constraints, etc.)
 * then we allocate a new structure, properly linked into the
 * data structure of the original socket, and return this.
 * Connstatus may be 0, or SO_ISCONFIRMING, or SO_ISCONNECTED.
 */
91447636
A
263static struct socket *
264sonewconn_internal(head, connstatus)
1c79356b
A
265 register struct socket *head;
266 int connstatus;
9bccf70c
A
267{
268 int error = 0;
1c79356b 269 register struct socket *so;
91447636
A
270 lck_mtx_t *mutex_held;
271
272 if (head->so_proto->pr_getlock != NULL)
273 mutex_held = (*head->so_proto->pr_getlock)(head, 0);
274 else
275 mutex_held = head->so_proto->pr_domain->dom_mtx;
276 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
1c79356b
A
277
278 if (head->so_qlen > 3 * head->so_qlimit / 2)
279 return ((struct socket *)0);
0b4e3aa0 280 so = soalloc(1, head->so_proto->pr_domain->dom_family, head->so_type);
1c79356b
A
281 if (so == NULL)
282 return ((struct socket *)0);
9bccf70c
A
283 /* check if head was closed during the soalloc */
284 if (head->so_proto == NULL) {
285 sodealloc(so);
286 return ((struct socket *)0);
1c79356b
A
287 }
288
289 so->so_head = head;
290 so->so_type = head->so_type;
291 so->so_options = head->so_options &~ SO_ACCEPTCONN;
292 so->so_linger = head->so_linger;
293 so->so_state = head->so_state | SS_NOFDREF;
294 so->so_proto = head->so_proto;
295 so->so_timeo = head->so_timeo;
296 so->so_pgid = head->so_pgid;
297 so->so_uid = head->so_uid;
91447636 298 so->so_usecount = 1;
5d5c5d0d
A
299 so->next_lock_lr = 0;
300 so->next_unlock_lr = 0;
1c79356b 301
13fec989
A
302#ifdef __APPLE__
303 so->so_rcv.sb_flags |= SB_RECV; /* XXX */
304 so->so_rcv.sb_so = so->so_snd.sb_so = so;
305 TAILQ_INIT(&so->so_evlist);
306#endif
307
91447636
A
308 if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat)) {
309 sflt_termsock(so);
9bccf70c
A
310 sodealloc(so);
311 return ((struct socket *)0);
312 }
313
91447636 314 /*
37839358 315 * Must be done with head unlocked to avoid deadlock for protocol with per socket mutexes.
91447636 316 */
37839358
A
317 if (head->so_proto->pr_unlock)
318 socket_unlock(head, 0);
91447636
A
319 if (((*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL) != 0) || error) {
320 sflt_termsock(so);
1c79356b 321 sodealloc(so);
37839358
A
322 if (head->so_proto->pr_unlock)
323 socket_lock(head, 0);
1c79356b
A
324 return ((struct socket *)0);
325 }
37839358
A
326 if (head->so_proto->pr_unlock)
327 socket_lock(head, 0);
9bccf70c 328#ifdef __APPLE__
1c79356b 329 so->so_proto->pr_domain->dom_refs++;
9bccf70c 330#endif
1c79356b
A
331
332 if (connstatus) {
333 TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
334 so->so_state |= SS_COMP;
335 } else {
336 TAILQ_INSERT_TAIL(&head->so_incomp, so, so_list);
337 so->so_state |= SS_INCOMP;
338 head->so_incqlen++;
339 }
340 head->so_qlen++;
91447636 341
13fec989 342#ifdef __APPLE__
5d5c5d0d
A
343 /* Attach socket filters for this protocol */
344 sflt_initsock(so);
9bccf70c 345#endif
91447636
A
346 if (connstatus) {
347 so->so_state |= connstatus;
348 sorwakeup(head);
349 wakeup((caddr_t)&head->so_timeo);
350 }
1c79356b
A
351 return (so);
352}
353
91447636
A
354
355struct socket *
356sonewconn(
357 struct socket *head,
358 int connstatus,
359 const struct sockaddr *from)
360{
361 int error = 0;
362 struct socket_filter_entry *filter;
363 int filtered = 0;
364
365 error = 0;
366 for (filter = head->so_filt; filter && (error == 0);
367 filter = filter->sfe_next_onsocket) {
368 if (filter->sfe_filter->sf_filter.sf_connect_in) {
369 if (filtered == 0) {
370 filtered = 1;
371 sflt_use(head);
372 socket_unlock(head, 0);
373 }
374 error = filter->sfe_filter->sf_filter.sf_connect_in(
375 filter->sfe_cookie, head, from);
376 }
377 }
378 if (filtered != 0) {
379 socket_lock(head, 0);
380 sflt_unuse(head);
381 }
382
383 if (error) {
384 return NULL;
385 }
386
387 return sonewconn_internal(head, connstatus);
388}
389
1c79356b
A
390/*
391 * Socantsendmore indicates that no more data will be sent on the
392 * socket; it would normally be applied to a socket when the user
393 * informs the system that no more data is to be sent, by the protocol
394 * code (in case PRU_SHUTDOWN). Socantrcvmore indicates that no more data
395 * will be received, and will normally be applied to the socket by a
396 * protocol when it detects that the peer will send no more data.
397 * Data queued for reading in the socket may yet be read.
398 */
399
400void
401socantsendmore(so)
402 struct socket *so;
9bccf70c 403{
1c79356b 404 so->so_state |= SS_CANTSENDMORE;
91447636 405 sflt_notify(so, sock_evt_cantsendmore, NULL);
1c79356b
A
406 sowwakeup(so);
407}
408
409void
410socantrcvmore(so)
411 struct socket *so;
9bccf70c 412{
1c79356b 413 so->so_state |= SS_CANTRCVMORE;
91447636 414 sflt_notify(so, sock_evt_cantrecvmore, NULL);
1c79356b
A
415 sorwakeup(so);
416}
417
418/*
419 * Wait for data to arrive at/drain from a socket buffer.
420 */
421int
422sbwait(sb)
423 struct sockbuf *sb;
424{
5d5c5d0d 425 int error = 0, lr_saved;
91447636
A
426 struct socket *so = sb->sb_so;
427 lck_mtx_t *mutex_held;
428 struct timespec ts;
429
5d5c5d0d 430 lr_saved = (unsigned int) __builtin_return_address(0);
91447636 431
91447636
A
432 if (so->so_proto->pr_getlock != NULL)
433 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
434 else
435 mutex_held = so->so_proto->pr_domain->dom_mtx;
1c79356b
A
436
437 sb->sb_flags |= SB_WAIT;
91447636
A
438
439 if (so->so_usecount < 1)
440 panic("sbwait: so=%x refcount=%d\n", so, so->so_usecount);
441 ts.tv_sec = sb->sb_timeo.tv_sec;
442 ts.tv_nsec = sb->sb_timeo.tv_usec * 1000;
443 error = msleep((caddr_t)&sb->sb_cc, mutex_held,
444 (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait",
445 &ts);
446
447 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
448
449 if (so->so_usecount < 1)
450 panic("sbwait: so=%x refcount=%d\n", so, so->so_usecount);
451
452 if ((so->so_state & SS_DRAINING)) {
453 error = EBADF;
454 }
455
456 return (error);
1c79356b
A
457}
458
459/*
460 * Lock a sockbuf already known to be locked;
461 * return any error returned from sleep (EINTR).
462 */
/*
 * Acquire the SB_LOCK flag on a sockbuf, sleeping while another
 * thread holds it.  Returns 0 on success or the msleep() error
 * (e.g. EINTR when the sleep is interruptible).
 */
int
sb_lock(sb)
	register struct sockbuf *sb;
{
	struct socket *so = sb->sb_so;
	lck_mtx_t * mutex_held;
	int error = 0;

	/* NOTE(review): %x formats a pointer; %p would be correct. */
	if (so == NULL)
		panic("sb_lock: null so back pointer sb=%x\n", sb);

	while (sb->sb_flags & SB_LOCK) {
		/* Ask the current holder to wake us on release. */
		sb->sb_flags |= SB_WANT;
		if (so->so_proto->pr_getlock != NULL)
			mutex_held = (*so->so_proto->pr_getlock)(so, 0);
		else
			mutex_held = so->so_proto->pr_domain->dom_mtx;
		if (so->so_usecount < 1)
			panic("sb_lock: so=%x refcount=%d\n", so, so->so_usecount);

		/* SB_NOINTR makes the sleep uninterruptible (no PCATCH). */
		error = msleep((caddr_t)&sb->sb_flags, mutex_held,
			(sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sblock", 0);
		if (so->so_usecount < 1)
			panic("sb_lock: 2 so=%x refcount=%d\n", so, so->so_usecount);
		if (error)
			return (error);
	}
	sb->sb_flags |= SB_LOCK;
	return (0);
}
493
494/*
495 * Wakeup processes waiting on a socket buffer.
496 * Do asynchronous notification via SIGIO
497 * if the socket has the SS_ASYNC flag set.
498 */
/*
 * Wake up everything interested in a socket buffer: select()/poll()
 * waiters, sbwait() sleepers, SIGIO recipients (if SS_ASYNC), kqueue
 * knotes, and any registered upcall.
 */
void
sowakeup(so, sb)
	register struct socket *so;
	register struct sockbuf *sb;
{
	struct proc *p = current_proc();
	sb->sb_flags &= ~SB_SEL;
	selwakeup(&sb->sb_sel);
	if (sb->sb_flags & SB_WAIT) {
		sb->sb_flags &= ~SB_WAIT;
		wakeup((caddr_t)&sb->sb_cc);
	}
	if (so->so_state & SS_ASYNC) {
		/* Negative so_pgid means a process group; positive, a pid. */
		if (so->so_pgid < 0)
			gsignal(-so->so_pgid, SIGIO);
		else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
			psignal(p, SIGIO);
	}
	if (sb->sb_flags & SB_KNOTE) {
		KNOTE(&sb->sb_sel.si_note, SO_FILT_HINT_LOCKED);
	}
	if (sb->sb_flags & SB_UPCALL) {
		/* Call the upcall with the socket unlocked to avoid
		 * re-entering the socket layer while holding its lock. */
		socket_unlock(so, 0);
		(*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT);
		socket_lock(so, 0);
	}
}
526
527/*
528 * Socket buffer (struct sockbuf) utility routines.
529 *
530 * Each socket contains two socket buffers: one for sending data and
531 * one for receiving data. Each buffer contains a queue of mbufs,
532 * information about the number of mbufs and amount of data in the
533 * queue, and other fields allowing select() statements and notification
534 * on data availability to be implemented.
535 *
536 * Data stored in a socket buffer is maintained as a list of records.
537 * Each record is a list of mbufs chained together with the m_next
538 * field. Records are chained together with the m_nextpkt field. The upper
539 * level routine soreceive() expects the following conventions to be
540 * observed when placing information in the receive buffer:
541 *
542 * 1. If the protocol requires each message be preceded by the sender's
543 * name, then a record containing that name must be present before
544 * any associated data (mbuf's must be of type MT_SONAME).
545 * 2. If the protocol supports the exchange of ``access rights'' (really
546 * just additional data associated with the message), and there are
547 * ``rights'' to be received, then a record containing this data
548 * should be present (mbuf's must be of type MT_RIGHTS).
549 * 3. If a name or rights record exists, then it must be followed by
550 * a data record, perhaps of zero length.
551 *
552 * Before using a new socket structure it is first necessary to reserve
553 * buffer space to the socket, by calling sbreserve(). This should commit
554 * some of the available buffer space in the system buffer pool for the
555 * socket (currently, it does nothing but enforce limits). The space
556 * should be released by calling sbrelease() when the socket is destroyed.
557 */
558
559int
560soreserve(so, sndcc, rcvcc)
561 register struct socket *so;
562 u_long sndcc, rcvcc;
563{
1c79356b
A
564
565 if (sbreserve(&so->so_snd, sndcc) == 0)
566 goto bad;
567 if (sbreserve(&so->so_rcv, rcvcc) == 0)
568 goto bad2;
569 if (so->so_rcv.sb_lowat == 0)
570 so->so_rcv.sb_lowat = 1;
571 if (so->so_snd.sb_lowat == 0)
572 so->so_snd.sb_lowat = MCLBYTES;
573 if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
574 so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
575 return (0);
576bad2:
9bccf70c 577#ifdef __APPLE__
0b4e3aa0 578 selthreadclear(&so->so_snd.sb_sel);
9bccf70c 579#endif
1c79356b
A
580 sbrelease(&so->so_snd);
581bad:
582 return (ENOBUFS);
583}
584
585/*
586 * Allot mbufs to a sockbuf.
587 * Attempt to scale mbmax so that mbcnt doesn't become limiting
588 * if buffering efficiency is near the normal case.
589 */
590int
591sbreserve(sb, cc)
592 struct sockbuf *sb;
593 u_long cc;
594{
595 if ((u_quad_t)cc > (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES))
596 return (0);
597 sb->sb_hiwat = cc;
598 sb->sb_mbmax = min(cc * sb_efficiency, sb_max);
599 if (sb->sb_lowat > sb->sb_hiwat)
600 sb->sb_lowat = sb->sb_hiwat;
601 return (1);
602}
603
604/*
605 * Free mbufs held by a socket, and reserved mbuf space.
606 */
0b4e3aa0 607 /* WARNING needs to do selthreadclear() before calling this */
1c79356b
A
608void
609sbrelease(sb)
610 struct sockbuf *sb;
611{
612
613 sbflush(sb);
9bccf70c
A
614 sb->sb_hiwat = 0;
615 sb->sb_mbmax = 0;
616
1c79356b
A
617}
618
619/*
620 * Routines to add and remove
621 * data from an mbuf queue.
622 *
623 * The routines sbappend() or sbappendrecord() are normally called to
624 * append new mbufs to a socket buffer, after checking that adequate
625 * space is available, comparing the function sbspace() with the amount
626 * of data to be added. sbappendrecord() differs from sbappend() in
627 * that data supplied is treated as the beginning of a new record.
628 * To place a sender's address, optional access rights, and data in a
629 * socket receive buffer, sbappendaddr() should be used. To place
630 * access rights and data in a socket receive buffer, sbappendrights()
631 * should be used. In either case, the new data begins a new record.
632 * Note that unlike sbappend() and sbappendrecord(), these routines check
633 * for the caller that there will be enough space to store the data.
634 * Each fails if there is not enough space, or if it cannot find mbufs
635 * to store additional information in.
636 *
637 * Reliable protocols may use the socket send buffer to hold data
638 * awaiting acknowledgement. Data is normally copied from a socket
639 * send buffer in a protocol with m_copy for output to a peer,
640 * and then removing the data from the socket buffer with sbdrop()
641 * or sbdroprecord() when the data is acknowledged by the peer.
642 */
643
644/*
645 * Append mbuf chain m to the last record in the
646 * socket buffer sb. The additional space associated
647 * the mbuf chain is recorded in sb. Empty mbufs are
648 * discarded and mbufs are compacted where possible.
649 */
91447636 650int
1c79356b
A
651sbappend(sb, m)
652 struct sockbuf *sb;
653 struct mbuf *m;
9bccf70c 654{
91447636
A
655 register struct mbuf *n, *sb_first;
656 int result = 0;
657 int error = 0;
cc9f6e38 658 int filtered = 0;
1c79356b 659
fa4905b1
A
660
661 KERNEL_DEBUG((DBG_FNC_SBAPPEND | DBG_FUNC_START), sb, m->m_len, 0, 0, 0);
662
1c79356b 663 if (m == 0)
91447636 664 return 0;
cc9f6e38
A
665
666again:
91447636 667 sb_first = n = sb->sb_mb;
9bccf70c 668 if (n) {
1c79356b
A
669 while (n->m_nextpkt)
670 n = n->m_nextpkt;
671 do {
672 if (n->m_flags & M_EOR) {
91447636 673 result = sbappendrecord(sb, m); /* XXXXXX!!!! */
55e303ae 674 KERNEL_DEBUG((DBG_FNC_SBAPPEND | DBG_FUNC_END), sb, sb->sb_cc, 0, 0, 0);
91447636 675 return result;
1c79356b
A
676 }
677 } while (n->m_next && (n = n->m_next));
678 }
91447636 679
cc9f6e38
A
680 if (!filtered && (sb->sb_flags & SB_RECV) != 0) {
681 error = sflt_data_in(sb->sb_so, NULL, &m, NULL, 0, &filtered);
91447636
A
682 if (error) {
683 /* no data was appended, caller should not call sowakeup */
684 return 0;
685 }
cc9f6e38
A
686
687 /*
688 If we any filters, the socket lock was dropped. n and sb_first
689 cached data from the socket buffer. This cache is not valid
690 since we dropped the lock. We must start over. Since filtered
691 is set we won't run through the filters a second time. We just
692 set n and sb_start again.
693 */
694 if (filtered)
695 goto again;
91447636
A
696 }
697
698 result = sbcompress(sb, m, n);
fa4905b1
A
699
700 KERNEL_DEBUG((DBG_FNC_SBAPPEND | DBG_FUNC_END), sb, sb->sb_cc, 0, 0, 0);
91447636
A
701
702 return result;
1c79356b
A
703}
704
#ifdef SOCKBUF_DEBUG
/*
 * Debug-only sanity check: recompute the byte count and mbuf space
 * charged to a sockbuf by walking every record and panic if they
 * disagree with the cached sb_cc/sb_mbcnt.  Gated at runtime by the
 * `sbchecking` flag (defined elsewhere).
 */
void
sbcheck(sb)
	register struct sockbuf *sb;
{
	register struct mbuf *m;
	register struct mbuf *n = 0;
	register u_long len = 0, mbcnt = 0;
	lck_mtx_t *mutex_held;

	if (sb->sb_so->so_proto->pr_getlock != NULL)
		mutex_held = (*sb->sb_so->so_proto->pr_getlock)(sb->sb_so, 0);
	else
		mutex_held = sb->sb_so->so_proto->pr_domain->dom_mtx;

	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

	if (sbchecking == 0)
		return;

	/* Outer loop walks records, inner loop walks each chain. */
	for (m = sb->sb_mb; m; m = n) {
		n = m->m_nextpkt;
		for (; m; m = m->m_next) {
			len += m->m_len;
			mbcnt += MSIZE;
			if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */
				mbcnt += m->m_ext.ext_size;
		}
	}
	if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
		panic("cc %ld != %ld || mbcnt %ld != %ld\n", len, sb->sb_cc,
			mbcnt, sb->sb_mbcnt);
	}
}
#endif
740
741/*
742 * As above, except the mbuf chain
743 * begins a new record.
744 */
/*
 * Append mbuf chain m0 to sb as the start of a NEW record, after
 * giving receive-side socket filters a chance to modify or reject
 * the data.  Returns the sbcompress() result, or 0 when nothing was
 * appended.
 */
int
sbappendrecord(sb, m0)
	register struct sockbuf *sb;
	struct mbuf *m0;
{
	register struct mbuf *m;
	int result = 0;

	if (m0 == 0)
		return 0;

	if ((sb->sb_flags & SB_RECV) != 0) {
		int error = sflt_data_in(sb->sb_so, NULL, &m0, NULL, sock_data_filt_flag_record, NULL);
		if (error != 0) {
			/* EJUSTRETURN means the filter kept the mbufs. */
			if (error != EJUSTRETURN)
				m_freem(m0);
			return 0;
		}
	}

	/* Find the last record so the new one can be linked after it. */
	m = sb->sb_mb;
	if (m)
		while (m->m_nextpkt)
			m = m->m_nextpkt;
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	if (m)
		m->m_nextpkt = m0;
	else
		sb->sb_mb = m0;
	m = m0->m_next;
	m0->m_next = 0;
	/* If the head carried M_EOR, push it onto the rest of the chain. */
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	return sbcompress(sb, m, m0);
}
786
787/*
788 * As above except that OOB data
789 * is inserted at the beginning of the sockbuf,
790 * but after any other OOB data.
791 */
/*
 * Insert out-of-band data chain m0 at the beginning of sb, but after
 * any OOB data already queued.  Receive-side socket filters run first
 * when SB_RECV is set.  Returns the sbcompress() result, or 0 when
 * nothing was inserted.
 */
int
sbinsertoob(sb, m0)
	struct sockbuf *sb;
	struct mbuf *m0;
{
	struct mbuf *m;
	struct mbuf **mp;

	if (m0 == 0)
		return 0;

	if ((sb->sb_flags & SB_RECV) != 0) {
		int error = sflt_data_in(sb->sb_so, NULL, &m0, NULL,
			sock_data_filt_flag_oob, NULL);

		if (error) {
			/* EJUSTRETURN means the filter kept the mbufs. */
			if (error != EJUSTRETURN) {
				m_freem(m0);
			}
			return 0;
		}
	}

	/*
	 * Scan past existing OOB records (and the control mbufs that
	 * lead a record) to find the insertion point.
	 */
	for (mp = &sb->sb_mb; *mp ; mp = &((*mp)->m_nextpkt)) {
		m = *mp;
	again:
		switch (m->m_type) {

		case MT_OOBDATA:
			continue;		/* WANT next train */

		case MT_CONTROL:
			m = m->m_next;
			if (m)
				goto again;	/* inspect THIS train further */
		}
		break;
	}
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	m0->m_nextpkt = *mp;
	*mp = m0;
	m = m0->m_next;
	m0->m_next = 0;
	/* If the head carried M_EOR, push it onto the rest of the chain. */
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	return sbcompress(sb, m, m0);
}
845
846/*
847 * Append address and data, and optionally, control (ancillary) data
848 * to the receive queue of a socket. If present,
849 * m0 must include a packet header with total length.
850 * Returns 0 if no space in sockbuf or insufficient mbufs.
851 */
91447636
A
/*
 * Append a sender address (asa), optional control mbufs, and optional
 * data chain m0 to sb as a single new record: address mbuf first,
 * then control, then data.  Returns 0 if there is no space or no
 * mbuf for the address, 1 on success.  If m0 is non-NULL it must
 * carry a packet header with the total length.
 */
static int
sbappendaddr_internal(sb, asa, m0, control)
	register struct sockbuf *sb;
	struct sockaddr *asa;
	struct mbuf *m0, *control;
{
	register struct mbuf *m, *n;
	int space = asa->sa_len;

	if (m0 && (m0->m_flags & M_PKTHDR) == 0)
		panic("sbappendaddr");

	if (m0)
		space += m0->m_pkthdr.len;
	for (n = control; n; n = n->m_next) {
		space += n->m_len;
		if (n->m_next == 0)	/* keep pointer to last control buf */
			break;
	}
	if (space > sbspace(sb))
		return (0);
	/* The whole address must fit in one ordinary mbuf. */
	if (asa->sa_len > MLEN)
		return (0);
	MGET(m, M_DONTWAIT, MT_SONAME);
	if (m == 0)
		return (0);
	m->m_len = asa->sa_len;
	bcopy((caddr_t)asa, mtod(m, caddr_t), asa->sa_len);
	if (n)
		n->m_next = m0;		/* concatenate data to control */
	else
		control = m0;
	m->m_next = control;
	/* Charge every mbuf of the assembled record to the sockbuf. */
	for (n = m; n; n = n->m_next)
		sballoc(sb, n);
	/* Link the record after the last record already queued. */
	n = sb->sb_mb;
	if (n) {
		while (n->m_nextpkt)
			n = n->m_nextpkt;
		n->m_nextpkt = m;
	} else
		sb->sb_mb = m;
	postevent(0,sb,EV_RWBYTES);
	return (1);
}
897
898int
91447636
A
899sbappendaddr(
900 struct sockbuf* sb,
901 struct sockaddr* asa,
902 struct mbuf *m0,
903 struct mbuf *control,
904 int *error_out)
905{
906 int result = 0;
907
908 if (error_out) *error_out = 0;
909
910 if (m0 && (m0->m_flags & M_PKTHDR) == 0)
911 panic("sbappendaddrorfree");
912
913 /* Call socket data in filters */
914 if ((sb->sb_flags & SB_RECV) != 0) {
915 int error;
cc9f6e38 916 error = sflt_data_in(sb->sb_so, asa, &m0, &control, 0, NULL);
91447636
A
917 if (error) {
918 if (error != EJUSTRETURN) {
919 if (m0) m_freem(m0);
920 if (control) m_freem(control);
921 if (error_out) *error_out = error;
922 }
923 return 0;
924 }
925 }
926
927 result = sbappendaddr_internal(sb, asa, m0, control);
928 if (result == 0) {
929 if (m0) m_freem(m0);
930 if (control) m_freem(control);
931 if (error_out) *error_out = ENOBUFS;
932 }
933
934 return result;
935}
936
/*
 * Append control mbufs followed by data chain m0 to sb as one new
 * record.  control must be non-NULL.  Returns 0 if the record does
 * not fit in the sockbuf, 1 on success.
 */
static int
sbappendcontrol_internal(sb, m0, control)
	struct sockbuf *sb;
	struct mbuf *control, *m0;
{
	register struct mbuf *m, *n;
	int space = 0;

	if (control == 0)
		panic("sbappendcontrol");

	for (m = control; ; m = m->m_next) {
		space += m->m_len;
		if (m->m_next == 0)
			break;
	}
	n = m;			/* save pointer to last control buffer */
	for (m = m0; m; m = m->m_next)
		space += m->m_len;
	if (space > sbspace(sb))
		return (0);
	n->m_next = m0;		/* concatenate data to control */
	/* Charge the combined chain to the sockbuf. */
	for (m = control; m; m = m->m_next)
		sballoc(sb, m);
	/* Link the record after the last record already queued. */
	n = sb->sb_mb;
	if (n) {
		while (n->m_nextpkt)
			n = n->m_nextpkt;
		n->m_nextpkt = control;
	} else
		sb->sb_mb = control;
	postevent(0,sb,EV_RWBYTES);
	return (1);
}
971
91447636
A
972int
973sbappendcontrol(
974 struct sockbuf *sb,
975 struct mbuf *m0,
976 struct mbuf *control,
977 int *error_out)
978{
979 int result = 0;
980
981 if (error_out) *error_out = 0;
982
983 if (sb->sb_flags & SB_RECV) {
984 int error;
cc9f6e38 985 error = sflt_data_in(sb->sb_so, NULL, &m0, &control, 0, NULL);
91447636
A
986 if (error) {
987 if (error != EJUSTRETURN) {
988 if (m0) m_freem(m0);
989 if (control) m_freem(control);
990 if (error_out) *error_out = error;
991 }
992 return 0;
993 }
994 }
995
996 result = sbappendcontrol_internal(sb, m0, control);
997 if (result == 0) {
998 if (m0) m_freem(m0);
999 if (control) m_freem(control);
1000 if (error_out) *error_out = ENOBUFS;
1001 }
1002
1003 return result;
1004}
1005
1c79356b
A
/*
 * Compress mbuf chain m into the socket
 * buffer sb following mbuf n. If n
 * is null, the buffer is presumed empty.
 */
static int
sbcompress(sb, m, n)
	register struct sockbuf *sb;
	register struct mbuf *m, *n;
{
	register int eor = 0;
	register struct mbuf *o;

	while (m) {
		/* remember whether any mbuf seen so far carried end-of-record */
		eor |= m->m_flags & M_EOR;
		/*
		 * Discard empty mbufs, unless this one carries an EOR mark
		 * and there is no same-type successor (either further down
		 * m's chain or in n) to move the mark onto.
		 */
		if (m->m_len == 0 &&
		    (eor == 0 ||
		     (((o = m->m_next) || (o = n)) &&
		      o->m_type == m->m_type))) {
			m = m_free(m);
			continue;
		}
		/*
		 * Coalesce a small mbuf into the tail mbuf n when n is not
		 * itself an end-of-record, the types match, and n has room.
		 * The MCLBYTES/4 cap bounds the bcopy cost.
		 */
		if (n && (n->m_flags & M_EOR) == 0 &&
#ifndef __APPLE__
		    M_WRITABLE(n) &&
#endif
		    m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
		    m->m_len <= M_TRAILINGSPACE(n) &&
		    n->m_type == m->m_type) {
			bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
			    (unsigned)m->m_len);
			n->m_len += m->m_len;
			sb->sb_cc += m->m_len;
			m = m_free(m);
			continue;
		}
		/* otherwise link m after n (or make it the first mbuf) */
		if (n)
			n->m_next = m;
		else
			sb->sb_mb = m;
		sballoc(sb, m);
		n = m;
		/* EOR is stripped here and restored on the final mbuf below */
		m->m_flags &= ~M_EOR;
		m = m->m_next;
		n->m_next = 0;
	}
	/* reapply the end-of-record mark to the last mbuf kept */
	if (eor) {
		if (n)
			n->m_flags |= eor;
		else
			printf("semi-panic: sbcompress\n");
	}
	postevent(0,sb, EV_RWBYTES);
	return 1;
}
1061
1062/*
1063 * Free all mbufs in a sockbuf.
1064 * Check that all resources are reclaimed.
1065 */
1066void
1067sbflush(sb)
1068 register struct sockbuf *sb;
1069{
91447636
A
1070 if (sb->sb_so == NULL)
1071 panic ("sbflush sb->sb_so already null sb=%x\n", sb);
55e303ae 1072 (void)sblock(sb, M_WAIT);
9bccf70c
A
1073 while (sb->sb_mbcnt) {
1074 /*
1075 * Don't call sbdrop(sb, 0) if the leading mbuf is non-empty:
1076 * we would loop forever. Panic instead.
1077 */
1078 if (!sb->sb_cc && (sb->sb_mb == NULL || sb->sb_mb->m_len))
1079 break;
1c79356b 1080 sbdrop(sb, (int)sb->sb_cc);
9bccf70c 1081 }
91447636
A
1082 if (sb->sb_cc || sb->sb_mb || sb->sb_mbcnt || sb->sb_so == NULL)
1083 panic("sbflush: cc %ld || mb %p || mbcnt %ld sb_so=%x", sb->sb_cc, (void *)sb->sb_mb, sb->sb_mbcnt, sb->sb_so);
55e303ae 1084
1c79356b 1085 postevent(0, sb, EV_RWBYTES);
91447636
A
1086 sbunlock(sb, 1); /* keep socket locked */
1087
1c79356b
A
1088}
1089
/*
 * Drop data from (the front of) a sockbuf.
 * use m_freem_list to free the mbuf structures
 * under a single lock... this is done by pruning
 * the top of the tree from the body by keeping track
 * of where we get to in the tree and then zeroing the
 * two pertinent pointers m_nextpkt and m_next
 * the socket buffer is then updated to point at the new
 * top of the tree and the pruned area is released via
 * m_freem_list.
 */
void
sbdrop(sb, len)
	register struct sockbuf *sb;
	register int len;
{
	register struct mbuf *m, *free_list, *ml;
	struct mbuf *next, *last;

	KERNEL_DEBUG((DBG_FNC_SBDROP | DBG_FUNC_START), sb, len, 0, 0, 0);

	/* next = head of the following record (if any); free_list = prune root */
	next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
	free_list = last = m;
	ml = (struct mbuf *)0;	/* last mbuf known to be fully consumed */

	while (len > 0) {
		if (m == 0) {
			if (next == 0) {
				/* temporarily replacing this panic with printf because
				 * it occurs occasionally when closing a socket when there
				 * is no harm in ignoring it. This problem will be investigated
				 * further.
				 */
				/* panic("sbdrop"); */
				printf("sbdrop - count not zero\n");
				len = 0;
				/* zero the counts. if we have no mbufs, we have no data (PR-2986815) */
				sb->sb_cc = 0;
				sb->sb_mbcnt = 0;
				break;
			}
			/* this record is exhausted; advance to the next one */
			m = last = next;
			next = m->m_nextpkt;
			continue;
		}
		if (m->m_len > len) {
			/* partial drop: trim the front of this mbuf and stop */
			m->m_len -= len;
			m->m_data += len;
			sb->sb_cc -= len;
			break;
		}
		/* whole mbuf consumed: uncharge it and move on */
		len -= m->m_len;
		sbfree(sb, m);

		ml = m;
		m = m->m_next;
	}
	/* also consume any now-empty mbufs left at the new front */
	while (m && m->m_len == 0) {
		sbfree(sb, m);

		ml = m;
		m = m->m_next;
	}
	if (ml) {
		/* detach the pruned span and release it in a single pass */
		ml->m_next = (struct mbuf *)0;
		last->m_nextpkt = (struct mbuf *)0;
		m_freem_list(free_list);
	}
	/* point the sockbuf at the new head (remainder or next record) */
	if (m) {
		sb->sb_mb = m;
		m->m_nextpkt = next;
	} else
		sb->sb_mb = next;

	postevent(0, sb, EV_RWBYTES);

	KERNEL_DEBUG((DBG_FNC_SBDROP | DBG_FUNC_END), sb, 0, 0, 0, 0);
}
1168
1169/*
1170 * Drop a record off the front of a sockbuf
1171 * and move the next record to the front.
1172 */
1173void
1174sbdroprecord(sb)
1175 register struct sockbuf *sb;
1176{
1177 register struct mbuf *m, *mn;
1c79356b
A
1178
1179 m = sb->sb_mb;
1180 if (m) {
1181 sb->sb_mb = m->m_nextpkt;
1182 do {
1183 sbfree(sb, m);
1184 MFREE(m, mn);
9bccf70c
A
1185 m = mn;
1186 } while (m);
1c79356b
A
1187 }
1188 postevent(0, sb, EV_RWBYTES);
1189}
1190
1191/*
1192 * Create a "control" mbuf containing the specified data
1193 * with the specified type for presentation on a socket buffer.
1194 */
1195struct mbuf *
1196sbcreatecontrol(p, size, type, level)
1197 caddr_t p;
1198 register int size;
1199 int type, level;
1200{
1201 register struct cmsghdr *cp;
1202 struct mbuf *m;
1203
9bccf70c
A
1204 if (CMSG_SPACE((u_int)size) > MLEN)
1205 return ((struct mbuf *) NULL);
1c79356b
A
1206 if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL)
1207 return ((struct mbuf *) NULL);
1208 cp = mtod(m, struct cmsghdr *);
1209 /* XXX check size? */
1210 (void)memcpy(CMSG_DATA(cp), p, size);
9bccf70c
A
1211 m->m_len = CMSG_SPACE(size);
1212 cp->cmsg_len = CMSG_LEN(size);
1c79356b
A
1213 cp->cmsg_level = level;
1214 cp->cmsg_type = type;
1215 return (m);
1216}
1217
/*
 * Some routines that return EOPNOTSUPP for entry points that are not
 * supported by a protocol. Fill in as needed.
 *
 * Protocols install these in their pr_usrreqs table for operations
 * they do not implement; each simply reports EOPNOTSUPP.
 */
int
pru_abort_notsupp(struct socket *so)
{
	return EOPNOTSUPP;
}


int
pru_accept_notsupp(struct socket *so, struct sockaddr **nam)
{
	return EOPNOTSUPP;
}

int
pru_attach_notsupp(struct socket *so, int proto, struct proc *p)
{
	return EOPNOTSUPP;
}

int
pru_bind_notsupp(struct socket *so, struct sockaddr *nam, struct proc *p)
{
	return EOPNOTSUPP;
}

int
pru_connect_notsupp(struct socket *so, struct sockaddr *nam, struct proc *p)
{
	return EOPNOTSUPP;
}

int
pru_connect2_notsupp(struct socket *so1, struct socket *so2)
{
	return EOPNOTSUPP;
}

int
pru_control_notsupp(struct socket *so, u_long cmd, caddr_t data,
		    struct ifnet *ifp, struct proc *p)
{
	return EOPNOTSUPP;
}

int
pru_detach_notsupp(struct socket *so)
{
	return EOPNOTSUPP;
}

int
pru_disconnect_notsupp(struct socket *so)
{
	return EOPNOTSUPP;
}

int
pru_listen_notsupp(struct socket *so, struct proc *p)
{
	return EOPNOTSUPP;
}

int
pru_peeraddr_notsupp(struct socket *so, struct sockaddr **nam)
{
	return EOPNOTSUPP;
}

int
pru_rcvd_notsupp(struct socket *so, int flags)
{
	return EOPNOTSUPP;
}

int
pru_rcvoob_notsupp(struct socket *so, struct mbuf *m, int flags)
{
	return EOPNOTSUPP;
}

int
pru_send_notsupp(struct socket *so, int flags, struct mbuf *m,
		 struct sockaddr *addr, struct mbuf *control,
		 struct proc *p)

{
	return EOPNOTSUPP;
}
1310
1311
/*
 * This isn't really a ``null'' operation, but it's the default one
 * and doesn't do anything destructive.
 */
int
pru_sense_null(struct socket *so, struct stat *sb)
{
	/* advertise the send buffer high-water mark as the preferred I/O size */
	sb->st_blksize = so->so_snd.sb_hiwat;
	return 0;
}
1322
1323
/* More unsupported protocol entry-point stubs; each reports EOPNOTSUPP. */
int pru_sosend_notsupp(struct socket *so, struct sockaddr *addr,
		       struct uio *uio, struct mbuf *top,
		       struct mbuf *control, int flags)

{
	return EOPNOTSUPP;
}

int pru_soreceive_notsupp(struct socket *so,
			  struct sockaddr **paddr,
			  struct uio *uio, struct mbuf **mp0,
			  struct mbuf **controlp, int *flagsp)
{
	return EOPNOTSUPP;
}

int

pru_shutdown_notsupp(struct socket *so)
{
	return EOPNOTSUPP;
}

int
pru_sockaddr_notsupp(struct socket *so, struct sockaddr **nam)
{
	return EOPNOTSUPP;
}

int pru_sosend(struct socket *so, struct sockaddr *addr,
	       struct uio *uio, struct mbuf *top,
	       struct mbuf *control, int flags)
{
	return EOPNOTSUPP;
}

int pru_soreceive(struct socket *so,
		  struct sockaddr **paddr,
		  struct uio *uio, struct mbuf **mp0,
		  struct mbuf **controlp, int *flagsp)
{
	return EOPNOTSUPP;
}


int
pru_sopoll_notsupp(__unused struct socket *so, __unused int events,
		   __unused kauth_cred_t cred, __unused void *wql)
{
	return EOPNOTSUPP;
}
1375
1376
9bccf70c
A
1377#ifdef __APPLE__
1378/*
1379 * The following are macros on BSD and functions on Darwin
1380 */
1c79356b 1381
0b4e3aa0
A
1382/*
1383 * Do we need to notify the other side when I/O is possible?
1384 */
1385
1386int
1387sb_notify(struct sockbuf *sb)
1388{
55e303ae 1389 return ((sb->sb_flags & (SB_WAIT|SB_SEL|SB_ASYNC|SB_UPCALL|SB_KNOTE)) != 0);
0b4e3aa0
A
1390}
1391
1392/*
1393 * How much space is there in a socket buffer (so->so_snd or so->so_rcv)?
1394 * This is problematical if the fields are unsigned, as the space might
1395 * still be negative (cc > hiwat or mbcnt > mbmax). Should detect
1396 * overflow and return 0. Should use "lmin" but it doesn't exist now.
1397 */
1398long
1399sbspace(struct sockbuf *sb)
1400{
1401 return ((long) imin((int)(sb->sb_hiwat - sb->sb_cc),
1402 (int)(sb->sb_mbmax - sb->sb_mbcnt)));
1403}
1404
1405/* do we have to send all at once on a socket? */
1406int
1407sosendallatonce(struct socket *so)
1408{
1409 return (so->so_proto->pr_flags & PR_ATOMIC);
1410}
1411
1412/* can we read something from so? */
1413int
1414soreadable(struct socket *so)
1415{
1416 return (so->so_rcv.sb_cc >= so->so_rcv.sb_lowat ||
1417 (so->so_state & SS_CANTRCVMORE) ||
1418 so->so_comp.tqh_first || so->so_error);
1419}
1420
1421/* can we write something to so? */
1422
1423int
1424sowriteable(struct socket *so)
1425{
1426 return ((sbspace(&(so)->so_snd) >= (so)->so_snd.sb_lowat &&
1427 ((so->so_state&SS_ISCONNECTED) ||
1428 (so->so_proto->pr_flags&PR_CONNREQUIRED)==0)) ||
1429 (so->so_state & SS_CANTSENDMORE) ||
1430 so->so_error);
1431}
1432
1433/* adjust counters in sb reflecting allocation of m */
1434
1435void
1436sballoc(struct sockbuf *sb, struct mbuf *m)
1437{
1438 sb->sb_cc += m->m_len;
1439 sb->sb_mbcnt += MSIZE;
1440 if (m->m_flags & M_EXT)
1441 sb->sb_mbcnt += m->m_ext.ext_size;
1442}
1443
1444/* adjust counters in sb reflecting freeing of m */
1445void
1446sbfree(struct sockbuf *sb, struct mbuf *m)
1447{
1448 sb->sb_cc -= m->m_len;
1449 sb->sb_mbcnt -= MSIZE;
1450 if (m->m_flags & M_EXT)
1451 sb->sb_mbcnt -= m->m_ext.ext_size;
1452}
1453
1454/*
1455 * Set lock on sockbuf sb; sleep if lock is already held.
1456 * Unless SB_NOINTR is set on sockbuf, sleep is interruptible.
1457 * Returns error without lock if sleep is interrupted.
1458 */
1459int
1460sblock(struct sockbuf *sb, int wf)
1461{
1462 return(sb->sb_flags & SB_LOCK ?
1463 ((wf == M_WAIT) ? sb_lock(sb) : EWOULDBLOCK) :
1464 (sb->sb_flags |= SB_LOCK), 0);
1465}
1466
1467/* release lock on sockbuf sb */
1468void
91447636 1469sbunlock(struct sockbuf *sb, int keeplocked)
0b4e3aa0 1470{
91447636 1471 struct socket *so = sb->sb_so;
5d5c5d0d 1472 int lr_saved;
91447636
A
1473 lck_mtx_t *mutex_held;
1474
5d5c5d0d
A
1475
1476 lr_saved = (unsigned int) __builtin_return_address(0);
1477
0b4e3aa0 1478 sb->sb_flags &= ~SB_LOCK;
91447636
A
1479
1480 if (so->so_proto->pr_getlock != NULL)
1481 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
1482 else
1483 mutex_held = so->so_proto->pr_domain->dom_mtx;
1484
1485 if (keeplocked == 0)
1486 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
1487
0b4e3aa0
A
1488 if (sb->sb_flags & SB_WANT) {
1489 sb->sb_flags &= ~SB_WANT;
91447636
A
1490 if (so->so_usecount < 0)
1491 panic("sbunlock: b4 wakeup so=%x ref=%d lr=%x sb_flags=%x\n", sb->sb_so, so->so_usecount, lr_saved, sb->sb_flags);
1492
0b4e3aa0
A
1493 wakeup((caddr_t)&(sb)->sb_flags);
1494 }
91447636
A
1495 if (keeplocked == 0) { /* unlock on exit */
1496 so->so_usecount--;
1497 if (so->so_usecount < 0)
1498 panic("sbunlock: unlock on exit so=%x lr=%x sb_flags=%x\n", so, so->so_usecount,lr_saved, sb->sb_flags);
5d5c5d0d
A
1499 so->unlock_lr[so->next_unlock_lr] = (void *)lr_saved;
1500 so->next_unlock_lr = (so->next_unlock_lr+1) % SO_LCKDBG_MAX;
91447636
A
1501 lck_mtx_unlock(mutex_held);
1502 }
0b4e3aa0
A
1503}
1504
/* wake up anyone interested in the receive side of so (per sb_notify) */
void
sorwakeup(struct socket * so)
{
	if (sb_notify(&so->so_rcv))
		sowakeup(so, &so->so_rcv);
}
1511
/* wake up anyone interested in the send side of so (per sb_notify) */
void
sowwakeup(struct socket * so)
{
	if (sb_notify(&so->so_snd))
		sowakeup(so, &so->so_snd);
}
9bccf70c 1518#endif __APPLE__
0b4e3aa0 1519
1c79356b
A
1520/*
1521 * Make a copy of a sockaddr in a malloced buffer of type M_SONAME.
1522 */
1523struct sockaddr *
1524dup_sockaddr(sa, canwait)
1525 struct sockaddr *sa;
1526 int canwait;
1527{
1528 struct sockaddr *sa2;
1529
1530 MALLOC(sa2, struct sockaddr *, sa->sa_len, M_SONAME,
1531 canwait ? M_WAITOK : M_NOWAIT);
1532 if (sa2)
1533 bcopy(sa, sa2, sa->sa_len);
1534 return sa2;
1535}
1536
/*
 * Create an external-format (``xsocket'') structure using the information
 * in the kernel-format socket structure pointed to by so. This is done
 * to reduce the spew of irrelevant information over this interface,
 * to isolate user code from changes in the kernel structure, and
 * potentially to provide information-hiding if we decide that
 * some of this information should be hidden from users.
 */
void
sotoxsocket(struct socket *so, struct xsocket *xso)
{
	xso->xso_len = sizeof *xso;
	xso->xso_so = so;
	xso->so_type = so->so_type;
	xso->so_options = so->so_options;
	xso->so_linger = so->so_linger;
	xso->so_state = so->so_state;
	xso->so_pcb = so->so_pcb;
	/* so_proto may be absent (e.g. partially torn-down socket) */
	if (so->so_proto) {
		xso->xso_protocol = so->so_proto->pr_protocol;
		xso->xso_family = so->so_proto->pr_domain->dom_family;
	}
	else
		xso->xso_protocol = xso->xso_family = 0;
	xso->so_qlen = so->so_qlen;
	xso->so_incqlen = so->so_incqlen;
	xso->so_qlimit = so->so_qlimit;
	xso->so_timeo = so->so_timeo;
	xso->so_error = so->so_error;
	xso->so_pgid = so->so_pgid;
	xso->so_oobmark = so->so_oobmark;
	/* externalize both socket buffers as well */
	sbtoxsockbuf(&so->so_snd, &xso->so_snd);
	sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
	xso->so_uid = so->so_uid;
}
1572
/*
 * This does the same for sockbufs. Note that the xsockbuf structure,
 * since it is always embedded in a socket, does not include a self
 * pointer nor a length. We make this entry point public in case
 * some other mechanism needs it.
 */
void
sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb)
{
	xsb->sb_cc = sb->sb_cc;
	xsb->sb_hiwat = sb->sb_hiwat;
	xsb->sb_mbcnt = sb->sb_mbcnt;
	xsb->sb_mbmax = sb->sb_mbmax;
	xsb->sb_lowat = sb->sb_lowat;
	xsb->sb_flags = sb->sb_flags;
	/* convert the timeout to ticks; a nonzero sub-tick value rounds up to 1 */
	xsb->sb_timeo = (u_long)(sb->sb_timeo.tv_sec * hz) + sb->sb_timeo.tv_usec / tick;
	if (xsb->sb_timeo == 0 && sb->sb_timeo.tv_usec != 0)
		xsb->sb_timeo = 1;
}
1592
1593/*
1594 * Here is the definition of some of the basic objects in the kern.ipc
1595 * branch of the MIB.
1596 */
1c79356b
A
1597SYSCTL_NODE(_kern, KERN_IPC, ipc, CTLFLAG_RW, 0, "IPC");
1598
1599/* This takes the place of kern.maxsockbuf, which moved to kern.ipc. */
1600static int dummy;
1601SYSCTL_INT(_kern, KERN_DUMMY, dummy, CTLFLAG_RW, &dummy, 0, "");
1602
9bccf70c
A
1603SYSCTL_INT(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf, CTLFLAG_RW,
1604 &sb_max, 0, "Maximum socket buffer size");
1605SYSCTL_INT(_kern_ipc, OID_AUTO, maxsockets, CTLFLAG_RD,
1606 &maxsockets, 0, "Maximum number of sockets avaliable");
1c79356b
A
1607SYSCTL_INT(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW,
1608 &sb_efficiency, 0, "");
1609SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters, CTLFLAG_RD, &nmbclusters, 0, "");
1610