/*
 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License").  You may not use this file except in compliance with the
 * License.  Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1998, 1999 Apple Computer, Inc. All Rights Reserved */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket2.c	8.1 (Berkeley) 6/10/93
 * $FreeBSD: src/sys/kern/uipc_socket2.c,v 1.55.2.9 2001/07/26 18:53:02 peter Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/protosw.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/ev.h>
#include <kern/locks.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <sys/kdebug.h>

#define DBG_FNC_SBDROP		NETDBG_CODE(DBG_NETSOCK, 4)
#define DBG_FNC_SBAPPEND	NETDBG_CODE(DBG_NETSOCK, 5)

static int sbcompress(struct sockbuf *, struct mbuf *, struct mbuf *);

/*
 * Primitive routines for operating on sockets and socket buffers
 */

u_long	sb_max = SB_MAX;		/* XXX should be static */

static u_long sb_efficiency = 8;	/* parameter for sbreserve() */

/*
 * Procedures to manipulate state flags of socket
 * and do appropriate wakeups.  Normal sequence from the
 * active (originating) side is that soisconnecting() is
 * called during processing of connect() call,
 * resulting in an eventual call to soisconnected() if/when the
 * connection is established.  When the connection is torn down
 * soisdisconnecting() is called during processing of disconnect() call,
 * and soisdisconnected() is called when the connection to the peer
 * is totally severed.  The semantics of these routines are such that
 * connectionless protocols can call soisconnected() and soisdisconnected()
 * only, bypassing the in-progress calls when setting up a ``connection''
 * takes no time.
 *
 * From the passive side, a socket is created with
 * two queues of sockets: so_incomp for connections in progress
 * and so_comp for connections already made and awaiting user acceptance.
 * As a protocol is preparing incoming connections, it creates a socket
 * structure queued on so_incomp by calling sonewconn().  When the connection
 * is established, soisconnected() is called, and transfers the
 * socket structure to so_comp, making it available to accept().
 *
 * If a socket is closed with sockets on either
 * so_incomp or so_comp, these sockets are dropped.
 *
 * If higher level protocols are implemented in
 * the kernel, the wakeups done here will sometimes
 * cause software-interrupt process scheduling.
 */

void
soisconnecting(so)
	register struct socket *so;
{

	so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= SS_ISCONNECTING;

	sflt_notify(so, sock_evt_connecting, NULL);
}

void
soisconnected(so)
	struct socket *so;
{
	struct socket *head = so->so_head;

	so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
	so->so_state |= SS_ISCONNECTED;

	sflt_notify(so, sock_evt_connected, NULL);

	if (head && (so->so_state & SS_INCOMP)) {
		so->so_state &= ~SS_INCOMP;
		so->so_state |= SS_COMP;
		if (head->so_proto->pr_getlock != NULL) {
			socket_unlock(so, 0);
			socket_lock(head, 1);
		}
		postevent(head, 0, EV_RCONN);
		TAILQ_REMOVE(&head->so_incomp, so, so_list);
		head->so_incqlen--;
		TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
		sorwakeup(head);
		wakeup_one((caddr_t)&head->so_timeo);
		if (head->so_proto->pr_getlock != NULL) {
			socket_unlock(head, 1);
			socket_lock(so, 0);
		}
	} else {
		postevent(so, 0, EV_WCONN);
		wakeup((caddr_t)&so->so_timeo);
		sorwakeup(so);
		sowwakeup(so);
	}
}

void
soisdisconnecting(so)
	register struct socket *so;
{
	so->so_state &= ~SS_ISCONNECTING;
	so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
	sflt_notify(so, sock_evt_disconnecting, NULL);
	wakeup((caddr_t)&so->so_timeo);
	sowwakeup(so);
	sorwakeup(so);
}

void
soisdisconnected(so)
	register struct socket *so;
{
	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
	sflt_notify(so, sock_evt_disconnected, NULL);
	wakeup((caddr_t)&so->so_timeo);
	sowwakeup(so);
	sorwakeup(so);
}
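
#if 0	/* illustrative sketch only -- not part of this file */
/*
 * Hypothetical example of the connectionless case described above: a
 * protocol for which setting up a ``connection'' takes no time may
 * bypass the in-progress states and call the final transitions directly.
 */
static void
example_dgram_connect(struct socket *so)
{
	soisconnected(so);	/* no handshake: connected immediately */
}

static void
example_dgram_disconnect(struct socket *so)
{
	soisdisconnected(so);	/* ...and severed just as directly */
}
#endif
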

/*
 * Return a random connection that hasn't been serviced yet and
 * is eligible for discard.  There is a one in qlen chance that
 * we will return a null, saying that there are no droppable
 * requests.  In this case, the protocol specific code should drop
 * the new request.  This ensures fairness.
 *
 * This may be used in conjunction with protocol specific queue
 * congestion routines.
 */
struct socket *
sodropablereq(head)
	register struct socket *head;
{
	struct socket *so, *sonext = NULL;
	unsigned int i, j, qlen;
	static int rnd;
	static struct timeval old_runtime;
	static unsigned int cur_cnt, old_cnt;
	struct timeval tv;

	microtime(&tv);
	if ((i = (tv.tv_sec - old_runtime.tv_sec)) != 0) {
		old_runtime = tv;
		old_cnt = cur_cnt / i;
		cur_cnt = 0;
	}

	so = TAILQ_FIRST(&head->so_incomp);
	if (!so)
		return (NULL);

	qlen = head->so_incqlen;
	if (++cur_cnt > qlen || old_cnt > qlen) {
		rnd = (314159 * rnd + 66329) & 0xffff;
		j = ((qlen + 1) * rnd) >> 16;
//###LD To clean up
		while (j-- && so) {
//			if (in_pcb_checkstate(so->so_pcb, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
			socket_lock(so, 1);
			sonext = TAILQ_NEXT(so, so_list);
//			in_pcb_check_state(so->so_pcb, WNT_RELEASE, 0);
			socket_unlock(so, 1);
			so = sonext;
		}
	}

//	if (in_pcb_checkstate(so->so_pcb, WNT_ACQUIRE, 0) == WNT_STOPUSING)
//		return (NULL);
//	else
	return (so);
}
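
/*
 * Worked example (illustrative): with qlen = 9 and a 16-bit rnd value of
 * 0x8000, j = ((9 + 1) * 0x8000) >> 16 = 5, so the walk above stops at the
 * sixth entry on so_incomp.  Values of j up to qlen are possible, and
 * j == qlen walks past the last entry, yielding the "no droppable request"
 * outcome described in the comment above.
 */
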

/*
 * When an attempt at a new connection is noted on a socket
 * which accepts connections, sonewconn is called.  If the
 * connection is possible (subject to space constraints, etc.)
 * then we allocate a new structure, properly linked into the
 * data structure of the original socket, and return this.
 * Connstatus may be 0, or SO_ISCONFIRMING, or SO_ISCONNECTED.
 */
static struct socket *
sonewconn_internal(head, connstatus)
	register struct socket *head;
	int connstatus;
{
	int error = 0;
	register struct socket *so;
	lck_mtx_t *mutex_held;

	if (head->so_proto->pr_getlock != NULL)
		mutex_held = (*head->so_proto->pr_getlock)(head, 0);
	else
		mutex_held = head->so_proto->pr_domain->dom_mtx;
	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

	if (head->so_qlen > 3 * head->so_qlimit / 2)
		return ((struct socket *)0);
	so = soalloc(1, head->so_proto->pr_domain->dom_family, head->so_type);
	if (so == NULL)
		return ((struct socket *)0);
	/* check if head was closed during the soalloc */
	if (head->so_proto == NULL) {
		sodealloc(so);
		return ((struct socket *)0);
	}

	so->so_head = head;
	so->so_type = head->so_type;
	so->so_options = head->so_options &~ SO_ACCEPTCONN;
	so->so_linger = head->so_linger;
	so->so_state = head->so_state | SS_NOFDREF;
	so->so_proto = head->so_proto;
	so->so_timeo = head->so_timeo;
	so->so_pgid = head->so_pgid;
	so->so_uid = head->so_uid;
	so->so_usecount = 1;
	so->next_lock_lr = 0;
	so->next_unlock_lr = 0;

#ifdef __APPLE__
	so->so_rcv.sb_flags |= SB_RECV;	/* XXX */
	so->so_rcv.sb_so = so->so_snd.sb_so = so;
	TAILQ_INIT(&so->so_evlist);
#endif

	if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat)) {
		sflt_termsock(so);
		sodealloc(so);
		return ((struct socket *)0);
	}

	/*
	 * Must be done with head unlocked to avoid deadlock
	 * for protocols with per-socket mutexes.
	 */
	if (head->so_proto->pr_unlock)
		socket_unlock(head, 0);
	if (((*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL) != 0) || error) {
		sflt_termsock(so);
		sodealloc(so);
		if (head->so_proto->pr_unlock)
			socket_lock(head, 0);
		return ((struct socket *)0);
	}
	if (head->so_proto->pr_unlock)
		socket_lock(head, 0);
#ifdef __APPLE__
	so->so_proto->pr_domain->dom_refs++;
#endif

	if (connstatus) {
		TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
		so->so_state |= SS_COMP;
	} else {
		TAILQ_INSERT_TAIL(&head->so_incomp, so, so_list);
		so->so_state |= SS_INCOMP;
		head->so_incqlen++;
	}
	head->so_qlen++;

#ifdef __APPLE__
	/* Attach socket filters for this protocol */
	sflt_initsock(so);
#endif
	if (connstatus) {
		so->so_state |= connstatus;
		sorwakeup(head);
		wakeup((caddr_t)&head->so_timeo);
	}
	return (so);
}

struct socket *
sonewconn(
	struct socket *head,
	int connstatus,
	const struct sockaddr *from)
{
	int error = 0;
	struct socket_filter_entry *filter;
	int filtered = 0;

	error = 0;
	for (filter = head->so_filt; filter && (error == 0);
	    filter = filter->sfe_next_onsocket) {
		if (filter->sfe_filter->sf_filter.sf_connect_in) {
			if (filtered == 0) {
				filtered = 1;
				sflt_use(head);
				socket_unlock(head, 0);
			}
			error = filter->sfe_filter->sf_filter.sf_connect_in(
			    filter->sfe_cookie, head, from);
		}
	}
	if (filtered != 0) {
		socket_lock(head, 0);
		sflt_unuse(head);
	}

	if (error) {
		return NULL;
	}

	return sonewconn_internal(head, connstatus);
}
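
#if 0	/* illustrative sketch only -- not part of this file */
/*
 * Hypothetical example of a protocol input path using sonewconn():
 * on a new connection request for a listening socket, ask for a child
 * socket queued on so_incomp (connstatus 0).  sonewconn() returns NULL
 * when the partial queue is overfull or a connect-in filter rejects
 * the request, in which case the protocol should drop the request.
 */
static struct socket *
example_incoming_request(struct socket *head, const struct sockaddr *from)
{
	struct socket *so;

	so = sonewconn(head, 0, from);
	if (so == NULL)
		return (NULL);	/* queue full or filtered: drop request */
	/* protocol-specific attachment of so would continue here */
	return (so);
}
#endif
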
/*
 * Socantsendmore indicates that no more data will be sent on the
 * socket; it would normally be applied to a socket when the user
 * informs the system that no more data is to be sent, by the protocol
 * code (in case PRU_SHUTDOWN).  Socantrcvmore indicates that no more data
 * will be received, and will normally be applied to the socket by a
 * protocol when it detects that the peer will send no more data.
 * Data queued for reading in the socket may yet be read.
 */

void
socantsendmore(so)
	struct socket *so;
{
	so->so_state |= SS_CANTSENDMORE;
	sflt_notify(so, sock_evt_cantsendmore, NULL);
	sowwakeup(so);
}

void
socantrcvmore(so)
	struct socket *so;
{
	so->so_state |= SS_CANTRCVMORE;
	sflt_notify(so, sock_evt_cantrecvmore, NULL);
	sorwakeup(so);
}

/*
 * Wait for data to arrive at/drain from a socket buffer.
 */
int
sbwait(sb)
	struct sockbuf *sb;
{
	int error = 0, lr_saved;
	struct socket *so = sb->sb_so;
	lck_mtx_t *mutex_held;
	struct timespec ts;

	lr_saved = (unsigned int) __builtin_return_address(0);

	if (so->so_proto->pr_getlock != NULL)
		mutex_held = (*so->so_proto->pr_getlock)(so, 0);
	else
		mutex_held = so->so_proto->pr_domain->dom_mtx;

	sb->sb_flags |= SB_WAIT;

	if (so->so_usecount < 1)
		panic("sbwait: so=%x refcount=%d\n", so, so->so_usecount);
	ts.tv_sec = sb->sb_timeo.tv_sec;
	ts.tv_nsec = sb->sb_timeo.tv_usec * 1000;
	error = msleep((caddr_t)&sb->sb_cc, mutex_held,
	    (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait",
	    &ts);

	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

	if (so->so_usecount < 1)
		panic("sbwait: so=%x refcount=%d\n", so, so->so_usecount);

	if ((so->so_state & SS_DRAINING)) {
		error = EBADF;
	}

	return (error);
}

/*
 * Lock a sockbuf already known to be locked;
 * return any error returned from sleep (EINTR).
 */
int
sb_lock(sb)
	register struct sockbuf *sb;
{
	struct socket *so = sb->sb_so;
	lck_mtx_t *mutex_held;
	int error = 0;

	if (so == NULL)
		panic("sb_lock: null so back pointer sb=%x\n", sb);

	while (sb->sb_flags & SB_LOCK) {
		sb->sb_flags |= SB_WANT;
		if (so->so_proto->pr_getlock != NULL)
			mutex_held = (*so->so_proto->pr_getlock)(so, 0);
		else
			mutex_held = so->so_proto->pr_domain->dom_mtx;
		if (so->so_usecount < 1)
			panic("sb_lock: so=%x refcount=%d\n", so, so->so_usecount);

		error = msleep((caddr_t)&sb->sb_flags, mutex_held,
		    (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sblock", 0);
		if (so->so_usecount < 1)
			panic("sb_lock: 2 so=%x refcount=%d\n", so, so->so_usecount);
		if (error)
			return (error);
	}
	sb->sb_flags |= SB_LOCK;
	return (0);
}

/*
 * Wakeup processes waiting on a socket buffer.
 * Do asynchronous notification via SIGIO
 * if the socket has the SS_ASYNC flag set.
 */
void
sowakeup(so, sb)
	register struct socket *so;
	register struct sockbuf *sb;
{
	struct proc *p = current_proc();
	sb->sb_flags &= ~SB_SEL;
	selwakeup(&sb->sb_sel);
	if (sb->sb_flags & SB_WAIT) {
		sb->sb_flags &= ~SB_WAIT;
		wakeup((caddr_t)&sb->sb_cc);
	}
	if (so->so_state & SS_ASYNC) {
		if (so->so_pgid < 0)
			gsignal(-so->so_pgid, SIGIO);
		else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
			psignal(p, SIGIO);
	}
	if (sb->sb_flags & SB_KNOTE) {
		KNOTE(&sb->sb_sel.si_note, SO_FILT_HINT_LOCKED);
	}
	if (sb->sb_flags & SB_UPCALL) {
		socket_unlock(so, 0);
		(*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT);
		socket_lock(so, 0);
	}
}

/*
 * Socket buffer (struct sockbuf) utility routines.
 *
 * Each socket contains two socket buffers: one for sending data and
 * one for receiving data.  Each buffer contains a queue of mbufs,
 * information about the number of mbufs and amount of data in the
 * queue, and other fields allowing select() statements and notification
 * on data availability to be implemented.
 *
 * Data stored in a socket buffer is maintained as a list of records.
 * Each record is a list of mbufs chained together with the m_next
 * field.  Records are chained together with the m_nextpkt field.  The upper
 * level routine soreceive() expects the following conventions to be
 * observed when placing information in the receive buffer:
 *
 * 1. If the protocol requires each message be preceded by the sender's
 *    name, then a record containing that name must be present before
 *    any associated data (mbuf's must be of type MT_SONAME).
 * 2. If the protocol supports the exchange of ``access rights'' (really
 *    just additional data associated with the message), and there are
 *    ``rights'' to be received, then a record containing this data
 *    should be present (mbuf's must be of type MT_RIGHTS).
 * 3. If a name or rights record exists, then it must be followed by
 *    a data record, perhaps of zero length.
 *
 * Before using a new socket structure it is first necessary to reserve
 * buffer space to the socket, by calling sbreserve().  This should commit
 * some of the available buffer space in the system buffer pool for the
 * socket (currently, it does nothing but enforce limits).  The space
 * should be released by calling sbrelease() when the socket is destroyed.
 */

int
soreserve(so, sndcc, rcvcc)
	register struct socket *so;
	u_long sndcc, rcvcc;
{

	if (sbreserve(&so->so_snd, sndcc) == 0)
		goto bad;
	if (sbreserve(&so->so_rcv, rcvcc) == 0)
		goto bad2;
	if (so->so_rcv.sb_lowat == 0)
		so->so_rcv.sb_lowat = 1;
	if (so->so_snd.sb_lowat == 0)
		so->so_snd.sb_lowat = MCLBYTES;
	if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
		so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
	return (0);
bad2:
#ifdef __APPLE__
	selthreadclear(&so->so_snd.sb_sel);
#endif
	sbrelease(&so->so_snd);
bad:
	return (ENOBUFS);
}
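
#if 0	/* illustrative sketch only -- not part of this file */
/*
 * Hypothetical example of the reservation step described above: commit
 * buffer space with soreserve() before the socket is used.  The 8 KB
 * figures are arbitrary example values; soreserve() returns ENOBUFS if
 * either reservation would exceed the sb_max-derived limit.
 */
static int
example_reserve_bufs(struct socket *so)
{
	int error;

	error = soreserve(so, 8 * 1024, 8 * 1024);
	if (error)
		return (error);	/* ENOBUFS: limits would be exceeded */
	/* ... socket may now queue data in so_snd and so_rcv ... */
	return (0);
}
#endif
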
/*
 * Allot mbufs to a sockbuf.
 * Attempt to scale mbmax so that mbcnt doesn't become limiting
 * if buffering efficiency is near the normal case.
 */
int
sbreserve(sb, cc)
	struct sockbuf *sb;
	u_long cc;
{
	if ((u_quad_t)cc > (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES))
		return (0);
	sb->sb_hiwat = cc;
	sb->sb_mbmax = min(cc * sb_efficiency, sb_max);
	if (sb->sb_lowat > sb->sb_hiwat)
		sb->sb_lowat = sb->sb_hiwat;
	return (1);
}
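
/*
 * Worked example (illustrative): with the default sb_efficiency of 8,
 * a reservation of cc = 8192 bytes yields sb_mbmax = min(8 * 8192,
 * sb_max) = 65536 (assuming sb_max is at least 64 KB), so mbuf overhead
 * only becomes the limiting factor when buffering is 8 times less
 * space-efficient than the ideal case.
 */
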
/*
 * Free mbufs held by a socket, and reserved mbuf space.
 */
/* WARNING needs to do selthreadclear() before calling this */
void
sbrelease(sb)
	struct sockbuf *sb;
{
	sbflush(sb);
	sb->sb_hiwat = 0;
	sb->sb_mbmax = 0;
}

/*
 * Routines to add and remove
 * data from an mbuf queue.
 *
 * The routines sbappend() or sbappendrecord() are normally called to
 * append new mbufs to a socket buffer, after checking that adequate
 * space is available, comparing the function sbspace() with the amount
 * of data to be added.  sbappendrecord() differs from sbappend() in
 * that data supplied is treated as the beginning of a new record.
 * To place a sender's address, optional access rights, and data in a
 * socket receive buffer, sbappendaddr() should be used.  To place
 * access rights and data in a socket receive buffer, sbappendrights()
 * should be used.  In either case, the new data begins a new record.
 * Note that unlike sbappend() and sbappendrecord(), these routines check
 * for the caller that there will be enough space to store the data.
 * Each fails if there is not enough space, or if it cannot find mbufs
 * to store additional information in.
 *
 * Reliable protocols may use the socket send buffer to hold data
 * awaiting acknowledgement.  Data is normally copied from a socket
 * send buffer in a protocol with m_copy for output to a peer, and the
 * data is then removed from the socket buffer with sbdrop() or
 * sbdroprecord() when it is acknowledged by the peer.
 */
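
#if 0	/* illustrative sketch only -- not part of this file */
/*
 * Hypothetical example of the reliable-protocol pattern described
 * above: once the peer acknowledges "acked" bytes, drop that much from
 * the front of the send buffer and wake any writer waiting for space.
 */
static void
example_ack_received(struct socket *so, int acked)
{
	sbdrop(&so->so_snd, acked);
	sowwakeup(so);
}
#endif
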
/*
 * Append mbuf chain m to the last record in the
 * socket buffer sb.  The additional space associated with
 * the mbuf chain is recorded in sb.  Empty mbufs are
 * discarded and mbufs are compacted where possible.
 */
int
sbappend(sb, m)
	struct sockbuf *sb;
	struct mbuf *m;
{
	register struct mbuf *n, *sb_first;
	int result = 0;
	int error = 0;
	int filtered = 0;

	if (m == 0)
		return 0;

	KERNEL_DEBUG((DBG_FNC_SBAPPEND | DBG_FUNC_START), sb, m->m_len, 0, 0, 0);

again:
	sb_first = n = sb->sb_mb;
	if (n) {
		while (n->m_nextpkt)
			n = n->m_nextpkt;
		do {
			if (n->m_flags & M_EOR) {
				result = sbappendrecord(sb, m); /* XXXXXX!!!! */
				KERNEL_DEBUG((DBG_FNC_SBAPPEND | DBG_FUNC_END), sb, sb->sb_cc, 0, 0, 0);
				return result;
			}
		} while (n->m_next && (n = n->m_next));
	}

	if (!filtered && (sb->sb_flags & SB_RECV) != 0) {
		error = sflt_data_in(sb->sb_so, NULL, &m, NULL, 0, &filtered);
		if (error) {
			/* no data was appended, caller should not call sowakeup */
			return 0;
		}

		/*
		 * If we had any filters, the socket lock was dropped.
		 * n and sb_first cached data from the socket buffer.
		 * This cache is not valid since we dropped the lock.
		 * We must start over.  Since filtered is set we won't
		 * run through the filters a second time.  We just set
		 * n and sb_first again.
		 */
		if (filtered)
			goto again;
	}

	result = sbcompress(sb, m, n);

	KERNEL_DEBUG((DBG_FNC_SBAPPEND | DBG_FUNC_END), sb, sb->sb_cc, 0, 0, 0);

	return result;
}

#ifdef SOCKBUF_DEBUG
void
sbcheck(sb)
	register struct sockbuf *sb;
{
	register struct mbuf *m;
	register struct mbuf *n = 0;
	register u_long len = 0, mbcnt = 0;
	lck_mtx_t *mutex_held;

	if (sb->sb_so->so_proto->pr_getlock != NULL)
		mutex_held = (*sb->sb_so->so_proto->pr_getlock)(sb->sb_so, 0);
	else
		mutex_held = sb->sb_so->so_proto->pr_domain->dom_mtx;

	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

	if (sbchecking == 0)
		return;

	for (m = sb->sb_mb; m; m = n) {
		n = m->m_nextpkt;
		for (; m; m = m->m_next) {
			len += m->m_len;
			mbcnt += MSIZE;
			if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */
				mbcnt += m->m_ext.ext_size;
		}
	}
	if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
		panic("cc %ld != %ld || mbcnt %ld != %ld\n", len, sb->sb_cc,
		    mbcnt, sb->sb_mbcnt);
	}
}
#endif

/*
 * As above, except the mbuf chain
 * begins a new record.
 */
int
sbappendrecord(sb, m0)
	register struct sockbuf *sb;
	struct mbuf *m0;
{
	register struct mbuf *m;
	int result = 0;

	if (m0 == 0)
		return 0;

	if ((sb->sb_flags & SB_RECV) != 0) {
		int error = sflt_data_in(sb->sb_so, NULL, &m0, NULL,
		    sock_data_filt_flag_record, NULL);
		if (error != 0) {
			if (error != EJUSTRETURN)
				m_freem(m0);
			return 0;
		}
	}

	m = sb->sb_mb;
	if (m)
		while (m->m_nextpkt)
			m = m->m_nextpkt;
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	if (m)
		m->m_nextpkt = m0;
	else
		sb->sb_mb = m0;
	m = m0->m_next;
	m0->m_next = 0;
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	return sbcompress(sb, m, m0);
}

/*
 * As above except that OOB data
 * is inserted at the beginning of the sockbuf,
 * but after any other OOB data.
 */
int
sbinsertoob(sb, m0)
	struct sockbuf *sb;
	struct mbuf *m0;
{
	struct mbuf *m;
	struct mbuf **mp;

	if (m0 == 0)
		return 0;

	if ((sb->sb_flags & SB_RECV) != 0) {
		int error = sflt_data_in(sb->sb_so, NULL, &m0, NULL,
		    sock_data_filt_flag_oob, NULL);

		if (error) {
			if (error != EJUSTRETURN) {
				m_freem(m0);
			}
			return 0;
		}
	}

	for (mp = &sb->sb_mb; *mp ; mp = &((*mp)->m_nextpkt)) {
		m = *mp;
	again:
		switch (m->m_type) {

		case MT_OOBDATA:
			continue;		/* WANT next train */

		case MT_CONTROL:
			m = m->m_next;
			if (m)
				goto again;	/* inspect THIS train further */
		}
		break;
	}
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	m0->m_nextpkt = *mp;
	*mp = m0;
	m = m0->m_next;
	m0->m_next = 0;
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	return sbcompress(sb, m, m0);
}

/*
 * Append address and data, and optionally, control (ancillary) data
 * to the receive queue of a socket.  If present,
 * m0 must include a packet header with total length.
 * Returns 0 if no space in sockbuf or insufficient mbufs.
 */
static int
sbappendaddr_internal(sb, asa, m0, control)
	register struct sockbuf *sb;
	struct sockaddr *asa;
	struct mbuf *m0, *control;
{
	register struct mbuf *m, *n;
	int space = asa->sa_len;

	if (m0 && (m0->m_flags & M_PKTHDR) == 0)
		panic("sbappendaddr");

	if (m0)
		space += m0->m_pkthdr.len;
	for (n = control; n; n = n->m_next) {
		space += n->m_len;
		if (n->m_next == 0)	/* keep pointer to last control buf */
			break;
	}
	if (space > sbspace(sb))
		return (0);
	if (asa->sa_len > MLEN)
		return (0);
	MGET(m, M_DONTWAIT, MT_SONAME);
	if (m == 0)
		return (0);
	m->m_len = asa->sa_len;
	bcopy((caddr_t)asa, mtod(m, caddr_t), asa->sa_len);
	if (n)
		n->m_next = m0;		/* concatenate data to control */
	else
		control = m0;
	m->m_next = control;
	for (n = m; n; n = n->m_next)
		sballoc(sb, n);
	n = sb->sb_mb;
	if (n) {
		while (n->m_nextpkt)
			n = n->m_nextpkt;
		n->m_nextpkt = m;
	} else
		sb->sb_mb = m;
	postevent(0, sb, EV_RWBYTES);
	return (1);
}

int
sbappendaddr(
	struct sockbuf *sb,
	struct sockaddr *asa,
	struct mbuf *m0,
	struct mbuf *control,
	int *error_out)
{
	int result = 0;

	if (error_out) *error_out = 0;

	if (m0 && (m0->m_flags & M_PKTHDR) == 0)
		panic("sbappendaddrorfree");

	/* Call socket data in filters */
	if ((sb->sb_flags & SB_RECV) != 0) {
		int error;
		error = sflt_data_in(sb->sb_so, asa, &m0, &control, 0, NULL);
		if (error) {
			if (error != EJUSTRETURN) {
				if (m0) m_freem(m0);
				if (control) m_freem(control);
				if (error_out) *error_out = error;
			}
			return 0;
		}
	}

	result = sbappendaddr_internal(sb, asa, m0, control);
	if (result == 0) {
		if (m0) m_freem(m0);
		if (control) m_freem(control);
		if (error_out) *error_out = ENOBUFS;
	}

	return result;
}
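
#if 0	/* illustrative sketch only -- not part of this file */
/*
 * Hypothetical example of a datagram protocol delivering an incoming
 * packet m, tagged with the sender's address, to the receive buffer
 * using sbappendaddr().  On failure the routine has already freed the
 * mbufs and reported the reason (if any) through error_out.
 */
static void
example_deliver_dgram(struct socket *so, struct sockaddr *from,
    struct mbuf *m)
{
	int error;

	if (sbappendaddr(&so->so_rcv, from, m, NULL, &error) != 0)
		sorwakeup(so);		/* record queued: wake readers */
	else if (error != 0)
		so->so_error = error;	/* e.g. ENOBUFS */
}
#endif
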
static int
sbappendcontrol_internal(sb, m0, control)
	struct sockbuf *sb;
	struct mbuf *control, *m0;
{
	register struct mbuf *m, *n;
	int space = 0;

	if (control == 0)
		panic("sbappendcontrol");

	for (m = control; ; m = m->m_next) {
		space += m->m_len;
		if (m->m_next == 0)
			break;
	}
	n = m;			/* save pointer to last control buffer */
	for (m = m0; m; m = m->m_next)
		space += m->m_len;
	if (space > sbspace(sb))
		return (0);
	n->m_next = m0;		/* concatenate data to control */
	for (m = control; m; m = m->m_next)
		sballoc(sb, m);
	n = sb->sb_mb;
	if (n) {
		while (n->m_nextpkt)
			n = n->m_nextpkt;
		n->m_nextpkt = control;
	} else
		sb->sb_mb = control;
	postevent(0, sb, EV_RWBYTES);
	return (1);
}

int
sbappendcontrol(
	struct sockbuf *sb,
	struct mbuf *m0,
	struct mbuf *control,
	int *error_out)
{
	int result = 0;

	if (error_out) *error_out = 0;

	if (sb->sb_flags & SB_RECV) {
		int error;
		error = sflt_data_in(sb->sb_so, NULL, &m0, &control, 0, NULL);
		if (error) {
			if (error != EJUSTRETURN) {
				if (m0) m_freem(m0);
				if (control) m_freem(control);
				if (error_out) *error_out = error;
			}
			return 0;
		}
	}

	result = sbappendcontrol_internal(sb, m0, control);
	if (result == 0) {
		if (m0) m_freem(m0);
		if (control) m_freem(control);
		if (error_out) *error_out = ENOBUFS;
	}

	return result;
}

/*
 * Compress mbuf chain m into the socket
 * buffer sb following mbuf n.  If n
 * is null, the buffer is presumed empty.
 */
static int
sbcompress(sb, m, n)
	register struct sockbuf *sb;
	register struct mbuf *m, *n;
{
	register int eor = 0;
	register struct mbuf *o;

	while (m) {
		eor |= m->m_flags & M_EOR;
		if (m->m_len == 0 &&
		    (eor == 0 ||
		     (((o = m->m_next) || (o = n)) &&
		      o->m_type == m->m_type))) {
			m = m_free(m);
			continue;
		}
		if (n && (n->m_flags & M_EOR) == 0 &&
#ifndef __APPLE__
		    M_WRITABLE(n) &&
#endif
		    m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
		    m->m_len <= M_TRAILINGSPACE(n) &&
		    n->m_type == m->m_type) {
			bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
			    (unsigned)m->m_len);
			n->m_len += m->m_len;
			sb->sb_cc += m->m_len;
			m = m_free(m);
			continue;
		}
		if (n)
			n->m_next = m;
		else
			sb->sb_mb = m;
		sballoc(sb, m);
		n = m;
		m->m_flags &= ~M_EOR;
		m = m->m_next;
		n->m_next = 0;
	}
	if (eor) {
		if (n)
			n->m_flags |= eor;
		else
			printf("semi-panic: sbcompress\n");
	}
	postevent(0, sb, EV_RWBYTES);
	return 1;
}

/*
 * Free all mbufs in a sockbuf.
 * Check that all resources are reclaimed.
 */
void
sbflush(sb)
	register struct sockbuf *sb;
{
	if (sb->sb_so == NULL)
		panic("sbflush sb->sb_so already null sb=%x\n", sb);
	(void)sblock(sb, M_WAIT);
	while (sb->sb_mbcnt) {
		/*
		 * Don't call sbdrop(sb, 0) if the leading mbuf is non-empty:
		 * we would loop forever.  Panic instead.
		 */
		if (!sb->sb_cc && (sb->sb_mb == NULL || sb->sb_mb->m_len))
			break;
		sbdrop(sb, (int)sb->sb_cc);
	}
	if (sb->sb_cc || sb->sb_mb || sb->sb_mbcnt || sb->sb_so == NULL)
		panic("sbflush: cc %ld || mb %p || mbcnt %ld sb_so=%x", sb->sb_cc, (void *)sb->sb_mb, sb->sb_mbcnt, sb->sb_so);

	postevent(0, sb, EV_RWBYTES);
	sbunlock(sb, 1);	/* keep socket locked */
}

/*
 * Drop data from (the front of) a sockbuf.
 * Use m_freem_list to free the mbuf structures
 * under a single lock.  This is done by pruning
 * the top of the tree from the body: we keep track
 * of where we get to in the tree, then zero the two
 * pertinent pointers, m_nextpkt and m_next.  The
 * socket buffer is then updated to point at the new
 * top of the tree and the pruned area is released
 * via m_freem_list.
 */
void
sbdrop(sb, len)
	register struct sockbuf *sb;
	register int len;
{
	register struct mbuf *m, *free_list, *ml;
	struct mbuf *next, *last;

	KERNEL_DEBUG((DBG_FNC_SBDROP | DBG_FUNC_START), sb, len, 0, 0, 0);

	next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
	free_list = last = m;
	ml = (struct mbuf *)0;

	while (len > 0) {
		if (m == 0) {
			if (next == 0) {
				/*
				 * Temporarily replacing this panic with a
				 * printf: it occurs occasionally when closing
				 * a socket, and there is no harm in ignoring
				 * it.  This problem will be investigated
				 * further.
				 */
				/* panic("sbdrop"); */
				printf("sbdrop - count not zero\n");
				len = 0;
				/*
				 * Zero the counts: if we have no mbufs,
				 * we have no data (PR-2986815).
				 */
				sb->sb_cc = 0;
				sb->sb_mbcnt = 0;
				break;
			}
			m = last = next;
			next = m->m_nextpkt;
			continue;
		}
		if (m->m_len > len) {
			m->m_len -= len;
			m->m_data += len;
			sb->sb_cc -= len;
			break;
		}
		len -= m->m_len;
		sbfree(sb, m);

		ml = m;
		m = m->m_next;
	}
	while (m && m->m_len == 0) {
		sbfree(sb, m);

		ml = m;
		m = m->m_next;
	}
	if (ml) {
		ml->m_next = (struct mbuf *)0;
		last->m_nextpkt = (struct mbuf *)0;
		m_freem_list(free_list);
	}
	if (m) {
		sb->sb_mb = m;
		m->m_nextpkt = next;
	} else
		sb->sb_mb = next;

	postevent(0, sb, EV_RWBYTES);

	KERNEL_DEBUG((DBG_FNC_SBDROP | DBG_FUNC_END), sb, 0, 0, 0, 0);
}

/*
 * Drop a record off the front of a sockbuf
 * and move the next record to the front.
 */
void
sbdroprecord(sb)
	register struct sockbuf *sb;
{
	register struct mbuf *m, *mn;

	m = sb->sb_mb;
	if (m) {
		sb->sb_mb = m->m_nextpkt;
		do {
			sbfree(sb, m);
			MFREE(m, mn);
			m = mn;
		} while (m);
	}
	postevent(0, sb, EV_RWBYTES);
}

/*
 * Create a "control" mbuf containing the specified data
 * with the specified type for presentation on a socket buffer.
 */
struct mbuf *
sbcreatecontrol(p, size, type, level)
	caddr_t p;
	register int size;
	int type, level;
{
	register struct cmsghdr *cp;
	struct mbuf *m;

	if (CMSG_SPACE((u_int)size) > MLEN)
		return ((struct mbuf *) NULL);
	if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL)
		return ((struct mbuf *) NULL);
	cp = mtod(m, struct cmsghdr *);
	/* XXX check size? */
	(void)memcpy(CMSG_DATA(cp), p, size);
	m->m_len = CMSG_SPACE(size);
	cp->cmsg_len = CMSG_LEN(size);
	cp->cmsg_level = level;
	cp->cmsg_type = type;
	return (m);
}
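
#if 0	/* illustrative sketch only -- not part of this file */
/*
 * Hypothetical example: wrapping a struct timeval in a control mbuf,
 * e.g. for delivery alongside received data.  The SCM_TIMESTAMP /
 * SOL_SOCKET pairing is the conventional use; any cmsg type and level
 * may be supplied.  Returns NULL if no mbuf was available or the data
 * would not fit in MLEN.
 */
static struct mbuf *
example_timestamp_control(void)
{
	struct timeval tv;

	microtime(&tv);
	return (sbcreatecontrol((caddr_t)&tv, sizeof (tv),
	    SCM_TIMESTAMP, SOL_SOCKET));
}
#endif
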
/*
 * Some routines that return EOPNOTSUPP for entry points that are not
 * supported by a protocol.  Fill in as needed.
 */
int
pru_abort_notsupp(struct socket *so)
{
	return EOPNOTSUPP;
}

int
pru_accept_notsupp(struct socket *so, struct sockaddr **nam)
{
	return EOPNOTSUPP;
}

int
pru_attach_notsupp(struct socket *so, int proto, struct proc *p)
{
	return EOPNOTSUPP;
}

int
pru_bind_notsupp(struct socket *so, struct sockaddr *nam, struct proc *p)
{
	return EOPNOTSUPP;
}

int
pru_connect_notsupp(struct socket *so, struct sockaddr *nam, struct proc *p)
{
	return EOPNOTSUPP;
}

int
pru_connect2_notsupp(struct socket *so1, struct socket *so2)
{
	return EOPNOTSUPP;
}

int
pru_control_notsupp(struct socket *so, u_long cmd, caddr_t data,
    struct ifnet *ifp, struct proc *p)
{
	return EOPNOTSUPP;
}

int
pru_detach_notsupp(struct socket *so)
{
	return EOPNOTSUPP;
}

int
pru_disconnect_notsupp(struct socket *so)
{
	return EOPNOTSUPP;
}

int
pru_listen_notsupp(struct socket *so, struct proc *p)
{
	return EOPNOTSUPP;
}

int
pru_peeraddr_notsupp(struct socket *so, struct sockaddr **nam)
{
	return EOPNOTSUPP;
}

int
pru_rcvd_notsupp(struct socket *so, int flags)
{
	return EOPNOTSUPP;
}

int
pru_rcvoob_notsupp(struct socket *so, struct mbuf *m, int flags)
{
	return EOPNOTSUPP;
}

int
pru_send_notsupp(struct socket *so, int flags, struct mbuf *m,
    struct sockaddr *addr, struct mbuf *control,
    struct proc *p)
{
	return EOPNOTSUPP;
}

/*
 * This isn't really a ``null'' operation, but it's the default one
 * and doesn't do anything destructive.
 */
int
pru_sense_null(struct socket *so, struct stat *sb)
{
	sb->st_blksize = so->so_snd.sb_hiwat;
	return 0;
}

int
pru_sosend_notsupp(struct socket *so, struct sockaddr *addr,
    struct uio *uio, struct mbuf *top,
    struct mbuf *control, int flags)
{
	return EOPNOTSUPP;
}

int
pru_soreceive_notsupp(struct socket *so,
    struct sockaddr **paddr,
    struct uio *uio, struct mbuf **mp0,
    struct mbuf **controlp, int *flagsp)
{
	return EOPNOTSUPP;
}

int
pru_shutdown_notsupp(struct socket *so)
{
	return EOPNOTSUPP;
}

int
pru_sockaddr_notsupp(struct socket *so, struct sockaddr **nam)
{
	return EOPNOTSUPP;
}

int
pru_sosend(struct socket *so, struct sockaddr *addr,
    struct uio *uio, struct mbuf *top,
    struct mbuf *control, int flags)
{
	return EOPNOTSUPP;
}

int
pru_soreceive(struct socket *so,
    struct sockaddr **paddr,
    struct uio *uio, struct mbuf **mp0,
    struct mbuf **controlp, int *flagsp)
{
	return EOPNOTSUPP;
}

int
pru_sopoll_notsupp(__unused struct socket *so, __unused int events,
    __unused kauth_cred_t cred, __unused void *wql)
{
	return EOPNOTSUPP;
}

#ifdef __APPLE__
/*
 * The following are macros on BSD and functions on Darwin
 */

/*
 * Do we need to notify the other side when I/O is possible?
 */
int
sb_notify(struct sockbuf *sb)
{
	return ((sb->sb_flags & (SB_WAIT|SB_SEL|SB_ASYNC|SB_UPCALL|SB_KNOTE)) != 0);
}

/*
 * How much space is there in a socket buffer (so->so_snd or so->so_rcv)?
 * This is problematical if the fields are unsigned, as the space might
 * still be negative (cc > hiwat or mbcnt > mbmax).  Should detect
 * overflow and return 0.  Should use "lmin" but it doesn't exist now.
 */
long
sbspace(struct sockbuf *sb)
{
	return ((long) imin((int)(sb->sb_hiwat - sb->sb_cc),
	    (int)(sb->sb_mbmax - sb->sb_mbcnt)));
}
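
/*
 * Worked example (illustrative): with sb_hiwat = 8192, sb_cc = 1024 and
 * an ample mbuf allowance, sbspace() returns 7168.  If sb_cc overshoots
 * sb_hiwat by 808 bytes, the unsigned difference wraps and the cast to
 * int recovers -808, which is the negative result the comment above
 * warns about.
 */
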
/* do we have to send all at once on a socket? */
int
sosendallatonce(struct socket *so)
{
	return (so->so_proto->pr_flags & PR_ATOMIC);
}

/* can we read something from so? */
int
soreadable(struct socket *so)
{
	return (so->so_rcv.sb_cc >= so->so_rcv.sb_lowat ||
	    (so->so_state & SS_CANTRCVMORE) ||
	    so->so_comp.tqh_first || so->so_error);
}

/* can we write something to so? */
int
sowriteable(struct socket *so)
{
	return ((sbspace(&(so)->so_snd) >= (so)->so_snd.sb_lowat &&
	    ((so->so_state & SS_ISCONNECTED) ||
	     (so->so_proto->pr_flags & PR_CONNREQUIRED) == 0)) ||
	    (so->so_state & SS_CANTSENDMORE) ||
	    so->so_error);
}

/* adjust counters in sb reflecting allocation of m */
void
sballoc(struct sockbuf *sb, struct mbuf *m)
{
	sb->sb_cc += m->m_len;
	sb->sb_mbcnt += MSIZE;
	if (m->m_flags & M_EXT)
		sb->sb_mbcnt += m->m_ext.ext_size;
}

/* adjust counters in sb reflecting freeing of m */
void
sbfree(struct sockbuf *sb, struct mbuf *m)
{
	sb->sb_cc -= m->m_len;
	sb->sb_mbcnt -= MSIZE;
	if (m->m_flags & M_EXT)
		sb->sb_mbcnt -= m->m_ext.ext_size;
}

/*
 * Set lock on sockbuf sb; sleep if lock is already held.
 * Unless SB_NOINTR is set on sockbuf, sleep is interruptible.
 * Returns error without lock if sleep is interrupted.
 */
int
sblock(struct sockbuf *sb, int wf)
{
	int error = 0;

	if (sb->sb_flags & SB_LOCK)
		error = (wf == M_WAIT) ? sb_lock(sb) : EWOULDBLOCK;
	else
		sb->sb_flags |= SB_LOCK;

	return (error);
}
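
#if 0	/* illustrative sketch only -- not part of this file */
/*
 * Hypothetical example of the usual sblock()/sbunlock() pairing around
 * an operation that needs a stable view of a socket buffer.  M_WAIT
 * asks sb_lock() to sleep until the lock is free; with any other value
 * the call fails immediately with EWOULDBLOCK.
 */
static int
example_with_rcvbuf_locked(struct socket *so)
{
	int error;

	error = sblock(&so->so_rcv, M_WAIT);
	if (error)
		return (error);		/* interrupted sleep (EINTR) */
	/* ... examine or drain so->so_rcv here ... */
	sbunlock(&so->so_rcv, 1);	/* keep the socket itself locked */
	return (0);
}
#endif
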
/* release lock on sockbuf sb */
void
sbunlock(struct sockbuf *sb, int keeplocked)
{
	struct socket *so = sb->sb_so;
	int lr_saved;
	lck_mtx_t *mutex_held;

	lr_saved = (unsigned int) __builtin_return_address(0);

	sb->sb_flags &= ~SB_LOCK;

	if (sb->sb_flags & SB_WANT) {
		sb->sb_flags &= ~SB_WANT;
		if (so->so_usecount < 0)
			panic("sbunlock: b4 wakeup so=%x ref=%d lr=%x sb_flags=%x\n", sb->sb_so, so->so_usecount, lr_saved, sb->sb_flags);

		wakeup((caddr_t)&(sb)->sb_flags);
	}
	if (keeplocked == 0) {	/* unlock on exit */
		if (so->so_proto->pr_getlock != NULL)
			mutex_held = (*so->so_proto->pr_getlock)(so, 0);
		else
			mutex_held = so->so_proto->pr_domain->dom_mtx;

		lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

		so->so_usecount--;
		if (so->so_usecount < 0)
			panic("sbunlock: unlock on exit so=%x ref=%d lr=%x sb_flags=%x\n", so, so->so_usecount, lr_saved, sb->sb_flags);
		so->unlock_lr[so->next_unlock_lr] = (void *)lr_saved;
		so->next_unlock_lr = (so->next_unlock_lr+1) % SO_LCKDBG_MAX;
		lck_mtx_unlock(mutex_held);
	}
}

void
sorwakeup(struct socket *so)
{
	if (sb_notify(&so->so_rcv))
		sowakeup(so, &so->so_rcv);
}

void
sowwakeup(struct socket *so)
{
	if (sb_notify(&so->so_snd))
		sowakeup(so, &so->so_snd);
}
#endif /* __APPLE__ */

/*
 * Make a copy of a sockaddr in a malloced buffer of type M_SONAME.
 */
struct sockaddr *
dup_sockaddr(sa, canwait)
	struct sockaddr *sa;
	int canwait;
{
	struct sockaddr *sa2;

	MALLOC(sa2, struct sockaddr *, sa->sa_len, M_SONAME,
	    canwait ? M_WAITOK : M_NOWAIT);
	if (sa2)
		bcopy(sa, sa2, sa->sa_len);
	return sa2;
}

/*
 * Create an external-format (``xsocket'') structure using the information
 * in the kernel-format socket structure pointed to by so.  This is done
 * to reduce the spew of irrelevant information over this interface,
 * to isolate user code from changes in the kernel structure, and
 * potentially to provide information-hiding if we decide that
 * some of this information should be hidden from users.
 */
void
sotoxsocket(struct socket *so, struct xsocket *xso)
{
	xso->xso_len = sizeof *xso;
	xso->xso_so = so;
	xso->so_type = so->so_type;
	xso->so_options = so->so_options;
	xso->so_linger = so->so_linger;
	xso->so_state = so->so_state;
	xso->so_pcb = so->so_pcb;
	if (so->so_proto) {
		xso->xso_protocol = so->so_proto->pr_protocol;
		xso->xso_family = so->so_proto->pr_domain->dom_family;
	} else
		xso->xso_protocol = xso->xso_family = 0;
	xso->so_qlen = so->so_qlen;
	xso->so_incqlen = so->so_incqlen;
	xso->so_qlimit = so->so_qlimit;
	xso->so_timeo = so->so_timeo;
	xso->so_error = so->so_error;
	xso->so_pgid = so->so_pgid;
	xso->so_oobmark = so->so_oobmark;
	sbtoxsockbuf(&so->so_snd, &xso->so_snd);
	sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
	xso->so_uid = so->so_uid;
}

/*
 * This does the same for sockbufs.  Note that the xsockbuf structure,
 * since it is always embedded in a socket, does not include a self
 * pointer nor a length.  We make this entry point public in case
 * some other mechanism needs it.
 */
void
sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb)
{
	xsb->sb_cc = sb->sb_cc;
	xsb->sb_hiwat = sb->sb_hiwat;
	xsb->sb_mbcnt = sb->sb_mbcnt;
	xsb->sb_mbmax = sb->sb_mbmax;
	xsb->sb_lowat = sb->sb_lowat;
	xsb->sb_flags = sb->sb_flags;
	xsb->sb_timeo = (u_long)(sb->sb_timeo.tv_sec * hz) +
	    sb->sb_timeo.tv_usec / tick;
	if (xsb->sb_timeo == 0 && sb->sb_timeo.tv_usec != 0)
		xsb->sb_timeo = 1;
}

/*
 * Here is the definition of some of the basic objects in the kern.ipc
 * branch of the MIB.
 */
SYSCTL_NODE(_kern, KERN_IPC, ipc, CTLFLAG_RW, 0, "IPC");

/* This takes the place of kern.maxsockbuf, which moved to kern.ipc. */
static int dummy;
SYSCTL_INT(_kern, KERN_DUMMY, dummy, CTLFLAG_RW, &dummy, 0, "");

SYSCTL_INT(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf, CTLFLAG_RW,
    &sb_max, 0, "Maximum socket buffer size");
SYSCTL_INT(_kern_ipc, OID_AUTO, maxsockets, CTLFLAG_RD,
    &maxsockets, 0, "Maximum number of sockets available");
SYSCTL_INT(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW,
    &sb_efficiency, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters, CTLFLAG_RD,
    &nmbclusters, 0, "");