/*
 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License").  You may not use this file except in compliance with the
 * License.  Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1998, 1999 Apple Computer, Inc. All Rights Reserved */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket2.c	8.1 (Berkeley) 6/10/93
 * $FreeBSD: src/sys/kern/uipc_socket2.c,v 1.55.2.9 2001/07/26 18:53:02 peter Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/protosw.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/ev.h>
#include <kern/locks.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <sys/kdebug.h>

#define	DBG_FNC_SBDROP		NETDBG_CODE(DBG_NETSOCK, 4)
#define	DBG_FNC_SBAPPEND	NETDBG_CODE(DBG_NETSOCK, 5)

static int sbcompress(struct sockbuf *, struct mbuf *, struct mbuf *);

/*
 * Primitive routines for operating on sockets and socket buffers
 */

u_long	sb_max = SB_MAX;		/* XXX should be static */

static u_long sb_efficiency = 8;	/* parameter for sbreserve() */

/*
 * Procedures to manipulate state flags of socket
 * and do appropriate wakeups.  Normal sequence from the
 * active (originating) side is that soisconnecting() is
 * called during processing of connect() call,
 * resulting in an eventual call to soisconnected() if/when the
 * connection is established.  When the connection is torn down
 * soisdisconnecting() is called during processing of disconnect() call,
 * and soisdisconnected() is called when the connection to the peer
 * is totally severed.  The semantics of these routines are such that
 * connectionless protocols can call soisconnected() and soisdisconnected()
 * only, bypassing the in-progress calls when setting up a ``connection''
 * takes no time.
 *
 * From the passive side, a socket is created with
 * two queues of sockets: so_incomp for connections in progress
 * and so_comp for connections already made and awaiting user acceptance.
 * As a protocol is preparing incoming connections, it creates a socket
 * structure queued on so_incomp by calling sonewconn().  When the connection
 * is established, soisconnected() is called, and transfers the
 * socket structure to so_comp, making it available to accept().
 *
 * If a socket is closed with sockets on either
 * so_incomp or so_comp, these sockets are dropped.
 *
 * If higher level protocols are implemented in
 * the kernel, the wakeups done here will sometimes
 * cause software-interrupt process scheduling.
 */
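
/*
 * Illustrative sketch (not from the original source): the sequences
 * described above, as a protocol might drive them.  The trigger events
 * named here are examples, not code in this file.
 *
 *	Active (originating) side:
 *		connect(2)            -> soisconnecting(so);
 *		handshake completes   -> soisconnected(so);
 *
 *	Passive (listening) side:
 *		request arrives       -> so2 = sonewconn(head, 0, from);
 *		handshake completes   -> soisconnected(so2);
 *		accept(2) then dequeues so2 from head->so_comp.
 */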
void
soisconnecting(so)
	register struct socket *so;
{

	so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= SS_ISCONNECTING;

	sflt_notify(so, sock_evt_connecting, NULL);
}

void
soisconnected(so)
	struct socket *so;
{
	struct socket *head = so->so_head;

	so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
	so->so_state |= SS_ISCONNECTED;

	sflt_notify(so, sock_evt_connected, NULL);

	if (head && (so->so_state & SS_INCOMP)) {
		so->so_state &= ~SS_INCOMP;
		so->so_state |= SS_COMP;
		if (head->so_proto->pr_getlock != NULL) {
			socket_unlock(so, 0);
			socket_lock(head, 1);
		}
		postevent(head, 0, EV_RCONN);
		TAILQ_REMOVE(&head->so_incomp, so, so_list);
		head->so_incqlen--;
		TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
		sorwakeup(head);
		wakeup_one((caddr_t)&head->so_timeo);
		if (head->so_proto->pr_getlock != NULL) {
			socket_unlock(head, 1);
			socket_lock(so, 0);
		}
	} else {
		postevent(so, 0, EV_WCONN);
		wakeup((caddr_t)&so->so_timeo);
		sorwakeup(so);
		sowwakeup(so);
	}
}

void
soisdisconnecting(so)
	register struct socket *so;
{
	so->so_state &= ~SS_ISCONNECTING;
	so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
	sflt_notify(so, sock_evt_disconnecting, NULL);
	wakeup((caddr_t)&so->so_timeo);
	sowwakeup(so);
	sorwakeup(so);
}

void
soisdisconnected(so)
	register struct socket *so;
{
	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
	sflt_notify(so, sock_evt_disconnected, NULL);
	wakeup((caddr_t)&so->so_timeo);
	sowwakeup(so);
	sorwakeup(so);
}

/*
 * Return a random connection that hasn't been serviced yet and
 * is eligible for discard.  There is a one in qlen chance that
 * we will return a null, saying that there are no droppable
 * requests.  In this case, the protocol specific code should drop
 * the new request.  This ensures fairness.
 *
 * This may be used in conjunction with protocol specific queue
 * congestion routines.
 */
struct socket *
sodropablereq(head)
	register struct socket *head;
{
	struct socket *so, *sonext = NULL;
	unsigned int i, j, qlen;
	static int rnd;
	static struct timeval old_runtime;
	static unsigned int cur_cnt, old_cnt;
	struct timeval tv;

	microtime(&tv);
	if ((i = (tv.tv_sec - old_runtime.tv_sec)) != 0) {
		old_runtime = tv;
		old_cnt = cur_cnt / i;
		cur_cnt = 0;
	}

	so = TAILQ_FIRST(&head->so_incomp);
	if (!so)
		return (NULL);

	qlen = head->so_incqlen;
	if (++cur_cnt > qlen || old_cnt > qlen) {
		rnd = (314159 * rnd + 66329) & 0xffff;
		j = ((qlen + 1) * rnd) >> 16;
		//###LD To clean up
		while (j-- && so) {
			// if (in_pcb_checkstate(so->so_pcb, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
			socket_lock(so, 1);
			sonext = TAILQ_NEXT(so, so_list);
			// in_pcb_check_state(so->so_pcb, WNT_RELEASE, 0);
			socket_unlock(so, 1);
			so = sonext;
		}
	}

	// if (in_pcb_checkstate(so->so_pcb, WNT_ACQUIRE, 0) == WNT_STOPUSING)
	//	return (NULL);
	// else
	return (so);
}

/*
 * When an attempt at a new connection is noted on a socket
 * which accepts connections, sonewconn is called.  If the
 * connection is possible (subject to space constraints, etc.)
 * then we allocate a new structure, properly linked into the
 * data structure of the original socket, and return this.
 * Connstatus may be 0, or SS_ISCONFIRMING, or SS_ISCONNECTED.
 */
static struct socket *
sonewconn_internal(head, connstatus)
	register struct socket *head;
	int connstatus;
{
	int error = 0;
	register struct socket *so;
	lck_mtx_t *mutex_held;

	if (head->so_proto->pr_getlock != NULL)
		mutex_held = (*head->so_proto->pr_getlock)(head, 0);
	else
		mutex_held = head->so_proto->pr_domain->dom_mtx;
	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

	if (head->so_qlen > 3 * head->so_qlimit / 2)
		return ((struct socket *)0);
	so = soalloc(1, head->so_proto->pr_domain->dom_family, head->so_type);
	if (so == NULL)
		return ((struct socket *)0);
	/* check if head was closed during the soalloc */
	if (head->so_proto == NULL) {
		sodealloc(so);
		return ((struct socket *)0);
	}

	so->so_head = head;
	so->so_type = head->so_type;
	so->so_options = head->so_options &~ SO_ACCEPTCONN;
	so->so_linger = head->so_linger;
	so->so_state = head->so_state | SS_NOFDREF;
	so->so_proto = head->so_proto;
	so->so_timeo = head->so_timeo;
	so->so_pgid = head->so_pgid;
	so->so_uid = head->so_uid;
	so->so_usecount = 1;
	so->next_lock_lr = 0;
	so->next_unlock_lr = 0;

#ifdef __APPLE__
	so->so_rcv.sb_flags |= SB_RECV;	/* XXX */
	so->so_rcv.sb_so = so->so_snd.sb_so = so;
	TAILQ_INIT(&so->so_evlist);
#endif

	if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat)) {
		sflt_termsock(so);
		sodealloc(so);
		return ((struct socket *)0);
	}

	/*
	 * Must be done with head unlocked to avoid deadlock
	 * with protocols that use per-socket mutexes.
	 */
	if (head->so_proto->pr_unlock)
		socket_unlock(head, 0);
	if (((*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL) != 0) || error) {
		sflt_termsock(so);
		sodealloc(so);
		if (head->so_proto->pr_unlock)
			socket_lock(head, 0);
		return ((struct socket *)0);
	}
	if (head->so_proto->pr_unlock)
		socket_lock(head, 0);
#ifdef __APPLE__
	so->so_proto->pr_domain->dom_refs++;
#endif

	if (connstatus) {
		TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
		so->so_state |= SS_COMP;
	} else {
		TAILQ_INSERT_TAIL(&head->so_incomp, so, so_list);
		so->so_state |= SS_INCOMP;
		head->so_incqlen++;
	}
	head->so_qlen++;

#ifdef __APPLE__
	/* Attach socket filters for this protocol */
	sflt_initsock(so);
#endif
	if (connstatus) {
		so->so_state |= connstatus;
		sorwakeup(head);
		wakeup((caddr_t)&head->so_timeo);
	}
	return (so);
}

struct socket *
sonewconn(
	struct socket *head,
	int connstatus,
	const struct sockaddr *from)
{
	int error = 0;
	struct socket_filter_entry *filter;
	int filtered = 0;

	error = 0;
	for (filter = head->so_filt; filter && (error == 0);
	     filter = filter->sfe_next_onsocket) {
		if (filter->sfe_filter->sf_filter.sf_connect_in) {
			if (filtered == 0) {
				filtered = 1;
				sflt_use(head);
				socket_unlock(head, 0);
			}
			error = filter->sfe_filter->sf_filter.sf_connect_in(
				filter->sfe_cookie, head, from);
		}
	}
	if (filtered != 0) {
		socket_lock(head, 0);
		sflt_unuse(head);
	}

	if (error) {
		return NULL;
	}

	return sonewconn_internal(head, connstatus);
}
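
/*
 * Worked example of the queue limit applied in sonewconn_internal()
 * above (an illustration, not code from this file): for a listening
 * socket created with listen(s, 5), so_qlimit is 5, so new connections
 * are refused once so_qlen exceeds 3 * 5 / 2 = 7; i.e. at most 8
 * incomplete-plus-completed connections may be queued at once.
 */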

/*
 * Socantsendmore indicates that no more data will be sent on the
 * socket; it would normally be applied to a socket when the user
 * informs the system that no more data is to be sent, by the
 * protocol code (in the case of PRU_SHUTDOWN).  Socantrcvmore
 * indicates that no more data will be received, and will normally
 * be applied to the socket by a protocol when it detects that the
 * peer will send no more data.  Data queued for reading in the
 * socket may yet be read.
 */

void
socantsendmore(so)
	struct socket *so;
{
	so->so_state |= SS_CANTSENDMORE;
	sflt_notify(so, sock_evt_cantsendmore, NULL);
	sowwakeup(so);
}

void
socantrcvmore(so)
	struct socket *so;
{
	so->so_state |= SS_CANTRCVMORE;
	sflt_notify(so, sock_evt_cantrecvmore, NULL);
	sorwakeup(so);
}

/*
 * Wait for data to arrive at/drain from a socket buffer.
 */
int
sbwait(sb)
	struct sockbuf *sb;
{
	int error = 0, lr_saved;
	struct socket *so = sb->sb_so;
	lck_mtx_t *mutex_held;
	struct timespec ts;

	lr_saved = (unsigned int) __builtin_return_address(0);

	if (so->so_proto->pr_getlock != NULL)
		mutex_held = (*so->so_proto->pr_getlock)(so, 0);
	else
		mutex_held = so->so_proto->pr_domain->dom_mtx;

	sb->sb_flags |= SB_WAIT;

	if (so->so_usecount < 1)
		panic("sbwait: so=%x refcount=%d\n", so, so->so_usecount);
	ts.tv_sec = sb->sb_timeo.tv_sec;
	ts.tv_nsec = sb->sb_timeo.tv_usec * 1000;
	error = msleep((caddr_t)&sb->sb_cc, mutex_held,
	    (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait",
	    &ts);

	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

	if (so->so_usecount < 1)
		panic("sbwait: so=%x refcount=%d\n", so, so->so_usecount);

	if ((so->so_state & SS_DRAINING)) {
		error = EBADF;
	}

	return (error);
}
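
/*
 * Illustrative sketch (an assumption, not code from this file): the
 * usual consumer of sbwait() is a receive path that sleeps until the
 * buffer has data, rechecking its condition after every wakeup:
 *
 *	while (so->so_rcv.sb_cc == 0 &&
 *	    (so->so_state & SS_CANTRCVMORE) == 0 &&
 *	    so->so_error == 0) {
 *		error = sbwait(&so->so_rcv);
 *		if (error)
 *			break;	// EINTR, EWOULDBLOCK on timeout, or EBADF
 *	}
 */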

/*
 * Lock a sockbuf already known to be locked;
 * return any error returned from sleep (EINTR).
 */
int
sb_lock(sb)
	register struct sockbuf *sb;
{
	struct socket *so = sb->sb_so;
	lck_mtx_t *mutex_held;
	int error = 0;

	if (so == NULL)
		panic("sb_lock: null so back pointer sb=%x\n", sb);

	while (sb->sb_flags & SB_LOCK) {
		sb->sb_flags |= SB_WANT;
		if (so->so_proto->pr_getlock != NULL)
			mutex_held = (*so->so_proto->pr_getlock)(so, 0);
		else
			mutex_held = so->so_proto->pr_domain->dom_mtx;
		if (so->so_usecount < 1)
			panic("sb_lock: so=%x refcount=%d\n", so, so->so_usecount);

		error = msleep((caddr_t)&sb->sb_flags, mutex_held,
		    (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sblock", 0);
		if (so->so_usecount < 1)
			panic("sb_lock: 2 so=%x refcount=%d\n", so, so->so_usecount);
		if (error)
			return (error);
	}
	sb->sb_flags |= SB_LOCK;
	return (0);
}

/*
 * Wakeup processes waiting on a socket buffer.
 * Do asynchronous notification via SIGIO
 * if the socket has the SS_ASYNC flag set.
 */
void
sowakeup(so, sb)
	register struct socket *so;
	register struct sockbuf *sb;
{
	struct proc *p = current_proc();
	sb->sb_flags &= ~SB_SEL;
	selwakeup(&sb->sb_sel);
	if (sb->sb_flags & SB_WAIT) {
		sb->sb_flags &= ~SB_WAIT;
		wakeup((caddr_t)&sb->sb_cc);
	}
	if (so->so_state & SS_ASYNC) {
		if (so->so_pgid < 0)
			gsignal(-so->so_pgid, SIGIO);
		else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
			psignal(p, SIGIO);
	}
	if (sb->sb_flags & SB_KNOTE) {
		KNOTE(&sb->sb_sel.si_note, SO_FILT_HINT_LOCKED);
	}
	if (sb->sb_flags & SB_UPCALL) {
		socket_unlock(so, 0);
		(*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT);
		socket_lock(so, 0);
	}
}

/*
 * Socket buffer (struct sockbuf) utility routines.
 *
 * Each socket contains two socket buffers: one for sending data and
 * one for receiving data.  Each buffer contains a queue of mbufs,
 * information about the number of mbufs and amount of data in the
 * queue, and other fields allowing select() statements and notification
 * on data availability to be implemented.
 *
 * Data stored in a socket buffer is maintained as a list of records.
 * Each record is a list of mbufs chained together with the m_next
 * field.  Records are chained together with the m_nextpkt field.  The upper
 * level routine soreceive() expects the following conventions to be
 * observed when placing information in the receive buffer:
 *
 * 1. If the protocol requires each message be preceded by the sender's
 *    name, then a record containing that name must be present before
 *    any associated data (mbuf's must be of type MT_SONAME).
 * 2. If the protocol supports the exchange of ``access rights'' (really
 *    just additional data associated with the message), and there are
 *    ``rights'' to be received, then a record containing this data
 *    should be present (mbuf's must be of type MT_RIGHTS).
 * 3. If a name or rights record exists, then it must be followed by
 *    a data record, perhaps of zero length.
 *
 * Before using a new socket structure it is first necessary to reserve
 * buffer space to the socket, by calling sbreserve().  This should commit
 * some of the available buffer space in the system buffer pool for the
 * socket (currently, it does nothing but enforce limits).  The space
 * should be released by calling sbrelease() when the socket is destroyed.
 */

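/*
 * Illustrative sketch (an assumption based on the conventions above):
 * a single datagram queued in a receive buffer might look like
 *
 *	sb_mb -> [MT_SONAME: sender's sockaddr]
 *	           m_next -> [MT_DATA: payload bytes] -> NULL
 *	           m_nextpkt -> next record (or NULL)
 *
 * i.e. one record per datagram, records chained by m_nextpkt, with the
 * name mbuf preceding the data mbufs inside each record.
 */
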
int
soreserve(so, sndcc, rcvcc)
	register struct socket *so;
	u_long sndcc, rcvcc;
{

	if (sbreserve(&so->so_snd, sndcc) == 0)
		goto bad;
	if (sbreserve(&so->so_rcv, rcvcc) == 0)
		goto bad2;
	if (so->so_rcv.sb_lowat == 0)
		so->so_rcv.sb_lowat = 1;
	if (so->so_snd.sb_lowat == 0)
		so->so_snd.sb_lowat = MCLBYTES;
	if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
		so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
	return (0);
bad2:
#ifdef __APPLE__
	selthreadclear(&so->so_snd.sb_sel);
#endif
	sbrelease(&so->so_snd);
bad:
	return (ENOBUFS);
}

/*
 * Allot mbufs to a sockbuf.
 * Attempt to scale mbmax so that mbcnt doesn't become limiting
 * if buffering efficiency is near the normal case.
 */
int
sbreserve(sb, cc)
	struct sockbuf *sb;
	u_long cc;
{
	if ((u_quad_t)cc > (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES))
		return (0);
	sb->sb_hiwat = cc;
	sb->sb_mbmax = min(cc * sb_efficiency, sb_max);
	if (sb->sb_lowat > sb->sb_hiwat)
		sb->sb_lowat = sb->sb_hiwat;
	return (1);
}
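
/*
 * Worked example (assuming sb_max is the default SB_MAX of 256 kB and
 * sb_efficiency of 8): sbreserve(sb, 8192) sets sb_hiwat = 8192 and
 * sb_mbmax = min(8192 * 8, 262144) = 65536, so up to 64 kB of mbuf
 * storage may back 8 kB of buffered data before mbcnt becomes the
 * limiting factor.
 */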

/*
 * Free mbufs held by a socket, and reserved mbuf space.
 */
/* WARNING needs to do selthreadclear() before calling this */
void
sbrelease(sb)
	struct sockbuf *sb;
{
	sbflush(sb);
	sb->sb_hiwat = 0;
	sb->sb_mbmax = 0;
}

/*
 * Routines to add and remove
 * data from an mbuf queue.
 *
 * The routines sbappend() or sbappendrecord() are normally called to
 * append new mbufs to a socket buffer, after checking that adequate
 * space is available, comparing the function sbspace() with the amount
 * of data to be added.  sbappendrecord() differs from sbappend() in
 * that data supplied is treated as the beginning of a new record.
 * To place a sender's address, optional access rights, and data in a
 * socket receive buffer, sbappendaddr() should be used.  To place
 * access rights and data in a socket receive buffer, sbappendrights()
 * should be used.  In either case, the new data begins a new record.
 * Note that unlike sbappend() and sbappendrecord(), these routines check
 * for the caller that there will be enough space to store the data.
 * Each fails if there is not enough space, or if it cannot find mbufs
 * to store additional information in.
 *
 * Reliable protocols may use the socket send buffer to hold data
 * awaiting acknowledgement.  Data is normally copied from a socket
 * send buffer in a protocol with m_copy for output to a peer, and the
 * data is then removed from the socket buffer with sbdrop() or
 * sbdroprecord() when the data is acknowledged by the peer.
 */
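
/*
 * Illustrative sketch (an assumption, not code from this file): the
 * send-buffer pattern described above, as a reliable protocol might
 * use it:
 *
 *	// enqueue: keep the data until the peer acknowledges it
 *	sbappend(&so->so_snd, m);
 *	// transmit: copy `len' bytes at offset `off' without
 *	// consuming the buffer
 *	m2 = m_copy(so->so_snd.sb_mb, off, len);
 *	// on acknowledgement of `acked' bytes, release them
 *	sbdrop(&so->so_snd, acked);
 */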

/*
 * Append mbuf chain m to the last record in the
 * socket buffer sb.  The additional space associated with
 * the mbuf chain is recorded in sb.  Empty mbufs are
 * discarded and mbufs are compacted where possible.
 */
int
sbappend(sb, m)
	struct sockbuf *sb;
	struct mbuf *m;
{
	register struct mbuf *n, *sb_first;
	int result = 0;
	int error = 0;
	int filtered = 0;

	if (m == 0)
		return 0;

	KERNEL_DEBUG((DBG_FNC_SBAPPEND | DBG_FUNC_START), sb, m->m_len, 0, 0, 0);

again:
	sb_first = n = sb->sb_mb;
	if (n) {
		while (n->m_nextpkt)
			n = n->m_nextpkt;
		do {
			if (n->m_flags & M_EOR) {
				result = sbappendrecord(sb, m); /* XXXXXX!!!! */
				KERNEL_DEBUG((DBG_FNC_SBAPPEND | DBG_FUNC_END), sb, sb->sb_cc, 0, 0, 0);
				return result;
			}
		} while (n->m_next && (n = n->m_next));
	}

	if (!filtered && (sb->sb_flags & SB_RECV) != 0) {
		error = sflt_data_in(sb->sb_so, NULL, &m, NULL, 0, &filtered);
		if (error) {
			/* no data was appended, caller should not call sowakeup */
			return 0;
		}

		/*
		 * If we ran any filters, the socket lock was dropped.
		 * n and sb_first cached data from the socket buffer.
		 * This cache is not valid since we dropped the lock.
		 * We must start over.  Since filtered is set we won't
		 * run through the filters a second time.  We just set
		 * n and sb_first again.
		 */
		if (filtered)
			goto again;
	}

	result = sbcompress(sb, m, n);

	KERNEL_DEBUG((DBG_FNC_SBAPPEND | DBG_FUNC_END), sb, sb->sb_cc, 0, 0, 0);

	return result;
}

#ifdef SOCKBUF_DEBUG
void
sbcheck(sb)
	register struct sockbuf *sb;
{
	register struct mbuf *m;
	register struct mbuf *n = 0;
	register u_long len = 0, mbcnt = 0;
	lck_mtx_t *mutex_held;

	if (sb->sb_so->so_proto->pr_getlock != NULL)
		mutex_held = (*sb->sb_so->so_proto->pr_getlock)(sb->sb_so, 0);
	else
		mutex_held = sb->sb_so->so_proto->pr_domain->dom_mtx;

	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

	if (sbchecking == 0)
		return;

	for (m = sb->sb_mb; m; m = n) {
		n = m->m_nextpkt;
		for (; m; m = m->m_next) {
			len += m->m_len;
			mbcnt += MSIZE;
			if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */
				mbcnt += m->m_ext.ext_size;
		}
	}
	if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
		panic("cc %ld != %ld || mbcnt %ld != %ld\n", len, sb->sb_cc,
		    mbcnt, sb->sb_mbcnt);
	}
}
#endif

/*
 * As above, except the mbuf chain
 * begins a new record.
 */
int
sbappendrecord(sb, m0)
	register struct sockbuf *sb;
	struct mbuf *m0;
{
	register struct mbuf *m;
	int result = 0;

	if (m0 == 0)
		return 0;

	if ((sb->sb_flags & SB_RECV) != 0) {
		int error = sflt_data_in(sb->sb_so, NULL, &m0, NULL,
		    sock_data_filt_flag_record, NULL);
		if (error != 0) {
			if (error != EJUSTRETURN)
				m_freem(m0);
			return 0;
		}
	}

	m = sb->sb_mb;
	if (m)
		while (m->m_nextpkt)
			m = m->m_nextpkt;
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	if (m)
		m->m_nextpkt = m0;
	else
		sb->sb_mb = m0;
	m = m0->m_next;
	m0->m_next = 0;
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	return sbcompress(sb, m, m0);
}

/*
 * As above except that OOB data
 * is inserted at the beginning of the sockbuf,
 * but after any other OOB data.
 */
int
sbinsertoob(sb, m0)
	struct sockbuf *sb;
	struct mbuf *m0;
{
	struct mbuf *m;
	struct mbuf **mp;

	if (m0 == 0)
		return 0;

	if ((sb->sb_flags & SB_RECV) != 0) {
		int error = sflt_data_in(sb->sb_so, NULL, &m0, NULL,
		    sock_data_filt_flag_oob, NULL);

		if (error) {
			if (error != EJUSTRETURN) {
				m_freem(m0);
			}
			return 0;
		}
	}

	for (mp = &sb->sb_mb; *mp; mp = &((*mp)->m_nextpkt)) {
		m = *mp;
again:
		switch (m->m_type) {

		case MT_OOBDATA:
			continue;		/* WANT next train */

		case MT_CONTROL:
			m = m->m_next;
			if (m)
				goto again;	/* inspect THIS train further */
		}
		break;
	}
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	m0->m_nextpkt = *mp;
	*mp = m0;
	m = m0->m_next;
	m0->m_next = 0;
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	return sbcompress(sb, m, m0);
}

/*
 * Append address and data, and optionally, control (ancillary) data
 * to the receive queue of a socket.  If present,
 * m0 must include a packet header with total length.
 * Returns 0 if no space in sockbuf or insufficient mbufs.
 */
static int
sbappendaddr_internal(sb, asa, m0, control)
	register struct sockbuf *sb;
	struct sockaddr *asa;
	struct mbuf *m0, *control;
{
	register struct mbuf *m, *n;
	int space = asa->sa_len;

	if (m0 && (m0->m_flags & M_PKTHDR) == 0)
		panic("sbappendaddr");

	if (m0)
		space += m0->m_pkthdr.len;
	for (n = control; n; n = n->m_next) {
		space += n->m_len;
		if (n->m_next == 0)	/* keep pointer to last control buf */
			break;
	}
	if (space > sbspace(sb))
		return (0);
	if (asa->sa_len > MLEN)
		return (0);
	MGET(m, M_DONTWAIT, MT_SONAME);
	if (m == 0)
		return (0);
	m->m_len = asa->sa_len;
	bcopy((caddr_t)asa, mtod(m, caddr_t), asa->sa_len);
	if (n)
		n->m_next = m0;		/* concatenate data to control */
	else
		control = m0;
	m->m_next = control;
	for (n = m; n; n = n->m_next)
		sballoc(sb, n);
	n = sb->sb_mb;
	if (n) {
		while (n->m_nextpkt)
			n = n->m_nextpkt;
		n->m_nextpkt = m;
	} else
		sb->sb_mb = m;
	postevent(0, sb, EV_RWBYTES);
	return (1);
}

int
sbappendaddr(
	struct sockbuf *sb,
	struct sockaddr *asa,
	struct mbuf *m0,
	struct mbuf *control,
	int *error_out)
{
	int result = 0;

	if (error_out)
		*error_out = 0;

	if (m0 && (m0->m_flags & M_PKTHDR) == 0)
		panic("sbappendaddrorfree");

	/* Call socket data in filters */
	if ((sb->sb_flags & SB_RECV) != 0) {
		int error;
		error = sflt_data_in(sb->sb_so, asa, &m0, &control, 0, NULL);
		if (error) {
			if (error != EJUSTRETURN) {
				if (m0)
					m_freem(m0);
				if (control)
					m_freem(control);
				if (error_out)
					*error_out = error;
			}
			return 0;
		}
	}

	result = sbappendaddr_internal(sb, asa, m0, control);
	if (result == 0) {
		if (m0)
			m_freem(m0);
		if (control)
			m_freem(control);
		if (error_out)
			*error_out = ENOBUFS;
	}

	return result;
}

static int
sbappendcontrol_internal(sb, m0, control)
	struct sockbuf *sb;
	struct mbuf *control, *m0;
{
	register struct mbuf *m, *n;
	int space = 0;

	if (control == 0)
		panic("sbappendcontrol");

	for (m = control; ; m = m->m_next) {
		space += m->m_len;
		if (m->m_next == 0)
			break;
	}
	n = m;			/* save pointer to last control buffer */
	for (m = m0; m; m = m->m_next)
		space += m->m_len;
	if (space > sbspace(sb))
		return (0);
	n->m_next = m0;		/* concatenate data to control */
	for (m = control; m; m = m->m_next)
		sballoc(sb, m);
	n = sb->sb_mb;
	if (n) {
		while (n->m_nextpkt)
			n = n->m_nextpkt;
		n->m_nextpkt = control;
	} else
		sb->sb_mb = control;
	postevent(0, sb, EV_RWBYTES);
	return (1);
}

int
sbappendcontrol(
	struct sockbuf *sb,
	struct mbuf *m0,
	struct mbuf *control,
	int *error_out)
{
	int result = 0;

	if (error_out)
		*error_out = 0;

	if (sb->sb_flags & SB_RECV) {
		int error;
		error = sflt_data_in(sb->sb_so, NULL, &m0, &control, 0, NULL);
		if (error) {
			if (error != EJUSTRETURN) {
				if (m0)
					m_freem(m0);
				if (control)
					m_freem(control);
				if (error_out)
					*error_out = error;
			}
			return 0;
		}
	}

	result = sbappendcontrol_internal(sb, m0, control);
	if (result == 0) {
		if (m0)
			m_freem(m0);
		if (control)
			m_freem(control);
		if (error_out)
			*error_out = ENOBUFS;
	}

	return result;
}

/*
 * Compress mbuf chain m into the socket
 * buffer sb following mbuf n.  If n
 * is null, the buffer is presumed empty.
 */
static int
sbcompress(sb, m, n)
	register struct sockbuf *sb;
	register struct mbuf *m, *n;
{
	register int eor = 0;
	register struct mbuf *o;

	while (m) {
		eor |= m->m_flags & M_EOR;
		if (m->m_len == 0 &&
		    (eor == 0 ||
		     (((o = m->m_next) || (o = n)) &&
		      o->m_type == m->m_type))) {
			m = m_free(m);
			continue;
		}
		if (n && (n->m_flags & M_EOR) == 0 &&
#ifndef __APPLE__
		    M_WRITABLE(n) &&
#endif
		    m->m_len <= MCLBYTES / 4 &&	/* XXX: Don't copy too much */
		    m->m_len <= M_TRAILINGSPACE(n) &&
		    n->m_type == m->m_type) {
			bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
			    (unsigned)m->m_len);
			n->m_len += m->m_len;
			sb->sb_cc += m->m_len;
			m = m_free(m);
			continue;
		}
		if (n)
			n->m_next = m;
		else
			sb->sb_mb = m;
		sballoc(sb, m);
		n = m;
		m->m_flags &= ~M_EOR;
		m = m->m_next;
		n->m_next = 0;
	}
	if (eor) {
		if (n)
			n->m_flags |= eor;
		else
			printf("semi-panic: sbcompress\n");
	}
	postevent(0, sb, EV_RWBYTES);
	return 1;
}

/*
 * Free all mbufs in a sockbuf.
 * Check that all resources are reclaimed.
 */
void
sbflush(sb)
	register struct sockbuf *sb;
{
	if (sb->sb_so == NULL)
		panic("sbflush sb->sb_so already null sb=%x\n", sb);
	(void)sblock(sb, M_WAIT);
	while (sb->sb_mbcnt) {
		/*
		 * Don't call sbdrop(sb, 0) if the leading mbuf is non-empty:
		 * we would loop forever.  Panic instead.
		 */
		if (!sb->sb_cc && (sb->sb_mb == NULL || sb->sb_mb->m_len))
			break;
		sbdrop(sb, (int)sb->sb_cc);
	}
	if (sb->sb_cc || sb->sb_mb || sb->sb_mbcnt || sb->sb_so == NULL)
		panic("sbflush: cc %ld || mb %p || mbcnt %ld sb_so=%x",
		    sb->sb_cc, (void *)sb->sb_mb, sb->sb_mbcnt, sb->sb_so);

	postevent(0, sb, EV_RWBYTES);
	sbunlock(sb, 1);	/* keep socket locked */
}

/*
 * Drop data from (the front of) a sockbuf.  Use m_freem_list to free
 * the mbuf structures under a single lock.  This is done by pruning
 * the top of the tree from the body: we keep track of how far we get
 * in the tree and then zero the two pertinent pointers, m_nextpkt and
 * m_next.  The socket buffer is then updated to point at the new top
 * of the tree and the pruned area is released via m_freem_list.
 */
void
sbdrop(sb, len)
	register struct sockbuf *sb;
	register int len;
{
	register struct mbuf *m, *free_list, *ml;
	struct mbuf *next, *last;

	KERNEL_DEBUG((DBG_FNC_SBDROP | DBG_FUNC_START), sb, len, 0, 0, 0);

	next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
	free_list = last = m;
	ml = (struct mbuf *)0;

	while (len > 0) {
		if (m == 0) {
			if (next == 0) {
				/*
				 * Temporarily replacing this panic with
				 * printf because it occurs occasionally when
				 * closing a socket, and there is no harm in
				 * ignoring it.  This problem will be
				 * investigated further.
				 */
				/* panic("sbdrop"); */
				printf("sbdrop - count not zero\n");
				len = 0;
				/*
				 * zero the counts. if we have no mbufs,
				 * we have no data (PR-2986815)
				 */
				sb->sb_cc = 0;
				sb->sb_mbcnt = 0;
				break;
			}
			m = last = next;
			next = m->m_nextpkt;
			continue;
		}
		if (m->m_len > len) {
			m->m_len -= len;
			m->m_data += len;
			sb->sb_cc -= len;
			break;
		}
		len -= m->m_len;
		sbfree(sb, m);

		ml = m;
		m = m->m_next;
	}
	while (m && m->m_len == 0) {
		sbfree(sb, m);

		ml = m;
		m = m->m_next;
	}
	if (ml) {
		ml->m_next = (struct mbuf *)0;
		last->m_nextpkt = (struct mbuf *)0;
		m_freem_list(free_list);
	}
	if (m) {
		sb->sb_mb = m;
		m->m_nextpkt = next;
	} else
		sb->sb_mb = next;

	postevent(0, sb, EV_RWBYTES);

	KERNEL_DEBUG((DBG_FNC_SBDROP | DBG_FUNC_END), sb, 0, 0, 0, 0);
}

/*
 * Drop a record off the front of a sockbuf
 * and move the next record to the front.
 */
void
sbdroprecord(sb)
	register struct sockbuf *sb;
{
	register struct mbuf *m, *mn;

	m = sb->sb_mb;
	if (m) {
		sb->sb_mb = m->m_nextpkt;
		do {
			sbfree(sb, m);
			MFREE(m, mn);
			m = mn;
		} while (m);
	}
	postevent(0, sb, EV_RWBYTES);
}

/*
 * Create a "control" mbuf containing the specified data
 * with the specified type for presentation on a socket buffer.
 */
struct mbuf *
sbcreatecontrol(p, size, type, level)
	caddr_t p;
	register int size;
	int type, level;
{
	register struct cmsghdr *cp;
	struct mbuf *m;

	if (CMSG_SPACE((u_int)size) > MLEN)
		return ((struct mbuf *) NULL);
	if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL)
		return ((struct mbuf *) NULL);
	cp = mtod(m, struct cmsghdr *);
	/* XXX check size? */
	(void)memcpy(CMSG_DATA(cp), p, size);
	m->m_len = CMSG_SPACE(size);
	cp->cmsg_len = CMSG_LEN(size);
	cp->cmsg_level = level;
	cp->cmsg_type = type;
	return (m);
}
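
/*
 * Illustrative sketch (an assumption, not code from this file): a
 * protocol delivering the destination address of a received IP
 * datagram as ancillary data might build the control mbuf like so:
 *
 *	struct in_addr dst;	// filled in from the IP header
 *	struct mbuf *control;
 *
 *	control = sbcreatecontrol((caddr_t)&dst, sizeof(dst),
 *	    IP_RECVDSTADDR, IPPROTO_IP);
 *	if (control != NULL)
 *		(void) sbappendaddr(&so->so_rcv, from, m, control, NULL);
 */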

/*
 * Some routines that return EOPNOTSUPP for entry points that are not
 * supported by a protocol.  Fill in as needed.
 */
int
pru_abort_notsupp(struct socket *so)
{
	return EOPNOTSUPP;
}

int
pru_accept_notsupp(struct socket *so, struct sockaddr **nam)
{
	return EOPNOTSUPP;
}

int
pru_attach_notsupp(struct socket *so, int proto, struct proc *p)
{
	return EOPNOTSUPP;
}

int
pru_bind_notsupp(struct socket *so, struct sockaddr *nam, struct proc *p)
{
	return EOPNOTSUPP;
}

int
pru_connect_notsupp(struct socket *so, struct sockaddr *nam, struct proc *p)
{
	return EOPNOTSUPP;
}

int
pru_connect2_notsupp(struct socket *so1, struct socket *so2)
{
	return EOPNOTSUPP;
}

int
pru_control_notsupp(struct socket *so, u_long cmd, caddr_t data,
		    struct ifnet *ifp, struct proc *p)
{
	return EOPNOTSUPP;
}

int
pru_detach_notsupp(struct socket *so)
{
	return EOPNOTSUPP;
}

int
pru_disconnect_notsupp(struct socket *so)
{
	return EOPNOTSUPP;
}

int
pru_listen_notsupp(struct socket *so, struct proc *p)
{
	return EOPNOTSUPP;
}

int
pru_peeraddr_notsupp(struct socket *so, struct sockaddr **nam)
{
	return EOPNOTSUPP;
}

int
pru_rcvd_notsupp(struct socket *so, int flags)
{
	return EOPNOTSUPP;
}

int
pru_rcvoob_notsupp(struct socket *so, struct mbuf *m, int flags)
{
	return EOPNOTSUPP;
}

int
pru_send_notsupp(struct socket *so, int flags, struct mbuf *m,
		 struct sockaddr *addr, struct mbuf *control,
		 struct proc *p)
{
	return EOPNOTSUPP;
}

/*
 * This isn't really a ``null'' operation, but it's the default one
 * and doesn't do anything destructive.
 */
int
pru_sense_null(struct socket *so, struct stat *sb)
{
	sb->st_blksize = so->so_snd.sb_hiwat;
	return 0;
}

int
pru_sosend_notsupp(struct socket *so, struct sockaddr *addr,
		   struct uio *uio, struct mbuf *top,
		   struct mbuf *control, int flags)
{
	return EOPNOTSUPP;
}

int
pru_soreceive_notsupp(struct socket *so,
		      struct sockaddr **paddr,
		      struct uio *uio, struct mbuf **mp0,
		      struct mbuf **controlp, int *flagsp)
{
	return EOPNOTSUPP;
}

int
pru_shutdown_notsupp(struct socket *so)
{
	return EOPNOTSUPP;
}

int
pru_sockaddr_notsupp(struct socket *so, struct sockaddr **nam)
{
	return EOPNOTSUPP;
}

int
pru_sosend(struct socket *so, struct sockaddr *addr,
	   struct uio *uio, struct mbuf *top,
	   struct mbuf *control, int flags)
{
	return EOPNOTSUPP;
}

int
pru_soreceive(struct socket *so,
	      struct sockaddr **paddr,
	      struct uio *uio, struct mbuf **mp0,
	      struct mbuf **controlp, int *flagsp)
{
	return EOPNOTSUPP;
}

int
pru_sopoll_notsupp(__unused struct socket *so, __unused int events,
		   __unused kauth_cred_t cred, __unused void *wql)
{
	return EOPNOTSUPP;
}

#ifdef __APPLE__
/*
 * The following are macros on BSD and functions on Darwin
 */

/*
 * Do we need to notify the other side when I/O is possible?
 */
int
sb_notify(struct sockbuf *sb)
{
	return ((sb->sb_flags & (SB_WAIT|SB_SEL|SB_ASYNC|SB_UPCALL|SB_KNOTE)) != 0);
}

/*
 * How much space is there in a socket buffer (so->so_snd or so->so_rcv)?
 * This is problematic if the fields are unsigned, as the space might
 * still be negative (cc > hiwat or mbcnt > mbmax).  Should detect
 * overflow and return 0.  Should use "lmin" but it doesn't exist now.
 */
long
sbspace(struct sockbuf *sb)
{
	return ((long) imin((int)(sb->sb_hiwat - sb->sb_cc),
	    (int)(sb->sb_mbmax - sb->sb_mbcnt)));
}
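
/*
 * Worked example of the caveat above: with sb_hiwat = 8192 and
 * sb_cc = 9000 (possible transiently, e.g. for a record admitted
 * while space was still available), hiwat - cc underflows the
 * unsigned fields; the imin() over the signed casts then yields
 * -808 rather than a huge positive count, so callers correctly
 * see "no space".
 */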

/* do we have to send all at once on a socket? */
int
sosendallatonce(struct socket *so)
{
	return (so->so_proto->pr_flags & PR_ATOMIC);
}

/* can we read something from so? */
int
soreadable(struct socket *so)
{
	return (so->so_rcv.sb_cc >= so->so_rcv.sb_lowat ||
	    (so->so_state & SS_CANTRCVMORE) ||
	    so->so_comp.tqh_first || so->so_error);
}

/* can we write something to so? */
int
sowriteable(struct socket *so)
{
	return ((sbspace(&(so)->so_snd) >= (so)->so_snd.sb_lowat &&
	    ((so->so_state & SS_ISCONNECTED) ||
	     (so->so_proto->pr_flags & PR_CONNREQUIRED) == 0)) ||
	    (so->so_state & SS_CANTSENDMORE) ||
	    so->so_error);
}

/* adjust counters in sb reflecting allocation of m */
void
sballoc(struct sockbuf *sb, struct mbuf *m)
{
	sb->sb_cc += m->m_len;
	sb->sb_mbcnt += MSIZE;
	if (m->m_flags & M_EXT)
		sb->sb_mbcnt += m->m_ext.ext_size;
}

/* adjust counters in sb reflecting freeing of m */
void
sbfree(struct sockbuf *sb, struct mbuf *m)
{
	sb->sb_cc -= m->m_len;
	sb->sb_mbcnt -= MSIZE;
	if (m->m_flags & M_EXT)
		sb->sb_mbcnt -= m->m_ext.ext_size;
}

/*
 * Set lock on sockbuf sb; sleep if lock is already held.
 * Unless SB_NOINTR is set on sockbuf, sleep is interruptible.
 * Returns error without lock if sleep is interrupted.
 */
int
sblock(struct sockbuf *sb, int wf)
{
	return (sb->sb_flags & SB_LOCK ?
	    ((wf == M_WAIT) ? sb_lock(sb) : EWOULDBLOCK) :
	    (sb->sb_flags |= SB_LOCK, 0));
}
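
/*
 * Illustrative sketch (an assumption, not code from this file) of the
 * usual pairing.  Note that the comma operator binds inside the third
 * ternary arm above (matching the original BSD sblock() macro), so the
 * uncontended path sets SB_LOCK and yields 0, while a contended M_WAIT
 * acquisition yields whatever sb_lock() returns:
 *
 *	error = sblock(&so->so_rcv, M_WAIT);
 *	if (error)
 *		return (error);
 *	// ... manipulate so->so_rcv ...
 *	sbunlock(&so->so_rcv, 1);	// keep the socket locked
 */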

/* release lock on sockbuf sb */
void
sbunlock(struct sockbuf *sb, int keeplocked)
{
	struct socket *so = sb->sb_so;
	int lr_saved;
	lck_mtx_t *mutex_held;

	lr_saved = (unsigned int) __builtin_return_address(0);

	sb->sb_flags &= ~SB_LOCK;

	if (so->so_proto->pr_getlock != NULL)
		mutex_held = (*so->so_proto->pr_getlock)(so, 0);
	else
		mutex_held = so->so_proto->pr_domain->dom_mtx;

	if (keeplocked == 0)
		lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

	if (sb->sb_flags & SB_WANT) {
		sb->sb_flags &= ~SB_WANT;
		if (so->so_usecount < 0)
			panic("sbunlock: b4 wakeup so=%x ref=%d lr=%x sb_flags=%x\n",
			    sb->sb_so, so->so_usecount, lr_saved, sb->sb_flags);

		wakeup((caddr_t)&(sb)->sb_flags);
	}
	if (keeplocked == 0) {	/* unlock on exit */
		so->so_usecount--;
		if (so->so_usecount < 0)
			panic("sbunlock: unlock on exit so=%x ref=%d lr=%x sb_flags=%x\n",
			    so, so->so_usecount, lr_saved, sb->sb_flags);
		so->unlock_lr[so->next_unlock_lr] = (void *)lr_saved;
		so->next_unlock_lr = (so->next_unlock_lr+1) % SO_LCKDBG_MAX;
		lck_mtx_unlock(mutex_held);
	}
}

void
sorwakeup(struct socket *so)
{
	if (sb_notify(&so->so_rcv))
		sowakeup(so, &so->so_rcv);
}

void
sowwakeup(struct socket *so)
{
	if (sb_notify(&so->so_snd))
		sowakeup(so, &so->so_snd);
}
#endif /* __APPLE__ */

/*
 * Make a copy of a sockaddr in a malloced buffer of type M_SONAME.
 */
struct sockaddr *
dup_sockaddr(sa, canwait)
	struct sockaddr *sa;
	int canwait;
{
	struct sockaddr *sa2;

	MALLOC(sa2, struct sockaddr *, sa->sa_len, M_SONAME,
	    canwait ? M_WAITOK : M_NOWAIT);
	if (sa2)
		bcopy(sa, sa2, sa->sa_len);
	return sa2;
}

/*
 * Create an external-format (``xsocket'') structure using the information
 * in the kernel-format socket structure pointed to by so.  This is done
 * to reduce the spew of irrelevant information over this interface,
 * to isolate user code from changes in the kernel structure, and
 * potentially to provide information-hiding if we decide that
 * some of this information should be hidden from users.
 */
void
sotoxsocket(struct socket *so, struct xsocket *xso)
{
	xso->xso_len = sizeof *xso;
	xso->xso_so = so;
	xso->so_type = so->so_type;
	xso->so_options = so->so_options;
	xso->so_linger = so->so_linger;
	xso->so_state = so->so_state;
	xso->so_pcb = so->so_pcb;
	if (so->so_proto) {
		xso->xso_protocol = so->so_proto->pr_protocol;
		xso->xso_family = so->so_proto->pr_domain->dom_family;
	} else
		xso->xso_protocol = xso->xso_family = 0;
	xso->so_qlen = so->so_qlen;
	xso->so_incqlen = so->so_incqlen;
	xso->so_qlimit = so->so_qlimit;
	xso->so_timeo = so->so_timeo;
	xso->so_error = so->so_error;
	xso->so_pgid = so->so_pgid;
	xso->so_oobmark = so->so_oobmark;
	sbtoxsockbuf(&so->so_snd, &xso->so_snd);
	sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
	xso->so_uid = so->so_uid;
}

/*
 * This does the same for sockbufs.  Note that the xsockbuf structure,
 * since it is always embedded in a socket, does not include a self
 * pointer nor a length.  We make this entry point public in case
 * some other mechanism needs it.
 */
void
sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb)
{
	xsb->sb_cc = sb->sb_cc;
	xsb->sb_hiwat = sb->sb_hiwat;
	xsb->sb_mbcnt = sb->sb_mbcnt;
	xsb->sb_mbmax = sb->sb_mbmax;
	xsb->sb_lowat = sb->sb_lowat;
	xsb->sb_flags = sb->sb_flags;
	xsb->sb_timeo = (u_long)
	    (sb->sb_timeo.tv_sec * hz) + sb->sb_timeo.tv_usec / tick;
	if (xsb->sb_timeo == 0 && sb->sb_timeo.tv_usec != 0)
		xsb->sb_timeo = 1;
}
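
/*
 * Worked example of the sb_timeo conversion (assuming hz = 100 ticks
 * per second and tick = 10000 microseconds per tick): a timeout of
 * { tv_sec = 2, tv_usec = 500000 } becomes 2*100 + 500000/10000 = 250
 * ticks, while { 0, 5000 } would truncate to 0 and is therefore
 * rounded up to 1 tick so that a nonzero timeout never reads back
 * as "no timeout".
 */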

/*
 * Here is the definition of some of the basic objects in the kern.ipc
 * branch of the MIB.
 */
SYSCTL_NODE(_kern, KERN_IPC, ipc, CTLFLAG_RW, 0, "IPC");

/* This takes the place of kern.maxsockbuf, which moved to kern.ipc. */
static int dummy;
SYSCTL_INT(_kern, KERN_DUMMY, dummy, CTLFLAG_RW, &dummy, 0, "");

SYSCTL_INT(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf, CTLFLAG_RW,
    &sb_max, 0, "Maximum socket buffer size");
SYSCTL_INT(_kern_ipc, OID_AUTO, maxsockets, CTLFLAG_RD,
    &maxsockets, 0, "Maximum number of sockets available");
SYSCTL_INT(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW,
    &sb_efficiency, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters, CTLFLAG_RD,
    &nmbclusters, 0, "");