/*
 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1998, 1999 Apple Computer, Inc. All Rights Reserved */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket2.c	8.1 (Berkeley) 6/10/93
 * $FreeBSD: src/sys/kern/uipc_socket2.c,v 1.55.2.9 2001/07/26 18:53:02 peter Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/protosw.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/ev.h>
#include <kern/locks.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <sys/kdebug.h>

#define	DBG_FNC_SBDROP		NETDBG_CODE(DBG_NETSOCK, 4)
#define	DBG_FNC_SBAPPEND	NETDBG_CODE(DBG_NETSOCK, 5)


/*
 * Primitive routines for operating on sockets and socket buffers
 */

u_long	sb_max = SB_MAX;		/* XXX should be static */

static	u_long sb_efficiency = 8;	/* parameter for sbreserve() */

/*
 * Procedures to manipulate state flags of socket
 * and do appropriate wakeups.  Normal sequence from the
 * active (originating) side is that soisconnecting() is
 * called during processing of connect() call,
 * resulting in an eventual call to soisconnected() if/when the
 * connection is established.  When the connection is torn down
 * soisdisconnecting() is called during processing of disconnect() call,
 * and soisdisconnected() is called when the connection to the peer
 * is totally severed.  The semantics of these routines are such that
 * connectionless protocols can call soisconnected() and soisdisconnected()
 * only, bypassing the in-progress calls when setting up a ``connection''
 * takes no time.
 *
 * From the passive side, a socket is created with
 * two queues of sockets: so_incomp for connections in progress
 * and so_comp for connections already made and awaiting user acceptance.
 * As a protocol is preparing incoming connections, it creates a socket
 * structure queued on so_incomp by calling sonewconn().  When the connection
 * is established, soisconnected() is called, and transfers the
 * socket structure to so_comp, making it available to accept().
 *
 * If a socket is closed with sockets on either
 * so_incomp or so_comp, these sockets are dropped.
 *
 * If higher level protocols are implemented in
 * the kernel, the wakeups done here will sometimes
 * cause software-interrupt process scheduling.
 */
void
soisconnecting(so)
	register struct socket *so;
{

	so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= SS_ISCONNECTING;

	sflt_notify(so, sock_evt_connecting, NULL);
}

void
soisconnected(so)
	struct socket *so;
{
	struct socket *head = so->so_head;

	so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
	so->so_state |= SS_ISCONNECTED;

	sflt_notify(so, sock_evt_connected, NULL);

	if (head && (so->so_state & SS_INCOMP)) {
		so->so_state &= ~SS_INCOMP;
		so->so_state |= SS_COMP;
		if (head->so_proto->pr_getlock != NULL) {
			socket_unlock(so, 0);
			socket_lock(head, 1);
		}
		postevent(head, 0, EV_RCONN);
		TAILQ_REMOVE(&head->so_incomp, so, so_list);
		head->so_incqlen--;
		TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
		sorwakeup(head);
		wakeup_one((caddr_t)&head->so_timeo);
		if (head->so_proto->pr_getlock != NULL) {
			socket_unlock(head, 1);
			socket_lock(so, 0);
		}
	} else {
		postevent(so, 0, EV_WCONN);
		wakeup((caddr_t)&so->so_timeo);
		sorwakeup(so);
		sowwakeup(so);
	}
}
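
/*
 * Illustrative sketch (not part of the original file): the typical
 * sequence of these calls for a connection-oriented socket.
 * proto_connect() and proto_input_established() are hypothetical
 * protocol routines.
 *
 *	proto_connect(so, nam)
 *		soisconnecting(so);	// handshake started
 *		...
 *	proto_input_established(so)
 *		soisconnected(so);	// for a passively accepted socket
 *					// this also moves so from
 *					// so_incomp to so_comp on its
 *					// listen head and wakes accept()
 */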

void
soisdisconnecting(so)
	register struct socket *so;
{
	so->so_state &= ~SS_ISCONNECTING;
	so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
	sflt_notify(so, sock_evt_disconnecting, NULL);
	wakeup((caddr_t)&so->so_timeo);
	sowwakeup(so);
	sorwakeup(so);
}

void
soisdisconnected(so)
	register struct socket *so;
{
	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
	sflt_notify(so, sock_evt_disconnected, NULL);
	wakeup((caddr_t)&so->so_timeo);
	sowwakeup(so);
	sorwakeup(so);
}

/*
 * Return a random connection that hasn't been serviced yet and
 * is eligible for discard.  There is a one in qlen chance that
 * we will return NULL, saying that there are no droppable
 * requests.  In this case, the protocol specific code should drop
 * the new request.  This ensures fairness.
 *
 * This may be used in conjunction with protocol specific queue
 * congestion routines.
 */
struct socket *
sodropablereq(head)
	register struct socket *head;
{
	struct socket *so, *sonext = NULL;
	unsigned int i, j, qlen;
	static int rnd;
	static struct timeval old_runtime;
	static unsigned int cur_cnt, old_cnt;
	struct timeval tv;

	microtime(&tv);
	if ((i = (tv.tv_sec - old_runtime.tv_sec)) != 0) {
		old_runtime = tv;
		old_cnt = cur_cnt / i;
		cur_cnt = 0;
	}

	so = TAILQ_FIRST(&head->so_incomp);
	if (!so)
		return (NULL);

	qlen = head->so_incqlen;
	if (++cur_cnt > qlen || old_cnt > qlen) {
		rnd = (314159 * rnd + 66329) & 0xffff;
		j = ((qlen + 1) * rnd) >> 16;
//###LD To clean up
		while (j-- && so) {
//			if (in_pcb_checkstate(so->so_pcb, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
			socket_lock(so, 1);
			sonext = TAILQ_NEXT(so, so_list);
//			in_pcb_check_state(so->so_pcb, WNT_RELEASE, 0);
			socket_unlock(so, 1);
			so = sonext;
		}
	}

//	if (in_pcb_checkstate(so->so_pcb, WNT_ACQUIRE, 0) == WNT_STOPUSING)
//		return (NULL);
//	else
		return (so);
}
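
/*
 * Illustrative sketch (hypothetical caller, not part of this file):
 * how protocol code can use sodropablereq() when the incomplete
 * queue overflows; "limit" is a made-up threshold.
 *
 *	if (head->so_incqlen > limit) {
 *		struct socket *sk = sodropablereq(head);
 *
 *		if (sk != NULL)
 *			... drop sk's embryonic connection ...
 *		else
 *			... drop the incoming request instead ...
 *	}
 */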

/*
 * When an attempt at a new connection is noted on a socket
 * which accepts connections, sonewconn is called.  If the
 * connection is possible (subject to space constraints, etc.)
 * then we allocate a new structure, properly linked into the
 * data structure of the original socket, and return this.
 * Connstatus may be 0, or SO_ISCONFIRMING, or SO_ISCONNECTED.
 */
static struct socket *
sonewconn_internal(head, connstatus)
	register struct socket *head;
	int connstatus;
{
	int error = 0;
	register struct socket *so;
	lck_mtx_t *mutex_held;

	if (head->so_proto->pr_getlock != NULL)
		mutex_held = (*head->so_proto->pr_getlock)(head, 0);
	else
		mutex_held = head->so_proto->pr_domain->dom_mtx;
	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

	if (head->so_qlen > 3 * head->so_qlimit / 2)
		return ((struct socket *)0);
	so = soalloc(1, head->so_proto->pr_domain->dom_family, head->so_type);
	if (so == NULL)
		return ((struct socket *)0);
	/* check if head was closed during the soalloc */
	if (head->so_proto == NULL) {
		sodealloc(so);
		return ((struct socket *)0);
	}

	so->so_head = head;
	so->so_type = head->so_type;
	so->so_options = head->so_options &~ SO_ACCEPTCONN;
	so->so_linger = head->so_linger;
	so->so_state = head->so_state | SS_NOFDREF;
	so->so_proto = head->so_proto;
	so->so_timeo = head->so_timeo;
	so->so_pgid  = head->so_pgid;
	so->so_uid = head->so_uid;
	so->so_usecount = 1;

	if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat)) {
		sflt_termsock(so);
		sodealloc(so);
		return ((struct socket *)0);
	}

	/*
	 * Must be done with head unlocked to avoid deadlock with pcb list
	 */
	socket_unlock(head, 0);
	if (((*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL) != 0) || error) {
		sflt_termsock(so);
		sodealloc(so);
		socket_lock(head, 0);
		return ((struct socket *)0);
	}
	socket_lock(head, 0);
#ifdef __APPLE__
	so->so_proto->pr_domain->dom_refs++;
#endif

	if (connstatus) {
		TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
		so->so_state |= SS_COMP;
	} else {
		TAILQ_INSERT_TAIL(&head->so_incomp, so, so_list);
		so->so_state |= SS_INCOMP;
		head->so_incqlen++;
	}
	head->so_qlen++;
#ifdef __APPLE__
	so->so_rcv.sb_so = so->so_snd.sb_so = so;
	TAILQ_INIT(&so->so_evlist);

	/* Attach socket filters for this protocol */
	sflt_initsock(so);
#endif
	if (connstatus) {
		so->so_state |= connstatus;
		sorwakeup(head);
		wakeup((caddr_t)&head->so_timeo);
	}
	return (so);
}


struct socket *
sonewconn(
	struct socket *head,
	int connstatus,
	const struct sockaddr *from)
{
	int error = 0;
	struct socket_filter_entry *filter;
	int filtered = 0;

	error = 0;
	for (filter = head->so_filt; filter && (error == 0);
	     filter = filter->sfe_next_onsocket) {
		if (filter->sfe_filter->sf_filter.sf_connect_in) {
			if (filtered == 0) {
				filtered = 1;
				sflt_use(head);
				socket_unlock(head, 0);
			}
			error = filter->sfe_filter->sf_filter.sf_connect_in(
				filter->sfe_cookie, head, from);
		}
	}
	if (filtered != 0) {
		socket_lock(head, 0);
		sflt_unuse(head);
	}

	if (error) {
		return NULL;
	}

	return sonewconn_internal(head, connstatus);
}
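
/*
 * Illustrative sketch (not part of this file): how a listening
 * protocol typically uses sonewconn() when a connection request
 * arrives, in the style of TCP's SYN processing.
 *
 *	struct socket *so2;
 *
 *	so2 = sonewconn(head, 0, from);	// queued on head->so_incomp
 *	if (so2 == NULL)
 *		... drop the request (see sodropablereq() above) ...
 *	...				// handshake completes later:
 *	soisconnected(so2);		// moves so2 to head->so_comp
 *					// and wakes up accept()
 */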

/*
 * Socantsendmore indicates that no more data will be sent on the
 * socket; it would normally be applied to a socket by the protocol
 * code when the user informs the system that no more data is to be
 * sent (in the case of PRU_SHUTDOWN).  Socantrcvmore indicates that
 * no more data will be received, and will normally be applied to the
 * socket by a protocol when it detects that the peer will send no
 * more data.  Data queued for reading in the socket may yet be read.
 */

void
socantsendmore(so)
	struct socket *so;
{
	so->so_state |= SS_CANTSENDMORE;
	sflt_notify(so, sock_evt_cantsendmore, NULL);
	sowwakeup(so);
}

void
socantrcvmore(so)
	struct socket *so;
{
	so->so_state |= SS_CANTRCVMORE;
	sflt_notify(so, sock_evt_cantrecvmore, NULL);
	sorwakeup(so);
}

/*
 * Wait for data to arrive at/drain from a socket buffer.
 */
int
sbwait(sb)
	struct sockbuf *sb;
{
	int error = 0, lr, lr_saved;
	struct socket *so = sb->sb_so;
	lck_mtx_t *mutex_held;
	struct timespec ts;

#ifdef __ppc__
	__asm__ volatile("mflr %0" : "=r" (lr));
	lr_saved = lr;
#endif


	if (so->so_proto->pr_getlock != NULL)
		mutex_held = (*so->so_proto->pr_getlock)(so, 0);
	else
		mutex_held = so->so_proto->pr_domain->dom_mtx;

	sb->sb_flags |= SB_WAIT;

	if (so->so_usecount < 1)
		panic("sbwait: so=%x refcount=%d\n", so, so->so_usecount);
	ts.tv_sec = sb->sb_timeo.tv_sec;
	ts.tv_nsec = sb->sb_timeo.tv_usec * 1000;
	error = msleep((caddr_t)&sb->sb_cc, mutex_held,
	    (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait",
	    &ts);

	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

	if (so->so_usecount < 1)
		panic("sbwait: so=%x refcount=%d\n", so, so->so_usecount);

	if ((so->so_state & SS_DRAINING)) {
		error = EBADF;
	}

	return (error);
}
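
/*
 * Illustrative sketch (not part of this file): a receive path
 * blocking until data arrives, in the style soreceive() uses.
 * The socket lock must be held on entry; sbwait() sleeps via
 * msleep(), which drops and reacquires it.
 *
 *	while (so->so_rcv.sb_cc == 0) {
 *		if (so->so_state & SS_CANTRCVMORE)
 *			return (0);		// EOF, nothing to wait for
 *		error = sbwait(&so->so_rcv);
 *		if (error)
 *			return (error);		// e.g. EINTR or EBADF
 *	}
 */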

/*
 * Lock a sockbuf already known to be locked;
 * return any error returned from sleep (EINTR).
 */
int
sb_lock(sb)
	register struct sockbuf *sb;
{
	struct socket *so = sb->sb_so;
	lck_mtx_t *mutex_held;
	int error = 0, lr, lr_saved;

#ifdef __ppc__
	__asm__ volatile("mflr %0" : "=r" (lr));
	lr_saved = lr;
#endif

	if (so == NULL)
		panic("sb_lock: null so back pointer sb=%x\n", sb);

	while (sb->sb_flags & SB_LOCK) {
		sb->sb_flags |= SB_WANT;
		if (so->so_proto->pr_getlock != NULL)
			mutex_held = (*so->so_proto->pr_getlock)(so, 0);
		else
			mutex_held = so->so_proto->pr_domain->dom_mtx;
		if (so->so_usecount < 1)
			panic("sb_lock: so=%x refcount=%d\n", so, so->so_usecount);
		error = msleep((caddr_t)&sb->sb_flags, mutex_held,
		    (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sblock", 0);
		if (so->so_usecount < 1)
			panic("sb_lock: 2 so=%x refcount=%d\n", so, so->so_usecount);
		if (error)
			return (error);
	}
	sb->sb_flags |= SB_LOCK;
	return (0);
}
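
/*
 * Illustrative sketch (not part of this file): the usual pairing
 * around sockbuf manipulation.  sblock() (defined below) takes the
 * fast path and falls back to sb_lock() when SB_LOCK is already set.
 *
 *	error = sblock(&so->so_rcv, M_WAIT);
 *	if (error)
 *		return (error);
 *	... manipulate the so->so_rcv mbuf chain ...
 *	sbunlock(&so->so_rcv, 1);	// 1 == keep the socket locked
 */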

/*
 * Wakeup processes waiting on a socket buffer.
 * Do asynchronous notification via SIGIO
 * if the socket has the SS_ASYNC flag set.
 */
void
sowakeup(so, sb)
	register struct socket *so;
	register struct sockbuf *sb;
{
	struct proc *p = current_proc();
	sb->sb_flags &= ~SB_SEL;
	selwakeup(&sb->sb_sel);
	if (sb->sb_flags & SB_WAIT) {
		sb->sb_flags &= ~SB_WAIT;
		wakeup((caddr_t)&sb->sb_cc);
	}
	if (so->so_state & SS_ASYNC) {
		if (so->so_pgid < 0)
			gsignal(-so->so_pgid, SIGIO);
		else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
			psignal(p, SIGIO);
	}
	if (sb->sb_flags & SB_KNOTE) {
		KNOTE(&sb->sb_sel.si_note, SO_FILT_HINT_LOCKED);
	}
	if (sb->sb_flags & SB_UPCALL) {
		socket_unlock(so, 0);
		(*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT);
		socket_lock(so, 0);
	}
}

/*
 * Socket buffer (struct sockbuf) utility routines.
 *
 * Each socket contains two socket buffers: one for sending data and
 * one for receiving data.  Each buffer contains a queue of mbufs,
 * information about the number of mbufs and amount of data in the
 * queue, and other fields allowing select() statements and notification
 * on data availability to be implemented.
 *
 * Data stored in a socket buffer is maintained as a list of records.
 * Each record is a list of mbufs chained together with the m_next
 * field.  Records are chained together with the m_nextpkt field.  The upper
 * level routine soreceive() expects the following conventions to be
 * observed when placing information in the receive buffer:
 *
 * 1. If the protocol requires each message be preceded by the sender's
 *    name, then a record containing that name must be present before
 *    any associated data (mbuf's must be of type MT_SONAME).
 * 2. If the protocol supports the exchange of ``access rights'' (really
 *    just additional data associated with the message), and there are
 *    ``rights'' to be received, then a record containing this data
 *    should be present (mbuf's must be of type MT_RIGHTS).
 * 3. If a name or rights record exists, then it must be followed by
 *    a data record, perhaps of zero length.
 *
 * Before using a new socket structure it is first necessary to reserve
 * buffer space to the socket, by calling sbreserve().  This should commit
 * some of the available buffer space in the system buffer pool for the
 * socket (currently, it does nothing but enforce limits).  The space
 * should be released by calling sbrelease() when the socket is destroyed.
 */

int
soreserve(so, sndcc, rcvcc)
	register struct socket *so;
	u_long sndcc, rcvcc;
{

	if (sbreserve(&so->so_snd, sndcc) == 0)
		goto bad;
	if (sbreserve(&so->so_rcv, rcvcc) == 0)
		goto bad2;
	if (so->so_rcv.sb_lowat == 0)
		so->so_rcv.sb_lowat = 1;
	if (so->so_snd.sb_lowat == 0)
		so->so_snd.sb_lowat = MCLBYTES;
	if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
		so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
	return (0);
bad2:
#ifdef __APPLE__
	selthreadclear(&so->so_snd.sb_sel);
#endif
	sbrelease(&so->so_snd);
bad:
	return (ENOBUFS);
}
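
/*
 * Illustrative sketch (not part of this file): a protocol attach
 * routine reserving default buffer space; the 8192-byte sizes are
 * made-up defaults.
 *
 *	static int
 *	proto_attach(struct socket *so, int proto)
 *	{
 *		int error;
 *
 *		error = soreserve(so, 8192, 8192);
 *		if (error)
 *			return (error);		// ENOBUFS
 *		...
 *		return (0);
 *	}
 */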

/*
 * Allot mbufs to a sockbuf.
 * Attempt to scale mbmax so that mbcnt doesn't become limiting
 * if buffering efficiency is near the normal case.
 */
int
sbreserve(sb, cc)
	struct sockbuf *sb;
	u_long cc;
{
	if ((u_quad_t)cc > (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES))
		return (0);
	sb->sb_hiwat = cc;
	sb->sb_mbmax = min(cc * sb_efficiency, sb_max);
	if (sb->sb_lowat > sb->sb_hiwat)
		sb->sb_lowat = sb->sb_hiwat;
	return (1);
}
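
/*
 * Worked example of the scaling above: with cc = 8192 and
 * sb_efficiency = 8, sb_mbmax = min(8192 * 8, sb_max) = 65536, so a
 * stream of small, poorly packed mbufs does not hit the mbuf-count
 * limit long before the byte count reaches sb_hiwat.
 */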

/*
 * Free mbufs held by a socket, and reserved mbuf space.
 */
/* WARNING needs to do selthreadclear() before calling this */
void
sbrelease(sb)
	struct sockbuf *sb;
{

	sbflush(sb);
	sb->sb_hiwat = 0;
	sb->sb_mbmax = 0;

}

/*
 * Routines to add and remove
 * data from an mbuf queue.
 *
 * The routines sbappend() or sbappendrecord() are normally called to
 * append new mbufs to a socket buffer, after checking that adequate
 * space is available, comparing the function sbspace() with the amount
 * of data to be added.  sbappendrecord() differs from sbappend() in
 * that data supplied is treated as the beginning of a new record.
 * To place a sender's address, optional access rights, and data in a
 * socket receive buffer, sbappendaddr() should be used.  To place
 * access rights and data in a socket receive buffer, sbappendrights()
 * should be used.  In either case, the new data begins a new record.
 * Note that unlike sbappend() and sbappendrecord(), these routines check
 * for the caller that there will be enough space to store the data.
 * Each fails if there is not enough space, or if it cannot find mbufs
 * to store additional information in.
 *
 * Reliable protocols may use the socket send buffer to hold data
 * awaiting acknowledgement.  Data is normally copied from a socket
 * send buffer in a protocol with m_copy for output to a peer,
 * and then removing the data from the socket buffer with sbdrop()
 * or sbdroprecord() when the data is acknowledged by the peer.
 */

/*
 * Append mbuf chain m to the last record in the
 * socket buffer sb.  The additional space associated
 * the mbuf chain is recorded in sb.  Empty mbufs are
 * discarded and mbufs are compacted where possible.
 */
int
sbappend(sb, m)
	struct sockbuf *sb;
	struct mbuf *m;
{
	register struct mbuf *n, *sb_first;
	int result = 0;
	int error = 0;
	int filtered = 0;


	KERNEL_DEBUG((DBG_FNC_SBAPPEND | DBG_FUNC_START), sb, m->m_len, 0, 0, 0);

	if (m == 0)
		return 0;

again:
	sb_first = n = sb->sb_mb;
	if (n) {
		while (n->m_nextpkt)
			n = n->m_nextpkt;
		do {
			if (n->m_flags & M_EOR) {
				result = sbappendrecord(sb, m); /* XXXXXX!!!! */
				KERNEL_DEBUG((DBG_FNC_SBAPPEND | DBG_FUNC_END), sb, sb->sb_cc, 0, 0, 0);
				return result;
			}
		} while (n->m_next && (n = n->m_next));
	}

	if (!filtered && (sb->sb_flags & SB_RECV) != 0) {
		error = sflt_data_in(sb->sb_so, NULL, &m, NULL, 0, &filtered);
		if (error) {
			/* no data was appended, caller should not call sowakeup */
			return 0;
		}

		/*
		 * If any filters ran, the socket lock was dropped.  n and
		 * sb_first cached data from the socket buffer; that cache is
		 * not valid since we dropped the lock, so we must start
		 * over.  Since filtered is set we won't run through the
		 * filters a second time; we just set n and sb_first again.
		 */
		if (filtered)
			goto again;
	}

	result = sbcompress(sb, m, n);

	KERNEL_DEBUG((DBG_FNC_SBAPPEND | DBG_FUNC_END), sb, sb->sb_cc, 0, 0, 0);

	return result;
}
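
/*
 * Illustrative sketch (not part of this file): a protocol input
 * routine delivering data to the receive buffer, checking space
 * first as described above.
 *
 *	if (sbspace(&so->so_rcv) < m->m_pkthdr.len)
 *		m_freem(m);			// no room: drop it
 *	else if (sbappend(&so->so_rcv, m))
 *		sorwakeup(so);			// appended: wake readers
 */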

#ifdef SOCKBUF_DEBUG
void
sbcheck(sb)
	register struct sockbuf *sb;
{
	register struct mbuf *m;
	register struct mbuf *n = 0;
	register u_long len = 0, mbcnt = 0;
	lck_mtx_t *mutex_held;

	if (sb->sb_so->so_proto->pr_getlock != NULL)
		mutex_held = (*sb->sb_so->so_proto->pr_getlock)(sb->sb_so, 0);
	else
		mutex_held = sb->sb_so->so_proto->pr_domain->dom_mtx;

	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

	if (sbchecking == 0)
		return;

	for (m = sb->sb_mb; m; m = n) {
		n = m->m_nextpkt;
		for (; m; m = m->m_next) {
			len += m->m_len;
			mbcnt += MSIZE;
			if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */
				mbcnt += m->m_ext.ext_size;
		}
	}
	if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
		panic("cc %ld != %ld || mbcnt %ld != %ld\n", len, sb->sb_cc,
		    mbcnt, sb->sb_mbcnt);
	}
}
#endif

/*
 * As above, except the mbuf chain
 * begins a new record.
 */
int
sbappendrecord(sb, m0)
	register struct sockbuf *sb;
	register struct mbuf *m0;
{
	register struct mbuf *m;
	int result = 0;

	if (m0 == 0)
		return 0;

	if ((sb->sb_flags & SB_RECV) != 0) {
		int error = sflt_data_in(sb->sb_so, NULL, &m0, NULL, sock_data_filt_flag_record, NULL);
		if (error != 0) {
			if (error != EJUSTRETURN)
				m_freem(m0);
			return 0;
		}
	}

	m = sb->sb_mb;
	if (m)
		while (m->m_nextpkt)
			m = m->m_nextpkt;
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	if (m)
		m->m_nextpkt = m0;
	else
		sb->sb_mb = m0;
	m = m0->m_next;
	m0->m_next = 0;
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	return sbcompress(sb, m, m0);
}

/*
 * As above except that OOB data
 * is inserted at the beginning of the sockbuf,
 * but after any other OOB data.
 */
int
sbinsertoob(sb, m0)
	struct sockbuf *sb;
	struct mbuf *m0;
{
	struct mbuf *m;
	struct mbuf **mp;

	if (m0 == 0)
		return 0;

	if ((sb->sb_flags & SB_RECV) != 0) {
		int error = sflt_data_in(sb->sb_so, NULL, &m0, NULL,
			sock_data_filt_flag_oob, NULL);

		if (error) {
			if (error != EJUSTRETURN) {
				m_freem(m0);
			}
			return 0;
		}
	}

	for (mp = &sb->sb_mb; *mp ; mp = &((*mp)->m_nextpkt)) {
		m = *mp;
	    again:
		switch (m->m_type) {

		case MT_OOBDATA:
			continue;		/* WANT next train */

		case MT_CONTROL:
			m = m->m_next;
			if (m)
				goto again;	/* inspect THIS train further */
		}
		break;
	}
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	m0->m_nextpkt = *mp;
	*mp = m0;
	m = m0->m_next;
	m0->m_next = 0;
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	return sbcompress(sb, m, m0);
}

/*
 * Append address and data, and optionally, control (ancillary) data
 * to the receive queue of a socket.  If present,
 * m0 must include a packet header with total length.
 * Returns 0 if no space in sockbuf or insufficient mbufs.
 */
static int
sbappendaddr_internal(sb, asa, m0, control)
	register struct sockbuf *sb;
	struct sockaddr *asa;
	struct mbuf *m0, *control;
{
	register struct mbuf *m, *n;
	int space = asa->sa_len;

	if (m0 && (m0->m_flags & M_PKTHDR) == 0)
		panic("sbappendaddr");

	if (m0)
		space += m0->m_pkthdr.len;
	for (n = control; n; n = n->m_next) {
		space += n->m_len;
		if (n->m_next == 0)	/* keep pointer to last control buf */
			break;
	}
	if (space > sbspace(sb))
		return (0);
	if (asa->sa_len > MLEN)
		return (0);
	MGET(m, M_DONTWAIT, MT_SONAME);
	if (m == 0)
		return (0);
	m->m_len = asa->sa_len;
	bcopy((caddr_t)asa, mtod(m, caddr_t), asa->sa_len);
	if (n)
		n->m_next = m0;		/* concatenate data to control */
	else
		control = m0;
	m->m_next = control;
	for (n = m; n; n = n->m_next)
		sballoc(sb, n);
	n = sb->sb_mb;
	if (n) {
		while (n->m_nextpkt)
			n = n->m_nextpkt;
		n->m_nextpkt = m;
	} else
		sb->sb_mb = m;
	postevent(0,sb,EV_RWBYTES);
	return (1);
}

int
sbappendaddr(
	struct sockbuf *sb,
	struct sockaddr *asa,
	struct mbuf *m0,
	struct mbuf *control,
	int *error_out)
{
	int result = 0;

	if (error_out) *error_out = 0;

	if (m0 && (m0->m_flags & M_PKTHDR) == 0)
		panic("sbappendaddrorfree");

	/* Call socket data in filters */
	if ((sb->sb_flags & SB_RECV) != 0) {
		int error;
		error = sflt_data_in(sb->sb_so, asa, &m0, &control, 0, NULL);
		if (error) {
			if (error != EJUSTRETURN) {
				if (m0) m_freem(m0);
				if (control) m_freem(control);
				if (error_out) *error_out = error;
			}
			return 0;
		}
	}

	result = sbappendaddr_internal(sb, asa, m0, control);
	if (result == 0) {
		if (m0) m_freem(m0);
		if (control) m_freem(control);
		if (error_out) *error_out = ENOBUFS;
	}

	return result;
}
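
/*
 * Illustrative sketch (not part of this file): a datagram protocol
 * queueing a received packet together with its sender address, in
 * the style udp_input() uses.  On failure the mbufs have already
 * been freed here, so the caller must not touch them again.
 *
 *	if (sbappendaddr(&so->so_rcv, (struct sockaddr *)&from,
 *	    m, opts, NULL) == 0)
 *		... count the drop; m and opts are gone ...
 *	else
 *		sorwakeup(so);
 */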

static int
sbappendcontrol_internal(sb, m0, control)
	struct sockbuf *sb;
	struct mbuf *control, *m0;
{
	register struct mbuf *m, *n;
	int space = 0;

	if (control == 0)
		panic("sbappendcontrol");

	for (m = control; ; m = m->m_next) {
		space += m->m_len;
		if (m->m_next == 0)
			break;
	}
	n = m;			/* save pointer to last control buffer */
	for (m = m0; m; m = m->m_next)
		space += m->m_len;
	if (space > sbspace(sb))
		return (0);
	n->m_next = m0;		/* concatenate data to control */
	for (m = control; m; m = m->m_next)
		sballoc(sb, m);
	n = sb->sb_mb;
	if (n) {
		while (n->m_nextpkt)
			n = n->m_nextpkt;
		n->m_nextpkt = control;
	} else
		sb->sb_mb = control;
	postevent(0,sb,EV_RWBYTES);
	return (1);
}

int
sbappendcontrol(
	struct sockbuf *sb,
	struct mbuf *m0,
	struct mbuf *control,
	int *error_out)
{
	int result = 0;

	if (error_out) *error_out = 0;

	if (sb->sb_flags & SB_RECV) {
		int error;
		error = sflt_data_in(sb->sb_so, NULL, &m0, &control, 0, NULL);
		if (error) {
			if (error != EJUSTRETURN) {
				if (m0) m_freem(m0);
				if (control) m_freem(control);
				if (error_out) *error_out = error;
			}
			return 0;
		}
	}

	result = sbappendcontrol_internal(sb, m0, control);
	if (result == 0) {
		if (m0) m_freem(m0);
		if (control) m_freem(control);
		if (error_out) *error_out = ENOBUFS;
	}

	return result;
}

/*
 * Compress mbuf chain m into the socket
 * buffer sb following mbuf n.  If n
 * is null, the buffer is presumed empty.
 */
static int
sbcompress(sb, m, n)
	register struct sockbuf *sb;
	register struct mbuf *m, *n;
{
	register int eor = 0;
	register struct mbuf *o;

	while (m) {
		eor |= m->m_flags & M_EOR;
		if (m->m_len == 0 &&
		    (eor == 0 ||
		     (((o = m->m_next) || (o = n)) &&
		      o->m_type == m->m_type))) {
			m = m_free(m);
			continue;
		}
		if (n && (n->m_flags & M_EOR) == 0 &&
#ifndef __APPLE__
		    M_WRITABLE(n) &&
#endif
		    m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
		    m->m_len <= M_TRAILINGSPACE(n) &&
		    n->m_type == m->m_type) {
			bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
			    (unsigned)m->m_len);
			n->m_len += m->m_len;
			sb->sb_cc += m->m_len;
			m = m_free(m);
			continue;
		}
		if (n)
			n->m_next = m;
		else
			sb->sb_mb = m;
		sballoc(sb, m);
		n = m;
		m->m_flags &= ~M_EOR;
		m = m->m_next;
		n->m_next = 0;
	}
	if (eor) {
		if (n)
			n->m_flags |= eor;
		else
			printf("semi-panic: sbcompress\n");
	}
	postevent(0,sb, EV_RWBYTES);
	return 1;
}

/*
 * Free all mbufs in a sockbuf.
 * Check that all resources are reclaimed.
 */
void
sbflush(sb)
	register struct sockbuf *sb;
{
	if (sb->sb_so == NULL)
		panic("sbflush sb->sb_so already null sb=%x\n", sb);
	(void)sblock(sb, M_WAIT);
	while (sb->sb_mbcnt) {
		/*
		 * Don't call sbdrop(sb, 0) if the leading mbuf is non-empty:
		 * we would loop forever. Panic instead.
		 */
		if (!sb->sb_cc && (sb->sb_mb == NULL || sb->sb_mb->m_len))
			break;
		sbdrop(sb, (int)sb->sb_cc);
	}
	if (sb->sb_cc || sb->sb_mb || sb->sb_mbcnt || sb->sb_so == NULL)
		panic("sbflush: cc %ld || mb %p || mbcnt %ld sb_so=%x", sb->sb_cc, (void *)sb->sb_mb, sb->sb_mbcnt, sb->sb_so);

	postevent(0, sb, EV_RWBYTES);
	sbunlock(sb, 1);	/* keep socket locked */

}

/*
 * Drop data from (the front of) a sockbuf.  m_freem_list is used to
 * free the mbuf structures under a single lock.  This is done by
 * pruning the top of the tree from the body: we keep track of how far
 * into the tree we get, then zero the two pertinent pointers,
 * m_nextpkt and m_next.  The socket buffer is then updated to point
 * at the new top of the tree, and the pruned area is released via
 * m_freem_list.
 */
void
sbdrop(sb, len)
	register struct sockbuf *sb;
	register int len;
{
	register struct mbuf *m, *free_list, *ml;
	struct mbuf *next, *last;

	KERNEL_DEBUG((DBG_FNC_SBDROP | DBG_FUNC_START), sb, len, 0, 0, 0);

	next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
	free_list = last = m;
	ml = (struct mbuf *)0;

	while (len > 0) {
		if (m == 0) {
			if (next == 0) {
				/*
				 * temporarily replacing this panic with printf
				 * because it occurs occasionally when closing
				 * a socket when there is no harm in ignoring
				 * it.  This problem will be investigated
				 * further.
				 */
				/* panic("sbdrop"); */
				printf("sbdrop - count not zero\n");
				len = 0;
				/*
				 * zero the counts.  if we have no mbufs,
				 * we have no data (PR-2986815)
				 */
				sb->sb_cc = 0;
				sb->sb_mbcnt = 0;
				break;
			}
			m = last = next;
			next = m->m_nextpkt;
			continue;
		}
		if (m->m_len > len) {
			m->m_len -= len;
			m->m_data += len;
			sb->sb_cc -= len;
			break;
		}
		len -= m->m_len;
		sbfree(sb, m);

		ml = m;
		m = m->m_next;
	}
	while (m && m->m_len == 0) {
		sbfree(sb, m);

		ml = m;
		m = m->m_next;
	}
	if (ml) {
		ml->m_next = (struct mbuf *)0;
		last->m_nextpkt = (struct mbuf *)0;
		m_freem_list(free_list);
	}
	if (m) {
		sb->sb_mb = m;
		m->m_nextpkt = next;
	} else
		sb->sb_mb = next;

	postevent(0, sb, EV_RWBYTES);

	KERNEL_DEBUG((DBG_FNC_SBDROP | DBG_FUNC_END), sb, 0, 0, 0, 0);
}
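
/*
 * Illustrative sketch (not part of this file): a reliable protocol
 * releasing acknowledged data from its send buffer, as described in
 * the comment above sbappend(); "acked" is a hypothetical byte count.
 *
 *	sbdrop(&so->so_snd, acked);
 *	sowwakeup(so);		// space was freed: wake writers
 */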

/*
 * Drop a record off the front of a sockbuf
 * and move the next record to the front.
 */
void
sbdroprecord(sb)
	register struct sockbuf *sb;
{
	register struct mbuf *m, *mn;

	m = sb->sb_mb;
	if (m) {
		sb->sb_mb = m->m_nextpkt;
		do {
			sbfree(sb, m);
			MFREE(m, mn);
			m = mn;
		} while (m);
	}
	postevent(0, sb, EV_RWBYTES);
}

/*
 * Create a "control" mbuf containing the specified data
 * with the specified type for presentation on a socket buffer.
 */
struct mbuf *
sbcreatecontrol(p, size, type, level)
	caddr_t p;
	register int size;
	int type, level;
{
	register struct cmsghdr *cp;
	struct mbuf *m;

	if (CMSG_SPACE((u_int)size) > MLEN)
		return ((struct mbuf *) NULL);
	if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL)
		return ((struct mbuf *) NULL);
	cp = mtod(m, struct cmsghdr *);
	/* XXX check size? */
	(void)memcpy(CMSG_DATA(cp), p, size);
	m->m_len = CMSG_SPACE(size);
	cp->cmsg_len = CMSG_LEN(size);
	cp->cmsg_level = level;
	cp->cmsg_type = type;
	return (m);
}
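
/*
 * Illustrative sketch (not part of this file): building a control
 * mbuf that carries a receive timestamp, roughly what the
 * SO_TIMESTAMP option does.
 *
 *	struct timeval tv;
 *	struct mbuf *ctl;
 *
 *	microtime(&tv);
 *	ctl = sbcreatecontrol((caddr_t)&tv, sizeof (tv),
 *	    SCM_TIMESTAMP, SOL_SOCKET);
 *	if (ctl != NULL)
 *		sbappendcontrol(&so->so_rcv, m, ctl, NULL);
 */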

/*
 * Some routines that return EOPNOTSUPP for entry points that are not
 * supported by a protocol.  Fill in as needed.
 */
int
pru_abort_notsupp(struct socket *so)
{
	return EOPNOTSUPP;
}


int
pru_accept_notsupp(struct socket *so, struct sockaddr **nam)
{
	return EOPNOTSUPP;
}

int
pru_attach_notsupp(struct socket *so, int proto, struct proc *p)
{
	return EOPNOTSUPP;
}

int
pru_bind_notsupp(struct socket *so, struct sockaddr *nam, struct proc *p)
{
	return EOPNOTSUPP;
}

int
pru_connect_notsupp(struct socket *so, struct sockaddr *nam, struct proc *p)
{
	return EOPNOTSUPP;
}

int
pru_connect2_notsupp(struct socket *so1, struct socket *so2)
{
	return EOPNOTSUPP;
}

int
pru_control_notsupp(struct socket *so, u_long cmd, caddr_t data,
		    struct ifnet *ifp, struct proc *p)
{
	return EOPNOTSUPP;
}

int
pru_detach_notsupp(struct socket *so)
{
	return EOPNOTSUPP;
}

int
pru_disconnect_notsupp(struct socket *so)
{
	return EOPNOTSUPP;
}

int
pru_listen_notsupp(struct socket *so, struct proc *p)
{
	return EOPNOTSUPP;
}

int
pru_peeraddr_notsupp(struct socket *so, struct sockaddr **nam)
{
	return EOPNOTSUPP;
}

int
pru_rcvd_notsupp(struct socket *so, int flags)
{
	return EOPNOTSUPP;
}

int
pru_rcvoob_notsupp(struct socket *so, struct mbuf *m, int flags)
{
	return EOPNOTSUPP;
}

int
pru_send_notsupp(struct socket *so, int flags, struct mbuf *m,
		 struct sockaddr *addr, struct mbuf *control,
		 struct proc *p)
{
	return EOPNOTSUPP;
}


/*
 * This isn't really a ``null'' operation, but it's the default one
 * and doesn't do anything destructive.
 */
int
pru_sense_null(struct socket *so, struct stat *sb)
{
	sb->st_blksize = so->so_snd.sb_hiwat;
	return 0;
}


int
pru_sosend_notsupp(struct socket *so, struct sockaddr *addr,
		   struct uio *uio, struct mbuf *top,
		   struct mbuf *control, int flags)
{
	return EOPNOTSUPP;
}

int
pru_soreceive_notsupp(struct socket *so,
		      struct sockaddr **paddr,
		      struct uio *uio, struct mbuf **mp0,
		      struct mbuf **controlp, int *flagsp)
{
	return EOPNOTSUPP;
}

int
pru_shutdown_notsupp(struct socket *so)
{
	return EOPNOTSUPP;
}

int
pru_sockaddr_notsupp(struct socket *so, struct sockaddr **nam)
{
	return EOPNOTSUPP;
}

int
pru_sosend(struct socket *so, struct sockaddr *addr,
	   struct uio *uio, struct mbuf *top,
	   struct mbuf *control, int flags)
{
	return EOPNOTSUPP;
}

int
pru_soreceive(struct socket *so,
	      struct sockaddr **paddr,
	      struct uio *uio, struct mbuf **mp0,
	      struct mbuf **controlp, int *flagsp)
{
	return EOPNOTSUPP;
}


int
pru_sopoll_notsupp(__unused struct socket *so, __unused int events,
		   __unused kauth_cred_t cred, __unused void *wql)
{
	return EOPNOTSUPP;
}


#ifdef __APPLE__
/*
 * The following are macros on BSD and functions on Darwin
 */

/*
 * Do we need to notify the other side when I/O is possible?
 */

int
sb_notify(struct sockbuf *sb)
{
	return ((sb->sb_flags & (SB_WAIT|SB_SEL|SB_ASYNC|SB_UPCALL|SB_KNOTE)) != 0);
}

/*
 * How much space is there in a socket buffer (so->so_snd or so->so_rcv)?
 * This is problematical if the fields are unsigned, as the space might
 * still be negative (cc > hiwat or mbcnt > mbmax).  Should detect
 * overflow and return 0.  Should use "lmin" but it doesn't exist now.
 */
long
sbspace(struct sockbuf *sb)
{
	return ((long) imin((int)(sb->sb_hiwat - sb->sb_cc),
	    (int)(sb->sb_mbmax - sb->sb_mbcnt)));
}
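
/*
 * Worked example: with sb_hiwat = 8192, sb_cc = 6000, sb_mbmax =
 * 65536 and sb_mbcnt = 64512, sbspace() returns min(2192, 1024) =
 * 1024; the mbuf-count term can be the binding limit even while
 * byte space remains.
 */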

/* do we have to send all at once on a socket? */
int
sosendallatonce(struct socket *so)
{
	return (so->so_proto->pr_flags & PR_ATOMIC);
}

/* can we read something from so? */
int
soreadable(struct socket *so)
{
	return (so->so_rcv.sb_cc >= so->so_rcv.sb_lowat ||
	    (so->so_state & SS_CANTRCVMORE) ||
	    so->so_comp.tqh_first || so->so_error);
}

/* can we write something to so? */

int
sowriteable(struct socket *so)
{
	return ((sbspace(&(so)->so_snd) >= (so)->so_snd.sb_lowat &&
	    ((so->so_state&SS_ISCONNECTED) ||
	     (so->so_proto->pr_flags&PR_CONNREQUIRED)==0)) ||
	    (so->so_state & SS_CANTSENDMORE) ||
	    so->so_error);
}

/* adjust counters in sb reflecting allocation of m */

void
sballoc(struct sockbuf *sb, struct mbuf *m)
{
	sb->sb_cc += m->m_len;
	sb->sb_mbcnt += MSIZE;
	if (m->m_flags & M_EXT)
		sb->sb_mbcnt += m->m_ext.ext_size;
}

/* adjust counters in sb reflecting freeing of m */
void
sbfree(struct sockbuf *sb, struct mbuf *m)
{
	sb->sb_cc -= m->m_len;
	sb->sb_mbcnt -= MSIZE;
	if (m->m_flags & M_EXT)
		sb->sb_mbcnt -= m->m_ext.ext_size;
}

/*
 * Set lock on sockbuf sb; sleep if lock is already held.
 * Unless SB_NOINTR is set on sockbuf, sleep is interruptible.
 * Returns error without lock if sleep is interrupted.
 */
int
sblock(struct sockbuf *sb, int wf)
{
	return(sb->sb_flags & SB_LOCK ?
		((wf == M_WAIT) ? sb_lock(sb) : EWOULDBLOCK) :
		(sb->sb_flags |= SB_LOCK), 0);
}

/* release lock on sockbuf sb */
void
sbunlock(struct sockbuf *sb, int keeplocked)
{
	struct socket *so = sb->sb_so;
	int lr = 0, lr_saved = 0;
	lck_mtx_t *mutex_held;

#ifdef __ppc__
	__asm__ volatile("mflr %0" : "=r" (lr));
	lr_saved = lr;
#endif
	sb->sb_flags &= ~SB_LOCK;

	if (so->so_proto->pr_getlock != NULL)
		mutex_held = (*so->so_proto->pr_getlock)(so, 0);
	else
		mutex_held = so->so_proto->pr_domain->dom_mtx;

	if (keeplocked == 0)
		lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

	if (sb->sb_flags & SB_WANT) {
		sb->sb_flags &= ~SB_WANT;
		if (so->so_usecount < 0)
			panic("sbunlock: b4 wakeup so=%x ref=%d lr=%x sb_flags=%x\n", sb->sb_so, so->so_usecount, lr_saved, sb->sb_flags);

		wakeup((caddr_t)&(sb)->sb_flags);
	}
	if (keeplocked == 0) {	/* unlock on exit */
		so->so_usecount--;
		if (so->so_usecount < 0)
			panic("sbunlock: unlock on exit so=%x ref=%d lr=%x sb_flags=%x\n", so, so->so_usecount, lr_saved, sb->sb_flags);
		so->reserved4 = lr_saved;
		lck_mtx_unlock(mutex_held);
	}
}

void
sorwakeup(struct socket * so)
{
	if (sb_notify(&so->so_rcv))
		sowakeup(so, &so->so_rcv);
}

void
sowwakeup(struct socket * so)
{
	if (sb_notify(&so->so_snd))
		sowakeup(so, &so->so_snd);
}
#endif /* __APPLE__ */

/*
 * Make a copy of a sockaddr in a malloced buffer of type M_SONAME.
 */
struct sockaddr *
dup_sockaddr(sa, canwait)
	struct sockaddr *sa;
	int canwait;
{
	struct sockaddr *sa2;

	MALLOC(sa2, struct sockaddr *, sa->sa_len, M_SONAME,
	    canwait ? M_WAITOK : M_NOWAIT);
	if (sa2)
		bcopy(sa, sa2, sa->sa_len);
	return sa2;
}

/*
 * Create an external-format (``xsocket'') structure using the information
 * in the kernel-format socket structure pointed to by so.  This is done
 * to reduce the spew of irrelevant information over this interface,
 * to isolate user code from changes in the kernel structure, and
 * potentially to provide information-hiding if we decide that
 * some of this information should be hidden from users.
 */
void
sotoxsocket(struct socket *so, struct xsocket *xso)
{
	xso->xso_len = sizeof *xso;
	xso->xso_so = so;
	xso->so_type = so->so_type;
	xso->so_options = so->so_options;
	xso->so_linger = so->so_linger;
	xso->so_state = so->so_state;
	xso->so_pcb = so->so_pcb;
	if (so->so_proto) {
		xso->xso_protocol = so->so_proto->pr_protocol;
		xso->xso_family = so->so_proto->pr_domain->dom_family;
	} else
		xso->xso_protocol = xso->xso_family = 0;
	xso->so_qlen = so->so_qlen;
	xso->so_incqlen = so->so_incqlen;
	xso->so_qlimit = so->so_qlimit;
	xso->so_timeo = so->so_timeo;
	xso->so_error = so->so_error;
	xso->so_pgid = so->so_pgid;
	xso->so_oobmark = so->so_oobmark;
	sbtoxsockbuf(&so->so_snd, &xso->so_snd);
	sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
	xso->so_uid = so->so_uid;
}

/*
 * This does the same for sockbufs.  Note that the xsockbuf structure,
 * since it is always embedded in a socket, does not include a self
 * pointer nor a length.  We make this entry point public in case
 * some other mechanism needs it.
 */
void
sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb)
{
	xsb->sb_cc = sb->sb_cc;
	xsb->sb_hiwat = sb->sb_hiwat;
	xsb->sb_mbcnt = sb->sb_mbcnt;
	xsb->sb_mbmax = sb->sb_mbmax;
	xsb->sb_lowat = sb->sb_lowat;
	xsb->sb_flags = sb->sb_flags;
	xsb->sb_timeo = (u_long)(sb->sb_timeo.tv_sec * hz) + sb->sb_timeo.tv_usec / tick;
	if (xsb->sb_timeo == 0 && sb->sb_timeo.tv_usec != 0)
		xsb->sb_timeo = 1;
}

/*
 * Here is the definition of some of the basic objects in the kern.ipc
 * branch of the MIB.
 */
SYSCTL_NODE(_kern, KERN_IPC, ipc, CTLFLAG_RW, 0, "IPC");

/* This takes the place of kern.maxsockbuf, which moved to kern.ipc. */
static int dummy;
SYSCTL_INT(_kern, KERN_DUMMY, dummy, CTLFLAG_RW, &dummy, 0, "");

SYSCTL_INT(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf, CTLFLAG_RW,
    &sb_max, 0, "Maximum socket buffer size");
SYSCTL_INT(_kern_ipc, OID_AUTO, maxsockets, CTLFLAG_RD,
    &maxsockets, 0, "Maximum number of sockets available");
SYSCTL_INT(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW,
    &sb_efficiency, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters, CTLFLAG_RD, &nmbclusters, 0, "");