30750607ff19d67fc046f0e5df052b17bde6bf15
[apple/xnu.git] / bsd / kern / uipc_socket2.c
1 /*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /* Copyright (c) 1998, 1999 Apple Computer, Inc. All Rights Reserved */
23 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
24 /*
25 * Copyright (c) 1982, 1986, 1988, 1990, 1993
26 * The Regents of the University of California. All rights reserved.
27 *
28 * Redistribution and use in source and binary forms, with or without
29 * modification, are permitted provided that the following conditions
30 * are met:
31 * 1. Redistributions of source code must retain the above copyright
32 * notice, this list of conditions and the following disclaimer.
33 * 2. Redistributions in binary form must reproduce the above copyright
34 * notice, this list of conditions and the following disclaimer in the
35 * documentation and/or other materials provided with the distribution.
36 * 3. All advertising materials mentioning features or use of this software
37 * must display the following acknowledgement:
38 * This product includes software developed by the University of
39 * California, Berkeley and its contributors.
40 * 4. Neither the name of the University nor the names of its contributors
41 * may be used to endorse or promote products derived from this software
42 * without specific prior written permission.
43 *
44 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
45 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
46 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
47 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
48 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
49 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
50 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
51 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
52 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
53 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
54 * SUCH DAMAGE.
55 *
56 * @(#)uipc_socket2.c 8.1 (Berkeley) 6/10/93
57 * $FreeBSD: src/sys/kern/uipc_socket2.c,v 1.55.2.9 2001/07/26 18:53:02 peter Exp $
58 */
59
60 #include <sys/param.h>
61 #include <sys/systm.h>
62 #include <sys/domain.h>
63 #include <sys/kernel.h>
64 #include <sys/proc_internal.h>
65 #include <sys/kauth.h>
66 #include <sys/malloc.h>
67 #include <sys/mbuf.h>
68 #include <sys/protosw.h>
69 #include <sys/stat.h>
70 #include <sys/socket.h>
71 #include <sys/socketvar.h>
72 #include <sys/signalvar.h>
73 #include <sys/sysctl.h>
74 #include <sys/ev.h>
75 #include <kern/locks.h>
76 #include <net/route.h>
77 #include <netinet/in.h>
78 #include <netinet/in_pcb.h>
79 #include <sys/kdebug.h>
80
81 #define DBG_FNC_SBDROP NETDBG_CODE(DBG_NETSOCK, 4)
82 #define DBG_FNC_SBAPPEND NETDBG_CODE(DBG_NETSOCK, 5)
83
84
85 /*
86 * Primitive routines for operating on sockets and socket buffers
87 */
88
89 u_long sb_max = SB_MAX; /* XXX should be static */
90
91 static u_long sb_efficiency = 8; /* parameter for sbreserve() */
92
93 /*
94 * Procedures to manipulate state flags of socket
95 * and do appropriate wakeups. Normal sequence from the
96 * active (originating) side is that soisconnecting() is
97 * called during processing of connect() call,
98 * resulting in an eventual call to soisconnected() if/when the
99 * connection is established. When the connection is torn down
100 * soisdisconnecting() is called during processing of disconnect() call,
101 * and soisdisconnected() is called when the connection to the peer
102 * is totally severed. The semantics of these routines are such that
103 * connectionless protocols can call soisconnected() and soisdisconnected()
104 * only, bypassing the in-progress calls when setting up a ``connection''
105 * takes no time.
106 *
107 * From the passive side, a socket is created with
108 * two queues of sockets: so_incomp for connections in progress
109 * and so_comp for connections already made and awaiting user acceptance.
110 * As a protocol is preparing incoming connections, it creates a socket
111 * structure queued on so_incomp by calling sonewconn(). When the connection
112 * is established, soisconnected() is called, and transfers the
113 * socket structure to so_comp, making it available to accept().
114 *
115 * If a socket is closed with sockets on either
116 * so_incomp or so_comp, these sockets are dropped.
117 *
118 * If higher level protocols are implemented in
119 * the kernel, the wakeups done here will sometimes
120 * cause software-interrupt process scheduling.
121 */
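/*
 * Illustrative sketch (not part of this file): the passive-open sequence
 * described above, as a hypothetical connection-oriented protocol might
 * drive it.  Only sonewconn() and soisconnected() are real entry points
 * here; `head', `so' and `remote_sa' are placeholders for illustration.
 *
 *	struct socket *so;
 *
 *	// a connection request arrived on the listening socket `head'
 *	so = sonewconn(head, 0, remote_sa);	// queued on head->so_incomp
 *	if (so == NULL)
 *		return;				// queue full or out of memory
 *
 *	// ... the protocol completes its handshake on `so' ...
 *
 *	soisconnected(so);	// moves so from so_incomp to so_comp and
 *				// wakes threads sleeping in accept()
 */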
122 void
123 soisconnecting(so)
124 register struct socket *so;
125 {
126
127 so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
128 so->so_state |= SS_ISCONNECTING;
129
130 sflt_notify(so, sock_evt_connecting, NULL);
131 }
132
133 void
134 soisconnected(so)
135 struct socket *so;
136 {
137 struct socket *head = so->so_head;
138
139 so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
140 so->so_state |= SS_ISCONNECTED;
141
142 sflt_notify(so, sock_evt_connected, NULL);
143
144 if (head && (so->so_state & SS_INCOMP)) {
145 so->so_state &= ~SS_INCOMP;
146 so->so_state |= SS_COMP;
147 if (head->so_proto->pr_getlock != NULL) {
148 socket_unlock(so, 0);
149 socket_lock(head, 1);
150 }
151 postevent(head, 0, EV_RCONN);
152 TAILQ_REMOVE(&head->so_incomp, so, so_list);
153 head->so_incqlen--;
154 TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
155 sorwakeup(head);
156 wakeup_one((caddr_t)&head->so_timeo);
157 if (head->so_proto->pr_getlock != NULL) {
158 socket_unlock(head, 1);
159 socket_lock(so, 0);
160 }
161 } else {
162 postevent(so, 0, EV_WCONN);
163 wakeup((caddr_t)&so->so_timeo);
164 sorwakeup(so);
165 sowwakeup(so);
166 }
167 }
168
169 void
170 soisdisconnecting(so)
171 register struct socket *so;
172 {
173 so->so_state &= ~SS_ISCONNECTING;
174 so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
175 sflt_notify(so, sock_evt_disconnecting, NULL);
176 wakeup((caddr_t)&so->so_timeo);
177 sowwakeup(so);
178 sorwakeup(so);
179 }
180
181 void
182 soisdisconnected(so)
183 register struct socket *so;
184 {
185 so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
186 so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
187 sflt_notify(so, sock_evt_disconnected, NULL);
188 wakeup((caddr_t)&so->so_timeo);
189 sowwakeup(so);
190 sorwakeup(so);
191 }
192
193 /*
194 * Return a random connection that hasn't been serviced yet and
195 * is eligible for discard. There is a one in qlen chance that
196 * we will return a null, saying that there are no droppable
197 * requests. In this case, the protocol specific code should drop
198 * the new request. This ensures fairness.
199 *
200 * This may be used in conjunction with protocol specific queue
201 * congestion routines.
202 */
203 struct socket *
204 sodropablereq(head)
205 register struct socket *head;
206 {
207 struct socket *so, *sonext = NULL;
208 unsigned int i, j, qlen;
209 static int rnd;
210 static struct timeval old_runtime;
211 static unsigned int cur_cnt, old_cnt;
212 struct timeval tv;
213
214 microtime(&tv);
215 if ((i = (tv.tv_sec - old_runtime.tv_sec)) != 0) {
216 old_runtime = tv;
217 old_cnt = cur_cnt / i;
218 cur_cnt = 0;
219 }
220
221 so = TAILQ_FIRST(&head->so_incomp);
222 if (!so)
223 return (NULL);
224
225 qlen = head->so_incqlen;
226 if (++cur_cnt > qlen || old_cnt > qlen) {
227 rnd = (314159 * rnd + 66329) & 0xffff;
228 j = ((qlen + 1) * rnd) >> 16;
229 //###LD To clean up
230 while (j-- && so) {
231 // if (in_pcb_checkstate(so->so_pcb, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
232 socket_lock(so, 1);
233 sonext = TAILQ_NEXT(so, so_list);
234 // in_pcb_check_state(so->so_pcb, WNT_RELEASE, 0);
235 socket_unlock(so, 1);
236 so = sonext;
237 }
238 }
239
240 // if (in_pcb_checkstate(so->so_pcb, WNT_ACQUIRE, 0) == WNT_STOPUSING)
241 // return (NULL);
242 // else
243 return (so);
244 }
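/*
 * Worked example of the index computation above (illustrative only):
 * rnd is stepped with a 16-bit linear-congruential update, and
 * j = ((qlen + 1) * rnd) >> 16 scales that 16-bit value into 0..qlen.
 * For instance, with qlen = 9 and rnd = 0x8000:
 *
 *	j = ((9 + 1) * 0x8000) >> 16 = 327680 >> 16 = 5
 *
 * so the walk inside the function skips five entries from the front of
 * so_incomp.  When j lands past the last queued socket, so ends up NULL
 * and the caller is told nothing is droppable, which is the "one in qlen"
 * case the comment before this function describes.
 */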
245
246 /*
247 * When an attempt at a new connection is noted on a socket
248 * which accepts connections, sonewconn is called. If the
249 * connection is possible (subject to space constraints, etc.)
250 * then we allocate a new structure, properly linked into the
251 * data structure of the original socket, and return this.
252 * Connstatus may be 0, or SS_ISCONFIRMING, or SS_ISCONNECTED.
253 */
254 static struct socket *
255 sonewconn_internal(head, connstatus)
256 register struct socket *head;
257 int connstatus;
258 {
259 int error = 0;
260 register struct socket *so;
261 lck_mtx_t *mutex_held;
262
263 if (head->so_proto->pr_getlock != NULL)
264 mutex_held = (*head->so_proto->pr_getlock)(head, 0);
265 else
266 mutex_held = head->so_proto->pr_domain->dom_mtx;
267 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
268
269 if (head->so_qlen > 3 * head->so_qlimit / 2)
270 return ((struct socket *)0);
271 so = soalloc(1, head->so_proto->pr_domain->dom_family, head->so_type);
272 if (so == NULL)
273 return ((struct socket *)0);
274 /* check if head was closed during the soalloc */
275 if (head->so_proto == NULL) {
276 sodealloc(so);
277 return ((struct socket *)0);
278 }
279
280 so->so_head = head;
281 so->so_type = head->so_type;
282 so->so_options = head->so_options &~ SO_ACCEPTCONN;
283 so->so_linger = head->so_linger;
284 so->so_state = head->so_state | SS_NOFDREF;
285 so->so_proto = head->so_proto;
286 so->so_timeo = head->so_timeo;
287 so->so_pgid = head->so_pgid;
288 so->so_uid = head->so_uid;
289 so->so_usecount = 1;
290
291 if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat)) {
292 sflt_termsock(so);
293 sodealloc(so);
294 return ((struct socket *)0);
295 }
296
297 /*
298 * Must be done with head unlocked to avoid deadlock for protocols with per-socket mutexes.
299 */
300 if (head->so_proto->pr_unlock)
301 socket_unlock(head, 0);
302 if (((*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL) != 0) || error) {
303 sflt_termsock(so);
304 sodealloc(so);
305 if (head->so_proto->pr_unlock)
306 socket_lock(head, 0);
307 return ((struct socket *)0);
308 }
309 if (head->so_proto->pr_unlock)
310 socket_lock(head, 0);
311 #ifdef __APPLE__
312 so->so_proto->pr_domain->dom_refs++;
313 #endif
314
315 if (connstatus) {
316 TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
317 so->so_state |= SS_COMP;
318 } else {
319 TAILQ_INSERT_TAIL(&head->so_incomp, so, so_list);
320 so->so_state |= SS_INCOMP;
321 head->so_incqlen++;
322 }
323 head->so_qlen++;
324 #ifdef __APPLE__
325 so->so_rcv.sb_so = so->so_snd.sb_so = so;
326 TAILQ_INIT(&so->so_evlist);
327
328 /* Attach socket filters for this protocol */
329 sflt_initsock(so);
330 #endif
331 if (connstatus) {
332 so->so_state |= connstatus;
333 sorwakeup(head);
334 wakeup((caddr_t)&head->so_timeo);
335 }
336 return (so);
337 }
338
339
340 struct socket *
341 sonewconn(
342 struct socket *head,
343 int connstatus,
344 const struct sockaddr *from)
345 {
346 int error = 0;
347 struct socket_filter_entry *filter;
348 int filtered = 0;
349
350 error = 0;
351 for (filter = head->so_filt; filter && (error == 0);
352 filter = filter->sfe_next_onsocket) {
353 if (filter->sfe_filter->sf_filter.sf_connect_in) {
354 if (filtered == 0) {
355 filtered = 1;
356 sflt_use(head);
357 socket_unlock(head, 0);
358 }
359 error = filter->sfe_filter->sf_filter.sf_connect_in(
360 filter->sfe_cookie, head, from);
361 }
362 }
363 if (filtered != 0) {
364 socket_lock(head, 0);
365 sflt_unuse(head);
366 }
367
368 if (error) {
369 return NULL;
370 }
371
372 return sonewconn_internal(head, connstatus);
373 }
374
375 /*
376 * Socantsendmore indicates that no more data will be sent on the
377 * socket; it would normally be applied to a socket when the user
378 * informs the system that no more data is to be sent, by the protocol
379 * code (see PRU_SHUTDOWN). Socantrcvmore indicates that no more data
380 * will be received, and will normally be applied to the socket by a
381 * protocol when it detects that the peer will send no more data.
382 * Data queued for reading in the socket may yet be read.
383 */
384
385 void
386 socantsendmore(so)
387 struct socket *so;
388 {
389 so->so_state |= SS_CANTSENDMORE;
390 sflt_notify(so, sock_evt_cantsendmore, NULL);
391 sowwakeup(so);
392 }
393
394 void
395 socantrcvmore(so)
396 struct socket *so;
397 {
398 so->so_state |= SS_CANTRCVMORE;
399 sflt_notify(so, sock_evt_cantrecvmore, NULL);
400 sorwakeup(so);
401 }
402
403 /*
404 * Wait for data to arrive at/drain from a socket buffer.
405 */
406 int
407 sbwait(sb)
408 struct sockbuf *sb;
409 {
410 int error = 0, lr, lr_saved;
411 struct socket *so = sb->sb_so;
412 lck_mtx_t *mutex_held;
413 struct timespec ts;
414
415 #ifdef __ppc__
416 __asm__ volatile("mflr %0" : "=r" (lr));
417 lr_saved = lr;
418 #endif
419
420
421 if (so->so_proto->pr_getlock != NULL)
422 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
423 else
424 mutex_held = so->so_proto->pr_domain->dom_mtx;
425
426 sb->sb_flags |= SB_WAIT;
427
428 if (so->so_usecount < 1)
429 panic("sbwait: so=%x refcount=%d\n", so, so->so_usecount);
430 ts.tv_sec = sb->sb_timeo.tv_sec;
431 ts.tv_nsec = sb->sb_timeo.tv_usec * 1000;
432 error = msleep((caddr_t)&sb->sb_cc, mutex_held,
433 (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait",
434 &ts);
435
436 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
437
438 if (so->so_usecount < 1)
439 panic("sbwait: so=%x refcount=%d\n", so, so->so_usecount);
440
441 if ((so->so_state & SS_DRAINING)) {
442 error = EBADF;
443 }
444
445 return (error);
446 }
447
448 /*
449 * Lock a sockbuf already known to be locked;
450 * return any error returned from sleep (EINTR).
451 */
452 int
453 sb_lock(sb)
454 register struct sockbuf *sb;
455 {
456 struct socket *so = sb->sb_so;
457 lck_mtx_t * mutex_held;
458 int error = 0, lr, lr_saved;
459
460 #ifdef __ppc__
461 __asm__ volatile("mflr %0" : "=r" (lr));
462 lr_saved = lr;
463 #endif
464
465 if (so == NULL)
466 panic("sb_lock: null so back pointer sb=%x\n", sb);
467
468 while (sb->sb_flags & SB_LOCK) {
469 sb->sb_flags |= SB_WANT;
470 if (so->so_proto->pr_getlock != NULL)
471 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
472 else
473 mutex_held = so->so_proto->pr_domain->dom_mtx;
474 if (so->so_usecount < 1)
475 panic("sb_lock: so=%x refcount=%d\n", so, so->so_usecount);
476 error = msleep((caddr_t)&sb->sb_flags, mutex_held,
477 (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sblock", 0);
478 if (so->so_usecount < 1)
479 panic("sb_lock: 2 so=%x refcount=%d\n", so, so->so_usecount);
480 if (error)
481 return (error);
482 }
483 sb->sb_flags |= SB_LOCK;
484 return (0);
485 }
486
487 /*
488 * Wakeup processes waiting on a socket buffer.
489 * Do asynchronous notification via SIGIO
490 * if the socket has the SS_ASYNC flag set.
491 */
492 void
493 sowakeup(so, sb)
494 register struct socket *so;
495 register struct sockbuf *sb;
496 {
497 struct proc *p = current_proc();
498 sb->sb_flags &= ~SB_SEL;
499 selwakeup(&sb->sb_sel);
500 if (sb->sb_flags & SB_WAIT) {
501 sb->sb_flags &= ~SB_WAIT;
502 wakeup((caddr_t)&sb->sb_cc);
503 }
504 if (so->so_state & SS_ASYNC) {
505 if (so->so_pgid < 0)
506 gsignal(-so->so_pgid, SIGIO);
507 else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
508 psignal(p, SIGIO);
509 }
510 if (sb->sb_flags & SB_KNOTE) {
511 KNOTE(&sb->sb_sel.si_note, SO_FILT_HINT_LOCKED);
512 }
513 if (sb->sb_flags & SB_UPCALL) {
514 socket_unlock(so, 0);
515 (*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT);
516 socket_lock(so, 0);
517 }
518 }
519
520 /*
521 * Socket buffer (struct sockbuf) utility routines.
522 *
523 * Each socket contains two socket buffers: one for sending data and
524 * one for receiving data. Each buffer contains a queue of mbufs,
525 * information about the number of mbufs and amount of data in the
526 * queue, and other fields allowing select() statements and notification
527 * on data availability to be implemented.
528 *
529 * Data stored in a socket buffer is maintained as a list of records.
530 * Each record is a list of mbufs chained together with the m_next
531 * field. Records are chained together with the m_nextpkt field. The upper
532 * level routine soreceive() expects the following conventions to be
533 * observed when placing information in the receive buffer:
534 *
535 * 1. If the protocol requires each message be preceded by the sender's
536 * name, then a record containing that name must be present before
537 * any associated data (mbuf's must be of type MT_SONAME).
538 * 2. If the protocol supports the exchange of ``access rights'' (really
539 * just additional data associated with the message), and there are
540 * ``rights'' to be received, then a record containing this data
541 * should be present (mbuf's must be of type MT_RIGHTS).
542 * 3. If a name or rights record exists, then it must be followed by
543 * a data record, perhaps of zero length.
544 *
545 * Before using a new socket structure it is first necessary to reserve
546 * buffer space to the socket, by calling sbreserve(). This should commit
547 * some of the available buffer space in the system buffer pool for the
548 * socket (currently, it does nothing but enforce limits). The space
549 * should be released by calling sbrelease() when the socket is destroyed.
550 */
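/*
 * Illustrative sketch of the record layout described above, for a
 * datagram socket whose protocol supplies a sender name and ancillary
 * data with each message (conventions 1-3 above):
 *
 *	sb_mb -> [MT_SONAME] -m_next-> [MT_CONTROL] -m_next-> [MT_DATA] ...
 *	             |
 *	         m_nextpkt
 *	             |
 *	             v
 *	         [MT_SONAME] -m_next-> [MT_DATA] ...	(next record)
 *
 * Records hang off m_nextpkt of the first mbuf in each record; the mbufs
 * within a record are chained with m_next.
 */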
551
552 int
553 soreserve(so, sndcc, rcvcc)
554 register struct socket *so;
555 u_long sndcc, rcvcc;
556 {
557
558 if (sbreserve(&so->so_snd, sndcc) == 0)
559 goto bad;
560 if (sbreserve(&so->so_rcv, rcvcc) == 0)
561 goto bad2;
562 if (so->so_rcv.sb_lowat == 0)
563 so->so_rcv.sb_lowat = 1;
564 if (so->so_snd.sb_lowat == 0)
565 so->so_snd.sb_lowat = MCLBYTES;
566 if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
567 so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
568 return (0);
569 bad2:
570 #ifdef __APPLE__
571 selthreadclear(&so->so_snd.sb_sel);
572 #endif
573 sbrelease(&so->so_snd);
574 bad:
575 return (ENOBUFS);
576 }
577
578 /*
579 * Allot mbufs to a sockbuf.
580 * Attempt to scale mbmax so that mbcnt doesn't become limiting
581 * if buffering efficiency is near the normal case.
582 */
583 int
584 sbreserve(sb, cc)
585 struct sockbuf *sb;
586 u_long cc;
587 {
588 if ((u_quad_t)cc > (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES))
589 return (0);
590 sb->sb_hiwat = cc;
591 sb->sb_mbmax = min(cc * sb_efficiency, sb_max);
592 if (sb->sb_lowat > sb->sb_hiwat)
593 sb->sb_lowat = sb->sb_hiwat;
594 return (1);
595 }
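/*
 * Worked example for the limit above (illustrative; assumes the usual
 * MSIZE = 256 and MCLBYTES = 2048): the reservation is rejected when
 *
 *	cc > sb_max * MCLBYTES / (MSIZE + MCLBYTES) = sb_max * 2048 / 2304,
 *
 * i.e. cc may be at most roughly 8/9 of sb_max, leaving headroom for the
 * mbuf headers that accompany the clusters.  sb_mbmax then becomes
 * min(cc * sb_efficiency, sb_max), with sb_efficiency defaulting to 8, so
 * mbuf accounting only becomes the limiting factor when buffering is much
 * less efficient than one mostly-full cluster per mbuf.
 */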
596
597 /*
598 * Free mbufs held by a socket, and reserved mbuf space.
599 */
600 /* WARNING needs to do selthreadclear() before calling this */
601 void
602 sbrelease(sb)
603 struct sockbuf *sb;
604 {
605
606 sbflush(sb);
607 sb->sb_hiwat = 0;
608 sb->sb_mbmax = 0;
609
610 }
611
612 /*
613 * Routines to add and remove
614 * data from an mbuf queue.
615 *
616 * The routines sbappend() or sbappendrecord() are normally called to
617 * append new mbufs to a socket buffer, after checking that adequate
618 * space is available, by comparing the result of sbspace() with the amount
619 * of data to be added. sbappendrecord() differs from sbappend() in
620 * that data supplied is treated as the beginning of a new record.
621 * To place a sender's address, optional access rights, and data in a
622 * socket receive buffer, sbappendaddr() should be used. To place
623 * access rights and data in a socket receive buffer, sbappendrights()
624 * should be used. In either case, the new data begins a new record.
625 * Note that unlike sbappend() and sbappendrecord(), these routines check
626 * for the caller that there will be enough space to store the data.
627 * Each fails if there is not enough space, or if it cannot find mbufs
628 * to store additional information in.
629 *
630 * Reliable protocols may use the socket send buffer to hold data
631 * awaiting acknowledgement. Data is normally copied from a socket
632 * send buffer in a protocol with m_copy for output to a peer,
633 * and then removing the data from the socket buffer with sbdrop()
634 * or sbdroprecord() when the data is acknowledged by the peer.
635 */
636
637 /*
638 * Append mbuf chain m to the last record in the
639 * socket buffer sb. The additional space associated with
640 * the mbuf chain is recorded in sb. Empty mbufs are
641 * discarded and mbufs are compacted where possible.
642 */
643 int
644 sbappend(sb, m)
645 struct sockbuf *sb;
646 struct mbuf *m;
647 {
648 register struct mbuf *n, *sb_first;
649 int result = 0;
650 int error = 0;
651 int filtered = 0;
652
653
654 KERNEL_DEBUG((DBG_FNC_SBAPPEND | DBG_FUNC_START), sb, m->m_len, 0, 0, 0);
655
656 if (m == 0)
657 return 0;
658
659 again:
660 sb_first = n = sb->sb_mb;
661 if (n) {
662 while (n->m_nextpkt)
663 n = n->m_nextpkt;
664 do {
665 if (n->m_flags & M_EOR) {
666 result = sbappendrecord(sb, m); /* XXXXXX!!!! */
667 KERNEL_DEBUG((DBG_FNC_SBAPPEND | DBG_FUNC_END), sb, sb->sb_cc, 0, 0, 0);
668 return result;
669 }
670 } while (n->m_next && (n = n->m_next));
671 }
672
673 if (!filtered && (sb->sb_flags & SB_RECV) != 0) {
674 error = sflt_data_in(sb->sb_so, NULL, &m, NULL, 0, &filtered);
675 if (error) {
676 /* no data was appended, caller should not call sowakeup */
677 return 0;
678 }
679
680 /*
681 If we ran any filters, the socket lock was dropped. n and sb_first
682 cached data from the socket buffer. This cache is not valid
683 since we dropped the lock. We must start over. Since filtered
684 is set we won't run through the filters a second time. We just
685 set n and sb_first again.
686 */
687 if (filtered)
688 goto again;
689 }
690
691 result = sbcompress(sb, m, n);
692
693 KERNEL_DEBUG((DBG_FNC_SBAPPEND | DBG_FUNC_END), sb, sb->sb_cc, 0, 0, 0);
694
695 return result;
696 }
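/*
 * Illustrative sketch (not part of this file): the send-buffer usage
 * pattern described in the overview comment before sbappend(), as a
 * hypothetical reliable protocol might follow it.  `so', `m', `off',
 * `len' and `acked' are placeholders for illustration.
 *
 *	if (sbspace(&so->so_snd) < m->m_pkthdr.len)
 *		return (ENOBUFS);		// caller must not overfill
 *	sbappend(&so->so_snd, m);		// hold data until acknowledged
 *	// transmit a copy, e.g. m_copy(so->so_snd.sb_mb, off, len)
 *
 *	// ... later, when the peer acknowledges `acked' bytes:
 *	sbdrop(&so->so_snd, acked);
 *	sowwakeup(so);				// space freed; wake writers
 */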
697
698 #ifdef SOCKBUF_DEBUG
699 void
700 sbcheck(sb)
701 register struct sockbuf *sb;
702 {
703 register struct mbuf *m;
704 register struct mbuf *n = 0;
705 register u_long len = 0, mbcnt = 0;
706 lck_mtx_t *mutex_held;
707
708 if (sb->sb_so->so_proto->pr_getlock != NULL)
709 mutex_held = (*sb->sb_so->so_proto->pr_getlock)(sb->sb_so, 0);
710 else
711 mutex_held = sb->sb_so->so_proto->pr_domain->dom_mtx;
712
713 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
714
715 if (sbchecking == 0)
716 return;
717
718 for (m = sb->sb_mb; m; m = n) {
719 n = m->m_nextpkt;
720 for (; m; m = m->m_next) {
721 len += m->m_len;
722 mbcnt += MSIZE;
723 if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */
724 mbcnt += m->m_ext.ext_size;
725 }
726 }
727 if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
728 panic("cc %ld != %ld || mbcnt %ld != %ld\n", len, sb->sb_cc,
729 mbcnt, sb->sb_mbcnt);
730 }
731 }
732 #endif
733
734 /*
735 * As above, except the mbuf chain
736 * begins a new record.
737 */
738 int
739 sbappendrecord(sb, m0)
740 register struct sockbuf *sb;
741 register struct mbuf *m0;
742 {
743 register struct mbuf *m;
744 int result = 0;
745
746 if (m0 == 0)
747 return 0;
748
749 if ((sb->sb_flags & SB_RECV) != 0) {
750 int error = sflt_data_in(sb->sb_so, NULL, &m0, NULL, sock_data_filt_flag_record, NULL);
751 if (error != 0) {
752 if (error != EJUSTRETURN)
753 m_freem(m0);
754 return 0;
755 }
756 }
757
758 m = sb->sb_mb;
759 if (m)
760 while (m->m_nextpkt)
761 m = m->m_nextpkt;
762 /*
763 * Put the first mbuf on the queue.
764 * Note this permits zero length records.
765 */
766 sballoc(sb, m0);
767 if (m)
768 m->m_nextpkt = m0;
769 else
770 sb->sb_mb = m0;
771 m = m0->m_next;
772 m0->m_next = 0;
773 if (m && (m0->m_flags & M_EOR)) {
774 m0->m_flags &= ~M_EOR;
775 m->m_flags |= M_EOR;
776 }
777 return sbcompress(sb, m, m0);
778 }
779
780 /*
781 * As above except that OOB data
782 * is inserted at the beginning of the sockbuf,
783 * but after any other OOB data.
784 */
785 int
786 sbinsertoob(sb, m0)
787 struct sockbuf *sb;
788 struct mbuf *m0;
789 {
790 struct mbuf *m;
791 struct mbuf **mp;
792
793 if (m0 == 0)
794 return 0;
795
796 if ((sb->sb_flags & SB_RECV) != 0) {
797 int error = sflt_data_in(sb->sb_so, NULL, &m0, NULL,
798 sock_data_filt_flag_oob, NULL);
799
800 if (error) {
801 if (error != EJUSTRETURN) {
802 m_freem(m0);
803 }
804 return 0;
805 }
806 }
807
808 for (mp = &sb->sb_mb; *mp ; mp = &((*mp)->m_nextpkt)) {
809 m = *mp;
810 again:
811 switch (m->m_type) {
812
813 case MT_OOBDATA:
814 continue; /* WANT next train */
815
816 case MT_CONTROL:
817 m = m->m_next;
818 if (m)
819 goto again; /* inspect THIS train further */
820 }
821 break;
822 }
823 /*
824 * Put the first mbuf on the queue.
825 * Note this permits zero length records.
826 */
827 sballoc(sb, m0);
828 m0->m_nextpkt = *mp;
829 *mp = m0;
830 m = m0->m_next;
831 m0->m_next = 0;
832 if (m && (m0->m_flags & M_EOR)) {
833 m0->m_flags &= ~M_EOR;
834 m->m_flags |= M_EOR;
835 }
836 return sbcompress(sb, m, m0);
837 }
838
839 /*
840 * Append address and data, and optionally, control (ancillary) data
841 * to the receive queue of a socket. If present,
842 * m0 must include a packet header with total length.
843 * Returns 0 if no space in sockbuf or insufficient mbufs.
844 */
845 static int
846 sbappendaddr_internal(sb, asa, m0, control)
847 register struct sockbuf *sb;
848 struct sockaddr *asa;
849 struct mbuf *m0, *control;
850 {
851 register struct mbuf *m, *n;
852 int space = asa->sa_len;
853
854 if (m0 && (m0->m_flags & M_PKTHDR) == 0)
855 panic("sbappendaddr");
856
857 if (m0)
858 space += m0->m_pkthdr.len;
859 for (n = control; n; n = n->m_next) {
860 space += n->m_len;
861 if (n->m_next == 0) /* keep pointer to last control buf */
862 break;
863 }
864 if (space > sbspace(sb))
865 return (0);
866 if (asa->sa_len > MLEN)
867 return (0);
868 MGET(m, M_DONTWAIT, MT_SONAME);
869 if (m == 0)
870 return (0);
871 m->m_len = asa->sa_len;
872 bcopy((caddr_t)asa, mtod(m, caddr_t), asa->sa_len);
873 if (n)
874 n->m_next = m0; /* concatenate data to control */
875 else
876 control = m0;
877 m->m_next = control;
878 for (n = m; n; n = n->m_next)
879 sballoc(sb, n);
880 n = sb->sb_mb;
881 if (n) {
882 while (n->m_nextpkt)
883 n = n->m_nextpkt;
884 n->m_nextpkt = m;
885 } else
886 sb->sb_mb = m;
887 postevent(0,sb,EV_RWBYTES);
888 return (1);
889 }
890
891 int
892 sbappendaddr(
893 struct sockbuf* sb,
894 struct sockaddr* asa,
895 struct mbuf *m0,
896 struct mbuf *control,
897 int *error_out)
898 {
899 int result = 0;
900
901 if (error_out) *error_out = 0;
902
903 if (m0 && (m0->m_flags & M_PKTHDR) == 0)
904 panic("sbappendaddrorfree");
905
906 /* Call socket data in filters */
907 if ((sb->sb_flags & SB_RECV) != 0) {
908 int error;
909 error = sflt_data_in(sb->sb_so, asa, &m0, &control, 0, NULL);
910 if (error) {
911 if (error != EJUSTRETURN) {
912 if (m0) m_freem(m0);
913 if (control) m_freem(control);
914 if (error_out) *error_out = error;
915 }
916 return 0;
917 }
918 }
919
920 result = sbappendaddr_internal(sb, asa, m0, control);
921 if (result == 0) {
922 if (m0) m_freem(m0);
923 if (control) m_freem(control);
924 if (error_out) *error_out = ENOBUFS;
925 }
926
927 return result;
928 }
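/*
 * Illustrative sketch (not part of this file): how a hypothetical
 * datagram protocol's input routine might hand a received packet to the
 * socket layer with sbappendaddr().  `so', `from' (sender address), `m'
 * (data chain with a packet header) and `opts' (optional control mbufs)
 * are placeholders for illustration.
 *
 *	int error;
 *
 *	if (sbappendaddr(&so->so_rcv, from, m, opts, &error) == 0) {
 *		// append failed; the mbufs are no longer the caller's to
 *		// free, and error (if set) says why -- count the drop here
 *	} else {
 *		sorwakeup(so);	// data ready: wake readers, select(), kqueue
 *	}
 */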
929
930 static int
931 sbappendcontrol_internal(sb, m0, control)
932 struct sockbuf *sb;
933 struct mbuf *control, *m0;
934 {
935 register struct mbuf *m, *n;
936 int space = 0;
937
938 if (control == 0)
939 panic("sbappendcontrol");
940
941 for (m = control; ; m = m->m_next) {
942 space += m->m_len;
943 if (m->m_next == 0)
944 break;
945 }
946 n = m; /* save pointer to last control buffer */
947 for (m = m0; m; m = m->m_next)
948 space += m->m_len;
949 if (space > sbspace(sb))
950 return (0);
951 n->m_next = m0; /* concatenate data to control */
952 for (m = control; m; m = m->m_next)
953 sballoc(sb, m);
954 n = sb->sb_mb;
955 if (n) {
956 while (n->m_nextpkt)
957 n = n->m_nextpkt;
958 n->m_nextpkt = control;
959 } else
960 sb->sb_mb = control;
961 postevent(0,sb,EV_RWBYTES);
962 return (1);
963 }
964
965 int
966 sbappendcontrol(
967 struct sockbuf *sb,
968 struct mbuf *m0,
969 struct mbuf *control,
970 int *error_out)
971 {
972 int result = 0;
973
974 if (error_out) *error_out = 0;
975
976 if (sb->sb_flags & SB_RECV) {
977 int error;
978 error = sflt_data_in(sb->sb_so, NULL, &m0, &control, 0, NULL);
979 if (error) {
980 if (error != EJUSTRETURN) {
981 if (m0) m_freem(m0);
982 if (control) m_freem(control);
983 if (error_out) *error_out = error;
984 }
985 return 0;
986 }
987 }
988
989 result = sbappendcontrol_internal(sb, m0, control);
990 if (result == 0) {
991 if (m0) m_freem(m0);
992 if (control) m_freem(control);
993 if (error_out) *error_out = ENOBUFS;
994 }
995
996 return result;
997 }
998
999 /*
1000 * Compress mbuf chain m into the socket
1001 * buffer sb following mbuf n. If n
1002 * is null, the buffer is presumed empty.
1003 */
1004 static int
1005 sbcompress(sb, m, n)
1006 register struct sockbuf *sb;
1007 register struct mbuf *m, *n;
1008 {
1009 register int eor = 0;
1010 register struct mbuf *o;
1011
1012 while (m) {
1013 eor |= m->m_flags & M_EOR;
1014 if (m->m_len == 0 &&
1015 (eor == 0 ||
1016 (((o = m->m_next) || (o = n)) &&
1017 o->m_type == m->m_type))) {
1018 m = m_free(m);
1019 continue;
1020 }
1021 if (n && (n->m_flags & M_EOR) == 0 &&
1022 #ifndef __APPLE__
1023 M_WRITABLE(n) &&
1024 #endif
1025 m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
1026 m->m_len <= M_TRAILINGSPACE(n) &&
1027 n->m_type == m->m_type) {
1028 bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
1029 (unsigned)m->m_len);
1030 n->m_len += m->m_len;
1031 sb->sb_cc += m->m_len;
1032 m = m_free(m);
1033 continue;
1034 }
1035 if (n)
1036 n->m_next = m;
1037 else
1038 sb->sb_mb = m;
1039 sballoc(sb, m);
1040 n = m;
1041 m->m_flags &= ~M_EOR;
1042 m = m->m_next;
1043 n->m_next = 0;
1044 }
1045 if (eor) {
1046 if (n)
1047 n->m_flags |= eor;
1048 else
1049 printf("semi-panic: sbcompress\n");
1050 }
1051 postevent(0,sb, EV_RWBYTES);
1052 return 1;
1053 }
1054
1055 /*
1056 * Free all mbufs in a sockbuf.
1057 * Check that all resources are reclaimed.
1058 */
1059 void
1060 sbflush(sb)
1061 register struct sockbuf *sb;
1062 {
1063 if (sb->sb_so == NULL)
1064 panic ("sbflush sb->sb_so already null sb=%x\n", sb);
1065 (void)sblock(sb, M_WAIT);
1066 while (sb->sb_mbcnt) {
1067 /*
1068 * Don't call sbdrop(sb, 0) if the leading mbuf is non-empty:
1069 * we would loop forever. Panic instead.
1070 */
1071 if (!sb->sb_cc && (sb->sb_mb == NULL || sb->sb_mb->m_len))
1072 break;
1073 sbdrop(sb, (int)sb->sb_cc);
1074 }
1075 if (sb->sb_cc || sb->sb_mb || sb->sb_mbcnt || sb->sb_so == NULL)
1076 panic("sbflush: cc %ld || mb %p || mbcnt %ld sb_so=%x", sb->sb_cc, (void *)sb->sb_mb, sb->sb_mbcnt, sb->sb_so);
1077
1078 postevent(0, sb, EV_RWBYTES);
1079 sbunlock(sb, 1); /* keep socket locked */
1080
1081 }
1082
1083 /*
1084 * Drop data from (the front of) a sockbuf.
1085 * Use m_freem_list to free the mbuf structures
1086 * under a single lock... this is done by pruning
1087 * the top of the tree from the body by keeping track
1088 * of where we get to in the tree and then zeroing the
1089 * two pertinent pointers m_nextpkt and m_next.
1090 * The socket buffer is then updated to point at the new
1091 * top of the tree and the pruned area is released via
1092 * m_freem_list.
1093 */
1094 void
1095 sbdrop(sb, len)
1096 register struct sockbuf *sb;
1097 register int len;
1098 {
1099 register struct mbuf *m, *free_list, *ml;
1100 struct mbuf *next, *last;
1101
1102 KERNEL_DEBUG((DBG_FNC_SBDROP | DBG_FUNC_START), sb, len, 0, 0, 0);
1103
1104 next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
1105 free_list = last = m;
1106 ml = (struct mbuf *)0;
1107
1108 while (len > 0) {
1109 if (m == 0) {
1110 if (next == 0) {
1111 /* temporarily replacing this panic with printf because
1112 * it occurs occasionally when closing a socket, and there
1113 * is no harm in ignoring it. This problem will be investigated
1114 * further.
1115 */
1116 /* panic("sbdrop"); */
1117 printf("sbdrop - count not zero\n");
1118 len = 0;
1119 /* zero the counts. if we have no mbufs, we have no data (PR-2986815) */
1120 sb->sb_cc = 0;
1121 sb->sb_mbcnt = 0;
1122 break;
1123 }
1124 m = last = next;
1125 next = m->m_nextpkt;
1126 continue;
1127 }
1128 if (m->m_len > len) {
1129 m->m_len -= len;
1130 m->m_data += len;
1131 sb->sb_cc -= len;
1132 break;
1133 }
1134 len -= m->m_len;
1135 sbfree(sb, m);
1136
1137 ml = m;
1138 m = m->m_next;
1139 }
1140 while (m && m->m_len == 0) {
1141 sbfree(sb, m);
1142
1143 ml = m;
1144 m = m->m_next;
1145 }
1146 if (ml) {
1147 ml->m_next = (struct mbuf *)0;
1148 last->m_nextpkt = (struct mbuf *)0;
1149 m_freem_list(free_list);
1150 }
1151 if (m) {
1152 sb->sb_mb = m;
1153 m->m_nextpkt = next;
1154 } else
1155 sb->sb_mb = next;
1156
1157 postevent(0, sb, EV_RWBYTES);
1158
1159 KERNEL_DEBUG((DBG_FNC_SBDROP | DBG_FUNC_END), sb, 0, 0, 0, 0);
1160 }
1161
1162 /*
1163 * Drop a record off the front of a sockbuf
1164 * and move the next record to the front.
1165 */
1166 void
1167 sbdroprecord(sb)
1168 register struct sockbuf *sb;
1169 {
1170 register struct mbuf *m, *mn;
1171
1172 m = sb->sb_mb;
1173 if (m) {
1174 sb->sb_mb = m->m_nextpkt;
1175 do {
1176 sbfree(sb, m);
1177 MFREE(m, mn);
1178 m = mn;
1179 } while (m);
1180 }
1181 postevent(0, sb, EV_RWBYTES);
1182 }
1183
1184 /*
1185 * Create a "control" mbuf containing the specified data
1186 * with the specified type for presentation on a socket buffer.
1187 */
1188 struct mbuf *
1189 sbcreatecontrol(p, size, type, level)
1190 caddr_t p;
1191 register int size;
1192 int type, level;
1193 {
1194 register struct cmsghdr *cp;
1195 struct mbuf *m;
1196
1197 if (CMSG_SPACE((u_int)size) > MLEN)
1198 return ((struct mbuf *) NULL);
1199 if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL)
1200 return ((struct mbuf *) NULL);
1201 cp = mtod(m, struct cmsghdr *);
1202 /* XXX check size? */
1203 (void)memcpy(CMSG_DATA(cp), p, size);
1204 m->m_len = CMSG_SPACE(size);
1205 cp->cmsg_len = CMSG_LEN(size);
1206 cp->cmsg_level = level;
1207 cp->cmsg_type = type;
1208 return (m);
1209 }
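/*
 * Illustrative sketch (not part of this file): a protocol that wants to
 * deliver a receive timestamp as ancillary data could build the control
 * mbuf here and pass it to sbappendaddr(), e.g.:
 *
 *	struct timeval tv;
 *	struct mbuf *control;
 *
 *	microtime(&tv);
 *	control = sbcreatecontrol((caddr_t)&tv, sizeof (tv),
 *	    SCM_TIMESTAMP, SOL_SOCKET);
 *	// control is NULL if CMSG_SPACE(sizeof (tv)) would not fit in MLEN
 */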
1210
1211 /*
1212 * Some routines that return EOPNOTSUPP for entry points that are not
1213 * supported by a protocol. Fill in as needed.
1214 */
1215 int
1216 pru_abort_notsupp(struct socket *so)
1217 {
1218 return EOPNOTSUPP;
1219 }
1220
1221
1222 int
1223 pru_accept_notsupp(struct socket *so, struct sockaddr **nam)
1224 {
1225 return EOPNOTSUPP;
1226 }
1227
1228 int
1229 pru_attach_notsupp(struct socket *so, int proto, struct proc *p)
1230 {
1231 return EOPNOTSUPP;
1232 }
1233
1234 int
1235 pru_bind_notsupp(struct socket *so, struct sockaddr *nam, struct proc *p)
1236 {
1237 return EOPNOTSUPP;
1238 }
1239
1240 int
1241 pru_connect_notsupp(struct socket *so, struct sockaddr *nam, struct proc *p)
1242 {
1243 return EOPNOTSUPP;
1244 }
1245
1246 int
1247 pru_connect2_notsupp(struct socket *so1, struct socket *so2)
1248 {
1249 return EOPNOTSUPP;
1250 }
1251
1252 int
1253 pru_control_notsupp(struct socket *so, u_long cmd, caddr_t data,
1254 struct ifnet *ifp, struct proc *p)
1255 {
1256 return EOPNOTSUPP;
1257 }
1258
1259 int
1260 pru_detach_notsupp(struct socket *so)
1261 {
1262 return EOPNOTSUPP;
1263 }
1264
1265 int
1266 pru_disconnect_notsupp(struct socket *so)
1267 {
1268 return EOPNOTSUPP;
1269 }
1270
1271 int
1272 pru_listen_notsupp(struct socket *so, struct proc *p)
1273 {
1274 return EOPNOTSUPP;
1275 }
1276
1277 int
1278 pru_peeraddr_notsupp(struct socket *so, struct sockaddr **nam)
1279 {
1280 return EOPNOTSUPP;
1281 }
1282
1283 int
1284 pru_rcvd_notsupp(struct socket *so, int flags)
1285 {
1286 return EOPNOTSUPP;
1287 }
1288
1289 int
1290 pru_rcvoob_notsupp(struct socket *so, struct mbuf *m, int flags)
1291 {
1292 return EOPNOTSUPP;
1293 }
1294
1295 int
1296 pru_send_notsupp(struct socket *so, int flags, struct mbuf *m,
1297 struct sockaddr *addr, struct mbuf *control,
1298 struct proc *p)
1299
1300 {
1301 return EOPNOTSUPP;
1302 }
1303
1304
1305 /*
1306 * This isn't really a ``null'' operation, but it's the default one
1307 * and doesn't do anything destructive.
1308 */
1309 int
1310 pru_sense_null(struct socket *so, struct stat *sb)
1311 {
1312 sb->st_blksize = so->so_snd.sb_hiwat;
1313 return 0;
1314 }
1315
1316
1317 int pru_sosend_notsupp(struct socket *so, struct sockaddr *addr,
1318 struct uio *uio, struct mbuf *top,
1319 struct mbuf *control, int flags)
1320
1321 {
1322 return EOPNOTSUPP;
1323 }
1324
1325 int pru_soreceive_notsupp(struct socket *so,
1326 struct sockaddr **paddr,
1327 struct uio *uio, struct mbuf **mp0,
1328 struct mbuf **controlp, int *flagsp)
1329 {
1330 return EOPNOTSUPP;
1331 }
1332
1333 int
1334 pru_shutdown_notsupp(struct socket *so)
1336 {
1337 return EOPNOTSUPP;
1338 }
1339
1340 int
1341 pru_sockaddr_notsupp(struct socket *so, struct sockaddr **nam)
1342 {
1343 return EOPNOTSUPP;
1344 }
1345
1346 int pru_sosend(struct socket *so, struct sockaddr *addr,
1347 struct uio *uio, struct mbuf *top,
1348 struct mbuf *control, int flags)
1349 {
1350 return EOPNOTSUPP;
1351 }
1352
1353 int pru_soreceive(struct socket *so,
1354 struct sockaddr **paddr,
1355 struct uio *uio, struct mbuf **mp0,
1356 struct mbuf **controlp, int *flagsp)
1357 {
1358 return EOPNOTSUPP;
1359 }
1360
1361
1362 int
1363 pru_sopoll_notsupp(__unused struct socket *so, __unused int events,
1364 __unused kauth_cred_t cred, __unused void *wql)
1365 {
1366 return EOPNOTSUPP;
1367 }
1368
1369
1370 #ifdef __APPLE__
1371 /*
1372 * The following are macros on BSD and functions on Darwin
1373 */
1374
1375 /*
1376 * Do we need to notify the other side when I/O is possible?
1377 */
1378
1379 int
1380 sb_notify(struct sockbuf *sb)
1381 {
1382 return ((sb->sb_flags & (SB_WAIT|SB_SEL|SB_ASYNC|SB_UPCALL|SB_KNOTE)) != 0);
1383 }
1384
1385 /*
1386 * How much space is there in a socket buffer (so->so_snd or so->so_rcv)?
1387 * This is problematic if the fields are unsigned, as the space might
1388 * still be negative (cc > hiwat or mbcnt > mbmax). Should detect
1389 * overflow and return 0. Should use "lmin" but it doesn't exist now.
1390 */
1391 long
1392 sbspace(struct sockbuf *sb)
1393 {
1394 return ((long) imin((int)(sb->sb_hiwat - sb->sb_cc),
1395 (int)(sb->sb_mbmax - sb->sb_mbcnt)));
1396 }
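/*
 * Worked example (illustrative): with sb_hiwat = 8192, sb_cc = 1000,
 * sb_mbmax = 65536 and sb_mbcnt = 2304, sbspace() returns
 * min(8192 - 1000, 65536 - 2304) = 7192.  If a protocol has pushed the
 * buffer past its limits (sb_cc > sb_hiwat), the unsigned subtraction
 * wraps and the cast to int yields a negative value, which is the caveat
 * noted in the comment above.
 */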
1397
1398 /* do we have to send all at once on a socket? */
1399 int
1400 sosendallatonce(struct socket *so)
1401 {
1402 return (so->so_proto->pr_flags & PR_ATOMIC);
1403 }
1404
1405 /* can we read something from so? */
1406 int
1407 soreadable(struct socket *so)
1408 {
1409 return (so->so_rcv.sb_cc >= so->so_rcv.sb_lowat ||
1410 (so->so_state & SS_CANTRCVMORE) ||
1411 so->so_comp.tqh_first || so->so_error);
1412 }
1413
1414 /* can we write something to so? */
1415
1416 int
1417 sowriteable(struct socket *so)
1418 {
1419 return ((sbspace(&(so)->so_snd) >= (so)->so_snd.sb_lowat &&
1420 ((so->so_state&SS_ISCONNECTED) ||
1421 (so->so_proto->pr_flags&PR_CONNREQUIRED)==0)) ||
1422 (so->so_state & SS_CANTSENDMORE) ||
1423 so->so_error);
1424 }
1425
1426 /* adjust counters in sb reflecting allocation of m */
1427
1428 void
1429 sballoc(struct sockbuf *sb, struct mbuf *m)
1430 {
1431 sb->sb_cc += m->m_len;
1432 sb->sb_mbcnt += MSIZE;
1433 if (m->m_flags & M_EXT)
1434 sb->sb_mbcnt += m->m_ext.ext_size;
1435 }
1436
1437 /* adjust counters in sb reflecting freeing of m */
1438 void
1439 sbfree(struct sockbuf *sb, struct mbuf *m)
1440 {
1441 sb->sb_cc -= m->m_len;
1442 sb->sb_mbcnt -= MSIZE;
1443 if (m->m_flags & M_EXT)
1444 sb->sb_mbcnt -= m->m_ext.ext_size;
1445 }
1446
1447 /*
1448 * Set lock on sockbuf sb; sleep if lock is already held.
1449 * Unless SB_NOINTR is set on sockbuf, sleep is interruptible.
1450 * Returns error without lock if sleep is interrupted.
1451 */
1452 int
1453 sblock(struct sockbuf *sb, int wf)
1454 {
1455 if (sb->sb_flags & SB_LOCK)
1456 return ((wf == M_WAIT) ? sb_lock(sb) : EWOULDBLOCK);
1457 return ((sb->sb_flags |= SB_LOCK), 0);
1458 }
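/*
 * Illustrative sketch (not part of this file): a typical pattern for code
 * that needs exclusive use of a socket buffer while holding the socket
 * lock, similar to what sbflush() earlier in this file does:
 *
 *	error = sblock(&so->so_rcv, M_WAIT);	// may sleep; can fail with EINTR
 *	if (error)
 *		return (error);
 *	// ... manipulate so->so_rcv ...
 *	sbunlock(&so->so_rcv, 1);		// keep the socket locked
 */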
1459
1460 /* release lock on sockbuf sb */
1461 void
1462 sbunlock(struct sockbuf *sb, int keeplocked)
1463 {
1464 struct socket *so = sb->sb_so;
1465 int lr, lr_saved;
1466 lck_mtx_t *mutex_held;
1467
1468 #ifdef __ppc__
1469 __asm__ volatile("mflr %0" : "=r" (lr));
1470 lr_saved = lr;
1471 #endif
1472 sb->sb_flags &= ~SB_LOCK;
1473
1474 if (so->so_proto->pr_getlock != NULL)
1475 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
1476 else
1477 mutex_held = so->so_proto->pr_domain->dom_mtx;
1478
1479 if (keeplocked == 0)
1480 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
1481
1482 if (sb->sb_flags & SB_WANT) {
1483 sb->sb_flags &= ~SB_WANT;
1484 if (so->so_usecount < 0)
1485 panic("sbunlock: b4 wakeup so=%x ref=%d lr=%x sb_flags=%x\n", sb->sb_so, so->so_usecount, lr_saved, sb->sb_flags);
1486
1487 wakeup((caddr_t)&(sb)->sb_flags);
1488 }
1489 if (keeplocked == 0) { /* unlock on exit */
1490 so->so_usecount--;
1491 if (so->so_usecount < 0)
1492 panic("sbunlock: unlock on exit so=%x lr=%x sb_flags=%x\n", so, so->so_usecount,lr_saved, sb->sb_flags);
1493 so->reserved4= lr_saved;
1494 lck_mtx_unlock(mutex_held);
1495 }
1496 }
1497
1498 void
1499 sorwakeup(struct socket * so)
1500 {
1501 if (sb_notify(&so->so_rcv))
1502 sowakeup(so, &so->so_rcv);
1503 }
1504
1505 void
1506 sowwakeup(struct socket * so)
1507 {
1508 if (sb_notify(&so->so_snd))
1509 sowakeup(so, &so->so_snd);
1510 }
1511 #endif /* __APPLE__ */
1512
1513 /*
1514 * Make a copy of a sockaddr in a malloced buffer of type M_SONAME.
1515 */
1516 struct sockaddr *
1517 dup_sockaddr(sa, canwait)
1518 struct sockaddr *sa;
1519 int canwait;
1520 {
1521 struct sockaddr *sa2;
1522
1523 MALLOC(sa2, struct sockaddr *, sa->sa_len, M_SONAME,
1524 canwait ? M_WAITOK : M_NOWAIT);
1525 if (sa2)
1526 bcopy(sa, sa2, sa->sa_len);
1527 return sa2;
1528 }
1529
1530 /*
1531 * Create an external-format (``xsocket'') structure using the information
1532 * in the kernel-format socket structure pointed to by so. This is done
1533 * to reduce the spew of irrelevant information over this interface,
1534 * to isolate user code from changes in the kernel structure, and
1535 * potentially to provide information-hiding if we decide that
1536 * some of this information should be hidden from users.
1537 */
1538 void
1539 sotoxsocket(struct socket *so, struct xsocket *xso)
1540 {
1541 xso->xso_len = sizeof *xso;
1542 xso->xso_so = so;
1543 xso->so_type = so->so_type;
1544 xso->so_options = so->so_options;
1545 xso->so_linger = so->so_linger;
1546 xso->so_state = so->so_state;
1547 xso->so_pcb = so->so_pcb;
1548 if (so->so_proto) {
1549 xso->xso_protocol = so->so_proto->pr_protocol;
1550 xso->xso_family = so->so_proto->pr_domain->dom_family;
1551 }
1552 else
1553 xso->xso_protocol = xso->xso_family = 0;
1554 xso->so_qlen = so->so_qlen;
1555 xso->so_incqlen = so->so_incqlen;
1556 xso->so_qlimit = so->so_qlimit;
1557 xso->so_timeo = so->so_timeo;
1558 xso->so_error = so->so_error;
1559 xso->so_pgid = so->so_pgid;
1560 xso->so_oobmark = so->so_oobmark;
1561 sbtoxsockbuf(&so->so_snd, &xso->so_snd);
1562 sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
1563 xso->so_uid = so->so_uid;
1564 }
1565
1566 /*
1567 * This does the same for sockbufs. Note that the xsockbuf structure,
1568 * since it is always embedded in a socket, does not include a self
1569 * pointer nor a length. We make this entry point public in case
1570 * some other mechanism needs it.
1571 */
1572 void
1573 sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb)
1574 {
1575 xsb->sb_cc = sb->sb_cc;
1576 xsb->sb_hiwat = sb->sb_hiwat;
1577 xsb->sb_mbcnt = sb->sb_mbcnt;
1578 xsb->sb_mbmax = sb->sb_mbmax;
1579 xsb->sb_lowat = sb->sb_lowat;
1580 xsb->sb_flags = sb->sb_flags;
1581 xsb->sb_timeo = (u_long)(sb->sb_timeo.tv_sec * hz) + sb->sb_timeo.tv_usec / tick;
1582 if (xsb->sb_timeo == 0 && sb->sb_timeo.tv_usec != 0)
1583 xsb->sb_timeo = 1;
1584 }
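/*
 * Worked example for the conversion above (illustrative; assumes
 * hz = 100, so tick = 10000 microseconds): a timeout of
 * { tv_sec = 0, tv_usec = 250000 } is exported as
 * 0 * 100 + 250000 / 10000 = 25 ticks.  A tiny but nonzero timeout such
 * as 1 microsecond would truncate to 0 ticks, which is why it is rounded
 * up to 1 above.
 */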
1585
1586 /*
1587 * Here is the definition of some of the basic objects in the kern.ipc
1588 * branch of the MIB.
1589 */
1590 SYSCTL_NODE(_kern, KERN_IPC, ipc, CTLFLAG_RW, 0, "IPC");
1591
1592 /* This takes the place of kern.maxsockbuf, which moved to kern.ipc. */
1593 static int dummy;
1594 SYSCTL_INT(_kern, KERN_DUMMY, dummy, CTLFLAG_RW, &dummy, 0, "");
1595
1596 SYSCTL_INT(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf, CTLFLAG_RW,
1597 &sb_max, 0, "Maximum socket buffer size");
1598 SYSCTL_INT(_kern_ipc, OID_AUTO, maxsockets, CTLFLAG_RD,
1599 &maxsockets, 0, "Maximum number of sockets avaliable");
1600 SYSCTL_INT(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW,
1601 &sb_efficiency, 0, "");
1602 SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters, CTLFLAG_RD, &nmbclusters, 0, "");
1603