1 /*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /* Copyright (c) 1998, 1999 Apple Computer, Inc. All Rights Reserved */
23 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
24 /*
25 * Copyright (c) 1982, 1986, 1988, 1990, 1993
26 * The Regents of the University of California. All rights reserved.
27 *
28 * Redistribution and use in source and binary forms, with or without
29 * modification, are permitted provided that the following conditions
30 * are met:
31 * 1. Redistributions of source code must retain the above copyright
32 * notice, this list of conditions and the following disclaimer.
33 * 2. Redistributions in binary form must reproduce the above copyright
34 * notice, this list of conditions and the following disclaimer in the
35 * documentation and/or other materials provided with the distribution.
36 * 3. All advertising materials mentioning features or use of this software
37 * must display the following acknowledgement:
38 * This product includes software developed by the University of
39 * California, Berkeley and its contributors.
40 * 4. Neither the name of the University nor the names of its contributors
41 * may be used to endorse or promote products derived from this software
42 * without specific prior written permission.
43 *
44 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
45 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
46 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
47 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
48 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
49 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
50 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
51 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
52 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
53 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
54 * SUCH DAMAGE.
55 *
56 * @(#)uipc_socket2.c 8.1 (Berkeley) 6/10/93
57 * $FreeBSD: src/sys/kern/uipc_socket2.c,v 1.55.2.9 2001/07/26 18:53:02 peter Exp $
58 */
59
60 #include <sys/param.h>
61 #include <sys/systm.h>
62 #include <sys/domain.h>
63 #include <sys/kernel.h>
64 #include <sys/proc_internal.h>
65 #include <sys/kauth.h>
66 #include <sys/malloc.h>
67 #include <sys/mbuf.h>
68 #include <sys/protosw.h>
69 #include <sys/stat.h>
70 #include <sys/socket.h>
71 #include <sys/socketvar.h>
72 #include <sys/signalvar.h>
73 #include <sys/sysctl.h>
74 #include <sys/ev.h>
75 #include <kern/locks.h>
76 #include <net/route.h>
77 #include <netinet/in.h>
78 #include <netinet/in_pcb.h>
79 #include <sys/kdebug.h>
80
81 #define DBG_FNC_SBDROP NETDBG_CODE(DBG_NETSOCK, 4)
82 #define DBG_FNC_SBAPPEND NETDBG_CODE(DBG_NETSOCK, 5)
83
84
85 /*
86 * Primitive routines for operating on sockets and socket buffers
87 */
88
89 u_long sb_max = SB_MAX; /* XXX should be static */
90
91 static u_long sb_efficiency = 8; /* parameter for sbreserve() */
92
93 /*
94 * Procedures to manipulate state flags of socket
95 * and do appropriate wakeups. Normal sequence from the
96 * active (originating) side is that soisconnecting() is
97 * called during processing of connect() call,
98 * resulting in an eventual call to soisconnected() if/when the
99 * connection is established. When the connection is torn down
100 * soisdisconnecting() is called during processing of disconnect() call,
101 * and soisdisconnected() is called when the connection to the peer
102 * is totally severed. The semantics of these routines are such that
103 * connectionless protocols can call soisconnected() and soisdisconnected()
104 * only, bypassing the in-progress calls when setting up a ``connection''
105 * takes no time.
106 *
107 * From the passive side, a socket is created with
108 * two queues of sockets: so_incomp for connections in progress
109 * and so_comp for connections already made and awaiting user acceptance.
110 * As a protocol is preparing incoming connections, it creates a socket
111 * structure queued on so_incomp by calling sonewconn(). When the connection
112 * is established, soisconnected() is called, and transfers the
113 * socket structure to so_comp, making it available to accept().
114 *
115 * If a socket is closed with sockets on either
116 * so_incomp or so_comp, these sockets are dropped.
117 *
118 * If higher level protocols are implemented in
119 * the kernel, the wakeups done here will sometimes
120 * cause software-interrupt process scheduling.
121 */
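/*
 * Illustrative sketch only (not part of this file): the call sequence a
 * hypothetical connection-oriented protocol might follow; the handshake
 * steps shown as comments are assumptions for illustration.
 *
 *	Active (originating) side:		Passive (listening) side:
 *		soisconnecting(so);			so = sonewconn(head, 0, from);
 *		... handshake completes ...		... handshake completes ...
 *		soisconnected(so);			soisconnected(so);
 *
 *	Teardown:
 *		soisdisconnecting(so);
 *		... peer acknowledges ...
 *		soisdisconnected(so);
 *
 * A connectionless protocol may skip the in-progress states and call
 * soisconnected()/soisdisconnected() directly, as described above.
 */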
122 void
123 soisconnecting(so)
124 register struct socket *so;
125 {
126
127 so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
128 so->so_state |= SS_ISCONNECTING;
129
130 sflt_notify(so, sock_evt_connecting, NULL);
131 }
132
133 void
134 soisconnected(so)
135 struct socket *so;
136 {
137 struct socket *head = so->so_head;
138
139 so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
140 so->so_state |= SS_ISCONNECTED;
141
142 sflt_notify(so, sock_evt_connected, NULL);
143
144 if (head && (so->so_state & SS_INCOMP)) {
145 so->so_state &= ~SS_INCOMP;
146 so->so_state |= SS_COMP;
147 if (head->so_proto->pr_getlock != NULL) {
148 socket_unlock(so, 0);
149 socket_lock(head, 1);
150 }
151 postevent(head, 0, EV_RCONN);
152 TAILQ_REMOVE(&head->so_incomp, so, so_list);
153 head->so_incqlen--;
154 TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
155 sorwakeup(head);
156 wakeup_one((caddr_t)&head->so_timeo);
157 if (head->so_proto->pr_getlock != NULL) {
158 socket_unlock(head, 1);
159 socket_lock(so, 0);
160 }
161 } else {
162 postevent(so, 0, EV_WCONN);
163 wakeup((caddr_t)&so->so_timeo);
164 sorwakeup(so);
165 sowwakeup(so);
166 }
167 }
168
169 void
170 soisdisconnecting(so)
171 register struct socket *so;
172 {
173 so->so_state &= ~SS_ISCONNECTING;
174 so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
175 sflt_notify(so, sock_evt_disconnecting, NULL);
176 wakeup((caddr_t)&so->so_timeo);
177 sowwakeup(so);
178 sorwakeup(so);
179 }
180
181 void
182 soisdisconnected(so)
183 register struct socket *so;
184 {
185 so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
186 so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
187 sflt_notify(so, sock_evt_disconnected, NULL);
188 wakeup((caddr_t)&so->so_timeo);
189 sowwakeup(so);
190 sorwakeup(so);
191 }
192
193 /*
194 * Return a random connection that hasn't been serviced yet and
195 * is eligible for discard. There is a one in qlen chance that
 196  * we will return a null, saying that there are no droppable
 197  * requests. In this case, the protocol-specific code should drop
 198  * the new request. This ensures fairness.
199 *
200 * This may be used in conjunction with protocol specific queue
201 * congestion routines.
202 */
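/*
 * Illustrative sketch only (not part of this file): how a protocol's
 * input path might use sodropablereq() when its incomplete-connection
 * queue overflows.  The surrounding logic is an assumption for
 * illustration, not taken from any particular protocol.
 *
 *	if (head->so_incqlen >= head->so_qlimit) {
 *		struct socket *victim = sodropablereq(head);
 *		if (victim == NULL)
 *			return;			// drop the new request instead
 *		// protocol-specific code tears down victim here
 *	}
 */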
203 struct socket *
204 sodropablereq(head)
205 register struct socket *head;
206 {
207 struct socket *so, *sonext = NULL;
208 unsigned int i, j, qlen;
209 static int rnd;
210 static struct timeval old_runtime;
211 static unsigned int cur_cnt, old_cnt;
212 struct timeval tv;
213
214 microtime(&tv);
215 if ((i = (tv.tv_sec - old_runtime.tv_sec)) != 0) {
216 old_runtime = tv;
217 old_cnt = cur_cnt / i;
218 cur_cnt = 0;
219 }
220
221 so = TAILQ_FIRST(&head->so_incomp);
222 if (!so)
223 return (NULL);
224
225 qlen = head->so_incqlen;
226 if (++cur_cnt > qlen || old_cnt > qlen) {
227 rnd = (314159 * rnd + 66329) & 0xffff;
228 j = ((qlen + 1) * rnd) >> 16;
229 //###LD To clean up
230 while (j-- && so) {
231 // if (in_pcb_checkstate(so->so_pcb, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
232 socket_lock(so, 1);
233 sonext = TAILQ_NEXT(so, so_list);
234 // in_pcb_check_state(so->so_pcb, WNT_RELEASE, 0);
235 socket_unlock(so, 1);
236 so = sonext;
237 }
238 }
239
240 // if (in_pcb_checkstate(so->so_pcb, WNT_ACQUIRE, 0) == WNT_STOPUSING)
241 // return (NULL);
242 // else
243 return (so);
244 }
245
246 /*
247 * When an attempt at a new connection is noted on a socket
248 * which accepts connections, sonewconn is called. If the
249 * connection is possible (subject to space constraints, etc.)
 250  * then we allocate a new structure, properly linked into the
251 * data structure of the original socket, and return this.
252 * Connstatus may be 0, or SO_ISCONFIRMING, or SO_ISCONNECTED.
253 */
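/*
 * Illustrative sketch only (not part of this file): a hypothetical
 * protocol accepting an incoming connection request.  The handshake
 * details are assumptions for illustration.
 *
 *	struct socket *so = sonewconn(head, 0, from);
 *	if (so == NULL)
 *		return;			// queue full or no resources
 *	// ... protocol completes its handshake on so ...
 *	soisconnected(so);		// moves so from so_incomp to so_comp
 */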
254 static struct socket *
255 sonewconn_internal(head, connstatus)
256 register struct socket *head;
257 int connstatus;
258 {
259 int error = 0;
260 register struct socket *so;
261 lck_mtx_t *mutex_held;
262
263 if (head->so_proto->pr_getlock != NULL)
264 mutex_held = (*head->so_proto->pr_getlock)(head, 0);
265 else
266 mutex_held = head->so_proto->pr_domain->dom_mtx;
267 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
268
269 if (head->so_qlen > 3 * head->so_qlimit / 2)
270 return ((struct socket *)0);
271 so = soalloc(1, head->so_proto->pr_domain->dom_family, head->so_type);
272 if (so == NULL)
273 return ((struct socket *)0);
274 /* check if head was closed during the soalloc */
275 if (head->so_proto == NULL) {
276 sodealloc(so);
277 return ((struct socket *)0);
278 }
279
280 so->so_head = head;
281 so->so_type = head->so_type;
282 so->so_options = head->so_options &~ SO_ACCEPTCONN;
283 so->so_linger = head->so_linger;
284 so->so_state = head->so_state | SS_NOFDREF;
285 so->so_proto = head->so_proto;
286 so->so_timeo = head->so_timeo;
287 so->so_pgid = head->so_pgid;
288 so->so_uid = head->so_uid;
289 so->so_usecount = 1;
290
291 if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat)) {
292 sflt_termsock(so);
293 sodealloc(so);
294 return ((struct socket *)0);
295 }
296
297 /*
 298  * Must be done with head unlocked to avoid deadlock for protocols with per-socket mutexes.
299 */
300 if (head->so_proto->pr_unlock)
301 socket_unlock(head, 0);
302 if (((*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL) != 0) || error) {
303 sflt_termsock(so);
304 sodealloc(so);
305 if (head->so_proto->pr_unlock)
306 socket_lock(head, 0);
307 return ((struct socket *)0);
308 }
309 if (head->so_proto->pr_unlock)
310 socket_lock(head, 0);
311 #ifdef __APPLE__
312 so->so_proto->pr_domain->dom_refs++;
313 #endif
314
315 if (connstatus) {
316 TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
317 so->so_state |= SS_COMP;
318 } else {
319 TAILQ_INSERT_TAIL(&head->so_incomp, so, so_list);
320 so->so_state |= SS_INCOMP;
321 head->so_incqlen++;
322 }
323 head->so_qlen++;
324 #ifdef __APPLE__
325 so->so_rcv.sb_flags |= SB_RECV; /* XXX */
326 so->so_rcv.sb_so = so->so_snd.sb_so = so;
327 TAILQ_INIT(&so->so_evlist);
328
329 /* Attach socket filters for this protocol */
330 sflt_initsock(so);
331 #endif
332 if (connstatus) {
333 so->so_state |= connstatus;
334 sorwakeup(head);
335 wakeup((caddr_t)&head->so_timeo);
336 }
337 return (so);
338 }
339
340
341 struct socket *
342 sonewconn(
343 struct socket *head,
344 int connstatus,
345 const struct sockaddr *from)
346 {
347 int error = 0;
348 struct socket_filter_entry *filter;
349 int filtered = 0;
350
351 error = 0;
352 for (filter = head->so_filt; filter && (error == 0);
353 filter = filter->sfe_next_onsocket) {
354 if (filter->sfe_filter->sf_filter.sf_connect_in) {
355 if (filtered == 0) {
356 filtered = 1;
357 sflt_use(head);
358 socket_unlock(head, 0);
359 }
360 error = filter->sfe_filter->sf_filter.sf_connect_in(
361 filter->sfe_cookie, head, from);
362 }
363 }
364 if (filtered != 0) {
365 socket_lock(head, 0);
366 sflt_unuse(head);
367 }
368
369 if (error) {
370 return NULL;
371 }
372
373 return sonewconn_internal(head, connstatus);
374 }
375
376 /*
377 * Socantsendmore indicates that no more data will be sent on the
378 * socket; it would normally be applied to a socket when the user
379 * informs the system that no more data is to be sent, by the protocol
 380  * code (in the case of PRU_SHUTDOWN). Socantrcvmore indicates that no more data
381 * will be received, and will normally be applied to the socket by a
382 * protocol when it detects that the peer will send no more data.
383 * Data queued for reading in the socket may yet be read.
384 */
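/*
 * Illustrative sketch only (not part of this file): typical callers,
 * assuming a shutdown request from the user and an end-of-data
 * indication from the peer.
 *
 *	// user requested no further sends: protocol's PRU_SHUTDOWN path
 *	socantsendmore(so);
 *
 *	// peer signalled it will send no more data (e.g. a FIN-like event)
 *	socantrcvmore(so);		// queued data may still be read
 */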
385
386 void
387 socantsendmore(so)
388 struct socket *so;
389 {
390 so->so_state |= SS_CANTSENDMORE;
391 sflt_notify(so, sock_evt_cantsendmore, NULL);
392 sowwakeup(so);
393 }
394
395 void
396 socantrcvmore(so)
397 struct socket *so;
398 {
399 so->so_state |= SS_CANTRCVMORE;
400 sflt_notify(so, sock_evt_cantrecvmore, NULL);
401 sorwakeup(so);
402 }
403
404 /*
405 * Wait for data to arrive at/drain from a socket buffer.
406 */
407 int
408 sbwait(sb)
409 struct sockbuf *sb;
410 {
411 int error = 0, lr, lr_saved;
412 struct socket *so = sb->sb_so;
413 lck_mtx_t *mutex_held;
414 struct timespec ts;
415
416 #ifdef __ppc__
417 __asm__ volatile("mflr %0" : "=r" (lr));
418 lr_saved = lr;
419 #endif
420
421
422 if (so->so_proto->pr_getlock != NULL)
423 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
424 else
425 mutex_held = so->so_proto->pr_domain->dom_mtx;
426
427 sb->sb_flags |= SB_WAIT;
428
429 if (so->so_usecount < 1)
430 panic("sbwait: so=%x refcount=%d\n", so, so->so_usecount);
431 ts.tv_sec = sb->sb_timeo.tv_sec;
432 ts.tv_nsec = sb->sb_timeo.tv_usec * 1000;
433 error = msleep((caddr_t)&sb->sb_cc, mutex_held,
434 (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait",
435 &ts);
436
437 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
438
439 if (so->so_usecount < 1)
440 panic("sbwait: so=%x refcount=%d\n", so, so->so_usecount);
441
442 if ((so->so_state & SS_DRAINING)) {
443 error = EBADF;
444 }
445
446 return (error);
447 }
448
449 /*
450 * Lock a sockbuf already known to be locked;
451 * return any error returned from sleep (EINTR).
452 */
453 int
454 sb_lock(sb)
455 register struct sockbuf *sb;
456 {
457 struct socket *so = sb->sb_so;
458 lck_mtx_t * mutex_held;
459 int error = 0, lr, lr_saved;
460
461 #ifdef __ppc__
462 __asm__ volatile("mflr %0" : "=r" (lr));
463 lr_saved = lr;
464 #endif
465
466 if (so == NULL)
467 panic("sb_lock: null so back pointer sb=%x\n", sb);
468
469 while (sb->sb_flags & SB_LOCK) {
470 sb->sb_flags |= SB_WANT;
471 if (so->so_proto->pr_getlock != NULL)
472 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
473 else
474 mutex_held = so->so_proto->pr_domain->dom_mtx;
475 if (so->so_usecount < 1)
476 panic("sb_lock: so=%x refcount=%d\n", so, so->so_usecount);
477 error = msleep((caddr_t)&sb->sb_flags, mutex_held,
478 (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sblock", 0);
479 if (so->so_usecount < 1)
480 panic("sb_lock: 2 so=%x refcount=%d\n", so, so->so_usecount);
481 if (error)
482 return (error);
483 }
484 sb->sb_flags |= SB_LOCK;
485 return (0);
486 }
487
488 /*
489 * Wakeup processes waiting on a socket buffer.
490 * Do asynchronous notification via SIGIO
491 * if the socket has the SS_ASYNC flag set.
492 */
493 void
494 sowakeup(so, sb)
495 register struct socket *so;
496 register struct sockbuf *sb;
497 {
498 struct proc *p = current_proc();
499 sb->sb_flags &= ~SB_SEL;
500 selwakeup(&sb->sb_sel);
501 if (sb->sb_flags & SB_WAIT) {
502 sb->sb_flags &= ~SB_WAIT;
503 wakeup((caddr_t)&sb->sb_cc);
504 }
505 if (so->so_state & SS_ASYNC) {
506 if (so->so_pgid < 0)
507 gsignal(-so->so_pgid, SIGIO);
508 else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
509 psignal(p, SIGIO);
510 }
511 if (sb->sb_flags & SB_KNOTE) {
512 KNOTE(&sb->sb_sel.si_note, SO_FILT_HINT_LOCKED);
513 }
514 if (sb->sb_flags & SB_UPCALL) {
515 socket_unlock(so, 0);
516 (*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT);
517 socket_lock(so, 0);
518 }
519 }
520
521 /*
522 * Socket buffer (struct sockbuf) utility routines.
523 *
524 * Each socket contains two socket buffers: one for sending data and
525 * one for receiving data. Each buffer contains a queue of mbufs,
526 * information about the number of mbufs and amount of data in the
527 * queue, and other fields allowing select() statements and notification
528 * on data availability to be implemented.
529 *
530 * Data stored in a socket buffer is maintained as a list of records.
531 * Each record is a list of mbufs chained together with the m_next
532 * field. Records are chained together with the m_nextpkt field. The upper
533 * level routine soreceive() expects the following conventions to be
534 * observed when placing information in the receive buffer:
535 *
536 * 1. If the protocol requires each message be preceded by the sender's
537 * name, then a record containing that name must be present before
538 * any associated data (mbuf's must be of type MT_SONAME).
539 * 2. If the protocol supports the exchange of ``access rights'' (really
540 * just additional data associated with the message), and there are
541 * ``rights'' to be received, then a record containing this data
542 * should be present (mbuf's must be of type MT_RIGHTS).
543 * 3. If a name or rights record exists, then it must be followed by
544 * a data record, perhaps of zero length.
545 *
546 * Before using a new socket structure it is first necessary to reserve
547 * buffer space to the socket, by calling sbreserve(). This should commit
548 * some of the available buffer space in the system buffer pool for the
549 * socket (currently, it does nothing but enforce limits). The space
550 * should be released by calling sbrelease() when the socket is destroyed.
551 */
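/*
 * Illustrative sketch only (not part of this file): the reserve/release
 * lifecycle a hypothetical protocol attach/detach path might follow.
 * The buffer sizes are assumptions for illustration.
 *
 *	// attach: commit send and receive buffer space to the socket
 *	error = soreserve(so, 8192, 8192);
 *	if (error)
 *		return (error);		// ENOBUFS: limits exceeded
 *
 *	// detach/teardown: release the space (on Darwin, call
 *	// selthreadclear() first, as noted at sbrelease() below)
 *	sbrelease(&so->so_snd);
 *	sbrelease(&so->so_rcv);
 */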
552
553 int
554 soreserve(so, sndcc, rcvcc)
555 register struct socket *so;
556 u_long sndcc, rcvcc;
557 {
558
559 if (sbreserve(&so->so_snd, sndcc) == 0)
560 goto bad;
561 if (sbreserve(&so->so_rcv, rcvcc) == 0)
562 goto bad2;
563 if (so->so_rcv.sb_lowat == 0)
564 so->so_rcv.sb_lowat = 1;
565 if (so->so_snd.sb_lowat == 0)
566 so->so_snd.sb_lowat = MCLBYTES;
567 if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
568 so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
569 return (0);
570 bad2:
571 #ifdef __APPLE__
572 selthreadclear(&so->so_snd.sb_sel);
573 #endif
574 sbrelease(&so->so_snd);
575 bad:
576 return (ENOBUFS);
577 }
578
579 /*
580 * Allot mbufs to a sockbuf.
581 * Attempt to scale mbmax so that mbcnt doesn't become limiting
582 * if buffering efficiency is near the normal case.
583 */
584 int
585 sbreserve(sb, cc)
586 struct sockbuf *sb;
587 u_long cc;
588 {
589 if ((u_quad_t)cc > (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES))
590 return (0);
591 sb->sb_hiwat = cc;
592 sb->sb_mbmax = min(cc * sb_efficiency, sb_max);
593 if (sb->sb_lowat > sb->sb_hiwat)
594 sb->sb_lowat = sb->sb_hiwat;
595 return (1);
596 }
597
598 /*
599 * Free mbufs held by a socket, and reserved mbuf space.
600 */
601 /* WARNING needs to do selthreadclear() before calling this */
602 void
603 sbrelease(sb)
604 struct sockbuf *sb;
605 {
606
607 sbflush(sb);
608 sb->sb_hiwat = 0;
609 sb->sb_mbmax = 0;
610
611 }
612
613 /*
614 * Routines to add and remove
615 * data from an mbuf queue.
616 *
617 * The routines sbappend() or sbappendrecord() are normally called to
618 * append new mbufs to a socket buffer, after checking that adequate
619 * space is available, comparing the function sbspace() with the amount
620 * of data to be added. sbappendrecord() differs from sbappend() in
621 * that data supplied is treated as the beginning of a new record.
622 * To place a sender's address, optional access rights, and data in a
623 * socket receive buffer, sbappendaddr() should be used. To place
624 * access rights and data in a socket receive buffer, sbappendrights()
625 * should be used. In either case, the new data begins a new record.
626 * Note that unlike sbappend() and sbappendrecord(), these routines check
627 * for the caller that there will be enough space to store the data.
628 * Each fails if there is not enough space, or if it cannot find mbufs
629 * to store additional information in.
630 *
631 * Reliable protocols may use the socket send buffer to hold data
632 * awaiting acknowledgement. Data is normally copied from a socket
633 * send buffer in a protocol with m_copy for output to a peer,
634 * and then removing the data from the socket buffer with sbdrop()
635 * or sbdroprecord() when the data is acknowledged by the peer.
636 */
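/*
 * Illustrative sketch only (not part of this file): a hypothetical
 * datagram protocol delivering an incoming packet to the receive
 * buffer.  Names such as "from" and "opts" are assumptions for
 * illustration.
 *
 *	// sbappendaddr() checks sbspace() itself and begins a new record
 *	if (sbappendaddr(&so->so_rcv, (struct sockaddr *)&from, m, opts, NULL) == 0) {
 *		// on failure sbappendaddr() has freed m and opts; count the drop
 *		return;
 *	}
 *	sorwakeup(so);			// notify readers
 *
 * A stream protocol would instead compare sbspace(&so->so_rcv) against the
 * amount of data and then call sbappend(&so->so_rcv, m).
 */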
637
638 /*
639 * Append mbuf chain m to the last record in the
640 * socket buffer sb. The additional space associated
641 * the mbuf chain is recorded in sb. Empty mbufs are
642 * discarded and mbufs are compacted where possible.
643 */
644 int
645 sbappend(sb, m)
646 struct sockbuf *sb;
647 struct mbuf *m;
648 {
649 register struct mbuf *n, *sb_first;
650 int result = 0;
651 int error = 0;
652 int filtered = 0;
653
654
655 KERNEL_DEBUG((DBG_FNC_SBAPPEND | DBG_FUNC_START), sb, m->m_len, 0, 0, 0);
656
657 if (m == 0)
658 return 0;
659
660 again:
661 sb_first = n = sb->sb_mb;
662 if (n) {
663 while (n->m_nextpkt)
664 n = n->m_nextpkt;
665 do {
666 if (n->m_flags & M_EOR) {
667 result = sbappendrecord(sb, m); /* XXXXXX!!!! */
668 KERNEL_DEBUG((DBG_FNC_SBAPPEND | DBG_FUNC_END), sb, sb->sb_cc, 0, 0, 0);
669 return result;
670 }
671 } while (n->m_next && (n = n->m_next));
672 }
673
674 if (!filtered && (sb->sb_flags & SB_RECV) != 0) {
675 error = sflt_data_in(sb->sb_so, NULL, &m, NULL, 0, &filtered);
676 if (error) {
677 /* no data was appended, caller should not call sowakeup */
678 return 0;
679 }
680
 681 		/*
 682 		 * If we ran any filters, the socket lock was dropped. n and
 683 		 * sb_first cached data from the socket buffer; that cache is
 684 		 * no longer valid since we dropped the lock, so we must start
 685 		 * over. Since filtered is set we won't run through the filters
 686 		 * a second time; we just set n and sb_first again.
 687 		 */
688 if (filtered)
689 goto again;
690 }
691
692 result = sbcompress(sb, m, n);
693
694 KERNEL_DEBUG((DBG_FNC_SBAPPEND | DBG_FUNC_END), sb, sb->sb_cc, 0, 0, 0);
695
696 return result;
697 }
698
699 #ifdef SOCKBUF_DEBUG
700 void
701 sbcheck(sb)
702 register struct sockbuf *sb;
703 {
704 register struct mbuf *m;
705 register struct mbuf *n = 0;
706 register u_long len = 0, mbcnt = 0;
707 lck_mtx_t *mutex_held;
708
709 if (sb->sb_so->so_proto->pr_getlock != NULL)
710 mutex_held = (*sb->sb_so->so_proto->pr_getlock)(sb->sb_so, 0);
711 else
712 mutex_held = sb->sb_so->so_proto->pr_domain->dom_mtx;
713
714 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
715
716 if (sbchecking == 0)
717 return;
718
719 for (m = sb->sb_mb; m; m = n) {
720 n = m->m_nextpkt;
721 for (; m; m = m->m_next) {
722 len += m->m_len;
723 mbcnt += MSIZE;
724 if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */
725 mbcnt += m->m_ext.ext_size;
726 }
727 }
728 if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
729 panic("cc %ld != %ld || mbcnt %ld != %ld\n", len, sb->sb_cc,
730 mbcnt, sb->sb_mbcnt);
731 }
732 }
733 #endif
734
735 /*
736 * As above, except the mbuf chain
737 * begins a new record.
738 */
739 int
740 sbappendrecord(sb, m0)
741 register struct sockbuf *sb;
742 register struct mbuf *m0;
743 {
744 register struct mbuf *m;
745 int result = 0;
746
747 if (m0 == 0)
748 return 0;
749
750 if ((sb->sb_flags & SB_RECV) != 0) {
751 int error = sflt_data_in(sb->sb_so, NULL, &m0, NULL, sock_data_filt_flag_record, NULL);
752 if (error != 0) {
753 if (error != EJUSTRETURN)
754 m_freem(m0);
755 return 0;
756 }
757 }
758
759 m = sb->sb_mb;
760 if (m)
761 while (m->m_nextpkt)
762 m = m->m_nextpkt;
763 /*
764 * Put the first mbuf on the queue.
765 * Note this permits zero length records.
766 */
767 sballoc(sb, m0);
768 if (m)
769 m->m_nextpkt = m0;
770 else
771 sb->sb_mb = m0;
772 m = m0->m_next;
773 m0->m_next = 0;
774 if (m && (m0->m_flags & M_EOR)) {
775 m0->m_flags &= ~M_EOR;
776 m->m_flags |= M_EOR;
777 }
778 return sbcompress(sb, m, m0);
779 }
780
781 /*
782 * As above except that OOB data
783 * is inserted at the beginning of the sockbuf,
784 * but after any other OOB data.
785 */
786 int
787 sbinsertoob(sb, m0)
788 struct sockbuf *sb;
789 struct mbuf *m0;
790 {
791 struct mbuf *m;
792 struct mbuf **mp;
793
794 if (m0 == 0)
795 return 0;
796
797 if ((sb->sb_flags & SB_RECV) != 0) {
798 int error = sflt_data_in(sb->sb_so, NULL, &m0, NULL,
799 sock_data_filt_flag_oob, NULL);
800
801 if (error) {
802 if (error != EJUSTRETURN) {
803 m_freem(m0);
804 }
805 return 0;
806 }
807 }
808
809 for (mp = &sb->sb_mb; *mp ; mp = &((*mp)->m_nextpkt)) {
810 m = *mp;
811 again:
812 switch (m->m_type) {
813
814 case MT_OOBDATA:
815 continue; /* WANT next train */
816
817 case MT_CONTROL:
818 m = m->m_next;
819 if (m)
820 goto again; /* inspect THIS train further */
821 }
822 break;
823 }
824 /*
825 * Put the first mbuf on the queue.
826 * Note this permits zero length records.
827 */
828 sballoc(sb, m0);
829 m0->m_nextpkt = *mp;
830 *mp = m0;
831 m = m0->m_next;
832 m0->m_next = 0;
833 if (m && (m0->m_flags & M_EOR)) {
834 m0->m_flags &= ~M_EOR;
835 m->m_flags |= M_EOR;
836 }
837 return sbcompress(sb, m, m0);
838 }
839
840 /*
841 * Append address and data, and optionally, control (ancillary) data
842 * to the receive queue of a socket. If present,
843 * m0 must include a packet header with total length.
844 * Returns 0 if no space in sockbuf or insufficient mbufs.
845 */
846 static int
847 sbappendaddr_internal(sb, asa, m0, control)
848 register struct sockbuf *sb;
849 struct sockaddr *asa;
850 struct mbuf *m0, *control;
851 {
852 register struct mbuf *m, *n;
853 int space = asa->sa_len;
854
855 if (m0 && (m0->m_flags & M_PKTHDR) == 0)
856 panic("sbappendaddr");
857
858 if (m0)
859 space += m0->m_pkthdr.len;
860 for (n = control; n; n = n->m_next) {
861 space += n->m_len;
862 if (n->m_next == 0) /* keep pointer to last control buf */
863 break;
864 }
865 if (space > sbspace(sb))
866 return (0);
867 if (asa->sa_len > MLEN)
868 return (0);
869 MGET(m, M_DONTWAIT, MT_SONAME);
870 if (m == 0)
871 return (0);
872 m->m_len = asa->sa_len;
873 bcopy((caddr_t)asa, mtod(m, caddr_t), asa->sa_len);
874 if (n)
875 n->m_next = m0; /* concatenate data to control */
876 else
877 control = m0;
878 m->m_next = control;
879 for (n = m; n; n = n->m_next)
880 sballoc(sb, n);
881 n = sb->sb_mb;
882 if (n) {
883 while (n->m_nextpkt)
884 n = n->m_nextpkt;
885 n->m_nextpkt = m;
886 } else
887 sb->sb_mb = m;
888 postevent(0,sb,EV_RWBYTES);
889 return (1);
890 }
891
892 int
893 sbappendaddr(
894 struct sockbuf* sb,
895 struct sockaddr* asa,
896 struct mbuf *m0,
897 struct mbuf *control,
898 int *error_out)
899 {
900 int result = 0;
901
902 if (error_out) *error_out = 0;
903
904 if (m0 && (m0->m_flags & M_PKTHDR) == 0)
905 panic("sbappendaddrorfree");
906
907 /* Call socket data in filters */
908 if ((sb->sb_flags & SB_RECV) != 0) {
909 int error;
910 error = sflt_data_in(sb->sb_so, asa, &m0, &control, 0, NULL);
911 if (error) {
912 if (error != EJUSTRETURN) {
913 if (m0) m_freem(m0);
914 if (control) m_freem(control);
915 if (error_out) *error_out = error;
916 }
917 return 0;
918 }
919 }
920
921 result = sbappendaddr_internal(sb, asa, m0, control);
922 if (result == 0) {
923 if (m0) m_freem(m0);
924 if (control) m_freem(control);
925 if (error_out) *error_out = ENOBUFS;
926 }
927
928 return result;
929 }
930
931 static int
932 sbappendcontrol_internal(sb, m0, control)
933 struct sockbuf *sb;
934 struct mbuf *control, *m0;
935 {
936 register struct mbuf *m, *n;
937 int space = 0;
938
939 if (control == 0)
940 panic("sbappendcontrol");
941
942 for (m = control; ; m = m->m_next) {
943 space += m->m_len;
944 if (m->m_next == 0)
945 break;
946 }
947 n = m; /* save pointer to last control buffer */
948 for (m = m0; m; m = m->m_next)
949 space += m->m_len;
950 if (space > sbspace(sb))
951 return (0);
952 n->m_next = m0; /* concatenate data to control */
953 for (m = control; m; m = m->m_next)
954 sballoc(sb, m);
955 n = sb->sb_mb;
956 if (n) {
957 while (n->m_nextpkt)
958 n = n->m_nextpkt;
959 n->m_nextpkt = control;
960 } else
961 sb->sb_mb = control;
962 postevent(0,sb,EV_RWBYTES);
963 return (1);
964 }
965
966 int
967 sbappendcontrol(
968 struct sockbuf *sb,
969 struct mbuf *m0,
970 struct mbuf *control,
971 int *error_out)
972 {
973 int result = 0;
974
975 if (error_out) *error_out = 0;
976
977 if (sb->sb_flags & SB_RECV) {
978 int error;
979 error = sflt_data_in(sb->sb_so, NULL, &m0, &control, 0, NULL);
980 if (error) {
981 if (error != EJUSTRETURN) {
982 if (m0) m_freem(m0);
983 if (control) m_freem(control);
984 if (error_out) *error_out = error;
985 }
986 return 0;
987 }
988 }
989
990 result = sbappendcontrol_internal(sb, m0, control);
991 if (result == 0) {
992 if (m0) m_freem(m0);
993 if (control) m_freem(control);
994 if (error_out) *error_out = ENOBUFS;
995 }
996
997 return result;
998 }
999
1000 /*
1001 * Compress mbuf chain m into the socket
1002 * buffer sb following mbuf n. If n
1003 * is null, the buffer is presumed empty.
1004 */
1005 static int
1006 sbcompress(sb, m, n)
1007 register struct sockbuf *sb;
1008 register struct mbuf *m, *n;
1009 {
1010 register int eor = 0;
1011 register struct mbuf *o;
1012
1013 while (m) {
1014 eor |= m->m_flags & M_EOR;
1015 if (m->m_len == 0 &&
1016 (eor == 0 ||
1017 (((o = m->m_next) || (o = n)) &&
1018 o->m_type == m->m_type))) {
1019 m = m_free(m);
1020 continue;
1021 }
1022 if (n && (n->m_flags & M_EOR) == 0 &&
1023 #ifndef __APPLE__
1024 M_WRITABLE(n) &&
1025 #endif
1026 m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
1027 m->m_len <= M_TRAILINGSPACE(n) &&
1028 n->m_type == m->m_type) {
1029 bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
1030 (unsigned)m->m_len);
1031 n->m_len += m->m_len;
1032 sb->sb_cc += m->m_len;
1033 m = m_free(m);
1034 continue;
1035 }
1036 if (n)
1037 n->m_next = m;
1038 else
1039 sb->sb_mb = m;
1040 sballoc(sb, m);
1041 n = m;
1042 m->m_flags &= ~M_EOR;
1043 m = m->m_next;
1044 n->m_next = 0;
1045 }
1046 if (eor) {
1047 if (n)
1048 n->m_flags |= eor;
1049 else
1050 printf("semi-panic: sbcompress\n");
1051 }
1052 postevent(0,sb, EV_RWBYTES);
1053 return 1;
1054 }
1055
1056 /*
1057 * Free all mbufs in a sockbuf.
1058 * Check that all resources are reclaimed.
1059 */
1060 void
1061 sbflush(sb)
1062 register struct sockbuf *sb;
1063 {
1064 if (sb->sb_so == NULL)
1065 panic ("sbflush sb->sb_so already null sb=%x\n", sb);
1066 (void)sblock(sb, M_WAIT);
1067 while (sb->sb_mbcnt) {
1068 /*
1069 * Don't call sbdrop(sb, 0) if the leading mbuf is non-empty:
1070 * we would loop forever. Panic instead.
1071 */
1072 if (!sb->sb_cc && (sb->sb_mb == NULL || sb->sb_mb->m_len))
1073 break;
1074 sbdrop(sb, (int)sb->sb_cc);
1075 }
1076 if (sb->sb_cc || sb->sb_mb || sb->sb_mbcnt || sb->sb_so == NULL)
1077 panic("sbflush: cc %ld || mb %p || mbcnt %ld sb_so=%x", sb->sb_cc, (void *)sb->sb_mb, sb->sb_mbcnt, sb->sb_so);
1078
1079 postevent(0, sb, EV_RWBYTES);
1080 sbunlock(sb, 1); /* keep socket locked */
1081
1082 }
1083
1084 /*
1085 * Drop data from (the front of) a sockbuf.
 1086  * Use m_freem_list to free the mbuf structures
 1087  * under a single lock. This is done by pruning
 1088  * the top of the tree from the body: we keep track
 1089  * of how far we get into the tree and then zero the
 1090  * two pertinent pointers, m_nextpkt and m_next.
 1091  * The socket buffer is then updated to point at the new
 1092  * top of the tree and the pruned area is released via
 1093  * m_freem_list.
1094 */
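/*
 * Illustrative sketch only (not part of this file): a reliable protocol
 * releasing acknowledged data from the send buffer, assuming "acked"
 * holds the number of bytes the peer has acknowledged.
 *
 *	if (acked > so->so_snd.sb_cc)
 *		acked = so->so_snd.sb_cc;
 *	sbdrop(&so->so_snd, (int)acked);
 *	sowwakeup(so);			// writers may now have space
 */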
1095 void
1096 sbdrop(sb, len)
1097 register struct sockbuf *sb;
1098 register int len;
1099 {
1100 register struct mbuf *m, *free_list, *ml;
1101 struct mbuf *next, *last;
1102
1103 KERNEL_DEBUG((DBG_FNC_SBDROP | DBG_FUNC_START), sb, len, 0, 0, 0);
1104
1105 next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
1106 free_list = last = m;
1107 ml = (struct mbuf *)0;
1108
1109 while (len > 0) {
1110 if (m == 0) {
1111 if (next == 0) {
1112 /* temporarily replacing this panic with printf because
 1113  * it occurs occasionally when closing a socket and there
 1114  * is no harm in ignoring it. This problem will be investigated
 1115  * further.
1116 */
1117 /* panic("sbdrop"); */
1118 printf("sbdrop - count not zero\n");
1119 len = 0;
1120 /* zero the counts. if we have no mbufs, we have no data (PR-2986815) */
1121 sb->sb_cc = 0;
1122 sb->sb_mbcnt = 0;
1123 break;
1124 }
1125 m = last = next;
1126 next = m->m_nextpkt;
1127 continue;
1128 }
1129 if (m->m_len > len) {
1130 m->m_len -= len;
1131 m->m_data += len;
1132 sb->sb_cc -= len;
1133 break;
1134 }
1135 len -= m->m_len;
1136 sbfree(sb, m);
1137
1138 ml = m;
1139 m = m->m_next;
1140 }
1141 while (m && m->m_len == 0) {
1142 sbfree(sb, m);
1143
1144 ml = m;
1145 m = m->m_next;
1146 }
1147 if (ml) {
1148 ml->m_next = (struct mbuf *)0;
1149 last->m_nextpkt = (struct mbuf *)0;
1150 m_freem_list(free_list);
1151 }
1152 if (m) {
1153 sb->sb_mb = m;
1154 m->m_nextpkt = next;
1155 } else
1156 sb->sb_mb = next;
1157
1158 postevent(0, sb, EV_RWBYTES);
1159
1160 KERNEL_DEBUG((DBG_FNC_SBDROP | DBG_FUNC_END), sb, 0, 0, 0, 0);
1161 }
1162
1163 /*
1164 * Drop a record off the front of a sockbuf
1165 * and move the next record to the front.
1166 */
1167 void
1168 sbdroprecord(sb)
1169 register struct sockbuf *sb;
1170 {
1171 register struct mbuf *m, *mn;
1172
1173 m = sb->sb_mb;
1174 if (m) {
1175 sb->sb_mb = m->m_nextpkt;
1176 do {
1177 sbfree(sb, m);
1178 MFREE(m, mn);
1179 m = mn;
1180 } while (m);
1181 }
1182 postevent(0, sb, EV_RWBYTES);
1183 }
1184
1185 /*
1186 * Create a "control" mbuf containing the specified data
1187 * with the specified type for presentation on a socket buffer.
1188 */
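/*
 * Illustrative sketch only (not part of this file): building a control
 * mbuf carrying a timestamp and queueing it with the data, in the style
 * of a receive path that honors SO_TIMESTAMP.  "from" and "m" are
 * assumptions for illustration, and error handling is abbreviated.
 *
 *	struct timeval tv;
 *	struct mbuf *ctl;
 *
 *	microtime(&tv);
 *	ctl = sbcreatecontrol((caddr_t)&tv, sizeof (tv), SCM_TIMESTAMP, SOL_SOCKET);
 *	if (ctl != NULL)
 *		(void) sbappendaddr(&so->so_rcv, from, m, ctl, NULL);
 */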
1189 struct mbuf *
1190 sbcreatecontrol(p, size, type, level)
1191 caddr_t p;
1192 register int size;
1193 int type, level;
1194 {
1195 register struct cmsghdr *cp;
1196 struct mbuf *m;
1197
1198 if (CMSG_SPACE((u_int)size) > MLEN)
1199 return ((struct mbuf *) NULL);
1200 if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL)
1201 return ((struct mbuf *) NULL);
1202 cp = mtod(m, struct cmsghdr *);
1203 /* XXX check size? */
1204 (void)memcpy(CMSG_DATA(cp), p, size);
1205 m->m_len = CMSG_SPACE(size);
1206 cp->cmsg_len = CMSG_LEN(size);
1207 cp->cmsg_level = level;
1208 cp->cmsg_type = type;
1209 return (m);
1210 }
1211
1212 /*
1213 * Some routines that return EOPNOTSUPP for entry points that are not
1214 * supported by a protocol. Fill in as needed.
1215 */
1216 int
1217 pru_abort_notsupp(struct socket *so)
1218 {
1219 return EOPNOTSUPP;
1220 }
1221
1222
1223 int
1224 pru_accept_notsupp(struct socket *so, struct sockaddr **nam)
1225 {
1226 return EOPNOTSUPP;
1227 }
1228
1229 int
1230 pru_attach_notsupp(struct socket *so, int proto, struct proc *p)
1231 {
1232 return EOPNOTSUPP;
1233 }
1234
1235 int
1236 pru_bind_notsupp(struct socket *so, struct sockaddr *nam, struct proc *p)
1237 {
1238 return EOPNOTSUPP;
1239 }
1240
1241 int
1242 pru_connect_notsupp(struct socket *so, struct sockaddr *nam, struct proc *p)
1243 {
1244 return EOPNOTSUPP;
1245 }
1246
1247 int
1248 pru_connect2_notsupp(struct socket *so1, struct socket *so2)
1249 {
1250 return EOPNOTSUPP;
1251 }
1252
1253 int
1254 pru_control_notsupp(struct socket *so, u_long cmd, caddr_t data,
1255 struct ifnet *ifp, struct proc *p)
1256 {
1257 return EOPNOTSUPP;
1258 }
1259
1260 int
1261 pru_detach_notsupp(struct socket *so)
1262 {
1263 return EOPNOTSUPP;
1264 }
1265
1266 int
1267 pru_disconnect_notsupp(struct socket *so)
1268 {
1269 return EOPNOTSUPP;
1270 }
1271
1272 int
1273 pru_listen_notsupp(struct socket *so, struct proc *p)
1274 {
1275 return EOPNOTSUPP;
1276 }
1277
1278 int
1279 pru_peeraddr_notsupp(struct socket *so, struct sockaddr **nam)
1280 {
1281 return EOPNOTSUPP;
1282 }
1283
1284 int
1285 pru_rcvd_notsupp(struct socket *so, int flags)
1286 {
1287 return EOPNOTSUPP;
1288 }
1289
1290 int
1291 pru_rcvoob_notsupp(struct socket *so, struct mbuf *m, int flags)
1292 {
1293 return EOPNOTSUPP;
1294 }
1295
1296 int
1297 pru_send_notsupp(struct socket *so, int flags, struct mbuf *m,
1298 struct sockaddr *addr, struct mbuf *control,
1299 struct proc *p)
1300
1301 {
1302 return EOPNOTSUPP;
1303 }
1304
1305
1306 /*
1307 * This isn't really a ``null'' operation, but it's the default one
1308 * and doesn't do anything destructive.
1309 */
1310 int
1311 pru_sense_null(struct socket *so, struct stat *sb)
1312 {
1313 sb->st_blksize = so->so_snd.sb_hiwat;
1314 return 0;
1315 }
1316
1317
1318 int pru_sosend_notsupp(struct socket *so, struct sockaddr *addr,
1319 struct uio *uio, struct mbuf *top,
1320 struct mbuf *control, int flags)
1321
1322 {
1323 return EOPNOTSUPP;
1324 }
1325
1326 int pru_soreceive_notsupp(struct socket *so,
1327 struct sockaddr **paddr,
1328 struct uio *uio, struct mbuf **mp0,
1329 struct mbuf **controlp, int *flagsp)
1330 {
1331 return EOPNOTSUPP;
1332 }
1333
1334 int
1335
1336 pru_shutdown_notsupp(struct socket *so)
1337 {
1338 return EOPNOTSUPP;
1339 }
1340
1341 int
1342 pru_sockaddr_notsupp(struct socket *so, struct sockaddr **nam)
1343 {
1344 return EOPNOTSUPP;
1345 }
1346
1347 int pru_sosend(struct socket *so, struct sockaddr *addr,
1348 struct uio *uio, struct mbuf *top,
1349 struct mbuf *control, int flags)
1350 {
1351 return EOPNOTSUPP;
1352 }
1353
1354 int pru_soreceive(struct socket *so,
1355 struct sockaddr **paddr,
1356 struct uio *uio, struct mbuf **mp0,
1357 struct mbuf **controlp, int *flagsp)
1358 {
1359 return EOPNOTSUPP;
1360 }
1361
1362
1363 int
1364 pru_sopoll_notsupp(__unused struct socket *so, __unused int events,
1365 __unused kauth_cred_t cred, __unused void *wql)
1366 {
1367 return EOPNOTSUPP;
1368 }
1369
1370
1371 #ifdef __APPLE__
1372 /*
1373 * The following are macros on BSD and functions on Darwin
1374 */
1375
1376 /*
1377 * Do we need to notify the other side when I/O is possible?
1378 */
1379
1380 int
1381 sb_notify(struct sockbuf *sb)
1382 {
1383 return ((sb->sb_flags & (SB_WAIT|SB_SEL|SB_ASYNC|SB_UPCALL|SB_KNOTE)) != 0);
1384 }
1385
1386 /*
1387 * How much space is there in a socket buffer (so->so_snd or so->so_rcv)?
1388 * This is problematical if the fields are unsigned, as the space might
1389 * still be negative (cc > hiwat or mbcnt > mbmax). Should detect
1390 * overflow and return 0. Should use "lmin" but it doesn't exist now.
1391 */
1392 long
1393 sbspace(struct sockbuf *sb)
1394 {
1395 return ((long) imin((int)(sb->sb_hiwat - sb->sb_cc),
1396 (int)(sb->sb_mbmax - sb->sb_mbcnt)));
1397 }
1398
1399 /* do we have to send all at once on a socket? */
1400 int
1401 sosendallatonce(struct socket *so)
1402 {
1403 return (so->so_proto->pr_flags & PR_ATOMIC);
1404 }
1405
1406 /* can we read something from so? */
1407 int
1408 soreadable(struct socket *so)
1409 {
1410 return (so->so_rcv.sb_cc >= so->so_rcv.sb_lowat ||
1411 (so->so_state & SS_CANTRCVMORE) ||
1412 so->so_comp.tqh_first || so->so_error);
1413 }
1414
1415 /* can we write something to so? */
1416
1417 int
1418 sowriteable(struct socket *so)
1419 {
1420 return ((sbspace(&(so)->so_snd) >= (so)->so_snd.sb_lowat &&
1421 ((so->so_state&SS_ISCONNECTED) ||
1422 (so->so_proto->pr_flags&PR_CONNREQUIRED)==0)) ||
1423 (so->so_state & SS_CANTSENDMORE) ||
1424 so->so_error);
1425 }
1426
1427 /* adjust counters in sb reflecting allocation of m */
1428
1429 void
1430 sballoc(struct sockbuf *sb, struct mbuf *m)
1431 {
1432 sb->sb_cc += m->m_len;
1433 sb->sb_mbcnt += MSIZE;
1434 if (m->m_flags & M_EXT)
1435 sb->sb_mbcnt += m->m_ext.ext_size;
1436 }
1437
1438 /* adjust counters in sb reflecting freeing of m */
1439 void
1440 sbfree(struct sockbuf *sb, struct mbuf *m)
1441 {
1442 sb->sb_cc -= m->m_len;
1443 sb->sb_mbcnt -= MSIZE;
1444 if (m->m_flags & M_EXT)
1445 sb->sb_mbcnt -= m->m_ext.ext_size;
1446 }
1447
1448 /*
1449 * Set lock on sockbuf sb; sleep if lock is already held.
1450 * Unless SB_NOINTR is set on sockbuf, sleep is interruptible.
1451 * Returns error without lock if sleep is interrupted.
1452 */
1453 int
1454 sblock(struct sockbuf *sb, int wf)
1455 {
1456 return(sb->sb_flags & SB_LOCK ?
1457 ((wf == M_WAIT) ? sb_lock(sb) : EWOULDBLOCK) :
 1458 		((sb->sb_flags |= SB_LOCK), 0));
1459 }
1460
1461 /* release lock on sockbuf sb */
1462 void
1463 sbunlock(struct sockbuf *sb, int keeplocked)
1464 {
1465 struct socket *so = sb->sb_so;
1466 int lr, lr_saved;
1467 lck_mtx_t *mutex_held;
1468
1469 #ifdef __ppc__
1470 __asm__ volatile("mflr %0" : "=r" (lr));
1471 lr_saved = lr;
1472 #endif
1473 sb->sb_flags &= ~SB_LOCK;
1474
1475 if (so->so_proto->pr_getlock != NULL)
1476 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
1477 else
1478 mutex_held = so->so_proto->pr_domain->dom_mtx;
1479
1480 if (keeplocked == 0)
1481 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
1482
1483 if (sb->sb_flags & SB_WANT) {
1484 sb->sb_flags &= ~SB_WANT;
1485 if (so->so_usecount < 0)
1486 panic("sbunlock: b4 wakeup so=%x ref=%d lr=%x sb_flags=%x\n", sb->sb_so, so->so_usecount, lr_saved, sb->sb_flags);
1487
1488 wakeup((caddr_t)&(sb)->sb_flags);
1489 }
1490 if (keeplocked == 0) { /* unlock on exit */
1491 so->so_usecount--;
1492 if (so->so_usecount < 0)
 1493 			panic("sbunlock: unlock on exit so=%x ref=%d lr=%x sb_flags=%x\n", so, so->so_usecount, lr_saved, sb->sb_flags);
1494 so->reserved4= lr_saved;
1495 lck_mtx_unlock(mutex_held);
1496 }
1497 }
1498
1499 void
1500 sorwakeup(struct socket * so)
1501 {
1502 if (sb_notify(&so->so_rcv))
1503 sowakeup(so, &so->so_rcv);
1504 }
1505
1506 void
1507 sowwakeup(struct socket * so)
1508 {
1509 if (sb_notify(&so->so_snd))
1510 sowakeup(so, &so->so_snd);
1511 }
 1512 #endif /* __APPLE__ */
1513
1514 /*
1515 * Make a copy of a sockaddr in a malloced buffer of type M_SONAME.
1516 */
1517 struct sockaddr *
1518 dup_sockaddr(sa, canwait)
1519 struct sockaddr *sa;
1520 int canwait;
1521 {
1522 struct sockaddr *sa2;
1523
1524 MALLOC(sa2, struct sockaddr *, sa->sa_len, M_SONAME,
1525 canwait ? M_WAITOK : M_NOWAIT);
1526 if (sa2)
1527 bcopy(sa, sa2, sa->sa_len);
1528 return sa2;
1529 }
1530
1531 /*
1532 * Create an external-format (``xsocket'') structure using the information
1533 * in the kernel-format socket structure pointed to by so. This is done
1534 * to reduce the spew of irrelevant information over this interface,
1535 * to isolate user code from changes in the kernel structure, and
1536 * potentially to provide information-hiding if we decide that
1537 * some of this information should be hidden from users.
1538 */
1539 void
1540 sotoxsocket(struct socket *so, struct xsocket *xso)
1541 {
1542 xso->xso_len = sizeof *xso;
1543 xso->xso_so = so;
1544 xso->so_type = so->so_type;
1545 xso->so_options = so->so_options;
1546 xso->so_linger = so->so_linger;
1547 xso->so_state = so->so_state;
1548 xso->so_pcb = so->so_pcb;
1549 if (so->so_proto) {
1550 xso->xso_protocol = so->so_proto->pr_protocol;
1551 xso->xso_family = so->so_proto->pr_domain->dom_family;
1552 }
1553 else
1554 xso->xso_protocol = xso->xso_family = 0;
1555 xso->so_qlen = so->so_qlen;
1556 xso->so_incqlen = so->so_incqlen;
1557 xso->so_qlimit = so->so_qlimit;
1558 xso->so_timeo = so->so_timeo;
1559 xso->so_error = so->so_error;
1560 xso->so_pgid = so->so_pgid;
1561 xso->so_oobmark = so->so_oobmark;
1562 sbtoxsockbuf(&so->so_snd, &xso->so_snd);
1563 sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
1564 xso->so_uid = so->so_uid;
1565 }
1566
1567 /*
1568 * This does the same for sockbufs. Note that the xsockbuf structure,
1569 * since it is always embedded in a socket, does not include a self
1570 * pointer nor a length. We make this entry point public in case
1571 * some other mechanism needs it.
1572 */
1573 void
1574 sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb)
1575 {
1576 xsb->sb_cc = sb->sb_cc;
1577 xsb->sb_hiwat = sb->sb_hiwat;
1578 xsb->sb_mbcnt = sb->sb_mbcnt;
1579 xsb->sb_mbmax = sb->sb_mbmax;
1580 xsb->sb_lowat = sb->sb_lowat;
1581 xsb->sb_flags = sb->sb_flags;
1582 xsb->sb_timeo = (u_long)(sb->sb_timeo.tv_sec * hz) + sb->sb_timeo.tv_usec / tick;
1583 if (xsb->sb_timeo == 0 && sb->sb_timeo.tv_usec != 0)
1584 xsb->sb_timeo = 1;
1585 }
1586
1587 /*
1588 * Here is the definition of some of the basic objects in the kern.ipc
1589 * branch of the MIB.
1590 */
1591 SYSCTL_NODE(_kern, KERN_IPC, ipc, CTLFLAG_RW, 0, "IPC");
1592
1593 /* This takes the place of kern.maxsockbuf, which moved to kern.ipc. */
1594 static int dummy;
1595 SYSCTL_INT(_kern, KERN_DUMMY, dummy, CTLFLAG_RW, &dummy, 0, "");
1596
1597 SYSCTL_INT(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf, CTLFLAG_RW,
1598 &sb_max, 0, "Maximum socket buffer size");
1599 SYSCTL_INT(_kern_ipc, OID_AUTO, maxsockets, CTLFLAG_RD,
 1600     &maxsockets, 0, "Maximum number of sockets available");
1601 SYSCTL_INT(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW,
1602 &sb_efficiency, 0, "");
1603 SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters, CTLFLAG_RD, &nmbclusters, 0, "");
1604