[apple/xnu.git] / bsd / kern / uipc_socket.c (xnu-792.24.17)
1 /*
2 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /* Copyright (c) 1998, 1999 Apple Computer, Inc. All Rights Reserved */
23 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
24 /*
25 * Copyright (c) 1982, 1986, 1988, 1990, 1993
26 * The Regents of the University of California. All rights reserved.
27 *
28 * Redistribution and use in source and binary forms, with or without
29 * modification, are permitted provided that the following conditions
30 * are met:
31 * 1. Redistributions of source code must retain the above copyright
32 * notice, this list of conditions and the following disclaimer.
33 * 2. Redistributions in binary form must reproduce the above copyright
34 * notice, this list of conditions and the following disclaimer in the
35 * documentation and/or other materials provided with the distribution.
36 * 3. All advertising materials mentioning features or use of this software
37 * must display the following acknowledgement:
38 * This product includes software developed by the University of
39 * California, Berkeley and its contributors.
40 * 4. Neither the name of the University nor the names of its contributors
41 * may be used to endorse or promote products derived from this software
42 * without specific prior written permission.
43 *
44 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
45 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
46 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
47 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
48 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
49 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
50 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
51 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
52 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
53 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
54 * SUCH DAMAGE.
55 *
56 * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94
57 * $FreeBSD: src/sys/kern/uipc_socket.c,v 1.68.2.16 2001/06/14 20:46:06 ume Exp $
58 */
59
60 #include <sys/param.h>
61 #include <sys/systm.h>
62 #include <sys/filedesc.h>
63 #include <sys/proc_internal.h>
64 #include <sys/kauth.h>
65 #include <sys/file_internal.h>
66 #include <sys/fcntl.h>
67 #include <sys/malloc.h>
68 #include <sys/mbuf.h>
69 #include <sys/domain.h>
70 #include <sys/kernel.h>
71 #include <sys/event.h>
72 #include <sys/poll.h>
73 #include <sys/protosw.h>
74 #include <sys/socket.h>
75 #include <sys/socketvar.h>
76 #include <sys/resourcevar.h>
77 #include <sys/signalvar.h>
78 #include <sys/sysctl.h>
79 #include <sys/uio.h>
80 #include <sys/ev.h>
81 #include <sys/kdebug.h>
82 #include <net/route.h>
83 #include <netinet/in.h>
84 #include <netinet/in_pcb.h>
85 #include <kern/zalloc.h>
86 #include <kern/locks.h>
87 #include <machine/limits.h>
88
89 int so_cache_hw = 0;
90 int so_cache_timeouts = 0;
91 int so_cache_max_freed = 0;
92 int cached_sock_count = 0;
93 struct socket *socket_cache_head = 0;
94 struct socket *socket_cache_tail = 0;
95 u_long so_cache_time = 0;
96 int so_cache_init_done = 0;
97 struct zone *so_cache_zone;
98 extern int get_inpcb_str_size();
99 extern int get_tcp_str_size();
100
101 static lck_grp_t *so_cache_mtx_grp;
102 static lck_attr_t *so_cache_mtx_attr;
103 static lck_grp_attr_t *so_cache_mtx_grp_attr;
104 lck_mtx_t *so_cache_mtx;
105
106 #include <machine/limits.h>
107
108 static void filt_sordetach(struct knote *kn);
109 static int filt_soread(struct knote *kn, long hint);
110 static void filt_sowdetach(struct knote *kn);
111 static int filt_sowrite(struct knote *kn, long hint);
112 static int filt_solisten(struct knote *kn, long hint);
113
114 static struct filterops solisten_filtops =
115 { 1, NULL, filt_sordetach, filt_solisten };
116 static struct filterops soread_filtops =
117 { 1, NULL, filt_sordetach, filt_soread };
118 static struct filterops sowrite_filtops =
119 { 1, NULL, filt_sowdetach, filt_sowrite };
120
121 #define EVEN_MORE_LOCKING_DEBUG 0
122 int socket_debug = 0;
123 int socket_zone = M_SOCKET;
124 so_gen_t so_gencnt; /* generation count for sockets */
125
126 MALLOC_DEFINE(M_SONAME, "soname", "socket name");
127 MALLOC_DEFINE(M_PCB, "pcb", "protocol control block");
128
129 #define DBG_LAYER_IN_BEG NETDBG_CODE(DBG_NETSOCK, 0)
130 #define DBG_LAYER_IN_END NETDBG_CODE(DBG_NETSOCK, 2)
131 #define DBG_LAYER_OUT_BEG NETDBG_CODE(DBG_NETSOCK, 1)
132 #define DBG_LAYER_OUT_END NETDBG_CODE(DBG_NETSOCK, 3)
133 #define DBG_FNC_SOSEND NETDBG_CODE(DBG_NETSOCK, (4 << 8) | 1)
134 #define DBG_FNC_SORECEIVE NETDBG_CODE(DBG_NETSOCK, (8 << 8))
135 #define DBG_FNC_SOSHUTDOWN NETDBG_CODE(DBG_NETSOCK, (9 << 8))
136
137 #define MAX_SOOPTGETM_SIZE (128 * MCLBYTES)
138
139
140 SYSCTL_DECL(_kern_ipc);
141
142 static int somaxconn = SOMAXCONN;
143 SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW, &somaxconn,
144 0, "");
145
146 /* Should we get a maximum also ??? */
147 static int sosendmaxchain = 65536;
148 static int sosendminchain = 16384;
149 static int sorecvmincopy = 16384;
150 SYSCTL_INT(_kern_ipc, OID_AUTO, sosendminchain, CTLFLAG_RW, &sosendminchain,
151 0, "");
152 SYSCTL_INT(_kern_ipc, OID_AUTO, sorecvmincopy, CTLFLAG_RW, &sorecvmincopy,
153 0, "");
154
155 void so_cache_timer();
156
157 /*
158 * Socket operation routines.
159 * These routines are called by the routines in
160 * sys_socket.c or from a system process, and
161 * implement the semantics of socket operations by
162 * switching out to the protocol specific routines.
163 */
164
165 #ifdef __APPLE__
166
167 vm_size_t so_cache_zone_element_size;
168
169 static int sodelayed_copy(struct socket *so, struct uio *uio, struct mbuf **free_list, int *resid);
170
171
172 void socketinit()
173 {
174 vm_size_t str_size;
175
176 if (so_cache_init_done) {
177 printf("socketinit: already called...\n");
178 return;
179 }
180
181 /*
182 * allocate lock group attribute and group for socket cache mutex
183 */
184 so_cache_mtx_grp_attr = lck_grp_attr_alloc_init();
185 lck_grp_attr_setdefault(so_cache_mtx_grp_attr);
186
187 so_cache_mtx_grp = lck_grp_alloc_init("so_cache", so_cache_mtx_grp_attr);
188
189 /*
190 * allocate the lock attribute for socket cache mutex
191 */
192 so_cache_mtx_attr = lck_attr_alloc_init();
193 lck_attr_setdefault(so_cache_mtx_attr);
194
195 so_cache_init_done = 1;
196
197 so_cache_mtx = lck_mtx_alloc_init(so_cache_mtx_grp, so_cache_mtx_attr); /* cached sockets mutex */
198
199 if (so_cache_mtx == NULL)
200 return; /* we're hosed... */
201
202 str_size = (vm_size_t)( sizeof(struct socket) + 4 +
203 get_inpcb_str_size() + 4 +
204 get_tcp_str_size());
205 so_cache_zone = zinit (str_size, 120000*str_size, 8192, "socache zone");
206 #if TEMPDEBUG
207 printf("cached_sock_alloc -- so_cache_zone size is %x\n", str_size);
208 #endif
209 timeout(so_cache_timer, NULL, (SO_CACHE_FLUSH_INTERVAL * hz));
210
211 so_cache_zone_element_size = str_size;
212
213 sflt_init();
214
215 }
216
217 void cached_sock_alloc(so, waitok)
218 struct socket **so;
219 int waitok;
220
221 {
222 caddr_t temp;
223 register u_long offset;
224
225
226 lck_mtx_lock(so_cache_mtx);
227
228 if (cached_sock_count) {
229 cached_sock_count--;
230 *so = socket_cache_head;
231 if (*so == 0)
232 panic("cached_sock_alloc: cached sock is null");
233
234 socket_cache_head = socket_cache_head->cache_next;
235 if (socket_cache_head)
236 socket_cache_head->cache_prev = 0;
237 else
238 socket_cache_tail = 0;
239
240 lck_mtx_unlock(so_cache_mtx);
241
242 temp = (*so)->so_saved_pcb;
243 bzero((caddr_t)*so, sizeof(struct socket));
244 #if TEMPDEBUG
245 kprintf("cached_sock_alloc - retrieving cached sock %x - count == %d\n", *so,
246 cached_sock_count);
247 #endif
248 (*so)->so_saved_pcb = temp;
249 (*so)->cached_in_sock_layer = 1;
250
251 }
252 else {
253 #if TEMPDEBUG
254 kprintf("Allocating cached sock %x from memory\n", *so);
255 #endif
256
257 lck_mtx_unlock(so_cache_mtx);
258
259 if (waitok)
260 *so = (struct socket *) zalloc(so_cache_zone);
261 else
262 *so = (struct socket *) zalloc_noblock(so_cache_zone);
263
264 if (*so == 0)
265 return;
266
267 bzero((caddr_t)*so, sizeof(struct socket));
268
269 /*
270 * Define offsets for extra structures into our single block of
271 * memory. Align extra structures on longword boundaries.
272 */
273
274
275 offset = (u_long) *so;
276 offset += sizeof(struct socket);
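/*
 * Round the offset up to the next 4-byte boundary; when the low two bits
 * are set this is equivalent to offset = (offset + 3) & ~3.
 */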
277 if (offset & 0x3) {
278 offset += 4;
279 offset &= 0xfffffffc;
280 }
281 (*so)->so_saved_pcb = (caddr_t) offset;
282 offset += get_inpcb_str_size();
283 if (offset & 0x3) {
284 offset += 4;
285 offset &= 0xfffffffc;
286 }
287
288 ((struct inpcb *) (*so)->so_saved_pcb)->inp_saved_ppcb = (caddr_t) offset;
289 #if TEMPDEBUG
290 kprintf("Allocating cached socket - %x, pcb=%x tcpcb=%x\n", *so,
291 (*so)->so_saved_pcb,
292 ((struct inpcb *)(*so)->so_saved_pcb)->inp_saved_ppcb);
293 #endif
294 }
295
296 (*so)->cached_in_sock_layer = 1;
297 }
298
299
300 void cached_sock_free(so)
301 struct socket *so;
302 {
303
304 lck_mtx_lock(so_cache_mtx);
305
306 if (++cached_sock_count > MAX_CACHED_SOCKETS) {
307 --cached_sock_count;
308 lck_mtx_unlock(so_cache_mtx);
309 #if TEMPDEBUG
310 kprintf("Freeing overflowed cached socket %x\n", so);
311 #endif
312 zfree(so_cache_zone, so);
313 }
314 else {
315 #if TEMPDEBUG
316 kprintf("Freeing socket %x into cache\n", so);
317 #endif
318 if (so_cache_hw < cached_sock_count)
319 so_cache_hw = cached_sock_count;
320
321 so->cache_next = socket_cache_head;
322 so->cache_prev = 0;
323 if (socket_cache_head)
324 socket_cache_head->cache_prev = so;
325 else
326 socket_cache_tail = so;
327
328 so->cache_timestamp = so_cache_time;
329 socket_cache_head = so;
330 lck_mtx_unlock(so_cache_mtx);
331 }
332
333 #if TEMPDEBUG
334 kprintf("Freed cached sock %x into cache - count is %d\n", so, cached_sock_count);
335 #endif
336
337
338 }
339
340
341 void so_cache_timer()
342 {
343 register struct socket *p;
344 register int n_freed = 0;
345
346
347 lck_mtx_lock(so_cache_mtx);
348
349 ++so_cache_time;
350
351 while ( (p = socket_cache_tail) )
352 {
353 if ((so_cache_time - p->cache_timestamp) < SO_CACHE_TIME_LIMIT)
354 break;
355
356 so_cache_timeouts++;
357
358 if ( (socket_cache_tail = p->cache_prev) )
359 p->cache_prev->cache_next = 0;
360 if (--cached_sock_count == 0)
361 socket_cache_head = 0;
362
363
364 zfree(so_cache_zone, p);
365
366 if (++n_freed >= SO_CACHE_MAX_FREE_BATCH)
367 {
368 so_cache_max_freed++;
369 break;
370 }
371 }
372 lck_mtx_unlock(so_cache_mtx);
373
374 timeout(so_cache_timer, NULL, (SO_CACHE_FLUSH_INTERVAL * hz));
375
376
377 }
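/*
 * Socket-cache discipline (summary of the routines above): cached_sock_free()
 * keeps at most MAX_CACHED_SOCKETS entries on a timestamped free list, and
 * so_cache_timer() reschedules itself every SO_CACHE_FLUSH_INTERVAL seconds,
 * releasing entries older than SO_CACHE_TIME_LIMIT, at most
 * SO_CACHE_MAX_FREE_BATCH of them per pass.
 */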
378 #endif /* __APPLE__ */
379
380 /*
381 * Get a socket structure from our zone, and initialize it.
382 * We don't implement `waitok' yet (see comments in uipc_domain.c).
383 * Note that it would probably be better to allocate socket
384 * and PCB at the same time, but I'm not convinced that all
385 * the protocols can be easily modified to do this.
386 */
387 struct socket *
388 soalloc(waitok, dom, type)
389 int waitok;
390 int dom;
391 int type;
392 {
393 struct socket *so;
394
395 if ((dom == PF_INET) && (type == SOCK_STREAM))
396 cached_sock_alloc(&so, waitok);
397 else
398 {
399 MALLOC_ZONE(so, struct socket *, sizeof(*so), socket_zone, M_WAITOK);
400 if (so)
401 bzero(so, sizeof *so);
402 }
403 /* XXX race condition for reentrant kernel */
404 //###LD Atomic add for so_gencnt
405 if (so) {
406 so->so_gencnt = ++so_gencnt;
407 so->so_zone = socket_zone;
408 }
409
410 return so;
411 }
412
413 int
414 socreate(dom, aso, type, proto)
415 int dom;
416 struct socket **aso;
417 register int type;
418 int proto;
419 {
420 struct proc *p = current_proc();
421 register struct protosw *prp;
422 register struct socket *so;
423 register int error = 0;
424 #if TCPDEBUG
425 extern int tcpconsdebug;
426 #endif
427 if (proto)
428 prp = pffindproto(dom, proto, type);
429 else
430 prp = pffindtype(dom, type);
431
432 if (prp == 0 || prp->pr_usrreqs->pru_attach == 0)
433 return (EPROTONOSUPPORT);
434 #ifndef __APPLE__
435
436 if (p->p_prison && jail_socket_unixiproute_only &&
437 prp->pr_domain->dom_family != PF_LOCAL &&
438 prp->pr_domain->dom_family != PF_INET &&
439 prp->pr_domain->dom_family != PF_ROUTE) {
440 return (EPROTONOSUPPORT);
441 }
442
443 #endif
444 if (prp->pr_type != type)
445 return (EPROTOTYPE);
446 so = soalloc(p != 0, dom, type);
447 if (so == 0)
448 return (ENOBUFS);
449
450 TAILQ_INIT(&so->so_incomp);
451 TAILQ_INIT(&so->so_comp);
452 so->so_type = type;
453
454 #ifdef __APPLE__
455 if (p != 0) {
456 so->so_uid = kauth_cred_getuid(kauth_cred_get());
457 if (!suser(kauth_cred_get(),NULL))
458 so->so_state = SS_PRIV;
459 }
460 #else
461 so->so_cred = kauth_cred_get_with_ref();
462 #endif
463 so->so_proto = prp;
464 #ifdef __APPLE__
465 so->so_rcv.sb_flags |= SB_RECV; /* XXX */
466 so->so_rcv.sb_so = so->so_snd.sb_so = so;
467 #endif
468
469 //### Attachment will create the per-pcb lock if necessary and increase the refcount
470 so->so_usecount++; /* for creation, make sure it's done before socket is inserted in lists */
471
472 error = (*prp->pr_usrreqs->pru_attach)(so, proto, p);
473 if (error) {
474 /*
475 * Warning:
476 * If so_pcb is not zero, the socket will be leaked,
477 * so the protocol attachment handler must be coded carefully
478 */
479 so->so_state |= SS_NOFDREF;
480 so->so_usecount--;
481 sofreelastref(so, 1); /* will deallocate the socket */
482 return (error);
483 }
484 #ifdef __APPLE__
485 prp->pr_domain->dom_refs++;
486 TAILQ_INIT(&so->so_evlist);
487
488 /* Attach socket filters for this protocol */
489 sflt_initsock(so);
490 #if TCPDEBUG
491 if (tcpconsdebug == 2)
492 so->so_options |= SO_DEBUG;
493 #endif
494 #endif
495
496 *aso = so;
497 return (0);
498 }
499
500 int
501 sobind(so, nam)
502 struct socket *so;
503 struct sockaddr *nam;
504
505 {
506 struct proc *p = current_proc();
507 int error = 0;
508 struct socket_filter_entry *filter;
509 int filtered = 0;
510
511 socket_lock(so, 1);
512
513 /* Socket filter */
514 error = 0;
515 for (filter = so->so_filt; filter && (error == 0);
516 filter = filter->sfe_next_onsocket) {
517 if (filter->sfe_filter->sf_filter.sf_bind) {
518 if (filtered == 0) {
519 filtered = 1;
520 sflt_use(so);
521 socket_unlock(so, 0);
522 }
523 error = filter->sfe_filter->sf_filter.sf_bind(
524 filter->sfe_cookie, so, nam);
525 }
526 }
527 if (filtered != 0) {
528 socket_lock(so, 0);
529 sflt_unuse(so);
530 }
531 /* End socket filter */
532
533 if (error == 0)
534 error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p);
535
536 socket_unlock(so, 1);
537
538 if (error == EJUSTRETURN)
539 error = 0;
540
541 return (error);
542 }
543
544 void
545 sodealloc(so)
546 struct socket *so;
547 {
548 so->so_gencnt = ++so_gencnt;
549
550 #ifndef __APPLE__
551 if (so->so_rcv.sb_hiwat)
552 (void)chgsbsize(so->so_cred->cr_uidinfo,
553 &so->so_rcv.sb_hiwat, 0, RLIM_INFINITY);
554 if (so->so_snd.sb_hiwat)
555 (void)chgsbsize(so->so_cred->cr_uidinfo,
556 &so->so_snd.sb_hiwat, 0, RLIM_INFINITY);
557 #ifdef INET
558 if (so->so_accf != NULL) {
559 if (so->so_accf->so_accept_filter != NULL &&
560 so->so_accf->so_accept_filter->accf_destroy != NULL) {
561 so->so_accf->so_accept_filter->accf_destroy(so);
562 }
563 if (so->so_accf->so_accept_filter_str != NULL)
564 FREE(so->so_accf->so_accept_filter_str, M_ACCF);
565 FREE(so->so_accf, M_ACCF);
566 }
567 #endif /* INET */
568 kauth_cred_rele(so->so_cred);
569 zfreei(so->so_zone, so);
570 #else
571 if (so->cached_in_sock_layer == 1)
572 cached_sock_free(so);
573 else {
574 if (so->cached_in_sock_layer == -1)
575 panic("sodealloc: double dealloc: so=%x\n", so);
576 so->cached_in_sock_layer = -1;
577 FREE_ZONE(so, sizeof(*so), so->so_zone);
578 }
579 #endif /* __APPLE__ */
580 }
581
582 int
583 solisten(so, backlog)
584 register struct socket *so;
585 int backlog;
586
587 {
588 struct proc *p = current_proc();
589 int error;
590
591 socket_lock(so, 1);
592
593 {
594 struct socket_filter_entry *filter;
595 int filtered = 0;
596 error = 0;
597 for (filter = so->so_filt; filter && (error == 0);
598 filter = filter->sfe_next_onsocket) {
599 if (filter->sfe_filter->sf_filter.sf_listen) {
600 if (filtered == 0) {
601 filtered = 1;
602 sflt_use(so);
603 socket_unlock(so, 0);
604 }
605 error = filter->sfe_filter->sf_filter.sf_listen(
606 filter->sfe_cookie, so);
607 }
608 }
609 if (filtered != 0) {
610 socket_lock(so, 0);
611 sflt_unuse(so);
612 }
613 }
614
615 if (error == 0) {
616 error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p);
617 }
618
619 if (error) {
620 socket_unlock(so, 1);
621 if (error == EJUSTRETURN)
622 error = 0;
623 return (error);
624 }
625
626 if (TAILQ_EMPTY(&so->so_comp))
627 so->so_options |= SO_ACCEPTCONN;
628 if (backlog < 0 || backlog > somaxconn)
629 backlog = somaxconn;
630 so->so_qlimit = backlog;
631
632 socket_unlock(so, 1);
633 return (0);
634 }
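/*
 * Illustrative sketch (not compiled): how an in-kernel caller might use
 * socreate(), sobind() and solisten() together. The port number, backlog
 * and error handling are assumptions made for the example only.
 */
#if 0
static int
example_kernel_listener(struct socket **sop)
{
	struct sockaddr_in sin;
	int error;

	/* PF_INET/SOCK_STREAM takes the cached-socket path in soalloc() */
	error = socreate(PF_INET, sop, SOCK_STREAM, IPPROTO_TCP);
	if (error)
		return (error);

	bzero(&sin, sizeof (sin));
	sin.sin_len = sizeof (sin);
	sin.sin_family = AF_INET;
	sin.sin_port = htons(12345);		/* example port (assumption) */
	sin.sin_addr.s_addr = htonl(INADDR_ANY);

	/* sobind() and solisten() run any registered socket filters first */
	error = sobind(*sop, (struct sockaddr *)&sin);
	if (error == 0)
		error = solisten(*sop, SOMAXCONN);
	if (error)
		(void) soclose(*sop);
	return (error);
}
#endif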
635
636 void
637 sofreelastref(so, dealloc)
638 register struct socket *so;
639 int dealloc;
640 {
641 int error;
642 struct socket *head = so->so_head;
643
644 /*### Assume socket is locked */
645
646 /* Remove any filters - may be called more than once */
647 sflt_termsock(so);
648
649 if ((!(so->so_flags & SOF_PCBCLEARING)) || ((so->so_state & SS_NOFDREF) == 0)) {
650 #ifdef __APPLE__
651 selthreadclear(&so->so_snd.sb_sel);
652 selthreadclear(&so->so_rcv.sb_sel);
653 so->so_rcv.sb_flags &= ~SB_UPCALL;
654 so->so_snd.sb_flags &= ~SB_UPCALL;
655 #endif
656 return;
657 }
658 if (head != NULL) {
659 socket_lock(head, 1);
660 if (so->so_state & SS_INCOMP) {
661 TAILQ_REMOVE(&head->so_incomp, so, so_list);
662 head->so_incqlen--;
663 } else if (so->so_state & SS_COMP) {
664 /*
665 * We must not decommission a socket that's
666 * on the accept(2) queue. If we do, then
667 * accept(2) may hang after select(2) indicated
668 * that the listening socket was ready.
669 */
670 #ifdef __APPLE__
671 selthreadclear(&so->so_snd.sb_sel);
672 selthreadclear(&so->so_rcv.sb_sel);
673 so->so_rcv.sb_flags &= ~SB_UPCALL;
674 so->so_snd.sb_flags &= ~SB_UPCALL;
675 #endif
676 socket_unlock(head, 1);
677 return;
678 } else {
679 panic("sofree: not queued");
680 }
681 head->so_qlen--;
682 so->so_state &= ~SS_INCOMP;
683 so->so_head = NULL;
684 socket_unlock(head, 1);
685 }
686 #ifdef __APPLE__
687 selthreadclear(&so->so_snd.sb_sel);
688 sbrelease(&so->so_snd);
689 #endif
690 sorflush(so);
691
692 /* 3932268: disable upcall */
693 so->so_rcv.sb_flags &= ~SB_UPCALL;
694 so->so_snd.sb_flags &= ~SB_UPCALL;
695
696 if (dealloc)
697 sodealloc(so);
698 }
699
700 /*
701 * Close a socket on last file table reference removal.
702 * Initiate disconnect if connected.
703 * Free socket when disconnect complete.
704 */
705 int
706 soclose_locked(so)
707 register struct socket *so;
708 {
709 int error = 0;
710 lck_mtx_t * mutex_held;
711 struct timespec ts;
712
713 if (so->so_usecount == 0) {
714 panic("soclose: so=%x refcount=0\n", so);
715 }
716
717 sflt_notify(so, sock_evt_closing, NULL);
718
719 if ((so->so_options & SO_ACCEPTCONN)) {
720 struct socket *sp;
721
722 /* We do not want new connections to be added to the connection queues */
723 so->so_options &= ~SO_ACCEPTCONN;
724
725 while ((sp = TAILQ_FIRST(&so->so_incomp)) != NULL) {
726 /* A bit tricky here. We need to keep
727 * a lock if it's a protocol global lock
728 * but we want the head, not the socket locked
729 * in the case of per-socket lock...
730 */
731 if (so->so_proto->pr_getlock != NULL) {
732 socket_unlock(so, 0);
733 socket_lock(sp, 1);
734 }
735 (void) soabort(sp);
736 if (so->so_proto->pr_getlock != NULL) {
737 socket_unlock(sp, 1);
738 socket_lock(so, 0);
739 }
740 }
741
742 while ((sp = TAILQ_FIRST(&so->so_comp)) != NULL) {
743 /* Dequeue from so_comp since sofree() won't do it */
744 TAILQ_REMOVE(&so->so_comp, sp, so_list);
745 so->so_qlen--;
746
747 if (so->so_proto->pr_getlock != NULL) {
748 socket_unlock(so, 0);
749 socket_lock(sp, 1);
750 }
751
752 sp->so_state &= ~SS_COMP;
753 sp->so_head = NULL;
754
755 (void) soabort(sp);
756 if (so->so_proto->pr_getlock != NULL) {
757 socket_unlock(sp, 1);
758 socket_lock(so, 0);
759 }
760 }
761 }
762 if (so->so_pcb == 0) {
763 /* 3915887: mark the socket as ready for dealloc */
764 so->so_flags |= SOF_PCBCLEARING;
765 goto discard;
766 }
767 if (so->so_state & SS_ISCONNECTED) {
768 if ((so->so_state & SS_ISDISCONNECTING) == 0) {
769 error = sodisconnectlocked(so);
770 if (error)
771 goto drop;
772 }
773 if (so->so_options & SO_LINGER) {
774 if ((so->so_state & SS_ISDISCONNECTING) &&
775 (so->so_state & SS_NBIO))
776 goto drop;
777 if (so->so_proto->pr_getlock != NULL)
778 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
779 else
780 mutex_held = so->so_proto->pr_domain->dom_mtx;
781 while (so->so_state & SS_ISCONNECTED) {
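/*
 * The arithmetic below treats so_linger as 1/100ths of a second,
 * splitting it into whole seconds plus a nanosecond remainder for
 * msleep().
 */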
782 ts.tv_sec = (so->so_linger/100);
783 ts.tv_nsec = (so->so_linger % 100) * NSEC_PER_USEC * 1000 * 10;
784 error = msleep((caddr_t)&so->so_timeo, mutex_held,
785 PSOCK | PCATCH, "soclos", &ts);
786 if (error) {
787 /* It's OK when the timer fires, don't report an error */
788 if (error == EWOULDBLOCK)
789 error = 0;
790 break;
791 }
792 }
793 }
794 }
795 drop:
796 if (so->so_usecount == 0)
797 panic("soclose: usecount is zero so=%x\n", so);
798 if (so->so_pcb && !(so->so_flags & SOF_PCBCLEARING)) {
799 int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so);
800 if (error == 0)
801 error = error2;
802 }
803 if (so->so_usecount <= 0)
804 panic("soclose: usecount is zero so=%x\n", so);
805 discard:
806 if (so->so_pcb && so->so_state & SS_NOFDREF)
807 panic("soclose: NOFDREF");
808 so->so_state |= SS_NOFDREF;
809 #ifdef __APPLE__
810 so->so_proto->pr_domain->dom_refs--;
811 evsofree(so);
812 #endif
813 so->so_usecount--;
814 sofree(so);
815 return (error);
816 }
817
818 int
819 soclose(so)
820 register struct socket *so;
821 {
822 int error = 0;
823 socket_lock(so, 1);
824 if (so->so_retaincnt == 0)
825 error = soclose_locked(so);
826 else { /* if the FD is going away, but socket is retained in kernel remove its reference */
827 so->so_usecount--;
828 if (so->so_usecount < 2)
829 panic("soclose: retaincnt non null and so=%x usecount=%x\n", so, so->so_usecount);
830 }
831 socket_unlock(so, 1);
832 return (error);
833 }
834
835
836 /*
837 * Must be called at splnet...
838 */
839 //#### Should already be locked
840 int
841 soabort(so)
842 struct socket *so;
843 {
844 int error;
845
846 #ifdef MORE_LOCKING_DEBUG
847 lck_mtx_t * mutex_held;
848
849 if (so->so_proto->pr_getlock != NULL)
850 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
851 else
852 mutex_held = so->so_proto->pr_domain->dom_mtx;
853 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
854 #endif
855
856 error = (*so->so_proto->pr_usrreqs->pru_abort)(so);
857 if (error) {
858 sofree(so);
859 return error;
860 }
861 return (0);
862 }
863
864 int
865 soacceptlock(so, nam, dolock)
866 register struct socket *so;
867 struct sockaddr **nam;
868 int dolock;
869 {
870 int error;
871
872 if (dolock) socket_lock(so, 1);
873
874 if ((so->so_state & SS_NOFDREF) == 0)
875 panic("soaccept: !NOFDREF");
876 so->so_state &= ~SS_NOFDREF;
877 error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam);
878
879 if (dolock) socket_unlock(so, 1);
880 return (error);
881 }
882 int
883 soaccept(so, nam)
884 register struct socket *so;
885 struct sockaddr **nam;
886 {
887 return (soacceptlock(so, nam, 1));
888 }
889
890 int
891 soconnectlock(so, nam, dolock)
892 register struct socket *so;
893 struct sockaddr *nam;
894 int dolock;
895
896 {
897 int s;
898 int error;
899 struct proc *p = current_proc();
900
901 if (dolock) socket_lock(so, 1);
902
903 if (so->so_options & SO_ACCEPTCONN) {
904 if (dolock) socket_unlock(so, 1);
905 return (EOPNOTSUPP);
906 }
907 /*
908 * If protocol is connection-based, can only connect once.
909 * Otherwise, if connected, try to disconnect first.
910 * This allows user to disconnect by connecting to, e.g.,
911 * a null address.
912 */
913 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
914 ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
915 (error = sodisconnectlocked(so))))
916 error = EISCONN;
917 else {
918 /*
919 * Run connect filter before calling protocol:
920 * - non-blocking connect returns before completion;
921 */
922 {
923 struct socket_filter_entry *filter;
924 int filtered = 0;
925 error = 0;
926 for (filter = so->so_filt; filter && (error == 0);
927 filter = filter->sfe_next_onsocket) {
928 if (filter->sfe_filter->sf_filter.sf_connect_out) {
929 if (filtered == 0) {
930 filtered = 1;
931 sflt_use(so);
932 socket_unlock(so, 0);
933 }
934 error = filter->sfe_filter->sf_filter.sf_connect_out(
935 filter->sfe_cookie, so, nam);
936 }
937 }
938 if (filtered != 0) {
939 socket_lock(so, 0);
940 sflt_unuse(so);
941 }
942 }
943 if (error) {
944 if (error == EJUSTRETURN)
945 error = 0;
946 if (dolock) socket_unlock(so, 1);
947 return error;
948 }
949
950 error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, p);
951 }
952 if (dolock) socket_unlock(so, 1);
953 return (error);
954 }
955
956 int
957 soconnect(so, nam)
958 register struct socket *so;
959 struct sockaddr *nam;
960 {
961 return (soconnectlock(so, nam, 1));
962 }
963
964 int
965 soconnect2(so1, so2)
966 register struct socket *so1;
967 struct socket *so2;
968 {
969 int error;
970 //####### Assumes so1 is already locked
971
972 socket_lock(so2, 1);
973
974 error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2);
975
976 socket_unlock(so2, 1);
977 return (error);
978 }
979
980
981 int
982 sodisconnectlocked(so)
983 register struct socket *so;
984 {
985 int error;
986
987 if ((so->so_state & SS_ISCONNECTED) == 0) {
988 error = ENOTCONN;
989 goto bad;
990 }
991 if (so->so_state & SS_ISDISCONNECTING) {
992 error = EALREADY;
993 goto bad;
994 }
995
996 error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so);
997
998 if (error == 0) {
999 sflt_notify(so, sock_evt_disconnected, NULL);
1000 }
1001
1002 bad:
1003 return (error);
1004 }
1005 //### Locking version
1006 int
1007 sodisconnect(so)
1008 register struct socket *so;
1009 {
1010 int error;
1011
1012 socket_lock(so, 1);
1013 error = sodisconnectlocked(so);
1014 socket_unlock(so, 1);
1015 return(error);
1016 }
1017
1018 #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_DONTWAIT : M_WAIT)
1019
1020 /*
1021 * sosendcheck will lock the socket buffer if it isn't locked and
1022 * verify that there is space for the data being inserted.
1023 */
1024
1025 static int
1026 sosendcheck(
1027 struct socket *so,
1028 struct sockaddr *addr,
1029 long resid,
1030 long clen,
1031 long atomic,
1032 int flags,
1033 int *sblocked)
1034 {
1035 int error = 0;
1036 long space;
1037 int assumelock = 0;
1038
1039 restart:
1040 if (*sblocked == 0) {
1041 if ((so->so_snd.sb_flags & SB_LOCK) != 0 &&
1042 so->so_send_filt_thread != 0 &&
1043 so->so_send_filt_thread == current_thread()) {
1044 /*
1045 * We're being called recursively from a filter,
1046 * allow this to continue. Radar 4150520.
1047 * Don't set sblocked because we don't want
1048 * to perform an unlock later.
1049 */
1050 assumelock = 1;
1051 }
1052 else {
1053 error = sblock(&so->so_snd, SBLOCKWAIT(flags));
1054 if (error) {
1055 return error;
1056 }
1057 *sblocked = 1;
1058 }
1059 }
1060
1061 if (so->so_state & SS_CANTSENDMORE)
1062 return EPIPE;
1063
1064 if (so->so_error) {
1065 error = so->so_error;
1066 so->so_error = 0;
1067 return error;
1068 }
1069
1070 if ((so->so_state & SS_ISCONNECTED) == 0) {
1071 /*
1072 * `sendto' and `sendmsg' are allowed on a connection-
1073 * based socket if it supports implied connect.
1074 * Return ENOTCONN if not connected and no address is
1075 * supplied.
1076 */
1077 if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
1078 (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
1079 if ((so->so_state & SS_ISCONFIRMING) == 0 &&
1080 !(resid == 0 && clen != 0))
1081 return ENOTCONN;
1082 } else if (addr == 0 && !(flags&MSG_HOLD))
1083 return (so->so_proto->pr_flags & PR_CONNREQUIRED) ? ENOTCONN : EDESTADDRREQ;
1084 }
1085 space = sbspace(&so->so_snd);
1086 if (flags & MSG_OOB)
1087 space += 1024;
1088 if ((atomic && resid > so->so_snd.sb_hiwat) ||
1089 clen > so->so_snd.sb_hiwat)
1090 return EMSGSIZE;
1091 if (space < resid + clen &&
1092 (atomic || space < so->so_snd.sb_lowat || space < clen)) {
1093 if ((so->so_state & SS_NBIO) || (flags & MSG_NBIO) || assumelock) {
1094 return EWOULDBLOCK;
1095 }
1096 sbunlock(&so->so_snd, 1);
1097 error = sbwait(&so->so_snd);
1098 if (error) {
1099 return error;
1100 }
1101 goto restart;
1102 }
1103
1104 return 0;
1105 }
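/*
 * Summary of sosendcheck() outcomes, derived from the checks above: 0 means
 * the send buffer has been locked (unless called recursively from a filter)
 * and has room for resid + clen; EPIPE means the send side is already shut
 * down; EMSGSIZE means an atomic send or the control data exceeds sb_hiwat;
 * ENOTCONN/EDESTADDRREQ mean there is no connection and no destination
 * address; EWOULDBLOCK means no room on a non-blocking socket or call; any
 * pending so_error is returned and cleared.
 */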
1106
1107 /*
1108 * Send on a socket.
1109 * If send must go all at once and message is larger than
1110 * send buffering, then hard error.
1111 * Lock against other senders.
1112 * If must go all at once and not enough room now, then
1113 * inform user that this would block and do nothing.
1114 * Otherwise, if nonblocking, send as much as possible.
1115 * The data to be sent is described by "uio" if nonzero,
1116 * otherwise by the mbuf chain "top" (which must be null
1117 * if uio is not). Data provided in mbuf chain must be small
1118 * enough to send all at once.
1119 *
1120 * Returns nonzero on error, timeout or signal; callers
1121 * must check for short counts if EINTR/ERESTART are returned.
1122 * Data and control buffers are freed on return.
1123 * Experiment:
1124 * MSG_HOLD: go thru most of sosend(), but just enqueue the mbuf
1125 * MSG_SEND: go thru as for MSG_HOLD on current fragment, then
1126 * point at the mbuf chain being constructed and go from there.
1127 */
1128 int
1129 sosend(so, addr, uio, top, control, flags)
1130 register struct socket *so;
1131 struct sockaddr *addr;
1132 struct uio *uio;
1133 struct mbuf *top;
1134 struct mbuf *control;
1135 int flags;
1136
1137 {
1138 struct mbuf **mp;
1139 register struct mbuf *m, *freelist = NULL;
1140 register long space, len, resid;
1141 int clen = 0, error, dontroute, mlen, sendflags;
1142 int atomic = sosendallatonce(so) || top;
1143 int sblocked = 0;
1144 struct proc *p = current_proc();
1145
1146 if (uio)
1147 // LP64todo - fix this!
1148 resid = uio_resid(uio);
1149 else
1150 resid = top->m_pkthdr.len;
1151
1152 KERNEL_DEBUG((DBG_FNC_SOSEND | DBG_FUNC_START),
1153 so,
1154 resid,
1155 so->so_snd.sb_cc,
1156 so->so_snd.sb_lowat,
1157 so->so_snd.sb_hiwat);
1158
1159 socket_lock(so, 1);
1160
1161 /*
1162 * In theory resid should be unsigned.
1163 * However, space must be signed, as it might be less than 0
1164 * if we over-committed, and we must use a signed comparison
1165 * of space and resid. On the other hand, a negative resid
1166 * causes us to loop sending 0-length segments to the protocol.
1167 *
1168 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
1169 * type sockets since that's an error.
1170 */
1171 if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
1172 error = EINVAL;
1173 socket_unlock(so, 1);
1174 goto out;
1175 }
1176
1177 dontroute =
1178 (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
1179 (so->so_proto->pr_flags & PR_ATOMIC);
1180 if (p)
1181 p->p_stats->p_ru.ru_msgsnd++;
1182 if (control)
1183 clen = control->m_len;
1184
1185 do {
1186 error = sosendcheck(so, addr, resid, clen, atomic, flags, &sblocked);
1187 if (error) {
1188 goto release;
1189 }
1190 mp = &top;
1191 space = sbspace(&so->so_snd) - clen + ((flags & MSG_OOB) ? 1024 : 0);
1192
1193 do {
1194
1195 if (uio == NULL) {
1196 /*
1197 * Data is prepackaged in "top".
1198 */
1199 resid = 0;
1200 if (flags & MSG_EOR)
1201 top->m_flags |= M_EOR;
1202 } else {
1203 int chainlength;
1204 int bytes_to_copy;
1205
1206 bytes_to_copy = min(resid, space);
1207
1208 if (sosendminchain > 0) {
1209 chainlength = 0;
1210 } else
1211 chainlength = sosendmaxchain;
1212
1213 socket_unlock(so, 0);
1214
1215 do {
1216 int num_needed;
1217 int hdrs_needed = (top == 0) ? 1 : 0;
1218
1219 /*
1220 * try to maintain a local cache of mbuf clusters needed to complete this write
1221 * the list is further limited to the number that are currently needed to fill the socket
1222 * this mechanism allows a large number of mbufs/clusters to be grabbed under a single
1223 * mbuf lock... if we can't get any clusters, then fall back to trying for mbufs
1224 * if we fail early (or miscalculate the number needed) make sure to release any clusters
1225 * we haven't yet consumed.
1226 */
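/* For example, with NBPG == 4096 a 10000 byte write first asks for
 * 10000/4096 == 2 page-sized clusters, then bumps num_needed to 3 because
 * the 1808 byte remainder is still at least MINCLSIZE. */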
1227 if (freelist == NULL && bytes_to_copy > MCLBYTES) {
1228 num_needed = bytes_to_copy / NBPG;
1229
1230 if ((bytes_to_copy - (num_needed * NBPG)) >= MINCLSIZE)
1231 num_needed++;
1232
1233 freelist = m_getpackets_internal(&num_needed, hdrs_needed, M_WAIT, 0, NBPG);
1234 /* Fall back to cluster size if allocation failed */
1235 }
1236
1237 if (freelist == NULL && bytes_to_copy > MINCLSIZE) {
1238 num_needed = bytes_to_copy / MCLBYTES;
1239
1240 if ((bytes_to_copy - (num_needed * MCLBYTES)) >= MINCLSIZE)
1241 num_needed++;
1242
1243 freelist = m_getpackets_internal(&num_needed, hdrs_needed, M_WAIT, 0, MCLBYTES);
1244 /* Fall back to a single mbuf if allocation failed */
1245 }
1246
1247 if (freelist == NULL) {
1248 if (top == 0)
1249 MGETHDR(freelist, M_WAIT, MT_DATA);
1250 else
1251 MGET(freelist, M_WAIT, MT_DATA);
1252
1253 if (freelist == NULL) {
1254 error = ENOBUFS;
1255 socket_lock(so, 0);
1256 goto release;
1257 }
1258 /*
1259 * For datagram protocols, leave room
1260 * for protocol headers in first mbuf.
1261 */
1262 if (atomic && top == 0 && bytes_to_copy < MHLEN)
1263 MH_ALIGN(freelist, bytes_to_copy);
1264 }
1265 m = freelist;
1266 freelist = m->m_next;
1267 m->m_next = NULL;
1268
1269 if ((m->m_flags & M_EXT))
1270 mlen = m->m_ext.ext_size;
1271 else if ((m->m_flags & M_PKTHDR))
1272 mlen = MHLEN - m_leadingspace(m);
1273 else
1274 mlen = MLEN;
1275 len = min(mlen, bytes_to_copy);
1276
1277 chainlength += len;
1278
1279 space -= len;
1280
1281 error = uiomove(mtod(m, caddr_t), (int)len, uio);
1282
1283 // LP64todo - fix this!
1284 resid = uio_resid(uio);
1285
1286 m->m_len = len;
1287 *mp = m;
1288 top->m_pkthdr.len += len;
1289 if (error)
1290 break;
1291 mp = &m->m_next;
1292 if (resid <= 0) {
1293 if (flags & MSG_EOR)
1294 top->m_flags |= M_EOR;
1295 break;
1296 }
1297 bytes_to_copy = min(resid, space);
1298
1299 } while (space > 0 && (chainlength < sosendmaxchain || atomic || resid < MINCLSIZE));
1300
1301 socket_lock(so, 0);
1302
1303 if (error)
1304 goto release;
1305 }
1306
1307 if (flags & (MSG_HOLD|MSG_SEND))
1308 {
1309 /* Enqueue for later, go away if HOLD */
1310 register struct mbuf *mb1;
1311 if (so->so_temp && (flags & MSG_FLUSH))
1312 {
1313 m_freem(so->so_temp);
1314 so->so_temp = NULL;
1315 }
1316 if (so->so_temp)
1317 so->so_tail->m_next = top;
1318 else
1319 so->so_temp = top;
1320 mb1 = top;
1321 while (mb1->m_next)
1322 mb1 = mb1->m_next;
1323 so->so_tail = mb1;
1324 if (flags & MSG_HOLD)
1325 {
1326 top = NULL;
1327 goto release;
1328 }
1329 top = so->so_temp;
1330 }
1331 if (dontroute)
1332 so->so_options |= SO_DONTROUTE;
1333 /* Compute flags here, for pru_send and NKEs */
1334 sendflags = (flags & MSG_OOB) ? PRUS_OOB :
1335 /*
1336 * If the user set MSG_EOF, the protocol
1337 * understands this flag and nothing left to
1338 * send then use PRU_SEND_EOF instead of PRU_SEND.
1339 */
1340 ((flags & MSG_EOF) &&
1341 (so->so_proto->pr_flags & PR_IMPLOPCL) &&
1342 (resid <= 0)) ?
1343 PRUS_EOF :
1344 /* If there is more to send set PRUS_MORETOCOME */
1345 (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0;
1346
1347 /*
1348 * Socket filter processing
1349 */
1350 {
1351 struct socket_filter_entry *filter;
1352 int filtered;
1353
1354 filtered = 0;
1355 error = 0;
1356 for (filter = so->so_filt; filter && (error == 0);
1357 filter = filter->sfe_next_onsocket) {
1358 if (filter->sfe_filter->sf_filter.sf_data_out) {
1359 int so_flags = 0;
1360 if (filtered == 0) {
1361 filtered = 1;
1362 so->so_send_filt_thread = current_thread();
1363 sflt_use(so);
1364 socket_unlock(so, 0);
1365 so_flags = (sendflags & MSG_OOB) ? sock_data_filt_flag_oob : 0;
1366 }
1367 error = filter->sfe_filter->sf_filter.sf_data_out(
1368 filter->sfe_cookie, so, addr, &top, &control, so_flags);
1369 }
1370 }
1371
1372 if (filtered) {
1373 /*
1374 * At this point, we've run at least one filter.
1375 * The socket is unlocked as is the socket buffer.
1376 */
1377 socket_lock(so, 0);
1378 sflt_unuse(so);
1379 so->so_send_filt_thread = 0;
1380 if (error) {
1381 if (error == EJUSTRETURN) {
1382 error = 0;
1383 clen = 0;
1384 control = 0;
1385 top = 0;
1386 }
1387
1388 goto release;
1389 }
1390 }
1391 }
1392 /*
1393 * End Socket filter processing
1394 */
1395
1396 if (error == EJUSTRETURN) {
1397 /* A socket filter handled this data */
1398 error = 0;
1399 }
1400 else {
1401 error = (*so->so_proto->pr_usrreqs->pru_send)(so,
1402 sendflags, top, addr, control, p);
1403 }
1404 #ifdef __APPLE__
1405 if (flags & MSG_SEND)
1406 so->so_temp = NULL;
1407 #endif
1408 if (dontroute)
1409 so->so_options &= ~SO_DONTROUTE;
1410 clen = 0;
1411 control = 0;
1412 top = 0;
1413 mp = &top;
1414 if (error)
1415 goto release;
1416 } while (resid && space > 0);
1417 } while (resid);
1418
1419 release:
1420 if (sblocked)
1421 sbunlock(&so->so_snd, 0); /* will unlock socket */
1422 else
1423 socket_unlock(so, 1);
1424 out:
1425 if (top)
1426 m_freem(top);
1427 if (control)
1428 m_freem(control);
1429 if (freelist)
1430 m_freem_list(freelist);
1431
1432 KERNEL_DEBUG(DBG_FNC_SOSEND | DBG_FUNC_END,
1433 so,
1434 resid,
1435 so->so_snd.sb_cc,
1436 space,
1437 error);
1438
1439 return (error);
1440 }
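/*
 * Illustrative sketch (not compiled): sending a prepackaged mbuf chain with
 * sosend(). With uio == NULL the data is described entirely by "top", which
 * must be a packet header mbuf with m_pkthdr.len set; sosend() frees both
 * the data and any control mbufs on return, even on error.
 */
#if 0
static int
example_send_chain(struct socket *so, struct mbuf *top)
{
	return (sosend(so, (struct sockaddr *)NULL, (struct uio *)NULL,
	    top, (struct mbuf *)NULL, 0));
}
#endif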
1441
1442 /*
1443 * Implement receive operations on a socket.
1444 * We depend on the way that records are added to the sockbuf
1445 * by sbappend*. In particular, each record (mbufs linked through m_next)
1446 * must begin with an address if the protocol so specifies,
1447 * followed by an optional mbuf or mbufs containing ancillary data,
1448 * and then zero or more mbufs of data.
1449 * In order to avoid blocking network interrupts for the entire time here,
1450 * we splx() while doing the actual copy to user space.
1451 * Although the sockbuf is locked, new data may still be appended,
1452 * and thus we must maintain consistency of the sockbuf during that time.
1453 *
1454 * The caller may receive the data as a single mbuf chain by supplying
1455 * an mbuf **mp0 for use in returning the chain. The uio is then used
1456 * only for the count in uio_resid.
1457 */
1458 int
1459 soreceive(so, psa, uio, mp0, controlp, flagsp)
1460 register struct socket *so;
1461 struct sockaddr **psa;
1462 struct uio *uio;
1463 struct mbuf **mp0;
1464 struct mbuf **controlp;
1465 int *flagsp;
1466 {
1467 register struct mbuf *m, **mp, *ml = NULL;
1468 register int flags, len, error, offset;
1469 struct protosw *pr = so->so_proto;
1470 struct mbuf *nextrecord;
1471 int moff, type = 0;
1472 // LP64todo - fix this!
1473 int orig_resid = uio_resid(uio);
1474 volatile struct mbuf *free_list;
1475 volatile int delayed_copy_len;
1476 int can_delay;
1477 int need_event;
1478 struct proc *p = current_proc();
1479
1480
1481 // LP64todo - fix this!
1482 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_START,
1483 so,
1484 uio_resid(uio),
1485 so->so_rcv.sb_cc,
1486 so->so_rcv.sb_lowat,
1487 so->so_rcv.sb_hiwat);
1488
1489 socket_lock(so, 1);
1490
1491 #ifdef MORE_LOCKING_DEBUG
1492 if (so->so_usecount == 1)
1493 panic("soreceive: so=%x no other reference on socket\n", so);
1494 #endif
1495 mp = mp0;
1496 if (psa)
1497 *psa = 0;
1498 if (controlp)
1499 *controlp = 0;
1500 if (flagsp)
1501 flags = *flagsp &~ MSG_EOR;
1502 else
1503 flags = 0;
1504 /*
1505 * When SO_WANTOOBFLAG is set we try to get out-of-band data
1506 * regardless of the flags argument. Here is the case where
1507 * out-of-band data is not inline.
1508 */
1509 if ((flags & MSG_OOB) ||
1510 ((so->so_options & SO_WANTOOBFLAG) != 0 &&
1511 (so->so_options & SO_OOBINLINE) == 0 &&
1512 (so->so_oobmark || (so->so_state & SS_RCVATMARK)))) {
1513 m = m_get(M_WAIT, MT_DATA);
1514 if (m == NULL) {
1515 socket_unlock(so, 1);
1516 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, ENOBUFS,0,0,0,0);
1517 return (ENOBUFS);
1518 }
1519 error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
1520 if (error)
1521 goto bad;
1522 socket_unlock(so, 0);
1523 do {
1524 // LP64todo - fix this!
1525 error = uiomove(mtod(m, caddr_t),
1526 (int) min(uio_resid(uio), m->m_len), uio);
1527 m = m_free(m);
1528 } while (uio_resid(uio) && error == 0 && m);
1529 socket_lock(so, 0);
1530 bad:
1531 if (m)
1532 m_freem(m);
1533 #ifdef __APPLE__
1534 if ((so->so_options & SO_WANTOOBFLAG) != 0) {
1535 if (error == EWOULDBLOCK || error == EINVAL) {
1536 /*
1537 * Let's try to get normal data:
1538 * EWOULDBLOCK: out-of-band data not received yet;
1539 * EINVAL: out-of-band data already read.
1540 */
1541 error = 0;
1542 goto nooob;
1543 } else if (error == 0 && flagsp)
1544 *flagsp |= MSG_OOB;
1545 }
1546 socket_unlock(so, 1);
1547 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0);
1548 #endif
1549 return (error);
1550 }
1551 nooob:
1552 if (mp)
1553 *mp = (struct mbuf *)0;
1554 if (so->so_state & SS_ISCONFIRMING && uio_resid(uio))
1555 (*pr->pr_usrreqs->pru_rcvd)(so, 0);
1556
1557
1558 free_list = (struct mbuf *)0;
1559 delayed_copy_len = 0;
1560 restart:
1561 #ifdef MORE_LOCKING_DEBUG
1562 if (so->so_usecount <= 1)
1563 printf("soreceive: sblock so=%x ref=%d on socket\n", so, so->so_usecount);
1564 #endif
1565 /*
1566 * See if the socket has been closed (SS_NOFDREF|SS_CANTRCVMORE)
1567 * and if so just return to the caller. This could happen when
1568 * soreceive() is called by a socket upcall function during the
1569 * time the socket is freed. The socket buffer would have been
1570 * locked across the upcall, therefore we cannot put this thread
1571 * to sleep (else we will deadlock) or return EWOULDBLOCK (else
1572 * we may livelock), because the lock on the socket buffer will
1573 * only be released when the upcall routine returns to its caller.
1574 * Because the socket has been officially closed, there can be
1575 * no further read on it.
1576 */
1577 if ((so->so_state & (SS_NOFDREF | SS_CANTRCVMORE)) ==
1578 (SS_NOFDREF | SS_CANTRCVMORE)) {
1579 socket_unlock(so, 1);
1580 return (0);
1581 }
1582
1583 error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
1584 if (error) {
1585 socket_unlock(so, 1);
1586 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0);
1587 return (error);
1588 }
1589
1590 m = so->so_rcv.sb_mb;
1591 /*
1592 * If we have less data than requested, block awaiting more
1593 * (subject to any timeout) if:
1594 * 1. the current count is less than the low water mark, or
1595 * 2. MSG_WAITALL is set, and it is possible to do the entire
1596 * receive operation at once if we block (resid <= hiwat).
1597 * 3. MSG_DONTWAIT is not set
1598 * If MSG_WAITALL is set but resid is larger than the receive buffer,
1599 * we have to do the receive in sections, and thus risk returning
1600 * a short count if a timeout or signal occurs after we start.
1601 */
1602 if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
1603 so->so_rcv.sb_cc < uio_resid(uio)) &&
1604 (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
1605 ((flags & MSG_WAITALL) && uio_resid(uio) <= so->so_rcv.sb_hiwat)) &&
1606 m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
1607
1608 KASSERT(m != 0 || !so->so_rcv.sb_cc, ("receive 1"));
1609 if (so->so_error) {
1610 if (m)
1611 goto dontblock;
1612 error = so->so_error;
1613 if ((flags & MSG_PEEK) == 0)
1614 so->so_error = 0;
1615 goto release;
1616 }
1617 if (so->so_state & SS_CANTRCVMORE) {
1618 if (m)
1619 goto dontblock;
1620 else
1621 goto release;
1622 }
1623 for (; m; m = m->m_next)
1624 if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
1625 m = so->so_rcv.sb_mb;
1626 goto dontblock;
1627 }
1628 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
1629 (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
1630 error = ENOTCONN;
1631 goto release;
1632 }
1633 if (uio_resid(uio) == 0)
1634 goto release;
1635 if ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO))) {
1636 error = EWOULDBLOCK;
1637 goto release;
1638 }
1639 sbunlock(&so->so_rcv, 1);
1640 #ifdef EVEN_MORE_LOCKING_DEBUG
1641 if (socket_debug)
1642 printf("Waiting for socket data\n");
1643 #endif
1644
1645 error = sbwait(&so->so_rcv);
1646 #ifdef EVEN_MORE_LOCKING_DEBUG
1647 if (socket_debug)
1648 printf("SORECEIVE - sbwait returned %d\n", error);
1649 #endif
1650 if (so->so_usecount < 1)
1651 panic("soreceive: after 2nd sblock so=%x ref=%d on socket\n", so, so->so_usecount);
1652 if (error) {
1653 socket_unlock(so, 1);
1654 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0);
1655 return (error);
1656 }
1657 goto restart;
1658 }
1659 dontblock:
1660 #ifndef __APPLE__
1661 if (uio->uio_procp)
1662 uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
1663 #else /* __APPLE__ */
1664 /*
1665 * 2207985
1666 * This should be uio->uio_procp; however, some callers of this
1667 * function use auto variables with stack garbage, and fail to
1668 * fill out the uio structure properly.
1669 */
1670 if (p)
1671 p->p_stats->p_ru.ru_msgrcv++;
1672 #endif /* __APPLE__ */
1673 nextrecord = m->m_nextpkt;
1674 if ((pr->pr_flags & PR_ADDR) && m->m_type == MT_SONAME) {
1675 KASSERT(m->m_type == MT_SONAME, ("receive 1a"));
1676 orig_resid = 0;
1677 if (psa) {
1678 *psa = dup_sockaddr(mtod(m, struct sockaddr *),
1679 mp0 == 0);
1680 if ((*psa == 0) && (flags & MSG_NEEDSA)) {
1681 error = EWOULDBLOCK;
1682 goto release;
1683 }
1684 }
1685 if (flags & MSG_PEEK) {
1686 m = m->m_next;
1687 } else {
1688 sbfree(&so->so_rcv, m);
1689 if (m->m_next == 0 && so->so_rcv.sb_cc != 0)
1690 panic("soreceive: about to create invalid socketbuf");
1691 MFREE(m, so->so_rcv.sb_mb);
1692 m = so->so_rcv.sb_mb;
1693 }
1694 }
1695 while (m && m->m_type == MT_CONTROL && error == 0) {
1696 if (flags & MSG_PEEK) {
1697 if (controlp)
1698 *controlp = m_copy(m, 0, m->m_len);
1699 m = m->m_next;
1700 } else {
1701 sbfree(&so->so_rcv, m);
1702 if (controlp) {
1703 if (pr->pr_domain->dom_externalize &&
1704 mtod(m, struct cmsghdr *)->cmsg_type ==
1705 SCM_RIGHTS) {
1706 socket_unlock(so, 0); /* release socket lock: see 3903171 */
1707 error = (*pr->pr_domain->dom_externalize)(m);
1708 socket_lock(so, 0);
1709 }
1710 *controlp = m;
1711 if (m->m_next == 0 && so->so_rcv.sb_cc != 0)
1712 panic("soreceive: so->so_rcv.sb_mb->m_next == 0 && so->so_rcv.sb_cc != 0");
1713 so->so_rcv.sb_mb = m->m_next;
1714 m->m_next = 0;
1715 m = so->so_rcv.sb_mb;
1716 } else {
1717 MFREE(m, so->so_rcv.sb_mb);
1718 m = so->so_rcv.sb_mb;
1719 }
1720 }
1721 if (controlp) {
1722 orig_resid = 0;
1723 controlp = &(*controlp)->m_next;
1724 }
1725 }
1726 if (m) {
1727 if ((flags & MSG_PEEK) == 0)
1728 m->m_nextpkt = nextrecord;
1729 type = m->m_type;
1730 if (type == MT_OOBDATA)
1731 flags |= MSG_OOB;
1732 }
1733 moff = 0;
1734 offset = 0;
1735
1736 if (!(flags & MSG_PEEK) && uio_resid(uio) > sorecvmincopy)
1737 can_delay = 1;
1738 else
1739 can_delay = 0;
1740
1741 need_event = 0;
1742
1743 while (m && (uio_resid(uio) - delayed_copy_len) > 0 && error == 0) {
1744 if (m->m_type == MT_OOBDATA) {
1745 if (type != MT_OOBDATA)
1746 break;
1747 } else if (type == MT_OOBDATA)
1748 break;
1749 #ifndef __APPLE__
1750 /*
1751 * This assertion needs rework. The trouble is AppleTalk uses many
1752 * mbuf types (NOT listed in mbuf.h!) which will trigger this panic.
1753 * For now just remove the assertion... CSM 9/98
1754 */
1755 else
1756 KASSERT(m->m_type == MT_DATA || m->m_type == MT_HEADER,
1757 ("receive 3"));
1758 #else
1759 /*
1760 * Make sure to always set MSG_OOB when getting
1761 * out of band data inline.
1762 */
1763 if ((so->so_options & SO_WANTOOBFLAG) != 0 &&
1764 (so->so_options & SO_OOBINLINE) != 0 &&
1765 (so->so_state & SS_RCVATMARK) != 0) {
1766 flags |= MSG_OOB;
1767 }
1768 #endif
1769 so->so_state &= ~SS_RCVATMARK;
1770 // LP64todo - fix this!
1771 len = uio_resid(uio) - delayed_copy_len;
1772 if (so->so_oobmark && len > so->so_oobmark - offset)
1773 len = so->so_oobmark - offset;
1774 if (len > m->m_len - moff)
1775 len = m->m_len - moff;
1776 /*
1777 * If mp is set, just pass back the mbufs.
1778 * Otherwise copy them out via the uio, then free.
1779 * Sockbuf must be consistent here (points to current mbuf,
1780 * it points to next record) when we drop priority;
1781 * we must note any additions to the sockbuf when we
1782 * block interrupts again.
1783 */
1784 if (mp == 0) {
1785 if (can_delay && len == m->m_len) {
1786 /*
1787 * only delay the copy if we're consuming the
1788 * mbuf and we're NOT in MSG_PEEK mode
1789 * and we have enough data to make it worthwhile
1790 * to drop and retake the funnel... can_delay
1791 * reflects the state of the latter 2 constraints
1792 * moff should always be zero in these cases
1793 */
1794 delayed_copy_len += len;
1795 } else {
1796
1797 if (delayed_copy_len) {
1798 error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);
1799
1800 if (error) {
1801 goto release;
1802 }
1803 if (m != so->so_rcv.sb_mb) {
1804 /*
1805 * can only get here if MSG_PEEK is not set
1806 * therefore, m should point at the head of the rcv queue...
1807 * if it doesn't, it means something drastically changed
1808 * while we were out from behind the funnel in sodelayed_copy...
1809 * perhaps a RST on the stream... in any event, the stream has
1810 * been interrupted... it's probably best just to return
1811 * whatever data we've moved and let the caller sort it out...
1812 */
1813 break;
1814 }
1815 }
1816 socket_unlock(so, 0);
1817 error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
1818 socket_lock(so, 0);
1819
1820 if (error)
1821 goto release;
1822 }
1823 } else
1824 uio_setresid(uio, (uio_resid(uio) - len));
1825
1826 if (len == m->m_len - moff) {
1827 if (m->m_flags & M_EOR)
1828 flags |= MSG_EOR;
1829 if (flags & MSG_PEEK) {
1830 m = m->m_next;
1831 moff = 0;
1832 } else {
1833 nextrecord = m->m_nextpkt;
1834 sbfree(&so->so_rcv, m);
1835 m->m_nextpkt = NULL;
1836
1837 if (mp) {
1838 *mp = m;
1839 mp = &m->m_next;
1840 so->so_rcv.sb_mb = m = m->m_next;
1841 *mp = (struct mbuf *)0;
1842 } else {
1843 if (free_list == NULL)
1844 free_list = m;
1845 else
1846 ml->m_next = m;
1847 ml = m;
1848 so->so_rcv.sb_mb = m = m->m_next;
1849 ml->m_next = 0;
1850 }
1851 if (m)
1852 m->m_nextpkt = nextrecord;
1853 }
1854 } else {
1855 if (flags & MSG_PEEK)
1856 moff += len;
1857 else {
1858 if (mp)
1859 *mp = m_copym(m, 0, len, M_WAIT);
1860 m->m_data += len;
1861 m->m_len -= len;
1862 so->so_rcv.sb_cc -= len;
1863 }
1864 }
1865 if (so->so_oobmark) {
1866 if ((flags & MSG_PEEK) == 0) {
1867 so->so_oobmark -= len;
1868 if (so->so_oobmark == 0) {
1869 so->so_state |= SS_RCVATMARK;
1870 /*
1871 * delay posting the actual event until after
1872 * any delayed copy processing has finished
1873 */
1874 need_event = 1;
1875 break;
1876 }
1877 } else {
1878 offset += len;
1879 if (offset == so->so_oobmark)
1880 break;
1881 }
1882 }
1883 if (flags & MSG_EOR)
1884 break;
1885 /*
1886 * If the MSG_WAITALL or MSG_WAITSTREAM flag is set (for non-atomic socket),
1887 * we must not quit until "uio->uio_resid == 0" or an error
1888 * termination. If a signal/timeout occurs, return
1889 * with a short count but without error.
1890 * Keep sockbuf locked against other readers.
1891 */
1892 while (flags & (MSG_WAITALL|MSG_WAITSTREAM) && m == 0 && (uio_resid(uio) - delayed_copy_len) > 0 &&
1893 !sosendallatonce(so) && !nextrecord) {
1894 if (so->so_error || so->so_state & SS_CANTRCVMORE)
1895 goto release;
1896
1897 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb && (((struct inpcb *)so->so_pcb)->inp_state != INPCB_STATE_DEAD))
1898 (*pr->pr_usrreqs->pru_rcvd)(so, flags);
1899 if (sbwait(&so->so_rcv)) {
1900 error = 0;
1901 goto release;
1902 }
1903 /*
1904 * have to wait until after we get back from the sbwait to do the copy because
1905 * we will drop the funnel if we have enough data that has been delayed... by dropping
1906 * the funnel we open up a window allowing the netisr thread to process the incoming packets
1907 * and to change the state of this socket... we're issuing the sbwait because
1908 * the socket is empty and we're expecting the netisr thread to wake us up when more
1909 * packets arrive... if we allow that processing to happen and then sbwait, we
1910 * could stall forever with packets sitting in the socket if no further packets
1911 * arrive from the remote side.
1912 *
1913 * we want to copy before we've collected all the data to satisfy this request to
1914 * allow the copy to overlap the incoming packet processing on an MP system
1915 */
1916 if (delayed_copy_len > sorecvmincopy && (delayed_copy_len > (so->so_rcv.sb_hiwat / 2))) {
1917
1918 error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);
1919
1920 if (error)
1921 goto release;
1922 }
1923 m = so->so_rcv.sb_mb;
1924 if (m) {
1925 nextrecord = m->m_nextpkt;
1926 }
1927 }
1928 }
1929 #ifdef MORE_LOCKING_DEBUG
1930 if (so->so_usecount <= 1)
1931 panic("soreceive: after big while so=%x ref=%d on socket\n", so, so->so_usecount);
1932 #endif
1933
1934 if (m && pr->pr_flags & PR_ATOMIC) {
1935 #ifdef __APPLE__
1936 if (so->so_options & SO_DONTTRUNC)
1937 flags |= MSG_RCVMORE;
1938 else {
1939 #endif
1940 flags |= MSG_TRUNC;
1941 if ((flags & MSG_PEEK) == 0)
1942 (void) sbdroprecord(&so->so_rcv);
1943 #ifdef __APPLE__
1944 }
1945 #endif
1946 }
1947 if ((flags & MSG_PEEK) == 0) {
1948 if (m == 0)
1949 so->so_rcv.sb_mb = nextrecord;
1950 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
1951 (*pr->pr_usrreqs->pru_rcvd)(so, flags);
1952 }
1953 #ifdef __APPLE__
1954 if ((so->so_options & SO_WANTMORE) && so->so_rcv.sb_cc > 0)
1955 flags |= MSG_HAVEMORE;
1956
1957 if (delayed_copy_len) {
1958 error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);
1959
1960 if (error)
1961 goto release;
1962 }
1963 if (free_list) {
1964 m_freem_list((struct mbuf *)free_list);
1965 free_list = (struct mbuf *)0;
1966 }
1967 if (need_event)
1968 postevent(so, 0, EV_OOB);
1969 #endif
1970 if (orig_resid == uio_resid(uio) && orig_resid &&
1971 (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
1972 sbunlock(&so->so_rcv, 1);
1973 goto restart;
1974 }
1975
1976 if (flagsp)
1977 *flagsp |= flags;
1978 release:
1979 #ifdef MORE_LOCKING_DEBUG
1980 if (so->so_usecount <= 1)
1981 panic("soreceive: release so=%x ref=%d on socket\n", so, so->so_usecount);
1982 #endif
1983 if (delayed_copy_len) {
1984 error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);
1985 }
1986 if (free_list) {
1987 m_freem_list((struct mbuf *)free_list);
1988 }
1989 sbunlock(&so->so_rcv, 0); /* will unlock socket */
1990
1991 // LP64todo - fix this!
1992 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END,
1993 so,
1994 uio_resid(uio),
1995 so->so_rcv.sb_cc,
1996 0,
1997 error);
1998
1999 return (error);
2000 }
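/*
 * Illustrative sketch (not compiled): receiving a record as an mbuf chain.
 * As described above, when mp0 is non-NULL the chain is handed back to the
 * caller and the uio supplies only the residual byte count; MSG_DONTWAIT is
 * an assumption made for the example.
 */
#if 0
static int
example_recv_chain(struct socket *so, struct uio *uio, struct mbuf **chain)
{
	int flags = MSG_DONTWAIT;

	return (soreceive(so, (struct sockaddr **)NULL, uio, chain,
	    (struct mbuf **)NULL, &flags));
}
#endif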
2001
2002
2003 static int sodelayed_copy(struct socket *so, struct uio *uio, struct mbuf **free_list, int *resid)
2004 {
2005 int error = 0;
2006 struct mbuf *m;
2007
2008 m = *free_list;
2009
2010 socket_unlock(so, 0);
2011
2012 while (m && error == 0) {
2013
2014 error = uiomove(mtod(m, caddr_t), (int)m->m_len, uio);
2015
2016 m = m->m_next;
2017 }
2018 m_freem_list(*free_list);
2019
2020 *free_list = (struct mbuf *)NULL;
2021 *resid = 0;
2022
2023 socket_lock(so, 0);
2024
2025 return (error);
2026 }
2027
2028
2029 int
2030 soshutdown(so, how)
2031 register struct socket *so;
2032 register int how;
2033 {
2034 register struct protosw *pr = so->so_proto;
2035 int ret;
2036
2037 socket_lock(so, 1);
2038
2039 sflt_notify(so, sock_evt_shutdown, &how);
2040
2041 if (how != SHUT_WR) {
2042 sorflush(so);
2043 postevent(so, 0, EV_RCLOSED);
2044 }
2045 if (how != SHUT_RD) {
2046 ret = ((*pr->pr_usrreqs->pru_shutdown)(so));
2047 postevent(so, 0, EV_WCLOSED);
2048 KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, 0,0,0,0,0);
2049 socket_unlock(so, 1);
2050 return(ret);
2051 }
2052
2053 KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, 0,0,0,0,0);
2054 socket_unlock(so, 1);
2055 return (0);
2056 }
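/*
 * soshutdown() maps the shutdown(2) "how" argument onto the two halves of
 * the socket: anything other than SHUT_WR flushes and closes the receive
 * side, and anything other than SHUT_RD pushes the shutdown down to the
 * protocol via pru_shutdown.  A minimal user-level sketch, assuming a
 * connected stream descriptor fd:
 *
 *	#include <stdio.h>
 *	#include <sys/socket.h>
 *
 *	// done sending: let the peer see EOF but keep reading its replies
 *	if (shutdown(fd, SHUT_WR) == -1)
 *		perror("shutdown");
 *
 *	// later, stop reading as well
 *	shutdown(fd, SHUT_RDWR);
 */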
2057
2058 void
2059 sorflush(so)
2060 register struct socket *so;
2061 {
2062 register struct sockbuf *sb = &so->so_rcv;
2063 register struct protosw *pr = so->so_proto;
2064 struct sockbuf asb;
2065
2066 #ifdef MORE_LOCKING_DEBUG
2067 lck_mtx_t * mutex_held;
2068
2069 if (so->so_proto->pr_getlock != NULL)
2070 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
2071 else
2072 mutex_held = so->so_proto->pr_domain->dom_mtx;
2073 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
2074 #endif
2075
2076 sflt_notify(so, sock_evt_flush_read, NULL);
2077
2078 sb->sb_flags |= SB_NOINTR;
2079 (void) sblock(sb, M_WAIT);
2080 socantrcvmore(so);
2081 sbunlock(sb, 1);
2082 #ifdef __APPLE__
2083 selthreadclear(&sb->sb_sel);
2084 #endif
2085 asb = *sb;
2086 bzero((caddr_t)sb, sizeof (*sb));
2087 sb->sb_so = so; /* reestablish link to socket */
2088 if (asb.sb_flags & SB_KNOTE) {
2089 sb->sb_sel.si_note = asb.sb_sel.si_note;
2090 sb->sb_flags = SB_KNOTE;
2091 }
2092 if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
2093 (*pr->pr_domain->dom_dispose)(asb.sb_mb);
2094 sbrelease(&asb);
2095 }
2096
2097 /*
2098 * Perhaps this routine, and sooptcopyout(), below, ought to come in
2099 * an additional variant to handle the case where the option value needs
2100 * to be some kind of integer, but not a specific size.
2101 * In addition to their use here, these functions are also called by the
2102 * protocol-level pr_ctloutput() routines.
2103 */
2104 int
2105 sooptcopyin(sopt, buf, len, minlen)
2106 struct sockopt *sopt;
2107 void *buf;
2108 size_t len;
2109 size_t minlen;
2110 {
2111 size_t valsize;
2112
2113 /*
2114 * If the user gives us more than we wanted, we ignore it,
2115 * but if we don't get the minimum length the caller
2116 * wants, we return EINVAL. On success, sopt->sopt_valsize
2117 * is set to however much we actually retrieved.
2118 */
2119 if ((valsize = sopt->sopt_valsize) < minlen)
2120 return EINVAL;
2121 if (valsize > len)
2122 sopt->sopt_valsize = valsize = len;
2123
2124 if (sopt->sopt_p != 0)
2125 return (copyin(sopt->sopt_val, buf, valsize));
2126
2127 bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), buf, valsize);
2128 return 0;
2129 }
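/*
 * sooptcopyin() hides whether the option value lives in user space
 * (sopt_p set, so copyin() is used) or in the kernel (bcopy()), and clamps
 * sopt_valsize to what was actually consumed.  A hedged sketch of how a
 * protocol-level pr_ctloutput() handler typically uses it for an integer
 * option; the handler and the option name PROTO_EXAMPLE_OPT are
 * hypothetical:
 *
 *	static int
 *	proto_setopt(struct socket *so, struct sockopt *sopt)
 *	{
 *		int optval, error;
 *
 *		switch (sopt->sopt_name) {
 *		case PROTO_EXAMPLE_OPT:
 *			error = sooptcopyin(sopt, &optval, sizeof optval,
 *			    sizeof optval);
 *			if (error)
 *				return (error);
 *			// apply optval to protocol state here
 *			return (0);
 *		default:
 *			return (ENOPROTOOPT);
 *		}
 *	}
 */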
2130
2131 int
2132 sosetopt(so, sopt)
2133 struct socket *so;
2134 struct sockopt *sopt;
2135 {
2136 int error, optval;
2137 struct linger l;
2138 struct timeval tv;
2139 short val;
2140
2141 socket_lock(so, 1);
2142
2143 if (sopt->sopt_dir != SOPT_SET) {
2144 sopt->sopt_dir = SOPT_SET;
2145 }
2146
2147 {
2148 struct socket_filter_entry *filter;
2149 int filtered = 0;
2150 error = 0;
2151 for (filter = so->so_filt; filter && (error == 0);
2152 filter = filter->sfe_next_onsocket) {
2153 if (filter->sfe_filter->sf_filter.sf_setoption) {
2154 if (filtered == 0) {
2155 filtered = 1;
2156 sflt_use(so);
2157 socket_unlock(so, 0);
2158 }
2159 error = filter->sfe_filter->sf_filter.sf_setoption(
2160 filter->sfe_cookie, so, sopt);
2161 }
2162 }
2163
2164 if (filtered != 0) {
2165 socket_lock(so, 0);
2166 sflt_unuse(so);
2167
2168 if (error) {
2169 if (error == EJUSTRETURN)
2170 error = 0;
2171 goto bad;
2172 }
2173 }
2174 }
2175
2176 error = 0;
2177 if (sopt->sopt_level != SOL_SOCKET) {
2178 if (so->so_proto && so->so_proto->pr_ctloutput) {
2179 error = (*so->so_proto->pr_ctloutput)
2180 (so, sopt);
2181 socket_unlock(so, 1);
2182 return (error);
2183 }
2184 error = ENOPROTOOPT;
2185 } else {
2186 switch (sopt->sopt_name) {
2187 case SO_LINGER:
2188 case SO_LINGER_SEC:
2189 error = sooptcopyin(sopt, &l, sizeof l, sizeof l);
2190 if (error)
2191 goto bad;
2192
2193 so->so_linger = (sopt->sopt_name == SO_LINGER) ? l.l_linger : l.l_linger * hz;
2194 if (l.l_onoff)
2195 so->so_options |= SO_LINGER;
2196 else
2197 so->so_options &= ~SO_LINGER;
2198 break;
2199
2200 case SO_DEBUG:
2201 case SO_KEEPALIVE:
2202 case SO_DONTROUTE:
2203 case SO_USELOOPBACK:
2204 case SO_BROADCAST:
2205 case SO_REUSEADDR:
2206 case SO_REUSEPORT:
2207 case SO_OOBINLINE:
2208 case SO_TIMESTAMP:
2209 #ifdef __APPLE__
2210 case SO_DONTTRUNC:
2211 case SO_WANTMORE:
2212 case SO_WANTOOBFLAG:
2213 #endif
2214 error = sooptcopyin(sopt, &optval, sizeof optval,
2215 sizeof optval);
2216 if (error)
2217 goto bad;
2218 if (optval)
2219 so->so_options |= sopt->sopt_name;
2220 else
2221 so->so_options &= ~sopt->sopt_name;
2222 break;
2223
2224 case SO_SNDBUF:
2225 case SO_RCVBUF:
2226 case SO_SNDLOWAT:
2227 case SO_RCVLOWAT:
2228 error = sooptcopyin(sopt, &optval, sizeof optval,
2229 sizeof optval);
2230 if (error)
2231 goto bad;
2232
2233 /*
2234 * Values < 1 make no sense for any of these
2235 * options, so disallow them.
2236 */
2237 if (optval < 1) {
2238 error = EINVAL;
2239 goto bad;
2240 }
2241
2242 switch (sopt->sopt_name) {
2243 case SO_SNDBUF:
2244 case SO_RCVBUF:
2245 if (sbreserve(sopt->sopt_name == SO_SNDBUF ?
2246 &so->so_snd : &so->so_rcv,
2247 (u_long) optval) == 0) {
2248 error = ENOBUFS;
2249 goto bad;
2250 }
2251 break;
2252
2253 /*
2254 * Make sure the low-water is never greater than
2255 * the high-water.
2256 */
2257 case SO_SNDLOWAT:
2258 so->so_snd.sb_lowat =
2259 (optval > so->so_snd.sb_hiwat) ?
2260 so->so_snd.sb_hiwat : optval;
2261 break;
2262 case SO_RCVLOWAT:
2263 so->so_rcv.sb_lowat =
2264 (optval > so->so_rcv.sb_hiwat) ?
2265 so->so_rcv.sb_hiwat : optval;
2266 break;
2267 }
2268 break;
2269
2270 case SO_SNDTIMEO:
2271 case SO_RCVTIMEO:
2272 error = sooptcopyin(sopt, &tv, sizeof tv,
2273 sizeof tv);
2274 if (error)
2275 goto bad;
2276
2277 if (tv.tv_sec < 0 || tv.tv_sec > LONG_MAX ||
2278 tv.tv_usec < 0 || tv.tv_usec >= 1000000) {
2279 error = EDOM;
2280 goto bad;
2281 }
2282
2283 switch (sopt->sopt_name) {
2284 case SO_SNDTIMEO:
2285 so->so_snd.sb_timeo = tv;
2286 break;
2287 case SO_RCVTIMEO:
2288 so->so_rcv.sb_timeo = tv;
2289 break;
2290 }
2291 break;
2292
2293 case SO_NKE:
2294 {
2295 struct so_nke nke;
2296
2297 error = sooptcopyin(sopt, &nke,
2298 sizeof nke, sizeof nke);
2299 if (error)
2300 goto bad;
2301
2302 error = sflt_attach_private(so, NULL, nke.nke_handle, 1);
2303 break;
2304 }
2305
2306 case SO_NOSIGPIPE:
2307 error = sooptcopyin(sopt, &optval, sizeof optval,
2308 sizeof optval);
2309 if (error)
2310 goto bad;
2311 if (optval)
2312 so->so_flags |= SOF_NOSIGPIPE;
2313 else
2314 so->so_flags &= ~SOF_NOSIGPIPE;
2315
2316 break;
2317
2318 case SO_NOADDRERR:
2319 error = sooptcopyin(sopt, &optval, sizeof optval,
2320 sizeof optval);
2321 if (error)
2322 goto bad;
2323 if (optval)
2324 so->so_flags |= SOF_NOADDRAVAIL;
2325 else
2326 so->so_flags &= ~SOF_NOADDRAVAIL;
2327
2328 break;
2329
2330 default:
2331 error = ENOPROTOOPT;
2332 break;
2333 }
2334 if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
2335 (void) ((*so->so_proto->pr_ctloutput)
2336 (so, sopt));
2337 }
2338 }
2339 bad:
2340 socket_unlock(so, 1);
2341 return (error);
2342 }
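/*
 * From user space the switch above is reached through setsockopt(2); the
 * validation it performs (buffer sizes of at least one byte, timeouts with
 * 0 <= tv_usec < 1000000) is what produces the EINVAL, EDOM and ENOBUFS
 * returns.  A small usage sketch for a descriptor fd (error checks
 * omitted):
 *
 *	#include <sys/socket.h>
 *	#include <sys/time.h>
 *
 *	struct linger l = { 1, 5 };		// linger up to 5 sec on close
 *	struct timeval tv = { 2, 500000 };	// 2.5 second receive timeout
 *	int bufsz = 64 * 1024;
 *
 *	setsockopt(fd, SOL_SOCKET, SO_LINGER,   &l,     sizeof l);
 *	setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv,    sizeof tv);
 *	setsockopt(fd, SOL_SOCKET, SO_RCVBUF,   &bufsz, sizeof bufsz);
 */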
2343
2344 /* Helper routine for getsockopt */
2345 int
2346 sooptcopyout(sopt, buf, len)
2347 struct sockopt *sopt;
2348 void *buf;
2349 size_t len;
2350 {
2351 int error;
2352 size_t valsize;
2353
2354 error = 0;
2355
2356 /*
2357 * Documented get behavior is that we always return a value,
2358 * possibly truncated to fit in the user's buffer.
2359 * Traditional behavior is that we always tell the user
2360 * precisely how much we copied, rather than something useful
2361 * like the total amount we had available for her.
2362 * Note that this interface is not idempotent; the entire answer must
2363 	 * be generated ahead of time.
2364 */
2365 valsize = min(len, sopt->sopt_valsize);
2366 sopt->sopt_valsize = valsize;
2367 if (sopt->sopt_val != USER_ADDR_NULL) {
2368 if (sopt->sopt_p != 0)
2369 error = copyout(buf, sopt->sopt_val, valsize);
2370 else
2371 bcopy(buf, CAST_DOWN(caddr_t, sopt->sopt_val), valsize);
2372 }
2373 return error;
2374 }
2375
2376 int
2377 sogetopt(so, sopt)
2378 struct socket *so;
2379 struct sockopt *sopt;
2380 {
2381 int error, optval;
2382 struct linger l;
2383 struct timeval tv;
2384
2385 if (sopt->sopt_dir != SOPT_GET) {
2386 sopt->sopt_dir = SOPT_GET;
2387 }
2388
2389 socket_lock(so, 1);
2390
2391 {
2392 struct socket_filter_entry *filter;
2393 int filtered = 0;
2394 error = 0;
2395 for (filter = so->so_filt; filter && (error == 0);
2396 filter = filter->sfe_next_onsocket) {
2397 if (filter->sfe_filter->sf_filter.sf_getoption) {
2398 if (filtered == 0) {
2399 filtered = 1;
2400 sflt_use(so);
2401 socket_unlock(so, 0);
2402 }
2403 error = filter->sfe_filter->sf_filter.sf_getoption(
2404 filter->sfe_cookie, so, sopt);
2405 }
2406 }
2407 if (filtered != 0) {
2408 socket_lock(so, 0);
2409 sflt_unuse(so);
2410
2411 if (error) {
2412 if (error == EJUSTRETURN)
2413 error = 0;
2414 socket_unlock(so, 1);
2415 return error;
2416 }
2417 }
2418 }
2419
2420 error = 0;
2421 if (sopt->sopt_level != SOL_SOCKET) {
2422 if (so->so_proto && so->so_proto->pr_ctloutput) {
2423 error = (*so->so_proto->pr_ctloutput)
2424 (so, sopt);
2425 socket_unlock(so, 1);
2426 return (error);
2427 } else {
2428 socket_unlock(so, 1);
2429 return (ENOPROTOOPT);
2430 }
2431 } else {
2432 switch (sopt->sopt_name) {
2433 case SO_LINGER:
2434 case SO_LINGER_SEC:
2435 l.l_onoff = so->so_options & SO_LINGER;
2436 l.l_linger = (sopt->sopt_name == SO_LINGER) ? so->so_linger :
2437 so->so_linger / hz;
2438 error = sooptcopyout(sopt, &l, sizeof l);
2439 break;
2440
2441 case SO_USELOOPBACK:
2442 case SO_DONTROUTE:
2443 case SO_DEBUG:
2444 case SO_KEEPALIVE:
2445 case SO_REUSEADDR:
2446 case SO_REUSEPORT:
2447 case SO_BROADCAST:
2448 case SO_OOBINLINE:
2449 case SO_TIMESTAMP:
2450 #ifdef __APPLE__
2451 case SO_DONTTRUNC:
2452 case SO_WANTMORE:
2453 case SO_WANTOOBFLAG:
2454 #endif
2455 optval = so->so_options & sopt->sopt_name;
2456 integer:
2457 error = sooptcopyout(sopt, &optval, sizeof optval);
2458 break;
2459
2460 case SO_TYPE:
2461 optval = so->so_type;
2462 goto integer;
2463
2464 #ifdef __APPLE__
2465 case SO_NREAD:
2466 {
2467 int pkt_total;
2468 struct mbuf *m1;
2469
2470 pkt_total = 0;
2471 m1 = so->so_rcv.sb_mb;
2472 if (so->so_proto->pr_flags & PR_ATOMIC)
2473 {
2474 while (m1) {
2475 if (m1->m_type == MT_DATA)
2476 pkt_total += m1->m_len;
2477 m1 = m1->m_next;
2478 }
2479 optval = pkt_total;
2480 } else
2481 optval = so->so_rcv.sb_cc;
2482 goto integer;
2483 }
2484 case SO_NWRITE:
2485 optval = so->so_snd.sb_cc;
2486 goto integer;
2487 #endif
2488 case SO_ERROR:
2489 optval = so->so_error;
2490 so->so_error = 0;
2491 goto integer;
2492
2493 case SO_SNDBUF:
2494 optval = so->so_snd.sb_hiwat;
2495 goto integer;
2496
2497 case SO_RCVBUF:
2498 optval = so->so_rcv.sb_hiwat;
2499 goto integer;
2500
2501 case SO_SNDLOWAT:
2502 optval = so->so_snd.sb_lowat;
2503 goto integer;
2504
2505 case SO_RCVLOWAT:
2506 optval = so->so_rcv.sb_lowat;
2507 goto integer;
2508
2509 case SO_SNDTIMEO:
2510 case SO_RCVTIMEO:
2511 tv = (sopt->sopt_name == SO_SNDTIMEO ?
2512 so->so_snd.sb_timeo : so->so_rcv.sb_timeo);
2513
2514 error = sooptcopyout(sopt, &tv, sizeof tv);
2515 break;
2516
2517 case SO_NOSIGPIPE:
2518 optval = (so->so_flags & SOF_NOSIGPIPE);
2519 goto integer;
2520
2521 case SO_NOADDRERR:
2522 optval = (so->so_flags & SOF_NOADDRAVAIL);
2523 goto integer;
2524
2525 default:
2526 error = ENOPROTOOPT;
2527 break;
2528 }
2529 socket_unlock(so, 1);
2530 return (error);
2531 }
2532 }
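/*
 * getsockopt(2) counterpart of the above.  Note that SO_ERROR also clears
 * the pending error, and that SO_NREAD / SO_NWRITE are the Darwin-specific
 * byte counts computed in the switch.  Usage sketch for a descriptor fd:
 *
 *	#include <sys/socket.h>
 *
 *	int err, nread;
 *	socklen_t len = sizeof err;
 *
 *	// fetch and clear any pending asynchronous error
 *	getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &len);
 *
 *	len = sizeof nread;
 *	// Darwin extension: bytes available to read
 *	getsockopt(fd, SOL_SOCKET, SO_NREAD, &nread, &len);
 */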
2533
2534 /* XXX; prepare mbuf for (__FreeBSD__ < 3) routines. */
2535 int
2536 soopt_getm(struct sockopt *sopt, struct mbuf **mp)
2537 {
2538 struct mbuf *m, *m_prev;
2539 int sopt_size = sopt->sopt_valsize;
2540
2541 if (sopt_size > MAX_SOOPTGETM_SIZE)
2542 return EMSGSIZE;
2543
2544 MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_DATA);
2545 if (m == 0)
2546 return ENOBUFS;
2547 if (sopt_size > MLEN) {
2548 MCLGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT);
2549 if ((m->m_flags & M_EXT) == 0) {
2550 m_free(m);
2551 return ENOBUFS;
2552 }
2553 m->m_len = min(MCLBYTES, sopt_size);
2554 } else {
2555 m->m_len = min(MLEN, sopt_size);
2556 }
2557 sopt_size -= m->m_len;
2558 *mp = m;
2559 m_prev = m;
2560
2561 while (sopt_size) {
2562 MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_DATA);
2563 if (m == 0) {
2564 m_freem(*mp);
2565 return ENOBUFS;
2566 }
2567 if (sopt_size > MLEN) {
2568 MCLGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT);
2569 if ((m->m_flags & M_EXT) == 0) {
2570 m_freem(*mp);
2571 return ENOBUFS;
2572 }
2573 m->m_len = min(MCLBYTES, sopt_size);
2574 } else {
2575 m->m_len = min(MLEN, sopt_size);
2576 }
2577 sopt_size -= m->m_len;
2578 m_prev->m_next = m;
2579 m_prev = m;
2580 }
2581 return 0;
2582 }
2583
2584 /* XXX; copyin sopt data into mbuf chain for (__FreeBSD__ < 3) routines. */
2585 int
2586 soopt_mcopyin(struct sockopt *sopt, struct mbuf *m)
2587 {
2588 struct mbuf *m0 = m;
2589
2590 if (sopt->sopt_val == USER_ADDR_NULL)
2591 return 0;
2592 while (m != NULL && sopt->sopt_valsize >= m->m_len) {
2593 if (sopt->sopt_p != NULL) {
2594 int error;
2595
2596 error = copyin(sopt->sopt_val, mtod(m, char *), m->m_len);
2597 if (error != 0) {
2598 m_freem(m0);
2599 return(error);
2600 }
2601 } else
2602 bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), mtod(m, char *), m->m_len);
2603 sopt->sopt_valsize -= m->m_len;
2604 sopt->sopt_val += m->m_len;
2605 m = m->m_next;
2606 }
2607 	if (m != NULL)	/* chain should have been allocated with enough space by ip6_sooptmcopyin() */
2608 panic("soopt_mcopyin");
2609 return 0;
2610 }
2611
2612 /* XXX; copyout mbuf chain data into soopt for (__FreeBSD__ < 3) routines. */
2613 int
2614 soopt_mcopyout(struct sockopt *sopt, struct mbuf *m)
2615 {
2616 struct mbuf *m0 = m;
2617 size_t valsize = 0;
2618
2619 if (sopt->sopt_val == USER_ADDR_NULL)
2620 return 0;
2621 while (m != NULL && sopt->sopt_valsize >= m->m_len) {
2622 if (sopt->sopt_p != NULL) {
2623 int error;
2624
2625 error = copyout(mtod(m, char *), sopt->sopt_val, m->m_len);
2626 if (error != 0) {
2627 m_freem(m0);
2628 return(error);
2629 }
2630 } else
2631 bcopy(mtod(m, char *), CAST_DOWN(caddr_t, sopt->sopt_val), m->m_len);
2632 sopt->sopt_valsize -= m->m_len;
2633 sopt->sopt_val += m->m_len;
2634 valsize += m->m_len;
2635 m = m->m_next;
2636 }
2637 if (m != NULL) {
2638 /* enough soopt buffer should be given from user-land */
2639 m_freem(m0);
2640 return(EINVAL);
2641 }
2642 sopt->sopt_valsize = valsize;
2643 return 0;
2644 }
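/*
 * soopt_getm(), soopt_mcopyin() and soopt_mcopyout() let older option code
 * that operates on mbuf chains interoperate with the struct sockopt
 * interface: soopt_getm() sizes a chain to sopt_valsize, soopt_mcopyin()
 * fills it from the caller's buffer, and soopt_mcopyout() copies a result
 * chain back and records how much was returned.  Hedged sketch of the two
 * directions; "reply" is a hypothetical chain built by the legacy option
 * code, and the helpers free the chain themselves on their failure paths:
 *
 *	// setting an option: size a chain and fill it from the caller
 *	struct mbuf *m = NULL;
 *	int error = soopt_getm(sopt, &m);
 *	if (error == 0)
 *		error = soopt_mcopyin(sopt, m);	// frees the chain on failure
 *
 *	// getting an option (independent fragment): return a result chain
 *	error = soopt_mcopyout(sopt, reply);	// frees the chain on failure
 */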
2645
2646 void
2647 sohasoutofband(so)
2648 register struct socket *so;
2649 {
2650 struct proc *p;
2651
2652 if (so->so_pgid < 0)
2653 gsignal(-so->so_pgid, SIGURG);
2654 else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
2655 psignal(p, SIGURG);
2656 selwakeup(&so->so_rcv.sb_sel);
2657 }
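/*
 * sohasoutofband() delivers SIGURG to the owning process or process group
 * when urgent data arrives; a process only sees the signal after claiming
 * ownership of the descriptor.  Minimal user-level sketch (error handling
 * omitted):
 *
 *	#include <fcntl.h>
 *	#include <signal.h>
 *	#include <sys/socket.h>
 *	#include <unistd.h>
 *
 *	static void
 *	on_urg(int sig)
 *	{
 *		// read the urgent byte, e.g. recv(fd, &c, 1, MSG_OOB)
 *	}
 *
 *	signal(SIGURG, on_urg);
 *	fcntl(fd, F_SETOWN, getpid());	// direct SIGURG at this process
 */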
2658
2659 int
2660 sopoll(struct socket *so, int events, __unused kauth_cred_t cred, void * wql)
2661 {
2662 struct proc *p = current_proc();
2663 int revents = 0;
2664
2665 socket_lock(so, 1);
2666
2667 if (events & (POLLIN | POLLRDNORM))
2668 if (soreadable(so))
2669 revents |= events & (POLLIN | POLLRDNORM);
2670
2671 if (events & (POLLOUT | POLLWRNORM))
2672 if (sowriteable(so))
2673 revents |= events & (POLLOUT | POLLWRNORM);
2674
2675 if (events & (POLLPRI | POLLRDBAND))
2676 if (so->so_oobmark || (so->so_state & SS_RCVATMARK))
2677 revents |= events & (POLLPRI | POLLRDBAND);
2678
2679 if (revents == 0) {
2680 if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
2681 /* Darwin sets the flag first, BSD calls selrecord first */
2682 so->so_rcv.sb_flags |= SB_SEL;
2683 selrecord(p, &so->so_rcv.sb_sel, wql);
2684 }
2685
2686 if (events & (POLLOUT | POLLWRNORM)) {
2687 /* Darwin sets the flag first, BSD calls selrecord first */
2688 so->so_snd.sb_flags |= SB_SEL;
2689 selrecord(p, &so->so_snd.sb_sel, wql);
2690 }
2691 }
2692
2693 socket_unlock(so, 1);
2694 return (revents);
2695 }
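/*
 * sopoll() reports readability and writability, and POLLPRI / POLLRDBAND
 * while out-of-band data is pending; when nothing is ready it records the
 * caller with selrecord() so it can be woken later.  User-level poll(2)
 * sketch:
 *
 *	#include <poll.h>
 *
 *	struct pollfd pfd = { fd, POLLIN | POLLPRI, 0 };
 *
 *	if (poll(&pfd, 1, 1000) > 0) {		// wait up to one second
 *		if (pfd.revents & POLLPRI)
 *			;	// urgent data or at the OOB mark
 *		if (pfd.revents & POLLIN)
 *			;	// normal data readable
 *	}
 */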
2696
2697 int soo_kqfilter(struct fileproc *fp, struct knote *kn, struct proc *p);
2698
2699 int
2700 soo_kqfilter(__unused struct fileproc *fp, struct knote *kn, __unused struct proc *p)
2701 {
2702 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
2703 struct sockbuf *sb;
2704 socket_lock(so, 1);
2705
2706 switch (kn->kn_filter) {
2707 case EVFILT_READ:
2708 if (so->so_options & SO_ACCEPTCONN)
2709 kn->kn_fop = &solisten_filtops;
2710 else
2711 kn->kn_fop = &soread_filtops;
2712 sb = &so->so_rcv;
2713 break;
2714 case EVFILT_WRITE:
2715 kn->kn_fop = &sowrite_filtops;
2716 sb = &so->so_snd;
2717 break;
2718 default:
2719 socket_unlock(so, 1);
2720 return (1);
2721 }
2722
2723 if (KNOTE_ATTACH(&sb->sb_sel.si_note, kn))
2724 sb->sb_flags |= SB_KNOTE;
2725 socket_unlock(so, 1);
2726 return (0);
2727 }
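/*
 * soo_kqfilter() attaches the knote to the receive buffer (using the listen
 * filter for accepting sockets) or to the send buffer; the filters below
 * honour NOTE_LOWAT, so a caller can ask to be woken only once a given
 * amount of data or space is available.  A user-level kqueue sketch:
 *
 *	#include <sys/event.h>
 *
 *	int kq = kqueue();
 *	struct kevent kev, out;
 *
 *	// fire only once at least 128 bytes are queued for reading
 *	EV_SET(&kev, fd, EVFILT_READ, EV_ADD, NOTE_LOWAT, 128, NULL);
 *	kevent(kq, &kev, 1, NULL, 0, NULL);
 *
 *	if (kevent(kq, NULL, 0, &out, 1, NULL) > 0)
 *		;	// out.data is the byte count reported by filt_soread()
 */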
2728
2729 static void
2730 filt_sordetach(struct knote *kn)
2731 {
2732 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
2733
2734 socket_lock(so, 1);
2735 if (so->so_rcv.sb_flags & SB_KNOTE)
2736 if (KNOTE_DETACH(&so->so_rcv.sb_sel.si_note, kn))
2737 so->so_rcv.sb_flags &= ~SB_KNOTE;
2738 socket_unlock(so, 1);
2739 }
2740
2741 /*ARGSUSED*/
2742 static int
2743 filt_soread(struct knote *kn, long hint)
2744 {
2745 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
2746
2747 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2748 socket_lock(so, 1);
2749
2750 if (so->so_oobmark) {
2751 if (kn->kn_flags & EV_OOBAND) {
2752 kn->kn_data = so->so_rcv.sb_cc - so->so_oobmark;
2753 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2754 socket_unlock(so, 1);
2755 return (1);
2756 }
2757 kn->kn_data = so->so_oobmark;
2758 kn->kn_flags |= EV_OOBAND;
2759 } else {
2760 kn->kn_data = so->so_rcv.sb_cc;
2761 if (so->so_state & SS_CANTRCVMORE) {
2762 kn->kn_flags |= EV_EOF;
2763 kn->kn_fflags = so->so_error;
2764 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2765 socket_unlock(so, 1);
2766 return (1);
2767 }
2768 }
2769
2770 if (so->so_state & SS_RCVATMARK) {
2771 if (kn->kn_flags & EV_OOBAND) {
2772 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2773 socket_unlock(so, 1);
2774 return (1);
2775 }
2776 kn->kn_flags |= EV_OOBAND;
2777 } else if (kn->kn_flags & EV_OOBAND) {
2778 kn->kn_data = 0;
2779 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2780 socket_unlock(so, 1);
2781 return (0);
2782 }
2783
2784 if (so->so_error) { /* temporary udp error */
2785 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2786 socket_unlock(so, 1);
2787 return (1);
2788 }
2789
2790 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2791 socket_unlock(so, 1);
2792
2793 return( kn->kn_flags & EV_OOBAND ||
2794 kn->kn_data >= ((kn->kn_sfflags & NOTE_LOWAT) ?
2795 kn->kn_sdata : so->so_rcv.sb_lowat));
2796 }
2797
2798 static void
2799 filt_sowdetach(struct knote *kn)
2800 {
2801 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
2802 socket_lock(so, 1);
2803
2804 if(so->so_snd.sb_flags & SB_KNOTE)
2805 if (KNOTE_DETACH(&so->so_snd.sb_sel.si_note, kn))
2806 so->so_snd.sb_flags &= ~SB_KNOTE;
2807 socket_unlock(so, 1);
2808 }
2809
2810 /*ARGSUSED*/
2811 static int
2812 filt_sowrite(struct knote *kn, long hint)
2813 {
2814 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
2815
2816 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2817 socket_lock(so, 1);
2818
2819 kn->kn_data = sbspace(&so->so_snd);
2820 if (so->so_state & SS_CANTSENDMORE) {
2821 kn->kn_flags |= EV_EOF;
2822 kn->kn_fflags = so->so_error;
2823 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2824 socket_unlock(so, 1);
2825 return (1);
2826 }
2827 if (so->so_error) { /* temporary udp error */
2828 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2829 socket_unlock(so, 1);
2830 return (1);
2831 }
2832 if (((so->so_state & SS_ISCONNECTED) == 0) &&
2833 (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
2834 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2835 socket_unlock(so, 1);
2836 return (0);
2837 }
2838 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2839 socket_unlock(so, 1);
2840 if (kn->kn_sfflags & NOTE_LOWAT)
2841 return (kn->kn_data >= kn->kn_sdata);
2842 return (kn->kn_data >= so->so_snd.sb_lowat);
2843 }
2844
2845 /*ARGSUSED*/
2846 static int
2847 filt_solisten(struct knote *kn, long hint)
2848 {
2849 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
2850 int isempty;
2851
2852 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2853 socket_lock(so, 1);
2854 kn->kn_data = so->so_qlen;
2855 isempty = ! TAILQ_EMPTY(&so->so_comp);
2856 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2857 socket_unlock(so, 1);
2858 return (isempty);
2859 }
2860
2861
2862 int
2863 socket_lock(so, refcount)
2864 struct socket *so;
2865 int refcount;
2866 {
2867 int error = 0, lr, lr_saved;
2868 #ifdef __ppc__
2869 __asm__ volatile("mflr %0" : "=r" (lr));
2870 lr_saved = lr;
2871 #endif
2872
2873 if (so->so_proto->pr_lock) {
2874 error = (*so->so_proto->pr_lock)(so, refcount, lr_saved);
2875 }
2876 else {
2877 #ifdef MORE_LOCKING_DEBUG
2878 lck_mtx_assert(so->so_proto->pr_domain->dom_mtx, LCK_MTX_ASSERT_NOTOWNED);
2879 #endif
2880 lck_mtx_lock(so->so_proto->pr_domain->dom_mtx);
2881 if (refcount)
2882 so->so_usecount++;
2883 so->reserved3 = (void*)lr_saved; /* save caller for refcount going to zero */
2884 }
2885
2886 return(error);
2887
2888 }
2889
2890 int
2891 socket_unlock(so, refcount)
2892 struct socket *so;
2893 int refcount;
2894 {
2895 int error = 0, lr, lr_saved;
2896 lck_mtx_t * mutex_held;
2897
2898 #ifdef __ppc__
2899 __asm__ volatile("mflr %0" : "=r" (lr));
2900 lr_saved = lr;
2901 #endif
2902
2903
2904
2905 if (so->so_proto == NULL)
2906 panic("socket_unlock null so_proto so=%x\n", so);
2907
2908 if (so && so->so_proto->pr_unlock)
2909 error = (*so->so_proto->pr_unlock)(so, refcount, lr_saved);
2910 else {
2911 mutex_held = so->so_proto->pr_domain->dom_mtx;
2912 #ifdef MORE_LOCKING_DEBUG
2913 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
2914 #endif
2915 if (refcount) {
2916 if (so->so_usecount <= 0)
2917 panic("socket_unlock: bad refcount so=%x value=%d\n", so, so->so_usecount);
2918 so->so_usecount--;
2919 if (so->so_usecount == 0) {
2920 sofreelastref(so, 1);
2921 }
2922 else
2923 so->reserved4 = (void*)lr_saved; /* save caller */
2924 }
2925 lck_mtx_unlock(mutex_held);
2926 }
2927
2928 return(error);
2929 }
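/*
 * socket_lock() and socket_unlock() implement the per-socket (or per-domain)
 * mutex plus the use count that keeps the socket alive across the blocking
 * entry points in this file; dropping the last reference from
 * socket_unlock() frees the socket via sofreelastref().  The usual
 * discipline, as followed by soshutdown(), sosetopt() and the other entry
 * points above:
 *
 *	socket_lock(so, 1);	// take the mutex and one reference
 *	...			// operate on the socket, possibly blocking
 *	socket_unlock(so, 1);	// drop the reference; the last one frees
 *
 *	// code entered with the mutex already held drops and retakes it
 *	// without touching so_usecount, as sodelayed_copy() does:
 *	socket_unlock(so, 0);
 *	...
 *	socket_lock(so, 0);
 */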
2930 //### Called with socket locked, will unlock socket
2931 void
2932 sofree(so)
2933 struct socket *so;
2934 {
2935
2936 int lr, lr_saved;
2937 lck_mtx_t * mutex_held;
2938 #ifdef __ppc__
2939 __asm__ volatile("mflr %0" : "=r" (lr));
2940 lr_saved = lr;
2941 #endif
2942 if (so->so_proto->pr_getlock != NULL)
2943 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
2944 else
2945 mutex_held = so->so_proto->pr_domain->dom_mtx;
2946 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
2947
2948 sofreelastref(so, 0);
2949 }
2950
2951 void
2952 soreference(so)
2953 struct socket *so;
2954 {
2955 socket_lock(so, 1); /* locks & take one reference on socket */
2956 socket_unlock(so, 0); /* unlock only */
2957 }
2958
2959 void
2960 sodereference(so)
2961 struct socket *so;
2962 {
2963 socket_lock(so, 0);
2964 socket_unlock(so, 1);
2965 }