[apple/xnu.git] / bsd / kern / uipc_socket.c (xnu-792.6.22)
1 /*
2 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /* Copyright (c) 1998, 1999 Apple Computer, Inc. All Rights Reserved */
23 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
24 /*
25 * Copyright (c) 1982, 1986, 1988, 1990, 1993
26 * The Regents of the University of California. All rights reserved.
27 *
28 * Redistribution and use in source and binary forms, with or without
29 * modification, are permitted provided that the following conditions
30 * are met:
31 * 1. Redistributions of source code must retain the above copyright
32 * notice, this list of conditions and the following disclaimer.
33 * 2. Redistributions in binary form must reproduce the above copyright
34 * notice, this list of conditions and the following disclaimer in the
35 * documentation and/or other materials provided with the distribution.
36 * 3. All advertising materials mentioning features or use of this software
37 * must display the following acknowledgement:
38 * This product includes software developed by the University of
39 * California, Berkeley and its contributors.
40 * 4. Neither the name of the University nor the names of its contributors
41 * may be used to endorse or promote products derived from this software
42 * without specific prior written permission.
43 *
44 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
45 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
46 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
47 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
48 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
49 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
50 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
51 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
52 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
53 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
54 * SUCH DAMAGE.
55 *
56 * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94
57 * $FreeBSD: src/sys/kern/uipc_socket.c,v 1.68.2.16 2001/06/14 20:46:06 ume Exp $
58 */
59
60 #include <sys/param.h>
61 #include <sys/systm.h>
62 #include <sys/filedesc.h>
63 #include <sys/proc_internal.h>
64 #include <sys/kauth.h>
65 #include <sys/file_internal.h>
66 #include <sys/fcntl.h>
67 #include <sys/malloc.h>
68 #include <sys/mbuf.h>
69 #include <sys/domain.h>
70 #include <sys/kernel.h>
71 #include <sys/event.h>
72 #include <sys/poll.h>
73 #include <sys/protosw.h>
74 #include <sys/socket.h>
75 #include <sys/socketvar.h>
76 #include <sys/resourcevar.h>
77 #include <sys/signalvar.h>
78 #include <sys/sysctl.h>
79 #include <sys/uio.h>
80 #include <sys/ev.h>
81 #include <sys/kdebug.h>
82 #include <net/route.h>
83 #include <netinet/in.h>
84 #include <netinet/in_pcb.h>
85 #include <kern/zalloc.h>
86 #include <kern/locks.h>
87 #include <machine/limits.h>
88
89 int so_cache_hw = 0;
90 int so_cache_timeouts = 0;
91 int so_cache_max_freed = 0;
92 int cached_sock_count = 0;
93 struct socket *socket_cache_head = 0;
94 struct socket *socket_cache_tail = 0;
95 u_long so_cache_time = 0;
96 int so_cache_init_done = 0;
97 struct zone *so_cache_zone;
98 extern int get_inpcb_str_size();
99 extern int get_tcp_str_size();
100
101 static lck_grp_t *so_cache_mtx_grp;
102 static lck_attr_t *so_cache_mtx_attr;
103 static lck_grp_attr_t *so_cache_mtx_grp_attr;
104 lck_mtx_t *so_cache_mtx;
105
106 #include <machine/limits.h>
107
108 static void filt_sordetach(struct knote *kn);
109 static int filt_soread(struct knote *kn, long hint);
110 static void filt_sowdetach(struct knote *kn);
111 static int filt_sowrite(struct knote *kn, long hint);
112 static int filt_solisten(struct knote *kn, long hint);
113
114 static struct filterops solisten_filtops =
115 { 1, NULL, filt_sordetach, filt_solisten };
116 static struct filterops soread_filtops =
117 { 1, NULL, filt_sordetach, filt_soread };
118 static struct filterops sowrite_filtops =
119 { 1, NULL, filt_sowdetach, filt_sowrite };
120
121 #define EVEN_MORE_LOCKING_DEBUG 0
122 int socket_debug = 0;
123 int socket_zone = M_SOCKET;
124 so_gen_t so_gencnt; /* generation count for sockets */
125
126 MALLOC_DEFINE(M_SONAME, "soname", "socket name");
127 MALLOC_DEFINE(M_PCB, "pcb", "protocol control block");
128
129 #define DBG_LAYER_IN_BEG NETDBG_CODE(DBG_NETSOCK, 0)
130 #define DBG_LAYER_IN_END NETDBG_CODE(DBG_NETSOCK, 2)
131 #define DBG_LAYER_OUT_BEG NETDBG_CODE(DBG_NETSOCK, 1)
132 #define DBG_LAYER_OUT_END NETDBG_CODE(DBG_NETSOCK, 3)
133 #define DBG_FNC_SOSEND NETDBG_CODE(DBG_NETSOCK, (4 << 8) | 1)
134 #define DBG_FNC_SORECEIVE NETDBG_CODE(DBG_NETSOCK, (8 << 8))
135 #define DBG_FNC_SOSHUTDOWN NETDBG_CODE(DBG_NETSOCK, (9 << 8))
136
137 #define MAX_SOOPTGETM_SIZE (128 * MCLBYTES)
138
139
140 SYSCTL_DECL(_kern_ipc);
141
142 static int somaxconn = SOMAXCONN;
143 SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW, &somaxconn,
144 0, "");
145
146 /* Should we get a maximum also ??? */
147 static int sosendmaxchain = 65536;
148 static int sosendminchain = 16384;
149 static int sorecvmincopy = 16384;
150 SYSCTL_INT(_kern_ipc, OID_AUTO, sosendminchain, CTLFLAG_RW, &sosendminchain,
151 0, "");
152 SYSCTL_INT(_kern_ipc, OID_AUTO, sorecvmincopy, CTLFLAG_RW, &sorecvmincopy,
153 0, "");
154
155 void so_cache_timer();
156
157 /*
158 * Socket operation routines.
159 * These routines are called by the routines in
160 * sys_socket.c or from a system process, and
161 * implement the semantics of socket operations by
162 * switching out to the protocol specific routines.
163 */
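/*
 * Annotation (reader's sketch of the call flow below, not original text):
 * socreate() allocates the socket via soalloc() and attaches the protocol
 * (pru_attach); sobind()/solisten()/soconnect*() run any socket filters and
 * then call the matching pru_* entry point; sosend()/soreceive() move data
 * through so_snd/so_rcv; soclose() tears the socket down via
 * sodisconnectlocked()/pru_detach() and finally sofree().
 */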
164
165 #ifdef __APPLE__
166
167 vm_size_t so_cache_zone_element_size;
168
169 static int sodelayed_copy(struct socket *so, struct uio *uio, struct mbuf **free_list, int *resid);
170
171
172 void socketinit()
173 {
174 vm_size_t str_size;
175
176 if (so_cache_init_done) {
177 printf("socketinit: already called...\n");
178 return;
179 }
180
181 /*
182 * allocate lock group attribute and group for socket cache mutex
183 */
184 so_cache_mtx_grp_attr = lck_grp_attr_alloc_init();
185 lck_grp_attr_setdefault(so_cache_mtx_grp_attr);
186
187 so_cache_mtx_grp = lck_grp_alloc_init("so_cache", so_cache_mtx_grp_attr);
188
189 /*
190 * allocate the lock attribute for socket cache mutex
191 */
192 so_cache_mtx_attr = lck_attr_alloc_init();
193 lck_attr_setdefault(so_cache_mtx_attr);
194
195 so_cache_init_done = 1;
196
197 so_cache_mtx = lck_mtx_alloc_init(so_cache_mtx_grp, so_cache_mtx_attr); /* cached sockets mutex */
198
199 if (so_cache_mtx == NULL)
200 return; /* we're hosed... */
201
202 str_size = (vm_size_t)( sizeof(struct socket) + 4 +
203 get_inpcb_str_size() + 4 +
204 get_tcp_str_size());
205 so_cache_zone = zinit (str_size, 120000*str_size, 8192, "socache zone");
206 #if TEMPDEBUG
207 printf("cached_sock_alloc -- so_cache_zone size is %x\n", str_size);
208 #endif
209 timeout(so_cache_timer, NULL, (SO_CACHE_FLUSH_INTERVAL * hz));
210
211 so_cache_zone_element_size = str_size;
212
213 sflt_init();
214
215 }
216
217 void cached_sock_alloc(so, waitok)
218 struct socket **so;
219 int waitok;
220
221 {
222 caddr_t temp;
223 register u_long offset;
224
225
226 lck_mtx_lock(so_cache_mtx);
227
228 if (cached_sock_count) {
229 cached_sock_count--;
230 *so = socket_cache_head;
231 if (*so == 0)
232 panic("cached_sock_alloc: cached sock is null");
233
234 socket_cache_head = socket_cache_head->cache_next;
235 if (socket_cache_head)
236 socket_cache_head->cache_prev = 0;
237 else
238 socket_cache_tail = 0;
239
240 lck_mtx_unlock(so_cache_mtx);
241
242 temp = (*so)->so_saved_pcb;
243 bzero((caddr_t)*so, sizeof(struct socket));
244 #if TEMPDEBUG
245 kprintf("cached_sock_alloc - retreiving cached sock %x - count == %d\n", *so,
246 cached_sock_count);
247 #endif
248 (*so)->so_saved_pcb = temp;
249 (*so)->cached_in_sock_layer = 1;
250
251 }
252 else {
253 #if TEMPDEBUG
254 kprintf("Allocating cached sock %x from memory\n", *so);
255 #endif
256
257 lck_mtx_unlock(so_cache_mtx);
258
259 if (waitok)
260 *so = (struct socket *) zalloc(so_cache_zone);
261 else
262 *so = (struct socket *) zalloc_noblock(so_cache_zone);
263
264 if (*so == 0)
265 return;
266
267 bzero((caddr_t)*so, sizeof(struct socket));
268
269 /*
270 * Define offsets for extra structures into our single block of
271 * memory. Align extra structures on longword boundaries.
272 */
273
274
275 offset = (u_long) *so;
276 offset += sizeof(struct socket);
277 if (offset & 0x3) {
278 offset += 4;
279 offset &= 0xfffffffc;
280 }
281 (*so)->so_saved_pcb = (caddr_t) offset;
282 offset += get_inpcb_str_size();
283 if (offset & 0x3) {
284 offset += 4;
285 offset &= 0xfffffffc;
286 }
287
288 ((struct inpcb *) (*so)->so_saved_pcb)->inp_saved_ppcb = (caddr_t) offset;
289 #if TEMPDEBUG
290 kprintf("Allocating cached socket - %x, pcb=%x tcpcb=%x\n", *so,
291 (*so)->so_saved_pcb,
292 ((struct inpcb *)(*so)->so_saved_pcb)->inp_saved_ppcb);
293 #endif
294 }
295
296 (*so)->cached_in_sock_layer = 1;
297 }
298
299
300 void cached_sock_free(so)
301 struct socket *so;
302 {
303
304 lck_mtx_lock(so_cache_mtx);
305
306 if (++cached_sock_count > MAX_CACHED_SOCKETS) {
307 --cached_sock_count;
308 lck_mtx_unlock(so_cache_mtx);
309 #if TEMPDEBUG
310 kprintf("Freeing overflowed cached socket %x\n", so);
311 #endif
312 zfree(so_cache_zone, so);
313 }
314 else {
315 #if TEMPDEBUG
316 kprintf("Freeing socket %x into cache\n", so);
317 #endif
318 if (so_cache_hw < cached_sock_count)
319 so_cache_hw = cached_sock_count;
320
321 so->cache_next = socket_cache_head;
322 so->cache_prev = 0;
323 if (socket_cache_head)
324 socket_cache_head->cache_prev = so;
325 else
326 socket_cache_tail = so;
327
328 so->cache_timestamp = so_cache_time;
329 socket_cache_head = so;
330 lck_mtx_unlock(so_cache_mtx);
331 }
332
333 #if TEMPDEBUG
334 kprintf("Freed cached sock %x into cache - count is %d\n", so, cached_sock_count);
335 #endif
336
337
338 }
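/*
 * Annotation (not original text): freed PF_INET/SOCK_STREAM sockets are
 * parked on a doubly-linked LIFO list (socket_cache_head/socket_cache_tail)
 * guarded by so_cache_mtx, each stamped with so_cache_time when freed.
 * so_cache_timer() below walks the tail and reclaims entries older than
 * SO_CACHE_TIME_LIMIT, freeing at most SO_CACHE_MAX_FREE_BATCH per pass.
 */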
339
340
341 void so_cache_timer()
342 {
343 register struct socket *p;
344 register int n_freed = 0;
345
346
347 lck_mtx_lock(so_cache_mtx);
348
349 ++so_cache_time;
350
351 while ( (p = socket_cache_tail) )
352 {
353 if ((so_cache_time - p->cache_timestamp) < SO_CACHE_TIME_LIMIT)
354 break;
355
356 so_cache_timeouts++;
357
358 if ( (socket_cache_tail = p->cache_prev) )
359 p->cache_prev->cache_next = 0;
360 if (--cached_sock_count == 0)
361 socket_cache_head = 0;
362
363
364 zfree(so_cache_zone, p);
365
366 if (++n_freed >= SO_CACHE_MAX_FREE_BATCH)
367 {
368 so_cache_max_freed++;
369 break;
370 }
371 }
372 lck_mtx_unlock(so_cache_mtx);
373
374 timeout(so_cache_timer, NULL, (SO_CACHE_FLUSH_INTERVAL * hz));
375
376
377 }
378 #endif /* __APPLE__ */
379
380 /*
381 * Get a socket structure from our zone, and initialize it.
382 * We don't implement `waitok' yet (see comments in uipc_domain.c).
383 * Note that it would probably be better to allocate socket
384 * and PCB at the same time, but I'm not convinced that all
385 * the protocols can be easily modified to do this.
386 */
387 struct socket *
388 soalloc(waitok, dom, type)
389 int waitok;
390 int dom;
391 int type;
392 {
393 struct socket *so;
394
395 if ((dom == PF_INET) && (type == SOCK_STREAM))
396 cached_sock_alloc(&so, waitok);
397 else
398 {
399 MALLOC_ZONE(so, struct socket *, sizeof(*so), socket_zone, M_WAITOK);
400 if (so)
401 bzero(so, sizeof *so);
402 }
403 /* XXX race condition for reentrant kernel */
404 //###LD Atomic add for so_gencnt
405 if (so) {
406 so->so_gencnt = ++so_gencnt;
407 so->so_zone = socket_zone;
408 }
409
410 return so;
411 }
412
413 int
414 socreate(dom, aso, type, proto)
415 int dom;
416 struct socket **aso;
417 register int type;
418 int proto;
419 {
420 struct proc *p = current_proc();
421 register struct protosw *prp;
422 register struct socket *so;
423 register int error = 0;
424 #if TCPDEBUG
425 extern int tcpconsdebug;
426 #endif
427 if (proto)
428 prp = pffindproto(dom, proto, type);
429 else
430 prp = pffindtype(dom, type);
431
432 if (prp == 0 || prp->pr_usrreqs->pru_attach == 0)
433 return (EPROTONOSUPPORT);
434 #ifndef __APPLE__
435
436 if (p->p_prison && jail_socket_unixiproute_only &&
437 prp->pr_domain->dom_family != PF_LOCAL &&
438 prp->pr_domain->dom_family != PF_INET &&
439 prp->pr_domain->dom_family != PF_ROUTE) {
440 return (EPROTONOSUPPORT);
441 }
442
443 #endif
444 if (prp->pr_type != type)
445 return (EPROTOTYPE);
446 so = soalloc(p != 0, dom, type);
447 if (so == 0)
448 return (ENOBUFS);
449
450 TAILQ_INIT(&so->so_incomp);
451 TAILQ_INIT(&so->so_comp);
452 so->so_type = type;
453
454 #ifdef __APPLE__
455 if (p != 0) {
456 so->so_uid = kauth_cred_getuid(kauth_cred_get());
457 if (!suser(kauth_cred_get(),NULL))
458 so->so_state = SS_PRIV;
459 }
460 #else
461 so->so_cred = kauth_cred_get_with_ref();
462 #endif
463 so->so_proto = prp;
464 #ifdef __APPLE__
465 so->so_rcv.sb_flags |= SB_RECV; /* XXX */
466 so->so_rcv.sb_so = so->so_snd.sb_so = so;
467 #endif
468
469 //### Attachment will create the per-pcb lock if necessary and increase the refcount
470
471 error = (*prp->pr_usrreqs->pru_attach)(so, proto, p);
472 if (error) {
473 /*
474 * Warning:
475 * If so_pcb is not zero, the socket will be leaked,
476 * so the protocol attachment handler must be coded carefully
477 */
478 so->so_state |= SS_NOFDREF;
479 sofreelastref(so, 1);
480 return (error);
481 }
482 so->so_usecount++;
483 #ifdef __APPLE__
484 prp->pr_domain->dom_refs++;
485 TAILQ_INIT(&so->so_evlist);
486
487 /* Attach socket filters for this protocol */
488 sflt_initsock(so);
489 #if TCPDEBUG
490 if (tcpconsdebug == 2)
491 so->so_options |= SO_DEBUG;
492 #endif
493 #endif
494
495 *aso = so;
496 return (0);
497 }
498
499 int
500 sobind(so, nam)
501 struct socket *so;
502 struct sockaddr *nam;
503
504 {
505 struct proc *p = current_proc();
506 int error = 0;
507 struct socket_filter_entry *filter;
508 int filtered = 0;
509
510 socket_lock(so, 1);
511
512 /* Socket filter */
513 error = 0;
514 for (filter = so->so_filt; filter && (error == 0);
515 filter = filter->sfe_next_onsocket) {
516 if (filter->sfe_filter->sf_filter.sf_bind) {
517 if (filtered == 0) {
518 filtered = 1;
519 sflt_use(so);
520 socket_unlock(so, 0);
521 }
522 error = filter->sfe_filter->sf_filter.sf_bind(
523 filter->sfe_cookie, so, nam);
524 }
525 }
526 if (filtered != 0) {
527 socket_lock(so, 0);
528 sflt_unuse(so);
529 }
530 /* End socket filter */
531
532 if (error == 0)
533 error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p);
534
535 socket_unlock(so, 1);
536
537 if (error == EJUSTRETURN)
538 error = 0;
539
540 return (error);
541 }
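/*
 * Annotation (not original text): the filter walk above is the pattern
 * repeated in solisten(), soconnectlock(), sosend() and sosetopt(): the
 * first matching filter marks the socket in use (sflt_use) and drops the
 * socket lock so the filters run unlocked; an EJUSTRETURN result means a
 * filter consumed the operation, and no error is reported to the caller.
 */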
542
543 void
544 sodealloc(so)
545 struct socket *so;
546 {
547 so->so_gencnt = ++so_gencnt;
548
549 #ifndef __APPLE__
550 if (so->so_rcv.sb_hiwat)
551 (void)chgsbsize(so->so_cred->cr_uidinfo,
552 &so->so_rcv.sb_hiwat, 0, RLIM_INFINITY);
553 if (so->so_snd.sb_hiwat)
554 (void)chgsbsize(so->so_cred->cr_uidinfo,
555 &so->so_snd.sb_hiwat, 0, RLIM_INFINITY);
556 #ifdef INET
557 if (so->so_accf != NULL) {
558 if (so->so_accf->so_accept_filter != NULL &&
559 so->so_accf->so_accept_filter->accf_destroy != NULL) {
560 so->so_accf->so_accept_filter->accf_destroy(so);
561 }
562 if (so->so_accf->so_accept_filter_str != NULL)
563 FREE(so->so_accf->so_accept_filter_str, M_ACCF);
564 FREE(so->so_accf, M_ACCF);
565 }
566 #endif /* INET */
567 kauth_cred_rele(so->so_cred);
568 zfreei(so->so_zone, so);
569 #else
570 if (so->cached_in_sock_layer == 1)
571 cached_sock_free(so);
572 else {
573 if (so->cached_in_sock_layer == -1)
574 panic("sodealloc: double dealloc: so=%x\n", so);
575 so->cached_in_sock_layer = -1;
576 FREE_ZONE(so, sizeof(*so), so->so_zone);
577 }
578 #endif /* __APPLE__ */
579 }
580
581 int
582 solisten(so, backlog)
583 register struct socket *so;
584 int backlog;
585
586 {
587 struct proc *p = current_proc();
588 int error;
589
590 socket_lock(so, 1);
591
592 {
593 struct socket_filter_entry *filter;
594 int filtered = 0;
595 error = 0;
596 for (filter = so->so_filt; filter && (error == 0);
597 filter = filter->sfe_next_onsocket) {
598 if (filter->sfe_filter->sf_filter.sf_listen) {
599 if (filtered == 0) {
600 filtered = 1;
601 sflt_use(so);
602 socket_unlock(so, 0);
603 }
604 error = filter->sfe_filter->sf_filter.sf_listen(
605 filter->sfe_cookie, so);
606 }
607 }
608 if (filtered != 0) {
609 socket_lock(so, 0);
610 sflt_unuse(so);
611 }
612 }
613
614 if (error == 0) {
615 error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p);
616 }
617
618 if (error) {
619 socket_unlock(so, 1);
620 if (error == EJUSTRETURN)
621 error = 0;
622 return (error);
623 }
624
625 if (TAILQ_EMPTY(&so->so_comp))
626 so->so_options |= SO_ACCEPTCONN;
627 if (backlog < 0 || backlog > somaxconn)
628 backlog = somaxconn;
629 so->so_qlimit = backlog;
630
631 socket_unlock(so, 1);
632 return (0);
633 }
634
635 void
636 sofreelastref(so, dealloc)
637 register struct socket *so;
638 int dealloc;
639 {
640 int error;
641 struct socket *head = so->so_head;
642
643 /*### Assume socket is locked */
644
645 /* Remove any filters - may be called more than once */
646 sflt_termsock(so);
647
648 if ((!(so->so_flags & SOF_PCBCLEARING)) || ((so->so_state & SS_NOFDREF) == 0)) {
649 #ifdef __APPLE__
650 selthreadclear(&so->so_snd.sb_sel);
651 selthreadclear(&so->so_rcv.sb_sel);
652 so->so_rcv.sb_flags &= ~SB_UPCALL;
653 so->so_snd.sb_flags &= ~SB_UPCALL;
654 #endif
655 return;
656 }
657 if (head != NULL) {
658 socket_lock(head, 1);
659 if (so->so_state & SS_INCOMP) {
660 TAILQ_REMOVE(&head->so_incomp, so, so_list);
661 head->so_incqlen--;
662 } else if (so->so_state & SS_COMP) {
663 /*
664 * We must not decommission a socket that's
665 * on the accept(2) queue. If we do, then
666 * accept(2) may hang after select(2) indicated
667 * that the listening socket was ready.
668 */
669 #ifdef __APPLE__
670 selthreadclear(&so->so_snd.sb_sel);
671 selthreadclear(&so->so_rcv.sb_sel);
672 so->so_rcv.sb_flags &= ~SB_UPCALL;
673 so->so_snd.sb_flags &= ~SB_UPCALL;
674 #endif
675 socket_unlock(head, 1);
676 return;
677 } else {
678 panic("sofree: not queued");
679 }
680 head->so_qlen--;
681 so->so_state &= ~SS_INCOMP;
682 so->so_head = NULL;
683 socket_unlock(head, 1);
684 }
685 #ifdef __APPLE__
686 selthreadclear(&so->so_snd.sb_sel);
687 sbrelease(&so->so_snd);
688 #endif
689 sorflush(so);
690
691 /* 3932268: disable upcall */
692 so->so_rcv.sb_flags &= ~SB_UPCALL;
693 so->so_snd.sb_flags &= ~SB_UPCALL;
694
695 if (dealloc)
696 sodealloc(so);
697 }
698
699 /*
700 * Close a socket on last file table reference removal.
701 * Initiate disconnect if connected.
702 * Free socket when disconnect complete.
703 */
704 int
705 soclose_locked(so)
706 register struct socket *so;
707 {
708 int error = 0;
709 lck_mtx_t * mutex_held;
710 struct timespec ts;
711
712 if (so->so_usecount == 0) {
713 panic("soclose: so=%x refcount=0\n", so);
714 }
715
716 sflt_notify(so, sock_evt_closing, NULL);
717
718 if ((so->so_options & SO_ACCEPTCONN)) {
719 struct socket *sp;
720
721 /* We do not want new connections to be added to the connection queues */
722 so->so_options &= ~SO_ACCEPTCONN;
723
724 while ((sp = TAILQ_FIRST(&so->so_incomp)) != NULL) {
725 /* A bit tricky here. With a protocol global lock we
726 * just keep holding the one lock, but in the
727 * per-socket lock case we want the incoming socket,
728 * not the head, locked across the soabort() call...
729 */
730 if (so->so_proto->pr_getlock != NULL)
731 socket_lock(sp, 1);
732 if (so->so_proto->pr_getlock != NULL)
733 socket_unlock(so, 0);
734 (void) soabort(sp);
735 if (so->so_proto->pr_getlock != NULL)
736 socket_lock(so, 0);
737 if (so->so_proto->pr_getlock != NULL)
738 socket_unlock(sp, 1);
739 }
740
741 while ((sp = TAILQ_FIRST(&so->so_comp)) != NULL) {
742 if (so->so_proto->pr_getlock != NULL)
743 socket_lock(sp, 1);
744
745 /* Dequeue from so_comp since sofree() won't do it */
746 TAILQ_REMOVE(&so->so_comp, sp, so_list);
747 so->so_qlen--;
748 sp->so_state &= ~SS_COMP;
749 sp->so_head = NULL;
750
751 if (so->so_proto->pr_getlock != NULL)
752 socket_unlock(so, 0);
753 (void) soabort(sp);
754 if (so->so_proto->pr_getlock != NULL)
755 socket_lock(so, 0);
756 if (so->so_proto->pr_getlock != NULL)
757 socket_unlock(sp, 1);
758 }
759 }
760 if (so->so_pcb == 0) {
761 /* 3915887: mark the socket as ready for dealloc */
762 so->so_flags |= SOF_PCBCLEARING;
763 goto discard;
764 }
765 if (so->so_state & SS_ISCONNECTED) {
766 if ((so->so_state & SS_ISDISCONNECTING) == 0) {
767 error = sodisconnectlocked(so);
768 if (error)
769 goto drop;
770 }
771 if (so->so_options & SO_LINGER) {
772 if ((so->so_state & SS_ISDISCONNECTING) &&
773 (so->so_state & SS_NBIO))
774 goto drop;
775 if (so->so_proto->pr_getlock != NULL)
776 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
777 else
778 mutex_held = so->so_proto->pr_domain->dom_mtx;
779 while (so->so_state & SS_ISCONNECTED) {
780 ts.tv_sec = (so->so_linger/100);
781 ts.tv_nsec = (so->so_linger % 100) * NSEC_PER_USEC * 1000 * 10;
782 error = msleep((caddr_t)&so->so_timeo, mutex_held,
783 PSOCK | PCATCH, "soclos", &ts);
784 if (error) {
785 /* It's OK if the linger timeout fires; don't report an error */
786 if (error == EWOULDBLOCK)
787 error = 0;
788 break;
789 }
790 }
791 }
792 }
793 drop:
794 if (so->so_usecount == 0)
795 panic("soclose: usecount is zero so=%x\n", so);
796 if (so->so_pcb && !(so->so_flags & SOF_PCBCLEARING)) {
797 int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so);
798 if (error == 0)
799 error = error2;
800 }
801 if (so->so_usecount <= 0)
802 panic("soclose: usecount is zero so=%x\n", so);
803 discard:
804 if (so->so_pcb && so->so_state & SS_NOFDREF)
805 panic("soclose: NOFDREF");
806 so->so_state |= SS_NOFDREF;
807 #ifdef __APPLE__
808 so->so_proto->pr_domain->dom_refs--;
809 evsofree(so);
810 #endif
811 so->so_usecount--;
812 sofree(so);
813 return (error);
814 }
815
816 int
817 soclose(so)
818 register struct socket *so;
819 {
820 int error = 0;
821 socket_lock(so, 1);
822 if (so->so_retaincnt == 0)
823 error = soclose_locked(so);
824 else { /* if the FD is going away, but socket is retained in kernel remove its reference */
825 so->so_usecount--;
826 if (so->so_usecount < 2)
827 panic("soclose: retaincnt non null and so=%x usecount=%x\n", so->so_usecount);
828 }
829 socket_unlock(so, 1);
830 return (error);
831 }
832
833
834 /*
835 * Must be called at splnet...
836 */
837 //#### Should already be locked
838 int
839 soabort(so)
840 struct socket *so;
841 {
842 int error;
843
844 #ifdef MORE_LOCKING_DEBUG
845 lck_mtx_t * mutex_held;
846
847 if (so->so_proto->pr_getlock != NULL)
848 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
849 else
850 mutex_held = so->so_proto->pr_domain->dom_mtx;
851 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
852 #endif
853
854 error = (*so->so_proto->pr_usrreqs->pru_abort)(so);
855 if (error) {
856 sofree(so);
857 return error;
858 }
859 return (0);
860 }
861
862 int
863 soacceptlock(so, nam, dolock)
864 register struct socket *so;
865 struct sockaddr **nam;
866 int dolock;
867 {
868 int error;
869
870 if (dolock) socket_lock(so, 1);
871
872 if ((so->so_state & SS_NOFDREF) == 0)
873 panic("soaccept: !NOFDREF");
874 so->so_state &= ~SS_NOFDREF;
875 error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam);
876
877 if (dolock) socket_unlock(so, 1);
878 return (error);
879 }
880 int
881 soaccept(so, nam)
882 register struct socket *so;
883 struct sockaddr **nam;
884 {
885 return (soacceptlock(so, nam, 1));
886 }
887
888 int
889 soconnectlock(so, nam, dolock)
890 register struct socket *so;
891 struct sockaddr *nam;
892 int dolock;
893
894 {
895 int s;
896 int error;
897 struct proc *p = current_proc();
898
899 if (dolock) socket_lock(so, 1);
900
901 if (so->so_options & SO_ACCEPTCONN) {
902 if (dolock) socket_unlock(so, 1);
903 return (EOPNOTSUPP);
904 }
905 /*
906 * If protocol is connection-based, can only connect once.
907 * Otherwise, if connected, try to disconnect first.
908 * This allows user to disconnect by connecting to, e.g.,
909 * a null address.
910 */
911 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
912 ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
913 (error = sodisconnectlocked(so))))
914 error = EISCONN;
915 else {
916 /*
917 * Run connect filter before calling protocol:
918 * - non-blocking connect returns before completion;
919 */
920 {
921 struct socket_filter_entry *filter;
922 int filtered = 0;
923 error = 0;
924 for (filter = so->so_filt; filter && (error == 0);
925 filter = filter->sfe_next_onsocket) {
926 if (filter->sfe_filter->sf_filter.sf_connect_out) {
927 if (filtered == 0) {
928 filtered = 1;
929 sflt_use(so);
930 socket_unlock(so, 0);
931 }
932 error = filter->sfe_filter->sf_filter.sf_connect_out(
933 filter->sfe_cookie, so, nam);
934 }
935 }
936 if (filtered != 0) {
937 socket_lock(so, 0);
938 sflt_unuse(so);
939 }
940 }
941 if (error) {
942 if (error == EJUSTRETURN)
943 error = 0;
944 if (dolock) socket_unlock(so, 1);
945 return error;
946 }
947
948 error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, p);
949 }
950 if (dolock) socket_unlock(so, 1);
951 return (error);
952 }
953
954 int
955 soconnect(so, nam)
956 register struct socket *so;
957 struct sockaddr *nam;
958 {
959 return (soconnectlock(so, nam, 1));
960 }
961
962 int
963 soconnect2(so1, so2)
964 register struct socket *so1;
965 struct socket *so2;
966 {
967 int error;
968 //####### Assumes so1 is already locked
969
970 socket_lock(so2, 1);
971
972 error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2);
973
974 socket_unlock(so2, 1);
975 return (error);
976 }
977
978
979 int
980 sodisconnectlocked(so)
981 register struct socket *so;
982 {
983 int error;
984
985 if ((so->so_state & SS_ISCONNECTED) == 0) {
986 error = ENOTCONN;
987 goto bad;
988 }
989 if (so->so_state & SS_ISDISCONNECTING) {
990 error = EALREADY;
991 goto bad;
992 }
993
994 error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so);
995
996 if (error == 0) {
997 sflt_notify(so, sock_evt_disconnected, NULL);
998 }
999
1000 bad:
1001 return (error);
1002 }
1003 //### Locking version
1004 int
1005 sodisconnect(so)
1006 register struct socket *so;
1007 {
1008 int error;
1009
1010 socket_lock(so, 1);
1011 error = sodisconnectlocked(so);
1012 socket_unlock(so, 1);
1013 return(error);
1014 }
1015
1016 #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_DONTWAIT : M_WAIT)
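/*
 * Annotation (not original text): SBLOCKWAIT maps MSG_DONTWAIT onto a
 * non-blocking sblock() request (M_DONTWAIT) and uses M_WAIT otherwise.
 */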
1017
1018 /*
1019 * sosendcheck will lock the socket buffer if it isn't locked and
1020 * verify that there is space for the data being inserted.
1021 */
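/*
 * Annotation (summary of the checks below, not original text): sosendcheck
 * returns EPIPE if the socket can no longer send, any pending so_error,
 * ENOTCONN or EDESTADDRREQ for unconnected connection-oriented sockets,
 * EMSGSIZE when an atomic send exceeds the send buffer high-water mark, and
 * EWOULDBLOCK (or blocks in sbwait and retries) when short on buffer space.
 */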
1022
1023 static int
1024 sosendcheck(
1025 struct socket *so,
1026 struct sockaddr *addr,
1027 long resid,
1028 long clen,
1029 long atomic,
1030 int flags,
1031 int *sblocked)
1032 {
1033 int error = 0;
1034 long space;
1035 int assumelock = 0;
1036
1037 restart:
1038 if (*sblocked == 0) {
1039 if ((so->so_snd.sb_flags & SB_LOCK) != 0 &&
1040 so->so_send_filt_thread != 0 &&
1041 so->so_send_filt_thread == current_thread()) {
1042 /*
1043 * We're being called recursively from a filter,
1044 * allow this to continue. Radar 4150520.
1045 * Don't set sblocked because we don't want
1046 * to perform an unlock later.
1047 */
1048 assumelock = 1;
1049 }
1050 else {
1051 error = sblock(&so->so_snd, SBLOCKWAIT(flags));
1052 if (error) {
1053 return error;
1054 }
1055 *sblocked = 1;
1056 }
1057 }
1058
1059 if (so->so_state & SS_CANTSENDMORE)
1060 return EPIPE;
1061
1062 if (so->so_error) {
1063 error = so->so_error;
1064 so->so_error = 0;
1065 return error;
1066 }
1067
1068 if ((so->so_state & SS_ISCONNECTED) == 0) {
1069 /*
1070 * `sendto' and `sendmsg' are allowed on a connection-
1071 * based socket if it supports implied connect.
1072 * Return ENOTCONN if not connected and no address is
1073 * supplied.
1074 */
1075 if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
1076 (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
1077 if ((so->so_state & SS_ISCONFIRMING) == 0 &&
1078 !(resid == 0 && clen != 0))
1079 return ENOTCONN;
1080 } else if (addr == 0 && !(flags&MSG_HOLD))
1081 return (so->so_proto->pr_flags & PR_CONNREQUIRED) ? ENOTCONN : EDESTADDRREQ;
1082 }
1083 space = sbspace(&so->so_snd);
1084 if (flags & MSG_OOB)
1085 space += 1024;
1086 if ((atomic && resid > so->so_snd.sb_hiwat) ||
1087 clen > so->so_snd.sb_hiwat)
1088 return EMSGSIZE;
1089 if (space < resid + clen &&
1090 (atomic || space < so->so_snd.sb_lowat || space < clen)) {
1091 if ((so->so_state & SS_NBIO) || (flags & MSG_NBIO) || assumelock) {
1092 return EWOULDBLOCK;
1093 }
1094 sbunlock(&so->so_snd, 1);
1095 error = sbwait(&so->so_snd);
1096 if (error) {
1097 return error;
1098 }
1099 goto restart;
1100 }
1101
1102 return 0;
1103 }
1104
1105 /*
1106 * Send on a socket.
1107 * If send must go all at once and message is larger than
1108 * send buffering, then hard error.
1109 * Lock against other senders.
1110 * If must go all at once and not enough room now, then
1111 * inform user that this would block and do nothing.
1112 * Otherwise, if nonblocking, send as much as possible.
1113 * The data to be sent is described by "uio" if nonzero,
1114 * otherwise by the mbuf chain "top" (which must be null
1115 * if uio is not). Data provided in mbuf chain must be small
1116 * enough to send all at once.
1117 *
1118 * Returns nonzero on error, timeout or signal; callers
1119 * must check for short counts if EINTR/ERESTART are returned.
1120 * Data and control buffers are freed on return.
1121 * Experiment:
1122 * MSG_HOLD: go thru most of sosend(), but just enqueue the mbuf
1123 * MSG_SEND: go thru as for MSG_HOLD on current fragment, then
1124 * point at the mbuf chain being constructed and go from there.
1125 */
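/*
 * Annotation (outline of the loop below, not original text): sosendcheck()
 * reserves the send buffer and validates socket state, the uio data is
 * gathered into an mbuf chain (preferring cached clusters obtained through
 * m_getpackets_internal), any attached socket filters see the chain via
 * sf_data_out, and the result is handed to the protocol with pru_send.
 * This repeats until resid reaches zero or an error occurs.
 */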
1126 int
1127 sosend(so, addr, uio, top, control, flags)
1128 register struct socket *so;
1129 struct sockaddr *addr;
1130 struct uio *uio;
1131 struct mbuf *top;
1132 struct mbuf *control;
1133 int flags;
1134
1135 {
1136 struct mbuf **mp;
1137 register struct mbuf *m, *freelist = NULL;
1138 register long space, len, resid;
1139 int clen = 0, error, dontroute, mlen, sendflags;
1140 int atomic = sosendallatonce(so) || top;
1141 int sblocked = 0;
1142 struct proc *p = current_proc();
1143
1144 if (uio)
1145 // LP64todo - fix this!
1146 resid = uio_resid(uio);
1147 else
1148 resid = top->m_pkthdr.len;
1149
1150 KERNEL_DEBUG((DBG_FNC_SOSEND | DBG_FUNC_START),
1151 so,
1152 resid,
1153 so->so_snd.sb_cc,
1154 so->so_snd.sb_lowat,
1155 so->so_snd.sb_hiwat);
1156
1157 socket_lock(so, 1);
1158
1159 /*
1160 * In theory resid should be unsigned.
1161 * However, space must be signed, as it might be less than 0
1162 * if we over-committed, and we must use a signed comparison
1163 * of space and resid. On the other hand, a negative resid
1164 * causes us to loop sending 0-length segments to the protocol.
1165 *
1166 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
1167 * type sockets since that's an error.
1168 */
1169 if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
1170 error = EINVAL;
1171 socket_unlock(so, 1);
1172 goto out;
1173 }
1174
1175 dontroute =
1176 (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
1177 (so->so_proto->pr_flags & PR_ATOMIC);
1178 if (p)
1179 p->p_stats->p_ru.ru_msgsnd++;
1180 if (control)
1181 clen = control->m_len;
1182
1183 do {
1184 error = sosendcheck(so, addr, resid, clen, atomic, flags, &sblocked);
1185 if (error) {
1186 goto release;
1187 }
1188 mp = &top;
1189 space = sbspace(&so->so_snd) - clen + ((flags & MSG_OOB) ? 1024 : 0);
1190
1191 do {
1192
1193 if (uio == NULL) {
1194 /*
1195 * Data is prepackaged in "top".
1196 */
1197 resid = 0;
1198 if (flags & MSG_EOR)
1199 top->m_flags |= M_EOR;
1200 } else {
1201 int chainlength;
1202 int bytes_to_copy;
1203
1204 bytes_to_copy = min(resid, space);
1205
1206 if (sosendminchain > 0) {
1207 chainlength = 0;
1208 } else
1209 chainlength = sosendmaxchain;
1210
1211 socket_unlock(so, 0);
1212
1213 do {
1214 int num_needed;
1215 int hdrs_needed = (top == 0) ? 1 : 0;
1216
1217 /*
1218 * Try to maintain a local cache of mbuf clusters needed to complete this write;
1219 * the list is further limited to the number that are currently needed to fill the socket.
1220 * This mechanism allows a large number of mbufs/clusters to be grabbed under a single
1221 * mbuf lock... if we can't get any clusters, then fall back to trying for mbufs.
1222 * If we fail early (or miscalculate the number needed), make sure to release any clusters
1223 * we haven't yet consumed.
1224 */
1225 if (freelist == NULL && bytes_to_copy > MCLBYTES) {
1226 num_needed = bytes_to_copy / NBPG;
1227
1228 if ((bytes_to_copy - (num_needed * NBPG)) >= MINCLSIZE)
1229 num_needed++;
1230
1231 freelist = m_getpackets_internal(&num_needed, hdrs_needed, M_WAIT, 0, NBPG);
1232 /* Fall back to cluster size if allocation failed */
1233 }
1234
1235 if (freelist == NULL && bytes_to_copy > MINCLSIZE) {
1236 num_needed = bytes_to_copy / MCLBYTES;
1237
1238 if ((bytes_to_copy - (num_needed * MCLBYTES)) >= MINCLSIZE)
1239 num_needed++;
1240
1241 freelist = m_getpackets_internal(&num_needed, hdrs_needed, M_WAIT, 0, MCLBYTES);
1242 /* Fall back to a single mbuf if allocation failed */
1243 }
1244
1245 if (freelist == NULL) {
1246 if (top == 0)
1247 MGETHDR(freelist, M_WAIT, MT_DATA);
1248 else
1249 MGET(freelist, M_WAIT, MT_DATA);
1250
1251 if (freelist == NULL) {
1252 error = ENOBUFS;
1253 socket_lock(so, 0);
1254 goto release;
1255 }
1256 /*
1257 * For datagram protocols, leave room
1258 * for protocol headers in first mbuf.
1259 */
1260 if (atomic && top == 0 && bytes_to_copy < MHLEN)
1261 MH_ALIGN(freelist, bytes_to_copy);
1262 }
1263 m = freelist;
1264 freelist = m->m_next;
1265 m->m_next = NULL;
1266
1267 if ((m->m_flags & M_EXT))
1268 mlen = m->m_ext.ext_size;
1269 else if ((m->m_flags & M_PKTHDR))
1270 mlen = MHLEN - m_leadingspace(m);
1271 else
1272 mlen = MLEN;
1273 len = min(mlen, bytes_to_copy);
1274
1275 chainlength += len;
1276
1277 space -= len;
1278
1279 error = uiomove(mtod(m, caddr_t), (int)len, uio);
1280
1281 // LP64todo - fix this!
1282 resid = uio_resid(uio);
1283
1284 m->m_len = len;
1285 *mp = m;
1286 top->m_pkthdr.len += len;
1287 if (error)
1288 break;
1289 mp = &m->m_next;
1290 if (resid <= 0) {
1291 if (flags & MSG_EOR)
1292 top->m_flags |= M_EOR;
1293 break;
1294 }
1295 bytes_to_copy = min(resid, space);
1296
1297 } while (space > 0 && (chainlength < sosendmaxchain || atomic || resid < MINCLSIZE));
1298
1299 socket_lock(so, 0);
1300
1301 if (error)
1302 goto release;
1303 }
1304
1305 if (flags & (MSG_HOLD|MSG_SEND))
1306 {
1307 /* Enqueue for later, go away if HOLD */
1308 register struct mbuf *mb1;
1309 if (so->so_temp && (flags & MSG_FLUSH))
1310 {
1311 m_freem(so->so_temp);
1312 so->so_temp = NULL;
1313 }
1314 if (so->so_temp)
1315 so->so_tail->m_next = top;
1316 else
1317 so->so_temp = top;
1318 mb1 = top;
1319 while (mb1->m_next)
1320 mb1 = mb1->m_next;
1321 so->so_tail = mb1;
1322 if (flags & MSG_HOLD)
1323 {
1324 top = NULL;
1325 goto release;
1326 }
1327 top = so->so_temp;
1328 }
1329 if (dontroute)
1330 so->so_options |= SO_DONTROUTE;
1331 /* Compute flags here, for pru_send and NKEs */
1332 sendflags = (flags & MSG_OOB) ? PRUS_OOB :
1333 /*
1334 * If the user set MSG_EOF, the protocol
1335 * understands this flag and nothing left to
1336 * send then use PRU_SEND_EOF instead of PRU_SEND.
1337 */
1338 ((flags & MSG_EOF) &&
1339 (so->so_proto->pr_flags & PR_IMPLOPCL) &&
1340 (resid <= 0)) ?
1341 PRUS_EOF :
1342 /* If there is more to send set PRUS_MORETOCOME */
1343 (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0;
1344
1345 /*
1346 * Socket filter processing
1347 */
1348 {
1349 struct socket_filter_entry *filter;
1350 int filtered;
1351
1352 filtered = 0;
1353 error = 0;
1354 for (filter = so->so_filt; filter && (error == 0);
1355 filter = filter->sfe_next_onsocket) {
1356 if (filter->sfe_filter->sf_filter.sf_data_out) {
1357 int so_flags = 0;
1358 if (filtered == 0) {
1359 filtered = 1;
1360 so->so_send_filt_thread = current_thread();
1361 socket_unlock(so, 0);
1362 so_flags = (sendflags & MSG_OOB) ? sock_data_filt_flag_oob : 0;
1363 }
1364 error = filter->sfe_filter->sf_filter.sf_data_out(
1365 filter->sfe_cookie, so, addr, &top, &control, so_flags);
1366 }
1367 }
1368
1369 if (filtered) {
1370 /*
1371 * At this point, we've run at least one filter.
1372 * The socket is unlocked as is the socket buffer.
1373 */
1374 socket_lock(so, 0);
1375 so->so_send_filt_thread = 0;
1376 if (error) {
1377 if (error == EJUSTRETURN) {
1378 error = 0;
1379 clen = 0;
1380 control = 0;
1381 top = 0;
1382 }
1383
1384 goto release;
1385 }
1386 }
1387 }
1388 /*
1389 * End Socket filter processing
1390 */
1391
1392 if (error == EJUSTRETURN) {
1393 /* A socket filter handled this data */
1394 error = 0;
1395 }
1396 else {
1397 error = (*so->so_proto->pr_usrreqs->pru_send)(so,
1398 sendflags, top, addr, control, p);
1399 }
1400 #ifdef __APPLE__
1401 if (flags & MSG_SEND)
1402 so->so_temp = NULL;
1403 #endif
1404 if (dontroute)
1405 so->so_options &= ~SO_DONTROUTE;
1406 clen = 0;
1407 control = 0;
1408 top = 0;
1409 mp = &top;
1410 if (error)
1411 goto release;
1412 } while (resid && space > 0);
1413 } while (resid);
1414
1415 release:
1416 if (sblocked)
1417 sbunlock(&so->so_snd, 0); /* will unlock socket */
1418 else
1419 socket_unlock(so, 1);
1420 out:
1421 if (top)
1422 m_freem(top);
1423 if (control)
1424 m_freem(control);
1425 if (freelist)
1426 m_freem_list(freelist);
1427
1428 KERNEL_DEBUG(DBG_FNC_SOSEND | DBG_FUNC_END,
1429 so,
1430 resid,
1431 so->so_snd.sb_cc,
1432 space,
1433 error);
1434
1435 return (error);
1436 }
1437
1438 /*
1439 * Implement receive operations on a socket.
1440 * We depend on the way that records are added to the sockbuf
1441 * by sbappend*. In particular, each record (mbufs linked through m_next)
1442 * must begin with an address if the protocol so specifies,
1443 * followed by an optional mbuf or mbufs containing ancillary data,
1444 * and then zero or more mbufs of data.
1445 * In order to avoid blocking network input processing for the entire time here,
1446 * we drop the socket lock while doing the actual copy to user space.
1447 * Although the sockbuf is locked, new data may still be appended,
1448 * and thus we must maintain consistency of the sockbuf during that time.
1449 *
1450 * The caller may receive the data as a single mbuf chain by supplying
1451 * an mbuf **mp0 for use in returning the chain. The uio is then used
1452 * only for the count in uio_resid.
1453 */
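/*
 * Annotation (not original text): each record in so_rcv, as built by the
 * sbappend* routines and consumed below, is a chain linked through m_next:
 * an optional MT_SONAME mbuf, then zero or more MT_CONTROL mbufs, then the
 * data mbufs; records are linked to one another through m_nextpkt.
 */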
1454 int
1455 soreceive(so, psa, uio, mp0, controlp, flagsp)
1456 register struct socket *so;
1457 struct sockaddr **psa;
1458 struct uio *uio;
1459 struct mbuf **mp0;
1460 struct mbuf **controlp;
1461 int *flagsp;
1462 {
1463 register struct mbuf *m, **mp, *ml = NULL;
1464 register int flags, len, error, offset;
1465 struct protosw *pr = so->so_proto;
1466 struct mbuf *nextrecord;
1467 int moff, type = 0;
1468 // LP64todo - fix this!
1469 int orig_resid = uio_resid(uio);
1470 volatile struct mbuf *free_list;
1471 volatile int delayed_copy_len;
1472 int can_delay;
1473 int need_event;
1474 struct proc *p = current_proc();
1475
1476
1477 // LP64todo - fix this!
1478 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_START,
1479 so,
1480 uio_resid(uio),
1481 so->so_rcv.sb_cc,
1482 so->so_rcv.sb_lowat,
1483 so->so_rcv.sb_hiwat);
1484
1485 socket_lock(so, 1);
1486
1487 #ifdef MORE_LOCKING_DEBUG
1488 if (so->so_usecount == 1)
1489 panic("soreceive: so=%x no other reference on socket\n", so);
1490 #endif
1491 mp = mp0;
1492 if (psa)
1493 *psa = 0;
1494 if (controlp)
1495 *controlp = 0;
1496 if (flagsp)
1497 flags = *flagsp &~ MSG_EOR;
1498 else
1499 flags = 0;
1500 /*
1501 * When SO_WANTOOBFLAG is set we try to get out-of-band data
1502 * regardless of the flags argument. Here is the case where
1503 * out-of-band data is not inline.
1504 */
1505 if ((flags & MSG_OOB) ||
1506 ((so->so_options & SO_WANTOOBFLAG) != 0 &&
1507 (so->so_options & SO_OOBINLINE) == 0 &&
1508 (so->so_oobmark || (so->so_state & SS_RCVATMARK)))) {
1509 m = m_get(M_WAIT, MT_DATA);
1510 if (m == NULL) {
1511 socket_unlock(so, 1);
1512 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, ENOBUFS,0,0,0,0);
1513 return (ENOBUFS);
1514 }
1515 error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
1516 if (error)
1517 goto bad;
1518 socket_unlock(so, 0);
1519 do {
1520 // LP64todo - fix this!
1521 error = uiomove(mtod(m, caddr_t),
1522 (int) min(uio_resid(uio), m->m_len), uio);
1523 m = m_free(m);
1524 } while (uio_resid(uio) && error == 0 && m);
1525 socket_lock(so, 0);
1526 bad:
1527 if (m)
1528 m_freem(m);
1529 #ifdef __APPLE__
1530 if ((so->so_options & SO_WANTOOBFLAG) != 0) {
1531 if (error == EWOULDBLOCK || error == EINVAL) {
1532 /*
1533 * Let's try to get normal data:
1534 * EWOULDBLOCK: out-of-band data not received yet;
1535 * EINVAL: out-of-band data already read.
1536 */
1537 error = 0;
1538 goto nooob;
1539 } else if (error == 0 && flagsp)
1540 *flagsp |= MSG_OOB;
1541 }
1542 socket_unlock(so, 1);
1543 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0);
1544 #endif
1545 return (error);
1546 }
1547 nooob:
1548 if (mp)
1549 *mp = (struct mbuf *)0;
1550 if (so->so_state & SS_ISCONFIRMING && uio_resid(uio))
1551 (*pr->pr_usrreqs->pru_rcvd)(so, 0);
1552
1553
1554 free_list = (struct mbuf *)0;
1555 delayed_copy_len = 0;
1556 restart:
1557 #ifdef MORE_LOCKING_DEBUG
1558 if (so->so_usecount <= 1)
1559 printf("soreceive: sblock so=%x ref=%d on socket\n", so, so->so_usecount);
1560 #endif
1561 error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
1562 if (error) {
1563 socket_unlock(so, 1);
1564 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0);
1565 return (error);
1566 }
1567
1568 m = so->so_rcv.sb_mb;
1569 /*
1570 * If we have less data than requested, block awaiting more
1571 * (subject to any timeout) if:
1572 * 1. the current count is less than the low water mark, or
1573 * 2. MSG_WAITALL is set, and it is possible to do the entire
1574 * receive operation at once if we block (resid <= hiwat), and
1575 * 3. MSG_DONTWAIT is not set.
1576 * If MSG_WAITALL is set but resid is larger than the receive buffer,
1577 * we have to do the receive in sections, and thus risk returning
1578 * a short count if a timeout or signal occurs after we start.
1579 */
1580 if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
1581 so->so_rcv.sb_cc < uio_resid(uio)) &&
1582 (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
1583 ((flags & MSG_WAITALL) && uio_resid(uio) <= so->so_rcv.sb_hiwat)) &&
1584 m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
1585
1586 KASSERT(m != 0 || !so->so_rcv.sb_cc, ("receive 1"));
1587 if (so->so_error) {
1588 if (m)
1589 goto dontblock;
1590 error = so->so_error;
1591 if ((flags & MSG_PEEK) == 0)
1592 so->so_error = 0;
1593 goto release;
1594 }
1595 if (so->so_state & SS_CANTRCVMORE) {
1596 if (m)
1597 goto dontblock;
1598 else
1599 goto release;
1600 }
1601 for (; m; m = m->m_next)
1602 if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
1603 m = so->so_rcv.sb_mb;
1604 goto dontblock;
1605 }
1606 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
1607 (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
1608 error = ENOTCONN;
1609 goto release;
1610 }
1611 if (uio_resid(uio) == 0)
1612 goto release;
1613 if ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO))) {
1614 error = EWOULDBLOCK;
1615 goto release;
1616 }
1617 sbunlock(&so->so_rcv, 1);
1618 #ifdef EVEN_MORE_LOCKING_DEBUG
1619 if (socket_debug)
1620 printf("Waiting for socket data\n");
1621 #endif
1622
1623 error = sbwait(&so->so_rcv);
1624 #ifdef EVEN_MORE_LOCKING_DEBUG
1625 if (socket_debug)
1626 printf("SORECEIVE - sbwait returned %d\n", error);
1627 #endif
1628 if (so->so_usecount < 1)
1629 panic("soreceive: after 2nd sblock so=%x ref=%d on socket\n", so, so->so_usecount);
1630 if (error) {
1631 socket_unlock(so, 1);
1632 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0);
1633 return (error);
1634 }
1635 goto restart;
1636 }
1637 dontblock:
1638 #ifndef __APPLE__
1639 if (uio->uio_procp)
1640 uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
1641 #else /* __APPLE__ */
1642 /*
1643 * 2207985
1644 * This should be uio->uio_procp; however, some callers of this
1645 * function use auto variables with stack garbage, and fail to
1646 * fill out the uio structure properly.
1647 */
1648 if (p)
1649 p->p_stats->p_ru.ru_msgrcv++;
1650 #endif /* __APPLE__ */
1651 nextrecord = m->m_nextpkt;
1652 if ((pr->pr_flags & PR_ADDR) && m->m_type == MT_SONAME) {
1653 KASSERT(m->m_type == MT_SONAME, ("receive 1a"));
1654 orig_resid = 0;
1655 if (psa) {
1656 *psa = dup_sockaddr(mtod(m, struct sockaddr *),
1657 mp0 == 0);
1658 if ((*psa == 0) && (flags & MSG_NEEDSA)) {
1659 error = EWOULDBLOCK;
1660 goto release;
1661 }
1662 }
1663 if (flags & MSG_PEEK) {
1664 m = m->m_next;
1665 } else {
1666 sbfree(&so->so_rcv, m);
1667 if (m->m_next == 0 && so->so_rcv.sb_cc != 0)
1668 panic("soreceive: about to create invalid socketbuf");
1669 MFREE(m, so->so_rcv.sb_mb);
1670 m = so->so_rcv.sb_mb;
1671 }
1672 }
1673 while (m && m->m_type == MT_CONTROL && error == 0) {
1674 if (flags & MSG_PEEK) {
1675 if (controlp)
1676 *controlp = m_copy(m, 0, m->m_len);
1677 m = m->m_next;
1678 } else {
1679 sbfree(&so->so_rcv, m);
1680 if (controlp) {
1681 if (pr->pr_domain->dom_externalize &&
1682 mtod(m, struct cmsghdr *)->cmsg_type ==
1683 SCM_RIGHTS) {
1684 socket_unlock(so, 0); /* release socket lock: see 3903171 */
1685 error = (*pr->pr_domain->dom_externalize)(m);
1686 socket_lock(so, 0);
1687 }
1688 *controlp = m;
1689 if (m->m_next == 0 && so->so_rcv.sb_cc != 0)
1690 panic("soreceive: so->so_rcv.sb_mb->m_next == 0 && so->so_rcv.sb_cc != 0");
1691 so->so_rcv.sb_mb = m->m_next;
1692 m->m_next = 0;
1693 m = so->so_rcv.sb_mb;
1694 } else {
1695 MFREE(m, so->so_rcv.sb_mb);
1696 m = so->so_rcv.sb_mb;
1697 }
1698 }
1699 if (controlp) {
1700 orig_resid = 0;
1701 controlp = &(*controlp)->m_next;
1702 }
1703 }
1704 if (m) {
1705 if ((flags & MSG_PEEK) == 0)
1706 m->m_nextpkt = nextrecord;
1707 type = m->m_type;
1708 if (type == MT_OOBDATA)
1709 flags |= MSG_OOB;
1710 }
1711 moff = 0;
1712 offset = 0;
1713
1714 if (!(flags & MSG_PEEK) && uio_resid(uio) > sorecvmincopy)
1715 can_delay = 1;
1716 else
1717 can_delay = 0;
1718
1719 need_event = 0;
1720
1721 while (m && (uio_resid(uio) - delayed_copy_len) > 0 && error == 0) {
1722 if (m->m_type == MT_OOBDATA) {
1723 if (type != MT_OOBDATA)
1724 break;
1725 } else if (type == MT_OOBDATA)
1726 break;
1727 #ifndef __APPLE__
1728 /*
1729 * This assertion needs rework. The trouble is AppleTalk uses many
1730 * mbuf types (NOT listed in mbuf.h!) which will trigger this panic.
1731 * For now just remove the assertion... CSM 9/98
1732 */
1733 else
1734 KASSERT(m->m_type == MT_DATA || m->m_type == MT_HEADER,
1735 ("receive 3"));
1736 #else
1737 /*
1738 * Make sure to always set the MSG_OOB flag when getting
1739 * out of band data inline.
1740 */
1741 if ((so->so_options & SO_WANTOOBFLAG) != 0 &&
1742 (so->so_options & SO_OOBINLINE) != 0 &&
1743 (so->so_state & SS_RCVATMARK) != 0) {
1744 flags |= MSG_OOB;
1745 }
1746 #endif
1747 so->so_state &= ~SS_RCVATMARK;
1748 // LP64todo - fix this!
1749 len = uio_resid(uio) - delayed_copy_len;
1750 if (so->so_oobmark && len > so->so_oobmark - offset)
1751 len = so->so_oobmark - offset;
1752 if (len > m->m_len - moff)
1753 len = m->m_len - moff;
1754 /*
1755 * If mp is set, just pass back the mbufs.
1756 * Otherwise copy them out via the uio, then free.
1757 * Sockbuf must be consistent here (points to current mbuf,
1758 * it points to next record) when we drop priority;
1759 * we must note any additions to the sockbuf when we
1760 * block interrupts again.
1761 */
1762 if (mp == 0) {
1763 if (can_delay && len == m->m_len) {
1764 /*
1765 * only delay the copy if we're consuming the
1766 * mbuf and we're NOT in MSG_PEEK mode
1767 * and we have enough data to make it worthwhile
1768 * to drop and retake the socket lock... can_delay
1769 * reflects the state of the 2 latter constraints;
1770 * moff should always be zero in these cases
1771 */
1772 delayed_copy_len += len;
1773 } else {
1774
1775 if (delayed_copy_len) {
1776 error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);
1777
1778 if (error) {
1779 goto release;
1780 }
1781 if (m != so->so_rcv.sb_mb) {
1782 /*
1783 * can only get here if MSG_PEEK is not set
1784 * therefore, m should point at the head of the rcv queue...
1785 * if it doesn't, it means something drastically changed
1786 * while the socket was unlocked in sodelayed_copy...
1787 * perhaps a RST on the stream... in any event, the stream has
1788 * been interrupted... it's probably best just to return
1789 * whatever data we've moved and let the caller sort it out...
1790 */
1791 break;
1792 }
1793 }
1794 socket_unlock(so, 0);
1795 error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
1796 socket_lock(so, 0);
1797
1798 if (error)
1799 goto release;
1800 }
1801 } else
1802 uio_setresid(uio, (uio_resid(uio) - len));
1803
1804 if (len == m->m_len - moff) {
1805 if (m->m_flags & M_EOR)
1806 flags |= MSG_EOR;
1807 if (flags & MSG_PEEK) {
1808 m = m->m_next;
1809 moff = 0;
1810 } else {
1811 nextrecord = m->m_nextpkt;
1812 sbfree(&so->so_rcv, m);
1813 m->m_nextpkt = NULL;
1814
1815 if (mp) {
1816 *mp = m;
1817 mp = &m->m_next;
1818 so->so_rcv.sb_mb = m = m->m_next;
1819 *mp = (struct mbuf *)0;
1820 } else {
1821 if (free_list == NULL)
1822 free_list = m;
1823 else
1824 ml->m_next = m;
1825 ml = m;
1826 so->so_rcv.sb_mb = m = m->m_next;
1827 ml->m_next = 0;
1828 }
1829 if (m)
1830 m->m_nextpkt = nextrecord;
1831 }
1832 } else {
1833 if (flags & MSG_PEEK)
1834 moff += len;
1835 else {
1836 if (mp)
1837 *mp = m_copym(m, 0, len, M_WAIT);
1838 m->m_data += len;
1839 m->m_len -= len;
1840 so->so_rcv.sb_cc -= len;
1841 }
1842 }
1843 if (so->so_oobmark) {
1844 if ((flags & MSG_PEEK) == 0) {
1845 so->so_oobmark -= len;
1846 if (so->so_oobmark == 0) {
1847 so->so_state |= SS_RCVATMARK;
1848 /*
1849 * delay posting the actual event until after
1850 * any delayed copy processing has finished
1851 */
1852 need_event = 1;
1853 break;
1854 }
1855 } else {
1856 offset += len;
1857 if (offset == so->so_oobmark)
1858 break;
1859 }
1860 }
1861 if (flags & MSG_EOR)
1862 break;
1863 /*
1864 * If the MSG_WAITALL or MSG_WAITSTREAM flag is set (for a non-atomic socket),
1865 * we must not quit until "uio->uio_resid == 0" or an error
1866 * termination. If a signal/timeout occurs, return
1867 * with a short count but without error.
1868 * Keep sockbuf locked against other readers.
1869 */
1870 while (flags & (MSG_WAITALL|MSG_WAITSTREAM) && m == 0 && (uio_resid(uio) - delayed_copy_len) > 0 &&
1871 !sosendallatonce(so) && !nextrecord) {
1872 if (so->so_error || so->so_state & SS_CANTRCVMORE)
1873 goto release;
1874
1875 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb && (((struct inpcb *)so->so_pcb)->inp_state != INPCB_STATE_DEAD))
1876 (*pr->pr_usrreqs->pru_rcvd)(so, flags);
1877 if (sbwait(&so->so_rcv)) {
1878 error = 0;
1879 goto release;
1880 }
1881 /*
1882 * We have to wait until after we get back from the sbwait to do the copy, because
1883 * the copy drops the socket lock if enough data has been delayed... dropping
1884 * the lock opens a window that allows the netisr thread to process incoming
1885 * packets and change the state of this socket... we issue the sbwait because
1886 * the socket is empty and we expect the netisr thread to wake us up when more
1887 * packets arrive... if we allowed that processing to happen and then called sbwait,
1888 * we could stall forever with packets sitting in the socket if no further packets
1889 * arrive from the remote side.
1890 *
1891 * We want to copy before we've collected all the data to satisfy this request, to
1892 * allow the copy to overlap the incoming packet processing on an MP system.
1893 */
1894 if (delayed_copy_len > sorecvmincopy && (delayed_copy_len > (so->so_rcv.sb_hiwat / 2))) {
1895
1896 error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);
1897
1898 if (error)
1899 goto release;
1900 }
1901 m = so->so_rcv.sb_mb;
1902 if (m) {
1903 nextrecord = m->m_nextpkt;
1904 }
1905 }
1906 }
1907 #ifdef MORE_LOCKING_DEBUG
1908 if (so->so_usecount <= 1)
1909 panic("soreceive: after big while so=%x ref=%d on socket\n", so, so->so_usecount);
1910 #endif
1911
1912 if (m && pr->pr_flags & PR_ATOMIC) {
1913 #ifdef __APPLE__
1914 if (so->so_options & SO_DONTTRUNC)
1915 flags |= MSG_RCVMORE;
1916 else {
1917 #endif
1918 flags |= MSG_TRUNC;
1919 if ((flags & MSG_PEEK) == 0)
1920 (void) sbdroprecord(&so->so_rcv);
1921 #ifdef __APPLE__
1922 }
1923 #endif
1924 }
1925 if ((flags & MSG_PEEK) == 0) {
1926 if (m == 0)
1927 so->so_rcv.sb_mb = nextrecord;
1928 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
1929 (*pr->pr_usrreqs->pru_rcvd)(so, flags);
1930 }
1931 #ifdef __APPLE__
1932 if ((so->so_options & SO_WANTMORE) && so->so_rcv.sb_cc > 0)
1933 flags |= MSG_HAVEMORE;
1934
1935 if (delayed_copy_len) {
1936 error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);
1937
1938 if (error)
1939 goto release;
1940 }
1941 if (free_list) {
1942 m_freem_list((struct mbuf *)free_list);
1943 free_list = (struct mbuf *)0;
1944 }
1945 if (need_event)
1946 postevent(so, 0, EV_OOB);
1947 #endif
1948 if (orig_resid == uio_resid(uio) && orig_resid &&
1949 (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
1950 sbunlock(&so->so_rcv, 1);
1951 goto restart;
1952 }
1953
1954 if (flagsp)
1955 *flagsp |= flags;
1956 release:
1957 #ifdef MORE_LOCKING_DEBUG
1958 if (so->so_usecount <= 1)
1959 panic("soreceive: release so=%x ref=%d on socket\n", so, so->so_usecount);
1960 #endif
1961 if (delayed_copy_len) {
1962 error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);
1963 }
1964 if (free_list) {
1965 m_freem_list((struct mbuf *)free_list);
1966 }
1967 sbunlock(&so->so_rcv, 0); /* will unlock socket */
1968
1969 // LP64todo - fix this!
1970 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END,
1971 so,
1972 uio_resid(uio),
1973 so->so_rcv.sb_cc,
1974 0,
1975 error);
1976
1977 return (error);
1978 }
1979
1980
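/*
 * Annotation (not original text): sodelayed_copy() drains the mbufs that
 * soreceive() accumulated on free_list, copying them out to the uio with
 * the socket unlocked, then frees the chain and resets the delayed byte
 * count. Any uiomove() error is returned to the caller.
 */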
1981 static int sodelayed_copy(struct socket *so, struct uio *uio, struct mbuf **free_list, int *resid)
1982 {
1983 int error = 0;
1984 struct mbuf *m;
1985
1986 m = *free_list;
1987
1988 socket_unlock(so, 0);
1989
1990 while (m && error == 0) {
1991
1992 error = uiomove(mtod(m, caddr_t), (int)m->m_len, uio);
1993
1994 m = m->m_next;
1995 }
1996 m_freem_list(*free_list);
1997
1998 *free_list = (struct mbuf *)NULL;
1999 *resid = 0;
2000
2001 socket_lock(so, 0);
2002
2003 return (error);
2004 }
2005
2006
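/*
 * Annotation (not original text): soshutdown() notifies the socket filters,
 * flushes the receive side via sorflush() unless how == SHUT_WR, and asks
 * the protocol to stop writes via pru_shutdown unless how == SHUT_RD, so
 * SHUT_RDWR does both.
 */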
2007 int
2008 soshutdown(so, how)
2009 register struct socket *so;
2010 register int how;
2011 {
2012 register struct protosw *pr = so->so_proto;
2013 int ret;
2014
2015 socket_lock(so, 1);
2016
2017 sflt_notify(so, sock_evt_shutdown, &how);
2018
2019 if (how != SHUT_WR) {
2020 sorflush(so);
2021 postevent(so, 0, EV_RCLOSED);
2022 }
2023 if (how != SHUT_RD) {
2024 ret = ((*pr->pr_usrreqs->pru_shutdown)(so));
2025 postevent(so, 0, EV_WCLOSED);
2026 KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, 0,0,0,0,0);
2027 socket_unlock(so, 1);
2028 return(ret);
2029 }
2030
2031 KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, 0,0,0,0,0);
2032 socket_unlock(so, 1);
2033 return (0);
2034 }
2035
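/*
 * Annotation (not original text): sorflush() locks the receive buffer with
 * SB_NOINTR, marks the socket unable to receive more, snapshots the sockbuf
 * into asb, re-initializes the live sockbuf (preserving any knotes), then
 * lets the domain dispose of passed rights and releases the snapshot.
 */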
2036 void
2037 sorflush(so)
2038 register struct socket *so;
2039 {
2040 register struct sockbuf *sb = &so->so_rcv;
2041 register struct protosw *pr = so->so_proto;
2042 struct sockbuf asb;
2043
2044 #ifdef MORE_LOCKING_DEBUG
2045 lck_mtx_t * mutex_held;
2046
2047 if (so->so_proto->pr_getlock != NULL)
2048 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
2049 else
2050 mutex_held = so->so_proto->pr_domain->dom_mtx;
2051 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
2052 #endif
2053
2054 sflt_notify(so, sock_evt_flush_read, NULL);
2055
2056 sb->sb_flags |= SB_NOINTR;
2057 (void) sblock(sb, M_WAIT);
2058 socantrcvmore(so);
2059 sbunlock(sb, 1);
2060 #ifdef __APPLE__
2061 selthreadclear(&sb->sb_sel);
2062 #endif
2063 asb = *sb;
2064 bzero((caddr_t)sb, sizeof (*sb));
2065 sb->sb_so = so; /* reestablish link to socket */
2066 if (asb.sb_flags & SB_KNOTE) {
2067 sb->sb_sel.si_note = asb.sb_sel.si_note;
2068 sb->sb_flags = SB_KNOTE;
2069 }
2070 if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
2071 (*pr->pr_domain->dom_dispose)(asb.sb_mb);
2072 sbrelease(&asb);
2073 }
2074
2075 /*
2076 * Perhaps this routine, and sooptcopyout(), below, ought to come in
2077 * an additional variant to handle the case where the option value needs
2078 * to be some kind of integer, but not a specific size.
2079 * In addition to their use here, these functions are also called by the
2080 * protocol-level pr_ctloutput() routines.
2081 */
2082 int
2083 sooptcopyin(sopt, buf, len, minlen)
2084 struct sockopt *sopt;
2085 void *buf;
2086 size_t len;
2087 size_t minlen;
2088 {
2089 size_t valsize;
2090
2091 /*
2092 * If the user gives us more than we wanted, we ignore it,
2093 * but if we don't get the minimum length the caller
2094 * wants, we return EINVAL. On success, sopt->sopt_valsize
2095 * is set to however much we actually retrieved.
2096 */
2097 if ((valsize = sopt->sopt_valsize) < minlen)
2098 return EINVAL;
2099 if (valsize > len)
2100 sopt->sopt_valsize = valsize = len;
2101
2102 if (sopt->sopt_p != 0)
2103 return (copyin(sopt->sopt_val, buf, valsize));
2104
2105 bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), buf, valsize);
2106 return 0;
2107 }
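/*
 * Illustrative sketch (not part of this file): a protocol-level
 * pr_ctloutput() handler typically pulls an integer-sized option in with
 * sooptcopyin() before acting on it.  The option name and pcb field below
 * are hypothetical.
 *
 *	int optval, error;
 *
 *	error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval);
 *	if (error)
 *		return (error);
 *	pcb->hypothetical_flag = (optval != 0);
 *	return (0);
 */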
2108
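/*
 * sosetopt: give the attached socket filters' sf_setoption hooks first
 * crack at the request, hand non-SOL_SOCKET levels to the protocol's
 * pr_ctloutput, and process socket-level options here.  Called and
 * returns with the socket unlocked.
 */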
2109 int
2110 sosetopt(so, sopt)
2111 struct socket *so;
2112 struct sockopt *sopt;
2113 {
2114 int error, optval;
2115 struct linger l;
2116 struct timeval tv;
2117 short val;
2118
2119 socket_lock(so, 1);
2120
2121 if (sopt->sopt_dir != SOPT_SET) {
2122 sopt->sopt_dir = SOPT_SET;
2123 }
2124
2125 {
2126 struct socket_filter_entry *filter;
2127 int filtered = 0;
2128 error = 0;
2129 for (filter = so->so_filt; filter && (error == 0);
2130 filter = filter->sfe_next_onsocket) {
2131 if (filter->sfe_filter->sf_filter.sf_setoption) {
2132 if (filtered == 0) {
2133 filtered = 1;
2134 sflt_use(so);
2135 socket_unlock(so, 0);
2136 }
2137 error = filter->sfe_filter->sf_filter.sf_setoption(
2138 filter->sfe_cookie, so, sopt);
2139 }
2140 }
2141
2142 if (filtered != 0) {
2143 socket_lock(so, 0);
2144 sflt_unuse(so);
2145
2146 if (error) {
2147 if (error == EJUSTRETURN)
2148 error = 0;
2149 goto bad;
2150 }
2151 }
2152 }
2153
2154 error = 0;
2155 if (sopt->sopt_level != SOL_SOCKET) {
2156 if (so->so_proto && so->so_proto->pr_ctloutput) {
2157 error = (*so->so_proto->pr_ctloutput)
2158 (so, sopt);
2159 socket_unlock(so, 1);
2160 return (error);
2161 }
2162 error = ENOPROTOOPT;
2163 } else {
2164 switch (sopt->sopt_name) {
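		/*
		 * SO_LINGER stores the caller's l_linger value unchanged;
		 * SO_LINGER_SEC values are given in seconds and are scaled
		 * by hz before being stored in so_linger.
		 */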
2165 case SO_LINGER:
2166 case SO_LINGER_SEC:
2167 error = sooptcopyin(sopt, &l, sizeof l, sizeof l);
2168 if (error)
2169 goto bad;
2170
2171 so->so_linger = (sopt->sopt_name == SO_LINGER) ? l.l_linger : l.l_linger * hz;
2172 if (l.l_onoff)
2173 so->so_options |= SO_LINGER;
2174 else
2175 so->so_options &= ~SO_LINGER;
2176 break;
2177
2178 case SO_DEBUG:
2179 case SO_KEEPALIVE:
2180 case SO_DONTROUTE:
2181 case SO_USELOOPBACK:
2182 case SO_BROADCAST:
2183 case SO_REUSEADDR:
2184 case SO_REUSEPORT:
2185 case SO_OOBINLINE:
2186 case SO_TIMESTAMP:
2187 #ifdef __APPLE__
2188 case SO_DONTTRUNC:
2189 case SO_WANTMORE:
2190 case SO_WANTOOBFLAG:
2191 #endif
2192 error = sooptcopyin(sopt, &optval, sizeof optval,
2193 sizeof optval);
2194 if (error)
2195 goto bad;
2196 if (optval)
2197 so->so_options |= sopt->sopt_name;
2198 else
2199 so->so_options &= ~sopt->sopt_name;
2200 break;
2201
2202 case SO_SNDBUF:
2203 case SO_RCVBUF:
2204 case SO_SNDLOWAT:
2205 case SO_RCVLOWAT:
2206 error = sooptcopyin(sopt, &optval, sizeof optval,
2207 sizeof optval);
2208 if (error)
2209 goto bad;
2210
2211 /*
2212 * Values < 1 make no sense for any of these
2213 * options, so disallow them.
2214 */
2215 if (optval < 1) {
2216 error = EINVAL;
2217 goto bad;
2218 }
2219
2220 switch (sopt->sopt_name) {
2221 case SO_SNDBUF:
2222 case SO_RCVBUF:
2223 if (sbreserve(sopt->sopt_name == SO_SNDBUF ?
2224 &so->so_snd : &so->so_rcv,
2225 (u_long) optval) == 0) {
2226 error = ENOBUFS;
2227 goto bad;
2228 }
2229 break;
2230
2231 /*
2232 * Make sure the low-water is never greater than
2233 * the high-water.
2234 */
2235 case SO_SNDLOWAT:
2236 so->so_snd.sb_lowat =
2237 (optval > so->so_snd.sb_hiwat) ?
2238 so->so_snd.sb_hiwat : optval;
2239 break;
2240 case SO_RCVLOWAT:
2241 so->so_rcv.sb_lowat =
2242 (optval > so->so_rcv.sb_hiwat) ?
2243 so->so_rcv.sb_hiwat : optval;
2244 break;
2245 }
2246 break;
2247
2248 case SO_SNDTIMEO:
2249 case SO_RCVTIMEO:
2250 error = sooptcopyin(sopt, &tv, sizeof tv,
2251 sizeof tv);
2252 if (error)
2253 goto bad;
2254
2255 if (tv.tv_sec < 0 || tv.tv_sec > LONG_MAX ||
2256 tv.tv_usec < 0 || tv.tv_usec >= 1000000) {
2257 error = EDOM;
2258 goto bad;
2259 }
2260
2261 switch (sopt->sopt_name) {
2262 case SO_SNDTIMEO:
2263 so->so_snd.sb_timeo = tv;
2264 break;
2265 case SO_RCVTIMEO:
2266 so->so_rcv.sb_timeo = tv;
2267 break;
2268 }
2269 break;
2270
2271 case SO_NKE:
2272 {
2273 struct so_nke nke;
2274
2275 error = sooptcopyin(sopt, &nke,
2276 sizeof nke, sizeof nke);
2277 if (error)
2278 goto bad;
2279
2280 error = sflt_attach_private(so, NULL, nke.nke_handle, 1);
2281 break;
2282 }
2283
2284 case SO_NOSIGPIPE:
2285 error = sooptcopyin(sopt, &optval, sizeof optval,
2286 sizeof optval);
2287 if (error)
2288 goto bad;
2289 if (optval)
2290 so->so_flags |= SOF_NOSIGPIPE;
2291 else
2292 so->so_flags &= ~SOF_NOSIGPIPE;
2293
2294 break;
2295
2296 case SO_NOADDRERR:
2297 error = sooptcopyin(sopt, &optval, sizeof optval,
2298 sizeof optval);
2299 if (error)
2300 goto bad;
2301 if (optval)
2302 so->so_flags |= SOF_NOADDRAVAIL;
2303 else
2304 so->so_flags &= ~SOF_NOADDRAVAIL;
2305
2306 break;
2307
2308 default:
2309 error = ENOPROTOOPT;
2310 break;
2311 }
2312 if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
2313 (void) ((*so->so_proto->pr_ctloutput)
2314 (so, sopt));
2315 }
2316 }
2317 bad:
2318 socket_unlock(so, 1);
2319 return (error);
2320 }
2321
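/*
 * Illustrative user-space sketch (not part of this file): the socket-level
 * timeout options handled above are set with setsockopt(2); here a
 * five-second receive timeout is requested:
 *
 *	struct timeval tv = { 5, 0 };
 *
 *	if (setsockopt(s, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof tv) < 0)
 *		perror("setsockopt");
 *
 * A tv_sec less than zero or a tv_usec outside [0, 1000000) is rejected
 * above with EDOM.
 */
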
2322 /* Helper routine for getsockopt */
2323 int
2324 sooptcopyout(sopt, buf, len)
2325 struct sockopt *sopt;
2326 void *buf;
2327 size_t len;
2328 {
2329 int error;
2330 size_t valsize;
2331
2332 error = 0;
2333
2334 /*
2335 * Documented get behavior is that we always return a value,
2336 * possibly truncated to fit in the user's buffer.
2337 * Traditional behavior is that we always tell the user
2338 * precisely how much we copied, rather than something useful
2339 * like the total amount we had available for her.
2340 * Note that this interface is not idempotent; the entire answer must
2341 * be generated ahead of time.
2342 */
2343 valsize = min(len, sopt->sopt_valsize);
2344 sopt->sopt_valsize = valsize;
2345 if (sopt->sopt_val != USER_ADDR_NULL) {
2346 if (sopt->sopt_p != 0)
2347 error = copyout(buf, sopt->sopt_val, valsize);
2348 else
2349 bcopy(buf, CAST_DOWN(caddr_t, sopt->sopt_val), valsize);
2350 }
2351 return error;
2352 }
2353
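/*
 * sogetopt: mirror image of sosetopt().  Socket filters' sf_getoption
 * hooks run first, non-SOL_SOCKET levels are referred to the protocol's
 * pr_ctloutput, and socket-level options are answered here and copied out
 * with sooptcopyout().
 */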
2354 int
2355 sogetopt(so, sopt)
2356 struct socket *so;
2357 struct sockopt *sopt;
2358 {
2359 int error, optval;
2360 struct linger l;
2361 struct timeval tv;
2362
2363 if (sopt->sopt_dir != SOPT_GET) {
2364 sopt->sopt_dir = SOPT_GET;
2365 }
2366
2367 socket_lock(so, 1);
2368
2369 {
2370 struct socket_filter_entry *filter;
2371 int filtered = 0;
2372 error = 0;
2373 for (filter = so->so_filt; filter && (error == 0);
2374 filter = filter->sfe_next_onsocket) {
2375 if (filter->sfe_filter->sf_filter.sf_getoption) {
2376 if (filtered == 0) {
2377 filtered = 1;
2378 sflt_use(so);
2379 socket_unlock(so, 0);
2380 }
2381 error = filter->sfe_filter->sf_filter.sf_getoption(
2382 filter->sfe_cookie, so, sopt);
2383 }
2384 }
2385 if (filtered != 0) {
2386 socket_lock(so, 0);
2387 sflt_unuse(so);
2388
2389 if (error) {
2390 if (error == EJUSTRETURN)
2391 error = 0;
2392 socket_unlock(so, 1);
2393 return error;
2394 }
2395 }
2396 }
2397
2398 error = 0;
2399 if (sopt->sopt_level != SOL_SOCKET) {
2400 if (so->so_proto && so->so_proto->pr_ctloutput) {
2401 error = (*so->so_proto->pr_ctloutput)
2402 (so, sopt);
2403 socket_unlock(so, 1);
2404 return (error);
2405 } else {
2406 socket_unlock(so, 1);
2407 return (ENOPROTOOPT);
2408 }
2409 } else {
2410 switch (sopt->sopt_name) {
2411 case SO_LINGER:
2412 case SO_LINGER_SEC:
2413 l.l_onoff = so->so_options & SO_LINGER;
2414 l.l_linger = (sopt->sopt_name == SO_LINGER) ? so->so_linger :
2415 so->so_linger / hz;
2416 error = sooptcopyout(sopt, &l, sizeof l);
2417 break;
2418
2419 case SO_USELOOPBACK:
2420 case SO_DONTROUTE:
2421 case SO_DEBUG:
2422 case SO_KEEPALIVE:
2423 case SO_REUSEADDR:
2424 case SO_REUSEPORT:
2425 case SO_BROADCAST:
2426 case SO_OOBINLINE:
2427 case SO_TIMESTAMP:
2428 #ifdef __APPLE__
2429 case SO_DONTTRUNC:
2430 case SO_WANTMORE:
2431 case SO_WANTOOBFLAG:
2432 #endif
2433 optval = so->so_options & sopt->sopt_name;
2434 integer:
2435 error = sooptcopyout(sopt, &optval, sizeof optval);
2436 break;
2437
2438 case SO_TYPE:
2439 optval = so->so_type;
2440 goto integer;
2441
2442 #ifdef __APPLE__
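		/*
		 * SO_NREAD reports how many bytes of data are readable: for
		 * atomic (record-oriented) protocols only the MT_DATA mbufs
		 * of the first record are counted; otherwise the receive
		 * buffer's total character count is returned.
		 */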
2443 case SO_NREAD:
2444 {
2445 int pkt_total;
2446 struct mbuf *m1;
2447
2448 pkt_total = 0;
2449 m1 = so->so_rcv.sb_mb;
2450 if (so->so_proto->pr_flags & PR_ATOMIC)
2451 {
2452 while (m1) {
2453 if (m1->m_type == MT_DATA)
2454 pkt_total += m1->m_len;
2455 m1 = m1->m_next;
2456 }
2457 optval = pkt_total;
2458 } else
2459 optval = so->so_rcv.sb_cc;
2460 goto integer;
2461 }
2462 case SO_NWRITE:
2463 optval = so->so_snd.sb_cc;
2464 goto integer;
2465 #endif
2466 case SO_ERROR:
2467 optval = so->so_error;
2468 so->so_error = 0;
2469 goto integer;
2470
2471 case SO_SNDBUF:
2472 optval = so->so_snd.sb_hiwat;
2473 goto integer;
2474
2475 case SO_RCVBUF:
2476 optval = so->so_rcv.sb_hiwat;
2477 goto integer;
2478
2479 case SO_SNDLOWAT:
2480 optval = so->so_snd.sb_lowat;
2481 goto integer;
2482
2483 case SO_RCVLOWAT:
2484 optval = so->so_rcv.sb_lowat;
2485 goto integer;
2486
2487 case SO_SNDTIMEO:
2488 case SO_RCVTIMEO:
2489 tv = (sopt->sopt_name == SO_SNDTIMEO ?
2490 so->so_snd.sb_timeo : so->so_rcv.sb_timeo);
2491
2492 error = sooptcopyout(sopt, &tv, sizeof tv);
2493 break;
2494
2495 case SO_NOSIGPIPE:
2496 optval = (so->so_flags & SOF_NOSIGPIPE);
2497 goto integer;
2498
2499 case SO_NOADDRERR:
2500 optval = (so->so_flags & SOF_NOADDRAVAIL);
2501 goto integer;
2502
2503 default:
2504 error = ENOPROTOOPT;
2505 break;
2506 }
2507 socket_unlock(so, 1);
2508 return (error);
2509 }
2510 }
2511
2512 /* XXX; prepare mbuf for (__FreeBSD__ < 3) routines. */
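/*
 * soopt_getm: build an mbuf chain (using clusters where a single mbuf is
 * too small) big enough to hold sopt->sopt_valsize bytes of option data,
 * up to MAX_SOOPTGETM_SIZE, and return it through *mp for the
 * soopt_mcopyin()/soopt_mcopyout() helpers below.
 */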
2513 int
2514 soopt_getm(struct sockopt *sopt, struct mbuf **mp)
2515 {
2516 struct mbuf *m, *m_prev;
2517 int sopt_size = sopt->sopt_valsize;
2518
2519 if (sopt_size > MAX_SOOPTGETM_SIZE)
2520 return EMSGSIZE;
2521
2522 MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_DATA);
2523 if (m == 0)
2524 return ENOBUFS;
2525 if (sopt_size > MLEN) {
2526 MCLGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT);
2527 if ((m->m_flags & M_EXT) == 0) {
2528 m_free(m);
2529 return ENOBUFS;
2530 }
2531 m->m_len = min(MCLBYTES, sopt_size);
2532 } else {
2533 m->m_len = min(MLEN, sopt_size);
2534 }
2535 sopt_size -= m->m_len;
2536 *mp = m;
2537 m_prev = m;
2538
2539 while (sopt_size) {
2540 MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_DATA);
2541 if (m == 0) {
2542 m_freem(*mp);
2543 return ENOBUFS;
2544 }
2545 if (sopt_size > MLEN) {
2546 MCLGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT);
2547 if ((m->m_flags & M_EXT) == 0) {
2548 m_freem(*mp);
2549 return ENOBUFS;
2550 }
2551 m->m_len = min(MCLBYTES, sopt_size);
2552 } else {
2553 m->m_len = min(MLEN, sopt_size);
2554 }
2555 sopt_size -= m->m_len;
2556 m_prev->m_next = m;
2557 m_prev = m;
2558 }
2559 return 0;
2560 }
2561
2562 /* XXX; copyin sopt data into mbuf chain for (__FreeBSD__ < 3) routines. */
2563 int
2564 soopt_mcopyin(struct sockopt *sopt, struct mbuf *m)
2565 {
2566 struct mbuf *m0 = m;
2567
2568 if (sopt->sopt_val == USER_ADDR_NULL)
2569 return 0;
2570 while (m != NULL && sopt->sopt_valsize >= m->m_len) {
2571 if (sopt->sopt_p != NULL) {
2572 int error;
2573
2574 error = copyin(sopt->sopt_val, mtod(m, char *), m->m_len);
2575 if (error != 0) {
2576 m_freem(m0);
2577 return(error);
2578 }
2579 } else
2580 bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), mtod(m, char *), m->m_len);
2581 sopt->sopt_valsize -= m->m_len;
2582 sopt->sopt_val += m->m_len;
2583 m = m->m_next;
2584 }
2585 	if (m != NULL)	/* should have been allocated with enough room by ip6_sooptmcopyin() */
2586 panic("soopt_mcopyin");
2587 return 0;
2588 }
2589
2590 /* XXX; copyout mbuf chain data into soopt for (__FreeBSD__ < 3) routines. */
2591 int
2592 soopt_mcopyout(struct sockopt *sopt, struct mbuf *m)
2593 {
2594 struct mbuf *m0 = m;
2595 size_t valsize = 0;
2596
2597 if (sopt->sopt_val == USER_ADDR_NULL)
2598 return 0;
2599 while (m != NULL && sopt->sopt_valsize >= m->m_len) {
2600 if (sopt->sopt_p != NULL) {
2601 int error;
2602
2603 error = copyout(mtod(m, char *), sopt->sopt_val, m->m_len);
2604 if (error != 0) {
2605 m_freem(m0);
2606 return(error);
2607 }
2608 } else
2609 bcopy(mtod(m, char *), CAST_DOWN(caddr_t, sopt->sopt_val), m->m_len);
2610 sopt->sopt_valsize -= m->m_len;
2611 sopt->sopt_val += m->m_len;
2612 valsize += m->m_len;
2613 m = m->m_next;
2614 }
2615 if (m != NULL) {
2616 		/* the user-supplied option buffer should have been large enough */
2617 m_freem(m0);
2618 return(EINVAL);
2619 }
2620 sopt->sopt_valsize = valsize;
2621 return 0;
2622 }
2623
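/*
 * sohasoutofband: urgent data has arrived.  Deliver SIGURG to the owning
 * process or process group and wake any select/poll waiters on the
 * receive buffer.
 */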
2624 void
2625 sohasoutofband(so)
2626 register struct socket *so;
2627 {
2628 struct proc *p;
2629
2630 if (so->so_pgid < 0)
2631 gsignal(-so->so_pgid, SIGURG);
2632 else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
2633 psignal(p, SIGURG);
2634 selwakeup(&so->so_rcv.sb_sel);
2635 }
2636
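/*
 * sopoll: poll/select support.  Readability, writability and pending
 * out-of-band conditions are evaluated under the socket lock; if nothing
 * is ready yet, the thread is recorded on the relevant sockbuf's selinfo
 * so it can be woken later.
 */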
2637 int
2638 sopoll(struct socket *so, int events, __unused kauth_cred_t cred, void * wql)
2639 {
2640 struct proc *p = current_proc();
2641 int revents = 0;
2642
2643 socket_lock(so, 1);
2644
2645 if (events & (POLLIN | POLLRDNORM))
2646 if (soreadable(so))
2647 revents |= events & (POLLIN | POLLRDNORM);
2648
2649 if (events & (POLLOUT | POLLWRNORM))
2650 if (sowriteable(so))
2651 revents |= events & (POLLOUT | POLLWRNORM);
2652
2653 if (events & (POLLPRI | POLLRDBAND))
2654 if (so->so_oobmark || (so->so_state & SS_RCVATMARK))
2655 revents |= events & (POLLPRI | POLLRDBAND);
2656
2657 if (revents == 0) {
2658 if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
2659 /* Darwin sets the flag first, BSD calls selrecord first */
2660 so->so_rcv.sb_flags |= SB_SEL;
2661 selrecord(p, &so->so_rcv.sb_sel, wql);
2662 }
2663
2664 if (events & (POLLOUT | POLLWRNORM)) {
2665 /* Darwin sets the flag first, BSD calls selrecord first */
2666 so->so_snd.sb_flags |= SB_SEL;
2667 selrecord(p, &so->so_snd.sb_sel, wql);
2668 }
2669 }
2670
2671 socket_unlock(so, 1);
2672 return (revents);
2673 }
2674
2675 int soo_kqfilter(struct fileproc *fp, struct knote *kn, struct proc *p);
2676
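/*
 * soo_kqfilter: attach a knote to the appropriate sockbuf.  Listening
 * sockets get the solisten filter for EVFILT_READ; otherwise the read or
 * write filter is attached to so_rcv or so_snd respectively.
 */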
2677 int
2678 soo_kqfilter(__unused struct fileproc *fp, struct knote *kn, __unused struct proc *p)
2679 {
2680 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
2681 struct sockbuf *sb;
2682 socket_lock(so, 1);
2683
2684 switch (kn->kn_filter) {
2685 case EVFILT_READ:
2686 if (so->so_options & SO_ACCEPTCONN)
2687 kn->kn_fop = &solisten_filtops;
2688 else
2689 kn->kn_fop = &soread_filtops;
2690 sb = &so->so_rcv;
2691 break;
2692 case EVFILT_WRITE:
2693 kn->kn_fop = &sowrite_filtops;
2694 sb = &so->so_snd;
2695 break;
2696 default:
2697 socket_unlock(so, 1);
2698 return (1);
2699 }
2700
2701 if (KNOTE_ATTACH(&sb->sb_sel.si_note, kn))
2702 sb->sb_flags |= SB_KNOTE;
2703 socket_unlock(so, 1);
2704 return (0);
2705 }
2706
2707 static void
2708 filt_sordetach(struct knote *kn)
2709 {
2710 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
2711
2712 socket_lock(so, 1);
2713 if (so->so_rcv.sb_flags & SB_KNOTE)
2714 if (KNOTE_DETACH(&so->so_rcv.sb_sel.si_note, kn))
2715 so->so_rcv.sb_flags &= ~SB_KNOTE;
2716 socket_unlock(so, 1);
2717 }
2718
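/*
 * filt_soread: EVFILT_READ filter.  kn_data is set to the readable byte
 * count (adjusted around a pending out-of-band mark); the filter fires on
 * EOF, on a pending socket error, while out-of-band data is pending, or
 * when enough data is present to satisfy NOTE_LOWAT or the receive
 * low-water mark.
 */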
2719 /*ARGSUSED*/
2720 static int
2721 filt_soread(struct knote *kn, long hint)
2722 {
2723 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
2724
2725 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2726 socket_lock(so, 1);
2727
2728 if (so->so_oobmark) {
2729 if (kn->kn_flags & EV_OOBAND) {
2730 kn->kn_data = so->so_rcv.sb_cc - so->so_oobmark;
2731 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2732 socket_unlock(so, 1);
2733 return (1);
2734 }
2735 kn->kn_data = so->so_oobmark;
2736 kn->kn_flags |= EV_OOBAND;
2737 } else {
2738 kn->kn_data = so->so_rcv.sb_cc;
2739 if (so->so_state & SS_CANTRCVMORE) {
2740 kn->kn_flags |= EV_EOF;
2741 kn->kn_fflags = so->so_error;
2742 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2743 socket_unlock(so, 1);
2744 return (1);
2745 }
2746 }
2747
2748 if (so->so_state & SS_RCVATMARK) {
2749 if (kn->kn_flags & EV_OOBAND) {
2750 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2751 socket_unlock(so, 1);
2752 return (1);
2753 }
2754 kn->kn_flags |= EV_OOBAND;
2755 } else if (kn->kn_flags & EV_OOBAND) {
2756 kn->kn_data = 0;
2757 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2758 socket_unlock(so, 1);
2759 return (0);
2760 }
2761
2762 if (so->so_error) { /* temporary udp error */
2763 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2764 socket_unlock(so, 1);
2765 return (1);
2766 }
2767
2768 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2769 socket_unlock(so, 1);
2770
2771 return( kn->kn_flags & EV_OOBAND ||
2772 kn->kn_data >= ((kn->kn_sfflags & NOTE_LOWAT) ?
2773 kn->kn_sdata : so->so_rcv.sb_lowat));
2774 }
2775
2776 static void
2777 filt_sowdetach(struct knote *kn)
2778 {
2779 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
2780 socket_lock(so, 1);
2781
2782 if(so->so_snd.sb_flags & SB_KNOTE)
2783 if (KNOTE_DETACH(&so->so_snd.sb_sel.si_note, kn))
2784 so->so_snd.sb_flags &= ~SB_KNOTE;
2785 socket_unlock(so, 1);
2786 }
2787
2788 /*ARGSUSED*/
2789 static int
2790 filt_sowrite(struct knote *kn, long hint)
2791 {
2792 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
2793
2794 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2795 socket_lock(so, 1);
2796
2797 kn->kn_data = sbspace(&so->so_snd);
2798 if (so->so_state & SS_CANTSENDMORE) {
2799 kn->kn_flags |= EV_EOF;
2800 kn->kn_fflags = so->so_error;
2801 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2802 socket_unlock(so, 1);
2803 return (1);
2804 }
2805 if (so->so_error) { /* temporary udp error */
2806 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2807 socket_unlock(so, 1);
2808 return (1);
2809 }
2810 if (((so->so_state & SS_ISCONNECTED) == 0) &&
2811 (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
2812 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2813 socket_unlock(so, 1);
2814 return (0);
2815 }
2816 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2817 socket_unlock(so, 1);
2818 if (kn->kn_sfflags & NOTE_LOWAT)
2819 return (kn->kn_data >= kn->kn_sdata);
2820 return (kn->kn_data >= so->so_snd.sb_lowat);
2821 }
2822
2823 /*ARGSUSED*/
2824 static int
2825 filt_solisten(struct knote *kn, long hint)
2826 {
2827 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
2828 int isempty;
2829
2830 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2831 socket_lock(so, 1);
2832 kn->kn_data = so->so_qlen;
2833 isempty = ! TAILQ_EMPTY(&so->so_comp);
2834 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2835 socket_unlock(so, 1);
2836 return (isempty);
2837 }
2838
2839
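/*
 * socket_lock / socket_unlock: acquire or release the lock covering this
 * socket, using the protocol's pr_lock/pr_unlock when one is provided and
 * the domain mutex otherwise.  A non-zero refcount argument also adjusts
 * so_usecount; dropping the last use count hands the socket to
 * sofreelastref().
 */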
2840 int
2841 socket_lock(so, refcount)
2842 struct socket *so;
2843 int refcount;
2844 {
2845 	int error = 0, lr = 0, lr_saved = 0;	/* keep lr_saved defined on non-ppc builds */
2846 #ifdef __ppc__
2847 __asm__ volatile("mflr %0" : "=r" (lr));
2848 lr_saved = lr;
2849 #endif
2850
2851 if (so->so_proto->pr_lock) {
2852 error = (*so->so_proto->pr_lock)(so, refcount, lr_saved);
2853 }
2854 else {
2855 #ifdef MORE_LOCKING_DEBUG
2856 lck_mtx_assert(so->so_proto->pr_domain->dom_mtx, LCK_MTX_ASSERT_NOTOWNED);
2857 #endif
2858 lck_mtx_lock(so->so_proto->pr_domain->dom_mtx);
2859 if (refcount)
2860 so->so_usecount++;
2861 so->reserved3 = (void*)lr_saved; /* save caller for refcount going to zero */
2862 }
2863
2864 return(error);
2865
2866 }
2867
2868 int
2869 socket_unlock(so, refcount)
2870 struct socket *so;
2871 int refcount;
2872 {
2873 	int error = 0, lr = 0, lr_saved = 0;	/* keep lr_saved defined on non-ppc builds */
2874 lck_mtx_t * mutex_held;
2875
2876 #ifdef __ppc__
2877 __asm__ volatile("mflr %0" : "=r" (lr));
2878 lr_saved = lr;
2879 #endif
2880
2881
2882
2883 if (so->so_proto == NULL)
2884 panic("socket_unlock null so_proto so=%x\n", so);
2885
2886 if (so && so->so_proto->pr_unlock)
2887 error = (*so->so_proto->pr_unlock)(so, refcount, lr_saved);
2888 else {
2889 mutex_held = so->so_proto->pr_domain->dom_mtx;
2890 #ifdef MORE_LOCKING_DEBUG
2891 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
2892 #endif
2893 if (refcount) {
2894 if (so->so_usecount <= 0)
2895 panic("socket_unlock: bad refcount so=%x value=%d\n", so, so->so_usecount);
2896 so->so_usecount--;
2897 if (so->so_usecount == 0) {
2898 sofreelastref(so, 1);
2899 }
2900 else
2901 so->reserved4 = (void*)lr_saved; /* save caller */
2902 }
2903 lck_mtx_unlock(mutex_held);
2904 }
2905
2906 return(error);
2907 }
2908 //### Called with socket locked, will unlock socket
2909 void
2910 sofree(so)
2911 struct socket *so;
2912 {
2913
2914 int lr, lr_saved;
2915 lck_mtx_t * mutex_held;
2916 #ifdef __ppc__
2917 __asm__ volatile("mflr %0" : "=r" (lr));
2918 lr_saved = lr;
2919 #endif
2920 if (so->so_proto->pr_getlock != NULL)
2921 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
2922 else
2923 mutex_held = so->so_proto->pr_domain->dom_mtx;
2924 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
2925
2926 sofreelastref(so, 0);
2927 }
2928
2929 void
2930 soreference(so)
2931 struct socket *so;
2932 {
2933 socket_lock(so, 1); /* locks & take one reference on socket */
2934 socket_unlock(so, 0); /* unlock only */
2935 }
2936
2937 void
2938 sodereference(so)
2939 struct socket *so;
2940 {
2941 socket_lock(so, 0);
2942 socket_unlock(so, 1);
2943 }