1 /*
2 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /* Copyright (c) 1998, 1999 Apple Computer, Inc. All Rights Reserved */
23 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
24 /*
25 * Copyright (c) 1982, 1986, 1988, 1990, 1993
26 * The Regents of the University of California. All rights reserved.
27 *
28 * Redistribution and use in source and binary forms, with or without
29 * modification, are permitted provided that the following conditions
30 * are met:
31 * 1. Redistributions of source code must retain the above copyright
32 * notice, this list of conditions and the following disclaimer.
33 * 2. Redistributions in binary form must reproduce the above copyright
34 * notice, this list of conditions and the following disclaimer in the
35 * documentation and/or other materials provided with the distribution.
36 * 3. All advertising materials mentioning features or use of this software
37 * must display the following acknowledgement:
38 * This product includes software developed by the University of
39 * California, Berkeley and its contributors.
40 * 4. Neither the name of the University nor the names of its contributors
41 * may be used to endorse or promote products derived from this software
42 * without specific prior written permission.
43 *
44 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
45 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
46 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
47 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
48 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
49 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
50 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
51 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
52 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
53 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
54 * SUCH DAMAGE.
55 *
56 * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94
57 * $FreeBSD: src/sys/kern/uipc_socket.c,v 1.68.2.16 2001/06/14 20:46:06 ume Exp $
58 */
59
60 #include <sys/param.h>
61 #include <sys/systm.h>
62 #include <sys/filedesc.h>
63 #include <sys/proc_internal.h>
64 #include <sys/kauth.h>
65 #include <sys/file_internal.h>
66 #include <sys/fcntl.h>
67 #include <sys/malloc.h>
68 #include <sys/mbuf.h>
69 #include <sys/domain.h>
70 #include <sys/kernel.h>
71 #include <sys/event.h>
72 #include <sys/poll.h>
73 #include <sys/protosw.h>
74 #include <sys/socket.h>
75 #include <sys/socketvar.h>
76 #include <sys/resourcevar.h>
77 #include <sys/signalvar.h>
78 #include <sys/sysctl.h>
79 #include <sys/uio.h>
80 #include <sys/ev.h>
81 #include <sys/kdebug.h>
82 #include <net/route.h>
83 #include <netinet/in.h>
84 #include <netinet/in_pcb.h>
85 #include <kern/zalloc.h>
86 #include <kern/locks.h>
87 #include <machine/limits.h>
88
89 int so_cache_hw = 0;
90 int so_cache_timeouts = 0;
91 int so_cache_max_freed = 0;
92 int cached_sock_count = 0;
93 struct socket *socket_cache_head = 0;
94 struct socket *socket_cache_tail = 0;
95 u_long so_cache_time = 0;
96 int so_cache_init_done = 0;
97 struct zone *so_cache_zone;
98 extern int get_inpcb_str_size();
99 extern int get_tcp_str_size();
100
101 static lck_grp_t *so_cache_mtx_grp;
102 static lck_attr_t *so_cache_mtx_attr;
103 static lck_grp_attr_t *so_cache_mtx_grp_attr;
104 lck_mtx_t *so_cache_mtx;
105
106 #include <machine/limits.h>
107
108 static void filt_sordetach(struct knote *kn);
109 static int filt_soread(struct knote *kn, long hint);
110 static void filt_sowdetach(struct knote *kn);
111 static int filt_sowrite(struct knote *kn, long hint);
112 static int filt_solisten(struct knote *kn, long hint);
113
114 static struct filterops solisten_filtops =
115 { 1, NULL, filt_sordetach, filt_solisten };
116 static struct filterops soread_filtops =
117 { 1, NULL, filt_sordetach, filt_soread };
118 static struct filterops sowrite_filtops =
119 { 1, NULL, filt_sowdetach, filt_sowrite };
120
121 #define EVEN_MORE_LOCKING_DEBUG 0
122 int socket_debug = 0;
123 int socket_zone = M_SOCKET;
124 so_gen_t so_gencnt; /* generation count for sockets */
125
126 MALLOC_DEFINE(M_SONAME, "soname", "socket name");
127 MALLOC_DEFINE(M_PCB, "pcb", "protocol control block");
128
129 #define DBG_LAYER_IN_BEG NETDBG_CODE(DBG_NETSOCK, 0)
130 #define DBG_LAYER_IN_END NETDBG_CODE(DBG_NETSOCK, 2)
131 #define DBG_LAYER_OUT_BEG NETDBG_CODE(DBG_NETSOCK, 1)
132 #define DBG_LAYER_OUT_END NETDBG_CODE(DBG_NETSOCK, 3)
133 #define DBG_FNC_SOSEND NETDBG_CODE(DBG_NETSOCK, (4 << 8) | 1)
134 #define DBG_FNC_SORECEIVE NETDBG_CODE(DBG_NETSOCK, (8 << 8))
135 #define DBG_FNC_SOSHUTDOWN NETDBG_CODE(DBG_NETSOCK, (9 << 8))
136
137 #define MAX_SOOPTGETM_SIZE (128 * MCLBYTES)
138
139
140 SYSCTL_DECL(_kern_ipc);
141
142 static int somaxconn = SOMAXCONN;
143 SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW, &somaxconn,
144 0, "");
145
146 /* Should we get a maximum also ??? */
147 static int sosendmaxchain = 65536;
148 static int sosendminchain = 16384;
149 static int sorecvmincopy = 16384;
150 SYSCTL_INT(_kern_ipc, OID_AUTO, sosendminchain, CTLFLAG_RW, &sosendminchain,
151 0, "");
152 SYSCTL_INT(_kern_ipc, OID_AUTO, sorecvmincopy, CTLFLAG_RW, &sorecvmincopy,
153 0, "");
154
155 void so_cache_timer();
156
157 /*
158 * Socket operation routines.
159 * These routines are called by the routines in
160 * sys_socket.c or from a system process, and
161 * implement the semantics of socket operations by
162 * switching out to the protocol specific routines.
163 */
164
165 #ifdef __APPLE__
166
167 vm_size_t so_cache_zone_element_size;
168
169 static int sodelayed_copy(struct socket *so, struct uio *uio, struct mbuf **free_list, int *resid);
170
171
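/*
 * One-time initialization of the socket layer: set up the lock group,
 * attribute, and mutex protecting the cached-socket list, create
 * so_cache_zone (sized to hold a socket plus aligned inpcb and tcp pcb
 * space), arm the cache-flush timer, and initialize the socket filter
 * subsystem.
 */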
172 void socketinit()
173 {
174 vm_size_t str_size;
175
176 if (so_cache_init_done) {
177 printf("socketinit: already called...\n");
178 return;
179 }
180
181 /*
182 * allocate lock group attribute and group for socket cache mutex
183 */
184 so_cache_mtx_grp_attr = lck_grp_attr_alloc_init();
185 lck_grp_attr_setdefault(so_cache_mtx_grp_attr);
186
187 so_cache_mtx_grp = lck_grp_alloc_init("so_cache", so_cache_mtx_grp_attr);
188
189 /*
190 * allocate the lock attribute for socket cache mutex
191 */
192 so_cache_mtx_attr = lck_attr_alloc_init();
193 lck_attr_setdefault(so_cache_mtx_attr);
194
195 so_cache_init_done = 1;
196
197 so_cache_mtx = lck_mtx_alloc_init(so_cache_mtx_grp, so_cache_mtx_attr); /* cached sockets mutex */
198
199 if (so_cache_mtx == NULL)
200 return; /* we're hosed... */
201
202 str_size = (vm_size_t)( sizeof(struct socket) + 4 +
203 get_inpcb_str_size() + 4 +
204 get_tcp_str_size());
205 so_cache_zone = zinit (str_size, 120000*str_size, 8192, "socache zone");
206 #if TEMPDEBUG
207 printf("socketinit -- so_cache_zone size is %x\n", str_size);
208 #endif
209 timeout(so_cache_timer, NULL, (SO_CACHE_FLUSH_INTERVAL * hz));
210
211 so_cache_zone_element_size = str_size;
212
213 sflt_init();
214
215 }
216
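/*
 * Hand out a socket from the cache of recently freed sockets, or fall back
 * to carving a fresh block out of so_cache_zone.  Each block holds the
 * socket itself followed by longword-aligned space sized for an inpcb and
 * a tcp pcb (see get_inpcb_str_size()/get_tcp_str_size()), reachable via
 * so_saved_pcb and inp_saved_ppcb.  If allocation fails, *so is left NULL.
 */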
217 void cached_sock_alloc(so, waitok)
218 struct socket **so;
219 int waitok;
220
221 {
222 caddr_t temp;
223 register u_long offset;
224
225
226 lck_mtx_lock(so_cache_mtx);
227
228 if (cached_sock_count) {
229 cached_sock_count--;
230 *so = socket_cache_head;
231 if (*so == 0)
232 panic("cached_sock_alloc: cached sock is null");
233
234 socket_cache_head = socket_cache_head->cache_next;
235 if (socket_cache_head)
236 socket_cache_head->cache_prev = 0;
237 else
238 socket_cache_tail = 0;
239
240 lck_mtx_unlock(so_cache_mtx);
241
242 temp = (*so)->so_saved_pcb;
243 bzero((caddr_t)*so, sizeof(struct socket));
244 #if TEMPDEBUG
245 kprintf("cached_sock_alloc - retrieving cached sock %x - count == %d\n", *so,
246 cached_sock_count);
247 #endif
248 (*so)->so_saved_pcb = temp;
249 (*so)->cached_in_sock_layer = 1;
250
251 }
252 else {
253 #if TEMPDEBUG
254 kprintf("Allocating cached sock %x from memory\n", *so);
255 #endif
256
257 lck_mtx_unlock(so_cache_mtx);
258
259 if (waitok)
260 *so = (struct socket *) zalloc(so_cache_zone);
261 else
262 *so = (struct socket *) zalloc_noblock(so_cache_zone);
263
264 if (*so == 0)
265 return;
266
267 bzero((caddr_t)*so, sizeof(struct socket));
268
269 /*
270 * Define offsets for extra structures into our single block of
271 * memory. Align extra structures on longword boundaries.
272 */
273
274
275 offset = (u_long) *so;
276 offset += sizeof(struct socket);
277 if (offset & 0x3) {
278 offset += 4;
279 offset &= 0xfffffffc;
280 }
281 (*so)->so_saved_pcb = (caddr_t) offset;
282 offset += get_inpcb_str_size();
283 if (offset & 0x3) {
284 offset += 4;
285 offset &= 0xfffffffc;
286 }
287
288 ((struct inpcb *) (*so)->so_saved_pcb)->inp_saved_ppcb = (caddr_t) offset;
289 #if TEMPDEBUG
290 kprintf("Allocating cached socket - %x, pcb=%x tcpcb=%x\n", *so,
291 (*so)->so_saved_pcb,
292 ((struct inpcb *)(*so)->so_saved_pcb)->inp_saved_ppcb);
293 #endif
294 }
295
296 (*so)->cached_in_sock_layer = 1;
297 }
298
299
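/*
 * Return a socket allocated by cached_sock_alloc() to the cache, stamping
 * it with the current so_cache_time so so_cache_timer() can age it out.
 * If the cache already holds MAX_CACHED_SOCKETS entries, the block is
 * released back to so_cache_zone instead.
 */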
300 void cached_sock_free(so)
301 struct socket *so;
302 {
303
304 lck_mtx_lock(so_cache_mtx);
305
306 if (++cached_sock_count > MAX_CACHED_SOCKETS) {
307 --cached_sock_count;
308 lck_mtx_unlock(so_cache_mtx);
309 #if TEMPDEBUG
310 kprintf("Freeing overflowed cached socket %x\n", so);
311 #endif
312 zfree(so_cache_zone, so);
313 }
314 else {
315 #if TEMPDEBUG
316 kprintf("Freeing socket %x into cache\n", so);
317 #endif
318 if (so_cache_hw < cached_sock_count)
319 so_cache_hw = cached_sock_count;
320
321 so->cache_next = socket_cache_head;
322 so->cache_prev = 0;
323 if (socket_cache_head)
324 socket_cache_head->cache_prev = so;
325 else
326 socket_cache_tail = so;
327
328 so->cache_timestamp = so_cache_time;
329 socket_cache_head = so;
330 lck_mtx_unlock(so_cache_mtx);
331 }
332
333 #if TEMPDEBUG
334 kprintf("Freed cached sock %x into cache - count is %d\n", so, cached_sock_count);
335 #endif
336
337
338 }
339
340
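/*
 * Periodic reaper for the cached socket list: frees entries that have sat
 * in the cache longer than SO_CACHE_TIME_LIMIT, releasing at most
 * SO_CACHE_MAX_FREE_BATCH per pass, then re-arms itself for another
 * SO_CACHE_FLUSH_INTERVAL ticks.
 */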
341 void so_cache_timer()
342 {
343 register struct socket *p;
344 register int n_freed = 0;
345
346
347 lck_mtx_lock(so_cache_mtx);
348
349 ++so_cache_time;
350
351 while ( (p = socket_cache_tail) )
352 {
353 if ((so_cache_time - p->cache_timestamp) < SO_CACHE_TIME_LIMIT)
354 break;
355
356 so_cache_timeouts++;
357
358 if ( (socket_cache_tail = p->cache_prev) )
359 p->cache_prev->cache_next = 0;
360 if (--cached_sock_count == 0)
361 socket_cache_head = 0;
362
363
364 zfree(so_cache_zone, p);
365
366 if (++n_freed >= SO_CACHE_MAX_FREE_BATCH)
367 {
368 so_cache_max_freed++;
369 break;
370 }
371 }
372 lck_mtx_unlock(so_cache_mtx);
373
374 timeout(so_cache_timer, NULL, (SO_CACHE_FLUSH_INTERVAL * hz));
375
376
377 }
378 #endif /* __APPLE__ */
379
380 /*
381 * Get a socket structure from our zone, and initialize it.
382 * We don't implement `waitok' yet (see comments in uipc_domain.c).
383 * Note that it would probably be better to allocate socket
384 * and PCB at the same time, but I'm not convinced that all
385 * the protocols can be easily modified to do this.
386 */
387 struct socket *
388 soalloc(waitok, dom, type)
389 int waitok;
390 int dom;
391 int type;
392 {
393 struct socket *so;
394
395 if ((dom == PF_INET) && (type == SOCK_STREAM))
396 cached_sock_alloc(&so, waitok);
397 else
398 {
399 MALLOC_ZONE(so, struct socket *, sizeof(*so), socket_zone, M_WAITOK);
400 if (so)
401 bzero(so, sizeof *so);
402 }
403 /* XXX race condition for reentrant kernel */
404 //###LD Atomic add for so_gencnt
405 if (so) {
406 so->so_gencnt = ++so_gencnt;
407 so->so_zone = socket_zone;
408 }
409
410 return so;
411 }
412
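/*
 * Create a new socket for the given domain, type, and protocol: look up
 * the protosw entry, allocate and initialize the socket (recording the
 * creator's uid and privilege), attach it to the protocol via pru_attach,
 * and install any socket filters registered for that protocol.
 */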
413 int
414 socreate(dom, aso, type, proto)
415 int dom;
416 struct socket **aso;
417 register int type;
418 int proto;
419 {
420 struct proc *p = current_proc();
421 register struct protosw *prp;
422 register struct socket *so;
423 register int error = 0;
424 #if TCPDEBUG
425 extern int tcpconsdebug;
426 #endif
427 if (proto)
428 prp = pffindproto(dom, proto, type);
429 else
430 prp = pffindtype(dom, type);
431
432 if (prp == 0 || prp->pr_usrreqs->pru_attach == 0)
433 return (EPROTONOSUPPORT);
434 #ifndef __APPLE__
435
436 if (p->p_prison && jail_socket_unixiproute_only &&
437 prp->pr_domain->dom_family != PF_LOCAL &&
438 prp->pr_domain->dom_family != PF_INET &&
439 prp->pr_domain->dom_family != PF_ROUTE) {
440 return (EPROTONOSUPPORT);
441 }
442
443 #endif
444 if (prp->pr_type != type)
445 return (EPROTOTYPE);
446 so = soalloc(p != 0, dom, type);
447 if (so == 0)
448 return (ENOBUFS);
449
450 TAILQ_INIT(&so->so_incomp);
451 TAILQ_INIT(&so->so_comp);
452 so->so_type = type;
453
454 #ifdef __APPLE__
455 if (p != 0) {
456 so->so_uid = kauth_cred_getuid(kauth_cred_get());
457 if (!suser(kauth_cred_get(),NULL))
458 so->so_state = SS_PRIV;
459 }
460 #else
461 so->so_cred = kauth_cred_get_with_ref();
462 #endif
463 so->so_proto = prp;
464 #ifdef __APPLE__
465 so->so_rcv.sb_flags |= SB_RECV; /* XXX */
466 so->so_rcv.sb_so = so->so_snd.sb_so = so;
467 #endif
468
469 //### Attachment will create the per-pcb lock if necessary and increase refcount
470 so->so_usecount++; /* for creation, make sure it's done before socket is inserted in lists */
471
472 error = (*prp->pr_usrreqs->pru_attach)(so, proto, p);
473 if (error) {
474 /*
475 * Warning:
476 * If so_pcb is not zero, the socket will be leaked,
477 * so the protocol attachment handler must be coded carefully
478 */
479 so->so_state |= SS_NOFDREF;
480 so->so_usecount--;
481 sofreelastref(so, 1); /* will deallocate the socket */
482 return (error);
483 }
484 #ifdef __APPLE__
485 prp->pr_domain->dom_refs++;
486 TAILQ_INIT(&so->so_evlist);
487
488 /* Attach socket filters for this protocol */
489 sflt_initsock(so);
490 #if TCPDEBUG
491 if (tcpconsdebug == 2)
492 so->so_options |= SO_DEBUG;
493 #endif
494 #endif
495
496 *aso = so;
497 return (0);
498 }
499
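/*
 * Bind a name to a socket: give each bind-aware socket filter a chance to
 * intercept the request, then hand it to the protocol's pru_bind.  A filter
 * returning EJUSTRETURN suppresses both the protocol call and the error.
 */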
500 int
501 sobind(so, nam)
502 struct socket *so;
503 struct sockaddr *nam;
504
505 {
506 struct proc *p = current_proc();
507 int error = 0;
508 struct socket_filter_entry *filter;
509 int filtered = 0;
510
511 socket_lock(so, 1);
512
513 /* Socket filter */
514 error = 0;
515 for (filter = so->so_filt; filter && (error == 0);
516 filter = filter->sfe_next_onsocket) {
517 if (filter->sfe_filter->sf_filter.sf_bind) {
518 if (filtered == 0) {
519 filtered = 1;
520 sflt_use(so);
521 socket_unlock(so, 0);
522 }
523 error = filter->sfe_filter->sf_filter.sf_bind(
524 filter->sfe_cookie, so, nam);
525 }
526 }
527 if (filtered != 0) {
528 socket_lock(so, 0);
529 sflt_unuse(so);
530 }
531 /* End socket filter */
532
533 if (error == 0)
534 error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p);
535
536 socket_unlock(so, 1);
537
538 if (error == EJUSTRETURN)
539 error = 0;
540
541 return (error);
542 }
543
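/*
 * Final teardown of a socket structure: bump the generation count and
 * return the memory either to the socket cache (cached_sock_free) or to
 * the zone it came from.  A double deallocation is caught by the
 * cached_in_sock_layer == -1 check and panics.
 */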
544 void
545 sodealloc(so)
546 struct socket *so;
547 {
548 so->so_gencnt = ++so_gencnt;
549
550 #ifndef __APPLE__
551 if (so->so_rcv.sb_hiwat)
552 (void)chgsbsize(so->so_cred->cr_uidinfo,
553 &so->so_rcv.sb_hiwat, 0, RLIM_INFINITY);
554 if (so->so_snd.sb_hiwat)
555 (void)chgsbsize(so->so_cred->cr_uidinfo,
556 &so->so_snd.sb_hiwat, 0, RLIM_INFINITY);
557 #ifdef INET
558 if (so->so_accf != NULL) {
559 if (so->so_accf->so_accept_filter != NULL &&
560 so->so_accf->so_accept_filter->accf_destroy != NULL) {
561 so->so_accf->so_accept_filter->accf_destroy(so);
562 }
563 if (so->so_accf->so_accept_filter_str != NULL)
564 FREE(so->so_accf->so_accept_filter_str, M_ACCF);
565 FREE(so->so_accf, M_ACCF);
566 }
567 #endif /* INET */
568 kauth_cred_rele(so->so_cred);
569 zfreei(so->so_zone, so);
570 #else
571 if (so->cached_in_sock_layer == 1)
572 cached_sock_free(so);
573 else {
574 if (so->cached_in_sock_layer == -1)
575 panic("sodealloc: double dealloc: so=%x\n", so);
576 so->cached_in_sock_layer = -1;
577 FREE_ZONE(so, sizeof(*so), so->so_zone);
578 }
579 #endif /* __APPLE__ */
580 }
581
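/*
 * Mark a socket as willing to accept connections.  Listen-aware socket
 * filters run first, then the protocol's pru_listen; on success
 * SO_ACCEPTCONN is set (if the completed-connection queue is empty) and
 * the backlog is clamped to somaxconn.
 */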
582 int
583 solisten(so, backlog)
584 register struct socket *so;
585 int backlog;
586
587 {
588 struct proc *p = current_proc();
589 int error;
590
591 socket_lock(so, 1);
592
593 {
594 struct socket_filter_entry *filter;
595 int filtered = 0;
596 error = 0;
597 for (filter = so->so_filt; filter && (error == 0);
598 filter = filter->sfe_next_onsocket) {
599 if (filter->sfe_filter->sf_filter.sf_listen) {
600 if (filtered == 0) {
601 filtered = 1;
602 sflt_use(so);
603 socket_unlock(so, 0);
604 }
605 error = filter->sfe_filter->sf_filter.sf_listen(
606 filter->sfe_cookie, so);
607 }
608 }
609 if (filtered != 0) {
610 socket_lock(so, 0);
611 sflt_unuse(so);
612 }
613 }
614
615 if (error == 0) {
616 error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p);
617 }
618
619 if (error) {
620 socket_unlock(so, 1);
621 if (error == EJUSTRETURN)
622 error = 0;
623 return (error);
624 }
625
626 if (TAILQ_EMPTY(&so->so_comp))
627 so->so_options |= SO_ACCEPTCONN;
628 if (backlog < 0 || backlog > somaxconn)
629 backlog = somaxconn;
630 so->so_qlimit = backlog;
631
632 socket_unlock(so, 1);
633 return (0);
634 }
635
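/*
 * Drop the last reference on a socket.  Sockets still awaiting protocol
 * detach (SOF_PCBCLEARING not set) or still referenced by a file
 * descriptor are left alone; sockets sitting on a listener's incomplete
 * queue are unlinked from it, while sockets on the accept queue must not
 * be torn down here.  Otherwise the send and receive buffers are released
 * and, if requested, the socket structure itself is deallocated.
 */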
636 void
637 sofreelastref(so, dealloc)
638 register struct socket *so;
639 int dealloc;
640 {
641 int error;
642 struct socket *head = so->so_head;
643
644 /*### Assume socket is locked */
645
646 /* Remove any filters - may be called more than once */
647 sflt_termsock(so);
648
649 if ((!(so->so_flags & SOF_PCBCLEARING)) || ((so->so_state & SS_NOFDREF) == 0)) {
650 #ifdef __APPLE__
651 selthreadclear(&so->so_snd.sb_sel);
652 selthreadclear(&so->so_rcv.sb_sel);
653 so->so_rcv.sb_flags &= ~SB_UPCALL;
654 so->so_snd.sb_flags &= ~SB_UPCALL;
655 #endif
656 return;
657 }
658 if (head != NULL) {
659 socket_lock(head, 1);
660 if (so->so_state & SS_INCOMP) {
661 TAILQ_REMOVE(&head->so_incomp, so, so_list);
662 head->so_incqlen--;
663 } else if (so->so_state & SS_COMP) {
664 /*
665 * We must not decommission a socket that's
666 * on the accept(2) queue. If we do, then
667 * accept(2) may hang after select(2) indicated
668 * that the listening socket was ready.
669 */
670 #ifdef __APPLE__
671 selthreadclear(&so->so_snd.sb_sel);
672 selthreadclear(&so->so_rcv.sb_sel);
673 so->so_rcv.sb_flags &= ~SB_UPCALL;
674 so->so_snd.sb_flags &= ~SB_UPCALL;
675 #endif
676 socket_unlock(head, 1);
677 return;
678 } else {
679 panic("sofree: not queued");
680 }
681 head->so_qlen--;
682 so->so_state &= ~SS_INCOMP;
683 so->so_head = NULL;
684 socket_unlock(head, 1);
685 }
686 #ifdef __APPLE__
687 selthreadclear(&so->so_snd.sb_sel);
688 sbrelease(&so->so_snd);
689 #endif
690 sorflush(so);
691
692 /* 3932268: disable upcall */
693 so->so_rcv.sb_flags &= ~SB_UPCALL;
694 so->so_snd.sb_flags &= ~SB_UPCALL;
695
696 if (dealloc)
697 sodealloc(so);
698 }
699
700 /*
701 * Close a socket on last file table reference removal.
702 * Initiate disconnect if connected.
703 * Free socket when disconnect complete.
704 */
705 int
706 soclose_locked(so)
707 register struct socket *so;
708 {
709 int error = 0;
710 lck_mtx_t * mutex_held;
711 struct timespec ts;
712
713 if (so->so_usecount == 0) {
714 panic("soclose: so=%x refcount=0\n", so);
715 }
716
717 sflt_notify(so, sock_evt_closing, NULL);
718
719 if ((so->so_options & SO_ACCEPTCONN)) {
720 struct socket *sp;
721
722 /* We do not want new connections to be added to the connection queues */
723 so->so_options &= ~SO_ACCEPTCONN;
724
725 while ((sp = TAILQ_FIRST(&so->so_incomp)) != NULL) {
726 /* A bit tricky here. With a protocol-global
727 * lock we simply keep holding it, but with
728 * per-socket locks we must drop the head's lock
729 * and take the incoming socket's lock instead.
730 */
731 if (so->so_proto->pr_getlock != NULL) {
732 socket_unlock(so, 0);
733 socket_lock(sp, 1);
734 }
735 (void) soabort(sp);
736 if (so->so_proto->pr_getlock != NULL) {
737 socket_unlock(sp, 1);
738 socket_lock(so, 0);
739 }
740 }
741
742 while ((sp = TAILQ_FIRST(&so->so_comp)) != NULL) {
743 /* Dequeue from so_comp since sofree() won't do it */
744 TAILQ_REMOVE(&so->so_comp, sp, so_list);
745 so->so_qlen--;
746
747 if (so->so_proto->pr_getlock != NULL) {
748 socket_unlock(so, 0);
749 socket_lock(sp, 1);
750 }
751
752 sp->so_state &= ~SS_COMP;
753 sp->so_head = NULL;
754
755 (void) soabort(sp);
756 if (so->so_proto->pr_getlock != NULL) {
757 socket_unlock(sp, 1);
758 socket_lock(so, 0);
759 }
760 }
761 }
762 if (so->so_pcb == 0) {
763 /* 3915887: mark the socket as ready for dealloc */
764 so->so_flags |= SOF_PCBCLEARING;
765 goto discard;
766 }
767 if (so->so_state & SS_ISCONNECTED) {
768 if ((so->so_state & SS_ISDISCONNECTING) == 0) {
769 error = sodisconnectlocked(so);
770 if (error)
771 goto drop;
772 }
773 if (so->so_options & SO_LINGER) {
774 if ((so->so_state & SS_ISDISCONNECTING) &&
775 (so->so_state & SS_NBIO))
776 goto drop;
777 if (so->so_proto->pr_getlock != NULL)
778 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
779 else
780 mutex_held = so->so_proto->pr_domain->dom_mtx;
781 while (so->so_state & SS_ISCONNECTED) {
782 ts.tv_sec = (so->so_linger/100);
783 ts.tv_nsec = (so->so_linger % 100) * NSEC_PER_USEC * 1000 * 10;
784 error = msleep((caddr_t)&so->so_timeo, mutex_held,
785 PSOCK | PCATCH, "soclos", &ts);
786 if (error) {
787 /* It's OK when the timer fires, don't report an error */
788 if (error == EWOULDBLOCK)
789 error = 0;
790 break;
791 }
792 }
793 }
794 }
795 drop:
796 if (so->so_usecount == 0)
797 panic("soclose: usecount is zero so=%x\n", so);
798 if (so->so_pcb && !(so->so_flags & SOF_PCBCLEARING)) {
799 int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so);
800 if (error == 0)
801 error = error2;
802 }
803 if (so->so_usecount <= 0)
804 panic("soclose: usecount is zero so=%x\n", so);
805 discard:
806 if (so->so_pcb && so->so_state & SS_NOFDREF)
807 panic("soclose: NOFDREF");
808 so->so_state |= SS_NOFDREF;
809 #ifdef __APPLE__
810 so->so_proto->pr_domain->dom_refs--;
811 evsofree(so);
812 #endif
813 so->so_usecount--;
814 sofree(so);
815 return (error);
816 }
817
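/*
 * File-table entry point for close: takes the socket lock and runs the
 * full soclose_locked() teardown unless the socket is also retained by
 * the kernel (so_retaincnt != 0), in which case only the file descriptor's
 * use count is dropped.
 */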
818 int
819 soclose(so)
820 register struct socket *so;
821 {
822 int error = 0;
823 socket_lock(so, 1);
824 if (so->so_retaincnt == 0)
825 error = soclose_locked(so);
826 else { /* if the FD is going away but the socket is retained in the kernel, remove its reference */
827 so->so_usecount--;
828 if (so->so_usecount < 2)
829 panic("soclose: retaincnt non null and so=%x usecount=%x\n", so, so->so_usecount);
830 }
831 socket_unlock(so, 1);
832 return (error);
833 }
834
835
836 /*
837 * Must be called at splnet...
838 */
839 //#### Should already be locked
840 int
841 soabort(so)
842 struct socket *so;
843 {
844 int error;
845
846 #ifdef MORE_LOCKING_DEBUG
847 lck_mtx_t * mutex_held;
848
849 if (so->so_proto->pr_getlock != NULL)
850 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
851 else
852 mutex_held = so->so_proto->pr_domain->dom_mtx;
853 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
854 #endif
855
856 error = (*so->so_proto->pr_usrreqs->pru_abort)(so);
857 if (error) {
858 sofree(so);
859 return error;
860 }
861 return (0);
862 }
863
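/*
 * Accept a connection on a completed socket, optionally taking the socket
 * lock.  The socket must still be marked SS_NOFDREF on entry; the flag is
 * cleared here before the protocol's pru_accept fills in the peer address.
 */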
864 int
865 soacceptlock(so, nam, dolock)
866 register struct socket *so;
867 struct sockaddr **nam;
868 int dolock;
869 {
870 int error;
871
872 if (dolock) socket_lock(so, 1);
873
874 if ((so->so_state & SS_NOFDREF) == 0)
875 panic("soaccept: !NOFDREF");
876 so->so_state &= ~SS_NOFDREF;
877 error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam);
878
879 if (dolock) socket_unlock(so, 1);
880 return (error);
881 }
882 int
883 soaccept(so, nam)
884 register struct socket *so;
885 struct sockaddr **nam;
886 {
887 return (soacceptlock(so, nam, 1));
888 }
889
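/*
 * Initiate a connection, optionally taking the socket lock.  Listening
 * sockets are rejected, a connection-based socket may only connect once,
 * and connect-out socket filters get to inspect (or swallow, via
 * EJUSTRETURN) the request before the protocol's pru_connect runs.
 */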
890 int
891 soconnectlock(so, nam, dolock)
892 register struct socket *so;
893 struct sockaddr *nam;
894 int dolock;
895
896 {
897 int s;
898 int error;
899 struct proc *p = current_proc();
900
901 if (dolock) socket_lock(so, 1);
902
903 if (so->so_options & SO_ACCEPTCONN) {
904 if (dolock) socket_unlock(so, 1);
905 return (EOPNOTSUPP);
906 }
907 /*
908 * If protocol is connection-based, can only connect once.
909 * Otherwise, if connected, try to disconnect first.
910 * This allows user to disconnect by connecting to, e.g.,
911 * a null address.
912 */
913 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
914 ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
915 (error = sodisconnectlocked(so))))
916 error = EISCONN;
917 else {
918 /*
919 * Run connect filter before calling protocol:
920 * - non-blocking connect returns before completion;
921 */
922 {
923 struct socket_filter_entry *filter;
924 int filtered = 0;
925 error = 0;
926 for (filter = so->so_filt; filter && (error == 0);
927 filter = filter->sfe_next_onsocket) {
928 if (filter->sfe_filter->sf_filter.sf_connect_out) {
929 if (filtered == 0) {
930 filtered = 1;
931 sflt_use(so);
932 socket_unlock(so, 0);
933 }
934 error = filter->sfe_filter->sf_filter.sf_connect_out(
935 filter->sfe_cookie, so, nam);
936 }
937 }
938 if (filtered != 0) {
939 socket_lock(so, 0);
940 sflt_unuse(so);
941 }
942 }
943 if (error) {
944 if (error == EJUSTRETURN)
945 error = 0;
946 if (dolock) socket_unlock(so, 1);
947 return error;
948 }
949
950 error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, p);
951 }
952 if (dolock) socket_unlock(so, 1);
953 return (error);
954 }
955
956 int
957 soconnect(so, nam)
958 register struct socket *so;
959 struct sockaddr *nam;
960 {
961 return (soconnectlock(so, nam, 1));
962 }
963
964 int
965 soconnect2(so1, so2)
966 register struct socket *so1;
967 struct socket *so2;
968 {
969 int error;
970 //####### Assumes so1 is already locked
971
972 socket_lock(so2, 1);
973
974 error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2);
975
976 socket_unlock(so2, 1);
977 return (error);
978 }
979
980
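/*
 * Disconnect a connected socket (caller holds the socket lock): fails with
 * ENOTCONN if not connected or EALREADY if a disconnect is in progress,
 * otherwise invokes the protocol's pru_disconnect and notifies any socket
 * filters of the event.
 */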
981 int
982 sodisconnectlocked(so)
983 register struct socket *so;
984 {
985 int error;
986
987 if ((so->so_state & SS_ISCONNECTED) == 0) {
988 error = ENOTCONN;
989 goto bad;
990 }
991 if (so->so_state & SS_ISDISCONNECTING) {
992 error = EALREADY;
993 goto bad;
994 }
995
996 error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so);
997
998 if (error == 0) {
999 sflt_notify(so, sock_evt_disconnected, NULL);
1000 }
1001
1002 bad:
1003 return (error);
1004 }
1005 //### Locking version
1006 int
1007 sodisconnect(so)
1008 register struct socket *so;
1009 {
1010 int error;
1011
1012 socket_lock(so, 1);
1013 error = sodisconnectlocked(so);
1014 socket_unlock(so, 1);
1015 return(error);
1016 }
1017
1018 #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_DONTWAIT : M_WAIT)
1019
1020 /*
1021 * sosendcheck will lock the socket buffer if it isn't locked and
1022 * verify that there is space for the data being inserted.
1023 */
1024
1025 static int
1026 sosendcheck(
1027 struct socket *so,
1028 struct sockaddr *addr,
1029 long resid,
1030 long clen,
1031 long atomic,
1032 int flags,
1033 int *sblocked)
1034 {
1035 int error = 0;
1036 long space;
1037 int assumelock = 0;
1038
1039 restart:
1040 if (*sblocked == 0) {
1041 if ((so->so_snd.sb_flags & SB_LOCK) != 0 &&
1042 so->so_send_filt_thread != 0 &&
1043 so->so_send_filt_thread == current_thread()) {
1044 /*
1045 * We're being called recursively from a filter,
1046 * allow this to continue. Radar 4150520.
1047 * Don't set sblocked because we don't want
1048 * to perform an unlock later.
1049 */
1050 assumelock = 1;
1051 }
1052 else {
1053 error = sblock(&so->so_snd, SBLOCKWAIT(flags));
1054 if (error) {
1055 return error;
1056 }
1057 *sblocked = 1;
1058 }
1059 }
1060
1061 if (so->so_state & SS_CANTSENDMORE)
1062 return EPIPE;
1063
1064 if (so->so_error) {
1065 error = so->so_error;
1066 so->so_error = 0;
1067 return error;
1068 }
1069
1070 if ((so->so_state & SS_ISCONNECTED) == 0) {
1071 /*
1072 * `sendto' and `sendmsg' are allowed on a connection-
1073 * based socket if it supports implied connect.
1074 * Return ENOTCONN if not connected and no address is
1075 * supplied.
1076 */
1077 if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
1078 (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
1079 if ((so->so_state & SS_ISCONFIRMING) == 0 &&
1080 !(resid == 0 && clen != 0))
1081 return ENOTCONN;
1082 } else if (addr == 0 && !(flags&MSG_HOLD))
1083 return (so->so_proto->pr_flags & PR_CONNREQUIRED) ? ENOTCONN : EDESTADDRREQ;
1084 }
1085 space = sbspace(&so->so_snd);
1086 if (flags & MSG_OOB)
1087 space += 1024;
1088 if ((atomic && resid > so->so_snd.sb_hiwat) ||
1089 clen > so->so_snd.sb_hiwat)
1090 return EMSGSIZE;
1091 if (space < resid + clen &&
1092 (atomic || space < so->so_snd.sb_lowat || space < clen)) {
1093 if ((so->so_state & SS_NBIO) || (flags & MSG_NBIO) || assumelock) {
1094 return EWOULDBLOCK;
1095 }
1096 sbunlock(&so->so_snd, 1);
1097 error = sbwait(&so->so_snd);
1098 if (error) {
1099 return error;
1100 }
1101 goto restart;
1102 }
1103
1104 return 0;
1105 }
1106
1107 /*
1108 * Send on a socket.
1109 * If send must go all at once and message is larger than
1110 * send buffering, then hard error.
1111 * Lock against other senders.
1112 * If must go all at once and not enough room now, then
1113 * inform user that this would block and do nothing.
1114 * Otherwise, if nonblocking, send as much as possible.
1115 * The data to be sent is described by "uio" if nonzero,
1116 * otherwise by the mbuf chain "top" (which must be null
1117 * if uio is not). Data provided in mbuf chain must be small
1118 * enough to send all at once.
1119 *
1120 * Returns nonzero on error, timeout or signal; callers
1121 * must check for short counts if EINTR/ERESTART are returned.
1122 * Data and control buffers are freed on return.
1123 * Experiment:
1124 * MSG_HOLD: go thru most of sosend(), but just enqueue the mbuf
1125 * MSG_SEND: go thru as for MSG_HOLD on current fragment, then
1126 * point at the mbuf chain being constructed and go from there.
1127 */
1128 int
1129 sosend(so, addr, uio, top, control, flags)
1130 register struct socket *so;
1131 struct sockaddr *addr;
1132 struct uio *uio;
1133 struct mbuf *top;
1134 struct mbuf *control;
1135 int flags;
1136
1137 {
1138 struct mbuf **mp;
1139 register struct mbuf *m, *freelist = NULL;
1140 register long space, len, resid;
1141 int clen = 0, error, dontroute, mlen, sendflags;
1142 int atomic = sosendallatonce(so) || top;
1143 int sblocked = 0;
1144 struct proc *p = current_proc();
1145
1146 if (uio)
1147 // LP64todo - fix this!
1148 resid = uio_resid(uio);
1149 else
1150 resid = top->m_pkthdr.len;
1151
1152 KERNEL_DEBUG((DBG_FNC_SOSEND | DBG_FUNC_START),
1153 so,
1154 resid,
1155 so->so_snd.sb_cc,
1156 so->so_snd.sb_lowat,
1157 so->so_snd.sb_hiwat);
1158
1159 socket_lock(so, 1);
1160
1161 /*
1162 * In theory resid should be unsigned.
1163 * However, space must be signed, as it might be less than 0
1164 * if we over-committed, and we must use a signed comparison
1165 * of space and resid. On the other hand, a negative resid
1166 * causes us to loop sending 0-length segments to the protocol.
1167 *
1168 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
1169 * type sockets since that's an error.
1170 */
1171 if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
1172 error = EINVAL;
1173 socket_unlock(so, 1);
1174 goto out;
1175 }
1176
1177 dontroute =
1178 (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
1179 (so->so_proto->pr_flags & PR_ATOMIC);
1180 if (p)
1181 p->p_stats->p_ru.ru_msgsnd++;
1182 if (control)
1183 clen = control->m_len;
1184
1185 do {
1186 error = sosendcheck(so, addr, resid, clen, atomic, flags, &sblocked);
1187 if (error) {
1188 goto release;
1189 }
1190 mp = &top;
1191 space = sbspace(&so->so_snd) - clen + ((flags & MSG_OOB) ? 1024 : 0);
1192
1193 do {
1194
1195 if (uio == NULL) {
1196 /*
1197 * Data is prepackaged in "top".
1198 */
1199 resid = 0;
1200 if (flags & MSG_EOR)
1201 top->m_flags |= M_EOR;
1202 } else {
1203 int chainlength;
1204 int bytes_to_copy;
1205
1206 bytes_to_copy = min(resid, space);
1207
1208 if (sosendminchain > 0) {
1209 chainlength = 0;
1210 } else
1211 chainlength = sosendmaxchain;
1212
1213 socket_unlock(so, 0);
1214
1215 do {
1216 int num_needed;
1217 int hdrs_needed = (top == 0) ? 1 : 0;
1218
1219 /*
1220 * try to maintain a local cache of mbuf clusters needed to complete this write
1221 * the list is further limited to the number that are currently needed to fill the socket
1222 * this mechanism allows a large number of mbufs/clusters to be grabbed under a single
1223 * mbuf lock... if we can't get any clusters, then fall back to trying for mbufs
1224 * if we fail early (or miscalculate the number needed) make sure to release any clusters
1225 * we haven't yet consumed.
1226 */
1227 if (freelist == NULL && bytes_to_copy > MCLBYTES) {
1228 num_needed = bytes_to_copy / NBPG;
1229
1230 if ((bytes_to_copy - (num_needed * NBPG)) >= MINCLSIZE)
1231 num_needed++;
1232
1233 freelist = m_getpackets_internal(&num_needed, hdrs_needed, M_WAIT, 0, NBPG);
1234 /* Fall back to cluster size if allocation failed */
1235 }
1236
1237 if (freelist == NULL && bytes_to_copy > MINCLSIZE) {
1238 num_needed = bytes_to_copy / MCLBYTES;
1239
1240 if ((bytes_to_copy - (num_needed * MCLBYTES)) >= MINCLSIZE)
1241 num_needed++;
1242
1243 freelist = m_getpackets_internal(&num_needed, hdrs_needed, M_WAIT, 0, MCLBYTES);
1244 /* Fall back to a single mbuf if allocation failed */
1245 }
1246
1247 if (freelist == NULL) {
1248 if (top == 0)
1249 MGETHDR(freelist, M_WAIT, MT_DATA);
1250 else
1251 MGET(freelist, M_WAIT, MT_DATA);
1252
1253 if (freelist == NULL) {
1254 error = ENOBUFS;
1255 socket_lock(so, 0);
1256 goto release;
1257 }
1258 /*
1259 * For datagram protocols, leave room
1260 * for protocol headers in first mbuf.
1261 */
1262 if (atomic && top == 0 && bytes_to_copy < MHLEN)
1263 MH_ALIGN(freelist, bytes_to_copy);
1264 }
1265 m = freelist;
1266 freelist = m->m_next;
1267 m->m_next = NULL;
1268
1269 if ((m->m_flags & M_EXT))
1270 mlen = m->m_ext.ext_size;
1271 else if ((m->m_flags & M_PKTHDR))
1272 mlen = MHLEN - m_leadingspace(m);
1273 else
1274 mlen = MLEN;
1275 len = min(mlen, bytes_to_copy);
1276
1277 chainlength += len;
1278
1279 space -= len;
1280
1281 error = uiomove(mtod(m, caddr_t), (int)len, uio);
1282
1283 // LP64todo - fix this!
1284 resid = uio_resid(uio);
1285
1286 m->m_len = len;
1287 *mp = m;
1288 top->m_pkthdr.len += len;
1289 if (error)
1290 break;
1291 mp = &m->m_next;
1292 if (resid <= 0) {
1293 if (flags & MSG_EOR)
1294 top->m_flags |= M_EOR;
1295 break;
1296 }
1297 bytes_to_copy = min(resid, space);
1298
1299 } while (space > 0 && (chainlength < sosendmaxchain || atomic || resid < MINCLSIZE));
1300
1301 socket_lock(so, 0);
1302
1303 if (error)
1304 goto release;
1305 }
1306
1307 if (flags & (MSG_HOLD|MSG_SEND))
1308 {
1309 /* Enqueue for later, go away if HOLD */
1310 register struct mbuf *mb1;
1311 if (so->so_temp && (flags & MSG_FLUSH))
1312 {
1313 m_freem(so->so_temp);
1314 so->so_temp = NULL;
1315 }
1316 if (so->so_temp)
1317 so->so_tail->m_next = top;
1318 else
1319 so->so_temp = top;
1320 mb1 = top;
1321 while (mb1->m_next)
1322 mb1 = mb1->m_next;
1323 so->so_tail = mb1;
1324 if (flags & MSG_HOLD)
1325 {
1326 top = NULL;
1327 goto release;
1328 }
1329 top = so->so_temp;
1330 }
1331 if (dontroute)
1332 so->so_options |= SO_DONTROUTE;
1333 /* Compute flags here, for pru_send and NKEs */
1334 sendflags = (flags & MSG_OOB) ? PRUS_OOB :
1335 /*
1336 * If the user set MSG_EOF, the protocol
1337 * understands this flag and nothing left to
1338 * send then use PRU_SEND_EOF instead of PRU_SEND.
1339 */
1340 ((flags & MSG_EOF) &&
1341 (so->so_proto->pr_flags & PR_IMPLOPCL) &&
1342 (resid <= 0)) ?
1343 PRUS_EOF :
1344 /* If there is more to send set PRUS_MORETOCOME */
1345 (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0;
1346
1347 /*
1348 * Socket filter processing
1349 */
1350 {
1351 struct socket_filter_entry *filter;
1352 int filtered;
1353
1354 filtered = 0;
1355 error = 0;
1356 for (filter = so->so_filt; filter && (error == 0);
1357 filter = filter->sfe_next_onsocket) {
1358 if (filter->sfe_filter->sf_filter.sf_data_out) {
1359 int so_flags = 0;
1360 if (filtered == 0) {
1361 filtered = 1;
1362 so->so_send_filt_thread = current_thread();
1363 sflt_use(so);
1364 socket_unlock(so, 0);
1365 so_flags = (sendflags & MSG_OOB) ? sock_data_filt_flag_oob : 0;
1366 }
1367 error = filter->sfe_filter->sf_filter.sf_data_out(
1368 filter->sfe_cookie, so, addr, &top, &control, so_flags);
1369 }
1370 }
1371
1372 if (filtered) {
1373 /*
1374 * At this point, we've run at least one filter.
1375 * The socket is unlocked as is the socket buffer.
1376 */
1377 socket_lock(so, 0);
1378 sflt_unuse(so);
1379 so->so_send_filt_thread = 0;
1380 if (error) {
1381 if (error == EJUSTRETURN) {
1382 error = 0;
1383 clen = 0;
1384 control = 0;
1385 top = 0;
1386 }
1387
1388 goto release;
1389 }
1390 }
1391 }
1392 /*
1393 * End Socket filter processing
1394 */
1395
1396 if (error == EJUSTRETURN) {
1397 /* A socket filter handled this data */
1398 error = 0;
1399 }
1400 else {
1401 error = (*so->so_proto->pr_usrreqs->pru_send)(so,
1402 sendflags, top, addr, control, p);
1403 }
1404 #ifdef __APPLE__
1405 if (flags & MSG_SEND)
1406 so->so_temp = NULL;
1407 #endif
1408 if (dontroute)
1409 so->so_options &= ~SO_DONTROUTE;
1410 clen = 0;
1411 control = 0;
1412 top = 0;
1413 mp = &top;
1414 if (error)
1415 goto release;
1416 } while (resid && space > 0);
1417 } while (resid);
1418
1419 release:
1420 if (sblocked)
1421 sbunlock(&so->so_snd, 0); /* will unlock socket */
1422 else
1423 socket_unlock(so, 1);
1424 out:
1425 if (top)
1426 m_freem(top);
1427 if (control)
1428 m_freem(control);
1429 if (freelist)
1430 m_freem_list(freelist);
1431
1432 KERNEL_DEBUG(DBG_FNC_SOSEND | DBG_FUNC_END,
1433 so,
1434 resid,
1435 so->so_snd.sb_cc,
1436 space,
1437 error);
1438
1439 return (error);
1440 }
1441
1442 /*
1443 * Implement receive operations on a socket.
1444 * We depend on the way that records are added to the sockbuf
1445 * by sbappend*. In particular, each record (mbufs linked through m_next)
1446 * must begin with an address if the protocol so specifies,
1447 * followed by an optional mbuf or mbufs containing ancillary data,
1448 * and then zero or more mbufs of data.
1449 * In order to avoid blocking network interrupts for the entire time here,
1450 * we splx() while doing the actual copy to user space.
1451 * Although the sockbuf is locked, new data may still be appended,
1452 * and thus we must maintain consistency of the sockbuf during that time.
1453 *
1454 * The caller may receive the data as a single mbuf chain by supplying
1455 * an mbuf **mp0 for use in returning the chain. The uio is then used
1456 * only for the count in uio_resid.
1457 */
1458 int
1459 soreceive(so, psa, uio, mp0, controlp, flagsp)
1460 register struct socket *so;
1461 struct sockaddr **psa;
1462 struct uio *uio;
1463 struct mbuf **mp0;
1464 struct mbuf **controlp;
1465 int *flagsp;
1466 {
1467 register struct mbuf *m, **mp, *ml = NULL;
1468 register int flags, len, error, offset;
1469 struct protosw *pr = so->so_proto;
1470 struct mbuf *nextrecord;
1471 int moff, type = 0;
1472 // LP64todo - fix this!
1473 int orig_resid = uio_resid(uio);
1474 volatile struct mbuf *free_list;
1475 volatile int delayed_copy_len;
1476 int can_delay;
1477 int need_event;
1478 struct proc *p = current_proc();
1479
1480
1481 // LP64todo - fix this!
1482 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_START,
1483 so,
1484 uio_resid(uio),
1485 so->so_rcv.sb_cc,
1486 so->so_rcv.sb_lowat,
1487 so->so_rcv.sb_hiwat);
1488
1489 socket_lock(so, 1);
1490
1491 #ifdef MORE_LOCKING_DEBUG
1492 if (so->so_usecount == 1)
1493 panic("soreceive: so=%x no other reference on socket\n", so);
1494 #endif
1495 mp = mp0;
1496 if (psa)
1497 *psa = 0;
1498 if (controlp)
1499 *controlp = 0;
1500 if (flagsp)
1501 flags = *flagsp &~ MSG_EOR;
1502 else
1503 flags = 0;
1504 /*
1505 * When SO_WANTOOBFLAG is set we try to get out-of-band data
1506 * regardless of the flags argument. Here is the case where
1507 * out-of-band data is not inline.
1508 */
1509 if ((flags & MSG_OOB) ||
1510 ((so->so_options & SO_WANTOOBFLAG) != 0 &&
1511 (so->so_options & SO_OOBINLINE) == 0 &&
1512 (so->so_oobmark || (so->so_state & SS_RCVATMARK)))) {
1513 m = m_get(M_WAIT, MT_DATA);
1514 if (m == NULL) {
1515 socket_unlock(so, 1);
1516 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, ENOBUFS,0,0,0,0);
1517 return (ENOBUFS);
1518 }
1519 error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
1520 if (error)
1521 goto bad;
1522 socket_unlock(so, 0);
1523 do {
1524 // LP64todo - fix this!
1525 error = uiomove(mtod(m, caddr_t),
1526 (int) min(uio_resid(uio), m->m_len), uio);
1527 m = m_free(m);
1528 } while (uio_resid(uio) && error == 0 && m);
1529 socket_lock(so, 0);
1530 bad:
1531 if (m)
1532 m_freem(m);
1533 #ifdef __APPLE__
1534 if ((so->so_options & SO_WANTOOBFLAG) != 0) {
1535 if (error == EWOULDBLOCK || error == EINVAL) {
1536 /*
1537 * Let's try to get normal data:
1538 * EWOULDBLOCK: out-of-band data not received yet;
1539 * EINVAL: out-of-band data already read.
1540 */
1541 error = 0;
1542 goto nooob;
1543 } else if (error == 0 && flagsp)
1544 *flagsp |= MSG_OOB;
1545 }
1546 socket_unlock(so, 1);
1547 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0);
1548 #endif
1549 return (error);
1550 }
1551 nooob:
1552 if (mp)
1553 *mp = (struct mbuf *)0;
1554 if (so->so_state & SS_ISCONFIRMING && uio_resid(uio))
1555 (*pr->pr_usrreqs->pru_rcvd)(so, 0);
1556
1557
1558 free_list = (struct mbuf *)0;
1559 delayed_copy_len = 0;
1560 restart:
1561 #ifdef MORE_LOCKING_DEBUG
1562 if (so->so_usecount <= 1)
1563 printf("soreceive: sblock so=%x ref=%d on socket\n", so, so->so_usecount);
1564 #endif
1565 error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
1566 if (error) {
1567 socket_unlock(so, 1);
1568 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0);
1569 return (error);
1570 }
1571
1572 m = so->so_rcv.sb_mb;
1573 /*
1574 * If we have less data than requested, block awaiting more
1575 * (subject to any timeout) if:
1576 * 1. the current count is less than the low water mark, or
1577 * 2. MSG_WAITALL is set, and it is possible to do the entire
1578 * receive operation at once if we block (resid <= hiwat).
1579 * 3. MSG_DONTWAIT is not set
1580 * If MSG_WAITALL is set but resid is larger than the receive buffer,
1581 * we have to do the receive in sections, and thus risk returning
1582 * a short count if a timeout or signal occurs after we start.
1583 */
1584 if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
1585 so->so_rcv.sb_cc < uio_resid(uio)) &&
1586 (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
1587 ((flags & MSG_WAITALL) && uio_resid(uio) <= so->so_rcv.sb_hiwat)) &&
1588 m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
1589
1590 KASSERT(m != 0 || !so->so_rcv.sb_cc, ("receive 1"));
1591 if (so->so_error) {
1592 if (m)
1593 goto dontblock;
1594 error = so->so_error;
1595 if ((flags & MSG_PEEK) == 0)
1596 so->so_error = 0;
1597 goto release;
1598 }
1599 if (so->so_state & SS_CANTRCVMORE) {
1600 if (m)
1601 goto dontblock;
1602 else
1603 goto release;
1604 }
1605 for (; m; m = m->m_next)
1606 if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
1607 m = so->so_rcv.sb_mb;
1608 goto dontblock;
1609 }
1610 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
1611 (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
1612 error = ENOTCONN;
1613 goto release;
1614 }
1615 if (uio_resid(uio) == 0)
1616 goto release;
1617 if ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO))) {
1618 error = EWOULDBLOCK;
1619 goto release;
1620 }
1621 sbunlock(&so->so_rcv, 1);
1622 #ifdef EVEN_MORE_LOCKING_DEBUG
1623 if (socket_debug)
1624 printf("Waiting for socket data\n");
1625 #endif
1626
1627 error = sbwait(&so->so_rcv);
1628 #ifdef EVEN_MORE_LOCKING_DEBUG
1629 if (socket_debug)
1630 printf("SORECEIVE - sbwait returned %d\n", error);
1631 #endif
1632 if (so->so_usecount < 1)
1633 panic("soreceive: after 2nd sblock so=%x ref=%d on socket\n", so, so->so_usecount);
1634 if (error) {
1635 socket_unlock(so, 1);
1636 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0);
1637 return (error);
1638 }
1639 goto restart;
1640 }
1641 dontblock:
1642 #ifndef __APPLE__
1643 if (uio->uio_procp)
1644 uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
1645 #else /* __APPLE__ */
1646 /*
1647 * 2207985
1648 * This should be uio->uio_procp; however, some callers of this
1649 * function use auto variables with stack garbage, and fail to
1650 * fill out the uio structure properly.
1651 */
1652 if (p)
1653 p->p_stats->p_ru.ru_msgrcv++;
1654 #endif /* __APPLE__ */
1655 nextrecord = m->m_nextpkt;
1656 if ((pr->pr_flags & PR_ADDR) && m->m_type == MT_SONAME) {
1657 KASSERT(m->m_type == MT_SONAME, ("receive 1a"));
1658 orig_resid = 0;
1659 if (psa) {
1660 *psa = dup_sockaddr(mtod(m, struct sockaddr *),
1661 mp0 == 0);
1662 if ((*psa == 0) && (flags & MSG_NEEDSA)) {
1663 error = EWOULDBLOCK;
1664 goto release;
1665 }
1666 }
1667 if (flags & MSG_PEEK) {
1668 m = m->m_next;
1669 } else {
1670 sbfree(&so->so_rcv, m);
1671 if (m->m_next == 0 && so->so_rcv.sb_cc != 0)
1672 panic("soreceive: about to create invalid socketbuf");
1673 MFREE(m, so->so_rcv.sb_mb);
1674 m = so->so_rcv.sb_mb;
1675 }
1676 }
1677 while (m && m->m_type == MT_CONTROL && error == 0) {
1678 if (flags & MSG_PEEK) {
1679 if (controlp)
1680 *controlp = m_copy(m, 0, m->m_len);
1681 m = m->m_next;
1682 } else {
1683 sbfree(&so->so_rcv, m);
1684 if (controlp) {
1685 if (pr->pr_domain->dom_externalize &&
1686 mtod(m, struct cmsghdr *)->cmsg_type ==
1687 SCM_RIGHTS) {
1688 socket_unlock(so, 0); /* release socket lock: see 3903171 */
1689 error = (*pr->pr_domain->dom_externalize)(m);
1690 socket_lock(so, 0);
1691 }
1692 *controlp = m;
1693 if (m->m_next == 0 && so->so_rcv.sb_cc != 0)
1694 panic("soreceive: so->so_rcv.sb_mb->m_next == 0 && so->so_rcv.sb_cc != 0");
1695 so->so_rcv.sb_mb = m->m_next;
1696 m->m_next = 0;
1697 m = so->so_rcv.sb_mb;
1698 } else {
1699 MFREE(m, so->so_rcv.sb_mb);
1700 m = so->so_rcv.sb_mb;
1701 }
1702 }
1703 if (controlp) {
1704 orig_resid = 0;
1705 controlp = &(*controlp)->m_next;
1706 }
1707 }
1708 if (m) {
1709 if ((flags & MSG_PEEK) == 0)
1710 m->m_nextpkt = nextrecord;
1711 type = m->m_type;
1712 if (type == MT_OOBDATA)
1713 flags |= MSG_OOB;
1714 }
1715 moff = 0;
1716 offset = 0;
1717
1718 if (!(flags & MSG_PEEK) && uio_resid(uio) > sorecvmincopy)
1719 can_delay = 1;
1720 else
1721 can_delay = 0;
1722
1723 need_event = 0;
1724
1725 while (m && (uio_resid(uio) - delayed_copy_len) > 0 && error == 0) {
1726 if (m->m_type == MT_OOBDATA) {
1727 if (type != MT_OOBDATA)
1728 break;
1729 } else if (type == MT_OOBDATA)
1730 break;
1731 #ifndef __APPLE__
1732 /*
1733 * This assertion needs rework. The trouble is AppleTalk uses many
1734 * mbuf types (NOT listed in mbuf.h!) which will trigger this panic.
1735 * For now just remove the assertion... CSM 9/98
1736 */
1737 else
1738 KASSERT(m->m_type == MT_DATA || m->m_type == MT_HEADER,
1739 ("receive 3"));
1740 #else
1741 /*
1742 * Make sure to always set the MSG_OOB event when getting
1743 * out of band data inline.
1744 */
1745 if ((so->so_options & SO_WANTOOBFLAG) != 0 &&
1746 (so->so_options & SO_OOBINLINE) != 0 &&
1747 (so->so_state & SS_RCVATMARK) != 0) {
1748 flags |= MSG_OOB;
1749 }
1750 #endif
1751 so->so_state &= ~SS_RCVATMARK;
1752 // LP64todo - fix this!
1753 len = uio_resid(uio) - delayed_copy_len;
1754 if (so->so_oobmark && len > so->so_oobmark - offset)
1755 len = so->so_oobmark - offset;
1756 if (len > m->m_len - moff)
1757 len = m->m_len - moff;
1758 /*
1759 * If mp is set, just pass back the mbufs.
1760 * Otherwise copy them out via the uio, then free.
1761 * Sockbuf must be consistent here (points to current mbuf,
1762 * it points to next record) when we drop priority;
1763 * we must note any additions to the sockbuf when we
1764 * block interrupts again.
1765 */
1766 if (mp == 0) {
1767 if (can_delay && len == m->m_len) {
1768 /*
1769 * only delay the copy if we're consuming the
1770 * mbuf and we're NOT in MSG_PEEK mode
1771 * and we have enough data to make it worthwhile
1772 * to drop and retake the funnel... can_delay
1773 * reflects the state of the two latter constraints;
1774 * moff should always be zero in these cases
1775 */
1776 delayed_copy_len += len;
1777 } else {
1778
1779 if (delayed_copy_len) {
1780 error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);
1781
1782 if (error) {
1783 goto release;
1784 }
1785 if (m != so->so_rcv.sb_mb) {
1786 /*
1787 * can only get here if MSG_PEEK is not set
1788 * therefore, m should point at the head of the rcv queue...
1789 * if it doesn't, it means something drastically changed
1790 * while we were out from behind the funnel in sodelayed_copy...
1791 * perhaps a RST on the stream... in any event, the stream has
1792 * been interrupted... it's probably best just to return
1793 * whatever data we've moved and let the caller sort it out...
1794 */
1795 break;
1796 }
1797 }
1798 socket_unlock(so, 0);
1799 error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
1800 socket_lock(so, 0);
1801
1802 if (error)
1803 goto release;
1804 }
1805 } else
1806 uio_setresid(uio, (uio_resid(uio) - len));
1807
1808 if (len == m->m_len - moff) {
1809 if (m->m_flags & M_EOR)
1810 flags |= MSG_EOR;
1811 if (flags & MSG_PEEK) {
1812 m = m->m_next;
1813 moff = 0;
1814 } else {
1815 nextrecord = m->m_nextpkt;
1816 sbfree(&so->so_rcv, m);
1817 m->m_nextpkt = NULL;
1818
1819 if (mp) {
1820 *mp = m;
1821 mp = &m->m_next;
1822 so->so_rcv.sb_mb = m = m->m_next;
1823 *mp = (struct mbuf *)0;
1824 } else {
1825 if (free_list == NULL)
1826 free_list = m;
1827 else
1828 ml->m_next = m;
1829 ml = m;
1830 so->so_rcv.sb_mb = m = m->m_next;
1831 ml->m_next = 0;
1832 }
1833 if (m)
1834 m->m_nextpkt = nextrecord;
1835 }
1836 } else {
1837 if (flags & MSG_PEEK)
1838 moff += len;
1839 else {
1840 if (mp)
1841 *mp = m_copym(m, 0, len, M_WAIT);
1842 m->m_data += len;
1843 m->m_len -= len;
1844 so->so_rcv.sb_cc -= len;
1845 }
1846 }
1847 if (so->so_oobmark) {
1848 if ((flags & MSG_PEEK) == 0) {
1849 so->so_oobmark -= len;
1850 if (so->so_oobmark == 0) {
1851 so->so_state |= SS_RCVATMARK;
1852 /*
1853 * delay posting the actual event until after
1854 * any delayed copy processing has finished
1855 */
1856 need_event = 1;
1857 break;
1858 }
1859 } else {
1860 offset += len;
1861 if (offset == so->so_oobmark)
1862 break;
1863 }
1864 }
1865 if (flags & MSG_EOR)
1866 break;
1867 /*
1868 * If the MSG_WAITALL or MSG_WAITSTREAM flag is set (for non-atomic socket),
1869 * we must not quit until "uio->uio_resid == 0" or an error
1870 * termination. If a signal/timeout occurs, return
1871 * with a short count but without error.
1872 * Keep sockbuf locked against other readers.
1873 */
1874 while (flags & (MSG_WAITALL|MSG_WAITSTREAM) && m == 0 && (uio_resid(uio) - delayed_copy_len) > 0 &&
1875 !sosendallatonce(so) && !nextrecord) {
1876 if (so->so_error || so->so_state & SS_CANTRCVMORE)
1877 goto release;
1878
1879 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb && (((struct inpcb *)so->so_pcb)->inp_state != INPCB_STATE_DEAD))
1880 (*pr->pr_usrreqs->pru_rcvd)(so, flags);
1881 if (sbwait(&so->so_rcv)) {
1882 error = 0;
1883 goto release;
1884 }
1885 /*
1886 * have to wait until after we get back from the sbwait to do the copy because
1887 * we will drop the funnel if we have enough data that has been delayed... by dropping
1888 * the funnel we open up a window allowing the netisr thread to process the incoming packets
1889 * and to change the state of this socket... we're issuing the sbwait because
1890 * the socket is empty and we're expecting the netisr thread to wake us up when more
1891 * packets arrive... if we allow that processing to happen and then sbwait, we
1892 * could stall forever with packets sitting in the socket if no further packets
1893 * arrive from the remote side.
1894 *
1895 * we want to copy before we've collected all the data to satisfy this request to
1896 * allow the copy to overlap the incoming packet processing on an MP system
1897 */
1898 if (delayed_copy_len > sorecvmincopy && (delayed_copy_len > (so->so_rcv.sb_hiwat / 2))) {
1899
1900 error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);
1901
1902 if (error)
1903 goto release;
1904 }
1905 m = so->so_rcv.sb_mb;
1906 if (m) {
1907 nextrecord = m->m_nextpkt;
1908 }
1909 }
1910 }
1911 #ifdef MORE_LOCKING_DEBUG
1912 if (so->so_usecount <= 1)
1913 panic("soreceive: after big while so=%x ref=%d on socket\n", so, so->so_usecount);
1914 #endif
1915
1916 if (m && pr->pr_flags & PR_ATOMIC) {
1917 #ifdef __APPLE__
1918 if (so->so_options & SO_DONTTRUNC)
1919 flags |= MSG_RCVMORE;
1920 else {
1921 #endif
1922 flags |= MSG_TRUNC;
1923 if ((flags & MSG_PEEK) == 0)
1924 (void) sbdroprecord(&so->so_rcv);
1925 #ifdef __APPLE__
1926 }
1927 #endif
1928 }
1929 if ((flags & MSG_PEEK) == 0) {
1930 if (m == 0)
1931 so->so_rcv.sb_mb = nextrecord;
1932 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
1933 (*pr->pr_usrreqs->pru_rcvd)(so, flags);
1934 }
1935 #ifdef __APPLE__
1936 if ((so->so_options & SO_WANTMORE) && so->so_rcv.sb_cc > 0)
1937 flags |= MSG_HAVEMORE;
1938
1939 if (delayed_copy_len) {
1940 error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);
1941
1942 if (error)
1943 goto release;
1944 }
1945 if (free_list) {
1946 m_freem_list((struct mbuf *)free_list);
1947 free_list = (struct mbuf *)0;
1948 }
1949 if (need_event)
1950 postevent(so, 0, EV_OOB);
1951 #endif
1952 if (orig_resid == uio_resid(uio) && orig_resid &&
1953 (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
1954 sbunlock(&so->so_rcv, 1);
1955 goto restart;
1956 }
1957
1958 if (flagsp)
1959 *flagsp |= flags;
1960 release:
1961 #ifdef MORE_LOCKING_DEBUG
1962 if (so->so_usecount <= 1)
1963 panic("soreceive: release so=%x ref=%d on socket\n", so, so->so_usecount);
1964 #endif
1965 if (delayed_copy_len) {
1966 error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);
1967 }
1968 if (free_list) {
1969 m_freem_list((struct mbuf *)free_list);
1970 }
1971 sbunlock(&so->so_rcv, 0); /* will unlock socket */
1972
1973 // LP64todo - fix this!
1974 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END,
1975 so,
1976 uio_resid(uio),
1977 so->so_rcv.sb_cc,
1978 0,
1979 error);
1980
1981 return (error);
1982 }
1983
1984
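/*
 * Flush the list of mbufs whose copy to user space was deferred by
 * soreceive(): drop the socket lock, uiomove each mbuf's data into the
 * uio, free the chain, and zero the caller's deferred-byte count before
 * relocking the socket.
 */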
1985 static int sodelayed_copy(struct socket *so, struct uio *uio, struct mbuf **free_list, int *resid)
1986 {
1987 int error = 0;
1988 struct mbuf *m;
1989
1990 m = *free_list;
1991
1992 socket_unlock(so, 0);
1993
1994 while (m && error == 0) {
1995
1996 error = uiomove(mtod(m, caddr_t), (int)m->m_len, uio);
1997
1998 m = m->m_next;
1999 }
2000 m_freem_list(*free_list);
2001
2002 *free_list = (struct mbuf *)NULL;
2003 *resid = 0;
2004
2005 socket_lock(so, 0);
2006
2007 return (error);
2008 }
2009
2010
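/*
 * Shut down one or both halves of a connection: anything other than
 * SHUT_WR flushes the receive side via sorflush(), and anything other
 * than SHUT_RD is passed to the protocol's pru_shutdown.  Socket filters
 * are notified of the shutdown before either action.
 */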
2011 int
2012 soshutdown(so, how)
2013 register struct socket *so;
2014 register int how;
2015 {
2016 register struct protosw *pr = so->so_proto;
2017 int ret;
2018
2019 socket_lock(so, 1);
2020
2021 sflt_notify(so, sock_evt_shutdown, &how);
2022
2023 if (how != SHUT_WR) {
2024 sorflush(so);
2025 postevent(so, 0, EV_RCLOSED);
2026 }
2027 if (how != SHUT_RD) {
2028 ret = ((*pr->pr_usrreqs->pru_shutdown)(so));
2029 postevent(so, 0, EV_WCLOSED);
2030 KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, 0,0,0,0,0);
2031 socket_unlock(so, 1);
2032 return(ret);
2033 }
2034
2035 KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, 0,0,0,0,0);
2036 socket_unlock(so, 1);
2037 return (0);
2038 }
2039
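/*
 * Flush the receive buffer: mark the socket unable to receive more, detach
 * any selecting threads, and dispose of queued data (including any
 * in-transit rights via dom_dispose) while preserving knote registrations
 * on the reinitialized sockbuf.
 */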
2040 void
2041 sorflush(so)
2042 register struct socket *so;
2043 {
2044 register struct sockbuf *sb = &so->so_rcv;
2045 register struct protosw *pr = so->so_proto;
2046 struct sockbuf asb;
2047
2048 #ifdef MORE_LOCKING_DEBUG
2049 lck_mtx_t * mutex_held;
2050
2051 if (so->so_proto->pr_getlock != NULL)
2052 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
2053 else
2054 mutex_held = so->so_proto->pr_domain->dom_mtx;
2055 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
2056 #endif
2057
2058 sflt_notify(so, sock_evt_flush_read, NULL);
2059
2060 sb->sb_flags |= SB_NOINTR;
2061 (void) sblock(sb, M_WAIT);
2062 socantrcvmore(so);
2063 sbunlock(sb, 1);
2064 #ifdef __APPLE__
2065 selthreadclear(&sb->sb_sel);
2066 #endif
2067 asb = *sb;
2068 bzero((caddr_t)sb, sizeof (*sb));
2069 sb->sb_so = so; /* reestablish link to socket */
2070 if (asb.sb_flags & SB_KNOTE) {
2071 sb->sb_sel.si_note = asb.sb_sel.si_note;
2072 sb->sb_flags = SB_KNOTE;
2073 }
2074 if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
2075 (*pr->pr_domain->dom_dispose)(asb.sb_mb);
2076 sbrelease(&asb);
2077 }
2078
2079 /*
2080 * Perhaps this routine, and sooptcopyout(), below, ought to come in
2081 * an additional variant to handle the case where the option value needs
2082 * to be some kind of integer, but not a specific size.
2083 * In addition to their use here, these functions are also called by the
2084 * protocol-level pr_ctloutput() routines.
2085 */
2086 int
2087 sooptcopyin(sopt, buf, len, minlen)
2088 struct sockopt *sopt;
2089 void *buf;
2090 size_t len;
2091 size_t minlen;
2092 {
2093 size_t valsize;
2094
2095 /*
2096 * If the user gives us more than we wanted, we ignore it,
2097 * but if we don't get the minimum length the caller
2098 * wants, we return EINVAL. On success, sopt->sopt_valsize
2099 * is set to however much we actually retrieved.
2100 */
2101 if ((valsize = sopt->sopt_valsize) < minlen)
2102 return EINVAL;
2103 if (valsize > len)
2104 sopt->sopt_valsize = valsize = len;
2105
2106 if (sopt->sopt_p != 0)
2107 return (copyin(sopt->sopt_val, buf, valsize));
2108
2109 bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), buf, valsize);
2110 return 0;
2111 }
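
/*
 * Minimal sketch (not compiled here) of how a protocol-level pr_ctloutput()
 * routine, as mentioned in the comment above, would typically use
 * sooptcopyin() and sooptcopyout().  The option name MYPROTO_OPT_FOO and the
 * myproto_*_foo() helpers are hypothetical placeholders.
 */
#if 0
static int
myproto_ctloutput(struct socket *so, struct sockopt *sopt)
{
	int error = 0, optval;

	switch (sopt->sopt_dir) {
	case SOPT_SET:
		if (sopt->sopt_name != MYPROTO_OPT_FOO)		/* hypothetical */
			return (ENOPROTOOPT);
		/* require at least sizeof(int); ignore any excess bytes */
		error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval);
		if (error == 0)
			myproto_set_foo(so, optval);		/* hypothetical */
		break;
	case SOPT_GET:
		optval = myproto_get_foo(so);			/* hypothetical */
		error = sooptcopyout(sopt, &optval, sizeof optval);
		break;
	}
	return (error);
}
#endif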
2112
2113 int
2114 sosetopt(so, sopt)
2115 struct socket *so;
2116 struct sockopt *sopt;
2117 {
2118 int error, optval;
2119 struct linger l;
2120 struct timeval tv;
2121 short val;
2122
2123 socket_lock(so, 1);
2124
2125 if (sopt->sopt_dir != SOPT_SET) {
2126 sopt->sopt_dir = SOPT_SET;
2127 }
2128
2129 {
2130 struct socket_filter_entry *filter;
2131 int filtered = 0;
2132 error = 0;
2133 for (filter = so->so_filt; filter && (error == 0);
2134 filter = filter->sfe_next_onsocket) {
2135 if (filter->sfe_filter->sf_filter.sf_setoption) {
2136 if (filtered == 0) {
2137 filtered = 1;
2138 sflt_use(so);
2139 socket_unlock(so, 0);
2140 }
2141 error = filter->sfe_filter->sf_filter.sf_setoption(
2142 filter->sfe_cookie, so, sopt);
2143 }
2144 }
2145
2146 if (filtered != 0) {
2147 socket_lock(so, 0);
2148 sflt_unuse(so);
2149
2150 if (error) {
2151 if (error == EJUSTRETURN)
2152 error = 0;
2153 goto bad;
2154 }
2155 }
2156 }
2157
2158 error = 0;
2159 if (sopt->sopt_level != SOL_SOCKET) {
2160 if (so->so_proto && so->so_proto->pr_ctloutput) {
2161 error = (*so->so_proto->pr_ctloutput)
2162 (so, sopt);
2163 socket_unlock(so, 1);
2164 return (error);
2165 }
2166 error = ENOPROTOOPT;
2167 } else {
2168 switch (sopt->sopt_name) {
2169 case SO_LINGER:
2170 case SO_LINGER_SEC:
2171 error = sooptcopyin(sopt, &l, sizeof l, sizeof l);
2172 if (error)
2173 goto bad;
2174
2175 so->so_linger = (sopt->sopt_name == SO_LINGER) ? l.l_linger : l.l_linger * hz;
2176 if (l.l_onoff)
2177 so->so_options |= SO_LINGER;
2178 else
2179 so->so_options &= ~SO_LINGER;
2180 break;
2181
2182 case SO_DEBUG:
2183 case SO_KEEPALIVE:
2184 case SO_DONTROUTE:
2185 case SO_USELOOPBACK:
2186 case SO_BROADCAST:
2187 case SO_REUSEADDR:
2188 case SO_REUSEPORT:
2189 case SO_OOBINLINE:
2190 case SO_TIMESTAMP:
2191 #ifdef __APPLE__
2192 case SO_DONTTRUNC:
2193 case SO_WANTMORE:
2194 case SO_WANTOOBFLAG:
2195 #endif
2196 error = sooptcopyin(sopt, &optval, sizeof optval,
2197 sizeof optval);
2198 if (error)
2199 goto bad;
2200 if (optval)
2201 so->so_options |= sopt->sopt_name;
2202 else
2203 so->so_options &= ~sopt->sopt_name;
2204 break;
2205
2206 case SO_SNDBUF:
2207 case SO_RCVBUF:
2208 case SO_SNDLOWAT:
2209 case SO_RCVLOWAT:
2210 error = sooptcopyin(sopt, &optval, sizeof optval,
2211 sizeof optval);
2212 if (error)
2213 goto bad;
2214
2215 /*
2216 * Values < 1 make no sense for any of these
2217 * options, so disallow them.
2218 */
2219 if (optval < 1) {
2220 error = EINVAL;
2221 goto bad;
2222 }
2223
2224 switch (sopt->sopt_name) {
2225 case SO_SNDBUF:
2226 case SO_RCVBUF:
2227 if (sbreserve(sopt->sopt_name == SO_SNDBUF ?
2228 &so->so_snd : &so->so_rcv,
2229 (u_long) optval) == 0) {
2230 error = ENOBUFS;
2231 goto bad;
2232 }
2233 break;
2234
2235 /*
2236 * Make sure the low-water is never greater than
2237 * the high-water.
2238 */
2239 case SO_SNDLOWAT:
2240 so->so_snd.sb_lowat =
2241 (optval > so->so_snd.sb_hiwat) ?
2242 so->so_snd.sb_hiwat : optval;
2243 break;
2244 case SO_RCVLOWAT:
2245 so->so_rcv.sb_lowat =
2246 (optval > so->so_rcv.sb_hiwat) ?
2247 so->so_rcv.sb_hiwat : optval;
2248 break;
2249 }
2250 break;
2251
2252 case SO_SNDTIMEO:
2253 case SO_RCVTIMEO:
2254 error = sooptcopyin(sopt, &tv, sizeof tv,
2255 sizeof tv);
2256 if (error)
2257 goto bad;
2258
2259 if (tv.tv_sec < 0 || tv.tv_sec > LONG_MAX ||
2260 tv.tv_usec < 0 || tv.tv_usec >= 1000000) {
2261 error = EDOM;
2262 goto bad;
2263 }
2264
2265 switch (sopt->sopt_name) {
2266 case SO_SNDTIMEO:
2267 so->so_snd.sb_timeo = tv;
2268 break;
2269 case SO_RCVTIMEO:
2270 so->so_rcv.sb_timeo = tv;
2271 break;
2272 }
2273 break;
2274
2275 case SO_NKE:
2276 {
2277 struct so_nke nke;
2278
2279 error = sooptcopyin(sopt, &nke,
2280 sizeof nke, sizeof nke);
2281 if (error)
2282 goto bad;
2283
2284 error = sflt_attach_private(so, NULL, nke.nke_handle, 1);
2285 break;
2286 }
2287
2288 case SO_NOSIGPIPE:
2289 error = sooptcopyin(sopt, &optval, sizeof optval,
2290 sizeof optval);
2291 if (error)
2292 goto bad;
2293 if (optval)
2294 so->so_flags |= SOF_NOSIGPIPE;
2295 else
2296 so->so_flags &= ~SOF_NOSIGPIPE;
2297
2298 break;
2299
2300 case SO_NOADDRERR:
2301 error = sooptcopyin(sopt, &optval, sizeof optval,
2302 sizeof optval);
2303 if (error)
2304 goto bad;
2305 if (optval)
2306 so->so_flags |= SOF_NOADDRAVAIL;
2307 else
2308 so->so_flags &= ~SOF_NOADDRAVAIL;
2309
2310 break;
2311
2312 default:
2313 error = ENOPROTOOPT;
2314 break;
2315 }
2316 if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
2317 (void) ((*so->so_proto->pr_ctloutput)
2318 (so, sopt));
2319 }
2320 }
2321 bad:
2322 socket_unlock(so, 1);
2323 return (error);
2324 }
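
/*
 * Illustrative user-level sketch (not compiled here) of the checks sosetopt()
 * performs above: SO_SNDTIMEO/SO_RCVTIMEO reject out-of-range timevals with
 * EDOM, and the buffer/low-water options reject values below 1 with EINVAL
 * (or ENOBUFS if sbreserve() fails).
 */
#if 0
#include <sys/socket.h>
#include <sys/time.h>
#include <errno.h>

static int
tune_receive_side(int fd)
{
	struct timeval tv = { .tv_sec = 5, .tv_usec = 0 };	/* tv_usec must be in [0, 1000000) */
	int bufsize = 64 * 1024;				/* must be >= 1 */

	if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof tv) < 0)
		return (errno);
	if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &bufsize, sizeof bufsize) < 0)
		return (errno);
	return (0);
}
#endif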
2325
2326 /* Helper routine for getsockopt */
2327 int
2328 sooptcopyout(sopt, buf, len)
2329 struct sockopt *sopt;
2330 void *buf;
2331 size_t len;
2332 {
2333 int error;
2334 size_t valsize;
2335
2336 error = 0;
2337
2338 /*
2339 * Documented get behavior is that we always return a value,
2340 * possibly truncated to fit in the user's buffer.
2341 * Traditional behavior is that we always tell the user
2342 * precisely how much we copied, rather than something useful
2343 * like the total amount we had available for her.
2344 * Note that this interface is not idempotent; the entire answer must
2345 * be generated ahead of time.
2346 */
2347 valsize = min(len, sopt->sopt_valsize);
2348 sopt->sopt_valsize = valsize;
2349 if (sopt->sopt_val != USER_ADDR_NULL) {
2350 if (sopt->sopt_p != 0)
2351 error = copyout(buf, sopt->sopt_val, valsize);
2352 else
2353 bcopy(buf, CAST_DOWN(caddr_t, sopt->sopt_val), valsize);
2354 }
2355 return error;
2356 }
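
/*
 * As the comment above notes, a get always returns a value, possibly
 * truncated to the caller's buffer, and reports how many bytes were copied.
 * From user level (sketch, not compiled here) that means getsockopt(2) with a
 * short buffer succeeds and shrinks optlen instead of failing.
 */
#if 0
#include <sys/socket.h>

static void
short_buffer_get(int fd)
{
	char small[1];
	socklen_t len = sizeof small;

	/* SO_TYPE is an int, but only one byte is copied; len comes back as 1 */
	(void) getsockopt(fd, SOL_SOCKET, SO_TYPE, small, &len);
}
#endif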
2357
2358 int
2359 sogetopt(so, sopt)
2360 struct socket *so;
2361 struct sockopt *sopt;
2362 {
2363 int error, optval;
2364 struct linger l;
2365 struct timeval tv;
2366
2367 if (sopt->sopt_dir != SOPT_GET) {
2368 sopt->sopt_dir = SOPT_GET;
2369 }
2370
2371 socket_lock(so, 1);
2372
2373 {
2374 struct socket_filter_entry *filter;
2375 int filtered = 0;
2376 error = 0;
2377 for (filter = so->so_filt; filter && (error == 0);
2378 filter = filter->sfe_next_onsocket) {
2379 if (filter->sfe_filter->sf_filter.sf_getoption) {
2380 if (filtered == 0) {
2381 filtered = 1;
2382 sflt_use(so);
2383 socket_unlock(so, 0);
2384 }
2385 error = filter->sfe_filter->sf_filter.sf_getoption(
2386 filter->sfe_cookie, so, sopt);
2387 }
2388 }
2389 if (filtered != 0) {
2390 socket_lock(so, 0);
2391 sflt_unuse(so);
2392
2393 if (error) {
2394 if (error == EJUSTRETURN)
2395 error = 0;
2396 socket_unlock(so, 1);
2397 return error;
2398 }
2399 }
2400 }
2401
2402 error = 0;
2403 if (sopt->sopt_level != SOL_SOCKET) {
2404 if (so->so_proto && so->so_proto->pr_ctloutput) {
2405 error = (*so->so_proto->pr_ctloutput)
2406 (so, sopt);
2407 socket_unlock(so, 1);
2408 return (error);
2409 } else {
2410 socket_unlock(so, 1);
2411 return (ENOPROTOOPT);
2412 }
2413 } else {
2414 switch (sopt->sopt_name) {
2415 case SO_LINGER:
2416 case SO_LINGER_SEC:
2417 l.l_onoff = so->so_options & SO_LINGER;
2418 l.l_linger = (sopt->sopt_name == SO_LINGER) ? so->so_linger :
2419 so->so_linger / hz;
2420 error = sooptcopyout(sopt, &l, sizeof l);
2421 break;
2422
2423 case SO_USELOOPBACK:
2424 case SO_DONTROUTE:
2425 case SO_DEBUG:
2426 case SO_KEEPALIVE:
2427 case SO_REUSEADDR:
2428 case SO_REUSEPORT:
2429 case SO_BROADCAST:
2430 case SO_OOBINLINE:
2431 case SO_TIMESTAMP:
2432 #ifdef __APPLE__
2433 case SO_DONTTRUNC:
2434 case SO_WANTMORE:
2435 case SO_WANTOOBFLAG:
2436 #endif
2437 optval = so->so_options & sopt->sopt_name;
2438 integer:
2439 error = sooptcopyout(sopt, &optval, sizeof optval);
2440 break;
2441
2442 case SO_TYPE:
2443 optval = so->so_type;
2444 goto integer;
2445
2446 #ifdef __APPLE__
2447 case SO_NREAD:
2448 {
2449 int pkt_total;
2450 struct mbuf *m1;
2451
2452 pkt_total = 0;
2453 m1 = so->so_rcv.sb_mb;
2454 if (so->so_proto->pr_flags & PR_ATOMIC)
2455 {
2456 while (m1) {
2457 if (m1->m_type == MT_DATA)
2458 pkt_total += m1->m_len;
2459 m1 = m1->m_next;
2460 }
2461 optval = pkt_total;
2462 } else
2463 optval = so->so_rcv.sb_cc;
2464 goto integer;
2465 }
2466 case SO_NWRITE:
2467 optval = so->so_snd.sb_cc;
2468 goto integer;
2469 #endif
2470 case SO_ERROR:
2471 optval = so->so_error;
2472 so->so_error = 0;
2473 goto integer;
2474
2475 case SO_SNDBUF:
2476 optval = so->so_snd.sb_hiwat;
2477 goto integer;
2478
2479 case SO_RCVBUF:
2480 optval = so->so_rcv.sb_hiwat;
2481 goto integer;
2482
2483 case SO_SNDLOWAT:
2484 optval = so->so_snd.sb_lowat;
2485 goto integer;
2486
2487 case SO_RCVLOWAT:
2488 optval = so->so_rcv.sb_lowat;
2489 goto integer;
2490
2491 case SO_SNDTIMEO:
2492 case SO_RCVTIMEO:
2493 tv = (sopt->sopt_name == SO_SNDTIMEO ?
2494 so->so_snd.sb_timeo : so->so_rcv.sb_timeo);
2495
2496 error = sooptcopyout(sopt, &tv, sizeof tv);
2497 break;
2498
2499 case SO_NOSIGPIPE:
2500 optval = (so->so_flags & SOF_NOSIGPIPE);
2501 goto integer;
2502
2503 case SO_NOADDRERR:
2504 optval = (so->so_flags & SOF_NOADDRAVAIL);
2505 goto integer;
2506
2507 default:
2508 error = ENOPROTOOPT;
2509 break;
2510 }
2511 socket_unlock(so, 1);
2512 return (error);
2513 }
2514 }
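
/*
 * Illustrative user-level sketch (not compiled here) of the Apple-specific
 * SO_NREAD and SO_NWRITE queries handled above: SO_NREAD reports the data
 * bytes queued for reading (summed per record for atomic protocols, sb_cc
 * otherwise), SO_NWRITE the bytes still queued for sending.
 */
#if 0
#include <sys/socket.h>

static void
queue_depths(int fd, int *nread, int *nwrite)
{
	socklen_t len;

	len = sizeof *nread;
	(void) getsockopt(fd, SOL_SOCKET, SO_NREAD, nread, &len);
	len = sizeof *nwrite;
	(void) getsockopt(fd, SOL_SOCKET, SO_NWRITE, nwrite, &len);
}
#endif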
2515
2516 /* XXX; prepare mbuf for (__FreeBSD__ < 3) routines. */
2517 int
2518 soopt_getm(struct sockopt *sopt, struct mbuf **mp)
2519 {
2520 struct mbuf *m, *m_prev;
2521 int sopt_size = sopt->sopt_valsize;
2522
2523 if (sopt_size > MAX_SOOPTGETM_SIZE)
2524 return EMSGSIZE;
2525
2526 MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_DATA);
2527 if (m == 0)
2528 return ENOBUFS;
2529 if (sopt_size > MLEN) {
2530 MCLGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT);
2531 if ((m->m_flags & M_EXT) == 0) {
2532 m_free(m);
2533 return ENOBUFS;
2534 }
2535 m->m_len = min(MCLBYTES, sopt_size);
2536 } else {
2537 m->m_len = min(MLEN, sopt_size);
2538 }
2539 sopt_size -= m->m_len;
2540 *mp = m;
2541 m_prev = m;
2542
2543 while (sopt_size) {
2544 MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_DATA);
2545 if (m == 0) {
2546 m_freem(*mp);
2547 return ENOBUFS;
2548 }
2549 if (sopt_size > MLEN) {
2550 MCLGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT);
2551 if ((m->m_flags & M_EXT) == 0) {
2552 m_freem(*mp);
2553 return ENOBUFS;
2554 }
2555 m->m_len = min(MCLBYTES, sopt_size);
2556 } else {
2557 m->m_len = min(MLEN, sopt_size);
2558 }
2559 sopt_size -= m->m_len;
2560 m_prev->m_next = m;
2561 m_prev = m;
2562 }
2563 return 0;
2564 }
2565
2566 /* XXX; copyin sopt data into mbuf chain for (__FreeBSD__ < 3) routines. */
2567 int
2568 soopt_mcopyin(struct sockopt *sopt, struct mbuf *m)
2569 {
2570 struct mbuf *m0 = m;
2571
2572 if (sopt->sopt_val == USER_ADDR_NULL)
2573 return 0;
2574 while (m != NULL && sopt->sopt_valsize >= m->m_len) {
2575 if (sopt->sopt_p != NULL) {
2576 int error;
2577
2578 error = copyin(sopt->sopt_val, mtod(m, char *), m->m_len);
2579 if (error != 0) {
2580 m_freem(m0);
2581 return(error);
2582 }
2583 } else
2584 bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), mtod(m, char *), m->m_len);
2585 sopt->sopt_valsize -= m->m_len;
2586 sopt->sopt_val += m->m_len;
2587 m = m->m_next;
2588 }
2589 if (m != NULL) /* should have been allocated with enough space at ip6_sooptmcopyin() */
2590 panic("soopt_mcopyin");
2591 return 0;
2592 }
2593
2594 /* XXX; copyout mbuf chain data into soopt for (__FreeBSD__ < 3) routines. */
2595 int
2596 soopt_mcopyout(struct sockopt *sopt, struct mbuf *m)
2597 {
2598 struct mbuf *m0 = m;
2599 size_t valsize = 0;
2600
2601 if (sopt->sopt_val == USER_ADDR_NULL)
2602 return 0;
2603 while (m != NULL && sopt->sopt_valsize >= m->m_len) {
2604 if (sopt->sopt_p != NULL) {
2605 int error;
2606
2607 error = copyout(mtod(m, char *), sopt->sopt_val, m->m_len);
2608 if (error != 0) {
2609 m_freem(m0);
2610 return(error);
2611 }
2612 } else
2613 bcopy(mtod(m, char *), CAST_DOWN(caddr_t, sopt->sopt_val), m->m_len);
2614 sopt->sopt_valsize -= m->m_len;
2615 sopt->sopt_val += m->m_len;
2616 valsize += m->m_len;
2617 m = m->m_next;
2618 }
2619 if (m != NULL) {
2620 /* the user-supplied option buffer should have been large enough for the whole chain */
2621 m_freem(m0);
2622 return(EINVAL);
2623 }
2624 sopt->sopt_valsize = valsize;
2625 return 0;
2626 }
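
/*
 * Sketch (not compiled here) of how the three staging helpers above pair up
 * in a pre-FreeBSD-3 style ctloutput path such as the IPv6 option code the
 * comments refer to.  The set_path()/get_path() wrappers are hypothetical.
 */
#if 0
/* SOPT_SET direction: pull the user's option data into an mbuf chain */
static int
set_path(struct sockopt *sopt, struct mbuf **mp)
{
	int error;

	error = soopt_getm(sopt, mp);		/* size the chain to sopt_valsize */
	if (error != 0)
		return (error);
	return (soopt_mcopyin(sopt, *mp));	/* copy user data in; frees the chain on error */
}

/* SOPT_GET direction: the protocol built a chain, push it back to the user */
static int
get_path(struct sockopt *sopt, struct mbuf *m)
{
	return (soopt_mcopyout(sopt, m));	/* sets sopt_valsize to the bytes copied */
}
#endif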
2627
2628 void
2629 sohasoutofband(so)
2630 register struct socket *so;
2631 {
2632 struct proc *p;
2633
2634 if (so->so_pgid < 0)
2635 gsignal(-so->so_pgid, SIGURG);
2636 else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
2637 psignal(p, SIGURG);
2638 selwakeup(&so->so_rcv.sb_sel);
2639 }
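
/*
 * sohasoutofband() signals the owning process or process group with SIGURG
 * and wakes select/poll waiters.  A user-level sketch (not compiled here) of
 * arranging to receive that signal via the standard F_SETOWN mechanism.
 */
#if 0
#include <fcntl.h>
#include <signal.h>
#include <unistd.h>

static void
on_urg(int sig)
{
	(void) sig;	/* out-of-band data is pending; read it with MSG_OOB */
}

static void
arm_oob_signal(int fd)
{
	(void) signal(SIGURG, on_urg);
	/* make this process the socket owner so the kernel knows whom to signal */
	(void) fcntl(fd, F_SETOWN, getpid());
}
#endif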
2640
2641 int
2642 sopoll(struct socket *so, int events, __unused kauth_cred_t cred, void * wql)
2643 {
2644 struct proc *p = current_proc();
2645 int revents = 0;
2646
2647 socket_lock(so, 1);
2648
2649 if (events & (POLLIN | POLLRDNORM))
2650 if (soreadable(so))
2651 revents |= events & (POLLIN | POLLRDNORM);
2652
2653 if (events & (POLLOUT | POLLWRNORM))
2654 if (sowriteable(so))
2655 revents |= events & (POLLOUT | POLLWRNORM);
2656
2657 if (events & (POLLPRI | POLLRDBAND))
2658 if (so->so_oobmark || (so->so_state & SS_RCVATMARK))
2659 revents |= events & (POLLPRI | POLLRDBAND);
2660
2661 if (revents == 0) {
2662 if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
2663 /* Darwin sets the flag first, BSD calls selrecord first */
2664 so->so_rcv.sb_flags |= SB_SEL;
2665 selrecord(p, &so->so_rcv.sb_sel, wql);
2666 }
2667
2668 if (events & (POLLOUT | POLLWRNORM)) {
2669 /* Darwin sets the flag first, BSD calls selrecord first */
2670 so->so_snd.sb_flags |= SB_SEL;
2671 selrecord(p, &so->so_snd.sb_sel, wql);
2672 }
2673 }
2674
2675 socket_unlock(so, 1);
2676 return (revents);
2677 }
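
/*
 * User-level sketch (not compiled here) matching the event mapping in
 * sopoll() above: POLLIN/POLLRDNORM when soreadable(), POLLOUT/POLLWRNORM
 * when sowriteable(), POLLPRI/POLLRDBAND when out-of-band data is pending.
 */
#if 0
#include <poll.h>

static int
wait_for_socket(int fd)
{
	struct pollfd pfd = { .fd = fd, .events = POLLIN | POLLOUT | POLLPRI };

	if (poll(&pfd, 1, 1000 /* ms */) <= 0)
		return (-1);
	if (pfd.revents & POLLPRI)
		return (2);	/* urgent data at or before the mark */
	if (pfd.revents & POLLIN)
		return (1);	/* data, EOF, or a pending error to read */
	return (0);		/* writable */
}
#endif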
2678
2679 int soo_kqfilter(struct fileproc *fp, struct knote *kn, struct proc *p);
2680
2681 int
2682 soo_kqfilter(__unused struct fileproc *fp, struct knote *kn, __unused struct proc *p)
2683 {
2684 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
2685 struct sockbuf *sb;
2686 socket_lock(so, 1);
2687
2688 switch (kn->kn_filter) {
2689 case EVFILT_READ:
2690 if (so->so_options & SO_ACCEPTCONN)
2691 kn->kn_fop = &solisten_filtops;
2692 else
2693 kn->kn_fop = &soread_filtops;
2694 sb = &so->so_rcv;
2695 break;
2696 case EVFILT_WRITE:
2697 kn->kn_fop = &sowrite_filtops;
2698 sb = &so->so_snd;
2699 break;
2700 default:
2701 socket_unlock(so, 1);
2702 return (1);
2703 }
2704
2705 if (KNOTE_ATTACH(&sb->sb_sel.si_note, kn))
2706 sb->sb_flags |= SB_KNOTE;
2707 socket_unlock(so, 1);
2708 return (0);
2709 }
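
/*
 * User-level sketch (not compiled here) of a kevent registration landing in
 * soo_kqfilter() above: EVFILT_READ selects soread_filtops (or
 * solisten_filtops on a listening socket), EVFILT_WRITE selects
 * sowrite_filtops, and NOTE_LOWAT supplies the per-knote low-water mark that
 * filt_soread()/filt_sowrite() compare against kn_data.
 */
#if 0
#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>

static int
watch_socket(int kq, int fd)
{
	struct kevent kev[2];

	/* fire only once at least 128 bytes are queued to read */
	EV_SET(&kev[0], fd, EVFILT_READ, EV_ADD, NOTE_LOWAT, 128, NULL);
	/* fire when send-buffer space reaches the write low-water mark */
	EV_SET(&kev[1], fd, EVFILT_WRITE, EV_ADD, 0, 0, NULL);

	return (kevent(kq, kev, 2, NULL, 0, NULL));
}
#endif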
2710
2711 static void
2712 filt_sordetach(struct knote *kn)
2713 {
2714 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
2715
2716 socket_lock(so, 1);
2717 if (so->so_rcv.sb_flags & SB_KNOTE)
2718 if (KNOTE_DETACH(&so->so_rcv.sb_sel.si_note, kn))
2719 so->so_rcv.sb_flags &= ~SB_KNOTE;
2720 socket_unlock(so, 1);
2721 }
2722
2723 /*ARGSUSED*/
2724 static int
2725 filt_soread(struct knote *kn, long hint)
2726 {
2727 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
2728
2729 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2730 socket_lock(so, 1);
2731
2732 if (so->so_oobmark) {
2733 if (kn->kn_flags & EV_OOBAND) {
2734 kn->kn_data = so->so_rcv.sb_cc - so->so_oobmark;
2735 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2736 socket_unlock(so, 1);
2737 return (1);
2738 }
2739 kn->kn_data = so->so_oobmark;
2740 kn->kn_flags |= EV_OOBAND;
2741 } else {
2742 kn->kn_data = so->so_rcv.sb_cc;
2743 if (so->so_state & SS_CANTRCVMORE) {
2744 kn->kn_flags |= EV_EOF;
2745 kn->kn_fflags = so->so_error;
2746 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2747 socket_unlock(so, 1);
2748 return (1);
2749 }
2750 }
2751
2752 if (so->so_state & SS_RCVATMARK) {
2753 if (kn->kn_flags & EV_OOBAND) {
2754 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2755 socket_unlock(so, 1);
2756 return (1);
2757 }
2758 kn->kn_flags |= EV_OOBAND;
2759 } else if (kn->kn_flags & EV_OOBAND) {
2760 kn->kn_data = 0;
2761 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2762 socket_unlock(so, 1);
2763 return (0);
2764 }
2765
2766 if (so->so_error) { /* temporary udp error */
2767 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2768 socket_unlock(so, 1);
2769 return (1);
2770 }
2771
2772 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2773 socket_unlock(so, 1);
2774
2775 return( kn->kn_flags & EV_OOBAND ||
2776 kn->kn_data >= ((kn->kn_sfflags & NOTE_LOWAT) ?
2777 kn->kn_sdata : so->so_rcv.sb_lowat));
2778 }
2779
2780 static void
2781 filt_sowdetach(struct knote *kn)
2782 {
2783 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
2784 socket_lock(so, 1);
2785
2786 if (so->so_snd.sb_flags & SB_KNOTE)
2787 if (KNOTE_DETACH(&so->so_snd.sb_sel.si_note, kn))
2788 so->so_snd.sb_flags &= ~SB_KNOTE;
2789 socket_unlock(so, 1);
2790 }
2791
2792 /*ARGSUSED*/
2793 static int
2794 filt_sowrite(struct knote *kn, long hint)
2795 {
2796 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
2797
2798 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2799 socket_lock(so, 1);
2800
2801 kn->kn_data = sbspace(&so->so_snd);
2802 if (so->so_state & SS_CANTSENDMORE) {
2803 kn->kn_flags |= EV_EOF;
2804 kn->kn_fflags = so->so_error;
2805 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2806 socket_unlock(so, 1);
2807 return (1);
2808 }
2809 if (so->so_error) { /* temporary udp error */
2810 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2811 socket_unlock(so, 1);
2812 return (1);
2813 }
2814 if (((so->so_state & SS_ISCONNECTED) == 0) &&
2815 (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
2816 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2817 socket_unlock(so, 1);
2818 return (0);
2819 }
2820 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2821 socket_unlock(so, 1);
2822 if (kn->kn_sfflags & NOTE_LOWAT)
2823 return (kn->kn_data >= kn->kn_sdata);
2824 return (kn->kn_data >= so->so_snd.sb_lowat);
2825 }
2826
2827 /*ARGSUSED*/
2828 static int
2829 filt_solisten(struct knote *kn, long hint)
2830 {
2831 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
2832 int isempty;
2833
2834 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2835 socket_lock(so, 1);
2836 kn->kn_data = so->so_qlen;
2837 isempty = ! TAILQ_EMPTY(&so->so_comp); /* despite the name, nonzero when a completed connection is waiting */
2838 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2839 socket_unlock(so, 1);
2840 return (isempty);
2841 }
2842
2843
2844 int
2845 socket_lock(so, refcount)
2846 struct socket *so;
2847 int refcount;
2848 {
2849 int error = 0, lr = 0, lr_saved = 0; /* lr_saved is only captured on __ppc__ */
2850 #ifdef __ppc__
2851 __asm__ volatile("mflr %0" : "=r" (lr));
2852 lr_saved = lr;
2853 #endif
2854
2855 if (so->so_proto->pr_lock) {
2856 error = (*so->so_proto->pr_lock)(so, refcount, lr_saved);
2857 }
2858 else {
2859 #ifdef MORE_LOCKING_DEBUG
2860 lck_mtx_assert(so->so_proto->pr_domain->dom_mtx, LCK_MTX_ASSERT_NOTOWNED);
2861 #endif
2862 lck_mtx_lock(so->so_proto->pr_domain->dom_mtx);
2863 if (refcount)
2864 so->so_usecount++;
2865 so->reserved3 = (void*)lr_saved; /* save caller for refcount going to zero */
2866 }
2867
2868 return(error);
2869
2870 }
2871
2872 int
2873 socket_unlock(so, refcount)
2874 struct socket *so;
2875 int refcount;
2876 {
2877 int error = 0, lr = 0, lr_saved = 0; /* lr_saved is only captured on __ppc__ */
2878 lck_mtx_t * mutex_held;
2879
2880 #ifdef __ppc__
2881 __asm__ volatile("mflr %0" : "=r" (lr));
2882 lr_saved = lr;
2883 #endif
2884
2885
2886
2887 if (so->so_proto == NULL)
2888 panic("socket_unlock null so_proto so=%x\n", so);
2889
2890 if (so && so->so_proto->pr_unlock)
2891 error = (*so->so_proto->pr_unlock)(so, refcount, lr_saved);
2892 else {
2893 mutex_held = so->so_proto->pr_domain->dom_mtx;
2894 #ifdef MORE_LOCKING_DEBUG
2895 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
2896 #endif
2897 if (refcount) {
2898 if (so->so_usecount <= 0)
2899 panic("socket_unlock: bad refcount so=%x value=%d\n", so, so->so_usecount);
2900 so->so_usecount--;
2901 if (so->so_usecount == 0) {
2902 sofreelastref(so, 1);
2903 }
2904 else
2905 so->reserved4 = (void*)lr_saved; /* save caller */
2906 }
2907 lck_mtx_unlock(mutex_held);
2908 }
2909
2910 return(error);
2911 }
2912 //### Called with socket locked, will unlock socket
2913 void
2914 sofree(so)
2915 struct socket *so;
2916 {
2917
2918 int lr = 0, lr_saved = 0;
2919 lck_mtx_t * mutex_held;
2920 #ifdef __ppc__
2921 __asm__ volatile("mflr %0" : "=r" (lr));
2922 lr_saved = lr;
2923 #endif
2924 if (so->so_proto->pr_getlock != NULL)
2925 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
2926 else
2927 mutex_held = so->so_proto->pr_domain->dom_mtx;
2928 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
2929
2930 sofreelastref(so, 0);
2931 }
2932
2933 void
2934 soreference(so)
2935 struct socket *so;
2936 {
2937 socket_lock(so, 1); /* lock the socket and take one reference */
2938 socket_unlock(so, 0); /* unlock only */
2939 }
2940
2941 void
2942 sodereference(so)
2943 struct socket *so;
2944 {
2945 socket_lock(so, 0);
2946 socket_unlock(so, 1);
2947 }