1 /*
2 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /* Copyright (c) 1998, 1999 Apple Computer, Inc. All Rights Reserved */
23 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
24 /*
25 * Copyright (c) 1982, 1986, 1988, 1990, 1993
26 * The Regents of the University of California. All rights reserved.
27 *
28 * Redistribution and use in source and binary forms, with or without
29 * modification, are permitted provided that the following conditions
30 * are met:
31 * 1. Redistributions of source code must retain the above copyright
32 * notice, this list of conditions and the following disclaimer.
33 * 2. Redistributions in binary form must reproduce the above copyright
34 * notice, this list of conditions and the following disclaimer in the
35 * documentation and/or other materials provided with the distribution.
36 * 3. All advertising materials mentioning features or use of this software
37 * must display the following acknowledgement:
38 * This product includes software developed by the University of
39 * California, Berkeley and its contributors.
40 * 4. Neither the name of the University nor the names of its contributors
41 * may be used to endorse or promote products derived from this software
42 * without specific prior written permission.
43 *
44 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
45 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
46 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
47 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
48 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
49 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
50 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
51 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
52 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
53 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
54 * SUCH DAMAGE.
55 *
56 * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94
57 * $FreeBSD: src/sys/kern/uipc_socket.c,v 1.68.2.16 2001/06/14 20:46:06 ume Exp $
58 */
59
60 #include <sys/param.h>
61 #include <sys/systm.h>
62 #include <sys/filedesc.h>
63 #include <sys/proc_internal.h>
64 #include <sys/kauth.h>
65 #include <sys/file_internal.h>
66 #include <sys/fcntl.h>
67 #include <sys/malloc.h>
68 #include <sys/mbuf.h>
69 #include <sys/domain.h>
70 #include <sys/kernel.h>
71 #include <sys/event.h>
72 #include <sys/poll.h>
73 #include <sys/protosw.h>
74 #include <sys/socket.h>
75 #include <sys/socketvar.h>
76 #include <sys/resourcevar.h>
77 #include <sys/signalvar.h>
78 #include <sys/sysctl.h>
79 #include <sys/uio.h>
80 #include <sys/ev.h>
81 #include <sys/kdebug.h>
82 #include <net/route.h>
83 #include <netinet/in.h>
84 #include <netinet/in_pcb.h>
85 #include <kern/zalloc.h>
86 #include <kern/locks.h>
87 #include <machine/limits.h>
88
89 int so_cache_hw = 0;
90 int so_cache_timeouts = 0;
91 int so_cache_max_freed = 0;
92 int cached_sock_count = 0;
93 struct socket *socket_cache_head = 0;
94 struct socket *socket_cache_tail = 0;
95 u_long so_cache_time = 0;
96 int so_cache_init_done = 0;
97 struct zone *so_cache_zone;
98 extern int get_inpcb_str_size();
99 extern int get_tcp_str_size();
100
101 static lck_grp_t *so_cache_mtx_grp;
102 static lck_attr_t *so_cache_mtx_attr;
103 static lck_grp_attr_t *so_cache_mtx_grp_attr;
104 lck_mtx_t *so_cache_mtx;
105
106
107
108 static void filt_sordetach(struct knote *kn);
109 static int filt_soread(struct knote *kn, long hint);
110 static void filt_sowdetach(struct knote *kn);
111 static int filt_sowrite(struct knote *kn, long hint);
112 static int filt_solisten(struct knote *kn, long hint);
113
114 static struct filterops solisten_filtops =
115 { 1, NULL, filt_sordetach, filt_solisten };
116 static struct filterops soread_filtops =
117 { 1, NULL, filt_sordetach, filt_soread };
118 static struct filterops sowrite_filtops =
119 { 1, NULL, filt_sowdetach, filt_sowrite };
120
121 #define EVEN_MORE_LOCKING_DEBUG 0
122 int socket_debug = 0;
123 int socket_zone = M_SOCKET;
124 so_gen_t so_gencnt; /* generation count for sockets */
125
126 MALLOC_DEFINE(M_SONAME, "soname", "socket name");
127 MALLOC_DEFINE(M_PCB, "pcb", "protocol control block");
128
129 #define DBG_LAYER_IN_BEG NETDBG_CODE(DBG_NETSOCK, 0)
130 #define DBG_LAYER_IN_END NETDBG_CODE(DBG_NETSOCK, 2)
131 #define DBG_LAYER_OUT_BEG NETDBG_CODE(DBG_NETSOCK, 1)
132 #define DBG_LAYER_OUT_END NETDBG_CODE(DBG_NETSOCK, 3)
133 #define DBG_FNC_SOSEND NETDBG_CODE(DBG_NETSOCK, (4 << 8) | 1)
134 #define DBG_FNC_SORECEIVE NETDBG_CODE(DBG_NETSOCK, (8 << 8))
135 #define DBG_FNC_SOSHUTDOWN NETDBG_CODE(DBG_NETSOCK, (9 << 8))
136
137 #define MAX_SOOPTGETM_SIZE (128 * MCLBYTES)
138
139
140 SYSCTL_DECL(_kern_ipc);
141
142 static int somaxconn = SOMAXCONN;
143 SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW, &somaxconn,
144 0, "");
145
146 /* Should we get a maximum also ??? */
147 static int sosendmaxchain = 65536;
148 static int sosendminchain = 16384;
149 static int sorecvmincopy = 16384;
150 SYSCTL_INT(_kern_ipc, OID_AUTO, sosendminchain, CTLFLAG_RW, &sosendminchain,
151 0, "");
152 SYSCTL_INT(_kern_ipc, OID_AUTO, sorecvmincopy, CTLFLAG_RW, &sorecvmincopy,
153 0, "");
154
155 void so_cache_timer();
156
157 /*
158 * Socket operation routines.
159 * These routines are called by the routines in
160 * sys_socket.c or from a system process, and
161 * implement the semantics of socket operations by
162 * switching out to the protocol specific routines.
163 */
164
165 #ifdef __APPLE__
166
167 vm_size_t so_cache_zone_element_size;
168
169 static int sodelayed_copy(struct socket *so, struct uio *uio, struct mbuf **free_list, int *resid);
170
171
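/*
 * One-time initialization of the socket layer's private socket cache:
 * set up the so_cache mutex and its lock group, create a zone large
 * enough to hold a socket plus its inpcb and tcpcb back to back, arm
 * the periodic so_cache_timer, and initialize the socket-filter
 * subsystem via sflt_init().
 */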
172 void socketinit()
173 {
174 vm_size_t str_size;
175
176 if (so_cache_init_done) {
177 printf("socketinit: already called...\n");
178 return;
179 }
180
181 /*
182 * allocate lock group attribute and group for socket cache mutex
183 */
184 so_cache_mtx_grp_attr = lck_grp_attr_alloc_init();
185
186 so_cache_mtx_grp = lck_grp_alloc_init("so_cache", so_cache_mtx_grp_attr);
187
188 /*
189 * allocate the lock attribute for socket cache mutex
190 */
191 so_cache_mtx_attr = lck_attr_alloc_init();
192
193 so_cache_init_done = 1;
194
195 so_cache_mtx = lck_mtx_alloc_init(so_cache_mtx_grp, so_cache_mtx_attr); /* cached sockets mutex */
196
197 if (so_cache_mtx == NULL)
198 return; /* we're hosed... */
199
200 str_size = (vm_size_t)( sizeof(struct socket) + 4 +
201 get_inpcb_str_size() + 4 +
202 get_tcp_str_size());
203 so_cache_zone = zinit (str_size, 120000*str_size, 8192, "socache zone");
204 #if TEMPDEBUG
205 printf("cached_sock_alloc -- so_cache_zone size is %x\n", str_size);
206 #endif
207 timeout(so_cache_timer, NULL, (SO_CACHE_FLUSH_INTERVAL * hz));
208
209 so_cache_zone_element_size = str_size;
210
211 sflt_init();
212
213 }
214
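/*
 * Hand out a socket for soalloc() (used for PF_INET stream sockets).
 * If the private cache has an entry, pop it off the head of the list
 * and reuse its saved pcb storage; otherwise carve a socket, inpcb and
 * tcpcb out of a single so_cache_zone allocation, aligning the
 * embedded structures on longword boundaries. *so is left NULL if a
 * non-blocking allocation fails.
 */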
215 void cached_sock_alloc(so, waitok)
216 struct socket **so;
217 int waitok;
218
219 {
220 caddr_t temp;
221 register u_long offset;
222
223
224 lck_mtx_lock(so_cache_mtx);
225
226 if (cached_sock_count) {
227 cached_sock_count--;
228 *so = socket_cache_head;
229 if (*so == 0)
230 panic("cached_sock_alloc: cached sock is null");
231
232 socket_cache_head = socket_cache_head->cache_next;
233 if (socket_cache_head)
234 socket_cache_head->cache_prev = 0;
235 else
236 socket_cache_tail = 0;
237
238 lck_mtx_unlock(so_cache_mtx);
239
240 temp = (*so)->so_saved_pcb;
241 bzero((caddr_t)*so, sizeof(struct socket));
242 #if TEMPDEBUG
243 kprintf("cached_sock_alloc - retrieving cached sock %x - count == %d\n", *so,
244 cached_sock_count);
245 #endif
246 (*so)->so_saved_pcb = temp;
247 (*so)->cached_in_sock_layer = 1;
248
249 }
250 else {
251 #if TEMPDEBUG
252 kprintf("Allocating cached sock %x from memory\n", *so);
253 #endif
254
255 lck_mtx_unlock(so_cache_mtx);
256
257 if (waitok)
258 *so = (struct socket *) zalloc(so_cache_zone);
259 else
260 *so = (struct socket *) zalloc_noblock(so_cache_zone);
261
262 if (*so == 0)
263 return;
264
265 bzero((caddr_t)*so, sizeof(struct socket));
266
267 /*
268 * Define offsets for extra structures into our single block of
269 * memory. Align extra structures on longword boundaries.
270 */
271
272
273 offset = (u_long) *so;
274 offset += sizeof(struct socket);
275 if (offset & 0x3) {
276 offset += 4;
277 offset &= 0xfffffffc;
278 }
279 (*so)->so_saved_pcb = (caddr_t) offset;
280 offset += get_inpcb_str_size();
281 if (offset & 0x3) {
282 offset += 4;
283 offset &= 0xfffffffc;
284 }
285
286 ((struct inpcb *) (*so)->so_saved_pcb)->inp_saved_ppcb = (caddr_t) offset;
287 #if TEMPDEBUG
288 kprintf("Allocating cached socket - %x, pcb=%x tcpcb=%x\n", *so,
289 (*so)->so_saved_pcb,
290 ((struct inpcb *)(*so)->so_saved_pcb)->inp_saved_ppcb);
291 #endif
292 }
293
294 (*so)->cached_in_sock_layer = 1;
295 }
296
297
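/*
 * Give a cached-layer socket back to the private cache. If the cache
 * already holds MAX_CACHED_SOCKETS entries the socket is freed to the
 * zone instead; otherwise it is pushed on the head of the cache list
 * and timestamped so the cache timer can age it out later.
 */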
298 void cached_sock_free(so)
299 struct socket *so;
300 {
301
302 lck_mtx_lock(so_cache_mtx);
303
304 if (++cached_sock_count > MAX_CACHED_SOCKETS) {
305 --cached_sock_count;
306 lck_mtx_unlock(so_cache_mtx);
307 #if TEMPDEBUG
308 kprintf("Freeing overflowed cached socket %x\n", so);
309 #endif
310 zfree(so_cache_zone, so);
311 }
312 else {
313 #if TEMPDEBUG
314 kprintf("Freeing socket %x into cache\n", so);
315 #endif
316 if (so_cache_hw < cached_sock_count)
317 so_cache_hw = cached_sock_count;
318
319 so->cache_next = socket_cache_head;
320 so->cache_prev = 0;
321 if (socket_cache_head)
322 socket_cache_head->cache_prev = so;
323 else
324 socket_cache_tail = so;
325
326 so->cache_timestamp = so_cache_time;
327 socket_cache_head = so;
328 lck_mtx_unlock(so_cache_mtx);
329 }
330
331 #if TEMPDEBUG
332 kprintf("Freed cached sock %x into cache - count is %d\n", so, cached_sock_count);
333 #endif
334
335
336 }
337
338
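/*
 * Periodic callout: walk the cache from the tail (oldest entries) and
 * free any socket that has sat in the cache longer than
 * SO_CACHE_TIME_LIMIT, releasing at most SO_CACHE_MAX_FREE_BATCH per
 * run, then re-arm the timeout.
 */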
339 void so_cache_timer()
340 {
341 register struct socket *p;
342 register int n_freed = 0;
343
344
345 lck_mtx_lock(so_cache_mtx);
346
347 ++so_cache_time;
348
349 while ( (p = socket_cache_tail) )
350 {
351 if ((so_cache_time - p->cache_timestamp) < SO_CACHE_TIME_LIMIT)
352 break;
353
354 so_cache_timeouts++;
355
356 if ( (socket_cache_tail = p->cache_prev) )
357 p->cache_prev->cache_next = 0;
358 if (--cached_sock_count == 0)
359 socket_cache_head = 0;
360
361
362 zfree(so_cache_zone, p);
363
364 if (++n_freed >= SO_CACHE_MAX_FREE_BATCH)
365 {
366 so_cache_max_freed++;
367 break;
368 }
369 }
370 lck_mtx_unlock(so_cache_mtx);
371
372 timeout(so_cache_timer, NULL, (SO_CACHE_FLUSH_INTERVAL * hz));
373
374
375 }
376 #endif /* __APPLE__ */
377
378 /*
379 * Get a socket structure from our zone, and initialize it.
380 * We don't implement `waitok' yet (see comments in uipc_domain.c).
381 * Note that it would probably be better to allocate socket
382 * and PCB at the same time, but I'm not convinced that all
383 * the protocols can be easily modified to do this.
384 */
385 struct socket *
386 soalloc(waitok, dom, type)
387 int waitok;
388 int dom;
389 int type;
390 {
391 struct socket *so;
392
393 if ((dom == PF_INET) && (type == SOCK_STREAM))
394 cached_sock_alloc(&so, waitok);
395 else
396 {
397 MALLOC_ZONE(so, struct socket *, sizeof(*so), socket_zone, M_WAITOK);
398 if (so)
399 bzero(so, sizeof *so);
400 }
401 /* XXX race condition for reentrant kernel */
402 //###LD Atomic add for so_gencnt
403 if (so) {
404 so->so_gencnt = ++so_gencnt;
405 so->so_zone = socket_zone;
406 }
407
408 return so;
409 }
410
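/*
 * Create a new socket: the kernel half of socket(2). Look up the
 * protocol switch entry for (dom, type, proto), allocate the socket
 * with soalloc(), record the caller's uid and privilege, and hand the
 * socket to the protocol's pru_attach routine. On attach failure the
 * creation reference is dropped and the socket is freed.
 */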
411 int
412 socreate(dom, aso, type, proto)
413 int dom;
414 struct socket **aso;
415 register int type;
416 int proto;
417 {
418 struct proc *p = current_proc();
419 register struct protosw *prp;
420 register struct socket *so;
421 register int error = 0;
422 #if TCPDEBUG
423 extern int tcpconsdebug;
424 #endif
425 if (proto)
426 prp = pffindproto(dom, proto, type);
427 else
428 prp = pffindtype(dom, type);
429
430 if (prp == 0 || prp->pr_usrreqs->pru_attach == 0)
431 return (EPROTONOSUPPORT);
432 #ifndef __APPLE__
433
434 if (p->p_prison && jail_socket_unixiproute_only &&
435 prp->pr_domain->dom_family != PF_LOCAL &&
436 prp->pr_domain->dom_family != PF_INET &&
437 prp->pr_domain->dom_family != PF_ROUTE) {
438 return (EPROTONOSUPPORT);
439 }
440
441 #endif
442 if (prp->pr_type != type)
443 return (EPROTOTYPE);
444 so = soalloc(p != 0, dom, type);
445 if (so == 0)
446 return (ENOBUFS);
447
448 TAILQ_INIT(&so->so_incomp);
449 TAILQ_INIT(&so->so_comp);
450 so->so_type = type;
451
452 #ifdef __APPLE__
453 if (p != 0) {
454 so->so_uid = kauth_cred_getuid(kauth_cred_get());
455 if (!suser(kauth_cred_get(),NULL))
456 so->so_state = SS_PRIV;
457 }
458 #else
459 so->so_cred = kauth_cred_get_with_ref();
460 #endif
461 so->so_proto = prp;
462 #ifdef __APPLE__
463 so->so_rcv.sb_flags |= SB_RECV; /* XXX */
464 so->so_rcv.sb_so = so->so_snd.sb_so = so;
465 #endif
466 so->next_lock_lr = 0;
467 so->next_unlock_lr = 0;
468
469
470 //### Attachment will create the per pcb lock if necessary and increase refcount
471 so->so_usecount++; /* for creation, make sure it's done before socket is inserted in lists */
472
473 error = (*prp->pr_usrreqs->pru_attach)(so, proto, p);
474 if (error) {
475 /*
476 * Warning:
477 * If so_pcb is not zero, the socket will be leaked,
478 * so the protocol attachment handler must be coded carefully
479 */
480 so->so_state |= SS_NOFDREF;
481 so->so_usecount--;
482 sofreelastref(so, 1); /* will deallocate the socket */
483 return (error);
484 }
485 #ifdef __APPLE__
486 prp->pr_domain->dom_refs++;
487 TAILQ_INIT(&so->so_evlist);
488
489 /* Attach socket filters for this protocol */
490 sflt_initsock(so);
491 #if TCPDEBUG
492 if (tcpconsdebug == 2)
493 so->so_options |= SO_DEBUG;
494 #endif
495 #endif
496
497 *aso = so;
498 return (0);
499 }
500
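/*
 * Bind a socket to a local address. Any sf_bind socket filters run
 * first (with the socket unlocked while a filter is called); unless a
 * filter claims the operation by returning EJUSTRETURN, the protocol's
 * pru_bind is then invoked.
 */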
501 int
502 sobind(so, nam)
503 struct socket *so;
504 struct sockaddr *nam;
505
506 {
507 struct proc *p = current_proc();
508 int error = 0;
509 struct socket_filter_entry *filter;
510 int filtered = 0;
511
512 socket_lock(so, 1);
513
514 /* Socket filter */
515 error = 0;
516 for (filter = so->so_filt; filter && (error == 0);
517 filter = filter->sfe_next_onsocket) {
518 if (filter->sfe_filter->sf_filter.sf_bind) {
519 if (filtered == 0) {
520 filtered = 1;
521 sflt_use(so);
522 socket_unlock(so, 0);
523 }
524 error = filter->sfe_filter->sf_filter.sf_bind(
525 filter->sfe_cookie, so, nam);
526 }
527 }
528 if (filtered != 0) {
529 socket_lock(so, 0);
530 sflt_unuse(so);
531 }
532 /* End socket filter */
533
534 if (error == 0)
535 error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p);
536
537 socket_unlock(so, 1);
538
539 if (error == EJUSTRETURN)
540 error = 0;
541
542 return (error);
543 }
544
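/*
 * Release a socket's storage once the last reference is gone: bump the
 * generation count, then either return a cached-layer socket through
 * cached_sock_free() or free it back to its zone. A repeated call on
 * the same socket panics ("double dealloc").
 */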
545 void
546 sodealloc(so)
547 struct socket *so;
548 {
549 so->so_gencnt = ++so_gencnt;
550
551 #ifndef __APPLE__
552 if (so->so_rcv.sb_hiwat)
553 (void)chgsbsize(so->so_cred->cr_uidinfo,
554 &so->so_rcv.sb_hiwat, 0, RLIM_INFINITY);
555 if (so->so_snd.sb_hiwat)
556 (void)chgsbsize(so->so_cred->cr_uidinfo,
557 &so->so_snd.sb_hiwat, 0, RLIM_INFINITY);
558 #ifdef INET
559 if (so->so_accf != NULL) {
560 if (so->so_accf->so_accept_filter != NULL &&
561 so->so_accf->so_accept_filter->accf_destroy != NULL) {
562 so->so_accf->so_accept_filter->accf_destroy(so);
563 }
564 if (so->so_accf->so_accept_filter_str != NULL)
565 FREE(so->so_accf->so_accept_filter_str, M_ACCF);
566 FREE(so->so_accf, M_ACCF);
567 }
568 #endif /* INET */
569 kauth_cred_rele(so->so_cred);
570 zfreei(so->so_zone, so);
571 #else
572 if (so->cached_in_sock_layer == 1)
573 cached_sock_free(so);
574 else {
575 if (so->cached_in_sock_layer == -1)
576 panic("sodealloc: double dealloc: so=%x\n", so);
577 so->cached_in_sock_layer = -1;
578 FREE_ZONE(so, sizeof(*so), so->so_zone);
579 }
580 #endif /* __APPLE__ */
581 }
582
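/*
 * Mark a bound socket as willing to accept connections (listen(2)):
 * run any sf_listen filters, call the protocol's pru_listen, set
 * SO_ACCEPTCONN, and clamp the requested backlog to somaxconn.
 */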
583 int
584 solisten(so, backlog)
585 register struct socket *so;
586 int backlog;
587
588 {
589 struct proc *p = current_proc();
590 int error;
591
592 socket_lock(so, 1);
593
594 {
595 struct socket_filter_entry *filter;
596 int filtered = 0;
597 error = 0;
598 for (filter = so->so_filt; filter && (error == 0);
599 filter = filter->sfe_next_onsocket) {
600 if (filter->sfe_filter->sf_filter.sf_listen) {
601 if (filtered == 0) {
602 filtered = 1;
603 sflt_use(so);
604 socket_unlock(so, 0);
605 }
606 error = filter->sfe_filter->sf_filter.sf_listen(
607 filter->sfe_cookie, so);
608 }
609 }
610 if (filtered != 0) {
611 socket_lock(so, 0);
612 sflt_unuse(so);
613 }
614 }
615
616 if (error == 0) {
617 error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p);
618 }
619
620 if (error) {
621 socket_unlock(so, 1);
622 if (error == EJUSTRETURN)
623 error = 0;
624 return (error);
625 }
626
627 if (TAILQ_EMPTY(&so->so_comp))
628 so->so_options |= SO_ACCEPTCONN;
629 if (backlog < 0 || backlog > somaxconn)
630 backlog = somaxconn;
631 so->so_qlimit = backlog;
632
633 socket_unlock(so, 1);
634 return (0);
635 }
636
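/*
 * Drop the last reference on a socket. If the protocol control block
 * has not been cleared or the socket still has a file reference, only
 * the select info and upcall flags are cleared. Otherwise the socket
 * is removed from its listening parent's incomplete queue (sockets
 * still on the completed accept queue are left alone), both socket
 * buffers are flushed, and the socket is deallocated if `dealloc' is
 * set.
 */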
637 void
638 sofreelastref(so, dealloc)
639 register struct socket *so;
640 int dealloc;
641 {
642 int error;
643 struct socket *head = so->so_head;
644
645 /*### Assume socket is locked */
646
647 /* Remove any filters - may be called more than once */
648 sflt_termsock(so);
649
650 if ((!(so->so_flags & SOF_PCBCLEARING)) || ((so->so_state & SS_NOFDREF) == 0)) {
651 #ifdef __APPLE__
652 selthreadclear(&so->so_snd.sb_sel);
653 selthreadclear(&so->so_rcv.sb_sel);
654 so->so_rcv.sb_flags &= ~SB_UPCALL;
655 so->so_snd.sb_flags &= ~SB_UPCALL;
656 #endif
657 return;
658 }
659 if (head != NULL) {
660 socket_lock(head, 1);
661 if (so->so_state & SS_INCOMP) {
662 TAILQ_REMOVE(&head->so_incomp, so, so_list);
663 head->so_incqlen--;
664 } else if (so->so_state & SS_COMP) {
665 /*
666 * We must not decommission a socket that's
667 * on the accept(2) queue. If we do, then
668 * accept(2) may hang after select(2) indicated
669 * that the listening socket was ready.
670 */
671 #ifdef __APPLE__
672 selthreadclear(&so->so_snd.sb_sel);
673 selthreadclear(&so->so_rcv.sb_sel);
674 so->so_rcv.sb_flags &= ~SB_UPCALL;
675 so->so_snd.sb_flags &= ~SB_UPCALL;
676 #endif
677 socket_unlock(head, 1);
678 return;
679 } else {
680 panic("sofree: not queued");
681 }
682 head->so_qlen--;
683 so->so_state &= ~SS_INCOMP;
684 so->so_head = NULL;
685 socket_unlock(head, 1);
686 }
687 #ifdef __APPLE__
688 selthreadclear(&so->so_snd.sb_sel);
689 sbrelease(&so->so_snd);
690 #endif
691 sorflush(so);
692
693 /* 3932268: disable upcall */
694 so->so_rcv.sb_flags &= ~SB_UPCALL;
695 so->so_snd.sb_flags &= ~SB_UPCALL;
696
697 if (dealloc)
698 sodealloc(so);
699 }
700
701 /*
702 * Close a socket on last file table reference removal.
703 * Initiate disconnect if connected.
704 * Free socket when disconnect complete.
705 */
706 int
707 soclose_locked(so)
708 register struct socket *so;
709 {
710 int error = 0;
711 lck_mtx_t * mutex_held;
712 struct timespec ts;
713
714 if (so->so_usecount == 0) {
715 panic("soclose: so=%x refcount=0\n", so);
716 }
717
718 sflt_notify(so, sock_evt_closing, NULL);
719
720 if ((so->so_options & SO_ACCEPTCONN)) {
721 struct socket *sp;
722
723 /* We do not want new connections to be added to the connection queues */
724 so->so_options &= ~SO_ACCEPTCONN;
725
726 while ((sp = TAILQ_FIRST(&so->so_incomp)) != NULL) {
727 /* A bit tricky here: if the protocol uses a global lock we
728 * must keep holding it, but with per-socket locks we drop the
729 * head's lock and take the incoming socket's lock before
730 * aborting it...
731 */
732 if (so->so_proto->pr_getlock != NULL) {
733 socket_unlock(so, 0);
734 socket_lock(sp, 1);
735 }
736 (void) soabort(sp);
737 if (so->so_proto->pr_getlock != NULL) {
738 socket_unlock(sp, 1);
739 socket_lock(so, 0);
740 }
741 }
742
743 while ((sp = TAILQ_FIRST(&so->so_comp)) != NULL) {
744 /* Dequeue from so_comp since sofree() won't do it */
745 TAILQ_REMOVE(&so->so_comp, sp, so_list);
746 so->so_qlen--;
747
748 if (so->so_proto->pr_getlock != NULL) {
749 socket_unlock(so, 0);
750 socket_lock(sp, 1);
751 }
752
753 sp->so_state &= ~SS_COMP;
754 sp->so_head = NULL;
755
756 (void) soabort(sp);
757 if (so->so_proto->pr_getlock != NULL) {
758 socket_unlock(sp, 1);
759 socket_lock(so, 0);
760 }
761 }
762 }
763 if (so->so_pcb == 0) {
764 /* 3915887: mark the socket as ready for dealloc */
765 so->so_flags |= SOF_PCBCLEARING;
766 goto discard;
767 }
768 if (so->so_state & SS_ISCONNECTED) {
769 if ((so->so_state & SS_ISDISCONNECTING) == 0) {
770 error = sodisconnectlocked(so);
771 if (error)
772 goto drop;
773 }
774 if (so->so_options & SO_LINGER) {
775 if ((so->so_state & SS_ISDISCONNECTING) &&
776 (so->so_state & SS_NBIO))
777 goto drop;
778 if (so->so_proto->pr_getlock != NULL)
779 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
780 else
781 mutex_held = so->so_proto->pr_domain->dom_mtx;
782 while (so->so_state & SS_ISCONNECTED) {
783 ts.tv_sec = (so->so_linger/100);
784 ts.tv_nsec = (so->so_linger % 100) * NSEC_PER_USEC * 1000 * 10;
785 error = msleep((caddr_t)&so->so_timeo, mutex_held,
786 PSOCK | PCATCH, "soclos", &ts);
787 if (error) {
788 /* It's OK when the timer fires; don't report an error */
789 if (error == EWOULDBLOCK)
790 error = 0;
791 break;
792 }
793 }
794 }
795 }
796 drop:
797 if (so->so_usecount == 0)
798 panic("soclose: usecount is zero so=%x\n", so);
799 if (so->so_pcb && !(so->so_flags & SOF_PCBCLEARING)) {
800 int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so);
801 if (error == 0)
802 error = error2;
803 }
804 if (so->so_usecount <= 0)
805 panic("soclose: usecount is zero so=%x\n", so);
806 discard:
807 if (so->so_pcb && so->so_state & SS_NOFDREF)
808 panic("soclose: NOFDREF");
809 so->so_state |= SS_NOFDREF;
810 #ifdef __APPLE__
811 so->so_proto->pr_domain->dom_refs--;
812 evsofree(so);
813 #endif
814 so->so_usecount--;
815 sofree(so);
816 return (error);
817 }
818
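/*
 * soclose: entry point from the file table when the descriptor's last
 * reference goes away. Do the full close unless the socket is also
 * retained by the kernel (so_retaincnt != 0), in which case only the
 * descriptor's use count is dropped.
 */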
819 int
820 soclose(so)
821 register struct socket *so;
822 {
823 int error = 0;
824 socket_lock(so, 1);
825 if (so->so_retaincnt == 0)
826 error = soclose_locked(so);
827 else { /* if the FD is going away but the socket is retained in the kernel, remove its reference */
828 so->so_usecount--;
829 if (so->so_usecount < 2)
830 panic("soclose: retaincnt non null and so=%x usecount=%x\n", so, so->so_usecount);
831 }
832 socket_unlock(so, 1);
833 return (error);
834 }
835
836
837 /*
838 * Must be called at splnet...
839 */
840 //#### Should already be locked
841 int
842 soabort(so)
843 struct socket *so;
844 {
845 int error;
846
847 #ifdef MORE_LOCKING_DEBUG
848 lck_mtx_t * mutex_held;
849
850 if (so->so_proto->pr_getlock != NULL)
851 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
852 else
853 mutex_held = so->so_proto->pr_domain->dom_mtx;
854 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
855 #endif
856
857 error = (*so->so_proto->pr_usrreqs->pru_abort)(so);
858 if (error) {
859 sofree(so);
860 return error;
861 }
862 return (0);
863 }
864
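/*
 * Complete an accept(2) on a socket taken from the completed
 * connection queue: clear SS_NOFDREF and let the protocol fill in the
 * peer address via pru_accept. `dolock' selects whether this routine
 * takes the socket lock itself.
 */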
865 int
866 soacceptlock(so, nam, dolock)
867 register struct socket *so;
868 struct sockaddr **nam;
869 int dolock;
870 {
871 int error;
872
873 if (dolock) socket_lock(so, 1);
874
875 if ((so->so_state & SS_NOFDREF) == 0)
876 panic("soaccept: !NOFDREF");
877 so->so_state &= ~SS_NOFDREF;
878 error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam);
879
880 if (dolock) socket_unlock(so, 1);
881 return (error);
882 }
883 int
884 soaccept(so, nam)
885 register struct socket *so;
886 struct sockaddr **nam;
887 {
888 return (soacceptlock(so, nam, 1));
889 }
890
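/*
 * Initiate a connect(2). Listening sockets are rejected; for
 * connectionless protocols an existing association is first dissolved
 * so the caller can re-connect (or disconnect via a null address).
 * Any sf_connect_out filters run before the protocol's pru_connect.
 */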
891 int
892 soconnectlock(so, nam, dolock)
893 register struct socket *so;
894 struct sockaddr *nam;
895 int dolock;
896
897 {
898 int s;
899 int error;
900 struct proc *p = current_proc();
901
902 if (dolock) socket_lock(so, 1);
903
904 if (so->so_options & SO_ACCEPTCONN) {
905 if (dolock) socket_unlock(so, 1);
906 return (EOPNOTSUPP);
907 }
908 /*
909 * If protocol is connection-based, can only connect once.
910 * Otherwise, if connected, try to disconnect first.
911 * This allows user to disconnect by connecting to, e.g.,
912 * a null address.
913 */
914 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
915 ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
916 (error = sodisconnectlocked(so))))
917 error = EISCONN;
918 else {
919 /*
920 * Run connect filter before calling protocol:
921 * - non-blocking connect returns before completion;
922 */
923 {
924 struct socket_filter_entry *filter;
925 int filtered = 0;
926 error = 0;
927 for (filter = so->so_filt; filter && (error == 0);
928 filter = filter->sfe_next_onsocket) {
929 if (filter->sfe_filter->sf_filter.sf_connect_out) {
930 if (filtered == 0) {
931 filtered = 1;
932 sflt_use(so);
933 socket_unlock(so, 0);
934 }
935 error = filter->sfe_filter->sf_filter.sf_connect_out(
936 filter->sfe_cookie, so, nam);
937 }
938 }
939 if (filtered != 0) {
940 socket_lock(so, 0);
941 sflt_unuse(so);
942 }
943 }
944 if (error) {
945 if (error == EJUSTRETURN)
946 error = 0;
947 if (dolock) socket_unlock(so, 1);
948 return error;
949 }
950
951 error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, p);
952 }
953 if (dolock) socket_unlock(so, 1);
954 return (error);
955 }
956
957 int
958 soconnect(so, nam)
959 register struct socket *so;
960 struct sockaddr *nam;
961 {
962 return (soconnectlock(so, nam, 1));
963 }
964
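/*
 * Connect two sockets to each other (e.g. for socketpair(2) on
 * local-domain sockets): so1 is locked, so2 as well when the protocol
 * uses per-socket locks, then pru_connect2 does the work.
 */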
965 int
966 soconnect2(so1, so2)
967 register struct socket *so1;
968 struct socket *so2;
969 {
970 int error;
971
972 socket_lock(so1, 1);
973 if (so2->so_proto->pr_lock)
974 socket_lock(so2, 1);
975
976 error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2);
977
978 socket_unlock(so1, 1);
979 if (so2->so_proto->pr_lock)
980 socket_unlock(so2, 1);
981 return (error);
982 }
983
984
985 int
986 sodisconnectlocked(so)
987 register struct socket *so;
988 {
989 int error;
990
991 if ((so->so_state & SS_ISCONNECTED) == 0) {
992 error = ENOTCONN;
993 goto bad;
994 }
995 if (so->so_state & SS_ISDISCONNECTING) {
996 error = EALREADY;
997 goto bad;
998 }
999
1000 error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so);
1001
1002 if (error == 0) {
1003 sflt_notify(so, sock_evt_disconnected, NULL);
1004 }
1005
1006 bad:
1007 return (error);
1008 }
1009 //### Locking version
1010 int
1011 sodisconnect(so)
1012 register struct socket *so;
1013 {
1014 int error;
1015
1016 socket_lock(so, 1);
1017 error = sodisconnectlocked(so);
1018 socket_unlock(so, 1);
1019 return(error);
1020 }
1021
1022 #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_DONTWAIT : M_WAIT)
1023
1024 /*
1025 * sosendcheck will lock the socket buffer if it isn't locked and
1026 * verify that there is space for the data being inserted.
1027 */
1028
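/*
 * Possible outcomes: an error from sblock(); EPIPE once the send side
 * has been shut down; any pending so_error; ENOTCONN or EDESTADDRREQ
 * for unconnected sockets (depending on whether the protocol requires
 * a connection or supports implied connect); EMSGSIZE when an atomic
 * send or the control data exceeds the send buffer's high-water mark;
 * EWOULDBLOCK when a non-blocking send finds no room; otherwise the
 * routine sleeps in sbwait() and retries, returning 0 once space is
 * available.
 */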
1029 static int
1030 sosendcheck(
1031 struct socket *so,
1032 struct sockaddr *addr,
1033 long resid,
1034 long clen,
1035 long atomic,
1036 int flags,
1037 int *sblocked)
1038 {
1039 int error = 0;
1040 long space;
1041 int assumelock = 0;
1042
1043 restart:
1044 if (*sblocked == 0) {
1045 if ((so->so_snd.sb_flags & SB_LOCK) != 0 &&
1046 so->so_send_filt_thread != 0 &&
1047 so->so_send_filt_thread == current_thread()) {
1048 /*
1049 * We're being called recursively from a filter,
1050 * allow this to continue. Radar 4150520.
1051 * Don't set sblocked because we don't want
1052 * to perform an unlock later.
1053 */
1054 assumelock = 1;
1055 }
1056 else {
1057 error = sblock(&so->so_snd, SBLOCKWAIT(flags));
1058 if (error) {
1059 return error;
1060 }
1061 *sblocked = 1;
1062 }
1063 }
1064
1065 if (so->so_state & SS_CANTSENDMORE)
1066 return EPIPE;
1067
1068 if (so->so_error) {
1069 error = so->so_error;
1070 so->so_error = 0;
1071 return error;
1072 }
1073
1074 if ((so->so_state & SS_ISCONNECTED) == 0) {
1075 /*
1076 * `sendto' and `sendmsg' are allowed on a connection-
1077 * based socket if it supports implied connect.
1078 * Return ENOTCONN if not connected and no address is
1079 * supplied.
1080 */
1081 if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
1082 (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
1083 if ((so->so_state & SS_ISCONFIRMING) == 0 &&
1084 !(resid == 0 && clen != 0))
1085 return ENOTCONN;
1086 } else if (addr == 0 && !(flags&MSG_HOLD))
1087 return (so->so_proto->pr_flags & PR_CONNREQUIRED) ? ENOTCONN : EDESTADDRREQ;
1088 }
1089 space = sbspace(&so->so_snd);
1090 if (flags & MSG_OOB)
1091 space += 1024;
1092 if ((atomic && resid > so->so_snd.sb_hiwat) ||
1093 clen > so->so_snd.sb_hiwat)
1094 return EMSGSIZE;
1095 if (space < resid + clen &&
1096 (atomic || space < so->so_snd.sb_lowat || space < clen)) {
1097 if ((so->so_state & SS_NBIO) || (flags & MSG_NBIO) || assumelock) {
1098 return EWOULDBLOCK;
1099 }
1100 sbunlock(&so->so_snd, 1);
1101 error = sbwait(&so->so_snd);
1102 if (error) {
1103 return error;
1104 }
1105 goto restart;
1106 }
1107
1108 return 0;
1109 }
1110
1111 /*
1112 * Send on a socket.
1113 * If send must go all at once and message is larger than
1114 * send buffering, then hard error.
1115 * Lock against other senders.
1116 * If must go all at once and not enough room now, then
1117 * inform user that this would block and do nothing.
1118 * Otherwise, if nonblocking, send as much as possible.
1119 * The data to be sent is described by "uio" if nonzero,
1120 * otherwise by the mbuf chain "top" (which must be null
1121 * if uio is not). Data provided in mbuf chain must be small
1122 * enough to send all at once.
1123 *
1124 * Returns nonzero on error, timeout or signal; callers
1125 * must check for short counts if EINTR/ERESTART are returned.
1126 * Data and control buffers are freed on return.
1127 * Experiment:
1128 * MSG_HOLD: go thru most of sosend(), but just enqueue the mbuf
1129 * MSG_SEND: go thru as for MSG_HOLD on current fragment, then
1130 * point at the mbuf chain being constructed and go from there.
1131 */
1132 int
1133 sosend(so, addr, uio, top, control, flags)
1134 register struct socket *so;
1135 struct sockaddr *addr;
1136 struct uio *uio;
1137 struct mbuf *top;
1138 struct mbuf *control;
1139 int flags;
1140
1141 {
1142 struct mbuf **mp;
1143 register struct mbuf *m, *freelist = NULL;
1144 register long space, len, resid;
1145 int clen = 0, error, dontroute, mlen, sendflags;
1146 int atomic = sosendallatonce(so) || top;
1147 int sblocked = 0;
1148 struct proc *p = current_proc();
1149
1150 if (uio)
1151 // LP64todo - fix this!
1152 resid = uio_resid(uio);
1153 else
1154 resid = top->m_pkthdr.len;
1155
1156 KERNEL_DEBUG((DBG_FNC_SOSEND | DBG_FUNC_START),
1157 so,
1158 resid,
1159 so->so_snd.sb_cc,
1160 so->so_snd.sb_lowat,
1161 so->so_snd.sb_hiwat);
1162
1163 socket_lock(so, 1);
1164
1165 /*
1166 * In theory resid should be unsigned.
1167 * However, space must be signed, as it might be less than 0
1168 * if we over-committed, and we must use a signed comparison
1169 * of space and resid. On the other hand, a negative resid
1170 * causes us to loop sending 0-length segments to the protocol.
1171 *
1172 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
1173 * type sockets since that's an error.
1174 */
1175 if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
1176 error = EINVAL;
1177 socket_unlock(so, 1);
1178 goto out;
1179 }
1180
1181 dontroute =
1182 (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
1183 (so->so_proto->pr_flags & PR_ATOMIC);
1184 if (p)
1185 p->p_stats->p_ru.ru_msgsnd++;
1186 if (control)
1187 clen = control->m_len;
1188
1189 do {
1190 error = sosendcheck(so, addr, resid, clen, atomic, flags, &sblocked);
1191 if (error) {
1192 goto release;
1193 }
1194 mp = &top;
1195 space = sbspace(&so->so_snd) - clen + ((flags & MSG_OOB) ? 1024 : 0);
1196
1197 do {
1198
1199 if (uio == NULL) {
1200 /*
1201 * Data is prepackaged in "top".
1202 */
1203 resid = 0;
1204 if (flags & MSG_EOR)
1205 top->m_flags |= M_EOR;
1206 } else {
1207 int chainlength;
1208 int bytes_to_copy;
1209
1210 bytes_to_copy = min(resid, space);
1211
1212 if (sosendminchain > 0) {
1213 chainlength = 0;
1214 } else
1215 chainlength = sosendmaxchain;
1216
1217 socket_unlock(so, 0);
1218
1219 do {
1220 int num_needed;
1221 int hdrs_needed = (top == 0) ? 1 : 0;
1222
1223 /*
1224 * try to maintain a local cache of mbuf clusters needed to complete this write
1225 * the list is further limited to the number that are currently needed to fill the socket
1226 * this mechanism allows a large number of mbufs/clusters to be grabbed under a single
1227 * mbuf lock... if we can't get any clusters, then fall back to trying for mbufs...
1228 * if we fail early (or miscalculate the number needed), make sure to release any clusters
1229 * we haven't yet consumed.
1230 */
1231 if (freelist == NULL && bytes_to_copy > MCLBYTES) {
1232 num_needed = bytes_to_copy / NBPG;
1233
1234 if ((bytes_to_copy - (num_needed * NBPG)) >= MINCLSIZE)
1235 num_needed++;
1236
1237 freelist = m_getpackets_internal(&num_needed, hdrs_needed, M_WAIT, 0, NBPG);
1238 /* Fall back to cluster size if allocation failed */
1239 }
1240
1241 if (freelist == NULL && bytes_to_copy > MINCLSIZE) {
1242 num_needed = bytes_to_copy / MCLBYTES;
1243
1244 if ((bytes_to_copy - (num_needed * MCLBYTES)) >= MINCLSIZE)
1245 num_needed++;
1246
1247 freelist = m_getpackets_internal(&num_needed, hdrs_needed, M_WAIT, 0, MCLBYTES);
1248 /* Fall back to a single mbuf if allocation failed */
1249 }
1250
1251 if (freelist == NULL) {
1252 if (top == 0)
1253 MGETHDR(freelist, M_WAIT, MT_DATA);
1254 else
1255 MGET(freelist, M_WAIT, MT_DATA);
1256
1257 if (freelist == NULL) {
1258 error = ENOBUFS;
1259 socket_lock(so, 0);
1260 goto release;
1261 }
1262 /*
1263 * For datagram protocols, leave room
1264 * for protocol headers in first mbuf.
1265 */
1266 if (atomic && top == 0 && bytes_to_copy < MHLEN)
1267 MH_ALIGN(freelist, bytes_to_copy);
1268 }
1269 m = freelist;
1270 freelist = m->m_next;
1271 m->m_next = NULL;
1272
1273 if ((m->m_flags & M_EXT))
1274 mlen = m->m_ext.ext_size;
1275 else if ((m->m_flags & M_PKTHDR))
1276 mlen = MHLEN - m_leadingspace(m);
1277 else
1278 mlen = MLEN;
1279 len = min(mlen, bytes_to_copy);
1280
1281 chainlength += len;
1282
1283 space -= len;
1284
1285 error = uiomove(mtod(m, caddr_t), (int)len, uio);
1286
1287 // LP64todo - fix this!
1288 resid = uio_resid(uio);
1289
1290 m->m_len = len;
1291 *mp = m;
1292 top->m_pkthdr.len += len;
1293 if (error)
1294 break;
1295 mp = &m->m_next;
1296 if (resid <= 0) {
1297 if (flags & MSG_EOR)
1298 top->m_flags |= M_EOR;
1299 break;
1300 }
1301 bytes_to_copy = min(resid, space);
1302
1303 } while (space > 0 && (chainlength < sosendmaxchain || atomic || resid < MINCLSIZE));
1304
1305 socket_lock(so, 0);
1306
1307 if (error)
1308 goto release;
1309 }
1310
1311 if (flags & (MSG_HOLD|MSG_SEND))
1312 {
1313 /* Enqueue for later, go away if HOLD */
1314 register struct mbuf *mb1;
1315 if (so->so_temp && (flags & MSG_FLUSH))
1316 {
1317 m_freem(so->so_temp);
1318 so->so_temp = NULL;
1319 }
1320 if (so->so_temp)
1321 so->so_tail->m_next = top;
1322 else
1323 so->so_temp = top;
1324 mb1 = top;
1325 while (mb1->m_next)
1326 mb1 = mb1->m_next;
1327 so->so_tail = mb1;
1328 if (flags & MSG_HOLD)
1329 {
1330 top = NULL;
1331 goto release;
1332 }
1333 top = so->so_temp;
1334 }
1335 if (dontroute)
1336 so->so_options |= SO_DONTROUTE;
1337 /* Compute flags here, for pru_send and NKEs */
1338 sendflags = (flags & MSG_OOB) ? PRUS_OOB :
1339 /*
1340 * If the user set MSG_EOF, the protocol
1341 * understands this flag, and there is nothing left to
1342 * send, then use PRU_SEND_EOF instead of PRU_SEND.
1343 */
1344 ((flags & MSG_EOF) &&
1345 (so->so_proto->pr_flags & PR_IMPLOPCL) &&
1346 (resid <= 0)) ?
1347 PRUS_EOF :
1348 /* If there is more to send set PRUS_MORETOCOME */
1349 (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0;
1350
1351 /*
1352 * Socket filter processing
1353 */
1354 {
1355 struct socket_filter_entry *filter;
1356 int filtered;
1357
1358 filtered = 0;
1359 error = 0;
1360 for (filter = so->so_filt; filter && (error == 0);
1361 filter = filter->sfe_next_onsocket) {
1362 if (filter->sfe_filter->sf_filter.sf_data_out) {
1363 int so_flags = 0;
1364 if (filtered == 0) {
1365 filtered = 1;
1366 so->so_send_filt_thread = current_thread();
1367 sflt_use(so);
1368 socket_unlock(so, 0);
1369 so_flags = (sendflags & MSG_OOB) ? sock_data_filt_flag_oob : 0;
1370 }
1371 error = filter->sfe_filter->sf_filter.sf_data_out(
1372 filter->sfe_cookie, so, addr, &top, &control, so_flags);
1373 }
1374 }
1375
1376 if (filtered) {
1377 /*
1378 * At this point, we've run at least one filter.
1379 * The socket is unlocked as is the socket buffer.
1380 */
1381 socket_lock(so, 0);
1382 sflt_unuse(so);
1383 so->so_send_filt_thread = 0;
1384 if (error) {
1385 if (error == EJUSTRETURN) {
1386 error = 0;
1387 clen = 0;
1388 control = 0;
1389 top = 0;
1390 }
1391
1392 goto release;
1393 }
1394 }
1395 }
1396 /*
1397 * End Socket filter processing
1398 */
1399
1400 if (error == EJUSTRETURN) {
1401 /* A socket filter handled this data */
1402 error = 0;
1403 }
1404 else {
1405 error = (*so->so_proto->pr_usrreqs->pru_send)(so,
1406 sendflags, top, addr, control, p);
1407 }
1408 #ifdef __APPLE__
1409 if (flags & MSG_SEND)
1410 so->so_temp = NULL;
1411 #endif
1412 if (dontroute)
1413 so->so_options &= ~SO_DONTROUTE;
1414 clen = 0;
1415 control = 0;
1416 top = 0;
1417 mp = &top;
1418 if (error)
1419 goto release;
1420 } while (resid && space > 0);
1421 } while (resid);
1422
1423 release:
1424 if (sblocked)
1425 sbunlock(&so->so_snd, 0); /* will unlock socket */
1426 else
1427 socket_unlock(so, 1);
1428 out:
1429 if (top)
1430 m_freem(top);
1431 if (control)
1432 m_freem(control);
1433 if (freelist)
1434 m_freem_list(freelist);
1435
1436 KERNEL_DEBUG(DBG_FNC_SOSEND | DBG_FUNC_END,
1437 so,
1438 resid,
1439 so->so_snd.sb_cc,
1440 space,
1441 error);
1442
1443 return (error);
1444 }
1445
1446 /*
1447 * Implement receive operations on a socket.
1448 * We depend on the way that records are added to the sockbuf
1449 * by sbappend*. In particular, each record (mbufs linked through m_next)
1450 * must begin with an address if the protocol so specifies,
1451 * followed by an optional mbuf or mbufs containing ancillary data,
1452 * and then zero or more mbufs of data.
1453 * In order to avoid blocking network interrupts for the entire time here,
1454 * we splx() while doing the actual copy to user space.
1455 * Although the sockbuf is locked, new data may still be appended,
1456 * and thus we must maintain consistency of the sockbuf during that time.
1457 *
1458 * The caller may receive the data as a single mbuf chain by supplying
1459 * an mbuf **mp0 for use in returning the chain. The uio is then used
1460 * only for the count in uio_resid.
1461 */
1462 int
1463 soreceive(so, psa, uio, mp0, controlp, flagsp)
1464 register struct socket *so;
1465 struct sockaddr **psa;
1466 struct uio *uio;
1467 struct mbuf **mp0;
1468 struct mbuf **controlp;
1469 int *flagsp;
1470 {
1471 register struct mbuf *m, **mp, *ml = NULL;
1472 register int flags, len, error, offset;
1473 struct protosw *pr = so->so_proto;
1474 struct mbuf *nextrecord;
1475 int moff, type = 0;
1476 // LP64todo - fix this!
1477 int orig_resid = uio_resid(uio);
1478 volatile struct mbuf *free_list;
1479 volatile int delayed_copy_len;
1480 int can_delay;
1481 int need_event;
1482 struct proc *p = current_proc();
1483
1484
1485 // LP64todo - fix this!
1486 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_START,
1487 so,
1488 uio_resid(uio),
1489 so->so_rcv.sb_cc,
1490 so->so_rcv.sb_lowat,
1491 so->so_rcv.sb_hiwat);
1492
1493 socket_lock(so, 1);
1494
1495 #ifdef MORE_LOCKING_DEBUG
1496 if (so->so_usecount == 1)
1497 panic("soreceive: so=%x no other reference on socket\n", so);
1498 #endif
1499 mp = mp0;
1500 if (psa)
1501 *psa = 0;
1502 if (controlp)
1503 *controlp = 0;
1504 if (flagsp)
1505 flags = *flagsp &~ MSG_EOR;
1506 else
1507 flags = 0;
1508 /*
1509 * When SO_WANTOOBFLAG is set we try to get out-of-band data
1510 * regardless of the flags argument. Here is the case where
1511 * out-of-band data is not inline.
1512 */
1513 if ((flags & MSG_OOB) ||
1514 ((so->so_options & SO_WANTOOBFLAG) != 0 &&
1515 (so->so_options & SO_OOBINLINE) == 0 &&
1516 (so->so_oobmark || (so->so_state & SS_RCVATMARK)))) {
1517 m = m_get(M_WAIT, MT_DATA);
1518 if (m == NULL) {
1519 socket_unlock(so, 1);
1520 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, ENOBUFS,0,0,0,0);
1521 return (ENOBUFS);
1522 }
1523 error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
1524 if (error)
1525 goto bad;
1526 socket_unlock(so, 0);
1527 do {
1528 // LP64todo - fix this!
1529 error = uiomove(mtod(m, caddr_t),
1530 (int) min(uio_resid(uio), m->m_len), uio);
1531 m = m_free(m);
1532 } while (uio_resid(uio) && error == 0 && m);
1533 socket_lock(so, 0);
1534 bad:
1535 if (m)
1536 m_freem(m);
1537 #ifdef __APPLE__
1538 if ((so->so_options & SO_WANTOOBFLAG) != 0) {
1539 if (error == EWOULDBLOCK || error == EINVAL) {
1540 /*
1541 * Let's try to get normal data:
1542 * EWOULDBLOCK: out-of-band data not received yet;
1543 * EINVAL: out-of-band data already read.
1544 */
1545 error = 0;
1546 goto nooob;
1547 } else if (error == 0 && flagsp)
1548 *flagsp |= MSG_OOB;
1549 }
1550 socket_unlock(so, 1);
1551 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0);
1552 #endif
1553 return (error);
1554 }
1555 nooob:
1556 if (mp)
1557 *mp = (struct mbuf *)0;
1558 if (so->so_state & SS_ISCONFIRMING && uio_resid(uio))
1559 (*pr->pr_usrreqs->pru_rcvd)(so, 0);
1560
1561
1562 free_list = (struct mbuf *)0;
1563 delayed_copy_len = 0;
1564 restart:
1565 #ifdef MORE_LOCKING_DEBUG
1566 if (so->so_usecount <= 1)
1567 printf("soreceive: sblock so=%x ref=%d on socket\n", so, so->so_usecount);
1568 #endif
1569 error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
1570 if (error) {
1571 socket_unlock(so, 1);
1572 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0);
1573 return (error);
1574 }
1575
1576 m = so->so_rcv.sb_mb;
1577 /*
1578 * If we have less data than requested, block awaiting more
1579 * (subject to any timeout) if:
1580 * 1. the current count is less than the low water mark, or
1581 * 2. MSG_WAITALL is set, and it is possible to do the entire
1582 * receive operation at once if we block (resid <= hiwat).
1583 * 3. MSG_DONTWAIT is not set
1584 * If MSG_WAITALL is set but resid is larger than the receive buffer,
1585 * we have to do the receive in sections, and thus risk returning
1586 * a short count if a timeout or signal occurs after we start.
1587 */
1588 if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
1589 so->so_rcv.sb_cc < uio_resid(uio)) &&
1590 (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
1591 ((flags & MSG_WAITALL) && uio_resid(uio) <= so->so_rcv.sb_hiwat)) &&
1592 m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
1593
1594 KASSERT(m != 0 || !so->so_rcv.sb_cc, ("receive 1"));
1595 if (so->so_error) {
1596 if (m)
1597 goto dontblock;
1598 error = so->so_error;
1599 if ((flags & MSG_PEEK) == 0)
1600 so->so_error = 0;
1601 goto release;
1602 }
1603 if (so->so_state & SS_CANTRCVMORE) {
1604 if (m)
1605 goto dontblock;
1606 else
1607 goto release;
1608 }
1609 for (; m; m = m->m_next)
1610 if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
1611 m = so->so_rcv.sb_mb;
1612 goto dontblock;
1613 }
1614 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
1615 (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
1616 error = ENOTCONN;
1617 goto release;
1618 }
1619 if (uio_resid(uio) == 0)
1620 goto release;
1621 if ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO))) {
1622 error = EWOULDBLOCK;
1623 goto release;
1624 }
1625 sbunlock(&so->so_rcv, 1);
1626 #ifdef EVEN_MORE_LOCKING_DEBUG
1627 if (socket_debug)
1628 printf("Waiting for socket data\n");
1629 #endif
1630
1631 error = sbwait(&so->so_rcv);
1632 #ifdef EVEN_MORE_LOCKING_DEBUG
1633 if (socket_debug)
1634 printf("SORECEIVE - sbwait returned %d\n", error);
1635 #endif
1636 if (so->so_usecount < 1)
1637 panic("soreceive: after 2nd sblock so=%x ref=%d on socket\n", so, so->so_usecount);
1638 if (error) {
1639 socket_unlock(so, 1);
1640 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0);
1641 return (error);
1642 }
1643 goto restart;
1644 }
1645 dontblock:
1646 #ifndef __APPLE__
1647 if (uio->uio_procp)
1648 uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
1649 #else /* __APPLE__ */
1650 /*
1651 * 2207985
1652 * This should be uio->uio_procp; however, some callers of this
1653 * function use auto variables with stack garbage, and fail to
1654 * fill out the uio structure properly.
1655 */
1656 if (p)
1657 p->p_stats->p_ru.ru_msgrcv++;
1658 #endif /* __APPLE__ */
1659 nextrecord = m->m_nextpkt;
1660 if ((pr->pr_flags & PR_ADDR) && m->m_type == MT_SONAME) {
1661 KASSERT(m->m_type == MT_SONAME, ("receive 1a"));
1662 orig_resid = 0;
1663 if (psa) {
1664 *psa = dup_sockaddr(mtod(m, struct sockaddr *),
1665 mp0 == 0);
1666 if ((*psa == 0) && (flags & MSG_NEEDSA)) {
1667 error = EWOULDBLOCK;
1668 goto release;
1669 }
1670 }
1671 if (flags & MSG_PEEK) {
1672 m = m->m_next;
1673 } else {
1674 sbfree(&so->so_rcv, m);
1675 if (m->m_next == 0 && so->so_rcv.sb_cc != 0)
1676 panic("soreceive: about to create invalid socketbuf");
1677 MFREE(m, so->so_rcv.sb_mb);
1678 m = so->so_rcv.sb_mb;
1679 }
1680 }
1681 while (m && m->m_type == MT_CONTROL && error == 0) {
1682 if (flags & MSG_PEEK) {
1683 if (controlp)
1684 *controlp = m_copy(m, 0, m->m_len);
1685 m = m->m_next;
1686 } else {
1687 sbfree(&so->so_rcv, m);
1688 if (controlp) {
1689 if (pr->pr_domain->dom_externalize &&
1690 mtod(m, struct cmsghdr *)->cmsg_type ==
1691 SCM_RIGHTS) {
1692 socket_unlock(so, 0); /* release socket lock: see 3903171 */
1693 error = (*pr->pr_domain->dom_externalize)(m);
1694 socket_lock(so, 0);
1695 }
1696 *controlp = m;
1697 if (m->m_next == 0 && so->so_rcv.sb_cc != 0)
1698 panic("soreceive: so->so_rcv.sb_mb->m_next == 0 && so->so_rcv.sb_cc != 0");
1699 so->so_rcv.sb_mb = m->m_next;
1700 m->m_next = 0;
1701 m = so->so_rcv.sb_mb;
1702 } else {
1703 MFREE(m, so->so_rcv.sb_mb);
1704 m = so->so_rcv.sb_mb;
1705 }
1706 }
1707 if (controlp) {
1708 orig_resid = 0;
1709 controlp = &(*controlp)->m_next;
1710 }
1711 }
1712 if (m) {
1713 if ((flags & MSG_PEEK) == 0)
1714 m->m_nextpkt = nextrecord;
1715 type = m->m_type;
1716 if (type == MT_OOBDATA)
1717 flags |= MSG_OOB;
1718 }
1719 moff = 0;
1720 offset = 0;
1721
1722 if (!(flags & MSG_PEEK) && uio_resid(uio) > sorecvmincopy)
1723 can_delay = 1;
1724 else
1725 can_delay = 0;
1726
1727 need_event = 0;
1728
1729 while (m && (uio_resid(uio) - delayed_copy_len) > 0 && error == 0) {
1730 if (m->m_type == MT_OOBDATA) {
1731 if (type != MT_OOBDATA)
1732 break;
1733 } else if (type == MT_OOBDATA)
1734 break;
1735 #ifndef __APPLE__
1736 /*
1737 * This assertion needs rework. The trouble is Appletalk uses many
1738 * mbuf types (NOT listed in mbuf.h!) which will trigger this panic.
1739 * For now just remove the assertion... CSM 9/98
1740 */
1741 else
1742 KASSERT(m->m_type == MT_DATA || m->m_type == MT_HEADER,
1743 ("receive 3"));
1744 #else
1745 /*
1746 * Make sure to always set MSG_OOB when getting
1747 * out of band data inline.
1748 */
1749 if ((so->so_options & SO_WANTOOBFLAG) != 0 &&
1750 (so->so_options & SO_OOBINLINE) != 0 &&
1751 (so->so_state & SS_RCVATMARK) != 0) {
1752 flags |= MSG_OOB;
1753 }
1754 #endif
1755 so->so_state &= ~SS_RCVATMARK;
1756 // LP64todo - fix this!
1757 len = uio_resid(uio) - delayed_copy_len;
1758 if (so->so_oobmark && len > so->so_oobmark - offset)
1759 len = so->so_oobmark - offset;
1760 if (len > m->m_len - moff)
1761 len = m->m_len - moff;
1762 /*
1763 * If mp is set, just pass back the mbufs.
1764 * Otherwise copy them out via the uio, then free.
1765 * Sockbuf must be consistent here (points to current mbuf,
1766 * it points to next record) when we drop priority;
1767 * we must note any additions to the sockbuf when we
1768 * block interrupts again.
1769 */
1770 if (mp == 0) {
1771 if (can_delay && len == m->m_len) {
1772 /*
1773 * only delay the copy if we're consuming the
1774 * mbuf and we're NOT in MSG_PEEK mode
1775 * and we have enough data to make it worthwhile
1776 * to drop and retake the funnel... can_delay
1777 * reflects the state of the two latter constraints;
1778 * moff should always be zero in these cases
1779 */
1780 delayed_copy_len += len;
1781 } else {
1782
1783 if (delayed_copy_len) {
1784 error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);
1785
1786 if (error) {
1787 goto release;
1788 }
1789 if (m != so->so_rcv.sb_mb) {
1790 /*
1791 * can only get here if MSG_PEEK is not set
1792 * therefore, m should point at the head of the rcv queue...
1793 * if it doesn't, it means something drastically changed
1794 * while we were out from behind the funnel in sodelayed_copy...
1795 * perhaps a RST on the stream... in any event, the stream has
1796 * been interrupted... it's probably best just to return
1797 * whatever data we've moved and let the caller sort it out...
1798 */
1799 break;
1800 }
1801 }
1802 socket_unlock(so, 0);
1803 error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
1804 socket_lock(so, 0);
1805
1806 if (error)
1807 goto release;
1808 }
1809 } else
1810 uio_setresid(uio, (uio_resid(uio) - len));
1811
1812 if (len == m->m_len - moff) {
1813 if (m->m_flags & M_EOR)
1814 flags |= MSG_EOR;
1815 if (flags & MSG_PEEK) {
1816 m = m->m_next;
1817 moff = 0;
1818 } else {
1819 nextrecord = m->m_nextpkt;
1820 sbfree(&so->so_rcv, m);
1821 m->m_nextpkt = NULL;
1822
1823 if (mp) {
1824 *mp = m;
1825 mp = &m->m_next;
1826 so->so_rcv.sb_mb = m = m->m_next;
1827 *mp = (struct mbuf *)0;
1828 } else {
1829 if (free_list == NULL)
1830 free_list = m;
1831 else
1832 ml->m_next = m;
1833 ml = m;
1834 so->so_rcv.sb_mb = m = m->m_next;
1835 ml->m_next = 0;
1836 }
1837 if (m)
1838 m->m_nextpkt = nextrecord;
1839 }
1840 } else {
1841 if (flags & MSG_PEEK)
1842 moff += len;
1843 else {
1844 if (mp)
1845 *mp = m_copym(m, 0, len, M_WAIT);
1846 m->m_data += len;
1847 m->m_len -= len;
1848 so->so_rcv.sb_cc -= len;
1849 }
1850 }
1851 if (so->so_oobmark) {
1852 if ((flags & MSG_PEEK) == 0) {
1853 so->so_oobmark -= len;
1854 if (so->so_oobmark == 0) {
1855 so->so_state |= SS_RCVATMARK;
1856 /*
1857 * delay posting the actual event until after
1858 * any delayed copy processing has finished
1859 */
1860 need_event = 1;
1861 break;
1862 }
1863 } else {
1864 offset += len;
1865 if (offset == so->so_oobmark)
1866 break;
1867 }
1868 }
1869 if (flags & MSG_EOR)
1870 break;
1871 /*
1872 * If the MSG_WAITALL or MSG_WAITSTREAM flag is set (for non-atomic socket),
1873 * we must not quit until "uio->uio_resid == 0" or an error
1874 * termination. If a signal/timeout occurs, return
1875 * with a short count but without error.
1876 * Keep sockbuf locked against other readers.
1877 */
1878 while (flags & (MSG_WAITALL|MSG_WAITSTREAM) && m == 0 && (uio_resid(uio) - delayed_copy_len) > 0 &&
1879 !sosendallatonce(so) && !nextrecord) {
1880 if (so->so_error || so->so_state & SS_CANTRCVMORE)
1881 goto release;
1882
1883 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb && (((struct inpcb *)so->so_pcb)->inp_state != INPCB_STATE_DEAD))
1884 (*pr->pr_usrreqs->pru_rcvd)(so, flags);
1885 if (sbwait(&so->so_rcv)) {
1886 error = 0;
1887 goto release;
1888 }
1889 /*
1890 * have to wait until after we get back from the sbwait to do the copy because
1891 * we will drop the funnel if we have enough data that has been delayed... by dropping
1892 * the funnel we open up a window allowing the netisr thread to process the incoming packets
1893 * and to change the state of this socket... we're issuing the sbwait because
1894 * the socket is empty and we're expecting the netisr thread to wake us up when more
1895 * packets arrive... if we allow that processing to happen and then sbwait, we
1896 * could stall forever with packets sitting in the socket if no further packets
1897 * arrive from the remote side.
1898 *
1899 * we want to copy before we've collected all the data to satisfy this request to
1900 * allow the copy to overlap the incoming packet processing on an MP system
1901 */
1902 if (delayed_copy_len > sorecvmincopy && (delayed_copy_len > (so->so_rcv.sb_hiwat / 2))) {
1903
1904 error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);
1905
1906 if (error)
1907 goto release;
1908 }
1909 m = so->so_rcv.sb_mb;
1910 if (m) {
1911 nextrecord = m->m_nextpkt;
1912 }
1913 }
1914 }
1915 #ifdef MORE_LOCKING_DEBUG
1916 if (so->so_usecount <= 1)
1917 panic("soreceive: after big while so=%x ref=%d on socket\n", so, so->so_usecount);
1918 #endif
1919
1920 if (m && pr->pr_flags & PR_ATOMIC) {
1921 #ifdef __APPLE__
1922 if (so->so_options & SO_DONTTRUNC)
1923 flags |= MSG_RCVMORE;
1924 else {
1925 #endif
1926 flags |= MSG_TRUNC;
1927 if ((flags & MSG_PEEK) == 0)
1928 (void) sbdroprecord(&so->so_rcv);
1929 #ifdef __APPLE__
1930 }
1931 #endif
1932 }
1933 if ((flags & MSG_PEEK) == 0) {
1934 if (m == 0)
1935 so->so_rcv.sb_mb = nextrecord;
1936 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
1937 (*pr->pr_usrreqs->pru_rcvd)(so, flags);
1938 }
1939 #ifdef __APPLE__
1940 if ((so->so_options & SO_WANTMORE) && so->so_rcv.sb_cc > 0)
1941 flags |= MSG_HAVEMORE;
1942
1943 if (delayed_copy_len) {
1944 error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);
1945
1946 if (error)
1947 goto release;
1948 }
1949 if (free_list) {
1950 m_freem_list((struct mbuf *)free_list);
1951 free_list = (struct mbuf *)0;
1952 }
1953 if (need_event)
1954 postevent(so, 0, EV_OOB);
1955 #endif
1956 if (orig_resid == uio_resid(uio) && orig_resid &&
1957 (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
1958 sbunlock(&so->so_rcv, 1);
1959 goto restart;
1960 }
1961
1962 if (flagsp)
1963 *flagsp |= flags;
1964 release:
1965 #ifdef MORE_LOCKING_DEBUG
1966 if (so->so_usecount <= 1)
1967 panic("soreceive: release so=%x ref=%d on socket\n", so, so->so_usecount);
1968 #endif
1969 if (delayed_copy_len) {
1970 error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);
1971 }
1972 if (free_list) {
1973 m_freem_list((struct mbuf *)free_list);
1974 }
1975 sbunlock(&so->so_rcv, 0); /* will unlock socket */
1976
1977 // LP64todo - fix this!
1978 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END,
1979 so,
1980 uio_resid(uio),
1981 so->so_rcv.sb_cc,
1982 0,
1983 error);
1984
1985 return (error);
1986 }
1987
1988
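/*
 * Copy the mbuf chain accumulated on `free_list' out to the user's uio
 * with the socket unlocked, then free the chain and clear the caller's
 * delayed-copy byte count.
 */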
1989 static int sodelayed_copy(struct socket *so, struct uio *uio, struct mbuf **free_list, int *resid)
1990 {
1991 int error = 0;
1992 struct mbuf *m;
1993
1994 m = *free_list;
1995
1996 socket_unlock(so, 0);
1997
1998 while (m && error == 0) {
1999
2000 error = uiomove(mtod(m, caddr_t), (int)m->m_len, uio);
2001
2002 m = m->m_next;
2003 }
2004 m_freem_list(*free_list);
2005
2006 *free_list = (struct mbuf *)NULL;
2007 *resid = 0;
2008
2009 socket_lock(so, 0);
2010
2011 return (error);
2012 }
2013
2014
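/*
 * shutdown(2): notify the socket filters, flush the receive side for
 * SHUT_RD and SHUT_RDWR, and call the protocol's pru_shutdown for
 * SHUT_WR and SHUT_RDWR.
 */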
2015 int
2016 soshutdown(so, how)
2017 register struct socket *so;
2018 int how;
2019 {
2020 register struct protosw *pr = so->so_proto;
2021 int ret;
2022
2023 socket_lock(so, 1);
2024
2025 sflt_notify(so, sock_evt_shutdown, &how);
2026
2027 if (how != SHUT_WR) {
2028 sorflush(so);
2029 postevent(so, 0, EV_RCLOSED);
2030 }
2031 if (how != SHUT_RD) {
2032 ret = ((*pr->pr_usrreqs->pru_shutdown)(so));
2033 postevent(so, 0, EV_WCLOSED);
2034 KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, 0,0,0,0,0);
2035 socket_unlock(so, 1);
2036 return(ret);
2037 }
2038
2039 KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, 0,0,0,0,0);
2040 socket_unlock(so, 1);
2041 return (0);
2042 }
2043
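/*
 * Throw away any data queued on a socket's receive buffer: mark the
 * socket unable to receive more, snapshot the sockbuf, rebuild it
 * empty (preserving any kqueue knotes), let the domain dispose of
 * rights such as file descriptors held in control mbufs, and release
 * the old mbuf chain.
 */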
2044 void
2045 sorflush(so)
2046 register struct socket *so;
2047 {
2048 register struct sockbuf *sb = &so->so_rcv;
2049 register struct protosw *pr = so->so_proto;
2050 struct sockbuf asb;
2051
2052 #ifdef MORE_LOCKING_DEBUG
2053 lck_mtx_t * mutex_held;
2054
2055 if (so->so_proto->pr_getlock != NULL)
2056 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
2057 else
2058 mutex_held = so->so_proto->pr_domain->dom_mtx;
2059 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
2060 #endif
2061
2062 sflt_notify(so, sock_evt_flush_read, NULL);
2063
2064 sb->sb_flags |= SB_NOINTR;
2065 (void) sblock(sb, M_WAIT);
2066 socantrcvmore(so);
2067 sbunlock(sb, 1);
2068 #ifdef __APPLE__
2069 selthreadclear(&sb->sb_sel);
2070 #endif
2071 asb = *sb;
2072 bzero((caddr_t)sb, sizeof (*sb));
2073 sb->sb_so = so; /* reestablish link to socket */
2074 if (asb.sb_flags & SB_KNOTE) {
2075 sb->sb_sel.si_note = asb.sb_sel.si_note;
2076 sb->sb_flags = SB_KNOTE;
2077 }
2078 if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
2079 (*pr->pr_domain->dom_dispose)(asb.sb_mb);
2080 sbrelease(&asb);
2081 }
2082
2083 /*
2084 * Perhaps this routine, and sooptcopyout(), below, ought to come in
2085 * an additional variant to handle the case where the option value needs
2086 * to be some kind of integer, but not a specific size.
2087 * In addition to their use here, these functions are also called by the
2088 * protocol-level pr_ctloutput() routines.
2089 */
2090 int
2091 sooptcopyin(sopt, buf, len, minlen)
2092 struct sockopt *sopt;
2093 void *buf;
2094 size_t len;
2095 size_t minlen;
2096 {
2097 size_t valsize;
2098
2099 /*
2100 * If the user gives us more than we wanted, we ignore it,
2101 * but if we don't get the minimum length the caller
2102 * wants, we return EINVAL. On success, sopt->sopt_valsize
2103 * is set to however much we actually retrieved.
2104 */
2105 if ((valsize = sopt->sopt_valsize) < minlen)
2106 return EINVAL;
2107 if (valsize > len)
2108 sopt->sopt_valsize = valsize = len;
2109
2110 if (sopt->sopt_p != 0)
2111 return (copyin(sopt->sopt_val, buf, valsize));
2112
2113 bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), buf, valsize);
2114 return 0;
2115 }
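/*
 * The canonical sooptcopyin() call pattern, as used by the socket-level
 * handlers below and by protocol pr_ctloutput() routines (a sketch;
 * "optval" is simply a local in the caller):
 *
 *	int optval, error;
 *
 *	error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval);
 *	if (error)
 *		return (error);
 *	// optval now holds at most sizeof optval bytes of the user's value
 */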
2116
2117 int
2118 sosetopt(so, sopt)
2119 struct socket *so;
2120 struct sockopt *sopt;
2121 {
2122 int error, optval;
2123 struct linger l;
2124 struct timeval tv;
2125 short val;
2126
2127 socket_lock(so, 1);
2128
2129 if (sopt->sopt_dir != SOPT_SET) {
2130 sopt->sopt_dir = SOPT_SET;
2131 }
2132
2133 {
2134 struct socket_filter_entry *filter;
2135 int filtered = 0;
2136 error = 0;
2137 for (filter = so->so_filt; filter && (error == 0);
2138 filter = filter->sfe_next_onsocket) {
2139 if (filter->sfe_filter->sf_filter.sf_setoption) {
2140 if (filtered == 0) {
2141 filtered = 1;
2142 sflt_use(so);
2143 socket_unlock(so, 0);
2144 }
2145 error = filter->sfe_filter->sf_filter.sf_setoption(
2146 filter->sfe_cookie, so, sopt);
2147 }
2148 }
2149
2150 if (filtered != 0) {
2151 socket_lock(so, 0);
2152 sflt_unuse(so);
2153
2154 if (error) {
2155 if (error == EJUSTRETURN)
2156 error = 0;
2157 goto bad;
2158 }
2159 }
2160 }
2161
2162 error = 0;
2163 if (sopt->sopt_level != SOL_SOCKET) {
2164 if (so->so_proto && so->so_proto->pr_ctloutput) {
2165 error = (*so->so_proto->pr_ctloutput)
2166 (so, sopt);
2167 socket_unlock(so, 1);
2168 return (error);
2169 }
2170 error = ENOPROTOOPT;
2171 } else {
2172 switch (sopt->sopt_name) {
2173 case SO_LINGER:
2174 case SO_LINGER_SEC:
2175 error = sooptcopyin(sopt, &l, sizeof l, sizeof l);
2176 if (error)
2177 goto bad;
2178
2179 so->so_linger = (sopt->sopt_name == SO_LINGER) ? l.l_linger : l.l_linger * hz;
2180 if (l.l_onoff)
2181 so->so_options |= SO_LINGER;
2182 else
2183 so->so_options &= ~SO_LINGER;
2184 break;
2185
2186 case SO_DEBUG:
2187 case SO_KEEPALIVE:
2188 case SO_DONTROUTE:
2189 case SO_USELOOPBACK:
2190 case SO_BROADCAST:
2191 case SO_REUSEADDR:
2192 case SO_REUSEPORT:
2193 case SO_OOBINLINE:
2194 case SO_TIMESTAMP:
2195 #ifdef __APPLE__
2196 case SO_DONTTRUNC:
2197 case SO_WANTMORE:
2198 case SO_WANTOOBFLAG:
2199 #endif
2200 error = sooptcopyin(sopt, &optval, sizeof optval,
2201 sizeof optval);
2202 if (error)
2203 goto bad;
2204 if (optval)
2205 so->so_options |= sopt->sopt_name;
2206 else
2207 so->so_options &= ~sopt->sopt_name;
2208 break;
2209
2210 case SO_SNDBUF:
2211 case SO_RCVBUF:
2212 case SO_SNDLOWAT:
2213 case SO_RCVLOWAT:
2214 error = sooptcopyin(sopt, &optval, sizeof optval,
2215 sizeof optval);
2216 if (error)
2217 goto bad;
2218
2219 /*
2220 * Values < 1 make no sense for any of these
2221 * options, so disallow them.
2222 */
2223 if (optval < 1) {
2224 error = EINVAL;
2225 goto bad;
2226 }
2227
2228 switch (sopt->sopt_name) {
2229 case SO_SNDBUF:
2230 case SO_RCVBUF:
2231 if (sbreserve(sopt->sopt_name == SO_SNDBUF ?
2232 &so->so_snd : &so->so_rcv,
2233 (u_long) optval) == 0) {
2234 error = ENOBUFS;
2235 goto bad;
2236 }
2237 break;
2238
2239 /*
2240 * Make sure the low-water is never greater than
2241 * the high-water.
2242 */
2243 case SO_SNDLOWAT:
2244 so->so_snd.sb_lowat =
2245 (optval > so->so_snd.sb_hiwat) ?
2246 so->so_snd.sb_hiwat : optval;
2247 break;
2248 case SO_RCVLOWAT:
2249 so->so_rcv.sb_lowat =
2250 (optval > so->so_rcv.sb_hiwat) ?
2251 so->so_rcv.sb_hiwat : optval;
2252 break;
2253 }
2254 break;
2255
2256 case SO_SNDTIMEO:
2257 case SO_RCVTIMEO:
2258 error = sooptcopyin(sopt, &tv, sizeof tv,
2259 sizeof tv);
2260 if (error)
2261 goto bad;
2262
2263 if (tv.tv_sec < 0 || tv.tv_sec > LONG_MAX ||
2264 tv.tv_usec < 0 || tv.tv_usec >= 1000000) {
2265 error = EDOM;
2266 goto bad;
2267 }
2268
2269 switch (sopt->sopt_name) {
2270 case SO_SNDTIMEO:
2271 so->so_snd.sb_timeo = tv;
2272 break;
2273 case SO_RCVTIMEO:
2274 so->so_rcv.sb_timeo = tv;
2275 break;
2276 }
2277 break;
2278
2279 case SO_NKE:
2280 {
2281 struct so_nke nke;
2282
2283 error = sooptcopyin(sopt, &nke,
2284 sizeof nke, sizeof nke);
2285 if (error)
2286 goto bad;
2287
2288 error = sflt_attach_private(so, NULL, nke.nke_handle, 1);
2289 break;
2290 }
2291
2292 case SO_NOSIGPIPE:
2293 error = sooptcopyin(sopt, &optval, sizeof optval,
2294 sizeof optval);
2295 if (error)
2296 goto bad;
2297 if (optval)
2298 so->so_flags |= SOF_NOSIGPIPE;
2299 else
2300 so->so_flags &= ~SOF_NOSIGPIPE;
2301
2302 break;
2303
2304 case SO_NOADDRERR:
2305 error = sooptcopyin(sopt, &optval, sizeof optval,
2306 sizeof optval);
2307 if (error)
2308 goto bad;
2309 if (optval)
2310 so->so_flags |= SOF_NOADDRAVAIL;
2311 else
2312 so->so_flags &= ~SOF_NOADDRAVAIL;
2313
2314 break;
2315
2316 default:
2317 error = ENOPROTOOPT;
2318 break;
2319 }
2320 if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
2321 (void) ((*so->so_proto->pr_ctloutput)
2322 (so, sopt));
2323 }
2324 }
2325 bad:
2326 socket_unlock(so, 1);
2327 return (error);
2328 }
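/*
 * A minimal userspace sketch of setsockopt(2) requests that land in
 * sosetopt() above (descriptor name and values are illustrative only).
 * The SO_RCVTIMEO timeval must pass the EDOM check above: tv_sec >= 0
 * and 0 <= tv_usec < 1000000.
 *
 *	#include <sys/socket.h>
 *	#include <sys/time.h>
 *
 *	int
 *	example_set_options(int sock_fd)
 *	{
 *		struct timeval tv = { 5, 0 };	// 5 second receive timeout
 *		struct linger  l  = { 1, 10 };	// linger on close for 10 units
 *		int on = 1;
 *
 *		if (setsockopt(sock_fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof tv) == -1)
 *			return (-1);
 *		if (setsockopt(sock_fd, SOL_SOCKET, SO_LINGER, &l, sizeof l) == -1)
 *			return (-1);
 *		return (setsockopt(sock_fd, SOL_SOCKET, SO_NOSIGPIPE, &on, sizeof on));
 *	}
 */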
2329
2330 /* Helper routine for getsockopt */
2331 int
2332 sooptcopyout(sopt, buf, len)
2333 struct sockopt *sopt;
2334 void *buf;
2335 size_t len;
2336 {
2337 int error;
2338 size_t valsize;
2339
2340 error = 0;
2341
2342 /*
2343 * Documented get behavior is that we always return a value,
2344 * possibly truncated to fit in the user's buffer.
2345 * Traditional behavior is that we always tell the user
2346 * precisely how much we copied, rather than something useful
2347 * like the total amount we had available for her.
2348 * Note that this interface is not idempotent; the entire answer must
2349 * be generated ahead of time.
2350 */
2351 valsize = min(len, sopt->sopt_valsize);
2352 sopt->sopt_valsize = valsize;
2353 if (sopt->sopt_val != USER_ADDR_NULL) {
2354 if (sopt->sopt_p != 0)
2355 error = copyout(buf, sopt->sopt_val, valsize);
2356 else
2357 bcopy(buf, CAST_DOWN(caddr_t, sopt->sopt_val), valsize);
2358 }
2359 return error;
2360 }
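/*
 * From the caller's side, the truncation behavior documented above means
 * a too-small buffer still succeeds; only the returned length is clamped.
 * A sketch (sock_fd is an assumed open socket; option and sizes are
 * illustrative):
 *
 *	struct linger l;
 *	socklen_t len = sizeof (int);	// deliberately smaller than sizeof l
 *
 *	// succeeds; on return len reflects how many bytes were copied out
 *	(void) getsockopt(sock_fd, SOL_SOCKET, SO_LINGER, &l, &len);
 */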
2361
2362 int
2363 sogetopt(so, sopt)
2364 struct socket *so;
2365 struct sockopt *sopt;
2366 {
2367 int error, optval;
2368 struct linger l;
2369 struct timeval tv;
2370
2371 if (sopt->sopt_dir != SOPT_GET) {
2372 sopt->sopt_dir = SOPT_GET;
2373 }
2374
2375 socket_lock(so, 1);
2376
2377 {
2378 struct socket_filter_entry *filter;
2379 int filtered = 0;
2380 error = 0;
2381 for (filter = so->so_filt; filter && (error == 0);
2382 filter = filter->sfe_next_onsocket) {
2383 if (filter->sfe_filter->sf_filter.sf_getoption) {
2384 if (filtered == 0) {
2385 filtered = 1;
2386 sflt_use(so);
2387 socket_unlock(so, 0);
2388 }
2389 error = filter->sfe_filter->sf_filter.sf_getoption(
2390 filter->sfe_cookie, so, sopt);
2391 }
2392 }
2393 if (filtered != 0) {
2394 socket_lock(so, 0);
2395 sflt_unuse(so);
2396
2397 if (error) {
2398 if (error == EJUSTRETURN)
2399 error = 0;
2400 socket_unlock(so, 1);
2401 return error;
2402 }
2403 }
2404 }
2405
2406 error = 0;
2407 if (sopt->sopt_level != SOL_SOCKET) {
2408 if (so->so_proto && so->so_proto->pr_ctloutput) {
2409 error = (*so->so_proto->pr_ctloutput)
2410 (so, sopt);
2411 socket_unlock(so, 1);
2412 return (error);
2413 } else {
2414 socket_unlock(so, 1);
2415 return (ENOPROTOOPT);
2416 }
2417 } else {
2418 switch (sopt->sopt_name) {
2419 case SO_LINGER:
2420 case SO_LINGER_SEC:
2421 l.l_onoff = so->so_options & SO_LINGER;
2422 l.l_linger = (sopt->sopt_name == SO_LINGER) ? so->so_linger :
2423 so->so_linger / hz;
2424 error = sooptcopyout(sopt, &l, sizeof l);
2425 break;
2426
2427 case SO_USELOOPBACK:
2428 case SO_DONTROUTE:
2429 case SO_DEBUG:
2430 case SO_KEEPALIVE:
2431 case SO_REUSEADDR:
2432 case SO_REUSEPORT:
2433 case SO_BROADCAST:
2434 case SO_OOBINLINE:
2435 case SO_TIMESTAMP:
2436 #ifdef __APPLE__
2437 case SO_DONTTRUNC:
2438 case SO_WANTMORE:
2439 case SO_WANTOOBFLAG:
2440 #endif
2441 optval = so->so_options & sopt->sopt_name;
2442 integer:
2443 error = sooptcopyout(sopt, &optval, sizeof optval);
2444 break;
2445
2446 case SO_TYPE:
2447 optval = so->so_type;
2448 goto integer;
2449
2450 #ifdef __APPLE__
2451 case SO_NREAD:
2452 {
2453 int pkt_total;
2454 struct mbuf *m1;
2455
2456 pkt_total = 0;
2457 m1 = so->so_rcv.sb_mb;
2458 if (so->so_proto->pr_flags & PR_ATOMIC)
2459 {
2460 while (m1) {
2461 if (m1->m_type == MT_DATA)
2462 pkt_total += m1->m_len;
2463 m1 = m1->m_next;
2464 }
2465 optval = pkt_total;
2466 } else
2467 optval = so->so_rcv.sb_cc;
2468 goto integer;
2469 }
2470 case SO_NWRITE:
2471 optval = so->so_snd.sb_cc;
2472 goto integer;
2473 #endif
2474 case SO_ERROR:
2475 optval = so->so_error;
2476 so->so_error = 0;
2477 goto integer;
2478
2479 case SO_SNDBUF:
2480 optval = so->so_snd.sb_hiwat;
2481 goto integer;
2482
2483 case SO_RCVBUF:
2484 optval = so->so_rcv.sb_hiwat;
2485 goto integer;
2486
2487 case SO_SNDLOWAT:
2488 optval = so->so_snd.sb_lowat;
2489 goto integer;
2490
2491 case SO_RCVLOWAT:
2492 optval = so->so_rcv.sb_lowat;
2493 goto integer;
2494
2495 case SO_SNDTIMEO:
2496 case SO_RCVTIMEO:
2497 tv = (sopt->sopt_name == SO_SNDTIMEO ?
2498 so->so_snd.sb_timeo : so->so_rcv.sb_timeo);
2499
2500 error = sooptcopyout(sopt, &tv, sizeof tv);
2501 break;
2502
2503 case SO_NOSIGPIPE:
2504 optval = (so->so_flags & SOF_NOSIGPIPE);
2505 goto integer;
2506
2507 case SO_NOADDRERR:
2508 optval = (so->so_flags & SOF_NOADDRAVAIL);
2509 goto integer;
2510
2511 default:
2512 error = ENOPROTOOPT;
2513 break;
2514 }
2515 socket_unlock(so, 1);
2516 return (error);
2517 }
2518 }
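/*
 * A userspace sketch of the Apple-specific SO_NREAD request handled
 * above: for datagram (PR_ATOMIC) protocols it reports the data bytes of
 * the first buffered record, otherwise the total bytes in the receive
 * buffer (sock_fd is an assumed open socket descriptor):
 *
 *	#include <sys/socket.h>
 *
 *	int nread = 0;
 *	socklen_t len = sizeof nread;
 *
 *	if (getsockopt(sock_fd, SOL_SOCKET, SO_NREAD, &nread, &len) == 0)
 *		;	// nread bytes are already buffered (first datagram for UDP)
 */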
2519
2520 /* XXX; prepare mbuf for (__FreeBSD__ < 3) routines. */
2521 int
2522 soopt_getm(struct sockopt *sopt, struct mbuf **mp)
2523 {
2524 struct mbuf *m, *m_prev;
2525 int sopt_size = sopt->sopt_valsize;
2526
2527 if (sopt_size > MAX_SOOPTGETM_SIZE)
2528 return EMSGSIZE;
2529
2530 MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_DATA);
2531 if (m == 0)
2532 return ENOBUFS;
2533 if (sopt_size > MLEN) {
2534 MCLGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT);
2535 if ((m->m_flags & M_EXT) == 0) {
2536 m_free(m);
2537 return ENOBUFS;
2538 }
2539 m->m_len = min(MCLBYTES, sopt_size);
2540 } else {
2541 m->m_len = min(MLEN, sopt_size);
2542 }
2543 sopt_size -= m->m_len;
2544 *mp = m;
2545 m_prev = m;
2546
2547 while (sopt_size) {
2548 MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_DATA);
2549 if (m == 0) {
2550 m_freem(*mp);
2551 return ENOBUFS;
2552 }
2553 if (sopt_size > MLEN) {
2554 MCLGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT);
2555 if ((m->m_flags & M_EXT) == 0) {
2556 m_freem(*mp);
2557 return ENOBUFS;
2558 }
2559 m->m_len = min(MCLBYTES, sopt_size);
2560 } else {
2561 m->m_len = min(MLEN, sopt_size);
2562 }
2563 sopt_size -= m->m_len;
2564 m_prev->m_next = m;
2565 m_prev = m;
2566 }
2567 return 0;
2568 }
2569
2570 /* XXX; copyin sopt data into mbuf chain for (__FreeBSD__ < 3) routines. */
2571 int
2572 soopt_mcopyin(struct sockopt *sopt, struct mbuf *m)
2573 {
2574 struct mbuf *m0 = m;
2575
2576 if (sopt->sopt_val == USER_ADDR_NULL)
2577 return 0;
2578 while (m != NULL && sopt->sopt_valsize >= m->m_len) {
2579 if (sopt->sopt_p != NULL) {
2580 int error;
2581
2582 error = copyin(sopt->sopt_val, mtod(m, char *), m->m_len);
2583 if (error != 0) {
2584 m_freem(m0);
2585 return(error);
2586 }
2587 } else
2588 bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), mtod(m, char *), m->m_len);
2589 sopt->sopt_valsize -= m->m_len;
2590 sopt->sopt_val += m->m_len;
2591 m = m->m_next;
2592 }
2593 if (m != NULL) /* should have been allocated with enough space at ip6_sooptmcopyin() */
2594 panic("soopt_mcopyin");
2595 return 0;
2596 }
2597
2598 /* XXX; copyout mbuf chain data into soopt for (__FreeBSD__ < 3) routines. */
2599 int
2600 soopt_mcopyout(struct sockopt *sopt, struct mbuf *m)
2601 {
2602 struct mbuf *m0 = m;
2603 size_t valsize = 0;
2604
2605 if (sopt->sopt_val == USER_ADDR_NULL)
2606 return 0;
2607 while (m != NULL && sopt->sopt_valsize >= m->m_len) {
2608 if (sopt->sopt_p != NULL) {
2609 int error;
2610
2611 error = copyout(mtod(m, char *), sopt->sopt_val, m->m_len);
2612 if (error != 0) {
2613 m_freem(m0);
2614 return(error);
2615 }
2616 } else
2617 bcopy(mtod(m, char *), CAST_DOWN(caddr_t, sopt->sopt_val), m->m_len);
2618 sopt->sopt_valsize -= m->m_len;
2619 sopt->sopt_val += m->m_len;
2620 valsize += m->m_len;
2621 m = m->m_next;
2622 }
2623 if (m != NULL) {
2624 /* user-land should have supplied a large enough sockopt buffer */
2625 m_freem(m0);
2626 return(EINVAL);
2627 }
2628 sopt->sopt_valsize = valsize;
2629 return 0;
2630 }
2631
2632 void
2633 sohasoutofband(so)
2634 register struct socket *so;
2635 {
2636 struct proc *p;
2637
2638 if (so->so_pgid < 0)
2639 gsignal(-so->so_pgid, SIGURG);
2640 else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
2641 psignal(p, SIGURG);
2642 selwakeup(&so->so_rcv.sb_sel);
2643 }
2644
2645 int
2646 sopoll(struct socket *so, int events, __unused kauth_cred_t cred, void * wql)
2647 {
2648 struct proc *p = current_proc();
2649 int revents = 0;
2650
2651 socket_lock(so, 1);
2652
2653 if (events & (POLLIN | POLLRDNORM))
2654 if (soreadable(so))
2655 revents |= events & (POLLIN | POLLRDNORM);
2656
2657 if (events & (POLLOUT | POLLWRNORM))
2658 if (sowriteable(so))
2659 revents |= events & (POLLOUT | POLLWRNORM);
2660
2661 if (events & (POLLPRI | POLLRDBAND))
2662 if (so->so_oobmark || (so->so_state & SS_RCVATMARK))
2663 revents |= events & (POLLPRI | POLLRDBAND);
2664
2665 if (revents == 0) {
2666 if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
2667 /* Darwin sets the flag first, BSD calls selrecord first */
2668 so->so_rcv.sb_flags |= SB_SEL;
2669 selrecord(p, &so->so_rcv.sb_sel, wql);
2670 }
2671
2672 if (events & (POLLOUT | POLLWRNORM)) {
2673 /* Darwin sets the flag first, BSD calls selrecord first */
2674 so->so_snd.sb_flags |= SB_SEL;
2675 selrecord(p, &so->so_snd.sb_sel, wql);
2676 }
2677 }
2678
2679 socket_unlock(so, 1);
2680 return (revents);
2681 }
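/*
 * The revents computed above back an ordinary poll(2) call; a hedged
 * userspace sketch (descriptor and timeout are illustrative only):
 *
 *	#include <poll.h>
 *
 *	struct pollfd pfd = { sock_fd, POLLIN | POLLPRI, 0 };
 *
 *	if (poll(&pfd, 1, 1000) > 0) {
 *		if (pfd.revents & POLLIN)
 *			;	// soreadable(): data, EOF, or a pending error
 *		if (pfd.revents & POLLPRI)
 *			;	// out-of-band mark pending (so_oobmark or SS_RCVATMARK)
 *	}
 */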
2682
2683 int soo_kqfilter(struct fileproc *fp, struct knote *kn, struct proc *p);
2684
2685 int
2686 soo_kqfilter(__unused struct fileproc *fp, struct knote *kn, __unused struct proc *p)
2687 {
2688 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
2689 struct sockbuf *sb;
2690 socket_lock(so, 1);
2691
2692 switch (kn->kn_filter) {
2693 case EVFILT_READ:
2694 if (so->so_options & SO_ACCEPTCONN)
2695 kn->kn_fop = &solisten_filtops;
2696 else
2697 kn->kn_fop = &soread_filtops;
2698 sb = &so->so_rcv;
2699 break;
2700 case EVFILT_WRITE:
2701 kn->kn_fop = &sowrite_filtops;
2702 sb = &so->so_snd;
2703 break;
2704 default:
2705 socket_unlock(so, 1);
2706 return (1);
2707 }
2708
2709 if (KNOTE_ATTACH(&sb->sb_sel.si_note, kn))
2710 sb->sb_flags |= SB_KNOTE;
2711 socket_unlock(so, 1);
2712 return (0);
2713 }
2714
2715 static void
2716 filt_sordetach(struct knote *kn)
2717 {
2718 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
2719
2720 socket_lock(so, 1);
2721 if (so->so_rcv.sb_flags & SB_KNOTE)
2722 if (KNOTE_DETACH(&so->so_rcv.sb_sel.si_note, kn))
2723 so->so_rcv.sb_flags &= ~SB_KNOTE;
2724 socket_unlock(so, 1);
2725 }
2726
2727 /*ARGSUSED*/
2728 static int
2729 filt_soread(struct knote *kn, long hint)
2730 {
2731 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
2732
2733 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2734 socket_lock(so, 1);
2735
2736 if (so->so_oobmark) {
2737 if (kn->kn_flags & EV_OOBAND) {
2738 kn->kn_data = so->so_rcv.sb_cc - so->so_oobmark;
2739 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2740 socket_unlock(so, 1);
2741 return (1);
2742 }
2743 kn->kn_data = so->so_oobmark;
2744 kn->kn_flags |= EV_OOBAND;
2745 } else {
2746 kn->kn_data = so->so_rcv.sb_cc;
2747 if (so->so_state & SS_CANTRCVMORE) {
2748 kn->kn_flags |= EV_EOF;
2749 kn->kn_fflags = so->so_error;
2750 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2751 socket_unlock(so, 1);
2752 return (1);
2753 }
2754 }
2755
2756 if (so->so_state & SS_RCVATMARK) {
2757 if (kn->kn_flags & EV_OOBAND) {
2758 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2759 socket_unlock(so, 1);
2760 return (1);
2761 }
2762 kn->kn_flags |= EV_OOBAND;
2763 } else if (kn->kn_flags & EV_OOBAND) {
2764 kn->kn_data = 0;
2765 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2766 socket_unlock(so, 1);
2767 return (0);
2768 }
2769
2770 if (so->so_error) { /* temporary udp error */
2771 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2772 socket_unlock(so, 1);
2773 return (1);
2774 }
2775
2776 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2777 socket_unlock(so, 1);
2778
2779 return( kn->kn_flags & EV_OOBAND ||
2780 kn->kn_data >= ((kn->kn_sfflags & NOTE_LOWAT) ?
2781 kn->kn_sdata : so->so_rcv.sb_lowat));
2782 }
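/*
 * filt_soread() honors NOTE_LOWAT, so a kevent(2) user can raise the
 * readable threshold above so_rcv.sb_lowat.  A sketch, assuming kq and
 * sock_fd are valid descriptors:
 *
 *	#include <sys/event.h>
 *
 *	struct kevent kev;
 *
 *	// fire only once at least 512 bytes are buffered (or on EOF/OOB)
 *	EV_SET(&kev, sock_fd, EVFILT_READ, EV_ADD, NOTE_LOWAT, 512, NULL);
 *	(void) kevent(kq, &kev, 1, NULL, 0, NULL);
 */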
2783
2784 static void
2785 filt_sowdetach(struct knote *kn)
2786 {
2787 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
2788 socket_lock(so, 1);
2789
2790 if(so->so_snd.sb_flags & SB_KNOTE)
2791 if (KNOTE_DETACH(&so->so_snd.sb_sel.si_note, kn))
2792 so->so_snd.sb_flags &= ~SB_KNOTE;
2793 socket_unlock(so, 1);
2794 }
2795
2796 /*ARGSUSED*/
2797 static int
2798 filt_sowrite(struct knote *kn, long hint)
2799 {
2800 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
2801
2802 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2803 socket_lock(so, 1);
2804
2805 kn->kn_data = sbspace(&so->so_snd);
2806 if (so->so_state & SS_CANTSENDMORE) {
2807 kn->kn_flags |= EV_EOF;
2808 kn->kn_fflags = so->so_error;
2809 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2810 socket_unlock(so, 1);
2811 return (1);
2812 }
2813 if (so->so_error) { /* temporary udp error */
2814 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2815 socket_unlock(so, 1);
2816 return (1);
2817 }
2818 if (((so->so_state & SS_ISCONNECTED) == 0) &&
2819 (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
2820 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2821 socket_unlock(so, 1);
2822 return (0);
2823 }
2824 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2825 socket_unlock(so, 1);
2826 if (kn->kn_sfflags & NOTE_LOWAT)
2827 return (kn->kn_data >= kn->kn_sdata);
2828 return (kn->kn_data >= so->so_snd.sb_lowat);
2829 }
2830
2831 /*ARGSUSED*/
2832 static int
2833 filt_solisten(struct knote *kn, long hint)
2834 {
2835 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
2836 int is_not_empty;
2837
2838 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2839 socket_lock(so, 1);
2840 kn->kn_data = so->so_qlen;
2841 is_not_empty = ! TAILQ_EMPTY(&so->so_comp); /* ready when the completed-connection queue is non-empty */
2842 if ((hint & SO_FILT_HINT_LOCKED) == 0)
2843 socket_unlock(so, 1);
2844 return (is_not_empty);
2845 }
2846
2847
2848 int
2849 socket_lock(so, refcount)
2850 struct socket *so;
2851 int refcount;
2852 {
2853 int error = 0, lr_saved;
2854
2855 lr_saved = (unsigned int) __builtin_return_address(0);
2856
2857 if (so->so_proto->pr_lock) {
2858 error = (*so->so_proto->pr_lock)(so, refcount, lr_saved);
2859 }
2860 else {
2861 #ifdef MORE_LOCKING_DEBUG
2862 lck_mtx_assert(so->so_proto->pr_domain->dom_mtx, LCK_MTX_ASSERT_NOTOWNED);
2863 #endif
2864 lck_mtx_lock(so->so_proto->pr_domain->dom_mtx);
2865 if (refcount)
2866 so->so_usecount++;
2867 so->lock_lr[so->next_lock_lr] = (void *)lr_saved;
2868 so->next_lock_lr = (so->next_lock_lr+1) % SO_LCKDBG_MAX;
2869 }
2870
2871 return(error);
2872
2873 }
2874
2875 int
2876 socket_unlock(so, refcount)
2877 struct socket *so;
2878 int refcount;
2879 {
2880 int error = 0, lr_saved;
2881 lck_mtx_t * mutex_held;
2882
2883 lr_saved = (unsigned int) __builtin_return_address(0);
2884
2885 if (so->so_proto == NULL)
2886 panic("socket_unlock null so_proto so=%x\n", so);
2887
2888 if (so && so->so_proto->pr_unlock)
2889 error = (*so->so_proto->pr_unlock)(so, refcount, lr_saved);
2890 else {
2891 mutex_held = so->so_proto->pr_domain->dom_mtx;
2892 #ifdef MORE_LOCKING_DEBUG
2893 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
2894 #endif
2895 so->unlock_lr[so->next_unlock_lr] = (void *)lr_saved;
2896 so->next_unlock_lr = (so->next_unlock_lr+1) % SO_LCKDBG_MAX;
2897
2898 if (refcount) {
2899 if (so->so_usecount <= 0)
2900 panic("socket_unlock: bad refcount so=%x value=%d\n", so, so->so_usecount);
2901 so->so_usecount--;
2902 if (so->so_usecount == 0) {
2903 sofreelastref(so, 1);
2904 }
2905 }
2906 lck_mtx_unlock(mutex_held);
2907 }
2908
2909 return(error);
2910 }
2911 //### Called with socket locked, will unlock socket
2912 void
2913 sofree(so)
2914 struct socket *so;
2915 {
2916
2917 lck_mtx_t * mutex_held;
2918 if (so->so_proto->pr_getlock != NULL)
2919 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
2920 else
2921 mutex_held = so->so_proto->pr_domain->dom_mtx;
2922 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
2923
2924 sofreelastref(so, 0);
2925 }
2926
2927 void
2928 soreference(so)
2929 struct socket *so;
2930 {
2931 socket_lock(so, 1); /* lock and take one reference on the socket */
2932 socket_unlock(so, 0); /* unlock only; keep the reference */
2933 }
2934
2935 void
2936 sodereference(so)
2937 struct socket *so;
2938 {
2939 socket_lock(so, 0);
2940 socket_unlock(so, 1);
2941 }