/* bsd/kern/uipc_socket.c (from the xnu-792.6.76 source) */
/*
 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License").  You may not use this file except in compliance with the
 * License.  Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1998, 1999 Apple Computer, Inc. All Rights Reserved */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket.c	8.3 (Berkeley) 4/15/94
 * $FreeBSD: src/sys/kern/uipc_socket.c,v 1.68.2.16 2001/06/14 20:46:06 ume Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/file_internal.h>
#include <sys/fcntl.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/event.h>
#include <sys/poll.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/uio.h>
#include <sys/ev.h>
#include <sys/kdebug.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <kern/zalloc.h>
#include <kern/locks.h>
#include <machine/limits.h>

int so_cache_hw = 0;
int so_cache_timeouts = 0;
int so_cache_max_freed = 0;
int cached_sock_count = 0;
struct socket *socket_cache_head = 0;
struct socket *socket_cache_tail = 0;
u_long so_cache_time = 0;
int so_cache_init_done = 0;
struct zone *so_cache_zone;
extern int get_inpcb_str_size();
extern int get_tcp_str_size();

static lck_grp_t *so_cache_mtx_grp;
static lck_attr_t *so_cache_mtx_attr;
static lck_grp_attr_t *so_cache_mtx_grp_attr;
lck_mtx_t *so_cache_mtx;

static void filt_sordetach(struct knote *kn);
static int filt_soread(struct knote *kn, long hint);
static void filt_sowdetach(struct knote *kn);
static int filt_sowrite(struct knote *kn, long hint);
static int filt_solisten(struct knote *kn, long hint);

static struct filterops solisten_filtops =
	{ 1, NULL, filt_sordetach, filt_solisten };
static struct filterops soread_filtops =
	{ 1, NULL, filt_sordetach, filt_soread };
static struct filterops sowrite_filtops =
	{ 1, NULL, filt_sowdetach, filt_sowrite };

#define EVEN_MORE_LOCKING_DEBUG 0
int socket_debug = 0;
int socket_zone = M_SOCKET;
so_gen_t so_gencnt;	/* generation count for sockets */

MALLOC_DEFINE(M_SONAME, "soname", "socket name");
MALLOC_DEFINE(M_PCB, "pcb", "protocol control block");

#define DBG_LAYER_IN_BEG	NETDBG_CODE(DBG_NETSOCK, 0)
#define DBG_LAYER_IN_END	NETDBG_CODE(DBG_NETSOCK, 2)
#define DBG_LAYER_OUT_BEG	NETDBG_CODE(DBG_NETSOCK, 1)
#define DBG_LAYER_OUT_END	NETDBG_CODE(DBG_NETSOCK, 3)
#define DBG_FNC_SOSEND		NETDBG_CODE(DBG_NETSOCK, (4 << 8) | 1)
#define DBG_FNC_SORECEIVE	NETDBG_CODE(DBG_NETSOCK, (8 << 8))
#define DBG_FNC_SOSHUTDOWN	NETDBG_CODE(DBG_NETSOCK, (9 << 8))

#define MAX_SOOPTGETM_SIZE	(128 * MCLBYTES)

SYSCTL_DECL(_kern_ipc);

static int somaxconn = SOMAXCONN;
SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW, &somaxconn,
	   0, "");

/* Should we get a maximum also ??? */
static int sosendmaxchain = 65536;
static int sosendminchain = 16384;
static int sorecvmincopy = 16384;
SYSCTL_INT(_kern_ipc, OID_AUTO, sosendminchain, CTLFLAG_RW, &sosendminchain,
	   0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, sorecvmincopy, CTLFLAG_RW, &sorecvmincopy,
	   0, "");

void so_cache_timer();

/*
 * Socket operation routines.
 * These routines are called by the routines in
 * sys_socket.c or from a system process, and
 * implement the semantics of socket operations by
 * switching out to the protocol specific routines.
 */

#ifdef __APPLE__

vm_size_t so_cache_zone_element_size;

static int sodelayed_copy(struct socket *so, struct uio *uio, struct mbuf **free_list, int *resid);

void socketinit()
{
	vm_size_t str_size;

	if (so_cache_init_done) {
		printf("socketinit: already called...\n");
		return;
	}

	/*
	 * allocate lock group attribute and group for socket cache mutex
	 */
	so_cache_mtx_grp_attr = lck_grp_attr_alloc_init();
	lck_grp_attr_setdefault(so_cache_mtx_grp_attr);

	so_cache_mtx_grp = lck_grp_alloc_init("so_cache", so_cache_mtx_grp_attr);

	/*
	 * allocate the lock attribute for socket cache mutex
	 */
	so_cache_mtx_attr = lck_attr_alloc_init();
	lck_attr_setdefault(so_cache_mtx_attr);

	so_cache_init_done = 1;

	so_cache_mtx = lck_mtx_alloc_init(so_cache_mtx_grp, so_cache_mtx_attr);	/* cached sockets mutex */

	if (so_cache_mtx == NULL)
		return;		/* we're hosed... */

	str_size = (vm_size_t)(sizeof(struct socket) + 4 +
			       get_inpcb_str_size() + 4 +
			       get_tcp_str_size());
	so_cache_zone = zinit(str_size, 120000 * str_size, 8192, "socache zone");
#if TEMPDEBUG
	printf("cached_sock_alloc -- so_cache_zone size is %x\n", str_size);
#endif
	timeout(so_cache_timer, NULL, (SO_CACHE_FLUSH_INTERVAL * hz));

	so_cache_zone_element_size = str_size;

	sflt_init();
}
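
/*
 * Illustrative layout of one so_cache_zone element (a sketch, not
 * compiled code): cached_sock_alloc() below packs three structures into
 * a single allocation, padding each to a 4-byte boundary:
 *
 *	[ struct socket | pad | inpcb (get_inpcb_str_size()) | pad | tcpcb ]
 *
 * (*so)->so_saved_pcb points at the inpcb region, and the inpcb's
 * inp_saved_ppcb points at the tcpcb region.
 */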

void cached_sock_alloc(so, waitok)
struct socket **so;
int waitok;
{
	caddr_t temp;
	register u_long offset;

	lck_mtx_lock(so_cache_mtx);

	if (cached_sock_count) {
		cached_sock_count--;
		*so = socket_cache_head;
		if (*so == 0)
			panic("cached_sock_alloc: cached sock is null");

		socket_cache_head = socket_cache_head->cache_next;
		if (socket_cache_head)
			socket_cache_head->cache_prev = 0;
		else
			socket_cache_tail = 0;

		lck_mtx_unlock(so_cache_mtx);

		temp = (*so)->so_saved_pcb;
		bzero((caddr_t)*so, sizeof(struct socket));
#if TEMPDEBUG
		kprintf("cached_sock_alloc - retrieving cached sock %x - count == %d\n", *so,
			cached_sock_count);
#endif
		(*so)->so_saved_pcb = temp;
		(*so)->cached_in_sock_layer = 1;
	}
	else {
#if TEMPDEBUG
		kprintf("Allocating cached sock %x from memory\n", *so);
#endif

		lck_mtx_unlock(so_cache_mtx);

		if (waitok)
			*so = (struct socket *) zalloc(so_cache_zone);
		else
			*so = (struct socket *) zalloc_noblock(so_cache_zone);

		if (*so == 0)
			return;

		bzero((caddr_t)*so, sizeof(struct socket));

		/*
		 * Define offsets for extra structures into our single block of
		 * memory. Align extra structures on longword boundaries.
		 */

		offset = (u_long) *so;
		offset += sizeof(struct socket);
		if (offset & 0x3) {
			offset += 4;
			offset &= 0xfffffffc;
		}
		(*so)->so_saved_pcb = (caddr_t) offset;
		offset += get_inpcb_str_size();
		if (offset & 0x3) {
			offset += 4;
			offset &= 0xfffffffc;
		}

		((struct inpcb *) (*so)->so_saved_pcb)->inp_saved_ppcb = (caddr_t) offset;
#if TEMPDEBUG
		kprintf("Allocating cached socket - %x, pcb=%x tcpcb=%x\n", *so,
			(*so)->so_saved_pcb,
			((struct inpcb *)(*so)->so_saved_pcb)->inp_saved_ppcb);
#endif
	}

	(*so)->cached_in_sock_layer = 1;
}

void cached_sock_free(so)
struct socket *so;
{

	lck_mtx_lock(so_cache_mtx);

	if (++cached_sock_count > MAX_CACHED_SOCKETS) {
		--cached_sock_count;
		lck_mtx_unlock(so_cache_mtx);
#if TEMPDEBUG
		kprintf("Freeing overflowed cached socket %x\n", so);
#endif
		zfree(so_cache_zone, so);
	}
	else {
#if TEMPDEBUG
		kprintf("Freeing socket %x into cache\n", so);
#endif
		if (so_cache_hw < cached_sock_count)
			so_cache_hw = cached_sock_count;

		so->cache_next = socket_cache_head;
		so->cache_prev = 0;
		if (socket_cache_head)
			socket_cache_head->cache_prev = so;
		else
			socket_cache_tail = so;

		so->cache_timestamp = so_cache_time;
		socket_cache_head = so;
		lck_mtx_unlock(so_cache_mtx);
	}

#if TEMPDEBUG
	kprintf("Freed cached sock %x into cache - count is %d\n", so, cached_sock_count);
#endif

}

void so_cache_timer()
{
	register struct socket *p;
	register int n_freed = 0;

	lck_mtx_lock(so_cache_mtx);

	++so_cache_time;

	while ( (p = socket_cache_tail) )
	{
		if ((so_cache_time - p->cache_timestamp) < SO_CACHE_TIME_LIMIT)
			break;

		so_cache_timeouts++;

		if ( (socket_cache_tail = p->cache_prev) )
			p->cache_prev->cache_next = 0;
		if (--cached_sock_count == 0)
			socket_cache_head = 0;

		zfree(so_cache_zone, p);

		if (++n_freed >= SO_CACHE_MAX_FREE_BATCH)
		{
			so_cache_max_freed++;
			break;
		}
	}
	lck_mtx_unlock(so_cache_mtx);

	timeout(so_cache_timer, NULL, (SO_CACHE_FLUSH_INTERVAL * hz));

}
#endif /* __APPLE__ */
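
/*
 * Illustrative lifecycle of the PF_INET/SOCK_STREAM socket cache above
 * (a sketch, not part of the build):
 *
 *	cached_sock_alloc(&so, 1);	// pop a zeroed socket+PCB block from
 *					// the cache, or carve a fresh zone element
 *	...socket in use...
 *	cached_sock_free(so);		// push it back; so_cache_timer() later
 *					// reclaims entries idle longer than
 *					// SO_CACHE_TIME_LIMIT ticks
 */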

/*
 * Get a socket structure from our zone, and initialize it.
 * We don't implement `waitok' yet (see comments in uipc_domain.c).
 * Note that it would probably be better to allocate socket
 * and PCB at the same time, but I'm not convinced that all
 * the protocols can be easily modified to do this.
 */
struct socket *
soalloc(waitok, dom, type)
	int waitok;
	int dom;
	int type;
{
	struct socket *so;

	if ((dom == PF_INET) && (type == SOCK_STREAM))
		cached_sock_alloc(&so, waitok);
	else
	{
		MALLOC_ZONE(so, struct socket *, sizeof(*so), socket_zone, M_WAITOK);
		if (so)
			bzero(so, sizeof *so);
	}
	/* XXX race condition for reentrant kernel */
//###LD Atomic add for so_gencnt
	if (so) {
		so->so_gencnt = ++so_gencnt;
		so->so_zone = socket_zone;
	}

	return so;
}

int
socreate(dom, aso, type, proto)
	int dom;
	struct socket **aso;
	register int type;
	int proto;
{
	struct proc *p = current_proc();
	register struct protosw *prp;
	register struct socket *so;
	register int error = 0;
#if TCPDEBUG
	extern int tcpconsdebug;
#endif
	if (proto)
		prp = pffindproto(dom, proto, type);
	else
		prp = pffindtype(dom, type);

	if (prp == 0 || prp->pr_usrreqs->pru_attach == 0)
		return (EPROTONOSUPPORT);
#ifndef __APPLE__

	if (p->p_prison && jail_socket_unixiproute_only &&
	    prp->pr_domain->dom_family != PF_LOCAL &&
	    prp->pr_domain->dom_family != PF_INET &&
	    prp->pr_domain->dom_family != PF_ROUTE) {
		return (EPROTONOSUPPORT);
	}

#endif
	if (prp->pr_type != type)
		return (EPROTOTYPE);
	so = soalloc(p != 0, dom, type);
	if (so == 0)
		return (ENOBUFS);

	TAILQ_INIT(&so->so_incomp);
	TAILQ_INIT(&so->so_comp);
	so->so_type = type;

#ifdef __APPLE__
	if (p != 0) {
		so->so_uid = kauth_cred_getuid(kauth_cred_get());
		if (!suser(kauth_cred_get(), NULL))
			so->so_state = SS_PRIV;
	}
#else
	so->so_cred = kauth_cred_get_with_ref();
#endif
	so->so_proto = prp;
#ifdef __APPLE__
	so->so_rcv.sb_flags |= SB_RECV;	/* XXX */
	so->so_rcv.sb_so = so->so_snd.sb_so = so;
#endif

//### Attachment will create the per pcb lock if necessary and increase refcount
	so->so_usecount++;	/* for creation, make sure it's done before socket is inserted in lists */

	error = (*prp->pr_usrreqs->pru_attach)(so, proto, p);
	if (error) {
		/*
		 * Warning:
		 * If so_pcb is not zero, the socket will be leaked,
		 * so protocol attachment handler must be coded carefully
		 */
		so->so_state |= SS_NOFDREF;
		so->so_usecount--;
		sofreelastref(so, 1);	/* will deallocate the socket */
		return (error);
	}
#ifdef __APPLE__
	prp->pr_domain->dom_refs++;
	TAILQ_INIT(&so->so_evlist);

	/* Attach socket filters for this protocol */
	sflt_initsock(so);
#if TCPDEBUG
	if (tcpconsdebug == 2)
		so->so_options |= SO_DEBUG;
#endif
#endif

	*aso = so;
	return (0);
}
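
/*
 * Example (illustrative only): the in-kernel sequence the socket(2)
 * path performs with the routines above; "sin" here is a hypothetical,
 * already-initialized sockaddr_in.
 *
 *	struct socket *so;
 *	int error;
 *
 *	error = socreate(PF_INET, &so, SOCK_STREAM, IPPROTO_TCP);
 *	if (error == 0)
 *		error = sobind(so, (struct sockaddr *)&sin);
 */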

int
sobind(so, nam)
	struct socket *so;
	struct sockaddr *nam;
{
	struct proc *p = current_proc();
	int error = 0;
	struct socket_filter_entry *filter;
	int filtered = 0;

	socket_lock(so, 1);

	/* Socket filter */
	error = 0;
	for (filter = so->so_filt; filter && (error == 0);
	     filter = filter->sfe_next_onsocket) {
		if (filter->sfe_filter->sf_filter.sf_bind) {
			if (filtered == 0) {
				filtered = 1;
				sflt_use(so);
				socket_unlock(so, 0);
			}
			error = filter->sfe_filter->sf_filter.sf_bind(
				filter->sfe_cookie, so, nam);
		}
	}
	if (filtered != 0) {
		socket_lock(so, 0);
		sflt_unuse(so);
	}
	/* End socket filter */

	if (error == 0)
		error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p);

	socket_unlock(so, 1);

	if (error == EJUSTRETURN)
		error = 0;

	return (error);
}
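
/*
 * Note: the callout pattern above (sflt_use(), drop the socket lock,
 * run the filter, retake the lock, sflt_unuse()) recurs in solisten(),
 * soconnectlock() and sosend() below; socket filters always run with
 * the socket unlocked, and a filter returning EJUSTRETURN means
 * "handled, don't call the protocol".
 */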

void
sodealloc(so)
	struct socket *so;
{
	so->so_gencnt = ++so_gencnt;

#ifndef __APPLE__
	if (so->so_rcv.sb_hiwat)
		(void)chgsbsize(so->so_cred->cr_uidinfo,
		    &so->so_rcv.sb_hiwat, 0, RLIM_INFINITY);
	if (so->so_snd.sb_hiwat)
		(void)chgsbsize(so->so_cred->cr_uidinfo,
		    &so->so_snd.sb_hiwat, 0, RLIM_INFINITY);
#ifdef INET
	if (so->so_accf != NULL) {
		if (so->so_accf->so_accept_filter != NULL &&
		    so->so_accf->so_accept_filter->accf_destroy != NULL) {
			so->so_accf->so_accept_filter->accf_destroy(so);
		}
		if (so->so_accf->so_accept_filter_str != NULL)
			FREE(so->so_accf->so_accept_filter_str, M_ACCF);
		FREE(so->so_accf, M_ACCF);
	}
#endif /* INET */
	kauth_cred_rele(so->so_cred);
	zfreei(so->so_zone, so);
#else
	if (so->cached_in_sock_layer == 1)
		cached_sock_free(so);
	else {
		if (so->cached_in_sock_layer == -1)
			panic("sodealloc: double dealloc: so=%x\n", so);
		so->cached_in_sock_layer = -1;
		FREE_ZONE(so, sizeof(*so), so->so_zone);
	}
#endif /* __APPLE__ */
}

int
solisten(so, backlog)
	register struct socket *so;
	int backlog;
{
	struct proc *p = current_proc();
	int error;

	socket_lock(so, 1);

	{
		struct socket_filter_entry *filter;
		int filtered = 0;
		error = 0;
		for (filter = so->so_filt; filter && (error == 0);
		     filter = filter->sfe_next_onsocket) {
			if (filter->sfe_filter->sf_filter.sf_listen) {
				if (filtered == 0) {
					filtered = 1;
					sflt_use(so);
					socket_unlock(so, 0);
				}
				error = filter->sfe_filter->sf_filter.sf_listen(
					filter->sfe_cookie, so);
			}
		}
		if (filtered != 0) {
			socket_lock(so, 0);
			sflt_unuse(so);
		}
	}

	if (error == 0) {
		error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p);
	}

	if (error) {
		socket_unlock(so, 1);
		if (error == EJUSTRETURN)
			error = 0;
		return (error);
	}

	if (TAILQ_EMPTY(&so->so_comp))
		so->so_options |= SO_ACCEPTCONN;
	if (backlog < 0 || backlog > somaxconn)
		backlog = somaxconn;
	so->so_qlimit = backlog;

	socket_unlock(so, 1);
	return (0);
}

void
sofreelastref(so, dealloc)
	register struct socket *so;
	int dealloc;
{
	int error;
	struct socket *head = so->so_head;

	/*### Assume socket is locked */

	/* Remove any filters - may be called more than once */
	sflt_termsock(so);

	if ((!(so->so_flags & SOF_PCBCLEARING)) || ((so->so_state & SS_NOFDREF) == 0)) {
#ifdef __APPLE__
		selthreadclear(&so->so_snd.sb_sel);
		selthreadclear(&so->so_rcv.sb_sel);
		so->so_rcv.sb_flags &= ~SB_UPCALL;
		so->so_snd.sb_flags &= ~SB_UPCALL;
#endif
		return;
	}
	if (head != NULL) {
		socket_lock(head, 1);
		if (so->so_state & SS_INCOMP) {
			TAILQ_REMOVE(&head->so_incomp, so, so_list);
			head->so_incqlen--;
		} else if (so->so_state & SS_COMP) {
			/*
			 * We must not decommission a socket that's
			 * on the accept(2) queue.  If we do, then
			 * accept(2) may hang after select(2) indicated
			 * that the listening socket was ready.
			 */
#ifdef __APPLE__
			selthreadclear(&so->so_snd.sb_sel);
			selthreadclear(&so->so_rcv.sb_sel);
			so->so_rcv.sb_flags &= ~SB_UPCALL;
			so->so_snd.sb_flags &= ~SB_UPCALL;
#endif
			socket_unlock(head, 1);
			return;
		} else {
			panic("sofree: not queued");
		}
		head->so_qlen--;
		so->so_state &= ~SS_INCOMP;
		so->so_head = NULL;
		socket_unlock(head, 1);
	}
#ifdef __APPLE__
	selthreadclear(&so->so_snd.sb_sel);
	sbrelease(&so->so_snd);
#endif
	sorflush(so);

	/* 3932268: disable upcall */
	so->so_rcv.sb_flags &= ~SB_UPCALL;
	so->so_snd.sb_flags &= ~SB_UPCALL;

	if (dealloc)
		sodealloc(so);
}

/*
 * Close a socket on last file table reference removal.
 * Initiate disconnect if connected.
 * Free socket when disconnect complete.
 */
int
soclose_locked(so)
	register struct socket *so;
{
	int error = 0;
	lck_mtx_t * mutex_held;
	struct timespec ts;

	if (so->so_usecount == 0) {
		panic("soclose: so=%x refcount=0\n", so);
	}

	sflt_notify(so, sock_evt_closing, NULL);

	if ((so->so_options & SO_ACCEPTCONN)) {
		struct socket *sp;

		/* We do not want new connections to be added to the connection queues */
		so->so_options &= ~SO_ACCEPTCONN;

		while ((sp = TAILQ_FIRST(&so->so_incomp)) != NULL) {
			/* A bit tricky here. We need to keep
			 * a lock if it's a protocol global lock
			 * but we want the head, not the socket locked
			 * in the case of per-socket lock...
			 */
			if (so->so_proto->pr_getlock != NULL) {
				socket_unlock(so, 0);
				socket_lock(sp, 1);
			}
			(void) soabort(sp);
			if (so->so_proto->pr_getlock != NULL) {
				socket_unlock(sp, 1);
				socket_lock(so, 0);
			}
		}

		while ((sp = TAILQ_FIRST(&so->so_comp)) != NULL) {
			/* Dequeue from so_comp since sofree() won't do it */
			TAILQ_REMOVE(&so->so_comp, sp, so_list);
			so->so_qlen--;

			if (so->so_proto->pr_getlock != NULL) {
				socket_unlock(so, 0);
				socket_lock(sp, 1);
			}

			sp->so_state &= ~SS_COMP;
			sp->so_head = NULL;

			(void) soabort(sp);
			if (so->so_proto->pr_getlock != NULL) {
				socket_unlock(sp, 1);
				socket_lock(so, 0);
			}
		}
	}
	if (so->so_pcb == 0) {
		/* 3915887: mark the socket as ready for dealloc */
		so->so_flags |= SOF_PCBCLEARING;
		goto discard;
	}
	if (so->so_state & SS_ISCONNECTED) {
		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
			error = sodisconnectlocked(so);
			if (error)
				goto drop;
		}
		if (so->so_options & SO_LINGER) {
			if ((so->so_state & SS_ISDISCONNECTING) &&
			    (so->so_state & SS_NBIO))
				goto drop;
			if (so->so_proto->pr_getlock != NULL)
				mutex_held = (*so->so_proto->pr_getlock)(so, 0);
			else
				mutex_held = so->so_proto->pr_domain->dom_mtx;
			while (so->so_state & SS_ISCONNECTED) {
				/* so_linger is in centiseconds (1/100 s) */
				ts.tv_sec = (so->so_linger / 100);
				ts.tv_nsec = (so->so_linger % 100) * NSEC_PER_USEC * 1000 * 10;
				error = msleep((caddr_t)&so->so_timeo, mutex_held,
					       PSOCK | PCATCH, "soclos", &ts);
				if (error) {
					/* It's OK when the timer fires, don't report an error */
					if (error == EWOULDBLOCK)
						error = 0;
					break;
				}
			}
		}
	}
drop:
	if (so->so_usecount == 0)
		panic("soclose: usecount is zero so=%x\n", so);
	if (so->so_pcb && !(so->so_flags & SOF_PCBCLEARING)) {
		int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so);
		if (error == 0)
			error = error2;
	}
	if (so->so_usecount <= 0)
		panic("soclose: usecount is zero so=%x\n", so);
discard:
	if (so->so_pcb && so->so_state & SS_NOFDREF)
		panic("soclose: NOFDREF");
	so->so_state |= SS_NOFDREF;
#ifdef __APPLE__
	so->so_proto->pr_domain->dom_refs--;
	evsofree(so);
#endif
	so->so_usecount--;
	sofree(so);
	return (error);
}

int
soclose(so)
	register struct socket *so;
{
	int error = 0;
	socket_lock(so, 1);
	if (so->so_retaincnt == 0)
		error = soclose_locked(so);
	else {	/* if the FD is going away, but the socket is retained in kernel, remove its reference */
		so->so_usecount--;
		if (so->so_usecount < 2)
			panic("soclose: retaincnt non null and so=%x usecount=%x\n", so, so->so_usecount);
	}
	socket_unlock(so, 1);
	return (error);
}
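
/*
 * Background (summarized from the code above): so_retaincnt is nonzero
 * when the kernel itself holds the socket beyond its file-descriptor
 * reference (e.g. via the sock_retain() KPI); in that case closing the
 * descriptor only drops one use count, and the socket is torn down
 * later by the matching in-kernel release.
 */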

/*
 * Must be called at splnet...
 */
//#### Should already be locked
int
soabort(so)
	struct socket *so;
{
	int error;

#ifdef MORE_LOCKING_DEBUG
	lck_mtx_t * mutex_held;

	if (so->so_proto->pr_getlock != NULL)
		mutex_held = (*so->so_proto->pr_getlock)(so, 0);
	else
		mutex_held = so->so_proto->pr_domain->dom_mtx;
	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
#endif

	error = (*so->so_proto->pr_usrreqs->pru_abort)(so);
	if (error) {
		sofree(so);
		return error;
	}
	return (0);
}

int
soacceptlock(so, nam, dolock)
	register struct socket *so;
	struct sockaddr **nam;
	int dolock;
{
	int error;

	if (dolock) socket_lock(so, 1);

	if ((so->so_state & SS_NOFDREF) == 0)
		panic("soaccept: !NOFDREF");
	so->so_state &= ~SS_NOFDREF;
	error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam);

	if (dolock) socket_unlock(so, 1);
	return (error);
}
int
soaccept(so, nam)
	register struct socket *so;
	struct sockaddr **nam;
{
	return (soacceptlock(so, nam, 1));
}

int
soconnectlock(so, nam, dolock)
	register struct socket *so;
	struct sockaddr *nam;
	int dolock;
{
	int s;
	int error;
	struct proc *p = current_proc();

	if (dolock) socket_lock(so, 1);

	if (so->so_options & SO_ACCEPTCONN) {
		if (dolock) socket_unlock(so, 1);
		return (EOPNOTSUPP);
	}
	/*
	 * If protocol is connection-based, can only connect once.
	 * Otherwise, if connected, try to disconnect first.
	 * This allows user to disconnect by connecting to, e.g.,
	 * a null address.
	 */
	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
	    (error = sodisconnectlocked(so))))
		error = EISCONN;
	else {
		/*
		 * Run connect filter before calling protocol:
		 *  - non-blocking connect returns before completion;
		 */
		{
			struct socket_filter_entry *filter;
			int filtered = 0;
			error = 0;
			for (filter = so->so_filt; filter && (error == 0);
			     filter = filter->sfe_next_onsocket) {
				if (filter->sfe_filter->sf_filter.sf_connect_out) {
					if (filtered == 0) {
						filtered = 1;
						sflt_use(so);
						socket_unlock(so, 0);
					}
					error = filter->sfe_filter->sf_filter.sf_connect_out(
						filter->sfe_cookie, so, nam);
				}
			}
			if (filtered != 0) {
				socket_lock(so, 0);
				sflt_unuse(so);
			}
		}
		if (error) {
			if (error == EJUSTRETURN)
				error = 0;
			if (dolock) socket_unlock(so, 1);
			return error;
		}

		error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, p);
	}
	if (dolock) socket_unlock(so, 1);
	return (error);
}

int
soconnect(so, nam)
	register struct socket *so;
	struct sockaddr *nam;
{
	return (soconnectlock(so, nam, 1));
}

int
soconnect2(so1, so2)
	register struct socket *so1;
	struct socket *so2;
{
	int error;
//####### Assumes so1 is already locked /

	socket_lock(so2, 1);

	error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2);

	socket_unlock(so2, 1);
	return (error);
}

int
sodisconnectlocked(so)
	register struct socket *so;
{
	int error;

	if ((so->so_state & SS_ISCONNECTED) == 0) {
		error = ENOTCONN;
		goto bad;
	}
	if (so->so_state & SS_ISDISCONNECTING) {
		error = EALREADY;
		goto bad;
	}

	error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so);

	if (error == 0) {
		sflt_notify(so, sock_evt_disconnected, NULL);
	}

bad:
	return (error);
}

//### Locking version
int
sodisconnect(so)
	register struct socket *so;
{
	int error;

	socket_lock(so, 1);
	error = sodisconnectlocked(so);
	socket_unlock(so, 1);
	return (error);
}

#define SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? M_DONTWAIT : M_WAIT)

/*
 * sosendcheck will lock the socket buffer if it isn't locked and
 * verify that there is space for the data being inserted.
 */
static int
sosendcheck(
	struct socket *so,
	struct sockaddr *addr,
	long resid,
	long clen,
	long atomic,
	int flags,
	int *sblocked)
{
	int error = 0;
	long space;
	int assumelock = 0;

restart:
	if (*sblocked == 0) {
		if ((so->so_snd.sb_flags & SB_LOCK) != 0 &&
		    so->so_send_filt_thread != 0 &&
		    so->so_send_filt_thread == current_thread()) {
			/*
			 * We're being called recursively from a filter,
			 * allow this to continue. Radar 4150520.
			 * Don't set sblocked because we don't want
			 * to perform an unlock later.
			 */
			assumelock = 1;
		}
		else {
			error = sblock(&so->so_snd, SBLOCKWAIT(flags));
			if (error) {
				return error;
			}
			*sblocked = 1;
		}
	}

	if (so->so_state & SS_CANTSENDMORE)
		return EPIPE;

	if (so->so_error) {
		error = so->so_error;
		so->so_error = 0;
		return error;
	}

	if ((so->so_state & SS_ISCONNECTED) == 0) {
		/*
		 * `sendto' and `sendmsg' is allowed on a connection-
		 * based socket if it supports implied connect.
		 * Return ENOTCONN if not connected and no address is
		 * supplied.
		 */
		if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
		    (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
			if ((so->so_state & SS_ISCONFIRMING) == 0 &&
			    !(resid == 0 && clen != 0))
				return ENOTCONN;
		} else if (addr == 0 && !(flags&MSG_HOLD))
			return (so->so_proto->pr_flags & PR_CONNREQUIRED) ? ENOTCONN : EDESTADDRREQ;
	}
	space = sbspace(&so->so_snd);
	if (flags & MSG_OOB)
		space += 1024;
	if ((atomic && resid > so->so_snd.sb_hiwat) ||
	    clen > so->so_snd.sb_hiwat)
		return EMSGSIZE;
	if (space < resid + clen &&
	    (atomic || space < so->so_snd.sb_lowat || space < clen)) {
		if ((so->so_state & SS_NBIO) || (flags & MSG_NBIO) || assumelock) {
			return EWOULDBLOCK;
		}
		sbunlock(&so->so_snd, 1);
		error = sbwait(&so->so_snd);
		if (error) {
			return error;
		}
		goto restart;
	}

	return 0;
}
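
/*
 * Summary of sosendcheck()'s contract (restating the checks above):
 *	0		- send buffer locked (unless recursing from a filter
 *			  thread) and there is room for resid + clen bytes
 *	EPIPE		- send side already shut down
 *	EWOULDBLOCK	- no room and the socket or call is non-blocking
 *	EMSGSIZE	- an atomic send can never fit within sb_hiwat
 *	ENOTCONN/EDESTADDRREQ - connection/destination requirements unmet
 */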

/*
 * Send on a socket.
 * If send must go all at once and message is larger than
 * send buffering, then hard error.
 * Lock against other senders.
 * If must go all at once and not enough room now, then
 * inform user that this would block and do nothing.
 * Otherwise, if nonblocking, send as much as possible.
 * The data to be sent is described by "uio" if nonzero,
 * otherwise by the mbuf chain "top" (which must be null
 * if uio is not).  Data provided in mbuf chain must be small
 * enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers
 * must check for short counts if EINTR/ERESTART are returned.
 * Data and control buffers are freed on return.
 * Experiment:
 * MSG_HOLD: go thru most of sosend(), but just enqueue the mbuf
 * MSG_SEND: go thru as for MSG_HOLD on current fragment, then
 *  point at the mbuf chain being constructed and go from there.
 */
int
sosend(so, addr, uio, top, control, flags)
	register struct socket *so;
	struct sockaddr *addr;
	struct uio *uio;
	struct mbuf *top;
	struct mbuf *control;
	int flags;
{
	struct mbuf **mp;
	register struct mbuf *m, *freelist = NULL;
	register long space, len, resid;
	int clen = 0, error, dontroute, mlen, sendflags;
	int atomic = sosendallatonce(so) || top;
	int sblocked = 0;
	struct proc *p = current_proc();

	if (uio)
		// LP64todo - fix this!
		resid = uio_resid(uio);
	else
		resid = top->m_pkthdr.len;

	KERNEL_DEBUG((DBG_FNC_SOSEND | DBG_FUNC_START),
		     so,
		     resid,
		     so->so_snd.sb_cc,
		     so->so_snd.sb_lowat,
		     so->so_snd.sb_hiwat);

	socket_lock(so, 1);

	/*
	 * In theory resid should be unsigned.
	 * However, space must be signed, as it might be less than 0
	 * if we over-committed, and we must use a signed comparison
	 * of space and resid.  On the other hand, a negative resid
	 * causes us to loop sending 0-length segments to the protocol.
	 *
	 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
	 * type sockets since that's an error.
	 */
	if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
		error = EINVAL;
		socket_unlock(so, 1);
		goto out;
	}

	dontroute =
	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
	    (so->so_proto->pr_flags & PR_ATOMIC);
	if (p)
		p->p_stats->p_ru.ru_msgsnd++;
	if (control)
		clen = control->m_len;

	do {
		error = sosendcheck(so, addr, resid, clen, atomic, flags, &sblocked);
		if (error) {
			goto release;
		}
		mp = &top;
		space = sbspace(&so->so_snd) - clen + ((flags & MSG_OOB) ? 1024 : 0);

		do {

			if (uio == NULL) {
				/*
				 * Data is prepackaged in "top".
				 */
				resid = 0;
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
			} else {
				int chainlength;
				int bytes_to_copy;

				bytes_to_copy = min(resid, space);

				if (sosendminchain > 0) {
					chainlength = 0;
				} else
					chainlength = sosendmaxchain;

				socket_unlock(so, 0);

				do {
					int num_needed;
					int hdrs_needed = (top == 0) ? 1 : 0;

					/*
					 * try to maintain a local cache of mbuf clusters needed to complete this write
					 * the list is further limited to the number that are currently needed to fill the socket
					 * this mechanism allows a large number of mbufs/clusters to be grabbed under a single
					 * mbuf lock... if we can't get any clusters, then fall back to trying for mbufs
					 * if we fail early (or miscalculate the number needed) make sure to release any clusters
					 * we haven't yet consumed.
					 */
					if (freelist == NULL && bytes_to_copy > MCLBYTES) {
						num_needed = bytes_to_copy / NBPG;

						if ((bytes_to_copy - (num_needed * NBPG)) >= MINCLSIZE)
							num_needed++;

						freelist = m_getpackets_internal(&num_needed, hdrs_needed, M_WAIT, 0, NBPG);
						/* Fall back to cluster size if allocation failed */
					}

					if (freelist == NULL && bytes_to_copy > MINCLSIZE) {
						num_needed = bytes_to_copy / MCLBYTES;

						if ((bytes_to_copy - (num_needed * MCLBYTES)) >= MINCLSIZE)
							num_needed++;

						freelist = m_getpackets_internal(&num_needed, hdrs_needed, M_WAIT, 0, MCLBYTES);
						/* Fall back to a single mbuf if allocation failed */
					}

					if (freelist == NULL) {
						if (top == 0)
							MGETHDR(freelist, M_WAIT, MT_DATA);
						else
							MGET(freelist, M_WAIT, MT_DATA);

						if (freelist == NULL) {
							error = ENOBUFS;
							socket_lock(so, 0);
							goto release;
						}
						/*
						 * For datagram protocols, leave room
						 * for protocol headers in first mbuf.
						 */
						if (atomic && top == 0 && bytes_to_copy < MHLEN)
							MH_ALIGN(freelist, bytes_to_copy);
					}
					m = freelist;
					freelist = m->m_next;
					m->m_next = NULL;

					if ((m->m_flags & M_EXT))
						mlen = m->m_ext.ext_size;
					else if ((m->m_flags & M_PKTHDR))
						mlen = MHLEN - m_leadingspace(m);
					else
						mlen = MLEN;
					len = min(mlen, bytes_to_copy);

					chainlength += len;

					space -= len;

					error = uiomove(mtod(m, caddr_t), (int)len, uio);

					// LP64todo - fix this!
					resid = uio_resid(uio);

					m->m_len = len;
					*mp = m;
					top->m_pkthdr.len += len;
					if (error)
						break;
					mp = &m->m_next;
					if (resid <= 0) {
						if (flags & MSG_EOR)
							top->m_flags |= M_EOR;
						break;
					}
					bytes_to_copy = min(resid, space);

				} while (space > 0 && (chainlength < sosendmaxchain || atomic || resid < MINCLSIZE));

				socket_lock(so, 0);

				if (error)
					goto release;
			}

			if (flags & (MSG_HOLD|MSG_SEND))
			{
				/* Enqueue for later, go away if HOLD */
				register struct mbuf *mb1;
				if (so->so_temp && (flags & MSG_FLUSH))
				{
					m_freem(so->so_temp);
					so->so_temp = NULL;
				}
				if (so->so_temp)
					so->so_tail->m_next = top;
				else
					so->so_temp = top;
				mb1 = top;
				while (mb1->m_next)
					mb1 = mb1->m_next;
				so->so_tail = mb1;
				if (flags & MSG_HOLD)
				{
					top = NULL;
					goto release;
				}
				top = so->so_temp;
			}
			if (dontroute)
				so->so_options |= SO_DONTROUTE;
			/* Compute flags here, for pru_send and NKEs */
			sendflags = (flags & MSG_OOB) ? PRUS_OOB :
			    /*
			     * If the user set MSG_EOF, the protocol
			     * understands this flag and nothing left to
			     * send then use PRU_SEND_EOF instead of PRU_SEND.
			     */
			    ((flags & MSG_EOF) &&
			     (so->so_proto->pr_flags & PR_IMPLOPCL) &&
			     (resid <= 0)) ?
			        PRUS_EOF :
			    /* If there is more to send set PRUS_MORETOCOME */
			    (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0;

			/*
			 * Socket filter processing
			 */
			{
				struct socket_filter_entry *filter;
				int filtered;

				filtered = 0;
				error = 0;
				for (filter = so->so_filt; filter && (error == 0);
				     filter = filter->sfe_next_onsocket) {
					if (filter->sfe_filter->sf_filter.sf_data_out) {
						int so_flags = 0;
						if (filtered == 0) {
							filtered = 1;
							so->so_send_filt_thread = current_thread();
							sflt_use(so);
							socket_unlock(so, 0);
							so_flags = (sendflags & MSG_OOB) ? sock_data_filt_flag_oob : 0;
						}
						error = filter->sfe_filter->sf_filter.sf_data_out(
							filter->sfe_cookie, so, addr, &top, &control, so_flags);
					}
				}

				if (filtered) {
					/*
					 * At this point, we've run at least one filter.
					 * The socket is unlocked as is the socket buffer.
					 */
					socket_lock(so, 0);
					sflt_unuse(so);
					so->so_send_filt_thread = 0;
					if (error) {
						if (error == EJUSTRETURN) {
							error = 0;
							clen = 0;
							control = 0;
							top = 0;
						}

						goto release;
					}
				}
			}
			/*
			 * End Socket filter processing
			 */

			if (error == EJUSTRETURN) {
				/* A socket filter handled this data */
				error = 0;
			}
			else {
				error = (*so->so_proto->pr_usrreqs->pru_send)(so,
					sendflags, top, addr, control, p);
			}
#ifdef __APPLE__
			if (flags & MSG_SEND)
				so->so_temp = NULL;
#endif
			if (dontroute)
				so->so_options &= ~SO_DONTROUTE;
			clen = 0;
			control = 0;
			top = 0;
			mp = &top;
			if (error)
				goto release;
		} while (resid && space > 0);
	} while (resid);

release:
	if (sblocked)
		sbunlock(&so->so_snd, 0);	/* will unlock socket */
	else
		socket_unlock(so, 1);
out:
	if (top)
		m_freem(top);
	if (control)
		m_freem(control);
	if (freelist)
		m_freem_list(freelist);

	KERNEL_DEBUG(DBG_FNC_SOSEND | DBG_FUNC_END,
		     so,
		     resid,
		     so->so_snd.sb_cc,
		     space,
		     error);

	return (error);
}
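
/*
 * Illustrative call (a sketch of what the sendmsg(2) path does with the
 * routine above; uio setup is elided):
 *
 *	error = sosend(so, NULL, uio, NULL, NULL, 0);
 *
 * For a PR_ATOMIC protocol the whole residual must fit in the send
 * buffer (else EMSGSIZE); for streams, sosend() loops, building mbuf
 * chains of up to sosendmaxchain bytes and handing each to pru_send.
 */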

/*
 * Implement receive operations on a socket.
 * We depend on the way that records are added to the sockbuf
 * by sbappend*.  In particular, each record (mbufs linked through m_next)
 * must begin with an address if the protocol so specifies,
 * followed by an optional mbuf or mbufs containing ancillary data,
 * and then zero or more mbufs of data.
 * In order to avoid blocking network interrupts for the entire time here,
 * we splx() while doing the actual copy to user space.
 * Although the sockbuf is locked, new data may still be appended,
 * and thus we must maintain consistency of the sockbuf during that time.
 *
 * The caller may receive the data as a single mbuf chain by supplying
 * an mbuf **mp0 for use in returning the chain.  The uio is then used
 * only for the count in uio_resid.
 */
int
soreceive(so, psa, uio, mp0, controlp, flagsp)
	register struct socket *so;
	struct sockaddr **psa;
	struct uio *uio;
	struct mbuf **mp0;
	struct mbuf **controlp;
	int *flagsp;
{
	register struct mbuf *m, **mp, *ml = NULL;
	register int flags, len, error, offset;
	struct protosw *pr = so->so_proto;
	struct mbuf *nextrecord;
	int moff, type = 0;
	// LP64todo - fix this!
	int orig_resid = uio_resid(uio);
	volatile struct mbuf *free_list;
	volatile int delayed_copy_len;
	int can_delay;
	int need_event;
	struct proc *p = current_proc();

	// LP64todo - fix this!
	KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_START,
		     so,
		     uio_resid(uio),
		     so->so_rcv.sb_cc,
		     so->so_rcv.sb_lowat,
		     so->so_rcv.sb_hiwat);

	socket_lock(so, 1);

#ifdef MORE_LOCKING_DEBUG
	if (so->so_usecount == 1)
		panic("soreceive: so=%x no other reference on socket\n", so);
#endif
	mp = mp0;
	if (psa)
		*psa = 0;
	if (controlp)
		*controlp = 0;
	if (flagsp)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;
	/*
	 * When SO_WANTOOBFLAG is set we try to get out-of-band data
	 * regardless of the flags argument. Here is the case where
	 * out-of-band data is not inline.
	 */
	if ((flags & MSG_OOB) ||
	    ((so->so_options & SO_WANTOOBFLAG) != 0 &&
	     (so->so_options & SO_OOBINLINE) == 0 &&
	     (so->so_oobmark || (so->so_state & SS_RCVATMARK)))) {
		m = m_get(M_WAIT, MT_DATA);
		if (m == NULL) {
			socket_unlock(so, 1);
			KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, ENOBUFS, 0, 0, 0, 0);
			return (ENOBUFS);
		}
		error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
		if (error)
			goto bad;
		socket_unlock(so, 0);
		do {
			// LP64todo - fix this!
			error = uiomove(mtod(m, caddr_t),
					(int) min(uio_resid(uio), m->m_len), uio);
			m = m_free(m);
		} while (uio_resid(uio) && error == 0 && m);
		socket_lock(so, 0);
bad:
		if (m)
			m_freem(m);
#ifdef __APPLE__
		if ((so->so_options & SO_WANTOOBFLAG) != 0) {
			if (error == EWOULDBLOCK || error == EINVAL) {
				/*
				 * Let's try to get normal data:
				 *  EWOULDBLOCK: out-of-band data not received yet;
				 *  EINVAL: out-of-band data already read.
				 */
				error = 0;
				goto nooob;
			} else if (error == 0 && flagsp)
				*flagsp |= MSG_OOB;
		}
		socket_unlock(so, 1);
		KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error, 0, 0, 0, 0);
#endif
		return (error);
	}
nooob:
	if (mp)
		*mp = (struct mbuf *)0;
	if (so->so_state & SS_ISCONFIRMING && uio_resid(uio))
		(*pr->pr_usrreqs->pru_rcvd)(so, 0);

	free_list = (struct mbuf *)0;
	delayed_copy_len = 0;
restart:
#ifdef MORE_LOCKING_DEBUG
	if (so->so_usecount <= 1)
		printf("soreceive: sblock so=%x ref=%d on socket\n", so, so->so_usecount);
#endif
	error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
	if (error) {
		socket_unlock(so, 1);
		KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error, 0, 0, 0, 0);
		return (error);
	}

	m = so->so_rcv.sb_mb;
	/*
	 * If we have less data than requested, block awaiting more
	 * (subject to any timeout) if:
	 *   1. the current count is less than the low water mark, or
	 *   2. MSG_WAITALL is set, and it is possible to do the entire
	 *	receive operation at once if we block (resid <= hiwat).
	 *   3. MSG_DONTWAIT is not set
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning
	 * a short count if a timeout or signal occurs after we start.
	 */
	if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
	    so->so_rcv.sb_cc < uio_resid(uio)) &&
	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
	    ((flags & MSG_WAITALL) && uio_resid(uio) <= so->so_rcv.sb_hiwat)) &&
	    m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {

		KASSERT(m != 0 || !so->so_rcv.sb_cc, ("receive 1"));
		if (so->so_error) {
			if (m)
				goto dontblock;
			error = so->so_error;
			if ((flags & MSG_PEEK) == 0)
				so->so_error = 0;
			goto release;
		}
		if (so->so_state & SS_CANTRCVMORE) {
			if (m)
				goto dontblock;
			else
				goto release;
		}
		for (; m; m = m->m_next)
			if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
				m = so->so_rcv.sb_mb;
				goto dontblock;
			}
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
			error = ENOTCONN;
			goto release;
		}
		if (uio_resid(uio) == 0)
			goto release;
		if ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO))) {
			error = EWOULDBLOCK;
			goto release;
		}
		sbunlock(&so->so_rcv, 1);
#ifdef EVEN_MORE_LOCKING_DEBUG
		if (socket_debug)
			printf("Waiting for socket data\n");
#endif

		error = sbwait(&so->so_rcv);
#ifdef EVEN_MORE_LOCKING_DEBUG
		if (socket_debug)
			printf("SORECEIVE - sbwait returned %d\n", error);
#endif
		if (so->so_usecount < 1)
			panic("soreceive: after 2nd sblock so=%x ref=%d on socket\n", so, so->so_usecount);
		if (error) {
			socket_unlock(so, 1);
			KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error, 0, 0, 0, 0);
			return (error);
		}
		goto restart;
	}
dontblock:
#ifndef __APPLE__
	if (uio->uio_procp)
		uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
#else /* __APPLE__ */
	/*
	 * 2207985
	 * This should be uio->uio_procp; however, some callers of this
	 * function use auto variables with stack garbage, and fail to
	 * fill out the uio structure properly.
	 */
	if (p)
		p->p_stats->p_ru.ru_msgrcv++;
#endif /* __APPLE__ */
	nextrecord = m->m_nextpkt;
	if ((pr->pr_flags & PR_ADDR) && m->m_type == MT_SONAME) {
		KASSERT(m->m_type == MT_SONAME, ("receive 1a"));
		orig_resid = 0;
		if (psa) {
			*psa = dup_sockaddr(mtod(m, struct sockaddr *),
					    mp0 == 0);
			if ((*psa == 0) && (flags & MSG_NEEDSA)) {
				error = EWOULDBLOCK;
				goto release;
			}
		}
		if (flags & MSG_PEEK) {
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (m->m_next == 0 && so->so_rcv.sb_cc != 0)
				panic("soreceive: about to create invalid socketbuf");
			MFREE(m, so->so_rcv.sb_mb);
			m = so->so_rcv.sb_mb;
		}
	}
	while (m && m->m_type == MT_CONTROL && error == 0) {
		if (flags & MSG_PEEK) {
			if (controlp)
				*controlp = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (controlp) {
				if (pr->pr_domain->dom_externalize &&
				    mtod(m, struct cmsghdr *)->cmsg_type ==
				    SCM_RIGHTS) {
					socket_unlock(so, 0);	/* release socket lock: see 3903171 */
					error = (*pr->pr_domain->dom_externalize)(m);
					socket_lock(so, 0);
				}
				*controlp = m;
				if (m->m_next == 0 && so->so_rcv.sb_cc != 0)
					panic("soreceive: so->so_rcv.sb_mb->m_next == 0 && so->so_rcv.sb_cc != 0");
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
		if (controlp) {
			orig_resid = 0;
			controlp = &(*controlp)->m_next;
		}
	}
	if (m) {
		if ((flags & MSG_PEEK) == 0)
			m->m_nextpkt = nextrecord;
		type = m->m_type;
		if (type == MT_OOBDATA)
			flags |= MSG_OOB;
	}
	moff = 0;
	offset = 0;

	if (!(flags & MSG_PEEK) && uio_resid(uio) > sorecvmincopy)
		can_delay = 1;
	else
		can_delay = 0;

	need_event = 0;

	while (m && (uio_resid(uio) - delayed_copy_len) > 0 && error == 0) {
		if (m->m_type == MT_OOBDATA) {
			if (type != MT_OOBDATA)
				break;
		} else if (type == MT_OOBDATA)
			break;
#ifndef __APPLE__
/*
 * This assertion needs rework.  The trouble is Appletalk uses many
 * mbuf types (NOT listed in mbuf.h!) which will trigger this panic.
 * For now just remove the assertion...  CSM 9/98
 */
		else
			KASSERT(m->m_type == MT_DATA || m->m_type == MT_HEADER,
				("receive 3"));
#else
		/*
		 * Make sure to always set MSG_OOB event when getting
		 * out of band data inline.
		 */
		if ((so->so_options & SO_WANTOOBFLAG) != 0 &&
		    (so->so_options & SO_OOBINLINE) != 0 &&
		    (so->so_state & SS_RCVATMARK) != 0) {
			flags |= MSG_OOB;
		}
#endif
		so->so_state &= ~SS_RCVATMARK;
		// LP64todo - fix this!
		len = uio_resid(uio) - delayed_copy_len;
		if (so->so_oobmark && len > so->so_oobmark - offset)
			len = so->so_oobmark - offset;
		if (len > m->m_len - moff)
			len = m->m_len - moff;
		/*
		 * If mp is set, just pass back the mbufs.
		 * Otherwise copy them out via the uio, then free.
		 * Sockbuf must be consistent here (points to current mbuf,
		 * it points to next record) when we drop priority;
		 * we must note any additions to the sockbuf when we
		 * block interrupts again.
		 */
		if (mp == 0) {
			if (can_delay && len == m->m_len) {
				/*
				 * only delay the copy if we're consuming the
				 * mbuf and we're NOT in MSG_PEEK mode
				 * and we have enough data to make it worthwhile
				 * to drop and retake the funnel... can_delay
				 * reflects the state of the 2 latter constraints
				 * moff should always be zero in these cases
				 */
				delayed_copy_len += len;
			} else {

				if (delayed_copy_len) {
					error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);

					if (error) {
						goto release;
					}
					if (m != so->so_rcv.sb_mb) {
						/*
						 * can only get here if MSG_PEEK is not set
						 * therefore, m should point at the head of the rcv queue...
						 * if it doesn't, it means something drastically changed
						 * while we were out from behind the funnel in sodelayed_copy...
						 * perhaps a RST on the stream... in any event, the stream has
						 * been interrupted... it's probably best just to return
						 * whatever data we've moved and let the caller sort it out...
						 */
						break;
					}
				}
				socket_unlock(so, 0);
				error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
				socket_lock(so, 0);

				if (error)
					goto release;
			}
		} else
			uio_setresid(uio, (uio_resid(uio) - len));

		if (len == m->m_len - moff) {
			if (m->m_flags & M_EOR)
				flags |= MSG_EOR;
			if (flags & MSG_PEEK) {
				m = m->m_next;
				moff = 0;
			} else {
				nextrecord = m->m_nextpkt;
				sbfree(&so->so_rcv, m);
				m->m_nextpkt = NULL;

				if (mp) {
					*mp = m;
					mp = &m->m_next;
					so->so_rcv.sb_mb = m = m->m_next;
					*mp = (struct mbuf *)0;
				} else {
					if (free_list == NULL)
						free_list = m;
					else
						ml->m_next = m;
					ml = m;
					so->so_rcv.sb_mb = m = m->m_next;
					ml->m_next = 0;
				}
				if (m)
					m->m_nextpkt = nextrecord;
			}
		} else {
			if (flags & MSG_PEEK)
				moff += len;
			else {
				if (mp)
					*mp = m_copym(m, 0, len, M_WAIT);
				m->m_data += len;
				m->m_len -= len;
				so->so_rcv.sb_cc -= len;
			}
		}
		if (so->so_oobmark) {
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					so->so_state |= SS_RCVATMARK;
					/*
					 * delay posting the actual event until after
					 * any delayed copy processing has finished
					 */
					need_event = 1;
					break;
				}
			} else {
				offset += len;
				if (offset == so->so_oobmark)
					break;
			}
		}
		if (flags & MSG_EOR)
			break;
		/*
		 * If the MSG_WAITALL or MSG_WAITSTREAM flag is set (for non-atomic socket),
		 * we must not quit until "uio->uio_resid == 0" or an error
		 * termination.  If a signal/timeout occurs, return
		 * with a short count but without error.
		 * Keep sockbuf locked against other readers.
		 */
		while (flags & (MSG_WAITALL|MSG_WAITSTREAM) && m == 0 && (uio_resid(uio) - delayed_copy_len) > 0 &&
		       !sosendallatonce(so) && !nextrecord) {
			if (so->so_error || so->so_state & SS_CANTRCVMORE)
				goto release;

			if (pr->pr_flags & PR_WANTRCVD && so->so_pcb && (((struct inpcb *)so->so_pcb)->inp_state != INPCB_STATE_DEAD))
				(*pr->pr_usrreqs->pru_rcvd)(so, flags);
			if (sbwait(&so->so_rcv)) {
				error = 0;
				goto release;
			}
			/*
			 * have to wait until after we get back from the sbwait to do the copy because
			 * we will drop the funnel if we have enough data that has been delayed... by dropping
			 * the funnel we open up a window allowing the netisr thread to process the incoming packets
			 * and to change the state of this socket... we're issuing the sbwait because
			 * the socket is empty and we're expecting the netisr thread to wake us up when more
			 * packets arrive... if we allow that processing to happen and then sbwait, we
			 * could stall forever with packets sitting in the socket if no further packets
			 * arrive from the remote side.
			 *
			 * we want to copy before we've collected all the data to satisfy this request to
			 * allow the copy to overlap the incoming packet processing on an MP system
			 */
			if (delayed_copy_len > sorecvmincopy && (delayed_copy_len > (so->so_rcv.sb_hiwat / 2))) {

				error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);

				if (error)
					goto release;
			}
			m = so->so_rcv.sb_mb;
			if (m) {
				nextrecord = m->m_nextpkt;
			}
		}
	}
#ifdef MORE_LOCKING_DEBUG
    if (so->so_usecount <= 1)
        panic("soreceive: after big while so=%p ref=%d on socket\n", so, so->so_usecount);
#endif

    if (m && pr->pr_flags & PR_ATOMIC) {
#ifdef __APPLE__
        if (so->so_options & SO_DONTTRUNC)
            flags |= MSG_RCVMORE;
        else {
#endif
            flags |= MSG_TRUNC;
            if ((flags & MSG_PEEK) == 0)
                (void) sbdroprecord(&so->so_rcv);
#ifdef __APPLE__
        }
#endif
    }
    if ((flags & MSG_PEEK) == 0) {
        if (m == 0)
            so->so_rcv.sb_mb = nextrecord;
        if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
            (*pr->pr_usrreqs->pru_rcvd)(so, flags);
    }
#ifdef __APPLE__
    if ((so->so_options & SO_WANTMORE) && so->so_rcv.sb_cc > 0)
        flags |= MSG_HAVEMORE;

    if (delayed_copy_len) {
        error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);

        if (error)
            goto release;
    }
    if (free_list) {
        m_freem_list((struct mbuf *)free_list);
        free_list = (struct mbuf *)0;
    }
    if (need_event)
        postevent(so, 0, EV_OOB);
#endif
    if (orig_resid == uio_resid(uio) && orig_resid &&
        (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
        sbunlock(&so->so_rcv, 1);
        goto restart;
    }

    if (flagsp)
        *flagsp |= flags;
release:
#ifdef MORE_LOCKING_DEBUG
    if (so->so_usecount <= 1)
        panic("soreceive: release so=%p ref=%d on socket\n", so, so->so_usecount);
#endif
    if (delayed_copy_len) {
        error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);
    }
    if (free_list) {
        m_freem_list((struct mbuf *)free_list);
    }
    sbunlock(&so->so_rcv, 0);    /* will unlock socket */

    // LP64todo - fix this!
    KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END,
        so,
        uio_resid(uio),
        so->so_rcv.sb_cc,
        0,
        error);

    return (error);
}
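/*
 * sodelayed_copy: drain the deferred free_list into the caller's uio.
 * The socket lock is dropped around uiomove() because the copyout may
 * fault and sleep; the mbufs on free_list were already unlinked from
 * the receive buffer by soreceive(), so nothing else references them
 * while the lock is released.
 */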
static int sodelayed_copy(struct socket *so, struct uio *uio, struct mbuf **free_list, int *resid)
{
    int error = 0;
    struct mbuf *m;

    m = *free_list;

    socket_unlock(so, 0);

    while (m && error == 0) {

        error = uiomove(mtod(m, caddr_t), (int)m->m_len, uio);

        m = m->m_next;
    }
    m_freem_list(*free_list);

    *free_list = (struct mbuf *)NULL;
    *resid = 0;

    socket_lock(so, 0);

    return (error);
}
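/*
 * soshutdown: implement shutdown(2).  "how" is SHUT_RD, SHUT_WR or
 * SHUT_RDWR: the receive side is flushed unless how == SHUT_WR, and
 * the protocol's pru_shutdown is invoked unless how == SHUT_RD.
 */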
int
soshutdown(so, how)
    register struct socket *so;
    register int how;
{
    register struct protosw *pr = so->so_proto;
    int ret;

    socket_lock(so, 1);

    sflt_notify(so, sock_evt_shutdown, &how);

    if (how != SHUT_WR) {
        sorflush(so);
        postevent(so, 0, EV_RCLOSED);
    }
    if (how != SHUT_RD) {
        ret = ((*pr->pr_usrreqs->pru_shutdown)(so));
        postevent(so, 0, EV_WCLOSED);
        KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, 0, 0, 0, 0, 0);
        socket_unlock(so, 1);
        return (ret);
    }

    KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, 0, 0, 0, 0, 0);
    socket_unlock(so, 1);
    return (0);
}

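/*
 * sorflush: discard anything queued on the receive side.  The sockbuf
 * is copied aside and zeroed while the lock is held; the copy is then
 * released (and any file rights disposed of via dom_dispose).
 */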
void
sorflush(so)
    register struct socket *so;
{
    register struct sockbuf *sb = &so->so_rcv;
    register struct protosw *pr = so->so_proto;
    struct sockbuf asb;

#ifdef MORE_LOCKING_DEBUG
    lck_mtx_t *mutex_held;

    if (so->so_proto->pr_getlock != NULL)
        mutex_held = (*so->so_proto->pr_getlock)(so, 0);
    else
        mutex_held = so->so_proto->pr_domain->dom_mtx;
    lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
#endif

    sflt_notify(so, sock_evt_flush_read, NULL);

    sb->sb_flags |= SB_NOINTR;
    (void) sblock(sb, M_WAIT);
    socantrcvmore(so);
    sbunlock(sb, 1);
#ifdef __APPLE__
    selthreadclear(&sb->sb_sel);
#endif
    asb = *sb;
    bzero((caddr_t)sb, sizeof (*sb));
    sb->sb_so = so;    /* reestablish link to socket */
    if (asb.sb_flags & SB_KNOTE) {
        sb->sb_sel.si_note = asb.sb_sel.si_note;
        sb->sb_flags = SB_KNOTE;
    }
    if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
        (*pr->pr_domain->dom_dispose)(asb.sb_mb);
    sbrelease(&asb);
}

/*
 * Perhaps this routine, and sooptcopyout(), below, ought to come in
 * an additional variant to handle the case where the option value needs
 * to be some kind of integer, but not a specific size.
 * In addition to their use here, these functions are also called by the
 * protocol-level pr_ctloutput() routines.
 */
int
sooptcopyin(sopt, buf, len, minlen)
    struct sockopt *sopt;
    void *buf;
    size_t len;
    size_t minlen;
{
    size_t valsize;

    /*
     * If the user gives us more than we wanted, we ignore it,
     * but if we don't get the minimum length the caller
     * wants, we return EINVAL.  On success, sopt->sopt_valsize
     * is set to however much we actually retrieved.
     */
    if ((valsize = sopt->sopt_valsize) < minlen)
        return EINVAL;
    if (valsize > len)
        sopt->sopt_valsize = valsize = len;

    if (sopt->sopt_p != 0)
        return (copyin(sopt->sopt_val, buf, valsize));

    bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), buf, valsize);
    return 0;
}
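
/*
 * Typical use of sooptcopyin() in an option handler (sketch; the
 * handler context is hypothetical, not part of this file):
 *
 *    int optval;
 *
 *    error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval);
 *    if (error)
 *        return (error);
 *    ... validate and apply optval ...
 */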

int
sosetopt(so, sopt)
    struct socket *so;
    struct sockopt *sopt;
{
    int error, optval;
    struct linger l;
    struct timeval tv;
    short val;

    socket_lock(so, 1);

    if (sopt->sopt_dir != SOPT_SET) {
        sopt->sopt_dir = SOPT_SET;
    }

    {
        struct socket_filter_entry *filter;
        int filtered = 0;
        error = 0;
        for (filter = so->so_filt; filter && (error == 0);
            filter = filter->sfe_next_onsocket) {
            if (filter->sfe_filter->sf_filter.sf_setoption) {
                if (filtered == 0) {
                    filtered = 1;
                    sflt_use(so);
                    socket_unlock(so, 0);
                }
                error = filter->sfe_filter->sf_filter.sf_setoption(
                    filter->sfe_cookie, so, sopt);
            }
        }

        if (filtered != 0) {
            socket_lock(so, 0);
            sflt_unuse(so);

            if (error) {
                if (error == EJUSTRETURN)
                    error = 0;
                goto bad;
            }
        }
    }

    error = 0;
    if (sopt->sopt_level != SOL_SOCKET) {
        if (so->so_proto && so->so_proto->pr_ctloutput) {
            error = (*so->so_proto->pr_ctloutput)
                (so, sopt);
            socket_unlock(so, 1);
            return (error);
        }
        error = ENOPROTOOPT;
    } else {
        switch (sopt->sopt_name) {
        case SO_LINGER:
        case SO_LINGER_SEC:
            error = sooptcopyin(sopt, &l, sizeof l, sizeof l);
            if (error)
                goto bad;

            so->so_linger = (sopt->sopt_name == SO_LINGER) ? l.l_linger : l.l_linger * hz;
            if (l.l_onoff)
                so->so_options |= SO_LINGER;
            else
                so->so_options &= ~SO_LINGER;
            break;

        case SO_DEBUG:
        case SO_KEEPALIVE:
        case SO_DONTROUTE:
        case SO_USELOOPBACK:
        case SO_BROADCAST:
        case SO_REUSEADDR:
        case SO_REUSEPORT:
        case SO_OOBINLINE:
        case SO_TIMESTAMP:
#ifdef __APPLE__
        case SO_DONTTRUNC:
        case SO_WANTMORE:
        case SO_WANTOOBFLAG:
#endif
            error = sooptcopyin(sopt, &optval, sizeof optval,
                sizeof optval);
            if (error)
                goto bad;
            if (optval)
                so->so_options |= sopt->sopt_name;
            else
                so->so_options &= ~sopt->sopt_name;
            break;

        case SO_SNDBUF:
        case SO_RCVBUF:
        case SO_SNDLOWAT:
        case SO_RCVLOWAT:
            error = sooptcopyin(sopt, &optval, sizeof optval,
                sizeof optval);
            if (error)
                goto bad;

            /*
             * Values < 1 make no sense for any of these
             * options, so disallow them.
             */
            if (optval < 1) {
                error = EINVAL;
                goto bad;
            }

            switch (sopt->sopt_name) {
            case SO_SNDBUF:
            case SO_RCVBUF:
                if (sbreserve(sopt->sopt_name == SO_SNDBUF ?
                    &so->so_snd : &so->so_rcv,
                    (u_long) optval) == 0) {
                    error = ENOBUFS;
                    goto bad;
                }
                break;

            /*
             * Make sure the low-water is never greater than
             * the high-water.
             */
            case SO_SNDLOWAT:
                so->so_snd.sb_lowat =
                    (optval > so->so_snd.sb_hiwat) ?
                    so->so_snd.sb_hiwat : optval;
                break;
            case SO_RCVLOWAT:
                so->so_rcv.sb_lowat =
                    (optval > so->so_rcv.sb_hiwat) ?
                    so->so_rcv.sb_hiwat : optval;
                break;
            }
            break;

        case SO_SNDTIMEO:
        case SO_RCVTIMEO:
            error = sooptcopyin(sopt, &tv, sizeof tv,
                sizeof tv);
            if (error)
                goto bad;

            if (tv.tv_sec < 0 || tv.tv_sec > LONG_MAX ||
                tv.tv_usec < 0 || tv.tv_usec >= 1000000) {
                error = EDOM;
                goto bad;
            }

            switch (sopt->sopt_name) {
            case SO_SNDTIMEO:
                so->so_snd.sb_timeo = tv;
                break;
            case SO_RCVTIMEO:
                so->so_rcv.sb_timeo = tv;
                break;
            }
            break;

        case SO_NKE:
        {
            struct so_nke nke;

            error = sooptcopyin(sopt, &nke,
                sizeof nke, sizeof nke);
            if (error)
                goto bad;

            error = sflt_attach_private(so, NULL, nke.nke_handle, 1);
            break;
        }

        case SO_NOSIGPIPE:
            error = sooptcopyin(sopt, &optval, sizeof optval,
                sizeof optval);
            if (error)
                goto bad;
            if (optval)
                so->so_flags |= SOF_NOSIGPIPE;
            else
                so->so_flags &= ~SOF_NOSIGPIPE;

            break;

        case SO_NOADDRERR:
            error = sooptcopyin(sopt, &optval, sizeof optval,
                sizeof optval);
            if (error)
                goto bad;
            if (optval)
                so->so_flags |= SOF_NOADDRAVAIL;
            else
                so->so_flags &= ~SOF_NOADDRAVAIL;

            break;

        default:
            error = ENOPROTOOPT;
            break;
        }
        if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
            (void) ((*so->so_proto->pr_ctloutput)
                (so, sopt));
        }
    }
bad:
    socket_unlock(so, 1);
    return (error);
}
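
/*
 * Userland example (sketch, fd is assumed to be a connected socket):
 * SO_LINGER_SEC is the Apple variant whose l_linger is in seconds;
 * plain SO_LINGER stores the value unconverted, while SO_LINGER_SEC
 * is scaled by hz above.
 *
 *    struct linger l = { 1, 5 };    l_onoff = 1, l_linger = 5 seconds
 *    setsockopt(fd, SOL_SOCKET, SO_LINGER_SEC, &l, sizeof (l));
 */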

/* Helper routine for getsockopt */
int
sooptcopyout(sopt, buf, len)
    struct sockopt *sopt;
    void *buf;
    size_t len;
{
    int error;
    size_t valsize;

    error = 0;

    /*
     * Documented get behavior is that we always return a value,
     * possibly truncated to fit in the user's buffer.
     * Traditional behavior is that we always tell the user
     * precisely how much we copied, rather than something useful
     * like the total amount we had available for her.
     * Note that this interface is not idempotent; the entire answer
     * must be generated ahead of time.
     */
    valsize = min(len, sopt->sopt_valsize);
    sopt->sopt_valsize = valsize;
    if (sopt->sopt_val != USER_ADDR_NULL) {
        if (sopt->sopt_p != 0)
            error = copyout(buf, sopt->sopt_val, valsize);
        else
            bcopy(buf, CAST_DOWN(caddr_t, sopt->sopt_val), valsize);
    }
    return error;
}
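
/*
 * Typical use of sooptcopyout() in a get handler (sketch; the state
 * being reported is hypothetical):
 *
 *    int optval = ...;    whatever the option reports
 *
 *    error = sooptcopyout(sopt, &optval, sizeof optval);
 */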

int
sogetopt(so, sopt)
    struct socket *so;
    struct sockopt *sopt;
{
    int error, optval;
    struct linger l;
    struct timeval tv;

    if (sopt->sopt_dir != SOPT_GET) {
        sopt->sopt_dir = SOPT_GET;
    }

    socket_lock(so, 1);

    {
        struct socket_filter_entry *filter;
        int filtered = 0;
        error = 0;
        for (filter = so->so_filt; filter && (error == 0);
            filter = filter->sfe_next_onsocket) {
            if (filter->sfe_filter->sf_filter.sf_getoption) {
                if (filtered == 0) {
                    filtered = 1;
                    sflt_use(so);
                    socket_unlock(so, 0);
                }
                error = filter->sfe_filter->sf_filter.sf_getoption(
                    filter->sfe_cookie, so, sopt);
            }
        }
        if (filtered != 0) {
            socket_lock(so, 0);
            sflt_unuse(so);

            if (error) {
                if (error == EJUSTRETURN)
                    error = 0;
                socket_unlock(so, 1);
                return error;
            }
        }
    }

    error = 0;
    if (sopt->sopt_level != SOL_SOCKET) {
        if (so->so_proto && so->so_proto->pr_ctloutput) {
            error = (*so->so_proto->pr_ctloutput)
                (so, sopt);
            socket_unlock(so, 1);
            return (error);
        } else {
            socket_unlock(so, 1);
            return (ENOPROTOOPT);
        }
    } else {
        switch (sopt->sopt_name) {
        case SO_LINGER:
        case SO_LINGER_SEC:
            l.l_onoff = so->so_options & SO_LINGER;
            l.l_linger = (sopt->sopt_name == SO_LINGER) ? so->so_linger :
                so->so_linger / hz;
            error = sooptcopyout(sopt, &l, sizeof l);
            break;

        case SO_USELOOPBACK:
        case SO_DONTROUTE:
        case SO_DEBUG:
        case SO_KEEPALIVE:
        case SO_REUSEADDR:
        case SO_REUSEPORT:
        case SO_BROADCAST:
        case SO_OOBINLINE:
        case SO_TIMESTAMP:
#ifdef __APPLE__
        case SO_DONTTRUNC:
        case SO_WANTMORE:
        case SO_WANTOOBFLAG:
#endif
            optval = so->so_options & sopt->sopt_name;
integer:
            error = sooptcopyout(sopt, &optval, sizeof optval);
            break;

        case SO_TYPE:
            optval = so->so_type;
            goto integer;

#ifdef __APPLE__
        case SO_NREAD:
        {
            int pkt_total;
            struct mbuf *m1;

            pkt_total = 0;
            m1 = so->so_rcv.sb_mb;
            if (so->so_proto->pr_flags & PR_ATOMIC) {
                while (m1) {
                    if (m1->m_type == MT_DATA)
                        pkt_total += m1->m_len;
                    m1 = m1->m_next;
                }
                optval = pkt_total;
            } else
                optval = so->so_rcv.sb_cc;
            goto integer;
        }
        case SO_NWRITE:
            optval = so->so_snd.sb_cc;
            goto integer;
#endif
        case SO_ERROR:
            optval = so->so_error;
            so->so_error = 0;
            goto integer;

        case SO_SNDBUF:
            optval = so->so_snd.sb_hiwat;
            goto integer;

        case SO_RCVBUF:
            optval = so->so_rcv.sb_hiwat;
            goto integer;

        case SO_SNDLOWAT:
            optval = so->so_snd.sb_lowat;
            goto integer;

        case SO_RCVLOWAT:
            optval = so->so_rcv.sb_lowat;
            goto integer;

        case SO_SNDTIMEO:
        case SO_RCVTIMEO:
            tv = (sopt->sopt_name == SO_SNDTIMEO ?
                so->so_snd.sb_timeo : so->so_rcv.sb_timeo);

            error = sooptcopyout(sopt, &tv, sizeof tv);
            break;

        case SO_NOSIGPIPE:
            optval = (so->so_flags & SOF_NOSIGPIPE);
            goto integer;

        case SO_NOADDRERR:
            optval = (so->so_flags & SOF_NOADDRAVAIL);
            goto integer;

        default:
            error = ENOPROTOOPT;
            break;
        }
        socket_unlock(so, 1);
        return (error);
    }
}
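
/*
 * Userland example (sketch, fd is an assumed open socket): SO_NREAD
 * reports the bytes available to read; on a PR_ATOMIC (datagram)
 * socket that is the size of the first record only, per the loop
 * above.
 *
 *    int avail;
 *    socklen_t len = sizeof (avail);
 *
 *    getsockopt(fd, SOL_SOCKET, SO_NREAD, &avail, &len);
 */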

/* XXX; prepare mbuf for (__FreeBSD__ < 3) routines. */
int
soopt_getm(struct sockopt *sopt, struct mbuf **mp)
{
    struct mbuf *m, *m_prev;
    int sopt_size = sopt->sopt_valsize;

    if (sopt_size > MAX_SOOPTGETM_SIZE)
        return EMSGSIZE;

    MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_DATA);
    if (m == 0)
        return ENOBUFS;
    if (sopt_size > MLEN) {
        MCLGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT);
        if ((m->m_flags & M_EXT) == 0) {
            m_free(m);
            return ENOBUFS;
        }
        m->m_len = min(MCLBYTES, sopt_size);
    } else {
        m->m_len = min(MLEN, sopt_size);
    }
    sopt_size -= m->m_len;
    *mp = m;
    m_prev = m;

    while (sopt_size) {
        MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_DATA);
        if (m == 0) {
            m_freem(*mp);
            return ENOBUFS;
        }
        if (sopt_size > MLEN) {
            MCLGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT);
            if ((m->m_flags & M_EXT) == 0) {
                m_freem(*mp);
                return ENOBUFS;
            }
            m->m_len = min(MCLBYTES, sopt_size);
        } else {
            m->m_len = min(MLEN, sopt_size);
        }
        sopt_size -= m->m_len;
        m_prev->m_next = m;
        m_prev = m;
    }
    return 0;
}
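
/*
 * Each link in the chain built above holds at most MLEN bytes, or
 * MCLBYTES once a cluster is attached, so soopt_mcopyin() and
 * soopt_mcopyout() below can walk it using sopt_valsize as the only
 * remaining-length bookkeeping.
 */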

/* XXX; copyin sopt data into mbuf chain for (__FreeBSD__ < 3) routines. */
int
soopt_mcopyin(struct sockopt *sopt, struct mbuf *m)
{
    struct mbuf *m0 = m;

    if (sopt->sopt_val == USER_ADDR_NULL)
        return 0;
    while (m != NULL && sopt->sopt_valsize >= m->m_len) {
        if (sopt->sopt_p != NULL) {
            int error;

            error = copyin(sopt->sopt_val, mtod(m, char *), m->m_len);
            if (error != 0) {
                m_freem(m0);
                return (error);
            }
        } else
            bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), mtod(m, char *), m->m_len);
        sopt->sopt_valsize -= m->m_len;
        sopt->sopt_val += m->m_len;
        m = m->m_next;
    }
    if (m != NULL)    /* should have been allocated large enough by ip6_sooptmcopyin() */
        panic("soopt_mcopyin");
    return 0;
}

/* XXX; copyout mbuf chain data into soopt for (__FreeBSD__ < 3) routines. */
int
soopt_mcopyout(struct sockopt *sopt, struct mbuf *m)
{
    struct mbuf *m0 = m;
    size_t valsize = 0;

    if (sopt->sopt_val == USER_ADDR_NULL)
        return 0;
    while (m != NULL && sopt->sopt_valsize >= m->m_len) {
        if (sopt->sopt_p != NULL) {
            int error;

            error = copyout(mtod(m, char *), sopt->sopt_val, m->m_len);
            if (error != 0) {
                m_freem(m0);
                return (error);
            }
        } else
            bcopy(mtod(m, char *), CAST_DOWN(caddr_t, sopt->sopt_val), m->m_len);
        sopt->sopt_valsize -= m->m_len;
        sopt->sopt_val += m->m_len;
        valsize += m->m_len;
        m = m->m_next;
    }
    if (m != NULL) {
        /* user-land should have supplied a large enough soopt buffer */
        m_freem(m0);
        return (EINVAL);
    }
    sopt->sopt_valsize = valsize;
    return 0;
}

void
sohasoutofband(so)
    register struct socket *so;
{
    struct proc *p;

    if (so->so_pgid < 0)
        gsignal(-so->so_pgid, SIGURG);
    else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
        psignal(p, SIGURG);
    selwakeup(&so->so_rcv.sb_sel);
}

int
sopoll(struct socket *so, int events, __unused kauth_cred_t cred, void *wql)
{
    struct proc *p = current_proc();
    int revents = 0;

    socket_lock(so, 1);

    if (events & (POLLIN | POLLRDNORM))
        if (soreadable(so))
            revents |= events & (POLLIN | POLLRDNORM);

    if (events & (POLLOUT | POLLWRNORM))
        if (sowriteable(so))
            revents |= events & (POLLOUT | POLLWRNORM);

    if (events & (POLLPRI | POLLRDBAND))
        if (so->so_oobmark || (so->so_state & SS_RCVATMARK))
            revents |= events & (POLLPRI | POLLRDBAND);

    if (revents == 0) {
        if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
            /* Darwin sets the flag first, BSD calls selrecord first */
            so->so_rcv.sb_flags |= SB_SEL;
            selrecord(p, &so->so_rcv.sb_sel, wql);
        }

        if (events & (POLLOUT | POLLWRNORM)) {
            /* Darwin sets the flag first, BSD calls selrecord first */
            so->so_snd.sb_flags |= SB_SEL;
            selrecord(p, &so->so_snd.sb_sel, wql);
        }
    }

    socket_unlock(so, 1);
    return (revents);
}
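
/*
 * kqueue support: soo_kqfilter() attaches a knote to the appropriate
 * sockbuf, and the filt_so* routines below evaluate readiness.  Each
 * filter takes the socket lock itself unless the caller indicates it
 * is already held via SO_FILT_HINT_LOCKED.
 */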

int soo_kqfilter(struct fileproc *fp, struct knote *kn, struct proc *p);

int
soo_kqfilter(__unused struct fileproc *fp, struct knote *kn, __unused struct proc *p)
{
    struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
    struct sockbuf *sb;
    socket_lock(so, 1);

    switch (kn->kn_filter) {
    case EVFILT_READ:
        if (so->so_options & SO_ACCEPTCONN)
            kn->kn_fop = &solisten_filtops;
        else
            kn->kn_fop = &soread_filtops;
        sb = &so->so_rcv;
        break;
    case EVFILT_WRITE:
        kn->kn_fop = &sowrite_filtops;
        sb = &so->so_snd;
        break;
    default:
        socket_unlock(so, 1);
        return (1);
    }

    if (KNOTE_ATTACH(&sb->sb_sel.si_note, kn))
        sb->sb_flags |= SB_KNOTE;
    socket_unlock(so, 1);
    return (0);
}

static void
filt_sordetach(struct knote *kn)
{
    struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;

    socket_lock(so, 1);
    if (so->so_rcv.sb_flags & SB_KNOTE)
        if (KNOTE_DETACH(&so->so_rcv.sb_sel.si_note, kn))
            so->so_rcv.sb_flags &= ~SB_KNOTE;
    socket_unlock(so, 1);
}

/*ARGSUSED*/
static int
filt_soread(struct knote *kn, long hint)
{
    struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;

    if ((hint & SO_FILT_HINT_LOCKED) == 0)
        socket_lock(so, 1);

    if (so->so_oobmark) {
        if (kn->kn_flags & EV_OOBAND) {
            kn->kn_data = so->so_rcv.sb_cc - so->so_oobmark;
            if ((hint & SO_FILT_HINT_LOCKED) == 0)
                socket_unlock(so, 1);
            return (1);
        }
        kn->kn_data = so->so_oobmark;
        kn->kn_flags |= EV_OOBAND;
    } else {
        kn->kn_data = so->so_rcv.sb_cc;
        if (so->so_state & SS_CANTRCVMORE) {
            kn->kn_flags |= EV_EOF;
            kn->kn_fflags = so->so_error;
            if ((hint & SO_FILT_HINT_LOCKED) == 0)
                socket_unlock(so, 1);
            return (1);
        }
    }

    if (so->so_state & SS_RCVATMARK) {
        if (kn->kn_flags & EV_OOBAND) {
            if ((hint & SO_FILT_HINT_LOCKED) == 0)
                socket_unlock(so, 1);
            return (1);
        }
        kn->kn_flags |= EV_OOBAND;
    } else if (kn->kn_flags & EV_OOBAND) {
        kn->kn_data = 0;
        if ((hint & SO_FILT_HINT_LOCKED) == 0)
            socket_unlock(so, 1);
        return (0);
    }

    if (so->so_error) {    /* temporary udp error */
        if ((hint & SO_FILT_HINT_LOCKED) == 0)
            socket_unlock(so, 1);
        return (1);
    }

    if ((hint & SO_FILT_HINT_LOCKED) == 0)
        socket_unlock(so, 1);

    return (kn->kn_flags & EV_OOBAND ||
        kn->kn_data >= ((kn->kn_sfflags & NOTE_LOWAT) ?
        kn->kn_sdata : so->so_rcv.sb_lowat));
}

static void
filt_sowdetach(struct knote *kn)
{
    struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
    socket_lock(so, 1);

    if (so->so_snd.sb_flags & SB_KNOTE)
        if (KNOTE_DETACH(&so->so_snd.sb_sel.si_note, kn))
            so->so_snd.sb_flags &= ~SB_KNOTE;
    socket_unlock(so, 1);
}

/*ARGSUSED*/
static int
filt_sowrite(struct knote *kn, long hint)
{
    struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;

    if ((hint & SO_FILT_HINT_LOCKED) == 0)
        socket_lock(so, 1);

    kn->kn_data = sbspace(&so->so_snd);
    if (so->so_state & SS_CANTSENDMORE) {
        kn->kn_flags |= EV_EOF;
        kn->kn_fflags = so->so_error;
        if ((hint & SO_FILT_HINT_LOCKED) == 0)
            socket_unlock(so, 1);
        return (1);
    }
    if (so->so_error) {    /* temporary udp error */
        if ((hint & SO_FILT_HINT_LOCKED) == 0)
            socket_unlock(so, 1);
        return (1);
    }
    if (((so->so_state & SS_ISCONNECTED) == 0) &&
        (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
        if ((hint & SO_FILT_HINT_LOCKED) == 0)
            socket_unlock(so, 1);
        return (0);
    }
    if ((hint & SO_FILT_HINT_LOCKED) == 0)
        socket_unlock(so, 1);
    if (kn->kn_sfflags & NOTE_LOWAT)
        return (kn->kn_data >= kn->kn_sdata);
    return (kn->kn_data >= so->so_snd.sb_lowat);
}

/*ARGSUSED*/
static int
filt_solisten(struct knote *kn, long hint)
{
    struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
    int ready;

    if ((hint & SO_FILT_HINT_LOCKED) == 0)
        socket_lock(so, 1);
    kn->kn_data = so->so_qlen;
    ready = !TAILQ_EMPTY(&so->so_comp);    /* completed connections pending? */
    if ((hint & SO_FILT_HINT_LOCKED) == 0)
        socket_unlock(so, 1);
    return (ready);
}
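
/*
 * Userland example (sketch, assuming a listening socket fd and an
 * existing kqueue kq): EVFILT_READ on a listening socket selects
 * solisten_filtops above, so kn_data reports the completed-connection
 * backlog rather than byte counts.
 *
 *    struct kevent ev;
 *
 *    EV_SET(&ev, fd, EVFILT_READ, EV_ADD, 0, 0, NULL);
 *    kevent(kq, &ev, 1, NULL, 0, NULL);
 */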

int
socket_lock(so, refcount)
    struct socket *so;
    int refcount;
{
    int error = 0, lr, lr_saved;
#ifdef __ppc__
    __asm__ volatile("mflr %0" : "=r" (lr));
    lr_saved = lr;
#else
    lr = lr_saved = 0;    /* no link register to record on this arch */
#endif

    if (so->so_proto->pr_lock) {
        error = (*so->so_proto->pr_lock)(so, refcount, lr_saved);
    }
    else {
#ifdef MORE_LOCKING_DEBUG
        lck_mtx_assert(so->so_proto->pr_domain->dom_mtx, LCK_MTX_ASSERT_NOTOWNED);
#endif
        lck_mtx_lock(so->so_proto->pr_domain->dom_mtx);
        if (refcount)
            so->so_usecount++;
        so->reserved3 = (void*)lr_saved;    /* save caller for refcount going to zero */
    }

    return (error);
}

int
socket_unlock(so, refcount)
    struct socket *so;
    int refcount;
{
    int error = 0, lr, lr_saved;
    lck_mtx_t *mutex_held;

#ifdef __ppc__
    __asm__ volatile("mflr %0" : "=r" (lr));
    lr_saved = lr;
#else
    lr = lr_saved = 0;    /* no link register to record on this arch */
#endif

    if (so == NULL || so->so_proto == NULL)
        panic("socket_unlock: null so_proto so=%p\n", so);

    if (so->so_proto->pr_unlock)
        error = (*so->so_proto->pr_unlock)(so, refcount, lr_saved);
    else {
        mutex_held = so->so_proto->pr_domain->dom_mtx;
#ifdef MORE_LOCKING_DEBUG
        lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
#endif
        if (refcount) {
            if (so->so_usecount <= 0)
                panic("socket_unlock: bad refcount so=%p value=%d\n", so, so->so_usecount);
            so->so_usecount--;
            if (so->so_usecount == 0) {
                sofreelastref(so, 1);
            }
            else
                so->reserved4 = (void*)lr_saved;    /* save caller */
        }
        lck_mtx_unlock(mutex_held);
    }

    return (error);
}
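
/*
 * Locking/refcount discipline (sketch): passing refcount == 1 to
 * socket_lock()/socket_unlock() takes or drops a use count along with
 * the mutex; refcount == 0 is pure mutual exclusion.  soreference()
 * and sodereference() below are the canonical pairing:
 *
 *    socket_lock(so, 1);      lock and hold a reference
 *    ...
 *    socket_unlock(so, 1);    unlock; the last reference frees the socket
 */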
//### Called with socket locked, will unlock socket
void
sofree(so)
    struct socket *so;
{
    int lr, lr_saved;
    lck_mtx_t *mutex_held;
#ifdef __ppc__
    __asm__ volatile("mflr %0" : "=r" (lr));
    lr_saved = lr;
#else
    lr = lr_saved = 0;    /* no link register to record on this arch */
#endif
    if (so->so_proto->pr_getlock != NULL)
        mutex_held = (*so->so_proto->pr_getlock)(so, 0);
    else
        mutex_held = so->so_proto->pr_domain->dom_mtx;
    lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

    sofreelastref(so, 0);
}

void
soreference(so)
    struct socket *so;
{
    socket_lock(so, 1);    /* locks & take one reference on socket */
    socket_unlock(so, 0);    /* unlock only */
}

void
sodereference(so)
    struct socket *so;
{
    socket_lock(so, 0);
    socket_unlock(so, 1);
}