/*
 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1998, 1999 Apple Computer, Inc. All Rights Reserved */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket.c	8.3 (Berkeley) 4/15/94
 * $FreeBSD: src/sys/kern/uipc_socket.c,v 1.68.2.16 2001/06/14 20:46:06 ume Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/file_internal.h>
#include <sys/fcntl.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/event.h>
#include <sys/poll.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/uio.h>
#include <sys/ev.h>
#include <sys/kdebug.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <kern/zalloc.h>
#include <kern/locks.h>
#include <machine/limits.h>

int		so_cache_hw = 0;
int		so_cache_timeouts = 0;
int		so_cache_max_freed = 0;
int		cached_sock_count = 0;
struct socket	*socket_cache_head = 0;
struct socket	*socket_cache_tail = 0;
u_long		so_cache_time = 0;
int		so_cache_init_done = 0;
struct zone	*so_cache_zone;
extern int	get_inpcb_str_size();
extern int	get_tcp_str_size();

static lck_grp_t	*so_cache_mtx_grp;
static lck_attr_t	*so_cache_mtx_attr;
static lck_grp_attr_t	*so_cache_mtx_grp_attr;
lck_mtx_t		*so_cache_mtx;

static void	filt_sordetach(struct knote *kn);
static int	filt_soread(struct knote *kn, long hint);
static void	filt_sowdetach(struct knote *kn);
static int	filt_sowrite(struct knote *kn, long hint);
static int	filt_solisten(struct knote *kn, long hint);

static struct filterops solisten_filtops =
	{ 1, NULL, filt_sordetach, filt_solisten };
static struct filterops soread_filtops =
	{ 1, NULL, filt_sordetach, filt_soread };
static struct filterops sowrite_filtops =
	{ 1, NULL, filt_sowdetach, filt_sowrite };

#define EVEN_MORE_LOCKING_DEBUG 0
int socket_debug = 0;
int socket_zone = M_SOCKET;
so_gen_t so_gencnt;	/* generation count for sockets */

MALLOC_DEFINE(M_SONAME, "soname", "socket name");
MALLOC_DEFINE(M_PCB, "pcb", "protocol control block");

#define	DBG_LAYER_IN_BEG	NETDBG_CODE(DBG_NETSOCK, 0)
#define	DBG_LAYER_IN_END	NETDBG_CODE(DBG_NETSOCK, 2)
#define	DBG_LAYER_OUT_BEG	NETDBG_CODE(DBG_NETSOCK, 1)
#define	DBG_LAYER_OUT_END	NETDBG_CODE(DBG_NETSOCK, 3)
#define	DBG_FNC_SOSEND		NETDBG_CODE(DBG_NETSOCK, (4 << 8) | 1)
#define	DBG_FNC_SORECEIVE	NETDBG_CODE(DBG_NETSOCK, (8 << 8))
#define	DBG_FNC_SOSHUTDOWN	NETDBG_CODE(DBG_NETSOCK, (9 << 8))

#define	MAX_SOOPTGETM_SIZE	(128 * MCLBYTES)


SYSCTL_DECL(_kern_ipc);

static int somaxconn = SOMAXCONN;
SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW, &somaxconn,
	   0, "");

/* Should we get a maximum also ??? */
static int sosendmaxchain = 65536;
static int sosendminchain = 16384;
static int sorecvmincopy  = 16384;
SYSCTL_INT(_kern_ipc, OID_AUTO, sosendminchain, CTLFLAG_RW, &sosendminchain,
	   0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, sorecvmincopy, CTLFLAG_RW, &sorecvmincopy,
	   0, "");

void so_cache_timer();

/*
 * Socket operation routines.
 * These routines are called by the routines in
 * sys_socket.c or from a system process, and
 * implement the semantics of socket operations by
 * switching out to the protocol specific routines.
 */

#ifdef __APPLE__

vm_size_t	so_cache_zone_element_size;

static int sodelayed_copy(struct socket *so, struct uio *uio, struct mbuf **free_list, int *resid);

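/*
 * socketinit: one-time startup. Sets up the lock group/attributes and mutex
 * protecting the cached-socket list, creates the so_cache zone sized to hold
 * a socket plus its inpcb and tcpcb, and arms the cache-flush timeout.
 */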
void socketinit()
{
	vm_size_t str_size;

	if (so_cache_init_done) {
		printf("socketinit: already called...\n");
		return;
	}

	/*
	 * allocate lock group attribute and group for socket cache mutex
	 */
	so_cache_mtx_grp_attr = lck_grp_attr_alloc_init();
	lck_grp_attr_setdefault(so_cache_mtx_grp_attr);

	so_cache_mtx_grp = lck_grp_alloc_init("so_cache", so_cache_mtx_grp_attr);

	/*
	 * allocate the lock attribute for socket cache mutex
	 */
	so_cache_mtx_attr = lck_attr_alloc_init();
	lck_attr_setdefault(so_cache_mtx_attr);

	so_cache_init_done = 1;

	so_cache_mtx = lck_mtx_alloc_init(so_cache_mtx_grp, so_cache_mtx_attr);	/* cached sockets mutex */

	if (so_cache_mtx == NULL)
		return;		/* we're hosed... */

	str_size = (vm_size_t)( sizeof(struct socket) + 4 +
			get_inpcb_str_size() + 4 +
			get_tcp_str_size());
	so_cache_zone = zinit(str_size, 120000*str_size, 8192, "socache zone");
#if TEMPDEBUG
	printf("cached_sock_alloc -- so_cache_zone size is %x\n", str_size);
#endif
	timeout(so_cache_timer, NULL, (SO_CACHE_FLUSH_INTERVAL * hz));

	so_cache_zone_element_size = str_size;

	sflt_init();
}

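/*
 * cached_sock_alloc: pop a socket from the cache when one is available;
 * otherwise carve one so_cache_zone element into its three parts:
 *
 *	[ struct socket | pad | inpcb (so_saved_pcb) | pad | tcpcb ]
 *
 * with each sub-structure aligned on a longword boundary.
 */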
void cached_sock_alloc(so, waitok)
struct socket **so;
int waitok;
{
	caddr_t	temp;
	register u_long offset;

	lck_mtx_lock(so_cache_mtx);

	if (cached_sock_count) {
		cached_sock_count--;
		*so = socket_cache_head;
		if (*so == 0)
			panic("cached_sock_alloc: cached sock is null");

		socket_cache_head = socket_cache_head->cache_next;
		if (socket_cache_head)
			socket_cache_head->cache_prev = 0;
		else
			socket_cache_tail = 0;

		lck_mtx_unlock(so_cache_mtx);

		temp = (*so)->so_saved_pcb;
		bzero((caddr_t)*so, sizeof(struct socket));
#if TEMPDEBUG
		kprintf("cached_sock_alloc - retrieving cached sock %x - count == %d\n", *so,
			cached_sock_count);
#endif
		(*so)->so_saved_pcb = temp;
		(*so)->cached_in_sock_layer = 1;

	}
	else {
#if TEMPDEBUG
		kprintf("Allocating cached sock %x from memory\n", *so);
#endif

		lck_mtx_unlock(so_cache_mtx);

		if (waitok)
			*so = (struct socket *) zalloc(so_cache_zone);
		else
			*so = (struct socket *) zalloc_noblock(so_cache_zone);

		if (*so == 0)
			return;

		bzero((caddr_t)*so, sizeof(struct socket));

		/*
		 * Define offsets for extra structures into our single block of
		 * memory. Align extra structures on longword boundaries.
		 */

		offset = (u_long) *so;
		offset += sizeof(struct socket);
		if (offset & 0x3) {
			offset += 4;
			offset &= 0xfffffffc;
		}
		(*so)->so_saved_pcb = (caddr_t) offset;
		offset += get_inpcb_str_size();
		if (offset & 0x3) {
			offset += 4;
			offset &= 0xfffffffc;
		}

		((struct inpcb *) (*so)->so_saved_pcb)->inp_saved_ppcb = (caddr_t) offset;
#if TEMPDEBUG
		kprintf("Allocating cached socket - %x, pcb=%x tcpcb=%x\n", *so,
			(*so)->so_saved_pcb,
			((struct inpcb *)(*so)->so_saved_pcb)->inp_saved_ppcb);
#endif
	}

	(*so)->cached_in_sock_layer = 1;
}


void cached_sock_free(so)
struct socket *so;
{

	lck_mtx_lock(so_cache_mtx);

	if (++cached_sock_count > MAX_CACHED_SOCKETS) {
		--cached_sock_count;
		lck_mtx_unlock(so_cache_mtx);
#if TEMPDEBUG
		kprintf("Freeing overflowed cached socket %x\n", so);
#endif
		zfree(so_cache_zone, so);
	}
	else {
#if TEMPDEBUG
		kprintf("Freeing socket %x into cache\n", so);
#endif
		if (so_cache_hw < cached_sock_count)
			so_cache_hw = cached_sock_count;

		so->cache_next = socket_cache_head;
		so->cache_prev = 0;
		if (socket_cache_head)
			socket_cache_head->cache_prev = so;
		else
			socket_cache_tail = so;

		so->cache_timestamp = so_cache_time;
		socket_cache_head = so;
		lck_mtx_unlock(so_cache_mtx);
	}

#if TEMPDEBUG
	kprintf("Freed cached sock %x into cache - count is %d\n", so, cached_sock_count);
#endif

}

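/*
 * so_cache_timer: fires every SO_CACHE_FLUSH_INTERVAL ticks; frees cached
 * sockets idle past SO_CACHE_TIME_LIMIT (at most SO_CACHE_MAX_FREE_BATCH
 * per pass) and then re-arms itself.
 */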
void so_cache_timer()
{
	register struct socket	*p;
	register int		n_freed = 0;

	lck_mtx_lock(so_cache_mtx);

	++so_cache_time;

	while ( (p = socket_cache_tail) )
	{
		if ((so_cache_time - p->cache_timestamp) < SO_CACHE_TIME_LIMIT)
			break;

		so_cache_timeouts++;

		if ( (socket_cache_tail = p->cache_prev) )
			p->cache_prev->cache_next = 0;
		if (--cached_sock_count == 0)
			socket_cache_head = 0;

		zfree(so_cache_zone, p);

		if (++n_freed >= SO_CACHE_MAX_FREE_BATCH)
		{
			so_cache_max_freed++;
			break;
		}
	}
	lck_mtx_unlock(so_cache_mtx);

	timeout(so_cache_timer, NULL, (SO_CACHE_FLUSH_INTERVAL * hz));
}
#endif /* __APPLE__ */

/*
 * Get a socket structure from our zone, and initialize it.
 * We don't implement `waitok' yet (see comments in uipc_domain.c).
 * Note that it would probably be better to allocate socket
 * and PCB at the same time, but I'm not convinced that all
 * the protocols can be easily modified to do this.
 */
struct socket *
soalloc(waitok, dom, type)
	int waitok;
	int dom;
	int type;
{
	struct socket *so;

	if ((dom == PF_INET) && (type == SOCK_STREAM))
		cached_sock_alloc(&so, waitok);
	else
	{
		MALLOC_ZONE(so, struct socket *, sizeof(*so), socket_zone, M_WAITOK);
		if (so)
			bzero(so, sizeof *so);
	}
	/* XXX race condition for reentrant kernel */
//###LD Atomic add for so_gencnt
	if (so) {
		so->so_gencnt = ++so_gencnt;
		so->so_zone = socket_zone;
	}

	return so;
}

int
socreate(dom, aso, type, proto)
	int dom;
	struct socket **aso;
	register int type;
	int proto;
{
	struct proc *p = current_proc();
	register struct protosw *prp;
	register struct socket *so;
	register int error = 0;
#if TCPDEBUG
	extern int tcpconsdebug;
#endif
	if (proto)
		prp = pffindproto(dom, proto, type);
	else
		prp = pffindtype(dom, type);

	if (prp == 0 || prp->pr_usrreqs->pru_attach == 0)
		return (EPROTONOSUPPORT);
#ifndef __APPLE__
	if (p->p_prison && jail_socket_unixiproute_only &&
	    prp->pr_domain->dom_family != PF_LOCAL &&
	    prp->pr_domain->dom_family != PF_INET &&
	    prp->pr_domain->dom_family != PF_ROUTE) {
		return (EPROTONOSUPPORT);
	}
#endif
	if (prp->pr_type != type)
		return (EPROTOTYPE);
	so = soalloc(p != 0, dom, type);
	if (so == 0)
		return (ENOBUFS);

	TAILQ_INIT(&so->so_incomp);
	TAILQ_INIT(&so->so_comp);
	so->so_type = type;

#ifdef __APPLE__
	if (p != 0) {
		so->so_uid = kauth_cred_getuid(kauth_cred_get());
		if (!suser(kauth_cred_get(), NULL))
			so->so_state = SS_PRIV;
	}
#else
	so->so_cred = kauth_cred_get_with_ref();
#endif
	so->so_proto = prp;
#ifdef __APPLE__
	so->so_rcv.sb_flags |= SB_RECV;	/* XXX */
	so->so_rcv.sb_so = so->so_snd.sb_so = so;
#endif

//### Attachment will create the per pcb lock if necessary and increase refcount
	so->so_usecount++;	/* for creation, make sure it's done before socket is inserted in lists */

	error = (*prp->pr_usrreqs->pru_attach)(so, proto, p);
	if (error) {
		/*
		 * Warning:
		 * If so_pcb is not zero, the socket will be leaked,
		 * so protocol attachment handler must be coded carefully
		 */
		so->so_state |= SS_NOFDREF;
		so->so_usecount--;
		sofreelastref(so, 1);	/* will deallocate the socket */
		return (error);
	}
#ifdef __APPLE__
	prp->pr_domain->dom_refs++;
	TAILQ_INIT(&so->so_evlist);

	/* Attach socket filters for this protocol */
	sflt_initsock(so);
#if TCPDEBUG
	if (tcpconsdebug == 2)
		so->so_options |= SO_DEBUG;
#endif
#endif

	*aso = so;
	return (0);
}

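/*
 * sobind runs any attached socket filters' sf_bind callbacks before handing
 * the request to the protocol's pru_bind. Filters are invoked with the
 * socket unlocked, so it is marked in-use via sflt_use() for the duration;
 * a filter may return EJUSTRETURN to consume the bind.
 */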
int
sobind(so, nam)
	struct socket *so;
	struct sockaddr *nam;
{
	struct proc *p = current_proc();
	int error = 0;
	struct socket_filter_entry *filter;
	int filtered = 0;

	socket_lock(so, 1);

	/* Socket filter */
	error = 0;
	for (filter = so->so_filt; filter && (error == 0);
	     filter = filter->sfe_next_onsocket) {
		if (filter->sfe_filter->sf_filter.sf_bind) {
			if (filtered == 0) {
				filtered = 1;
				sflt_use(so);
				socket_unlock(so, 0);
			}
			error = filter->sfe_filter->sf_filter.sf_bind(
				filter->sfe_cookie, so, nam);
		}
	}
	if (filtered != 0) {
		socket_lock(so, 0);
		sflt_unuse(so);
	}
	/* End socket filter */

	if (error == 0)
		error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p);

	socket_unlock(so, 1);

	if (error == EJUSTRETURN)
		error = 0;

	return (error);
}

void
sodealloc(so)
	struct socket *so;
{
	so->so_gencnt = ++so_gencnt;

#ifndef __APPLE__
	if (so->so_rcv.sb_hiwat)
		(void)chgsbsize(so->so_cred->cr_uidinfo,
		    &so->so_rcv.sb_hiwat, 0, RLIM_INFINITY);
	if (so->so_snd.sb_hiwat)
		(void)chgsbsize(so->so_cred->cr_uidinfo,
		    &so->so_snd.sb_hiwat, 0, RLIM_INFINITY);
#ifdef INET
	if (so->so_accf != NULL) {
		if (so->so_accf->so_accept_filter != NULL &&
		    so->so_accf->so_accept_filter->accf_destroy != NULL) {
			so->so_accf->so_accept_filter->accf_destroy(so);
		}
		if (so->so_accf->so_accept_filter_str != NULL)
			FREE(so->so_accf->so_accept_filter_str, M_ACCF);
		FREE(so->so_accf, M_ACCF);
	}
#endif /* INET */
	kauth_cred_rele(so->so_cred);
	zfreei(so->so_zone, so);
#else
	if (so->cached_in_sock_layer == 1)
		cached_sock_free(so);
	else {
		if (so->cached_in_sock_layer == -1)
			panic("sodealloc: double dealloc: so=%x\n", so);
		so->cached_in_sock_layer = -1;
		FREE_ZONE(so, sizeof(*so), so->so_zone);
	}
#endif /* __APPLE__ */
}

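/*
 * solisten: run sf_listen filters, then let the protocol move the socket
 * into the listening state via pru_listen; backlog is clamped to somaxconn.
 */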
int
solisten(so, backlog)
	register struct socket *so;
	int backlog;
{
	struct proc *p = current_proc();
	int error;

	socket_lock(so, 1);

	{
		struct socket_filter_entry *filter;
		int filtered = 0;
		error = 0;
		for (filter = so->so_filt; filter && (error == 0);
		     filter = filter->sfe_next_onsocket) {
			if (filter->sfe_filter->sf_filter.sf_listen) {
				if (filtered == 0) {
					filtered = 1;
					sflt_use(so);
					socket_unlock(so, 0);
				}
				error = filter->sfe_filter->sf_filter.sf_listen(
					filter->sfe_cookie, so);
			}
		}
		if (filtered != 0) {
			socket_lock(so, 0);
			sflt_unuse(so);
		}
	}

	if (error == 0) {
		error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p);
	}

	if (error) {
		socket_unlock(so, 1);
		if (error == EJUSTRETURN)
			error = 0;
		return (error);
	}

	if (TAILQ_EMPTY(&so->so_comp))
		so->so_options |= SO_ACCEPTCONN;
	if (backlog < 0 || backlog > somaxconn)
		backlog = somaxconn;
	so->so_qlimit = backlog;

	socket_unlock(so, 1);
	return (0);
}

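/*
 * sofreelastref: tear down a socket whose last reference is gone. Bails out
 * (leaving the socket alive) while the protocol still owns the pcb, and
 * never decommissions a socket sitting on a listener's accept(2) queue.
 */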
void
sofreelastref(so, dealloc)
	register struct socket *so;
	int dealloc;
{
	int error;
	struct socket *head = so->so_head;

	/*### Assume socket is locked */

	/* Remove any filters - may be called more than once */
	sflt_termsock(so);

	if ((!(so->so_flags & SOF_PCBCLEARING)) || ((so->so_state & SS_NOFDREF) == 0)) {
#ifdef __APPLE__
		selthreadclear(&so->so_snd.sb_sel);
		selthreadclear(&so->so_rcv.sb_sel);
		so->so_rcv.sb_flags &= ~SB_UPCALL;
		so->so_snd.sb_flags &= ~SB_UPCALL;
#endif
		return;
	}
	if (head != NULL) {
		socket_lock(head, 1);
		if (so->so_state & SS_INCOMP) {
			TAILQ_REMOVE(&head->so_incomp, so, so_list);
			head->so_incqlen--;
		} else if (so->so_state & SS_COMP) {
			/*
			 * We must not decommission a socket that's
			 * on the accept(2) queue. If we do, then
			 * accept(2) may hang after select(2) indicated
			 * that the listening socket was ready.
			 */
#ifdef __APPLE__
			selthreadclear(&so->so_snd.sb_sel);
			selthreadclear(&so->so_rcv.sb_sel);
			so->so_rcv.sb_flags &= ~SB_UPCALL;
			so->so_snd.sb_flags &= ~SB_UPCALL;
#endif
			socket_unlock(head, 1);
			return;
		} else {
			panic("sofree: not queued");
		}
		head->so_qlen--;
		so->so_state &= ~SS_INCOMP;
		so->so_head = NULL;
		socket_unlock(head, 1);
	}
#ifdef __APPLE__
	selthreadclear(&so->so_snd.sb_sel);
	sbrelease(&so->so_snd);
#endif
	sorflush(so);

	/* 3932268: disable upcall */
	so->so_rcv.sb_flags &= ~SB_UPCALL;
	so->so_snd.sb_flags &= ~SB_UPCALL;

	if (dealloc)
		sodealloc(so);
}

/*
 * Close a socket on last file table reference removal.
 * Initiate disconnect if connected.
 * Free socket when disconnect complete.
 */
int
soclose_locked(so)
	register struct socket *so;
{
	int error = 0;
	lck_mtx_t * mutex_held;
	struct timespec ts;

	if (so->so_usecount == 0) {
		panic("soclose: so=%x refcount=0\n", so);
	}

	sflt_notify(so, sock_evt_closing, NULL);

	if ((so->so_options & SO_ACCEPTCONN)) {
		struct socket *sp;

		/* We do not want new connections to be added to the connection queues */
		so->so_options &= ~SO_ACCEPTCONN;

		while ((sp = TAILQ_FIRST(&so->so_incomp)) != NULL) {
			/* A bit tricky here. If the protocol uses a
			 * global lock we keep holding it; with
			 * per-socket locks we must lock the incoming
			 * socket, not the listening head.
			 */
			if (so->so_proto->pr_getlock != NULL) {
				socket_unlock(so, 0);
				socket_lock(sp, 1);
			}
			(void) soabort(sp);
			if (so->so_proto->pr_getlock != NULL) {
				socket_unlock(sp, 1);
				socket_lock(so, 0);
			}
		}

		while ((sp = TAILQ_FIRST(&so->so_comp)) != NULL) {
			/* Dequeue from so_comp since sofree() won't do it */
			TAILQ_REMOVE(&so->so_comp, sp, so_list);
			so->so_qlen--;

			if (so->so_proto->pr_getlock != NULL) {
				socket_unlock(so, 0);
				socket_lock(sp, 1);
			}

			sp->so_state &= ~SS_COMP;
			sp->so_head = NULL;

			(void) soabort(sp);
			if (so->so_proto->pr_getlock != NULL) {
				socket_unlock(sp, 1);
				socket_lock(so, 0);
			}
		}
	}
	if (so->so_pcb == 0) {
		/* 3915887: mark the socket as ready for dealloc */
		so->so_flags |= SOF_PCBCLEARING;
		goto discard;
	}
	if (so->so_state & SS_ISCONNECTED) {
		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
			error = sodisconnectlocked(so);
			if (error)
				goto drop;
		}
		if (so->so_options & SO_LINGER) {
			if ((so->so_state & SS_ISDISCONNECTING) &&
			    (so->so_state & SS_NBIO))
				goto drop;
			if (so->so_proto->pr_getlock != NULL)
				mutex_held = (*so->so_proto->pr_getlock)(so, 0);
			else
				mutex_held = so->so_proto->pr_domain->dom_mtx;
			while (so->so_state & SS_ISCONNECTED) {
				ts.tv_sec = (so->so_linger/100);
				ts.tv_nsec = (so->so_linger % 100) * NSEC_PER_USEC * 1000 * 10;
				error = msleep((caddr_t)&so->so_timeo, mutex_held,
				    PSOCK | PCATCH, "soclos", &ts);
				if (error) {
					/* It's OK when the time fires, don't report an error */
					if (error == EWOULDBLOCK)
						error = 0;
					break;
				}
			}
		}
	}
drop:
	if (so->so_usecount == 0)
		panic("soclose: usecount is zero so=%x\n", so);
	if (so->so_pcb && !(so->so_flags & SOF_PCBCLEARING)) {
		int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so);
		if (error == 0)
			error = error2;
	}
	if (so->so_usecount <= 0)
		panic("soclose: usecount is zero so=%x\n", so);
discard:
	if (so->so_pcb && so->so_state & SS_NOFDREF)
		panic("soclose: NOFDREF");
	so->so_state |= SS_NOFDREF;
#ifdef __APPLE__
	so->so_proto->pr_domain->dom_refs--;
	evsofree(so);
#endif
	so->so_usecount--;
	sofree(so);
	return (error);
}

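/*
 * soclose: entry point for the file-table reference drop; if the socket is
 * also retained by the kernel (so_retaincnt != 0), only the use count is
 * released here and the full close is deferred.
 */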
int
soclose(so)
	register struct socket *so;
{
	int error = 0;
	socket_lock(so, 1);
	if (so->so_retaincnt == 0)
		error = soclose_locked(so);
	else {	/* if the FD is going away, but socket is retained in kernel remove its reference */
		so->so_usecount--;
		if (so->so_usecount < 2)
			panic("soclose: retaincnt non null and so=%x usecount=%x\n", so, so->so_usecount);
	}
	socket_unlock(so, 1);
	return (error);
}

/*
 * Must be called at splnet...
 */
//#### Should already be locked
int
soabort(so)
	struct socket *so;
{
	int error;

#ifdef MORE_LOCKING_DEBUG
	lck_mtx_t * mutex_held;

	if (so->so_proto->pr_getlock != NULL)
		mutex_held = (*so->so_proto->pr_getlock)(so, 0);
	else
		mutex_held = so->so_proto->pr_domain->dom_mtx;
	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
#endif

	error = (*so->so_proto->pr_usrreqs->pru_abort)(so);
	if (error) {
		sofree(so);
		return error;
	}
	return (0);
}

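/*
 * soacceptlock: clear SS_NOFDREF and hand the connection to the protocol's
 * pru_accept; `dolock' says whether the socket lock must be taken here or
 * is already held by the caller.
 */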
int
soacceptlock(so, nam, dolock)
	register struct socket *so;
	struct sockaddr **nam;
	int dolock;
{
	int error;

	if (dolock) socket_lock(so, 1);

	if ((so->so_state & SS_NOFDREF) == 0)
		panic("soaccept: !NOFDREF");
	so->so_state &= ~SS_NOFDREF;
	error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam);

	if (dolock) socket_unlock(so, 1);
	return (error);
}
int
soaccept(so, nam)
	register struct socket *so;
	struct sockaddr **nam;
{
	return (soacceptlock(so, nam, 1));
}

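/*
 * soconnectlock: reject connects on listening sockets, disconnect first
 * where the protocol permits reuse, run sf_connect_out filters with the
 * socket unlocked, then issue pru_connect; a non-blocking connect returns
 * before the handshake completes.
 */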
int
soconnectlock(so, nam, dolock)
	register struct socket *so;
	struct sockaddr *nam;
	int dolock;
{
	int s;
	int error;
	struct proc *p = current_proc();

	if (dolock) socket_lock(so, 1);

	if (so->so_options & SO_ACCEPTCONN) {
		if (dolock) socket_unlock(so, 1);
		return (EOPNOTSUPP);
	}
	/*
	 * If protocol is connection-based, can only connect once.
	 * Otherwise, if connected, try to disconnect first.
	 * This allows user to disconnect by connecting to, e.g.,
	 * a null address.
	 */
	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
	    (error = sodisconnectlocked(so))))
		error = EISCONN;
	else {
		/*
		 * Run connect filter before calling protocol:
		 *  - non-blocking connect returns before completion;
		 */
		{
			struct socket_filter_entry *filter;
			int filtered = 0;
			error = 0;
			for (filter = so->so_filt; filter && (error == 0);
			     filter = filter->sfe_next_onsocket) {
				if (filter->sfe_filter->sf_filter.sf_connect_out) {
					if (filtered == 0) {
						filtered = 1;
						sflt_use(so);
						socket_unlock(so, 0);
					}
					error = filter->sfe_filter->sf_filter.sf_connect_out(
						filter->sfe_cookie, so, nam);
				}
			}
			if (filtered != 0) {
				socket_lock(so, 0);
				sflt_unuse(so);
			}
		}
		if (error) {
			if (error == EJUSTRETURN)
				error = 0;
			if (dolock) socket_unlock(so, 1);
			return error;
		}

		error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, p);
	}
	if (dolock) socket_unlock(so, 1);
	return (error);
}

int
soconnect(so, nam)
	register struct socket *so;
	struct sockaddr *nam;
{
	return (soconnectlock(so, nam, 1));
}

int
soconnect2(so1, so2)
	register struct socket *so1;
	struct socket *so2;
{
	int error;
//####### Assumes so1 is already locked

	socket_lock(so2, 1);

	error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2);

	socket_unlock(so2, 1);
	return (error);
}

int
sodisconnectlocked(so)
	register struct socket *so;
{
	int error;

	if ((so->so_state & SS_ISCONNECTED) == 0) {
		error = ENOTCONN;
		goto bad;
	}
	if (so->so_state & SS_ISDISCONNECTING) {
		error = EALREADY;
		goto bad;
	}

	error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so);

	if (error == 0) {
		sflt_notify(so, sock_evt_disconnected, NULL);
	}

bad:
	return (error);
}

//### Locking version
int
sodisconnect(so)
	register struct socket *so;
{
	int error;

	socket_lock(so, 1);
	error = sodisconnectlocked(so);
	socket_unlock(so, 1);
	return (error);
}

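/*
 * SBLOCKWAIT: sblock() wait flag - don't sleep for the sockbuf lock when
 * the caller passed MSG_DONTWAIT.
 */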
#define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? M_DONTWAIT : M_WAIT)

/*
 * sosendcheck will lock the socket buffer if it isn't locked and
 * verify that there is space for the data being inserted.
 */
static int
sosendcheck(
	struct socket *so,
	struct sockaddr *addr,
	long resid,
	long clen,
	long atomic,
	int flags,
	int *sblocked)
{
	int error = 0;
	long space;
	int assumelock = 0;

restart:
	if (*sblocked == 0) {
		if ((so->so_snd.sb_flags & SB_LOCK) != 0 &&
		    so->so_send_filt_thread != 0 &&
		    so->so_send_filt_thread == current_thread()) {
			/*
			 * We're being called recursively from a filter,
			 * allow this to continue. Radar 4150520.
			 * Don't set sblocked because we don't want
			 * to perform an unlock later.
			 */
			assumelock = 1;
		}
		else {
			error = sblock(&so->so_snd, SBLOCKWAIT(flags));
			if (error) {
				return error;
			}
			*sblocked = 1;
		}
	}

	if (so->so_state & SS_CANTSENDMORE)
		return EPIPE;

	if (so->so_error) {
		error = so->so_error;
		so->so_error = 0;
		return error;
	}

	if ((so->so_state & SS_ISCONNECTED) == 0) {
		/*
		 * `sendto' and `sendmsg' are allowed on a connection-
		 * based socket if it supports implied connect.
		 * Return ENOTCONN if not connected and no address is
		 * supplied.
		 */
		if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
		    (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
			if ((so->so_state & SS_ISCONFIRMING) == 0 &&
			    !(resid == 0 && clen != 0))
				return ENOTCONN;
		} else if (addr == 0 && !(flags&MSG_HOLD))
			return (so->so_proto->pr_flags & PR_CONNREQUIRED) ? ENOTCONN : EDESTADDRREQ;
	}
	space = sbspace(&so->so_snd);
	if (flags & MSG_OOB)
		space += 1024;
	if ((atomic && resid > so->so_snd.sb_hiwat) ||
	    clen > so->so_snd.sb_hiwat)
		return EMSGSIZE;
	if (space < resid + clen &&
	    (atomic || space < so->so_snd.sb_lowat || space < clen)) {
		if ((so->so_state & SS_NBIO) || (flags & MSG_NBIO) || assumelock) {
			return EWOULDBLOCK;
		}
		sbunlock(&so->so_snd, 1);
		error = sbwait(&so->so_snd);
		if (error) {
			return error;
		}
		goto restart;
	}

	return 0;
}

/*
 * Send on a socket.
 * If send must go all at once and message is larger than
 * send buffering, then hard error.
 * Lock against other senders.
 * If must go all at once and not enough room now, then
 * inform user that this would block and do nothing.
 * Otherwise, if nonblocking, send as much as possible.
 * The data to be sent is described by "uio" if nonzero,
 * otherwise by the mbuf chain "top" (which must be null
 * if uio is not). Data provided in mbuf chain must be small
 * enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers
 * must check for short counts if EINTR/ERESTART are returned.
 * Data and control buffers are freed on return.
 * Experiment:
 * MSG_HOLD: go thru most of sosend(), but just enqueue the mbuf
 * MSG_SEND: go thru as for MSG_HOLD on current fragment, then
 *  point at the mbuf chain being constructed and go from there.
 */
int
sosend(so, addr, uio, top, control, flags)
	register struct socket *so;
	struct sockaddr *addr;
	struct uio *uio;
	struct mbuf *top;
	struct mbuf *control;
	int flags;
{
	struct mbuf **mp;
	register struct mbuf *m, *freelist = NULL;
	register long space, len, resid;
	int clen = 0, error, dontroute, mlen, sendflags;
	int atomic = sosendallatonce(so) || top;
	int sblocked = 0;
	struct proc *p = current_proc();

	if (uio)
		// LP64todo - fix this!
		resid = uio_resid(uio);
	else
		resid = top->m_pkthdr.len;

	KERNEL_DEBUG((DBG_FNC_SOSEND | DBG_FUNC_START),
		     so,
		     resid,
		     so->so_snd.sb_cc,
		     so->so_snd.sb_lowat,
		     so->so_snd.sb_hiwat);

	socket_lock(so, 1);

	/*
	 * In theory resid should be unsigned.
	 * However, space must be signed, as it might be less than 0
	 * if we over-committed, and we must use a signed comparison
	 * of space and resid. On the other hand, a negative resid
	 * causes us to loop sending 0-length segments to the protocol.
	 *
	 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
	 * type sockets since that's an error.
	 */
	if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
		error = EINVAL;
		socket_unlock(so, 1);
		goto out;
	}

	dontroute =
	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
	    (so->so_proto->pr_flags & PR_ATOMIC);
	if (p)
		p->p_stats->p_ru.ru_msgsnd++;
	if (control)
		clen = control->m_len;

	do {
		error = sosendcheck(so, addr, resid, clen, atomic, flags, &sblocked);
		if (error) {
			goto release;
		}
		mp = &top;
		space = sbspace(&so->so_snd) - clen + ((flags & MSG_OOB) ? 1024 : 0);

		do {

			if (uio == NULL) {
				/*
				 * Data is prepackaged in "top".
				 */
				resid = 0;
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
			} else {
				int chainlength;
				int bytes_to_copy;

				bytes_to_copy = min(resid, space);

				if (sosendminchain > 0) {
					chainlength = 0;
				} else
					chainlength = sosendmaxchain;

				socket_unlock(so, 0);

				do {
					int num_needed;
					int hdrs_needed = (top == 0) ? 1 : 0;

					/*
					 * try to maintain a local cache of mbuf clusters needed to complete this write;
					 * the list is further limited to the number that are currently needed to fill the socket.
					 * this mechanism allows a large number of mbufs/clusters to be grabbed under a single
					 * mbuf lock... if we can't get any clusters, then fall back to trying for mbufs.
					 * if we fail early (or miscalculate the number needed) make sure to release any clusters
					 * we haven't yet consumed.
					 */
					if (freelist == NULL && bytes_to_copy > MCLBYTES) {
						num_needed = bytes_to_copy / NBPG;

						if ((bytes_to_copy - (num_needed * NBPG)) >= MINCLSIZE)
							num_needed++;

						freelist = m_getpackets_internal(&num_needed, hdrs_needed, M_WAIT, 0, NBPG);
						/* Fall back to cluster size if allocation failed */
					}

					if (freelist == NULL && bytes_to_copy > MINCLSIZE) {
						num_needed = bytes_to_copy / MCLBYTES;

						if ((bytes_to_copy - (num_needed * MCLBYTES)) >= MINCLSIZE)
							num_needed++;

						freelist = m_getpackets_internal(&num_needed, hdrs_needed, M_WAIT, 0, MCLBYTES);
						/* Fall back to a single mbuf if allocation failed */
					}

					if (freelist == NULL) {
						if (top == 0)
							MGETHDR(freelist, M_WAIT, MT_DATA);
						else
							MGET(freelist, M_WAIT, MT_DATA);

						if (freelist == NULL) {
							error = ENOBUFS;
							socket_lock(so, 0);
							goto release;
						}
						/*
						 * For datagram protocols, leave room
						 * for protocol headers in first mbuf.
						 */
						if (atomic && top == 0 && bytes_to_copy < MHLEN)
							MH_ALIGN(freelist, bytes_to_copy);
					}
					m = freelist;
					freelist = m->m_next;
					m->m_next = NULL;

					if ((m->m_flags & M_EXT))
						mlen = m->m_ext.ext_size;
					else if ((m->m_flags & M_PKTHDR))
						mlen = MHLEN - m_leadingspace(m);
					else
						mlen = MLEN;
					len = min(mlen, bytes_to_copy);

					chainlength += len;

					space -= len;

					error = uiomove(mtod(m, caddr_t), (int)len, uio);

					// LP64todo - fix this!
					resid = uio_resid(uio);

					m->m_len = len;
					*mp = m;
					top->m_pkthdr.len += len;
					if (error)
						break;
					mp = &m->m_next;
					if (resid <= 0) {
						if (flags & MSG_EOR)
							top->m_flags |= M_EOR;
						break;
					}
					bytes_to_copy = min(resid, space);

				} while (space > 0 && (chainlength < sosendmaxchain || atomic || resid < MINCLSIZE));

				socket_lock(so, 0);

				if (error)
					goto release;
			}

			if (flags & (MSG_HOLD|MSG_SEND))
			{
				/* Enqueue for later, go away if HOLD */
				register struct mbuf *mb1;
				if (so->so_temp && (flags & MSG_FLUSH))
				{
					m_freem(so->so_temp);
					so->so_temp = NULL;
				}
				if (so->so_temp)
					so->so_tail->m_next = top;
				else
					so->so_temp = top;
				mb1 = top;
				while (mb1->m_next)
					mb1 = mb1->m_next;
				so->so_tail = mb1;
				if (flags & MSG_HOLD)
				{
					top = NULL;
					goto release;
				}
				top = so->so_temp;
			}
			if (dontroute)
				so->so_options |= SO_DONTROUTE;
			/* Compute flags here, for pru_send and NKEs */
			sendflags = (flags & MSG_OOB) ? PRUS_OOB :
			    /*
			     * If the user set MSG_EOF, the protocol
			     * understands this flag and nothing left to
			     * send then use PRU_SEND_EOF instead of PRU_SEND.
			     */
			    ((flags & MSG_EOF) &&
			     (so->so_proto->pr_flags & PR_IMPLOPCL) &&
			     (resid <= 0)) ?
				PRUS_EOF :
			    /* If there is more to send set PRUS_MORETOCOME */
			    (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0;

			/*
			 * Socket filter processing
			 */
			{
				struct socket_filter_entry *filter;
				int filtered;

				filtered = 0;
				error = 0;
				for (filter = so->so_filt; filter && (error == 0);
				     filter = filter->sfe_next_onsocket) {
					if (filter->sfe_filter->sf_filter.sf_data_out) {
						int so_flags = 0;
						if (filtered == 0) {
							filtered = 1;
							so->so_send_filt_thread = current_thread();
							sflt_use(so);
							socket_unlock(so, 0);
							so_flags = (sendflags & MSG_OOB) ? sock_data_filt_flag_oob : 0;
						}
						error = filter->sfe_filter->sf_filter.sf_data_out(
							filter->sfe_cookie, so, addr, &top, &control, so_flags);
					}
				}

				if (filtered) {
					/*
					 * At this point, we've run at least one filter.
					 * The socket is unlocked as is the socket buffer.
					 */
					socket_lock(so, 0);
					sflt_unuse(so);
					so->so_send_filt_thread = 0;
					if (error) {
						if (error == EJUSTRETURN) {
							error = 0;
							clen = 0;
							control = 0;
							top = 0;
						}

						goto release;
					}
				}
			}
			/*
			 * End Socket filter processing
			 */

			if (error == EJUSTRETURN) {
				/* A socket filter handled this data */
				error = 0;
			}
			else {
				error = (*so->so_proto->pr_usrreqs->pru_send)(so,
				    sendflags, top, addr, control, p);
			}
#ifdef __APPLE__
			if (flags & MSG_SEND)
				so->so_temp = NULL;
#endif
			if (dontroute)
				so->so_options &= ~SO_DONTROUTE;
			clen = 0;
			control = 0;
			top = 0;
			mp = &top;
			if (error)
				goto release;
		} while (resid && space > 0);
	} while (resid);

release:
	if (sblocked)
		sbunlock(&so->so_snd, 0);	/* will unlock socket */
	else
		socket_unlock(so, 1);
out:
	if (top)
		m_freem(top);
	if (control)
		m_freem(control);
	if (freelist)
		m_freem_list(freelist);

	KERNEL_DEBUG(DBG_FNC_SOSEND | DBG_FUNC_END,
		     so,
		     resid,
		     so->so_snd.sb_cc,
		     space,
		     error);

	return (error);
}

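/*
 * Note: this soreceive adds a delayed-copy optimization to the classic BSD
 * logic. When not peeking and more than sorecvmincopy bytes are requested,
 * consumed mbufs are parked on free_list and copied out in bulk by
 * sodelayed_copy() with the socket unlocked, overlapping the copyout with
 * inbound packet processing.
 */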
/*
 * Implement receive operations on a socket.
 * We depend on the way that records are added to the sockbuf
 * by sbappend*.  In particular, each record (mbufs linked through m_next)
 * must begin with an address if the protocol so specifies,
 * followed by an optional mbuf or mbufs containing ancillary data,
 * and then zero or more mbufs of data.
 * In order to avoid blocking network interrupts for the entire time here,
 * we splx() while doing the actual copy to user space.
 * Although the sockbuf is locked, new data may still be appended,
 * and thus we must maintain consistency of the sockbuf during that time.
 *
 * The caller may receive the data as a single mbuf chain by supplying
 * an mbuf **mp0 for use in returning the chain.  The uio is then used
 * only for the count in uio_resid.
 */
int
soreceive(so, psa, uio, mp0, controlp, flagsp)
	register struct socket *so;
	struct sockaddr **psa;
	struct uio *uio;
	struct mbuf **mp0;
	struct mbuf **controlp;
	int *flagsp;
{
	register struct mbuf *m, **mp, *ml = NULL;
	register int flags, len, error, offset;
	struct protosw *pr = so->so_proto;
	struct mbuf *nextrecord;
	int moff, type = 0;
	// LP64todo - fix this!
	int orig_resid = uio_resid(uio);
	volatile struct mbuf *free_list;
	volatile int delayed_copy_len;
	int can_delay;
	int need_event;
	struct proc *p = current_proc();

	// LP64todo - fix this!
	KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_START,
		     so,
		     uio_resid(uio),
		     so->so_rcv.sb_cc,
		     so->so_rcv.sb_lowat,
		     so->so_rcv.sb_hiwat);

	socket_lock(so, 1);

#ifdef MORE_LOCKING_DEBUG
	if (so->so_usecount == 1)
		panic("soreceive: so=%x no other reference on socket\n", so);
#endif
	mp = mp0;
	if (psa)
		*psa = 0;
	if (controlp)
		*controlp = 0;
	if (flagsp)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;
	/*
	 * When SO_WANTOOBFLAG is set we try to get out-of-band data
	 * regardless of the flags argument. Here is the case where
	 * out-of-band data is not inline.
	 */
	if ((flags & MSG_OOB) ||
	    ((so->so_options & SO_WANTOOBFLAG) != 0 &&
	     (so->so_options & SO_OOBINLINE) == 0 &&
	     (so->so_oobmark || (so->so_state & SS_RCVATMARK)))) {
		m = m_get(M_WAIT, MT_DATA);
		if (m == NULL) {
			socket_unlock(so, 1);
			KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, ENOBUFS,0,0,0,0);
			return (ENOBUFS);
		}
		error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
		if (error)
			goto bad;
		socket_unlock(so, 0);
		do {
			// LP64todo - fix this!
			error = uiomove(mtod(m, caddr_t),
					(int) min(uio_resid(uio), m->m_len), uio);
			m = m_free(m);
		} while (uio_resid(uio) && error == 0 && m);
		socket_lock(so, 0);
bad:
		if (m)
			m_freem(m);
#ifdef __APPLE__
		if ((so->so_options & SO_WANTOOBFLAG) != 0) {
			if (error == EWOULDBLOCK || error == EINVAL) {
				/*
				 * Let's try to get normal data:
				 *  EWOULDBLOCK: out-of-band data not received yet;
				 *  EINVAL: out-of-band data already read.
				 */
				error = 0;
				goto nooob;
			} else if (error == 0 && flagsp)
				*flagsp |= MSG_OOB;
		}
		socket_unlock(so, 1);
		KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0);
#endif
		return (error);
	}
nooob:
	if (mp)
		*mp = (struct mbuf *)0;
	if (so->so_state & SS_ISCONFIRMING && uio_resid(uio))
		(*pr->pr_usrreqs->pru_rcvd)(so, 0);


	free_list = (struct mbuf *)0;
	delayed_copy_len = 0;
restart:
#ifdef MORE_LOCKING_DEBUG
	if (so->so_usecount <= 1)
		printf("soreceive: sblock so=%x ref=%d on socket\n", so, so->so_usecount);
#endif
	error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
	if (error) {
		socket_unlock(so, 1);
		KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0);
		return (error);
	}

	m = so->so_rcv.sb_mb;
	/*
	 * If we have less data than requested, block awaiting more
	 * (subject to any timeout) if:
	 *   1. the current count is less than the low water mark, or
	 *   2. MSG_WAITALL is set, and it is possible to do the entire
	 *	receive operation at once if we block (resid <= hiwat).
	 *   3. MSG_DONTWAIT is not set
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning
	 * a short count if a timeout or signal occurs after we start.
	 */
	if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
	    so->so_rcv.sb_cc < uio_resid(uio)) &&
	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
	    ((flags & MSG_WAITALL) && uio_resid(uio) <= so->so_rcv.sb_hiwat)) &&
	    m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {

		KASSERT(m != 0 || !so->so_rcv.sb_cc, ("receive 1"));
		if (so->so_error) {
			if (m)
				goto dontblock;
			error = so->so_error;
			if ((flags & MSG_PEEK) == 0)
				so->so_error = 0;
			goto release;
		}
		if (so->so_state & SS_CANTRCVMORE) {
			if (m)
				goto dontblock;
			else
				goto release;
		}
		for (; m; m = m->m_next)
			if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
				m = so->so_rcv.sb_mb;
				goto dontblock;
			}
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
			error = ENOTCONN;
			goto release;
		}
		if (uio_resid(uio) == 0)
			goto release;
		if ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO))) {
			error = EWOULDBLOCK;
			goto release;
		}
		sbunlock(&so->so_rcv, 1);
#ifdef EVEN_MORE_LOCKING_DEBUG
		if (socket_debug)
			printf("Waiting for socket data\n");
#endif

		error = sbwait(&so->so_rcv);
#ifdef EVEN_MORE_LOCKING_DEBUG
		if (socket_debug)
			printf("SORECEIVE - sbwait returned %d\n", error);
#endif
		if (so->so_usecount < 1)
			panic("soreceive: after 2nd sblock so=%x ref=%d on socket\n", so, so->so_usecount);
		if (error) {
			socket_unlock(so, 1);
			KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0);
			return (error);
		}
		goto restart;
	}
dontblock:
#ifndef __APPLE__
	if (uio->uio_procp)
		uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
#else /* __APPLE__ */
	/*
	 * 2207985
	 * This should be uio->uio_procp; however, some callers of this
	 * function use auto variables with stack garbage, and fail to
	 * fill out the uio structure properly.
	 */
	if (p)
		p->p_stats->p_ru.ru_msgrcv++;
#endif /* __APPLE__ */
	nextrecord = m->m_nextpkt;
	if ((pr->pr_flags & PR_ADDR) && m->m_type == MT_SONAME) {
		KASSERT(m->m_type == MT_SONAME, ("receive 1a"));
		orig_resid = 0;
		if (psa) {
			*psa = dup_sockaddr(mtod(m, struct sockaddr *),
					    mp0 == 0);
			if ((*psa == 0) && (flags & MSG_NEEDSA)) {
				error = EWOULDBLOCK;
				goto release;
			}
		}
		if (flags & MSG_PEEK) {
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (m->m_next == 0 && so->so_rcv.sb_cc != 0)
				panic("soreceive: about to create invalid socketbuf");
			MFREE(m, so->so_rcv.sb_mb);
			m = so->so_rcv.sb_mb;
		}
	}
	while (m && m->m_type == MT_CONTROL && error == 0) {
		if (flags & MSG_PEEK) {
			if (controlp)
				*controlp = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (controlp) {
				if (pr->pr_domain->dom_externalize &&
				    mtod(m, struct cmsghdr *)->cmsg_type ==
				    SCM_RIGHTS) {
					socket_unlock(so, 0);	/* release socket lock: see 3903171 */
					error = (*pr->pr_domain->dom_externalize)(m);
					socket_lock(so, 0);
				}
				*controlp = m;
				if (m->m_next == 0 && so->so_rcv.sb_cc != 0)
					panic("soreceive: so->so_rcv.sb_mb->m_next == 0 && so->so_rcv.sb_cc != 0");
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
		if (controlp) {
			orig_resid = 0;
			controlp = &(*controlp)->m_next;
		}
	}
	if (m) {
		if ((flags & MSG_PEEK) == 0)
			m->m_nextpkt = nextrecord;
		type = m->m_type;
		if (type == MT_OOBDATA)
			flags |= MSG_OOB;
	}
	moff = 0;
	offset = 0;

	if (!(flags & MSG_PEEK) && uio_resid(uio) > sorecvmincopy)
		can_delay = 1;
	else
		can_delay = 0;

	need_event = 0;

	while (m && (uio_resid(uio) - delayed_copy_len) > 0 && error == 0) {
		if (m->m_type == MT_OOBDATA) {
			if (type != MT_OOBDATA)
				break;
		} else if (type == MT_OOBDATA)
			break;
#ifndef __APPLE__
/*
 * This assertion needs rework.  The trouble is that AppleTalk uses many
 * mbuf types (NOT listed in mbuf.h!) which will trigger this panic.
 * For now just remove the assertion...  CSM 9/98
 */
		else
			KASSERT(m->m_type == MT_DATA || m->m_type == MT_HEADER,
				("receive 3"));
#else
		/*
		 * Make sure to always set MSG_OOB event when getting
		 * out of band data inline.
		 */
		if ((so->so_options & SO_WANTOOBFLAG) != 0 &&
		    (so->so_options & SO_OOBINLINE) != 0 &&
		    (so->so_state & SS_RCVATMARK) != 0) {
			flags |= MSG_OOB;
		}
#endif
		so->so_state &= ~SS_RCVATMARK;
		// LP64todo - fix this!
		len = uio_resid(uio) - delayed_copy_len;
		if (so->so_oobmark && len > so->so_oobmark - offset)
			len = so->so_oobmark - offset;
		if (len > m->m_len - moff)
			len = m->m_len - moff;
		/*
		 * If mp is set, just pass back the mbufs.
		 * Otherwise copy them out via the uio, then free.
		 * Sockbuf must be consistent here (points to current mbuf,
		 * it points to next record) when we drop priority;
		 * we must note any additions to the sockbuf when we
		 * block interrupts again.
		 */
		if (mp == 0) {
			if (can_delay && len == m->m_len) {
				/*
				 * only delay the copy if we're consuming the
				 * mbuf and we're NOT in MSG_PEEK mode
				 * and we have enough data to make it worthwhile
				 * to drop and retake the funnel... can_delay
				 * reflects the state of the 2 latter constraints
				 * moff should always be zero in these cases
				 */
				delayed_copy_len += len;
			} else {

				if (delayed_copy_len) {
					error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);

					if (error) {
						goto release;
					}
					if (m != so->so_rcv.sb_mb) {
						/*
						 * can only get here if MSG_PEEK is not set;
						 * therefore, m should point at the head of the rcv queue.
						 * if it doesn't, it means something drastically changed
						 * while we were out from behind the funnel in sodelayed_copy -
						 * perhaps a RST on the stream. in any event, the stream has
						 * been interrupted. it's probably best just to return
						 * whatever data we've moved and let the caller sort it out.
						 */
						break;
					}
				}
				socket_unlock(so, 0);
				error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
				socket_lock(so, 0);

				if (error)
					goto release;
			}
		} else
			uio_setresid(uio, (uio_resid(uio) - len));

		if (len == m->m_len - moff) {
			if (m->m_flags & M_EOR)
				flags |= MSG_EOR;
			if (flags & MSG_PEEK) {
				m = m->m_next;
				moff = 0;
			} else {
				nextrecord = m->m_nextpkt;
				sbfree(&so->so_rcv, m);
				m->m_nextpkt = NULL;

				if (mp) {
					*mp = m;
					mp = &m->m_next;
					so->so_rcv.sb_mb = m = m->m_next;
					*mp = (struct mbuf *)0;
				} else {
					if (free_list == NULL)
						free_list = m;
					else
						ml->m_next = m;
					ml = m;
					so->so_rcv.sb_mb = m = m->m_next;
					ml->m_next = 0;
				}
				if (m)
					m->m_nextpkt = nextrecord;
			}
		} else {
			if (flags & MSG_PEEK)
				moff += len;
			else {
				if (mp)
					*mp = m_copym(m, 0, len, M_WAIT);
				m->m_data += len;
				m->m_len -= len;
				so->so_rcv.sb_cc -= len;
			}
		}
		if (so->so_oobmark) {
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					so->so_state |= SS_RCVATMARK;
					/*
					 * delay posting the actual event until after
					 * any delayed copy processing has finished
					 */
					need_event = 1;
					break;
				}
			} else {
				offset += len;
				if (offset == so->so_oobmark)
					break;
			}
		}
		if (flags & MSG_EOR)
			break;
		/*
		 * If the MSG_WAITALL or MSG_WAITSTREAM flag is set (for non-atomic socket),
		 * we must not quit until "uio->uio_resid == 0" or an error
		 * termination.  If a signal/timeout occurs, return
		 * with a short count but without error.
		 * Keep sockbuf locked against other readers.
		 */
		while (flags & (MSG_WAITALL|MSG_WAITSTREAM) && m == 0 && (uio_resid(uio) - delayed_copy_len) > 0 &&
		    !sosendallatonce(so) && !nextrecord) {
			if (so->so_error || so->so_state & SS_CANTRCVMORE)
				goto release;

			if (pr->pr_flags & PR_WANTRCVD && so->so_pcb && (((struct inpcb *)so->so_pcb)->inp_state != INPCB_STATE_DEAD))
				(*pr->pr_usrreqs->pru_rcvd)(so, flags);
			if (sbwait(&so->so_rcv)) {
				error = 0;
				goto release;
			}
			/*
			 * have to wait until after we get back from the sbwait to do the copy because
			 * we will drop the funnel if we have enough data that has been delayed... by dropping
			 * the funnel we open up a window allowing the netisr thread to process the incoming packets
			 * and to change the state of this socket... we're issuing the sbwait because
			 * the socket is empty and we're expecting the netisr thread to wake us up when more
			 * packets arrive... if we allow that processing to happen and then sbwait, we
			 * could stall forever with packets sitting in the socket if no further packets
			 * arrive from the remote side.
			 *
			 * we want to copy before we've collected all the data to satisfy this request to
			 * allow the copy to overlap the incoming packet processing on an MP system
			 */
			if (delayed_copy_len > sorecvmincopy && (delayed_copy_len > (so->so_rcv.sb_hiwat / 2))) {

				error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);

				if (error)
					goto release;
			}
			m = so->so_rcv.sb_mb;
			if (m) {
				nextrecord = m->m_nextpkt;
			}
		}
	}
#ifdef MORE_LOCKING_DEBUG
    if (so->so_usecount <= 1)
        panic("soreceive: after big while so=%x ref=%d on socket\n", so, so->so_usecount);
#endif

    if (m && pr->pr_flags & PR_ATOMIC) {
#ifdef __APPLE__
        if (so->so_options & SO_DONTTRUNC)
            flags |= MSG_RCVMORE;
        else {
#endif
            flags |= MSG_TRUNC;
            if ((flags & MSG_PEEK) == 0)
                (void) sbdroprecord(&so->so_rcv);
#ifdef __APPLE__
        }
#endif
    }
    if ((flags & MSG_PEEK) == 0) {
        if (m == 0)
            so->so_rcv.sb_mb = nextrecord;
        if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
            (*pr->pr_usrreqs->pru_rcvd)(so, flags);
    }
#ifdef __APPLE__
    if ((so->so_options & SO_WANTMORE) && so->so_rcv.sb_cc > 0)
        flags |= MSG_HAVEMORE;

    if (delayed_copy_len) {
        error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);

        if (error)
            goto release;
    }
    if (free_list) {
        m_freem_list((struct mbuf *)free_list);
        free_list = (struct mbuf *)0;
    }
    if (need_event)
        postevent(so, 0, EV_OOB);
#endif
    if (orig_resid == uio_resid(uio) && orig_resid &&
        (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
        sbunlock(&so->so_rcv, 1);
        goto restart;
    }

    if (flagsp)
        *flagsp |= flags;
release:
#ifdef MORE_LOCKING_DEBUG
    if (so->so_usecount <= 1)
        panic("soreceive: release so=%x ref=%d on socket\n", so, so->so_usecount);
#endif
    if (delayed_copy_len) {
        error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);
    }
    if (free_list) {
        m_freem_list((struct mbuf *)free_list);
    }
    sbunlock(&so->so_rcv, 0);    /* will unlock socket */

    // LP64todo - fix this!
    KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END,
        so,
        uio_resid(uio),
        so->so_rcv.sb_cc,
        0,
        error);

    return (error);
}


static int sodelayed_copy(struct socket *so, struct uio *uio, struct mbuf **free_list, int *resid)
{
    int error = 0;
    struct mbuf *m;

    m = *free_list;

    socket_unlock(so, 0);

    while (m && error == 0) {

        error = uiomove(mtod(m, caddr_t), (int)m->m_len, uio);

        m = m->m_next;
    }
    m_freem_list(*free_list);

    *free_list = (struct mbuf *)NULL;
    *resid = 0;

    socket_lock(so, 0);

    return (error);
}

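/*
 * Invariant sketch (illustrative, not part of the build): the
 * unlock/copy/relock above is safe only because every mbuf on
 * free_list was already unlinked from so_rcv while the lock was held.
 * A hypothetical caller must treat any cached pointer into the receive
 * queue as stale afterwards:
 *
 *	error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);
 *	if (error == 0 && m != so->so_rcv.sb_mb)
 *		break;	// queue changed while unlocked; stop early
 *
 * which is exactly the re-validation soreceive() performs above.
 */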
int
soshutdown(so, how)
    register struct socket *so;
    register int how;
{
    register struct protosw *pr = so->so_proto;
    int ret;

    socket_lock(so, 1);

    sflt_notify(so, sock_evt_shutdown, &how);

    if (how != SHUT_WR) {
        sorflush(so);
        postevent(so, 0, EV_RCLOSED);
    }
    if (how != SHUT_RD) {
        ret = ((*pr->pr_usrreqs->pru_shutdown)(so));
        postevent(so, 0, EV_WCLOSED);
        KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, 0,0,0,0,0);
        socket_unlock(so, 1);
        return(ret);
    }

    KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, 0,0,0,0,0);
    socket_unlock(so, 1);
    return (0);
}

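/*
 * Usage sketch (userspace, illustrative only): soshutdown() backs the
 * shutdown(2) system call.  SHUT_RD flushes the receive side via
 * sorflush(), SHUT_WR invokes the protocol's pru_shutdown, and
 * SHUT_RDWR does both:
 *
 *	#include <sys/socket.h>
 *
 *	int half_close(int fd)
 *	{
 *		// stop sending; the peer sees EOF, but we can still read
 *		return shutdown(fd, SHUT_WR);
 *	}
 */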
void
sorflush(so)
    register struct socket *so;
{
    register struct sockbuf *sb = &so->so_rcv;
    register struct protosw *pr = so->so_proto;
    struct sockbuf asb;

#ifdef MORE_LOCKING_DEBUG
    lck_mtx_t * mutex_held;

    if (so->so_proto->pr_getlock != NULL)
        mutex_held = (*so->so_proto->pr_getlock)(so, 0);
    else
        mutex_held = so->so_proto->pr_domain->dom_mtx;
    lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
#endif

    sflt_notify(so, sock_evt_flush_read, NULL);

    sb->sb_flags |= SB_NOINTR;
    (void) sblock(sb, M_WAIT);
    socantrcvmore(so);
    sbunlock(sb, 1);
#ifdef __APPLE__
    selthreadclear(&sb->sb_sel);
#endif
    asb = *sb;
    bzero((caddr_t)sb, sizeof (*sb));
    sb->sb_so = so;    /* reestablish link to socket */
    if (asb.sb_flags & SB_KNOTE) {
        sb->sb_sel.si_note = asb.sb_sel.si_note;
        sb->sb_flags = SB_KNOTE;
    }
    if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
        (*pr->pr_domain->dom_dispose)(asb.sb_mb);
    sbrelease(&asb);
}

/*
 * Perhaps this routine, and sooptcopyout(), below, ought to come in
 * an additional variant to handle the case where the option value needs
 * to be some kind of integer, but not a specific size.
 * In addition to their use here, these functions are also called by the
 * protocol-level pr_ctloutput() routines.
 */
int
sooptcopyin(sopt, buf, len, minlen)
    struct sockopt *sopt;
    void *buf;
    size_t len;
    size_t minlen;
{
    size_t valsize;

    /*
     * If the user gives us more than we wanted, we ignore it,
     * but if we don't get the minimum length the caller
     * wants, we return EINVAL.  On success, sopt->sopt_valsize
     * is set to however much we actually retrieved.
     */
    if ((valsize = sopt->sopt_valsize) < minlen)
        return EINVAL;
    if (valsize > len)
        sopt->sopt_valsize = valsize = len;

    if (sopt->sopt_p != 0)
        return (copyin(sopt->sopt_val, buf, valsize));

    bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), buf, valsize);
    return 0;
}

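/*
 * Usage sketch (in-kernel, illustrative): a protocol's pr_ctloutput()
 * handler typically pulls a fixed-size integer out of the sockopt with
 * sooptcopyin(), exactly as the SOL_SOCKET cases in sosetopt() below do:
 *
 *	int optval, error;
 *
 *	error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval);
 *	if (error == 0) {
 *		// passing len == minlen insists on at least sizeof(int);
 *		// a larger user buffer is silently truncated to len
 *	}
 */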
int
sosetopt(so, sopt)
    struct socket *so;
    struct sockopt *sopt;
{
    int error, optval;
    struct linger l;
    struct timeval tv;
    short val;

    socket_lock(so, 1);

    if (sopt->sopt_dir != SOPT_SET) {
        sopt->sopt_dir = SOPT_SET;
    }

    {
        struct socket_filter_entry *filter;
        int filtered = 0;
        error = 0;
        for (filter = so->so_filt; filter && (error == 0);
            filter = filter->sfe_next_onsocket) {
            if (filter->sfe_filter->sf_filter.sf_setoption) {
                if (filtered == 0) {
                    filtered = 1;
                    sflt_use(so);
                    socket_unlock(so, 0);
                }
                error = filter->sfe_filter->sf_filter.sf_setoption(
                    filter->sfe_cookie, so, sopt);
            }
        }

        if (filtered != 0) {
            socket_lock(so, 0);
            sflt_unuse(so);

            if (error) {
                if (error == EJUSTRETURN)
                    error = 0;
                goto bad;
            }
        }
    }

    error = 0;
    if (sopt->sopt_level != SOL_SOCKET) {
        if (so->so_proto && so->so_proto->pr_ctloutput) {
            error = (*so->so_proto->pr_ctloutput)
                (so, sopt);
            socket_unlock(so, 1);
            return (error);
        }
        error = ENOPROTOOPT;
    } else {
        switch (sopt->sopt_name) {
        case SO_LINGER:
        case SO_LINGER_SEC:
            error = sooptcopyin(sopt, &l, sizeof l, sizeof l);
            if (error)
                goto bad;

            so->so_linger = (sopt->sopt_name == SO_LINGER) ? l.l_linger : l.l_linger * hz;
            if (l.l_onoff)
                so->so_options |= SO_LINGER;
            else
                so->so_options &= ~SO_LINGER;
            break;

        case SO_DEBUG:
        case SO_KEEPALIVE:
        case SO_DONTROUTE:
        case SO_USELOOPBACK:
        case SO_BROADCAST:
        case SO_REUSEADDR:
        case SO_REUSEPORT:
        case SO_OOBINLINE:
        case SO_TIMESTAMP:
#ifdef __APPLE__
        case SO_DONTTRUNC:
        case SO_WANTMORE:
        case SO_WANTOOBFLAG:
#endif
            error = sooptcopyin(sopt, &optval, sizeof optval,
                sizeof optval);
            if (error)
                goto bad;
            if (optval)
                so->so_options |= sopt->sopt_name;
            else
                so->so_options &= ~sopt->sopt_name;
            break;

        case SO_SNDBUF:
        case SO_RCVBUF:
        case SO_SNDLOWAT:
        case SO_RCVLOWAT:
            error = sooptcopyin(sopt, &optval, sizeof optval,
                sizeof optval);
            if (error)
                goto bad;

            /*
             * Values < 1 make no sense for any of these
             * options, so disallow them.
             */
            if (optval < 1) {
                error = EINVAL;
                goto bad;
            }

            switch (sopt->sopt_name) {
            case SO_SNDBUF:
            case SO_RCVBUF:
                if (sbreserve(sopt->sopt_name == SO_SNDBUF ?
                    &so->so_snd : &so->so_rcv,
                    (u_long) optval) == 0) {
                    error = ENOBUFS;
                    goto bad;
                }
                break;

            /*
             * Make sure the low-water is never greater than
             * the high-water.
             */
            case SO_SNDLOWAT:
                so->so_snd.sb_lowat =
                    (optval > so->so_snd.sb_hiwat) ?
                    so->so_snd.sb_hiwat : optval;
                break;
            case SO_RCVLOWAT:
                so->so_rcv.sb_lowat =
                    (optval > so->so_rcv.sb_hiwat) ?
                    so->so_rcv.sb_hiwat : optval;
                break;
            }
            break;

        case SO_SNDTIMEO:
        case SO_RCVTIMEO:
            error = sooptcopyin(sopt, &tv, sizeof tv,
                sizeof tv);
            if (error)
                goto bad;

            if (tv.tv_sec < 0 || tv.tv_sec > LONG_MAX ||
                tv.tv_usec < 0 || tv.tv_usec >= 1000000) {
                error = EDOM;
                goto bad;
            }

            switch (sopt->sopt_name) {
            case SO_SNDTIMEO:
                so->so_snd.sb_timeo = tv;
                break;
            case SO_RCVTIMEO:
                so->so_rcv.sb_timeo = tv;
                break;
            }
            break;

        case SO_NKE:
        {
            struct so_nke nke;

            error = sooptcopyin(sopt, &nke,
                sizeof nke, sizeof nke);
            if (error)
                goto bad;

            error = sflt_attach_private(so, NULL, nke.nke_handle, 1);
            break;
        }

        case SO_NOSIGPIPE:
            error = sooptcopyin(sopt, &optval, sizeof optval,
                sizeof optval);
            if (error)
                goto bad;
            if (optval)
                so->so_flags |= SOF_NOSIGPIPE;
            else
                so->so_flags &= ~SOF_NOSIGPIPE;

            break;

        case SO_NOADDRERR:
            error = sooptcopyin(sopt, &optval, sizeof optval,
                sizeof optval);
            if (error)
                goto bad;
            if (optval)
                so->so_flags |= SOF_NOADDRAVAIL;
            else
                so->so_flags &= ~SOF_NOADDRAVAIL;

            break;

        default:
            error = ENOPROTOOPT;
            break;
        }
        if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
            (void) ((*so->so_proto->pr_ctloutput)
                (so, sopt));
        }
    }
bad:
    socket_unlock(so, 1);
    return (error);
}

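/*
 * Usage sketch (userspace, illustrative only): the SOL_SOCKET cases
 * above are reached through setsockopt(2).  SO_LINGER_SEC is the
 * Darwin-specific variant that takes l_linger in seconds rather than
 * in clock ticks (note the "* hz" conversion above):
 *
 *	#include <sys/socket.h>
 *
 *	int set_linger(int fd)
 *	{
 *		struct linger l = { .l_onoff = 1, .l_linger = 5 };
 *		return setsockopt(fd, SOL_SOCKET, SO_LINGER_SEC,
 *		    &l, sizeof l);
 *	}
 */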
/* Helper routine for getsockopt */
int
sooptcopyout(sopt, buf, len)
    struct sockopt *sopt;
    void *buf;
    size_t len;
{
    int error;
    size_t valsize;

    error = 0;

    /*
     * Documented get behavior is that we always return a value,
     * possibly truncated to fit in the user's buffer.
     * Traditional behavior is that we always tell the user
     * precisely how much we copied, rather than something useful
     * like the total amount we had available for her.
     * Note that this interface is not idempotent; the entire answer must
     * be generated ahead of time.
     */
    valsize = min(len, sopt->sopt_valsize);
    sopt->sopt_valsize = valsize;
    if (sopt->sopt_val != USER_ADDR_NULL) {
        if (sopt->sopt_p != 0)
            error = copyout(buf, sopt->sopt_val, valsize);
        else
            bcopy(buf, CAST_DOWN(caddr_t, sopt->sopt_val), valsize);
    }
    return error;
}

int
sogetopt(so, sopt)
    struct socket *so;
    struct sockopt *sopt;
{
    int error, optval;
    struct linger l;
    struct timeval tv;

    if (sopt->sopt_dir != SOPT_GET) {
        sopt->sopt_dir = SOPT_GET;
    }

    socket_lock(so, 1);

    {
        struct socket_filter_entry *filter;
        int filtered = 0;
        error = 0;
        for (filter = so->so_filt; filter && (error == 0);
            filter = filter->sfe_next_onsocket) {
            if (filter->sfe_filter->sf_filter.sf_getoption) {
                if (filtered == 0) {
                    filtered = 1;
                    sflt_use(so);
                    socket_unlock(so, 0);
                }
                error = filter->sfe_filter->sf_filter.sf_getoption(
                    filter->sfe_cookie, so, sopt);
            }
        }
        if (filtered != 0) {
            socket_lock(so, 0);
            sflt_unuse(so);

            if (error) {
                if (error == EJUSTRETURN)
                    error = 0;
                socket_unlock(so, 1);
                return error;
            }
        }
    }

    error = 0;
    if (sopt->sopt_level != SOL_SOCKET) {
        if (so->so_proto && so->so_proto->pr_ctloutput) {
            error = (*so->so_proto->pr_ctloutput)
                (so, sopt);
            socket_unlock(so, 1);
            return (error);
        } else {
            socket_unlock(so, 1);
            return (ENOPROTOOPT);
        }
    } else {
        switch (sopt->sopt_name) {
        case SO_LINGER:
        case SO_LINGER_SEC:
            l.l_onoff = so->so_options & SO_LINGER;
            l.l_linger = (sopt->sopt_name == SO_LINGER) ? so->so_linger :
                so->so_linger / hz;
            error = sooptcopyout(sopt, &l, sizeof l);
            break;

        case SO_USELOOPBACK:
        case SO_DONTROUTE:
        case SO_DEBUG:
        case SO_KEEPALIVE:
        case SO_REUSEADDR:
        case SO_REUSEPORT:
        case SO_BROADCAST:
        case SO_OOBINLINE:
        case SO_TIMESTAMP:
#ifdef __APPLE__
        case SO_DONTTRUNC:
        case SO_WANTMORE:
        case SO_WANTOOBFLAG:
#endif
            optval = so->so_options & sopt->sopt_name;
integer:
            error = sooptcopyout(sopt, &optval, sizeof optval);
            break;

        case SO_TYPE:
            optval = so->so_type;
            goto integer;

#ifdef __APPLE__
        case SO_NREAD:
        {
            int pkt_total;
            struct mbuf *m1;

            pkt_total = 0;
            m1 = so->so_rcv.sb_mb;
            if (so->so_proto->pr_flags & PR_ATOMIC)
            {
                while (m1) {
                    if (m1->m_type == MT_DATA)
                        pkt_total += m1->m_len;
                    m1 = m1->m_next;
                }
                optval = pkt_total;
            } else
                optval = so->so_rcv.sb_cc;
            goto integer;
        }
        case SO_NWRITE:
            optval = so->so_snd.sb_cc;
            goto integer;
#endif
        case SO_ERROR:
            optval = so->so_error;
            so->so_error = 0;
            goto integer;

        case SO_SNDBUF:
            optval = so->so_snd.sb_hiwat;
            goto integer;

        case SO_RCVBUF:
            optval = so->so_rcv.sb_hiwat;
            goto integer;

        case SO_SNDLOWAT:
            optval = so->so_snd.sb_lowat;
            goto integer;

        case SO_RCVLOWAT:
            optval = so->so_rcv.sb_lowat;
            goto integer;

        case SO_SNDTIMEO:
        case SO_RCVTIMEO:
            tv = (sopt->sopt_name == SO_SNDTIMEO ?
                so->so_snd.sb_timeo : so->so_rcv.sb_timeo);

            error = sooptcopyout(sopt, &tv, sizeof tv);
            break;

        case SO_NOSIGPIPE:
            optval = (so->so_flags & SOF_NOSIGPIPE);
            goto integer;

        case SO_NOADDRERR:
            optval = (so->so_flags & SOF_NOADDRAVAIL);
            goto integer;

        default:
            error = ENOPROTOOPT;
            break;
        }
        socket_unlock(so, 1);
        return (error);
    }
}

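/*
 * Usage sketch (userspace, illustrative only): SO_NREAD above reports
 * how many receive bytes are pending, so a reader can size its buffer
 * before calling recv(2):
 *
 *	#include <sys/socket.h>
 *
 *	int pending_bytes(int fd)
 *	{
 *		int n = 0;
 *		socklen_t len = sizeof n;
 *		if (getsockopt(fd, SOL_SOCKET, SO_NREAD, &n, &len) == -1)
 *			return -1;
 *		return n;
 *	}
 */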
/* XXX; prepare mbuf for (__FreeBSD__ < 3) routines. */
int
soopt_getm(struct sockopt *sopt, struct mbuf **mp)
{
    struct mbuf *m, *m_prev;
    int sopt_size = sopt->sopt_valsize;

    if (sopt_size > MAX_SOOPTGETM_SIZE)
        return EMSGSIZE;

    MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_DATA);
    if (m == 0)
        return ENOBUFS;
    if (sopt_size > MLEN) {
        MCLGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT);
        if ((m->m_flags & M_EXT) == 0) {
            m_free(m);
            return ENOBUFS;
        }
        m->m_len = min(MCLBYTES, sopt_size);
    } else {
        m->m_len = min(MLEN, sopt_size);
    }
    sopt_size -= m->m_len;
    *mp = m;
    m_prev = m;

    while (sopt_size) {
        MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_DATA);
        if (m == 0) {
            m_freem(*mp);
            return ENOBUFS;
        }
        if (sopt_size > MLEN) {
            MCLGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT);
            if ((m->m_flags & M_EXT) == 0) {
                m_freem(*mp);
                return ENOBUFS;
            }
            m->m_len = min(MCLBYTES, sopt_size);
        } else {
            m->m_len = min(MLEN, sopt_size);
        }
        sopt_size -= m->m_len;
        m_prev->m_next = m;
        m_prev = m;
    }
    return 0;
}

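/*
 * Sizing sketch (illustrative): each pass claims at most one cluster
 * (MCLBYTES) or one plain mbuf (MLEN) worth of the remaining option
 * size, so a request of N bytes yields roughly ceil(N / MCLBYTES)
 * mbufs once N exceeds MLEN.  For example, with MCLBYTES == 2048
 * (a common value), a 5000-byte option value becomes a three-mbuf
 * chain of lengths 2048, 2048, and 904.
 */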
/* XXX; copyin sopt data into mbuf chain for (__FreeBSD__ < 3) routines. */
int
soopt_mcopyin(struct sockopt *sopt, struct mbuf *m)
{
    struct mbuf *m0 = m;

    if (sopt->sopt_val == USER_ADDR_NULL)
        return 0;
    while (m != NULL && sopt->sopt_valsize >= m->m_len) {
        if (sopt->sopt_p != NULL) {
            int error;

            error = copyin(sopt->sopt_val, mtod(m, char *), m->m_len);
            if (error != 0) {
                m_freem(m0);
                return(error);
            }
        } else
            bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), mtod(m, char *), m->m_len);
        sopt->sopt_valsize -= m->m_len;
        sopt->sopt_val += m->m_len;
        m = m->m_next;
    }
    if (m != NULL)    /* enough space should have been allocated at ip6_sooptmcopyin() */
        panic("soopt_mcopyin");
    return 0;
}

/* XXX; copyout mbuf chain data into soopt for (__FreeBSD__ < 3) routines. */
int
soopt_mcopyout(struct sockopt *sopt, struct mbuf *m)
{
    struct mbuf *m0 = m;
    size_t valsize = 0;

    if (sopt->sopt_val == USER_ADDR_NULL)
        return 0;
    while (m != NULL && sopt->sopt_valsize >= m->m_len) {
        if (sopt->sopt_p != NULL) {
            int error;

            error = copyout(mtod(m, char *), sopt->sopt_val, m->m_len);
            if (error != 0) {
                m_freem(m0);
                return(error);
            }
        } else
            bcopy(mtod(m, char *), CAST_DOWN(caddr_t, sopt->sopt_val), m->m_len);
        sopt->sopt_valsize -= m->m_len;
        sopt->sopt_val += m->m_len;
        valsize += m->m_len;
        m = m->m_next;
    }
    if (m != NULL) {
        /* the user-land buffer should have been large enough */
        m_freem(m0);
        return(EINVAL);
    }
    sopt->sopt_valsize = valsize;
    return 0;
}

void
sohasoutofband(so)
    register struct socket *so;
{
    struct proc *p;

    if (so->so_pgid < 0)
        gsignal(-so->so_pgid, SIGURG);
    else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
        psignal(p, SIGURG);
    selwakeup(&so->so_rcv.sb_sel);
}

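/*
 * Usage sketch (userspace, illustrative only): a process that has
 * claimed ownership of the socket receives the SIGURG raised above and
 * can then pull the out-of-band byte:
 *
 *	#include <fcntl.h>
 *	#include <signal.h>
 *	#include <sys/socket.h>
 *	#include <unistd.h>
 *
 *	static void on_urg(int sig)
 *	{
 *		(void)sig;	// real handlers should only set a flag
 *	}
 *
 *	void setup_oob(int fd)
 *	{
 *		signal(SIGURG, on_urg);
 *		fcntl(fd, F_SETOWN, getpid());	// direct SIGURG here
 *	}
 *
 *	// later, after SIGURG: char c; recv(fd, &c, 1, MSG_OOB);
 */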
int
sopoll(struct socket *so, int events, __unused kauth_cred_t cred, void * wql)
{
    struct proc *p = current_proc();
    int revents = 0;

    socket_lock(so, 1);

    if (events & (POLLIN | POLLRDNORM))
        if (soreadable(so))
            revents |= events & (POLLIN | POLLRDNORM);

    if (events & (POLLOUT | POLLWRNORM))
        if (sowriteable(so))
            revents |= events & (POLLOUT | POLLWRNORM);

    if (events & (POLLPRI | POLLRDBAND))
        if (so->so_oobmark || (so->so_state & SS_RCVATMARK))
            revents |= events & (POLLPRI | POLLRDBAND);

    if (revents == 0) {
        if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
            /* Darwin sets the flag first, BSD calls selrecord first */
            so->so_rcv.sb_flags |= SB_SEL;
            selrecord(p, &so->so_rcv.sb_sel, wql);
        }

        if (events & (POLLOUT | POLLWRNORM)) {
            /* Darwin sets the flag first, BSD calls selrecord first */
            so->so_snd.sb_flags |= SB_SEL;
            selrecord(p, &so->so_snd.sb_sel, wql);
        }
    }

    socket_unlock(so, 1);
    return (revents);
}

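/*
 * Usage sketch (userspace, illustrative only): poll(2) lands here, so
 * waiting for readability plus urgent data maps onto the checks above:
 *
 *	#include <poll.h>
 *
 *	int wait_readable(int fd, int timeout_ms)
 *	{
 *		struct pollfd pfd = { .fd = fd, .events = POLLIN | POLLPRI };
 *		int n = poll(&pfd, 1, timeout_ms);
 *		if (n > 0 && (pfd.revents & POLLPRI))
 *			;	// so_oobmark / SS_RCVATMARK was set in sopoll()
 *		return n;
 *	}
 */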
int soo_kqfilter(struct fileproc *fp, struct knote *kn, struct proc *p);

int
soo_kqfilter(__unused struct fileproc *fp, struct knote *kn, __unused struct proc *p)
{
    struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
    struct sockbuf *sb;
    socket_lock(so, 1);

    switch (kn->kn_filter) {
    case EVFILT_READ:
        if (so->so_options & SO_ACCEPTCONN)
            kn->kn_fop = &solisten_filtops;
        else
            kn->kn_fop = &soread_filtops;
        sb = &so->so_rcv;
        break;
    case EVFILT_WRITE:
        kn->kn_fop = &sowrite_filtops;
        sb = &so->so_snd;
        break;
    default:
        socket_unlock(so, 1);
        return (1);
    }

    if (KNOTE_ATTACH(&sb->sb_sel.si_note, kn))
        sb->sb_flags |= SB_KNOTE;
    socket_unlock(so, 1);
    return (0);
}

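/*
 * Usage sketch (userspace, illustrative only): registering an
 * EVFILT_READ knote on a listening socket selects solisten_filtops
 * above, so the event fires when a connection is ready to accept:
 *
 *	#include <sys/event.h>
 *
 *	int wait_accept(int kq, int listen_fd)
 *	{
 *		struct kevent kev;
 *		EV_SET(&kev, listen_fd, EVFILT_READ, EV_ADD, 0, 0, NULL);
 *		if (kevent(kq, &kev, 1, NULL, 0, NULL) == -1)
 *			return -1;
 *		// a later kevent(kq, NULL, 0, &kev, 1, NULL) reports
 *		// kev.data == so_qlen (see filt_solisten below)
 *		return 0;
 *	}
 */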
static void
filt_sordetach(struct knote *kn)
{
    struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;

    socket_lock(so, 1);
    if (so->so_rcv.sb_flags & SB_KNOTE)
        if (KNOTE_DETACH(&so->so_rcv.sb_sel.si_note, kn))
            so->so_rcv.sb_flags &= ~SB_KNOTE;
    socket_unlock(so, 1);
}

/*ARGSUSED*/
static int
filt_soread(struct knote *kn, long hint)
{
    struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;

    if ((hint & SO_FILT_HINT_LOCKED) == 0)
        socket_lock(so, 1);

    if (so->so_oobmark) {
        if (kn->kn_flags & EV_OOBAND) {
            kn->kn_data = so->so_rcv.sb_cc - so->so_oobmark;
            if ((hint & SO_FILT_HINT_LOCKED) == 0)
                socket_unlock(so, 1);
            return (1);
        }
        kn->kn_data = so->so_oobmark;
        kn->kn_flags |= EV_OOBAND;
    } else {
        kn->kn_data = so->so_rcv.sb_cc;
        if (so->so_state & SS_CANTRCVMORE) {
            kn->kn_flags |= EV_EOF;
            kn->kn_fflags = so->so_error;
            if ((hint & SO_FILT_HINT_LOCKED) == 0)
                socket_unlock(so, 1);
            return (1);
        }
    }

    if (so->so_state & SS_RCVATMARK) {
        if (kn->kn_flags & EV_OOBAND) {
            if ((hint & SO_FILT_HINT_LOCKED) == 0)
                socket_unlock(so, 1);
            return (1);
        }
        kn->kn_flags |= EV_OOBAND;
    } else if (kn->kn_flags & EV_OOBAND) {
        kn->kn_data = 0;
        if ((hint & SO_FILT_HINT_LOCKED) == 0)
            socket_unlock(so, 1);
        return (0);
    }

    if (so->so_error) {    /* temporary udp error */
        if ((hint & SO_FILT_HINT_LOCKED) == 0)
            socket_unlock(so, 1);
        return (1);
    }

    if ((hint & SO_FILT_HINT_LOCKED) == 0)
        socket_unlock(so, 1);

    return (kn->kn_flags & EV_OOBAND ||
        kn->kn_data >= ((kn->kn_sfflags & NOTE_LOWAT) ?
        kn->kn_sdata : so->so_rcv.sb_lowat));
}

static void
filt_sowdetach(struct knote *kn)
{
    struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
    socket_lock(so, 1);

    if (so->so_snd.sb_flags & SB_KNOTE)
        if (KNOTE_DETACH(&so->so_snd.sb_sel.si_note, kn))
            so->so_snd.sb_flags &= ~SB_KNOTE;
    socket_unlock(so, 1);
}

/*ARGSUSED*/
static int
filt_sowrite(struct knote *kn, long hint)
{
    struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;

    if ((hint & SO_FILT_HINT_LOCKED) == 0)
        socket_lock(so, 1);

    kn->kn_data = sbspace(&so->so_snd);
    if (so->so_state & SS_CANTSENDMORE) {
        kn->kn_flags |= EV_EOF;
        kn->kn_fflags = so->so_error;
        if ((hint & SO_FILT_HINT_LOCKED) == 0)
            socket_unlock(so, 1);
        return (1);
    }
    if (so->so_error) {    /* temporary udp error */
        if ((hint & SO_FILT_HINT_LOCKED) == 0)
            socket_unlock(so, 1);
        return (1);
    }
    if (((so->so_state & SS_ISCONNECTED) == 0) &&
        (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
        if ((hint & SO_FILT_HINT_LOCKED) == 0)
            socket_unlock(so, 1);
        return (0);
    }
    if ((hint & SO_FILT_HINT_LOCKED) == 0)
        socket_unlock(so, 1);
    if (kn->kn_sfflags & NOTE_LOWAT)
        return (kn->kn_data >= kn->kn_sdata);
    return (kn->kn_data >= so->so_snd.sb_lowat);
}

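/*
 * Usage sketch (userspace, illustrative only): NOTE_LOWAT above lets a
 * writer be woken only once a chosen amount of send-buffer space is
 * free, instead of at so_snd.sb_lowat:
 *
 *	#include <sys/event.h>
 *
 *	void watch_writable(int kq, int fd)
 *	{
 *		struct kevent kev;
 *		// fire only when >= 64 KB of send space is available
 *		EV_SET(&kev, fd, EVFILT_WRITE, EV_ADD, NOTE_LOWAT,
 *		    64 * 1024, NULL);
 *		kevent(kq, &kev, 1, NULL, 0, NULL);
 *	}
 */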
/*ARGSUSED*/
static int
filt_solisten(struct knote *kn, long hint)
{
    struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
    int isempty;

    if ((hint & SO_FILT_HINT_LOCKED) == 0)
        socket_lock(so, 1);
    kn->kn_data = so->so_qlen;
    isempty = !TAILQ_EMPTY(&so->so_comp);
    if ((hint & SO_FILT_HINT_LOCKED) == 0)
        socket_unlock(so, 1);
    return (isempty);
}


int
socket_lock(so, refcount)
    struct socket *so;
    int refcount;
{
    int error = 0, lr, lr_saved;
#ifdef __ppc__
    __asm__ volatile("mflr %0" : "=r" (lr));
    lr_saved = lr;
#endif

    if (so->so_proto->pr_lock) {
        error = (*so->so_proto->pr_lock)(so, refcount, lr_saved);
    }
    else {
#ifdef MORE_LOCKING_DEBUG
        lck_mtx_assert(so->so_proto->pr_domain->dom_mtx, LCK_MTX_ASSERT_NOTOWNED);
#endif
        lck_mtx_lock(so->so_proto->pr_domain->dom_mtx);
        if (refcount)
            so->so_usecount++;
        so->reserved3 = (void*)lr_saved;    /* save caller for refcount going to zero */
    }

    return(error);
}

int
socket_unlock(so, refcount)
    struct socket *so;
    int refcount;
{
    int error = 0, lr, lr_saved;
    lck_mtx_t * mutex_held;

#ifdef __ppc__
    __asm__ volatile("mflr %0" : "=r" (lr));
    lr_saved = lr;
#endif

    if (so->so_proto == NULL)
        panic("socket_unlock null so_proto so=%x\n", so);

    if (so && so->so_proto->pr_unlock)
        error = (*so->so_proto->pr_unlock)(so, refcount, lr_saved);
    else {
        mutex_held = so->so_proto->pr_domain->dom_mtx;
#ifdef MORE_LOCKING_DEBUG
        lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
#endif
        if (refcount) {
            if (so->so_usecount <= 0)
                panic("socket_unlock: bad refcount so=%x value=%d\n", so, so->so_usecount);
            so->so_usecount--;
            if (so->so_usecount == 0) {
                sofreelastref(so, 1);
            }
            else
                so->reserved4 = (void*)lr_saved;    /* save caller */
        }
        lck_mtx_unlock(mutex_held);
    }

    return(error);
}
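/*
 * Usage sketch (in-kernel, illustrative only): the refcount argument
 * pairs a lock operation with a use-count change, so a caller that
 * must keep the socket alive across its critical section brackets it
 * like this:
 *
 *	socket_lock(so, 1);	// lock and take a reference
 *	// ... operate on so; it cannot be freed here ...
 *	socket_unlock(so, 1);	// drop the reference and unlock;
 *				// frees the socket via sofreelastref()
 *				// if this was the last reference
 *
 * socket_lock(so, 0) / socket_unlock(so, 0) lock and unlock without
 * touching so_usecount, as soreceive() does around uiomove().
 */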
//### Called with socket locked, will unlock socket
void
sofree(so)
    struct socket *so;
{
    int lr, lr_saved;
    lck_mtx_t * mutex_held;
#ifdef __ppc__
    __asm__ volatile("mflr %0" : "=r" (lr));
    lr_saved = lr;
#endif
    if (so->so_proto->pr_getlock != NULL)
        mutex_held = (*so->so_proto->pr_getlock)(so, 0);
    else
        mutex_held = so->so_proto->pr_domain->dom_mtx;
    lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

    sofreelastref(so, 0);
}

void
soreference(so)
    struct socket *so;
{
    socket_lock(so, 1);    /* locks & takes one reference on socket */
    socket_unlock(so, 0);  /* unlock only */
}

void
sodereference(so)
    struct socket *so;
{
    socket_lock(so, 0);
    socket_unlock(so, 1);
}