/*
 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1998, 1999 Apple Computer, Inc. All Rights Reserved */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket.c	8.3 (Berkeley) 4/15/94
 * $FreeBSD: src/sys/kern/uipc_socket.c,v 1.68.2.16 2001/06/14 20:46:06 ume Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/file_internal.h>
#include <sys/fcntl.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/event.h>
#include <sys/poll.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/uio.h>
#include <sys/ev.h>
#include <sys/kdebug.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <kern/zalloc.h>
#include <kern/locks.h>
#include <machine/limits.h>

int		so_cache_hw = 0;
int		so_cache_timeouts = 0;
int		so_cache_max_freed = 0;
int		cached_sock_count = 0;
struct socket	*socket_cache_head = 0;
struct socket	*socket_cache_tail = 0;
u_long		so_cache_time = 0;
int		so_cache_init_done = 0;
struct zone	*so_cache_zone;
extern int	get_inpcb_str_size();
extern int	get_tcp_str_size();

static lck_grp_t	*so_cache_mtx_grp;
static lck_attr_t	*so_cache_mtx_attr;
static lck_grp_attr_t	*so_cache_mtx_grp_attr;
lck_mtx_t		*so_cache_mtx;

static void	filt_sordetach(struct knote *kn);
static int	filt_soread(struct knote *kn, long hint);
static void	filt_sowdetach(struct knote *kn);
static int	filt_sowrite(struct knote *kn, long hint);
static int	filt_solisten(struct knote *kn, long hint);

static struct filterops solisten_filtops =
	{ 1, NULL, filt_sordetach, filt_solisten };
static struct filterops soread_filtops =
	{ 1, NULL, filt_sordetach, filt_soread };
static struct filterops sowrite_filtops =
	{ 1, NULL, filt_sowdetach, filt_sowrite };

#define EVEN_MORE_LOCKING_DEBUG 0
int socket_debug = 0;
int socket_zone = M_SOCKET;
so_gen_t	so_gencnt;	/* generation count for sockets */

MALLOC_DEFINE(M_SONAME, "soname", "socket name");
MALLOC_DEFINE(M_PCB, "pcb", "protocol control block");

#define	DBG_LAYER_IN_BEG	NETDBG_CODE(DBG_NETSOCK, 0)
#define	DBG_LAYER_IN_END	NETDBG_CODE(DBG_NETSOCK, 2)
#define	DBG_LAYER_OUT_BEG	NETDBG_CODE(DBG_NETSOCK, 1)
#define	DBG_LAYER_OUT_END	NETDBG_CODE(DBG_NETSOCK, 3)
#define	DBG_FNC_SOSEND		NETDBG_CODE(DBG_NETSOCK, (4 << 8) | 1)
#define	DBG_FNC_SORECEIVE	NETDBG_CODE(DBG_NETSOCK, (8 << 8))
#define	DBG_FNC_SOSHUTDOWN	NETDBG_CODE(DBG_NETSOCK, (9 << 8))

#define	MAX_SOOPTGETM_SIZE	(128 * MCLBYTES)

SYSCTL_DECL(_kern_ipc);

static int somaxconn = SOMAXCONN;
SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW, &somaxconn,
	   0, "");

/* Should we enforce a maximum as well? */
static int sosendmaxchain = 65536;
static int sosendminchain = 16384;
static int sorecvmincopy = 16384;
SYSCTL_INT(_kern_ipc, OID_AUTO, sosendminchain, CTLFLAG_RW, &sosendminchain,
	   0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, sorecvmincopy, CTLFLAG_RW, &sorecvmincopy,
	   0, "");

void so_cache_timer();

/*
 * Socket operation routines.
 * These routines are called by the routines in
 * sys_socket.c or from a system process, and
 * implement the semantics of socket operations by
 * switching out to the protocol specific routines.
 */

#ifdef __APPLE__

vm_size_t	so_cache_zone_element_size;

static int sodelayed_copy(struct socket *so, struct uio *uio, struct mbuf **free_list, int *resid);


void socketinit()
{
	vm_size_t str_size;

	if (so_cache_init_done) {
		printf("socketinit: already called...\n");
		return;
	}

	/*
	 * allocate lock group attribute and group for socket cache mutex
	 */
	so_cache_mtx_grp_attr = lck_grp_attr_alloc_init();

	so_cache_mtx_grp = lck_grp_alloc_init("so_cache", so_cache_mtx_grp_attr);

	/*
	 * allocate the lock attribute for socket cache mutex
	 */
	so_cache_mtx_attr = lck_attr_alloc_init();

	so_cache_init_done = 1;

	so_cache_mtx = lck_mtx_alloc_init(so_cache_mtx_grp, so_cache_mtx_attr);	/* cached sockets mutex */

	if (so_cache_mtx == NULL)
		return;		/* we're hosed... */

	str_size = (vm_size_t)(sizeof(struct socket) + 4 +
			       get_inpcb_str_size() + 4 +
			       get_tcp_str_size());
	so_cache_zone = zinit(str_size, 120000 * str_size, 8192, "socache zone");
#if TEMPDEBUG
	printf("cached_sock_alloc -- so_cache_zone size is %x\n", str_size);
#endif
	timeout(so_cache_timer, NULL, (SO_CACHE_FLUSH_INTERVAL * hz));

	so_cache_zone_element_size = str_size;

	sflt_init();
}

void cached_sock_alloc(so, waitok)
struct socket	**so;
int		waitok;

{
	caddr_t	temp;
	register u_long offset;

	lck_mtx_lock(so_cache_mtx);

	if (cached_sock_count) {
		cached_sock_count--;
		*so = socket_cache_head;
		if (*so == 0)
			panic("cached_sock_alloc: cached sock is null");

		socket_cache_head = socket_cache_head->cache_next;
		if (socket_cache_head)
			socket_cache_head->cache_prev = 0;
		else
			socket_cache_tail = 0;

		lck_mtx_unlock(so_cache_mtx);

		temp = (*so)->so_saved_pcb;
		bzero((caddr_t)*so, sizeof(struct socket));
#if TEMPDEBUG
		kprintf("cached_sock_alloc - retrieving cached sock %x - count == %d\n", *so,
			cached_sock_count);
#endif
		(*so)->so_saved_pcb = temp;
		(*so)->cached_in_sock_layer = 1;
	}
	else {
#if TEMPDEBUG
		kprintf("Allocating cached sock %x from memory\n", *so);
#endif

		lck_mtx_unlock(so_cache_mtx);

		if (waitok)
			*so = (struct socket *) zalloc(so_cache_zone);
		else
			*so = (struct socket *) zalloc_noblock(so_cache_zone);

		if (*so == 0)
			return;

		bzero((caddr_t)*so, sizeof(struct socket));

		/*
		 * Define offsets for extra structures into our single block of
		 * memory. Align extra structures on longword boundaries.
		 */

		offset = (u_long) *so;
		offset += sizeof(struct socket);
		if (offset & 0x3) {
			offset += 4;
			offset &= 0xfffffffc;
		}
		(*so)->so_saved_pcb = (caddr_t) offset;
		offset += get_inpcb_str_size();
		if (offset & 0x3) {
			offset += 4;
			offset &= 0xfffffffc;
		}

		((struct inpcb *) (*so)->so_saved_pcb)->inp_saved_ppcb = (caddr_t) offset;
#if TEMPDEBUG
		kprintf("Allocating cached socket - %x, pcb=%x tcpcb=%x\n", *so,
			(*so)->so_saved_pcb,
			((struct inpcb *)(*so)->so_saved_pcb)->inp_saved_ppcb);
#endif
	}

	(*so)->cached_in_sock_layer = 1;
}
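
/*
 * A worked example of the layout above, assuming a 32-bit system and a
 * hypothetical sizeof(struct socket) of 418 bytes (the real sizes vary):
 *
 *	offset 0 ......... struct socket      (418 bytes)
 *	offset 420 ....... inpcb              (so_saved_pcb, rounded up to 4)
 *	offset 420+N ..... tcpcb              (inp_saved_ppcb, rounded up to 4)
 *
 * Each `if (offset & 0x3)' block rounds the running offset up to the next
 * longword boundary (418 -> 420), which is why socketinit() sizes the zone
 * element with two extra 4-byte pads.
 */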


void cached_sock_free(so)
struct socket	*so;
{

	lck_mtx_lock(so_cache_mtx);

	if (++cached_sock_count > MAX_CACHED_SOCKETS) {
		--cached_sock_count;
		lck_mtx_unlock(so_cache_mtx);
#if TEMPDEBUG
		kprintf("Freeing overflowed cached socket %x\n", so);
#endif
		zfree(so_cache_zone, so);
	}
	else {
#if TEMPDEBUG
		kprintf("Freeing socket %x into cache\n", so);
#endif
		if (so_cache_hw < cached_sock_count)
			so_cache_hw = cached_sock_count;

		so->cache_next = socket_cache_head;
		so->cache_prev = 0;
		if (socket_cache_head)
			socket_cache_head->cache_prev = so;
		else
			socket_cache_tail = so;

		so->cache_timestamp = so_cache_time;
		socket_cache_head = so;
		lck_mtx_unlock(so_cache_mtx);
	}

#if TEMPDEBUG
	kprintf("Freed cached sock %x into cache - count is %d\n", so, cached_sock_count);
#endif
}


void so_cache_timer()
{
	register struct socket	*p;
	register int		n_freed = 0;

	lck_mtx_lock(so_cache_mtx);

	++so_cache_time;

	while ( (p = socket_cache_tail) )
	{
		if ((so_cache_time - p->cache_timestamp) < SO_CACHE_TIME_LIMIT)
			break;

		so_cache_timeouts++;

		if ( (socket_cache_tail = p->cache_prev) )
			p->cache_prev->cache_next = 0;
		if (--cached_sock_count == 0)
			socket_cache_head = 0;

		zfree(so_cache_zone, p);

		if (++n_freed >= SO_CACHE_MAX_FREE_BATCH)
		{
			so_cache_max_freed++;
			break;
		}
	}
	lck_mtx_unlock(so_cache_mtx);

	timeout(so_cache_timer, NULL, (SO_CACHE_FLUSH_INTERVAL * hz));
}
#endif /* __APPLE__ */

/*
 * Get a socket structure from our zone, and initialize it.
 * We don't implement `waitok' yet (see comments in uipc_domain.c).
 * Note that it would probably be better to allocate socket
 * and PCB at the same time, but I'm not convinced that all
 * the protocols can be easily modified to do this.
 */
struct socket *
soalloc(waitok, dom, type)
	int waitok;
	int dom;
	int type;
{
	struct socket *so;

	if ((dom == PF_INET) && (type == SOCK_STREAM))
		cached_sock_alloc(&so, waitok);
	else
	{
		MALLOC_ZONE(so, struct socket *, sizeof(*so), socket_zone, M_WAITOK);
		if (so)
			bzero(so, sizeof *so);
	}
	/* XXX race condition for reentrant kernel */
//###LD Atomic add for so_gencnt
	if (so) {
		so->so_gencnt = ++so_gencnt;
		so->so_zone = socket_zone;
	}

	return so;
}

int
socreate(dom, aso, type, proto)
	int dom;
	struct socket **aso;
	register int type;
	int proto;
{
	struct proc *p = current_proc();
	register struct protosw *prp;
	register struct socket *so;
	register int error = 0;
#if TCPDEBUG
	extern int tcpconsdebug;
#endif
	if (proto)
		prp = pffindproto(dom, proto, type);
	else
		prp = pffindtype(dom, type);

	if (prp == 0 || prp->pr_usrreqs->pru_attach == 0)
		return (EPROTONOSUPPORT);
#ifndef __APPLE__

	if (p->p_prison && jail_socket_unixiproute_only &&
	    prp->pr_domain->dom_family != PF_LOCAL &&
	    prp->pr_domain->dom_family != PF_INET &&
	    prp->pr_domain->dom_family != PF_ROUTE) {
		return (EPROTONOSUPPORT);
	}

#endif
	if (prp->pr_type != type)
		return (EPROTOTYPE);
	so = soalloc(p != 0, dom, type);
	if (so == 0)
		return (ENOBUFS);

	TAILQ_INIT(&so->so_incomp);
	TAILQ_INIT(&so->so_comp);
	so->so_type = type;

#ifdef __APPLE__
	if (p != 0) {
		so->so_uid = kauth_cred_getuid(kauth_cred_get());
		if (!suser(kauth_cred_get(), NULL))
			so->so_state = SS_PRIV;
	}
#else
	so->so_cred = kauth_cred_get_with_ref();
#endif
	so->so_proto = prp;
#ifdef __APPLE__
	so->so_rcv.sb_flags |= SB_RECV;	/* XXX */
	so->so_rcv.sb_so = so->so_snd.sb_so = so;
#endif
	so->next_lock_lr = 0;
	so->next_unlock_lr = 0;

//### Attachment will create the per pcb lock if necessary and increase refcount
	so->so_usecount++;	/* for creation, make sure it's done before socket is inserted in lists */

	error = (*prp->pr_usrreqs->pru_attach)(so, proto, p);
	if (error) {
		/*
		 * Warning:
		 * If so_pcb is not zero, the socket will be leaked,
		 * so the protocol attachment handler must be coded carefully.
		 */
		so->so_state |= SS_NOFDREF;
		so->so_usecount--;
		sofreelastref(so, 1);	/* will deallocate the socket */
		return (error);
	}
#ifdef __APPLE__
	prp->pr_domain->dom_refs++;
	TAILQ_INIT(&so->so_evlist);

	/* Attach socket filters for this protocol */
	sflt_initsock(so);
#if TCPDEBUG
	if (tcpconsdebug == 2)
		so->so_options |= SO_DEBUG;
#endif
#endif

	*aso = so;
	return (0);
}
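
/*
 * A minimal in-kernel usage sketch (illustrative only): creating a TCP
 * socket through this interface looks like
 *
 *	struct socket *so;
 *	int error = socreate(PF_INET, &so, SOCK_STREAM, IPPROTO_TCP);
 *	if (error == 0) {
 *		... use so ...
 *		(void) soclose(so);
 *	}
 *
 * socreate() resolves the protosw entry, allocates the socket (from the
 * socket cache for PF_INET/SOCK_STREAM, see soalloc() above) and runs the
 * protocol's pru_attach handler before handing the socket back.
 */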

int
sobind(so, nam)
	struct socket *so;
	struct sockaddr *nam;
{
	struct proc *p = current_proc();
	int error = 0;
	struct socket_filter_entry *filter;
	int filtered = 0;

	socket_lock(so, 1);

	/* Socket filter */
	error = 0;
	for (filter = so->so_filt; filter && (error == 0);
	     filter = filter->sfe_next_onsocket) {
		if (filter->sfe_filter->sf_filter.sf_bind) {
			if (filtered == 0) {
				filtered = 1;
				sflt_use(so);
				socket_unlock(so, 0);
			}
			error = filter->sfe_filter->sf_filter.sf_bind(
				filter->sfe_cookie, so, nam);
		}
	}
	if (filtered != 0) {
		socket_lock(so, 0);
		sflt_unuse(so);
	}
	/* End socket filter */

	if (error == 0)
		error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p);

	socket_unlock(so, 1);

	if (error == EJUSTRETURN)
		error = 0;

	return (error);
}
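
/*
 * Note on the filter walk above (the same pattern recurs in solisten,
 * soconnectlock and sosend): before the first filter callback runs, the
 * socket is pinned with sflt_use() and the socket lock is dropped, so
 * filters execute unlocked; afterwards the lock is retaken and
 * sflt_unuse() releases the pin. A filter returning EJUSTRETURN means
 * "handled, stop here without reporting an error to the caller".
 */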

void
sodealloc(so)
	struct socket *so;
{
	so->so_gencnt = ++so_gencnt;

#ifndef __APPLE__
	if (so->so_rcv.sb_hiwat)
		(void)chgsbsize(so->so_cred->cr_uidinfo,
		    &so->so_rcv.sb_hiwat, 0, RLIM_INFINITY);
	if (so->so_snd.sb_hiwat)
		(void)chgsbsize(so->so_cred->cr_uidinfo,
		    &so->so_snd.sb_hiwat, 0, RLIM_INFINITY);
#ifdef INET
	if (so->so_accf != NULL) {
		if (so->so_accf->so_accept_filter != NULL &&
		    so->so_accf->so_accept_filter->accf_destroy != NULL) {
			so->so_accf->so_accept_filter->accf_destroy(so);
		}
		if (so->so_accf->so_accept_filter_str != NULL)
			FREE(so->so_accf->so_accept_filter_str, M_ACCF);
		FREE(so->so_accf, M_ACCF);
	}
#endif /* INET */
	kauth_cred_unref(&so->so_cred);
	zfreei(so->so_zone, so);
#else
	if (so->cached_in_sock_layer == 1)
		cached_sock_free(so);
	else {
		if (so->cached_in_sock_layer == -1)
			panic("sodealloc: double dealloc: so=%x\n", so);
		so->cached_in_sock_layer = -1;
		FREE_ZONE(so, sizeof(*so), so->so_zone);
	}
#endif /* __APPLE__ */
}

int
solisten(so, backlog)
	register struct socket *so;
	int backlog;
{
	struct proc *p = current_proc();
	int error;

	socket_lock(so, 1);

	{
		struct socket_filter_entry *filter;
		int filtered = 0;
		error = 0;
		for (filter = so->so_filt; filter && (error == 0);
		     filter = filter->sfe_next_onsocket) {
			if (filter->sfe_filter->sf_filter.sf_listen) {
				if (filtered == 0) {
					filtered = 1;
					sflt_use(so);
					socket_unlock(so, 0);
				}
				error = filter->sfe_filter->sf_filter.sf_listen(
					filter->sfe_cookie, so);
			}
		}
		if (filtered != 0) {
			socket_lock(so, 0);
			sflt_unuse(so);
		}
	}

	if (error == 0) {
		error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p);
	}

	if (error) {
		socket_unlock(so, 1);
		if (error == EJUSTRETURN)
			error = 0;
		return (error);
	}

	if (TAILQ_EMPTY(&so->so_comp))
		so->so_options |= SO_ACCEPTCONN;
	if (backlog < 0 || backlog > somaxconn)
		backlog = somaxconn;
	so->so_qlimit = backlog;

	socket_unlock(so, 1);
	return (0);
}

void
sofreelastref(so, dealloc)
	register struct socket *so;
	int dealloc;
{
	int error;
	struct socket *head = so->so_head;

	/*### Assume socket is locked */

	/* Remove any filters - may be called more than once */
	sflt_termsock(so);

	if ((!(so->so_flags & SOF_PCBCLEARING)) || ((so->so_state & SS_NOFDREF) == 0)) {
#ifdef __APPLE__
		selthreadclear(&so->so_snd.sb_sel);
		selthreadclear(&so->so_rcv.sb_sel);
		so->so_rcv.sb_flags &= ~SB_UPCALL;
		so->so_snd.sb_flags &= ~SB_UPCALL;
#endif
		return;
	}
	if (head != NULL) {
		socket_lock(head, 1);
		if (so->so_state & SS_INCOMP) {
			TAILQ_REMOVE(&head->so_incomp, so, so_list);
			head->so_incqlen--;
		} else if (so->so_state & SS_COMP) {
			/*
			 * We must not decommission a socket that's
			 * on the accept(2) queue. If we do, then
			 * accept(2) may hang after select(2) indicated
			 * that the listening socket was ready.
			 */
#ifdef __APPLE__
			selthreadclear(&so->so_snd.sb_sel);
			selthreadclear(&so->so_rcv.sb_sel);
			so->so_rcv.sb_flags &= ~SB_UPCALL;
			so->so_snd.sb_flags &= ~SB_UPCALL;
#endif
			socket_unlock(head, 1);
			return;
		} else {
			panic("sofree: not queued");
		}
		head->so_qlen--;
		so->so_state &= ~SS_INCOMP;
		so->so_head = NULL;
		socket_unlock(head, 1);
	}
#ifdef __APPLE__
	selthreadclear(&so->so_snd.sb_sel);
	sbrelease(&so->so_snd);
#endif
	sorflush(so);

	/* 3932268: disable upcall */
	so->so_rcv.sb_flags &= ~SB_UPCALL;
	so->so_snd.sb_flags &= ~SB_UPCALL;

	if (dealloc)
		sodealloc(so);
}

/*
 * Close a socket on last file table reference removal.
 * Initiate disconnect if connected.
 * Free socket when disconnect complete.
 */
int
soclose_locked(so)
	register struct socket *so;
{
	int error = 0;
	lck_mtx_t *mutex_held;
	struct timespec ts;

	if (so->so_usecount == 0) {
		panic("soclose: so=%x refcount=0\n", so);
	}

	sflt_notify(so, sock_evt_closing, NULL);

	if ((so->so_options & SO_ACCEPTCONN)) {
		struct socket *sp;

		/* We do not want new connections to be added to the connection queues */
		so->so_options &= ~SO_ACCEPTCONN;

		while ((sp = TAILQ_FIRST(&so->so_incomp)) != NULL) {
			/* A bit tricky here. If the protocol uses a global
			 * lock we need to keep it held; with a per-socket
			 * lock we must lock the incoming socket, not the
			 * listening head.
			 */
			if (so->so_proto->pr_getlock != NULL) {
				socket_unlock(so, 0);
				socket_lock(sp, 1);
			}
			(void) soabort(sp);
			if (so->so_proto->pr_getlock != NULL) {
				socket_unlock(sp, 1);
				socket_lock(so, 0);
			}
		}

		while ((sp = TAILQ_FIRST(&so->so_comp)) != NULL) {
			/* Dequeue from so_comp since sofree() won't do it */
			TAILQ_REMOVE(&so->so_comp, sp, so_list);
			so->so_qlen--;

			if (so->so_proto->pr_getlock != NULL) {
				socket_unlock(so, 0);
				socket_lock(sp, 1);
			}

			sp->so_state &= ~SS_COMP;
			sp->so_head = NULL;

			(void) soabort(sp);
			if (so->so_proto->pr_getlock != NULL) {
				socket_unlock(sp, 1);
				socket_lock(so, 0);
			}
		}
	}
	if (so->so_pcb == 0) {
		/* 3915887: mark the socket as ready for dealloc */
		so->so_flags |= SOF_PCBCLEARING;
		goto discard;
	}
	if (so->so_state & SS_ISCONNECTED) {
		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
			error = sodisconnectlocked(so);
			if (error)
				goto drop;
		}
		if (so->so_options & SO_LINGER) {
			if ((so->so_state & SS_ISDISCONNECTING) &&
			    (so->so_state & SS_NBIO))
				goto drop;
			if (so->so_proto->pr_getlock != NULL)
				mutex_held = (*so->so_proto->pr_getlock)(so, 0);
			else
				mutex_held = so->so_proto->pr_domain->dom_mtx;
			while (so->so_state & SS_ISCONNECTED) {
				/* so_linger is in hundredths of a second here */
				ts.tv_sec = (so->so_linger / 100);
				ts.tv_nsec = (so->so_linger % 100) * NSEC_PER_USEC * 1000 * 10;
				error = msleep((caddr_t)&so->so_timeo, mutex_held,
					       PSOCK | PCATCH, "soclos", &ts);
				if (error) {
					/* It's OK when the timer fires; don't report an error */
					if (error == EWOULDBLOCK)
						error = 0;
					break;
				}
			}
		}
	}
drop:
	if (so->so_usecount == 0)
		panic("soclose: usecount is zero so=%x\n", so);
	if (so->so_pcb && !(so->so_flags & SOF_PCBCLEARING)) {
		int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so);
		if (error == 0)
			error = error2;
	}
	if (so->so_usecount <= 0)
		panic("soclose: usecount is zero so=%x\n", so);
discard:
	if (so->so_pcb && so->so_state & SS_NOFDREF)
		panic("soclose: NOFDREF");
	so->so_state |= SS_NOFDREF;
#ifdef __APPLE__
	so->so_proto->pr_domain->dom_refs--;
	evsofree(so);
#endif
	so->so_usecount--;
	sofree(so);
	return (error);
}

int
soclose(so)
	register struct socket *so;
{
	int error = 0;
	socket_lock(so, 1);
	if (so->so_retaincnt == 0)
		error = soclose_locked(so);
	else {	/* if the FD is going away, but the socket is retained in the kernel, remove its reference */
		so->so_usecount--;
		if (so->so_usecount < 2)
			panic("soclose: retaincnt non null and so=%x usecount=%x\n", so, so->so_usecount);
	}
	socket_unlock(so, 1);
	return (error);
}


/*
 * Must be called at splnet...
 */
//#### Should already be locked
int
soabort(so)
	struct socket *so;
{
	int error;

#ifdef MORE_LOCKING_DEBUG
	lck_mtx_t *mutex_held;

	if (so->so_proto->pr_getlock != NULL)
		mutex_held = (*so->so_proto->pr_getlock)(so, 0);
	else
		mutex_held = so->so_proto->pr_domain->dom_mtx;
	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
#endif

	error = (*so->so_proto->pr_usrreqs->pru_abort)(so);
	if (error) {
		sofree(so);
		return error;
	}
	return (0);
}

int
soacceptlock(so, nam, dolock)
	register struct socket *so;
	struct sockaddr **nam;
	int dolock;
{
	int error;

	if (dolock) socket_lock(so, 1);

	if ((so->so_state & SS_NOFDREF) == 0)
		panic("soaccept: !NOFDREF");
	so->so_state &= ~SS_NOFDREF;
	error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam);

	if (dolock) socket_unlock(so, 1);
	return (error);
}

int
soaccept(so, nam)
	register struct socket *so;
	struct sockaddr **nam;
{
	return (soacceptlock(so, nam, 1));
}

int
soconnectlock(so, nam, dolock)
	register struct socket *so;
	struct sockaddr *nam;
	int dolock;
{
	int error;
	struct proc *p = current_proc();

	if (dolock) socket_lock(so, 1);

	if (so->so_options & SO_ACCEPTCONN) {
		if (dolock) socket_unlock(so, 1);
		return (EOPNOTSUPP);
	}
	/*
	 * If protocol is connection-based, can only connect once.
	 * Otherwise, if connected, try to disconnect first.
	 * This allows user to disconnect by connecting to, e.g.,
	 * a null address.
	 */
	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
	    (error = sodisconnectlocked(so))))
		error = EISCONN;
	else {
		/*
		 * Run connect filter before calling protocol:
		 * - non-blocking connect returns before completion;
		 */
		{
			struct socket_filter_entry *filter;
			int filtered = 0;
			error = 0;
			for (filter = so->so_filt; filter && (error == 0);
			     filter = filter->sfe_next_onsocket) {
				if (filter->sfe_filter->sf_filter.sf_connect_out) {
					if (filtered == 0) {
						filtered = 1;
						sflt_use(so);
						socket_unlock(so, 0);
					}
					error = filter->sfe_filter->sf_filter.sf_connect_out(
						filter->sfe_cookie, so, nam);
				}
			}
			if (filtered != 0) {
				socket_lock(so, 0);
				sflt_unuse(so);
			}
		}
		if (error) {
			if (error == EJUSTRETURN)
				error = 0;
			if (dolock) socket_unlock(so, 1);
			return error;
		}

		error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, p);
	}
	if (dolock) socket_unlock(so, 1);
	return (error);
}

int
soconnect(so, nam)
	register struct socket *so;
	struct sockaddr *nam;
{
	return (soconnectlock(so, nam, 1));
}

int
soconnect2(so1, so2)
	register struct socket *so1;
	struct socket *so2;
{
	int error;

	socket_lock(so1, 1);
	if (so2->so_proto->pr_lock)
		socket_lock(so2, 1);

	error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2);

	socket_unlock(so1, 1);
	if (so2->so_proto->pr_lock)
		socket_unlock(so2, 1);
	return (error);
}

int
sodisconnectlocked(so)
	register struct socket *so;
{
	int error;

	if ((so->so_state & SS_ISCONNECTED) == 0) {
		error = ENOTCONN;
		goto bad;
	}
	if (so->so_state & SS_ISDISCONNECTING) {
		error = EALREADY;
		goto bad;
	}

	error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so);

	if (error == 0) {
		sflt_notify(so, sock_evt_disconnected, NULL);
	}

bad:
	return (error);
}

//### Locking version
int
sodisconnect(so)
	register struct socket *so;
{
	int error;

	socket_lock(so, 1);
	error = sodisconnectlocked(so);
	socket_unlock(so, 1);
	return (error);
}

#define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? M_DONTWAIT : M_WAIT)

/*
 * sosendcheck will lock the socket buffer if it isn't locked and
 * verify that there is space for the data being inserted.
 */
static int
sosendcheck(
	struct socket *so,
	struct sockaddr *addr,
	long resid,
	long clen,
	long atomic,
	int flags,
	int *sblocked)
{
	int error = 0;
	long space;
	int assumelock = 0;

restart:
	if (*sblocked == 0) {
		if ((so->so_snd.sb_flags & SB_LOCK) != 0 &&
		    so->so_send_filt_thread != 0 &&
		    so->so_send_filt_thread == current_thread()) {
			/*
			 * We're being called recursively from a filter,
			 * allow this to continue. Radar 4150520.
			 * Don't set sblocked because we don't want
			 * to perform an unlock later.
			 */
			assumelock = 1;
		}
		else {
			error = sblock(&so->so_snd, SBLOCKWAIT(flags));
			if (error) {
				return error;
			}
			*sblocked = 1;
		}
	}

	if (so->so_state & SS_CANTSENDMORE)
		return EPIPE;

	if (so->so_error) {
		error = so->so_error;
		so->so_error = 0;
		return error;
	}

	if ((so->so_state & SS_ISCONNECTED) == 0) {
		/*
		 * `sendto' and `sendmsg' are allowed on a connection-
		 * based socket if it supports implied connect.
		 * Return ENOTCONN if not connected and no address is
		 * supplied.
		 */
		if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
		    (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
			if ((so->so_state & SS_ISCONFIRMING) == 0 &&
			    !(resid == 0 && clen != 0))
				return ENOTCONN;
		} else if (addr == 0 && !(flags & MSG_HOLD))
			return (so->so_proto->pr_flags & PR_CONNREQUIRED) ? ENOTCONN : EDESTADDRREQ;
	}
	space = sbspace(&so->so_snd);
	if (flags & MSG_OOB)
		space += 1024;
	if ((atomic && resid > so->so_snd.sb_hiwat) ||
	    clen > so->so_snd.sb_hiwat)
		return EMSGSIZE;
	if (space < resid + clen &&
	    (atomic || space < so->so_snd.sb_lowat || space < clen)) {
		if ((so->so_state & SS_NBIO) || (flags & MSG_NBIO) || assumelock) {
			return EWOULDBLOCK;
		}
		sbunlock(&so->so_snd, 1);
		error = sbwait(&so->so_snd);
		if (error) {
			return error;
		}
		goto restart;
	}

	return 0;
}
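
/*
 * A worked example of the space check above (illustrative numbers):
 * with sb_hiwat = 8192, sb_lowat = 2048 and sb_cc = 7000, sbspace()
 * yields roughly 1192 bytes. A 4000-byte stream write then fails the
 * "space < resid + clen" test and, since 1192 < sb_lowat, the sender
 * either gets EWOULDBLOCK (non-blocking) or parks in sbwait() until
 * the protocol drains the buffer and more space opens up. An atomic
 * (datagram) send of more than sb_hiwat bytes fails outright with
 * EMSGSIZE, since no amount of waiting can make it fit.
 */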

/*
 * Send on a socket.
 * If send must go all at once and message is larger than
 * send buffering, then hard error.
 * Lock against other senders.
 * If must go all at once and not enough room now, then
 * inform user that this would block and do nothing.
 * Otherwise, if nonblocking, send as much as possible.
 * The data to be sent is described by "uio" if nonzero,
 * otherwise by the mbuf chain "top" (which must be null
 * if uio is not). Data provided in mbuf chain must be small
 * enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers
 * must check for short counts if EINTR/ERESTART are returned.
 * Data and control buffers are freed on return.
 * Experiment:
 * MSG_HOLD: go thru most of sosend(), but just enqueue the mbuf
 * MSG_SEND: go thru as for MSG_HOLD on current fragment, then
 *  point at the mbuf chain being constructed and go from there.
 */
int
sosend(so, addr, uio, top, control, flags)
	register struct socket *so;
	struct sockaddr *addr;
	struct uio *uio;
	struct mbuf *top;
	struct mbuf *control;
	int flags;
{
	struct mbuf **mp;
	register struct mbuf *m, *freelist = NULL;
	register long space, len, resid;
	int clen = 0, error, dontroute, mlen, sendflags;
	int atomic = sosendallatonce(so) || top;
	int sblocked = 0;
	struct proc *p = current_proc();

	if (uio)
		// LP64todo - fix this!
		resid = uio_resid(uio);
	else
		resid = top->m_pkthdr.len;

	KERNEL_DEBUG((DBG_FNC_SOSEND | DBG_FUNC_START),
		     so,
		     resid,
		     so->so_snd.sb_cc,
		     so->so_snd.sb_lowat,
		     so->so_snd.sb_hiwat);

	socket_lock(so, 1);

	/*
	 * In theory resid should be unsigned.
	 * However, space must be signed, as it might be less than 0
	 * if we over-committed, and we must use a signed comparison
	 * of space and resid. On the other hand, a negative resid
	 * causes us to loop sending 0-length segments to the protocol.
	 *
	 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
	 * type sockets since that's an error.
	 */
	if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
		error = EINVAL;
		socket_unlock(so, 1);
		goto out;
	}

	dontroute =
	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
	    (so->so_proto->pr_flags & PR_ATOMIC);
	if (p)
		p->p_stats->p_ru.ru_msgsnd++;
	if (control)
		clen = control->m_len;

	do {
		error = sosendcheck(so, addr, resid, clen, atomic, flags, &sblocked);
		if (error) {
			goto release;
		}
		mp = &top;
		space = sbspace(&so->so_snd) - clen + ((flags & MSG_OOB) ? 1024 : 0);

		do {
			if (uio == NULL) {
				/*
				 * Data is prepackaged in "top".
				 */
				resid = 0;
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
			} else {
				int chainlength;
				int bytes_to_copy;

				bytes_to_copy = min(resid, space);

				if (sosendminchain > 0) {
					chainlength = 0;
				} else
					chainlength = sosendmaxchain;

				socket_unlock(so, 0);

				do {
					int num_needed;
					int hdrs_needed = (top == 0) ? 1 : 0;

					/*
					 * try to maintain a local cache of mbuf clusters needed to complete this write
					 * the list is further limited to the number that are currently needed to fill the socket
					 * this mechanism allows a large number of mbufs/clusters to be grabbed under a single
					 * mbuf lock... if we can't get any clusters, then fall back to trying for mbufs
					 * if we fail early (or miscalculate the number needed) make sure to release any clusters
					 * we haven't yet consumed.
					 */
					if (freelist == NULL && bytes_to_copy > MCLBYTES) {
						num_needed = bytes_to_copy / NBPG;

						if ((bytes_to_copy - (num_needed * NBPG)) >= MINCLSIZE)
							num_needed++;

						freelist = m_getpackets_internal(&num_needed, hdrs_needed, M_WAIT, 0, NBPG);
						/* Fall back to cluster size if allocation failed */
					}

					if (freelist == NULL && bytes_to_copy > MINCLSIZE) {
						num_needed = bytes_to_copy / MCLBYTES;

						if ((bytes_to_copy - (num_needed * MCLBYTES)) >= MINCLSIZE)
							num_needed++;

						freelist = m_getpackets_internal(&num_needed, hdrs_needed, M_WAIT, 0, MCLBYTES);
						/* Fall back to a single mbuf if allocation failed */
					}

					if (freelist == NULL) {
						if (top == 0)
							MGETHDR(freelist, M_WAIT, MT_DATA);
						else
							MGET(freelist, M_WAIT, MT_DATA);

						if (freelist == NULL) {
							error = ENOBUFS;
							socket_lock(so, 0);
							goto release;
						}
						/*
						 * For datagram protocols, leave room
						 * for protocol headers in first mbuf.
						 */
						if (atomic && top == 0 && bytes_to_copy < MHLEN)
							MH_ALIGN(freelist, bytes_to_copy);
					}
					m = freelist;
					freelist = m->m_next;
					m->m_next = NULL;

					if ((m->m_flags & M_EXT))
						mlen = m->m_ext.ext_size;
					else if ((m->m_flags & M_PKTHDR))
						mlen = MHLEN - m_leadingspace(m);
					else
						mlen = MLEN;
					len = min(mlen, bytes_to_copy);

					chainlength += len;

					space -= len;

					error = uiomove(mtod(m, caddr_t), (int)len, uio);

					// LP64todo - fix this!
					resid = uio_resid(uio);

					m->m_len = len;
					*mp = m;
					top->m_pkthdr.len += len;
					if (error)
						break;
					mp = &m->m_next;
					if (resid <= 0) {
						if (flags & MSG_EOR)
							top->m_flags |= M_EOR;
						break;
					}
					bytes_to_copy = min(resid, space);

				} while (space > 0 && (chainlength < sosendmaxchain || atomic || resid < MINCLSIZE));

				socket_lock(so, 0);

				if (error)
					goto release;
			}

			if (flags & (MSG_HOLD|MSG_SEND))
			{
				/* Enqueue for later, go away if HOLD */
				register struct mbuf *mb1;
				if (so->so_temp && (flags & MSG_FLUSH))
				{
					m_freem(so->so_temp);
					so->so_temp = NULL;
				}
				if (so->so_temp)
					so->so_tail->m_next = top;
				else
					so->so_temp = top;
				mb1 = top;
				while (mb1->m_next)
					mb1 = mb1->m_next;
				so->so_tail = mb1;
				if (flags & MSG_HOLD)
				{
					top = NULL;
					goto release;
				}
				top = so->so_temp;
			}
			if (dontroute)
				so->so_options |= SO_DONTROUTE;
			/* Compute flags here, for pru_send and NKEs */
			sendflags = (flags & MSG_OOB) ? PRUS_OOB :
			    /*
			     * If the user set MSG_EOF, the protocol
			     * understands this flag and nothing left to
			     * send then use PRU_SEND_EOF instead of PRU_SEND.
			     */
			    ((flags & MSG_EOF) &&
			     (so->so_proto->pr_flags & PR_IMPLOPCL) &&
			     (resid <= 0)) ?
				PRUS_EOF :
			    /* If there is more to send set PRUS_MORETOCOME */
			    (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0;

			/*
			 * Socket filter processing
			 */
			{
				struct socket_filter_entry *filter;
				int filtered;

				filtered = 0;
				error = 0;
				for (filter = so->so_filt; filter && (error == 0);
				     filter = filter->sfe_next_onsocket) {
					if (filter->sfe_filter->sf_filter.sf_data_out) {
						int so_flags = 0;
						if (filtered == 0) {
							filtered = 1;
							so->so_send_filt_thread = current_thread();
							sflt_use(so);
							socket_unlock(so, 0);
							so_flags = (sendflags & MSG_OOB) ? sock_data_filt_flag_oob : 0;
						}
						error = filter->sfe_filter->sf_filter.sf_data_out(
							filter->sfe_cookie, so, addr, &top, &control, so_flags);
					}
				}

				if (filtered) {
					/*
					 * At this point, we've run at least one filter.
					 * The socket is unlocked as is the socket buffer.
					 */
					socket_lock(so, 0);
					sflt_unuse(so);
					so->so_send_filt_thread = 0;
					if (error) {
						if (error == EJUSTRETURN) {
							error = 0;
							clen = 0;
							control = 0;
							top = 0;
						}

						goto release;
					}
				}
			}
			/*
			 * End Socket filter processing
			 */

			if (error == EJUSTRETURN) {
				/* A socket filter handled this data */
				error = 0;
			}
			else {
				error = (*so->so_proto->pr_usrreqs->pru_send)(so,
					sendflags, top, addr, control, p);
			}
#ifdef __APPLE__
			if (flags & MSG_SEND)
				so->so_temp = NULL;
#endif
			if (dontroute)
				so->so_options &= ~SO_DONTROUTE;
			clen = 0;
			control = 0;
			top = 0;
			mp = &top;
			if (error)
				goto release;
		} while (resid && space > 0);
	} while (resid);

release:
	if (sblocked)
		sbunlock(&so->so_snd, 0);	/* will unlock socket */
	else
		socket_unlock(so, 1);
out:
	if (top)
		m_freem(top);
	if (control)
		m_freem(control);
	if (freelist)
		m_freem_list(freelist);

	KERNEL_DEBUG(DBG_FNC_SOSEND | DBG_FUNC_END,
		     so,
		     resid,
		     so->so_snd.sb_cc,
		     space,
		     error);

	return (error);
}
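
/*
 * The cluster-sizing logic above, with illustrative numbers: for a
 * 20 KB write with NBPG = 4096 and MCLBYTES = 2048, the first
 * m_getpackets_internal() call asks for 20480 / 4096 = 5 page-sized
 * clusters in one shot; only if that fails does it retry with 2 KB
 * clusters, and only then with plain mbufs. Grabbing the whole batch
 * under a single mbuf-allocator lock is what keeps large writes from
 * hammering that lock once per mbuf.
 */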

/*
 * Implement receive operations on a socket.
 * We depend on the way that records are added to the sockbuf
 * by sbappend*. In particular, each record (mbufs linked through m_next)
 * must begin with an address if the protocol so specifies,
 * followed by an optional mbuf or mbufs containing ancillary data,
 * and then zero or more mbufs of data.
 * In order to avoid blocking network interrupts for the entire time here,
 * we splx() while doing the actual copy to user space.
 * Although the sockbuf is locked, new data may still be appended,
 * and thus we must maintain consistency of the sockbuf during that time.
 *
 * The caller may receive the data as a single mbuf chain by supplying
 * an mbuf **mp0 for use in returning the chain. The uio is then used
 * only for the count in uio_resid.
 */
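/*
 * Sketch of the record layout this routine depends on (one record per
 * m_nextpkt link, mbufs within a record linked through m_next):
 *
 *	sb_mb -> [MT_SONAME] -> [MT_CONTROL]... -> [data] -> [data]...
 *	            |
 *	        m_nextpkt
 *	            v
 *	        next record ...
 *
 * The address mbuf is present only for PR_ADDR protocols; control
 * mbufs are optional; a record may also carry MT_OOBDATA.
 */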
1468int
1469soreceive(so, psa, uio, mp0, controlp, flagsp)
1470 register struct socket *so;
1471 struct sockaddr **psa;
1472 struct uio *uio;
1473 struct mbuf **mp0;
1474 struct mbuf **controlp;
1475 int *flagsp;
1476{
91447636
A
1477 register struct mbuf *m, **mp, *ml = NULL;
1478 register int flags, len, error, offset;
1c79356b
A
1479 struct protosw *pr = so->so_proto;
1480 struct mbuf *nextrecord;
1481 int moff, type = 0;
91447636
A
1482 // LP64todo - fix this!
1483 int orig_resid = uio_resid(uio);
55e303ae
A
1484 volatile struct mbuf *free_list;
1485 volatile int delayed_copy_len;
1486 int can_delay;
1487 int need_event;
1488 struct proc *p = current_proc();
1489
1490
91447636 1491 // LP64todo - fix this!
1c79356b
A
1492 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_START,
1493 so,
91447636 1494 uio_resid(uio),
1c79356b
A
1495 so->so_rcv.sb_cc,
1496 so->so_rcv.sb_lowat,
1497 so->so_rcv.sb_hiwat);
1498
91447636 1499 socket_lock(so, 1);
1c79356b 1500
91447636
A
1501#ifdef MORE_LOCKING_DEBUG
1502 if (so->so_usecount == 1)
1503 panic("soreceive: so=%x no other reference on socket\n", so);
1504#endif
1c79356b
A
1505 mp = mp0;
1506 if (psa)
1507 *psa = 0;
1508 if (controlp)
1509 *controlp = 0;
1510 if (flagsp)
1511 flags = *flagsp &~ MSG_EOR;
1512 else
1513 flags = 0;
1514 /*
1515 * When SO_WANTOOBFLAG is set we try to get out-of-band data
1516 * regardless of the flags argument. Here is the case were
1517 * out-of-band data is not inline.
1518 */
1519 if ((flags & MSG_OOB) ||
1520 ((so->so_options & SO_WANTOOBFLAG) != 0 &&
1521 (so->so_options & SO_OOBINLINE) == 0 &&
1522 (so->so_oobmark || (so->so_state & SS_RCVATMARK)))) {
1523 m = m_get(M_WAIT, MT_DATA);
55e303ae 1524 if (m == NULL) {
91447636 1525 socket_unlock(so, 1);
55e303ae 1526 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, ENOBUFS,0,0,0,0);
9bccf70c 1527 return (ENOBUFS);
55e303ae 1528 }
1c79356b
A
1529 error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
1530 if (error)
1531 goto bad;
91447636 1532 socket_unlock(so, 0);
1c79356b 1533 do {
91447636 1534 // LP64todo - fix this!
1c79356b 1535 error = uiomove(mtod(m, caddr_t),
91447636 1536 (int) min(uio_resid(uio), m->m_len), uio);
1c79356b 1537 m = m_free(m);
91447636
A
1538 } while (uio_resid(uio) && error == 0 && m);
1539 socket_lock(so, 0);
1c79356b
A
1540bad:
1541 if (m)
1542 m_freem(m);
9bccf70c
A
1543#ifdef __APPLE__
1544 if ((so->so_options & SO_WANTOOBFLAG) != 0) {
1545 if (error == EWOULDBLOCK || error == EINVAL) {
1546 /*
1547 * Let's try to get normal data:
1548 * EWOULDBLOCK: out-of-band data not receive yet;
1549 * EINVAL: out-of-band data already read.
1550 */
1551 error = 0;
1552 goto nooob;
1553 } else if (error == 0 && flagsp)
1554 *flagsp |= MSG_OOB;
91447636
A
1555 }
1556 socket_unlock(so, 1);
1c79356b 1557 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0);
9bccf70c 1558#endif
1c79356b
A
1559 return (error);
1560 }
1561nooob:
1562 if (mp)
1563 *mp = (struct mbuf *)0;
91447636 1564 if (so->so_state & SS_ISCONFIRMING && uio_resid(uio))
1c79356b
A
1565 (*pr->pr_usrreqs->pru_rcvd)(so, 0);
1566
55e303ae
A
1567
1568 free_list = (struct mbuf *)0;
1569 delayed_copy_len = 0;
1c79356b 1570restart:
91447636
A
1571#ifdef MORE_LOCKING_DEBUG
1572 if (so->so_usecount <= 1)
1573 printf("soreceive: sblock so=%x ref=%d on socket\n", so, so->so_usecount);
1574#endif
9bccf70c
A
1575 error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
1576 if (error) {
91447636 1577 socket_unlock(so, 1);
1c79356b
A
1578 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0);
1579 return (error);
1580 }
1c79356b
A
1581
1582 m = so->so_rcv.sb_mb;
1583 /*
1584 * If we have less data than requested, block awaiting more
1585 * (subject to any timeout) if:
1586 * 1. the current count is less than the low water mark, or
1587 * 2. MSG_WAITALL is set, and it is possible to do the entire
1588 * receive operation at once if we block (resid <= hiwat).
1589 * 3. MSG_DONTWAIT is not set
1590 * If MSG_WAITALL is set but resid is larger than the receive buffer,
1591 * we have to do the receive in sections, and thus risk returning
1592 * a short count if a timeout or signal occurs after we start.
1593 */
1594 if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
91447636 1595 so->so_rcv.sb_cc < uio_resid(uio)) &&
55e303ae 1596 (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
91447636 1597 ((flags & MSG_WAITALL) && uio_resid(uio) <= so->so_rcv.sb_hiwat)) &&
1c79356b 1598 m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
55e303ae 1599
1c79356b
A
1600 KASSERT(m != 0 || !so->so_rcv.sb_cc, ("receive 1"));
1601 if (so->so_error) {
1602 if (m)
1603 goto dontblock;
1604 error = so->so_error;
1605 if ((flags & MSG_PEEK) == 0)
1606 so->so_error = 0;
1607 goto release;
1608 }
1609 if (so->so_state & SS_CANTRCVMORE) {
1610 if (m)
1611 goto dontblock;
1612 else
1613 goto release;
1614 }
1615 for (; m; m = m->m_next)
1616 if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
1617 m = so->so_rcv.sb_mb;
1618 goto dontblock;
1619 }
1620 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
1621 (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
1622 error = ENOTCONN;
1623 goto release;
1624 }
91447636 1625 if (uio_resid(uio) == 0)
1c79356b 1626 goto release;
91447636 1627 if ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO))) {
1c79356b
A
1628 error = EWOULDBLOCK;
1629 goto release;
1630 }
91447636
A
1631 sbunlock(&so->so_rcv, 1);
1632#ifdef EVEN_MORE_LOCKING_DEBUG
1c79356b
A
1633 if (socket_debug)
1634 printf("Waiting for socket data\n");
91447636 1635#endif
55e303ae 1636
1c79356b 1637 error = sbwait(&so->so_rcv);
91447636 1638#ifdef EVEN_MORE_LOCKING_DEBUG
1c79356b
A
1639 if (socket_debug)
1640 printf("SORECEIVE - sbwait returned %d\n", error);
91447636
A
1641#endif
1642 if (so->so_usecount < 1)
1643 panic("soreceive: after 2nd sblock so=%x ref=%d on socket\n", so, so->so_usecount);
9bccf70c 1644 if (error) {
91447636 1645 socket_unlock(so, 1);
1c79356b
A
1646 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0);
1647 return (error);
1648 }
1649 goto restart;
1650 }
1651dontblock:
9bccf70c 1652#ifndef __APPLE__
1c79356b
A
1653 if (uio->uio_procp)
1654 uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
55e303ae
A
1655#else /* __APPLE__ */
1656 /*
1657 * 2207985
1658 * This should be uio->uio-procp; however, some callers of this
1659 * function use auto variables with stack garbage, and fail to
1660 * fill out the uio structure properly.
1661 */
1662 if (p)
1663 p->p_stats->p_ru.ru_msgrcv++;
1664#endif /* __APPLE__ */
1c79356b
A
1665 nextrecord = m->m_nextpkt;
1666 if ((pr->pr_flags & PR_ADDR) && m->m_type == MT_SONAME) {
1667 KASSERT(m->m_type == MT_SONAME, ("receive 1a"));
1668 orig_resid = 0;
4a249263 1669 if (psa) {
1c79356b
A
1670 *psa = dup_sockaddr(mtod(m, struct sockaddr *),
1671 mp0 == 0);
4a249263
A
1672 if ((*psa == 0) && (flags & MSG_NEEDSA)) {
1673 error = EWOULDBLOCK;
1674 goto release;
1675 }
1676 }
1c79356b
A
1677 if (flags & MSG_PEEK) {
1678 m = m->m_next;
1679 } else {
1680 sbfree(&so->so_rcv, m);
91447636
A
1681 if (m->m_next == 0 && so->so_rcv.sb_cc != 0)
1682 panic("soreceive: about to create invalid socketbuf");
1c79356b
A
1683 MFREE(m, so->so_rcv.sb_mb);
1684 m = so->so_rcv.sb_mb;
1685 }
1686 }
1687 while (m && m->m_type == MT_CONTROL && error == 0) {
1688 if (flags & MSG_PEEK) {
1689 if (controlp)
1690 *controlp = m_copy(m, 0, m->m_len);
1691 m = m->m_next;
1692 } else {
1693 sbfree(&so->so_rcv, m);
1694 if (controlp) {
1695 if (pr->pr_domain->dom_externalize &&
1696 mtod(m, struct cmsghdr *)->cmsg_type ==
91447636
A
1697 SCM_RIGHTS) {
1698 socket_unlock(so, 0); /* release socket lock: see 3903171 */
1c79356b 1699 error = (*pr->pr_domain->dom_externalize)(m);
91447636
A
1700 socket_lock(so, 0);
1701 }
1c79356b 1702 *controlp = m;
91447636
A
1703 if (m->m_next == 0 && so->so_rcv.sb_cc != 0)
1704 panic("soreceive: so->so_rcv.sb_mb->m_next == 0 && so->so_rcv.sb_cc != 0");
1c79356b
A
1705 so->so_rcv.sb_mb = m->m_next;
1706 m->m_next = 0;
1707 m = so->so_rcv.sb_mb;
1708 } else {
1709 MFREE(m, so->so_rcv.sb_mb);
1710 m = so->so_rcv.sb_mb;
1711 }
1712 }
1713 if (controlp) {
1714 orig_resid = 0;
1715 controlp = &(*controlp)->m_next;
1716 }
1717 }
1718 if (m) {
1719 if ((flags & MSG_PEEK) == 0)
1720 m->m_nextpkt = nextrecord;
1721 type = m->m_type;
1722 if (type == MT_OOBDATA)
1723 flags |= MSG_OOB;
1724 }
1725 moff = 0;
1726 offset = 0;
fa4905b1 1727
91447636 1728 if (!(flags & MSG_PEEK) && uio_resid(uio) > sorecvmincopy)
55e303ae
A
1729 can_delay = 1;
1730 else
1731 can_delay = 0;
1732
1733 need_event = 0;
fa4905b1 1734
91447636 1735 while (m && (uio_resid(uio) - delayed_copy_len) > 0 && error == 0) {
1c79356b
A
1736 if (m->m_type == MT_OOBDATA) {
1737 if (type != MT_OOBDATA)
1738 break;
1739 } else if (type == MT_OOBDATA)
1740 break;
9bccf70c 1741#ifndef __APPLE__
1c79356b
A
1742/*
1743 * This assertion needs rework. The trouble is Appletalk is uses many
1744 * mbuf types (NOT listed in mbuf.h!) which will trigger this panic.
1745 * For now just remove the assertion... CSM 9/98
1746 */
1747 else
1748 KASSERT(m->m_type == MT_DATA || m->m_type == MT_HEADER,
1749 ("receive 3"));
9bccf70c
A
1750#else
1751 /*
1752 * Make sure to allways set MSG_OOB event when getting
1753 * out of band data inline.
1754 */
1c79356b 1755 if ((so->so_options & SO_WANTOOBFLAG) != 0 &&
9bccf70c
A
1756 (so->so_options & SO_OOBINLINE) != 0 &&
1757 (so->so_state & SS_RCVATMARK) != 0) {
1758 flags |= MSG_OOB;
1759 }
1760#endif
1c79356b 1761 so->so_state &= ~SS_RCVATMARK;
91447636
A
1762 // LP64todo - fix this!
1763 len = uio_resid(uio) - delayed_copy_len;
1c79356b
A
1764 if (so->so_oobmark && len > so->so_oobmark - offset)
1765 len = so->so_oobmark - offset;
1766 if (len > m->m_len - moff)
1767 len = m->m_len - moff;
1768 /*
1769 * If mp is set, just pass back the mbufs.
1770 * Otherwise copy them out via the uio, then free.
1771 * Sockbuf must be consistent here (points to current mbuf,
1772 * it points to next record) when we drop priority;
1773 * we must note any additions to the sockbuf when we
1774 * block interrupts again.
1775 */
1776 if (mp == 0) {
55e303ae
A
1777 if (can_delay && len == m->m_len) {
1778 /*
1779 * only delay the copy if we're consuming the
1780 * mbuf and we're NOT in MSG_PEEK mode
1781 * and we have enough data to make it worthwile
1782 * to drop and retake the funnel... can_delay
1783 * reflects the state of the 2 latter constraints
1784 * moff should always be zero in these cases
1785 */
1786 delayed_copy_len += len;
1787 } else {
55e303ae
A
1788
1789 if (delayed_copy_len) {
91447636 1790 error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);
55e303ae
A
1791
1792 if (error) {
55e303ae
A
1793 goto release;
1794 }
1795 if (m != so->so_rcv.sb_mb) {
1796 /*
1797 * can only get here if MSG_PEEK is not set
1798 * therefore, m should point at the head of the rcv queue...
1799 * if it doesn't, it means something drastically changed
1800 * while we were out from behind the funnel in sodelayed_copy...
1801 * perhaps a RST on the stream... in any event, the stream has
1802 * been interrupted... it's probably best just to return
1803 * whatever data we've moved and let the caller sort it out...
1804 */
						break;
					}
				}
				socket_unlock(so, 0);
				error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
				socket_lock(so, 0);

				if (error)
					goto release;
			}
		} else
			uio_setresid(uio, (uio_resid(uio) - len));

		if (len == m->m_len - moff) {
			if (m->m_flags & M_EOR)
				flags |= MSG_EOR;
			if (flags & MSG_PEEK) {
				m = m->m_next;
				moff = 0;
			} else {
				nextrecord = m->m_nextpkt;
				sbfree(&so->so_rcv, m);
				m->m_nextpkt = NULL;

				if (mp) {
					*mp = m;
					mp = &m->m_next;
					so->so_rcv.sb_mb = m = m->m_next;
					*mp = (struct mbuf *)0;
				} else {
					if (free_list == NULL)
						free_list = m;
					else
						ml->m_next = m;
					ml = m;
					so->so_rcv.sb_mb = m = m->m_next;
					ml->m_next = 0;
				}
				if (m)
					m->m_nextpkt = nextrecord;
			}
		} else {
			if (flags & MSG_PEEK)
				moff += len;
			else {
				if (mp)
					*mp = m_copym(m, 0, len, M_WAIT);
				m->m_data += len;
				m->m_len -= len;
				so->so_rcv.sb_cc -= len;
			}
		}
		if (so->so_oobmark) {
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					so->so_state |= SS_RCVATMARK;
					/*
					 * delay posting the actual event until after
					 * any delayed copy processing has finished
					 */
					need_event = 1;
					break;
				}
			} else {
				offset += len;
				if (offset == so->so_oobmark)
					break;
			}
		}
		if (flags & MSG_EOR)
			break;
		/*
		 * If the MSG_WAITALL or MSG_WAITSTREAM flag is set (for non-atomic socket),
		 * we must not quit until "uio->uio_resid == 0" or an error
		 * termination.  If a signal/timeout occurs, return
		 * with a short count but without error.
		 * Keep sockbuf locked against other readers.
		 */
		while (flags & (MSG_WAITALL|MSG_WAITSTREAM) && m == 0 && (uio_resid(uio) - delayed_copy_len) > 0 &&
		    !sosendallatonce(so) && !nextrecord) {
			if (so->so_error || so->so_state & SS_CANTRCVMORE)
				goto release;

			if (pr->pr_flags & PR_WANTRCVD && so->so_pcb && (((struct inpcb *)so->so_pcb)->inp_state != INPCB_STATE_DEAD))
				(*pr->pr_usrreqs->pru_rcvd)(so, flags);
			if (sbwait(&so->so_rcv)) {
				error = 0;
				goto release;
			}
			/*
			 * We have to wait until after we get back from the sbwait
			 * to do the copy, because we will drop the funnel if we
			 * have enough data that has been delayed.  Dropping the
			 * funnel opens up a window allowing the netisr thread to
			 * process incoming packets and change the state of this
			 * socket.  We issue the sbwait first because the socket is
			 * empty and we expect the netisr thread to wake us when
			 * more packets arrive; if we let that processing happen
			 * and then sbwait, we could stall forever with packets
			 * sitting in the socket if no further packets arrive from
			 * the remote side.
			 *
			 * We want to copy before we've collected all the data to
			 * satisfy this request, so that the copy can overlap the
			 * incoming packet processing on an MP system.
			 */
			if (delayed_copy_len > sorecvmincopy && (delayed_copy_len > (so->so_rcv.sb_hiwat / 2))) {
				error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);

				if (error)
					goto release;
			}
			m = so->so_rcv.sb_mb;
			if (m) {
				nextrecord = m->m_nextpkt;
			}
		}
	}
#ifdef MORE_LOCKING_DEBUG
	if (so->so_usecount <= 1)
		panic("soreceive: after big while so=%x ref=%d on socket\n", so, so->so_usecount);
#endif

	if (m && pr->pr_flags & PR_ATOMIC) {
#ifdef __APPLE__
		if (so->so_options & SO_DONTTRUNC)
			flags |= MSG_RCVMORE;
		else {
#endif
			flags |= MSG_TRUNC;
			if ((flags & MSG_PEEK) == 0)
				(void) sbdroprecord(&so->so_rcv);
#ifdef __APPLE__
		}
#endif
	}
	if ((flags & MSG_PEEK) == 0) {
		if (m == 0)
			so->so_rcv.sb_mb = nextrecord;
		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
			(*pr->pr_usrreqs->pru_rcvd)(so, flags);
	}
#ifdef __APPLE__
	if ((so->so_options & SO_WANTMORE) && so->so_rcv.sb_cc > 0)
		flags |= MSG_HAVEMORE;

	if (delayed_copy_len) {
		error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);

		if (error)
			goto release;
	}
	if (free_list) {
		m_freem_list((struct mbuf *)free_list);
		free_list = (struct mbuf *)0;
	}
	if (need_event)
		postevent(so, 0, EV_OOB);
#endif
	if (orig_resid == uio_resid(uio) && orig_resid &&
	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
		sbunlock(&so->so_rcv, 1);
		goto restart;
	}

	if (flagsp)
		*flagsp |= flags;
release:
#ifdef MORE_LOCKING_DEBUG
	if (so->so_usecount <= 1)
		panic("soreceive: release so=%x ref=%d on socket\n", so, so->so_usecount);
#endif
	if (delayed_copy_len) {
		error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);
	}
	if (free_list) {
		m_freem_list((struct mbuf *)free_list);
	}
	sbunlock(&so->so_rcv, 0);	/* will unlock socket */

	// LP64todo - fix this!
	KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END,
	    so, uio_resid(uio), so->so_rcv.sb_cc, 0, error);

	return (error);
}

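/*
 * sodelayed_copy: flush mbufs whose copyout was deferred by soreceive.
 *
 * Descriptive note (inferred from the code below, not from any external
 * spec): the socket lock is dropped around the uiomove() calls so that
 * incoming packet processing can proceed while data is copied out to user
 * space.  The mbufs on free_list have already been unlinked from the
 * receive buffer, so they are safe to walk unlocked, and the whole chain
 * is freed once the copy completes.
 */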
static int sodelayed_copy(struct socket *so, struct uio *uio, struct mbuf **free_list, int *resid)
{
	int error = 0;
	struct mbuf *m;

	m = *free_list;

	socket_unlock(so, 0);

	while (m && error == 0) {
		error = uiomove(mtod(m, caddr_t), (int)m->m_len, uio);
		m = m->m_next;
	}
	m_freem_list(*free_list);

	*free_list = (struct mbuf *)NULL;
	*resid = 0;

	socket_lock(so, 0);

	return (error);
}

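/*
 * soshutdown: disable receives and/or sends on a socket.
 *
 * A brief note on the flow (a reading of the code below): socket filters
 * are notified first; SHUT_RD and SHUT_RDWR flush the receive side via
 * sorflush(), while SHUT_WR and SHUT_RDWR hand the write-side shutdown to
 * the protocol through pru_shutdown.
 */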
int
soshutdown(so, how)
	register struct socket *so;
	int how;
{
	register struct protosw *pr = so->so_proto;
	int ret;

	socket_lock(so, 1);

	sflt_notify(so, sock_evt_shutdown, &how);

	if (how != SHUT_WR) {
		sorflush(so);
		postevent(so, 0, EV_RCLOSED);
	}
	if (how != SHUT_RD) {
		ret = ((*pr->pr_usrreqs->pru_shutdown)(so));
		postevent(so, 0, EV_WCLOSED);
		KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, 0,0,0,0,0);
		socket_unlock(so, 1);
		return(ret);
	}

	KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, 0,0,0,0,0);
	socket_unlock(so, 1);
	return (0);
}

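/*
 * sorflush: discard all data queued on a socket's receive buffer.
 *
 * Sketch of the approach (inferred from the code): the sockbuf is locked
 * with SB_NOINTR so the flush cannot be interrupted, the contents are
 * moved aside into a local copy (asb), the live sockbuf is zeroed and
 * relinked to the socket (preserving any attached knotes), and only then
 * is the aside copy released, with dom_dispose giving protocols carrying
 * PR_RIGHTS a chance to dispose of any in-flight rights.
 */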
void
sorflush(so)
	register struct socket *so;
{
	register struct sockbuf *sb = &so->so_rcv;
	register struct protosw *pr = so->so_proto;
	struct sockbuf asb;

#ifdef MORE_LOCKING_DEBUG
	lck_mtx_t * mutex_held;

	if (so->so_proto->pr_getlock != NULL)
		mutex_held = (*so->so_proto->pr_getlock)(so, 0);
	else
		mutex_held = so->so_proto->pr_domain->dom_mtx;
	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
#endif

	sflt_notify(so, sock_evt_flush_read, NULL);

	sb->sb_flags |= SB_NOINTR;
	(void) sblock(sb, M_WAIT);
	socantrcvmore(so);
	sbunlock(sb, 1);
#ifdef __APPLE__
	selthreadclear(&sb->sb_sel);
#endif
	asb = *sb;
	bzero((caddr_t)sb, sizeof (*sb));
	sb->sb_so = so;	/* reestablish link to socket */
	if (asb.sb_flags & SB_KNOTE) {
		sb->sb_sel.si_note = asb.sb_sel.si_note;
		sb->sb_flags = SB_KNOTE;
	}
	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
		(*pr->pr_domain->dom_dispose)(asb.sb_mb);
	sbrelease(&asb);
}

/*
 * Perhaps this routine, and sooptcopyout(), below, ought to come in
 * an additional variant to handle the case where the option value needs
 * to be some kind of integer, but not a specific size.
 * In addition to their use here, these functions are also called by the
 * protocol-level pr_ctloutput() routines.
 */
int
sooptcopyin(sopt, buf, len, minlen)
	struct sockopt *sopt;
	void *buf;
	size_t len;
	size_t minlen;
{
	size_t	valsize;

	/*
	 * If the user gives us more than we wanted, we ignore it,
	 * but if we don't get the minimum length the caller
	 * wants, we return EINVAL.  On success, sopt->sopt_valsize
	 * is set to however much we actually retrieved.
	 */
	if ((valsize = sopt->sopt_valsize) < minlen)
		return EINVAL;
	if (valsize > len)
		sopt->sopt_valsize = valsize = len;

	if (sopt->sopt_p != 0)
		return (copyin(sopt->sopt_val, buf, valsize));

	bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), buf, valsize);
	return 0;
}

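/*
 * sosetopt: kernel-side implementation of setsockopt(2).
 *
 * Outline (inferred from the code below): attached socket filters get
 * first crack at the option via sf_setoption (EJUSTRETURN means a filter
 * fully handled it); non-SOL_SOCKET levels are passed straight to the
 * protocol's pr_ctloutput; SOL_SOCKET options are handled inline, and on
 * success are also offered to pr_ctloutput so the protocol can observe
 * the change.
 */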
int
sosetopt(so, sopt)
	struct socket *so;
	struct sockopt *sopt;
{
	int	error, optval;
	struct	linger l;
	struct	timeval tv;
	short	val;

	socket_lock(so, 1);

	if (sopt->sopt_dir != SOPT_SET) {
		sopt->sopt_dir = SOPT_SET;
	}

	{
		struct socket_filter_entry *filter;
		int filtered = 0;
		error = 0;
		for (filter = so->so_filt; filter && (error == 0);
		     filter = filter->sfe_next_onsocket) {
			if (filter->sfe_filter->sf_filter.sf_setoption) {
				if (filtered == 0) {
					filtered = 1;
					sflt_use(so);
					socket_unlock(so, 0);
				}
				error = filter->sfe_filter->sf_filter.sf_setoption(
					filter->sfe_cookie, so, sopt);
			}
		}

		if (filtered != 0) {
			socket_lock(so, 0);
			sflt_unuse(so);

			if (error) {
				if (error == EJUSTRETURN)
					error = 0;
				goto bad;
			}
		}
	}

	error = 0;
	if (sopt->sopt_level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			error = (*so->so_proto->pr_ctloutput)(so, sopt);
			socket_unlock(so, 1);
			return (error);
		}
		error = ENOPROTOOPT;
	} else {
		switch (sopt->sopt_name) {
		case SO_LINGER:
		case SO_LINGER_SEC:
			error = sooptcopyin(sopt, &l, sizeof l, sizeof l);
			if (error)
				goto bad;

			so->so_linger = (sopt->sopt_name == SO_LINGER) ? l.l_linger : l.l_linger * hz;
			if (l.l_onoff)
				so->so_options |= SO_LINGER;
			else
				so->so_options &= ~SO_LINGER;
			break;

		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_DONTROUTE:
		case SO_USELOOPBACK:
		case SO_BROADCAST:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
#ifdef __APPLE__
		case SO_DONTTRUNC:
		case SO_WANTMORE:
		case SO_WANTOOBFLAG:
#endif
			error = sooptcopyin(sopt, &optval, sizeof optval,
					    sizeof optval);
			if (error)
				goto bad;
			if (optval)
				so->so_options |= sopt->sopt_name;
			else
				so->so_options &= ~sopt->sopt_name;
			break;

		case SO_SNDBUF:
		case SO_RCVBUF:
		case SO_SNDLOWAT:
		case SO_RCVLOWAT:
			error = sooptcopyin(sopt, &optval, sizeof optval,
					    sizeof optval);
			if (error)
				goto bad;

			/*
			 * Values < 1 make no sense for any of these
			 * options, so disallow them.
			 */
			if (optval < 1) {
				error = EINVAL;
				goto bad;
			}

			switch (sopt->sopt_name) {
			case SO_SNDBUF:
			case SO_RCVBUF:
				if (sbreserve(sopt->sopt_name == SO_SNDBUF ?
					      &so->so_snd : &so->so_rcv,
					      (u_long) optval) == 0) {
					error = ENOBUFS;
					goto bad;
				}
				break;

			/*
			 * Make sure the low-water is never greater than
			 * the high-water.
			 */
			case SO_SNDLOWAT:
				so->so_snd.sb_lowat =
				    (optval > so->so_snd.sb_hiwat) ?
				    so->so_snd.sb_hiwat : optval;
				break;
			case SO_RCVLOWAT:
				so->so_rcv.sb_lowat =
				    (optval > so->so_rcv.sb_hiwat) ?
				    so->so_rcv.sb_hiwat : optval;
				break;
			}
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
			error = sooptcopyin(sopt, &tv, sizeof tv,
					    sizeof tv);
			if (error)
				goto bad;

			if (tv.tv_sec < 0 || tv.tv_sec > LONG_MAX ||
			    tv.tv_usec < 0 || tv.tv_usec >= 1000000) {
				error = EDOM;
				goto bad;
			}

			switch (sopt->sopt_name) {
			case SO_SNDTIMEO:
				so->so_snd.sb_timeo = tv;
				break;
			case SO_RCVTIMEO:
				so->so_rcv.sb_timeo = tv;
				break;
			}
			break;

		case SO_NKE:
		{
			struct so_nke nke;

			error = sooptcopyin(sopt, &nke,
					    sizeof nke, sizeof nke);
			if (error)
				goto bad;

			error = sflt_attach_private(so, NULL, nke.nke_handle, 1);
			break;
		}

		case SO_NOSIGPIPE:
			error = sooptcopyin(sopt, &optval, sizeof optval,
					    sizeof optval);
			if (error)
				goto bad;
			if (optval)
				so->so_flags |= SOF_NOSIGPIPE;
			else
				so->so_flags &= ~SOF_NOSIGPIPE;
			break;

		case SO_NOADDRERR:
			error = sooptcopyin(sopt, &optval, sizeof optval,
					    sizeof optval);
			if (error)
				goto bad;
			if (optval)
				so->so_flags |= SOF_NOADDRAVAIL;
			else
				so->so_flags &= ~SOF_NOADDRAVAIL;
			break;

		default:
			error = ENOPROTOOPT;
			break;
		}
		if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
			(void) ((*so->so_proto->pr_ctloutput)(so, sopt));
		}
	}
bad:
	socket_unlock(so, 1);
	return (error);
}

/* Helper routine for getsockopt */
int
sooptcopyout(sopt, buf, len)
	struct sockopt *sopt;
	void *buf;
	size_t len;
{
	int	error;
	size_t	valsize;

	error = 0;

	/*
	 * Documented get behavior is that we always return a value,
	 * possibly truncated to fit in the user's buffer.
	 * Traditional behavior is that we always tell the user
	 * precisely how much we copied, rather than something useful
	 * like the total amount we had available for her.
	 * Note that this interface is not idempotent; the entire answer
	 * must be generated ahead of time.
	 */
	valsize = min(len, sopt->sopt_valsize);
	sopt->sopt_valsize = valsize;
	if (sopt->sopt_val != USER_ADDR_NULL) {
		if (sopt->sopt_p != 0)
			error = copyout(buf, sopt->sopt_val, valsize);
		else
			bcopy(buf, CAST_DOWN(caddr_t, sopt->sopt_val), valsize);
	}
	return error;
}

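/*
 * sogetopt: kernel-side implementation of getsockopt(2).
 *
 * The structure mirrors sosetopt() above (a reading of the code, not a
 * spec): socket filters may intercept the request via sf_getoption,
 * non-SOL_SOCKET levels go to the protocol's pr_ctloutput, and the
 * SOL_SOCKET cases below mostly reduce to copying a single integer out
 * through the shared "integer:" label.
 */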
int
sogetopt(so, sopt)
	struct socket *so;
	struct sockopt *sopt;
{
	int	error, optval;
	struct	linger l;
	struct	timeval tv;

	if (sopt->sopt_dir != SOPT_GET) {
		sopt->sopt_dir = SOPT_GET;
	}

	socket_lock(so, 1);

	{
		struct socket_filter_entry *filter;
		int filtered = 0;
		error = 0;
		for (filter = so->so_filt; filter && (error == 0);
		     filter = filter->sfe_next_onsocket) {
			if (filter->sfe_filter->sf_filter.sf_getoption) {
				if (filtered == 0) {
					filtered = 1;
					sflt_use(so);
					socket_unlock(so, 0);
				}
				error = filter->sfe_filter->sf_filter.sf_getoption(
					filter->sfe_cookie, so, sopt);
			}
		}
		if (filtered != 0) {
			socket_lock(so, 0);
			sflt_unuse(so);

			if (error) {
				if (error == EJUSTRETURN)
					error = 0;
				socket_unlock(so, 1);
				return error;
			}
		}
	}

	error = 0;
	if (sopt->sopt_level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			error = (*so->so_proto->pr_ctloutput)(so, sopt);
			socket_unlock(so, 1);
			return (error);
		} else {
			socket_unlock(so, 1);
			return (ENOPROTOOPT);
		}
	} else {
		switch (sopt->sopt_name) {
		case SO_LINGER:
		case SO_LINGER_SEC:
			l.l_onoff = so->so_options & SO_LINGER;
			l.l_linger = (sopt->sopt_name == SO_LINGER) ? so->so_linger :
				so->so_linger / hz;
			error = sooptcopyout(sopt, &l, sizeof l);
			break;

		case SO_USELOOPBACK:
		case SO_DONTROUTE:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_BROADCAST:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
#ifdef __APPLE__
		case SO_DONTTRUNC:
		case SO_WANTMORE:
		case SO_WANTOOBFLAG:
#endif
			optval = so->so_options & sopt->sopt_name;
integer:
			error = sooptcopyout(sopt, &optval, sizeof optval);
			break;

		case SO_TYPE:
			optval = so->so_type;
			goto integer;

#ifdef __APPLE__
		case SO_NREAD:
		{
			int pkt_total;
			struct mbuf *m1;

			pkt_total = 0;
			m1 = so->so_rcv.sb_mb;
			if (so->so_proto->pr_flags & PR_ATOMIC) {
				while (m1) {
					if (m1->m_type == MT_DATA)
						pkt_total += m1->m_len;
					m1 = m1->m_next;
				}
				optval = pkt_total;
			} else
				optval = so->so_rcv.sb_cc;
			goto integer;
		}
		case SO_NWRITE:
			optval = so->so_snd.sb_cc;
			goto integer;
#endif
		case SO_ERROR:
			optval = so->so_error;
			so->so_error = 0;
			goto integer;

		case SO_SNDBUF:
			optval = so->so_snd.sb_hiwat;
			goto integer;

		case SO_RCVBUF:
			optval = so->so_rcv.sb_hiwat;
			goto integer;

		case SO_SNDLOWAT:
			optval = so->so_snd.sb_lowat;
			goto integer;

		case SO_RCVLOWAT:
			optval = so->so_rcv.sb_lowat;
			goto integer;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
			tv = (sopt->sopt_name == SO_SNDTIMEO ?
			      so->so_snd.sb_timeo : so->so_rcv.sb_timeo);

			error = sooptcopyout(sopt, &tv, sizeof tv);
			break;

		case SO_NOSIGPIPE:
			optval = (so->so_flags & SOF_NOSIGPIPE);
			goto integer;

		case SO_NOADDRERR:
			optval = (so->so_flags & SOF_NOADDRAVAIL);
			goto integer;

		default:
			error = ENOPROTOOPT;
			break;
		}
		socket_unlock(so, 1);
		return (error);
	}
}

/* XXX; prepare mbuf for (__FreeBSD__ < 3) routines. */
int
soopt_getm(struct sockopt *sopt, struct mbuf **mp)
{
	struct mbuf *m, *m_prev;
	int sopt_size = sopt->sopt_valsize;

	if (sopt_size > MAX_SOOPTGETM_SIZE)
		return EMSGSIZE;

	MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_DATA);
	if (m == 0)
		return ENOBUFS;
	if (sopt_size > MLEN) {
		MCLGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			return ENOBUFS;
		}
		m->m_len = min(MCLBYTES, sopt_size);
	} else {
		m->m_len = min(MLEN, sopt_size);
	}
	sopt_size -= m->m_len;
	*mp = m;
	m_prev = m;

	while (sopt_size) {
		MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_DATA);
		if (m == 0) {
			m_freem(*mp);
			return ENOBUFS;
		}
		if (sopt_size > MLEN) {
			MCLGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT);
			if ((m->m_flags & M_EXT) == 0) {
				m_freem(*mp);
				return ENOBUFS;
			}
			m->m_len = min(MCLBYTES, sopt_size);
		} else {
			m->m_len = min(MLEN, sopt_size);
		}
		sopt_size -= m->m_len;
		m_prev->m_next = m;
		m_prev = m;
	}
	return 0;
}

/* XXX; copyin sopt data into mbuf chain for (__FreeBSD__ < 3) routines. */
int
soopt_mcopyin(struct sockopt *sopt, struct mbuf *m)
{
	struct mbuf *m0 = m;

	if (sopt->sopt_val == USER_ADDR_NULL)
		return 0;
	while (m != NULL && sopt->sopt_valsize >= m->m_len) {
		if (sopt->sopt_p != NULL) {
			int error;

			error = copyin(sopt->sopt_val, mtod(m, char *), m->m_len);
			if (error != 0) {
				m_freem(m0);
				return(error);
			}
		} else
			bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), mtod(m, char *), m->m_len);
		sopt->sopt_valsize -= m->m_len;
		sopt->sopt_val += m->m_len;
		m = m->m_next;
	}
	if (m != NULL)	/* enough space should have been allocated in ip6_sooptmcopyin() */
		panic("soopt_mcopyin");
	return 0;
}

/* XXX; copyout mbuf chain data into soopt for (__FreeBSD__ < 3) routines. */
int
soopt_mcopyout(struct sockopt *sopt, struct mbuf *m)
{
	struct mbuf *m0 = m;
	size_t valsize = 0;

	if (sopt->sopt_val == USER_ADDR_NULL)
		return 0;
	while (m != NULL && sopt->sopt_valsize >= m->m_len) {
		if (sopt->sopt_p != NULL) {
			int error;

			error = copyout(mtod(m, char *), sopt->sopt_val, m->m_len);
			if (error != 0) {
				m_freem(m0);
				return(error);
			}
		} else
			bcopy(mtod(m, char *), CAST_DOWN(caddr_t, sopt->sopt_val), m->m_len);
		sopt->sopt_valsize -= m->m_len;
		sopt->sopt_val += m->m_len;
		valsize += m->m_len;
		m = m->m_next;
	}
	if (m != NULL) {
		/* user-land should have supplied a large enough soopt buffer */
		m_freem(m0);
		return(EINVAL);
	}
	sopt->sopt_valsize = valsize;
	return 0;
}

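/*
 * sohasoutofband: notify interested parties that out-of-band data has
 * arrived.  As the code below shows, a negative so_pgid names a process
 * group to be signalled with SIGURG, a positive one a single process,
 * and any select()ers on the receive buffer are woken as well.
 */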
void
sohasoutofband(so)
	register struct socket *so;
{
	struct proc *p;

	if (so->so_pgid < 0)
		gsignal(-so->so_pgid, SIGURG);
	else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
		psignal(p, SIGURG);
	selwakeup(&so->so_rcv.sb_sel);
}

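/*
 * sopoll: poll(2)/select(2) support.  Reading the code below: readiness
 * is computed under the socket lock, and when no requested event is ready
 * the thread is parked on the relevant sockbuf's selinfo via selrecord()
 * so it can be woken when the state changes.
 */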
int
sopoll(struct socket *so, int events, __unused kauth_cred_t cred, void * wql)
{
	struct proc *p = current_proc();
	int revents = 0;

	socket_lock(so, 1);

	if (events & (POLLIN | POLLRDNORM))
		if (soreadable(so))
			revents |= events & (POLLIN | POLLRDNORM);

	if (events & (POLLOUT | POLLWRNORM))
		if (sowriteable(so))
			revents |= events & (POLLOUT | POLLWRNORM);

	if (events & (POLLPRI | POLLRDBAND))
		if (so->so_oobmark || (so->so_state & SS_RCVATMARK))
			revents |= events & (POLLPRI | POLLRDBAND);

	if (revents == 0) {
		if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
			/* Darwin sets the flag first, BSD calls selrecord first */
			so->so_rcv.sb_flags |= SB_SEL;
			selrecord(p, &so->so_rcv.sb_sel, wql);
		}

		if (events & (POLLOUT | POLLWRNORM)) {
			/* Darwin sets the flag first, BSD calls selrecord first */
			so->so_snd.sb_flags |= SB_SEL;
			selrecord(p, &so->so_snd.sb_sel, wql);
		}
	}

	socket_unlock(so, 1);
	return (revents);
}

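/*
 * soo_kqfilter: attach a knote to a socket for kqueue(2).  As the switch
 * below shows, EVFILT_READ maps to either solisten_filtops (listening
 * sockets) or soread_filtops, and EVFILT_WRITE to sowrite_filtops; the
 * knote is then hung off the appropriate sockbuf's selinfo.
 */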
int soo_kqfilter(struct fileproc *fp, struct knote *kn, struct proc *p);

int
soo_kqfilter(__unused struct fileproc *fp, struct knote *kn, __unused struct proc *p)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
	struct sockbuf *sb;
	socket_lock(so, 1);

	switch (kn->kn_filter) {
	case EVFILT_READ:
		if (so->so_options & SO_ACCEPTCONN)
			kn->kn_fop = &solisten_filtops;
		else
			kn->kn_fop = &soread_filtops;
		sb = &so->so_rcv;
		break;
	case EVFILT_WRITE:
		kn->kn_fop = &sowrite_filtops;
		sb = &so->so_snd;
		break;
	default:
		socket_unlock(so, 1);
		return (1);
	}

	if (KNOTE_ATTACH(&sb->sb_sel.si_note, kn))
		sb->sb_flags |= SB_KNOTE;
	socket_unlock(so, 1);
	return (0);
}

static void
filt_sordetach(struct knote *kn)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;

	socket_lock(so, 1);
	if (so->so_rcv.sb_flags & SB_KNOTE)
		if (KNOTE_DETACH(&so->so_rcv.sb_sel.si_note, kn))
			so->so_rcv.sb_flags &= ~SB_KNOTE;
	socket_unlock(so, 1);
}

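/*
 * filt_soread: event test for the read filter.  A sketch of the logic
 * below: kn_data reports the bytes available (or the distance to the
 * out-of-band mark when EV_OOBAND is in play), EOF and pending socket
 * errors force a wakeup, and otherwise the note fires once the data on
 * hand reaches the NOTE_LOWAT threshold or the receive low-water mark.
 */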
/*ARGSUSED*/
static int
filt_soread(struct knote *kn, long hint)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;

	if ((hint & SO_FILT_HINT_LOCKED) == 0)
		socket_lock(so, 1);

	if (so->so_oobmark) {
		if (kn->kn_flags & EV_OOBAND) {
			kn->kn_data = so->so_rcv.sb_cc - so->so_oobmark;
			if ((hint & SO_FILT_HINT_LOCKED) == 0)
				socket_unlock(so, 1);
			return (1);
		}
		kn->kn_data = so->so_oobmark;
		kn->kn_flags |= EV_OOBAND;
	} else {
		kn->kn_data = so->so_rcv.sb_cc;
		if (so->so_state & SS_CANTRCVMORE) {
			kn->kn_flags |= EV_EOF;
			kn->kn_fflags = so->so_error;
			if ((hint & SO_FILT_HINT_LOCKED) == 0)
				socket_unlock(so, 1);
			return (1);
		}
	}

	if (so->so_state & SS_RCVATMARK) {
		if (kn->kn_flags & EV_OOBAND) {
			if ((hint & SO_FILT_HINT_LOCKED) == 0)
				socket_unlock(so, 1);
			return (1);
		}
		kn->kn_flags |= EV_OOBAND;
	} else if (kn->kn_flags & EV_OOBAND) {
		kn->kn_data = 0;
		if ((hint & SO_FILT_HINT_LOCKED) == 0)
			socket_unlock(so, 1);
		return (0);
	}

	if (so->so_error) {	/* temporary udp error */
		if ((hint & SO_FILT_HINT_LOCKED) == 0)
			socket_unlock(so, 1);
		return (1);
	}

	if ((hint & SO_FILT_HINT_LOCKED) == 0)
		socket_unlock(so, 1);

	return (kn->kn_flags & EV_OOBAND ||
		kn->kn_data >= ((kn->kn_sfflags & NOTE_LOWAT) ?
				kn->kn_sdata : so->so_rcv.sb_lowat));
}

static void
filt_sowdetach(struct knote *kn)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
	socket_lock(so, 1);

	if (so->so_snd.sb_flags & SB_KNOTE)
		if (KNOTE_DETACH(&so->so_snd.sb_sel.si_note, kn))
			so->so_snd.sb_flags &= ~SB_KNOTE;
	socket_unlock(so, 1);
}

/*ARGSUSED*/
static int
filt_sowrite(struct knote *kn, long hint)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;

	if ((hint & SO_FILT_HINT_LOCKED) == 0)
		socket_lock(so, 1);

	kn->kn_data = sbspace(&so->so_snd);
	if (so->so_state & SS_CANTSENDMORE) {
		kn->kn_flags |= EV_EOF;
		kn->kn_fflags = so->so_error;
		if ((hint & SO_FILT_HINT_LOCKED) == 0)
			socket_unlock(so, 1);
		return (1);
	}
	if (so->so_error) {	/* temporary udp error */
		if ((hint & SO_FILT_HINT_LOCKED) == 0)
			socket_unlock(so, 1);
		return (1);
	}
	if (((so->so_state & SS_ISCONNECTED) == 0) &&
	    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
		if ((hint & SO_FILT_HINT_LOCKED) == 0)
			socket_unlock(so, 1);
		return (0);
	}
	if ((hint & SO_FILT_HINT_LOCKED) == 0)
		socket_unlock(so, 1);
	if (kn->kn_sfflags & NOTE_LOWAT)
		return (kn->kn_data >= kn->kn_sdata);
	return (kn->kn_data >= so->so_snd.sb_lowat);
}

/*ARGSUSED*/
static int
filt_solisten(struct knote *kn, long hint)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
	int isempty;

	if ((hint & SO_FILT_HINT_LOCKED) == 0)
		socket_lock(so, 1);
	kn->kn_data = so->so_qlen;
	isempty = ! TAILQ_EMPTY(&so->so_comp);
	if ((hint & SO_FILT_HINT_LOCKED) == 0)
		socket_unlock(so, 1);
	return (isempty);
}

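/*
 * socket_lock: acquire the lock protecting a socket, optionally taking a
 * reference.  Note (inferred from the code): protocols may supply their
 * own pr_lock, otherwise the domain mutex is used, and the caller's
 * return address is recorded in lock_lr[] purely as a debugging aid.
 */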
int
socket_lock(so, refcount)
	struct socket *so;
	int refcount;
{
	int error = 0, lr_saved;

	lr_saved = (unsigned int) __builtin_return_address(0);

	if (so->so_proto->pr_lock) {
		error = (*so->so_proto->pr_lock)(so, refcount, lr_saved);
	} else {
#ifdef MORE_LOCKING_DEBUG
		lck_mtx_assert(so->so_proto->pr_domain->dom_mtx, LCK_MTX_ASSERT_NOTOWNED);
#endif
		lck_mtx_lock(so->so_proto->pr_domain->dom_mtx);
		if (refcount)
			so->so_usecount++;
		so->lock_lr[so->next_lock_lr] = (void *)lr_saved;
		so->next_lock_lr = (so->next_lock_lr+1) % SO_LCKDBG_MAX;
	}

	return(error);
}

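/*
 * socket_unlock: release the socket lock and, if requested, drop a
 * reference.  Per the code below, dropping the final reference hands the
 * socket to sofreelastref() for teardown before the mutex is released.
 */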
int
socket_unlock(so, refcount)
	struct socket *so;
	int refcount;
{
	int error = 0, lr_saved;
	lck_mtx_t * mutex_held;

	lr_saved = (unsigned int) __builtin_return_address(0);

	if (so->so_proto == NULL)
		panic("socket_unlock null so_proto so=%x\n", so);

	if (so && so->so_proto->pr_unlock)
		error = (*so->so_proto->pr_unlock)(so, refcount, lr_saved);
	else {
		mutex_held = so->so_proto->pr_domain->dom_mtx;
#ifdef MORE_LOCKING_DEBUG
		lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
#endif
		so->unlock_lr[so->next_unlock_lr] = (void *)lr_saved;
		so->next_unlock_lr = (so->next_unlock_lr+1) % SO_LCKDBG_MAX;

		if (refcount) {
			if (so->so_usecount <= 0)
				panic("socket_unlock: bad refcount so=%x value=%d\n", so, so->so_usecount);
			so->so_usecount--;
			if (so->so_usecount == 0) {
				sofreelastref(so, 1);
			}
		}
		lck_mtx_unlock(mutex_held);
	}

	return(error);
}
//### Called with socket locked, will unlock socket
void
sofree(so)
	struct socket *so;
{
	lck_mtx_t * mutex_held;

	if (so->so_proto->pr_getlock != NULL)
		mutex_held = (*so->so_proto->pr_getlock)(so, 0);
	else
		mutex_held = so->so_proto->pr_domain->dom_mtx;
	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

	sofreelastref(so, 0);
}

void
soreference(so)
	struct socket *so;
{
	socket_lock(so, 1);	/* locks & take one reference on socket */
	socket_unlock(so, 0);	/* unlock only */
}

void
sodereference(so)
	struct socket *so;
{
	socket_lock(so, 0);
	socket_unlock(so, 1);
}