1 /*
2 * Copyright (c) 1998-2008 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29 /*
30 * Copyright (c) 1982, 1986, 1988, 1990, 1993
31 * The Regents of the University of California. All rights reserved.
32 *
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
35 * are met:
36 * 1. Redistributions of source code must retain the above copyright
37 * notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 * notice, this list of conditions and the following disclaimer in the
40 * documentation and/or other materials provided with the distribution.
41 * 3. All advertising materials mentioning features or use of this software
42 * must display the following acknowledgement:
43 * This product includes software developed by the University of
44 * California, Berkeley and its contributors.
45 * 4. Neither the name of the University nor the names of its contributors
46 * may be used to endorse or promote products derived from this software
47 * without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 *
61 * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94
62 * $FreeBSD: src/sys/kern/uipc_socket.c,v 1.68.2.16 2001/06/14 20:46:06 ume Exp $
63 */
64 /*
65 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
66 * support for mandatory and extensible security protections. This notice
67 * is included in support of clause 2.2 (b) of the Apple Public License,
68 * Version 2.0.
69 */
70
71 #include <sys/param.h>
72 #include <sys/systm.h>
73 #include <sys/filedesc.h>
74 #include <sys/proc.h>
75 #include <sys/proc_internal.h>
76 #include <sys/kauth.h>
77 #include <sys/file_internal.h>
78 #include <sys/fcntl.h>
79 #include <sys/malloc.h>
80 #include <sys/mbuf.h>
81 #include <sys/domain.h>
82 #include <sys/kernel.h>
83 #include <sys/event.h>
84 #include <sys/poll.h>
85 #include <sys/protosw.h>
86 #include <sys/socket.h>
87 #include <sys/socketvar.h>
88 #include <sys/resourcevar.h>
89 #include <sys/signalvar.h>
90 #include <sys/sysctl.h>
91 #include <sys/uio.h>
92 #include <sys/ev.h>
93 #include <sys/kdebug.h>
94 #include <sys/un.h>
95 #include <sys/user.h>
96 #include <net/route.h>
97 #include <netinet/in.h>
98 #include <netinet/in_pcb.h>
99 #include <kern/zalloc.h>
100 #include <kern/locks.h>
101 #include <machine/limits.h>
102 #include <libkern/OSAtomic.h>
103 #include <pexpert/pexpert.h>
104 #include <kern/assert.h>
105
106 #if CONFIG_MACF
107 #include <security/mac.h>
108 #include <security/mac_framework.h>
109 #endif /* MAC */
110
111 int so_cache_hw = 0;
112 int so_cache_timeouts = 0;
113 int so_cache_max_freed = 0;
114 int cached_sock_count = 0;
115 __private_extern__ int max_cached_sock_count = MAX_CACHED_SOCKETS;
116 struct socket *socket_cache_head = 0;
117 struct socket *socket_cache_tail = 0;
118 u_int32_t so_cache_time = 0;
119 int so_cache_init_done = 0;
120 struct zone *so_cache_zone;
121
122 static lck_grp_t *so_cache_mtx_grp;
123 static lck_attr_t *so_cache_mtx_attr;
124 static lck_grp_attr_t *so_cache_mtx_grp_attr;
125 lck_mtx_t *so_cache_mtx;
126
127 #include <machine/limits.h>
128
129 static void filt_sordetach(struct knote *kn);
130 static int filt_soread(struct knote *kn, long hint);
131 static void filt_sowdetach(struct knote *kn);
132 static int filt_sowrite(struct knote *kn, long hint);
133
134 static int
135 sooptcopyin_timeval(struct sockopt *sopt, struct timeval * tv_p);
136
137 static int
138 sooptcopyout_timeval(struct sockopt *sopt, const struct timeval * tv_p);
139
140 static struct filterops soread_filtops = {
141 .f_isfd = 1,
142 .f_detach = filt_sordetach,
143 .f_event = filt_soread,
144 };
145 static struct filterops sowrite_filtops = {
146 .f_isfd = 1,
147 .f_detach = filt_sowdetach,
148 .f_event = filt_sowrite,
149 };
150
151 #define EVEN_MORE_LOCKING_DEBUG 0
152 int socket_debug = 0;
153 int socket_zone = M_SOCKET;
154 so_gen_t so_gencnt; /* generation count for sockets */
155
156 MALLOC_DEFINE(M_SONAME, "soname", "socket name");
157 MALLOC_DEFINE(M_PCB, "pcb", "protocol control block");
158
159 #define DBG_LAYER_IN_BEG NETDBG_CODE(DBG_NETSOCK, 0)
160 #define DBG_LAYER_IN_END NETDBG_CODE(DBG_NETSOCK, 2)
161 #define DBG_LAYER_OUT_BEG NETDBG_CODE(DBG_NETSOCK, 1)
162 #define DBG_LAYER_OUT_END NETDBG_CODE(DBG_NETSOCK, 3)
163 #define DBG_FNC_SOSEND NETDBG_CODE(DBG_NETSOCK, (4 << 8) | 1)
164 #define DBG_FNC_SORECEIVE NETDBG_CODE(DBG_NETSOCK, (8 << 8))
165 #define DBG_FNC_SOSHUTDOWN NETDBG_CODE(DBG_NETSOCK, (9 << 8))
166
167 #define MAX_SOOPTGETM_SIZE (128 * MCLBYTES)
168
169
170 SYSCTL_DECL(_kern_ipc);
171
172 int somaxconn = SOMAXCONN;
173 SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW, &somaxconn, 0, "");
174
175 /* Should we get a maximum also ??? */
176 static int sosendmaxchain = 65536;
177 static int sosendminchain = 16384;
178 static int sorecvmincopy = 16384;
179 SYSCTL_INT(_kern_ipc, OID_AUTO, sosendminchain, CTLFLAG_RW, &sosendminchain,
180 0, "");
181 SYSCTL_INT(_kern_ipc, OID_AUTO, sorecvmincopy, CTLFLAG_RW, &sorecvmincopy,
182 0, "");
183
184 /*
185 * Set to enable jumbo clusters (if available) for large writes when
186 * the socket is marked with SOF_MULTIPAGES; see below.
187 */
188 int sosendjcl = 1;
189 SYSCTL_INT(_kern_ipc, OID_AUTO, sosendjcl, CTLFLAG_RW, &sosendjcl, 0, "");
190
191 /*
192 * Set this to ignore SOF_MULTIPAGES and use jumbo clusters for large
193 * writes on the socket for all protocols on any network interfaces,
194 * depending upon sosendjcl above. Be extra careful when setting this
195 * to 1, because sending packets that cross physical pages down to
196 * broken drivers (those that falsely assume that the physical pages
197 * are contiguous) might lead to system panics or silent data corruption.
198 * When set to 0, the system will respect SOF_MULTIPAGES, which is set
199 * only for TCP sockets whose outgoing interface is IFNET_MULTIPAGES
200 * capable. Set this to 1 only for testing/debugging purposes.
201 */
202 int sosendjcl_ignore_capab = 0;
203 SYSCTL_INT(_kern_ipc, OID_AUTO, sosendjcl_ignore_capab, CTLFLAG_RW,
204 &sosendjcl_ignore_capab, 0, "");
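/*
 * Illustrative sketch (not part of this file): the knobs above are
 * exported under kern.ipc by the SYSCTL_INT() declarations, so a
 * user-space tool could inspect them with sysctlbyname(3).  The OID
 * name below simply follows from those declarations.
 *
 *	#include <sys/types.h>
 *	#include <sys/sysctl.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		int val = 0;
 *		size_t len = sizeof (val);
 *
 *		// Read the current value of kern.ipc.sosendjcl.
 *		if (sysctlbyname("kern.ipc.sosendjcl", &val, &len,
 *		    NULL, 0) == 0)
 *			printf("sosendjcl = %d\n", val);
 *		return (0);
 *	}
 */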
205
206 /*
207 * Socket operation routines.
208 * These routines are called by the routines in
209 * sys_socket.c or from a system process, and
210 * implement the semantics of socket operations by
211 * switching out to the protocol specific routines.
212 */
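/*
 * For orientation, a user-space sketch (not kernel code) of the system
 * calls that end up in the routines below, in the order a simple TCP
 * server would issue them; sin is assumed to be a filled-in
 * struct sockaddr_in.
 *
 *	int s = socket(AF_INET, SOCK_STREAM, 0);	// socreate()
 *	bind(s, (struct sockaddr *)&sin, sizeof (sin));	// sobind()
 *	listen(s, 5);					// solisten()
 *	int c = accept(s, NULL, NULL);			// soaccept()
 *	// read/write on c reach soreceive()/sosend()
 *	close(c);					// soclose()
 *	close(s);
 */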
213
214 /* sys_generic.c */
215 extern void postevent(struct socket *, struct sockbuf *, int);
216 extern void evsofree(struct socket *);
217
218 /* TODO: these should be in header file */
219 extern int get_inpcb_str_size(void);
220 extern int get_tcp_str_size(void);
221 extern struct domain *pffinddomain(int);
222 extern struct protosw *pffindprotonotype(int, int);
223 extern int soclose_locked(struct socket *);
224 extern int soo_kqfilter(struct fileproc *, struct knote *, struct proc *);
225
226 extern int uthread_get_background_state(uthread_t);
227
228 #ifdef __APPLE__
229
230 vm_size_t so_cache_zone_element_size;
231
232 static int sodelayed_copy(struct socket *, struct uio *, struct mbuf **, int *);
233 static void cached_sock_alloc(struct socket **, int);
234 static void cached_sock_free(struct socket *);
235 static void so_cache_timer(void *);
236
237 void soclose_wait_locked(struct socket *so);
238 int so_isdstlocal(struct socket *so);
239
240
241 void
242 socketinit(void)
243 {
244 vm_size_t str_size;
245
246 if (so_cache_init_done) {
247 printf("socketinit: already called...\n");
248 return;
249 }
250
251 PE_parse_boot_argn("socket_debug", &socket_debug, sizeof (socket_debug));
252
253 /*
254 * allocate lock group attribute and group for socket cache mutex
255 */
256 so_cache_mtx_grp_attr = lck_grp_attr_alloc_init();
257
258 so_cache_mtx_grp = lck_grp_alloc_init("so_cache",
259 so_cache_mtx_grp_attr);
260
261 /*
262 * allocate the lock attribute for socket cache mutex
263 */
264 so_cache_mtx_attr = lck_attr_alloc_init();
265
266 so_cache_init_done = 1;
267
268 /* cached sockets mutex */
269 so_cache_mtx = lck_mtx_alloc_init(so_cache_mtx_grp, so_cache_mtx_attr);
270
271 if (so_cache_mtx == NULL)
272 return; /* we're hosed... */
273
274 str_size = (vm_size_t)(sizeof (struct socket) + 4 +
275 get_inpcb_str_size() + 4 + get_tcp_str_size());
276
277 so_cache_zone = zinit(str_size, 120000*str_size, 8192, "socache zone");
278 #if TEMPDEBUG
279 printf("cached_sock_alloc -- so_cache_zone size is %x\n", str_size);
280 #endif
281 timeout(so_cache_timer, NULL, (SO_CACHE_FLUSH_INTERVAL * hz));
282
283 so_cache_zone_element_size = str_size;
284
285 sflt_init();
286 }
287
288 static void
289 cached_sock_alloc(struct socket **so, int waitok)
290 {
291 caddr_t temp;
292 register uintptr_t offset;
293
294 lck_mtx_lock(so_cache_mtx);
295
296 if (cached_sock_count) {
297 cached_sock_count--;
298 *so = socket_cache_head;
299 if (*so == 0)
300 panic("cached_sock_alloc: cached sock is null");
301
302 socket_cache_head = socket_cache_head->cache_next;
303 if (socket_cache_head)
304 socket_cache_head->cache_prev = 0;
305 else
306 socket_cache_tail = 0;
307
308 lck_mtx_unlock(so_cache_mtx);
309
310 temp = (*so)->so_saved_pcb;
311 bzero((caddr_t)*so, sizeof (struct socket));
312 #if TEMPDEBUG
313 kprintf("cached_sock_alloc - retrieving cached sock %p - "
314 "count == %d\n", *so, cached_sock_count);
315 #endif
316 (*so)->so_saved_pcb = temp;
317 (*so)->cached_in_sock_layer = 1;
318 } else {
319 #if TEMPDEBUG
320 kprintf("Allocating cached sock %p from memory\n", *so);
321 #endif
322
323 lck_mtx_unlock(so_cache_mtx);
324
325 if (waitok)
326 *so = (struct socket *)zalloc(so_cache_zone);
327 else
328 *so = (struct socket *)zalloc_noblock(so_cache_zone);
329
330 if (*so == 0)
331 return;
332
333 bzero((caddr_t)*so, sizeof (struct socket));
334
335 /*
336 * Define offsets for extra structures into our single block of
337 * memory. Align extra structures on longword boundaries.
338 */
339
340 offset = (uintptr_t) *so;
341 offset += sizeof (struct socket);
342
343 offset = ALIGN(offset);
344
345 (*so)->so_saved_pcb = (caddr_t)offset;
346 offset += get_inpcb_str_size();
347
348 offset = ALIGN(offset);
349
350 ((struct inpcb *)(*so)->so_saved_pcb)->inp_saved_ppcb =
351 (caddr_t)offset;
352 #if TEMPDEBUG
353 kprintf("Allocating cached socket - %p, pcb=%p tcpcb=%p\n",
354 *so, (*so)->so_saved_pcb,
355 ((struct inpcb *)(*so)->so_saved_pcb)->inp_saved_ppcb);
356 #endif
357 }
358
359 (*so)->cached_in_sock_layer = 1;
360 }
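/*
 * Layout of a cached-socket zone element as carved out above; the
 * element size is computed in socketinit() as sizeof (struct socket) +
 * 4 + get_inpcb_str_size() + 4 + get_tcp_str_size(), with each extra
 * structure aligned via ALIGN():
 *
 *	+---------------+------------------+------------------+
 *	| struct socket | inpcb storage    | tcpcb storage    |
 *	+---------------+------------------+------------------+
 *	                ^ so_saved_pcb     ^ inp_saved_ppcb
 */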
361
362 static void
363 cached_sock_free(struct socket *so)
364 {
365
366 lck_mtx_lock(so_cache_mtx);
367
368 if (++cached_sock_count > max_cached_sock_count) {
369 --cached_sock_count;
370 lck_mtx_unlock(so_cache_mtx);
371 #if TEMPDEBUG
372 kprintf("Freeing overflowed cached socket %p\n", so);
373 #endif
374 zfree(so_cache_zone, so);
375 } else {
376 #if TEMPDEBUG
377 kprintf("Freeing socket %p into cache\n", so);
378 #endif
379 if (so_cache_hw < cached_sock_count)
380 so_cache_hw = cached_sock_count;
381
382 so->cache_next = socket_cache_head;
383 so->cache_prev = 0;
384 if (socket_cache_head)
385 socket_cache_head->cache_prev = so;
386 else
387 socket_cache_tail = so;
388
389 so->cache_timestamp = so_cache_time;
390 socket_cache_head = so;
391 lck_mtx_unlock(so_cache_mtx);
392 }
393
394 #if TEMPDEBUG
395 kprintf("Freed cached sock %p into cache - count is %d\n",
396 so, cached_sock_count);
397 #endif
398 }
399
400 static void
401 so_cache_timer(__unused void *dummy)
402 {
403 register struct socket *p;
404 register int n_freed = 0;
405
406 lck_mtx_lock(so_cache_mtx);
407
408 ++so_cache_time;
409
410 while ((p = socket_cache_tail)) {
411 if ((so_cache_time - p->cache_timestamp) < SO_CACHE_TIME_LIMIT)
412 break;
413
414 so_cache_timeouts++;
415
416 if ((socket_cache_tail = p->cache_prev))
417 p->cache_prev->cache_next = 0;
418 if (--cached_sock_count == 0)
419 socket_cache_head = 0;
420
421 zfree(so_cache_zone, p);
422
423 if (++n_freed >= SO_CACHE_MAX_FREE_BATCH) {
424 so_cache_max_freed++;
425 break;
426 }
427 }
428 lck_mtx_unlock(so_cache_mtx);
429
430 timeout(so_cache_timer, NULL, (SO_CACHE_FLUSH_INTERVAL * hz));
431 }
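/*
 * In short: the code above keeps up to max_cached_sock_count freed
 * PF_INET/SOCK_STREAM sockets around for quick reuse.  so_cache_timer()
 * re-arms itself every SO_CACHE_FLUSH_INTERVAL ticks and releases
 * entries whose age (in timer intervals) reaches SO_CACHE_TIME_LIMIT,
 * freeing at most SO_CACHE_MAX_FREE_BATCH of them per sweep.
 */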
432 #endif /* __APPLE__ */
433
434 /*
435 * Get a socket structure from our zone, and initialize it.
436 * We don't implement `waitok' yet (see comments in uipc_domain.c).
437 * Note that it would probably be better to allocate socket
438 * and PCB at the same time, but I'm not convinced that all
439 * the protocols can be easily modified to do this.
440 */
441 struct socket *
442 soalloc(int waitok, int dom, int type)
443 {
444 struct socket *so;
445
446 if ((dom == PF_INET) && (type == SOCK_STREAM)) {
447 cached_sock_alloc(&so, waitok);
448 } else {
449 MALLOC_ZONE(so, struct socket *, sizeof (*so), socket_zone,
450 M_WAITOK);
451 if (so != NULL)
452 bzero(so, sizeof (*so));
453 }
454 /* XXX race condition for reentrant kernel */
455 //###LD Atomic add for so_gencnt
456 if (so != NULL) {
457 so->so_gencnt = ++so_gencnt;
458 so->so_zone = socket_zone;
459 #if CONFIG_MACF_SOCKET
460 /* Convert waitok to M_WAITOK/M_NOWAIT for MAC Framework. */
461 if (mac_socket_label_init(so, !waitok) != 0) {
462 sodealloc(so);
463 return (NULL);
464 }
465 #endif /* MAC_SOCKET */
466 }
467
468 return (so);
469 }
470
471 /*
472 * Returns: 0 Success
473 * EAFNOSUPPORT
474 * EPROTOTYPE
475 * EPROTONOSUPPORT
476 * ENOBUFS
477 * <pru_attach>:ENOBUFS[AF_UNIX]
478 * <pru_attach>:ENOBUFS[TCP]
479 * <pru_attach>:ENOMEM[TCP]
480 * <pru_attach>:EISCONN[TCP]
481 * <pru_attach>:??? [other protocol families, IPSEC]
482 */
483 int
484 socreate(int dom, struct socket **aso, int type, int proto)
485 {
486 struct proc *p = current_proc();
487 register struct protosw *prp;
488 register struct socket *so;
489 register int error = 0;
490 thread_t thread;
491 struct uthread *ut;
492
493 #if TCPDEBUG
494 extern int tcpconsdebug;
495 #endif
496 if (proto)
497 prp = pffindproto(dom, proto, type);
498 else
499 prp = pffindtype(dom, type);
500
501 if (prp == 0 || prp->pr_usrreqs->pru_attach == 0) {
502 if (pffinddomain(dom) == NULL) {
503 return (EAFNOSUPPORT);
504 }
505 if (proto != 0) {
506 if (pffindprotonotype(dom, proto) != NULL) {
507 return (EPROTOTYPE);
508 }
509 }
510 return (EPROTONOSUPPORT);
511 }
512 if (prp->pr_type != type)
513 return (EPROTOTYPE);
514 so = soalloc(1, dom, type);
515 if (so == 0)
516 return (ENOBUFS);
517
518 TAILQ_INIT(&so->so_incomp);
519 TAILQ_INIT(&so->so_comp);
520 so->so_type = type;
521
522 so->so_uid = kauth_cred_getuid(kauth_cred_get());
523 if (!suser(kauth_cred_get(), NULL))
524 so->so_state = SS_PRIV;
525
526 so->so_proto = prp;
527 #ifdef __APPLE__
528 so->so_rcv.sb_flags |= SB_RECV; /* XXX */
529 so->so_rcv.sb_so = so->so_snd.sb_so = so;
530 #endif
531 so->next_lock_lr = 0;
532 so->next_unlock_lr = 0;
533
534 #if CONFIG_MACF_SOCKET
535 mac_socket_label_associate(kauth_cred_get(), so);
536 #endif /* MAC_SOCKET */
537
538 //### Attachment will create the per-pcb lock if necessary and increase the refcount
539 /*
540 * For creation, make sure this is done before the
541 * socket is inserted in any lists.
542 */
543 so->so_usecount++;
544
545 error = (*prp->pr_usrreqs->pru_attach)(so, proto, p);
546 if (error) {
547 /*
548 * Warning:
549 * If so_pcb is not zero, the socket will be leaked,
550 * so the protocol attachment handler must be coded carefully
551 */
552 so->so_state |= SS_NOFDREF;
553 so->so_usecount--;
554 sofreelastref(so, 1); /* will deallocate the socket */
555 return (error);
556 }
557 #ifdef __APPLE__
558 prp->pr_domain->dom_refs++;
559 TAILQ_INIT(&so->so_evlist);
560
561 /* Attach socket filters for this protocol */
562 sflt_initsock(so);
563 #if TCPDEBUG
564 if (tcpconsdebug == 2)
565 so->so_options |= SO_DEBUG;
566 #endif
567 #endif
568 /*
569 * If this is a background thread/task, mark the socket as such.
570 */
571 thread = current_thread();
572 ut = get_bsdthread_info(thread);
573 if (uthread_get_background_state(ut)) {
574 socket_set_traffic_mgt_flags(so, TRAFFIC_MGT_SO_BACKGROUND);
575 so->so_background_thread = thread;
576 /*
577 * In case setpriority(PRIO_DARWIN_THREAD) was called
578 * on this thread, regulate network (TCP) traffic.
579 */
580 if (ut->uu_flag & UT_BACKGROUND_TRAFFIC_MGT) {
581 socket_set_traffic_mgt_flags(so,
582 TRAFFIC_MGT_SO_BG_REGULATE);
583 }
584 }
585
586 *aso = so;
587 return (0);
588 }
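/*
 * A minimal in-kernel usage sketch (assuming IPPROTO_TCP; in practice
 * socreate() is reached via the socket(2) system call path):
 *
 *	struct socket *so = NULL;
 *	int error;
 *
 *	error = socreate(PF_INET, &so, SOCK_STREAM, IPPROTO_TCP);
 *	if (error == 0) {
 *		// ... use so, then drop the last file-table reference ...
 *		error = soclose(so);
 *	}
 */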
589
590 /*
591 * Returns: 0 Success
592 * <pru_bind>:EINVAL Invalid argument [COMMON_START]
593 * <pru_bind>:EAFNOSUPPORT Address family not supported
594 * <pru_bind>:EADDRNOTAVAIL Address not available.
595 * <pru_bind>:EINVAL Invalid argument
596 * <pru_bind>:EAFNOSUPPORT Address family not supported [notdef]
597 * <pru_bind>:EACCES Permission denied
598 * <pru_bind>:EADDRINUSE Address in use
599 * <pru_bind>:EAGAIN Resource unavailable, try again
600 * <pru_bind>:EPERM Operation not permitted
601 * <pru_bind>:???
602 * <sf_bind>:???
603 *
604 * Notes: It's not possible to fully enumerate the return codes above,
605 * since socket filter authors and protocol family authors may
606 * not choose to limit their error returns to those listed, even
607 * though this may result in some software operating incorrectly.
608 *
609 * The error codes which are enumerated above are those known to
610 * be returned by the tcp_usr_bind function supplied.
611 */
612 int
613 sobind(struct socket *so, struct sockaddr *nam)
614 {
615 struct proc *p = current_proc();
616 int error = 0;
617 struct socket_filter_entry *filter;
618 int filtered = 0;
619
620 socket_lock(so, 1);
621
622 /*
623 * If this is a bind request on a previously-accepted socket
624 * that has been marked as inactive, reject it now before
625 * we go any further.
626 */
627 if (so->so_flags & SOF_DEFUNCT) {
628 error = EINVAL;
629 goto out;
630 }
631
632 /* Socket filter */
633 error = 0;
634 for (filter = so->so_filt; filter && (error == 0);
635 filter = filter->sfe_next_onsocket) {
636 if (filter->sfe_filter->sf_filter.sf_bind) {
637 if (filtered == 0) {
638 filtered = 1;
639 sflt_use(so);
640 socket_unlock(so, 0);
641 }
642 error = filter->sfe_filter->sf_filter.
643 sf_bind(filter->sfe_cookie, so, nam);
644 }
645 }
646 if (filtered != 0) {
647 socket_lock(so, 0);
648 sflt_unuse(so);
649 }
650 /* End socket filter */
651
652 if (error == 0)
653 error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p);
654 out:
655 socket_unlock(so, 1);
656
657 if (error == EJUSTRETURN)
658 error = 0;
659
660 return (error);
661 }
662
663 void
664 sodealloc(struct socket *so)
665 {
666 so->so_gencnt = ++so_gencnt;
667
668 #if CONFIG_MACF_SOCKET
669 mac_socket_label_destroy(so);
670 #endif /* MAC_SOCKET */
671 if (so->cached_in_sock_layer == 1) {
672 cached_sock_free(so);
673 } else {
674 if (so->cached_in_sock_layer == -1)
675 panic("sodealloc: double dealloc: so=%p\n", so);
676 so->cached_in_sock_layer = -1;
677 FREE_ZONE(so, sizeof (*so), so->so_zone);
678 }
679 }
680
681 /*
682 * Returns: 0 Success
683 * EINVAL
684 * EOPNOTSUPP
685 * <pru_listen>:EINVAL[AF_UNIX]
686 * <pru_listen>:EINVAL[TCP]
687 * <pru_listen>:EADDRNOTAVAIL[TCP] Address not available.
688 * <pru_listen>:EINVAL[TCP] Invalid argument
689 * <pru_listen>:EAFNOSUPPORT[TCP] Address family not supported [notdef]
690 * <pru_listen>:EACCES[TCP] Permission denied
691 * <pru_listen>:EADDRINUSE[TCP] Address in use
692 * <pru_listen>:EAGAIN[TCP] Resource unavailable, try again
693 * <pru_listen>:EPERM[TCP] Operation not permitted
694 * <sf_listen>:???
695 *
696 * Notes: Other <pru_listen> returns depend on the protocol family; all
697 * <sf_listen> returns depend on what the filter author causes
698 * their filter to return.
699 */
700 int
701 solisten(struct socket *so, int backlog)
702 {
703 struct proc *p = current_proc();
704 int error = 0;
705 struct socket_filter_entry *filter;
706 int filtered = 0;
707
708 socket_lock(so, 1);
709 if (so->so_proto == NULL) {
710 error = EINVAL;
711 goto out;
712 }
713 if ((so->so_proto->pr_flags & PR_CONNREQUIRED) == 0) {
714 error = EOPNOTSUPP;
715 goto out;
716 }
717
718 /*
719 * If the listen request is made on a socket that is not fully
720 * disconnected, or on a previously-accepted socket that has
721 * been marked as inactive, reject the request now.
722 */
723 if ((so->so_state &
724 (SS_ISCONNECTED|SS_ISCONNECTING|SS_ISDISCONNECTING)) ||
725 (so->so_flags & SOF_DEFUNCT)) {
726 error = EINVAL;
727 goto out;
728 }
729
730 if ((so->so_restrictions & SO_RESTRICT_DENYIN) != 0) {
731 error = EPERM;
732 goto out;
733 }
734
735 error = 0;
736 for (filter = so->so_filt; filter && (error == 0);
737 filter = filter->sfe_next_onsocket) {
738 if (filter->sfe_filter->sf_filter.sf_listen) {
739 if (filtered == 0) {
740 filtered = 1;
741 sflt_use(so);
742 socket_unlock(so, 0);
743 }
744 error = filter->sfe_filter->sf_filter.
745 sf_listen(filter->sfe_cookie, so);
746 }
747 }
748 if (filtered != 0) {
749 socket_lock(so, 0);
750 sflt_unuse(so);
751 }
752
753 if (error == 0) {
754 error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p);
755 }
756
757 if (error) {
758 if (error == EJUSTRETURN)
759 error = 0;
760 goto out;
761 }
762
763 if (TAILQ_EMPTY(&so->so_comp))
764 so->so_options |= SO_ACCEPTCONN;
765 /*
766 * POSIX: The implementation may have an upper limit on the length of
767 * the listen queue, either global or per accepting socket. If backlog
768 * exceeds this limit, the length of the listen queue is set to the
769 * limit.
770 *
771 * If listen() is called with a backlog argument value that is less
772 * than 0, the function behaves as if it had been called with a backlog
773 * argument value of 0.
774 *
775 * A backlog argument of 0 may allow the socket to accept connections,
776 * in which case the length of the listen queue may be set to an
777 * implementation-defined minimum value.
778 */
779 if (backlog <= 0 || backlog > somaxconn)
780 backlog = somaxconn;
781
782 so->so_qlimit = backlog;
783 out:
784 socket_unlock(so, 1);
785 return (error);
786 }
787
788 void
789 sofreelastref(struct socket *so, int dealloc)
790 {
791 struct socket *head = so->so_head;
792
793 /* Assume socket is locked */
794
795 /* Remove any filters - may be called more than once */
796 sflt_termsock(so);
797
798 if ((!(so->so_flags & SOF_PCBCLEARING)) ||
799 ((so->so_state & SS_NOFDREF) == 0)) {
800 #ifdef __APPLE__
801 selthreadclear(&so->so_snd.sb_sel);
802 selthreadclear(&so->so_rcv.sb_sel);
803 so->so_rcv.sb_flags &= ~SB_UPCALL;
804 so->so_snd.sb_flags &= ~SB_UPCALL;
805 #endif
806 return;
807 }
808 if (head != NULL) {
809 socket_lock(head, 1);
810 if (so->so_state & SS_INCOMP) {
811 TAILQ_REMOVE(&head->so_incomp, so, so_list);
812 head->so_incqlen--;
813 } else if (so->so_state & SS_COMP) {
814 /*
815 * We must not decommission a socket that's
816 * on the accept(2) queue. If we do, then
817 * accept(2) may hang after select(2) indicated
818 * that the listening socket was ready.
819 */
820 #ifdef __APPLE__
821 selthreadclear(&so->so_snd.sb_sel);
822 selthreadclear(&so->so_rcv.sb_sel);
823 so->so_rcv.sb_flags &= ~SB_UPCALL;
824 so->so_snd.sb_flags &= ~SB_UPCALL;
825 #endif
826 socket_unlock(head, 1);
827 return;
828 } else {
829 panic("sofree: not queued");
830 }
831 head->so_qlen--;
832 so->so_state &= ~SS_INCOMP;
833 so->so_head = NULL;
834 socket_unlock(head, 1);
835 }
836 #ifdef __APPLE__
837 selthreadclear(&so->so_snd.sb_sel);
838 sbrelease(&so->so_snd);
839 #endif
840 sorflush(so);
841
842 /* 3932268: disable upcall */
843 so->so_rcv.sb_flags &= ~SB_UPCALL;
844 so->so_snd.sb_flags &= ~SB_UPCALL;
845
846 if (dealloc)
847 sodealloc(so);
848 }
849
850 void
851 soclose_wait_locked(struct socket *so)
852 {
853 lck_mtx_t *mutex_held;
854
855 if (so->so_proto->pr_getlock != NULL)
856 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
857 else
858 mutex_held = so->so_proto->pr_domain->dom_mtx;
859 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
860
861 /*
862 * Double check here and return if there's no outstanding upcall;
863 * otherwise proceed further only if SOF_UPCALLCLOSEWAIT is set.
864 */
865 if (!(so->so_flags & SOF_UPCALLINUSE) ||
866 !(so->so_flags & SOF_UPCALLCLOSEWAIT))
867 return;
868
869 so->so_flags |= SOF_CLOSEWAIT;
870 (void) msleep((caddr_t)&so->so_upcall, mutex_held, (PZERO - 1),
871 "soclose_wait_locked", NULL);
872 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
873 so->so_flags &= ~SOF_CLOSEWAIT;
874 }
875
876 /*
877 * Close a socket on last file table reference removal.
878 * Initiate disconnect if connected.
879 * Free socket when disconnect complete.
880 */
881 int
882 soclose_locked(struct socket *so)
883 {
884 int error = 0;
885 lck_mtx_t *mutex_held;
886 struct timespec ts;
887
888 if (so->so_usecount == 0) {
889 panic("soclose: so=%p refcount=0\n", so);
890 }
891
892 sflt_notify(so, sock_evt_closing, NULL);
893
894 if ((so->so_options & SO_ACCEPTCONN)) {
895 struct socket *sp, *sonext;
896 int socklock = 0;
897
898 /*
899 * We do not want new connections to be added
900 * to the connection queues
901 */
902 so->so_options &= ~SO_ACCEPTCONN;
903
904 for (sp = TAILQ_FIRST(&so->so_incomp); sp != NULL; sp = sonext) {
905 sonext = TAILQ_NEXT(sp, so_list);
906
907 /* Radar 5350314
908 * Skip sockets thrown away by tcpdropdropblreq;
909 * they will get cleaned up by the garbage collection.
910 * Otherwise, remove the incomp socket from the queue
911 * and let soabort trigger the appropriate cleanup.
912 */
913 if (sp->so_flags & SOF_OVERFLOW)
914 continue;
915
916 if (so->so_proto->pr_getlock != NULL) {
917 /* For lock ordering consistency with the rest of the stack,
918 * we lock the socket first and then grab the head.
919 */
920 socket_unlock(so, 0);
921 socket_lock(sp, 1);
922 socket_lock(so, 0);
923 socklock = 1;
924 }
925
926 TAILQ_REMOVE(&so->so_incomp, sp, so_list);
927 so->so_incqlen--;
928
929 if (sp->so_state & SS_INCOMP) {
930 sp->so_state &= ~SS_INCOMP;
931 sp->so_head = NULL;
932
933 (void) soabort(sp);
934 }
935
936 if (socklock)
937 socket_unlock(sp, 1);
938 }
939
940 while ((sp = TAILQ_FIRST(&so->so_comp)) != NULL) {
941 /* Dequeue from so_comp since sofree() won't do it */
942 TAILQ_REMOVE(&so->so_comp, sp, so_list);
943 so->so_qlen--;
944
945 if (so->so_proto->pr_getlock != NULL) {
946 socket_unlock(so, 0);
947 socket_lock(sp, 1);
948 }
949
950 if (sp->so_state & SS_COMP) {
951 sp->so_state &= ~SS_COMP;
952 sp->so_head = NULL;
953
954 (void) soabort(sp);
955 }
956
957 if (so->so_proto->pr_getlock != NULL) {
958 socket_unlock(sp, 1);
959 socket_lock(so, 0);
960 }
961 }
962 }
963 if (so->so_pcb == 0) {
964 /* 3915887: mark the socket as ready for dealloc */
965 so->so_flags |= SOF_PCBCLEARING;
966 goto discard;
967 }
968 if (so->so_state & SS_ISCONNECTED) {
969 if ((so->so_state & SS_ISDISCONNECTING) == 0) {
970 error = sodisconnectlocked(so);
971 if (error)
972 goto drop;
973 }
974 if (so->so_options & SO_LINGER) {
975 if ((so->so_state & SS_ISDISCONNECTING) &&
976 (so->so_state & SS_NBIO))
977 goto drop;
978 if (so->so_proto->pr_getlock != NULL)
979 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
980 else
981 mutex_held = so->so_proto->pr_domain->dom_mtx;
982 while (so->so_state & SS_ISCONNECTED) {
983 ts.tv_sec = (so->so_linger/100);
984 ts.tv_nsec = (so->so_linger % 100) *
985 NSEC_PER_USEC * 1000 * 10;
986 error = msleep((caddr_t)&so->so_timeo,
987 mutex_held, PSOCK | PCATCH, "soclose", &ts);
988 if (error) {
989 /*
990 * It's OK when the timer fires;
991 * don't report an error
992 */
993 if (error == EWOULDBLOCK)
994 error = 0;
995 break;
996 }
997 }
998 }
999 }
1000 drop:
1001 if (so->so_usecount == 0)
1002 panic("soclose: usecount is zero so=%p\n", so);
1003 if (so->so_pcb && !(so->so_flags & SOF_PCBCLEARING)) {
1004 int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so);
1005 if (error == 0)
1006 error = error2;
1007 }
1008 if (so->so_usecount <= 0)
1009 panic("soclose: usecount is zero so=%p\n", so);
1010 discard:
1011 if (so->so_pcb && so->so_state & SS_NOFDREF)
1012 panic("soclose: NOFDREF");
1013 so->so_state |= SS_NOFDREF;
1014 #ifdef __APPLE__
1015 so->so_proto->pr_domain->dom_refs--;
1016 evsofree(so);
1017 #endif
1018 so->so_usecount--;
1019 sofree(so);
1020 return (error);
1021 }
1022
1023 int
1024 soclose(struct socket *so)
1025 {
1026 int error = 0;
1027 socket_lock(so, 1);
1028
1029 if (so->so_flags & SOF_UPCALLINUSE)
1030 soclose_wait_locked(so);
1031
1032 if (so->so_retaincnt == 0) {
1033 error = soclose_locked(so);
1034 } else {
1035 /*
1036 * If the FD is going away but the socket is
1037 * retained in the kernel, remove its reference
1038 */
1039 so->so_usecount--;
1040 if (so->so_usecount < 2)
1041 panic("soclose: retaincnt non null and so=%p "
1042 "usecount=%d\n", so, so->so_usecount);
1043 }
1044 socket_unlock(so, 1);
1045 return (error);
1046 }
1047
1048 /*
1049 * Must be called at splnet...
1050 */
1051 /* Should already be locked */
1052 int
1053 soabort(struct socket *so)
1054 {
1055 int error;
1056
1057 #ifdef MORE_LOCKING_DEBUG
1058 lck_mtx_t *mutex_held;
1059
1060 if (so->so_proto->pr_getlock != NULL)
1061 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
1062 else
1063 mutex_held = so->so_proto->pr_domain->dom_mtx;
1064 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
1065 #endif
1066
1067 if ((so->so_flags & SOF_ABORTED) == 0) {
1068 so->so_flags |= SOF_ABORTED;
1069 error = (*so->so_proto->pr_usrreqs->pru_abort)(so);
1070 if (error) {
1071 sofree(so);
1072 return (error);
1073 }
1074 }
1075 return (0);
1076 }
1077
1078 int
1079 soacceptlock(struct socket *so, struct sockaddr **nam, int dolock)
1080 {
1081 int error;
1082
1083 if (dolock)
1084 socket_lock(so, 1);
1085
1086 if ((so->so_state & SS_NOFDREF) == 0)
1087 panic("soaccept: !NOFDREF");
1088 so->so_state &= ~SS_NOFDREF;
1089 error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam);
1090
1091 if (dolock)
1092 socket_unlock(so, 1);
1093 return (error);
1094 }
1095
1096 int
1097 soaccept(struct socket *so, struct sockaddr **nam)
1098 {
1099 return (soacceptlock(so, nam, 1));
1100 }
1101
1102 int
1103 soacceptfilter(struct socket *so)
1104 {
1105 struct sockaddr *local = NULL, *remote = NULL;
1106 struct socket_filter_entry *filter;
1107 int error = 0, filtered = 0;
1108 struct socket *head = so->so_head;
1109
1110 /*
1111 * Hold the lock even if this socket
1112 * has not been made visible to the filter(s).
1113 * For sockets with global locks, this protects against the
1114 * head or peer going away
1115 */
1116 socket_lock(so, 1);
1117 if (sogetaddr_locked(so, &remote, 1) != 0 ||
1118 sogetaddr_locked(so, &local, 0) != 0) {
1119 so->so_state &= ~(SS_NOFDREF | SS_COMP);
1120 so->so_head = NULL;
1121 socket_unlock(so, 1);
1122 soclose(so);
1123 /* Out of resources; try it again next time */
1124 error = ECONNABORTED;
1125 goto done;
1126 }
1127
1128 /*
1129 * At this point, we have a reference on the listening socket
1130 * so we know it won't be going away. Do the same for the newly
1131 * accepted socket while we invoke the accept callback routine.
1132 */
1133 for (filter = so->so_filt; filter != NULL && error == 0;
1134 filter = filter->sfe_next_onsocket) {
1135 if (filter->sfe_filter->sf_filter.sf_accept != NULL) {
1136 if (!filtered) {
1137 filtered = 1;
1138 sflt_use(so);
1139 socket_unlock(so, 0);
1140 }
1141 error = filter->sfe_filter->sf_filter.
1142 sf_accept(filter->sfe_cookie,
1143 head, so, local, remote);
1144 }
1145 }
1146
1147 if (filtered) {
1148 socket_lock(so, 0);
1149 sflt_unuse(so);
1150 }
1151
1152 /*
1153 * If we get EJUSTRETURN from one of the filters, mark this socket
1154 * as inactive and return it anyway. This newly accepted socket
1155 * will be disconnected later before we hand it off to the caller.
1156 */
1157 if (error == EJUSTRETURN) {
1158 error = 0;
1159 so->so_flags |= SOF_DEFUNCT;
1160 /* Prevent data from being appended to the socket buffers */
1161 so->so_snd.sb_flags |= SB_DROP;
1162 so->so_rcv.sb_flags |= SB_DROP;
1163 }
1164
1165 if (error != 0) {
1166 /*
1167 * This may seem like a duplication to the above error
1168 * handling part when we return ECONNABORTED, except
1169 * the following is done while holding the lock since
1170 * the socket has been exposed to the filter(s) earlier.
1171 */
1172 so->so_state &= ~(SS_NOFDREF | SS_COMP);
1173 so->so_head = NULL;
1174 socket_unlock(so, 1);
1175 soclose(so);
1176 /* Propagate socket filter's error code to the caller */
1177 } else {
1178 socket_unlock(so, 1);
1179 }
1180 done:
1181 /* Callee checks for NULL pointer */
1182 sock_freeaddr(remote);
1183 sock_freeaddr(local);
1184 return (error);
1185 }
1186
1187 /*
1188 * Returns: 0 Success
1189 * EOPNOTSUPP Operation not supported on socket
1190 * EISCONN Socket is connected
1191 * <pru_connect>:EADDRNOTAVAIL Address not available.
1192 * <pru_connect>:EINVAL Invalid argument
1193 * <pru_connect>:EAFNOSUPPORT Address family not supported [notdef]
1194 * <pru_connect>:EACCES Permission denied
1195 * <pru_connect>:EADDRINUSE Address in use
1196 * <pru_connect>:EAGAIN Resource unavailable, try again
1197 * <pru_connect>:EPERM Operation not permitted
1198 * <sf_connect_out>:??? [anything a filter writer might set]
1199 */
1200 int
1201 soconnectlock(struct socket *so, struct sockaddr *nam, int dolock)
1202 {
1203 int error;
1204 struct proc *p = current_proc();
1205
1206 if (dolock)
1207 socket_lock(so, 1);
1208
1209 /*
1210 * If this is a listening socket or if this is a previously-accepted
1211 * socket that has been marked as inactive, reject the connect request.
1212 */
1213 if ((so->so_options & SO_ACCEPTCONN) || (so->so_flags & SOF_DEFUNCT)) {
1214 if (dolock)
1215 socket_unlock(so, 1);
1216 return (EOPNOTSUPP);
1217 }
1218
1219 if ((so->so_restrictions & SO_RESTRICT_DENYOUT) != 0) {
1220 if (dolock)
1221 socket_unlock(so, 1);
1222 return (EPERM);
1223 }
1224
1225 /*
1226 * If protocol is connection-based, can only connect once.
1227 * Otherwise, if connected, try to disconnect first.
1228 * This allows the user to disconnect by connecting to, e.g.,
1229 * a null address.
1230 */
1231 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
1232 ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
1233 (error = sodisconnectlocked(so)))) {
1234 error = EISCONN;
1235 } else {
1236 /*
1237 * Run connect filter before calling protocol:
1238 * - non-blocking connect returns before completion;
1239 */
1240 struct socket_filter_entry *filter;
1241 int filtered = 0;
1242
1243 error = 0;
1244 for (filter = so->so_filt; filter && (error == 0);
1245 filter = filter->sfe_next_onsocket) {
1246 if (filter->sfe_filter->sf_filter.sf_connect_out) {
1247 if (filtered == 0) {
1248 filtered = 1;
1249 sflt_use(so);
1250 socket_unlock(so, 0);
1251 }
1252 error = filter->sfe_filter->sf_filter.
1253 sf_connect_out(filter->sfe_cookie, so, nam);
1254 }
1255 }
1256 if (filtered != 0) {
1257 socket_lock(so, 0);
1258 sflt_unuse(so);
1259 }
1260
1261 if (error) {
1262 if (error == EJUSTRETURN)
1263 error = 0;
1264 if (dolock)
1265 socket_unlock(so, 1);
1266 return (error);
1267 }
1268
1269 error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, p);
1270 }
1271 if (dolock)
1272 socket_unlock(so, 1);
1273 return (error);
1274 }
1275
1276 int
1277 soconnect(struct socket *so, struct sockaddr *nam)
1278 {
1279 return (soconnectlock(so, nam, 1));
1280 }
1281
1282 /*
1283 * Returns: 0 Success
1284 * <pru_connect2>:EINVAL[AF_UNIX]
1285 * <pru_connect2>:EPROTOTYPE[AF_UNIX]
1286 * <pru_connect2>:??? [other protocol families]
1287 *
1288 * Notes: <pru_connect2> is not supported by [TCP].
1289 */
1290 int
1291 soconnect2(struct socket *so1, struct socket *so2)
1292 {
1293 int error;
1294
1295 socket_lock(so1, 1);
1296 if (so2->so_proto->pr_lock)
1297 socket_lock(so2, 1);
1298
1299 error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2);
1300
1301 socket_unlock(so1, 1);
1302 if (so2->so_proto->pr_lock)
1303 socket_unlock(so2, 1);
1304 return (error);
1305 }
1306
1307 int
1308 sodisconnectlocked(struct socket *so)
1309 {
1310 int error;
1311
1312 if ((so->so_state & SS_ISCONNECTED) == 0) {
1313 error = ENOTCONN;
1314 goto bad;
1315 }
1316 if (so->so_state & SS_ISDISCONNECTING) {
1317 error = EALREADY;
1318 goto bad;
1319 }
1320
1321 error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so);
1322
1323 if (error == 0) {
1324 sflt_notify(so, sock_evt_disconnected, NULL);
1325 }
1326 bad:
1327 return (error);
1328 }
1329
1330 /* Locking version */
1331 int
1332 sodisconnect(struct socket *so)
1333 {
1334 int error;
1335
1336 socket_lock(so, 1);
1337 error = sodisconnectlocked(so);
1338 socket_unlock(so, 1);
1339 return (error);
1340 }
1341
1342 #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_DONTWAIT : M_WAIT)
1343
1344 /*
1345 * sosendcheck will lock the socket buffer if it isn't locked and
1346 * verify that there is space for the data being inserted.
1347 *
1348 * Returns: 0 Success
1349 * EPIPE
1350 * sblock:EWOULDBLOCK
1351 * sblock:EINTR
1352 * sbwait:EBADF
1353 * sbwait:EINTR
1354 * [so_error]:???
1355 */
1356 static int
1357 sosendcheck(struct socket *so, struct sockaddr *addr, int32_t resid, int32_t clen,
1358 int32_t atomic, int flags, int *sblocked)
1359 {
1360 int error = 0;
1361 int32_t space;
1362 int assumelock = 0;
1363
1364 restart:
1365 if (*sblocked == 0) {
1366 if ((so->so_snd.sb_flags & SB_LOCK) != 0 &&
1367 so->so_send_filt_thread != 0 &&
1368 so->so_send_filt_thread == current_thread()) {
1369 /*
1370 * We're being called recursively from a filter,
1371 * allow this to continue. Radar 4150520.
1372 * Don't set sblocked because we don't want
1373 * to perform an unlock later.
1374 */
1375 assumelock = 1;
1376 } else {
1377 error = sblock(&so->so_snd, SBLOCKWAIT(flags));
1378 if (error) {
1379 return (error);
1380 }
1381 *sblocked = 1;
1382 }
1383 }
1384
1385 /*
1386 * If a send attempt is made on a previously-accepted socket
1387 * that has been marked as inactive (disconnected), reject
1388 * the request.
1389 */
1390 if (so->so_flags & SOF_DEFUNCT)
1391 return (ENOTCONN);
1392
1393 if (so->so_state & SS_CANTSENDMORE)
1394 return (EPIPE);
1395
1396 if (so->so_error) {
1397 error = so->so_error;
1398 so->so_error = 0;
1399 return (error);
1400 }
1401
1402 if ((so->so_state & SS_ISCONNECTED) == 0) {
1403 if ((so->so_proto->pr_flags & PR_CONNREQUIRED) != 0) {
1404 if ((so->so_state & SS_ISCONFIRMING) == 0 &&
1405 !(resid == 0 && clen != 0))
1406 return (ENOTCONN);
1407 } else if (addr == 0 && !(flags&MSG_HOLD)) {
1408 return ((so->so_proto->pr_flags & PR_CONNREQUIRED) ?
1409 ENOTCONN : EDESTADDRREQ);
1410 }
1411 }
1412 space = sbspace(&so->so_snd);
1413 if (flags & MSG_OOB)
1414 space += 1024;
1415 if ((atomic && resid > so->so_snd.sb_hiwat) ||
1416 clen > so->so_snd.sb_hiwat)
1417 return (EMSGSIZE);
1418 if (space < resid + clen &&
1419 (atomic || space < (int32_t)so->so_snd.sb_lowat || space < clen)) {
1420 if ((so->so_state & SS_NBIO) || (flags & MSG_NBIO) ||
1421 assumelock) {
1422 return (EWOULDBLOCK);
1423 }
1424 sbunlock(&so->so_snd, 1);
1425 error = sbwait(&so->so_snd);
1426 if (error) {
1427 return (error);
1428 }
1429 goto restart;
1430 }
1431
1432 return (0);
1433 }
1434
1435 /*
1436 * Send on a socket.
1437 * If send must go all at once and message is larger than
1438 * send buffering, then hard error.
1439 * Lock against other senders.
1440 * If must go all at once and not enough room now, then
1441 * inform user that this would block and do nothing.
1442 * Otherwise, if nonblocking, send as much as possible.
1443 * The data to be sent is described by "uio" if nonzero,
1444 * otherwise by the mbuf chain "top" (which must be null
1445 * if uio is not). Data provided in mbuf chain must be small
1446 * enough to send all at once.
1447 *
1448 * Returns nonzero on error, timeout or signal; callers
1449 * must check for short counts if EINTR/ERESTART are returned.
1450 * Data and control buffers are freed on return.
1451 * Experiment:
1452 * MSG_HOLD: go thru most of sosend(), but just enqueue the mbuf
1453 * MSG_SEND: go thru as for MSG_HOLD on current fragment, then
1454 * point at the mbuf chain being constructed and go from there.
1455 *
1456 * Returns: 0 Success
1457 * EOPNOTSUPP
1458 * EINVAL
1459 * ENOBUFS
1460 * uiomove:EFAULT
1461 * sosendcheck:EPIPE
1462 * sosendcheck:EWOULDBLOCK
1463 * sosendcheck:EINTR
1464 * sosendcheck:EBADF
1465 * sosendcheck:EINTR
1466 * sosendcheck:??? [value from so_error]
1467 * <pru_send>:ECONNRESET[TCP]
1468 * <pru_send>:EINVAL[TCP]
1469 * <pru_send>:ENOBUFS[TCP]
1470 * <pru_send>:EADDRINUSE[TCP]
1471 * <pru_send>:EADDRNOTAVAIL[TCP]
1472 * <pru_send>:EAFNOSUPPORT[TCP]
1473 * <pru_send>:EACCES[TCP]
1474 * <pru_send>:EAGAIN[TCP]
1475 * <pru_send>:EPERM[TCP]
1476 * <pru_send>:EMSGSIZE[TCP]
1477 * <pru_send>:EHOSTUNREACH[TCP]
1478 * <pru_send>:ENETUNREACH[TCP]
1479 * <pru_send>:ENETDOWN[TCP]
1480 * <pru_send>:ENOMEM[TCP]
1481 * <pru_send>:ENOBUFS[TCP]
1482 * <pru_send>:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
1483 * <pru_send>:EINVAL[AF_UNIX]
1484 * <pru_send>:EOPNOTSUPP[AF_UNIX]
1485 * <pru_send>:EPIPE[AF_UNIX]
1486 * <pru_send>:ENOTCONN[AF_UNIX]
1487 * <pru_send>:EISCONN[AF_UNIX]
1488 * <pru_send>:???[AF_UNIX] [whatever a filter author chooses]
1489 * <sf_data_out>:??? [whatever a filter author chooses]
1490 *
1491 * Notes: Other <pru_send> returns depend on the protocol family; all
1492 * <sf_data_out> returns depend on what the filter author causes
1493 * their filter to return.
1494 */
1495 int
1496 sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
1497 struct mbuf *top, struct mbuf *control, int flags)
1498 {
1499 struct mbuf **mp;
1500 register struct mbuf *m, *freelist = NULL;
1501 register int32_t space, len, resid;
1502 int clen = 0, error, dontroute, mlen, sendflags;
1503 int atomic = sosendallatonce(so) || top;
1504 int sblocked = 0;
1505 struct proc *p = current_proc();
1506
1507 if (uio) {
1508 // LP64todo - fix this!
1509 resid = uio_resid(uio);
1510 } else {
1511 resid = top->m_pkthdr.len;
1512 }
1513 KERNEL_DEBUG((DBG_FNC_SOSEND | DBG_FUNC_START), so, resid,
1514 so->so_snd.sb_cc, so->so_snd.sb_lowat, so->so_snd.sb_hiwat);
1515
1516 socket_lock(so, 1);
1517 if (so->so_type != SOCK_STREAM && (flags & MSG_OOB) != 0) {
1518 error = EOPNOTSUPP;
1519 socket_unlock(so, 1);
1520 goto out;
1521 }
1522
1523 /*
1524 * In theory resid should be unsigned.
1525 * However, space must be signed, as it might be less than 0
1526 * if we over-committed, and we must use a signed comparison
1527 * of space and resid. On the other hand, a negative resid
1528 * causes us to loop sending 0-length segments to the protocol.
1529 *
1530 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
1531 * type sockets since that's an error.
1532 */
1533 if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
1534 error = EINVAL;
1535 socket_unlock(so, 1);
1536 goto out;
1537 }
1538
1539 dontroute =
1540 (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
1541 (so->so_proto->pr_flags & PR_ATOMIC);
1542 OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgsnd);
1543 if (control)
1544 clen = control->m_len;
1545
1546 do {
1547 error = sosendcheck(so, addr, resid, clen, atomic, flags,
1548 &sblocked);
1549 if (error) {
1550 goto release;
1551 }
1552 mp = &top;
1553 space = sbspace(&so->so_snd) - clen + ((flags & MSG_OOB) ?
1554 1024 : 0);
1555
1556 do {
1557 struct socket_filter_entry *filter;
1558 int filtered;
1559 boolean_t recursive;
1560
1561 if (uio == NULL) {
1562 /*
1563 * Data is prepackaged in "top".
1564 */
1565 resid = 0;
1566 if (flags & MSG_EOR)
1567 top->m_flags |= M_EOR;
1568 } else {
1569 int chainlength;
1570 int bytes_to_copy;
1571 boolean_t jumbocl;
1572
1573 bytes_to_copy = imin(resid, space);
1574
1575 if (sosendminchain > 0) {
1576 chainlength = 0;
1577 } else {
1578 chainlength = sosendmaxchain;
1579 }
1580
1581 /*
1582 * Attempt to use larger than system page-size
1583 * clusters for large writes only if there is
1584 * a jumbo cluster pool and if the socket is
1585 * marked accordingly.
1586 */
1587 jumbocl = sosendjcl && njcl > 0 &&
1588 ((so->so_flags & SOF_MULTIPAGES) ||
1589 sosendjcl_ignore_capab);
1590
1591 socket_unlock(so, 0);
1592
1593 do {
1594 int num_needed;
1595 int hdrs_needed = (top == 0) ? 1 : 0;
1596
1597 /*
1598 * Try to maintain a local cache of mbuf
1599 * clusters needed to complete this
1600 * write; the list is further limited to
1601 * the number that are currently needed
1602 * to fill the socket. This mechanism
1603 * allows a large number of mbufs/
1604 * clusters to be grabbed under a single
1605 * mbuf lock... if we can't get any
1606 * clusters, then fall back to trying
1607 * for mbufs. If we fail early (or
1608 * miscalculate the number needed), make
1609 * sure to release any clusters we
1610 * haven't yet consumed.
1611 */
1612 if (freelist == NULL &&
1613 bytes_to_copy > NBPG && jumbocl) {
1614 num_needed =
1615 bytes_to_copy / M16KCLBYTES;
1616
1617 if ((bytes_to_copy -
1618 (num_needed * M16KCLBYTES))
1619 >= MINCLSIZE)
1620 num_needed++;
1621
1622 freelist =
1623 m_getpackets_internal(
1624 (unsigned int *)&num_needed,
1625 hdrs_needed, M_WAIT, 0,
1626 M16KCLBYTES);
1627 /*
1628 * Fall back to 4K cluster size
1629 * if allocation failed
1630 */
1631 }
1632
1633 if (freelist == NULL &&
1634 bytes_to_copy > MCLBYTES) {
1635 num_needed =
1636 bytes_to_copy / NBPG;
1637
1638 if ((bytes_to_copy -
1639 (num_needed * NBPG)) >=
1640 MINCLSIZE)
1641 num_needed++;
1642
1643 freelist =
1644 m_getpackets_internal(
1645 (unsigned int *)&num_needed,
1646 hdrs_needed, M_WAIT, 0,
1647 NBPG);
1648 /*
1649 * Fall back to cluster size
1650 * if allocation failed
1651 */
1652 }
1653
1654 if (freelist == NULL &&
1655 bytes_to_copy > MINCLSIZE) {
1656 num_needed =
1657 bytes_to_copy / MCLBYTES;
1658
1659 if ((bytes_to_copy -
1660 (num_needed * MCLBYTES)) >=
1661 MINCLSIZE)
1662 num_needed++;
1663
1664 freelist =
1665 m_getpackets_internal(
1666 (unsigned int *)&num_needed,
1667 hdrs_needed, M_WAIT, 0,
1668 MCLBYTES);
1669 /*
1670 * Fall back to a single mbuf
1671 * if allocation failed
1672 */
1673 }
1674
1675 if (freelist == NULL) {
1676 if (top == 0)
1677 MGETHDR(freelist,
1678 M_WAIT, MT_DATA);
1679 else
1680 MGET(freelist,
1681 M_WAIT, MT_DATA);
1682
1683 if (freelist == NULL) {
1684 error = ENOBUFS;
1685 socket_lock(so, 0);
1686 goto release;
1687 }
1688 /*
1689 * For datagram protocols,
1690 * leave room for protocol
1691 * headers in first mbuf.
1692 */
1693 if (atomic && top == 0 &&
1694 bytes_to_copy < MHLEN) {
1695 MH_ALIGN(freelist,
1696 bytes_to_copy);
1697 }
1698 }
1699 m = freelist;
1700 freelist = m->m_next;
1701 m->m_next = NULL;
1702
1703 if ((m->m_flags & M_EXT))
1704 mlen = m->m_ext.ext_size;
1705 else if ((m->m_flags & M_PKTHDR))
1706 mlen =
1707 MHLEN - m_leadingspace(m);
1708 else
1709 mlen = MLEN;
1710 len = imin(mlen, bytes_to_copy);
1711
1712 chainlength += len;
1713
1714 space -= len;
1715
1716 error = uiomove(mtod(m, caddr_t),
1717 len, uio);
1718
1719 resid = uio_resid(uio);
1720
1721 m->m_len = len;
1722 *mp = m;
1723 top->m_pkthdr.len += len;
1724 if (error)
1725 break;
1726 mp = &m->m_next;
1727 if (resid <= 0) {
1728 if (flags & MSG_EOR)
1729 top->m_flags |= M_EOR;
1730 break;
1731 }
1732 bytes_to_copy = min(resid, space);
1733
1734 } while (space > 0 &&
1735 (chainlength < sosendmaxchain || atomic ||
1736 resid < MINCLSIZE));
1737
1738 socket_lock(so, 0);
1739
1740 if (error)
1741 goto release;
1742 }
1743
1744 if (flags & (MSG_HOLD|MSG_SEND)) {
1745 /* Enqueue for later, go away if HOLD */
1746 register struct mbuf *mb1;
1747 if (so->so_temp && (flags & MSG_FLUSH)) {
1748 m_freem(so->so_temp);
1749 so->so_temp = NULL;
1750 }
1751 if (so->so_temp)
1752 so->so_tail->m_next = top;
1753 else
1754 so->so_temp = top;
1755 mb1 = top;
1756 while (mb1->m_next)
1757 mb1 = mb1->m_next;
1758 so->so_tail = mb1;
1759 if (flags & MSG_HOLD) {
1760 top = NULL;
1761 goto release;
1762 }
1763 top = so->so_temp;
1764 }
1765 if (dontroute)
1766 so->so_options |= SO_DONTROUTE;
1767
1768 /* Compute flags here, for pru_send and NKEs */
1769 sendflags = (flags & MSG_OOB) ? PRUS_OOB :
1770 /*
1771 * If the user set MSG_EOF, the protocol
1772 * understands this flag, and there is nothing
1773 * left to send, then use PRU_SEND_EOF instead of PRU_SEND.
1774 */
1775 ((flags & MSG_EOF) &&
1776 (so->so_proto->pr_flags & PR_IMPLOPCL) &&
1777 (resid <= 0)) ?
1778 PRUS_EOF :
1779 /* If there is more to send set PRUS_MORETOCOME */
1780 (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0;
1781
1782 /*
1783 * Socket filter processing
1784 */
1785 recursive = (so->so_send_filt_thread != NULL);
1786 filtered = 0;
1787 error = 0;
1788 for (filter = so->so_filt; filter && (error == 0);
1789 filter = filter->sfe_next_onsocket) {
1790 if (filter->sfe_filter->sf_filter.sf_data_out) {
1791 int so_flags = 0;
1792 if (filtered == 0) {
1793 filtered = 1;
1794 so->so_send_filt_thread =
1795 current_thread();
1796 sflt_use(so);
1797 socket_unlock(so, 0);
1798 so_flags =
1799 (sendflags & MSG_OOB) ?
1800 sock_data_filt_flag_oob : 0;
1801 }
1802 error = filter->sfe_filter->sf_filter.
1803 sf_data_out(filter->sfe_cookie, so,
1804 addr, &top, &control, so_flags);
1805 }
1806 }
1807
1808 if (filtered) {
1809 /*
1810 * At this point, we've run at least one
1811 * filter. The socket is unlocked as is
1812 * the socket buffer. Clear the recorded
1813 * filter thread only when we are outside
1814 * of a filter's context. This allows for
1815 * a filter to issue multiple inject calls
1816 * from its sf_data_out callback routine.
1817 */
1818 socket_lock(so, 0);
1819 sflt_unuse(so);
1820 if (!recursive)
1821 so->so_send_filt_thread = 0;
1822 if (error) {
1823 if (error == EJUSTRETURN) {
1824 error = 0;
1825 clen = 0;
1826 control = 0;
1827 top = 0;
1828 }
1829
1830 goto release;
1831 }
1832 }
1833 /*
1834 * End Socket filter processing
1835 */
1836
1837 if (error == EJUSTRETURN) {
1838 /* A socket filter handled this data */
1839 error = 0;
1840 } else {
1841 error = (*so->so_proto->pr_usrreqs->pru_send)
1842 (so, sendflags, top, addr, control, p);
1843 }
1844 #ifdef __APPLE__
1845 if (flags & MSG_SEND)
1846 so->so_temp = NULL;
1847 #endif
1848 if (dontroute)
1849 so->so_options &= ~SO_DONTROUTE;
1850
1851 clen = 0;
1852 control = 0;
1853 top = 0;
1854 mp = &top;
1855 if (error)
1856 goto release;
1857 } while (resid && space > 0);
1858 } while (resid);
1859
1860 release:
1861 if (sblocked)
1862 sbunlock(&so->so_snd, 0); /* will unlock socket */
1863 else
1864 socket_unlock(so, 1);
1865 out:
1866 if (top)
1867 m_freem(top);
1868 if (control)
1869 m_freem(control);
1870 if (freelist)
1871 m_freem_list(freelist);
1872
1873 KERNEL_DEBUG(DBG_FNC_SOSEND | DBG_FUNC_END, so, resid, so->so_snd.sb_cc,
1874 space, error);
1875
1876 return (error);
1877 }
1878
1879 /*
1880 * Implement receive operations on a socket.
1881 * We depend on the way that records are added to the sockbuf
1882 * by sbappend*. In particular, each record (mbufs linked through m_next)
1883 * must begin with an address if the protocol so specifies,
1884 * followed by an optional mbuf or mbufs containing ancillary data,
1885 * and then zero or more mbufs of data.
1886 * In order to avoid blocking network interrupts for the entire time here,
1887 * we splx() while doing the actual copy to user space.
1888 * Although the sockbuf is locked, new data may still be appended,
1889 * and thus we must maintain consistency of the sockbuf during that time.
1890 *
1891 * The caller may receive the data as a single mbuf chain by supplying
1892 * an mbuf **mp0 for use in returning the chain. The uio is then used
1893 * only for the count in uio_resid.
1894 *
1895 * Returns: 0 Success
1896 * ENOBUFS
1897 * ENOTCONN
1898 * EWOULDBLOCK
1899 * uiomove:EFAULT
1900 * sblock:EWOULDBLOCK
1901 * sblock:EINTR
1902 * sbwait:EBADF
1903 * sbwait:EINTR
1904 * sodelayed_copy:EFAULT
1905 * <pru_rcvoob>:EINVAL[TCP]
1906 * <pru_rcvoob>:EWOULDBLOCK[TCP]
1907 * <pru_rcvoob>:???
1908 * <pr_domain->dom_externalize>:EMSGSIZE[AF_UNIX]
1909 * <pr_domain->dom_externalize>:ENOBUFS[AF_UNIX]
1910 * <pr_domain->dom_externalize>:???
1911 *
1912 * Notes: Additional return values from calls through <pru_rcvoob> and
1913 * <pr_domain->dom_externalize> depend on protocols other than
1914 * TCP or AF_UNIX, which are documented above.
1915 */
1916 int
1917 soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio,
1918 struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
1919 {
1920 register struct mbuf *m, **mp, *ml = NULL;
1921 register int flags, len, error, offset;
1922 struct protosw *pr = so->so_proto;
1923 struct mbuf *nextrecord;
1924 int moff, type = 0;
1925 int orig_resid = uio_resid(uio);
1926 struct mbuf *free_list;
1927 int delayed_copy_len;
1928 int can_delay;
1929 int need_event;
1930 struct proc *p = current_proc();
1931
1932 // LP64todo - fix this!
1933 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_START, so, uio_resid(uio),
1934 so->so_rcv.sb_cc, so->so_rcv.sb_lowat, so->so_rcv.sb_hiwat);
1935
1936 socket_lock(so, 1);
1937
1938 #ifdef MORE_LOCKING_DEBUG
1939 if (so->so_usecount == 1)
1940 panic("soreceive: so=%x no other reference on socket\n", so);
1941 #endif
1942 mp = mp0;
1943 if (psa)
1944 *psa = 0;
1945 if (controlp)
1946 *controlp = 0;
1947 if (flagsp)
1948 flags = *flagsp &~ MSG_EOR;
1949 else
1950 flags = 0;
1951
1952 /*
1953 * If a recv attempt is made on a previously-accepted socket
1954 * that has been marked as inactive (disconnected), reject
1955 * the request.
1956 */
1957 if (so->so_flags & SOF_DEFUNCT) {
1958 struct sockbuf *sb = &so->so_rcv;
1959
1960 /*
1961 * This socket should have been disconnected and flushed
1962 * prior to being returned from accept; there should be
1963 * no data on its receive list, so panic otherwise.
1964 */
1965 sb_empty_assert(sb, __func__);
1966 socket_unlock(so, 1);
1967 return (ENOTCONN);
1968 }
1969
1970 /*
1971 * When SO_WANTOOBFLAG is set we try to get out-of-band data
1972 * regardless of the flags argument. Here is the case where
1973 * out-of-band data is not inline.
1974 */
1975 if ((flags & MSG_OOB) ||
1976 ((so->so_options & SO_WANTOOBFLAG) != 0 &&
1977 (so->so_options & SO_OOBINLINE) == 0 &&
1978 (so->so_oobmark || (so->so_state & SS_RCVATMARK)))) {
1979 m = m_get(M_WAIT, MT_DATA);
1980 if (m == NULL) {
1981 socket_unlock(so, 1);
1982 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END,
1983 ENOBUFS, 0, 0, 0, 0);
1984 return (ENOBUFS);
1985 }
1986 error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
1987 if (error)
1988 goto bad;
1989 socket_unlock(so, 0);
1990 do {
1991 error = uiomove(mtod(m, caddr_t),
1992 imin(uio_resid(uio), m->m_len), uio);
1993 m = m_free(m);
1994 } while (uio_resid(uio) && error == 0 && m);
1995 socket_lock(so, 0);
1996 bad:
1997 if (m)
1998 m_freem(m);
1999 #ifdef __APPLE__
2000 if ((so->so_options & SO_WANTOOBFLAG) != 0) {
2001 if (error == EWOULDBLOCK || error == EINVAL) {
2002 /*
2003 * Let's try to get normal data:
2004 * EWOULDBLOCK: out-of-band data not
2005 * received yet. EINVAL: out-of-band data
2006 * already read.
2007 */
2008 error = 0;
2009 goto nooob;
2010 } else if (error == 0 && flagsp) {
2011 *flagsp |= MSG_OOB;
2012 }
2013 }
2014 socket_unlock(so, 1);
2015 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,
2016 0, 0, 0, 0);
2017 #endif
2018 return (error);
2019 }
2020 nooob:
2021 if (mp)
2022 *mp = (struct mbuf *)0;
2023 if (so->so_state & SS_ISCONFIRMING && uio_resid(uio))
2024 (*pr->pr_usrreqs->pru_rcvd)(so, 0);
2025
2026
2027 free_list = (struct mbuf *)0;
2028 delayed_copy_len = 0;
2029 restart:
2030 #ifdef MORE_LOCKING_DEBUG
2031 if (so->so_usecount <= 1)
2032 printf("soreceive: sblock so=%p ref=%d on socket\n",
2033 so, so->so_usecount);
2034 #endif
2035 /*
2036 * See if the socket has been closed (SS_NOFDREF|SS_CANTRCVMORE)
2037 * and if so just return to the caller. This could happen when
2038 * soreceive() is called by a socket upcall function during the
2039 * time the socket is freed. The socket buffer would have been
2040 * locked across the upcall, therefore we cannot put this thread
2041 * to sleep (else we will deadlock) or return EWOULDBLOCK (else
2042 * we may livelock), because the lock on the socket buffer will
2043 * only be released when the upcall routine returns to its caller.
2044 * Because the socket has been officially closed, there can be
2045 * no further read on it.
2046 */
2047 if ((so->so_state & (SS_NOFDREF | SS_CANTRCVMORE)) ==
2048 (SS_NOFDREF | SS_CANTRCVMORE)) {
2049 socket_unlock(so, 1);
2050 return (0);
2051 }
2052
2053 error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
2054 if (error) {
2055 socket_unlock(so, 1);
2056 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,
2057 0, 0, 0, 0);
2058 return (error);
2059 }
2060
2061 m = so->so_rcv.sb_mb;
2062 /*
2063 * If we have less data than requested, block awaiting more
2064 * (subject to any timeout) if:
2065 * 1. the current count is less than the low water mark, or
2066 * 2. MSG_WAITALL is set, and it is possible to do the entire
2067 * receive operation at once if we block (resid <= hiwat), and
2068 * 3. MSG_DONTWAIT is not set.
2069 * If MSG_WAITALL is set but resid is larger than the receive buffer,
2070 * we have to do the receive in sections, and thus risk returning
2071 * a short count if a timeout or signal occurs after we start.
2072 */
2073 if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
2074 so->so_rcv.sb_cc < uio_resid(uio)) &&
2075 (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
2076 ((flags & MSG_WAITALL) && uio_resid(uio) <= so->so_rcv.sb_hiwat)) &&
2077 m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
2078 /*
2079 * Panic if we notice inconsistencies in the socket's
2080 * receive list; both sb_mb and sb_cc should correctly
2081 * reflect the contents of the list, otherwise we may
2082 * end up with false positives during select() or poll()
2083 * which could put the application in a bad state.
2084 */
2085 if (m == NULL && so->so_rcv.sb_cc != 0)
2086 panic("soreceive corrupted so_rcv: m %p cc %u",
2087 m, so->so_rcv.sb_cc);
2088
2089 if (so->so_error) {
2090 if (m)
2091 goto dontblock;
2092 error = so->so_error;
2093 if ((flags & MSG_PEEK) == 0)
2094 so->so_error = 0;
2095 goto release;
2096 }
2097 if (so->so_state & SS_CANTRCVMORE) {
2098 if (m)
2099 goto dontblock;
2100 else
2101 goto release;
2102 }
2103 for (; m; m = m->m_next)
2104 if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
2105 m = so->so_rcv.sb_mb;
2106 goto dontblock;
2107 }
2108 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
2109 (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
2110 error = ENOTCONN;
2111 goto release;
2112 }
2113 if (uio_resid(uio) == 0)
2114 goto release;
2115 if ((so->so_state & SS_NBIO) ||
2116 (flags & (MSG_DONTWAIT|MSG_NBIO))) {
2117 error = EWOULDBLOCK;
2118 goto release;
2119 }
2120 SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1");
2121 SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1");
2122 sbunlock(&so->so_rcv, 1);
2123 #if EVEN_MORE_LOCKING_DEBUG
2124 if (socket_debug)
2125 printf("Waiting for socket data\n");
2126 #endif
2127
2128 error = sbwait(&so->so_rcv);
2129 #if EVEN_MORE_LOCKING_DEBUG
2130 if (socket_debug)
2131 printf("SORECEIVE - sbwait returned %d\n", error);
2132 #endif
2133 if (so->so_usecount < 1)
2134 panic("soreceive: after 2nd sblock so=%p ref=%d on "
2135 "socket\n", so, so->so_usecount);
2136 if (error) {
2137 socket_unlock(so, 1);
2138 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,
2139 0, 0, 0, 0);
2140 return (error);
2141 }
2142 goto restart;
2143 }
2144 dontblock:
2145 OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgrcv);
2146 SBLASTRECORDCHK(&so->so_rcv, "soreceive 1");
2147 SBLASTMBUFCHK(&so->so_rcv, "soreceive 1");
2148 nextrecord = m->m_nextpkt;
2149 if ((pr->pr_flags & PR_ADDR) && m->m_type == MT_SONAME) {
2150 KASSERT(m->m_type == MT_SONAME, ("receive 1a"));
2151 #if CONFIG_MACF_SOCKET_SUBSET
2152 /*
2153 * Call the MAC framework for policy checking if we're in
2154 * the user process context and the socket isn't connected.
2155 */
2156 if (p != kernproc && !(so->so_state & SS_ISCONNECTED)) {
2157 struct mbuf *m0 = m;
2158 /*
2159 * Dequeue this record (temporarily) from the receive
2160 * list since we're about to drop the socket's lock
2161 * where a new record may arrive and be appended to
2162 * the list. Upon MAC policy failure, the record
2163 * will be freed. Otherwise, we'll add it back to
2164 * the head of the list. We cannot rely on SB_LOCK
2165 * because the append operation uses the socket's lock.
2166 */
2167 do {
2168 m->m_nextpkt = NULL;
2169 sbfree(&so->so_rcv, m);
2170 m = m->m_next;
2171 } while (m != NULL);
2172 m = m0;
2173 so->so_rcv.sb_mb = nextrecord;
2174 SB_EMPTY_FIXUP(&so->so_rcv);
2175 SBLASTRECORDCHK(&so->so_rcv, "soreceive 1a");
2176 SBLASTMBUFCHK(&so->so_rcv, "soreceive 1a");
2177 socket_unlock(so, 0);
2178 if (mac_socket_check_received(proc_ucred(p), so,
2179 mtod(m, struct sockaddr *)) != 0) {
2180 /*
2181 * MAC policy failure; free this record and
2182 * process the next record (or block until
2183 * one is available). We have adjusted sb_cc
2184 * and sb_mbcnt above so there is no need to
2185 * call sbfree() again.
2186 */
2187 do {
2188 m = m_free(m);
2189 } while (m != NULL);
2190 /*
2191 * Clear SB_LOCK but don't unlock the socket.
2192 * Process the next record or wait for one.
2193 */
2194 socket_lock(so, 0);
2195 sbunlock(&so->so_rcv, 1);
2196 goto restart;
2197 }
2198 socket_lock(so, 0);
2199 /*
2200 * Re-adjust the socket receive list and re-enqueue
2201 * the record in front of any packets which may have
2202 * been appended while we dropped the lock.
2203 */
2204 for (m = m0; m->m_next != NULL; m = m->m_next)
2205 sballoc(&so->so_rcv, m);
2206 sballoc(&so->so_rcv, m);
2207 if (so->so_rcv.sb_mb == NULL) {
2208 so->so_rcv.sb_lastrecord = m0;
2209 so->so_rcv.sb_mbtail = m;
2210 }
2211 m = m0;
2212 nextrecord = m->m_nextpkt = so->so_rcv.sb_mb;
2213 so->so_rcv.sb_mb = m;
2214 SBLASTRECORDCHK(&so->so_rcv, "soreceive 1b");
2215 SBLASTMBUFCHK(&so->so_rcv, "soreceive 1b");
2216 }
2217 #endif /* CONFIG_MACF_SOCKET_SUBSET */
2218 orig_resid = 0;
2219 if (psa) {
2220 *psa = dup_sockaddr(mtod(m, struct sockaddr *),
2221 mp0 == 0);
2222 if ((*psa == 0) && (flags & MSG_NEEDSA)) {
2223 error = EWOULDBLOCK;
2224 goto release;
2225 }
2226 }
2227 if (flags & MSG_PEEK) {
2228 m = m->m_next;
2229 } else {
2230 sbfree(&so->so_rcv, m);
2231 if (m->m_next == 0 && so->so_rcv.sb_cc != 0)
2232 panic("soreceive: about to create invalid "
2233 "socketbuf");
2234 MFREE(m, so->so_rcv.sb_mb);
2235 m = so->so_rcv.sb_mb;
2236 if (m != NULL) {
2237 m->m_nextpkt = nextrecord;
2238 } else {
2239 so->so_rcv.sb_mb = nextrecord;
2240 SB_EMPTY_FIXUP(&so->so_rcv);
2241 }
2242 }
2243 }
2244
2245 /*
2246 * Process one or more MT_CONTROL mbufs present before any data mbufs
2247 * in the first mbuf chain on the socket buffer. If MSG_PEEK, we
2248 * just copy the data; if !MSG_PEEK, we call into the protocol to
2249 * perform externalization.
2250 */
2251 if (m != NULL && m->m_type == MT_CONTROL) {
2252 struct mbuf *cm = NULL, *cmn;
2253 struct mbuf **cme = &cm;
2254 struct sockbuf *sb_rcv = &so->so_rcv;
2255
2256 /*
2257 * Externalizing the control messages would require us to
2258 * drop the socket's lock below. Once we re-acquire the
2259 * lock, the mbuf chain might change. In order to preserve
2260 * consistency, we unlink all control messages from the
2261 * first mbuf chain in one shot and link them separately
2262 * onto a different chain.
2263 */
2264 do {
2265 if (flags & MSG_PEEK) {
2266 if (controlp != NULL) {
2267 *controlp = m_copy(m, 0, m->m_len);
2268 controlp = &(*controlp)->m_next;
2269 }
2270 m = m->m_next;
2271 } else {
2272 m->m_nextpkt = NULL;
2273 sbfree(sb_rcv, m);
2274 sb_rcv->sb_mb = m->m_next;
2275 m->m_next = NULL;
2276 *cme = m;
2277 cme = &(*cme)->m_next;
2278 m = sb_rcv->sb_mb;
2279 }
2280 } while (m != NULL && m->m_type == MT_CONTROL);
2281
2282 if (!(flags & MSG_PEEK)) {
2283 if (sb_rcv->sb_mb != NULL) {
2284 sb_rcv->sb_mb->m_nextpkt = nextrecord;
2285 } else {
2286 sb_rcv->sb_mb = nextrecord;
2287 SB_EMPTY_FIXUP(sb_rcv);
2288 }
2289 if (nextrecord == NULL)
2290 sb_rcv->sb_lastrecord = m;
2291 }
2292
2293 SBLASTRECORDCHK(&so->so_rcv, "soreceive ctl");
2294 SBLASTMBUFCHK(&so->so_rcv, "soreceive ctl");
2295
2296 while (cm != NULL) {
2297 int cmsg_type;
2298
2299 cmn = cm->m_next;
2300 cm->m_next = NULL;
2301 cmsg_type = mtod(cm, struct cmsghdr *)->cmsg_type;
2302
2303 /*
2304 * Call the protocol to externalize SCM_RIGHTS message
2305 * and return the modified message to the caller upon
2306 * success. Otherwise, all other control messages are
2307 * returned unmodified to the caller. Note that we
2308 * only get into this loop if MSG_PEEK is not set.
2309 */
2310 if (pr->pr_domain->dom_externalize != NULL &&
2311 cmsg_type == SCM_RIGHTS) {
2312 /*
2313 * Release socket lock: see 3903171. This
2314 * would also allow more records to be appended
2315 * to the socket buffer. We still have SB_LOCK
2316 * set on it, so we can be sure that the head
2317 * of the mbuf chain won't change.
2318 */
2319 socket_unlock(so, 0);
2320 error = (*pr->pr_domain->dom_externalize)(cm);
2321 socket_lock(so, 0);
2322 } else {
2323 error = 0;
2324 }
2325
2326 if (controlp != NULL && error == 0) {
2327 *controlp = cm;
2328 controlp = &(*controlp)->m_next;
2329 orig_resid = 0;
2330 } else {
2331 (void) m_free(cm);
2332 }
2333 cm = cmn;
2334 }
2335 orig_resid = 0;
2336 if (sb_rcv->sb_mb != NULL)
2337 nextrecord = sb_rcv->sb_mb->m_nextpkt;
2338 else
2339 nextrecord = NULL;
2340 }
2341
2342 if (m != NULL) {
2343 if (!(flags & MSG_PEEK)) {
2344 /*
2345 * We get here because m points to an mbuf following
2346 * any MT_SONAME or MT_CONTROL mbufs which have been
2347 * processed above. In any case, m should be pointing
2348 * to the head of the mbuf chain, and the nextrecord
2349 * should be either NULL or equal to m->m_nextpkt.
2350 * See comments above about SB_LOCK.
2351 */
2352 if (m != so->so_rcv.sb_mb || m->m_nextpkt != nextrecord)
2353 panic("soreceive: post-control !sync so=%p "
2354 "m=%p nextrecord=%p\n", so, m, nextrecord);
2355
2356 if (nextrecord == NULL)
2357 so->so_rcv.sb_lastrecord = m;
2358 }
2359 type = m->m_type;
2360 if (type == MT_OOBDATA)
2361 flags |= MSG_OOB;
2362 } else {
2363 if (!(flags & MSG_PEEK)) {
2364 so->so_rcv.sb_mb = nextrecord;
2365 SB_EMPTY_FIXUP(&so->so_rcv);
2366 }
2367 }
2368 SBLASTRECORDCHK(&so->so_rcv, "soreceive 2");
2369 SBLASTMBUFCHK(&so->so_rcv, "soreceive 2");
2370
2371 moff = 0;
2372 offset = 0;
2373
2374 if (!(flags & MSG_PEEK) && uio_resid(uio) > sorecvmincopy)
2375 can_delay = 1;
2376 else
2377 can_delay = 0;
2378
2379 need_event = 0;
2380
2381 while (m && (uio_resid(uio) - delayed_copy_len) > 0 && error == 0) {
2382 if (m->m_type == MT_OOBDATA) {
2383 if (type != MT_OOBDATA)
2384 break;
2385 } else if (type == MT_OOBDATA) {
2386 break;
2387 }
2388 /*
2389 * Make sure to always set the MSG_OOB flag when getting
2390 * out-of-band data inline.
2391 */
2392 if ((so->so_options & SO_WANTOOBFLAG) != 0 &&
2393 (so->so_options & SO_OOBINLINE) != 0 &&
2394 (so->so_state & SS_RCVATMARK) != 0) {
2395 flags |= MSG_OOB;
2396 }
2397 so->so_state &= ~SS_RCVATMARK;
2398 len = uio_resid(uio) - delayed_copy_len;
2399 if (so->so_oobmark && len > so->so_oobmark - offset)
2400 len = so->so_oobmark - offset;
2401 if (len > m->m_len - moff)
2402 len = m->m_len - moff;
2403 /*
2404 * If mp is set, just pass back the mbufs.
2405 * Otherwise copy them out via the uio, then free.
2406 * Sockbuf must be consistent here (it points to the current mbuf,
2407 * which in turn points to the next record) when we drop priority;
2408 * we must note any additions to the sockbuf when we
2409 * block interrupts again.
2410 */
2411 if (mp == 0) {
2412 SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove");
2413 SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove");
2414 if (can_delay && len == m->m_len) {
2415 /*
2416 * only delay the copy if we're consuming the
2417 * mbuf and we're NOT in MSG_PEEK mode
2418 * and we have enough data to make it worthwhile
2419 * to drop and retake the lock... can_delay
2420 * reflects the state of the two latter
2421 * constraints; moff should always be zero
2422 * in these cases
2423 */
2424 delayed_copy_len += len;
2425 } else {
2426 if (delayed_copy_len) {
2427 error = sodelayed_copy(so, uio,
2428 &free_list, &delayed_copy_len);
2429
2430 if (error) {
2431 goto release;
2432 }
2433 /*
2434 * We can only get here if MSG_PEEK is not
2435 * set; therefore, m should point at the
2436 * head of the rcv queue. If it doesn't,
2437 * it means something changed drastically
2438 * while we were out from behind
2439 * the lock in sodelayed_copy, perhaps
2440 * a RST on the stream. In any event,
2441 * the stream has been interrupted; it's
2442 * probably best just to return whatever
2443 * data we've moved and let the caller
2444 * sort it out.
2445 */
2446 if (m != so->so_rcv.sb_mb) {
2447 break;
2448 }
2449 }
2450 socket_unlock(so, 0);
2451 error = uiomove(mtod(m, caddr_t) + moff,
2452 (int)len, uio);
2453 socket_lock(so, 0);
2454
2455 if (error)
2456 goto release;
2457 }
2458 } else {
2459 uio_setresid(uio, (uio_resid(uio) - len));
2460 }
2461 if (len == m->m_len - moff) {
2462 if (m->m_flags & M_EOR)
2463 flags |= MSG_EOR;
2464 if (flags & MSG_PEEK) {
2465 m = m->m_next;
2466 moff = 0;
2467 } else {
2468 nextrecord = m->m_nextpkt;
2469 sbfree(&so->so_rcv, m);
2470 m->m_nextpkt = NULL;
2471
2472 if (mp) {
2473 *mp = m;
2474 mp = &m->m_next;
2475 so->so_rcv.sb_mb = m = m->m_next;
2476 *mp = (struct mbuf *)0;
2477 } else {
2478 if (free_list == NULL)
2479 free_list = m;
2480 else
2481 ml->m_next = m;
2482 ml = m;
2483 so->so_rcv.sb_mb = m = m->m_next;
2484 ml->m_next = 0;
2485 }
2486 if (m != NULL) {
2487 m->m_nextpkt = nextrecord;
2488 if (nextrecord == NULL)
2489 so->so_rcv.sb_lastrecord = m;
2490 } else {
2491 so->so_rcv.sb_mb = nextrecord;
2492 SB_EMPTY_FIXUP(&so->so_rcv);
2493 }
2494 SBLASTRECORDCHK(&so->so_rcv, "soreceive 3");
2495 SBLASTMBUFCHK(&so->so_rcv, "soreceive 3");
2496 }
2497 } else {
2498 if (flags & MSG_PEEK) {
2499 moff += len;
2500 } else {
2501 if (mp)
2502 *mp = m_copym(m, 0, len, M_WAIT);
2503 m->m_data += len;
2504 m->m_len -= len;
2505 so->so_rcv.sb_cc -= len;
2506 }
2507 }
2508 if (so->so_oobmark) {
2509 if ((flags & MSG_PEEK) == 0) {
2510 so->so_oobmark -= len;
2511 if (so->so_oobmark == 0) {
2512 so->so_state |= SS_RCVATMARK;
2513 /*
2514 * delay posting the actual event until
2515 * after any delayed copy processing
2516 * has finished
2517 */
2518 need_event = 1;
2519 break;
2520 }
2521 } else {
2522 offset += len;
2523 if (offset == so->so_oobmark)
2524 break;
2525 }
2526 }
2527 if (flags & MSG_EOR)
2528 break;
2529 /*
2530 * If the MSG_WAITALL or MSG_WAITSTREAM flag is set
2531 * (for non-atomic socket), we must not quit until
2532 * "uio->uio_resid == 0" or an error termination.
2533 * If a signal/timeout occurs, return with a short
2534 * count but without error. Keep sockbuf locked
2535 * against other readers.
2536 */
2537 while (flags & (MSG_WAITALL|MSG_WAITSTREAM) && m == 0 &&
2538 (uio_resid(uio) - delayed_copy_len) > 0 &&
2539 !sosendallatonce(so) && !nextrecord) {
2540 if (so->so_error || so->so_state & SS_CANTRCVMORE)
2541 goto release;
2542
2543 /*
2544 * Depending on the protocol (e.g. TCP), the following
2545 * might cause the socket lock to be dropped and later
2546 * be reacquired, and more data could have arrived and
2547 * have been appended to the receive socket buffer by
2548 * the time it returns. Therefore, we sleep in
2549 * sbwait() below only if the socket buffer is
2550 * empty, in order to avoid a false sleep.
2551 */
2552 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb &&
2553 (((struct inpcb *)so->so_pcb)->inp_state !=
2554 INPCB_STATE_DEAD))
2555 (*pr->pr_usrreqs->pru_rcvd)(so, flags);
2556
2557 SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 2");
2558 SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 2");
2559
2560 if (so->so_rcv.sb_mb == NULL && sbwait(&so->so_rcv)) {
2561 error = 0;
2562 goto release;
2563 }
2564 /*
2565 * We have to wait until after we get back from the sbwait
2566 * before doing the copy, because we will drop the lock if
2567 * we have enough data that has been delayed. Dropping
2568 * the lock opens up a window allowing the netisr
2569 * thread to process incoming packets and to change
2570 * the state of this socket. We issue the sbwait
2571 * because the socket is empty and we're expecting the
2572 * netisr thread to wake us up when more packets arrive;
2573 * if we allowed that processing to happen and then called
2574 * sbwait, we could stall forever with packets sitting in the
2575 * socket if no further packets arrive from the remote
2576 * side.
2577 *
2578 * We want to copy before we've collected all the data needed
2579 * to satisfy this request, so that the copy can overlap
2580 * the incoming packet processing on an MP system.
2581 */
2582 if (delayed_copy_len > sorecvmincopy &&
2583 (delayed_copy_len > (so->so_rcv.sb_hiwat / 2))) {
2584 error = sodelayed_copy(so, uio,
2585 &free_list, &delayed_copy_len);
2586
2587 if (error)
2588 goto release;
2589 }
2590 m = so->so_rcv.sb_mb;
2591 if (m) {
2592 nextrecord = m->m_nextpkt;
2593 }
2594 }
2595 }
2596 #ifdef MORE_LOCKING_DEBUG
2597 if (so->so_usecount <= 1)
2598 panic("soreceive: after big while so=%p ref=%d on socket\n",
2599 so, so->so_usecount);
2600 #endif
2601
2602 if (m && pr->pr_flags & PR_ATOMIC) {
2603 #ifdef __APPLE__
2604 if (so->so_options & SO_DONTTRUNC) {
2605 flags |= MSG_RCVMORE;
2606 } else {
2607 #endif
2608 flags |= MSG_TRUNC;
2609 if ((flags & MSG_PEEK) == 0)
2610 (void) sbdroprecord(&so->so_rcv);
2611 #ifdef __APPLE__
2612 }
2613 #endif
2614 }
2615
2616 /*
2617 * pru_rcvd below (for TCP) may cause more data to be received
2618 * if the socket lock is dropped prior to sending the ACK; some
2619 * legacy OpenTransport applications don't handle this well
2620 * (if they receive less data than requested while MSG_HAVEMORE
2621 * is set), and so we set the flag now based on what we know
2622 * prior to calling pru_rcvd.
2623 */
2624 if ((so->so_options & SO_WANTMORE) && so->so_rcv.sb_cc > 0)
2625 flags |= MSG_HAVEMORE;
2626
2627 if ((flags & MSG_PEEK) == 0) {
2628 if (m == 0) {
2629 so->so_rcv.sb_mb = nextrecord;
2630 /*
2631 * First part is an inline SB_EMPTY_FIXUP(). Second
2632 * part makes sure sb_lastrecord is up-to-date if
2633 * there is still data in the socket buffer.
2634 */
2635 if (so->so_rcv.sb_mb == NULL) {
2636 so->so_rcv.sb_mbtail = NULL;
2637 so->so_rcv.sb_lastrecord = NULL;
2638 } else if (nextrecord->m_nextpkt == NULL) {
2639 so->so_rcv.sb_lastrecord = nextrecord;
2640 }
2641 }
2642 SBLASTRECORDCHK(&so->so_rcv, "soreceive 4");
2643 SBLASTMBUFCHK(&so->so_rcv, "soreceive 4");
2644 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
2645 (*pr->pr_usrreqs->pru_rcvd)(so, flags);
2646 }
2647 #ifdef __APPLE__
2648 if (delayed_copy_len) {
2649 error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);
2650
2651 if (error)
2652 goto release;
2653 }
2654 if (free_list) {
2655 m_freem_list((struct mbuf *)free_list);
2656 free_list = (struct mbuf *)0;
2657 }
2658 if (need_event)
2659 postevent(so, 0, EV_OOB);
2660 #endif
2661 if (orig_resid == uio_resid(uio) && orig_resid &&
2662 (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
2663 sbunlock(&so->so_rcv, 1);
2664 goto restart;
2665 }
2666
2667 if (flagsp)
2668 *flagsp |= flags;
2669 release:
2670 #ifdef MORE_LOCKING_DEBUG
2671 if (so->so_usecount <= 1)
2672 panic("soreceive: release so=%p ref=%d on socket\n",
2673 so, so->so_usecount);
2674 #endif
2675 if (delayed_copy_len) {
2676 error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);
2677 }
2678 if (free_list) {
2679 m_freem_list((struct mbuf *)free_list);
2680 }
2681 sbunlock(&so->so_rcv, 0); /* will unlock socket */
2682
2683 // LP64todo - fix this!
2684 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, so, uio_resid(uio),
2685 so->so_rcv.sb_cc, 0, error);
2686
2687 return (error);
2688 }
2689
2690 /*
2691 * Returns: 0 Success
2692 * uiomove:EFAULT
2693 */
2694 static int
2695 sodelayed_copy(struct socket *so, struct uio *uio, struct mbuf **free_list,
2696 int *resid)
2697 {
2698 int error = 0;
2699 struct mbuf *m;
2700
2701 m = *free_list;
2702
2703 socket_unlock(so, 0);
2704
2705 while (m && error == 0) {
2706
2707 error = uiomove(mtod(m, caddr_t), (int)m->m_len, uio);
2708
2709 m = m->m_next;
2710 }
2711 m_freem_list(*free_list);
2712
2713 *free_list = (struct mbuf *)NULL;
2714 *resid = 0;
2715
2716 socket_lock(so, 0);
2717
2718 return (error);
2719 }
2720
2721
2722 /*
2723 * Returns: 0 Success
2724 * EINVAL
2725 * ENOTCONN
2726 * <pru_shutdown>:EINVAL
2727 * <pru_shutdown>:EADDRNOTAVAIL[TCP]
2728 * <pru_shutdown>:ENOBUFS[TCP]
2729 * <pru_shutdown>:EMSGSIZE[TCP]
2730 * <pru_shutdown>:EHOSTUNREACH[TCP]
2731 * <pru_shutdown>:ENETUNREACH[TCP]
2732 * <pru_shutdown>:ENETDOWN[TCP]
2733 * <pru_shutdown>:ENOMEM[TCP]
2734 * <pru_shutdown>:EACCES[TCP]
2735 * <pru_shutdown>:EMSGSIZE[TCP]
2736 * <pru_shutdown>:ENOBUFS[TCP]
2737 * <pru_shutdown>:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
2738 * <pru_shutdown>:??? [other protocol families]
2739 */
2740 int
2741 soshutdown(struct socket *so, int how)
2742 {
2743 int error;
2744
2745 switch (how) {
2746 case SHUT_RD:
2747 case SHUT_WR:
2748 case SHUT_RDWR:
2749 socket_lock(so, 1);
2750 if ((so->so_state &
2751 (SS_ISCONNECTED|SS_ISCONNECTING|SS_ISDISCONNECTING)) == 0) {
2752 error = ENOTCONN;
2753 } else {
2754 error = soshutdownlock(so, how);
2755 }
2756 socket_unlock(so, 1);
2757 break;
2758 default:
2759 error = EINVAL;
2760 break;
2761 }
2762
2763 return (error);
2764 }
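
/*
 * Example (illustrative only; user-level code): soshutdown() is reached
 * via shutdown(2).  A typical half-close on a connected stream socket
 * "s", after which the peer sees EOF while we can still read its reply:
 *
 *	if (shutdown(s, SHUT_WR) == -1)
 *		perror("shutdown");
 *
 * Calling shutdown(2) on a socket that is not connected (nor connecting
 * or disconnecting) fails with ENOTCONN, per the state check above.
 */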
2765
2766 int
2767 soshutdownlock(struct socket *so, int how)
2768 {
2769 struct protosw *pr = so->so_proto;
2770 int error = 0;
2771
2772 sflt_notify(so, sock_evt_shutdown, &how);
2773
2774 if (how != SHUT_WR) {
2775 if ((so->so_state & SS_CANTRCVMORE) != 0) {
2776 /* read already shut down */
2777 error = ENOTCONN;
2778 goto done;
2779 }
2780 sorflush(so);
2781 postevent(so, 0, EV_RCLOSED);
2782 }
2783 if (how != SHUT_RD) {
2784 if ((so->so_state & SS_CANTSENDMORE) != 0) {
2785 /* write already shut down */
2786 error = ENOTCONN;
2787 goto done;
2788 }
2789 error = (*pr->pr_usrreqs->pru_shutdown)(so);
2790 postevent(so, 0, EV_WCLOSED);
2791 }
2792 done:
2793 KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, 0, 0, 0, 0, 0);
2794 return (error);
2795 }
2796
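/*
 * sorflush
 * Flush the receive side of the socket: notify any attached socket
 * filters, mark the socket as unable to receive more data, detach the
 * current receive buffer contents, let the protocol dispose of any
 * queued rights (e.g. descriptors passed over AF_UNIX), and release
 * the detached buffer.
 */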
2797 void
2798 sorflush(struct socket *so)
2799 {
2800 register struct sockbuf *sb = &so->so_rcv;
2801 register struct protosw *pr = so->so_proto;
2802 struct sockbuf asb;
2803
2804 #ifdef MORE_LOCKING_DEBUG
2805 lck_mtx_t *mutex_held;
2806
2807 if (so->so_proto->pr_getlock != NULL)
2808 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
2809 else
2810 mutex_held = so->so_proto->pr_domain->dom_mtx;
2811 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
2812 #endif
2813
2814 sflt_notify(so, sock_evt_flush_read, NULL);
2815
2816 sb->sb_flags |= SB_NOINTR;
2817 (void) sblock(sb, M_WAIT);
2818 socantrcvmore(so);
2819 sbunlock(sb, 1);
2820 #ifdef __APPLE__
2821 selthreadclear(&sb->sb_sel);
2822 #endif
2823 asb = *sb;
2824 bzero((caddr_t)sb, sizeof (*sb));
2825 sb->sb_so = so; /* reestablish link to socket */
2826 if (asb.sb_flags & SB_KNOTE) {
2827 sb->sb_sel.si_note = asb.sb_sel.si_note;
2828 sb->sb_flags = SB_KNOTE;
2829 }
2830 if (asb.sb_flags & SB_DROP)
2831 sb->sb_flags |= SB_DROP;
2832 if (asb.sb_flags & SB_UNIX)
2833 sb->sb_flags |= SB_UNIX;
2834 if ((pr->pr_flags & PR_RIGHTS) && pr->pr_domain->dom_dispose) {
2835 (*pr->pr_domain->dom_dispose)(asb.sb_mb);
2836 }
2837 sbrelease(&asb);
2838 }
2839
2840 /*
2841 * Perhaps this routine, and sooptcopyout(), below, ought to come in
2842 * an additional variant to handle the case where the option value needs
2843 * to be some kind of integer, but not a specific size.
2844 * In addition to their use here, these functions are also called by the
2845 * protocol-level pr_ctloutput() routines.
2846 *
2847 * Returns: 0 Success
2848 * EINVAL
2849 * copyin:EFAULT
2850 */
2851 int
2852 sooptcopyin(struct sockopt *sopt, void *buf, size_t len, size_t minlen)
2853 {
2854 size_t valsize;
2855
2856 /*
2857 * If the user gives us more than we wanted, we ignore it,
2858 * but if we don't get the minimum length the caller
2859 * wants, we return EINVAL. On success, sopt->sopt_valsize
2860 * is set to however much we actually retrieved.
2861 */
2862 if ((valsize = sopt->sopt_valsize) < minlen)
2863 return (EINVAL);
2864 if (valsize > len)
2865 sopt->sopt_valsize = valsize = len;
2866
2867 if (sopt->sopt_p != kernproc)
2868 return (copyin(sopt->sopt_val, buf, valsize));
2869
2870 bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), buf, valsize);
2871 return (0);
2872 }
2873
2874 /*
2875 * sooptcopyin_timeval
2876 * Copy in a timeval value into tv_p, and take into account whether the
2877 * the calling process is 64-bit or 32-bit. Moved the sanity checking
2878 * calling process is 64-bit or 32-bit. Moved the sanity checking
2879 * the top 32-bits assigning tv64.tv_sec to tv_p->tv_sec.
2880 */
2881 static int
2882 sooptcopyin_timeval(struct sockopt *sopt, struct timeval * tv_p)
2883 {
2884 int error;
2885
2886 if (proc_is64bit(sopt->sopt_p)) {
2887 struct user64_timeval tv64;
2888
2889 if (sopt->sopt_valsize < sizeof(tv64)) {
2890 return (EINVAL);
2891 }
2892 sopt->sopt_valsize = sizeof(tv64);
2893 if (sopt->sopt_p != kernproc) {
2894 error = copyin(sopt->sopt_val, &tv64, sizeof(tv64));
2895 if (error != 0)
2896 return (error);
2897 } else {
2898 bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), &tv64,
2899 sizeof(tv64));
2900 }
2901 if (tv64.tv_sec < 0 || tv64.tv_sec > LONG_MAX
2902 || tv64.tv_usec < 0 || tv64.tv_usec >= 1000000) {
2903 return (EDOM);
2904 }
2905 tv_p->tv_sec = tv64.tv_sec;
2906 tv_p->tv_usec = tv64.tv_usec;
2907 } else {
2908 struct user32_timeval tv32;
2909
2910 if (sopt->sopt_valsize < sizeof(tv32)) {
2911 return (EINVAL);
2912 }
2913 sopt->sopt_valsize = sizeof(tv32);
2914 if (sopt->sopt_p != kernproc) {
2915 error = copyin(sopt->sopt_val, &tv32, sizeof(tv32));
2916 if (error != 0) {
2917 return (error);
2918 }
2919 } else {
2920 bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), &tv32,
2921 sizeof(tv32));
2922 }
2923 #ifndef __LP64__ // K64todo "comparison is always false due to limited range of data type"
2924 if (tv32.tv_sec < 0 || tv32.tv_sec > LONG_MAX
2925 || tv32.tv_usec < 0 || tv32.tv_usec >= 1000000) {
2926 return (EDOM);
2927 }
2928 #endif
2929 tv_p->tv_sec = tv32.tv_sec;
2930 tv_p->tv_usec = tv32.tv_usec;
2931 }
2932 return (0);
2933 }
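
/*
 * Example (illustrative only; user-level code): the timeval handled above
 * normally arrives via setsockopt(2) with SO_RCVTIMEO or SO_SNDTIMEO.
 * A sketch that sets a 2.5 second receive timeout on socket "s":
 *
 *	struct timeval tv = { .tv_sec = 2, .tv_usec = 500000 };
 *
 *	if (setsockopt(s, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof (tv)) == -1)
 *		perror("setsockopt");
 *
 * A negative tv_sec or a tv_usec outside [0, 1000000) is rejected with
 * EDOM by the checks above.
 */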
2934
2935 /*
2936 * Returns: 0 Success
2937 * EINVAL
2938 * ENOPROTOOPT
2939 * ENOBUFS
2940 * EDOM
2941 * sooptcopyin:EINVAL
2942 * sooptcopyin:EFAULT
2943 * sooptcopyin_timeval:EINVAL
2944 * sooptcopyin_timeval:EFAULT
2945 * sooptcopyin_timeval:EDOM
2946 * <pr_ctloutput>:EOPNOTSUPP[AF_UNIX]
2947 * <pr_ctloutput>:???
2948 * sflt_attach_private:??? [whatever a filter author chooses]
2949 * <sf_setoption>:??? [whatever a filter author chooses]
2950 *
2951 * Notes: Other <pr_ctloutput> returns depend on the protocol family; all
2952 * <sf_setoption> returns depend on what the filter author causes
2953 * their filter to return.
2954 */
2955 int
2956 sosetopt(struct socket *so, struct sockopt *sopt)
2957 {
2958 int error, optval;
2959 struct linger l;
2960 struct timeval tv;
2961 struct socket_filter_entry *filter;
2962 int filtered = 0;
2963 #if CONFIG_MACF_SOCKET
2964 struct mac extmac;
2965 #endif /* MAC_SOCKET */
2966
2967 socket_lock(so, 1);
2968 if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE))
2969 == (SS_CANTRCVMORE | SS_CANTSENDMORE) &&
2970 (so->so_flags & SOF_NPX_SETOPTSHUT) == 0) {
2971 /* the socket has been shutdown, no more sockopt's */
2972 error = EINVAL;
2973 goto bad;
2974 }
2975
2976 if (sopt->sopt_dir != SOPT_SET) {
2977 sopt->sopt_dir = SOPT_SET;
2978 }
2979
2980 error = 0;
2981 for (filter = so->so_filt; filter && (error == 0);
2982 filter = filter->sfe_next_onsocket) {
2983 if (filter->sfe_filter->sf_filter.sf_setoption) {
2984 if (filtered == 0) {
2985 filtered = 1;
2986 sflt_use(so);
2987 socket_unlock(so, 0);
2988 }
2989 error = filter->sfe_filter->sf_filter.
2990 sf_setoption(filter->sfe_cookie, so, sopt);
2991 }
2992 }
2993
2994 if (filtered != 0) {
2995 socket_lock(so, 0);
2996 sflt_unuse(so);
2997
2998 if (error) {
2999 if (error == EJUSTRETURN)
3000 error = 0;
3001 goto bad;
3002 }
3003 }
3004
3005 error = 0;
3006 if (sopt->sopt_level != SOL_SOCKET) {
3007 if (so->so_proto && so->so_proto->pr_ctloutput) {
3008 error = (*so->so_proto->pr_ctloutput)(so, sopt);
3009 socket_unlock(so, 1);
3010 return (error);
3011 }
3012 error = ENOPROTOOPT;
3013 } else {
3014 switch (sopt->sopt_name) {
3015 case SO_LINGER:
3016 case SO_LINGER_SEC:
3017 error = sooptcopyin(sopt, &l, sizeof (l), sizeof (l));
3018 if (error)
3019 goto bad;
3020
3021 so->so_linger = (sopt->sopt_name == SO_LINGER) ?
3022 l.l_linger : l.l_linger * hz;
3023 if (l.l_onoff)
3024 so->so_options |= SO_LINGER;
3025 else
3026 so->so_options &= ~SO_LINGER;
3027 break;
3028
3029 case SO_DEBUG:
3030 case SO_KEEPALIVE:
3031 case SO_DONTROUTE:
3032 case SO_USELOOPBACK:
3033 case SO_BROADCAST:
3034 case SO_REUSEADDR:
3035 case SO_REUSEPORT:
3036 case SO_OOBINLINE:
3037 case SO_TIMESTAMP:
3038 #ifdef __APPLE__
3039 case SO_DONTTRUNC:
3040 case SO_WANTMORE:
3041 case SO_WANTOOBFLAG:
3042 #endif
3043 error = sooptcopyin(sopt, &optval, sizeof (optval),
3044 sizeof (optval));
3045 if (error)
3046 goto bad;
3047 if (optval)
3048 so->so_options |= sopt->sopt_name;
3049 else
3050 so->so_options &= ~sopt->sopt_name;
3051 break;
3052
3053 case SO_SNDBUF:
3054 case SO_RCVBUF:
3055 case SO_SNDLOWAT:
3056 case SO_RCVLOWAT:
3057 error = sooptcopyin(sopt, &optval, sizeof (optval),
3058 sizeof (optval));
3059 if (error)
3060 goto bad;
3061
3062 /*
3063 * Values < 1 make no sense for any of these
3064 * options, so disallow them.
3065 */
3066 if (optval < 1) {
3067 error = EINVAL;
3068 goto bad;
3069 }
3070
3071 switch (sopt->sopt_name) {
3072 case SO_SNDBUF:
3073 case SO_RCVBUF:
3074 if (sbreserve(sopt->sopt_name == SO_SNDBUF ?
3075 &so->so_snd : &so->so_rcv,
3076 (u_int32_t) optval) == 0) {
3077 error = ENOBUFS;
3078 goto bad;
3079 }
3080 if (sopt->sopt_name == SO_SNDBUF)
3081 so->so_snd.sb_flags |= SB_USRSIZE;
3082 else
3083 so->so_rcv.sb_flags |= SB_USRSIZE;
3084 break;
3085
3086 /*
3087 * Make sure the low-water is never greater than
3088 * the high-water.
3089 */
3090 case SO_SNDLOWAT:
3091 so->so_snd.sb_lowat =
3092 (optval > so->so_snd.sb_hiwat) ?
3093 so->so_snd.sb_hiwat : optval;
3094 break;
3095 case SO_RCVLOWAT:
3096 so->so_rcv.sb_lowat =
3097 (optval > so->so_rcv.sb_hiwat) ?
3098 so->so_rcv.sb_hiwat : optval;
3099 break;
3100 }
3101 break;
3102
3103 case SO_SNDTIMEO:
3104 case SO_RCVTIMEO:
3105 error = sooptcopyin_timeval(sopt, &tv);
3106 if (error)
3107 goto bad;
3108
3109 switch (sopt->sopt_name) {
3110 case SO_SNDTIMEO:
3111 so->so_snd.sb_timeo = tv;
3112 break;
3113 case SO_RCVTIMEO:
3114 so->so_rcv.sb_timeo = tv;
3115 break;
3116 }
3117 break;
3118
3119 case SO_NKE:
3120 {
3121 struct so_nke nke;
3122
3123 error = sooptcopyin(sopt, &nke, sizeof (nke),
3124 sizeof (nke));
3125 if (error)
3126 goto bad;
3127
3128 error = sflt_attach_private(so, NULL,
3129 nke.nke_handle, 1);
3130 break;
3131 }
3132
3133 case SO_NOSIGPIPE:
3134 error = sooptcopyin(sopt, &optval, sizeof (optval),
3135 sizeof (optval));
3136 if (error)
3137 goto bad;
3138 if (optval)
3139 so->so_flags |= SOF_NOSIGPIPE;
3140 else
3141 so->so_flags &= ~SOF_NOSIGPIPE;
3142
3143 break;
3144
3145 case SO_NOADDRERR:
3146 error = sooptcopyin(sopt, &optval, sizeof (optval),
3147 sizeof (optval));
3148 if (error)
3149 goto bad;
3150 if (optval)
3151 so->so_flags |= SOF_NOADDRAVAIL;
3152 else
3153 so->so_flags &= ~SOF_NOADDRAVAIL;
3154
3155 break;
3156
3157 case SO_REUSESHAREUID:
3158 error = sooptcopyin(sopt, &optval, sizeof (optval),
3159 sizeof (optval));
3160 if (error)
3161 goto bad;
3162 if (optval)
3163 so->so_flags |= SOF_REUSESHAREUID;
3164 else
3165 so->so_flags &= ~SOF_REUSESHAREUID;
3166 break;
3167 #ifdef __APPLE_API_PRIVATE
3168 case SO_NOTIFYCONFLICT:
3169 if (kauth_cred_issuser(kauth_cred_get()) == 0) {
3170 error = EPERM;
3171 goto bad;
3172 }
3173 error = sooptcopyin(sopt, &optval, sizeof (optval),
3174 sizeof (optval));
3175 if (error)
3176 goto bad;
3177 if (optval)
3178 so->so_flags |= SOF_NOTIFYCONFLICT;
3179 else
3180 so->so_flags &= ~SOF_NOTIFYCONFLICT;
3181 break;
3182 #endif
3183 case SO_RESTRICTIONS:
3184 if (kauth_cred_issuser(kauth_cred_get()) == 0) {
3185 error = EPERM;
3186 goto bad;
3187 }
3188 error = sooptcopyin(sopt, &optval, sizeof (optval),
3189 sizeof (optval));
3190 if (error)
3191 goto bad;
3192 so->so_restrictions = (optval & (SO_RESTRICT_DENYIN |
3193 SO_RESTRICT_DENYOUT | SO_RESTRICT_DENYSET));
3194 break;
3195
3196 case SO_LABEL:
3197 #if CONFIG_MACF_SOCKET
3198 if ((error = sooptcopyin(sopt, &extmac, sizeof (extmac),
3199 sizeof (extmac))) != 0)
3200 goto bad;
3201
3202 error = mac_setsockopt_label(proc_ucred(sopt->sopt_p),
3203 so, &extmac);
3204 #else
3205 error = EOPNOTSUPP;
3206 #endif /* MAC_SOCKET */
3207 break;
3208
3209 #ifdef __APPLE_API_PRIVATE
3210 case SO_UPCALLCLOSEWAIT:
3211 error = sooptcopyin(sopt, &optval, sizeof (optval),
3212 sizeof (optval));
3213 if (error)
3214 goto bad;
3215 if (optval)
3216 so->so_flags |= SOF_UPCALLCLOSEWAIT;
3217 else
3218 so->so_flags &= ~SOF_UPCALLCLOSEWAIT;
3219 break;
3220 #endif
3221
3222 case SO_RANDOMPORT:
3223 error = sooptcopyin(sopt, &optval, sizeof (optval),
3224 sizeof (optval));
3225 if (error)
3226 goto bad;
3227 if (optval)
3228 so->so_flags |= SOF_BINDRANDOMPORT;
3229 else
3230 so->so_flags &= ~SOF_BINDRANDOMPORT;
3231 break;
3232
3233 case SO_NP_EXTENSIONS: {
3234 struct so_np_extensions sonpx;
3235
3236 error = sooptcopyin(sopt, &sonpx, sizeof(sonpx), sizeof(sonpx));
3237 if (error)
3238 goto bad;
3239 if (sonpx.npx_mask & ~SONPX_MASK_VALID) {
3240 error = EINVAL;
3241 goto bad;
3242 }
3243 /*
3244 * Only one bit defined for now
3245 */
3246 if ((sonpx.npx_mask & SONPX_SETOPTSHUT)) {
3247 if ((sonpx.npx_flags & SONPX_SETOPTSHUT))
3248 so->so_flags |= SOF_NPX_SETOPTSHUT;
3249 else
3250 so->so_flags &= ~SOF_NPX_SETOPTSHUT;
3251 }
3252 break;
3253 }
3254
3255 default:
3256 error = ENOPROTOOPT;
3257 break;
3258 }
3259 if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
3260 (void) ((*so->so_proto->pr_ctloutput)(so, sopt));
3261 }
3262 }
3263 bad:
3264 socket_unlock(so, 1);
3265 return (error);
3266 }
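
/*
 * Example (illustrative only; user-level code): sosetopt() is reached via
 * setsockopt(2).  A sketch that enables a lingering close and enlarges
 * the receive buffer on socket "s":
 *
 *	struct linger l = { .l_onoff = 1, .l_linger = 5 };
 *	int rcvbuf = 128 * 1024;
 *
 *	(void) setsockopt(s, SOL_SOCKET, SO_LINGER, &l, sizeof (l));
 *	(void) setsockopt(s, SOL_SOCKET, SO_RCVBUF, &rcvbuf, sizeof (rcvbuf));
 *
 * Per the cases above, SO_LINGER stores l_linger unchanged while the
 * Apple-specific SO_LINGER_SEC converts from seconds using hz, and
 * SO_SNDBUF/SO_RCVBUF/SO_SNDLOWAT/SO_RCVLOWAT values less than 1 are
 * rejected with EINVAL.
 */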
3267
3268 /* Helper routines for getsockopt */
3269 int
3270 sooptcopyout(struct sockopt *sopt, void *buf, size_t len)
3271 {
3272 int error;
3273 size_t valsize;
3274
3275 error = 0;
3276
3277 /*
3278 * Documented get behavior is that we always return a value,
3279 * possibly truncated to fit in the user's buffer.
3280 * Traditional behavior is that we always tell the user
3281 * precisely how much we copied, rather than something useful
3282 * like the total amount we had available for her.
3283 * Note that this interface is not idempotent; the entire answer must
3284 * be generated ahead of time.
3285 */
3286 valsize = min(len, sopt->sopt_valsize);
3287 sopt->sopt_valsize = valsize;
3288 if (sopt->sopt_val != USER_ADDR_NULL) {
3289 if (sopt->sopt_p != kernproc)
3290 error = copyout(buf, sopt->sopt_val, valsize);
3291 else
3292 bcopy(buf, CAST_DOWN(caddr_t, sopt->sopt_val), valsize);
3293 }
3294 return (error);
3295 }
3296
3297 static int
3298 sooptcopyout_timeval(struct sockopt *sopt, const struct timeval * tv_p)
3299 {
3300 int error;
3301 size_t len;
3302 struct user64_timeval tv64;
3303 struct user32_timeval tv32;
3304 const void * val;
3305 size_t valsize;
3306
3307 error = 0;
3308 if (proc_is64bit(sopt->sopt_p)) {
3309 len = sizeof(tv64);
3310 tv64.tv_sec = tv_p->tv_sec;
3311 tv64.tv_usec = tv_p->tv_usec;
3312 val = &tv64;
3313 } else {
3314 len = sizeof(tv32);
3315 tv32.tv_sec = tv_p->tv_sec;
3316 tv32.tv_usec = tv_p->tv_usec;
3317 val = &tv32;
3318 }
3319 valsize = min(len, sopt->sopt_valsize);
3320 sopt->sopt_valsize = valsize;
3321 if (sopt->sopt_val != USER_ADDR_NULL) {
3322 if (sopt->sopt_p != kernproc)
3323 error = copyout(val, sopt->sopt_val, valsize);
3324 else
3325 bcopy(val, CAST_DOWN(caddr_t, sopt->sopt_val), valsize);
3326 }
3327 return (error);
3328 }
3329
3330 /*
3331 * Return: 0 Success
3332 * ENOPROTOOPT
3333 * <pr_ctloutput>:EOPNOTSUPP[AF_UNIX]
3334 * <pr_ctloutput>:???
3335 * <sf_getoption>:???
3336 */
3337 int
3338 sogetopt(struct socket *so, struct sockopt *sopt)
3339 {
3340 int error, optval;
3341 struct linger l;
3342 struct timeval tv;
3343 struct socket_filter_entry *filter;
3344 int filtered = 0;
3345 #if CONFIG_MACF_SOCKET
3346 struct mac extmac;
3347 #endif /* MAC_SOCKET */
3348
3349 if (sopt->sopt_dir != SOPT_GET) {
3350 sopt->sopt_dir = SOPT_GET;
3351 }
3352
3353 socket_lock(so, 1);
3354
3355 error = 0;
3356 for (filter = so->so_filt; filter && (error == 0);
3357 filter = filter->sfe_next_onsocket) {
3358 if (filter->sfe_filter->sf_filter.sf_getoption) {
3359 if (filtered == 0) {
3360 filtered = 1;
3361 sflt_use(so);
3362 socket_unlock(so, 0);
3363 }
3364 error = filter->sfe_filter->sf_filter.
3365 sf_getoption(filter->sfe_cookie, so, sopt);
3366 }
3367 }
3368 if (filtered != 0) {
3369 socket_lock(so, 0);
3370 sflt_unuse(so);
3371
3372 if (error) {
3373 if (error == EJUSTRETURN)
3374 error = 0;
3375 socket_unlock(so, 1);
3376 return (error);
3377 }
3378 }
3379
3380 error = 0;
3381 if (sopt->sopt_level != SOL_SOCKET) {
3382 if (so->so_proto && so->so_proto->pr_ctloutput) {
3383 error = (*so->so_proto->pr_ctloutput)(so, sopt);
3384 socket_unlock(so, 1);
3385 return (error);
3386 } else {
3387 socket_unlock(so, 1);
3388 return (ENOPROTOOPT);
3389 }
3390 } else {
3391 switch (sopt->sopt_name) {
3392 case SO_LINGER:
3393 case SO_LINGER_SEC:
3394 l.l_onoff = so->so_options & SO_LINGER;
3395 l.l_linger = (sopt->sopt_name == SO_LINGER) ?
3396 so->so_linger : so->so_linger / hz;
3397 error = sooptcopyout(sopt, &l, sizeof (l));
3398 break;
3399
3400 case SO_USELOOPBACK:
3401 case SO_DONTROUTE:
3402 case SO_DEBUG:
3403 case SO_KEEPALIVE:
3404 case SO_REUSEADDR:
3405 case SO_REUSEPORT:
3406 case SO_BROADCAST:
3407 case SO_OOBINLINE:
3408 case SO_TIMESTAMP:
3409 #ifdef __APPLE__
3410 case SO_DONTTRUNC:
3411 case SO_WANTMORE:
3412 case SO_WANTOOBFLAG:
3413 #endif
3414 optval = so->so_options & sopt->sopt_name;
3415 integer:
3416 error = sooptcopyout(sopt, &optval, sizeof (optval));
3417 break;
3418
3419 case SO_TYPE:
3420 optval = so->so_type;
3421 goto integer;
3422
3423 #ifdef __APPLE__
3424 case SO_NREAD:
3425 if (so->so_proto->pr_flags & PR_ATOMIC) {
3426 int pkt_total;
3427 struct mbuf *m1;
3428
3429 pkt_total = 0;
3430 m1 = so->so_rcv.sb_mb;
3431 while (m1) {
3432 if (m1->m_type == MT_DATA || m1->m_type == MT_HEADER ||
3433 m1->m_type == MT_OOBDATA)
3434 pkt_total += m1->m_len;
3435 m1 = m1->m_next;
3436 }
3437 optval = pkt_total;
3438 } else {
3439 optval = so->so_rcv.sb_cc - so->so_rcv.sb_ctl;
3440 }
3441 goto integer;
3442
3443 case SO_NWRITE:
3444 optval = so->so_snd.sb_cc;
3445 goto integer;
3446 #endif
3447 case SO_ERROR:
3448 optval = so->so_error;
3449 so->so_error = 0;
3450 goto integer;
3451
3452 case SO_SNDBUF:
3453 optval = so->so_snd.sb_hiwat;
3454 goto integer;
3455
3456 case SO_RCVBUF:
3457 optval = so->so_rcv.sb_hiwat;
3458 goto integer;
3459
3460 case SO_SNDLOWAT:
3461 optval = so->so_snd.sb_lowat;
3462 goto integer;
3463
3464 case SO_RCVLOWAT:
3465 optval = so->so_rcv.sb_lowat;
3466 goto integer;
3467
3468 case SO_SNDTIMEO:
3469 case SO_RCVTIMEO:
3470 tv = (sopt->sopt_name == SO_SNDTIMEO ?
3471 so->so_snd.sb_timeo : so->so_rcv.sb_timeo);
3472
3473 error = sooptcopyout_timeval(sopt, &tv);
3474 break;
3475
3476 case SO_NOSIGPIPE:
3477 optval = (so->so_flags & SOF_NOSIGPIPE);
3478 goto integer;
3479
3480 case SO_NOADDRERR:
3481 optval = (so->so_flags & SOF_NOADDRAVAIL);
3482 goto integer;
3483
3484 case SO_REUSESHAREUID:
3485 optval = (so->so_flags & SOF_REUSESHAREUID);
3486 goto integer;
3487
3488 #ifdef __APPLE_API_PRIVATE
3489 case SO_NOTIFYCONFLICT:
3490 optval = (so->so_flags & SOF_NOTIFYCONFLICT);
3491 goto integer;
3492 #endif
3493 case SO_RESTRICTIONS:
3494 optval = so->so_restrictions & (SO_RESTRICT_DENYIN |
3495 SO_RESTRICT_DENYOUT | SO_RESTRICT_DENYSET);
3496 goto integer;
3497
3498 case SO_LABEL:
3499 #if CONFIG_MACF_SOCKET
3500 if ((error = sooptcopyin(sopt, &extmac, sizeof (extmac),
3501 sizeof (extmac))) != 0 ||
3502 (error = mac_socket_label_get(proc_ucred(
3503 sopt->sopt_p), so, &extmac)) != 0)
3504 break;
3505
3506 error = sooptcopyout(sopt, &extmac, sizeof (extmac));
3507 #else
3508 error = EOPNOTSUPP;
3509 #endif /* MAC_SOCKET */
3510 break;
3511
3512 case SO_PEERLABEL:
3513 #if CONFIG_MACF_SOCKET
3514 if ((error = sooptcopyin(sopt, &extmac, sizeof (extmac),
3515 sizeof (extmac))) != 0 ||
3516 (error = mac_socketpeer_label_get(proc_ucred(
3517 sopt->sopt_p), so, &extmac)) != 0)
3518 break;
3519
3520 error = sooptcopyout(sopt, &extmac, sizeof (extmac));
3521 #else
3522 error = EOPNOTSUPP;
3523 #endif /* MAC_SOCKET */
3524 break;
3525
3526 #ifdef __APPLE_API_PRIVATE
3527 case SO_UPCALLCLOSEWAIT:
3528 optval = (so->so_flags & SOF_UPCALLCLOSEWAIT);
3529 goto integer;
3530 #endif
3531 case SO_RANDOMPORT:
3532 optval = (so->so_flags & SOF_BINDRANDOMPORT);
3533 goto integer;
3534
3535 case SO_NP_EXTENSIONS: {
3536 struct so_np_extensions sonpx;
3537
3538 sonpx.npx_flags = (so->so_flags & SOF_NPX_SETOPTSHUT) ? SONPX_SETOPTSHUT : 0;
3539 sonpx.npx_mask = SONPX_MASK_VALID;
3540
3541 error = sooptcopyout(sopt, &sonpx, sizeof(struct so_np_extensions));
3542 break;
3543 }
3544 default:
3545 error = ENOPROTOOPT;
3546 break;
3547 }
3548 socket_unlock(so, 1);
3549 return (error);
3550 }
3551 }
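
/*
 * Example (illustrative only; user-level code): the Apple-specific
 * SO_NREAD option handled above reports how much data is ready to be
 * read.  A sketch, assuming socket "s":
 *
 *	int avail = 0;
 *	socklen_t len = sizeof (avail);
 *
 *	if (getsockopt(s, SOL_SOCKET, SO_NREAD, &avail, &len) == 0)
 *		printf("%d bytes readable\n", avail);
 *
 * For atomic (datagram) protocols this is the size of the next queued
 * packet; for stream protocols it is the receive buffer byte count less
 * any control data.
 */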
3552
3553 /* XXX; prepare mbuf for (__FreeBSD__ < 3) routines. */
3554 int
3555 soopt_getm(struct sockopt *sopt, struct mbuf **mp)
3556 {
3557 struct mbuf *m, *m_prev;
3558 int sopt_size = sopt->sopt_valsize;
3559 int how;
3560
3561 if (sopt_size > MAX_SOOPTGETM_SIZE)
3562 return (EMSGSIZE);
3563
3564 how = sopt->sopt_p != kernproc ? M_WAIT : M_DONTWAIT;
3565 MGET(m, how, MT_DATA);
3566 if (m == 0)
3567 return (ENOBUFS);
3568 if (sopt_size > MLEN) {
3569 MCLGET(m, how);
3570 if ((m->m_flags & M_EXT) == 0) {
3571 m_free(m);
3572 return (ENOBUFS);
3573 }
3574 m->m_len = min(MCLBYTES, sopt_size);
3575 } else {
3576 m->m_len = min(MLEN, sopt_size);
3577 }
3578 sopt_size -= m->m_len;
3579 *mp = m;
3580 m_prev = m;
3581
3582 while (sopt_size) {
3583 MGET(m, how, MT_DATA);
3584 if (m == 0) {
3585 m_freem(*mp);
3586 return (ENOBUFS);
3587 }
3588 if (sopt_size > MLEN) {
3589 MCLGET(m, how);
3590 if ((m->m_flags & M_EXT) == 0) {
3591 m_freem(*mp);
3592 return (ENOBUFS);
3593 }
3594 m->m_len = min(MCLBYTES, sopt_size);
3595 } else {
3596 m->m_len = min(MLEN, sopt_size);
3597 }
3598 sopt_size -= m->m_len;
3599 m_prev->m_next = m;
3600 m_prev = m;
3601 }
3602 return (0);
3603 }
3604
3605 /* XXX; copyin sopt data into mbuf chain for (__FreeBSD__ < 3) routines. */
3606 int
3607 soopt_mcopyin(struct sockopt *sopt, struct mbuf *m)
3608 {
3609 struct mbuf *m0 = m;
3610
3611 if (sopt->sopt_val == USER_ADDR_NULL)
3612 return (0);
3613 while (m != NULL && sopt->sopt_valsize >= m->m_len) {
3614 if (sopt->sopt_p != kernproc) {
3615 int error;
3616
3617 error = copyin(sopt->sopt_val, mtod(m, char *),
3618 m->m_len);
3619 if (error != 0) {
3620 m_freem(m0);
3621 return (error);
3622 }
3623 } else {
3624 bcopy(CAST_DOWN(caddr_t, sopt->sopt_val),
3625 mtod(m, char *), m->m_len);
3626 }
3627 sopt->sopt_valsize -= m->m_len;
3628 sopt->sopt_val += m->m_len;
3629 m = m->m_next;
3630 }
3631 if (m != NULL) /* should have been allocated large enough at ip6_sooptmcopyin() */
3632 panic("soopt_mcopyin");
3633 return (0);
3634 }
3635
3636 /* XXX; copyout mbuf chain data into soopt for (__FreeBSD__ < 3) routines. */
3637 int
3638 soopt_mcopyout(struct sockopt *sopt, struct mbuf *m)
3639 {
3640 struct mbuf *m0 = m;
3641 size_t valsize = 0;
3642
3643 if (sopt->sopt_val == USER_ADDR_NULL)
3644 return (0);
3645 while (m != NULL && sopt->sopt_valsize >= m->m_len) {
3646 if (sopt->sopt_p != kernproc) {
3647 int error;
3648
3649 error = copyout(mtod(m, char *), sopt->sopt_val,
3650 m->m_len);
3651 if (error != 0) {
3652 m_freem(m0);
3653 return (error);
3654 }
3655 } else {
3656 bcopy(mtod(m, char *),
3657 CAST_DOWN(caddr_t, sopt->sopt_val), m->m_len);
3658 }
3659 sopt->sopt_valsize -= m->m_len;
3660 sopt->sopt_val += m->m_len;
3661 valsize += m->m_len;
3662 m = m->m_next;
3663 }
3664 if (m != NULL) {
3665 /* a large enough soopt buffer should be supplied from user land */
3666 m_freem(m0);
3667 return (EINVAL);
3668 }
3669 sopt->sopt_valsize = valsize;
3670 return (0);
3671 }
3672
3673 void
3674 sohasoutofband(struct socket *so)
3675 {
3676
3677 if (so->so_pgid < 0)
3678 gsignal(-so->so_pgid, SIGURG);
3679 else if (so->so_pgid > 0)
3680 proc_signal(so->so_pgid, SIGURG);
3681 selwakeup(&so->so_rcv.sb_sel);
3682 }
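
/*
 * Example (illustrative only; user-level code): for the SIGURG sent above
 * to reach a process, that process must first claim ownership of the
 * socket, e.g.:
 *
 *	signal(SIGURG, urg_handler);		("urg_handler" is hypothetical)
 *	fcntl(s, F_SETOWN, getpid());
 *
 * Alternatively, pending out-of-band data can be observed through the
 * exceptional-condition sets of select(2)/poll(2) (see sopoll() below).
 */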
3683
3684 int
3685 sopoll(struct socket *so, int events, __unused kauth_cred_t cred, void * wql)
3686 {
3687 struct proc *p = current_proc();
3688 int revents = 0;
3689
3690 socket_lock(so, 1);
3691
3692 if (events & (POLLIN | POLLRDNORM))
3693 if (soreadable(so))
3694 revents |= events & (POLLIN | POLLRDNORM);
3695
3696 if (events & (POLLOUT | POLLWRNORM))
3697 if (sowriteable(so))
3698 revents |= events & (POLLOUT | POLLWRNORM);
3699
3700 if (events & (POLLPRI | POLLRDBAND))
3701 if (so->so_oobmark || (so->so_state & SS_RCVATMARK))
3702 revents |= events & (POLLPRI | POLLRDBAND);
3703
3704 if (revents == 0) {
3705 if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
3706 /*
3707 * Darwin sets the flag first,
3708 * BSD calls selrecord first
3709 */
3710 so->so_rcv.sb_flags |= SB_SEL;
3711 selrecord(p, &so->so_rcv.sb_sel, wql);
3712 }
3713
3714 if (events & (POLLOUT | POLLWRNORM)) {
3715 /*
3716 * Darwin sets the flag first,
3717 * BSD calls selrecord first
3718 */
3719 so->so_snd.sb_flags |= SB_SEL;
3720 selrecord(p, &so->so_snd.sb_sel, wql);
3721 }
3722 }
3723
3724 socket_unlock(so, 1);
3725 return (revents);
3726 }
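
/*
 * Example (illustrative only; user-level code): per the mapping above, a
 * poll(2) caller sees POLLIN/POLLRDNORM when the socket is readable and
 * POLLPRI/POLLRDBAND when the urgent mark is pending.  A sketch:
 *
 *	char oob, buf[512];
 *	struct pollfd pfd = { .fd = s, .events = POLLIN | POLLPRI };
 *
 *	if (poll(&pfd, 1, 1000) > 0) {
 *		if (pfd.revents & POLLPRI)
 *			(void) recv(s, &oob, 1, MSG_OOB);
 *		if (pfd.revents & POLLIN)
 *			(void) recv(s, buf, sizeof (buf), 0);
 *	}
 */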
3727
3728 int
3729 soo_kqfilter(__unused struct fileproc *fp, struct knote *kn,
3730 __unused struct proc *p)
3731 {
3732 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
3733 struct sockbuf *sb;
3734
3735 socket_lock(so, 1);
3736
3737 #if CONFIG_MACF_SOCKET
3738 if (mac_socket_check_kqfilter(proc_ucred(p), kn, so) != 0) {
3739 socket_unlock(so, 1);
3740 return (1);
3741 }
3742 #endif /* MAC_SOCKET */
3743
3744 switch (kn->kn_filter) {
3745 case EVFILT_READ:
3746 kn->kn_fop = &soread_filtops;
3747 sb = &so->so_rcv;
3748 break;
3749 case EVFILT_WRITE:
3750 kn->kn_fop = &sowrite_filtops;
3751 sb = &so->so_snd;
3752 break;
3753 default:
3754 socket_unlock(so, 1);
3755 return (1);
3756 }
3757
3758 if (KNOTE_ATTACH(&sb->sb_sel.si_note, kn))
3759 sb->sb_flags |= SB_KNOTE;
3760 socket_unlock(so, 1);
3761 return (0);
3762 }
3763
3764 static void
3765 filt_sordetach(struct knote *kn)
3766 {
3767 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
3768
3769 socket_lock(so, 1);
3770 if (so->so_rcv.sb_flags & SB_KNOTE)
3771 if (KNOTE_DETACH(&so->so_rcv.sb_sel.si_note, kn))
3772 so->so_rcv.sb_flags &= ~SB_KNOTE;
3773 socket_unlock(so, 1);
3774 }
3775
3776 /*ARGSUSED*/
3777 static int
3778 filt_soread(struct knote *kn, long hint)
3779 {
3780 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
3781
3782 if ((hint & SO_FILT_HINT_LOCKED) == 0)
3783 socket_lock(so, 1);
3784
3785 if (so->so_options & SO_ACCEPTCONN) {
3786 int isempty;
3787
3788 /* Radar 6615193: handle the listen case dynamically
3789 * for the kqueue read filter. This allows listen() to be called after
3790 * registering the kqueue EVFILT_READ filter.
3791 */
3792
3793 kn->kn_data = so->so_qlen;
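		/* XXX misnamed: non-zero when the completed-connection queue is non-empty */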
3794 isempty = ! TAILQ_EMPTY(&so->so_comp);
3795
3796 if ((hint & SO_FILT_HINT_LOCKED) == 0)
3797 socket_unlock(so, 1);
3798
3799 return (isempty);
3800 }
3801
3802 /* socket isn't a listener */
3803
3804 kn->kn_data = so->so_rcv.sb_cc - so->so_rcv.sb_ctl;
3805
3806 if (so->so_oobmark) {
3807 if (kn->kn_flags & EV_OOBAND) {
3808 kn->kn_data -= so->so_oobmark;
3809 if ((hint & SO_FILT_HINT_LOCKED) == 0)
3810 socket_unlock(so, 1);
3811 return (1);
3812 }
3813 kn->kn_data = so->so_oobmark;
3814 kn->kn_flags |= EV_OOBAND;
3815 } else {
3816 if (so->so_state & SS_CANTRCVMORE) {
3817 kn->kn_flags |= EV_EOF;
3818 kn->kn_fflags = so->so_error;
3819 if ((hint & SO_FILT_HINT_LOCKED) == 0)
3820 socket_unlock(so, 1);
3821 return (1);
3822 }
3823 }
3824
3825 if (so->so_state & SS_RCVATMARK) {
3826 if (kn->kn_flags & EV_OOBAND) {
3827 if ((hint & SO_FILT_HINT_LOCKED) == 0)
3828 socket_unlock(so, 1);
3829 return (1);
3830 }
3831 kn->kn_flags |= EV_OOBAND;
3832 } else if (kn->kn_flags & EV_OOBAND) {
3833 kn->kn_data = 0;
3834 if ((hint & SO_FILT_HINT_LOCKED) == 0)
3835 socket_unlock(so, 1);
3836 return (0);
3837 }
3838
3839 if (so->so_error) { /* temporary udp error */
3840 if ((hint & SO_FILT_HINT_LOCKED) == 0)
3841 socket_unlock(so, 1);
3842 return (1);
3843 }
3844
3845 if ((hint & SO_FILT_HINT_LOCKED) == 0)
3846 socket_unlock(so, 1);
3847
3848 return ((kn->kn_flags & EV_OOBAND) ||
3849 kn->kn_data >= ((kn->kn_sfflags & NOTE_LOWAT) ?
3850 kn->kn_sdata : so->so_rcv.sb_lowat));
3851 }
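
/*
 * Example (illustrative only; user-level code): the read filter above
 * serves both data sockets (kn_data is the readable byte count, honouring
 * NOTE_LOWAT) and listening sockets (kn_data is the length of the
 * completed-connection queue).  Per the Radar 6615193 note, listen(2) may
 * be called after the filter is registered:
 *
 *	int kq = kqueue();
 *	struct kevent ev;
 *
 *	EV_SET(&ev, s, EVFILT_READ, EV_ADD, 0, 0, NULL);
 *	(void) kevent(kq, &ev, 1, NULL, 0, NULL);
 *	(void) listen(s, 5);
 *	if (kevent(kq, NULL, 0, &ev, 1, NULL) == 1)
 *		printf("%ld pending connections\n", (long)ev.data);
 */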
3852
3853 static void
3854 filt_sowdetach(struct knote *kn)
3855 {
3856 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
3857 socket_lock(so, 1);
3858
3859 if (so->so_snd.sb_flags & SB_KNOTE)
3860 if (KNOTE_DETACH(&so->so_snd.sb_sel.si_note, kn))
3861 so->so_snd.sb_flags &= ~SB_KNOTE;
3862 socket_unlock(so, 1);
3863 }
3864
3865 /*ARGSUSED*/
3866 static int
3867 filt_sowrite(struct knote *kn, long hint)
3868 {
3869 struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
3870
3871 if ((hint & SO_FILT_HINT_LOCKED) == 0)
3872 socket_lock(so, 1);
3873
3874 kn->kn_data = sbspace(&so->so_snd);
3875 if (so->so_state & SS_CANTSENDMORE) {
3876 kn->kn_flags |= EV_EOF;
3877 kn->kn_fflags = so->so_error;
3878 if ((hint & SO_FILT_HINT_LOCKED) == 0)
3879 socket_unlock(so, 1);
3880 return (1);
3881 }
3882 if (so->so_error) { /* temporary udp error */
3883 if ((hint & SO_FILT_HINT_LOCKED) == 0)
3884 socket_unlock(so, 1);
3885 return (1);
3886 }
3887 if (((so->so_state & SS_ISCONNECTED) == 0) &&
3888 (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
3889 if ((hint & SO_FILT_HINT_LOCKED) == 0)
3890 socket_unlock(so, 1);
3891 return (0);
3892 }
3893 if ((hint & SO_FILT_HINT_LOCKED) == 0)
3894 socket_unlock(so, 1);
3895 if (kn->kn_sfflags & NOTE_LOWAT)
3896 return (kn->kn_data >= kn->kn_sdata);
3897 return (kn->kn_data >= so->so_snd.sb_lowat);
3898 }
3899
3900 #define SO_LOCK_HISTORY_STR_LEN (2 * SO_LCKDBG_MAX * (2 + sizeof(void *) + 1) + 1)
3901
3902 __private_extern__ const char * solockhistory_nr(struct socket *so)
3903 {
3904 size_t n = 0;
3905 int i;
3906 static char lock_history_str[SO_LOCK_HISTORY_STR_LEN];
3907
3908 for (i = SO_LCKDBG_MAX - 1; i >= 0; i--) {
3909 n += snprintf(lock_history_str + n, SO_LOCK_HISTORY_STR_LEN - n, "%lx:%lx ",
3910 (uintptr_t) so->lock_lr[(so->next_lock_lr + i) % SO_LCKDBG_MAX],
3911 (uintptr_t) so->unlock_lr[(so->next_unlock_lr + i) % SO_LCKDBG_MAX]);
3912 }
3913 return lock_history_str;
3914 }
3915
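/*
 * socket_lock
 * Acquire the lock protecting the socket.  If the protocol provides its
 * own pr_lock routine it is used; otherwise the domain mutex is taken.
 * When "refcount" is non-zero, a use-count reference is also taken on
 * the socket.  The caller's return address is recorded for lock
 * debugging (see solockhistory_nr() above).
 */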
3916 int
3917 socket_lock(struct socket *so, int refcount)
3918 {
3919 int error = 0;
3920 void *lr_saved;
3921
3922 lr_saved = __builtin_return_address(0);
3923
3924 if (so->so_proto->pr_lock) {
3925 error = (*so->so_proto->pr_lock)(so, refcount, lr_saved);
3926 } else {
3927 #ifdef MORE_LOCKING_DEBUG
3928 lck_mtx_assert(so->so_proto->pr_domain->dom_mtx,
3929 LCK_MTX_ASSERT_NOTOWNED);
3930 #endif
3931 lck_mtx_lock(so->so_proto->pr_domain->dom_mtx);
3932 if (refcount)
3933 so->so_usecount++;
3934 so->lock_lr[so->next_lock_lr] = lr_saved;
3935 so->next_lock_lr = (so->next_lock_lr+1) % SO_LCKDBG_MAX;
3936 }
3937
3938 return (error);
3939 }
3940
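/*
 * socket_unlock
 * Release the lock taken by socket_lock().  When "refcount" is non-zero
 * the socket's use count is also dropped, and releasing the last
 * reference frees the socket via sofreelastref().
 */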
3941 int
3942 socket_unlock(struct socket *so, int refcount)
3943 {
3944 int error = 0;
3945 void *lr_saved;
3946 lck_mtx_t *mutex_held;
3947
3948 lr_saved = __builtin_return_address(0);
3949
3950 if (so->so_proto == NULL)
3951 panic("socket_unlock null so_proto so=%p\n", so);
3952
3953 if (so && so->so_proto->pr_unlock) {
3954 error = (*so->so_proto->pr_unlock)(so, refcount, lr_saved);
3955 } else {
3956 mutex_held = so->so_proto->pr_domain->dom_mtx;
3957 #ifdef MORE_LOCKING_DEBUG
3958 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
3959 #endif
3960 so->unlock_lr[so->next_unlock_lr] = lr_saved;
3961 so->next_unlock_lr = (so->next_unlock_lr+1) % SO_LCKDBG_MAX;
3962
3963 if (refcount) {
3964 if (so->so_usecount <= 0)
3965 panic("socket_unlock: bad refcount=%d so=%p (%d, %d, %d) lrh=%s",
3966 so->so_usecount, so, so->so_proto->pr_domain->dom_family,
3967 so->so_type, so->so_proto->pr_protocol,
3968 solockhistory_nr(so));
3969
3970 so->so_usecount--;
3971 if (so->so_usecount == 0) {
3972 sofreelastref(so, 1);
3973 }
3974 }
3975 lck_mtx_unlock(mutex_held);
3976 }
3977
3978 return (error);
3979 }
3980
3981 /* Called with socket locked, will unlock socket */
3982 void
3983 sofree(struct socket *so)
3984 {
3985
3986 lck_mtx_t *mutex_held;
3987 if (so->so_proto->pr_getlock != NULL)
3988 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
3989 else
3990 mutex_held = so->so_proto->pr_domain->dom_mtx;
3991 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
3992
3993 sofreelastref(so, 0);
3994 }
3995
3996 void
3997 soreference(struct socket *so)
3998 {
3999 socket_lock(so, 1); /* lock & take one reference on the socket */
4000 socket_unlock(so, 0); /* unlock only */
4001 }
4002
4003 void
4004 sodereference(struct socket *so)
4005 {
4006 socket_lock(so, 0);
4007 socket_unlock(so, 1);
4008 }
4009
4010 /*
4011 * Set or clear SOF_MULTIPAGES on the socket to enable or disable the
4012 * possibility of using jumbo clusters. Caller must ensure to hold
4013 * the socket lock.
4014 */
4015 void
4016 somultipages(struct socket *so, boolean_t set)
4017 {
4018 if (set)
4019 so->so_flags |= SOF_MULTIPAGES;
4020 else
4021 so->so_flags &= ~SOF_MULTIPAGES;
4022 }
4023
4024 int
4025 so_isdstlocal(struct socket *so)
4026 {
4027 struct inpcb *inp = (struct inpcb *)so->so_pcb;
4028
4029 if (so->so_proto->pr_domain->dom_family == AF_INET) {
4030 return inaddr_local(inp->inp_faddr);
4031 } else if (so->so_proto->pr_domain->dom_family == AF_INET6) {
4032 return in6addr_local(&inp->in6p_faddr);
4033 }
4034 return 0;
4035 }