]> git.saurik.com Git - apple/xnu.git/blob - bsd/kern/uipc_syscalls.c
xnu-7195.60.75.tar.gz
[apple/xnu.git] / bsd / kern / uipc_syscalls.c
1 /*
2 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1982, 1986, 1989, 1990, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * sendfile(2) and related extensions:
33 * Copyright (c) 1998, David Greenman. All rights reserved.
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. All advertising materials mentioning features or use of this software
44 * must display the following acknowledgement:
45 * This product includes software developed by the University of
46 * California, Berkeley and its contributors.
47 * 4. Neither the name of the University nor the names of its contributors
48 * may be used to endorse or promote products derived from this software
49 * without specific prior written permission.
50 *
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE.
62 *
63 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94
64 */
65 /*
66 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
67 * support for mandatory and extensible security protections. This notice
68 * is included in support of clause 2.2 (b) of the Apple Public License,
69 * Version 2.0.
70 */
71
72 #include <sys/param.h>
73 #include <sys/systm.h>
74 #include <sys/filedesc.h>
75 #include <sys/proc_internal.h>
76 #include <sys/file_internal.h>
77 #include <sys/vnode_internal.h>
78 #include <sys/malloc.h>
79 #include <sys/mcache.h>
80 #include <sys/mbuf.h>
81 #include <kern/locks.h>
82 #include <sys/domain.h>
83 #include <sys/protosw.h>
84 #include <sys/signalvar.h>
85 #include <sys/socket.h>
86 #include <sys/socketvar.h>
87 #include <sys/kernel.h>
88 #include <sys/uio_internal.h>
89 #include <sys/kauth.h>
90 #include <kern/task.h>
91 #include <sys/priv.h>
92 #include <sys/sysctl.h>
93 #include <sys/sys_domain.h>
94
95 #include <security/audit/audit.h>
96
97 #include <sys/kdebug.h>
98 #include <sys/sysproto.h>
99 #include <netinet/in.h>
100 #include <net/route.h>
101 #include <netinet/in_pcb.h>
102
103 #include <os/ptrtools.h>
104
105 #if CONFIG_MACF_SOCKET_SUBSET
106 #include <security/mac_framework.h>
107 #endif /* MAC_SOCKET_SUBSET */
108
/*
 * Shorthand for fileproc members: each name expands to the matching field
 * reached through fp_glob, so `fp->f_flag` reads `fp->fp_glob->fg_flag`.
 */
109 #define f_flag fp_glob->fg_flag
110 #define f_ops fp_glob->fg_ops
111 #define f_data fp_glob->fg_data
112
/*
 * Trace codes used by this file.  Every code is built with NETDBG_CODE()
 * and DBG_NETSOCK as its first argument; the second argument distinguishes
 * the individual trace points.
 */
113 #define DBG_LAYER_IN_BEG NETDBG_CODE(DBG_NETSOCK, 0)
114 #define DBG_LAYER_IN_END NETDBG_CODE(DBG_NETSOCK, 2)
115 #define DBG_LAYER_OUT_BEG NETDBG_CODE(DBG_NETSOCK, 1)
116 #define DBG_LAYER_OUT_END NETDBG_CODE(DBG_NETSOCK, 3)
117 #define DBG_FNC_SENDMSG NETDBG_CODE(DBG_NETSOCK, (1 << 8) | 1)
118 #define DBG_FNC_SENDTO NETDBG_CODE(DBG_NETSOCK, (2 << 8) | 1)
119 #define DBG_FNC_SENDIT NETDBG_CODE(DBG_NETSOCK, (3 << 8) | 1)
120 #define DBG_FNC_RECVFROM NETDBG_CODE(DBG_NETSOCK, (5 << 8))
121 #define DBG_FNC_RECVMSG NETDBG_CODE(DBG_NETSOCK, (6 << 8))
122 #define DBG_FNC_RECVIT NETDBG_CODE(DBG_NETSOCK, (7 << 8))
123 #define DBG_FNC_SENDFILE NETDBG_CODE(DBG_NETSOCK, (10 << 8))
124 #define DBG_FNC_SENDFILE_WAIT NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 1))
125 #define DBG_FNC_SENDFILE_READ NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 2))
126 #define DBG_FNC_SENDFILE_SEND NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 3))
127 #define DBG_FNC_SENDMSG_X NETDBG_CODE(DBG_NETSOCK, (11 << 8))
128 #define DBG_FNC_RECVMSG_X NETDBG_CODE(DBG_NETSOCK, (12 << 8))
129
/*
 * Debug helpers.  When DEBUG or DEVELOPMENT is set, DEBUG_KERNEL_ADDRPERM()
 * passes its argument through unchanged and DBG_PRINTF() forwards to
 * printf().  Otherwise the argument goes through VM_KERNEL_ADDRPERM() and
 * DBG_PRINTF() expands to an empty statement.
 */
130 #if DEBUG || DEVELOPMENT
131 #define DEBUG_KERNEL_ADDRPERM(_v) (_v)
132 #define DBG_PRINTF(...) printf(__VA_ARGS__)
133 #else
134 #define DEBUG_KERNEL_ADDRPERM(_v) VM_KERNEL_ADDRPERM(_v)
135 #define DBG_PRINTF(...) do { } while (0)
136 #endif
137
/*
 * Forward declarations of the file-local helpers used by the system call
 * entry points in this file.
 */

/* Common send/receive back ends and the plain connect helper. */
138 static int sendit(struct proc *, struct socket *, struct user_msghdr *, uio_t,
139 int, int32_t *);
140 static int recvit(struct proc *, int, struct user_msghdr *, uio_t, user_addr_t,
141 int32_t *);
142 static int connectit(struct socket *, struct sockaddr *);
/*
 * Socket address fetchers.  getsockaddr() hands back a struct sockaddr
 * pointer through its second argument, getsockaddr_s() fills a
 * caller-provided sockaddr_storage.  Callers in this file pick between
 * them by comparing the user length against sizeof(struct sockaddr_storage).
 */
143 static int getsockaddr(struct socket *, struct sockaddr **, user_addr_t,
144 size_t, boolean_t);
145 static int getsockaddr_s(struct socket *, struct sockaddr_storage *,
146 user_addr_t, size_t, boolean_t);
/* Only declared when SENDFILE is enabled. */
147 #if SENDFILE
148 static void alloc_sendpkt(int, size_t, unsigned int *, struct mbuf **,
149 boolean_t);
150 #endif /* SENDFILE */
/* Back ends for connectx(), disconnectx() and socket()/socket_delegate(). */
151 static int connectx_nocancel(struct proc *, struct connectx_args *, int *);
152 static int connectitx(struct socket *, struct sockaddr *,
153 struct sockaddr *, struct proc *, uint32_t, sae_associd_t,
154 sae_connid_t *, uio_t, unsigned int, user_ssize_t *);
155 static int disconnectx_nocancel(struct proc *, struct disconnectx_args *,
156 int *);
157 static int socket_common(struct proc *, int, int, int, pid_t, int32_t *, int);
158
/* Converters operating on arrays of struct user_msghdr_x plus a uio array. */
159 static int internalize_user_msghdr_array(const void *, int, int, u_int,
160 struct user_msghdr_x *, struct uio **);
161 static u_int externalize_user_msghdr_array(void *, int, int, u_int,
162 const struct user_msghdr_x *, struct uio **);
163
/* Management of uio arrays and of struct recv_msg_elem arrays. */
164 static void free_uio_array(struct uio **, u_int);
165 static boolean_t uio_array_is_valid(struct uio **, u_int);
166 static int recv_msg_array_is_valid(struct recv_msg_elem *, u_int);
167 static int internalize_recv_msghdr_array(const void *, int, int,
168 u_int, struct user_msghdr_x *, struct recv_msg_elem *);
169 static u_int externalize_recv_msghdr_array(void *, int, int, u_int,
170 const struct user_msghdr_x *, struct recv_msg_elem *);
171 static struct recv_msg_elem *alloc_recv_msg_array(u_int count);
172 static void free_recv_msg_array(struct recv_msg_elem *, u_int);
173
/* The sysctl entries below are attached to the _kern_ipc node. */
174 SYSCTL_DECL(_kern_ipc);
175
/*
 * Two unsigned tunables, both defaulting to 100 and registered read-write
 * (CTLFLAG_RW | CTLFLAG_LOCKED) under the names maxsendmsgx and maxrecvmsgx.
 * NOTE(review): presumably these cap the batch size accepted by the
 * sendmsg_x/recvmsg_x paths; their use is not visible in this part of the
 * file, confirm before relying on it.
 */
176 static u_int somaxsendmsgx = 100;
177 SYSCTL_UINT(_kern_ipc, OID_AUTO, maxsendmsgx,
178 CTLFLAG_RW | CTLFLAG_LOCKED, &somaxsendmsgx, 0, "");
179 static u_int somaxrecvmsgx = 100;
180 SYSCTL_UINT(_kern_ipc, OID_AUTO, maxrecvmsgx,
181 CTLFLAG_RW | CTLFLAG_LOCKED, &somaxrecvmsgx, 0, "");
182
183 /*
184 * System call interface to the socket abstraction.
185 */
186
/*
 * File operations vector defined elsewhere; installed as f_ops on every
 * descriptor this file creates for a socket.
 */
187 extern const struct fileops socketops;
188
189 /*
190 * Returns: 0 Success
191 * EACCES Mandatory Access Control failure
192 * falloc:ENFILE
193 * falloc:EMFILE
194 * falloc:ENOMEM
195 * socreate:EAFNOSUPPORT
196 * socreate:EPROTOTYPE
197 * socreate:EPROTONOSUPPORT
198 * socreate:ENOBUFS
199 * socreate:ENOMEM
200 * socreate:??? [other protocol families, IPSEC]
201 */
202 int
203 socket(struct proc *p,
204 struct socket_args *uap,
205 int32_t *retval)
206 {
207 return socket_common(p, uap->domain, uap->type, uap->protocol,
208 proc_selfpid(), retval, 0);
209 }
210
211 int
212 socket_delegate(struct proc *p,
213 struct socket_delegate_args *uap,
214 int32_t *retval)
215 {
216 return socket_common(p, uap->domain, uap->type, uap->protocol,
217 uap->epid, retval, 1);
218 }
219
220 static int
221 socket_common(struct proc *p,
222 int domain,
223 int type,
224 int protocol,
225 pid_t epid,
226 int32_t *retval,
227 int delegate)
228 {
229 struct socket *so;
230 struct fileproc *fp;
231 int fd, error;
232
233 AUDIT_ARG(socket, domain, type, protocol);
234 #if CONFIG_MACF_SOCKET_SUBSET
235 if ((error = mac_socket_check_create(kauth_cred_get(), domain,
236 type, protocol)) != 0) {
237 return error;
238 }
239 #endif /* MAC_SOCKET_SUBSET */
240
241 if (delegate) {
242 error = priv_check_cred(kauth_cred_get(),
243 PRIV_NET_PRIVILEGED_SOCKET_DELEGATE, 0);
244 if (error) {
245 return EACCES;
246 }
247 }
248
249 error = falloc(p, &fp, &fd, vfs_context_current());
250 if (error) {
251 return error;
252 }
253 fp->f_flag = FREAD | FWRITE;
254 fp->f_ops = &socketops;
255
256 if (delegate) {
257 error = socreate_delegate(domain, &so, type, protocol, epid);
258 } else {
259 error = socreate(domain, &so, type, protocol);
260 }
261
262 if (error) {
263 fp_free(p, fd, fp);
264 } else {
265 fp->f_data = (caddr_t)so;
266
267 proc_fdlock(p);
268 procfdtbl_releasefd(p, fd, NULL);
269
270 fp_drop(p, fd, fp, 1);
271 proc_fdunlock(p);
272
273 *retval = fd;
274 if (ENTR_SHOULDTRACE) {
275 KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
276 fd, 0, (int64_t)VM_KERNEL_ADDRPERM(so));
277 }
278 }
279 return error;
280 }
281
282 /*
283 * Returns: 0 Success
284 * EDESTADDRREQ Destination address required
285 * EBADF Bad file descriptor
286 * EACCES Mandatory Access Control failure
287 * file_socket:ENOTSOCK
288 * file_socket:EBADF
289 * getsockaddr:ENAMETOOLONG Filename too long
290 * getsockaddr:EINVAL Invalid argument
291 * getsockaddr:ENOMEM Not enough space
292 * getsockaddr:EFAULT Bad address
293 * sobindlock:???
294 */
295 /* ARGSUSED */
296 int
297 bind(__unused proc_t p, struct bind_args *uap, __unused int32_t *retval)
298 {
299 struct sockaddr_storage ss;
300 struct sockaddr *sa = NULL;
301 struct socket *so;
302 boolean_t want_free = TRUE;
303 int error;
304
305 AUDIT_ARG(fd, uap->s);
306 error = file_socket(uap->s, &so);
307 if (error != 0) {
308 return error;
309 }
310 if (so == NULL) {
311 error = EBADF;
312 goto out;
313 }
314 if (uap->name == USER_ADDR_NULL) {
315 error = EDESTADDRREQ;
316 goto out;
317 }
318 if (uap->namelen > sizeof(ss)) {
319 error = getsockaddr(so, &sa, uap->name, uap->namelen, TRUE);
320 } else {
321 error = getsockaddr_s(so, &ss, uap->name, uap->namelen, TRUE);
322 if (error == 0) {
323 sa = (struct sockaddr *)&ss;
324 want_free = FALSE;
325 }
326 }
327 if (error != 0) {
328 goto out;
329 }
330 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
331 #if CONFIG_MACF_SOCKET_SUBSET
332 if ((sa != NULL && sa->sa_family == AF_SYSTEM) ||
333 (error = mac_socket_check_bind(kauth_cred_get(), so, sa)) == 0) {
334 error = sobindlock(so, sa, 1); /* will lock socket */
335 }
336 #else
337 error = sobindlock(so, sa, 1); /* will lock socket */
338 #endif /* MAC_SOCKET_SUBSET */
339 if (want_free) {
340 FREE(sa, M_SONAME);
341 }
342 out:
343 file_drop(uap->s);
344 return error;
345 }
346
347 /*
348 * Returns: 0 Success
349 * EBADF
350 * EACCES Mandatory Access Control failure
351 * file_socket:ENOTSOCK
352 * file_socket:EBADF
353 * solisten:EINVAL
354 * solisten:EOPNOTSUPP
355 * solisten:???
356 */
357 int
358 listen(__unused struct proc *p, struct listen_args *uap,
359 __unused int32_t *retval)
360 {
361 int error;
362 struct socket *so;
363
364 AUDIT_ARG(fd, uap->s);
365 error = file_socket(uap->s, &so);
366 if (error) {
367 return error;
368 }
369 if (so != NULL)
370 #if CONFIG_MACF_SOCKET_SUBSET
371 {
372 error = mac_socket_check_listen(kauth_cred_get(), so);
373 if (error == 0) {
374 error = solisten(so, uap->backlog);
375 }
376 }
377 #else
378 { error = solisten(so, uap->backlog);}
379 #endif /* MAC_SOCKET_SUBSET */
380 else {
381 error = EBADF;
382 }
383
384 file_drop(uap->s);
385 return error;
386 }
387
388 /*
389 * Returns: fp_get_ftype:EBADF Bad file descriptor
390 * fp_get_ftype:ENOTSOCK Socket operation on non-socket
391 * :EFAULT Bad address on copyin/copyout
392 * :EBADF Bad file descriptor
393 * :EOPNOTSUPP Operation not supported on socket
394 * :EINVAL Invalid argument
395 * :EWOULDBLOCK Operation would block
396 * :ECONNABORTED Connection aborted
397 * :EINTR Interrupted function
398 * :EACCES Mandatory Access Control failure
399 * falloc:ENFILE Too many files open in system
400 * falloc:EMFILE Too many open files
401 * falloc:ENOMEM Not enough space
402 * 0 Success
403 */
404 int
405 accept_nocancel(struct proc *p, struct accept_nocancel_args *uap,
406 int32_t *retval)
407 {
408 struct fileproc *fp;
409 struct sockaddr *sa = NULL;
410 socklen_t namelen;
411 int error;
412 struct socket *head, *so = NULL;
413 lck_mtx_t *mutex_held;
414 int fd = uap->s;
415 int newfd;
416 unsigned int fflag;
417 int dosocklock = 0;
418
419 *retval = -1;
420
421 AUDIT_ARG(fd, uap->s);
422
423 if (uap->name) {
424 error = copyin(uap->anamelen, (caddr_t)&namelen,
425 sizeof(socklen_t));
426 if (error) {
427 return error;
428 }
429 }
430 error = fp_get_ftype(p, fd, DTYPE_SOCKET, ENOTSOCK, &fp);
431 if (error) {
432 return error;
433 }
434 head = fp->f_data;
435
436 #if CONFIG_MACF_SOCKET_SUBSET
437 if ((error = mac_socket_check_accept(kauth_cred_get(), head)) != 0) {
438 goto out;
439 }
440 #endif /* MAC_SOCKET_SUBSET */
441
442 socket_lock(head, 1);
443
444 if (head->so_proto->pr_getlock != NULL) {
445 mutex_held = (*head->so_proto->pr_getlock)(head, PR_F_WILLUNLOCK);
446 dosocklock = 1;
447 } else {
448 mutex_held = head->so_proto->pr_domain->dom_mtx;
449 dosocklock = 0;
450 }
451
452 if ((head->so_options & SO_ACCEPTCONN) == 0) {
453 if ((head->so_proto->pr_flags & PR_CONNREQUIRED) == 0) {
454 error = EOPNOTSUPP;
455 } else {
456 /* POSIX: The socket is not accepting connections */
457 error = EINVAL;
458 }
459 socket_unlock(head, 1);
460 goto out;
461 }
462 check_again:
463 if ((head->so_state & SS_NBIO) && head->so_comp.tqh_first == NULL) {
464 socket_unlock(head, 1);
465 error = EWOULDBLOCK;
466 goto out;
467 }
468 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
469 if (head->so_state & SS_CANTRCVMORE) {
470 head->so_error = ECONNABORTED;
471 break;
472 }
473 if (head->so_usecount < 1) {
474 panic("accept: head=%p refcount=%d\n", head,
475 head->so_usecount);
476 }
477 error = msleep((caddr_t)&head->so_timeo, mutex_held,
478 PSOCK | PCATCH, "accept", 0);
479 if (head->so_usecount < 1) {
480 panic("accept: 2 head=%p refcount=%d\n", head,
481 head->so_usecount);
482 }
483 if ((head->so_state & SS_DRAINING)) {
484 error = ECONNABORTED;
485 }
486 if (error) {
487 socket_unlock(head, 1);
488 goto out;
489 }
490 }
491 if (head->so_error) {
492 error = head->so_error;
493 head->so_error = 0;
494 socket_unlock(head, 1);
495 goto out;
496 }
497
498 /*
499 * At this point we know that there is at least one connection
500 * ready to be accepted. Remove it from the queue prior to
501 * allocating the file descriptor for it since falloc() may
502 * block allowing another process to accept the connection
503 * instead.
504 */
505 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
506
507 so_acquire_accept_list(head, NULL);
508 if (TAILQ_EMPTY(&head->so_comp)) {
509 so_release_accept_list(head);
510 goto check_again;
511 }
512
513 so = TAILQ_FIRST(&head->so_comp);
514 TAILQ_REMOVE(&head->so_comp, so, so_list);
515 so->so_head = NULL;
516 so->so_state &= ~SS_COMP;
517 head->so_qlen--;
518 so_release_accept_list(head);
519
520 /* unlock head to avoid deadlock with select, keep a ref on head */
521 socket_unlock(head, 0);
522
523 #if CONFIG_MACF_SOCKET_SUBSET
524 /*
525 * Pass the pre-accepted socket to the MAC framework. This is
526 * cheaper than allocating a file descriptor for the socket,
527 * calling the protocol accept callback, and possibly freeing
528 * the file descriptor should the MAC check fails.
529 */
530 if ((error = mac_socket_check_accepted(kauth_cred_get(), so)) != 0) {
531 socket_lock(so, 1);
532 so->so_state &= ~SS_NOFDREF;
533 socket_unlock(so, 1);
534 soclose(so);
535 /* Drop reference on listening socket */
536 sodereference(head);
537 goto out;
538 }
539 #endif /* MAC_SOCKET_SUBSET */
540
541 /*
542 * Pass the pre-accepted socket to any interested socket filter(s).
543 * Upon failure, the socket would have been closed by the callee.
544 */
545 if (so->so_filt != NULL && (error = soacceptfilter(so, head)) != 0) {
546 /* Drop reference on listening socket */
547 sodereference(head);
548 /* Propagate socket filter's error code to the caller */
549 goto out;
550 }
551
552 fflag = fp->f_flag;
553 error = falloc(p, &fp, &newfd, vfs_context_current());
554 if (error) {
555 /*
556 * Probably ran out of file descriptors.
557 *
558 * <rdar://problem/8554930>
559 * Don't put this back on the socket like we used to, that
560 * just causes the client to spin. Drop the socket.
561 */
562 socket_lock(so, 1);
563 so->so_state &= ~SS_NOFDREF;
564 socket_unlock(so, 1);
565 soclose(so);
566 sodereference(head);
567 goto out;
568 }
569 *retval = newfd;
570 fp->f_flag = fflag;
571 fp->f_ops = &socketops;
572 fp->f_data = (caddr_t)so;
573
574 socket_lock(head, 0);
575 if (dosocklock) {
576 socket_lock(so, 1);
577 }
578
579 /* Sync socket non-blocking/async state with file flags */
580 if (fp->f_flag & FNONBLOCK) {
581 so->so_state |= SS_NBIO;
582 } else {
583 so->so_state &= ~SS_NBIO;
584 }
585
586 if (fp->f_flag & FASYNC) {
587 so->so_state |= SS_ASYNC;
588 so->so_rcv.sb_flags |= SB_ASYNC;
589 so->so_snd.sb_flags |= SB_ASYNC;
590 } else {
591 so->so_state &= ~SS_ASYNC;
592 so->so_rcv.sb_flags &= ~SB_ASYNC;
593 so->so_snd.sb_flags &= ~SB_ASYNC;
594 }
595
596 (void) soacceptlock(so, &sa, 0);
597 socket_unlock(head, 1);
598 if (sa == NULL) {
599 namelen = 0;
600 if (uap->name) {
601 goto gotnoname;
602 }
603 error = 0;
604 goto releasefd;
605 }
606 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
607
608 if (uap->name) {
609 socklen_t sa_len;
610
611 /* save sa_len before it is destroyed */
612 sa_len = sa->sa_len;
613 namelen = MIN(namelen, sa_len);
614 error = copyout(sa, uap->name, namelen);
615 if (!error) {
616 /* return the actual, untruncated address length */
617 namelen = sa_len;
618 }
619 gotnoname:
620 error = copyout((caddr_t)&namelen, uap->anamelen,
621 sizeof(socklen_t));
622 }
623 FREE(sa, M_SONAME);
624
625 releasefd:
626 /*
627 * If the socket has been marked as inactive by sosetdefunct(),
628 * disallow further operations on it.
629 */
630 if (so->so_flags & SOF_DEFUNCT) {
631 sodefunct(current_proc(), so,
632 SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL);
633 }
634
635 if (dosocklock) {
636 socket_unlock(so, 1);
637 }
638
639 proc_fdlock(p);
640 procfdtbl_releasefd(p, newfd, NULL);
641 fp_drop(p, newfd, fp, 1);
642 proc_fdunlock(p);
643
644 out:
645 file_drop(fd);
646
647 if (error == 0 && ENTR_SHOULDTRACE) {
648 KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
649 newfd, 0, (int64_t)VM_KERNEL_ADDRPERM(so));
650 }
651 return error;
652 }
653
654 int
655 accept(struct proc *p, struct accept_args *uap, int32_t *retval)
656 {
657 __pthread_testcancel(1);
658 return accept_nocancel(p, (struct accept_nocancel_args *)uap,
659 retval);
660 }
661
662 /*
663 * Returns: 0 Success
664 * EBADF Bad file descriptor
665 * EALREADY Connection already in progress
666 * EINPROGRESS Operation in progress
667 * ECONNABORTED Connection aborted
668 * EINTR Interrupted function
669 * EACCES Mandatory Access Control failure
670 * file_socket:ENOTSOCK
671 * file_socket:EBADF
672 * getsockaddr:ENAMETOOLONG Filename too long
673 * getsockaddr:EINVAL Invalid argument
674 * getsockaddr:ENOMEM Not enough space
675 * getsockaddr:EFAULT Bad address
676 * soconnectlock:EOPNOTSUPP
677 * soconnectlock:EISCONN
678 * soconnectlock:??? [depends on protocol, filters]
679 * msleep:EINTR
680 *
681 * Imputed: so_error error may be set from so_error, which
682 * may have been set by soconnectlock.
683 */
684 /* ARGSUSED */
685 int
686 connect(struct proc *p, struct connect_args *uap, int32_t *retval)
687 {
688 __pthread_testcancel(1);
689 return connect_nocancel(p, (struct connect_nocancel_args *)uap,
690 retval);
691 }
692
693 int
694 connect_nocancel(proc_t p, struct connect_nocancel_args *uap, int32_t *retval)
695 {
696 #pragma unused(p, retval)
697 struct socket *so;
698 struct sockaddr_storage ss;
699 struct sockaddr *sa = NULL;
700 int error;
701 int fd = uap->s;
702 boolean_t dgram;
703
704 AUDIT_ARG(fd, uap->s);
705 error = file_socket(fd, &so);
706 if (error != 0) {
707 return error;
708 }
709 if (so == NULL) {
710 error = EBADF;
711 goto out;
712 }
713
714 /*
715 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
716 * if this is a datagram socket; translate for other types.
717 */
718 dgram = (so->so_type == SOCK_DGRAM);
719
720 /* Get socket address now before we obtain socket lock */
721 if (uap->namelen > sizeof(ss)) {
722 error = getsockaddr(so, &sa, uap->name, uap->namelen, !dgram);
723 } else {
724 error = getsockaddr_s(so, &ss, uap->name, uap->namelen, !dgram);
725 if (error == 0) {
726 sa = (struct sockaddr *)&ss;
727 }
728 }
729 if (error != 0) {
730 goto out;
731 }
732
733 error = connectit(so, sa);
734
735 if (sa != NULL && sa != SA(&ss)) {
736 FREE(sa, M_SONAME);
737 }
738 if (error == ERESTART) {
739 error = EINTR;
740 }
741 out:
742 file_drop(fd);
743 return error;
744 }
745
746 static int
747 connectx_nocancel(struct proc *p, struct connectx_args *uap, int *retval)
748 {
749 #pragma unused(p, retval)
750 struct sockaddr_storage ss, sd;
751 struct sockaddr *src = NULL, *dst = NULL;
752 struct socket *so;
753 int error, error1, fd = uap->socket;
754 boolean_t dgram;
755 sae_connid_t cid = SAE_CONNID_ANY;
756 struct user32_sa_endpoints ep32;
757 struct user64_sa_endpoints ep64;
758 struct user_sa_endpoints ep;
759 user_ssize_t bytes_written = 0;
760 struct user_iovec *iovp;
761 uio_t auio = NULL;
762
763 AUDIT_ARG(fd, uap->socket);
764 error = file_socket(fd, &so);
765 if (error != 0) {
766 return error;
767 }
768 if (so == NULL) {
769 error = EBADF;
770 goto out;
771 }
772
773 if (uap->endpoints == USER_ADDR_NULL) {
774 error = EINVAL;
775 goto out;
776 }
777
778 if (IS_64BIT_PROCESS(p)) {
779 error = copyin(uap->endpoints, (caddr_t)&ep64, sizeof(ep64));
780 if (error != 0) {
781 goto out;
782 }
783
784 ep.sae_srcif = ep64.sae_srcif;
785 ep.sae_srcaddr = (user_addr_t)ep64.sae_srcaddr;
786 ep.sae_srcaddrlen = ep64.sae_srcaddrlen;
787 ep.sae_dstaddr = (user_addr_t)ep64.sae_dstaddr;
788 ep.sae_dstaddrlen = ep64.sae_dstaddrlen;
789 } else {
790 error = copyin(uap->endpoints, (caddr_t)&ep32, sizeof(ep32));
791 if (error != 0) {
792 goto out;
793 }
794
795 ep.sae_srcif = ep32.sae_srcif;
796 ep.sae_srcaddr = ep32.sae_srcaddr;
797 ep.sae_srcaddrlen = ep32.sae_srcaddrlen;
798 ep.sae_dstaddr = ep32.sae_dstaddr;
799 ep.sae_dstaddrlen = ep32.sae_dstaddrlen;
800 }
801
802 /*
803 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
804 * if this is a datagram socket; translate for other types.
805 */
806 dgram = (so->so_type == SOCK_DGRAM);
807
808 /* Get socket address now before we obtain socket lock */
809 if (ep.sae_srcaddr != USER_ADDR_NULL) {
810 if (ep.sae_srcaddrlen > sizeof(ss)) {
811 error = getsockaddr(so, &src, ep.sae_srcaddr, ep.sae_srcaddrlen, dgram);
812 } else {
813 error = getsockaddr_s(so, &ss, ep.sae_srcaddr, ep.sae_srcaddrlen, dgram);
814 if (error == 0) {
815 src = (struct sockaddr *)&ss;
816 }
817 }
818
819 if (error) {
820 goto out;
821 }
822 }
823
824 if (ep.sae_dstaddr == USER_ADDR_NULL) {
825 error = EINVAL;
826 goto out;
827 }
828
829 /* Get socket address now before we obtain socket lock */
830 if (ep.sae_dstaddrlen > sizeof(sd)) {
831 error = getsockaddr(so, &dst, ep.sae_dstaddr, ep.sae_dstaddrlen, dgram);
832 } else {
833 error = getsockaddr_s(so, &sd, ep.sae_dstaddr, ep.sae_dstaddrlen, dgram);
834 if (error == 0) {
835 dst = (struct sockaddr *)&sd;
836 }
837 }
838
839 if (error) {
840 goto out;
841 }
842
843 VERIFY(dst != NULL);
844
845 if (uap->iov != USER_ADDR_NULL) {
846 /* Verify range before calling uio_create() */
847 if (uap->iovcnt <= 0 || uap->iovcnt > UIO_MAXIOV) {
848 error = EINVAL;
849 goto out;
850 }
851
852 if (uap->len == USER_ADDR_NULL) {
853 error = EINVAL;
854 goto out;
855 }
856
857 /* allocate a uio to hold the number of iovecs passed */
858 auio = uio_create(uap->iovcnt, 0,
859 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
860 UIO_WRITE);
861
862 if (auio == NULL) {
863 error = ENOMEM;
864 goto out;
865 }
866
867 /*
868 * get location of iovecs within the uio.
869 * then copyin the iovecs from user space.
870 */
871 iovp = uio_iovsaddr(auio);
872 if (iovp == NULL) {
873 error = ENOMEM;
874 goto out;
875 }
876 error = copyin_user_iovec_array(uap->iov,
877 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
878 uap->iovcnt, iovp);
879 if (error != 0) {
880 goto out;
881 }
882
883 /* finish setup of uio_t */
884 error = uio_calculateresid(auio);
885 if (error != 0) {
886 goto out;
887 }
888 }
889
890 error = connectitx(so, src, dst, p, ep.sae_srcif, uap->associd,
891 &cid, auio, uap->flags, &bytes_written);
892 if (error == ERESTART) {
893 error = EINTR;
894 }
895
896 if (uap->len != USER_ADDR_NULL) {
897 error1 = copyout(&bytes_written, uap->len, sizeof(uap->len));
898 /* give precedence to connectitx errors */
899 if ((error1 != 0) && (error == 0)) {
900 error = error1;
901 }
902 }
903
904 if (uap->connid != USER_ADDR_NULL) {
905 error1 = copyout(&cid, uap->connid, sizeof(cid));
906 /* give precedence to connectitx errors */
907 if ((error1 != 0) && (error == 0)) {
908 error = error1;
909 }
910 }
911 out:
912 file_drop(fd);
913 if (auio != NULL) {
914 uio_free(auio);
915 }
916 if (src != NULL && src != SA(&ss)) {
917 FREE(src, M_SONAME);
918 }
919 if (dst != NULL && dst != SA(&sd)) {
920 FREE(dst, M_SONAME);
921 }
922 return error;
923 }
924
/*
 * connectx() entry point.
 *
 * Because of its similarity to a POSIX interface, this call is treated as
 * an unofficial cancellation point: __pthread_testcancel(1) runs before
 * the request is handed to connectx_nocancel().
 */
int
connectx(struct proc *p, struct connectx_args *uap, int *retval)
{
	int error;

	__pthread_testcancel(1);
	error = connectx_nocancel(p, uap, retval);
	return error;
}
935
936 static int
937 connectit(struct socket *so, struct sockaddr *sa)
938 {
939 int error;
940
941 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
942 #if CONFIG_MACF_SOCKET_SUBSET
943 if ((error = mac_socket_check_connect(kauth_cred_get(), so, sa)) != 0) {
944 return error;
945 }
946 #endif /* MAC_SOCKET_SUBSET */
947
948 socket_lock(so, 1);
949 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
950 error = EALREADY;
951 goto out;
952 }
953 error = soconnectlock(so, sa, 0);
954 if (error != 0) {
955 goto out;
956 }
957 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
958 error = EINPROGRESS;
959 goto out;
960 }
961 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
962 lck_mtx_t *mutex_held;
963
964 if (so->so_proto->pr_getlock != NULL) {
965 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
966 } else {
967 mutex_held = so->so_proto->pr_domain->dom_mtx;
968 }
969 error = msleep((caddr_t)&so->so_timeo, mutex_held,
970 PSOCK | PCATCH, __func__, 0);
971 if (so->so_state & SS_DRAINING) {
972 error = ECONNABORTED;
973 }
974 if (error != 0) {
975 break;
976 }
977 }
978 if (error == 0) {
979 error = so->so_error;
980 so->so_error = 0;
981 }
982 out:
983 socket_unlock(so, 1);
984 return error;
985 }
986
987 static int
988 connectitx(struct socket *so, struct sockaddr *src,
989 struct sockaddr *dst, struct proc *p, uint32_t ifscope,
990 sae_associd_t aid, sae_connid_t *pcid, uio_t auio, unsigned int flags,
991 user_ssize_t *bytes_written)
992 {
993 int error;
994
995 VERIFY(dst != NULL);
996
997 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), dst);
998 #if CONFIG_MACF_SOCKET_SUBSET
999 if ((error = mac_socket_check_connect(kauth_cred_get(), so, dst)) != 0) {
1000 return error;
1001 }
1002
1003 if (auio != NULL) {
1004 if ((error = mac_socket_check_send(kauth_cred_get(), so, dst)) != 0) {
1005 return error;
1006 }
1007 }
1008 #endif /* MAC_SOCKET_SUBSET */
1009
1010 socket_lock(so, 1);
1011 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
1012 error = EALREADY;
1013 goto out;
1014 }
1015
1016 error = soconnectxlocked(so, src, dst, p, ifscope,
1017 aid, pcid, flags, NULL, 0, auio, bytes_written);
1018 if (error != 0) {
1019 goto out;
1020 }
1021 /*
1022 * If, after the call to soconnectxlocked the flag is still set (in case
1023 * data has been queued and the connect() has actually been triggered,
1024 * it will have been unset by the transport), we exit immediately. There
1025 * is no reason to wait on any event.
1026 */
1027 if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
1028 error = 0;
1029 goto out;
1030 }
1031 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
1032 error = EINPROGRESS;
1033 goto out;
1034 }
1035 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
1036 lck_mtx_t *mutex_held;
1037
1038 if (so->so_proto->pr_getlock != NULL) {
1039 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
1040 } else {
1041 mutex_held = so->so_proto->pr_domain->dom_mtx;
1042 }
1043 error = msleep((caddr_t)&so->so_timeo, mutex_held,
1044 PSOCK | PCATCH, __func__, 0);
1045 if (so->so_state & SS_DRAINING) {
1046 error = ECONNABORTED;
1047 }
1048 if (error != 0) {
1049 break;
1050 }
1051 }
1052 if (error == 0) {
1053 error = so->so_error;
1054 so->so_error = 0;
1055 }
1056 out:
1057 socket_unlock(so, 1);
1058 return error;
1059 }
1060
/*
 * peeloff() entry point.
 *
 * Performs no work beyond acting as a cancellation point: because of its
 * similarity to a POSIX interface it is defined as an unofficial
 * cancellation point, and it always returns 0.
 */
int
peeloff(struct proc *p, struct peeloff_args *uap, int *retval)
{
#pragma unused(p, uap, retval)
	const int result = 0;

	__pthread_testcancel(1);
	return result;
}
1072
/*
 * disconnectx() entry point.
 *
 * Because of its similarity to a POSIX interface, this call is treated as
 * an unofficial cancellation point: __pthread_testcancel(1) runs before
 * the request is handed to disconnectx_nocancel().
 */
int
disconnectx(struct proc *p, struct disconnectx_args *uap, int *retval)
{
	int error;

	__pthread_testcancel(1);
	error = disconnectx_nocancel(p, uap, retval);
	return error;
}
1083
1084 static int
1085 disconnectx_nocancel(struct proc *p, struct disconnectx_args *uap, int *retval)
1086 {
1087 #pragma unused(p, retval)
1088 struct socket *so;
1089 int fd = uap->s;
1090 int error;
1091
1092 error = file_socket(fd, &so);
1093 if (error != 0) {
1094 return error;
1095 }
1096 if (so == NULL) {
1097 error = EBADF;
1098 goto out;
1099 }
1100
1101 error = sodisconnectx(so, uap->aid, uap->cid);
1102 out:
1103 file_drop(fd);
1104 return error;
1105 }
1106
1107 /*
1108 * Returns: 0 Success
1109 * socreate:EAFNOSUPPORT
1110 * socreate:EPROTOTYPE
1111 * socreate:EPROTONOSUPPORT
1112 * socreate:ENOBUFS
1113 * socreate:ENOMEM
1114 * socreate:EISCONN
1115 * socreate:??? [other protocol families, IPSEC]
1116 * falloc:ENFILE
1117 * falloc:EMFILE
1118 * falloc:ENOMEM
1119 * copyout:EFAULT
1120 * soconnect2:EINVAL
1121 * soconnect2:EPROTOTYPE
1122 * soconnect2:??? [other protocol families[
1123 */
1124 int
1125 socketpair(struct proc *p, struct socketpair_args *uap,
1126 __unused int32_t *retval)
1127 {
1128 struct fileproc *fp1, *fp2;
1129 struct socket *so1, *so2;
1130 int fd, error, sv[2];
1131
1132 AUDIT_ARG(socket, uap->domain, uap->type, uap->protocol);
1133 error = socreate(uap->domain, &so1, uap->type, uap->protocol);
1134 if (error) {
1135 return error;
1136 }
1137 error = socreate(uap->domain, &so2, uap->type, uap->protocol);
1138 if (error) {
1139 goto free1;
1140 }
1141
1142 error = falloc(p, &fp1, &fd, vfs_context_current());
1143 if (error) {
1144 goto free2;
1145 }
1146 fp1->f_flag = FREAD | FWRITE;
1147 fp1->f_ops = &socketops;
1148 fp1->f_data = (caddr_t)so1;
1149 sv[0] = fd;
1150
1151 error = falloc(p, &fp2, &fd, vfs_context_current());
1152 if (error) {
1153 goto free3;
1154 }
1155 fp2->f_flag = FREAD | FWRITE;
1156 fp2->f_ops = &socketops;
1157 fp2->f_data = (caddr_t)so2;
1158 sv[1] = fd;
1159
1160 error = soconnect2(so1, so2);
1161 if (error) {
1162 goto free4;
1163 }
1164 if (uap->type == SOCK_DGRAM) {
1165 /*
1166 * Datagram socket connection is asymmetric.
1167 */
1168 error = soconnect2(so2, so1);
1169 if (error) {
1170 goto free4;
1171 }
1172 }
1173
1174 if ((error = copyout(sv, uap->rsv, 2 * sizeof(int))) != 0) {
1175 goto free4;
1176 }
1177
1178 proc_fdlock(p);
1179 procfdtbl_releasefd(p, sv[0], NULL);
1180 procfdtbl_releasefd(p, sv[1], NULL);
1181 fp_drop(p, sv[0], fp1, 1);
1182 fp_drop(p, sv[1], fp2, 1);
1183 proc_fdunlock(p);
1184
1185 return 0;
1186 free4:
1187 fp_free(p, sv[1], fp2);
1188 free3:
1189 fp_free(p, sv[0], fp1);
1190 free2:
1191 (void) soclose(so2);
1192 free1:
1193 (void) soclose(so1);
1194 return error;
1195 }
1196
1197 /*
1198 * Returns: 0 Success
1199 * EINVAL
1200 * ENOBUFS
1201 * EBADF
1202 * EPIPE
1203 * EACCES Mandatory Access Control failure
1204 * file_socket:ENOTSOCK
1205 * file_socket:EBADF
1206 * getsockaddr:ENAMETOOLONG Filename too long
1207 * getsockaddr:EINVAL Invalid argument
1208 * getsockaddr:ENOMEM Not enough space
1209 * getsockaddr:EFAULT Bad address
1210 * <pru_sosend>:EACCES[TCP]
1211 * <pru_sosend>:EADDRINUSE[TCP]
1212 * <pru_sosend>:EADDRNOTAVAIL[TCP]
1213 * <pru_sosend>:EAFNOSUPPORT[TCP]
1214 * <pru_sosend>:EAGAIN[TCP]
1215 * <pru_sosend>:EBADF
1216 * <pru_sosend>:ECONNRESET[TCP]
1217 * <pru_sosend>:EFAULT
1218 * <pru_sosend>:EHOSTUNREACH[TCP]
1219 * <pru_sosend>:EINTR
1220 * <pru_sosend>:EINVAL
1221 * <pru_sosend>:EISCONN[AF_INET]
1222 * <pru_sosend>:EMSGSIZE[TCP]
1223 * <pru_sosend>:ENETDOWN[TCP]
1224 * <pru_sosend>:ENETUNREACH[TCP]
1225 * <pru_sosend>:ENOBUFS
1226 * <pru_sosend>:ENOMEM[TCP]
1227 * <pru_sosend>:ENOTCONN[AF_INET]
1228 * <pru_sosend>:EOPNOTSUPP
1229 * <pru_sosend>:EPERM[TCP]
1230 * <pru_sosend>:EPIPE
1231 * <pru_sosend>:EWOULDBLOCK
1232 * <pru_sosend>:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
1233 * <pru_sosend>:???[AF_INET] [whatever a filter author chooses]
1234 * <pru_sosend>:??? [value from so_error]
1235 * sockargs:???
1236 */
1237 static int
1238 sendit(struct proc *p, struct socket *so, struct user_msghdr *mp, uio_t uiop,
1239 int flags, int32_t *retval)
1240 {
1241 struct mbuf *control = NULL;
1242 struct sockaddr_storage ss;
1243 struct sockaddr *to = NULL;
1244 boolean_t want_free = TRUE;
1245 int error;
1246 user_ssize_t len;
1247
1248 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
1249
1250 if (mp->msg_name != USER_ADDR_NULL) {
1251 if (mp->msg_namelen > sizeof(ss)) {
1252 error = getsockaddr(so, &to, mp->msg_name,
1253 mp->msg_namelen, TRUE);
1254 } else {
1255 error = getsockaddr_s(so, &ss, mp->msg_name,
1256 mp->msg_namelen, TRUE);
1257 if (error == 0) {
1258 to = (struct sockaddr *)&ss;
1259 want_free = FALSE;
1260 }
1261 }
1262 if (error != 0) {
1263 goto out;
1264 }
1265 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), to);
1266 }
1267 if (mp->msg_control != USER_ADDR_NULL) {
1268 if (mp->msg_controllen < sizeof(struct cmsghdr)) {
1269 error = EINVAL;
1270 goto bad;
1271 }
1272 error = sockargs(&control, mp->msg_control,
1273 mp->msg_controllen, MT_CONTROL);
1274 if (error != 0) {
1275 goto bad;
1276 }
1277 }
1278
1279 #if CONFIG_MACF_SOCKET_SUBSET
1280 /*
1281 * We check the state without holding the socket lock;
1282 * if a race condition occurs, it would simply result
1283 * in an extra call to the MAC check function.
1284 */
1285 if (to != NULL &&
1286 !(so->so_state & SS_DEFUNCT) &&
1287 (error = mac_socket_check_send(kauth_cred_get(), so, to)) != 0) {
1288 goto bad;
1289 }
1290 #endif /* MAC_SOCKET_SUBSET */
1291
1292 len = uio_resid(uiop);
1293 error = so->so_proto->pr_usrreqs->pru_sosend(so, to, uiop, 0,
1294 control, flags);
1295 if (error != 0) {
1296 if (uio_resid(uiop) != len && (error == ERESTART ||
1297 error == EINTR || error == EWOULDBLOCK)) {
1298 error = 0;
1299 }
1300 /* Generation of SIGPIPE can be controlled per socket */
1301 if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE) &&
1302 !(flags & MSG_NOSIGNAL)) {
1303 psignal(p, SIGPIPE);
1304 }
1305 }
1306 if (error == 0) {
1307 *retval = (int)(len - uio_resid(uiop));
1308 }
1309 bad:
1310 if (to != NULL && want_free) {
1311 FREE(to, M_SONAME);
1312 }
1313 out:
1314 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error, 0, 0, 0, 0);
1315
1316 return error;
1317 }
1318
1319 /*
1320 * Returns: 0 Success
1321 * ENOMEM
1322 * sendit:??? [see sendit definition in this file]
1323 * write:??? [4056224: applicable for pipes]
1324 */
1325 int
1326 sendto(struct proc *p, struct sendto_args *uap, int32_t *retval)
1327 {
1328 __pthread_testcancel(1);
1329 return sendto_nocancel(p, (struct sendto_nocancel_args *)uap, retval);
1330 }
1331
1332 int
1333 sendto_nocancel(struct proc *p,
1334 struct sendto_nocancel_args *uap,
1335 int32_t *retval)
1336 {
1337 struct user_msghdr msg;
1338 int error;
1339 uio_t auio = NULL;
1340 struct socket *so;
1341
1342 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_START, 0, 0, 0, 0, 0);
1343 AUDIT_ARG(fd, uap->s);
1344
1345 if (uap->flags & MSG_SKIPCFIL) {
1346 error = EPERM;
1347 goto done;
1348 }
1349
1350 if (uap->len > LONG_MAX) {
1351 error = EINVAL;
1352 goto done;
1353 }
1354
1355 auio = uio_create(1, 0,
1356 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1357 UIO_WRITE);
1358 if (auio == NULL) {
1359 error = ENOMEM;
1360 goto done;
1361 }
1362 uio_addiov(auio, uap->buf, uap->len);
1363
1364 msg.msg_name = uap->to;
1365 msg.msg_namelen = uap->tolen;
1366 /* no need to set up msg_iov. sendit uses uio_t we send it */
1367 msg.msg_iov = 0;
1368 msg.msg_iovlen = 0;
1369 msg.msg_control = 0;
1370 msg.msg_flags = 0;
1371
1372 error = file_socket(uap->s, &so);
1373 if (error) {
1374 goto done;
1375 }
1376
1377 if (so == NULL) {
1378 error = EBADF;
1379 } else {
1380 error = sendit(p, so, &msg, auio, uap->flags, retval);
1381 }
1382
1383 file_drop(uap->s);
1384 done:
1385 if (auio != NULL) {
1386 uio_free(auio);
1387 }
1388
1389 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_END, error, *retval, 0, 0, 0);
1390
1391 return error;
1392 }
1393
1394 /*
1395 * Returns: 0 Success
1396 * ENOBUFS
1397 * copyin:EFAULT
1398 * sendit:??? [see sendit definition in this file]
1399 */
1400 int
1401 sendmsg(struct proc *p, struct sendmsg_args *uap, int32_t *retval)
1402 {
1403 __pthread_testcancel(1);
1404 return sendmsg_nocancel(p, (struct sendmsg_nocancel_args *)uap,
1405 retval);
1406 }
1407
1408 int
1409 sendmsg_nocancel(struct proc *p, struct sendmsg_nocancel_args *uap,
1410 int32_t *retval)
1411 {
1412 struct user32_msghdr msg32;
1413 struct user64_msghdr msg64;
1414 struct user_msghdr user_msg;
1415 caddr_t msghdrp;
1416 int size_of_msghdr;
1417 int error;
1418 uio_t auio = NULL;
1419 struct user_iovec *iovp;
1420 struct socket *so;
1421
1422 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
1423 AUDIT_ARG(fd, uap->s);
1424
1425 if (uap->flags & MSG_SKIPCFIL) {
1426 error = EPERM;
1427 goto done;
1428 }
1429
1430 if (IS_64BIT_PROCESS(p)) {
1431 msghdrp = (caddr_t)&msg64;
1432 size_of_msghdr = sizeof(msg64);
1433 } else {
1434 msghdrp = (caddr_t)&msg32;
1435 size_of_msghdr = sizeof(msg32);
1436 }
1437 error = copyin(uap->msg, msghdrp, size_of_msghdr);
1438 if (error) {
1439 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1440 return error;
1441 }
1442
1443 if (IS_64BIT_PROCESS(p)) {
1444 user_msg.msg_flags = msg64.msg_flags;
1445 user_msg.msg_controllen = msg64.msg_controllen;
1446 user_msg.msg_control = (user_addr_t)msg64.msg_control;
1447 user_msg.msg_iovlen = msg64.msg_iovlen;
1448 user_msg.msg_iov = (user_addr_t)msg64.msg_iov;
1449 user_msg.msg_namelen = msg64.msg_namelen;
1450 user_msg.msg_name = (user_addr_t)msg64.msg_name;
1451 } else {
1452 user_msg.msg_flags = msg32.msg_flags;
1453 user_msg.msg_controllen = msg32.msg_controllen;
1454 user_msg.msg_control = msg32.msg_control;
1455 user_msg.msg_iovlen = msg32.msg_iovlen;
1456 user_msg.msg_iov = msg32.msg_iov;
1457 user_msg.msg_namelen = msg32.msg_namelen;
1458 user_msg.msg_name = msg32.msg_name;
1459 }
1460
1461 if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
1462 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, EMSGSIZE,
1463 0, 0, 0, 0);
1464 return EMSGSIZE;
1465 }
1466
1467 /* allocate a uio large enough to hold the number of iovecs passed */
1468 auio = uio_create(user_msg.msg_iovlen, 0,
1469 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1470 UIO_WRITE);
1471 if (auio == NULL) {
1472 error = ENOBUFS;
1473 goto done;
1474 }
1475
1476 if (user_msg.msg_iovlen) {
1477 /*
1478 * get location of iovecs within the uio.
1479 * then copyin the iovecs from user space.
1480 */
1481 iovp = uio_iovsaddr(auio);
1482 if (iovp == NULL) {
1483 error = ENOBUFS;
1484 goto done;
1485 }
1486 error = copyin_user_iovec_array(user_msg.msg_iov,
1487 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
1488 user_msg.msg_iovlen, iovp);
1489 if (error) {
1490 goto done;
1491 }
1492 user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
1493
1494 /* finish setup of uio_t */
1495 error = uio_calculateresid(auio);
1496 if (error) {
1497 goto done;
1498 }
1499 } else {
1500 user_msg.msg_iov = 0;
1501 }
1502
1503 /* msg_flags is ignored for send */
1504 user_msg.msg_flags = 0;
1505
1506 error = file_socket(uap->s, &so);
1507 if (error) {
1508 goto done;
1509 }
1510 if (so == NULL) {
1511 error = EBADF;
1512 } else {
1513 error = sendit(p, so, &user_msg, auio, uap->flags, retval);
1514 }
1515 file_drop(uap->s);
1516 done:
1517 if (auio != NULL) {
1518 uio_free(auio);
1519 }
1520 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1521
1522 return error;
1523 }
1524
1525 int
1526 sendmsg_x(struct proc *p, struct sendmsg_x_args *uap, user_ssize_t *retval)
1527 {
1528 int error = 0;
1529 struct user_msghdr_x *user_msg_x = NULL;
1530 struct uio **uiop = NULL;
1531 struct socket *so;
1532 u_int i;
1533 struct sockaddr *to = NULL;
1534 user_ssize_t len_before = 0, len_after;
1535 int need_drop = 0;
1536 size_t size_of_msghdr;
1537 void *umsgp = NULL;
1538 u_int uiocnt;
1539 int has_addr_or_ctl = 0;
1540
1541 KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0);
1542
1543 if (uap->flags & MSG_SKIPCFIL) {
1544 error = EPERM;
1545 goto out;
1546 }
1547
1548 error = file_socket(uap->s, &so);
1549 if (error) {
1550 goto out;
1551 }
1552 need_drop = 1;
1553 if (so == NULL) {
1554 error = EBADF;
1555 goto out;
1556 }
1557
1558 /*
1559 * Input parameter range check
1560 */
1561 if (uap->cnt == 0 || uap->cnt > UIO_MAXIOV) {
1562 error = EINVAL;
1563 goto out;
1564 }
1565 /*
1566 * Clip to max currently allowed
1567 */
1568 if (uap->cnt > somaxsendmsgx) {
1569 uap->cnt = somaxsendmsgx;
1570 }
1571
1572 user_msg_x = _MALLOC(uap->cnt * sizeof(struct user_msghdr_x),
1573 M_TEMP, M_WAITOK | M_ZERO);
1574 if (user_msg_x == NULL) {
1575 DBG_PRINTF("%s _MALLOC() user_msg_x failed\n", __func__);
1576 error = ENOMEM;
1577 goto out;
1578 }
1579 uiop = _MALLOC(uap->cnt * sizeof(struct uio *),
1580 M_TEMP, M_WAITOK | M_ZERO);
1581 if (uiop == NULL) {
1582 DBG_PRINTF("%s _MALLOC() uiop failed\n", __func__);
1583 error = ENOMEM;
1584 goto out;
1585 }
1586
1587 size_of_msghdr = IS_64BIT_PROCESS(p) ?
1588 sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x);
1589
1590 umsgp = _MALLOC(uap->cnt * size_of_msghdr,
1591 M_TEMP, M_WAITOK | M_ZERO);
1592 if (umsgp == NULL) {
1593 printf("%s _MALLOC() user_msg_x failed\n", __func__);
1594 error = ENOMEM;
1595 goto out;
1596 }
1597 error = copyin(uap->msgp, umsgp, uap->cnt * size_of_msghdr);
1598 if (error) {
1599 DBG_PRINTF("%s copyin() failed\n", __func__);
1600 goto out;
1601 }
1602 error = internalize_user_msghdr_array(umsgp,
1603 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
1604 UIO_WRITE, uap->cnt, user_msg_x, uiop);
1605 if (error) {
1606 DBG_PRINTF("%s copyin_user_msghdr_array() failed\n", __func__);
1607 goto out;
1608 }
1609 /*
1610 * Make sure the size of each message iovec and
1611 * the aggregate size of all the iovec is valid
1612 */
1613 if (uio_array_is_valid(uiop, uap->cnt) == false) {
1614 error = EINVAL;
1615 goto out;
1616 }
1617
1618 /*
1619 * Sanity check on passed arguments
1620 */
1621 for (i = 0; i < uap->cnt; i++) {
1622 struct user_msghdr_x *mp = user_msg_x + i;
1623
1624 /*
1625 * No flags on send message
1626 */
1627 if (mp->msg_flags != 0) {
1628 error = EINVAL;
1629 goto out;
1630 }
1631 /*
1632 * No support for address or ancillary data (yet)
1633 */
1634 if (mp->msg_name != USER_ADDR_NULL || mp->msg_namelen != 0) {
1635 has_addr_or_ctl = 1;
1636 }
1637
1638 if (mp->msg_control != USER_ADDR_NULL ||
1639 mp->msg_controllen != 0) {
1640 has_addr_or_ctl = 1;
1641 }
1642
1643 #if CONFIG_MACF_SOCKET_SUBSET
1644 /*
1645 * We check the state without holding the socket lock;
1646 * if a race condition occurs, it would simply result
1647 * in an extra call to the MAC check function.
1648 *
1649 * Note: The following check is never true taken with the
1650 * current limitation that we do not accept to pass an address,
1651 * this is effectively placeholder code. If we add support for
1652 * addresses, we will have to check every address.
1653 */
1654 if (to != NULL &&
1655 !(so->so_state & SS_DEFUNCT) &&
1656 (error = mac_socket_check_send(kauth_cred_get(), so, to))
1657 != 0) {
1658 goto out;
1659 }
1660 #endif /* MAC_SOCKET_SUBSET */
1661 }
1662
1663 len_before = uio_array_resid(uiop, uap->cnt);
1664
1665 /*
1666 * Feed list of packets at once only for connected socket without
1667 * control message
1668 */
1669 if (so->so_proto->pr_usrreqs->pru_sosend_list !=
1670 pru_sosend_list_notsupp &&
1671 has_addr_or_ctl == 0 && somaxsendmsgx == 0) {
1672 error = so->so_proto->pr_usrreqs->pru_sosend_list(so, uiop,
1673 uap->cnt, uap->flags);
1674 } else {
1675 for (i = 0; i < uap->cnt; i++) {
1676 struct user_msghdr_x *mp = user_msg_x + i;
1677 struct user_msghdr user_msg;
1678 uio_t auio = uiop[i];
1679 int32_t tmpval;
1680
1681 user_msg.msg_flags = mp->msg_flags;
1682 user_msg.msg_controllen = mp->msg_controllen;
1683 user_msg.msg_control = mp->msg_control;
1684 user_msg.msg_iovlen = mp->msg_iovlen;
1685 user_msg.msg_iov = mp->msg_iov;
1686 user_msg.msg_namelen = mp->msg_namelen;
1687 user_msg.msg_name = mp->msg_name;
1688
1689 error = sendit(p, so, &user_msg, auio, uap->flags,
1690 &tmpval);
1691 if (error != 0) {
1692 break;
1693 }
1694 }
1695 }
1696 len_after = uio_array_resid(uiop, uap->cnt);
1697
1698 VERIFY(len_after <= len_before);
1699
1700 if (error != 0) {
1701 if (len_after != len_before && (error == ERESTART ||
1702 error == EINTR || error == EWOULDBLOCK ||
1703 error == ENOBUFS)) {
1704 error = 0;
1705 }
1706 /* Generation of SIGPIPE can be controlled per socket */
1707 if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE) &&
1708 !(uap->flags & MSG_NOSIGNAL)) {
1709 psignal(p, SIGPIPE);
1710 }
1711 }
1712 if (error == 0) {
1713 uiocnt = externalize_user_msghdr_array(umsgp,
1714 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
1715 UIO_WRITE, uap->cnt, user_msg_x, uiop);
1716
1717 *retval = (int)(uiocnt);
1718 }
1719 out:
1720 if (need_drop) {
1721 file_drop(uap->s);
1722 }
1723 if (umsgp != NULL) {
1724 _FREE(umsgp, M_TEMP);
1725 }
1726 if (uiop != NULL) {
1727 free_uio_array(uiop, uap->cnt);
1728 _FREE(uiop, M_TEMP);
1729 }
1730 if (user_msg_x != NULL) {
1731 _FREE(user_msg_x, M_TEMP);
1732 }
1733
1734 KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0);
1735
1736 return error;
1737 }
1738
1739
1740 static int
1741 copyout_sa(struct sockaddr *fromsa, user_addr_t name, socklen_t *namelen)
1742 {
1743 int error = 0;
1744 socklen_t sa_len = 0;
1745 ssize_t len;
1746
1747 len = *namelen;
1748 if (len <= 0 || fromsa == 0) {
1749 len = 0;
1750 } else {
1751 #ifndef MIN
1752 #define MIN(a, b) ((a) > (b) ? (b) : (a))
1753 #endif
1754 sa_len = fromsa->sa_len;
1755 len = MIN((unsigned int)len, sa_len);
1756 error = copyout(fromsa, name, (unsigned)len);
1757 if (error) {
1758 goto out;
1759 }
1760 }
1761 *namelen = sa_len;
1762 out:
1763 return 0;
1764 }
1765
1766 static int
1767 copyout_control(struct proc *p, struct mbuf *m, user_addr_t control,
1768 socklen_t *controllen, int *flags, struct socket *so)
1769 {
1770 int error = 0;
1771 socklen_t len;
1772 user_addr_t ctlbuf;
1773 struct inpcb *inp = so ? sotoinpcb(so) : NULL;
1774
1775 len = *controllen;
1776 *controllen = 0;
1777 ctlbuf = control;
1778
1779 while (m && len > 0) {
1780 socklen_t tocopy;
1781 struct cmsghdr *cp = mtod(m, struct cmsghdr *);
1782 socklen_t cp_size = CMSG_ALIGN(cp->cmsg_len);
1783 socklen_t buflen = m->m_len;
1784
1785 while (buflen > 0 && len > 0) {
1786 /*
1787 * SCM_TIMESTAMP hack because struct timeval has a
1788 * different size for 32 bits and 64 bits processes
1789 */
1790 if (cp->cmsg_level == SOL_SOCKET && cp->cmsg_type == SCM_TIMESTAMP) {
1791 unsigned char tmp_buffer[CMSG_SPACE(sizeof(struct user64_timeval))] = {};
1792 struct cmsghdr *tmp_cp = (struct cmsghdr *)(void *)tmp_buffer;
1793 socklen_t tmp_space;
1794 struct timeval *tv = (struct timeval *)(void *)CMSG_DATA(cp);
1795
1796 tmp_cp->cmsg_level = SOL_SOCKET;
1797 tmp_cp->cmsg_type = SCM_TIMESTAMP;
1798
1799 if (proc_is64bit(p)) {
1800 struct user64_timeval *tv64 = (struct user64_timeval *)(void *)CMSG_DATA(tmp_cp);
1801
1802 os_unaligned_deref(&tv64->tv_sec) = tv->tv_sec;
1803 os_unaligned_deref(&tv64->tv_usec) = tv->tv_usec;
1804
1805 tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user64_timeval));
1806 tmp_space = CMSG_SPACE(sizeof(struct user64_timeval));
1807 } else {
1808 struct user32_timeval *tv32 = (struct user32_timeval *)(void *)CMSG_DATA(tmp_cp);
1809
1810 tv32->tv_sec = (user32_time_t)tv->tv_sec;
1811 tv32->tv_usec = tv->tv_usec;
1812
1813 tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user32_timeval));
1814 tmp_space = CMSG_SPACE(sizeof(struct user32_timeval));
1815 }
1816 if (len >= tmp_space) {
1817 tocopy = tmp_space;
1818 } else {
1819 *flags |= MSG_CTRUNC;
1820 tocopy = len;
1821 }
1822 error = copyout(tmp_buffer, ctlbuf, tocopy);
1823 if (error) {
1824 goto out;
1825 }
1826 } else {
1827 #if CONTENT_FILTER
1828 /* If socket is attached to Content Filter and socket did not request address, ignore it */
1829 if ((so != NULL) && (so->so_cfil_db != NULL) &&
1830 ((cp->cmsg_level == IPPROTO_IP && cp->cmsg_type == IP_RECVDSTADDR && inp &&
1831 !(inp->inp_flags & INP_RECVDSTADDR)) ||
1832 (cp->cmsg_level == IPPROTO_IPV6 && (cp->cmsg_type == IPV6_PKTINFO || cp->cmsg_type == IPV6_2292PKTINFO) && inp &&
1833 !(inp->inp_flags & IN6P_PKTINFO)))) {
1834 tocopy = 0;
1835 } else
1836 #endif
1837 {
1838 if (cp_size > buflen) {
1839 panic("cp_size > buflen, something"
1840 "wrong with alignment!");
1841 }
1842 if (len >= cp_size) {
1843 tocopy = cp_size;
1844 } else {
1845 *flags |= MSG_CTRUNC;
1846 tocopy = len;
1847 }
1848 error = copyout((caddr_t) cp, ctlbuf, tocopy);
1849 if (error) {
1850 goto out;
1851 }
1852 }
1853 }
1854
1855 ctlbuf += tocopy;
1856 len -= tocopy;
1857
1858 buflen -= cp_size;
1859 cp = (struct cmsghdr *)(void *)
1860 ((unsigned char *) cp + cp_size);
1861 cp_size = CMSG_ALIGN(cp->cmsg_len);
1862 }
1863
1864 m = m->m_next;
1865 }
1866 *controllen = (socklen_t)(ctlbuf - control);
1867 out:
1868 return error;
1869 }
1870
1871 /*
1872 * Returns: 0 Success
1873 * ENOTSOCK
1874 * EINVAL
1875 * EBADF
1876 * EACCES Mandatory Access Control failure
1877 * copyout:EFAULT
1878 * fp_lookup:EBADF
1879 * <pru_soreceive>:ENOBUFS
1880 * <pru_soreceive>:ENOTCONN
1881 * <pru_soreceive>:EWOULDBLOCK
1882 * <pru_soreceive>:EFAULT
1883 * <pru_soreceive>:EINTR
1884 * <pru_soreceive>:EBADF
1885 * <pru_soreceive>:EINVAL
1886 * <pru_soreceive>:EMSGSIZE
1887 * <pru_soreceive>:???
1888 *
1889 * Notes: Additional return values from calls through <pru_soreceive>
1890 * depend on protocols other than TCP or AF_UNIX, which are
1891 * documented above.
1892 */
1893 static int
1894 recvit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop,
1895 user_addr_t namelenp, int32_t *retval)
1896 {
1897 ssize_t len;
1898 int error;
1899 struct mbuf *control = 0;
1900 struct socket *so;
1901 struct sockaddr *fromsa = 0;
1902 struct fileproc *fp;
1903
1904 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
1905 if ((error = fp_get_ftype(p, s, DTYPE_SOCKET, ENOTSOCK, &fp))) {
1906 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
1907 return error;
1908 }
1909 so = fp->f_data;
1910
1911 #if CONFIG_MACF_SOCKET_SUBSET
1912 /*
1913 * We check the state without holding the socket lock;
1914 * if a race condition occurs, it would simply result
1915 * in an extra call to the MAC check function.
1916 */
1917 if (!(so->so_state & SS_DEFUNCT) &&
1918 !(so->so_state & SS_ISCONNECTED) &&
1919 !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
1920 (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0) {
1921 goto out1;
1922 }
1923 #endif /* MAC_SOCKET_SUBSET */
1924 if (uio_resid(uiop) < 0 || uio_resid(uiop) > INT_MAX) {
1925 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, EINVAL, 0, 0, 0, 0);
1926 error = EINVAL;
1927 goto out1;
1928 }
1929
1930 len = uio_resid(uiop);
1931 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, uiop,
1932 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
1933 &mp->msg_flags);
1934 if (fromsa) {
1935 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()),
1936 fromsa);
1937 }
1938 if (error) {
1939 if (uio_resid(uiop) != len && (error == ERESTART ||
1940 error == EINTR || error == EWOULDBLOCK)) {
1941 error = 0;
1942 }
1943 }
1944 if (error) {
1945 goto out;
1946 }
1947
1948 *retval = (int32_t)(len - uio_resid(uiop));
1949
1950 if (mp->msg_name) {
1951 error = copyout_sa(fromsa, mp->msg_name, &mp->msg_namelen);
1952 if (error) {
1953 goto out;
1954 }
1955 /* return the actual, untruncated address length */
1956 if (namelenp &&
1957 (error = copyout((caddr_t)&mp->msg_namelen, namelenp,
1958 sizeof(int)))) {
1959 goto out;
1960 }
1961 }
1962
1963 if (mp->msg_control) {
1964 error = copyout_control(p, control, mp->msg_control,
1965 &mp->msg_controllen, &mp->msg_flags, so);
1966 }
1967 out:
1968 if (fromsa) {
1969 FREE(fromsa, M_SONAME);
1970 }
1971 if (control) {
1972 m_freem(control);
1973 }
1974 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
1975 out1:
1976 fp_drop(p, s, fp, 0);
1977 return error;
1978 }
1979
1980 /*
1981 * Returns: 0 Success
1982 * ENOMEM
1983 * copyin:EFAULT
1984 * recvit:???
1985 * read:??? [4056224: applicable for pipes]
1986 *
1987 * Notes: The read entry point is only called as part of support for
1988 * binary backward compatability; new code should use read
1989 * instead of recv or recvfrom when attempting to read data
1990 * from pipes.
1991 *
1992 * For full documentation of the return codes from recvit, see
1993 * the block header for the recvit function.
1994 */
1995 int
1996 recvfrom(struct proc *p, struct recvfrom_args *uap, int32_t *retval)
1997 {
1998 __pthread_testcancel(1);
1999 return recvfrom_nocancel(p, (struct recvfrom_nocancel_args *)uap,
2000 retval);
2001 }
2002
2003 int
2004 recvfrom_nocancel(struct proc *p, struct recvfrom_nocancel_args *uap,
2005 int32_t *retval)
2006 {
2007 struct user_msghdr msg;
2008 int error;
2009 uio_t auio = NULL;
2010
2011 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_START, 0, 0, 0, 0, 0);
2012 AUDIT_ARG(fd, uap->s);
2013
2014 if (uap->fromlenaddr) {
2015 error = copyin(uap->fromlenaddr,
2016 (caddr_t)&msg.msg_namelen, sizeof(msg.msg_namelen));
2017 if (error) {
2018 return error;
2019 }
2020 } else {
2021 msg.msg_namelen = 0;
2022 }
2023 msg.msg_name = uap->from;
2024 auio = uio_create(1, 0,
2025 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
2026 UIO_READ);
2027 if (auio == NULL) {
2028 return ENOMEM;
2029 }
2030
2031 uio_addiov(auio, uap->buf, uap->len);
2032 /* no need to set up msg_iov. recvit uses uio_t we send it */
2033 msg.msg_iov = 0;
2034 msg.msg_iovlen = 0;
2035 msg.msg_control = 0;
2036 msg.msg_controllen = 0;
2037 msg.msg_flags = uap->flags;
2038 error = recvit(p, uap->s, &msg, auio, uap->fromlenaddr, retval);
2039 if (auio != NULL) {
2040 uio_free(auio);
2041 }
2042
2043 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_END, error, 0, 0, 0, 0);
2044
2045 return error;
2046 }
2047
2048 /*
2049 * Returns: 0 Success
2050 * EMSGSIZE
2051 * ENOMEM
2052 * copyin:EFAULT
2053 * copyout:EFAULT
2054 * recvit:???
2055 *
2056 * Notes: For full documentation of the return codes from recvit, see
2057 * the block header for the recvit function.
2058 */
2059 int
2060 recvmsg(struct proc *p, struct recvmsg_args *uap, int32_t *retval)
2061 {
2062 __pthread_testcancel(1);
2063 return recvmsg_nocancel(p, (struct recvmsg_nocancel_args *)uap,
2064 retval);
2065 }
2066
2067 int
2068 recvmsg_nocancel(struct proc *p, struct recvmsg_nocancel_args *uap,
2069 int32_t *retval)
2070 {
2071 struct user32_msghdr msg32;
2072 struct user64_msghdr msg64;
2073 struct user_msghdr user_msg;
2074 caddr_t msghdrp;
2075 int size_of_msghdr;
2076 user_addr_t uiov;
2077 int error;
2078 uio_t auio = NULL;
2079 struct user_iovec *iovp;
2080
2081 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
2082 AUDIT_ARG(fd, uap->s);
2083 if (IS_64BIT_PROCESS(p)) {
2084 msghdrp = (caddr_t)&msg64;
2085 size_of_msghdr = sizeof(msg64);
2086 } else {
2087 msghdrp = (caddr_t)&msg32;
2088 size_of_msghdr = sizeof(msg32);
2089 }
2090 error = copyin(uap->msg, msghdrp, size_of_msghdr);
2091 if (error) {
2092 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
2093 return error;
2094 }
2095
2096 /* only need to copy if user process is not 64-bit */
2097 if (IS_64BIT_PROCESS(p)) {
2098 user_msg.msg_flags = msg64.msg_flags;
2099 user_msg.msg_controllen = msg64.msg_controllen;
2100 user_msg.msg_control = (user_addr_t)msg64.msg_control;
2101 user_msg.msg_iovlen = msg64.msg_iovlen;
2102 user_msg.msg_iov = (user_addr_t)msg64.msg_iov;
2103 user_msg.msg_namelen = msg64.msg_namelen;
2104 user_msg.msg_name = (user_addr_t)msg64.msg_name;
2105 } else {
2106 user_msg.msg_flags = msg32.msg_flags;
2107 user_msg.msg_controllen = msg32.msg_controllen;
2108 user_msg.msg_control = msg32.msg_control;
2109 user_msg.msg_iovlen = msg32.msg_iovlen;
2110 user_msg.msg_iov = msg32.msg_iov;
2111 user_msg.msg_namelen = msg32.msg_namelen;
2112 user_msg.msg_name = msg32.msg_name;
2113 }
2114
2115 if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
2116 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, EMSGSIZE,
2117 0, 0, 0, 0);
2118 return EMSGSIZE;
2119 }
2120
2121 user_msg.msg_flags = uap->flags;
2122
2123 /* allocate a uio large enough to hold the number of iovecs passed */
2124 auio = uio_create(user_msg.msg_iovlen, 0,
2125 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
2126 UIO_READ);
2127 if (auio == NULL) {
2128 error = ENOMEM;
2129 goto done;
2130 }
2131
2132 /*
2133 * get location of iovecs within the uio. then copyin the iovecs from
2134 * user space.
2135 */
2136 iovp = uio_iovsaddr(auio);
2137 if (iovp == NULL) {
2138 error = ENOMEM;
2139 goto done;
2140 }
2141 uiov = user_msg.msg_iov;
2142 user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
2143 error = copyin_user_iovec_array(uiov,
2144 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
2145 user_msg.msg_iovlen, iovp);
2146 if (error) {
2147 goto done;
2148 }
2149
2150 /* finish setup of uio_t */
2151 error = uio_calculateresid(auio);
2152 if (error) {
2153 goto done;
2154 }
2155
2156 error = recvit(p, uap->s, &user_msg, auio, 0, retval);
2157 if (!error) {
2158 user_msg.msg_iov = uiov;
2159 if (IS_64BIT_PROCESS(p)) {
2160 msg64.msg_flags = user_msg.msg_flags;
2161 msg64.msg_controllen = user_msg.msg_controllen;
2162 msg64.msg_control = user_msg.msg_control;
2163 msg64.msg_iovlen = user_msg.msg_iovlen;
2164 msg64.msg_iov = user_msg.msg_iov;
2165 msg64.msg_namelen = user_msg.msg_namelen;
2166 msg64.msg_name = user_msg.msg_name;
2167 } else {
2168 msg32.msg_flags = user_msg.msg_flags;
2169 msg32.msg_controllen = user_msg.msg_controllen;
2170 msg32.msg_control = (user32_addr_t)user_msg.msg_control;
2171 msg32.msg_iovlen = user_msg.msg_iovlen;
2172 msg32.msg_iov = (user32_addr_t)user_msg.msg_iov;
2173 msg32.msg_namelen = user_msg.msg_namelen;
2174 msg32.msg_name = (user32_addr_t)user_msg.msg_name;
2175 }
2176 error = copyout(msghdrp, uap->msg, size_of_msghdr);
2177 }
2178 done:
2179 if (auio != NULL) {
2180 uio_free(auio);
2181 }
2182 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
2183 return error;
2184 }
2185
2186 int
2187 recvmsg_x(struct proc *p, struct recvmsg_x_args *uap, user_ssize_t *retval)
2188 {
2189 int error = EOPNOTSUPP;
2190 struct user_msghdr_x *user_msg_x = NULL;
2191 struct recv_msg_elem *recv_msg_array = NULL;
2192 struct socket *so;
2193 user_ssize_t len_before = 0, len_after;
2194 int need_drop = 0;
2195 size_t size_of_msghdr;
2196 void *umsgp = NULL;
2197 u_int i;
2198 u_int uiocnt;
2199
2200 KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0);
2201
2202 error = file_socket(uap->s, &so);
2203 if (error) {
2204 goto out;
2205 }
2206 need_drop = 1;
2207 if (so == NULL) {
2208 error = EBADF;
2209 goto out;
2210 }
2211 /*
2212 * Input parameter range check
2213 */
2214 if (uap->cnt == 0 || uap->cnt > UIO_MAXIOV) {
2215 error = EINVAL;
2216 goto out;
2217 }
2218 if (uap->cnt > somaxrecvmsgx) {
2219 uap->cnt = somaxrecvmsgx;
2220 }
2221
2222 user_msg_x = _MALLOC(uap->cnt * sizeof(struct user_msghdr_x),
2223 M_TEMP, M_WAITOK | M_ZERO);
2224 if (user_msg_x == NULL) {
2225 DBG_PRINTF("%s _MALLOC() user_msg_x failed\n", __func__);
2226 error = ENOMEM;
2227 goto out;
2228 }
2229 recv_msg_array = alloc_recv_msg_array(uap->cnt);
2230 if (recv_msg_array == NULL) {
2231 DBG_PRINTF("%s alloc_recv_msg_array() failed\n", __func__);
2232 error = ENOMEM;
2233 goto out;
2234 }
2235 size_of_msghdr = IS_64BIT_PROCESS(p) ?
2236 sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x);
2237
2238 umsgp = _MALLOC(uap->cnt * size_of_msghdr, M_TEMP, M_WAITOK | M_ZERO);
2239 if (umsgp == NULL) {
2240 DBG_PRINTF("%s _MALLOC() umsgp failed\n", __func__);
2241 error = ENOMEM;
2242 goto out;
2243 }
2244 error = copyin(uap->msgp, umsgp, uap->cnt * size_of_msghdr);
2245 if (error) {
2246 DBG_PRINTF("%s copyin() failed\n", __func__);
2247 goto out;
2248 }
2249 error = internalize_recv_msghdr_array(umsgp,
2250 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
2251 UIO_READ, uap->cnt, user_msg_x, recv_msg_array);
2252 if (error) {
2253 DBG_PRINTF("%s copyin_user_msghdr_array() failed\n", __func__);
2254 goto out;
2255 }
2256 /*
2257 * Make sure the size of each message iovec and
2258 * the aggregate size of all the iovec is valid
2259 */
2260 if (recv_msg_array_is_valid(recv_msg_array, uap->cnt) == 0) {
2261 error = EINVAL;
2262 goto out;
2263 }
2264 /*
2265 * Sanity check on passed arguments
2266 */
2267 for (i = 0; i < uap->cnt; i++) {
2268 struct user_msghdr_x *mp = user_msg_x + i;
2269
2270 if (mp->msg_flags != 0) {
2271 error = EINVAL;
2272 goto out;
2273 }
2274 }
2275 #if CONFIG_MACF_SOCKET_SUBSET
2276 /*
2277 * We check the state without holding the socket lock;
2278 * if a race condition occurs, it would simply result
2279 * in an extra call to the MAC check function.
2280 */
2281 if (!(so->so_state & SS_DEFUNCT) &&
2282 !(so->so_state & SS_ISCONNECTED) &&
2283 !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
2284 (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0) {
2285 goto out;
2286 }
2287 #endif /* MAC_SOCKET_SUBSET */
2288
2289 len_before = recv_msg_array_resid(recv_msg_array, uap->cnt);
2290
2291 if (so->so_proto->pr_usrreqs->pru_soreceive_list !=
2292 pru_soreceive_list_notsupp &&
2293 somaxrecvmsgx == 0) {
2294 error = so->so_proto->pr_usrreqs->pru_soreceive_list(so,
2295 recv_msg_array, uap->cnt, &uap->flags);
2296 } else {
2297 int flags = uap->flags;
2298
2299 for (i = 0; i < uap->cnt; i++) {
2300 struct recv_msg_elem *recv_msg_elem;
2301 uio_t auio;
2302 struct sockaddr **psa;
2303 struct mbuf **controlp;
2304
2305 recv_msg_elem = recv_msg_array + i;
2306 auio = recv_msg_elem->uio;
2307
2308 /*
2309 * Do not block if we got at least one packet
2310 */
2311 if (i > 0) {
2312 flags |= MSG_DONTWAIT;
2313 }
2314
2315 psa = (recv_msg_elem->which & SOCK_MSG_SA) ?
2316 &recv_msg_elem->psa : NULL;
2317 controlp = (recv_msg_elem->which & SOCK_MSG_CONTROL) ?
2318 &recv_msg_elem->controlp : NULL;
2319
2320 error = so->so_proto->pr_usrreqs->pru_soreceive(so, psa,
2321 auio, (struct mbuf **)0, controlp, &flags);
2322 if (error) {
2323 break;
2324 }
2325 /*
2326 * We have some data
2327 */
2328 recv_msg_elem->which |= SOCK_MSG_DATA;
2329 /*
2330 * Stop on partial copy
2331 */
2332 if (flags & (MSG_RCVMORE | MSG_TRUNC)) {
2333 break;
2334 }
2335 }
2336 if ((uap->flags & MSG_DONTWAIT) == 0) {
2337 flags &= ~MSG_DONTWAIT;
2338 }
2339 uap->flags = flags;
2340 }
2341
2342 len_after = recv_msg_array_resid(recv_msg_array, uap->cnt);
2343
2344 if (error) {
2345 if (len_after != len_before && (error == ERESTART ||
2346 error == EINTR || error == EWOULDBLOCK)) {
2347 error = 0;
2348 } else {
2349 goto out;
2350 }
2351 }
2352
2353 uiocnt = externalize_recv_msghdr_array(umsgp,
2354 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
2355 UIO_READ, uap->cnt, user_msg_x, recv_msg_array);
2356
2357 error = copyout(umsgp, uap->msgp, uap->cnt * size_of_msghdr);
2358 if (error) {
2359 DBG_PRINTF("%s copyout() failed\n", __func__);
2360 goto out;
2361 }
2362 *retval = (int)(uiocnt);
2363
2364 for (i = 0; i < uap->cnt; i++) {
2365 struct user_msghdr_x *mp = user_msg_x + i;
2366 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
2367 struct sockaddr *fromsa = recv_msg_elem->psa;
2368
2369 if (mp->msg_name) {
2370 error = copyout_sa(fromsa, mp->msg_name,
2371 &mp->msg_namelen);
2372 if (error) {
2373 goto out;
2374 }
2375 }
2376 if (mp->msg_control) {
2377 error = copyout_control(p, recv_msg_elem->controlp,
2378 mp->msg_control, &mp->msg_controllen,
2379 &mp->msg_flags, so);
2380 if (error) {
2381 goto out;
2382 }
2383 }
2384 }
2385 out:
2386 if (need_drop) {
2387 file_drop(uap->s);
2388 }
2389 if (umsgp != NULL) {
2390 _FREE(umsgp, M_TEMP);
2391 }
2392 if (recv_msg_array != NULL) {
2393 free_recv_msg_array(recv_msg_array, uap->cnt);
2394 }
2395 if (user_msg_x != NULL) {
2396 _FREE(user_msg_x, M_TEMP);
2397 }
2398
2399 KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0);
2400
2401 return error;
2402 }
2403
2404 /*
2405 * Returns: 0 Success
2406 * EBADF
2407 * file_socket:ENOTSOCK
2408 * file_socket:EBADF
2409 * soshutdown:EINVAL
2410 * soshutdown:ENOTCONN
2411 * soshutdown:EADDRNOTAVAIL[TCP]
2412 * soshutdown:ENOBUFS[TCP]
2413 * soshutdown:EMSGSIZE[TCP]
2414 * soshutdown:EHOSTUNREACH[TCP]
2415 * soshutdown:ENETUNREACH[TCP]
2416 * soshutdown:ENETDOWN[TCP]
2417 * soshutdown:ENOMEM[TCP]
2418 * soshutdown:EACCES[TCP]
2419 * soshutdown:EMSGSIZE[TCP]
2420 * soshutdown:ENOBUFS[TCP]
2421 * soshutdown:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
2422 * soshutdown:??? [other protocol families]
2423 */
2424 /* ARGSUSED */
2425 int
2426 shutdown(__unused struct proc *p, struct shutdown_args *uap,
2427 __unused int32_t *retval)
2428 {
2429 struct socket *so;
2430 int error;
2431
2432 AUDIT_ARG(fd, uap->s);
2433 error = file_socket(uap->s, &so);
2434 if (error) {
2435 return error;
2436 }
2437 if (so == NULL) {
2438 error = EBADF;
2439 goto out;
2440 }
2441 error = soshutdown((struct socket *)so, uap->how);
2442 out:
2443 file_drop(uap->s);
2444 return error;
2445 }
2446
2447 /*
2448 * Returns: 0 Success
2449 * EFAULT
2450 * EINVAL
2451 * EACCES Mandatory Access Control failure
2452 * file_socket:ENOTSOCK
2453 * file_socket:EBADF
2454 * sosetopt:EINVAL
2455 * sosetopt:ENOPROTOOPT
2456 * sosetopt:ENOBUFS
2457 * sosetopt:EDOM
2458 * sosetopt:EFAULT
2459 * sosetopt:EOPNOTSUPP[AF_UNIX]
2460 * sosetopt:???
2461 */
2462 /* ARGSUSED */
2463 int
2464 setsockopt(struct proc *p, struct setsockopt_args *uap,
2465 __unused int32_t *retval)
2466 {
2467 struct socket *so;
2468 struct sockopt sopt;
2469 int error;
2470
2471 AUDIT_ARG(fd, uap->s);
2472 if (uap->val == 0 && uap->valsize != 0) {
2473 return EFAULT;
2474 }
2475 /* No bounds checking on size (it's unsigned) */
2476
2477 error = file_socket(uap->s, &so);
2478 if (error) {
2479 return error;
2480 }
2481
2482 sopt.sopt_dir = SOPT_SET;
2483 sopt.sopt_level = uap->level;
2484 sopt.sopt_name = uap->name;
2485 sopt.sopt_val = uap->val;
2486 sopt.sopt_valsize = uap->valsize;
2487 sopt.sopt_p = p;
2488
2489 if (so == NULL) {
2490 error = EINVAL;
2491 goto out;
2492 }
2493 #if CONFIG_MACF_SOCKET_SUBSET
2494 if ((error = mac_socket_check_setsockopt(kauth_cred_get(), so,
2495 &sopt)) != 0) {
2496 goto out;
2497 }
2498 #endif /* MAC_SOCKET_SUBSET */
2499 error = sosetoptlock(so, &sopt, 1); /* will lock socket */
2500 out:
2501 file_drop(uap->s);
2502 return error;
2503 }
2504
2505
2506
2507 /*
2508 * Returns: 0 Success
2509 * EINVAL
2510 * EBADF
2511 * EACCES Mandatory Access Control failure
2512 * copyin:EFAULT
2513 * copyout:EFAULT
2514 * file_socket:ENOTSOCK
2515 * file_socket:EBADF
2516 * sogetopt:???
2517 */
2518 int
2519 getsockopt(struct proc *p, struct getsockopt_args *uap,
2520 __unused int32_t *retval)
2521 {
2522 int error;
2523 socklen_t valsize;
2524 struct sockopt sopt;
2525 struct socket *so;
2526
2527 error = file_socket(uap->s, &so);
2528 if (error) {
2529 return error;
2530 }
2531 if (uap->val) {
2532 error = copyin(uap->avalsize, (caddr_t)&valsize,
2533 sizeof(valsize));
2534 if (error) {
2535 goto out;
2536 }
2537 /* No bounds checking on size (it's unsigned) */
2538 } else {
2539 valsize = 0;
2540 }
2541 sopt.sopt_dir = SOPT_GET;
2542 sopt.sopt_level = uap->level;
2543 sopt.sopt_name = uap->name;
2544 sopt.sopt_val = uap->val;
2545 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
2546 sopt.sopt_p = p;
2547
2548 if (so == NULL) {
2549 error = EBADF;
2550 goto out;
2551 }
2552 #if CONFIG_MACF_SOCKET_SUBSET
2553 if ((error = mac_socket_check_getsockopt(kauth_cred_get(), so,
2554 &sopt)) != 0) {
2555 goto out;
2556 }
2557 #endif /* MAC_SOCKET_SUBSET */
2558 error = sogetoptlock((struct socket *)so, &sopt, 1); /* will lock */
2559 if (error == 0) {
2560 valsize = (socklen_t)sopt.sopt_valsize;
2561 error = copyout((caddr_t)&valsize, uap->avalsize,
2562 sizeof(valsize));
2563 }
2564 out:
2565 file_drop(uap->s);
2566 return error;
2567 }
2568
2569
2570 /*
2571 * Get socket name.
2572 *
2573 * Returns: 0 Success
2574 * EBADF
2575 * file_socket:ENOTSOCK
2576 * file_socket:EBADF
2577 * copyin:EFAULT
2578 * copyout:EFAULT
2579 * <pru_sockaddr>:ENOBUFS[TCP]
2580 * <pru_sockaddr>:ECONNRESET[TCP]
2581 * <pru_sockaddr>:EINVAL[AF_UNIX]
2582 * <sf_getsockname>:???
2583 */
2584 /* ARGSUSED */
2585 int
2586 getsockname(__unused struct proc *p, struct getsockname_args *uap,
2587 __unused int32_t *retval)
2588 {
2589 struct socket *so;
2590 struct sockaddr *sa;
2591 socklen_t len;
2592 socklen_t sa_len;
2593 int error;
2594
2595 error = file_socket(uap->fdes, &so);
2596 if (error) {
2597 return error;
2598 }
2599 error = copyin(uap->alen, (caddr_t)&len, sizeof(socklen_t));
2600 if (error) {
2601 goto out;
2602 }
2603 if (so == NULL) {
2604 error = EBADF;
2605 goto out;
2606 }
2607 sa = 0;
2608 socket_lock(so, 1);
2609 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
2610 if (error == 0) {
2611 error = sflt_getsockname(so, &sa);
2612 if (error == EJUSTRETURN) {
2613 error = 0;
2614 }
2615 }
2616 socket_unlock(so, 1);
2617 if (error) {
2618 goto bad;
2619 }
2620 if (sa == 0) {
2621 len = 0;
2622 goto gotnothing;
2623 }
2624
2625 sa_len = sa->sa_len;
2626 len = MIN(len, sa_len);
2627 error = copyout((caddr_t)sa, uap->asa, len);
2628 if (error) {
2629 goto bad;
2630 }
2631 /* return the actual, untruncated address length */
2632 len = sa_len;
2633 gotnothing:
2634 error = copyout((caddr_t)&len, uap->alen, sizeof(socklen_t));
2635 bad:
2636 if (sa) {
2637 FREE(sa, M_SONAME);
2638 }
2639 out:
2640 file_drop(uap->fdes);
2641 return error;
2642 }
2643
2644 /*
2645 * Get name of peer for connected socket.
2646 *
2647 * Returns: 0 Success
2648 * EBADF
2649 * EINVAL
2650 * ENOTCONN
2651 * file_socket:ENOTSOCK
2652 * file_socket:EBADF
2653 * copyin:EFAULT
2654 * copyout:EFAULT
2655 * <pru_peeraddr>:???
2656 * <sf_getpeername>:???
2657 */
2658 /* ARGSUSED */
2659 int
2660 getpeername(__unused struct proc *p, struct getpeername_args *uap,
2661 __unused int32_t *retval)
2662 {
2663 struct socket *so;
2664 struct sockaddr *sa;
2665 socklen_t len;
2666 socklen_t sa_len;
2667 int error;
2668
2669 error = file_socket(uap->fdes, &so);
2670 if (error) {
2671 return error;
2672 }
2673 if (so == NULL) {
2674 error = EBADF;
2675 goto out;
2676 }
2677
2678 socket_lock(so, 1);
2679
2680 if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
2681 (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
2682 /* the socket has been shutdown, no more getpeername's */
2683 socket_unlock(so, 1);
2684 error = EINVAL;
2685 goto out;
2686 }
2687
2688 if ((so->so_state & (SS_ISCONNECTED | SS_ISCONFIRMING)) == 0) {
2689 socket_unlock(so, 1);
2690 error = ENOTCONN;
2691 goto out;
2692 }
2693 error = copyin(uap->alen, (caddr_t)&len, sizeof(socklen_t));
2694 if (error) {
2695 socket_unlock(so, 1);
2696 goto out;
2697 }
2698 sa = 0;
2699 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
2700 if (error == 0) {
2701 error = sflt_getpeername(so, &sa);
2702 if (error == EJUSTRETURN) {
2703 error = 0;
2704 }
2705 }
2706 socket_unlock(so, 1);
2707 if (error) {
2708 goto bad;
2709 }
2710 if (sa == 0) {
2711 len = 0;
2712 goto gotnothing;
2713 }
2714 sa_len = sa->sa_len;
2715 len = MIN(len, sa_len);
2716 error = copyout(sa, uap->asa, len);
2717 if (error) {
2718 goto bad;
2719 }
2720 /* return the actual, untruncated address length */
2721 len = sa_len;
2722 gotnothing:
2723 error = copyout((caddr_t)&len, uap->alen, sizeof(socklen_t));
2724 bad:
2725 if (sa) {
2726 FREE(sa, M_SONAME);
2727 }
2728 out:
2729 file_drop(uap->fdes);
2730 return error;
2731 }
2732
2733 int
2734 sockargs(struct mbuf **mp, user_addr_t data, socklen_t buflen, int type)
2735 {
2736 struct sockaddr *sa;
2737 struct mbuf *m;
2738 int error;
2739 socklen_t alloc_buflen = buflen;
2740
2741 if (buflen > INT_MAX / 2) {
2742 return EINVAL;
2743 }
2744 if (type == MT_SONAME && buflen > SOCK_MAXADDRLEN) {
2745 return EINVAL;
2746 }
2747
2748 #ifdef __LP64__
2749 /*
2750 * The fd's in the buffer must expand to be pointers, thus we need twice
2751 * as much space
2752 */
2753 if (type == MT_CONTROL) {
2754 alloc_buflen = ((buflen - sizeof(struct cmsghdr)) * 2) +
2755 sizeof(struct cmsghdr);
2756 }
2757 #endif
2758 if (alloc_buflen > MLEN) {
2759 if (type == MT_SONAME && alloc_buflen <= 112) {
2760 alloc_buflen = MLEN; /* unix domain compat. hack */
2761 } else if (alloc_buflen > MCLBYTES) {
2762 return EINVAL;
2763 }
2764 }
2765 m = m_get(M_WAIT, type);
2766 if (m == NULL) {
2767 return ENOBUFS;
2768 }
2769 if (alloc_buflen > MLEN) {
2770 MCLGET(m, M_WAIT);
2771 if ((m->m_flags & M_EXT) == 0) {
2772 m_free(m);
2773 return ENOBUFS;
2774 }
2775 }
2776 /*
2777 * K64: We still copyin the original buflen because it gets expanded
2778 * later and we lie about the size of the mbuf because it only affects
2779 * unp_* functions
2780 */
2781 m->m_len = buflen;
2782 error = copyin(data, mtod(m, caddr_t), (u_int)buflen);
2783 if (error) {
2784 (void) m_free(m);
2785 } else {
2786 *mp = m;
2787 if (type == MT_SONAME) {
2788 sa = mtod(m, struct sockaddr *);
2789 VERIFY(buflen <= SOCK_MAXADDRLEN);
2790 sa->sa_len = (__uint8_t)buflen;
2791 }
2792 }
2793 return error;
2794 }
2795
2796 /*
2797 * Given a user_addr_t of length len, allocate and fill out a *sa.
2798 *
2799 * Returns: 0 Success
2800 * ENAMETOOLONG Filename too long
2801 * EINVAL Invalid argument
2802 * ENOMEM Not enough space
2803 * copyin:EFAULT Bad address
2804 */
2805 static int
2806 getsockaddr(struct socket *so, struct sockaddr **namp, user_addr_t uaddr,
2807 size_t len, boolean_t translate_unspec)
2808 {
2809 struct sockaddr *sa;
2810 int error;
2811
2812 if (len > SOCK_MAXADDRLEN) {
2813 return ENAMETOOLONG;
2814 }
2815
2816 if (len < offsetof(struct sockaddr, sa_data[0])) {
2817 return EINVAL;
2818 }
2819
2820 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK | M_ZERO);
2821 if (sa == NULL) {
2822 return ENOMEM;
2823 }
2824 error = copyin(uaddr, (caddr_t)sa, len);
2825 if (error) {
2826 FREE(sa, M_SONAME);
2827 } else {
2828 /*
2829 * Force sa_family to AF_INET on AF_INET sockets to handle
2830 * legacy applications that use AF_UNSPEC (0). On all other
2831 * sockets we leave it unchanged and let the lower layer
2832 * handle it.
2833 */
2834 if (translate_unspec && sa->sa_family == AF_UNSPEC &&
2835 SOCK_CHECK_DOM(so, PF_INET) &&
2836 len == sizeof(struct sockaddr_in)) {
2837 sa->sa_family = AF_INET;
2838 }
2839 VERIFY(len <= SOCK_MAXADDRLEN);
2840 sa->sa_len = (__uint8_t)len;
2841 *namp = sa;
2842 }
2843 return error;
2844 }
2845
2846 static int
2847 getsockaddr_s(struct socket *so, struct sockaddr_storage *ss,
2848 user_addr_t uaddr, size_t len, boolean_t translate_unspec)
2849 {
2850 int error;
2851
2852 if (ss == NULL || uaddr == USER_ADDR_NULL ||
2853 len < offsetof(struct sockaddr, sa_data[0])) {
2854 return EINVAL;
2855 }
2856
2857 /*
2858 * sockaddr_storage size is less than SOCK_MAXADDRLEN,
2859 * so the check here is inclusive.
2860 */
2861 if (len > sizeof(*ss)) {
2862 return ENAMETOOLONG;
2863 }
2864
2865 bzero(ss, sizeof(*ss));
2866 error = copyin(uaddr, (caddr_t)ss, len);
2867 if (error == 0) {
2868 /*
2869 * Force sa_family to AF_INET on AF_INET sockets to handle
2870 * legacy applications that use AF_UNSPEC (0). On all other
2871 * sockets we leave it unchanged and let the lower layer
2872 * handle it.
2873 */
2874 if (translate_unspec && ss->ss_family == AF_UNSPEC &&
2875 SOCK_CHECK_DOM(so, PF_INET) &&
2876 len == sizeof(struct sockaddr_in)) {
2877 ss->ss_family = AF_INET;
2878 }
2879
2880 ss->ss_len = (__uint8_t)len;
2881 }
2882 return error;
2883 }
2884
2885 int
2886 internalize_user_msghdr_array(const void *src, int spacetype, int direction,
2887 u_int count, struct user_msghdr_x *dst, struct uio **uiop)
2888 {
2889 int error = 0;
2890 u_int i;
2891 u_int namecnt = 0;
2892 u_int ctlcnt = 0;
2893
2894 for (i = 0; i < count; i++) {
2895 uio_t auio;
2896 struct user_iovec *iovp;
2897 struct user_msghdr_x *user_msg = dst + i;
2898
2899 if (spacetype == UIO_USERSPACE64) {
2900 const struct user64_msghdr_x *msghdr64;
2901
2902 msghdr64 = ((const struct user64_msghdr_x *)src) + i;
2903
2904 user_msg->msg_name = (user_addr_t)msghdr64->msg_name;
2905 user_msg->msg_namelen = msghdr64->msg_namelen;
2906 user_msg->msg_iov = (user_addr_t)msghdr64->msg_iov;
2907 user_msg->msg_iovlen = msghdr64->msg_iovlen;
2908 user_msg->msg_control = (user_addr_t)msghdr64->msg_control;
2909 user_msg->msg_controllen = msghdr64->msg_controllen;
2910 user_msg->msg_flags = msghdr64->msg_flags;
2911 user_msg->msg_datalen = (size_t)msghdr64->msg_datalen;
2912 } else {
2913 const struct user32_msghdr_x *msghdr32;
2914
2915 msghdr32 = ((const struct user32_msghdr_x *)src) + i;
2916
2917 user_msg->msg_name = msghdr32->msg_name;
2918 user_msg->msg_namelen = msghdr32->msg_namelen;
2919 user_msg->msg_iov = msghdr32->msg_iov;
2920 user_msg->msg_iovlen = msghdr32->msg_iovlen;
2921 user_msg->msg_control = msghdr32->msg_control;
2922 user_msg->msg_controllen = msghdr32->msg_controllen;
2923 user_msg->msg_flags = msghdr32->msg_flags;
2924 user_msg->msg_datalen = msghdr32->msg_datalen;
2925 }
2926
2927 if (user_msg->msg_iovlen <= 0 ||
2928 user_msg->msg_iovlen > UIO_MAXIOV) {
2929 error = EMSGSIZE;
2930 goto done;
2931 }
2932 auio = uio_create(user_msg->msg_iovlen, 0, spacetype,
2933 direction);
2934 if (auio == NULL) {
2935 error = ENOMEM;
2936 goto done;
2937 }
2938 uiop[i] = auio;
2939
2940 iovp = uio_iovsaddr(auio);
2941 if (iovp == NULL) {
2942 error = ENOMEM;
2943 goto done;
2944 }
2945 error = copyin_user_iovec_array(user_msg->msg_iov,
2946 spacetype, user_msg->msg_iovlen, iovp);
2947 if (error) {
2948 goto done;
2949 }
2950 user_msg->msg_iov = CAST_USER_ADDR_T(iovp);
2951
2952 error = uio_calculateresid(auio);
2953 if (error) {
2954 goto done;
2955 }
2956 user_msg->msg_datalen = uio_resid(auio);
2957
2958 if (user_msg->msg_name && user_msg->msg_namelen) {
2959 namecnt++;
2960 }
2961 if (user_msg->msg_control && user_msg->msg_controllen) {
2962 ctlcnt++;
2963 }
2964 }
2965 done:
2966
2967 return error;
2968 }
2969
2970 int
2971 internalize_recv_msghdr_array(const void *src, int spacetype, int direction,
2972 u_int count, struct user_msghdr_x *dst,
2973 struct recv_msg_elem *recv_msg_array)
2974 {
2975 int error = 0;
2976 u_int i;
2977
2978 for (i = 0; i < count; i++) {
2979 struct user_iovec *iovp;
2980 struct user_msghdr_x *user_msg = dst + i;
2981 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
2982
2983 if (spacetype == UIO_USERSPACE64) {
2984 const struct user64_msghdr_x *msghdr64;
2985
2986 msghdr64 = ((const struct user64_msghdr_x *)src) + i;
2987
2988 user_msg->msg_name = (user_addr_t)msghdr64->msg_name;
2989 user_msg->msg_namelen = msghdr64->msg_namelen;
2990 user_msg->msg_iov = (user_addr_t)msghdr64->msg_iov;
2991 user_msg->msg_iovlen = msghdr64->msg_iovlen;
2992 user_msg->msg_control = (user_addr_t)msghdr64->msg_control;
2993 user_msg->msg_controllen = msghdr64->msg_controllen;
2994 user_msg->msg_flags = msghdr64->msg_flags;
2995 user_msg->msg_datalen = (size_t)msghdr64->msg_datalen;
2996 } else {
2997 const struct user32_msghdr_x *msghdr32;
2998
2999 msghdr32 = ((const struct user32_msghdr_x *)src) + i;
3000
3001 user_msg->msg_name = msghdr32->msg_name;
3002 user_msg->msg_namelen = msghdr32->msg_namelen;
3003 user_msg->msg_iov = msghdr32->msg_iov;
3004 user_msg->msg_iovlen = msghdr32->msg_iovlen;
3005 user_msg->msg_control = msghdr32->msg_control;
3006 user_msg->msg_controllen = msghdr32->msg_controllen;
3007 user_msg->msg_flags = msghdr32->msg_flags;
3008 user_msg->msg_datalen = msghdr32->msg_datalen;
3009 }
3010
3011 if (user_msg->msg_iovlen <= 0 ||
3012 user_msg->msg_iovlen > UIO_MAXIOV) {
3013 error = EMSGSIZE;
3014 goto done;
3015 }
3016 recv_msg_elem->uio = uio_create(user_msg->msg_iovlen, 0,
3017 spacetype, direction);
3018 if (recv_msg_elem->uio == NULL) {
3019 error = ENOMEM;
3020 goto done;
3021 }
3022
3023 iovp = uio_iovsaddr(recv_msg_elem->uio);
3024 if (iovp == NULL) {
3025 error = ENOMEM;
3026 goto done;
3027 }
3028 error = copyin_user_iovec_array(user_msg->msg_iov,
3029 spacetype, user_msg->msg_iovlen, iovp);
3030 if (error) {
3031 goto done;
3032 }
3033 user_msg->msg_iov = CAST_USER_ADDR_T(iovp);
3034
3035 error = uio_calculateresid(recv_msg_elem->uio);
3036 if (error) {
3037 goto done;
3038 }
3039 user_msg->msg_datalen = uio_resid(recv_msg_elem->uio);
3040
3041 if (user_msg->msg_name && user_msg->msg_namelen) {
3042 recv_msg_elem->which |= SOCK_MSG_SA;
3043 }
3044 if (user_msg->msg_control && user_msg->msg_controllen) {
3045 recv_msg_elem->which |= SOCK_MSG_CONTROL;
3046 }
3047 }
3048 done:
3049
3050 return error;
3051 }
3052
3053 u_int
3054 externalize_user_msghdr_array(void *dst, int spacetype, int direction,
3055 u_int count, const struct user_msghdr_x *src, struct uio **uiop)
3056 {
3057 #pragma unused(direction)
3058 u_int i;
3059 int seenlast = 0;
3060 u_int retcnt = 0;
3061
3062 for (i = 0; i < count; i++) {
3063 const struct user_msghdr_x *user_msg = src + i;
3064 uio_t auio = uiop[i];
3065 user_ssize_t len = user_msg->msg_datalen - uio_resid(auio);
3066
3067 if (user_msg->msg_datalen != 0 && len == 0) {
3068 seenlast = 1;
3069 }
3070
3071 if (seenlast == 0) {
3072 retcnt++;
3073 }
3074
3075 if (spacetype == UIO_USERSPACE64) {
3076 struct user64_msghdr_x *msghdr64;
3077
3078 msghdr64 = ((struct user64_msghdr_x *)dst) + i;
3079
3080 msghdr64->msg_flags = user_msg->msg_flags;
3081 msghdr64->msg_datalen = len;
3082 } else {
3083 struct user32_msghdr_x *msghdr32;
3084
3085 msghdr32 = ((struct user32_msghdr_x *)dst) + i;
3086
3087 msghdr32->msg_flags = user_msg->msg_flags;
3088 msghdr32->msg_datalen = (user32_size_t)len;
3089 }
3090 }
3091 return retcnt;
3092 }
3093
3094 u_int
3095 externalize_recv_msghdr_array(void *dst, int spacetype, int direction,
3096 u_int count, const struct user_msghdr_x *src,
3097 struct recv_msg_elem *recv_msg_array)
3098 {
3099 u_int i;
3100 int seenlast = 0;
3101 u_int retcnt = 0;
3102
3103 for (i = 0; i < count; i++) {
3104 const struct user_msghdr_x *user_msg = src + i;
3105 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3106 user_ssize_t len;
3107
3108 len = user_msg->msg_datalen - uio_resid(recv_msg_elem->uio);
3109
3110 if (direction == UIO_READ) {
3111 if ((recv_msg_elem->which & SOCK_MSG_DATA) == 0) {
3112 seenlast = 1;
3113 }
3114 } else {
3115 if (user_msg->msg_datalen != 0 && len == 0) {
3116 seenlast = 1;
3117 }
3118 }
3119
3120 if (seenlast == 0) {
3121 retcnt++;
3122 }
3123
3124 if (spacetype == UIO_USERSPACE64) {
3125 struct user64_msghdr_x *msghdr64;
3126
3127 msghdr64 = ((struct user64_msghdr_x *)dst) + i;
3128
3129 msghdr64->msg_flags = user_msg->msg_flags;
3130 msghdr64->msg_datalen = len;
3131 } else {
3132 struct user32_msghdr_x *msghdr32;
3133
3134 msghdr32 = ((struct user32_msghdr_x *)dst) + i;
3135
3136 msghdr32->msg_flags = user_msg->msg_flags;
3137 msghdr32->msg_datalen = (user32_size_t)len;
3138 }
3139 }
3140 return retcnt;
3141 }
3142
3143 void
3144 free_uio_array(struct uio **uiop, u_int count)
3145 {
3146 u_int i;
3147
3148 for (i = 0; i < count; i++) {
3149 if (uiop[i] != NULL) {
3150 uio_free(uiop[i]);
3151 }
3152 }
3153 }
3154
3155 __private_extern__ user_ssize_t
3156 uio_array_resid(struct uio **uiop, u_int count)
3157 {
3158 user_ssize_t len = 0;
3159 u_int i;
3160
3161 for (i = 0; i < count; i++) {
3162 struct uio *auio = uiop[i];
3163
3164 if (auio != NULL) {
3165 len += uio_resid(auio);
3166 }
3167 }
3168 return len;
3169 }
3170
3171 static boolean_t
3172 uio_array_is_valid(struct uio **uiop, u_int count)
3173 {
3174 user_ssize_t len = 0;
3175 u_int i;
3176
3177 for (i = 0; i < count; i++) {
3178 struct uio *auio = uiop[i];
3179
3180 if (auio != NULL) {
3181 user_ssize_t resid = uio_resid(auio);
3182
3183 /*
3184 * Sanity check on the validity of the iovec:
3185 * no point of going over sb_max
3186 */
3187 if (resid < 0 || resid > (user_ssize_t)sb_max) {
3188 return false;
3189 }
3190
3191 len += resid;
3192 if (len < 0 || len > (user_ssize_t)sb_max) {
3193 return false;
3194 }
3195 }
3196 }
3197 return true;
3198 }
3199
3200
3201 struct recv_msg_elem *
3202 alloc_recv_msg_array(u_int count)
3203 {
3204 struct recv_msg_elem *recv_msg_array;
3205
3206 recv_msg_array = _MALLOC(count * sizeof(struct recv_msg_elem),
3207 M_TEMP, M_WAITOK | M_ZERO);
3208
3209 return recv_msg_array;
3210 }
3211
3212 void
3213 free_recv_msg_array(struct recv_msg_elem *recv_msg_array, u_int count)
3214 {
3215 u_int i;
3216
3217 for (i = 0; i < count; i++) {
3218 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3219
3220 if (recv_msg_elem->uio != NULL) {
3221 uio_free(recv_msg_elem->uio);
3222 }
3223 if (recv_msg_elem->psa != NULL) {
3224 _FREE(recv_msg_elem->psa, M_TEMP);
3225 }
3226 if (recv_msg_elem->controlp != NULL) {
3227 m_freem(recv_msg_elem->controlp);
3228 }
3229 }
3230 _FREE(recv_msg_array, M_TEMP);
3231 }
3232
3233
3234 __private_extern__ user_ssize_t
3235 recv_msg_array_resid(struct recv_msg_elem *recv_msg_array, u_int count)
3236 {
3237 user_ssize_t len = 0;
3238 u_int i;
3239
3240 for (i = 0; i < count; i++) {
3241 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3242
3243 if (recv_msg_elem->uio != NULL) {
3244 len += uio_resid(recv_msg_elem->uio);
3245 }
3246 }
3247 return len;
3248 }
3249
3250 int
3251 recv_msg_array_is_valid(struct recv_msg_elem *recv_msg_array, u_int count)
3252 {
3253 user_ssize_t len = 0;
3254 u_int i;
3255
3256 for (i = 0; i < count; i++) {
3257 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3258
3259 if (recv_msg_elem->uio != NULL) {
3260 user_ssize_t resid = uio_resid(recv_msg_elem->uio);
3261
3262 /*
3263 * Sanity check on the validity of the iovec:
3264 * no point of going over sb_max
3265 */
3266 if (resid < 0 || (u_int32_t)resid > sb_max) {
3267 return 0;
3268 }
3269
3270 len += resid;
3271 if (len < 0 || (u_int32_t)len > sb_max) {
3272 return 0;
3273 }
3274 }
3275 }
3276 return 1;
3277 }
3278
3279 #if SENDFILE
3280
3281 #define SFUIOBUFS 64
3282
3283 /* Macros to compute the number of mbufs needed depending on cluster size */
3284 #define HOWMANY_16K(n) ((((unsigned int)(n) - 1) >> M16KCLSHIFT) + 1)
3285 #define HOWMANY_4K(n) ((((unsigned int)(n) - 1) >> MBIGCLSHIFT) + 1)
3286
3287 /* Upper send limit in bytes (SFUIOBUFS * PAGESIZE) */
3288 #define SENDFILE_MAX_BYTES (SFUIOBUFS << PGSHIFT)
3289
3290 /* Upper send limit in the number of mbuf clusters */
3291 #define SENDFILE_MAX_16K HOWMANY_16K(SENDFILE_MAX_BYTES)
3292 #define SENDFILE_MAX_4K HOWMANY_4K(SENDFILE_MAX_BYTES)
3293
3294 static void
3295 alloc_sendpkt(int how, size_t pktlen, unsigned int *maxchunks,
3296 struct mbuf **m, boolean_t jumbocl)
3297 {
3298 unsigned int needed;
3299
3300 if (pktlen == 0) {
3301 panic("%s: pktlen (%ld) must be non-zero\n", __func__, pktlen);
3302 }
3303
3304 /*
3305 * Try to allocate for the whole thing. Since we want full control
3306 * over the buffer size and be able to accept partial result, we can't
3307 * use mbuf_allocpacket(). The logic below is similar to sosend().
3308 */
3309 *m = NULL;
3310 if (pktlen > MBIGCLBYTES && jumbocl) {
3311 needed = MIN(SENDFILE_MAX_16K, HOWMANY_16K(pktlen));
3312 *m = m_getpackets_internal(&needed, 1, how, 0, M16KCLBYTES);
3313 }
3314 if (*m == NULL) {
3315 needed = MIN(SENDFILE_MAX_4K, HOWMANY_4K(pktlen));
3316 *m = m_getpackets_internal(&needed, 1, how, 0, MBIGCLBYTES);
3317 }
3318
3319 /*
3320 * Our previous attempt(s) at allocation had failed; the system
3321 * may be short on mbufs, and we want to block until they are
3322 * available. This time, ask just for 1 mbuf and don't return
3323 * until we get it.
3324 */
3325 if (*m == NULL) {
3326 needed = 1;
3327 *m = m_getpackets_internal(&needed, 1, M_WAIT, 1, MBIGCLBYTES);
3328 }
3329 if (*m == NULL) {
3330 panic("%s: blocking allocation returned NULL\n", __func__);
3331 }
3332
3333 *maxchunks = needed;
3334 }
3335
3336 /*
3337 * sendfile(2).
3338 * int sendfile(int fd, int s, off_t offset, off_t *nbytes,
3339 * struct sf_hdtr *hdtr, int flags)
3340 *
3341 * Send a file specified by 'fd' and starting at 'offset' to a socket
3342 * specified by 's'. Send only '*nbytes' of the file or until EOF if
3343 * *nbytes == 0. Optionally add a header and/or trailer to the socket
3344 * output. If specified, write the total number of bytes sent into *nbytes.
3345 */
3346 int
3347 sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval)
3348 {
3349 struct fileproc *fp;
3350 struct vnode *vp;
3351 struct socket *so;
3352 struct writev_nocancel_args nuap;
3353 user_ssize_t writev_retval;
3354 struct user_sf_hdtr user_hdtr;
3355 struct user32_sf_hdtr user32_hdtr;
3356 struct user64_sf_hdtr user64_hdtr;
3357 off_t off, xfsize;
3358 off_t nbytes = 0, sbytes = 0;
3359 int error = 0;
3360 size_t sizeof_hdtr;
3361 off_t file_size;
3362 struct vfs_context context = *vfs_context_current();
3363
3364 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_START), uap->s,
3365 0, 0, 0, 0);
3366
3367 AUDIT_ARG(fd, uap->fd);
3368 AUDIT_ARG(value32, uap->s);
3369
3370 /*
3371 * Do argument checking. Must be a regular file in, stream
3372 * type and connected socket out, positive offset.
3373 */
3374 if ((error = fp_getfvp(p, uap->fd, &fp, &vp))) {
3375 goto done;
3376 }
3377 if ((fp->f_flag & FREAD) == 0) {
3378 error = EBADF;
3379 goto done1;
3380 }
3381 if (vnode_isreg(vp) == 0) {
3382 error = ENOTSUP;
3383 goto done1;
3384 }
3385 error = file_socket(uap->s, &so);
3386 if (error) {
3387 goto done1;
3388 }
3389 if (so == NULL) {
3390 error = EBADF;
3391 goto done2;
3392 }
3393 if (so->so_type != SOCK_STREAM) {
3394 error = EINVAL;
3395 goto done2;
3396 }
3397 if ((so->so_state & SS_ISCONNECTED) == 0) {
3398 error = ENOTCONN;
3399 goto done2;
3400 }
3401 if (uap->offset < 0) {
3402 error = EINVAL;
3403 goto done2;
3404 }
3405 if (uap->nbytes == USER_ADDR_NULL) {
3406 error = EINVAL;
3407 goto done2;
3408 }
3409 if (uap->flags != 0) {
3410 error = EINVAL;
3411 goto done2;
3412 }
3413
3414 context.vc_ucred = fp->fp_glob->fg_cred;
3415
3416 #if CONFIG_MACF_SOCKET_SUBSET
3417 /* JMM - fetch connected sockaddr? */
3418 error = mac_socket_check_send(context.vc_ucred, so, NULL);
3419 if (error) {
3420 goto done2;
3421 }
3422 #endif
3423
3424 /*
3425 * Get number of bytes to send
3426 * Should it applies to size of header and trailer?
3427 */
3428 error = copyin(uap->nbytes, &nbytes, sizeof(off_t));
3429 if (error) {
3430 goto done2;
3431 }
3432
3433 /*
3434 * If specified, get the pointer to the sf_hdtr struct for
3435 * any headers/trailers.
3436 */
3437 if (uap->hdtr != USER_ADDR_NULL) {
3438 caddr_t hdtrp;
3439
3440 bzero(&user_hdtr, sizeof(user_hdtr));
3441 if (IS_64BIT_PROCESS(p)) {
3442 hdtrp = (caddr_t)&user64_hdtr;
3443 sizeof_hdtr = sizeof(user64_hdtr);
3444 } else {
3445 hdtrp = (caddr_t)&user32_hdtr;
3446 sizeof_hdtr = sizeof(user32_hdtr);
3447 }
3448 error = copyin(uap->hdtr, hdtrp, sizeof_hdtr);
3449 if (error) {
3450 goto done2;
3451 }
3452 if (IS_64BIT_PROCESS(p)) {
3453 user_hdtr.headers = user64_hdtr.headers;
3454 user_hdtr.hdr_cnt = user64_hdtr.hdr_cnt;
3455 user_hdtr.trailers = user64_hdtr.trailers;
3456 user_hdtr.trl_cnt = user64_hdtr.trl_cnt;
3457 } else {
3458 user_hdtr.headers = user32_hdtr.headers;
3459 user_hdtr.hdr_cnt = user32_hdtr.hdr_cnt;
3460 user_hdtr.trailers = user32_hdtr.trailers;
3461 user_hdtr.trl_cnt = user32_hdtr.trl_cnt;
3462 }
3463
3464 /*
3465 * Send any headers. Wimp out and use writev(2).
3466 */
3467 if (user_hdtr.headers != USER_ADDR_NULL) {
3468 bzero(&nuap, sizeof(struct writev_args));
3469 nuap.fd = uap->s;
3470 nuap.iovp = user_hdtr.headers;
3471 nuap.iovcnt = user_hdtr.hdr_cnt;
3472 error = writev_nocancel(p, &nuap, &writev_retval);
3473 if (error) {
3474 goto done2;
3475 }
3476 sbytes += writev_retval;
3477 }
3478 }
3479
3480 /*
3481 * Get the file size for 2 reasons:
3482 * 1. We don't want to allocate more mbufs than necessary
3483 * 2. We don't want to read past the end of file
3484 */
3485 if ((error = vnode_size(vp, &file_size, vfs_context_current())) != 0) {
3486 goto done2;
3487 }
3488
3489 /*
3490 * Simply read file data into a chain of mbufs that used with scatter
3491 * gather reads. We're not (yet?) setup to use zero copy external
3492 * mbufs that point to the file pages.
3493 */
3494 socket_lock(so, 1);
3495 error = sblock(&so->so_snd, SBL_WAIT);
3496 if (error) {
3497 socket_unlock(so, 1);
3498 goto done2;
3499 }
3500 for (off = uap->offset;; off += xfsize, sbytes += xfsize) {
3501 mbuf_t m0 = NULL, m;
3502 unsigned int nbufs = SFUIOBUFS, i;
3503 uio_t auio;
3504 char uio_buf[UIO_SIZEOF(SFUIOBUFS)]; /* 1 KB !!! */
3505 size_t uiolen;
3506 user_ssize_t rlen;
3507 off_t pgoff;
3508 size_t pktlen;
3509 boolean_t jumbocl;
3510
3511 /*
3512 * Calculate the amount to transfer.
3513 * Align to round number of pages.
3514 * Not to exceed send socket buffer,
3515 * the EOF, or the passed in nbytes.
3516 */
3517 xfsize = sbspace(&so->so_snd);
3518
3519 if (xfsize <= 0) {
3520 if (so->so_state & SS_CANTSENDMORE) {
3521 error = EPIPE;
3522 goto done3;
3523 } else if ((so->so_state & SS_NBIO)) {
3524 error = EAGAIN;
3525 goto done3;
3526 } else {
3527 xfsize = PAGE_SIZE;
3528 }
3529 }
3530
3531 if (xfsize > SENDFILE_MAX_BYTES) {
3532 xfsize = SENDFILE_MAX_BYTES;
3533 } else if (xfsize > PAGE_SIZE) {
3534 xfsize = trunc_page(xfsize);
3535 }
3536 pgoff = off & PAGE_MASK_64;
3537 if (pgoff > 0 && PAGE_SIZE - pgoff < xfsize) {
3538 xfsize = PAGE_SIZE_64 - pgoff;
3539 }
3540 if (nbytes && xfsize > (nbytes - sbytes)) {
3541 xfsize = nbytes - sbytes;
3542 }
3543 if (xfsize <= 0) {
3544 break;
3545 }
3546 if (off + xfsize > file_size) {
3547 xfsize = file_size - off;
3548 }
3549 if (xfsize <= 0) {
3550 break;
3551 }
3552
3553 /*
3554 * Attempt to use larger than system page-size clusters for
3555 * large writes only if there is a jumbo cluster pool and
3556 * if the socket is marked accordingly.
3557 */
3558 jumbocl = sosendjcl && njcl > 0 &&
3559 ((so->so_flags & SOF_MULTIPAGES) || sosendjcl_ignore_capab);
3560
3561 socket_unlock(so, 0);
3562 alloc_sendpkt(M_WAIT, xfsize, &nbufs, &m0, jumbocl);
3563 pktlen = mbuf_pkthdr_maxlen(m0);
3564 if (pktlen < (size_t)xfsize) {
3565 xfsize = pktlen;
3566 }
3567
3568 auio = uio_createwithbuffer(nbufs, off, UIO_SYSSPACE,
3569 UIO_READ, &uio_buf[0], sizeof(uio_buf));
3570 if (auio == NULL) {
3571 printf("sendfile failed. nbufs = %d. %s", nbufs,
3572 "File a radar related to rdar://10146739.\n");
3573 mbuf_freem(m0);
3574 error = ENXIO;
3575 socket_lock(so, 0);
3576 goto done3;
3577 }
3578
3579 for (i = 0, m = m0, uiolen = 0;
3580 i < nbufs && m != NULL && uiolen < (size_t)xfsize;
3581 i++, m = mbuf_next(m)) {
3582 size_t mlen = mbuf_maxlen(m);
3583
3584 if (mlen + uiolen > (size_t)xfsize) {
3585 mlen = xfsize - uiolen;
3586 }
3587 mbuf_setlen(m, mlen);
3588 uio_addiov(auio, CAST_USER_ADDR_T(mbuf_datastart(m)),
3589 mlen);
3590 uiolen += mlen;
3591 }
3592
3593 if (xfsize != uio_resid(auio)) {
3594 printf("sendfile: xfsize: %lld != uio_resid(auio): "
3595 "%lld\n", xfsize, (long long)uio_resid(auio));
3596 }
3597
3598 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_START),
3599 uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
3600 (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
3601 error = fo_read(fp, auio, FOF_OFFSET, &context);
3602 socket_lock(so, 0);
3603 if (error != 0) {
3604 if (uio_resid(auio) != xfsize && (error == ERESTART ||
3605 error == EINTR || error == EWOULDBLOCK)) {
3606 error = 0;
3607 } else {
3608 mbuf_freem(m0);
3609 goto done3;
3610 }
3611 }
3612 xfsize -= uio_resid(auio);
3613 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_END),
3614 uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
3615 (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
3616
3617 if (xfsize == 0) {
3618 // printf("sendfile: fo_read 0 bytes, EOF\n");
3619 break;
3620 }
3621 if (xfsize + off > file_size) {
3622 printf("sendfile: xfsize: %lld + off: %lld > file_size:"
3623 "%lld\n", xfsize, off, file_size);
3624 }
3625 for (i = 0, m = m0, rlen = 0;
3626 i < nbufs && m != NULL && rlen < xfsize;
3627 i++, m = mbuf_next(m)) {
3628 size_t mlen = mbuf_maxlen(m);
3629
3630 if (rlen + mlen > (size_t)xfsize) {
3631 mlen = xfsize - rlen;
3632 }
3633 mbuf_setlen(m, mlen);
3634
3635 rlen += mlen;
3636 }
3637 mbuf_pkthdr_setlen(m0, xfsize);
3638
3639 retry_space:
3640 /*
3641 * Make sure that the socket is still able to take more data.
3642 * CANTSENDMORE being true usually means that the connection
3643 * was closed. so_error is true when an error was sensed after
3644 * a previous send.
3645 * The state is checked after the page mapping and buffer
3646 * allocation above since those operations may block and make
3647 * any socket checks stale. From this point forward, nothing
3648 * blocks before the pru_send (or more accurately, any blocking
3649 * results in a loop back to here to re-check).
3650 */
3651 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
3652 if (so->so_state & SS_CANTSENDMORE) {
3653 error = EPIPE;
3654 } else {
3655 error = so->so_error;
3656 so->so_error = 0;
3657 }
3658 m_freem(m0);
3659 goto done3;
3660 }
3661 /*
3662 * Wait for socket space to become available. We do this just
3663 * after checking the connection state above in order to avoid
3664 * a race condition with sbwait().
3665 */
3666 if (sbspace(&so->so_snd) < (long)so->so_snd.sb_lowat) {
3667 if (so->so_state & SS_NBIO) {
3668 m_freem(m0);
3669 error = EAGAIN;
3670 goto done3;
3671 }
3672 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT |
3673 DBG_FUNC_START), uap->s, 0, 0, 0, 0);
3674 error = sbwait(&so->so_snd);
3675 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT |
3676 DBG_FUNC_END), uap->s, 0, 0, 0, 0);
3677 /*
3678 * An error from sbwait usually indicates that we've
3679 * been interrupted by a signal. If we've sent anything
3680 * then return bytes sent, otherwise return the error.
3681 */
3682 if (error) {
3683 m_freem(m0);
3684 goto done3;
3685 }
3686 goto retry_space;
3687 }
3688
3689 struct mbuf *control = NULL;
3690 {
3691 /*
3692 * Socket filter processing
3693 */
3694
3695 error = sflt_data_out(so, NULL, &m0, &control, 0);
3696 if (error) {
3697 if (error == EJUSTRETURN) {
3698 error = 0;
3699 continue;
3700 }
3701 goto done3;
3702 }
3703 /*
3704 * End Socket filter processing
3705 */
3706 }
3707 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
3708 uap->s, 0, 0, 0, 0);
3709 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m0,
3710 0, control, p);
3711 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
3712 uap->s, 0, 0, 0, 0);
3713 if (error) {
3714 goto done3;
3715 }
3716 }
3717 sbunlock(&so->so_snd, FALSE); /* will unlock socket */
3718 /*
3719 * Send trailers. Wimp out and use writev(2).
3720 */
3721 if (uap->hdtr != USER_ADDR_NULL &&
3722 user_hdtr.trailers != USER_ADDR_NULL) {
3723 bzero(&nuap, sizeof(struct writev_args));
3724 nuap.fd = uap->s;
3725 nuap.iovp = user_hdtr.trailers;
3726 nuap.iovcnt = user_hdtr.trl_cnt;
3727 error = writev_nocancel(p, &nuap, &writev_retval);
3728 if (error) {
3729 goto done2;
3730 }
3731 sbytes += writev_retval;
3732 }
3733 done2:
3734 file_drop(uap->s);
3735 done1:
3736 file_drop(uap->fd);
3737 done:
3738 if (uap->nbytes != USER_ADDR_NULL) {
3739 /* XXX this appears bogus for some early failure conditions */
3740 copyout(&sbytes, uap->nbytes, sizeof(off_t));
3741 }
3742 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_END), uap->s,
3743 (unsigned int)((sbytes >> 32) & 0x0ffffffff),
3744 (unsigned int)(sbytes & 0x0ffffffff), error, 0);
3745 return error;
3746 done3:
3747 sbunlock(&so->so_snd, FALSE); /* will unlock socket */
3748 goto done2;
3749 }
3750
3751
3752 #endif /* SENDFILE */