]> git.saurik.com Git - apple/xnu.git/blob - bsd/kern/uipc_syscalls.c
ca5e7dd6e8e17768124c2da4c6d986ff2819c48e
[apple/xnu.git] / bsd / kern / uipc_syscalls.c
1 /*
2 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1982, 1986, 1989, 1990, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * sendfile(2) and related extensions:
33 * Copyright (c) 1998, David Greenman. All rights reserved.
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. All advertising materials mentioning features or use of this software
44 * must display the following acknowledgement:
45 * This product includes software developed by the University of
46 * California, Berkeley and its contributors.
47 * 4. Neither the name of the University nor the names of its contributors
48 * may be used to endorse or promote products derived from this software
49 * without specific prior written permission.
50 *
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE.
62 *
63 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94
64 */
65 /*
66 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
67 * support for mandatory and extensible security protections. This notice
68 * is included in support of clause 2.2 (b) of the Apple Public License,
69 * Version 2.0.
70 */
71
72 #include <sys/param.h>
73 #include <sys/systm.h>
74 #include <sys/filedesc.h>
75 #include <sys/proc_internal.h>
76 #include <sys/file_internal.h>
77 #include <sys/vnode_internal.h>
78 #include <sys/malloc.h>
79 #include <sys/mcache.h>
80 #include <sys/mbuf.h>
81 #include <kern/locks.h>
82 #include <sys/domain.h>
83 #include <sys/protosw.h>
84 #include <sys/signalvar.h>
85 #include <sys/socket.h>
86 #include <sys/socketvar.h>
87 #include <sys/kernel.h>
88 #include <sys/uio_internal.h>
89 #include <sys/kauth.h>
90 #include <kern/task.h>
91 #include <sys/priv.h>
92 #include <sys/sysctl.h>
93 #include <sys/sys_domain.h>
94
95 #include <security/audit/audit.h>
96
97 #include <sys/kdebug.h>
98 #include <sys/sysproto.h>
99 #include <netinet/in.h>
100 #include <net/route.h>
101 #include <netinet/in_pcb.h>
102
103 #include <os/ptrtools.h>
104
105 #if CONFIG_MACF_SOCKET_SUBSET
106 #include <security/mac_framework.h>
107 #endif /* MAC_SOCKET_SUBSET */
108
109 #define f_flag f_fglob->fg_flag
110 #define f_type f_fglob->fg_ops->fo_type
111 #define f_msgcount f_fglob->fg_msgcount
112 #define f_cred f_fglob->fg_cred
113 #define f_ops f_fglob->fg_ops
114 #define f_offset f_fglob->fg_offset
115 #define f_data f_fglob->fg_data
116
117 #define DBG_LAYER_IN_BEG NETDBG_CODE(DBG_NETSOCK, 0)
118 #define DBG_LAYER_IN_END NETDBG_CODE(DBG_NETSOCK, 2)
119 #define DBG_LAYER_OUT_BEG NETDBG_CODE(DBG_NETSOCK, 1)
120 #define DBG_LAYER_OUT_END NETDBG_CODE(DBG_NETSOCK, 3)
121 #define DBG_FNC_SENDMSG NETDBG_CODE(DBG_NETSOCK, (1 << 8) | 1)
122 #define DBG_FNC_SENDTO NETDBG_CODE(DBG_NETSOCK, (2 << 8) | 1)
123 #define DBG_FNC_SENDIT NETDBG_CODE(DBG_NETSOCK, (3 << 8) | 1)
124 #define DBG_FNC_RECVFROM NETDBG_CODE(DBG_NETSOCK, (5 << 8))
125 #define DBG_FNC_RECVMSG NETDBG_CODE(DBG_NETSOCK, (6 << 8))
126 #define DBG_FNC_RECVIT NETDBG_CODE(DBG_NETSOCK, (7 << 8))
127 #define DBG_FNC_SENDFILE NETDBG_CODE(DBG_NETSOCK, (10 << 8))
128 #define DBG_FNC_SENDFILE_WAIT NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 1))
129 #define DBG_FNC_SENDFILE_READ NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 2))
130 #define DBG_FNC_SENDFILE_SEND NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 3))
131 #define DBG_FNC_SENDMSG_X NETDBG_CODE(DBG_NETSOCK, (11 << 8))
132 #define DBG_FNC_RECVMSG_X NETDBG_CODE(DBG_NETSOCK, (12 << 8))
133
134 #if DEBUG || DEVELOPMENT
135 #define DEBUG_KERNEL_ADDRPERM(_v) (_v)
136 #define DBG_PRINTF(...) printf(__VA_ARGS__)
137 #else
138 #define DEBUG_KERNEL_ADDRPERM(_v) VM_KERNEL_ADDRPERM(_v)
139 #define DBG_PRINTF(...) do { } while (0)
140 #endif
141
142 /* TODO: should be in header file */
143 int falloc_locked(proc_t, struct fileproc **, int *, vfs_context_t, int);
144
145 static int sendit(struct proc *, struct socket *, struct user_msghdr *, uio_t,
146 int, int32_t *);
147 static int recvit(struct proc *, int, struct user_msghdr *, uio_t, user_addr_t,
148 int32_t *);
149 static int connectit(struct socket *, struct sockaddr *);
150 static int getsockaddr(struct socket *, struct sockaddr **, user_addr_t,
151 size_t, boolean_t);
152 static int getsockaddr_s(struct socket *, struct sockaddr_storage *,
153 user_addr_t, size_t, boolean_t);
154 #if SENDFILE
155 static void alloc_sendpkt(int, size_t, unsigned int *, struct mbuf **,
156 boolean_t);
157 #endif /* SENDFILE */
158 static int connectx_nocancel(struct proc *, struct connectx_args *, int *);
159 static int connectitx(struct socket *, struct sockaddr *,
160 struct sockaddr *, struct proc *, uint32_t, sae_associd_t,
161 sae_connid_t *, uio_t, unsigned int, user_ssize_t *);
162 static int disconnectx_nocancel(struct proc *, struct disconnectx_args *,
163 int *);
164 static int socket_common(struct proc *, int, int, int, pid_t, int32_t *, int);
165
166 static int internalize_user_msghdr_array(const void *, int, int, u_int,
167 struct user_msghdr_x *, struct uio **);
168 static u_int externalize_user_msghdr_array(void *, int, int, u_int,
169 const struct user_msghdr_x *, struct uio **);
170
171 static void free_uio_array(struct uio **, u_int);
172 static int uio_array_is_valid(struct uio **, u_int);
173 static int recv_msg_array_is_valid(struct recv_msg_elem *, u_int);
174 static int internalize_recv_msghdr_array(const void *, int, int,
175 u_int, struct user_msghdr_x *, struct recv_msg_elem *);
176 static u_int externalize_recv_msghdr_array(void *, int, int, u_int,
177 const struct user_msghdr_x *, struct recv_msg_elem *);
178 static struct recv_msg_elem *alloc_recv_msg_array(u_int count);
179 static void free_recv_msg_array(struct recv_msg_elem *, u_int);
180
181 SYSCTL_DECL(_kern_ipc);
182
183 static u_int somaxsendmsgx = 100;
184 SYSCTL_UINT(_kern_ipc, OID_AUTO, maxsendmsgx,
185 CTLFLAG_RW | CTLFLAG_LOCKED, &somaxsendmsgx, 0, "");
186 static u_int somaxrecvmsgx = 100;
187 SYSCTL_UINT(_kern_ipc, OID_AUTO, maxrecvmsgx,
188 CTLFLAG_RW | CTLFLAG_LOCKED, &somaxrecvmsgx, 0, "");
189
190 /*
191 * System call interface to the socket abstraction.
192 */
193
194 extern const struct fileops socketops;
195
196 /*
197 * Returns: 0 Success
198 * EACCES Mandatory Access Control failure
199 * falloc:ENFILE
200 * falloc:EMFILE
201 * falloc:ENOMEM
202 * socreate:EAFNOSUPPORT
203 * socreate:EPROTOTYPE
204 * socreate:EPROTONOSUPPORT
205 * socreate:ENOBUFS
206 * socreate:ENOMEM
207 * socreate:??? [other protocol families, IPSEC]
208 */
209 int
210 socket(struct proc *p,
211 struct socket_args *uap,
212 int32_t *retval)
213 {
214 return socket_common(p, uap->domain, uap->type, uap->protocol,
215 proc_selfpid(), retval, 0);
216 }
217
218 int
219 socket_delegate(struct proc *p,
220 struct socket_delegate_args *uap,
221 int32_t *retval)
222 {
223 return socket_common(p, uap->domain, uap->type, uap->protocol,
224 uap->epid, retval, 1);
225 }
226
227 static int
228 socket_common(struct proc *p,
229 int domain,
230 int type,
231 int protocol,
232 pid_t epid,
233 int32_t *retval,
234 int delegate)
235 {
236 struct socket *so;
237 struct fileproc *fp;
238 int fd, error;
239
240 AUDIT_ARG(socket, domain, type, protocol);
241 #if CONFIG_MACF_SOCKET_SUBSET
242 if ((error = mac_socket_check_create(kauth_cred_get(), domain,
243 type, protocol)) != 0) {
244 return error;
245 }
246 #endif /* MAC_SOCKET_SUBSET */
247
248 if (delegate) {
249 error = priv_check_cred(kauth_cred_get(),
250 PRIV_NET_PRIVILEGED_SOCKET_DELEGATE, 0);
251 if (error) {
252 return EACCES;
253 }
254 }
255
256 error = falloc(p, &fp, &fd, vfs_context_current());
257 if (error) {
258 return error;
259 }
260 fp->f_flag = FREAD | FWRITE;
261 fp->f_ops = &socketops;
262
263 if (delegate) {
264 error = socreate_delegate(domain, &so, type, protocol, epid);
265 } else {
266 error = socreate(domain, &so, type, protocol);
267 }
268
269 if (error) {
270 fp_free(p, fd, fp);
271 } else {
272 fp->f_data = (caddr_t)so;
273
274 proc_fdlock(p);
275 procfdtbl_releasefd(p, fd, NULL);
276
277 fp_drop(p, fd, fp, 1);
278 proc_fdunlock(p);
279
280 *retval = fd;
281 if (ENTR_SHOULDTRACE) {
282 KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
283 fd, 0, (int64_t)VM_KERNEL_ADDRPERM(so));
284 }
285 }
286 return error;
287 }
288
289 /*
290 * Returns: 0 Success
291 * EDESTADDRREQ Destination address required
292 * EBADF Bad file descriptor
293 * EACCES Mandatory Access Control failure
294 * file_socket:ENOTSOCK
295 * file_socket:EBADF
296 * getsockaddr:ENAMETOOLONG Filename too long
297 * getsockaddr:EINVAL Invalid argument
298 * getsockaddr:ENOMEM Not enough space
299 * getsockaddr:EFAULT Bad address
300 * sobindlock:???
301 */
302 /* ARGSUSED */
303 int
304 bind(__unused proc_t p, struct bind_args *uap, __unused int32_t *retval)
305 {
306 struct sockaddr_storage ss;
307 struct sockaddr *sa = NULL;
308 struct socket *so;
309 boolean_t want_free = TRUE;
310 int error;
311
312 AUDIT_ARG(fd, uap->s);
313 error = file_socket(uap->s, &so);
314 if (error != 0) {
315 return error;
316 }
317 if (so == NULL) {
318 error = EBADF;
319 goto out;
320 }
321 if (uap->name == USER_ADDR_NULL) {
322 error = EDESTADDRREQ;
323 goto out;
324 }
325 if (uap->namelen > sizeof(ss)) {
326 error = getsockaddr(so, &sa, uap->name, uap->namelen, TRUE);
327 } else {
328 error = getsockaddr_s(so, &ss, uap->name, uap->namelen, TRUE);
329 if (error == 0) {
330 sa = (struct sockaddr *)&ss;
331 want_free = FALSE;
332 }
333 }
334 if (error != 0) {
335 goto out;
336 }
337 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
338 #if CONFIG_MACF_SOCKET_SUBSET
339 if ((sa != NULL && sa->sa_family == AF_SYSTEM) ||
340 (error = mac_socket_check_bind(kauth_cred_get(), so, sa)) == 0) {
341 error = sobindlock(so, sa, 1); /* will lock socket */
342 }
343 #else
344 error = sobindlock(so, sa, 1); /* will lock socket */
345 #endif /* MAC_SOCKET_SUBSET */
346 if (want_free) {
347 FREE(sa, M_SONAME);
348 }
349 out:
350 file_drop(uap->s);
351 return error;
352 }
353
354 /*
355 * Returns: 0 Success
356 * EBADF
357 * EACCES Mandatory Access Control failure
358 * file_socket:ENOTSOCK
359 * file_socket:EBADF
360 * solisten:EINVAL
361 * solisten:EOPNOTSUPP
362 * solisten:???
363 */
364 int
365 listen(__unused struct proc *p, struct listen_args *uap,
366 __unused int32_t *retval)
367 {
368 int error;
369 struct socket *so;
370
371 AUDIT_ARG(fd, uap->s);
372 error = file_socket(uap->s, &so);
373 if (error) {
374 return error;
375 }
376 if (so != NULL)
377 #if CONFIG_MACF_SOCKET_SUBSET
378 {
379 error = mac_socket_check_listen(kauth_cred_get(), so);
380 if (error == 0) {
381 error = solisten(so, uap->backlog);
382 }
383 }
384 #else
385 { error = solisten(so, uap->backlog);}
386 #endif /* MAC_SOCKET_SUBSET */
387 else {
388 error = EBADF;
389 }
390
391 file_drop(uap->s);
392 return error;
393 }
394
395 /*
396 * Returns: fp_getfsock:EBADF Bad file descriptor
397 * fp_getfsock:EOPNOTSUPP ...
398 * xlate => :ENOTSOCK Socket operation on non-socket
399 * :EFAULT Bad address on copyin/copyout
400 * :EBADF Bad file descriptor
401 * :EOPNOTSUPP Operation not supported on socket
402 * :EINVAL Invalid argument
403 * :EWOULDBLOCK Operation would block
404 * :ECONNABORTED Connection aborted
405 * :EINTR Interrupted function
406 * :EACCES Mandatory Access Control failure
407 * falloc_locked:ENFILE Too many files open in system
408 * falloc_locked::EMFILE Too many open files
409 * falloc_locked::ENOMEM Not enough space
410 * 0 Success
411 */
412 int
413 accept_nocancel(struct proc *p, struct accept_nocancel_args *uap,
414 int32_t *retval)
415 {
416 struct fileproc *fp;
417 struct sockaddr *sa = NULL;
418 socklen_t namelen;
419 int error;
420 struct socket *head, *so = NULL;
421 lck_mtx_t *mutex_held;
422 int fd = uap->s;
423 int newfd;
424 short fflag; /* type must match fp->f_flag */
425 int dosocklock = 0;
426
427 *retval = -1;
428
429 AUDIT_ARG(fd, uap->s);
430
431 if (uap->name) {
432 error = copyin(uap->anamelen, (caddr_t)&namelen,
433 sizeof(socklen_t));
434 if (error) {
435 return error;
436 }
437 }
438 error = fp_getfsock(p, fd, &fp, &head);
439 if (error) {
440 if (error == EOPNOTSUPP) {
441 error = ENOTSOCK;
442 }
443 return error;
444 }
445 if (head == NULL) {
446 error = EBADF;
447 goto out;
448 }
449 #if CONFIG_MACF_SOCKET_SUBSET
450 if ((error = mac_socket_check_accept(kauth_cred_get(), head)) != 0) {
451 goto out;
452 }
453 #endif /* MAC_SOCKET_SUBSET */
454
455 socket_lock(head, 1);
456
457 if (head->so_proto->pr_getlock != NULL) {
458 mutex_held = (*head->so_proto->pr_getlock)(head, PR_F_WILLUNLOCK);
459 dosocklock = 1;
460 } else {
461 mutex_held = head->so_proto->pr_domain->dom_mtx;
462 dosocklock = 0;
463 }
464
465 if ((head->so_options & SO_ACCEPTCONN) == 0) {
466 if ((head->so_proto->pr_flags & PR_CONNREQUIRED) == 0) {
467 error = EOPNOTSUPP;
468 } else {
469 /* POSIX: The socket is not accepting connections */
470 error = EINVAL;
471 }
472 socket_unlock(head, 1);
473 goto out;
474 }
475 check_again:
476 if ((head->so_state & SS_NBIO) && head->so_comp.tqh_first == NULL) {
477 socket_unlock(head, 1);
478 error = EWOULDBLOCK;
479 goto out;
480 }
481 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
482 if (head->so_state & SS_CANTRCVMORE) {
483 head->so_error = ECONNABORTED;
484 break;
485 }
486 if (head->so_usecount < 1) {
487 panic("accept: head=%p refcount=%d\n", head,
488 head->so_usecount);
489 }
490 error = msleep((caddr_t)&head->so_timeo, mutex_held,
491 PSOCK | PCATCH, "accept", 0);
492 if (head->so_usecount < 1) {
493 panic("accept: 2 head=%p refcount=%d\n", head,
494 head->so_usecount);
495 }
496 if ((head->so_state & SS_DRAINING)) {
497 error = ECONNABORTED;
498 }
499 if (error) {
500 socket_unlock(head, 1);
501 goto out;
502 }
503 }
504 if (head->so_error) {
505 error = head->so_error;
506 head->so_error = 0;
507 socket_unlock(head, 1);
508 goto out;
509 }
510
511 /*
512 * At this point we know that there is at least one connection
513 * ready to be accepted. Remove it from the queue prior to
514 * allocating the file descriptor for it since falloc() may
515 * block allowing another process to accept the connection
516 * instead.
517 */
518 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
519
520 so_acquire_accept_list(head, NULL);
521 if (TAILQ_EMPTY(&head->so_comp)) {
522 so_release_accept_list(head);
523 goto check_again;
524 }
525
526 so = TAILQ_FIRST(&head->so_comp);
527 TAILQ_REMOVE(&head->so_comp, so, so_list);
528 so->so_head = NULL;
529 so->so_state &= ~SS_COMP;
530 head->so_qlen--;
531 so_release_accept_list(head);
532
533 /* unlock head to avoid deadlock with select, keep a ref on head */
534 socket_unlock(head, 0);
535
536 #if CONFIG_MACF_SOCKET_SUBSET
537 /*
538 * Pass the pre-accepted socket to the MAC framework. This is
539 * cheaper than allocating a file descriptor for the socket,
540 * calling the protocol accept callback, and possibly freeing
541 * the file descriptor should the MAC check fails.
542 */
543 if ((error = mac_socket_check_accepted(kauth_cred_get(), so)) != 0) {
544 socket_lock(so, 1);
545 so->so_state &= ~SS_NOFDREF;
546 socket_unlock(so, 1);
547 soclose(so);
548 /* Drop reference on listening socket */
549 sodereference(head);
550 goto out;
551 }
552 #endif /* MAC_SOCKET_SUBSET */
553
554 /*
555 * Pass the pre-accepted socket to any interested socket filter(s).
556 * Upon failure, the socket would have been closed by the callee.
557 */
558 if (so->so_filt != NULL && (error = soacceptfilter(so, head)) != 0) {
559 /* Drop reference on listening socket */
560 sodereference(head);
561 /* Propagate socket filter's error code to the caller */
562 goto out;
563 }
564
565 fflag = fp->f_flag;
566 error = falloc(p, &fp, &newfd, vfs_context_current());
567 if (error) {
568 /*
569 * Probably ran out of file descriptors.
570 *
571 * <rdar://problem/8554930>
572 * Don't put this back on the socket like we used to, that
573 * just causes the client to spin. Drop the socket.
574 */
575 socket_lock(so, 1);
576 so->so_state &= ~SS_NOFDREF;
577 socket_unlock(so, 1);
578 soclose(so);
579 sodereference(head);
580 goto out;
581 }
582 *retval = newfd;
583 fp->f_flag = fflag;
584 fp->f_ops = &socketops;
585 fp->f_data = (caddr_t)so;
586
587 socket_lock(head, 0);
588 if (dosocklock) {
589 socket_lock(so, 1);
590 }
591
592 /* Sync socket non-blocking/async state with file flags */
593 if (fp->f_flag & FNONBLOCK) {
594 so->so_state |= SS_NBIO;
595 } else {
596 so->so_state &= ~SS_NBIO;
597 }
598
599 if (fp->f_flag & FASYNC) {
600 so->so_state |= SS_ASYNC;
601 so->so_rcv.sb_flags |= SB_ASYNC;
602 so->so_snd.sb_flags |= SB_ASYNC;
603 } else {
604 so->so_state &= ~SS_ASYNC;
605 so->so_rcv.sb_flags &= ~SB_ASYNC;
606 so->so_snd.sb_flags &= ~SB_ASYNC;
607 }
608
609 (void) soacceptlock(so, &sa, 0);
610 socket_unlock(head, 1);
611 if (sa == NULL) {
612 namelen = 0;
613 if (uap->name) {
614 goto gotnoname;
615 }
616 error = 0;
617 goto releasefd;
618 }
619 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
620
621 if (uap->name) {
622 socklen_t sa_len;
623
624 /* save sa_len before it is destroyed */
625 sa_len = sa->sa_len;
626 namelen = MIN(namelen, sa_len);
627 error = copyout(sa, uap->name, namelen);
628 if (!error) {
629 /* return the actual, untruncated address length */
630 namelen = sa_len;
631 }
632 gotnoname:
633 error = copyout((caddr_t)&namelen, uap->anamelen,
634 sizeof(socklen_t));
635 }
636 FREE(sa, M_SONAME);
637
638 releasefd:
639 /*
640 * If the socket has been marked as inactive by sosetdefunct(),
641 * disallow further operations on it.
642 */
643 if (so->so_flags & SOF_DEFUNCT) {
644 sodefunct(current_proc(), so,
645 SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL);
646 }
647
648 if (dosocklock) {
649 socket_unlock(so, 1);
650 }
651
652 proc_fdlock(p);
653 procfdtbl_releasefd(p, newfd, NULL);
654 fp_drop(p, newfd, fp, 1);
655 proc_fdunlock(p);
656
657 out:
658 file_drop(fd);
659
660 if (error == 0 && ENTR_SHOULDTRACE) {
661 KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
662 newfd, 0, (int64_t)VM_KERNEL_ADDRPERM(so));
663 }
664 return error;
665 }
666
667 int
668 accept(struct proc *p, struct accept_args *uap, int32_t *retval)
669 {
670 __pthread_testcancel(1);
671 return accept_nocancel(p, (struct accept_nocancel_args *)uap,
672 retval);
673 }
674
675 /*
676 * Returns: 0 Success
677 * EBADF Bad file descriptor
678 * EALREADY Connection already in progress
679 * EINPROGRESS Operation in progress
680 * ECONNABORTED Connection aborted
681 * EINTR Interrupted function
682 * EACCES Mandatory Access Control failure
683 * file_socket:ENOTSOCK
684 * file_socket:EBADF
685 * getsockaddr:ENAMETOOLONG Filename too long
686 * getsockaddr:EINVAL Invalid argument
687 * getsockaddr:ENOMEM Not enough space
688 * getsockaddr:EFAULT Bad address
689 * soconnectlock:EOPNOTSUPP
690 * soconnectlock:EISCONN
691 * soconnectlock:??? [depends on protocol, filters]
692 * msleep:EINTR
693 *
694 * Imputed: so_error error may be set from so_error, which
695 * may have been set by soconnectlock.
696 */
697 /* ARGSUSED */
698 int
699 connect(struct proc *p, struct connect_args *uap, int32_t *retval)
700 {
701 __pthread_testcancel(1);
702 return connect_nocancel(p, (struct connect_nocancel_args *)uap,
703 retval);
704 }
705
706 int
707 connect_nocancel(proc_t p, struct connect_nocancel_args *uap, int32_t *retval)
708 {
709 #pragma unused(p, retval)
710 struct socket *so;
711 struct sockaddr_storage ss;
712 struct sockaddr *sa = NULL;
713 int error;
714 int fd = uap->s;
715 boolean_t dgram;
716
717 AUDIT_ARG(fd, uap->s);
718 error = file_socket(fd, &so);
719 if (error != 0) {
720 return error;
721 }
722 if (so == NULL) {
723 error = EBADF;
724 goto out;
725 }
726
727 /*
728 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
729 * if this is a datagram socket; translate for other types.
730 */
731 dgram = (so->so_type == SOCK_DGRAM);
732
733 /* Get socket address now before we obtain socket lock */
734 if (uap->namelen > sizeof(ss)) {
735 error = getsockaddr(so, &sa, uap->name, uap->namelen, !dgram);
736 } else {
737 error = getsockaddr_s(so, &ss, uap->name, uap->namelen, !dgram);
738 if (error == 0) {
739 sa = (struct sockaddr *)&ss;
740 }
741 }
742 if (error != 0) {
743 goto out;
744 }
745
746 error = connectit(so, sa);
747
748 if (sa != NULL && sa != SA(&ss)) {
749 FREE(sa, M_SONAME);
750 }
751 if (error == ERESTART) {
752 error = EINTR;
753 }
754 out:
755 file_drop(fd);
756 return error;
757 }
758
759 static int
760 connectx_nocancel(struct proc *p, struct connectx_args *uap, int *retval)
761 {
762 #pragma unused(p, retval)
763 struct sockaddr_storage ss, sd;
764 struct sockaddr *src = NULL, *dst = NULL;
765 struct socket *so;
766 int error, error1, fd = uap->socket;
767 boolean_t dgram;
768 sae_connid_t cid = SAE_CONNID_ANY;
769 struct user32_sa_endpoints ep32;
770 struct user64_sa_endpoints ep64;
771 struct user_sa_endpoints ep;
772 user_ssize_t bytes_written = 0;
773 struct user_iovec *iovp;
774 uio_t auio = NULL;
775
776 AUDIT_ARG(fd, uap->socket);
777 error = file_socket(fd, &so);
778 if (error != 0) {
779 return error;
780 }
781 if (so == NULL) {
782 error = EBADF;
783 goto out;
784 }
785
786 if (uap->endpoints == USER_ADDR_NULL) {
787 error = EINVAL;
788 goto out;
789 }
790
791 if (IS_64BIT_PROCESS(p)) {
792 error = copyin(uap->endpoints, (caddr_t)&ep64, sizeof(ep64));
793 if (error != 0) {
794 goto out;
795 }
796
797 ep.sae_srcif = ep64.sae_srcif;
798 ep.sae_srcaddr = ep64.sae_srcaddr;
799 ep.sae_srcaddrlen = ep64.sae_srcaddrlen;
800 ep.sae_dstaddr = ep64.sae_dstaddr;
801 ep.sae_dstaddrlen = ep64.sae_dstaddrlen;
802 } else {
803 error = copyin(uap->endpoints, (caddr_t)&ep32, sizeof(ep32));
804 if (error != 0) {
805 goto out;
806 }
807
808 ep.sae_srcif = ep32.sae_srcif;
809 ep.sae_srcaddr = ep32.sae_srcaddr;
810 ep.sae_srcaddrlen = ep32.sae_srcaddrlen;
811 ep.sae_dstaddr = ep32.sae_dstaddr;
812 ep.sae_dstaddrlen = ep32.sae_dstaddrlen;
813 }
814
815 /*
816 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
817 * if this is a datagram socket; translate for other types.
818 */
819 dgram = (so->so_type == SOCK_DGRAM);
820
821 /* Get socket address now before we obtain socket lock */
822 if (ep.sae_srcaddr != USER_ADDR_NULL) {
823 if (ep.sae_srcaddrlen > sizeof(ss)) {
824 error = getsockaddr(so, &src, ep.sae_srcaddr, ep.sae_srcaddrlen, dgram);
825 } else {
826 error = getsockaddr_s(so, &ss, ep.sae_srcaddr, ep.sae_srcaddrlen, dgram);
827 if (error == 0) {
828 src = (struct sockaddr *)&ss;
829 }
830 }
831
832 if (error) {
833 goto out;
834 }
835 }
836
837 if (ep.sae_dstaddr == USER_ADDR_NULL) {
838 error = EINVAL;
839 goto out;
840 }
841
842 /* Get socket address now before we obtain socket lock */
843 if (ep.sae_dstaddrlen > sizeof(sd)) {
844 error = getsockaddr(so, &dst, ep.sae_dstaddr, ep.sae_dstaddrlen, dgram);
845 } else {
846 error = getsockaddr_s(so, &sd, ep.sae_dstaddr, ep.sae_dstaddrlen, dgram);
847 if (error == 0) {
848 dst = (struct sockaddr *)&sd;
849 }
850 }
851
852 if (error) {
853 goto out;
854 }
855
856 VERIFY(dst != NULL);
857
858 if (uap->iov != USER_ADDR_NULL) {
859 /* Verify range before calling uio_create() */
860 if (uap->iovcnt <= 0 || uap->iovcnt > UIO_MAXIOV) {
861 error = EINVAL;
862 goto out;
863 }
864
865 if (uap->len == USER_ADDR_NULL) {
866 error = EINVAL;
867 goto out;
868 }
869
870 /* allocate a uio to hold the number of iovecs passed */
871 auio = uio_create(uap->iovcnt, 0,
872 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
873 UIO_WRITE);
874
875 if (auio == NULL) {
876 error = ENOMEM;
877 goto out;
878 }
879
880 /*
881 * get location of iovecs within the uio.
882 * then copyin the iovecs from user space.
883 */
884 iovp = uio_iovsaddr(auio);
885 if (iovp == NULL) {
886 error = ENOMEM;
887 goto out;
888 }
889 error = copyin_user_iovec_array(uap->iov,
890 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
891 uap->iovcnt, iovp);
892 if (error != 0) {
893 goto out;
894 }
895
896 /* finish setup of uio_t */
897 error = uio_calculateresid(auio);
898 if (error != 0) {
899 goto out;
900 }
901 }
902
903 error = connectitx(so, src, dst, p, ep.sae_srcif, uap->associd,
904 &cid, auio, uap->flags, &bytes_written);
905 if (error == ERESTART) {
906 error = EINTR;
907 }
908
909 if (uap->len != USER_ADDR_NULL) {
910 error1 = copyout(&bytes_written, uap->len, sizeof(uap->len));
911 /* give precedence to connectitx errors */
912 if ((error1 != 0) && (error == 0)) {
913 error = error1;
914 }
915 }
916
917 if (uap->connid != USER_ADDR_NULL) {
918 error1 = copyout(&cid, uap->connid, sizeof(cid));
919 /* give precedence to connectitx errors */
920 if ((error1 != 0) && (error == 0)) {
921 error = error1;
922 }
923 }
924 out:
925 file_drop(fd);
926 if (auio != NULL) {
927 uio_free(auio);
928 }
929 if (src != NULL && src != SA(&ss)) {
930 FREE(src, M_SONAME);
931 }
932 if (dst != NULL && dst != SA(&sd)) {
933 FREE(dst, M_SONAME);
934 }
935 return error;
936 }
937
/*
 * connectx
 *
 * Cancellation-point wrapper around connectx_nocancel().
 */
int
connectx(struct proc *p, struct connectx_args *uap, int *retval)
{
	int error;

	/*
	 * Due to similarity with a POSIX interface, define as
	 * an unofficial cancellation point.
	 */
	__pthread_testcancel(1);

	error = connectx_nocancel(p, uap, retval);
	return error;
}
948
949 static int
950 connectit(struct socket *so, struct sockaddr *sa)
951 {
952 int error;
953
954 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
955 #if CONFIG_MACF_SOCKET_SUBSET
956 if ((error = mac_socket_check_connect(kauth_cred_get(), so, sa)) != 0) {
957 return error;
958 }
959 #endif /* MAC_SOCKET_SUBSET */
960
961 socket_lock(so, 1);
962 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
963 error = EALREADY;
964 goto out;
965 }
966 error = soconnectlock(so, sa, 0);
967 if (error != 0) {
968 so->so_state &= ~SS_ISCONNECTING;
969 goto out;
970 }
971 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
972 error = EINPROGRESS;
973 goto out;
974 }
975 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
976 lck_mtx_t *mutex_held;
977
978 if (so->so_proto->pr_getlock != NULL) {
979 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
980 } else {
981 mutex_held = so->so_proto->pr_domain->dom_mtx;
982 }
983 error = msleep((caddr_t)&so->so_timeo, mutex_held,
984 PSOCK | PCATCH, __func__, 0);
985 if (so->so_state & SS_DRAINING) {
986 error = ECONNABORTED;
987 }
988 if (error != 0) {
989 break;
990 }
991 }
992 if (error == 0) {
993 error = so->so_error;
994 so->so_error = 0;
995 }
996 out:
997 socket_unlock(so, 1);
998 return error;
999 }
1000
1001 static int
1002 connectitx(struct socket *so, struct sockaddr *src,
1003 struct sockaddr *dst, struct proc *p, uint32_t ifscope,
1004 sae_associd_t aid, sae_connid_t *pcid, uio_t auio, unsigned int flags,
1005 user_ssize_t *bytes_written)
1006 {
1007 int error;
1008 #pragma unused (flags)
1009
1010 VERIFY(dst != NULL);
1011
1012 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), dst);
1013 #if CONFIG_MACF_SOCKET_SUBSET
1014 if ((error = mac_socket_check_connect(kauth_cred_get(), so, dst)) != 0) {
1015 return error;
1016 }
1017
1018 if (auio != NULL) {
1019 if ((error = mac_socket_check_send(kauth_cred_get(), so, dst)) != 0) {
1020 return error;
1021 }
1022 }
1023 #endif /* MAC_SOCKET_SUBSET */
1024
1025 socket_lock(so, 1);
1026 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
1027 error = EALREADY;
1028 goto out;
1029 }
1030
1031 if ((so->so_proto->pr_flags & PR_DATA_IDEMPOTENT) &&
1032 (flags & CONNECT_DATA_IDEMPOTENT)) {
1033 so->so_flags1 |= SOF1_DATA_IDEMPOTENT;
1034
1035 if (flags & CONNECT_DATA_AUTHENTICATED) {
1036 so->so_flags1 |= SOF1_DATA_AUTHENTICATED;
1037 }
1038 }
1039
1040 /*
1041 * Case 1: CONNECT_RESUME_ON_READ_WRITE set, no data.
1042 * Case 2: CONNECT_RESUME_ON_READ_WRITE set, with data (user error)
1043 * Case 3: CONNECT_RESUME_ON_READ_WRITE not set, with data
1044 * Case 3 allows user to combine write with connect even if they have
1045 * no use for TFO (such as regular TCP, and UDP).
1046 * Case 4: CONNECT_RESUME_ON_READ_WRITE not set, no data (regular case)
1047 */
1048 if ((so->so_proto->pr_flags & PR_PRECONN_WRITE) &&
1049 ((flags & CONNECT_RESUME_ON_READ_WRITE) || auio)) {
1050 so->so_flags1 |= SOF1_PRECONNECT_DATA;
1051 }
1052
1053 /*
1054 * If a user sets data idempotent and does not pass an uio, or
1055 * sets CONNECT_RESUME_ON_READ_WRITE, this is an error, reset
1056 * SOF1_DATA_IDEMPOTENT.
1057 */
1058 if (!(so->so_flags1 & SOF1_PRECONNECT_DATA) &&
1059 (so->so_flags1 & SOF1_DATA_IDEMPOTENT)) {
1060 /* We should return EINVAL instead perhaps. */
1061 so->so_flags1 &= ~SOF1_DATA_IDEMPOTENT;
1062 }
1063
1064 error = soconnectxlocked(so, src, dst, p, ifscope,
1065 aid, pcid, 0, NULL, 0, auio, bytes_written);
1066 if (error != 0) {
1067 so->so_state &= ~SS_ISCONNECTING;
1068 goto out;
1069 }
1070 /*
1071 * If, after the call to soconnectxlocked the flag is still set (in case
1072 * data has been queued and the connect() has actually been triggered,
1073 * it will have been unset by the transport), we exit immediately. There
1074 * is no reason to wait on any event.
1075 */
1076 if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
1077 error = 0;
1078 goto out;
1079 }
1080 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
1081 error = EINPROGRESS;
1082 goto out;
1083 }
1084 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
1085 lck_mtx_t *mutex_held;
1086
1087 if (so->so_proto->pr_getlock != NULL) {
1088 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
1089 } else {
1090 mutex_held = so->so_proto->pr_domain->dom_mtx;
1091 }
1092 error = msleep((caddr_t)&so->so_timeo, mutex_held,
1093 PSOCK | PCATCH, __func__, 0);
1094 if (so->so_state & SS_DRAINING) {
1095 error = ECONNABORTED;
1096 }
1097 if (error != 0) {
1098 break;
1099 }
1100 }
1101 if (error == 0) {
1102 error = so->so_error;
1103 so->so_error = 0;
1104 }
1105 out:
1106 socket_unlock(so, 1);
1107 return error;
1108 }
1109
/*
 * peeloff: placeholder system call.  It ignores all of its arguments,
 * performs the thread-cancellation test and reports success.
 */
int
peeloff(struct proc *p, struct peeloff_args *uap, int *retval)
{
#pragma unused(p, uap, retval)
	/*
	 * Because of its similarity to a POSIX interface, this call is
	 * treated as an unofficial cancellation point.
	 */
	__pthread_testcancel(1);

	return 0;
}
1121
/*
 * disconnectx: cancellable entry point; runs the cancellation test and
 * then defers to disconnectx_nocancel(), whose result it returns.
 */
int
disconnectx(struct proc *p, struct disconnectx_args *uap, int *retval)
{
	int error;

	/*
	 * Because of its similarity to a POSIX interface, this call is
	 * treated as an unofficial cancellation point.
	 */
	__pthread_testcancel(1);

	error = disconnectx_nocancel(p, uap, retval);
	return error;
}
1132
1133 static int
1134 disconnectx_nocancel(struct proc *p, struct disconnectx_args *uap, int *retval)
1135 {
1136 #pragma unused(p, retval)
1137 struct socket *so;
1138 int fd = uap->s;
1139 int error;
1140
1141 error = file_socket(fd, &so);
1142 if (error != 0) {
1143 return error;
1144 }
1145 if (so == NULL) {
1146 error = EBADF;
1147 goto out;
1148 }
1149
1150 error = sodisconnectx(so, uap->aid, uap->cid);
1151 out:
1152 file_drop(fd);
1153 return error;
1154 }
1155
1156 /*
1157 * Returns: 0 Success
1158 * socreate:EAFNOSUPPORT
1159 * socreate:EPROTOTYPE
1160 * socreate:EPROTONOSUPPORT
1161 * socreate:ENOBUFS
1162 * socreate:ENOMEM
1163 * socreate:EISCONN
1164 * socreate:??? [other protocol families, IPSEC]
1165 * falloc:ENFILE
1166 * falloc:EMFILE
1167 * falloc:ENOMEM
1168 * copyout:EFAULT
1169 * soconnect2:EINVAL
1170 * soconnect2:EPROTOTYPE
1171 * soconnect2:??? [other protocol families[
1172 */
1173 int
1174 socketpair(struct proc *p, struct socketpair_args *uap,
1175 __unused int32_t *retval)
1176 {
1177 struct fileproc *fp1, *fp2;
1178 struct socket *so1, *so2;
1179 int fd, error, sv[2];
1180
1181 AUDIT_ARG(socket, uap->domain, uap->type, uap->protocol);
1182 error = socreate(uap->domain, &so1, uap->type, uap->protocol);
1183 if (error) {
1184 return error;
1185 }
1186 error = socreate(uap->domain, &so2, uap->type, uap->protocol);
1187 if (error) {
1188 goto free1;
1189 }
1190
1191 error = falloc(p, &fp1, &fd, vfs_context_current());
1192 if (error) {
1193 goto free2;
1194 }
1195 fp1->f_flag = FREAD | FWRITE;
1196 fp1->f_ops = &socketops;
1197 fp1->f_data = (caddr_t)so1;
1198 sv[0] = fd;
1199
1200 error = falloc(p, &fp2, &fd, vfs_context_current());
1201 if (error) {
1202 goto free3;
1203 }
1204 fp2->f_flag = FREAD | FWRITE;
1205 fp2->f_ops = &socketops;
1206 fp2->f_data = (caddr_t)so2;
1207 sv[1] = fd;
1208
1209 error = soconnect2(so1, so2);
1210 if (error) {
1211 goto free4;
1212 }
1213 if (uap->type == SOCK_DGRAM) {
1214 /*
1215 * Datagram socket connection is asymmetric.
1216 */
1217 error = soconnect2(so2, so1);
1218 if (error) {
1219 goto free4;
1220 }
1221 }
1222
1223 if ((error = copyout(sv, uap->rsv, 2 * sizeof(int))) != 0) {
1224 goto free4;
1225 }
1226
1227 proc_fdlock(p);
1228 procfdtbl_releasefd(p, sv[0], NULL);
1229 procfdtbl_releasefd(p, sv[1], NULL);
1230 fp_drop(p, sv[0], fp1, 1);
1231 fp_drop(p, sv[1], fp2, 1);
1232 proc_fdunlock(p);
1233
1234 return 0;
1235 free4:
1236 fp_free(p, sv[1], fp2);
1237 free3:
1238 fp_free(p, sv[0], fp1);
1239 free2:
1240 (void) soclose(so2);
1241 free1:
1242 (void) soclose(so1);
1243 return error;
1244 }
1245
1246 /*
1247 * Returns: 0 Success
1248 * EINVAL
1249 * ENOBUFS
1250 * EBADF
1251 * EPIPE
1252 * EACCES Mandatory Access Control failure
1253 * file_socket:ENOTSOCK
1254 * file_socket:EBADF
1255 * getsockaddr:ENAMETOOLONG Filename too long
1256 * getsockaddr:EINVAL Invalid argument
1257 * getsockaddr:ENOMEM Not enough space
1258 * getsockaddr:EFAULT Bad address
1259 * <pru_sosend>:EACCES[TCP]
1260 * <pru_sosend>:EADDRINUSE[TCP]
1261 * <pru_sosend>:EADDRNOTAVAIL[TCP]
1262 * <pru_sosend>:EAFNOSUPPORT[TCP]
1263 * <pru_sosend>:EAGAIN[TCP]
1264 * <pru_sosend>:EBADF
1265 * <pru_sosend>:ECONNRESET[TCP]
1266 * <pru_sosend>:EFAULT
1267 * <pru_sosend>:EHOSTUNREACH[TCP]
1268 * <pru_sosend>:EINTR
1269 * <pru_sosend>:EINVAL
1270 * <pru_sosend>:EISCONN[AF_INET]
1271 * <pru_sosend>:EMSGSIZE[TCP]
1272 * <pru_sosend>:ENETDOWN[TCP]
1273 * <pru_sosend>:ENETUNREACH[TCP]
1274 * <pru_sosend>:ENOBUFS
1275 * <pru_sosend>:ENOMEM[TCP]
1276 * <pru_sosend>:ENOTCONN[AF_INET]
1277 * <pru_sosend>:EOPNOTSUPP
1278 * <pru_sosend>:EPERM[TCP]
1279 * <pru_sosend>:EPIPE
1280 * <pru_sosend>:EWOULDBLOCK
1281 * <pru_sosend>:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
1282 * <pru_sosend>:???[AF_INET] [whatever a filter author chooses]
1283 * <pru_sosend>:??? [value from so_error]
1284 * sockargs:???
1285 */
1286 static int
1287 sendit(struct proc *p, struct socket *so, struct user_msghdr *mp, uio_t uiop,
1288 int flags, int32_t *retval)
1289 {
1290 struct mbuf *control = NULL;
1291 struct sockaddr_storage ss;
1292 struct sockaddr *to = NULL;
1293 boolean_t want_free = TRUE;
1294 int error;
1295 user_ssize_t len;
1296
1297 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
1298
1299 if (mp->msg_name != USER_ADDR_NULL) {
1300 if (mp->msg_namelen > sizeof(ss)) {
1301 error = getsockaddr(so, &to, mp->msg_name,
1302 mp->msg_namelen, TRUE);
1303 } else {
1304 error = getsockaddr_s(so, &ss, mp->msg_name,
1305 mp->msg_namelen, TRUE);
1306 if (error == 0) {
1307 to = (struct sockaddr *)&ss;
1308 want_free = FALSE;
1309 }
1310 }
1311 if (error != 0) {
1312 goto out;
1313 }
1314 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), to);
1315 }
1316 if (mp->msg_control != USER_ADDR_NULL) {
1317 if (mp->msg_controllen < sizeof(struct cmsghdr)) {
1318 error = EINVAL;
1319 goto bad;
1320 }
1321 error = sockargs(&control, mp->msg_control,
1322 mp->msg_controllen, MT_CONTROL);
1323 if (error != 0) {
1324 goto bad;
1325 }
1326 }
1327
1328 #if CONFIG_MACF_SOCKET_SUBSET
1329 /*
1330 * We check the state without holding the socket lock;
1331 * if a race condition occurs, it would simply result
1332 * in an extra call to the MAC check function.
1333 */
1334 if (to != NULL &&
1335 !(so->so_state & SS_DEFUNCT) &&
1336 (error = mac_socket_check_send(kauth_cred_get(), so, to)) != 0) {
1337 goto bad;
1338 }
1339 #endif /* MAC_SOCKET_SUBSET */
1340
1341 len = uio_resid(uiop);
1342 error = so->so_proto->pr_usrreqs->pru_sosend(so, to, uiop, 0,
1343 control, flags);
1344 if (error != 0) {
1345 if (uio_resid(uiop) != len && (error == ERESTART ||
1346 error == EINTR || error == EWOULDBLOCK)) {
1347 error = 0;
1348 }
1349 /* Generation of SIGPIPE can be controlled per socket */
1350 if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE)) {
1351 psignal(p, SIGPIPE);
1352 }
1353 }
1354 if (error == 0) {
1355 *retval = (int)(len - uio_resid(uiop));
1356 }
1357 bad:
1358 if (to != NULL && want_free) {
1359 FREE(to, M_SONAME);
1360 }
1361 out:
1362 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error, 0, 0, 0, 0);
1363
1364 return error;
1365 }
1366
1367 /*
1368 * Returns: 0 Success
1369 * ENOMEM
1370 * sendit:??? [see sendit definition in this file]
1371 * write:??? [4056224: applicable for pipes]
1372 */
1373 int
1374 sendto(struct proc *p, struct sendto_args *uap, int32_t *retval)
1375 {
1376 __pthread_testcancel(1);
1377 return sendto_nocancel(p, (struct sendto_nocancel_args *)uap, retval);
1378 }
1379
1380 int
1381 sendto_nocancel(struct proc *p,
1382 struct sendto_nocancel_args *uap,
1383 int32_t *retval)
1384 {
1385 struct user_msghdr msg;
1386 int error;
1387 uio_t auio = NULL;
1388 struct socket *so;
1389
1390 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_START, 0, 0, 0, 0, 0);
1391 AUDIT_ARG(fd, uap->s);
1392
1393 if (uap->flags & MSG_SKIPCFIL) {
1394 error = EPERM;
1395 goto done;
1396 }
1397
1398 auio = uio_create(1, 0,
1399 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1400 UIO_WRITE);
1401 if (auio == NULL) {
1402 error = ENOMEM;
1403 goto done;
1404 }
1405 uio_addiov(auio, uap->buf, uap->len);
1406
1407 msg.msg_name = uap->to;
1408 msg.msg_namelen = uap->tolen;
1409 /* no need to set up msg_iov. sendit uses uio_t we send it */
1410 msg.msg_iov = 0;
1411 msg.msg_iovlen = 0;
1412 msg.msg_control = 0;
1413 msg.msg_flags = 0;
1414
1415 error = file_socket(uap->s, &so);
1416 if (error) {
1417 goto done;
1418 }
1419
1420 if (so == NULL) {
1421 error = EBADF;
1422 } else {
1423 error = sendit(p, so, &msg, auio, uap->flags, retval);
1424 }
1425
1426 file_drop(uap->s);
1427 done:
1428 if (auio != NULL) {
1429 uio_free(auio);
1430 }
1431
1432 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_END, error, *retval, 0, 0, 0);
1433
1434 return error;
1435 }
1436
1437 /*
1438 * Returns: 0 Success
1439 * ENOBUFS
1440 * copyin:EFAULT
1441 * sendit:??? [see sendit definition in this file]
1442 */
1443 int
1444 sendmsg(struct proc *p, struct sendmsg_args *uap, int32_t *retval)
1445 {
1446 __pthread_testcancel(1);
1447 return sendmsg_nocancel(p, (struct sendmsg_nocancel_args *)uap,
1448 retval);
1449 }
1450
1451 int
1452 sendmsg_nocancel(struct proc *p, struct sendmsg_nocancel_args *uap,
1453 int32_t *retval)
1454 {
1455 struct user32_msghdr msg32;
1456 struct user64_msghdr msg64;
1457 struct user_msghdr user_msg;
1458 caddr_t msghdrp;
1459 int size_of_msghdr;
1460 int error;
1461 uio_t auio = NULL;
1462 struct user_iovec *iovp;
1463 struct socket *so;
1464
1465 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
1466 AUDIT_ARG(fd, uap->s);
1467
1468 if (uap->flags & MSG_SKIPCFIL) {
1469 error = EPERM;
1470 goto done;
1471 }
1472
1473 if (IS_64BIT_PROCESS(p)) {
1474 msghdrp = (caddr_t)&msg64;
1475 size_of_msghdr = sizeof(msg64);
1476 } else {
1477 msghdrp = (caddr_t)&msg32;
1478 size_of_msghdr = sizeof(msg32);
1479 }
1480 error = copyin(uap->msg, msghdrp, size_of_msghdr);
1481 if (error) {
1482 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1483 return error;
1484 }
1485
1486 if (IS_64BIT_PROCESS(p)) {
1487 user_msg.msg_flags = msg64.msg_flags;
1488 user_msg.msg_controllen = msg64.msg_controllen;
1489 user_msg.msg_control = msg64.msg_control;
1490 user_msg.msg_iovlen = msg64.msg_iovlen;
1491 user_msg.msg_iov = msg64.msg_iov;
1492 user_msg.msg_namelen = msg64.msg_namelen;
1493 user_msg.msg_name = msg64.msg_name;
1494 } else {
1495 user_msg.msg_flags = msg32.msg_flags;
1496 user_msg.msg_controllen = msg32.msg_controllen;
1497 user_msg.msg_control = msg32.msg_control;
1498 user_msg.msg_iovlen = msg32.msg_iovlen;
1499 user_msg.msg_iov = msg32.msg_iov;
1500 user_msg.msg_namelen = msg32.msg_namelen;
1501 user_msg.msg_name = msg32.msg_name;
1502 }
1503
1504 if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
1505 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, EMSGSIZE,
1506 0, 0, 0, 0);
1507 return EMSGSIZE;
1508 }
1509
1510 /* allocate a uio large enough to hold the number of iovecs passed */
1511 auio = uio_create(user_msg.msg_iovlen, 0,
1512 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1513 UIO_WRITE);
1514 if (auio == NULL) {
1515 error = ENOBUFS;
1516 goto done;
1517 }
1518
1519 if (user_msg.msg_iovlen) {
1520 /*
1521 * get location of iovecs within the uio.
1522 * then copyin the iovecs from user space.
1523 */
1524 iovp = uio_iovsaddr(auio);
1525 if (iovp == NULL) {
1526 error = ENOBUFS;
1527 goto done;
1528 }
1529 error = copyin_user_iovec_array(user_msg.msg_iov,
1530 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
1531 user_msg.msg_iovlen, iovp);
1532 if (error) {
1533 goto done;
1534 }
1535 user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
1536
1537 /* finish setup of uio_t */
1538 error = uio_calculateresid(auio);
1539 if (error) {
1540 goto done;
1541 }
1542 } else {
1543 user_msg.msg_iov = 0;
1544 }
1545
1546 /* msg_flags is ignored for send */
1547 user_msg.msg_flags = 0;
1548
1549 error = file_socket(uap->s, &so);
1550 if (error) {
1551 goto done;
1552 }
1553 if (so == NULL) {
1554 error = EBADF;
1555 } else {
1556 error = sendit(p, so, &user_msg, auio, uap->flags, retval);
1557 }
1558 file_drop(uap->s);
1559 done:
1560 if (auio != NULL) {
1561 uio_free(auio);
1562 }
1563 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1564
1565 return error;
1566 }
1567
1568 int
1569 sendmsg_x(struct proc *p, struct sendmsg_x_args *uap, user_ssize_t *retval)
1570 {
1571 int error = 0;
1572 struct user_msghdr_x *user_msg_x = NULL;
1573 struct uio **uiop = NULL;
1574 struct socket *so;
1575 u_int i;
1576 struct sockaddr *to = NULL;
1577 user_ssize_t len_before = 0, len_after;
1578 int need_drop = 0;
1579 size_t size_of_msghdr;
1580 void *umsgp = NULL;
1581 u_int uiocnt;
1582 int has_addr_or_ctl = 0;
1583
1584 KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0);
1585
1586 if (uap->flags & MSG_SKIPCFIL) {
1587 error = EPERM;
1588 goto out;
1589 }
1590
1591 error = file_socket(uap->s, &so);
1592 if (error) {
1593 goto out;
1594 }
1595 need_drop = 1;
1596 if (so == NULL) {
1597 error = EBADF;
1598 goto out;
1599 }
1600
1601 /*
1602 * Input parameter range check
1603 */
1604 if (uap->cnt == 0 || uap->cnt > UIO_MAXIOV) {
1605 error = EINVAL;
1606 goto out;
1607 }
1608 /*
1609 * Clip to max currently allowed
1610 */
1611 if (uap->cnt > somaxsendmsgx) {
1612 uap->cnt = somaxsendmsgx;
1613 }
1614
1615 user_msg_x = _MALLOC(uap->cnt * sizeof(struct user_msghdr_x),
1616 M_TEMP, M_WAITOK | M_ZERO);
1617 if (user_msg_x == NULL) {
1618 DBG_PRINTF("%s _MALLOC() user_msg_x failed\n", __func__);
1619 error = ENOMEM;
1620 goto out;
1621 }
1622 uiop = _MALLOC(uap->cnt * sizeof(struct uio *),
1623 M_TEMP, M_WAITOK | M_ZERO);
1624 if (uiop == NULL) {
1625 DBG_PRINTF("%s _MALLOC() uiop failed\n", __func__);
1626 error = ENOMEM;
1627 goto out;
1628 }
1629
1630 size_of_msghdr = IS_64BIT_PROCESS(p) ?
1631 sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x);
1632
1633 umsgp = _MALLOC(uap->cnt * size_of_msghdr,
1634 M_TEMP, M_WAITOK | M_ZERO);
1635 if (umsgp == NULL) {
1636 printf("%s _MALLOC() user_msg_x failed\n", __func__);
1637 error = ENOMEM;
1638 goto out;
1639 }
1640 error = copyin(uap->msgp, umsgp, uap->cnt * size_of_msghdr);
1641 if (error) {
1642 DBG_PRINTF("%s copyin() failed\n", __func__);
1643 goto out;
1644 }
1645 error = internalize_user_msghdr_array(umsgp,
1646 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
1647 UIO_WRITE, uap->cnt, user_msg_x, uiop);
1648 if (error) {
1649 DBG_PRINTF("%s copyin_user_msghdr_array() failed\n", __func__);
1650 goto out;
1651 }
1652 /*
1653 * Make sure the size of each message iovec and
1654 * the aggregate size of all the iovec is valid
1655 */
1656 if (uio_array_is_valid(uiop, uap->cnt) == 0) {
1657 error = EINVAL;
1658 goto out;
1659 }
1660
1661 /*
1662 * Sanity check on passed arguments
1663 */
1664 for (i = 0; i < uap->cnt; i++) {
1665 struct user_msghdr_x *mp = user_msg_x + i;
1666
1667 /*
1668 * No flags on send message
1669 */
1670 if (mp->msg_flags != 0) {
1671 error = EINVAL;
1672 goto out;
1673 }
1674 /*
1675 * No support for address or ancillary data (yet)
1676 */
1677 if (mp->msg_name != USER_ADDR_NULL || mp->msg_namelen != 0) {
1678 has_addr_or_ctl = 1;
1679 }
1680
1681 if (mp->msg_control != USER_ADDR_NULL ||
1682 mp->msg_controllen != 0) {
1683 has_addr_or_ctl = 1;
1684 }
1685
1686 #if CONFIG_MACF_SOCKET_SUBSET
1687 /*
1688 * We check the state without holding the socket lock;
1689 * if a race condition occurs, it would simply result
1690 * in an extra call to the MAC check function.
1691 *
1692 * Note: The following check is never true taken with the
1693 * current limitation that we do not accept to pass an address,
1694 * this is effectively placeholder code. If we add support for
1695 * addresses, we will have to check every address.
1696 */
1697 if (to != NULL &&
1698 !(so->so_state & SS_DEFUNCT) &&
1699 (error = mac_socket_check_send(kauth_cred_get(), so, to))
1700 != 0) {
1701 goto out;
1702 }
1703 #endif /* MAC_SOCKET_SUBSET */
1704 }
1705
1706 len_before = uio_array_resid(uiop, uap->cnt);
1707
1708 /*
1709 * Feed list of packets at once only for connected socket without
1710 * control message
1711 */
1712 if (so->so_proto->pr_usrreqs->pru_sosend_list !=
1713 pru_sosend_list_notsupp &&
1714 has_addr_or_ctl == 0 && somaxsendmsgx == 0) {
1715 error = so->so_proto->pr_usrreqs->pru_sosend_list(so, uiop,
1716 uap->cnt, uap->flags);
1717 } else {
1718 for (i = 0; i < uap->cnt; i++) {
1719 struct user_msghdr_x *mp = user_msg_x + i;
1720 struct user_msghdr user_msg;
1721 uio_t auio = uiop[i];
1722 int32_t tmpval;
1723
1724 user_msg.msg_flags = mp->msg_flags;
1725 user_msg.msg_controllen = mp->msg_controllen;
1726 user_msg.msg_control = mp->msg_control;
1727 user_msg.msg_iovlen = mp->msg_iovlen;
1728 user_msg.msg_iov = mp->msg_iov;
1729 user_msg.msg_namelen = mp->msg_namelen;
1730 user_msg.msg_name = mp->msg_name;
1731
1732 error = sendit(p, so, &user_msg, auio, uap->flags,
1733 &tmpval);
1734 if (error != 0) {
1735 break;
1736 }
1737 }
1738 }
1739 len_after = uio_array_resid(uiop, uap->cnt);
1740
1741 VERIFY(len_after <= len_before);
1742
1743 if (error != 0) {
1744 if (len_after != len_before && (error == ERESTART ||
1745 error == EINTR || error == EWOULDBLOCK ||
1746 error == ENOBUFS)) {
1747 error = 0;
1748 }
1749 /* Generation of SIGPIPE can be controlled per socket */
1750 if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE)) {
1751 psignal(p, SIGPIPE);
1752 }
1753 }
1754 if (error == 0) {
1755 uiocnt = externalize_user_msghdr_array(umsgp,
1756 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
1757 UIO_WRITE, uap->cnt, user_msg_x, uiop);
1758
1759 *retval = (int)(uiocnt);
1760 }
1761 out:
1762 if (need_drop) {
1763 file_drop(uap->s);
1764 }
1765 if (umsgp != NULL) {
1766 _FREE(umsgp, M_TEMP);
1767 }
1768 if (uiop != NULL) {
1769 free_uio_array(uiop, uap->cnt);
1770 _FREE(uiop, M_TEMP);
1771 }
1772 if (user_msg_x != NULL) {
1773 _FREE(user_msg_x, M_TEMP);
1774 }
1775
1776 KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0);
1777
1778 return error;
1779 }
1780
1781
1782 static int
1783 copyout_sa(struct sockaddr *fromsa, user_addr_t name, socklen_t *namelen)
1784 {
1785 int error = 0;
1786 socklen_t sa_len = 0;
1787 ssize_t len;
1788
1789 len = *namelen;
1790 if (len <= 0 || fromsa == 0) {
1791 len = 0;
1792 } else {
1793 #ifndef MIN
1794 #define MIN(a, b) ((a) > (b) ? (b) : (a))
1795 #endif
1796 sa_len = fromsa->sa_len;
1797 len = MIN((unsigned int)len, sa_len);
1798 error = copyout(fromsa, name, (unsigned)len);
1799 if (error) {
1800 goto out;
1801 }
1802 }
1803 *namelen = sa_len;
1804 out:
1805 return 0;
1806 }
1807
1808 static int
1809 copyout_control(struct proc *p, struct mbuf *m, user_addr_t control,
1810 socklen_t *controllen, int *flags)
1811 {
1812 int error = 0;
1813 ssize_t len;
1814 user_addr_t ctlbuf;
1815
1816 len = *controllen;
1817 *controllen = 0;
1818 ctlbuf = control;
1819
1820 while (m && len > 0) {
1821 unsigned int tocopy;
1822 struct cmsghdr *cp = mtod(m, struct cmsghdr *);
1823 int cp_size = CMSG_ALIGN(cp->cmsg_len);
1824 int buflen = m->m_len;
1825
1826 while (buflen > 0 && len > 0) {
1827 /*
1828 * SCM_TIMESTAMP hack because struct timeval has a
1829 * different size for 32 bits and 64 bits processes
1830 */
1831 if (cp->cmsg_level == SOL_SOCKET && cp->cmsg_type == SCM_TIMESTAMP) {
1832 unsigned char tmp_buffer[CMSG_SPACE(sizeof(struct user64_timeval))] = {};
1833 struct cmsghdr *tmp_cp = (struct cmsghdr *)(void *)tmp_buffer;
1834 int tmp_space;
1835 struct timeval *tv = (struct timeval *)(void *)CMSG_DATA(cp);
1836
1837 tmp_cp->cmsg_level = SOL_SOCKET;
1838 tmp_cp->cmsg_type = SCM_TIMESTAMP;
1839
1840 if (proc_is64bit(p)) {
1841 struct user64_timeval *tv64 = (struct user64_timeval *)(void *)CMSG_DATA(tmp_cp);
1842
1843 os_unaligned_deref(&tv64->tv_sec) = tv->tv_sec;
1844 os_unaligned_deref(&tv64->tv_usec) = tv->tv_usec;
1845
1846 tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user64_timeval));
1847 tmp_space = CMSG_SPACE(sizeof(struct user64_timeval));
1848 } else {
1849 struct user32_timeval *tv32 = (struct user32_timeval *)(void *)CMSG_DATA(tmp_cp);
1850
1851 tv32->tv_sec = tv->tv_sec;
1852 tv32->tv_usec = tv->tv_usec;
1853
1854 tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user32_timeval));
1855 tmp_space = CMSG_SPACE(sizeof(struct user32_timeval));
1856 }
1857 if (len >= tmp_space) {
1858 tocopy = tmp_space;
1859 } else {
1860 *flags |= MSG_CTRUNC;
1861 tocopy = len;
1862 }
1863 error = copyout(tmp_buffer, ctlbuf, tocopy);
1864 if (error) {
1865 goto out;
1866 }
1867 } else {
1868 if (cp_size > buflen) {
1869 panic("cp_size > buflen, something"
1870 "wrong with alignment!");
1871 }
1872 if (len >= cp_size) {
1873 tocopy = cp_size;
1874 } else {
1875 *flags |= MSG_CTRUNC;
1876 tocopy = len;
1877 }
1878 error = copyout((caddr_t) cp, ctlbuf, tocopy);
1879 if (error) {
1880 goto out;
1881 }
1882 }
1883
1884 ctlbuf += tocopy;
1885 len -= tocopy;
1886
1887 buflen -= cp_size;
1888 cp = (struct cmsghdr *)(void *)
1889 ((unsigned char *) cp + cp_size);
1890 cp_size = CMSG_ALIGN(cp->cmsg_len);
1891 }
1892
1893 m = m->m_next;
1894 }
1895 *controllen = ctlbuf - control;
1896 out:
1897 return error;
1898 }
1899
1900 /*
1901 * Returns: 0 Success
1902 * ENOTSOCK
1903 * EINVAL
1904 * EBADF
1905 * EACCES Mandatory Access Control failure
1906 * copyout:EFAULT
1907 * fp_lookup:EBADF
1908 * <pru_soreceive>:ENOBUFS
1909 * <pru_soreceive>:ENOTCONN
1910 * <pru_soreceive>:EWOULDBLOCK
1911 * <pru_soreceive>:EFAULT
1912 * <pru_soreceive>:EINTR
1913 * <pru_soreceive>:EBADF
1914 * <pru_soreceive>:EINVAL
1915 * <pru_soreceive>:EMSGSIZE
1916 * <pru_soreceive>:???
1917 *
1918 * Notes: Additional return values from calls through <pru_soreceive>
1919 * depend on protocols other than TCP or AF_UNIX, which are
1920 * documented above.
1921 */
1922 static int
1923 recvit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop,
1924 user_addr_t namelenp, int32_t *retval)
1925 {
1926 ssize_t len;
1927 int error;
1928 struct mbuf *control = 0;
1929 struct socket *so;
1930 struct sockaddr *fromsa = 0;
1931 struct fileproc *fp;
1932
1933 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
1934 proc_fdlock(p);
1935 if ((error = fp_lookup(p, s, &fp, 1))) {
1936 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
1937 proc_fdunlock(p);
1938 return error;
1939 }
1940 if (fp->f_type != DTYPE_SOCKET) {
1941 fp_drop(p, s, fp, 1);
1942 proc_fdunlock(p);
1943 return ENOTSOCK;
1944 }
1945
1946 so = (struct socket *)fp->f_data;
1947 if (so == NULL) {
1948 fp_drop(p, s, fp, 1);
1949 proc_fdunlock(p);
1950 return EBADF;
1951 }
1952
1953 proc_fdunlock(p);
1954
1955 #if CONFIG_MACF_SOCKET_SUBSET
1956 /*
1957 * We check the state without holding the socket lock;
1958 * if a race condition occurs, it would simply result
1959 * in an extra call to the MAC check function.
1960 */
1961 if (!(so->so_state & SS_DEFUNCT) &&
1962 !(so->so_state & SS_ISCONNECTED) &&
1963 !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
1964 (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0) {
1965 goto out1;
1966 }
1967 #endif /* MAC_SOCKET_SUBSET */
1968 if (uio_resid(uiop) < 0) {
1969 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, EINVAL, 0, 0, 0, 0);
1970 error = EINVAL;
1971 goto out1;
1972 }
1973
1974 len = uio_resid(uiop);
1975 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, uiop,
1976 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
1977 &mp->msg_flags);
1978 if (fromsa) {
1979 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()),
1980 fromsa);
1981 }
1982 if (error) {
1983 if (uio_resid(uiop) != len && (error == ERESTART ||
1984 error == EINTR || error == EWOULDBLOCK)) {
1985 error = 0;
1986 }
1987 }
1988 if (error) {
1989 goto out;
1990 }
1991
1992 *retval = len - uio_resid(uiop);
1993
1994 if (mp->msg_name) {
1995 error = copyout_sa(fromsa, mp->msg_name, &mp->msg_namelen);
1996 if (error) {
1997 goto out;
1998 }
1999 /* return the actual, untruncated address length */
2000 if (namelenp &&
2001 (error = copyout((caddr_t)&mp->msg_namelen, namelenp,
2002 sizeof(int)))) {
2003 goto out;
2004 }
2005 }
2006
2007 if (mp->msg_control) {
2008 error = copyout_control(p, control, mp->msg_control,
2009 &mp->msg_controllen, &mp->msg_flags);
2010 }
2011 out:
2012 if (fromsa) {
2013 FREE(fromsa, M_SONAME);
2014 }
2015 if (control) {
2016 m_freem(control);
2017 }
2018 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
2019 out1:
2020 fp_drop(p, s, fp, 0);
2021 return error;
2022 }
2023
2024 /*
2025 * Returns: 0 Success
2026 * ENOMEM
2027 * copyin:EFAULT
2028 * recvit:???
2029 * read:??? [4056224: applicable for pipes]
2030 *
2031 * Notes: The read entry point is only called as part of support for
2032 * binary backward compatability; new code should use read
2033 * instead of recv or recvfrom when attempting to read data
2034 * from pipes.
2035 *
2036 * For full documentation of the return codes from recvit, see
2037 * the block header for the recvit function.
2038 */
2039 int
2040 recvfrom(struct proc *p, struct recvfrom_args *uap, int32_t *retval)
2041 {
2042 __pthread_testcancel(1);
2043 return recvfrom_nocancel(p, (struct recvfrom_nocancel_args *)uap,
2044 retval);
2045 }
2046
2047 int
2048 recvfrom_nocancel(struct proc *p, struct recvfrom_nocancel_args *uap,
2049 int32_t *retval)
2050 {
2051 struct user_msghdr msg;
2052 int error;
2053 uio_t auio = NULL;
2054
2055 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_START, 0, 0, 0, 0, 0);
2056 AUDIT_ARG(fd, uap->s);
2057
2058 if (uap->fromlenaddr) {
2059 error = copyin(uap->fromlenaddr,
2060 (caddr_t)&msg.msg_namelen, sizeof(msg.msg_namelen));
2061 if (error) {
2062 return error;
2063 }
2064 } else {
2065 msg.msg_namelen = 0;
2066 }
2067 msg.msg_name = uap->from;
2068 auio = uio_create(1, 0,
2069 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
2070 UIO_READ);
2071 if (auio == NULL) {
2072 return ENOMEM;
2073 }
2074
2075 uio_addiov(auio, uap->buf, uap->len);
2076 /* no need to set up msg_iov. recvit uses uio_t we send it */
2077 msg.msg_iov = 0;
2078 msg.msg_iovlen = 0;
2079 msg.msg_control = 0;
2080 msg.msg_controllen = 0;
2081 msg.msg_flags = uap->flags;
2082 error = recvit(p, uap->s, &msg, auio, uap->fromlenaddr, retval);
2083 if (auio != NULL) {
2084 uio_free(auio);
2085 }
2086
2087 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_END, error, 0, 0, 0, 0);
2088
2089 return error;
2090 }
2091
2092 /*
2093 * Returns: 0 Success
2094 * EMSGSIZE
2095 * ENOMEM
2096 * copyin:EFAULT
2097 * copyout:EFAULT
2098 * recvit:???
2099 *
2100 * Notes: For full documentation of the return codes from recvit, see
2101 * the block header for the recvit function.
2102 */
2103 int
2104 recvmsg(struct proc *p, struct recvmsg_args *uap, int32_t *retval)
2105 {
2106 __pthread_testcancel(1);
2107 return recvmsg_nocancel(p, (struct recvmsg_nocancel_args *)uap,
2108 retval);
2109 }
2110
2111 int
2112 recvmsg_nocancel(struct proc *p, struct recvmsg_nocancel_args *uap,
2113 int32_t *retval)
2114 {
2115 struct user32_msghdr msg32;
2116 struct user64_msghdr msg64;
2117 struct user_msghdr user_msg;
2118 caddr_t msghdrp;
2119 int size_of_msghdr;
2120 user_addr_t uiov;
2121 int error;
2122 uio_t auio = NULL;
2123 struct user_iovec *iovp;
2124
2125 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
2126 AUDIT_ARG(fd, uap->s);
2127 if (IS_64BIT_PROCESS(p)) {
2128 msghdrp = (caddr_t)&msg64;
2129 size_of_msghdr = sizeof(msg64);
2130 } else {
2131 msghdrp = (caddr_t)&msg32;
2132 size_of_msghdr = sizeof(msg32);
2133 }
2134 error = copyin(uap->msg, msghdrp, size_of_msghdr);
2135 if (error) {
2136 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
2137 return error;
2138 }
2139
2140 /* only need to copy if user process is not 64-bit */
2141 if (IS_64BIT_PROCESS(p)) {
2142 user_msg.msg_flags = msg64.msg_flags;
2143 user_msg.msg_controllen = msg64.msg_controllen;
2144 user_msg.msg_control = msg64.msg_control;
2145 user_msg.msg_iovlen = msg64.msg_iovlen;
2146 user_msg.msg_iov = msg64.msg_iov;
2147 user_msg.msg_namelen = msg64.msg_namelen;
2148 user_msg.msg_name = msg64.msg_name;
2149 } else {
2150 user_msg.msg_flags = msg32.msg_flags;
2151 user_msg.msg_controllen = msg32.msg_controllen;
2152 user_msg.msg_control = msg32.msg_control;
2153 user_msg.msg_iovlen = msg32.msg_iovlen;
2154 user_msg.msg_iov = msg32.msg_iov;
2155 user_msg.msg_namelen = msg32.msg_namelen;
2156 user_msg.msg_name = msg32.msg_name;
2157 }
2158
2159 if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
2160 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, EMSGSIZE,
2161 0, 0, 0, 0);
2162 return EMSGSIZE;
2163 }
2164
2165 user_msg.msg_flags = uap->flags;
2166
2167 /* allocate a uio large enough to hold the number of iovecs passed */
2168 auio = uio_create(user_msg.msg_iovlen, 0,
2169 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
2170 UIO_READ);
2171 if (auio == NULL) {
2172 error = ENOMEM;
2173 goto done;
2174 }
2175
2176 /*
2177 * get location of iovecs within the uio. then copyin the iovecs from
2178 * user space.
2179 */
2180 iovp = uio_iovsaddr(auio);
2181 if (iovp == NULL) {
2182 error = ENOMEM;
2183 goto done;
2184 }
2185 uiov = user_msg.msg_iov;
2186 user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
2187 error = copyin_user_iovec_array(uiov,
2188 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
2189 user_msg.msg_iovlen, iovp);
2190 if (error) {
2191 goto done;
2192 }
2193
2194 /* finish setup of uio_t */
2195 error = uio_calculateresid(auio);
2196 if (error) {
2197 goto done;
2198 }
2199
2200 error = recvit(p, uap->s, &user_msg, auio, 0, retval);
2201 if (!error) {
2202 user_msg.msg_iov = uiov;
2203 if (IS_64BIT_PROCESS(p)) {
2204 msg64.msg_flags = user_msg.msg_flags;
2205 msg64.msg_controllen = user_msg.msg_controllen;
2206 msg64.msg_control = user_msg.msg_control;
2207 msg64.msg_iovlen = user_msg.msg_iovlen;
2208 msg64.msg_iov = user_msg.msg_iov;
2209 msg64.msg_namelen = user_msg.msg_namelen;
2210 msg64.msg_name = user_msg.msg_name;
2211 } else {
2212 msg32.msg_flags = user_msg.msg_flags;
2213 msg32.msg_controllen = user_msg.msg_controllen;
2214 msg32.msg_control = user_msg.msg_control;
2215 msg32.msg_iovlen = user_msg.msg_iovlen;
2216 msg32.msg_iov = user_msg.msg_iov;
2217 msg32.msg_namelen = user_msg.msg_namelen;
2218 msg32.msg_name = user_msg.msg_name;
2219 }
2220 error = copyout(msghdrp, uap->msg, size_of_msghdr);
2221 }
2222 done:
2223 if (auio != NULL) {
2224 uio_free(auio);
2225 }
2226 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
2227 return error;
2228 }
2229
2230 int
2231 recvmsg_x(struct proc *p, struct recvmsg_x_args *uap, user_ssize_t *retval)
2232 {
2233 int error = EOPNOTSUPP;
2234 struct user_msghdr_x *user_msg_x = NULL;
2235 struct recv_msg_elem *recv_msg_array = NULL;
2236 struct socket *so;
2237 user_ssize_t len_before = 0, len_after;
2238 int need_drop = 0;
2239 size_t size_of_msghdr;
2240 void *umsgp = NULL;
2241 u_int i;
2242 u_int uiocnt;
2243
2244 KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0);
2245
2246 error = file_socket(uap->s, &so);
2247 if (error) {
2248 goto out;
2249 }
2250 need_drop = 1;
2251 if (so == NULL) {
2252 error = EBADF;
2253 goto out;
2254 }
2255 /*
2256 * Input parameter range check
2257 */
2258 if (uap->cnt == 0 || uap->cnt > UIO_MAXIOV) {
2259 error = EINVAL;
2260 goto out;
2261 }
2262 if (uap->cnt > somaxrecvmsgx) {
2263 uap->cnt = somaxrecvmsgx;
2264 }
2265
2266 user_msg_x = _MALLOC(uap->cnt * sizeof(struct user_msghdr_x),
2267 M_TEMP, M_WAITOK | M_ZERO);
2268 if (user_msg_x == NULL) {
2269 DBG_PRINTF("%s _MALLOC() user_msg_x failed\n", __func__);
2270 error = ENOMEM;
2271 goto out;
2272 }
2273 recv_msg_array = alloc_recv_msg_array(uap->cnt);
2274 if (recv_msg_array == NULL) {
2275 DBG_PRINTF("%s alloc_recv_msg_array() failed\n", __func__);
2276 error = ENOMEM;
2277 goto out;
2278 }
2279 size_of_msghdr = IS_64BIT_PROCESS(p) ?
2280 sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x);
2281
2282 umsgp = _MALLOC(uap->cnt * size_of_msghdr, M_TEMP, M_WAITOK | M_ZERO);
2283 if (umsgp == NULL) {
2284 DBG_PRINTF("%s _MALLOC() umsgp failed\n", __func__);
2285 error = ENOMEM;
2286 goto out;
2287 }
2288 error = copyin(uap->msgp, umsgp, uap->cnt * size_of_msghdr);
2289 if (error) {
2290 DBG_PRINTF("%s copyin() failed\n", __func__);
2291 goto out;
2292 }
2293 error = internalize_recv_msghdr_array(umsgp,
2294 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
2295 UIO_READ, uap->cnt, user_msg_x, recv_msg_array);
2296 if (error) {
2297 DBG_PRINTF("%s copyin_user_msghdr_array() failed\n", __func__);
2298 goto out;
2299 }
2300 /*
2301 * Make sure the size of each message iovec and
2302 * the aggregate size of all the iovec is valid
2303 */
2304 if (recv_msg_array_is_valid(recv_msg_array, uap->cnt) == 0) {
2305 error = EINVAL;
2306 goto out;
2307 }
2308 /*
2309 * Sanity check on passed arguments
2310 */
2311 for (i = 0; i < uap->cnt; i++) {
2312 struct user_msghdr_x *mp = user_msg_x + i;
2313
2314 if (mp->msg_flags != 0) {
2315 error = EINVAL;
2316 goto out;
2317 }
2318 }
2319 #if CONFIG_MACF_SOCKET_SUBSET
2320 /*
2321 * We check the state without holding the socket lock;
2322 * if a race condition occurs, it would simply result
2323 * in an extra call to the MAC check function.
2324 */
2325 if (!(so->so_state & SS_DEFUNCT) &&
2326 !(so->so_state & SS_ISCONNECTED) &&
2327 !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
2328 (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0) {
2329 goto out;
2330 }
2331 #endif /* MAC_SOCKET_SUBSET */
2332
2333 len_before = recv_msg_array_resid(recv_msg_array, uap->cnt);
2334
2335 if (so->so_proto->pr_usrreqs->pru_soreceive_list !=
2336 pru_soreceive_list_notsupp &&
2337 somaxrecvmsgx == 0) {
2338 error = so->so_proto->pr_usrreqs->pru_soreceive_list(so,
2339 recv_msg_array, uap->cnt, &uap->flags);
2340 } else {
2341 int flags = uap->flags;
2342
2343 for (i = 0; i < uap->cnt; i++) {
2344 struct recv_msg_elem *recv_msg_elem;
2345 uio_t auio;
2346 struct sockaddr **psa;
2347 struct mbuf **controlp;
2348
2349 recv_msg_elem = recv_msg_array + i;
2350 auio = recv_msg_elem->uio;
2351
2352 /*
2353 * Do not block if we got at least one packet
2354 */
2355 if (i > 0) {
2356 flags |= MSG_DONTWAIT;
2357 }
2358
2359 psa = (recv_msg_elem->which & SOCK_MSG_SA) ?
2360 &recv_msg_elem->psa : NULL;
2361 controlp = (recv_msg_elem->which & SOCK_MSG_CONTROL) ?
2362 &recv_msg_elem->controlp : NULL;
2363
2364 error = so->so_proto->pr_usrreqs->pru_soreceive(so, psa,
2365 auio, (struct mbuf **)0, controlp, &flags);
2366 if (error) {
2367 break;
2368 }
2369 /*
2370 * We have some data
2371 */
2372 recv_msg_elem->which |= SOCK_MSG_DATA;
2373 /*
2374 * Stop on partial copy
2375 */
2376 if (flags & (MSG_RCVMORE | MSG_TRUNC)) {
2377 break;
2378 }
2379 }
2380 if ((uap->flags & MSG_DONTWAIT) == 0) {
2381 flags &= ~MSG_DONTWAIT;
2382 }
2383 uap->flags = flags;
2384 }
2385
2386 len_after = recv_msg_array_resid(recv_msg_array, uap->cnt);
2387
2388 if (error) {
2389 if (len_after != len_before && (error == ERESTART ||
2390 error == EINTR || error == EWOULDBLOCK)) {
2391 error = 0;
2392 } else {
2393 goto out;
2394 }
2395 }
2396
2397 uiocnt = externalize_recv_msghdr_array(umsgp,
2398 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
2399 UIO_READ, uap->cnt, user_msg_x, recv_msg_array);
2400
2401 error = copyout(umsgp, uap->msgp, uap->cnt * size_of_msghdr);
2402 if (error) {
2403 DBG_PRINTF("%s copyout() failed\n", __func__);
2404 goto out;
2405 }
2406 *retval = (int)(uiocnt);
2407
2408 for (i = 0; i < uap->cnt; i++) {
2409 struct user_msghdr_x *mp = user_msg_x + i;
2410 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
2411 struct sockaddr *fromsa = recv_msg_elem->psa;
2412
2413 if (mp->msg_name) {
2414 error = copyout_sa(fromsa, mp->msg_name,
2415 &mp->msg_namelen);
2416 if (error) {
2417 goto out;
2418 }
2419 }
2420 if (mp->msg_control) {
2421 error = copyout_control(p, recv_msg_elem->controlp,
2422 mp->msg_control, &mp->msg_controllen,
2423 &mp->msg_flags);
2424 if (error) {
2425 goto out;
2426 }
2427 }
2428 }
2429 out:
2430 if (need_drop) {
2431 file_drop(uap->s);
2432 }
2433 if (umsgp != NULL) {
2434 _FREE(umsgp, M_TEMP);
2435 }
2436 if (recv_msg_array != NULL) {
2437 free_recv_msg_array(recv_msg_array, uap->cnt);
2438 }
2439 if (user_msg_x != NULL) {
2440 _FREE(user_msg_x, M_TEMP);
2441 }
2442
2443 KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0);
2444
2445 return error;
2446 }
2447
2448 /*
2449 * Returns: 0 Success
2450 * EBADF
2451 * file_socket:ENOTSOCK
2452 * file_socket:EBADF
2453 * soshutdown:EINVAL
2454 * soshutdown:ENOTCONN
2455 * soshutdown:EADDRNOTAVAIL[TCP]
2456 * soshutdown:ENOBUFS[TCP]
2457 * soshutdown:EMSGSIZE[TCP]
2458 * soshutdown:EHOSTUNREACH[TCP]
2459 * soshutdown:ENETUNREACH[TCP]
2460 * soshutdown:ENETDOWN[TCP]
2461 * soshutdown:ENOMEM[TCP]
2462 * soshutdown:EACCES[TCP]
2463 * soshutdown:EMSGSIZE[TCP]
2464 * soshutdown:ENOBUFS[TCP]
2465 * soshutdown:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
2466 * soshutdown:??? [other protocol families]
2467 */
2468 /* ARGSUSED */
2469 int
2470 shutdown(__unused struct proc *p, struct shutdown_args *uap,
2471 __unused int32_t *retval)
2472 {
2473 struct socket *so;
2474 int error;
2475
2476 AUDIT_ARG(fd, uap->s);
2477 error = file_socket(uap->s, &so);
2478 if (error) {
2479 return error;
2480 }
2481 if (so == NULL) {
2482 error = EBADF;
2483 goto out;
2484 }
2485 error = soshutdown((struct socket *)so, uap->how);
2486 out:
2487 file_drop(uap->s);
2488 return error;
2489 }
2490
2491 /*
2492 * Returns: 0 Success
2493 * EFAULT
2494 * EINVAL
2495 * EACCES Mandatory Access Control failure
2496 * file_socket:ENOTSOCK
2497 * file_socket:EBADF
2498 * sosetopt:EINVAL
2499 * sosetopt:ENOPROTOOPT
2500 * sosetopt:ENOBUFS
2501 * sosetopt:EDOM
2502 * sosetopt:EFAULT
2503 * sosetopt:EOPNOTSUPP[AF_UNIX]
2504 * sosetopt:???
2505 */
2506 /* ARGSUSED */
2507 int
2508 setsockopt(struct proc *p, struct setsockopt_args *uap,
2509 __unused int32_t *retval)
2510 {
2511 struct socket *so;
2512 struct sockopt sopt;
2513 int error;
2514
2515 AUDIT_ARG(fd, uap->s);
2516 if (uap->val == 0 && uap->valsize != 0) {
2517 return EFAULT;
2518 }
2519 /* No bounds checking on size (it's unsigned) */
2520
2521 error = file_socket(uap->s, &so);
2522 if (error) {
2523 return error;
2524 }
2525
2526 sopt.sopt_dir = SOPT_SET;
2527 sopt.sopt_level = uap->level;
2528 sopt.sopt_name = uap->name;
2529 sopt.sopt_val = uap->val;
2530 sopt.sopt_valsize = uap->valsize;
2531 sopt.sopt_p = p;
2532
2533 if (so == NULL) {
2534 error = EINVAL;
2535 goto out;
2536 }
2537 #if CONFIG_MACF_SOCKET_SUBSET
2538 if ((error = mac_socket_check_setsockopt(kauth_cred_get(), so,
2539 &sopt)) != 0) {
2540 goto out;
2541 }
2542 #endif /* MAC_SOCKET_SUBSET */
2543 error = sosetoptlock(so, &sopt, 1); /* will lock socket */
2544 out:
2545 file_drop(uap->s);
2546 return error;
2547 }
2548
2549
2550
2551 /*
2552 * Returns: 0 Success
2553 * EINVAL
2554 * EBADF
2555 * EACCES Mandatory Access Control failure
2556 * copyin:EFAULT
2557 * copyout:EFAULT
2558 * file_socket:ENOTSOCK
2559 * file_socket:EBADF
2560 * sogetopt:???
2561 */
2562 int
2563 getsockopt(struct proc *p, struct getsockopt_args *uap,
2564 __unused int32_t *retval)
2565 {
2566 int error;
2567 socklen_t valsize;
2568 struct sockopt sopt;
2569 struct socket *so;
2570
2571 error = file_socket(uap->s, &so);
2572 if (error) {
2573 return error;
2574 }
2575 if (uap->val) {
2576 error = copyin(uap->avalsize, (caddr_t)&valsize,
2577 sizeof(valsize));
2578 if (error) {
2579 goto out;
2580 }
2581 /* No bounds checking on size (it's unsigned) */
2582 } else {
2583 valsize = 0;
2584 }
2585 sopt.sopt_dir = SOPT_GET;
2586 sopt.sopt_level = uap->level;
2587 sopt.sopt_name = uap->name;
2588 sopt.sopt_val = uap->val;
2589 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
2590 sopt.sopt_p = p;
2591
2592 if (so == NULL) {
2593 error = EBADF;
2594 goto out;
2595 }
2596 #if CONFIG_MACF_SOCKET_SUBSET
2597 if ((error = mac_socket_check_getsockopt(kauth_cred_get(), so,
2598 &sopt)) != 0) {
2599 goto out;
2600 }
2601 #endif /* MAC_SOCKET_SUBSET */
2602 error = sogetoptlock((struct socket *)so, &sopt, 1); /* will lock */
2603 if (error == 0) {
2604 valsize = sopt.sopt_valsize;
2605 error = copyout((caddr_t)&valsize, uap->avalsize,
2606 sizeof(valsize));
2607 }
2608 out:
2609 file_drop(uap->s);
2610 return error;
2611 }
2612
2613
2614 /*
2615 * Get socket name.
2616 *
2617 * Returns: 0 Success
2618 * EBADF
2619 * file_socket:ENOTSOCK
2620 * file_socket:EBADF
2621 * copyin:EFAULT
2622 * copyout:EFAULT
2623 * <pru_sockaddr>:ENOBUFS[TCP]
2624 * <pru_sockaddr>:ECONNRESET[TCP]
2625 * <pru_sockaddr>:EINVAL[AF_UNIX]
2626 * <sf_getsockname>:???
2627 */
2628 /* ARGSUSED */
2629 int
2630 getsockname(__unused struct proc *p, struct getsockname_args *uap,
2631 __unused int32_t *retval)
2632 {
2633 struct socket *so;
2634 struct sockaddr *sa;
2635 socklen_t len;
2636 socklen_t sa_len;
2637 int error;
2638
2639 error = file_socket(uap->fdes, &so);
2640 if (error) {
2641 return error;
2642 }
2643 error = copyin(uap->alen, (caddr_t)&len, sizeof(socklen_t));
2644 if (error) {
2645 goto out;
2646 }
2647 if (so == NULL) {
2648 error = EBADF;
2649 goto out;
2650 }
2651 sa = 0;
2652 socket_lock(so, 1);
2653 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
2654 if (error == 0) {
2655 error = sflt_getsockname(so, &sa);
2656 if (error == EJUSTRETURN) {
2657 error = 0;
2658 }
2659 }
2660 socket_unlock(so, 1);
2661 if (error) {
2662 goto bad;
2663 }
2664 if (sa == 0) {
2665 len = 0;
2666 goto gotnothing;
2667 }
2668
2669 sa_len = sa->sa_len;
2670 len = MIN(len, sa_len);
2671 error = copyout((caddr_t)sa, uap->asa, len);
2672 if (error) {
2673 goto bad;
2674 }
2675 /* return the actual, untruncated address length */
2676 len = sa_len;
2677 gotnothing:
2678 error = copyout((caddr_t)&len, uap->alen, sizeof(socklen_t));
2679 bad:
2680 if (sa) {
2681 FREE(sa, M_SONAME);
2682 }
2683 out:
2684 file_drop(uap->fdes);
2685 return error;
2686 }
2687
2688 /*
2689 * Get name of peer for connected socket.
2690 *
2691 * Returns: 0 Success
2692 * EBADF
2693 * EINVAL
2694 * ENOTCONN
2695 * file_socket:ENOTSOCK
2696 * file_socket:EBADF
2697 * copyin:EFAULT
2698 * copyout:EFAULT
2699 * <pru_peeraddr>:???
2700 * <sf_getpeername>:???
2701 */
2702 /* ARGSUSED */
2703 int
2704 getpeername(__unused struct proc *p, struct getpeername_args *uap,
2705 __unused int32_t *retval)
2706 {
2707 struct socket *so;
2708 struct sockaddr *sa;
2709 socklen_t len;
2710 socklen_t sa_len;
2711 int error;
2712
2713 error = file_socket(uap->fdes, &so);
2714 if (error) {
2715 return error;
2716 }
2717 if (so == NULL) {
2718 error = EBADF;
2719 goto out;
2720 }
2721
2722 socket_lock(so, 1);
2723
2724 if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
2725 (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
2726 /* the socket has been shutdown, no more getpeername's */
2727 socket_unlock(so, 1);
2728 error = EINVAL;
2729 goto out;
2730 }
2731
2732 if ((so->so_state & (SS_ISCONNECTED | SS_ISCONFIRMING)) == 0) {
2733 socket_unlock(so, 1);
2734 error = ENOTCONN;
2735 goto out;
2736 }
2737 error = copyin(uap->alen, (caddr_t)&len, sizeof(socklen_t));
2738 if (error) {
2739 socket_unlock(so, 1);
2740 goto out;
2741 }
2742 sa = 0;
2743 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
2744 if (error == 0) {
2745 error = sflt_getpeername(so, &sa);
2746 if (error == EJUSTRETURN) {
2747 error = 0;
2748 }
2749 }
2750 socket_unlock(so, 1);
2751 if (error) {
2752 goto bad;
2753 }
2754 if (sa == 0) {
2755 len = 0;
2756 goto gotnothing;
2757 }
2758 sa_len = sa->sa_len;
2759 len = MIN(len, sa_len);
2760 error = copyout(sa, uap->asa, len);
2761 if (error) {
2762 goto bad;
2763 }
2764 /* return the actual, untruncated address length */
2765 len = sa_len;
2766 gotnothing:
2767 error = copyout((caddr_t)&len, uap->alen, sizeof(socklen_t));
2768 bad:
2769 if (sa) {
2770 FREE(sa, M_SONAME);
2771 }
2772 out:
2773 file_drop(uap->fdes);
2774 return error;
2775 }
2776
2777 int
2778 sockargs(struct mbuf **mp, user_addr_t data, int buflen, int type)
2779 {
2780 struct sockaddr *sa;
2781 struct mbuf *m;
2782 int error;
2783
2784 size_t alloc_buflen = (size_t)buflen;
2785
2786 if (alloc_buflen > INT_MAX / 2) {
2787 return EINVAL;
2788 }
2789 #ifdef __LP64__
2790 /*
2791 * The fd's in the buffer must expand to be pointers, thus we need twice
2792 * as much space
2793 */
2794 if (type == MT_CONTROL) {
2795 alloc_buflen = ((buflen - sizeof(struct cmsghdr)) * 2) +
2796 sizeof(struct cmsghdr);
2797 }
2798 #endif
2799 if (alloc_buflen > MLEN) {
2800 if (type == MT_SONAME && alloc_buflen <= 112) {
2801 alloc_buflen = MLEN; /* unix domain compat. hack */
2802 } else if (alloc_buflen > MCLBYTES) {
2803 return EINVAL;
2804 }
2805 }
2806 m = m_get(M_WAIT, type);
2807 if (m == NULL) {
2808 return ENOBUFS;
2809 }
2810 if (alloc_buflen > MLEN) {
2811 MCLGET(m, M_WAIT);
2812 if ((m->m_flags & M_EXT) == 0) {
2813 m_free(m);
2814 return ENOBUFS;
2815 }
2816 }
2817 /*
2818 * K64: We still copyin the original buflen because it gets expanded
2819 * later and we lie about the size of the mbuf because it only affects
2820 * unp_* functions
2821 */
2822 m->m_len = buflen;
2823 error = copyin(data, mtod(m, caddr_t), (u_int)buflen);
2824 if (error) {
2825 (void) m_free(m);
2826 } else {
2827 *mp = m;
2828 if (type == MT_SONAME) {
2829 sa = mtod(m, struct sockaddr *);
2830 sa->sa_len = buflen;
2831 }
2832 }
2833 return error;
2834 }
2835
2836 /*
2837 * Given a user_addr_t of length len, allocate and fill out a *sa.
2838 *
2839 * Returns: 0 Success
2840 * ENAMETOOLONG Filename too long
2841 * EINVAL Invalid argument
2842 * ENOMEM Not enough space
2843 * copyin:EFAULT Bad address
2844 */
2845 static int
2846 getsockaddr(struct socket *so, struct sockaddr **namp, user_addr_t uaddr,
2847 size_t len, boolean_t translate_unspec)
2848 {
2849 struct sockaddr *sa;
2850 int error;
2851
2852 if (len > SOCK_MAXADDRLEN) {
2853 return ENAMETOOLONG;
2854 }
2855
2856 if (len < offsetof(struct sockaddr, sa_data[0])) {
2857 return EINVAL;
2858 }
2859
2860 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK | M_ZERO);
2861 if (sa == NULL) {
2862 return ENOMEM;
2863 }
2864 error = copyin(uaddr, (caddr_t)sa, len);
2865 if (error) {
2866 FREE(sa, M_SONAME);
2867 } else {
2868 /*
2869 * Force sa_family to AF_INET on AF_INET sockets to handle
2870 * legacy applications that use AF_UNSPEC (0). On all other
2871 * sockets we leave it unchanged and let the lower layer
2872 * handle it.
2873 */
2874 if (translate_unspec && sa->sa_family == AF_UNSPEC &&
2875 SOCK_CHECK_DOM(so, PF_INET) &&
2876 len == sizeof(struct sockaddr_in)) {
2877 sa->sa_family = AF_INET;
2878 }
2879
2880 sa->sa_len = len;
2881 *namp = sa;
2882 }
2883 return error;
2884 }
2885
2886 static int
2887 getsockaddr_s(struct socket *so, struct sockaddr_storage *ss,
2888 user_addr_t uaddr, size_t len, boolean_t translate_unspec)
2889 {
2890 int error;
2891
2892 if (ss == NULL || uaddr == USER_ADDR_NULL ||
2893 len < offsetof(struct sockaddr, sa_data[0])) {
2894 return EINVAL;
2895 }
2896
2897 /*
2898 * sockaddr_storage size is less than SOCK_MAXADDRLEN,
2899 * so the check here is inclusive.
2900 */
2901 if (len > sizeof(*ss)) {
2902 return ENAMETOOLONG;
2903 }
2904
2905 bzero(ss, sizeof(*ss));
2906 error = copyin(uaddr, (caddr_t)ss, len);
2907 if (error == 0) {
2908 /*
2909 * Force sa_family to AF_INET on AF_INET sockets to handle
2910 * legacy applications that use AF_UNSPEC (0). On all other
2911 * sockets we leave it unchanged and let the lower layer
2912 * handle it.
2913 */
2914 if (translate_unspec && ss->ss_family == AF_UNSPEC &&
2915 SOCK_CHECK_DOM(so, PF_INET) &&
2916 len == sizeof(struct sockaddr_in)) {
2917 ss->ss_family = AF_INET;
2918 }
2919
2920 ss->ss_len = len;
2921 }
2922 return error;
2923 }
2924
2925 int
2926 internalize_user_msghdr_array(const void *src, int spacetype, int direction,
2927 u_int count, struct user_msghdr_x *dst, struct uio **uiop)
2928 {
2929 int error = 0;
2930 u_int i;
2931 u_int namecnt = 0;
2932 u_int ctlcnt = 0;
2933
2934 for (i = 0; i < count; i++) {
2935 uio_t auio;
2936 struct user_iovec *iovp;
2937 struct user_msghdr_x *user_msg = dst + i;
2938
2939 if (spacetype == UIO_USERSPACE64) {
2940 const struct user64_msghdr_x *msghdr64;
2941
2942 msghdr64 = ((const struct user64_msghdr_x *)src) + i;
2943
2944 user_msg->msg_name = msghdr64->msg_name;
2945 user_msg->msg_namelen = msghdr64->msg_namelen;
2946 user_msg->msg_iov = msghdr64->msg_iov;
2947 user_msg->msg_iovlen = msghdr64->msg_iovlen;
2948 user_msg->msg_control = msghdr64->msg_control;
2949 user_msg->msg_controllen = msghdr64->msg_controllen;
2950 user_msg->msg_flags = msghdr64->msg_flags;
2951 user_msg->msg_datalen = msghdr64->msg_datalen;
2952 } else {
2953 const struct user32_msghdr_x *msghdr32;
2954
2955 msghdr32 = ((const struct user32_msghdr_x *)src) + i;
2956
2957 user_msg->msg_name = msghdr32->msg_name;
2958 user_msg->msg_namelen = msghdr32->msg_namelen;
2959 user_msg->msg_iov = msghdr32->msg_iov;
2960 user_msg->msg_iovlen = msghdr32->msg_iovlen;
2961 user_msg->msg_control = msghdr32->msg_control;
2962 user_msg->msg_controllen = msghdr32->msg_controllen;
2963 user_msg->msg_flags = msghdr32->msg_flags;
2964 user_msg->msg_datalen = msghdr32->msg_datalen;
2965 }
2966
2967 if (user_msg->msg_iovlen <= 0 ||
2968 user_msg->msg_iovlen > UIO_MAXIOV) {
2969 error = EMSGSIZE;
2970 goto done;
2971 }
2972 auio = uio_create(user_msg->msg_iovlen, 0, spacetype,
2973 direction);
2974 if (auio == NULL) {
2975 error = ENOMEM;
2976 goto done;
2977 }
2978 uiop[i] = auio;
2979
2980 iovp = uio_iovsaddr(auio);
2981 if (iovp == NULL) {
2982 error = ENOMEM;
2983 goto done;
2984 }
2985 error = copyin_user_iovec_array(user_msg->msg_iov,
2986 spacetype, user_msg->msg_iovlen, iovp);
2987 if (error) {
2988 goto done;
2989 }
2990 user_msg->msg_iov = CAST_USER_ADDR_T(iovp);
2991
2992 error = uio_calculateresid(auio);
2993 if (error) {
2994 goto done;
2995 }
2996 user_msg->msg_datalen = uio_resid(auio);
2997
2998 if (user_msg->msg_name && user_msg->msg_namelen) {
2999 namecnt++;
3000 }
3001 if (user_msg->msg_control && user_msg->msg_controllen) {
3002 ctlcnt++;
3003 }
3004 }
3005 done:
3006
3007 return error;
3008 }
3009
3010 int
3011 internalize_recv_msghdr_array(const void *src, int spacetype, int direction,
3012 u_int count, struct user_msghdr_x *dst,
3013 struct recv_msg_elem *recv_msg_array)
3014 {
3015 int error = 0;
3016 u_int i;
3017
3018 for (i = 0; i < count; i++) {
3019 struct user_iovec *iovp;
3020 struct user_msghdr_x *user_msg = dst + i;
3021 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3022
3023 if (spacetype == UIO_USERSPACE64) {
3024 const struct user64_msghdr_x *msghdr64;
3025
3026 msghdr64 = ((const struct user64_msghdr_x *)src) + i;
3027
3028 user_msg->msg_name = msghdr64->msg_name;
3029 user_msg->msg_namelen = msghdr64->msg_namelen;
3030 user_msg->msg_iov = msghdr64->msg_iov;
3031 user_msg->msg_iovlen = msghdr64->msg_iovlen;
3032 user_msg->msg_control = msghdr64->msg_control;
3033 user_msg->msg_controllen = msghdr64->msg_controllen;
3034 user_msg->msg_flags = msghdr64->msg_flags;
3035 user_msg->msg_datalen = msghdr64->msg_datalen;
3036 } else {
3037 const struct user32_msghdr_x *msghdr32;
3038
3039 msghdr32 = ((const struct user32_msghdr_x *)src) + i;
3040
3041 user_msg->msg_name = msghdr32->msg_name;
3042 user_msg->msg_namelen = msghdr32->msg_namelen;
3043 user_msg->msg_iov = msghdr32->msg_iov;
3044 user_msg->msg_iovlen = msghdr32->msg_iovlen;
3045 user_msg->msg_control = msghdr32->msg_control;
3046 user_msg->msg_controllen = msghdr32->msg_controllen;
3047 user_msg->msg_flags = msghdr32->msg_flags;
3048 user_msg->msg_datalen = msghdr32->msg_datalen;
3049 }
3050
3051 if (user_msg->msg_iovlen <= 0 ||
3052 user_msg->msg_iovlen > UIO_MAXIOV) {
3053 error = EMSGSIZE;
3054 goto done;
3055 }
3056 recv_msg_elem->uio = uio_create(user_msg->msg_iovlen, 0,
3057 spacetype, direction);
3058 if (recv_msg_elem->uio == NULL) {
3059 error = ENOMEM;
3060 goto done;
3061 }
3062
3063 iovp = uio_iovsaddr(recv_msg_elem->uio);
3064 if (iovp == NULL) {
3065 error = ENOMEM;
3066 goto done;
3067 }
3068 error = copyin_user_iovec_array(user_msg->msg_iov,
3069 spacetype, user_msg->msg_iovlen, iovp);
3070 if (error) {
3071 goto done;
3072 }
3073 user_msg->msg_iov = CAST_USER_ADDR_T(iovp);
3074
3075 error = uio_calculateresid(recv_msg_elem->uio);
3076 if (error) {
3077 goto done;
3078 }
3079 user_msg->msg_datalen = uio_resid(recv_msg_elem->uio);
3080
3081 if (user_msg->msg_name && user_msg->msg_namelen) {
3082 recv_msg_elem->which |= SOCK_MSG_SA;
3083 }
3084 if (user_msg->msg_control && user_msg->msg_controllen) {
3085 recv_msg_elem->which |= SOCK_MSG_CONTROL;
3086 }
3087 }
3088 done:
3089
3090 return error;
3091 }
3092
3093 u_int
3094 externalize_user_msghdr_array(void *dst, int spacetype, int direction,
3095 u_int count, const struct user_msghdr_x *src, struct uio **uiop)
3096 {
3097 #pragma unused(direction)
3098 u_int i;
3099 int seenlast = 0;
3100 u_int retcnt = 0;
3101
3102 for (i = 0; i < count; i++) {
3103 const struct user_msghdr_x *user_msg = src + i;
3104 uio_t auio = uiop[i];
3105 user_ssize_t len = user_msg->msg_datalen - uio_resid(auio);
3106
3107 if (user_msg->msg_datalen != 0 && len == 0) {
3108 seenlast = 1;
3109 }
3110
3111 if (seenlast == 0) {
3112 retcnt++;
3113 }
3114
3115 if (spacetype == UIO_USERSPACE64) {
3116 struct user64_msghdr_x *msghdr64;
3117
3118 msghdr64 = ((struct user64_msghdr_x *)dst) + i;
3119
3120 msghdr64->msg_flags = user_msg->msg_flags;
3121 msghdr64->msg_datalen = len;
3122 } else {
3123 struct user32_msghdr_x *msghdr32;
3124
3125 msghdr32 = ((struct user32_msghdr_x *)dst) + i;
3126
3127 msghdr32->msg_flags = user_msg->msg_flags;
3128 msghdr32->msg_datalen = len;
3129 }
3130 }
3131 return retcnt;
3132 }
3133
3134 u_int
3135 externalize_recv_msghdr_array(void *dst, int spacetype, int direction,
3136 u_int count, const struct user_msghdr_x *src,
3137 struct recv_msg_elem *recv_msg_array)
3138 {
3139 u_int i;
3140 int seenlast = 0;
3141 u_int retcnt = 0;
3142
3143 for (i = 0; i < count; i++) {
3144 const struct user_msghdr_x *user_msg = src + i;
3145 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3146 user_ssize_t len;
3147
3148 len = user_msg->msg_datalen - uio_resid(recv_msg_elem->uio);
3149
3150 if (direction == UIO_READ) {
3151 if ((recv_msg_elem->which & SOCK_MSG_DATA) == 0) {
3152 seenlast = 1;
3153 }
3154 } else {
3155 if (user_msg->msg_datalen != 0 && len == 0) {
3156 seenlast = 1;
3157 }
3158 }
3159
3160 if (seenlast == 0) {
3161 retcnt++;
3162 }
3163
3164 if (spacetype == UIO_USERSPACE64) {
3165 struct user64_msghdr_x *msghdr64;
3166
3167 msghdr64 = ((struct user64_msghdr_x *)dst) + i;
3168
3169 msghdr64->msg_flags = user_msg->msg_flags;
3170 msghdr64->msg_datalen = len;
3171 } else {
3172 struct user32_msghdr_x *msghdr32;
3173
3174 msghdr32 = ((struct user32_msghdr_x *)dst) + i;
3175
3176 msghdr32->msg_flags = user_msg->msg_flags;
3177 msghdr32->msg_datalen = len;
3178 }
3179 }
3180 return retcnt;
3181 }
3182
3183 void
3184 free_uio_array(struct uio **uiop, u_int count)
3185 {
3186 u_int i;
3187
3188 for (i = 0; i < count; i++) {
3189 if (uiop[i] != NULL) {
3190 uio_free(uiop[i]);
3191 }
3192 }
3193 }
3194
3195 __private_extern__ user_ssize_t
3196 uio_array_resid(struct uio **uiop, u_int count)
3197 {
3198 user_ssize_t len = 0;
3199 u_int i;
3200
3201 for (i = 0; i < count; i++) {
3202 struct uio *auio = uiop[i];
3203
3204 if (auio != NULL) {
3205 len += uio_resid(auio);
3206 }
3207 }
3208 return len;
3209 }
3210
3211 int
3212 uio_array_is_valid(struct uio **uiop, u_int count)
3213 {
3214 user_ssize_t len = 0;
3215 u_int i;
3216
3217 for (i = 0; i < count; i++) {
3218 struct uio *auio = uiop[i];
3219
3220 if (auio != NULL) {
3221 user_ssize_t resid = uio_resid(auio);
3222
3223 /*
3224 * Sanity check on the validity of the iovec:
3225 * no point of going over sb_max
3226 */
3227 if (resid < 0 || (u_int32_t)resid > sb_max) {
3228 return 0;
3229 }
3230
3231 len += resid;
3232 if (len < 0 || (u_int32_t)len > sb_max) {
3233 return 0;
3234 }
3235 }
3236 }
3237 return 1;
3238 }
3239
3240
3241 struct recv_msg_elem *
3242 alloc_recv_msg_array(u_int count)
3243 {
3244 struct recv_msg_elem *recv_msg_array;
3245
3246 recv_msg_array = _MALLOC(count * sizeof(struct recv_msg_elem),
3247 M_TEMP, M_WAITOK | M_ZERO);
3248
3249 return recv_msg_array;
3250 }
3251
3252 void
3253 free_recv_msg_array(struct recv_msg_elem *recv_msg_array, u_int count)
3254 {
3255 u_int i;
3256
3257 for (i = 0; i < count; i++) {
3258 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3259
3260 if (recv_msg_elem->uio != NULL) {
3261 uio_free(recv_msg_elem->uio);
3262 }
3263 if (recv_msg_elem->psa != NULL) {
3264 _FREE(recv_msg_elem->psa, M_TEMP);
3265 }
3266 if (recv_msg_elem->controlp != NULL) {
3267 m_freem(recv_msg_elem->controlp);
3268 }
3269 }
3270 _FREE(recv_msg_array, M_TEMP);
3271 }
3272
3273
3274 __private_extern__ user_ssize_t
3275 recv_msg_array_resid(struct recv_msg_elem *recv_msg_array, u_int count)
3276 {
3277 user_ssize_t len = 0;
3278 u_int i;
3279
3280 for (i = 0; i < count; i++) {
3281 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3282
3283 if (recv_msg_elem->uio != NULL) {
3284 len += uio_resid(recv_msg_elem->uio);
3285 }
3286 }
3287 return len;
3288 }
3289
3290 int
3291 recv_msg_array_is_valid(struct recv_msg_elem *recv_msg_array, u_int count)
3292 {
3293 user_ssize_t len = 0;
3294 u_int i;
3295
3296 for (i = 0; i < count; i++) {
3297 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3298
3299 if (recv_msg_elem->uio != NULL) {
3300 user_ssize_t resid = uio_resid(recv_msg_elem->uio);
3301
3302 /*
3303 * Sanity check on the validity of the iovec:
3304 * no point of going over sb_max
3305 */
3306 if (resid < 0 || (u_int32_t)resid > sb_max) {
3307 return 0;
3308 }
3309
3310 len += resid;
3311 if (len < 0 || (u_int32_t)len > sb_max) {
3312 return 0;
3313 }
3314 }
3315 }
3316 return 1;
3317 }
3318
3319 #if SENDFILE
3320
/* Buffer count used to size sendfile()'s uio and its per-pass byte limit */
#define SFUIOBUFS		64

/*
 * Number of clusters of a given size needed to hold n bytes
 * (n is converted to unsigned int first; n must be non-zero).
 */
#define HOWMANY_16K(n)		(1 + (((unsigned int)(n) - 1) >> M16KCLSHIFT))
#define HOWMANY_4K(n)		(1 + (((unsigned int)(n) - 1) >> MBIGCLSHIFT))

/* Most bytes sent in one pass: SFUIOBUFS pages */
#define SENDFILE_MAX_BYTES	(SFUIOBUFS << PGSHIFT)

/* The same limit expressed as a count of 16K and of 4K mbuf clusters */
#define SENDFILE_MAX_16K	HOWMANY_16K(SENDFILE_MAX_BYTES)
#define SENDFILE_MAX_4K		HOWMANY_4K(SENDFILE_MAX_BYTES)
3333
3334 static void
3335 alloc_sendpkt(int how, size_t pktlen, unsigned int *maxchunks,
3336 struct mbuf **m, boolean_t jumbocl)
3337 {
3338 unsigned int needed;
3339
3340 if (pktlen == 0) {
3341 panic("%s: pktlen (%ld) must be non-zero\n", __func__, pktlen);
3342 }
3343
3344 /*
3345 * Try to allocate for the whole thing. Since we want full control
3346 * over the buffer size and be able to accept partial result, we can't
3347 * use mbuf_allocpacket(). The logic below is similar to sosend().
3348 */
3349 *m = NULL;
3350 if (pktlen > MBIGCLBYTES && jumbocl) {
3351 needed = MIN(SENDFILE_MAX_16K, HOWMANY_16K(pktlen));
3352 *m = m_getpackets_internal(&needed, 1, how, 0, M16KCLBYTES);
3353 }
3354 if (*m == NULL) {
3355 needed = MIN(SENDFILE_MAX_4K, HOWMANY_4K(pktlen));
3356 *m = m_getpackets_internal(&needed, 1, how, 0, MBIGCLBYTES);
3357 }
3358
3359 /*
3360 * Our previous attempt(s) at allocation had failed; the system
3361 * may be short on mbufs, and we want to block until they are
3362 * available. This time, ask just for 1 mbuf and don't return
3363 * until we get it.
3364 */
3365 if (*m == NULL) {
3366 needed = 1;
3367 *m = m_getpackets_internal(&needed, 1, M_WAIT, 1, MBIGCLBYTES);
3368 }
3369 if (*m == NULL) {
3370 panic("%s: blocking allocation returned NULL\n", __func__);
3371 }
3372
3373 *maxchunks = needed;
3374 }
3375
3376 /*
3377 * sendfile(2).
3378 * int sendfile(int fd, int s, off_t offset, off_t *nbytes,
3379 * struct sf_hdtr *hdtr, int flags)
3380 *
3381 * Send a file specified by 'fd' and starting at 'offset' to a socket
3382 * specified by 's'. Send only '*nbytes' of the file or until EOF if
3383 * *nbytes == 0. Optionally add a header and/or trailer to the socket
3384 * output. If specified, write the total number of bytes sent into *nbytes.
3385 */
3386 int
3387 sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval)
3388 {
3389 struct fileproc *fp;
3390 struct vnode *vp;
3391 struct socket *so;
3392 struct writev_nocancel_args nuap;
3393 user_ssize_t writev_retval;
3394 struct user_sf_hdtr user_hdtr;
3395 struct user32_sf_hdtr user32_hdtr;
3396 struct user64_sf_hdtr user64_hdtr;
3397 off_t off, xfsize;
3398 off_t nbytes = 0, sbytes = 0;
3399 int error = 0;
3400 size_t sizeof_hdtr;
3401 off_t file_size;
3402 struct vfs_context context = *vfs_context_current();
3403
3404 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_START), uap->s,
3405 0, 0, 0, 0);
3406
3407 AUDIT_ARG(fd, uap->fd);
3408 AUDIT_ARG(value32, uap->s);
3409
3410 /*
3411 * Do argument checking. Must be a regular file in, stream
3412 * type and connected socket out, positive offset.
3413 */
3414 if ((error = fp_getfvp(p, uap->fd, &fp, &vp))) {
3415 goto done;
3416 }
3417 if ((fp->f_flag & FREAD) == 0) {
3418 error = EBADF;
3419 goto done1;
3420 }
3421 if (vnode_isreg(vp) == 0) {
3422 error = ENOTSUP;
3423 goto done1;
3424 }
3425 error = file_socket(uap->s, &so);
3426 if (error) {
3427 goto done1;
3428 }
3429 if (so == NULL) {
3430 error = EBADF;
3431 goto done2;
3432 }
3433 if (so->so_type != SOCK_STREAM) {
3434 error = EINVAL;
3435 goto done2;
3436 }
3437 if ((so->so_state & SS_ISCONNECTED) == 0) {
3438 error = ENOTCONN;
3439 goto done2;
3440 }
3441 if (uap->offset < 0) {
3442 error = EINVAL;
3443 goto done2;
3444 }
3445 if (uap->nbytes == USER_ADDR_NULL) {
3446 error = EINVAL;
3447 goto done2;
3448 }
3449 if (uap->flags != 0) {
3450 error = EINVAL;
3451 goto done2;
3452 }
3453
3454 context.vc_ucred = fp->f_fglob->fg_cred;
3455
3456 #if CONFIG_MACF_SOCKET_SUBSET
3457 /* JMM - fetch connected sockaddr? */
3458 error = mac_socket_check_send(context.vc_ucred, so, NULL);
3459 if (error) {
3460 goto done2;
3461 }
3462 #endif
3463
3464 /*
3465 * Get number of bytes to send
3466 * Should it applies to size of header and trailer?
3467 */
3468 error = copyin(uap->nbytes, &nbytes, sizeof(off_t));
3469 if (error) {
3470 goto done2;
3471 }
3472
3473 /*
3474 * If specified, get the pointer to the sf_hdtr struct for
3475 * any headers/trailers.
3476 */
3477 if (uap->hdtr != USER_ADDR_NULL) {
3478 caddr_t hdtrp;
3479
3480 bzero(&user_hdtr, sizeof(user_hdtr));
3481 if (IS_64BIT_PROCESS(p)) {
3482 hdtrp = (caddr_t)&user64_hdtr;
3483 sizeof_hdtr = sizeof(user64_hdtr);
3484 } else {
3485 hdtrp = (caddr_t)&user32_hdtr;
3486 sizeof_hdtr = sizeof(user32_hdtr);
3487 }
3488 error = copyin(uap->hdtr, hdtrp, sizeof_hdtr);
3489 if (error) {
3490 goto done2;
3491 }
3492 if (IS_64BIT_PROCESS(p)) {
3493 user_hdtr.headers = user64_hdtr.headers;
3494 user_hdtr.hdr_cnt = user64_hdtr.hdr_cnt;
3495 user_hdtr.trailers = user64_hdtr.trailers;
3496 user_hdtr.trl_cnt = user64_hdtr.trl_cnt;
3497 } else {
3498 user_hdtr.headers = user32_hdtr.headers;
3499 user_hdtr.hdr_cnt = user32_hdtr.hdr_cnt;
3500 user_hdtr.trailers = user32_hdtr.trailers;
3501 user_hdtr.trl_cnt = user32_hdtr.trl_cnt;
3502 }
3503
3504 /*
3505 * Send any headers. Wimp out and use writev(2).
3506 */
3507 if (user_hdtr.headers != USER_ADDR_NULL) {
3508 bzero(&nuap, sizeof(struct writev_args));
3509 nuap.fd = uap->s;
3510 nuap.iovp = user_hdtr.headers;
3511 nuap.iovcnt = user_hdtr.hdr_cnt;
3512 error = writev_nocancel(p, &nuap, &writev_retval);
3513 if (error) {
3514 goto done2;
3515 }
3516 sbytes += writev_retval;
3517 }
3518 }
3519
3520 /*
3521 * Get the file size for 2 reasons:
3522 * 1. We don't want to allocate more mbufs than necessary
3523 * 2. We don't want to read past the end of file
3524 */
3525 if ((error = vnode_size(vp, &file_size, vfs_context_current())) != 0) {
3526 goto done2;
3527 }
3528
3529 /*
3530 * Simply read file data into a chain of mbufs that are used with
3531 * scatter-gather reads. We're not (yet?) set up to use zero-copy
3532 * external mbufs that point to the file pages.
3533 */
3534 socket_lock(so, 1);
3535 error = sblock(&so->so_snd, SBL_WAIT);
3536 if (error) {
3537 socket_unlock(so, 1);
3538 goto done2;
3539 }
3540 for (off = uap->offset;; off += xfsize, sbytes += xfsize) {
3541 mbuf_t m0 = NULL, m;
3542 unsigned int nbufs = SFUIOBUFS, i;
3543 uio_t auio;
3544 char uio_buf[UIO_SIZEOF(SFUIOBUFS)]; /* 1 KB !!! */
3545 size_t uiolen;
3546 user_ssize_t rlen;
3547 off_t pgoff;
3548 size_t pktlen;
3549 boolean_t jumbocl;
3550
3551 /*
3552 * Calculate the amount to transfer.
3553 * Align to round number of pages.
3554 * Not to exceed send socket buffer,
3555 * the EOF, or the passed in nbytes.
3556 */
3557 xfsize = sbspace(&so->so_snd);
3558
3559 if (xfsize <= 0) {
3560 if (so->so_state & SS_CANTSENDMORE) {
3561 error = EPIPE;
3562 goto done3;
3563 } else if ((so->so_state & SS_NBIO)) {
3564 error = EAGAIN;
3565 goto done3;
3566 } else {
3567 xfsize = PAGE_SIZE;
3568 }
3569 }
3570
3571 if (xfsize > SENDFILE_MAX_BYTES) {
3572 xfsize = SENDFILE_MAX_BYTES;
3573 } else if (xfsize > PAGE_SIZE) {
3574 xfsize = trunc_page(xfsize);
3575 }
3576 pgoff = off & PAGE_MASK_64;
3577 if (pgoff > 0 && PAGE_SIZE - pgoff < xfsize) {
3578 xfsize = PAGE_SIZE_64 - pgoff;
3579 }
3580 if (nbytes && xfsize > (nbytes - sbytes)) {
3581 xfsize = nbytes - sbytes;
3582 }
3583 if (xfsize <= 0) {
3584 break;
3585 }
3586 if (off + xfsize > file_size) {
3587 xfsize = file_size - off;
3588 }
3589 if (xfsize <= 0) {
3590 break;
3591 }
3592
3593 /*
3594 * Attempt to use larger than system page-size clusters for
3595 * large writes only if there is a jumbo cluster pool and
3596 * if the socket is marked accordingly.
3597 */
3598 jumbocl = sosendjcl && njcl > 0 &&
3599 ((so->so_flags & SOF_MULTIPAGES) || sosendjcl_ignore_capab);
3600
3601 socket_unlock(so, 0);
3602 alloc_sendpkt(M_WAIT, xfsize, &nbufs, &m0, jumbocl);
3603 pktlen = mbuf_pkthdr_maxlen(m0);
3604 if (pktlen < (size_t)xfsize) {
3605 xfsize = pktlen;
3606 }
3607
3608 auio = uio_createwithbuffer(nbufs, off, UIO_SYSSPACE,
3609 UIO_READ, &uio_buf[0], sizeof(uio_buf));
3610 if (auio == NULL) {
3611 printf("sendfile failed. nbufs = %d. %s", nbufs,
3612 "File a radar related to rdar://10146739.\n");
3613 mbuf_freem(m0);
3614 error = ENXIO;
3615 socket_lock(so, 0);
3616 goto done3;
3617 }
3618
3619 for (i = 0, m = m0, uiolen = 0;
3620 i < nbufs && m != NULL && uiolen < (size_t)xfsize;
3621 i++, m = mbuf_next(m)) {
3622 size_t mlen = mbuf_maxlen(m);
3623
3624 if (mlen + uiolen > (size_t)xfsize) {
3625 mlen = xfsize - uiolen;
3626 }
3627 mbuf_setlen(m, mlen);
3628 uio_addiov(auio, CAST_USER_ADDR_T(mbuf_datastart(m)),
3629 mlen);
3630 uiolen += mlen;
3631 }
3632
3633 if (xfsize != uio_resid(auio)) {
3634 printf("sendfile: xfsize: %lld != uio_resid(auio): "
3635 "%lld\n", xfsize, (long long)uio_resid(auio));
3636 }
3637
3638 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_START),
3639 uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
3640 (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
3641 error = fo_read(fp, auio, FOF_OFFSET, &context);
3642 socket_lock(so, 0);
3643 if (error != 0) {
3644 if (uio_resid(auio) != xfsize && (error == ERESTART ||
3645 error == EINTR || error == EWOULDBLOCK)) {
3646 error = 0;
3647 } else {
3648 mbuf_freem(m0);
3649 goto done3;
3650 }
3651 }
3652 xfsize -= uio_resid(auio);
3653 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_END),
3654 uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
3655 (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
3656
3657 if (xfsize == 0) {
3658 // printf("sendfile: fo_read 0 bytes, EOF\n");
3659 break;
3660 }
3661 if (xfsize + off > file_size) {
3662 printf("sendfile: xfsize: %lld + off: %lld > file_size:"
3663 "%lld\n", xfsize, off, file_size);
3664 }
3665 for (i = 0, m = m0, rlen = 0;
3666 i < nbufs && m != NULL && rlen < xfsize;
3667 i++, m = mbuf_next(m)) {
3668 size_t mlen = mbuf_maxlen(m);
3669
3670 if (rlen + mlen > (size_t)xfsize) {
3671 mlen = xfsize - rlen;
3672 }
3673 mbuf_setlen(m, mlen);
3674
3675 rlen += mlen;
3676 }
3677 mbuf_pkthdr_setlen(m0, xfsize);
3678
3679 retry_space:
3680 /*
3681 * Make sure that the socket is still able to take more data.
3682 * CANTSENDMORE being true usually means that the connection
3683 * was closed. so_error is true when an error was sensed after
3684 * a previous send.
3685 * The state is checked after the page mapping and buffer
3686 * allocation above since those operations may block and make
3687 * any socket checks stale. From this point forward, nothing
3688 * blocks before the pru_send (or more accurately, any blocking
3689 * results in a loop back to here to re-check).
3690 */
3691 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
3692 if (so->so_state & SS_CANTSENDMORE) {
3693 error = EPIPE;
3694 } else {
3695 error = so->so_error;
3696 so->so_error = 0;
3697 }
3698 m_freem(m0);
3699 goto done3;
3700 }
3701 /*
3702 * Wait for socket space to become available. We do this just
3703 * after checking the connection state above in order to avoid
3704 * a race condition with sbwait().
3705 */
3706 if (sbspace(&so->so_snd) < (long)so->so_snd.sb_lowat) {
3707 if (so->so_state & SS_NBIO) {
3708 m_freem(m0);
3709 error = EAGAIN;
3710 goto done3;
3711 }
3712 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT |
3713 DBG_FUNC_START), uap->s, 0, 0, 0, 0);
3714 error = sbwait(&so->so_snd);
3715 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT |
3716 DBG_FUNC_END), uap->s, 0, 0, 0, 0);
3717 /*
3718 * An error from sbwait usually indicates that we've
3719 * been interrupted by a signal. If we've sent anything
3720 * then return bytes sent, otherwise return the error.
3721 */
3722 if (error) {
3723 m_freem(m0);
3724 goto done3;
3725 }
3726 goto retry_space;
3727 }
3728
3729 struct mbuf *control = NULL;
3730 {
3731 /*
3732 * Socket filter processing
3733 */
3734
3735 error = sflt_data_out(so, NULL, &m0, &control, 0);
3736 if (error) {
3737 if (error == EJUSTRETURN) {
3738 error = 0;
3739 continue;
3740 }
3741 goto done3;
3742 }
3743 /*
3744 * End Socket filter processing
3745 */
3746 }
3747 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
3748 uap->s, 0, 0, 0, 0);
3749 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m0,
3750 0, control, p);
3751 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
3752 uap->s, 0, 0, 0, 0);
3753 if (error) {
3754 goto done3;
3755 }
3756 }
3757 sbunlock(&so->so_snd, FALSE); /* will unlock socket */
3758 /*
3759 * Send trailers. Wimp out and use writev(2).
3760 */
3761 if (uap->hdtr != USER_ADDR_NULL &&
3762 user_hdtr.trailers != USER_ADDR_NULL) {
3763 bzero(&nuap, sizeof(struct writev_args));
3764 nuap.fd = uap->s;
3765 nuap.iovp = user_hdtr.trailers;
3766 nuap.iovcnt = user_hdtr.trl_cnt;
3767 error = writev_nocancel(p, &nuap, &writev_retval);
3768 if (error) {
3769 goto done2;
3770 }
3771 sbytes += writev_retval;
3772 }
3773 done2:
3774 file_drop(uap->s);
3775 done1:
3776 file_drop(uap->fd);
3777 done:
3778 if (uap->nbytes != USER_ADDR_NULL) {
3779 /* XXX this appears bogus for some early failure conditions */
3780 copyout(&sbytes, uap->nbytes, sizeof(off_t));
3781 }
3782 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_END), uap->s,
3783 (unsigned int)((sbytes >> 32) & 0x0ffffffff),
3784 (unsigned int)(sbytes & 0x0ffffffff), error, 0);
3785 return error;
3786 done3:
3787 sbunlock(&so->so_snd, FALSE); /* will unlock socket */
3788 goto done2;
3789 }
3790
3791
3792 #endif /* SENDFILE */