]> git.saurik.com Git - apple/xnu.git/blob - bsd/kern/uipc_syscalls.c
b903e4a18932055b92d7c1d49ebb0627b08ae3d6
[apple/xnu.git] / bsd / kern / uipc_syscalls.c
1 /*
2 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1982, 1986, 1989, 1990, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * sendfile(2) and related extensions:
33 * Copyright (c) 1998, David Greenman. All rights reserved.
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. All advertising materials mentioning features or use of this software
44 * must display the following acknowledgement:
45 * This product includes software developed by the University of
46 * California, Berkeley and its contributors.
47 * 4. Neither the name of the University nor the names of its contributors
48 * may be used to endorse or promote products derived from this software
49 * without specific prior written permission.
50 *
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE.
62 *
63 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94
64 */
65 /*
66 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
67 * support for mandatory and extensible security protections. This notice
68 * is included in support of clause 2.2 (b) of the Apple Public License,
69 * Version 2.0.
70 */
71
72 #include <sys/param.h>
73 #include <sys/systm.h>
74 #include <sys/filedesc.h>
75 #include <sys/proc_internal.h>
76 #include <sys/file_internal.h>
77 #include <sys/vnode_internal.h>
78 #include <sys/malloc.h>
79 #include <sys/mcache.h>
80 #include <sys/mbuf.h>
81 #include <kern/locks.h>
82 #include <sys/domain.h>
83 #include <sys/protosw.h>
84 #include <sys/signalvar.h>
85 #include <sys/socket.h>
86 #include <sys/socketvar.h>
87 #include <sys/kernel.h>
88 #include <sys/uio_internal.h>
89 #include <sys/kauth.h>
90 #include <kern/task.h>
91 #include <sys/priv.h>
92 #include <sys/sysctl.h>
93 #include <sys/sys_domain.h>
94
95 #include <security/audit/audit.h>
96
97 #include <sys/kdebug.h>
98 #include <sys/sysproto.h>
99 #include <netinet/in.h>
100 #include <net/route.h>
101 #include <netinet/in_pcb.h>
102
103 #include <os/ptrtools.h>
104
105 #if CONFIG_MACF_SOCKET_SUBSET
106 #include <security/mac_framework.h>
107 #endif /* MAC_SOCKET_SUBSET */
108
/*
 * Shorthand member names: each one rewrites an access such as fp->f_flag
 * into the corresponding field reached through the f_fglob member.
 */
#define f_flag f_fglob->fg_flag
#define f_type f_fglob->fg_ops->fo_type
#define f_msgcount f_fglob->fg_msgcount
#define f_cred f_fglob->fg_cred
#define f_ops f_fglob->fg_ops
#define f_offset f_fglob->fg_offset
#define f_data f_fglob->fg_data

/*
 * kdebug codes built with NETDBG_CODE() in the DBG_NETSOCK subclass.
 * The DBG_FNC_* codes carry a per-function number in bits 8 and up.
 */
#define DBG_LAYER_IN_BEG NETDBG_CODE(DBG_NETSOCK, 0)
#define DBG_LAYER_IN_END NETDBG_CODE(DBG_NETSOCK, 2)
#define DBG_LAYER_OUT_BEG NETDBG_CODE(DBG_NETSOCK, 1)
#define DBG_LAYER_OUT_END NETDBG_CODE(DBG_NETSOCK, 3)
#define DBG_FNC_SENDMSG NETDBG_CODE(DBG_NETSOCK, (1 << 8) | 1)
#define DBG_FNC_SENDTO NETDBG_CODE(DBG_NETSOCK, (2 << 8) | 1)
#define DBG_FNC_SENDIT NETDBG_CODE(DBG_NETSOCK, (3 << 8) | 1)
#define DBG_FNC_RECVFROM NETDBG_CODE(DBG_NETSOCK, (5 << 8))
#define DBG_FNC_RECVMSG NETDBG_CODE(DBG_NETSOCK, (6 << 8))
#define DBG_FNC_RECVIT NETDBG_CODE(DBG_NETSOCK, (7 << 8))
#define DBG_FNC_SENDFILE NETDBG_CODE(DBG_NETSOCK, (10 << 8))
#define DBG_FNC_SENDFILE_WAIT NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 1))
#define DBG_FNC_SENDFILE_READ NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 2))
#define DBG_FNC_SENDFILE_SEND NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 3))
#define DBG_FNC_SENDMSG_X NETDBG_CODE(DBG_NETSOCK, (11 << 8))
#define DBG_FNC_RECVMSG_X NETDBG_CODE(DBG_NETSOCK, (12 << 8))

/*
 * On DEBUG or DEVELOPMENT builds kernel addresses are passed through
 * unchanged and DBG_PRINTF() is printf(); on all other builds addresses go
 * through VM_KERNEL_ADDRPERM() and DBG_PRINTF() expands to an empty
 * statement.
 */
#if DEBUG || DEVELOPMENT
#define DEBUG_KERNEL_ADDRPERM(_v) (_v)
#define DBG_PRINTF(...) printf(__VA_ARGS__)
#else
#define DEBUG_KERNEL_ADDRPERM(_v) VM_KERNEL_ADDRPERM(_v)
#define DBG_PRINTF(...) do { } while (0)
#endif
141
/* TODO: should be in header file */
int falloc_locked(proc_t, struct fileproc **, int *, vfs_context_t, int);

/*
 * Forward declarations of the file-local helpers.  Only some of them are
 * defined in the part of the file adjacent to these prototypes.
 */

/* Common back ends for the send and receive system call families. */
static int sendit(struct proc *, struct socket *, struct user_msghdr *, uio_t,
    int, int32_t *);
static int recvit(struct proc *, int, struct user_msghdr *, uio_t, user_addr_t,
    int32_t *);
/* Connect back end used by connect_nocancel(). */
static int connectit(struct socket *, struct sockaddr *);
/*
 * User sockaddr fetchers.  Callers in this file release the result of
 * getsockaddr() with FREE(..., M_SONAME), whereas getsockaddr_s() fills a
 * caller-provided sockaddr_storage.
 */
static int getsockaddr(struct socket *, struct sockaddr **, user_addr_t,
    size_t, boolean_t);
static int getsockaddr_s(struct socket *, struct sockaddr_storage *,
    user_addr_t, size_t, boolean_t);
#if SENDFILE
static void alloc_sendpkt(int, size_t, unsigned int *, struct mbuf **,
    boolean_t);
#endif /* SENDFILE */
/* connectx()/disconnectx() implementations and the connectx back end. */
static int connectx_nocancel(struct proc *, struct connectx_args *, int *);
static int connectitx(struct socket *, struct sockaddr *,
    struct sockaddr *, struct proc *, uint32_t, sae_associd_t,
    sae_connid_t *, uio_t, unsigned int, user_ssize_t *);
static int disconnectx_nocancel(struct proc *, struct disconnectx_args *,
    int *);
/* Shared implementation of socket() and socket_delegate(). */
static int socket_common(struct proc *, int, int, int, pid_t, int32_t *, int);

/*
 * Helpers operating on arrays of user_msghdr_x, uio and recv_msg_elem.
 * NOTE(review): presumably used by the sendmsg_x()/recvmsg_x() paths --
 * confirm against their definitions further down the file.
 */
static int internalize_user_msghdr_array(const void *, int, int, u_int,
    struct user_msghdr_x *, struct uio **);
static u_int externalize_user_msghdr_array(void *, int, int, u_int,
    const struct user_msghdr_x *, struct uio **);

static void free_uio_array(struct uio **, u_int);
static int uio_array_is_valid(struct uio **, u_int);
static int recv_msg_array_is_valid(struct recv_msg_elem *, u_int);
static int internalize_recv_msghdr_array(const void *, int, int,
    u_int, struct user_msghdr_x *, struct recv_msg_elem *);
static u_int externalize_recv_msghdr_array(void *, int, int, u_int,
    const struct user_msghdr_x *, struct recv_msg_elem *);
static struct recv_msg_elem *alloc_recv_msg_array(u_int count);
static void free_recv_msg_array(struct recv_msg_elem *, u_int);
180
SYSCTL_DECL(_kern_ipc);

/*
 * Two unsigned tunables, both defaulting to 100, registered read-write
 * (CTLFLAG_RW | CTLFLAG_LOCKED) under the _kern_ipc sysctl node as
 * "maxsendmsgx" and "maxrecvmsgx".
 * NOTE(review): presumably they bound the number of messages handled by one
 * sendmsg_x()/recvmsg_x() call -- confirm at the use sites, which are not
 * next to these definitions.
 */
static u_int somaxsendmsgx = 100;
SYSCTL_UINT(_kern_ipc, OID_AUTO, maxsendmsgx,
    CTLFLAG_RW | CTLFLAG_LOCKED, &somaxsendmsgx, 0, "");
static u_int somaxrecvmsgx = 100;
SYSCTL_UINT(_kern_ipc, OID_AUTO, maxrecvmsgx,
    CTLFLAG_RW | CTLFLAG_LOCKED, &somaxrecvmsgx, 0, "");

/*
 * System call interface to the socket abstraction.
 */

/* File operations table stored in f_ops of every descriptor created below. */
extern const struct fileops socketops;
195
196 /*
197 * Returns: 0 Success
198 * EACCES Mandatory Access Control failure
199 * falloc:ENFILE
200 * falloc:EMFILE
201 * falloc:ENOMEM
202 * socreate:EAFNOSUPPORT
203 * socreate:EPROTOTYPE
204 * socreate:EPROTONOSUPPORT
205 * socreate:ENOBUFS
206 * socreate:ENOMEM
207 * socreate:??? [other protocol families, IPSEC]
208 */
209 int
210 socket(struct proc *p,
211 struct socket_args *uap,
212 int32_t *retval)
213 {
214 return socket_common(p, uap->domain, uap->type, uap->protocol,
215 proc_selfpid(), retval, 0);
216 }
217
218 int
219 socket_delegate(struct proc *p,
220 struct socket_delegate_args *uap,
221 int32_t *retval)
222 {
223 return socket_common(p, uap->domain, uap->type, uap->protocol,
224 uap->epid, retval, 1);
225 }
226
227 static int
228 socket_common(struct proc *p,
229 int domain,
230 int type,
231 int protocol,
232 pid_t epid,
233 int32_t *retval,
234 int delegate)
235 {
236 struct socket *so;
237 struct fileproc *fp;
238 int fd, error;
239
240 AUDIT_ARG(socket, domain, type, protocol);
241 #if CONFIG_MACF_SOCKET_SUBSET
242 if ((error = mac_socket_check_create(kauth_cred_get(), domain,
243 type, protocol)) != 0) {
244 return error;
245 }
246 #endif /* MAC_SOCKET_SUBSET */
247
248 if (delegate) {
249 error = priv_check_cred(kauth_cred_get(),
250 PRIV_NET_PRIVILEGED_SOCKET_DELEGATE, 0);
251 if (error) {
252 return EACCES;
253 }
254 }
255
256 error = falloc(p, &fp, &fd, vfs_context_current());
257 if (error) {
258 return error;
259 }
260 fp->f_flag = FREAD | FWRITE;
261 fp->f_ops = &socketops;
262
263 if (delegate) {
264 error = socreate_delegate(domain, &so, type, protocol, epid);
265 } else {
266 error = socreate(domain, &so, type, protocol);
267 }
268
269 if (error) {
270 fp_free(p, fd, fp);
271 } else {
272 fp->f_data = (caddr_t)so;
273
274 proc_fdlock(p);
275 procfdtbl_releasefd(p, fd, NULL);
276
277 fp_drop(p, fd, fp, 1);
278 proc_fdunlock(p);
279
280 *retval = fd;
281 if (ENTR_SHOULDTRACE) {
282 KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
283 fd, 0, (int64_t)VM_KERNEL_ADDRPERM(so));
284 }
285 }
286 return error;
287 }
288
289 /*
290 * Returns: 0 Success
291 * EDESTADDRREQ Destination address required
292 * EBADF Bad file descriptor
293 * EACCES Mandatory Access Control failure
294 * file_socket:ENOTSOCK
295 * file_socket:EBADF
296 * getsockaddr:ENAMETOOLONG Filename too long
297 * getsockaddr:EINVAL Invalid argument
298 * getsockaddr:ENOMEM Not enough space
299 * getsockaddr:EFAULT Bad address
300 * sobindlock:???
301 */
302 /* ARGSUSED */
303 int
304 bind(__unused proc_t p, struct bind_args *uap, __unused int32_t *retval)
305 {
306 struct sockaddr_storage ss;
307 struct sockaddr *sa = NULL;
308 struct socket *so;
309 boolean_t want_free = TRUE;
310 int error;
311
312 AUDIT_ARG(fd, uap->s);
313 error = file_socket(uap->s, &so);
314 if (error != 0) {
315 return error;
316 }
317 if (so == NULL) {
318 error = EBADF;
319 goto out;
320 }
321 if (uap->name == USER_ADDR_NULL) {
322 error = EDESTADDRREQ;
323 goto out;
324 }
325 if (uap->namelen > sizeof(ss)) {
326 error = getsockaddr(so, &sa, uap->name, uap->namelen, TRUE);
327 } else {
328 error = getsockaddr_s(so, &ss, uap->name, uap->namelen, TRUE);
329 if (error == 0) {
330 sa = (struct sockaddr *)&ss;
331 want_free = FALSE;
332 }
333 }
334 if (error != 0) {
335 goto out;
336 }
337 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
338 #if CONFIG_MACF_SOCKET_SUBSET
339 if ((sa != NULL && sa->sa_family == AF_SYSTEM) ||
340 (error = mac_socket_check_bind(kauth_cred_get(), so, sa)) == 0) {
341 error = sobindlock(so, sa, 1); /* will lock socket */
342 }
343 #else
344 error = sobindlock(so, sa, 1); /* will lock socket */
345 #endif /* MAC_SOCKET_SUBSET */
346 if (want_free) {
347 FREE(sa, M_SONAME);
348 }
349 out:
350 file_drop(uap->s);
351 return error;
352 }
353
354 /*
355 * Returns: 0 Success
356 * EBADF
357 * EACCES Mandatory Access Control failure
358 * file_socket:ENOTSOCK
359 * file_socket:EBADF
360 * solisten:EINVAL
361 * solisten:EOPNOTSUPP
362 * solisten:???
363 */
364 int
365 listen(__unused struct proc *p, struct listen_args *uap,
366 __unused int32_t *retval)
367 {
368 int error;
369 struct socket *so;
370
371 AUDIT_ARG(fd, uap->s);
372 error = file_socket(uap->s, &so);
373 if (error) {
374 return error;
375 }
376 if (so != NULL)
377 #if CONFIG_MACF_SOCKET_SUBSET
378 {
379 error = mac_socket_check_listen(kauth_cred_get(), so);
380 if (error == 0) {
381 error = solisten(so, uap->backlog);
382 }
383 }
384 #else
385 { error = solisten(so, uap->backlog);}
386 #endif /* MAC_SOCKET_SUBSET */
387 else {
388 error = EBADF;
389 }
390
391 file_drop(uap->s);
392 return error;
393 }
394
395 /*
396 * Returns: fp_getfsock:EBADF Bad file descriptor
397 * fp_getfsock:EOPNOTSUPP ...
398 * xlate => :ENOTSOCK Socket operation on non-socket
399 * :EFAULT Bad address on copyin/copyout
400 * :EBADF Bad file descriptor
401 * :EOPNOTSUPP Operation not supported on socket
402 * :EINVAL Invalid argument
403 * :EWOULDBLOCK Operation would block
404 * :ECONNABORTED Connection aborted
405 * :EINTR Interrupted function
406 * :EACCES Mandatory Access Control failure
407 * falloc_locked:ENFILE Too many files open in system
408 * falloc_locked::EMFILE Too many open files
409 * falloc_locked::ENOMEM Not enough space
410 * 0 Success
411 */
412 int
413 accept_nocancel(struct proc *p, struct accept_nocancel_args *uap,
414 int32_t *retval)
415 {
416 struct fileproc *fp;
417 struct sockaddr *sa = NULL;
418 socklen_t namelen;
419 int error;
420 struct socket *head, *so = NULL;
421 lck_mtx_t *mutex_held;
422 int fd = uap->s;
423 int newfd;
424 short fflag; /* type must match fp->f_flag */
425 int dosocklock = 0;
426
427 *retval = -1;
428
429 AUDIT_ARG(fd, uap->s);
430
431 if (uap->name) {
432 error = copyin(uap->anamelen, (caddr_t)&namelen,
433 sizeof(socklen_t));
434 if (error) {
435 return error;
436 }
437 }
438 error = fp_getfsock(p, fd, &fp, &head);
439 if (error) {
440 if (error == EOPNOTSUPP) {
441 error = ENOTSOCK;
442 }
443 return error;
444 }
445 if (head == NULL) {
446 error = EBADF;
447 goto out;
448 }
449 #if CONFIG_MACF_SOCKET_SUBSET
450 if ((error = mac_socket_check_accept(kauth_cred_get(), head)) != 0) {
451 goto out;
452 }
453 #endif /* MAC_SOCKET_SUBSET */
454
455 socket_lock(head, 1);
456
457 if (head->so_proto->pr_getlock != NULL) {
458 mutex_held = (*head->so_proto->pr_getlock)(head, PR_F_WILLUNLOCK);
459 dosocklock = 1;
460 } else {
461 mutex_held = head->so_proto->pr_domain->dom_mtx;
462 dosocklock = 0;
463 }
464
465 if ((head->so_options & SO_ACCEPTCONN) == 0) {
466 if ((head->so_proto->pr_flags & PR_CONNREQUIRED) == 0) {
467 error = EOPNOTSUPP;
468 } else {
469 /* POSIX: The socket is not accepting connections */
470 error = EINVAL;
471 }
472 socket_unlock(head, 1);
473 goto out;
474 }
475 check_again:
476 if ((head->so_state & SS_NBIO) && head->so_comp.tqh_first == NULL) {
477 socket_unlock(head, 1);
478 error = EWOULDBLOCK;
479 goto out;
480 }
481 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
482 if (head->so_state & SS_CANTRCVMORE) {
483 head->so_error = ECONNABORTED;
484 break;
485 }
486 if (head->so_usecount < 1) {
487 panic("accept: head=%p refcount=%d\n", head,
488 head->so_usecount);
489 }
490 error = msleep((caddr_t)&head->so_timeo, mutex_held,
491 PSOCK | PCATCH, "accept", 0);
492 if (head->so_usecount < 1) {
493 panic("accept: 2 head=%p refcount=%d\n", head,
494 head->so_usecount);
495 }
496 if ((head->so_state & SS_DRAINING)) {
497 error = ECONNABORTED;
498 }
499 if (error) {
500 socket_unlock(head, 1);
501 goto out;
502 }
503 }
504 if (head->so_error) {
505 error = head->so_error;
506 head->so_error = 0;
507 socket_unlock(head, 1);
508 goto out;
509 }
510
511 /*
512 * At this point we know that there is at least one connection
513 * ready to be accepted. Remove it from the queue prior to
514 * allocating the file descriptor for it since falloc() may
515 * block allowing another process to accept the connection
516 * instead.
517 */
518 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
519
520 so_acquire_accept_list(head, NULL);
521 if (TAILQ_EMPTY(&head->so_comp)) {
522 so_release_accept_list(head);
523 goto check_again;
524 }
525
526 so = TAILQ_FIRST(&head->so_comp);
527 TAILQ_REMOVE(&head->so_comp, so, so_list);
528 so->so_head = NULL;
529 so->so_state &= ~SS_COMP;
530 head->so_qlen--;
531 so_release_accept_list(head);
532
533 /* unlock head to avoid deadlock with select, keep a ref on head */
534 socket_unlock(head, 0);
535
536 #if CONFIG_MACF_SOCKET_SUBSET
537 /*
538 * Pass the pre-accepted socket to the MAC framework. This is
539 * cheaper than allocating a file descriptor for the socket,
540 * calling the protocol accept callback, and possibly freeing
541 * the file descriptor should the MAC check fails.
542 */
543 if ((error = mac_socket_check_accepted(kauth_cred_get(), so)) != 0) {
544 socket_lock(so, 1);
545 so->so_state &= ~SS_NOFDREF;
546 socket_unlock(so, 1);
547 soclose(so);
548 /* Drop reference on listening socket */
549 sodereference(head);
550 goto out;
551 }
552 #endif /* MAC_SOCKET_SUBSET */
553
554 /*
555 * Pass the pre-accepted socket to any interested socket filter(s).
556 * Upon failure, the socket would have been closed by the callee.
557 */
558 if (so->so_filt != NULL && (error = soacceptfilter(so, head)) != 0) {
559 /* Drop reference on listening socket */
560 sodereference(head);
561 /* Propagate socket filter's error code to the caller */
562 goto out;
563 }
564
565 fflag = fp->f_flag;
566 error = falloc(p, &fp, &newfd, vfs_context_current());
567 if (error) {
568 /*
569 * Probably ran out of file descriptors.
570 *
571 * <rdar://problem/8554930>
572 * Don't put this back on the socket like we used to, that
573 * just causes the client to spin. Drop the socket.
574 */
575 socket_lock(so, 1);
576 so->so_state &= ~SS_NOFDREF;
577 socket_unlock(so, 1);
578 soclose(so);
579 sodereference(head);
580 goto out;
581 }
582 *retval = newfd;
583 fp->f_flag = fflag;
584 fp->f_ops = &socketops;
585 fp->f_data = (caddr_t)so;
586
587 socket_lock(head, 0);
588 if (dosocklock) {
589 socket_lock(so, 1);
590 }
591
592 /* Sync socket non-blocking/async state with file flags */
593 if (fp->f_flag & FNONBLOCK) {
594 so->so_state |= SS_NBIO;
595 } else {
596 so->so_state &= ~SS_NBIO;
597 }
598
599 if (fp->f_flag & FASYNC) {
600 so->so_state |= SS_ASYNC;
601 so->so_rcv.sb_flags |= SB_ASYNC;
602 so->so_snd.sb_flags |= SB_ASYNC;
603 } else {
604 so->so_state &= ~SS_ASYNC;
605 so->so_rcv.sb_flags &= ~SB_ASYNC;
606 so->so_snd.sb_flags &= ~SB_ASYNC;
607 }
608
609 (void) soacceptlock(so, &sa, 0);
610 socket_unlock(head, 1);
611 if (sa == NULL) {
612 namelen = 0;
613 if (uap->name) {
614 goto gotnoname;
615 }
616 error = 0;
617 goto releasefd;
618 }
619 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
620
621 if (uap->name) {
622 socklen_t sa_len;
623
624 /* save sa_len before it is destroyed */
625 sa_len = sa->sa_len;
626 namelen = MIN(namelen, sa_len);
627 error = copyout(sa, uap->name, namelen);
628 if (!error) {
629 /* return the actual, untruncated address length */
630 namelen = sa_len;
631 }
632 gotnoname:
633 error = copyout((caddr_t)&namelen, uap->anamelen,
634 sizeof(socklen_t));
635 }
636 FREE(sa, M_SONAME);
637
638 releasefd:
639 /*
640 * If the socket has been marked as inactive by sosetdefunct(),
641 * disallow further operations on it.
642 */
643 if (so->so_flags & SOF_DEFUNCT) {
644 sodefunct(current_proc(), so,
645 SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL);
646 }
647
648 if (dosocklock) {
649 socket_unlock(so, 1);
650 }
651
652 proc_fdlock(p);
653 procfdtbl_releasefd(p, newfd, NULL);
654 fp_drop(p, newfd, fp, 1);
655 proc_fdunlock(p);
656
657 out:
658 file_drop(fd);
659
660 if (error == 0 && ENTR_SHOULDTRACE) {
661 KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
662 newfd, 0, (int64_t)VM_KERNEL_ADDRPERM(so));
663 }
664 return error;
665 }
666
667 int
668 accept(struct proc *p, struct accept_args *uap, int32_t *retval)
669 {
670 __pthread_testcancel(1);
671 return accept_nocancel(p, (struct accept_nocancel_args *)uap,
672 retval);
673 }
674
675 /*
676 * Returns: 0 Success
677 * EBADF Bad file descriptor
678 * EALREADY Connection already in progress
679 * EINPROGRESS Operation in progress
680 * ECONNABORTED Connection aborted
681 * EINTR Interrupted function
682 * EACCES Mandatory Access Control failure
683 * file_socket:ENOTSOCK
684 * file_socket:EBADF
685 * getsockaddr:ENAMETOOLONG Filename too long
686 * getsockaddr:EINVAL Invalid argument
687 * getsockaddr:ENOMEM Not enough space
688 * getsockaddr:EFAULT Bad address
689 * soconnectlock:EOPNOTSUPP
690 * soconnectlock:EISCONN
691 * soconnectlock:??? [depends on protocol, filters]
692 * msleep:EINTR
693 *
694 * Imputed: so_error error may be set from so_error, which
695 * may have been set by soconnectlock.
696 */
697 /* ARGSUSED */
698 int
699 connect(struct proc *p, struct connect_args *uap, int32_t *retval)
700 {
701 __pthread_testcancel(1);
702 return connect_nocancel(p, (struct connect_nocancel_args *)uap,
703 retval);
704 }
705
706 int
707 connect_nocancel(proc_t p, struct connect_nocancel_args *uap, int32_t *retval)
708 {
709 #pragma unused(p, retval)
710 struct socket *so;
711 struct sockaddr_storage ss;
712 struct sockaddr *sa = NULL;
713 int error;
714 int fd = uap->s;
715 boolean_t dgram;
716
717 AUDIT_ARG(fd, uap->s);
718 error = file_socket(fd, &so);
719 if (error != 0) {
720 return error;
721 }
722 if (so == NULL) {
723 error = EBADF;
724 goto out;
725 }
726
727 /*
728 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
729 * if this is a datagram socket; translate for other types.
730 */
731 dgram = (so->so_type == SOCK_DGRAM);
732
733 /* Get socket address now before we obtain socket lock */
734 if (uap->namelen > sizeof(ss)) {
735 error = getsockaddr(so, &sa, uap->name, uap->namelen, !dgram);
736 } else {
737 error = getsockaddr_s(so, &ss, uap->name, uap->namelen, !dgram);
738 if (error == 0) {
739 sa = (struct sockaddr *)&ss;
740 }
741 }
742 if (error != 0) {
743 goto out;
744 }
745
746 error = connectit(so, sa);
747
748 if (sa != NULL && sa != SA(&ss)) {
749 FREE(sa, M_SONAME);
750 }
751 if (error == ERESTART) {
752 error = EINTR;
753 }
754 out:
755 file_drop(fd);
756 return error;
757 }
758
759 static int
760 connectx_nocancel(struct proc *p, struct connectx_args *uap, int *retval)
761 {
762 #pragma unused(p, retval)
763 struct sockaddr_storage ss, sd;
764 struct sockaddr *src = NULL, *dst = NULL;
765 struct socket *so;
766 int error, error1, fd = uap->socket;
767 boolean_t dgram;
768 sae_connid_t cid = SAE_CONNID_ANY;
769 struct user32_sa_endpoints ep32;
770 struct user64_sa_endpoints ep64;
771 struct user_sa_endpoints ep;
772 user_ssize_t bytes_written = 0;
773 struct user_iovec *iovp;
774 uio_t auio = NULL;
775
776 AUDIT_ARG(fd, uap->socket);
777 error = file_socket(fd, &so);
778 if (error != 0) {
779 return error;
780 }
781 if (so == NULL) {
782 error = EBADF;
783 goto out;
784 }
785
786 if (uap->endpoints == USER_ADDR_NULL) {
787 error = EINVAL;
788 goto out;
789 }
790
791 if (IS_64BIT_PROCESS(p)) {
792 error = copyin(uap->endpoints, (caddr_t)&ep64, sizeof(ep64));
793 if (error != 0) {
794 goto out;
795 }
796
797 ep.sae_srcif = ep64.sae_srcif;
798 ep.sae_srcaddr = ep64.sae_srcaddr;
799 ep.sae_srcaddrlen = ep64.sae_srcaddrlen;
800 ep.sae_dstaddr = ep64.sae_dstaddr;
801 ep.sae_dstaddrlen = ep64.sae_dstaddrlen;
802 } else {
803 error = copyin(uap->endpoints, (caddr_t)&ep32, sizeof(ep32));
804 if (error != 0) {
805 goto out;
806 }
807
808 ep.sae_srcif = ep32.sae_srcif;
809 ep.sae_srcaddr = ep32.sae_srcaddr;
810 ep.sae_srcaddrlen = ep32.sae_srcaddrlen;
811 ep.sae_dstaddr = ep32.sae_dstaddr;
812 ep.sae_dstaddrlen = ep32.sae_dstaddrlen;
813 }
814
815 /*
816 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
817 * if this is a datagram socket; translate for other types.
818 */
819 dgram = (so->so_type == SOCK_DGRAM);
820
821 /* Get socket address now before we obtain socket lock */
822 if (ep.sae_srcaddr != USER_ADDR_NULL) {
823 if (ep.sae_srcaddrlen > sizeof(ss)) {
824 error = getsockaddr(so, &src, ep.sae_srcaddr, ep.sae_srcaddrlen, dgram);
825 } else {
826 error = getsockaddr_s(so, &ss, ep.sae_srcaddr, ep.sae_srcaddrlen, dgram);
827 if (error == 0) {
828 src = (struct sockaddr *)&ss;
829 }
830 }
831
832 if (error) {
833 goto out;
834 }
835 }
836
837 if (ep.sae_dstaddr == USER_ADDR_NULL) {
838 error = EINVAL;
839 goto out;
840 }
841
842 /* Get socket address now before we obtain socket lock */
843 if (ep.sae_dstaddrlen > sizeof(sd)) {
844 error = getsockaddr(so, &dst, ep.sae_dstaddr, ep.sae_dstaddrlen, dgram);
845 } else {
846 error = getsockaddr_s(so, &sd, ep.sae_dstaddr, ep.sae_dstaddrlen, dgram);
847 if (error == 0) {
848 dst = (struct sockaddr *)&sd;
849 }
850 }
851
852 if (error) {
853 goto out;
854 }
855
856 VERIFY(dst != NULL);
857
858 if (uap->iov != USER_ADDR_NULL) {
859 /* Verify range before calling uio_create() */
860 if (uap->iovcnt <= 0 || uap->iovcnt > UIO_MAXIOV) {
861 error = EINVAL;
862 goto out;
863 }
864
865 if (uap->len == USER_ADDR_NULL) {
866 error = EINVAL;
867 goto out;
868 }
869
870 /* allocate a uio to hold the number of iovecs passed */
871 auio = uio_create(uap->iovcnt, 0,
872 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
873 UIO_WRITE);
874
875 if (auio == NULL) {
876 error = ENOMEM;
877 goto out;
878 }
879
880 /*
881 * get location of iovecs within the uio.
882 * then copyin the iovecs from user space.
883 */
884 iovp = uio_iovsaddr(auio);
885 if (iovp == NULL) {
886 error = ENOMEM;
887 goto out;
888 }
889 error = copyin_user_iovec_array(uap->iov,
890 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
891 uap->iovcnt, iovp);
892 if (error != 0) {
893 goto out;
894 }
895
896 /* finish setup of uio_t */
897 error = uio_calculateresid(auio);
898 if (error != 0) {
899 goto out;
900 }
901 }
902
903 error = connectitx(so, src, dst, p, ep.sae_srcif, uap->associd,
904 &cid, auio, uap->flags, &bytes_written);
905 if (error == ERESTART) {
906 error = EINTR;
907 }
908
909 if (uap->len != USER_ADDR_NULL) {
910 error1 = copyout(&bytes_written, uap->len, sizeof(uap->len));
911 /* give precedence to connectitx errors */
912 if ((error1 != 0) && (error == 0)) {
913 error = error1;
914 }
915 }
916
917 if (uap->connid != USER_ADDR_NULL) {
918 error1 = copyout(&cid, uap->connid, sizeof(cid));
919 /* give precedence to connectitx errors */
920 if ((error1 != 0) && (error == 0)) {
921 error = error1;
922 }
923 }
924 out:
925 file_drop(fd);
926 if (auio != NULL) {
927 uio_free(auio);
928 }
929 if (src != NULL && src != SA(&ss)) {
930 FREE(src, M_SONAME);
931 }
932 if (dst != NULL && dst != SA(&sd)) {
933 FREE(dst, M_SONAME);
934 }
935 return error;
936 }
937
/*
 * connectx
 *
 * System call entry point: test for pthread cancellation, then run
 * connectx_nocancel() and return its result.
 */
int
connectx(struct proc *p, struct connectx_args *uap, int *retval)
{
	int error;

	/*
	 * Due to similarity with a POSIX interface, define as
	 * an unofficial cancellation point.
	 */
	__pthread_testcancel(1);

	error = connectx_nocancel(p, uap, retval);
	return error;
}
948
949 static int
950 connectit(struct socket *so, struct sockaddr *sa)
951 {
952 int error;
953
954 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
955 #if CONFIG_MACF_SOCKET_SUBSET
956 if ((error = mac_socket_check_connect(kauth_cred_get(), so, sa)) != 0) {
957 return error;
958 }
959 #endif /* MAC_SOCKET_SUBSET */
960
961 socket_lock(so, 1);
962 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
963 error = EALREADY;
964 goto out;
965 }
966 error = soconnectlock(so, sa, 0);
967 if (error != 0) {
968 so->so_state &= ~SS_ISCONNECTING;
969 goto out;
970 }
971 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
972 error = EINPROGRESS;
973 goto out;
974 }
975 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
976 lck_mtx_t *mutex_held;
977
978 if (so->so_proto->pr_getlock != NULL) {
979 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
980 } else {
981 mutex_held = so->so_proto->pr_domain->dom_mtx;
982 }
983 error = msleep((caddr_t)&so->so_timeo, mutex_held,
984 PSOCK | PCATCH, __func__, 0);
985 if (so->so_state & SS_DRAINING) {
986 error = ECONNABORTED;
987 }
988 if (error != 0) {
989 break;
990 }
991 }
992 if (error == 0) {
993 error = so->so_error;
994 so->so_error = 0;
995 }
996 out:
997 socket_unlock(so, 1);
998 return error;
999 }
1000
1001 static int
1002 connectitx(struct socket *so, struct sockaddr *src,
1003 struct sockaddr *dst, struct proc *p, uint32_t ifscope,
1004 sae_associd_t aid, sae_connid_t *pcid, uio_t auio, unsigned int flags,
1005 user_ssize_t *bytes_written)
1006 {
1007 int error;
1008 #pragma unused (flags)
1009
1010 VERIFY(dst != NULL);
1011
1012 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), dst);
1013 #if CONFIG_MACF_SOCKET_SUBSET
1014 if ((error = mac_socket_check_connect(kauth_cred_get(), so, dst)) != 0) {
1015 return error;
1016 }
1017
1018 if (auio != NULL) {
1019 if ((error = mac_socket_check_send(kauth_cred_get(), so, dst)) != 0) {
1020 return error;
1021 }
1022 }
1023 #endif /* MAC_SOCKET_SUBSET */
1024
1025 socket_lock(so, 1);
1026 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
1027 error = EALREADY;
1028 goto out;
1029 }
1030
1031 if ((so->so_proto->pr_flags & PR_DATA_IDEMPOTENT) &&
1032 (flags & CONNECT_DATA_IDEMPOTENT)) {
1033 so->so_flags1 |= SOF1_DATA_IDEMPOTENT;
1034
1035 if (flags & CONNECT_DATA_AUTHENTICATED) {
1036 so->so_flags1 |= SOF1_DATA_AUTHENTICATED;
1037 }
1038 }
1039
1040 /*
1041 * Case 1: CONNECT_RESUME_ON_READ_WRITE set, no data.
1042 * Case 2: CONNECT_RESUME_ON_READ_WRITE set, with data (user error)
1043 * Case 3: CONNECT_RESUME_ON_READ_WRITE not set, with data
1044 * Case 3 allows user to combine write with connect even if they have
1045 * no use for TFO (such as regular TCP, and UDP).
1046 * Case 4: CONNECT_RESUME_ON_READ_WRITE not set, no data (regular case)
1047 */
1048 if ((so->so_proto->pr_flags & PR_PRECONN_WRITE) &&
1049 ((flags & CONNECT_RESUME_ON_READ_WRITE) || auio)) {
1050 so->so_flags1 |= SOF1_PRECONNECT_DATA;
1051 }
1052
1053 /*
1054 * If a user sets data idempotent and does not pass an uio, or
1055 * sets CONNECT_RESUME_ON_READ_WRITE, this is an error, reset
1056 * SOF1_DATA_IDEMPOTENT.
1057 */
1058 if (!(so->so_flags1 & SOF1_PRECONNECT_DATA) &&
1059 (so->so_flags1 & SOF1_DATA_IDEMPOTENT)) {
1060 /* We should return EINVAL instead perhaps. */
1061 so->so_flags1 &= ~SOF1_DATA_IDEMPOTENT;
1062 }
1063
1064 error = soconnectxlocked(so, src, dst, p, ifscope,
1065 aid, pcid, 0, NULL, 0, auio, bytes_written);
1066 if (error != 0) {
1067 so->so_state &= ~SS_ISCONNECTING;
1068 goto out;
1069 }
1070 /*
1071 * If, after the call to soconnectxlocked the flag is still set (in case
1072 * data has been queued and the connect() has actually been triggered,
1073 * it will have been unset by the transport), we exit immediately. There
1074 * is no reason to wait on any event.
1075 */
1076 if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
1077 error = 0;
1078 goto out;
1079 }
1080 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
1081 error = EINPROGRESS;
1082 goto out;
1083 }
1084 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
1085 lck_mtx_t *mutex_held;
1086
1087 if (so->so_proto->pr_getlock != NULL) {
1088 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
1089 } else {
1090 mutex_held = so->so_proto->pr_domain->dom_mtx;
1091 }
1092 error = msleep((caddr_t)&so->so_timeo, mutex_held,
1093 PSOCK | PCATCH, __func__, 0);
1094 if (so->so_state & SS_DRAINING) {
1095 error = ECONNABORTED;
1096 }
1097 if (error != 0) {
1098 break;
1099 }
1100 }
1101 if (error == 0) {
1102 error = so->so_error;
1103 so->so_error = 0;
1104 }
1105 out:
1106 socket_unlock(so, 1);
1107 return error;
1108 }
1109
/*
 * peeloff
 *
 * Stub system call: every argument is ignored and the call always
 * reports success.  Because the interface resembles a POSIX one, it
 * is still treated as an unofficial cancellation point.
 *
 * Returns:	0			Success (always)
 */
int
peeloff(struct proc *p, struct peeloff_args *uap, int *retval)
{
#pragma unused(p, uap, retval)
	__pthread_testcancel(1);

	return 0;
}
1121
/*
 * disconnectx
 *
 * Cancellable front end for disconnectx_nocancel().  The interface
 * resembles a POSIX one, so it is treated as an unofficial
 * cancellation point before the real work is done.
 *
 * Returns:	disconnectx_nocancel:???
 */
int
disconnectx(struct proc *p, struct disconnectx_args *uap, int *retval)
{
	int error;

	__pthread_testcancel(1);
	error = disconnectx_nocancel(p, uap, retval);

	return error;
}
1132
1133 static int
1134 disconnectx_nocancel(struct proc *p, struct disconnectx_args *uap, int *retval)
1135 {
1136 #pragma unused(p, retval)
1137 struct socket *so;
1138 int fd = uap->s;
1139 int error;
1140
1141 error = file_socket(fd, &so);
1142 if (error != 0) {
1143 return error;
1144 }
1145 if (so == NULL) {
1146 error = EBADF;
1147 goto out;
1148 }
1149
1150 error = sodisconnectx(so, uap->aid, uap->cid);
1151 out:
1152 file_drop(fd);
1153 return error;
1154 }
1155
1156 /*
1157 * Returns: 0 Success
1158 * socreate:EAFNOSUPPORT
1159 * socreate:EPROTOTYPE
1160 * socreate:EPROTONOSUPPORT
1161 * socreate:ENOBUFS
1162 * socreate:ENOMEM
1163 * socreate:EISCONN
1164 * socreate:??? [other protocol families, IPSEC]
1165 * falloc:ENFILE
1166 * falloc:EMFILE
1167 * falloc:ENOMEM
1168 * copyout:EFAULT
1169 * soconnect2:EINVAL
1170 * soconnect2:EPROTOTYPE
1171 * soconnect2:??? [other protocol families[
1172 */
1173 int
1174 socketpair(struct proc *p, struct socketpair_args *uap,
1175 __unused int32_t *retval)
1176 {
1177 struct fileproc *fp1, *fp2;
1178 struct socket *so1, *so2;
1179 int fd, error, sv[2];
1180
1181 AUDIT_ARG(socket, uap->domain, uap->type, uap->protocol);
1182 error = socreate(uap->domain, &so1, uap->type, uap->protocol);
1183 if (error) {
1184 return error;
1185 }
1186 error = socreate(uap->domain, &so2, uap->type, uap->protocol);
1187 if (error) {
1188 goto free1;
1189 }
1190
1191 error = falloc(p, &fp1, &fd, vfs_context_current());
1192 if (error) {
1193 goto free2;
1194 }
1195 fp1->f_flag = FREAD | FWRITE;
1196 fp1->f_ops = &socketops;
1197 fp1->f_data = (caddr_t)so1;
1198 sv[0] = fd;
1199
1200 error = falloc(p, &fp2, &fd, vfs_context_current());
1201 if (error) {
1202 goto free3;
1203 }
1204 fp2->f_flag = FREAD | FWRITE;
1205 fp2->f_ops = &socketops;
1206 fp2->f_data = (caddr_t)so2;
1207 sv[1] = fd;
1208
1209 error = soconnect2(so1, so2);
1210 if (error) {
1211 goto free4;
1212 }
1213 if (uap->type == SOCK_DGRAM) {
1214 /*
1215 * Datagram socket connection is asymmetric.
1216 */
1217 error = soconnect2(so2, so1);
1218 if (error) {
1219 goto free4;
1220 }
1221 }
1222
1223 if ((error = copyout(sv, uap->rsv, 2 * sizeof(int))) != 0) {
1224 goto free4;
1225 }
1226
1227 proc_fdlock(p);
1228 procfdtbl_releasefd(p, sv[0], NULL);
1229 procfdtbl_releasefd(p, sv[1], NULL);
1230 fp_drop(p, sv[0], fp1, 1);
1231 fp_drop(p, sv[1], fp2, 1);
1232 proc_fdunlock(p);
1233
1234 return 0;
1235 free4:
1236 fp_free(p, sv[1], fp2);
1237 free3:
1238 fp_free(p, sv[0], fp1);
1239 free2:
1240 (void) soclose(so2);
1241 free1:
1242 (void) soclose(so1);
1243 return error;
1244 }
1245
1246 /*
1247 * Returns: 0 Success
1248 * EINVAL
1249 * ENOBUFS
1250 * EBADF
1251 * EPIPE
1252 * EACCES Mandatory Access Control failure
1253 * file_socket:ENOTSOCK
1254 * file_socket:EBADF
1255 * getsockaddr:ENAMETOOLONG Filename too long
1256 * getsockaddr:EINVAL Invalid argument
1257 * getsockaddr:ENOMEM Not enough space
1258 * getsockaddr:EFAULT Bad address
1259 * <pru_sosend>:EACCES[TCP]
1260 * <pru_sosend>:EADDRINUSE[TCP]
1261 * <pru_sosend>:EADDRNOTAVAIL[TCP]
1262 * <pru_sosend>:EAFNOSUPPORT[TCP]
1263 * <pru_sosend>:EAGAIN[TCP]
1264 * <pru_sosend>:EBADF
1265 * <pru_sosend>:ECONNRESET[TCP]
1266 * <pru_sosend>:EFAULT
1267 * <pru_sosend>:EHOSTUNREACH[TCP]
1268 * <pru_sosend>:EINTR
1269 * <pru_sosend>:EINVAL
1270 * <pru_sosend>:EISCONN[AF_INET]
1271 * <pru_sosend>:EMSGSIZE[TCP]
1272 * <pru_sosend>:ENETDOWN[TCP]
1273 * <pru_sosend>:ENETUNREACH[TCP]
1274 * <pru_sosend>:ENOBUFS
1275 * <pru_sosend>:ENOMEM[TCP]
1276 * <pru_sosend>:ENOTCONN[AF_INET]
1277 * <pru_sosend>:EOPNOTSUPP
1278 * <pru_sosend>:EPERM[TCP]
1279 * <pru_sosend>:EPIPE
1280 * <pru_sosend>:EWOULDBLOCK
1281 * <pru_sosend>:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
1282 * <pru_sosend>:???[AF_INET] [whatever a filter author chooses]
1283 * <pru_sosend>:??? [value from so_error]
1284 * sockargs:???
1285 */
1286 static int
1287 sendit(struct proc *p, struct socket *so, struct user_msghdr *mp, uio_t uiop,
1288 int flags, int32_t *retval)
1289 {
1290 struct mbuf *control = NULL;
1291 struct sockaddr_storage ss;
1292 struct sockaddr *to = NULL;
1293 boolean_t want_free = TRUE;
1294 int error;
1295 user_ssize_t len;
1296
1297 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
1298
1299 if (mp->msg_name != USER_ADDR_NULL) {
1300 if (mp->msg_namelen > sizeof(ss)) {
1301 error = getsockaddr(so, &to, mp->msg_name,
1302 mp->msg_namelen, TRUE);
1303 } else {
1304 error = getsockaddr_s(so, &ss, mp->msg_name,
1305 mp->msg_namelen, TRUE);
1306 if (error == 0) {
1307 to = (struct sockaddr *)&ss;
1308 want_free = FALSE;
1309 }
1310 }
1311 if (error != 0) {
1312 goto out;
1313 }
1314 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), to);
1315 }
1316 if (mp->msg_control != USER_ADDR_NULL) {
1317 if (mp->msg_controllen < sizeof(struct cmsghdr)) {
1318 error = EINVAL;
1319 goto bad;
1320 }
1321 error = sockargs(&control, mp->msg_control,
1322 mp->msg_controllen, MT_CONTROL);
1323 if (error != 0) {
1324 goto bad;
1325 }
1326 }
1327
1328 #if CONFIG_MACF_SOCKET_SUBSET
1329 /*
1330 * We check the state without holding the socket lock;
1331 * if a race condition occurs, it would simply result
1332 * in an extra call to the MAC check function.
1333 */
1334 if (to != NULL &&
1335 !(so->so_state & SS_DEFUNCT) &&
1336 (error = mac_socket_check_send(kauth_cred_get(), so, to)) != 0) {
1337 goto bad;
1338 }
1339 #endif /* MAC_SOCKET_SUBSET */
1340
1341 len = uio_resid(uiop);
1342 error = so->so_proto->pr_usrreqs->pru_sosend(so, to, uiop, 0,
1343 control, flags);
1344 if (error != 0) {
1345 if (uio_resid(uiop) != len && (error == ERESTART ||
1346 error == EINTR || error == EWOULDBLOCK)) {
1347 error = 0;
1348 }
1349 /* Generation of SIGPIPE can be controlled per socket */
1350 if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE)) {
1351 psignal(p, SIGPIPE);
1352 }
1353 }
1354 if (error == 0) {
1355 *retval = (int)(len - uio_resid(uiop));
1356 }
1357 bad:
1358 if (to != NULL && want_free) {
1359 FREE(to, M_SONAME);
1360 }
1361 out:
1362 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error, 0, 0, 0, 0);
1363
1364 return error;
1365 }
1366
1367 /*
1368 * Returns: 0 Success
1369 * ENOMEM
1370 * sendit:??? [see sendit definition in this file]
1371 * write:??? [4056224: applicable for pipes]
1372 */
1373 int
1374 sendto(struct proc *p, struct sendto_args *uap, int32_t *retval)
1375 {
1376 __pthread_testcancel(1);
1377 return sendto_nocancel(p, (struct sendto_nocancel_args *)uap, retval);
1378 }
1379
1380 int
1381 sendto_nocancel(struct proc *p,
1382 struct sendto_nocancel_args *uap,
1383 int32_t *retval)
1384 {
1385 struct user_msghdr msg;
1386 int error;
1387 uio_t auio = NULL;
1388 struct socket *so;
1389
1390 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_START, 0, 0, 0, 0, 0);
1391 AUDIT_ARG(fd, uap->s);
1392
1393 auio = uio_create(1, 0,
1394 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1395 UIO_WRITE);
1396 if (auio == NULL) {
1397 error = ENOMEM;
1398 goto done;
1399 }
1400 uio_addiov(auio, uap->buf, uap->len);
1401
1402 msg.msg_name = uap->to;
1403 msg.msg_namelen = uap->tolen;
1404 /* no need to set up msg_iov. sendit uses uio_t we send it */
1405 msg.msg_iov = 0;
1406 msg.msg_iovlen = 0;
1407 msg.msg_control = 0;
1408 msg.msg_flags = 0;
1409
1410 error = file_socket(uap->s, &so);
1411 if (error) {
1412 goto done;
1413 }
1414
1415 if (so == NULL) {
1416 error = EBADF;
1417 } else {
1418 error = sendit(p, so, &msg, auio, uap->flags, retval);
1419 }
1420
1421 file_drop(uap->s);
1422 done:
1423 if (auio != NULL) {
1424 uio_free(auio);
1425 }
1426
1427 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_END, error, *retval, 0, 0, 0);
1428
1429 return error;
1430 }
1431
1432 /*
1433 * Returns: 0 Success
1434 * ENOBUFS
1435 * copyin:EFAULT
1436 * sendit:??? [see sendit definition in this file]
1437 */
1438 int
1439 sendmsg(struct proc *p, struct sendmsg_args *uap, int32_t *retval)
1440 {
1441 __pthread_testcancel(1);
1442 return sendmsg_nocancel(p, (struct sendmsg_nocancel_args *)uap,
1443 retval);
1444 }
1445
1446 int
1447 sendmsg_nocancel(struct proc *p, struct sendmsg_nocancel_args *uap,
1448 int32_t *retval)
1449 {
1450 struct user32_msghdr msg32;
1451 struct user64_msghdr msg64;
1452 struct user_msghdr user_msg;
1453 caddr_t msghdrp;
1454 int size_of_msghdr;
1455 int error;
1456 uio_t auio = NULL;
1457 struct user_iovec *iovp;
1458 struct socket *so;
1459
1460 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
1461 AUDIT_ARG(fd, uap->s);
1462 if (IS_64BIT_PROCESS(p)) {
1463 msghdrp = (caddr_t)&msg64;
1464 size_of_msghdr = sizeof(msg64);
1465 } else {
1466 msghdrp = (caddr_t)&msg32;
1467 size_of_msghdr = sizeof(msg32);
1468 }
1469 error = copyin(uap->msg, msghdrp, size_of_msghdr);
1470 if (error) {
1471 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1472 return error;
1473 }
1474
1475 if (IS_64BIT_PROCESS(p)) {
1476 user_msg.msg_flags = msg64.msg_flags;
1477 user_msg.msg_controllen = msg64.msg_controllen;
1478 user_msg.msg_control = msg64.msg_control;
1479 user_msg.msg_iovlen = msg64.msg_iovlen;
1480 user_msg.msg_iov = msg64.msg_iov;
1481 user_msg.msg_namelen = msg64.msg_namelen;
1482 user_msg.msg_name = msg64.msg_name;
1483 } else {
1484 user_msg.msg_flags = msg32.msg_flags;
1485 user_msg.msg_controllen = msg32.msg_controllen;
1486 user_msg.msg_control = msg32.msg_control;
1487 user_msg.msg_iovlen = msg32.msg_iovlen;
1488 user_msg.msg_iov = msg32.msg_iov;
1489 user_msg.msg_namelen = msg32.msg_namelen;
1490 user_msg.msg_name = msg32.msg_name;
1491 }
1492
1493 if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
1494 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, EMSGSIZE,
1495 0, 0, 0, 0);
1496 return EMSGSIZE;
1497 }
1498
1499 /* allocate a uio large enough to hold the number of iovecs passed */
1500 auio = uio_create(user_msg.msg_iovlen, 0,
1501 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1502 UIO_WRITE);
1503 if (auio == NULL) {
1504 error = ENOBUFS;
1505 goto done;
1506 }
1507
1508 if (user_msg.msg_iovlen) {
1509 /*
1510 * get location of iovecs within the uio.
1511 * then copyin the iovecs from user space.
1512 */
1513 iovp = uio_iovsaddr(auio);
1514 if (iovp == NULL) {
1515 error = ENOBUFS;
1516 goto done;
1517 }
1518 error = copyin_user_iovec_array(user_msg.msg_iov,
1519 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
1520 user_msg.msg_iovlen, iovp);
1521 if (error) {
1522 goto done;
1523 }
1524 user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
1525
1526 /* finish setup of uio_t */
1527 error = uio_calculateresid(auio);
1528 if (error) {
1529 goto done;
1530 }
1531 } else {
1532 user_msg.msg_iov = 0;
1533 }
1534
1535 /* msg_flags is ignored for send */
1536 user_msg.msg_flags = 0;
1537
1538 error = file_socket(uap->s, &so);
1539 if (error) {
1540 goto done;
1541 }
1542 if (so == NULL) {
1543 error = EBADF;
1544 } else {
1545 error = sendit(p, so, &user_msg, auio, uap->flags, retval);
1546 }
1547 file_drop(uap->s);
1548 done:
1549 if (auio != NULL) {
1550 uio_free(auio);
1551 }
1552 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1553
1554 return error;
1555 }
1556
1557 int
1558 sendmsg_x(struct proc *p, struct sendmsg_x_args *uap, user_ssize_t *retval)
1559 {
1560 int error = 0;
1561 struct user_msghdr_x *user_msg_x = NULL;
1562 struct uio **uiop = NULL;
1563 struct socket *so;
1564 u_int i;
1565 struct sockaddr *to = NULL;
1566 user_ssize_t len_before = 0, len_after;
1567 int need_drop = 0;
1568 size_t size_of_msghdr;
1569 void *umsgp = NULL;
1570 u_int uiocnt;
1571 int has_addr_or_ctl = 0;
1572
1573 KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0);
1574
1575 error = file_socket(uap->s, &so);
1576 if (error) {
1577 goto out;
1578 }
1579 need_drop = 1;
1580 if (so == NULL) {
1581 error = EBADF;
1582 goto out;
1583 }
1584
1585 /*
1586 * Input parameter range check
1587 */
1588 if (uap->cnt == 0 || uap->cnt > UIO_MAXIOV) {
1589 error = EINVAL;
1590 goto out;
1591 }
1592 /*
1593 * Clip to max currently allowed
1594 */
1595 if (uap->cnt > somaxsendmsgx) {
1596 uap->cnt = somaxsendmsgx;
1597 }
1598
1599 user_msg_x = _MALLOC(uap->cnt * sizeof(struct user_msghdr_x),
1600 M_TEMP, M_WAITOK | M_ZERO);
1601 if (user_msg_x == NULL) {
1602 DBG_PRINTF("%s _MALLOC() user_msg_x failed\n", __func__);
1603 error = ENOMEM;
1604 goto out;
1605 }
1606 uiop = _MALLOC(uap->cnt * sizeof(struct uio *),
1607 M_TEMP, M_WAITOK | M_ZERO);
1608 if (uiop == NULL) {
1609 DBG_PRINTF("%s _MALLOC() uiop failed\n", __func__);
1610 error = ENOMEM;
1611 goto out;
1612 }
1613
1614 size_of_msghdr = IS_64BIT_PROCESS(p) ?
1615 sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x);
1616
1617 umsgp = _MALLOC(uap->cnt * size_of_msghdr,
1618 M_TEMP, M_WAITOK | M_ZERO);
1619 if (umsgp == NULL) {
1620 printf("%s _MALLOC() user_msg_x failed\n", __func__);
1621 error = ENOMEM;
1622 goto out;
1623 }
1624 error = copyin(uap->msgp, umsgp, uap->cnt * size_of_msghdr);
1625 if (error) {
1626 DBG_PRINTF("%s copyin() failed\n", __func__);
1627 goto out;
1628 }
1629 error = internalize_user_msghdr_array(umsgp,
1630 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
1631 UIO_WRITE, uap->cnt, user_msg_x, uiop);
1632 if (error) {
1633 DBG_PRINTF("%s copyin_user_msghdr_array() failed\n", __func__);
1634 goto out;
1635 }
1636 /*
1637 * Make sure the size of each message iovec and
1638 * the aggregate size of all the iovec is valid
1639 */
1640 if (uio_array_is_valid(uiop, uap->cnt) == 0) {
1641 error = EINVAL;
1642 goto out;
1643 }
1644
1645 /*
1646 * Sanity check on passed arguments
1647 */
1648 for (i = 0; i < uap->cnt; i++) {
1649 struct user_msghdr_x *mp = user_msg_x + i;
1650
1651 /*
1652 * No flags on send message
1653 */
1654 if (mp->msg_flags != 0) {
1655 error = EINVAL;
1656 goto out;
1657 }
1658 /*
1659 * No support for address or ancillary data (yet)
1660 */
1661 if (mp->msg_name != USER_ADDR_NULL || mp->msg_namelen != 0) {
1662 has_addr_or_ctl = 1;
1663 }
1664
1665 if (mp->msg_control != USER_ADDR_NULL ||
1666 mp->msg_controllen != 0) {
1667 has_addr_or_ctl = 1;
1668 }
1669
1670 #if CONFIG_MACF_SOCKET_SUBSET
1671 /*
1672 * We check the state without holding the socket lock;
1673 * if a race condition occurs, it would simply result
1674 * in an extra call to the MAC check function.
1675 *
1676 * Note: The following check is never true taken with the
1677 * current limitation that we do not accept to pass an address,
1678 * this is effectively placeholder code. If we add support for
1679 * addresses, we will have to check every address.
1680 */
1681 if (to != NULL &&
1682 !(so->so_state & SS_DEFUNCT) &&
1683 (error = mac_socket_check_send(kauth_cred_get(), so, to))
1684 != 0) {
1685 goto out;
1686 }
1687 #endif /* MAC_SOCKET_SUBSET */
1688 }
1689
1690 len_before = uio_array_resid(uiop, uap->cnt);
1691
1692 /*
1693 * Feed list of packets at once only for connected socket without
1694 * control message
1695 */
1696 if (so->so_proto->pr_usrreqs->pru_sosend_list !=
1697 pru_sosend_list_notsupp &&
1698 has_addr_or_ctl == 0 && somaxsendmsgx == 0) {
1699 error = so->so_proto->pr_usrreqs->pru_sosend_list(so, uiop,
1700 uap->cnt, uap->flags);
1701 } else {
1702 for (i = 0; i < uap->cnt; i++) {
1703 struct user_msghdr_x *mp = user_msg_x + i;
1704 struct user_msghdr user_msg;
1705 uio_t auio = uiop[i];
1706 int32_t tmpval;
1707
1708 user_msg.msg_flags = mp->msg_flags;
1709 user_msg.msg_controllen = mp->msg_controllen;
1710 user_msg.msg_control = mp->msg_control;
1711 user_msg.msg_iovlen = mp->msg_iovlen;
1712 user_msg.msg_iov = mp->msg_iov;
1713 user_msg.msg_namelen = mp->msg_namelen;
1714 user_msg.msg_name = mp->msg_name;
1715
1716 error = sendit(p, so, &user_msg, auio, uap->flags,
1717 &tmpval);
1718 if (error != 0) {
1719 break;
1720 }
1721 }
1722 }
1723 len_after = uio_array_resid(uiop, uap->cnt);
1724
1725 VERIFY(len_after <= len_before);
1726
1727 if (error != 0) {
1728 if (len_after != len_before && (error == ERESTART ||
1729 error == EINTR || error == EWOULDBLOCK ||
1730 error == ENOBUFS)) {
1731 error = 0;
1732 }
1733 /* Generation of SIGPIPE can be controlled per socket */
1734 if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE)) {
1735 psignal(p, SIGPIPE);
1736 }
1737 }
1738 if (error == 0) {
1739 uiocnt = externalize_user_msghdr_array(umsgp,
1740 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
1741 UIO_WRITE, uap->cnt, user_msg_x, uiop);
1742
1743 *retval = (int)(uiocnt);
1744 }
1745 out:
1746 if (need_drop) {
1747 file_drop(uap->s);
1748 }
1749 if (umsgp != NULL) {
1750 _FREE(umsgp, M_TEMP);
1751 }
1752 if (uiop != NULL) {
1753 free_uio_array(uiop, uap->cnt);
1754 _FREE(uiop, M_TEMP);
1755 }
1756 if (user_msg_x != NULL) {
1757 _FREE(user_msg_x, M_TEMP);
1758 }
1759
1760 KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0);
1761
1762 return error;
1763 }
1764
1765
1766 static int
1767 copyout_sa(struct sockaddr *fromsa, user_addr_t name, socklen_t *namelen)
1768 {
1769 int error = 0;
1770 socklen_t sa_len = 0;
1771 ssize_t len;
1772
1773 len = *namelen;
1774 if (len <= 0 || fromsa == 0) {
1775 len = 0;
1776 } else {
1777 #ifndef MIN
1778 #define MIN(a, b) ((a) > (b) ? (b) : (a))
1779 #endif
1780 sa_len = fromsa->sa_len;
1781 len = MIN((unsigned int)len, sa_len);
1782 error = copyout(fromsa, name, (unsigned)len);
1783 if (error) {
1784 goto out;
1785 }
1786 }
1787 *namelen = sa_len;
1788 out:
1789 return 0;
1790 }
1791
1792 static int
1793 copyout_control(struct proc *p, struct mbuf *m, user_addr_t control,
1794 socklen_t *controllen, int *flags)
1795 {
1796 int error = 0;
1797 ssize_t len;
1798 user_addr_t ctlbuf;
1799
1800 len = *controllen;
1801 *controllen = 0;
1802 ctlbuf = control;
1803
1804 while (m && len > 0) {
1805 unsigned int tocopy;
1806 struct cmsghdr *cp = mtod(m, struct cmsghdr *);
1807 int cp_size = CMSG_ALIGN(cp->cmsg_len);
1808 int buflen = m->m_len;
1809
1810 while (buflen > 0 && len > 0) {
1811 /*
1812 * SCM_TIMESTAMP hack because struct timeval has a
1813 * different size for 32 bits and 64 bits processes
1814 */
1815 if (cp->cmsg_level == SOL_SOCKET && cp->cmsg_type == SCM_TIMESTAMP) {
1816 unsigned char tmp_buffer[CMSG_SPACE(sizeof(struct user64_timeval))] = {};
1817 struct cmsghdr *tmp_cp = (struct cmsghdr *)(void *)tmp_buffer;
1818 int tmp_space;
1819 struct timeval *tv = (struct timeval *)(void *)CMSG_DATA(cp);
1820
1821 tmp_cp->cmsg_level = SOL_SOCKET;
1822 tmp_cp->cmsg_type = SCM_TIMESTAMP;
1823
1824 if (proc_is64bit(p)) {
1825 struct user64_timeval *tv64 = (struct user64_timeval *)(void *)CMSG_DATA(tmp_cp);
1826
1827 os_unaligned_deref(&tv64->tv_sec) = tv->tv_sec;
1828 os_unaligned_deref(&tv64->tv_usec) = tv->tv_usec;
1829
1830 tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user64_timeval));
1831 tmp_space = CMSG_SPACE(sizeof(struct user64_timeval));
1832 } else {
1833 struct user32_timeval *tv32 = (struct user32_timeval *)(void *)CMSG_DATA(tmp_cp);
1834
1835 tv32->tv_sec = tv->tv_sec;
1836 tv32->tv_usec = tv->tv_usec;
1837
1838 tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user32_timeval));
1839 tmp_space = CMSG_SPACE(sizeof(struct user32_timeval));
1840 }
1841 if (len >= tmp_space) {
1842 tocopy = tmp_space;
1843 } else {
1844 *flags |= MSG_CTRUNC;
1845 tocopy = len;
1846 }
1847 error = copyout(tmp_buffer, ctlbuf, tocopy);
1848 if (error) {
1849 goto out;
1850 }
1851 } else {
1852 if (cp_size > buflen) {
1853 panic("cp_size > buflen, something"
1854 "wrong with alignment!");
1855 }
1856 if (len >= cp_size) {
1857 tocopy = cp_size;
1858 } else {
1859 *flags |= MSG_CTRUNC;
1860 tocopy = len;
1861 }
1862 error = copyout((caddr_t) cp, ctlbuf, tocopy);
1863 if (error) {
1864 goto out;
1865 }
1866 }
1867
1868 ctlbuf += tocopy;
1869 len -= tocopy;
1870
1871 buflen -= cp_size;
1872 cp = (struct cmsghdr *)(void *)
1873 ((unsigned char *) cp + cp_size);
1874 cp_size = CMSG_ALIGN(cp->cmsg_len);
1875 }
1876
1877 m = m->m_next;
1878 }
1879 *controllen = ctlbuf - control;
1880 out:
1881 return error;
1882 }
1883
1884 /*
1885 * Returns: 0 Success
1886 * ENOTSOCK
1887 * EINVAL
1888 * EBADF
1889 * EACCES Mandatory Access Control failure
1890 * copyout:EFAULT
1891 * fp_lookup:EBADF
1892 * <pru_soreceive>:ENOBUFS
1893 * <pru_soreceive>:ENOTCONN
1894 * <pru_soreceive>:EWOULDBLOCK
1895 * <pru_soreceive>:EFAULT
1896 * <pru_soreceive>:EINTR
1897 * <pru_soreceive>:EBADF
1898 * <pru_soreceive>:EINVAL
1899 * <pru_soreceive>:EMSGSIZE
1900 * <pru_soreceive>:???
1901 *
1902 * Notes: Additional return values from calls through <pru_soreceive>
1903 * depend on protocols other than TCP or AF_UNIX, which are
1904 * documented above.
1905 */
1906 static int
1907 recvit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop,
1908 user_addr_t namelenp, int32_t *retval)
1909 {
1910 ssize_t len;
1911 int error;
1912 struct mbuf *control = 0;
1913 struct socket *so;
1914 struct sockaddr *fromsa = 0;
1915 struct fileproc *fp;
1916
1917 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
1918 proc_fdlock(p);
1919 if ((error = fp_lookup(p, s, &fp, 1))) {
1920 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
1921 proc_fdunlock(p);
1922 return error;
1923 }
1924 if (fp->f_type != DTYPE_SOCKET) {
1925 fp_drop(p, s, fp, 1);
1926 proc_fdunlock(p);
1927 return ENOTSOCK;
1928 }
1929
1930 so = (struct socket *)fp->f_data;
1931 if (so == NULL) {
1932 fp_drop(p, s, fp, 1);
1933 proc_fdunlock(p);
1934 return EBADF;
1935 }
1936
1937 proc_fdunlock(p);
1938
1939 #if CONFIG_MACF_SOCKET_SUBSET
1940 /*
1941 * We check the state without holding the socket lock;
1942 * if a race condition occurs, it would simply result
1943 * in an extra call to the MAC check function.
1944 */
1945 if (!(so->so_state & SS_DEFUNCT) &&
1946 !(so->so_state & SS_ISCONNECTED) &&
1947 !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
1948 (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0) {
1949 goto out1;
1950 }
1951 #endif /* MAC_SOCKET_SUBSET */
1952 if (uio_resid(uiop) < 0) {
1953 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, EINVAL, 0, 0, 0, 0);
1954 error = EINVAL;
1955 goto out1;
1956 }
1957
1958 len = uio_resid(uiop);
1959 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, uiop,
1960 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
1961 &mp->msg_flags);
1962 if (fromsa) {
1963 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()),
1964 fromsa);
1965 }
1966 if (error) {
1967 if (uio_resid(uiop) != len && (error == ERESTART ||
1968 error == EINTR || error == EWOULDBLOCK)) {
1969 error = 0;
1970 }
1971 }
1972 if (error) {
1973 goto out;
1974 }
1975
1976 *retval = len - uio_resid(uiop);
1977
1978 if (mp->msg_name) {
1979 error = copyout_sa(fromsa, mp->msg_name, &mp->msg_namelen);
1980 if (error) {
1981 goto out;
1982 }
1983 /* return the actual, untruncated address length */
1984 if (namelenp &&
1985 (error = copyout((caddr_t)&mp->msg_namelen, namelenp,
1986 sizeof(int)))) {
1987 goto out;
1988 }
1989 }
1990
1991 if (mp->msg_control) {
1992 error = copyout_control(p, control, mp->msg_control,
1993 &mp->msg_controllen, &mp->msg_flags);
1994 }
1995 out:
1996 if (fromsa) {
1997 FREE(fromsa, M_SONAME);
1998 }
1999 if (control) {
2000 m_freem(control);
2001 }
2002 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
2003 out1:
2004 fp_drop(p, s, fp, 0);
2005 return error;
2006 }
2007
2008 /*
2009 * Returns: 0 Success
2010 * ENOMEM
2011 * copyin:EFAULT
2012 * recvit:???
2013 * read:??? [4056224: applicable for pipes]
2014 *
2015 * Notes: The read entry point is only called as part of support for
2016 * binary backward compatability; new code should use read
2017 * instead of recv or recvfrom when attempting to read data
2018 * from pipes.
2019 *
2020 * For full documentation of the return codes from recvit, see
2021 * the block header for the recvit function.
2022 */
2023 int
2024 recvfrom(struct proc *p, struct recvfrom_args *uap, int32_t *retval)
2025 {
2026 __pthread_testcancel(1);
2027 return recvfrom_nocancel(p, (struct recvfrom_nocancel_args *)uap,
2028 retval);
2029 }
2030
2031 int
2032 recvfrom_nocancel(struct proc *p, struct recvfrom_nocancel_args *uap,
2033 int32_t *retval)
2034 {
2035 struct user_msghdr msg;
2036 int error;
2037 uio_t auio = NULL;
2038
2039 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_START, 0, 0, 0, 0, 0);
2040 AUDIT_ARG(fd, uap->s);
2041
2042 if (uap->fromlenaddr) {
2043 error = copyin(uap->fromlenaddr,
2044 (caddr_t)&msg.msg_namelen, sizeof(msg.msg_namelen));
2045 if (error) {
2046 return error;
2047 }
2048 } else {
2049 msg.msg_namelen = 0;
2050 }
2051 msg.msg_name = uap->from;
2052 auio = uio_create(1, 0,
2053 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
2054 UIO_READ);
2055 if (auio == NULL) {
2056 return ENOMEM;
2057 }
2058
2059 uio_addiov(auio, uap->buf, uap->len);
2060 /* no need to set up msg_iov. recvit uses uio_t we send it */
2061 msg.msg_iov = 0;
2062 msg.msg_iovlen = 0;
2063 msg.msg_control = 0;
2064 msg.msg_controllen = 0;
2065 msg.msg_flags = uap->flags;
2066 error = recvit(p, uap->s, &msg, auio, uap->fromlenaddr, retval);
2067 if (auio != NULL) {
2068 uio_free(auio);
2069 }
2070
2071 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_END, error, 0, 0, 0, 0);
2072
2073 return error;
2074 }
2075
2076 /*
2077 * Returns: 0 Success
2078 * EMSGSIZE
2079 * ENOMEM
2080 * copyin:EFAULT
2081 * copyout:EFAULT
2082 * recvit:???
2083 *
2084 * Notes: For full documentation of the return codes from recvit, see
2085 * the block header for the recvit function.
2086 */
2087 int
2088 recvmsg(struct proc *p, struct recvmsg_args *uap, int32_t *retval)
2089 {
2090 __pthread_testcancel(1);
2091 return recvmsg_nocancel(p, (struct recvmsg_nocancel_args *)uap,
2092 retval);
2093 }
2094
2095 int
2096 recvmsg_nocancel(struct proc *p, struct recvmsg_nocancel_args *uap,
2097 int32_t *retval)
2098 {
2099 struct user32_msghdr msg32;
2100 struct user64_msghdr msg64;
2101 struct user_msghdr user_msg;
2102 caddr_t msghdrp;
2103 int size_of_msghdr;
2104 user_addr_t uiov;
2105 int error;
2106 uio_t auio = NULL;
2107 struct user_iovec *iovp;
2108
2109 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
2110 AUDIT_ARG(fd, uap->s);
2111 if (IS_64BIT_PROCESS(p)) {
2112 msghdrp = (caddr_t)&msg64;
2113 size_of_msghdr = sizeof(msg64);
2114 } else {
2115 msghdrp = (caddr_t)&msg32;
2116 size_of_msghdr = sizeof(msg32);
2117 }
2118 error = copyin(uap->msg, msghdrp, size_of_msghdr);
2119 if (error) {
2120 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
2121 return error;
2122 }
2123
2124 /* only need to copy if user process is not 64-bit */
2125 if (IS_64BIT_PROCESS(p)) {
2126 user_msg.msg_flags = msg64.msg_flags;
2127 user_msg.msg_controllen = msg64.msg_controllen;
2128 user_msg.msg_control = msg64.msg_control;
2129 user_msg.msg_iovlen = msg64.msg_iovlen;
2130 user_msg.msg_iov = msg64.msg_iov;
2131 user_msg.msg_namelen = msg64.msg_namelen;
2132 user_msg.msg_name = msg64.msg_name;
2133 } else {
2134 user_msg.msg_flags = msg32.msg_flags;
2135 user_msg.msg_controllen = msg32.msg_controllen;
2136 user_msg.msg_control = msg32.msg_control;
2137 user_msg.msg_iovlen = msg32.msg_iovlen;
2138 user_msg.msg_iov = msg32.msg_iov;
2139 user_msg.msg_namelen = msg32.msg_namelen;
2140 user_msg.msg_name = msg32.msg_name;
2141 }
2142
2143 if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
2144 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, EMSGSIZE,
2145 0, 0, 0, 0);
2146 return EMSGSIZE;
2147 }
2148
2149 user_msg.msg_flags = uap->flags;
2150
2151 /* allocate a uio large enough to hold the number of iovecs passed */
2152 auio = uio_create(user_msg.msg_iovlen, 0,
2153 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
2154 UIO_READ);
2155 if (auio == NULL) {
2156 error = ENOMEM;
2157 goto done;
2158 }
2159
2160 /*
2161 * get location of iovecs within the uio. then copyin the iovecs from
2162 * user space.
2163 */
2164 iovp = uio_iovsaddr(auio);
2165 if (iovp == NULL) {
2166 error = ENOMEM;
2167 goto done;
2168 }
2169 uiov = user_msg.msg_iov;
2170 user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
2171 error = copyin_user_iovec_array(uiov,
2172 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
2173 user_msg.msg_iovlen, iovp);
2174 if (error) {
2175 goto done;
2176 }
2177
2178 /* finish setup of uio_t */
2179 error = uio_calculateresid(auio);
2180 if (error) {
2181 goto done;
2182 }
2183
2184 error = recvit(p, uap->s, &user_msg, auio, 0, retval);
2185 if (!error) {
2186 user_msg.msg_iov = uiov;
2187 if (IS_64BIT_PROCESS(p)) {
2188 msg64.msg_flags = user_msg.msg_flags;
2189 msg64.msg_controllen = user_msg.msg_controllen;
2190 msg64.msg_control = user_msg.msg_control;
2191 msg64.msg_iovlen = user_msg.msg_iovlen;
2192 msg64.msg_iov = user_msg.msg_iov;
2193 msg64.msg_namelen = user_msg.msg_namelen;
2194 msg64.msg_name = user_msg.msg_name;
2195 } else {
2196 msg32.msg_flags = user_msg.msg_flags;
2197 msg32.msg_controllen = user_msg.msg_controllen;
2198 msg32.msg_control = user_msg.msg_control;
2199 msg32.msg_iovlen = user_msg.msg_iovlen;
2200 msg32.msg_iov = user_msg.msg_iov;
2201 msg32.msg_namelen = user_msg.msg_namelen;
2202 msg32.msg_name = user_msg.msg_name;
2203 }
2204 error = copyout(msghdrp, uap->msg, size_of_msghdr);
2205 }
2206 done:
2207 if (auio != NULL) {
2208 uio_free(auio);
2209 }
2210 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
2211 return error;
2212 }
2213
2214 int
2215 recvmsg_x(struct proc *p, struct recvmsg_x_args *uap, user_ssize_t *retval)
2216 {
2217 int error = EOPNOTSUPP;
2218 struct user_msghdr_x *user_msg_x = NULL;
2219 struct recv_msg_elem *recv_msg_array = NULL;
2220 struct socket *so;
2221 user_ssize_t len_before = 0, len_after;
2222 int need_drop = 0;
2223 size_t size_of_msghdr;
2224 void *umsgp = NULL;
2225 u_int i;
2226 u_int uiocnt;
2227
2228 KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0);
2229
2230 error = file_socket(uap->s, &so);
2231 if (error) {
2232 goto out;
2233 }
2234 need_drop = 1;
2235 if (so == NULL) {
2236 error = EBADF;
2237 goto out;
2238 }
2239 /*
2240 * Input parameter range check
2241 */
2242 if (uap->cnt == 0 || uap->cnt > UIO_MAXIOV) {
2243 error = EINVAL;
2244 goto out;
2245 }
2246 if (uap->cnt > somaxrecvmsgx) {
2247 uap->cnt = somaxrecvmsgx;
2248 }
2249
2250 user_msg_x = _MALLOC(uap->cnt * sizeof(struct user_msghdr_x),
2251 M_TEMP, M_WAITOK | M_ZERO);
2252 if (user_msg_x == NULL) {
2253 DBG_PRINTF("%s _MALLOC() user_msg_x failed\n", __func__);
2254 error = ENOMEM;
2255 goto out;
2256 }
2257 recv_msg_array = alloc_recv_msg_array(uap->cnt);
2258 if (recv_msg_array == NULL) {
2259 DBG_PRINTF("%s alloc_recv_msg_array() failed\n", __func__);
2260 error = ENOMEM;
2261 goto out;
2262 }
2263 size_of_msghdr = IS_64BIT_PROCESS(p) ?
2264 sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x);
2265
2266 umsgp = _MALLOC(uap->cnt * size_of_msghdr, M_TEMP, M_WAITOK | M_ZERO);
2267 if (umsgp == NULL) {
2268 DBG_PRINTF("%s _MALLOC() umsgp failed\n", __func__);
2269 error = ENOMEM;
2270 goto out;
2271 }
2272 error = copyin(uap->msgp, umsgp, uap->cnt * size_of_msghdr);
2273 if (error) {
2274 DBG_PRINTF("%s copyin() failed\n", __func__);
2275 goto out;
2276 }
2277 error = internalize_recv_msghdr_array(umsgp,
2278 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
2279 UIO_READ, uap->cnt, user_msg_x, recv_msg_array);
2280 if (error) {
2281 DBG_PRINTF("%s copyin_user_msghdr_array() failed\n", __func__);
2282 goto out;
2283 }
2284 /*
2285 * Make sure the size of each message iovec and
2286 * the aggregate size of all the iovec is valid
2287 */
2288 if (recv_msg_array_is_valid(recv_msg_array, uap->cnt) == 0) {
2289 error = EINVAL;
2290 goto out;
2291 }
2292 /*
2293 * Sanity check on passed arguments
2294 */
2295 for (i = 0; i < uap->cnt; i++) {
2296 struct user_msghdr_x *mp = user_msg_x + i;
2297
2298 if (mp->msg_flags != 0) {
2299 error = EINVAL;
2300 goto out;
2301 }
2302 }
2303 #if CONFIG_MACF_SOCKET_SUBSET
2304 /*
2305 * We check the state without holding the socket lock;
2306 * if a race condition occurs, it would simply result
2307 * in an extra call to the MAC check function.
2308 */
2309 if (!(so->so_state & SS_DEFUNCT) &&
2310 !(so->so_state & SS_ISCONNECTED) &&
2311 !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
2312 (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0) {
2313 goto out;
2314 }
2315 #endif /* MAC_SOCKET_SUBSET */
2316
2317 len_before = recv_msg_array_resid(recv_msg_array, uap->cnt);
2318
2319 if (so->so_proto->pr_usrreqs->pru_soreceive_list !=
2320 pru_soreceive_list_notsupp &&
2321 somaxrecvmsgx == 0) {
2322 error = so->so_proto->pr_usrreqs->pru_soreceive_list(so,
2323 recv_msg_array, uap->cnt, &uap->flags);
2324 } else {
2325 int flags = uap->flags;
2326
2327 for (i = 0; i < uap->cnt; i++) {
2328 struct recv_msg_elem *recv_msg_elem;
2329 uio_t auio;
2330 struct sockaddr **psa;
2331 struct mbuf **controlp;
2332
2333 recv_msg_elem = recv_msg_array + i;
2334 auio = recv_msg_elem->uio;
2335
2336 /*
2337 * Do not block if we got at least one packet
2338 */
2339 if (i > 0) {
2340 flags |= MSG_DONTWAIT;
2341 }
2342
2343 psa = (recv_msg_elem->which & SOCK_MSG_SA) ?
2344 &recv_msg_elem->psa : NULL;
2345 controlp = (recv_msg_elem->which & SOCK_MSG_CONTROL) ?
2346 &recv_msg_elem->controlp : NULL;
2347
2348 error = so->so_proto->pr_usrreqs->pru_soreceive(so, psa,
2349 auio, (struct mbuf **)0, controlp, &flags);
2350 if (error) {
2351 break;
2352 }
2353 /*
2354 * We have some data
2355 */
2356 recv_msg_elem->which |= SOCK_MSG_DATA;
2357 /*
2358 * Stop on partial copy
2359 */
2360 if (flags & (MSG_RCVMORE | MSG_TRUNC)) {
2361 break;
2362 }
2363 }
2364 if ((uap->flags & MSG_DONTWAIT) == 0) {
2365 flags &= ~MSG_DONTWAIT;
2366 }
2367 uap->flags = flags;
2368 }
2369
2370 len_after = recv_msg_array_resid(recv_msg_array, uap->cnt);
2371
2372 if (error) {
2373 if (len_after != len_before && (error == ERESTART ||
2374 error == EINTR || error == EWOULDBLOCK)) {
2375 error = 0;
2376 } else {
2377 goto out;
2378 }
2379 }
2380
2381 uiocnt = externalize_recv_msghdr_array(umsgp,
2382 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
2383 UIO_READ, uap->cnt, user_msg_x, recv_msg_array);
2384
2385 error = copyout(umsgp, uap->msgp, uap->cnt * size_of_msghdr);
2386 if (error) {
2387 DBG_PRINTF("%s copyout() failed\n", __func__);
2388 goto out;
2389 }
2390 *retval = (int)(uiocnt);
2391
2392 for (i = 0; i < uap->cnt; i++) {
2393 struct user_msghdr_x *mp = user_msg_x + i;
2394 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
2395 struct sockaddr *fromsa = recv_msg_elem->psa;
2396
2397 if (mp->msg_name) {
2398 error = copyout_sa(fromsa, mp->msg_name,
2399 &mp->msg_namelen);
2400 if (error) {
2401 goto out;
2402 }
2403 }
2404 if (mp->msg_control) {
2405 error = copyout_control(p, recv_msg_elem->controlp,
2406 mp->msg_control, &mp->msg_controllen,
2407 &mp->msg_flags);
2408 if (error) {
2409 goto out;
2410 }
2411 }
2412 }
2413 out:
2414 if (need_drop) {
2415 file_drop(uap->s);
2416 }
2417 if (umsgp != NULL) {
2418 _FREE(umsgp, M_TEMP);
2419 }
2420 if (recv_msg_array != NULL) {
2421 free_recv_msg_array(recv_msg_array, uap->cnt);
2422 }
2423 if (user_msg_x != NULL) {
2424 _FREE(user_msg_x, M_TEMP);
2425 }
2426
2427 KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0);
2428
2429 return error;
2430 }
2431
2432 /*
2433 * Returns: 0 Success
2434 * EBADF
2435 * file_socket:ENOTSOCK
2436 * file_socket:EBADF
2437 * soshutdown:EINVAL
2438 * soshutdown:ENOTCONN
2439 * soshutdown:EADDRNOTAVAIL[TCP]
2440 * soshutdown:ENOBUFS[TCP]
2441 * soshutdown:EMSGSIZE[TCP]
2442 * soshutdown:EHOSTUNREACH[TCP]
2443 * soshutdown:ENETUNREACH[TCP]
2444 * soshutdown:ENETDOWN[TCP]
2445 * soshutdown:ENOMEM[TCP]
2446 * soshutdown:EACCES[TCP]
2447 * soshutdown:EMSGSIZE[TCP]
2448 * soshutdown:ENOBUFS[TCP]
2449 * soshutdown:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
2450 * soshutdown:??? [other protocol families]
2451 */
2452 /* ARGSUSED */
2453 int
2454 shutdown(__unused struct proc *p, struct shutdown_args *uap,
2455 __unused int32_t *retval)
2456 {
2457 struct socket *so;
2458 int error;
2459
2460 AUDIT_ARG(fd, uap->s);
2461 error = file_socket(uap->s, &so);
2462 if (error) {
2463 return error;
2464 }
2465 if (so == NULL) {
2466 error = EBADF;
2467 goto out;
2468 }
2469 error = soshutdown((struct socket *)so, uap->how);
2470 out:
2471 file_drop(uap->s);
2472 return error;
2473 }
2474
2475 /*
2476 * Returns: 0 Success
2477 * EFAULT
2478 * EINVAL
2479 * EACCES Mandatory Access Control failure
2480 * file_socket:ENOTSOCK
2481 * file_socket:EBADF
2482 * sosetopt:EINVAL
2483 * sosetopt:ENOPROTOOPT
2484 * sosetopt:ENOBUFS
2485 * sosetopt:EDOM
2486 * sosetopt:EFAULT
2487 * sosetopt:EOPNOTSUPP[AF_UNIX]
2488 * sosetopt:???
2489 */
2490 /* ARGSUSED */
2491 int
2492 setsockopt(struct proc *p, struct setsockopt_args *uap,
2493 __unused int32_t *retval)
2494 {
2495 struct socket *so;
2496 struct sockopt sopt;
2497 int error;
2498
2499 AUDIT_ARG(fd, uap->s);
2500 if (uap->val == 0 && uap->valsize != 0) {
2501 return EFAULT;
2502 }
2503 /* No bounds checking on size (it's unsigned) */
2504
2505 error = file_socket(uap->s, &so);
2506 if (error) {
2507 return error;
2508 }
2509
2510 sopt.sopt_dir = SOPT_SET;
2511 sopt.sopt_level = uap->level;
2512 sopt.sopt_name = uap->name;
2513 sopt.sopt_val = uap->val;
2514 sopt.sopt_valsize = uap->valsize;
2515 sopt.sopt_p = p;
2516
2517 if (so == NULL) {
2518 error = EINVAL;
2519 goto out;
2520 }
2521 #if CONFIG_MACF_SOCKET_SUBSET
2522 if ((error = mac_socket_check_setsockopt(kauth_cred_get(), so,
2523 &sopt)) != 0) {
2524 goto out;
2525 }
2526 #endif /* MAC_SOCKET_SUBSET */
2527 error = sosetoptlock(so, &sopt, 1); /* will lock socket */
2528 out:
2529 file_drop(uap->s);
2530 return error;
2531 }
2532
2533
2534
2535 /*
2536 * Returns: 0 Success
2537 * EINVAL
2538 * EBADF
2539 * EACCES Mandatory Access Control failure
2540 * copyin:EFAULT
2541 * copyout:EFAULT
2542 * file_socket:ENOTSOCK
2543 * file_socket:EBADF
2544 * sogetopt:???
2545 */
2546 int
2547 getsockopt(struct proc *p, struct getsockopt_args *uap,
2548 __unused int32_t *retval)
2549 {
2550 int error;
2551 socklen_t valsize;
2552 struct sockopt sopt;
2553 struct socket *so;
2554
2555 error = file_socket(uap->s, &so);
2556 if (error) {
2557 return error;
2558 }
2559 if (uap->val) {
2560 error = copyin(uap->avalsize, (caddr_t)&valsize,
2561 sizeof(valsize));
2562 if (error) {
2563 goto out;
2564 }
2565 /* No bounds checking on size (it's unsigned) */
2566 } else {
2567 valsize = 0;
2568 }
2569 sopt.sopt_dir = SOPT_GET;
2570 sopt.sopt_level = uap->level;
2571 sopt.sopt_name = uap->name;
2572 sopt.sopt_val = uap->val;
2573 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
2574 sopt.sopt_p = p;
2575
2576 if (so == NULL) {
2577 error = EBADF;
2578 goto out;
2579 }
2580 #if CONFIG_MACF_SOCKET_SUBSET
2581 if ((error = mac_socket_check_getsockopt(kauth_cred_get(), so,
2582 &sopt)) != 0) {
2583 goto out;
2584 }
2585 #endif /* MAC_SOCKET_SUBSET */
2586 error = sogetoptlock((struct socket *)so, &sopt, 1); /* will lock */
2587 if (error == 0) {
2588 valsize = sopt.sopt_valsize;
2589 error = copyout((caddr_t)&valsize, uap->avalsize,
2590 sizeof(valsize));
2591 }
2592 out:
2593 file_drop(uap->s);
2594 return error;
2595 }
2596
2597
2598 /*
2599 * Get socket name.
2600 *
2601 * Returns: 0 Success
2602 * EBADF
2603 * file_socket:ENOTSOCK
2604 * file_socket:EBADF
2605 * copyin:EFAULT
2606 * copyout:EFAULT
2607 * <pru_sockaddr>:ENOBUFS[TCP]
2608 * <pru_sockaddr>:ECONNRESET[TCP]
2609 * <pru_sockaddr>:EINVAL[AF_UNIX]
2610 * <sf_getsockname>:???
2611 */
2612 /* ARGSUSED */
2613 int
2614 getsockname(__unused struct proc *p, struct getsockname_args *uap,
2615 __unused int32_t *retval)
2616 {
2617 struct socket *so;
2618 struct sockaddr *sa;
2619 socklen_t len;
2620 socklen_t sa_len;
2621 int error;
2622
2623 error = file_socket(uap->fdes, &so);
2624 if (error) {
2625 return error;
2626 }
2627 error = copyin(uap->alen, (caddr_t)&len, sizeof(socklen_t));
2628 if (error) {
2629 goto out;
2630 }
2631 if (so == NULL) {
2632 error = EBADF;
2633 goto out;
2634 }
2635 sa = 0;
2636 socket_lock(so, 1);
2637 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
2638 if (error == 0) {
2639 error = sflt_getsockname(so, &sa);
2640 if (error == EJUSTRETURN) {
2641 error = 0;
2642 }
2643 }
2644 socket_unlock(so, 1);
2645 if (error) {
2646 goto bad;
2647 }
2648 if (sa == 0) {
2649 len = 0;
2650 goto gotnothing;
2651 }
2652
2653 sa_len = sa->sa_len;
2654 len = MIN(len, sa_len);
2655 error = copyout((caddr_t)sa, uap->asa, len);
2656 if (error) {
2657 goto bad;
2658 }
2659 /* return the actual, untruncated address length */
2660 len = sa_len;
2661 gotnothing:
2662 error = copyout((caddr_t)&len, uap->alen, sizeof(socklen_t));
2663 bad:
2664 if (sa) {
2665 FREE(sa, M_SONAME);
2666 }
2667 out:
2668 file_drop(uap->fdes);
2669 return error;
2670 }
2671
2672 /*
2673 * Get name of peer for connected socket.
2674 *
2675 * Returns: 0 Success
2676 * EBADF
2677 * EINVAL
2678 * ENOTCONN
2679 * file_socket:ENOTSOCK
2680 * file_socket:EBADF
2681 * copyin:EFAULT
2682 * copyout:EFAULT
2683 * <pru_peeraddr>:???
2684 * <sf_getpeername>:???
2685 */
2686 /* ARGSUSED */
2687 int
2688 getpeername(__unused struct proc *p, struct getpeername_args *uap,
2689 __unused int32_t *retval)
2690 {
2691 struct socket *so;
2692 struct sockaddr *sa;
2693 socklen_t len;
2694 socklen_t sa_len;
2695 int error;
2696
2697 error = file_socket(uap->fdes, &so);
2698 if (error) {
2699 return error;
2700 }
2701 if (so == NULL) {
2702 error = EBADF;
2703 goto out;
2704 }
2705
2706 socket_lock(so, 1);
2707
2708 if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
2709 (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
2710 /* the socket has been shutdown, no more getpeername's */
2711 socket_unlock(so, 1);
2712 error = EINVAL;
2713 goto out;
2714 }
2715
2716 if ((so->so_state & (SS_ISCONNECTED | SS_ISCONFIRMING)) == 0) {
2717 socket_unlock(so, 1);
2718 error = ENOTCONN;
2719 goto out;
2720 }
2721 error = copyin(uap->alen, (caddr_t)&len, sizeof(socklen_t));
2722 if (error) {
2723 socket_unlock(so, 1);
2724 goto out;
2725 }
2726 sa = 0;
2727 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
2728 if (error == 0) {
2729 error = sflt_getpeername(so, &sa);
2730 if (error == EJUSTRETURN) {
2731 error = 0;
2732 }
2733 }
2734 socket_unlock(so, 1);
2735 if (error) {
2736 goto bad;
2737 }
2738 if (sa == 0) {
2739 len = 0;
2740 goto gotnothing;
2741 }
2742 sa_len = sa->sa_len;
2743 len = MIN(len, sa_len);
2744 error = copyout(sa, uap->asa, len);
2745 if (error) {
2746 goto bad;
2747 }
2748 /* return the actual, untruncated address length */
2749 len = sa_len;
2750 gotnothing:
2751 error = copyout((caddr_t)&len, uap->alen, sizeof(socklen_t));
2752 bad:
2753 if (sa) {
2754 FREE(sa, M_SONAME);
2755 }
2756 out:
2757 file_drop(uap->fdes);
2758 return error;
2759 }
2760
2761 int
2762 sockargs(struct mbuf **mp, user_addr_t data, int buflen, int type)
2763 {
2764 struct sockaddr *sa;
2765 struct mbuf *m;
2766 int error;
2767
2768 size_t alloc_buflen = (size_t)buflen;
2769
2770 if (alloc_buflen > INT_MAX / 2) {
2771 return EINVAL;
2772 }
2773 #ifdef __LP64__
2774 /*
2775 * The fd's in the buffer must expand to be pointers, thus we need twice
2776 * as much space
2777 */
2778 if (type == MT_CONTROL) {
2779 alloc_buflen = ((buflen - sizeof(struct cmsghdr)) * 2) +
2780 sizeof(struct cmsghdr);
2781 }
2782 #endif
2783 if (alloc_buflen > MLEN) {
2784 if (type == MT_SONAME && alloc_buflen <= 112) {
2785 alloc_buflen = MLEN; /* unix domain compat. hack */
2786 } else if (alloc_buflen > MCLBYTES) {
2787 return EINVAL;
2788 }
2789 }
2790 m = m_get(M_WAIT, type);
2791 if (m == NULL) {
2792 return ENOBUFS;
2793 }
2794 if (alloc_buflen > MLEN) {
2795 MCLGET(m, M_WAIT);
2796 if ((m->m_flags & M_EXT) == 0) {
2797 m_free(m);
2798 return ENOBUFS;
2799 }
2800 }
2801 /*
2802 * K64: We still copyin the original buflen because it gets expanded
2803 * later and we lie about the size of the mbuf because it only affects
2804 * unp_* functions
2805 */
2806 m->m_len = buflen;
2807 error = copyin(data, mtod(m, caddr_t), (u_int)buflen);
2808 if (error) {
2809 (void) m_free(m);
2810 } else {
2811 *mp = m;
2812 if (type == MT_SONAME) {
2813 sa = mtod(m, struct sockaddr *);
2814 sa->sa_len = buflen;
2815 }
2816 }
2817 return error;
2818 }
2819
2820 /*
2821 * Given a user_addr_t of length len, allocate and fill out a *sa.
2822 *
2823 * Returns: 0 Success
2824 * ENAMETOOLONG Filename too long
2825 * EINVAL Invalid argument
2826 * ENOMEM Not enough space
2827 * copyin:EFAULT Bad address
2828 */
2829 static int
2830 getsockaddr(struct socket *so, struct sockaddr **namp, user_addr_t uaddr,
2831 size_t len, boolean_t translate_unspec)
2832 {
2833 struct sockaddr *sa;
2834 int error;
2835
2836 if (len > SOCK_MAXADDRLEN) {
2837 return ENAMETOOLONG;
2838 }
2839
2840 if (len < offsetof(struct sockaddr, sa_data[0])) {
2841 return EINVAL;
2842 }
2843
2844 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK | M_ZERO);
2845 if (sa == NULL) {
2846 return ENOMEM;
2847 }
2848 error = copyin(uaddr, (caddr_t)sa, len);
2849 if (error) {
2850 FREE(sa, M_SONAME);
2851 } else {
2852 /*
2853 * Force sa_family to AF_INET on AF_INET sockets to handle
2854 * legacy applications that use AF_UNSPEC (0). On all other
2855 * sockets we leave it unchanged and let the lower layer
2856 * handle it.
2857 */
2858 if (translate_unspec && sa->sa_family == AF_UNSPEC &&
2859 SOCK_CHECK_DOM(so, PF_INET) &&
2860 len == sizeof(struct sockaddr_in)) {
2861 sa->sa_family = AF_INET;
2862 }
2863
2864 sa->sa_len = len;
2865 *namp = sa;
2866 }
2867 return error;
2868 }
2869
2870 static int
2871 getsockaddr_s(struct socket *so, struct sockaddr_storage *ss,
2872 user_addr_t uaddr, size_t len, boolean_t translate_unspec)
2873 {
2874 int error;
2875
2876 if (ss == NULL || uaddr == USER_ADDR_NULL ||
2877 len < offsetof(struct sockaddr, sa_data[0])) {
2878 return EINVAL;
2879 }
2880
2881 /*
2882 * sockaddr_storage size is less than SOCK_MAXADDRLEN,
2883 * so the check here is inclusive.
2884 */
2885 if (len > sizeof(*ss)) {
2886 return ENAMETOOLONG;
2887 }
2888
2889 bzero(ss, sizeof(*ss));
2890 error = copyin(uaddr, (caddr_t)ss, len);
2891 if (error == 0) {
2892 /*
2893 * Force sa_family to AF_INET on AF_INET sockets to handle
2894 * legacy applications that use AF_UNSPEC (0). On all other
2895 * sockets we leave it unchanged and let the lower layer
2896 * handle it.
2897 */
2898 if (translate_unspec && ss->ss_family == AF_UNSPEC &&
2899 SOCK_CHECK_DOM(so, PF_INET) &&
2900 len == sizeof(struct sockaddr_in)) {
2901 ss->ss_family = AF_INET;
2902 }
2903
2904 ss->ss_len = len;
2905 }
2906 return error;
2907 }
2908
2909 int
2910 internalize_user_msghdr_array(const void *src, int spacetype, int direction,
2911 u_int count, struct user_msghdr_x *dst, struct uio **uiop)
2912 {
2913 int error = 0;
2914 u_int i;
2915 u_int namecnt = 0;
2916 u_int ctlcnt = 0;
2917
2918 for (i = 0; i < count; i++) {
2919 uio_t auio;
2920 struct user_iovec *iovp;
2921 struct user_msghdr_x *user_msg = dst + i;
2922
2923 if (spacetype == UIO_USERSPACE64) {
2924 const struct user64_msghdr_x *msghdr64;
2925
2926 msghdr64 = ((const struct user64_msghdr_x *)src) + i;
2927
2928 user_msg->msg_name = msghdr64->msg_name;
2929 user_msg->msg_namelen = msghdr64->msg_namelen;
2930 user_msg->msg_iov = msghdr64->msg_iov;
2931 user_msg->msg_iovlen = msghdr64->msg_iovlen;
2932 user_msg->msg_control = msghdr64->msg_control;
2933 user_msg->msg_controllen = msghdr64->msg_controllen;
2934 user_msg->msg_flags = msghdr64->msg_flags;
2935 user_msg->msg_datalen = msghdr64->msg_datalen;
2936 } else {
2937 const struct user32_msghdr_x *msghdr32;
2938
2939 msghdr32 = ((const struct user32_msghdr_x *)src) + i;
2940
2941 user_msg->msg_name = msghdr32->msg_name;
2942 user_msg->msg_namelen = msghdr32->msg_namelen;
2943 user_msg->msg_iov = msghdr32->msg_iov;
2944 user_msg->msg_iovlen = msghdr32->msg_iovlen;
2945 user_msg->msg_control = msghdr32->msg_control;
2946 user_msg->msg_controllen = msghdr32->msg_controllen;
2947 user_msg->msg_flags = msghdr32->msg_flags;
2948 user_msg->msg_datalen = msghdr32->msg_datalen;
2949 }
2950
2951 if (user_msg->msg_iovlen <= 0 ||
2952 user_msg->msg_iovlen > UIO_MAXIOV) {
2953 error = EMSGSIZE;
2954 goto done;
2955 }
2956 auio = uio_create(user_msg->msg_iovlen, 0, spacetype,
2957 direction);
2958 if (auio == NULL) {
2959 error = ENOMEM;
2960 goto done;
2961 }
2962 uiop[i] = auio;
2963
2964 iovp = uio_iovsaddr(auio);
2965 if (iovp == NULL) {
2966 error = ENOMEM;
2967 goto done;
2968 }
2969 error = copyin_user_iovec_array(user_msg->msg_iov,
2970 spacetype, user_msg->msg_iovlen, iovp);
2971 if (error) {
2972 goto done;
2973 }
2974 user_msg->msg_iov = CAST_USER_ADDR_T(iovp);
2975
2976 error = uio_calculateresid(auio);
2977 if (error) {
2978 goto done;
2979 }
2980 user_msg->msg_datalen = uio_resid(auio);
2981
2982 if (user_msg->msg_name && user_msg->msg_namelen) {
2983 namecnt++;
2984 }
2985 if (user_msg->msg_control && user_msg->msg_controllen) {
2986 ctlcnt++;
2987 }
2988 }
2989 done:
2990
2991 return error;
2992 }
2993
2994 int
2995 internalize_recv_msghdr_array(const void *src, int spacetype, int direction,
2996 u_int count, struct user_msghdr_x *dst,
2997 struct recv_msg_elem *recv_msg_array)
2998 {
2999 int error = 0;
3000 u_int i;
3001
3002 for (i = 0; i < count; i++) {
3003 struct user_iovec *iovp;
3004 struct user_msghdr_x *user_msg = dst + i;
3005 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3006
3007 if (spacetype == UIO_USERSPACE64) {
3008 const struct user64_msghdr_x *msghdr64;
3009
3010 msghdr64 = ((const struct user64_msghdr_x *)src) + i;
3011
3012 user_msg->msg_name = msghdr64->msg_name;
3013 user_msg->msg_namelen = msghdr64->msg_namelen;
3014 user_msg->msg_iov = msghdr64->msg_iov;
3015 user_msg->msg_iovlen = msghdr64->msg_iovlen;
3016 user_msg->msg_control = msghdr64->msg_control;
3017 user_msg->msg_controllen = msghdr64->msg_controllen;
3018 user_msg->msg_flags = msghdr64->msg_flags;
3019 user_msg->msg_datalen = msghdr64->msg_datalen;
3020 } else {
3021 const struct user32_msghdr_x *msghdr32;
3022
3023 msghdr32 = ((const struct user32_msghdr_x *)src) + i;
3024
3025 user_msg->msg_name = msghdr32->msg_name;
3026 user_msg->msg_namelen = msghdr32->msg_namelen;
3027 user_msg->msg_iov = msghdr32->msg_iov;
3028 user_msg->msg_iovlen = msghdr32->msg_iovlen;
3029 user_msg->msg_control = msghdr32->msg_control;
3030 user_msg->msg_controllen = msghdr32->msg_controllen;
3031 user_msg->msg_flags = msghdr32->msg_flags;
3032 user_msg->msg_datalen = msghdr32->msg_datalen;
3033 }
3034
3035 if (user_msg->msg_iovlen <= 0 ||
3036 user_msg->msg_iovlen > UIO_MAXIOV) {
3037 error = EMSGSIZE;
3038 goto done;
3039 }
3040 recv_msg_elem->uio = uio_create(user_msg->msg_iovlen, 0,
3041 spacetype, direction);
3042 if (recv_msg_elem->uio == NULL) {
3043 error = ENOMEM;
3044 goto done;
3045 }
3046
3047 iovp = uio_iovsaddr(recv_msg_elem->uio);
3048 if (iovp == NULL) {
3049 error = ENOMEM;
3050 goto done;
3051 }
3052 error = copyin_user_iovec_array(user_msg->msg_iov,
3053 spacetype, user_msg->msg_iovlen, iovp);
3054 if (error) {
3055 goto done;
3056 }
3057 user_msg->msg_iov = CAST_USER_ADDR_T(iovp);
3058
3059 error = uio_calculateresid(recv_msg_elem->uio);
3060 if (error) {
3061 goto done;
3062 }
3063 user_msg->msg_datalen = uio_resid(recv_msg_elem->uio);
3064
3065 if (user_msg->msg_name && user_msg->msg_namelen) {
3066 recv_msg_elem->which |= SOCK_MSG_SA;
3067 }
3068 if (user_msg->msg_control && user_msg->msg_controllen) {
3069 recv_msg_elem->which |= SOCK_MSG_CONTROL;
3070 }
3071 }
3072 done:
3073
3074 return error;
3075 }
3076
3077 u_int
3078 externalize_user_msghdr_array(void *dst, int spacetype, int direction,
3079 u_int count, const struct user_msghdr_x *src, struct uio **uiop)
3080 {
3081 #pragma unused(direction)
3082 u_int i;
3083 int seenlast = 0;
3084 u_int retcnt = 0;
3085
3086 for (i = 0; i < count; i++) {
3087 const struct user_msghdr_x *user_msg = src + i;
3088 uio_t auio = uiop[i];
3089 user_ssize_t len = user_msg->msg_datalen - uio_resid(auio);
3090
3091 if (user_msg->msg_datalen != 0 && len == 0) {
3092 seenlast = 1;
3093 }
3094
3095 if (seenlast == 0) {
3096 retcnt++;
3097 }
3098
3099 if (spacetype == UIO_USERSPACE64) {
3100 struct user64_msghdr_x *msghdr64;
3101
3102 msghdr64 = ((struct user64_msghdr_x *)dst) + i;
3103
3104 msghdr64->msg_flags = user_msg->msg_flags;
3105 msghdr64->msg_datalen = len;
3106 } else {
3107 struct user32_msghdr_x *msghdr32;
3108
3109 msghdr32 = ((struct user32_msghdr_x *)dst) + i;
3110
3111 msghdr32->msg_flags = user_msg->msg_flags;
3112 msghdr32->msg_datalen = len;
3113 }
3114 }
3115 return retcnt;
3116 }
3117
3118 u_int
3119 externalize_recv_msghdr_array(void *dst, int spacetype, int direction,
3120 u_int count, const struct user_msghdr_x *src,
3121 struct recv_msg_elem *recv_msg_array)
3122 {
3123 u_int i;
3124 int seenlast = 0;
3125 u_int retcnt = 0;
3126
3127 for (i = 0; i < count; i++) {
3128 const struct user_msghdr_x *user_msg = src + i;
3129 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3130 user_ssize_t len;
3131
3132 len = user_msg->msg_datalen - uio_resid(recv_msg_elem->uio);
3133
3134 if (direction == UIO_READ) {
3135 if ((recv_msg_elem->which & SOCK_MSG_DATA) == 0) {
3136 seenlast = 1;
3137 }
3138 } else {
3139 if (user_msg->msg_datalen != 0 && len == 0) {
3140 seenlast = 1;
3141 }
3142 }
3143
3144 if (seenlast == 0) {
3145 retcnt++;
3146 }
3147
3148 if (spacetype == UIO_USERSPACE64) {
3149 struct user64_msghdr_x *msghdr64;
3150
3151 msghdr64 = ((struct user64_msghdr_x *)dst) + i;
3152
3153 msghdr64->msg_flags = user_msg->msg_flags;
3154 msghdr64->msg_datalen = len;
3155 } else {
3156 struct user32_msghdr_x *msghdr32;
3157
3158 msghdr32 = ((struct user32_msghdr_x *)dst) + i;
3159
3160 msghdr32->msg_flags = user_msg->msg_flags;
3161 msghdr32->msg_datalen = len;
3162 }
3163 }
3164 return retcnt;
3165 }
3166
3167 void
3168 free_uio_array(struct uio **uiop, u_int count)
3169 {
3170 u_int i;
3171
3172 for (i = 0; i < count; i++) {
3173 if (uiop[i] != NULL) {
3174 uio_free(uiop[i]);
3175 }
3176 }
3177 }
3178
3179 __private_extern__ user_ssize_t
3180 uio_array_resid(struct uio **uiop, u_int count)
3181 {
3182 user_ssize_t len = 0;
3183 u_int i;
3184
3185 for (i = 0; i < count; i++) {
3186 struct uio *auio = uiop[i];
3187
3188 if (auio != NULL) {
3189 len += uio_resid(auio);
3190 }
3191 }
3192 return len;
3193 }
3194
3195 int
3196 uio_array_is_valid(struct uio **uiop, u_int count)
3197 {
3198 user_ssize_t len = 0;
3199 u_int i;
3200
3201 for (i = 0; i < count; i++) {
3202 struct uio *auio = uiop[i];
3203
3204 if (auio != NULL) {
3205 user_ssize_t resid = uio_resid(auio);
3206
3207 /*
3208 * Sanity check on the validity of the iovec:
3209 * no point of going over sb_max
3210 */
3211 if (resid < 0 || (u_int32_t)resid > sb_max) {
3212 return 0;
3213 }
3214
3215 len += resid;
3216 if (len < 0 || (u_int32_t)len > sb_max) {
3217 return 0;
3218 }
3219 }
3220 }
3221 return 1;
3222 }
3223
3224
3225 struct recv_msg_elem *
3226 alloc_recv_msg_array(u_int count)
3227 {
3228 struct recv_msg_elem *recv_msg_array;
3229
3230 recv_msg_array = _MALLOC(count * sizeof(struct recv_msg_elem),
3231 M_TEMP, M_WAITOK | M_ZERO);
3232
3233 return recv_msg_array;
3234 }
3235
3236 void
3237 free_recv_msg_array(struct recv_msg_elem *recv_msg_array, u_int count)
3238 {
3239 u_int i;
3240
3241 for (i = 0; i < count; i++) {
3242 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3243
3244 if (recv_msg_elem->uio != NULL) {
3245 uio_free(recv_msg_elem->uio);
3246 }
3247 if (recv_msg_elem->psa != NULL) {
3248 _FREE(recv_msg_elem->psa, M_TEMP);
3249 }
3250 if (recv_msg_elem->controlp != NULL) {
3251 m_freem(recv_msg_elem->controlp);
3252 }
3253 }
3254 _FREE(recv_msg_array, M_TEMP);
3255 }
3256
3257
3258 __private_extern__ user_ssize_t
3259 recv_msg_array_resid(struct recv_msg_elem *recv_msg_array, u_int count)
3260 {
3261 user_ssize_t len = 0;
3262 u_int i;
3263
3264 for (i = 0; i < count; i++) {
3265 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3266
3267 if (recv_msg_elem->uio != NULL) {
3268 len += uio_resid(recv_msg_elem->uio);
3269 }
3270 }
3271 return len;
3272 }
3273
3274 int
3275 recv_msg_array_is_valid(struct recv_msg_elem *recv_msg_array, u_int count)
3276 {
3277 user_ssize_t len = 0;
3278 u_int i;
3279
3280 for (i = 0; i < count; i++) {
3281 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3282
3283 if (recv_msg_elem->uio != NULL) {
3284 user_ssize_t resid = uio_resid(recv_msg_elem->uio);
3285
3286 /*
3287 * Sanity check on the validity of the iovec:
3288 * no point of going over sb_max
3289 */
3290 if (resid < 0 || (u_int32_t)resid > sb_max) {
3291 return 0;
3292 }
3293
3294 len += resid;
3295 if (len < 0 || (u_int32_t)len > sb_max) {
3296 return 0;
3297 }
3298 }
3299 }
3300 return 1;
3301 }
3302
3303 #if SENDFILE
3304
3305 #define SFUIOBUFS 64
3306
3307 /* Macros to compute the number of mbufs needed depending on cluster size */
3308 #define HOWMANY_16K(n) ((((unsigned int)(n) - 1) >> M16KCLSHIFT) + 1)
3309 #define HOWMANY_4K(n) ((((unsigned int)(n) - 1) >> MBIGCLSHIFT) + 1)
3310
3311 /* Upper send limit in bytes (SFUIOBUFS * PAGESIZE) */
3312 #define SENDFILE_MAX_BYTES (SFUIOBUFS << PGSHIFT)
3313
3314 /* Upper send limit in the number of mbuf clusters */
3315 #define SENDFILE_MAX_16K HOWMANY_16K(SENDFILE_MAX_BYTES)
3316 #define SENDFILE_MAX_4K HOWMANY_4K(SENDFILE_MAX_BYTES)
3317
3318 static void
3319 alloc_sendpkt(int how, size_t pktlen, unsigned int *maxchunks,
3320 struct mbuf **m, boolean_t jumbocl)
3321 {
3322 unsigned int needed;
3323
3324 if (pktlen == 0) {
3325 panic("%s: pktlen (%ld) must be non-zero\n", __func__, pktlen);
3326 }
3327
3328 /*
3329 * Try to allocate for the whole thing. Since we want full control
3330 * over the buffer size and be able to accept partial result, we can't
3331 * use mbuf_allocpacket(). The logic below is similar to sosend().
3332 */
3333 *m = NULL;
3334 if (pktlen > MBIGCLBYTES && jumbocl) {
3335 needed = MIN(SENDFILE_MAX_16K, HOWMANY_16K(pktlen));
3336 *m = m_getpackets_internal(&needed, 1, how, 0, M16KCLBYTES);
3337 }
3338 if (*m == NULL) {
3339 needed = MIN(SENDFILE_MAX_4K, HOWMANY_4K(pktlen));
3340 *m = m_getpackets_internal(&needed, 1, how, 0, MBIGCLBYTES);
3341 }
3342
3343 /*
3344 * Our previous attempt(s) at allocation had failed; the system
3345 * may be short on mbufs, and we want to block until they are
3346 * available. This time, ask just for 1 mbuf and don't return
3347 * until we get it.
3348 */
3349 if (*m == NULL) {
3350 needed = 1;
3351 *m = m_getpackets_internal(&needed, 1, M_WAIT, 1, MBIGCLBYTES);
3352 }
3353 if (*m == NULL) {
3354 panic("%s: blocking allocation returned NULL\n", __func__);
3355 }
3356
3357 *maxchunks = needed;
3358 }
3359
3360 /*
3361 * sendfile(2).
3362 * int sendfile(int fd, int s, off_t offset, off_t *nbytes,
3363 * struct sf_hdtr *hdtr, int flags)
3364 *
3365 * Send a file specified by 'fd' and starting at 'offset' to a socket
3366 * specified by 's'. Send only '*nbytes' of the file or until EOF if
3367 * *nbytes == 0. Optionally add a header and/or trailer to the socket
3368 * output. If specified, write the total number of bytes sent into *nbytes.
3369 */
3370 int
3371 sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval)
3372 {
3373 struct fileproc *fp;
3374 struct vnode *vp;
3375 struct socket *so;
3376 struct writev_nocancel_args nuap;
3377 user_ssize_t writev_retval;
3378 struct user_sf_hdtr user_hdtr;
3379 struct user32_sf_hdtr user32_hdtr;
3380 struct user64_sf_hdtr user64_hdtr;
3381 off_t off, xfsize;
3382 off_t nbytes = 0, sbytes = 0;
3383 int error = 0;
3384 size_t sizeof_hdtr;
3385 off_t file_size;
3386 struct vfs_context context = *vfs_context_current();
3387
3388 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_START), uap->s,
3389 0, 0, 0, 0);
3390
3391 AUDIT_ARG(fd, uap->fd);
3392 AUDIT_ARG(value32, uap->s);
3393
3394 /*
3395 * Do argument checking. Must be a regular file in, stream
3396 * type and connected socket out, positive offset.
3397 */
3398 if ((error = fp_getfvp(p, uap->fd, &fp, &vp))) {
3399 goto done;
3400 }
3401 if ((fp->f_flag & FREAD) == 0) {
3402 error = EBADF;
3403 goto done1;
3404 }
3405 if (vnode_isreg(vp) == 0) {
3406 error = ENOTSUP;
3407 goto done1;
3408 }
3409 error = file_socket(uap->s, &so);
3410 if (error) {
3411 goto done1;
3412 }
3413 if (so == NULL) {
3414 error = EBADF;
3415 goto done2;
3416 }
3417 if (so->so_type != SOCK_STREAM) {
3418 error = EINVAL;
3419 goto done2;
3420 }
3421 if ((so->so_state & SS_ISCONNECTED) == 0) {
3422 error = ENOTCONN;
3423 goto done2;
3424 }
3425 if (uap->offset < 0) {
3426 error = EINVAL;
3427 goto done2;
3428 }
3429 if (uap->nbytes == USER_ADDR_NULL) {
3430 error = EINVAL;
3431 goto done2;
3432 }
3433 if (uap->flags != 0) {
3434 error = EINVAL;
3435 goto done2;
3436 }
3437
3438 context.vc_ucred = fp->f_fglob->fg_cred;
3439
3440 #if CONFIG_MACF_SOCKET_SUBSET
3441 /* JMM - fetch connected sockaddr? */
3442 error = mac_socket_check_send(context.vc_ucred, so, NULL);
3443 if (error) {
3444 goto done2;
3445 }
3446 #endif
3447
3448 /*
3449 * Get number of bytes to send
3450 * Should it applies to size of header and trailer?
3451 */
3452 error = copyin(uap->nbytes, &nbytes, sizeof(off_t));
3453 if (error) {
3454 goto done2;
3455 }
3456
3457 /*
3458 * If specified, get the pointer to the sf_hdtr struct for
3459 * any headers/trailers.
3460 */
3461 if (uap->hdtr != USER_ADDR_NULL) {
3462 caddr_t hdtrp;
3463
3464 bzero(&user_hdtr, sizeof(user_hdtr));
3465 if (IS_64BIT_PROCESS(p)) {
3466 hdtrp = (caddr_t)&user64_hdtr;
3467 sizeof_hdtr = sizeof(user64_hdtr);
3468 } else {
3469 hdtrp = (caddr_t)&user32_hdtr;
3470 sizeof_hdtr = sizeof(user32_hdtr);
3471 }
3472 error = copyin(uap->hdtr, hdtrp, sizeof_hdtr);
3473 if (error) {
3474 goto done2;
3475 }
3476 if (IS_64BIT_PROCESS(p)) {
3477 user_hdtr.headers = user64_hdtr.headers;
3478 user_hdtr.hdr_cnt = user64_hdtr.hdr_cnt;
3479 user_hdtr.trailers = user64_hdtr.trailers;
3480 user_hdtr.trl_cnt = user64_hdtr.trl_cnt;
3481 } else {
3482 user_hdtr.headers = user32_hdtr.headers;
3483 user_hdtr.hdr_cnt = user32_hdtr.hdr_cnt;
3484 user_hdtr.trailers = user32_hdtr.trailers;
3485 user_hdtr.trl_cnt = user32_hdtr.trl_cnt;
3486 }
3487
3488 /*
3489 * Send any headers. Wimp out and use writev(2).
3490 */
3491 if (user_hdtr.headers != USER_ADDR_NULL) {
3492 bzero(&nuap, sizeof(struct writev_args));
3493 nuap.fd = uap->s;
3494 nuap.iovp = user_hdtr.headers;
3495 nuap.iovcnt = user_hdtr.hdr_cnt;
3496 error = writev_nocancel(p, &nuap, &writev_retval);
3497 if (error) {
3498 goto done2;
3499 }
3500 sbytes += writev_retval;
3501 }
3502 }
3503
3504 /*
3505 * Get the file size for 2 reasons:
3506 * 1. We don't want to allocate more mbufs than necessary
3507 * 2. We don't want to read past the end of file
3508 */
3509 if ((error = vnode_size(vp, &file_size, vfs_context_current())) != 0) {
3510 goto done2;
3511 }
3512
3513 /*
3514 * Simply read file data into a chain of mbufs that are used with
3515 * scatter-gather reads. We're not (yet?) set up to use zero-copy
3516 * external mbufs that point to the file pages.
3517 */
3518 socket_lock(so, 1);
3519 error = sblock(&so->so_snd, SBL_WAIT);
3520 if (error) {
3521 socket_unlock(so, 1);
3522 goto done2;
3523 }
3524 for (off = uap->offset;; off += xfsize, sbytes += xfsize) {
3525 mbuf_t m0 = NULL, m;
3526 unsigned int nbufs = SFUIOBUFS, i;
3527 uio_t auio;
3528 char uio_buf[UIO_SIZEOF(SFUIOBUFS)]; /* 1 KB !!! */
3529 size_t uiolen;
3530 user_ssize_t rlen;
3531 off_t pgoff;
3532 size_t pktlen;
3533 boolean_t jumbocl;
3534
3535 /*
3536 * Calculate the amount to transfer.
3537 * Align to round number of pages.
3538 * Not to exceed send socket buffer,
3539 * the EOF, or the passed in nbytes.
3540 */
3541 xfsize = sbspace(&so->so_snd);
3542
3543 if (xfsize <= 0) {
3544 if (so->so_state & SS_CANTSENDMORE) {
3545 error = EPIPE;
3546 goto done3;
3547 } else if ((so->so_state & SS_NBIO)) {
3548 error = EAGAIN;
3549 goto done3;
3550 } else {
3551 xfsize = PAGE_SIZE;
3552 }
3553 }
3554
3555 if (xfsize > SENDFILE_MAX_BYTES) {
3556 xfsize = SENDFILE_MAX_BYTES;
3557 } else if (xfsize > PAGE_SIZE) {
3558 xfsize = trunc_page(xfsize);
3559 }
3560 pgoff = off & PAGE_MASK_64;
3561 if (pgoff > 0 && PAGE_SIZE - pgoff < xfsize) {
3562 xfsize = PAGE_SIZE_64 - pgoff;
3563 }
3564 if (nbytes && xfsize > (nbytes - sbytes)) {
3565 xfsize = nbytes - sbytes;
3566 }
3567 if (xfsize <= 0) {
3568 break;
3569 }
3570 if (off + xfsize > file_size) {
3571 xfsize = file_size - off;
3572 }
3573 if (xfsize <= 0) {
3574 break;
3575 }
3576
3577 /*
3578 * Attempt to use larger than system page-size clusters for
3579 * large writes only if there is a jumbo cluster pool and
3580 * if the socket is marked accordingly.
3581 */
3582 jumbocl = sosendjcl && njcl > 0 &&
3583 ((so->so_flags & SOF_MULTIPAGES) || sosendjcl_ignore_capab);
3584
3585 socket_unlock(so, 0);
3586 alloc_sendpkt(M_WAIT, xfsize, &nbufs, &m0, jumbocl);
3587 pktlen = mbuf_pkthdr_maxlen(m0);
3588 if (pktlen < (size_t)xfsize) {
3589 xfsize = pktlen;
3590 }
3591
3592 auio = uio_createwithbuffer(nbufs, off, UIO_SYSSPACE,
3593 UIO_READ, &uio_buf[0], sizeof(uio_buf));
3594 if (auio == NULL) {
3595 printf("sendfile failed. nbufs = %d. %s", nbufs,
3596 "File a radar related to rdar://10146739.\n");
3597 mbuf_freem(m0);
3598 error = ENXIO;
3599 socket_lock(so, 0);
3600 goto done3;
3601 }
3602
3603 for (i = 0, m = m0, uiolen = 0;
3604 i < nbufs && m != NULL && uiolen < (size_t)xfsize;
3605 i++, m = mbuf_next(m)) {
3606 size_t mlen = mbuf_maxlen(m);
3607
3608 if (mlen + uiolen > (size_t)xfsize) {
3609 mlen = xfsize - uiolen;
3610 }
3611 mbuf_setlen(m, mlen);
3612 uio_addiov(auio, CAST_USER_ADDR_T(mbuf_datastart(m)),
3613 mlen);
3614 uiolen += mlen;
3615 }
3616
3617 if (xfsize != uio_resid(auio)) {
3618 printf("sendfile: xfsize: %lld != uio_resid(auio): "
3619 "%lld\n", xfsize, (long long)uio_resid(auio));
3620 }
3621
3622 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_START),
3623 uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
3624 (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
3625 error = fo_read(fp, auio, FOF_OFFSET, &context);
3626 socket_lock(so, 0);
3627 if (error != 0) {
3628 if (uio_resid(auio) != xfsize && (error == ERESTART ||
3629 error == EINTR || error == EWOULDBLOCK)) {
3630 error = 0;
3631 } else {
3632 mbuf_freem(m0);
3633 goto done3;
3634 }
3635 }
3636 xfsize -= uio_resid(auio);
3637 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_END),
3638 uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
3639 (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
3640
3641 if (xfsize == 0) {
3642 // printf("sendfile: fo_read 0 bytes, EOF\n");
3643 break;
3644 }
3645 if (xfsize + off > file_size) {
3646 printf("sendfile: xfsize: %lld + off: %lld > file_size:"
3647 "%lld\n", xfsize, off, file_size);
3648 }
3649 for (i = 0, m = m0, rlen = 0;
3650 i < nbufs && m != NULL && rlen < xfsize;
3651 i++, m = mbuf_next(m)) {
3652 size_t mlen = mbuf_maxlen(m);
3653
3654 if (rlen + mlen > (size_t)xfsize) {
3655 mlen = xfsize - rlen;
3656 }
3657 mbuf_setlen(m, mlen);
3658
3659 rlen += mlen;
3660 }
3661 mbuf_pkthdr_setlen(m0, xfsize);
3662
3663 retry_space:
3664 /*
3665 * Make sure that the socket is still able to take more data.
3666 * CANTSENDMORE being true usually means that the connection
3667 * was closed. so_error is true when an error was sensed after
3668 * a previous send.
3669 * The state is checked after the page mapping and buffer
3670 * allocation above since those operations may block and make
3671 * any socket checks stale. From this point forward, nothing
3672 * blocks before the pru_send (or more accurately, any blocking
3673 * results in a loop back to here to re-check).
3674 */
3675 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
3676 if (so->so_state & SS_CANTSENDMORE) {
3677 error = EPIPE;
3678 } else {
3679 error = so->so_error;
3680 so->so_error = 0;
3681 }
3682 m_freem(m0);
3683 goto done3;
3684 }
3685 /*
3686 * Wait for socket space to become available. We do this just
3687 * after checking the connection state above in order to avoid
3688 * a race condition with sbwait().
3689 */
3690 if (sbspace(&so->so_snd) < (long)so->so_snd.sb_lowat) {
3691 if (so->so_state & SS_NBIO) {
3692 m_freem(m0);
3693 error = EAGAIN;
3694 goto done3;
3695 }
3696 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT |
3697 DBG_FUNC_START), uap->s, 0, 0, 0, 0);
3698 error = sbwait(&so->so_snd);
3699 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT |
3700 DBG_FUNC_END), uap->s, 0, 0, 0, 0);
3701 /*
3702 * An error from sbwait usually indicates that we've
3703 * been interrupted by a signal. If we've sent anything
3704 * then return bytes sent, otherwise return the error.
3705 */
3706 if (error) {
3707 m_freem(m0);
3708 goto done3;
3709 }
3710 goto retry_space;
3711 }
3712
3713 struct mbuf *control = NULL;
3714 {
3715 /*
3716 * Socket filter processing
3717 */
3718
3719 error = sflt_data_out(so, NULL, &m0, &control, 0);
3720 if (error) {
3721 if (error == EJUSTRETURN) {
3722 error = 0;
3723 continue;
3724 }
3725 goto done3;
3726 }
3727 /*
3728 * End Socket filter processing
3729 */
3730 }
3731 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
3732 uap->s, 0, 0, 0, 0);
3733 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m0,
3734 0, control, p);
3735 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
3736 uap->s, 0, 0, 0, 0);
3737 if (error) {
3738 goto done3;
3739 }
3740 }
3741 sbunlock(&so->so_snd, FALSE); /* will unlock socket */
3742 /*
3743 * Send trailers. Wimp out and use writev(2).
3744 */
3745 if (uap->hdtr != USER_ADDR_NULL &&
3746 user_hdtr.trailers != USER_ADDR_NULL) {
3747 bzero(&nuap, sizeof(struct writev_args));
3748 nuap.fd = uap->s;
3749 nuap.iovp = user_hdtr.trailers;
3750 nuap.iovcnt = user_hdtr.trl_cnt;
3751 error = writev_nocancel(p, &nuap, &writev_retval);
3752 if (error) {
3753 goto done2;
3754 }
3755 sbytes += writev_retval;
3756 }
3757 done2:
3758 file_drop(uap->s);
3759 done1:
3760 file_drop(uap->fd);
3761 done:
3762 if (uap->nbytes != USER_ADDR_NULL) {
3763 /* XXX this appears bogus for some early failure conditions */
3764 copyout(&sbytes, uap->nbytes, sizeof(off_t));
3765 }
3766 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_END), uap->s,
3767 (unsigned int)((sbytes >> 32) & 0x0ffffffff),
3768 (unsigned int)(sbytes & 0x0ffffffff), error, 0);
3769 return error;
3770 done3:
3771 sbunlock(&so->so_snd, FALSE); /* will unlock socket */
3772 goto done2;
3773 }
3774
3775
3776 #endif /* SENDFILE */