]> git.saurik.com Git - apple/xnu.git/blob - bsd/kern/uipc_syscalls.c
xnu-6153.101.6.tar.gz
[apple/xnu.git] / bsd / kern / uipc_syscalls.c
1 /*
2 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1982, 1986, 1989, 1990, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * sendfile(2) and related extensions:
33 * Copyright (c) 1998, David Greenman. All rights reserved.
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. All advertising materials mentioning features or use of this software
44 * must display the following acknowledgement:
45 * This product includes software developed by the University of
46 * California, Berkeley and its contributors.
47 * 4. Neither the name of the University nor the names of its contributors
48 * may be used to endorse or promote products derived from this software
49 * without specific prior written permission.
50 *
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE.
62 *
63 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94
64 */
65 /*
66 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
67 * support for mandatory and extensible security protections. This notice
68 * is included in support of clause 2.2 (b) of the Apple Public License,
69 * Version 2.0.
70 */
71
72 #include <sys/param.h>
73 #include <sys/systm.h>
74 #include <sys/filedesc.h>
75 #include <sys/proc_internal.h>
76 #include <sys/file_internal.h>
77 #include <sys/vnode_internal.h>
78 #include <sys/malloc.h>
79 #include <sys/mcache.h>
80 #include <sys/mbuf.h>
81 #include <kern/locks.h>
82 #include <sys/domain.h>
83 #include <sys/protosw.h>
84 #include <sys/signalvar.h>
85 #include <sys/socket.h>
86 #include <sys/socketvar.h>
87 #include <sys/kernel.h>
88 #include <sys/uio_internal.h>
89 #include <sys/kauth.h>
90 #include <kern/task.h>
91 #include <sys/priv.h>
92 #include <sys/sysctl.h>
93 #include <sys/sys_domain.h>
94
95 #include <security/audit/audit.h>
96
97 #include <sys/kdebug.h>
98 #include <sys/sysproto.h>
99 #include <netinet/in.h>
100 #include <net/route.h>
101 #include <netinet/in_pcb.h>
102
103 #include <os/ptrtools.h>
104
105 #if CONFIG_MACF_SOCKET_SUBSET
106 #include <security/mac_framework.h>
107 #endif /* MAC_SOCKET_SUBSET */
108
/*
 * Shorthand for fields reached through a fileproc's f_fglob pointer,
 * e.g. fp->f_flag expands to fp->f_fglob->fg_flag.
 */
#define f_flag          f_fglob->fg_flag
#define f_type          f_fglob->fg_ops->fo_type
#define f_msgcount      f_fglob->fg_msgcount
#define f_cred          f_fglob->fg_cred
#define f_ops           f_fglob->fg_ops
#define f_offset        f_fglob->fg_offset
#define f_data          f_fglob->fg_data

/*
 * NETDBG_CODE() values in the DBG_NETSOCK class.
 * (Presumably consumed by the KERNEL_DEBUG tracing calls elsewhere in
 * this file -- confirm against the callers.)
 */

/* Layer in/out begin and end markers */
#define DBG_LAYER_IN_BEG        NETDBG_CODE(DBG_NETSOCK, 0)
#define DBG_LAYER_IN_END        NETDBG_CODE(DBG_NETSOCK, 2)
#define DBG_LAYER_OUT_BEG       NETDBG_CODE(DBG_NETSOCK, 1)
#define DBG_LAYER_OUT_END       NETDBG_CODE(DBG_NETSOCK, 3)

/* Per-function codes: send side */
#define DBG_FNC_SENDMSG         NETDBG_CODE(DBG_NETSOCK, (1 << 8) | 1)
#define DBG_FNC_SENDTO          NETDBG_CODE(DBG_NETSOCK, (2 << 8) | 1)
#define DBG_FNC_SENDIT          NETDBG_CODE(DBG_NETSOCK, (3 << 8) | 1)

/* Per-function codes: receive side */
#define DBG_FNC_RECVFROM        NETDBG_CODE(DBG_NETSOCK, (5 << 8))
#define DBG_FNC_RECVMSG         NETDBG_CODE(DBG_NETSOCK, (6 << 8))
#define DBG_FNC_RECVIT          NETDBG_CODE(DBG_NETSOCK, (7 << 8))

/* Per-function codes: sendfile and its sub-steps */
#define DBG_FNC_SENDFILE        NETDBG_CODE(DBG_NETSOCK, (10 << 8))
#define DBG_FNC_SENDFILE_WAIT   NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 1))
#define DBG_FNC_SENDFILE_READ   NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 2))
#define DBG_FNC_SENDFILE_SEND   NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 3))

/* Per-function codes: batched message calls */
#define DBG_FNC_SENDMSG_X       NETDBG_CODE(DBG_NETSOCK, (11 << 8))
#define DBG_FNC_RECVMSG_X       NETDBG_CODE(DBG_NETSOCK, (12 << 8))

/*
 * On DEBUG or DEVELOPMENT builds kernel addresses are passed through
 * unchanged and DBG_PRINTF() forwards to printf(); on all other builds
 * addresses go through VM_KERNEL_ADDRPERM() and DBG_PRINTF() expands
 * to an empty statement.
 */
#if DEBUG || DEVELOPMENT
#define DEBUG_KERNEL_ADDRPERM(_v)       (_v)
#define DBG_PRINTF(...)                 printf(__VA_ARGS__)
#else
#define DEBUG_KERNEL_ADDRPERM(_v)       VM_KERNEL_ADDRPERM(_v)
#define DBG_PRINTF(...)                 do { } while (0)
#endif
141
142 /* TODO: should be in header file */
143 int falloc_locked(proc_t, struct fileproc **, int *, vfs_context_t, int);
144
145 static int sendit(struct proc *, struct socket *, struct user_msghdr *, uio_t,
146 int, int32_t *);
147 static int recvit(struct proc *, int, struct user_msghdr *, uio_t, user_addr_t,
148 int32_t *);
149 static int connectit(struct socket *, struct sockaddr *);
150 static int getsockaddr(struct socket *, struct sockaddr **, user_addr_t,
151 size_t, boolean_t);
152 static int getsockaddr_s(struct socket *, struct sockaddr_storage *,
153 user_addr_t, size_t, boolean_t);
154 #if SENDFILE
155 static void alloc_sendpkt(int, size_t, unsigned int *, struct mbuf **,
156 boolean_t);
157 #endif /* SENDFILE */
158 static int connectx_nocancel(struct proc *, struct connectx_args *, int *);
159 static int connectitx(struct socket *, struct sockaddr *,
160 struct sockaddr *, struct proc *, uint32_t, sae_associd_t,
161 sae_connid_t *, uio_t, unsigned int, user_ssize_t *);
162 static int disconnectx_nocancel(struct proc *, struct disconnectx_args *,
163 int *);
164 static int socket_common(struct proc *, int, int, int, pid_t, int32_t *, int);
165
166 static int internalize_user_msghdr_array(const void *, int, int, u_int,
167 struct user_msghdr_x *, struct uio **);
168 static u_int externalize_user_msghdr_array(void *, int, int, u_int,
169 const struct user_msghdr_x *, struct uio **);
170
171 static void free_uio_array(struct uio **, u_int);
172 static int uio_array_is_valid(struct uio **, u_int);
173 static int recv_msg_array_is_valid(struct recv_msg_elem *, u_int);
174 static int internalize_recv_msghdr_array(const void *, int, int,
175 u_int, struct user_msghdr_x *, struct recv_msg_elem *);
176 static u_int externalize_recv_msghdr_array(void *, int, int, u_int,
177 const struct user_msghdr_x *, struct recv_msg_elem *);
178 static struct recv_msg_elem *alloc_recv_msg_array(u_int count);
179 static void free_recv_msg_array(struct recv_msg_elem *, u_int);
180
181 SYSCTL_DECL(_kern_ipc);
182
183 static u_int somaxsendmsgx = 100;
184 SYSCTL_UINT(_kern_ipc, OID_AUTO, maxsendmsgx,
185 CTLFLAG_RW | CTLFLAG_LOCKED, &somaxsendmsgx, 0, "");
186 static u_int somaxrecvmsgx = 100;
187 SYSCTL_UINT(_kern_ipc, OID_AUTO, maxrecvmsgx,
188 CTLFLAG_RW | CTLFLAG_LOCKED, &somaxrecvmsgx, 0, "");
189
190 /*
191 * System call interface to the socket abstraction.
192 */
193
194 extern const struct fileops socketops;
195
196 /*
197 * Returns: 0 Success
198 * EACCES Mandatory Access Control failure
199 * falloc:ENFILE
200 * falloc:EMFILE
201 * falloc:ENOMEM
202 * socreate:EAFNOSUPPORT
203 * socreate:EPROTOTYPE
204 * socreate:EPROTONOSUPPORT
205 * socreate:ENOBUFS
206 * socreate:ENOMEM
207 * socreate:??? [other protocol families, IPSEC]
208 */
209 int
210 socket(struct proc *p,
211 struct socket_args *uap,
212 int32_t *retval)
213 {
214 return socket_common(p, uap->domain, uap->type, uap->protocol,
215 proc_selfpid(), retval, 0);
216 }
217
218 int
219 socket_delegate(struct proc *p,
220 struct socket_delegate_args *uap,
221 int32_t *retval)
222 {
223 return socket_common(p, uap->domain, uap->type, uap->protocol,
224 uap->epid, retval, 1);
225 }
226
227 static int
228 socket_common(struct proc *p,
229 int domain,
230 int type,
231 int protocol,
232 pid_t epid,
233 int32_t *retval,
234 int delegate)
235 {
236 struct socket *so;
237 struct fileproc *fp;
238 int fd, error;
239
240 AUDIT_ARG(socket, domain, type, protocol);
241 #if CONFIG_MACF_SOCKET_SUBSET
242 if ((error = mac_socket_check_create(kauth_cred_get(), domain,
243 type, protocol)) != 0) {
244 return error;
245 }
246 #endif /* MAC_SOCKET_SUBSET */
247
248 if (delegate) {
249 error = priv_check_cred(kauth_cred_get(),
250 PRIV_NET_PRIVILEGED_SOCKET_DELEGATE, 0);
251 if (error) {
252 return EACCES;
253 }
254 }
255
256 error = falloc(p, &fp, &fd, vfs_context_current());
257 if (error) {
258 return error;
259 }
260 fp->f_flag = FREAD | FWRITE;
261 fp->f_ops = &socketops;
262
263 if (delegate) {
264 error = socreate_delegate(domain, &so, type, protocol, epid);
265 } else {
266 error = socreate(domain, &so, type, protocol);
267 }
268
269 if (error) {
270 fp_free(p, fd, fp);
271 } else {
272 fp->f_data = (caddr_t)so;
273
274 proc_fdlock(p);
275 procfdtbl_releasefd(p, fd, NULL);
276
277 fp_drop(p, fd, fp, 1);
278 proc_fdunlock(p);
279
280 *retval = fd;
281 if (ENTR_SHOULDTRACE) {
282 KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
283 fd, 0, (int64_t)VM_KERNEL_ADDRPERM(so));
284 }
285 }
286 return error;
287 }
288
289 /*
290 * Returns: 0 Success
291 * EDESTADDRREQ Destination address required
292 * EBADF Bad file descriptor
293 * EACCES Mandatory Access Control failure
294 * file_socket:ENOTSOCK
295 * file_socket:EBADF
296 * getsockaddr:ENAMETOOLONG Filename too long
297 * getsockaddr:EINVAL Invalid argument
298 * getsockaddr:ENOMEM Not enough space
299 * getsockaddr:EFAULT Bad address
300 * sobindlock:???
301 */
302 /* ARGSUSED */
303 int
304 bind(__unused proc_t p, struct bind_args *uap, __unused int32_t *retval)
305 {
306 struct sockaddr_storage ss;
307 struct sockaddr *sa = NULL;
308 struct socket *so;
309 boolean_t want_free = TRUE;
310 int error;
311
312 AUDIT_ARG(fd, uap->s);
313 error = file_socket(uap->s, &so);
314 if (error != 0) {
315 return error;
316 }
317 if (so == NULL) {
318 error = EBADF;
319 goto out;
320 }
321 if (uap->name == USER_ADDR_NULL) {
322 error = EDESTADDRREQ;
323 goto out;
324 }
325 if (uap->namelen > sizeof(ss)) {
326 error = getsockaddr(so, &sa, uap->name, uap->namelen, TRUE);
327 } else {
328 error = getsockaddr_s(so, &ss, uap->name, uap->namelen, TRUE);
329 if (error == 0) {
330 sa = (struct sockaddr *)&ss;
331 want_free = FALSE;
332 }
333 }
334 if (error != 0) {
335 goto out;
336 }
337 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
338 #if CONFIG_MACF_SOCKET_SUBSET
339 if ((sa != NULL && sa->sa_family == AF_SYSTEM) ||
340 (error = mac_socket_check_bind(kauth_cred_get(), so, sa)) == 0) {
341 error = sobindlock(so, sa, 1); /* will lock socket */
342 }
343 #else
344 error = sobindlock(so, sa, 1); /* will lock socket */
345 #endif /* MAC_SOCKET_SUBSET */
346 if (want_free) {
347 FREE(sa, M_SONAME);
348 }
349 out:
350 file_drop(uap->s);
351 return error;
352 }
353
354 /*
355 * Returns: 0 Success
356 * EBADF
357 * EACCES Mandatory Access Control failure
358 * file_socket:ENOTSOCK
359 * file_socket:EBADF
360 * solisten:EINVAL
361 * solisten:EOPNOTSUPP
362 * solisten:???
363 */
364 int
365 listen(__unused struct proc *p, struct listen_args *uap,
366 __unused int32_t *retval)
367 {
368 int error;
369 struct socket *so;
370
371 AUDIT_ARG(fd, uap->s);
372 error = file_socket(uap->s, &so);
373 if (error) {
374 return error;
375 }
376 if (so != NULL)
377 #if CONFIG_MACF_SOCKET_SUBSET
378 {
379 error = mac_socket_check_listen(kauth_cred_get(), so);
380 if (error == 0) {
381 error = solisten(so, uap->backlog);
382 }
383 }
384 #else
385 { error = solisten(so, uap->backlog);}
386 #endif /* MAC_SOCKET_SUBSET */
387 else {
388 error = EBADF;
389 }
390
391 file_drop(uap->s);
392 return error;
393 }
394
395 /*
396 * Returns: fp_getfsock:EBADF Bad file descriptor
397 * fp_getfsock:EOPNOTSUPP ...
398 * xlate => :ENOTSOCK Socket operation on non-socket
399 * :EFAULT Bad address on copyin/copyout
400 * :EBADF Bad file descriptor
401 * :EOPNOTSUPP Operation not supported on socket
402 * :EINVAL Invalid argument
403 * :EWOULDBLOCK Operation would block
404 * :ECONNABORTED Connection aborted
405 * :EINTR Interrupted function
406 * :EACCES Mandatory Access Control failure
407 * falloc_locked:ENFILE Too many files open in system
408 * falloc_locked::EMFILE Too many open files
409 * falloc_locked::ENOMEM Not enough space
410 * 0 Success
411 */
412 int
413 accept_nocancel(struct proc *p, struct accept_nocancel_args *uap,
414 int32_t *retval)
415 {
416 struct fileproc *fp;
417 struct sockaddr *sa = NULL;
418 socklen_t namelen;
419 int error;
420 struct socket *head, *so = NULL;
421 lck_mtx_t *mutex_held;
422 int fd = uap->s;
423 int newfd;
424 short fflag; /* type must match fp->f_flag */
425 int dosocklock = 0;
426
427 *retval = -1;
428
429 AUDIT_ARG(fd, uap->s);
430
431 if (uap->name) {
432 error = copyin(uap->anamelen, (caddr_t)&namelen,
433 sizeof(socklen_t));
434 if (error) {
435 return error;
436 }
437 }
438 error = fp_getfsock(p, fd, &fp, &head);
439 if (error) {
440 if (error == EOPNOTSUPP) {
441 error = ENOTSOCK;
442 }
443 return error;
444 }
445 if (head == NULL) {
446 error = EBADF;
447 goto out;
448 }
449 #if CONFIG_MACF_SOCKET_SUBSET
450 if ((error = mac_socket_check_accept(kauth_cred_get(), head)) != 0) {
451 goto out;
452 }
453 #endif /* MAC_SOCKET_SUBSET */
454
455 socket_lock(head, 1);
456
457 if (head->so_proto->pr_getlock != NULL) {
458 mutex_held = (*head->so_proto->pr_getlock)(head, PR_F_WILLUNLOCK);
459 dosocklock = 1;
460 } else {
461 mutex_held = head->so_proto->pr_domain->dom_mtx;
462 dosocklock = 0;
463 }
464
465 if ((head->so_options & SO_ACCEPTCONN) == 0) {
466 if ((head->so_proto->pr_flags & PR_CONNREQUIRED) == 0) {
467 error = EOPNOTSUPP;
468 } else {
469 /* POSIX: The socket is not accepting connections */
470 error = EINVAL;
471 }
472 socket_unlock(head, 1);
473 goto out;
474 }
475 check_again:
476 if ((head->so_state & SS_NBIO) && head->so_comp.tqh_first == NULL) {
477 socket_unlock(head, 1);
478 error = EWOULDBLOCK;
479 goto out;
480 }
481 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
482 if (head->so_state & SS_CANTRCVMORE) {
483 head->so_error = ECONNABORTED;
484 break;
485 }
486 if (head->so_usecount < 1) {
487 panic("accept: head=%p refcount=%d\n", head,
488 head->so_usecount);
489 }
490 error = msleep((caddr_t)&head->so_timeo, mutex_held,
491 PSOCK | PCATCH, "accept", 0);
492 if (head->so_usecount < 1) {
493 panic("accept: 2 head=%p refcount=%d\n", head,
494 head->so_usecount);
495 }
496 if ((head->so_state & SS_DRAINING)) {
497 error = ECONNABORTED;
498 }
499 if (error) {
500 socket_unlock(head, 1);
501 goto out;
502 }
503 }
504 if (head->so_error) {
505 error = head->so_error;
506 head->so_error = 0;
507 socket_unlock(head, 1);
508 goto out;
509 }
510
511 /*
512 * At this point we know that there is at least one connection
513 * ready to be accepted. Remove it from the queue prior to
514 * allocating the file descriptor for it since falloc() may
515 * block allowing another process to accept the connection
516 * instead.
517 */
518 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
519
520 so_acquire_accept_list(head, NULL);
521 if (TAILQ_EMPTY(&head->so_comp)) {
522 so_release_accept_list(head);
523 goto check_again;
524 }
525
526 so = TAILQ_FIRST(&head->so_comp);
527 TAILQ_REMOVE(&head->so_comp, so, so_list);
528 so->so_head = NULL;
529 so->so_state &= ~SS_COMP;
530 head->so_qlen--;
531 so_release_accept_list(head);
532
533 /* unlock head to avoid deadlock with select, keep a ref on head */
534 socket_unlock(head, 0);
535
536 #if CONFIG_MACF_SOCKET_SUBSET
537 /*
538 * Pass the pre-accepted socket to the MAC framework. This is
539 * cheaper than allocating a file descriptor for the socket,
540 * calling the protocol accept callback, and possibly freeing
541 * the file descriptor should the MAC check fails.
542 */
543 if ((error = mac_socket_check_accepted(kauth_cred_get(), so)) != 0) {
544 socket_lock(so, 1);
545 so->so_state &= ~SS_NOFDREF;
546 socket_unlock(so, 1);
547 soclose(so);
548 /* Drop reference on listening socket */
549 sodereference(head);
550 goto out;
551 }
552 #endif /* MAC_SOCKET_SUBSET */
553
554 /*
555 * Pass the pre-accepted socket to any interested socket filter(s).
556 * Upon failure, the socket would have been closed by the callee.
557 */
558 if (so->so_filt != NULL && (error = soacceptfilter(so, head)) != 0) {
559 /* Drop reference on listening socket */
560 sodereference(head);
561 /* Propagate socket filter's error code to the caller */
562 goto out;
563 }
564
565 fflag = fp->f_flag;
566 error = falloc(p, &fp, &newfd, vfs_context_current());
567 if (error) {
568 /*
569 * Probably ran out of file descriptors.
570 *
571 * <rdar://problem/8554930>
572 * Don't put this back on the socket like we used to, that
573 * just causes the client to spin. Drop the socket.
574 */
575 socket_lock(so, 1);
576 so->so_state &= ~SS_NOFDREF;
577 socket_unlock(so, 1);
578 soclose(so);
579 sodereference(head);
580 goto out;
581 }
582 *retval = newfd;
583 fp->f_flag = fflag;
584 fp->f_ops = &socketops;
585 fp->f_data = (caddr_t)so;
586
587 socket_lock(head, 0);
588 if (dosocklock) {
589 socket_lock(so, 1);
590 }
591
592 /* Sync socket non-blocking/async state with file flags */
593 if (fp->f_flag & FNONBLOCK) {
594 so->so_state |= SS_NBIO;
595 } else {
596 so->so_state &= ~SS_NBIO;
597 }
598
599 if (fp->f_flag & FASYNC) {
600 so->so_state |= SS_ASYNC;
601 so->so_rcv.sb_flags |= SB_ASYNC;
602 so->so_snd.sb_flags |= SB_ASYNC;
603 } else {
604 so->so_state &= ~SS_ASYNC;
605 so->so_rcv.sb_flags &= ~SB_ASYNC;
606 so->so_snd.sb_flags &= ~SB_ASYNC;
607 }
608
609 (void) soacceptlock(so, &sa, 0);
610 socket_unlock(head, 1);
611 if (sa == NULL) {
612 namelen = 0;
613 if (uap->name) {
614 goto gotnoname;
615 }
616 error = 0;
617 goto releasefd;
618 }
619 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
620
621 if (uap->name) {
622 socklen_t sa_len;
623
624 /* save sa_len before it is destroyed */
625 sa_len = sa->sa_len;
626 namelen = MIN(namelen, sa_len);
627 error = copyout(sa, uap->name, namelen);
628 if (!error) {
629 /* return the actual, untruncated address length */
630 namelen = sa_len;
631 }
632 gotnoname:
633 error = copyout((caddr_t)&namelen, uap->anamelen,
634 sizeof(socklen_t));
635 }
636 FREE(sa, M_SONAME);
637
638 releasefd:
639 /*
640 * If the socket has been marked as inactive by sosetdefunct(),
641 * disallow further operations on it.
642 */
643 if (so->so_flags & SOF_DEFUNCT) {
644 sodefunct(current_proc(), so,
645 SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL);
646 }
647
648 if (dosocklock) {
649 socket_unlock(so, 1);
650 }
651
652 proc_fdlock(p);
653 procfdtbl_releasefd(p, newfd, NULL);
654 fp_drop(p, newfd, fp, 1);
655 proc_fdunlock(p);
656
657 out:
658 file_drop(fd);
659
660 if (error == 0 && ENTR_SHOULDTRACE) {
661 KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
662 newfd, 0, (int64_t)VM_KERNEL_ADDRPERM(so));
663 }
664 return error;
665 }
666
667 int
668 accept(struct proc *p, struct accept_args *uap, int32_t *retval)
669 {
670 __pthread_testcancel(1);
671 return accept_nocancel(p, (struct accept_nocancel_args *)uap,
672 retval);
673 }
674
675 /*
676 * Returns: 0 Success
677 * EBADF Bad file descriptor
678 * EALREADY Connection already in progress
679 * EINPROGRESS Operation in progress
680 * ECONNABORTED Connection aborted
681 * EINTR Interrupted function
682 * EACCES Mandatory Access Control failure
683 * file_socket:ENOTSOCK
684 * file_socket:EBADF
685 * getsockaddr:ENAMETOOLONG Filename too long
686 * getsockaddr:EINVAL Invalid argument
687 * getsockaddr:ENOMEM Not enough space
688 * getsockaddr:EFAULT Bad address
689 * soconnectlock:EOPNOTSUPP
690 * soconnectlock:EISCONN
691 * soconnectlock:??? [depends on protocol, filters]
692 * msleep:EINTR
693 *
694 * Imputed: so_error error may be set from so_error, which
695 * may have been set by soconnectlock.
696 */
697 /* ARGSUSED */
698 int
699 connect(struct proc *p, struct connect_args *uap, int32_t *retval)
700 {
701 __pthread_testcancel(1);
702 return connect_nocancel(p, (struct connect_nocancel_args *)uap,
703 retval);
704 }
705
706 int
707 connect_nocancel(proc_t p, struct connect_nocancel_args *uap, int32_t *retval)
708 {
709 #pragma unused(p, retval)
710 struct socket *so;
711 struct sockaddr_storage ss;
712 struct sockaddr *sa = NULL;
713 int error;
714 int fd = uap->s;
715 boolean_t dgram;
716
717 AUDIT_ARG(fd, uap->s);
718 error = file_socket(fd, &so);
719 if (error != 0) {
720 return error;
721 }
722 if (so == NULL) {
723 error = EBADF;
724 goto out;
725 }
726
727 /*
728 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
729 * if this is a datagram socket; translate for other types.
730 */
731 dgram = (so->so_type == SOCK_DGRAM);
732
733 /* Get socket address now before we obtain socket lock */
734 if (uap->namelen > sizeof(ss)) {
735 error = getsockaddr(so, &sa, uap->name, uap->namelen, !dgram);
736 } else {
737 error = getsockaddr_s(so, &ss, uap->name, uap->namelen, !dgram);
738 if (error == 0) {
739 sa = (struct sockaddr *)&ss;
740 }
741 }
742 if (error != 0) {
743 goto out;
744 }
745
746 error = connectit(so, sa);
747
748 if (sa != NULL && sa != SA(&ss)) {
749 FREE(sa, M_SONAME);
750 }
751 if (error == ERESTART) {
752 error = EINTR;
753 }
754 out:
755 file_drop(fd);
756 return error;
757 }
758
759 static int
760 connectx_nocancel(struct proc *p, struct connectx_args *uap, int *retval)
761 {
762 #pragma unused(p, retval)
763 struct sockaddr_storage ss, sd;
764 struct sockaddr *src = NULL, *dst = NULL;
765 struct socket *so;
766 int error, error1, fd = uap->socket;
767 boolean_t dgram;
768 sae_connid_t cid = SAE_CONNID_ANY;
769 struct user32_sa_endpoints ep32;
770 struct user64_sa_endpoints ep64;
771 struct user_sa_endpoints ep;
772 user_ssize_t bytes_written = 0;
773 struct user_iovec *iovp;
774 uio_t auio = NULL;
775
776 AUDIT_ARG(fd, uap->socket);
777 error = file_socket(fd, &so);
778 if (error != 0) {
779 return error;
780 }
781 if (so == NULL) {
782 error = EBADF;
783 goto out;
784 }
785
786 if (uap->endpoints == USER_ADDR_NULL) {
787 error = EINVAL;
788 goto out;
789 }
790
791 if (IS_64BIT_PROCESS(p)) {
792 error = copyin(uap->endpoints, (caddr_t)&ep64, sizeof(ep64));
793 if (error != 0) {
794 goto out;
795 }
796
797 ep.sae_srcif = ep64.sae_srcif;
798 ep.sae_srcaddr = ep64.sae_srcaddr;
799 ep.sae_srcaddrlen = ep64.sae_srcaddrlen;
800 ep.sae_dstaddr = ep64.sae_dstaddr;
801 ep.sae_dstaddrlen = ep64.sae_dstaddrlen;
802 } else {
803 error = copyin(uap->endpoints, (caddr_t)&ep32, sizeof(ep32));
804 if (error != 0) {
805 goto out;
806 }
807
808 ep.sae_srcif = ep32.sae_srcif;
809 ep.sae_srcaddr = ep32.sae_srcaddr;
810 ep.sae_srcaddrlen = ep32.sae_srcaddrlen;
811 ep.sae_dstaddr = ep32.sae_dstaddr;
812 ep.sae_dstaddrlen = ep32.sae_dstaddrlen;
813 }
814
815 /*
816 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
817 * if this is a datagram socket; translate for other types.
818 */
819 dgram = (so->so_type == SOCK_DGRAM);
820
821 /* Get socket address now before we obtain socket lock */
822 if (ep.sae_srcaddr != USER_ADDR_NULL) {
823 if (ep.sae_srcaddrlen > sizeof(ss)) {
824 error = getsockaddr(so, &src, ep.sae_srcaddr, ep.sae_srcaddrlen, dgram);
825 } else {
826 error = getsockaddr_s(so, &ss, ep.sae_srcaddr, ep.sae_srcaddrlen, dgram);
827 if (error == 0) {
828 src = (struct sockaddr *)&ss;
829 }
830 }
831
832 if (error) {
833 goto out;
834 }
835 }
836
837 if (ep.sae_dstaddr == USER_ADDR_NULL) {
838 error = EINVAL;
839 goto out;
840 }
841
842 /* Get socket address now before we obtain socket lock */
843 if (ep.sae_dstaddrlen > sizeof(sd)) {
844 error = getsockaddr(so, &dst, ep.sae_dstaddr, ep.sae_dstaddrlen, dgram);
845 } else {
846 error = getsockaddr_s(so, &sd, ep.sae_dstaddr, ep.sae_dstaddrlen, dgram);
847 if (error == 0) {
848 dst = (struct sockaddr *)&sd;
849 }
850 }
851
852 if (error) {
853 goto out;
854 }
855
856 VERIFY(dst != NULL);
857
858 if (uap->iov != USER_ADDR_NULL) {
859 /* Verify range before calling uio_create() */
860 if (uap->iovcnt <= 0 || uap->iovcnt > UIO_MAXIOV) {
861 error = EINVAL;
862 goto out;
863 }
864
865 if (uap->len == USER_ADDR_NULL) {
866 error = EINVAL;
867 goto out;
868 }
869
870 /* allocate a uio to hold the number of iovecs passed */
871 auio = uio_create(uap->iovcnt, 0,
872 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
873 UIO_WRITE);
874
875 if (auio == NULL) {
876 error = ENOMEM;
877 goto out;
878 }
879
880 /*
881 * get location of iovecs within the uio.
882 * then copyin the iovecs from user space.
883 */
884 iovp = uio_iovsaddr(auio);
885 if (iovp == NULL) {
886 error = ENOMEM;
887 goto out;
888 }
889 error = copyin_user_iovec_array(uap->iov,
890 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
891 uap->iovcnt, iovp);
892 if (error != 0) {
893 goto out;
894 }
895
896 /* finish setup of uio_t */
897 error = uio_calculateresid(auio);
898 if (error != 0) {
899 goto out;
900 }
901 }
902
903 error = connectitx(so, src, dst, p, ep.sae_srcif, uap->associd,
904 &cid, auio, uap->flags, &bytes_written);
905 if (error == ERESTART) {
906 error = EINTR;
907 }
908
909 if (uap->len != USER_ADDR_NULL) {
910 error1 = copyout(&bytes_written, uap->len, sizeof(uap->len));
911 /* give precedence to connectitx errors */
912 if ((error1 != 0) && (error == 0)) {
913 error = error1;
914 }
915 }
916
917 if (uap->connid != USER_ADDR_NULL) {
918 error1 = copyout(&cid, uap->connid, sizeof(cid));
919 /* give precedence to connectitx errors */
920 if ((error1 != 0) && (error == 0)) {
921 error = error1;
922 }
923 }
924 out:
925 file_drop(fd);
926 if (auio != NULL) {
927 uio_free(auio);
928 }
929 if (src != NULL && src != SA(&ss)) {
930 FREE(src, M_SONAME);
931 }
932 if (dst != NULL && dst != SA(&sd)) {
933 FREE(dst, M_SONAME);
934 }
935 return error;
936 }
937
/*
 * connectx
 *
 * Cancellable entry point for connectx_nocancel().
 */
int
connectx(struct proc *p, struct connectx_args *uap, int *retval)
{
	/*
	 * Because of its similarity to a POSIX interface, this call is
	 * treated as an unofficial cancellation point.
	 */
	__pthread_testcancel(1);

	return connectx_nocancel(p, uap, retval);
}
948
949 static int
950 connectit(struct socket *so, struct sockaddr *sa)
951 {
952 int error;
953
954 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
955 #if CONFIG_MACF_SOCKET_SUBSET
956 if ((error = mac_socket_check_connect(kauth_cred_get(), so, sa)) != 0) {
957 return error;
958 }
959 #endif /* MAC_SOCKET_SUBSET */
960
961 socket_lock(so, 1);
962 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
963 error = EALREADY;
964 goto out;
965 }
966 error = soconnectlock(so, sa, 0);
967 if (error != 0) {
968 goto out;
969 }
970 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
971 error = EINPROGRESS;
972 goto out;
973 }
974 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
975 lck_mtx_t *mutex_held;
976
977 if (so->so_proto->pr_getlock != NULL) {
978 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
979 } else {
980 mutex_held = so->so_proto->pr_domain->dom_mtx;
981 }
982 error = msleep((caddr_t)&so->so_timeo, mutex_held,
983 PSOCK | PCATCH, __func__, 0);
984 if (so->so_state & SS_DRAINING) {
985 error = ECONNABORTED;
986 }
987 if (error != 0) {
988 break;
989 }
990 }
991 if (error == 0) {
992 error = so->so_error;
993 so->so_error = 0;
994 }
995 out:
996 socket_unlock(so, 1);
997 return error;
998 }
999
1000 static int
1001 connectitx(struct socket *so, struct sockaddr *src,
1002 struct sockaddr *dst, struct proc *p, uint32_t ifscope,
1003 sae_associd_t aid, sae_connid_t *pcid, uio_t auio, unsigned int flags,
1004 user_ssize_t *bytes_written)
1005 {
1006 int error;
1007 #pragma unused (flags)
1008
1009 VERIFY(dst != NULL);
1010
1011 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), dst);
1012 #if CONFIG_MACF_SOCKET_SUBSET
1013 if ((error = mac_socket_check_connect(kauth_cred_get(), so, dst)) != 0) {
1014 return error;
1015 }
1016
1017 if (auio != NULL) {
1018 if ((error = mac_socket_check_send(kauth_cred_get(), so, dst)) != 0) {
1019 return error;
1020 }
1021 }
1022 #endif /* MAC_SOCKET_SUBSET */
1023
1024 socket_lock(so, 1);
1025 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
1026 error = EALREADY;
1027 goto out;
1028 }
1029
1030 if ((so->so_proto->pr_flags & PR_DATA_IDEMPOTENT) &&
1031 (flags & CONNECT_DATA_IDEMPOTENT)) {
1032 so->so_flags1 |= SOF1_DATA_IDEMPOTENT;
1033
1034 if (flags & CONNECT_DATA_AUTHENTICATED) {
1035 so->so_flags1 |= SOF1_DATA_AUTHENTICATED;
1036 }
1037 }
1038
1039 /*
1040 * Case 1: CONNECT_RESUME_ON_READ_WRITE set, no data.
1041 * Case 2: CONNECT_RESUME_ON_READ_WRITE set, with data (user error)
1042 * Case 3: CONNECT_RESUME_ON_READ_WRITE not set, with data
1043 * Case 3 allows user to combine write with connect even if they have
1044 * no use for TFO (such as regular TCP, and UDP).
1045 * Case 4: CONNECT_RESUME_ON_READ_WRITE not set, no data (regular case)
1046 */
1047 if ((so->so_proto->pr_flags & PR_PRECONN_WRITE) &&
1048 ((flags & CONNECT_RESUME_ON_READ_WRITE) || auio)) {
1049 so->so_flags1 |= SOF1_PRECONNECT_DATA;
1050 }
1051
1052 /*
1053 * If a user sets data idempotent and does not pass an uio, or
1054 * sets CONNECT_RESUME_ON_READ_WRITE, this is an error, reset
1055 * SOF1_DATA_IDEMPOTENT.
1056 */
1057 if (!(so->so_flags1 & SOF1_PRECONNECT_DATA) &&
1058 (so->so_flags1 & SOF1_DATA_IDEMPOTENT)) {
1059 /* We should return EINVAL instead perhaps. */
1060 so->so_flags1 &= ~SOF1_DATA_IDEMPOTENT;
1061 }
1062
1063 error = soconnectxlocked(so, src, dst, p, ifscope,
1064 aid, pcid, 0, NULL, 0, auio, bytes_written);
1065 if (error != 0) {
1066 goto out;
1067 }
1068 /*
1069 * If, after the call to soconnectxlocked the flag is still set (in case
1070 * data has been queued and the connect() has actually been triggered,
1071 * it will have been unset by the transport), we exit immediately. There
1072 * is no reason to wait on any event.
1073 */
1074 if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
1075 error = 0;
1076 goto out;
1077 }
1078 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
1079 error = EINPROGRESS;
1080 goto out;
1081 }
1082 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
1083 lck_mtx_t *mutex_held;
1084
1085 if (so->so_proto->pr_getlock != NULL) {
1086 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
1087 } else {
1088 mutex_held = so->so_proto->pr_domain->dom_mtx;
1089 }
1090 error = msleep((caddr_t)&so->so_timeo, mutex_held,
1091 PSOCK | PCATCH, __func__, 0);
1092 if (so->so_state & SS_DRAINING) {
1093 error = ECONNABORTED;
1094 }
1095 if (error != 0) {
1096 break;
1097 }
1098 }
1099 if (error == 0) {
1100 error = so->so_error;
1101 so->so_error = 0;
1102 }
1103 out:
1104 socket_unlock(so, 1);
1105 return error;
1106 }
1107
/*
 * peeloff
 *
 * Placeholder system call.  None of the arguments are consulted; the
 * only work done is the pthread cancellation check, after which 0 is
 * returned.
 */
int
peeloff(struct proc *p, struct peeloff_args *uap, int *retval)
{
#pragma unused(p, uap, retval)
	/*
	 * This call resembles a POSIX interface closely enough that it is
	 * treated as an unofficial cancellation point.
	 */
	__pthread_testcancel(1);

	return 0;
}
1119
/*
 * disconnectx
 *
 * Cancellable entry point: performs the pthread cancellation check and
 * then forwards all arguments unchanged to disconnectx_nocancel(),
 * whose result is returned.
 */
int
disconnectx(struct proc *p, struct disconnectx_args *uap, int *retval)
{
	int error;

	/*
	 * This call resembles a POSIX interface closely enough that it is
	 * treated as an unofficial cancellation point.
	 */
	__pthread_testcancel(1);

	error = disconnectx_nocancel(p, uap, retval);
	return error;
}
1130
1131 static int
1132 disconnectx_nocancel(struct proc *p, struct disconnectx_args *uap, int *retval)
1133 {
1134 #pragma unused(p, retval)
1135 struct socket *so;
1136 int fd = uap->s;
1137 int error;
1138
1139 error = file_socket(fd, &so);
1140 if (error != 0) {
1141 return error;
1142 }
1143 if (so == NULL) {
1144 error = EBADF;
1145 goto out;
1146 }
1147
1148 error = sodisconnectx(so, uap->aid, uap->cid);
1149 out:
1150 file_drop(fd);
1151 return error;
1152 }
1153
1154 /*
1155 * Returns: 0 Success
1156 * socreate:EAFNOSUPPORT
1157 * socreate:EPROTOTYPE
1158 * socreate:EPROTONOSUPPORT
1159 * socreate:ENOBUFS
1160 * socreate:ENOMEM
1161 * socreate:EISCONN
1162 * socreate:??? [other protocol families, IPSEC]
1163 * falloc:ENFILE
1164 * falloc:EMFILE
1165 * falloc:ENOMEM
1166 * copyout:EFAULT
1167 * soconnect2:EINVAL
1168 * soconnect2:EPROTOTYPE
1169 * soconnect2:??? [other protocol families[
1170 */
1171 int
1172 socketpair(struct proc *p, struct socketpair_args *uap,
1173 __unused int32_t *retval)
1174 {
1175 struct fileproc *fp1, *fp2;
1176 struct socket *so1, *so2;
1177 int fd, error, sv[2];
1178
1179 AUDIT_ARG(socket, uap->domain, uap->type, uap->protocol);
1180 error = socreate(uap->domain, &so1, uap->type, uap->protocol);
1181 if (error) {
1182 return error;
1183 }
1184 error = socreate(uap->domain, &so2, uap->type, uap->protocol);
1185 if (error) {
1186 goto free1;
1187 }
1188
1189 error = falloc(p, &fp1, &fd, vfs_context_current());
1190 if (error) {
1191 goto free2;
1192 }
1193 fp1->f_flag = FREAD | FWRITE;
1194 fp1->f_ops = &socketops;
1195 fp1->f_data = (caddr_t)so1;
1196 sv[0] = fd;
1197
1198 error = falloc(p, &fp2, &fd, vfs_context_current());
1199 if (error) {
1200 goto free3;
1201 }
1202 fp2->f_flag = FREAD | FWRITE;
1203 fp2->f_ops = &socketops;
1204 fp2->f_data = (caddr_t)so2;
1205 sv[1] = fd;
1206
1207 error = soconnect2(so1, so2);
1208 if (error) {
1209 goto free4;
1210 }
1211 if (uap->type == SOCK_DGRAM) {
1212 /*
1213 * Datagram socket connection is asymmetric.
1214 */
1215 error = soconnect2(so2, so1);
1216 if (error) {
1217 goto free4;
1218 }
1219 }
1220
1221 if ((error = copyout(sv, uap->rsv, 2 * sizeof(int))) != 0) {
1222 goto free4;
1223 }
1224
1225 proc_fdlock(p);
1226 procfdtbl_releasefd(p, sv[0], NULL);
1227 procfdtbl_releasefd(p, sv[1], NULL);
1228 fp_drop(p, sv[0], fp1, 1);
1229 fp_drop(p, sv[1], fp2, 1);
1230 proc_fdunlock(p);
1231
1232 return 0;
1233 free4:
1234 fp_free(p, sv[1], fp2);
1235 free3:
1236 fp_free(p, sv[0], fp1);
1237 free2:
1238 (void) soclose(so2);
1239 free1:
1240 (void) soclose(so1);
1241 return error;
1242 }
1243
1244 /*
1245 * Returns: 0 Success
1246 * EINVAL
1247 * ENOBUFS
1248 * EBADF
1249 * EPIPE
1250 * EACCES Mandatory Access Control failure
1251 * file_socket:ENOTSOCK
1252 * file_socket:EBADF
1253 * getsockaddr:ENAMETOOLONG Filename too long
1254 * getsockaddr:EINVAL Invalid argument
1255 * getsockaddr:ENOMEM Not enough space
1256 * getsockaddr:EFAULT Bad address
1257 * <pru_sosend>:EACCES[TCP]
1258 * <pru_sosend>:EADDRINUSE[TCP]
1259 * <pru_sosend>:EADDRNOTAVAIL[TCP]
1260 * <pru_sosend>:EAFNOSUPPORT[TCP]
1261 * <pru_sosend>:EAGAIN[TCP]
1262 * <pru_sosend>:EBADF
1263 * <pru_sosend>:ECONNRESET[TCP]
1264 * <pru_sosend>:EFAULT
1265 * <pru_sosend>:EHOSTUNREACH[TCP]
1266 * <pru_sosend>:EINTR
1267 * <pru_sosend>:EINVAL
1268 * <pru_sosend>:EISCONN[AF_INET]
1269 * <pru_sosend>:EMSGSIZE[TCP]
1270 * <pru_sosend>:ENETDOWN[TCP]
1271 * <pru_sosend>:ENETUNREACH[TCP]
1272 * <pru_sosend>:ENOBUFS
1273 * <pru_sosend>:ENOMEM[TCP]
1274 * <pru_sosend>:ENOTCONN[AF_INET]
1275 * <pru_sosend>:EOPNOTSUPP
1276 * <pru_sosend>:EPERM[TCP]
1277 * <pru_sosend>:EPIPE
1278 * <pru_sosend>:EWOULDBLOCK
1279 * <pru_sosend>:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
1280 * <pru_sosend>:???[AF_INET] [whatever a filter author chooses]
1281 * <pru_sosend>:??? [value from so_error]
1282 * sockargs:???
1283 */
1284 static int
1285 sendit(struct proc *p, struct socket *so, struct user_msghdr *mp, uio_t uiop,
1286 int flags, int32_t *retval)
1287 {
1288 struct mbuf *control = NULL;
1289 struct sockaddr_storage ss;
1290 struct sockaddr *to = NULL;
1291 boolean_t want_free = TRUE;
1292 int error;
1293 user_ssize_t len;
1294
1295 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
1296
1297 if (mp->msg_name != USER_ADDR_NULL) {
1298 if (mp->msg_namelen > sizeof(ss)) {
1299 error = getsockaddr(so, &to, mp->msg_name,
1300 mp->msg_namelen, TRUE);
1301 } else {
1302 error = getsockaddr_s(so, &ss, mp->msg_name,
1303 mp->msg_namelen, TRUE);
1304 if (error == 0) {
1305 to = (struct sockaddr *)&ss;
1306 want_free = FALSE;
1307 }
1308 }
1309 if (error != 0) {
1310 goto out;
1311 }
1312 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), to);
1313 }
1314 if (mp->msg_control != USER_ADDR_NULL) {
1315 if (mp->msg_controllen < sizeof(struct cmsghdr)) {
1316 error = EINVAL;
1317 goto bad;
1318 }
1319 error = sockargs(&control, mp->msg_control,
1320 mp->msg_controllen, MT_CONTROL);
1321 if (error != 0) {
1322 goto bad;
1323 }
1324 }
1325
1326 #if CONFIG_MACF_SOCKET_SUBSET
1327 /*
1328 * We check the state without holding the socket lock;
1329 * if a race condition occurs, it would simply result
1330 * in an extra call to the MAC check function.
1331 */
1332 if (to != NULL &&
1333 !(so->so_state & SS_DEFUNCT) &&
1334 (error = mac_socket_check_send(kauth_cred_get(), so, to)) != 0) {
1335 goto bad;
1336 }
1337 #endif /* MAC_SOCKET_SUBSET */
1338
1339 len = uio_resid(uiop);
1340 error = so->so_proto->pr_usrreqs->pru_sosend(so, to, uiop, 0,
1341 control, flags);
1342 if (error != 0) {
1343 if (uio_resid(uiop) != len && (error == ERESTART ||
1344 error == EINTR || error == EWOULDBLOCK)) {
1345 error = 0;
1346 }
1347 /* Generation of SIGPIPE can be controlled per socket */
1348 if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE)) {
1349 psignal(p, SIGPIPE);
1350 }
1351 }
1352 if (error == 0) {
1353 *retval = (int)(len - uio_resid(uiop));
1354 }
1355 bad:
1356 if (to != NULL && want_free) {
1357 FREE(to, M_SONAME);
1358 }
1359 out:
1360 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error, 0, 0, 0, 0);
1361
1362 return error;
1363 }
1364
1365 /*
1366 * Returns: 0 Success
1367 * ENOMEM
1368 * sendit:??? [see sendit definition in this file]
1369 * write:??? [4056224: applicable for pipes]
1370 */
1371 int
1372 sendto(struct proc *p, struct sendto_args *uap, int32_t *retval)
1373 {
1374 __pthread_testcancel(1);
1375 return sendto_nocancel(p, (struct sendto_nocancel_args *)uap, retval);
1376 }
1377
1378 int
1379 sendto_nocancel(struct proc *p,
1380 struct sendto_nocancel_args *uap,
1381 int32_t *retval)
1382 {
1383 struct user_msghdr msg;
1384 int error;
1385 uio_t auio = NULL;
1386 struct socket *so;
1387
1388 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_START, 0, 0, 0, 0, 0);
1389 AUDIT_ARG(fd, uap->s);
1390
1391 if (uap->flags & MSG_SKIPCFIL) {
1392 error = EPERM;
1393 goto done;
1394 }
1395
1396 auio = uio_create(1, 0,
1397 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1398 UIO_WRITE);
1399 if (auio == NULL) {
1400 error = ENOMEM;
1401 goto done;
1402 }
1403 uio_addiov(auio, uap->buf, uap->len);
1404
1405 msg.msg_name = uap->to;
1406 msg.msg_namelen = uap->tolen;
1407 /* no need to set up msg_iov. sendit uses uio_t we send it */
1408 msg.msg_iov = 0;
1409 msg.msg_iovlen = 0;
1410 msg.msg_control = 0;
1411 msg.msg_flags = 0;
1412
1413 error = file_socket(uap->s, &so);
1414 if (error) {
1415 goto done;
1416 }
1417
1418 if (so == NULL) {
1419 error = EBADF;
1420 } else {
1421 error = sendit(p, so, &msg, auio, uap->flags, retval);
1422 }
1423
1424 file_drop(uap->s);
1425 done:
1426 if (auio != NULL) {
1427 uio_free(auio);
1428 }
1429
1430 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_END, error, *retval, 0, 0, 0);
1431
1432 return error;
1433 }
1434
1435 /*
1436 * Returns: 0 Success
1437 * ENOBUFS
1438 * copyin:EFAULT
1439 * sendit:??? [see sendit definition in this file]
1440 */
1441 int
1442 sendmsg(struct proc *p, struct sendmsg_args *uap, int32_t *retval)
1443 {
1444 __pthread_testcancel(1);
1445 return sendmsg_nocancel(p, (struct sendmsg_nocancel_args *)uap,
1446 retval);
1447 }
1448
1449 int
1450 sendmsg_nocancel(struct proc *p, struct sendmsg_nocancel_args *uap,
1451 int32_t *retval)
1452 {
1453 struct user32_msghdr msg32;
1454 struct user64_msghdr msg64;
1455 struct user_msghdr user_msg;
1456 caddr_t msghdrp;
1457 int size_of_msghdr;
1458 int error;
1459 uio_t auio = NULL;
1460 struct user_iovec *iovp;
1461 struct socket *so;
1462
1463 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
1464 AUDIT_ARG(fd, uap->s);
1465
1466 if (uap->flags & MSG_SKIPCFIL) {
1467 error = EPERM;
1468 goto done;
1469 }
1470
1471 if (IS_64BIT_PROCESS(p)) {
1472 msghdrp = (caddr_t)&msg64;
1473 size_of_msghdr = sizeof(msg64);
1474 } else {
1475 msghdrp = (caddr_t)&msg32;
1476 size_of_msghdr = sizeof(msg32);
1477 }
1478 error = copyin(uap->msg, msghdrp, size_of_msghdr);
1479 if (error) {
1480 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1481 return error;
1482 }
1483
1484 if (IS_64BIT_PROCESS(p)) {
1485 user_msg.msg_flags = msg64.msg_flags;
1486 user_msg.msg_controllen = msg64.msg_controllen;
1487 user_msg.msg_control = msg64.msg_control;
1488 user_msg.msg_iovlen = msg64.msg_iovlen;
1489 user_msg.msg_iov = msg64.msg_iov;
1490 user_msg.msg_namelen = msg64.msg_namelen;
1491 user_msg.msg_name = msg64.msg_name;
1492 } else {
1493 user_msg.msg_flags = msg32.msg_flags;
1494 user_msg.msg_controllen = msg32.msg_controllen;
1495 user_msg.msg_control = msg32.msg_control;
1496 user_msg.msg_iovlen = msg32.msg_iovlen;
1497 user_msg.msg_iov = msg32.msg_iov;
1498 user_msg.msg_namelen = msg32.msg_namelen;
1499 user_msg.msg_name = msg32.msg_name;
1500 }
1501
1502 if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
1503 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, EMSGSIZE,
1504 0, 0, 0, 0);
1505 return EMSGSIZE;
1506 }
1507
1508 /* allocate a uio large enough to hold the number of iovecs passed */
1509 auio = uio_create(user_msg.msg_iovlen, 0,
1510 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1511 UIO_WRITE);
1512 if (auio == NULL) {
1513 error = ENOBUFS;
1514 goto done;
1515 }
1516
1517 if (user_msg.msg_iovlen) {
1518 /*
1519 * get location of iovecs within the uio.
1520 * then copyin the iovecs from user space.
1521 */
1522 iovp = uio_iovsaddr(auio);
1523 if (iovp == NULL) {
1524 error = ENOBUFS;
1525 goto done;
1526 }
1527 error = copyin_user_iovec_array(user_msg.msg_iov,
1528 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
1529 user_msg.msg_iovlen, iovp);
1530 if (error) {
1531 goto done;
1532 }
1533 user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
1534
1535 /* finish setup of uio_t */
1536 error = uio_calculateresid(auio);
1537 if (error) {
1538 goto done;
1539 }
1540 } else {
1541 user_msg.msg_iov = 0;
1542 }
1543
1544 /* msg_flags is ignored for send */
1545 user_msg.msg_flags = 0;
1546
1547 error = file_socket(uap->s, &so);
1548 if (error) {
1549 goto done;
1550 }
1551 if (so == NULL) {
1552 error = EBADF;
1553 } else {
1554 error = sendit(p, so, &user_msg, auio, uap->flags, retval);
1555 }
1556 file_drop(uap->s);
1557 done:
1558 if (auio != NULL) {
1559 uio_free(auio);
1560 }
1561 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1562
1563 return error;
1564 }
1565
1566 int
1567 sendmsg_x(struct proc *p, struct sendmsg_x_args *uap, user_ssize_t *retval)
1568 {
1569 int error = 0;
1570 struct user_msghdr_x *user_msg_x = NULL;
1571 struct uio **uiop = NULL;
1572 struct socket *so;
1573 u_int i;
1574 struct sockaddr *to = NULL;
1575 user_ssize_t len_before = 0, len_after;
1576 int need_drop = 0;
1577 size_t size_of_msghdr;
1578 void *umsgp = NULL;
1579 u_int uiocnt;
1580 int has_addr_or_ctl = 0;
1581
1582 KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0);
1583
1584 if (uap->flags & MSG_SKIPCFIL) {
1585 error = EPERM;
1586 goto out;
1587 }
1588
1589 error = file_socket(uap->s, &so);
1590 if (error) {
1591 goto out;
1592 }
1593 need_drop = 1;
1594 if (so == NULL) {
1595 error = EBADF;
1596 goto out;
1597 }
1598
1599 /*
1600 * Input parameter range check
1601 */
1602 if (uap->cnt == 0 || uap->cnt > UIO_MAXIOV) {
1603 error = EINVAL;
1604 goto out;
1605 }
1606 /*
1607 * Clip to max currently allowed
1608 */
1609 if (uap->cnt > somaxsendmsgx) {
1610 uap->cnt = somaxsendmsgx;
1611 }
1612
1613 user_msg_x = _MALLOC(uap->cnt * sizeof(struct user_msghdr_x),
1614 M_TEMP, M_WAITOK | M_ZERO);
1615 if (user_msg_x == NULL) {
1616 DBG_PRINTF("%s _MALLOC() user_msg_x failed\n", __func__);
1617 error = ENOMEM;
1618 goto out;
1619 }
1620 uiop = _MALLOC(uap->cnt * sizeof(struct uio *),
1621 M_TEMP, M_WAITOK | M_ZERO);
1622 if (uiop == NULL) {
1623 DBG_PRINTF("%s _MALLOC() uiop failed\n", __func__);
1624 error = ENOMEM;
1625 goto out;
1626 }
1627
1628 size_of_msghdr = IS_64BIT_PROCESS(p) ?
1629 sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x);
1630
1631 umsgp = _MALLOC(uap->cnt * size_of_msghdr,
1632 M_TEMP, M_WAITOK | M_ZERO);
1633 if (umsgp == NULL) {
1634 printf("%s _MALLOC() user_msg_x failed\n", __func__);
1635 error = ENOMEM;
1636 goto out;
1637 }
1638 error = copyin(uap->msgp, umsgp, uap->cnt * size_of_msghdr);
1639 if (error) {
1640 DBG_PRINTF("%s copyin() failed\n", __func__);
1641 goto out;
1642 }
1643 error = internalize_user_msghdr_array(umsgp,
1644 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
1645 UIO_WRITE, uap->cnt, user_msg_x, uiop);
1646 if (error) {
1647 DBG_PRINTF("%s copyin_user_msghdr_array() failed\n", __func__);
1648 goto out;
1649 }
1650 /*
1651 * Make sure the size of each message iovec and
1652 * the aggregate size of all the iovec is valid
1653 */
1654 if (uio_array_is_valid(uiop, uap->cnt) == 0) {
1655 error = EINVAL;
1656 goto out;
1657 }
1658
1659 /*
1660 * Sanity check on passed arguments
1661 */
1662 for (i = 0; i < uap->cnt; i++) {
1663 struct user_msghdr_x *mp = user_msg_x + i;
1664
1665 /*
1666 * No flags on send message
1667 */
1668 if (mp->msg_flags != 0) {
1669 error = EINVAL;
1670 goto out;
1671 }
1672 /*
1673 * No support for address or ancillary data (yet)
1674 */
1675 if (mp->msg_name != USER_ADDR_NULL || mp->msg_namelen != 0) {
1676 has_addr_or_ctl = 1;
1677 }
1678
1679 if (mp->msg_control != USER_ADDR_NULL ||
1680 mp->msg_controllen != 0) {
1681 has_addr_or_ctl = 1;
1682 }
1683
1684 #if CONFIG_MACF_SOCKET_SUBSET
1685 /*
1686 * We check the state without holding the socket lock;
1687 * if a race condition occurs, it would simply result
1688 * in an extra call to the MAC check function.
1689 *
1690 * Note: The following check is never true taken with the
1691 * current limitation that we do not accept to pass an address,
1692 * this is effectively placeholder code. If we add support for
1693 * addresses, we will have to check every address.
1694 */
1695 if (to != NULL &&
1696 !(so->so_state & SS_DEFUNCT) &&
1697 (error = mac_socket_check_send(kauth_cred_get(), so, to))
1698 != 0) {
1699 goto out;
1700 }
1701 #endif /* MAC_SOCKET_SUBSET */
1702 }
1703
1704 len_before = uio_array_resid(uiop, uap->cnt);
1705
1706 /*
1707 * Feed list of packets at once only for connected socket without
1708 * control message
1709 */
1710 if (so->so_proto->pr_usrreqs->pru_sosend_list !=
1711 pru_sosend_list_notsupp &&
1712 has_addr_or_ctl == 0 && somaxsendmsgx == 0) {
1713 error = so->so_proto->pr_usrreqs->pru_sosend_list(so, uiop,
1714 uap->cnt, uap->flags);
1715 } else {
1716 for (i = 0; i < uap->cnt; i++) {
1717 struct user_msghdr_x *mp = user_msg_x + i;
1718 struct user_msghdr user_msg;
1719 uio_t auio = uiop[i];
1720 int32_t tmpval;
1721
1722 user_msg.msg_flags = mp->msg_flags;
1723 user_msg.msg_controllen = mp->msg_controllen;
1724 user_msg.msg_control = mp->msg_control;
1725 user_msg.msg_iovlen = mp->msg_iovlen;
1726 user_msg.msg_iov = mp->msg_iov;
1727 user_msg.msg_namelen = mp->msg_namelen;
1728 user_msg.msg_name = mp->msg_name;
1729
1730 error = sendit(p, so, &user_msg, auio, uap->flags,
1731 &tmpval);
1732 if (error != 0) {
1733 break;
1734 }
1735 }
1736 }
1737 len_after = uio_array_resid(uiop, uap->cnt);
1738
1739 VERIFY(len_after <= len_before);
1740
1741 if (error != 0) {
1742 if (len_after != len_before && (error == ERESTART ||
1743 error == EINTR || error == EWOULDBLOCK ||
1744 error == ENOBUFS)) {
1745 error = 0;
1746 }
1747 /* Generation of SIGPIPE can be controlled per socket */
1748 if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE)) {
1749 psignal(p, SIGPIPE);
1750 }
1751 }
1752 if (error == 0) {
1753 uiocnt = externalize_user_msghdr_array(umsgp,
1754 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
1755 UIO_WRITE, uap->cnt, user_msg_x, uiop);
1756
1757 *retval = (int)(uiocnt);
1758 }
1759 out:
1760 if (need_drop) {
1761 file_drop(uap->s);
1762 }
1763 if (umsgp != NULL) {
1764 _FREE(umsgp, M_TEMP);
1765 }
1766 if (uiop != NULL) {
1767 free_uio_array(uiop, uap->cnt);
1768 _FREE(uiop, M_TEMP);
1769 }
1770 if (user_msg_x != NULL) {
1771 _FREE(user_msg_x, M_TEMP);
1772 }
1773
1774 KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0);
1775
1776 return error;
1777 }
1778
1779
1780 static int
1781 copyout_sa(struct sockaddr *fromsa, user_addr_t name, socklen_t *namelen)
1782 {
1783 int error = 0;
1784 socklen_t sa_len = 0;
1785 ssize_t len;
1786
1787 len = *namelen;
1788 if (len <= 0 || fromsa == 0) {
1789 len = 0;
1790 } else {
1791 #ifndef MIN
1792 #define MIN(a, b) ((a) > (b) ? (b) : (a))
1793 #endif
1794 sa_len = fromsa->sa_len;
1795 len = MIN((unsigned int)len, sa_len);
1796 error = copyout(fromsa, name, (unsigned)len);
1797 if (error) {
1798 goto out;
1799 }
1800 }
1801 *namelen = sa_len;
1802 out:
1803 return 0;
1804 }
1805
1806 static int
1807 copyout_control(struct proc *p, struct mbuf *m, user_addr_t control,
1808 socklen_t *controllen, int *flags)
1809 {
1810 int error = 0;
1811 ssize_t len;
1812 user_addr_t ctlbuf;
1813
1814 len = *controllen;
1815 *controllen = 0;
1816 ctlbuf = control;
1817
1818 while (m && len > 0) {
1819 unsigned int tocopy;
1820 struct cmsghdr *cp = mtod(m, struct cmsghdr *);
1821 int cp_size = CMSG_ALIGN(cp->cmsg_len);
1822 int buflen = m->m_len;
1823
1824 while (buflen > 0 && len > 0) {
1825 /*
1826 * SCM_TIMESTAMP hack because struct timeval has a
1827 * different size for 32 bits and 64 bits processes
1828 */
1829 if (cp->cmsg_level == SOL_SOCKET && cp->cmsg_type == SCM_TIMESTAMP) {
1830 unsigned char tmp_buffer[CMSG_SPACE(sizeof(struct user64_timeval))] = {};
1831 struct cmsghdr *tmp_cp = (struct cmsghdr *)(void *)tmp_buffer;
1832 int tmp_space;
1833 struct timeval *tv = (struct timeval *)(void *)CMSG_DATA(cp);
1834
1835 tmp_cp->cmsg_level = SOL_SOCKET;
1836 tmp_cp->cmsg_type = SCM_TIMESTAMP;
1837
1838 if (proc_is64bit(p)) {
1839 struct user64_timeval *tv64 = (struct user64_timeval *)(void *)CMSG_DATA(tmp_cp);
1840
1841 os_unaligned_deref(&tv64->tv_sec) = tv->tv_sec;
1842 os_unaligned_deref(&tv64->tv_usec) = tv->tv_usec;
1843
1844 tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user64_timeval));
1845 tmp_space = CMSG_SPACE(sizeof(struct user64_timeval));
1846 } else {
1847 struct user32_timeval *tv32 = (struct user32_timeval *)(void *)CMSG_DATA(tmp_cp);
1848
1849 tv32->tv_sec = tv->tv_sec;
1850 tv32->tv_usec = tv->tv_usec;
1851
1852 tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user32_timeval));
1853 tmp_space = CMSG_SPACE(sizeof(struct user32_timeval));
1854 }
1855 if (len >= tmp_space) {
1856 tocopy = tmp_space;
1857 } else {
1858 *flags |= MSG_CTRUNC;
1859 tocopy = len;
1860 }
1861 error = copyout(tmp_buffer, ctlbuf, tocopy);
1862 if (error) {
1863 goto out;
1864 }
1865 } else {
1866 if (cp_size > buflen) {
1867 panic("cp_size > buflen, something"
1868 "wrong with alignment!");
1869 }
1870 if (len >= cp_size) {
1871 tocopy = cp_size;
1872 } else {
1873 *flags |= MSG_CTRUNC;
1874 tocopy = len;
1875 }
1876 error = copyout((caddr_t) cp, ctlbuf, tocopy);
1877 if (error) {
1878 goto out;
1879 }
1880 }
1881
1882 ctlbuf += tocopy;
1883 len -= tocopy;
1884
1885 buflen -= cp_size;
1886 cp = (struct cmsghdr *)(void *)
1887 ((unsigned char *) cp + cp_size);
1888 cp_size = CMSG_ALIGN(cp->cmsg_len);
1889 }
1890
1891 m = m->m_next;
1892 }
1893 *controllen = ctlbuf - control;
1894 out:
1895 return error;
1896 }
1897
1898 /*
1899 * Returns: 0 Success
1900 * ENOTSOCK
1901 * EINVAL
1902 * EBADF
1903 * EACCES Mandatory Access Control failure
1904 * copyout:EFAULT
1905 * fp_lookup:EBADF
1906 * <pru_soreceive>:ENOBUFS
1907 * <pru_soreceive>:ENOTCONN
1908 * <pru_soreceive>:EWOULDBLOCK
1909 * <pru_soreceive>:EFAULT
1910 * <pru_soreceive>:EINTR
1911 * <pru_soreceive>:EBADF
1912 * <pru_soreceive>:EINVAL
1913 * <pru_soreceive>:EMSGSIZE
1914 * <pru_soreceive>:???
1915 *
1916 * Notes: Additional return values from calls through <pru_soreceive>
1917 * depend on protocols other than TCP or AF_UNIX, which are
1918 * documented above.
1919 */
1920 static int
1921 recvit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop,
1922 user_addr_t namelenp, int32_t *retval)
1923 {
1924 ssize_t len;
1925 int error;
1926 struct mbuf *control = 0;
1927 struct socket *so;
1928 struct sockaddr *fromsa = 0;
1929 struct fileproc *fp;
1930
1931 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
1932 proc_fdlock(p);
1933 if ((error = fp_lookup(p, s, &fp, 1))) {
1934 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
1935 proc_fdunlock(p);
1936 return error;
1937 }
1938 if (fp->f_type != DTYPE_SOCKET) {
1939 fp_drop(p, s, fp, 1);
1940 proc_fdunlock(p);
1941 return ENOTSOCK;
1942 }
1943
1944 so = (struct socket *)fp->f_data;
1945 if (so == NULL) {
1946 fp_drop(p, s, fp, 1);
1947 proc_fdunlock(p);
1948 return EBADF;
1949 }
1950
1951 proc_fdunlock(p);
1952
1953 #if CONFIG_MACF_SOCKET_SUBSET
1954 /*
1955 * We check the state without holding the socket lock;
1956 * if a race condition occurs, it would simply result
1957 * in an extra call to the MAC check function.
1958 */
1959 if (!(so->so_state & SS_DEFUNCT) &&
1960 !(so->so_state & SS_ISCONNECTED) &&
1961 !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
1962 (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0) {
1963 goto out1;
1964 }
1965 #endif /* MAC_SOCKET_SUBSET */
1966 if (uio_resid(uiop) < 0) {
1967 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, EINVAL, 0, 0, 0, 0);
1968 error = EINVAL;
1969 goto out1;
1970 }
1971
1972 len = uio_resid(uiop);
1973 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, uiop,
1974 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
1975 &mp->msg_flags);
1976 if (fromsa) {
1977 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()),
1978 fromsa);
1979 }
1980 if (error) {
1981 if (uio_resid(uiop) != len && (error == ERESTART ||
1982 error == EINTR || error == EWOULDBLOCK)) {
1983 error = 0;
1984 }
1985 }
1986 if (error) {
1987 goto out;
1988 }
1989
1990 *retval = len - uio_resid(uiop);
1991
1992 if (mp->msg_name) {
1993 error = copyout_sa(fromsa, mp->msg_name, &mp->msg_namelen);
1994 if (error) {
1995 goto out;
1996 }
1997 /* return the actual, untruncated address length */
1998 if (namelenp &&
1999 (error = copyout((caddr_t)&mp->msg_namelen, namelenp,
2000 sizeof(int)))) {
2001 goto out;
2002 }
2003 }
2004
2005 if (mp->msg_control) {
2006 error = copyout_control(p, control, mp->msg_control,
2007 &mp->msg_controllen, &mp->msg_flags);
2008 }
2009 out:
2010 if (fromsa) {
2011 FREE(fromsa, M_SONAME);
2012 }
2013 if (control) {
2014 m_freem(control);
2015 }
2016 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
2017 out1:
2018 fp_drop(p, s, fp, 0);
2019 return error;
2020 }
2021
2022 /*
2023 * Returns: 0 Success
2024 * ENOMEM
2025 * copyin:EFAULT
2026 * recvit:???
2027 * read:??? [4056224: applicable for pipes]
2028 *
2029 * Notes: The read entry point is only called as part of support for
2030 * binary backward compatability; new code should use read
2031 * instead of recv or recvfrom when attempting to read data
2032 * from pipes.
2033 *
2034 * For full documentation of the return codes from recvit, see
2035 * the block header for the recvit function.
2036 */
2037 int
2038 recvfrom(struct proc *p, struct recvfrom_args *uap, int32_t *retval)
2039 {
2040 __pthread_testcancel(1);
2041 return recvfrom_nocancel(p, (struct recvfrom_nocancel_args *)uap,
2042 retval);
2043 }
2044
2045 int
2046 recvfrom_nocancel(struct proc *p, struct recvfrom_nocancel_args *uap,
2047 int32_t *retval)
2048 {
2049 struct user_msghdr msg;
2050 int error;
2051 uio_t auio = NULL;
2052
2053 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_START, 0, 0, 0, 0, 0);
2054 AUDIT_ARG(fd, uap->s);
2055
2056 if (uap->fromlenaddr) {
2057 error = copyin(uap->fromlenaddr,
2058 (caddr_t)&msg.msg_namelen, sizeof(msg.msg_namelen));
2059 if (error) {
2060 return error;
2061 }
2062 } else {
2063 msg.msg_namelen = 0;
2064 }
2065 msg.msg_name = uap->from;
2066 auio = uio_create(1, 0,
2067 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
2068 UIO_READ);
2069 if (auio == NULL) {
2070 return ENOMEM;
2071 }
2072
2073 uio_addiov(auio, uap->buf, uap->len);
2074 /* no need to set up msg_iov. recvit uses uio_t we send it */
2075 msg.msg_iov = 0;
2076 msg.msg_iovlen = 0;
2077 msg.msg_control = 0;
2078 msg.msg_controllen = 0;
2079 msg.msg_flags = uap->flags;
2080 error = recvit(p, uap->s, &msg, auio, uap->fromlenaddr, retval);
2081 if (auio != NULL) {
2082 uio_free(auio);
2083 }
2084
2085 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_END, error, 0, 0, 0, 0);
2086
2087 return error;
2088 }
2089
2090 /*
2091 * Returns: 0 Success
2092 * EMSGSIZE
2093 * ENOMEM
2094 * copyin:EFAULT
2095 * copyout:EFAULT
2096 * recvit:???
2097 *
2098 * Notes: For full documentation of the return codes from recvit, see
2099 * the block header for the recvit function.
2100 */
2101 int
2102 recvmsg(struct proc *p, struct recvmsg_args *uap, int32_t *retval)
2103 {
2104 __pthread_testcancel(1);
2105 return recvmsg_nocancel(p, (struct recvmsg_nocancel_args *)uap,
2106 retval);
2107 }
2108
2109 int
2110 recvmsg_nocancel(struct proc *p, struct recvmsg_nocancel_args *uap,
2111 int32_t *retval)
2112 {
2113 struct user32_msghdr msg32;
2114 struct user64_msghdr msg64;
2115 struct user_msghdr user_msg;
2116 caddr_t msghdrp;
2117 int size_of_msghdr;
2118 user_addr_t uiov;
2119 int error;
2120 uio_t auio = NULL;
2121 struct user_iovec *iovp;
2122
2123 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
2124 AUDIT_ARG(fd, uap->s);
2125 if (IS_64BIT_PROCESS(p)) {
2126 msghdrp = (caddr_t)&msg64;
2127 size_of_msghdr = sizeof(msg64);
2128 } else {
2129 msghdrp = (caddr_t)&msg32;
2130 size_of_msghdr = sizeof(msg32);
2131 }
2132 error = copyin(uap->msg, msghdrp, size_of_msghdr);
2133 if (error) {
2134 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
2135 return error;
2136 }
2137
2138 /* only need to copy if user process is not 64-bit */
2139 if (IS_64BIT_PROCESS(p)) {
2140 user_msg.msg_flags = msg64.msg_flags;
2141 user_msg.msg_controllen = msg64.msg_controllen;
2142 user_msg.msg_control = msg64.msg_control;
2143 user_msg.msg_iovlen = msg64.msg_iovlen;
2144 user_msg.msg_iov = msg64.msg_iov;
2145 user_msg.msg_namelen = msg64.msg_namelen;
2146 user_msg.msg_name = msg64.msg_name;
2147 } else {
2148 user_msg.msg_flags = msg32.msg_flags;
2149 user_msg.msg_controllen = msg32.msg_controllen;
2150 user_msg.msg_control = msg32.msg_control;
2151 user_msg.msg_iovlen = msg32.msg_iovlen;
2152 user_msg.msg_iov = msg32.msg_iov;
2153 user_msg.msg_namelen = msg32.msg_namelen;
2154 user_msg.msg_name = msg32.msg_name;
2155 }
2156
2157 if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
2158 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, EMSGSIZE,
2159 0, 0, 0, 0);
2160 return EMSGSIZE;
2161 }
2162
2163 user_msg.msg_flags = uap->flags;
2164
2165 /* allocate a uio large enough to hold the number of iovecs passed */
2166 auio = uio_create(user_msg.msg_iovlen, 0,
2167 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
2168 UIO_READ);
2169 if (auio == NULL) {
2170 error = ENOMEM;
2171 goto done;
2172 }
2173
2174 /*
2175 * get location of iovecs within the uio. then copyin the iovecs from
2176 * user space.
2177 */
2178 iovp = uio_iovsaddr(auio);
2179 if (iovp == NULL) {
2180 error = ENOMEM;
2181 goto done;
2182 }
2183 uiov = user_msg.msg_iov;
2184 user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
2185 error = copyin_user_iovec_array(uiov,
2186 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
2187 user_msg.msg_iovlen, iovp);
2188 if (error) {
2189 goto done;
2190 }
2191
2192 /* finish setup of uio_t */
2193 error = uio_calculateresid(auio);
2194 if (error) {
2195 goto done;
2196 }
2197
2198 error = recvit(p, uap->s, &user_msg, auio, 0, retval);
2199 if (!error) {
2200 user_msg.msg_iov = uiov;
2201 if (IS_64BIT_PROCESS(p)) {
2202 msg64.msg_flags = user_msg.msg_flags;
2203 msg64.msg_controllen = user_msg.msg_controllen;
2204 msg64.msg_control = user_msg.msg_control;
2205 msg64.msg_iovlen = user_msg.msg_iovlen;
2206 msg64.msg_iov = user_msg.msg_iov;
2207 msg64.msg_namelen = user_msg.msg_namelen;
2208 msg64.msg_name = user_msg.msg_name;
2209 } else {
2210 msg32.msg_flags = user_msg.msg_flags;
2211 msg32.msg_controllen = user_msg.msg_controllen;
2212 msg32.msg_control = user_msg.msg_control;
2213 msg32.msg_iovlen = user_msg.msg_iovlen;
2214 msg32.msg_iov = user_msg.msg_iov;
2215 msg32.msg_namelen = user_msg.msg_namelen;
2216 msg32.msg_name = user_msg.msg_name;
2217 }
2218 error = copyout(msghdrp, uap->msg, size_of_msghdr);
2219 }
2220 done:
2221 if (auio != NULL) {
2222 uio_free(auio);
2223 }
2224 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
2225 return error;
2226 }
2227
2228 int
2229 recvmsg_x(struct proc *p, struct recvmsg_x_args *uap, user_ssize_t *retval)
2230 {
2231 int error = EOPNOTSUPP;
2232 struct user_msghdr_x *user_msg_x = NULL;
2233 struct recv_msg_elem *recv_msg_array = NULL;
2234 struct socket *so;
2235 user_ssize_t len_before = 0, len_after;
2236 int need_drop = 0;
2237 size_t size_of_msghdr;
2238 void *umsgp = NULL;
2239 u_int i;
2240 u_int uiocnt;
2241
2242 KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0);
2243
2244 error = file_socket(uap->s, &so);
2245 if (error) {
2246 goto out;
2247 }
2248 need_drop = 1;
2249 if (so == NULL) {
2250 error = EBADF;
2251 goto out;
2252 }
2253 /*
2254 * Input parameter range check
2255 */
2256 if (uap->cnt == 0 || uap->cnt > UIO_MAXIOV) {
2257 error = EINVAL;
2258 goto out;
2259 }
2260 if (uap->cnt > somaxrecvmsgx) {
2261 uap->cnt = somaxrecvmsgx;
2262 }
2263
2264 user_msg_x = _MALLOC(uap->cnt * sizeof(struct user_msghdr_x),
2265 M_TEMP, M_WAITOK | M_ZERO);
2266 if (user_msg_x == NULL) {
2267 DBG_PRINTF("%s _MALLOC() user_msg_x failed\n", __func__);
2268 error = ENOMEM;
2269 goto out;
2270 }
2271 recv_msg_array = alloc_recv_msg_array(uap->cnt);
2272 if (recv_msg_array == NULL) {
2273 DBG_PRINTF("%s alloc_recv_msg_array() failed\n", __func__);
2274 error = ENOMEM;
2275 goto out;
2276 }
2277 size_of_msghdr = IS_64BIT_PROCESS(p) ?
2278 sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x);
2279
2280 umsgp = _MALLOC(uap->cnt * size_of_msghdr, M_TEMP, M_WAITOK | M_ZERO);
2281 if (umsgp == NULL) {
2282 DBG_PRINTF("%s _MALLOC() umsgp failed\n", __func__);
2283 error = ENOMEM;
2284 goto out;
2285 }
2286 error = copyin(uap->msgp, umsgp, uap->cnt * size_of_msghdr);
2287 if (error) {
2288 DBG_PRINTF("%s copyin() failed\n", __func__);
2289 goto out;
2290 }
2291 error = internalize_recv_msghdr_array(umsgp,
2292 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
2293 UIO_READ, uap->cnt, user_msg_x, recv_msg_array);
2294 if (error) {
2295 DBG_PRINTF("%s copyin_user_msghdr_array() failed\n", __func__);
2296 goto out;
2297 }
2298 /*
2299 * Make sure the size of each message iovec and
2300 * the aggregate size of all the iovec is valid
2301 */
2302 if (recv_msg_array_is_valid(recv_msg_array, uap->cnt) == 0) {
2303 error = EINVAL;
2304 goto out;
2305 }
2306 /*
2307 * Sanity check on passed arguments
2308 */
2309 for (i = 0; i < uap->cnt; i++) {
2310 struct user_msghdr_x *mp = user_msg_x + i;
2311
2312 if (mp->msg_flags != 0) {
2313 error = EINVAL;
2314 goto out;
2315 }
2316 }
2317 #if CONFIG_MACF_SOCKET_SUBSET
2318 /*
2319 * We check the state without holding the socket lock;
2320 * if a race condition occurs, it would simply result
2321 * in an extra call to the MAC check function.
2322 */
2323 if (!(so->so_state & SS_DEFUNCT) &&
2324 !(so->so_state & SS_ISCONNECTED) &&
2325 !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
2326 (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0) {
2327 goto out;
2328 }
2329 #endif /* MAC_SOCKET_SUBSET */
2330
2331 len_before = recv_msg_array_resid(recv_msg_array, uap->cnt);
2332
2333 if (so->so_proto->pr_usrreqs->pru_soreceive_list !=
2334 pru_soreceive_list_notsupp &&
2335 somaxrecvmsgx == 0) {
2336 error = so->so_proto->pr_usrreqs->pru_soreceive_list(so,
2337 recv_msg_array, uap->cnt, &uap->flags);
2338 } else {
2339 int flags = uap->flags;
2340
2341 for (i = 0; i < uap->cnt; i++) {
2342 struct recv_msg_elem *recv_msg_elem;
2343 uio_t auio;
2344 struct sockaddr **psa;
2345 struct mbuf **controlp;
2346
2347 recv_msg_elem = recv_msg_array + i;
2348 auio = recv_msg_elem->uio;
2349
2350 /*
2351 * Do not block if we got at least one packet
2352 */
2353 if (i > 0) {
2354 flags |= MSG_DONTWAIT;
2355 }
2356
2357 psa = (recv_msg_elem->which & SOCK_MSG_SA) ?
2358 &recv_msg_elem->psa : NULL;
2359 controlp = (recv_msg_elem->which & SOCK_MSG_CONTROL) ?
2360 &recv_msg_elem->controlp : NULL;
2361
2362 error = so->so_proto->pr_usrreqs->pru_soreceive(so, psa,
2363 auio, (struct mbuf **)0, controlp, &flags);
2364 if (error) {
2365 break;
2366 }
2367 /*
2368 * We have some data
2369 */
2370 recv_msg_elem->which |= SOCK_MSG_DATA;
2371 /*
2372 * Stop on partial copy
2373 */
2374 if (flags & (MSG_RCVMORE | MSG_TRUNC)) {
2375 break;
2376 }
2377 }
2378 if ((uap->flags & MSG_DONTWAIT) == 0) {
2379 flags &= ~MSG_DONTWAIT;
2380 }
2381 uap->flags = flags;
2382 }
2383
2384 len_after = recv_msg_array_resid(recv_msg_array, uap->cnt);
2385
2386 if (error) {
2387 if (len_after != len_before && (error == ERESTART ||
2388 error == EINTR || error == EWOULDBLOCK)) {
2389 error = 0;
2390 } else {
2391 goto out;
2392 }
2393 }
2394
2395 uiocnt = externalize_recv_msghdr_array(umsgp,
2396 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
2397 UIO_READ, uap->cnt, user_msg_x, recv_msg_array);
2398
2399 error = copyout(umsgp, uap->msgp, uap->cnt * size_of_msghdr);
2400 if (error) {
2401 DBG_PRINTF("%s copyout() failed\n", __func__);
2402 goto out;
2403 }
2404 *retval = (int)(uiocnt);
2405
2406 for (i = 0; i < uap->cnt; i++) {
2407 struct user_msghdr_x *mp = user_msg_x + i;
2408 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
2409 struct sockaddr *fromsa = recv_msg_elem->psa;
2410
2411 if (mp->msg_name) {
2412 error = copyout_sa(fromsa, mp->msg_name,
2413 &mp->msg_namelen);
2414 if (error) {
2415 goto out;
2416 }
2417 }
2418 if (mp->msg_control) {
2419 error = copyout_control(p, recv_msg_elem->controlp,
2420 mp->msg_control, &mp->msg_controllen,
2421 &mp->msg_flags);
2422 if (error) {
2423 goto out;
2424 }
2425 }
2426 }
2427 out:
2428 if (need_drop) {
2429 file_drop(uap->s);
2430 }
2431 if (umsgp != NULL) {
2432 _FREE(umsgp, M_TEMP);
2433 }
2434 if (recv_msg_array != NULL) {
2435 free_recv_msg_array(recv_msg_array, uap->cnt);
2436 }
2437 if (user_msg_x != NULL) {
2438 _FREE(user_msg_x, M_TEMP);
2439 }
2440
2441 KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0);
2442
2443 return error;
2444 }
2445
2446 /*
2447 * Returns: 0 Success
2448 * EBADF
2449 * file_socket:ENOTSOCK
2450 * file_socket:EBADF
2451 * soshutdown:EINVAL
2452 * soshutdown:ENOTCONN
2453 * soshutdown:EADDRNOTAVAIL[TCP]
2454 * soshutdown:ENOBUFS[TCP]
2455 * soshutdown:EMSGSIZE[TCP]
2456 * soshutdown:EHOSTUNREACH[TCP]
2457 * soshutdown:ENETUNREACH[TCP]
2458 * soshutdown:ENETDOWN[TCP]
2459 * soshutdown:ENOMEM[TCP]
2460 * soshutdown:EACCES[TCP]
2461 * soshutdown:EMSGSIZE[TCP]
2462 * soshutdown:ENOBUFS[TCP]
2463 * soshutdown:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
2464 * soshutdown:??? [other protocol families]
2465 */
2466 /* ARGSUSED */
2467 int
2468 shutdown(__unused struct proc *p, struct shutdown_args *uap,
2469 __unused int32_t *retval)
2470 {
2471 struct socket *so;
2472 int error;
2473
2474 AUDIT_ARG(fd, uap->s);
2475 error = file_socket(uap->s, &so);
2476 if (error) {
2477 return error;
2478 }
2479 if (so == NULL) {
2480 error = EBADF;
2481 goto out;
2482 }
2483 error = soshutdown((struct socket *)so, uap->how);
2484 out:
2485 file_drop(uap->s);
2486 return error;
2487 }
2488
2489 /*
2490 * Returns: 0 Success
2491 * EFAULT
2492 * EINVAL
2493 * EACCES Mandatory Access Control failure
2494 * file_socket:ENOTSOCK
2495 * file_socket:EBADF
2496 * sosetopt:EINVAL
2497 * sosetopt:ENOPROTOOPT
2498 * sosetopt:ENOBUFS
2499 * sosetopt:EDOM
2500 * sosetopt:EFAULT
2501 * sosetopt:EOPNOTSUPP[AF_UNIX]
2502 * sosetopt:???
2503 */
2504 /* ARGSUSED */
2505 int
2506 setsockopt(struct proc *p, struct setsockopt_args *uap,
2507 __unused int32_t *retval)
2508 {
2509 struct socket *so;
2510 struct sockopt sopt;
2511 int error;
2512
2513 AUDIT_ARG(fd, uap->s);
2514 if (uap->val == 0 && uap->valsize != 0) {
2515 return EFAULT;
2516 }
2517 /* No bounds checking on size (it's unsigned) */
2518
2519 error = file_socket(uap->s, &so);
2520 if (error) {
2521 return error;
2522 }
2523
2524 sopt.sopt_dir = SOPT_SET;
2525 sopt.sopt_level = uap->level;
2526 sopt.sopt_name = uap->name;
2527 sopt.sopt_val = uap->val;
2528 sopt.sopt_valsize = uap->valsize;
2529 sopt.sopt_p = p;
2530
2531 if (so == NULL) {
2532 error = EINVAL;
2533 goto out;
2534 }
2535 #if CONFIG_MACF_SOCKET_SUBSET
2536 if ((error = mac_socket_check_setsockopt(kauth_cred_get(), so,
2537 &sopt)) != 0) {
2538 goto out;
2539 }
2540 #endif /* MAC_SOCKET_SUBSET */
2541 error = sosetoptlock(so, &sopt, 1); /* will lock socket */
2542 out:
2543 file_drop(uap->s);
2544 return error;
2545 }
2546
2547
2548
2549 /*
2550 * Returns: 0 Success
2551 * EINVAL
2552 * EBADF
2553 * EACCES Mandatory Access Control failure
2554 * copyin:EFAULT
2555 * copyout:EFAULT
2556 * file_socket:ENOTSOCK
2557 * file_socket:EBADF
2558 * sogetopt:???
2559 */
2560 int
2561 getsockopt(struct proc *p, struct getsockopt_args *uap,
2562 __unused int32_t *retval)
2563 {
2564 int error;
2565 socklen_t valsize;
2566 struct sockopt sopt;
2567 struct socket *so;
2568
2569 error = file_socket(uap->s, &so);
2570 if (error) {
2571 return error;
2572 }
2573 if (uap->val) {
2574 error = copyin(uap->avalsize, (caddr_t)&valsize,
2575 sizeof(valsize));
2576 if (error) {
2577 goto out;
2578 }
2579 /* No bounds checking on size (it's unsigned) */
2580 } else {
2581 valsize = 0;
2582 }
2583 sopt.sopt_dir = SOPT_GET;
2584 sopt.sopt_level = uap->level;
2585 sopt.sopt_name = uap->name;
2586 sopt.sopt_val = uap->val;
2587 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
2588 sopt.sopt_p = p;
2589
2590 if (so == NULL) {
2591 error = EBADF;
2592 goto out;
2593 }
2594 #if CONFIG_MACF_SOCKET_SUBSET
2595 if ((error = mac_socket_check_getsockopt(kauth_cred_get(), so,
2596 &sopt)) != 0) {
2597 goto out;
2598 }
2599 #endif /* MAC_SOCKET_SUBSET */
2600 error = sogetoptlock((struct socket *)so, &sopt, 1); /* will lock */
2601 if (error == 0) {
2602 valsize = sopt.sopt_valsize;
2603 error = copyout((caddr_t)&valsize, uap->avalsize,
2604 sizeof(valsize));
2605 }
2606 out:
2607 file_drop(uap->s);
2608 return error;
2609 }
2610
2611
2612 /*
2613 * Get socket name.
2614 *
2615 * Returns: 0 Success
2616 * EBADF
2617 * file_socket:ENOTSOCK
2618 * file_socket:EBADF
2619 * copyin:EFAULT
2620 * copyout:EFAULT
2621 * <pru_sockaddr>:ENOBUFS[TCP]
2622 * <pru_sockaddr>:ECONNRESET[TCP]
2623 * <pru_sockaddr>:EINVAL[AF_UNIX]
2624 * <sf_getsockname>:???
2625 */
2626 /* ARGSUSED */
2627 int
2628 getsockname(__unused struct proc *p, struct getsockname_args *uap,
2629 __unused int32_t *retval)
2630 {
2631 struct socket *so;
2632 struct sockaddr *sa;
2633 socklen_t len;
2634 socklen_t sa_len;
2635 int error;
2636
2637 error = file_socket(uap->fdes, &so);
2638 if (error) {
2639 return error;
2640 }
2641 error = copyin(uap->alen, (caddr_t)&len, sizeof(socklen_t));
2642 if (error) {
2643 goto out;
2644 }
2645 if (so == NULL) {
2646 error = EBADF;
2647 goto out;
2648 }
2649 sa = 0;
2650 socket_lock(so, 1);
2651 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
2652 if (error == 0) {
2653 error = sflt_getsockname(so, &sa);
2654 if (error == EJUSTRETURN) {
2655 error = 0;
2656 }
2657 }
2658 socket_unlock(so, 1);
2659 if (error) {
2660 goto bad;
2661 }
2662 if (sa == 0) {
2663 len = 0;
2664 goto gotnothing;
2665 }
2666
2667 sa_len = sa->sa_len;
2668 len = MIN(len, sa_len);
2669 error = copyout((caddr_t)sa, uap->asa, len);
2670 if (error) {
2671 goto bad;
2672 }
2673 /* return the actual, untruncated address length */
2674 len = sa_len;
2675 gotnothing:
2676 error = copyout((caddr_t)&len, uap->alen, sizeof(socklen_t));
2677 bad:
2678 if (sa) {
2679 FREE(sa, M_SONAME);
2680 }
2681 out:
2682 file_drop(uap->fdes);
2683 return error;
2684 }
2685
2686 /*
2687 * Get name of peer for connected socket.
2688 *
2689 * Returns: 0 Success
2690 * EBADF
2691 * EINVAL
2692 * ENOTCONN
2693 * file_socket:ENOTSOCK
2694 * file_socket:EBADF
2695 * copyin:EFAULT
2696 * copyout:EFAULT
2697 * <pru_peeraddr>:???
2698 * <sf_getpeername>:???
2699 */
2700 /* ARGSUSED */
2701 int
2702 getpeername(__unused struct proc *p, struct getpeername_args *uap,
2703 __unused int32_t *retval)
2704 {
2705 struct socket *so;
2706 struct sockaddr *sa;
2707 socklen_t len;
2708 socklen_t sa_len;
2709 int error;
2710
2711 error = file_socket(uap->fdes, &so);
2712 if (error) {
2713 return error;
2714 }
2715 if (so == NULL) {
2716 error = EBADF;
2717 goto out;
2718 }
2719
2720 socket_lock(so, 1);
2721
2722 if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
2723 (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
2724 /* the socket has been shutdown, no more getpeername's */
2725 socket_unlock(so, 1);
2726 error = EINVAL;
2727 goto out;
2728 }
2729
2730 if ((so->so_state & (SS_ISCONNECTED | SS_ISCONFIRMING)) == 0) {
2731 socket_unlock(so, 1);
2732 error = ENOTCONN;
2733 goto out;
2734 }
2735 error = copyin(uap->alen, (caddr_t)&len, sizeof(socklen_t));
2736 if (error) {
2737 socket_unlock(so, 1);
2738 goto out;
2739 }
2740 sa = 0;
2741 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
2742 if (error == 0) {
2743 error = sflt_getpeername(so, &sa);
2744 if (error == EJUSTRETURN) {
2745 error = 0;
2746 }
2747 }
2748 socket_unlock(so, 1);
2749 if (error) {
2750 goto bad;
2751 }
2752 if (sa == 0) {
2753 len = 0;
2754 goto gotnothing;
2755 }
2756 sa_len = sa->sa_len;
2757 len = MIN(len, sa_len);
2758 error = copyout(sa, uap->asa, len);
2759 if (error) {
2760 goto bad;
2761 }
2762 /* return the actual, untruncated address length */
2763 len = sa_len;
2764 gotnothing:
2765 error = copyout((caddr_t)&len, uap->alen, sizeof(socklen_t));
2766 bad:
2767 if (sa) {
2768 FREE(sa, M_SONAME);
2769 }
2770 out:
2771 file_drop(uap->fdes);
2772 return error;
2773 }
2774
2775 int
2776 sockargs(struct mbuf **mp, user_addr_t data, int buflen, int type)
2777 {
2778 struct sockaddr *sa;
2779 struct mbuf *m;
2780 int error;
2781
2782 size_t alloc_buflen = (size_t)buflen;
2783
2784 if (alloc_buflen > INT_MAX / 2) {
2785 return EINVAL;
2786 }
2787 #ifdef __LP64__
2788 /*
2789 * The fd's in the buffer must expand to be pointers, thus we need twice
2790 * as much space
2791 */
2792 if (type == MT_CONTROL) {
2793 alloc_buflen = ((buflen - sizeof(struct cmsghdr)) * 2) +
2794 sizeof(struct cmsghdr);
2795 }
2796 #endif
2797 if (alloc_buflen > MLEN) {
2798 if (type == MT_SONAME && alloc_buflen <= 112) {
2799 alloc_buflen = MLEN; /* unix domain compat. hack */
2800 } else if (alloc_buflen > MCLBYTES) {
2801 return EINVAL;
2802 }
2803 }
2804 m = m_get(M_WAIT, type);
2805 if (m == NULL) {
2806 return ENOBUFS;
2807 }
2808 if (alloc_buflen > MLEN) {
2809 MCLGET(m, M_WAIT);
2810 if ((m->m_flags & M_EXT) == 0) {
2811 m_free(m);
2812 return ENOBUFS;
2813 }
2814 }
2815 /*
2816 * K64: We still copyin the original buflen because it gets expanded
2817 * later and we lie about the size of the mbuf because it only affects
2818 * unp_* functions
2819 */
2820 m->m_len = buflen;
2821 error = copyin(data, mtod(m, caddr_t), (u_int)buflen);
2822 if (error) {
2823 (void) m_free(m);
2824 } else {
2825 *mp = m;
2826 if (type == MT_SONAME) {
2827 sa = mtod(m, struct sockaddr *);
2828 sa->sa_len = buflen;
2829 }
2830 }
2831 return error;
2832 }
2833
2834 /*
2835 * Given a user_addr_t of length len, allocate and fill out a *sa.
2836 *
2837 * Returns: 0 Success
2838 * ENAMETOOLONG Filename too long
2839 * EINVAL Invalid argument
2840 * ENOMEM Not enough space
2841 * copyin:EFAULT Bad address
2842 */
2843 static int
2844 getsockaddr(struct socket *so, struct sockaddr **namp, user_addr_t uaddr,
2845 size_t len, boolean_t translate_unspec)
2846 {
2847 struct sockaddr *sa;
2848 int error;
2849
2850 if (len > SOCK_MAXADDRLEN) {
2851 return ENAMETOOLONG;
2852 }
2853
2854 if (len < offsetof(struct sockaddr, sa_data[0])) {
2855 return EINVAL;
2856 }
2857
2858 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK | M_ZERO);
2859 if (sa == NULL) {
2860 return ENOMEM;
2861 }
2862 error = copyin(uaddr, (caddr_t)sa, len);
2863 if (error) {
2864 FREE(sa, M_SONAME);
2865 } else {
2866 /*
2867 * Force sa_family to AF_INET on AF_INET sockets to handle
2868 * legacy applications that use AF_UNSPEC (0). On all other
2869 * sockets we leave it unchanged and let the lower layer
2870 * handle it.
2871 */
2872 if (translate_unspec && sa->sa_family == AF_UNSPEC &&
2873 SOCK_CHECK_DOM(so, PF_INET) &&
2874 len == sizeof(struct sockaddr_in)) {
2875 sa->sa_family = AF_INET;
2876 }
2877
2878 sa->sa_len = len;
2879 *namp = sa;
2880 }
2881 return error;
2882 }
2883
2884 static int
2885 getsockaddr_s(struct socket *so, struct sockaddr_storage *ss,
2886 user_addr_t uaddr, size_t len, boolean_t translate_unspec)
2887 {
2888 int error;
2889
2890 if (ss == NULL || uaddr == USER_ADDR_NULL ||
2891 len < offsetof(struct sockaddr, sa_data[0])) {
2892 return EINVAL;
2893 }
2894
2895 /*
2896 * sockaddr_storage size is less than SOCK_MAXADDRLEN,
2897 * so the check here is inclusive.
2898 */
2899 if (len > sizeof(*ss)) {
2900 return ENAMETOOLONG;
2901 }
2902
2903 bzero(ss, sizeof(*ss));
2904 error = copyin(uaddr, (caddr_t)ss, len);
2905 if (error == 0) {
2906 /*
2907 * Force sa_family to AF_INET on AF_INET sockets to handle
2908 * legacy applications that use AF_UNSPEC (0). On all other
2909 * sockets we leave it unchanged and let the lower layer
2910 * handle it.
2911 */
2912 if (translate_unspec && ss->ss_family == AF_UNSPEC &&
2913 SOCK_CHECK_DOM(so, PF_INET) &&
2914 len == sizeof(struct sockaddr_in)) {
2915 ss->ss_family = AF_INET;
2916 }
2917
2918 ss->ss_len = len;
2919 }
2920 return error;
2921 }
2922
2923 int
2924 internalize_user_msghdr_array(const void *src, int spacetype, int direction,
2925 u_int count, struct user_msghdr_x *dst, struct uio **uiop)
2926 {
2927 int error = 0;
2928 u_int i;
2929 u_int namecnt = 0;
2930 u_int ctlcnt = 0;
2931
2932 for (i = 0; i < count; i++) {
2933 uio_t auio;
2934 struct user_iovec *iovp;
2935 struct user_msghdr_x *user_msg = dst + i;
2936
2937 if (spacetype == UIO_USERSPACE64) {
2938 const struct user64_msghdr_x *msghdr64;
2939
2940 msghdr64 = ((const struct user64_msghdr_x *)src) + i;
2941
2942 user_msg->msg_name = msghdr64->msg_name;
2943 user_msg->msg_namelen = msghdr64->msg_namelen;
2944 user_msg->msg_iov = msghdr64->msg_iov;
2945 user_msg->msg_iovlen = msghdr64->msg_iovlen;
2946 user_msg->msg_control = msghdr64->msg_control;
2947 user_msg->msg_controllen = msghdr64->msg_controllen;
2948 user_msg->msg_flags = msghdr64->msg_flags;
2949 user_msg->msg_datalen = msghdr64->msg_datalen;
2950 } else {
2951 const struct user32_msghdr_x *msghdr32;
2952
2953 msghdr32 = ((const struct user32_msghdr_x *)src) + i;
2954
2955 user_msg->msg_name = msghdr32->msg_name;
2956 user_msg->msg_namelen = msghdr32->msg_namelen;
2957 user_msg->msg_iov = msghdr32->msg_iov;
2958 user_msg->msg_iovlen = msghdr32->msg_iovlen;
2959 user_msg->msg_control = msghdr32->msg_control;
2960 user_msg->msg_controllen = msghdr32->msg_controllen;
2961 user_msg->msg_flags = msghdr32->msg_flags;
2962 user_msg->msg_datalen = msghdr32->msg_datalen;
2963 }
2964
2965 if (user_msg->msg_iovlen <= 0 ||
2966 user_msg->msg_iovlen > UIO_MAXIOV) {
2967 error = EMSGSIZE;
2968 goto done;
2969 }
2970 auio = uio_create(user_msg->msg_iovlen, 0, spacetype,
2971 direction);
2972 if (auio == NULL) {
2973 error = ENOMEM;
2974 goto done;
2975 }
2976 uiop[i] = auio;
2977
2978 iovp = uio_iovsaddr(auio);
2979 if (iovp == NULL) {
2980 error = ENOMEM;
2981 goto done;
2982 }
2983 error = copyin_user_iovec_array(user_msg->msg_iov,
2984 spacetype, user_msg->msg_iovlen, iovp);
2985 if (error) {
2986 goto done;
2987 }
2988 user_msg->msg_iov = CAST_USER_ADDR_T(iovp);
2989
2990 error = uio_calculateresid(auio);
2991 if (error) {
2992 goto done;
2993 }
2994 user_msg->msg_datalen = uio_resid(auio);
2995
2996 if (user_msg->msg_name && user_msg->msg_namelen) {
2997 namecnt++;
2998 }
2999 if (user_msg->msg_control && user_msg->msg_controllen) {
3000 ctlcnt++;
3001 }
3002 }
3003 done:
3004
3005 return error;
3006 }
3007
3008 int
3009 internalize_recv_msghdr_array(const void *src, int spacetype, int direction,
3010 u_int count, struct user_msghdr_x *dst,
3011 struct recv_msg_elem *recv_msg_array)
3012 {
3013 int error = 0;
3014 u_int i;
3015
3016 for (i = 0; i < count; i++) {
3017 struct user_iovec *iovp;
3018 struct user_msghdr_x *user_msg = dst + i;
3019 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3020
3021 if (spacetype == UIO_USERSPACE64) {
3022 const struct user64_msghdr_x *msghdr64;
3023
3024 msghdr64 = ((const struct user64_msghdr_x *)src) + i;
3025
3026 user_msg->msg_name = msghdr64->msg_name;
3027 user_msg->msg_namelen = msghdr64->msg_namelen;
3028 user_msg->msg_iov = msghdr64->msg_iov;
3029 user_msg->msg_iovlen = msghdr64->msg_iovlen;
3030 user_msg->msg_control = msghdr64->msg_control;
3031 user_msg->msg_controllen = msghdr64->msg_controllen;
3032 user_msg->msg_flags = msghdr64->msg_flags;
3033 user_msg->msg_datalen = msghdr64->msg_datalen;
3034 } else {
3035 const struct user32_msghdr_x *msghdr32;
3036
3037 msghdr32 = ((const struct user32_msghdr_x *)src) + i;
3038
3039 user_msg->msg_name = msghdr32->msg_name;
3040 user_msg->msg_namelen = msghdr32->msg_namelen;
3041 user_msg->msg_iov = msghdr32->msg_iov;
3042 user_msg->msg_iovlen = msghdr32->msg_iovlen;
3043 user_msg->msg_control = msghdr32->msg_control;
3044 user_msg->msg_controllen = msghdr32->msg_controllen;
3045 user_msg->msg_flags = msghdr32->msg_flags;
3046 user_msg->msg_datalen = msghdr32->msg_datalen;
3047 }
3048
3049 if (user_msg->msg_iovlen <= 0 ||
3050 user_msg->msg_iovlen > UIO_MAXIOV) {
3051 error = EMSGSIZE;
3052 goto done;
3053 }
3054 recv_msg_elem->uio = uio_create(user_msg->msg_iovlen, 0,
3055 spacetype, direction);
3056 if (recv_msg_elem->uio == NULL) {
3057 error = ENOMEM;
3058 goto done;
3059 }
3060
3061 iovp = uio_iovsaddr(recv_msg_elem->uio);
3062 if (iovp == NULL) {
3063 error = ENOMEM;
3064 goto done;
3065 }
3066 error = copyin_user_iovec_array(user_msg->msg_iov,
3067 spacetype, user_msg->msg_iovlen, iovp);
3068 if (error) {
3069 goto done;
3070 }
3071 user_msg->msg_iov = CAST_USER_ADDR_T(iovp);
3072
3073 error = uio_calculateresid(recv_msg_elem->uio);
3074 if (error) {
3075 goto done;
3076 }
3077 user_msg->msg_datalen = uio_resid(recv_msg_elem->uio);
3078
3079 if (user_msg->msg_name && user_msg->msg_namelen) {
3080 recv_msg_elem->which |= SOCK_MSG_SA;
3081 }
3082 if (user_msg->msg_control && user_msg->msg_controllen) {
3083 recv_msg_elem->which |= SOCK_MSG_CONTROL;
3084 }
3085 }
3086 done:
3087
3088 return error;
3089 }
3090
3091 u_int
3092 externalize_user_msghdr_array(void *dst, int spacetype, int direction,
3093 u_int count, const struct user_msghdr_x *src, struct uio **uiop)
3094 {
3095 #pragma unused(direction)
3096 u_int i;
3097 int seenlast = 0;
3098 u_int retcnt = 0;
3099
3100 for (i = 0; i < count; i++) {
3101 const struct user_msghdr_x *user_msg = src + i;
3102 uio_t auio = uiop[i];
3103 user_ssize_t len = user_msg->msg_datalen - uio_resid(auio);
3104
3105 if (user_msg->msg_datalen != 0 && len == 0) {
3106 seenlast = 1;
3107 }
3108
3109 if (seenlast == 0) {
3110 retcnt++;
3111 }
3112
3113 if (spacetype == UIO_USERSPACE64) {
3114 struct user64_msghdr_x *msghdr64;
3115
3116 msghdr64 = ((struct user64_msghdr_x *)dst) + i;
3117
3118 msghdr64->msg_flags = user_msg->msg_flags;
3119 msghdr64->msg_datalen = len;
3120 } else {
3121 struct user32_msghdr_x *msghdr32;
3122
3123 msghdr32 = ((struct user32_msghdr_x *)dst) + i;
3124
3125 msghdr32->msg_flags = user_msg->msg_flags;
3126 msghdr32->msg_datalen = len;
3127 }
3128 }
3129 return retcnt;
3130 }
3131
3132 u_int
3133 externalize_recv_msghdr_array(void *dst, int spacetype, int direction,
3134 u_int count, const struct user_msghdr_x *src,
3135 struct recv_msg_elem *recv_msg_array)
3136 {
3137 u_int i;
3138 int seenlast = 0;
3139 u_int retcnt = 0;
3140
3141 for (i = 0; i < count; i++) {
3142 const struct user_msghdr_x *user_msg = src + i;
3143 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3144 user_ssize_t len;
3145
3146 len = user_msg->msg_datalen - uio_resid(recv_msg_elem->uio);
3147
3148 if (direction == UIO_READ) {
3149 if ((recv_msg_elem->which & SOCK_MSG_DATA) == 0) {
3150 seenlast = 1;
3151 }
3152 } else {
3153 if (user_msg->msg_datalen != 0 && len == 0) {
3154 seenlast = 1;
3155 }
3156 }
3157
3158 if (seenlast == 0) {
3159 retcnt++;
3160 }
3161
3162 if (spacetype == UIO_USERSPACE64) {
3163 struct user64_msghdr_x *msghdr64;
3164
3165 msghdr64 = ((struct user64_msghdr_x *)dst) + i;
3166
3167 msghdr64->msg_flags = user_msg->msg_flags;
3168 msghdr64->msg_datalen = len;
3169 } else {
3170 struct user32_msghdr_x *msghdr32;
3171
3172 msghdr32 = ((struct user32_msghdr_x *)dst) + i;
3173
3174 msghdr32->msg_flags = user_msg->msg_flags;
3175 msghdr32->msg_datalen = len;
3176 }
3177 }
3178 return retcnt;
3179 }
3180
3181 void
3182 free_uio_array(struct uio **uiop, u_int count)
3183 {
3184 u_int i;
3185
3186 for (i = 0; i < count; i++) {
3187 if (uiop[i] != NULL) {
3188 uio_free(uiop[i]);
3189 }
3190 }
3191 }
3192
3193 __private_extern__ user_ssize_t
3194 uio_array_resid(struct uio **uiop, u_int count)
3195 {
3196 user_ssize_t len = 0;
3197 u_int i;
3198
3199 for (i = 0; i < count; i++) {
3200 struct uio *auio = uiop[i];
3201
3202 if (auio != NULL) {
3203 len += uio_resid(auio);
3204 }
3205 }
3206 return len;
3207 }
3208
3209 int
3210 uio_array_is_valid(struct uio **uiop, u_int count)
3211 {
3212 user_ssize_t len = 0;
3213 u_int i;
3214
3215 for (i = 0; i < count; i++) {
3216 struct uio *auio = uiop[i];
3217
3218 if (auio != NULL) {
3219 user_ssize_t resid = uio_resid(auio);
3220
3221 /*
3222 * Sanity check on the validity of the iovec:
3223 * no point of going over sb_max
3224 */
3225 if (resid < 0 || (u_int32_t)resid > sb_max) {
3226 return 0;
3227 }
3228
3229 len += resid;
3230 if (len < 0 || (u_int32_t)len > sb_max) {
3231 return 0;
3232 }
3233 }
3234 }
3235 return 1;
3236 }
3237
3238
3239 struct recv_msg_elem *
3240 alloc_recv_msg_array(u_int count)
3241 {
3242 struct recv_msg_elem *recv_msg_array;
3243
3244 recv_msg_array = _MALLOC(count * sizeof(struct recv_msg_elem),
3245 M_TEMP, M_WAITOK | M_ZERO);
3246
3247 return recv_msg_array;
3248 }
3249
3250 void
3251 free_recv_msg_array(struct recv_msg_elem *recv_msg_array, u_int count)
3252 {
3253 u_int i;
3254
3255 for (i = 0; i < count; i++) {
3256 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3257
3258 if (recv_msg_elem->uio != NULL) {
3259 uio_free(recv_msg_elem->uio);
3260 }
3261 if (recv_msg_elem->psa != NULL) {
3262 _FREE(recv_msg_elem->psa, M_TEMP);
3263 }
3264 if (recv_msg_elem->controlp != NULL) {
3265 m_freem(recv_msg_elem->controlp);
3266 }
3267 }
3268 _FREE(recv_msg_array, M_TEMP);
3269 }
3270
3271
3272 __private_extern__ user_ssize_t
3273 recv_msg_array_resid(struct recv_msg_elem *recv_msg_array, u_int count)
3274 {
3275 user_ssize_t len = 0;
3276 u_int i;
3277
3278 for (i = 0; i < count; i++) {
3279 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3280
3281 if (recv_msg_elem->uio != NULL) {
3282 len += uio_resid(recv_msg_elem->uio);
3283 }
3284 }
3285 return len;
3286 }
3287
3288 int
3289 recv_msg_array_is_valid(struct recv_msg_elem *recv_msg_array, u_int count)
3290 {
3291 user_ssize_t len = 0;
3292 u_int i;
3293
3294 for (i = 0; i < count; i++) {
3295 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3296
3297 if (recv_msg_elem->uio != NULL) {
3298 user_ssize_t resid = uio_resid(recv_msg_elem->uio);
3299
3300 /*
3301 * Sanity check on the validity of the iovec:
3302 * no point of going over sb_max
3303 */
3304 if (resid < 0 || (u_int32_t)resid > sb_max) {
3305 return 0;
3306 }
3307
3308 len += resid;
3309 if (len < 0 || (u_int32_t)len > sb_max) {
3310 return 0;
3311 }
3312 }
3313 }
3314 return 1;
3315 }
3316
3317 #if SENDFILE
3318
/* Number of uio buffers used per pass; also the page count behind the byte limit */
#define SFUIOBUFS               64

/*
 * Number of 16K (resp. 4K) clusters needed to hold n bytes.
 * n is expected to be non-zero.
 */
#define HOWMANY_16K(n)          (1 + (((unsigned int)(n) - 1) >> M16KCLSHIFT))
#define HOWMANY_4K(n)           (1 + (((unsigned int)(n) - 1) >> MBIGCLSHIFT))

/* Upper send limit in bytes (SFUIOBUFS * PAGESIZE) */
#define SENDFILE_MAX_BYTES      (SFUIOBUFS << PGSHIFT)

/* The same upper send limit, expressed in mbuf clusters of each size */
#define SENDFILE_MAX_16K        HOWMANY_16K(SENDFILE_MAX_BYTES)
#define SENDFILE_MAX_4K         HOWMANY_4K(SENDFILE_MAX_BYTES)
3331
3332 static void
3333 alloc_sendpkt(int how, size_t pktlen, unsigned int *maxchunks,
3334 struct mbuf **m, boolean_t jumbocl)
3335 {
3336 unsigned int needed;
3337
3338 if (pktlen == 0) {
3339 panic("%s: pktlen (%ld) must be non-zero\n", __func__, pktlen);
3340 }
3341
3342 /*
3343 * Try to allocate for the whole thing. Since we want full control
3344 * over the buffer size and be able to accept partial result, we can't
3345 * use mbuf_allocpacket(). The logic below is similar to sosend().
3346 */
3347 *m = NULL;
3348 if (pktlen > MBIGCLBYTES && jumbocl) {
3349 needed = MIN(SENDFILE_MAX_16K, HOWMANY_16K(pktlen));
3350 *m = m_getpackets_internal(&needed, 1, how, 0, M16KCLBYTES);
3351 }
3352 if (*m == NULL) {
3353 needed = MIN(SENDFILE_MAX_4K, HOWMANY_4K(pktlen));
3354 *m = m_getpackets_internal(&needed, 1, how, 0, MBIGCLBYTES);
3355 }
3356
3357 /*
3358 * Our previous attempt(s) at allocation had failed; the system
3359 * may be short on mbufs, and we want to block until they are
3360 * available. This time, ask just for 1 mbuf and don't return
3361 * until we get it.
3362 */
3363 if (*m == NULL) {
3364 needed = 1;
3365 *m = m_getpackets_internal(&needed, 1, M_WAIT, 1, MBIGCLBYTES);
3366 }
3367 if (*m == NULL) {
3368 panic("%s: blocking allocation returned NULL\n", __func__);
3369 }
3370
3371 *maxchunks = needed;
3372 }
3373
3374 /*
3375 * sendfile(2).
3376 * int sendfile(int fd, int s, off_t offset, off_t *nbytes,
3377 * struct sf_hdtr *hdtr, int flags)
3378 *
3379 * Send a file specified by 'fd' and starting at 'offset' to a socket
3380 * specified by 's'. Send only '*nbytes' of the file or until EOF if
3381 * *nbytes == 0. Optionally add a header and/or trailer to the socket
3382 * output. If specified, write the total number of bytes sent into *nbytes.
3383 */
3384 int
3385 sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval)
3386 {
3387 struct fileproc *fp;
3388 struct vnode *vp;
3389 struct socket *so;
3390 struct writev_nocancel_args nuap;
3391 user_ssize_t writev_retval;
3392 struct user_sf_hdtr user_hdtr;
3393 struct user32_sf_hdtr user32_hdtr;
3394 struct user64_sf_hdtr user64_hdtr;
3395 off_t off, xfsize;
3396 off_t nbytes = 0, sbytes = 0;
3397 int error = 0;
3398 size_t sizeof_hdtr;
3399 off_t file_size;
3400 struct vfs_context context = *vfs_context_current();
3401
3402 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_START), uap->s,
3403 0, 0, 0, 0);
3404
3405 AUDIT_ARG(fd, uap->fd);
3406 AUDIT_ARG(value32, uap->s);
3407
3408 /*
3409 * Do argument checking. Must be a regular file in, stream
3410 * type and connected socket out, positive offset.
3411 */
3412 if ((error = fp_getfvp(p, uap->fd, &fp, &vp))) {
3413 goto done;
3414 }
3415 if ((fp->f_flag & FREAD) == 0) {
3416 error = EBADF;
3417 goto done1;
3418 }
3419 if (vnode_isreg(vp) == 0) {
3420 error = ENOTSUP;
3421 goto done1;
3422 }
3423 error = file_socket(uap->s, &so);
3424 if (error) {
3425 goto done1;
3426 }
3427 if (so == NULL) {
3428 error = EBADF;
3429 goto done2;
3430 }
3431 if (so->so_type != SOCK_STREAM) {
3432 error = EINVAL;
3433 goto done2;
3434 }
3435 if ((so->so_state & SS_ISCONNECTED) == 0) {
3436 error = ENOTCONN;
3437 goto done2;
3438 }
3439 if (uap->offset < 0) {
3440 error = EINVAL;
3441 goto done2;
3442 }
3443 if (uap->nbytes == USER_ADDR_NULL) {
3444 error = EINVAL;
3445 goto done2;
3446 }
3447 if (uap->flags != 0) {
3448 error = EINVAL;
3449 goto done2;
3450 }
3451
3452 context.vc_ucred = fp->f_fglob->fg_cred;
3453
3454 #if CONFIG_MACF_SOCKET_SUBSET
3455 /* JMM - fetch connected sockaddr? */
3456 error = mac_socket_check_send(context.vc_ucred, so, NULL);
3457 if (error) {
3458 goto done2;
3459 }
3460 #endif
3461
3462 /*
3463 * Get number of bytes to send
3464 * Should it applies to size of header and trailer?
3465 */
3466 error = copyin(uap->nbytes, &nbytes, sizeof(off_t));
3467 if (error) {
3468 goto done2;
3469 }
3470
3471 /*
3472 * If specified, get the pointer to the sf_hdtr struct for
3473 * any headers/trailers.
3474 */
3475 if (uap->hdtr != USER_ADDR_NULL) {
3476 caddr_t hdtrp;
3477
3478 bzero(&user_hdtr, sizeof(user_hdtr));
3479 if (IS_64BIT_PROCESS(p)) {
3480 hdtrp = (caddr_t)&user64_hdtr;
3481 sizeof_hdtr = sizeof(user64_hdtr);
3482 } else {
3483 hdtrp = (caddr_t)&user32_hdtr;
3484 sizeof_hdtr = sizeof(user32_hdtr);
3485 }
3486 error = copyin(uap->hdtr, hdtrp, sizeof_hdtr);
3487 if (error) {
3488 goto done2;
3489 }
3490 if (IS_64BIT_PROCESS(p)) {
3491 user_hdtr.headers = user64_hdtr.headers;
3492 user_hdtr.hdr_cnt = user64_hdtr.hdr_cnt;
3493 user_hdtr.trailers = user64_hdtr.trailers;
3494 user_hdtr.trl_cnt = user64_hdtr.trl_cnt;
3495 } else {
3496 user_hdtr.headers = user32_hdtr.headers;
3497 user_hdtr.hdr_cnt = user32_hdtr.hdr_cnt;
3498 user_hdtr.trailers = user32_hdtr.trailers;
3499 user_hdtr.trl_cnt = user32_hdtr.trl_cnt;
3500 }
3501
3502 /*
3503 * Send any headers. Wimp out and use writev(2).
3504 */
3505 if (user_hdtr.headers != USER_ADDR_NULL) {
3506 bzero(&nuap, sizeof(struct writev_args));
3507 nuap.fd = uap->s;
3508 nuap.iovp = user_hdtr.headers;
3509 nuap.iovcnt = user_hdtr.hdr_cnt;
3510 error = writev_nocancel(p, &nuap, &writev_retval);
3511 if (error) {
3512 goto done2;
3513 }
3514 sbytes += writev_retval;
3515 }
3516 }
3517
3518 /*
3519 * Get the file size for 2 reasons:
3520 * 1. We don't want to allocate more mbufs than necessary
3521 * 2. We don't want to read past the end of file
3522 */
3523 if ((error = vnode_size(vp, &file_size, vfs_context_current())) != 0) {
3524 goto done2;
3525 }
3526
3527 /*
3528 * Simply read file data into a chain of mbufs that are used with
3529 * scatter/gather reads. We're not (yet?) set up to use zero-copy external
3530 * mbufs that point to the file pages.
3531 */
3532 socket_lock(so, 1);
3533 error = sblock(&so->so_snd, SBL_WAIT);
3534 if (error) {
3535 socket_unlock(so, 1);
3536 goto done2;
3537 }
3538 for (off = uap->offset;; off += xfsize, sbytes += xfsize) {
3539 mbuf_t m0 = NULL, m;
3540 unsigned int nbufs = SFUIOBUFS, i;
3541 uio_t auio;
3542 char uio_buf[UIO_SIZEOF(SFUIOBUFS)]; /* 1 KB !!! */
3543 size_t uiolen;
3544 user_ssize_t rlen;
3545 off_t pgoff;
3546 size_t pktlen;
3547 boolean_t jumbocl;
3548
3549 /*
3550 * Calculate the amount to transfer.
3551 * Align to round number of pages.
3552 * Not to exceed send socket buffer,
3553 * the EOF, or the passed in nbytes.
3554 */
3555 xfsize = sbspace(&so->so_snd);
3556
3557 if (xfsize <= 0) {
3558 if (so->so_state & SS_CANTSENDMORE) {
3559 error = EPIPE;
3560 goto done3;
3561 } else if ((so->so_state & SS_NBIO)) {
3562 error = EAGAIN;
3563 goto done3;
3564 } else {
3565 xfsize = PAGE_SIZE;
3566 }
3567 }
3568
3569 if (xfsize > SENDFILE_MAX_BYTES) {
3570 xfsize = SENDFILE_MAX_BYTES;
3571 } else if (xfsize > PAGE_SIZE) {
3572 xfsize = trunc_page(xfsize);
3573 }
3574 pgoff = off & PAGE_MASK_64;
3575 if (pgoff > 0 && PAGE_SIZE - pgoff < xfsize) {
3576 xfsize = PAGE_SIZE_64 - pgoff;
3577 }
3578 if (nbytes && xfsize > (nbytes - sbytes)) {
3579 xfsize = nbytes - sbytes;
3580 }
3581 if (xfsize <= 0) {
3582 break;
3583 }
3584 if (off + xfsize > file_size) {
3585 xfsize = file_size - off;
3586 }
3587 if (xfsize <= 0) {
3588 break;
3589 }
3590
3591 /*
3592 * Attempt to use larger than system page-size clusters for
3593 * large writes only if there is a jumbo cluster pool and
3594 * if the socket is marked accordingly.
3595 */
3596 jumbocl = sosendjcl && njcl > 0 &&
3597 ((so->so_flags & SOF_MULTIPAGES) || sosendjcl_ignore_capab);
3598
3599 socket_unlock(so, 0);
3600 alloc_sendpkt(M_WAIT, xfsize, &nbufs, &m0, jumbocl);
3601 pktlen = mbuf_pkthdr_maxlen(m0);
3602 if (pktlen < (size_t)xfsize) {
3603 xfsize = pktlen;
3604 }
3605
3606 auio = uio_createwithbuffer(nbufs, off, UIO_SYSSPACE,
3607 UIO_READ, &uio_buf[0], sizeof(uio_buf));
3608 if (auio == NULL) {
3609 printf("sendfile failed. nbufs = %d. %s", nbufs,
3610 "File a radar related to rdar://10146739.\n");
3611 mbuf_freem(m0);
3612 error = ENXIO;
3613 socket_lock(so, 0);
3614 goto done3;
3615 }
3616
3617 for (i = 0, m = m0, uiolen = 0;
3618 i < nbufs && m != NULL && uiolen < (size_t)xfsize;
3619 i++, m = mbuf_next(m)) {
3620 size_t mlen = mbuf_maxlen(m);
3621
3622 if (mlen + uiolen > (size_t)xfsize) {
3623 mlen = xfsize - uiolen;
3624 }
3625 mbuf_setlen(m, mlen);
3626 uio_addiov(auio, CAST_USER_ADDR_T(mbuf_datastart(m)),
3627 mlen);
3628 uiolen += mlen;
3629 }
3630
3631 if (xfsize != uio_resid(auio)) {
3632 printf("sendfile: xfsize: %lld != uio_resid(auio): "
3633 "%lld\n", xfsize, (long long)uio_resid(auio));
3634 }
3635
3636 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_START),
3637 uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
3638 (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
3639 error = fo_read(fp, auio, FOF_OFFSET, &context);
3640 socket_lock(so, 0);
3641 if (error != 0) {
3642 if (uio_resid(auio) != xfsize && (error == ERESTART ||
3643 error == EINTR || error == EWOULDBLOCK)) {
3644 error = 0;
3645 } else {
3646 mbuf_freem(m0);
3647 goto done3;
3648 }
3649 }
3650 xfsize -= uio_resid(auio);
3651 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_END),
3652 uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
3653 (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
3654
3655 if (xfsize == 0) {
3656 // printf("sendfile: fo_read 0 bytes, EOF\n");
3657 break;
3658 }
3659 if (xfsize + off > file_size) {
3660 printf("sendfile: xfsize: %lld + off: %lld > file_size:"
3661 "%lld\n", xfsize, off, file_size);
3662 }
3663 for (i = 0, m = m0, rlen = 0;
3664 i < nbufs && m != NULL && rlen < xfsize;
3665 i++, m = mbuf_next(m)) {
3666 size_t mlen = mbuf_maxlen(m);
3667
3668 if (rlen + mlen > (size_t)xfsize) {
3669 mlen = xfsize - rlen;
3670 }
3671 mbuf_setlen(m, mlen);
3672
3673 rlen += mlen;
3674 }
3675 mbuf_pkthdr_setlen(m0, xfsize);
3676
3677 retry_space:
3678 /*
3679 * Make sure that the socket is still able to take more data.
3680 * CANTSENDMORE being true usually means that the connection
3681 * was closed. so_error is true when an error was sensed after
3682 * a previous send.
3683 * The state is checked after the page mapping and buffer
3684 * allocation above since those operations may block and make
3685 * any socket checks stale. From this point forward, nothing
3686 * blocks before the pru_send (or more accurately, any blocking
3687 * results in a loop back to here to re-check).
3688 */
3689 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
3690 if (so->so_state & SS_CANTSENDMORE) {
3691 error = EPIPE;
3692 } else {
3693 error = so->so_error;
3694 so->so_error = 0;
3695 }
3696 m_freem(m0);
3697 goto done3;
3698 }
3699 /*
3700 * Wait for socket space to become available. We do this just
3701 * after checking the connection state above in order to avoid
3702 * a race condition with sbwait().
3703 */
3704 if (sbspace(&so->so_snd) < (long)so->so_snd.sb_lowat) {
3705 if (so->so_state & SS_NBIO) {
3706 m_freem(m0);
3707 error = EAGAIN;
3708 goto done3;
3709 }
3710 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT |
3711 DBG_FUNC_START), uap->s, 0, 0, 0, 0);
3712 error = sbwait(&so->so_snd);
3713 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT |
3714 DBG_FUNC_END), uap->s, 0, 0, 0, 0);
3715 /*
3716 * An error from sbwait usually indicates that we've
3717 * been interrupted by a signal. If we've sent anything
3718 * then return bytes sent, otherwise return the error.
3719 */
3720 if (error) {
3721 m_freem(m0);
3722 goto done3;
3723 }
3724 goto retry_space;
3725 }
3726
3727 struct mbuf *control = NULL;
3728 {
3729 /*
3730 * Socket filter processing
3731 */
3732
3733 error = sflt_data_out(so, NULL, &m0, &control, 0);
3734 if (error) {
3735 if (error == EJUSTRETURN) {
3736 error = 0;
3737 continue;
3738 }
3739 goto done3;
3740 }
3741 /*
3742 * End Socket filter processing
3743 */
3744 }
3745 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
3746 uap->s, 0, 0, 0, 0);
3747 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m0,
3748 0, control, p);
3749 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
3750 uap->s, 0, 0, 0, 0);
3751 if (error) {
3752 goto done3;
3753 }
3754 }
3755 sbunlock(&so->so_snd, FALSE); /* will unlock socket */
3756 /*
3757 * Send trailers. Wimp out and use writev(2).
3758 */
3759 if (uap->hdtr != USER_ADDR_NULL &&
3760 user_hdtr.trailers != USER_ADDR_NULL) {
3761 bzero(&nuap, sizeof(struct writev_args));
3762 nuap.fd = uap->s;
3763 nuap.iovp = user_hdtr.trailers;
3764 nuap.iovcnt = user_hdtr.trl_cnt;
3765 error = writev_nocancel(p, &nuap, &writev_retval);
3766 if (error) {
3767 goto done2;
3768 }
3769 sbytes += writev_retval;
3770 }
3771 done2:
3772 file_drop(uap->s);
3773 done1:
3774 file_drop(uap->fd);
3775 done:
3776 if (uap->nbytes != USER_ADDR_NULL) {
3777 /* XXX this appears bogus for some early failure conditions */
3778 copyout(&sbytes, uap->nbytes, sizeof(off_t));
3779 }
3780 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_END), uap->s,
3781 (unsigned int)((sbytes >> 32) & 0x0ffffffff),
3782 (unsigned int)(sbytes & 0x0ffffffff), error, 0);
3783 return error;
3784 done3:
3785 sbunlock(&so->so_snd, FALSE); /* will unlock socket */
3786 goto done2;
3787 }
3788
3789
3790 #endif /* SENDFILE */