]> git.saurik.com Git - apple/xnu.git/blob - bsd/kern/uipc_syscalls.c
45e0f674ea53ac4db436b97743f92207baa8d12a
[apple/xnu.git] / bsd / kern / uipc_syscalls.c
1 /*
2 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1982, 1986, 1989, 1990, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * sendfile(2) and related extensions:
33 * Copyright (c) 1998, David Greenman. All rights reserved.
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. All advertising materials mentioning features or use of this software
44 * must display the following acknowledgement:
45 * This product includes software developed by the University of
46 * California, Berkeley and its contributors.
47 * 4. Neither the name of the University nor the names of its contributors
48 * may be used to endorse or promote products derived from this software
49 * without specific prior written permission.
50 *
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE.
62 *
63 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94
64 */
65 /*
66 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
67 * support for mandatory and extensible security protections. This notice
68 * is included in support of clause 2.2 (b) of the Apple Public License,
69 * Version 2.0.
70 */
71
72 #include <sys/param.h>
73 #include <sys/systm.h>
74 #include <sys/filedesc.h>
75 #include <sys/proc_internal.h>
76 #include <sys/file_internal.h>
77 #include <sys/vnode_internal.h>
78 #include <sys/malloc.h>
79 #include <sys/mcache.h>
80 #include <sys/mbuf.h>
81 #include <kern/locks.h>
82 #include <sys/domain.h>
83 #include <sys/protosw.h>
84 #include <sys/signalvar.h>
85 #include <sys/socket.h>
86 #include <sys/socketvar.h>
87 #include <sys/kernel.h>
88 #include <sys/uio_internal.h>
89 #include <sys/kauth.h>
90 #include <kern/task.h>
91 #include <sys/priv.h>
92 #include <sys/sysctl.h>
93 #include <sys/sys_domain.h>
94
95 #include <security/audit/audit.h>
96
97 #include <sys/kdebug.h>
98 #include <sys/sysproto.h>
99 #include <netinet/in.h>
100 #include <net/route.h>
101 #include <netinet/in_pcb.h>
102
103 #include <os/ptrtools.h>
104
105 #if CONFIG_MACF_SOCKET_SUBSET
106 #include <security/mac_framework.h>
107 #endif /* MAC_SOCKET_SUBSET */
108
/*
 * Shorthand for the fileglob fields reached through a struct fileproc
 * pointer, so that fp->f_flag expands to fp->fp_glob->fg_flag, etc.
 */
#define f_flag                  fp_glob->fg_flag
#define f_ops                   fp_glob->fg_ops
#define f_data                  fp_glob->fg_data

/* kdebug trace codes used by this file; all live in the DBG_NETSOCK subclass. */
#define DBG_LAYER_IN_BEG        NETDBG_CODE(DBG_NETSOCK, 0)
#define DBG_LAYER_IN_END        NETDBG_CODE(DBG_NETSOCK, 2)
#define DBG_LAYER_OUT_BEG       NETDBG_CODE(DBG_NETSOCK, 1)
#define DBG_LAYER_OUT_END       NETDBG_CODE(DBG_NETSOCK, 3)
#define DBG_FNC_SENDMSG         NETDBG_CODE(DBG_NETSOCK, (1 << 8) | 1)
#define DBG_FNC_SENDTO          NETDBG_CODE(DBG_NETSOCK, (2 << 8) | 1)
#define DBG_FNC_SENDIT          NETDBG_CODE(DBG_NETSOCK, (3 << 8) | 1)
#define DBG_FNC_RECVFROM        NETDBG_CODE(DBG_NETSOCK, (5 << 8))
#define DBG_FNC_RECVMSG         NETDBG_CODE(DBG_NETSOCK, (6 << 8))
#define DBG_FNC_RECVIT          NETDBG_CODE(DBG_NETSOCK, (7 << 8))
#define DBG_FNC_SENDFILE        NETDBG_CODE(DBG_NETSOCK, (10 << 8))
#define DBG_FNC_SENDFILE_WAIT   NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 1))
#define DBG_FNC_SENDFILE_READ   NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 2))
#define DBG_FNC_SENDFILE_SEND   NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 3))
#define DBG_FNC_SENDMSG_X       NETDBG_CODE(DBG_NETSOCK, (11 << 8))
#define DBG_FNC_RECVMSG_X       NETDBG_CODE(DBG_NETSOCK, (12 << 8))

/*
 * On DEBUG/DEVELOPMENT builds kernel addresses are passed through untouched
 * and DBG_PRINTF() is a real printf(); on all other builds addresses go
 * through VM_KERNEL_ADDRPERM() and DBG_PRINTF() expands to an empty statement.
 */
#if DEBUG || DEVELOPMENT
#define DEBUG_KERNEL_ADDRPERM(_v) (_v)
#define DBG_PRINTF(...) printf(__VA_ARGS__)
#else
#define DEBUG_KERNEL_ADDRPERM(_v) VM_KERNEL_ADDRPERM(_v)
#define DBG_PRINTF(...) do { } while (0)
#endif
137
138 static int sendit(struct proc *, struct socket *, struct user_msghdr *, uio_t,
139 int, int32_t *);
140 static int recvit(struct proc *, int, struct user_msghdr *, uio_t, user_addr_t,
141 int32_t *);
142 static int connectit(struct socket *, struct sockaddr *);
143 static int getsockaddr(struct socket *, struct sockaddr **, user_addr_t,
144 size_t, boolean_t);
145 static int getsockaddr_s(struct socket *, struct sockaddr_storage *,
146 user_addr_t, size_t, boolean_t);
147 #if SENDFILE
148 static void alloc_sendpkt(int, size_t, unsigned int *, struct mbuf **,
149 boolean_t);
150 #endif /* SENDFILE */
151 static int connectx_nocancel(struct proc *, struct connectx_args *, int *);
152 static int connectitx(struct socket *, struct sockaddr *,
153 struct sockaddr *, struct proc *, uint32_t, sae_associd_t,
154 sae_connid_t *, uio_t, unsigned int, user_ssize_t *);
155 static int disconnectx_nocancel(struct proc *, struct disconnectx_args *,
156 int *);
157 static int socket_common(struct proc *, int, int, int, pid_t, int32_t *, int);
158
159 static int internalize_user_msghdr_array(const void *, int, int, u_int,
160 struct user_msghdr_x *, struct uio **);
161 static u_int externalize_user_msghdr_array(void *, int, int, u_int,
162 const struct user_msghdr_x *, struct uio **);
163
164 static void free_uio_array(struct uio **, u_int);
165 static boolean_t uio_array_is_valid(struct uio **, u_int);
166 static int recv_msg_array_is_valid(struct recv_msg_elem *, u_int);
167 static int internalize_recv_msghdr_array(const void *, int, int,
168 u_int, struct user_msghdr_x *, struct recv_msg_elem *);
169 static u_int externalize_recv_msghdr_array(struct proc *, struct socket *, void *, u_int,
170 struct user_msghdr_x *, struct recv_msg_elem *, int *);
171 static struct recv_msg_elem *alloc_recv_msg_array(u_int count);
172 static void free_recv_msg_array(struct recv_msg_elem *, u_int);
173
174 SYSCTL_DECL(_kern_ipc);
175
176 static u_int somaxsendmsgx = 100;
177 SYSCTL_UINT(_kern_ipc, OID_AUTO, maxsendmsgx,
178 CTLFLAG_RW | CTLFLAG_LOCKED, &somaxsendmsgx, 0, "");
179 static u_int somaxrecvmsgx = 100;
180 SYSCTL_UINT(_kern_ipc, OID_AUTO, maxrecvmsgx,
181 CTLFLAG_RW | CTLFLAG_LOCKED, &somaxrecvmsgx, 0, "");
182
183 /*
184 * System call interface to the socket abstraction.
185 */
186
187 extern const struct fileops socketops;
188
189 /*
190 * Returns: 0 Success
191 * EACCES Mandatory Access Control failure
192 * falloc:ENFILE
193 * falloc:EMFILE
194 * falloc:ENOMEM
195 * socreate:EAFNOSUPPORT
196 * socreate:EPROTOTYPE
197 * socreate:EPROTONOSUPPORT
198 * socreate:ENOBUFS
199 * socreate:ENOMEM
200 * socreate:??? [other protocol families, IPSEC]
201 */
202 int
203 socket(struct proc *p,
204 struct socket_args *uap,
205 int32_t *retval)
206 {
207 return socket_common(p, uap->domain, uap->type, uap->protocol,
208 proc_selfpid(), retval, 0);
209 }
210
211 int
212 socket_delegate(struct proc *p,
213 struct socket_delegate_args *uap,
214 int32_t *retval)
215 {
216 return socket_common(p, uap->domain, uap->type, uap->protocol,
217 uap->epid, retval, 1);
218 }
219
220 static int
221 socket_common(struct proc *p,
222 int domain,
223 int type,
224 int protocol,
225 pid_t epid,
226 int32_t *retval,
227 int delegate)
228 {
229 struct socket *so;
230 struct fileproc *fp;
231 int fd, error;
232
233 AUDIT_ARG(socket, domain, type, protocol);
234 #if CONFIG_MACF_SOCKET_SUBSET
235 if ((error = mac_socket_check_create(kauth_cred_get(), domain,
236 type, protocol)) != 0) {
237 return error;
238 }
239 #endif /* MAC_SOCKET_SUBSET */
240
241 if (delegate) {
242 error = priv_check_cred(kauth_cred_get(),
243 PRIV_NET_PRIVILEGED_SOCKET_DELEGATE, 0);
244 if (error) {
245 return EACCES;
246 }
247 }
248
249 error = falloc(p, &fp, &fd, vfs_context_current());
250 if (error) {
251 return error;
252 }
253 fp->f_flag = FREAD | FWRITE;
254 fp->f_ops = &socketops;
255
256 if (delegate) {
257 error = socreate_delegate(domain, &so, type, protocol, epid);
258 } else {
259 error = socreate(domain, &so, type, protocol);
260 }
261
262 if (error) {
263 fp_free(p, fd, fp);
264 } else {
265 fp->f_data = (caddr_t)so;
266
267 proc_fdlock(p);
268 procfdtbl_releasefd(p, fd, NULL);
269
270 fp_drop(p, fd, fp, 1);
271 proc_fdunlock(p);
272
273 *retval = fd;
274 if (ENTR_SHOULDTRACE) {
275 KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
276 fd, 0, (int64_t)VM_KERNEL_ADDRPERM(so));
277 }
278 }
279 return error;
280 }
281
282 /*
283 * Returns: 0 Success
284 * EDESTADDRREQ Destination address required
285 * EBADF Bad file descriptor
286 * EACCES Mandatory Access Control failure
287 * file_socket:ENOTSOCK
288 * file_socket:EBADF
289 * getsockaddr:ENAMETOOLONG Filename too long
290 * getsockaddr:EINVAL Invalid argument
291 * getsockaddr:ENOMEM Not enough space
292 * getsockaddr:EFAULT Bad address
293 * sobindlock:???
294 */
295 /* ARGSUSED */
296 int
297 bind(__unused proc_t p, struct bind_args *uap, __unused int32_t *retval)
298 {
299 struct sockaddr_storage ss;
300 struct sockaddr *sa = NULL;
301 struct socket *so;
302 boolean_t want_free = TRUE;
303 int error;
304
305 AUDIT_ARG(fd, uap->s);
306 error = file_socket(uap->s, &so);
307 if (error != 0) {
308 return error;
309 }
310 if (so == NULL) {
311 error = EBADF;
312 goto out;
313 }
314 if (uap->name == USER_ADDR_NULL) {
315 error = EDESTADDRREQ;
316 goto out;
317 }
318 if (uap->namelen > sizeof(ss)) {
319 error = getsockaddr(so, &sa, uap->name, uap->namelen, TRUE);
320 } else {
321 error = getsockaddr_s(so, &ss, uap->name, uap->namelen, TRUE);
322 if (error == 0) {
323 sa = (struct sockaddr *)&ss;
324 want_free = FALSE;
325 }
326 }
327 if (error != 0) {
328 goto out;
329 }
330 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
331 #if CONFIG_MACF_SOCKET_SUBSET
332 if ((sa != NULL && sa->sa_family == AF_SYSTEM) ||
333 (error = mac_socket_check_bind(kauth_cred_get(), so, sa)) == 0) {
334 error = sobindlock(so, sa, 1); /* will lock socket */
335 }
336 #else
337 error = sobindlock(so, sa, 1); /* will lock socket */
338 #endif /* MAC_SOCKET_SUBSET */
339 if (want_free) {
340 FREE(sa, M_SONAME);
341 }
342 out:
343 file_drop(uap->s);
344 return error;
345 }
346
347 /*
348 * Returns: 0 Success
349 * EBADF
350 * EACCES Mandatory Access Control failure
351 * file_socket:ENOTSOCK
352 * file_socket:EBADF
353 * solisten:EINVAL
354 * solisten:EOPNOTSUPP
355 * solisten:???
356 */
357 int
358 listen(__unused struct proc *p, struct listen_args *uap,
359 __unused int32_t *retval)
360 {
361 int error;
362 struct socket *so;
363
364 AUDIT_ARG(fd, uap->s);
365 error = file_socket(uap->s, &so);
366 if (error) {
367 return error;
368 }
369 if (so != NULL)
370 #if CONFIG_MACF_SOCKET_SUBSET
371 {
372 error = mac_socket_check_listen(kauth_cred_get(), so);
373 if (error == 0) {
374 error = solisten(so, uap->backlog);
375 }
376 }
377 #else
378 { error = solisten(so, uap->backlog);}
379 #endif /* MAC_SOCKET_SUBSET */
380 else {
381 error = EBADF;
382 }
383
384 file_drop(uap->s);
385 return error;
386 }
387
388 /*
389 * Returns: fp_get_ftype:EBADF Bad file descriptor
390 * fp_get_ftype:ENOTSOCK Socket operation on non-socket
391 * :EFAULT Bad address on copyin/copyout
392 * :EBADF Bad file descriptor
393 * :EOPNOTSUPP Operation not supported on socket
394 * :EINVAL Invalid argument
395 * :EWOULDBLOCK Operation would block
396 * :ECONNABORTED Connection aborted
397 * :EINTR Interrupted function
398 * :EACCES Mandatory Access Control failure
399 * falloc:ENFILE Too many files open in system
400 * falloc:EMFILE Too many open files
401 * falloc:ENOMEM Not enough space
402 * 0 Success
403 */
404 int
405 accept_nocancel(struct proc *p, struct accept_nocancel_args *uap,
406 int32_t *retval)
407 {
408 struct fileproc *fp;
409 struct sockaddr *sa = NULL;
410 socklen_t namelen;
411 int error;
412 struct socket *head, *so = NULL;
413 lck_mtx_t *mutex_held;
414 int fd = uap->s;
415 int newfd;
416 unsigned int fflag;
417 int dosocklock = 0;
418
419 *retval = -1;
420
421 AUDIT_ARG(fd, uap->s);
422
423 if (uap->name) {
424 error = copyin(uap->anamelen, (caddr_t)&namelen,
425 sizeof(socklen_t));
426 if (error) {
427 return error;
428 }
429 }
430 error = fp_get_ftype(p, fd, DTYPE_SOCKET, ENOTSOCK, &fp);
431 if (error) {
432 return error;
433 }
434 head = fp->f_data;
435
436 #if CONFIG_MACF_SOCKET_SUBSET
437 if ((error = mac_socket_check_accept(kauth_cred_get(), head)) != 0) {
438 goto out;
439 }
440 #endif /* MAC_SOCKET_SUBSET */
441
442 socket_lock(head, 1);
443
444 if (head->so_proto->pr_getlock != NULL) {
445 mutex_held = (*head->so_proto->pr_getlock)(head, PR_F_WILLUNLOCK);
446 dosocklock = 1;
447 } else {
448 mutex_held = head->so_proto->pr_domain->dom_mtx;
449 dosocklock = 0;
450 }
451
452 if ((head->so_options & SO_ACCEPTCONN) == 0) {
453 if ((head->so_proto->pr_flags & PR_CONNREQUIRED) == 0) {
454 error = EOPNOTSUPP;
455 } else {
456 /* POSIX: The socket is not accepting connections */
457 error = EINVAL;
458 }
459 socket_unlock(head, 1);
460 goto out;
461 }
462 check_again:
463 if ((head->so_state & SS_NBIO) && head->so_comp.tqh_first == NULL) {
464 socket_unlock(head, 1);
465 error = EWOULDBLOCK;
466 goto out;
467 }
468 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
469 if (head->so_state & SS_CANTRCVMORE) {
470 head->so_error = ECONNABORTED;
471 break;
472 }
473 if (head->so_usecount < 1) {
474 panic("accept: head=%p refcount=%d\n", head,
475 head->so_usecount);
476 }
477 error = msleep((caddr_t)&head->so_timeo, mutex_held,
478 PSOCK | PCATCH, "accept", 0);
479 if (head->so_usecount < 1) {
480 panic("accept: 2 head=%p refcount=%d\n", head,
481 head->so_usecount);
482 }
483 if ((head->so_state & SS_DRAINING)) {
484 error = ECONNABORTED;
485 }
486 if (error) {
487 socket_unlock(head, 1);
488 goto out;
489 }
490 }
491 if (head->so_error) {
492 error = head->so_error;
493 head->so_error = 0;
494 socket_unlock(head, 1);
495 goto out;
496 }
497
498 /*
499 * At this point we know that there is at least one connection
500 * ready to be accepted. Remove it from the queue prior to
501 * allocating the file descriptor for it since falloc() may
502 * block allowing another process to accept the connection
503 * instead.
504 */
505 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
506
507 so_acquire_accept_list(head, NULL);
508 if (TAILQ_EMPTY(&head->so_comp)) {
509 so_release_accept_list(head);
510 goto check_again;
511 }
512
513 so = TAILQ_FIRST(&head->so_comp);
514 TAILQ_REMOVE(&head->so_comp, so, so_list);
515 so->so_head = NULL;
516 so->so_state &= ~SS_COMP;
517 head->so_qlen--;
518 so_release_accept_list(head);
519
520 /* unlock head to avoid deadlock with select, keep a ref on head */
521 socket_unlock(head, 0);
522
523 #if CONFIG_MACF_SOCKET_SUBSET
524 /*
525 * Pass the pre-accepted socket to the MAC framework. This is
526 * cheaper than allocating a file descriptor for the socket,
527 * calling the protocol accept callback, and possibly freeing
528 * the file descriptor should the MAC check fails.
529 */
530 if ((error = mac_socket_check_accepted(kauth_cred_get(), so)) != 0) {
531 socket_lock(so, 1);
532 so->so_state &= ~SS_NOFDREF;
533 socket_unlock(so, 1);
534 soclose(so);
535 /* Drop reference on listening socket */
536 sodereference(head);
537 goto out;
538 }
539 #endif /* MAC_SOCKET_SUBSET */
540
541 /*
542 * Pass the pre-accepted socket to any interested socket filter(s).
543 * Upon failure, the socket would have been closed by the callee.
544 */
545 if (so->so_filt != NULL && (error = soacceptfilter(so, head)) != 0) {
546 /* Drop reference on listening socket */
547 sodereference(head);
548 /* Propagate socket filter's error code to the caller */
549 goto out;
550 }
551
552 fflag = fp->f_flag;
553 error = falloc(p, &fp, &newfd, vfs_context_current());
554 if (error) {
555 /*
556 * Probably ran out of file descriptors.
557 *
558 * <rdar://problem/8554930>
559 * Don't put this back on the socket like we used to, that
560 * just causes the client to spin. Drop the socket.
561 */
562 socket_lock(so, 1);
563 so->so_state &= ~SS_NOFDREF;
564 socket_unlock(so, 1);
565 soclose(so);
566 sodereference(head);
567 goto out;
568 }
569 *retval = newfd;
570 fp->f_flag = fflag;
571 fp->f_ops = &socketops;
572 fp->f_data = (caddr_t)so;
573
574 socket_lock(head, 0);
575 if (dosocklock) {
576 socket_lock(so, 1);
577 }
578
579 /* Sync socket non-blocking/async state with file flags */
580 if (fp->f_flag & FNONBLOCK) {
581 so->so_state |= SS_NBIO;
582 } else {
583 so->so_state &= ~SS_NBIO;
584 }
585
586 if (fp->f_flag & FASYNC) {
587 so->so_state |= SS_ASYNC;
588 so->so_rcv.sb_flags |= SB_ASYNC;
589 so->so_snd.sb_flags |= SB_ASYNC;
590 } else {
591 so->so_state &= ~SS_ASYNC;
592 so->so_rcv.sb_flags &= ~SB_ASYNC;
593 so->so_snd.sb_flags &= ~SB_ASYNC;
594 }
595
596 (void) soacceptlock(so, &sa, 0);
597 socket_unlock(head, 1);
598 if (sa == NULL) {
599 namelen = 0;
600 if (uap->name) {
601 goto gotnoname;
602 }
603 error = 0;
604 goto releasefd;
605 }
606 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
607
608 if (uap->name) {
609 socklen_t sa_len;
610
611 /* save sa_len before it is destroyed */
612 sa_len = sa->sa_len;
613 namelen = MIN(namelen, sa_len);
614 error = copyout(sa, uap->name, namelen);
615 if (!error) {
616 /* return the actual, untruncated address length */
617 namelen = sa_len;
618 }
619 gotnoname:
620 error = copyout((caddr_t)&namelen, uap->anamelen,
621 sizeof(socklen_t));
622 }
623 FREE(sa, M_SONAME);
624
625 releasefd:
626 /*
627 * If the socket has been marked as inactive by sosetdefunct(),
628 * disallow further operations on it.
629 */
630 if (so->so_flags & SOF_DEFUNCT) {
631 sodefunct(current_proc(), so,
632 SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL);
633 }
634
635 if (dosocklock) {
636 socket_unlock(so, 1);
637 }
638
639 proc_fdlock(p);
640 procfdtbl_releasefd(p, newfd, NULL);
641 fp_drop(p, newfd, fp, 1);
642 proc_fdunlock(p);
643
644 out:
645 file_drop(fd);
646
647 if (error == 0 && ENTR_SHOULDTRACE) {
648 KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
649 newfd, 0, (int64_t)VM_KERNEL_ADDRPERM(so));
650 }
651 return error;
652 }
653
654 int
655 accept(struct proc *p, struct accept_args *uap, int32_t *retval)
656 {
657 __pthread_testcancel(1);
658 return accept_nocancel(p, (struct accept_nocancel_args *)uap,
659 retval);
660 }
661
662 /*
663 * Returns: 0 Success
664 * EBADF Bad file descriptor
665 * EALREADY Connection already in progress
666 * EINPROGRESS Operation in progress
667 * ECONNABORTED Connection aborted
668 * EINTR Interrupted function
669 * EACCES Mandatory Access Control failure
670 * file_socket:ENOTSOCK
671 * file_socket:EBADF
672 * getsockaddr:ENAMETOOLONG Filename too long
673 * getsockaddr:EINVAL Invalid argument
674 * getsockaddr:ENOMEM Not enough space
675 * getsockaddr:EFAULT Bad address
676 * soconnectlock:EOPNOTSUPP
677 * soconnectlock:EISCONN
678 * soconnectlock:??? [depends on protocol, filters]
679 * msleep:EINTR
680 *
681 * Imputed: so_error error may be set from so_error, which
682 * may have been set by soconnectlock.
683 */
684 /* ARGSUSED */
685 int
686 connect(struct proc *p, struct connect_args *uap, int32_t *retval)
687 {
688 __pthread_testcancel(1);
689 return connect_nocancel(p, (struct connect_nocancel_args *)uap,
690 retval);
691 }
692
693 int
694 connect_nocancel(proc_t p, struct connect_nocancel_args *uap, int32_t *retval)
695 {
696 #pragma unused(p, retval)
697 struct socket *so;
698 struct sockaddr_storage ss;
699 struct sockaddr *sa = NULL;
700 int error;
701 int fd = uap->s;
702 boolean_t dgram;
703
704 AUDIT_ARG(fd, uap->s);
705 error = file_socket(fd, &so);
706 if (error != 0) {
707 return error;
708 }
709 if (so == NULL) {
710 error = EBADF;
711 goto out;
712 }
713
714 /*
715 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
716 * if this is a datagram socket; translate for other types.
717 */
718 dgram = (so->so_type == SOCK_DGRAM);
719
720 /* Get socket address now before we obtain socket lock */
721 if (uap->namelen > sizeof(ss)) {
722 error = getsockaddr(so, &sa, uap->name, uap->namelen, !dgram);
723 } else {
724 error = getsockaddr_s(so, &ss, uap->name, uap->namelen, !dgram);
725 if (error == 0) {
726 sa = (struct sockaddr *)&ss;
727 }
728 }
729 if (error != 0) {
730 goto out;
731 }
732
733 error = connectit(so, sa);
734
735 if (sa != NULL && sa != SA(&ss)) {
736 FREE(sa, M_SONAME);
737 }
738 if (error == ERESTART) {
739 error = EINTR;
740 }
741 out:
742 file_drop(fd);
743 return error;
744 }
745
746 static int
747 connectx_nocancel(struct proc *p, struct connectx_args *uap, int *retval)
748 {
749 #pragma unused(p, retval)
750 struct sockaddr_storage ss, sd;
751 struct sockaddr *src = NULL, *dst = NULL;
752 struct socket *so;
753 int error, error1, fd = uap->socket;
754 boolean_t dgram;
755 sae_connid_t cid = SAE_CONNID_ANY;
756 struct user32_sa_endpoints ep32;
757 struct user64_sa_endpoints ep64;
758 struct user_sa_endpoints ep;
759 user_ssize_t bytes_written = 0;
760 struct user_iovec *iovp;
761 uio_t auio = NULL;
762
763 AUDIT_ARG(fd, uap->socket);
764 error = file_socket(fd, &so);
765 if (error != 0) {
766 return error;
767 }
768 if (so == NULL) {
769 error = EBADF;
770 goto out;
771 }
772
773 if (uap->endpoints == USER_ADDR_NULL) {
774 error = EINVAL;
775 goto out;
776 }
777
778 if (IS_64BIT_PROCESS(p)) {
779 error = copyin(uap->endpoints, (caddr_t)&ep64, sizeof(ep64));
780 if (error != 0) {
781 goto out;
782 }
783
784 ep.sae_srcif = ep64.sae_srcif;
785 ep.sae_srcaddr = (user_addr_t)ep64.sae_srcaddr;
786 ep.sae_srcaddrlen = ep64.sae_srcaddrlen;
787 ep.sae_dstaddr = (user_addr_t)ep64.sae_dstaddr;
788 ep.sae_dstaddrlen = ep64.sae_dstaddrlen;
789 } else {
790 error = copyin(uap->endpoints, (caddr_t)&ep32, sizeof(ep32));
791 if (error != 0) {
792 goto out;
793 }
794
795 ep.sae_srcif = ep32.sae_srcif;
796 ep.sae_srcaddr = ep32.sae_srcaddr;
797 ep.sae_srcaddrlen = ep32.sae_srcaddrlen;
798 ep.sae_dstaddr = ep32.sae_dstaddr;
799 ep.sae_dstaddrlen = ep32.sae_dstaddrlen;
800 }
801
802 /*
803 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
804 * if this is a datagram socket; translate for other types.
805 */
806 dgram = (so->so_type == SOCK_DGRAM);
807
808 /* Get socket address now before we obtain socket lock */
809 if (ep.sae_srcaddr != USER_ADDR_NULL) {
810 if (ep.sae_srcaddrlen > sizeof(ss)) {
811 error = getsockaddr(so, &src, ep.sae_srcaddr, ep.sae_srcaddrlen, dgram);
812 } else {
813 error = getsockaddr_s(so, &ss, ep.sae_srcaddr, ep.sae_srcaddrlen, dgram);
814 if (error == 0) {
815 src = (struct sockaddr *)&ss;
816 }
817 }
818
819 if (error) {
820 goto out;
821 }
822 }
823
824 if (ep.sae_dstaddr == USER_ADDR_NULL) {
825 error = EINVAL;
826 goto out;
827 }
828
829 /* Get socket address now before we obtain socket lock */
830 if (ep.sae_dstaddrlen > sizeof(sd)) {
831 error = getsockaddr(so, &dst, ep.sae_dstaddr, ep.sae_dstaddrlen, dgram);
832 } else {
833 error = getsockaddr_s(so, &sd, ep.sae_dstaddr, ep.sae_dstaddrlen, dgram);
834 if (error == 0) {
835 dst = (struct sockaddr *)&sd;
836 }
837 }
838
839 if (error) {
840 goto out;
841 }
842
843 VERIFY(dst != NULL);
844
845 if (uap->iov != USER_ADDR_NULL) {
846 /* Verify range before calling uio_create() */
847 if (uap->iovcnt <= 0 || uap->iovcnt > UIO_MAXIOV) {
848 error = EINVAL;
849 goto out;
850 }
851
852 if (uap->len == USER_ADDR_NULL) {
853 error = EINVAL;
854 goto out;
855 }
856
857 /* allocate a uio to hold the number of iovecs passed */
858 auio = uio_create(uap->iovcnt, 0,
859 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
860 UIO_WRITE);
861
862 if (auio == NULL) {
863 error = ENOMEM;
864 goto out;
865 }
866
867 /*
868 * get location of iovecs within the uio.
869 * then copyin the iovecs from user space.
870 */
871 iovp = uio_iovsaddr(auio);
872 if (iovp == NULL) {
873 error = ENOMEM;
874 goto out;
875 }
876 error = copyin_user_iovec_array(uap->iov,
877 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
878 uap->iovcnt, iovp);
879 if (error != 0) {
880 goto out;
881 }
882
883 /* finish setup of uio_t */
884 error = uio_calculateresid(auio);
885 if (error != 0) {
886 goto out;
887 }
888 }
889
890 error = connectitx(so, src, dst, p, ep.sae_srcif, uap->associd,
891 &cid, auio, uap->flags, &bytes_written);
892 if (error == ERESTART) {
893 error = EINTR;
894 }
895
896 if (uap->len != USER_ADDR_NULL) {
897 error1 = copyout(&bytes_written, uap->len, sizeof(uap->len));
898 /* give precedence to connectitx errors */
899 if ((error1 != 0) && (error == 0)) {
900 error = error1;
901 }
902 }
903
904 if (uap->connid != USER_ADDR_NULL) {
905 error1 = copyout(&cid, uap->connid, sizeof(cid));
906 /* give precedence to connectitx errors */
907 if ((error1 != 0) && (error == 0)) {
908 error = error1;
909 }
910 }
911 out:
912 file_drop(fd);
913 if (auio != NULL) {
914 uio_free(auio);
915 }
916 if (src != NULL && src != SA(&ss)) {
917 FREE(src, M_SONAME);
918 }
919 if (dst != NULL && dst != SA(&sd)) {
920 FREE(dst, M_SONAME);
921 }
922 return error;
923 }
924
/*
 * connectx(2) entry point: a pthread cancellation point in front of
 * connectx_nocancel().
 */
int
connectx(struct proc *p, struct connectx_args *uap, int *retval)
{
	int error;

	/*
	 * Due to similarity with a POSIX interface, define as
	 * an unofficial cancellation point.
	 */
	__pthread_testcancel(1);

	error = connectx_nocancel(p, uap, retval);
	return error;
}
935
936 static int
937 connectit(struct socket *so, struct sockaddr *sa)
938 {
939 int error;
940
941 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
942 #if CONFIG_MACF_SOCKET_SUBSET
943 if ((error = mac_socket_check_connect(kauth_cred_get(), so, sa)) != 0) {
944 return error;
945 }
946 #endif /* MAC_SOCKET_SUBSET */
947
948 socket_lock(so, 1);
949 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
950 error = EALREADY;
951 goto out;
952 }
953 error = soconnectlock(so, sa, 0);
954 if (error != 0) {
955 goto out;
956 }
957 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
958 error = EINPROGRESS;
959 goto out;
960 }
961 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
962 lck_mtx_t *mutex_held;
963
964 if (so->so_proto->pr_getlock != NULL) {
965 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
966 } else {
967 mutex_held = so->so_proto->pr_domain->dom_mtx;
968 }
969 error = msleep((caddr_t)&so->so_timeo, mutex_held,
970 PSOCK | PCATCH, __func__, 0);
971 if (so->so_state & SS_DRAINING) {
972 error = ECONNABORTED;
973 }
974 if (error != 0) {
975 break;
976 }
977 }
978 if (error == 0) {
979 error = so->so_error;
980 so->so_error = 0;
981 }
982 out:
983 socket_unlock(so, 1);
984 return error;
985 }
986
987 static int
988 connectitx(struct socket *so, struct sockaddr *src,
989 struct sockaddr *dst, struct proc *p, uint32_t ifscope,
990 sae_associd_t aid, sae_connid_t *pcid, uio_t auio, unsigned int flags,
991 user_ssize_t *bytes_written)
992 {
993 int error;
994
995 VERIFY(dst != NULL);
996
997 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), dst);
998 #if CONFIG_MACF_SOCKET_SUBSET
999 if ((error = mac_socket_check_connect(kauth_cred_get(), so, dst)) != 0) {
1000 return error;
1001 }
1002
1003 if (auio != NULL) {
1004 if ((error = mac_socket_check_send(kauth_cred_get(), so, dst)) != 0) {
1005 return error;
1006 }
1007 }
1008 #endif /* MAC_SOCKET_SUBSET */
1009
1010 socket_lock(so, 1);
1011 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
1012 error = EALREADY;
1013 goto out;
1014 }
1015
1016 error = soconnectxlocked(so, src, dst, p, ifscope,
1017 aid, pcid, flags, NULL, 0, auio, bytes_written);
1018 if (error != 0) {
1019 goto out;
1020 }
1021 /*
1022 * If, after the call to soconnectxlocked the flag is still set (in case
1023 * data has been queued and the connect() has actually been triggered,
1024 * it will have been unset by the transport), we exit immediately. There
1025 * is no reason to wait on any event.
1026 */
1027 if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
1028 error = 0;
1029 goto out;
1030 }
1031 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
1032 error = EINPROGRESS;
1033 goto out;
1034 }
1035 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
1036 lck_mtx_t *mutex_held;
1037
1038 if (so->so_proto->pr_getlock != NULL) {
1039 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
1040 } else {
1041 mutex_held = so->so_proto->pr_domain->dom_mtx;
1042 }
1043 error = msleep((caddr_t)&so->so_timeo, mutex_held,
1044 PSOCK | PCATCH, __func__, 0);
1045 if (so->so_state & SS_DRAINING) {
1046 error = ECONNABORTED;
1047 }
1048 if (error != 0) {
1049 break;
1050 }
1051 }
1052 if (error == 0) {
1053 error = so->so_error;
1054 so->so_error = 0;
1055 }
1056 out:
1057 socket_unlock(so, 1);
1058 return error;
1059 }
1060
/*
 * peeloff() entry point. Apart from acting as a cancellation point it does
 * nothing: all arguments are ignored and 0 is always returned.
 */
int
peeloff(struct proc *p, struct peeloff_args *uap, int *retval)
{
#pragma unused(p, uap, retval)
	const int result = 0;

	/*
	 * Due to similarity with a POSIX interface, define as
	 * an unofficial cancellation point.
	 */
	__pthread_testcancel(1);

	return result;
}
1072
/*
 * disconnectx(2) entry point: a pthread cancellation point in front of
 * disconnectx_nocancel().
 */
int
disconnectx(struct proc *p, struct disconnectx_args *uap, int *retval)
{
	int error;

	/*
	 * Due to similarity with a POSIX interface, define as
	 * an unofficial cancellation point.
	 */
	__pthread_testcancel(1);

	error = disconnectx_nocancel(p, uap, retval);
	return error;
}
1083
1084 static int
1085 disconnectx_nocancel(struct proc *p, struct disconnectx_args *uap, int *retval)
1086 {
1087 #pragma unused(p, retval)
1088 struct socket *so;
1089 int fd = uap->s;
1090 int error;
1091
1092 error = file_socket(fd, &so);
1093 if (error != 0) {
1094 return error;
1095 }
1096 if (so == NULL) {
1097 error = EBADF;
1098 goto out;
1099 }
1100
1101 error = sodisconnectx(so, uap->aid, uap->cid);
1102 out:
1103 file_drop(fd);
1104 return error;
1105 }
1106
1107 /*
1108 * Returns: 0 Success
1109 * socreate:EAFNOSUPPORT
1110 * socreate:EPROTOTYPE
1111 * socreate:EPROTONOSUPPORT
1112 * socreate:ENOBUFS
1113 * socreate:ENOMEM
1114 * socreate:EISCONN
1115 * socreate:??? [other protocol families, IPSEC]
1116 * falloc:ENFILE
1117 * falloc:EMFILE
1118 * falloc:ENOMEM
1119 * copyout:EFAULT
1120 * soconnect2:EINVAL
1121 * soconnect2:EPROTOTYPE
1122 * soconnect2:??? [other protocol families]
1123 */
1124 int
1125 socketpair(struct proc *p, struct socketpair_args *uap,
1126 __unused int32_t *retval)
1127 {
1128 struct fileproc *fp1, *fp2;
1129 struct socket *so1, *so2;
1130 int fd, error, sv[2];
1131
1132 AUDIT_ARG(socket, uap->domain, uap->type, uap->protocol);
1133 error = socreate(uap->domain, &so1, uap->type, uap->protocol);
1134 if (error) {
1135 return error;
1136 }
1137 error = socreate(uap->domain, &so2, uap->type, uap->protocol);
1138 if (error) {
1139 goto free1;
1140 }
1141
1142 error = falloc(p, &fp1, &fd, vfs_context_current());
1143 if (error) {
1144 goto free2;
1145 }
1146 fp1->f_flag = FREAD | FWRITE;
1147 fp1->f_ops = &socketops;
1148 fp1->f_data = (caddr_t)so1;
1149 sv[0] = fd;
1150
1151 error = falloc(p, &fp2, &fd, vfs_context_current());
1152 if (error) {
1153 goto free3;
1154 }
1155 fp2->f_flag = FREAD | FWRITE;
1156 fp2->f_ops = &socketops;
1157 fp2->f_data = (caddr_t)so2;
1158 sv[1] = fd;
1159
1160 error = soconnect2(so1, so2);
1161 if (error) {
1162 goto free4;
1163 }
1164 if (uap->type == SOCK_DGRAM) {
1165 /*
1166 * Datagram socket connection is asymmetric.
1167 */
1168 error = soconnect2(so2, so1);
1169 if (error) {
1170 goto free4;
1171 }
1172 }
1173
1174 if ((error = copyout(sv, uap->rsv, 2 * sizeof(int))) != 0) {
1175 goto free4;
1176 }
1177
1178 proc_fdlock(p);
1179 procfdtbl_releasefd(p, sv[0], NULL);
1180 procfdtbl_releasefd(p, sv[1], NULL);
1181 fp_drop(p, sv[0], fp1, 1);
1182 fp_drop(p, sv[1], fp2, 1);
1183 proc_fdunlock(p);
1184
1185 return 0;
1186 free4:
1187 fp_free(p, sv[1], fp2);
1188 free3:
1189 fp_free(p, sv[0], fp1);
1190 free2:
1191 (void) soclose(so2);
1192 free1:
1193 (void) soclose(so1);
1194 return error;
1195 }
1196
1197 /*
1198 * Returns: 0 Success
1199 * EINVAL
1200 * ENOBUFS
1201 * EBADF
1202 * EPIPE
1203 * EACCES Mandatory Access Control failure
1204 * file_socket:ENOTSOCK
1205 * file_socket:EBADF
1206 * getsockaddr:ENAMETOOLONG Filename too long
1207 * getsockaddr:EINVAL Invalid argument
1208 * getsockaddr:ENOMEM Not enough space
1209 * getsockaddr:EFAULT Bad address
1210 * <pru_sosend>:EACCES[TCP]
1211 * <pru_sosend>:EADDRINUSE[TCP]
1212 * <pru_sosend>:EADDRNOTAVAIL[TCP]
1213 * <pru_sosend>:EAFNOSUPPORT[TCP]
1214 * <pru_sosend>:EAGAIN[TCP]
1215 * <pru_sosend>:EBADF
1216 * <pru_sosend>:ECONNRESET[TCP]
1217 * <pru_sosend>:EFAULT
1218 * <pru_sosend>:EHOSTUNREACH[TCP]
1219 * <pru_sosend>:EINTR
1220 * <pru_sosend>:EINVAL
1221 * <pru_sosend>:EISCONN[AF_INET]
1222 * <pru_sosend>:EMSGSIZE[TCP]
1223 * <pru_sosend>:ENETDOWN[TCP]
1224 * <pru_sosend>:ENETUNREACH[TCP]
1225 * <pru_sosend>:ENOBUFS
1226 * <pru_sosend>:ENOMEM[TCP]
1227 * <pru_sosend>:ENOTCONN[AF_INET]
1228 * <pru_sosend>:EOPNOTSUPP
1229 * <pru_sosend>:EPERM[TCP]
1230 * <pru_sosend>:EPIPE
1231 * <pru_sosend>:EWOULDBLOCK
1232 * <pru_sosend>:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
1233 * <pru_sosend>:???[AF_INET] [whatever a filter author chooses]
1234 * <pru_sosend>:??? [value from so_error]
1235 * sockargs:???
1236 */
1237 static int
1238 sendit(struct proc *p, struct socket *so, struct user_msghdr *mp, uio_t uiop,
1239 int flags, int32_t *retval)
1240 {
1241 struct mbuf *control = NULL;
1242 struct sockaddr_storage ss;
1243 struct sockaddr *to = NULL;
1244 boolean_t want_free = TRUE;
1245 int error;
1246 user_ssize_t len;
1247
1248 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
1249
1250 if (mp->msg_name != USER_ADDR_NULL) {
1251 if (mp->msg_namelen > sizeof(ss)) {
1252 error = getsockaddr(so, &to, mp->msg_name,
1253 mp->msg_namelen, TRUE);
1254 } else {
1255 error = getsockaddr_s(so, &ss, mp->msg_name,
1256 mp->msg_namelen, TRUE);
1257 if (error == 0) {
1258 to = (struct sockaddr *)&ss;
1259 want_free = FALSE;
1260 }
1261 }
1262 if (error != 0) {
1263 goto out;
1264 }
1265 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), to);
1266 }
1267 if (mp->msg_control != USER_ADDR_NULL) {
1268 if (mp->msg_controllen < sizeof(struct cmsghdr)) {
1269 error = EINVAL;
1270 goto bad;
1271 }
1272 error = sockargs(&control, mp->msg_control,
1273 mp->msg_controllen, MT_CONTROL);
1274 if (error != 0) {
1275 goto bad;
1276 }
1277 }
1278
1279 #if CONFIG_MACF_SOCKET_SUBSET
1280 /*
1281 * We check the state without holding the socket lock;
1282 * if a race condition occurs, it would simply result
1283 * in an extra call to the MAC check function.
1284 */
1285 if (to != NULL &&
1286 !(so->so_state & SS_DEFUNCT) &&
1287 (error = mac_socket_check_send(kauth_cred_get(), so, to)) != 0) {
1288 goto bad;
1289 }
1290 #endif /* MAC_SOCKET_SUBSET */
1291
1292 len = uio_resid(uiop);
1293 error = so->so_proto->pr_usrreqs->pru_sosend(so, to, uiop, 0,
1294 control, flags);
1295 if (error != 0) {
1296 if (uio_resid(uiop) != len && (error == ERESTART ||
1297 error == EINTR || error == EWOULDBLOCK)) {
1298 error = 0;
1299 }
1300 /* Generation of SIGPIPE can be controlled per socket */
1301 if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE) &&
1302 !(flags & MSG_NOSIGNAL)) {
1303 psignal(p, SIGPIPE);
1304 }
1305 }
1306 if (error == 0) {
1307 *retval = (int)(len - uio_resid(uiop));
1308 }
1309 bad:
1310 if (want_free) {
1311 FREE(to, M_SONAME);
1312 }
1313 out:
1314 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error, 0, 0, 0, 0);
1315
1316 return error;
1317 }
1318
1319 /*
1320 * Returns: 0 Success
1321 * ENOMEM
1322 * sendit:??? [see sendit definition in this file]
1323 * write:??? [4056224: applicable for pipes]
1324 */
1325 int
1326 sendto(struct proc *p, struct sendto_args *uap, int32_t *retval)
1327 {
1328 __pthread_testcancel(1);
1329 return sendto_nocancel(p, (struct sendto_nocancel_args *)uap, retval);
1330 }
1331
1332 int
1333 sendto_nocancel(struct proc *p,
1334 struct sendto_nocancel_args *uap,
1335 int32_t *retval)
1336 {
1337 struct user_msghdr msg;
1338 int error;
1339 uio_t auio = NULL;
1340 struct socket *so;
1341
1342 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_START, 0, 0, 0, 0, 0);
1343 AUDIT_ARG(fd, uap->s);
1344
1345 if (uap->flags & MSG_SKIPCFIL) {
1346 error = EPERM;
1347 goto done;
1348 }
1349
1350 if (uap->len > LONG_MAX) {
1351 error = EINVAL;
1352 goto done;
1353 }
1354
1355 auio = uio_create(1, 0,
1356 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1357 UIO_WRITE);
1358 if (auio == NULL) {
1359 error = ENOMEM;
1360 goto done;
1361 }
1362 uio_addiov(auio, uap->buf, uap->len);
1363
1364 msg.msg_name = uap->to;
1365 msg.msg_namelen = uap->tolen;
1366 /* no need to set up msg_iov. sendit uses uio_t we send it */
1367 msg.msg_iov = 0;
1368 msg.msg_iovlen = 0;
1369 msg.msg_control = 0;
1370 msg.msg_flags = 0;
1371
1372 error = file_socket(uap->s, &so);
1373 if (error) {
1374 goto done;
1375 }
1376
1377 if (so == NULL) {
1378 error = EBADF;
1379 } else {
1380 error = sendit(p, so, &msg, auio, uap->flags, retval);
1381 }
1382
1383 file_drop(uap->s);
1384 done:
1385 if (auio != NULL) {
1386 uio_free(auio);
1387 }
1388
1389 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_END, error, *retval, 0, 0, 0);
1390
1391 return error;
1392 }
1393
1394 /*
1395 * Returns: 0 Success
1396 * ENOBUFS
1397 * copyin:EFAULT
1398 * sendit:??? [see sendit definition in this file]
1399 */
1400 int
1401 sendmsg(struct proc *p, struct sendmsg_args *uap, int32_t *retval)
1402 {
1403 __pthread_testcancel(1);
1404 return sendmsg_nocancel(p, (struct sendmsg_nocancel_args *)uap,
1405 retval);
1406 }
1407
1408 int
1409 sendmsg_nocancel(struct proc *p, struct sendmsg_nocancel_args *uap,
1410 int32_t *retval)
1411 {
1412 struct user32_msghdr msg32;
1413 struct user64_msghdr msg64;
1414 struct user_msghdr user_msg;
1415 caddr_t msghdrp;
1416 int size_of_msghdr;
1417 int error;
1418 uio_t auio = NULL;
1419 struct user_iovec *iovp;
1420 struct socket *so;
1421
1422 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
1423 AUDIT_ARG(fd, uap->s);
1424
1425 if (uap->flags & MSG_SKIPCFIL) {
1426 error = EPERM;
1427 goto done;
1428 }
1429
1430 if (IS_64BIT_PROCESS(p)) {
1431 msghdrp = (caddr_t)&msg64;
1432 size_of_msghdr = sizeof(msg64);
1433 } else {
1434 msghdrp = (caddr_t)&msg32;
1435 size_of_msghdr = sizeof(msg32);
1436 }
1437 error = copyin(uap->msg, msghdrp, size_of_msghdr);
1438 if (error) {
1439 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1440 return error;
1441 }
1442
1443 if (IS_64BIT_PROCESS(p)) {
1444 user_msg.msg_flags = msg64.msg_flags;
1445 user_msg.msg_controllen = msg64.msg_controllen;
1446 user_msg.msg_control = (user_addr_t)msg64.msg_control;
1447 user_msg.msg_iovlen = msg64.msg_iovlen;
1448 user_msg.msg_iov = (user_addr_t)msg64.msg_iov;
1449 user_msg.msg_namelen = msg64.msg_namelen;
1450 user_msg.msg_name = (user_addr_t)msg64.msg_name;
1451 } else {
1452 user_msg.msg_flags = msg32.msg_flags;
1453 user_msg.msg_controllen = msg32.msg_controllen;
1454 user_msg.msg_control = msg32.msg_control;
1455 user_msg.msg_iovlen = msg32.msg_iovlen;
1456 user_msg.msg_iov = msg32.msg_iov;
1457 user_msg.msg_namelen = msg32.msg_namelen;
1458 user_msg.msg_name = msg32.msg_name;
1459 }
1460
1461 if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
1462 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, EMSGSIZE,
1463 0, 0, 0, 0);
1464 return EMSGSIZE;
1465 }
1466
1467 /* allocate a uio large enough to hold the number of iovecs passed */
1468 auio = uio_create(user_msg.msg_iovlen, 0,
1469 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1470 UIO_WRITE);
1471 if (auio == NULL) {
1472 error = ENOBUFS;
1473 goto done;
1474 }
1475
1476 if (user_msg.msg_iovlen) {
1477 /*
1478 * get location of iovecs within the uio.
1479 * then copyin the iovecs from user space.
1480 */
1481 iovp = uio_iovsaddr(auio);
1482 if (iovp == NULL) {
1483 error = ENOBUFS;
1484 goto done;
1485 }
1486 error = copyin_user_iovec_array(user_msg.msg_iov,
1487 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
1488 user_msg.msg_iovlen, iovp);
1489 if (error) {
1490 goto done;
1491 }
1492 user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
1493
1494 /* finish setup of uio_t */
1495 error = uio_calculateresid(auio);
1496 if (error) {
1497 goto done;
1498 }
1499 } else {
1500 user_msg.msg_iov = 0;
1501 }
1502
1503 /* msg_flags is ignored for send */
1504 user_msg.msg_flags = 0;
1505
1506 error = file_socket(uap->s, &so);
1507 if (error) {
1508 goto done;
1509 }
1510 if (so == NULL) {
1511 error = EBADF;
1512 } else {
1513 error = sendit(p, so, &user_msg, auio, uap->flags, retval);
1514 }
1515 file_drop(uap->s);
1516 done:
1517 if (auio != NULL) {
1518 uio_free(auio);
1519 }
1520 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1521
1522 return error;
1523 }
1524
1525 int
1526 sendmsg_x(struct proc *p, struct sendmsg_x_args *uap, user_ssize_t *retval)
1527 {
1528 int error = 0;
1529 struct user_msghdr_x *user_msg_x = NULL;
1530 struct uio **uiop = NULL;
1531 struct socket *so;
1532 u_int i;
1533 struct sockaddr *to = NULL;
1534 user_ssize_t len_before = 0, len_after;
1535 int need_drop = 0;
1536 size_t size_of_msghdr;
1537 void *umsgp = NULL;
1538 u_int uiocnt;
1539 int has_addr_or_ctl = 0;
1540
1541 KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0);
1542
1543 size_of_msghdr = IS_64BIT_PROCESS(p) ?
1544 sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x);
1545
1546 if (uap->flags & MSG_SKIPCFIL) {
1547 error = EPERM;
1548 goto out;
1549 }
1550
1551 error = file_socket(uap->s, &so);
1552 if (error) {
1553 goto out;
1554 }
1555 need_drop = 1;
1556 if (so == NULL) {
1557 error = EBADF;
1558 goto out;
1559 }
1560
1561 /*
1562 * Input parameter range check
1563 */
1564 if (uap->cnt == 0 || uap->cnt > UIO_MAXIOV) {
1565 error = EINVAL;
1566 goto out;
1567 }
1568 /*
1569 * Clip to max currently allowed
1570 */
1571 if (uap->cnt > somaxsendmsgx) {
1572 uap->cnt = somaxsendmsgx;
1573 }
1574
1575 user_msg_x = kheap_alloc(KHEAP_TEMP,
1576 uap->cnt * sizeof(struct user_msghdr_x), Z_WAITOK | Z_ZERO);
1577 if (user_msg_x == NULL) {
1578 DBG_PRINTF("%s kheap_alloc user_msg_x failed\n", __func__);
1579 error = ENOMEM;
1580 goto out;
1581 }
1582 uiop = kheap_alloc(KHEAP_TEMP,
1583 uap->cnt * sizeof(struct uio *), Z_WAITOK | Z_ZERO);
1584 if (uiop == NULL) {
1585 DBG_PRINTF("%s kheap_alloc uiop failed\n", __func__);
1586 error = ENOMEM;
1587 goto out;
1588 }
1589
1590 umsgp = kheap_alloc(KHEAP_TEMP,
1591 uap->cnt * size_of_msghdr, Z_WAITOK | Z_ZERO);
1592 if (umsgp == NULL) {
1593 printf("%s kheap_alloc user_msg_x failed\n", __func__);
1594 error = ENOMEM;
1595 goto out;
1596 }
1597 error = copyin(uap->msgp, umsgp, uap->cnt * size_of_msghdr);
1598 if (error) {
1599 DBG_PRINTF("%s copyin() failed\n", __func__);
1600 goto out;
1601 }
1602 error = internalize_user_msghdr_array(umsgp,
1603 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
1604 UIO_WRITE, uap->cnt, user_msg_x, uiop);
1605 if (error) {
1606 DBG_PRINTF("%s copyin_user_msghdr_array() failed\n", __func__);
1607 goto out;
1608 }
1609 /*
1610 * Make sure the size of each message iovec and
1611 * the aggregate size of all the iovec is valid
1612 */
1613 if (uio_array_is_valid(uiop, uap->cnt) == false) {
1614 error = EINVAL;
1615 goto out;
1616 }
1617
1618 /*
1619 * Sanity check on passed arguments
1620 */
1621 for (i = 0; i < uap->cnt; i++) {
1622 struct user_msghdr_x *mp = user_msg_x + i;
1623
1624 /*
1625 * No flags on send message
1626 */
1627 if (mp->msg_flags != 0) {
1628 error = EINVAL;
1629 goto out;
1630 }
1631 /*
1632 * No support for address or ancillary data (yet)
1633 */
1634 if (mp->msg_name != USER_ADDR_NULL || mp->msg_namelen != 0) {
1635 has_addr_or_ctl = 1;
1636 }
1637
1638 if (mp->msg_control != USER_ADDR_NULL ||
1639 mp->msg_controllen != 0) {
1640 has_addr_or_ctl = 1;
1641 }
1642
1643 #if CONFIG_MACF_SOCKET_SUBSET
1644 /*
1645 * We check the state without holding the socket lock;
1646 * if a race condition occurs, it would simply result
1647 * in an extra call to the MAC check function.
1648 *
1649 * Note: The following check is never true taken with the
1650 * current limitation that we do not accept to pass an address,
1651 * this is effectively placeholder code. If we add support for
1652 * addresses, we will have to check every address.
1653 */
1654 if (to != NULL &&
1655 !(so->so_state & SS_DEFUNCT) &&
1656 (error = mac_socket_check_send(kauth_cred_get(), so, to))
1657 != 0) {
1658 goto out;
1659 }
1660 #endif /* MAC_SOCKET_SUBSET */
1661 }
1662
1663 len_before = uio_array_resid(uiop, uap->cnt);
1664
1665 /*
1666 * Feed list of packets at once only for connected socket without
1667 * control message
1668 */
1669 if (so->so_proto->pr_usrreqs->pru_sosend_list !=
1670 pru_sosend_list_notsupp &&
1671 has_addr_or_ctl == 0 && somaxsendmsgx == 0) {
1672 error = so->so_proto->pr_usrreqs->pru_sosend_list(so, uiop,
1673 uap->cnt, uap->flags);
1674 } else {
1675 for (i = 0; i < uap->cnt; i++) {
1676 struct user_msghdr_x *mp = user_msg_x + i;
1677 struct user_msghdr user_msg;
1678 uio_t auio = uiop[i];
1679 int32_t tmpval;
1680
1681 user_msg.msg_flags = mp->msg_flags;
1682 user_msg.msg_controllen = mp->msg_controllen;
1683 user_msg.msg_control = mp->msg_control;
1684 user_msg.msg_iovlen = mp->msg_iovlen;
1685 user_msg.msg_iov = mp->msg_iov;
1686 user_msg.msg_namelen = mp->msg_namelen;
1687 user_msg.msg_name = mp->msg_name;
1688
1689 error = sendit(p, so, &user_msg, auio, uap->flags,
1690 &tmpval);
1691 if (error != 0) {
1692 break;
1693 }
1694 }
1695 }
1696 len_after = uio_array_resid(uiop, uap->cnt);
1697
1698 VERIFY(len_after <= len_before);
1699
1700 if (error != 0) {
1701 if (len_after != len_before && (error == ERESTART ||
1702 error == EINTR || error == EWOULDBLOCK ||
1703 error == ENOBUFS)) {
1704 error = 0;
1705 }
1706 /* Generation of SIGPIPE can be controlled per socket */
1707 if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE) &&
1708 !(uap->flags & MSG_NOSIGNAL)) {
1709 psignal(p, SIGPIPE);
1710 }
1711 }
1712 if (error == 0) {
1713 uiocnt = externalize_user_msghdr_array(umsgp,
1714 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
1715 UIO_WRITE, uap->cnt, user_msg_x, uiop);
1716
1717 *retval = (int)(uiocnt);
1718 }
1719 out:
1720 if (need_drop) {
1721 file_drop(uap->s);
1722 }
1723 kheap_free(KHEAP_TEMP, umsgp, uap->cnt * size_of_msghdr);
1724 if (uiop != NULL) {
1725 free_uio_array(uiop, uap->cnt);
1726 kheap_free(KHEAP_TEMP, uiop,
1727 uap->cnt * sizeof(struct uio *));
1728 }
1729 kheap_free(KHEAP_TEMP, user_msg_x,
1730 uap->cnt * sizeof(struct user_msghdr_x));
1731
1732 KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0);
1733
1734 return error;
1735 }
1736
1737
1738 static int
1739 copyout_sa(struct sockaddr *fromsa, user_addr_t name, socklen_t *namelen)
1740 {
1741 int error = 0;
1742 socklen_t sa_len = 0;
1743 ssize_t len;
1744
1745 len = *namelen;
1746 if (len <= 0 || fromsa == 0) {
1747 len = 0;
1748 } else {
1749 #ifndef MIN
1750 #define MIN(a, b) ((a) > (b) ? (b) : (a))
1751 #endif
1752 sa_len = fromsa->sa_len;
1753 len = MIN((unsigned int)len, sa_len);
1754 error = copyout(fromsa, name, (unsigned)len);
1755 if (error) {
1756 goto out;
1757 }
1758 }
1759 *namelen = sa_len;
1760 out:
1761 return 0;
1762 }
1763
1764 static int
1765 copyout_control(struct proc *p, struct mbuf *m, user_addr_t control,
1766 socklen_t *controllen, int *flags, struct socket *so)
1767 {
1768 int error = 0;
1769 socklen_t len;
1770 user_addr_t ctlbuf;
1771 struct inpcb *inp = so ? sotoinpcb(so) : NULL;
1772
1773 len = *controllen;
1774 *controllen = 0;
1775 ctlbuf = control;
1776
1777 while (m && len > 0) {
1778 socklen_t tocopy;
1779 struct cmsghdr *cp = mtod(m, struct cmsghdr *);
1780 socklen_t cp_size = CMSG_ALIGN(cp->cmsg_len);
1781 socklen_t buflen = m->m_len;
1782
1783 while (buflen > 0 && len > 0) {
1784 /*
1785 * SCM_TIMESTAMP hack because struct timeval has a
1786 * different size for 32 bits and 64 bits processes
1787 */
1788 if (cp->cmsg_level == SOL_SOCKET && cp->cmsg_type == SCM_TIMESTAMP) {
1789 unsigned char tmp_buffer[CMSG_SPACE(sizeof(struct user64_timeval))] = {};
1790 struct cmsghdr *tmp_cp = (struct cmsghdr *)(void *)tmp_buffer;
1791 socklen_t tmp_space;
1792 struct timeval *tv = (struct timeval *)(void *)CMSG_DATA(cp);
1793
1794 tmp_cp->cmsg_level = SOL_SOCKET;
1795 tmp_cp->cmsg_type = SCM_TIMESTAMP;
1796
1797 if (proc_is64bit(p)) {
1798 struct user64_timeval *tv64 = (struct user64_timeval *)(void *)CMSG_DATA(tmp_cp);
1799
1800 os_unaligned_deref(&tv64->tv_sec) = tv->tv_sec;
1801 os_unaligned_deref(&tv64->tv_usec) = tv->tv_usec;
1802
1803 tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user64_timeval));
1804 tmp_space = CMSG_SPACE(sizeof(struct user64_timeval));
1805 } else {
1806 struct user32_timeval *tv32 = (struct user32_timeval *)(void *)CMSG_DATA(tmp_cp);
1807
1808 tv32->tv_sec = (user32_time_t)tv->tv_sec;
1809 tv32->tv_usec = tv->tv_usec;
1810
1811 tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user32_timeval));
1812 tmp_space = CMSG_SPACE(sizeof(struct user32_timeval));
1813 }
1814 if (len >= tmp_space) {
1815 tocopy = tmp_space;
1816 } else {
1817 *flags |= MSG_CTRUNC;
1818 tocopy = len;
1819 }
1820 error = copyout(tmp_buffer, ctlbuf, tocopy);
1821 if (error) {
1822 goto out;
1823 }
1824 } else {
1825 #if CONTENT_FILTER
1826 /* If socket is attached to Content Filter and socket did not request address, ignore it */
1827 if ((so != NULL) && (so->so_cfil_db != NULL) &&
1828 ((cp->cmsg_level == IPPROTO_IP && cp->cmsg_type == IP_RECVDSTADDR && inp &&
1829 !(inp->inp_flags & INP_RECVDSTADDR)) ||
1830 (cp->cmsg_level == IPPROTO_IPV6 && (cp->cmsg_type == IPV6_PKTINFO || cp->cmsg_type == IPV6_2292PKTINFO) && inp &&
1831 !(inp->inp_flags & IN6P_PKTINFO)))) {
1832 tocopy = 0;
1833 } else
1834 #endif
1835 {
1836 if (cp_size > buflen) {
1837 panic("cp_size > buflen, something"
1838 "wrong with alignment!");
1839 }
1840 if (len >= cp_size) {
1841 tocopy = cp_size;
1842 } else {
1843 *flags |= MSG_CTRUNC;
1844 tocopy = len;
1845 }
1846 error = copyout((caddr_t) cp, ctlbuf, tocopy);
1847 if (error) {
1848 goto out;
1849 }
1850 }
1851 }
1852
1853 ctlbuf += tocopy;
1854 len -= tocopy;
1855
1856 buflen -= cp_size;
1857 cp = (struct cmsghdr *)(void *)
1858 ((unsigned char *) cp + cp_size);
1859 cp_size = CMSG_ALIGN(cp->cmsg_len);
1860 }
1861
1862 m = m->m_next;
1863 }
1864 *controllen = (socklen_t)(ctlbuf - control);
1865 out:
1866 return error;
1867 }
1868
1869 /*
1870 * Returns: 0 Success
1871 * ENOTSOCK
1872 * EINVAL
1873 * EBADF
1874 * EACCES Mandatory Access Control failure
1875 * copyout:EFAULT
1876 * fp_lookup:EBADF
1877 * <pru_soreceive>:ENOBUFS
1878 * <pru_soreceive>:ENOTCONN
1879 * <pru_soreceive>:EWOULDBLOCK
1880 * <pru_soreceive>:EFAULT
1881 * <pru_soreceive>:EINTR
1882 * <pru_soreceive>:EBADF
1883 * <pru_soreceive>:EINVAL
1884 * <pru_soreceive>:EMSGSIZE
1885 * <pru_soreceive>:???
1886 *
1887 * Notes: Additional return values from calls through <pru_soreceive>
1888 * depend on protocols other than TCP or AF_UNIX, which are
1889 * documented above.
1890 */
1891 static int
1892 recvit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop,
1893 user_addr_t namelenp, int32_t *retval)
1894 {
1895 ssize_t len;
1896 int error;
1897 struct mbuf *control = 0;
1898 struct socket *so;
1899 struct sockaddr *fromsa = 0;
1900 struct fileproc *fp;
1901
1902 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
1903 if ((error = fp_get_ftype(p, s, DTYPE_SOCKET, ENOTSOCK, &fp))) {
1904 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
1905 return error;
1906 }
1907 so = fp->f_data;
1908
1909 #if CONFIG_MACF_SOCKET_SUBSET
1910 /*
1911 * We check the state without holding the socket lock;
1912 * if a race condition occurs, it would simply result
1913 * in an extra call to the MAC check function.
1914 */
1915 if (!(so->so_state & SS_DEFUNCT) &&
1916 !(so->so_state & SS_ISCONNECTED) &&
1917 !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
1918 (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0) {
1919 goto out1;
1920 }
1921 #endif /* MAC_SOCKET_SUBSET */
1922 if (uio_resid(uiop) < 0 || uio_resid(uiop) > INT_MAX) {
1923 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, EINVAL, 0, 0, 0, 0);
1924 error = EINVAL;
1925 goto out1;
1926 }
1927
1928 len = uio_resid(uiop);
1929 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, uiop,
1930 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
1931 &mp->msg_flags);
1932 if (fromsa) {
1933 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()),
1934 fromsa);
1935 }
1936 if (error) {
1937 if (uio_resid(uiop) != len && (error == ERESTART ||
1938 error == EINTR || error == EWOULDBLOCK)) {
1939 error = 0;
1940 }
1941 }
1942 if (error) {
1943 goto out;
1944 }
1945
1946 *retval = (int32_t)(len - uio_resid(uiop));
1947
1948 if (mp->msg_name) {
1949 error = copyout_sa(fromsa, mp->msg_name, &mp->msg_namelen);
1950 if (error) {
1951 goto out;
1952 }
1953 /* return the actual, untruncated address length */
1954 if (namelenp &&
1955 (error = copyout((caddr_t)&mp->msg_namelen, namelenp,
1956 sizeof(int)))) {
1957 goto out;
1958 }
1959 }
1960
1961 if (mp->msg_control) {
1962 error = copyout_control(p, control, mp->msg_control,
1963 &mp->msg_controllen, &mp->msg_flags, so);
1964 }
1965 out:
1966 FREE(fromsa, M_SONAME);
1967 if (control) {
1968 m_freem(control);
1969 }
1970 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
1971 out1:
1972 fp_drop(p, s, fp, 0);
1973 return error;
1974 }
1975
1976 /*
1977 * Returns: 0 Success
1978 * ENOMEM
1979 * copyin:EFAULT
1980 * recvit:???
1981 * read:??? [4056224: applicable for pipes]
1982 *
1983 * Notes: The read entry point is only called as part of support for
1984 * binary backward compatibility; new code should use read
1985 * instead of recv or recvfrom when attempting to read data
1986 * from pipes.
1987 *
1988 * For full documentation of the return codes from recvit, see
1989 * the block header for the recvit function.
1990 */
1991 int
1992 recvfrom(struct proc *p, struct recvfrom_args *uap, int32_t *retval)
1993 {
1994 __pthread_testcancel(1);
1995 return recvfrom_nocancel(p, (struct recvfrom_nocancel_args *)uap,
1996 retval);
1997 }
1998
1999 int
2000 recvfrom_nocancel(struct proc *p, struct recvfrom_nocancel_args *uap,
2001 int32_t *retval)
2002 {
2003 struct user_msghdr msg;
2004 int error;
2005 uio_t auio = NULL;
2006
2007 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_START, 0, 0, 0, 0, 0);
2008 AUDIT_ARG(fd, uap->s);
2009
2010 if (uap->fromlenaddr) {
2011 error = copyin(uap->fromlenaddr,
2012 (caddr_t)&msg.msg_namelen, sizeof(msg.msg_namelen));
2013 if (error) {
2014 return error;
2015 }
2016 } else {
2017 msg.msg_namelen = 0;
2018 }
2019 msg.msg_name = uap->from;
2020 auio = uio_create(1, 0,
2021 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
2022 UIO_READ);
2023 if (auio == NULL) {
2024 return ENOMEM;
2025 }
2026
2027 uio_addiov(auio, uap->buf, uap->len);
2028 /* no need to set up msg_iov. recvit uses uio_t we send it */
2029 msg.msg_iov = 0;
2030 msg.msg_iovlen = 0;
2031 msg.msg_control = 0;
2032 msg.msg_controllen = 0;
2033 msg.msg_flags = uap->flags;
2034 error = recvit(p, uap->s, &msg, auio, uap->fromlenaddr, retval);
2035 if (auio != NULL) {
2036 uio_free(auio);
2037 }
2038
2039 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_END, error, 0, 0, 0, 0);
2040
2041 return error;
2042 }
2043
2044 /*
2045 * Returns: 0 Success
2046 * EMSGSIZE
2047 * ENOMEM
2048 * copyin:EFAULT
2049 * copyout:EFAULT
2050 * recvit:???
2051 *
2052 * Notes: For full documentation of the return codes from recvit, see
2053 * the block header for the recvit function.
2054 */
2055 int
2056 recvmsg(struct proc *p, struct recvmsg_args *uap, int32_t *retval)
2057 {
2058 __pthread_testcancel(1);
2059 return recvmsg_nocancel(p, (struct recvmsg_nocancel_args *)uap,
2060 retval);
2061 }
2062
2063 int
2064 recvmsg_nocancel(struct proc *p, struct recvmsg_nocancel_args *uap,
2065 int32_t *retval)
2066 {
2067 struct user32_msghdr msg32;
2068 struct user64_msghdr msg64;
2069 struct user_msghdr user_msg;
2070 caddr_t msghdrp;
2071 int size_of_msghdr;
2072 user_addr_t uiov;
2073 int error;
2074 uio_t auio = NULL;
2075 struct user_iovec *iovp;
2076
2077 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
2078 AUDIT_ARG(fd, uap->s);
2079 if (IS_64BIT_PROCESS(p)) {
2080 msghdrp = (caddr_t)&msg64;
2081 size_of_msghdr = sizeof(msg64);
2082 } else {
2083 msghdrp = (caddr_t)&msg32;
2084 size_of_msghdr = sizeof(msg32);
2085 }
2086 error = copyin(uap->msg, msghdrp, size_of_msghdr);
2087 if (error) {
2088 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
2089 return error;
2090 }
2091
2092 /* only need to copy if user process is not 64-bit */
2093 if (IS_64BIT_PROCESS(p)) {
2094 user_msg.msg_flags = msg64.msg_flags;
2095 user_msg.msg_controllen = msg64.msg_controllen;
2096 user_msg.msg_control = (user_addr_t)msg64.msg_control;
2097 user_msg.msg_iovlen = msg64.msg_iovlen;
2098 user_msg.msg_iov = (user_addr_t)msg64.msg_iov;
2099 user_msg.msg_namelen = msg64.msg_namelen;
2100 user_msg.msg_name = (user_addr_t)msg64.msg_name;
2101 } else {
2102 user_msg.msg_flags = msg32.msg_flags;
2103 user_msg.msg_controllen = msg32.msg_controllen;
2104 user_msg.msg_control = msg32.msg_control;
2105 user_msg.msg_iovlen = msg32.msg_iovlen;
2106 user_msg.msg_iov = msg32.msg_iov;
2107 user_msg.msg_namelen = msg32.msg_namelen;
2108 user_msg.msg_name = msg32.msg_name;
2109 }
2110
2111 if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
2112 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, EMSGSIZE,
2113 0, 0, 0, 0);
2114 return EMSGSIZE;
2115 }
2116
2117 user_msg.msg_flags = uap->flags;
2118
2119 /* allocate a uio large enough to hold the number of iovecs passed */
2120 auio = uio_create(user_msg.msg_iovlen, 0,
2121 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
2122 UIO_READ);
2123 if (auio == NULL) {
2124 error = ENOMEM;
2125 goto done;
2126 }
2127
2128 /*
2129 * get location of iovecs within the uio. then copyin the iovecs from
2130 * user space.
2131 */
2132 iovp = uio_iovsaddr(auio);
2133 if (iovp == NULL) {
2134 error = ENOMEM;
2135 goto done;
2136 }
2137 uiov = user_msg.msg_iov;
2138 user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
2139 error = copyin_user_iovec_array(uiov,
2140 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
2141 user_msg.msg_iovlen, iovp);
2142 if (error) {
2143 goto done;
2144 }
2145
2146 /* finish setup of uio_t */
2147 error = uio_calculateresid(auio);
2148 if (error) {
2149 goto done;
2150 }
2151
2152 error = recvit(p, uap->s, &user_msg, auio, 0, retval);
2153 if (!error) {
2154 user_msg.msg_iov = uiov;
2155 if (IS_64BIT_PROCESS(p)) {
2156 msg64.msg_flags = user_msg.msg_flags;
2157 msg64.msg_controllen = user_msg.msg_controllen;
2158 msg64.msg_control = user_msg.msg_control;
2159 msg64.msg_iovlen = user_msg.msg_iovlen;
2160 msg64.msg_iov = user_msg.msg_iov;
2161 msg64.msg_namelen = user_msg.msg_namelen;
2162 msg64.msg_name = user_msg.msg_name;
2163 } else {
2164 msg32.msg_flags = user_msg.msg_flags;
2165 msg32.msg_controllen = user_msg.msg_controllen;
2166 msg32.msg_control = (user32_addr_t)user_msg.msg_control;
2167 msg32.msg_iovlen = user_msg.msg_iovlen;
2168 msg32.msg_iov = (user32_addr_t)user_msg.msg_iov;
2169 msg32.msg_namelen = user_msg.msg_namelen;
2170 msg32.msg_name = (user32_addr_t)user_msg.msg_name;
2171 }
2172 error = copyout(msghdrp, uap->msg, size_of_msghdr);
2173 }
2174 done:
2175 if (auio != NULL) {
2176 uio_free(auio);
2177 }
2178 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
2179 return error;
2180 }
2181
2182 int
2183 recvmsg_x(struct proc *p, struct recvmsg_x_args *uap, user_ssize_t *retval)
2184 {
2185 int error = EOPNOTSUPP;
2186 struct user_msghdr_x *user_msg_x = NULL;
2187 struct recv_msg_elem *recv_msg_array = NULL;
2188 struct socket *so;
2189 user_ssize_t len_before = 0, len_after;
2190 int need_drop = 0;
2191 size_t size_of_msghdr;
2192 void *umsgp = NULL;
2193 u_int i;
2194 u_int uiocnt;
2195
2196 KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0);
2197
2198 size_of_msghdr = IS_64BIT_PROCESS(p) ?
2199 sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x);
2200
2201 error = file_socket(uap->s, &so);
2202 if (error) {
2203 goto out;
2204 }
2205 need_drop = 1;
2206 if (so == NULL) {
2207 error = EBADF;
2208 goto out;
2209 }
2210 /*
2211 * Support only a subset of message flags
2212 */
2213 if (uap->flags & ~(MSG_PEEK | MSG_WAITALL | MSG_DONTWAIT | MSG_NEEDSA | MSG_NBIO)) {
2214 return EOPNOTSUPP;
2215 }
2216 /*
2217 * Input parameter range check
2218 */
2219 if (uap->cnt == 0 || uap->cnt > UIO_MAXIOV) {
2220 error = EINVAL;
2221 goto out;
2222 }
2223 if (uap->cnt > somaxrecvmsgx) {
2224 uap->cnt = somaxrecvmsgx;
2225 }
2226
2227 user_msg_x = kheap_alloc(KHEAP_TEMP,
2228 uap->cnt * sizeof(struct user_msghdr_x), Z_WAITOK | Z_ZERO);
2229 if (user_msg_x == NULL) {
2230 DBG_PRINTF("%s kheap_alloc user_msg_x failed\n", __func__);
2231 error = ENOMEM;
2232 goto out;
2233 }
2234 recv_msg_array = alloc_recv_msg_array(uap->cnt);
2235 if (recv_msg_array == NULL) {
2236 DBG_PRINTF("%s alloc_recv_msg_array() failed\n", __func__);
2237 error = ENOMEM;
2238 goto out;
2239 }
2240
2241 umsgp = kheap_alloc(KHEAP_TEMP,
2242 uap->cnt * size_of_msghdr, Z_WAITOK | Z_ZERO);
2243 if (umsgp == NULL) {
2244 DBG_PRINTF("%s kheap_alloc umsgp failed\n", __func__);
2245 error = ENOMEM;
2246 goto out;
2247 }
2248 error = copyin(uap->msgp, umsgp, uap->cnt * size_of_msghdr);
2249 if (error) {
2250 DBG_PRINTF("%s copyin() failed\n", __func__);
2251 goto out;
2252 }
2253 error = internalize_recv_msghdr_array(umsgp,
2254 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
2255 UIO_READ, uap->cnt, user_msg_x, recv_msg_array);
2256 if (error) {
2257 DBG_PRINTF("%s copyin_user_msghdr_array() failed\n", __func__);
2258 goto out;
2259 }
2260 /*
2261 * Make sure the size of each message iovec and
2262 * the aggregate size of all the iovec is valid
2263 */
2264 if (recv_msg_array_is_valid(recv_msg_array, uap->cnt) == 0) {
2265 error = EINVAL;
2266 goto out;
2267 }
2268 /*
2269 * Sanity check on passed arguments
2270 */
2271 for (i = 0; i < uap->cnt; i++) {
2272 struct user_msghdr_x *mp = user_msg_x + i;
2273
2274 if (mp->msg_flags != 0) {
2275 error = EINVAL;
2276 goto out;
2277 }
2278 }
2279 #if CONFIG_MACF_SOCKET_SUBSET
2280 /*
2281 * We check the state without holding the socket lock;
2282 * if a race condition occurs, it would simply result
2283 * in an extra call to the MAC check function.
2284 */
2285 if (!(so->so_state & SS_DEFUNCT) &&
2286 !(so->so_state & SS_ISCONNECTED) &&
2287 !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
2288 (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0) {
2289 goto out;
2290 }
2291 #endif /* MAC_SOCKET_SUBSET */
2292
2293 len_before = recv_msg_array_resid(recv_msg_array, uap->cnt);
2294
2295 if (so->so_proto->pr_usrreqs->pru_soreceive_list !=
2296 pru_soreceive_list_notsupp &&
2297 somaxrecvmsgx == 0) {
2298 error = so->so_proto->pr_usrreqs->pru_soreceive_list(so,
2299 recv_msg_array, uap->cnt, &uap->flags);
2300 } else {
2301 int flags = uap->flags;
2302
2303 for (i = 0; i < uap->cnt; i++) {
2304 struct recv_msg_elem *recv_msg_elem;
2305 uio_t auio;
2306 struct sockaddr **psa;
2307 struct mbuf **controlp;
2308
2309 recv_msg_elem = recv_msg_array + i;
2310 auio = recv_msg_elem->uio;
2311
2312 /*
2313 * Do not block if we got at least one packet
2314 */
2315 if (i > 0) {
2316 flags |= MSG_DONTWAIT;
2317 }
2318
2319 psa = (recv_msg_elem->which & SOCK_MSG_SA) ?
2320 &recv_msg_elem->psa : NULL;
2321 controlp = (recv_msg_elem->which & SOCK_MSG_CONTROL) ?
2322 &recv_msg_elem->controlp : NULL;
2323
2324 error = so->so_proto->pr_usrreqs->pru_soreceive(so, psa,
2325 auio, (struct mbuf **)NULL, controlp, &flags);
2326 if (error) {
2327 break;
2328 }
2329 /*
2330 * We have some data
2331 */
2332 recv_msg_elem->which |= SOCK_MSG_DATA;
2333 /*
2334 * Set the messages flags for this packet
2335 */
2336 flags &= ~MSG_DONTWAIT;
2337 recv_msg_elem->flags = flags;
2338 /*
2339 * Stop on partial copy
2340 */
2341 if (recv_msg_elem->flags & (MSG_RCVMORE | MSG_TRUNC)) {
2342 break;
2343 }
2344 }
2345 }
2346
2347 len_after = recv_msg_array_resid(recv_msg_array, uap->cnt);
2348
2349 if (error) {
2350 if (len_after != len_before && (error == ERESTART ||
2351 error == EINTR || error == EWOULDBLOCK)) {
2352 error = 0;
2353 } else {
2354 goto out;
2355 }
2356 }
2357
2358 uiocnt = externalize_recv_msghdr_array(p, so, umsgp,
2359 uap->cnt, user_msg_x, recv_msg_array, &error);
2360 if (error != 0) {
2361 goto out;
2362 }
2363
2364 error = copyout(umsgp, uap->msgp, uap->cnt * size_of_msghdr);
2365 if (error) {
2366 DBG_PRINTF("%s copyout() failed\n", __func__);
2367 goto out;
2368 }
2369 *retval = (int)(uiocnt);
2370
2371 out:
2372 if (need_drop) {
2373 file_drop(uap->s);
2374 }
2375 kheap_free(KHEAP_TEMP, umsgp, uap->cnt * size_of_msghdr);
2376 free_recv_msg_array(recv_msg_array, uap->cnt);
2377 kheap_free(KHEAP_TEMP, user_msg_x,
2378 uap->cnt * sizeof(struct user_msghdr_x));
2379
2380 KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0);
2381
2382 return error;
2383 }
2384
2385 /*
2386 * Returns: 0 Success
2387 * EBADF
2388 * file_socket:ENOTSOCK
2389 * file_socket:EBADF
2390 * soshutdown:EINVAL
2391 * soshutdown:ENOTCONN
2392 * soshutdown:EADDRNOTAVAIL[TCP]
2393 * soshutdown:ENOBUFS[TCP]
2394 * soshutdown:EMSGSIZE[TCP]
2395 * soshutdown:EHOSTUNREACH[TCP]
2396 * soshutdown:ENETUNREACH[TCP]
2397 * soshutdown:ENETDOWN[TCP]
2398 * soshutdown:ENOMEM[TCP]
2399 * soshutdown:EACCES[TCP]
2400 * soshutdown:EMSGSIZE[TCP]
2401 * soshutdown:ENOBUFS[TCP]
2402 * soshutdown:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
2403 * soshutdown:??? [other protocol families]
2404 */
2405 /* ARGSUSED */
2406 int
2407 shutdown(__unused struct proc *p, struct shutdown_args *uap,
2408 __unused int32_t *retval)
2409 {
2410 struct socket *so;
2411 int error;
2412
2413 AUDIT_ARG(fd, uap->s);
2414 error = file_socket(uap->s, &so);
2415 if (error) {
2416 return error;
2417 }
2418 if (so == NULL) {
2419 error = EBADF;
2420 goto out;
2421 }
2422 error = soshutdown((struct socket *)so, uap->how);
2423 out:
2424 file_drop(uap->s);
2425 return error;
2426 }
2427
2428 /*
2429 * Returns: 0 Success
2430 * EFAULT
2431 * EINVAL
2432 * EACCES Mandatory Access Control failure
2433 * file_socket:ENOTSOCK
2434 * file_socket:EBADF
2435 * sosetopt:EINVAL
2436 * sosetopt:ENOPROTOOPT
2437 * sosetopt:ENOBUFS
2438 * sosetopt:EDOM
2439 * sosetopt:EFAULT
2440 * sosetopt:EOPNOTSUPP[AF_UNIX]
2441 * sosetopt:???
2442 */
2443 /* ARGSUSED */
2444 int
2445 setsockopt(struct proc *p, struct setsockopt_args *uap,
2446 __unused int32_t *retval)
2447 {
2448 struct socket *so;
2449 struct sockopt sopt;
2450 int error;
2451
2452 AUDIT_ARG(fd, uap->s);
2453 if (uap->val == 0 && uap->valsize != 0) {
2454 return EFAULT;
2455 }
2456 /* No bounds checking on size (it's unsigned) */
2457
2458 error = file_socket(uap->s, &so);
2459 if (error) {
2460 return error;
2461 }
2462
2463 sopt.sopt_dir = SOPT_SET;
2464 sopt.sopt_level = uap->level;
2465 sopt.sopt_name = uap->name;
2466 sopt.sopt_val = uap->val;
2467 sopt.sopt_valsize = uap->valsize;
2468 sopt.sopt_p = p;
2469
2470 if (so == NULL) {
2471 error = EINVAL;
2472 goto out;
2473 }
2474 #if CONFIG_MACF_SOCKET_SUBSET
2475 if ((error = mac_socket_check_setsockopt(kauth_cred_get(), so,
2476 &sopt)) != 0) {
2477 goto out;
2478 }
2479 #endif /* MAC_SOCKET_SUBSET */
2480 error = sosetoptlock(so, &sopt, 1); /* will lock socket */
2481 out:
2482 file_drop(uap->s);
2483 return error;
2484 }
2485
2486
2487
2488 /*
2489 * Returns: 0 Success
2490 * EINVAL
2491 * EBADF
2492 * EACCES Mandatory Access Control failure
2493 * copyin:EFAULT
2494 * copyout:EFAULT
2495 * file_socket:ENOTSOCK
2496 * file_socket:EBADF
2497 * sogetopt:???
2498 */
2499 int
2500 getsockopt(struct proc *p, struct getsockopt_args *uap,
2501 __unused int32_t *retval)
2502 {
2503 int error;
2504 socklen_t valsize;
2505 struct sockopt sopt;
2506 struct socket *so;
2507
2508 error = file_socket(uap->s, &so);
2509 if (error) {
2510 return error;
2511 }
2512 if (uap->val) {
2513 error = copyin(uap->avalsize, (caddr_t)&valsize,
2514 sizeof(valsize));
2515 if (error) {
2516 goto out;
2517 }
2518 /* No bounds checking on size (it's unsigned) */
2519 } else {
2520 valsize = 0;
2521 }
2522 sopt.sopt_dir = SOPT_GET;
2523 sopt.sopt_level = uap->level;
2524 sopt.sopt_name = uap->name;
2525 sopt.sopt_val = uap->val;
2526 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
2527 sopt.sopt_p = p;
2528
2529 if (so == NULL) {
2530 error = EBADF;
2531 goto out;
2532 }
2533 #if CONFIG_MACF_SOCKET_SUBSET
2534 if ((error = mac_socket_check_getsockopt(kauth_cred_get(), so,
2535 &sopt)) != 0) {
2536 goto out;
2537 }
2538 #endif /* MAC_SOCKET_SUBSET */
2539 error = sogetoptlock((struct socket *)so, &sopt, 1); /* will lock */
2540 if (error == 0) {
2541 valsize = (socklen_t)sopt.sopt_valsize;
2542 error = copyout((caddr_t)&valsize, uap->avalsize,
2543 sizeof(valsize));
2544 }
2545 out:
2546 file_drop(uap->s);
2547 return error;
2548 }
2549
2550
2551 /*
2552 * Get socket name.
2553 *
2554 * Returns: 0 Success
2555 * EBADF
2556 * file_socket:ENOTSOCK
2557 * file_socket:EBADF
2558 * copyin:EFAULT
2559 * copyout:EFAULT
2560 * <pru_sockaddr>:ENOBUFS[TCP]
2561 * <pru_sockaddr>:ECONNRESET[TCP]
2562 * <pru_sockaddr>:EINVAL[AF_UNIX]
2563 * <sf_getsockname>:???
2564 */
2565 /* ARGSUSED */
2566 int
2567 getsockname(__unused struct proc *p, struct getsockname_args *uap,
2568 __unused int32_t *retval)
2569 {
2570 struct socket *so;
2571 struct sockaddr *sa;
2572 socklen_t len;
2573 socklen_t sa_len;
2574 int error;
2575
2576 error = file_socket(uap->fdes, &so);
2577 if (error) {
2578 return error;
2579 }
2580 error = copyin(uap->alen, (caddr_t)&len, sizeof(socklen_t));
2581 if (error) {
2582 goto out;
2583 }
2584 if (so == NULL) {
2585 error = EBADF;
2586 goto out;
2587 }
2588 sa = 0;
2589 socket_lock(so, 1);
2590 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
2591 if (error == 0) {
2592 error = sflt_getsockname(so, &sa);
2593 if (error == EJUSTRETURN) {
2594 error = 0;
2595 }
2596 }
2597 socket_unlock(so, 1);
2598 if (error) {
2599 goto bad;
2600 }
2601 if (sa == 0) {
2602 len = 0;
2603 goto gotnothing;
2604 }
2605
2606 sa_len = sa->sa_len;
2607 len = MIN(len, sa_len);
2608 error = copyout((caddr_t)sa, uap->asa, len);
2609 if (error) {
2610 goto bad;
2611 }
2612 /* return the actual, untruncated address length */
2613 len = sa_len;
2614 gotnothing:
2615 error = copyout((caddr_t)&len, uap->alen, sizeof(socklen_t));
2616 bad:
2617 FREE(sa, M_SONAME);
2618 out:
2619 file_drop(uap->fdes);
2620 return error;
2621 }
2622
2623 /*
2624 * Get name of peer for connected socket.
2625 *
2626 * Returns: 0 Success
2627 * EBADF
2628 * EINVAL
2629 * ENOTCONN
2630 * file_socket:ENOTSOCK
2631 * file_socket:EBADF
2632 * copyin:EFAULT
2633 * copyout:EFAULT
2634 * <pru_peeraddr>:???
2635 * <sf_getpeername>:???
2636 */
2637 /* ARGSUSED */
2638 int
2639 getpeername(__unused struct proc *p, struct getpeername_args *uap,
2640 __unused int32_t *retval)
2641 {
2642 struct socket *so;
2643 struct sockaddr *sa;
2644 socklen_t len;
2645 socklen_t sa_len;
2646 int error;
2647
2648 error = file_socket(uap->fdes, &so);
2649 if (error) {
2650 return error;
2651 }
2652 if (so == NULL) {
2653 error = EBADF;
2654 goto out;
2655 }
2656
2657 socket_lock(so, 1);
2658
2659 if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
2660 (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
2661 /* the socket has been shutdown, no more getpeername's */
2662 socket_unlock(so, 1);
2663 error = EINVAL;
2664 goto out;
2665 }
2666
2667 if ((so->so_state & (SS_ISCONNECTED | SS_ISCONFIRMING)) == 0) {
2668 socket_unlock(so, 1);
2669 error = ENOTCONN;
2670 goto out;
2671 }
2672 error = copyin(uap->alen, (caddr_t)&len, sizeof(socklen_t));
2673 if (error) {
2674 socket_unlock(so, 1);
2675 goto out;
2676 }
2677 sa = 0;
2678 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
2679 if (error == 0) {
2680 error = sflt_getpeername(so, &sa);
2681 if (error == EJUSTRETURN) {
2682 error = 0;
2683 }
2684 }
2685 socket_unlock(so, 1);
2686 if (error) {
2687 goto bad;
2688 }
2689 if (sa == 0) {
2690 len = 0;
2691 goto gotnothing;
2692 }
2693 sa_len = sa->sa_len;
2694 len = MIN(len, sa_len);
2695 error = copyout(sa, uap->asa, len);
2696 if (error) {
2697 goto bad;
2698 }
2699 /* return the actual, untruncated address length */
2700 len = sa_len;
2701 gotnothing:
2702 error = copyout((caddr_t)&len, uap->alen, sizeof(socklen_t));
2703 bad:
2704 FREE(sa, M_SONAME);
2705 out:
2706 file_drop(uap->fdes);
2707 return error;
2708 }
2709
2710 int
2711 sockargs(struct mbuf **mp, user_addr_t data, socklen_t buflen, int type)
2712 {
2713 struct sockaddr *sa;
2714 struct mbuf *m;
2715 int error;
2716 socklen_t alloc_buflen = buflen;
2717
2718 if (buflen > INT_MAX / 2) {
2719 return EINVAL;
2720 }
2721 if (type == MT_SONAME && buflen > SOCK_MAXADDRLEN) {
2722 return EINVAL;
2723 }
2724
2725 #ifdef __LP64__
2726 /*
2727 * The fd's in the buffer must expand to be pointers, thus we need twice
2728 * as much space
2729 */
2730 if (type == MT_CONTROL) {
2731 alloc_buflen = ((buflen - sizeof(struct cmsghdr)) * 2) +
2732 sizeof(struct cmsghdr);
2733 }
2734 #endif
2735 if (alloc_buflen > MLEN) {
2736 if (type == MT_SONAME && alloc_buflen <= 112) {
2737 alloc_buflen = MLEN; /* unix domain compat. hack */
2738 } else if (alloc_buflen > MCLBYTES) {
2739 return EINVAL;
2740 }
2741 }
2742 m = m_get(M_WAIT, type);
2743 if (m == NULL) {
2744 return ENOBUFS;
2745 }
2746 if (alloc_buflen > MLEN) {
2747 MCLGET(m, M_WAIT);
2748 if ((m->m_flags & M_EXT) == 0) {
2749 m_free(m);
2750 return ENOBUFS;
2751 }
2752 }
2753 /*
2754 * K64: We still copyin the original buflen because it gets expanded
2755 * later and we lie about the size of the mbuf because it only affects
2756 * unp_* functions
2757 */
2758 m->m_len = buflen;
2759 error = copyin(data, mtod(m, caddr_t), (u_int)buflen);
2760 if (error) {
2761 (void) m_free(m);
2762 } else {
2763 *mp = m;
2764 if (type == MT_SONAME) {
2765 sa = mtod(m, struct sockaddr *);
2766 VERIFY(buflen <= SOCK_MAXADDRLEN);
2767 sa->sa_len = (__uint8_t)buflen;
2768 }
2769 }
2770 return error;
2771 }
2772
2773 /*
2774 * Given a user_addr_t of length len, allocate and fill out a *sa.
2775 *
2776 * Returns: 0 Success
2777 * ENAMETOOLONG Filename too long
2778 * EINVAL Invalid argument
2779 * ENOMEM Not enough space
2780 * copyin:EFAULT Bad address
2781 */
2782 static int
2783 getsockaddr(struct socket *so, struct sockaddr **namp, user_addr_t uaddr,
2784 size_t len, boolean_t translate_unspec)
2785 {
2786 struct sockaddr *sa;
2787 int error;
2788
2789 if (len > SOCK_MAXADDRLEN) {
2790 return ENAMETOOLONG;
2791 }
2792
2793 if (len < offsetof(struct sockaddr, sa_data[0])) {
2794 return EINVAL;
2795 }
2796
2797 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK | M_ZERO);
2798 if (sa == NULL) {
2799 return ENOMEM;
2800 }
2801 error = copyin(uaddr, (caddr_t)sa, len);
2802 if (error) {
2803 FREE(sa, M_SONAME);
2804 } else {
2805 /*
2806 * Force sa_family to AF_INET on AF_INET sockets to handle
2807 * legacy applications that use AF_UNSPEC (0). On all other
2808 * sockets we leave it unchanged and let the lower layer
2809 * handle it.
2810 */
2811 if (translate_unspec && sa->sa_family == AF_UNSPEC &&
2812 SOCK_CHECK_DOM(so, PF_INET) &&
2813 len == sizeof(struct sockaddr_in)) {
2814 sa->sa_family = AF_INET;
2815 }
2816 VERIFY(len <= SOCK_MAXADDRLEN);
2817 sa->sa_len = (__uint8_t)len;
2818 *namp = sa;
2819 }
2820 return error;
2821 }
2822
2823 static int
2824 getsockaddr_s(struct socket *so, struct sockaddr_storage *ss,
2825 user_addr_t uaddr, size_t len, boolean_t translate_unspec)
2826 {
2827 int error;
2828
2829 if (ss == NULL || uaddr == USER_ADDR_NULL ||
2830 len < offsetof(struct sockaddr, sa_data[0])) {
2831 return EINVAL;
2832 }
2833
2834 /*
2835 * sockaddr_storage size is less than SOCK_MAXADDRLEN,
2836 * so the check here is inclusive.
2837 */
2838 if (len > sizeof(*ss)) {
2839 return ENAMETOOLONG;
2840 }
2841
2842 bzero(ss, sizeof(*ss));
2843 error = copyin(uaddr, (caddr_t)ss, len);
2844 if (error == 0) {
2845 /*
2846 * Force sa_family to AF_INET on AF_INET sockets to handle
2847 * legacy applications that use AF_UNSPEC (0). On all other
2848 * sockets we leave it unchanged and let the lower layer
2849 * handle it.
2850 */
2851 if (translate_unspec && ss->ss_family == AF_UNSPEC &&
2852 SOCK_CHECK_DOM(so, PF_INET) &&
2853 len == sizeof(struct sockaddr_in)) {
2854 ss->ss_family = AF_INET;
2855 }
2856
2857 ss->ss_len = (__uint8_t)len;
2858 }
2859 return error;
2860 }
2861
2862 int
2863 internalize_user_msghdr_array(const void *src, int spacetype, int direction,
2864 u_int count, struct user_msghdr_x *dst, struct uio **uiop)
2865 {
2866 int error = 0;
2867 u_int i;
2868 u_int namecnt = 0;
2869 u_int ctlcnt = 0;
2870
2871 for (i = 0; i < count; i++) {
2872 uio_t auio;
2873 struct user_iovec *iovp;
2874 struct user_msghdr_x *user_msg = dst + i;
2875
2876 if (spacetype == UIO_USERSPACE64) {
2877 const struct user64_msghdr_x *msghdr64;
2878
2879 msghdr64 = ((const struct user64_msghdr_x *)src) + i;
2880
2881 user_msg->msg_name = (user_addr_t)msghdr64->msg_name;
2882 user_msg->msg_namelen = msghdr64->msg_namelen;
2883 user_msg->msg_iov = (user_addr_t)msghdr64->msg_iov;
2884 user_msg->msg_iovlen = msghdr64->msg_iovlen;
2885 user_msg->msg_control = (user_addr_t)msghdr64->msg_control;
2886 user_msg->msg_controllen = msghdr64->msg_controllen;
2887 user_msg->msg_flags = msghdr64->msg_flags;
2888 user_msg->msg_datalen = (size_t)msghdr64->msg_datalen;
2889 } else {
2890 const struct user32_msghdr_x *msghdr32;
2891
2892 msghdr32 = ((const struct user32_msghdr_x *)src) + i;
2893
2894 user_msg->msg_name = msghdr32->msg_name;
2895 user_msg->msg_namelen = msghdr32->msg_namelen;
2896 user_msg->msg_iov = msghdr32->msg_iov;
2897 user_msg->msg_iovlen = msghdr32->msg_iovlen;
2898 user_msg->msg_control = msghdr32->msg_control;
2899 user_msg->msg_controllen = msghdr32->msg_controllen;
2900 user_msg->msg_flags = msghdr32->msg_flags;
2901 user_msg->msg_datalen = msghdr32->msg_datalen;
2902 }
2903
2904 if (user_msg->msg_iovlen <= 0 ||
2905 user_msg->msg_iovlen > UIO_MAXIOV) {
2906 error = EMSGSIZE;
2907 goto done;
2908 }
2909 auio = uio_create(user_msg->msg_iovlen, 0, spacetype,
2910 direction);
2911 if (auio == NULL) {
2912 error = ENOMEM;
2913 goto done;
2914 }
2915 uiop[i] = auio;
2916
2917 iovp = uio_iovsaddr(auio);
2918 if (iovp == NULL) {
2919 error = ENOMEM;
2920 goto done;
2921 }
2922 error = copyin_user_iovec_array(user_msg->msg_iov,
2923 spacetype, user_msg->msg_iovlen, iovp);
2924 if (error) {
2925 goto done;
2926 }
2927 user_msg->msg_iov = CAST_USER_ADDR_T(iovp);
2928
2929 error = uio_calculateresid(auio);
2930 if (error) {
2931 goto done;
2932 }
2933 user_msg->msg_datalen = uio_resid(auio);
2934
2935 if (user_msg->msg_name && user_msg->msg_namelen) {
2936 namecnt++;
2937 }
2938 if (user_msg->msg_control && user_msg->msg_controllen) {
2939 ctlcnt++;
2940 }
2941 }
2942 done:
2943
2944 return error;
2945 }
2946
2947 int
2948 internalize_recv_msghdr_array(const void *src, int spacetype, int direction,
2949 u_int count, struct user_msghdr_x *dst,
2950 struct recv_msg_elem *recv_msg_array)
2951 {
2952 int error = 0;
2953 u_int i;
2954
2955 for (i = 0; i < count; i++) {
2956 struct user_iovec *iovp;
2957 struct user_msghdr_x *user_msg = dst + i;
2958 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
2959
2960 if (spacetype == UIO_USERSPACE64) {
2961 const struct user64_msghdr_x *msghdr64;
2962
2963 msghdr64 = ((const struct user64_msghdr_x *)src) + i;
2964
2965 user_msg->msg_name = (user_addr_t)msghdr64->msg_name;
2966 user_msg->msg_namelen = msghdr64->msg_namelen;
2967 user_msg->msg_iov = (user_addr_t)msghdr64->msg_iov;
2968 user_msg->msg_iovlen = msghdr64->msg_iovlen;
2969 user_msg->msg_control = (user_addr_t)msghdr64->msg_control;
2970 user_msg->msg_controllen = msghdr64->msg_controllen;
2971 user_msg->msg_flags = msghdr64->msg_flags;
2972 user_msg->msg_datalen = (size_t)msghdr64->msg_datalen;
2973 } else {
2974 const struct user32_msghdr_x *msghdr32;
2975
2976 msghdr32 = ((const struct user32_msghdr_x *)src) + i;
2977
2978 user_msg->msg_name = msghdr32->msg_name;
2979 user_msg->msg_namelen = msghdr32->msg_namelen;
2980 user_msg->msg_iov = msghdr32->msg_iov;
2981 user_msg->msg_iovlen = msghdr32->msg_iovlen;
2982 user_msg->msg_control = msghdr32->msg_control;
2983 user_msg->msg_controllen = msghdr32->msg_controllen;
2984 user_msg->msg_flags = msghdr32->msg_flags;
2985 user_msg->msg_datalen = msghdr32->msg_datalen;
2986 }
2987
2988 if (user_msg->msg_iovlen <= 0 ||
2989 user_msg->msg_iovlen > UIO_MAXIOV) {
2990 error = EMSGSIZE;
2991 goto done;
2992 }
2993 recv_msg_elem->uio = uio_create(user_msg->msg_iovlen, 0,
2994 spacetype, direction);
2995 if (recv_msg_elem->uio == NULL) {
2996 error = ENOMEM;
2997 goto done;
2998 }
2999
3000 iovp = uio_iovsaddr(recv_msg_elem->uio);
3001 if (iovp == NULL) {
3002 error = ENOMEM;
3003 goto done;
3004 }
3005 error = copyin_user_iovec_array(user_msg->msg_iov,
3006 spacetype, user_msg->msg_iovlen, iovp);
3007 if (error) {
3008 goto done;
3009 }
3010 user_msg->msg_iov = CAST_USER_ADDR_T(iovp);
3011
3012 error = uio_calculateresid(recv_msg_elem->uio);
3013 if (error) {
3014 goto done;
3015 }
3016 user_msg->msg_datalen = uio_resid(recv_msg_elem->uio);
3017
3018 if (user_msg->msg_name && user_msg->msg_namelen) {
3019 recv_msg_elem->which |= SOCK_MSG_SA;
3020 }
3021 if (user_msg->msg_control && user_msg->msg_controllen) {
3022 recv_msg_elem->which |= SOCK_MSG_CONTROL;
3023 }
3024 }
3025 done:
3026
3027 return error;
3028 }
3029
3030 u_int
3031 externalize_user_msghdr_array(void *dst, int spacetype, int direction,
3032 u_int count, const struct user_msghdr_x *src, struct uio **uiop)
3033 {
3034 #pragma unused(direction)
3035 u_int i;
3036 int seenlast = 0;
3037 u_int retcnt = 0;
3038
3039 for (i = 0; i < count; i++) {
3040 const struct user_msghdr_x *user_msg = src + i;
3041 uio_t auio = uiop[i];
3042 user_ssize_t len = user_msg->msg_datalen - uio_resid(auio);
3043
3044 if (user_msg->msg_datalen != 0 && len == 0) {
3045 seenlast = 1;
3046 }
3047
3048 if (seenlast == 0) {
3049 retcnt++;
3050 }
3051
3052 if (spacetype == UIO_USERSPACE64) {
3053 struct user64_msghdr_x *msghdr64;
3054
3055 msghdr64 = ((struct user64_msghdr_x *)dst) + i;
3056
3057 msghdr64->msg_flags = user_msg->msg_flags;
3058 msghdr64->msg_datalen = len;
3059 } else {
3060 struct user32_msghdr_x *msghdr32;
3061
3062 msghdr32 = ((struct user32_msghdr_x *)dst) + i;
3063
3064 msghdr32->msg_flags = user_msg->msg_flags;
3065 msghdr32->msg_datalen = (user32_size_t)len;
3066 }
3067 }
3068 return retcnt;
3069 }
3070
3071 u_int
3072 externalize_recv_msghdr_array(struct proc *p, struct socket *so, void *dst,
3073 u_int count, struct user_msghdr_x *src,
3074 struct recv_msg_elem *recv_msg_array, int *ret_error)
3075 {
3076 u_int i;
3077 u_int retcnt = 0;
3078 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
3079
3080 *ret_error = 0;
3081
3082 for (i = 0; i < count; i++) {
3083 struct user_msghdr_x *user_msg = src + i;
3084 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3085 user_ssize_t len = 0;
3086 int error;
3087
3088 len = user_msg->msg_datalen - uio_resid(recv_msg_elem->uio);
3089
3090 if ((recv_msg_elem->which & SOCK_MSG_DATA)) {
3091 retcnt++;
3092
3093
3094 if (recv_msg_elem->which & SOCK_MSG_SA) {
3095 error = copyout_sa(recv_msg_elem->psa, user_msg->msg_name,
3096 &user_msg->msg_namelen);
3097 if (error != 0) {
3098 *ret_error = error;
3099 return 0;
3100 }
3101 }
3102 if (recv_msg_elem->which & SOCK_MSG_CONTROL) {
3103 error = copyout_control(p, recv_msg_elem->controlp,
3104 user_msg->msg_control, &user_msg->msg_controllen,
3105 &recv_msg_elem->flags, so);
3106 if (error != 0) {
3107 *ret_error = error;
3108 return 0;
3109 }
3110 }
3111 }
3112
3113 if (spacetype == UIO_USERSPACE64) {
3114 struct user64_msghdr_x *msghdr64 = ((struct user64_msghdr_x *)dst) + i;
3115
3116 msghdr64->msg_namelen = user_msg->msg_namelen;
3117 msghdr64->msg_controllen = user_msg->msg_controllen;
3118 msghdr64->msg_flags = recv_msg_elem->flags;
3119 msghdr64->msg_datalen = len;
3120 } else {
3121 struct user32_msghdr_x *msghdr32 = ((struct user32_msghdr_x *)dst) + i;
3122
3123 msghdr32->msg_namelen = user_msg->msg_namelen;
3124 msghdr32->msg_controllen = user_msg->msg_controllen;
3125 msghdr32->msg_flags = recv_msg_elem->flags;
3126 msghdr32->msg_datalen = (user32_size_t)len;
3127 }
3128 }
3129 return retcnt;
3130 }
3131
3132 void
3133 free_uio_array(struct uio **uiop, u_int count)
3134 {
3135 u_int i;
3136
3137 for (i = 0; i < count; i++) {
3138 if (uiop[i] != NULL) {
3139 uio_free(uiop[i]);
3140 }
3141 }
3142 }
3143
3144 __private_extern__ user_ssize_t
3145 uio_array_resid(struct uio **uiop, u_int count)
3146 {
3147 user_ssize_t len = 0;
3148 u_int i;
3149
3150 for (i = 0; i < count; i++) {
3151 struct uio *auio = uiop[i];
3152
3153 if (auio != NULL) {
3154 len += uio_resid(auio);
3155 }
3156 }
3157 return len;
3158 }
3159
3160 static boolean_t
3161 uio_array_is_valid(struct uio **uiop, u_int count)
3162 {
3163 user_ssize_t len = 0;
3164 u_int i;
3165
3166 for (i = 0; i < count; i++) {
3167 struct uio *auio = uiop[i];
3168
3169 if (auio != NULL) {
3170 user_ssize_t resid = uio_resid(auio);
3171
3172 /*
3173 * Sanity check on the validity of the iovec:
3174 * no point of going over sb_max
3175 */
3176 if (resid < 0 || resid > (user_ssize_t)sb_max) {
3177 return false;
3178 }
3179
3180 len += resid;
3181 if (len < 0 || len > (user_ssize_t)sb_max) {
3182 return false;
3183 }
3184 }
3185 }
3186 return true;
3187 }
3188
3189
3190 struct recv_msg_elem *
3191 alloc_recv_msg_array(u_int count)
3192 {
3193 return kheap_alloc(KHEAP_TEMP,
3194 count * sizeof(struct recv_msg_elem), Z_WAITOK | Z_ZERO);
3195 }
3196
3197 void
3198 free_recv_msg_array(struct recv_msg_elem *recv_msg_array, u_int count)
3199 {
3200 if (recv_msg_array == NULL) {
3201 return;
3202 }
3203 for (uint32_t i = 0; i < count; i++) {
3204 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3205
3206 if (recv_msg_elem->uio != NULL) {
3207 uio_free(recv_msg_elem->uio);
3208 }
3209 _FREE(recv_msg_elem->psa, M_TEMP);
3210 if (recv_msg_elem->controlp != NULL) {
3211 m_freem(recv_msg_elem->controlp);
3212 }
3213 }
3214 kheap_free(KHEAP_TEMP, recv_msg_array,
3215 count * sizeof(struct recv_msg_elem));
3216 }
3217
3218
3219 __private_extern__ user_ssize_t
3220 recv_msg_array_resid(struct recv_msg_elem *recv_msg_array, u_int count)
3221 {
3222 user_ssize_t len = 0;
3223 u_int i;
3224
3225 for (i = 0; i < count; i++) {
3226 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3227
3228 if (recv_msg_elem->uio != NULL) {
3229 len += uio_resid(recv_msg_elem->uio);
3230 }
3231 }
3232 return len;
3233 }
3234
3235 int
3236 recv_msg_array_is_valid(struct recv_msg_elem *recv_msg_array, u_int count)
3237 {
3238 user_ssize_t len = 0;
3239 u_int i;
3240
3241 for (i = 0; i < count; i++) {
3242 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3243
3244 if (recv_msg_elem->uio != NULL) {
3245 user_ssize_t resid = uio_resid(recv_msg_elem->uio);
3246
3247 /*
3248 * Sanity check on the validity of the iovec:
3249 * no point of going over sb_max
3250 */
3251 if (resid < 0 || (u_int32_t)resid > sb_max) {
3252 return 0;
3253 }
3254
3255 len += resid;
3256 if (len < 0 || (u_int32_t)len > sb_max) {
3257 return 0;
3258 }
3259 }
3260 }
3261 return 1;
3262 }
3263
3264 #if SENDFILE
3265
/* Number of page-sized units that bound a single sendfile pass */
#define SFUIOBUFS 64

/*
 * Number of clusters of a given size needed to hold n bytes (rounded up).
 * n is narrowed to unsigned int and must be non-zero.
 */
#define HOWMANY_16K(n)  (1 + (((unsigned int)(n) - 1) >> M16KCLSHIFT))
#define HOWMANY_4K(n)   (1 + (((unsigned int)(n) - 1) >> MBIGCLSHIFT))

/* Upper send limit in bytes (SFUIOBUFS * PAGESIZE) */
#define SENDFILE_MAX_BYTES      (SFUIOBUFS << PGSHIFT)

/* The same upper send limit, expressed in mbuf clusters of each size */
#define SENDFILE_MAX_16K        HOWMANY_16K(SENDFILE_MAX_BYTES)
#define SENDFILE_MAX_4K         HOWMANY_4K(SENDFILE_MAX_BYTES)
3278
3279 static void
3280 alloc_sendpkt(int how, size_t pktlen, unsigned int *maxchunks,
3281 struct mbuf **m, boolean_t jumbocl)
3282 {
3283 unsigned int needed;
3284
3285 if (pktlen == 0) {
3286 panic("%s: pktlen (%ld) must be non-zero\n", __func__, pktlen);
3287 }
3288
3289 /*
3290 * Try to allocate for the whole thing. Since we want full control
3291 * over the buffer size and be able to accept partial result, we can't
3292 * use mbuf_allocpacket(). The logic below is similar to sosend().
3293 */
3294 *m = NULL;
3295 if (pktlen > MBIGCLBYTES && jumbocl) {
3296 needed = MIN(SENDFILE_MAX_16K, HOWMANY_16K(pktlen));
3297 *m = m_getpackets_internal(&needed, 1, how, 0, M16KCLBYTES);
3298 }
3299 if (*m == NULL) {
3300 needed = MIN(SENDFILE_MAX_4K, HOWMANY_4K(pktlen));
3301 *m = m_getpackets_internal(&needed, 1, how, 0, MBIGCLBYTES);
3302 }
3303
3304 /*
3305 * Our previous attempt(s) at allocation had failed; the system
3306 * may be short on mbufs, and we want to block until they are
3307 * available. This time, ask just for 1 mbuf and don't return
3308 * until we get it.
3309 */
3310 if (*m == NULL) {
3311 needed = 1;
3312 *m = m_getpackets_internal(&needed, 1, M_WAIT, 1, MBIGCLBYTES);
3313 }
3314 if (*m == NULL) {
3315 panic("%s: blocking allocation returned NULL\n", __func__);
3316 }
3317
3318 *maxchunks = needed;
3319 }
3320
3321 /*
3322 * sendfile(2).
3323 * int sendfile(int fd, int s, off_t offset, off_t *nbytes,
3324 * struct sf_hdtr *hdtr, int flags)
3325 *
3326 * Send a file specified by 'fd' and starting at 'offset' to a socket
3327 * specified by 's'. Send only '*nbytes' of the file or until EOF if
3328 * *nbytes == 0. Optionally add a header and/or trailer to the socket
3329 * output. If specified, write the total number of bytes sent into *nbytes.
3330 */
3331 int
3332 sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval)
3333 {
3334 struct fileproc *fp;
3335 struct vnode *vp;
3336 struct socket *so;
3337 struct writev_nocancel_args nuap;
3338 user_ssize_t writev_retval;
3339 struct user_sf_hdtr user_hdtr;
3340 struct user32_sf_hdtr user32_hdtr;
3341 struct user64_sf_hdtr user64_hdtr;
3342 off_t off, xfsize;
3343 off_t nbytes = 0, sbytes = 0;
3344 int error = 0;
3345 size_t sizeof_hdtr;
3346 off_t file_size;
3347 struct vfs_context context = *vfs_context_current();
3348
3349 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_START), uap->s,
3350 0, 0, 0, 0);
3351
3352 AUDIT_ARG(fd, uap->fd);
3353 AUDIT_ARG(value32, uap->s);
3354
3355 /*
3356 * Do argument checking. Must be a regular file in, stream
3357 * type and connected socket out, positive offset.
3358 */
3359 if ((error = fp_getfvp(p, uap->fd, &fp, &vp))) {
3360 goto done;
3361 }
3362 if ((fp->f_flag & FREAD) == 0) {
3363 error = EBADF;
3364 goto done1;
3365 }
3366 if (vnode_isreg(vp) == 0) {
3367 error = ENOTSUP;
3368 goto done1;
3369 }
3370 error = file_socket(uap->s, &so);
3371 if (error) {
3372 goto done1;
3373 }
3374 if (so == NULL) {
3375 error = EBADF;
3376 goto done2;
3377 }
3378 if (so->so_type != SOCK_STREAM) {
3379 error = EINVAL;
3380 goto done2;
3381 }
3382 if ((so->so_state & SS_ISCONNECTED) == 0) {
3383 error = ENOTCONN;
3384 goto done2;
3385 }
3386 if (uap->offset < 0) {
3387 error = EINVAL;
3388 goto done2;
3389 }
3390 if (uap->nbytes == USER_ADDR_NULL) {
3391 error = EINVAL;
3392 goto done2;
3393 }
3394 if (uap->flags != 0) {
3395 error = EINVAL;
3396 goto done2;
3397 }
3398
3399 context.vc_ucred = fp->fp_glob->fg_cred;
3400
3401 #if CONFIG_MACF_SOCKET_SUBSET
3402 /* JMM - fetch connected sockaddr? */
3403 error = mac_socket_check_send(context.vc_ucred, so, NULL);
3404 if (error) {
3405 goto done2;
3406 }
3407 #endif
3408
3409 /*
3410 * Get number of bytes to send
3411 * Should it applies to size of header and trailer?
3412 */
3413 error = copyin(uap->nbytes, &nbytes, sizeof(off_t));
3414 if (error) {
3415 goto done2;
3416 }
3417
3418 /*
3419 * If specified, get the pointer to the sf_hdtr struct for
3420 * any headers/trailers.
3421 */
3422 if (uap->hdtr != USER_ADDR_NULL) {
3423 caddr_t hdtrp;
3424
3425 bzero(&user_hdtr, sizeof(user_hdtr));
3426 if (IS_64BIT_PROCESS(p)) {
3427 hdtrp = (caddr_t)&user64_hdtr;
3428 sizeof_hdtr = sizeof(user64_hdtr);
3429 } else {
3430 hdtrp = (caddr_t)&user32_hdtr;
3431 sizeof_hdtr = sizeof(user32_hdtr);
3432 }
3433 error = copyin(uap->hdtr, hdtrp, sizeof_hdtr);
3434 if (error) {
3435 goto done2;
3436 }
3437 if (IS_64BIT_PROCESS(p)) {
3438 user_hdtr.headers = user64_hdtr.headers;
3439 user_hdtr.hdr_cnt = user64_hdtr.hdr_cnt;
3440 user_hdtr.trailers = user64_hdtr.trailers;
3441 user_hdtr.trl_cnt = user64_hdtr.trl_cnt;
3442 } else {
3443 user_hdtr.headers = user32_hdtr.headers;
3444 user_hdtr.hdr_cnt = user32_hdtr.hdr_cnt;
3445 user_hdtr.trailers = user32_hdtr.trailers;
3446 user_hdtr.trl_cnt = user32_hdtr.trl_cnt;
3447 }
3448
3449 /*
3450 * Send any headers. Wimp out and use writev(2).
3451 */
3452 if (user_hdtr.headers != USER_ADDR_NULL) {
3453 bzero(&nuap, sizeof(struct writev_args));
3454 nuap.fd = uap->s;
3455 nuap.iovp = user_hdtr.headers;
3456 nuap.iovcnt = user_hdtr.hdr_cnt;
3457 error = writev_nocancel(p, &nuap, &writev_retval);
3458 if (error) {
3459 goto done2;
3460 }
3461 sbytes += writev_retval;
3462 }
3463 }
3464
3465 /*
3466 * Get the file size for 2 reasons:
3467 * 1. We don't want to allocate more mbufs than necessary
3468 * 2. We don't want to read past the end of file
3469 */
3470 if ((error = vnode_size(vp, &file_size, vfs_context_current())) != 0) {
3471 goto done2;
3472 }
3473
3474 /*
3475 * Simply read file data into a chain of mbufs that are used with
3476 * scatter-gather reads. We're not (yet?) set up to use zero-copy
3477 * external mbufs that point to the file pages.
3478 */
3479 socket_lock(so, 1);
3480 error = sblock(&so->so_snd, SBL_WAIT);
3481 if (error) {
3482 socket_unlock(so, 1);
3483 goto done2;
3484 }
3485 for (off = uap->offset;; off += xfsize, sbytes += xfsize) {
3486 mbuf_t m0 = NULL, m;
3487 unsigned int nbufs = SFUIOBUFS, i;
3488 uio_t auio;
3489 char uio_buf[UIO_SIZEOF(SFUIOBUFS)]; /* 1 KB !!! */
3490 size_t uiolen;
3491 user_ssize_t rlen;
3492 off_t pgoff;
3493 size_t pktlen;
3494 boolean_t jumbocl;
3495
3496 /*
3497 * Calculate the amount to transfer.
3498 * Align to round number of pages.
3499 * Not to exceed send socket buffer,
3500 * the EOF, or the passed in nbytes.
3501 */
3502 xfsize = sbspace(&so->so_snd);
3503
3504 if (xfsize <= 0) {
3505 if (so->so_state & SS_CANTSENDMORE) {
3506 error = EPIPE;
3507 goto done3;
3508 } else if ((so->so_state & SS_NBIO)) {
3509 error = EAGAIN;
3510 goto done3;
3511 } else {
3512 xfsize = PAGE_SIZE;
3513 }
3514 }
3515
3516 if (xfsize > SENDFILE_MAX_BYTES) {
3517 xfsize = SENDFILE_MAX_BYTES;
3518 } else if (xfsize > PAGE_SIZE) {
3519 xfsize = trunc_page(xfsize);
3520 }
3521 pgoff = off & PAGE_MASK_64;
3522 if (pgoff > 0 && PAGE_SIZE - pgoff < xfsize) {
3523 xfsize = PAGE_SIZE_64 - pgoff;
3524 }
3525 if (nbytes && xfsize > (nbytes - sbytes)) {
3526 xfsize = nbytes - sbytes;
3527 }
3528 if (xfsize <= 0) {
3529 break;
3530 }
3531 if (off + xfsize > file_size) {
3532 xfsize = file_size - off;
3533 }
3534 if (xfsize <= 0) {
3535 break;
3536 }
3537
3538 /*
3539 * Attempt to use larger than system page-size clusters for
3540 * large writes only if there is a jumbo cluster pool and
3541 * if the socket is marked accordingly.
3542 */
3543 jumbocl = sosendjcl && njcl > 0 &&
3544 ((so->so_flags & SOF_MULTIPAGES) || sosendjcl_ignore_capab);
3545
3546 socket_unlock(so, 0);
3547 alloc_sendpkt(M_WAIT, xfsize, &nbufs, &m0, jumbocl);
3548 pktlen = mbuf_pkthdr_maxlen(m0);
3549 if (pktlen < (size_t)xfsize) {
3550 xfsize = pktlen;
3551 }
3552
3553 auio = uio_createwithbuffer(nbufs, off, UIO_SYSSPACE,
3554 UIO_READ, &uio_buf[0], sizeof(uio_buf));
3555 if (auio == NULL) {
3556 printf("sendfile failed. nbufs = %d. %s", nbufs,
3557 "File a radar related to rdar://10146739.\n");
3558 mbuf_freem(m0);
3559 error = ENXIO;
3560 socket_lock(so, 0);
3561 goto done3;
3562 }
3563
3564 for (i = 0, m = m0, uiolen = 0;
3565 i < nbufs && m != NULL && uiolen < (size_t)xfsize;
3566 i++, m = mbuf_next(m)) {
3567 size_t mlen = mbuf_maxlen(m);
3568
3569 if (mlen + uiolen > (size_t)xfsize) {
3570 mlen = xfsize - uiolen;
3571 }
3572 mbuf_setlen(m, mlen);
3573 uio_addiov(auio, CAST_USER_ADDR_T(mbuf_datastart(m)),
3574 mlen);
3575 uiolen += mlen;
3576 }
3577
3578 if (xfsize != uio_resid(auio)) {
3579 printf("sendfile: xfsize: %lld != uio_resid(auio): "
3580 "%lld\n", xfsize, (long long)uio_resid(auio));
3581 }
3582
3583 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_START),
3584 uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
3585 (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
3586 error = fo_read(fp, auio, FOF_OFFSET, &context);
3587 socket_lock(so, 0);
3588 if (error != 0) {
3589 if (uio_resid(auio) != xfsize && (error == ERESTART ||
3590 error == EINTR || error == EWOULDBLOCK)) {
3591 error = 0;
3592 } else {
3593 mbuf_freem(m0);
3594 goto done3;
3595 }
3596 }
3597 xfsize -= uio_resid(auio);
3598 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_END),
3599 uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
3600 (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
3601
3602 if (xfsize == 0) {
3603 // printf("sendfile: fo_read 0 bytes, EOF\n");
3604 break;
3605 }
3606 if (xfsize + off > file_size) {
3607 printf("sendfile: xfsize: %lld + off: %lld > file_size:"
3608 "%lld\n", xfsize, off, file_size);
3609 }
3610 for (i = 0, m = m0, rlen = 0;
3611 i < nbufs && m != NULL && rlen < xfsize;
3612 i++, m = mbuf_next(m)) {
3613 size_t mlen = mbuf_maxlen(m);
3614
3615 if (rlen + mlen > (size_t)xfsize) {
3616 mlen = xfsize - rlen;
3617 }
3618 mbuf_setlen(m, mlen);
3619
3620 rlen += mlen;
3621 }
3622 mbuf_pkthdr_setlen(m0, xfsize);
3623
3624 retry_space:
3625 /*
3626 * Make sure that the socket is still able to take more data.
3627 * CANTSENDMORE being true usually means that the connection
3628 * was closed. so_error is true when an error was sensed after
3629 * a previous send.
3630 * The state is checked after the page mapping and buffer
3631 * allocation above since those operations may block and make
3632 * any socket checks stale. From this point forward, nothing
3633 * blocks before the pru_send (or more accurately, any blocking
3634 * results in a loop back to here to re-check).
3635 */
3636 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
3637 if (so->so_state & SS_CANTSENDMORE) {
3638 error = EPIPE;
3639 } else {
3640 error = so->so_error;
3641 so->so_error = 0;
3642 }
3643 m_freem(m0);
3644 goto done3;
3645 }
3646 /*
3647 * Wait for socket space to become available. We do this just
3648 * after checking the connection state above in order to avoid
3649 * a race condition with sbwait().
3650 */
3651 if (sbspace(&so->so_snd) < (long)so->so_snd.sb_lowat) {
3652 if (so->so_state & SS_NBIO) {
3653 m_freem(m0);
3654 error = EAGAIN;
3655 goto done3;
3656 }
3657 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT |
3658 DBG_FUNC_START), uap->s, 0, 0, 0, 0);
3659 error = sbwait(&so->so_snd);
3660 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT |
3661 DBG_FUNC_END), uap->s, 0, 0, 0, 0);
3662 /*
3663 * An error from sbwait usually indicates that we've
3664 * been interrupted by a signal. If we've sent anything
3665 * then return bytes sent, otherwise return the error.
3666 */
3667 if (error) {
3668 m_freem(m0);
3669 goto done3;
3670 }
3671 goto retry_space;
3672 }
3673
3674 struct mbuf *control = NULL;
3675 {
3676 /*
3677 * Socket filter processing
3678 */
3679
3680 error = sflt_data_out(so, NULL, &m0, &control, 0);
3681 if (error) {
3682 if (error == EJUSTRETURN) {
3683 error = 0;
3684 continue;
3685 }
3686 goto done3;
3687 }
3688 /*
3689 * End Socket filter processing
3690 */
3691 }
3692 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
3693 uap->s, 0, 0, 0, 0);
3694 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m0,
3695 0, control, p);
3696 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
3697 uap->s, 0, 0, 0, 0);
3698 if (error) {
3699 goto done3;
3700 }
3701 }
3702 sbunlock(&so->so_snd, FALSE); /* will unlock socket */
3703 /*
3704 * Send trailers. Wimp out and use writev(2).
3705 */
3706 if (uap->hdtr != USER_ADDR_NULL &&
3707 user_hdtr.trailers != USER_ADDR_NULL) {
3708 bzero(&nuap, sizeof(struct writev_args));
3709 nuap.fd = uap->s;
3710 nuap.iovp = user_hdtr.trailers;
3711 nuap.iovcnt = user_hdtr.trl_cnt;
3712 error = writev_nocancel(p, &nuap, &writev_retval);
3713 if (error) {
3714 goto done2;
3715 }
3716 sbytes += writev_retval;
3717 }
3718 done2:
3719 file_drop(uap->s);
3720 done1:
3721 file_drop(uap->fd);
3722 done:
3723 if (uap->nbytes != USER_ADDR_NULL) {
3724 /* XXX this appears bogus for some early failure conditions */
3725 copyout(&sbytes, uap->nbytes, sizeof(off_t));
3726 }
3727 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_END), uap->s,
3728 (unsigned int)((sbytes >> 32) & 0x0ffffffff),
3729 (unsigned int)(sbytes & 0x0ffffffff), error, 0);
3730 return error;
3731 done3:
3732 sbunlock(&so->so_snd, FALSE); /* will unlock socket */
3733 goto done2;
3734 }
3735
3736
3737 #endif /* SENDFILE */