]> git.saurik.com Git - apple/xnu.git/blob - bsd/kern/uipc_syscalls.c
xnu-6153.141.1.tar.gz
[apple/xnu.git] / bsd / kern / uipc_syscalls.c
1 /*
2 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1982, 1986, 1989, 1990, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * sendfile(2) and related extensions:
33 * Copyright (c) 1998, David Greenman. All rights reserved.
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. All advertising materials mentioning features or use of this software
44 * must display the following acknowledgement:
45 * This product includes software developed by the University of
46 * California, Berkeley and its contributors.
47 * 4. Neither the name of the University nor the names of its contributors
48 * may be used to endorse or promote products derived from this software
49 * without specific prior written permission.
50 *
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE.
62 *
63 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94
64 */
65 /*
66 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
67 * support for mandatory and extensible security protections. This notice
68 * is included in support of clause 2.2 (b) of the Apple Public License,
69 * Version 2.0.
70 */
71
72 #include <sys/param.h>
73 #include <sys/systm.h>
74 #include <sys/filedesc.h>
75 #include <sys/proc_internal.h>
76 #include <sys/file_internal.h>
77 #include <sys/vnode_internal.h>
78 #include <sys/malloc.h>
79 #include <sys/mcache.h>
80 #include <sys/mbuf.h>
81 #include <kern/locks.h>
82 #include <sys/domain.h>
83 #include <sys/protosw.h>
84 #include <sys/signalvar.h>
85 #include <sys/socket.h>
86 #include <sys/socketvar.h>
87 #include <sys/kernel.h>
88 #include <sys/uio_internal.h>
89 #include <sys/kauth.h>
90 #include <kern/task.h>
91 #include <sys/priv.h>
92 #include <sys/sysctl.h>
93 #include <sys/sys_domain.h>
94
95 #include <security/audit/audit.h>
96
97 #include <sys/kdebug.h>
98 #include <sys/sysproto.h>
99 #include <netinet/in.h>
100 #include <net/route.h>
101 #include <netinet/in_pcb.h>
102
103 #include <os/ptrtools.h>
104
105 #if CONFIG_MACF_SOCKET_SUBSET
106 #include <security/mac_framework.h>
107 #endif /* MAC_SOCKET_SUBSET */
108
/*
 * Shorthand field names: each expands to the corresponding member reached
 * through f_fglob, so code in this file can write e.g. fp->f_flag or
 * fp->f_data on a struct fileproc pointer.
 */
109 #define f_flag f_fglob->fg_flag
110 #define f_type f_fglob->fg_ops->fo_type
111 #define f_msgcount f_fglob->fg_msgcount
112 #define f_cred f_fglob->fg_cred
113 #define f_ops f_fglob->fg_ops
114 #define f_offset f_fglob->fg_offset
115 #define f_data f_fglob->fg_data
116
/*
 * Trace codes built with NETDBG_CODE() in the DBG_NETSOCK class.  The
 * DBG_FNC_* codes are named after the send/receive/sendfile entry points
 * they are meant to tag; the second argument is the per-event code.
 */
117 #define DBG_LAYER_IN_BEG NETDBG_CODE(DBG_NETSOCK, 0)
118 #define DBG_LAYER_IN_END NETDBG_CODE(DBG_NETSOCK, 2)
119 #define DBG_LAYER_OUT_BEG NETDBG_CODE(DBG_NETSOCK, 1)
120 #define DBG_LAYER_OUT_END NETDBG_CODE(DBG_NETSOCK, 3)
121 #define DBG_FNC_SENDMSG NETDBG_CODE(DBG_NETSOCK, (1 << 8) | 1)
122 #define DBG_FNC_SENDTO NETDBG_CODE(DBG_NETSOCK, (2 << 8) | 1)
123 #define DBG_FNC_SENDIT NETDBG_CODE(DBG_NETSOCK, (3 << 8) | 1)
124 #define DBG_FNC_RECVFROM NETDBG_CODE(DBG_NETSOCK, (5 << 8))
125 #define DBG_FNC_RECVMSG NETDBG_CODE(DBG_NETSOCK, (6 << 8))
126 #define DBG_FNC_RECVIT NETDBG_CODE(DBG_NETSOCK, (7 << 8))
127 #define DBG_FNC_SENDFILE NETDBG_CODE(DBG_NETSOCK, (10 << 8))
128 #define DBG_FNC_SENDFILE_WAIT NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 1))
129 #define DBG_FNC_SENDFILE_READ NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 2))
130 #define DBG_FNC_SENDFILE_SEND NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 3))
131 #define DBG_FNC_SENDMSG_X NETDBG_CODE(DBG_NETSOCK, (11 << 8))
132 #define DBG_FNC_RECVMSG_X NETDBG_CODE(DBG_NETSOCK, (12 << 8))
133
/*
 * Debug helpers.  On DEBUG or DEVELOPMENT builds DEBUG_KERNEL_ADDRPERM()
 * passes its argument through unchanged and DBG_PRINTF() forwards to
 * printf(); on other builds the value goes through VM_KERNEL_ADDRPERM()
 * and DBG_PRINTF() expands to an empty statement.
 */
134 #if DEBUG || DEVELOPMENT
135 #define DEBUG_KERNEL_ADDRPERM(_v) (_v)
136 #define DBG_PRINTF(...) printf(__VA_ARGS__)
137 #else
138 #define DEBUG_KERNEL_ADDRPERM(_v) VM_KERNEL_ADDRPERM(_v)
139 #define DBG_PRINTF(...) do { } while (0)
140 #endif
141
142 /* TODO: should be in header file */
143 int falloc_locked(proc_t, struct fileproc **, int *, vfs_context_t, int);
144
/*
 * Forward declarations of the file-local helpers behind the socket
 * system calls.
 */
145 static int sendit(struct proc *, struct socket *, struct user_msghdr *, uio_t,
146 int, int32_t *);
147 static int recvit(struct proc *, int, struct user_msghdr *, uio_t, user_addr_t,
148 int32_t *);
149 static int connectit(struct socket *, struct sockaddr *);
150 static int getsockaddr(struct socket *, struct sockaddr **, user_addr_t,
151 size_t, boolean_t);
152 static int getsockaddr_s(struct socket *, struct sockaddr_storage *,
153 user_addr_t, size_t, boolean_t);
154 #if SENDFILE
155 static void alloc_sendpkt(int, size_t, unsigned int *, struct mbuf **,
156 boolean_t);
157 #endif /* SENDFILE */
158 static int connectx_nocancel(struct proc *, struct connectx_args *, int *);
159 static int connectitx(struct socket *, struct sockaddr *,
160 struct sockaddr *, struct proc *, uint32_t, sae_associd_t,
161 sae_connid_t *, uio_t, unsigned int, user_ssize_t *);
162 static int disconnectx_nocancel(struct proc *, struct disconnectx_args *,
163 int *);
164 static int socket_common(struct proc *, int, int, int, pid_t, int32_t *, int);
165
/* Helpers that convert arrays of user_msghdr_x to and from kernel form. */
166 static int internalize_user_msghdr_array(const void *, int, int, u_int,
167 struct user_msghdr_x *, struct uio **);
168 static u_int externalize_user_msghdr_array(void *, int, int, u_int,
169 const struct user_msghdr_x *, struct uio **);
170
/* Helpers for the uio and recv_msg_elem arrays used with those headers. */
171 static void free_uio_array(struct uio **, u_int);
172 static int uio_array_is_valid(struct uio **, u_int);
173 static int recv_msg_array_is_valid(struct recv_msg_elem *, u_int);
174 static int internalize_recv_msghdr_array(const void *, int, int,
175 u_int, struct user_msghdr_x *, struct recv_msg_elem *);
176 static u_int externalize_recv_msghdr_array(void *, int, int, u_int,
177 const struct user_msghdr_x *, struct recv_msg_elem *);
178 static struct recv_msg_elem *alloc_recv_msg_array(u_int count);
179 static void free_recv_msg_array(struct recv_msg_elem *, u_int);
180
181 SYSCTL_DECL(_kern_ipc);
182
/*
 * Read-write tunables registered under _kern_ipc as "maxsendmsgx" and
 * "maxrecvmsgx", both defaulting to 100.
 * NOTE(review): presumably these cap the number of messages handled per
 * sendmsg_x()/recvmsg_x() call -- the users are outside this excerpt.
 */
183 static u_int somaxsendmsgx = 100;
184 SYSCTL_UINT(_kern_ipc, OID_AUTO, maxsendmsgx,
185 CTLFLAG_RW | CTLFLAG_LOCKED, &somaxsendmsgx, 0, "");
186 static u_int somaxrecvmsgx = 100;
187 SYSCTL_UINT(_kern_ipc, OID_AUTO, maxrecvmsgx,
188 CTLFLAG_RW | CTLFLAG_LOCKED, &somaxrecvmsgx, 0, "");
189
190 /*
191 * System call interface to the socket abstraction.
192 */
193
/* File operations vector installed on every fileproc that wraps a socket. */
194 extern const struct fileops socketops;
195
196 /*
197 * Returns: 0 Success
198 * EACCES Mandatory Access Control failure
199 * falloc:ENFILE
200 * falloc:EMFILE
201 * falloc:ENOMEM
202 * socreate:EAFNOSUPPORT
203 * socreate:EPROTOTYPE
204 * socreate:EPROTONOSUPPORT
205 * socreate:ENOBUFS
206 * socreate:ENOMEM
207 * socreate:??? [other protocol families, IPSEC]
208 */
209 int
210 socket(struct proc *p,
211 struct socket_args *uap,
212 int32_t *retval)
213 {
214 return socket_common(p, uap->domain, uap->type, uap->protocol,
215 proc_selfpid(), retval, 0);
216 }
217
218 int
219 socket_delegate(struct proc *p,
220 struct socket_delegate_args *uap,
221 int32_t *retval)
222 {
223 return socket_common(p, uap->domain, uap->type, uap->protocol,
224 uap->epid, retval, 1);
225 }
226
227 static int
228 socket_common(struct proc *p,
229 int domain,
230 int type,
231 int protocol,
232 pid_t epid,
233 int32_t *retval,
234 int delegate)
235 {
236 struct socket *so;
237 struct fileproc *fp;
238 int fd, error;
239
240 AUDIT_ARG(socket, domain, type, protocol);
241 #if CONFIG_MACF_SOCKET_SUBSET
242 if ((error = mac_socket_check_create(kauth_cred_get(), domain,
243 type, protocol)) != 0) {
244 return error;
245 }
246 #endif /* MAC_SOCKET_SUBSET */
247
248 if (delegate) {
249 error = priv_check_cred(kauth_cred_get(),
250 PRIV_NET_PRIVILEGED_SOCKET_DELEGATE, 0);
251 if (error) {
252 return EACCES;
253 }
254 }
255
256 error = falloc(p, &fp, &fd, vfs_context_current());
257 if (error) {
258 return error;
259 }
260 fp->f_flag = FREAD | FWRITE;
261 fp->f_ops = &socketops;
262
263 if (delegate) {
264 error = socreate_delegate(domain, &so, type, protocol, epid);
265 } else {
266 error = socreate(domain, &so, type, protocol);
267 }
268
269 if (error) {
270 fp_free(p, fd, fp);
271 } else {
272 fp->f_data = (caddr_t)so;
273
274 proc_fdlock(p);
275 procfdtbl_releasefd(p, fd, NULL);
276
277 fp_drop(p, fd, fp, 1);
278 proc_fdunlock(p);
279
280 *retval = fd;
281 if (ENTR_SHOULDTRACE) {
282 KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
283 fd, 0, (int64_t)VM_KERNEL_ADDRPERM(so));
284 }
285 }
286 return error;
287 }
288
289 /*
290 * Returns: 0 Success
291 * EDESTADDRREQ Destination address required
292 * EBADF Bad file descriptor
293 * EACCES Mandatory Access Control failure
294 * file_socket:ENOTSOCK
295 * file_socket:EBADF
296 * getsockaddr:ENAMETOOLONG Filename too long
297 * getsockaddr:EINVAL Invalid argument
298 * getsockaddr:ENOMEM Not enough space
299 * getsockaddr:EFAULT Bad address
300 * sobindlock:???
301 */
302 /* ARGSUSED */
303 int
304 bind(__unused proc_t p, struct bind_args *uap, __unused int32_t *retval)
305 {
306 struct sockaddr_storage ss;
307 struct sockaddr *sa = NULL;
308 struct socket *so;
309 boolean_t want_free = TRUE;
310 int error;
311
312 AUDIT_ARG(fd, uap->s);
313 error = file_socket(uap->s, &so);
314 if (error != 0) {
315 return error;
316 }
317 if (so == NULL) {
318 error = EBADF;
319 goto out;
320 }
321 if (uap->name == USER_ADDR_NULL) {
322 error = EDESTADDRREQ;
323 goto out;
324 }
325 if (uap->namelen > sizeof(ss)) {
326 error = getsockaddr(so, &sa, uap->name, uap->namelen, TRUE);
327 } else {
328 error = getsockaddr_s(so, &ss, uap->name, uap->namelen, TRUE);
329 if (error == 0) {
330 sa = (struct sockaddr *)&ss;
331 want_free = FALSE;
332 }
333 }
334 if (error != 0) {
335 goto out;
336 }
337 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
338 #if CONFIG_MACF_SOCKET_SUBSET
339 if ((sa != NULL && sa->sa_family == AF_SYSTEM) ||
340 (error = mac_socket_check_bind(kauth_cred_get(), so, sa)) == 0) {
341 error = sobindlock(so, sa, 1); /* will lock socket */
342 }
343 #else
344 error = sobindlock(so, sa, 1); /* will lock socket */
345 #endif /* MAC_SOCKET_SUBSET */
346 if (want_free) {
347 FREE(sa, M_SONAME);
348 }
349 out:
350 file_drop(uap->s);
351 return error;
352 }
353
354 /*
355 * Returns: 0 Success
356 * EBADF
357 * EACCES Mandatory Access Control failure
358 * file_socket:ENOTSOCK
359 * file_socket:EBADF
360 * solisten:EINVAL
361 * solisten:EOPNOTSUPP
362 * solisten:???
363 */
364 int
365 listen(__unused struct proc *p, struct listen_args *uap,
366 __unused int32_t *retval)
367 {
368 int error;
369 struct socket *so;
370
371 AUDIT_ARG(fd, uap->s);
372 error = file_socket(uap->s, &so);
373 if (error) {
374 return error;
375 }
376 if (so != NULL)
377 #if CONFIG_MACF_SOCKET_SUBSET
378 {
379 error = mac_socket_check_listen(kauth_cred_get(), so);
380 if (error == 0) {
381 error = solisten(so, uap->backlog);
382 }
383 }
384 #else
385 { error = solisten(so, uap->backlog);}
386 #endif /* MAC_SOCKET_SUBSET */
387 else {
388 error = EBADF;
389 }
390
391 file_drop(uap->s);
392 return error;
393 }
394
395 /*
396 * Returns: fp_getfsock:EBADF Bad file descriptor
397 * fp_getfsock:EOPNOTSUPP ...
398 * xlate => :ENOTSOCK Socket operation on non-socket
399 * :EFAULT Bad address on copyin/copyout
400 * :EBADF Bad file descriptor
401 * :EOPNOTSUPP Operation not supported on socket
402 * :EINVAL Invalid argument
403 * :EWOULDBLOCK Operation would block
404 * :ECONNABORTED Connection aborted
405 * :EINTR Interrupted function
406 * :EACCES Mandatory Access Control failure
407 * falloc_locked:ENFILE Too many files open in system
408 * falloc_locked::EMFILE Too many open files
409 * falloc_locked::ENOMEM Not enough space
410 * 0 Success
411 */
/*
 * accept_nocancel: body of accept(2) without the cancellation point.
 *
 * Takes the first socket off the listening socket's completed-connection
 * queue (so_comp), allocates a new file descriptor for it, and, when the
 * caller supplied uap->name, copies out the peer address and its length.
 * *retval starts at -1 and is set to the new descriptor once falloc()
 * succeeds.
 */
412 int
413 accept_nocancel(struct proc *p, struct accept_nocancel_args *uap,
414 int32_t *retval)
415 {
416 struct fileproc *fp;
417 struct sockaddr *sa = NULL;
418 socklen_t namelen;
419 int error;
420 struct socket *head, *so = NULL;
421 lck_mtx_t *mutex_held;
422 int fd = uap->s;
423 int newfd;
424 short fflag; /* type must match fp->f_flag */
425 int dosocklock = 0;
426
427 *retval = -1;
428
429 AUDIT_ARG(fd, uap->s);
430
/* The caller's buffer length is only read when an address is requested. */
431 if (uap->name) {
432 error = copyin(uap->anamelen, (caddr_t)&namelen,
433 sizeof(socklen_t));
434 if (error) {
435 return error;
436 }
437 }
438 error = fp_getfsock(p, fd, &fp, &head);
439 if (error) {
/* Report a non-socket descriptor as ENOTSOCK rather than EOPNOTSUPP. */
440 if (error == EOPNOTSUPP) {
441 error = ENOTSOCK;
442 }
443 return error;
444 }
445 if (head == NULL) {
446 error = EBADF;
447 goto out;
448 }
449 #if CONFIG_MACF_SOCKET_SUBSET
450 if ((error = mac_socket_check_accept(kauth_cred_get(), head)) != 0) {
451 goto out;
452 }
453 #endif /* MAC_SOCKET_SUBSET */
454
455 socket_lock(head, 1);
456
/*
 * Pick the mutex to sleep on: the protocol's own lock when it provides
 * pr_getlock, otherwise the domain mutex.  dosocklock records that the
 * accepted socket must be locked separately further down.
 */
457 if (head->so_proto->pr_getlock != NULL) {
458 mutex_held = (*head->so_proto->pr_getlock)(head, PR_F_WILLUNLOCK);
459 dosocklock = 1;
460 } else {
461 mutex_held = head->so_proto->pr_domain->dom_mtx;
462 dosocklock = 0;
463 }
464
465 if ((head->so_options & SO_ACCEPTCONN) == 0) {
466 if ((head->so_proto->pr_flags & PR_CONNREQUIRED) == 0) {
467 error = EOPNOTSUPP;
468 } else {
469 /* POSIX: The socket is not accepting connections */
470 error = EINVAL;
471 }
472 socket_unlock(head, 1);
473 goto out;
474 }
475 check_again:
/* Non-blocking socket with nothing queued: fail instead of sleeping. */
476 if ((head->so_state & SS_NBIO) && head->so_comp.tqh_first == NULL) {
477 socket_unlock(head, 1);
478 error = EWOULDBLOCK;
479 goto out;
480 }
/* Sleep until a completed connection is queued or an error is posted. */
481 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
482 if (head->so_state & SS_CANTRCVMORE) {
483 head->so_error = ECONNABORTED;
484 break;
485 }
486 if (head->so_usecount < 1) {
487 panic("accept: head=%p refcount=%d\n", head,
488 head->so_usecount);
489 }
490 error = msleep((caddr_t)&head->so_timeo, mutex_held,
491 PSOCK | PCATCH, "accept", 0);
492 if (head->so_usecount < 1) {
493 panic("accept: 2 head=%p refcount=%d\n", head,
494 head->so_usecount);
495 }
496 if ((head->so_state & SS_DRAINING)) {
497 error = ECONNABORTED;
498 }
499 if (error) {
500 socket_unlock(head, 1);
501 goto out;
502 }
503 }
/* A pending socket error is consumed (cleared) and returned to the caller. */
504 if (head->so_error) {
505 error = head->so_error;
506 head->so_error = 0;
507 socket_unlock(head, 1);
508 goto out;
509 }
510
511 /*
512 * At this point we know that there is at least one connection
513 * ready to be accepted. Remove it from the queue prior to
514 * allocating the file descriptor for it since falloc() may
515 * block allowing another process to accept the connection
516 * instead.
517 */
518 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
519
520 so_acquire_accept_list(head, NULL);
/* The queue may have emptied while acquiring the accept list; start over. */
521 if (TAILQ_EMPTY(&head->so_comp)) {
522 so_release_accept_list(head);
523 goto check_again;
524 }
525
526 so = TAILQ_FIRST(&head->so_comp);
527 TAILQ_REMOVE(&head->so_comp, so, so_list);
528 so->so_head = NULL;
529 so->so_state &= ~SS_COMP;
530 head->so_qlen--;
531 so_release_accept_list(head);
532
533 /* unlock head to avoid deadlock with select, keep a ref on head */
534 socket_unlock(head, 0);
535
536 #if CONFIG_MACF_SOCKET_SUBSET
537 /*
538 * Pass the pre-accepted socket to the MAC framework. This is
539 * cheaper than allocating a file descriptor for the socket,
540 * calling the protocol accept callback, and possibly freeing
541 * the file descriptor should the MAC check fails.
542 */
543 if ((error = mac_socket_check_accepted(kauth_cred_get(), so)) != 0) {
544 socket_lock(so, 1);
545 so->so_state &= ~SS_NOFDREF;
546 socket_unlock(so, 1);
547 soclose(so);
548 /* Drop reference on listening socket */
549 sodereference(head);
550 goto out;
551 }
552 #endif /* MAC_SOCKET_SUBSET */
553
554 /*
555 * Pass the pre-accepted socket to any interested socket filter(s).
556 * Upon failure, the socket would have been closed by the callee.
557 */
558 if (so->so_filt != NULL && (error = soacceptfilter(so, head)) != 0) {
559 /* Drop reference on listening socket */
560 sodereference(head);
561 /* Propagate socket filter's error code to the caller */
562 goto out;
563 }
564
/*
 * Save the listening descriptor's flags before falloc() overwrites fp
 * with the new fileproc; they are copied onto the new descriptor below.
 */
565 fflag = fp->f_flag;
566 error = falloc(p, &fp, &newfd, vfs_context_current());
567 if (error) {
568 /*
569 * Probably ran out of file descriptors.
570 *
571 * <rdar://problem/8554930>
572 * Don't put this back on the socket like we used to, that
573 * just causes the client to spin. Drop the socket.
574 */
575 socket_lock(so, 1);
576 so->so_state &= ~SS_NOFDREF;
577 socket_unlock(so, 1);
578 soclose(so);
579 sodereference(head);
580 goto out;
581 }
582 *retval = newfd;
583 fp->f_flag = fflag;
584 fp->f_ops = &socketops;
585 fp->f_data = (caddr_t)so;
586
587 socket_lock(head, 0);
588 if (dosocklock) {
589 socket_lock(so, 1);
590 }
591
592 /* Sync socket non-blocking/async state with file flags */
593 if (fp->f_flag & FNONBLOCK) {
594 so->so_state |= SS_NBIO;
595 } else {
596 so->so_state &= ~SS_NBIO;
597 }
598
599 if (fp->f_flag & FASYNC) {
600 so->so_state |= SS_ASYNC;
601 so->so_rcv.sb_flags |= SB_ASYNC;
602 so->so_snd.sb_flags |= SB_ASYNC;
603 } else {
604 so->so_state &= ~SS_ASYNC;
605 so->so_rcv.sb_flags &= ~SB_ASYNC;
606 so->so_snd.sb_flags &= ~SB_ASYNC;
607 }
608
/* The return value of soacceptlock() is deliberately ignored; only sa is used. */
609 (void) soacceptlock(so, &sa, 0);
610 socket_unlock(head, 1);
/* No peer address available: report a zero length if one was requested. */
611 if (sa == NULL) {
612 namelen = 0;
613 if (uap->name) {
614 goto gotnoname;
615 }
616 error = 0;
617 goto releasefd;
618 }
619 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
620
621 if (uap->name) {
622 socklen_t sa_len;
623
624 /* save sa_len before it is destroyed */
625 sa_len = sa->sa_len;
626 namelen = MIN(namelen, sa_len);
627 error = copyout(sa, uap->name, namelen);
628 if (!error) {
629 /* return the actual, untruncated address length */
630 namelen = sa_len;
631 }
632 gotnoname:
/*
 * NOTE(review): this assignment overwrites the result of the address
 * copyout above, so a failure there is not reported -- confirm intended.
 */
633 error = copyout((caddr_t)&namelen, uap->anamelen,
634 sizeof(socklen_t));
635 }
636 FREE(sa, M_SONAME);
637
638 releasefd:
639 /*
640 * If the socket has been marked as inactive by sosetdefunct(),
641 * disallow further operations on it.
642 */
643 if (so->so_flags & SOF_DEFUNCT) {
644 sodefunct(current_proc(), so,
645 SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL);
646 }
647
648 if (dosocklock) {
649 socket_unlock(so, 1);
650 }
651
/* Release the new descriptor slot and drop the fileproc hold under the fd lock. */
652 proc_fdlock(p);
653 procfdtbl_releasefd(p, newfd, NULL);
654 fp_drop(p, newfd, fp, 1);
655 proc_fdunlock(p);
656
657 out:
/* Common exit: file_drop() on the listening descriptor (presumably balancing fp_getfsock()). */
658 file_drop(fd);
659
660 if (error == 0 && ENTR_SHOULDTRACE) {
661 KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
662 newfd, 0, (int64_t)VM_KERNEL_ADDRPERM(so));
663 }
664 return error;
665 }
666
667 int
668 accept(struct proc *p, struct accept_args *uap, int32_t *retval)
669 {
670 __pthread_testcancel(1);
671 return accept_nocancel(p, (struct accept_nocancel_args *)uap,
672 retval);
673 }
674
675 /*
676 * Returns: 0 Success
677 * EBADF Bad file descriptor
678 * EALREADY Connection already in progress
679 * EINPROGRESS Operation in progress
680 * ECONNABORTED Connection aborted
681 * EINTR Interrupted function
682 * EACCES Mandatory Access Control failure
683 * file_socket:ENOTSOCK
684 * file_socket:EBADF
685 * getsockaddr:ENAMETOOLONG Filename too long
686 * getsockaddr:EINVAL Invalid argument
687 * getsockaddr:ENOMEM Not enough space
688 * getsockaddr:EFAULT Bad address
689 * soconnectlock:EOPNOTSUPP
690 * soconnectlock:EISCONN
691 * soconnectlock:??? [depends on protocol, filters]
692 * msleep:EINTR
693 *
694 * Imputed: error may be set from so_error, which
695 * may have been set by soconnectlock.
696 */
697 /* ARGSUSED */
698 int
699 connect(struct proc *p, struct connect_args *uap, int32_t *retval)
700 {
701 __pthread_testcancel(1);
702 return connect_nocancel(p, (struct connect_nocancel_args *)uap,
703 retval);
704 }
705
706 int
707 connect_nocancel(proc_t p, struct connect_nocancel_args *uap, int32_t *retval)
708 {
709 #pragma unused(p, retval)
710 struct socket *so;
711 struct sockaddr_storage ss;
712 struct sockaddr *sa = NULL;
713 int error;
714 int fd = uap->s;
715 boolean_t dgram;
716
717 AUDIT_ARG(fd, uap->s);
718 error = file_socket(fd, &so);
719 if (error != 0) {
720 return error;
721 }
722 if (so == NULL) {
723 error = EBADF;
724 goto out;
725 }
726
727 /*
728 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
729 * if this is a datagram socket; translate for other types.
730 */
731 dgram = (so->so_type == SOCK_DGRAM);
732
733 /* Get socket address now before we obtain socket lock */
734 if (uap->namelen > sizeof(ss)) {
735 error = getsockaddr(so, &sa, uap->name, uap->namelen, !dgram);
736 } else {
737 error = getsockaddr_s(so, &ss, uap->name, uap->namelen, !dgram);
738 if (error == 0) {
739 sa = (struct sockaddr *)&ss;
740 }
741 }
742 if (error != 0) {
743 goto out;
744 }
745
746 error = connectit(so, sa);
747
748 if (sa != NULL && sa != SA(&ss)) {
749 FREE(sa, M_SONAME);
750 }
751 if (error == ERESTART) {
752 error = EINTR;
753 }
754 out:
755 file_drop(fd);
756 return error;
757 }
758
759 static int
760 connectx_nocancel(struct proc *p, struct connectx_args *uap, int *retval)
761 {
762 #pragma unused(p, retval)
763 struct sockaddr_storage ss, sd;
764 struct sockaddr *src = NULL, *dst = NULL;
765 struct socket *so;
766 int error, error1, fd = uap->socket;
767 boolean_t dgram;
768 sae_connid_t cid = SAE_CONNID_ANY;
769 struct user32_sa_endpoints ep32;
770 struct user64_sa_endpoints ep64;
771 struct user_sa_endpoints ep;
772 user_ssize_t bytes_written = 0;
773 struct user_iovec *iovp;
774 uio_t auio = NULL;
775
776 AUDIT_ARG(fd, uap->socket);
777 error = file_socket(fd, &so);
778 if (error != 0) {
779 return error;
780 }
781 if (so == NULL) {
782 error = EBADF;
783 goto out;
784 }
785
786 if (uap->endpoints == USER_ADDR_NULL) {
787 error = EINVAL;
788 goto out;
789 }
790
791 if (IS_64BIT_PROCESS(p)) {
792 error = copyin(uap->endpoints, (caddr_t)&ep64, sizeof(ep64));
793 if (error != 0) {
794 goto out;
795 }
796
797 ep.sae_srcif = ep64.sae_srcif;
798 ep.sae_srcaddr = ep64.sae_srcaddr;
799 ep.sae_srcaddrlen = ep64.sae_srcaddrlen;
800 ep.sae_dstaddr = ep64.sae_dstaddr;
801 ep.sae_dstaddrlen = ep64.sae_dstaddrlen;
802 } else {
803 error = copyin(uap->endpoints, (caddr_t)&ep32, sizeof(ep32));
804 if (error != 0) {
805 goto out;
806 }
807
808 ep.sae_srcif = ep32.sae_srcif;
809 ep.sae_srcaddr = ep32.sae_srcaddr;
810 ep.sae_srcaddrlen = ep32.sae_srcaddrlen;
811 ep.sae_dstaddr = ep32.sae_dstaddr;
812 ep.sae_dstaddrlen = ep32.sae_dstaddrlen;
813 }
814
815 /*
816 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
817 * if this is a datagram socket; translate for other types.
818 */
819 dgram = (so->so_type == SOCK_DGRAM);
820
821 /* Get socket address now before we obtain socket lock */
822 if (ep.sae_srcaddr != USER_ADDR_NULL) {
823 if (ep.sae_srcaddrlen > sizeof(ss)) {
824 error = getsockaddr(so, &src, ep.sae_srcaddr, ep.sae_srcaddrlen, dgram);
825 } else {
826 error = getsockaddr_s(so, &ss, ep.sae_srcaddr, ep.sae_srcaddrlen, dgram);
827 if (error == 0) {
828 src = (struct sockaddr *)&ss;
829 }
830 }
831
832 if (error) {
833 goto out;
834 }
835 }
836
837 if (ep.sae_dstaddr == USER_ADDR_NULL) {
838 error = EINVAL;
839 goto out;
840 }
841
842 /* Get socket address now before we obtain socket lock */
843 if (ep.sae_dstaddrlen > sizeof(sd)) {
844 error = getsockaddr(so, &dst, ep.sae_dstaddr, ep.sae_dstaddrlen, dgram);
845 } else {
846 error = getsockaddr_s(so, &sd, ep.sae_dstaddr, ep.sae_dstaddrlen, dgram);
847 if (error == 0) {
848 dst = (struct sockaddr *)&sd;
849 }
850 }
851
852 if (error) {
853 goto out;
854 }
855
856 VERIFY(dst != NULL);
857
858 if (uap->iov != USER_ADDR_NULL) {
859 /* Verify range before calling uio_create() */
860 if (uap->iovcnt <= 0 || uap->iovcnt > UIO_MAXIOV) {
861 error = EINVAL;
862 goto out;
863 }
864
865 if (uap->len == USER_ADDR_NULL) {
866 error = EINVAL;
867 goto out;
868 }
869
870 /* allocate a uio to hold the number of iovecs passed */
871 auio = uio_create(uap->iovcnt, 0,
872 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
873 UIO_WRITE);
874
875 if (auio == NULL) {
876 error = ENOMEM;
877 goto out;
878 }
879
880 /*
881 * get location of iovecs within the uio.
882 * then copyin the iovecs from user space.
883 */
884 iovp = uio_iovsaddr(auio);
885 if (iovp == NULL) {
886 error = ENOMEM;
887 goto out;
888 }
889 error = copyin_user_iovec_array(uap->iov,
890 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
891 uap->iovcnt, iovp);
892 if (error != 0) {
893 goto out;
894 }
895
896 /* finish setup of uio_t */
897 error = uio_calculateresid(auio);
898 if (error != 0) {
899 goto out;
900 }
901 }
902
903 error = connectitx(so, src, dst, p, ep.sae_srcif, uap->associd,
904 &cid, auio, uap->flags, &bytes_written);
905 if (error == ERESTART) {
906 error = EINTR;
907 }
908
909 if (uap->len != USER_ADDR_NULL) {
910 error1 = copyout(&bytes_written, uap->len, sizeof(uap->len));
911 /* give precedence to connectitx errors */
912 if ((error1 != 0) && (error == 0)) {
913 error = error1;
914 }
915 }
916
917 if (uap->connid != USER_ADDR_NULL) {
918 error1 = copyout(&cid, uap->connid, sizeof(cid));
919 /* give precedence to connectitx errors */
920 if ((error1 != 0) && (error == 0)) {
921 error = error1;
922 }
923 }
924 out:
925 file_drop(fd);
926 if (auio != NULL) {
927 uio_free(auio);
928 }
929 if (src != NULL && src != SA(&ss)) {
930 FREE(src, M_SONAME);
931 }
932 if (dst != NULL && dst != SA(&sd)) {
933 FREE(dst, M_SONAME);
934 }
935 return error;
936 }
937
/*
 * connectx(2) entry point.  Because of its similarity to a POSIX
 * interface it is treated as an unofficial cancellation point before
 * the work is handed to connectx_nocancel().
 */
int
connectx(struct proc *p, struct connectx_args *uap, int *retval)
{
	int err;

	__pthread_testcancel(1);
	err = connectx_nocancel(p, uap, retval);
	return err;
}
948
949 static int
950 connectit(struct socket *so, struct sockaddr *sa)
951 {
952 int error;
953
954 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
955 #if CONFIG_MACF_SOCKET_SUBSET
956 if ((error = mac_socket_check_connect(kauth_cred_get(), so, sa)) != 0) {
957 return error;
958 }
959 #endif /* MAC_SOCKET_SUBSET */
960
961 socket_lock(so, 1);
962 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
963 error = EALREADY;
964 goto out;
965 }
966 error = soconnectlock(so, sa, 0);
967 if (error != 0) {
968 goto out;
969 }
970 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
971 error = EINPROGRESS;
972 goto out;
973 }
974 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
975 lck_mtx_t *mutex_held;
976
977 if (so->so_proto->pr_getlock != NULL) {
978 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
979 } else {
980 mutex_held = so->so_proto->pr_domain->dom_mtx;
981 }
982 error = msleep((caddr_t)&so->so_timeo, mutex_held,
983 PSOCK | PCATCH, __func__, 0);
984 if (so->so_state & SS_DRAINING) {
985 error = ECONNABORTED;
986 }
987 if (error != 0) {
988 break;
989 }
990 }
991 if (error == 0) {
992 error = so->so_error;
993 so->so_error = 0;
994 }
995 out:
996 socket_unlock(so, 1);
997 return error;
998 }
999
1000 static int
1001 connectitx(struct socket *so, struct sockaddr *src,
1002 struct sockaddr *dst, struct proc *p, uint32_t ifscope,
1003 sae_associd_t aid, sae_connid_t *pcid, uio_t auio, unsigned int flags,
1004 user_ssize_t *bytes_written)
1005 {
1006 int error;
1007
1008 VERIFY(dst != NULL);
1009
1010 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), dst);
1011 #if CONFIG_MACF_SOCKET_SUBSET
1012 if ((error = mac_socket_check_connect(kauth_cred_get(), so, dst)) != 0) {
1013 return error;
1014 }
1015
1016 if (auio != NULL) {
1017 if ((error = mac_socket_check_send(kauth_cred_get(), so, dst)) != 0) {
1018 return error;
1019 }
1020 }
1021 #endif /* MAC_SOCKET_SUBSET */
1022
1023 socket_lock(so, 1);
1024 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
1025 error = EALREADY;
1026 goto out;
1027 }
1028
1029 error = soconnectxlocked(so, src, dst, p, ifscope,
1030 aid, pcid, flags, NULL, 0, auio, bytes_written);
1031 if (error != 0) {
1032 goto out;
1033 }
1034 /*
1035 * If, after the call to soconnectxlocked the flag is still set (in case
1036 * data has been queued and the connect() has actually been triggered,
1037 * it will have been unset by the transport), we exit immediately. There
1038 * is no reason to wait on any event.
1039 */
1040 if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
1041 error = 0;
1042 goto out;
1043 }
1044 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
1045 error = EINPROGRESS;
1046 goto out;
1047 }
1048 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
1049 lck_mtx_t *mutex_held;
1050
1051 if (so->so_proto->pr_getlock != NULL) {
1052 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
1053 } else {
1054 mutex_held = so->so_proto->pr_domain->dom_mtx;
1055 }
1056 error = msleep((caddr_t)&so->so_timeo, mutex_held,
1057 PSOCK | PCATCH, __func__, 0);
1058 if (so->so_state & SS_DRAINING) {
1059 error = ECONNABORTED;
1060 }
1061 if (error != 0) {
1062 break;
1063 }
1064 }
1065 if (error == 0) {
1066 error = so->so_error;
1067 so->so_error = 0;
1068 }
1069 out:
1070 socket_unlock(so, 1);
1071 return error;
1072 }
1073
/*
 * peeloff entry point.  The arguments are unused: the call only acts as
 * an unofficial cancellation point (because of its similarity to a POSIX
 * interface) and then reports success.
 */
int
peeloff(struct proc *p, struct peeloff_args *uap, int *retval)
{
	(void)p;
	(void)uap;
	(void)retval;

	__pthread_testcancel(1);
	return 0;
}
1085
/*
 * disconnectx(2) entry point.  Because of its similarity to a POSIX
 * interface it is treated as an unofficial cancellation point before
 * the work is handed to disconnectx_nocancel().
 */
int
disconnectx(struct proc *p, struct disconnectx_args *uap, int *retval)
{
	int err;

	__pthread_testcancel(1);
	err = disconnectx_nocancel(p, uap, retval);
	return err;
}
1096
1097 static int
1098 disconnectx_nocancel(struct proc *p, struct disconnectx_args *uap, int *retval)
1099 {
1100 #pragma unused(p, retval)
1101 struct socket *so;
1102 int fd = uap->s;
1103 int error;
1104
1105 error = file_socket(fd, &so);
1106 if (error != 0) {
1107 return error;
1108 }
1109 if (so == NULL) {
1110 error = EBADF;
1111 goto out;
1112 }
1113
1114 error = sodisconnectx(so, uap->aid, uap->cid);
1115 out:
1116 file_drop(fd);
1117 return error;
1118 }
1119
1120 /*
1121 * Returns: 0 Success
1122 * socreate:EAFNOSUPPORT
1123 * socreate:EPROTOTYPE
1124 * socreate:EPROTONOSUPPORT
1125 * socreate:ENOBUFS
1126 * socreate:ENOMEM
1127 * socreate:EISCONN
1128 * socreate:??? [other protocol families, IPSEC]
1129 * falloc:ENFILE
1130 * falloc:EMFILE
1131 * falloc:ENOMEM
1132 * copyout:EFAULT
1133 * soconnect2:EINVAL
1134 * soconnect2:EPROTOTYPE
1135 * soconnect2:??? [other protocol families[
1136 */
1137 int
1138 socketpair(struct proc *p, struct socketpair_args *uap,
1139 __unused int32_t *retval)
1140 {
1141 struct fileproc *fp1, *fp2;
1142 struct socket *so1, *so2;
1143 int fd, error, sv[2];
1144
1145 AUDIT_ARG(socket, uap->domain, uap->type, uap->protocol);
1146 error = socreate(uap->domain, &so1, uap->type, uap->protocol);
1147 if (error) {
1148 return error;
1149 }
1150 error = socreate(uap->domain, &so2, uap->type, uap->protocol);
1151 if (error) {
1152 goto free1;
1153 }
1154
1155 error = falloc(p, &fp1, &fd, vfs_context_current());
1156 if (error) {
1157 goto free2;
1158 }
1159 fp1->f_flag = FREAD | FWRITE;
1160 fp1->f_ops = &socketops;
1161 fp1->f_data = (caddr_t)so1;
1162 sv[0] = fd;
1163
1164 error = falloc(p, &fp2, &fd, vfs_context_current());
1165 if (error) {
1166 goto free3;
1167 }
1168 fp2->f_flag = FREAD | FWRITE;
1169 fp2->f_ops = &socketops;
1170 fp2->f_data = (caddr_t)so2;
1171 sv[1] = fd;
1172
1173 error = soconnect2(so1, so2);
1174 if (error) {
1175 goto free4;
1176 }
1177 if (uap->type == SOCK_DGRAM) {
1178 /*
1179 * Datagram socket connection is asymmetric.
1180 */
1181 error = soconnect2(so2, so1);
1182 if (error) {
1183 goto free4;
1184 }
1185 }
1186
1187 if ((error = copyout(sv, uap->rsv, 2 * sizeof(int))) != 0) {
1188 goto free4;
1189 }
1190
1191 proc_fdlock(p);
1192 procfdtbl_releasefd(p, sv[0], NULL);
1193 procfdtbl_releasefd(p, sv[1], NULL);
1194 fp_drop(p, sv[0], fp1, 1);
1195 fp_drop(p, sv[1], fp2, 1);
1196 proc_fdunlock(p);
1197
1198 return 0;
1199 free4:
1200 fp_free(p, sv[1], fp2);
1201 free3:
1202 fp_free(p, sv[0], fp1);
1203 free2:
1204 (void) soclose(so2);
1205 free1:
1206 (void) soclose(so1);
1207 return error;
1208 }
1209
1210 /*
1211 * Returns: 0 Success
1212 * EINVAL
1213 * ENOBUFS
1214 * EBADF
1215 * EPIPE
1216 * EACCES Mandatory Access Control failure
1217 * file_socket:ENOTSOCK
1218 * file_socket:EBADF
1219 * getsockaddr:ENAMETOOLONG Filename too long
1220 * getsockaddr:EINVAL Invalid argument
1221 * getsockaddr:ENOMEM Not enough space
1222 * getsockaddr:EFAULT Bad address
1223 * <pru_sosend>:EACCES[TCP]
1224 * <pru_sosend>:EADDRINUSE[TCP]
1225 * <pru_sosend>:EADDRNOTAVAIL[TCP]
1226 * <pru_sosend>:EAFNOSUPPORT[TCP]
1227 * <pru_sosend>:EAGAIN[TCP]
1228 * <pru_sosend>:EBADF
1229 * <pru_sosend>:ECONNRESET[TCP]
1230 * <pru_sosend>:EFAULT
1231 * <pru_sosend>:EHOSTUNREACH[TCP]
1232 * <pru_sosend>:EINTR
1233 * <pru_sosend>:EINVAL
1234 * <pru_sosend>:EISCONN[AF_INET]
1235 * <pru_sosend>:EMSGSIZE[TCP]
1236 * <pru_sosend>:ENETDOWN[TCP]
1237 * <pru_sosend>:ENETUNREACH[TCP]
1238 * <pru_sosend>:ENOBUFS
1239 * <pru_sosend>:ENOMEM[TCP]
1240 * <pru_sosend>:ENOTCONN[AF_INET]
1241 * <pru_sosend>:EOPNOTSUPP
1242 * <pru_sosend>:EPERM[TCP]
1243 * <pru_sosend>:EPIPE
1244 * <pru_sosend>:EWOULDBLOCK
1245 * <pru_sosend>:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
1246 * <pru_sosend>:???[AF_INET] [whatever a filter author chooses]
1247 * <pru_sosend>:??? [value from so_error]
1248 * sockargs:???
1249 */
1250 static int
1251 sendit(struct proc *p, struct socket *so, struct user_msghdr *mp, uio_t uiop,
1252 int flags, int32_t *retval)
1253 {
1254 struct mbuf *control = NULL;
1255 struct sockaddr_storage ss;
1256 struct sockaddr *to = NULL;
1257 boolean_t want_free = TRUE;
1258 int error;
1259 user_ssize_t len;
1260
1261 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
1262
1263 if (mp->msg_name != USER_ADDR_NULL) {
1264 if (mp->msg_namelen > sizeof(ss)) {
1265 error = getsockaddr(so, &to, mp->msg_name,
1266 mp->msg_namelen, TRUE);
1267 } else {
1268 error = getsockaddr_s(so, &ss, mp->msg_name,
1269 mp->msg_namelen, TRUE);
1270 if (error == 0) {
1271 to = (struct sockaddr *)&ss;
1272 want_free = FALSE;
1273 }
1274 }
1275 if (error != 0) {
1276 goto out;
1277 }
1278 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), to);
1279 }
1280 if (mp->msg_control != USER_ADDR_NULL) {
1281 if (mp->msg_controllen < sizeof(struct cmsghdr)) {
1282 error = EINVAL;
1283 goto bad;
1284 }
1285 error = sockargs(&control, mp->msg_control,
1286 mp->msg_controllen, MT_CONTROL);
1287 if (error != 0) {
1288 goto bad;
1289 }
1290 }
1291
1292 #if CONFIG_MACF_SOCKET_SUBSET
1293 /*
1294 * We check the state without holding the socket lock;
1295 * if a race condition occurs, it would simply result
1296 * in an extra call to the MAC check function.
1297 */
1298 if (to != NULL &&
1299 !(so->so_state & SS_DEFUNCT) &&
1300 (error = mac_socket_check_send(kauth_cred_get(), so, to)) != 0) {
1301 goto bad;
1302 }
1303 #endif /* MAC_SOCKET_SUBSET */
1304
1305 len = uio_resid(uiop);
1306 error = so->so_proto->pr_usrreqs->pru_sosend(so, to, uiop, 0,
1307 control, flags);
1308 if (error != 0) {
1309 if (uio_resid(uiop) != len && (error == ERESTART ||
1310 error == EINTR || error == EWOULDBLOCK)) {
1311 error = 0;
1312 }
1313 /* Generation of SIGPIPE can be controlled per socket */
1314 if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE)) {
1315 psignal(p, SIGPIPE);
1316 }
1317 }
1318 if (error == 0) {
1319 *retval = (int)(len - uio_resid(uiop));
1320 }
1321 bad:
1322 if (to != NULL && want_free) {
1323 FREE(to, M_SONAME);
1324 }
1325 out:
1326 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error, 0, 0, 0, 0);
1327
1328 return error;
1329 }
1330
1331 /*
1332 * Returns: 0 Success
1333 * ENOMEM
1334 * sendit:??? [see sendit definition in this file]
1335 * write:??? [4056224: applicable for pipes]
1336 */
1337 int
1338 sendto(struct proc *p, struct sendto_args *uap, int32_t *retval)
1339 {
1340 __pthread_testcancel(1);
1341 return sendto_nocancel(p, (struct sendto_nocancel_args *)uap, retval);
1342 }
1343
1344 int
1345 sendto_nocancel(struct proc *p,
1346 struct sendto_nocancel_args *uap,
1347 int32_t *retval)
1348 {
1349 struct user_msghdr msg;
1350 int error;
1351 uio_t auio = NULL;
1352 struct socket *so;
1353
1354 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_START, 0, 0, 0, 0, 0);
1355 AUDIT_ARG(fd, uap->s);
1356
1357 if (uap->flags & MSG_SKIPCFIL) {
1358 error = EPERM;
1359 goto done;
1360 }
1361
1362 auio = uio_create(1, 0,
1363 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1364 UIO_WRITE);
1365 if (auio == NULL) {
1366 error = ENOMEM;
1367 goto done;
1368 }
1369 uio_addiov(auio, uap->buf, uap->len);
1370
1371 msg.msg_name = uap->to;
1372 msg.msg_namelen = uap->tolen;
1373 /* no need to set up msg_iov. sendit uses uio_t we send it */
1374 msg.msg_iov = 0;
1375 msg.msg_iovlen = 0;
1376 msg.msg_control = 0;
1377 msg.msg_flags = 0;
1378
1379 error = file_socket(uap->s, &so);
1380 if (error) {
1381 goto done;
1382 }
1383
1384 if (so == NULL) {
1385 error = EBADF;
1386 } else {
1387 error = sendit(p, so, &msg, auio, uap->flags, retval);
1388 }
1389
1390 file_drop(uap->s);
1391 done:
1392 if (auio != NULL) {
1393 uio_free(auio);
1394 }
1395
1396 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_END, error, *retval, 0, 0, 0);
1397
1398 return error;
1399 }
1400
1401 /*
1402 * Returns: 0 Success
1403 * ENOBUFS
1404 * copyin:EFAULT
1405 * sendit:??? [see sendit definition in this file]
1406 */
1407 int
1408 sendmsg(struct proc *p, struct sendmsg_args *uap, int32_t *retval)
1409 {
1410 __pthread_testcancel(1);
1411 return sendmsg_nocancel(p, (struct sendmsg_nocancel_args *)uap,
1412 retval);
1413 }
1414
1415 int
1416 sendmsg_nocancel(struct proc *p, struct sendmsg_nocancel_args *uap,
1417 int32_t *retval)
1418 {
1419 struct user32_msghdr msg32;
1420 struct user64_msghdr msg64;
1421 struct user_msghdr user_msg;
1422 caddr_t msghdrp;
1423 int size_of_msghdr;
1424 int error;
1425 uio_t auio = NULL;
1426 struct user_iovec *iovp;
1427 struct socket *so;
1428
1429 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
1430 AUDIT_ARG(fd, uap->s);
1431
1432 if (uap->flags & MSG_SKIPCFIL) {
1433 error = EPERM;
1434 goto done;
1435 }
1436
1437 if (IS_64BIT_PROCESS(p)) {
1438 msghdrp = (caddr_t)&msg64;
1439 size_of_msghdr = sizeof(msg64);
1440 } else {
1441 msghdrp = (caddr_t)&msg32;
1442 size_of_msghdr = sizeof(msg32);
1443 }
1444 error = copyin(uap->msg, msghdrp, size_of_msghdr);
1445 if (error) {
1446 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1447 return error;
1448 }
1449
1450 if (IS_64BIT_PROCESS(p)) {
1451 user_msg.msg_flags = msg64.msg_flags;
1452 user_msg.msg_controllen = msg64.msg_controllen;
1453 user_msg.msg_control = msg64.msg_control;
1454 user_msg.msg_iovlen = msg64.msg_iovlen;
1455 user_msg.msg_iov = msg64.msg_iov;
1456 user_msg.msg_namelen = msg64.msg_namelen;
1457 user_msg.msg_name = msg64.msg_name;
1458 } else {
1459 user_msg.msg_flags = msg32.msg_flags;
1460 user_msg.msg_controllen = msg32.msg_controllen;
1461 user_msg.msg_control = msg32.msg_control;
1462 user_msg.msg_iovlen = msg32.msg_iovlen;
1463 user_msg.msg_iov = msg32.msg_iov;
1464 user_msg.msg_namelen = msg32.msg_namelen;
1465 user_msg.msg_name = msg32.msg_name;
1466 }
1467
1468 if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
1469 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, EMSGSIZE,
1470 0, 0, 0, 0);
1471 return EMSGSIZE;
1472 }
1473
1474 /* allocate a uio large enough to hold the number of iovecs passed */
1475 auio = uio_create(user_msg.msg_iovlen, 0,
1476 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1477 UIO_WRITE);
1478 if (auio == NULL) {
1479 error = ENOBUFS;
1480 goto done;
1481 }
1482
1483 if (user_msg.msg_iovlen) {
1484 /*
1485 * get location of iovecs within the uio.
1486 * then copyin the iovecs from user space.
1487 */
1488 iovp = uio_iovsaddr(auio);
1489 if (iovp == NULL) {
1490 error = ENOBUFS;
1491 goto done;
1492 }
1493 error = copyin_user_iovec_array(user_msg.msg_iov,
1494 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
1495 user_msg.msg_iovlen, iovp);
1496 if (error) {
1497 goto done;
1498 }
1499 user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
1500
1501 /* finish setup of uio_t */
1502 error = uio_calculateresid(auio);
1503 if (error) {
1504 goto done;
1505 }
1506 } else {
1507 user_msg.msg_iov = 0;
1508 }
1509
1510 /* msg_flags is ignored for send */
1511 user_msg.msg_flags = 0;
1512
1513 error = file_socket(uap->s, &so);
1514 if (error) {
1515 goto done;
1516 }
1517 if (so == NULL) {
1518 error = EBADF;
1519 } else {
1520 error = sendit(p, so, &user_msg, auio, uap->flags, retval);
1521 }
1522 file_drop(uap->s);
1523 done:
1524 if (auio != NULL) {
1525 uio_free(auio);
1526 }
1527 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1528
1529 return error;
1530 }
1531
1532 int
1533 sendmsg_x(struct proc *p, struct sendmsg_x_args *uap, user_ssize_t *retval)
1534 {
1535 int error = 0;
1536 struct user_msghdr_x *user_msg_x = NULL;
1537 struct uio **uiop = NULL;
1538 struct socket *so;
1539 u_int i;
1540 struct sockaddr *to = NULL;
1541 user_ssize_t len_before = 0, len_after;
1542 int need_drop = 0;
1543 size_t size_of_msghdr;
1544 void *umsgp = NULL;
1545 u_int uiocnt;
1546 int has_addr_or_ctl = 0;
1547
1548 KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0);
1549
1550 if (uap->flags & MSG_SKIPCFIL) {
1551 error = EPERM;
1552 goto out;
1553 }
1554
1555 error = file_socket(uap->s, &so);
1556 if (error) {
1557 goto out;
1558 }
1559 need_drop = 1;
1560 if (so == NULL) {
1561 error = EBADF;
1562 goto out;
1563 }
1564
1565 /*
1566 * Input parameter range check
1567 */
1568 if (uap->cnt == 0 || uap->cnt > UIO_MAXIOV) {
1569 error = EINVAL;
1570 goto out;
1571 }
1572 /*
1573 * Clip to max currently allowed
1574 */
1575 if (uap->cnt > somaxsendmsgx) {
1576 uap->cnt = somaxsendmsgx;
1577 }
1578
1579 user_msg_x = _MALLOC(uap->cnt * sizeof(struct user_msghdr_x),
1580 M_TEMP, M_WAITOK | M_ZERO);
1581 if (user_msg_x == NULL) {
1582 DBG_PRINTF("%s _MALLOC() user_msg_x failed\n", __func__);
1583 error = ENOMEM;
1584 goto out;
1585 }
1586 uiop = _MALLOC(uap->cnt * sizeof(struct uio *),
1587 M_TEMP, M_WAITOK | M_ZERO);
1588 if (uiop == NULL) {
1589 DBG_PRINTF("%s _MALLOC() uiop failed\n", __func__);
1590 error = ENOMEM;
1591 goto out;
1592 }
1593
1594 size_of_msghdr = IS_64BIT_PROCESS(p) ?
1595 sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x);
1596
1597 umsgp = _MALLOC(uap->cnt * size_of_msghdr,
1598 M_TEMP, M_WAITOK | M_ZERO);
1599 if (umsgp == NULL) {
1600 printf("%s _MALLOC() user_msg_x failed\n", __func__);
1601 error = ENOMEM;
1602 goto out;
1603 }
1604 error = copyin(uap->msgp, umsgp, uap->cnt * size_of_msghdr);
1605 if (error) {
1606 DBG_PRINTF("%s copyin() failed\n", __func__);
1607 goto out;
1608 }
1609 error = internalize_user_msghdr_array(umsgp,
1610 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
1611 UIO_WRITE, uap->cnt, user_msg_x, uiop);
1612 if (error) {
1613 DBG_PRINTF("%s copyin_user_msghdr_array() failed\n", __func__);
1614 goto out;
1615 }
1616 /*
1617 * Make sure the size of each message iovec and
1618 * the aggregate size of all the iovec is valid
1619 */
1620 if (uio_array_is_valid(uiop, uap->cnt) == 0) {
1621 error = EINVAL;
1622 goto out;
1623 }
1624
1625 /*
1626 * Sanity check on passed arguments
1627 */
1628 for (i = 0; i < uap->cnt; i++) {
1629 struct user_msghdr_x *mp = user_msg_x + i;
1630
1631 /*
1632 * No flags on send message
1633 */
1634 if (mp->msg_flags != 0) {
1635 error = EINVAL;
1636 goto out;
1637 }
1638 /*
1639 * No support for address or ancillary data (yet)
1640 */
1641 if (mp->msg_name != USER_ADDR_NULL || mp->msg_namelen != 0) {
1642 has_addr_or_ctl = 1;
1643 }
1644
1645 if (mp->msg_control != USER_ADDR_NULL ||
1646 mp->msg_controllen != 0) {
1647 has_addr_or_ctl = 1;
1648 }
1649
1650 #if CONFIG_MACF_SOCKET_SUBSET
1651 /*
1652 * We check the state without holding the socket lock;
1653 * if a race condition occurs, it would simply result
1654 * in an extra call to the MAC check function.
1655 *
1656 * Note: The following check is never true taken with the
1657 * current limitation that we do not accept to pass an address,
1658 * this is effectively placeholder code. If we add support for
1659 * addresses, we will have to check every address.
1660 */
1661 if (to != NULL &&
1662 !(so->so_state & SS_DEFUNCT) &&
1663 (error = mac_socket_check_send(kauth_cred_get(), so, to))
1664 != 0) {
1665 goto out;
1666 }
1667 #endif /* MAC_SOCKET_SUBSET */
1668 }
1669
1670 len_before = uio_array_resid(uiop, uap->cnt);
1671
1672 /*
1673 * Feed list of packets at once only for connected socket without
1674 * control message
1675 */
1676 if (so->so_proto->pr_usrreqs->pru_sosend_list !=
1677 pru_sosend_list_notsupp &&
1678 has_addr_or_ctl == 0 && somaxsendmsgx == 0) {
1679 error = so->so_proto->pr_usrreqs->pru_sosend_list(so, uiop,
1680 uap->cnt, uap->flags);
1681 } else {
1682 for (i = 0; i < uap->cnt; i++) {
1683 struct user_msghdr_x *mp = user_msg_x + i;
1684 struct user_msghdr user_msg;
1685 uio_t auio = uiop[i];
1686 int32_t tmpval;
1687
1688 user_msg.msg_flags = mp->msg_flags;
1689 user_msg.msg_controllen = mp->msg_controllen;
1690 user_msg.msg_control = mp->msg_control;
1691 user_msg.msg_iovlen = mp->msg_iovlen;
1692 user_msg.msg_iov = mp->msg_iov;
1693 user_msg.msg_namelen = mp->msg_namelen;
1694 user_msg.msg_name = mp->msg_name;
1695
1696 error = sendit(p, so, &user_msg, auio, uap->flags,
1697 &tmpval);
1698 if (error != 0) {
1699 break;
1700 }
1701 }
1702 }
1703 len_after = uio_array_resid(uiop, uap->cnt);
1704
1705 VERIFY(len_after <= len_before);
1706
1707 if (error != 0) {
1708 if (len_after != len_before && (error == ERESTART ||
1709 error == EINTR || error == EWOULDBLOCK ||
1710 error == ENOBUFS)) {
1711 error = 0;
1712 }
1713 /* Generation of SIGPIPE can be controlled per socket */
1714 if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE)) {
1715 psignal(p, SIGPIPE);
1716 }
1717 }
1718 if (error == 0) {
1719 uiocnt = externalize_user_msghdr_array(umsgp,
1720 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
1721 UIO_WRITE, uap->cnt, user_msg_x, uiop);
1722
1723 *retval = (int)(uiocnt);
1724 }
1725 out:
1726 if (need_drop) {
1727 file_drop(uap->s);
1728 }
1729 if (umsgp != NULL) {
1730 _FREE(umsgp, M_TEMP);
1731 }
1732 if (uiop != NULL) {
1733 free_uio_array(uiop, uap->cnt);
1734 _FREE(uiop, M_TEMP);
1735 }
1736 if (user_msg_x != NULL) {
1737 _FREE(user_msg_x, M_TEMP);
1738 }
1739
1740 KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0);
1741
1742 return error;
1743 }
1744
1745
1746 static int
1747 copyout_sa(struct sockaddr *fromsa, user_addr_t name, socklen_t *namelen)
1748 {
1749 int error = 0;
1750 socklen_t sa_len = 0;
1751 ssize_t len;
1752
1753 len = *namelen;
1754 if (len <= 0 || fromsa == 0) {
1755 len = 0;
1756 } else {
1757 #ifndef MIN
1758 #define MIN(a, b) ((a) > (b) ? (b) : (a))
1759 #endif
1760 sa_len = fromsa->sa_len;
1761 len = MIN((unsigned int)len, sa_len);
1762 error = copyout(fromsa, name, (unsigned)len);
1763 if (error) {
1764 goto out;
1765 }
1766 }
1767 *namelen = sa_len;
1768 out:
1769 return 0;
1770 }
1771
1772 static int
1773 copyout_control(struct proc *p, struct mbuf *m, user_addr_t control,
1774 socklen_t *controllen, int *flags)
1775 {
1776 int error = 0;
1777 ssize_t len;
1778 user_addr_t ctlbuf;
1779
1780 len = *controllen;
1781 *controllen = 0;
1782 ctlbuf = control;
1783
1784 while (m && len > 0) {
1785 unsigned int tocopy;
1786 struct cmsghdr *cp = mtod(m, struct cmsghdr *);
1787 int cp_size = CMSG_ALIGN(cp->cmsg_len);
1788 int buflen = m->m_len;
1789
1790 while (buflen > 0 && len > 0) {
1791 /*
1792 * SCM_TIMESTAMP hack because struct timeval has a
1793 * different size for 32 bits and 64 bits processes
1794 */
1795 if (cp->cmsg_level == SOL_SOCKET && cp->cmsg_type == SCM_TIMESTAMP) {
1796 unsigned char tmp_buffer[CMSG_SPACE(sizeof(struct user64_timeval))] = {};
1797 struct cmsghdr *tmp_cp = (struct cmsghdr *)(void *)tmp_buffer;
1798 int tmp_space;
1799 struct timeval *tv = (struct timeval *)(void *)CMSG_DATA(cp);
1800
1801 tmp_cp->cmsg_level = SOL_SOCKET;
1802 tmp_cp->cmsg_type = SCM_TIMESTAMP;
1803
1804 if (proc_is64bit(p)) {
1805 struct user64_timeval *tv64 = (struct user64_timeval *)(void *)CMSG_DATA(tmp_cp);
1806
1807 os_unaligned_deref(&tv64->tv_sec) = tv->tv_sec;
1808 os_unaligned_deref(&tv64->tv_usec) = tv->tv_usec;
1809
1810 tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user64_timeval));
1811 tmp_space = CMSG_SPACE(sizeof(struct user64_timeval));
1812 } else {
1813 struct user32_timeval *tv32 = (struct user32_timeval *)(void *)CMSG_DATA(tmp_cp);
1814
1815 tv32->tv_sec = tv->tv_sec;
1816 tv32->tv_usec = tv->tv_usec;
1817
1818 tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user32_timeval));
1819 tmp_space = CMSG_SPACE(sizeof(struct user32_timeval));
1820 }
1821 if (len >= tmp_space) {
1822 tocopy = tmp_space;
1823 } else {
1824 *flags |= MSG_CTRUNC;
1825 tocopy = len;
1826 }
1827 error = copyout(tmp_buffer, ctlbuf, tocopy);
1828 if (error) {
1829 goto out;
1830 }
1831 } else {
1832 if (cp_size > buflen) {
1833 panic("cp_size > buflen, something"
1834 "wrong with alignment!");
1835 }
1836 if (len >= cp_size) {
1837 tocopy = cp_size;
1838 } else {
1839 *flags |= MSG_CTRUNC;
1840 tocopy = len;
1841 }
1842 error = copyout((caddr_t) cp, ctlbuf, tocopy);
1843 if (error) {
1844 goto out;
1845 }
1846 }
1847
1848 ctlbuf += tocopy;
1849 len -= tocopy;
1850
1851 buflen -= cp_size;
1852 cp = (struct cmsghdr *)(void *)
1853 ((unsigned char *) cp + cp_size);
1854 cp_size = CMSG_ALIGN(cp->cmsg_len);
1855 }
1856
1857 m = m->m_next;
1858 }
1859 *controllen = ctlbuf - control;
1860 out:
1861 return error;
1862 }
1863
1864 /*
1865 * Returns: 0 Success
1866 * ENOTSOCK
1867 * EINVAL
1868 * EBADF
1869 * EACCES Mandatory Access Control failure
1870 * copyout:EFAULT
1871 * fp_lookup:EBADF
1872 * <pru_soreceive>:ENOBUFS
1873 * <pru_soreceive>:ENOTCONN
1874 * <pru_soreceive>:EWOULDBLOCK
1875 * <pru_soreceive>:EFAULT
1876 * <pru_soreceive>:EINTR
1877 * <pru_soreceive>:EBADF
1878 * <pru_soreceive>:EINVAL
1879 * <pru_soreceive>:EMSGSIZE
1880 * <pru_soreceive>:???
1881 *
1882 * Notes: Additional return values from calls through <pru_soreceive>
1883 * depend on protocols other than TCP or AF_UNIX, which are
1884 * documented above.
1885 */
1886 static int
1887 recvit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop,
1888 user_addr_t namelenp, int32_t *retval)
1889 {
1890 ssize_t len;
1891 int error;
1892 struct mbuf *control = 0;
1893 struct socket *so;
1894 struct sockaddr *fromsa = 0;
1895 struct fileproc *fp;
1896
1897 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
1898 proc_fdlock(p);
1899 if ((error = fp_lookup(p, s, &fp, 1))) {
1900 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
1901 proc_fdunlock(p);
1902 return error;
1903 }
1904 if (fp->f_type != DTYPE_SOCKET) {
1905 fp_drop(p, s, fp, 1);
1906 proc_fdunlock(p);
1907 return ENOTSOCK;
1908 }
1909
1910 so = (struct socket *)fp->f_data;
1911 if (so == NULL) {
1912 fp_drop(p, s, fp, 1);
1913 proc_fdunlock(p);
1914 return EBADF;
1915 }
1916
1917 proc_fdunlock(p);
1918
1919 #if CONFIG_MACF_SOCKET_SUBSET
1920 /*
1921 * We check the state without holding the socket lock;
1922 * if a race condition occurs, it would simply result
1923 * in an extra call to the MAC check function.
1924 */
1925 if (!(so->so_state & SS_DEFUNCT) &&
1926 !(so->so_state & SS_ISCONNECTED) &&
1927 !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
1928 (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0) {
1929 goto out1;
1930 }
1931 #endif /* MAC_SOCKET_SUBSET */
1932 if (uio_resid(uiop) < 0) {
1933 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, EINVAL, 0, 0, 0, 0);
1934 error = EINVAL;
1935 goto out1;
1936 }
1937
1938 len = uio_resid(uiop);
1939 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, uiop,
1940 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
1941 &mp->msg_flags);
1942 if (fromsa) {
1943 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()),
1944 fromsa);
1945 }
1946 if (error) {
1947 if (uio_resid(uiop) != len && (error == ERESTART ||
1948 error == EINTR || error == EWOULDBLOCK)) {
1949 error = 0;
1950 }
1951 }
1952 if (error) {
1953 goto out;
1954 }
1955
1956 *retval = len - uio_resid(uiop);
1957
1958 if (mp->msg_name) {
1959 error = copyout_sa(fromsa, mp->msg_name, &mp->msg_namelen);
1960 if (error) {
1961 goto out;
1962 }
1963 /* return the actual, untruncated address length */
1964 if (namelenp &&
1965 (error = copyout((caddr_t)&mp->msg_namelen, namelenp,
1966 sizeof(int)))) {
1967 goto out;
1968 }
1969 }
1970
1971 if (mp->msg_control) {
1972 error = copyout_control(p, control, mp->msg_control,
1973 &mp->msg_controllen, &mp->msg_flags);
1974 }
1975 out:
1976 if (fromsa) {
1977 FREE(fromsa, M_SONAME);
1978 }
1979 if (control) {
1980 m_freem(control);
1981 }
1982 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
1983 out1:
1984 fp_drop(p, s, fp, 0);
1985 return error;
1986 }
1987
1988 /*
1989 * Returns: 0 Success
1990 * ENOMEM
1991 * copyin:EFAULT
1992 * recvit:???
1993 * read:??? [4056224: applicable for pipes]
1994 *
1995 * Notes: The read entry point is only called as part of support for
1996 * binary backward compatability; new code should use read
1997 * instead of recv or recvfrom when attempting to read data
1998 * from pipes.
1999 *
2000 * For full documentation of the return codes from recvit, see
2001 * the block header for the recvit function.
2002 */
2003 int
2004 recvfrom(struct proc *p, struct recvfrom_args *uap, int32_t *retval)
2005 {
2006 __pthread_testcancel(1);
2007 return recvfrom_nocancel(p, (struct recvfrom_nocancel_args *)uap,
2008 retval);
2009 }
2010
2011 int
2012 recvfrom_nocancel(struct proc *p, struct recvfrom_nocancel_args *uap,
2013 int32_t *retval)
2014 {
2015 struct user_msghdr msg;
2016 int error;
2017 uio_t auio = NULL;
2018
2019 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_START, 0, 0, 0, 0, 0);
2020 AUDIT_ARG(fd, uap->s);
2021
2022 if (uap->fromlenaddr) {
2023 error = copyin(uap->fromlenaddr,
2024 (caddr_t)&msg.msg_namelen, sizeof(msg.msg_namelen));
2025 if (error) {
2026 return error;
2027 }
2028 } else {
2029 msg.msg_namelen = 0;
2030 }
2031 msg.msg_name = uap->from;
2032 auio = uio_create(1, 0,
2033 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
2034 UIO_READ);
2035 if (auio == NULL) {
2036 return ENOMEM;
2037 }
2038
2039 uio_addiov(auio, uap->buf, uap->len);
2040 /* no need to set up msg_iov. recvit uses uio_t we send it */
2041 msg.msg_iov = 0;
2042 msg.msg_iovlen = 0;
2043 msg.msg_control = 0;
2044 msg.msg_controllen = 0;
2045 msg.msg_flags = uap->flags;
2046 error = recvit(p, uap->s, &msg, auio, uap->fromlenaddr, retval);
2047 if (auio != NULL) {
2048 uio_free(auio);
2049 }
2050
2051 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_END, error, 0, 0, 0, 0);
2052
2053 return error;
2054 }
2055
2056 /*
2057 * Returns: 0 Success
2058 * EMSGSIZE
2059 * ENOMEM
2060 * copyin:EFAULT
2061 * copyout:EFAULT
2062 * recvit:???
2063 *
2064 * Notes: For full documentation of the return codes from recvit, see
2065 * the block header for the recvit function.
2066 */
2067 int
2068 recvmsg(struct proc *p, struct recvmsg_args *uap, int32_t *retval)
2069 {
2070 __pthread_testcancel(1);
2071 return recvmsg_nocancel(p, (struct recvmsg_nocancel_args *)uap,
2072 retval);
2073 }
2074
2075 int
2076 recvmsg_nocancel(struct proc *p, struct recvmsg_nocancel_args *uap,
2077 int32_t *retval)
2078 {
2079 struct user32_msghdr msg32;
2080 struct user64_msghdr msg64;
2081 struct user_msghdr user_msg;
2082 caddr_t msghdrp;
2083 int size_of_msghdr;
2084 user_addr_t uiov;
2085 int error;
2086 uio_t auio = NULL;
2087 struct user_iovec *iovp;
2088
2089 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
2090 AUDIT_ARG(fd, uap->s);
2091 if (IS_64BIT_PROCESS(p)) {
2092 msghdrp = (caddr_t)&msg64;
2093 size_of_msghdr = sizeof(msg64);
2094 } else {
2095 msghdrp = (caddr_t)&msg32;
2096 size_of_msghdr = sizeof(msg32);
2097 }
2098 error = copyin(uap->msg, msghdrp, size_of_msghdr);
2099 if (error) {
2100 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
2101 return error;
2102 }
2103
2104 /* only need to copy if user process is not 64-bit */
2105 if (IS_64BIT_PROCESS(p)) {
2106 user_msg.msg_flags = msg64.msg_flags;
2107 user_msg.msg_controllen = msg64.msg_controllen;
2108 user_msg.msg_control = msg64.msg_control;
2109 user_msg.msg_iovlen = msg64.msg_iovlen;
2110 user_msg.msg_iov = msg64.msg_iov;
2111 user_msg.msg_namelen = msg64.msg_namelen;
2112 user_msg.msg_name = msg64.msg_name;
2113 } else {
2114 user_msg.msg_flags = msg32.msg_flags;
2115 user_msg.msg_controllen = msg32.msg_controllen;
2116 user_msg.msg_control = msg32.msg_control;
2117 user_msg.msg_iovlen = msg32.msg_iovlen;
2118 user_msg.msg_iov = msg32.msg_iov;
2119 user_msg.msg_namelen = msg32.msg_namelen;
2120 user_msg.msg_name = msg32.msg_name;
2121 }
2122
2123 if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
2124 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, EMSGSIZE,
2125 0, 0, 0, 0);
2126 return EMSGSIZE;
2127 }
2128
2129 user_msg.msg_flags = uap->flags;
2130
2131 /* allocate a uio large enough to hold the number of iovecs passed */
2132 auio = uio_create(user_msg.msg_iovlen, 0,
2133 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
2134 UIO_READ);
2135 if (auio == NULL) {
2136 error = ENOMEM;
2137 goto done;
2138 }
2139
2140 /*
2141 * get location of iovecs within the uio. then copyin the iovecs from
2142 * user space.
2143 */
2144 iovp = uio_iovsaddr(auio);
2145 if (iovp == NULL) {
2146 error = ENOMEM;
2147 goto done;
2148 }
2149 uiov = user_msg.msg_iov;
2150 user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
2151 error = copyin_user_iovec_array(uiov,
2152 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
2153 user_msg.msg_iovlen, iovp);
2154 if (error) {
2155 goto done;
2156 }
2157
2158 /* finish setup of uio_t */
2159 error = uio_calculateresid(auio);
2160 if (error) {
2161 goto done;
2162 }
2163
2164 error = recvit(p, uap->s, &user_msg, auio, 0, retval);
2165 if (!error) {
2166 user_msg.msg_iov = uiov;
2167 if (IS_64BIT_PROCESS(p)) {
2168 msg64.msg_flags = user_msg.msg_flags;
2169 msg64.msg_controllen = user_msg.msg_controllen;
2170 msg64.msg_control = user_msg.msg_control;
2171 msg64.msg_iovlen = user_msg.msg_iovlen;
2172 msg64.msg_iov = user_msg.msg_iov;
2173 msg64.msg_namelen = user_msg.msg_namelen;
2174 msg64.msg_name = user_msg.msg_name;
2175 } else {
2176 msg32.msg_flags = user_msg.msg_flags;
2177 msg32.msg_controllen = user_msg.msg_controllen;
2178 msg32.msg_control = user_msg.msg_control;
2179 msg32.msg_iovlen = user_msg.msg_iovlen;
2180 msg32.msg_iov = user_msg.msg_iov;
2181 msg32.msg_namelen = user_msg.msg_namelen;
2182 msg32.msg_name = user_msg.msg_name;
2183 }
2184 error = copyout(msghdrp, uap->msg, size_of_msghdr);
2185 }
2186 done:
2187 if (auio != NULL) {
2188 uio_free(auio);
2189 }
2190 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
2191 return error;
2192 }
2193
2194 int
2195 recvmsg_x(struct proc *p, struct recvmsg_x_args *uap, user_ssize_t *retval)
2196 {
2197 int error = EOPNOTSUPP;
2198 struct user_msghdr_x *user_msg_x = NULL;
2199 struct recv_msg_elem *recv_msg_array = NULL;
2200 struct socket *so;
2201 user_ssize_t len_before = 0, len_after;
2202 int need_drop = 0;
2203 size_t size_of_msghdr;
2204 void *umsgp = NULL;
2205 u_int i;
2206 u_int uiocnt;
2207
2208 KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0);
2209
2210 error = file_socket(uap->s, &so);
2211 if (error) {
2212 goto out;
2213 }
2214 need_drop = 1;
2215 if (so == NULL) {
2216 error = EBADF;
2217 goto out;
2218 }
2219 /*
2220 * Input parameter range check
2221 */
2222 if (uap->cnt == 0 || uap->cnt > UIO_MAXIOV) {
2223 error = EINVAL;
2224 goto out;
2225 }
2226 if (uap->cnt > somaxrecvmsgx) {
2227 uap->cnt = somaxrecvmsgx;
2228 }
2229
2230 user_msg_x = _MALLOC(uap->cnt * sizeof(struct user_msghdr_x),
2231 M_TEMP, M_WAITOK | M_ZERO);
2232 if (user_msg_x == NULL) {
2233 DBG_PRINTF("%s _MALLOC() user_msg_x failed\n", __func__);
2234 error = ENOMEM;
2235 goto out;
2236 }
2237 recv_msg_array = alloc_recv_msg_array(uap->cnt);
2238 if (recv_msg_array == NULL) {
2239 DBG_PRINTF("%s alloc_recv_msg_array() failed\n", __func__);
2240 error = ENOMEM;
2241 goto out;
2242 }
2243 size_of_msghdr = IS_64BIT_PROCESS(p) ?
2244 sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x);
2245
2246 umsgp = _MALLOC(uap->cnt * size_of_msghdr, M_TEMP, M_WAITOK | M_ZERO);
2247 if (umsgp == NULL) {
2248 DBG_PRINTF("%s _MALLOC() umsgp failed\n", __func__);
2249 error = ENOMEM;
2250 goto out;
2251 }
2252 error = copyin(uap->msgp, umsgp, uap->cnt * size_of_msghdr);
2253 if (error) {
2254 DBG_PRINTF("%s copyin() failed\n", __func__);
2255 goto out;
2256 }
2257 error = internalize_recv_msghdr_array(umsgp,
2258 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
2259 UIO_READ, uap->cnt, user_msg_x, recv_msg_array);
2260 if (error) {
2261 DBG_PRINTF("%s copyin_user_msghdr_array() failed\n", __func__);
2262 goto out;
2263 }
2264 /*
2265 * Make sure the size of each message iovec and
2266 * the aggregate size of all the iovec is valid
2267 */
2268 if (recv_msg_array_is_valid(recv_msg_array, uap->cnt) == 0) {
2269 error = EINVAL;
2270 goto out;
2271 }
2272 /*
2273 * Sanity check on passed arguments
2274 */
2275 for (i = 0; i < uap->cnt; i++) {
2276 struct user_msghdr_x *mp = user_msg_x + i;
2277
2278 if (mp->msg_flags != 0) {
2279 error = EINVAL;
2280 goto out;
2281 }
2282 }
2283 #if CONFIG_MACF_SOCKET_SUBSET
2284 /*
2285 * We check the state without holding the socket lock;
2286 * if a race condition occurs, it would simply result
2287 * in an extra call to the MAC check function.
2288 */
2289 if (!(so->so_state & SS_DEFUNCT) &&
2290 !(so->so_state & SS_ISCONNECTED) &&
2291 !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
2292 (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0) {
2293 goto out;
2294 }
2295 #endif /* MAC_SOCKET_SUBSET */
2296
2297 len_before = recv_msg_array_resid(recv_msg_array, uap->cnt);
2298
2299 if (so->so_proto->pr_usrreqs->pru_soreceive_list !=
2300 pru_soreceive_list_notsupp &&
2301 somaxrecvmsgx == 0) {
2302 error = so->so_proto->pr_usrreqs->pru_soreceive_list(so,
2303 recv_msg_array, uap->cnt, &uap->flags);
2304 } else {
2305 int flags = uap->flags;
2306
2307 for (i = 0; i < uap->cnt; i++) {
2308 struct recv_msg_elem *recv_msg_elem;
2309 uio_t auio;
2310 struct sockaddr **psa;
2311 struct mbuf **controlp;
2312
2313 recv_msg_elem = recv_msg_array + i;
2314 auio = recv_msg_elem->uio;
2315
2316 /*
2317 * Do not block if we got at least one packet
2318 */
2319 if (i > 0) {
2320 flags |= MSG_DONTWAIT;
2321 }
2322
2323 psa = (recv_msg_elem->which & SOCK_MSG_SA) ?
2324 &recv_msg_elem->psa : NULL;
2325 controlp = (recv_msg_elem->which & SOCK_MSG_CONTROL) ?
2326 &recv_msg_elem->controlp : NULL;
2327
2328 error = so->so_proto->pr_usrreqs->pru_soreceive(so, psa,
2329 auio, (struct mbuf **)0, controlp, &flags);
2330 if (error) {
2331 break;
2332 }
2333 /*
2334 * We have some data
2335 */
2336 recv_msg_elem->which |= SOCK_MSG_DATA;
2337 /*
2338 * Stop on partial copy
2339 */
2340 if (flags & (MSG_RCVMORE | MSG_TRUNC)) {
2341 break;
2342 }
2343 }
2344 if ((uap->flags & MSG_DONTWAIT) == 0) {
2345 flags &= ~MSG_DONTWAIT;
2346 }
2347 uap->flags = flags;
2348 }
2349
2350 len_after = recv_msg_array_resid(recv_msg_array, uap->cnt);
2351
2352 if (error) {
2353 if (len_after != len_before && (error == ERESTART ||
2354 error == EINTR || error == EWOULDBLOCK)) {
2355 error = 0;
2356 } else {
2357 goto out;
2358 }
2359 }
2360
2361 uiocnt = externalize_recv_msghdr_array(umsgp,
2362 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
2363 UIO_READ, uap->cnt, user_msg_x, recv_msg_array);
2364
2365 error = copyout(umsgp, uap->msgp, uap->cnt * size_of_msghdr);
2366 if (error) {
2367 DBG_PRINTF("%s copyout() failed\n", __func__);
2368 goto out;
2369 }
2370 *retval = (int)(uiocnt);
2371
2372 for (i = 0; i < uap->cnt; i++) {
2373 struct user_msghdr_x *mp = user_msg_x + i;
2374 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
2375 struct sockaddr *fromsa = recv_msg_elem->psa;
2376
2377 if (mp->msg_name) {
2378 error = copyout_sa(fromsa, mp->msg_name,
2379 &mp->msg_namelen);
2380 if (error) {
2381 goto out;
2382 }
2383 }
2384 if (mp->msg_control) {
2385 error = copyout_control(p, recv_msg_elem->controlp,
2386 mp->msg_control, &mp->msg_controllen,
2387 &mp->msg_flags);
2388 if (error) {
2389 goto out;
2390 }
2391 }
2392 }
2393 out:
2394 if (need_drop) {
2395 file_drop(uap->s);
2396 }
2397 if (umsgp != NULL) {
2398 _FREE(umsgp, M_TEMP);
2399 }
2400 if (recv_msg_array != NULL) {
2401 free_recv_msg_array(recv_msg_array, uap->cnt);
2402 }
2403 if (user_msg_x != NULL) {
2404 _FREE(user_msg_x, M_TEMP);
2405 }
2406
2407 KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0);
2408
2409 return error;
2410 }
2411
2412 /*
2413 * Returns: 0 Success
2414 * EBADF
2415 * file_socket:ENOTSOCK
2416 * file_socket:EBADF
2417 * soshutdown:EINVAL
2418 * soshutdown:ENOTCONN
2419 * soshutdown:EADDRNOTAVAIL[TCP]
2420 * soshutdown:ENOBUFS[TCP]
2421 * soshutdown:EMSGSIZE[TCP]
2422 * soshutdown:EHOSTUNREACH[TCP]
2423 * soshutdown:ENETUNREACH[TCP]
2424 * soshutdown:ENETDOWN[TCP]
2425 * soshutdown:ENOMEM[TCP]
2426 * soshutdown:EACCES[TCP]
2427 * soshutdown:EMSGSIZE[TCP]
2428 * soshutdown:ENOBUFS[TCP]
2429 * soshutdown:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
2430 * soshutdown:??? [other protocol families]
2431 */
2432 /* ARGSUSED */
2433 int
2434 shutdown(__unused struct proc *p, struct shutdown_args *uap,
2435 __unused int32_t *retval)
2436 {
2437 struct socket *so;
2438 int error;
2439
2440 AUDIT_ARG(fd, uap->s);
2441 error = file_socket(uap->s, &so);
2442 if (error) {
2443 return error;
2444 }
2445 if (so == NULL) {
2446 error = EBADF;
2447 goto out;
2448 }
2449 error = soshutdown((struct socket *)so, uap->how);
2450 out:
2451 file_drop(uap->s);
2452 return error;
2453 }
2454
2455 /*
2456 * Returns: 0 Success
2457 * EFAULT
2458 * EINVAL
2459 * EACCES Mandatory Access Control failure
2460 * file_socket:ENOTSOCK
2461 * file_socket:EBADF
2462 * sosetopt:EINVAL
2463 * sosetopt:ENOPROTOOPT
2464 * sosetopt:ENOBUFS
2465 * sosetopt:EDOM
2466 * sosetopt:EFAULT
2467 * sosetopt:EOPNOTSUPP[AF_UNIX]
2468 * sosetopt:???
2469 */
2470 /* ARGSUSED */
2471 int
2472 setsockopt(struct proc *p, struct setsockopt_args *uap,
2473 __unused int32_t *retval)
2474 {
2475 struct socket *so;
2476 struct sockopt sopt;
2477 int error;
2478
2479 AUDIT_ARG(fd, uap->s);
2480 if (uap->val == 0 && uap->valsize != 0) {
2481 return EFAULT;
2482 }
2483 /* No bounds checking on size (it's unsigned) */
2484
2485 error = file_socket(uap->s, &so);
2486 if (error) {
2487 return error;
2488 }
2489
2490 sopt.sopt_dir = SOPT_SET;
2491 sopt.sopt_level = uap->level;
2492 sopt.sopt_name = uap->name;
2493 sopt.sopt_val = uap->val;
2494 sopt.sopt_valsize = uap->valsize;
2495 sopt.sopt_p = p;
2496
2497 if (so == NULL) {
2498 error = EINVAL;
2499 goto out;
2500 }
2501 #if CONFIG_MACF_SOCKET_SUBSET
2502 if ((error = mac_socket_check_setsockopt(kauth_cred_get(), so,
2503 &sopt)) != 0) {
2504 goto out;
2505 }
2506 #endif /* MAC_SOCKET_SUBSET */
2507 error = sosetoptlock(so, &sopt, 1); /* will lock socket */
2508 out:
2509 file_drop(uap->s);
2510 return error;
2511 }
2512
2513
2514
2515 /*
2516 * Returns: 0 Success
2517 * EINVAL
2518 * EBADF
2519 * EACCES Mandatory Access Control failure
2520 * copyin:EFAULT
2521 * copyout:EFAULT
2522 * file_socket:ENOTSOCK
2523 * file_socket:EBADF
2524 * sogetopt:???
2525 */
2526 int
2527 getsockopt(struct proc *p, struct getsockopt_args *uap,
2528 __unused int32_t *retval)
2529 {
2530 int error;
2531 socklen_t valsize;
2532 struct sockopt sopt;
2533 struct socket *so;
2534
2535 error = file_socket(uap->s, &so);
2536 if (error) {
2537 return error;
2538 }
2539 if (uap->val) {
2540 error = copyin(uap->avalsize, (caddr_t)&valsize,
2541 sizeof(valsize));
2542 if (error) {
2543 goto out;
2544 }
2545 /* No bounds checking on size (it's unsigned) */
2546 } else {
2547 valsize = 0;
2548 }
2549 sopt.sopt_dir = SOPT_GET;
2550 sopt.sopt_level = uap->level;
2551 sopt.sopt_name = uap->name;
2552 sopt.sopt_val = uap->val;
2553 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
2554 sopt.sopt_p = p;
2555
2556 if (so == NULL) {
2557 error = EBADF;
2558 goto out;
2559 }
2560 #if CONFIG_MACF_SOCKET_SUBSET
2561 if ((error = mac_socket_check_getsockopt(kauth_cred_get(), so,
2562 &sopt)) != 0) {
2563 goto out;
2564 }
2565 #endif /* MAC_SOCKET_SUBSET */
2566 error = sogetoptlock((struct socket *)so, &sopt, 1); /* will lock */
2567 if (error == 0) {
2568 valsize = sopt.sopt_valsize;
2569 error = copyout((caddr_t)&valsize, uap->avalsize,
2570 sizeof(valsize));
2571 }
2572 out:
2573 file_drop(uap->s);
2574 return error;
2575 }
2576
2577
2578 /*
2579 * Get socket name.
2580 *
2581 * Returns: 0 Success
2582 * EBADF
2583 * file_socket:ENOTSOCK
2584 * file_socket:EBADF
2585 * copyin:EFAULT
2586 * copyout:EFAULT
2587 * <pru_sockaddr>:ENOBUFS[TCP]
2588 * <pru_sockaddr>:ECONNRESET[TCP]
2589 * <pru_sockaddr>:EINVAL[AF_UNIX]
2590 * <sf_getsockname>:???
2591 */
2592 /* ARGSUSED */
2593 int
2594 getsockname(__unused struct proc *p, struct getsockname_args *uap,
2595 __unused int32_t *retval)
2596 {
2597 struct socket *so;
2598 struct sockaddr *sa;
2599 socklen_t len;
2600 socklen_t sa_len;
2601 int error;
2602
2603 error = file_socket(uap->fdes, &so);
2604 if (error) {
2605 return error;
2606 }
2607 error = copyin(uap->alen, (caddr_t)&len, sizeof(socklen_t));
2608 if (error) {
2609 goto out;
2610 }
2611 if (so == NULL) {
2612 error = EBADF;
2613 goto out;
2614 }
2615 sa = 0;
2616 socket_lock(so, 1);
2617 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
2618 if (error == 0) {
2619 error = sflt_getsockname(so, &sa);
2620 if (error == EJUSTRETURN) {
2621 error = 0;
2622 }
2623 }
2624 socket_unlock(so, 1);
2625 if (error) {
2626 goto bad;
2627 }
2628 if (sa == 0) {
2629 len = 0;
2630 goto gotnothing;
2631 }
2632
2633 sa_len = sa->sa_len;
2634 len = MIN(len, sa_len);
2635 error = copyout((caddr_t)sa, uap->asa, len);
2636 if (error) {
2637 goto bad;
2638 }
2639 /* return the actual, untruncated address length */
2640 len = sa_len;
2641 gotnothing:
2642 error = copyout((caddr_t)&len, uap->alen, sizeof(socklen_t));
2643 bad:
2644 if (sa) {
2645 FREE(sa, M_SONAME);
2646 }
2647 out:
2648 file_drop(uap->fdes);
2649 return error;
2650 }
2651
2652 /*
2653 * Get name of peer for connected socket.
2654 *
2655 * Returns: 0 Success
2656 * EBADF
2657 * EINVAL
2658 * ENOTCONN
2659 * file_socket:ENOTSOCK
2660 * file_socket:EBADF
2661 * copyin:EFAULT
2662 * copyout:EFAULT
2663 * <pru_peeraddr>:???
2664 * <sf_getpeername>:???
2665 */
2666 /* ARGSUSED */
2667 int
2668 getpeername(__unused struct proc *p, struct getpeername_args *uap,
2669 __unused int32_t *retval)
2670 {
2671 struct socket *so;
2672 struct sockaddr *sa;
2673 socklen_t len;
2674 socklen_t sa_len;
2675 int error;
2676
2677 error = file_socket(uap->fdes, &so);
2678 if (error) {
2679 return error;
2680 }
2681 if (so == NULL) {
2682 error = EBADF;
2683 goto out;
2684 }
2685
2686 socket_lock(so, 1);
2687
2688 if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
2689 (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
2690 /* the socket has been shutdown, no more getpeername's */
2691 socket_unlock(so, 1);
2692 error = EINVAL;
2693 goto out;
2694 }
2695
2696 if ((so->so_state & (SS_ISCONNECTED | SS_ISCONFIRMING)) == 0) {
2697 socket_unlock(so, 1);
2698 error = ENOTCONN;
2699 goto out;
2700 }
2701 error = copyin(uap->alen, (caddr_t)&len, sizeof(socklen_t));
2702 if (error) {
2703 socket_unlock(so, 1);
2704 goto out;
2705 }
2706 sa = 0;
2707 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
2708 if (error == 0) {
2709 error = sflt_getpeername(so, &sa);
2710 if (error == EJUSTRETURN) {
2711 error = 0;
2712 }
2713 }
2714 socket_unlock(so, 1);
2715 if (error) {
2716 goto bad;
2717 }
2718 if (sa == 0) {
2719 len = 0;
2720 goto gotnothing;
2721 }
2722 sa_len = sa->sa_len;
2723 len = MIN(len, sa_len);
2724 error = copyout(sa, uap->asa, len);
2725 if (error) {
2726 goto bad;
2727 }
2728 /* return the actual, untruncated address length */
2729 len = sa_len;
2730 gotnothing:
2731 error = copyout((caddr_t)&len, uap->alen, sizeof(socklen_t));
2732 bad:
2733 if (sa) {
2734 FREE(sa, M_SONAME);
2735 }
2736 out:
2737 file_drop(uap->fdes);
2738 return error;
2739 }
2740
2741 int
2742 sockargs(struct mbuf **mp, user_addr_t data, int buflen, int type)
2743 {
2744 struct sockaddr *sa;
2745 struct mbuf *m;
2746 int error;
2747
2748 size_t alloc_buflen = (size_t)buflen;
2749
2750 if (alloc_buflen > INT_MAX / 2) {
2751 return EINVAL;
2752 }
2753 #ifdef __LP64__
2754 /*
2755 * The fd's in the buffer must expand to be pointers, thus we need twice
2756 * as much space
2757 */
2758 if (type == MT_CONTROL) {
2759 alloc_buflen = ((buflen - sizeof(struct cmsghdr)) * 2) +
2760 sizeof(struct cmsghdr);
2761 }
2762 #endif
2763 if (alloc_buflen > MLEN) {
2764 if (type == MT_SONAME && alloc_buflen <= 112) {
2765 alloc_buflen = MLEN; /* unix domain compat. hack */
2766 } else if (alloc_buflen > MCLBYTES) {
2767 return EINVAL;
2768 }
2769 }
2770 m = m_get(M_WAIT, type);
2771 if (m == NULL) {
2772 return ENOBUFS;
2773 }
2774 if (alloc_buflen > MLEN) {
2775 MCLGET(m, M_WAIT);
2776 if ((m->m_flags & M_EXT) == 0) {
2777 m_free(m);
2778 return ENOBUFS;
2779 }
2780 }
2781 /*
2782 * K64: We still copyin the original buflen because it gets expanded
2783 * later and we lie about the size of the mbuf because it only affects
2784 * unp_* functions
2785 */
2786 m->m_len = buflen;
2787 error = copyin(data, mtod(m, caddr_t), (u_int)buflen);
2788 if (error) {
2789 (void) m_free(m);
2790 } else {
2791 *mp = m;
2792 if (type == MT_SONAME) {
2793 sa = mtod(m, struct sockaddr *);
2794 sa->sa_len = buflen;
2795 }
2796 }
2797 return error;
2798 }
2799
2800 /*
2801 * Given a user_addr_t of length len, allocate and fill out a *sa.
2802 *
2803 * Returns: 0 Success
2804 * ENAMETOOLONG Filename too long
2805 * EINVAL Invalid argument
2806 * ENOMEM Not enough space
2807 * copyin:EFAULT Bad address
2808 */
2809 static int
2810 getsockaddr(struct socket *so, struct sockaddr **namp, user_addr_t uaddr,
2811 size_t len, boolean_t translate_unspec)
2812 {
2813 struct sockaddr *sa;
2814 int error;
2815
2816 if (len > SOCK_MAXADDRLEN) {
2817 return ENAMETOOLONG;
2818 }
2819
2820 if (len < offsetof(struct sockaddr, sa_data[0])) {
2821 return EINVAL;
2822 }
2823
2824 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK | M_ZERO);
2825 if (sa == NULL) {
2826 return ENOMEM;
2827 }
2828 error = copyin(uaddr, (caddr_t)sa, len);
2829 if (error) {
2830 FREE(sa, M_SONAME);
2831 } else {
2832 /*
2833 * Force sa_family to AF_INET on AF_INET sockets to handle
2834 * legacy applications that use AF_UNSPEC (0). On all other
2835 * sockets we leave it unchanged and let the lower layer
2836 * handle it.
2837 */
2838 if (translate_unspec && sa->sa_family == AF_UNSPEC &&
2839 SOCK_CHECK_DOM(so, PF_INET) &&
2840 len == sizeof(struct sockaddr_in)) {
2841 sa->sa_family = AF_INET;
2842 }
2843
2844 sa->sa_len = len;
2845 *namp = sa;
2846 }
2847 return error;
2848 }
2849
2850 static int
2851 getsockaddr_s(struct socket *so, struct sockaddr_storage *ss,
2852 user_addr_t uaddr, size_t len, boolean_t translate_unspec)
2853 {
2854 int error;
2855
2856 if (ss == NULL || uaddr == USER_ADDR_NULL ||
2857 len < offsetof(struct sockaddr, sa_data[0])) {
2858 return EINVAL;
2859 }
2860
2861 /*
2862 * sockaddr_storage size is less than SOCK_MAXADDRLEN,
2863 * so the check here is inclusive.
2864 */
2865 if (len > sizeof(*ss)) {
2866 return ENAMETOOLONG;
2867 }
2868
2869 bzero(ss, sizeof(*ss));
2870 error = copyin(uaddr, (caddr_t)ss, len);
2871 if (error == 0) {
2872 /*
2873 * Force sa_family to AF_INET on AF_INET sockets to handle
2874 * legacy applications that use AF_UNSPEC (0). On all other
2875 * sockets we leave it unchanged and let the lower layer
2876 * handle it.
2877 */
2878 if (translate_unspec && ss->ss_family == AF_UNSPEC &&
2879 SOCK_CHECK_DOM(so, PF_INET) &&
2880 len == sizeof(struct sockaddr_in)) {
2881 ss->ss_family = AF_INET;
2882 }
2883
2884 ss->ss_len = len;
2885 }
2886 return error;
2887 }
2888
2889 int
2890 internalize_user_msghdr_array(const void *src, int spacetype, int direction,
2891 u_int count, struct user_msghdr_x *dst, struct uio **uiop)
2892 {
2893 int error = 0;
2894 u_int i;
2895 u_int namecnt = 0;
2896 u_int ctlcnt = 0;
2897
2898 for (i = 0; i < count; i++) {
2899 uio_t auio;
2900 struct user_iovec *iovp;
2901 struct user_msghdr_x *user_msg = dst + i;
2902
2903 if (spacetype == UIO_USERSPACE64) {
2904 const struct user64_msghdr_x *msghdr64;
2905
2906 msghdr64 = ((const struct user64_msghdr_x *)src) + i;
2907
2908 user_msg->msg_name = msghdr64->msg_name;
2909 user_msg->msg_namelen = msghdr64->msg_namelen;
2910 user_msg->msg_iov = msghdr64->msg_iov;
2911 user_msg->msg_iovlen = msghdr64->msg_iovlen;
2912 user_msg->msg_control = msghdr64->msg_control;
2913 user_msg->msg_controllen = msghdr64->msg_controllen;
2914 user_msg->msg_flags = msghdr64->msg_flags;
2915 user_msg->msg_datalen = msghdr64->msg_datalen;
2916 } else {
2917 const struct user32_msghdr_x *msghdr32;
2918
2919 msghdr32 = ((const struct user32_msghdr_x *)src) + i;
2920
2921 user_msg->msg_name = msghdr32->msg_name;
2922 user_msg->msg_namelen = msghdr32->msg_namelen;
2923 user_msg->msg_iov = msghdr32->msg_iov;
2924 user_msg->msg_iovlen = msghdr32->msg_iovlen;
2925 user_msg->msg_control = msghdr32->msg_control;
2926 user_msg->msg_controllen = msghdr32->msg_controllen;
2927 user_msg->msg_flags = msghdr32->msg_flags;
2928 user_msg->msg_datalen = msghdr32->msg_datalen;
2929 }
2930
2931 if (user_msg->msg_iovlen <= 0 ||
2932 user_msg->msg_iovlen > UIO_MAXIOV) {
2933 error = EMSGSIZE;
2934 goto done;
2935 }
2936 auio = uio_create(user_msg->msg_iovlen, 0, spacetype,
2937 direction);
2938 if (auio == NULL) {
2939 error = ENOMEM;
2940 goto done;
2941 }
2942 uiop[i] = auio;
2943
2944 iovp = uio_iovsaddr(auio);
2945 if (iovp == NULL) {
2946 error = ENOMEM;
2947 goto done;
2948 }
2949 error = copyin_user_iovec_array(user_msg->msg_iov,
2950 spacetype, user_msg->msg_iovlen, iovp);
2951 if (error) {
2952 goto done;
2953 }
2954 user_msg->msg_iov = CAST_USER_ADDR_T(iovp);
2955
2956 error = uio_calculateresid(auio);
2957 if (error) {
2958 goto done;
2959 }
2960 user_msg->msg_datalen = uio_resid(auio);
2961
2962 if (user_msg->msg_name && user_msg->msg_namelen) {
2963 namecnt++;
2964 }
2965 if (user_msg->msg_control && user_msg->msg_controllen) {
2966 ctlcnt++;
2967 }
2968 }
2969 done:
2970
2971 return error;
2972 }
2973
2974 int
2975 internalize_recv_msghdr_array(const void *src, int spacetype, int direction,
2976 u_int count, struct user_msghdr_x *dst,
2977 struct recv_msg_elem *recv_msg_array)
2978 {
2979 int error = 0;
2980 u_int i;
2981
2982 for (i = 0; i < count; i++) {
2983 struct user_iovec *iovp;
2984 struct user_msghdr_x *user_msg = dst + i;
2985 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
2986
2987 if (spacetype == UIO_USERSPACE64) {
2988 const struct user64_msghdr_x *msghdr64;
2989
2990 msghdr64 = ((const struct user64_msghdr_x *)src) + i;
2991
2992 user_msg->msg_name = msghdr64->msg_name;
2993 user_msg->msg_namelen = msghdr64->msg_namelen;
2994 user_msg->msg_iov = msghdr64->msg_iov;
2995 user_msg->msg_iovlen = msghdr64->msg_iovlen;
2996 user_msg->msg_control = msghdr64->msg_control;
2997 user_msg->msg_controllen = msghdr64->msg_controllen;
2998 user_msg->msg_flags = msghdr64->msg_flags;
2999 user_msg->msg_datalen = msghdr64->msg_datalen;
3000 } else {
3001 const struct user32_msghdr_x *msghdr32;
3002
3003 msghdr32 = ((const struct user32_msghdr_x *)src) + i;
3004
3005 user_msg->msg_name = msghdr32->msg_name;
3006 user_msg->msg_namelen = msghdr32->msg_namelen;
3007 user_msg->msg_iov = msghdr32->msg_iov;
3008 user_msg->msg_iovlen = msghdr32->msg_iovlen;
3009 user_msg->msg_control = msghdr32->msg_control;
3010 user_msg->msg_controllen = msghdr32->msg_controllen;
3011 user_msg->msg_flags = msghdr32->msg_flags;
3012 user_msg->msg_datalen = msghdr32->msg_datalen;
3013 }
3014
3015 if (user_msg->msg_iovlen <= 0 ||
3016 user_msg->msg_iovlen > UIO_MAXIOV) {
3017 error = EMSGSIZE;
3018 goto done;
3019 }
3020 recv_msg_elem->uio = uio_create(user_msg->msg_iovlen, 0,
3021 spacetype, direction);
3022 if (recv_msg_elem->uio == NULL) {
3023 error = ENOMEM;
3024 goto done;
3025 }
3026
3027 iovp = uio_iovsaddr(recv_msg_elem->uio);
3028 if (iovp == NULL) {
3029 error = ENOMEM;
3030 goto done;
3031 }
3032 error = copyin_user_iovec_array(user_msg->msg_iov,
3033 spacetype, user_msg->msg_iovlen, iovp);
3034 if (error) {
3035 goto done;
3036 }
3037 user_msg->msg_iov = CAST_USER_ADDR_T(iovp);
3038
3039 error = uio_calculateresid(recv_msg_elem->uio);
3040 if (error) {
3041 goto done;
3042 }
3043 user_msg->msg_datalen = uio_resid(recv_msg_elem->uio);
3044
3045 if (user_msg->msg_name && user_msg->msg_namelen) {
3046 recv_msg_elem->which |= SOCK_MSG_SA;
3047 }
3048 if (user_msg->msg_control && user_msg->msg_controllen) {
3049 recv_msg_elem->which |= SOCK_MSG_CONTROL;
3050 }
3051 }
3052 done:
3053
3054 return error;
3055 }
3056
3057 u_int
3058 externalize_user_msghdr_array(void *dst, int spacetype, int direction,
3059 u_int count, const struct user_msghdr_x *src, struct uio **uiop)
3060 {
3061 #pragma unused(direction)
3062 u_int i;
3063 int seenlast = 0;
3064 u_int retcnt = 0;
3065
3066 for (i = 0; i < count; i++) {
3067 const struct user_msghdr_x *user_msg = src + i;
3068 uio_t auio = uiop[i];
3069 user_ssize_t len = user_msg->msg_datalen - uio_resid(auio);
3070
3071 if (user_msg->msg_datalen != 0 && len == 0) {
3072 seenlast = 1;
3073 }
3074
3075 if (seenlast == 0) {
3076 retcnt++;
3077 }
3078
3079 if (spacetype == UIO_USERSPACE64) {
3080 struct user64_msghdr_x *msghdr64;
3081
3082 msghdr64 = ((struct user64_msghdr_x *)dst) + i;
3083
3084 msghdr64->msg_flags = user_msg->msg_flags;
3085 msghdr64->msg_datalen = len;
3086 } else {
3087 struct user32_msghdr_x *msghdr32;
3088
3089 msghdr32 = ((struct user32_msghdr_x *)dst) + i;
3090
3091 msghdr32->msg_flags = user_msg->msg_flags;
3092 msghdr32->msg_datalen = len;
3093 }
3094 }
3095 return retcnt;
3096 }
3097
3098 u_int
3099 externalize_recv_msghdr_array(void *dst, int spacetype, int direction,
3100 u_int count, const struct user_msghdr_x *src,
3101 struct recv_msg_elem *recv_msg_array)
3102 {
3103 u_int i;
3104 int seenlast = 0;
3105 u_int retcnt = 0;
3106
3107 for (i = 0; i < count; i++) {
3108 const struct user_msghdr_x *user_msg = src + i;
3109 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3110 user_ssize_t len;
3111
3112 len = user_msg->msg_datalen - uio_resid(recv_msg_elem->uio);
3113
3114 if (direction == UIO_READ) {
3115 if ((recv_msg_elem->which & SOCK_MSG_DATA) == 0) {
3116 seenlast = 1;
3117 }
3118 } else {
3119 if (user_msg->msg_datalen != 0 && len == 0) {
3120 seenlast = 1;
3121 }
3122 }
3123
3124 if (seenlast == 0) {
3125 retcnt++;
3126 }
3127
3128 if (spacetype == UIO_USERSPACE64) {
3129 struct user64_msghdr_x *msghdr64;
3130
3131 msghdr64 = ((struct user64_msghdr_x *)dst) + i;
3132
3133 msghdr64->msg_flags = user_msg->msg_flags;
3134 msghdr64->msg_datalen = len;
3135 } else {
3136 struct user32_msghdr_x *msghdr32;
3137
3138 msghdr32 = ((struct user32_msghdr_x *)dst) + i;
3139
3140 msghdr32->msg_flags = user_msg->msg_flags;
3141 msghdr32->msg_datalen = len;
3142 }
3143 }
3144 return retcnt;
3145 }
3146
3147 void
3148 free_uio_array(struct uio **uiop, u_int count)
3149 {
3150 u_int i;
3151
3152 for (i = 0; i < count; i++) {
3153 if (uiop[i] != NULL) {
3154 uio_free(uiop[i]);
3155 }
3156 }
3157 }
3158
3159 __private_extern__ user_ssize_t
3160 uio_array_resid(struct uio **uiop, u_int count)
3161 {
3162 user_ssize_t len = 0;
3163 u_int i;
3164
3165 for (i = 0; i < count; i++) {
3166 struct uio *auio = uiop[i];
3167
3168 if (auio != NULL) {
3169 len += uio_resid(auio);
3170 }
3171 }
3172 return len;
3173 }
3174
3175 int
3176 uio_array_is_valid(struct uio **uiop, u_int count)
3177 {
3178 user_ssize_t len = 0;
3179 u_int i;
3180
3181 for (i = 0; i < count; i++) {
3182 struct uio *auio = uiop[i];
3183
3184 if (auio != NULL) {
3185 user_ssize_t resid = uio_resid(auio);
3186
3187 /*
3188 * Sanity check on the validity of the iovec:
3189 * no point of going over sb_max
3190 */
3191 if (resid < 0 || (u_int32_t)resid > sb_max) {
3192 return 0;
3193 }
3194
3195 len += resid;
3196 if (len < 0 || (u_int32_t)len > sb_max) {
3197 return 0;
3198 }
3199 }
3200 }
3201 return 1;
3202 }
3203
3204
3205 struct recv_msg_elem *
3206 alloc_recv_msg_array(u_int count)
3207 {
3208 struct recv_msg_elem *recv_msg_array;
3209
3210 recv_msg_array = _MALLOC(count * sizeof(struct recv_msg_elem),
3211 M_TEMP, M_WAITOK | M_ZERO);
3212
3213 return recv_msg_array;
3214 }
3215
3216 void
3217 free_recv_msg_array(struct recv_msg_elem *recv_msg_array, u_int count)
3218 {
3219 u_int i;
3220
3221 for (i = 0; i < count; i++) {
3222 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3223
3224 if (recv_msg_elem->uio != NULL) {
3225 uio_free(recv_msg_elem->uio);
3226 }
3227 if (recv_msg_elem->psa != NULL) {
3228 _FREE(recv_msg_elem->psa, M_TEMP);
3229 }
3230 if (recv_msg_elem->controlp != NULL) {
3231 m_freem(recv_msg_elem->controlp);
3232 }
3233 }
3234 _FREE(recv_msg_array, M_TEMP);
3235 }
3236
3237
3238 __private_extern__ user_ssize_t
3239 recv_msg_array_resid(struct recv_msg_elem *recv_msg_array, u_int count)
3240 {
3241 user_ssize_t len = 0;
3242 u_int i;
3243
3244 for (i = 0; i < count; i++) {
3245 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3246
3247 if (recv_msg_elem->uio != NULL) {
3248 len += uio_resid(recv_msg_elem->uio);
3249 }
3250 }
3251 return len;
3252 }
3253
3254 int
3255 recv_msg_array_is_valid(struct recv_msg_elem *recv_msg_array, u_int count)
3256 {
3257 user_ssize_t len = 0;
3258 u_int i;
3259
3260 for (i = 0; i < count; i++) {
3261 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3262
3263 if (recv_msg_elem->uio != NULL) {
3264 user_ssize_t resid = uio_resid(recv_msg_elem->uio);
3265
3266 /*
3267 * Sanity check on the validity of the iovec:
3268 * no point of going over sb_max
3269 */
3270 if (resid < 0 || (u_int32_t)resid > sb_max) {
3271 return 0;
3272 }
3273
3274 len += resid;
3275 if (len < 0 || (u_int32_t)len > sb_max) {
3276 return 0;
3277 }
3278 }
3279 }
3280 return 1;
3281 }
3282
3283 #if SENDFILE
3284
/*
 * Number of iovec slots sendfile() sizes its on-stack uio buffer for; it
 * is also the starting value of nbufs there and the page count behind
 * SENDFILE_MAX_BYTES.
 */
3285 #define SFUIOBUFS 64
3286
3287 /*
 * Macros to compute the number of mbufs needed depending on cluster size.
 * Each evaluates ((n - 1) >> shift) + 1 after converting n to unsigned int,
 * so n must be non-zero (for n == 0 the subtraction wraps around) and
 * values that do not fit in an unsigned int are truncated by the cast.
 */
3288 #define HOWMANY_16K(n) ((((unsigned int)(n) - 1) >> M16KCLSHIFT) + 1)
3289 #define HOWMANY_4K(n) ((((unsigned int)(n) - 1) >> MBIGCLSHIFT) + 1)
3290
3291 /* Upper send limit in bytes for one pass of the sendfile() loop (SFUIOBUFS * PAGESIZE) */
3292 #define SENDFILE_MAX_BYTES (SFUIOBUFS << PGSHIFT)
3293
3294 /* Upper send limit in the number of mbuf clusters, per cluster size */
3295 #define SENDFILE_MAX_16K HOWMANY_16K(SENDFILE_MAX_BYTES)
3296 #define SENDFILE_MAX_4K HOWMANY_4K(SENDFILE_MAX_BYTES)
3297
3298 static void
3299 alloc_sendpkt(int how, size_t pktlen, unsigned int *maxchunks,
3300 struct mbuf **m, boolean_t jumbocl)
3301 {
3302 unsigned int needed;
3303
3304 if (pktlen == 0) {
3305 panic("%s: pktlen (%ld) must be non-zero\n", __func__, pktlen);
3306 }
3307
3308 /*
3309 * Try to allocate for the whole thing. Since we want full control
3310 * over the buffer size and be able to accept partial result, we can't
3311 * use mbuf_allocpacket(). The logic below is similar to sosend().
3312 */
3313 *m = NULL;
3314 if (pktlen > MBIGCLBYTES && jumbocl) {
3315 needed = MIN(SENDFILE_MAX_16K, HOWMANY_16K(pktlen));
3316 *m = m_getpackets_internal(&needed, 1, how, 0, M16KCLBYTES);
3317 }
3318 if (*m == NULL) {
3319 needed = MIN(SENDFILE_MAX_4K, HOWMANY_4K(pktlen));
3320 *m = m_getpackets_internal(&needed, 1, how, 0, MBIGCLBYTES);
3321 }
3322
3323 /*
3324 * Our previous attempt(s) at allocation had failed; the system
3325 * may be short on mbufs, and we want to block until they are
3326 * available. This time, ask just for 1 mbuf and don't return
3327 * until we get it.
3328 */
3329 if (*m == NULL) {
3330 needed = 1;
3331 *m = m_getpackets_internal(&needed, 1, M_WAIT, 1, MBIGCLBYTES);
3332 }
3333 if (*m == NULL) {
3334 panic("%s: blocking allocation returned NULL\n", __func__);
3335 }
3336
3337 *maxchunks = needed;
3338 }
3339
3340 /*
3341 * sendfile(2).
3342 * int sendfile(int fd, int s, off_t offset, off_t *nbytes,
3343 * struct sf_hdtr *hdtr, int flags)
3344 *
3345 * Send a file specified by 'fd' and starting at 'offset' to a socket
3346 * specified by 's'. Send only '*nbytes' of the file or until EOF if
3347 * *nbytes == 0. Optionally add a header and/or trailer to the socket
3348 * output. If specified, write the total number of bytes sent into *nbytes.
3349 */
3350 int
3351 sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval)
3352 {
3353 struct fileproc *fp;
3354 struct vnode *vp;
3355 struct socket *so;
3356 struct writev_nocancel_args nuap;
3357 user_ssize_t writev_retval;
3358 struct user_sf_hdtr user_hdtr;
3359 struct user32_sf_hdtr user32_hdtr;
3360 struct user64_sf_hdtr user64_hdtr;
3361 off_t off, xfsize;
3362 off_t nbytes = 0, sbytes = 0;
3363 int error = 0;
3364 size_t sizeof_hdtr;
3365 off_t file_size;
3366 struct vfs_context context = *vfs_context_current();
3367
3368 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_START), uap->s,
3369 0, 0, 0, 0);
3370
3371 AUDIT_ARG(fd, uap->fd);
3372 AUDIT_ARG(value32, uap->s);
3373
3374 /*
3375 * Do argument checking. Must be a regular file in, stream
3376 * type and connected socket out, positive offset.
3377 */
3378 if ((error = fp_getfvp(p, uap->fd, &fp, &vp))) {
3379 goto done;
3380 }
3381 if ((fp->f_flag & FREAD) == 0) {
3382 error = EBADF;
3383 goto done1;
3384 }
3385 if (vnode_isreg(vp) == 0) {
3386 error = ENOTSUP;
3387 goto done1;
3388 }
3389 error = file_socket(uap->s, &so);
3390 if (error) {
3391 goto done1;
3392 }
3393 if (so == NULL) {
3394 error = EBADF;
3395 goto done2;
3396 }
3397 if (so->so_type != SOCK_STREAM) {
3398 error = EINVAL;
3399 goto done2;
3400 }
3401 if ((so->so_state & SS_ISCONNECTED) == 0) {
3402 error = ENOTCONN;
3403 goto done2;
3404 }
3405 if (uap->offset < 0) {
3406 error = EINVAL;
3407 goto done2;
3408 }
3409 if (uap->nbytes == USER_ADDR_NULL) {
3410 error = EINVAL;
3411 goto done2;
3412 }
3413 if (uap->flags != 0) {
3414 error = EINVAL;
3415 goto done2;
3416 }
3417
3418 context.vc_ucred = fp->f_fglob->fg_cred;
3419
3420 #if CONFIG_MACF_SOCKET_SUBSET
3421 /* JMM - fetch connected sockaddr? */
3422 error = mac_socket_check_send(context.vc_ucred, so, NULL);
3423 if (error) {
3424 goto done2;
3425 }
3426 #endif
3427
3428 /*
3429 * Get number of bytes to send
3430 * Should it applies to size of header and trailer?
3431 */
3432 error = copyin(uap->nbytes, &nbytes, sizeof(off_t));
3433 if (error) {
3434 goto done2;
3435 }
3436
3437 /*
3438 * If specified, get the pointer to the sf_hdtr struct for
3439 * any headers/trailers.
3440 */
3441 if (uap->hdtr != USER_ADDR_NULL) {
3442 caddr_t hdtrp;
3443
3444 bzero(&user_hdtr, sizeof(user_hdtr));
3445 if (IS_64BIT_PROCESS(p)) {
3446 hdtrp = (caddr_t)&user64_hdtr;
3447 sizeof_hdtr = sizeof(user64_hdtr);
3448 } else {
3449 hdtrp = (caddr_t)&user32_hdtr;
3450 sizeof_hdtr = sizeof(user32_hdtr);
3451 }
3452 error = copyin(uap->hdtr, hdtrp, sizeof_hdtr);
3453 if (error) {
3454 goto done2;
3455 }
3456 if (IS_64BIT_PROCESS(p)) {
3457 user_hdtr.headers = user64_hdtr.headers;
3458 user_hdtr.hdr_cnt = user64_hdtr.hdr_cnt;
3459 user_hdtr.trailers = user64_hdtr.trailers;
3460 user_hdtr.trl_cnt = user64_hdtr.trl_cnt;
3461 } else {
3462 user_hdtr.headers = user32_hdtr.headers;
3463 user_hdtr.hdr_cnt = user32_hdtr.hdr_cnt;
3464 user_hdtr.trailers = user32_hdtr.trailers;
3465 user_hdtr.trl_cnt = user32_hdtr.trl_cnt;
3466 }
3467
3468 /*
3469 * Send any headers. Wimp out and use writev(2).
3470 */
3471 if (user_hdtr.headers != USER_ADDR_NULL) {
3472 bzero(&nuap, sizeof(struct writev_args));
3473 nuap.fd = uap->s;
3474 nuap.iovp = user_hdtr.headers;
3475 nuap.iovcnt = user_hdtr.hdr_cnt;
3476 error = writev_nocancel(p, &nuap, &writev_retval);
3477 if (error) {
3478 goto done2;
3479 }
3480 sbytes += writev_retval;
3481 }
3482 }
3483
3484 /*
3485 * Get the file size for 2 reasons:
3486 * 1. We don't want to allocate more mbufs than necessary
3487 * 2. We don't want to read past the end of file
3488 */
3489 if ((error = vnode_size(vp, &file_size, vfs_context_current())) != 0) {
3490 goto done2;
3491 }
3492
3493 /*
3494 * Simply read file data into a chain of mbufs that used with scatter
3495 * gather reads. We're not (yet?) setup to use zero copy external
3496 * mbufs that point to the file pages.
3497 */
3498 socket_lock(so, 1);
3499 error = sblock(&so->so_snd, SBL_WAIT);
3500 if (error) {
3501 socket_unlock(so, 1);
3502 goto done2;
3503 }
3504 for (off = uap->offset;; off += xfsize, sbytes += xfsize) {
3505 mbuf_t m0 = NULL, m;
3506 unsigned int nbufs = SFUIOBUFS, i;
3507 uio_t auio;
3508 char uio_buf[UIO_SIZEOF(SFUIOBUFS)]; /* 1 KB !!! */
3509 size_t uiolen;
3510 user_ssize_t rlen;
3511 off_t pgoff;
3512 size_t pktlen;
3513 boolean_t jumbocl;
3514
3515 /*
3516 * Calculate the amount to transfer.
3517 * Align to round number of pages.
3518 * Not to exceed send socket buffer,
3519 * the EOF, or the passed in nbytes.
3520 */
3521 xfsize = sbspace(&so->so_snd);
3522
3523 if (xfsize <= 0) {
3524 if (so->so_state & SS_CANTSENDMORE) {
3525 error = EPIPE;
3526 goto done3;
3527 } else if ((so->so_state & SS_NBIO)) {
3528 error = EAGAIN;
3529 goto done3;
3530 } else {
3531 xfsize = PAGE_SIZE;
3532 }
3533 }
3534
3535 if (xfsize > SENDFILE_MAX_BYTES) {
3536 xfsize = SENDFILE_MAX_BYTES;
3537 } else if (xfsize > PAGE_SIZE) {
3538 xfsize = trunc_page(xfsize);
3539 }
3540 pgoff = off & PAGE_MASK_64;
3541 if (pgoff > 0 && PAGE_SIZE - pgoff < xfsize) {
3542 xfsize = PAGE_SIZE_64 - pgoff;
3543 }
3544 if (nbytes && xfsize > (nbytes - sbytes)) {
3545 xfsize = nbytes - sbytes;
3546 }
3547 if (xfsize <= 0) {
3548 break;
3549 }
3550 if (off + xfsize > file_size) {
3551 xfsize = file_size - off;
3552 }
3553 if (xfsize <= 0) {
3554 break;
3555 }
3556
3557 /*
3558 * Attempt to use larger than system page-size clusters for
3559 * large writes only if there is a jumbo cluster pool and
3560 * if the socket is marked accordingly.
3561 */
3562 jumbocl = sosendjcl && njcl > 0 &&
3563 ((so->so_flags & SOF_MULTIPAGES) || sosendjcl_ignore_capab);
3564
3565 socket_unlock(so, 0);
3566 alloc_sendpkt(M_WAIT, xfsize, &nbufs, &m0, jumbocl);
3567 pktlen = mbuf_pkthdr_maxlen(m0);
3568 if (pktlen < (size_t)xfsize) {
3569 xfsize = pktlen;
3570 }
3571
3572 auio = uio_createwithbuffer(nbufs, off, UIO_SYSSPACE,
3573 UIO_READ, &uio_buf[0], sizeof(uio_buf));
3574 if (auio == NULL) {
3575 printf("sendfile failed. nbufs = %d. %s", nbufs,
3576 "File a radar related to rdar://10146739.\n");
3577 mbuf_freem(m0);
3578 error = ENXIO;
3579 socket_lock(so, 0);
3580 goto done3;
3581 }
3582
3583 for (i = 0, m = m0, uiolen = 0;
3584 i < nbufs && m != NULL && uiolen < (size_t)xfsize;
3585 i++, m = mbuf_next(m)) {
3586 size_t mlen = mbuf_maxlen(m);
3587
3588 if (mlen + uiolen > (size_t)xfsize) {
3589 mlen = xfsize - uiolen;
3590 }
3591 mbuf_setlen(m, mlen);
3592 uio_addiov(auio, CAST_USER_ADDR_T(mbuf_datastart(m)),
3593 mlen);
3594 uiolen += mlen;
3595 }
3596
3597 if (xfsize != uio_resid(auio)) {
3598 printf("sendfile: xfsize: %lld != uio_resid(auio): "
3599 "%lld\n", xfsize, (long long)uio_resid(auio));
3600 }
3601
3602 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_START),
3603 uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
3604 (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
3605 error = fo_read(fp, auio, FOF_OFFSET, &context);
3606 socket_lock(so, 0);
3607 if (error != 0) {
3608 if (uio_resid(auio) != xfsize && (error == ERESTART ||
3609 error == EINTR || error == EWOULDBLOCK)) {
3610 error = 0;
3611 } else {
3612 mbuf_freem(m0);
3613 goto done3;
3614 }
3615 }
3616 xfsize -= uio_resid(auio);
3617 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_END),
3618 uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
3619 (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
3620
3621 if (xfsize == 0) {
3622 // printf("sendfile: fo_read 0 bytes, EOF\n");
3623 break;
3624 }
3625 if (xfsize + off > file_size) {
3626 printf("sendfile: xfsize: %lld + off: %lld > file_size:"
3627 "%lld\n", xfsize, off, file_size);
3628 }
3629 for (i = 0, m = m0, rlen = 0;
3630 i < nbufs && m != NULL && rlen < xfsize;
3631 i++, m = mbuf_next(m)) {
3632 size_t mlen = mbuf_maxlen(m);
3633
3634 if (rlen + mlen > (size_t)xfsize) {
3635 mlen = xfsize - rlen;
3636 }
3637 mbuf_setlen(m, mlen);
3638
3639 rlen += mlen;
3640 }
3641 mbuf_pkthdr_setlen(m0, xfsize);
3642
3643 retry_space:
3644 /*
3645 * Make sure that the socket is still able to take more data.
3646 * CANTSENDMORE being true usually means that the connection
3647 * was closed. so_error is true when an error was sensed after
3648 * a previous send.
3649 * The state is checked after the page mapping and buffer
3650 * allocation above since those operations may block and make
3651 * any socket checks stale. From this point forward, nothing
3652 * blocks before the pru_send (or more accurately, any blocking
3653 * results in a loop back to here to re-check).
3654 */
3655 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
3656 if (so->so_state & SS_CANTSENDMORE) {
3657 error = EPIPE;
3658 } else {
3659 error = so->so_error;
3660 so->so_error = 0;
3661 }
3662 m_freem(m0);
3663 goto done3;
3664 }
3665 /*
3666 * Wait for socket space to become available. We do this just
3667 * after checking the connection state above in order to avoid
3668 * a race condition with sbwait().
3669 */
3670 if (sbspace(&so->so_snd) < (long)so->so_snd.sb_lowat) {
3671 if (so->so_state & SS_NBIO) {
3672 m_freem(m0);
3673 error = EAGAIN;
3674 goto done3;
3675 }
3676 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT |
3677 DBG_FUNC_START), uap->s, 0, 0, 0, 0);
3678 error = sbwait(&so->so_snd);
3679 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT |
3680 DBG_FUNC_END), uap->s, 0, 0, 0, 0);
3681 /*
3682 * An error from sbwait usually indicates that we've
3683 * been interrupted by a signal. If we've sent anything
3684 * then return bytes sent, otherwise return the error.
3685 */
3686 if (error) {
3687 m_freem(m0);
3688 goto done3;
3689 }
3690 goto retry_space;
3691 }
3692
3693 struct mbuf *control = NULL;
3694 {
3695 /*
3696 * Socket filter processing
3697 */
3698
3699 error = sflt_data_out(so, NULL, &m0, &control, 0);
3700 if (error) {
3701 if (error == EJUSTRETURN) {
3702 error = 0;
3703 continue;
3704 }
3705 goto done3;
3706 }
3707 /*
3708 * End Socket filter processing
3709 */
3710 }
3711 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
3712 uap->s, 0, 0, 0, 0);
3713 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m0,
3714 0, control, p);
3715 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
3716 uap->s, 0, 0, 0, 0);
3717 if (error) {
3718 goto done3;
3719 }
3720 }
3721 sbunlock(&so->so_snd, FALSE); /* will unlock socket */
3722 /*
3723 * Send trailers. Wimp out and use writev(2).
3724 */
3725 if (uap->hdtr != USER_ADDR_NULL &&
3726 user_hdtr.trailers != USER_ADDR_NULL) {
3727 bzero(&nuap, sizeof(struct writev_args));
3728 nuap.fd = uap->s;
3729 nuap.iovp = user_hdtr.trailers;
3730 nuap.iovcnt = user_hdtr.trl_cnt;
3731 error = writev_nocancel(p, &nuap, &writev_retval);
3732 if (error) {
3733 goto done2;
3734 }
3735 sbytes += writev_retval;
3736 }
3737 done2:
3738 file_drop(uap->s);
3739 done1:
3740 file_drop(uap->fd);
3741 done:
3742 if (uap->nbytes != USER_ADDR_NULL) {
3743 /* XXX this appears bogus for some early failure conditions */
3744 copyout(&sbytes, uap->nbytes, sizeof(off_t));
3745 }
3746 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_END), uap->s,
3747 (unsigned int)((sbytes >> 32) & 0x0ffffffff),
3748 (unsigned int)(sbytes & 0x0ffffffff), error, 0);
3749 return error;
3750 done3:
3751 sbunlock(&so->so_snd, FALSE); /* will unlock socket */
3752 goto done2;
3753 }
3754
3755
3756 #endif /* SENDFILE */