]> git.saurik.com Git - apple/xnu.git/blame_incremental - bsd/kern/uipc_syscalls.c
xnu-3789.70.16.tar.gz
[apple/xnu.git] / bsd / kern / uipc_syscalls.c
... / ...
CommitLineData
1/*
2 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * Copyright (c) 1982, 1986, 1989, 1990, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * sendfile(2) and related extensions:
33 * Copyright (c) 1998, David Greenman. All rights reserved.
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. All advertising materials mentioning features or use of this software
44 * must display the following acknowledgement:
45 * This product includes software developed by the University of
46 * California, Berkeley and its contributors.
47 * 4. Neither the name of the University nor the names of its contributors
48 * may be used to endorse or promote products derived from this software
49 * without specific prior written permission.
50 *
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE.
62 *
63 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94
64 */
65/*
66 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
67 * support for mandatory and extensible security protections. This notice
68 * is included in support of clause 2.2 (b) of the Apple Public License,
69 * Version 2.0.
70 */
71
72#include <sys/param.h>
73#include <sys/systm.h>
74#include <sys/filedesc.h>
75#include <sys/proc_internal.h>
76#include <sys/file_internal.h>
77#include <sys/vnode_internal.h>
78#include <sys/malloc.h>
79#include <sys/mcache.h>
80#include <sys/mbuf.h>
81#include <kern/locks.h>
82#include <sys/domain.h>
83#include <sys/protosw.h>
84#include <sys/signalvar.h>
85#include <sys/socket.h>
86#include <sys/socketvar.h>
87#include <sys/kernel.h>
88#include <sys/uio_internal.h>
89#include <sys/kauth.h>
90#include <kern/task.h>
91#include <sys/priv.h>
92#include <sys/sysctl.h>
93
94#include <security/audit/audit.h>
95
96#include <sys/kdebug.h>
97#include <sys/sysproto.h>
98#include <netinet/in.h>
99#include <net/route.h>
100#include <netinet/in_pcb.h>
101
102#if CONFIG_MACF_SOCKET_SUBSET
103#include <security/mac_framework.h>
104#endif /* MAC_SOCKET_SUBSET */
105
/*
 * Shorthand accessors: let code in this file write fp->f_flag, fp->f_data,
 * etc. and have it resolve through the fileproc's f_fglob member.
 */
#define	f_flag f_fglob->fg_flag
#define	f_type f_fglob->fg_ops->fo_type
#define	f_msgcount f_fglob->fg_msgcount
#define	f_cred f_fglob->fg_cred
#define	f_ops f_fglob->fg_ops
#define	f_offset f_fglob->fg_offset
#define	f_data f_fglob->fg_data

/*
 * Debug/trace codes, all built with NETDBG_CODE() in the DBG_NETSOCK class.
 * The DBG_FNC_* names suggest one code per traced entry point; their users
 * are outside this part of the file.
 */
#define	DBG_LAYER_IN_BEG NETDBG_CODE(DBG_NETSOCK, 0)
#define	DBG_LAYER_IN_END NETDBG_CODE(DBG_NETSOCK, 2)
#define	DBG_LAYER_OUT_BEG NETDBG_CODE(DBG_NETSOCK, 1)
#define	DBG_LAYER_OUT_END NETDBG_CODE(DBG_NETSOCK, 3)
#define	DBG_FNC_SENDMSG NETDBG_CODE(DBG_NETSOCK, (1 << 8) | 1)
#define	DBG_FNC_SENDTO NETDBG_CODE(DBG_NETSOCK, (2 << 8) | 1)
#define	DBG_FNC_SENDIT NETDBG_CODE(DBG_NETSOCK, (3 << 8) | 1)
#define	DBG_FNC_RECVFROM NETDBG_CODE(DBG_NETSOCK, (5 << 8))
#define	DBG_FNC_RECVMSG NETDBG_CODE(DBG_NETSOCK, (6 << 8))
#define	DBG_FNC_RECVIT NETDBG_CODE(DBG_NETSOCK, (7 << 8))
#define	DBG_FNC_SENDFILE NETDBG_CODE(DBG_NETSOCK, (10 << 8))
#define	DBG_FNC_SENDFILE_WAIT NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 1))
#define	DBG_FNC_SENDFILE_READ NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 2))
#define	DBG_FNC_SENDFILE_SEND NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 3))
#define	DBG_FNC_SENDMSG_X NETDBG_CODE(DBG_NETSOCK, (11 << 8))
#define	DBG_FNC_RECVMSG_X NETDBG_CODE(DBG_NETSOCK, (12 << 8))

/*
 * On DEBUG/DEVELOPMENT builds kernel addresses are passed through unchanged
 * and DBG_PRINTF() is a real printf(); on other builds addresses go through
 * VM_KERNEL_ADDRPERM() and DBG_PRINTF() compiles to nothing.
 */
#if DEBUG || DEVELOPMENT
#define	DEBUG_KERNEL_ADDRPERM(_v) (_v)
#define	DBG_PRINTF(...) printf(__VA_ARGS__)
#else
#define	DEBUG_KERNEL_ADDRPERM(_v) VM_KERNEL_ADDRPERM(_v)
#define	DBG_PRINTF(...) do { } while (0)
#endif

/* TODO: should be in header file */
int falloc_locked(proc_t, struct fileproc **, int *, vfs_context_t, int);

/* Forward declarations of the file-local helpers defined below. */
static int sendit(struct proc *, struct socket *, struct user_msghdr *, uio_t,
    int, int32_t *);
static int recvit(struct proc *, int, struct user_msghdr *, uio_t, user_addr_t,
    int32_t *);
static int connectit(struct socket *, struct sockaddr *);
static int getsockaddr(struct socket *, struct sockaddr **, user_addr_t,
    size_t, boolean_t);
static int getsockaddr_s(struct socket *, struct sockaddr_storage *,
    user_addr_t, size_t, boolean_t);
#if SENDFILE
static void alloc_sendpkt(int, size_t, unsigned int *, struct mbuf **,
    boolean_t);
#endif /* SENDFILE */
static int connectx_nocancel(struct proc *, struct connectx_args *, int *);
static int connectitx(struct socket *, struct sockaddr *,
    struct sockaddr *, struct proc *, uint32_t, sae_associd_t,
    sae_connid_t *, uio_t, unsigned int, user_ssize_t *);
static int peeloff_nocancel(struct proc *, struct peeloff_args *, int *);
static int disconnectx_nocancel(struct proc *, struct disconnectx_args *,
    int *);
static int socket_common(struct proc *, int, int, int, pid_t, int32_t *, int);

static int internalize_user_msghdr_array(const void *, int, int, u_int,
    struct user_msghdr_x *, struct uio **);
static u_int externalize_user_msghdr_array(void *, int, int, u_int,
    const struct user_msghdr_x *, struct uio **);

static void free_uio_array(struct uio **, u_int);
static int uio_array_is_valid(struct uio **, u_int);
static int recv_msg_array_is_valid(struct recv_msg_elem *, u_int);
static int internalize_recv_msghdr_array(const void *, int, int,
    u_int, struct user_msghdr_x *, struct recv_msg_elem *);
static u_int externalize_recv_msghdr_array(void *, int, int, u_int,
    const struct user_msghdr_x *, struct recv_msg_elem *);
static struct recv_msg_elem *alloc_recv_msg_array(u_int count);
static void free_recv_msg_array(struct recv_msg_elem *, u_int);

SYSCTL_DECL(_kern_ipc);

/*
 * Read-write tunables registered under the _kern_ipc sysctl node, both
 * defaulting to 100.  Presumably per-call message-count limits for the
 * sendmsg_x/recvmsg_x paths -- the consumers are not in this part of the
 * file; confirm before relying on that.
 */
static u_int somaxsendmsgx = 100;
SYSCTL_UINT(_kern_ipc, OID_AUTO, maxsendmsgx,
    CTLFLAG_RW | CTLFLAG_LOCKED, &somaxsendmsgx, 0, "");
static u_int somaxrecvmsgx = 100;
SYSCTL_UINT(_kern_ipc, OID_AUTO, maxrecvmsgx,
    CTLFLAG_RW | CTLFLAG_LOCKED, &somaxrecvmsgx, 0, "");

/*
 * System call interface to the socket abstraction.
 */

/* File operations vector installed on every descriptor created below. */
extern const struct fileops socketops;
193
194/*
195 * Returns: 0 Success
196 * EACCES Mandatory Access Control failure
197 * falloc:ENFILE
198 * falloc:EMFILE
199 * falloc:ENOMEM
200 * socreate:EAFNOSUPPORT
201 * socreate:EPROTOTYPE
202 * socreate:EPROTONOSUPPORT
203 * socreate:ENOBUFS
204 * socreate:ENOMEM
205 * socreate:??? [other protocol families, IPSEC]
206 */
207int
208socket(struct proc *p,
209 struct socket_args *uap,
210 int32_t *retval)
211{
212 return (socket_common(p, uap->domain, uap->type, uap->protocol,
213 proc_selfpid(), retval, 0));
214}
215
216int
217socket_delegate(struct proc *p,
218 struct socket_delegate_args *uap,
219 int32_t *retval)
220{
221 return socket_common(p, uap->domain, uap->type, uap->protocol,
222 uap->epid, retval, 1);
223}
224
225static int
226socket_common(struct proc *p,
227 int domain,
228 int type,
229 int protocol,
230 pid_t epid,
231 int32_t *retval,
232 int delegate)
233{
234 struct socket *so;
235 struct fileproc *fp;
236 int fd, error;
237
238 AUDIT_ARG(socket, domain, type, protocol);
239#if CONFIG_MACF_SOCKET_SUBSET
240 if ((error = mac_socket_check_create(kauth_cred_get(), domain,
241 type, protocol)) != 0)
242 return (error);
243#endif /* MAC_SOCKET_SUBSET */
244
245 if (delegate) {
246 error = priv_check_cred(kauth_cred_get(),
247 PRIV_NET_PRIVILEGED_SOCKET_DELEGATE, 0);
248 if (error)
249 return (EACCES);
250 }
251
252 error = falloc(p, &fp, &fd, vfs_context_current());
253 if (error) {
254 return (error);
255 }
256 fp->f_flag = FREAD|FWRITE;
257 fp->f_ops = &socketops;
258
259 if (delegate)
260 error = socreate_delegate(domain, &so, type, protocol, epid);
261 else
262 error = socreate(domain, &so, type, protocol);
263
264 if (error) {
265 fp_free(p, fd, fp);
266 } else {
267 fp->f_data = (caddr_t)so;
268
269 proc_fdlock(p);
270 procfdtbl_releasefd(p, fd, NULL);
271
272 fp_drop(p, fd, fp, 1);
273 proc_fdunlock(p);
274
275 *retval = fd;
276 if (ENTR_SHOULDTRACE) {
277 KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
278 fd, 0, (int64_t)VM_KERNEL_ADDRPERM(so));
279 }
280 }
281 return (error);
282}
283
284/*
285 * Returns: 0 Success
286 * EDESTADDRREQ Destination address required
287 * EBADF Bad file descriptor
288 * EACCES Mandatory Access Control failure
289 * file_socket:ENOTSOCK
290 * file_socket:EBADF
291 * getsockaddr:ENAMETOOLONG Filename too long
292 * getsockaddr:EINVAL Invalid argument
293 * getsockaddr:ENOMEM Not enough space
294 * getsockaddr:EFAULT Bad address
295 * sobindlock:???
296 */
297/* ARGSUSED */
298int
299bind(__unused proc_t p, struct bind_args *uap, __unused int32_t *retval)
300{
301 struct sockaddr_storage ss;
302 struct sockaddr *sa = NULL;
303 struct socket *so;
304 boolean_t want_free = TRUE;
305 int error;
306
307 AUDIT_ARG(fd, uap->s);
308 error = file_socket(uap->s, &so);
309 if (error != 0)
310 return (error);
311 if (so == NULL) {
312 error = EBADF;
313 goto out;
314 }
315 if (uap->name == USER_ADDR_NULL) {
316 error = EDESTADDRREQ;
317 goto out;
318 }
319 if (uap->namelen > sizeof (ss)) {
320 error = getsockaddr(so, &sa, uap->name, uap->namelen, TRUE);
321 } else {
322 error = getsockaddr_s(so, &ss, uap->name, uap->namelen, TRUE);
323 if (error == 0) {
324 sa = (struct sockaddr *)&ss;
325 want_free = FALSE;
326 }
327 }
328 if (error != 0)
329 goto out;
330 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
331#if CONFIG_MACF_SOCKET_SUBSET
332 if ((error = mac_socket_check_bind(kauth_cred_get(), so, sa)) == 0)
333 error = sobindlock(so, sa, 1); /* will lock socket */
334#else
335 error = sobindlock(so, sa, 1); /* will lock socket */
336#endif /* MAC_SOCKET_SUBSET */
337 if (want_free)
338 FREE(sa, M_SONAME);
339out:
340 file_drop(uap->s);
341 return (error);
342}
343
344/*
345 * Returns: 0 Success
346 * EBADF
347 * EACCES Mandatory Access Control failure
348 * file_socket:ENOTSOCK
349 * file_socket:EBADF
350 * solisten:EINVAL
351 * solisten:EOPNOTSUPP
352 * solisten:???
353 */
354int
355listen(__unused struct proc *p, struct listen_args *uap,
356 __unused int32_t *retval)
357{
358 int error;
359 struct socket *so;
360
361 AUDIT_ARG(fd, uap->s);
362 error = file_socket(uap->s, &so);
363 if (error)
364 return (error);
365 if (so != NULL)
366#if CONFIG_MACF_SOCKET_SUBSET
367 {
368 error = mac_socket_check_listen(kauth_cred_get(), so);
369 if (error == 0)
370 error = solisten(so, uap->backlog);
371 }
372#else
373 error = solisten(so, uap->backlog);
374#endif /* MAC_SOCKET_SUBSET */
375 else
376 error = EBADF;
377
378 file_drop(uap->s);
379 return (error);
380}
381
/*
 * accept_nocancel
 *
 * Description:	Non-cancellable body of accept(): take one completed
 *		connection off the listening socket behind uap->s, attach it
 *		to a newly allocated descriptor, and, when uap->name is set,
 *		copy the peer address and its length out to the caller.
 *
 * Parameters:	p			Calling process
 *		uap->s			Descriptor of the listening socket
 *		uap->name		User buffer for the peer address, or 0
 *		uap->anamelen		User pointer to the buffer length (in)
 *					and the address length (out)
 *		retval			Set to -1 on entry; set to the new
 *					descriptor once falloc() succeeds
 *
 * Returns:	fp_getfsock:EBADF	Bad file descriptor
 *		fp_getfsock:EOPNOTSUPP	...
 *		xlate => :ENOTSOCK	Socket operation on non-socket
 *		:EFAULT			Bad address on copyin/copyout
 *		:EBADF			Bad file descriptor
 *		:EOPNOTSUPP		Operation not supported on socket
 *		:EINVAL			Invalid argument
 *		:EWOULDBLOCK		Operation would block
 *		:ECONNABORTED		Connection aborted
 *		:EINTR			Interrupted function
 *		:EACCES			Mandatory Access Control failure
 *		falloc_locked:ENFILE	Too many files open in system
 *		falloc_locked::EMFILE	Too many open files
 *		falloc_locked::ENOMEM	Not enough space
 *		0			Success
 */
int
accept_nocancel(struct proc *p, struct accept_nocancel_args *uap,
    int32_t *retval)
{
	struct fileproc *fp;
	struct sockaddr *sa = NULL;
	socklen_t namelen;
	int error;
	struct socket *head, *so = NULL;
	lck_mtx_t *mutex_held;
	int fd = uap->s;
	int newfd;
	short fflag;		/* type must match fp->f_flag */
	int dosocklock = 0;

	*retval = -1;

	AUDIT_ARG(fd, uap->s);

	/*
	 * Fetch the caller's buffer length before touching the socket.
	 * namelen stays uninitialized when uap->name is 0 and is only
	 * read on paths guarded by uap->name.
	 */
	if (uap->name) {
		error = copyin(uap->anamelen, (caddr_t)&namelen,
		    sizeof (socklen_t));
		if (error)
			return (error);
	}
	error = fp_getfsock(p, fd, &fp, &head);
	if (error) {
		/* Report "not a socket" the way callers of accept expect. */
		if (error == EOPNOTSUPP)
			error = ENOTSOCK;
		return (error);
	}
	if (head == NULL) {
		error = EBADF;
		goto out;
	}
#if CONFIG_MACF_SOCKET_SUBSET
	if ((error = mac_socket_check_accept(kauth_cred_get(), head)) != 0)
		goto out;
#endif /* CONFIG_MACF_SOCKET_SUBSET */

	socket_lock(head, 1);

	/*
	 * Choose the mutex msleep() will sleep on below: the protocol's own
	 * lock when it supplies pr_getlock, otherwise the domain mutex.
	 * dosocklock remembers which case applies; it later decides whether
	 * the new socket is locked separately from head.
	 */
	if (head->so_proto->pr_getlock != NULL) {
		mutex_held = (*head->so_proto->pr_getlock)(head, 0);
		dosocklock = 1;
	} else {
		mutex_held = head->so_proto->pr_domain->dom_mtx;
		dosocklock = 0;
	}

	if ((head->so_options & SO_ACCEPTCONN) == 0) {
		if ((head->so_proto->pr_flags & PR_CONNREQUIRED) == 0) {
			error = EOPNOTSUPP;
		} else {
			/* POSIX: The socket is not accepting connections */
			error = EINVAL;
		}
		socket_unlock(head, 1);
		goto out;
	}
check_again:
	/* Non-blocking socket with nothing queued: do not sleep. */
	if ((head->so_state & SS_NBIO) && head->so_comp.tqh_first == NULL) {
		socket_unlock(head, 1);
		error = EWOULDBLOCK;
		goto out;
	}
	/* Sleep until a connection completes or an error is posted. */
	while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
		if (head->so_state & SS_CANTRCVMORE) {
			head->so_error = ECONNABORTED;
			break;
		}
		if (head->so_usecount < 1)
			panic("accept: head=%p refcount=%d\n", head,
			    head->so_usecount);
		error = msleep((caddr_t)&head->so_timeo, mutex_held,
		    PSOCK | PCATCH, "accept", 0);
		if (head->so_usecount < 1)
			panic("accept: 2 head=%p refcount=%d\n", head,
			    head->so_usecount);
		/* A draining socket overrides whatever msleep() returned. */
		if ((head->so_state & SS_DRAINING)) {
			error = ECONNABORTED;
		}
		if (error) {
			socket_unlock(head, 1);
			goto out;
		}
	}
	/* Report and clear a pending error on the listening socket. */
	if (head->so_error) {
		error = head->so_error;
		head->so_error = 0;
		socket_unlock(head, 1);
		goto out;
	}

	/*
	 * At this point we know that there is at least one connection
	 * ready to be accepted. Remove it from the queue prior to
	 * allocating the file descriptor for it since falloc() may
	 * block allowing another process to accept the connection
	 * instead.
	 */
	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

	so_acquire_accept_list(head, NULL);
	/* The queue may have emptied meanwhile; if so, start over. */
	if (TAILQ_EMPTY(&head->so_comp)) {
		so_release_accept_list(head);
		goto check_again;
	}

	so = TAILQ_FIRST(&head->so_comp);
	TAILQ_REMOVE(&head->so_comp, so, so_list);
	so->so_head = NULL;
	so->so_state &= ~SS_COMP;
	head->so_qlen--;
	so_release_accept_list(head);

	/* unlock head to avoid deadlock with select, keep a ref on head */
	socket_unlock(head, 0);

#if CONFIG_MACF_SOCKET_SUBSET
	/*
	 * Pass the pre-accepted socket to the MAC framework. This is
	 * cheaper than allocating a file descriptor for the socket,
	 * calling the protocol accept callback, and possibly freeing
	 * the file descriptor should the MAC check fail.
	 */
	if ((error = mac_socket_check_accepted(kauth_cred_get(), so)) != 0) {
		socket_lock(so, 1);
		so->so_state &= ~SS_NOFDREF;
		socket_unlock(so, 1);
		soclose(so);
		/* Drop reference on listening socket */
		sodereference(head);
		goto out;
	}
#endif /* CONFIG_MACF_SOCKET_SUBSET */

	/*
	 * Pass the pre-accepted socket to any interested socket filter(s).
	 * Upon failure, the socket would have been closed by the callee.
	 */
	if (so->so_filt != NULL && (error = soacceptfilter(so, head)) != 0) {
		/* Drop reference on listening socket */
		sodereference(head);
		/* Propagate socket filter's error code to the caller */
		goto out;
	}

	/*
	 * Save the listener's file flags now: falloc() overwrites fp with
	 * the new descriptor's fileproc, which then inherits these flags.
	 */
	fflag = fp->f_flag;
	error = falloc(p, &fp, &newfd, vfs_context_current());
	if (error) {
		/*
		 * Probably ran out of file descriptors.
		 *
		 * <rdar://problem/8554930>
		 * Don't put this back on the socket like we used to, that
		 * just causes the client to spin.  Drop the socket.
		 */
		socket_lock(so, 1);
		so->so_state &= ~SS_NOFDREF;
		socket_unlock(so, 1);
		soclose(so);
		sodereference(head);
		goto out;
	}
	*retval = newfd;
	fp->f_flag = fflag;
	fp->f_ops = &socketops;
	fp->f_data = (caddr_t)so;

	/* Retake head; lock so separately only if it has its own lock. */
	socket_lock(head, 0);
	if (dosocklock)
		socket_lock(so, 1);

	/* Sync socket non-blocking/async state with file flags */
	if (fp->f_flag & FNONBLOCK) {
		so->so_state |= SS_NBIO;
	} else {
		so->so_state &= ~SS_NBIO;
	}

	if (fp->f_flag & FASYNC) {
		so->so_state |= SS_ASYNC;
		so->so_rcv.sb_flags |= SB_ASYNC;
		so->so_snd.sb_flags |= SB_ASYNC;
	} else {
		so->so_state &= ~SS_ASYNC;
		so->so_rcv.sb_flags &= ~SB_ASYNC;
		so->so_snd.sb_flags &= ~SB_ASYNC;
	}

	/* Return value deliberately ignored; only sa is examined below. */
	(void) soacceptlock(so, &sa, 0);
	socket_unlock(head, 1);
	if (sa == NULL) {
		/* No peer address: report length 0 if the caller asked. */
		namelen = 0;
		if (uap->name)
			goto gotnoname;
		error = 0;
		goto releasefd;
	}
	AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);

	/*
	 * NOTE(review): if either copyout below fails, error is returned but
	 * newfd stays installed in the descriptor table and *retval was
	 * already set -- confirm this is the intended behavior.
	 */
	if (uap->name) {
		socklen_t	sa_len;

		/* save sa_len before it is destroyed */
		sa_len = sa->sa_len;
		/* Copy out no more than the caller's buffer can hold. */
		namelen = MIN(namelen, sa_len);
		error = copyout(sa, uap->name, namelen);
		if (!error)
			/* return the actual, untruncated address length */
			namelen = sa_len;
gotnoname:
		/* Also entered by goto above with sa == NULL, namelen == 0. */
		error = copyout((caddr_t)&namelen, uap->anamelen,
		    sizeof (socklen_t));
	}
	FREE(sa, M_SONAME);

releasefd:
	/*
	 * If the socket has been marked as inactive by sosetdefunct(),
	 * disallow further operations on it.
	 */
	if (so->so_flags & SOF_DEFUNCT) {
		sodefunct(current_proc(), so,
		    SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL);
	}

	if (dosocklock)
		socket_unlock(so, 1);

	/* Release the fd table slot and drop our fileproc reference. */
	proc_fdlock(p);
	procfdtbl_releasefd(p, newfd, NULL);
	fp_drop(p, newfd, fp, 1);
	proc_fdunlock(p);

out:
	/* Balances the reference taken by fp_getfsock() on fd. */
	file_drop(fd);

	/* newfd and so are valid whenever error is 0 at this point. */
	if (error == 0 && ENTR_SHOULDTRACE) {
		KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
		    newfd, 0, (int64_t)VM_KERNEL_ADDRPERM(so));
	}
	return (error);
}
644
645int
646accept(struct proc *p, struct accept_args *uap, int32_t *retval)
647{
648 __pthread_testcancel(1);
649 return (accept_nocancel(p, (struct accept_nocancel_args *)uap,
650 retval));
651}
652
653/*
654 * Returns: 0 Success
655 * EBADF Bad file descriptor
656 * EALREADY Connection already in progress
657 * EINPROGRESS Operation in progress
658 * ECONNABORTED Connection aborted
659 * EINTR Interrupted function
660 * EACCES Mandatory Access Control failure
661 * file_socket:ENOTSOCK
662 * file_socket:EBADF
663 * getsockaddr:ENAMETOOLONG Filename too long
664 * getsockaddr:EINVAL Invalid argument
665 * getsockaddr:ENOMEM Not enough space
666 * getsockaddr:EFAULT Bad address
667 * soconnectlock:EOPNOTSUPP
668 * soconnectlock:EISCONN
669 * soconnectlock:??? [depends on protocol, filters]
670 * msleep:EINTR
671 *
672 * Imputed: so_error error may be set from so_error, which
673 * may have been set by soconnectlock.
674 */
675/* ARGSUSED */
676int
677connect(struct proc *p, struct connect_args *uap, int32_t *retval)
678{
679 __pthread_testcancel(1);
680 return (connect_nocancel(p, (struct connect_nocancel_args *)uap,
681 retval));
682}
683
684int
685connect_nocancel(proc_t p, struct connect_nocancel_args *uap, int32_t *retval)
686{
687#pragma unused(p, retval)
688 struct socket *so;
689 struct sockaddr_storage ss;
690 struct sockaddr *sa = NULL;
691 int error;
692 int fd = uap->s;
693 boolean_t dgram;
694
695 AUDIT_ARG(fd, uap->s);
696 error = file_socket(fd, &so);
697 if (error != 0)
698 return (error);
699 if (so == NULL) {
700 error = EBADF;
701 goto out;
702 }
703
704 /*
705 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
706 * if this is a datagram socket; translate for other types.
707 */
708 dgram = (so->so_type == SOCK_DGRAM);
709
710 /* Get socket address now before we obtain socket lock */
711 if (uap->namelen > sizeof (ss)) {
712 error = getsockaddr(so, &sa, uap->name, uap->namelen, !dgram);
713 } else {
714 error = getsockaddr_s(so, &ss, uap->name, uap->namelen, !dgram);
715 if (error == 0)
716 sa = (struct sockaddr *)&ss;
717 }
718 if (error != 0)
719 goto out;
720
721 error = connectit(so, sa);
722
723 if (sa != NULL && sa != SA(&ss))
724 FREE(sa, M_SONAME);
725 if (error == ERESTART)
726 error = EINTR;
727out:
728 file_drop(fd);
729 return (error);
730}
731
732static int
733connectx_nocancel(struct proc *p, struct connectx_args *uap, int *retval)
734{
735#pragma unused(p, retval)
736 struct sockaddr_storage ss, sd;
737 struct sockaddr *src = NULL, *dst = NULL;
738 struct socket *so;
739 int error, error1, fd = uap->socket;
740 boolean_t dgram;
741 sae_connid_t cid = SAE_CONNID_ANY;
742 struct user32_sa_endpoints ep32;
743 struct user64_sa_endpoints ep64;
744 struct user_sa_endpoints ep;
745 user_ssize_t bytes_written = 0;
746 struct user_iovec *iovp;
747 uio_t auio = NULL;
748
749 AUDIT_ARG(fd, uap->socket);
750 error = file_socket(fd, &so);
751 if (error != 0)
752 return (error);
753 if (so == NULL) {
754 error = EBADF;
755 goto out;
756 }
757
758 if (uap->endpoints == USER_ADDR_NULL) {
759 error = EINVAL;
760 goto out;
761 }
762
763 if (IS_64BIT_PROCESS(p)) {
764 error = copyin(uap->endpoints, (caddr_t)&ep64, sizeof(ep64));
765 if (error != 0)
766 goto out;
767
768 ep.sae_srcif = ep64.sae_srcif;
769 ep.sae_srcaddr = ep64.sae_srcaddr;
770 ep.sae_srcaddrlen = ep64.sae_srcaddrlen;
771 ep.sae_dstaddr = ep64.sae_dstaddr;
772 ep.sae_dstaddrlen = ep64.sae_dstaddrlen;
773 } else {
774 error = copyin(uap->endpoints, (caddr_t)&ep32, sizeof(ep32));
775 if (error != 0)
776 goto out;
777
778 ep.sae_srcif = ep32.sae_srcif;
779 ep.sae_srcaddr = ep32.sae_srcaddr;
780 ep.sae_srcaddrlen = ep32.sae_srcaddrlen;
781 ep.sae_dstaddr = ep32.sae_dstaddr;
782 ep.sae_dstaddrlen = ep32.sae_dstaddrlen;
783 }
784
785 /*
786 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
787 * if this is a datagram socket; translate for other types.
788 */
789 dgram = (so->so_type == SOCK_DGRAM);
790
791 /* Get socket address now before we obtain socket lock */
792 if (ep.sae_srcaddr != USER_ADDR_NULL) {
793 if (ep.sae_srcaddrlen > sizeof (ss)) {
794 error = getsockaddr(so, &src, ep.sae_srcaddr, ep.sae_srcaddrlen, dgram);
795 } else {
796 error = getsockaddr_s(so, &ss, ep.sae_srcaddr, ep.sae_srcaddrlen, dgram);
797 if (error == 0)
798 src = (struct sockaddr *)&ss;
799 }
800
801 if (error)
802 goto out;
803 }
804
805 if (ep.sae_dstaddr == USER_ADDR_NULL) {
806 error = EINVAL;
807 goto out;
808 }
809
810 /* Get socket address now before we obtain socket lock */
811 if (ep.sae_dstaddrlen > sizeof (sd)) {
812 error = getsockaddr(so, &dst, ep.sae_dstaddr, ep.sae_dstaddrlen, dgram);
813 } else {
814 error = getsockaddr_s(so, &sd, ep.sae_dstaddr, ep.sae_dstaddrlen, dgram);
815 if (error == 0)
816 dst = (struct sockaddr *)&sd;
817 }
818
819 if (error)
820 goto out;
821
822 VERIFY(dst != NULL);
823
824 if (uap->iov != USER_ADDR_NULL) {
825 /* Verify range before calling uio_create() */
826 if (uap->iovcnt <= 0 || uap->iovcnt > UIO_MAXIOV)
827 return (EINVAL);
828
829 if (uap->len == USER_ADDR_NULL)
830 return (EINVAL);
831
832 /* allocate a uio to hold the number of iovecs passed */
833 auio = uio_create(uap->iovcnt, 0,
834 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
835 UIO_WRITE);
836
837 if (auio == NULL) {
838 error = ENOMEM;
839 goto out;
840 }
841
842 /*
843 * get location of iovecs within the uio.
844 * then copyin the iovecs from user space.
845 */
846 iovp = uio_iovsaddr(auio);
847 if (iovp == NULL) {
848 error = ENOMEM;
849 goto out;
850 }
851 error = copyin_user_iovec_array(uap->iov,
852 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
853 uap->iovcnt, iovp);
854 if (error != 0)
855 goto out;
856
857 /* finish setup of uio_t */
858 error = uio_calculateresid(auio);
859 if (error != 0) {
860 goto out;
861 }
862 }
863
864 error = connectitx(so, src, dst, p, ep.sae_srcif, uap->associd,
865 &cid, auio, uap->flags, &bytes_written);
866 if (error == ERESTART)
867 error = EINTR;
868
869 if (uap->len != USER_ADDR_NULL) {
870 error1 = copyout(&bytes_written, uap->len, sizeof (uap->len));
871 /* give precedence to connectitx errors */
872 if ((error1 != 0) && (error == 0))
873 error = error1;
874 }
875
876 if (uap->connid != USER_ADDR_NULL) {
877 error1 = copyout(&cid, uap->connid, sizeof (cid));
878 /* give precedence to connectitx errors */
879 if ((error1 != 0) && (error == 0))
880 error = error1;
881 }
882out:
883 file_drop(fd);
884 if (auio != NULL) {
885 uio_free(auio);
886 }
887 if (src != NULL && src != SA(&ss))
888 FREE(src, M_SONAME);
889 if (dst != NULL && dst != SA(&sd))
890 FREE(dst, M_SONAME);
891 return (error);
892}
893
/*
 * connectx
 *
 * Description:	Cancellation-point wrapper for connectx_nocancel().
 *
 * Returns:	Whatever connectx_nocancel() returns.
 */
int
connectx(struct proc *p, struct connectx_args *uap, int *retval)
{
	int error;

	/*
	 * Due to similarity with a POSIX interface, define as
	 * an unofficial cancellation point.
	 */
	__pthread_testcancel(1);

	error = connectx_nocancel(p, uap, retval);
	return (error);
}
904
905static int
906connectit(struct socket *so, struct sockaddr *sa)
907{
908 int error;
909
910 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
911#if CONFIG_MACF_SOCKET_SUBSET
912 if ((error = mac_socket_check_connect(kauth_cred_get(), so, sa)) != 0)
913 return (error);
914#endif /* MAC_SOCKET_SUBSET */
915
916 socket_lock(so, 1);
917 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
918 error = EALREADY;
919 goto out;
920 }
921 error = soconnectlock(so, sa, 0);
922 if (error != 0) {
923 so->so_state &= ~SS_ISCONNECTING;
924 goto out;
925 }
926 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
927 error = EINPROGRESS;
928 goto out;
929 }
930 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
931 lck_mtx_t *mutex_held;
932
933 if (so->so_proto->pr_getlock != NULL)
934 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
935 else
936 mutex_held = so->so_proto->pr_domain->dom_mtx;
937 error = msleep((caddr_t)&so->so_timeo, mutex_held,
938 PSOCK | PCATCH, __func__, 0);
939 if (so->so_state & SS_DRAINING) {
940 error = ECONNABORTED;
941 }
942 if (error != 0)
943 break;
944 }
945 if (error == 0) {
946 error = so->so_error;
947 so->so_error = 0;
948 }
949out:
950 socket_unlock(so, 1);
951 return (error);
952}
953
954static int
955connectitx(struct socket *so, struct sockaddr *src,
956 struct sockaddr *dst, struct proc *p, uint32_t ifscope,
957 sae_associd_t aid, sae_connid_t *pcid, uio_t auio, unsigned int flags,
958 user_ssize_t *bytes_written)
959{
960 int error;
961#pragma unused (flags)
962
963 VERIFY(dst != NULL);
964
965 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), dst);
966#if CONFIG_MACF_SOCKET_SUBSET
967 if ((error = mac_socket_check_connect(kauth_cred_get(), so, dst)) != 0)
968 return (error);
969#endif /* MAC_SOCKET_SUBSET */
970
971 socket_lock(so, 1);
972 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
973 error = EALREADY;
974 goto out;
975 }
976
977 if ((so->so_proto->pr_flags & PR_DATA_IDEMPOTENT) &&
978 (flags & CONNECT_DATA_IDEMPOTENT)) {
979 so->so_flags1 |= SOF1_DATA_IDEMPOTENT;
980
981 if (flags & CONNECT_DATA_AUTHENTICATED)
982 so->so_flags |= SOF1_DATA_AUTHENTICATED;
983 }
984
985 /*
986 * Case 1: CONNECT_RESUME_ON_READ_WRITE set, no data.
987 * Case 2: CONNECT_RESUME_ON_READ_WRITE set, with data (user error)
988 * Case 3: CONNECT_RESUME_ON_READ_WRITE not set, with data
989 * Case 3 allows user to combine write with connect even if they have
990 * no use for TFO (such as regular TCP, and UDP).
991 * Case 4: CONNECT_RESUME_ON_READ_WRITE not set, no data (regular case)
992 */
993 if ((so->so_proto->pr_flags & PR_PRECONN_WRITE) &&
994 ((flags & CONNECT_RESUME_ON_READ_WRITE) || auio))
995 so->so_flags1 |= SOF1_PRECONNECT_DATA;
996
997 /*
998 * If a user sets data idempotent and does not pass an uio, or
999 * sets CONNECT_RESUME_ON_READ_WRITE, this is an error, reset
1000 * SOF1_DATA_IDEMPOTENT.
1001 */
1002 if (!(so->so_flags1 & SOF1_PRECONNECT_DATA) &&
1003 (so->so_flags1 & SOF1_DATA_IDEMPOTENT)) {
1004 /* We should return EINVAL instead perhaps. */
1005 so->so_flags1 &= ~SOF1_DATA_IDEMPOTENT;
1006 }
1007
1008 error = soconnectxlocked(so, src, dst, p, ifscope,
1009 aid, pcid, 0, NULL, 0, auio, bytes_written);
1010 if (error != 0) {
1011 so->so_state &= ~SS_ISCONNECTING;
1012 goto out;
1013 }
1014 /*
1015 * If, after the call to soconnectxlocked the flag is still set (in case
1016 * data has been queued and the connect() has actually been triggered,
1017 * it will have been unset by the transport), we exit immediately. There
1018 * is no reason to wait on any event.
1019 */
1020 if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
1021 error = 0;
1022 goto out;
1023 }
1024 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
1025 error = EINPROGRESS;
1026 goto out;
1027 }
1028 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
1029 lck_mtx_t *mutex_held;
1030
1031 if (so->so_proto->pr_getlock != NULL)
1032 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
1033 else
1034 mutex_held = so->so_proto->pr_domain->dom_mtx;
1035 error = msleep((caddr_t)&so->so_timeo, mutex_held,
1036 PSOCK | PCATCH, __func__, 0);
1037 if (so->so_state & SS_DRAINING) {
1038 error = ECONNABORTED;
1039 }
1040 if (error != 0)
1041 break;
1042 }
1043 if (error == 0) {
1044 error = so->so_error;
1045 so->so_error = 0;
1046 }
1047out:
1048 socket_unlock(so, 1);
1049 return (error);
1050}
1051
/*
 * peeloff
 *
 * Description:	Cancellation-point wrapper for peeloff_nocancel().
 *
 * Returns:	Whatever peeloff_nocancel() returns.
 */
int
peeloff(struct proc *p, struct peeloff_args *uap, int *retval)
{
	int error;

	/*
	 * Due to similarity with a POSIX interface, define as
	 * an unofficial cancellation point.
	 */
	__pthread_testcancel(1);

	error = peeloff_nocancel(p, uap, retval);
	return (error);
}
1062
1063static int
1064peeloff_nocancel(struct proc *p, struct peeloff_args *uap, int *retval)
1065{
1066 struct fileproc *fp;
1067 struct socket *mp_so, *so = NULL;
1068 int newfd, fd = uap->s;
1069 short fflag; /* type must match fp->f_flag */
1070 int error;
1071
1072 *retval = -1;
1073
1074 error = fp_getfsock(p, fd, &fp, &mp_so);
1075 if (error != 0) {
1076 if (error == EOPNOTSUPP)
1077 error = ENOTSOCK;
1078 goto out_nofile;
1079 }
1080 if (mp_so == NULL) {
1081 error = EBADF;
1082 goto out;
1083 }
1084
1085 socket_lock(mp_so, 1);
1086 error = sopeelofflocked(mp_so, uap->aid, &so);
1087 if (error != 0) {
1088 socket_unlock(mp_so, 1);
1089 goto out;
1090 }
1091 VERIFY(so != NULL);
1092 socket_unlock(mp_so, 0); /* keep ref on mp_so for us */
1093
1094 fflag = fp->f_flag;
1095 error = falloc(p, &fp, &newfd, vfs_context_current());
1096 if (error != 0) {
1097 /* drop this socket (probably ran out of file descriptors) */
1098 soclose(so);
1099 sodereference(mp_so); /* our mp_so ref */
1100 goto out;
1101 }
1102
1103 fp->f_flag = fflag;
1104 fp->f_ops = &socketops;
1105 fp->f_data = (caddr_t)so;
1106
1107 /*
1108 * If the socket has been marked as inactive by sosetdefunct(),
1109 * disallow further operations on it.
1110 */
1111 if (so->so_flags & SOF_DEFUNCT) {
1112 sodefunct(current_proc(), so,
1113 SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL);
1114 }
1115
1116 proc_fdlock(p);
1117 procfdtbl_releasefd(p, newfd, NULL);
1118 fp_drop(p, newfd, fp, 1);
1119 proc_fdunlock(p);
1120
1121 sodereference(mp_so); /* our mp_so ref */
1122 *retval = newfd;
1123
1124out:
1125 file_drop(fd);
1126
1127out_nofile:
1128 return (error);
1129}
1130
/*
 * disconnectx system call entry point.
 *
 * Due to similarity with a POSIX interface, this is defined as an
 * unofficial cancellation point; the request itself is carried out by
 * disconnectx_nocancel(), whose result is returned unchanged.
 */
int
disconnectx(struct proc *p, struct disconnectx_args *uap, int *retval)
{
	int error;

	__pthread_testcancel(1);
	error = disconnectx_nocancel(p, uap, retval);

	return (error);
}
1141
1142static int
1143disconnectx_nocancel(struct proc *p, struct disconnectx_args *uap, int *retval)
1144{
1145#pragma unused(p, retval)
1146 struct socket *so;
1147 int fd = uap->s;
1148 int error;
1149
1150 error = file_socket(fd, &so);
1151 if (error != 0)
1152 return (error);
1153 if (so == NULL) {
1154 error = EBADF;
1155 goto out;
1156 }
1157
1158 error = sodisconnectx(so, uap->aid, uap->cid);
1159out:
1160 file_drop(fd);
1161 return (error);
1162}
1163
1164/*
1165 * Returns: 0 Success
1166 * socreate:EAFNOSUPPORT
1167 * socreate:EPROTOTYPE
1168 * socreate:EPROTONOSUPPORT
1169 * socreate:ENOBUFS
1170 * socreate:ENOMEM
1171 * socreate:EISCONN
1172 * socreate:??? [other protocol families, IPSEC]
1173 * falloc:ENFILE
1174 * falloc:EMFILE
1175 * falloc:ENOMEM
1176 * copyout:EFAULT
1177 * soconnect2:EINVAL
1178 * soconnect2:EPROTOTYPE
1179 * soconnect2:??? [other protocol families]
1180 */
1181int
1182socketpair(struct proc *p, struct socketpair_args *uap,
1183 __unused int32_t *retval)
1184{
1185 struct fileproc *fp1, *fp2;
1186 struct socket *so1, *so2;
1187 int fd, error, sv[2];
1188
1189 AUDIT_ARG(socket, uap->domain, uap->type, uap->protocol);
1190 error = socreate(uap->domain, &so1, uap->type, uap->protocol);
1191 if (error)
1192 return (error);
1193 error = socreate(uap->domain, &so2, uap->type, uap->protocol);
1194 if (error)
1195 goto free1;
1196
1197 error = falloc(p, &fp1, &fd, vfs_context_current());
1198 if (error) {
1199 goto free2;
1200 }
1201 fp1->f_flag = FREAD|FWRITE;
1202 fp1->f_ops = &socketops;
1203 fp1->f_data = (caddr_t)so1;
1204 sv[0] = fd;
1205
1206 error = falloc(p, &fp2, &fd, vfs_context_current());
1207 if (error) {
1208 goto free3;
1209 }
1210 fp2->f_flag = FREAD|FWRITE;
1211 fp2->f_ops = &socketops;
1212 fp2->f_data = (caddr_t)so2;
1213 sv[1] = fd;
1214
1215 error = soconnect2(so1, so2);
1216 if (error) {
1217 goto free4;
1218 }
1219 if (uap->type == SOCK_DGRAM) {
1220 /*
1221 * Datagram socket connection is asymmetric.
1222 */
1223 error = soconnect2(so2, so1);
1224 if (error) {
1225 goto free4;
1226 }
1227 }
1228
1229 if ((error = copyout(sv, uap->rsv, 2 * sizeof (int))) != 0)
1230 goto free4;
1231
1232 proc_fdlock(p);
1233 procfdtbl_releasefd(p, sv[0], NULL);
1234 procfdtbl_releasefd(p, sv[1], NULL);
1235 fp_drop(p, sv[0], fp1, 1);
1236 fp_drop(p, sv[1], fp2, 1);
1237 proc_fdunlock(p);
1238
1239 return (0);
1240free4:
1241 fp_free(p, sv[1], fp2);
1242free3:
1243 fp_free(p, sv[0], fp1);
1244free2:
1245 (void) soclose(so2);
1246free1:
1247 (void) soclose(so1);
1248 return (error);
1249}
1250
1251/*
1252 * Returns: 0 Success
1253 * EINVAL
1254 * ENOBUFS
1255 * EBADF
1256 * EPIPE
1257 * EACCES Mandatory Access Control failure
1258 * file_socket:ENOTSOCK
1259 * file_socket:EBADF
1260 * getsockaddr:ENAMETOOLONG Filename too long
1261 * getsockaddr:EINVAL Invalid argument
1262 * getsockaddr:ENOMEM Not enough space
1263 * getsockaddr:EFAULT Bad address
1264 * <pru_sosend>:EACCES[TCP]
1265 * <pru_sosend>:EADDRINUSE[TCP]
1266 * <pru_sosend>:EADDRNOTAVAIL[TCP]
1267 * <pru_sosend>:EAFNOSUPPORT[TCP]
1268 * <pru_sosend>:EAGAIN[TCP]
1269 * <pru_sosend>:EBADF
1270 * <pru_sosend>:ECONNRESET[TCP]
1271 * <pru_sosend>:EFAULT
1272 * <pru_sosend>:EHOSTUNREACH[TCP]
1273 * <pru_sosend>:EINTR
1274 * <pru_sosend>:EINVAL
1275 * <pru_sosend>:EISCONN[AF_INET]
1276 * <pru_sosend>:EMSGSIZE[TCP]
1277 * <pru_sosend>:ENETDOWN[TCP]
1278 * <pru_sosend>:ENETUNREACH[TCP]
1279 * <pru_sosend>:ENOBUFS
1280 * <pru_sosend>:ENOMEM[TCP]
1281 * <pru_sosend>:ENOTCONN[AF_INET]
1282 * <pru_sosend>:EOPNOTSUPP
1283 * <pru_sosend>:EPERM[TCP]
1284 * <pru_sosend>:EPIPE
1285 * <pru_sosend>:EWOULDBLOCK
1286 * <pru_sosend>:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
1287 * <pru_sosend>:???[AF_INET] [whatever a filter author chooses]
1288 * <pru_sosend>:??? [value from so_error]
1289 * sockargs:???
1290 */
1291static int
1292sendit(struct proc *p, struct socket *so, struct user_msghdr *mp, uio_t uiop,
1293 int flags, int32_t *retval)
1294{
1295 struct mbuf *control = NULL;
1296 struct sockaddr_storage ss;
1297 struct sockaddr *to = NULL;
1298 boolean_t want_free = TRUE;
1299 int error;
1300 user_ssize_t len;
1301
1302 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
1303
1304 if (mp->msg_name != USER_ADDR_NULL) {
1305 if (mp->msg_namelen > sizeof (ss)) {
1306 error = getsockaddr(so, &to, mp->msg_name,
1307 mp->msg_namelen, TRUE);
1308 } else {
1309 error = getsockaddr_s(so, &ss, mp->msg_name,
1310 mp->msg_namelen, TRUE);
1311 if (error == 0) {
1312 to = (struct sockaddr *)&ss;
1313 want_free = FALSE;
1314 }
1315 }
1316 if (error != 0)
1317 goto out;
1318 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), to);
1319 }
1320 if (mp->msg_control != USER_ADDR_NULL) {
1321 if (mp->msg_controllen < sizeof (struct cmsghdr)) {
1322 error = EINVAL;
1323 goto bad;
1324 }
1325 error = sockargs(&control, mp->msg_control,
1326 mp->msg_controllen, MT_CONTROL);
1327 if (error != 0)
1328 goto bad;
1329 }
1330
1331#if CONFIG_MACF_SOCKET_SUBSET
1332 /*
1333 * We check the state without holding the socket lock;
1334 * if a race condition occurs, it would simply result
1335 * in an extra call to the MAC check function.
1336 */
1337 if (to != NULL &&
1338 !(so->so_state & SS_DEFUNCT) &&
1339 (error = mac_socket_check_send(kauth_cred_get(), so, to)) != 0)
1340 goto bad;
1341#endif /* MAC_SOCKET_SUBSET */
1342
1343 len = uio_resid(uiop);
1344 error = so->so_proto->pr_usrreqs->pru_sosend(so, to, uiop, 0,
1345 control, flags);
1346 if (error != 0) {
1347 if (uio_resid(uiop) != len && (error == ERESTART ||
1348 error == EINTR || error == EWOULDBLOCK))
1349 error = 0;
1350 /* Generation of SIGPIPE can be controlled per socket */
1351 if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE))
1352 psignal(p, SIGPIPE);
1353 }
1354 if (error == 0)
1355 *retval = (int)(len - uio_resid(uiop));
1356bad:
1357 if (to != NULL && want_free)
1358 FREE(to, M_SONAME);
1359out:
1360 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error, 0, 0, 0, 0);
1361
1362 return (error);
1363}
1364
1365/*
1366 * Returns: 0 Success
1367 * ENOMEM
1368 * sendit:??? [see sendit definition in this file]
1369 * write:??? [4056224: applicable for pipes]
1370 */
1371int
1372sendto(struct proc *p, struct sendto_args *uap, int32_t *retval)
1373{
1374 __pthread_testcancel(1);
1375 return (sendto_nocancel(p, (struct sendto_nocancel_args *)uap, retval));
1376}
1377
1378int
1379sendto_nocancel(struct proc *p,
1380 struct sendto_nocancel_args *uap,
1381 int32_t *retval)
1382{
1383 struct user_msghdr msg;
1384 int error;
1385 uio_t auio = NULL;
1386 struct socket *so;
1387
1388 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_START, 0, 0, 0, 0, 0);
1389 AUDIT_ARG(fd, uap->s);
1390
1391 auio = uio_create(1, 0,
1392 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1393 UIO_WRITE);
1394 if (auio == NULL) {
1395 error = ENOMEM;
1396 goto done;
1397 }
1398 uio_addiov(auio, uap->buf, uap->len);
1399
1400 msg.msg_name = uap->to;
1401 msg.msg_namelen = uap->tolen;
1402 /* no need to set up msg_iov. sendit uses uio_t we send it */
1403 msg.msg_iov = 0;
1404 msg.msg_iovlen = 0;
1405 msg.msg_control = 0;
1406 msg.msg_flags = 0;
1407
1408 error = file_socket(uap->s, &so);
1409 if (error)
1410 goto done;
1411
1412 if (so == NULL) {
1413 error = EBADF;
1414 } else {
1415 error = sendit(p, so, &msg, auio, uap->flags, retval);
1416 }
1417
1418 file_drop(uap->s);
1419done:
1420 if (auio != NULL)
1421 uio_free(auio);
1422
1423 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_END, error, *retval, 0, 0, 0);
1424
1425 return (error);
1426}
1427
1428/*
1429 * Returns: 0 Success
1430 * ENOBUFS
1431 * copyin:EFAULT
1432 * sendit:??? [see sendit definition in this file]
1433 */
1434int
1435sendmsg(struct proc *p, struct sendmsg_args *uap, int32_t *retval)
1436{
1437 __pthread_testcancel(1);
1438 return (sendmsg_nocancel(p, (struct sendmsg_nocancel_args *)uap,
1439 retval));
1440}
1441
1442int
1443sendmsg_nocancel(struct proc *p, struct sendmsg_nocancel_args *uap,
1444 int32_t *retval)
1445{
1446 struct user32_msghdr msg32;
1447 struct user64_msghdr msg64;
1448 struct user_msghdr user_msg;
1449 caddr_t msghdrp;
1450 int size_of_msghdr;
1451 int error;
1452 uio_t auio = NULL;
1453 struct user_iovec *iovp;
1454 struct socket *so;
1455
1456 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
1457 AUDIT_ARG(fd, uap->s);
1458 if (IS_64BIT_PROCESS(p)) {
1459 msghdrp = (caddr_t)&msg64;
1460 size_of_msghdr = sizeof (msg64);
1461 } else {
1462 msghdrp = (caddr_t)&msg32;
1463 size_of_msghdr = sizeof (msg32);
1464 }
1465 error = copyin(uap->msg, msghdrp, size_of_msghdr);
1466 if (error) {
1467 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1468 return (error);
1469 }
1470
1471 if (IS_64BIT_PROCESS(p)) {
1472 user_msg.msg_flags = msg64.msg_flags;
1473 user_msg.msg_controllen = msg64.msg_controllen;
1474 user_msg.msg_control = msg64.msg_control;
1475 user_msg.msg_iovlen = msg64.msg_iovlen;
1476 user_msg.msg_iov = msg64.msg_iov;
1477 user_msg.msg_namelen = msg64.msg_namelen;
1478 user_msg.msg_name = msg64.msg_name;
1479 } else {
1480 user_msg.msg_flags = msg32.msg_flags;
1481 user_msg.msg_controllen = msg32.msg_controllen;
1482 user_msg.msg_control = msg32.msg_control;
1483 user_msg.msg_iovlen = msg32.msg_iovlen;
1484 user_msg.msg_iov = msg32.msg_iov;
1485 user_msg.msg_namelen = msg32.msg_namelen;
1486 user_msg.msg_name = msg32.msg_name;
1487 }
1488
1489 if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
1490 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, EMSGSIZE,
1491 0, 0, 0, 0);
1492 return (EMSGSIZE);
1493 }
1494
1495 /* allocate a uio large enough to hold the number of iovecs passed */
1496 auio = uio_create(user_msg.msg_iovlen, 0,
1497 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1498 UIO_WRITE);
1499 if (auio == NULL) {
1500 error = ENOBUFS;
1501 goto done;
1502 }
1503
1504 if (user_msg.msg_iovlen) {
1505 /*
1506 * get location of iovecs within the uio.
1507 * then copyin the iovecs from user space.
1508 */
1509 iovp = uio_iovsaddr(auio);
1510 if (iovp == NULL) {
1511 error = ENOBUFS;
1512 goto done;
1513 }
1514 error = copyin_user_iovec_array(user_msg.msg_iov,
1515 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
1516 user_msg.msg_iovlen, iovp);
1517 if (error)
1518 goto done;
1519 user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
1520
1521 /* finish setup of uio_t */
1522 error = uio_calculateresid(auio);
1523 if (error) {
1524 goto done;
1525 }
1526 } else {
1527 user_msg.msg_iov = 0;
1528 }
1529
1530 /* msg_flags is ignored for send */
1531 user_msg.msg_flags = 0;
1532
1533 error = file_socket(uap->s, &so);
1534 if (error) {
1535 goto done;
1536 }
1537 if (so == NULL) {
1538 error = EBADF;
1539 } else {
1540 error = sendit(p, so, &user_msg, auio, uap->flags, retval);
1541 }
1542 file_drop(uap->s);
1543done:
1544 if (auio != NULL) {
1545 uio_free(auio);
1546 }
1547 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1548
1549 return (error);
1550}
1551
1552int
1553sendmsg_x(struct proc *p, struct sendmsg_x_args *uap, user_ssize_t *retval)
1554{
1555 int error = 0;
1556 struct user_msghdr_x *user_msg_x = NULL;
1557 struct uio **uiop = NULL;
1558 struct socket *so;
1559 u_int i;
1560 struct sockaddr *to = NULL;
1561 user_ssize_t len_before = 0, len_after;
1562 int need_drop = 0;
1563 size_t size_of_msghdr;
1564 void *umsgp = NULL;
1565 u_int uiocnt;
1566 int has_addr_or_ctl = 0;
1567
1568 KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0);
1569
1570 error = file_socket(uap->s, &so);
1571 if (error) {
1572 goto out;
1573 }
1574 need_drop = 1;
1575 if (so == NULL) {
1576 error = EBADF;
1577 goto out;
1578 }
1579
1580 /*
1581 * Input parameter range check
1582 */
1583 if (uap->cnt == 0 || uap->cnt > UIO_MAXIOV) {
1584 error = EINVAL;
1585 goto out;
1586 }
1587 /*
1588 * Clip to max currently allowed
1589 */
1590 if (uap->cnt > somaxsendmsgx)
1591 uap->cnt = somaxsendmsgx;
1592
1593 user_msg_x = _MALLOC(uap->cnt * sizeof(struct user_msghdr_x),
1594 M_TEMP, M_WAITOK | M_ZERO);
1595 if (user_msg_x == NULL) {
1596 DBG_PRINTF("%s _MALLOC() user_msg_x failed\n", __func__);
1597 error = ENOMEM;
1598 goto out;
1599 }
1600 uiop = _MALLOC(uap->cnt * sizeof(struct uio *),
1601 M_TEMP, M_WAITOK | M_ZERO);
1602 if (uiop == NULL) {
1603 DBG_PRINTF("%s _MALLOC() uiop failed\n", __func__);
1604 error = ENOMEM;
1605 goto out;
1606 }
1607
1608 size_of_msghdr = IS_64BIT_PROCESS(p) ?
1609 sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x);
1610
1611 umsgp = _MALLOC(uap->cnt * size_of_msghdr,
1612 M_TEMP, M_WAITOK | M_ZERO);
1613 if (umsgp == NULL) {
1614 printf("%s _MALLOC() user_msg_x failed\n", __func__);
1615 error = ENOMEM;
1616 goto out;
1617 }
1618 error = copyin(uap->msgp, umsgp, uap->cnt * size_of_msghdr);
1619 if (error) {
1620 DBG_PRINTF("%s copyin() failed\n", __func__);
1621 goto out;
1622 }
1623 error = internalize_user_msghdr_array(umsgp,
1624 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
1625 UIO_WRITE, uap->cnt, user_msg_x, uiop);
1626 if (error) {
1627 DBG_PRINTF("%s copyin_user_msghdr_array() failed\n", __func__);
1628 goto out;
1629 }
1630 /*
1631 * Make sure the size of each message iovec and
1632 * the aggregate size of all the iovec is valid
1633 */
1634 if (uio_array_is_valid(uiop, uap->cnt) == 0) {
1635 error = EINVAL;
1636 goto out;
1637 }
1638
1639 /*
1640 * Sanity check on passed arguments
1641 */
1642 for (i = 0; i < uap->cnt; i++) {
1643 struct user_msghdr_x *mp = user_msg_x + i;
1644
1645 /*
1646 * No flags on send message
1647 */
1648 if (mp->msg_flags != 0) {
1649 error = EINVAL;
1650 goto out;
1651 }
1652 /*
1653 * No support for address or ancillary data (yet)
1654 */
1655 if (mp->msg_name != USER_ADDR_NULL || mp->msg_namelen != 0)
1656 has_addr_or_ctl = 1;
1657
1658 if (mp->msg_control != USER_ADDR_NULL ||
1659 mp->msg_controllen != 0)
1660 has_addr_or_ctl = 1;
1661
1662#if CONFIG_MACF_SOCKET_SUBSET
1663 /*
1664 * We check the state without holding the socket lock;
1665 * if a race condition occurs, it would simply result
1666 * in an extra call to the MAC check function.
1667 *
1668 * Note: The following check is never true taken with the
1669 * current limitation that we do not accept to pass an address,
1670 * this is effectively placeholder code. If we add support for
1671 * addresses, we will have to check every address.
1672 */
1673 if (to != NULL &&
1674 !(so->so_state & SS_DEFUNCT) &&
1675 (error = mac_socket_check_send(kauth_cred_get(), so, to))
1676 != 0)
1677 goto out;
1678#endif /* MAC_SOCKET_SUBSET */
1679 }
1680
1681 len_before = uio_array_resid(uiop, uap->cnt);
1682
1683 /*
1684 * Feed list of packets at once only for connected socket without
1685 * control message
1686 */
1687 if (so->so_proto->pr_usrreqs->pru_sosend_list !=
1688 pru_sosend_list_notsupp &&
1689 has_addr_or_ctl == 0 && somaxsendmsgx == 0) {
1690 error = so->so_proto->pr_usrreqs->pru_sosend_list(so, uiop,
1691 uap->cnt, uap->flags);
1692 } else {
1693 for (i = 0; i < uap->cnt; i++) {
1694 struct user_msghdr_x *mp = user_msg_x + i;
1695 struct user_msghdr user_msg;
1696 uio_t auio = uiop[i];
1697 int32_t tmpval;
1698
1699 user_msg.msg_flags = mp->msg_flags;
1700 user_msg.msg_controllen = mp->msg_controllen;
1701 user_msg.msg_control = mp->msg_control;
1702 user_msg.msg_iovlen = mp->msg_iovlen;
1703 user_msg.msg_iov = mp->msg_iov;
1704 user_msg.msg_namelen = mp->msg_namelen;
1705 user_msg.msg_name = mp->msg_name;
1706
1707 error = sendit(p, so, &user_msg, auio, uap->flags,
1708 &tmpval);
1709 if (error != 0)
1710 break;
1711 }
1712 }
1713 len_after = uio_array_resid(uiop, uap->cnt);
1714
1715 VERIFY(len_after <= len_before);
1716
1717 if (error != 0) {
1718 if (len_after != len_before && (error == ERESTART ||
1719 error == EINTR || error == EWOULDBLOCK ||
1720 error == ENOBUFS))
1721 error = 0;
1722 /* Generation of SIGPIPE can be controlled per socket */
1723 if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE))
1724 psignal(p, SIGPIPE);
1725 }
1726 if (error == 0) {
1727 uiocnt = externalize_user_msghdr_array(umsgp,
1728 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
1729 UIO_WRITE, uap->cnt, user_msg_x, uiop);
1730
1731 *retval = (int)(uiocnt);
1732 }
1733out:
1734 if (need_drop)
1735 file_drop(uap->s);
1736 if (umsgp != NULL)
1737 _FREE(umsgp, M_TEMP);
1738 if (uiop != NULL) {
1739 free_uio_array(uiop, uap->cnt);
1740 _FREE(uiop, M_TEMP);
1741 }
1742 if (user_msg_x != NULL)
1743 _FREE(user_msg_x, M_TEMP);
1744
1745 KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0);
1746
1747 return (error);
1748}
1749
1750
1751static int
1752copyout_sa(struct sockaddr *fromsa, user_addr_t name, socklen_t *namelen)
1753{
1754 int error = 0;
1755 socklen_t sa_len = 0;
1756 ssize_t len;
1757
1758 len = *namelen;
1759 if (len <= 0 || fromsa == 0) {
1760 len = 0;
1761 } else {
1762#ifndef MIN
1763#define MIN(a, b) ((a) > (b) ? (b) : (a))
1764#endif
1765 sa_len = fromsa->sa_len;
1766 len = MIN((unsigned int)len, sa_len);
1767 error = copyout(fromsa, name, (unsigned)len);
1768 if (error)
1769 goto out;
1770 }
1771 *namelen = sa_len;
1772out:
1773 return (0);
1774}
1775
1776static int
1777copyout_control(struct proc *p, struct mbuf *m, user_addr_t control,
1778 socklen_t *controllen, int *flags)
1779{
1780 int error = 0;
1781 ssize_t len;
1782 user_addr_t ctlbuf;
1783
1784 len = *controllen;
1785 *controllen = 0;
1786 ctlbuf = control;
1787
1788 while (m && len > 0) {
1789 unsigned int tocopy;
1790 struct cmsghdr *cp = mtod(m, struct cmsghdr *);
1791 int cp_size = CMSG_ALIGN(cp->cmsg_len);
1792 int buflen = m->m_len;
1793
1794 while (buflen > 0 && len > 0) {
1795 /*
1796 * SCM_TIMESTAMP hack because struct timeval has a
1797 * different size for 32 bits and 64 bits processes
1798 */
1799 if (cp->cmsg_level == SOL_SOCKET && cp->cmsg_type == SCM_TIMESTAMP) {
1800 unsigned char tmp_buffer[CMSG_SPACE(sizeof(struct user64_timeval))];
1801 struct cmsghdr *tmp_cp = (struct cmsghdr *)(void *)tmp_buffer;
1802 int tmp_space;
1803 struct timeval *tv = (struct timeval *)(void *)CMSG_DATA(cp);
1804
1805 tmp_cp->cmsg_level = SOL_SOCKET;
1806 tmp_cp->cmsg_type = SCM_TIMESTAMP;
1807
1808 if (proc_is64bit(p)) {
1809 struct user64_timeval *tv64 = (struct user64_timeval *)(void *)CMSG_DATA(tmp_cp);
1810
1811 tv64->tv_sec = tv->tv_sec;
1812 tv64->tv_usec = tv->tv_usec;
1813
1814 tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user64_timeval));
1815 tmp_space = CMSG_SPACE(sizeof(struct user64_timeval));
1816 } else {
1817 struct user32_timeval *tv32 = (struct user32_timeval *)(void *)CMSG_DATA(tmp_cp);
1818
1819 tv32->tv_sec = tv->tv_sec;
1820 tv32->tv_usec = tv->tv_usec;
1821
1822 tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user32_timeval));
1823 tmp_space = CMSG_SPACE(sizeof(struct user32_timeval));
1824 }
1825 if (len >= tmp_space) {
1826 tocopy = tmp_space;
1827 } else {
1828 *flags |= MSG_CTRUNC;
1829 tocopy = len;
1830 }
1831 error = copyout(tmp_buffer, ctlbuf, tocopy);
1832 if (error)
1833 goto out;
1834 } else {
1835 if (cp_size > buflen) {
1836 panic("cp_size > buflen, something"
1837 "wrong with alignment!");
1838 }
1839 if (len >= cp_size) {
1840 tocopy = cp_size;
1841 } else {
1842 *flags |= MSG_CTRUNC;
1843 tocopy = len;
1844 }
1845 error = copyout((caddr_t) cp, ctlbuf, tocopy);
1846 if (error)
1847 goto out;
1848 }
1849
1850 ctlbuf += tocopy;
1851 len -= tocopy;
1852
1853 buflen -= cp_size;
1854 cp = (struct cmsghdr *)(void *)
1855 ((unsigned char *) cp + cp_size);
1856 cp_size = CMSG_ALIGN(cp->cmsg_len);
1857 }
1858
1859 m = m->m_next;
1860 }
1861 *controllen = ctlbuf - control;
1862out:
1863 return (error);
1864}
1865
1866/*
1867 * Returns: 0 Success
1868 * ENOTSOCK
1869 * EINVAL
1870 * EBADF
1871 * EACCES Mandatory Access Control failure
1872 * copyout:EFAULT
1873 * fp_lookup:EBADF
1874 * <pru_soreceive>:ENOBUFS
1875 * <pru_soreceive>:ENOTCONN
1876 * <pru_soreceive>:EWOULDBLOCK
1877 * <pru_soreceive>:EFAULT
1878 * <pru_soreceive>:EINTR
1879 * <pru_soreceive>:EBADF
1880 * <pru_soreceive>:EINVAL
1881 * <pru_soreceive>:EMSGSIZE
1882 * <pru_soreceive>:???
1883 *
1884 * Notes: Additional return values from calls through <pru_soreceive>
1885 * depend on protocols other than TCP or AF_UNIX, which are
1886 * documented above.
1887 */
1888static int
1889recvit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop,
1890 user_addr_t namelenp, int32_t *retval)
1891{
1892 ssize_t len;
1893 int error;
1894 struct mbuf *control = 0;
1895 struct socket *so;
1896 struct sockaddr *fromsa = 0;
1897 struct fileproc *fp;
1898
1899 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
1900 proc_fdlock(p);
1901 if ((error = fp_lookup(p, s, &fp, 1))) {
1902 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
1903 proc_fdunlock(p);
1904 return (error);
1905 }
1906 if (fp->f_type != DTYPE_SOCKET) {
1907 fp_drop(p, s, fp, 1);
1908 proc_fdunlock(p);
1909 return (ENOTSOCK);
1910 }
1911
1912 so = (struct socket *)fp->f_data;
1913 if (so == NULL) {
1914 fp_drop(p, s, fp, 1);
1915 proc_fdunlock(p);
1916 return (EBADF);
1917 }
1918
1919 proc_fdunlock(p);
1920
1921#if CONFIG_MACF_SOCKET_SUBSET
1922 /*
1923 * We check the state without holding the socket lock;
1924 * if a race condition occurs, it would simply result
1925 * in an extra call to the MAC check function.
1926 */
1927 if (!(so->so_state & SS_DEFUNCT) &&
1928 !(so->so_state & SS_ISCONNECTED) &&
1929 !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
1930 (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0)
1931 goto out1;
1932#endif /* MAC_SOCKET_SUBSET */
1933 if (uio_resid(uiop) < 0) {
1934 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, EINVAL, 0, 0, 0, 0);
1935 error = EINVAL;
1936 goto out1;
1937 }
1938
1939 len = uio_resid(uiop);
1940 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, uiop,
1941 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
1942 &mp->msg_flags);
1943 if (fromsa)
1944 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()),
1945 fromsa);
1946 if (error) {
1947 if (uio_resid(uiop) != len && (error == ERESTART ||
1948 error == EINTR || error == EWOULDBLOCK))
1949 error = 0;
1950 }
1951 if (error)
1952 goto out;
1953
1954 *retval = len - uio_resid(uiop);
1955
1956 if (mp->msg_name) {
1957 error = copyout_sa(fromsa, mp->msg_name, &mp->msg_namelen);
1958 if (error)
1959 goto out;
1960 /* return the actual, untruncated address length */
1961 if (namelenp &&
1962 (error = copyout((caddr_t)&mp->msg_namelen, namelenp,
1963 sizeof (int)))) {
1964 goto out;
1965 }
1966 }
1967
1968 if (mp->msg_control) {
1969 error = copyout_control(p, control, mp->msg_control,
1970 &mp->msg_controllen, &mp->msg_flags);
1971 }
1972out:
1973 if (fromsa)
1974 FREE(fromsa, M_SONAME);
1975 if (control)
1976 m_freem(control);
1977 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
1978out1:
1979 fp_drop(p, s, fp, 0);
1980 return (error);
1981}
1982
1983/*
1984 * Returns: 0 Success
1985 * ENOMEM
1986 * copyin:EFAULT
1987 * recvit:???
1988 * read:??? [4056224: applicable for pipes]
1989 *
1990 * Notes: The read entry point is only called as part of support for
1991 * binary backward compatibility; new code should use read
1992 * instead of recv or recvfrom when attempting to read data
1993 * from pipes.
1994 *
1995 * For full documentation of the return codes from recvit, see
1996 * the block header for the recvit function.
1997 */
1998int
1999recvfrom(struct proc *p, struct recvfrom_args *uap, int32_t *retval)
2000{
2001 __pthread_testcancel(1);
2002 return (recvfrom_nocancel(p, (struct recvfrom_nocancel_args *)uap,
2003 retval));
2004}
2005
2006int
2007recvfrom_nocancel(struct proc *p, struct recvfrom_nocancel_args *uap,
2008 int32_t *retval)
2009{
2010 struct user_msghdr msg;
2011 int error;
2012 uio_t auio = NULL;
2013
2014 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_START, 0, 0, 0, 0, 0);
2015 AUDIT_ARG(fd, uap->s);
2016
2017 if (uap->fromlenaddr) {
2018 error = copyin(uap->fromlenaddr,
2019 (caddr_t)&msg.msg_namelen, sizeof (msg.msg_namelen));
2020 if (error)
2021 return (error);
2022 } else {
2023 msg.msg_namelen = 0;
2024 }
2025 msg.msg_name = uap->from;
2026 auio = uio_create(1, 0,
2027 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
2028 UIO_READ);
2029 if (auio == NULL) {
2030 return (ENOMEM);
2031 }
2032
2033 uio_addiov(auio, uap->buf, uap->len);
2034 /* no need to set up msg_iov. recvit uses uio_t we send it */
2035 msg.msg_iov = 0;
2036 msg.msg_iovlen = 0;
2037 msg.msg_control = 0;
2038 msg.msg_controllen = 0;
2039 msg.msg_flags = uap->flags;
2040 error = recvit(p, uap->s, &msg, auio, uap->fromlenaddr, retval);
2041 if (auio != NULL) {
2042 uio_free(auio);
2043 }
2044
2045 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_END, error, 0, 0, 0, 0);
2046
2047 return (error);
2048}
2049
2050/*
2051 * Returns: 0 Success
2052 * EMSGSIZE
2053 * ENOMEM
2054 * copyin:EFAULT
2055 * copyout:EFAULT
2056 * recvit:???
2057 *
2058 * Notes: For full documentation of the return codes from recvit, see
2059 * the block header for the recvit function.
2060 */
2061int
2062recvmsg(struct proc *p, struct recvmsg_args *uap, int32_t *retval)
2063{
2064 __pthread_testcancel(1);
2065 return (recvmsg_nocancel(p, (struct recvmsg_nocancel_args *)uap,
2066 retval));
2067}
2068
2069int
2070recvmsg_nocancel(struct proc *p, struct recvmsg_nocancel_args *uap,
2071 int32_t *retval)
2072{
2073 struct user32_msghdr msg32;
2074 struct user64_msghdr msg64;
2075 struct user_msghdr user_msg;
2076 caddr_t msghdrp;
2077 int size_of_msghdr;
2078 user_addr_t uiov;
2079 int error;
2080 uio_t auio = NULL;
2081 struct user_iovec *iovp;
2082
2083 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
2084 AUDIT_ARG(fd, uap->s);
2085 if (IS_64BIT_PROCESS(p)) {
2086 msghdrp = (caddr_t)&msg64;
2087 size_of_msghdr = sizeof (msg64);
2088 } else {
2089 msghdrp = (caddr_t)&msg32;
2090 size_of_msghdr = sizeof (msg32);
2091 }
2092 error = copyin(uap->msg, msghdrp, size_of_msghdr);
2093 if (error) {
2094 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
2095 return (error);
2096 }
2097
2098 /* only need to copy if user process is not 64-bit */
2099 if (IS_64BIT_PROCESS(p)) {
2100 user_msg.msg_flags = msg64.msg_flags;
2101 user_msg.msg_controllen = msg64.msg_controllen;
2102 user_msg.msg_control = msg64.msg_control;
2103 user_msg.msg_iovlen = msg64.msg_iovlen;
2104 user_msg.msg_iov = msg64.msg_iov;
2105 user_msg.msg_namelen = msg64.msg_namelen;
2106 user_msg.msg_name = msg64.msg_name;
2107 } else {
2108 user_msg.msg_flags = msg32.msg_flags;
2109 user_msg.msg_controllen = msg32.msg_controllen;
2110 user_msg.msg_control = msg32.msg_control;
2111 user_msg.msg_iovlen = msg32.msg_iovlen;
2112 user_msg.msg_iov = msg32.msg_iov;
2113 user_msg.msg_namelen = msg32.msg_namelen;
2114 user_msg.msg_name = msg32.msg_name;
2115 }
2116
2117 if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
2118 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, EMSGSIZE,
2119 0, 0, 0, 0);
2120 return (EMSGSIZE);
2121 }
2122
2123 user_msg.msg_flags = uap->flags;
2124
2125 /* allocate a uio large enough to hold the number of iovecs passed */
2126 auio = uio_create(user_msg.msg_iovlen, 0,
2127 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
2128 UIO_READ);
2129 if (auio == NULL) {
2130 error = ENOMEM;
2131 goto done;
2132 }
2133
2134 /*
2135 * get location of iovecs within the uio. then copyin the iovecs from
2136 * user space.
2137 */
2138 iovp = uio_iovsaddr(auio);
2139 if (iovp == NULL) {
2140 error = ENOMEM;
2141 goto done;
2142 }
2143 uiov = user_msg.msg_iov;
2144 user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
2145 error = copyin_user_iovec_array(uiov,
2146 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
2147 user_msg.msg_iovlen, iovp);
2148 if (error)
2149 goto done;
2150
2151 /* finish setup of uio_t */
2152 error = uio_calculateresid(auio);
2153 if (error) {
2154 goto done;
2155 }
2156
2157 error = recvit(p, uap->s, &user_msg, auio, 0, retval);
2158 if (!error) {
2159 user_msg.msg_iov = uiov;
2160 if (IS_64BIT_PROCESS(p)) {
2161 msg64.msg_flags = user_msg.msg_flags;
2162 msg64.msg_controllen = user_msg.msg_controllen;
2163 msg64.msg_control = user_msg.msg_control;
2164 msg64.msg_iovlen = user_msg.msg_iovlen;
2165 msg64.msg_iov = user_msg.msg_iov;
2166 msg64.msg_namelen = user_msg.msg_namelen;
2167 msg64.msg_name = user_msg.msg_name;
2168 } else {
2169 msg32.msg_flags = user_msg.msg_flags;
2170 msg32.msg_controllen = user_msg.msg_controllen;
2171 msg32.msg_control = user_msg.msg_control;
2172 msg32.msg_iovlen = user_msg.msg_iovlen;
2173 msg32.msg_iov = user_msg.msg_iov;
2174 msg32.msg_namelen = user_msg.msg_namelen;
2175 msg32.msg_name = user_msg.msg_name;
2176 }
2177 error = copyout(msghdrp, uap->msg, size_of_msghdr);
2178 }
2179done:
2180 if (auio != NULL) {
2181 uio_free(auio);
2182 }
2183 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
2184 return (error);
2185}
2186
2187int
2188recvmsg_x(struct proc *p, struct recvmsg_x_args *uap, user_ssize_t *retval)
2189{
2190 int error = EOPNOTSUPP;
2191 struct user_msghdr_x *user_msg_x = NULL;
2192 struct recv_msg_elem *recv_msg_array = NULL;
2193 struct socket *so;
2194 user_ssize_t len_before = 0, len_after;
2195 int need_drop = 0;
2196 size_t size_of_msghdr;
2197 void *umsgp = NULL;
2198 u_int i;
2199 u_int uiocnt;
2200
2201 KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0);
2202
2203 error = file_socket(uap->s, &so);
2204 if (error) {
2205 goto out;
2206 }
2207 need_drop = 1;
2208 if (so == NULL) {
2209 error = EBADF;
2210 goto out;
2211 }
2212 /*
2213 * Input parameter range check
2214 */
2215 if (uap->cnt == 0 || uap->cnt > UIO_MAXIOV) {
2216 error = EINVAL;
2217 goto out;
2218 }
2219 if (uap->cnt > somaxrecvmsgx)
2220 uap->cnt = somaxrecvmsgx;
2221
2222 user_msg_x = _MALLOC(uap->cnt * sizeof(struct user_msghdr_x),
2223 M_TEMP, M_WAITOK | M_ZERO);
2224 if (user_msg_x == NULL) {
2225 DBG_PRINTF("%s _MALLOC() user_msg_x failed\n", __func__);
2226 error = ENOMEM;
2227 goto out;
2228 }
2229 recv_msg_array = alloc_recv_msg_array(uap->cnt);
2230 if (recv_msg_array == NULL) {
2231 DBG_PRINTF("%s alloc_recv_msg_array() failed\n", __func__);
2232 error = ENOMEM;
2233 goto out;
2234 }
2235 size_of_msghdr = IS_64BIT_PROCESS(p) ?
2236 sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x);
2237
2238 umsgp = _MALLOC(uap->cnt * size_of_msghdr, M_TEMP, M_WAITOK | M_ZERO);
2239 if (umsgp == NULL) {
2240 DBG_PRINTF("%s _MALLOC() umsgp failed\n", __func__);
2241 error = ENOMEM;
2242 goto out;
2243 }
2244 error = copyin(uap->msgp, umsgp, uap->cnt * size_of_msghdr);
2245 if (error) {
2246 DBG_PRINTF("%s copyin() failed\n", __func__);
2247 goto out;
2248 }
2249 error = internalize_recv_msghdr_array(umsgp,
2250 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
2251 UIO_READ, uap->cnt, user_msg_x, recv_msg_array);
2252 if (error) {
2253 DBG_PRINTF("%s copyin_user_msghdr_array() failed\n", __func__);
2254 goto out;
2255 }
2256 /*
2257 * Make sure the size of each message iovec and
2258 * the aggregate size of all the iovec is valid
2259 */
2260 if (recv_msg_array_is_valid(recv_msg_array, uap->cnt) == 0) {
2261 error = EINVAL;
2262 goto out;
2263 }
2264 /*
2265 * Sanity check on passed arguments
2266 */
2267 for (i = 0; i < uap->cnt; i++) {
2268 struct user_msghdr_x *mp = user_msg_x + i;
2269
2270 if (mp->msg_flags != 0) {
2271 error = EINVAL;
2272 goto out;
2273 }
2274 }
2275#if CONFIG_MACF_SOCKET_SUBSET
2276 /*
2277 * We check the state without holding the socket lock;
2278 * if a race condition occurs, it would simply result
2279 * in an extra call to the MAC check function.
2280 */
2281 if (!(so->so_state & SS_DEFUNCT) &&
2282 !(so->so_state & SS_ISCONNECTED) &&
2283 !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
2284 (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0)
2285 goto out;
2286#endif /* MAC_SOCKET_SUBSET */
2287
2288 len_before = recv_msg_array_resid(recv_msg_array, uap->cnt);
2289
2290 if (so->so_proto->pr_usrreqs->pru_soreceive_list !=
2291 pru_soreceive_list_notsupp &&
2292 somaxrecvmsgx == 0) {
2293 error = so->so_proto->pr_usrreqs->pru_soreceive_list(so,
2294 recv_msg_array, uap->cnt, &uap->flags);
2295 } else {
2296 int flags = uap->flags;
2297
2298 for (i = 0; i < uap->cnt; i++) {
2299 struct recv_msg_elem *recv_msg_elem;
2300 uio_t auio;
2301 struct sockaddr **psa;
2302 struct mbuf **controlp;
2303
2304 recv_msg_elem = recv_msg_array + i;
2305 auio = recv_msg_elem->uio;
2306
2307 /*
2308 * Do not block if we got at least one packet
2309 */
2310 if (i > 0)
2311 flags |= MSG_DONTWAIT;
2312
2313 psa = (recv_msg_elem->which & SOCK_MSG_SA) ?
2314 &recv_msg_elem->psa : NULL;
2315 controlp = (recv_msg_elem->which & SOCK_MSG_CONTROL) ?
2316 &recv_msg_elem->controlp : NULL;
2317
2318 error = so->so_proto->pr_usrreqs->pru_soreceive(so, psa,
2319 auio, (struct mbuf **)0, controlp, &flags);
2320 if (error)
2321 break;
2322 /*
2323 * We have some data
2324 */
2325 recv_msg_elem->which |= SOCK_MSG_DATA;
2326 /*
2327 * Stop on partial copy
2328 */
2329 if (flags & (MSG_RCVMORE | MSG_TRUNC))
2330 break;
2331 }
2332 if ((uap->flags & MSG_DONTWAIT) == 0)
2333 flags &= ~MSG_DONTWAIT;
2334 uap->flags = flags;
2335 }
2336
2337 len_after = recv_msg_array_resid(recv_msg_array, uap->cnt);
2338
2339 if (error) {
2340 if (len_after != len_before && (error == ERESTART ||
2341 error == EINTR || error == EWOULDBLOCK))
2342 error = 0;
2343 else
2344 goto out;
2345 }
2346
2347 uiocnt = externalize_recv_msghdr_array(umsgp,
2348 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
2349 UIO_READ, uap->cnt, user_msg_x, recv_msg_array);
2350
2351 error = copyout(umsgp, uap->msgp, uap->cnt * size_of_msghdr);
2352 if (error) {
2353 DBG_PRINTF("%s copyout() failed\n", __func__);
2354 goto out;
2355 }
2356 *retval = (int)(uiocnt);
2357
2358 for (i = 0; i < uap->cnt; i++) {
2359 struct user_msghdr_x *mp = user_msg_x + i;
2360 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
2361 struct sockaddr *fromsa = recv_msg_elem->psa;
2362
2363 if (mp->msg_name) {
2364 error = copyout_sa(fromsa, mp->msg_name,
2365 &mp->msg_namelen);
2366 if (error)
2367 goto out;
2368 }
2369 if (mp->msg_control) {
2370 error = copyout_control(p, recv_msg_elem->controlp,
2371 mp->msg_control, &mp->msg_controllen,
2372 &mp->msg_flags);
2373 if (error)
2374 goto out;
2375 }
2376 }
2377out:
2378 if (need_drop)
2379 file_drop(uap->s);
2380 if (umsgp != NULL)
2381 _FREE(umsgp, M_TEMP);
2382 if (recv_msg_array != NULL)
2383 free_recv_msg_array(recv_msg_array, uap->cnt);
2384 if (user_msg_x != NULL)
2385 _FREE(user_msg_x, M_TEMP);
2386
2387 KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0);
2388
2389 return (error);
2390}
2391
2392/*
2393 * Returns: 0 Success
2394 * EBADF
2395 * file_socket:ENOTSOCK
2396 * file_socket:EBADF
2397 * soshutdown:EINVAL
2398 * soshutdown:ENOTCONN
2399 * soshutdown:EADDRNOTAVAIL[TCP]
2400 * soshutdown:ENOBUFS[TCP]
2401 * soshutdown:EMSGSIZE[TCP]
2402 * soshutdown:EHOSTUNREACH[TCP]
2403 * soshutdown:ENETUNREACH[TCP]
2404 * soshutdown:ENETDOWN[TCP]
2405 * soshutdown:ENOMEM[TCP]
2406 * soshutdown:EACCES[TCP]
2407 * soshutdown:EMSGSIZE[TCP]
2408 * soshutdown:ENOBUFS[TCP]
2409 * soshutdown:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
2410 * soshutdown:??? [other protocol families]
2411 */
2412/* ARGSUSED */
2413int
2414shutdown(__unused struct proc *p, struct shutdown_args *uap,
2415 __unused int32_t *retval)
2416{
2417 struct socket *so;
2418 int error;
2419
2420 AUDIT_ARG(fd, uap->s);
2421 error = file_socket(uap->s, &so);
2422 if (error)
2423 return (error);
2424 if (so == NULL) {
2425 error = EBADF;
2426 goto out;
2427 }
2428 error = soshutdown((struct socket *)so, uap->how);
2429out:
2430 file_drop(uap->s);
2431 return (error);
2432}
2433
2434/*
2435 * Returns: 0 Success
2436 * EFAULT
2437 * EINVAL
2438 * EACCES Mandatory Access Control failure
2439 * file_socket:ENOTSOCK
2440 * file_socket:EBADF
2441 * sosetopt:EINVAL
2442 * sosetopt:ENOPROTOOPT
2443 * sosetopt:ENOBUFS
2444 * sosetopt:EDOM
2445 * sosetopt:EFAULT
2446 * sosetopt:EOPNOTSUPP[AF_UNIX]
2447 * sosetopt:???
2448 */
2449/* ARGSUSED */
2450int
2451setsockopt(struct proc *p, struct setsockopt_args *uap,
2452 __unused int32_t *retval)
2453{
2454 struct socket *so;
2455 struct sockopt sopt;
2456 int error;
2457
2458 AUDIT_ARG(fd, uap->s);
2459 if (uap->val == 0 && uap->valsize != 0)
2460 return (EFAULT);
2461 /* No bounds checking on size (it's unsigned) */
2462
2463 error = file_socket(uap->s, &so);
2464 if (error)
2465 return (error);
2466
2467 sopt.sopt_dir = SOPT_SET;
2468 sopt.sopt_level = uap->level;
2469 sopt.sopt_name = uap->name;
2470 sopt.sopt_val = uap->val;
2471 sopt.sopt_valsize = uap->valsize;
2472 sopt.sopt_p = p;
2473
2474 if (so == NULL) {
2475 error = EINVAL;
2476 goto out;
2477 }
2478#if CONFIG_MACF_SOCKET_SUBSET
2479 if ((error = mac_socket_check_setsockopt(kauth_cred_get(), so,
2480 &sopt)) != 0)
2481 goto out;
2482#endif /* MAC_SOCKET_SUBSET */
2483 error = sosetoptlock(so, &sopt, 1); /* will lock socket */
2484out:
2485 file_drop(uap->s);
2486 return (error);
2487}
2488
2489
2490
2491/*
2492 * Returns: 0 Success
2493 * EINVAL
2494 * EBADF
2495 * EACCES Mandatory Access Control failure
2496 * copyin:EFAULT
2497 * copyout:EFAULT
2498 * file_socket:ENOTSOCK
2499 * file_socket:EBADF
2500 * sogetopt:???
2501 */
2502int
2503getsockopt(struct proc *p, struct getsockopt_args *uap,
2504 __unused int32_t *retval)
2505{
2506 int error;
2507 socklen_t valsize;
2508 struct sockopt sopt;
2509 struct socket *so;
2510
2511 error = file_socket(uap->s, &so);
2512 if (error)
2513 return (error);
2514 if (uap->val) {
2515 error = copyin(uap->avalsize, (caddr_t)&valsize,
2516 sizeof (valsize));
2517 if (error)
2518 goto out;
2519 /* No bounds checking on size (it's unsigned) */
2520 } else {
2521 valsize = 0;
2522 }
2523 sopt.sopt_dir = SOPT_GET;
2524 sopt.sopt_level = uap->level;
2525 sopt.sopt_name = uap->name;
2526 sopt.sopt_val = uap->val;
2527 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
2528 sopt.sopt_p = p;
2529
2530 if (so == NULL) {
2531 error = EBADF;
2532 goto out;
2533 }
2534#if CONFIG_MACF_SOCKET_SUBSET
2535 if ((error = mac_socket_check_getsockopt(kauth_cred_get(), so,
2536 &sopt)) != 0)
2537 goto out;
2538#endif /* MAC_SOCKET_SUBSET */
2539 error = sogetoptlock((struct socket *)so, &sopt, 1); /* will lock */
2540 if (error == 0) {
2541 valsize = sopt.sopt_valsize;
2542 error = copyout((caddr_t)&valsize, uap->avalsize,
2543 sizeof (valsize));
2544 }
2545out:
2546 file_drop(uap->s);
2547 return (error);
2548}
2549
2550
2551/*
2552 * Get socket name.
2553 *
2554 * Returns: 0 Success
2555 * EBADF
2556 * file_socket:ENOTSOCK
2557 * file_socket:EBADF
2558 * copyin:EFAULT
2559 * copyout:EFAULT
2560 * <pru_sockaddr>:ENOBUFS[TCP]
2561 * <pru_sockaddr>:ECONNRESET[TCP]
2562 * <pru_sockaddr>:EINVAL[AF_UNIX]
2563 * <sf_getsockname>:???
2564 */
2565/* ARGSUSED */
2566int
2567getsockname(__unused struct proc *p, struct getsockname_args *uap,
2568 __unused int32_t *retval)
2569{
2570 struct socket *so;
2571 struct sockaddr *sa;
2572 socklen_t len;
2573 socklen_t sa_len;
2574 int error;
2575
2576 error = file_socket(uap->fdes, &so);
2577 if (error)
2578 return (error);
2579 error = copyin(uap->alen, (caddr_t)&len, sizeof (socklen_t));
2580 if (error)
2581 goto out;
2582 if (so == NULL) {
2583 error = EBADF;
2584 goto out;
2585 }
2586 sa = 0;
2587 socket_lock(so, 1);
2588 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
2589 if (error == 0) {
2590 error = sflt_getsockname(so, &sa);
2591 if (error == EJUSTRETURN)
2592 error = 0;
2593 }
2594 socket_unlock(so, 1);
2595 if (error)
2596 goto bad;
2597 if (sa == 0) {
2598 len = 0;
2599 goto gotnothing;
2600 }
2601
2602 sa_len = sa->sa_len;
2603 len = MIN(len, sa_len);
2604 error = copyout((caddr_t)sa, uap->asa, len);
2605 if (error)
2606 goto bad;
2607 /* return the actual, untruncated address length */
2608 len = sa_len;
2609gotnothing:
2610 error = copyout((caddr_t)&len, uap->alen, sizeof (socklen_t));
2611bad:
2612 if (sa)
2613 FREE(sa, M_SONAME);
2614out:
2615 file_drop(uap->fdes);
2616 return (error);
2617}
2618
2619/*
2620 * Get name of peer for connected socket.
2621 *
2622 * Returns: 0 Success
2623 * EBADF
2624 * EINVAL
2625 * ENOTCONN
2626 * file_socket:ENOTSOCK
2627 * file_socket:EBADF
2628 * copyin:EFAULT
2629 * copyout:EFAULT
2630 * <pru_peeraddr>:???
2631 * <sf_getpeername>:???
2632 */
2633/* ARGSUSED */
2634int
2635getpeername(__unused struct proc *p, struct getpeername_args *uap,
2636 __unused int32_t *retval)
2637{
2638 struct socket *so;
2639 struct sockaddr *sa;
2640 socklen_t len;
2641 socklen_t sa_len;
2642 int error;
2643
2644 error = file_socket(uap->fdes, &so);
2645 if (error)
2646 return (error);
2647 if (so == NULL) {
2648 error = EBADF;
2649 goto out;
2650 }
2651
2652 socket_lock(so, 1);
2653
2654 if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
2655 (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
2656 /* the socket has been shutdown, no more getpeername's */
2657 socket_unlock(so, 1);
2658 error = EINVAL;
2659 goto out;
2660 }
2661
2662 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
2663 socket_unlock(so, 1);
2664 error = ENOTCONN;
2665 goto out;
2666 }
2667 error = copyin(uap->alen, (caddr_t)&len, sizeof (socklen_t));
2668 if (error) {
2669 socket_unlock(so, 1);
2670 goto out;
2671 }
2672 sa = 0;
2673 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
2674 if (error == 0) {
2675 error = sflt_getpeername(so, &sa);
2676 if (error == EJUSTRETURN)
2677 error = 0;
2678 }
2679 socket_unlock(so, 1);
2680 if (error)
2681 goto bad;
2682 if (sa == 0) {
2683 len = 0;
2684 goto gotnothing;
2685 }
2686 sa_len = sa->sa_len;
2687 len = MIN(len, sa_len);
2688 error = copyout(sa, uap->asa, len);
2689 if (error)
2690 goto bad;
2691 /* return the actual, untruncated address length */
2692 len = sa_len;
2693gotnothing:
2694 error = copyout((caddr_t)&len, uap->alen, sizeof (socklen_t));
2695bad:
2696 if (sa) FREE(sa, M_SONAME);
2697out:
2698 file_drop(uap->fdes);
2699 return (error);
2700}
2701
2702int
2703sockargs(struct mbuf **mp, user_addr_t data, int buflen, int type)
2704{
2705 struct sockaddr *sa;
2706 struct mbuf *m;
2707 int error;
2708
2709 size_t alloc_buflen = (size_t)buflen;
2710
2711 if (alloc_buflen > INT_MAX/2)
2712 return (EINVAL);
2713#ifdef __LP64__
2714 /*
2715 * The fd's in the buffer must expand to be pointers, thus we need twice
2716 * as much space
2717 */
2718 if (type == MT_CONTROL)
2719 alloc_buflen = ((buflen - sizeof(struct cmsghdr))*2) +
2720 sizeof(struct cmsghdr);
2721#endif
2722 if (alloc_buflen > MLEN) {
2723 if (type == MT_SONAME && alloc_buflen <= 112)
2724 alloc_buflen = MLEN; /* unix domain compat. hack */
2725 else if (alloc_buflen > MCLBYTES)
2726 return (EINVAL);
2727 }
2728 m = m_get(M_WAIT, type);
2729 if (m == NULL)
2730 return (ENOBUFS);
2731 if (alloc_buflen > MLEN) {
2732 MCLGET(m, M_WAIT);
2733 if ((m->m_flags & M_EXT) == 0) {
2734 m_free(m);
2735 return (ENOBUFS);
2736 }
2737 }
2738 /*
2739 * K64: We still copyin the original buflen because it gets expanded
2740 * later and we lie about the size of the mbuf because it only affects
2741 * unp_* functions
2742 */
2743 m->m_len = buflen;
2744 error = copyin(data, mtod(m, caddr_t), (u_int)buflen);
2745 if (error) {
2746 (void) m_free(m);
2747 } else {
2748 *mp = m;
2749 if (type == MT_SONAME) {
2750 sa = mtod(m, struct sockaddr *);
2751 sa->sa_len = buflen;
2752 }
2753 }
2754 return (error);
2755}
2756
2757/*
2758 * Given a user_addr_t of length len, allocate and fill out a *sa.
2759 *
2760 * Returns: 0 Success
2761 * ENAMETOOLONG Filename too long
2762 * EINVAL Invalid argument
2763 * ENOMEM Not enough space
2764 * copyin:EFAULT Bad address
2765 */
2766static int
2767getsockaddr(struct socket *so, struct sockaddr **namp, user_addr_t uaddr,
2768 size_t len, boolean_t translate_unspec)
2769{
2770 struct sockaddr *sa;
2771 int error;
2772
2773 if (len > SOCK_MAXADDRLEN)
2774 return (ENAMETOOLONG);
2775
2776 if (len < offsetof(struct sockaddr, sa_data[0]))
2777 return (EINVAL);
2778
2779 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK | M_ZERO);
2780 if (sa == NULL) {
2781 return (ENOMEM);
2782 }
2783 error = copyin(uaddr, (caddr_t)sa, len);
2784 if (error) {
2785 FREE(sa, M_SONAME);
2786 } else {
2787 /*
2788 * Force sa_family to AF_INET on AF_INET sockets to handle
2789 * legacy applications that use AF_UNSPEC (0). On all other
2790 * sockets we leave it unchanged and let the lower layer
2791 * handle it.
2792 */
2793 if (translate_unspec && sa->sa_family == AF_UNSPEC &&
2794 SOCK_CHECK_DOM(so, PF_INET) &&
2795 len == sizeof (struct sockaddr_in))
2796 sa->sa_family = AF_INET;
2797
2798 sa->sa_len = len;
2799 *namp = sa;
2800 }
2801 return (error);
2802}
2803
2804static int
2805getsockaddr_s(struct socket *so, struct sockaddr_storage *ss,
2806 user_addr_t uaddr, size_t len, boolean_t translate_unspec)
2807{
2808 int error;
2809
2810 if (ss == NULL || uaddr == USER_ADDR_NULL ||
2811 len < offsetof(struct sockaddr, sa_data[0]))
2812 return (EINVAL);
2813
2814 /*
2815 * sockaddr_storage size is less than SOCK_MAXADDRLEN,
2816 * so the check here is inclusive.
2817 */
2818 if (len > sizeof (*ss))
2819 return (ENAMETOOLONG);
2820
2821 bzero(ss, sizeof (*ss));
2822 error = copyin(uaddr, (caddr_t)ss, len);
2823 if (error == 0) {
2824 /*
2825 * Force sa_family to AF_INET on AF_INET sockets to handle
2826 * legacy applications that use AF_UNSPEC (0). On all other
2827 * sockets we leave it unchanged and let the lower layer
2828 * handle it.
2829 */
2830 if (translate_unspec && ss->ss_family == AF_UNSPEC &&
2831 SOCK_CHECK_DOM(so, PF_INET) &&
2832 len == sizeof (struct sockaddr_in))
2833 ss->ss_family = AF_INET;
2834
2835 ss->ss_len = len;
2836 }
2837 return (error);
2838}
2839
2840int
2841internalize_user_msghdr_array(const void *src, int spacetype, int direction,
2842 u_int count, struct user_msghdr_x *dst, struct uio **uiop)
2843{
2844 int error = 0;
2845 u_int i;
2846 u_int namecnt = 0;
2847 u_int ctlcnt = 0;
2848
2849 for (i = 0; i < count; i++) {
2850 uio_t auio;
2851 struct user_iovec *iovp;
2852 struct user_msghdr_x *user_msg = dst + i;
2853
2854 if (spacetype == UIO_USERSPACE64) {
2855 const struct user64_msghdr_x *msghdr64;
2856
2857 msghdr64 = ((const struct user64_msghdr_x *)src) + i;
2858
2859 user_msg->msg_name = msghdr64->msg_name;
2860 user_msg->msg_namelen = msghdr64->msg_namelen;
2861 user_msg->msg_iov = msghdr64->msg_iov;
2862 user_msg->msg_iovlen = msghdr64->msg_iovlen;
2863 user_msg->msg_control = msghdr64->msg_control;
2864 user_msg->msg_controllen = msghdr64->msg_controllen;
2865 user_msg->msg_flags = msghdr64->msg_flags;
2866 user_msg->msg_datalen = msghdr64->msg_datalen;
2867 } else {
2868 const struct user32_msghdr_x *msghdr32;
2869
2870 msghdr32 = ((const struct user32_msghdr_x *)src) + i;
2871
2872 user_msg->msg_name = msghdr32->msg_name;
2873 user_msg->msg_namelen = msghdr32->msg_namelen;
2874 user_msg->msg_iov = msghdr32->msg_iov;
2875 user_msg->msg_iovlen = msghdr32->msg_iovlen;
2876 user_msg->msg_control = msghdr32->msg_control;
2877 user_msg->msg_controllen = msghdr32->msg_controllen;
2878 user_msg->msg_flags = msghdr32->msg_flags;
2879 user_msg->msg_datalen = msghdr32->msg_datalen;
2880 }
2881
2882 if (user_msg->msg_iovlen <= 0 ||
2883 user_msg->msg_iovlen > UIO_MAXIOV) {
2884 error = EMSGSIZE;
2885 goto done;
2886 }
2887 auio = uio_create(user_msg->msg_iovlen, 0, spacetype,
2888 direction);
2889 if (auio == NULL) {
2890 error = ENOMEM;
2891 goto done;
2892 }
2893 uiop[i] = auio;
2894
2895 iovp = uio_iovsaddr(auio);
2896 if (iovp == NULL) {
2897 error = ENOMEM;
2898 goto done;
2899 }
2900 error = copyin_user_iovec_array(user_msg->msg_iov,
2901 spacetype, user_msg->msg_iovlen, iovp);
2902 if (error)
2903 goto done;
2904 user_msg->msg_iov = CAST_USER_ADDR_T(iovp);
2905
2906 error = uio_calculateresid(auio);
2907 if (error)
2908 goto done;
2909 user_msg->msg_datalen = uio_resid(auio);
2910
2911 if (user_msg->msg_name && user_msg->msg_namelen)
2912 namecnt++;
2913 if (user_msg->msg_control && user_msg->msg_controllen)
2914 ctlcnt++;
2915 }
2916done:
2917
2918 return (error);
2919}
2920
2921int
2922internalize_recv_msghdr_array(const void *src, int spacetype, int direction,
2923 u_int count, struct user_msghdr_x *dst,
2924 struct recv_msg_elem *recv_msg_array)
2925{
2926 int error = 0;
2927 u_int i;
2928
2929 for (i = 0; i < count; i++) {
2930 struct user_iovec *iovp;
2931 struct user_msghdr_x *user_msg = dst + i;
2932 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
2933
2934 if (spacetype == UIO_USERSPACE64) {
2935 const struct user64_msghdr_x *msghdr64;
2936
2937 msghdr64 = ((const struct user64_msghdr_x *)src) + i;
2938
2939 user_msg->msg_name = msghdr64->msg_name;
2940 user_msg->msg_namelen = msghdr64->msg_namelen;
2941 user_msg->msg_iov = msghdr64->msg_iov;
2942 user_msg->msg_iovlen = msghdr64->msg_iovlen;
2943 user_msg->msg_control = msghdr64->msg_control;
2944 user_msg->msg_controllen = msghdr64->msg_controllen;
2945 user_msg->msg_flags = msghdr64->msg_flags;
2946 user_msg->msg_datalen = msghdr64->msg_datalen;
2947 } else {
2948 const struct user32_msghdr_x *msghdr32;
2949
2950 msghdr32 = ((const struct user32_msghdr_x *)src) + i;
2951
2952 user_msg->msg_name = msghdr32->msg_name;
2953 user_msg->msg_namelen = msghdr32->msg_namelen;
2954 user_msg->msg_iov = msghdr32->msg_iov;
2955 user_msg->msg_iovlen = msghdr32->msg_iovlen;
2956 user_msg->msg_control = msghdr32->msg_control;
2957 user_msg->msg_controllen = msghdr32->msg_controllen;
2958 user_msg->msg_flags = msghdr32->msg_flags;
2959 user_msg->msg_datalen = msghdr32->msg_datalen;
2960 }
2961
2962 if (user_msg->msg_iovlen <= 0 ||
2963 user_msg->msg_iovlen > UIO_MAXIOV) {
2964 error = EMSGSIZE;
2965 goto done;
2966 }
2967 recv_msg_elem->uio = uio_create(user_msg->msg_iovlen, 0,
2968 spacetype, direction);
2969 if (recv_msg_elem->uio == NULL) {
2970 error = ENOMEM;
2971 goto done;
2972 }
2973
2974 iovp = uio_iovsaddr(recv_msg_elem->uio);
2975 if (iovp == NULL) {
2976 error = ENOMEM;
2977 goto done;
2978 }
2979 error = copyin_user_iovec_array(user_msg->msg_iov,
2980 spacetype, user_msg->msg_iovlen, iovp);
2981 if (error)
2982 goto done;
2983 user_msg->msg_iov = CAST_USER_ADDR_T(iovp);
2984
2985 error = uio_calculateresid(recv_msg_elem->uio);
2986 if (error)
2987 goto done;
2988 user_msg->msg_datalen = uio_resid(recv_msg_elem->uio);
2989
2990 if (user_msg->msg_name && user_msg->msg_namelen)
2991 recv_msg_elem->which |= SOCK_MSG_SA;
2992 if (user_msg->msg_control && user_msg->msg_controllen)
2993 recv_msg_elem->which |= SOCK_MSG_CONTROL;
2994 }
2995done:
2996
2997 return (error);
2998}
2999
3000u_int
3001externalize_user_msghdr_array(void *dst, int spacetype, int direction,
3002 u_int count, const struct user_msghdr_x *src, struct uio **uiop)
3003{
3004#pragma unused(direction)
3005 u_int i;
3006 int seenlast = 0;
3007 u_int retcnt = 0;
3008
3009 for (i = 0; i < count; i++) {
3010 const struct user_msghdr_x *user_msg = src + i;
3011 uio_t auio = uiop[i];
3012 user_ssize_t len = user_msg->msg_datalen - uio_resid(auio);
3013
3014 if (user_msg->msg_datalen != 0 && len == 0)
3015 seenlast = 1;
3016
3017 if (seenlast == 0)
3018 retcnt ++;
3019
3020 if (spacetype == UIO_USERSPACE64) {
3021 struct user64_msghdr_x *msghdr64;
3022
3023 msghdr64 = ((struct user64_msghdr_x *)dst) + i;
3024
3025 msghdr64->msg_flags = user_msg->msg_flags;
3026 msghdr64->msg_datalen = len;
3027
3028 } else {
3029 struct user32_msghdr_x *msghdr32;
3030
3031 msghdr32 = ((struct user32_msghdr_x *)dst) + i;
3032
3033 msghdr32->msg_flags = user_msg->msg_flags;
3034 msghdr32->msg_datalen = len;
3035 }
3036 }
3037 return (retcnt);
3038}
3039
3040u_int
3041externalize_recv_msghdr_array(void *dst, int spacetype, int direction,
3042 u_int count, const struct user_msghdr_x *src,
3043 struct recv_msg_elem *recv_msg_array)
3044{
3045 u_int i;
3046 int seenlast = 0;
3047 u_int retcnt = 0;
3048
3049 for (i = 0; i < count; i++) {
3050 const struct user_msghdr_x *user_msg = src + i;
3051 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3052 user_ssize_t len;
3053
3054 len = user_msg->msg_datalen - uio_resid(recv_msg_elem->uio);
3055
3056 if (direction == UIO_READ) {
3057 if ((recv_msg_elem->which & SOCK_MSG_DATA) == 0)
3058 seenlast = 1;
3059 } else {
3060 if (user_msg->msg_datalen != 0 && len == 0)
3061 seenlast = 1;
3062 }
3063
3064 if (seenlast == 0)
3065 retcnt ++;
3066
3067 if (spacetype == UIO_USERSPACE64) {
3068 struct user64_msghdr_x *msghdr64;
3069
3070 msghdr64 = ((struct user64_msghdr_x *)dst) + i;
3071
3072 msghdr64->msg_flags = user_msg->msg_flags;
3073 msghdr64->msg_datalen = len;
3074
3075 } else {
3076 struct user32_msghdr_x *msghdr32;
3077
3078 msghdr32 = ((struct user32_msghdr_x *)dst) + i;
3079
3080 msghdr32->msg_flags = user_msg->msg_flags;
3081 msghdr32->msg_datalen = len;
3082 }
3083 }
3084 return (retcnt);
3085}
3086
3087void
3088free_uio_array(struct uio **uiop, u_int count)
3089{
3090 u_int i;
3091
3092 for (i = 0; i < count; i++) {
3093 if (uiop[i] != NULL)
3094 uio_free(uiop[i]);
3095 }
3096}
3097
3098__private_extern__ user_ssize_t
3099uio_array_resid(struct uio **uiop, u_int count)
3100{
3101 user_ssize_t len = 0;
3102 u_int i;
3103
3104 for (i = 0; i < count; i++) {
3105 struct uio *auio = uiop[i];
3106
3107 if (auio != NULL)
3108 len += uio_resid(auio);
3109 }
3110 return (len);
3111}
3112
3113int
3114uio_array_is_valid(struct uio **uiop, u_int count)
3115{
3116 user_ssize_t len = 0;
3117 u_int i;
3118
3119 for (i = 0; i < count; i++) {
3120 struct uio *auio = uiop[i];
3121
3122 if (auio != NULL) {
3123 user_ssize_t resid = uio_resid(auio);
3124
3125 /*
3126 * Sanity check on the validity of the iovec:
3127 * no point of going over sb_max
3128 */
3129 if (resid < 0 || (u_int32_t)resid > sb_max)
3130 return (0);
3131
3132 len += resid;
3133 if (len < 0 || (u_int32_t)len > sb_max)
3134 return (0);
3135 }
3136 }
3137 return (1);
3138}
3139
3140
3141struct recv_msg_elem *
3142alloc_recv_msg_array(u_int count)
3143{
3144 struct recv_msg_elem *recv_msg_array;
3145
3146 recv_msg_array = _MALLOC(count * sizeof(struct recv_msg_elem),
3147 M_TEMP, M_WAITOK | M_ZERO);
3148
3149 return (recv_msg_array);
3150}
3151
3152void
3153free_recv_msg_array(struct recv_msg_elem *recv_msg_array, u_int count)
3154{
3155 u_int i;
3156
3157 for (i = 0; i < count; i++) {
3158 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3159
3160 if (recv_msg_elem->uio != NULL)
3161 uio_free(recv_msg_elem->uio);
3162 if (recv_msg_elem->psa != NULL)
3163 _FREE(recv_msg_elem->psa, M_TEMP);
3164 if (recv_msg_elem->controlp != NULL)
3165 m_freem(recv_msg_elem->controlp);
3166 }
3167 _FREE(recv_msg_array, M_TEMP);
3168}
3169
3170
3171__private_extern__ user_ssize_t
3172recv_msg_array_resid(struct recv_msg_elem *recv_msg_array, u_int count)
3173{
3174 user_ssize_t len = 0;
3175 u_int i;
3176
3177 for (i = 0; i < count; i++) {
3178 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3179
3180 if (recv_msg_elem->uio != NULL)
3181 len += uio_resid(recv_msg_elem->uio);
3182 }
3183 return (len);
3184}
3185
3186int
3187recv_msg_array_is_valid(struct recv_msg_elem *recv_msg_array, u_int count)
3188{
3189 user_ssize_t len = 0;
3190 u_int i;
3191
3192 for (i = 0; i < count; i++) {
3193 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3194
3195 if (recv_msg_elem->uio != NULL) {
3196 user_ssize_t resid = uio_resid(recv_msg_elem->uio);
3197
3198 /*
3199 * Sanity check on the validity of the iovec:
3200 * no point of going over sb_max
3201 */
3202 if (resid < 0 || (u_int32_t)resid > sb_max)
3203 return (0);
3204
3205 len += resid;
3206 if (len < 0 || (u_int32_t)len > sb_max)
3207 return (0);
3208 }
3209 }
3210 return (1);
3211}
3212
3213#if SENDFILE
3214
/*
 * Number of page-sized units one sendfile pass may cover; also used to
 * size the on-stack uio buffer in sendfile().
 */
#define SFUIOBUFS 64

/* Macros to compute the number of mbufs needed depending on cluster size */
/*
 * Both evaluate to ((n - 1) >> shift) + 1, i.e. n divided by the cluster
 * size rounded up, for n >= 1.  n is converted to unsigned int first, so
 * n == 0 wraps around instead of yielding 0; callers must pass a
 * non-zero length.
 */
#define HOWMANY_16K(n) ((((unsigned int)(n) - 1) >> M16KCLSHIFT) + 1)
#define HOWMANY_4K(n) ((((unsigned int)(n) - 1) >> MBIGCLSHIFT) + 1)

/* Upper send limit in bytes (SFUIOBUFS * PAGESIZE) */
#define SENDFILE_MAX_BYTES (SFUIOBUFS << PGSHIFT)

/* Upper send limit in the number of mbuf clusters */
#define SENDFILE_MAX_16K HOWMANY_16K(SENDFILE_MAX_BYTES)
#define SENDFILE_MAX_4K HOWMANY_4K(SENDFILE_MAX_BYTES)
3227
3228static void
3229alloc_sendpkt(int how, size_t pktlen, unsigned int *maxchunks,
3230 struct mbuf **m, boolean_t jumbocl)
3231{
3232 unsigned int needed;
3233
3234 if (pktlen == 0)
3235 panic("%s: pktlen (%ld) must be non-zero\n", __func__, pktlen);
3236
3237 /*
3238 * Try to allocate for the whole thing. Since we want full control
3239 * over the buffer size and be able to accept partial result, we can't
3240 * use mbuf_allocpacket(). The logic below is similar to sosend().
3241 */
3242 *m = NULL;
3243 if (pktlen > MBIGCLBYTES && jumbocl) {
3244 needed = MIN(SENDFILE_MAX_16K, HOWMANY_16K(pktlen));
3245 *m = m_getpackets_internal(&needed, 1, how, 0, M16KCLBYTES);
3246 }
3247 if (*m == NULL) {
3248 needed = MIN(SENDFILE_MAX_4K, HOWMANY_4K(pktlen));
3249 *m = m_getpackets_internal(&needed, 1, how, 0, MBIGCLBYTES);
3250 }
3251
3252 /*
3253 * Our previous attempt(s) at allocation had failed; the system
3254 * may be short on mbufs, and we want to block until they are
3255 * available. This time, ask just for 1 mbuf and don't return
3256 * until we get it.
3257 */
3258 if (*m == NULL) {
3259 needed = 1;
3260 *m = m_getpackets_internal(&needed, 1, M_WAIT, 1, MBIGCLBYTES);
3261 }
3262 if (*m == NULL)
3263 panic("%s: blocking allocation returned NULL\n", __func__);
3264
3265 *maxchunks = needed;
3266}
3267
3268/*
3269 * sendfile(2).
3270 * int sendfile(int fd, int s, off_t offset, off_t *nbytes,
3271 * struct sf_hdtr *hdtr, int flags)
3272 *
3273 * Send a file specified by 'fd' and starting at 'offset' to a socket
3274 * specified by 's'. Send only '*nbytes' of the file or until EOF if
3275 * *nbytes == 0. Optionally add a header and/or trailer to the socket
3276 * output. If specified, write the total number of bytes sent into *nbytes.
3277 */
3278int
3279sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval)
3280{
3281 struct fileproc *fp;
3282 struct vnode *vp;
3283 struct socket *so;
3284 struct writev_nocancel_args nuap;
3285 user_ssize_t writev_retval;
3286 struct user_sf_hdtr user_hdtr;
3287 struct user32_sf_hdtr user32_hdtr;
3288 struct user64_sf_hdtr user64_hdtr;
3289 off_t off, xfsize;
3290 off_t nbytes = 0, sbytes = 0;
3291 int error = 0;
3292 size_t sizeof_hdtr;
3293 off_t file_size;
3294 struct vfs_context context = *vfs_context_current();
3295
3296 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_START), uap->s,
3297 0, 0, 0, 0);
3298
3299 AUDIT_ARG(fd, uap->fd);
3300 AUDIT_ARG(value32, uap->s);
3301
3302 /*
3303 * Do argument checking. Must be a regular file in, stream
3304 * type and connected socket out, positive offset.
3305 */
3306 if ((error = fp_getfvp(p, uap->fd, &fp, &vp))) {
3307 goto done;
3308 }
3309 if ((fp->f_flag & FREAD) == 0) {
3310 error = EBADF;
3311 goto done1;
3312 }
3313 if (vnode_isreg(vp) == 0) {
3314 error = ENOTSUP;
3315 goto done1;
3316 }
3317 error = file_socket(uap->s, &so);
3318 if (error) {
3319 goto done1;
3320 }
3321 if (so == NULL) {
3322 error = EBADF;
3323 goto done2;
3324 }
3325 if (so->so_type != SOCK_STREAM) {
3326 error = EINVAL;
3327 goto done2;
3328 }
3329 if ((so->so_state & SS_ISCONNECTED) == 0) {
3330 error = ENOTCONN;
3331 goto done2;
3332 }
3333 if (uap->offset < 0) {
3334 error = EINVAL;
3335 goto done2;
3336 }
3337 if (uap->nbytes == USER_ADDR_NULL) {
3338 error = EINVAL;
3339 goto done2;
3340 }
3341 if (uap->flags != 0) {
3342 error = EINVAL;
3343 goto done2;
3344 }
3345
3346 context.vc_ucred = fp->f_fglob->fg_cred;
3347
3348#if CONFIG_MACF_SOCKET_SUBSET
3349 /* JMM - fetch connected sockaddr? */
3350 error = mac_socket_check_send(context.vc_ucred, so, NULL);
3351 if (error)
3352 goto done2;
3353#endif
3354
3355 /*
3356 * Get number of bytes to send
3357 * Should it applies to size of header and trailer?
3358 * JMM - error handling?
3359 */
3360 copyin(uap->nbytes, &nbytes, sizeof (off_t));
3361
3362 /*
3363 * If specified, get the pointer to the sf_hdtr struct for
3364 * any headers/trailers.
3365 */
3366 if (uap->hdtr != USER_ADDR_NULL) {
3367 caddr_t hdtrp;
3368
3369 bzero(&user_hdtr, sizeof (user_hdtr));
3370 if (IS_64BIT_PROCESS(p)) {
3371 hdtrp = (caddr_t)&user64_hdtr;
3372 sizeof_hdtr = sizeof (user64_hdtr);
3373 } else {
3374 hdtrp = (caddr_t)&user32_hdtr;
3375 sizeof_hdtr = sizeof (user32_hdtr);
3376 }
3377 error = copyin(uap->hdtr, hdtrp, sizeof_hdtr);
3378 if (error)
3379 goto done2;
3380 if (IS_64BIT_PROCESS(p)) {
3381 user_hdtr.headers = user64_hdtr.headers;
3382 user_hdtr.hdr_cnt = user64_hdtr.hdr_cnt;
3383 user_hdtr.trailers = user64_hdtr.trailers;
3384 user_hdtr.trl_cnt = user64_hdtr.trl_cnt;
3385 } else {
3386 user_hdtr.headers = user32_hdtr.headers;
3387 user_hdtr.hdr_cnt = user32_hdtr.hdr_cnt;
3388 user_hdtr.trailers = user32_hdtr.trailers;
3389 user_hdtr.trl_cnt = user32_hdtr.trl_cnt;
3390 }
3391
3392 /*
3393 * Send any headers. Wimp out and use writev(2).
3394 */
3395 if (user_hdtr.headers != USER_ADDR_NULL) {
3396 bzero(&nuap, sizeof (struct writev_args));
3397 nuap.fd = uap->s;
3398 nuap.iovp = user_hdtr.headers;
3399 nuap.iovcnt = user_hdtr.hdr_cnt;
3400 error = writev_nocancel(p, &nuap, &writev_retval);
3401 if (error) {
3402 goto done2;
3403 }
3404 sbytes += writev_retval;
3405 }
3406 }
3407
3408 /*
3409 * Get the file size for 2 reasons:
3410 * 1. We don't want to allocate more mbufs than necessary
3411 * 2. We don't want to read past the end of file
3412 */
3413 if ((error = vnode_size(vp, &file_size, vfs_context_current())) != 0) {
3414 goto done2;
3415 }
3416
3417 /*
3418 * Simply read file data into a chain of mbufs that are used with
3419 * scatter/gather reads. We're not (yet?) set up to use zero-copy external
3420 * mbufs that point to the file pages.
3421 */
3422 socket_lock(so, 1);
3423 error = sblock(&so->so_snd, SBL_WAIT);
3424 if (error) {
3425 socket_unlock(so, 1);
3426 goto done2;
3427 }
3428 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
3429 mbuf_t m0 = NULL, m;
3430 unsigned int nbufs = SFUIOBUFS, i;
3431 uio_t auio;
3432 char uio_buf[UIO_SIZEOF(SFUIOBUFS)]; /* 1 KB !!! */
3433 size_t uiolen;
3434 user_ssize_t rlen;
3435 off_t pgoff;
3436 size_t pktlen;
3437 boolean_t jumbocl;
3438
3439 /*
3440 * Calculate the amount to transfer.
3441 * Align to round number of pages.
3442 * Not to exceed send socket buffer,
3443 * the EOF, or the passed in nbytes.
3444 */
3445 xfsize = sbspace(&so->so_snd);
3446
3447 if (xfsize <= 0) {
3448 if (so->so_state & SS_CANTSENDMORE) {
3449 error = EPIPE;
3450 goto done3;
3451 } else if ((so->so_state & SS_NBIO)) {
3452 error = EAGAIN;
3453 goto done3;
3454 } else {
3455 xfsize = PAGE_SIZE;
3456 }
3457 }
3458
3459 if (xfsize > SENDFILE_MAX_BYTES)
3460 xfsize = SENDFILE_MAX_BYTES;
3461 else if (xfsize > PAGE_SIZE)
3462 xfsize = trunc_page(xfsize);
3463 pgoff = off & PAGE_MASK_64;
3464 if (pgoff > 0 && PAGE_SIZE - pgoff < xfsize)
3465 xfsize = PAGE_SIZE_64 - pgoff;
3466 if (nbytes && xfsize > (nbytes - sbytes))
3467 xfsize = nbytes - sbytes;
3468 if (xfsize <= 0)
3469 break;
3470 if (off + xfsize > file_size)
3471 xfsize = file_size - off;
3472 if (xfsize <= 0)
3473 break;
3474
3475 /*
3476 * Attempt to use larger than system page-size clusters for
3477 * large writes only if there is a jumbo cluster pool and
3478 * if the socket is marked accordingly.
3479 */
3480 jumbocl = sosendjcl && njcl > 0 &&
3481 ((so->so_flags & SOF_MULTIPAGES) || sosendjcl_ignore_capab);
3482
3483 socket_unlock(so, 0);
3484 alloc_sendpkt(M_WAIT, xfsize, &nbufs, &m0, jumbocl);
3485 pktlen = mbuf_pkthdr_maxlen(m0);
3486 if (pktlen < (size_t)xfsize)
3487 xfsize = pktlen;
3488
3489 auio = uio_createwithbuffer(nbufs, off, UIO_SYSSPACE,
3490 UIO_READ, &uio_buf[0], sizeof (uio_buf));
3491 if (auio == NULL) {
3492 printf("sendfile failed. nbufs = %d. %s", nbufs,
3493 "File a radar related to rdar://10146739.\n");
3494 mbuf_freem(m0);
3495 error = ENXIO;
3496 socket_lock(so, 0);
3497 goto done3;
3498 }
3499
3500 for (i = 0, m = m0, uiolen = 0;
3501 i < nbufs && m != NULL && uiolen < (size_t)xfsize;
3502 i++, m = mbuf_next(m)) {
3503 size_t mlen = mbuf_maxlen(m);
3504
3505 if (mlen + uiolen > (size_t)xfsize)
3506 mlen = xfsize - uiolen;
3507 mbuf_setlen(m, mlen);
3508 uio_addiov(auio, CAST_USER_ADDR_T(mbuf_datastart(m)),
3509 mlen);
3510 uiolen += mlen;
3511 }
3512
3513 if (xfsize != uio_resid(auio))
3514 printf("sendfile: xfsize: %lld != uio_resid(auio): "
3515 "%lld\n", xfsize, (long long)uio_resid(auio));
3516
3517 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_START),
3518 uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
3519 (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
3520 error = fo_read(fp, auio, FOF_OFFSET, &context);
3521 socket_lock(so, 0);
3522 if (error != 0) {
3523 if (uio_resid(auio) != xfsize && (error == ERESTART ||
3524 error == EINTR || error == EWOULDBLOCK)) {
3525 error = 0;
3526 } else {
3527 mbuf_freem(m0);
3528 goto done3;
3529 }
3530 }
3531 xfsize -= uio_resid(auio);
3532 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_END),
3533 uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
3534 (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
3535
3536 if (xfsize == 0) {
3537 // printf("sendfile: fo_read 0 bytes, EOF\n");
3538 break;
3539 }
3540 if (xfsize + off > file_size)
3541 printf("sendfile: xfsize: %lld + off: %lld > file_size:"
3542 "%lld\n", xfsize, off, file_size);
3543 for (i = 0, m = m0, rlen = 0;
3544 i < nbufs && m != NULL && rlen < xfsize;
3545 i++, m = mbuf_next(m)) {
3546 size_t mlen = mbuf_maxlen(m);
3547
3548 if (rlen + mlen > (size_t)xfsize)
3549 mlen = xfsize - rlen;
3550 mbuf_setlen(m, mlen);
3551
3552 rlen += mlen;
3553 }
3554 mbuf_pkthdr_setlen(m0, xfsize);
3555
3556retry_space:
3557 /*
3558 * Make sure that the socket is still able to take more data.
3559 * CANTSENDMORE being true usually means that the connection
3560 * was closed. so_error is true when an error was sensed after
3561 * a previous send.
3562 * The state is checked after the page mapping and buffer
3563 * allocation above since those operations may block and make
3564 * any socket checks stale. From this point forward, nothing
3565 * blocks before the pru_send (or more accurately, any blocking
3566 * results in a loop back to here to re-check).
3567 */
3568 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
3569 if (so->so_state & SS_CANTSENDMORE) {
3570 error = EPIPE;
3571 } else {
3572 error = so->so_error;
3573 so->so_error = 0;
3574 }
3575 m_freem(m0);
3576 goto done3;
3577 }
3578 /*
3579 * Wait for socket space to become available. We do this just
3580 * after checking the connection state above in order to avoid
3581 * a race condition with sbwait().
3582 */
3583 if (sbspace(&so->so_snd) < (long)so->so_snd.sb_lowat) {
3584 if (so->so_state & SS_NBIO) {
3585 m_freem(m0);
3586 error = EAGAIN;
3587 goto done3;
3588 }
3589 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT |
3590 DBG_FUNC_START), uap->s, 0, 0, 0, 0);
3591 error = sbwait(&so->so_snd);
3592 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT|
3593 DBG_FUNC_END), uap->s, 0, 0, 0, 0);
3594 /*
3595 * An error from sbwait usually indicates that we've
3596 * been interrupted by a signal. If we've sent anything
3597 * then return bytes sent, otherwise return the error.
3598 */
3599 if (error) {
3600 m_freem(m0);
3601 goto done3;
3602 }
3603 goto retry_space;
3604 }
3605
3606 struct mbuf *control = NULL;
3607 {
3608 /*
3609 * Socket filter processing
3610 */
3611
3612 error = sflt_data_out(so, NULL, &m0, &control, 0);
3613 if (error) {
3614 if (error == EJUSTRETURN) {
3615 error = 0;
3616 continue;
3617 }
3618 goto done3;
3619 }
3620 /*
3621 * End Socket filter processing
3622 */
3623 }
3624 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
3625 uap->s, 0, 0, 0, 0);
3626 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m0,
3627 0, control, p);
3628 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
3629 uap->s, 0, 0, 0, 0);
3630 if (error) {
3631 goto done3;
3632 }
3633 }
3634 sbunlock(&so->so_snd, FALSE); /* will unlock socket */
3635 /*
3636 * Send trailers. Wimp out and use writev(2).
3637 */
3638 if (uap->hdtr != USER_ADDR_NULL &&
3639 user_hdtr.trailers != USER_ADDR_NULL) {
3640 bzero(&nuap, sizeof (struct writev_args));
3641 nuap.fd = uap->s;
3642 nuap.iovp = user_hdtr.trailers;
3643 nuap.iovcnt = user_hdtr.trl_cnt;
3644 error = writev_nocancel(p, &nuap, &writev_retval);
3645 if (error) {
3646 goto done2;
3647 }
3648 sbytes += writev_retval;
3649 }
3650done2:
3651 file_drop(uap->s);
3652done1:
3653 file_drop(uap->fd);
3654done:
3655 if (uap->nbytes != USER_ADDR_NULL) {
3656 /* XXX this appears bogus for some early failure conditions */
3657 copyout(&sbytes, uap->nbytes, sizeof (off_t));
3658 }
3659 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_END), uap->s,
3660 (unsigned int)((sbytes >> 32) & 0x0ffffffff),
3661 (unsigned int)(sbytes & 0x0ffffffff), error, 0);
3662 return (error);
3663done3:
3664 sbunlock(&so->so_snd, FALSE); /* will unlock socket */
3665 goto done2;
3666}
3667
3668
3669#endif /* SENDFILE */