]> git.saurik.com Git - apple/xnu.git/blame - bsd/kern/uipc_syscalls.c
xnu-3789.1.32.tar.gz
[apple/xnu.git] / bsd / kern / uipc_syscalls.c
CommitLineData
1c79356b 1/*
3e170ce0 2 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
5d5c5d0d 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
39236c6e 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
39236c6e 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
39236c6e 17 *
2d21ac55
A
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
39236c6e 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * Copyright (c) 1982, 1986, 1989, 1990, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * sendfile(2) and related extensions:
2d21ac55 33 * Copyright (c) 1998, David Greenman. All rights reserved.
1c79356b
A
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. All advertising materials mentioning features or use of this software
44 * must display the following acknowledgement:
45 * This product includes software developed by the University of
46 * California, Berkeley and its contributors.
47 * 4. Neither the name of the University nor the names of its contributors
48 * may be used to endorse or promote products derived from this software
49 * without specific prior written permission.
50 *
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE.
62 *
63 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94
64 */
2d21ac55
A
65/*
66 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
67 * support for mandatory and extensible security protections. This notice
68 * is included in support of clause 2.2 (b) of the Apple Public License,
69 * Version 2.0.
70 */
1c79356b
A
71
72#include <sys/param.h>
73#include <sys/systm.h>
74#include <sys/filedesc.h>
91447636
A
75#include <sys/proc_internal.h>
76#include <sys/file_internal.h>
2d21ac55 77#include <sys/vnode_internal.h>
1c79356b 78#include <sys/malloc.h>
39236c6e 79#include <sys/mcache.h>
1c79356b 80#include <sys/mbuf.h>
fe8ab488 81#include <kern/locks.h>
91447636 82#include <sys/domain.h>
1c79356b 83#include <sys/protosw.h>
91447636 84#include <sys/signalvar.h>
1c79356b
A
85#include <sys/socket.h>
86#include <sys/socketvar.h>
1c79356b 87#include <sys/kernel.h>
91447636 88#include <sys/uio_internal.h>
2d21ac55 89#include <sys/kauth.h>
6d2010ae 90#include <kern/task.h>
39236c6e 91#include <sys/priv.h>
3e170ce0 92#include <sys/sysctl.h>
e5568f75 93
b0d623f7 94#include <security/audit/audit.h>
1c79356b
A
95
96#include <sys/kdebug.h>
91447636 97#include <sys/sysproto.h>
2d21ac55
A
98#include <netinet/in.h>
99#include <net/route.h>
100#include <netinet/in_pcb.h>
101
102#if CONFIG_MACF_SOCKET_SUBSET
103#include <security/mac_framework.h>
104#endif /* MAC_SOCKET_SUBSET */
105
/*
 * Shorthand accessors: each name expands to a member reached through the
 * f_fglob pointer of whatever object it is applied to (e.g. fp->f_flag
 * becomes fp->f_fglob->fg_flag).
 */
106#define f_flag f_fglob->fg_flag
39236c6e 107#define f_type f_fglob->fg_ops->fo_type
2d21ac55
A
108#define f_msgcount f_fglob->fg_msgcount
109#define f_cred f_fglob->fg_cred
110#define f_ops f_fglob->fg_ops
111#define f_offset f_fglob->fg_offset
112#define f_data f_fglob->fg_data
113
2d21ac55
A
/*
 * Trace codes for this file, all built with NETDBG_CODE() in the
 * DBG_NETSOCK class.  The DBG_FNC_* values place a per-function number
 * in bits 8 and up; the low bits distinguish variants or sub-phases
 * (for example the SENDFILE wait/read/send codes).
 */
114#define DBG_LAYER_IN_BEG NETDBG_CODE(DBG_NETSOCK, 0)
115#define DBG_LAYER_IN_END NETDBG_CODE(DBG_NETSOCK, 2)
116#define DBG_LAYER_OUT_BEG NETDBG_CODE(DBG_NETSOCK, 1)
117#define DBG_LAYER_OUT_END NETDBG_CODE(DBG_NETSOCK, 3)
118#define DBG_FNC_SENDMSG NETDBG_CODE(DBG_NETSOCK, (1 << 8) | 1)
119#define DBG_FNC_SENDTO NETDBG_CODE(DBG_NETSOCK, (2 << 8) | 1)
120#define DBG_FNC_SENDIT NETDBG_CODE(DBG_NETSOCK, (3 << 8) | 1)
121#define DBG_FNC_RECVFROM NETDBG_CODE(DBG_NETSOCK, (5 << 8))
122#define DBG_FNC_RECVMSG NETDBG_CODE(DBG_NETSOCK, (6 << 8))
123#define DBG_FNC_RECVIT NETDBG_CODE(DBG_NETSOCK, (7 << 8))
124#define DBG_FNC_SENDFILE NETDBG_CODE(DBG_NETSOCK, (10 << 8))
125#define DBG_FNC_SENDFILE_WAIT NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 1))
126#define DBG_FNC_SENDFILE_READ NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 2))
127#define DBG_FNC_SENDFILE_SEND NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 3))
fe8ab488
A
128#define DBG_FNC_SENDMSG_X NETDBG_CODE(DBG_NETSOCK, (11 << 8))
129#define DBG_FNC_RECVMSG_X NETDBG_CODE(DBG_NETSOCK, (12 << 8))
2d21ac55 130
3e170ce0
A
/*
 * Debug helpers.  On DEBUG or DEVELOPMENT builds a kernel address is passed
 * through unchanged and DBG_PRINTF() forwards to printf(); on all other
 * builds the address goes through VM_KERNEL_ADDRPERM() and DBG_PRINTF()
 * expands to an empty statement.
 */
131#if DEBUG || DEVELOPMENT
132#define DEBUG_KERNEL_ADDRPERM(_v) (_v)
133#define DBG_PRINTF(...) printf(__VA_ARGS__)
134#else
135#define DEBUG_KERNEL_ADDRPERM(_v) VM_KERNEL_ADDRPERM(_v)
136#define DBG_PRINTF(...) do { } while (0)
137#endif
2d21ac55 138
2d21ac55
A
139/* TODO: should be in header file */
140int falloc_locked(proc_t, struct fileproc **, int *, vfs_context_t, int);
141
3e170ce0
A
142static int sendit(struct proc *, struct socket *, struct user_msghdr *, uio_t,
143 int, int32_t *);
2d21ac55 144static int recvit(struct proc *, int, struct user_msghdr *, uio_t, user_addr_t,
b0d623f7 145 int32_t *);
39236c6e 146static int connectit(struct socket *, struct sockaddr *);
2d21ac55 147static int getsockaddr(struct socket *, struct sockaddr **, user_addr_t,
4a3eedf9 148 size_t, boolean_t);
2d21ac55 149static int getsockaddr_s(struct socket *, struct sockaddr_storage *,
4a3eedf9 150 user_addr_t, size_t, boolean_t);
39236c6e
A
151static int getsockaddrlist(struct socket *, struct sockaddr_list **,
152 user_addr_t, socklen_t, boolean_t);
1c79356b 153#if SENDFILE
2d21ac55
A
154static void alloc_sendpkt(int, size_t, unsigned int *, struct mbuf **,
155 boolean_t);
156#endif /* SENDFILE */
39236c6e
A
157static int connectx_nocancel(struct proc *, struct connectx_args *, int *);
158static int connectitx(struct socket *, struct sockaddr_list **,
3e170ce0
A
159 struct sockaddr_list **, struct proc *, uint32_t, sae_associd_t,
160 sae_connid_t *, uio_t, unsigned int, user_ssize_t *);
39236c6e
A
161static int peeloff_nocancel(struct proc *, struct peeloff_args *, int *);
162static int disconnectx_nocancel(struct proc *, struct disconnectx_args *,
163 int *);
164static int socket_common(struct proc *, int, int, int, pid_t, int32_t *, int);
1c79356b 165
fe8ab488 166static int internalize_user_msghdr_array(const void *, int, int, u_int,
3e170ce0 167 struct user_msghdr_x *, struct uio **);
fe8ab488 168static u_int externalize_user_msghdr_array(void *, int, int, u_int,
3e170ce0 169 const struct user_msghdr_x *, struct uio **);
fe8ab488
A
170
171static void free_uio_array(struct uio **, u_int);
172static int uio_array_is_valid(struct uio **, u_int);
3e170ce0
A
173static int recv_msg_array_is_valid(struct recv_msg_elem *, u_int);
174static int internalize_recv_msghdr_array(const void *, int, int,
175 u_int, struct user_msghdr_x *, struct recv_msg_elem *);
176static u_int externalize_recv_msghdr_array(void *, int, int, u_int,
177 const struct user_msghdr_x *, struct recv_msg_elem *);
178static struct recv_msg_elem *alloc_recv_msg_array(u_int count);
179static void free_recv_msg_array(struct recv_msg_elem *, u_int);
180
181SYSCTL_DECL(_kern_ipc);
182
/*
 * Read-write unsigned tunables registered under the _kern_ipc sysctl node
 * as "maxsendmsgx" and "maxrecvmsgx"; both start at 100.
 * NOTE(review): presumably these cap the number of messages accepted by
 * one sendmsg_x()/recvmsg_x() call -- confirm against their users, which
 * are outside this part of the file.
 */
183static u_int somaxsendmsgx = 100;
184SYSCTL_UINT(_kern_ipc, OID_AUTO, maxsendmsgx,
185 CTLFLAG_RW | CTLFLAG_LOCKED, &somaxsendmsgx, 0, "");
186static u_int somaxrecvmsgx = 100;
187SYSCTL_UINT(_kern_ipc, OID_AUTO, maxrecvmsgx,
188 CTLFLAG_RW | CTLFLAG_LOCKED, &somaxrecvmsgx, 0, "");
fe8ab488 189
1c79356b
A
190/*
191 * System call interface to the socket abstraction.
192 */
1c79356b 193
39236c6e 194extern const struct fileops socketops;
1c79356b 195
2d21ac55
A
196/*
197 * Returns: 0 Success
198 * EACCES Mandatory Access Control failure
199 * falloc:ENFILE
200 * falloc:EMFILE
201 * falloc:ENOMEM
202 * socreate:EAFNOSUPPORT
203 * socreate:EPROTOTYPE
204 * socreate:EPROTONOSUPPORT
205 * socreate:ENOBUFS
206 * socreate:ENOMEM
2d21ac55
A
207 * socreate:??? [other protocol families, IPSEC]
208 */
1c79356b 209int
39236c6e
A
210socket(struct proc *p,
211 struct socket_args *uap,
212 int32_t *retval)
213{
214 return (socket_common(p, uap->domain, uap->type, uap->protocol,
215 proc_selfpid(), retval, 0));
216}
217
218int
219socket_delegate(struct proc *p,
220 struct socket_delegate_args *uap,
221 int32_t *retval)
222{
223 return socket_common(p, uap->domain, uap->type, uap->protocol,
224 uap->epid, retval, 1);
225}
226
227static int
228socket_common(struct proc *p,
229 int domain,
230 int type,
231 int protocol,
232 pid_t epid,
233 int32_t *retval,
234 int delegate)
1c79356b 235{
1c79356b 236 struct socket *so;
91447636 237 struct fileproc *fp;
1c79356b
A
238 int fd, error;
239
39236c6e 240 AUDIT_ARG(socket, domain, type, protocol);
2d21ac55 241#if CONFIG_MACF_SOCKET_SUBSET
39236c6e
A
242 if ((error = mac_socket_check_create(kauth_cred_get(), domain,
243 type, protocol)) != 0)
2d21ac55
A
244 return (error);
245#endif /* MAC_SOCKET_SUBSET */
1c79356b 246
39236c6e
A
247 if (delegate) {
248 error = priv_check_cred(kauth_cred_get(),
249 PRIV_NET_PRIVILEGED_SOCKET_DELEGATE, 0);
250 if (error)
251 return (EACCES);
252 }
253
2d21ac55 254 error = falloc(p, &fp, &fd, vfs_context_current());
91447636 255 if (error) {
1c79356b 256 return (error);
91447636 257 }
1c79356b 258 fp->f_flag = FREAD|FWRITE;
1c79356b 259 fp->f_ops = &socketops;
91447636 260
39236c6e
A
261 if (delegate)
262 error = socreate_delegate(domain, &so, type, protocol, epid);
263 else
264 error = socreate(domain, &so, type, protocol);
265
91447636
A
266 if (error) {
267 fp_free(p, fd, fp);
1c79356b
A
268 } else {
269 fp->f_data = (caddr_t)so;
91447636
A
270
271 proc_fdlock(p);
6601e61a 272 procfdtbl_releasefd(p, fd, NULL);
2d21ac55 273
91447636
A
274 fp_drop(p, fd, fp, 1);
275 proc_fdunlock(p);
276
1c79356b 277 *retval = fd;
3e170ce0
A
278 if (ENTR_SHOULDTRACE) {
279 KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
280 fd, 0, (int64_t)VM_KERNEL_ADDRPERM(so));
281 }
1c79356b
A
282 }
283 return (error);
284}
285
2d21ac55
A
286/*
287 * Returns: 0 Success
288 * EDESTADDRREQ Destination address required
289 * EBADF Bad file descriptor
290 * EACCES Mandatory Access Control failure
291 * file_socket:ENOTSOCK
292 * file_socket:EBADF
293 * getsockaddr:ENAMETOOLONG Filename too long
294 * getsockaddr:EINVAL Invalid argument
295 * getsockaddr:ENOMEM Not enough space
296 * getsockaddr:EFAULT Bad address
39236c6e 297 * sobindlock:???
2d21ac55 298 */
1c79356b
A
299/* ARGSUSED */
300int
b0d623f7 301bind(__unused proc_t p, struct bind_args *uap, __unused int32_t *retval)
1c79356b 302{
2d21ac55
A
303 struct sockaddr_storage ss;
304 struct sockaddr *sa = NULL;
91447636 305 struct socket *so;
2d21ac55 306 boolean_t want_free = TRUE;
1c79356b
A
307 int error;
308
55e303ae 309 AUDIT_ARG(fd, uap->s);
91447636 310 error = file_socket(uap->s, &so);
2d21ac55 311 if (error != 0)
1c79356b 312 return (error);
2d21ac55
A
313 if (so == NULL) {
314 error = EBADF;
315 goto out;
316 }
317 if (uap->name == USER_ADDR_NULL) {
318 error = EDESTADDRREQ;
319 goto out;
320 }
321 if (uap->namelen > sizeof (ss)) {
4a3eedf9 322 error = getsockaddr(so, &sa, uap->name, uap->namelen, TRUE);
2d21ac55 323 } else {
4a3eedf9 324 error = getsockaddr_s(so, &ss, uap->name, uap->namelen, TRUE);
2d21ac55
A
325 if (error == 0) {
326 sa = (struct sockaddr *)&ss;
327 want_free = FALSE;
328 }
329 }
330 if (error != 0)
91447636 331 goto out;
2d21ac55
A
332 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
333#if CONFIG_MACF_SOCKET_SUBSET
334 if ((error = mac_socket_check_bind(kauth_cred_get(), so, sa)) == 0)
39236c6e 335 error = sobindlock(so, sa, 1); /* will lock socket */
2d21ac55 336#else
39236c6e 337 error = sobindlock(so, sa, 1); /* will lock socket */
2d21ac55
A
338#endif /* MAC_SOCKET_SUBSET */
339 if (want_free)
340 FREE(sa, M_SONAME);
91447636
A
341out:
342 file_drop(uap->s);
1c79356b
A
343 return (error);
344}
345
2d21ac55
A
346/*
347 * Returns: 0 Success
348 * EBADF
349 * EACCES Mandatory Access Control failure
350 * file_socket:ENOTSOCK
351 * file_socket:EBADF
352 * solisten:EINVAL
353 * solisten:EOPNOTSUPP
354 * solisten:???
355 */
1c79356b 356int
2d21ac55 357listen(__unused struct proc *p, struct listen_args *uap,
b0d623f7 358 __unused int32_t *retval)
1c79356b 359{
1c79356b 360 int error;
2d21ac55 361 struct socket *so;
1c79356b 362
55e303ae 363 AUDIT_ARG(fd, uap->s);
91447636 364 error = file_socket(uap->s, &so);
1c79356b
A
365 if (error)
366 return (error);
91447636 367 if (so != NULL)
2d21ac55
A
368#if CONFIG_MACF_SOCKET_SUBSET
369 {
370 error = mac_socket_check_listen(kauth_cred_get(), so);
371 if (error == 0)
372 error = solisten(so, uap->backlog);
373 }
374#else
91447636 375 error = solisten(so, uap->backlog);
2d21ac55 376#endif /* MAC_SOCKET_SUBSET */
55e303ae 377 else
91447636 378 error = EBADF;
2d21ac55 379
91447636
A
380 file_drop(uap->s);
381 return (error);
1c79356b
A
382}
383
2d21ac55
A
384/*
385 * Returns: fp_getfsock:EBADF Bad file descriptor
386 * fp_getfsock:EOPNOTSUPP ...
387 * xlate => :ENOTSOCK Socket operation on non-socket
388 * :EFAULT Bad address on copyin/copyout
389 * :EBADF Bad file descriptor
390 * :EOPNOTSUPP Operation not supported on socket
391 * :EINVAL Invalid argument
392 * :EWOULDBLOCK Operation would block
393 * :ECONNABORTED Connection aborted
394 * :EINTR Interrupted function
395 * :EACCES Mandatory Access Control failure
396 * falloc_locked:ENFILE Too many files open in system
397 * falloc_locked::EMFILE Too many open files
398 * falloc_locked::ENOMEM Not enough space
399 * 0 Success
400 */
1c79356b 401int
2d21ac55 402accept_nocancel(struct proc *p, struct accept_nocancel_args *uap,
b0d623f7 403 int32_t *retval)
1c79356b 404{
91447636 405 struct fileproc *fp;
2d21ac55 406 struct sockaddr *sa = NULL;
91447636
A
407 socklen_t namelen;
408 int error;
409 struct socket *head, *so = NULL;
410 lck_mtx_t *mutex_held;
411 int fd = uap->s;
2d21ac55 412 int newfd;
1c79356b 413 short fflag; /* type must match fp->f_flag */
91447636 414 int dosocklock = 0;
1c79356b 415
2d21ac55
A
416 *retval = -1;
417
55e303ae 418 AUDIT_ARG(fd, uap->s);
2d21ac55 419
1c79356b 420 if (uap->name) {
91447636 421 error = copyin(uap->anamelen, (caddr_t)&namelen,
2d21ac55
A
422 sizeof (socklen_t));
423 if (error)
1c79356b
A
424 return (error);
425 }
91447636
A
426 error = fp_getfsock(p, fd, &fp, &head);
427 if (error) {
428 if (error == EOPNOTSUPP)
429 error = ENOTSOCK;
1c79356b 430 return (error);
91447636 431 }
55e303ae 432 if (head == NULL) {
91447636
A
433 error = EBADF;
434 goto out;
55e303ae 435 }
2d21ac55
A
436#if CONFIG_MACF_SOCKET_SUBSET
437 if ((error = mac_socket_check_accept(kauth_cred_get(), head)) != 0)
438 goto out;
439#endif /* MAC_SOCKET_SUBSET */
91447636
A
440
441 socket_lock(head, 1);
442
443 if (head->so_proto->pr_getlock != NULL) {
444 mutex_held = (*head->so_proto->pr_getlock)(head, 0);
445 dosocklock = 1;
2d21ac55 446 } else {
91447636
A
447 mutex_held = head->so_proto->pr_domain->dom_mtx;
448 dosocklock = 0;
449 }
450
1c79356b 451 if ((head->so_options & SO_ACCEPTCONN) == 0) {
2d21ac55
A
452 if ((head->so_proto->pr_flags & PR_CONNREQUIRED) == 0) {
453 error = EOPNOTSUPP;
454 } else {
455 /* POSIX: The socket is not accepting connections */
456 error = EINVAL;
457 }
91447636 458 socket_unlock(head, 1);
91447636 459 goto out;
1c79356b
A
460 }
461 if ((head->so_state & SS_NBIO) && head->so_comp.tqh_first == NULL) {
91447636
A
462 socket_unlock(head, 1);
463 error = EWOULDBLOCK;
464 goto out;
1c79356b 465 }
2d21ac55 466 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
1c79356b
A
467 if (head->so_state & SS_CANTRCVMORE) {
468 head->so_error = ECONNABORTED;
469 break;
470 }
91447636 471 if (head->so_usecount < 1)
2d21ac55
A
472 panic("accept: head=%p refcount=%d\n", head,
473 head->so_usecount);
474 error = msleep((caddr_t)&head->so_timeo, mutex_held,
475 PSOCK | PCATCH, "accept", 0);
91447636 476 if (head->so_usecount < 1)
2d21ac55
A
477 panic("accept: 2 head=%p refcount=%d\n", head,
478 head->so_usecount);
91447636
A
479 if ((head->so_state & SS_DRAINING)) {
480 error = ECONNABORTED;
481 }
1c79356b 482 if (error) {
91447636
A
483 socket_unlock(head, 1);
484 goto out;
1c79356b
A
485 }
486 }
487 if (head->so_error) {
488 error = head->so_error;
489 head->so_error = 0;
91447636
A
490 socket_unlock(head, 1);
491 goto out;
1c79356b
A
492 }
493
494
495 /*
496 * At this point we know that there is at least one connection
497 * ready to be accepted. Remove it from the queue prior to
498 * allocating the file descriptor for it since falloc() may
499 * block allowing another process to accept the connection
500 * instead.
501 */
91447636 502 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
e3027f41 503 so = TAILQ_FIRST(&head->so_comp);
1c79356b
A
504 TAILQ_REMOVE(&head->so_comp, so, so_list);
505 head->so_qlen--;
2d21ac55
A
506 /* unlock head to avoid deadlock with select, keep a ref on head */
507 socket_unlock(head, 0);
508
509#if CONFIG_MACF_SOCKET_SUBSET
510 /*
511 * Pass the pre-accepted socket to the MAC framework. This is
512 * cheaper than allocating a file descriptor for the socket,
513 * calling the protocol accept callback, and possibly freeing
514 * the file descriptor should the MAC check fails.
515 */
516 if ((error = mac_socket_check_accepted(kauth_cred_get(), so)) != 0) {
39236c6e 517 socket_lock(so, 1);
2d21ac55
A
518 so->so_state &= ~(SS_NOFDREF | SS_COMP);
519 so->so_head = NULL;
39236c6e 520 socket_unlock(so, 1);
2d21ac55
A
521 soclose(so);
522 /* Drop reference on listening socket */
523 sodereference(head);
524 goto out;
525 }
526#endif /* MAC_SOCKET_SUBSET */
527
528 /*
529 * Pass the pre-accepted socket to any interested socket filter(s).
530 * Upon failure, the socket would have been closed by the callee.
531 */
532 if (so->so_filt != NULL && (error = soacceptfilter(so)) != 0) {
533 /* Drop reference on listening socket */
534 sodereference(head);
535 /* Propagate socket filter's error code to the caller */
536 goto out;
537 }
538
1c79356b 539 fflag = fp->f_flag;
2d21ac55 540 error = falloc(p, &fp, &newfd, vfs_context_current());
1c79356b 541 if (error) {
39236c6e 542 /*
316670eb
A
543 * Probably ran out of file descriptors.
544 *
545 * <rdar://problem/8554930>
546 * Don't put this back on the socket like we used to, that
547 * just causes the client to spin. Drop the socket.
1c79356b 548 */
39236c6e 549 socket_lock(so, 1);
316670eb
A
550 so->so_state &= ~(SS_NOFDREF | SS_COMP);
551 so->so_head = NULL;
39236c6e 552 socket_unlock(so, 1);
316670eb
A
553 soclose(so);
554 sodereference(head);
91447636 555 goto out;
2d21ac55 556 }
91447636 557 *retval = newfd;
1c79356b
A
558 fp->f_flag = fflag;
559 fp->f_ops = &socketops;
560 fp->f_data = (caddr_t)so;
fe8ab488 561
91447636
A
562 socket_lock(head, 0);
563 if (dosocklock)
564 socket_lock(so, 1);
fe8ab488 565
91447636
A
566 so->so_state &= ~SS_COMP;
567 so->so_head = NULL;
fe8ab488
A
568
569 /* Sync socket non-blocking/async state with file flags */
570 if (fp->f_flag & FNONBLOCK) {
571 so->so_state |= SS_NBIO;
572 } else {
573 so->so_state &= ~SS_NBIO;
574 }
575
576 if (fp->f_flag & FASYNC) {
577 so->so_state |= SS_ASYNC;
578 so->so_rcv.sb_flags |= SB_ASYNC;
579 so->so_snd.sb_flags |= SB_ASYNC;
580 } else {
581 so->so_state &= ~SS_ASYNC;
582 so->so_rcv.sb_flags &= ~SB_ASYNC;
583 so->so_snd.sb_flags &= ~SB_ASYNC;
584 }
585
91447636
A
586 (void) soacceptlock(so, &sa, 0);
587 socket_unlock(head, 1);
2d21ac55 588 if (sa == NULL) {
1c79356b
A
589 namelen = 0;
590 if (uap->name)
591 goto gotnoname;
91447636 592 error = 0;
2d21ac55 593 goto releasefd;
1c79356b 594 }
2d21ac55
A
595 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
596
1c79356b 597 if (uap->name) {
2d21ac55
A
598 socklen_t sa_len;
599
600 /* save sa_len before it is destroyed */
601 sa_len = sa->sa_len;
602 namelen = MIN(namelen, sa_len);
91447636 603 error = copyout(sa, uap->name, namelen);
1c79356b 604 if (!error)
2d21ac55
A
605 /* return the actual, untruncated address length */
606 namelen = sa_len;
1c79356b 607gotnoname:
2d21ac55
A
608 error = copyout((caddr_t)&namelen, uap->anamelen,
609 sizeof (socklen_t));
1c79356b
A
610 }
611 FREE(sa, M_SONAME);
2d21ac55 612
b0d623f7 613releasefd:
2d21ac55 614 /*
6d2010ae
A
615 * If the socket has been marked as inactive by sosetdefunct(),
616 * disallow further operations on it.
2d21ac55
A
617 */
618 if (so->so_flags & SOF_DEFUNCT) {
6d2010ae
A
619 sodefunct(current_proc(), so,
620 SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL);
2d21ac55
A
621 }
622
91447636
A
623 if (dosocklock)
624 socket_unlock(so, 1);
2d21ac55 625
2d21ac55
A
626 proc_fdlock(p);
627 procfdtbl_releasefd(p, newfd, NULL);
628 fp_drop(p, newfd, fp, 1);
629 proc_fdunlock(p);
630
91447636
A
631out:
632 file_drop(fd);
3e170ce0
A
633
634 if (error == 0 && ENTR_SHOULDTRACE) {
635 KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
636 newfd, 0, (int64_t)VM_KERNEL_ADDRPERM(so));
637 }
1c79356b
A
638 return (error);
639}
640
641int
b0d623f7 642accept(struct proc *p, struct accept_args *uap, int32_t *retval)
1c79356b 643{
2d21ac55 644 __pthread_testcancel(1);
3e170ce0
A
645 return (accept_nocancel(p, (struct accept_nocancel_args *)uap,
646 retval));
1c79356b
A
647}
648
2d21ac55
A
649/*
650 * Returns: 0 Success
651 * EBADF Bad file descriptor
652 * EALREADY Connection already in progress
653 * EINPROGRESS Operation in progress
654 * ECONNABORTED Connection aborted
655 * EINTR Interrupted function
656 * EACCES Mandatory Access Control failure
657 * file_socket:ENOTSOCK
658 * file_socket:EBADF
659 * getsockaddr:ENAMETOOLONG Filename too long
660 * getsockaddr:EINVAL Invalid argument
661 * getsockaddr:ENOMEM Not enough space
662 * getsockaddr:EFAULT Bad address
663 * soconnectlock:EOPNOTSUPP
664 * soconnectlock:EISCONN
665 * soconnectlock:??? [depends on protocol, filters]
666 * msleep:EINTR
667 *
668 * Imputed: so_error error may be set from so_error, which
669 * may have been set by soconnectlock.
670 */
671/* ARGSUSED */
1c79356b 672int
b0d623f7 673connect(struct proc *p, struct connect_args *uap, int32_t *retval)
1c79356b 674{
2d21ac55 675 __pthread_testcancel(1);
3e170ce0
A
676 return (connect_nocancel(p, (struct connect_nocancel_args *)uap,
677 retval));
1c79356b 678}
1c79356b 679
1c79356b 680int
39236c6e 681connect_nocancel(proc_t p, struct connect_nocancel_args *uap, int32_t *retval)
1c79356b 682{
39236c6e 683#pragma unused(p, retval)
91447636 684 struct socket *so;
2d21ac55
A
685 struct sockaddr_storage ss;
686 struct sockaddr *sa = NULL;
91447636
A
687 int error;
688 int fd = uap->s;
4a3eedf9 689 boolean_t dgram;
1c79356b 690
55e303ae 691 AUDIT_ARG(fd, uap->s);
2d21ac55
A
692 error = file_socket(fd, &so);
693 if (error != 0)
1c79356b 694 return (error);
91447636
A
695 if (so == NULL) {
696 error = EBADF;
697 goto out;
698 }
699
4a3eedf9
A
700 /*
701 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
702 * if this is a datagram socket; translate for other types.
703 */
704 dgram = (so->so_type == SOCK_DGRAM);
705
2d21ac55
A
706 /* Get socket address now before we obtain socket lock */
707 if (uap->namelen > sizeof (ss)) {
4a3eedf9 708 error = getsockaddr(so, &sa, uap->name, uap->namelen, !dgram);
2d21ac55 709 } else {
4a3eedf9 710 error = getsockaddr_s(so, &ss, uap->name, uap->namelen, !dgram);
39236c6e 711 if (error == 0)
2d21ac55 712 sa = (struct sockaddr *)&ss;
2d21ac55
A
713 }
714 if (error != 0)
715 goto out;
716
39236c6e
A
717 error = connectit(so, sa);
718
719 if (sa != NULL && sa != SA(&ss))
720 FREE(sa, M_SONAME);
721 if (error == ERESTART)
722 error = EINTR;
723out:
724 file_drop(fd);
725 return (error);
726}
727
728static int
729connectx_nocancel(struct proc *p, struct connectx_args *uap, int *retval)
730{
731#pragma unused(p, retval)
732 struct sockaddr_list *src_sl = NULL, *dst_sl = NULL;
733 struct socket *so;
3e170ce0 734 int error, error1, fd = uap->socket;
39236c6e 735 boolean_t dgram;
3e170ce0
A
736 sae_connid_t cid = SAE_CONNID_ANY;
737 struct user32_sa_endpoints ep32;
738 struct user64_sa_endpoints ep64;
739 struct user_sa_endpoints ep;
740 user_ssize_t bytes_written = 0;
741 struct user_iovec *iovp;
742 uio_t auio = NULL;
39236c6e 743
3e170ce0 744 AUDIT_ARG(fd, uap->socket);
39236c6e
A
745 error = file_socket(fd, &so);
746 if (error != 0)
747 return (error);
748 if (so == NULL) {
749 error = EBADF;
750 goto out;
751 }
752
3e170ce0
A
753 if (uap->endpoints == USER_ADDR_NULL) {
754 error = EINVAL;
755 goto out;
756 }
757
758 if (IS_64BIT_PROCESS(p)) {
759 error = copyin(uap->endpoints, (caddr_t)&ep64, sizeof(ep64));
760 if (error != 0)
761 goto out;
762
763 ep.sae_srcif = ep64.sae_srcif;
764 ep.sae_srcaddr = ep64.sae_srcaddr;
765 ep.sae_srcaddrlen = ep64.sae_srcaddrlen;
766 ep.sae_dstaddr = ep64.sae_dstaddr;
767 ep.sae_dstaddrlen = ep64.sae_dstaddrlen;
768 } else {
769 error = copyin(uap->endpoints, (caddr_t)&ep32, sizeof(ep32));
770 if (error != 0)
771 goto out;
772
773 ep.sae_srcif = ep32.sae_srcif;
774 ep.sae_srcaddr = ep32.sae_srcaddr;
775 ep.sae_srcaddrlen = ep32.sae_srcaddrlen;
776 ep.sae_dstaddr = ep32.sae_dstaddr;
777 ep.sae_dstaddrlen = ep32.sae_dstaddrlen;
778 }
fe8ab488 779
39236c6e
A
780 /*
781 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
782 * if this is a datagram socket; translate for other types.
783 */
784 dgram = (so->so_type == SOCK_DGRAM);
785
786 /*
787 * Get socket address(es) now before we obtain socket lock; use
788 * sockaddr_list for src address for convenience, if present,
789 * even though it won't hold more than one.
790 */
3e170ce0
A
791 if (ep.sae_srcaddr != USER_ADDR_NULL && (error = getsockaddrlist(so,
792 &src_sl, (user_addr_t)(caddr_t)ep.sae_srcaddr, ep.sae_srcaddrlen,
793 dgram)) != 0)
39236c6e
A
794 goto out;
795
3e170ce0
A
796 if (ep.sae_dstaddr == USER_ADDR_NULL) {
797 error = EINVAL;
798 goto out;
799 }
800
801 error = getsockaddrlist(so, &dst_sl, (user_addr_t)(caddr_t)ep.sae_dstaddr,
802 ep.sae_dstaddrlen, dgram);
39236c6e
A
803 if (error != 0)
804 goto out;
805
806 VERIFY(dst_sl != NULL &&
807 !TAILQ_EMPTY(&dst_sl->sl_head) && dst_sl->sl_cnt > 0);
808
3e170ce0
A
809 if (uap->iov != USER_ADDR_NULL) {
810 /* Verify range before calling uio_create() */
811 if (uap->iovcnt <= 0 || uap->iovcnt > UIO_MAXIOV)
812 return (EINVAL);
813
814 if (uap->len == USER_ADDR_NULL)
815 return (EINVAL);
816
817 /* allocate a uio to hold the number of iovecs passed */
818 auio = uio_create(uap->iovcnt, 0,
819 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
820 UIO_WRITE);
821
822 if (auio == NULL) {
823 error = ENOMEM;
824 goto out;
825 }
826
827 /*
828 * get location of iovecs within the uio.
829 * then copyin the iovecs from user space.
830 */
831 iovp = uio_iovsaddr(auio);
832 if (iovp == NULL) {
833 error = ENOMEM;
834 goto out;
835 }
836 error = copyin_user_iovec_array(uap->iov,
837 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
838 uap->iovcnt, iovp);
839 if (error != 0)
840 goto out;
841
842 /* finish setup of uio_t */
843 error = uio_calculateresid(auio);
844 if (error != 0) {
845 goto out;
846 }
847 }
848
849 error = connectitx(so, &src_sl, &dst_sl, p, ep.sae_srcif, uap->associd,
850 &cid, auio, uap->flags, &bytes_written);
39236c6e
A
851 if (error == ERESTART)
852 error = EINTR;
853
3e170ce0
A
854 if (uap->len != USER_ADDR_NULL) {
855 error1 = copyout(&bytes_written, uap->len, sizeof (uap->len));
856 /* give precedence to connectitx errors */
857 if ((error1 != 0) && (error == 0))
858 error = error1;
859 }
39236c6e 860
3e170ce0
A
861 if (uap->connid != USER_ADDR_NULL) {
862 error1 = copyout(&cid, uap->connid, sizeof (cid));
863 /* give precedence to connectitx errors */
864 if ((error1 != 0) && (error == 0))
865 error = error1;
866 }
39236c6e
A
867out:
868 file_drop(fd);
3e170ce0
A
869 if (auio != NULL) {
870 uio_free(auio);
871 }
39236c6e
A
872 if (src_sl != NULL)
873 sockaddrlist_free(src_sl);
874 if (dst_sl != NULL)
875 sockaddrlist_free(dst_sl);
876 return (error);
877}
878
/*
 * connectx
 *
 * Cancellable front end for connectx_nocancel().
 */
int
connectx(struct proc *p, struct connectx_args *uap, int *retval)
{
	int result;

	/*
	 * Due to similarity with a POSIX interface, define as
	 * an unofficial cancellation point.
	 */
	__pthread_testcancel(1);

	result = connectx_nocancel(p, uap, retval);
	return (result);
}
889
890static int
891connectit(struct socket *so, struct sockaddr *sa)
892{
893 int error;
894
2d21ac55
A
895 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
896#if CONFIG_MACF_SOCKET_SUBSET
39236c6e
A
897 if ((error = mac_socket_check_connect(kauth_cred_get(), so, sa)) != 0)
898 return (error);
899#endif /* MAC_SOCKET_SUBSET */
900
901 socket_lock(so, 1);
902 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
903 error = EALREADY;
904 goto out;
905 }
906 error = soconnectlock(so, sa, 0);
907 if (error != 0) {
908 so->so_state &= ~SS_ISCONNECTING;
2d21ac55
A
909 goto out;
910 }
39236c6e
A
911 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
912 error = EINPROGRESS;
913 goto out;
914 }
915 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
916 lck_mtx_t *mutex_held;
917
918 if (so->so_proto->pr_getlock != NULL)
919 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
920 else
921 mutex_held = so->so_proto->pr_domain->dom_mtx;
922 error = msleep((caddr_t)&so->so_timeo, mutex_held,
923 PSOCK | PCATCH, __func__, 0);
924 if (so->so_state & SS_DRAINING) {
925 error = ECONNABORTED;
926 }
927 if (error != 0)
928 break;
929 }
930 if (error == 0) {
931 error = so->so_error;
932 so->so_error = 0;
933 }
934out:
935 socket_unlock(so, 1);
936 return (error);
937}
938
939static int
940connectitx(struct socket *so, struct sockaddr_list **src_sl,
941 struct sockaddr_list **dst_sl, struct proc *p, uint32_t ifscope,
3e170ce0
A
942 sae_associd_t aid, sae_connid_t *pcid, uio_t auio, unsigned int flags,
943 user_ssize_t *bytes_written)
39236c6e
A
944{
945 struct sockaddr_entry *se;
946 int error;
3e170ce0 947#pragma unused (flags)
39236c6e
A
948
949 VERIFY(dst_sl != NULL && *dst_sl != NULL);
950
951 TAILQ_FOREACH(se, &(*dst_sl)->sl_head, se_link) {
952 VERIFY(se->se_addr != NULL);
953 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()),
954 se->se_addr);
955#if CONFIG_MACF_SOCKET_SUBSET
956 if ((error = mac_socket_check_connect(kauth_cred_get(),
957 so, se->se_addr)) != 0)
958 return (error);
2d21ac55 959#endif /* MAC_SOCKET_SUBSET */
39236c6e 960 }
91447636 961
39236c6e 962 socket_lock(so, 1);
91447636 963 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
91447636
A
964 error = EALREADY;
965 goto out;
966 }
3e170ce0
A
967
968 if ((so->so_proto->pr_flags & PR_DATA_IDEMPOTENT) &&
969 (flags & CONNECT_DATA_IDEMPOTENT))
970 so->so_flags1 |= SOF1_DATA_IDEMPOTENT;
971
972 /*
973 * Case 1: CONNECT_RESUME_ON_READ_WRITE set, no data.
974 * Case 2: CONNECT_RESUME_ON_READ_WRITE set, with data (user error)
975 * Case 3: CONNECT_RESUME_ON_READ_WRITE not set, with data
976 * Case 3 allows user to combine write with connect even if they have
977 * no use for TFO (such as regular TCP, and UDP).
978 * Case 4: CONNECT_RESUME_ON_READ_WRITE not set, no data (regular case)
979 */
980 if ((so->so_proto->pr_flags & PR_PRECONN_WRITE) &&
981 ((flags & CONNECT_RESUME_ON_READ_WRITE) || auio))
982 so->so_flags1 |= SOF1_PRECONNECT_DATA;
983
984 /*
985 * If a user sets data idempotent and does not pass an uio, or
986 * sets CONNECT_RESUME_ON_READ_WRITE, this is an error, reset
987 * SOF1_DATA_IDEMPOTENT.
988 */
989 if (!(so->so_flags1 & SOF1_PRECONNECT_DATA) &&
990 (so->so_flags1 & SOF1_DATA_IDEMPOTENT)) {
991 /* We should return EINVAL instead perhaps. */
992 so->so_flags1 &= ~SOF1_DATA_IDEMPOTENT;
993 }
994
39236c6e 995 error = soconnectxlocked(so, src_sl, dst_sl, p, ifscope,
3e170ce0 996 aid, pcid, 0, NULL, 0, auio, bytes_written);
39236c6e
A
997 if (error != 0) {
998 so->so_state &= ~SS_ISCONNECTING;
999 goto out;
1000 }
3e170ce0
A
1001 /*
1002 * If, after the call to soconnectxlocked the flag is still set (in case
1003 * data has been queued and the connect() has actually been triggered,
1004 * it will have been unset by the transport), we exit immediately. There
1005 * is no reason to wait on any event.
1006 */
1007 if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
1008 error = 0;
1009 goto out;
1010 }
1c79356b 1011 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
91447636
A
1012 error = EINPROGRESS;
1013 goto out;
1c79356b 1014 }
1c79356b 1015 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
39236c6e
A
1016 lck_mtx_t *mutex_held;
1017
2d21ac55 1018 if (so->so_proto->pr_getlock != NULL)
91447636 1019 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
2d21ac55 1020 else
91447636 1021 mutex_held = so->so_proto->pr_domain->dom_mtx;
2d21ac55 1022 error = msleep((caddr_t)&so->so_timeo, mutex_held,
39236c6e
A
1023 PSOCK | PCATCH, __func__, 0);
1024 if (so->so_state & SS_DRAINING) {
91447636
A
1025 error = ECONNABORTED;
1026 }
39236c6e 1027 if (error != 0)
1c79356b
A
1028 break;
1029 }
1030 if (error == 0) {
1031 error = so->so_error;
1032 so->so_error = 0;
1033 }
39236c6e 1034out:
91447636 1035 socket_unlock(so, 1);
39236c6e
A
1036 return (error);
1037}
1038
/*
 * peeloff: system call entry point; tests for pthread cancellation and
 * then defers to peeloff_nocancel().
 */
int
peeloff(struct proc *p, struct peeloff_args *uap, int *retval)
{
	int error;

	/*
	 * Due to similarity with a POSIX interface, define as
	 * an unofficial cancellation point.
	 */
	__pthread_testcancel(1);
	error = peeloff_nocancel(p, uap, retval);

	return (error);
}
1049
1050static int
1051peeloff_nocancel(struct proc *p, struct peeloff_args *uap, int *retval)
1052{
1053 struct fileproc *fp;
1054 struct socket *mp_so, *so = NULL;
1055 int newfd, fd = uap->s;
1056 short fflag; /* type must match fp->f_flag */
1057 int error;
1058
1059 *retval = -1;
1060
1061 error = fp_getfsock(p, fd, &fp, &mp_so);
1062 if (error != 0) {
1063 if (error == EOPNOTSUPP)
1064 error = ENOTSOCK;
1065 goto out_nofile;
1066 }
1067 if (mp_so == NULL) {
1068 error = EBADF;
1069 goto out;
1070 }
1071
1072 socket_lock(mp_so, 1);
1073 error = sopeelofflocked(mp_so, uap->aid, &so);
1074 if (error != 0) {
1075 socket_unlock(mp_so, 1);
1076 goto out;
1077 }
1078 VERIFY(so != NULL);
1079 socket_unlock(mp_so, 0); /* keep ref on mp_so for us */
1080
1081 fflag = fp->f_flag;
1082 error = falloc(p, &fp, &newfd, vfs_context_current());
1083 if (error != 0) {
1084 /* drop this socket (probably ran out of file descriptors) */
1085 soclose(so);
1086 sodereference(mp_so); /* our mp_so ref */
1087 goto out;
1088 }
1089
1090 fp->f_flag = fflag;
1091 fp->f_ops = &socketops;
1092 fp->f_data = (caddr_t)so;
1093
1094 /*
1095 * If the socket has been marked as inactive by sosetdefunct(),
1096 * disallow further operations on it.
1097 */
1098 if (so->so_flags & SOF_DEFUNCT) {
1099 sodefunct(current_proc(), so,
1100 SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL);
1101 }
1102
1103 proc_fdlock(p);
1104 procfdtbl_releasefd(p, newfd, NULL);
1105 fp_drop(p, newfd, fp, 1);
1106 proc_fdunlock(p);
1107
1108 sodereference(mp_so); /* our mp_so ref */
1109 *retval = newfd;
1110
1111out:
1112 file_drop(fd);
1113
1114out_nofile:
1115 return (error);
1116}
1117
/*
 * disconnectx: system call entry point; tests for pthread cancellation
 * and then defers to disconnectx_nocancel().
 */
int
disconnectx(struct proc *p, struct disconnectx_args *uap, int *retval)
{
	int error;

	/*
	 * Due to similarity with a POSIX interface, define as
	 * an unofficial cancellation point.
	 */
	__pthread_testcancel(1);
	error = disconnectx_nocancel(p, uap, retval);

	return (error);
}
1128
1129static int
1130disconnectx_nocancel(struct proc *p, struct disconnectx_args *uap, int *retval)
1131{
1132#pragma unused(p, retval)
1133 struct socket *so;
1134 int fd = uap->s;
1135 int error;
1136
1137 error = file_socket(fd, &so);
1138 if (error != 0)
1139 return (error);
1140 if (so == NULL) {
1141 error = EBADF;
1142 goto out;
1143 }
1144
1145 error = sodisconnectx(so, uap->aid, uap->cid);
91447636
A
1146out:
1147 file_drop(fd);
1c79356b
A
1148 return (error);
1149}
1150
2d21ac55
A
1151/*
1152 * Returns: 0 Success
1153 * socreate:EAFNOSUPPORT
1154 * socreate:EPROTOTYPE
1155 * socreate:EPROTONOSUPPORT
1156 * socreate:ENOBUFS
1157 * socreate:ENOMEM
1158 * socreate:EISCONN
1159 * socreate:??? [other protocol families, IPSEC]
1160 * falloc:ENFILE
1161 * falloc:EMFILE
1162 * falloc:ENOMEM
1163 * copyout:EFAULT
1164 * soconnect2:EINVAL
1165 * soconnect2:EPROTOTYPE
1166 * soconnect2:??? [other protocol families[
1167 */
1c79356b 1168int
2d21ac55 1169socketpair(struct proc *p, struct socketpair_args *uap,
b0d623f7 1170 __unused int32_t *retval)
1c79356b 1171{
91447636 1172 struct fileproc *fp1, *fp2;
1c79356b
A
1173 struct socket *so1, *so2;
1174 int fd, error, sv[2];
1175
55e303ae 1176 AUDIT_ARG(socket, uap->domain, uap->type, uap->protocol);
1c79356b
A
1177 error = socreate(uap->domain, &so1, uap->type, uap->protocol);
1178 if (error)
1179 return (error);
1180 error = socreate(uap->domain, &so2, uap->type, uap->protocol);
1181 if (error)
1182 goto free1;
91447636 1183
2d21ac55 1184 error = falloc(p, &fp1, &fd, vfs_context_current());
91447636 1185 if (error) {
1c79356b 1186 goto free2;
91447636 1187 }
1c79356b 1188 fp1->f_flag = FREAD|FWRITE;
1c79356b
A
1189 fp1->f_ops = &socketops;
1190 fp1->f_data = (caddr_t)so1;
91447636
A
1191 sv[0] = fd;
1192
2d21ac55 1193 error = falloc(p, &fp2, &fd, vfs_context_current());
91447636 1194 if (error) {
1c79356b 1195 goto free3;
91447636 1196 }
1c79356b 1197 fp2->f_flag = FREAD|FWRITE;
1c79356b
A
1198 fp2->f_ops = &socketops;
1199 fp2->f_data = (caddr_t)so2;
1200 sv[1] = fd;
91447636 1201
1c79356b
A
1202 error = soconnect2(so1, so2);
1203 if (error) {
1c79356b
A
1204 goto free4;
1205 }
1c79356b
A
1206 if (uap->type == SOCK_DGRAM) {
1207 /*
1208 * Datagram socket connection is asymmetric.
1209 */
2d21ac55
A
1210 error = soconnect2(so2, so1);
1211 if (error) {
1212 goto free4;
1213 }
1c79356b 1214 }
91447636 1215
6d2010ae
A
1216 if ((error = copyout(sv, uap->rsv, 2 * sizeof (int))) != 0)
1217 goto free4;
1218
91447636 1219 proc_fdlock(p);
6601e61a
A
1220 procfdtbl_releasefd(p, sv[0], NULL);
1221 procfdtbl_releasefd(p, sv[1], NULL);
91447636
A
1222 fp_drop(p, sv[0], fp1, 1);
1223 fp_drop(p, sv[1], fp2, 1);
1224 proc_fdunlock(p);
1225
6d2010ae 1226 return (0);
1c79356b 1227free4:
91447636 1228 fp_free(p, sv[1], fp2);
1c79356b 1229free3:
91447636 1230 fp_free(p, sv[0], fp1);
1c79356b 1231free2:
2d21ac55 1232 (void) soclose(so2);
1c79356b 1233free1:
2d21ac55 1234 (void) soclose(so1);
1c79356b
A
1235 return (error);
1236}
1237
2d21ac55
A
1238/*
1239 * Returns: 0 Success
1240 * EINVAL
1241 * ENOBUFS
1242 * EBADF
1243 * EPIPE
1244 * EACCES Mandatory Access Control failure
1245 * file_socket:ENOTSOCK
1246 * file_socket:EBADF
1247 * getsockaddr:ENAMETOOLONG Filename too long
1248 * getsockaddr:EINVAL Invalid argument
1249 * getsockaddr:ENOMEM Not enough space
1250 * getsockaddr:EFAULT Bad address
1251 * <pru_sosend>:EACCES[TCP]
1252 * <pru_sosend>:EADDRINUSE[TCP]
1253 * <pru_sosend>:EADDRNOTAVAIL[TCP]
1254 * <pru_sosend>:EAFNOSUPPORT[TCP]
1255 * <pru_sosend>:EAGAIN[TCP]
1256 * <pru_sosend>:EBADF
1257 * <pru_sosend>:ECONNRESET[TCP]
1258 * <pru_sosend>:EFAULT
1259 * <pru_sosend>:EHOSTUNREACH[TCP]
1260 * <pru_sosend>:EINTR
1261 * <pru_sosend>:EINVAL
1262 * <pru_sosend>:EISCONN[AF_INET]
1263 * <pru_sosend>:EMSGSIZE[TCP]
1264 * <pru_sosend>:ENETDOWN[TCP]
1265 * <pru_sosend>:ENETUNREACH[TCP]
1266 * <pru_sosend>:ENOBUFS
1267 * <pru_sosend>:ENOMEM[TCP]
1268 * <pru_sosend>:ENOTCONN[AF_INET]
1269 * <pru_sosend>:EOPNOTSUPP
1270 * <pru_sosend>:EPERM[TCP]
1271 * <pru_sosend>:EPIPE
1272 * <pru_sosend>:EWOULDBLOCK
1273 * <pru_sosend>:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
1274 * <pru_sosend>:???[AF_INET] [whatever a filter author chooses]
1275 * <pru_sosend>:??? [value from so_error]
1276 * sockargs:???
1277 */
1c79356b 1278static int
3e170ce0 1279sendit(struct proc *p, struct socket *so, struct user_msghdr *mp, uio_t uiop,
b0d623f7 1280 int flags, int32_t *retval)
1c79356b 1281{
2d21ac55
A
1282 struct mbuf *control = NULL;
1283 struct sockaddr_storage ss;
1284 struct sockaddr *to = NULL;
1285 boolean_t want_free = TRUE;
91447636 1286 int error;
91447636 1287 user_ssize_t len;
2d21ac55
A
1288
1289 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
1c79356b 1290
2d21ac55
A
1291 if (mp->msg_name != USER_ADDR_NULL) {
1292 if (mp->msg_namelen > sizeof (ss)) {
1293 error = getsockaddr(so, &to, mp->msg_name,
4a3eedf9 1294 mp->msg_namelen, TRUE);
2d21ac55
A
1295 } else {
1296 error = getsockaddr_s(so, &ss, mp->msg_name,
4a3eedf9 1297 mp->msg_namelen, TRUE);
2d21ac55
A
1298 if (error == 0) {
1299 to = (struct sockaddr *)&ss;
1300 want_free = FALSE;
1301 }
1c79356b 1302 }
2d21ac55
A
1303 if (error != 0)
1304 goto out;
1305 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), to);
91447636 1306 }
2d21ac55
A
1307 if (mp->msg_control != USER_ADDR_NULL) {
1308 if (mp->msg_controllen < sizeof (struct cmsghdr)) {
1c79356b
A
1309 error = EINVAL;
1310 goto bad;
1311 }
1312 error = sockargs(&control, mp->msg_control,
1313 mp->msg_controllen, MT_CONTROL);
2d21ac55 1314 if (error != 0)
1c79356b 1315 goto bad;
91447636 1316 }
1c79356b 1317
2d21ac55
A
1318#if CONFIG_MACF_SOCKET_SUBSET
1319 /*
1320 * We check the state without holding the socket lock;
1321 * if a race condition occurs, it would simply result
3e170ce0 1322 * in an extra call to the MAC check function.
2d21ac55 1323 */
3e170ce0 1324 if (to != NULL &&
316670eb 1325 !(so->so_state & SS_DEFUNCT) &&
2d21ac55
A
1326 (error = mac_socket_check_send(kauth_cred_get(), so, to)) != 0)
1327 goto bad;
1328#endif /* MAC_SOCKET_SUBSET */
91447636
A
1329
1330 len = uio_resid(uiop);
39236c6e
A
1331 error = so->so_proto->pr_usrreqs->pru_sosend(so, to, uiop, 0,
1332 control, flags);
2d21ac55 1333 if (error != 0) {
91447636 1334 if (uio_resid(uiop) != len && (error == ERESTART ||
1c79356b
A
1335 error == EINTR || error == EWOULDBLOCK))
1336 error = 0;
2d21ac55 1337 /* Generation of SIGPIPE can be controlled per socket */
9bccf70c 1338 if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE))
1c79356b
A
1339 psignal(p, SIGPIPE);
1340 }
1341 if (error == 0)
91447636
A
1342 *retval = (int)(len - uio_resid(uiop));
1343bad:
2d21ac55 1344 if (to != NULL && want_free)
1c79356b 1345 FREE(to, M_SONAME);
91447636 1346out:
2d21ac55 1347 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error, 0, 0, 0, 0);
3e170ce0 1348
1c79356b
A
1349 return (error);
1350}
1351
2d21ac55
A
1352/*
1353 * Returns: 0 Success
1354 * ENOMEM
1355 * sendit:??? [see sendit definition in this file]
1356 * write:??? [4056224: applicable for pipes]
1357 */
1c79356b 1358int
b0d623f7 1359sendto(struct proc *p, struct sendto_args *uap, int32_t *retval)
2d21ac55
A
1360{
1361 __pthread_testcancel(1);
39236c6e 1362 return (sendto_nocancel(p, (struct sendto_nocancel_args *)uap, retval));
2d21ac55
A
1363}
1364
1365int
39236c6e
A
1366sendto_nocancel(struct proc *p,
1367 struct sendto_nocancel_args *uap,
1368 int32_t *retval)
1c79356b 1369{
91447636
A
1370 struct user_msghdr msg;
1371 int error;
1372 uio_t auio = NULL;
3e170ce0 1373 struct socket *so;
1c79356b 1374
2d21ac55 1375 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_START, 0, 0, 0, 0, 0);
55e303ae 1376 AUDIT_ARG(fd, uap->s);
1c79356b 1377
91447636 1378 auio = uio_create(1, 0,
2d21ac55
A
1379 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1380 UIO_WRITE);
91447636 1381 if (auio == NULL) {
3e170ce0
A
1382 error = ENOMEM;
1383 goto done;
91447636
A
1384 }
1385 uio_addiov(auio, uap->buf, uap->len);
1386
1c79356b
A
1387 msg.msg_name = uap->to;
1388 msg.msg_namelen = uap->tolen;
91447636
A
1389 /* no need to set up msg_iov. sendit uses uio_t we send it */
1390 msg.msg_iov = 0;
1391 msg.msg_iovlen = 0;
1c79356b 1392 msg.msg_control = 0;
1c79356b 1393 msg.msg_flags = 0;
1c79356b 1394
3e170ce0
A
1395 error = file_socket(uap->s, &so);
1396 if (error)
1397 goto done;
2d21ac55 1398
3e170ce0
A
1399 if (so == NULL) {
1400 error = EBADF;
1401 } else {
1402 error = sendit(p, so, &msg, auio, uap->flags, retval);
91447636 1403 }
2d21ac55 1404
3e170ce0
A
1405 file_drop(uap->s);
1406done:
1407 if (auio != NULL)
1408 uio_free(auio);
1409
2d21ac55 1410 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_END, error, *retval, 0, 0, 0);
1c79356b 1411
2d21ac55 1412 return (error);
1c79356b 1413}
1c79356b 1414
2d21ac55
A
1415/*
1416 * Returns: 0 Success
1417 * ENOBUFS
1418 * copyin:EFAULT
1419 * sendit:??? [see sendit definition in this file]
1420 */
1c79356b 1421int
b0d623f7 1422sendmsg(struct proc *p, struct sendmsg_args *uap, int32_t *retval)
1c79356b 1423{
2d21ac55 1424 __pthread_testcancel(1);
3e170ce0
A
1425 return (sendmsg_nocancel(p, (struct sendmsg_nocancel_args *)uap,
1426 retval));
1c79356b 1427}
1c79356b
A
1428
1429int
3e170ce0
A
1430sendmsg_nocancel(struct proc *p, struct sendmsg_nocancel_args *uap,
1431 int32_t *retval)
1c79356b 1432{
b0d623f7
A
1433 struct user32_msghdr msg32;
1434 struct user64_msghdr msg64;
91447636
A
1435 struct user_msghdr user_msg;
1436 caddr_t msghdrp;
1437 int size_of_msghdr;
1c79356b 1438 int error;
91447636
A
1439 uio_t auio = NULL;
1440 struct user_iovec *iovp;
3e170ce0 1441 struct socket *so;
1c79356b 1442
2d21ac55 1443 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
55e303ae 1444 AUDIT_ARG(fd, uap->s);
91447636 1445 if (IS_64BIT_PROCESS(p)) {
b0d623f7
A
1446 msghdrp = (caddr_t)&msg64;
1447 size_of_msghdr = sizeof (msg64);
2d21ac55 1448 } else {
b0d623f7
A
1449 msghdrp = (caddr_t)&msg32;
1450 size_of_msghdr = sizeof (msg32);
91447636
A
1451 }
1452 error = copyin(uap->msg, msghdrp, size_of_msghdr);
2d21ac55
A
1453 if (error) {
1454 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1455 return (error);
1c79356b 1456 }
91447636 1457
b0d623f7
A
1458 if (IS_64BIT_PROCESS(p)) {
1459 user_msg.msg_flags = msg64.msg_flags;
1460 user_msg.msg_controllen = msg64.msg_controllen;
1461 user_msg.msg_control = msg64.msg_control;
1462 user_msg.msg_iovlen = msg64.msg_iovlen;
1463 user_msg.msg_iov = msg64.msg_iov;
1464 user_msg.msg_namelen = msg64.msg_namelen;
1465 user_msg.msg_name = msg64.msg_name;
1466 } else {
1467 user_msg.msg_flags = msg32.msg_flags;
1468 user_msg.msg_controllen = msg32.msg_controllen;
1469 user_msg.msg_control = msg32.msg_control;
1470 user_msg.msg_iovlen = msg32.msg_iovlen;
1471 user_msg.msg_iov = msg32.msg_iov;
1472 user_msg.msg_namelen = msg32.msg_namelen;
1473 user_msg.msg_name = msg32.msg_name;
91447636
A
1474 }
1475
1476 if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
2d21ac55
A
1477 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, EMSGSIZE,
1478 0, 0, 0, 0);
91447636
A
1479 return (EMSGSIZE);
1480 }
1481
1482 /* allocate a uio large enough to hold the number of iovecs passed */
1483 auio = uio_create(user_msg.msg_iovlen, 0,
2d21ac55
A
1484 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1485 UIO_WRITE);
91447636
A
1486 if (auio == NULL) {
1487 error = ENOBUFS;
1488 goto done;
1489 }
2d21ac55 1490
91447636 1491 if (user_msg.msg_iovlen) {
2d21ac55
A
1492 /*
1493 * get location of iovecs within the uio.
1494 * then copyin the iovecs from user space.
91447636
A
1495 */
1496 iovp = uio_iovsaddr(auio);
1497 if (iovp == NULL) {
1498 error = ENOBUFS;
1499 goto done;
1500 }
b0d623f7
A
1501 error = copyin_user_iovec_array(user_msg.msg_iov,
1502 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
1503 user_msg.msg_iovlen, iovp);
91447636
A
1504 if (error)
1505 goto done;
1506 user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
2d21ac55
A
1507
1508 /* finish setup of uio_t */
39236c6e
A
1509 error = uio_calculateresid(auio);
1510 if (error) {
1511 goto done;
1512 }
2d21ac55 1513 } else {
91447636
A
1514 user_msg.msg_iov = 0;
1515 }
2d21ac55
A
1516
1517 /* msg_flags is ignored for send */
91447636 1518 user_msg.msg_flags = 0;
2d21ac55 1519
3e170ce0
A
1520 error = file_socket(uap->s, &so);
1521 if (error) {
1522 goto done;
1523 }
1524 if (so == NULL) {
1525 error = EBADF;
1526 } else {
1527 error = sendit(p, so, &user_msg, auio, uap->flags, retval);
1528 }
1529 file_drop(uap->s);
1c79356b 1530done:
91447636
A
1531 if (auio != NULL) {
1532 uio_free(auio);
1533 }
2d21ac55 1534 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
91447636 1535
1c79356b
A
1536 return (error);
1537}
1538
fe8ab488
A
1539int
1540sendmsg_x(struct proc *p, struct sendmsg_x_args *uap, user_ssize_t *retval)
1541{
1542 int error = 0;
3e170ce0 1543 struct user_msghdr_x *user_msg_x = NULL;
fe8ab488
A
1544 struct uio **uiop = NULL;
1545 struct socket *so;
1546 u_int i;
1547 struct sockaddr *to = NULL;
fe8ab488
A
1548 user_ssize_t len_before = 0, len_after;
1549 int need_drop = 0;
1550 size_t size_of_msghdr;
1551 void *umsgp = NULL;
1552 u_int uiocnt;
3e170ce0 1553 int has_addr_or_ctl = 0;
fe8ab488
A
1554
1555 KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0);
1556
1557 error = file_socket(uap->s, &so);
1558 if (error) {
1559 goto out;
1560 }
1561 need_drop = 1;
1562 if (so == NULL) {
1563 error = EBADF;
1564 goto out;
1565 }
fe8ab488
A
1566
1567 /*
1568 * Input parameter range check
1569 */
1570 if (uap->cnt == 0 || uap->cnt > UIO_MAXIOV) {
1571 error = EINVAL;
1572 goto out;
1573 }
3e170ce0
A
1574 /*
1575 * Clip to max currently allowed
1576 */
1577 if (uap->cnt > somaxsendmsgx)
1578 uap->cnt = somaxsendmsgx;
1579
1580 user_msg_x = _MALLOC(uap->cnt * sizeof(struct user_msghdr_x),
fe8ab488 1581 M_TEMP, M_WAITOK | M_ZERO);
3e170ce0
A
1582 if (user_msg_x == NULL) {
1583 DBG_PRINTF("%s _MALLOC() user_msg_x failed\n", __func__);
fe8ab488
A
1584 error = ENOMEM;
1585 goto out;
1586 }
1587 uiop = _MALLOC(uap->cnt * sizeof(struct uio *),
1588 M_TEMP, M_WAITOK | M_ZERO);
1589 if (uiop == NULL) {
3e170ce0 1590 DBG_PRINTF("%s _MALLOC() uiop failed\n", __func__);
fe8ab488
A
1591 error = ENOMEM;
1592 goto out;
1593 }
1594
1595 size_of_msghdr = IS_64BIT_PROCESS(p) ?
1596 sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x);
1597
3e170ce0 1598 umsgp = _MALLOC(uap->cnt * size_of_msghdr,
fe8ab488
A
1599 M_TEMP, M_WAITOK | M_ZERO);
1600 if (umsgp == NULL) {
3e170ce0 1601 printf("%s _MALLOC() user_msg_x failed\n", __func__);
fe8ab488
A
1602 error = ENOMEM;
1603 goto out;
1604 }
1605 error = copyin(uap->msgp, umsgp, uap->cnt * size_of_msghdr);
1606 if (error) {
3e170ce0 1607 DBG_PRINTF("%s copyin() failed\n", __func__);
fe8ab488
A
1608 goto out;
1609 }
1610 error = internalize_user_msghdr_array(umsgp,
1611 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
3e170ce0 1612 UIO_WRITE, uap->cnt, user_msg_x, uiop);
fe8ab488 1613 if (error) {
3e170ce0 1614 DBG_PRINTF("%s copyin_user_msghdr_array() failed\n", __func__);
fe8ab488
A
1615 goto out;
1616 }
1617 /*
1618 * Make sure the size of each message iovec and
1619 * the aggregate size of all the iovec is valid
1620 */
1621 if (uio_array_is_valid(uiop, uap->cnt) == 0) {
1622 error = EINVAL;
1623 goto out;
1624 }
1625
1626 /*
1627 * Sanity check on passed arguments
1628 */
1629 for (i = 0; i < uap->cnt; i++) {
3e170ce0 1630 struct user_msghdr_x *mp = user_msg_x + i;
fe8ab488
A
1631
1632 /*
1633 * No flags on send message
1634 */
1635 if (mp->msg_flags != 0) {
1636 error = EINVAL;
1637 goto out;
1638 }
1639 /*
1640 * No support for address or ancillary data (yet)
1641 */
3e170ce0
A
1642 if (mp->msg_name != USER_ADDR_NULL || mp->msg_namelen != 0)
1643 has_addr_or_ctl = 1;
1644
fe8ab488 1645 if (mp->msg_control != USER_ADDR_NULL ||
3e170ce0
A
1646 mp->msg_controllen != 0)
1647 has_addr_or_ctl = 1;
1648
fe8ab488
A
1649#if CONFIG_MACF_SOCKET_SUBSET
1650 /*
1651 * We check the state without holding the socket lock;
1652 * if a race condition occurs, it would simply result
3e170ce0 1653 * in an extra call to the MAC check function.
fe8ab488
A
1654 *
1655 * Note: The following check is never true taken with the
1656 * current limitation that we do not accept to pass an address,
3e170ce0
A
1657 * this is effectively placeholder code. If we add support for
1658 * addresses, we will have to check every address.
fe8ab488 1659 */
3e170ce0 1660 if (to != NULL &&
fe8ab488 1661 !(so->so_state & SS_DEFUNCT) &&
3e170ce0
A
1662 (error = mac_socket_check_send(kauth_cred_get(), so, to))
1663 != 0)
fe8ab488
A
1664 goto out;
1665#endif /* MAC_SOCKET_SUBSET */
1666 }
1667
1668 len_before = uio_array_resid(uiop, uap->cnt);
1669
3e170ce0
A
1670 /*
1671 * Feed list of packets at once only for connected socket without
1672 * control message
1673 */
1674 if (so->so_proto->pr_usrreqs->pru_sosend_list !=
1675 pru_sosend_list_notsupp &&
1676 has_addr_or_ctl == 0 && somaxsendmsgx == 0) {
1677 error = so->so_proto->pr_usrreqs->pru_sosend_list(so, uiop,
1678 uap->cnt, uap->flags);
1679 } else {
1680 for (i = 0; i < uap->cnt; i++) {
1681 struct user_msghdr_x *mp = user_msg_x + i;
1682 struct user_msghdr user_msg;
1683 uio_t auio = uiop[i];
1684 int32_t tmpval;
1685
1686 user_msg.msg_flags = mp->msg_flags;
1687 user_msg.msg_controllen = mp->msg_controllen;
1688 user_msg.msg_control = mp->msg_control;
1689 user_msg.msg_iovlen = mp->msg_iovlen;
1690 user_msg.msg_iov = mp->msg_iov;
1691 user_msg.msg_namelen = mp->msg_namelen;
1692 user_msg.msg_name = mp->msg_name;
1693
1694 error = sendit(p, so, &user_msg, auio, uap->flags,
1695 &tmpval);
1696 if (error != 0)
1697 break;
1698 }
1699 }
fe8ab488
A
1700 len_after = uio_array_resid(uiop, uap->cnt);
1701
3e170ce0
A
1702 VERIFY(len_after <= len_before);
1703
fe8ab488
A
1704 if (error != 0) {
1705 if (len_after != len_before && (error == ERESTART ||
3e170ce0
A
1706 error == EINTR || error == EWOULDBLOCK ||
1707 error == ENOBUFS))
fe8ab488
A
1708 error = 0;
1709 /* Generation of SIGPIPE can be controlled per socket */
1710 if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE))
1711 psignal(p, SIGPIPE);
1712 }
1713 if (error == 0) {
1714 uiocnt = externalize_user_msghdr_array(umsgp,
1715 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
3e170ce0 1716 UIO_WRITE, uap->cnt, user_msg_x, uiop);
fe8ab488
A
1717
1718 *retval = (int)(uiocnt);
1719 }
1720out:
1721 if (need_drop)
1722 file_drop(uap->s);
1723 if (umsgp != NULL)
1724 _FREE(umsgp, M_TEMP);
1725 if (uiop != NULL) {
1726 free_uio_array(uiop, uap->cnt);
1727 _FREE(uiop, M_TEMP);
1728 }
3e170ce0
A
1729 if (user_msg_x != NULL)
1730 _FREE(user_msg_x, M_TEMP);
fe8ab488
A
1731
1732 KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0);
1733
1734 return (error);
1735}
1736
3e170ce0
A
1737
1738static int
1739copyout_sa(struct sockaddr *fromsa, user_addr_t name, socklen_t *namelen)
1740{
1741 int error = 0;
1742 socklen_t sa_len = 0;
1743 ssize_t len;
1744
1745 len = *namelen;
1746 if (len <= 0 || fromsa == 0) {
1747 len = 0;
1748 } else {
1749#ifndef MIN
1750#define MIN(a, b) ((a) > (b) ? (b) : (a))
1751#endif
1752 sa_len = fromsa->sa_len;
1753 len = MIN((unsigned int)len, sa_len);
1754 error = copyout(fromsa, name, (unsigned)len);
1755 if (error)
1756 goto out;
1757 }
1758 *namelen = sa_len;
1759out:
1760 return (0);
1761}
1762
1763static int
1764copyout_control(struct proc *p, struct mbuf *m, user_addr_t control,
1765 socklen_t *controllen, int *flags)
1766{
1767 int error = 0;
1768 ssize_t len;
1769 user_addr_t ctlbuf;
1770
1771 len = *controllen;
1772 *controllen = 0;
1773 ctlbuf = control;
1774
1775 while (m && len > 0) {
1776 unsigned int tocopy;
1777 struct cmsghdr *cp = mtod(m, struct cmsghdr *);
1778 int cp_size = CMSG_ALIGN(cp->cmsg_len);
1779 int buflen = m->m_len;
1780
1781 while (buflen > 0 && len > 0) {
1782 /*
1783 * SCM_TIMESTAMP hack because struct timeval has a
1784 * different size for 32 bits and 64 bits processes
1785 */
1786 if (cp->cmsg_level == SOL_SOCKET && cp->cmsg_type == SCM_TIMESTAMP) {
1787 unsigned char tmp_buffer[CMSG_SPACE(sizeof(struct user64_timeval))];
1788 struct cmsghdr *tmp_cp = (struct cmsghdr *)(void *)tmp_buffer;
1789 int tmp_space;
1790 struct timeval *tv = (struct timeval *)(void *)CMSG_DATA(cp);
1791
1792 tmp_cp->cmsg_level = SOL_SOCKET;
1793 tmp_cp->cmsg_type = SCM_TIMESTAMP;
1794
1795 if (proc_is64bit(p)) {
1796 struct user64_timeval *tv64 = (struct user64_timeval *)(void *)CMSG_DATA(tmp_cp);
1797
1798 tv64->tv_sec = tv->tv_sec;
1799 tv64->tv_usec = tv->tv_usec;
1800
1801 tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user64_timeval));
1802 tmp_space = CMSG_SPACE(sizeof(struct user64_timeval));
1803 } else {
1804 struct user32_timeval *tv32 = (struct user32_timeval *)(void *)CMSG_DATA(tmp_cp);
1805
1806 tv32->tv_sec = tv->tv_sec;
1807 tv32->tv_usec = tv->tv_usec;
1808
1809 tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user32_timeval));
1810 tmp_space = CMSG_SPACE(sizeof(struct user32_timeval));
1811 }
1812 if (len >= tmp_space) {
1813 tocopy = tmp_space;
1814 } else {
1815 *flags |= MSG_CTRUNC;
1816 tocopy = len;
1817 }
1818 error = copyout(tmp_buffer, ctlbuf, tocopy);
1819 if (error)
1820 goto out;
1821 } else {
1822 if (cp_size > buflen) {
1823 panic("cp_size > buflen, something"
1824 "wrong with alignment!");
1825 }
1826 if (len >= cp_size) {
1827 tocopy = cp_size;
1828 } else {
1829 *flags |= MSG_CTRUNC;
1830 tocopy = len;
1831 }
1832 error = copyout((caddr_t) cp, ctlbuf, tocopy);
1833 if (error)
1834 goto out;
1835 }
1836
1837 ctlbuf += tocopy;
1838 len -= tocopy;
1839
1840 buflen -= cp_size;
1841 cp = (struct cmsghdr *)(void *)
1842 ((unsigned char *) cp + cp_size);
1843 cp_size = CMSG_ALIGN(cp->cmsg_len);
1844 }
1845
1846 m = m->m_next;
1847 }
1848 *controllen = ctlbuf - control;
1849out:
1850 return (error);
1851}
1852
2d21ac55
A
1853/*
1854 * Returns: 0 Success
1855 * ENOTSOCK
1856 * EINVAL
1857 * EBADF
1858 * EACCES Mandatory Access Control failure
1859 * copyout:EFAULT
1860 * fp_lookup:EBADF
1861 * <pru_soreceive>:ENOBUFS
1862 * <pru_soreceive>:ENOTCONN
1863 * <pru_soreceive>:EWOULDBLOCK
1864 * <pru_soreceive>:EFAULT
1865 * <pru_soreceive>:EINTR
1866 * <pru_soreceive>:EBADF
1867 * <pru_soreceive>:EINVAL
1868 * <pru_soreceive>:EMSGSIZE
1869 * <pru_soreceive>:???
1870 *
1871 * Notes: Additional return values from calls through <pru_soreceive>
1872 * depend on protocols other than TCP or AF_UNIX, which are
1873 * documented above.
1874 */
1c79356b 1875static int
2d21ac55 1876recvit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop,
b0d623f7 1877 user_addr_t namelenp, int32_t *retval)
1c79356b 1878{
39236c6e
A
1879 ssize_t len;
1880 int error;
3e170ce0 1881 struct mbuf *control = 0;
1c79356b
A
1882 struct socket *so;
1883 struct sockaddr *fromsa = 0;
91447636 1884 struct fileproc *fp;
1c79356b 1885
2d21ac55 1886 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
91447636 1887 proc_fdlock(p);
2d21ac55
A
1888 if ((error = fp_lookup(p, s, &fp, 1))) {
1889 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
91447636 1890 proc_fdunlock(p);
2d21ac55 1891 return (error);
1c79356b 1892 }
91447636 1893 if (fp->f_type != DTYPE_SOCKET) {
2d21ac55 1894 fp_drop(p, s, fp, 1);
91447636 1895 proc_fdunlock(p);
2d21ac55 1896 return (ENOTSOCK);
91447636 1897 }
1c79356b 1898
2d21ac55
A
1899 so = (struct socket *)fp->f_data;
1900 if (so == NULL) {
1901 fp_drop(p, s, fp, 1);
1902 proc_fdunlock(p);
1903 return (EBADF);
1904 }
91447636
A
1905
1906 proc_fdunlock(p);
2d21ac55
A
1907
1908#if CONFIG_MACF_SOCKET_SUBSET
1909 /*
1910 * We check the state without holding the socket lock;
1911 * if a race condition occurs, it would simply result
1912 * in an extra call to the MAC check function.
1913 */
316670eb
A
1914 if (!(so->so_state & SS_DEFUNCT) &&
1915 !(so->so_state & SS_ISCONNECTED) &&
39236c6e 1916 !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
2d21ac55
A
1917 (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0)
1918 goto out1;
1919#endif /* MAC_SOCKET_SUBSET */
91447636 1920 if (uio_resid(uiop) < 0) {
2d21ac55 1921 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, EINVAL, 0, 0, 0, 0);
91447636
A
1922 error = EINVAL;
1923 goto out1;
1c79356b 1924 }
91447636
A
1925
1926 len = uio_resid(uiop);
2d21ac55
A
1927 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, uiop,
1928 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
1929 &mp->msg_flags);
b0d623f7
A
1930 if (fromsa)
1931 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()),
1932 fromsa);
1c79356b 1933 if (error) {
91447636 1934 if (uio_resid(uiop) != len && (error == ERESTART ||
1c79356b
A
1935 error == EINTR || error == EWOULDBLOCK))
1936 error = 0;
1937 }
1c79356b
A
1938 if (error)
1939 goto out;
2d21ac55 1940
91447636 1941 *retval = len - uio_resid(uiop);
2d21ac55 1942
3e170ce0
A
1943 if (mp->msg_name) {
1944 error = copyout_sa(fromsa, mp->msg_name, &mp->msg_namelen);
1945 if (error)
1946 goto out;
2d21ac55 1947 /* return the actual, untruncated address length */
1c79356b 1948 if (namelenp &&
3e170ce0 1949 (error = copyout((caddr_t)&mp->msg_namelen, namelenp,
2d21ac55 1950 sizeof (int)))) {
1c79356b
A
1951 goto out;
1952 }
1953 }
39236c6e 1954
3e170ce0
A
1955 if (mp->msg_control) {
1956 error = copyout_control(p, control, mp->msg_control,
1957 &mp->msg_controllen, &mp->msg_flags);
1c79356b
A
1958 }
1959out:
1960 if (fromsa)
1961 FREE(fromsa, M_SONAME);
1962 if (control)
1963 m_freem(control);
2d21ac55 1964 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
91447636
A
1965out1:
1966 fp_drop(p, s, fp, 0);
1c79356b
A
1967 return (error);
1968}
1969
2d21ac55
A
1970/*
1971 * Returns: 0 Success
1972 * ENOMEM
1973 * copyin:EFAULT
1974 * recvit:???
1975 * read:??? [4056224: applicable for pipes]
1976 *
1977 * Notes: The read entry point is only called as part of support for
1978 * binary backward compatability; new code should use read
1979 * instead of recv or recvfrom when attempting to read data
1980 * from pipes.
1981 *
1982 * For full documentation of the return codes from recvit, see
1983 * the block header for the recvit function.
1984 */
1985int
b0d623f7 1986recvfrom(struct proc *p, struct recvfrom_args *uap, int32_t *retval)
2d21ac55
A
1987{
1988 __pthread_testcancel(1);
3e170ce0
A
1989 return (recvfrom_nocancel(p, (struct recvfrom_nocancel_args *)uap,
1990 retval));
2d21ac55
A
1991}
1992
1c79356b 1993int
3e170ce0
A
1994recvfrom_nocancel(struct proc *p, struct recvfrom_nocancel_args *uap,
1995 int32_t *retval)
1c79356b 1996{
91447636 1997 struct user_msghdr msg;
1c79356b 1998 int error;
91447636 1999 uio_t auio = NULL;
1c79356b 2000
2d21ac55 2001 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_START, 0, 0, 0, 0, 0);
55e303ae 2002 AUDIT_ARG(fd, uap->s);
1c79356b
A
2003
2004 if (uap->fromlenaddr) {
91447636 2005 error = copyin(uap->fromlenaddr,
1c79356b
A
2006 (caddr_t)&msg.msg_namelen, sizeof (msg.msg_namelen));
2007 if (error)
2008 return (error);
2d21ac55 2009 } else {
1c79356b 2010 msg.msg_namelen = 0;
2d21ac55 2011 }
1c79356b 2012 msg.msg_name = uap->from;
91447636 2013 auio = uio_create(1, 0,
2d21ac55
A
2014 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
2015 UIO_READ);
91447636
A
2016 if (auio == NULL) {
2017 return (ENOMEM);
2018 }
2d21ac55 2019
91447636
A
2020 uio_addiov(auio, uap->buf, uap->len);
2021 /* no need to set up msg_iov. recvit uses uio_t we send it */
2022 msg.msg_iov = 0;
2023 msg.msg_iovlen = 0;
1c79356b 2024 msg.msg_control = 0;
91447636 2025 msg.msg_controllen = 0;
1c79356b 2026 msg.msg_flags = uap->flags;
91447636
A
2027 error = recvit(p, uap->s, &msg, auio, uap->fromlenaddr, retval);
2028 if (auio != NULL) {
2029 uio_free(auio);
2030 }
2d21ac55 2031
2d21ac55 2032 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_END, error, 0, 0, 0, 0);
1c79356b 2033
2d21ac55 2034 return (error);
1c79356b
A
2035}
2036
2037/*
2d21ac55
A
2038 * Returns: 0 Success
2039 * EMSGSIZE
2040 * ENOMEM
2041 * copyin:EFAULT
2042 * copyout:EFAULT
2043 * recvit:???
2044 *
2045 * Notes: For full documentation of the return codes from recvit, see
2046 * the block header for the recvit function.
1c79356b
A
2047 */
2048int
b0d623f7 2049recvmsg(struct proc *p, struct recvmsg_args *uap, int32_t *retval)
1c79356b 2050{
2d21ac55 2051 __pthread_testcancel(1);
3e170ce0
A
2052 return (recvmsg_nocancel(p, (struct recvmsg_nocancel_args *)uap,
2053 retval));
1c79356b 2054}
1c79356b
A
2055
2056int
3e170ce0
A
2057recvmsg_nocancel(struct proc *p, struct recvmsg_nocancel_args *uap,
2058 int32_t *retval)
1c79356b 2059{
b0d623f7
A
2060 struct user32_msghdr msg32;
2061 struct user64_msghdr msg64;
91447636
A
2062 struct user_msghdr user_msg;
2063 caddr_t msghdrp;
2064 int size_of_msghdr;
2065 user_addr_t uiov;
2d21ac55 2066 int error;
91447636
A
2067 uio_t auio = NULL;
2068 struct user_iovec *iovp;
1c79356b 2069
2d21ac55 2070 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
55e303ae 2071 AUDIT_ARG(fd, uap->s);
91447636 2072 if (IS_64BIT_PROCESS(p)) {
b0d623f7
A
2073 msghdrp = (caddr_t)&msg64;
2074 size_of_msghdr = sizeof (msg64);
2d21ac55 2075 } else {
b0d623f7
A
2076 msghdrp = (caddr_t)&msg32;
2077 size_of_msghdr = sizeof (msg32);
91447636
A
2078 }
2079 error = copyin(uap->msg, msghdrp, size_of_msghdr);
2d21ac55
A
2080 if (error) {
2081 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1c79356b
A
2082 return (error);
2083 }
2084
91447636 2085 /* only need to copy if user process is not 64-bit */
b0d623f7
A
2086 if (IS_64BIT_PROCESS(p)) {
2087 user_msg.msg_flags = msg64.msg_flags;
2088 user_msg.msg_controllen = msg64.msg_controllen;
2089 user_msg.msg_control = msg64.msg_control;
2090 user_msg.msg_iovlen = msg64.msg_iovlen;
2091 user_msg.msg_iov = msg64.msg_iov;
2092 user_msg.msg_namelen = msg64.msg_namelen;
2093 user_msg.msg_name = msg64.msg_name;
2094 } else {
2095 user_msg.msg_flags = msg32.msg_flags;
2096 user_msg.msg_controllen = msg32.msg_controllen;
2097 user_msg.msg_control = msg32.msg_control;
2098 user_msg.msg_iovlen = msg32.msg_iovlen;
2099 user_msg.msg_iov = msg32.msg_iov;
2100 user_msg.msg_namelen = msg32.msg_namelen;
2101 user_msg.msg_name = msg32.msg_name;
91447636
A
2102 }
2103
2104 if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
2d21ac55
A
2105 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, EMSGSIZE,
2106 0, 0, 0, 0);
91447636
A
2107 return (EMSGSIZE);
2108 }
2109
91447636 2110 user_msg.msg_flags = uap->flags;
91447636
A
2111
2112 /* allocate a uio large enough to hold the number of iovecs passed */
2113 auio = uio_create(user_msg.msg_iovlen, 0,
2d21ac55
A
2114 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
2115 UIO_READ);
91447636
A
2116 if (auio == NULL) {
2117 error = ENOMEM;
2118 goto done;
2119 }
2120
2d21ac55
A
2121 /*
2122 * get location of iovecs within the uio. then copyin the iovecs from
91447636
A
2123 * user space.
2124 */
2125 iovp = uio_iovsaddr(auio);
2126 if (iovp == NULL) {
2127 error = ENOMEM;
2128 goto done;
2129 }
2130 uiov = user_msg.msg_iov;
2131 user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
b0d623f7
A
2132 error = copyin_user_iovec_array(uiov,
2133 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
2134 user_msg.msg_iovlen, iovp);
1c79356b
A
2135 if (error)
2136 goto done;
91447636 2137
2d21ac55 2138 /* finish setup of uio_t */
39236c6e
A
2139 error = uio_calculateresid(auio);
2140 if (error) {
2141 goto done;
2142 }
2d21ac55 2143
91447636 2144 error = recvit(p, uap->s, &user_msg, auio, 0, retval);
1c79356b 2145 if (!error) {
91447636 2146 user_msg.msg_iov = uiov;
b0d623f7
A
2147 if (IS_64BIT_PROCESS(p)) {
2148 msg64.msg_flags = user_msg.msg_flags;
2149 msg64.msg_controllen = user_msg.msg_controllen;
2150 msg64.msg_control = user_msg.msg_control;
2151 msg64.msg_iovlen = user_msg.msg_iovlen;
2152 msg64.msg_iov = user_msg.msg_iov;
2153 msg64.msg_namelen = user_msg.msg_namelen;
2154 msg64.msg_name = user_msg.msg_name;
2155 } else {
2156 msg32.msg_flags = user_msg.msg_flags;
2157 msg32.msg_controllen = user_msg.msg_controllen;
2158 msg32.msg_control = user_msg.msg_control;
2159 msg32.msg_iovlen = user_msg.msg_iovlen;
2160 msg32.msg_iov = user_msg.msg_iov;
2161 msg32.msg_namelen = user_msg.msg_namelen;
2162 msg32.msg_name = user_msg.msg_name;
91447636
A
2163 }
2164 error = copyout(msghdrp, uap->msg, size_of_msghdr);
1c79356b
A
2165 }
2166done:
91447636
A
2167 if (auio != NULL) {
2168 uio_free(auio);
2169 }
2d21ac55 2170 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1c79356b
A
2171 return (error);
2172}
2173
fe8ab488
A
2174int
2175recvmsg_x(struct proc *p, struct recvmsg_x_args *uap, user_ssize_t *retval)
2176{
2177 int error = EOPNOTSUPP;
3e170ce0
A
2178 struct user_msghdr_x *user_msg_x = NULL;
2179 struct recv_msg_elem *recv_msg_array = NULL;
fe8ab488
A
2180 struct socket *so;
2181 user_ssize_t len_before = 0, len_after;
2182 int need_drop = 0;
2183 size_t size_of_msghdr;
2184 void *umsgp = NULL;
2185 u_int i;
2186 u_int uiocnt;
2187
2188 KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0);
2189
2190 error = file_socket(uap->s, &so);
2191 if (error) {
2192 goto out;
2193 }
2194 need_drop = 1;
2195 if (so == NULL) {
2196 error = EBADF;
2197 goto out;
2198 }
fe8ab488
A
2199 /*
2200 * Input parameter range check
2201 */
2202 if (uap->cnt == 0 || uap->cnt > UIO_MAXIOV) {
2203 error = EINVAL;
2204 goto out;
2205 }
3e170ce0
A
2206 if (uap->cnt > somaxrecvmsgx)
2207 uap->cnt = somaxrecvmsgx;
2208
2209 user_msg_x = _MALLOC(uap->cnt * sizeof(struct user_msghdr_x),
fe8ab488 2210 M_TEMP, M_WAITOK | M_ZERO);
3e170ce0
A
2211 if (user_msg_x == NULL) {
2212 DBG_PRINTF("%s _MALLOC() user_msg_x failed\n", __func__);
fe8ab488
A
2213 error = ENOMEM;
2214 goto out;
2215 }
3e170ce0
A
2216 recv_msg_array = alloc_recv_msg_array(uap->cnt);
2217 if (recv_msg_array == NULL) {
2218 DBG_PRINTF("%s alloc_recv_msg_array() failed\n", __func__);
fe8ab488
A
2219 error = ENOMEM;
2220 goto out;
2221 }
fe8ab488
A
2222 size_of_msghdr = IS_64BIT_PROCESS(p) ?
2223 sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x);
2224
2225 umsgp = _MALLOC(uap->cnt * size_of_msghdr, M_TEMP, M_WAITOK | M_ZERO);
2226 if (umsgp == NULL) {
3e170ce0 2227 DBG_PRINTF("%s _MALLOC() umsgp failed\n", __func__);
fe8ab488
A
2228 error = ENOMEM;
2229 goto out;
2230 }
2231 error = copyin(uap->msgp, umsgp, uap->cnt * size_of_msghdr);
2232 if (error) {
3e170ce0 2233 DBG_PRINTF("%s copyin() failed\n", __func__);
fe8ab488
A
2234 goto out;
2235 }
3e170ce0 2236 error = internalize_recv_msghdr_array(umsgp,
fe8ab488 2237 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
3e170ce0 2238 UIO_READ, uap->cnt, user_msg_x, recv_msg_array);
fe8ab488 2239 if (error) {
3e170ce0 2240 DBG_PRINTF("%s copyin_user_msghdr_array() failed\n", __func__);
fe8ab488
A
2241 goto out;
2242 }
2243 /*
2244 * Make sure the size of each message iovec and
2245 * the aggregate size of all the iovec is valid
2246 */
3e170ce0 2247 if (recv_msg_array_is_valid(recv_msg_array, uap->cnt) == 0) {
fe8ab488
A
2248 error = EINVAL;
2249 goto out;
2250 }
fe8ab488
A
2251 /*
2252 * Sanity check on passed arguments
2253 */
2254 for (i = 0; i < uap->cnt; i++) {
3e170ce0 2255 struct user_msghdr_x *mp = user_msg_x + i;
fe8ab488
A
2256
2257 if (mp->msg_flags != 0) {
2258 error = EINVAL;
2259 goto out;
2260 }
fe8ab488
A
2261 }
2262#if CONFIG_MACF_SOCKET_SUBSET
2263 /*
2264 * We check the state without holding the socket lock;
2265 * if a race condition occurs, it would simply result
2266 * in an extra call to the MAC check function.
2267 */
2268 if (!(so->so_state & SS_DEFUNCT) &&
2269 !(so->so_state & SS_ISCONNECTED) &&
2270 !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
2271 (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0)
2272 goto out;
2273#endif /* MAC_SOCKET_SUBSET */
2274
3e170ce0 2275 len_before = recv_msg_array_resid(recv_msg_array, uap->cnt);
fe8ab488 2276
3e170ce0
A
2277 if (so->so_proto->pr_usrreqs->pru_soreceive_list !=
2278 pru_soreceive_list_notsupp &&
2279 somaxrecvmsgx == 0) {
2280 error = so->so_proto->pr_usrreqs->pru_soreceive_list(so,
2281 recv_msg_array, uap->cnt, &uap->flags);
2282 } else {
2283 int flags = uap->flags;
fe8ab488 2284
3e170ce0
A
2285 for (i = 0; i < uap->cnt; i++) {
2286 struct recv_msg_elem *recv_msg_elem;
2287 uio_t auio;
2288 struct sockaddr **psa;
2289 struct mbuf **controlp;
2290
2291 recv_msg_elem = recv_msg_array + i;
2292 auio = recv_msg_elem->uio;
2293
2294 /*
2295 * Do not block if we got at least one packet
2296 */
2297 if (i > 0)
2298 flags |= MSG_DONTWAIT;
2299
2300 psa = (recv_msg_elem->which & SOCK_MSG_SA) ?
2301 &recv_msg_elem->psa : NULL;
2302 controlp = (recv_msg_elem->which & SOCK_MSG_CONTROL) ?
2303 &recv_msg_elem->controlp : NULL;
2304
2305 error = so->so_proto->pr_usrreqs->pru_soreceive(so, psa,
2306 auio, (struct mbuf **)0, controlp, &flags);
2307 if (error)
2308 break;
2309 /*
2310 * We have some data
2311 */
2312 recv_msg_elem->which |= SOCK_MSG_DATA;
2313 /*
2314 * Stop on partial copy
2315 */
2316 if (flags & (MSG_RCVMORE | MSG_TRUNC))
2317 break;
2318 }
2319 if ((uap->flags & MSG_DONTWAIT) == 0)
2320 flags &= ~MSG_DONTWAIT;
2321 uap->flags = flags;
2322 }
2323
2324 len_after = recv_msg_array_resid(recv_msg_array, uap->cnt);
fe8ab488
A
2325
2326 if (error) {
2327 if (len_after != len_before && (error == ERESTART ||
2328 error == EINTR || error == EWOULDBLOCK))
2329 error = 0;
3e170ce0
A
2330 else
2331 goto out;
fe8ab488 2332 }
fe8ab488 2333
3e170ce0
A
2334 uiocnt = externalize_recv_msghdr_array(umsgp,
2335 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
2336 UIO_READ, uap->cnt, user_msg_x, recv_msg_array);
2337
2338 error = copyout(umsgp, uap->msgp, uap->cnt * size_of_msghdr);
2339 if (error) {
2340 DBG_PRINTF("%s copyout() failed\n", __func__);
2341 goto out;
2342 }
2343 *retval = (int)(uiocnt);
2344
2345 for (i = 0; i < uap->cnt; i++) {
2346 struct user_msghdr_x *mp = user_msg_x + i;
2347 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
2348 struct sockaddr *fromsa = recv_msg_elem->psa;
2349
2350 if (mp->msg_name) {
2351 error = copyout_sa(fromsa, mp->msg_name,
2352 &mp->msg_namelen);
2353 if (error)
2354 goto out;
2355 }
2356 if (mp->msg_control) {
2357 error = copyout_control(p, recv_msg_elem->controlp,
2358 mp->msg_control, &mp->msg_controllen,
2359 &mp->msg_flags);
2360 if (error)
2361 goto out;
fe8ab488 2362 }
fe8ab488
A
2363 }
2364out:
2365 if (need_drop)
2366 file_drop(uap->s);
2367 if (umsgp != NULL)
2368 _FREE(umsgp, M_TEMP);
3e170ce0
A
2369 if (recv_msg_array != NULL)
2370 free_recv_msg_array(recv_msg_array, uap->cnt);
2371 if (user_msg_x != NULL)
2372 _FREE(user_msg_x, M_TEMP);
2373
fe8ab488 2374 KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0);
3e170ce0 2375
fe8ab488
A
2376 return (error);
2377}
2378
2d21ac55
A
2379/*
2380 * Returns: 0 Success
2381 * EBADF
2382 * file_socket:ENOTSOCK
2383 * file_socket:EBADF
2384 * soshutdown:EINVAL
2385 * soshutdown:ENOTCONN
2386 * soshutdown:EADDRNOTAVAIL[TCP]
2387 * soshutdown:ENOBUFS[TCP]
2388 * soshutdown:EMSGSIZE[TCP]
2389 * soshutdown:EHOSTUNREACH[TCP]
2390 * soshutdown:ENETUNREACH[TCP]
2391 * soshutdown:ENETDOWN[TCP]
2392 * soshutdown:ENOMEM[TCP]
2393 * soshutdown:EACCES[TCP]
2394 * soshutdown:EMSGSIZE[TCP]
2395 * soshutdown:ENOBUFS[TCP]
2396 * soshutdown:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
2397 * soshutdown:??? [other protocol families]
2398 */
1c79356b
A
2399/* ARGSUSED */
2400int
2d21ac55 2401shutdown(__unused struct proc *p, struct shutdown_args *uap,
b0d623f7 2402 __unused int32_t *retval)
1c79356b 2403{
2d21ac55 2404 struct socket *so;
1c79356b
A
2405 int error;
2406
55e303ae 2407 AUDIT_ARG(fd, uap->s);
91447636 2408 error = file_socket(uap->s, &so);
1c79356b
A
2409 if (error)
2410 return (error);
91447636
A
2411 if (so == NULL) {
2412 error = EBADF;
2413 goto out;
2414 }
2415 error = soshutdown((struct socket *)so, uap->how);
2416out:
2417 file_drop(uap->s);
2d21ac55 2418 return (error);
1c79356b
A
2419}
2420
2d21ac55
A
2421/*
2422 * Returns: 0 Success
2423 * EFAULT
2424 * EINVAL
2425 * EACCES Mandatory Access Control failure
2426 * file_socket:ENOTSOCK
2427 * file_socket:EBADF
2428 * sosetopt:EINVAL
2429 * sosetopt:ENOPROTOOPT
2430 * sosetopt:ENOBUFS
2431 * sosetopt:EDOM
2432 * sosetopt:EFAULT
2433 * sosetopt:EOPNOTSUPP[AF_UNIX]
2434 * sosetopt:???
2435 */
1c79356b
A
2436/* ARGSUSED */
2437int
2d21ac55 2438setsockopt(struct proc *p, struct setsockopt_args *uap,
b0d623f7 2439 __unused int32_t *retval)
1c79356b 2440{
2d21ac55 2441 struct socket *so;
1c79356b
A
2442 struct sockopt sopt;
2443 int error;
2444
55e303ae 2445 AUDIT_ARG(fd, uap->s);
1c79356b
A
2446 if (uap->val == 0 && uap->valsize != 0)
2447 return (EFAULT);
2d21ac55 2448 /* No bounds checking on size (it's unsigned) */
1c79356b 2449
91447636 2450 error = file_socket(uap->s, &so);
1c79356b
A
2451 if (error)
2452 return (error);
2453
2454 sopt.sopt_dir = SOPT_SET;
2455 sopt.sopt_level = uap->level;
2456 sopt.sopt_name = uap->name;
2457 sopt.sopt_val = uap->val;
2458 sopt.sopt_valsize = uap->valsize;
2459 sopt.sopt_p = p;
2460
91447636
A
2461 if (so == NULL) {
2462 error = EINVAL;
2463 goto out;
2464 }
2d21ac55
A
2465#if CONFIG_MACF_SOCKET_SUBSET
2466 if ((error = mac_socket_check_setsockopt(kauth_cred_get(), so,
2467 &sopt)) != 0)
2468 goto out;
2469#endif /* MAC_SOCKET_SUBSET */
39236c6e 2470 error = sosetoptlock(so, &sopt, 1); /* will lock socket */
91447636
A
2471out:
2472 file_drop(uap->s);
2d21ac55 2473 return (error);
1c79356b
A
2474}
2475
2476
2477
2d21ac55
A
2478/*
2479 * Returns: 0 Success
2480 * EINVAL
2481 * EBADF
2482 * EACCES Mandatory Access Control failure
2483 * copyin:EFAULT
2484 * copyout:EFAULT
2485 * file_socket:ENOTSOCK
2486 * file_socket:EBADF
2487 * sogetopt:???
2488 */
1c79356b 2489int
2d21ac55 2490getsockopt(struct proc *p, struct getsockopt_args *uap,
b0d623f7 2491 __unused int32_t *retval)
1c79356b 2492{
91447636
A
2493 int error;
2494 socklen_t valsize;
2495 struct sockopt sopt;
2d21ac55 2496 struct socket *so;
1c79356b 2497
91447636 2498 error = file_socket(uap->s, &so);
1c79356b
A
2499 if (error)
2500 return (error);
2501 if (uap->val) {
2d21ac55
A
2502 error = copyin(uap->avalsize, (caddr_t)&valsize,
2503 sizeof (valsize));
1c79356b 2504 if (error)
91447636 2505 goto out;
2d21ac55
A
2506 /* No bounds checking on size (it's unsigned) */
2507 } else {
1c79356b 2508 valsize = 0;
2d21ac55 2509 }
1c79356b
A
2510 sopt.sopt_dir = SOPT_GET;
2511 sopt.sopt_level = uap->level;
2512 sopt.sopt_name = uap->name;
2513 sopt.sopt_val = uap->val;
2514 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
2515 sopt.sopt_p = p;
2516
91447636
A
2517 if (so == NULL) {
2518 error = EBADF;
2519 goto out;
2520 }
2d21ac55
A
2521#if CONFIG_MACF_SOCKET_SUBSET
2522 if ((error = mac_socket_check_getsockopt(kauth_cred_get(), so,
2523 &sopt)) != 0)
2524 goto out;
2525#endif /* MAC_SOCKET_SUBSET */
39236c6e 2526 error = sogetoptlock((struct socket *)so, &sopt, 1); /* will lock */
1c79356b
A
2527 if (error == 0) {
2528 valsize = sopt.sopt_valsize;
2d21ac55
A
2529 error = copyout((caddr_t)&valsize, uap->avalsize,
2530 sizeof (valsize));
1c79356b 2531 }
91447636
A
2532out:
2533 file_drop(uap->s);
1c79356b
A
2534 return (error);
2535}
2536
2537
2538/*
2539 * Get socket name.
2d21ac55
A
2540 *
2541 * Returns: 0 Success
2542 * EBADF
2543 * file_socket:ENOTSOCK
2544 * file_socket:EBADF
2545 * copyin:EFAULT
2546 * copyout:EFAULT
2547 * <pru_sockaddr>:ENOBUFS[TCP]
2548 * <pru_sockaddr>:ECONNRESET[TCP]
2549 * <pru_sockaddr>:EINVAL[AF_UNIX]
2550 * <sf_getsockname>:???
1c79356b
A
2551 */
2552/* ARGSUSED */
2d21ac55
A
2553int
2554getsockname(__unused struct proc *p, struct getsockname_args *uap,
b0d623f7 2555 __unused int32_t *retval)
1c79356b 2556{
91447636 2557 struct socket *so;
1c79356b 2558 struct sockaddr *sa;
91447636 2559 socklen_t len;
2d21ac55 2560 socklen_t sa_len;
1c79356b
A
2561 int error;
2562
91447636 2563 error = file_socket(uap->fdes, &so);
1c79356b
A
2564 if (error)
2565 return (error);
2d21ac55 2566 error = copyin(uap->alen, (caddr_t)&len, sizeof (socklen_t));
1c79356b 2567 if (error)
91447636
A
2568 goto out;
2569 if (so == NULL) {
2570 error = EBADF;
2571 goto out;
2572 }
1c79356b 2573 sa = 0;
91447636 2574 socket_lock(so, 1);
1c79356b 2575 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
2d21ac55 2576 if (error == 0) {
6d2010ae 2577 error = sflt_getsockname(so, &sa);
91447636
A
2578 if (error == EJUSTRETURN)
2579 error = 0;
91447636
A
2580 }
2581 socket_unlock(so, 1);
1c79356b
A
2582 if (error)
2583 goto bad;
2584 if (sa == 0) {
2585 len = 0;
2586 goto gotnothing;
2587 }
2588
2d21ac55
A
2589 sa_len = sa->sa_len;
2590 len = MIN(len, sa_len);
91447636 2591 error = copyout((caddr_t)sa, uap->asa, len);
2d21ac55
A
2592 if (error)
2593 goto bad;
2594 /* return the actual, untruncated address length */
2595 len = sa_len;
1c79356b 2596gotnothing:
2d21ac55 2597 error = copyout((caddr_t)&len, uap->alen, sizeof (socklen_t));
1c79356b
A
2598bad:
2599 if (sa)
2600 FREE(sa, M_SONAME);
91447636
A
2601out:
2602 file_drop(uap->fdes);
1c79356b
A
2603 return (error);
2604}
2605
1c79356b
A
2606/*
2607 * Get name of peer for connected socket.
2d21ac55
A
2608 *
2609 * Returns: 0 Success
2610 * EBADF
2611 * EINVAL
2612 * ENOTCONN
2613 * file_socket:ENOTSOCK
2614 * file_socket:EBADF
2615 * copyin:EFAULT
2616 * copyout:EFAULT
2617 * <pru_peeraddr>:???
2618 * <sf_getpeername>:???
1c79356b
A
2619 */
2620/* ARGSUSED */
2621int
2d21ac55 2622getpeername(__unused struct proc *p, struct getpeername_args *uap,
b0d623f7 2623 __unused int32_t *retval)
1c79356b 2624{
91447636 2625 struct socket *so;
1c79356b 2626 struct sockaddr *sa;
91447636 2627 socklen_t len;
2d21ac55 2628 socklen_t sa_len;
1c79356b
A
2629 int error;
2630
91447636 2631 error = file_socket(uap->fdes, &so);
1c79356b
A
2632 if (error)
2633 return (error);
91447636
A
2634 if (so == NULL) {
2635 error = EBADF;
2636 goto out;
2637 }
2638
2639 socket_lock(so, 1);
2640
2d21ac55
A
2641 if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
2642 (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
2643 /* the socket has been shutdown, no more getpeername's */
2644 socket_unlock(so, 1);
2645 error = EINVAL;
2646 goto out;
2647 }
2648
91447636
A
2649 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
2650 socket_unlock(so, 1);
2651 error = ENOTCONN;
2652 goto out;
2653 }
2d21ac55 2654 error = copyin(uap->alen, (caddr_t)&len, sizeof (socklen_t));
91447636
A
2655 if (error) {
2656 socket_unlock(so, 1);
2657 goto out;
2658 }
1c79356b
A
2659 sa = 0;
2660 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
2d21ac55 2661 if (error == 0) {
6d2010ae 2662 error = sflt_getpeername(so, &sa);
91447636
A
2663 if (error == EJUSTRETURN)
2664 error = 0;
91447636
A
2665 }
2666 socket_unlock(so, 1);
1c79356b
A
2667 if (error)
2668 goto bad;
2669 if (sa == 0) {
2670 len = 0;
2671 goto gotnothing;
2672 }
2d21ac55
A
2673 sa_len = sa->sa_len;
2674 len = MIN(len, sa_len);
91447636 2675 error = copyout(sa, uap->asa, len);
1c79356b
A
2676 if (error)
2677 goto bad;
2d21ac55
A
2678 /* return the actual, untruncated address length */
2679 len = sa_len;
1c79356b 2680gotnothing:
2d21ac55 2681 error = copyout((caddr_t)&len, uap->alen, sizeof (socklen_t));
1c79356b
A
2682bad:
2683 if (sa) FREE(sa, M_SONAME);
91447636
A
2684out:
2685 file_drop(uap->fdes);
1c79356b
A
2686 return (error);
2687}
2688
2689int
2d21ac55 2690sockargs(struct mbuf **mp, user_addr_t data, int buflen, int type)
1c79356b 2691{
2d21ac55
A
2692 struct sockaddr *sa;
2693 struct mbuf *m;
1c79356b
A
2694 int error;
2695
e2d2fc5c 2696 size_t alloc_buflen = (size_t)buflen;
39236c6e 2697
3e170ce0 2698 if (alloc_buflen > INT_MAX/2)
e2d2fc5c 2699 return (EINVAL);
b0d623f7 2700#ifdef __LP64__
3e170ce0
A
2701 /*
2702 * The fd's in the buffer must expand to be pointers, thus we need twice
2703 * as much space
2704 */
2705 if (type == MT_CONTROL)
2706 alloc_buflen = ((buflen - sizeof(struct cmsghdr))*2) +
2707 sizeof(struct cmsghdr);
b0d623f7 2708#endif
e2d2fc5c
A
2709 if (alloc_buflen > MLEN) {
2710 if (type == MT_SONAME && alloc_buflen <= 112)
3e170ce0 2711 alloc_buflen = MLEN; /* unix domain compat. hack */
e2d2fc5c 2712 else if (alloc_buflen > MCLBYTES)
91447636 2713 return (EINVAL);
1c79356b
A
2714 }
2715 m = m_get(M_WAIT, type);
2716 if (m == NULL)
2717 return (ENOBUFS);
e2d2fc5c 2718 if (alloc_buflen > MLEN) {
91447636
A
2719 MCLGET(m, M_WAIT);
2720 if ((m->m_flags & M_EXT) == 0) {
2721 m_free(m);
2d21ac55 2722 return (ENOBUFS);
91447636
A
2723 }
2724 }
3e170ce0
A
2725 /*
2726 * K64: We still copyin the original buflen because it gets expanded
2727 * later and we lie about the size of the mbuf because it only affects
2728 * unp_* functions
b0d623f7 2729 */
1c79356b 2730 m->m_len = buflen;
91447636 2731 error = copyin(data, mtod(m, caddr_t), (u_int)buflen);
2d21ac55 2732 if (error) {
1c79356b 2733 (void) m_free(m);
2d21ac55 2734 } else {
1c79356b
A
2735 *mp = m;
2736 if (type == MT_SONAME) {
2737 sa = mtod(m, struct sockaddr *);
1c79356b
A
2738 sa->sa_len = buflen;
2739 }
2740 }
2741 return (error);
2742}
2743
91447636
A
2744/*
2745 * Given a user_addr_t of length len, allocate and fill out a *sa.
2d21ac55
A
2746 *
2747 * Returns: 0 Success
2748 * ENAMETOOLONG Filename too long
2749 * EINVAL Invalid argument
2750 * ENOMEM Not enough space
2751 * copyin:EFAULT Bad address
91447636 2752 */
2d21ac55
A
2753static int
2754getsockaddr(struct socket *so, struct sockaddr **namp, user_addr_t uaddr,
4a3eedf9 2755 size_t len, boolean_t translate_unspec)
1c79356b
A
2756{
2757 struct sockaddr *sa;
2758 int error;
2759
2760 if (len > SOCK_MAXADDRLEN)
2d21ac55 2761 return (ENAMETOOLONG);
1c79356b 2762
2d21ac55
A
2763 if (len < offsetof(struct sockaddr, sa_data[0]))
2764 return (EINVAL);
1c79356b 2765
490019cf 2766 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK | M_ZERO);
91447636 2767 if (sa == NULL) {
2d21ac55 2768 return (ENOMEM);
91447636
A
2769 }
2770 error = copyin(uaddr, (caddr_t)sa, len);
1c79356b
A
2771 if (error) {
2772 FREE(sa, M_SONAME);
2773 } else {
2d21ac55
A
2774 /*
2775 * Force sa_family to AF_INET on AF_INET sockets to handle
2776 * legacy applications that use AF_UNSPEC (0). On all other
2777 * sockets we leave it unchanged and let the lower layer
2778 * handle it.
2779 */
4a3eedf9 2780 if (translate_unspec && sa->sa_family == AF_UNSPEC &&
39236c6e 2781 SOCK_CHECK_DOM(so, PF_INET) &&
2d21ac55
A
2782 len == sizeof (struct sockaddr_in))
2783 sa->sa_family = AF_INET;
2784
1c79356b
A
2785 sa->sa_len = len;
2786 *namp = sa;
2787 }
2d21ac55 2788 return (error);
1c79356b
A
2789}
2790
2d21ac55
A
2791static int
2792getsockaddr_s(struct socket *so, struct sockaddr_storage *ss,
4a3eedf9 2793 user_addr_t uaddr, size_t len, boolean_t translate_unspec)
1c79356b 2794{
2d21ac55
A
2795 int error;
2796
2797 if (ss == NULL || uaddr == USER_ADDR_NULL ||
2798 len < offsetof(struct sockaddr, sa_data[0]))
2799 return (EINVAL);
2800
2801 /*
2802 * sockaddr_storage size is less than SOCK_MAXADDRLEN,
2803 * so the check here is inclusive.
2804 */
2805 if (len > sizeof (*ss))
2806 return (ENAMETOOLONG);
1c79356b 2807
2d21ac55
A
2808 bzero(ss, sizeof (*ss));
2809 error = copyin(uaddr, (caddr_t)ss, len);
2810 if (error == 0) {
2811 /*
2812 * Force sa_family to AF_INET on AF_INET sockets to handle
2813 * legacy applications that use AF_UNSPEC (0). On all other
2814 * sockets we leave it unchanged and let the lower layer
2815 * handle it.
2816 */
4a3eedf9 2817 if (translate_unspec && ss->ss_family == AF_UNSPEC &&
39236c6e 2818 SOCK_CHECK_DOM(so, PF_INET) &&
2d21ac55
A
2819 len == sizeof (struct sockaddr_in))
2820 ss->ss_family = AF_INET;
91447636 2821
2d21ac55 2822 ss->ss_len = len;
1c79356b 2823 }
2d21ac55 2824 return (error);
1c79356b
A
2825}
2826
39236c6e
A
2827/*
2828 * Hard limit on the number of source and/or destination addresses
2829 * that can be specified by an application.
2830 */
2831#define SOCKADDRLIST_MAX_ENTRIES 64
2832
2833static int
2834getsockaddrlist(struct socket *so, struct sockaddr_list **slp,
2835 user_addr_t uaddr, socklen_t uaddrlen, boolean_t xlate_unspec)
2836{
2837 struct sockaddr_list *sl;
2838 int error = 0;
2d21ac55 2839
39236c6e
A
2840 *slp = NULL;
2841
490019cf
A
2842 if (uaddr == USER_ADDR_NULL || uaddrlen == 0 ||
2843 uaddrlen > (sizeof(struct sockaddr_in6) * SOCKADDRLIST_MAX_ENTRIES))
39236c6e
A
2844 return (EINVAL);
2845
2846 sl = sockaddrlist_alloc(M_WAITOK);
2847 if (sl == NULL)
2848 return (ENOMEM);
2849
2850 VERIFY(sl->sl_cnt == 0);
2851 while (uaddrlen > 0 && sl->sl_cnt < SOCKADDRLIST_MAX_ENTRIES) {
2852 struct sockaddr_storage ss;
2853 struct sockaddr_entry *se;
2854 struct sockaddr *sa;
2855
2856 if (uaddrlen < sizeof (struct sockaddr)) {
2857 error = EINVAL;
2858 break;
2859 }
2860
2861 bzero(&ss, sizeof (ss));
2862 error = copyin(uaddr, (caddr_t)&ss, sizeof (struct sockaddr));
2863 if (error != 0)
2864 break;
2865
2866 /* getsockaddr does the same but we need them now */
2867 if (uaddrlen < ss.ss_len ||
2868 ss.ss_len < offsetof(struct sockaddr, sa_data[0])) {
2869 error = EINVAL;
2870 break;
2871 } else if (ss.ss_len > sizeof (ss)) {
2872 /*
2873 * sockaddr_storage size is less than SOCK_MAXADDRLEN,
490019cf 2874 * so the check here is inclusive. We could use the
39236c6e
A
2875 * latter instead, but seems like an overkill for now.
2876 */
2877 error = ENAMETOOLONG;
2878 break;
2879 }
2880
2881 se = sockaddrentry_alloc(M_WAITOK);
490019cf
A
2882 if (se == NULL) {
2883 error = ENOBUFS;
39236c6e 2884 break;
490019cf 2885 }
39236c6e
A
2886
2887 sockaddrlist_insert(sl, se);
2888
2889 error = getsockaddr(so, &sa, uaddr, ss.ss_len, xlate_unspec);
2890 if (error != 0)
2891 break;
2892
2893 VERIFY(sa != NULL && sa->sa_len == ss.ss_len);
2894 se->se_addr = sa;
2895
2896 uaddr += ss.ss_len;
2897 VERIFY(((signed)uaddrlen - ss.ss_len) >= 0);
2898 uaddrlen -= ss.ss_len;
2899 }
2900
2901 if (error != 0)
2902 sockaddrlist_free(sl);
2903 else
2904 *slp = sl;
2905
2906 return (error);
2907}
2908
fe8ab488
A
2909int
2910internalize_user_msghdr_array(const void *src, int spacetype, int direction,
3e170ce0 2911 u_int count, struct user_msghdr_x *dst, struct uio **uiop)
fe8ab488
A
2912{
2913 int error = 0;
2914 u_int i;
3e170ce0
A
2915 u_int namecnt = 0;
2916 u_int ctlcnt = 0;
fe8ab488
A
2917
2918 for (i = 0; i < count; i++) {
2919 uio_t auio;
2920 struct user_iovec *iovp;
3e170ce0 2921 struct user_msghdr_x *user_msg = dst + i;
fe8ab488
A
2922
2923 if (spacetype == UIO_USERSPACE64) {
3e170ce0 2924 const struct user64_msghdr_x *msghdr64;
fe8ab488 2925
3e170ce0 2926 msghdr64 = ((const struct user64_msghdr_x *)src) + i;
fe8ab488
A
2927
2928 user_msg->msg_name = msghdr64->msg_name;
2929 user_msg->msg_namelen = msghdr64->msg_namelen;
2930 user_msg->msg_iov = msghdr64->msg_iov;
2931 user_msg->msg_iovlen = msghdr64->msg_iovlen;
2932 user_msg->msg_control = msghdr64->msg_control;
2933 user_msg->msg_controllen = msghdr64->msg_controllen;
2934 user_msg->msg_flags = msghdr64->msg_flags;
2935 user_msg->msg_datalen = msghdr64->msg_datalen;
2936 } else {
3e170ce0 2937 const struct user32_msghdr_x *msghdr32;
fe8ab488 2938
3e170ce0 2939 msghdr32 = ((const struct user32_msghdr_x *)src) + i;
fe8ab488
A
2940
2941 user_msg->msg_name = msghdr32->msg_name;
2942 user_msg->msg_namelen = msghdr32->msg_namelen;
2943 user_msg->msg_iov = msghdr32->msg_iov;
2944 user_msg->msg_iovlen = msghdr32->msg_iovlen;
2945 user_msg->msg_control = msghdr32->msg_control;
2946 user_msg->msg_controllen = msghdr32->msg_controllen;
2947 user_msg->msg_flags = msghdr32->msg_flags;
2948 user_msg->msg_datalen = msghdr32->msg_datalen;
2949 }
3e170ce0
A
2950
2951 if (user_msg->msg_iovlen <= 0 ||
2952 user_msg->msg_iovlen > UIO_MAXIOV) {
fe8ab488
A
2953 error = EMSGSIZE;
2954 goto done;
2955 }
3e170ce0
A
2956 auio = uio_create(user_msg->msg_iovlen, 0, spacetype,
2957 direction);
fe8ab488
A
2958 if (auio == NULL) {
2959 error = ENOMEM;
2960 goto done;
2961 }
2962 uiop[i] = auio;
2963
3e170ce0
A
2964 iovp = uio_iovsaddr(auio);
2965 if (iovp == NULL) {
2966 error = ENOMEM;
2967 goto done;
2968 }
2969 error = copyin_user_iovec_array(user_msg->msg_iov,
2970 spacetype, user_msg->msg_iovlen, iovp);
2971 if (error)
2972 goto done;
2973 user_msg->msg_iov = CAST_USER_ADDR_T(iovp);
fe8ab488 2974
3e170ce0
A
2975 error = uio_calculateresid(auio);
2976 if (error)
2977 goto done;
2978 user_msg->msg_datalen = uio_resid(auio);
2979
2980 if (user_msg->msg_name && user_msg->msg_namelen)
2981 namecnt++;
2982 if (user_msg->msg_control && user_msg->msg_controllen)
2983 ctlcnt++;
2984 }
2985done:
2986
2987 return (error);
2988}
2989
2990int
2991internalize_recv_msghdr_array(const void *src, int spacetype, int direction,
2992 u_int count, struct user_msghdr_x *dst,
2993 struct recv_msg_elem *recv_msg_array)
2994{
2995 int error = 0;
2996 u_int i;
2997
2998 for (i = 0; i < count; i++) {
2999 struct user_iovec *iovp;
3000 struct user_msghdr_x *user_msg = dst + i;
3001 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3002
3003 if (spacetype == UIO_USERSPACE64) {
3004 const struct user64_msghdr_x *msghdr64;
3005
3006 msghdr64 = ((const struct user64_msghdr_x *)src) + i;
3007
3008 user_msg->msg_name = msghdr64->msg_name;
3009 user_msg->msg_namelen = msghdr64->msg_namelen;
3010 user_msg->msg_iov = msghdr64->msg_iov;
3011 user_msg->msg_iovlen = msghdr64->msg_iovlen;
3012 user_msg->msg_control = msghdr64->msg_control;
3013 user_msg->msg_controllen = msghdr64->msg_controllen;
3014 user_msg->msg_flags = msghdr64->msg_flags;
3015 user_msg->msg_datalen = msghdr64->msg_datalen;
fe8ab488 3016 } else {
3e170ce0
A
3017 const struct user32_msghdr_x *msghdr32;
3018
3019 msghdr32 = ((const struct user32_msghdr_x *)src) + i;
3020
3021 user_msg->msg_name = msghdr32->msg_name;
3022 user_msg->msg_namelen = msghdr32->msg_namelen;
3023 user_msg->msg_iov = msghdr32->msg_iov;
3024 user_msg->msg_iovlen = msghdr32->msg_iovlen;
3025 user_msg->msg_control = msghdr32->msg_control;
3026 user_msg->msg_controllen = msghdr32->msg_controllen;
3027 user_msg->msg_flags = msghdr32->msg_flags;
3028 user_msg->msg_datalen = msghdr32->msg_datalen;
fe8ab488 3029 }
3e170ce0
A
3030
3031 if (user_msg->msg_iovlen <= 0 ||
3032 user_msg->msg_iovlen > UIO_MAXIOV) {
3033 error = EMSGSIZE;
3034 goto done;
3035 }
3036 recv_msg_elem->uio = uio_create(user_msg->msg_iovlen, 0,
3037 spacetype, direction);
3038 if (recv_msg_elem->uio == NULL) {
3039 error = ENOMEM;
3040 goto done;
3041 }
3042
3043 iovp = uio_iovsaddr(recv_msg_elem->uio);
3044 if (iovp == NULL) {
3045 error = ENOMEM;
3046 goto done;
3047 }
3048 error = copyin_user_iovec_array(user_msg->msg_iov,
3049 spacetype, user_msg->msg_iovlen, iovp);
3050 if (error)
3051 goto done;
3052 user_msg->msg_iov = CAST_USER_ADDR_T(iovp);
3053
3054 error = uio_calculateresid(recv_msg_elem->uio);
3055 if (error)
3056 goto done;
3057 user_msg->msg_datalen = uio_resid(recv_msg_elem->uio);
3058
3059 if (user_msg->msg_name && user_msg->msg_namelen)
3060 recv_msg_elem->which |= SOCK_MSG_SA;
3061 if (user_msg->msg_control && user_msg->msg_controllen)
3062 recv_msg_elem->which |= SOCK_MSG_CONTROL;
fe8ab488
A
3063 }
3064done:
3e170ce0 3065
fe8ab488
A
3066 return (error);
3067}
3068
3069u_int
3070externalize_user_msghdr_array(void *dst, int spacetype, int direction,
3e170ce0 3071 u_int count, const struct user_msghdr_x *src, struct uio **uiop)
fe8ab488
A
3072{
3073#pragma unused(direction)
3074 u_int i;
3075 int seenlast = 0;
3076 u_int retcnt = 0;
3077
3078 for (i = 0; i < count; i++) {
3e170ce0 3079 const struct user_msghdr_x *user_msg = src + i;
fe8ab488
A
3080 uio_t auio = uiop[i];
3081 user_ssize_t len = user_msg->msg_datalen - uio_resid(auio);
3082
3083 if (user_msg->msg_datalen != 0 && len == 0)
3084 seenlast = 1;
3e170ce0
A
3085
3086 if (seenlast == 0)
3087 retcnt ++;
3088
3089 if (spacetype == UIO_USERSPACE64) {
3090 struct user64_msghdr_x *msghdr64;
3091
3092 msghdr64 = ((struct user64_msghdr_x *)dst) + i;
3093
3094 msghdr64->msg_flags = user_msg->msg_flags;
3095 msghdr64->msg_datalen = len;
3096
3097 } else {
3098 struct user32_msghdr_x *msghdr32;
3099
3100 msghdr32 = ((struct user32_msghdr_x *)dst) + i;
3101
3102 msghdr32->msg_flags = user_msg->msg_flags;
3103 msghdr32->msg_datalen = len;
3104 }
3105 }
3106 return (retcnt);
3107}
3108
3109u_int
3110externalize_recv_msghdr_array(void *dst, int spacetype, int direction,
3111 u_int count, const struct user_msghdr_x *src,
3112 struct recv_msg_elem *recv_msg_array)
3113{
3114 u_int i;
3115 int seenlast = 0;
3116 u_int retcnt = 0;
3117
3118 for (i = 0; i < count; i++) {
3119 const struct user_msghdr_x *user_msg = src + i;
3120 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3121 user_ssize_t len;
3122
3123 len = user_msg->msg_datalen - uio_resid(recv_msg_elem->uio);
3124
3125 if (direction == UIO_READ) {
3126 if ((recv_msg_elem->which & SOCK_MSG_DATA) == 0)
3127 seenlast = 1;
3128 } else {
3129 if (user_msg->msg_datalen != 0 && len == 0)
3130 seenlast = 1;
3131 }
3132
fe8ab488
A
3133 if (seenlast == 0)
3134 retcnt ++;
3135
3136 if (spacetype == UIO_USERSPACE64) {
3137 struct user64_msghdr_x *msghdr64;
3138
3139 msghdr64 = ((struct user64_msghdr_x *)dst) + i;
3140
3141 msghdr64->msg_flags = user_msg->msg_flags;
3142 msghdr64->msg_datalen = len;
3e170ce0 3143
fe8ab488
A
3144 } else {
3145 struct user32_msghdr_x *msghdr32;
3146
3147 msghdr32 = ((struct user32_msghdr_x *)dst) + i;
3148
3149 msghdr32->msg_flags = user_msg->msg_flags;
3150 msghdr32->msg_datalen = len;
3151 }
3152 }
3153 return (retcnt);
3154}
3155
3156void
3157free_uio_array(struct uio **uiop, u_int count)
3158{
3159 u_int i;
3160
3161 for (i = 0; i < count; i++) {
3162 if (uiop[i] != NULL)
3163 uio_free(uiop[i]);
3164 }
3165}
3166
3167__private_extern__ user_ssize_t
3168uio_array_resid(struct uio **uiop, u_int count)
3169{
3170 user_ssize_t len = 0;
3171 u_int i;
3172
3173 for (i = 0; i < count; i++) {
3174 struct uio *auio = uiop[i];
3175
3e170ce0 3176 if (auio != NULL)
fe8ab488
A
3177 len += uio_resid(auio);
3178 }
3179 return (len);
3180}
3181
3182int
3183uio_array_is_valid(struct uio **uiop, u_int count)
3184{
3185 user_ssize_t len = 0;
3186 u_int i;
3187
3188 for (i = 0; i < count; i++) {
3189 struct uio *auio = uiop[i];
3e170ce0 3190
fe8ab488
A
3191 if (auio != NULL) {
3192 user_ssize_t resid = uio_resid(auio);
3e170ce0 3193
fe8ab488
A
3194 /*
3195 * Sanity check on the validity of the iovec:
3196 * no point of going over sb_max
3197 */
3198 if (resid < 0 || (u_int32_t)resid > sb_max)
3199 return (0);
3e170ce0
A
3200
3201 len += resid;
3202 if (len < 0 || (u_int32_t)len > sb_max)
3203 return (0);
3204 }
3205 }
3206 return (1);
3207}
3208
3209
3210struct recv_msg_elem *
3211alloc_recv_msg_array(u_int count)
3212{
3213 struct recv_msg_elem *recv_msg_array;
3214
3215 recv_msg_array = _MALLOC(count * sizeof(struct recv_msg_elem),
3216 M_TEMP, M_WAITOK | M_ZERO);
3217
3218 return (recv_msg_array);
3219}
3220
3221void
3222free_recv_msg_array(struct recv_msg_elem *recv_msg_array, u_int count)
3223{
3224 u_int i;
3225
3226 for (i = 0; i < count; i++) {
3227 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3228
3229 if (recv_msg_elem->uio != NULL)
3230 uio_free(recv_msg_elem->uio);
3231 if (recv_msg_elem->psa != NULL)
3232 _FREE(recv_msg_elem->psa, M_TEMP);
3233 if (recv_msg_elem->controlp != NULL)
3234 m_freem(recv_msg_elem->controlp);
3235 }
3236 _FREE(recv_msg_array, M_TEMP);
3237}
3238
3239
3240__private_extern__ user_ssize_t
3241recv_msg_array_resid(struct recv_msg_elem *recv_msg_array, u_int count)
3242{
3243 user_ssize_t len = 0;
3244 u_int i;
3245
3246 for (i = 0; i < count; i++) {
3247 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3248
3249 if (recv_msg_elem->uio != NULL)
3250 len += uio_resid(recv_msg_elem->uio);
3251 }
3252 return (len);
3253}
3254
3255int
3256recv_msg_array_is_valid(struct recv_msg_elem *recv_msg_array, u_int count)
3257{
3258 user_ssize_t len = 0;
3259 u_int i;
3260
3261 for (i = 0; i < count; i++) {
3262 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3263
3264 if (recv_msg_elem->uio != NULL) {
3265 user_ssize_t resid = uio_resid(recv_msg_elem->uio);
3266
3267 /*
3268 * Sanity check on the validity of the iovec:
3269 * no point of going over sb_max
3270 */
3271 if (resid < 0 || (u_int32_t)resid > sb_max)
3272 return (0);
3273
fe8ab488
A
3274 len += resid;
3275 if (len < 0 || (u_int32_t)len > sb_max)
3276 return (0);
3277 }
3278 }
3279 return (1);
3280}
3281
39236c6e 3282#if SENDFILE
2d21ac55
A
3283
/* Number of iovec slots in the on-stack uio buffer used by sendfile() */
#define	SFUIOBUFS	64

/*
 * Number of clusters of a given size needed to hold n bytes
 * (n must be non-zero: the computation starts from n - 1)
 */
#define	HOWMANY_16K(n)	((((unsigned int)(n) - 1) >> M16KCLSHIFT) + 1)
#define	HOWMANY_4K(n)	((((unsigned int)(n) - 1) >> MBIGCLSHIFT) + 1)

/* Most bytes moved per pass of the sendfile() loop: SFUIOBUFS pages */
#define	SENDFILE_MAX_BYTES	(SFUIOBUFS << PGSHIFT)

/* The same per-pass limit expressed as a count of mbuf clusters */
#define	SENDFILE_MAX_16K	HOWMANY_16K(SENDFILE_MAX_BYTES)
#define	SENDFILE_MAX_4K		HOWMANY_4K(SENDFILE_MAX_BYTES)
3296
1c79356b 3297static void
2d21ac55
A
3298alloc_sendpkt(int how, size_t pktlen, unsigned int *maxchunks,
3299 struct mbuf **m, boolean_t jumbocl)
1c79356b 3300{
2d21ac55 3301 unsigned int needed;
1c79356b 3302
2d21ac55
A
3303 if (pktlen == 0)
3304 panic("%s: pktlen (%ld) must be non-zero\n", __func__, pktlen);
1c79356b 3305
2d21ac55
A
3306 /*
3307 * Try to allocate for the whole thing. Since we want full control
3308 * over the buffer size and be able to accept partial result, we can't
3309 * use mbuf_allocpacket(). The logic below is similar to sosend().
3310 */
3311 *m = NULL;
6d2010ae 3312 if (pktlen > MBIGCLBYTES && jumbocl) {
2d21ac55
A
3313 needed = MIN(SENDFILE_MAX_16K, HOWMANY_16K(pktlen));
3314 *m = m_getpackets_internal(&needed, 1, how, 0, M16KCLBYTES);
3315 }
3316 if (*m == NULL) {
3317 needed = MIN(SENDFILE_MAX_4K, HOWMANY_4K(pktlen));
6d2010ae 3318 *m = m_getpackets_internal(&needed, 1, how, 0, MBIGCLBYTES);
2d21ac55
A
3319 }
3320
3321 /*
3322 * Our previous attempt(s) at allocation had failed; the system
3323 * may be short on mbufs, and we want to block until they are
3324 * available. This time, ask just for 1 mbuf and don't return
3325 * until we get it.
3326 */
3327 if (*m == NULL) {
3328 needed = 1;
6d2010ae 3329 *m = m_getpackets_internal(&needed, 1, M_WAIT, 1, MBIGCLBYTES);
1c79356b 3330 }
2d21ac55
A
3331 if (*m == NULL)
3332 panic("%s: blocking allocation returned NULL\n", __func__);
3333
3334 *maxchunks = needed;
1c79356b
A
3335}
3336
3337/*
3338 * sendfile(2).
2d21ac55
A
3339 * int sendfile(int fd, int s, off_t offset, off_t *nbytes,
3340 * struct sf_hdtr *hdtr, int flags)
1c79356b
A
3341 *
3342 * Send a file specified by 'fd' and starting at 'offset' to a socket
2d21ac55
A
3343 * specified by 's'. Send only '*nbytes' of the file or until EOF if
3344 * *nbytes == 0. Optionally add a header and/or trailer to the socket
3345 * output. If specified, write the total number of bytes sent into *nbytes.
1c79356b
A
3346 */
3347int
2d21ac55 3348sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval)
1c79356b 3349{
91447636 3350 struct fileproc *fp;
1c79356b 3351 struct vnode *vp;
1c79356b 3352 struct socket *so;
2d21ac55
A
3353 struct writev_nocancel_args nuap;
3354 user_ssize_t writev_retval;
2d21ac55 3355 struct user_sf_hdtr user_hdtr;
b0d623f7
A
3356 struct user32_sf_hdtr user32_hdtr;
3357 struct user64_sf_hdtr user64_hdtr;
2d21ac55
A
3358 off_t off, xfsize;
3359 off_t nbytes = 0, sbytes = 0;
3360 int error = 0;
3361 size_t sizeof_hdtr;
2d21ac55
A
3362 off_t file_size;
3363 struct vfs_context context = *vfs_context_current();
3e170ce0 3364
2d21ac55
A
3365 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_START), uap->s,
3366 0, 0, 0, 0);
b0d623f7
A
3367
3368 AUDIT_ARG(fd, uap->fd);
3369 AUDIT_ARG(value32, uap->s);
3370
1c79356b
A
3371 /*
3372 * Do argument checking. Must be a regular file in, stream
3373 * type and connected socket out, positive offset.
3374 */
2d21ac55 3375 if ((error = fp_getfvp(p, uap->fd, &fp, &vp))) {
1c79356b 3376 goto done;
2d21ac55
A
3377 }
3378 if ((fp->f_flag & FREAD) == 0) {
91447636
A
3379 error = EBADF;
3380 goto done1;
1c79356b 3381 }
2d21ac55
A
3382 if (vnode_isreg(vp) == 0) {
3383 error = ENOTSUP;
91447636 3384 goto done1;
1c79356b 3385 }
91447636 3386 error = file_socket(uap->s, &so);
2d21ac55 3387 if (error) {
91447636 3388 goto done1;
2d21ac55 3389 }
55e303ae
A
3390 if (so == NULL) {
3391 error = EBADF;
91447636 3392 goto done2;
55e303ae 3393 }
1c79356b
A
3394 if (so->so_type != SOCK_STREAM) {
3395 error = EINVAL;
2d21ac55 3396 goto done2;
1c79356b
A
3397 }
3398 if ((so->so_state & SS_ISCONNECTED) == 0) {
3399 error = ENOTCONN;
2d21ac55 3400 goto done2;
1c79356b
A
3401 }
3402 if (uap->offset < 0) {
3403 error = EINVAL;
2d21ac55 3404 goto done2;
1c79356b 3405 }
2d21ac55
A
3406 if (uap->nbytes == USER_ADDR_NULL) {
3407 error = EINVAL;
3408 goto done2;
3409 }
3410 if (uap->flags != 0) {
3411 error = EINVAL;
3412 goto done2;
3413 }
3414
3415 context.vc_ucred = fp->f_fglob->fg_cred;
3416
3417#if CONFIG_MACF_SOCKET_SUBSET
3418 /* JMM - fetch connected sockaddr? */
3419 error = mac_socket_check_send(context.vc_ucred, so, NULL);
3420 if (error)
3421 goto done2;
3422#endif
3423
3424 /*
3425 * Get number of bytes to send
3426 * Should it applies to size of header and trailer?
3427 * JMM - error handling?
3428 */
3429 copyin(uap->nbytes, &nbytes, sizeof (off_t));
1c79356b
A
3430
3431 /*
3432 * If specified, get the pointer to the sf_hdtr struct for
3433 * any headers/trailers.
3434 */
2d21ac55
A
3435 if (uap->hdtr != USER_ADDR_NULL) {
3436 caddr_t hdtrp;
3437
3438 bzero(&user_hdtr, sizeof (user_hdtr));
3439 if (IS_64BIT_PROCESS(p)) {
b0d623f7
A
3440 hdtrp = (caddr_t)&user64_hdtr;
3441 sizeof_hdtr = sizeof (user64_hdtr);
2d21ac55 3442 } else {
b0d623f7
A
3443 hdtrp = (caddr_t)&user32_hdtr;
3444 sizeof_hdtr = sizeof (user32_hdtr);
2d21ac55
A
3445 }
3446 error = copyin(uap->hdtr, hdtrp, sizeof_hdtr);
1c79356b 3447 if (error)
2d21ac55 3448 goto done2;
b0d623f7
A
3449 if (IS_64BIT_PROCESS(p)) {
3450 user_hdtr.headers = user64_hdtr.headers;
3451 user_hdtr.hdr_cnt = user64_hdtr.hdr_cnt;
3452 user_hdtr.trailers = user64_hdtr.trailers;
3453 user_hdtr.trl_cnt = user64_hdtr.trl_cnt;
3454 } else {
3455 user_hdtr.headers = user32_hdtr.headers;
3456 user_hdtr.hdr_cnt = user32_hdtr.hdr_cnt;
3457 user_hdtr.trailers = user32_hdtr.trailers;
3458 user_hdtr.trl_cnt = user32_hdtr.trl_cnt;
2d21ac55
A
3459 }
3460
1c79356b
A
3461 /*
3462 * Send any headers. Wimp out and use writev(2).
3463 */
2d21ac55
A
3464 if (user_hdtr.headers != USER_ADDR_NULL) {
3465 bzero(&nuap, sizeof (struct writev_args));
1c79356b 3466 nuap.fd = uap->s;
2d21ac55
A
3467 nuap.iovp = user_hdtr.headers;
3468 nuap.iovcnt = user_hdtr.hdr_cnt;
3469 error = writev_nocancel(p, &nuap, &writev_retval);
316670eb 3470 if (error) {
2d21ac55 3471 goto done2;
316670eb 3472 }
2d21ac55 3473 sbytes += writev_retval;
1c79356b
A
3474 }
3475 }
3476
3477 /*
2d21ac55
A
3478 * Get the file size for 2 reasons:
3479 * 1. We don't want to allocate more mbufs than necessary
3480 * 2. We don't want to read past the end of file
1c79356b 3481 */
316670eb 3482 if ((error = vnode_size(vp, &file_size, vfs_context_current())) != 0) {
2d21ac55 3483 goto done2;
316670eb 3484 }
1c79356b
A
3485
3486 /*
2d21ac55
A
3487 * Simply read file data into a chain of mbufs that used with scatter
3488 * gather reads. We're not (yet?) setup to use zero copy external
3489 * mbufs that point to the file pages.
1c79356b 3490 */
2d21ac55 3491 socket_lock(so, 1);
39236c6e 3492 error = sblock(&so->so_snd, SBL_WAIT);
2d21ac55
A
3493 if (error) {
3494 socket_unlock(so, 1);
3495 goto done2;
3496 }
1c79356b 3497 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
2d21ac55 3498 mbuf_t m0 = NULL, m;
39236c6e 3499 unsigned int nbufs = SFUIOBUFS, i;
2d21ac55 3500 uio_t auio;
39236c6e 3501 char uio_buf[UIO_SIZEOF(SFUIOBUFS)]; /* 1 KB !!! */
2d21ac55
A
3502 size_t uiolen;
3503 user_ssize_t rlen;
3504 off_t pgoff;
3505 size_t pktlen;
3506 boolean_t jumbocl;
1c79356b 3507
1c79356b 3508 /*
2d21ac55
A
3509 * Calculate the amount to transfer.
3510 * Align to round number of pages.
3511 * Not to exceed send socket buffer,
1c79356b
A
3512 * the EOF, or the passed in nbytes.
3513 */
2d21ac55
A
3514 xfsize = sbspace(&so->so_snd);
3515
3516 if (xfsize <= 0) {
3517 if (so->so_state & SS_CANTSENDMORE) {
3518 error = EPIPE;
3519 goto done3;
3520 } else if ((so->so_state & SS_NBIO)) {
3521 error = EAGAIN;
3522 goto done3;
3523 } else {
3524 xfsize = PAGE_SIZE;
3525 }
3526 }
3527
3528 if (xfsize > SENDFILE_MAX_BYTES)
3529 xfsize = SENDFILE_MAX_BYTES;
3530 else if (xfsize > PAGE_SIZE)
3531 xfsize = trunc_page(xfsize);
3532 pgoff = off & PAGE_MASK_64;
3533 if (pgoff > 0 && PAGE_SIZE - pgoff < xfsize)
1c79356b 3534 xfsize = PAGE_SIZE_64 - pgoff;
2d21ac55
A
3535 if (nbytes && xfsize > (nbytes - sbytes))
3536 xfsize = nbytes - sbytes;
3537 if (xfsize <= 0)
3538 break;
3539 if (off + xfsize > file_size)
3540 xfsize = file_size - off;
1c79356b
A
3541 if (xfsize <= 0)
3542 break;
2d21ac55 3543
1c79356b 3544 /*
2d21ac55
A
3545 * Attempt to use larger than system page-size clusters for
3546 * large writes only if there is a jumbo cluster pool and
3547 * if the socket is marked accordingly.
1c79356b 3548 */
2d21ac55
A
3549 jumbocl = sosendjcl && njcl > 0 &&
3550 ((so->so_flags & SOF_MULTIPAGES) || sosendjcl_ignore_capab);
3551
3552 socket_unlock(so, 0);
3553 alloc_sendpkt(M_WAIT, xfsize, &nbufs, &m0, jumbocl);
fe8ab488 3554 pktlen = mbuf_pkthdr_maxlen(m0);
b0d623f7 3555 if (pktlen < (size_t)xfsize)
2d21ac55 3556 xfsize = pktlen;
39236c6e 3557
2d21ac55
A
3558 auio = uio_createwithbuffer(nbufs, off, UIO_SYSSPACE,
3559 UIO_READ, &uio_buf[0], sizeof (uio_buf));
3560 if (auio == NULL) {
316670eb
A
3561 printf("sendfile failed. nbufs = %d. %s", nbufs,
3562 "File a radar related to rdar://10146739.\n");
2d21ac55
A
3563 mbuf_freem(m0);
3564 error = ENXIO;
3565 socket_lock(so, 0);
3566 goto done3;
1c79356b 3567 }
1c79356b 3568
2d21ac55 3569 for (i = 0, m = m0, uiolen = 0;
b0d623f7 3570 i < nbufs && m != NULL && uiolen < (size_t)xfsize;
2d21ac55
A
3571 i++, m = mbuf_next(m)) {
3572 size_t mlen = mbuf_maxlen(m);
3573
b0d623f7 3574 if (mlen + uiolen > (size_t)xfsize)
2d21ac55
A
3575 mlen = xfsize - uiolen;
3576 mbuf_setlen(m, mlen);
3577 uio_addiov(auio, CAST_USER_ADDR_T(mbuf_datastart(m)),
3578 mlen);
3579 uiolen += mlen;
3580 }
3581
3582 if (xfsize != uio_resid(auio))
3583 printf("sendfile: xfsize: %lld != uio_resid(auio): "
6d2010ae 3584 "%lld\n", xfsize, (long long)uio_resid(auio));
2d21ac55
A
3585
3586 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_START),
3587 uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
3588 (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
3589 error = fo_read(fp, auio, FOF_OFFSET, &context);
3590 socket_lock(so, 0);
3591 if (error != 0) {
3592 if (uio_resid(auio) != xfsize && (error == ERESTART ||
3593 error == EINTR || error == EWOULDBLOCK)) {
3594 error = 0;
3595 } else {
3596 mbuf_freem(m0);
3597 goto done3;
1c79356b 3598 }
1c79356b 3599 }
2d21ac55
A
3600 xfsize -= uio_resid(auio);
3601 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_END),
3602 uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
3603 (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
3604
3605 if (xfsize == 0) {
3e170ce0 3606 // printf("sendfile: fo_read 0 bytes, EOF\n");
2d21ac55 3607 break;
91447636 3608 }
2d21ac55
A
3609 if (xfsize + off > file_size)
3610 printf("sendfile: xfsize: %lld + off: %lld > file_size:"
3611 "%lld\n", xfsize, off, file_size);
3612 for (i = 0, m = m0, rlen = 0;
3613 i < nbufs && m != NULL && rlen < xfsize;
3614 i++, m = mbuf_next(m)) {
3615 size_t mlen = mbuf_maxlen(m);
3616
b0d623f7 3617 if (rlen + mlen > (size_t)xfsize)
2d21ac55
A
3618 mlen = xfsize - rlen;
3619 mbuf_setlen(m, mlen);
3620
3621 rlen += mlen;
3622 }
3623 mbuf_pkthdr_setlen(m0, xfsize);
3624
1c79356b
A
3625retry_space:
3626 /*
3627 * Make sure that the socket is still able to take more data.
3628 * CANTSENDMORE being true usually means that the connection
3629 * was closed. so_error is true when an error was sensed after
3630 * a previous send.
3631 * The state is checked after the page mapping and buffer
3632 * allocation above since those operations may block and make
3633 * any socket checks stale. From this point forward, nothing
3634 * blocks before the pru_send (or more accurately, any blocking
3635 * results in a loop back to here to re-check).
3636 */
3637 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
3638 if (so->so_state & SS_CANTSENDMORE) {
3639 error = EPIPE;
3640 } else {
3641 error = so->so_error;
3642 so->so_error = 0;
3643 }
2d21ac55
A
3644 m_freem(m0);
3645 goto done3;
1c79356b
A
3646 }
3647 /*
3648 * Wait for socket space to become available. We do this just
3649 * after checking the connection state above in order to avoid
3650 * a race condition with sbwait().
3651 */
2d21ac55 3652 if (sbspace(&so->so_snd) < (long)so->so_snd.sb_lowat) {
1c79356b 3653 if (so->so_state & SS_NBIO) {
2d21ac55 3654 m_freem(m0);
1c79356b 3655 error = EAGAIN;
2d21ac55 3656 goto done3;
1c79356b 3657 }
2d21ac55
A
3658 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT |
3659 DBG_FUNC_START), uap->s, 0, 0, 0, 0);
1c79356b 3660 error = sbwait(&so->so_snd);
2d21ac55
A
3661 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT|
3662 DBG_FUNC_END), uap->s, 0, 0, 0, 0);
1c79356b
A
3663 /*
3664 * An error from sbwait usually indicates that we've
3665 * been interrupted by a signal. If we've sent anything
3666 * then return bytes sent, otherwise return the error.
3667 */
3668 if (error) {
2d21ac55
A
3669 m_freem(m0);
3670 goto done3;
1c79356b
A
3671 }
3672 goto retry_space;
3673 }
39236c6e 3674
6d2010ae 3675 struct mbuf *control = NULL;
2d21ac55
A
3676 {
3677 /*
3678 * Socket filter processing
3679 */
2d21ac55 3680
6d2010ae
A
3681 error = sflt_data_out(so, NULL, &m0, &control, 0);
3682 if (error) {
3683 if (error == EJUSTRETURN) {
3684 error = 0;
3685 continue;
2d21ac55 3686 }
6d2010ae 3687 goto done3;
2d21ac55
A
3688 }
3689 /*
3690 * End Socket filter processing
3691 */
3692 }
3693 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
3694 uap->s, 0, 0, 0, 0);
3695 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m0,
6d2010ae 3696 0, control, p);
2d21ac55
A
3697 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
3698 uap->s, 0, 0, 0, 0);
1c79356b 3699 if (error) {
2d21ac55 3700 goto done3;
1c79356b
A
3701 }
3702 }
39236c6e 3703 sbunlock(&so->so_snd, FALSE); /* will unlock socket */
1c79356b
A
3704 /*
3705 * Send trailers. Wimp out and use writev(2).
3706 */
2d21ac55
A
3707 if (uap->hdtr != USER_ADDR_NULL &&
3708 user_hdtr.trailers != USER_ADDR_NULL) {
3709 bzero(&nuap, sizeof (struct writev_args));
3710 nuap.fd = uap->s;
3711 nuap.iovp = user_hdtr.trailers;
3712 nuap.iovcnt = user_hdtr.trl_cnt;
3713 error = writev_nocancel(p, &nuap, &writev_retval);
316670eb 3714 if (error) {
2d21ac55 3715 goto done2;
316670eb 3716 }
2d21ac55 3717 sbytes += writev_retval;
1c79356b 3718 }
91447636
A
3719done2:
3720 file_drop(uap->s);
3721done1:
3722 file_drop(uap->fd);
1c79356b 3723done:
2d21ac55 3724 if (uap->nbytes != USER_ADDR_NULL) {
91447636 3725 /* XXX this appears bogus for some early failure conditions */
2d21ac55 3726 copyout(&sbytes, uap->nbytes, sizeof (off_t));
1c79356b 3727 }
2d21ac55
A
3728 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_END), uap->s,
3729 (unsigned int)((sbytes >> 32) & 0x0ffffffff),
3730 (unsigned int)(sbytes & 0x0ffffffff), error, 0);
1c79356b 3731 return (error);
91447636 3732done3:
39236c6e 3733 sbunlock(&so->so_snd, FALSE); /* will unlock socket */
91447636 3734 goto done2;
1c79356b
A
3735}
3736
2d21ac55
A
3737
3738#endif /* SENDFILE */