]> git.saurik.com Git - apple/xnu.git/blame - bsd/kern/uipc_syscalls.c
xnu-7195.101.1.tar.gz
[apple/xnu.git] / bsd / kern / uipc_syscalls.c
CommitLineData
1c79356b 1/*
3e170ce0 2 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
5d5c5d0d 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
39236c6e 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
39236c6e 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
39236c6e 17 *
2d21ac55
A
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
39236c6e 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * Copyright (c) 1982, 1986, 1989, 1990, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * sendfile(2) and related extensions:
2d21ac55 33 * Copyright (c) 1998, David Greenman. All rights reserved.
1c79356b
A
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. All advertising materials mentioning features or use of this software
44 * must display the following acknowledgement:
45 * This product includes software developed by the University of
46 * California, Berkeley and its contributors.
47 * 4. Neither the name of the University nor the names of its contributors
48 * may be used to endorse or promote products derived from this software
49 * without specific prior written permission.
50 *
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE.
62 *
63 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94
64 */
2d21ac55
A
65/*
66 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
67 * support for mandatory and extensible security protections. This notice
68 * is included in support of clause 2.2 (b) of the Apple Public License,
69 * Version 2.0.
70 */
1c79356b
A
71
72#include <sys/param.h>
73#include <sys/systm.h>
74#include <sys/filedesc.h>
91447636
A
75#include <sys/proc_internal.h>
76#include <sys/file_internal.h>
2d21ac55 77#include <sys/vnode_internal.h>
1c79356b 78#include <sys/malloc.h>
39236c6e 79#include <sys/mcache.h>
1c79356b 80#include <sys/mbuf.h>
fe8ab488 81#include <kern/locks.h>
91447636 82#include <sys/domain.h>
1c79356b 83#include <sys/protosw.h>
91447636 84#include <sys/signalvar.h>
1c79356b
A
85#include <sys/socket.h>
86#include <sys/socketvar.h>
1c79356b 87#include <sys/kernel.h>
91447636 88#include <sys/uio_internal.h>
2d21ac55 89#include <sys/kauth.h>
6d2010ae 90#include <kern/task.h>
39236c6e 91#include <sys/priv.h>
3e170ce0 92#include <sys/sysctl.h>
5c9f4661 93#include <sys/sys_domain.h>
e5568f75 94
b0d623f7 95#include <security/audit/audit.h>
1c79356b
A
96
97#include <sys/kdebug.h>
91447636 98#include <sys/sysproto.h>
2d21ac55
A
99#include <netinet/in.h>
100#include <net/route.h>
101#include <netinet/in_pcb.h>
102
cb323159
A
103#include <os/ptrtools.h>
104
2d21ac55
A
105#if CONFIG_MACF_SOCKET_SUBSET
106#include <security/mac_framework.h>
107#endif /* MAC_SOCKET_SUBSET */
108
f427ee49
A
109#define f_flag fp_glob->fg_flag
110#define f_ops fp_glob->fg_ops
111#define f_data fp_glob->fg_data
0a7de745
A
112
113#define DBG_LAYER_IN_BEG NETDBG_CODE(DBG_NETSOCK, 0)
114#define DBG_LAYER_IN_END NETDBG_CODE(DBG_NETSOCK, 2)
115#define DBG_LAYER_OUT_BEG NETDBG_CODE(DBG_NETSOCK, 1)
116#define DBG_LAYER_OUT_END NETDBG_CODE(DBG_NETSOCK, 3)
117#define DBG_FNC_SENDMSG NETDBG_CODE(DBG_NETSOCK, (1 << 8) | 1)
118#define DBG_FNC_SENDTO NETDBG_CODE(DBG_NETSOCK, (2 << 8) | 1)
119#define DBG_FNC_SENDIT NETDBG_CODE(DBG_NETSOCK, (3 << 8) | 1)
120#define DBG_FNC_RECVFROM NETDBG_CODE(DBG_NETSOCK, (5 << 8))
121#define DBG_FNC_RECVMSG NETDBG_CODE(DBG_NETSOCK, (6 << 8))
122#define DBG_FNC_RECVIT NETDBG_CODE(DBG_NETSOCK, (7 << 8))
123#define DBG_FNC_SENDFILE NETDBG_CODE(DBG_NETSOCK, (10 << 8))
124#define DBG_FNC_SENDFILE_WAIT NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 1))
125#define DBG_FNC_SENDFILE_READ NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 2))
126#define DBG_FNC_SENDFILE_SEND NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 3))
127#define DBG_FNC_SENDMSG_X NETDBG_CODE(DBG_NETSOCK, (11 << 8))
128#define DBG_FNC_RECVMSG_X NETDBG_CODE(DBG_NETSOCK, (12 << 8))
2d21ac55 129
3e170ce0 130#if DEBUG || DEVELOPMENT
0a7de745
A
131#define DEBUG_KERNEL_ADDRPERM(_v) (_v)
132#define DBG_PRINTF(...) printf(__VA_ARGS__)
3e170ce0 133#else
0a7de745
A
134#define DEBUG_KERNEL_ADDRPERM(_v) VM_KERNEL_ADDRPERM(_v)
135#define DBG_PRINTF(...) do { } while (0)
3e170ce0 136#endif
2d21ac55 137
3e170ce0
A
138static int sendit(struct proc *, struct socket *, struct user_msghdr *, uio_t,
139 int, int32_t *);
2d21ac55 140static int recvit(struct proc *, int, struct user_msghdr *, uio_t, user_addr_t,
b0d623f7 141 int32_t *);
39236c6e 142static int connectit(struct socket *, struct sockaddr *);
2d21ac55 143static int getsockaddr(struct socket *, struct sockaddr **, user_addr_t,
4a3eedf9 144 size_t, boolean_t);
2d21ac55 145static int getsockaddr_s(struct socket *, struct sockaddr_storage *,
4a3eedf9 146 user_addr_t, size_t, boolean_t);
1c79356b 147#if SENDFILE
2d21ac55
A
148static void alloc_sendpkt(int, size_t, unsigned int *, struct mbuf **,
149 boolean_t);
150#endif /* SENDFILE */
39236c6e 151static int connectx_nocancel(struct proc *, struct connectx_args *, int *);
813fb2f6
A
152static int connectitx(struct socket *, struct sockaddr *,
153 struct sockaddr *, struct proc *, uint32_t, sae_associd_t,
3e170ce0 154 sae_connid_t *, uio_t, unsigned int, user_ssize_t *);
39236c6e
A
155static int disconnectx_nocancel(struct proc *, struct disconnectx_args *,
156 int *);
157static int socket_common(struct proc *, int, int, int, pid_t, int32_t *, int);
1c79356b 158
fe8ab488 159static int internalize_user_msghdr_array(const void *, int, int, u_int,
3e170ce0 160 struct user_msghdr_x *, struct uio **);
fe8ab488 161static u_int externalize_user_msghdr_array(void *, int, int, u_int,
3e170ce0 162 const struct user_msghdr_x *, struct uio **);
fe8ab488
A
163
164static void free_uio_array(struct uio **, u_int);
f427ee49 165static boolean_t uio_array_is_valid(struct uio **, u_int);
3e170ce0
A
166static int recv_msg_array_is_valid(struct recv_msg_elem *, u_int);
167static int internalize_recv_msghdr_array(const void *, int, int,
168 u_int, struct user_msghdr_x *, struct recv_msg_elem *);
c3c9b80d
A
169static u_int externalize_recv_msghdr_array(struct proc *, struct socket *, void *, u_int,
170 struct user_msghdr_x *, struct recv_msg_elem *, int *);
3e170ce0
A
171static struct recv_msg_elem *alloc_recv_msg_array(u_int count);
172static void free_recv_msg_array(struct recv_msg_elem *, u_int);
173
174SYSCTL_DECL(_kern_ipc);
175
176static u_int somaxsendmsgx = 100;
177SYSCTL_UINT(_kern_ipc, OID_AUTO, maxsendmsgx,
0a7de745 178 CTLFLAG_RW | CTLFLAG_LOCKED, &somaxsendmsgx, 0, "");
3e170ce0
A
179static u_int somaxrecvmsgx = 100;
180SYSCTL_UINT(_kern_ipc, OID_AUTO, maxrecvmsgx,
0a7de745 181 CTLFLAG_RW | CTLFLAG_LOCKED, &somaxrecvmsgx, 0, "");
fe8ab488 182
1c79356b
A
183/*
184 * System call interface to the socket abstraction.
185 */
1c79356b 186
39236c6e 187extern const struct fileops socketops;
1c79356b 188
2d21ac55
A
189/*
190 * Returns: 0 Success
191 * EACCES Mandatory Access Control failure
192 * falloc:ENFILE
193 * falloc:EMFILE
194 * falloc:ENOMEM
195 * socreate:EAFNOSUPPORT
196 * socreate:EPROTOTYPE
197 * socreate:EPROTONOSUPPORT
198 * socreate:ENOBUFS
199 * socreate:ENOMEM
2d21ac55
A
200 * socreate:??? [other protocol families, IPSEC]
201 */
1c79356b 202int
39236c6e 203socket(struct proc *p,
0a7de745
A
204 struct socket_args *uap,
205 int32_t *retval)
39236c6e 206{
0a7de745
A
207 return socket_common(p, uap->domain, uap->type, uap->protocol,
208 proc_selfpid(), retval, 0);
39236c6e
A
209}
210
211int
212socket_delegate(struct proc *p,
0a7de745
A
213 struct socket_delegate_args *uap,
214 int32_t *retval)
39236c6e
A
215{
216 return socket_common(p, uap->domain, uap->type, uap->protocol,
0a7de745 217 uap->epid, retval, 1);
39236c6e
A
218}
219
220static int
221socket_common(struct proc *p,
0a7de745
A
222 int domain,
223 int type,
224 int protocol,
225 pid_t epid,
226 int32_t *retval,
227 int delegate)
1c79356b 228{
1c79356b 229 struct socket *so;
91447636 230 struct fileproc *fp;
1c79356b
A
231 int fd, error;
232
39236c6e 233 AUDIT_ARG(socket, domain, type, protocol);
2d21ac55 234#if CONFIG_MACF_SOCKET_SUBSET
39236c6e 235 if ((error = mac_socket_check_create(kauth_cred_get(), domain,
0a7de745
A
236 type, protocol)) != 0) {
237 return error;
238 }
2d21ac55 239#endif /* MAC_SOCKET_SUBSET */
1c79356b 240
39236c6e
A
241 if (delegate) {
242 error = priv_check_cred(kauth_cred_get(),
243 PRIV_NET_PRIVILEGED_SOCKET_DELEGATE, 0);
0a7de745
A
244 if (error) {
245 return EACCES;
246 }
39236c6e
A
247 }
248
2d21ac55 249 error = falloc(p, &fp, &fd, vfs_context_current());
91447636 250 if (error) {
0a7de745 251 return error;
91447636 252 }
0a7de745 253 fp->f_flag = FREAD | FWRITE;
1c79356b 254 fp->f_ops = &socketops;
91447636 255
0a7de745 256 if (delegate) {
39236c6e 257 error = socreate_delegate(domain, &so, type, protocol, epid);
0a7de745 258 } else {
39236c6e 259 error = socreate(domain, &so, type, protocol);
0a7de745 260 }
39236c6e 261
91447636
A
262 if (error) {
263 fp_free(p, fd, fp);
1c79356b
A
264 } else {
265 fp->f_data = (caddr_t)so;
91447636
A
266
267 proc_fdlock(p);
6601e61a 268 procfdtbl_releasefd(p, fd, NULL);
2d21ac55 269
91447636
A
270 fp_drop(p, fd, fp, 1);
271 proc_fdunlock(p);
272
1c79356b 273 *retval = fd;
3e170ce0
A
274 if (ENTR_SHOULDTRACE) {
275 KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
276 fd, 0, (int64_t)VM_KERNEL_ADDRPERM(so));
277 }
1c79356b 278 }
0a7de745 279 return error;
1c79356b
A
280}
281
2d21ac55
A
282/*
283 * Returns: 0 Success
284 * EDESTADDRREQ Destination address required
285 * EBADF Bad file descriptor
286 * EACCES Mandatory Access Control failure
287 * file_socket:ENOTSOCK
288 * file_socket:EBADF
289 * getsockaddr:ENAMETOOLONG Filename too long
290 * getsockaddr:EINVAL Invalid argument
291 * getsockaddr:ENOMEM Not enough space
292 * getsockaddr:EFAULT Bad address
39236c6e 293 * sobindlock:???
2d21ac55 294 */
1c79356b
A
295/* ARGSUSED */
296int
b0d623f7 297bind(__unused proc_t p, struct bind_args *uap, __unused int32_t *retval)
1c79356b 298{
2d21ac55
A
299 struct sockaddr_storage ss;
300 struct sockaddr *sa = NULL;
91447636 301 struct socket *so;
2d21ac55 302 boolean_t want_free = TRUE;
1c79356b
A
303 int error;
304
55e303ae 305 AUDIT_ARG(fd, uap->s);
91447636 306 error = file_socket(uap->s, &so);
0a7de745
A
307 if (error != 0) {
308 return error;
309 }
2d21ac55
A
310 if (so == NULL) {
311 error = EBADF;
312 goto out;
313 }
314 if (uap->name == USER_ADDR_NULL) {
315 error = EDESTADDRREQ;
316 goto out;
317 }
0a7de745 318 if (uap->namelen > sizeof(ss)) {
4a3eedf9 319 error = getsockaddr(so, &sa, uap->name, uap->namelen, TRUE);
2d21ac55 320 } else {
4a3eedf9 321 error = getsockaddr_s(so, &ss, uap->name, uap->namelen, TRUE);
2d21ac55
A
322 if (error == 0) {
323 sa = (struct sockaddr *)&ss;
324 want_free = FALSE;
325 }
326 }
0a7de745 327 if (error != 0) {
91447636 328 goto out;
0a7de745 329 }
2d21ac55
A
330 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
331#if CONFIG_MACF_SOCKET_SUBSET
5c9f4661 332 if ((sa != NULL && sa->sa_family == AF_SYSTEM) ||
0a7de745
A
333 (error = mac_socket_check_bind(kauth_cred_get(), so, sa)) == 0) {
334 error = sobindlock(so, sa, 1); /* will lock socket */
335 }
2d21ac55 336#else
0a7de745 337 error = sobindlock(so, sa, 1); /* will lock socket */
2d21ac55 338#endif /* MAC_SOCKET_SUBSET */
0a7de745 339 if (want_free) {
2d21ac55 340 FREE(sa, M_SONAME);
0a7de745 341 }
91447636
A
342out:
343 file_drop(uap->s);
0a7de745 344 return error;
1c79356b
A
345}
346
2d21ac55
A
347/*
348 * Returns: 0 Success
349 * EBADF
350 * EACCES Mandatory Access Control failure
351 * file_socket:ENOTSOCK
352 * file_socket:EBADF
353 * solisten:EINVAL
354 * solisten:EOPNOTSUPP
355 * solisten:???
356 */
1c79356b 357int
2d21ac55 358listen(__unused struct proc *p, struct listen_args *uap,
b0d623f7 359 __unused int32_t *retval)
1c79356b 360{
1c79356b 361 int error;
2d21ac55 362 struct socket *so;
1c79356b 363
55e303ae 364 AUDIT_ARG(fd, uap->s);
91447636 365 error = file_socket(uap->s, &so);
0a7de745
A
366 if (error) {
367 return error;
368 }
91447636 369 if (so != NULL)
2d21ac55
A
370#if CONFIG_MACF_SOCKET_SUBSET
371 {
372 error = mac_socket_check_listen(kauth_cred_get(), so);
0a7de745 373 if (error == 0) {
2d21ac55 374 error = solisten(so, uap->backlog);
0a7de745 375 }
2d21ac55
A
376 }
377#else
0a7de745 378 { error = solisten(so, uap->backlog);}
2d21ac55 379#endif /* MAC_SOCKET_SUBSET */
0a7de745 380 else {
91447636 381 error = EBADF;
0a7de745 382 }
2d21ac55 383
91447636 384 file_drop(uap->s);
0a7de745 385 return error;
1c79356b
A
386}
387
2d21ac55 388/*
f427ee49
A
389 * Returns: fp_get_ftype:EBADF Bad file descriptor
390 * fp_get_ftype:ENOTSOCK Socket operation on non-socket
2d21ac55
A
391 * :EFAULT Bad address on copyin/copyout
392 * :EBADF Bad file descriptor
393 * :EOPNOTSUPP Operation not supported on socket
394 * :EINVAL Invalid argument
395 * :EWOULDBLOCK Operation would block
396 * :ECONNABORTED Connection aborted
397 * :EINTR Interrupted function
398 * :EACCES Mandatory Access Control failure
f427ee49
A
399 * falloc:ENFILE Too many files open in system
400 * falloc:EMFILE Too many open files
401 * falloc:ENOMEM Not enough space
2d21ac55
A
402 * 0 Success
403 */
1c79356b 404int
2d21ac55 405accept_nocancel(struct proc *p, struct accept_nocancel_args *uap,
b0d623f7 406 int32_t *retval)
1c79356b 407{
91447636 408 struct fileproc *fp;
2d21ac55 409 struct sockaddr *sa = NULL;
91447636
A
410 socklen_t namelen;
411 int error;
412 struct socket *head, *so = NULL;
413 lck_mtx_t *mutex_held;
414 int fd = uap->s;
2d21ac55 415 int newfd;
f427ee49 416 unsigned int fflag;
91447636 417 int dosocklock = 0;
1c79356b 418
2d21ac55
A
419 *retval = -1;
420
55e303ae 421 AUDIT_ARG(fd, uap->s);
2d21ac55 422
1c79356b 423 if (uap->name) {
91447636 424 error = copyin(uap->anamelen, (caddr_t)&namelen,
0a7de745
A
425 sizeof(socklen_t));
426 if (error) {
427 return error;
428 }
1c79356b 429 }
f427ee49 430 error = fp_get_ftype(p, fd, DTYPE_SOCKET, ENOTSOCK, &fp);
91447636 431 if (error) {
0a7de745 432 return error;
91447636 433 }
f427ee49
A
434 head = fp->f_data;
435
2d21ac55 436#if CONFIG_MACF_SOCKET_SUBSET
0a7de745 437 if ((error = mac_socket_check_accept(kauth_cred_get(), head)) != 0) {
2d21ac55 438 goto out;
0a7de745 439 }
2d21ac55 440#endif /* MAC_SOCKET_SUBSET */
91447636
A
441
442 socket_lock(head, 1);
443
0a7de745 444 if (head->so_proto->pr_getlock != NULL) {
5ba3f43e 445 mutex_held = (*head->so_proto->pr_getlock)(head, PR_F_WILLUNLOCK);
91447636 446 dosocklock = 1;
2d21ac55 447 } else {
91447636
A
448 mutex_held = head->so_proto->pr_domain->dom_mtx;
449 dosocklock = 0;
450 }
451
1c79356b 452 if ((head->so_options & SO_ACCEPTCONN) == 0) {
2d21ac55
A
453 if ((head->so_proto->pr_flags & PR_CONNREQUIRED) == 0) {
454 error = EOPNOTSUPP;
455 } else {
456 /* POSIX: The socket is not accepting connections */
457 error = EINVAL;
458 }
91447636 459 socket_unlock(head, 1);
91447636 460 goto out;
1c79356b 461 }
813fb2f6 462check_again:
1c79356b 463 if ((head->so_state & SS_NBIO) && head->so_comp.tqh_first == NULL) {
91447636
A
464 socket_unlock(head, 1);
465 error = EWOULDBLOCK;
466 goto out;
1c79356b 467 }
2d21ac55 468 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
1c79356b
A
469 if (head->so_state & SS_CANTRCVMORE) {
470 head->so_error = ECONNABORTED;
471 break;
472 }
0a7de745 473 if (head->so_usecount < 1) {
2d21ac55
A
474 panic("accept: head=%p refcount=%d\n", head,
475 head->so_usecount);
0a7de745 476 }
2d21ac55
A
477 error = msleep((caddr_t)&head->so_timeo, mutex_held,
478 PSOCK | PCATCH, "accept", 0);
0a7de745 479 if (head->so_usecount < 1) {
2d21ac55
A
480 panic("accept: 2 head=%p refcount=%d\n", head,
481 head->so_usecount);
0a7de745 482 }
91447636
A
483 if ((head->so_state & SS_DRAINING)) {
484 error = ECONNABORTED;
485 }
1c79356b 486 if (error) {
91447636
A
487 socket_unlock(head, 1);
488 goto out;
1c79356b
A
489 }
490 }
491 if (head->so_error) {
492 error = head->so_error;
493 head->so_error = 0;
91447636
A
494 socket_unlock(head, 1);
495 goto out;
1c79356b
A
496 }
497
1c79356b
A
498 /*
499 * At this point we know that there is at least one connection
500 * ready to be accepted. Remove it from the queue prior to
501 * allocating the file descriptor for it since falloc() may
502 * block allowing another process to accept the connection
503 * instead.
504 */
91447636 505 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
813fb2f6
A
506
507 so_acquire_accept_list(head, NULL);
508 if (TAILQ_EMPTY(&head->so_comp)) {
509 so_release_accept_list(head);
510 goto check_again;
511 }
512
e3027f41 513 so = TAILQ_FIRST(&head->so_comp);
1c79356b 514 TAILQ_REMOVE(&head->so_comp, so, so_list);
d190cdc3
A
515 so->so_head = NULL;
516 so->so_state &= ~SS_COMP;
1c79356b 517 head->so_qlen--;
813fb2f6
A
518 so_release_accept_list(head);
519
2d21ac55
A
520 /* unlock head to avoid deadlock with select, keep a ref on head */
521 socket_unlock(head, 0);
522
523#if CONFIG_MACF_SOCKET_SUBSET
524 /*
525 * Pass the pre-accepted socket to the MAC framework. This is
526 * cheaper than allocating a file descriptor for the socket,
527 * calling the protocol accept callback, and possibly freeing
528 * the file descriptor should the MAC check fails.
529 */
530 if ((error = mac_socket_check_accepted(kauth_cred_get(), so)) != 0) {
39236c6e 531 socket_lock(so, 1);
d190cdc3 532 so->so_state &= ~SS_NOFDREF;
39236c6e 533 socket_unlock(so, 1);
2d21ac55
A
534 soclose(so);
535 /* Drop reference on listening socket */
536 sodereference(head);
537 goto out;
538 }
539#endif /* MAC_SOCKET_SUBSET */
540
541 /*
542 * Pass the pre-accepted socket to any interested socket filter(s).
543 * Upon failure, the socket would have been closed by the callee.
544 */
d190cdc3 545 if (so->so_filt != NULL && (error = soacceptfilter(so, head)) != 0) {
2d21ac55
A
546 /* Drop reference on listening socket */
547 sodereference(head);
548 /* Propagate socket filter's error code to the caller */
549 goto out;
550 }
551
1c79356b 552 fflag = fp->f_flag;
2d21ac55 553 error = falloc(p, &fp, &newfd, vfs_context_current());
1c79356b 554 if (error) {
39236c6e 555 /*
316670eb
A
556 * Probably ran out of file descriptors.
557 *
558 * <rdar://problem/8554930>
559 * Don't put this back on the socket like we used to, that
560 * just causes the client to spin. Drop the socket.
1c79356b 561 */
39236c6e 562 socket_lock(so, 1);
d190cdc3 563 so->so_state &= ~SS_NOFDREF;
39236c6e 564 socket_unlock(so, 1);
316670eb
A
565 soclose(so);
566 sodereference(head);
91447636 567 goto out;
2d21ac55 568 }
91447636 569 *retval = newfd;
1c79356b
A
570 fp->f_flag = fflag;
571 fp->f_ops = &socketops;
572 fp->f_data = (caddr_t)so;
fe8ab488 573
91447636 574 socket_lock(head, 0);
0a7de745 575 if (dosocklock) {
91447636 576 socket_lock(so, 1);
0a7de745 577 }
fe8ab488 578
fe8ab488
A
579 /* Sync socket non-blocking/async state with file flags */
580 if (fp->f_flag & FNONBLOCK) {
581 so->so_state |= SS_NBIO;
582 } else {
583 so->so_state &= ~SS_NBIO;
584 }
585
586 if (fp->f_flag & FASYNC) {
587 so->so_state |= SS_ASYNC;
588 so->so_rcv.sb_flags |= SB_ASYNC;
589 so->so_snd.sb_flags |= SB_ASYNC;
590 } else {
591 so->so_state &= ~SS_ASYNC;
592 so->so_rcv.sb_flags &= ~SB_ASYNC;
593 so->so_snd.sb_flags &= ~SB_ASYNC;
594 }
595
91447636
A
596 (void) soacceptlock(so, &sa, 0);
597 socket_unlock(head, 1);
2d21ac55 598 if (sa == NULL) {
1c79356b 599 namelen = 0;
0a7de745 600 if (uap->name) {
1c79356b 601 goto gotnoname;
0a7de745 602 }
91447636 603 error = 0;
2d21ac55 604 goto releasefd;
1c79356b 605 }
2d21ac55
A
606 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
607
1c79356b 608 if (uap->name) {
0a7de745 609 socklen_t sa_len;
2d21ac55
A
610
611 /* save sa_len before it is destroyed */
612 sa_len = sa->sa_len;
613 namelen = MIN(namelen, sa_len);
91447636 614 error = copyout(sa, uap->name, namelen);
0a7de745 615 if (!error) {
2d21ac55
A
616 /* return the actual, untruncated address length */
617 namelen = sa_len;
0a7de745 618 }
1c79356b 619gotnoname:
2d21ac55 620 error = copyout((caddr_t)&namelen, uap->anamelen,
0a7de745 621 sizeof(socklen_t));
1c79356b
A
622 }
623 FREE(sa, M_SONAME);
2d21ac55 624
b0d623f7 625releasefd:
2d21ac55 626 /*
6d2010ae
A
627 * If the socket has been marked as inactive by sosetdefunct(),
628 * disallow further operations on it.
2d21ac55
A
629 */
630 if (so->so_flags & SOF_DEFUNCT) {
6d2010ae
A
631 sodefunct(current_proc(), so,
632 SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL);
2d21ac55
A
633 }
634
0a7de745 635 if (dosocklock) {
91447636 636 socket_unlock(so, 1);
0a7de745 637 }
2d21ac55 638
2d21ac55
A
639 proc_fdlock(p);
640 procfdtbl_releasefd(p, newfd, NULL);
641 fp_drop(p, newfd, fp, 1);
642 proc_fdunlock(p);
643
91447636
A
644out:
645 file_drop(fd);
3e170ce0
A
646
647 if (error == 0 && ENTR_SHOULDTRACE) {
648 KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
649 newfd, 0, (int64_t)VM_KERNEL_ADDRPERM(so));
650 }
0a7de745 651 return error;
1c79356b
A
652}
653
654int
b0d623f7 655accept(struct proc *p, struct accept_args *uap, int32_t *retval)
1c79356b 656{
2d21ac55 657 __pthread_testcancel(1);
0a7de745
A
658 return accept_nocancel(p, (struct accept_nocancel_args *)uap,
659 retval);
1c79356b
A
660}
661
2d21ac55
A
662/*
663 * Returns: 0 Success
664 * EBADF Bad file descriptor
665 * EALREADY Connection already in progress
666 * EINPROGRESS Operation in progress
667 * ECONNABORTED Connection aborted
668 * EINTR Interrupted function
669 * EACCES Mandatory Access Control failure
670 * file_socket:ENOTSOCK
671 * file_socket:EBADF
672 * getsockaddr:ENAMETOOLONG Filename too long
673 * getsockaddr:EINVAL Invalid argument
674 * getsockaddr:ENOMEM Not enough space
675 * getsockaddr:EFAULT Bad address
676 * soconnectlock:EOPNOTSUPP
677 * soconnectlock:EISCONN
678 * soconnectlock:??? [depends on protocol, filters]
679 * msleep:EINTR
680 *
681 * Imputed: so_error error may be set from so_error, which
682 * may have been set by soconnectlock.
683 */
684/* ARGSUSED */
1c79356b 685int
b0d623f7 686connect(struct proc *p, struct connect_args *uap, int32_t *retval)
1c79356b 687{
2d21ac55 688 __pthread_testcancel(1);
0a7de745
A
689 return connect_nocancel(p, (struct connect_nocancel_args *)uap,
690 retval);
1c79356b 691}
1c79356b 692
1c79356b 693int
39236c6e 694connect_nocancel(proc_t p, struct connect_nocancel_args *uap, int32_t *retval)
1c79356b 695{
39236c6e 696#pragma unused(p, retval)
91447636 697 struct socket *so;
2d21ac55
A
698 struct sockaddr_storage ss;
699 struct sockaddr *sa = NULL;
91447636
A
700 int error;
701 int fd = uap->s;
4a3eedf9 702 boolean_t dgram;
1c79356b 703
55e303ae 704 AUDIT_ARG(fd, uap->s);
2d21ac55 705 error = file_socket(fd, &so);
0a7de745
A
706 if (error != 0) {
707 return error;
708 }
91447636
A
709 if (so == NULL) {
710 error = EBADF;
711 goto out;
712 }
713
4a3eedf9
A
714 /*
715 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
716 * if this is a datagram socket; translate for other types.
717 */
718 dgram = (so->so_type == SOCK_DGRAM);
719
2d21ac55 720 /* Get socket address now before we obtain socket lock */
0a7de745 721 if (uap->namelen > sizeof(ss)) {
4a3eedf9 722 error = getsockaddr(so, &sa, uap->name, uap->namelen, !dgram);
2d21ac55 723 } else {
4a3eedf9 724 error = getsockaddr_s(so, &ss, uap->name, uap->namelen, !dgram);
0a7de745 725 if (error == 0) {
2d21ac55 726 sa = (struct sockaddr *)&ss;
0a7de745 727 }
2d21ac55 728 }
0a7de745 729 if (error != 0) {
2d21ac55 730 goto out;
0a7de745 731 }
2d21ac55 732
39236c6e
A
733 error = connectit(so, sa);
734
0a7de745 735 if (sa != NULL && sa != SA(&ss)) {
39236c6e 736 FREE(sa, M_SONAME);
0a7de745
A
737 }
738 if (error == ERESTART) {
39236c6e 739 error = EINTR;
0a7de745 740 }
39236c6e
A
741out:
742 file_drop(fd);
0a7de745 743 return error;
39236c6e
A
744}
745
746static int
747connectx_nocancel(struct proc *p, struct connectx_args *uap, int *retval)
748{
749#pragma unused(p, retval)
813fb2f6
A
750 struct sockaddr_storage ss, sd;
751 struct sockaddr *src = NULL, *dst = NULL;
39236c6e 752 struct socket *so;
3e170ce0 753 int error, error1, fd = uap->socket;
39236c6e 754 boolean_t dgram;
3e170ce0
A
755 sae_connid_t cid = SAE_CONNID_ANY;
756 struct user32_sa_endpoints ep32;
757 struct user64_sa_endpoints ep64;
758 struct user_sa_endpoints ep;
759 user_ssize_t bytes_written = 0;
760 struct user_iovec *iovp;
761 uio_t auio = NULL;
39236c6e 762
3e170ce0 763 AUDIT_ARG(fd, uap->socket);
39236c6e 764 error = file_socket(fd, &so);
0a7de745
A
765 if (error != 0) {
766 return error;
767 }
39236c6e
A
768 if (so == NULL) {
769 error = EBADF;
770 goto out;
771 }
772
3e170ce0
A
773 if (uap->endpoints == USER_ADDR_NULL) {
774 error = EINVAL;
775 goto out;
776 }
777
778 if (IS_64BIT_PROCESS(p)) {
779 error = copyin(uap->endpoints, (caddr_t)&ep64, sizeof(ep64));
0a7de745 780 if (error != 0) {
3e170ce0 781 goto out;
0a7de745 782 }
3e170ce0
A
783
784 ep.sae_srcif = ep64.sae_srcif;
f427ee49 785 ep.sae_srcaddr = (user_addr_t)ep64.sae_srcaddr;
3e170ce0 786 ep.sae_srcaddrlen = ep64.sae_srcaddrlen;
f427ee49 787 ep.sae_dstaddr = (user_addr_t)ep64.sae_dstaddr;
3e170ce0
A
788 ep.sae_dstaddrlen = ep64.sae_dstaddrlen;
789 } else {
790 error = copyin(uap->endpoints, (caddr_t)&ep32, sizeof(ep32));
0a7de745 791 if (error != 0) {
3e170ce0 792 goto out;
0a7de745 793 }
3e170ce0
A
794
795 ep.sae_srcif = ep32.sae_srcif;
796 ep.sae_srcaddr = ep32.sae_srcaddr;
797 ep.sae_srcaddrlen = ep32.sae_srcaddrlen;
798 ep.sae_dstaddr = ep32.sae_dstaddr;
799 ep.sae_dstaddrlen = ep32.sae_dstaddrlen;
800 }
fe8ab488 801
39236c6e
A
802 /*
803 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
804 * if this is a datagram socket; translate for other types.
805 */
806 dgram = (so->so_type == SOCK_DGRAM);
807
813fb2f6
A
808 /* Get socket address now before we obtain socket lock */
809 if (ep.sae_srcaddr != USER_ADDR_NULL) {
0a7de745 810 if (ep.sae_srcaddrlen > sizeof(ss)) {
813fb2f6
A
811 error = getsockaddr(so, &src, ep.sae_srcaddr, ep.sae_srcaddrlen, dgram);
812 } else {
813 error = getsockaddr_s(so, &ss, ep.sae_srcaddr, ep.sae_srcaddrlen, dgram);
0a7de745 814 if (error == 0) {
813fb2f6 815 src = (struct sockaddr *)&ss;
0a7de745 816 }
813fb2f6
A
817 }
818
0a7de745 819 if (error) {
813fb2f6 820 goto out;
0a7de745 821 }
813fb2f6 822 }
39236c6e 823
3e170ce0
A
824 if (ep.sae_dstaddr == USER_ADDR_NULL) {
825 error = EINVAL;
826 goto out;
827 }
828
813fb2f6 829 /* Get socket address now before we obtain socket lock */
0a7de745 830 if (ep.sae_dstaddrlen > sizeof(sd)) {
813fb2f6
A
831 error = getsockaddr(so, &dst, ep.sae_dstaddr, ep.sae_dstaddrlen, dgram);
832 } else {
833 error = getsockaddr_s(so, &sd, ep.sae_dstaddr, ep.sae_dstaddrlen, dgram);
0a7de745 834 if (error == 0) {
813fb2f6 835 dst = (struct sockaddr *)&sd;
0a7de745 836 }
813fb2f6
A
837 }
838
0a7de745 839 if (error) {
39236c6e 840 goto out;
0a7de745 841 }
39236c6e 842
813fb2f6 843 VERIFY(dst != NULL);
39236c6e 844
3e170ce0
A
845 if (uap->iov != USER_ADDR_NULL) {
846 /* Verify range before calling uio_create() */
0a7de745 847 if (uap->iovcnt <= 0 || uap->iovcnt > UIO_MAXIOV) {
cc8bc92a
A
848 error = EINVAL;
849 goto out;
850 }
3e170ce0 851
0a7de745 852 if (uap->len == USER_ADDR_NULL) {
cc8bc92a
A
853 error = EINVAL;
854 goto out;
855 }
3e170ce0
A
856
857 /* allocate a uio to hold the number of iovecs passed */
858 auio = uio_create(uap->iovcnt, 0,
859 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
860 UIO_WRITE);
861
862 if (auio == NULL) {
863 error = ENOMEM;
864 goto out;
865 }
866
867 /*
868 * get location of iovecs within the uio.
869 * then copyin the iovecs from user space.
870 */
871 iovp = uio_iovsaddr(auio);
872 if (iovp == NULL) {
873 error = ENOMEM;
874 goto out;
875 }
876 error = copyin_user_iovec_array(uap->iov,
0a7de745
A
877 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
878 uap->iovcnt, iovp);
879 if (error != 0) {
3e170ce0 880 goto out;
0a7de745 881 }
3e170ce0
A
882
883 /* finish setup of uio_t */
884 error = uio_calculateresid(auio);
885 if (error != 0) {
886 goto out;
887 }
888 }
889
813fb2f6 890 error = connectitx(so, src, dst, p, ep.sae_srcif, uap->associd,
3e170ce0 891 &cid, auio, uap->flags, &bytes_written);
0a7de745 892 if (error == ERESTART) {
39236c6e 893 error = EINTR;
0a7de745 894 }
39236c6e 895
3e170ce0 896 if (uap->len != USER_ADDR_NULL) {
0a7de745 897 error1 = copyout(&bytes_written, uap->len, sizeof(uap->len));
3e170ce0 898 /* give precedence to connectitx errors */
0a7de745 899 if ((error1 != 0) && (error == 0)) {
3e170ce0 900 error = error1;
0a7de745 901 }
3e170ce0 902 }
39236c6e 903
3e170ce0 904 if (uap->connid != USER_ADDR_NULL) {
0a7de745 905 error1 = copyout(&cid, uap->connid, sizeof(cid));
3e170ce0 906 /* give precedence to connectitx errors */
0a7de745 907 if ((error1 != 0) && (error == 0)) {
3e170ce0 908 error = error1;
0a7de745 909 }
3e170ce0 910 }
39236c6e
A
911out:
912 file_drop(fd);
3e170ce0
A
913 if (auio != NULL) {
914 uio_free(auio);
915 }
0a7de745 916 if (src != NULL && src != SA(&ss)) {
813fb2f6 917 FREE(src, M_SONAME);
0a7de745
A
918 }
919 if (dst != NULL && dst != SA(&sd)) {
813fb2f6 920 FREE(dst, M_SONAME);
0a7de745
A
921 }
922 return error;
39236c6e
A
923}
924
/*
 * connectx
 *
 * System call entry point.  Because of its similarity to a POSIX
 * interface it is treated as an unofficial cancellation point:
 * __pthread_testcancel(1) is called before the work is delegated to
 * connectx_nocancel().
 */
int
connectx(struct proc *p, struct connectx_args *uap, int *retval)
{
	int error;

	__pthread_testcancel(1);
	error = connectx_nocancel(p, uap, retval);
	return error;
}
935
936static int
937connectit(struct socket *so, struct sockaddr *sa)
938{
939 int error;
940
2d21ac55
A
941 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
942#if CONFIG_MACF_SOCKET_SUBSET
0a7de745
A
943 if ((error = mac_socket_check_connect(kauth_cred_get(), so, sa)) != 0) {
944 return error;
945 }
39236c6e
A
946#endif /* MAC_SOCKET_SUBSET */
947
948 socket_lock(so, 1);
949 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
950 error = EALREADY;
951 goto out;
952 }
953 error = soconnectlock(so, sa, 0);
954 if (error != 0) {
2d21ac55
A
955 goto out;
956 }
39236c6e
A
957 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
958 error = EINPROGRESS;
959 goto out;
960 }
961 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
962 lck_mtx_t *mutex_held;
963
0a7de745 964 if (so->so_proto->pr_getlock != NULL) {
5ba3f43e 965 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
0a7de745 966 } else {
39236c6e 967 mutex_held = so->so_proto->pr_domain->dom_mtx;
0a7de745 968 }
39236c6e
A
969 error = msleep((caddr_t)&so->so_timeo, mutex_held,
970 PSOCK | PCATCH, __func__, 0);
971 if (so->so_state & SS_DRAINING) {
972 error = ECONNABORTED;
973 }
0a7de745 974 if (error != 0) {
39236c6e 975 break;
0a7de745 976 }
39236c6e
A
977 }
978 if (error == 0) {
979 error = so->so_error;
980 so->so_error = 0;
981 }
982out:
983 socket_unlock(so, 1);
0a7de745 984 return error;
39236c6e
A
985}
986
987static int
813fb2f6
A
988connectitx(struct socket *so, struct sockaddr *src,
989 struct sockaddr *dst, struct proc *p, uint32_t ifscope,
3e170ce0
A
990 sae_associd_t aid, sae_connid_t *pcid, uio_t auio, unsigned int flags,
991 user_ssize_t *bytes_written)
39236c6e 992{
39236c6e
A
993 int error;
994
813fb2f6 995 VERIFY(dst != NULL);
39236c6e 996
813fb2f6 997 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), dst);
39236c6e 998#if CONFIG_MACF_SOCKET_SUBSET
0a7de745
A
999 if ((error = mac_socket_check_connect(kauth_cred_get(), so, dst)) != 0) {
1000 return error;
1001 }
cb323159
A
1002
1003 if (auio != NULL) {
1004 if ((error = mac_socket_check_send(kauth_cred_get(), so, dst)) != 0) {
1005 return error;
1006 }
1007 }
2d21ac55 1008#endif /* MAC_SOCKET_SUBSET */
91447636 1009
39236c6e 1010 socket_lock(so, 1);
91447636 1011 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
91447636
A
1012 error = EALREADY;
1013 goto out;
1014 }
3e170ce0 1015
813fb2f6 1016 error = soconnectxlocked(so, src, dst, p, ifscope,
bca245ac 1017 aid, pcid, flags, NULL, 0, auio, bytes_written);
39236c6e 1018 if (error != 0) {
39236c6e
A
1019 goto out;
1020 }
3e170ce0
A
1021 /*
1022 * If, after the call to soconnectxlocked the flag is still set (in case
1023 * data has been queued and the connect() has actually been triggered,
1024 * it will have been unset by the transport), we exit immediately. There
1025 * is no reason to wait on any event.
1026 */
1027 if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
1028 error = 0;
1029 goto out;
1030 }
1c79356b 1031 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
91447636
A
1032 error = EINPROGRESS;
1033 goto out;
1c79356b 1034 }
1c79356b 1035 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
39236c6e
A
1036 lck_mtx_t *mutex_held;
1037
0a7de745 1038 if (so->so_proto->pr_getlock != NULL) {
5ba3f43e 1039 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
0a7de745 1040 } else {
91447636 1041 mutex_held = so->so_proto->pr_domain->dom_mtx;
0a7de745 1042 }
2d21ac55 1043 error = msleep((caddr_t)&so->so_timeo, mutex_held,
39236c6e
A
1044 PSOCK | PCATCH, __func__, 0);
1045 if (so->so_state & SS_DRAINING) {
91447636
A
1046 error = ECONNABORTED;
1047 }
0a7de745 1048 if (error != 0) {
1c79356b 1049 break;
0a7de745 1050 }
1c79356b
A
1051 }
1052 if (error == 0) {
1053 error = so->so_error;
1054 so->so_error = 0;
1055 }
39236c6e 1056out:
91447636 1057 socket_unlock(so, 1);
0a7de745 1058 return error;
39236c6e
A
1059}
1060
/*
 * peeloff
 *
 * System call entry point that ignores all of its arguments: it only tests
 * for a pending thread cancellation and then reports success.
 */
int
peeloff(struct proc *p, struct peeloff_args *uap, int *retval)
{
#pragma unused(p, uap, retval)
	/*
	 * This call resembles a POSIX interface, so it is treated as an
	 * unofficial cancellation point.
	 */
	__pthread_testcancel(1);

	return 0;
}
1072
/*
 * disconnectx
 *
 * System call entry point.  Tests for a pending thread cancellation and
 * then hands the request to disconnectx_nocancel(), whose result is
 * returned unchanged.
 */
int
disconnectx(struct proc *p, struct disconnectx_args *uap, int *retval)
{
	int rv;

	/*
	 * This call resembles a POSIX interface, so it is treated as an
	 * unofficial cancellation point.
	 */
	__pthread_testcancel(1);

	rv = disconnectx_nocancel(p, uap, retval);
	return rv;
}
1083
1084static int
1085disconnectx_nocancel(struct proc *p, struct disconnectx_args *uap, int *retval)
1086{
1087#pragma unused(p, retval)
1088 struct socket *so;
1089 int fd = uap->s;
1090 int error;
1091
1092 error = file_socket(fd, &so);
0a7de745
A
1093 if (error != 0) {
1094 return error;
1095 }
39236c6e
A
1096 if (so == NULL) {
1097 error = EBADF;
1098 goto out;
1099 }
1100
1101 error = sodisconnectx(so, uap->aid, uap->cid);
91447636
A
1102out:
1103 file_drop(fd);
0a7de745 1104 return error;
1c79356b
A
1105}
1106
2d21ac55
A
1107/*
1108 * Returns: 0 Success
1109 * socreate:EAFNOSUPPORT
1110 * socreate:EPROTOTYPE
1111 * socreate:EPROTONOSUPPORT
1112 * socreate:ENOBUFS
1113 * socreate:ENOMEM
1114 * socreate:EISCONN
1115 * socreate:??? [other protocol families, IPSEC]
1116 * falloc:ENFILE
1117 * falloc:EMFILE
1118 * falloc:ENOMEM
1119 * copyout:EFAULT
1120 * soconnect2:EINVAL
1121 * soconnect2:EPROTOTYPE
1122 * soconnect2:??? [other protocol families]
1123 */
1c79356b 1124int
2d21ac55 1125socketpair(struct proc *p, struct socketpair_args *uap,
b0d623f7 1126 __unused int32_t *retval)
1c79356b 1127{
91447636 1128 struct fileproc *fp1, *fp2;
1c79356b
A
1129 struct socket *so1, *so2;
1130 int fd, error, sv[2];
1131
55e303ae 1132 AUDIT_ARG(socket, uap->domain, uap->type, uap->protocol);
1c79356b 1133 error = socreate(uap->domain, &so1, uap->type, uap->protocol);
0a7de745
A
1134 if (error) {
1135 return error;
1136 }
1c79356b 1137 error = socreate(uap->domain, &so2, uap->type, uap->protocol);
0a7de745 1138 if (error) {
1c79356b 1139 goto free1;
0a7de745 1140 }
91447636 1141
2d21ac55 1142 error = falloc(p, &fp1, &fd, vfs_context_current());
91447636 1143 if (error) {
1c79356b 1144 goto free2;
91447636 1145 }
0a7de745 1146 fp1->f_flag = FREAD | FWRITE;
1c79356b
A
1147 fp1->f_ops = &socketops;
1148 fp1->f_data = (caddr_t)so1;
91447636
A
1149 sv[0] = fd;
1150
2d21ac55 1151 error = falloc(p, &fp2, &fd, vfs_context_current());
91447636 1152 if (error) {
1c79356b 1153 goto free3;
91447636 1154 }
0a7de745 1155 fp2->f_flag = FREAD | FWRITE;
1c79356b
A
1156 fp2->f_ops = &socketops;
1157 fp2->f_data = (caddr_t)so2;
1158 sv[1] = fd;
91447636 1159
1c79356b
A
1160 error = soconnect2(so1, so2);
1161 if (error) {
1c79356b
A
1162 goto free4;
1163 }
1c79356b
A
1164 if (uap->type == SOCK_DGRAM) {
1165 /*
1166 * Datagram socket connection is asymmetric.
1167 */
2d21ac55
A
1168 error = soconnect2(so2, so1);
1169 if (error) {
1170 goto free4;
1171 }
1c79356b 1172 }
91447636 1173
0a7de745 1174 if ((error = copyout(sv, uap->rsv, 2 * sizeof(int))) != 0) {
6d2010ae 1175 goto free4;
0a7de745 1176 }
6d2010ae 1177
91447636 1178 proc_fdlock(p);
6601e61a
A
1179 procfdtbl_releasefd(p, sv[0], NULL);
1180 procfdtbl_releasefd(p, sv[1], NULL);
91447636
A
1181 fp_drop(p, sv[0], fp1, 1);
1182 fp_drop(p, sv[1], fp2, 1);
1183 proc_fdunlock(p);
1184
0a7de745 1185 return 0;
1c79356b 1186free4:
91447636 1187 fp_free(p, sv[1], fp2);
1c79356b 1188free3:
91447636 1189 fp_free(p, sv[0], fp1);
1c79356b 1190free2:
2d21ac55 1191 (void) soclose(so2);
1c79356b 1192free1:
2d21ac55 1193 (void) soclose(so1);
0a7de745 1194 return error;
1c79356b
A
1195}
1196
2d21ac55
A
1197/*
1198 * Returns: 0 Success
1199 * EINVAL
1200 * ENOBUFS
1201 * EBADF
1202 * EPIPE
1203 * EACCES Mandatory Access Control failure
1204 * file_socket:ENOTSOCK
1205 * file_socket:EBADF
1206 * getsockaddr:ENAMETOOLONG Filename too long
1207 * getsockaddr:EINVAL Invalid argument
1208 * getsockaddr:ENOMEM Not enough space
1209 * getsockaddr:EFAULT Bad address
1210 * <pru_sosend>:EACCES[TCP]
1211 * <pru_sosend>:EADDRINUSE[TCP]
1212 * <pru_sosend>:EADDRNOTAVAIL[TCP]
1213 * <pru_sosend>:EAFNOSUPPORT[TCP]
1214 * <pru_sosend>:EAGAIN[TCP]
1215 * <pru_sosend>:EBADF
1216 * <pru_sosend>:ECONNRESET[TCP]
1217 * <pru_sosend>:EFAULT
1218 * <pru_sosend>:EHOSTUNREACH[TCP]
1219 * <pru_sosend>:EINTR
1220 * <pru_sosend>:EINVAL
1221 * <pru_sosend>:EISCONN[AF_INET]
1222 * <pru_sosend>:EMSGSIZE[TCP]
1223 * <pru_sosend>:ENETDOWN[TCP]
1224 * <pru_sosend>:ENETUNREACH[TCP]
1225 * <pru_sosend>:ENOBUFS
1226 * <pru_sosend>:ENOMEM[TCP]
1227 * <pru_sosend>:ENOTCONN[AF_INET]
1228 * <pru_sosend>:EOPNOTSUPP
1229 * <pru_sosend>:EPERM[TCP]
1230 * <pru_sosend>:EPIPE
1231 * <pru_sosend>:EWOULDBLOCK
1232 * <pru_sosend>:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
1233 * <pru_sosend>:???[AF_INET] [whatever a filter author chooses]
1234 * <pru_sosend>:??? [value from so_error]
1235 * sockargs:???
1236 */
1c79356b 1237static int
3e170ce0 1238sendit(struct proc *p, struct socket *so, struct user_msghdr *mp, uio_t uiop,
b0d623f7 1239 int flags, int32_t *retval)
1c79356b 1240{
2d21ac55
A
1241 struct mbuf *control = NULL;
1242 struct sockaddr_storage ss;
1243 struct sockaddr *to = NULL;
1244 boolean_t want_free = TRUE;
91447636 1245 int error;
91447636 1246 user_ssize_t len;
2d21ac55
A
1247
1248 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
1c79356b 1249
2d21ac55 1250 if (mp->msg_name != USER_ADDR_NULL) {
0a7de745 1251 if (mp->msg_namelen > sizeof(ss)) {
2d21ac55 1252 error = getsockaddr(so, &to, mp->msg_name,
4a3eedf9 1253 mp->msg_namelen, TRUE);
2d21ac55
A
1254 } else {
1255 error = getsockaddr_s(so, &ss, mp->msg_name,
4a3eedf9 1256 mp->msg_namelen, TRUE);
2d21ac55
A
1257 if (error == 0) {
1258 to = (struct sockaddr *)&ss;
1259 want_free = FALSE;
1260 }
1c79356b 1261 }
0a7de745 1262 if (error != 0) {
2d21ac55 1263 goto out;
0a7de745 1264 }
2d21ac55 1265 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), to);
91447636 1266 }
2d21ac55 1267 if (mp->msg_control != USER_ADDR_NULL) {
0a7de745 1268 if (mp->msg_controllen < sizeof(struct cmsghdr)) {
1c79356b
A
1269 error = EINVAL;
1270 goto bad;
1271 }
1272 error = sockargs(&control, mp->msg_control,
1273 mp->msg_controllen, MT_CONTROL);
0a7de745 1274 if (error != 0) {
1c79356b 1275 goto bad;
0a7de745 1276 }
91447636 1277 }
1c79356b 1278
2d21ac55
A
1279#if CONFIG_MACF_SOCKET_SUBSET
1280 /*
1281 * We check the state without holding the socket lock;
1282 * if a race condition occurs, it would simply result
3e170ce0 1283 * in an extra call to the MAC check function.
2d21ac55 1284 */
3e170ce0 1285 if (to != NULL &&
316670eb 1286 !(so->so_state & SS_DEFUNCT) &&
0a7de745 1287 (error = mac_socket_check_send(kauth_cred_get(), so, to)) != 0) {
2d21ac55 1288 goto bad;
0a7de745 1289 }
2d21ac55 1290#endif /* MAC_SOCKET_SUBSET */
91447636
A
1291
1292 len = uio_resid(uiop);
39236c6e 1293 error = so->so_proto->pr_usrreqs->pru_sosend(so, to, uiop, 0,
0a7de745 1294 control, flags);
2d21ac55 1295 if (error != 0) {
91447636 1296 if (uio_resid(uiop) != len && (error == ERESTART ||
0a7de745 1297 error == EINTR || error == EWOULDBLOCK)) {
1c79356b 1298 error = 0;
0a7de745 1299 }
2d21ac55 1300 /* Generation of SIGPIPE can be controlled per socket */
f427ee49
A
1301 if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE) &&
1302 !(flags & MSG_NOSIGNAL)) {
1c79356b 1303 psignal(p, SIGPIPE);
0a7de745 1304 }
1c79356b 1305 }
0a7de745 1306 if (error == 0) {
91447636 1307 *retval = (int)(len - uio_resid(uiop));
0a7de745 1308 }
91447636 1309bad:
c3c9b80d 1310 if (want_free) {
1c79356b 1311 FREE(to, M_SONAME);
0a7de745 1312 }
91447636 1313out:
2d21ac55 1314 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error, 0, 0, 0, 0);
3e170ce0 1315
0a7de745 1316 return error;
1c79356b
A
1317}
1318
2d21ac55
A
1319/*
1320 * Returns: 0 Success
1321 * ENOMEM
1322 * sendit:??? [see sendit definition in this file]
1323 * write:??? [4056224: applicable for pipes]
1324 */
1c79356b 1325int
b0d623f7 1326sendto(struct proc *p, struct sendto_args *uap, int32_t *retval)
2d21ac55
A
1327{
1328 __pthread_testcancel(1);
0a7de745 1329 return sendto_nocancel(p, (struct sendto_nocancel_args *)uap, retval);
2d21ac55
A
1330}
1331
1332int
39236c6e 1333sendto_nocancel(struct proc *p,
0a7de745
A
1334 struct sendto_nocancel_args *uap,
1335 int32_t *retval)
1c79356b 1336{
91447636
A
1337 struct user_msghdr msg;
1338 int error;
1339 uio_t auio = NULL;
3e170ce0 1340 struct socket *so;
1c79356b 1341
2d21ac55 1342 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_START, 0, 0, 0, 0, 0);
55e303ae 1343 AUDIT_ARG(fd, uap->s);
1c79356b 1344
94ff46dc
A
1345 if (uap->flags & MSG_SKIPCFIL) {
1346 error = EPERM;
1347 goto done;
1348 }
1349
f427ee49
A
1350 if (uap->len > LONG_MAX) {
1351 error = EINVAL;
1352 goto done;
1353 }
1354
91447636 1355 auio = uio_create(1, 0,
2d21ac55
A
1356 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1357 UIO_WRITE);
91447636 1358 if (auio == NULL) {
3e170ce0
A
1359 error = ENOMEM;
1360 goto done;
91447636
A
1361 }
1362 uio_addiov(auio, uap->buf, uap->len);
1363
1c79356b
A
1364 msg.msg_name = uap->to;
1365 msg.msg_namelen = uap->tolen;
91447636
A
1366 /* no need to set up msg_iov. sendit uses uio_t we send it */
1367 msg.msg_iov = 0;
1368 msg.msg_iovlen = 0;
1c79356b 1369 msg.msg_control = 0;
1c79356b 1370 msg.msg_flags = 0;
1c79356b 1371
3e170ce0 1372 error = file_socket(uap->s, &so);
0a7de745 1373 if (error) {
3e170ce0 1374 goto done;
0a7de745 1375 }
2d21ac55 1376
3e170ce0
A
1377 if (so == NULL) {
1378 error = EBADF;
1379 } else {
1380 error = sendit(p, so, &msg, auio, uap->flags, retval);
91447636 1381 }
2d21ac55 1382
3e170ce0
A
1383 file_drop(uap->s);
1384done:
0a7de745 1385 if (auio != NULL) {
3e170ce0 1386 uio_free(auio);
0a7de745 1387 }
3e170ce0 1388
2d21ac55 1389 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_END, error, *retval, 0, 0, 0);
1c79356b 1390
0a7de745 1391 return error;
1c79356b 1392}
1c79356b 1393
2d21ac55
A
1394/*
1395 * Returns: 0 Success
1396 * ENOBUFS
1397 * copyin:EFAULT
1398 * sendit:??? [see sendit definition in this file]
1399 */
1c79356b 1400int
b0d623f7 1401sendmsg(struct proc *p, struct sendmsg_args *uap, int32_t *retval)
1c79356b 1402{
2d21ac55 1403 __pthread_testcancel(1);
0a7de745
A
1404 return sendmsg_nocancel(p, (struct sendmsg_nocancel_args *)uap,
1405 retval);
1c79356b 1406}
1c79356b
A
1407
1408int
3e170ce0
A
1409sendmsg_nocancel(struct proc *p, struct sendmsg_nocancel_args *uap,
1410 int32_t *retval)
1c79356b 1411{
b0d623f7
A
1412 struct user32_msghdr msg32;
1413 struct user64_msghdr msg64;
91447636
A
1414 struct user_msghdr user_msg;
1415 caddr_t msghdrp;
0a7de745 1416 int size_of_msghdr;
1c79356b 1417 int error;
91447636
A
1418 uio_t auio = NULL;
1419 struct user_iovec *iovp;
3e170ce0 1420 struct socket *so;
1c79356b 1421
2d21ac55 1422 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
55e303ae 1423 AUDIT_ARG(fd, uap->s);
94ff46dc
A
1424
1425 if (uap->flags & MSG_SKIPCFIL) {
1426 error = EPERM;
1427 goto done;
1428 }
1429
91447636 1430 if (IS_64BIT_PROCESS(p)) {
b0d623f7 1431 msghdrp = (caddr_t)&msg64;
0a7de745 1432 size_of_msghdr = sizeof(msg64);
2d21ac55 1433 } else {
b0d623f7 1434 msghdrp = (caddr_t)&msg32;
0a7de745 1435 size_of_msghdr = sizeof(msg32);
91447636
A
1436 }
1437 error = copyin(uap->msg, msghdrp, size_of_msghdr);
2d21ac55
A
1438 if (error) {
1439 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
0a7de745 1440 return error;
1c79356b 1441 }
91447636 1442
b0d623f7
A
1443 if (IS_64BIT_PROCESS(p)) {
1444 user_msg.msg_flags = msg64.msg_flags;
1445 user_msg.msg_controllen = msg64.msg_controllen;
f427ee49 1446 user_msg.msg_control = (user_addr_t)msg64.msg_control;
b0d623f7 1447 user_msg.msg_iovlen = msg64.msg_iovlen;
f427ee49 1448 user_msg.msg_iov = (user_addr_t)msg64.msg_iov;
b0d623f7 1449 user_msg.msg_namelen = msg64.msg_namelen;
f427ee49 1450 user_msg.msg_name = (user_addr_t)msg64.msg_name;
b0d623f7
A
1451 } else {
1452 user_msg.msg_flags = msg32.msg_flags;
1453 user_msg.msg_controllen = msg32.msg_controllen;
1454 user_msg.msg_control = msg32.msg_control;
1455 user_msg.msg_iovlen = msg32.msg_iovlen;
1456 user_msg.msg_iov = msg32.msg_iov;
1457 user_msg.msg_namelen = msg32.msg_namelen;
1458 user_msg.msg_name = msg32.msg_name;
91447636
A
1459 }
1460
1461 if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
2d21ac55
A
1462 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, EMSGSIZE,
1463 0, 0, 0, 0);
0a7de745 1464 return EMSGSIZE;
91447636
A
1465 }
1466
1467 /* allocate a uio large enough to hold the number of iovecs passed */
1468 auio = uio_create(user_msg.msg_iovlen, 0,
2d21ac55
A
1469 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1470 UIO_WRITE);
91447636
A
1471 if (auio == NULL) {
1472 error = ENOBUFS;
1473 goto done;
1474 }
2d21ac55 1475
91447636 1476 if (user_msg.msg_iovlen) {
2d21ac55
A
1477 /*
1478 * get location of iovecs within the uio.
1479 * then copyin the iovecs from user space.
91447636
A
1480 */
1481 iovp = uio_iovsaddr(auio);
1482 if (iovp == NULL) {
1483 error = ENOBUFS;
1484 goto done;
1485 }
b0d623f7 1486 error = copyin_user_iovec_array(user_msg.msg_iov,
0a7de745
A
1487 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
1488 user_msg.msg_iovlen, iovp);
1489 if (error) {
91447636 1490 goto done;
0a7de745 1491 }
91447636 1492 user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
2d21ac55
A
1493
1494 /* finish setup of uio_t */
39236c6e
A
1495 error = uio_calculateresid(auio);
1496 if (error) {
1497 goto done;
1498 }
2d21ac55 1499 } else {
91447636
A
1500 user_msg.msg_iov = 0;
1501 }
2d21ac55
A
1502
1503 /* msg_flags is ignored for send */
91447636 1504 user_msg.msg_flags = 0;
2d21ac55 1505
3e170ce0
A
1506 error = file_socket(uap->s, &so);
1507 if (error) {
1508 goto done;
1509 }
1510 if (so == NULL) {
1511 error = EBADF;
1512 } else {
1513 error = sendit(p, so, &user_msg, auio, uap->flags, retval);
1514 }
1515 file_drop(uap->s);
1c79356b 1516done:
91447636
A
1517 if (auio != NULL) {
1518 uio_free(auio);
1519 }
2d21ac55 1520 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
91447636 1521
0a7de745 1522 return error;
1c79356b
A
1523}
1524
fe8ab488
A
1525int
1526sendmsg_x(struct proc *p, struct sendmsg_x_args *uap, user_ssize_t *retval)
1527{
1528 int error = 0;
3e170ce0 1529 struct user_msghdr_x *user_msg_x = NULL;
fe8ab488
A
1530 struct uio **uiop = NULL;
1531 struct socket *so;
1532 u_int i;
1533 struct sockaddr *to = NULL;
fe8ab488
A
1534 user_ssize_t len_before = 0, len_after;
1535 int need_drop = 0;
1536 size_t size_of_msghdr;
1537 void *umsgp = NULL;
1538 u_int uiocnt;
3e170ce0 1539 int has_addr_or_ctl = 0;
fe8ab488
A
1540
1541 KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0);
1542
c3c9b80d
A
1543 size_of_msghdr = IS_64BIT_PROCESS(p) ?
1544 sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x);
1545
94ff46dc
A
1546 if (uap->flags & MSG_SKIPCFIL) {
1547 error = EPERM;
1548 goto out;
1549 }
1550
fe8ab488
A
1551 error = file_socket(uap->s, &so);
1552 if (error) {
1553 goto out;
1554 }
1555 need_drop = 1;
1556 if (so == NULL) {
1557 error = EBADF;
1558 goto out;
1559 }
fe8ab488
A
1560
1561 /*
1562 * Input parameter range check
1563 */
1564 if (uap->cnt == 0 || uap->cnt > UIO_MAXIOV) {
1565 error = EINVAL;
1566 goto out;
1567 }
3e170ce0
A
1568 /*
1569 * Clip to max currently allowed
1570 */
0a7de745 1571 if (uap->cnt > somaxsendmsgx) {
3e170ce0 1572 uap->cnt = somaxsendmsgx;
0a7de745 1573 }
3e170ce0 1574
c3c9b80d
A
1575 user_msg_x = kheap_alloc(KHEAP_TEMP,
1576 uap->cnt * sizeof(struct user_msghdr_x), Z_WAITOK | Z_ZERO);
3e170ce0 1577 if (user_msg_x == NULL) {
c3c9b80d 1578 DBG_PRINTF("%s kheap_alloc user_msg_x failed\n", __func__);
fe8ab488
A
1579 error = ENOMEM;
1580 goto out;
1581 }
c3c9b80d
A
1582 uiop = kheap_alloc(KHEAP_TEMP,
1583 uap->cnt * sizeof(struct uio *), Z_WAITOK | Z_ZERO);
fe8ab488 1584 if (uiop == NULL) {
c3c9b80d 1585 DBG_PRINTF("%s kheap_alloc uiop failed\n", __func__);
fe8ab488
A
1586 error = ENOMEM;
1587 goto out;
1588 }
1589
c3c9b80d
A
1590 umsgp = kheap_alloc(KHEAP_TEMP,
1591 uap->cnt * size_of_msghdr, Z_WAITOK | Z_ZERO);
fe8ab488 1592 if (umsgp == NULL) {
c3c9b80d 1593 printf("%s kheap_alloc user_msg_x failed\n", __func__);
fe8ab488
A
1594 error = ENOMEM;
1595 goto out;
1596 }
1597 error = copyin(uap->msgp, umsgp, uap->cnt * size_of_msghdr);
1598 if (error) {
3e170ce0 1599 DBG_PRINTF("%s copyin() failed\n", __func__);
fe8ab488
A
1600 goto out;
1601 }
1602 error = internalize_user_msghdr_array(umsgp,
0a7de745
A
1603 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
1604 UIO_WRITE, uap->cnt, user_msg_x, uiop);
fe8ab488 1605 if (error) {
3e170ce0 1606 DBG_PRINTF("%s copyin_user_msghdr_array() failed\n", __func__);
fe8ab488
A
1607 goto out;
1608 }
1609 /*
1610 * Make sure the size of each message iovec and
1611 * the aggregate size of all the iovec is valid
1612 */
f427ee49 1613 if (uio_array_is_valid(uiop, uap->cnt) == false) {
fe8ab488
A
1614 error = EINVAL;
1615 goto out;
1616 }
1617
1618 /*
1619 * Sanity check on passed arguments
1620 */
1621 for (i = 0; i < uap->cnt; i++) {
3e170ce0 1622 struct user_msghdr_x *mp = user_msg_x + i;
fe8ab488
A
1623
1624 /*
1625 * No flags on send message
1626 */
1627 if (mp->msg_flags != 0) {
1628 error = EINVAL;
1629 goto out;
1630 }
1631 /*
1632 * No support for address or ancillary data (yet)
1633 */
0a7de745 1634 if (mp->msg_name != USER_ADDR_NULL || mp->msg_namelen != 0) {
3e170ce0 1635 has_addr_or_ctl = 1;
0a7de745 1636 }
3e170ce0 1637
fe8ab488 1638 if (mp->msg_control != USER_ADDR_NULL ||
0a7de745 1639 mp->msg_controllen != 0) {
3e170ce0 1640 has_addr_or_ctl = 1;
0a7de745 1641 }
3e170ce0 1642
fe8ab488
A
1643#if CONFIG_MACF_SOCKET_SUBSET
1644 /*
1645 * We check the state without holding the socket lock;
1646 * if a race condition occurs, it would simply result
3e170ce0 1647 * in an extra call to the MAC check function.
fe8ab488
A
1648 *
1649 * Note: The following check is never true taken with the
1650 * current limitation that we do not accept to pass an address,
3e170ce0
A
1651 * this is effectively placeholder code. If we add support for
1652 * addresses, we will have to check every address.
fe8ab488 1653 */
3e170ce0 1654 if (to != NULL &&
fe8ab488 1655 !(so->so_state & SS_DEFUNCT) &&
3e170ce0 1656 (error = mac_socket_check_send(kauth_cred_get(), so, to))
0a7de745 1657 != 0) {
fe8ab488 1658 goto out;
0a7de745 1659 }
fe8ab488
A
1660#endif /* MAC_SOCKET_SUBSET */
1661 }
1662
1663 len_before = uio_array_resid(uiop, uap->cnt);
1664
3e170ce0
A
1665 /*
1666 * Feed list of packets at once only for connected socket without
1667 * control message
1668 */
1669 if (so->so_proto->pr_usrreqs->pru_sosend_list !=
1670 pru_sosend_list_notsupp &&
1671 has_addr_or_ctl == 0 && somaxsendmsgx == 0) {
1672 error = so->so_proto->pr_usrreqs->pru_sosend_list(so, uiop,
1673 uap->cnt, uap->flags);
1674 } else {
1675 for (i = 0; i < uap->cnt; i++) {
1676 struct user_msghdr_x *mp = user_msg_x + i;
1677 struct user_msghdr user_msg;
1678 uio_t auio = uiop[i];
1679 int32_t tmpval;
1680
1681 user_msg.msg_flags = mp->msg_flags;
1682 user_msg.msg_controllen = mp->msg_controllen;
1683 user_msg.msg_control = mp->msg_control;
1684 user_msg.msg_iovlen = mp->msg_iovlen;
1685 user_msg.msg_iov = mp->msg_iov;
1686 user_msg.msg_namelen = mp->msg_namelen;
1687 user_msg.msg_name = mp->msg_name;
1688
1689 error = sendit(p, so, &user_msg, auio, uap->flags,
1690 &tmpval);
0a7de745 1691 if (error != 0) {
3e170ce0 1692 break;
0a7de745 1693 }
3e170ce0
A
1694 }
1695 }
fe8ab488
A
1696 len_after = uio_array_resid(uiop, uap->cnt);
1697
3e170ce0
A
1698 VERIFY(len_after <= len_before);
1699
fe8ab488
A
1700 if (error != 0) {
1701 if (len_after != len_before && (error == ERESTART ||
3e170ce0 1702 error == EINTR || error == EWOULDBLOCK ||
0a7de745 1703 error == ENOBUFS)) {
fe8ab488 1704 error = 0;
0a7de745 1705 }
fe8ab488 1706 /* Generation of SIGPIPE can be controlled per socket */
f427ee49
A
1707 if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE) &&
1708 !(uap->flags & MSG_NOSIGNAL)) {
fe8ab488 1709 psignal(p, SIGPIPE);
0a7de745 1710 }
fe8ab488
A
1711 }
1712 if (error == 0) {
1713 uiocnt = externalize_user_msghdr_array(umsgp,
1714 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
3e170ce0 1715 UIO_WRITE, uap->cnt, user_msg_x, uiop);
fe8ab488
A
1716
1717 *retval = (int)(uiocnt);
1718 }
1719out:
0a7de745 1720 if (need_drop) {
fe8ab488 1721 file_drop(uap->s);
0a7de745 1722 }
c3c9b80d 1723 kheap_free(KHEAP_TEMP, umsgp, uap->cnt * size_of_msghdr);
fe8ab488
A
1724 if (uiop != NULL) {
1725 free_uio_array(uiop, uap->cnt);
c3c9b80d
A
1726 kheap_free(KHEAP_TEMP, uiop,
1727 uap->cnt * sizeof(struct uio *));
0a7de745 1728 }
c3c9b80d
A
1729 kheap_free(KHEAP_TEMP, user_msg_x,
1730 uap->cnt * sizeof(struct user_msghdr_x));
fe8ab488
A
1731
1732 KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0);
1733
0a7de745 1734 return error;
fe8ab488
A
1735}
1736
3e170ce0
A
1737
1738static int
1739copyout_sa(struct sockaddr *fromsa, user_addr_t name, socklen_t *namelen)
1740{
1741 int error = 0;
1742 socklen_t sa_len = 0;
1743 ssize_t len;
1744
1745 len = *namelen;
1746 if (len <= 0 || fromsa == 0) {
1747 len = 0;
1748 } else {
1749#ifndef MIN
0a7de745 1750#define MIN(a, b) ((a) > (b) ? (b) : (a))
3e170ce0
A
1751#endif
1752 sa_len = fromsa->sa_len;
1753 len = MIN((unsigned int)len, sa_len);
1754 error = copyout(fromsa, name, (unsigned)len);
0a7de745 1755 if (error) {
3e170ce0 1756 goto out;
0a7de745 1757 }
3e170ce0
A
1758 }
1759 *namelen = sa_len;
1760out:
0a7de745 1761 return 0;
3e170ce0
A
1762}
1763
/*
 * copyout_control
 *
 * Walks the mbuf chain `m' and copies the control messages it holds out to
 * the user buffer `control', using at most *controllen bytes.
 *
 *	p		process the data is copied out for; proc_is64bit(p)
 *			selects the timeval layout for SCM_TIMESTAMP
 *	m		first mbuf of the control chain, may be NULL
 *	control		user-space destination buffer
 *	controllen	in: space available in `control';
 *			out: number of bytes the output cursor advanced
 *			(stays 0 when a copyout fails, because the final
 *			store is skipped on that path)
 *	flags		MSG_CTRUNC is or'ed in when a message does not fit in
 *			the space that is left
 *	so		socket the data came from, may be NULL; only used by
 *			the CONTENT_FILTER check
 *
 * Returns 0 on success, otherwise the copyout() error.
 */
1764static int
1765copyout_control(struct proc *p, struct mbuf *m, user_addr_t control,
f427ee49 1766 socklen_t *controllen, int *flags, struct socket *so)
3e170ce0
A
1767{
1768 int error = 0;
f427ee49 1769 socklen_t len;
3e170ce0 1770 user_addr_t ctlbuf;
f427ee49 1771 struct inpcb *inp = so ? sotoinpcb(so) : NULL;
3e170ce0
A
1772
/* len counts the user space still available; ctlbuf is the output cursor. */
1773 len = *controllen;
1774 *controllen = 0;
1775 ctlbuf = control;
1776
/* Outer loop: one iteration per mbuf in the chain. */
1777 while (m && len > 0) {
f427ee49 1778 socklen_t tocopy;
3e170ce0 1779 struct cmsghdr *cp = mtod(m, struct cmsghdr *);
f427ee49
A
1780 socklen_t cp_size = CMSG_ALIGN(cp->cmsg_len);
1781 socklen_t buflen = m->m_len;
3e170ce0
A
1782
/* Inner loop: one iteration per control message inside this mbuf. */
1783 while (buflen > 0 && len > 0) {
1784 /*
1785 * SCM_TIMESTAMP hack because struct timeval has a
1786 * different size for 32 bits and 64 bits processes
1787 */
1788 if (cp->cmsg_level == SOL_SOCKET && cp->cmsg_type == SCM_TIMESTAMP) {
527f9951 1789 unsigned char tmp_buffer[CMSG_SPACE(sizeof(struct user64_timeval))] = {};
3e170ce0 1790 struct cmsghdr *tmp_cp = (struct cmsghdr *)(void *)tmp_buffer;
f427ee49 1791 socklen_t tmp_space;
3e170ce0
A
1792 struct timeval *tv = (struct timeval *)(void *)CMSG_DATA(cp);
1793
/* Rebuild the message in tmp_buffer with the timeval layout of the target process. */
1794 tmp_cp->cmsg_level = SOL_SOCKET;
1795 tmp_cp->cmsg_type = SCM_TIMESTAMP;
1796
1797 if (proc_is64bit(p)) {
1798 struct user64_timeval *tv64 = (struct user64_timeval *)(void *)CMSG_DATA(tmp_cp);
1799
cb323159
A
1800 os_unaligned_deref(&tv64->tv_sec) = tv->tv_sec;
1801 os_unaligned_deref(&tv64->tv_usec) = tv->tv_usec;
3e170ce0
A
1802
1803 tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user64_timeval));
1804 tmp_space = CMSG_SPACE(sizeof(struct user64_timeval));
1805 } else {
1806 struct user32_timeval *tv32 = (struct user32_timeval *)(void *)CMSG_DATA(tmp_cp);
1807
f427ee49 1808 tv32->tv_sec = (user32_time_t)tv->tv_sec;
3e170ce0
A
1809 tv32->tv_usec = tv->tv_usec;
1810
1811 tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user32_timeval));
1812 tmp_space = CMSG_SPACE(sizeof(struct user32_timeval));
1813 }
/* Copy the whole rebuilt message if it fits, otherwise truncate and flag it. */
/* NOTE(review): unlike the generic branch below, this branch does not check cp_size against buflen -- confirm that is safe. */
1814 if (len >= tmp_space) {
1815 tocopy = tmp_space;
1816 } else {
1817 *flags |= MSG_CTRUNC;
1818 tocopy = len;
1819 }
1820 error = copyout(tmp_buffer, ctlbuf, tocopy);
0a7de745 1821 if (error) {
3e170ce0 1822 goto out;
0a7de745 1823 }
3e170ce0 1824 } else {
f427ee49
A
1825#if CONTENT_FILTER
1826 /* If socket is attached to Content Filter and socket did not request address, ignore it */
1827 if ((so != NULL) && (so->so_cfil_db != NULL) &&
1828 ((cp->cmsg_level == IPPROTO_IP && cp->cmsg_type == IP_RECVDSTADDR && inp &&
1829 !(inp->inp_flags & INP_RECVDSTADDR)) ||
1830 (cp->cmsg_level == IPPROTO_IPV6 && (cp->cmsg_type == IPV6_PKTINFO || cp->cmsg_type == IPV6_2292PKTINFO) && inp &&
1831 !(inp->inp_flags & IN6P_PKTINFO)))) {
1832 tocopy = 0;
1833 } else
1834#endif
1835 {
/* Generic message: copied verbatim, truncated (with MSG_CTRUNC) if the user buffer is short. */
1836 if (cp_size > buflen) {
1837 panic("cp_size > buflen, something"
1838 "wrong with alignment!");
1839 }
1840 if (len >= cp_size) {
1841 tocopy = cp_size;
1842 } else {
1843 *flags |= MSG_CTRUNC;
1844 tocopy = len;
1845 }
1846 error = copyout((caddr_t) cp, ctlbuf, tocopy);
1847 if (error) {
1848 goto out;
1849 }
0a7de745 1850 }
3e170ce0
A
1851 }
1852
/* Advance the user-side cursor by what was written ... */
1853 ctlbuf += tocopy;
1854 len -= tocopy;
1855
/* ... and the mbuf-side cursor by the aligned size of the source message. */
/* NOTE(review): cp->cmsg_len is read below before the loop condition re-checks buflen, i.e. also after the last message -- confirm this read stays inside the mbuf. */
1856 buflen -= cp_size;
1857 cp = (struct cmsghdr *)(void *)
1858 ((unsigned char *) cp + cp_size);
1859 cp_size = CMSG_ALIGN(cp->cmsg_len);
1860 }
1861
1862 m = m->m_next;
1863 }
/* Report how many bytes were produced; skipped when a copyout failed. */
f427ee49 1864 *controllen = (socklen_t)(ctlbuf - control);
3e170ce0 1865out:
0a7de745 1866 return error;
3e170ce0
A
1867}
1868
2d21ac55
A
1869/*
1870 * Returns: 0 Success
1871 * ENOTSOCK
1872 * EINVAL
1873 * EBADF
1874 * EACCES Mandatory Access Control failure
1875 * copyout:EFAULT
1876 * fp_lookup:EBADF
1877 * <pru_soreceive>:ENOBUFS
1878 * <pru_soreceive>:ENOTCONN
1879 * <pru_soreceive>:EWOULDBLOCK
1880 * <pru_soreceive>:EFAULT
1881 * <pru_soreceive>:EINTR
1882 * <pru_soreceive>:EBADF
1883 * <pru_soreceive>:EINVAL
1884 * <pru_soreceive>:EMSGSIZE
1885 * <pru_soreceive>:???
1886 *
1887 * Notes: Additional return values from calls through <pru_soreceive>
1888 * depend on protocols other than TCP or AF_UNIX, which are
1889 * documented above.
1890 */
1c79356b 1891static int
2d21ac55 1892recvit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop,
b0d623f7 1893 user_addr_t namelenp, int32_t *retval)
1c79356b 1894{
39236c6e
A
1895 ssize_t len;
1896 int error;
3e170ce0 1897 struct mbuf *control = 0;
1c79356b
A
1898 struct socket *so;
1899 struct sockaddr *fromsa = 0;
91447636 1900 struct fileproc *fp;
1c79356b 1901
2d21ac55 1902 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
f427ee49 1903 if ((error = fp_get_ftype(p, s, DTYPE_SOCKET, ENOTSOCK, &fp))) {
2d21ac55 1904 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
0a7de745 1905 return error;
1c79356b 1906 }
f427ee49 1907 so = fp->f_data;
2d21ac55
A
1908
1909#if CONFIG_MACF_SOCKET_SUBSET
1910 /*
1911 * We check the state without holding the socket lock;
1912 * if a race condition occurs, it would simply result
1913 * in an extra call to the MAC check function.
1914 */
316670eb
A
1915 if (!(so->so_state & SS_DEFUNCT) &&
1916 !(so->so_state & SS_ISCONNECTED) &&
39236c6e 1917 !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
0a7de745 1918 (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0) {
2d21ac55 1919 goto out1;
0a7de745 1920 }
2d21ac55 1921#endif /* MAC_SOCKET_SUBSET */
f427ee49 1922 if (uio_resid(uiop) < 0 || uio_resid(uiop) > INT_MAX) {
2d21ac55 1923 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, EINVAL, 0, 0, 0, 0);
91447636
A
1924 error = EINVAL;
1925 goto out1;
1c79356b 1926 }
91447636
A
1927
1928 len = uio_resid(uiop);
2d21ac55
A
1929 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, uiop,
1930 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
1931 &mp->msg_flags);
0a7de745 1932 if (fromsa) {
b0d623f7
A
1933 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()),
1934 fromsa);
0a7de745 1935 }
1c79356b 1936 if (error) {
91447636 1937 if (uio_resid(uiop) != len && (error == ERESTART ||
0a7de745 1938 error == EINTR || error == EWOULDBLOCK)) {
1c79356b 1939 error = 0;
0a7de745 1940 }
1c79356b 1941 }
0a7de745 1942 if (error) {
1c79356b 1943 goto out;
0a7de745 1944 }
2d21ac55 1945
f427ee49 1946 *retval = (int32_t)(len - uio_resid(uiop));
2d21ac55 1947
3e170ce0
A
1948 if (mp->msg_name) {
1949 error = copyout_sa(fromsa, mp->msg_name, &mp->msg_namelen);
0a7de745 1950 if (error) {
3e170ce0 1951 goto out;
0a7de745 1952 }
2d21ac55 1953 /* return the actual, untruncated address length */
1c79356b 1954 if (namelenp &&
3e170ce0 1955 (error = copyout((caddr_t)&mp->msg_namelen, namelenp,
0a7de745 1956 sizeof(int)))) {
1c79356b
A
1957 goto out;
1958 }
1959 }
39236c6e 1960
3e170ce0
A
1961 if (mp->msg_control) {
1962 error = copyout_control(p, control, mp->msg_control,
f427ee49 1963 &mp->msg_controllen, &mp->msg_flags, so);
1c79356b
A
1964 }
1965out:
c3c9b80d 1966 FREE(fromsa, M_SONAME);
0a7de745 1967 if (control) {
1c79356b 1968 m_freem(control);
0a7de745 1969 }
2d21ac55 1970 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
91447636
A
1971out1:
1972 fp_drop(p, s, fp, 0);
0a7de745 1973 return error;
1c79356b
A
1974}
1975
2d21ac55
A
1976/*
1977 * Returns: 0 Success
1978 * ENOMEM
1979 * copyin:EFAULT
1980 * recvit:???
1981 * read:??? [4056224: applicable for pipes]
1982 *
1983 * Notes: The read entry point is only called as part of support for
1984 * binary backward compatability; new code should use read
1985 * instead of recv or recvfrom when attempting to read data
1986 * from pipes.
1987 *
1988 * For full documentation of the return codes from recvit, see
1989 * the block header for the recvit function.
1990 */
1991int
b0d623f7 1992recvfrom(struct proc *p, struct recvfrom_args *uap, int32_t *retval)
2d21ac55
A
1993{
1994 __pthread_testcancel(1);
0a7de745
A
1995 return recvfrom_nocancel(p, (struct recvfrom_nocancel_args *)uap,
1996 retval);
2d21ac55
A
1997}
1998
1c79356b 1999int
3e170ce0
A
2000recvfrom_nocancel(struct proc *p, struct recvfrom_nocancel_args *uap,
2001 int32_t *retval)
1c79356b 2002{
91447636 2003 struct user_msghdr msg;
1c79356b 2004 int error;
91447636 2005 uio_t auio = NULL;
1c79356b 2006
2d21ac55 2007 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_START, 0, 0, 0, 0, 0);
55e303ae 2008 AUDIT_ARG(fd, uap->s);
1c79356b
A
2009
2010 if (uap->fromlenaddr) {
91447636 2011 error = copyin(uap->fromlenaddr,
0a7de745
A
2012 (caddr_t)&msg.msg_namelen, sizeof(msg.msg_namelen));
2013 if (error) {
2014 return error;
2015 }
2d21ac55 2016 } else {
1c79356b 2017 msg.msg_namelen = 0;
2d21ac55 2018 }
1c79356b 2019 msg.msg_name = uap->from;
91447636 2020 auio = uio_create(1, 0,
2d21ac55
A
2021 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
2022 UIO_READ);
91447636 2023 if (auio == NULL) {
0a7de745 2024 return ENOMEM;
91447636 2025 }
2d21ac55 2026
91447636
A
2027 uio_addiov(auio, uap->buf, uap->len);
2028 /* no need to set up msg_iov. recvit uses uio_t we send it */
2029 msg.msg_iov = 0;
2030 msg.msg_iovlen = 0;
1c79356b 2031 msg.msg_control = 0;
91447636 2032 msg.msg_controllen = 0;
1c79356b 2033 msg.msg_flags = uap->flags;
91447636
A
2034 error = recvit(p, uap->s, &msg, auio, uap->fromlenaddr, retval);
2035 if (auio != NULL) {
2036 uio_free(auio);
2037 }
2d21ac55 2038
2d21ac55 2039 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_END, error, 0, 0, 0, 0);
1c79356b 2040
0a7de745 2041 return error;
1c79356b
A
2042}
2043
2044/*
2d21ac55
A
2045 * Returns: 0 Success
2046 * EMSGSIZE
2047 * ENOMEM
2048 * copyin:EFAULT
2049 * copyout:EFAULT
2050 * recvit:???
2051 *
2052 * Notes: For full documentation of the return codes from recvit, see
2053 * the block header for the recvit function.
1c79356b
A
2054 */
2055int
b0d623f7 2056recvmsg(struct proc *p, struct recvmsg_args *uap, int32_t *retval)
1c79356b 2057{
2d21ac55 2058 __pthread_testcancel(1);
0a7de745
A
2059 return recvmsg_nocancel(p, (struct recvmsg_nocancel_args *)uap,
2060 retval);
1c79356b 2061}
1c79356b
A
2062
2063int
3e170ce0
A
2064recvmsg_nocancel(struct proc *p, struct recvmsg_nocancel_args *uap,
2065 int32_t *retval)
1c79356b 2066{
b0d623f7
A
2067 struct user32_msghdr msg32;
2068 struct user64_msghdr msg64;
91447636
A
2069 struct user_msghdr user_msg;
2070 caddr_t msghdrp;
0a7de745 2071 int size_of_msghdr;
91447636 2072 user_addr_t uiov;
2d21ac55 2073 int error;
91447636
A
2074 uio_t auio = NULL;
2075 struct user_iovec *iovp;
1c79356b 2076
2d21ac55 2077 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
55e303ae 2078 AUDIT_ARG(fd, uap->s);
91447636 2079 if (IS_64BIT_PROCESS(p)) {
b0d623f7 2080 msghdrp = (caddr_t)&msg64;
0a7de745 2081 size_of_msghdr = sizeof(msg64);
2d21ac55 2082 } else {
b0d623f7 2083 msghdrp = (caddr_t)&msg32;
0a7de745 2084 size_of_msghdr = sizeof(msg32);
91447636
A
2085 }
2086 error = copyin(uap->msg, msghdrp, size_of_msghdr);
2d21ac55
A
2087 if (error) {
2088 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
0a7de745 2089 return error;
1c79356b
A
2090 }
2091
91447636 2092 /* only need to copy if user process is not 64-bit */
b0d623f7
A
2093 if (IS_64BIT_PROCESS(p)) {
2094 user_msg.msg_flags = msg64.msg_flags;
2095 user_msg.msg_controllen = msg64.msg_controllen;
f427ee49 2096 user_msg.msg_control = (user_addr_t)msg64.msg_control;
b0d623f7 2097 user_msg.msg_iovlen = msg64.msg_iovlen;
f427ee49 2098 user_msg.msg_iov = (user_addr_t)msg64.msg_iov;
b0d623f7 2099 user_msg.msg_namelen = msg64.msg_namelen;
f427ee49 2100 user_msg.msg_name = (user_addr_t)msg64.msg_name;
b0d623f7
A
2101 } else {
2102 user_msg.msg_flags = msg32.msg_flags;
2103 user_msg.msg_controllen = msg32.msg_controllen;
2104 user_msg.msg_control = msg32.msg_control;
2105 user_msg.msg_iovlen = msg32.msg_iovlen;
2106 user_msg.msg_iov = msg32.msg_iov;
2107 user_msg.msg_namelen = msg32.msg_namelen;
2108 user_msg.msg_name = msg32.msg_name;
91447636
A
2109 }
2110
2111 if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
2d21ac55
A
2112 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, EMSGSIZE,
2113 0, 0, 0, 0);
0a7de745 2114 return EMSGSIZE;
91447636
A
2115 }
2116
91447636 2117 user_msg.msg_flags = uap->flags;
91447636
A
2118
2119 /* allocate a uio large enough to hold the number of iovecs passed */
2120 auio = uio_create(user_msg.msg_iovlen, 0,
2d21ac55
A
2121 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
2122 UIO_READ);
91447636
A
2123 if (auio == NULL) {
2124 error = ENOMEM;
2125 goto done;
2126 }
2127
2d21ac55
A
2128 /*
2129 * get location of iovecs within the uio. then copyin the iovecs from
91447636
A
2130 * user space.
2131 */
2132 iovp = uio_iovsaddr(auio);
2133 if (iovp == NULL) {
2134 error = ENOMEM;
2135 goto done;
2136 }
2137 uiov = user_msg.msg_iov;
2138 user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
b0d623f7 2139 error = copyin_user_iovec_array(uiov,
0a7de745
A
2140 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
2141 user_msg.msg_iovlen, iovp);
2142 if (error) {
1c79356b 2143 goto done;
0a7de745 2144 }
91447636 2145
2d21ac55 2146 /* finish setup of uio_t */
39236c6e
A
2147 error = uio_calculateresid(auio);
2148 if (error) {
2149 goto done;
2150 }
2d21ac55 2151
91447636 2152 error = recvit(p, uap->s, &user_msg, auio, 0, retval);
1c79356b 2153 if (!error) {
91447636 2154 user_msg.msg_iov = uiov;
b0d623f7
A
2155 if (IS_64BIT_PROCESS(p)) {
2156 msg64.msg_flags = user_msg.msg_flags;
2157 msg64.msg_controllen = user_msg.msg_controllen;
2158 msg64.msg_control = user_msg.msg_control;
2159 msg64.msg_iovlen = user_msg.msg_iovlen;
2160 msg64.msg_iov = user_msg.msg_iov;
2161 msg64.msg_namelen = user_msg.msg_namelen;
2162 msg64.msg_name = user_msg.msg_name;
2163 } else {
2164 msg32.msg_flags = user_msg.msg_flags;
2165 msg32.msg_controllen = user_msg.msg_controllen;
f427ee49 2166 msg32.msg_control = (user32_addr_t)user_msg.msg_control;
b0d623f7 2167 msg32.msg_iovlen = user_msg.msg_iovlen;
f427ee49 2168 msg32.msg_iov = (user32_addr_t)user_msg.msg_iov;
b0d623f7 2169 msg32.msg_namelen = user_msg.msg_namelen;
f427ee49 2170 msg32.msg_name = (user32_addr_t)user_msg.msg_name;
91447636
A
2171 }
2172 error = copyout(msghdrp, uap->msg, size_of_msghdr);
1c79356b
A
2173 }
2174done:
91447636
A
2175 if (auio != NULL) {
2176 uio_free(auio);
2177 }
2d21ac55 2178 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
0a7de745 2179 return error;
1c79356b
A
2180}
2181
fe8ab488
A
2182int
2183recvmsg_x(struct proc *p, struct recvmsg_x_args *uap, user_ssize_t *retval)
2184{
2185 int error = EOPNOTSUPP;
3e170ce0
A
2186 struct user_msghdr_x *user_msg_x = NULL;
2187 struct recv_msg_elem *recv_msg_array = NULL;
fe8ab488
A
2188 struct socket *so;
2189 user_ssize_t len_before = 0, len_after;
2190 int need_drop = 0;
2191 size_t size_of_msghdr;
2192 void *umsgp = NULL;
2193 u_int i;
2194 u_int uiocnt;
2195
2196 KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0);
2197
c3c9b80d
A
2198 size_of_msghdr = IS_64BIT_PROCESS(p) ?
2199 sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x);
2200
fe8ab488
A
2201 error = file_socket(uap->s, &so);
2202 if (error) {
2203 goto out;
2204 }
2205 need_drop = 1;
2206 if (so == NULL) {
2207 error = EBADF;
2208 goto out;
2209 }
c3c9b80d
A
2210 /*
2211 * Support only a subset of message flags
2212 */
2213 if (uap->flags & ~(MSG_PEEK | MSG_WAITALL | MSG_DONTWAIT | MSG_NEEDSA | MSG_NBIO)) {
2214 return EOPNOTSUPP;
2215 }
fe8ab488
A
2216 /*
2217 * Input parameter range check
2218 */
2219 if (uap->cnt == 0 || uap->cnt > UIO_MAXIOV) {
2220 error = EINVAL;
2221 goto out;
2222 }
0a7de745 2223 if (uap->cnt > somaxrecvmsgx) {
3e170ce0 2224 uap->cnt = somaxrecvmsgx;
0a7de745 2225 }
3e170ce0 2226
c3c9b80d
A
2227 user_msg_x = kheap_alloc(KHEAP_TEMP,
2228 uap->cnt * sizeof(struct user_msghdr_x), Z_WAITOK | Z_ZERO);
3e170ce0 2229 if (user_msg_x == NULL) {
c3c9b80d 2230 DBG_PRINTF("%s kheap_alloc user_msg_x failed\n", __func__);
fe8ab488
A
2231 error = ENOMEM;
2232 goto out;
2233 }
3e170ce0
A
2234 recv_msg_array = alloc_recv_msg_array(uap->cnt);
2235 if (recv_msg_array == NULL) {
2236 DBG_PRINTF("%s alloc_recv_msg_array() failed\n", __func__);
fe8ab488
A
2237 error = ENOMEM;
2238 goto out;
2239 }
fe8ab488 2240
c3c9b80d
A
2241 umsgp = kheap_alloc(KHEAP_TEMP,
2242 uap->cnt * size_of_msghdr, Z_WAITOK | Z_ZERO);
fe8ab488 2243 if (umsgp == NULL) {
c3c9b80d 2244 DBG_PRINTF("%s kheap_alloc umsgp failed\n", __func__);
fe8ab488
A
2245 error = ENOMEM;
2246 goto out;
2247 }
2248 error = copyin(uap->msgp, umsgp, uap->cnt * size_of_msghdr);
2249 if (error) {
3e170ce0 2250 DBG_PRINTF("%s copyin() failed\n", __func__);
fe8ab488
A
2251 goto out;
2252 }
3e170ce0 2253 error = internalize_recv_msghdr_array(umsgp,
fe8ab488 2254 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
3e170ce0 2255 UIO_READ, uap->cnt, user_msg_x, recv_msg_array);
fe8ab488 2256 if (error) {
3e170ce0 2257 DBG_PRINTF("%s copyin_user_msghdr_array() failed\n", __func__);
fe8ab488
A
2258 goto out;
2259 }
2260 /*
2261 * Make sure the size of each message iovec and
2262 * the aggregate size of all the iovec is valid
2263 */
3e170ce0 2264 if (recv_msg_array_is_valid(recv_msg_array, uap->cnt) == 0) {
fe8ab488
A
2265 error = EINVAL;
2266 goto out;
2267 }
fe8ab488
A
2268 /*
2269 * Sanity check on passed arguments
2270 */
2271 for (i = 0; i < uap->cnt; i++) {
3e170ce0 2272 struct user_msghdr_x *mp = user_msg_x + i;
fe8ab488
A
2273
2274 if (mp->msg_flags != 0) {
2275 error = EINVAL;
2276 goto out;
2277 }
fe8ab488
A
2278 }
2279#if CONFIG_MACF_SOCKET_SUBSET
2280 /*
2281 * We check the state without holding the socket lock;
2282 * if a race condition occurs, it would simply result
2283 * in an extra call to the MAC check function.
2284 */
2285 if (!(so->so_state & SS_DEFUNCT) &&
2286 !(so->so_state & SS_ISCONNECTED) &&
2287 !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
0a7de745 2288 (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0) {
fe8ab488 2289 goto out;
0a7de745 2290 }
fe8ab488
A
2291#endif /* MAC_SOCKET_SUBSET */
2292
3e170ce0 2293 len_before = recv_msg_array_resid(recv_msg_array, uap->cnt);
fe8ab488 2294
3e170ce0
A
2295 if (so->so_proto->pr_usrreqs->pru_soreceive_list !=
2296 pru_soreceive_list_notsupp &&
2297 somaxrecvmsgx == 0) {
2298 error = so->so_proto->pr_usrreqs->pru_soreceive_list(so,
2299 recv_msg_array, uap->cnt, &uap->flags);
2300 } else {
2301 int flags = uap->flags;
fe8ab488 2302
3e170ce0
A
2303 for (i = 0; i < uap->cnt; i++) {
2304 struct recv_msg_elem *recv_msg_elem;
2305 uio_t auio;
2306 struct sockaddr **psa;
2307 struct mbuf **controlp;
2308
2309 recv_msg_elem = recv_msg_array + i;
2310 auio = recv_msg_elem->uio;
2311
2312 /*
2313 * Do not block if we got at least one packet
2314 */
0a7de745 2315 if (i > 0) {
3e170ce0 2316 flags |= MSG_DONTWAIT;
0a7de745 2317 }
3e170ce0
A
2318
2319 psa = (recv_msg_elem->which & SOCK_MSG_SA) ?
2320 &recv_msg_elem->psa : NULL;
2321 controlp = (recv_msg_elem->which & SOCK_MSG_CONTROL) ?
2322 &recv_msg_elem->controlp : NULL;
2323
2324 error = so->so_proto->pr_usrreqs->pru_soreceive(so, psa,
c3c9b80d 2325 auio, (struct mbuf **)NULL, controlp, &flags);
0a7de745 2326 if (error) {
3e170ce0 2327 break;
0a7de745 2328 }
3e170ce0
A
2329 /*
2330 * We have some data
2331 */
2332 recv_msg_elem->which |= SOCK_MSG_DATA;
c3c9b80d
A
2333 /*
2334 * Set the messages flags for this packet
2335 */
2336 flags &= ~MSG_DONTWAIT;
2337 recv_msg_elem->flags = flags;
3e170ce0
A
2338 /*
2339 * Stop on partial copy
2340 */
c3c9b80d 2341 if (recv_msg_elem->flags & (MSG_RCVMORE | MSG_TRUNC)) {
3e170ce0 2342 break;
0a7de745 2343 }
3e170ce0 2344 }
3e170ce0
A
2345 }
2346
2347 len_after = recv_msg_array_resid(recv_msg_array, uap->cnt);
fe8ab488
A
2348
2349 if (error) {
2350 if (len_after != len_before && (error == ERESTART ||
0a7de745 2351 error == EINTR || error == EWOULDBLOCK)) {
fe8ab488 2352 error = 0;
0a7de745 2353 } else {
3e170ce0 2354 goto out;
0a7de745 2355 }
fe8ab488 2356 }
fe8ab488 2357
c3c9b80d
A
2358 uiocnt = externalize_recv_msghdr_array(p, so, umsgp,
2359 uap->cnt, user_msg_x, recv_msg_array, &error);
2360 if (error != 0) {
2361 goto out;
2362 }
3e170ce0
A
2363
2364 error = copyout(umsgp, uap->msgp, uap->cnt * size_of_msghdr);
2365 if (error) {
2366 DBG_PRINTF("%s copyout() failed\n", __func__);
2367 goto out;
2368 }
2369 *retval = (int)(uiocnt);
2370
fe8ab488 2371out:
0a7de745 2372 if (need_drop) {
fe8ab488 2373 file_drop(uap->s);
0a7de745 2374 }
c3c9b80d
A
2375 kheap_free(KHEAP_TEMP, umsgp, uap->cnt * size_of_msghdr);
2376 free_recv_msg_array(recv_msg_array, uap->cnt);
2377 kheap_free(KHEAP_TEMP, user_msg_x,
2378 uap->cnt * sizeof(struct user_msghdr_x));
3e170ce0 2379
fe8ab488 2380 KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0);
3e170ce0 2381
0a7de745 2382 return error;
fe8ab488
A
2383}
2384
2d21ac55
A
2385/*
2386 * Returns: 0 Success
2387 * EBADF
2388 * file_socket:ENOTSOCK
2389 * file_socket:EBADF
2390 * soshutdown:EINVAL
2391 * soshutdown:ENOTCONN
2392 * soshutdown:EADDRNOTAVAIL[TCP]
2393 * soshutdown:ENOBUFS[TCP]
2394 * soshutdown:EMSGSIZE[TCP]
2395 * soshutdown:EHOSTUNREACH[TCP]
2396 * soshutdown:ENETUNREACH[TCP]
2397 * soshutdown:ENETDOWN[TCP]
2398 * soshutdown:ENOMEM[TCP]
2399 * soshutdown:EACCES[TCP]
2400 * soshutdown:EMSGSIZE[TCP]
2401 * soshutdown:ENOBUFS[TCP]
2402 * soshutdown:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
2403 * soshutdown:??? [other protocol families]
2404 */
1c79356b
A
2405/* ARGSUSED */
2406int
2d21ac55 2407shutdown(__unused struct proc *p, struct shutdown_args *uap,
b0d623f7 2408 __unused int32_t *retval)
1c79356b 2409{
2d21ac55 2410 struct socket *so;
1c79356b
A
2411 int error;
2412
55e303ae 2413 AUDIT_ARG(fd, uap->s);
91447636 2414 error = file_socket(uap->s, &so);
0a7de745
A
2415 if (error) {
2416 return error;
2417 }
91447636
A
2418 if (so == NULL) {
2419 error = EBADF;
2420 goto out;
2421 }
2422 error = soshutdown((struct socket *)so, uap->how);
2423out:
2424 file_drop(uap->s);
0a7de745 2425 return error;
1c79356b
A
2426}
2427
2d21ac55
A
2428/*
2429 * Returns: 0 Success
2430 * EFAULT
2431 * EINVAL
2432 * EACCES Mandatory Access Control failure
2433 * file_socket:ENOTSOCK
2434 * file_socket:EBADF
2435 * sosetopt:EINVAL
2436 * sosetopt:ENOPROTOOPT
2437 * sosetopt:ENOBUFS
2438 * sosetopt:EDOM
2439 * sosetopt:EFAULT
2440 * sosetopt:EOPNOTSUPP[AF_UNIX]
2441 * sosetopt:???
2442 */
1c79356b
A
2443/* ARGSUSED */
2444int
2d21ac55 2445setsockopt(struct proc *p, struct setsockopt_args *uap,
b0d623f7 2446 __unused int32_t *retval)
1c79356b 2447{
2d21ac55 2448 struct socket *so;
1c79356b
A
2449 struct sockopt sopt;
2450 int error;
2451
55e303ae 2452 AUDIT_ARG(fd, uap->s);
0a7de745
A
2453 if (uap->val == 0 && uap->valsize != 0) {
2454 return EFAULT;
2455 }
2d21ac55 2456 /* No bounds checking on size (it's unsigned) */
1c79356b 2457
91447636 2458 error = file_socket(uap->s, &so);
0a7de745
A
2459 if (error) {
2460 return error;
2461 }
1c79356b
A
2462
2463 sopt.sopt_dir = SOPT_SET;
2464 sopt.sopt_level = uap->level;
2465 sopt.sopt_name = uap->name;
2466 sopt.sopt_val = uap->val;
2467 sopt.sopt_valsize = uap->valsize;
2468 sopt.sopt_p = p;
2469
91447636
A
2470 if (so == NULL) {
2471 error = EINVAL;
2472 goto out;
2473 }
2d21ac55
A
2474#if CONFIG_MACF_SOCKET_SUBSET
2475 if ((error = mac_socket_check_setsockopt(kauth_cred_get(), so,
0a7de745 2476 &sopt)) != 0) {
2d21ac55 2477 goto out;
0a7de745 2478 }
2d21ac55 2479#endif /* MAC_SOCKET_SUBSET */
0a7de745 2480 error = sosetoptlock(so, &sopt, 1); /* will lock socket */
91447636
A
2481out:
2482 file_drop(uap->s);
0a7de745 2483 return error;
1c79356b
A
2484}
2485
2486
2487
2d21ac55
A
2488/*
2489 * Returns: 0 Success
2490 * EINVAL
2491 * EBADF
2492 * EACCES Mandatory Access Control failure
2493 * copyin:EFAULT
2494 * copyout:EFAULT
2495 * file_socket:ENOTSOCK
2496 * file_socket:EBADF
2497 * sogetopt:???
2498 */
1c79356b 2499int
2d21ac55 2500getsockopt(struct proc *p, struct getsockopt_args *uap,
b0d623f7 2501 __unused int32_t *retval)
1c79356b 2502{
0a7de745
A
2503 int error;
2504 socklen_t valsize;
2505 struct sockopt sopt;
2d21ac55 2506 struct socket *so;
1c79356b 2507
91447636 2508 error = file_socket(uap->s, &so);
0a7de745
A
2509 if (error) {
2510 return error;
2511 }
1c79356b 2512 if (uap->val) {
2d21ac55 2513 error = copyin(uap->avalsize, (caddr_t)&valsize,
0a7de745
A
2514 sizeof(valsize));
2515 if (error) {
91447636 2516 goto out;
0a7de745 2517 }
2d21ac55
A
2518 /* No bounds checking on size (it's unsigned) */
2519 } else {
1c79356b 2520 valsize = 0;
2d21ac55 2521 }
1c79356b
A
2522 sopt.sopt_dir = SOPT_GET;
2523 sopt.sopt_level = uap->level;
2524 sopt.sopt_name = uap->name;
2525 sopt.sopt_val = uap->val;
2526 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
2527 sopt.sopt_p = p;
2528
91447636
A
2529 if (so == NULL) {
2530 error = EBADF;
2531 goto out;
2532 }
2d21ac55
A
2533#if CONFIG_MACF_SOCKET_SUBSET
2534 if ((error = mac_socket_check_getsockopt(kauth_cred_get(), so,
0a7de745 2535 &sopt)) != 0) {
2d21ac55 2536 goto out;
0a7de745 2537 }
2d21ac55 2538#endif /* MAC_SOCKET_SUBSET */
0a7de745 2539 error = sogetoptlock((struct socket *)so, &sopt, 1); /* will lock */
1c79356b 2540 if (error == 0) {
f427ee49 2541 valsize = (socklen_t)sopt.sopt_valsize;
2d21ac55 2542 error = copyout((caddr_t)&valsize, uap->avalsize,
0a7de745 2543 sizeof(valsize));
1c79356b 2544 }
91447636
A
2545out:
2546 file_drop(uap->s);
0a7de745 2547 return error;
1c79356b
A
2548}
2549
2550
2551/*
2552 * Get socket name.
2d21ac55
A
2553 *
2554 * Returns: 0 Success
2555 * EBADF
2556 * file_socket:ENOTSOCK
2557 * file_socket:EBADF
2558 * copyin:EFAULT
2559 * copyout:EFAULT
2560 * <pru_sockaddr>:ENOBUFS[TCP]
2561 * <pru_sockaddr>:ECONNRESET[TCP]
2562 * <pru_sockaddr>:EINVAL[AF_UNIX]
2563 * <sf_getsockname>:???
1c79356b
A
2564 */
2565/* ARGSUSED */
2d21ac55
A
2566int
2567getsockname(__unused struct proc *p, struct getsockname_args *uap,
b0d623f7 2568 __unused int32_t *retval)
1c79356b 2569{
91447636 2570 struct socket *so;
1c79356b 2571 struct sockaddr *sa;
91447636 2572 socklen_t len;
2d21ac55 2573 socklen_t sa_len;
1c79356b
A
2574 int error;
2575
91447636 2576 error = file_socket(uap->fdes, &so);
0a7de745
A
2577 if (error) {
2578 return error;
2579 }
2580 error = copyin(uap->alen, (caddr_t)&len, sizeof(socklen_t));
2581 if (error) {
91447636 2582 goto out;
0a7de745 2583 }
91447636
A
2584 if (so == NULL) {
2585 error = EBADF;
2586 goto out;
2587 }
1c79356b 2588 sa = 0;
91447636 2589 socket_lock(so, 1);
1c79356b 2590 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
2d21ac55 2591 if (error == 0) {
6d2010ae 2592 error = sflt_getsockname(so, &sa);
0a7de745 2593 if (error == EJUSTRETURN) {
91447636 2594 error = 0;
0a7de745 2595 }
91447636
A
2596 }
2597 socket_unlock(so, 1);
0a7de745 2598 if (error) {
1c79356b 2599 goto bad;
0a7de745 2600 }
1c79356b
A
2601 if (sa == 0) {
2602 len = 0;
2603 goto gotnothing;
2604 }
2605
2d21ac55
A
2606 sa_len = sa->sa_len;
2607 len = MIN(len, sa_len);
91447636 2608 error = copyout((caddr_t)sa, uap->asa, len);
0a7de745 2609 if (error) {
2d21ac55 2610 goto bad;
0a7de745 2611 }
2d21ac55
A
2612 /* return the actual, untruncated address length */
2613 len = sa_len;
1c79356b 2614gotnothing:
0a7de745 2615 error = copyout((caddr_t)&len, uap->alen, sizeof(socklen_t));
1c79356b 2616bad:
c3c9b80d 2617 FREE(sa, M_SONAME);
91447636
A
2618out:
2619 file_drop(uap->fdes);
0a7de745 2620 return error;
1c79356b
A
2621}
2622
1c79356b
A
2623/*
2624 * Get name of peer for connected socket.
2d21ac55
A
2625 *
2626 * Returns: 0 Success
2627 * EBADF
2628 * EINVAL
2629 * ENOTCONN
2630 * file_socket:ENOTSOCK
2631 * file_socket:EBADF
2632 * copyin:EFAULT
2633 * copyout:EFAULT
2634 * <pru_peeraddr>:???
2635 * <sf_getpeername>:???
1c79356b
A
2636 */
2637/* ARGSUSED */
2638int
2d21ac55 2639getpeername(__unused struct proc *p, struct getpeername_args *uap,
b0d623f7 2640 __unused int32_t *retval)
1c79356b 2641{
91447636 2642 struct socket *so;
1c79356b 2643 struct sockaddr *sa;
91447636 2644 socklen_t len;
2d21ac55 2645 socklen_t sa_len;
1c79356b
A
2646 int error;
2647
91447636 2648 error = file_socket(uap->fdes, &so);
0a7de745
A
2649 if (error) {
2650 return error;
2651 }
91447636
A
2652 if (so == NULL) {
2653 error = EBADF;
2654 goto out;
2655 }
2656
2657 socket_lock(so, 1);
2658
2d21ac55
A
2659 if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
2660 (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
2661 /* the socket has been shutdown, no more getpeername's */
2662 socket_unlock(so, 1);
2663 error = EINVAL;
2664 goto out;
2665 }
2666
0a7de745 2667 if ((so->so_state & (SS_ISCONNECTED | SS_ISCONFIRMING)) == 0) {
91447636
A
2668 socket_unlock(so, 1);
2669 error = ENOTCONN;
2670 goto out;
2671 }
0a7de745 2672 error = copyin(uap->alen, (caddr_t)&len, sizeof(socklen_t));
91447636
A
2673 if (error) {
2674 socket_unlock(so, 1);
2675 goto out;
2676 }
1c79356b
A
2677 sa = 0;
2678 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
2d21ac55 2679 if (error == 0) {
6d2010ae 2680 error = sflt_getpeername(so, &sa);
0a7de745 2681 if (error == EJUSTRETURN) {
91447636 2682 error = 0;
0a7de745 2683 }
91447636
A
2684 }
2685 socket_unlock(so, 1);
0a7de745 2686 if (error) {
1c79356b 2687 goto bad;
0a7de745 2688 }
1c79356b
A
2689 if (sa == 0) {
2690 len = 0;
2691 goto gotnothing;
2692 }
2d21ac55
A
2693 sa_len = sa->sa_len;
2694 len = MIN(len, sa_len);
91447636 2695 error = copyout(sa, uap->asa, len);
0a7de745 2696 if (error) {
1c79356b 2697 goto bad;
0a7de745 2698 }
2d21ac55
A
2699 /* return the actual, untruncated address length */
2700 len = sa_len;
1c79356b 2701gotnothing:
0a7de745 2702 error = copyout((caddr_t)&len, uap->alen, sizeof(socklen_t));
1c79356b 2703bad:
c3c9b80d 2704 FREE(sa, M_SONAME);
91447636
A
2705out:
2706 file_drop(uap->fdes);
0a7de745 2707 return error;
1c79356b
A
2708}
2709
2710int
f427ee49 2711sockargs(struct mbuf **mp, user_addr_t data, socklen_t buflen, int type)
1c79356b 2712{
2d21ac55
A
2713 struct sockaddr *sa;
2714 struct mbuf *m;
1c79356b 2715 int error;
f427ee49 2716 socklen_t alloc_buflen = buflen;
1c79356b 2717
f427ee49 2718 if (buflen > INT_MAX / 2) {
0a7de745
A
2719 return EINVAL;
2720 }
f427ee49
A
2721 if (type == MT_SONAME && buflen > SOCK_MAXADDRLEN) {
2722 return EINVAL;
2723 }
2724
b0d623f7 2725#ifdef __LP64__
3e170ce0
A
2726 /*
2727 * The fd's in the buffer must expand to be pointers, thus we need twice
2728 * as much space
2729 */
0a7de745
A
2730 if (type == MT_CONTROL) {
2731 alloc_buflen = ((buflen - sizeof(struct cmsghdr)) * 2) +
3e170ce0 2732 sizeof(struct cmsghdr);
0a7de745 2733 }
b0d623f7 2734#endif
e2d2fc5c 2735 if (alloc_buflen > MLEN) {
0a7de745
A
2736 if (type == MT_SONAME && alloc_buflen <= 112) {
2737 alloc_buflen = MLEN; /* unix domain compat. hack */
2738 } else if (alloc_buflen > MCLBYTES) {
2739 return EINVAL;
2740 }
1c79356b
A
2741 }
2742 m = m_get(M_WAIT, type);
0a7de745
A
2743 if (m == NULL) {
2744 return ENOBUFS;
2745 }
e2d2fc5c 2746 if (alloc_buflen > MLEN) {
91447636
A
2747 MCLGET(m, M_WAIT);
2748 if ((m->m_flags & M_EXT) == 0) {
2749 m_free(m);
0a7de745 2750 return ENOBUFS;
91447636
A
2751 }
2752 }
3e170ce0
A
2753 /*
2754 * K64: We still copyin the original buflen because it gets expanded
2755 * later and we lie about the size of the mbuf because it only affects
2756 * unp_* functions
b0d623f7 2757 */
1c79356b 2758 m->m_len = buflen;
91447636 2759 error = copyin(data, mtod(m, caddr_t), (u_int)buflen);
2d21ac55 2760 if (error) {
1c79356b 2761 (void) m_free(m);
2d21ac55 2762 } else {
1c79356b
A
2763 *mp = m;
2764 if (type == MT_SONAME) {
2765 sa = mtod(m, struct sockaddr *);
f427ee49
A
2766 VERIFY(buflen <= SOCK_MAXADDRLEN);
2767 sa->sa_len = (__uint8_t)buflen;
1c79356b
A
2768 }
2769 }
0a7de745 2770 return error;
1c79356b
A
2771}
2772
91447636
A
2773/*
2774 * Given a user_addr_t of length len, allocate and fill out a *sa.
2d21ac55
A
2775 *
2776 * Returns: 0 Success
2777 * ENAMETOOLONG Filename too long
2778 * EINVAL Invalid argument
2779 * ENOMEM Not enough space
2780 * copyin:EFAULT Bad address
91447636 2781 */
2d21ac55
A
2782static int
2783getsockaddr(struct socket *so, struct sockaddr **namp, user_addr_t uaddr,
4a3eedf9 2784 size_t len, boolean_t translate_unspec)
1c79356b
A
2785{
2786 struct sockaddr *sa;
2787 int error;
2788
0a7de745
A
2789 if (len > SOCK_MAXADDRLEN) {
2790 return ENAMETOOLONG;
2791 }
1c79356b 2792
0a7de745
A
2793 if (len < offsetof(struct sockaddr, sa_data[0])) {
2794 return EINVAL;
2795 }
1c79356b 2796
490019cf 2797 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK | M_ZERO);
91447636 2798 if (sa == NULL) {
0a7de745 2799 return ENOMEM;
91447636
A
2800 }
2801 error = copyin(uaddr, (caddr_t)sa, len);
1c79356b
A
2802 if (error) {
2803 FREE(sa, M_SONAME);
2804 } else {
2d21ac55
A
2805 /*
2806 * Force sa_family to AF_INET on AF_INET sockets to handle
2807 * legacy applications that use AF_UNSPEC (0). On all other
2808 * sockets we leave it unchanged and let the lower layer
2809 * handle it.
2810 */
4a3eedf9 2811 if (translate_unspec && sa->sa_family == AF_UNSPEC &&
39236c6e 2812 SOCK_CHECK_DOM(so, PF_INET) &&
0a7de745 2813 len == sizeof(struct sockaddr_in)) {
2d21ac55 2814 sa->sa_family = AF_INET;
0a7de745 2815 }
f427ee49
A
2816 VERIFY(len <= SOCK_MAXADDRLEN);
2817 sa->sa_len = (__uint8_t)len;
1c79356b
A
2818 *namp = sa;
2819 }
0a7de745 2820 return error;
1c79356b
A
2821}
2822
2d21ac55
A
2823static int
2824getsockaddr_s(struct socket *so, struct sockaddr_storage *ss,
4a3eedf9 2825 user_addr_t uaddr, size_t len, boolean_t translate_unspec)
1c79356b 2826{
2d21ac55
A
2827 int error;
2828
2829 if (ss == NULL || uaddr == USER_ADDR_NULL ||
0a7de745
A
2830 len < offsetof(struct sockaddr, sa_data[0])) {
2831 return EINVAL;
2832 }
2d21ac55
A
2833
2834 /*
2835 * sockaddr_storage size is less than SOCK_MAXADDRLEN,
2836 * so the check here is inclusive.
2837 */
0a7de745
A
2838 if (len > sizeof(*ss)) {
2839 return ENAMETOOLONG;
2840 }
1c79356b 2841
0a7de745 2842 bzero(ss, sizeof(*ss));
2d21ac55
A
2843 error = copyin(uaddr, (caddr_t)ss, len);
2844 if (error == 0) {
2845 /*
2846 * Force sa_family to AF_INET on AF_INET sockets to handle
2847 * legacy applications that use AF_UNSPEC (0). On all other
2848 * sockets we leave it unchanged and let the lower layer
2849 * handle it.
2850 */
4a3eedf9 2851 if (translate_unspec && ss->ss_family == AF_UNSPEC &&
39236c6e 2852 SOCK_CHECK_DOM(so, PF_INET) &&
0a7de745 2853 len == sizeof(struct sockaddr_in)) {
2d21ac55 2854 ss->ss_family = AF_INET;
0a7de745 2855 }
91447636 2856
f427ee49 2857 ss->ss_len = (__uint8_t)len;
1c79356b 2858 }
0a7de745 2859 return error;
1c79356b
A
2860}
2861
fe8ab488
A
2862int
2863internalize_user_msghdr_array(const void *src, int spacetype, int direction,
3e170ce0 2864 u_int count, struct user_msghdr_x *dst, struct uio **uiop)
fe8ab488
A
2865{
2866 int error = 0;
2867 u_int i;
3e170ce0
A
2868 u_int namecnt = 0;
2869 u_int ctlcnt = 0;
fe8ab488
A
2870
2871 for (i = 0; i < count; i++) {
2872 uio_t auio;
2873 struct user_iovec *iovp;
3e170ce0 2874 struct user_msghdr_x *user_msg = dst + i;
fe8ab488
A
2875
2876 if (spacetype == UIO_USERSPACE64) {
3e170ce0 2877 const struct user64_msghdr_x *msghdr64;
fe8ab488 2878
3e170ce0 2879 msghdr64 = ((const struct user64_msghdr_x *)src) + i;
fe8ab488 2880
f427ee49 2881 user_msg->msg_name = (user_addr_t)msghdr64->msg_name;
fe8ab488 2882 user_msg->msg_namelen = msghdr64->msg_namelen;
f427ee49 2883 user_msg->msg_iov = (user_addr_t)msghdr64->msg_iov;
fe8ab488 2884 user_msg->msg_iovlen = msghdr64->msg_iovlen;
f427ee49 2885 user_msg->msg_control = (user_addr_t)msghdr64->msg_control;
fe8ab488
A
2886 user_msg->msg_controllen = msghdr64->msg_controllen;
2887 user_msg->msg_flags = msghdr64->msg_flags;
f427ee49 2888 user_msg->msg_datalen = (size_t)msghdr64->msg_datalen;
fe8ab488 2889 } else {
3e170ce0 2890 const struct user32_msghdr_x *msghdr32;
fe8ab488 2891
3e170ce0 2892 msghdr32 = ((const struct user32_msghdr_x *)src) + i;
fe8ab488
A
2893
2894 user_msg->msg_name = msghdr32->msg_name;
2895 user_msg->msg_namelen = msghdr32->msg_namelen;
2896 user_msg->msg_iov = msghdr32->msg_iov;
2897 user_msg->msg_iovlen = msghdr32->msg_iovlen;
2898 user_msg->msg_control = msghdr32->msg_control;
2899 user_msg->msg_controllen = msghdr32->msg_controllen;
2900 user_msg->msg_flags = msghdr32->msg_flags;
2901 user_msg->msg_datalen = msghdr32->msg_datalen;
2902 }
3e170ce0
A
2903
2904 if (user_msg->msg_iovlen <= 0 ||
2905 user_msg->msg_iovlen > UIO_MAXIOV) {
fe8ab488
A
2906 error = EMSGSIZE;
2907 goto done;
2908 }
3e170ce0
A
2909 auio = uio_create(user_msg->msg_iovlen, 0, spacetype,
2910 direction);
fe8ab488
A
2911 if (auio == NULL) {
2912 error = ENOMEM;
2913 goto done;
2914 }
2915 uiop[i] = auio;
2916
3e170ce0
A
2917 iovp = uio_iovsaddr(auio);
2918 if (iovp == NULL) {
2919 error = ENOMEM;
2920 goto done;
2921 }
2922 error = copyin_user_iovec_array(user_msg->msg_iov,
0a7de745
A
2923 spacetype, user_msg->msg_iovlen, iovp);
2924 if (error) {
3e170ce0 2925 goto done;
0a7de745 2926 }
3e170ce0 2927 user_msg->msg_iov = CAST_USER_ADDR_T(iovp);
fe8ab488 2928
3e170ce0 2929 error = uio_calculateresid(auio);
0a7de745 2930 if (error) {
3e170ce0 2931 goto done;
0a7de745 2932 }
3e170ce0
A
2933 user_msg->msg_datalen = uio_resid(auio);
2934
0a7de745 2935 if (user_msg->msg_name && user_msg->msg_namelen) {
3e170ce0 2936 namecnt++;
0a7de745
A
2937 }
2938 if (user_msg->msg_control && user_msg->msg_controllen) {
3e170ce0 2939 ctlcnt++;
0a7de745 2940 }
3e170ce0
A
2941 }
2942done:
2943
0a7de745 2944 return error;
3e170ce0
A
2945}
2946
2947int
2948internalize_recv_msghdr_array(const void *src, int spacetype, int direction,
2949 u_int count, struct user_msghdr_x *dst,
2950 struct recv_msg_elem *recv_msg_array)
2951{
2952 int error = 0;
2953 u_int i;
2954
2955 for (i = 0; i < count; i++) {
2956 struct user_iovec *iovp;
2957 struct user_msghdr_x *user_msg = dst + i;
2958 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
2959
2960 if (spacetype == UIO_USERSPACE64) {
2961 const struct user64_msghdr_x *msghdr64;
2962
2963 msghdr64 = ((const struct user64_msghdr_x *)src) + i;
2964
f427ee49 2965 user_msg->msg_name = (user_addr_t)msghdr64->msg_name;
3e170ce0 2966 user_msg->msg_namelen = msghdr64->msg_namelen;
f427ee49 2967 user_msg->msg_iov = (user_addr_t)msghdr64->msg_iov;
3e170ce0 2968 user_msg->msg_iovlen = msghdr64->msg_iovlen;
f427ee49 2969 user_msg->msg_control = (user_addr_t)msghdr64->msg_control;
3e170ce0
A
2970 user_msg->msg_controllen = msghdr64->msg_controllen;
2971 user_msg->msg_flags = msghdr64->msg_flags;
f427ee49 2972 user_msg->msg_datalen = (size_t)msghdr64->msg_datalen;
fe8ab488 2973 } else {
3e170ce0
A
2974 const struct user32_msghdr_x *msghdr32;
2975
2976 msghdr32 = ((const struct user32_msghdr_x *)src) + i;
2977
2978 user_msg->msg_name = msghdr32->msg_name;
2979 user_msg->msg_namelen = msghdr32->msg_namelen;
2980 user_msg->msg_iov = msghdr32->msg_iov;
2981 user_msg->msg_iovlen = msghdr32->msg_iovlen;
2982 user_msg->msg_control = msghdr32->msg_control;
2983 user_msg->msg_controllen = msghdr32->msg_controllen;
2984 user_msg->msg_flags = msghdr32->msg_flags;
2985 user_msg->msg_datalen = msghdr32->msg_datalen;
fe8ab488 2986 }
3e170ce0
A
2987
2988 if (user_msg->msg_iovlen <= 0 ||
2989 user_msg->msg_iovlen > UIO_MAXIOV) {
2990 error = EMSGSIZE;
2991 goto done;
2992 }
2993 recv_msg_elem->uio = uio_create(user_msg->msg_iovlen, 0,
2994 spacetype, direction);
2995 if (recv_msg_elem->uio == NULL) {
2996 error = ENOMEM;
2997 goto done;
2998 }
2999
3000 iovp = uio_iovsaddr(recv_msg_elem->uio);
3001 if (iovp == NULL) {
3002 error = ENOMEM;
3003 goto done;
3004 }
3005 error = copyin_user_iovec_array(user_msg->msg_iov,
0a7de745
A
3006 spacetype, user_msg->msg_iovlen, iovp);
3007 if (error) {
3e170ce0 3008 goto done;
0a7de745 3009 }
3e170ce0
A
3010 user_msg->msg_iov = CAST_USER_ADDR_T(iovp);
3011
3012 error = uio_calculateresid(recv_msg_elem->uio);
0a7de745 3013 if (error) {
3e170ce0 3014 goto done;
0a7de745 3015 }
3e170ce0
A
3016 user_msg->msg_datalen = uio_resid(recv_msg_elem->uio);
3017
0a7de745 3018 if (user_msg->msg_name && user_msg->msg_namelen) {
3e170ce0 3019 recv_msg_elem->which |= SOCK_MSG_SA;
0a7de745
A
3020 }
3021 if (user_msg->msg_control && user_msg->msg_controllen) {
3e170ce0 3022 recv_msg_elem->which |= SOCK_MSG_CONTROL;
0a7de745 3023 }
fe8ab488
A
3024 }
3025done:
3e170ce0 3026
0a7de745 3027 return error;
fe8ab488
A
3028}
3029
3030u_int
3031externalize_user_msghdr_array(void *dst, int spacetype, int direction,
3e170ce0 3032 u_int count, const struct user_msghdr_x *src, struct uio **uiop)
fe8ab488
A
3033{
3034#pragma unused(direction)
3035 u_int i;
3036 int seenlast = 0;
3037 u_int retcnt = 0;
3038
3039 for (i = 0; i < count; i++) {
3e170ce0 3040 const struct user_msghdr_x *user_msg = src + i;
fe8ab488
A
3041 uio_t auio = uiop[i];
3042 user_ssize_t len = user_msg->msg_datalen - uio_resid(auio);
3043
0a7de745 3044 if (user_msg->msg_datalen != 0 && len == 0) {
fe8ab488 3045 seenlast = 1;
0a7de745 3046 }
3e170ce0 3047
0a7de745
A
3048 if (seenlast == 0) {
3049 retcnt++;
3050 }
3e170ce0
A
3051
3052 if (spacetype == UIO_USERSPACE64) {
3053 struct user64_msghdr_x *msghdr64;
3054
3055 msghdr64 = ((struct user64_msghdr_x *)dst) + i;
3056
3057 msghdr64->msg_flags = user_msg->msg_flags;
3058 msghdr64->msg_datalen = len;
3e170ce0
A
3059 } else {
3060 struct user32_msghdr_x *msghdr32;
3061
3062 msghdr32 = ((struct user32_msghdr_x *)dst) + i;
3063
3064 msghdr32->msg_flags = user_msg->msg_flags;
f427ee49 3065 msghdr32->msg_datalen = (user32_size_t)len;
3e170ce0
A
3066 }
3067 }
0a7de745 3068 return retcnt;
3e170ce0
A
3069}
3070
3071u_int
c3c9b80d
A
3072externalize_recv_msghdr_array(struct proc *p, struct socket *so, void *dst,
3073 u_int count, struct user_msghdr_x *src,
3074 struct recv_msg_elem *recv_msg_array, int *ret_error)
3e170ce0
A
3075{
3076 u_int i;
3e170ce0 3077 u_int retcnt = 0;
c3c9b80d
A
3078 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
3079
3080 *ret_error = 0;
3e170ce0
A
3081
3082 for (i = 0; i < count; i++) {
c3c9b80d 3083 struct user_msghdr_x *user_msg = src + i;
3e170ce0 3084 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
c3c9b80d
A
3085 user_ssize_t len = 0;
3086 int error;
3e170ce0
A
3087
3088 len = user_msg->msg_datalen - uio_resid(recv_msg_elem->uio);
3089
c3c9b80d
A
3090 if ((recv_msg_elem->which & SOCK_MSG_DATA)) {
3091 retcnt++;
3092
3093
3094 if (recv_msg_elem->which & SOCK_MSG_SA) {
3095 error = copyout_sa(recv_msg_elem->psa, user_msg->msg_name,
3096 &user_msg->msg_namelen);
3097 if (error != 0) {
3098 *ret_error = error;
3099 return 0;
3100 }
0a7de745 3101 }
c3c9b80d
A
3102 if (recv_msg_elem->which & SOCK_MSG_CONTROL) {
3103 error = copyout_control(p, recv_msg_elem->controlp,
3104 user_msg->msg_control, &user_msg->msg_controllen,
3105 &recv_msg_elem->flags, so);
3106 if (error != 0) {
3107 *ret_error = error;
3108 return 0;
3109 }
0a7de745 3110 }
3e170ce0
A
3111 }
3112
fe8ab488 3113 if (spacetype == UIO_USERSPACE64) {
c3c9b80d 3114 struct user64_msghdr_x *msghdr64 = ((struct user64_msghdr_x *)dst) + i;
fe8ab488 3115
c3c9b80d
A
3116 msghdr64->msg_namelen = user_msg->msg_namelen;
3117 msghdr64->msg_controllen = user_msg->msg_controllen;
3118 msghdr64->msg_flags = recv_msg_elem->flags;
fe8ab488 3119 msghdr64->msg_datalen = len;
fe8ab488 3120 } else {
c3c9b80d 3121 struct user32_msghdr_x *msghdr32 = ((struct user32_msghdr_x *)dst) + i;
fe8ab488 3122
c3c9b80d
A
3123 msghdr32->msg_namelen = user_msg->msg_namelen;
3124 msghdr32->msg_controllen = user_msg->msg_controllen;
3125 msghdr32->msg_flags = recv_msg_elem->flags;
f427ee49 3126 msghdr32->msg_datalen = (user32_size_t)len;
fe8ab488
A
3127 }
3128 }
0a7de745 3129 return retcnt;
fe8ab488
A
3130}
3131
3132void
3133free_uio_array(struct uio **uiop, u_int count)
3134{
3135 u_int i;
3136
3137 for (i = 0; i < count; i++) {
0a7de745 3138 if (uiop[i] != NULL) {
fe8ab488 3139 uio_free(uiop[i]);
0a7de745 3140 }
fe8ab488
A
3141 }
3142}
3143
3144__private_extern__ user_ssize_t
3145uio_array_resid(struct uio **uiop, u_int count)
3146{
3147 user_ssize_t len = 0;
3148 u_int i;
3149
3150 for (i = 0; i < count; i++) {
3151 struct uio *auio = uiop[i];
3152
0a7de745 3153 if (auio != NULL) {
fe8ab488 3154 len += uio_resid(auio);
0a7de745 3155 }
fe8ab488 3156 }
0a7de745 3157 return len;
fe8ab488
A
3158}
3159
f427ee49 3160static boolean_t
fe8ab488
A
3161uio_array_is_valid(struct uio **uiop, u_int count)
3162{
3163 user_ssize_t len = 0;
3164 u_int i;
3165
3166 for (i = 0; i < count; i++) {
3167 struct uio *auio = uiop[i];
3e170ce0 3168
fe8ab488
A
3169 if (auio != NULL) {
3170 user_ssize_t resid = uio_resid(auio);
3e170ce0 3171
fe8ab488
A
3172 /*
3173 * Sanity check on the validity of the iovec:
3174 * no point of going over sb_max
3175 */
f427ee49
A
3176 if (resid < 0 || resid > (user_ssize_t)sb_max) {
3177 return false;
0a7de745 3178 }
3e170ce0
A
3179
3180 len += resid;
f427ee49
A
3181 if (len < 0 || len > (user_ssize_t)sb_max) {
3182 return false;
0a7de745 3183 }
3e170ce0
A
3184 }
3185 }
f427ee49 3186 return true;
3e170ce0
A
3187}
3188
3189
3190struct recv_msg_elem *
3191alloc_recv_msg_array(u_int count)
3192{
c3c9b80d
A
3193 return kheap_alloc(KHEAP_TEMP,
3194 count * sizeof(struct recv_msg_elem), Z_WAITOK | Z_ZERO);
3e170ce0
A
3195}
3196
3197void
3198free_recv_msg_array(struct recv_msg_elem *recv_msg_array, u_int count)
3199{
c3c9b80d
A
3200 if (recv_msg_array == NULL) {
3201 return;
3202 }
3203 for (uint32_t i = 0; i < count; i++) {
3e170ce0
A
3204 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3205
0a7de745 3206 if (recv_msg_elem->uio != NULL) {
3e170ce0 3207 uio_free(recv_msg_elem->uio);
0a7de745 3208 }
c3c9b80d 3209 _FREE(recv_msg_elem->psa, M_TEMP);
0a7de745 3210 if (recv_msg_elem->controlp != NULL) {
3e170ce0 3211 m_freem(recv_msg_elem->controlp);
0a7de745 3212 }
3e170ce0 3213 }
c3c9b80d
A
3214 kheap_free(KHEAP_TEMP, recv_msg_array,
3215 count * sizeof(struct recv_msg_elem));
3e170ce0
A
3216}
3217
3218
3219__private_extern__ user_ssize_t
3220recv_msg_array_resid(struct recv_msg_elem *recv_msg_array, u_int count)
3221{
3222 user_ssize_t len = 0;
3223 u_int i;
3224
3225 for (i = 0; i < count; i++) {
3226 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3227
0a7de745 3228 if (recv_msg_elem->uio != NULL) {
3e170ce0 3229 len += uio_resid(recv_msg_elem->uio);
0a7de745 3230 }
3e170ce0 3231 }
0a7de745 3232 return len;
3e170ce0
A
3233}
3234
3235int
3236recv_msg_array_is_valid(struct recv_msg_elem *recv_msg_array, u_int count)
3237{
3238 user_ssize_t len = 0;
3239 u_int i;
3240
3241 for (i = 0; i < count; i++) {
3242 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3243
3244 if (recv_msg_elem->uio != NULL) {
3245 user_ssize_t resid = uio_resid(recv_msg_elem->uio);
3246
3247 /*
3248 * Sanity check on the validity of the iovec:
3249 * no point of going over sb_max
3250 */
0a7de745
A
3251 if (resid < 0 || (u_int32_t)resid > sb_max) {
3252 return 0;
3253 }
3e170ce0 3254
fe8ab488 3255 len += resid;
0a7de745
A
3256 if (len < 0 || (u_int32_t)len > sb_max) {
3257 return 0;
3258 }
fe8ab488
A
3259 }
3260 }
0a7de745 3261 return 1;
fe8ab488
A
3262}
3263
39236c6e 3264#if SENDFILE
2d21ac55 3265
/* Number of iovec slots in the on-stack uio used by sendfile() */
#define SFUIOBUFS       64

/*
 * Number of clusters of (1 << shift) bytes needed to hold n bytes.
 * n is converted to unsigned int first; for n == 0 the subtraction wraps,
 * so callers must pass a non-zero length.
 */
#define SF_HOWMANY(n, shift)    ((((unsigned int)(n) - 1) >> (shift)) + 1)

/* Macros to compute the number of mbufs needed depending on cluster size */
#define HOWMANY_16K(n)  SF_HOWMANY(n, M16KCLSHIFT)
#define HOWMANY_4K(n)   SF_HOWMANY(n, MBIGCLSHIFT)

/* Upper send limit in bytes (SFUIOBUFS * PAGESIZE) */
#define SENDFILE_MAX_BYTES      (SFUIOBUFS << PGSHIFT)

/* Upper send limit in the number of mbuf clusters */
#define SENDFILE_MAX_16K        HOWMANY_16K(SENDFILE_MAX_BYTES)
#define SENDFILE_MAX_4K         HOWMANY_4K(SENDFILE_MAX_BYTES)
2d21ac55 3278
1c79356b 3279static void
2d21ac55
A
3280alloc_sendpkt(int how, size_t pktlen, unsigned int *maxchunks,
3281 struct mbuf **m, boolean_t jumbocl)
1c79356b 3282{
2d21ac55 3283 unsigned int needed;
1c79356b 3284
0a7de745 3285 if (pktlen == 0) {
2d21ac55 3286 panic("%s: pktlen (%ld) must be non-zero\n", __func__, pktlen);
0a7de745 3287 }
1c79356b 3288
2d21ac55
A
3289 /*
3290 * Try to allocate for the whole thing. Since we want full control
3291 * over the buffer size and be able to accept partial result, we can't
3292 * use mbuf_allocpacket(). The logic below is similar to sosend().
3293 */
3294 *m = NULL;
6d2010ae 3295 if (pktlen > MBIGCLBYTES && jumbocl) {
2d21ac55
A
3296 needed = MIN(SENDFILE_MAX_16K, HOWMANY_16K(pktlen));
3297 *m = m_getpackets_internal(&needed, 1, how, 0, M16KCLBYTES);
3298 }
3299 if (*m == NULL) {
3300 needed = MIN(SENDFILE_MAX_4K, HOWMANY_4K(pktlen));
6d2010ae 3301 *m = m_getpackets_internal(&needed, 1, how, 0, MBIGCLBYTES);
2d21ac55
A
3302 }
3303
3304 /*
3305 * Our previous attempt(s) at allocation had failed; the system
3306 * may be short on mbufs, and we want to block until they are
3307 * available. This time, ask just for 1 mbuf and don't return
3308 * until we get it.
3309 */
3310 if (*m == NULL) {
3311 needed = 1;
6d2010ae 3312 *m = m_getpackets_internal(&needed, 1, M_WAIT, 1, MBIGCLBYTES);
1c79356b 3313 }
0a7de745 3314 if (*m == NULL) {
2d21ac55 3315 panic("%s: blocking allocation returned NULL\n", __func__);
0a7de745 3316 }
2d21ac55
A
3317
3318 *maxchunks = needed;
1c79356b
A
3319}
3320
3321/*
3322 * sendfile(2).
2d21ac55
A
3323 * int sendfile(int fd, int s, off_t offset, off_t *nbytes,
3324 * struct sf_hdtr *hdtr, int flags)
1c79356b
A
3325 *
3326 * Send a file specified by 'fd' and starting at 'offset' to a socket
2d21ac55
A
3327 * specified by 's'. Send only '*nbytes' of the file or until EOF if
3328 * *nbytes == 0. Optionally add a header and/or trailer to the socket
3329 * output. If specified, write the total number of bytes sent into *nbytes.
1c79356b
A
3330 */
3331int
2d21ac55 3332sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval)
1c79356b 3333{
91447636 3334 struct fileproc *fp;
1c79356b 3335 struct vnode *vp;
1c79356b 3336 struct socket *so;
2d21ac55
A
3337 struct writev_nocancel_args nuap;
3338 user_ssize_t writev_retval;
2d21ac55 3339 struct user_sf_hdtr user_hdtr;
b0d623f7
A
3340 struct user32_sf_hdtr user32_hdtr;
3341 struct user64_sf_hdtr user64_hdtr;
2d21ac55
A
3342 off_t off, xfsize;
3343 off_t nbytes = 0, sbytes = 0;
3344 int error = 0;
3345 size_t sizeof_hdtr;
2d21ac55
A
3346 off_t file_size;
3347 struct vfs_context context = *vfs_context_current();
3e170ce0 3348
2d21ac55
A
3349 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_START), uap->s,
3350 0, 0, 0, 0);
b0d623f7
A
3351
3352 AUDIT_ARG(fd, uap->fd);
3353 AUDIT_ARG(value32, uap->s);
3354
1c79356b
A
3355 /*
3356 * Do argument checking. Must be a regular file in, stream
3357 * type and connected socket out, positive offset.
3358 */
2d21ac55 3359 if ((error = fp_getfvp(p, uap->fd, &fp, &vp))) {
1c79356b 3360 goto done;
2d21ac55
A
3361 }
3362 if ((fp->f_flag & FREAD) == 0) {
91447636
A
3363 error = EBADF;
3364 goto done1;
1c79356b 3365 }
2d21ac55
A
3366 if (vnode_isreg(vp) == 0) {
3367 error = ENOTSUP;
91447636 3368 goto done1;
1c79356b 3369 }
91447636 3370 error = file_socket(uap->s, &so);
2d21ac55 3371 if (error) {
91447636 3372 goto done1;
2d21ac55 3373 }
55e303ae
A
3374 if (so == NULL) {
3375 error = EBADF;
91447636 3376 goto done2;
55e303ae 3377 }
1c79356b
A
3378 if (so->so_type != SOCK_STREAM) {
3379 error = EINVAL;
2d21ac55 3380 goto done2;
1c79356b
A
3381 }
3382 if ((so->so_state & SS_ISCONNECTED) == 0) {
3383 error = ENOTCONN;
2d21ac55 3384 goto done2;
1c79356b
A
3385 }
3386 if (uap->offset < 0) {
3387 error = EINVAL;
2d21ac55 3388 goto done2;
1c79356b 3389 }
2d21ac55
A
3390 if (uap->nbytes == USER_ADDR_NULL) {
3391 error = EINVAL;
3392 goto done2;
3393 }
3394 if (uap->flags != 0) {
3395 error = EINVAL;
3396 goto done2;
3397 }
3398
f427ee49 3399 context.vc_ucred = fp->fp_glob->fg_cred;
2d21ac55
A
3400
3401#if CONFIG_MACF_SOCKET_SUBSET
3402 /* JMM - fetch connected sockaddr? */
3403 error = mac_socket_check_send(context.vc_ucred, so, NULL);
0a7de745 3404 if (error) {
2d21ac55 3405 goto done2;
0a7de745 3406 }
2d21ac55
A
3407#endif
3408
3409 /*
3410 * Get number of bytes to send
3411 * Should it applies to size of header and trailer?
2d21ac55 3412 */
cb323159
A
3413 error = copyin(uap->nbytes, &nbytes, sizeof(off_t));
3414 if (error) {
3415 goto done2;
3416 }
1c79356b
A
3417
3418 /*
3419 * If specified, get the pointer to the sf_hdtr struct for
3420 * any headers/trailers.
3421 */
2d21ac55
A
3422 if (uap->hdtr != USER_ADDR_NULL) {
3423 caddr_t hdtrp;
3424
0a7de745 3425 bzero(&user_hdtr, sizeof(user_hdtr));
2d21ac55 3426 if (IS_64BIT_PROCESS(p)) {
b0d623f7 3427 hdtrp = (caddr_t)&user64_hdtr;
0a7de745 3428 sizeof_hdtr = sizeof(user64_hdtr);
2d21ac55 3429 } else {
b0d623f7 3430 hdtrp = (caddr_t)&user32_hdtr;
0a7de745 3431 sizeof_hdtr = sizeof(user32_hdtr);
2d21ac55
A
3432 }
3433 error = copyin(uap->hdtr, hdtrp, sizeof_hdtr);
0a7de745 3434 if (error) {
2d21ac55 3435 goto done2;
0a7de745 3436 }
b0d623f7
A
3437 if (IS_64BIT_PROCESS(p)) {
3438 user_hdtr.headers = user64_hdtr.headers;
3439 user_hdtr.hdr_cnt = user64_hdtr.hdr_cnt;
3440 user_hdtr.trailers = user64_hdtr.trailers;
3441 user_hdtr.trl_cnt = user64_hdtr.trl_cnt;
3442 } else {
3443 user_hdtr.headers = user32_hdtr.headers;
3444 user_hdtr.hdr_cnt = user32_hdtr.hdr_cnt;
3445 user_hdtr.trailers = user32_hdtr.trailers;
3446 user_hdtr.trl_cnt = user32_hdtr.trl_cnt;
2d21ac55
A
3447 }
3448
1c79356b
A
3449 /*
3450 * Send any headers. Wimp out and use writev(2).
3451 */
2d21ac55 3452 if (user_hdtr.headers != USER_ADDR_NULL) {
0a7de745 3453 bzero(&nuap, sizeof(struct writev_args));
1c79356b 3454 nuap.fd = uap->s;
2d21ac55
A
3455 nuap.iovp = user_hdtr.headers;
3456 nuap.iovcnt = user_hdtr.hdr_cnt;
3457 error = writev_nocancel(p, &nuap, &writev_retval);
316670eb 3458 if (error) {
2d21ac55 3459 goto done2;
316670eb 3460 }
2d21ac55 3461 sbytes += writev_retval;
1c79356b
A
3462 }
3463 }
3464
3465 /*
2d21ac55
A
3466 * Get the file size for 2 reasons:
3467 * 1. We don't want to allocate more mbufs than necessary
3468 * 2. We don't want to read past the end of file
1c79356b 3469 */
316670eb 3470 if ((error = vnode_size(vp, &file_size, vfs_context_current())) != 0) {
2d21ac55 3471 goto done2;
316670eb 3472 }
1c79356b
A
3473
3474 /*
2d21ac55
A
3475 * Simply read file data into a chain of mbufs that used with scatter
3476 * gather reads. We're not (yet?) setup to use zero copy external
3477 * mbufs that point to the file pages.
1c79356b 3478 */
2d21ac55 3479 socket_lock(so, 1);
39236c6e 3480 error = sblock(&so->so_snd, SBL_WAIT);
2d21ac55
A
3481 if (error) {
3482 socket_unlock(so, 1);
3483 goto done2;
3484 }
0a7de745
A
3485 for (off = uap->offset;; off += xfsize, sbytes += xfsize) {
3486 mbuf_t m0 = NULL, m;
3487 unsigned int nbufs = SFUIOBUFS, i;
3488 uio_t auio;
3489 char uio_buf[UIO_SIZEOF(SFUIOBUFS)]; /* 1 KB !!! */
3490 size_t uiolen;
3491 user_ssize_t rlen;
3492 off_t pgoff;
3493 size_t pktlen;
2d21ac55 3494 boolean_t jumbocl;
1c79356b 3495
1c79356b 3496 /*
2d21ac55
A
3497 * Calculate the amount to transfer.
3498 * Align to round number of pages.
3499 * Not to exceed send socket buffer,
1c79356b
A
3500 * the EOF, or the passed in nbytes.
3501 */
2d21ac55
A
3502 xfsize = sbspace(&so->so_snd);
3503
3504 if (xfsize <= 0) {
3505 if (so->so_state & SS_CANTSENDMORE) {
3506 error = EPIPE;
3507 goto done3;
3508 } else if ((so->so_state & SS_NBIO)) {
3509 error = EAGAIN;
3510 goto done3;
3511 } else {
3512 xfsize = PAGE_SIZE;
3513 }
3514 }
3515
0a7de745 3516 if (xfsize > SENDFILE_MAX_BYTES) {
2d21ac55 3517 xfsize = SENDFILE_MAX_BYTES;
0a7de745 3518 } else if (xfsize > PAGE_SIZE) {
2d21ac55 3519 xfsize = trunc_page(xfsize);
0a7de745 3520 }
2d21ac55 3521 pgoff = off & PAGE_MASK_64;
0a7de745 3522 if (pgoff > 0 && PAGE_SIZE - pgoff < xfsize) {
1c79356b 3523 xfsize = PAGE_SIZE_64 - pgoff;
0a7de745
A
3524 }
3525 if (nbytes && xfsize > (nbytes - sbytes)) {
2d21ac55 3526 xfsize = nbytes - sbytes;
0a7de745
A
3527 }
3528 if (xfsize <= 0) {
2d21ac55 3529 break;
0a7de745
A
3530 }
3531 if (off + xfsize > file_size) {
2d21ac55 3532 xfsize = file_size - off;
0a7de745
A
3533 }
3534 if (xfsize <= 0) {
1c79356b 3535 break;
0a7de745 3536 }
2d21ac55 3537
1c79356b 3538 /*
2d21ac55
A
3539 * Attempt to use larger than system page-size clusters for
3540 * large writes only if there is a jumbo cluster pool and
3541 * if the socket is marked accordingly.
1c79356b 3542 */
2d21ac55
A
3543 jumbocl = sosendjcl && njcl > 0 &&
3544 ((so->so_flags & SOF_MULTIPAGES) || sosendjcl_ignore_capab);
3545
3546 socket_unlock(so, 0);
3547 alloc_sendpkt(M_WAIT, xfsize, &nbufs, &m0, jumbocl);
fe8ab488 3548 pktlen = mbuf_pkthdr_maxlen(m0);
0a7de745 3549 if (pktlen < (size_t)xfsize) {
2d21ac55 3550 xfsize = pktlen;
0a7de745 3551 }
39236c6e 3552
2d21ac55 3553 auio = uio_createwithbuffer(nbufs, off, UIO_SYSSPACE,
0a7de745 3554 UIO_READ, &uio_buf[0], sizeof(uio_buf));
2d21ac55 3555 if (auio == NULL) {
316670eb 3556 printf("sendfile failed. nbufs = %d. %s", nbufs,
0a7de745 3557 "File a radar related to rdar://10146739.\n");
2d21ac55
A
3558 mbuf_freem(m0);
3559 error = ENXIO;
3560 socket_lock(so, 0);
3561 goto done3;
1c79356b 3562 }
1c79356b 3563
2d21ac55 3564 for (i = 0, m = m0, uiolen = 0;
b0d623f7 3565 i < nbufs && m != NULL && uiolen < (size_t)xfsize;
2d21ac55
A
3566 i++, m = mbuf_next(m)) {
3567 size_t mlen = mbuf_maxlen(m);
3568
0a7de745 3569 if (mlen + uiolen > (size_t)xfsize) {
2d21ac55 3570 mlen = xfsize - uiolen;
0a7de745 3571 }
2d21ac55
A
3572 mbuf_setlen(m, mlen);
3573 uio_addiov(auio, CAST_USER_ADDR_T(mbuf_datastart(m)),
3574 mlen);
3575 uiolen += mlen;
3576 }
3577
0a7de745 3578 if (xfsize != uio_resid(auio)) {
2d21ac55 3579 printf("sendfile: xfsize: %lld != uio_resid(auio): "
0a7de745
A
3580 "%lld\n", xfsize, (long long)uio_resid(auio));
3581 }
2d21ac55
A
3582
3583 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_START),
3584 uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
3585 (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
3586 error = fo_read(fp, auio, FOF_OFFSET, &context);
3587 socket_lock(so, 0);
3588 if (error != 0) {
3589 if (uio_resid(auio) != xfsize && (error == ERESTART ||
3590 error == EINTR || error == EWOULDBLOCK)) {
3591 error = 0;
3592 } else {
3593 mbuf_freem(m0);
3594 goto done3;
1c79356b 3595 }
1c79356b 3596 }
2d21ac55
A
3597 xfsize -= uio_resid(auio);
3598 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_END),
3599 uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
3600 (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
3601
3602 if (xfsize == 0) {
3e170ce0 3603 // printf("sendfile: fo_read 0 bytes, EOF\n");
2d21ac55 3604 break;
91447636 3605 }
0a7de745 3606 if (xfsize + off > file_size) {
2d21ac55
A
3607 printf("sendfile: xfsize: %lld + off: %lld > file_size:"
3608 "%lld\n", xfsize, off, file_size);
0a7de745 3609 }
2d21ac55
A
3610 for (i = 0, m = m0, rlen = 0;
3611 i < nbufs && m != NULL && rlen < xfsize;
3612 i++, m = mbuf_next(m)) {
3613 size_t mlen = mbuf_maxlen(m);
3614
0a7de745 3615 if (rlen + mlen > (size_t)xfsize) {
2d21ac55 3616 mlen = xfsize - rlen;
0a7de745 3617 }
2d21ac55
A
3618 mbuf_setlen(m, mlen);
3619
3620 rlen += mlen;
3621 }
3622 mbuf_pkthdr_setlen(m0, xfsize);
3623
1c79356b
A
3624retry_space:
3625 /*
3626 * Make sure that the socket is still able to take more data.
3627 * CANTSENDMORE being true usually means that the connection
3628 * was closed. so_error is true when an error was sensed after
3629 * a previous send.
3630 * The state is checked after the page mapping and buffer
3631 * allocation above since those operations may block and make
3632 * any socket checks stale. From this point forward, nothing
3633 * blocks before the pru_send (or more accurately, any blocking
3634 * results in a loop back to here to re-check).
3635 */
3636 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
3637 if (so->so_state & SS_CANTSENDMORE) {
3638 error = EPIPE;
3639 } else {
3640 error = so->so_error;
3641 so->so_error = 0;
3642 }
2d21ac55
A
3643 m_freem(m0);
3644 goto done3;
1c79356b
A
3645 }
3646 /*
3647 * Wait for socket space to become available. We do this just
3648 * after checking the connection state above in order to avoid
3649 * a race condition with sbwait().
3650 */
2d21ac55 3651 if (sbspace(&so->so_snd) < (long)so->so_snd.sb_lowat) {
1c79356b 3652 if (so->so_state & SS_NBIO) {
2d21ac55 3653 m_freem(m0);
1c79356b 3654 error = EAGAIN;
2d21ac55 3655 goto done3;
1c79356b 3656 }
2d21ac55
A
3657 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT |
3658 DBG_FUNC_START), uap->s, 0, 0, 0, 0);
1c79356b 3659 error = sbwait(&so->so_snd);
0a7de745 3660 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT |
2d21ac55 3661 DBG_FUNC_END), uap->s, 0, 0, 0, 0);
1c79356b
A
3662 /*
3663 * An error from sbwait usually indicates that we've
3664 * been interrupted by a signal. If we've sent anything
3665 * then return bytes sent, otherwise return the error.
3666 */
3667 if (error) {
2d21ac55
A
3668 m_freem(m0);
3669 goto done3;
1c79356b
A
3670 }
3671 goto retry_space;
3672 }
39236c6e 3673
6d2010ae 3674 struct mbuf *control = NULL;
2d21ac55
A
3675 {
3676 /*
3677 * Socket filter processing
3678 */
2d21ac55 3679
6d2010ae
A
3680 error = sflt_data_out(so, NULL, &m0, &control, 0);
3681 if (error) {
3682 if (error == EJUSTRETURN) {
3683 error = 0;
3684 continue;
2d21ac55 3685 }
6d2010ae 3686 goto done3;
2d21ac55
A
3687 }
3688 /*
3689 * End Socket filter processing
3690 */
3691 }
3692 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
3693 uap->s, 0, 0, 0, 0);
3694 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m0,
6d2010ae 3695 0, control, p);
2d21ac55
A
3696 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
3697 uap->s, 0, 0, 0, 0);
1c79356b 3698 if (error) {
2d21ac55 3699 goto done3;
1c79356b
A
3700 }
3701 }
0a7de745 3702 sbunlock(&so->so_snd, FALSE); /* will unlock socket */
1c79356b
A
3703 /*
3704 * Send trailers. Wimp out and use writev(2).
3705 */
2d21ac55
A
3706 if (uap->hdtr != USER_ADDR_NULL &&
3707 user_hdtr.trailers != USER_ADDR_NULL) {
0a7de745 3708 bzero(&nuap, sizeof(struct writev_args));
2d21ac55
A
3709 nuap.fd = uap->s;
3710 nuap.iovp = user_hdtr.trailers;
3711 nuap.iovcnt = user_hdtr.trl_cnt;
3712 error = writev_nocancel(p, &nuap, &writev_retval);
316670eb 3713 if (error) {
2d21ac55 3714 goto done2;
316670eb 3715 }
2d21ac55 3716 sbytes += writev_retval;
1c79356b 3717 }
91447636
A
3718done2:
3719 file_drop(uap->s);
3720done1:
3721 file_drop(uap->fd);
1c79356b 3722done:
2d21ac55 3723 if (uap->nbytes != USER_ADDR_NULL) {
91447636 3724 /* XXX this appears bogus for some early failure conditions */
0a7de745 3725 copyout(&sbytes, uap->nbytes, sizeof(off_t));
1c79356b 3726 }
2d21ac55
A
3727 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_END), uap->s,
3728 (unsigned int)((sbytes >> 32) & 0x0ffffffff),
3729 (unsigned int)(sbytes & 0x0ffffffff), error, 0);
0a7de745 3730 return error;
91447636 3731done3:
0a7de745 3732 sbunlock(&so->so_snd, FALSE); /* will unlock socket */
91447636 3733 goto done2;
1c79356b
A
3734}
3735
2d21ac55
A
3736
3737#endif /* SENDFILE */