]> git.saurik.com Git - apple/xnu.git/blame - bsd/kern/uipc_syscalls.c
xnu-6153.81.5.tar.gz
[apple/xnu.git] / bsd / kern / uipc_syscalls.c
CommitLineData
1c79356b 1/*
3e170ce0 2 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
5d5c5d0d 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
39236c6e 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
39236c6e 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
39236c6e 17 *
2d21ac55
A
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
39236c6e 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * Copyright (c) 1982, 1986, 1989, 1990, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * sendfile(2) and related extensions:
2d21ac55 33 * Copyright (c) 1998, David Greenman. All rights reserved.
1c79356b
A
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. All advertising materials mentioning features or use of this software
44 * must display the following acknowledgement:
45 * This product includes software developed by the University of
46 * California, Berkeley and its contributors.
47 * 4. Neither the name of the University nor the names of its contributors
48 * may be used to endorse or promote products derived from this software
49 * without specific prior written permission.
50 *
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE.
62 *
63 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94
64 */
2d21ac55
A
65/*
66 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
67 * support for mandatory and extensible security protections. This notice
68 * is included in support of clause 2.2 (b) of the Apple Public License,
69 * Version 2.0.
70 */
1c79356b
A
71
72#include <sys/param.h>
73#include <sys/systm.h>
74#include <sys/filedesc.h>
91447636
A
75#include <sys/proc_internal.h>
76#include <sys/file_internal.h>
2d21ac55 77#include <sys/vnode_internal.h>
1c79356b 78#include <sys/malloc.h>
39236c6e 79#include <sys/mcache.h>
1c79356b 80#include <sys/mbuf.h>
fe8ab488 81#include <kern/locks.h>
91447636 82#include <sys/domain.h>
1c79356b 83#include <sys/protosw.h>
91447636 84#include <sys/signalvar.h>
1c79356b
A
85#include <sys/socket.h>
86#include <sys/socketvar.h>
1c79356b 87#include <sys/kernel.h>
91447636 88#include <sys/uio_internal.h>
2d21ac55 89#include <sys/kauth.h>
6d2010ae 90#include <kern/task.h>
39236c6e 91#include <sys/priv.h>
3e170ce0 92#include <sys/sysctl.h>
5c9f4661 93#include <sys/sys_domain.h>
e5568f75 94
b0d623f7 95#include <security/audit/audit.h>
1c79356b
A
96
97#include <sys/kdebug.h>
91447636 98#include <sys/sysproto.h>
2d21ac55
A
99#include <netinet/in.h>
100#include <net/route.h>
101#include <netinet/in_pcb.h>
102
cb323159
A
103#include <os/ptrtools.h>
104
2d21ac55
A
105#if CONFIG_MACF_SOCKET_SUBSET
106#include <security/mac_framework.h>
107#endif /* MAC_SOCKET_SUBSET */
108
0a7de745
A
/*
 * Shorthand for fields that live behind a fileproc's f_fglob pointer,
 * so that code in this file can write fp->f_flag, fp->f_data, etc.
 */
#define f_flag          f_fglob->fg_flag
#define f_type          f_fglob->fg_ops->fo_type
#define f_msgcount      f_fglob->fg_msgcount
#define f_cred          f_fglob->fg_cred
#define f_ops           f_fglob->fg_ops
#define f_offset        f_fglob->fg_offset
#define f_data          f_fglob->fg_data

/* Trace codes in the DBG_NETSOCK class: layer in/out markers ... */
#define DBG_LAYER_IN_BEG        NETDBG_CODE(DBG_NETSOCK, 0)
#define DBG_LAYER_IN_END        NETDBG_CODE(DBG_NETSOCK, 2)
#define DBG_LAYER_OUT_BEG       NETDBG_CODE(DBG_NETSOCK, 1)
#define DBG_LAYER_OUT_END       NETDBG_CODE(DBG_NETSOCK, 3)

/* ... and one code (or small family of codes) per traced function. */
#define DBG_FNC_SENDMSG         NETDBG_CODE(DBG_NETSOCK, (1 << 8) | 1)
#define DBG_FNC_SENDTO          NETDBG_CODE(DBG_NETSOCK, (2 << 8) | 1)
#define DBG_FNC_SENDIT          NETDBG_CODE(DBG_NETSOCK, (3 << 8) | 1)
#define DBG_FNC_RECVFROM        NETDBG_CODE(DBG_NETSOCK, (5 << 8))
#define DBG_FNC_RECVMSG         NETDBG_CODE(DBG_NETSOCK, (6 << 8))
#define DBG_FNC_RECVIT          NETDBG_CODE(DBG_NETSOCK, (7 << 8))
#define DBG_FNC_SENDFILE        NETDBG_CODE(DBG_NETSOCK, (10 << 8))
#define DBG_FNC_SENDFILE_WAIT   NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 1))
#define DBG_FNC_SENDFILE_READ   NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 2))
#define DBG_FNC_SENDFILE_SEND   NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 3))
#define DBG_FNC_SENDMSG_X       NETDBG_CODE(DBG_NETSOCK, (11 << 8))
#define DBG_FNC_RECVMSG_X       NETDBG_CODE(DBG_NETSOCK, (12 << 8))

/*
 * On DEBUG/DEVELOPMENT kernels pointers are passed through unmodified and
 * DBG_PRINTF() really prints; otherwise pointers go through
 * VM_KERNEL_ADDRPERM() and DBG_PRINTF() compiles to nothing.
 */
#if DEBUG || DEVELOPMENT
#define DEBUG_KERNEL_ADDRPERM(_v)       (_v)
#define DBG_PRINTF(...)                 printf(__VA_ARGS__)
#else /* !(DEBUG || DEVELOPMENT) */
#define DEBUG_KERNEL_ADDRPERM(_v)       VM_KERNEL_ADDRPERM(_v)
#define DBG_PRINTF(...)                 do { } while (0)
#endif /* DEBUG || DEVELOPMENT */
2d21ac55 141
2d21ac55
A
142/* TODO: should be in header file */
143int falloc_locked(proc_t, struct fileproc **, int *, vfs_context_t, int);
144
3e170ce0
A
145static int sendit(struct proc *, struct socket *, struct user_msghdr *, uio_t,
146 int, int32_t *);
2d21ac55 147static int recvit(struct proc *, int, struct user_msghdr *, uio_t, user_addr_t,
b0d623f7 148 int32_t *);
39236c6e 149static int connectit(struct socket *, struct sockaddr *);
2d21ac55 150static int getsockaddr(struct socket *, struct sockaddr **, user_addr_t,
4a3eedf9 151 size_t, boolean_t);
2d21ac55 152static int getsockaddr_s(struct socket *, struct sockaddr_storage *,
4a3eedf9 153 user_addr_t, size_t, boolean_t);
1c79356b 154#if SENDFILE
2d21ac55
A
155static void alloc_sendpkt(int, size_t, unsigned int *, struct mbuf **,
156 boolean_t);
157#endif /* SENDFILE */
39236c6e 158static int connectx_nocancel(struct proc *, struct connectx_args *, int *);
813fb2f6
A
159static int connectitx(struct socket *, struct sockaddr *,
160 struct sockaddr *, struct proc *, uint32_t, sae_associd_t,
3e170ce0 161 sae_connid_t *, uio_t, unsigned int, user_ssize_t *);
39236c6e
A
162static int disconnectx_nocancel(struct proc *, struct disconnectx_args *,
163 int *);
164static int socket_common(struct proc *, int, int, int, pid_t, int32_t *, int);
1c79356b 165
fe8ab488 166static int internalize_user_msghdr_array(const void *, int, int, u_int,
3e170ce0 167 struct user_msghdr_x *, struct uio **);
fe8ab488 168static u_int externalize_user_msghdr_array(void *, int, int, u_int,
3e170ce0 169 const struct user_msghdr_x *, struct uio **);
fe8ab488
A
170
171static void free_uio_array(struct uio **, u_int);
172static int uio_array_is_valid(struct uio **, u_int);
3e170ce0
A
173static int recv_msg_array_is_valid(struct recv_msg_elem *, u_int);
174static int internalize_recv_msghdr_array(const void *, int, int,
175 u_int, struct user_msghdr_x *, struct recv_msg_elem *);
176static u_int externalize_recv_msghdr_array(void *, int, int, u_int,
177 const struct user_msghdr_x *, struct recv_msg_elem *);
178static struct recv_msg_elem *alloc_recv_msg_array(u_int count);
179static void free_recv_msg_array(struct recv_msg_elem *, u_int);
180
181SYSCTL_DECL(_kern_ipc);
182
183static u_int somaxsendmsgx = 100;
184SYSCTL_UINT(_kern_ipc, OID_AUTO, maxsendmsgx,
0a7de745 185 CTLFLAG_RW | CTLFLAG_LOCKED, &somaxsendmsgx, 0, "");
3e170ce0
A
186static u_int somaxrecvmsgx = 100;
187SYSCTL_UINT(_kern_ipc, OID_AUTO, maxrecvmsgx,
0a7de745 188 CTLFLAG_RW | CTLFLAG_LOCKED, &somaxrecvmsgx, 0, "");
fe8ab488 189
1c79356b
A
190/*
191 * System call interface to the socket abstraction.
192 */
1c79356b 193
39236c6e 194extern const struct fileops socketops;
1c79356b 195
2d21ac55
A
196/*
197 * Returns: 0 Success
198 * EACCES Mandatory Access Control failure
199 * falloc:ENFILE
200 * falloc:EMFILE
201 * falloc:ENOMEM
202 * socreate:EAFNOSUPPORT
203 * socreate:EPROTOTYPE
204 * socreate:EPROTONOSUPPORT
205 * socreate:ENOBUFS
206 * socreate:ENOMEM
2d21ac55
A
207 * socreate:??? [other protocol families, IPSEC]
208 */
1c79356b 209int
39236c6e 210socket(struct proc *p,
0a7de745
A
211 struct socket_args *uap,
212 int32_t *retval)
39236c6e 213{
0a7de745
A
214 return socket_common(p, uap->domain, uap->type, uap->protocol,
215 proc_selfpid(), retval, 0);
39236c6e
A
216}
217
218int
219socket_delegate(struct proc *p,
0a7de745
A
220 struct socket_delegate_args *uap,
221 int32_t *retval)
39236c6e
A
222{
223 return socket_common(p, uap->domain, uap->type, uap->protocol,
0a7de745 224 uap->epid, retval, 1);
39236c6e
A
225}
226
227static int
228socket_common(struct proc *p,
0a7de745
A
229 int domain,
230 int type,
231 int protocol,
232 pid_t epid,
233 int32_t *retval,
234 int delegate)
1c79356b 235{
1c79356b 236 struct socket *so;
91447636 237 struct fileproc *fp;
1c79356b
A
238 int fd, error;
239
39236c6e 240 AUDIT_ARG(socket, domain, type, protocol);
2d21ac55 241#if CONFIG_MACF_SOCKET_SUBSET
39236c6e 242 if ((error = mac_socket_check_create(kauth_cred_get(), domain,
0a7de745
A
243 type, protocol)) != 0) {
244 return error;
245 }
2d21ac55 246#endif /* MAC_SOCKET_SUBSET */
1c79356b 247
39236c6e
A
248 if (delegate) {
249 error = priv_check_cred(kauth_cred_get(),
250 PRIV_NET_PRIVILEGED_SOCKET_DELEGATE, 0);
0a7de745
A
251 if (error) {
252 return EACCES;
253 }
39236c6e
A
254 }
255
2d21ac55 256 error = falloc(p, &fp, &fd, vfs_context_current());
91447636 257 if (error) {
0a7de745 258 return error;
91447636 259 }
0a7de745 260 fp->f_flag = FREAD | FWRITE;
1c79356b 261 fp->f_ops = &socketops;
91447636 262
0a7de745 263 if (delegate) {
39236c6e 264 error = socreate_delegate(domain, &so, type, protocol, epid);
0a7de745 265 } else {
39236c6e 266 error = socreate(domain, &so, type, protocol);
0a7de745 267 }
39236c6e 268
91447636
A
269 if (error) {
270 fp_free(p, fd, fp);
1c79356b
A
271 } else {
272 fp->f_data = (caddr_t)so;
91447636
A
273
274 proc_fdlock(p);
6601e61a 275 procfdtbl_releasefd(p, fd, NULL);
2d21ac55 276
91447636
A
277 fp_drop(p, fd, fp, 1);
278 proc_fdunlock(p);
279
1c79356b 280 *retval = fd;
3e170ce0
A
281 if (ENTR_SHOULDTRACE) {
282 KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
283 fd, 0, (int64_t)VM_KERNEL_ADDRPERM(so));
284 }
1c79356b 285 }
0a7de745 286 return error;
1c79356b
A
287}
288
2d21ac55
A
289/*
290 * Returns: 0 Success
291 * EDESTADDRREQ Destination address required
292 * EBADF Bad file descriptor
293 * EACCES Mandatory Access Control failure
294 * file_socket:ENOTSOCK
295 * file_socket:EBADF
296 * getsockaddr:ENAMETOOLONG Filename too long
297 * getsockaddr:EINVAL Invalid argument
298 * getsockaddr:ENOMEM Not enough space
299 * getsockaddr:EFAULT Bad address
39236c6e 300 * sobindlock:???
2d21ac55 301 */
1c79356b
A
302/* ARGSUSED */
303int
b0d623f7 304bind(__unused proc_t p, struct bind_args *uap, __unused int32_t *retval)
1c79356b 305{
2d21ac55
A
306 struct sockaddr_storage ss;
307 struct sockaddr *sa = NULL;
91447636 308 struct socket *so;
2d21ac55 309 boolean_t want_free = TRUE;
1c79356b
A
310 int error;
311
55e303ae 312 AUDIT_ARG(fd, uap->s);
91447636 313 error = file_socket(uap->s, &so);
0a7de745
A
314 if (error != 0) {
315 return error;
316 }
2d21ac55
A
317 if (so == NULL) {
318 error = EBADF;
319 goto out;
320 }
321 if (uap->name == USER_ADDR_NULL) {
322 error = EDESTADDRREQ;
323 goto out;
324 }
0a7de745 325 if (uap->namelen > sizeof(ss)) {
4a3eedf9 326 error = getsockaddr(so, &sa, uap->name, uap->namelen, TRUE);
2d21ac55 327 } else {
4a3eedf9 328 error = getsockaddr_s(so, &ss, uap->name, uap->namelen, TRUE);
2d21ac55
A
329 if (error == 0) {
330 sa = (struct sockaddr *)&ss;
331 want_free = FALSE;
332 }
333 }
0a7de745 334 if (error != 0) {
91447636 335 goto out;
0a7de745 336 }
2d21ac55
A
337 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
338#if CONFIG_MACF_SOCKET_SUBSET
5c9f4661 339 if ((sa != NULL && sa->sa_family == AF_SYSTEM) ||
0a7de745
A
340 (error = mac_socket_check_bind(kauth_cred_get(), so, sa)) == 0) {
341 error = sobindlock(so, sa, 1); /* will lock socket */
342 }
2d21ac55 343#else
0a7de745 344 error = sobindlock(so, sa, 1); /* will lock socket */
2d21ac55 345#endif /* MAC_SOCKET_SUBSET */
0a7de745 346 if (want_free) {
2d21ac55 347 FREE(sa, M_SONAME);
0a7de745 348 }
91447636
A
349out:
350 file_drop(uap->s);
0a7de745 351 return error;
1c79356b
A
352}
353
2d21ac55
A
354/*
355 * Returns: 0 Success
356 * EBADF
357 * EACCES Mandatory Access Control failure
358 * file_socket:ENOTSOCK
359 * file_socket:EBADF
360 * solisten:EINVAL
361 * solisten:EOPNOTSUPP
362 * solisten:???
363 */
1c79356b 364int
2d21ac55 365listen(__unused struct proc *p, struct listen_args *uap,
b0d623f7 366 __unused int32_t *retval)
1c79356b 367{
1c79356b 368 int error;
2d21ac55 369 struct socket *so;
1c79356b 370
55e303ae 371 AUDIT_ARG(fd, uap->s);
91447636 372 error = file_socket(uap->s, &so);
0a7de745
A
373 if (error) {
374 return error;
375 }
91447636 376 if (so != NULL)
2d21ac55
A
377#if CONFIG_MACF_SOCKET_SUBSET
378 {
379 error = mac_socket_check_listen(kauth_cred_get(), so);
0a7de745 380 if (error == 0) {
2d21ac55 381 error = solisten(so, uap->backlog);
0a7de745 382 }
2d21ac55
A
383 }
384#else
0a7de745 385 { error = solisten(so, uap->backlog);}
2d21ac55 386#endif /* MAC_SOCKET_SUBSET */
0a7de745 387 else {
91447636 388 error = EBADF;
0a7de745 389 }
2d21ac55 390
91447636 391 file_drop(uap->s);
0a7de745 392 return error;
1c79356b
A
393}
394
2d21ac55
A
395/*
396 * Returns: fp_getfsock:EBADF Bad file descriptor
397 * fp_getfsock:EOPNOTSUPP ...
398 * xlate => :ENOTSOCK Socket operation on non-socket
399 * :EFAULT Bad address on copyin/copyout
400 * :EBADF Bad file descriptor
401 * :EOPNOTSUPP Operation not supported on socket
402 * :EINVAL Invalid argument
403 * :EWOULDBLOCK Operation would block
404 * :ECONNABORTED Connection aborted
405 * :EINTR Interrupted function
406 * :EACCES Mandatory Access Control failure
407 * falloc_locked:ENFILE Too many files open in system
408 * falloc_locked::EMFILE Too many open files
409 * falloc_locked::ENOMEM Not enough space
410 * 0 Success
411 */
1c79356b 412int
2d21ac55 413accept_nocancel(struct proc *p, struct accept_nocancel_args *uap,
b0d623f7 414 int32_t *retval)
1c79356b 415{
91447636 416 struct fileproc *fp;
2d21ac55 417 struct sockaddr *sa = NULL;
91447636
A
418 socklen_t namelen;
419 int error;
420 struct socket *head, *so = NULL;
421 lck_mtx_t *mutex_held;
422 int fd = uap->s;
2d21ac55 423 int newfd;
0a7de745 424 short fflag; /* type must match fp->f_flag */
91447636 425 int dosocklock = 0;
1c79356b 426
2d21ac55
A
427 *retval = -1;
428
55e303ae 429 AUDIT_ARG(fd, uap->s);
2d21ac55 430
1c79356b 431 if (uap->name) {
91447636 432 error = copyin(uap->anamelen, (caddr_t)&namelen,
0a7de745
A
433 sizeof(socklen_t));
434 if (error) {
435 return error;
436 }
1c79356b 437 }
91447636
A
438 error = fp_getfsock(p, fd, &fp, &head);
439 if (error) {
0a7de745 440 if (error == EOPNOTSUPP) {
91447636 441 error = ENOTSOCK;
0a7de745
A
442 }
443 return error;
91447636 444 }
55e303ae 445 if (head == NULL) {
91447636
A
446 error = EBADF;
447 goto out;
55e303ae 448 }
2d21ac55 449#if CONFIG_MACF_SOCKET_SUBSET
0a7de745 450 if ((error = mac_socket_check_accept(kauth_cred_get(), head)) != 0) {
2d21ac55 451 goto out;
0a7de745 452 }
2d21ac55 453#endif /* MAC_SOCKET_SUBSET */
91447636
A
454
455 socket_lock(head, 1);
456
0a7de745 457 if (head->so_proto->pr_getlock != NULL) {
5ba3f43e 458 mutex_held = (*head->so_proto->pr_getlock)(head, PR_F_WILLUNLOCK);
91447636 459 dosocklock = 1;
2d21ac55 460 } else {
91447636
A
461 mutex_held = head->so_proto->pr_domain->dom_mtx;
462 dosocklock = 0;
463 }
464
1c79356b 465 if ((head->so_options & SO_ACCEPTCONN) == 0) {
2d21ac55
A
466 if ((head->so_proto->pr_flags & PR_CONNREQUIRED) == 0) {
467 error = EOPNOTSUPP;
468 } else {
469 /* POSIX: The socket is not accepting connections */
470 error = EINVAL;
471 }
91447636 472 socket_unlock(head, 1);
91447636 473 goto out;
1c79356b 474 }
813fb2f6 475check_again:
1c79356b 476 if ((head->so_state & SS_NBIO) && head->so_comp.tqh_first == NULL) {
91447636
A
477 socket_unlock(head, 1);
478 error = EWOULDBLOCK;
479 goto out;
1c79356b 480 }
2d21ac55 481 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
1c79356b
A
482 if (head->so_state & SS_CANTRCVMORE) {
483 head->so_error = ECONNABORTED;
484 break;
485 }
0a7de745 486 if (head->so_usecount < 1) {
2d21ac55
A
487 panic("accept: head=%p refcount=%d\n", head,
488 head->so_usecount);
0a7de745 489 }
2d21ac55
A
490 error = msleep((caddr_t)&head->so_timeo, mutex_held,
491 PSOCK | PCATCH, "accept", 0);
0a7de745 492 if (head->so_usecount < 1) {
2d21ac55
A
493 panic("accept: 2 head=%p refcount=%d\n", head,
494 head->so_usecount);
0a7de745 495 }
91447636
A
496 if ((head->so_state & SS_DRAINING)) {
497 error = ECONNABORTED;
498 }
1c79356b 499 if (error) {
91447636
A
500 socket_unlock(head, 1);
501 goto out;
1c79356b
A
502 }
503 }
504 if (head->so_error) {
505 error = head->so_error;
506 head->so_error = 0;
91447636
A
507 socket_unlock(head, 1);
508 goto out;
1c79356b
A
509 }
510
1c79356b
A
511 /*
512 * At this point we know that there is at least one connection
513 * ready to be accepted. Remove it from the queue prior to
514 * allocating the file descriptor for it since falloc() may
515 * block allowing another process to accept the connection
516 * instead.
517 */
91447636 518 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
813fb2f6
A
519
520 so_acquire_accept_list(head, NULL);
521 if (TAILQ_EMPTY(&head->so_comp)) {
522 so_release_accept_list(head);
523 goto check_again;
524 }
525
e3027f41 526 so = TAILQ_FIRST(&head->so_comp);
1c79356b 527 TAILQ_REMOVE(&head->so_comp, so, so_list);
d190cdc3
A
528 so->so_head = NULL;
529 so->so_state &= ~SS_COMP;
1c79356b 530 head->so_qlen--;
813fb2f6
A
531 so_release_accept_list(head);
532
2d21ac55
A
533 /* unlock head to avoid deadlock with select, keep a ref on head */
534 socket_unlock(head, 0);
535
536#if CONFIG_MACF_SOCKET_SUBSET
537 /*
538 * Pass the pre-accepted socket to the MAC framework. This is
539 * cheaper than allocating a file descriptor for the socket,
540 * calling the protocol accept callback, and possibly freeing
541 * the file descriptor should the MAC check fails.
542 */
543 if ((error = mac_socket_check_accepted(kauth_cred_get(), so)) != 0) {
39236c6e 544 socket_lock(so, 1);
d190cdc3 545 so->so_state &= ~SS_NOFDREF;
39236c6e 546 socket_unlock(so, 1);
2d21ac55
A
547 soclose(so);
548 /* Drop reference on listening socket */
549 sodereference(head);
550 goto out;
551 }
552#endif /* MAC_SOCKET_SUBSET */
553
554 /*
555 * Pass the pre-accepted socket to any interested socket filter(s).
556 * Upon failure, the socket would have been closed by the callee.
557 */
d190cdc3 558 if (so->so_filt != NULL && (error = soacceptfilter(so, head)) != 0) {
2d21ac55
A
559 /* Drop reference on listening socket */
560 sodereference(head);
561 /* Propagate socket filter's error code to the caller */
562 goto out;
563 }
564
1c79356b 565 fflag = fp->f_flag;
2d21ac55 566 error = falloc(p, &fp, &newfd, vfs_context_current());
1c79356b 567 if (error) {
39236c6e 568 /*
316670eb
A
569 * Probably ran out of file descriptors.
570 *
571 * <rdar://problem/8554930>
572 * Don't put this back on the socket like we used to, that
573 * just causes the client to spin. Drop the socket.
1c79356b 574 */
39236c6e 575 socket_lock(so, 1);
d190cdc3 576 so->so_state &= ~SS_NOFDREF;
39236c6e 577 socket_unlock(so, 1);
316670eb
A
578 soclose(so);
579 sodereference(head);
91447636 580 goto out;
2d21ac55 581 }
91447636 582 *retval = newfd;
1c79356b
A
583 fp->f_flag = fflag;
584 fp->f_ops = &socketops;
585 fp->f_data = (caddr_t)so;
fe8ab488 586
91447636 587 socket_lock(head, 0);
0a7de745 588 if (dosocklock) {
91447636 589 socket_lock(so, 1);
0a7de745 590 }
fe8ab488 591
fe8ab488
A
592 /* Sync socket non-blocking/async state with file flags */
593 if (fp->f_flag & FNONBLOCK) {
594 so->so_state |= SS_NBIO;
595 } else {
596 so->so_state &= ~SS_NBIO;
597 }
598
599 if (fp->f_flag & FASYNC) {
600 so->so_state |= SS_ASYNC;
601 so->so_rcv.sb_flags |= SB_ASYNC;
602 so->so_snd.sb_flags |= SB_ASYNC;
603 } else {
604 so->so_state &= ~SS_ASYNC;
605 so->so_rcv.sb_flags &= ~SB_ASYNC;
606 so->so_snd.sb_flags &= ~SB_ASYNC;
607 }
608
91447636
A
609 (void) soacceptlock(so, &sa, 0);
610 socket_unlock(head, 1);
2d21ac55 611 if (sa == NULL) {
1c79356b 612 namelen = 0;
0a7de745 613 if (uap->name) {
1c79356b 614 goto gotnoname;
0a7de745 615 }
91447636 616 error = 0;
2d21ac55 617 goto releasefd;
1c79356b 618 }
2d21ac55
A
619 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
620
1c79356b 621 if (uap->name) {
0a7de745 622 socklen_t sa_len;
2d21ac55
A
623
624 /* save sa_len before it is destroyed */
625 sa_len = sa->sa_len;
626 namelen = MIN(namelen, sa_len);
91447636 627 error = copyout(sa, uap->name, namelen);
0a7de745 628 if (!error) {
2d21ac55
A
629 /* return the actual, untruncated address length */
630 namelen = sa_len;
0a7de745 631 }
1c79356b 632gotnoname:
2d21ac55 633 error = copyout((caddr_t)&namelen, uap->anamelen,
0a7de745 634 sizeof(socklen_t));
1c79356b
A
635 }
636 FREE(sa, M_SONAME);
2d21ac55 637
b0d623f7 638releasefd:
2d21ac55 639 /*
6d2010ae
A
640 * If the socket has been marked as inactive by sosetdefunct(),
641 * disallow further operations on it.
2d21ac55
A
642 */
643 if (so->so_flags & SOF_DEFUNCT) {
6d2010ae
A
644 sodefunct(current_proc(), so,
645 SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL);
2d21ac55
A
646 }
647
0a7de745 648 if (dosocklock) {
91447636 649 socket_unlock(so, 1);
0a7de745 650 }
2d21ac55 651
2d21ac55
A
652 proc_fdlock(p);
653 procfdtbl_releasefd(p, newfd, NULL);
654 fp_drop(p, newfd, fp, 1);
655 proc_fdunlock(p);
656
91447636
A
657out:
658 file_drop(fd);
3e170ce0
A
659
660 if (error == 0 && ENTR_SHOULDTRACE) {
661 KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
662 newfd, 0, (int64_t)VM_KERNEL_ADDRPERM(so));
663 }
0a7de745 664 return error;
1c79356b
A
665}
666
667int
b0d623f7 668accept(struct proc *p, struct accept_args *uap, int32_t *retval)
1c79356b 669{
2d21ac55 670 __pthread_testcancel(1);
0a7de745
A
671 return accept_nocancel(p, (struct accept_nocancel_args *)uap,
672 retval);
1c79356b
A
673}
674
2d21ac55
A
675/*
676 * Returns: 0 Success
677 * EBADF Bad file descriptor
678 * EALREADY Connection already in progress
679 * EINPROGRESS Operation in progress
680 * ECONNABORTED Connection aborted
681 * EINTR Interrupted function
682 * EACCES Mandatory Access Control failure
683 * file_socket:ENOTSOCK
684 * file_socket:EBADF
685 * getsockaddr:ENAMETOOLONG Filename too long
686 * getsockaddr:EINVAL Invalid argument
687 * getsockaddr:ENOMEM Not enough space
688 * getsockaddr:EFAULT Bad address
689 * soconnectlock:EOPNOTSUPP
690 * soconnectlock:EISCONN
691 * soconnectlock:??? [depends on protocol, filters]
692 * msleep:EINTR
693 *
694 * Imputed: so_error error may be set from so_error, which
695 * may have been set by soconnectlock.
696 */
697/* ARGSUSED */
1c79356b 698int
b0d623f7 699connect(struct proc *p, struct connect_args *uap, int32_t *retval)
1c79356b 700{
2d21ac55 701 __pthread_testcancel(1);
0a7de745
A
702 return connect_nocancel(p, (struct connect_nocancel_args *)uap,
703 retval);
1c79356b 704}
1c79356b 705
1c79356b 706int
39236c6e 707connect_nocancel(proc_t p, struct connect_nocancel_args *uap, int32_t *retval)
1c79356b 708{
39236c6e 709#pragma unused(p, retval)
91447636 710 struct socket *so;
2d21ac55
A
711 struct sockaddr_storage ss;
712 struct sockaddr *sa = NULL;
91447636
A
713 int error;
714 int fd = uap->s;
4a3eedf9 715 boolean_t dgram;
1c79356b 716
55e303ae 717 AUDIT_ARG(fd, uap->s);
2d21ac55 718 error = file_socket(fd, &so);
0a7de745
A
719 if (error != 0) {
720 return error;
721 }
91447636
A
722 if (so == NULL) {
723 error = EBADF;
724 goto out;
725 }
726
4a3eedf9
A
727 /*
728 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
729 * if this is a datagram socket; translate for other types.
730 */
731 dgram = (so->so_type == SOCK_DGRAM);
732
2d21ac55 733 /* Get socket address now before we obtain socket lock */
0a7de745 734 if (uap->namelen > sizeof(ss)) {
4a3eedf9 735 error = getsockaddr(so, &sa, uap->name, uap->namelen, !dgram);
2d21ac55 736 } else {
4a3eedf9 737 error = getsockaddr_s(so, &ss, uap->name, uap->namelen, !dgram);
0a7de745 738 if (error == 0) {
2d21ac55 739 sa = (struct sockaddr *)&ss;
0a7de745 740 }
2d21ac55 741 }
0a7de745 742 if (error != 0) {
2d21ac55 743 goto out;
0a7de745 744 }
2d21ac55 745
39236c6e
A
746 error = connectit(so, sa);
747
0a7de745 748 if (sa != NULL && sa != SA(&ss)) {
39236c6e 749 FREE(sa, M_SONAME);
0a7de745
A
750 }
751 if (error == ERESTART) {
39236c6e 752 error = EINTR;
0a7de745 753 }
39236c6e
A
754out:
755 file_drop(fd);
0a7de745 756 return error;
39236c6e
A
757}
758
759static int
760connectx_nocancel(struct proc *p, struct connectx_args *uap, int *retval)
761{
762#pragma unused(p, retval)
813fb2f6
A
763 struct sockaddr_storage ss, sd;
764 struct sockaddr *src = NULL, *dst = NULL;
39236c6e 765 struct socket *so;
3e170ce0 766 int error, error1, fd = uap->socket;
39236c6e 767 boolean_t dgram;
3e170ce0
A
768 sae_connid_t cid = SAE_CONNID_ANY;
769 struct user32_sa_endpoints ep32;
770 struct user64_sa_endpoints ep64;
771 struct user_sa_endpoints ep;
772 user_ssize_t bytes_written = 0;
773 struct user_iovec *iovp;
774 uio_t auio = NULL;
39236c6e 775
3e170ce0 776 AUDIT_ARG(fd, uap->socket);
39236c6e 777 error = file_socket(fd, &so);
0a7de745
A
778 if (error != 0) {
779 return error;
780 }
39236c6e
A
781 if (so == NULL) {
782 error = EBADF;
783 goto out;
784 }
785
3e170ce0
A
786 if (uap->endpoints == USER_ADDR_NULL) {
787 error = EINVAL;
788 goto out;
789 }
790
791 if (IS_64BIT_PROCESS(p)) {
792 error = copyin(uap->endpoints, (caddr_t)&ep64, sizeof(ep64));
0a7de745 793 if (error != 0) {
3e170ce0 794 goto out;
0a7de745 795 }
3e170ce0
A
796
797 ep.sae_srcif = ep64.sae_srcif;
798 ep.sae_srcaddr = ep64.sae_srcaddr;
799 ep.sae_srcaddrlen = ep64.sae_srcaddrlen;
800 ep.sae_dstaddr = ep64.sae_dstaddr;
801 ep.sae_dstaddrlen = ep64.sae_dstaddrlen;
802 } else {
803 error = copyin(uap->endpoints, (caddr_t)&ep32, sizeof(ep32));
0a7de745 804 if (error != 0) {
3e170ce0 805 goto out;
0a7de745 806 }
3e170ce0
A
807
808 ep.sae_srcif = ep32.sae_srcif;
809 ep.sae_srcaddr = ep32.sae_srcaddr;
810 ep.sae_srcaddrlen = ep32.sae_srcaddrlen;
811 ep.sae_dstaddr = ep32.sae_dstaddr;
812 ep.sae_dstaddrlen = ep32.sae_dstaddrlen;
813 }
fe8ab488 814
39236c6e
A
815 /*
816 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
817 * if this is a datagram socket; translate for other types.
818 */
819 dgram = (so->so_type == SOCK_DGRAM);
820
813fb2f6
A
821 /* Get socket address now before we obtain socket lock */
822 if (ep.sae_srcaddr != USER_ADDR_NULL) {
0a7de745 823 if (ep.sae_srcaddrlen > sizeof(ss)) {
813fb2f6
A
824 error = getsockaddr(so, &src, ep.sae_srcaddr, ep.sae_srcaddrlen, dgram);
825 } else {
826 error = getsockaddr_s(so, &ss, ep.sae_srcaddr, ep.sae_srcaddrlen, dgram);
0a7de745 827 if (error == 0) {
813fb2f6 828 src = (struct sockaddr *)&ss;
0a7de745 829 }
813fb2f6
A
830 }
831
0a7de745 832 if (error) {
813fb2f6 833 goto out;
0a7de745 834 }
813fb2f6 835 }
39236c6e 836
3e170ce0
A
837 if (ep.sae_dstaddr == USER_ADDR_NULL) {
838 error = EINVAL;
839 goto out;
840 }
841
813fb2f6 842 /* Get socket address now before we obtain socket lock */
0a7de745 843 if (ep.sae_dstaddrlen > sizeof(sd)) {
813fb2f6
A
844 error = getsockaddr(so, &dst, ep.sae_dstaddr, ep.sae_dstaddrlen, dgram);
845 } else {
846 error = getsockaddr_s(so, &sd, ep.sae_dstaddr, ep.sae_dstaddrlen, dgram);
0a7de745 847 if (error == 0) {
813fb2f6 848 dst = (struct sockaddr *)&sd;
0a7de745 849 }
813fb2f6
A
850 }
851
0a7de745 852 if (error) {
39236c6e 853 goto out;
0a7de745 854 }
39236c6e 855
813fb2f6 856 VERIFY(dst != NULL);
39236c6e 857
3e170ce0
A
858 if (uap->iov != USER_ADDR_NULL) {
859 /* Verify range before calling uio_create() */
0a7de745 860 if (uap->iovcnt <= 0 || uap->iovcnt > UIO_MAXIOV) {
cc8bc92a
A
861 error = EINVAL;
862 goto out;
863 }
3e170ce0 864
0a7de745 865 if (uap->len == USER_ADDR_NULL) {
cc8bc92a
A
866 error = EINVAL;
867 goto out;
868 }
3e170ce0
A
869
870 /* allocate a uio to hold the number of iovecs passed */
871 auio = uio_create(uap->iovcnt, 0,
872 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
873 UIO_WRITE);
874
875 if (auio == NULL) {
876 error = ENOMEM;
877 goto out;
878 }
879
880 /*
881 * get location of iovecs within the uio.
882 * then copyin the iovecs from user space.
883 */
884 iovp = uio_iovsaddr(auio);
885 if (iovp == NULL) {
886 error = ENOMEM;
887 goto out;
888 }
889 error = copyin_user_iovec_array(uap->iov,
0a7de745
A
890 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
891 uap->iovcnt, iovp);
892 if (error != 0) {
3e170ce0 893 goto out;
0a7de745 894 }
3e170ce0
A
895
896 /* finish setup of uio_t */
897 error = uio_calculateresid(auio);
898 if (error != 0) {
899 goto out;
900 }
901 }
902
813fb2f6 903 error = connectitx(so, src, dst, p, ep.sae_srcif, uap->associd,
3e170ce0 904 &cid, auio, uap->flags, &bytes_written);
0a7de745 905 if (error == ERESTART) {
39236c6e 906 error = EINTR;
0a7de745 907 }
39236c6e 908
3e170ce0 909 if (uap->len != USER_ADDR_NULL) {
0a7de745 910 error1 = copyout(&bytes_written, uap->len, sizeof(uap->len));
3e170ce0 911 /* give precedence to connectitx errors */
0a7de745 912 if ((error1 != 0) && (error == 0)) {
3e170ce0 913 error = error1;
0a7de745 914 }
3e170ce0 915 }
39236c6e 916
3e170ce0 917 if (uap->connid != USER_ADDR_NULL) {
0a7de745 918 error1 = copyout(&cid, uap->connid, sizeof(cid));
3e170ce0 919 /* give precedence to connectitx errors */
0a7de745 920 if ((error1 != 0) && (error == 0)) {
3e170ce0 921 error = error1;
0a7de745 922 }
3e170ce0 923 }
39236c6e
A
924out:
925 file_drop(fd);
3e170ce0
A
926 if (auio != NULL) {
927 uio_free(auio);
928 }
0a7de745 929 if (src != NULL && src != SA(&ss)) {
813fb2f6 930 FREE(src, M_SONAME);
0a7de745
A
931 }
932 if (dst != NULL && dst != SA(&sd)) {
813fb2f6 933 FREE(dst, M_SONAME);
0a7de745
A
934 }
935 return error;
39236c6e
A
936}
937
/*
 * connectx
 *
 * Cancellable wrapper around connectx_nocancel().
 */
int
connectx(struct proc *p, struct connectx_args *uap, int *retval)
{
	int error;

	/*
	 * Due to similarity with a POSIX interface, define as
	 * an unofficial cancellation point.
	 */
	__pthread_testcancel(1);

	error = connectx_nocancel(p, uap, retval);
	return error;
}
948
949static int
950connectit(struct socket *so, struct sockaddr *sa)
951{
952 int error;
953
2d21ac55
A
954 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
955#if CONFIG_MACF_SOCKET_SUBSET
0a7de745
A
956 if ((error = mac_socket_check_connect(kauth_cred_get(), so, sa)) != 0) {
957 return error;
958 }
39236c6e
A
959#endif /* MAC_SOCKET_SUBSET */
960
961 socket_lock(so, 1);
962 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
963 error = EALREADY;
964 goto out;
965 }
966 error = soconnectlock(so, sa, 0);
967 if (error != 0) {
2d21ac55
A
968 goto out;
969 }
39236c6e
A
970 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
971 error = EINPROGRESS;
972 goto out;
973 }
974 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
975 lck_mtx_t *mutex_held;
976
0a7de745 977 if (so->so_proto->pr_getlock != NULL) {
5ba3f43e 978 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
0a7de745 979 } else {
39236c6e 980 mutex_held = so->so_proto->pr_domain->dom_mtx;
0a7de745 981 }
39236c6e
A
982 error = msleep((caddr_t)&so->so_timeo, mutex_held,
983 PSOCK | PCATCH, __func__, 0);
984 if (so->so_state & SS_DRAINING) {
985 error = ECONNABORTED;
986 }
0a7de745 987 if (error != 0) {
39236c6e 988 break;
0a7de745 989 }
39236c6e
A
990 }
991 if (error == 0) {
992 error = so->so_error;
993 so->so_error = 0;
994 }
995out:
996 socket_unlock(so, 1);
0a7de745 997 return error;
39236c6e
A
998}
999
1000static int
813fb2f6
A
1001connectitx(struct socket *so, struct sockaddr *src,
1002 struct sockaddr *dst, struct proc *p, uint32_t ifscope,
3e170ce0
A
1003 sae_associd_t aid, sae_connid_t *pcid, uio_t auio, unsigned int flags,
1004 user_ssize_t *bytes_written)
39236c6e 1005{
39236c6e 1006 int error;
3e170ce0 1007#pragma unused (flags)
39236c6e 1008
813fb2f6 1009 VERIFY(dst != NULL);
39236c6e 1010
813fb2f6 1011 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), dst);
39236c6e 1012#if CONFIG_MACF_SOCKET_SUBSET
0a7de745
A
1013 if ((error = mac_socket_check_connect(kauth_cred_get(), so, dst)) != 0) {
1014 return error;
1015 }
cb323159
A
1016
1017 if (auio != NULL) {
1018 if ((error = mac_socket_check_send(kauth_cred_get(), so, dst)) != 0) {
1019 return error;
1020 }
1021 }
2d21ac55 1022#endif /* MAC_SOCKET_SUBSET */
91447636 1023
39236c6e 1024 socket_lock(so, 1);
91447636 1025 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
91447636
A
1026 error = EALREADY;
1027 goto out;
1028 }
3e170ce0
A
1029
1030 if ((so->so_proto->pr_flags & PR_DATA_IDEMPOTENT) &&
813fb2f6 1031 (flags & CONNECT_DATA_IDEMPOTENT)) {
3e170ce0
A
1032 so->so_flags1 |= SOF1_DATA_IDEMPOTENT;
1033
0a7de745 1034 if (flags & CONNECT_DATA_AUTHENTICATED) {
5ba3f43e 1035 so->so_flags1 |= SOF1_DATA_AUTHENTICATED;
0a7de745 1036 }
813fb2f6
A
1037 }
1038
3e170ce0
A
1039 /*
1040 * Case 1: CONNECT_RESUME_ON_READ_WRITE set, no data.
1041 * Case 2: CONNECT_RESUME_ON_READ_WRITE set, with data (user error)
1042 * Case 3: CONNECT_RESUME_ON_READ_WRITE not set, with data
1043 * Case 3 allows user to combine write with connect even if they have
1044 * no use for TFO (such as regular TCP, and UDP).
1045 * Case 4: CONNECT_RESUME_ON_READ_WRITE not set, no data (regular case)
1046 */
1047 if ((so->so_proto->pr_flags & PR_PRECONN_WRITE) &&
0a7de745 1048 ((flags & CONNECT_RESUME_ON_READ_WRITE) || auio)) {
3e170ce0 1049 so->so_flags1 |= SOF1_PRECONNECT_DATA;
0a7de745 1050 }
3e170ce0
A
1051
1052 /*
1053 * If a user sets data idempotent and does not pass an uio, or
1054 * sets CONNECT_RESUME_ON_READ_WRITE, this is an error, reset
1055 * SOF1_DATA_IDEMPOTENT.
1056 */
1057 if (!(so->so_flags1 & SOF1_PRECONNECT_DATA) &&
1058 (so->so_flags1 & SOF1_DATA_IDEMPOTENT)) {
1059 /* We should return EINVAL instead perhaps. */
1060 so->so_flags1 &= ~SOF1_DATA_IDEMPOTENT;
1061 }
1062
813fb2f6 1063 error = soconnectxlocked(so, src, dst, p, ifscope,
3e170ce0 1064 aid, pcid, 0, NULL, 0, auio, bytes_written);
39236c6e 1065 if (error != 0) {
39236c6e
A
1066 goto out;
1067 }
3e170ce0
A
1068 /*
1069 * If, after the call to soconnectxlocked the flag is still set (in case
1070 * data has been queued and the connect() has actually been triggered,
1071 * it will have been unset by the transport), we exit immediately. There
1072 * is no reason to wait on any event.
1073 */
1074 if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
1075 error = 0;
1076 goto out;
1077 }
1c79356b 1078 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
91447636
A
1079 error = EINPROGRESS;
1080 goto out;
1c79356b 1081 }
1c79356b 1082 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
39236c6e
A
1083 lck_mtx_t *mutex_held;
1084
0a7de745 1085 if (so->so_proto->pr_getlock != NULL) {
5ba3f43e 1086 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
0a7de745 1087 } else {
91447636 1088 mutex_held = so->so_proto->pr_domain->dom_mtx;
0a7de745 1089 }
2d21ac55 1090 error = msleep((caddr_t)&so->so_timeo, mutex_held,
39236c6e
A
1091 PSOCK | PCATCH, __func__, 0);
1092 if (so->so_state & SS_DRAINING) {
91447636
A
1093 error = ECONNABORTED;
1094 }
0a7de745 1095 if (error != 0) {
1c79356b 1096 break;
0a7de745 1097 }
1c79356b
A
1098 }
1099 if (error == 0) {
1100 error = so->so_error;
1101 so->so_error = 0;
1102 }
39236c6e 1103out:
91447636 1104 socket_unlock(so, 1);
0a7de745 1105 return error;
39236c6e
A
1106}
1107
/*
 * peeloff() system call entry point.
 *
 * None of the arguments are used: after the cancellation check the call
 * simply reports success.
 */
int
peeloff(struct proc *p, struct peeloff_args *uap, int *retval)
{
#pragma unused(p, uap, retval)
	const int result = 0;

	/*
	 * Due to similarity with a POSIX interface, define as
	 * an unofficial cancellation point.
	 */
	__pthread_testcancel(1);

	return result;
}
1119
/*
 * disconnectx() system call entry point.
 *
 * Runs the thread cancellation check and then forwards the request,
 * unchanged, to disconnectx_nocancel(), whose result is returned as-is.
 */
int
disconnectx(struct proc *p, struct disconnectx_args *uap, int *retval)
{
	int error;

	/*
	 * Due to similarity with a POSIX interface, define as
	 * an unofficial cancellation point.
	 */
	__pthread_testcancel(1);

	error = disconnectx_nocancel(p, uap, retval);
	return error;
}
1130
1131static int
1132disconnectx_nocancel(struct proc *p, struct disconnectx_args *uap, int *retval)
1133{
1134#pragma unused(p, retval)
1135 struct socket *so;
1136 int fd = uap->s;
1137 int error;
1138
1139 error = file_socket(fd, &so);
0a7de745
A
1140 if (error != 0) {
1141 return error;
1142 }
39236c6e
A
1143 if (so == NULL) {
1144 error = EBADF;
1145 goto out;
1146 }
1147
1148 error = sodisconnectx(so, uap->aid, uap->cid);
91447636
A
1149out:
1150 file_drop(fd);
0a7de745 1151 return error;
1c79356b
A
1152}
1153
2d21ac55
A
1154/*
1155 * Returns: 0 Success
1156 * socreate:EAFNOSUPPORT
1157 * socreate:EPROTOTYPE
1158 * socreate:EPROTONOSUPPORT
1159 * socreate:ENOBUFS
1160 * socreate:ENOMEM
1161 * socreate:EISCONN
1162 * socreate:??? [other protocol families, IPSEC]
1163 * falloc:ENFILE
1164 * falloc:EMFILE
1165 * falloc:ENOMEM
1166 * copyout:EFAULT
1167 * soconnect2:EINVAL
1168 * soconnect2:EPROTOTYPE
1169 * soconnect2:??? [other protocol families]
1170 */
1c79356b 1171int
2d21ac55 1172socketpair(struct proc *p, struct socketpair_args *uap,
b0d623f7 1173 __unused int32_t *retval)
1c79356b 1174{
91447636 1175 struct fileproc *fp1, *fp2;
1c79356b
A
1176 struct socket *so1, *so2;
1177 int fd, error, sv[2];
1178
55e303ae 1179 AUDIT_ARG(socket, uap->domain, uap->type, uap->protocol);
1c79356b 1180 error = socreate(uap->domain, &so1, uap->type, uap->protocol);
0a7de745
A
1181 if (error) {
1182 return error;
1183 }
1c79356b 1184 error = socreate(uap->domain, &so2, uap->type, uap->protocol);
0a7de745 1185 if (error) {
1c79356b 1186 goto free1;
0a7de745 1187 }
91447636 1188
2d21ac55 1189 error = falloc(p, &fp1, &fd, vfs_context_current());
91447636 1190 if (error) {
1c79356b 1191 goto free2;
91447636 1192 }
0a7de745 1193 fp1->f_flag = FREAD | FWRITE;
1c79356b
A
1194 fp1->f_ops = &socketops;
1195 fp1->f_data = (caddr_t)so1;
91447636
A
1196 sv[0] = fd;
1197
2d21ac55 1198 error = falloc(p, &fp2, &fd, vfs_context_current());
91447636 1199 if (error) {
1c79356b 1200 goto free3;
91447636 1201 }
0a7de745 1202 fp2->f_flag = FREAD | FWRITE;
1c79356b
A
1203 fp2->f_ops = &socketops;
1204 fp2->f_data = (caddr_t)so2;
1205 sv[1] = fd;
91447636 1206
1c79356b
A
1207 error = soconnect2(so1, so2);
1208 if (error) {
1c79356b
A
1209 goto free4;
1210 }
1c79356b
A
1211 if (uap->type == SOCK_DGRAM) {
1212 /*
1213 * Datagram socket connection is asymmetric.
1214 */
2d21ac55
A
1215 error = soconnect2(so2, so1);
1216 if (error) {
1217 goto free4;
1218 }
1c79356b 1219 }
91447636 1220
0a7de745 1221 if ((error = copyout(sv, uap->rsv, 2 * sizeof(int))) != 0) {
6d2010ae 1222 goto free4;
0a7de745 1223 }
6d2010ae 1224
91447636 1225 proc_fdlock(p);
6601e61a
A
1226 procfdtbl_releasefd(p, sv[0], NULL);
1227 procfdtbl_releasefd(p, sv[1], NULL);
91447636
A
1228 fp_drop(p, sv[0], fp1, 1);
1229 fp_drop(p, sv[1], fp2, 1);
1230 proc_fdunlock(p);
1231
0a7de745 1232 return 0;
1c79356b 1233free4:
91447636 1234 fp_free(p, sv[1], fp2);
1c79356b 1235free3:
91447636 1236 fp_free(p, sv[0], fp1);
1c79356b 1237free2:
2d21ac55 1238 (void) soclose(so2);
1c79356b 1239free1:
2d21ac55 1240 (void) soclose(so1);
0a7de745 1241 return error;
1c79356b
A
1242}
1243
2d21ac55
A
1244/*
1245 * Returns: 0 Success
1246 * EINVAL
1247 * ENOBUFS
1248 * EBADF
1249 * EPIPE
1250 * EACCES Mandatory Access Control failure
1251 * file_socket:ENOTSOCK
1252 * file_socket:EBADF
1253 * getsockaddr:ENAMETOOLONG Filename too long
1254 * getsockaddr:EINVAL Invalid argument
1255 * getsockaddr:ENOMEM Not enough space
1256 * getsockaddr:EFAULT Bad address
1257 * <pru_sosend>:EACCES[TCP]
1258 * <pru_sosend>:EADDRINUSE[TCP]
1259 * <pru_sosend>:EADDRNOTAVAIL[TCP]
1260 * <pru_sosend>:EAFNOSUPPORT[TCP]
1261 * <pru_sosend>:EAGAIN[TCP]
1262 * <pru_sosend>:EBADF
1263 * <pru_sosend>:ECONNRESET[TCP]
1264 * <pru_sosend>:EFAULT
1265 * <pru_sosend>:EHOSTUNREACH[TCP]
1266 * <pru_sosend>:EINTR
1267 * <pru_sosend>:EINVAL
1268 * <pru_sosend>:EISCONN[AF_INET]
1269 * <pru_sosend>:EMSGSIZE[TCP]
1270 * <pru_sosend>:ENETDOWN[TCP]
1271 * <pru_sosend>:ENETUNREACH[TCP]
1272 * <pru_sosend>:ENOBUFS
1273 * <pru_sosend>:ENOMEM[TCP]
1274 * <pru_sosend>:ENOTCONN[AF_INET]
1275 * <pru_sosend>:EOPNOTSUPP
1276 * <pru_sosend>:EPERM[TCP]
1277 * <pru_sosend>:EPIPE
1278 * <pru_sosend>:EWOULDBLOCK
1279 * <pru_sosend>:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
1280 * <pru_sosend>:???[AF_INET] [whatever a filter author chooses]
1281 * <pru_sosend>:??? [value from so_error]
1282 * sockargs:???
1283 */
1c79356b 1284static int
3e170ce0 1285sendit(struct proc *p, struct socket *so, struct user_msghdr *mp, uio_t uiop,
b0d623f7 1286 int flags, int32_t *retval)
1c79356b 1287{
2d21ac55
A
1288 struct mbuf *control = NULL;
1289 struct sockaddr_storage ss;
1290 struct sockaddr *to = NULL;
1291 boolean_t want_free = TRUE;
91447636 1292 int error;
91447636 1293 user_ssize_t len;
2d21ac55
A
1294
1295 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
1c79356b 1296
2d21ac55 1297 if (mp->msg_name != USER_ADDR_NULL) {
0a7de745 1298 if (mp->msg_namelen > sizeof(ss)) {
2d21ac55 1299 error = getsockaddr(so, &to, mp->msg_name,
4a3eedf9 1300 mp->msg_namelen, TRUE);
2d21ac55
A
1301 } else {
1302 error = getsockaddr_s(so, &ss, mp->msg_name,
4a3eedf9 1303 mp->msg_namelen, TRUE);
2d21ac55
A
1304 if (error == 0) {
1305 to = (struct sockaddr *)&ss;
1306 want_free = FALSE;
1307 }
1c79356b 1308 }
0a7de745 1309 if (error != 0) {
2d21ac55 1310 goto out;
0a7de745 1311 }
2d21ac55 1312 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), to);
91447636 1313 }
2d21ac55 1314 if (mp->msg_control != USER_ADDR_NULL) {
0a7de745 1315 if (mp->msg_controllen < sizeof(struct cmsghdr)) {
1c79356b
A
1316 error = EINVAL;
1317 goto bad;
1318 }
1319 error = sockargs(&control, mp->msg_control,
1320 mp->msg_controllen, MT_CONTROL);
0a7de745 1321 if (error != 0) {
1c79356b 1322 goto bad;
0a7de745 1323 }
91447636 1324 }
1c79356b 1325
2d21ac55
A
1326#if CONFIG_MACF_SOCKET_SUBSET
1327 /*
1328 * We check the state without holding the socket lock;
1329 * if a race condition occurs, it would simply result
3e170ce0 1330 * in an extra call to the MAC check function.
2d21ac55 1331 */
3e170ce0 1332 if (to != NULL &&
316670eb 1333 !(so->so_state & SS_DEFUNCT) &&
0a7de745 1334 (error = mac_socket_check_send(kauth_cred_get(), so, to)) != 0) {
2d21ac55 1335 goto bad;
0a7de745 1336 }
2d21ac55 1337#endif /* MAC_SOCKET_SUBSET */
91447636
A
1338
1339 len = uio_resid(uiop);
39236c6e 1340 error = so->so_proto->pr_usrreqs->pru_sosend(so, to, uiop, 0,
0a7de745 1341 control, flags);
2d21ac55 1342 if (error != 0) {
91447636 1343 if (uio_resid(uiop) != len && (error == ERESTART ||
0a7de745 1344 error == EINTR || error == EWOULDBLOCK)) {
1c79356b 1345 error = 0;
0a7de745 1346 }
2d21ac55 1347 /* Generation of SIGPIPE can be controlled per socket */
0a7de745 1348 if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE)) {
1c79356b 1349 psignal(p, SIGPIPE);
0a7de745 1350 }
1c79356b 1351 }
0a7de745 1352 if (error == 0) {
91447636 1353 *retval = (int)(len - uio_resid(uiop));
0a7de745 1354 }
91447636 1355bad:
0a7de745 1356 if (to != NULL && want_free) {
1c79356b 1357 FREE(to, M_SONAME);
0a7de745 1358 }
91447636 1359out:
2d21ac55 1360 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error, 0, 0, 0, 0);
3e170ce0 1361
0a7de745 1362 return error;
1c79356b
A
1363}
1364
2d21ac55
A
1365/*
1366 * Returns: 0 Success
1367 * ENOMEM
1368 * sendit:??? [see sendit definition in this file]
1369 * write:??? [4056224: applicable for pipes]
1370 */
1c79356b 1371int
b0d623f7 1372sendto(struct proc *p, struct sendto_args *uap, int32_t *retval)
2d21ac55
A
1373{
1374 __pthread_testcancel(1);
0a7de745 1375 return sendto_nocancel(p, (struct sendto_nocancel_args *)uap, retval);
2d21ac55
A
1376}
1377
1378int
39236c6e 1379sendto_nocancel(struct proc *p,
0a7de745
A
1380 struct sendto_nocancel_args *uap,
1381 int32_t *retval)
1c79356b 1382{
91447636
A
1383 struct user_msghdr msg;
1384 int error;
1385 uio_t auio = NULL;
3e170ce0 1386 struct socket *so;
1c79356b 1387
2d21ac55 1388 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_START, 0, 0, 0, 0, 0);
55e303ae 1389 AUDIT_ARG(fd, uap->s);
1c79356b 1390
94ff46dc
A
1391 if (uap->flags & MSG_SKIPCFIL) {
1392 error = EPERM;
1393 goto done;
1394 }
1395
91447636 1396 auio = uio_create(1, 0,
2d21ac55
A
1397 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1398 UIO_WRITE);
91447636 1399 if (auio == NULL) {
3e170ce0
A
1400 error = ENOMEM;
1401 goto done;
91447636
A
1402 }
1403 uio_addiov(auio, uap->buf, uap->len);
1404
1c79356b
A
1405 msg.msg_name = uap->to;
1406 msg.msg_namelen = uap->tolen;
91447636
A
1407 /* no need to set up msg_iov. sendit uses uio_t we send it */
1408 msg.msg_iov = 0;
1409 msg.msg_iovlen = 0;
1c79356b 1410 msg.msg_control = 0;
1c79356b 1411 msg.msg_flags = 0;
1c79356b 1412
3e170ce0 1413 error = file_socket(uap->s, &so);
0a7de745 1414 if (error) {
3e170ce0 1415 goto done;
0a7de745 1416 }
2d21ac55 1417
3e170ce0
A
1418 if (so == NULL) {
1419 error = EBADF;
1420 } else {
1421 error = sendit(p, so, &msg, auio, uap->flags, retval);
91447636 1422 }
2d21ac55 1423
3e170ce0
A
1424 file_drop(uap->s);
1425done:
0a7de745 1426 if (auio != NULL) {
3e170ce0 1427 uio_free(auio);
0a7de745 1428 }
3e170ce0 1429
2d21ac55 1430 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_END, error, *retval, 0, 0, 0);
1c79356b 1431
0a7de745 1432 return error;
1c79356b 1433}
1c79356b 1434
2d21ac55
A
1435/*
1436 * Returns: 0 Success
1437 * ENOBUFS
1438 * copyin:EFAULT
1439 * sendit:??? [see sendit definition in this file]
1440 */
1c79356b 1441int
b0d623f7 1442sendmsg(struct proc *p, struct sendmsg_args *uap, int32_t *retval)
1c79356b 1443{
2d21ac55 1444 __pthread_testcancel(1);
0a7de745
A
1445 return sendmsg_nocancel(p, (struct sendmsg_nocancel_args *)uap,
1446 retval);
1c79356b 1447}
1c79356b
A
1448
1449int
3e170ce0
A
1450sendmsg_nocancel(struct proc *p, struct sendmsg_nocancel_args *uap,
1451 int32_t *retval)
1c79356b 1452{
b0d623f7
A
1453 struct user32_msghdr msg32;
1454 struct user64_msghdr msg64;
91447636
A
1455 struct user_msghdr user_msg;
1456 caddr_t msghdrp;
0a7de745 1457 int size_of_msghdr;
1c79356b 1458 int error;
91447636
A
1459 uio_t auio = NULL;
1460 struct user_iovec *iovp;
3e170ce0 1461 struct socket *so;
1c79356b 1462
2d21ac55 1463 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
55e303ae 1464 AUDIT_ARG(fd, uap->s);
94ff46dc
A
1465
1466 if (uap->flags & MSG_SKIPCFIL) {
1467 error = EPERM;
1468 goto done;
1469 }
1470
91447636 1471 if (IS_64BIT_PROCESS(p)) {
b0d623f7 1472 msghdrp = (caddr_t)&msg64;
0a7de745 1473 size_of_msghdr = sizeof(msg64);
2d21ac55 1474 } else {
b0d623f7 1475 msghdrp = (caddr_t)&msg32;
0a7de745 1476 size_of_msghdr = sizeof(msg32);
91447636
A
1477 }
1478 error = copyin(uap->msg, msghdrp, size_of_msghdr);
2d21ac55
A
1479 if (error) {
1480 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
0a7de745 1481 return error;
1c79356b 1482 }
91447636 1483
b0d623f7
A
1484 if (IS_64BIT_PROCESS(p)) {
1485 user_msg.msg_flags = msg64.msg_flags;
1486 user_msg.msg_controllen = msg64.msg_controllen;
1487 user_msg.msg_control = msg64.msg_control;
1488 user_msg.msg_iovlen = msg64.msg_iovlen;
1489 user_msg.msg_iov = msg64.msg_iov;
1490 user_msg.msg_namelen = msg64.msg_namelen;
1491 user_msg.msg_name = msg64.msg_name;
1492 } else {
1493 user_msg.msg_flags = msg32.msg_flags;
1494 user_msg.msg_controllen = msg32.msg_controllen;
1495 user_msg.msg_control = msg32.msg_control;
1496 user_msg.msg_iovlen = msg32.msg_iovlen;
1497 user_msg.msg_iov = msg32.msg_iov;
1498 user_msg.msg_namelen = msg32.msg_namelen;
1499 user_msg.msg_name = msg32.msg_name;
91447636
A
1500 }
1501
1502 if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
2d21ac55
A
1503 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, EMSGSIZE,
1504 0, 0, 0, 0);
0a7de745 1505 return EMSGSIZE;
91447636
A
1506 }
1507
1508 /* allocate a uio large enough to hold the number of iovecs passed */
1509 auio = uio_create(user_msg.msg_iovlen, 0,
2d21ac55
A
1510 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1511 UIO_WRITE);
91447636
A
1512 if (auio == NULL) {
1513 error = ENOBUFS;
1514 goto done;
1515 }
2d21ac55 1516
91447636 1517 if (user_msg.msg_iovlen) {
2d21ac55
A
1518 /*
1519 * get location of iovecs within the uio.
1520 * then copyin the iovecs from user space.
91447636
A
1521 */
1522 iovp = uio_iovsaddr(auio);
1523 if (iovp == NULL) {
1524 error = ENOBUFS;
1525 goto done;
1526 }
b0d623f7 1527 error = copyin_user_iovec_array(user_msg.msg_iov,
0a7de745
A
1528 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
1529 user_msg.msg_iovlen, iovp);
1530 if (error) {
91447636 1531 goto done;
0a7de745 1532 }
91447636 1533 user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
2d21ac55
A
1534
1535 /* finish setup of uio_t */
39236c6e
A
1536 error = uio_calculateresid(auio);
1537 if (error) {
1538 goto done;
1539 }
2d21ac55 1540 } else {
91447636
A
1541 user_msg.msg_iov = 0;
1542 }
2d21ac55
A
1543
1544 /* msg_flags is ignored for send */
91447636 1545 user_msg.msg_flags = 0;
2d21ac55 1546
3e170ce0
A
1547 error = file_socket(uap->s, &so);
1548 if (error) {
1549 goto done;
1550 }
1551 if (so == NULL) {
1552 error = EBADF;
1553 } else {
1554 error = sendit(p, so, &user_msg, auio, uap->flags, retval);
1555 }
1556 file_drop(uap->s);
1c79356b 1557done:
91447636
A
1558 if (auio != NULL) {
1559 uio_free(auio);
1560 }
2d21ac55 1561 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
91447636 1562
0a7de745 1563 return error;
1c79356b
A
1564}
1565
fe8ab488
A
1566int
1567sendmsg_x(struct proc *p, struct sendmsg_x_args *uap, user_ssize_t *retval)
1568{
1569 int error = 0;
3e170ce0 1570 struct user_msghdr_x *user_msg_x = NULL;
fe8ab488
A
1571 struct uio **uiop = NULL;
1572 struct socket *so;
1573 u_int i;
1574 struct sockaddr *to = NULL;
fe8ab488
A
1575 user_ssize_t len_before = 0, len_after;
1576 int need_drop = 0;
1577 size_t size_of_msghdr;
1578 void *umsgp = NULL;
1579 u_int uiocnt;
3e170ce0 1580 int has_addr_or_ctl = 0;
fe8ab488
A
1581
1582 KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0);
1583
94ff46dc
A
1584 if (uap->flags & MSG_SKIPCFIL) {
1585 error = EPERM;
1586 goto out;
1587 }
1588
fe8ab488
A
1589 error = file_socket(uap->s, &so);
1590 if (error) {
1591 goto out;
1592 }
1593 need_drop = 1;
1594 if (so == NULL) {
1595 error = EBADF;
1596 goto out;
1597 }
fe8ab488
A
1598
1599 /*
1600 * Input parameter range check
1601 */
1602 if (uap->cnt == 0 || uap->cnt > UIO_MAXIOV) {
1603 error = EINVAL;
1604 goto out;
1605 }
3e170ce0
A
1606 /*
1607 * Clip to max currently allowed
1608 */
0a7de745 1609 if (uap->cnt > somaxsendmsgx) {
3e170ce0 1610 uap->cnt = somaxsendmsgx;
0a7de745 1611 }
3e170ce0
A
1612
1613 user_msg_x = _MALLOC(uap->cnt * sizeof(struct user_msghdr_x),
0a7de745 1614 M_TEMP, M_WAITOK | M_ZERO);
3e170ce0
A
1615 if (user_msg_x == NULL) {
1616 DBG_PRINTF("%s _MALLOC() user_msg_x failed\n", __func__);
fe8ab488
A
1617 error = ENOMEM;
1618 goto out;
1619 }
1620 uiop = _MALLOC(uap->cnt * sizeof(struct uio *),
0a7de745 1621 M_TEMP, M_WAITOK | M_ZERO);
fe8ab488 1622 if (uiop == NULL) {
3e170ce0 1623 DBG_PRINTF("%s _MALLOC() uiop failed\n", __func__);
fe8ab488
A
1624 error = ENOMEM;
1625 goto out;
1626 }
1627
1628 size_of_msghdr = IS_64BIT_PROCESS(p) ?
0a7de745 1629 sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x);
fe8ab488 1630
3e170ce0 1631 umsgp = _MALLOC(uap->cnt * size_of_msghdr,
0a7de745 1632 M_TEMP, M_WAITOK | M_ZERO);
fe8ab488 1633 if (umsgp == NULL) {
3e170ce0 1634 printf("%s _MALLOC() user_msg_x failed\n", __func__);
fe8ab488
A
1635 error = ENOMEM;
1636 goto out;
1637 }
1638 error = copyin(uap->msgp, umsgp, uap->cnt * size_of_msghdr);
1639 if (error) {
3e170ce0 1640 DBG_PRINTF("%s copyin() failed\n", __func__);
fe8ab488
A
1641 goto out;
1642 }
1643 error = internalize_user_msghdr_array(umsgp,
0a7de745
A
1644 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
1645 UIO_WRITE, uap->cnt, user_msg_x, uiop);
fe8ab488 1646 if (error) {
3e170ce0 1647 DBG_PRINTF("%s copyin_user_msghdr_array() failed\n", __func__);
fe8ab488
A
1648 goto out;
1649 }
1650 /*
1651 * Make sure the size of each message iovec and
1652 * the aggregate size of all the iovec is valid
1653 */
1654 if (uio_array_is_valid(uiop, uap->cnt) == 0) {
1655 error = EINVAL;
1656 goto out;
1657 }
1658
1659 /*
1660 * Sanity check on passed arguments
1661 */
1662 for (i = 0; i < uap->cnt; i++) {
3e170ce0 1663 struct user_msghdr_x *mp = user_msg_x + i;
fe8ab488
A
1664
1665 /*
1666 * No flags on send message
1667 */
1668 if (mp->msg_flags != 0) {
1669 error = EINVAL;
1670 goto out;
1671 }
1672 /*
1673 * No support for address or ancillary data (yet)
1674 */
0a7de745 1675 if (mp->msg_name != USER_ADDR_NULL || mp->msg_namelen != 0) {
3e170ce0 1676 has_addr_or_ctl = 1;
0a7de745 1677 }
3e170ce0 1678
fe8ab488 1679 if (mp->msg_control != USER_ADDR_NULL ||
0a7de745 1680 mp->msg_controllen != 0) {
3e170ce0 1681 has_addr_or_ctl = 1;
0a7de745 1682 }
3e170ce0 1683
fe8ab488
A
1684#if CONFIG_MACF_SOCKET_SUBSET
1685 /*
1686 * We check the state without holding the socket lock;
1687 * if a race condition occurs, it would simply result
3e170ce0 1688 * in an extra call to the MAC check function.
fe8ab488
A
1689 *
1690 * Note: The following check is never true taken with the
1691 * current limitation that we do not accept to pass an address,
3e170ce0
A
1692 * this is effectively placeholder code. If we add support for
1693 * addresses, we will have to check every address.
fe8ab488 1694 */
3e170ce0 1695 if (to != NULL &&
fe8ab488 1696 !(so->so_state & SS_DEFUNCT) &&
3e170ce0 1697 (error = mac_socket_check_send(kauth_cred_get(), so, to))
0a7de745 1698 != 0) {
fe8ab488 1699 goto out;
0a7de745 1700 }
fe8ab488
A
1701#endif /* MAC_SOCKET_SUBSET */
1702 }
1703
1704 len_before = uio_array_resid(uiop, uap->cnt);
1705
3e170ce0
A
1706 /*
1707 * Feed list of packets at once only for connected socket without
1708 * control message
1709 */
1710 if (so->so_proto->pr_usrreqs->pru_sosend_list !=
1711 pru_sosend_list_notsupp &&
1712 has_addr_or_ctl == 0 && somaxsendmsgx == 0) {
1713 error = so->so_proto->pr_usrreqs->pru_sosend_list(so, uiop,
1714 uap->cnt, uap->flags);
1715 } else {
1716 for (i = 0; i < uap->cnt; i++) {
1717 struct user_msghdr_x *mp = user_msg_x + i;
1718 struct user_msghdr user_msg;
1719 uio_t auio = uiop[i];
1720 int32_t tmpval;
1721
1722 user_msg.msg_flags = mp->msg_flags;
1723 user_msg.msg_controllen = mp->msg_controllen;
1724 user_msg.msg_control = mp->msg_control;
1725 user_msg.msg_iovlen = mp->msg_iovlen;
1726 user_msg.msg_iov = mp->msg_iov;
1727 user_msg.msg_namelen = mp->msg_namelen;
1728 user_msg.msg_name = mp->msg_name;
1729
1730 error = sendit(p, so, &user_msg, auio, uap->flags,
1731 &tmpval);
0a7de745 1732 if (error != 0) {
3e170ce0 1733 break;
0a7de745 1734 }
3e170ce0
A
1735 }
1736 }
fe8ab488
A
1737 len_after = uio_array_resid(uiop, uap->cnt);
1738
3e170ce0
A
1739 VERIFY(len_after <= len_before);
1740
fe8ab488
A
1741 if (error != 0) {
1742 if (len_after != len_before && (error == ERESTART ||
3e170ce0 1743 error == EINTR || error == EWOULDBLOCK ||
0a7de745 1744 error == ENOBUFS)) {
fe8ab488 1745 error = 0;
0a7de745 1746 }
fe8ab488 1747 /* Generation of SIGPIPE can be controlled per socket */
0a7de745 1748 if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE)) {
fe8ab488 1749 psignal(p, SIGPIPE);
0a7de745 1750 }
fe8ab488
A
1751 }
1752 if (error == 0) {
1753 uiocnt = externalize_user_msghdr_array(umsgp,
1754 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
3e170ce0 1755 UIO_WRITE, uap->cnt, user_msg_x, uiop);
fe8ab488
A
1756
1757 *retval = (int)(uiocnt);
1758 }
1759out:
0a7de745 1760 if (need_drop) {
fe8ab488 1761 file_drop(uap->s);
0a7de745
A
1762 }
1763 if (umsgp != NULL) {
fe8ab488 1764 _FREE(umsgp, M_TEMP);
0a7de745 1765 }
fe8ab488
A
1766 if (uiop != NULL) {
1767 free_uio_array(uiop, uap->cnt);
1768 _FREE(uiop, M_TEMP);
1769 }
0a7de745 1770 if (user_msg_x != NULL) {
3e170ce0 1771 _FREE(user_msg_x, M_TEMP);
0a7de745 1772 }
fe8ab488
A
1773
1774 KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0);
1775
0a7de745 1776 return error;
fe8ab488
A
1777}
1778
3e170ce0
A
1779
1780static int
1781copyout_sa(struct sockaddr *fromsa, user_addr_t name, socklen_t *namelen)
1782{
1783 int error = 0;
1784 socklen_t sa_len = 0;
1785 ssize_t len;
1786
1787 len = *namelen;
1788 if (len <= 0 || fromsa == 0) {
1789 len = 0;
1790 } else {
1791#ifndef MIN
0a7de745 1792#define MIN(a, b) ((a) > (b) ? (b) : (a))
3e170ce0
A
1793#endif
1794 sa_len = fromsa->sa_len;
1795 len = MIN((unsigned int)len, sa_len);
1796 error = copyout(fromsa, name, (unsigned)len);
0a7de745 1797 if (error) {
3e170ce0 1798 goto out;
0a7de745 1799 }
3e170ce0
A
1800 }
1801 *namelen = sa_len;
1802out:
0a7de745 1803 return 0;
3e170ce0
A
1804}
1805
1806static int
1807copyout_control(struct proc *p, struct mbuf *m, user_addr_t control,
1808 socklen_t *controllen, int *flags)
1809{
1810 int error = 0;
1811 ssize_t len;
1812 user_addr_t ctlbuf;
1813
1814 len = *controllen;
1815 *controllen = 0;
1816 ctlbuf = control;
1817
1818 while (m && len > 0) {
1819 unsigned int tocopy;
1820 struct cmsghdr *cp = mtod(m, struct cmsghdr *);
1821 int cp_size = CMSG_ALIGN(cp->cmsg_len);
1822 int buflen = m->m_len;
1823
1824 while (buflen > 0 && len > 0) {
1825 /*
1826 * SCM_TIMESTAMP hack because struct timeval has a
1827 * different size for 32 bits and 64 bits processes
1828 */
1829 if (cp->cmsg_level == SOL_SOCKET && cp->cmsg_type == SCM_TIMESTAMP) {
527f9951 1830 unsigned char tmp_buffer[CMSG_SPACE(sizeof(struct user64_timeval))] = {};
3e170ce0
A
1831 struct cmsghdr *tmp_cp = (struct cmsghdr *)(void *)tmp_buffer;
1832 int tmp_space;
1833 struct timeval *tv = (struct timeval *)(void *)CMSG_DATA(cp);
1834
1835 tmp_cp->cmsg_level = SOL_SOCKET;
1836 tmp_cp->cmsg_type = SCM_TIMESTAMP;
1837
1838 if (proc_is64bit(p)) {
1839 struct user64_timeval *tv64 = (struct user64_timeval *)(void *)CMSG_DATA(tmp_cp);
1840
cb323159
A
1841 os_unaligned_deref(&tv64->tv_sec) = tv->tv_sec;
1842 os_unaligned_deref(&tv64->tv_usec) = tv->tv_usec;
3e170ce0
A
1843
1844 tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user64_timeval));
1845 tmp_space = CMSG_SPACE(sizeof(struct user64_timeval));
1846 } else {
1847 struct user32_timeval *tv32 = (struct user32_timeval *)(void *)CMSG_DATA(tmp_cp);
1848
1849 tv32->tv_sec = tv->tv_sec;
1850 tv32->tv_usec = tv->tv_usec;
1851
1852 tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user32_timeval));
1853 tmp_space = CMSG_SPACE(sizeof(struct user32_timeval));
1854 }
1855 if (len >= tmp_space) {
1856 tocopy = tmp_space;
1857 } else {
1858 *flags |= MSG_CTRUNC;
1859 tocopy = len;
1860 }
1861 error = copyout(tmp_buffer, ctlbuf, tocopy);
0a7de745 1862 if (error) {
3e170ce0 1863 goto out;
0a7de745 1864 }
3e170ce0
A
1865 } else {
1866 if (cp_size > buflen) {
1867 panic("cp_size > buflen, something"
1868 "wrong with alignment!");
1869 }
1870 if (len >= cp_size) {
1871 tocopy = cp_size;
1872 } else {
1873 *flags |= MSG_CTRUNC;
1874 tocopy = len;
1875 }
1876 error = copyout((caddr_t) cp, ctlbuf, tocopy);
0a7de745 1877 if (error) {
3e170ce0 1878 goto out;
0a7de745 1879 }
3e170ce0
A
1880 }
1881
1882 ctlbuf += tocopy;
1883 len -= tocopy;
1884
1885 buflen -= cp_size;
1886 cp = (struct cmsghdr *)(void *)
1887 ((unsigned char *) cp + cp_size);
1888 cp_size = CMSG_ALIGN(cp->cmsg_len);
1889 }
1890
1891 m = m->m_next;
1892 }
1893 *controllen = ctlbuf - control;
1894out:
0a7de745 1895 return error;
3e170ce0
A
1896}
1897
2d21ac55
A
1898/*
1899 * Returns: 0 Success
1900 * ENOTSOCK
1901 * EINVAL
1902 * EBADF
1903 * EACCES Mandatory Access Control failure
1904 * copyout:EFAULT
1905 * fp_lookup:EBADF
1906 * <pru_soreceive>:ENOBUFS
1907 * <pru_soreceive>:ENOTCONN
1908 * <pru_soreceive>:EWOULDBLOCK
1909 * <pru_soreceive>:EFAULT
1910 * <pru_soreceive>:EINTR
1911 * <pru_soreceive>:EBADF
1912 * <pru_soreceive>:EINVAL
1913 * <pru_soreceive>:EMSGSIZE
1914 * <pru_soreceive>:???
1915 *
1916 * Notes: Additional return values from calls through <pru_soreceive>
1917 * depend on protocols other than TCP or AF_UNIX, which are
1918 * documented above.
1919 */
1c79356b 1920static int
2d21ac55 1921recvit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop,
b0d623f7 1922 user_addr_t namelenp, int32_t *retval)
1c79356b 1923{
39236c6e
A
1924 ssize_t len;
1925 int error;
3e170ce0 1926 struct mbuf *control = 0;
1c79356b
A
1927 struct socket *so;
1928 struct sockaddr *fromsa = 0;
91447636 1929 struct fileproc *fp;
1c79356b 1930
2d21ac55 1931 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
91447636 1932 proc_fdlock(p);
2d21ac55
A
1933 if ((error = fp_lookup(p, s, &fp, 1))) {
1934 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
91447636 1935 proc_fdunlock(p);
0a7de745 1936 return error;
1c79356b 1937 }
91447636 1938 if (fp->f_type != DTYPE_SOCKET) {
2d21ac55 1939 fp_drop(p, s, fp, 1);
91447636 1940 proc_fdunlock(p);
0a7de745 1941 return ENOTSOCK;
91447636 1942 }
1c79356b 1943
2d21ac55
A
1944 so = (struct socket *)fp->f_data;
1945 if (so == NULL) {
1946 fp_drop(p, s, fp, 1);
1947 proc_fdunlock(p);
0a7de745 1948 return EBADF;
2d21ac55 1949 }
91447636
A
1950
1951 proc_fdunlock(p);
2d21ac55
A
1952
1953#if CONFIG_MACF_SOCKET_SUBSET
1954 /*
1955 * We check the state without holding the socket lock;
1956 * if a race condition occurs, it would simply result
1957 * in an extra call to the MAC check function.
1958 */
316670eb
A
1959 if (!(so->so_state & SS_DEFUNCT) &&
1960 !(so->so_state & SS_ISCONNECTED) &&
39236c6e 1961 !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
0a7de745 1962 (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0) {
2d21ac55 1963 goto out1;
0a7de745 1964 }
2d21ac55 1965#endif /* MAC_SOCKET_SUBSET */
91447636 1966 if (uio_resid(uiop) < 0) {
2d21ac55 1967 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, EINVAL, 0, 0, 0, 0);
91447636
A
1968 error = EINVAL;
1969 goto out1;
1c79356b 1970 }
91447636
A
1971
1972 len = uio_resid(uiop);
2d21ac55
A
1973 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, uiop,
1974 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
1975 &mp->msg_flags);
0a7de745 1976 if (fromsa) {
b0d623f7
A
1977 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()),
1978 fromsa);
0a7de745 1979 }
1c79356b 1980 if (error) {
91447636 1981 if (uio_resid(uiop) != len && (error == ERESTART ||
0a7de745 1982 error == EINTR || error == EWOULDBLOCK)) {
1c79356b 1983 error = 0;
0a7de745 1984 }
1c79356b 1985 }
0a7de745 1986 if (error) {
1c79356b 1987 goto out;
0a7de745 1988 }
2d21ac55 1989
91447636 1990 *retval = len - uio_resid(uiop);
2d21ac55 1991
3e170ce0
A
1992 if (mp->msg_name) {
1993 error = copyout_sa(fromsa, mp->msg_name, &mp->msg_namelen);
0a7de745 1994 if (error) {
3e170ce0 1995 goto out;
0a7de745 1996 }
2d21ac55 1997 /* return the actual, untruncated address length */
1c79356b 1998 if (namelenp &&
3e170ce0 1999 (error = copyout((caddr_t)&mp->msg_namelen, namelenp,
0a7de745 2000 sizeof(int)))) {
1c79356b
A
2001 goto out;
2002 }
2003 }
39236c6e 2004
3e170ce0
A
2005 if (mp->msg_control) {
2006 error = copyout_control(p, control, mp->msg_control,
2007 &mp->msg_controllen, &mp->msg_flags);
1c79356b
A
2008 }
2009out:
0a7de745 2010 if (fromsa) {
1c79356b 2011 FREE(fromsa, M_SONAME);
0a7de745
A
2012 }
2013 if (control) {
1c79356b 2014 m_freem(control);
0a7de745 2015 }
2d21ac55 2016 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
91447636
A
2017out1:
2018 fp_drop(p, s, fp, 0);
0a7de745 2019 return error;
1c79356b
A
2020}
2021
2d21ac55
A
2022/*
2023 * Returns: 0 Success
2024 * ENOMEM
2025 * copyin:EFAULT
2026 * recvit:???
2027 * read:??? [4056224: applicable for pipes]
2028 *
2029 * Notes: The read entry point is only called as part of support for
2030 * binary backward compatability; new code should use read
2031 * instead of recv or recvfrom when attempting to read data
2032 * from pipes.
2033 *
2034 * For full documentation of the return codes from recvit, see
2035 * the block header for the recvit function.
2036 */
2037int
b0d623f7 2038recvfrom(struct proc *p, struct recvfrom_args *uap, int32_t *retval)
2d21ac55
A
2039{
2040 __pthread_testcancel(1);
0a7de745
A
2041 return recvfrom_nocancel(p, (struct recvfrom_nocancel_args *)uap,
2042 retval);
2d21ac55
A
2043}
2044
1c79356b 2045int
3e170ce0
A
2046recvfrom_nocancel(struct proc *p, struct recvfrom_nocancel_args *uap,
2047 int32_t *retval)
1c79356b 2048{
91447636 2049 struct user_msghdr msg;
1c79356b 2050 int error;
91447636 2051 uio_t auio = NULL;
1c79356b 2052
2d21ac55 2053 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_START, 0, 0, 0, 0, 0);
55e303ae 2054 AUDIT_ARG(fd, uap->s);
1c79356b
A
2055
2056 if (uap->fromlenaddr) {
91447636 2057 error = copyin(uap->fromlenaddr,
0a7de745
A
2058 (caddr_t)&msg.msg_namelen, sizeof(msg.msg_namelen));
2059 if (error) {
2060 return error;
2061 }
2d21ac55 2062 } else {
1c79356b 2063 msg.msg_namelen = 0;
2d21ac55 2064 }
1c79356b 2065 msg.msg_name = uap->from;
91447636 2066 auio = uio_create(1, 0,
2d21ac55
A
2067 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
2068 UIO_READ);
91447636 2069 if (auio == NULL) {
0a7de745 2070 return ENOMEM;
91447636 2071 }
2d21ac55 2072
91447636
A
2073 uio_addiov(auio, uap->buf, uap->len);
2074 /* no need to set up msg_iov. recvit uses uio_t we send it */
2075 msg.msg_iov = 0;
2076 msg.msg_iovlen = 0;
1c79356b 2077 msg.msg_control = 0;
91447636 2078 msg.msg_controllen = 0;
1c79356b 2079 msg.msg_flags = uap->flags;
91447636
A
2080 error = recvit(p, uap->s, &msg, auio, uap->fromlenaddr, retval);
2081 if (auio != NULL) {
2082 uio_free(auio);
2083 }
2d21ac55 2084
2d21ac55 2085 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_END, error, 0, 0, 0, 0);
1c79356b 2086
0a7de745 2087 return error;
1c79356b
A
2088}
2089
2090/*
2d21ac55
A
2091 * Returns: 0 Success
2092 * EMSGSIZE
2093 * ENOMEM
2094 * copyin:EFAULT
2095 * copyout:EFAULT
2096 * recvit:???
2097 *
2098 * Notes: For full documentation of the return codes from recvit, see
2099 * the block header for the recvit function.
1c79356b
A
2100 */
2101int
b0d623f7 2102recvmsg(struct proc *p, struct recvmsg_args *uap, int32_t *retval)
1c79356b 2103{
2d21ac55 2104 __pthread_testcancel(1);
0a7de745
A
2105 return recvmsg_nocancel(p, (struct recvmsg_nocancel_args *)uap,
2106 retval);
1c79356b 2107}
1c79356b
A
2108
2109int
3e170ce0
A
2110recvmsg_nocancel(struct proc *p, struct recvmsg_nocancel_args *uap,
2111 int32_t *retval)
1c79356b 2112{
b0d623f7
A
2113 struct user32_msghdr msg32;
2114 struct user64_msghdr msg64;
91447636
A
2115 struct user_msghdr user_msg;
2116 caddr_t msghdrp;
0a7de745 2117 int size_of_msghdr;
91447636 2118 user_addr_t uiov;
2d21ac55 2119 int error;
91447636
A
2120 uio_t auio = NULL;
2121 struct user_iovec *iovp;
1c79356b 2122
2d21ac55 2123 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
55e303ae 2124 AUDIT_ARG(fd, uap->s);
91447636 2125 if (IS_64BIT_PROCESS(p)) {
b0d623f7 2126 msghdrp = (caddr_t)&msg64;
0a7de745 2127 size_of_msghdr = sizeof(msg64);
2d21ac55 2128 } else {
b0d623f7 2129 msghdrp = (caddr_t)&msg32;
0a7de745 2130 size_of_msghdr = sizeof(msg32);
91447636
A
2131 }
2132 error = copyin(uap->msg, msghdrp, size_of_msghdr);
2d21ac55
A
2133 if (error) {
2134 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
0a7de745 2135 return error;
1c79356b
A
2136 }
2137
91447636 2138 /* only need to copy if user process is not 64-bit */
b0d623f7
A
2139 if (IS_64BIT_PROCESS(p)) {
2140 user_msg.msg_flags = msg64.msg_flags;
2141 user_msg.msg_controllen = msg64.msg_controllen;
2142 user_msg.msg_control = msg64.msg_control;
2143 user_msg.msg_iovlen = msg64.msg_iovlen;
2144 user_msg.msg_iov = msg64.msg_iov;
2145 user_msg.msg_namelen = msg64.msg_namelen;
2146 user_msg.msg_name = msg64.msg_name;
2147 } else {
2148 user_msg.msg_flags = msg32.msg_flags;
2149 user_msg.msg_controllen = msg32.msg_controllen;
2150 user_msg.msg_control = msg32.msg_control;
2151 user_msg.msg_iovlen = msg32.msg_iovlen;
2152 user_msg.msg_iov = msg32.msg_iov;
2153 user_msg.msg_namelen = msg32.msg_namelen;
2154 user_msg.msg_name = msg32.msg_name;
91447636
A
2155 }
2156
2157 if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
2d21ac55
A
2158 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, EMSGSIZE,
2159 0, 0, 0, 0);
0a7de745 2160 return EMSGSIZE;
91447636
A
2161 }
2162
91447636 2163 user_msg.msg_flags = uap->flags;
91447636
A
2164
2165 /* allocate a uio large enough to hold the number of iovecs passed */
2166 auio = uio_create(user_msg.msg_iovlen, 0,
2d21ac55
A
2167 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
2168 UIO_READ);
91447636
A
2169 if (auio == NULL) {
2170 error = ENOMEM;
2171 goto done;
2172 }
2173
2d21ac55
A
2174 /*
2175 * get location of iovecs within the uio. then copyin the iovecs from
91447636
A
2176 * user space.
2177 */
2178 iovp = uio_iovsaddr(auio);
2179 if (iovp == NULL) {
2180 error = ENOMEM;
2181 goto done;
2182 }
2183 uiov = user_msg.msg_iov;
2184 user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
b0d623f7 2185 error = copyin_user_iovec_array(uiov,
0a7de745
A
2186 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
2187 user_msg.msg_iovlen, iovp);
2188 if (error) {
1c79356b 2189 goto done;
0a7de745 2190 }
91447636 2191
2d21ac55 2192 /* finish setup of uio_t */
39236c6e
A
2193 error = uio_calculateresid(auio);
2194 if (error) {
2195 goto done;
2196 }
2d21ac55 2197
91447636 2198 error = recvit(p, uap->s, &user_msg, auio, 0, retval);
1c79356b 2199 if (!error) {
91447636 2200 user_msg.msg_iov = uiov;
b0d623f7
A
2201 if (IS_64BIT_PROCESS(p)) {
2202 msg64.msg_flags = user_msg.msg_flags;
2203 msg64.msg_controllen = user_msg.msg_controllen;
2204 msg64.msg_control = user_msg.msg_control;
2205 msg64.msg_iovlen = user_msg.msg_iovlen;
2206 msg64.msg_iov = user_msg.msg_iov;
2207 msg64.msg_namelen = user_msg.msg_namelen;
2208 msg64.msg_name = user_msg.msg_name;
2209 } else {
2210 msg32.msg_flags = user_msg.msg_flags;
2211 msg32.msg_controllen = user_msg.msg_controllen;
2212 msg32.msg_control = user_msg.msg_control;
2213 msg32.msg_iovlen = user_msg.msg_iovlen;
2214 msg32.msg_iov = user_msg.msg_iov;
2215 msg32.msg_namelen = user_msg.msg_namelen;
2216 msg32.msg_name = user_msg.msg_name;
91447636
A
2217 }
2218 error = copyout(msghdrp, uap->msg, size_of_msghdr);
1c79356b
A
2219 }
2220done:
91447636
A
2221 if (auio != NULL) {
2222 uio_free(auio);
2223 }
2d21ac55 2224 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
0a7de745 2225 return error;
1c79356b
A
2226}
2227
fe8ab488
A
2228int
2229recvmsg_x(struct proc *p, struct recvmsg_x_args *uap, user_ssize_t *retval)
2230{
2231 int error = EOPNOTSUPP;
3e170ce0
A
2232 struct user_msghdr_x *user_msg_x = NULL;
2233 struct recv_msg_elem *recv_msg_array = NULL;
fe8ab488
A
2234 struct socket *so;
2235 user_ssize_t len_before = 0, len_after;
2236 int need_drop = 0;
2237 size_t size_of_msghdr;
2238 void *umsgp = NULL;
2239 u_int i;
2240 u_int uiocnt;
2241
2242 KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0);
2243
2244 error = file_socket(uap->s, &so);
2245 if (error) {
2246 goto out;
2247 }
2248 need_drop = 1;
2249 if (so == NULL) {
2250 error = EBADF;
2251 goto out;
2252 }
fe8ab488
A
2253 /*
2254 * Input parameter range check
2255 */
2256 if (uap->cnt == 0 || uap->cnt > UIO_MAXIOV) {
2257 error = EINVAL;
2258 goto out;
2259 }
0a7de745 2260 if (uap->cnt > somaxrecvmsgx) {
3e170ce0 2261 uap->cnt = somaxrecvmsgx;
0a7de745 2262 }
3e170ce0
A
2263
2264 user_msg_x = _MALLOC(uap->cnt * sizeof(struct user_msghdr_x),
fe8ab488 2265 M_TEMP, M_WAITOK | M_ZERO);
3e170ce0
A
2266 if (user_msg_x == NULL) {
2267 DBG_PRINTF("%s _MALLOC() user_msg_x failed\n", __func__);
fe8ab488
A
2268 error = ENOMEM;
2269 goto out;
2270 }
3e170ce0
A
2271 recv_msg_array = alloc_recv_msg_array(uap->cnt);
2272 if (recv_msg_array == NULL) {
2273 DBG_PRINTF("%s alloc_recv_msg_array() failed\n", __func__);
fe8ab488
A
2274 error = ENOMEM;
2275 goto out;
2276 }
fe8ab488
A
2277 size_of_msghdr = IS_64BIT_PROCESS(p) ?
2278 sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x);
2279
2280 umsgp = _MALLOC(uap->cnt * size_of_msghdr, M_TEMP, M_WAITOK | M_ZERO);
2281 if (umsgp == NULL) {
3e170ce0 2282 DBG_PRINTF("%s _MALLOC() umsgp failed\n", __func__);
fe8ab488
A
2283 error = ENOMEM;
2284 goto out;
2285 }
2286 error = copyin(uap->msgp, umsgp, uap->cnt * size_of_msghdr);
2287 if (error) {
3e170ce0 2288 DBG_PRINTF("%s copyin() failed\n", __func__);
fe8ab488
A
2289 goto out;
2290 }
3e170ce0 2291 error = internalize_recv_msghdr_array(umsgp,
fe8ab488 2292 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
3e170ce0 2293 UIO_READ, uap->cnt, user_msg_x, recv_msg_array);
fe8ab488 2294 if (error) {
3e170ce0 2295 DBG_PRINTF("%s copyin_user_msghdr_array() failed\n", __func__);
fe8ab488
A
2296 goto out;
2297 }
2298 /*
2299 * Make sure the size of each message iovec and
2300 * the aggregate size of all the iovec is valid
2301 */
3e170ce0 2302 if (recv_msg_array_is_valid(recv_msg_array, uap->cnt) == 0) {
fe8ab488
A
2303 error = EINVAL;
2304 goto out;
2305 }
fe8ab488
A
2306 /*
2307 * Sanity check on passed arguments
2308 */
2309 for (i = 0; i < uap->cnt; i++) {
3e170ce0 2310 struct user_msghdr_x *mp = user_msg_x + i;
fe8ab488
A
2311
2312 if (mp->msg_flags != 0) {
2313 error = EINVAL;
2314 goto out;
2315 }
fe8ab488
A
2316 }
2317#if CONFIG_MACF_SOCKET_SUBSET
2318 /*
2319 * We check the state without holding the socket lock;
2320 * if a race condition occurs, it would simply result
2321 * in an extra call to the MAC check function.
2322 */
2323 if (!(so->so_state & SS_DEFUNCT) &&
2324 !(so->so_state & SS_ISCONNECTED) &&
2325 !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
0a7de745 2326 (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0) {
fe8ab488 2327 goto out;
0a7de745 2328 }
fe8ab488
A
2329#endif /* MAC_SOCKET_SUBSET */
2330
3e170ce0 2331 len_before = recv_msg_array_resid(recv_msg_array, uap->cnt);
fe8ab488 2332
3e170ce0
A
2333 if (so->so_proto->pr_usrreqs->pru_soreceive_list !=
2334 pru_soreceive_list_notsupp &&
2335 somaxrecvmsgx == 0) {
2336 error = so->so_proto->pr_usrreqs->pru_soreceive_list(so,
2337 recv_msg_array, uap->cnt, &uap->flags);
2338 } else {
2339 int flags = uap->flags;
fe8ab488 2340
3e170ce0
A
2341 for (i = 0; i < uap->cnt; i++) {
2342 struct recv_msg_elem *recv_msg_elem;
2343 uio_t auio;
2344 struct sockaddr **psa;
2345 struct mbuf **controlp;
2346
2347 recv_msg_elem = recv_msg_array + i;
2348 auio = recv_msg_elem->uio;
2349
2350 /*
2351 * Do not block if we got at least one packet
2352 */
0a7de745 2353 if (i > 0) {
3e170ce0 2354 flags |= MSG_DONTWAIT;
0a7de745 2355 }
3e170ce0
A
2356
2357 psa = (recv_msg_elem->which & SOCK_MSG_SA) ?
2358 &recv_msg_elem->psa : NULL;
2359 controlp = (recv_msg_elem->which & SOCK_MSG_CONTROL) ?
2360 &recv_msg_elem->controlp : NULL;
2361
2362 error = so->so_proto->pr_usrreqs->pru_soreceive(so, psa,
2363 auio, (struct mbuf **)0, controlp, &flags);
0a7de745 2364 if (error) {
3e170ce0 2365 break;
0a7de745 2366 }
3e170ce0
A
2367 /*
2368 * We have some data
2369 */
2370 recv_msg_elem->which |= SOCK_MSG_DATA;
2371 /*
2372 * Stop on partial copy
2373 */
0a7de745 2374 if (flags & (MSG_RCVMORE | MSG_TRUNC)) {
3e170ce0 2375 break;
0a7de745 2376 }
3e170ce0 2377 }
0a7de745 2378 if ((uap->flags & MSG_DONTWAIT) == 0) {
3e170ce0 2379 flags &= ~MSG_DONTWAIT;
0a7de745 2380 }
3e170ce0
A
2381 uap->flags = flags;
2382 }
2383
2384 len_after = recv_msg_array_resid(recv_msg_array, uap->cnt);
fe8ab488
A
2385
2386 if (error) {
2387 if (len_after != len_before && (error == ERESTART ||
0a7de745 2388 error == EINTR || error == EWOULDBLOCK)) {
fe8ab488 2389 error = 0;
0a7de745 2390 } else {
3e170ce0 2391 goto out;
0a7de745 2392 }
fe8ab488 2393 }
fe8ab488 2394
3e170ce0
A
2395 uiocnt = externalize_recv_msghdr_array(umsgp,
2396 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
2397 UIO_READ, uap->cnt, user_msg_x, recv_msg_array);
2398
2399 error = copyout(umsgp, uap->msgp, uap->cnt * size_of_msghdr);
2400 if (error) {
2401 DBG_PRINTF("%s copyout() failed\n", __func__);
2402 goto out;
2403 }
2404 *retval = (int)(uiocnt);
2405
2406 for (i = 0; i < uap->cnt; i++) {
2407 struct user_msghdr_x *mp = user_msg_x + i;
2408 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
2409 struct sockaddr *fromsa = recv_msg_elem->psa;
2410
2411 if (mp->msg_name) {
2412 error = copyout_sa(fromsa, mp->msg_name,
2413 &mp->msg_namelen);
0a7de745 2414 if (error) {
3e170ce0 2415 goto out;
0a7de745 2416 }
3e170ce0
A
2417 }
2418 if (mp->msg_control) {
2419 error = copyout_control(p, recv_msg_elem->controlp,
2420 mp->msg_control, &mp->msg_controllen,
2421 &mp->msg_flags);
0a7de745 2422 if (error) {
3e170ce0 2423 goto out;
0a7de745 2424 }
fe8ab488 2425 }
fe8ab488
A
2426 }
2427out:
0a7de745 2428 if (need_drop) {
fe8ab488 2429 file_drop(uap->s);
0a7de745
A
2430 }
2431 if (umsgp != NULL) {
fe8ab488 2432 _FREE(umsgp, M_TEMP);
0a7de745
A
2433 }
2434 if (recv_msg_array != NULL) {
3e170ce0 2435 free_recv_msg_array(recv_msg_array, uap->cnt);
0a7de745
A
2436 }
2437 if (user_msg_x != NULL) {
3e170ce0 2438 _FREE(user_msg_x, M_TEMP);
0a7de745 2439 }
3e170ce0 2440
fe8ab488 2441 KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0);
3e170ce0 2442
0a7de745 2443 return error;
fe8ab488
A
2444}
2445
2d21ac55
A
2446/*
2447 * Returns: 0 Success
2448 * EBADF
2449 * file_socket:ENOTSOCK
2450 * file_socket:EBADF
2451 * soshutdown:EINVAL
2452 * soshutdown:ENOTCONN
2453 * soshutdown:EADDRNOTAVAIL[TCP]
2454 * soshutdown:ENOBUFS[TCP]
2455 * soshutdown:EMSGSIZE[TCP]
2456 * soshutdown:EHOSTUNREACH[TCP]
2457 * soshutdown:ENETUNREACH[TCP]
2458 * soshutdown:ENETDOWN[TCP]
2459 * soshutdown:ENOMEM[TCP]
2460 * soshutdown:EACCES[TCP]
2461 * soshutdown:EMSGSIZE[TCP]
2462 * soshutdown:ENOBUFS[TCP]
2463 * soshutdown:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
2464 * soshutdown:??? [other protocol families]
2465 */
1c79356b
A
2466/* ARGSUSED */
2467int
2d21ac55 2468shutdown(__unused struct proc *p, struct shutdown_args *uap,
b0d623f7 2469 __unused int32_t *retval)
1c79356b 2470{
2d21ac55 2471 struct socket *so;
1c79356b
A
2472 int error;
2473
55e303ae 2474 AUDIT_ARG(fd, uap->s);
91447636 2475 error = file_socket(uap->s, &so);
0a7de745
A
2476 if (error) {
2477 return error;
2478 }
91447636
A
2479 if (so == NULL) {
2480 error = EBADF;
2481 goto out;
2482 }
2483 error = soshutdown((struct socket *)so, uap->how);
2484out:
2485 file_drop(uap->s);
0a7de745 2486 return error;
1c79356b
A
2487}
2488
2d21ac55
A
2489/*
2490 * Returns: 0 Success
2491 * EFAULT
2492 * EINVAL
2493 * EACCES Mandatory Access Control failure
2494 * file_socket:ENOTSOCK
2495 * file_socket:EBADF
2496 * sosetopt:EINVAL
2497 * sosetopt:ENOPROTOOPT
2498 * sosetopt:ENOBUFS
2499 * sosetopt:EDOM
2500 * sosetopt:EFAULT
2501 * sosetopt:EOPNOTSUPP[AF_UNIX]
2502 * sosetopt:???
2503 */
1c79356b
A
2504/* ARGSUSED */
2505int
2d21ac55 2506setsockopt(struct proc *p, struct setsockopt_args *uap,
b0d623f7 2507 __unused int32_t *retval)
1c79356b 2508{
2d21ac55 2509 struct socket *so;
1c79356b
A
2510 struct sockopt sopt;
2511 int error;
2512
55e303ae 2513 AUDIT_ARG(fd, uap->s);
0a7de745
A
2514 if (uap->val == 0 && uap->valsize != 0) {
2515 return EFAULT;
2516 }
2d21ac55 2517 /* No bounds checking on size (it's unsigned) */
1c79356b 2518
91447636 2519 error = file_socket(uap->s, &so);
0a7de745
A
2520 if (error) {
2521 return error;
2522 }
1c79356b
A
2523
2524 sopt.sopt_dir = SOPT_SET;
2525 sopt.sopt_level = uap->level;
2526 sopt.sopt_name = uap->name;
2527 sopt.sopt_val = uap->val;
2528 sopt.sopt_valsize = uap->valsize;
2529 sopt.sopt_p = p;
2530
91447636
A
2531 if (so == NULL) {
2532 error = EINVAL;
2533 goto out;
2534 }
2d21ac55
A
2535#if CONFIG_MACF_SOCKET_SUBSET
2536 if ((error = mac_socket_check_setsockopt(kauth_cred_get(), so,
0a7de745 2537 &sopt)) != 0) {
2d21ac55 2538 goto out;
0a7de745 2539 }
2d21ac55 2540#endif /* MAC_SOCKET_SUBSET */
0a7de745 2541 error = sosetoptlock(so, &sopt, 1); /* will lock socket */
91447636
A
2542out:
2543 file_drop(uap->s);
0a7de745 2544 return error;
1c79356b
A
2545}
2546
2547
2548
2d21ac55
A
2549/*
2550 * Returns: 0 Success
2551 * EINVAL
2552 * EBADF
2553 * EACCES Mandatory Access Control failure
2554 * copyin:EFAULT
2555 * copyout:EFAULT
2556 * file_socket:ENOTSOCK
2557 * file_socket:EBADF
2558 * sogetopt:???
2559 */
1c79356b 2560int
2d21ac55 2561getsockopt(struct proc *p, struct getsockopt_args *uap,
b0d623f7 2562 __unused int32_t *retval)
1c79356b 2563{
0a7de745
A
2564 int error;
2565 socklen_t valsize;
2566 struct sockopt sopt;
2d21ac55 2567 struct socket *so;
1c79356b 2568
91447636 2569 error = file_socket(uap->s, &so);
0a7de745
A
2570 if (error) {
2571 return error;
2572 }
1c79356b 2573 if (uap->val) {
2d21ac55 2574 error = copyin(uap->avalsize, (caddr_t)&valsize,
0a7de745
A
2575 sizeof(valsize));
2576 if (error) {
91447636 2577 goto out;
0a7de745 2578 }
2d21ac55
A
2579 /* No bounds checking on size (it's unsigned) */
2580 } else {
1c79356b 2581 valsize = 0;
2d21ac55 2582 }
1c79356b
A
2583 sopt.sopt_dir = SOPT_GET;
2584 sopt.sopt_level = uap->level;
2585 sopt.sopt_name = uap->name;
2586 sopt.sopt_val = uap->val;
2587 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
2588 sopt.sopt_p = p;
2589
91447636
A
2590 if (so == NULL) {
2591 error = EBADF;
2592 goto out;
2593 }
2d21ac55
A
2594#if CONFIG_MACF_SOCKET_SUBSET
2595 if ((error = mac_socket_check_getsockopt(kauth_cred_get(), so,
0a7de745 2596 &sopt)) != 0) {
2d21ac55 2597 goto out;
0a7de745 2598 }
2d21ac55 2599#endif /* MAC_SOCKET_SUBSET */
0a7de745 2600 error = sogetoptlock((struct socket *)so, &sopt, 1); /* will lock */
1c79356b
A
2601 if (error == 0) {
2602 valsize = sopt.sopt_valsize;
2d21ac55 2603 error = copyout((caddr_t)&valsize, uap->avalsize,
0a7de745 2604 sizeof(valsize));
1c79356b 2605 }
91447636
A
2606out:
2607 file_drop(uap->s);
0a7de745 2608 return error;
1c79356b
A
2609}
2610
2611
2612/*
2613 * Get socket name.
2d21ac55
A
2614 *
2615 * Returns: 0 Success
2616 * EBADF
2617 * file_socket:ENOTSOCK
2618 * file_socket:EBADF
2619 * copyin:EFAULT
2620 * copyout:EFAULT
2621 * <pru_sockaddr>:ENOBUFS[TCP]
2622 * <pru_sockaddr>:ECONNRESET[TCP]
2623 * <pru_sockaddr>:EINVAL[AF_UNIX]
2624 * <sf_getsockname>:???
1c79356b
A
2625 */
2626/* ARGSUSED */
2d21ac55
A
2627int
2628getsockname(__unused struct proc *p, struct getsockname_args *uap,
b0d623f7 2629 __unused int32_t *retval)
1c79356b 2630{
91447636 2631 struct socket *so;
1c79356b 2632 struct sockaddr *sa;
91447636 2633 socklen_t len;
2d21ac55 2634 socklen_t sa_len;
1c79356b
A
2635 int error;
2636
91447636 2637 error = file_socket(uap->fdes, &so);
0a7de745
A
2638 if (error) {
2639 return error;
2640 }
2641 error = copyin(uap->alen, (caddr_t)&len, sizeof(socklen_t));
2642 if (error) {
91447636 2643 goto out;
0a7de745 2644 }
91447636
A
2645 if (so == NULL) {
2646 error = EBADF;
2647 goto out;
2648 }
1c79356b 2649 sa = 0;
91447636 2650 socket_lock(so, 1);
1c79356b 2651 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
2d21ac55 2652 if (error == 0) {
6d2010ae 2653 error = sflt_getsockname(so, &sa);
0a7de745 2654 if (error == EJUSTRETURN) {
91447636 2655 error = 0;
0a7de745 2656 }
91447636
A
2657 }
2658 socket_unlock(so, 1);
0a7de745 2659 if (error) {
1c79356b 2660 goto bad;
0a7de745 2661 }
1c79356b
A
2662 if (sa == 0) {
2663 len = 0;
2664 goto gotnothing;
2665 }
2666
2d21ac55
A
2667 sa_len = sa->sa_len;
2668 len = MIN(len, sa_len);
91447636 2669 error = copyout((caddr_t)sa, uap->asa, len);
0a7de745 2670 if (error) {
2d21ac55 2671 goto bad;
0a7de745 2672 }
2d21ac55
A
2673 /* return the actual, untruncated address length */
2674 len = sa_len;
1c79356b 2675gotnothing:
0a7de745 2676 error = copyout((caddr_t)&len, uap->alen, sizeof(socklen_t));
1c79356b 2677bad:
0a7de745 2678 if (sa) {
1c79356b 2679 FREE(sa, M_SONAME);
0a7de745 2680 }
91447636
A
2681out:
2682 file_drop(uap->fdes);
0a7de745 2683 return error;
1c79356b
A
2684}
2685
1c79356b
A
2686/*
2687 * Get name of peer for connected socket.
2d21ac55
A
2688 *
2689 * Returns: 0 Success
2690 * EBADF
2691 * EINVAL
2692 * ENOTCONN
2693 * file_socket:ENOTSOCK
2694 * file_socket:EBADF
2695 * copyin:EFAULT
2696 * copyout:EFAULT
2697 * <pru_peeraddr>:???
2698 * <sf_getpeername>:???
1c79356b
A
2699 */
2700/* ARGSUSED */
2701int
2d21ac55 2702getpeername(__unused struct proc *p, struct getpeername_args *uap,
b0d623f7 2703 __unused int32_t *retval)
1c79356b 2704{
91447636 2705 struct socket *so;
1c79356b 2706 struct sockaddr *sa;
91447636 2707 socklen_t len;
2d21ac55 2708 socklen_t sa_len;
1c79356b
A
2709 int error;
2710
91447636 2711 error = file_socket(uap->fdes, &so);
0a7de745
A
2712 if (error) {
2713 return error;
2714 }
91447636
A
2715 if (so == NULL) {
2716 error = EBADF;
2717 goto out;
2718 }
2719
2720 socket_lock(so, 1);
2721
2d21ac55
A
2722 if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
2723 (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
2724 /* the socket has been shutdown, no more getpeername's */
2725 socket_unlock(so, 1);
2726 error = EINVAL;
2727 goto out;
2728 }
2729
0a7de745 2730 if ((so->so_state & (SS_ISCONNECTED | SS_ISCONFIRMING)) == 0) {
91447636
A
2731 socket_unlock(so, 1);
2732 error = ENOTCONN;
2733 goto out;
2734 }
0a7de745 2735 error = copyin(uap->alen, (caddr_t)&len, sizeof(socklen_t));
91447636
A
2736 if (error) {
2737 socket_unlock(so, 1);
2738 goto out;
2739 }
1c79356b
A
2740 sa = 0;
2741 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
2d21ac55 2742 if (error == 0) {
6d2010ae 2743 error = sflt_getpeername(so, &sa);
0a7de745 2744 if (error == EJUSTRETURN) {
91447636 2745 error = 0;
0a7de745 2746 }
91447636
A
2747 }
2748 socket_unlock(so, 1);
0a7de745 2749 if (error) {
1c79356b 2750 goto bad;
0a7de745 2751 }
1c79356b
A
2752 if (sa == 0) {
2753 len = 0;
2754 goto gotnothing;
2755 }
2d21ac55
A
2756 sa_len = sa->sa_len;
2757 len = MIN(len, sa_len);
91447636 2758 error = copyout(sa, uap->asa, len);
0a7de745 2759 if (error) {
1c79356b 2760 goto bad;
0a7de745 2761 }
2d21ac55
A
2762 /* return the actual, untruncated address length */
2763 len = sa_len;
1c79356b 2764gotnothing:
0a7de745 2765 error = copyout((caddr_t)&len, uap->alen, sizeof(socklen_t));
1c79356b 2766bad:
0a7de745
A
2767 if (sa) {
2768 FREE(sa, M_SONAME);
2769 }
91447636
A
2770out:
2771 file_drop(uap->fdes);
0a7de745 2772 return error;
1c79356b
A
2773}
2774
2775int
2d21ac55 2776sockargs(struct mbuf **mp, user_addr_t data, int buflen, int type)
1c79356b 2777{
2d21ac55
A
2778 struct sockaddr *sa;
2779 struct mbuf *m;
1c79356b
A
2780 int error;
2781
e2d2fc5c 2782 size_t alloc_buflen = (size_t)buflen;
39236c6e 2783
0a7de745
A
2784 if (alloc_buflen > INT_MAX / 2) {
2785 return EINVAL;
2786 }
b0d623f7 2787#ifdef __LP64__
3e170ce0
A
2788 /*
2789 * The fd's in the buffer must expand to be pointers, thus we need twice
2790 * as much space
2791 */
0a7de745
A
2792 if (type == MT_CONTROL) {
2793 alloc_buflen = ((buflen - sizeof(struct cmsghdr)) * 2) +
3e170ce0 2794 sizeof(struct cmsghdr);
0a7de745 2795 }
b0d623f7 2796#endif
e2d2fc5c 2797 if (alloc_buflen > MLEN) {
0a7de745
A
2798 if (type == MT_SONAME && alloc_buflen <= 112) {
2799 alloc_buflen = MLEN; /* unix domain compat. hack */
2800 } else if (alloc_buflen > MCLBYTES) {
2801 return EINVAL;
2802 }
1c79356b
A
2803 }
2804 m = m_get(M_WAIT, type);
0a7de745
A
2805 if (m == NULL) {
2806 return ENOBUFS;
2807 }
e2d2fc5c 2808 if (alloc_buflen > MLEN) {
91447636
A
2809 MCLGET(m, M_WAIT);
2810 if ((m->m_flags & M_EXT) == 0) {
2811 m_free(m);
0a7de745 2812 return ENOBUFS;
91447636
A
2813 }
2814 }
3e170ce0
A
2815 /*
2816 * K64: We still copyin the original buflen because it gets expanded
2817 * later and we lie about the size of the mbuf because it only affects
2818 * unp_* functions
b0d623f7 2819 */
1c79356b 2820 m->m_len = buflen;
91447636 2821 error = copyin(data, mtod(m, caddr_t), (u_int)buflen);
2d21ac55 2822 if (error) {
1c79356b 2823 (void) m_free(m);
2d21ac55 2824 } else {
1c79356b
A
2825 *mp = m;
2826 if (type == MT_SONAME) {
2827 sa = mtod(m, struct sockaddr *);
1c79356b
A
2828 sa->sa_len = buflen;
2829 }
2830 }
0a7de745 2831 return error;
1c79356b
A
2832}
2833
91447636
A
2834/*
2835 * Given a user_addr_t of length len, allocate and fill out a *sa.
2d21ac55
A
2836 *
2837 * Returns: 0 Success
2838 * ENAMETOOLONG Filename too long
2839 * EINVAL Invalid argument
2840 * ENOMEM Not enough space
2841 * copyin:EFAULT Bad address
91447636 2842 */
2d21ac55
A
2843static int
2844getsockaddr(struct socket *so, struct sockaddr **namp, user_addr_t uaddr,
4a3eedf9 2845 size_t len, boolean_t translate_unspec)
1c79356b
A
2846{
2847 struct sockaddr *sa;
2848 int error;
2849
0a7de745
A
2850 if (len > SOCK_MAXADDRLEN) {
2851 return ENAMETOOLONG;
2852 }
1c79356b 2853
0a7de745
A
2854 if (len < offsetof(struct sockaddr, sa_data[0])) {
2855 return EINVAL;
2856 }
1c79356b 2857
490019cf 2858 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK | M_ZERO);
91447636 2859 if (sa == NULL) {
0a7de745 2860 return ENOMEM;
91447636
A
2861 }
2862 error = copyin(uaddr, (caddr_t)sa, len);
1c79356b
A
2863 if (error) {
2864 FREE(sa, M_SONAME);
2865 } else {
2d21ac55
A
2866 /*
2867 * Force sa_family to AF_INET on AF_INET sockets to handle
2868 * legacy applications that use AF_UNSPEC (0). On all other
2869 * sockets we leave it unchanged and let the lower layer
2870 * handle it.
2871 */
4a3eedf9 2872 if (translate_unspec && sa->sa_family == AF_UNSPEC &&
39236c6e 2873 SOCK_CHECK_DOM(so, PF_INET) &&
0a7de745 2874 len == sizeof(struct sockaddr_in)) {
2d21ac55 2875 sa->sa_family = AF_INET;
0a7de745 2876 }
2d21ac55 2877
1c79356b
A
2878 sa->sa_len = len;
2879 *namp = sa;
2880 }
0a7de745 2881 return error;
1c79356b
A
2882}
2883
2d21ac55
A
2884static int
2885getsockaddr_s(struct socket *so, struct sockaddr_storage *ss,
4a3eedf9 2886 user_addr_t uaddr, size_t len, boolean_t translate_unspec)
1c79356b 2887{
2d21ac55
A
2888 int error;
2889
2890 if (ss == NULL || uaddr == USER_ADDR_NULL ||
0a7de745
A
2891 len < offsetof(struct sockaddr, sa_data[0])) {
2892 return EINVAL;
2893 }
2d21ac55
A
2894
2895 /*
2896 * sockaddr_storage size is less than SOCK_MAXADDRLEN,
2897 * so the check here is inclusive.
2898 */
0a7de745
A
2899 if (len > sizeof(*ss)) {
2900 return ENAMETOOLONG;
2901 }
1c79356b 2902
0a7de745 2903 bzero(ss, sizeof(*ss));
2d21ac55
A
2904 error = copyin(uaddr, (caddr_t)ss, len);
2905 if (error == 0) {
2906 /*
2907 * Force sa_family to AF_INET on AF_INET sockets to handle
2908 * legacy applications that use AF_UNSPEC (0). On all other
2909 * sockets we leave it unchanged and let the lower layer
2910 * handle it.
2911 */
4a3eedf9 2912 if (translate_unspec && ss->ss_family == AF_UNSPEC &&
39236c6e 2913 SOCK_CHECK_DOM(so, PF_INET) &&
0a7de745 2914 len == sizeof(struct sockaddr_in)) {
2d21ac55 2915 ss->ss_family = AF_INET;
0a7de745 2916 }
91447636 2917
2d21ac55 2918 ss->ss_len = len;
1c79356b 2919 }
0a7de745 2920 return error;
1c79356b
A
2921}
2922
fe8ab488
A
2923int
2924internalize_user_msghdr_array(const void *src, int spacetype, int direction,
3e170ce0 2925 u_int count, struct user_msghdr_x *dst, struct uio **uiop)
fe8ab488
A
2926{
2927 int error = 0;
2928 u_int i;
3e170ce0
A
2929 u_int namecnt = 0;
2930 u_int ctlcnt = 0;
fe8ab488
A
2931
2932 for (i = 0; i < count; i++) {
2933 uio_t auio;
2934 struct user_iovec *iovp;
3e170ce0 2935 struct user_msghdr_x *user_msg = dst + i;
fe8ab488
A
2936
2937 if (spacetype == UIO_USERSPACE64) {
3e170ce0 2938 const struct user64_msghdr_x *msghdr64;
fe8ab488 2939
3e170ce0 2940 msghdr64 = ((const struct user64_msghdr_x *)src) + i;
fe8ab488
A
2941
2942 user_msg->msg_name = msghdr64->msg_name;
2943 user_msg->msg_namelen = msghdr64->msg_namelen;
2944 user_msg->msg_iov = msghdr64->msg_iov;
2945 user_msg->msg_iovlen = msghdr64->msg_iovlen;
2946 user_msg->msg_control = msghdr64->msg_control;
2947 user_msg->msg_controllen = msghdr64->msg_controllen;
2948 user_msg->msg_flags = msghdr64->msg_flags;
2949 user_msg->msg_datalen = msghdr64->msg_datalen;
2950 } else {
3e170ce0 2951 const struct user32_msghdr_x *msghdr32;
fe8ab488 2952
3e170ce0 2953 msghdr32 = ((const struct user32_msghdr_x *)src) + i;
fe8ab488
A
2954
2955 user_msg->msg_name = msghdr32->msg_name;
2956 user_msg->msg_namelen = msghdr32->msg_namelen;
2957 user_msg->msg_iov = msghdr32->msg_iov;
2958 user_msg->msg_iovlen = msghdr32->msg_iovlen;
2959 user_msg->msg_control = msghdr32->msg_control;
2960 user_msg->msg_controllen = msghdr32->msg_controllen;
2961 user_msg->msg_flags = msghdr32->msg_flags;
2962 user_msg->msg_datalen = msghdr32->msg_datalen;
2963 }
3e170ce0
A
2964
2965 if (user_msg->msg_iovlen <= 0 ||
2966 user_msg->msg_iovlen > UIO_MAXIOV) {
fe8ab488
A
2967 error = EMSGSIZE;
2968 goto done;
2969 }
3e170ce0
A
2970 auio = uio_create(user_msg->msg_iovlen, 0, spacetype,
2971 direction);
fe8ab488
A
2972 if (auio == NULL) {
2973 error = ENOMEM;
2974 goto done;
2975 }
2976 uiop[i] = auio;
2977
3e170ce0
A
2978 iovp = uio_iovsaddr(auio);
2979 if (iovp == NULL) {
2980 error = ENOMEM;
2981 goto done;
2982 }
2983 error = copyin_user_iovec_array(user_msg->msg_iov,
0a7de745
A
2984 spacetype, user_msg->msg_iovlen, iovp);
2985 if (error) {
3e170ce0 2986 goto done;
0a7de745 2987 }
3e170ce0 2988 user_msg->msg_iov = CAST_USER_ADDR_T(iovp);
fe8ab488 2989
3e170ce0 2990 error = uio_calculateresid(auio);
0a7de745 2991 if (error) {
3e170ce0 2992 goto done;
0a7de745 2993 }
3e170ce0
A
2994 user_msg->msg_datalen = uio_resid(auio);
2995
0a7de745 2996 if (user_msg->msg_name && user_msg->msg_namelen) {
3e170ce0 2997 namecnt++;
0a7de745
A
2998 }
2999 if (user_msg->msg_control && user_msg->msg_controllen) {
3e170ce0 3000 ctlcnt++;
0a7de745 3001 }
3e170ce0
A
3002 }
3003done:
3004
0a7de745 3005 return error;
3e170ce0
A
3006}
3007
3008int
3009internalize_recv_msghdr_array(const void *src, int spacetype, int direction,
3010 u_int count, struct user_msghdr_x *dst,
3011 struct recv_msg_elem *recv_msg_array)
3012{
3013 int error = 0;
3014 u_int i;
3015
3016 for (i = 0; i < count; i++) {
3017 struct user_iovec *iovp;
3018 struct user_msghdr_x *user_msg = dst + i;
3019 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3020
3021 if (spacetype == UIO_USERSPACE64) {
3022 const struct user64_msghdr_x *msghdr64;
3023
3024 msghdr64 = ((const struct user64_msghdr_x *)src) + i;
3025
3026 user_msg->msg_name = msghdr64->msg_name;
3027 user_msg->msg_namelen = msghdr64->msg_namelen;
3028 user_msg->msg_iov = msghdr64->msg_iov;
3029 user_msg->msg_iovlen = msghdr64->msg_iovlen;
3030 user_msg->msg_control = msghdr64->msg_control;
3031 user_msg->msg_controllen = msghdr64->msg_controllen;
3032 user_msg->msg_flags = msghdr64->msg_flags;
3033 user_msg->msg_datalen = msghdr64->msg_datalen;
fe8ab488 3034 } else {
3e170ce0
A
3035 const struct user32_msghdr_x *msghdr32;
3036
3037 msghdr32 = ((const struct user32_msghdr_x *)src) + i;
3038
3039 user_msg->msg_name = msghdr32->msg_name;
3040 user_msg->msg_namelen = msghdr32->msg_namelen;
3041 user_msg->msg_iov = msghdr32->msg_iov;
3042 user_msg->msg_iovlen = msghdr32->msg_iovlen;
3043 user_msg->msg_control = msghdr32->msg_control;
3044 user_msg->msg_controllen = msghdr32->msg_controllen;
3045 user_msg->msg_flags = msghdr32->msg_flags;
3046 user_msg->msg_datalen = msghdr32->msg_datalen;
fe8ab488 3047 }
3e170ce0
A
3048
3049 if (user_msg->msg_iovlen <= 0 ||
3050 user_msg->msg_iovlen > UIO_MAXIOV) {
3051 error = EMSGSIZE;
3052 goto done;
3053 }
3054 recv_msg_elem->uio = uio_create(user_msg->msg_iovlen, 0,
3055 spacetype, direction);
3056 if (recv_msg_elem->uio == NULL) {
3057 error = ENOMEM;
3058 goto done;
3059 }
3060
3061 iovp = uio_iovsaddr(recv_msg_elem->uio);
3062 if (iovp == NULL) {
3063 error = ENOMEM;
3064 goto done;
3065 }
3066 error = copyin_user_iovec_array(user_msg->msg_iov,
0a7de745
A
3067 spacetype, user_msg->msg_iovlen, iovp);
3068 if (error) {
3e170ce0 3069 goto done;
0a7de745 3070 }
3e170ce0
A
3071 user_msg->msg_iov = CAST_USER_ADDR_T(iovp);
3072
3073 error = uio_calculateresid(recv_msg_elem->uio);
0a7de745 3074 if (error) {
3e170ce0 3075 goto done;
0a7de745 3076 }
3e170ce0
A
3077 user_msg->msg_datalen = uio_resid(recv_msg_elem->uio);
3078
0a7de745 3079 if (user_msg->msg_name && user_msg->msg_namelen) {
3e170ce0 3080 recv_msg_elem->which |= SOCK_MSG_SA;
0a7de745
A
3081 }
3082 if (user_msg->msg_control && user_msg->msg_controllen) {
3e170ce0 3083 recv_msg_elem->which |= SOCK_MSG_CONTROL;
0a7de745 3084 }
fe8ab488
A
3085 }
3086done:
3e170ce0 3087
0a7de745 3088 return error;
fe8ab488
A
3089}
3090
3091u_int
3092externalize_user_msghdr_array(void *dst, int spacetype, int direction,
3e170ce0 3093 u_int count, const struct user_msghdr_x *src, struct uio **uiop)
fe8ab488
A
3094{
3095#pragma unused(direction)
3096 u_int i;
3097 int seenlast = 0;
3098 u_int retcnt = 0;
3099
3100 for (i = 0; i < count; i++) {
3e170ce0 3101 const struct user_msghdr_x *user_msg = src + i;
fe8ab488
A
3102 uio_t auio = uiop[i];
3103 user_ssize_t len = user_msg->msg_datalen - uio_resid(auio);
3104
0a7de745 3105 if (user_msg->msg_datalen != 0 && len == 0) {
fe8ab488 3106 seenlast = 1;
0a7de745 3107 }
3e170ce0 3108
0a7de745
A
3109 if (seenlast == 0) {
3110 retcnt++;
3111 }
3e170ce0
A
3112
3113 if (spacetype == UIO_USERSPACE64) {
3114 struct user64_msghdr_x *msghdr64;
3115
3116 msghdr64 = ((struct user64_msghdr_x *)dst) + i;
3117
3118 msghdr64->msg_flags = user_msg->msg_flags;
3119 msghdr64->msg_datalen = len;
3e170ce0
A
3120 } else {
3121 struct user32_msghdr_x *msghdr32;
3122
3123 msghdr32 = ((struct user32_msghdr_x *)dst) + i;
3124
3125 msghdr32->msg_flags = user_msg->msg_flags;
3126 msghdr32->msg_datalen = len;
3127 }
3128 }
0a7de745 3129 return retcnt;
3e170ce0
A
3130}
3131
3132u_int
3133externalize_recv_msghdr_array(void *dst, int spacetype, int direction,
3134 u_int count, const struct user_msghdr_x *src,
3135 struct recv_msg_elem *recv_msg_array)
3136{
3137 u_int i;
3138 int seenlast = 0;
3139 u_int retcnt = 0;
3140
3141 for (i = 0; i < count; i++) {
3142 const struct user_msghdr_x *user_msg = src + i;
3143 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3144 user_ssize_t len;
3145
3146 len = user_msg->msg_datalen - uio_resid(recv_msg_elem->uio);
3147
3148 if (direction == UIO_READ) {
0a7de745 3149 if ((recv_msg_elem->which & SOCK_MSG_DATA) == 0) {
3e170ce0 3150 seenlast = 1;
0a7de745 3151 }
3e170ce0 3152 } else {
0a7de745 3153 if (user_msg->msg_datalen != 0 && len == 0) {
3e170ce0 3154 seenlast = 1;
0a7de745 3155 }
3e170ce0
A
3156 }
3157
0a7de745
A
3158 if (seenlast == 0) {
3159 retcnt++;
3160 }
fe8ab488
A
3161
3162 if (spacetype == UIO_USERSPACE64) {
3163 struct user64_msghdr_x *msghdr64;
3164
3165 msghdr64 = ((struct user64_msghdr_x *)dst) + i;
3166
3167 msghdr64->msg_flags = user_msg->msg_flags;
3168 msghdr64->msg_datalen = len;
fe8ab488
A
3169 } else {
3170 struct user32_msghdr_x *msghdr32;
3171
3172 msghdr32 = ((struct user32_msghdr_x *)dst) + i;
3173
3174 msghdr32->msg_flags = user_msg->msg_flags;
3175 msghdr32->msg_datalen = len;
3176 }
3177 }
0a7de745 3178 return retcnt;
fe8ab488
A
3179}
3180
3181void
3182free_uio_array(struct uio **uiop, u_int count)
3183{
3184 u_int i;
3185
3186 for (i = 0; i < count; i++) {
0a7de745 3187 if (uiop[i] != NULL) {
fe8ab488 3188 uio_free(uiop[i]);
0a7de745 3189 }
fe8ab488
A
3190 }
3191}
3192
3193__private_extern__ user_ssize_t
3194uio_array_resid(struct uio **uiop, u_int count)
3195{
3196 user_ssize_t len = 0;
3197 u_int i;
3198
3199 for (i = 0; i < count; i++) {
3200 struct uio *auio = uiop[i];
3201
0a7de745 3202 if (auio != NULL) {
fe8ab488 3203 len += uio_resid(auio);
0a7de745 3204 }
fe8ab488 3205 }
0a7de745 3206 return len;
fe8ab488
A
3207}
3208
3209int
3210uio_array_is_valid(struct uio **uiop, u_int count)
3211{
3212 user_ssize_t len = 0;
3213 u_int i;
3214
3215 for (i = 0; i < count; i++) {
3216 struct uio *auio = uiop[i];
3e170ce0 3217
fe8ab488
A
3218 if (auio != NULL) {
3219 user_ssize_t resid = uio_resid(auio);
3e170ce0 3220
fe8ab488
A
3221 /*
3222 * Sanity check on the validity of the iovec:
3223 * no point of going over sb_max
3224 */
0a7de745
A
3225 if (resid < 0 || (u_int32_t)resid > sb_max) {
3226 return 0;
3227 }
3e170ce0
A
3228
3229 len += resid;
0a7de745
A
3230 if (len < 0 || (u_int32_t)len > sb_max) {
3231 return 0;
3232 }
3e170ce0
A
3233 }
3234 }
0a7de745 3235 return 1;
3e170ce0
A
3236}
3237
3238
3239struct recv_msg_elem *
3240alloc_recv_msg_array(u_int count)
3241{
3242 struct recv_msg_elem *recv_msg_array;
3243
3244 recv_msg_array = _MALLOC(count * sizeof(struct recv_msg_elem),
3245 M_TEMP, M_WAITOK | M_ZERO);
3246
0a7de745 3247 return recv_msg_array;
3e170ce0
A
3248}
3249
3250void
3251free_recv_msg_array(struct recv_msg_elem *recv_msg_array, u_int count)
3252{
3253 u_int i;
3254
3255 for (i = 0; i < count; i++) {
3256 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3257
0a7de745 3258 if (recv_msg_elem->uio != NULL) {
3e170ce0 3259 uio_free(recv_msg_elem->uio);
0a7de745
A
3260 }
3261 if (recv_msg_elem->psa != NULL) {
3e170ce0 3262 _FREE(recv_msg_elem->psa, M_TEMP);
0a7de745
A
3263 }
3264 if (recv_msg_elem->controlp != NULL) {
3e170ce0 3265 m_freem(recv_msg_elem->controlp);
0a7de745 3266 }
3e170ce0
A
3267 }
3268 _FREE(recv_msg_array, M_TEMP);
3269}
3270
3271
3272__private_extern__ user_ssize_t
3273recv_msg_array_resid(struct recv_msg_elem *recv_msg_array, u_int count)
3274{
3275 user_ssize_t len = 0;
3276 u_int i;
3277
3278 for (i = 0; i < count; i++) {
3279 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3280
0a7de745 3281 if (recv_msg_elem->uio != NULL) {
3e170ce0 3282 len += uio_resid(recv_msg_elem->uio);
0a7de745 3283 }
3e170ce0 3284 }
0a7de745 3285 return len;
3e170ce0
A
3286}
3287
3288int
3289recv_msg_array_is_valid(struct recv_msg_elem *recv_msg_array, u_int count)
3290{
3291 user_ssize_t len = 0;
3292 u_int i;
3293
3294 for (i = 0; i < count; i++) {
3295 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3296
3297 if (recv_msg_elem->uio != NULL) {
3298 user_ssize_t resid = uio_resid(recv_msg_elem->uio);
3299
3300 /*
3301 * Sanity check on the validity of the iovec:
3302 * no point of going over sb_max
3303 */
0a7de745
A
3304 if (resid < 0 || (u_int32_t)resid > sb_max) {
3305 return 0;
3306 }
3e170ce0 3307
fe8ab488 3308 len += resid;
0a7de745
A
3309 if (len < 0 || (u_int32_t)len > sb_max) {
3310 return 0;
3311 }
fe8ab488
A
3312 }
3313 }
0a7de745 3314 return 1;
fe8ab488
A
3315}
3316
39236c6e 3317#if SENDFILE
2d21ac55 3318
0a7de745 3319#define SFUIOBUFS 64
2d21ac55
A
3320
3321/* Macros to compute the number of mbufs needed depending on cluster size */
0a7de745
A
3322#define HOWMANY_16K(n) ((((unsigned int)(n) - 1) >> M16KCLSHIFT) + 1)
3323#define HOWMANY_4K(n) ((((unsigned int)(n) - 1) >> MBIGCLSHIFT) + 1)
2d21ac55 3324
39236c6e 3325/* Upper send limit in bytes (SFUIOBUFS * PAGESIZE) */
0a7de745 3326#define SENDFILE_MAX_BYTES (SFUIOBUFS << PGSHIFT)
2d21ac55
A
3327
3328/* Upper send limit in the number of mbuf clusters */
0a7de745
A
3329#define SENDFILE_MAX_16K HOWMANY_16K(SENDFILE_MAX_BYTES)
3330#define SENDFILE_MAX_4K HOWMANY_4K(SENDFILE_MAX_BYTES)
2d21ac55 3331
1c79356b 3332static void
2d21ac55
A
3333alloc_sendpkt(int how, size_t pktlen, unsigned int *maxchunks,
3334 struct mbuf **m, boolean_t jumbocl)
1c79356b 3335{
2d21ac55 3336 unsigned int needed;
1c79356b 3337
0a7de745 3338 if (pktlen == 0) {
2d21ac55 3339 panic("%s: pktlen (%ld) must be non-zero\n", __func__, pktlen);
0a7de745 3340 }
1c79356b 3341
2d21ac55
A
3342 /*
3343 * Try to allocate for the whole thing. Since we want full control
3344 * over the buffer size and be able to accept partial result, we can't
3345 * use mbuf_allocpacket(). The logic below is similar to sosend().
3346 */
3347 *m = NULL;
6d2010ae 3348 if (pktlen > MBIGCLBYTES && jumbocl) {
2d21ac55
A
3349 needed = MIN(SENDFILE_MAX_16K, HOWMANY_16K(pktlen));
3350 *m = m_getpackets_internal(&needed, 1, how, 0, M16KCLBYTES);
3351 }
3352 if (*m == NULL) {
3353 needed = MIN(SENDFILE_MAX_4K, HOWMANY_4K(pktlen));
6d2010ae 3354 *m = m_getpackets_internal(&needed, 1, how, 0, MBIGCLBYTES);
2d21ac55
A
3355 }
3356
3357 /*
3358 * Our previous attempt(s) at allocation had failed; the system
3359 * may be short on mbufs, and we want to block until they are
3360 * available. This time, ask just for 1 mbuf and don't return
3361 * until we get it.
3362 */
3363 if (*m == NULL) {
3364 needed = 1;
6d2010ae 3365 *m = m_getpackets_internal(&needed, 1, M_WAIT, 1, MBIGCLBYTES);
1c79356b 3366 }
0a7de745 3367 if (*m == NULL) {
2d21ac55 3368 panic("%s: blocking allocation returned NULL\n", __func__);
0a7de745 3369 }
2d21ac55
A
3370
3371 *maxchunks = needed;
1c79356b
A
3372}
3373
3374/*
3375 * sendfile(2).
2d21ac55
A
3376 * int sendfile(int fd, int s, off_t offset, off_t *nbytes,
3377 * struct sf_hdtr *hdtr, int flags)
1c79356b
A
3378 *
3379 * Send a file specified by 'fd' and starting at 'offset' to a socket
2d21ac55
A
3380 * specified by 's'. Send only '*nbytes' of the file or until EOF if
3381 * *nbytes == 0. Optionally add a header and/or trailer to the socket
3382 * output. If specified, write the total number of bytes sent into *nbytes.
1c79356b
A
3383 */
3384int
2d21ac55 3385sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval)
1c79356b 3386{
91447636 3387 struct fileproc *fp;
1c79356b 3388 struct vnode *vp;
1c79356b 3389 struct socket *so;
2d21ac55
A
3390 struct writev_nocancel_args nuap;
3391 user_ssize_t writev_retval;
2d21ac55 3392 struct user_sf_hdtr user_hdtr;
b0d623f7
A
3393 struct user32_sf_hdtr user32_hdtr;
3394 struct user64_sf_hdtr user64_hdtr;
2d21ac55
A
3395 off_t off, xfsize;
3396 off_t nbytes = 0, sbytes = 0;
3397 int error = 0;
3398 size_t sizeof_hdtr;
2d21ac55
A
3399 off_t file_size;
3400 struct vfs_context context = *vfs_context_current();
3e170ce0 3401
2d21ac55
A
3402 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_START), uap->s,
3403 0, 0, 0, 0);
b0d623f7
A
3404
3405 AUDIT_ARG(fd, uap->fd);
3406 AUDIT_ARG(value32, uap->s);
3407
1c79356b
A
3408 /*
3409 * Do argument checking. Must be a regular file in, stream
3410 * type and connected socket out, positive offset.
3411 */
2d21ac55 3412 if ((error = fp_getfvp(p, uap->fd, &fp, &vp))) {
1c79356b 3413 goto done;
2d21ac55
A
3414 }
3415 if ((fp->f_flag & FREAD) == 0) {
91447636
A
3416 error = EBADF;
3417 goto done1;
1c79356b 3418 }
2d21ac55
A
3419 if (vnode_isreg(vp) == 0) {
3420 error = ENOTSUP;
91447636 3421 goto done1;
1c79356b 3422 }
91447636 3423 error = file_socket(uap->s, &so);
2d21ac55 3424 if (error) {
91447636 3425 goto done1;
2d21ac55 3426 }
55e303ae
A
3427 if (so == NULL) {
3428 error = EBADF;
91447636 3429 goto done2;
55e303ae 3430 }
1c79356b
A
3431 if (so->so_type != SOCK_STREAM) {
3432 error = EINVAL;
2d21ac55 3433 goto done2;
1c79356b
A
3434 }
3435 if ((so->so_state & SS_ISCONNECTED) == 0) {
3436 error = ENOTCONN;
2d21ac55 3437 goto done2;
1c79356b
A
3438 }
3439 if (uap->offset < 0) {
3440 error = EINVAL;
2d21ac55 3441 goto done2;
1c79356b 3442 }
2d21ac55
A
3443 if (uap->nbytes == USER_ADDR_NULL) {
3444 error = EINVAL;
3445 goto done2;
3446 }
3447 if (uap->flags != 0) {
3448 error = EINVAL;
3449 goto done2;
3450 }
3451
3452 context.vc_ucred = fp->f_fglob->fg_cred;
3453
3454#if CONFIG_MACF_SOCKET_SUBSET
3455 /* JMM - fetch connected sockaddr? */
3456 error = mac_socket_check_send(context.vc_ucred, so, NULL);
0a7de745 3457 if (error) {
2d21ac55 3458 goto done2;
0a7de745 3459 }
2d21ac55
A
3460#endif
3461
3462 /*
3463 * Get number of bytes to send
3464 * Should it applies to size of header and trailer?
2d21ac55 3465 */
cb323159
A
3466 error = copyin(uap->nbytes, &nbytes, sizeof(off_t));
3467 if (error) {
3468 goto done2;
3469 }
1c79356b
A
3470
3471 /*
3472 * If specified, get the pointer to the sf_hdtr struct for
3473 * any headers/trailers.
3474 */
2d21ac55
A
3475 if (uap->hdtr != USER_ADDR_NULL) {
3476 caddr_t hdtrp;
3477
0a7de745 3478 bzero(&user_hdtr, sizeof(user_hdtr));
2d21ac55 3479 if (IS_64BIT_PROCESS(p)) {
b0d623f7 3480 hdtrp = (caddr_t)&user64_hdtr;
0a7de745 3481 sizeof_hdtr = sizeof(user64_hdtr);
2d21ac55 3482 } else {
b0d623f7 3483 hdtrp = (caddr_t)&user32_hdtr;
0a7de745 3484 sizeof_hdtr = sizeof(user32_hdtr);
2d21ac55
A
3485 }
3486 error = copyin(uap->hdtr, hdtrp, sizeof_hdtr);
0a7de745 3487 if (error) {
2d21ac55 3488 goto done2;
0a7de745 3489 }
b0d623f7
A
3490 if (IS_64BIT_PROCESS(p)) {
3491 user_hdtr.headers = user64_hdtr.headers;
3492 user_hdtr.hdr_cnt = user64_hdtr.hdr_cnt;
3493 user_hdtr.trailers = user64_hdtr.trailers;
3494 user_hdtr.trl_cnt = user64_hdtr.trl_cnt;
3495 } else {
3496 user_hdtr.headers = user32_hdtr.headers;
3497 user_hdtr.hdr_cnt = user32_hdtr.hdr_cnt;
3498 user_hdtr.trailers = user32_hdtr.trailers;
3499 user_hdtr.trl_cnt = user32_hdtr.trl_cnt;
2d21ac55
A
3500 }
3501
1c79356b
A
3502 /*
3503 * Send any headers. Wimp out and use writev(2).
3504 */
2d21ac55 3505 if (user_hdtr.headers != USER_ADDR_NULL) {
0a7de745 3506 bzero(&nuap, sizeof(struct writev_args));
1c79356b 3507 nuap.fd = uap->s;
2d21ac55
A
3508 nuap.iovp = user_hdtr.headers;
3509 nuap.iovcnt = user_hdtr.hdr_cnt;
3510 error = writev_nocancel(p, &nuap, &writev_retval);
316670eb 3511 if (error) {
2d21ac55 3512 goto done2;
316670eb 3513 }
2d21ac55 3514 sbytes += writev_retval;
1c79356b
A
3515 }
3516 }
3517
3518 /*
2d21ac55
A
3519 * Get the file size for 2 reasons:
3520 * 1. We don't want to allocate more mbufs than necessary
3521 * 2. We don't want to read past the end of file
1c79356b 3522 */
316670eb 3523 if ((error = vnode_size(vp, &file_size, vfs_context_current())) != 0) {
2d21ac55 3524 goto done2;
316670eb 3525 }
1c79356b
A
3526
3527 /*
2d21ac55
A
3528 * Simply read file data into a chain of mbufs that used with scatter
3529 * gather reads. We're not (yet?) setup to use zero copy external
3530 * mbufs that point to the file pages.
1c79356b 3531 */
2d21ac55 3532 socket_lock(so, 1);
39236c6e 3533 error = sblock(&so->so_snd, SBL_WAIT);
2d21ac55
A
3534 if (error) {
3535 socket_unlock(so, 1);
3536 goto done2;
3537 }
0a7de745
A
3538 for (off = uap->offset;; off += xfsize, sbytes += xfsize) {
3539 mbuf_t m0 = NULL, m;
3540 unsigned int nbufs = SFUIOBUFS, i;
3541 uio_t auio;
3542 char uio_buf[UIO_SIZEOF(SFUIOBUFS)]; /* 1 KB !!! */
3543 size_t uiolen;
3544 user_ssize_t rlen;
3545 off_t pgoff;
3546 size_t pktlen;
2d21ac55 3547 boolean_t jumbocl;
1c79356b 3548
1c79356b 3549 /*
2d21ac55
A
3550 * Calculate the amount to transfer.
3551 * Align to round number of pages.
3552 * Not to exceed send socket buffer,
1c79356b
A
3553 * the EOF, or the passed in nbytes.
3554 */
2d21ac55
A
3555 xfsize = sbspace(&so->so_snd);
3556
3557 if (xfsize <= 0) {
3558 if (so->so_state & SS_CANTSENDMORE) {
3559 error = EPIPE;
3560 goto done3;
3561 } else if ((so->so_state & SS_NBIO)) {
3562 error = EAGAIN;
3563 goto done3;
3564 } else {
3565 xfsize = PAGE_SIZE;
3566 }
3567 }
3568
0a7de745 3569 if (xfsize > SENDFILE_MAX_BYTES) {
2d21ac55 3570 xfsize = SENDFILE_MAX_BYTES;
0a7de745 3571 } else if (xfsize > PAGE_SIZE) {
2d21ac55 3572 xfsize = trunc_page(xfsize);
0a7de745 3573 }
2d21ac55 3574 pgoff = off & PAGE_MASK_64;
0a7de745 3575 if (pgoff > 0 && PAGE_SIZE - pgoff < xfsize) {
1c79356b 3576 xfsize = PAGE_SIZE_64 - pgoff;
0a7de745
A
3577 }
3578 if (nbytes && xfsize > (nbytes - sbytes)) {
2d21ac55 3579 xfsize = nbytes - sbytes;
0a7de745
A
3580 }
3581 if (xfsize <= 0) {
2d21ac55 3582 break;
0a7de745
A
3583 }
3584 if (off + xfsize > file_size) {
2d21ac55 3585 xfsize = file_size - off;
0a7de745
A
3586 }
3587 if (xfsize <= 0) {
1c79356b 3588 break;
0a7de745 3589 }
2d21ac55 3590
1c79356b 3591 /*
2d21ac55
A
3592 * Attempt to use larger than system page-size clusters for
3593 * large writes only if there is a jumbo cluster pool and
3594 * if the socket is marked accordingly.
1c79356b 3595 */
2d21ac55
A
3596 jumbocl = sosendjcl && njcl > 0 &&
3597 ((so->so_flags & SOF_MULTIPAGES) || sosendjcl_ignore_capab);
3598
3599 socket_unlock(so, 0);
3600 alloc_sendpkt(M_WAIT, xfsize, &nbufs, &m0, jumbocl);
fe8ab488 3601 pktlen = mbuf_pkthdr_maxlen(m0);
0a7de745 3602 if (pktlen < (size_t)xfsize) {
2d21ac55 3603 xfsize = pktlen;
0a7de745 3604 }
39236c6e 3605
2d21ac55 3606 auio = uio_createwithbuffer(nbufs, off, UIO_SYSSPACE,
0a7de745 3607 UIO_READ, &uio_buf[0], sizeof(uio_buf));
2d21ac55 3608 if (auio == NULL) {
316670eb 3609 printf("sendfile failed. nbufs = %d. %s", nbufs,
0a7de745 3610 "File a radar related to rdar://10146739.\n");
2d21ac55
A
3611 mbuf_freem(m0);
3612 error = ENXIO;
3613 socket_lock(so, 0);
3614 goto done3;
1c79356b 3615 }
1c79356b 3616
2d21ac55 3617 for (i = 0, m = m0, uiolen = 0;
b0d623f7 3618 i < nbufs && m != NULL && uiolen < (size_t)xfsize;
2d21ac55
A
3619 i++, m = mbuf_next(m)) {
3620 size_t mlen = mbuf_maxlen(m);
3621
0a7de745 3622 if (mlen + uiolen > (size_t)xfsize) {
2d21ac55 3623 mlen = xfsize - uiolen;
0a7de745 3624 }
2d21ac55
A
3625 mbuf_setlen(m, mlen);
3626 uio_addiov(auio, CAST_USER_ADDR_T(mbuf_datastart(m)),
3627 mlen);
3628 uiolen += mlen;
3629 }
3630
0a7de745 3631 if (xfsize != uio_resid(auio)) {
2d21ac55 3632 printf("sendfile: xfsize: %lld != uio_resid(auio): "
0a7de745
A
3633 "%lld\n", xfsize, (long long)uio_resid(auio));
3634 }
2d21ac55
A
3635
3636 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_START),
3637 uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
3638 (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
3639 error = fo_read(fp, auio, FOF_OFFSET, &context);
3640 socket_lock(so, 0);
3641 if (error != 0) {
3642 if (uio_resid(auio) != xfsize && (error == ERESTART ||
3643 error == EINTR || error == EWOULDBLOCK)) {
3644 error = 0;
3645 } else {
3646 mbuf_freem(m0);
3647 goto done3;
1c79356b 3648 }
1c79356b 3649 }
2d21ac55
A
3650 xfsize -= uio_resid(auio);
3651 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_END),
3652 uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
3653 (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
3654
3655 if (xfsize == 0) {
3e170ce0 3656 // printf("sendfile: fo_read 0 bytes, EOF\n");
2d21ac55 3657 break;
91447636 3658 }
0a7de745 3659 if (xfsize + off > file_size) {
2d21ac55
A
3660 printf("sendfile: xfsize: %lld + off: %lld > file_size:"
3661 "%lld\n", xfsize, off, file_size);
0a7de745 3662 }
2d21ac55
A
3663 for (i = 0, m = m0, rlen = 0;
3664 i < nbufs && m != NULL && rlen < xfsize;
3665 i++, m = mbuf_next(m)) {
3666 size_t mlen = mbuf_maxlen(m);
3667
0a7de745 3668 if (rlen + mlen > (size_t)xfsize) {
2d21ac55 3669 mlen = xfsize - rlen;
0a7de745 3670 }
2d21ac55
A
3671 mbuf_setlen(m, mlen);
3672
3673 rlen += mlen;
3674 }
3675 mbuf_pkthdr_setlen(m0, xfsize);
3676
1c79356b
A
3677retry_space:
3678 /*
3679 * Make sure that the socket is still able to take more data.
3680 * CANTSENDMORE being true usually means that the connection
3681 * was closed. so_error is true when an error was sensed after
3682 * a previous send.
3683 * The state is checked after the page mapping and buffer
3684 * allocation above since those operations may block and make
3685 * any socket checks stale. From this point forward, nothing
3686 * blocks before the pru_send (or more accurately, any blocking
3687 * results in a loop back to here to re-check).
3688 */
3689 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
3690 if (so->so_state & SS_CANTSENDMORE) {
3691 error = EPIPE;
3692 } else {
3693 error = so->so_error;
3694 so->so_error = 0;
3695 }
2d21ac55
A
3696 m_freem(m0);
3697 goto done3;
1c79356b
A
3698 }
3699 /*
3700 * Wait for socket space to become available. We do this just
3701 * after checking the connection state above in order to avoid
3702 * a race condition with sbwait().
3703 */
2d21ac55 3704 if (sbspace(&so->so_snd) < (long)so->so_snd.sb_lowat) {
1c79356b 3705 if (so->so_state & SS_NBIO) {
2d21ac55 3706 m_freem(m0);
1c79356b 3707 error = EAGAIN;
2d21ac55 3708 goto done3;
1c79356b 3709 }
2d21ac55
A
3710 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT |
3711 DBG_FUNC_START), uap->s, 0, 0, 0, 0);
1c79356b 3712 error = sbwait(&so->so_snd);
0a7de745 3713 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT |
2d21ac55 3714 DBG_FUNC_END), uap->s, 0, 0, 0, 0);
1c79356b
A
3715 /*
3716 * An error from sbwait usually indicates that we've
3717 * been interrupted by a signal. If we've sent anything
3718 * then return bytes sent, otherwise return the error.
3719 */
3720 if (error) {
2d21ac55
A
3721 m_freem(m0);
3722 goto done3;
1c79356b
A
3723 }
3724 goto retry_space;
3725 }
39236c6e 3726
6d2010ae 3727 struct mbuf *control = NULL;
2d21ac55
A
3728 {
3729 /*
3730 * Socket filter processing
3731 */
2d21ac55 3732
6d2010ae
A
3733 error = sflt_data_out(so, NULL, &m0, &control, 0);
3734 if (error) {
3735 if (error == EJUSTRETURN) {
3736 error = 0;
3737 continue;
2d21ac55 3738 }
6d2010ae 3739 goto done3;
2d21ac55
A
3740 }
3741 /*
3742 * End Socket filter processing
3743 */
3744 }
3745 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
3746 uap->s, 0, 0, 0, 0);
3747 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m0,
6d2010ae 3748 0, control, p);
2d21ac55
A
3749 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
3750 uap->s, 0, 0, 0, 0);
1c79356b 3751 if (error) {
2d21ac55 3752 goto done3;
1c79356b
A
3753 }
3754 }
0a7de745 3755 sbunlock(&so->so_snd, FALSE); /* will unlock socket */
1c79356b
A
3756 /*
3757 * Send trailers. Wimp out and use writev(2).
3758 */
2d21ac55
A
3759 if (uap->hdtr != USER_ADDR_NULL &&
3760 user_hdtr.trailers != USER_ADDR_NULL) {
0a7de745 3761 bzero(&nuap, sizeof(struct writev_args));
2d21ac55
A
3762 nuap.fd = uap->s;
3763 nuap.iovp = user_hdtr.trailers;
3764 nuap.iovcnt = user_hdtr.trl_cnt;
3765 error = writev_nocancel(p, &nuap, &writev_retval);
316670eb 3766 if (error) {
2d21ac55 3767 goto done2;
316670eb 3768 }
2d21ac55 3769 sbytes += writev_retval;
1c79356b 3770 }
91447636
A
3771done2:
3772 file_drop(uap->s);
3773done1:
3774 file_drop(uap->fd);
1c79356b 3775done:
2d21ac55 3776 if (uap->nbytes != USER_ADDR_NULL) {
91447636 3777 /* XXX this appears bogus for some early failure conditions */
0a7de745 3778 copyout(&sbytes, uap->nbytes, sizeof(off_t));
1c79356b 3779 }
2d21ac55
A
3780 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_END), uap->s,
3781 (unsigned int)((sbytes >> 32) & 0x0ffffffff),
3782 (unsigned int)(sbytes & 0x0ffffffff), error, 0);
0a7de745 3783 return error;
91447636 3784done3:
0a7de745 3785 sbunlock(&so->so_snd, FALSE); /* will unlock socket */
91447636 3786 goto done2;
1c79356b
A
3787}
3788
2d21ac55
A
3789
3790#endif /* SENDFILE */