]> git.saurik.com Git - apple/xnu.git/blame - bsd/kern/uipc_syscalls.c
xnu-3789.41.3.tar.gz
[apple/xnu.git] / bsd / kern / uipc_syscalls.c
CommitLineData
1c79356b 1/*
3e170ce0 2 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
5d5c5d0d 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
39236c6e 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
39236c6e 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
39236c6e 17 *
2d21ac55
A
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
39236c6e 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * Copyright (c) 1982, 1986, 1989, 1990, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * sendfile(2) and related extensions:
2d21ac55 33 * Copyright (c) 1998, David Greenman. All rights reserved.
1c79356b
A
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. All advertising materials mentioning features or use of this software
44 * must display the following acknowledgement:
45 * This product includes software developed by the University of
46 * California, Berkeley and its contributors.
47 * 4. Neither the name of the University nor the names of its contributors
48 * may be used to endorse or promote products derived from this software
49 * without specific prior written permission.
50 *
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE.
62 *
63 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94
64 */
2d21ac55
A
65/*
66 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
67 * support for mandatory and extensible security protections. This notice
68 * is included in support of clause 2.2 (b) of the Apple Public License,
69 * Version 2.0.
70 */
1c79356b
A
71
72#include <sys/param.h>
73#include <sys/systm.h>
74#include <sys/filedesc.h>
91447636
A
75#include <sys/proc_internal.h>
76#include <sys/file_internal.h>
2d21ac55 77#include <sys/vnode_internal.h>
1c79356b 78#include <sys/malloc.h>
39236c6e 79#include <sys/mcache.h>
1c79356b 80#include <sys/mbuf.h>
fe8ab488 81#include <kern/locks.h>
91447636 82#include <sys/domain.h>
1c79356b 83#include <sys/protosw.h>
91447636 84#include <sys/signalvar.h>
1c79356b
A
85#include <sys/socket.h>
86#include <sys/socketvar.h>
1c79356b 87#include <sys/kernel.h>
91447636 88#include <sys/uio_internal.h>
2d21ac55 89#include <sys/kauth.h>
6d2010ae 90#include <kern/task.h>
39236c6e 91#include <sys/priv.h>
3e170ce0 92#include <sys/sysctl.h>
e5568f75 93
b0d623f7 94#include <security/audit/audit.h>
1c79356b
A
95
96#include <sys/kdebug.h>
91447636 97#include <sys/sysproto.h>
2d21ac55
A
98#include <netinet/in.h>
99#include <net/route.h>
100#include <netinet/in_pcb.h>
101
102#if CONFIG_MACF_SOCKET_SUBSET
103#include <security/mac_framework.h>
104#endif /* MAC_SOCKET_SUBSET */
105
/*
 * Shorthand accessors: each f_* name below expands to the matching field
 * reached through the f_fglob pointer of the object it is applied to
 * (used in this file on struct fileproc pointers, e.g. fp->f_flag).
 */
#define	f_flag		f_fglob->fg_flag
#define	f_type		f_fglob->fg_ops->fo_type
#define	f_msgcount	f_fglob->fg_msgcount
#define	f_cred		f_fglob->fg_cred
#define	f_ops		f_fglob->fg_ops
#define	f_offset	f_fglob->fg_offset
#define	f_data		f_fglob->fg_data

/*
 * Trace codes built with NETDBG_CODE() in the DBG_NETSOCK class; one
 * code per traced layer boundary or system call family.
 */
#define	DBG_LAYER_IN_BEG	NETDBG_CODE(DBG_NETSOCK, 0)
#define	DBG_LAYER_IN_END	NETDBG_CODE(DBG_NETSOCK, 2)
#define	DBG_LAYER_OUT_BEG	NETDBG_CODE(DBG_NETSOCK, 1)
#define	DBG_LAYER_OUT_END	NETDBG_CODE(DBG_NETSOCK, 3)
#define	DBG_FNC_SENDMSG		NETDBG_CODE(DBG_NETSOCK, (1 << 8) | 1)
#define	DBG_FNC_SENDTO		NETDBG_CODE(DBG_NETSOCK, (2 << 8) | 1)
#define	DBG_FNC_SENDIT		NETDBG_CODE(DBG_NETSOCK, (3 << 8) | 1)
#define	DBG_FNC_RECVFROM	NETDBG_CODE(DBG_NETSOCK, (5 << 8))
#define	DBG_FNC_RECVMSG		NETDBG_CODE(DBG_NETSOCK, (6 << 8))
#define	DBG_FNC_RECVIT		NETDBG_CODE(DBG_NETSOCK, (7 << 8))
#define	DBG_FNC_SENDFILE	NETDBG_CODE(DBG_NETSOCK, (10 << 8))
#define	DBG_FNC_SENDFILE_WAIT	NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 1))
#define	DBG_FNC_SENDFILE_READ	NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 2))
#define	DBG_FNC_SENDFILE_SEND	NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 3))
#define	DBG_FNC_SENDMSG_X	NETDBG_CODE(DBG_NETSOCK, (11 << 8))
#define	DBG_FNC_RECVMSG_X	NETDBG_CODE(DBG_NETSOCK, (12 << 8))

/*
 * On DEBUG or DEVELOPMENT builds kernel addresses are passed through
 * unchanged and DBG_PRINTF() forwards to printf(); on all other builds
 * addresses go through VM_KERNEL_ADDRPERM() and DBG_PRINTF() expands to
 * an empty statement.
 */
#if DEBUG || DEVELOPMENT
#define	DEBUG_KERNEL_ADDRPERM(_v) (_v)
#define	DBG_PRINTF(...) printf(__VA_ARGS__)
#else
#define	DEBUG_KERNEL_ADDRPERM(_v) VM_KERNEL_ADDRPERM(_v)
#define	DBG_PRINTF(...) do { } while (0)
#endif

/* TODO: should be in header file */
int falloc_locked(proc_t, struct fileproc **, int *, vfs_context_t, int);

/* Forward declarations for the file-local helpers defined in this file. */
static int sendit(struct proc *, struct socket *, struct user_msghdr *, uio_t,
    int, int32_t *);
static int recvit(struct proc *, int, struct user_msghdr *, uio_t, user_addr_t,
    int32_t *);
static int connectit(struct socket *, struct sockaddr *);
static int getsockaddr(struct socket *, struct sockaddr **, user_addr_t,
    size_t, boolean_t);
static int getsockaddr_s(struct socket *, struct sockaddr_storage *,
    user_addr_t, size_t, boolean_t);
static int getsockaddrlist(struct socket *, struct sockaddr_list **,
    user_addr_t, socklen_t, boolean_t);
#if SENDFILE
static void alloc_sendpkt(int, size_t, unsigned int *, struct mbuf **,
    boolean_t);
#endif /* SENDFILE */
static int connectx_nocancel(struct proc *, struct connectx_args *, int *);
static int connectitx(struct socket *, struct sockaddr_list **,
    struct sockaddr_list **, struct proc *, uint32_t, sae_associd_t,
    sae_connid_t *, uio_t, unsigned int, user_ssize_t *);
static int peeloff_nocancel(struct proc *, struct peeloff_args *, int *);
static int disconnectx_nocancel(struct proc *, struct disconnectx_args *,
    int *);
static int socket_common(struct proc *, int, int, int, pid_t, int32_t *, int);

/* Helpers operating on arrays of user_msghdr_x / uio (batched send). */
static int internalize_user_msghdr_array(const void *, int, int, u_int,
    struct user_msghdr_x *, struct uio **);
static u_int externalize_user_msghdr_array(void *, int, int, u_int,
    const struct user_msghdr_x *, struct uio **);

static void free_uio_array(struct uio **, u_int);
static int uio_array_is_valid(struct uio **, u_int);
/* Helpers operating on arrays of recv_msg_elem (batched receive). */
static int recv_msg_array_is_valid(struct recv_msg_elem *, u_int);
static int internalize_recv_msghdr_array(const void *, int, int,
    u_int, struct user_msghdr_x *, struct recv_msg_elem *);
static u_int externalize_recv_msghdr_array(void *, int, int, u_int,
    const struct user_msghdr_x *, struct recv_msg_elem *);
static struct recv_msg_elem *alloc_recv_msg_array(u_int count);
static void free_recv_msg_array(struct recv_msg_elem *, u_int);

SYSCTL_DECL(_kern_ipc);

/*
 * Read-write tunables registered under _kern_ipc as "maxsendmsgx" and
 * "maxrecvmsgx"; both start at 100.
 * NOTE(review): presumably these cap the number of messages accepted by
 * one batched send/receive call -- confirm against the users of
 * somaxsendmsgx / somaxrecvmsgx.
 */
static u_int somaxsendmsgx = 100;
SYSCTL_UINT(_kern_ipc, OID_AUTO, maxsendmsgx,
    CTLFLAG_RW | CTLFLAG_LOCKED, &somaxsendmsgx, 0, "");
static u_int somaxrecvmsgx = 100;
SYSCTL_UINT(_kern_ipc, OID_AUTO, maxrecvmsgx,
    CTLFLAG_RW | CTLFLAG_LOCKED, &somaxrecvmsgx, 0, "");

/*
 * System call interface to the socket abstraction.
 */

/* File operations vector installed on every socket-backed fileproc below. */
extern const struct fileops socketops;
1c79356b 195
2d21ac55
A
196/*
197 * Returns: 0 Success
198 * EACCES Mandatory Access Control failure
199 * falloc:ENFILE
200 * falloc:EMFILE
201 * falloc:ENOMEM
202 * socreate:EAFNOSUPPORT
203 * socreate:EPROTOTYPE
204 * socreate:EPROTONOSUPPORT
205 * socreate:ENOBUFS
206 * socreate:ENOMEM
2d21ac55
A
207 * socreate:??? [other protocol families, IPSEC]
208 */
1c79356b 209int
39236c6e
A
210socket(struct proc *p,
211 struct socket_args *uap,
212 int32_t *retval)
213{
214 return (socket_common(p, uap->domain, uap->type, uap->protocol,
215 proc_selfpid(), retval, 0));
216}
217
218int
219socket_delegate(struct proc *p,
220 struct socket_delegate_args *uap,
221 int32_t *retval)
222{
223 return socket_common(p, uap->domain, uap->type, uap->protocol,
224 uap->epid, retval, 1);
225}
226
227static int
228socket_common(struct proc *p,
229 int domain,
230 int type,
231 int protocol,
232 pid_t epid,
233 int32_t *retval,
234 int delegate)
1c79356b 235{
1c79356b 236 struct socket *so;
91447636 237 struct fileproc *fp;
1c79356b
A
238 int fd, error;
239
39236c6e 240 AUDIT_ARG(socket, domain, type, protocol);
2d21ac55 241#if CONFIG_MACF_SOCKET_SUBSET
39236c6e
A
242 if ((error = mac_socket_check_create(kauth_cred_get(), domain,
243 type, protocol)) != 0)
2d21ac55
A
244 return (error);
245#endif /* MAC_SOCKET_SUBSET */
1c79356b 246
39236c6e
A
247 if (delegate) {
248 error = priv_check_cred(kauth_cred_get(),
249 PRIV_NET_PRIVILEGED_SOCKET_DELEGATE, 0);
250 if (error)
251 return (EACCES);
252 }
253
2d21ac55 254 error = falloc(p, &fp, &fd, vfs_context_current());
91447636 255 if (error) {
1c79356b 256 return (error);
91447636 257 }
1c79356b 258 fp->f_flag = FREAD|FWRITE;
1c79356b 259 fp->f_ops = &socketops;
91447636 260
39236c6e
A
261 if (delegate)
262 error = socreate_delegate(domain, &so, type, protocol, epid);
263 else
264 error = socreate(domain, &so, type, protocol);
265
91447636
A
266 if (error) {
267 fp_free(p, fd, fp);
1c79356b
A
268 } else {
269 fp->f_data = (caddr_t)so;
91447636
A
270
271 proc_fdlock(p);
6601e61a 272 procfdtbl_releasefd(p, fd, NULL);
2d21ac55 273
91447636
A
274 fp_drop(p, fd, fp, 1);
275 proc_fdunlock(p);
276
1c79356b 277 *retval = fd;
3e170ce0
A
278 if (ENTR_SHOULDTRACE) {
279 KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
280 fd, 0, (int64_t)VM_KERNEL_ADDRPERM(so));
281 }
1c79356b
A
282 }
283 return (error);
284}
285
2d21ac55
A
286/*
287 * Returns: 0 Success
288 * EDESTADDRREQ Destination address required
289 * EBADF Bad file descriptor
290 * EACCES Mandatory Access Control failure
291 * file_socket:ENOTSOCK
292 * file_socket:EBADF
293 * getsockaddr:ENAMETOOLONG Filename too long
294 * getsockaddr:EINVAL Invalid argument
295 * getsockaddr:ENOMEM Not enough space
296 * getsockaddr:EFAULT Bad address
39236c6e 297 * sobindlock:???
2d21ac55 298 */
1c79356b
A
299/* ARGSUSED */
300int
b0d623f7 301bind(__unused proc_t p, struct bind_args *uap, __unused int32_t *retval)
1c79356b 302{
2d21ac55
A
303 struct sockaddr_storage ss;
304 struct sockaddr *sa = NULL;
91447636 305 struct socket *so;
2d21ac55 306 boolean_t want_free = TRUE;
1c79356b
A
307 int error;
308
55e303ae 309 AUDIT_ARG(fd, uap->s);
91447636 310 error = file_socket(uap->s, &so);
2d21ac55 311 if (error != 0)
1c79356b 312 return (error);
2d21ac55
A
313 if (so == NULL) {
314 error = EBADF;
315 goto out;
316 }
317 if (uap->name == USER_ADDR_NULL) {
318 error = EDESTADDRREQ;
319 goto out;
320 }
321 if (uap->namelen > sizeof (ss)) {
4a3eedf9 322 error = getsockaddr(so, &sa, uap->name, uap->namelen, TRUE);
2d21ac55 323 } else {
4a3eedf9 324 error = getsockaddr_s(so, &ss, uap->name, uap->namelen, TRUE);
2d21ac55
A
325 if (error == 0) {
326 sa = (struct sockaddr *)&ss;
327 want_free = FALSE;
328 }
329 }
330 if (error != 0)
91447636 331 goto out;
2d21ac55
A
332 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
333#if CONFIG_MACF_SOCKET_SUBSET
334 if ((error = mac_socket_check_bind(kauth_cred_get(), so, sa)) == 0)
39236c6e 335 error = sobindlock(so, sa, 1); /* will lock socket */
2d21ac55 336#else
39236c6e 337 error = sobindlock(so, sa, 1); /* will lock socket */
2d21ac55
A
338#endif /* MAC_SOCKET_SUBSET */
339 if (want_free)
340 FREE(sa, M_SONAME);
91447636
A
341out:
342 file_drop(uap->s);
1c79356b
A
343 return (error);
344}
345
2d21ac55
A
346/*
347 * Returns: 0 Success
348 * EBADF
349 * EACCES Mandatory Access Control failure
350 * file_socket:ENOTSOCK
351 * file_socket:EBADF
352 * solisten:EINVAL
353 * solisten:EOPNOTSUPP
354 * solisten:???
355 */
1c79356b 356int
2d21ac55 357listen(__unused struct proc *p, struct listen_args *uap,
b0d623f7 358 __unused int32_t *retval)
1c79356b 359{
1c79356b 360 int error;
2d21ac55 361 struct socket *so;
1c79356b 362
55e303ae 363 AUDIT_ARG(fd, uap->s);
91447636 364 error = file_socket(uap->s, &so);
1c79356b
A
365 if (error)
366 return (error);
91447636 367 if (so != NULL)
2d21ac55
A
368#if CONFIG_MACF_SOCKET_SUBSET
369 {
370 error = mac_socket_check_listen(kauth_cred_get(), so);
371 if (error == 0)
372 error = solisten(so, uap->backlog);
373 }
374#else
91447636 375 error = solisten(so, uap->backlog);
2d21ac55 376#endif /* MAC_SOCKET_SUBSET */
55e303ae 377 else
91447636 378 error = EBADF;
2d21ac55 379
91447636
A
380 file_drop(uap->s);
381 return (error);
1c79356b
A
382}
383
2d21ac55
A
/*
 * accept_nocancel
 *
 * Takes the first socket off the completed-connection queue of the
 * listening socket behind uap->s, allocates a new file descriptor for
 * it, and optionally copies the peer address and its length out to
 * uap->name / uap->anamelen.  Sleeps until a connection is available
 * unless the listening socket has SS_NBIO set.
 *
 * Parameters:	p		Calling process
 *		uap->s		Descriptor of the listening socket
 *		uap->name	User buffer for the peer address, or NULL
 *		uap->anamelen	User pointer to the buffer length (in) and
 *				the address length (out)
 *		retval		Set to -1 on entry; receives the new
 *				descriptor once falloc() has succeeded
 *
 * Returns:	fp_getfsock:EBADF	Bad file descriptor
 *	fp_getfsock:EOPNOTSUPP	...
 *	xlate => :ENOTSOCK		Socket operation on non-socket
 *		:EFAULT			Bad address on copyin/copyout
 *		:EBADF			Bad file descriptor
 *		:EOPNOTSUPP		Operation not supported on socket
 *		:EINVAL			Invalid argument
 *		:EWOULDBLOCK		Operation would block
 *		:ECONNABORTED		Connection aborted
 *		:EINTR			Interrupted function
 *		:EACCES			Mandatory Access Control failure
 *	falloc_locked:ENFILE		Too many files open in system
 *	falloc_locked:EMFILE		Too many open files
 *	falloc_locked:ENOMEM		Not enough space
 *		0			Success
 */
int
accept_nocancel(struct proc *p, struct accept_nocancel_args *uap,
    int32_t *retval)
{
	struct fileproc *fp;
	struct sockaddr *sa = NULL;
	socklen_t namelen;
	int error;
	struct socket *head, *so = NULL;
	lck_mtx_t *mutex_held;
	int fd = uap->s;
	int newfd;
	short fflag;		/* type must match fp->f_flag */
	int dosocklock = 0;

	*retval = -1;

	AUDIT_ARG(fd, uap->s);

	/* The caller's buffer length is only needed when a buffer is given. */
	if (uap->name) {
		error = copyin(uap->anamelen, (caddr_t)&namelen,
		    sizeof (socklen_t));
		if (error)
			return (error);
	}
	error = fp_getfsock(p, fd, &fp, &head);
	if (error) {
		/* Report a non-socket descriptor as ENOTSOCK. */
		if (error == EOPNOTSUPP)
			error = ENOTSOCK;
		return (error);
	}
	/* From here on every exit goes through out:, which calls file_drop(). */
	if (head == NULL) {
		error = EBADF;
		goto out;
	}
#if CONFIG_MACF_SOCKET_SUBSET
	if ((error = mac_socket_check_accept(kauth_cred_get(), head)) != 0)
		goto out;
#endif /* CONFIG_MACF_SOCKET_SUBSET */

	socket_lock(head, 1);

	/*
	 * Pick the mutex to sleep on: the protocol's own lock when it
	 * supplies pr_getlock, otherwise the domain mutex.  dosocklock
	 * records that the accepted socket must be locked separately
	 * further down.
	 */
	if (head->so_proto->pr_getlock != NULL) {
		mutex_held = (*head->so_proto->pr_getlock)(head, 0);
		dosocklock = 1;
	} else {
		mutex_held = head->so_proto->pr_domain->dom_mtx;
		dosocklock = 0;
	}

	if ((head->so_options & SO_ACCEPTCONN) == 0) {
		if ((head->so_proto->pr_flags & PR_CONNREQUIRED) == 0) {
			error = EOPNOTSUPP;
		} else {
			/* POSIX: The socket is not accepting connections */
			error = EINVAL;
		}
		socket_unlock(head, 1);
		goto out;
	}
	/* Non-blocking listener with nothing queued: do not sleep. */
	if ((head->so_state & SS_NBIO) && head->so_comp.tqh_first == NULL) {
		socket_unlock(head, 1);
		error = EWOULDBLOCK;
		goto out;
	}
	/* Sleep until a connection completes or an error is posted. */
	while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
		if (head->so_state & SS_CANTRCVMORE) {
			head->so_error = ECONNABORTED;
			break;
		}
		if (head->so_usecount < 1)
			panic("accept: head=%p refcount=%d\n", head,
			    head->so_usecount);
		error = msleep((caddr_t)&head->so_timeo, mutex_held,
		    PSOCK | PCATCH, "accept", 0);
		if (head->so_usecount < 1)
			panic("accept: 2 head=%p refcount=%d\n", head,
			    head->so_usecount);
		/* A draining socket overrides whatever msleep() returned. */
		if ((head->so_state & SS_DRAINING)) {
			error = ECONNABORTED;
		}
		if (error) {
			socket_unlock(head, 1);
			goto out;
		}
	}
	/* Consume and report a pending error on the listening socket. */
	if (head->so_error) {
		error = head->so_error;
		head->so_error = 0;
		socket_unlock(head, 1);
		goto out;
	}

	/*
	 * At this point we know that there is at least one connection
	 * ready to be accepted. Remove it from the queue prior to
	 * allocating the file descriptor for it since falloc() may
	 * block allowing another process to accept the connection
	 * instead.
	 */
	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
	so = TAILQ_FIRST(&head->so_comp);
	TAILQ_REMOVE(&head->so_comp, so, so_list);
	so->so_head = NULL;
	so->so_state &= ~SS_COMP;
	head->so_qlen--;
	/* unlock head to avoid deadlock with select, keep a ref on head */
	socket_unlock(head, 0);

#if CONFIG_MACF_SOCKET_SUBSET
	/*
	 * Pass the pre-accepted socket to the MAC framework. This is
	 * cheaper than allocating a file descriptor for the socket,
	 * calling the protocol accept callback, and possibly freeing
	 * the file descriptor should the MAC check fail.
	 */
	if ((error = mac_socket_check_accepted(kauth_cred_get(), so)) != 0) {
		/* Clear SS_NOFDREF under the socket lock before closing. */
		socket_lock(so, 1);
		so->so_state &= ~SS_NOFDREF;
		socket_unlock(so, 1);
		soclose(so);
		/* Drop reference on listening socket */
		sodereference(head);
		goto out;
	}
#endif /* CONFIG_MACF_SOCKET_SUBSET */

	/*
	 * Pass the pre-accepted socket to any interested socket filter(s).
	 * Upon failure, the socket would have been closed by the callee.
	 */
	if (so->so_filt != NULL && (error = soacceptfilter(so, head)) != 0) {
		/* Drop reference on listening socket */
		sodereference(head);
		/* Propagate socket filter's error code to the caller */
		goto out;
	}

	/*
	 * Save the listener's file flags before fp is reused: falloc()
	 * below overwrites fp with the fileproc of the new descriptor.
	 */
	fflag = fp->f_flag;
	error = falloc(p, &fp, &newfd, vfs_context_current());
	if (error) {
		/*
		 * Probably ran out of file descriptors.
		 *
		 * <rdar://problem/8554930>
		 * Don't put this back on the socket like we used to, that
		 * just causes the client to spin. Drop the socket.
		 */
		socket_lock(so, 1);
		so->so_state &= ~SS_NOFDREF;
		socket_unlock(so, 1);
		soclose(so);
		sodereference(head);
		goto out;
	}
	*retval = newfd;
	/* The new descriptor gets the same file flags as the listener. */
	fp->f_flag = fflag;
	fp->f_ops = &socketops;
	fp->f_data = (caddr_t)so;

	/* Re-take the head lock; lock so as well when it has its own lock. */
	socket_lock(head, 0);
	if (dosocklock)
		socket_lock(so, 1);

	/* Sync socket non-blocking/async state with file flags */
	if (fp->f_flag & FNONBLOCK) {
		so->so_state |= SS_NBIO;
	} else {
		so->so_state &= ~SS_NBIO;
	}

	if (fp->f_flag & FASYNC) {
		so->so_state |= SS_ASYNC;
		so->so_rcv.sb_flags |= SB_ASYNC;
		so->so_snd.sb_flags |= SB_ASYNC;
	} else {
		so->so_state &= ~SS_ASYNC;
		so->so_rcv.sb_flags &= ~SB_ASYNC;
		so->so_snd.sb_flags &= ~SB_ASYNC;
	}

	/* The return value is deliberately ignored; sa == NULL is checked. */
	(void) soacceptlock(so, &sa, 0);
	socket_unlock(head, 1);
	if (sa == NULL) {
		/* No peer address: report a zero length if one was asked for. */
		namelen = 0;
		if (uap->name)
			goto gotnoname;
		error = 0;
		goto releasefd;
	}
	AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);

	if (uap->name) {
		socklen_t sa_len;

		/* save sa_len before it is destroyed */
		sa_len = sa->sa_len;
		/* Never copy out more than the caller's buffer can hold. */
		namelen = MIN(namelen, sa_len);
		error = copyout(sa, uap->name, namelen);
		if (!error)
			/* return the actual, untruncated address length */
			namelen = sa_len;
gotnoname:
		/*
		 * NOTE(review): this assignment overwrites the result of
		 * the address copyout above, so a failure there is only
		 * reported if this length copyout fails too -- confirm
		 * that is intended.
		 */
		error = copyout((caddr_t)&namelen, uap->anamelen,
		    sizeof (socklen_t));
	}
	/* sa may be NULL here when arriving via gotnoname. */
	FREE(sa, M_SONAME);

releasefd:
	/*
	 * If the socket has been marked as inactive by sosetdefunct(),
	 * disallow further operations on it.
	 */
	if (so->so_flags & SOF_DEFUNCT) {
		sodefunct(current_proc(), so,
		    SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL);
	}

	if (dosocklock)
		socket_unlock(so, 1);

	/* Release the new descriptor slot and drop our fileproc reference. */
	proc_fdlock(p);
	procfdtbl_releasefd(p, newfd, NULL);
	fp_drop(p, newfd, fp, 1);
	proc_fdunlock(p);

out:
	/* Always applied to the listening descriptor, not to newfd. */
	file_drop(fd);

	/* newfd and so are only valid here when error == 0. */
	if (error == 0 && ENTR_SHOULDTRACE) {
		KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
		    newfd, 0, (int64_t)VM_KERNEL_ADDRPERM(so));
	}
	return (error);
}
636
637int
b0d623f7 638accept(struct proc *p, struct accept_args *uap, int32_t *retval)
1c79356b 639{
2d21ac55 640 __pthread_testcancel(1);
3e170ce0
A
641 return (accept_nocancel(p, (struct accept_nocancel_args *)uap,
642 retval));
1c79356b
A
643}
644
2d21ac55
A
645/*
646 * Returns: 0 Success
647 * EBADF Bad file descriptor
648 * EALREADY Connection already in progress
649 * EINPROGRESS Operation in progress
650 * ECONNABORTED Connection aborted
651 * EINTR Interrupted function
652 * EACCES Mandatory Access Control failure
653 * file_socket:ENOTSOCK
654 * file_socket:EBADF
655 * getsockaddr:ENAMETOOLONG Filename too long
656 * getsockaddr:EINVAL Invalid argument
657 * getsockaddr:ENOMEM Not enough space
658 * getsockaddr:EFAULT Bad address
659 * soconnectlock:EOPNOTSUPP
660 * soconnectlock:EISCONN
661 * soconnectlock:??? [depends on protocol, filters]
662 * msleep:EINTR
663 *
664 * Imputed: so_error error may be set from so_error, which
665 * may have been set by soconnectlock.
666 */
667/* ARGSUSED */
1c79356b 668int
b0d623f7 669connect(struct proc *p, struct connect_args *uap, int32_t *retval)
1c79356b 670{
2d21ac55 671 __pthread_testcancel(1);
3e170ce0
A
672 return (connect_nocancel(p, (struct connect_nocancel_args *)uap,
673 retval));
1c79356b 674}
1c79356b 675
1c79356b 676int
39236c6e 677connect_nocancel(proc_t p, struct connect_nocancel_args *uap, int32_t *retval)
1c79356b 678{
39236c6e 679#pragma unused(p, retval)
91447636 680 struct socket *so;
2d21ac55
A
681 struct sockaddr_storage ss;
682 struct sockaddr *sa = NULL;
91447636
A
683 int error;
684 int fd = uap->s;
4a3eedf9 685 boolean_t dgram;
1c79356b 686
55e303ae 687 AUDIT_ARG(fd, uap->s);
2d21ac55
A
688 error = file_socket(fd, &so);
689 if (error != 0)
1c79356b 690 return (error);
91447636
A
691 if (so == NULL) {
692 error = EBADF;
693 goto out;
694 }
695
4a3eedf9
A
696 /*
697 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
698 * if this is a datagram socket; translate for other types.
699 */
700 dgram = (so->so_type == SOCK_DGRAM);
701
2d21ac55
A
702 /* Get socket address now before we obtain socket lock */
703 if (uap->namelen > sizeof (ss)) {
4a3eedf9 704 error = getsockaddr(so, &sa, uap->name, uap->namelen, !dgram);
2d21ac55 705 } else {
4a3eedf9 706 error = getsockaddr_s(so, &ss, uap->name, uap->namelen, !dgram);
39236c6e 707 if (error == 0)
2d21ac55 708 sa = (struct sockaddr *)&ss;
2d21ac55
A
709 }
710 if (error != 0)
711 goto out;
712
39236c6e
A
713 error = connectit(so, sa);
714
715 if (sa != NULL && sa != SA(&ss))
716 FREE(sa, M_SONAME);
717 if (error == ERESTART)
718 error = EINTR;
719out:
720 file_drop(fd);
721 return (error);
722}
723
724static int
725connectx_nocancel(struct proc *p, struct connectx_args *uap, int *retval)
726{
727#pragma unused(p, retval)
728 struct sockaddr_list *src_sl = NULL, *dst_sl = NULL;
729 struct socket *so;
3e170ce0 730 int error, error1, fd = uap->socket;
39236c6e 731 boolean_t dgram;
3e170ce0
A
732 sae_connid_t cid = SAE_CONNID_ANY;
733 struct user32_sa_endpoints ep32;
734 struct user64_sa_endpoints ep64;
735 struct user_sa_endpoints ep;
736 user_ssize_t bytes_written = 0;
737 struct user_iovec *iovp;
738 uio_t auio = NULL;
39236c6e 739
3e170ce0 740 AUDIT_ARG(fd, uap->socket);
39236c6e
A
741 error = file_socket(fd, &so);
742 if (error != 0)
743 return (error);
744 if (so == NULL) {
745 error = EBADF;
746 goto out;
747 }
748
3e170ce0
A
749 if (uap->endpoints == USER_ADDR_NULL) {
750 error = EINVAL;
751 goto out;
752 }
753
754 if (IS_64BIT_PROCESS(p)) {
755 error = copyin(uap->endpoints, (caddr_t)&ep64, sizeof(ep64));
756 if (error != 0)
757 goto out;
758
759 ep.sae_srcif = ep64.sae_srcif;
760 ep.sae_srcaddr = ep64.sae_srcaddr;
761 ep.sae_srcaddrlen = ep64.sae_srcaddrlen;
762 ep.sae_dstaddr = ep64.sae_dstaddr;
763 ep.sae_dstaddrlen = ep64.sae_dstaddrlen;
764 } else {
765 error = copyin(uap->endpoints, (caddr_t)&ep32, sizeof(ep32));
766 if (error != 0)
767 goto out;
768
769 ep.sae_srcif = ep32.sae_srcif;
770 ep.sae_srcaddr = ep32.sae_srcaddr;
771 ep.sae_srcaddrlen = ep32.sae_srcaddrlen;
772 ep.sae_dstaddr = ep32.sae_dstaddr;
773 ep.sae_dstaddrlen = ep32.sae_dstaddrlen;
774 }
fe8ab488 775
39236c6e
A
776 /*
777 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
778 * if this is a datagram socket; translate for other types.
779 */
780 dgram = (so->so_type == SOCK_DGRAM);
781
782 /*
783 * Get socket address(es) now before we obtain socket lock; use
784 * sockaddr_list for src address for convenience, if present,
785 * even though it won't hold more than one.
786 */
3e170ce0
A
787 if (ep.sae_srcaddr != USER_ADDR_NULL && (error = getsockaddrlist(so,
788 &src_sl, (user_addr_t)(caddr_t)ep.sae_srcaddr, ep.sae_srcaddrlen,
789 dgram)) != 0)
39236c6e
A
790 goto out;
791
3e170ce0
A
792 if (ep.sae_dstaddr == USER_ADDR_NULL) {
793 error = EINVAL;
794 goto out;
795 }
796
797 error = getsockaddrlist(so, &dst_sl, (user_addr_t)(caddr_t)ep.sae_dstaddr,
798 ep.sae_dstaddrlen, dgram);
39236c6e
A
799 if (error != 0)
800 goto out;
801
802 VERIFY(dst_sl != NULL &&
803 !TAILQ_EMPTY(&dst_sl->sl_head) && dst_sl->sl_cnt > 0);
804
3e170ce0
A
805 if (uap->iov != USER_ADDR_NULL) {
806 /* Verify range before calling uio_create() */
807 if (uap->iovcnt <= 0 || uap->iovcnt > UIO_MAXIOV)
808 return (EINVAL);
809
810 if (uap->len == USER_ADDR_NULL)
811 return (EINVAL);
812
813 /* allocate a uio to hold the number of iovecs passed */
814 auio = uio_create(uap->iovcnt, 0,
815 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
816 UIO_WRITE);
817
818 if (auio == NULL) {
819 error = ENOMEM;
820 goto out;
821 }
822
823 /*
824 * get location of iovecs within the uio.
825 * then copyin the iovecs from user space.
826 */
827 iovp = uio_iovsaddr(auio);
828 if (iovp == NULL) {
829 error = ENOMEM;
830 goto out;
831 }
832 error = copyin_user_iovec_array(uap->iov,
833 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
834 uap->iovcnt, iovp);
835 if (error != 0)
836 goto out;
837
838 /* finish setup of uio_t */
839 error = uio_calculateresid(auio);
840 if (error != 0) {
841 goto out;
842 }
843 }
844
845 error = connectitx(so, &src_sl, &dst_sl, p, ep.sae_srcif, uap->associd,
846 &cid, auio, uap->flags, &bytes_written);
39236c6e
A
847 if (error == ERESTART)
848 error = EINTR;
849
3e170ce0
A
850 if (uap->len != USER_ADDR_NULL) {
851 error1 = copyout(&bytes_written, uap->len, sizeof (uap->len));
852 /* give precedence to connectitx errors */
853 if ((error1 != 0) && (error == 0))
854 error = error1;
855 }
39236c6e 856
3e170ce0
A
857 if (uap->connid != USER_ADDR_NULL) {
858 error1 = copyout(&cid, uap->connid, sizeof (cid));
859 /* give precedence to connectitx errors */
860 if ((error1 != 0) && (error == 0))
861 error = error1;
862 }
39236c6e
A
863out:
864 file_drop(fd);
3e170ce0
A
865 if (auio != NULL) {
866 uio_free(auio);
867 }
39236c6e
A
868 if (src_sl != NULL)
869 sockaddrlist_free(src_sl);
870 if (dst_sl != NULL)
871 sockaddrlist_free(dst_sl);
872 return (error);
873}
874
/*
 * connectx
 *
 * Cancellable front end for connectx_nocancel().
 */
int
connectx(struct proc *p, struct connectx_args *uap, int *retval)
{
	int error;

	/*
	 * Due to similarity with a POSIX interface, define as
	 * an unofficial cancellation point.
	 */
	__pthread_testcancel(1);

	error = connectx_nocancel(p, uap, retval);
	return (error);
}
885
886static int
887connectit(struct socket *so, struct sockaddr *sa)
888{
889 int error;
890
2d21ac55
A
891 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
892#if CONFIG_MACF_SOCKET_SUBSET
39236c6e
A
893 if ((error = mac_socket_check_connect(kauth_cred_get(), so, sa)) != 0)
894 return (error);
895#endif /* MAC_SOCKET_SUBSET */
896
897 socket_lock(so, 1);
898 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
899 error = EALREADY;
900 goto out;
901 }
902 error = soconnectlock(so, sa, 0);
903 if (error != 0) {
904 so->so_state &= ~SS_ISCONNECTING;
2d21ac55
A
905 goto out;
906 }
39236c6e
A
907 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
908 error = EINPROGRESS;
909 goto out;
910 }
911 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
912 lck_mtx_t *mutex_held;
913
914 if (so->so_proto->pr_getlock != NULL)
915 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
916 else
917 mutex_held = so->so_proto->pr_domain->dom_mtx;
918 error = msleep((caddr_t)&so->so_timeo, mutex_held,
919 PSOCK | PCATCH, __func__, 0);
920 if (so->so_state & SS_DRAINING) {
921 error = ECONNABORTED;
922 }
923 if (error != 0)
924 break;
925 }
926 if (error == 0) {
927 error = so->so_error;
928 so->so_error = 0;
929 }
930out:
931 socket_unlock(so, 1);
932 return (error);
933}
934
935static int
936connectitx(struct socket *so, struct sockaddr_list **src_sl,
937 struct sockaddr_list **dst_sl, struct proc *p, uint32_t ifscope,
3e170ce0
A
938 sae_associd_t aid, sae_connid_t *pcid, uio_t auio, unsigned int flags,
939 user_ssize_t *bytes_written)
39236c6e
A
940{
941 struct sockaddr_entry *se;
942 int error;
3e170ce0 943#pragma unused (flags)
39236c6e
A
944
945 VERIFY(dst_sl != NULL && *dst_sl != NULL);
946
947 TAILQ_FOREACH(se, &(*dst_sl)->sl_head, se_link) {
948 VERIFY(se->se_addr != NULL);
949 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()),
950 se->se_addr);
951#if CONFIG_MACF_SOCKET_SUBSET
952 if ((error = mac_socket_check_connect(kauth_cred_get(),
953 so, se->se_addr)) != 0)
954 return (error);
2d21ac55 955#endif /* MAC_SOCKET_SUBSET */
39236c6e 956 }
91447636 957
39236c6e 958 socket_lock(so, 1);
91447636 959 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
91447636
A
960 error = EALREADY;
961 goto out;
962 }
3e170ce0
A
963
964 if ((so->so_proto->pr_flags & PR_DATA_IDEMPOTENT) &&
965 (flags & CONNECT_DATA_IDEMPOTENT))
966 so->so_flags1 |= SOF1_DATA_IDEMPOTENT;
967
968 /*
969 * Case 1: CONNECT_RESUME_ON_READ_WRITE set, no data.
970 * Case 2: CONNECT_RESUME_ON_READ_WRITE set, with data (user error)
971 * Case 3: CONNECT_RESUME_ON_READ_WRITE not set, with data
972 * Case 3 allows user to combine write with connect even if they have
973 * no use for TFO (such as regular TCP, and UDP).
974 * Case 4: CONNECT_RESUME_ON_READ_WRITE not set, no data (regular case)
975 */
976 if ((so->so_proto->pr_flags & PR_PRECONN_WRITE) &&
977 ((flags & CONNECT_RESUME_ON_READ_WRITE) || auio))
978 so->so_flags1 |= SOF1_PRECONNECT_DATA;
979
980 /*
981 * If a user sets data idempotent and does not pass an uio, or
982 * sets CONNECT_RESUME_ON_READ_WRITE, this is an error, reset
983 * SOF1_DATA_IDEMPOTENT.
984 */
985 if (!(so->so_flags1 & SOF1_PRECONNECT_DATA) &&
986 (so->so_flags1 & SOF1_DATA_IDEMPOTENT)) {
987 /* We should return EINVAL instead perhaps. */
988 so->so_flags1 &= ~SOF1_DATA_IDEMPOTENT;
989 }
990
39236c6e 991 error = soconnectxlocked(so, src_sl, dst_sl, p, ifscope,
3e170ce0 992 aid, pcid, 0, NULL, 0, auio, bytes_written);
39236c6e
A
993 if (error != 0) {
994 so->so_state &= ~SS_ISCONNECTING;
995 goto out;
996 }
3e170ce0
A
997 /*
998 * If, after the call to soconnectxlocked the flag is still set (in case
999 * data has been queued and the connect() has actually been triggered,
1000 * it will have been unset by the transport), we exit immediately. There
1001 * is no reason to wait on any event.
1002 */
1003 if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
1004 error = 0;
1005 goto out;
1006 }
1c79356b 1007 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
91447636
A
1008 error = EINPROGRESS;
1009 goto out;
1c79356b 1010 }
1c79356b 1011 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
39236c6e
A
1012 lck_mtx_t *mutex_held;
1013
2d21ac55 1014 if (so->so_proto->pr_getlock != NULL)
91447636 1015 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
2d21ac55 1016 else
91447636 1017 mutex_held = so->so_proto->pr_domain->dom_mtx;
2d21ac55 1018 error = msleep((caddr_t)&so->so_timeo, mutex_held,
39236c6e
A
1019 PSOCK | PCATCH, __func__, 0);
1020 if (so->so_state & SS_DRAINING) {
91447636
A
1021 error = ECONNABORTED;
1022 }
39236c6e 1023 if (error != 0)
1c79356b
A
1024 break;
1025 }
1026 if (error == 0) {
1027 error = so->so_error;
1028 so->so_error = 0;
1029 }
39236c6e 1030out:
91447636 1031 socket_unlock(so, 1);
39236c6e
A
1032 return (error);
1033}
1034
/*
 * peeloff: cancellable system call entry point; the real work is done
 * by peeloff_nocancel().
 */
int
peeloff(struct proc *p, struct peeloff_args *uap, int *retval)
{
	int error;

	/*
	 * Because of its similarity to a POSIX interface, this call is
	 * treated as an unofficial cancellation point.
	 */
	__pthread_testcancel(1);

	error = peeloff_nocancel(p, uap, retval);
	return (error);
}
1045
1046static int
1047peeloff_nocancel(struct proc *p, struct peeloff_args *uap, int *retval)
1048{
1049 struct fileproc *fp;
1050 struct socket *mp_so, *so = NULL;
1051 int newfd, fd = uap->s;
1052 short fflag; /* type must match fp->f_flag */
1053 int error;
1054
1055 *retval = -1;
1056
1057 error = fp_getfsock(p, fd, &fp, &mp_so);
1058 if (error != 0) {
1059 if (error == EOPNOTSUPP)
1060 error = ENOTSOCK;
1061 goto out_nofile;
1062 }
1063 if (mp_so == NULL) {
1064 error = EBADF;
1065 goto out;
1066 }
1067
1068 socket_lock(mp_so, 1);
1069 error = sopeelofflocked(mp_so, uap->aid, &so);
1070 if (error != 0) {
1071 socket_unlock(mp_so, 1);
1072 goto out;
1073 }
1074 VERIFY(so != NULL);
1075 socket_unlock(mp_so, 0); /* keep ref on mp_so for us */
1076
1077 fflag = fp->f_flag;
1078 error = falloc(p, &fp, &newfd, vfs_context_current());
1079 if (error != 0) {
1080 /* drop this socket (probably ran out of file descriptors) */
1081 soclose(so);
1082 sodereference(mp_so); /* our mp_so ref */
1083 goto out;
1084 }
1085
1086 fp->f_flag = fflag;
1087 fp->f_ops = &socketops;
1088 fp->f_data = (caddr_t)so;
1089
1090 /*
1091 * If the socket has been marked as inactive by sosetdefunct(),
1092 * disallow further operations on it.
1093 */
1094 if (so->so_flags & SOF_DEFUNCT) {
1095 sodefunct(current_proc(), so,
1096 SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL);
1097 }
1098
1099 proc_fdlock(p);
1100 procfdtbl_releasefd(p, newfd, NULL);
1101 fp_drop(p, newfd, fp, 1);
1102 proc_fdunlock(p);
1103
1104 sodereference(mp_so); /* our mp_so ref */
1105 *retval = newfd;
1106
1107out:
1108 file_drop(fd);
1109
1110out_nofile:
1111 return (error);
1112}
1113
/*
 * disconnectx: cancellable system call entry point; the real work is
 * done by disconnectx_nocancel().
 */
int
disconnectx(struct proc *p, struct disconnectx_args *uap, int *retval)
{
	int error;

	/*
	 * Because of its similarity to a POSIX interface, this call is
	 * treated as an unofficial cancellation point.
	 */
	__pthread_testcancel(1);

	error = disconnectx_nocancel(p, uap, retval);
	return (error);
}
1124
1125static int
1126disconnectx_nocancel(struct proc *p, struct disconnectx_args *uap, int *retval)
1127{
1128#pragma unused(p, retval)
1129 struct socket *so;
1130 int fd = uap->s;
1131 int error;
1132
1133 error = file_socket(fd, &so);
1134 if (error != 0)
1135 return (error);
1136 if (so == NULL) {
1137 error = EBADF;
1138 goto out;
1139 }
1140
1141 error = sodisconnectx(so, uap->aid, uap->cid);
91447636
A
1142out:
1143 file_drop(fd);
1c79356b
A
1144 return (error);
1145}
1146
2d21ac55
A
1147/*
1148 * Returns: 0 Success
1149 * socreate:EAFNOSUPPORT
1150 * socreate:EPROTOTYPE
1151 * socreate:EPROTONOSUPPORT
1152 * socreate:ENOBUFS
1153 * socreate:ENOMEM
1154 * socreate:EISCONN
1155 * socreate:??? [other protocol families, IPSEC]
1156 * falloc:ENFILE
1157 * falloc:EMFILE
1158 * falloc:ENOMEM
1159 * copyout:EFAULT
1160 * soconnect2:EINVAL
1161 * soconnect2:EPROTOTYPE
1162 * soconnect2:??? [other protocol families[
1163 */
1c79356b 1164int
2d21ac55 1165socketpair(struct proc *p, struct socketpair_args *uap,
b0d623f7 1166 __unused int32_t *retval)
1c79356b 1167{
91447636 1168 struct fileproc *fp1, *fp2;
1c79356b
A
1169 struct socket *so1, *so2;
1170 int fd, error, sv[2];
1171
55e303ae 1172 AUDIT_ARG(socket, uap->domain, uap->type, uap->protocol);
1c79356b
A
1173 error = socreate(uap->domain, &so1, uap->type, uap->protocol);
1174 if (error)
1175 return (error);
1176 error = socreate(uap->domain, &so2, uap->type, uap->protocol);
1177 if (error)
1178 goto free1;
91447636 1179
2d21ac55 1180 error = falloc(p, &fp1, &fd, vfs_context_current());
91447636 1181 if (error) {
1c79356b 1182 goto free2;
91447636 1183 }
1c79356b 1184 fp1->f_flag = FREAD|FWRITE;
1c79356b
A
1185 fp1->f_ops = &socketops;
1186 fp1->f_data = (caddr_t)so1;
91447636
A
1187 sv[0] = fd;
1188
2d21ac55 1189 error = falloc(p, &fp2, &fd, vfs_context_current());
91447636 1190 if (error) {
1c79356b 1191 goto free3;
91447636 1192 }
1c79356b 1193 fp2->f_flag = FREAD|FWRITE;
1c79356b
A
1194 fp2->f_ops = &socketops;
1195 fp2->f_data = (caddr_t)so2;
1196 sv[1] = fd;
91447636 1197
1c79356b
A
1198 error = soconnect2(so1, so2);
1199 if (error) {
1c79356b
A
1200 goto free4;
1201 }
1c79356b
A
1202 if (uap->type == SOCK_DGRAM) {
1203 /*
1204 * Datagram socket connection is asymmetric.
1205 */
2d21ac55
A
1206 error = soconnect2(so2, so1);
1207 if (error) {
1208 goto free4;
1209 }
1c79356b 1210 }
91447636 1211
6d2010ae
A
1212 if ((error = copyout(sv, uap->rsv, 2 * sizeof (int))) != 0)
1213 goto free4;
1214
91447636 1215 proc_fdlock(p);
6601e61a
A
1216 procfdtbl_releasefd(p, sv[0], NULL);
1217 procfdtbl_releasefd(p, sv[1], NULL);
91447636
A
1218 fp_drop(p, sv[0], fp1, 1);
1219 fp_drop(p, sv[1], fp2, 1);
1220 proc_fdunlock(p);
1221
6d2010ae 1222 return (0);
1c79356b 1223free4:
91447636 1224 fp_free(p, sv[1], fp2);
1c79356b 1225free3:
91447636 1226 fp_free(p, sv[0], fp1);
1c79356b 1227free2:
2d21ac55 1228 (void) soclose(so2);
1c79356b 1229free1:
2d21ac55 1230 (void) soclose(so1);
1c79356b
A
1231 return (error);
1232}
1233
2d21ac55
A
1234/*
1235 * Returns: 0 Success
1236 * EINVAL
1237 * ENOBUFS
1238 * EBADF
1239 * EPIPE
1240 * EACCES Mandatory Access Control failure
1241 * file_socket:ENOTSOCK
1242 * file_socket:EBADF
1243 * getsockaddr:ENAMETOOLONG Filename too long
1244 * getsockaddr:EINVAL Invalid argument
1245 * getsockaddr:ENOMEM Not enough space
1246 * getsockaddr:EFAULT Bad address
1247 * <pru_sosend>:EACCES[TCP]
1248 * <pru_sosend>:EADDRINUSE[TCP]
1249 * <pru_sosend>:EADDRNOTAVAIL[TCP]
1250 * <pru_sosend>:EAFNOSUPPORT[TCP]
1251 * <pru_sosend>:EAGAIN[TCP]
1252 * <pru_sosend>:EBADF
1253 * <pru_sosend>:ECONNRESET[TCP]
1254 * <pru_sosend>:EFAULT
1255 * <pru_sosend>:EHOSTUNREACH[TCP]
1256 * <pru_sosend>:EINTR
1257 * <pru_sosend>:EINVAL
1258 * <pru_sosend>:EISCONN[AF_INET]
1259 * <pru_sosend>:EMSGSIZE[TCP]
1260 * <pru_sosend>:ENETDOWN[TCP]
1261 * <pru_sosend>:ENETUNREACH[TCP]
1262 * <pru_sosend>:ENOBUFS
1263 * <pru_sosend>:ENOMEM[TCP]
1264 * <pru_sosend>:ENOTCONN[AF_INET]
1265 * <pru_sosend>:EOPNOTSUPP
1266 * <pru_sosend>:EPERM[TCP]
1267 * <pru_sosend>:EPIPE
1268 * <pru_sosend>:EWOULDBLOCK
1269 * <pru_sosend>:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
1270 * <pru_sosend>:???[AF_INET] [whatever a filter author chooses]
1271 * <pru_sosend>:??? [value from so_error]
1272 * sockargs:???
1273 */
1c79356b 1274static int
3e170ce0 1275sendit(struct proc *p, struct socket *so, struct user_msghdr *mp, uio_t uiop,
b0d623f7 1276 int flags, int32_t *retval)
1c79356b 1277{
2d21ac55
A
1278 struct mbuf *control = NULL;
1279 struct sockaddr_storage ss;
1280 struct sockaddr *to = NULL;
1281 boolean_t want_free = TRUE;
91447636 1282 int error;
91447636 1283 user_ssize_t len;
2d21ac55
A
1284
1285 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
1c79356b 1286
2d21ac55
A
1287 if (mp->msg_name != USER_ADDR_NULL) {
1288 if (mp->msg_namelen > sizeof (ss)) {
1289 error = getsockaddr(so, &to, mp->msg_name,
4a3eedf9 1290 mp->msg_namelen, TRUE);
2d21ac55
A
1291 } else {
1292 error = getsockaddr_s(so, &ss, mp->msg_name,
4a3eedf9 1293 mp->msg_namelen, TRUE);
2d21ac55
A
1294 if (error == 0) {
1295 to = (struct sockaddr *)&ss;
1296 want_free = FALSE;
1297 }
1c79356b 1298 }
2d21ac55
A
1299 if (error != 0)
1300 goto out;
1301 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), to);
91447636 1302 }
2d21ac55
A
1303 if (mp->msg_control != USER_ADDR_NULL) {
1304 if (mp->msg_controllen < sizeof (struct cmsghdr)) {
1c79356b
A
1305 error = EINVAL;
1306 goto bad;
1307 }
1308 error = sockargs(&control, mp->msg_control,
1309 mp->msg_controllen, MT_CONTROL);
2d21ac55 1310 if (error != 0)
1c79356b 1311 goto bad;
91447636 1312 }
1c79356b 1313
2d21ac55
A
1314#if CONFIG_MACF_SOCKET_SUBSET
1315 /*
1316 * We check the state without holding the socket lock;
1317 * if a race condition occurs, it would simply result
3e170ce0 1318 * in an extra call to the MAC check function.
2d21ac55 1319 */
3e170ce0 1320 if (to != NULL &&
316670eb 1321 !(so->so_state & SS_DEFUNCT) &&
2d21ac55
A
1322 (error = mac_socket_check_send(kauth_cred_get(), so, to)) != 0)
1323 goto bad;
1324#endif /* MAC_SOCKET_SUBSET */
91447636
A
1325
1326 len = uio_resid(uiop);
39236c6e
A
1327 error = so->so_proto->pr_usrreqs->pru_sosend(so, to, uiop, 0,
1328 control, flags);
2d21ac55 1329 if (error != 0) {
91447636 1330 if (uio_resid(uiop) != len && (error == ERESTART ||
1c79356b
A
1331 error == EINTR || error == EWOULDBLOCK))
1332 error = 0;
2d21ac55 1333 /* Generation of SIGPIPE can be controlled per socket */
9bccf70c 1334 if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE))
1c79356b
A
1335 psignal(p, SIGPIPE);
1336 }
1337 if (error == 0)
91447636
A
1338 *retval = (int)(len - uio_resid(uiop));
1339bad:
2d21ac55 1340 if (to != NULL && want_free)
1c79356b 1341 FREE(to, M_SONAME);
91447636 1342out:
2d21ac55 1343 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error, 0, 0, 0, 0);
3e170ce0 1344
1c79356b
A
1345 return (error);
1346}
1347
2d21ac55
A
1348/*
1349 * Returns: 0 Success
1350 * ENOMEM
1351 * sendit:??? [see sendit definition in this file]
1352 * write:??? [4056224: applicable for pipes]
1353 */
1c79356b 1354int
b0d623f7 1355sendto(struct proc *p, struct sendto_args *uap, int32_t *retval)
2d21ac55
A
1356{
1357 __pthread_testcancel(1);
39236c6e 1358 return (sendto_nocancel(p, (struct sendto_nocancel_args *)uap, retval));
2d21ac55
A
1359}
1360
1361int
39236c6e
A
1362sendto_nocancel(struct proc *p,
1363 struct sendto_nocancel_args *uap,
1364 int32_t *retval)
1c79356b 1365{
91447636
A
1366 struct user_msghdr msg;
1367 int error;
1368 uio_t auio = NULL;
3e170ce0 1369 struct socket *so;
1c79356b 1370
2d21ac55 1371 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_START, 0, 0, 0, 0, 0);
55e303ae 1372 AUDIT_ARG(fd, uap->s);
1c79356b 1373
91447636 1374 auio = uio_create(1, 0,
2d21ac55
A
1375 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1376 UIO_WRITE);
91447636 1377 if (auio == NULL) {
3e170ce0
A
1378 error = ENOMEM;
1379 goto done;
91447636
A
1380 }
1381 uio_addiov(auio, uap->buf, uap->len);
1382
1c79356b
A
1383 msg.msg_name = uap->to;
1384 msg.msg_namelen = uap->tolen;
91447636
A
1385 /* no need to set up msg_iov. sendit uses uio_t we send it */
1386 msg.msg_iov = 0;
1387 msg.msg_iovlen = 0;
1c79356b 1388 msg.msg_control = 0;
1c79356b 1389 msg.msg_flags = 0;
1c79356b 1390
3e170ce0
A
1391 error = file_socket(uap->s, &so);
1392 if (error)
1393 goto done;
2d21ac55 1394
3e170ce0
A
1395 if (so == NULL) {
1396 error = EBADF;
1397 } else {
1398 error = sendit(p, so, &msg, auio, uap->flags, retval);
91447636 1399 }
2d21ac55 1400
3e170ce0
A
1401 file_drop(uap->s);
1402done:
1403 if (auio != NULL)
1404 uio_free(auio);
1405
2d21ac55 1406 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_END, error, *retval, 0, 0, 0);
1c79356b 1407
2d21ac55 1408 return (error);
1c79356b 1409}
1c79356b 1410
2d21ac55
A
1411/*
1412 * Returns: 0 Success
1413 * ENOBUFS
1414 * copyin:EFAULT
1415 * sendit:??? [see sendit definition in this file]
1416 */
1c79356b 1417int
b0d623f7 1418sendmsg(struct proc *p, struct sendmsg_args *uap, int32_t *retval)
1c79356b 1419{
2d21ac55 1420 __pthread_testcancel(1);
3e170ce0
A
1421 return (sendmsg_nocancel(p, (struct sendmsg_nocancel_args *)uap,
1422 retval));
1c79356b 1423}
1c79356b
A
1424
1425int
3e170ce0
A
1426sendmsg_nocancel(struct proc *p, struct sendmsg_nocancel_args *uap,
1427 int32_t *retval)
1c79356b 1428{
b0d623f7
A
1429 struct user32_msghdr msg32;
1430 struct user64_msghdr msg64;
91447636
A
1431 struct user_msghdr user_msg;
1432 caddr_t msghdrp;
1433 int size_of_msghdr;
1c79356b 1434 int error;
91447636
A
1435 uio_t auio = NULL;
1436 struct user_iovec *iovp;
3e170ce0 1437 struct socket *so;
1c79356b 1438
2d21ac55 1439 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
55e303ae 1440 AUDIT_ARG(fd, uap->s);
91447636 1441 if (IS_64BIT_PROCESS(p)) {
b0d623f7
A
1442 msghdrp = (caddr_t)&msg64;
1443 size_of_msghdr = sizeof (msg64);
2d21ac55 1444 } else {
b0d623f7
A
1445 msghdrp = (caddr_t)&msg32;
1446 size_of_msghdr = sizeof (msg32);
91447636
A
1447 }
1448 error = copyin(uap->msg, msghdrp, size_of_msghdr);
2d21ac55
A
1449 if (error) {
1450 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1451 return (error);
1c79356b 1452 }
91447636 1453
b0d623f7
A
1454 if (IS_64BIT_PROCESS(p)) {
1455 user_msg.msg_flags = msg64.msg_flags;
1456 user_msg.msg_controllen = msg64.msg_controllen;
1457 user_msg.msg_control = msg64.msg_control;
1458 user_msg.msg_iovlen = msg64.msg_iovlen;
1459 user_msg.msg_iov = msg64.msg_iov;
1460 user_msg.msg_namelen = msg64.msg_namelen;
1461 user_msg.msg_name = msg64.msg_name;
1462 } else {
1463 user_msg.msg_flags = msg32.msg_flags;
1464 user_msg.msg_controllen = msg32.msg_controllen;
1465 user_msg.msg_control = msg32.msg_control;
1466 user_msg.msg_iovlen = msg32.msg_iovlen;
1467 user_msg.msg_iov = msg32.msg_iov;
1468 user_msg.msg_namelen = msg32.msg_namelen;
1469 user_msg.msg_name = msg32.msg_name;
91447636
A
1470 }
1471
1472 if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
2d21ac55
A
1473 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, EMSGSIZE,
1474 0, 0, 0, 0);
91447636
A
1475 return (EMSGSIZE);
1476 }
1477
1478 /* allocate a uio large enough to hold the number of iovecs passed */
1479 auio = uio_create(user_msg.msg_iovlen, 0,
2d21ac55
A
1480 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1481 UIO_WRITE);
91447636
A
1482 if (auio == NULL) {
1483 error = ENOBUFS;
1484 goto done;
1485 }
2d21ac55 1486
91447636 1487 if (user_msg.msg_iovlen) {
2d21ac55
A
1488 /*
1489 * get location of iovecs within the uio.
1490 * then copyin the iovecs from user space.
91447636
A
1491 */
1492 iovp = uio_iovsaddr(auio);
1493 if (iovp == NULL) {
1494 error = ENOBUFS;
1495 goto done;
1496 }
b0d623f7
A
1497 error = copyin_user_iovec_array(user_msg.msg_iov,
1498 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
1499 user_msg.msg_iovlen, iovp);
91447636
A
1500 if (error)
1501 goto done;
1502 user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
2d21ac55
A
1503
1504 /* finish setup of uio_t */
39236c6e
A
1505 error = uio_calculateresid(auio);
1506 if (error) {
1507 goto done;
1508 }
2d21ac55 1509 } else {
91447636
A
1510 user_msg.msg_iov = 0;
1511 }
2d21ac55
A
1512
1513 /* msg_flags is ignored for send */
91447636 1514 user_msg.msg_flags = 0;
2d21ac55 1515
3e170ce0
A
1516 error = file_socket(uap->s, &so);
1517 if (error) {
1518 goto done;
1519 }
1520 if (so == NULL) {
1521 error = EBADF;
1522 } else {
1523 error = sendit(p, so, &user_msg, auio, uap->flags, retval);
1524 }
1525 file_drop(uap->s);
1c79356b 1526done:
91447636
A
1527 if (auio != NULL) {
1528 uio_free(auio);
1529 }
2d21ac55 1530 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
91447636 1531
1c79356b
A
1532 return (error);
1533}
1534
fe8ab488
A
1535int
1536sendmsg_x(struct proc *p, struct sendmsg_x_args *uap, user_ssize_t *retval)
1537{
1538 int error = 0;
3e170ce0 1539 struct user_msghdr_x *user_msg_x = NULL;
fe8ab488
A
1540 struct uio **uiop = NULL;
1541 struct socket *so;
1542 u_int i;
1543 struct sockaddr *to = NULL;
fe8ab488
A
1544 user_ssize_t len_before = 0, len_after;
1545 int need_drop = 0;
1546 size_t size_of_msghdr;
1547 void *umsgp = NULL;
1548 u_int uiocnt;
3e170ce0 1549 int has_addr_or_ctl = 0;
fe8ab488
A
1550
1551 KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0);
1552
1553 error = file_socket(uap->s, &so);
1554 if (error) {
1555 goto out;
1556 }
1557 need_drop = 1;
1558 if (so == NULL) {
1559 error = EBADF;
1560 goto out;
1561 }
fe8ab488
A
1562
1563 /*
1564 * Input parameter range check
1565 */
1566 if (uap->cnt == 0 || uap->cnt > UIO_MAXIOV) {
1567 error = EINVAL;
1568 goto out;
1569 }
3e170ce0
A
1570 /*
1571 * Clip to max currently allowed
1572 */
1573 if (uap->cnt > somaxsendmsgx)
1574 uap->cnt = somaxsendmsgx;
1575
1576 user_msg_x = _MALLOC(uap->cnt * sizeof(struct user_msghdr_x),
fe8ab488 1577 M_TEMP, M_WAITOK | M_ZERO);
3e170ce0
A
1578 if (user_msg_x == NULL) {
1579 DBG_PRINTF("%s _MALLOC() user_msg_x failed\n", __func__);
fe8ab488
A
1580 error = ENOMEM;
1581 goto out;
1582 }
1583 uiop = _MALLOC(uap->cnt * sizeof(struct uio *),
1584 M_TEMP, M_WAITOK | M_ZERO);
1585 if (uiop == NULL) {
3e170ce0 1586 DBG_PRINTF("%s _MALLOC() uiop failed\n", __func__);
fe8ab488
A
1587 error = ENOMEM;
1588 goto out;
1589 }
1590
1591 size_of_msghdr = IS_64BIT_PROCESS(p) ?
1592 sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x);
1593
3e170ce0 1594 umsgp = _MALLOC(uap->cnt * size_of_msghdr,
fe8ab488
A
1595 M_TEMP, M_WAITOK | M_ZERO);
1596 if (umsgp == NULL) {
3e170ce0 1597 printf("%s _MALLOC() user_msg_x failed\n", __func__);
fe8ab488
A
1598 error = ENOMEM;
1599 goto out;
1600 }
1601 error = copyin(uap->msgp, umsgp, uap->cnt * size_of_msghdr);
1602 if (error) {
3e170ce0 1603 DBG_PRINTF("%s copyin() failed\n", __func__);
fe8ab488
A
1604 goto out;
1605 }
1606 error = internalize_user_msghdr_array(umsgp,
1607 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
3e170ce0 1608 UIO_WRITE, uap->cnt, user_msg_x, uiop);
fe8ab488 1609 if (error) {
3e170ce0 1610 DBG_PRINTF("%s copyin_user_msghdr_array() failed\n", __func__);
fe8ab488
A
1611 goto out;
1612 }
1613 /*
1614 * Make sure the size of each message iovec and
1615 * the aggregate size of all the iovec is valid
1616 */
1617 if (uio_array_is_valid(uiop, uap->cnt) == 0) {
1618 error = EINVAL;
1619 goto out;
1620 }
1621
1622 /*
1623 * Sanity check on passed arguments
1624 */
1625 for (i = 0; i < uap->cnt; i++) {
3e170ce0 1626 struct user_msghdr_x *mp = user_msg_x + i;
fe8ab488
A
1627
1628 /*
1629 * No flags on send message
1630 */
1631 if (mp->msg_flags != 0) {
1632 error = EINVAL;
1633 goto out;
1634 }
1635 /*
1636 * No support for address or ancillary data (yet)
1637 */
3e170ce0
A
1638 if (mp->msg_name != USER_ADDR_NULL || mp->msg_namelen != 0)
1639 has_addr_or_ctl = 1;
1640
fe8ab488 1641 if (mp->msg_control != USER_ADDR_NULL ||
3e170ce0
A
1642 mp->msg_controllen != 0)
1643 has_addr_or_ctl = 1;
1644
fe8ab488
A
1645#if CONFIG_MACF_SOCKET_SUBSET
1646 /*
1647 * We check the state without holding the socket lock;
1648 * if a race condition occurs, it would simply result
3e170ce0 1649 * in an extra call to the MAC check function.
fe8ab488
A
1650 *
1651 * Note: The following check is never true taken with the
1652 * current limitation that we do not accept to pass an address,
3e170ce0
A
1653 * this is effectively placeholder code. If we add support for
1654 * addresses, we will have to check every address.
fe8ab488 1655 */
3e170ce0 1656 if (to != NULL &&
fe8ab488 1657 !(so->so_state & SS_DEFUNCT) &&
3e170ce0
A
1658 (error = mac_socket_check_send(kauth_cred_get(), so, to))
1659 != 0)
fe8ab488
A
1660 goto out;
1661#endif /* MAC_SOCKET_SUBSET */
1662 }
1663
1664 len_before = uio_array_resid(uiop, uap->cnt);
1665
3e170ce0
A
1666 /*
1667 * Feed list of packets at once only for connected socket without
1668 * control message
1669 */
1670 if (so->so_proto->pr_usrreqs->pru_sosend_list !=
1671 pru_sosend_list_notsupp &&
1672 has_addr_or_ctl == 0 && somaxsendmsgx == 0) {
1673 error = so->so_proto->pr_usrreqs->pru_sosend_list(so, uiop,
1674 uap->cnt, uap->flags);
1675 } else {
1676 for (i = 0; i < uap->cnt; i++) {
1677 struct user_msghdr_x *mp = user_msg_x + i;
1678 struct user_msghdr user_msg;
1679 uio_t auio = uiop[i];
1680 int32_t tmpval;
1681
1682 user_msg.msg_flags = mp->msg_flags;
1683 user_msg.msg_controllen = mp->msg_controllen;
1684 user_msg.msg_control = mp->msg_control;
1685 user_msg.msg_iovlen = mp->msg_iovlen;
1686 user_msg.msg_iov = mp->msg_iov;
1687 user_msg.msg_namelen = mp->msg_namelen;
1688 user_msg.msg_name = mp->msg_name;
1689
1690 error = sendit(p, so, &user_msg, auio, uap->flags,
1691 &tmpval);
1692 if (error != 0)
1693 break;
1694 }
1695 }
fe8ab488
A
1696 len_after = uio_array_resid(uiop, uap->cnt);
1697
3e170ce0
A
1698 VERIFY(len_after <= len_before);
1699
fe8ab488
A
1700 if (error != 0) {
1701 if (len_after != len_before && (error == ERESTART ||
3e170ce0
A
1702 error == EINTR || error == EWOULDBLOCK ||
1703 error == ENOBUFS))
fe8ab488
A
1704 error = 0;
1705 /* Generation of SIGPIPE can be controlled per socket */
1706 if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE))
1707 psignal(p, SIGPIPE);
1708 }
1709 if (error == 0) {
1710 uiocnt = externalize_user_msghdr_array(umsgp,
1711 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
3e170ce0 1712 UIO_WRITE, uap->cnt, user_msg_x, uiop);
fe8ab488
A
1713
1714 *retval = (int)(uiocnt);
1715 }
1716out:
1717 if (need_drop)
1718 file_drop(uap->s);
1719 if (umsgp != NULL)
1720 _FREE(umsgp, M_TEMP);
1721 if (uiop != NULL) {
1722 free_uio_array(uiop, uap->cnt);
1723 _FREE(uiop, M_TEMP);
1724 }
3e170ce0
A
1725 if (user_msg_x != NULL)
1726 _FREE(user_msg_x, M_TEMP);
fe8ab488
A
1727
1728 KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0);
1729
1730 return (error);
1731}
1732
3e170ce0
A
1733
1734static int
1735copyout_sa(struct sockaddr *fromsa, user_addr_t name, socklen_t *namelen)
1736{
1737 int error = 0;
1738 socklen_t sa_len = 0;
1739 ssize_t len;
1740
1741 len = *namelen;
1742 if (len <= 0 || fromsa == 0) {
1743 len = 0;
1744 } else {
1745#ifndef MIN
1746#define MIN(a, b) ((a) > (b) ? (b) : (a))
1747#endif
1748 sa_len = fromsa->sa_len;
1749 len = MIN((unsigned int)len, sa_len);
1750 error = copyout(fromsa, name, (unsigned)len);
1751 if (error)
1752 goto out;
1753 }
1754 *namelen = sa_len;
1755out:
1756 return (0);
1757}
1758
1759static int
1760copyout_control(struct proc *p, struct mbuf *m, user_addr_t control,
1761 socklen_t *controllen, int *flags)
1762{
1763 int error = 0;
1764 ssize_t len;
1765 user_addr_t ctlbuf;
1766
1767 len = *controllen;
1768 *controllen = 0;
1769 ctlbuf = control;
1770
1771 while (m && len > 0) {
1772 unsigned int tocopy;
1773 struct cmsghdr *cp = mtod(m, struct cmsghdr *);
1774 int cp_size = CMSG_ALIGN(cp->cmsg_len);
1775 int buflen = m->m_len;
1776
1777 while (buflen > 0 && len > 0) {
1778 /*
1779 * SCM_TIMESTAMP hack because struct timeval has a
1780 * different size for 32 bits and 64 bits processes
1781 */
1782 if (cp->cmsg_level == SOL_SOCKET && cp->cmsg_type == SCM_TIMESTAMP) {
1783 unsigned char tmp_buffer[CMSG_SPACE(sizeof(struct user64_timeval))];
1784 struct cmsghdr *tmp_cp = (struct cmsghdr *)(void *)tmp_buffer;
1785 int tmp_space;
1786 struct timeval *tv = (struct timeval *)(void *)CMSG_DATA(cp);
1787
1788 tmp_cp->cmsg_level = SOL_SOCKET;
1789 tmp_cp->cmsg_type = SCM_TIMESTAMP;
1790
1791 if (proc_is64bit(p)) {
1792 struct user64_timeval *tv64 = (struct user64_timeval *)(void *)CMSG_DATA(tmp_cp);
1793
1794 tv64->tv_sec = tv->tv_sec;
1795 tv64->tv_usec = tv->tv_usec;
1796
1797 tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user64_timeval));
1798 tmp_space = CMSG_SPACE(sizeof(struct user64_timeval));
1799 } else {
1800 struct user32_timeval *tv32 = (struct user32_timeval *)(void *)CMSG_DATA(tmp_cp);
1801
1802 tv32->tv_sec = tv->tv_sec;
1803 tv32->tv_usec = tv->tv_usec;
1804
1805 tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user32_timeval));
1806 tmp_space = CMSG_SPACE(sizeof(struct user32_timeval));
1807 }
1808 if (len >= tmp_space) {
1809 tocopy = tmp_space;
1810 } else {
1811 *flags |= MSG_CTRUNC;
1812 tocopy = len;
1813 }
1814 error = copyout(tmp_buffer, ctlbuf, tocopy);
1815 if (error)
1816 goto out;
1817 } else {
1818 if (cp_size > buflen) {
1819 panic("cp_size > buflen, something"
1820 "wrong with alignment!");
1821 }
1822 if (len >= cp_size) {
1823 tocopy = cp_size;
1824 } else {
1825 *flags |= MSG_CTRUNC;
1826 tocopy = len;
1827 }
1828 error = copyout((caddr_t) cp, ctlbuf, tocopy);
1829 if (error)
1830 goto out;
1831 }
1832
1833 ctlbuf += tocopy;
1834 len -= tocopy;
1835
1836 buflen -= cp_size;
1837 cp = (struct cmsghdr *)(void *)
1838 ((unsigned char *) cp + cp_size);
1839 cp_size = CMSG_ALIGN(cp->cmsg_len);
1840 }
1841
1842 m = m->m_next;
1843 }
1844 *controllen = ctlbuf - control;
1845out:
1846 return (error);
1847}
1848
2d21ac55
A
1849/*
1850 * Returns: 0 Success
1851 * ENOTSOCK
1852 * EINVAL
1853 * EBADF
1854 * EACCES Mandatory Access Control failure
1855 * copyout:EFAULT
1856 * fp_lookup:EBADF
1857 * <pru_soreceive>:ENOBUFS
1858 * <pru_soreceive>:ENOTCONN
1859 * <pru_soreceive>:EWOULDBLOCK
1860 * <pru_soreceive>:EFAULT
1861 * <pru_soreceive>:EINTR
1862 * <pru_soreceive>:EBADF
1863 * <pru_soreceive>:EINVAL
1864 * <pru_soreceive>:EMSGSIZE
1865 * <pru_soreceive>:???
1866 *
1867 * Notes: Additional return values from calls through <pru_soreceive>
1868 * depend on protocols other than TCP or AF_UNIX, which are
1869 * documented above.
1870 */
1c79356b 1871static int
2d21ac55 1872recvit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop,
b0d623f7 1873 user_addr_t namelenp, int32_t *retval)
1c79356b 1874{
39236c6e
A
1875 ssize_t len;
1876 int error;
3e170ce0 1877 struct mbuf *control = 0;
1c79356b
A
1878 struct socket *so;
1879 struct sockaddr *fromsa = 0;
91447636 1880 struct fileproc *fp;
1c79356b 1881
2d21ac55 1882 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
91447636 1883 proc_fdlock(p);
2d21ac55
A
1884 if ((error = fp_lookup(p, s, &fp, 1))) {
1885 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
91447636 1886 proc_fdunlock(p);
2d21ac55 1887 return (error);
1c79356b 1888 }
91447636 1889 if (fp->f_type != DTYPE_SOCKET) {
2d21ac55 1890 fp_drop(p, s, fp, 1);
91447636 1891 proc_fdunlock(p);
2d21ac55 1892 return (ENOTSOCK);
91447636 1893 }
1c79356b 1894
2d21ac55
A
1895 so = (struct socket *)fp->f_data;
1896 if (so == NULL) {
1897 fp_drop(p, s, fp, 1);
1898 proc_fdunlock(p);
1899 return (EBADF);
1900 }
91447636
A
1901
1902 proc_fdunlock(p);
2d21ac55
A
1903
1904#if CONFIG_MACF_SOCKET_SUBSET
1905 /*
1906 * We check the state without holding the socket lock;
1907 * if a race condition occurs, it would simply result
1908 * in an extra call to the MAC check function.
1909 */
316670eb
A
1910 if (!(so->so_state & SS_DEFUNCT) &&
1911 !(so->so_state & SS_ISCONNECTED) &&
39236c6e 1912 !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
2d21ac55
A
1913 (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0)
1914 goto out1;
1915#endif /* MAC_SOCKET_SUBSET */
91447636 1916 if (uio_resid(uiop) < 0) {
2d21ac55 1917 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, EINVAL, 0, 0, 0, 0);
91447636
A
1918 error = EINVAL;
1919 goto out1;
1c79356b 1920 }
91447636
A
1921
1922 len = uio_resid(uiop);
2d21ac55
A
1923 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, uiop,
1924 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
1925 &mp->msg_flags);
b0d623f7
A
1926 if (fromsa)
1927 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()),
1928 fromsa);
1c79356b 1929 if (error) {
91447636 1930 if (uio_resid(uiop) != len && (error == ERESTART ||
1c79356b
A
1931 error == EINTR || error == EWOULDBLOCK))
1932 error = 0;
1933 }
1c79356b
A
1934 if (error)
1935 goto out;
2d21ac55 1936
91447636 1937 *retval = len - uio_resid(uiop);
2d21ac55 1938
3e170ce0
A
1939 if (mp->msg_name) {
1940 error = copyout_sa(fromsa, mp->msg_name, &mp->msg_namelen);
1941 if (error)
1942 goto out;
2d21ac55 1943 /* return the actual, untruncated address length */
1c79356b 1944 if (namelenp &&
3e170ce0 1945 (error = copyout((caddr_t)&mp->msg_namelen, namelenp,
2d21ac55 1946 sizeof (int)))) {
1c79356b
A
1947 goto out;
1948 }
1949 }
39236c6e 1950
3e170ce0
A
1951 if (mp->msg_control) {
1952 error = copyout_control(p, control, mp->msg_control,
1953 &mp->msg_controllen, &mp->msg_flags);
1c79356b
A
1954 }
1955out:
1956 if (fromsa)
1957 FREE(fromsa, M_SONAME);
1958 if (control)
1959 m_freem(control);
2d21ac55 1960 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
91447636
A
1961out1:
1962 fp_drop(p, s, fp, 0);
1c79356b
A
1963 return (error);
1964}
1965
2d21ac55
A
1966/*
1967 * Returns: 0 Success
1968 * ENOMEM
1969 * copyin:EFAULT
1970 * recvit:???
1971 * read:??? [4056224: applicable for pipes]
1972 *
1973 * Notes: The read entry point is only called as part of support for
1974 * binary backward compatability; new code should use read
1975 * instead of recv or recvfrom when attempting to read data
1976 * from pipes.
1977 *
1978 * For full documentation of the return codes from recvit, see
1979 * the block header for the recvit function.
1980 */
1981int
b0d623f7 1982recvfrom(struct proc *p, struct recvfrom_args *uap, int32_t *retval)
2d21ac55
A
1983{
1984 __pthread_testcancel(1);
3e170ce0
A
1985 return (recvfrom_nocancel(p, (struct recvfrom_nocancel_args *)uap,
1986 retval));
2d21ac55
A
1987}
1988
1c79356b 1989int
3e170ce0
A
1990recvfrom_nocancel(struct proc *p, struct recvfrom_nocancel_args *uap,
1991 int32_t *retval)
1c79356b 1992{
91447636 1993 struct user_msghdr msg;
1c79356b 1994 int error;
91447636 1995 uio_t auio = NULL;
1c79356b 1996
2d21ac55 1997 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_START, 0, 0, 0, 0, 0);
55e303ae 1998 AUDIT_ARG(fd, uap->s);
1c79356b
A
1999
2000 if (uap->fromlenaddr) {
91447636 2001 error = copyin(uap->fromlenaddr,
1c79356b
A
2002 (caddr_t)&msg.msg_namelen, sizeof (msg.msg_namelen));
2003 if (error)
2004 return (error);
2d21ac55 2005 } else {
1c79356b 2006 msg.msg_namelen = 0;
2d21ac55 2007 }
1c79356b 2008 msg.msg_name = uap->from;
91447636 2009 auio = uio_create(1, 0,
2d21ac55
A
2010 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
2011 UIO_READ);
91447636
A
2012 if (auio == NULL) {
2013 return (ENOMEM);
2014 }
2d21ac55 2015
91447636
A
2016 uio_addiov(auio, uap->buf, uap->len);
2017 /* no need to set up msg_iov. recvit uses uio_t we send it */
2018 msg.msg_iov = 0;
2019 msg.msg_iovlen = 0;
1c79356b 2020 msg.msg_control = 0;
91447636 2021 msg.msg_controllen = 0;
1c79356b 2022 msg.msg_flags = uap->flags;
91447636
A
2023 error = recvit(p, uap->s, &msg, auio, uap->fromlenaddr, retval);
2024 if (auio != NULL) {
2025 uio_free(auio);
2026 }
2d21ac55 2027
2d21ac55 2028 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_END, error, 0, 0, 0, 0);
1c79356b 2029
2d21ac55 2030 return (error);
1c79356b
A
2031}
2032
2033/*
2d21ac55
A
2034 * Returns: 0 Success
2035 * EMSGSIZE
2036 * ENOMEM
2037 * copyin:EFAULT
2038 * copyout:EFAULT
2039 * recvit:???
2040 *
2041 * Notes: For full documentation of the return codes from recvit, see
2042 * the block header for the recvit function.
1c79356b
A
2043 */
2044int
b0d623f7 2045recvmsg(struct proc *p, struct recvmsg_args *uap, int32_t *retval)
1c79356b 2046{
2d21ac55 2047 __pthread_testcancel(1);
3e170ce0
A
2048 return (recvmsg_nocancel(p, (struct recvmsg_nocancel_args *)uap,
2049 retval));
1c79356b 2050}
1c79356b
A
2051
2052int
3e170ce0
A
2053recvmsg_nocancel(struct proc *p, struct recvmsg_nocancel_args *uap,
2054 int32_t *retval)
1c79356b 2055{
b0d623f7
A
2056 struct user32_msghdr msg32;
2057 struct user64_msghdr msg64;
91447636
A
2058 struct user_msghdr user_msg;
2059 caddr_t msghdrp;
2060 int size_of_msghdr;
2061 user_addr_t uiov;
2d21ac55 2062 int error;
91447636
A
2063 uio_t auio = NULL;
2064 struct user_iovec *iovp;
1c79356b 2065
2d21ac55 2066 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
55e303ae 2067 AUDIT_ARG(fd, uap->s);
91447636 2068 if (IS_64BIT_PROCESS(p)) {
b0d623f7
A
2069 msghdrp = (caddr_t)&msg64;
2070 size_of_msghdr = sizeof (msg64);
2d21ac55 2071 } else {
b0d623f7
A
2072 msghdrp = (caddr_t)&msg32;
2073 size_of_msghdr = sizeof (msg32);
91447636
A
2074 }
2075 error = copyin(uap->msg, msghdrp, size_of_msghdr);
2d21ac55
A
2076 if (error) {
2077 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1c79356b
A
2078 return (error);
2079 }
2080
91447636 2081 /* only need to copy if user process is not 64-bit */
b0d623f7
A
2082 if (IS_64BIT_PROCESS(p)) {
2083 user_msg.msg_flags = msg64.msg_flags;
2084 user_msg.msg_controllen = msg64.msg_controllen;
2085 user_msg.msg_control = msg64.msg_control;
2086 user_msg.msg_iovlen = msg64.msg_iovlen;
2087 user_msg.msg_iov = msg64.msg_iov;
2088 user_msg.msg_namelen = msg64.msg_namelen;
2089 user_msg.msg_name = msg64.msg_name;
2090 } else {
2091 user_msg.msg_flags = msg32.msg_flags;
2092 user_msg.msg_controllen = msg32.msg_controllen;
2093 user_msg.msg_control = msg32.msg_control;
2094 user_msg.msg_iovlen = msg32.msg_iovlen;
2095 user_msg.msg_iov = msg32.msg_iov;
2096 user_msg.msg_namelen = msg32.msg_namelen;
2097 user_msg.msg_name = msg32.msg_name;
91447636
A
2098 }
2099
2100 if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
2d21ac55
A
2101 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, EMSGSIZE,
2102 0, 0, 0, 0);
91447636
A
2103 return (EMSGSIZE);
2104 }
2105
91447636 2106 user_msg.msg_flags = uap->flags;
91447636
A
2107
2108 /* allocate a uio large enough to hold the number of iovecs passed */
2109 auio = uio_create(user_msg.msg_iovlen, 0,
2d21ac55
A
2110 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
2111 UIO_READ);
91447636
A
2112 if (auio == NULL) {
2113 error = ENOMEM;
2114 goto done;
2115 }
2116
2d21ac55
A
2117 /*
2118 * get location of iovecs within the uio. then copyin the iovecs from
91447636
A
2119 * user space.
2120 */
2121 iovp = uio_iovsaddr(auio);
2122 if (iovp == NULL) {
2123 error = ENOMEM;
2124 goto done;
2125 }
2126 uiov = user_msg.msg_iov;
2127 user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
b0d623f7
A
2128 error = copyin_user_iovec_array(uiov,
2129 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
2130 user_msg.msg_iovlen, iovp);
1c79356b
A
2131 if (error)
2132 goto done;
91447636 2133
2d21ac55 2134 /* finish setup of uio_t */
39236c6e
A
2135 error = uio_calculateresid(auio);
2136 if (error) {
2137 goto done;
2138 }
2d21ac55 2139
91447636 2140 error = recvit(p, uap->s, &user_msg, auio, 0, retval);
1c79356b 2141 if (!error) {
91447636 2142 user_msg.msg_iov = uiov;
b0d623f7
A
2143 if (IS_64BIT_PROCESS(p)) {
2144 msg64.msg_flags = user_msg.msg_flags;
2145 msg64.msg_controllen = user_msg.msg_controllen;
2146 msg64.msg_control = user_msg.msg_control;
2147 msg64.msg_iovlen = user_msg.msg_iovlen;
2148 msg64.msg_iov = user_msg.msg_iov;
2149 msg64.msg_namelen = user_msg.msg_namelen;
2150 msg64.msg_name = user_msg.msg_name;
2151 } else {
2152 msg32.msg_flags = user_msg.msg_flags;
2153 msg32.msg_controllen = user_msg.msg_controllen;
2154 msg32.msg_control = user_msg.msg_control;
2155 msg32.msg_iovlen = user_msg.msg_iovlen;
2156 msg32.msg_iov = user_msg.msg_iov;
2157 msg32.msg_namelen = user_msg.msg_namelen;
2158 msg32.msg_name = user_msg.msg_name;
91447636
A
2159 }
2160 error = copyout(msghdrp, uap->msg, size_of_msghdr);
1c79356b
A
2161 }
2162done:
91447636
A
2163 if (auio != NULL) {
2164 uio_free(auio);
2165 }
2d21ac55 2166 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1c79356b
A
2167 return (error);
2168}
2169
fe8ab488
A
2170int
2171recvmsg_x(struct proc *p, struct recvmsg_x_args *uap, user_ssize_t *retval)
2172{
2173 int error = EOPNOTSUPP;
3e170ce0
A
2174 struct user_msghdr_x *user_msg_x = NULL;
2175 struct recv_msg_elem *recv_msg_array = NULL;
fe8ab488
A
2176 struct socket *so;
2177 user_ssize_t len_before = 0, len_after;
2178 int need_drop = 0;
2179 size_t size_of_msghdr;
2180 void *umsgp = NULL;
2181 u_int i;
2182 u_int uiocnt;
2183
2184 KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0);
2185
2186 error = file_socket(uap->s, &so);
2187 if (error) {
2188 goto out;
2189 }
2190 need_drop = 1;
2191 if (so == NULL) {
2192 error = EBADF;
2193 goto out;
2194 }
fe8ab488
A
2195 /*
2196 * Input parameter range check
2197 */
2198 if (uap->cnt == 0 || uap->cnt > UIO_MAXIOV) {
2199 error = EINVAL;
2200 goto out;
2201 }
3e170ce0
A
2202 if (uap->cnt > somaxrecvmsgx)
2203 uap->cnt = somaxrecvmsgx;
2204
2205 user_msg_x = _MALLOC(uap->cnt * sizeof(struct user_msghdr_x),
fe8ab488 2206 M_TEMP, M_WAITOK | M_ZERO);
3e170ce0
A
2207 if (user_msg_x == NULL) {
2208 DBG_PRINTF("%s _MALLOC() user_msg_x failed\n", __func__);
fe8ab488
A
2209 error = ENOMEM;
2210 goto out;
2211 }
3e170ce0
A
2212 recv_msg_array = alloc_recv_msg_array(uap->cnt);
2213 if (recv_msg_array == NULL) {
2214 DBG_PRINTF("%s alloc_recv_msg_array() failed\n", __func__);
fe8ab488
A
2215 error = ENOMEM;
2216 goto out;
2217 }
fe8ab488
A
2218 size_of_msghdr = IS_64BIT_PROCESS(p) ?
2219 sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x);
2220
2221 umsgp = _MALLOC(uap->cnt * size_of_msghdr, M_TEMP, M_WAITOK | M_ZERO);
2222 if (umsgp == NULL) {
3e170ce0 2223 DBG_PRINTF("%s _MALLOC() umsgp failed\n", __func__);
fe8ab488
A
2224 error = ENOMEM;
2225 goto out;
2226 }
2227 error = copyin(uap->msgp, umsgp, uap->cnt * size_of_msghdr);
2228 if (error) {
3e170ce0 2229 DBG_PRINTF("%s copyin() failed\n", __func__);
fe8ab488
A
2230 goto out;
2231 }
3e170ce0 2232 error = internalize_recv_msghdr_array(umsgp,
fe8ab488 2233 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
3e170ce0 2234 UIO_READ, uap->cnt, user_msg_x, recv_msg_array);
fe8ab488 2235 if (error) {
3e170ce0 2236 DBG_PRINTF("%s copyin_user_msghdr_array() failed\n", __func__);
fe8ab488
A
2237 goto out;
2238 }
2239 /*
2240 * Make sure the size of each message iovec and
2241 * the aggregate size of all the iovec is valid
2242 */
3e170ce0 2243 if (recv_msg_array_is_valid(recv_msg_array, uap->cnt) == 0) {
fe8ab488
A
2244 error = EINVAL;
2245 goto out;
2246 }
fe8ab488
A
2247 /*
2248 * Sanity check on passed arguments
2249 */
2250 for (i = 0; i < uap->cnt; i++) {
3e170ce0 2251 struct user_msghdr_x *mp = user_msg_x + i;
fe8ab488
A
2252
2253 if (mp->msg_flags != 0) {
2254 error = EINVAL;
2255 goto out;
2256 }
fe8ab488
A
2257 }
2258#if CONFIG_MACF_SOCKET_SUBSET
2259 /*
2260 * We check the state without holding the socket lock;
2261 * if a race condition occurs, it would simply result
2262 * in an extra call to the MAC check function.
2263 */
2264 if (!(so->so_state & SS_DEFUNCT) &&
2265 !(so->so_state & SS_ISCONNECTED) &&
2266 !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
2267 (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0)
2268 goto out;
2269#endif /* MAC_SOCKET_SUBSET */
2270
3e170ce0 2271 len_before = recv_msg_array_resid(recv_msg_array, uap->cnt);
fe8ab488 2272
3e170ce0
A
2273 if (so->so_proto->pr_usrreqs->pru_soreceive_list !=
2274 pru_soreceive_list_notsupp &&
2275 somaxrecvmsgx == 0) {
2276 error = so->so_proto->pr_usrreqs->pru_soreceive_list(so,
2277 recv_msg_array, uap->cnt, &uap->flags);
2278 } else {
2279 int flags = uap->flags;
fe8ab488 2280
3e170ce0
A
2281 for (i = 0; i < uap->cnt; i++) {
2282 struct recv_msg_elem *recv_msg_elem;
2283 uio_t auio;
2284 struct sockaddr **psa;
2285 struct mbuf **controlp;
2286
2287 recv_msg_elem = recv_msg_array + i;
2288 auio = recv_msg_elem->uio;
2289
2290 /*
2291 * Do not block if we got at least one packet
2292 */
2293 if (i > 0)
2294 flags |= MSG_DONTWAIT;
2295
2296 psa = (recv_msg_elem->which & SOCK_MSG_SA) ?
2297 &recv_msg_elem->psa : NULL;
2298 controlp = (recv_msg_elem->which & SOCK_MSG_CONTROL) ?
2299 &recv_msg_elem->controlp : NULL;
2300
2301 error = so->so_proto->pr_usrreqs->pru_soreceive(so, psa,
2302 auio, (struct mbuf **)0, controlp, &flags);
2303 if (error)
2304 break;
2305 /*
2306 * We have some data
2307 */
2308 recv_msg_elem->which |= SOCK_MSG_DATA;
2309 /*
2310 * Stop on partial copy
2311 */
2312 if (flags & (MSG_RCVMORE | MSG_TRUNC))
2313 break;
2314 }
2315 if ((uap->flags & MSG_DONTWAIT) == 0)
2316 flags &= ~MSG_DONTWAIT;
2317 uap->flags = flags;
2318 }
2319
2320 len_after = recv_msg_array_resid(recv_msg_array, uap->cnt);
fe8ab488
A
2321
2322 if (error) {
2323 if (len_after != len_before && (error == ERESTART ||
2324 error == EINTR || error == EWOULDBLOCK))
2325 error = 0;
3e170ce0
A
2326 else
2327 goto out;
fe8ab488 2328 }
fe8ab488 2329
3e170ce0
A
2330 uiocnt = externalize_recv_msghdr_array(umsgp,
2331 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
2332 UIO_READ, uap->cnt, user_msg_x, recv_msg_array);
2333
2334 error = copyout(umsgp, uap->msgp, uap->cnt * size_of_msghdr);
2335 if (error) {
2336 DBG_PRINTF("%s copyout() failed\n", __func__);
2337 goto out;
2338 }
2339 *retval = (int)(uiocnt);
2340
2341 for (i = 0; i < uap->cnt; i++) {
2342 struct user_msghdr_x *mp = user_msg_x + i;
2343 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
2344 struct sockaddr *fromsa = recv_msg_elem->psa;
2345
2346 if (mp->msg_name) {
2347 error = copyout_sa(fromsa, mp->msg_name,
2348 &mp->msg_namelen);
2349 if (error)
2350 goto out;
2351 }
2352 if (mp->msg_control) {
2353 error = copyout_control(p, recv_msg_elem->controlp,
2354 mp->msg_control, &mp->msg_controllen,
2355 &mp->msg_flags);
2356 if (error)
2357 goto out;
fe8ab488 2358 }
fe8ab488
A
2359 }
2360out:
2361 if (need_drop)
2362 file_drop(uap->s);
2363 if (umsgp != NULL)
2364 _FREE(umsgp, M_TEMP);
3e170ce0
A
2365 if (recv_msg_array != NULL)
2366 free_recv_msg_array(recv_msg_array, uap->cnt);
2367 if (user_msg_x != NULL)
2368 _FREE(user_msg_x, M_TEMP);
2369
fe8ab488 2370 KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0);
3e170ce0 2371
fe8ab488
A
2372 return (error);
2373}
2374
2d21ac55
A
2375/*
2376 * Returns: 0 Success
2377 * EBADF
2378 * file_socket:ENOTSOCK
2379 * file_socket:EBADF
2380 * soshutdown:EINVAL
2381 * soshutdown:ENOTCONN
2382 * soshutdown:EADDRNOTAVAIL[TCP]
2383 * soshutdown:ENOBUFS[TCP]
2384 * soshutdown:EMSGSIZE[TCP]
2385 * soshutdown:EHOSTUNREACH[TCP]
2386 * soshutdown:ENETUNREACH[TCP]
2387 * soshutdown:ENETDOWN[TCP]
2388 * soshutdown:ENOMEM[TCP]
2389 * soshutdown:EACCES[TCP]
2390 * soshutdown:EMSGSIZE[TCP]
2391 * soshutdown:ENOBUFS[TCP]
2392 * soshutdown:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
2393 * soshutdown:??? [other protocol families]
2394 */
1c79356b
A
2395/* ARGSUSED */
2396int
2d21ac55 2397shutdown(__unused struct proc *p, struct shutdown_args *uap,
b0d623f7 2398 __unused int32_t *retval)
1c79356b 2399{
2d21ac55 2400 struct socket *so;
1c79356b
A
2401 int error;
2402
55e303ae 2403 AUDIT_ARG(fd, uap->s);
91447636 2404 error = file_socket(uap->s, &so);
1c79356b
A
2405 if (error)
2406 return (error);
91447636
A
2407 if (so == NULL) {
2408 error = EBADF;
2409 goto out;
2410 }
2411 error = soshutdown((struct socket *)so, uap->how);
2412out:
2413 file_drop(uap->s);
2d21ac55 2414 return (error);
1c79356b
A
2415}
2416
2d21ac55
A
2417/*
2418 * Returns: 0 Success
2419 * EFAULT
2420 * EINVAL
2421 * EACCES Mandatory Access Control failure
2422 * file_socket:ENOTSOCK
2423 * file_socket:EBADF
2424 * sosetopt:EINVAL
2425 * sosetopt:ENOPROTOOPT
2426 * sosetopt:ENOBUFS
2427 * sosetopt:EDOM
2428 * sosetopt:EFAULT
2429 * sosetopt:EOPNOTSUPP[AF_UNIX]
2430 * sosetopt:???
2431 */
1c79356b
A
2432/* ARGSUSED */
2433int
2d21ac55 2434setsockopt(struct proc *p, struct setsockopt_args *uap,
b0d623f7 2435 __unused int32_t *retval)
1c79356b 2436{
2d21ac55 2437 struct socket *so;
1c79356b
A
2438 struct sockopt sopt;
2439 int error;
2440
55e303ae 2441 AUDIT_ARG(fd, uap->s);
1c79356b
A
2442 if (uap->val == 0 && uap->valsize != 0)
2443 return (EFAULT);
2d21ac55 2444 /* No bounds checking on size (it's unsigned) */
1c79356b 2445
91447636 2446 error = file_socket(uap->s, &so);
1c79356b
A
2447 if (error)
2448 return (error);
2449
2450 sopt.sopt_dir = SOPT_SET;
2451 sopt.sopt_level = uap->level;
2452 sopt.sopt_name = uap->name;
2453 sopt.sopt_val = uap->val;
2454 sopt.sopt_valsize = uap->valsize;
2455 sopt.sopt_p = p;
2456
91447636
A
2457 if (so == NULL) {
2458 error = EINVAL;
2459 goto out;
2460 }
2d21ac55
A
2461#if CONFIG_MACF_SOCKET_SUBSET
2462 if ((error = mac_socket_check_setsockopt(kauth_cred_get(), so,
2463 &sopt)) != 0)
2464 goto out;
2465#endif /* MAC_SOCKET_SUBSET */
39236c6e 2466 error = sosetoptlock(so, &sopt, 1); /* will lock socket */
91447636
A
2467out:
2468 file_drop(uap->s);
2d21ac55 2469 return (error);
1c79356b
A
2470}
2471
2472
2473
2d21ac55
A
2474/*
2475 * Returns: 0 Success
2476 * EINVAL
2477 * EBADF
2478 * EACCES Mandatory Access Control failure
2479 * copyin:EFAULT
2480 * copyout:EFAULT
2481 * file_socket:ENOTSOCK
2482 * file_socket:EBADF
2483 * sogetopt:???
2484 */
1c79356b 2485int
2d21ac55 2486getsockopt(struct proc *p, struct getsockopt_args *uap,
b0d623f7 2487 __unused int32_t *retval)
1c79356b 2488{
91447636
A
2489 int error;
2490 socklen_t valsize;
2491 struct sockopt sopt;
2d21ac55 2492 struct socket *so;
1c79356b 2493
91447636 2494 error = file_socket(uap->s, &so);
1c79356b
A
2495 if (error)
2496 return (error);
2497 if (uap->val) {
2d21ac55
A
2498 error = copyin(uap->avalsize, (caddr_t)&valsize,
2499 sizeof (valsize));
1c79356b 2500 if (error)
91447636 2501 goto out;
2d21ac55
A
2502 /* No bounds checking on size (it's unsigned) */
2503 } else {
1c79356b 2504 valsize = 0;
2d21ac55 2505 }
1c79356b
A
2506 sopt.sopt_dir = SOPT_GET;
2507 sopt.sopt_level = uap->level;
2508 sopt.sopt_name = uap->name;
2509 sopt.sopt_val = uap->val;
2510 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
2511 sopt.sopt_p = p;
2512
91447636
A
2513 if (so == NULL) {
2514 error = EBADF;
2515 goto out;
2516 }
2d21ac55
A
2517#if CONFIG_MACF_SOCKET_SUBSET
2518 if ((error = mac_socket_check_getsockopt(kauth_cred_get(), so,
2519 &sopt)) != 0)
2520 goto out;
2521#endif /* MAC_SOCKET_SUBSET */
39236c6e 2522 error = sogetoptlock((struct socket *)so, &sopt, 1); /* will lock */
1c79356b
A
2523 if (error == 0) {
2524 valsize = sopt.sopt_valsize;
2d21ac55
A
2525 error = copyout((caddr_t)&valsize, uap->avalsize,
2526 sizeof (valsize));
1c79356b 2527 }
91447636
A
2528out:
2529 file_drop(uap->s);
1c79356b
A
2530 return (error);
2531}
2532
2533
2534/*
2535 * Get socket name.
2d21ac55
A
2536 *
2537 * Returns: 0 Success
2538 * EBADF
2539 * file_socket:ENOTSOCK
2540 * file_socket:EBADF
2541 * copyin:EFAULT
2542 * copyout:EFAULT
2543 * <pru_sockaddr>:ENOBUFS[TCP]
2544 * <pru_sockaddr>:ECONNRESET[TCP]
2545 * <pru_sockaddr>:EINVAL[AF_UNIX]
2546 * <sf_getsockname>:???
1c79356b
A
2547 */
2548/* ARGSUSED */
2d21ac55
A
2549int
2550getsockname(__unused struct proc *p, struct getsockname_args *uap,
b0d623f7 2551 __unused int32_t *retval)
1c79356b 2552{
91447636 2553 struct socket *so;
1c79356b 2554 struct sockaddr *sa;
91447636 2555 socklen_t len;
2d21ac55 2556 socklen_t sa_len;
1c79356b
A
2557 int error;
2558
91447636 2559 error = file_socket(uap->fdes, &so);
1c79356b
A
2560 if (error)
2561 return (error);
2d21ac55 2562 error = copyin(uap->alen, (caddr_t)&len, sizeof (socklen_t));
1c79356b 2563 if (error)
91447636
A
2564 goto out;
2565 if (so == NULL) {
2566 error = EBADF;
2567 goto out;
2568 }
1c79356b 2569 sa = 0;
91447636 2570 socket_lock(so, 1);
1c79356b 2571 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
2d21ac55 2572 if (error == 0) {
6d2010ae 2573 error = sflt_getsockname(so, &sa);
91447636
A
2574 if (error == EJUSTRETURN)
2575 error = 0;
91447636
A
2576 }
2577 socket_unlock(so, 1);
1c79356b
A
2578 if (error)
2579 goto bad;
2580 if (sa == 0) {
2581 len = 0;
2582 goto gotnothing;
2583 }
2584
2d21ac55
A
2585 sa_len = sa->sa_len;
2586 len = MIN(len, sa_len);
91447636 2587 error = copyout((caddr_t)sa, uap->asa, len);
2d21ac55
A
2588 if (error)
2589 goto bad;
2590 /* return the actual, untruncated address length */
2591 len = sa_len;
1c79356b 2592gotnothing:
2d21ac55 2593 error = copyout((caddr_t)&len, uap->alen, sizeof (socklen_t));
1c79356b
A
2594bad:
2595 if (sa)
2596 FREE(sa, M_SONAME);
91447636
A
2597out:
2598 file_drop(uap->fdes);
1c79356b
A
2599 return (error);
2600}
2601
1c79356b
A
2602/*
2603 * Get name of peer for connected socket.
2d21ac55
A
2604 *
2605 * Returns: 0 Success
2606 * EBADF
2607 * EINVAL
2608 * ENOTCONN
2609 * file_socket:ENOTSOCK
2610 * file_socket:EBADF
2611 * copyin:EFAULT
2612 * copyout:EFAULT
2613 * <pru_peeraddr>:???
2614 * <sf_getpeername>:???
1c79356b
A
2615 */
2616/* ARGSUSED */
2617int
2d21ac55 2618getpeername(__unused struct proc *p, struct getpeername_args *uap,
b0d623f7 2619 __unused int32_t *retval)
1c79356b 2620{
91447636 2621 struct socket *so;
1c79356b 2622 struct sockaddr *sa;
91447636 2623 socklen_t len;
2d21ac55 2624 socklen_t sa_len;
1c79356b
A
2625 int error;
2626
91447636 2627 error = file_socket(uap->fdes, &so);
1c79356b
A
2628 if (error)
2629 return (error);
91447636
A
2630 if (so == NULL) {
2631 error = EBADF;
2632 goto out;
2633 }
2634
2635 socket_lock(so, 1);
2636
2d21ac55
A
2637 if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
2638 (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
2639 /* the socket has been shutdown, no more getpeername's */
2640 socket_unlock(so, 1);
2641 error = EINVAL;
2642 goto out;
2643 }
2644
91447636
A
2645 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
2646 socket_unlock(so, 1);
2647 error = ENOTCONN;
2648 goto out;
2649 }
2d21ac55 2650 error = copyin(uap->alen, (caddr_t)&len, sizeof (socklen_t));
91447636
A
2651 if (error) {
2652 socket_unlock(so, 1);
2653 goto out;
2654 }
1c79356b
A
2655 sa = 0;
2656 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
2d21ac55 2657 if (error == 0) {
6d2010ae 2658 error = sflt_getpeername(so, &sa);
91447636
A
2659 if (error == EJUSTRETURN)
2660 error = 0;
91447636
A
2661 }
2662 socket_unlock(so, 1);
1c79356b
A
2663 if (error)
2664 goto bad;
2665 if (sa == 0) {
2666 len = 0;
2667 goto gotnothing;
2668 }
2d21ac55
A
2669 sa_len = sa->sa_len;
2670 len = MIN(len, sa_len);
91447636 2671 error = copyout(sa, uap->asa, len);
1c79356b
A
2672 if (error)
2673 goto bad;
2d21ac55
A
2674 /* return the actual, untruncated address length */
2675 len = sa_len;
1c79356b 2676gotnothing:
2d21ac55 2677 error = copyout((caddr_t)&len, uap->alen, sizeof (socklen_t));
1c79356b
A
2678bad:
2679 if (sa) FREE(sa, M_SONAME);
91447636
A
2680out:
2681 file_drop(uap->fdes);
1c79356b
A
2682 return (error);
2683}
2684
2685int
2d21ac55 2686sockargs(struct mbuf **mp, user_addr_t data, int buflen, int type)
1c79356b 2687{
2d21ac55
A
2688 struct sockaddr *sa;
2689 struct mbuf *m;
1c79356b
A
2690 int error;
2691
e2d2fc5c 2692 size_t alloc_buflen = (size_t)buflen;
39236c6e 2693
3e170ce0 2694 if (alloc_buflen > INT_MAX/2)
e2d2fc5c 2695 return (EINVAL);
b0d623f7 2696#ifdef __LP64__
3e170ce0
A
2697 /*
2698 * The fd's in the buffer must expand to be pointers, thus we need twice
2699 * as much space
2700 */
2701 if (type == MT_CONTROL)
2702 alloc_buflen = ((buflen - sizeof(struct cmsghdr))*2) +
2703 sizeof(struct cmsghdr);
b0d623f7 2704#endif
e2d2fc5c
A
2705 if (alloc_buflen > MLEN) {
2706 if (type == MT_SONAME && alloc_buflen <= 112)
3e170ce0 2707 alloc_buflen = MLEN; /* unix domain compat. hack */
e2d2fc5c 2708 else if (alloc_buflen > MCLBYTES)
91447636 2709 return (EINVAL);
1c79356b
A
2710 }
2711 m = m_get(M_WAIT, type);
2712 if (m == NULL)
2713 return (ENOBUFS);
e2d2fc5c 2714 if (alloc_buflen > MLEN) {
91447636
A
2715 MCLGET(m, M_WAIT);
2716 if ((m->m_flags & M_EXT) == 0) {
2717 m_free(m);
2d21ac55 2718 return (ENOBUFS);
91447636
A
2719 }
2720 }
3e170ce0
A
2721 /*
2722 * K64: We still copyin the original buflen because it gets expanded
2723 * later and we lie about the size of the mbuf because it only affects
2724 * unp_* functions
b0d623f7 2725 */
1c79356b 2726 m->m_len = buflen;
91447636 2727 error = copyin(data, mtod(m, caddr_t), (u_int)buflen);
2d21ac55 2728 if (error) {
1c79356b 2729 (void) m_free(m);
2d21ac55 2730 } else {
1c79356b
A
2731 *mp = m;
2732 if (type == MT_SONAME) {
2733 sa = mtod(m, struct sockaddr *);
1c79356b
A
2734 sa->sa_len = buflen;
2735 }
2736 }
2737 return (error);
2738}
2739
91447636
A
2740/*
2741 * Given a user_addr_t of length len, allocate and fill out a *sa.
2d21ac55
A
2742 *
2743 * Returns: 0 Success
2744 * ENAMETOOLONG Filename too long
2745 * EINVAL Invalid argument
2746 * ENOMEM Not enough space
2747 * copyin:EFAULT Bad address
91447636 2748 */
2d21ac55
A
2749static int
2750getsockaddr(struct socket *so, struct sockaddr **namp, user_addr_t uaddr,
4a3eedf9 2751 size_t len, boolean_t translate_unspec)
1c79356b
A
2752{
2753 struct sockaddr *sa;
2754 int error;
2755
2756 if (len > SOCK_MAXADDRLEN)
2d21ac55 2757 return (ENAMETOOLONG);
1c79356b 2758
2d21ac55
A
2759 if (len < offsetof(struct sockaddr, sa_data[0]))
2760 return (EINVAL);
1c79356b 2761
490019cf 2762 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK | M_ZERO);
91447636 2763 if (sa == NULL) {
2d21ac55 2764 return (ENOMEM);
91447636
A
2765 }
2766 error = copyin(uaddr, (caddr_t)sa, len);
1c79356b
A
2767 if (error) {
2768 FREE(sa, M_SONAME);
2769 } else {
2d21ac55
A
2770 /*
2771 * Force sa_family to AF_INET on AF_INET sockets to handle
2772 * legacy applications that use AF_UNSPEC (0). On all other
2773 * sockets we leave it unchanged and let the lower layer
2774 * handle it.
2775 */
4a3eedf9 2776 if (translate_unspec && sa->sa_family == AF_UNSPEC &&
39236c6e 2777 SOCK_CHECK_DOM(so, PF_INET) &&
2d21ac55
A
2778 len == sizeof (struct sockaddr_in))
2779 sa->sa_family = AF_INET;
2780
1c79356b
A
2781 sa->sa_len = len;
2782 *namp = sa;
2783 }
2d21ac55 2784 return (error);
1c79356b
A
2785}
2786
2d21ac55
A
2787static int
2788getsockaddr_s(struct socket *so, struct sockaddr_storage *ss,
4a3eedf9 2789 user_addr_t uaddr, size_t len, boolean_t translate_unspec)
1c79356b 2790{
2d21ac55
A
2791 int error;
2792
2793 if (ss == NULL || uaddr == USER_ADDR_NULL ||
2794 len < offsetof(struct sockaddr, sa_data[0]))
2795 return (EINVAL);
2796
2797 /*
2798 * sockaddr_storage size is less than SOCK_MAXADDRLEN,
2799 * so the check here is inclusive.
2800 */
2801 if (len > sizeof (*ss))
2802 return (ENAMETOOLONG);
1c79356b 2803
2d21ac55
A
2804 bzero(ss, sizeof (*ss));
2805 error = copyin(uaddr, (caddr_t)ss, len);
2806 if (error == 0) {
2807 /*
2808 * Force sa_family to AF_INET on AF_INET sockets to handle
2809 * legacy applications that use AF_UNSPEC (0). On all other
2810 * sockets we leave it unchanged and let the lower layer
2811 * handle it.
2812 */
4a3eedf9 2813 if (translate_unspec && ss->ss_family == AF_UNSPEC &&
39236c6e 2814 SOCK_CHECK_DOM(so, PF_INET) &&
2d21ac55
A
2815 len == sizeof (struct sockaddr_in))
2816 ss->ss_family = AF_INET;
91447636 2817
2d21ac55 2818 ss->ss_len = len;
1c79356b 2819 }
2d21ac55 2820 return (error);
1c79356b
A
2821}
2822
39236c6e
A
2823/*
2824 * Hard limit on the number of source and/or destination addresses
2825 * that can be specified by an application.
2826 */
2827#define SOCKADDRLIST_MAX_ENTRIES 64
2828
2829static int
2830getsockaddrlist(struct socket *so, struct sockaddr_list **slp,
2831 user_addr_t uaddr, socklen_t uaddrlen, boolean_t xlate_unspec)
2832{
2833 struct sockaddr_list *sl;
2834 int error = 0;
2d21ac55 2835
39236c6e
A
2836 *slp = NULL;
2837
490019cf
A
2838 if (uaddr == USER_ADDR_NULL || uaddrlen == 0 ||
2839 uaddrlen > (sizeof(struct sockaddr_in6) * SOCKADDRLIST_MAX_ENTRIES))
39236c6e
A
2840 return (EINVAL);
2841
2842 sl = sockaddrlist_alloc(M_WAITOK);
2843 if (sl == NULL)
2844 return (ENOMEM);
2845
2846 VERIFY(sl->sl_cnt == 0);
2847 while (uaddrlen > 0 && sl->sl_cnt < SOCKADDRLIST_MAX_ENTRIES) {
2848 struct sockaddr_storage ss;
2849 struct sockaddr_entry *se;
2850 struct sockaddr *sa;
2851
2852 if (uaddrlen < sizeof (struct sockaddr)) {
2853 error = EINVAL;
2854 break;
2855 }
2856
2857 bzero(&ss, sizeof (ss));
2858 error = copyin(uaddr, (caddr_t)&ss, sizeof (struct sockaddr));
2859 if (error != 0)
2860 break;
2861
2862 /* getsockaddr does the same but we need them now */
2863 if (uaddrlen < ss.ss_len ||
2864 ss.ss_len < offsetof(struct sockaddr, sa_data[0])) {
2865 error = EINVAL;
2866 break;
2867 } else if (ss.ss_len > sizeof (ss)) {
2868 /*
2869 * sockaddr_storage size is less than SOCK_MAXADDRLEN,
490019cf 2870 * so the check here is inclusive. We could use the
39236c6e
A
2871 * latter instead, but seems like an overkill for now.
2872 */
2873 error = ENAMETOOLONG;
2874 break;
2875 }
2876
2877 se = sockaddrentry_alloc(M_WAITOK);
490019cf
A
2878 if (se == NULL) {
2879 error = ENOBUFS;
39236c6e 2880 break;
490019cf 2881 }
39236c6e
A
2882
2883 sockaddrlist_insert(sl, se);
2884
2885 error = getsockaddr(so, &sa, uaddr, ss.ss_len, xlate_unspec);
2886 if (error != 0)
2887 break;
2888
2889 VERIFY(sa != NULL && sa->sa_len == ss.ss_len);
2890 se->se_addr = sa;
2891
2892 uaddr += ss.ss_len;
2893 VERIFY(((signed)uaddrlen - ss.ss_len) >= 0);
2894 uaddrlen -= ss.ss_len;
2895 }
2896
2897 if (error != 0)
2898 sockaddrlist_free(sl);
2899 else
2900 *slp = sl;
2901
2902 return (error);
2903}
2904
fe8ab488
A
2905int
2906internalize_user_msghdr_array(const void *src, int spacetype, int direction,
3e170ce0 2907 u_int count, struct user_msghdr_x *dst, struct uio **uiop)
fe8ab488
A
2908{
2909 int error = 0;
2910 u_int i;
3e170ce0
A
2911 u_int namecnt = 0;
2912 u_int ctlcnt = 0;
fe8ab488
A
2913
2914 for (i = 0; i < count; i++) {
2915 uio_t auio;
2916 struct user_iovec *iovp;
3e170ce0 2917 struct user_msghdr_x *user_msg = dst + i;
fe8ab488
A
2918
2919 if (spacetype == UIO_USERSPACE64) {
3e170ce0 2920 const struct user64_msghdr_x *msghdr64;
fe8ab488 2921
3e170ce0 2922 msghdr64 = ((const struct user64_msghdr_x *)src) + i;
fe8ab488
A
2923
2924 user_msg->msg_name = msghdr64->msg_name;
2925 user_msg->msg_namelen = msghdr64->msg_namelen;
2926 user_msg->msg_iov = msghdr64->msg_iov;
2927 user_msg->msg_iovlen = msghdr64->msg_iovlen;
2928 user_msg->msg_control = msghdr64->msg_control;
2929 user_msg->msg_controllen = msghdr64->msg_controllen;
2930 user_msg->msg_flags = msghdr64->msg_flags;
2931 user_msg->msg_datalen = msghdr64->msg_datalen;
2932 } else {
3e170ce0 2933 const struct user32_msghdr_x *msghdr32;
fe8ab488 2934
3e170ce0 2935 msghdr32 = ((const struct user32_msghdr_x *)src) + i;
fe8ab488
A
2936
2937 user_msg->msg_name = msghdr32->msg_name;
2938 user_msg->msg_namelen = msghdr32->msg_namelen;
2939 user_msg->msg_iov = msghdr32->msg_iov;
2940 user_msg->msg_iovlen = msghdr32->msg_iovlen;
2941 user_msg->msg_control = msghdr32->msg_control;
2942 user_msg->msg_controllen = msghdr32->msg_controllen;
2943 user_msg->msg_flags = msghdr32->msg_flags;
2944 user_msg->msg_datalen = msghdr32->msg_datalen;
2945 }
3e170ce0
A
2946
2947 if (user_msg->msg_iovlen <= 0 ||
2948 user_msg->msg_iovlen > UIO_MAXIOV) {
fe8ab488
A
2949 error = EMSGSIZE;
2950 goto done;
2951 }
3e170ce0
A
2952 auio = uio_create(user_msg->msg_iovlen, 0, spacetype,
2953 direction);
fe8ab488
A
2954 if (auio == NULL) {
2955 error = ENOMEM;
2956 goto done;
2957 }
2958 uiop[i] = auio;
2959
3e170ce0
A
2960 iovp = uio_iovsaddr(auio);
2961 if (iovp == NULL) {
2962 error = ENOMEM;
2963 goto done;
2964 }
2965 error = copyin_user_iovec_array(user_msg->msg_iov,
2966 spacetype, user_msg->msg_iovlen, iovp);
2967 if (error)
2968 goto done;
2969 user_msg->msg_iov = CAST_USER_ADDR_T(iovp);
fe8ab488 2970
3e170ce0
A
2971 error = uio_calculateresid(auio);
2972 if (error)
2973 goto done;
2974 user_msg->msg_datalen = uio_resid(auio);
2975
2976 if (user_msg->msg_name && user_msg->msg_namelen)
2977 namecnt++;
2978 if (user_msg->msg_control && user_msg->msg_controllen)
2979 ctlcnt++;
2980 }
2981done:
2982
2983 return (error);
2984}
2985
2986int
2987internalize_recv_msghdr_array(const void *src, int spacetype, int direction,
2988 u_int count, struct user_msghdr_x *dst,
2989 struct recv_msg_elem *recv_msg_array)
2990{
2991 int error = 0;
2992 u_int i;
2993
2994 for (i = 0; i < count; i++) {
2995 struct user_iovec *iovp;
2996 struct user_msghdr_x *user_msg = dst + i;
2997 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
2998
2999 if (spacetype == UIO_USERSPACE64) {
3000 const struct user64_msghdr_x *msghdr64;
3001
3002 msghdr64 = ((const struct user64_msghdr_x *)src) + i;
3003
3004 user_msg->msg_name = msghdr64->msg_name;
3005 user_msg->msg_namelen = msghdr64->msg_namelen;
3006 user_msg->msg_iov = msghdr64->msg_iov;
3007 user_msg->msg_iovlen = msghdr64->msg_iovlen;
3008 user_msg->msg_control = msghdr64->msg_control;
3009 user_msg->msg_controllen = msghdr64->msg_controllen;
3010 user_msg->msg_flags = msghdr64->msg_flags;
3011 user_msg->msg_datalen = msghdr64->msg_datalen;
fe8ab488 3012 } else {
3e170ce0
A
3013 const struct user32_msghdr_x *msghdr32;
3014
3015 msghdr32 = ((const struct user32_msghdr_x *)src) + i;
3016
3017 user_msg->msg_name = msghdr32->msg_name;
3018 user_msg->msg_namelen = msghdr32->msg_namelen;
3019 user_msg->msg_iov = msghdr32->msg_iov;
3020 user_msg->msg_iovlen = msghdr32->msg_iovlen;
3021 user_msg->msg_control = msghdr32->msg_control;
3022 user_msg->msg_controllen = msghdr32->msg_controllen;
3023 user_msg->msg_flags = msghdr32->msg_flags;
3024 user_msg->msg_datalen = msghdr32->msg_datalen;
fe8ab488 3025 }
3e170ce0
A
3026
3027 if (user_msg->msg_iovlen <= 0 ||
3028 user_msg->msg_iovlen > UIO_MAXIOV) {
3029 error = EMSGSIZE;
3030 goto done;
3031 }
3032 recv_msg_elem->uio = uio_create(user_msg->msg_iovlen, 0,
3033 spacetype, direction);
3034 if (recv_msg_elem->uio == NULL) {
3035 error = ENOMEM;
3036 goto done;
3037 }
3038
3039 iovp = uio_iovsaddr(recv_msg_elem->uio);
3040 if (iovp == NULL) {
3041 error = ENOMEM;
3042 goto done;
3043 }
3044 error = copyin_user_iovec_array(user_msg->msg_iov,
3045 spacetype, user_msg->msg_iovlen, iovp);
3046 if (error)
3047 goto done;
3048 user_msg->msg_iov = CAST_USER_ADDR_T(iovp);
3049
3050 error = uio_calculateresid(recv_msg_elem->uio);
3051 if (error)
3052 goto done;
3053 user_msg->msg_datalen = uio_resid(recv_msg_elem->uio);
3054
3055 if (user_msg->msg_name && user_msg->msg_namelen)
3056 recv_msg_elem->which |= SOCK_MSG_SA;
3057 if (user_msg->msg_control && user_msg->msg_controllen)
3058 recv_msg_elem->which |= SOCK_MSG_CONTROL;
fe8ab488
A
3059 }
3060done:
3e170ce0 3061
fe8ab488
A
3062 return (error);
3063}
3064
3065u_int
3066externalize_user_msghdr_array(void *dst, int spacetype, int direction,
3e170ce0 3067 u_int count, const struct user_msghdr_x *src, struct uio **uiop)
fe8ab488
A
3068{
3069#pragma unused(direction)
3070 u_int i;
3071 int seenlast = 0;
3072 u_int retcnt = 0;
3073
3074 for (i = 0; i < count; i++) {
3e170ce0 3075 const struct user_msghdr_x *user_msg = src + i;
fe8ab488
A
3076 uio_t auio = uiop[i];
3077 user_ssize_t len = user_msg->msg_datalen - uio_resid(auio);
3078
3079 if (user_msg->msg_datalen != 0 && len == 0)
3080 seenlast = 1;
3e170ce0
A
3081
3082 if (seenlast == 0)
3083 retcnt ++;
3084
3085 if (spacetype == UIO_USERSPACE64) {
3086 struct user64_msghdr_x *msghdr64;
3087
3088 msghdr64 = ((struct user64_msghdr_x *)dst) + i;
3089
3090 msghdr64->msg_flags = user_msg->msg_flags;
3091 msghdr64->msg_datalen = len;
3092
3093 } else {
3094 struct user32_msghdr_x *msghdr32;
3095
3096 msghdr32 = ((struct user32_msghdr_x *)dst) + i;
3097
3098 msghdr32->msg_flags = user_msg->msg_flags;
3099 msghdr32->msg_datalen = len;
3100 }
3101 }
3102 return (retcnt);
3103}
3104
3105u_int
3106externalize_recv_msghdr_array(void *dst, int spacetype, int direction,
3107 u_int count, const struct user_msghdr_x *src,
3108 struct recv_msg_elem *recv_msg_array)
3109{
3110 u_int i;
3111 int seenlast = 0;
3112 u_int retcnt = 0;
3113
3114 for (i = 0; i < count; i++) {
3115 const struct user_msghdr_x *user_msg = src + i;
3116 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3117 user_ssize_t len;
3118
3119 len = user_msg->msg_datalen - uio_resid(recv_msg_elem->uio);
3120
3121 if (direction == UIO_READ) {
3122 if ((recv_msg_elem->which & SOCK_MSG_DATA) == 0)
3123 seenlast = 1;
3124 } else {
3125 if (user_msg->msg_datalen != 0 && len == 0)
3126 seenlast = 1;
3127 }
3128
fe8ab488
A
3129 if (seenlast == 0)
3130 retcnt ++;
3131
3132 if (spacetype == UIO_USERSPACE64) {
3133 struct user64_msghdr_x *msghdr64;
3134
3135 msghdr64 = ((struct user64_msghdr_x *)dst) + i;
3136
3137 msghdr64->msg_flags = user_msg->msg_flags;
3138 msghdr64->msg_datalen = len;
3e170ce0 3139
fe8ab488
A
3140 } else {
3141 struct user32_msghdr_x *msghdr32;
3142
3143 msghdr32 = ((struct user32_msghdr_x *)dst) + i;
3144
3145 msghdr32->msg_flags = user_msg->msg_flags;
3146 msghdr32->msg_datalen = len;
3147 }
3148 }
3149 return (retcnt);
3150}
3151
3152void
3153free_uio_array(struct uio **uiop, u_int count)
3154{
3155 u_int i;
3156
3157 for (i = 0; i < count; i++) {
3158 if (uiop[i] != NULL)
3159 uio_free(uiop[i]);
3160 }
3161}
3162
3163__private_extern__ user_ssize_t
3164uio_array_resid(struct uio **uiop, u_int count)
3165{
3166 user_ssize_t len = 0;
3167 u_int i;
3168
3169 for (i = 0; i < count; i++) {
3170 struct uio *auio = uiop[i];
3171
3e170ce0 3172 if (auio != NULL)
fe8ab488
A
3173 len += uio_resid(auio);
3174 }
3175 return (len);
3176}
3177
3178int
3179uio_array_is_valid(struct uio **uiop, u_int count)
3180{
3181 user_ssize_t len = 0;
3182 u_int i;
3183
3184 for (i = 0; i < count; i++) {
3185 struct uio *auio = uiop[i];
3e170ce0 3186
fe8ab488
A
3187 if (auio != NULL) {
3188 user_ssize_t resid = uio_resid(auio);
3e170ce0 3189
fe8ab488
A
3190 /*
3191 * Sanity check on the validity of the iovec:
3192 * no point of going over sb_max
3193 */
3194 if (resid < 0 || (u_int32_t)resid > sb_max)
3195 return (0);
3e170ce0
A
3196
3197 len += resid;
3198 if (len < 0 || (u_int32_t)len > sb_max)
3199 return (0);
3200 }
3201 }
3202 return (1);
3203}
3204
3205
3206struct recv_msg_elem *
3207alloc_recv_msg_array(u_int count)
3208{
3209 struct recv_msg_elem *recv_msg_array;
3210
3211 recv_msg_array = _MALLOC(count * sizeof(struct recv_msg_elem),
3212 M_TEMP, M_WAITOK | M_ZERO);
3213
3214 return (recv_msg_array);
3215}
3216
3217void
3218free_recv_msg_array(struct recv_msg_elem *recv_msg_array, u_int count)
3219{
3220 u_int i;
3221
3222 for (i = 0; i < count; i++) {
3223 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3224
3225 if (recv_msg_elem->uio != NULL)
3226 uio_free(recv_msg_elem->uio);
3227 if (recv_msg_elem->psa != NULL)
3228 _FREE(recv_msg_elem->psa, M_TEMP);
3229 if (recv_msg_elem->controlp != NULL)
3230 m_freem(recv_msg_elem->controlp);
3231 }
3232 _FREE(recv_msg_array, M_TEMP);
3233}
3234
3235
3236__private_extern__ user_ssize_t
3237recv_msg_array_resid(struct recv_msg_elem *recv_msg_array, u_int count)
3238{
3239 user_ssize_t len = 0;
3240 u_int i;
3241
3242 for (i = 0; i < count; i++) {
3243 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3244
3245 if (recv_msg_elem->uio != NULL)
3246 len += uio_resid(recv_msg_elem->uio);
3247 }
3248 return (len);
3249}
3250
3251int
3252recv_msg_array_is_valid(struct recv_msg_elem *recv_msg_array, u_int count)
3253{
3254 user_ssize_t len = 0;
3255 u_int i;
3256
3257 for (i = 0; i < count; i++) {
3258 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3259
3260 if (recv_msg_elem->uio != NULL) {
3261 user_ssize_t resid = uio_resid(recv_msg_elem->uio);
3262
3263 /*
3264 * Sanity check on the validity of the iovec:
3265 * no point of going over sb_max
3266 */
3267 if (resid < 0 || (u_int32_t)resid > sb_max)
3268 return (0);
3269
fe8ab488
A
3270 len += resid;
3271 if (len < 0 || (u_int32_t)len > sb_max)
3272 return (0);
3273 }
3274 }
3275 return (1);
3276}
3277
39236c6e 3278#if SENDFILE
2d21ac55
A
3279
/* Number of iovec slots in the on-stack uio used for each file read */
#define	SFUIOBUFS	64

/*
 * Number of clusters of size (1 << shift) needed to hold n bytes.
 * n is evaluated as an unsigned int and must be non-zero.
 */
#define	SF_HOWMANY_CL(n, shift)	((((unsigned int)(n) - 1) >> (shift)) + 1)

/* Number of mbufs needed for n bytes, depending on cluster size */
#define	HOWMANY_16K(n)	SF_HOWMANY_CL(n, M16KCLSHIFT)
#define	HOWMANY_4K(n)	SF_HOWMANY_CL(n, MBIGCLSHIFT)

/* Upper send limit in bytes per pass (SFUIOBUFS pages) */
#define	SENDFILE_MAX_BYTES	(SFUIOBUFS << PGSHIFT)

/* The same upper limit expressed as a number of mbuf clusters */
#define	SENDFILE_MAX_16K	HOWMANY_16K(SENDFILE_MAX_BYTES)
#define	SENDFILE_MAX_4K		HOWMANY_4K(SENDFILE_MAX_BYTES)
3292
1c79356b 3293static void
2d21ac55
A
3294alloc_sendpkt(int how, size_t pktlen, unsigned int *maxchunks,
3295 struct mbuf **m, boolean_t jumbocl)
1c79356b 3296{
2d21ac55 3297 unsigned int needed;
1c79356b 3298
2d21ac55
A
3299 if (pktlen == 0)
3300 panic("%s: pktlen (%ld) must be non-zero\n", __func__, pktlen);
1c79356b 3301
2d21ac55
A
3302 /*
3303 * Try to allocate for the whole thing. Since we want full control
3304 * over the buffer size and be able to accept partial result, we can't
3305 * use mbuf_allocpacket(). The logic below is similar to sosend().
3306 */
3307 *m = NULL;
6d2010ae 3308 if (pktlen > MBIGCLBYTES && jumbocl) {
2d21ac55
A
3309 needed = MIN(SENDFILE_MAX_16K, HOWMANY_16K(pktlen));
3310 *m = m_getpackets_internal(&needed, 1, how, 0, M16KCLBYTES);
3311 }
3312 if (*m == NULL) {
3313 needed = MIN(SENDFILE_MAX_4K, HOWMANY_4K(pktlen));
6d2010ae 3314 *m = m_getpackets_internal(&needed, 1, how, 0, MBIGCLBYTES);
2d21ac55
A
3315 }
3316
3317 /*
3318 * Our previous attempt(s) at allocation had failed; the system
3319 * may be short on mbufs, and we want to block until they are
3320 * available. This time, ask just for 1 mbuf and don't return
3321 * until we get it.
3322 */
3323 if (*m == NULL) {
3324 needed = 1;
6d2010ae 3325 *m = m_getpackets_internal(&needed, 1, M_WAIT, 1, MBIGCLBYTES);
1c79356b 3326 }
2d21ac55
A
3327 if (*m == NULL)
3328 panic("%s: blocking allocation returned NULL\n", __func__);
3329
3330 *maxchunks = needed;
1c79356b
A
3331}
3332
3333/*
3334 * sendfile(2).
2d21ac55
A
3335 * int sendfile(int fd, int s, off_t offset, off_t *nbytes,
3336 * struct sf_hdtr *hdtr, int flags)
1c79356b
A
3337 *
3338 * Send a file specified by 'fd' and starting at 'offset' to a socket
2d21ac55
A
3339 * specified by 's'. Send only '*nbytes' of the file or until EOF if
3340 * *nbytes == 0. Optionally add a header and/or trailer to the socket
3341 * output. If specified, write the total number of bytes sent into *nbytes.
1c79356b
A
3342 */
3343int
2d21ac55 3344sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval)
1c79356b 3345{
91447636 3346 struct fileproc *fp;
1c79356b 3347 struct vnode *vp;
1c79356b 3348 struct socket *so;
2d21ac55
A
3349 struct writev_nocancel_args nuap;
3350 user_ssize_t writev_retval;
2d21ac55 3351 struct user_sf_hdtr user_hdtr;
b0d623f7
A
3352 struct user32_sf_hdtr user32_hdtr;
3353 struct user64_sf_hdtr user64_hdtr;
2d21ac55
A
3354 off_t off, xfsize;
3355 off_t nbytes = 0, sbytes = 0;
3356 int error = 0;
3357 size_t sizeof_hdtr;
2d21ac55
A
3358 off_t file_size;
3359 struct vfs_context context = *vfs_context_current();
3e170ce0 3360
2d21ac55
A
3361 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_START), uap->s,
3362 0, 0, 0, 0);
b0d623f7
A
3363
3364 AUDIT_ARG(fd, uap->fd);
3365 AUDIT_ARG(value32, uap->s);
3366
1c79356b
A
3367 /*
3368 * Do argument checking. Must be a regular file in, stream
3369 * type and connected socket out, positive offset.
3370 */
2d21ac55 3371 if ((error = fp_getfvp(p, uap->fd, &fp, &vp))) {
1c79356b 3372 goto done;
2d21ac55
A
3373 }
3374 if ((fp->f_flag & FREAD) == 0) {
91447636
A
3375 error = EBADF;
3376 goto done1;
1c79356b 3377 }
2d21ac55
A
3378 if (vnode_isreg(vp) == 0) {
3379 error = ENOTSUP;
91447636 3380 goto done1;
1c79356b 3381 }
91447636 3382 error = file_socket(uap->s, &so);
2d21ac55 3383 if (error) {
91447636 3384 goto done1;
2d21ac55 3385 }
55e303ae
A
3386 if (so == NULL) {
3387 error = EBADF;
91447636 3388 goto done2;
55e303ae 3389 }
1c79356b
A
3390 if (so->so_type != SOCK_STREAM) {
3391 error = EINVAL;
2d21ac55 3392 goto done2;
1c79356b
A
3393 }
3394 if ((so->so_state & SS_ISCONNECTED) == 0) {
3395 error = ENOTCONN;
2d21ac55 3396 goto done2;
1c79356b
A
3397 }
3398 if (uap->offset < 0) {
3399 error = EINVAL;
2d21ac55 3400 goto done2;
1c79356b 3401 }
2d21ac55
A
3402 if (uap->nbytes == USER_ADDR_NULL) {
3403 error = EINVAL;
3404 goto done2;
3405 }
3406 if (uap->flags != 0) {
3407 error = EINVAL;
3408 goto done2;
3409 }
3410
3411 context.vc_ucred = fp->f_fglob->fg_cred;
3412
3413#if CONFIG_MACF_SOCKET_SUBSET
3414 /* JMM - fetch connected sockaddr? */
3415 error = mac_socket_check_send(context.vc_ucred, so, NULL);
3416 if (error)
3417 goto done2;
3418#endif
3419
3420 /*
3421 * Get number of bytes to send
3422 * Should it applies to size of header and trailer?
3423 * JMM - error handling?
3424 */
3425 copyin(uap->nbytes, &nbytes, sizeof (off_t));
1c79356b
A
3426
3427 /*
3428 * If specified, get the pointer to the sf_hdtr struct for
3429 * any headers/trailers.
3430 */
2d21ac55
A
3431 if (uap->hdtr != USER_ADDR_NULL) {
3432 caddr_t hdtrp;
3433
3434 bzero(&user_hdtr, sizeof (user_hdtr));
3435 if (IS_64BIT_PROCESS(p)) {
b0d623f7
A
3436 hdtrp = (caddr_t)&user64_hdtr;
3437 sizeof_hdtr = sizeof (user64_hdtr);
2d21ac55 3438 } else {
b0d623f7
A
3439 hdtrp = (caddr_t)&user32_hdtr;
3440 sizeof_hdtr = sizeof (user32_hdtr);
2d21ac55
A
3441 }
3442 error = copyin(uap->hdtr, hdtrp, sizeof_hdtr);
1c79356b 3443 if (error)
2d21ac55 3444 goto done2;
b0d623f7
A
3445 if (IS_64BIT_PROCESS(p)) {
3446 user_hdtr.headers = user64_hdtr.headers;
3447 user_hdtr.hdr_cnt = user64_hdtr.hdr_cnt;
3448 user_hdtr.trailers = user64_hdtr.trailers;
3449 user_hdtr.trl_cnt = user64_hdtr.trl_cnt;
3450 } else {
3451 user_hdtr.headers = user32_hdtr.headers;
3452 user_hdtr.hdr_cnt = user32_hdtr.hdr_cnt;
3453 user_hdtr.trailers = user32_hdtr.trailers;
3454 user_hdtr.trl_cnt = user32_hdtr.trl_cnt;
2d21ac55
A
3455 }
3456
1c79356b
A
3457 /*
3458 * Send any headers. Wimp out and use writev(2).
3459 */
2d21ac55
A
3460 if (user_hdtr.headers != USER_ADDR_NULL) {
3461 bzero(&nuap, sizeof (struct writev_args));
1c79356b 3462 nuap.fd = uap->s;
2d21ac55
A
3463 nuap.iovp = user_hdtr.headers;
3464 nuap.iovcnt = user_hdtr.hdr_cnt;
3465 error = writev_nocancel(p, &nuap, &writev_retval);
316670eb 3466 if (error) {
2d21ac55 3467 goto done2;
316670eb 3468 }
2d21ac55 3469 sbytes += writev_retval;
1c79356b
A
3470 }
3471 }
3472
3473 /*
2d21ac55
A
3474 * Get the file size for 2 reasons:
3475 * 1. We don't want to allocate more mbufs than necessary
3476 * 2. We don't want to read past the end of file
1c79356b 3477 */
316670eb 3478 if ((error = vnode_size(vp, &file_size, vfs_context_current())) != 0) {
2d21ac55 3479 goto done2;
316670eb 3480 }
1c79356b
A
3481
3482 /*
2d21ac55
A
3483 * Simply read file data into a chain of mbufs that used with scatter
3484 * gather reads. We're not (yet?) setup to use zero copy external
3485 * mbufs that point to the file pages.
1c79356b 3486 */
2d21ac55 3487 socket_lock(so, 1);
39236c6e 3488 error = sblock(&so->so_snd, SBL_WAIT);
2d21ac55
A
3489 if (error) {
3490 socket_unlock(so, 1);
3491 goto done2;
3492 }
1c79356b 3493 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
2d21ac55 3494 mbuf_t m0 = NULL, m;
39236c6e 3495 unsigned int nbufs = SFUIOBUFS, i;
2d21ac55 3496 uio_t auio;
39236c6e 3497 char uio_buf[UIO_SIZEOF(SFUIOBUFS)]; /* 1 KB !!! */
2d21ac55
A
3498 size_t uiolen;
3499 user_ssize_t rlen;
3500 off_t pgoff;
3501 size_t pktlen;
3502 boolean_t jumbocl;
1c79356b 3503
1c79356b 3504 /*
2d21ac55
A
3505 * Calculate the amount to transfer.
3506 * Align to round number of pages.
3507 * Not to exceed send socket buffer,
1c79356b
A
3508 * the EOF, or the passed in nbytes.
3509 */
2d21ac55
A
3510 xfsize = sbspace(&so->so_snd);
3511
3512 if (xfsize <= 0) {
3513 if (so->so_state & SS_CANTSENDMORE) {
3514 error = EPIPE;
3515 goto done3;
3516 } else if ((so->so_state & SS_NBIO)) {
3517 error = EAGAIN;
3518 goto done3;
3519 } else {
3520 xfsize = PAGE_SIZE;
3521 }
3522 }
3523
3524 if (xfsize > SENDFILE_MAX_BYTES)
3525 xfsize = SENDFILE_MAX_BYTES;
3526 else if (xfsize > PAGE_SIZE)
3527 xfsize = trunc_page(xfsize);
3528 pgoff = off & PAGE_MASK_64;
3529 if (pgoff > 0 && PAGE_SIZE - pgoff < xfsize)
1c79356b 3530 xfsize = PAGE_SIZE_64 - pgoff;
2d21ac55
A
3531 if (nbytes && xfsize > (nbytes - sbytes))
3532 xfsize = nbytes - sbytes;
3533 if (xfsize <= 0)
3534 break;
3535 if (off + xfsize > file_size)
3536 xfsize = file_size - off;
1c79356b
A
3537 if (xfsize <= 0)
3538 break;
2d21ac55 3539
1c79356b 3540 /*
2d21ac55
A
3541 * Attempt to use larger than system page-size clusters for
3542 * large writes only if there is a jumbo cluster pool and
3543 * if the socket is marked accordingly.
1c79356b 3544 */
2d21ac55
A
3545 jumbocl = sosendjcl && njcl > 0 &&
3546 ((so->so_flags & SOF_MULTIPAGES) || sosendjcl_ignore_capab);
3547
3548 socket_unlock(so, 0);
3549 alloc_sendpkt(M_WAIT, xfsize, &nbufs, &m0, jumbocl);
fe8ab488 3550 pktlen = mbuf_pkthdr_maxlen(m0);
b0d623f7 3551 if (pktlen < (size_t)xfsize)
2d21ac55 3552 xfsize = pktlen;
39236c6e 3553
2d21ac55
A
3554 auio = uio_createwithbuffer(nbufs, off, UIO_SYSSPACE,
3555 UIO_READ, &uio_buf[0], sizeof (uio_buf));
3556 if (auio == NULL) {
316670eb
A
3557 printf("sendfile failed. nbufs = %d. %s", nbufs,
3558 "File a radar related to rdar://10146739.\n");
2d21ac55
A
3559 mbuf_freem(m0);
3560 error = ENXIO;
3561 socket_lock(so, 0);
3562 goto done3;
1c79356b 3563 }
1c79356b 3564
2d21ac55 3565 for (i = 0, m = m0, uiolen = 0;
b0d623f7 3566 i < nbufs && m != NULL && uiolen < (size_t)xfsize;
2d21ac55
A
3567 i++, m = mbuf_next(m)) {
3568 size_t mlen = mbuf_maxlen(m);
3569
b0d623f7 3570 if (mlen + uiolen > (size_t)xfsize)
2d21ac55
A
3571 mlen = xfsize - uiolen;
3572 mbuf_setlen(m, mlen);
3573 uio_addiov(auio, CAST_USER_ADDR_T(mbuf_datastart(m)),
3574 mlen);
3575 uiolen += mlen;
3576 }
3577
3578 if (xfsize != uio_resid(auio))
3579 printf("sendfile: xfsize: %lld != uio_resid(auio): "
6d2010ae 3580 "%lld\n", xfsize, (long long)uio_resid(auio));
2d21ac55
A
3581
3582 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_START),
3583 uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
3584 (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
3585 error = fo_read(fp, auio, FOF_OFFSET, &context);
3586 socket_lock(so, 0);
3587 if (error != 0) {
3588 if (uio_resid(auio) != xfsize && (error == ERESTART ||
3589 error == EINTR || error == EWOULDBLOCK)) {
3590 error = 0;
3591 } else {
3592 mbuf_freem(m0);
3593 goto done3;
1c79356b 3594 }
1c79356b 3595 }
2d21ac55
A
3596 xfsize -= uio_resid(auio);
3597 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_END),
3598 uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
3599 (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
3600
3601 if (xfsize == 0) {
3e170ce0 3602 // printf("sendfile: fo_read 0 bytes, EOF\n");
2d21ac55 3603 break;
91447636 3604 }
2d21ac55
A
3605 if (xfsize + off > file_size)
3606 printf("sendfile: xfsize: %lld + off: %lld > file_size:"
3607 "%lld\n", xfsize, off, file_size);
3608 for (i = 0, m = m0, rlen = 0;
3609 i < nbufs && m != NULL && rlen < xfsize;
3610 i++, m = mbuf_next(m)) {
3611 size_t mlen = mbuf_maxlen(m);
3612
b0d623f7 3613 if (rlen + mlen > (size_t)xfsize)
2d21ac55
A
3614 mlen = xfsize - rlen;
3615 mbuf_setlen(m, mlen);
3616
3617 rlen += mlen;
3618 }
3619 mbuf_pkthdr_setlen(m0, xfsize);
3620
1c79356b
A
3621retry_space:
3622 /*
3623 * Make sure that the socket is still able to take more data.
3624 * CANTSENDMORE being true usually means that the connection
3625 * was closed. so_error is true when an error was sensed after
3626 * a previous send.
3627 * The state is checked after the page mapping and buffer
3628 * allocation above since those operations may block and make
3629 * any socket checks stale. From this point forward, nothing
3630 * blocks before the pru_send (or more accurately, any blocking
3631 * results in a loop back to here to re-check).
3632 */
3633 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
3634 if (so->so_state & SS_CANTSENDMORE) {
3635 error = EPIPE;
3636 } else {
3637 error = so->so_error;
3638 so->so_error = 0;
3639 }
2d21ac55
A
3640 m_freem(m0);
3641 goto done3;
1c79356b
A
3642 }
3643 /*
3644 * Wait for socket space to become available. We do this just
3645 * after checking the connection state above in order to avoid
3646 * a race condition with sbwait().
3647 */
2d21ac55 3648 if (sbspace(&so->so_snd) < (long)so->so_snd.sb_lowat) {
1c79356b 3649 if (so->so_state & SS_NBIO) {
2d21ac55 3650 m_freem(m0);
1c79356b 3651 error = EAGAIN;
2d21ac55 3652 goto done3;
1c79356b 3653 }
2d21ac55
A
3654 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT |
3655 DBG_FUNC_START), uap->s, 0, 0, 0, 0);
1c79356b 3656 error = sbwait(&so->so_snd);
2d21ac55
A
3657 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT|
3658 DBG_FUNC_END), uap->s, 0, 0, 0, 0);
1c79356b
A
3659 /*
3660 * An error from sbwait usually indicates that we've
3661 * been interrupted by a signal. If we've sent anything
3662 * then return bytes sent, otherwise return the error.
3663 */
3664 if (error) {
2d21ac55
A
3665 m_freem(m0);
3666 goto done3;
1c79356b
A
3667 }
3668 goto retry_space;
3669 }
39236c6e 3670
6d2010ae 3671 struct mbuf *control = NULL;
2d21ac55
A
3672 {
3673 /*
3674 * Socket filter processing
3675 */
2d21ac55 3676
6d2010ae
A
3677 error = sflt_data_out(so, NULL, &m0, &control, 0);
3678 if (error) {
3679 if (error == EJUSTRETURN) {
3680 error = 0;
3681 continue;
2d21ac55 3682 }
6d2010ae 3683 goto done3;
2d21ac55
A
3684 }
3685 /*
3686 * End Socket filter processing
3687 */
3688 }
3689 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
3690 uap->s, 0, 0, 0, 0);
3691 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m0,
6d2010ae 3692 0, control, p);
2d21ac55
A
3693 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
3694 uap->s, 0, 0, 0, 0);
1c79356b 3695 if (error) {
2d21ac55 3696 goto done3;
1c79356b
A
3697 }
3698 }
39236c6e 3699 sbunlock(&so->so_snd, FALSE); /* will unlock socket */
1c79356b
A
3700 /*
3701 * Send trailers. Wimp out and use writev(2).
3702 */
2d21ac55
A
3703 if (uap->hdtr != USER_ADDR_NULL &&
3704 user_hdtr.trailers != USER_ADDR_NULL) {
3705 bzero(&nuap, sizeof (struct writev_args));
3706 nuap.fd = uap->s;
3707 nuap.iovp = user_hdtr.trailers;
3708 nuap.iovcnt = user_hdtr.trl_cnt;
3709 error = writev_nocancel(p, &nuap, &writev_retval);
316670eb 3710 if (error) {
2d21ac55 3711 goto done2;
316670eb 3712 }
2d21ac55 3713 sbytes += writev_retval;
1c79356b 3714 }
91447636
A
3715done2:
3716 file_drop(uap->s);
3717done1:
3718 file_drop(uap->fd);
1c79356b 3719done:
2d21ac55 3720 if (uap->nbytes != USER_ADDR_NULL) {
91447636 3721 /* XXX this appears bogus for some early failure conditions */
2d21ac55 3722 copyout(&sbytes, uap->nbytes, sizeof (off_t));
1c79356b 3723 }
2d21ac55
A
3724 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_END), uap->s,
3725 (unsigned int)((sbytes >> 32) & 0x0ffffffff),
3726 (unsigned int)(sbytes & 0x0ffffffff), error, 0);
1c79356b 3727 return (error);
91447636 3728done3:
39236c6e 3729 sbunlock(&so->so_snd, FALSE); /* will unlock socket */
91447636 3730 goto done2;
1c79356b
A
3731}
3732
2d21ac55
A
3733
3734#endif /* SENDFILE */