]> git.saurik.com Git - apple/xnu.git/blame - bsd/kern/uipc_syscalls.c
xnu-3789.70.16.tar.gz
[apple/xnu.git] / bsd / kern / uipc_syscalls.c
CommitLineData
1c79356b 1/*
3e170ce0 2 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
5d5c5d0d 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
39236c6e 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
39236c6e 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
39236c6e 17 *
2d21ac55
A
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
39236c6e 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * Copyright (c) 1982, 1986, 1989, 1990, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * sendfile(2) and related extensions:
2d21ac55 33 * Copyright (c) 1998, David Greenman. All rights reserved.
1c79356b
A
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. All advertising materials mentioning features or use of this software
44 * must display the following acknowledgement:
45 * This product includes software developed by the University of
46 * California, Berkeley and its contributors.
47 * 4. Neither the name of the University nor the names of its contributors
48 * may be used to endorse or promote products derived from this software
49 * without specific prior written permission.
50 *
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE.
62 *
63 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94
64 */
2d21ac55
A
65/*
66 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
67 * support for mandatory and extensible security protections. This notice
68 * is included in support of clause 2.2 (b) of the Apple Public License,
69 * Version 2.0.
70 */
1c79356b
A
71
72#include <sys/param.h>
73#include <sys/systm.h>
74#include <sys/filedesc.h>
91447636
A
75#include <sys/proc_internal.h>
76#include <sys/file_internal.h>
2d21ac55 77#include <sys/vnode_internal.h>
1c79356b 78#include <sys/malloc.h>
39236c6e 79#include <sys/mcache.h>
1c79356b 80#include <sys/mbuf.h>
fe8ab488 81#include <kern/locks.h>
91447636 82#include <sys/domain.h>
1c79356b 83#include <sys/protosw.h>
91447636 84#include <sys/signalvar.h>
1c79356b
A
85#include <sys/socket.h>
86#include <sys/socketvar.h>
1c79356b 87#include <sys/kernel.h>
91447636 88#include <sys/uio_internal.h>
2d21ac55 89#include <sys/kauth.h>
6d2010ae 90#include <kern/task.h>
39236c6e 91#include <sys/priv.h>
3e170ce0 92#include <sys/sysctl.h>
e5568f75 93
b0d623f7 94#include <security/audit/audit.h>
1c79356b
A
95
96#include <sys/kdebug.h>
91447636 97#include <sys/sysproto.h>
2d21ac55
A
98#include <netinet/in.h>
99#include <net/route.h>
100#include <netinet/in_pcb.h>
101
102#if CONFIG_MACF_SOCKET_SUBSET
103#include <security/mac_framework.h>
104#endif /* MAC_SOCKET_SUBSET */
105
/*
 * Shorthand for reaching through a fileproc's f_fglob pointer; each macro
 * expands to the corresponding field reached via f_fglob.
 */
#define	f_flag		f_fglob->fg_flag
#define	f_ops		f_fglob->fg_ops
#define	f_type		f_fglob->fg_ops->fo_type
#define	f_data		f_fglob->fg_data
#define	f_cred		f_fglob->fg_cred
#define	f_offset	f_fglob->fg_offset
#define	f_msgcount	f_fglob->fg_msgcount
113
2d21ac55
A
/*
 * kdebug trace codes, all in the DBG_NETSOCK subclass.
 */

/* Layer entry/exit codes */
#define	DBG_LAYER_IN_BEG	NETDBG_CODE(DBG_NETSOCK, 0)
#define	DBG_LAYER_OUT_BEG	NETDBG_CODE(DBG_NETSOCK, 1)
#define	DBG_LAYER_IN_END	NETDBG_CODE(DBG_NETSOCK, 2)
#define	DBG_LAYER_OUT_END	NETDBG_CODE(DBG_NETSOCK, 3)

/* Per-function codes: send side */
#define	DBG_FNC_SENDMSG		NETDBG_CODE(DBG_NETSOCK, (1 << 8) | 1)
#define	DBG_FNC_SENDTO		NETDBG_CODE(DBG_NETSOCK, (2 << 8) | 1)
#define	DBG_FNC_SENDIT		NETDBG_CODE(DBG_NETSOCK, (3 << 8) | 1)

/* Per-function codes: receive side */
#define	DBG_FNC_RECVFROM	NETDBG_CODE(DBG_NETSOCK, (5 << 8))
#define	DBG_FNC_RECVMSG		NETDBG_CODE(DBG_NETSOCK, (6 << 8))
#define	DBG_FNC_RECVIT		NETDBG_CODE(DBG_NETSOCK, (7 << 8))

/* Per-function codes: sendfile and its phases */
#define	DBG_FNC_SENDFILE	NETDBG_CODE(DBG_NETSOCK, (10 << 8))
#define	DBG_FNC_SENDFILE_WAIT	NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 1))
#define	DBG_FNC_SENDFILE_READ	NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 2))
#define	DBG_FNC_SENDFILE_SEND	NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 3))

/* Per-function codes: batched message variants */
#define	DBG_FNC_SENDMSG_X	NETDBG_CODE(DBG_NETSOCK, (11 << 8))
#define	DBG_FNC_RECVMSG_X	NETDBG_CODE(DBG_NETSOCK, (12 << 8))
2d21ac55 130
3e170ce0
A
/*
 * Debug helpers.  On DEBUG/DEVELOPMENT builds kernel addresses are passed
 * through unchanged and DBG_PRINTF() forwards to printf(); on other builds
 * addresses go through VM_KERNEL_ADDRPERM() and DBG_PRINTF() expands to an
 * empty statement.
 */
#if DEBUG || DEVELOPMENT
#define	DEBUG_KERNEL_ADDRPERM(_v)	(_v)
#define	DBG_PRINTF(...)			printf(__VA_ARGS__)
#else /* !(DEBUG || DEVELOPMENT) */
#define	DEBUG_KERNEL_ADDRPERM(_v)	VM_KERNEL_ADDRPERM(_v)
#define	DBG_PRINTF(...)			do { } while (0)
#endif /* DEBUG || DEVELOPMENT */
2d21ac55 138
2d21ac55
A
139/* TODO: should be in header file */
140int falloc_locked(proc_t, struct fileproc **, int *, vfs_context_t, int);
141
3e170ce0
A
142static int sendit(struct proc *, struct socket *, struct user_msghdr *, uio_t,
143 int, int32_t *);
2d21ac55 144static int recvit(struct proc *, int, struct user_msghdr *, uio_t, user_addr_t,
b0d623f7 145 int32_t *);
39236c6e 146static int connectit(struct socket *, struct sockaddr *);
2d21ac55 147static int getsockaddr(struct socket *, struct sockaddr **, user_addr_t,
4a3eedf9 148 size_t, boolean_t);
2d21ac55 149static int getsockaddr_s(struct socket *, struct sockaddr_storage *,
4a3eedf9 150 user_addr_t, size_t, boolean_t);
1c79356b 151#if SENDFILE
2d21ac55
A
152static void alloc_sendpkt(int, size_t, unsigned int *, struct mbuf **,
153 boolean_t);
154#endif /* SENDFILE */
39236c6e 155static int connectx_nocancel(struct proc *, struct connectx_args *, int *);
813fb2f6
A
156static int connectitx(struct socket *, struct sockaddr *,
157 struct sockaddr *, struct proc *, uint32_t, sae_associd_t,
3e170ce0 158 sae_connid_t *, uio_t, unsigned int, user_ssize_t *);
39236c6e
A
159static int peeloff_nocancel(struct proc *, struct peeloff_args *, int *);
160static int disconnectx_nocancel(struct proc *, struct disconnectx_args *,
161 int *);
162static int socket_common(struct proc *, int, int, int, pid_t, int32_t *, int);
1c79356b 163
fe8ab488 164static int internalize_user_msghdr_array(const void *, int, int, u_int,
3e170ce0 165 struct user_msghdr_x *, struct uio **);
fe8ab488 166static u_int externalize_user_msghdr_array(void *, int, int, u_int,
3e170ce0 167 const struct user_msghdr_x *, struct uio **);
fe8ab488
A
168
169static void free_uio_array(struct uio **, u_int);
170static int uio_array_is_valid(struct uio **, u_int);
3e170ce0
A
171static int recv_msg_array_is_valid(struct recv_msg_elem *, u_int);
172static int internalize_recv_msghdr_array(const void *, int, int,
173 u_int, struct user_msghdr_x *, struct recv_msg_elem *);
174static u_int externalize_recv_msghdr_array(void *, int, int, u_int,
175 const struct user_msghdr_x *, struct recv_msg_elem *);
176static struct recv_msg_elem *alloc_recv_msg_array(u_int count);
177static void free_recv_msg_array(struct recv_msg_elem *, u_int);
178
179SYSCTL_DECL(_kern_ipc);
180
181static u_int somaxsendmsgx = 100;
182SYSCTL_UINT(_kern_ipc, OID_AUTO, maxsendmsgx,
183 CTLFLAG_RW | CTLFLAG_LOCKED, &somaxsendmsgx, 0, "");
184static u_int somaxrecvmsgx = 100;
185SYSCTL_UINT(_kern_ipc, OID_AUTO, maxrecvmsgx,
186 CTLFLAG_RW | CTLFLAG_LOCKED, &somaxrecvmsgx, 0, "");
fe8ab488 187
1c79356b
A
/*
 * System call interface to the socket abstraction.
 */

/* File operations vector installed on every fileproc that wraps a socket. */
extern const struct fileops socketops;
1c79356b 193
2d21ac55
A
194/*
195 * Returns: 0 Success
196 * EACCES Mandatory Access Control failure
197 * falloc:ENFILE
198 * falloc:EMFILE
199 * falloc:ENOMEM
200 * socreate:EAFNOSUPPORT
201 * socreate:EPROTOTYPE
202 * socreate:EPROTONOSUPPORT
203 * socreate:ENOBUFS
204 * socreate:ENOMEM
2d21ac55
A
205 * socreate:??? [other protocol families, IPSEC]
206 */
1c79356b 207int
39236c6e
A
208socket(struct proc *p,
209 struct socket_args *uap,
210 int32_t *retval)
211{
212 return (socket_common(p, uap->domain, uap->type, uap->protocol,
213 proc_selfpid(), retval, 0));
214}
215
216int
217socket_delegate(struct proc *p,
218 struct socket_delegate_args *uap,
219 int32_t *retval)
220{
221 return socket_common(p, uap->domain, uap->type, uap->protocol,
222 uap->epid, retval, 1);
223}
224
225static int
226socket_common(struct proc *p,
227 int domain,
228 int type,
229 int protocol,
230 pid_t epid,
231 int32_t *retval,
232 int delegate)
1c79356b 233{
1c79356b 234 struct socket *so;
91447636 235 struct fileproc *fp;
1c79356b
A
236 int fd, error;
237
39236c6e 238 AUDIT_ARG(socket, domain, type, protocol);
2d21ac55 239#if CONFIG_MACF_SOCKET_SUBSET
39236c6e
A
240 if ((error = mac_socket_check_create(kauth_cred_get(), domain,
241 type, protocol)) != 0)
2d21ac55
A
242 return (error);
243#endif /* MAC_SOCKET_SUBSET */
1c79356b 244
39236c6e
A
245 if (delegate) {
246 error = priv_check_cred(kauth_cred_get(),
247 PRIV_NET_PRIVILEGED_SOCKET_DELEGATE, 0);
248 if (error)
249 return (EACCES);
250 }
251
2d21ac55 252 error = falloc(p, &fp, &fd, vfs_context_current());
91447636 253 if (error) {
1c79356b 254 return (error);
91447636 255 }
1c79356b 256 fp->f_flag = FREAD|FWRITE;
1c79356b 257 fp->f_ops = &socketops;
91447636 258
39236c6e
A
259 if (delegate)
260 error = socreate_delegate(domain, &so, type, protocol, epid);
261 else
262 error = socreate(domain, &so, type, protocol);
263
91447636
A
264 if (error) {
265 fp_free(p, fd, fp);
1c79356b
A
266 } else {
267 fp->f_data = (caddr_t)so;
91447636
A
268
269 proc_fdlock(p);
6601e61a 270 procfdtbl_releasefd(p, fd, NULL);
2d21ac55 271
91447636
A
272 fp_drop(p, fd, fp, 1);
273 proc_fdunlock(p);
274
1c79356b 275 *retval = fd;
3e170ce0
A
276 if (ENTR_SHOULDTRACE) {
277 KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
278 fd, 0, (int64_t)VM_KERNEL_ADDRPERM(so));
279 }
1c79356b
A
280 }
281 return (error);
282}
283
2d21ac55
A
284/*
285 * Returns: 0 Success
286 * EDESTADDRREQ Destination address required
287 * EBADF Bad file descriptor
288 * EACCES Mandatory Access Control failure
289 * file_socket:ENOTSOCK
290 * file_socket:EBADF
291 * getsockaddr:ENAMETOOLONG Filename too long
292 * getsockaddr:EINVAL Invalid argument
293 * getsockaddr:ENOMEM Not enough space
294 * getsockaddr:EFAULT Bad address
39236c6e 295 * sobindlock:???
2d21ac55 296 */
1c79356b
A
297/* ARGSUSED */
298int
b0d623f7 299bind(__unused proc_t p, struct bind_args *uap, __unused int32_t *retval)
1c79356b 300{
2d21ac55
A
301 struct sockaddr_storage ss;
302 struct sockaddr *sa = NULL;
91447636 303 struct socket *so;
2d21ac55 304 boolean_t want_free = TRUE;
1c79356b
A
305 int error;
306
55e303ae 307 AUDIT_ARG(fd, uap->s);
91447636 308 error = file_socket(uap->s, &so);
2d21ac55 309 if (error != 0)
1c79356b 310 return (error);
2d21ac55
A
311 if (so == NULL) {
312 error = EBADF;
313 goto out;
314 }
315 if (uap->name == USER_ADDR_NULL) {
316 error = EDESTADDRREQ;
317 goto out;
318 }
319 if (uap->namelen > sizeof (ss)) {
4a3eedf9 320 error = getsockaddr(so, &sa, uap->name, uap->namelen, TRUE);
2d21ac55 321 } else {
4a3eedf9 322 error = getsockaddr_s(so, &ss, uap->name, uap->namelen, TRUE);
2d21ac55
A
323 if (error == 0) {
324 sa = (struct sockaddr *)&ss;
325 want_free = FALSE;
326 }
327 }
328 if (error != 0)
91447636 329 goto out;
2d21ac55
A
330 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
331#if CONFIG_MACF_SOCKET_SUBSET
332 if ((error = mac_socket_check_bind(kauth_cred_get(), so, sa)) == 0)
39236c6e 333 error = sobindlock(so, sa, 1); /* will lock socket */
2d21ac55 334#else
39236c6e 335 error = sobindlock(so, sa, 1); /* will lock socket */
2d21ac55
A
336#endif /* MAC_SOCKET_SUBSET */
337 if (want_free)
338 FREE(sa, M_SONAME);
91447636
A
339out:
340 file_drop(uap->s);
1c79356b
A
341 return (error);
342}
343
2d21ac55
A
344/*
345 * Returns: 0 Success
346 * EBADF
347 * EACCES Mandatory Access Control failure
348 * file_socket:ENOTSOCK
349 * file_socket:EBADF
350 * solisten:EINVAL
351 * solisten:EOPNOTSUPP
352 * solisten:???
353 */
1c79356b 354int
2d21ac55 355listen(__unused struct proc *p, struct listen_args *uap,
b0d623f7 356 __unused int32_t *retval)
1c79356b 357{
1c79356b 358 int error;
2d21ac55 359 struct socket *so;
1c79356b 360
55e303ae 361 AUDIT_ARG(fd, uap->s);
91447636 362 error = file_socket(uap->s, &so);
1c79356b
A
363 if (error)
364 return (error);
91447636 365 if (so != NULL)
2d21ac55
A
366#if CONFIG_MACF_SOCKET_SUBSET
367 {
368 error = mac_socket_check_listen(kauth_cred_get(), so);
369 if (error == 0)
370 error = solisten(so, uap->backlog);
371 }
372#else
91447636 373 error = solisten(so, uap->backlog);
2d21ac55 374#endif /* MAC_SOCKET_SUBSET */
55e303ae 375 else
91447636 376 error = EBADF;
2d21ac55 377
91447636
A
378 file_drop(uap->s);
379 return (error);
1c79356b
A
380}
381
2d21ac55
A
382/*
383 * Returns: fp_getfsock:EBADF Bad file descriptor
384 * fp_getfsock:EOPNOTSUPP ...
385 * xlate => :ENOTSOCK Socket operation on non-socket
386 * :EFAULT Bad address on copyin/copyout
387 * :EBADF Bad file descriptor
388 * :EOPNOTSUPP Operation not supported on socket
389 * :EINVAL Invalid argument
390 * :EWOULDBLOCK Operation would block
391 * :ECONNABORTED Connection aborted
392 * :EINTR Interrupted function
393 * :EACCES Mandatory Access Control failure
394 * falloc_locked:ENFILE Too many files open in system
395 * falloc_locked::EMFILE Too many open files
396 * falloc_locked::ENOMEM Not enough space
397 * 0 Success
398 */
1c79356b 399int
2d21ac55 400accept_nocancel(struct proc *p, struct accept_nocancel_args *uap,
b0d623f7 401 int32_t *retval)
1c79356b 402{
91447636 403 struct fileproc *fp;
2d21ac55 404 struct sockaddr *sa = NULL;
91447636
A
405 socklen_t namelen;
406 int error;
407 struct socket *head, *so = NULL;
408 lck_mtx_t *mutex_held;
409 int fd = uap->s;
2d21ac55 410 int newfd;
1c79356b 411 short fflag; /* type must match fp->f_flag */
91447636 412 int dosocklock = 0;
1c79356b 413
2d21ac55
A
414 *retval = -1;
415
55e303ae 416 AUDIT_ARG(fd, uap->s);
2d21ac55 417
1c79356b 418 if (uap->name) {
91447636 419 error = copyin(uap->anamelen, (caddr_t)&namelen,
2d21ac55
A
420 sizeof (socklen_t));
421 if (error)
1c79356b
A
422 return (error);
423 }
91447636
A
424 error = fp_getfsock(p, fd, &fp, &head);
425 if (error) {
426 if (error == EOPNOTSUPP)
427 error = ENOTSOCK;
1c79356b 428 return (error);
91447636 429 }
55e303ae 430 if (head == NULL) {
91447636
A
431 error = EBADF;
432 goto out;
55e303ae 433 }
2d21ac55
A
434#if CONFIG_MACF_SOCKET_SUBSET
435 if ((error = mac_socket_check_accept(kauth_cred_get(), head)) != 0)
436 goto out;
437#endif /* MAC_SOCKET_SUBSET */
91447636
A
438
439 socket_lock(head, 1);
440
441 if (head->so_proto->pr_getlock != NULL) {
442 mutex_held = (*head->so_proto->pr_getlock)(head, 0);
443 dosocklock = 1;
2d21ac55 444 } else {
91447636
A
445 mutex_held = head->so_proto->pr_domain->dom_mtx;
446 dosocklock = 0;
447 }
448
1c79356b 449 if ((head->so_options & SO_ACCEPTCONN) == 0) {
2d21ac55
A
450 if ((head->so_proto->pr_flags & PR_CONNREQUIRED) == 0) {
451 error = EOPNOTSUPP;
452 } else {
453 /* POSIX: The socket is not accepting connections */
454 error = EINVAL;
455 }
91447636 456 socket_unlock(head, 1);
91447636 457 goto out;
1c79356b 458 }
813fb2f6 459check_again:
1c79356b 460 if ((head->so_state & SS_NBIO) && head->so_comp.tqh_first == NULL) {
91447636
A
461 socket_unlock(head, 1);
462 error = EWOULDBLOCK;
463 goto out;
1c79356b 464 }
2d21ac55 465 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
1c79356b
A
466 if (head->so_state & SS_CANTRCVMORE) {
467 head->so_error = ECONNABORTED;
468 break;
469 }
91447636 470 if (head->so_usecount < 1)
2d21ac55
A
471 panic("accept: head=%p refcount=%d\n", head,
472 head->so_usecount);
473 error = msleep((caddr_t)&head->so_timeo, mutex_held,
474 PSOCK | PCATCH, "accept", 0);
91447636 475 if (head->so_usecount < 1)
2d21ac55
A
476 panic("accept: 2 head=%p refcount=%d\n", head,
477 head->so_usecount);
91447636
A
478 if ((head->so_state & SS_DRAINING)) {
479 error = ECONNABORTED;
480 }
1c79356b 481 if (error) {
91447636
A
482 socket_unlock(head, 1);
483 goto out;
1c79356b
A
484 }
485 }
486 if (head->so_error) {
487 error = head->so_error;
488 head->so_error = 0;
91447636
A
489 socket_unlock(head, 1);
490 goto out;
1c79356b
A
491 }
492
1c79356b
A
493 /*
494 * At this point we know that there is at least one connection
495 * ready to be accepted. Remove it from the queue prior to
496 * allocating the file descriptor for it since falloc() may
497 * block allowing another process to accept the connection
498 * instead.
499 */
91447636 500 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
813fb2f6
A
501
502 so_acquire_accept_list(head, NULL);
503 if (TAILQ_EMPTY(&head->so_comp)) {
504 so_release_accept_list(head);
505 goto check_again;
506 }
507
e3027f41 508 so = TAILQ_FIRST(&head->so_comp);
1c79356b 509 TAILQ_REMOVE(&head->so_comp, so, so_list);
d190cdc3
A
510 so->so_head = NULL;
511 so->so_state &= ~SS_COMP;
1c79356b 512 head->so_qlen--;
813fb2f6
A
513 so_release_accept_list(head);
514
2d21ac55
A
515 /* unlock head to avoid deadlock with select, keep a ref on head */
516 socket_unlock(head, 0);
517
518#if CONFIG_MACF_SOCKET_SUBSET
519 /*
520 * Pass the pre-accepted socket to the MAC framework. This is
521 * cheaper than allocating a file descriptor for the socket,
522 * calling the protocol accept callback, and possibly freeing
523 * the file descriptor should the MAC check fails.
524 */
525 if ((error = mac_socket_check_accepted(kauth_cred_get(), so)) != 0) {
39236c6e 526 socket_lock(so, 1);
d190cdc3 527 so->so_state &= ~SS_NOFDREF;
39236c6e 528 socket_unlock(so, 1);
2d21ac55
A
529 soclose(so);
530 /* Drop reference on listening socket */
531 sodereference(head);
532 goto out;
533 }
534#endif /* MAC_SOCKET_SUBSET */
535
536 /*
537 * Pass the pre-accepted socket to any interested socket filter(s).
538 * Upon failure, the socket would have been closed by the callee.
539 */
d190cdc3 540 if (so->so_filt != NULL && (error = soacceptfilter(so, head)) != 0) {
2d21ac55
A
541 /* Drop reference on listening socket */
542 sodereference(head);
543 /* Propagate socket filter's error code to the caller */
544 goto out;
545 }
546
1c79356b 547 fflag = fp->f_flag;
2d21ac55 548 error = falloc(p, &fp, &newfd, vfs_context_current());
1c79356b 549 if (error) {
39236c6e 550 /*
316670eb
A
551 * Probably ran out of file descriptors.
552 *
553 * <rdar://problem/8554930>
554 * Don't put this back on the socket like we used to, that
555 * just causes the client to spin. Drop the socket.
1c79356b 556 */
39236c6e 557 socket_lock(so, 1);
d190cdc3 558 so->so_state &= ~SS_NOFDREF;
39236c6e 559 socket_unlock(so, 1);
316670eb
A
560 soclose(so);
561 sodereference(head);
91447636 562 goto out;
2d21ac55 563 }
91447636 564 *retval = newfd;
1c79356b
A
565 fp->f_flag = fflag;
566 fp->f_ops = &socketops;
567 fp->f_data = (caddr_t)so;
fe8ab488 568
91447636
A
569 socket_lock(head, 0);
570 if (dosocklock)
571 socket_lock(so, 1);
fe8ab488 572
fe8ab488
A
573 /* Sync socket non-blocking/async state with file flags */
574 if (fp->f_flag & FNONBLOCK) {
575 so->so_state |= SS_NBIO;
576 } else {
577 so->so_state &= ~SS_NBIO;
578 }
579
580 if (fp->f_flag & FASYNC) {
581 so->so_state |= SS_ASYNC;
582 so->so_rcv.sb_flags |= SB_ASYNC;
583 so->so_snd.sb_flags |= SB_ASYNC;
584 } else {
585 so->so_state &= ~SS_ASYNC;
586 so->so_rcv.sb_flags &= ~SB_ASYNC;
587 so->so_snd.sb_flags &= ~SB_ASYNC;
588 }
589
91447636
A
590 (void) soacceptlock(so, &sa, 0);
591 socket_unlock(head, 1);
2d21ac55 592 if (sa == NULL) {
1c79356b
A
593 namelen = 0;
594 if (uap->name)
595 goto gotnoname;
91447636 596 error = 0;
2d21ac55 597 goto releasefd;
1c79356b 598 }
2d21ac55
A
599 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
600
1c79356b 601 if (uap->name) {
2d21ac55
A
602 socklen_t sa_len;
603
604 /* save sa_len before it is destroyed */
605 sa_len = sa->sa_len;
606 namelen = MIN(namelen, sa_len);
91447636 607 error = copyout(sa, uap->name, namelen);
1c79356b 608 if (!error)
2d21ac55
A
609 /* return the actual, untruncated address length */
610 namelen = sa_len;
1c79356b 611gotnoname:
2d21ac55
A
612 error = copyout((caddr_t)&namelen, uap->anamelen,
613 sizeof (socklen_t));
1c79356b
A
614 }
615 FREE(sa, M_SONAME);
2d21ac55 616
b0d623f7 617releasefd:
2d21ac55 618 /*
6d2010ae
A
619 * If the socket has been marked as inactive by sosetdefunct(),
620 * disallow further operations on it.
2d21ac55
A
621 */
622 if (so->so_flags & SOF_DEFUNCT) {
6d2010ae
A
623 sodefunct(current_proc(), so,
624 SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL);
2d21ac55
A
625 }
626
91447636
A
627 if (dosocklock)
628 socket_unlock(so, 1);
2d21ac55 629
2d21ac55
A
630 proc_fdlock(p);
631 procfdtbl_releasefd(p, newfd, NULL);
632 fp_drop(p, newfd, fp, 1);
633 proc_fdunlock(p);
634
91447636
A
635out:
636 file_drop(fd);
3e170ce0
A
637
638 if (error == 0 && ENTR_SHOULDTRACE) {
639 KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
640 newfd, 0, (int64_t)VM_KERNEL_ADDRPERM(so));
641 }
1c79356b
A
642 return (error);
643}
644
645int
b0d623f7 646accept(struct proc *p, struct accept_args *uap, int32_t *retval)
1c79356b 647{
2d21ac55 648 __pthread_testcancel(1);
3e170ce0
A
649 return (accept_nocancel(p, (struct accept_nocancel_args *)uap,
650 retval));
1c79356b
A
651}
652
2d21ac55
A
653/*
654 * Returns: 0 Success
655 * EBADF Bad file descriptor
656 * EALREADY Connection already in progress
657 * EINPROGRESS Operation in progress
658 * ECONNABORTED Connection aborted
659 * EINTR Interrupted function
660 * EACCES Mandatory Access Control failure
661 * file_socket:ENOTSOCK
662 * file_socket:EBADF
663 * getsockaddr:ENAMETOOLONG Filename too long
664 * getsockaddr:EINVAL Invalid argument
665 * getsockaddr:ENOMEM Not enough space
666 * getsockaddr:EFAULT Bad address
667 * soconnectlock:EOPNOTSUPP
668 * soconnectlock:EISCONN
669 * soconnectlock:??? [depends on protocol, filters]
670 * msleep:EINTR
671 *
672 * Imputed: so_error error may be set from so_error, which
673 * may have been set by soconnectlock.
674 */
675/* ARGSUSED */
1c79356b 676int
b0d623f7 677connect(struct proc *p, struct connect_args *uap, int32_t *retval)
1c79356b 678{
2d21ac55 679 __pthread_testcancel(1);
3e170ce0
A
680 return (connect_nocancel(p, (struct connect_nocancel_args *)uap,
681 retval));
1c79356b 682}
1c79356b 683
1c79356b 684int
39236c6e 685connect_nocancel(proc_t p, struct connect_nocancel_args *uap, int32_t *retval)
1c79356b 686{
39236c6e 687#pragma unused(p, retval)
91447636 688 struct socket *so;
2d21ac55
A
689 struct sockaddr_storage ss;
690 struct sockaddr *sa = NULL;
91447636
A
691 int error;
692 int fd = uap->s;
4a3eedf9 693 boolean_t dgram;
1c79356b 694
55e303ae 695 AUDIT_ARG(fd, uap->s);
2d21ac55
A
696 error = file_socket(fd, &so);
697 if (error != 0)
1c79356b 698 return (error);
91447636
A
699 if (so == NULL) {
700 error = EBADF;
701 goto out;
702 }
703
4a3eedf9
A
704 /*
705 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
706 * if this is a datagram socket; translate for other types.
707 */
708 dgram = (so->so_type == SOCK_DGRAM);
709
2d21ac55
A
710 /* Get socket address now before we obtain socket lock */
711 if (uap->namelen > sizeof (ss)) {
4a3eedf9 712 error = getsockaddr(so, &sa, uap->name, uap->namelen, !dgram);
2d21ac55 713 } else {
4a3eedf9 714 error = getsockaddr_s(so, &ss, uap->name, uap->namelen, !dgram);
39236c6e 715 if (error == 0)
2d21ac55 716 sa = (struct sockaddr *)&ss;
2d21ac55
A
717 }
718 if (error != 0)
719 goto out;
720
39236c6e
A
721 error = connectit(so, sa);
722
723 if (sa != NULL && sa != SA(&ss))
724 FREE(sa, M_SONAME);
725 if (error == ERESTART)
726 error = EINTR;
727out:
728 file_drop(fd);
729 return (error);
730}
731
732static int
733connectx_nocancel(struct proc *p, struct connectx_args *uap, int *retval)
734{
735#pragma unused(p, retval)
813fb2f6
A
736 struct sockaddr_storage ss, sd;
737 struct sockaddr *src = NULL, *dst = NULL;
39236c6e 738 struct socket *so;
3e170ce0 739 int error, error1, fd = uap->socket;
39236c6e 740 boolean_t dgram;
3e170ce0
A
741 sae_connid_t cid = SAE_CONNID_ANY;
742 struct user32_sa_endpoints ep32;
743 struct user64_sa_endpoints ep64;
744 struct user_sa_endpoints ep;
745 user_ssize_t bytes_written = 0;
746 struct user_iovec *iovp;
747 uio_t auio = NULL;
39236c6e 748
3e170ce0 749 AUDIT_ARG(fd, uap->socket);
39236c6e
A
750 error = file_socket(fd, &so);
751 if (error != 0)
752 return (error);
753 if (so == NULL) {
754 error = EBADF;
755 goto out;
756 }
757
3e170ce0
A
758 if (uap->endpoints == USER_ADDR_NULL) {
759 error = EINVAL;
760 goto out;
761 }
762
763 if (IS_64BIT_PROCESS(p)) {
764 error = copyin(uap->endpoints, (caddr_t)&ep64, sizeof(ep64));
765 if (error != 0)
766 goto out;
767
768 ep.sae_srcif = ep64.sae_srcif;
769 ep.sae_srcaddr = ep64.sae_srcaddr;
770 ep.sae_srcaddrlen = ep64.sae_srcaddrlen;
771 ep.sae_dstaddr = ep64.sae_dstaddr;
772 ep.sae_dstaddrlen = ep64.sae_dstaddrlen;
773 } else {
774 error = copyin(uap->endpoints, (caddr_t)&ep32, sizeof(ep32));
775 if (error != 0)
776 goto out;
777
778 ep.sae_srcif = ep32.sae_srcif;
779 ep.sae_srcaddr = ep32.sae_srcaddr;
780 ep.sae_srcaddrlen = ep32.sae_srcaddrlen;
781 ep.sae_dstaddr = ep32.sae_dstaddr;
782 ep.sae_dstaddrlen = ep32.sae_dstaddrlen;
783 }
fe8ab488 784
39236c6e
A
785 /*
786 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
787 * if this is a datagram socket; translate for other types.
788 */
789 dgram = (so->so_type == SOCK_DGRAM);
790
813fb2f6
A
791 /* Get socket address now before we obtain socket lock */
792 if (ep.sae_srcaddr != USER_ADDR_NULL) {
793 if (ep.sae_srcaddrlen > sizeof (ss)) {
794 error = getsockaddr(so, &src, ep.sae_srcaddr, ep.sae_srcaddrlen, dgram);
795 } else {
796 error = getsockaddr_s(so, &ss, ep.sae_srcaddr, ep.sae_srcaddrlen, dgram);
797 if (error == 0)
798 src = (struct sockaddr *)&ss;
799 }
800
801 if (error)
802 goto out;
803 }
39236c6e 804
3e170ce0
A
805 if (ep.sae_dstaddr == USER_ADDR_NULL) {
806 error = EINVAL;
807 goto out;
808 }
809
813fb2f6
A
810 /* Get socket address now before we obtain socket lock */
811 if (ep.sae_dstaddrlen > sizeof (sd)) {
812 error = getsockaddr(so, &dst, ep.sae_dstaddr, ep.sae_dstaddrlen, dgram);
813 } else {
814 error = getsockaddr_s(so, &sd, ep.sae_dstaddr, ep.sae_dstaddrlen, dgram);
815 if (error == 0)
816 dst = (struct sockaddr *)&sd;
817 }
818
819 if (error)
39236c6e
A
820 goto out;
821
813fb2f6 822 VERIFY(dst != NULL);
39236c6e 823
3e170ce0
A
824 if (uap->iov != USER_ADDR_NULL) {
825 /* Verify range before calling uio_create() */
826 if (uap->iovcnt <= 0 || uap->iovcnt > UIO_MAXIOV)
827 return (EINVAL);
828
829 if (uap->len == USER_ADDR_NULL)
830 return (EINVAL);
831
832 /* allocate a uio to hold the number of iovecs passed */
833 auio = uio_create(uap->iovcnt, 0,
834 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
835 UIO_WRITE);
836
837 if (auio == NULL) {
838 error = ENOMEM;
839 goto out;
840 }
841
842 /*
843 * get location of iovecs within the uio.
844 * then copyin the iovecs from user space.
845 */
846 iovp = uio_iovsaddr(auio);
847 if (iovp == NULL) {
848 error = ENOMEM;
849 goto out;
850 }
851 error = copyin_user_iovec_array(uap->iov,
852 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
853 uap->iovcnt, iovp);
854 if (error != 0)
855 goto out;
856
857 /* finish setup of uio_t */
858 error = uio_calculateresid(auio);
859 if (error != 0) {
860 goto out;
861 }
862 }
863
813fb2f6 864 error = connectitx(so, src, dst, p, ep.sae_srcif, uap->associd,
3e170ce0 865 &cid, auio, uap->flags, &bytes_written);
39236c6e
A
866 if (error == ERESTART)
867 error = EINTR;
868
3e170ce0
A
869 if (uap->len != USER_ADDR_NULL) {
870 error1 = copyout(&bytes_written, uap->len, sizeof (uap->len));
871 /* give precedence to connectitx errors */
872 if ((error1 != 0) && (error == 0))
873 error = error1;
874 }
39236c6e 875
3e170ce0
A
876 if (uap->connid != USER_ADDR_NULL) {
877 error1 = copyout(&cid, uap->connid, sizeof (cid));
878 /* give precedence to connectitx errors */
879 if ((error1 != 0) && (error == 0))
880 error = error1;
881 }
39236c6e
A
882out:
883 file_drop(fd);
3e170ce0
A
884 if (auio != NULL) {
885 uio_free(auio);
886 }
813fb2f6
A
887 if (src != NULL && src != SA(&ss))
888 FREE(src, M_SONAME);
889 if (dst != NULL && dst != SA(&sd))
890 FREE(dst, M_SONAME);
39236c6e
A
891 return (error);
892}
893
/*
 * connectx
 *
 * Cancellable wrapper around connectx_nocancel().
 */
int
connectx(struct proc *p, struct connectx_args *uap, int *retval)
{
	int error;

	/*
	 * Due to similarity with a POSIX interface, define as
	 * an unofficial cancellation point.
	 */
	__pthread_testcancel(1);

	error = connectx_nocancel(p, uap, retval);
	return (error);
}
904
905static int
906connectit(struct socket *so, struct sockaddr *sa)
907{
908 int error;
909
2d21ac55
A
910 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
911#if CONFIG_MACF_SOCKET_SUBSET
39236c6e
A
912 if ((error = mac_socket_check_connect(kauth_cred_get(), so, sa)) != 0)
913 return (error);
914#endif /* MAC_SOCKET_SUBSET */
915
916 socket_lock(so, 1);
917 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
918 error = EALREADY;
919 goto out;
920 }
921 error = soconnectlock(so, sa, 0);
922 if (error != 0) {
923 so->so_state &= ~SS_ISCONNECTING;
2d21ac55
A
924 goto out;
925 }
39236c6e
A
926 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
927 error = EINPROGRESS;
928 goto out;
929 }
930 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
931 lck_mtx_t *mutex_held;
932
933 if (so->so_proto->pr_getlock != NULL)
934 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
935 else
936 mutex_held = so->so_proto->pr_domain->dom_mtx;
937 error = msleep((caddr_t)&so->so_timeo, mutex_held,
938 PSOCK | PCATCH, __func__, 0);
939 if (so->so_state & SS_DRAINING) {
940 error = ECONNABORTED;
941 }
942 if (error != 0)
943 break;
944 }
945 if (error == 0) {
946 error = so->so_error;
947 so->so_error = 0;
948 }
949out:
950 socket_unlock(so, 1);
951 return (error);
952}
953
954static int
813fb2f6
A
955connectitx(struct socket *so, struct sockaddr *src,
956 struct sockaddr *dst, struct proc *p, uint32_t ifscope,
3e170ce0
A
957 sae_associd_t aid, sae_connid_t *pcid, uio_t auio, unsigned int flags,
958 user_ssize_t *bytes_written)
39236c6e 959{
39236c6e 960 int error;
3e170ce0 961#pragma unused (flags)
39236c6e 962
813fb2f6 963 VERIFY(dst != NULL);
39236c6e 964
813fb2f6 965 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), dst);
39236c6e 966#if CONFIG_MACF_SOCKET_SUBSET
813fb2f6
A
967 if ((error = mac_socket_check_connect(kauth_cred_get(), so, dst)) != 0)
968 return (error);
2d21ac55 969#endif /* MAC_SOCKET_SUBSET */
91447636 970
39236c6e 971 socket_lock(so, 1);
91447636 972 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
91447636
A
973 error = EALREADY;
974 goto out;
975 }
3e170ce0
A
976
977 if ((so->so_proto->pr_flags & PR_DATA_IDEMPOTENT) &&
813fb2f6 978 (flags & CONNECT_DATA_IDEMPOTENT)) {
3e170ce0
A
979 so->so_flags1 |= SOF1_DATA_IDEMPOTENT;
980
813fb2f6
A
981 if (flags & CONNECT_DATA_AUTHENTICATED)
982 so->so_flags |= SOF1_DATA_AUTHENTICATED;
983 }
984
3e170ce0
A
985 /*
986 * Case 1: CONNECT_RESUME_ON_READ_WRITE set, no data.
987 * Case 2: CONNECT_RESUME_ON_READ_WRITE set, with data (user error)
988 * Case 3: CONNECT_RESUME_ON_READ_WRITE not set, with data
989 * Case 3 allows user to combine write with connect even if they have
990 * no use for TFO (such as regular TCP, and UDP).
991 * Case 4: CONNECT_RESUME_ON_READ_WRITE not set, no data (regular case)
992 */
993 if ((so->so_proto->pr_flags & PR_PRECONN_WRITE) &&
994 ((flags & CONNECT_RESUME_ON_READ_WRITE) || auio))
995 so->so_flags1 |= SOF1_PRECONNECT_DATA;
996
997 /*
998 * If a user sets data idempotent and does not pass an uio, or
999 * sets CONNECT_RESUME_ON_READ_WRITE, this is an error, reset
1000 * SOF1_DATA_IDEMPOTENT.
1001 */
1002 if (!(so->so_flags1 & SOF1_PRECONNECT_DATA) &&
1003 (so->so_flags1 & SOF1_DATA_IDEMPOTENT)) {
1004 /* We should return EINVAL instead perhaps. */
1005 so->so_flags1 &= ~SOF1_DATA_IDEMPOTENT;
1006 }
1007
813fb2f6 1008 error = soconnectxlocked(so, src, dst, p, ifscope,
3e170ce0 1009 aid, pcid, 0, NULL, 0, auio, bytes_written);
39236c6e
A
1010 if (error != 0) {
1011 so->so_state &= ~SS_ISCONNECTING;
1012 goto out;
1013 }
3e170ce0
A
1014 /*
1015 * If, after the call to soconnectxlocked the flag is still set (in case
1016 * data has been queued and the connect() has actually been triggered,
1017 * it will have been unset by the transport), we exit immediately. There
1018 * is no reason to wait on any event.
1019 */
1020 if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
1021 error = 0;
1022 goto out;
1023 }
1c79356b 1024 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
91447636
A
1025 error = EINPROGRESS;
1026 goto out;
1c79356b 1027 }
1c79356b 1028 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
39236c6e
A
1029 lck_mtx_t *mutex_held;
1030
2d21ac55 1031 if (so->so_proto->pr_getlock != NULL)
91447636 1032 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
2d21ac55 1033 else
91447636 1034 mutex_held = so->so_proto->pr_domain->dom_mtx;
2d21ac55 1035 error = msleep((caddr_t)&so->so_timeo, mutex_held,
39236c6e
A
1036 PSOCK | PCATCH, __func__, 0);
1037 if (so->so_state & SS_DRAINING) {
91447636
A
1038 error = ECONNABORTED;
1039 }
39236c6e 1040 if (error != 0)
1c79356b
A
1041 break;
1042 }
1043 if (error == 0) {
1044 error = so->so_error;
1045 so->so_error = 0;
1046 }
39236c6e 1047out:
91447636 1048 socket_unlock(so, 1);
39236c6e
A
1049 return (error);
1050}
1051
/*
 * peeloff system call entry point.  Runs the thread-cancellation test
 * and then hands the request, unchanged, to peeloff_nocancel().
 */
int
peeloff(struct proc *p, struct peeloff_args *uap, int *retval)
{
	int rv;

	/*
	 * Because of its similarity to a POSIX interface, this call is
	 * treated as an unofficial cancellation point.
	 */
	__pthread_testcancel(1);

	rv = peeloff_nocancel(p, uap, retval);
	return (rv);
}
1062
1063static int
1064peeloff_nocancel(struct proc *p, struct peeloff_args *uap, int *retval)
1065{
1066 struct fileproc *fp;
1067 struct socket *mp_so, *so = NULL;
1068 int newfd, fd = uap->s;
1069 short fflag; /* type must match fp->f_flag */
1070 int error;
1071
1072 *retval = -1;
1073
1074 error = fp_getfsock(p, fd, &fp, &mp_so);
1075 if (error != 0) {
1076 if (error == EOPNOTSUPP)
1077 error = ENOTSOCK;
1078 goto out_nofile;
1079 }
1080 if (mp_so == NULL) {
1081 error = EBADF;
1082 goto out;
1083 }
1084
1085 socket_lock(mp_so, 1);
1086 error = sopeelofflocked(mp_so, uap->aid, &so);
1087 if (error != 0) {
1088 socket_unlock(mp_so, 1);
1089 goto out;
1090 }
1091 VERIFY(so != NULL);
1092 socket_unlock(mp_so, 0); /* keep ref on mp_so for us */
1093
1094 fflag = fp->f_flag;
1095 error = falloc(p, &fp, &newfd, vfs_context_current());
1096 if (error != 0) {
1097 /* drop this socket (probably ran out of file descriptors) */
1098 soclose(so);
1099 sodereference(mp_so); /* our mp_so ref */
1100 goto out;
1101 }
1102
1103 fp->f_flag = fflag;
1104 fp->f_ops = &socketops;
1105 fp->f_data = (caddr_t)so;
1106
1107 /*
1108 * If the socket has been marked as inactive by sosetdefunct(),
1109 * disallow further operations on it.
1110 */
1111 if (so->so_flags & SOF_DEFUNCT) {
1112 sodefunct(current_proc(), so,
1113 SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL);
1114 }
1115
1116 proc_fdlock(p);
1117 procfdtbl_releasefd(p, newfd, NULL);
1118 fp_drop(p, newfd, fp, 1);
1119 proc_fdunlock(p);
1120
1121 sodereference(mp_so); /* our mp_so ref */
1122 *retval = newfd;
1123
1124out:
1125 file_drop(fd);
1126
1127out_nofile:
1128 return (error);
1129}
1130
/*
 * disconnectx system call entry point.  Runs the thread-cancellation
 * test and then hands the request, unchanged, to disconnectx_nocancel().
 */
int
disconnectx(struct proc *p, struct disconnectx_args *uap, int *retval)
{
	int rv;

	/*
	 * Because of its similarity to a POSIX interface, this call is
	 * treated as an unofficial cancellation point.
	 */
	__pthread_testcancel(1);

	rv = disconnectx_nocancel(p, uap, retval);
	return (rv);
}
1141
1142static int
1143disconnectx_nocancel(struct proc *p, struct disconnectx_args *uap, int *retval)
1144{
1145#pragma unused(p, retval)
1146 struct socket *so;
1147 int fd = uap->s;
1148 int error;
1149
1150 error = file_socket(fd, &so);
1151 if (error != 0)
1152 return (error);
1153 if (so == NULL) {
1154 error = EBADF;
1155 goto out;
1156 }
1157
1158 error = sodisconnectx(so, uap->aid, uap->cid);
91447636
A
1159out:
1160 file_drop(fd);
1c79356b
A
1161 return (error);
1162}
1163
2d21ac55
A
1164/*
1165 * Returns: 0 Success
1166 * socreate:EAFNOSUPPORT
1167 * socreate:EPROTOTYPE
1168 * socreate:EPROTONOSUPPORT
1169 * socreate:ENOBUFS
1170 * socreate:ENOMEM
1171 * socreate:EISCONN
1172 * socreate:??? [other protocol families, IPSEC]
1173 * falloc:ENFILE
1174 * falloc:EMFILE
1175 * falloc:ENOMEM
1176 * copyout:EFAULT
1177 * soconnect2:EINVAL
1178 * soconnect2:EPROTOTYPE
1179 * soconnect2:??? [other protocol families]
1180 */
1c79356b 1181int
2d21ac55 1182socketpair(struct proc *p, struct socketpair_args *uap,
b0d623f7 1183 __unused int32_t *retval)
1c79356b 1184{
91447636 1185 struct fileproc *fp1, *fp2;
1c79356b
A
1186 struct socket *so1, *so2;
1187 int fd, error, sv[2];
1188
55e303ae 1189 AUDIT_ARG(socket, uap->domain, uap->type, uap->protocol);
1c79356b
A
1190 error = socreate(uap->domain, &so1, uap->type, uap->protocol);
1191 if (error)
1192 return (error);
1193 error = socreate(uap->domain, &so2, uap->type, uap->protocol);
1194 if (error)
1195 goto free1;
91447636 1196
2d21ac55 1197 error = falloc(p, &fp1, &fd, vfs_context_current());
91447636 1198 if (error) {
1c79356b 1199 goto free2;
91447636 1200 }
1c79356b 1201 fp1->f_flag = FREAD|FWRITE;
1c79356b
A
1202 fp1->f_ops = &socketops;
1203 fp1->f_data = (caddr_t)so1;
91447636
A
1204 sv[0] = fd;
1205
2d21ac55 1206 error = falloc(p, &fp2, &fd, vfs_context_current());
91447636 1207 if (error) {
1c79356b 1208 goto free3;
91447636 1209 }
1c79356b 1210 fp2->f_flag = FREAD|FWRITE;
1c79356b
A
1211 fp2->f_ops = &socketops;
1212 fp2->f_data = (caddr_t)so2;
1213 sv[1] = fd;
91447636 1214
1c79356b
A
1215 error = soconnect2(so1, so2);
1216 if (error) {
1c79356b
A
1217 goto free4;
1218 }
1c79356b
A
1219 if (uap->type == SOCK_DGRAM) {
1220 /*
1221 * Datagram socket connection is asymmetric.
1222 */
2d21ac55
A
1223 error = soconnect2(so2, so1);
1224 if (error) {
1225 goto free4;
1226 }
1c79356b 1227 }
91447636 1228
6d2010ae
A
1229 if ((error = copyout(sv, uap->rsv, 2 * sizeof (int))) != 0)
1230 goto free4;
1231
91447636 1232 proc_fdlock(p);
6601e61a
A
1233 procfdtbl_releasefd(p, sv[0], NULL);
1234 procfdtbl_releasefd(p, sv[1], NULL);
91447636
A
1235 fp_drop(p, sv[0], fp1, 1);
1236 fp_drop(p, sv[1], fp2, 1);
1237 proc_fdunlock(p);
1238
6d2010ae 1239 return (0);
1c79356b 1240free4:
91447636 1241 fp_free(p, sv[1], fp2);
1c79356b 1242free3:
91447636 1243 fp_free(p, sv[0], fp1);
1c79356b 1244free2:
2d21ac55 1245 (void) soclose(so2);
1c79356b 1246free1:
2d21ac55 1247 (void) soclose(so1);
1c79356b
A
1248 return (error);
1249}
1250
2d21ac55
A
1251/*
1252 * Returns: 0 Success
1253 * EINVAL
1254 * ENOBUFS
1255 * EBADF
1256 * EPIPE
1257 * EACCES Mandatory Access Control failure
1258 * file_socket:ENOTSOCK
1259 * file_socket:EBADF
1260 * getsockaddr:ENAMETOOLONG Filename too long
1261 * getsockaddr:EINVAL Invalid argument
1262 * getsockaddr:ENOMEM Not enough space
1263 * getsockaddr:EFAULT Bad address
1264 * <pru_sosend>:EACCES[TCP]
1265 * <pru_sosend>:EADDRINUSE[TCP]
1266 * <pru_sosend>:EADDRNOTAVAIL[TCP]
1267 * <pru_sosend>:EAFNOSUPPORT[TCP]
1268 * <pru_sosend>:EAGAIN[TCP]
1269 * <pru_sosend>:EBADF
1270 * <pru_sosend>:ECONNRESET[TCP]
1271 * <pru_sosend>:EFAULT
1272 * <pru_sosend>:EHOSTUNREACH[TCP]
1273 * <pru_sosend>:EINTR
1274 * <pru_sosend>:EINVAL
1275 * <pru_sosend>:EISCONN[AF_INET]
1276 * <pru_sosend>:EMSGSIZE[TCP]
1277 * <pru_sosend>:ENETDOWN[TCP]
1278 * <pru_sosend>:ENETUNREACH[TCP]
1279 * <pru_sosend>:ENOBUFS
1280 * <pru_sosend>:ENOMEM[TCP]
1281 * <pru_sosend>:ENOTCONN[AF_INET]
1282 * <pru_sosend>:EOPNOTSUPP
1283 * <pru_sosend>:EPERM[TCP]
1284 * <pru_sosend>:EPIPE
1285 * <pru_sosend>:EWOULDBLOCK
1286 * <pru_sosend>:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
1287 * <pru_sosend>:???[AF_INET] [whatever a filter author chooses]
1288 * <pru_sosend>:??? [value from so_error]
1289 * sockargs:???
1290 */
1c79356b 1291static int
3e170ce0 1292sendit(struct proc *p, struct socket *so, struct user_msghdr *mp, uio_t uiop,
b0d623f7 1293 int flags, int32_t *retval)
1c79356b 1294{
2d21ac55
A
1295 struct mbuf *control = NULL;
1296 struct sockaddr_storage ss;
1297 struct sockaddr *to = NULL;
1298 boolean_t want_free = TRUE;
91447636 1299 int error;
91447636 1300 user_ssize_t len;
2d21ac55
A
1301
1302 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
1c79356b 1303
2d21ac55
A
1304 if (mp->msg_name != USER_ADDR_NULL) {
1305 if (mp->msg_namelen > sizeof (ss)) {
1306 error = getsockaddr(so, &to, mp->msg_name,
4a3eedf9 1307 mp->msg_namelen, TRUE);
2d21ac55
A
1308 } else {
1309 error = getsockaddr_s(so, &ss, mp->msg_name,
4a3eedf9 1310 mp->msg_namelen, TRUE);
2d21ac55
A
1311 if (error == 0) {
1312 to = (struct sockaddr *)&ss;
1313 want_free = FALSE;
1314 }
1c79356b 1315 }
2d21ac55
A
1316 if (error != 0)
1317 goto out;
1318 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), to);
91447636 1319 }
2d21ac55
A
1320 if (mp->msg_control != USER_ADDR_NULL) {
1321 if (mp->msg_controllen < sizeof (struct cmsghdr)) {
1c79356b
A
1322 error = EINVAL;
1323 goto bad;
1324 }
1325 error = sockargs(&control, mp->msg_control,
1326 mp->msg_controllen, MT_CONTROL);
2d21ac55 1327 if (error != 0)
1c79356b 1328 goto bad;
91447636 1329 }
1c79356b 1330
2d21ac55
A
1331#if CONFIG_MACF_SOCKET_SUBSET
1332 /*
1333 * We check the state without holding the socket lock;
1334 * if a race condition occurs, it would simply result
3e170ce0 1335 * in an extra call to the MAC check function.
2d21ac55 1336 */
3e170ce0 1337 if (to != NULL &&
316670eb 1338 !(so->so_state & SS_DEFUNCT) &&
2d21ac55
A
1339 (error = mac_socket_check_send(kauth_cred_get(), so, to)) != 0)
1340 goto bad;
1341#endif /* MAC_SOCKET_SUBSET */
91447636
A
1342
1343 len = uio_resid(uiop);
39236c6e
A
1344 error = so->so_proto->pr_usrreqs->pru_sosend(so, to, uiop, 0,
1345 control, flags);
2d21ac55 1346 if (error != 0) {
91447636 1347 if (uio_resid(uiop) != len && (error == ERESTART ||
1c79356b
A
1348 error == EINTR || error == EWOULDBLOCK))
1349 error = 0;
2d21ac55 1350 /* Generation of SIGPIPE can be controlled per socket */
9bccf70c 1351 if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE))
1c79356b
A
1352 psignal(p, SIGPIPE);
1353 }
1354 if (error == 0)
91447636
A
1355 *retval = (int)(len - uio_resid(uiop));
1356bad:
2d21ac55 1357 if (to != NULL && want_free)
1c79356b 1358 FREE(to, M_SONAME);
91447636 1359out:
2d21ac55 1360 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error, 0, 0, 0, 0);
3e170ce0 1361
1c79356b
A
1362 return (error);
1363}
1364
2d21ac55
A
1365/*
1366 * Returns: 0 Success
1367 * ENOMEM
1368 * sendit:??? [see sendit definition in this file]
1369 * write:??? [4056224: applicable for pipes]
1370 */
1c79356b 1371int
b0d623f7 1372sendto(struct proc *p, struct sendto_args *uap, int32_t *retval)
2d21ac55
A
1373{
1374 __pthread_testcancel(1);
39236c6e 1375 return (sendto_nocancel(p, (struct sendto_nocancel_args *)uap, retval));
2d21ac55
A
1376}
1377
1378int
39236c6e
A
1379sendto_nocancel(struct proc *p,
1380 struct sendto_nocancel_args *uap,
1381 int32_t *retval)
1c79356b 1382{
91447636
A
1383 struct user_msghdr msg;
1384 int error;
1385 uio_t auio = NULL;
3e170ce0 1386 struct socket *so;
1c79356b 1387
2d21ac55 1388 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_START, 0, 0, 0, 0, 0);
55e303ae 1389 AUDIT_ARG(fd, uap->s);
1c79356b 1390
91447636 1391 auio = uio_create(1, 0,
2d21ac55
A
1392 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1393 UIO_WRITE);
91447636 1394 if (auio == NULL) {
3e170ce0
A
1395 error = ENOMEM;
1396 goto done;
91447636
A
1397 }
1398 uio_addiov(auio, uap->buf, uap->len);
1399
1c79356b
A
1400 msg.msg_name = uap->to;
1401 msg.msg_namelen = uap->tolen;
91447636
A
1402 /* no need to set up msg_iov. sendit uses uio_t we send it */
1403 msg.msg_iov = 0;
1404 msg.msg_iovlen = 0;
1c79356b 1405 msg.msg_control = 0;
1c79356b 1406 msg.msg_flags = 0;
1c79356b 1407
3e170ce0
A
1408 error = file_socket(uap->s, &so);
1409 if (error)
1410 goto done;
2d21ac55 1411
3e170ce0
A
1412 if (so == NULL) {
1413 error = EBADF;
1414 } else {
1415 error = sendit(p, so, &msg, auio, uap->flags, retval);
91447636 1416 }
2d21ac55 1417
3e170ce0
A
1418 file_drop(uap->s);
1419done:
1420 if (auio != NULL)
1421 uio_free(auio);
1422
2d21ac55 1423 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_END, error, *retval, 0, 0, 0);
1c79356b 1424
2d21ac55 1425 return (error);
1c79356b 1426}
1c79356b 1427
2d21ac55
A
1428/*
1429 * Returns: 0 Success
1430 * ENOBUFS
1431 * copyin:EFAULT
1432 * sendit:??? [see sendit definition in this file]
1433 */
1c79356b 1434int
b0d623f7 1435sendmsg(struct proc *p, struct sendmsg_args *uap, int32_t *retval)
1c79356b 1436{
2d21ac55 1437 __pthread_testcancel(1);
3e170ce0
A
1438 return (sendmsg_nocancel(p, (struct sendmsg_nocancel_args *)uap,
1439 retval));
1c79356b 1440}
1c79356b
A
1441
1442int
3e170ce0
A
1443sendmsg_nocancel(struct proc *p, struct sendmsg_nocancel_args *uap,
1444 int32_t *retval)
1c79356b 1445{
b0d623f7
A
1446 struct user32_msghdr msg32;
1447 struct user64_msghdr msg64;
91447636
A
1448 struct user_msghdr user_msg;
1449 caddr_t msghdrp;
1450 int size_of_msghdr;
1c79356b 1451 int error;
91447636
A
1452 uio_t auio = NULL;
1453 struct user_iovec *iovp;
3e170ce0 1454 struct socket *so;
1c79356b 1455
2d21ac55 1456 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
55e303ae 1457 AUDIT_ARG(fd, uap->s);
91447636 1458 if (IS_64BIT_PROCESS(p)) {
b0d623f7
A
1459 msghdrp = (caddr_t)&msg64;
1460 size_of_msghdr = sizeof (msg64);
2d21ac55 1461 } else {
b0d623f7
A
1462 msghdrp = (caddr_t)&msg32;
1463 size_of_msghdr = sizeof (msg32);
91447636
A
1464 }
1465 error = copyin(uap->msg, msghdrp, size_of_msghdr);
2d21ac55
A
1466 if (error) {
1467 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1468 return (error);
1c79356b 1469 }
91447636 1470
b0d623f7
A
1471 if (IS_64BIT_PROCESS(p)) {
1472 user_msg.msg_flags = msg64.msg_flags;
1473 user_msg.msg_controllen = msg64.msg_controllen;
1474 user_msg.msg_control = msg64.msg_control;
1475 user_msg.msg_iovlen = msg64.msg_iovlen;
1476 user_msg.msg_iov = msg64.msg_iov;
1477 user_msg.msg_namelen = msg64.msg_namelen;
1478 user_msg.msg_name = msg64.msg_name;
1479 } else {
1480 user_msg.msg_flags = msg32.msg_flags;
1481 user_msg.msg_controllen = msg32.msg_controllen;
1482 user_msg.msg_control = msg32.msg_control;
1483 user_msg.msg_iovlen = msg32.msg_iovlen;
1484 user_msg.msg_iov = msg32.msg_iov;
1485 user_msg.msg_namelen = msg32.msg_namelen;
1486 user_msg.msg_name = msg32.msg_name;
91447636
A
1487 }
1488
1489 if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
2d21ac55
A
1490 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, EMSGSIZE,
1491 0, 0, 0, 0);
91447636
A
1492 return (EMSGSIZE);
1493 }
1494
1495 /* allocate a uio large enough to hold the number of iovecs passed */
1496 auio = uio_create(user_msg.msg_iovlen, 0,
2d21ac55
A
1497 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1498 UIO_WRITE);
91447636
A
1499 if (auio == NULL) {
1500 error = ENOBUFS;
1501 goto done;
1502 }
2d21ac55 1503
91447636 1504 if (user_msg.msg_iovlen) {
2d21ac55
A
1505 /*
1506 * get location of iovecs within the uio.
1507 * then copyin the iovecs from user space.
91447636
A
1508 */
1509 iovp = uio_iovsaddr(auio);
1510 if (iovp == NULL) {
1511 error = ENOBUFS;
1512 goto done;
1513 }
b0d623f7
A
1514 error = copyin_user_iovec_array(user_msg.msg_iov,
1515 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
1516 user_msg.msg_iovlen, iovp);
91447636
A
1517 if (error)
1518 goto done;
1519 user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
2d21ac55
A
1520
1521 /* finish setup of uio_t */
39236c6e
A
1522 error = uio_calculateresid(auio);
1523 if (error) {
1524 goto done;
1525 }
2d21ac55 1526 } else {
91447636
A
1527 user_msg.msg_iov = 0;
1528 }
2d21ac55
A
1529
1530 /* msg_flags is ignored for send */
91447636 1531 user_msg.msg_flags = 0;
2d21ac55 1532
3e170ce0
A
1533 error = file_socket(uap->s, &so);
1534 if (error) {
1535 goto done;
1536 }
1537 if (so == NULL) {
1538 error = EBADF;
1539 } else {
1540 error = sendit(p, so, &user_msg, auio, uap->flags, retval);
1541 }
1542 file_drop(uap->s);
1c79356b 1543done:
91447636
A
1544 if (auio != NULL) {
1545 uio_free(auio);
1546 }
2d21ac55 1547 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
91447636 1548
1c79356b
A
1549 return (error);
1550}
1551
fe8ab488
A
1552int
1553sendmsg_x(struct proc *p, struct sendmsg_x_args *uap, user_ssize_t *retval)
1554{
1555 int error = 0;
3e170ce0 1556 struct user_msghdr_x *user_msg_x = NULL;
fe8ab488
A
1557 struct uio **uiop = NULL;
1558 struct socket *so;
1559 u_int i;
1560 struct sockaddr *to = NULL;
fe8ab488
A
1561 user_ssize_t len_before = 0, len_after;
1562 int need_drop = 0;
1563 size_t size_of_msghdr;
1564 void *umsgp = NULL;
1565 u_int uiocnt;
3e170ce0 1566 int has_addr_or_ctl = 0;
fe8ab488
A
1567
1568 KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0);
1569
1570 error = file_socket(uap->s, &so);
1571 if (error) {
1572 goto out;
1573 }
1574 need_drop = 1;
1575 if (so == NULL) {
1576 error = EBADF;
1577 goto out;
1578 }
fe8ab488
A
1579
1580 /*
1581 * Input parameter range check
1582 */
1583 if (uap->cnt == 0 || uap->cnt > UIO_MAXIOV) {
1584 error = EINVAL;
1585 goto out;
1586 }
3e170ce0
A
1587 /*
1588 * Clip to max currently allowed
1589 */
1590 if (uap->cnt > somaxsendmsgx)
1591 uap->cnt = somaxsendmsgx;
1592
1593 user_msg_x = _MALLOC(uap->cnt * sizeof(struct user_msghdr_x),
fe8ab488 1594 M_TEMP, M_WAITOK | M_ZERO);
3e170ce0
A
1595 if (user_msg_x == NULL) {
1596 DBG_PRINTF("%s _MALLOC() user_msg_x failed\n", __func__);
fe8ab488
A
1597 error = ENOMEM;
1598 goto out;
1599 }
1600 uiop = _MALLOC(uap->cnt * sizeof(struct uio *),
1601 M_TEMP, M_WAITOK | M_ZERO);
1602 if (uiop == NULL) {
3e170ce0 1603 DBG_PRINTF("%s _MALLOC() uiop failed\n", __func__);
fe8ab488
A
1604 error = ENOMEM;
1605 goto out;
1606 }
1607
1608 size_of_msghdr = IS_64BIT_PROCESS(p) ?
1609 sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x);
1610
3e170ce0 1611 umsgp = _MALLOC(uap->cnt * size_of_msghdr,
fe8ab488
A
1612 M_TEMP, M_WAITOK | M_ZERO);
1613 if (umsgp == NULL) {
3e170ce0 1614 printf("%s _MALLOC() user_msg_x failed\n", __func__);
fe8ab488
A
1615 error = ENOMEM;
1616 goto out;
1617 }
1618 error = copyin(uap->msgp, umsgp, uap->cnt * size_of_msghdr);
1619 if (error) {
3e170ce0 1620 DBG_PRINTF("%s copyin() failed\n", __func__);
fe8ab488
A
1621 goto out;
1622 }
1623 error = internalize_user_msghdr_array(umsgp,
1624 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
3e170ce0 1625 UIO_WRITE, uap->cnt, user_msg_x, uiop);
fe8ab488 1626 if (error) {
3e170ce0 1627 DBG_PRINTF("%s copyin_user_msghdr_array() failed\n", __func__);
fe8ab488
A
1628 goto out;
1629 }
1630 /*
1631 * Make sure the size of each message iovec and
1632 * the aggregate size of all the iovec is valid
1633 */
1634 if (uio_array_is_valid(uiop, uap->cnt) == 0) {
1635 error = EINVAL;
1636 goto out;
1637 }
1638
1639 /*
1640 * Sanity check on passed arguments
1641 */
1642 for (i = 0; i < uap->cnt; i++) {
3e170ce0 1643 struct user_msghdr_x *mp = user_msg_x + i;
fe8ab488
A
1644
1645 /*
1646 * No flags on send message
1647 */
1648 if (mp->msg_flags != 0) {
1649 error = EINVAL;
1650 goto out;
1651 }
1652 /*
1653 * No support for address or ancillary data (yet)
1654 */
3e170ce0
A
1655 if (mp->msg_name != USER_ADDR_NULL || mp->msg_namelen != 0)
1656 has_addr_or_ctl = 1;
1657
fe8ab488 1658 if (mp->msg_control != USER_ADDR_NULL ||
3e170ce0
A
1659 mp->msg_controllen != 0)
1660 has_addr_or_ctl = 1;
1661
fe8ab488
A
1662#if CONFIG_MACF_SOCKET_SUBSET
1663 /*
1664 * We check the state without holding the socket lock;
1665 * if a race condition occurs, it would simply result
3e170ce0 1666 * in an extra call to the MAC check function.
fe8ab488
A
1667 *
1668 * Note: The following check is never true taken with the
1669 * current limitation that we do not accept to pass an address,
3e170ce0
A
1670 * this is effectively placeholder code. If we add support for
1671 * addresses, we will have to check every address.
fe8ab488 1672 */
3e170ce0 1673 if (to != NULL &&
fe8ab488 1674 !(so->so_state & SS_DEFUNCT) &&
3e170ce0
A
1675 (error = mac_socket_check_send(kauth_cred_get(), so, to))
1676 != 0)
fe8ab488
A
1677 goto out;
1678#endif /* MAC_SOCKET_SUBSET */
1679 }
1680
1681 len_before = uio_array_resid(uiop, uap->cnt);
1682
3e170ce0
A
1683 /*
1684 * Feed list of packets at once only for connected socket without
1685 * control message
1686 */
1687 if (so->so_proto->pr_usrreqs->pru_sosend_list !=
1688 pru_sosend_list_notsupp &&
1689 has_addr_or_ctl == 0 && somaxsendmsgx == 0) {
1690 error = so->so_proto->pr_usrreqs->pru_sosend_list(so, uiop,
1691 uap->cnt, uap->flags);
1692 } else {
1693 for (i = 0; i < uap->cnt; i++) {
1694 struct user_msghdr_x *mp = user_msg_x + i;
1695 struct user_msghdr user_msg;
1696 uio_t auio = uiop[i];
1697 int32_t tmpval;
1698
1699 user_msg.msg_flags = mp->msg_flags;
1700 user_msg.msg_controllen = mp->msg_controllen;
1701 user_msg.msg_control = mp->msg_control;
1702 user_msg.msg_iovlen = mp->msg_iovlen;
1703 user_msg.msg_iov = mp->msg_iov;
1704 user_msg.msg_namelen = mp->msg_namelen;
1705 user_msg.msg_name = mp->msg_name;
1706
1707 error = sendit(p, so, &user_msg, auio, uap->flags,
1708 &tmpval);
1709 if (error != 0)
1710 break;
1711 }
1712 }
fe8ab488
A
1713 len_after = uio_array_resid(uiop, uap->cnt);
1714
3e170ce0
A
1715 VERIFY(len_after <= len_before);
1716
fe8ab488
A
1717 if (error != 0) {
1718 if (len_after != len_before && (error == ERESTART ||
3e170ce0
A
1719 error == EINTR || error == EWOULDBLOCK ||
1720 error == ENOBUFS))
fe8ab488
A
1721 error = 0;
1722 /* Generation of SIGPIPE can be controlled per socket */
1723 if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE))
1724 psignal(p, SIGPIPE);
1725 }
1726 if (error == 0) {
1727 uiocnt = externalize_user_msghdr_array(umsgp,
1728 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
3e170ce0 1729 UIO_WRITE, uap->cnt, user_msg_x, uiop);
fe8ab488
A
1730
1731 *retval = (int)(uiocnt);
1732 }
1733out:
1734 if (need_drop)
1735 file_drop(uap->s);
1736 if (umsgp != NULL)
1737 _FREE(umsgp, M_TEMP);
1738 if (uiop != NULL) {
1739 free_uio_array(uiop, uap->cnt);
1740 _FREE(uiop, M_TEMP);
1741 }
3e170ce0
A
1742 if (user_msg_x != NULL)
1743 _FREE(user_msg_x, M_TEMP);
fe8ab488
A
1744
1745 KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0);
1746
1747 return (error);
1748}
1749
3e170ce0
A
1750
1751static int
1752copyout_sa(struct sockaddr *fromsa, user_addr_t name, socklen_t *namelen)
1753{
1754 int error = 0;
1755 socklen_t sa_len = 0;
1756 ssize_t len;
1757
1758 len = *namelen;
1759 if (len <= 0 || fromsa == 0) {
1760 len = 0;
1761 } else {
1762#ifndef MIN
1763#define MIN(a, b) ((a) > (b) ? (b) : (a))
1764#endif
1765 sa_len = fromsa->sa_len;
1766 len = MIN((unsigned int)len, sa_len);
1767 error = copyout(fromsa, name, (unsigned)len);
1768 if (error)
1769 goto out;
1770 }
1771 *namelen = sa_len;
1772out:
1773 return (0);
1774}
1775
1776static int
1777copyout_control(struct proc *p, struct mbuf *m, user_addr_t control,
1778 socklen_t *controllen, int *flags)
1779{
1780 int error = 0;
1781 ssize_t len;
1782 user_addr_t ctlbuf;
1783
1784 len = *controllen;
1785 *controllen = 0;
1786 ctlbuf = control;
1787
1788 while (m && len > 0) {
1789 unsigned int tocopy;
1790 struct cmsghdr *cp = mtod(m, struct cmsghdr *);
1791 int cp_size = CMSG_ALIGN(cp->cmsg_len);
1792 int buflen = m->m_len;
1793
1794 while (buflen > 0 && len > 0) {
1795 /*
1796 * SCM_TIMESTAMP hack because struct timeval has a
1797 * different size for 32 bits and 64 bits processes
1798 */
1799 if (cp->cmsg_level == SOL_SOCKET && cp->cmsg_type == SCM_TIMESTAMP) {
1800 unsigned char tmp_buffer[CMSG_SPACE(sizeof(struct user64_timeval))];
1801 struct cmsghdr *tmp_cp = (struct cmsghdr *)(void *)tmp_buffer;
1802 int tmp_space;
1803 struct timeval *tv = (struct timeval *)(void *)CMSG_DATA(cp);
1804
1805 tmp_cp->cmsg_level = SOL_SOCKET;
1806 tmp_cp->cmsg_type = SCM_TIMESTAMP;
1807
1808 if (proc_is64bit(p)) {
1809 struct user64_timeval *tv64 = (struct user64_timeval *)(void *)CMSG_DATA(tmp_cp);
1810
1811 tv64->tv_sec = tv->tv_sec;
1812 tv64->tv_usec = tv->tv_usec;
1813
1814 tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user64_timeval));
1815 tmp_space = CMSG_SPACE(sizeof(struct user64_timeval));
1816 } else {
1817 struct user32_timeval *tv32 = (struct user32_timeval *)(void *)CMSG_DATA(tmp_cp);
1818
1819 tv32->tv_sec = tv->tv_sec;
1820 tv32->tv_usec = tv->tv_usec;
1821
1822 tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user32_timeval));
1823 tmp_space = CMSG_SPACE(sizeof(struct user32_timeval));
1824 }
1825 if (len >= tmp_space) {
1826 tocopy = tmp_space;
1827 } else {
1828 *flags |= MSG_CTRUNC;
1829 tocopy = len;
1830 }
1831 error = copyout(tmp_buffer, ctlbuf, tocopy);
1832 if (error)
1833 goto out;
1834 } else {
1835 if (cp_size > buflen) {
1836 panic("cp_size > buflen, something"
1837 "wrong with alignment!");
1838 }
1839 if (len >= cp_size) {
1840 tocopy = cp_size;
1841 } else {
1842 *flags |= MSG_CTRUNC;
1843 tocopy = len;
1844 }
1845 error = copyout((caddr_t) cp, ctlbuf, tocopy);
1846 if (error)
1847 goto out;
1848 }
1849
1850 ctlbuf += tocopy;
1851 len -= tocopy;
1852
1853 buflen -= cp_size;
1854 cp = (struct cmsghdr *)(void *)
1855 ((unsigned char *) cp + cp_size);
1856 cp_size = CMSG_ALIGN(cp->cmsg_len);
1857 }
1858
1859 m = m->m_next;
1860 }
1861 *controllen = ctlbuf - control;
1862out:
1863 return (error);
1864}
1865
2d21ac55
A
1866/*
1867 * Returns: 0 Success
1868 * ENOTSOCK
1869 * EINVAL
1870 * EBADF
1871 * EACCES Mandatory Access Control failure
1872 * copyout:EFAULT
1873 * fp_lookup:EBADF
1874 * <pru_soreceive>:ENOBUFS
1875 * <pru_soreceive>:ENOTCONN
1876 * <pru_soreceive>:EWOULDBLOCK
1877 * <pru_soreceive>:EFAULT
1878 * <pru_soreceive>:EINTR
1879 * <pru_soreceive>:EBADF
1880 * <pru_soreceive>:EINVAL
1881 * <pru_soreceive>:EMSGSIZE
1882 * <pru_soreceive>:???
1883 *
1884 * Notes: Additional return values from calls through <pru_soreceive>
1885 * depend on protocols other than TCP or AF_UNIX, which are
1886 * documented above.
1887 */
1c79356b 1888static int
2d21ac55 1889recvit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop,
b0d623f7 1890 user_addr_t namelenp, int32_t *retval)
1c79356b 1891{
39236c6e
A
1892 ssize_t len;
1893 int error;
3e170ce0 1894 struct mbuf *control = 0;
1c79356b
A
1895 struct socket *so;
1896 struct sockaddr *fromsa = 0;
91447636 1897 struct fileproc *fp;
1c79356b 1898
2d21ac55 1899 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
91447636 1900 proc_fdlock(p);
2d21ac55
A
1901 if ((error = fp_lookup(p, s, &fp, 1))) {
1902 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
91447636 1903 proc_fdunlock(p);
2d21ac55 1904 return (error);
1c79356b 1905 }
91447636 1906 if (fp->f_type != DTYPE_SOCKET) {
2d21ac55 1907 fp_drop(p, s, fp, 1);
91447636 1908 proc_fdunlock(p);
2d21ac55 1909 return (ENOTSOCK);
91447636 1910 }
1c79356b 1911
2d21ac55
A
1912 so = (struct socket *)fp->f_data;
1913 if (so == NULL) {
1914 fp_drop(p, s, fp, 1);
1915 proc_fdunlock(p);
1916 return (EBADF);
1917 }
91447636
A
1918
1919 proc_fdunlock(p);
2d21ac55
A
1920
1921#if CONFIG_MACF_SOCKET_SUBSET
1922 /*
1923 * We check the state without holding the socket lock;
1924 * if a race condition occurs, it would simply result
1925 * in an extra call to the MAC check function.
1926 */
316670eb
A
1927 if (!(so->so_state & SS_DEFUNCT) &&
1928 !(so->so_state & SS_ISCONNECTED) &&
39236c6e 1929 !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
2d21ac55
A
1930 (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0)
1931 goto out1;
1932#endif /* MAC_SOCKET_SUBSET */
91447636 1933 if (uio_resid(uiop) < 0) {
2d21ac55 1934 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, EINVAL, 0, 0, 0, 0);
91447636
A
1935 error = EINVAL;
1936 goto out1;
1c79356b 1937 }
91447636
A
1938
1939 len = uio_resid(uiop);
2d21ac55
A
1940 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, uiop,
1941 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
1942 &mp->msg_flags);
b0d623f7
A
1943 if (fromsa)
1944 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()),
1945 fromsa);
1c79356b 1946 if (error) {
91447636 1947 if (uio_resid(uiop) != len && (error == ERESTART ||
1c79356b
A
1948 error == EINTR || error == EWOULDBLOCK))
1949 error = 0;
1950 }
1c79356b
A
1951 if (error)
1952 goto out;
2d21ac55 1953
91447636 1954 *retval = len - uio_resid(uiop);
2d21ac55 1955
3e170ce0
A
1956 if (mp->msg_name) {
1957 error = copyout_sa(fromsa, mp->msg_name, &mp->msg_namelen);
1958 if (error)
1959 goto out;
2d21ac55 1960 /* return the actual, untruncated address length */
1c79356b 1961 if (namelenp &&
3e170ce0 1962 (error = copyout((caddr_t)&mp->msg_namelen, namelenp,
2d21ac55 1963 sizeof (int)))) {
1c79356b
A
1964 goto out;
1965 }
1966 }
39236c6e 1967
3e170ce0
A
1968 if (mp->msg_control) {
1969 error = copyout_control(p, control, mp->msg_control,
1970 &mp->msg_controllen, &mp->msg_flags);
1c79356b
A
1971 }
1972out:
1973 if (fromsa)
1974 FREE(fromsa, M_SONAME);
1975 if (control)
1976 m_freem(control);
2d21ac55 1977 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
91447636
A
1978out1:
1979 fp_drop(p, s, fp, 0);
1c79356b
A
1980 return (error);
1981}
1982
2d21ac55
A
1983/*
1984 * Returns: 0 Success
1985 * ENOMEM
1986 * copyin:EFAULT
1987 * recvit:???
1988 * read:??? [4056224: applicable for pipes]
1989 *
1990 * Notes: The read entry point is only called as part of support for
1991 * binary backward compatibility; new code should use read
1992 * instead of recv or recvfrom when attempting to read data
1993 * from pipes.
1994 *
1995 * For full documentation of the return codes from recvit, see
1996 * the block header for the recvit function.
1997 */
1998int
b0d623f7 1999recvfrom(struct proc *p, struct recvfrom_args *uap, int32_t *retval)
2d21ac55
A
2000{
2001 __pthread_testcancel(1);
3e170ce0
A
2002 return (recvfrom_nocancel(p, (struct recvfrom_nocancel_args *)uap,
2003 retval));
2d21ac55
A
2004}
2005
1c79356b 2006int
3e170ce0
A
2007recvfrom_nocancel(struct proc *p, struct recvfrom_nocancel_args *uap,
2008 int32_t *retval)
1c79356b 2009{
91447636 2010 struct user_msghdr msg;
1c79356b 2011 int error;
91447636 2012 uio_t auio = NULL;
1c79356b 2013
2d21ac55 2014 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_START, 0, 0, 0, 0, 0);
55e303ae 2015 AUDIT_ARG(fd, uap->s);
1c79356b
A
2016
2017 if (uap->fromlenaddr) {
91447636 2018 error = copyin(uap->fromlenaddr,
1c79356b
A
2019 (caddr_t)&msg.msg_namelen, sizeof (msg.msg_namelen));
2020 if (error)
2021 return (error);
2d21ac55 2022 } else {
1c79356b 2023 msg.msg_namelen = 0;
2d21ac55 2024 }
1c79356b 2025 msg.msg_name = uap->from;
91447636 2026 auio = uio_create(1, 0,
2d21ac55
A
2027 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
2028 UIO_READ);
91447636
A
2029 if (auio == NULL) {
2030 return (ENOMEM);
2031 }
2d21ac55 2032
91447636
A
2033 uio_addiov(auio, uap->buf, uap->len);
2034 /* no need to set up msg_iov. recvit uses uio_t we send it */
2035 msg.msg_iov = 0;
2036 msg.msg_iovlen = 0;
1c79356b 2037 msg.msg_control = 0;
91447636 2038 msg.msg_controllen = 0;
1c79356b 2039 msg.msg_flags = uap->flags;
91447636
A
2040 error = recvit(p, uap->s, &msg, auio, uap->fromlenaddr, retval);
2041 if (auio != NULL) {
2042 uio_free(auio);
2043 }
2d21ac55 2044
2d21ac55 2045 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_END, error, 0, 0, 0, 0);
1c79356b 2046
2d21ac55 2047 return (error);
1c79356b
A
2048}
2049
2050/*
2d21ac55
A
2051 * Returns: 0 Success
2052 * EMSGSIZE
2053 * ENOMEM
2054 * copyin:EFAULT
2055 * copyout:EFAULT
2056 * recvit:???
2057 *
2058 * Notes: For full documentation of the return codes from recvit, see
2059 * the block header for the recvit function.
1c79356b
A
2060 */
2061int
b0d623f7 2062recvmsg(struct proc *p, struct recvmsg_args *uap, int32_t *retval)
1c79356b 2063{
2d21ac55 2064 __pthread_testcancel(1);
3e170ce0
A
2065 return (recvmsg_nocancel(p, (struct recvmsg_nocancel_args *)uap,
2066 retval));
1c79356b 2067}
1c79356b
A
2068
2069int
3e170ce0
A
2070recvmsg_nocancel(struct proc *p, struct recvmsg_nocancel_args *uap,
2071 int32_t *retval)
1c79356b 2072{
b0d623f7
A
2073 struct user32_msghdr msg32;
2074 struct user64_msghdr msg64;
91447636
A
2075 struct user_msghdr user_msg;
2076 caddr_t msghdrp;
2077 int size_of_msghdr;
2078 user_addr_t uiov;
2d21ac55 2079 int error;
91447636
A
2080 uio_t auio = NULL;
2081 struct user_iovec *iovp;
1c79356b 2082
2d21ac55 2083 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
55e303ae 2084 AUDIT_ARG(fd, uap->s);
91447636 2085 if (IS_64BIT_PROCESS(p)) {
b0d623f7
A
2086 msghdrp = (caddr_t)&msg64;
2087 size_of_msghdr = sizeof (msg64);
2d21ac55 2088 } else {
b0d623f7
A
2089 msghdrp = (caddr_t)&msg32;
2090 size_of_msghdr = sizeof (msg32);
91447636
A
2091 }
2092 error = copyin(uap->msg, msghdrp, size_of_msghdr);
2d21ac55
A
2093 if (error) {
2094 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1c79356b
A
2095 return (error);
2096 }
2097
91447636 2098 /* only need to copy if user process is not 64-bit */
b0d623f7
A
2099 if (IS_64BIT_PROCESS(p)) {
2100 user_msg.msg_flags = msg64.msg_flags;
2101 user_msg.msg_controllen = msg64.msg_controllen;
2102 user_msg.msg_control = msg64.msg_control;
2103 user_msg.msg_iovlen = msg64.msg_iovlen;
2104 user_msg.msg_iov = msg64.msg_iov;
2105 user_msg.msg_namelen = msg64.msg_namelen;
2106 user_msg.msg_name = msg64.msg_name;
2107 } else {
2108 user_msg.msg_flags = msg32.msg_flags;
2109 user_msg.msg_controllen = msg32.msg_controllen;
2110 user_msg.msg_control = msg32.msg_control;
2111 user_msg.msg_iovlen = msg32.msg_iovlen;
2112 user_msg.msg_iov = msg32.msg_iov;
2113 user_msg.msg_namelen = msg32.msg_namelen;
2114 user_msg.msg_name = msg32.msg_name;
91447636
A
2115 }
2116
2117 if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
2d21ac55
A
2118 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, EMSGSIZE,
2119 0, 0, 0, 0);
91447636
A
2120 return (EMSGSIZE);
2121 }
2122
91447636 2123 user_msg.msg_flags = uap->flags;
91447636
A
2124
2125 /* allocate a uio large enough to hold the number of iovecs passed */
2126 auio = uio_create(user_msg.msg_iovlen, 0,
2d21ac55
A
2127 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
2128 UIO_READ);
91447636
A
2129 if (auio == NULL) {
2130 error = ENOMEM;
2131 goto done;
2132 }
2133
2d21ac55
A
2134 /*
2135 * get location of iovecs within the uio. then copyin the iovecs from
91447636
A
2136 * user space.
2137 */
2138 iovp = uio_iovsaddr(auio);
2139 if (iovp == NULL) {
2140 error = ENOMEM;
2141 goto done;
2142 }
2143 uiov = user_msg.msg_iov;
2144 user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
b0d623f7
A
2145 error = copyin_user_iovec_array(uiov,
2146 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
2147 user_msg.msg_iovlen, iovp);
1c79356b
A
2148 if (error)
2149 goto done;
91447636 2150
2d21ac55 2151 /* finish setup of uio_t */
39236c6e
A
2152 error = uio_calculateresid(auio);
2153 if (error) {
2154 goto done;
2155 }
2d21ac55 2156
91447636 2157 error = recvit(p, uap->s, &user_msg, auio, 0, retval);
1c79356b 2158 if (!error) {
91447636 2159 user_msg.msg_iov = uiov;
b0d623f7
A
2160 if (IS_64BIT_PROCESS(p)) {
2161 msg64.msg_flags = user_msg.msg_flags;
2162 msg64.msg_controllen = user_msg.msg_controllen;
2163 msg64.msg_control = user_msg.msg_control;
2164 msg64.msg_iovlen = user_msg.msg_iovlen;
2165 msg64.msg_iov = user_msg.msg_iov;
2166 msg64.msg_namelen = user_msg.msg_namelen;
2167 msg64.msg_name = user_msg.msg_name;
2168 } else {
2169 msg32.msg_flags = user_msg.msg_flags;
2170 msg32.msg_controllen = user_msg.msg_controllen;
2171 msg32.msg_control = user_msg.msg_control;
2172 msg32.msg_iovlen = user_msg.msg_iovlen;
2173 msg32.msg_iov = user_msg.msg_iov;
2174 msg32.msg_namelen = user_msg.msg_namelen;
2175 msg32.msg_name = user_msg.msg_name;
91447636
A
2176 }
2177 error = copyout(msghdrp, uap->msg, size_of_msghdr);
1c79356b
A
2178 }
2179done:
91447636
A
2180 if (auio != NULL) {
2181 uio_free(auio);
2182 }
2d21ac55 2183 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1c79356b
A
2184 return (error);
2185}
2186
fe8ab488
A
2187int
2188recvmsg_x(struct proc *p, struct recvmsg_x_args *uap, user_ssize_t *retval)
2189{
2190 int error = EOPNOTSUPP;
3e170ce0
A
2191 struct user_msghdr_x *user_msg_x = NULL;
2192 struct recv_msg_elem *recv_msg_array = NULL;
fe8ab488
A
2193 struct socket *so;
2194 user_ssize_t len_before = 0, len_after;
2195 int need_drop = 0;
2196 size_t size_of_msghdr;
2197 void *umsgp = NULL;
2198 u_int i;
2199 u_int uiocnt;
2200
2201 KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0);
2202
2203 error = file_socket(uap->s, &so);
2204 if (error) {
2205 goto out;
2206 }
2207 need_drop = 1;
2208 if (so == NULL) {
2209 error = EBADF;
2210 goto out;
2211 }
fe8ab488
A
2212 /*
2213 * Input parameter range check
2214 */
2215 if (uap->cnt == 0 || uap->cnt > UIO_MAXIOV) {
2216 error = EINVAL;
2217 goto out;
2218 }
3e170ce0
A
2219 if (uap->cnt > somaxrecvmsgx)
2220 uap->cnt = somaxrecvmsgx;
2221
2222 user_msg_x = _MALLOC(uap->cnt * sizeof(struct user_msghdr_x),
fe8ab488 2223 M_TEMP, M_WAITOK | M_ZERO);
3e170ce0
A
2224 if (user_msg_x == NULL) {
2225 DBG_PRINTF("%s _MALLOC() user_msg_x failed\n", __func__);
fe8ab488
A
2226 error = ENOMEM;
2227 goto out;
2228 }
3e170ce0
A
2229 recv_msg_array = alloc_recv_msg_array(uap->cnt);
2230 if (recv_msg_array == NULL) {
2231 DBG_PRINTF("%s alloc_recv_msg_array() failed\n", __func__);
fe8ab488
A
2232 error = ENOMEM;
2233 goto out;
2234 }
fe8ab488
A
2235 size_of_msghdr = IS_64BIT_PROCESS(p) ?
2236 sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x);
2237
2238 umsgp = _MALLOC(uap->cnt * size_of_msghdr, M_TEMP, M_WAITOK | M_ZERO);
2239 if (umsgp == NULL) {
3e170ce0 2240 DBG_PRINTF("%s _MALLOC() umsgp failed\n", __func__);
fe8ab488
A
2241 error = ENOMEM;
2242 goto out;
2243 }
2244 error = copyin(uap->msgp, umsgp, uap->cnt * size_of_msghdr);
2245 if (error) {
3e170ce0 2246 DBG_PRINTF("%s copyin() failed\n", __func__);
fe8ab488
A
2247 goto out;
2248 }
3e170ce0 2249 error = internalize_recv_msghdr_array(umsgp,
fe8ab488 2250 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
3e170ce0 2251 UIO_READ, uap->cnt, user_msg_x, recv_msg_array);
fe8ab488 2252 if (error) {
3e170ce0 2253 DBG_PRINTF("%s copyin_user_msghdr_array() failed\n", __func__);
fe8ab488
A
2254 goto out;
2255 }
2256 /*
2257 * Make sure the size of each message iovec and
2258 * the aggregate size of all the iovec is valid
2259 */
3e170ce0 2260 if (recv_msg_array_is_valid(recv_msg_array, uap->cnt) == 0) {
fe8ab488
A
2261 error = EINVAL;
2262 goto out;
2263 }
fe8ab488
A
2264 /*
2265 * Sanity check on passed arguments
2266 */
2267 for (i = 0; i < uap->cnt; i++) {
3e170ce0 2268 struct user_msghdr_x *mp = user_msg_x + i;
fe8ab488
A
2269
2270 if (mp->msg_flags != 0) {
2271 error = EINVAL;
2272 goto out;
2273 }
fe8ab488
A
2274 }
2275#if CONFIG_MACF_SOCKET_SUBSET
2276 /*
2277 * We check the state without holding the socket lock;
2278 * if a race condition occurs, it would simply result
2279 * in an extra call to the MAC check function.
2280 */
2281 if (!(so->so_state & SS_DEFUNCT) &&
2282 !(so->so_state & SS_ISCONNECTED) &&
2283 !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
2284 (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0)
2285 goto out;
2286#endif /* MAC_SOCKET_SUBSET */
2287
3e170ce0 2288 len_before = recv_msg_array_resid(recv_msg_array, uap->cnt);
fe8ab488 2289
3e170ce0
A
2290 if (so->so_proto->pr_usrreqs->pru_soreceive_list !=
2291 pru_soreceive_list_notsupp &&
2292 somaxrecvmsgx == 0) {
2293 error = so->so_proto->pr_usrreqs->pru_soreceive_list(so,
2294 recv_msg_array, uap->cnt, &uap->flags);
2295 } else {
2296 int flags = uap->flags;
fe8ab488 2297
3e170ce0
A
2298 for (i = 0; i < uap->cnt; i++) {
2299 struct recv_msg_elem *recv_msg_elem;
2300 uio_t auio;
2301 struct sockaddr **psa;
2302 struct mbuf **controlp;
2303
2304 recv_msg_elem = recv_msg_array + i;
2305 auio = recv_msg_elem->uio;
2306
2307 /*
2308 * Do not block if we got at least one packet
2309 */
2310 if (i > 0)
2311 flags |= MSG_DONTWAIT;
2312
2313 psa = (recv_msg_elem->which & SOCK_MSG_SA) ?
2314 &recv_msg_elem->psa : NULL;
2315 controlp = (recv_msg_elem->which & SOCK_MSG_CONTROL) ?
2316 &recv_msg_elem->controlp : NULL;
2317
2318 error = so->so_proto->pr_usrreqs->pru_soreceive(so, psa,
2319 auio, (struct mbuf **)0, controlp, &flags);
2320 if (error)
2321 break;
2322 /*
2323 * We have some data
2324 */
2325 recv_msg_elem->which |= SOCK_MSG_DATA;
2326 /*
2327 * Stop on partial copy
2328 */
2329 if (flags & (MSG_RCVMORE | MSG_TRUNC))
2330 break;
2331 }
2332 if ((uap->flags & MSG_DONTWAIT) == 0)
2333 flags &= ~MSG_DONTWAIT;
2334 uap->flags = flags;
2335 }
2336
2337 len_after = recv_msg_array_resid(recv_msg_array, uap->cnt);
fe8ab488
A
2338
2339 if (error) {
2340 if (len_after != len_before && (error == ERESTART ||
2341 error == EINTR || error == EWOULDBLOCK))
2342 error = 0;
3e170ce0
A
2343 else
2344 goto out;
fe8ab488 2345 }
fe8ab488 2346
3e170ce0
A
2347 uiocnt = externalize_recv_msghdr_array(umsgp,
2348 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
2349 UIO_READ, uap->cnt, user_msg_x, recv_msg_array);
2350
2351 error = copyout(umsgp, uap->msgp, uap->cnt * size_of_msghdr);
2352 if (error) {
2353 DBG_PRINTF("%s copyout() failed\n", __func__);
2354 goto out;
2355 }
2356 *retval = (int)(uiocnt);
2357
2358 for (i = 0; i < uap->cnt; i++) {
2359 struct user_msghdr_x *mp = user_msg_x + i;
2360 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
2361 struct sockaddr *fromsa = recv_msg_elem->psa;
2362
2363 if (mp->msg_name) {
2364 error = copyout_sa(fromsa, mp->msg_name,
2365 &mp->msg_namelen);
2366 if (error)
2367 goto out;
2368 }
2369 if (mp->msg_control) {
2370 error = copyout_control(p, recv_msg_elem->controlp,
2371 mp->msg_control, &mp->msg_controllen,
2372 &mp->msg_flags);
2373 if (error)
2374 goto out;
fe8ab488 2375 }
fe8ab488
A
2376 }
2377out:
2378 if (need_drop)
2379 file_drop(uap->s);
2380 if (umsgp != NULL)
2381 _FREE(umsgp, M_TEMP);
3e170ce0
A
2382 if (recv_msg_array != NULL)
2383 free_recv_msg_array(recv_msg_array, uap->cnt);
2384 if (user_msg_x != NULL)
2385 _FREE(user_msg_x, M_TEMP);
2386
fe8ab488 2387 KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0);
3e170ce0 2388
fe8ab488
A
2389 return (error);
2390}
2391
2d21ac55
A
2392/*
2393 * Returns: 0 Success
2394 * EBADF
2395 * file_socket:ENOTSOCK
2396 * file_socket:EBADF
2397 * soshutdown:EINVAL
2398 * soshutdown:ENOTCONN
2399 * soshutdown:EADDRNOTAVAIL[TCP]
2400 * soshutdown:ENOBUFS[TCP]
2401 * soshutdown:EMSGSIZE[TCP]
2402 * soshutdown:EHOSTUNREACH[TCP]
2403 * soshutdown:ENETUNREACH[TCP]
2404 * soshutdown:ENETDOWN[TCP]
2405 * soshutdown:ENOMEM[TCP]
2406 * soshutdown:EACCES[TCP]
2407 * soshutdown:EMSGSIZE[TCP]
2408 * soshutdown:ENOBUFS[TCP]
2409 * soshutdown:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
2410 * soshutdown:??? [other protocol families]
2411 */
1c79356b
A
2412/* ARGSUSED */
2413int
2d21ac55 2414shutdown(__unused struct proc *p, struct shutdown_args *uap,
b0d623f7 2415 __unused int32_t *retval)
1c79356b 2416{
2d21ac55 2417 struct socket *so;
1c79356b
A
2418 int error;
2419
55e303ae 2420 AUDIT_ARG(fd, uap->s);
91447636 2421 error = file_socket(uap->s, &so);
1c79356b
A
2422 if (error)
2423 return (error);
91447636
A
2424 if (so == NULL) {
2425 error = EBADF;
2426 goto out;
2427 }
2428 error = soshutdown((struct socket *)so, uap->how);
2429out:
2430 file_drop(uap->s);
2d21ac55 2431 return (error);
1c79356b
A
2432}
2433
2d21ac55
A
2434/*
2435 * Returns: 0 Success
2436 * EFAULT
2437 * EINVAL
2438 * EACCES Mandatory Access Control failure
2439 * file_socket:ENOTSOCK
2440 * file_socket:EBADF
2441 * sosetopt:EINVAL
2442 * sosetopt:ENOPROTOOPT
2443 * sosetopt:ENOBUFS
2444 * sosetopt:EDOM
2445 * sosetopt:EFAULT
2446 * sosetopt:EOPNOTSUPP[AF_UNIX]
2447 * sosetopt:???
2448 */
1c79356b
A
2449/* ARGSUSED */
2450int
2d21ac55 2451setsockopt(struct proc *p, struct setsockopt_args *uap,
b0d623f7 2452 __unused int32_t *retval)
1c79356b 2453{
2d21ac55 2454 struct socket *so;
1c79356b
A
2455 struct sockopt sopt;
2456 int error;
2457
55e303ae 2458 AUDIT_ARG(fd, uap->s);
1c79356b
A
2459 if (uap->val == 0 && uap->valsize != 0)
2460 return (EFAULT);
2d21ac55 2461 /* No bounds checking on size (it's unsigned) */
1c79356b 2462
91447636 2463 error = file_socket(uap->s, &so);
1c79356b
A
2464 if (error)
2465 return (error);
2466
2467 sopt.sopt_dir = SOPT_SET;
2468 sopt.sopt_level = uap->level;
2469 sopt.sopt_name = uap->name;
2470 sopt.sopt_val = uap->val;
2471 sopt.sopt_valsize = uap->valsize;
2472 sopt.sopt_p = p;
2473
91447636
A
2474 if (so == NULL) {
2475 error = EINVAL;
2476 goto out;
2477 }
2d21ac55
A
2478#if CONFIG_MACF_SOCKET_SUBSET
2479 if ((error = mac_socket_check_setsockopt(kauth_cred_get(), so,
2480 &sopt)) != 0)
2481 goto out;
2482#endif /* MAC_SOCKET_SUBSET */
39236c6e 2483 error = sosetoptlock(so, &sopt, 1); /* will lock socket */
91447636
A
2484out:
2485 file_drop(uap->s);
2d21ac55 2486 return (error);
1c79356b
A
2487}
2488
2489
2490
2d21ac55
A
2491/*
2492 * Returns: 0 Success
2493 * EINVAL
2494 * EBADF
2495 * EACCES Mandatory Access Control failure
2496 * copyin:EFAULT
2497 * copyout:EFAULT
2498 * file_socket:ENOTSOCK
2499 * file_socket:EBADF
2500 * sogetopt:???
2501 */
1c79356b 2502int
2d21ac55 2503getsockopt(struct proc *p, struct getsockopt_args *uap,
b0d623f7 2504 __unused int32_t *retval)
1c79356b 2505{
91447636
A
2506 int error;
2507 socklen_t valsize;
2508 struct sockopt sopt;
2d21ac55 2509 struct socket *so;
1c79356b 2510
91447636 2511 error = file_socket(uap->s, &so);
1c79356b
A
2512 if (error)
2513 return (error);
2514 if (uap->val) {
2d21ac55
A
2515 error = copyin(uap->avalsize, (caddr_t)&valsize,
2516 sizeof (valsize));
1c79356b 2517 if (error)
91447636 2518 goto out;
2d21ac55
A
2519 /* No bounds checking on size (it's unsigned) */
2520 } else {
1c79356b 2521 valsize = 0;
2d21ac55 2522 }
1c79356b
A
2523 sopt.sopt_dir = SOPT_GET;
2524 sopt.sopt_level = uap->level;
2525 sopt.sopt_name = uap->name;
2526 sopt.sopt_val = uap->val;
2527 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
2528 sopt.sopt_p = p;
2529
91447636
A
2530 if (so == NULL) {
2531 error = EBADF;
2532 goto out;
2533 }
2d21ac55
A
2534#if CONFIG_MACF_SOCKET_SUBSET
2535 if ((error = mac_socket_check_getsockopt(kauth_cred_get(), so,
2536 &sopt)) != 0)
2537 goto out;
2538#endif /* MAC_SOCKET_SUBSET */
39236c6e 2539 error = sogetoptlock((struct socket *)so, &sopt, 1); /* will lock */
1c79356b
A
2540 if (error == 0) {
2541 valsize = sopt.sopt_valsize;
2d21ac55
A
2542 error = copyout((caddr_t)&valsize, uap->avalsize,
2543 sizeof (valsize));
1c79356b 2544 }
91447636
A
2545out:
2546 file_drop(uap->s);
1c79356b
A
2547 return (error);
2548}
2549
2550
2551/*
2552 * Get socket name.
2d21ac55
A
2553 *
2554 * Returns: 0 Success
2555 * EBADF
2556 * file_socket:ENOTSOCK
2557 * file_socket:EBADF
2558 * copyin:EFAULT
2559 * copyout:EFAULT
2560 * <pru_sockaddr>:ENOBUFS[TCP]
2561 * <pru_sockaddr>:ECONNRESET[TCP]
2562 * <pru_sockaddr>:EINVAL[AF_UNIX]
2563 * <sf_getsockname>:???
1c79356b
A
2564 */
2565/* ARGSUSED */
2d21ac55
A
2566int
2567getsockname(__unused struct proc *p, struct getsockname_args *uap,
b0d623f7 2568 __unused int32_t *retval)
1c79356b 2569{
91447636 2570 struct socket *so;
1c79356b 2571 struct sockaddr *sa;
91447636 2572 socklen_t len;
2d21ac55 2573 socklen_t sa_len;
1c79356b
A
2574 int error;
2575
91447636 2576 error = file_socket(uap->fdes, &so);
1c79356b
A
2577 if (error)
2578 return (error);
2d21ac55 2579 error = copyin(uap->alen, (caddr_t)&len, sizeof (socklen_t));
1c79356b 2580 if (error)
91447636
A
2581 goto out;
2582 if (so == NULL) {
2583 error = EBADF;
2584 goto out;
2585 }
1c79356b 2586 sa = 0;
91447636 2587 socket_lock(so, 1);
1c79356b 2588 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
2d21ac55 2589 if (error == 0) {
6d2010ae 2590 error = sflt_getsockname(so, &sa);
91447636
A
2591 if (error == EJUSTRETURN)
2592 error = 0;
91447636
A
2593 }
2594 socket_unlock(so, 1);
1c79356b
A
2595 if (error)
2596 goto bad;
2597 if (sa == 0) {
2598 len = 0;
2599 goto gotnothing;
2600 }
2601
2d21ac55
A
2602 sa_len = sa->sa_len;
2603 len = MIN(len, sa_len);
91447636 2604 error = copyout((caddr_t)sa, uap->asa, len);
2d21ac55
A
2605 if (error)
2606 goto bad;
2607 /* return the actual, untruncated address length */
2608 len = sa_len;
1c79356b 2609gotnothing:
2d21ac55 2610 error = copyout((caddr_t)&len, uap->alen, sizeof (socklen_t));
1c79356b
A
2611bad:
2612 if (sa)
2613 FREE(sa, M_SONAME);
91447636
A
2614out:
2615 file_drop(uap->fdes);
1c79356b
A
2616 return (error);
2617}
2618
1c79356b
A
2619/*
2620 * Get name of peer for connected socket.
2d21ac55
A
2621 *
2622 * Returns: 0 Success
2623 * EBADF
2624 * EINVAL
2625 * ENOTCONN
2626 * file_socket:ENOTSOCK
2627 * file_socket:EBADF
2628 * copyin:EFAULT
2629 * copyout:EFAULT
2630 * <pru_peeraddr>:???
2631 * <sf_getpeername>:???
1c79356b
A
2632 */
2633/* ARGSUSED */
2634int
2d21ac55 2635getpeername(__unused struct proc *p, struct getpeername_args *uap,
b0d623f7 2636 __unused int32_t *retval)
1c79356b 2637{
91447636 2638 struct socket *so;
1c79356b 2639 struct sockaddr *sa;
91447636 2640 socklen_t len;
2d21ac55 2641 socklen_t sa_len;
1c79356b
A
2642 int error;
2643
91447636 2644 error = file_socket(uap->fdes, &so);
1c79356b
A
2645 if (error)
2646 return (error);
91447636
A
2647 if (so == NULL) {
2648 error = EBADF;
2649 goto out;
2650 }
2651
2652 socket_lock(so, 1);
2653
2d21ac55
A
2654 if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
2655 (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
2656 /* the socket has been shutdown, no more getpeername's */
2657 socket_unlock(so, 1);
2658 error = EINVAL;
2659 goto out;
2660 }
2661
91447636
A
2662 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
2663 socket_unlock(so, 1);
2664 error = ENOTCONN;
2665 goto out;
2666 }
2d21ac55 2667 error = copyin(uap->alen, (caddr_t)&len, sizeof (socklen_t));
91447636
A
2668 if (error) {
2669 socket_unlock(so, 1);
2670 goto out;
2671 }
1c79356b
A
2672 sa = 0;
2673 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
2d21ac55 2674 if (error == 0) {
6d2010ae 2675 error = sflt_getpeername(so, &sa);
91447636
A
2676 if (error == EJUSTRETURN)
2677 error = 0;
91447636
A
2678 }
2679 socket_unlock(so, 1);
1c79356b
A
2680 if (error)
2681 goto bad;
2682 if (sa == 0) {
2683 len = 0;
2684 goto gotnothing;
2685 }
2d21ac55
A
2686 sa_len = sa->sa_len;
2687 len = MIN(len, sa_len);
91447636 2688 error = copyout(sa, uap->asa, len);
1c79356b
A
2689 if (error)
2690 goto bad;
2d21ac55
A
2691 /* return the actual, untruncated address length */
2692 len = sa_len;
1c79356b 2693gotnothing:
2d21ac55 2694 error = copyout((caddr_t)&len, uap->alen, sizeof (socklen_t));
1c79356b
A
2695bad:
2696 if (sa) FREE(sa, M_SONAME);
91447636
A
2697out:
2698 file_drop(uap->fdes);
1c79356b
A
2699 return (error);
2700}
2701
2702int
2d21ac55 2703sockargs(struct mbuf **mp, user_addr_t data, int buflen, int type)
1c79356b 2704{
2d21ac55
A
2705 struct sockaddr *sa;
2706 struct mbuf *m;
1c79356b
A
2707 int error;
2708
e2d2fc5c 2709 size_t alloc_buflen = (size_t)buflen;
39236c6e 2710
3e170ce0 2711 if (alloc_buflen > INT_MAX/2)
e2d2fc5c 2712 return (EINVAL);
b0d623f7 2713#ifdef __LP64__
3e170ce0
A
2714 /*
2715 * The fd's in the buffer must expand to be pointers, thus we need twice
2716 * as much space
2717 */
2718 if (type == MT_CONTROL)
2719 alloc_buflen = ((buflen - sizeof(struct cmsghdr))*2) +
2720 sizeof(struct cmsghdr);
b0d623f7 2721#endif
e2d2fc5c
A
2722 if (alloc_buflen > MLEN) {
2723 if (type == MT_SONAME && alloc_buflen <= 112)
3e170ce0 2724 alloc_buflen = MLEN; /* unix domain compat. hack */
e2d2fc5c 2725 else if (alloc_buflen > MCLBYTES)
91447636 2726 return (EINVAL);
1c79356b
A
2727 }
2728 m = m_get(M_WAIT, type);
2729 if (m == NULL)
2730 return (ENOBUFS);
e2d2fc5c 2731 if (alloc_buflen > MLEN) {
91447636
A
2732 MCLGET(m, M_WAIT);
2733 if ((m->m_flags & M_EXT) == 0) {
2734 m_free(m);
2d21ac55 2735 return (ENOBUFS);
91447636
A
2736 }
2737 }
3e170ce0
A
2738 /*
2739 * K64: We still copyin the original buflen because it gets expanded
2740 * later and we lie about the size of the mbuf because it only affects
2741 * unp_* functions
b0d623f7 2742 */
1c79356b 2743 m->m_len = buflen;
91447636 2744 error = copyin(data, mtod(m, caddr_t), (u_int)buflen);
2d21ac55 2745 if (error) {
1c79356b 2746 (void) m_free(m);
2d21ac55 2747 } else {
1c79356b
A
2748 *mp = m;
2749 if (type == MT_SONAME) {
2750 sa = mtod(m, struct sockaddr *);
1c79356b
A
2751 sa->sa_len = buflen;
2752 }
2753 }
2754 return (error);
2755}
2756
91447636
A
2757/*
2758 * Given a user_addr_t of length len, allocate and fill out a *sa.
2d21ac55
A
2759 *
2760 * Returns: 0 Success
2761 * ENAMETOOLONG Filename too long
2762 * EINVAL Invalid argument
2763 * ENOMEM Not enough space
2764 * copyin:EFAULT Bad address
91447636 2765 */
2d21ac55
A
2766static int
2767getsockaddr(struct socket *so, struct sockaddr **namp, user_addr_t uaddr,
4a3eedf9 2768 size_t len, boolean_t translate_unspec)
1c79356b
A
2769{
2770 struct sockaddr *sa;
2771 int error;
2772
2773 if (len > SOCK_MAXADDRLEN)
2d21ac55 2774 return (ENAMETOOLONG);
1c79356b 2775
2d21ac55
A
2776 if (len < offsetof(struct sockaddr, sa_data[0]))
2777 return (EINVAL);
1c79356b 2778
490019cf 2779 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK | M_ZERO);
91447636 2780 if (sa == NULL) {
2d21ac55 2781 return (ENOMEM);
91447636
A
2782 }
2783 error = copyin(uaddr, (caddr_t)sa, len);
1c79356b
A
2784 if (error) {
2785 FREE(sa, M_SONAME);
2786 } else {
2d21ac55
A
2787 /*
2788 * Force sa_family to AF_INET on AF_INET sockets to handle
2789 * legacy applications that use AF_UNSPEC (0). On all other
2790 * sockets we leave it unchanged and let the lower layer
2791 * handle it.
2792 */
4a3eedf9 2793 if (translate_unspec && sa->sa_family == AF_UNSPEC &&
39236c6e 2794 SOCK_CHECK_DOM(so, PF_INET) &&
2d21ac55
A
2795 len == sizeof (struct sockaddr_in))
2796 sa->sa_family = AF_INET;
2797
1c79356b
A
2798 sa->sa_len = len;
2799 *namp = sa;
2800 }
2d21ac55 2801 return (error);
1c79356b
A
2802}
2803
2d21ac55
A
2804static int
2805getsockaddr_s(struct socket *so, struct sockaddr_storage *ss,
4a3eedf9 2806 user_addr_t uaddr, size_t len, boolean_t translate_unspec)
1c79356b 2807{
2d21ac55
A
2808 int error;
2809
2810 if (ss == NULL || uaddr == USER_ADDR_NULL ||
2811 len < offsetof(struct sockaddr, sa_data[0]))
2812 return (EINVAL);
2813
2814 /*
2815 * sockaddr_storage size is less than SOCK_MAXADDRLEN,
2816 * so the check here is inclusive.
2817 */
2818 if (len > sizeof (*ss))
2819 return (ENAMETOOLONG);
1c79356b 2820
2d21ac55
A
2821 bzero(ss, sizeof (*ss));
2822 error = copyin(uaddr, (caddr_t)ss, len);
2823 if (error == 0) {
2824 /*
2825 * Force sa_family to AF_INET on AF_INET sockets to handle
2826 * legacy applications that use AF_UNSPEC (0). On all other
2827 * sockets we leave it unchanged and let the lower layer
2828 * handle it.
2829 */
4a3eedf9 2830 if (translate_unspec && ss->ss_family == AF_UNSPEC &&
39236c6e 2831 SOCK_CHECK_DOM(so, PF_INET) &&
2d21ac55
A
2832 len == sizeof (struct sockaddr_in))
2833 ss->ss_family = AF_INET;
91447636 2834
2d21ac55 2835 ss->ss_len = len;
1c79356b 2836 }
2d21ac55 2837 return (error);
1c79356b
A
2838}
2839
fe8ab488
A
2840int
2841internalize_user_msghdr_array(const void *src, int spacetype, int direction,
3e170ce0 2842 u_int count, struct user_msghdr_x *dst, struct uio **uiop)
fe8ab488
A
2843{
2844 int error = 0;
2845 u_int i;
3e170ce0
A
2846 u_int namecnt = 0;
2847 u_int ctlcnt = 0;
fe8ab488
A
2848
2849 for (i = 0; i < count; i++) {
2850 uio_t auio;
2851 struct user_iovec *iovp;
3e170ce0 2852 struct user_msghdr_x *user_msg = dst + i;
fe8ab488
A
2853
2854 if (spacetype == UIO_USERSPACE64) {
3e170ce0 2855 const struct user64_msghdr_x *msghdr64;
fe8ab488 2856
3e170ce0 2857 msghdr64 = ((const struct user64_msghdr_x *)src) + i;
fe8ab488
A
2858
2859 user_msg->msg_name = msghdr64->msg_name;
2860 user_msg->msg_namelen = msghdr64->msg_namelen;
2861 user_msg->msg_iov = msghdr64->msg_iov;
2862 user_msg->msg_iovlen = msghdr64->msg_iovlen;
2863 user_msg->msg_control = msghdr64->msg_control;
2864 user_msg->msg_controllen = msghdr64->msg_controllen;
2865 user_msg->msg_flags = msghdr64->msg_flags;
2866 user_msg->msg_datalen = msghdr64->msg_datalen;
2867 } else {
3e170ce0 2868 const struct user32_msghdr_x *msghdr32;
fe8ab488 2869
3e170ce0 2870 msghdr32 = ((const struct user32_msghdr_x *)src) + i;
fe8ab488
A
2871
2872 user_msg->msg_name = msghdr32->msg_name;
2873 user_msg->msg_namelen = msghdr32->msg_namelen;
2874 user_msg->msg_iov = msghdr32->msg_iov;
2875 user_msg->msg_iovlen = msghdr32->msg_iovlen;
2876 user_msg->msg_control = msghdr32->msg_control;
2877 user_msg->msg_controllen = msghdr32->msg_controllen;
2878 user_msg->msg_flags = msghdr32->msg_flags;
2879 user_msg->msg_datalen = msghdr32->msg_datalen;
2880 }
3e170ce0
A
2881
2882 if (user_msg->msg_iovlen <= 0 ||
2883 user_msg->msg_iovlen > UIO_MAXIOV) {
fe8ab488
A
2884 error = EMSGSIZE;
2885 goto done;
2886 }
3e170ce0
A
2887 auio = uio_create(user_msg->msg_iovlen, 0, spacetype,
2888 direction);
fe8ab488
A
2889 if (auio == NULL) {
2890 error = ENOMEM;
2891 goto done;
2892 }
2893 uiop[i] = auio;
2894
3e170ce0
A
2895 iovp = uio_iovsaddr(auio);
2896 if (iovp == NULL) {
2897 error = ENOMEM;
2898 goto done;
2899 }
2900 error = copyin_user_iovec_array(user_msg->msg_iov,
2901 spacetype, user_msg->msg_iovlen, iovp);
2902 if (error)
2903 goto done;
2904 user_msg->msg_iov = CAST_USER_ADDR_T(iovp);
fe8ab488 2905
3e170ce0
A
2906 error = uio_calculateresid(auio);
2907 if (error)
2908 goto done;
2909 user_msg->msg_datalen = uio_resid(auio);
2910
2911 if (user_msg->msg_name && user_msg->msg_namelen)
2912 namecnt++;
2913 if (user_msg->msg_control && user_msg->msg_controllen)
2914 ctlcnt++;
2915 }
2916done:
2917
2918 return (error);
2919}
2920
2921int
2922internalize_recv_msghdr_array(const void *src, int spacetype, int direction,
2923 u_int count, struct user_msghdr_x *dst,
2924 struct recv_msg_elem *recv_msg_array)
2925{
2926 int error = 0;
2927 u_int i;
2928
2929 for (i = 0; i < count; i++) {
2930 struct user_iovec *iovp;
2931 struct user_msghdr_x *user_msg = dst + i;
2932 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
2933
2934 if (spacetype == UIO_USERSPACE64) {
2935 const struct user64_msghdr_x *msghdr64;
2936
2937 msghdr64 = ((const struct user64_msghdr_x *)src) + i;
2938
2939 user_msg->msg_name = msghdr64->msg_name;
2940 user_msg->msg_namelen = msghdr64->msg_namelen;
2941 user_msg->msg_iov = msghdr64->msg_iov;
2942 user_msg->msg_iovlen = msghdr64->msg_iovlen;
2943 user_msg->msg_control = msghdr64->msg_control;
2944 user_msg->msg_controllen = msghdr64->msg_controllen;
2945 user_msg->msg_flags = msghdr64->msg_flags;
2946 user_msg->msg_datalen = msghdr64->msg_datalen;
fe8ab488 2947 } else {
3e170ce0
A
2948 const struct user32_msghdr_x *msghdr32;
2949
2950 msghdr32 = ((const struct user32_msghdr_x *)src) + i;
2951
2952 user_msg->msg_name = msghdr32->msg_name;
2953 user_msg->msg_namelen = msghdr32->msg_namelen;
2954 user_msg->msg_iov = msghdr32->msg_iov;
2955 user_msg->msg_iovlen = msghdr32->msg_iovlen;
2956 user_msg->msg_control = msghdr32->msg_control;
2957 user_msg->msg_controllen = msghdr32->msg_controllen;
2958 user_msg->msg_flags = msghdr32->msg_flags;
2959 user_msg->msg_datalen = msghdr32->msg_datalen;
fe8ab488 2960 }
3e170ce0
A
2961
2962 if (user_msg->msg_iovlen <= 0 ||
2963 user_msg->msg_iovlen > UIO_MAXIOV) {
2964 error = EMSGSIZE;
2965 goto done;
2966 }
2967 recv_msg_elem->uio = uio_create(user_msg->msg_iovlen, 0,
2968 spacetype, direction);
2969 if (recv_msg_elem->uio == NULL) {
2970 error = ENOMEM;
2971 goto done;
2972 }
2973
2974 iovp = uio_iovsaddr(recv_msg_elem->uio);
2975 if (iovp == NULL) {
2976 error = ENOMEM;
2977 goto done;
2978 }
2979 error = copyin_user_iovec_array(user_msg->msg_iov,
2980 spacetype, user_msg->msg_iovlen, iovp);
2981 if (error)
2982 goto done;
2983 user_msg->msg_iov = CAST_USER_ADDR_T(iovp);
2984
2985 error = uio_calculateresid(recv_msg_elem->uio);
2986 if (error)
2987 goto done;
2988 user_msg->msg_datalen = uio_resid(recv_msg_elem->uio);
2989
2990 if (user_msg->msg_name && user_msg->msg_namelen)
2991 recv_msg_elem->which |= SOCK_MSG_SA;
2992 if (user_msg->msg_control && user_msg->msg_controllen)
2993 recv_msg_elem->which |= SOCK_MSG_CONTROL;
fe8ab488
A
2994 }
2995done:
3e170ce0 2996
fe8ab488
A
2997 return (error);
2998}
2999
3000u_int
3001externalize_user_msghdr_array(void *dst, int spacetype, int direction,
3e170ce0 3002 u_int count, const struct user_msghdr_x *src, struct uio **uiop)
fe8ab488
A
3003{
3004#pragma unused(direction)
3005 u_int i;
3006 int seenlast = 0;
3007 u_int retcnt = 0;
3008
3009 for (i = 0; i < count; i++) {
3e170ce0 3010 const struct user_msghdr_x *user_msg = src + i;
fe8ab488
A
3011 uio_t auio = uiop[i];
3012 user_ssize_t len = user_msg->msg_datalen - uio_resid(auio);
3013
3014 if (user_msg->msg_datalen != 0 && len == 0)
3015 seenlast = 1;
3e170ce0
A
3016
3017 if (seenlast == 0)
3018 retcnt ++;
3019
3020 if (spacetype == UIO_USERSPACE64) {
3021 struct user64_msghdr_x *msghdr64;
3022
3023 msghdr64 = ((struct user64_msghdr_x *)dst) + i;
3024
3025 msghdr64->msg_flags = user_msg->msg_flags;
3026 msghdr64->msg_datalen = len;
3027
3028 } else {
3029 struct user32_msghdr_x *msghdr32;
3030
3031 msghdr32 = ((struct user32_msghdr_x *)dst) + i;
3032
3033 msghdr32->msg_flags = user_msg->msg_flags;
3034 msghdr32->msg_datalen = len;
3035 }
3036 }
3037 return (retcnt);
3038}
3039
3040u_int
3041externalize_recv_msghdr_array(void *dst, int spacetype, int direction,
3042 u_int count, const struct user_msghdr_x *src,
3043 struct recv_msg_elem *recv_msg_array)
3044{
3045 u_int i;
3046 int seenlast = 0;
3047 u_int retcnt = 0;
3048
3049 for (i = 0; i < count; i++) {
3050 const struct user_msghdr_x *user_msg = src + i;
3051 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3052 user_ssize_t len;
3053
3054 len = user_msg->msg_datalen - uio_resid(recv_msg_elem->uio);
3055
3056 if (direction == UIO_READ) {
3057 if ((recv_msg_elem->which & SOCK_MSG_DATA) == 0)
3058 seenlast = 1;
3059 } else {
3060 if (user_msg->msg_datalen != 0 && len == 0)
3061 seenlast = 1;
3062 }
3063
fe8ab488
A
3064 if (seenlast == 0)
3065 retcnt ++;
3066
3067 if (spacetype == UIO_USERSPACE64) {
3068 struct user64_msghdr_x *msghdr64;
3069
3070 msghdr64 = ((struct user64_msghdr_x *)dst) + i;
3071
3072 msghdr64->msg_flags = user_msg->msg_flags;
3073 msghdr64->msg_datalen = len;
3e170ce0 3074
fe8ab488
A
3075 } else {
3076 struct user32_msghdr_x *msghdr32;
3077
3078 msghdr32 = ((struct user32_msghdr_x *)dst) + i;
3079
3080 msghdr32->msg_flags = user_msg->msg_flags;
3081 msghdr32->msg_datalen = len;
3082 }
3083 }
3084 return (retcnt);
3085}
3086
3087void
3088free_uio_array(struct uio **uiop, u_int count)
3089{
3090 u_int i;
3091
3092 for (i = 0; i < count; i++) {
3093 if (uiop[i] != NULL)
3094 uio_free(uiop[i]);
3095 }
3096}
3097
3098__private_extern__ user_ssize_t
3099uio_array_resid(struct uio **uiop, u_int count)
3100{
3101 user_ssize_t len = 0;
3102 u_int i;
3103
3104 for (i = 0; i < count; i++) {
3105 struct uio *auio = uiop[i];
3106
3e170ce0 3107 if (auio != NULL)
fe8ab488
A
3108 len += uio_resid(auio);
3109 }
3110 return (len);
3111}
3112
3113int
3114uio_array_is_valid(struct uio **uiop, u_int count)
3115{
3116 user_ssize_t len = 0;
3117 u_int i;
3118
3119 for (i = 0; i < count; i++) {
3120 struct uio *auio = uiop[i];
3e170ce0 3121
fe8ab488
A
3122 if (auio != NULL) {
3123 user_ssize_t resid = uio_resid(auio);
3e170ce0 3124
fe8ab488
A
3125 /*
3126 * Sanity check on the validity of the iovec:
3127 * no point of going over sb_max
3128 */
3129 if (resid < 0 || (u_int32_t)resid > sb_max)
3130 return (0);
3e170ce0
A
3131
3132 len += resid;
3133 if (len < 0 || (u_int32_t)len > sb_max)
3134 return (0);
3135 }
3136 }
3137 return (1);
3138}
3139
3140
3141struct recv_msg_elem *
3142alloc_recv_msg_array(u_int count)
3143{
3144 struct recv_msg_elem *recv_msg_array;
3145
3146 recv_msg_array = _MALLOC(count * sizeof(struct recv_msg_elem),
3147 M_TEMP, M_WAITOK | M_ZERO);
3148
3149 return (recv_msg_array);
3150}
3151
3152void
3153free_recv_msg_array(struct recv_msg_elem *recv_msg_array, u_int count)
3154{
3155 u_int i;
3156
3157 for (i = 0; i < count; i++) {
3158 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3159
3160 if (recv_msg_elem->uio != NULL)
3161 uio_free(recv_msg_elem->uio);
3162 if (recv_msg_elem->psa != NULL)
3163 _FREE(recv_msg_elem->psa, M_TEMP);
3164 if (recv_msg_elem->controlp != NULL)
3165 m_freem(recv_msg_elem->controlp);
3166 }
3167 _FREE(recv_msg_array, M_TEMP);
3168}
3169
3170
3171__private_extern__ user_ssize_t
3172recv_msg_array_resid(struct recv_msg_elem *recv_msg_array, u_int count)
3173{
3174 user_ssize_t len = 0;
3175 u_int i;
3176
3177 for (i = 0; i < count; i++) {
3178 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3179
3180 if (recv_msg_elem->uio != NULL)
3181 len += uio_resid(recv_msg_elem->uio);
3182 }
3183 return (len);
3184}
3185
3186int
3187recv_msg_array_is_valid(struct recv_msg_elem *recv_msg_array, u_int count)
3188{
3189 user_ssize_t len = 0;
3190 u_int i;
3191
3192 for (i = 0; i < count; i++) {
3193 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3194
3195 if (recv_msg_elem->uio != NULL) {
3196 user_ssize_t resid = uio_resid(recv_msg_elem->uio);
3197
3198 /*
3199 * Sanity check on the validity of the iovec:
3200 * no point of going over sb_max
3201 */
3202 if (resid < 0 || (u_int32_t)resid > sb_max)
3203 return (0);
3204
fe8ab488
A
3205 len += resid;
3206 if (len < 0 || (u_int32_t)len > sb_max)
3207 return (0);
3208 }
3209 }
3210 return (1);
3211}
3212
39236c6e 3213#if SENDFILE
2d21ac55
A
3214
/* Number of iovec slots sendfile() reserves for one pass over the file */
#define	SFUIOBUFS		64

/*
 * Number of mbuf clusters of a given size needed to hold n bytes.
 * n must be non-zero: the expression subtracts one before shifting.
 */
#define	HOWMANY_16K(n)	((((unsigned int)(n) - 1) >> M16KCLSHIFT) + 1)
#define	HOWMANY_4K(n)	((((unsigned int)(n) - 1) >> MBIGCLSHIFT) + 1)

/* Most bytes sent per pass: SFUIOBUFS pages */
#define	SENDFILE_MAX_BYTES	(SFUIOBUFS << PGSHIFT)

/* The same per-pass limit, expressed in mbuf clusters of each size */
#define	SENDFILE_MAX_16K	HOWMANY_16K(SENDFILE_MAX_BYTES)
#define	SENDFILE_MAX_4K		HOWMANY_4K(SENDFILE_MAX_BYTES)
3227
1c79356b 3228static void
2d21ac55
A
3229alloc_sendpkt(int how, size_t pktlen, unsigned int *maxchunks,
3230 struct mbuf **m, boolean_t jumbocl)
1c79356b 3231{
2d21ac55 3232 unsigned int needed;
1c79356b 3233
2d21ac55
A
3234 if (pktlen == 0)
3235 panic("%s: pktlen (%ld) must be non-zero\n", __func__, pktlen);
1c79356b 3236
2d21ac55
A
3237 /*
3238 * Try to allocate for the whole thing. Since we want full control
3239 * over the buffer size and be able to accept partial result, we can't
3240 * use mbuf_allocpacket(). The logic below is similar to sosend().
3241 */
3242 *m = NULL;
6d2010ae 3243 if (pktlen > MBIGCLBYTES && jumbocl) {
2d21ac55
A
3244 needed = MIN(SENDFILE_MAX_16K, HOWMANY_16K(pktlen));
3245 *m = m_getpackets_internal(&needed, 1, how, 0, M16KCLBYTES);
3246 }
3247 if (*m == NULL) {
3248 needed = MIN(SENDFILE_MAX_4K, HOWMANY_4K(pktlen));
6d2010ae 3249 *m = m_getpackets_internal(&needed, 1, how, 0, MBIGCLBYTES);
2d21ac55
A
3250 }
3251
3252 /*
3253 * Our previous attempt(s) at allocation had failed; the system
3254 * may be short on mbufs, and we want to block until they are
3255 * available. This time, ask just for 1 mbuf and don't return
3256 * until we get it.
3257 */
3258 if (*m == NULL) {
3259 needed = 1;
6d2010ae 3260 *m = m_getpackets_internal(&needed, 1, M_WAIT, 1, MBIGCLBYTES);
1c79356b 3261 }
2d21ac55
A
3262 if (*m == NULL)
3263 panic("%s: blocking allocation returned NULL\n", __func__);
3264
3265 *maxchunks = needed;
1c79356b
A
3266}
3267
3268/*
3269 * sendfile(2).
2d21ac55
A
3270 * int sendfile(int fd, int s, off_t offset, off_t *nbytes,
3271 * struct sf_hdtr *hdtr, int flags)
1c79356b
A
3272 *
3273 * Send a file specified by 'fd' and starting at 'offset' to a socket
2d21ac55
A
3274 * specified by 's'. Send only '*nbytes' of the file or until EOF if
3275 * *nbytes == 0. Optionally add a header and/or trailer to the socket
3276 * output. If specified, write the total number of bytes sent into *nbytes.
1c79356b
A
3277 */
3278int
2d21ac55 3279sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval)
1c79356b 3280{
91447636 3281 struct fileproc *fp;
1c79356b 3282 struct vnode *vp;
1c79356b 3283 struct socket *so;
2d21ac55
A
3284 struct writev_nocancel_args nuap;
3285 user_ssize_t writev_retval;
2d21ac55 3286 struct user_sf_hdtr user_hdtr;
b0d623f7
A
3287 struct user32_sf_hdtr user32_hdtr;
3288 struct user64_sf_hdtr user64_hdtr;
2d21ac55
A
3289 off_t off, xfsize;
3290 off_t nbytes = 0, sbytes = 0;
3291 int error = 0;
3292 size_t sizeof_hdtr;
2d21ac55
A
3293 off_t file_size;
3294 struct vfs_context context = *vfs_context_current();
3e170ce0 3295
2d21ac55
A
3296 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_START), uap->s,
3297 0, 0, 0, 0);
b0d623f7
A
3298
3299 AUDIT_ARG(fd, uap->fd);
3300 AUDIT_ARG(value32, uap->s);
3301
1c79356b
A
3302 /*
3303 * Do argument checking. Must be a regular file in, stream
3304 * type and connected socket out, positive offset.
3305 */
2d21ac55 3306 if ((error = fp_getfvp(p, uap->fd, &fp, &vp))) {
1c79356b 3307 goto done;
2d21ac55
A
3308 }
3309 if ((fp->f_flag & FREAD) == 0) {
91447636
A
3310 error = EBADF;
3311 goto done1;
1c79356b 3312 }
2d21ac55
A
3313 if (vnode_isreg(vp) == 0) {
3314 error = ENOTSUP;
91447636 3315 goto done1;
1c79356b 3316 }
91447636 3317 error = file_socket(uap->s, &so);
2d21ac55 3318 if (error) {
91447636 3319 goto done1;
2d21ac55 3320 }
55e303ae
A
3321 if (so == NULL) {
3322 error = EBADF;
91447636 3323 goto done2;
55e303ae 3324 }
1c79356b
A
3325 if (so->so_type != SOCK_STREAM) {
3326 error = EINVAL;
2d21ac55 3327 goto done2;
1c79356b
A
3328 }
3329 if ((so->so_state & SS_ISCONNECTED) == 0) {
3330 error = ENOTCONN;
2d21ac55 3331 goto done2;
1c79356b
A
3332 }
3333 if (uap->offset < 0) {
3334 error = EINVAL;
2d21ac55 3335 goto done2;
1c79356b 3336 }
2d21ac55
A
3337 if (uap->nbytes == USER_ADDR_NULL) {
3338 error = EINVAL;
3339 goto done2;
3340 }
3341 if (uap->flags != 0) {
3342 error = EINVAL;
3343 goto done2;
3344 }
3345
3346 context.vc_ucred = fp->f_fglob->fg_cred;
3347
3348#if CONFIG_MACF_SOCKET_SUBSET
3349 /* JMM - fetch connected sockaddr? */
3350 error = mac_socket_check_send(context.vc_ucred, so, NULL);
3351 if (error)
3352 goto done2;
3353#endif
3354
3355 /*
3356 * Get number of bytes to send
3357 * Should it applies to size of header and trailer?
3358 * JMM - error handling?
3359 */
3360 copyin(uap->nbytes, &nbytes, sizeof (off_t));
1c79356b
A
3361
3362 /*
3363 * If specified, get the pointer to the sf_hdtr struct for
3364 * any headers/trailers.
3365 */
2d21ac55
A
3366 if (uap->hdtr != USER_ADDR_NULL) {
3367 caddr_t hdtrp;
3368
3369 bzero(&user_hdtr, sizeof (user_hdtr));
3370 if (IS_64BIT_PROCESS(p)) {
b0d623f7
A
3371 hdtrp = (caddr_t)&user64_hdtr;
3372 sizeof_hdtr = sizeof (user64_hdtr);
2d21ac55 3373 } else {
b0d623f7
A
3374 hdtrp = (caddr_t)&user32_hdtr;
3375 sizeof_hdtr = sizeof (user32_hdtr);
2d21ac55
A
3376 }
3377 error = copyin(uap->hdtr, hdtrp, sizeof_hdtr);
1c79356b 3378 if (error)
2d21ac55 3379 goto done2;
b0d623f7
A
3380 if (IS_64BIT_PROCESS(p)) {
3381 user_hdtr.headers = user64_hdtr.headers;
3382 user_hdtr.hdr_cnt = user64_hdtr.hdr_cnt;
3383 user_hdtr.trailers = user64_hdtr.trailers;
3384 user_hdtr.trl_cnt = user64_hdtr.trl_cnt;
3385 } else {
3386 user_hdtr.headers = user32_hdtr.headers;
3387 user_hdtr.hdr_cnt = user32_hdtr.hdr_cnt;
3388 user_hdtr.trailers = user32_hdtr.trailers;
3389 user_hdtr.trl_cnt = user32_hdtr.trl_cnt;
2d21ac55
A
3390 }
3391
1c79356b
A
3392 /*
3393 * Send any headers. Wimp out and use writev(2).
3394 */
2d21ac55
A
3395 if (user_hdtr.headers != USER_ADDR_NULL) {
3396 bzero(&nuap, sizeof (struct writev_args));
1c79356b 3397 nuap.fd = uap->s;
2d21ac55
A
3398 nuap.iovp = user_hdtr.headers;
3399 nuap.iovcnt = user_hdtr.hdr_cnt;
3400 error = writev_nocancel(p, &nuap, &writev_retval);
316670eb 3401 if (error) {
2d21ac55 3402 goto done2;
316670eb 3403 }
2d21ac55 3404 sbytes += writev_retval;
1c79356b
A
3405 }
3406 }
3407
3408 /*
2d21ac55
A
3409 * Get the file size for 2 reasons:
3410 * 1. We don't want to allocate more mbufs than necessary
3411 * 2. We don't want to read past the end of file
1c79356b 3412 */
316670eb 3413 if ((error = vnode_size(vp, &file_size, vfs_context_current())) != 0) {
2d21ac55 3414 goto done2;
316670eb 3415 }
1c79356b
A
3416
3417 /*
2d21ac55
A
3418 * Simply read file data into a chain of mbufs that used with scatter
3419 * gather reads. We're not (yet?) setup to use zero copy external
3420 * mbufs that point to the file pages.
1c79356b 3421 */
2d21ac55 3422 socket_lock(so, 1);
39236c6e 3423 error = sblock(&so->so_snd, SBL_WAIT);
2d21ac55
A
3424 if (error) {
3425 socket_unlock(so, 1);
3426 goto done2;
3427 }
1c79356b 3428 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
2d21ac55 3429 mbuf_t m0 = NULL, m;
39236c6e 3430 unsigned int nbufs = SFUIOBUFS, i;
2d21ac55 3431 uio_t auio;
39236c6e 3432 char uio_buf[UIO_SIZEOF(SFUIOBUFS)]; /* 1 KB !!! */
2d21ac55
A
3433 size_t uiolen;
3434 user_ssize_t rlen;
3435 off_t pgoff;
3436 size_t pktlen;
3437 boolean_t jumbocl;
1c79356b 3438
1c79356b 3439 /*
2d21ac55
A
3440 * Calculate the amount to transfer.
3441 * Align to round number of pages.
3442 * Not to exceed send socket buffer,
1c79356b
A
3443 * the EOF, or the passed in nbytes.
3444 */
2d21ac55
A
3445 xfsize = sbspace(&so->so_snd);
3446
3447 if (xfsize <= 0) {
3448 if (so->so_state & SS_CANTSENDMORE) {
3449 error = EPIPE;
3450 goto done3;
3451 } else if ((so->so_state & SS_NBIO)) {
3452 error = EAGAIN;
3453 goto done3;
3454 } else {
3455 xfsize = PAGE_SIZE;
3456 }
3457 }
3458
3459 if (xfsize > SENDFILE_MAX_BYTES)
3460 xfsize = SENDFILE_MAX_BYTES;
3461 else if (xfsize > PAGE_SIZE)
3462 xfsize = trunc_page(xfsize);
3463 pgoff = off & PAGE_MASK_64;
3464 if (pgoff > 0 && PAGE_SIZE - pgoff < xfsize)
1c79356b 3465 xfsize = PAGE_SIZE_64 - pgoff;
2d21ac55
A
3466 if (nbytes && xfsize > (nbytes - sbytes))
3467 xfsize = nbytes - sbytes;
3468 if (xfsize <= 0)
3469 break;
3470 if (off + xfsize > file_size)
3471 xfsize = file_size - off;
1c79356b
A
3472 if (xfsize <= 0)
3473 break;
2d21ac55 3474
1c79356b 3475 /*
2d21ac55
A
3476 * Attempt to use larger than system page-size clusters for
3477 * large writes only if there is a jumbo cluster pool and
3478 * if the socket is marked accordingly.
1c79356b 3479 */
2d21ac55
A
3480 jumbocl = sosendjcl && njcl > 0 &&
3481 ((so->so_flags & SOF_MULTIPAGES) || sosendjcl_ignore_capab);
3482
3483 socket_unlock(so, 0);
3484 alloc_sendpkt(M_WAIT, xfsize, &nbufs, &m0, jumbocl);
fe8ab488 3485 pktlen = mbuf_pkthdr_maxlen(m0);
b0d623f7 3486 if (pktlen < (size_t)xfsize)
2d21ac55 3487 xfsize = pktlen;
39236c6e 3488
2d21ac55
A
3489 auio = uio_createwithbuffer(nbufs, off, UIO_SYSSPACE,
3490 UIO_READ, &uio_buf[0], sizeof (uio_buf));
3491 if (auio == NULL) {
316670eb
A
3492 printf("sendfile failed. nbufs = %d. %s", nbufs,
3493 "File a radar related to rdar://10146739.\n");
2d21ac55
A
3494 mbuf_freem(m0);
3495 error = ENXIO;
3496 socket_lock(so, 0);
3497 goto done3;
1c79356b 3498 }
1c79356b 3499
2d21ac55 3500 for (i = 0, m = m0, uiolen = 0;
b0d623f7 3501 i < nbufs && m != NULL && uiolen < (size_t)xfsize;
2d21ac55
A
3502 i++, m = mbuf_next(m)) {
3503 size_t mlen = mbuf_maxlen(m);
3504
b0d623f7 3505 if (mlen + uiolen > (size_t)xfsize)
2d21ac55
A
3506 mlen = xfsize - uiolen;
3507 mbuf_setlen(m, mlen);
3508 uio_addiov(auio, CAST_USER_ADDR_T(mbuf_datastart(m)),
3509 mlen);
3510 uiolen += mlen;
3511 }
3512
3513 if (xfsize != uio_resid(auio))
3514 printf("sendfile: xfsize: %lld != uio_resid(auio): "
6d2010ae 3515 "%lld\n", xfsize, (long long)uio_resid(auio));
2d21ac55
A
3516
3517 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_START),
3518 uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
3519 (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
3520 error = fo_read(fp, auio, FOF_OFFSET, &context);
3521 socket_lock(so, 0);
3522 if (error != 0) {
3523 if (uio_resid(auio) != xfsize && (error == ERESTART ||
3524 error == EINTR || error == EWOULDBLOCK)) {
3525 error = 0;
3526 } else {
3527 mbuf_freem(m0);
3528 goto done3;
1c79356b 3529 }
1c79356b 3530 }
2d21ac55
A
3531 xfsize -= uio_resid(auio);
3532 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_END),
3533 uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
3534 (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
3535
3536 if (xfsize == 0) {
3e170ce0 3537 // printf("sendfile: fo_read 0 bytes, EOF\n");
2d21ac55 3538 break;
91447636 3539 }
2d21ac55
A
3540 if (xfsize + off > file_size)
3541 printf("sendfile: xfsize: %lld + off: %lld > file_size:"
3542 "%lld\n", xfsize, off, file_size);
3543 for (i = 0, m = m0, rlen = 0;
3544 i < nbufs && m != NULL && rlen < xfsize;
3545 i++, m = mbuf_next(m)) {
3546 size_t mlen = mbuf_maxlen(m);
3547
b0d623f7 3548 if (rlen + mlen > (size_t)xfsize)
2d21ac55
A
3549 mlen = xfsize - rlen;
3550 mbuf_setlen(m, mlen);
3551
3552 rlen += mlen;
3553 }
3554 mbuf_pkthdr_setlen(m0, xfsize);
3555
1c79356b
A
3556retry_space:
3557 /*
3558 * Make sure that the socket is still able to take more data.
3559 * CANTSENDMORE being true usually means that the connection
3560 * was closed. so_error is true when an error was sensed after
3561 * a previous send.
3562 * The state is checked after the page mapping and buffer
3563 * allocation above since those operations may block and make
3564 * any socket checks stale. From this point forward, nothing
3565 * blocks before the pru_send (or more accurately, any blocking
3566 * results in a loop back to here to re-check).
3567 */
3568 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
3569 if (so->so_state & SS_CANTSENDMORE) {
3570 error = EPIPE;
3571 } else {
3572 error = so->so_error;
3573 so->so_error = 0;
3574 }
2d21ac55
A
3575 m_freem(m0);
3576 goto done3;
1c79356b
A
3577 }
3578 /*
3579 * Wait for socket space to become available. We do this just
3580 * after checking the connection state above in order to avoid
3581 * a race condition with sbwait().
3582 */
2d21ac55 3583 if (sbspace(&so->so_snd) < (long)so->so_snd.sb_lowat) {
1c79356b 3584 if (so->so_state & SS_NBIO) {
2d21ac55 3585 m_freem(m0);
1c79356b 3586 error = EAGAIN;
2d21ac55 3587 goto done3;
1c79356b 3588 }
2d21ac55
A
3589 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT |
3590 DBG_FUNC_START), uap->s, 0, 0, 0, 0);
1c79356b 3591 error = sbwait(&so->so_snd);
2d21ac55
A
3592 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT|
3593 DBG_FUNC_END), uap->s, 0, 0, 0, 0);
1c79356b
A
3594 /*
3595 * An error from sbwait usually indicates that we've
3596 * been interrupted by a signal. If we've sent anything
3597 * then return bytes sent, otherwise return the error.
3598 */
3599 if (error) {
2d21ac55
A
3600 m_freem(m0);
3601 goto done3;
1c79356b
A
3602 }
3603 goto retry_space;
3604 }
39236c6e 3605
6d2010ae 3606 struct mbuf *control = NULL;
2d21ac55
A
3607 {
3608 /*
3609 * Socket filter processing
3610 */
2d21ac55 3611
6d2010ae
A
3612 error = sflt_data_out(so, NULL, &m0, &control, 0);
3613 if (error) {
3614 if (error == EJUSTRETURN) {
3615 error = 0;
3616 continue;
2d21ac55 3617 }
6d2010ae 3618 goto done3;
2d21ac55
A
3619 }
3620 /*
3621 * End Socket filter processing
3622 */
3623 }
3624 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
3625 uap->s, 0, 0, 0, 0);
3626 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m0,
6d2010ae 3627 0, control, p);
2d21ac55
A
3628 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
3629 uap->s, 0, 0, 0, 0);
1c79356b 3630 if (error) {
2d21ac55 3631 goto done3;
1c79356b
A
3632 }
3633 }
39236c6e 3634 sbunlock(&so->so_snd, FALSE); /* will unlock socket */
1c79356b
A
3635 /*
3636 * Send trailers. Wimp out and use writev(2).
3637 */
2d21ac55
A
3638 if (uap->hdtr != USER_ADDR_NULL &&
3639 user_hdtr.trailers != USER_ADDR_NULL) {
3640 bzero(&nuap, sizeof (struct writev_args));
3641 nuap.fd = uap->s;
3642 nuap.iovp = user_hdtr.trailers;
3643 nuap.iovcnt = user_hdtr.trl_cnt;
3644 error = writev_nocancel(p, &nuap, &writev_retval);
316670eb 3645 if (error) {
2d21ac55 3646 goto done2;
316670eb 3647 }
2d21ac55 3648 sbytes += writev_retval;
1c79356b 3649 }
91447636
A
3650done2:
3651 file_drop(uap->s);
3652done1:
3653 file_drop(uap->fd);
1c79356b 3654done:
2d21ac55 3655 if (uap->nbytes != USER_ADDR_NULL) {
91447636 3656 /* XXX this appears bogus for some early failure conditions */
2d21ac55 3657 copyout(&sbytes, uap->nbytes, sizeof (off_t));
1c79356b 3658 }
2d21ac55
A
3659 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_END), uap->s,
3660 (unsigned int)((sbytes >> 32) & 0x0ffffffff),
3661 (unsigned int)(sbytes & 0x0ffffffff), error, 0);
1c79356b 3662 return (error);
91447636 3663done3:
39236c6e 3664 sbunlock(&so->so_snd, FALSE); /* will unlock socket */
91447636 3665 goto done2;
1c79356b
A
3666}
3667
2d21ac55
A
3668
3669#endif /* SENDFILE */