]> git.saurik.com Git - apple/xnu.git/blame - bsd/kern/uipc_syscalls.c
xnu-4570.1.46.tar.gz
[apple/xnu.git] / bsd / kern / uipc_syscalls.c
CommitLineData
1c79356b 1/*
3e170ce0 2 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
5d5c5d0d 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
39236c6e 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
39236c6e 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
39236c6e 17 *
2d21ac55
A
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
39236c6e 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * Copyright (c) 1982, 1986, 1989, 1990, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * sendfile(2) and related extensions:
2d21ac55 33 * Copyright (c) 1998, David Greenman. All rights reserved.
1c79356b
A
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. All advertising materials mentioning features or use of this software
44 * must display the following acknowledgement:
45 * This product includes software developed by the University of
46 * California, Berkeley and its contributors.
47 * 4. Neither the name of the University nor the names of its contributors
48 * may be used to endorse or promote products derived from this software
49 * without specific prior written permission.
50 *
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE.
62 *
63 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94
64 */
2d21ac55
A
65/*
66 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
67 * support for mandatory and extensible security protections. This notice
68 * is included in support of clause 2.2 (b) of the Apple Public License,
69 * Version 2.0.
70 */
1c79356b
A
71
72#include <sys/param.h>
73#include <sys/systm.h>
74#include <sys/filedesc.h>
91447636
A
75#include <sys/proc_internal.h>
76#include <sys/file_internal.h>
2d21ac55 77#include <sys/vnode_internal.h>
1c79356b 78#include <sys/malloc.h>
39236c6e 79#include <sys/mcache.h>
1c79356b 80#include <sys/mbuf.h>
fe8ab488 81#include <kern/locks.h>
91447636 82#include <sys/domain.h>
1c79356b 83#include <sys/protosw.h>
91447636 84#include <sys/signalvar.h>
1c79356b
A
85#include <sys/socket.h>
86#include <sys/socketvar.h>
1c79356b 87#include <sys/kernel.h>
91447636 88#include <sys/uio_internal.h>
2d21ac55 89#include <sys/kauth.h>
6d2010ae 90#include <kern/task.h>
39236c6e 91#include <sys/priv.h>
3e170ce0 92#include <sys/sysctl.h>
e5568f75 93
b0d623f7 94#include <security/audit/audit.h>
1c79356b
A
95
96#include <sys/kdebug.h>
91447636 97#include <sys/sysproto.h>
2d21ac55
A
98#include <netinet/in.h>
99#include <net/route.h>
100#include <netinet/in_pcb.h>
101
102#if CONFIG_MACF_SOCKET_SUBSET
103#include <security/mac_framework.h>
104#endif /* MAC_SOCKET_SUBSET */
105
/*
 * Shorthand for fileglob fields reached through a struct fileproc's
 * f_fglob pointer, so that code below can write fp->f_flag, fp->f_ops, ...
 */
#define	f_flag f_fglob->fg_flag
#define	f_type f_fglob->fg_ops->fo_type
#define	f_msgcount f_fglob->fg_msgcount
#define	f_cred f_fglob->fg_cred
#define	f_ops f_fglob->fg_ops
#define	f_offset f_fglob->fg_offset
#define	f_data f_fglob->fg_data

/* Trace codes in the DBG_NETSOCK subclass: generic layer entry/exit. */
#define	DBG_LAYER_IN_BEG NETDBG_CODE(DBG_NETSOCK, 0)
#define	DBG_LAYER_IN_END NETDBG_CODE(DBG_NETSOCK, 2)
#define	DBG_LAYER_OUT_BEG NETDBG_CODE(DBG_NETSOCK, 1)
#define	DBG_LAYER_OUT_END NETDBG_CODE(DBG_NETSOCK, 3)

/* Trace codes in the DBG_NETSOCK subclass: one per traced function. */
#define	DBG_FNC_SENDMSG NETDBG_CODE(DBG_NETSOCK, (1 << 8) | 1)
#define	DBG_FNC_SENDTO NETDBG_CODE(DBG_NETSOCK, (2 << 8) | 1)
#define	DBG_FNC_SENDIT NETDBG_CODE(DBG_NETSOCK, (3 << 8) | 1)
#define	DBG_FNC_RECVFROM NETDBG_CODE(DBG_NETSOCK, (5 << 8))
#define	DBG_FNC_RECVMSG NETDBG_CODE(DBG_NETSOCK, (6 << 8))
#define	DBG_FNC_RECVIT NETDBG_CODE(DBG_NETSOCK, (7 << 8))
#define	DBG_FNC_SENDFILE NETDBG_CODE(DBG_NETSOCK, (10 << 8))
#define	DBG_FNC_SENDFILE_WAIT NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 1))
#define	DBG_FNC_SENDFILE_READ NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 2))
#define	DBG_FNC_SENDFILE_SEND NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 3))
#define	DBG_FNC_SENDMSG_X NETDBG_CODE(DBG_NETSOCK, (11 << 8))
#define	DBG_FNC_RECVMSG_X NETDBG_CODE(DBG_NETSOCK, (12 << 8))

/*
 * On DEBUG/DEVELOPMENT builds, addresses are passed through unchanged and
 * DBG_PRINTF() prints; on other builds addresses go through
 * VM_KERNEL_ADDRPERM() and DBG_PRINTF() expands to an empty statement.
 */
#if DEBUG || DEVELOPMENT
#define	DEBUG_KERNEL_ADDRPERM(_v) (_v)
#define	DBG_PRINTF(...) printf(__VA_ARGS__)
#else
#define	DEBUG_KERNEL_ADDRPERM(_v) VM_KERNEL_ADDRPERM(_v)
#define	DBG_PRINTF(...) do { } while (0)
#endif
2d21ac55 138
2d21ac55
A
139/* TODO: should be in header file */
140int falloc_locked(proc_t, struct fileproc **, int *, vfs_context_t, int);
141
3e170ce0
A
142static int sendit(struct proc *, struct socket *, struct user_msghdr *, uio_t,
143 int, int32_t *);
2d21ac55 144static int recvit(struct proc *, int, struct user_msghdr *, uio_t, user_addr_t,
b0d623f7 145 int32_t *);
39236c6e 146static int connectit(struct socket *, struct sockaddr *);
2d21ac55 147static int getsockaddr(struct socket *, struct sockaddr **, user_addr_t,
4a3eedf9 148 size_t, boolean_t);
2d21ac55 149static int getsockaddr_s(struct socket *, struct sockaddr_storage *,
4a3eedf9 150 user_addr_t, size_t, boolean_t);
1c79356b 151#if SENDFILE
2d21ac55
A
152static void alloc_sendpkt(int, size_t, unsigned int *, struct mbuf **,
153 boolean_t);
154#endif /* SENDFILE */
39236c6e 155static int connectx_nocancel(struct proc *, struct connectx_args *, int *);
813fb2f6
A
156static int connectitx(struct socket *, struct sockaddr *,
157 struct sockaddr *, struct proc *, uint32_t, sae_associd_t,
3e170ce0 158 sae_connid_t *, uio_t, unsigned int, user_ssize_t *);
39236c6e
A
159static int disconnectx_nocancel(struct proc *, struct disconnectx_args *,
160 int *);
161static int socket_common(struct proc *, int, int, int, pid_t, int32_t *, int);
1c79356b 162
fe8ab488 163static int internalize_user_msghdr_array(const void *, int, int, u_int,
3e170ce0 164 struct user_msghdr_x *, struct uio **);
fe8ab488 165static u_int externalize_user_msghdr_array(void *, int, int, u_int,
3e170ce0 166 const struct user_msghdr_x *, struct uio **);
fe8ab488
A
167
168static void free_uio_array(struct uio **, u_int);
169static int uio_array_is_valid(struct uio **, u_int);
3e170ce0
A
170static int recv_msg_array_is_valid(struct recv_msg_elem *, u_int);
171static int internalize_recv_msghdr_array(const void *, int, int,
172 u_int, struct user_msghdr_x *, struct recv_msg_elem *);
173static u_int externalize_recv_msghdr_array(void *, int, int, u_int,
174 const struct user_msghdr_x *, struct recv_msg_elem *);
175static struct recv_msg_elem *alloc_recv_msg_array(u_int count);
176static void free_recv_msg_array(struct recv_msg_elem *, u_int);
177
178SYSCTL_DECL(_kern_ipc);
179
180static u_int somaxsendmsgx = 100;
181SYSCTL_UINT(_kern_ipc, OID_AUTO, maxsendmsgx,
182 CTLFLAG_RW | CTLFLAG_LOCKED, &somaxsendmsgx, 0, "");
183static u_int somaxrecvmsgx = 100;
184SYSCTL_UINT(_kern_ipc, OID_AUTO, maxrecvmsgx,
185 CTLFLAG_RW | CTLFLAG_LOCKED, &somaxrecvmsgx, 0, "");
fe8ab488 186
1c79356b
A
187/*
188 * System call interface to the socket abstraction.
189 */
1c79356b 190
39236c6e 191extern const struct fileops socketops;
1c79356b 192
2d21ac55
A
193/*
194 * Returns: 0 Success
195 * EACCES Mandatory Access Control failure
196 * falloc:ENFILE
197 * falloc:EMFILE
198 * falloc:ENOMEM
199 * socreate:EAFNOSUPPORT
200 * socreate:EPROTOTYPE
201 * socreate:EPROTONOSUPPORT
202 * socreate:ENOBUFS
203 * socreate:ENOMEM
2d21ac55
A
204 * socreate:??? [other protocol families, IPSEC]
205 */
1c79356b 206int
39236c6e
A
207socket(struct proc *p,
208 struct socket_args *uap,
209 int32_t *retval)
210{
211 return (socket_common(p, uap->domain, uap->type, uap->protocol,
212 proc_selfpid(), retval, 0));
213}
214
215int
216socket_delegate(struct proc *p,
217 struct socket_delegate_args *uap,
218 int32_t *retval)
219{
220 return socket_common(p, uap->domain, uap->type, uap->protocol,
221 uap->epid, retval, 1);
222}
223
224static int
225socket_common(struct proc *p,
226 int domain,
227 int type,
228 int protocol,
229 pid_t epid,
230 int32_t *retval,
231 int delegate)
1c79356b 232{
1c79356b 233 struct socket *so;
91447636 234 struct fileproc *fp;
1c79356b
A
235 int fd, error;
236
39236c6e 237 AUDIT_ARG(socket, domain, type, protocol);
2d21ac55 238#if CONFIG_MACF_SOCKET_SUBSET
39236c6e
A
239 if ((error = mac_socket_check_create(kauth_cred_get(), domain,
240 type, protocol)) != 0)
2d21ac55
A
241 return (error);
242#endif /* MAC_SOCKET_SUBSET */
1c79356b 243
39236c6e
A
244 if (delegate) {
245 error = priv_check_cred(kauth_cred_get(),
246 PRIV_NET_PRIVILEGED_SOCKET_DELEGATE, 0);
247 if (error)
248 return (EACCES);
249 }
250
2d21ac55 251 error = falloc(p, &fp, &fd, vfs_context_current());
91447636 252 if (error) {
1c79356b 253 return (error);
91447636 254 }
1c79356b 255 fp->f_flag = FREAD|FWRITE;
1c79356b 256 fp->f_ops = &socketops;
91447636 257
39236c6e
A
258 if (delegate)
259 error = socreate_delegate(domain, &so, type, protocol, epid);
260 else
261 error = socreate(domain, &so, type, protocol);
262
91447636
A
263 if (error) {
264 fp_free(p, fd, fp);
1c79356b
A
265 } else {
266 fp->f_data = (caddr_t)so;
91447636
A
267
268 proc_fdlock(p);
6601e61a 269 procfdtbl_releasefd(p, fd, NULL);
2d21ac55 270
91447636
A
271 fp_drop(p, fd, fp, 1);
272 proc_fdunlock(p);
273
1c79356b 274 *retval = fd;
3e170ce0
A
275 if (ENTR_SHOULDTRACE) {
276 KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
277 fd, 0, (int64_t)VM_KERNEL_ADDRPERM(so));
278 }
1c79356b
A
279 }
280 return (error);
281}
282
2d21ac55
A
283/*
284 * Returns: 0 Success
285 * EDESTADDRREQ Destination address required
286 * EBADF Bad file descriptor
287 * EACCES Mandatory Access Control failure
288 * file_socket:ENOTSOCK
289 * file_socket:EBADF
290 * getsockaddr:ENAMETOOLONG Filename too long
291 * getsockaddr:EINVAL Invalid argument
292 * getsockaddr:ENOMEM Not enough space
293 * getsockaddr:EFAULT Bad address
39236c6e 294 * sobindlock:???
2d21ac55 295 */
1c79356b
A
296/* ARGSUSED */
297int
b0d623f7 298bind(__unused proc_t p, struct bind_args *uap, __unused int32_t *retval)
1c79356b 299{
2d21ac55
A
300 struct sockaddr_storage ss;
301 struct sockaddr *sa = NULL;
91447636 302 struct socket *so;
2d21ac55 303 boolean_t want_free = TRUE;
1c79356b
A
304 int error;
305
55e303ae 306 AUDIT_ARG(fd, uap->s);
91447636 307 error = file_socket(uap->s, &so);
2d21ac55 308 if (error != 0)
1c79356b 309 return (error);
2d21ac55
A
310 if (so == NULL) {
311 error = EBADF;
312 goto out;
313 }
314 if (uap->name == USER_ADDR_NULL) {
315 error = EDESTADDRREQ;
316 goto out;
317 }
318 if (uap->namelen > sizeof (ss)) {
4a3eedf9 319 error = getsockaddr(so, &sa, uap->name, uap->namelen, TRUE);
2d21ac55 320 } else {
4a3eedf9 321 error = getsockaddr_s(so, &ss, uap->name, uap->namelen, TRUE);
2d21ac55
A
322 if (error == 0) {
323 sa = (struct sockaddr *)&ss;
324 want_free = FALSE;
325 }
326 }
327 if (error != 0)
91447636 328 goto out;
2d21ac55
A
329 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
330#if CONFIG_MACF_SOCKET_SUBSET
331 if ((error = mac_socket_check_bind(kauth_cred_get(), so, sa)) == 0)
39236c6e 332 error = sobindlock(so, sa, 1); /* will lock socket */
2d21ac55 333#else
39236c6e 334 error = sobindlock(so, sa, 1); /* will lock socket */
2d21ac55
A
335#endif /* MAC_SOCKET_SUBSET */
336 if (want_free)
337 FREE(sa, M_SONAME);
91447636
A
338out:
339 file_drop(uap->s);
1c79356b
A
340 return (error);
341}
342
2d21ac55
A
343/*
344 * Returns: 0 Success
345 * EBADF
346 * EACCES Mandatory Access Control failure
347 * file_socket:ENOTSOCK
348 * file_socket:EBADF
349 * solisten:EINVAL
350 * solisten:EOPNOTSUPP
351 * solisten:???
352 */
1c79356b 353int
2d21ac55 354listen(__unused struct proc *p, struct listen_args *uap,
b0d623f7 355 __unused int32_t *retval)
1c79356b 356{
1c79356b 357 int error;
2d21ac55 358 struct socket *so;
1c79356b 359
55e303ae 360 AUDIT_ARG(fd, uap->s);
91447636 361 error = file_socket(uap->s, &so);
1c79356b
A
362 if (error)
363 return (error);
91447636 364 if (so != NULL)
2d21ac55
A
365#if CONFIG_MACF_SOCKET_SUBSET
366 {
367 error = mac_socket_check_listen(kauth_cred_get(), so);
368 if (error == 0)
369 error = solisten(so, uap->backlog);
370 }
371#else
91447636 372 error = solisten(so, uap->backlog);
2d21ac55 373#endif /* MAC_SOCKET_SUBSET */
55e303ae 374 else
91447636 375 error = EBADF;
2d21ac55 376
91447636
A
377 file_drop(uap->s);
378 return (error);
1c79356b
A
379}
380
2d21ac55
A
381/*
382 * Returns: fp_getfsock:EBADF Bad file descriptor
383 * fp_getfsock:EOPNOTSUPP ...
384 * xlate => :ENOTSOCK Socket operation on non-socket
385 * :EFAULT Bad address on copyin/copyout
386 * :EBADF Bad file descriptor
387 * :EOPNOTSUPP Operation not supported on socket
388 * :EINVAL Invalid argument
389 * :EWOULDBLOCK Operation would block
390 * :ECONNABORTED Connection aborted
391 * :EINTR Interrupted function
392 * :EACCES Mandatory Access Control failure
393 * falloc_locked:ENFILE Too many files open in system
394 * falloc_locked::EMFILE Too many open files
395 * falloc_locked::ENOMEM Not enough space
396 * 0 Success
397 */
1c79356b 398int
2d21ac55 399accept_nocancel(struct proc *p, struct accept_nocancel_args *uap,
b0d623f7 400 int32_t *retval)
1c79356b 401{
91447636 402 struct fileproc *fp;
2d21ac55 403 struct sockaddr *sa = NULL;
91447636
A
404 socklen_t namelen;
405 int error;
406 struct socket *head, *so = NULL;
407 lck_mtx_t *mutex_held;
408 int fd = uap->s;
2d21ac55 409 int newfd;
1c79356b 410 short fflag; /* type must match fp->f_flag */
91447636 411 int dosocklock = 0;
1c79356b 412
2d21ac55
A
413 *retval = -1;
414
55e303ae 415 AUDIT_ARG(fd, uap->s);
2d21ac55 416
1c79356b 417 if (uap->name) {
91447636 418 error = copyin(uap->anamelen, (caddr_t)&namelen,
2d21ac55
A
419 sizeof (socklen_t));
420 if (error)
1c79356b
A
421 return (error);
422 }
91447636
A
423 error = fp_getfsock(p, fd, &fp, &head);
424 if (error) {
425 if (error == EOPNOTSUPP)
426 error = ENOTSOCK;
1c79356b 427 return (error);
91447636 428 }
55e303ae 429 if (head == NULL) {
91447636
A
430 error = EBADF;
431 goto out;
55e303ae 432 }
2d21ac55
A
433#if CONFIG_MACF_SOCKET_SUBSET
434 if ((error = mac_socket_check_accept(kauth_cred_get(), head)) != 0)
435 goto out;
436#endif /* MAC_SOCKET_SUBSET */
91447636
A
437
438 socket_lock(head, 1);
439
440 if (head->so_proto->pr_getlock != NULL) {
5ba3f43e 441 mutex_held = (*head->so_proto->pr_getlock)(head, PR_F_WILLUNLOCK);
91447636 442 dosocklock = 1;
2d21ac55 443 } else {
91447636
A
444 mutex_held = head->so_proto->pr_domain->dom_mtx;
445 dosocklock = 0;
446 }
447
1c79356b 448 if ((head->so_options & SO_ACCEPTCONN) == 0) {
2d21ac55
A
449 if ((head->so_proto->pr_flags & PR_CONNREQUIRED) == 0) {
450 error = EOPNOTSUPP;
451 } else {
452 /* POSIX: The socket is not accepting connections */
453 error = EINVAL;
454 }
91447636 455 socket_unlock(head, 1);
91447636 456 goto out;
1c79356b 457 }
813fb2f6 458check_again:
1c79356b 459 if ((head->so_state & SS_NBIO) && head->so_comp.tqh_first == NULL) {
91447636
A
460 socket_unlock(head, 1);
461 error = EWOULDBLOCK;
462 goto out;
1c79356b 463 }
2d21ac55 464 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
1c79356b
A
465 if (head->so_state & SS_CANTRCVMORE) {
466 head->so_error = ECONNABORTED;
467 break;
468 }
91447636 469 if (head->so_usecount < 1)
2d21ac55
A
470 panic("accept: head=%p refcount=%d\n", head,
471 head->so_usecount);
472 error = msleep((caddr_t)&head->so_timeo, mutex_held,
473 PSOCK | PCATCH, "accept", 0);
91447636 474 if (head->so_usecount < 1)
2d21ac55
A
475 panic("accept: 2 head=%p refcount=%d\n", head,
476 head->so_usecount);
91447636
A
477 if ((head->so_state & SS_DRAINING)) {
478 error = ECONNABORTED;
479 }
1c79356b 480 if (error) {
91447636
A
481 socket_unlock(head, 1);
482 goto out;
1c79356b
A
483 }
484 }
485 if (head->so_error) {
486 error = head->so_error;
487 head->so_error = 0;
91447636
A
488 socket_unlock(head, 1);
489 goto out;
1c79356b
A
490 }
491
1c79356b
A
492 /*
493 * At this point we know that there is at least one connection
494 * ready to be accepted. Remove it from the queue prior to
495 * allocating the file descriptor for it since falloc() may
496 * block allowing another process to accept the connection
497 * instead.
498 */
91447636 499 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
813fb2f6
A
500
501 so_acquire_accept_list(head, NULL);
502 if (TAILQ_EMPTY(&head->so_comp)) {
503 so_release_accept_list(head);
504 goto check_again;
505 }
506
e3027f41 507 so = TAILQ_FIRST(&head->so_comp);
1c79356b 508 TAILQ_REMOVE(&head->so_comp, so, so_list);
d190cdc3
A
509 so->so_head = NULL;
510 so->so_state &= ~SS_COMP;
1c79356b 511 head->so_qlen--;
813fb2f6
A
512 so_release_accept_list(head);
513
2d21ac55
A
514 /* unlock head to avoid deadlock with select, keep a ref on head */
515 socket_unlock(head, 0);
516
517#if CONFIG_MACF_SOCKET_SUBSET
518 /*
519 * Pass the pre-accepted socket to the MAC framework. This is
520 * cheaper than allocating a file descriptor for the socket,
521 * calling the protocol accept callback, and possibly freeing
522 * the file descriptor should the MAC check fails.
523 */
524 if ((error = mac_socket_check_accepted(kauth_cred_get(), so)) != 0) {
39236c6e 525 socket_lock(so, 1);
d190cdc3 526 so->so_state &= ~SS_NOFDREF;
39236c6e 527 socket_unlock(so, 1);
2d21ac55
A
528 soclose(so);
529 /* Drop reference on listening socket */
530 sodereference(head);
531 goto out;
532 }
533#endif /* MAC_SOCKET_SUBSET */
534
535 /*
536 * Pass the pre-accepted socket to any interested socket filter(s).
537 * Upon failure, the socket would have been closed by the callee.
538 */
d190cdc3 539 if (so->so_filt != NULL && (error = soacceptfilter(so, head)) != 0) {
2d21ac55
A
540 /* Drop reference on listening socket */
541 sodereference(head);
542 /* Propagate socket filter's error code to the caller */
543 goto out;
544 }
545
1c79356b 546 fflag = fp->f_flag;
2d21ac55 547 error = falloc(p, &fp, &newfd, vfs_context_current());
1c79356b 548 if (error) {
39236c6e 549 /*
316670eb
A
550 * Probably ran out of file descriptors.
551 *
552 * <rdar://problem/8554930>
553 * Don't put this back on the socket like we used to, that
554 * just causes the client to spin. Drop the socket.
1c79356b 555 */
39236c6e 556 socket_lock(so, 1);
d190cdc3 557 so->so_state &= ~SS_NOFDREF;
39236c6e 558 socket_unlock(so, 1);
316670eb
A
559 soclose(so);
560 sodereference(head);
91447636 561 goto out;
2d21ac55 562 }
91447636 563 *retval = newfd;
1c79356b
A
564 fp->f_flag = fflag;
565 fp->f_ops = &socketops;
566 fp->f_data = (caddr_t)so;
fe8ab488 567
91447636
A
568 socket_lock(head, 0);
569 if (dosocklock)
570 socket_lock(so, 1);
fe8ab488 571
fe8ab488
A
572 /* Sync socket non-blocking/async state with file flags */
573 if (fp->f_flag & FNONBLOCK) {
574 so->so_state |= SS_NBIO;
575 } else {
576 so->so_state &= ~SS_NBIO;
577 }
578
579 if (fp->f_flag & FASYNC) {
580 so->so_state |= SS_ASYNC;
581 so->so_rcv.sb_flags |= SB_ASYNC;
582 so->so_snd.sb_flags |= SB_ASYNC;
583 } else {
584 so->so_state &= ~SS_ASYNC;
585 so->so_rcv.sb_flags &= ~SB_ASYNC;
586 so->so_snd.sb_flags &= ~SB_ASYNC;
587 }
588
91447636
A
589 (void) soacceptlock(so, &sa, 0);
590 socket_unlock(head, 1);
2d21ac55 591 if (sa == NULL) {
1c79356b
A
592 namelen = 0;
593 if (uap->name)
594 goto gotnoname;
91447636 595 error = 0;
2d21ac55 596 goto releasefd;
1c79356b 597 }
2d21ac55
A
598 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
599
1c79356b 600 if (uap->name) {
2d21ac55
A
601 socklen_t sa_len;
602
603 /* save sa_len before it is destroyed */
604 sa_len = sa->sa_len;
605 namelen = MIN(namelen, sa_len);
91447636 606 error = copyout(sa, uap->name, namelen);
1c79356b 607 if (!error)
2d21ac55
A
608 /* return the actual, untruncated address length */
609 namelen = sa_len;
1c79356b 610gotnoname:
2d21ac55
A
611 error = copyout((caddr_t)&namelen, uap->anamelen,
612 sizeof (socklen_t));
1c79356b
A
613 }
614 FREE(sa, M_SONAME);
2d21ac55 615
b0d623f7 616releasefd:
2d21ac55 617 /*
6d2010ae
A
618 * If the socket has been marked as inactive by sosetdefunct(),
619 * disallow further operations on it.
2d21ac55
A
620 */
621 if (so->so_flags & SOF_DEFUNCT) {
6d2010ae
A
622 sodefunct(current_proc(), so,
623 SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL);
2d21ac55
A
624 }
625
91447636
A
626 if (dosocklock)
627 socket_unlock(so, 1);
2d21ac55 628
2d21ac55
A
629 proc_fdlock(p);
630 procfdtbl_releasefd(p, newfd, NULL);
631 fp_drop(p, newfd, fp, 1);
632 proc_fdunlock(p);
633
91447636
A
634out:
635 file_drop(fd);
3e170ce0
A
636
637 if (error == 0 && ENTR_SHOULDTRACE) {
638 KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
639 newfd, 0, (int64_t)VM_KERNEL_ADDRPERM(so));
640 }
1c79356b
A
641 return (error);
642}
643
644int
b0d623f7 645accept(struct proc *p, struct accept_args *uap, int32_t *retval)
1c79356b 646{
2d21ac55 647 __pthread_testcancel(1);
3e170ce0
A
648 return (accept_nocancel(p, (struct accept_nocancel_args *)uap,
649 retval));
1c79356b
A
650}
651
2d21ac55
A
652/*
653 * Returns: 0 Success
654 * EBADF Bad file descriptor
655 * EALREADY Connection already in progress
656 * EINPROGRESS Operation in progress
657 * ECONNABORTED Connection aborted
658 * EINTR Interrupted function
659 * EACCES Mandatory Access Control failure
660 * file_socket:ENOTSOCK
661 * file_socket:EBADF
662 * getsockaddr:ENAMETOOLONG Filename too long
663 * getsockaddr:EINVAL Invalid argument
664 * getsockaddr:ENOMEM Not enough space
665 * getsockaddr:EFAULT Bad address
666 * soconnectlock:EOPNOTSUPP
667 * soconnectlock:EISCONN
668 * soconnectlock:??? [depends on protocol, filters]
669 * msleep:EINTR
670 *
671 * Imputed: so_error error may be set from so_error, which
672 * may have been set by soconnectlock.
673 */
674/* ARGSUSED */
1c79356b 675int
b0d623f7 676connect(struct proc *p, struct connect_args *uap, int32_t *retval)
1c79356b 677{
2d21ac55 678 __pthread_testcancel(1);
3e170ce0
A
679 return (connect_nocancel(p, (struct connect_nocancel_args *)uap,
680 retval));
1c79356b 681}
1c79356b 682
1c79356b 683int
39236c6e 684connect_nocancel(proc_t p, struct connect_nocancel_args *uap, int32_t *retval)
1c79356b 685{
39236c6e 686#pragma unused(p, retval)
91447636 687 struct socket *so;
2d21ac55
A
688 struct sockaddr_storage ss;
689 struct sockaddr *sa = NULL;
91447636
A
690 int error;
691 int fd = uap->s;
4a3eedf9 692 boolean_t dgram;
1c79356b 693
55e303ae 694 AUDIT_ARG(fd, uap->s);
2d21ac55
A
695 error = file_socket(fd, &so);
696 if (error != 0)
1c79356b 697 return (error);
91447636
A
698 if (so == NULL) {
699 error = EBADF;
700 goto out;
701 }
702
4a3eedf9
A
703 /*
704 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
705 * if this is a datagram socket; translate for other types.
706 */
707 dgram = (so->so_type == SOCK_DGRAM);
708
2d21ac55
A
709 /* Get socket address now before we obtain socket lock */
710 if (uap->namelen > sizeof (ss)) {
4a3eedf9 711 error = getsockaddr(so, &sa, uap->name, uap->namelen, !dgram);
2d21ac55 712 } else {
4a3eedf9 713 error = getsockaddr_s(so, &ss, uap->name, uap->namelen, !dgram);
39236c6e 714 if (error == 0)
2d21ac55 715 sa = (struct sockaddr *)&ss;
2d21ac55
A
716 }
717 if (error != 0)
718 goto out;
719
39236c6e
A
720 error = connectit(so, sa);
721
722 if (sa != NULL && sa != SA(&ss))
723 FREE(sa, M_SONAME);
724 if (error == ERESTART)
725 error = EINTR;
726out:
727 file_drop(fd);
728 return (error);
729}
730
731static int
732connectx_nocancel(struct proc *p, struct connectx_args *uap, int *retval)
733{
734#pragma unused(p, retval)
813fb2f6
A
735 struct sockaddr_storage ss, sd;
736 struct sockaddr *src = NULL, *dst = NULL;
39236c6e 737 struct socket *so;
3e170ce0 738 int error, error1, fd = uap->socket;
39236c6e 739 boolean_t dgram;
3e170ce0
A
740 sae_connid_t cid = SAE_CONNID_ANY;
741 struct user32_sa_endpoints ep32;
742 struct user64_sa_endpoints ep64;
743 struct user_sa_endpoints ep;
744 user_ssize_t bytes_written = 0;
745 struct user_iovec *iovp;
746 uio_t auio = NULL;
39236c6e 747
3e170ce0 748 AUDIT_ARG(fd, uap->socket);
39236c6e
A
749 error = file_socket(fd, &so);
750 if (error != 0)
751 return (error);
752 if (so == NULL) {
753 error = EBADF;
754 goto out;
755 }
756
3e170ce0
A
757 if (uap->endpoints == USER_ADDR_NULL) {
758 error = EINVAL;
759 goto out;
760 }
761
762 if (IS_64BIT_PROCESS(p)) {
763 error = copyin(uap->endpoints, (caddr_t)&ep64, sizeof(ep64));
764 if (error != 0)
765 goto out;
766
767 ep.sae_srcif = ep64.sae_srcif;
768 ep.sae_srcaddr = ep64.sae_srcaddr;
769 ep.sae_srcaddrlen = ep64.sae_srcaddrlen;
770 ep.sae_dstaddr = ep64.sae_dstaddr;
771 ep.sae_dstaddrlen = ep64.sae_dstaddrlen;
772 } else {
773 error = copyin(uap->endpoints, (caddr_t)&ep32, sizeof(ep32));
774 if (error != 0)
775 goto out;
776
777 ep.sae_srcif = ep32.sae_srcif;
778 ep.sae_srcaddr = ep32.sae_srcaddr;
779 ep.sae_srcaddrlen = ep32.sae_srcaddrlen;
780 ep.sae_dstaddr = ep32.sae_dstaddr;
781 ep.sae_dstaddrlen = ep32.sae_dstaddrlen;
782 }
fe8ab488 783
39236c6e
A
784 /*
785 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
786 * if this is a datagram socket; translate for other types.
787 */
788 dgram = (so->so_type == SOCK_DGRAM);
789
813fb2f6
A
790 /* Get socket address now before we obtain socket lock */
791 if (ep.sae_srcaddr != USER_ADDR_NULL) {
792 if (ep.sae_srcaddrlen > sizeof (ss)) {
793 error = getsockaddr(so, &src, ep.sae_srcaddr, ep.sae_srcaddrlen, dgram);
794 } else {
795 error = getsockaddr_s(so, &ss, ep.sae_srcaddr, ep.sae_srcaddrlen, dgram);
796 if (error == 0)
797 src = (struct sockaddr *)&ss;
798 }
799
800 if (error)
801 goto out;
802 }
39236c6e 803
3e170ce0
A
804 if (ep.sae_dstaddr == USER_ADDR_NULL) {
805 error = EINVAL;
806 goto out;
807 }
808
813fb2f6
A
809 /* Get socket address now before we obtain socket lock */
810 if (ep.sae_dstaddrlen > sizeof (sd)) {
811 error = getsockaddr(so, &dst, ep.sae_dstaddr, ep.sae_dstaddrlen, dgram);
812 } else {
813 error = getsockaddr_s(so, &sd, ep.sae_dstaddr, ep.sae_dstaddrlen, dgram);
814 if (error == 0)
815 dst = (struct sockaddr *)&sd;
816 }
817
818 if (error)
39236c6e
A
819 goto out;
820
813fb2f6 821 VERIFY(dst != NULL);
39236c6e 822
3e170ce0
A
823 if (uap->iov != USER_ADDR_NULL) {
824 /* Verify range before calling uio_create() */
825 if (uap->iovcnt <= 0 || uap->iovcnt > UIO_MAXIOV)
826 return (EINVAL);
827
828 if (uap->len == USER_ADDR_NULL)
829 return (EINVAL);
830
831 /* allocate a uio to hold the number of iovecs passed */
832 auio = uio_create(uap->iovcnt, 0,
833 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
834 UIO_WRITE);
835
836 if (auio == NULL) {
837 error = ENOMEM;
838 goto out;
839 }
840
841 /*
842 * get location of iovecs within the uio.
843 * then copyin the iovecs from user space.
844 */
845 iovp = uio_iovsaddr(auio);
846 if (iovp == NULL) {
847 error = ENOMEM;
848 goto out;
849 }
850 error = copyin_user_iovec_array(uap->iov,
851 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
852 uap->iovcnt, iovp);
853 if (error != 0)
854 goto out;
855
856 /* finish setup of uio_t */
857 error = uio_calculateresid(auio);
858 if (error != 0) {
859 goto out;
860 }
861 }
862
813fb2f6 863 error = connectitx(so, src, dst, p, ep.sae_srcif, uap->associd,
3e170ce0 864 &cid, auio, uap->flags, &bytes_written);
39236c6e
A
865 if (error == ERESTART)
866 error = EINTR;
867
3e170ce0
A
868 if (uap->len != USER_ADDR_NULL) {
869 error1 = copyout(&bytes_written, uap->len, sizeof (uap->len));
870 /* give precedence to connectitx errors */
871 if ((error1 != 0) && (error == 0))
872 error = error1;
873 }
39236c6e 874
3e170ce0
A
875 if (uap->connid != USER_ADDR_NULL) {
876 error1 = copyout(&cid, uap->connid, sizeof (cid));
877 /* give precedence to connectitx errors */
878 if ((error1 != 0) && (error == 0))
879 error = error1;
880 }
39236c6e
A
881out:
882 file_drop(fd);
3e170ce0
A
883 if (auio != NULL) {
884 uio_free(auio);
885 }
813fb2f6
A
886 if (src != NULL && src != SA(&ss))
887 FREE(src, M_SONAME);
888 if (dst != NULL && dst != SA(&sd))
889 FREE(dst, M_SONAME);
39236c6e
A
890 return (error);
891}
892
/*
 * connectx
 *
 * Cancellable entry point for connectx_nocancel().
 */
int
connectx(struct proc *p, struct connectx_args *uap, int *retval)
{
	int error;

	/*
	 * Due to similarity with a POSIX interface, define as
	 * an unofficial cancellation point.
	 */
	__pthread_testcancel(1);
	error = connectx_nocancel(p, uap, retval);
	return (error);
}
903
904static int
905connectit(struct socket *so, struct sockaddr *sa)
906{
907 int error;
908
2d21ac55
A
909 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
910#if CONFIG_MACF_SOCKET_SUBSET
39236c6e
A
911 if ((error = mac_socket_check_connect(kauth_cred_get(), so, sa)) != 0)
912 return (error);
913#endif /* MAC_SOCKET_SUBSET */
914
915 socket_lock(so, 1);
916 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
917 error = EALREADY;
918 goto out;
919 }
920 error = soconnectlock(so, sa, 0);
921 if (error != 0) {
922 so->so_state &= ~SS_ISCONNECTING;
2d21ac55
A
923 goto out;
924 }
39236c6e
A
925 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
926 error = EINPROGRESS;
927 goto out;
928 }
929 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
930 lck_mtx_t *mutex_held;
931
932 if (so->so_proto->pr_getlock != NULL)
5ba3f43e 933 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
39236c6e
A
934 else
935 mutex_held = so->so_proto->pr_domain->dom_mtx;
936 error = msleep((caddr_t)&so->so_timeo, mutex_held,
937 PSOCK | PCATCH, __func__, 0);
938 if (so->so_state & SS_DRAINING) {
939 error = ECONNABORTED;
940 }
941 if (error != 0)
942 break;
943 }
944 if (error == 0) {
945 error = so->so_error;
946 so->so_error = 0;
947 }
948out:
949 socket_unlock(so, 1);
950 return (error);
951}
952
953static int
813fb2f6
A
954connectitx(struct socket *so, struct sockaddr *src,
955 struct sockaddr *dst, struct proc *p, uint32_t ifscope,
3e170ce0
A
956 sae_associd_t aid, sae_connid_t *pcid, uio_t auio, unsigned int flags,
957 user_ssize_t *bytes_written)
39236c6e 958{
39236c6e 959 int error;
3e170ce0 960#pragma unused (flags)
39236c6e 961
813fb2f6 962 VERIFY(dst != NULL);
39236c6e 963
813fb2f6 964 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), dst);
39236c6e 965#if CONFIG_MACF_SOCKET_SUBSET
813fb2f6
A
966 if ((error = mac_socket_check_connect(kauth_cred_get(), so, dst)) != 0)
967 return (error);
2d21ac55 968#endif /* MAC_SOCKET_SUBSET */
91447636 969
39236c6e 970 socket_lock(so, 1);
91447636 971 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
91447636
A
972 error = EALREADY;
973 goto out;
974 }
3e170ce0
A
975
976 if ((so->so_proto->pr_flags & PR_DATA_IDEMPOTENT) &&
813fb2f6 977 (flags & CONNECT_DATA_IDEMPOTENT)) {
3e170ce0
A
978 so->so_flags1 |= SOF1_DATA_IDEMPOTENT;
979
813fb2f6 980 if (flags & CONNECT_DATA_AUTHENTICATED)
5ba3f43e 981 so->so_flags1 |= SOF1_DATA_AUTHENTICATED;
813fb2f6
A
982 }
983
3e170ce0
A
984 /*
985 * Case 1: CONNECT_RESUME_ON_READ_WRITE set, no data.
986 * Case 2: CONNECT_RESUME_ON_READ_WRITE set, with data (user error)
987 * Case 3: CONNECT_RESUME_ON_READ_WRITE not set, with data
988 * Case 3 allows user to combine write with connect even if they have
989 * no use for TFO (such as regular TCP, and UDP).
990 * Case 4: CONNECT_RESUME_ON_READ_WRITE not set, no data (regular case)
991 */
992 if ((so->so_proto->pr_flags & PR_PRECONN_WRITE) &&
993 ((flags & CONNECT_RESUME_ON_READ_WRITE) || auio))
994 so->so_flags1 |= SOF1_PRECONNECT_DATA;
995
996 /*
997 * If a user sets data idempotent and does not pass an uio, or
998 * sets CONNECT_RESUME_ON_READ_WRITE, this is an error, reset
999 * SOF1_DATA_IDEMPOTENT.
1000 */
1001 if (!(so->so_flags1 & SOF1_PRECONNECT_DATA) &&
1002 (so->so_flags1 & SOF1_DATA_IDEMPOTENT)) {
1003 /* We should return EINVAL instead perhaps. */
1004 so->so_flags1 &= ~SOF1_DATA_IDEMPOTENT;
1005 }
1006
813fb2f6 1007 error = soconnectxlocked(so, src, dst, p, ifscope,
3e170ce0 1008 aid, pcid, 0, NULL, 0, auio, bytes_written);
39236c6e
A
1009 if (error != 0) {
1010 so->so_state &= ~SS_ISCONNECTING;
1011 goto out;
1012 }
3e170ce0
A
1013 /*
1014 * If, after the call to soconnectxlocked the flag is still set (in case
1015 * data has been queued and the connect() has actually been triggered,
1016 * it will have been unset by the transport), we exit immediately. There
1017 * is no reason to wait on any event.
1018 */
1019 if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
1020 error = 0;
1021 goto out;
1022 }
1c79356b 1023 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
91447636
A
1024 error = EINPROGRESS;
1025 goto out;
1c79356b 1026 }
1c79356b 1027 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
39236c6e
A
1028 lck_mtx_t *mutex_held;
1029
2d21ac55 1030 if (so->so_proto->pr_getlock != NULL)
5ba3f43e 1031 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
2d21ac55 1032 else
91447636 1033 mutex_held = so->so_proto->pr_domain->dom_mtx;
2d21ac55 1034 error = msleep((caddr_t)&so->so_timeo, mutex_held,
39236c6e
A
1035 PSOCK | PCATCH, __func__, 0);
1036 if (so->so_state & SS_DRAINING) {
91447636
A
1037 error = ECONNABORTED;
1038 }
39236c6e 1039 if (error != 0)
1c79356b
A
1040 break;
1041 }
1042 if (error == 0) {
1043 error = so->so_error;
1044 so->so_error = 0;
1045 }
39236c6e 1046out:
91447636 1047 socket_unlock(so, 1);
39236c6e
A
1048 return (error);
1049}
1050
/*
 * peeloff system call entry point.
 *
 * The arguments are unused; the call only acts as an unofficial
 * cancellation point (because of its similarity with a POSIX interface)
 * and then reports success.
 */
int
peeloff(struct proc *p, struct peeloff_args *uap, int *retval)
{
#pragma unused(p, uap, retval)
	const int error = 0;

	__pthread_testcancel(1);

	return (error);
}
1062
/*
 * disconnectx(2) system call entry point.
 *
 * Because of its similarity with a POSIX interface, this call is treated
 * as an unofficial cancellation point before the request is handed to
 * disconnectx_nocancel().
 */
int
disconnectx(struct proc *p, struct disconnectx_args *uap, int *retval)
{
	int error;

	__pthread_testcancel(1);
	error = disconnectx_nocancel(p, uap, retval);

	return (error);
}
1073
1074static int
1075disconnectx_nocancel(struct proc *p, struct disconnectx_args *uap, int *retval)
1076{
1077#pragma unused(p, retval)
1078 struct socket *so;
1079 int fd = uap->s;
1080 int error;
1081
1082 error = file_socket(fd, &so);
1083 if (error != 0)
1084 return (error);
1085 if (so == NULL) {
1086 error = EBADF;
1087 goto out;
1088 }
1089
1090 error = sodisconnectx(so, uap->aid, uap->cid);
91447636
A
1091out:
1092 file_drop(fd);
1c79356b
A
1093 return (error);
1094}
1095
2d21ac55
A
1096/*
1097 * Returns: 0 Success
1098 * socreate:EAFNOSUPPORT
1099 * socreate:EPROTOTYPE
1100 * socreate:EPROTONOSUPPORT
1101 * socreate:ENOBUFS
1102 * socreate:ENOMEM
1103 * socreate:EISCONN
1104 * socreate:??? [other protocol families, IPSEC]
1105 * falloc:ENFILE
1106 * falloc:EMFILE
1107 * falloc:ENOMEM
1108 * copyout:EFAULT
1109 * soconnect2:EINVAL
1110 * soconnect2:EPROTOTYPE
1111 * soconnect2:??? [other protocol families]
1112 */
1c79356b 1113int
2d21ac55 1114socketpair(struct proc *p, struct socketpair_args *uap,
b0d623f7 1115 __unused int32_t *retval)
1c79356b 1116{
91447636 1117 struct fileproc *fp1, *fp2;
1c79356b
A
1118 struct socket *so1, *so2;
1119 int fd, error, sv[2];
1120
55e303ae 1121 AUDIT_ARG(socket, uap->domain, uap->type, uap->protocol);
1c79356b
A
1122 error = socreate(uap->domain, &so1, uap->type, uap->protocol);
1123 if (error)
1124 return (error);
1125 error = socreate(uap->domain, &so2, uap->type, uap->protocol);
1126 if (error)
1127 goto free1;
91447636 1128
2d21ac55 1129 error = falloc(p, &fp1, &fd, vfs_context_current());
91447636 1130 if (error) {
1c79356b 1131 goto free2;
91447636 1132 }
1c79356b 1133 fp1->f_flag = FREAD|FWRITE;
1c79356b
A
1134 fp1->f_ops = &socketops;
1135 fp1->f_data = (caddr_t)so1;
91447636
A
1136 sv[0] = fd;
1137
2d21ac55 1138 error = falloc(p, &fp2, &fd, vfs_context_current());
91447636 1139 if (error) {
1c79356b 1140 goto free3;
91447636 1141 }
1c79356b 1142 fp2->f_flag = FREAD|FWRITE;
1c79356b
A
1143 fp2->f_ops = &socketops;
1144 fp2->f_data = (caddr_t)so2;
1145 sv[1] = fd;
91447636 1146
1c79356b
A
1147 error = soconnect2(so1, so2);
1148 if (error) {
1c79356b
A
1149 goto free4;
1150 }
1c79356b
A
1151 if (uap->type == SOCK_DGRAM) {
1152 /*
1153 * Datagram socket connection is asymmetric.
1154 */
2d21ac55
A
1155 error = soconnect2(so2, so1);
1156 if (error) {
1157 goto free4;
1158 }
1c79356b 1159 }
91447636 1160
6d2010ae
A
1161 if ((error = copyout(sv, uap->rsv, 2 * sizeof (int))) != 0)
1162 goto free4;
1163
91447636 1164 proc_fdlock(p);
6601e61a
A
1165 procfdtbl_releasefd(p, sv[0], NULL);
1166 procfdtbl_releasefd(p, sv[1], NULL);
91447636
A
1167 fp_drop(p, sv[0], fp1, 1);
1168 fp_drop(p, sv[1], fp2, 1);
1169 proc_fdunlock(p);
1170
6d2010ae 1171 return (0);
1c79356b 1172free4:
91447636 1173 fp_free(p, sv[1], fp2);
1c79356b 1174free3:
91447636 1175 fp_free(p, sv[0], fp1);
1c79356b 1176free2:
2d21ac55 1177 (void) soclose(so2);
1c79356b 1178free1:
2d21ac55 1179 (void) soclose(so1);
1c79356b
A
1180 return (error);
1181}
1182
2d21ac55
A
1183/*
1184 * Returns: 0 Success
1185 * EINVAL
1186 * ENOBUFS
1187 * EBADF
1188 * EPIPE
1189 * EACCES Mandatory Access Control failure
1190 * file_socket:ENOTSOCK
1191 * file_socket:EBADF
1192 * getsockaddr:ENAMETOOLONG Filename too long
1193 * getsockaddr:EINVAL Invalid argument
1194 * getsockaddr:ENOMEM Not enough space
1195 * getsockaddr:EFAULT Bad address
1196 * <pru_sosend>:EACCES[TCP]
1197 * <pru_sosend>:EADDRINUSE[TCP]
1198 * <pru_sosend>:EADDRNOTAVAIL[TCP]
1199 * <pru_sosend>:EAFNOSUPPORT[TCP]
1200 * <pru_sosend>:EAGAIN[TCP]
1201 * <pru_sosend>:EBADF
1202 * <pru_sosend>:ECONNRESET[TCP]
1203 * <pru_sosend>:EFAULT
1204 * <pru_sosend>:EHOSTUNREACH[TCP]
1205 * <pru_sosend>:EINTR
1206 * <pru_sosend>:EINVAL
1207 * <pru_sosend>:EISCONN[AF_INET]
1208 * <pru_sosend>:EMSGSIZE[TCP]
1209 * <pru_sosend>:ENETDOWN[TCP]
1210 * <pru_sosend>:ENETUNREACH[TCP]
1211 * <pru_sosend>:ENOBUFS
1212 * <pru_sosend>:ENOMEM[TCP]
1213 * <pru_sosend>:ENOTCONN[AF_INET]
1214 * <pru_sosend>:EOPNOTSUPP
1215 * <pru_sosend>:EPERM[TCP]
1216 * <pru_sosend>:EPIPE
1217 * <pru_sosend>:EWOULDBLOCK
1218 * <pru_sosend>:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
1219 * <pru_sosend>:???[AF_INET] [whatever a filter author chooses]
1220 * <pru_sosend>:??? [value from so_error]
1221 * sockargs:???
1222 */
1c79356b 1223static int
3e170ce0 1224sendit(struct proc *p, struct socket *so, struct user_msghdr *mp, uio_t uiop,
b0d623f7 1225 int flags, int32_t *retval)
1c79356b 1226{
2d21ac55
A
1227 struct mbuf *control = NULL;
1228 struct sockaddr_storage ss;
1229 struct sockaddr *to = NULL;
1230 boolean_t want_free = TRUE;
91447636 1231 int error;
91447636 1232 user_ssize_t len;
2d21ac55
A
1233
1234 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
1c79356b 1235
2d21ac55
A
1236 if (mp->msg_name != USER_ADDR_NULL) {
1237 if (mp->msg_namelen > sizeof (ss)) {
1238 error = getsockaddr(so, &to, mp->msg_name,
4a3eedf9 1239 mp->msg_namelen, TRUE);
2d21ac55
A
1240 } else {
1241 error = getsockaddr_s(so, &ss, mp->msg_name,
4a3eedf9 1242 mp->msg_namelen, TRUE);
2d21ac55
A
1243 if (error == 0) {
1244 to = (struct sockaddr *)&ss;
1245 want_free = FALSE;
1246 }
1c79356b 1247 }
2d21ac55
A
1248 if (error != 0)
1249 goto out;
1250 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), to);
91447636 1251 }
2d21ac55
A
1252 if (mp->msg_control != USER_ADDR_NULL) {
1253 if (mp->msg_controllen < sizeof (struct cmsghdr)) {
1c79356b
A
1254 error = EINVAL;
1255 goto bad;
1256 }
1257 error = sockargs(&control, mp->msg_control,
1258 mp->msg_controllen, MT_CONTROL);
2d21ac55 1259 if (error != 0)
1c79356b 1260 goto bad;
91447636 1261 }
1c79356b 1262
2d21ac55
A
1263#if CONFIG_MACF_SOCKET_SUBSET
1264 /*
1265 * We check the state without holding the socket lock;
1266 * if a race condition occurs, it would simply result
3e170ce0 1267 * in an extra call to the MAC check function.
2d21ac55 1268 */
3e170ce0 1269 if (to != NULL &&
316670eb 1270 !(so->so_state & SS_DEFUNCT) &&
2d21ac55
A
1271 (error = mac_socket_check_send(kauth_cred_get(), so, to)) != 0)
1272 goto bad;
1273#endif /* MAC_SOCKET_SUBSET */
91447636
A
1274
1275 len = uio_resid(uiop);
39236c6e
A
1276 error = so->so_proto->pr_usrreqs->pru_sosend(so, to, uiop, 0,
1277 control, flags);
2d21ac55 1278 if (error != 0) {
91447636 1279 if (uio_resid(uiop) != len && (error == ERESTART ||
1c79356b
A
1280 error == EINTR || error == EWOULDBLOCK))
1281 error = 0;
2d21ac55 1282 /* Generation of SIGPIPE can be controlled per socket */
9bccf70c 1283 if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE))
1c79356b
A
1284 psignal(p, SIGPIPE);
1285 }
1286 if (error == 0)
91447636
A
1287 *retval = (int)(len - uio_resid(uiop));
1288bad:
2d21ac55 1289 if (to != NULL && want_free)
1c79356b 1290 FREE(to, M_SONAME);
91447636 1291out:
2d21ac55 1292 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error, 0, 0, 0, 0);
3e170ce0 1293
1c79356b
A
1294 return (error);
1295}
1296
2d21ac55
A
1297/*
1298 * Returns: 0 Success
1299 * ENOMEM
1300 * sendit:??? [see sendit definition in this file]
1301 * write:??? [4056224: applicable for pipes]
1302 */
1c79356b 1303int
b0d623f7 1304sendto(struct proc *p, struct sendto_args *uap, int32_t *retval)
2d21ac55
A
1305{
1306 __pthread_testcancel(1);
39236c6e 1307 return (sendto_nocancel(p, (struct sendto_nocancel_args *)uap, retval));
2d21ac55
A
1308}
1309
1310int
39236c6e
A
1311sendto_nocancel(struct proc *p,
1312 struct sendto_nocancel_args *uap,
1313 int32_t *retval)
1c79356b 1314{
91447636
A
1315 struct user_msghdr msg;
1316 int error;
1317 uio_t auio = NULL;
3e170ce0 1318 struct socket *so;
1c79356b 1319
2d21ac55 1320 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_START, 0, 0, 0, 0, 0);
55e303ae 1321 AUDIT_ARG(fd, uap->s);
1c79356b 1322
91447636 1323 auio = uio_create(1, 0,
2d21ac55
A
1324 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1325 UIO_WRITE);
91447636 1326 if (auio == NULL) {
3e170ce0
A
1327 error = ENOMEM;
1328 goto done;
91447636
A
1329 }
1330 uio_addiov(auio, uap->buf, uap->len);
1331
1c79356b
A
1332 msg.msg_name = uap->to;
1333 msg.msg_namelen = uap->tolen;
91447636
A
1334 /* no need to set up msg_iov. sendit uses uio_t we send it */
1335 msg.msg_iov = 0;
1336 msg.msg_iovlen = 0;
1c79356b 1337 msg.msg_control = 0;
1c79356b 1338 msg.msg_flags = 0;
1c79356b 1339
3e170ce0
A
1340 error = file_socket(uap->s, &so);
1341 if (error)
1342 goto done;
2d21ac55 1343
3e170ce0
A
1344 if (so == NULL) {
1345 error = EBADF;
1346 } else {
1347 error = sendit(p, so, &msg, auio, uap->flags, retval);
91447636 1348 }
2d21ac55 1349
3e170ce0
A
1350 file_drop(uap->s);
1351done:
1352 if (auio != NULL)
1353 uio_free(auio);
1354
2d21ac55 1355 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_END, error, *retval, 0, 0, 0);
1c79356b 1356
2d21ac55 1357 return (error);
1c79356b 1358}
1c79356b 1359
2d21ac55
A
1360/*
1361 * Returns: 0 Success
1362 * ENOBUFS
1363 * copyin:EFAULT
1364 * sendit:??? [see sendit definition in this file]
1365 */
1c79356b 1366int
b0d623f7 1367sendmsg(struct proc *p, struct sendmsg_args *uap, int32_t *retval)
1c79356b 1368{
2d21ac55 1369 __pthread_testcancel(1);
3e170ce0
A
1370 return (sendmsg_nocancel(p, (struct sendmsg_nocancel_args *)uap,
1371 retval));
1c79356b 1372}
1c79356b
A
1373
1374int
3e170ce0
A
1375sendmsg_nocancel(struct proc *p, struct sendmsg_nocancel_args *uap,
1376 int32_t *retval)
1c79356b 1377{
b0d623f7
A
1378 struct user32_msghdr msg32;
1379 struct user64_msghdr msg64;
91447636
A
1380 struct user_msghdr user_msg;
1381 caddr_t msghdrp;
1382 int size_of_msghdr;
1c79356b 1383 int error;
91447636
A
1384 uio_t auio = NULL;
1385 struct user_iovec *iovp;
3e170ce0 1386 struct socket *so;
1c79356b 1387
2d21ac55 1388 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
55e303ae 1389 AUDIT_ARG(fd, uap->s);
91447636 1390 if (IS_64BIT_PROCESS(p)) {
b0d623f7
A
1391 msghdrp = (caddr_t)&msg64;
1392 size_of_msghdr = sizeof (msg64);
2d21ac55 1393 } else {
b0d623f7
A
1394 msghdrp = (caddr_t)&msg32;
1395 size_of_msghdr = sizeof (msg32);
91447636
A
1396 }
1397 error = copyin(uap->msg, msghdrp, size_of_msghdr);
2d21ac55
A
1398 if (error) {
1399 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1400 return (error);
1c79356b 1401 }
91447636 1402
b0d623f7
A
1403 if (IS_64BIT_PROCESS(p)) {
1404 user_msg.msg_flags = msg64.msg_flags;
1405 user_msg.msg_controllen = msg64.msg_controllen;
1406 user_msg.msg_control = msg64.msg_control;
1407 user_msg.msg_iovlen = msg64.msg_iovlen;
1408 user_msg.msg_iov = msg64.msg_iov;
1409 user_msg.msg_namelen = msg64.msg_namelen;
1410 user_msg.msg_name = msg64.msg_name;
1411 } else {
1412 user_msg.msg_flags = msg32.msg_flags;
1413 user_msg.msg_controllen = msg32.msg_controllen;
1414 user_msg.msg_control = msg32.msg_control;
1415 user_msg.msg_iovlen = msg32.msg_iovlen;
1416 user_msg.msg_iov = msg32.msg_iov;
1417 user_msg.msg_namelen = msg32.msg_namelen;
1418 user_msg.msg_name = msg32.msg_name;
91447636
A
1419 }
1420
1421 if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
2d21ac55
A
1422 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, EMSGSIZE,
1423 0, 0, 0, 0);
91447636
A
1424 return (EMSGSIZE);
1425 }
1426
1427 /* allocate a uio large enough to hold the number of iovecs passed */
1428 auio = uio_create(user_msg.msg_iovlen, 0,
2d21ac55
A
1429 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1430 UIO_WRITE);
91447636
A
1431 if (auio == NULL) {
1432 error = ENOBUFS;
1433 goto done;
1434 }
2d21ac55 1435
91447636 1436 if (user_msg.msg_iovlen) {
2d21ac55
A
1437 /*
1438 * get location of iovecs within the uio.
1439 * then copyin the iovecs from user space.
91447636
A
1440 */
1441 iovp = uio_iovsaddr(auio);
1442 if (iovp == NULL) {
1443 error = ENOBUFS;
1444 goto done;
1445 }
b0d623f7
A
1446 error = copyin_user_iovec_array(user_msg.msg_iov,
1447 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
1448 user_msg.msg_iovlen, iovp);
91447636
A
1449 if (error)
1450 goto done;
1451 user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
2d21ac55
A
1452
1453 /* finish setup of uio_t */
39236c6e
A
1454 error = uio_calculateresid(auio);
1455 if (error) {
1456 goto done;
1457 }
2d21ac55 1458 } else {
91447636
A
1459 user_msg.msg_iov = 0;
1460 }
2d21ac55
A
1461
1462 /* msg_flags is ignored for send */
91447636 1463 user_msg.msg_flags = 0;
2d21ac55 1464
3e170ce0
A
1465 error = file_socket(uap->s, &so);
1466 if (error) {
1467 goto done;
1468 }
1469 if (so == NULL) {
1470 error = EBADF;
1471 } else {
1472 error = sendit(p, so, &user_msg, auio, uap->flags, retval);
1473 }
1474 file_drop(uap->s);
1c79356b 1475done:
91447636
A
1476 if (auio != NULL) {
1477 uio_free(auio);
1478 }
2d21ac55 1479 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
91447636 1480
1c79356b
A
1481 return (error);
1482}
1483
fe8ab488
A
1484int
1485sendmsg_x(struct proc *p, struct sendmsg_x_args *uap, user_ssize_t *retval)
1486{
1487 int error = 0;
3e170ce0 1488 struct user_msghdr_x *user_msg_x = NULL;
fe8ab488
A
1489 struct uio **uiop = NULL;
1490 struct socket *so;
1491 u_int i;
1492 struct sockaddr *to = NULL;
fe8ab488
A
1493 user_ssize_t len_before = 0, len_after;
1494 int need_drop = 0;
1495 size_t size_of_msghdr;
1496 void *umsgp = NULL;
1497 u_int uiocnt;
3e170ce0 1498 int has_addr_or_ctl = 0;
fe8ab488
A
1499
1500 KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0);
1501
1502 error = file_socket(uap->s, &so);
1503 if (error) {
1504 goto out;
1505 }
1506 need_drop = 1;
1507 if (so == NULL) {
1508 error = EBADF;
1509 goto out;
1510 }
fe8ab488
A
1511
1512 /*
1513 * Input parameter range check
1514 */
1515 if (uap->cnt == 0 || uap->cnt > UIO_MAXIOV) {
1516 error = EINVAL;
1517 goto out;
1518 }
3e170ce0
A
1519 /*
1520 * Clip to max currently allowed
1521 */
1522 if (uap->cnt > somaxsendmsgx)
1523 uap->cnt = somaxsendmsgx;
1524
1525 user_msg_x = _MALLOC(uap->cnt * sizeof(struct user_msghdr_x),
fe8ab488 1526 M_TEMP, M_WAITOK | M_ZERO);
3e170ce0
A
1527 if (user_msg_x == NULL) {
1528 DBG_PRINTF("%s _MALLOC() user_msg_x failed\n", __func__);
fe8ab488
A
1529 error = ENOMEM;
1530 goto out;
1531 }
1532 uiop = _MALLOC(uap->cnt * sizeof(struct uio *),
1533 M_TEMP, M_WAITOK | M_ZERO);
1534 if (uiop == NULL) {
3e170ce0 1535 DBG_PRINTF("%s _MALLOC() uiop failed\n", __func__);
fe8ab488
A
1536 error = ENOMEM;
1537 goto out;
1538 }
1539
1540 size_of_msghdr = IS_64BIT_PROCESS(p) ?
1541 sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x);
1542
3e170ce0 1543 umsgp = _MALLOC(uap->cnt * size_of_msghdr,
fe8ab488
A
1544 M_TEMP, M_WAITOK | M_ZERO);
1545 if (umsgp == NULL) {
3e170ce0 1546 printf("%s _MALLOC() user_msg_x failed\n", __func__);
fe8ab488
A
1547 error = ENOMEM;
1548 goto out;
1549 }
1550 error = copyin(uap->msgp, umsgp, uap->cnt * size_of_msghdr);
1551 if (error) {
3e170ce0 1552 DBG_PRINTF("%s copyin() failed\n", __func__);
fe8ab488
A
1553 goto out;
1554 }
1555 error = internalize_user_msghdr_array(umsgp,
1556 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
3e170ce0 1557 UIO_WRITE, uap->cnt, user_msg_x, uiop);
fe8ab488 1558 if (error) {
3e170ce0 1559 DBG_PRINTF("%s copyin_user_msghdr_array() failed\n", __func__);
fe8ab488
A
1560 goto out;
1561 }
1562 /*
1563 * Make sure the size of each message iovec and
1564 * the aggregate size of all the iovec is valid
1565 */
1566 if (uio_array_is_valid(uiop, uap->cnt) == 0) {
1567 error = EINVAL;
1568 goto out;
1569 }
1570
1571 /*
1572 * Sanity check on passed arguments
1573 */
1574 for (i = 0; i < uap->cnt; i++) {
3e170ce0 1575 struct user_msghdr_x *mp = user_msg_x + i;
fe8ab488
A
1576
1577 /*
1578 * No flags on send message
1579 */
1580 if (mp->msg_flags != 0) {
1581 error = EINVAL;
1582 goto out;
1583 }
1584 /*
1585 * No support for address or ancillary data (yet)
1586 */
3e170ce0
A
1587 if (mp->msg_name != USER_ADDR_NULL || mp->msg_namelen != 0)
1588 has_addr_or_ctl = 1;
1589
fe8ab488 1590 if (mp->msg_control != USER_ADDR_NULL ||
3e170ce0
A
1591 mp->msg_controllen != 0)
1592 has_addr_or_ctl = 1;
1593
fe8ab488
A
1594#if CONFIG_MACF_SOCKET_SUBSET
1595 /*
1596 * We check the state without holding the socket lock;
1597 * if a race condition occurs, it would simply result
3e170ce0 1598 * in an extra call to the MAC check function.
fe8ab488
A
1599 *
1600 * Note: The following check is never true taken with the
1601 * current limitation that we do not accept to pass an address,
3e170ce0
A
1602 * this is effectively placeholder code. If we add support for
1603 * addresses, we will have to check every address.
fe8ab488 1604 */
3e170ce0 1605 if (to != NULL &&
fe8ab488 1606 !(so->so_state & SS_DEFUNCT) &&
3e170ce0
A
1607 (error = mac_socket_check_send(kauth_cred_get(), so, to))
1608 != 0)
fe8ab488
A
1609 goto out;
1610#endif /* MAC_SOCKET_SUBSET */
1611 }
1612
1613 len_before = uio_array_resid(uiop, uap->cnt);
1614
3e170ce0
A
1615 /*
1616 * Feed list of packets at once only for connected socket without
1617 * control message
1618 */
1619 if (so->so_proto->pr_usrreqs->pru_sosend_list !=
1620 pru_sosend_list_notsupp &&
1621 has_addr_or_ctl == 0 && somaxsendmsgx == 0) {
1622 error = so->so_proto->pr_usrreqs->pru_sosend_list(so, uiop,
1623 uap->cnt, uap->flags);
1624 } else {
1625 for (i = 0; i < uap->cnt; i++) {
1626 struct user_msghdr_x *mp = user_msg_x + i;
1627 struct user_msghdr user_msg;
1628 uio_t auio = uiop[i];
1629 int32_t tmpval;
1630
1631 user_msg.msg_flags = mp->msg_flags;
1632 user_msg.msg_controllen = mp->msg_controllen;
1633 user_msg.msg_control = mp->msg_control;
1634 user_msg.msg_iovlen = mp->msg_iovlen;
1635 user_msg.msg_iov = mp->msg_iov;
1636 user_msg.msg_namelen = mp->msg_namelen;
1637 user_msg.msg_name = mp->msg_name;
1638
1639 error = sendit(p, so, &user_msg, auio, uap->flags,
1640 &tmpval);
1641 if (error != 0)
1642 break;
1643 }
1644 }
fe8ab488
A
1645 len_after = uio_array_resid(uiop, uap->cnt);
1646
3e170ce0
A
1647 VERIFY(len_after <= len_before);
1648
fe8ab488
A
1649 if (error != 0) {
1650 if (len_after != len_before && (error == ERESTART ||
3e170ce0
A
1651 error == EINTR || error == EWOULDBLOCK ||
1652 error == ENOBUFS))
fe8ab488
A
1653 error = 0;
1654 /* Generation of SIGPIPE can be controlled per socket */
1655 if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE))
1656 psignal(p, SIGPIPE);
1657 }
1658 if (error == 0) {
1659 uiocnt = externalize_user_msghdr_array(umsgp,
1660 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
3e170ce0 1661 UIO_WRITE, uap->cnt, user_msg_x, uiop);
fe8ab488
A
1662
1663 *retval = (int)(uiocnt);
1664 }
1665out:
1666 if (need_drop)
1667 file_drop(uap->s);
1668 if (umsgp != NULL)
1669 _FREE(umsgp, M_TEMP);
1670 if (uiop != NULL) {
1671 free_uio_array(uiop, uap->cnt);
1672 _FREE(uiop, M_TEMP);
1673 }
3e170ce0
A
1674 if (user_msg_x != NULL)
1675 _FREE(user_msg_x, M_TEMP);
fe8ab488
A
1676
1677 KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0);
1678
1679 return (error);
1680}
1681
3e170ce0
A
1682
1683static int
1684copyout_sa(struct sockaddr *fromsa, user_addr_t name, socklen_t *namelen)
1685{
1686 int error = 0;
1687 socklen_t sa_len = 0;
1688 ssize_t len;
1689
1690 len = *namelen;
1691 if (len <= 0 || fromsa == 0) {
1692 len = 0;
1693 } else {
1694#ifndef MIN
1695#define MIN(a, b) ((a) > (b) ? (b) : (a))
1696#endif
1697 sa_len = fromsa->sa_len;
1698 len = MIN((unsigned int)len, sa_len);
1699 error = copyout(fromsa, name, (unsigned)len);
1700 if (error)
1701 goto out;
1702 }
1703 *namelen = sa_len;
1704out:
1705 return (0);
1706}
1707
1708static int
1709copyout_control(struct proc *p, struct mbuf *m, user_addr_t control,
1710 socklen_t *controllen, int *flags)
1711{
1712 int error = 0;
1713 ssize_t len;
1714 user_addr_t ctlbuf;
1715
1716 len = *controllen;
1717 *controllen = 0;
1718 ctlbuf = control;
1719
1720 while (m && len > 0) {
1721 unsigned int tocopy;
1722 struct cmsghdr *cp = mtod(m, struct cmsghdr *);
1723 int cp_size = CMSG_ALIGN(cp->cmsg_len);
1724 int buflen = m->m_len;
1725
1726 while (buflen > 0 && len > 0) {
1727 /*
1728 * SCM_TIMESTAMP hack because struct timeval has a
1729 * different size for 32 bits and 64 bits processes
1730 */
1731 if (cp->cmsg_level == SOL_SOCKET && cp->cmsg_type == SCM_TIMESTAMP) {
1732 unsigned char tmp_buffer[CMSG_SPACE(sizeof(struct user64_timeval))];
1733 struct cmsghdr *tmp_cp = (struct cmsghdr *)(void *)tmp_buffer;
1734 int tmp_space;
1735 struct timeval *tv = (struct timeval *)(void *)CMSG_DATA(cp);
1736
1737 tmp_cp->cmsg_level = SOL_SOCKET;
1738 tmp_cp->cmsg_type = SCM_TIMESTAMP;
1739
1740 if (proc_is64bit(p)) {
1741 struct user64_timeval *tv64 = (struct user64_timeval *)(void *)CMSG_DATA(tmp_cp);
1742
1743 tv64->tv_sec = tv->tv_sec;
1744 tv64->tv_usec = tv->tv_usec;
1745
1746 tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user64_timeval));
1747 tmp_space = CMSG_SPACE(sizeof(struct user64_timeval));
1748 } else {
1749 struct user32_timeval *tv32 = (struct user32_timeval *)(void *)CMSG_DATA(tmp_cp);
1750
1751 tv32->tv_sec = tv->tv_sec;
1752 tv32->tv_usec = tv->tv_usec;
1753
1754 tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user32_timeval));
1755 tmp_space = CMSG_SPACE(sizeof(struct user32_timeval));
1756 }
1757 if (len >= tmp_space) {
1758 tocopy = tmp_space;
1759 } else {
1760 *flags |= MSG_CTRUNC;
1761 tocopy = len;
1762 }
1763 error = copyout(tmp_buffer, ctlbuf, tocopy);
1764 if (error)
1765 goto out;
1766 } else {
1767 if (cp_size > buflen) {
1768 panic("cp_size > buflen, something"
1769 "wrong with alignment!");
1770 }
1771 if (len >= cp_size) {
1772 tocopy = cp_size;
1773 } else {
1774 *flags |= MSG_CTRUNC;
1775 tocopy = len;
1776 }
1777 error = copyout((caddr_t) cp, ctlbuf, tocopy);
1778 if (error)
1779 goto out;
1780 }
1781
1782 ctlbuf += tocopy;
1783 len -= tocopy;
1784
1785 buflen -= cp_size;
1786 cp = (struct cmsghdr *)(void *)
1787 ((unsigned char *) cp + cp_size);
1788 cp_size = CMSG_ALIGN(cp->cmsg_len);
1789 }
1790
1791 m = m->m_next;
1792 }
1793 *controllen = ctlbuf - control;
1794out:
1795 return (error);
1796}
1797
2d21ac55
A
1798/*
1799 * Returns: 0 Success
1800 * ENOTSOCK
1801 * EINVAL
1802 * EBADF
1803 * EACCES Mandatory Access Control failure
1804 * copyout:EFAULT
1805 * fp_lookup:EBADF
1806 * <pru_soreceive>:ENOBUFS
1807 * <pru_soreceive>:ENOTCONN
1808 * <pru_soreceive>:EWOULDBLOCK
1809 * <pru_soreceive>:EFAULT
1810 * <pru_soreceive>:EINTR
1811 * <pru_soreceive>:EBADF
1812 * <pru_soreceive>:EINVAL
1813 * <pru_soreceive>:EMSGSIZE
1814 * <pru_soreceive>:???
1815 *
1816 * Notes: Additional return values from calls through <pru_soreceive>
1817 * depend on protocols other than TCP or AF_UNIX, which are
1818 * documented above.
1819 */
1c79356b 1820static int
2d21ac55 1821recvit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop,
b0d623f7 1822 user_addr_t namelenp, int32_t *retval)
1c79356b 1823{
39236c6e
A
1824 ssize_t len;
1825 int error;
3e170ce0 1826 struct mbuf *control = 0;
1c79356b
A
1827 struct socket *so;
1828 struct sockaddr *fromsa = 0;
91447636 1829 struct fileproc *fp;
1c79356b 1830
2d21ac55 1831 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
91447636 1832 proc_fdlock(p);
2d21ac55
A
1833 if ((error = fp_lookup(p, s, &fp, 1))) {
1834 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
91447636 1835 proc_fdunlock(p);
2d21ac55 1836 return (error);
1c79356b 1837 }
91447636 1838 if (fp->f_type != DTYPE_SOCKET) {
2d21ac55 1839 fp_drop(p, s, fp, 1);
91447636 1840 proc_fdunlock(p);
2d21ac55 1841 return (ENOTSOCK);
91447636 1842 }
1c79356b 1843
2d21ac55
A
1844 so = (struct socket *)fp->f_data;
1845 if (so == NULL) {
1846 fp_drop(p, s, fp, 1);
1847 proc_fdunlock(p);
1848 return (EBADF);
1849 }
91447636
A
1850
1851 proc_fdunlock(p);
2d21ac55
A
1852
1853#if CONFIG_MACF_SOCKET_SUBSET
1854 /*
1855 * We check the state without holding the socket lock;
1856 * if a race condition occurs, it would simply result
1857 * in an extra call to the MAC check function.
1858 */
316670eb
A
1859 if (!(so->so_state & SS_DEFUNCT) &&
1860 !(so->so_state & SS_ISCONNECTED) &&
39236c6e 1861 !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
2d21ac55
A
1862 (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0)
1863 goto out1;
1864#endif /* MAC_SOCKET_SUBSET */
91447636 1865 if (uio_resid(uiop) < 0) {
2d21ac55 1866 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, EINVAL, 0, 0, 0, 0);
91447636
A
1867 error = EINVAL;
1868 goto out1;
1c79356b 1869 }
91447636
A
1870
1871 len = uio_resid(uiop);
2d21ac55
A
1872 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, uiop,
1873 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
1874 &mp->msg_flags);
b0d623f7
A
1875 if (fromsa)
1876 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()),
1877 fromsa);
1c79356b 1878 if (error) {
91447636 1879 if (uio_resid(uiop) != len && (error == ERESTART ||
1c79356b
A
1880 error == EINTR || error == EWOULDBLOCK))
1881 error = 0;
1882 }
1c79356b
A
1883 if (error)
1884 goto out;
2d21ac55 1885
91447636 1886 *retval = len - uio_resid(uiop);
2d21ac55 1887
3e170ce0
A
1888 if (mp->msg_name) {
1889 error = copyout_sa(fromsa, mp->msg_name, &mp->msg_namelen);
1890 if (error)
1891 goto out;
2d21ac55 1892 /* return the actual, untruncated address length */
1c79356b 1893 if (namelenp &&
3e170ce0 1894 (error = copyout((caddr_t)&mp->msg_namelen, namelenp,
2d21ac55 1895 sizeof (int)))) {
1c79356b
A
1896 goto out;
1897 }
1898 }
39236c6e 1899
3e170ce0
A
1900 if (mp->msg_control) {
1901 error = copyout_control(p, control, mp->msg_control,
1902 &mp->msg_controllen, &mp->msg_flags);
1c79356b
A
1903 }
1904out:
1905 if (fromsa)
1906 FREE(fromsa, M_SONAME);
1907 if (control)
1908 m_freem(control);
2d21ac55 1909 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
91447636
A
1910out1:
1911 fp_drop(p, s, fp, 0);
1c79356b
A
1912 return (error);
1913}
1914
2d21ac55
A
1915/*
1916 * Returns: 0 Success
1917 * ENOMEM
1918 * copyin:EFAULT
1919 * recvit:???
1920 * read:??? [4056224: applicable for pipes]
1921 *
1922 * Notes: The read entry point is only called as part of support for
1923 * binary backward compatibility; new code should use read
1924 * instead of recv or recvfrom when attempting to read data
1925 * from pipes.
1926 *
1927 * For full documentation of the return codes from recvit, see
1928 * the block header for the recvit function.
1929 */
1930int
b0d623f7 1931recvfrom(struct proc *p, struct recvfrom_args *uap, int32_t *retval)
2d21ac55
A
1932{
1933 __pthread_testcancel(1);
3e170ce0
A
1934 return (recvfrom_nocancel(p, (struct recvfrom_nocancel_args *)uap,
1935 retval));
2d21ac55
A
1936}
1937
1c79356b 1938int
3e170ce0
A
1939recvfrom_nocancel(struct proc *p, struct recvfrom_nocancel_args *uap,
1940 int32_t *retval)
1c79356b 1941{
91447636 1942 struct user_msghdr msg;
1c79356b 1943 int error;
91447636 1944 uio_t auio = NULL;
1c79356b 1945
2d21ac55 1946 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_START, 0, 0, 0, 0, 0);
55e303ae 1947 AUDIT_ARG(fd, uap->s);
1c79356b
A
1948
1949 if (uap->fromlenaddr) {
91447636 1950 error = copyin(uap->fromlenaddr,
1c79356b
A
1951 (caddr_t)&msg.msg_namelen, sizeof (msg.msg_namelen));
1952 if (error)
1953 return (error);
2d21ac55 1954 } else {
1c79356b 1955 msg.msg_namelen = 0;
2d21ac55 1956 }
1c79356b 1957 msg.msg_name = uap->from;
91447636 1958 auio = uio_create(1, 0,
2d21ac55
A
1959 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1960 UIO_READ);
91447636
A
1961 if (auio == NULL) {
1962 return (ENOMEM);
1963 }
2d21ac55 1964
91447636
A
1965 uio_addiov(auio, uap->buf, uap->len);
1966 /* no need to set up msg_iov. recvit uses uio_t we send it */
1967 msg.msg_iov = 0;
1968 msg.msg_iovlen = 0;
1c79356b 1969 msg.msg_control = 0;
91447636 1970 msg.msg_controllen = 0;
1c79356b 1971 msg.msg_flags = uap->flags;
91447636
A
1972 error = recvit(p, uap->s, &msg, auio, uap->fromlenaddr, retval);
1973 if (auio != NULL) {
1974 uio_free(auio);
1975 }
2d21ac55 1976
2d21ac55 1977 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_END, error, 0, 0, 0, 0);
1c79356b 1978
2d21ac55 1979 return (error);
1c79356b
A
1980}
1981
1982/*
2d21ac55
A
1983 * Returns: 0 Success
1984 * EMSGSIZE
1985 * ENOMEM
1986 * copyin:EFAULT
1987 * copyout:EFAULT
1988 * recvit:???
1989 *
1990 * Notes: For full documentation of the return codes from recvit, see
1991 * the block header for the recvit function.
1c79356b
A
1992 */
int
recvmsg(struct proc *p, struct recvmsg_args *uap, int32_t *retval)
{
	struct recvmsg_nocancel_args *nc_uap;

	/* Run the cancellation check before doing any real work. */
	__pthread_testcancel(1);

	/* The argument block is handed to the _nocancel variant via a cast. */
	nc_uap = (struct recvmsg_nocancel_args *)uap;

	return (recvmsg_nocancel(p, nc_uap, retval));
}
1c79356b
A
2000
2001int
3e170ce0
A
2002recvmsg_nocancel(struct proc *p, struct recvmsg_nocancel_args *uap,
2003 int32_t *retval)
1c79356b 2004{
b0d623f7
A
2005 struct user32_msghdr msg32;
2006 struct user64_msghdr msg64;
91447636
A
2007 struct user_msghdr user_msg;
2008 caddr_t msghdrp;
2009 int size_of_msghdr;
2010 user_addr_t uiov;
2d21ac55 2011 int error;
91447636
A
2012 uio_t auio = NULL;
2013 struct user_iovec *iovp;
1c79356b 2014
2d21ac55 2015 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
55e303ae 2016 AUDIT_ARG(fd, uap->s);
91447636 2017 if (IS_64BIT_PROCESS(p)) {
b0d623f7
A
2018 msghdrp = (caddr_t)&msg64;
2019 size_of_msghdr = sizeof (msg64);
2d21ac55 2020 } else {
b0d623f7
A
2021 msghdrp = (caddr_t)&msg32;
2022 size_of_msghdr = sizeof (msg32);
91447636
A
2023 }
2024 error = copyin(uap->msg, msghdrp, size_of_msghdr);
2d21ac55
A
2025 if (error) {
2026 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1c79356b
A
2027 return (error);
2028 }
2029
91447636 2030 /* only need to copy if user process is not 64-bit */
b0d623f7
A
2031 if (IS_64BIT_PROCESS(p)) {
2032 user_msg.msg_flags = msg64.msg_flags;
2033 user_msg.msg_controllen = msg64.msg_controllen;
2034 user_msg.msg_control = msg64.msg_control;
2035 user_msg.msg_iovlen = msg64.msg_iovlen;
2036 user_msg.msg_iov = msg64.msg_iov;
2037 user_msg.msg_namelen = msg64.msg_namelen;
2038 user_msg.msg_name = msg64.msg_name;
2039 } else {
2040 user_msg.msg_flags = msg32.msg_flags;
2041 user_msg.msg_controllen = msg32.msg_controllen;
2042 user_msg.msg_control = msg32.msg_control;
2043 user_msg.msg_iovlen = msg32.msg_iovlen;
2044 user_msg.msg_iov = msg32.msg_iov;
2045 user_msg.msg_namelen = msg32.msg_namelen;
2046 user_msg.msg_name = msg32.msg_name;
91447636
A
2047 }
2048
2049 if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
2d21ac55
A
2050 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, EMSGSIZE,
2051 0, 0, 0, 0);
91447636
A
2052 return (EMSGSIZE);
2053 }
2054
91447636 2055 user_msg.msg_flags = uap->flags;
91447636
A
2056
2057 /* allocate a uio large enough to hold the number of iovecs passed */
2058 auio = uio_create(user_msg.msg_iovlen, 0,
2d21ac55
A
2059 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
2060 UIO_READ);
91447636
A
2061 if (auio == NULL) {
2062 error = ENOMEM;
2063 goto done;
2064 }
2065
2d21ac55
A
2066 /*
2067 * get location of iovecs within the uio. then copyin the iovecs from
91447636
A
2068 * user space.
2069 */
2070 iovp = uio_iovsaddr(auio);
2071 if (iovp == NULL) {
2072 error = ENOMEM;
2073 goto done;
2074 }
2075 uiov = user_msg.msg_iov;
2076 user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
b0d623f7
A
2077 error = copyin_user_iovec_array(uiov,
2078 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
2079 user_msg.msg_iovlen, iovp);
1c79356b
A
2080 if (error)
2081 goto done;
91447636 2082
2d21ac55 2083 /* finish setup of uio_t */
39236c6e
A
2084 error = uio_calculateresid(auio);
2085 if (error) {
2086 goto done;
2087 }
2d21ac55 2088
91447636 2089 error = recvit(p, uap->s, &user_msg, auio, 0, retval);
1c79356b 2090 if (!error) {
91447636 2091 user_msg.msg_iov = uiov;
b0d623f7
A
2092 if (IS_64BIT_PROCESS(p)) {
2093 msg64.msg_flags = user_msg.msg_flags;
2094 msg64.msg_controllen = user_msg.msg_controllen;
2095 msg64.msg_control = user_msg.msg_control;
2096 msg64.msg_iovlen = user_msg.msg_iovlen;
2097 msg64.msg_iov = user_msg.msg_iov;
2098 msg64.msg_namelen = user_msg.msg_namelen;
2099 msg64.msg_name = user_msg.msg_name;
2100 } else {
2101 msg32.msg_flags = user_msg.msg_flags;
2102 msg32.msg_controllen = user_msg.msg_controllen;
2103 msg32.msg_control = user_msg.msg_control;
2104 msg32.msg_iovlen = user_msg.msg_iovlen;
2105 msg32.msg_iov = user_msg.msg_iov;
2106 msg32.msg_namelen = user_msg.msg_namelen;
2107 msg32.msg_name = user_msg.msg_name;
91447636
A
2108 }
2109 error = copyout(msghdrp, uap->msg, size_of_msghdr);
1c79356b
A
2110 }
2111done:
91447636
A
2112 if (auio != NULL) {
2113 uio_free(auio);
2114 }
2d21ac55 2115 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1c79356b
A
2116 return (error);
2117}
2118
fe8ab488
A
2119int
2120recvmsg_x(struct proc *p, struct recvmsg_x_args *uap, user_ssize_t *retval)
2121{
2122 int error = EOPNOTSUPP;
3e170ce0
A
2123 struct user_msghdr_x *user_msg_x = NULL;
2124 struct recv_msg_elem *recv_msg_array = NULL;
fe8ab488
A
2125 struct socket *so;
2126 user_ssize_t len_before = 0, len_after;
2127 int need_drop = 0;
2128 size_t size_of_msghdr;
2129 void *umsgp = NULL;
2130 u_int i;
2131 u_int uiocnt;
2132
2133 KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0);
2134
2135 error = file_socket(uap->s, &so);
2136 if (error) {
2137 goto out;
2138 }
2139 need_drop = 1;
2140 if (so == NULL) {
2141 error = EBADF;
2142 goto out;
2143 }
fe8ab488
A
2144 /*
2145 * Input parameter range check
2146 */
2147 if (uap->cnt == 0 || uap->cnt > UIO_MAXIOV) {
2148 error = EINVAL;
2149 goto out;
2150 }
3e170ce0
A
2151 if (uap->cnt > somaxrecvmsgx)
2152 uap->cnt = somaxrecvmsgx;
2153
2154 user_msg_x = _MALLOC(uap->cnt * sizeof(struct user_msghdr_x),
fe8ab488 2155 M_TEMP, M_WAITOK | M_ZERO);
3e170ce0
A
2156 if (user_msg_x == NULL) {
2157 DBG_PRINTF("%s _MALLOC() user_msg_x failed\n", __func__);
fe8ab488
A
2158 error = ENOMEM;
2159 goto out;
2160 }
3e170ce0
A
2161 recv_msg_array = alloc_recv_msg_array(uap->cnt);
2162 if (recv_msg_array == NULL) {
2163 DBG_PRINTF("%s alloc_recv_msg_array() failed\n", __func__);
fe8ab488
A
2164 error = ENOMEM;
2165 goto out;
2166 }
fe8ab488
A
2167 size_of_msghdr = IS_64BIT_PROCESS(p) ?
2168 sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x);
2169
2170 umsgp = _MALLOC(uap->cnt * size_of_msghdr, M_TEMP, M_WAITOK | M_ZERO);
2171 if (umsgp == NULL) {
3e170ce0 2172 DBG_PRINTF("%s _MALLOC() umsgp failed\n", __func__);
fe8ab488
A
2173 error = ENOMEM;
2174 goto out;
2175 }
2176 error = copyin(uap->msgp, umsgp, uap->cnt * size_of_msghdr);
2177 if (error) {
3e170ce0 2178 DBG_PRINTF("%s copyin() failed\n", __func__);
fe8ab488
A
2179 goto out;
2180 }
3e170ce0 2181 error = internalize_recv_msghdr_array(umsgp,
fe8ab488 2182 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
3e170ce0 2183 UIO_READ, uap->cnt, user_msg_x, recv_msg_array);
fe8ab488 2184 if (error) {
3e170ce0 2185 DBG_PRINTF("%s copyin_user_msghdr_array() failed\n", __func__);
fe8ab488
A
2186 goto out;
2187 }
2188 /*
2189 * Make sure the size of each message iovec and
2190 * the aggregate size of all the iovec is valid
2191 */
3e170ce0 2192 if (recv_msg_array_is_valid(recv_msg_array, uap->cnt) == 0) {
fe8ab488
A
2193 error = EINVAL;
2194 goto out;
2195 }
fe8ab488
A
2196 /*
2197 * Sanity check on passed arguments
2198 */
2199 for (i = 0; i < uap->cnt; i++) {
3e170ce0 2200 struct user_msghdr_x *mp = user_msg_x + i;
fe8ab488
A
2201
2202 if (mp->msg_flags != 0) {
2203 error = EINVAL;
2204 goto out;
2205 }
fe8ab488
A
2206 }
2207#if CONFIG_MACF_SOCKET_SUBSET
2208 /*
2209 * We check the state without holding the socket lock;
2210 * if a race condition occurs, it would simply result
2211 * in an extra call to the MAC check function.
2212 */
2213 if (!(so->so_state & SS_DEFUNCT) &&
2214 !(so->so_state & SS_ISCONNECTED) &&
2215 !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
2216 (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0)
2217 goto out;
2218#endif /* MAC_SOCKET_SUBSET */
2219
3e170ce0 2220 len_before = recv_msg_array_resid(recv_msg_array, uap->cnt);
fe8ab488 2221
3e170ce0
A
2222 if (so->so_proto->pr_usrreqs->pru_soreceive_list !=
2223 pru_soreceive_list_notsupp &&
2224 somaxrecvmsgx == 0) {
2225 error = so->so_proto->pr_usrreqs->pru_soreceive_list(so,
2226 recv_msg_array, uap->cnt, &uap->flags);
2227 } else {
2228 int flags = uap->flags;
fe8ab488 2229
3e170ce0
A
2230 for (i = 0; i < uap->cnt; i++) {
2231 struct recv_msg_elem *recv_msg_elem;
2232 uio_t auio;
2233 struct sockaddr **psa;
2234 struct mbuf **controlp;
2235
2236 recv_msg_elem = recv_msg_array + i;
2237 auio = recv_msg_elem->uio;
2238
2239 /*
2240 * Do not block if we got at least one packet
2241 */
2242 if (i > 0)
2243 flags |= MSG_DONTWAIT;
2244
2245 psa = (recv_msg_elem->which & SOCK_MSG_SA) ?
2246 &recv_msg_elem->psa : NULL;
2247 controlp = (recv_msg_elem->which & SOCK_MSG_CONTROL) ?
2248 &recv_msg_elem->controlp : NULL;
2249
2250 error = so->so_proto->pr_usrreqs->pru_soreceive(so, psa,
2251 auio, (struct mbuf **)0, controlp, &flags);
2252 if (error)
2253 break;
2254 /*
2255 * We have some data
2256 */
2257 recv_msg_elem->which |= SOCK_MSG_DATA;
2258 /*
2259 * Stop on partial copy
2260 */
2261 if (flags & (MSG_RCVMORE | MSG_TRUNC))
2262 break;
2263 }
2264 if ((uap->flags & MSG_DONTWAIT) == 0)
2265 flags &= ~MSG_DONTWAIT;
2266 uap->flags = flags;
2267 }
2268
2269 len_after = recv_msg_array_resid(recv_msg_array, uap->cnt);
fe8ab488
A
2270
2271 if (error) {
2272 if (len_after != len_before && (error == ERESTART ||
2273 error == EINTR || error == EWOULDBLOCK))
2274 error = 0;
3e170ce0
A
2275 else
2276 goto out;
fe8ab488 2277 }
fe8ab488 2278
3e170ce0
A
2279 uiocnt = externalize_recv_msghdr_array(umsgp,
2280 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
2281 UIO_READ, uap->cnt, user_msg_x, recv_msg_array);
2282
2283 error = copyout(umsgp, uap->msgp, uap->cnt * size_of_msghdr);
2284 if (error) {
2285 DBG_PRINTF("%s copyout() failed\n", __func__);
2286 goto out;
2287 }
2288 *retval = (int)(uiocnt);
2289
2290 for (i = 0; i < uap->cnt; i++) {
2291 struct user_msghdr_x *mp = user_msg_x + i;
2292 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
2293 struct sockaddr *fromsa = recv_msg_elem->psa;
2294
2295 if (mp->msg_name) {
2296 error = copyout_sa(fromsa, mp->msg_name,
2297 &mp->msg_namelen);
2298 if (error)
2299 goto out;
2300 }
2301 if (mp->msg_control) {
2302 error = copyout_control(p, recv_msg_elem->controlp,
2303 mp->msg_control, &mp->msg_controllen,
2304 &mp->msg_flags);
2305 if (error)
2306 goto out;
fe8ab488 2307 }
fe8ab488
A
2308 }
2309out:
2310 if (need_drop)
2311 file_drop(uap->s);
2312 if (umsgp != NULL)
2313 _FREE(umsgp, M_TEMP);
3e170ce0
A
2314 if (recv_msg_array != NULL)
2315 free_recv_msg_array(recv_msg_array, uap->cnt);
2316 if (user_msg_x != NULL)
2317 _FREE(user_msg_x, M_TEMP);
2318
fe8ab488 2319 KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0);
3e170ce0 2320
fe8ab488
A
2321 return (error);
2322}
2323
2d21ac55
A
2324/*
2325 * Returns: 0 Success
2326 * EBADF
2327 * file_socket:ENOTSOCK
2328 * file_socket:EBADF
2329 * soshutdown:EINVAL
2330 * soshutdown:ENOTCONN
2331 * soshutdown:EADDRNOTAVAIL[TCP]
2332 * soshutdown:ENOBUFS[TCP]
2333 * soshutdown:EMSGSIZE[TCP]
2334 * soshutdown:EHOSTUNREACH[TCP]
2335 * soshutdown:ENETUNREACH[TCP]
2336 * soshutdown:ENETDOWN[TCP]
2337 * soshutdown:ENOMEM[TCP]
2338 * soshutdown:EACCES[TCP]
2339 * soshutdown:EMSGSIZE[TCP]
2340 * soshutdown:ENOBUFS[TCP]
2341 * soshutdown:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
2342 * soshutdown:??? [other protocol families]
2343 */
1c79356b
A
2344/* ARGSUSED */
2345int
2d21ac55 2346shutdown(__unused struct proc *p, struct shutdown_args *uap,
b0d623f7 2347 __unused int32_t *retval)
1c79356b 2348{
2d21ac55 2349 struct socket *so;
1c79356b
A
2350 int error;
2351
55e303ae 2352 AUDIT_ARG(fd, uap->s);
91447636 2353 error = file_socket(uap->s, &so);
1c79356b
A
2354 if (error)
2355 return (error);
91447636
A
2356 if (so == NULL) {
2357 error = EBADF;
2358 goto out;
2359 }
2360 error = soshutdown((struct socket *)so, uap->how);
2361out:
2362 file_drop(uap->s);
2d21ac55 2363 return (error);
1c79356b
A
2364}
2365
2d21ac55
A
2366/*
2367 * Returns: 0 Success
2368 * EFAULT
2369 * EINVAL
2370 * EACCES Mandatory Access Control failure
2371 * file_socket:ENOTSOCK
2372 * file_socket:EBADF
2373 * sosetopt:EINVAL
2374 * sosetopt:ENOPROTOOPT
2375 * sosetopt:ENOBUFS
2376 * sosetopt:EDOM
2377 * sosetopt:EFAULT
2378 * sosetopt:EOPNOTSUPP[AF_UNIX]
2379 * sosetopt:???
2380 */
1c79356b
A
2381/* ARGSUSED */
2382int
2d21ac55 2383setsockopt(struct proc *p, struct setsockopt_args *uap,
b0d623f7 2384 __unused int32_t *retval)
1c79356b 2385{
2d21ac55 2386 struct socket *so;
1c79356b
A
2387 struct sockopt sopt;
2388 int error;
2389
55e303ae 2390 AUDIT_ARG(fd, uap->s);
1c79356b
A
2391 if (uap->val == 0 && uap->valsize != 0)
2392 return (EFAULT);
2d21ac55 2393 /* No bounds checking on size (it's unsigned) */
1c79356b 2394
91447636 2395 error = file_socket(uap->s, &so);
1c79356b
A
2396 if (error)
2397 return (error);
2398
2399 sopt.sopt_dir = SOPT_SET;
2400 sopt.sopt_level = uap->level;
2401 sopt.sopt_name = uap->name;
2402 sopt.sopt_val = uap->val;
2403 sopt.sopt_valsize = uap->valsize;
2404 sopt.sopt_p = p;
2405
91447636
A
2406 if (so == NULL) {
2407 error = EINVAL;
2408 goto out;
2409 }
2d21ac55
A
2410#if CONFIG_MACF_SOCKET_SUBSET
2411 if ((error = mac_socket_check_setsockopt(kauth_cred_get(), so,
2412 &sopt)) != 0)
2413 goto out;
2414#endif /* MAC_SOCKET_SUBSET */
39236c6e 2415 error = sosetoptlock(so, &sopt, 1); /* will lock socket */
91447636
A
2416out:
2417 file_drop(uap->s);
2d21ac55 2418 return (error);
1c79356b
A
2419}
2420
2421
2422
2d21ac55
A
2423/*
2424 * Returns: 0 Success
2425 * EINVAL
2426 * EBADF
2427 * EACCES Mandatory Access Control failure
2428 * copyin:EFAULT
2429 * copyout:EFAULT
2430 * file_socket:ENOTSOCK
2431 * file_socket:EBADF
2432 * sogetopt:???
2433 */
1c79356b 2434int
2d21ac55 2435getsockopt(struct proc *p, struct getsockopt_args *uap,
b0d623f7 2436 __unused int32_t *retval)
1c79356b 2437{
91447636
A
2438 int error;
2439 socklen_t valsize;
2440 struct sockopt sopt;
2d21ac55 2441 struct socket *so;
1c79356b 2442
91447636 2443 error = file_socket(uap->s, &so);
1c79356b
A
2444 if (error)
2445 return (error);
2446 if (uap->val) {
2d21ac55
A
2447 error = copyin(uap->avalsize, (caddr_t)&valsize,
2448 sizeof (valsize));
1c79356b 2449 if (error)
91447636 2450 goto out;
2d21ac55
A
2451 /* No bounds checking on size (it's unsigned) */
2452 } else {
1c79356b 2453 valsize = 0;
2d21ac55 2454 }
1c79356b
A
2455 sopt.sopt_dir = SOPT_GET;
2456 sopt.sopt_level = uap->level;
2457 sopt.sopt_name = uap->name;
2458 sopt.sopt_val = uap->val;
2459 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
2460 sopt.sopt_p = p;
2461
91447636
A
2462 if (so == NULL) {
2463 error = EBADF;
2464 goto out;
2465 }
2d21ac55
A
2466#if CONFIG_MACF_SOCKET_SUBSET
2467 if ((error = mac_socket_check_getsockopt(kauth_cred_get(), so,
2468 &sopt)) != 0)
2469 goto out;
2470#endif /* MAC_SOCKET_SUBSET */
39236c6e 2471 error = sogetoptlock((struct socket *)so, &sopt, 1); /* will lock */
1c79356b
A
2472 if (error == 0) {
2473 valsize = sopt.sopt_valsize;
2d21ac55
A
2474 error = copyout((caddr_t)&valsize, uap->avalsize,
2475 sizeof (valsize));
1c79356b 2476 }
91447636
A
2477out:
2478 file_drop(uap->s);
1c79356b
A
2479 return (error);
2480}
2481
2482
2483/*
2484 * Get socket name.
2d21ac55
A
2485 *
2486 * Returns: 0 Success
2487 * EBADF
2488 * file_socket:ENOTSOCK
2489 * file_socket:EBADF
2490 * copyin:EFAULT
2491 * copyout:EFAULT
2492 * <pru_sockaddr>:ENOBUFS[TCP]
2493 * <pru_sockaddr>:ECONNRESET[TCP]
2494 * <pru_sockaddr>:EINVAL[AF_UNIX]
2495 * <sf_getsockname>:???
1c79356b
A
2496 */
2497/* ARGSUSED */
2d21ac55
A
2498int
2499getsockname(__unused struct proc *p, struct getsockname_args *uap,
b0d623f7 2500 __unused int32_t *retval)
1c79356b 2501{
91447636 2502 struct socket *so;
1c79356b 2503 struct sockaddr *sa;
91447636 2504 socklen_t len;
2d21ac55 2505 socklen_t sa_len;
1c79356b
A
2506 int error;
2507
91447636 2508 error = file_socket(uap->fdes, &so);
1c79356b
A
2509 if (error)
2510 return (error);
2d21ac55 2511 error = copyin(uap->alen, (caddr_t)&len, sizeof (socklen_t));
1c79356b 2512 if (error)
91447636
A
2513 goto out;
2514 if (so == NULL) {
2515 error = EBADF;
2516 goto out;
2517 }
1c79356b 2518 sa = 0;
91447636 2519 socket_lock(so, 1);
1c79356b 2520 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
2d21ac55 2521 if (error == 0) {
6d2010ae 2522 error = sflt_getsockname(so, &sa);
91447636
A
2523 if (error == EJUSTRETURN)
2524 error = 0;
91447636
A
2525 }
2526 socket_unlock(so, 1);
1c79356b
A
2527 if (error)
2528 goto bad;
2529 if (sa == 0) {
2530 len = 0;
2531 goto gotnothing;
2532 }
2533
2d21ac55
A
2534 sa_len = sa->sa_len;
2535 len = MIN(len, sa_len);
91447636 2536 error = copyout((caddr_t)sa, uap->asa, len);
2d21ac55
A
2537 if (error)
2538 goto bad;
2539 /* return the actual, untruncated address length */
2540 len = sa_len;
1c79356b 2541gotnothing:
2d21ac55 2542 error = copyout((caddr_t)&len, uap->alen, sizeof (socklen_t));
1c79356b
A
2543bad:
2544 if (sa)
2545 FREE(sa, M_SONAME);
91447636
A
2546out:
2547 file_drop(uap->fdes);
1c79356b
A
2548 return (error);
2549}
2550
1c79356b
A
2551/*
2552 * Get name of peer for connected socket.
2d21ac55
A
2553 *
2554 * Returns: 0 Success
2555 * EBADF
2556 * EINVAL
2557 * ENOTCONN
2558 * file_socket:ENOTSOCK
2559 * file_socket:EBADF
2560 * copyin:EFAULT
2561 * copyout:EFAULT
2562 * <pru_peeraddr>:???
2563 * <sf_getpeername>:???
1c79356b
A
2564 */
2565/* ARGSUSED */
2566int
2d21ac55 2567getpeername(__unused struct proc *p, struct getpeername_args *uap,
b0d623f7 2568 __unused int32_t *retval)
1c79356b 2569{
91447636 2570 struct socket *so;
1c79356b 2571 struct sockaddr *sa;
91447636 2572 socklen_t len;
2d21ac55 2573 socklen_t sa_len;
1c79356b
A
2574 int error;
2575
91447636 2576 error = file_socket(uap->fdes, &so);
1c79356b
A
2577 if (error)
2578 return (error);
91447636
A
2579 if (so == NULL) {
2580 error = EBADF;
2581 goto out;
2582 }
2583
2584 socket_lock(so, 1);
2585
2d21ac55
A
2586 if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
2587 (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
2588 /* the socket has been shutdown, no more getpeername's */
2589 socket_unlock(so, 1);
2590 error = EINVAL;
2591 goto out;
2592 }
2593
91447636
A
2594 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
2595 socket_unlock(so, 1);
2596 error = ENOTCONN;
2597 goto out;
2598 }
2d21ac55 2599 error = copyin(uap->alen, (caddr_t)&len, sizeof (socklen_t));
91447636
A
2600 if (error) {
2601 socket_unlock(so, 1);
2602 goto out;
2603 }
1c79356b
A
2604 sa = 0;
2605 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
2d21ac55 2606 if (error == 0) {
6d2010ae 2607 error = sflt_getpeername(so, &sa);
91447636
A
2608 if (error == EJUSTRETURN)
2609 error = 0;
91447636
A
2610 }
2611 socket_unlock(so, 1);
1c79356b
A
2612 if (error)
2613 goto bad;
2614 if (sa == 0) {
2615 len = 0;
2616 goto gotnothing;
2617 }
2d21ac55
A
2618 sa_len = sa->sa_len;
2619 len = MIN(len, sa_len);
91447636 2620 error = copyout(sa, uap->asa, len);
1c79356b
A
2621 if (error)
2622 goto bad;
2d21ac55
A
2623 /* return the actual, untruncated address length */
2624 len = sa_len;
1c79356b 2625gotnothing:
2d21ac55 2626 error = copyout((caddr_t)&len, uap->alen, sizeof (socklen_t));
1c79356b
A
2627bad:
2628 if (sa) FREE(sa, M_SONAME);
91447636
A
2629out:
2630 file_drop(uap->fdes);
1c79356b
A
2631 return (error);
2632}
2633
2634int
2d21ac55 2635sockargs(struct mbuf **mp, user_addr_t data, int buflen, int type)
1c79356b 2636{
2d21ac55
A
2637 struct sockaddr *sa;
2638 struct mbuf *m;
1c79356b
A
2639 int error;
2640
e2d2fc5c 2641 size_t alloc_buflen = (size_t)buflen;
39236c6e 2642
3e170ce0 2643 if (alloc_buflen > INT_MAX/2)
e2d2fc5c 2644 return (EINVAL);
b0d623f7 2645#ifdef __LP64__
3e170ce0
A
2646 /*
2647 * The fd's in the buffer must expand to be pointers, thus we need twice
2648 * as much space
2649 */
2650 if (type == MT_CONTROL)
2651 alloc_buflen = ((buflen - sizeof(struct cmsghdr))*2) +
2652 sizeof(struct cmsghdr);
b0d623f7 2653#endif
e2d2fc5c
A
2654 if (alloc_buflen > MLEN) {
2655 if (type == MT_SONAME && alloc_buflen <= 112)
3e170ce0 2656 alloc_buflen = MLEN; /* unix domain compat. hack */
e2d2fc5c 2657 else if (alloc_buflen > MCLBYTES)
91447636 2658 return (EINVAL);
1c79356b
A
2659 }
2660 m = m_get(M_WAIT, type);
2661 if (m == NULL)
2662 return (ENOBUFS);
e2d2fc5c 2663 if (alloc_buflen > MLEN) {
91447636
A
2664 MCLGET(m, M_WAIT);
2665 if ((m->m_flags & M_EXT) == 0) {
2666 m_free(m);
2d21ac55 2667 return (ENOBUFS);
91447636
A
2668 }
2669 }
3e170ce0
A
2670 /*
2671 * K64: We still copyin the original buflen because it gets expanded
2672 * later and we lie about the size of the mbuf because it only affects
2673 * unp_* functions
b0d623f7 2674 */
1c79356b 2675 m->m_len = buflen;
91447636 2676 error = copyin(data, mtod(m, caddr_t), (u_int)buflen);
2d21ac55 2677 if (error) {
1c79356b 2678 (void) m_free(m);
2d21ac55 2679 } else {
1c79356b
A
2680 *mp = m;
2681 if (type == MT_SONAME) {
2682 sa = mtod(m, struct sockaddr *);
1c79356b
A
2683 sa->sa_len = buflen;
2684 }
2685 }
2686 return (error);
2687}
2688
91447636
A
2689/*
2690 * Given a user_addr_t of length len, allocate and fill out a *sa.
2d21ac55
A
2691 *
2692 * Returns: 0 Success
2693 * ENAMETOOLONG Filename too long
2694 * EINVAL Invalid argument
2695 * ENOMEM Not enough space
2696 * copyin:EFAULT Bad address
91447636 2697 */
2d21ac55
A
2698static int
2699getsockaddr(struct socket *so, struct sockaddr **namp, user_addr_t uaddr,
4a3eedf9 2700 size_t len, boolean_t translate_unspec)
1c79356b
A
2701{
2702 struct sockaddr *sa;
2703 int error;
2704
2705 if (len > SOCK_MAXADDRLEN)
2d21ac55 2706 return (ENAMETOOLONG);
1c79356b 2707
2d21ac55
A
2708 if (len < offsetof(struct sockaddr, sa_data[0]))
2709 return (EINVAL);
1c79356b 2710
490019cf 2711 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK | M_ZERO);
91447636 2712 if (sa == NULL) {
2d21ac55 2713 return (ENOMEM);
91447636
A
2714 }
2715 error = copyin(uaddr, (caddr_t)sa, len);
1c79356b
A
2716 if (error) {
2717 FREE(sa, M_SONAME);
2718 } else {
2d21ac55
A
2719 /*
2720 * Force sa_family to AF_INET on AF_INET sockets to handle
2721 * legacy applications that use AF_UNSPEC (0). On all other
2722 * sockets we leave it unchanged and let the lower layer
2723 * handle it.
2724 */
4a3eedf9 2725 if (translate_unspec && sa->sa_family == AF_UNSPEC &&
39236c6e 2726 SOCK_CHECK_DOM(so, PF_INET) &&
2d21ac55
A
2727 len == sizeof (struct sockaddr_in))
2728 sa->sa_family = AF_INET;
2729
1c79356b
A
2730 sa->sa_len = len;
2731 *namp = sa;
2732 }
2d21ac55 2733 return (error);
1c79356b
A
2734}
2735
2d21ac55
A
2736static int
2737getsockaddr_s(struct socket *so, struct sockaddr_storage *ss,
4a3eedf9 2738 user_addr_t uaddr, size_t len, boolean_t translate_unspec)
1c79356b 2739{
2d21ac55
A
2740 int error;
2741
2742 if (ss == NULL || uaddr == USER_ADDR_NULL ||
2743 len < offsetof(struct sockaddr, sa_data[0]))
2744 return (EINVAL);
2745
2746 /*
2747 * sockaddr_storage size is less than SOCK_MAXADDRLEN,
2748 * so the check here is inclusive.
2749 */
2750 if (len > sizeof (*ss))
2751 return (ENAMETOOLONG);
1c79356b 2752
2d21ac55
A
2753 bzero(ss, sizeof (*ss));
2754 error = copyin(uaddr, (caddr_t)ss, len);
2755 if (error == 0) {
2756 /*
2757 * Force sa_family to AF_INET on AF_INET sockets to handle
2758 * legacy applications that use AF_UNSPEC (0). On all other
2759 * sockets we leave it unchanged and let the lower layer
2760 * handle it.
2761 */
4a3eedf9 2762 if (translate_unspec && ss->ss_family == AF_UNSPEC &&
39236c6e 2763 SOCK_CHECK_DOM(so, PF_INET) &&
2d21ac55
A
2764 len == sizeof (struct sockaddr_in))
2765 ss->ss_family = AF_INET;
91447636 2766
2d21ac55 2767 ss->ss_len = len;
1c79356b 2768 }
2d21ac55 2769 return (error);
1c79356b
A
2770}
2771
fe8ab488
A
2772int
2773internalize_user_msghdr_array(const void *src, int spacetype, int direction,
3e170ce0 2774 u_int count, struct user_msghdr_x *dst, struct uio **uiop)
fe8ab488
A
2775{
2776 int error = 0;
2777 u_int i;
3e170ce0
A
2778 u_int namecnt = 0;
2779 u_int ctlcnt = 0;
fe8ab488
A
2780
2781 for (i = 0; i < count; i++) {
2782 uio_t auio;
2783 struct user_iovec *iovp;
3e170ce0 2784 struct user_msghdr_x *user_msg = dst + i;
fe8ab488
A
2785
2786 if (spacetype == UIO_USERSPACE64) {
3e170ce0 2787 const struct user64_msghdr_x *msghdr64;
fe8ab488 2788
3e170ce0 2789 msghdr64 = ((const struct user64_msghdr_x *)src) + i;
fe8ab488
A
2790
2791 user_msg->msg_name = msghdr64->msg_name;
2792 user_msg->msg_namelen = msghdr64->msg_namelen;
2793 user_msg->msg_iov = msghdr64->msg_iov;
2794 user_msg->msg_iovlen = msghdr64->msg_iovlen;
2795 user_msg->msg_control = msghdr64->msg_control;
2796 user_msg->msg_controllen = msghdr64->msg_controllen;
2797 user_msg->msg_flags = msghdr64->msg_flags;
2798 user_msg->msg_datalen = msghdr64->msg_datalen;
2799 } else {
3e170ce0 2800 const struct user32_msghdr_x *msghdr32;
fe8ab488 2801
3e170ce0 2802 msghdr32 = ((const struct user32_msghdr_x *)src) + i;
fe8ab488
A
2803
2804 user_msg->msg_name = msghdr32->msg_name;
2805 user_msg->msg_namelen = msghdr32->msg_namelen;
2806 user_msg->msg_iov = msghdr32->msg_iov;
2807 user_msg->msg_iovlen = msghdr32->msg_iovlen;
2808 user_msg->msg_control = msghdr32->msg_control;
2809 user_msg->msg_controllen = msghdr32->msg_controllen;
2810 user_msg->msg_flags = msghdr32->msg_flags;
2811 user_msg->msg_datalen = msghdr32->msg_datalen;
2812 }
3e170ce0
A
2813
2814 if (user_msg->msg_iovlen <= 0 ||
2815 user_msg->msg_iovlen > UIO_MAXIOV) {
fe8ab488
A
2816 error = EMSGSIZE;
2817 goto done;
2818 }
3e170ce0
A
2819 auio = uio_create(user_msg->msg_iovlen, 0, spacetype,
2820 direction);
fe8ab488
A
2821 if (auio == NULL) {
2822 error = ENOMEM;
2823 goto done;
2824 }
2825 uiop[i] = auio;
2826
3e170ce0
A
2827 iovp = uio_iovsaddr(auio);
2828 if (iovp == NULL) {
2829 error = ENOMEM;
2830 goto done;
2831 }
2832 error = copyin_user_iovec_array(user_msg->msg_iov,
2833 spacetype, user_msg->msg_iovlen, iovp);
2834 if (error)
2835 goto done;
2836 user_msg->msg_iov = CAST_USER_ADDR_T(iovp);
fe8ab488 2837
3e170ce0
A
2838 error = uio_calculateresid(auio);
2839 if (error)
2840 goto done;
2841 user_msg->msg_datalen = uio_resid(auio);
2842
2843 if (user_msg->msg_name && user_msg->msg_namelen)
2844 namecnt++;
2845 if (user_msg->msg_control && user_msg->msg_controllen)
2846 ctlcnt++;
2847 }
2848done:
2849
2850 return (error);
2851}
2852
2853int
2854internalize_recv_msghdr_array(const void *src, int spacetype, int direction,
2855 u_int count, struct user_msghdr_x *dst,
2856 struct recv_msg_elem *recv_msg_array)
2857{
2858 int error = 0;
2859 u_int i;
2860
2861 for (i = 0; i < count; i++) {
2862 struct user_iovec *iovp;
2863 struct user_msghdr_x *user_msg = dst + i;
2864 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
2865
2866 if (spacetype == UIO_USERSPACE64) {
2867 const struct user64_msghdr_x *msghdr64;
2868
2869 msghdr64 = ((const struct user64_msghdr_x *)src) + i;
2870
2871 user_msg->msg_name = msghdr64->msg_name;
2872 user_msg->msg_namelen = msghdr64->msg_namelen;
2873 user_msg->msg_iov = msghdr64->msg_iov;
2874 user_msg->msg_iovlen = msghdr64->msg_iovlen;
2875 user_msg->msg_control = msghdr64->msg_control;
2876 user_msg->msg_controllen = msghdr64->msg_controllen;
2877 user_msg->msg_flags = msghdr64->msg_flags;
2878 user_msg->msg_datalen = msghdr64->msg_datalen;
fe8ab488 2879 } else {
3e170ce0
A
2880 const struct user32_msghdr_x *msghdr32;
2881
2882 msghdr32 = ((const struct user32_msghdr_x *)src) + i;
2883
2884 user_msg->msg_name = msghdr32->msg_name;
2885 user_msg->msg_namelen = msghdr32->msg_namelen;
2886 user_msg->msg_iov = msghdr32->msg_iov;
2887 user_msg->msg_iovlen = msghdr32->msg_iovlen;
2888 user_msg->msg_control = msghdr32->msg_control;
2889 user_msg->msg_controllen = msghdr32->msg_controllen;
2890 user_msg->msg_flags = msghdr32->msg_flags;
2891 user_msg->msg_datalen = msghdr32->msg_datalen;
fe8ab488 2892 }
3e170ce0
A
2893
2894 if (user_msg->msg_iovlen <= 0 ||
2895 user_msg->msg_iovlen > UIO_MAXIOV) {
2896 error = EMSGSIZE;
2897 goto done;
2898 }
2899 recv_msg_elem->uio = uio_create(user_msg->msg_iovlen, 0,
2900 spacetype, direction);
2901 if (recv_msg_elem->uio == NULL) {
2902 error = ENOMEM;
2903 goto done;
2904 }
2905
2906 iovp = uio_iovsaddr(recv_msg_elem->uio);
2907 if (iovp == NULL) {
2908 error = ENOMEM;
2909 goto done;
2910 }
2911 error = copyin_user_iovec_array(user_msg->msg_iov,
2912 spacetype, user_msg->msg_iovlen, iovp);
2913 if (error)
2914 goto done;
2915 user_msg->msg_iov = CAST_USER_ADDR_T(iovp);
2916
2917 error = uio_calculateresid(recv_msg_elem->uio);
2918 if (error)
2919 goto done;
2920 user_msg->msg_datalen = uio_resid(recv_msg_elem->uio);
2921
2922 if (user_msg->msg_name && user_msg->msg_namelen)
2923 recv_msg_elem->which |= SOCK_MSG_SA;
2924 if (user_msg->msg_control && user_msg->msg_controllen)
2925 recv_msg_elem->which |= SOCK_MSG_CONTROL;
fe8ab488
A
2926 }
2927done:
3e170ce0 2928
fe8ab488
A
2929 return (error);
2930}
2931
2932u_int
2933externalize_user_msghdr_array(void *dst, int spacetype, int direction,
3e170ce0 2934 u_int count, const struct user_msghdr_x *src, struct uio **uiop)
fe8ab488
A
2935{
2936#pragma unused(direction)
2937 u_int i;
2938 int seenlast = 0;
2939 u_int retcnt = 0;
2940
2941 for (i = 0; i < count; i++) {
3e170ce0 2942 const struct user_msghdr_x *user_msg = src + i;
fe8ab488
A
2943 uio_t auio = uiop[i];
2944 user_ssize_t len = user_msg->msg_datalen - uio_resid(auio);
2945
2946 if (user_msg->msg_datalen != 0 && len == 0)
2947 seenlast = 1;
3e170ce0
A
2948
2949 if (seenlast == 0)
2950 retcnt ++;
2951
2952 if (spacetype == UIO_USERSPACE64) {
2953 struct user64_msghdr_x *msghdr64;
2954
2955 msghdr64 = ((struct user64_msghdr_x *)dst) + i;
2956
2957 msghdr64->msg_flags = user_msg->msg_flags;
2958 msghdr64->msg_datalen = len;
2959
2960 } else {
2961 struct user32_msghdr_x *msghdr32;
2962
2963 msghdr32 = ((struct user32_msghdr_x *)dst) + i;
2964
2965 msghdr32->msg_flags = user_msg->msg_flags;
2966 msghdr32->msg_datalen = len;
2967 }
2968 }
2969 return (retcnt);
2970}
2971
2972u_int
2973externalize_recv_msghdr_array(void *dst, int spacetype, int direction,
2974 u_int count, const struct user_msghdr_x *src,
2975 struct recv_msg_elem *recv_msg_array)
2976{
2977 u_int i;
2978 int seenlast = 0;
2979 u_int retcnt = 0;
2980
2981 for (i = 0; i < count; i++) {
2982 const struct user_msghdr_x *user_msg = src + i;
2983 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
2984 user_ssize_t len;
2985
2986 len = user_msg->msg_datalen - uio_resid(recv_msg_elem->uio);
2987
2988 if (direction == UIO_READ) {
2989 if ((recv_msg_elem->which & SOCK_MSG_DATA) == 0)
2990 seenlast = 1;
2991 } else {
2992 if (user_msg->msg_datalen != 0 && len == 0)
2993 seenlast = 1;
2994 }
2995
fe8ab488
A
2996 if (seenlast == 0)
2997 retcnt ++;
2998
2999 if (spacetype == UIO_USERSPACE64) {
3000 struct user64_msghdr_x *msghdr64;
3001
3002 msghdr64 = ((struct user64_msghdr_x *)dst) + i;
3003
3004 msghdr64->msg_flags = user_msg->msg_flags;
3005 msghdr64->msg_datalen = len;
3e170ce0 3006
fe8ab488
A
3007 } else {
3008 struct user32_msghdr_x *msghdr32;
3009
3010 msghdr32 = ((struct user32_msghdr_x *)dst) + i;
3011
3012 msghdr32->msg_flags = user_msg->msg_flags;
3013 msghdr32->msg_datalen = len;
3014 }
3015 }
3016 return (retcnt);
3017}
3018
3019void
3020free_uio_array(struct uio **uiop, u_int count)
3021{
3022 u_int i;
3023
3024 for (i = 0; i < count; i++) {
3025 if (uiop[i] != NULL)
3026 uio_free(uiop[i]);
3027 }
3028}
3029
3030__private_extern__ user_ssize_t
3031uio_array_resid(struct uio **uiop, u_int count)
3032{
3033 user_ssize_t len = 0;
3034 u_int i;
3035
3036 for (i = 0; i < count; i++) {
3037 struct uio *auio = uiop[i];
3038
3e170ce0 3039 if (auio != NULL)
fe8ab488
A
3040 len += uio_resid(auio);
3041 }
3042 return (len);
3043}
3044
3045int
3046uio_array_is_valid(struct uio **uiop, u_int count)
3047{
3048 user_ssize_t len = 0;
3049 u_int i;
3050
3051 for (i = 0; i < count; i++) {
3052 struct uio *auio = uiop[i];
3e170ce0 3053
fe8ab488
A
3054 if (auio != NULL) {
3055 user_ssize_t resid = uio_resid(auio);
3e170ce0 3056
fe8ab488
A
3057 /*
3058 * Sanity check on the validity of the iovec:
3059 * no point of going over sb_max
3060 */
3061 if (resid < 0 || (u_int32_t)resid > sb_max)
3062 return (0);
3e170ce0
A
3063
3064 len += resid;
3065 if (len < 0 || (u_int32_t)len > sb_max)
3066 return (0);
3067 }
3068 }
3069 return (1);
3070}
3071
3072
3073struct recv_msg_elem *
3074alloc_recv_msg_array(u_int count)
3075{
3076 struct recv_msg_elem *recv_msg_array;
3077
3078 recv_msg_array = _MALLOC(count * sizeof(struct recv_msg_elem),
3079 M_TEMP, M_WAITOK | M_ZERO);
3080
3081 return (recv_msg_array);
3082}
3083
3084void
3085free_recv_msg_array(struct recv_msg_elem *recv_msg_array, u_int count)
3086{
3087 u_int i;
3088
3089 for (i = 0; i < count; i++) {
3090 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3091
3092 if (recv_msg_elem->uio != NULL)
3093 uio_free(recv_msg_elem->uio);
3094 if (recv_msg_elem->psa != NULL)
3095 _FREE(recv_msg_elem->psa, M_TEMP);
3096 if (recv_msg_elem->controlp != NULL)
3097 m_freem(recv_msg_elem->controlp);
3098 }
3099 _FREE(recv_msg_array, M_TEMP);
3100}
3101
3102
3103__private_extern__ user_ssize_t
3104recv_msg_array_resid(struct recv_msg_elem *recv_msg_array, u_int count)
3105{
3106 user_ssize_t len = 0;
3107 u_int i;
3108
3109 for (i = 0; i < count; i++) {
3110 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3111
3112 if (recv_msg_elem->uio != NULL)
3113 len += uio_resid(recv_msg_elem->uio);
3114 }
3115 return (len);
3116}
3117
3118int
3119recv_msg_array_is_valid(struct recv_msg_elem *recv_msg_array, u_int count)
3120{
3121 user_ssize_t len = 0;
3122 u_int i;
3123
3124 for (i = 0; i < count; i++) {
3125 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3126
3127 if (recv_msg_elem->uio != NULL) {
3128 user_ssize_t resid = uio_resid(recv_msg_elem->uio);
3129
3130 /*
3131 * Sanity check on the validity of the iovec:
3132 * no point of going over sb_max
3133 */
3134 if (resid < 0 || (u_int32_t)resid > sb_max)
3135 return (0);
3136
fe8ab488
A
3137 len += resid;
3138 if (len < 0 || (u_int32_t)len > sb_max)
3139 return (0);
3140 }
3141 }
3142 return (1);
3143}
3144
39236c6e 3145#if SENDFILE
2d21ac55
A
3146
3147#define SFUIOBUFS 64
2d21ac55
A
3148
3149/* Macros to compute the number of mbufs needed depending on cluster size */
3e170ce0
A
3150#define HOWMANY_16K(n) ((((unsigned int)(n) - 1) >> M16KCLSHIFT) + 1)
3151#define HOWMANY_4K(n) ((((unsigned int)(n) - 1) >> MBIGCLSHIFT) + 1)
2d21ac55 3152
39236c6e 3153/* Upper send limit in bytes (SFUIOBUFS * PAGESIZE) */
3e170ce0 3154#define SENDFILE_MAX_BYTES (SFUIOBUFS << PGSHIFT)
2d21ac55
A
3155
3156/* Upper send limit in the number of mbuf clusters */
3157#define SENDFILE_MAX_16K HOWMANY_16K(SENDFILE_MAX_BYTES)
3158#define SENDFILE_MAX_4K HOWMANY_4K(SENDFILE_MAX_BYTES)
3159
1c79356b 3160static void
2d21ac55
A
3161alloc_sendpkt(int how, size_t pktlen, unsigned int *maxchunks,
3162 struct mbuf **m, boolean_t jumbocl)
1c79356b 3163{
2d21ac55 3164 unsigned int needed;
1c79356b 3165
2d21ac55
A
3166 if (pktlen == 0)
3167 panic("%s: pktlen (%ld) must be non-zero\n", __func__, pktlen);
1c79356b 3168
2d21ac55
A
3169 /*
3170 * Try to allocate for the whole thing. Since we want full control
3171 * over the buffer size and be able to accept partial result, we can't
3172 * use mbuf_allocpacket(). The logic below is similar to sosend().
3173 */
3174 *m = NULL;
6d2010ae 3175 if (pktlen > MBIGCLBYTES && jumbocl) {
2d21ac55
A
3176 needed = MIN(SENDFILE_MAX_16K, HOWMANY_16K(pktlen));
3177 *m = m_getpackets_internal(&needed, 1, how, 0, M16KCLBYTES);
3178 }
3179 if (*m == NULL) {
3180 needed = MIN(SENDFILE_MAX_4K, HOWMANY_4K(pktlen));
6d2010ae 3181 *m = m_getpackets_internal(&needed, 1, how, 0, MBIGCLBYTES);
2d21ac55
A
3182 }
3183
3184 /*
3185 * Our previous attempt(s) at allocation had failed; the system
3186 * may be short on mbufs, and we want to block until they are
3187 * available. This time, ask just for 1 mbuf and don't return
3188 * until we get it.
3189 */
3190 if (*m == NULL) {
3191 needed = 1;
6d2010ae 3192 *m = m_getpackets_internal(&needed, 1, M_WAIT, 1, MBIGCLBYTES);
1c79356b 3193 }
2d21ac55
A
3194 if (*m == NULL)
3195 panic("%s: blocking allocation returned NULL\n", __func__);
3196
3197 *maxchunks = needed;
1c79356b
A
3198}
3199
3200/*
3201 * sendfile(2).
2d21ac55
A
3202 * int sendfile(int fd, int s, off_t offset, off_t *nbytes,
3203 * struct sf_hdtr *hdtr, int flags)
1c79356b
A
3204 *
3205 * Send a file specified by 'fd' and starting at 'offset' to a socket
2d21ac55
A
3206 * specified by 's'. Send only '*nbytes' of the file or until EOF if
3207 * *nbytes == 0. Optionally add a header and/or trailer to the socket
3208 * output. If specified, write the total number of bytes sent into *nbytes.
1c79356b
A
3209 */
3210int
2d21ac55 3211sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval)
1c79356b 3212{
91447636 3213 struct fileproc *fp;
1c79356b 3214 struct vnode *vp;
1c79356b 3215 struct socket *so;
2d21ac55
A
3216 struct writev_nocancel_args nuap;
3217 user_ssize_t writev_retval;
2d21ac55 3218 struct user_sf_hdtr user_hdtr;
b0d623f7
A
3219 struct user32_sf_hdtr user32_hdtr;
3220 struct user64_sf_hdtr user64_hdtr;
2d21ac55
A
3221 off_t off, xfsize;
3222 off_t nbytes = 0, sbytes = 0;
3223 int error = 0;
3224 size_t sizeof_hdtr;
2d21ac55
A
3225 off_t file_size;
3226 struct vfs_context context = *vfs_context_current();
3e170ce0 3227
2d21ac55
A
3228 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_START), uap->s,
3229 0, 0, 0, 0);
b0d623f7
A
3230
3231 AUDIT_ARG(fd, uap->fd);
3232 AUDIT_ARG(value32, uap->s);
3233
1c79356b
A
3234 /*
3235 * Do argument checking. Must be a regular file in, stream
3236 * type and connected socket out, positive offset.
3237 */
2d21ac55 3238 if ((error = fp_getfvp(p, uap->fd, &fp, &vp))) {
1c79356b 3239 goto done;
2d21ac55
A
3240 }
3241 if ((fp->f_flag & FREAD) == 0) {
91447636
A
3242 error = EBADF;
3243 goto done1;
1c79356b 3244 }
2d21ac55
A
3245 if (vnode_isreg(vp) == 0) {
3246 error = ENOTSUP;
91447636 3247 goto done1;
1c79356b 3248 }
91447636 3249 error = file_socket(uap->s, &so);
2d21ac55 3250 if (error) {
91447636 3251 goto done1;
2d21ac55 3252 }
55e303ae
A
3253 if (so == NULL) {
3254 error = EBADF;
91447636 3255 goto done2;
55e303ae 3256 }
1c79356b
A
3257 if (so->so_type != SOCK_STREAM) {
3258 error = EINVAL;
2d21ac55 3259 goto done2;
1c79356b
A
3260 }
3261 if ((so->so_state & SS_ISCONNECTED) == 0) {
3262 error = ENOTCONN;
2d21ac55 3263 goto done2;
1c79356b
A
3264 }
3265 if (uap->offset < 0) {
3266 error = EINVAL;
2d21ac55 3267 goto done2;
1c79356b 3268 }
2d21ac55
A
3269 if (uap->nbytes == USER_ADDR_NULL) {
3270 error = EINVAL;
3271 goto done2;
3272 }
3273 if (uap->flags != 0) {
3274 error = EINVAL;
3275 goto done2;
3276 }
3277
3278 context.vc_ucred = fp->f_fglob->fg_cred;
3279
3280#if CONFIG_MACF_SOCKET_SUBSET
3281 /* JMM - fetch connected sockaddr? */
3282 error = mac_socket_check_send(context.vc_ucred, so, NULL);
3283 if (error)
3284 goto done2;
3285#endif
3286
3287 /*
3288 * Get number of bytes to send
3289 * Should it applies to size of header and trailer?
3290 * JMM - error handling?
3291 */
3292 copyin(uap->nbytes, &nbytes, sizeof (off_t));
1c79356b
A
3293
3294 /*
3295 * If specified, get the pointer to the sf_hdtr struct for
3296 * any headers/trailers.
3297 */
2d21ac55
A
3298 if (uap->hdtr != USER_ADDR_NULL) {
3299 caddr_t hdtrp;
3300
3301 bzero(&user_hdtr, sizeof (user_hdtr));
3302 if (IS_64BIT_PROCESS(p)) {
b0d623f7
A
3303 hdtrp = (caddr_t)&user64_hdtr;
3304 sizeof_hdtr = sizeof (user64_hdtr);
2d21ac55 3305 } else {
b0d623f7
A
3306 hdtrp = (caddr_t)&user32_hdtr;
3307 sizeof_hdtr = sizeof (user32_hdtr);
2d21ac55
A
3308 }
3309 error = copyin(uap->hdtr, hdtrp, sizeof_hdtr);
1c79356b 3310 if (error)
2d21ac55 3311 goto done2;
b0d623f7
A
3312 if (IS_64BIT_PROCESS(p)) {
3313 user_hdtr.headers = user64_hdtr.headers;
3314 user_hdtr.hdr_cnt = user64_hdtr.hdr_cnt;
3315 user_hdtr.trailers = user64_hdtr.trailers;
3316 user_hdtr.trl_cnt = user64_hdtr.trl_cnt;
3317 } else {
3318 user_hdtr.headers = user32_hdtr.headers;
3319 user_hdtr.hdr_cnt = user32_hdtr.hdr_cnt;
3320 user_hdtr.trailers = user32_hdtr.trailers;
3321 user_hdtr.trl_cnt = user32_hdtr.trl_cnt;
2d21ac55
A
3322 }
3323
1c79356b
A
3324 /*
3325 * Send any headers. Wimp out and use writev(2).
3326 */
2d21ac55
A
3327 if (user_hdtr.headers != USER_ADDR_NULL) {
3328 bzero(&nuap, sizeof (struct writev_args));
1c79356b 3329 nuap.fd = uap->s;
2d21ac55
A
3330 nuap.iovp = user_hdtr.headers;
3331 nuap.iovcnt = user_hdtr.hdr_cnt;
3332 error = writev_nocancel(p, &nuap, &writev_retval);
316670eb 3333 if (error) {
2d21ac55 3334 goto done2;
316670eb 3335 }
2d21ac55 3336 sbytes += writev_retval;
1c79356b
A
3337 }
3338 }
3339
3340 /*
2d21ac55
A
3341 * Get the file size for 2 reasons:
3342 * 1. We don't want to allocate more mbufs than necessary
3343 * 2. We don't want to read past the end of file
1c79356b 3344 */
316670eb 3345 if ((error = vnode_size(vp, &file_size, vfs_context_current())) != 0) {
2d21ac55 3346 goto done2;
316670eb 3347 }
1c79356b
A
3348
3349 /*
2d21ac55
A
3350 * Simply read file data into a chain of mbufs that used with scatter
3351 * gather reads. We're not (yet?) setup to use zero copy external
3352 * mbufs that point to the file pages.
1c79356b 3353 */
2d21ac55 3354 socket_lock(so, 1);
39236c6e 3355 error = sblock(&so->so_snd, SBL_WAIT);
2d21ac55
A
3356 if (error) {
3357 socket_unlock(so, 1);
3358 goto done2;
3359 }
1c79356b 3360 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
2d21ac55 3361 mbuf_t m0 = NULL, m;
39236c6e 3362 unsigned int nbufs = SFUIOBUFS, i;
2d21ac55 3363 uio_t auio;
39236c6e 3364 char uio_buf[UIO_SIZEOF(SFUIOBUFS)]; /* 1 KB !!! */
2d21ac55
A
3365 size_t uiolen;
3366 user_ssize_t rlen;
3367 off_t pgoff;
3368 size_t pktlen;
3369 boolean_t jumbocl;
1c79356b 3370
1c79356b 3371 /*
2d21ac55
A
3372 * Calculate the amount to transfer.
3373 * Align to round number of pages.
3374 * Not to exceed send socket buffer,
1c79356b
A
3375 * the EOF, or the passed in nbytes.
3376 */
2d21ac55
A
3377 xfsize = sbspace(&so->so_snd);
3378
3379 if (xfsize <= 0) {
3380 if (so->so_state & SS_CANTSENDMORE) {
3381 error = EPIPE;
3382 goto done3;
3383 } else if ((so->so_state & SS_NBIO)) {
3384 error = EAGAIN;
3385 goto done3;
3386 } else {
3387 xfsize = PAGE_SIZE;
3388 }
3389 }
3390
3391 if (xfsize > SENDFILE_MAX_BYTES)
3392 xfsize = SENDFILE_MAX_BYTES;
3393 else if (xfsize > PAGE_SIZE)
3394 xfsize = trunc_page(xfsize);
3395 pgoff = off & PAGE_MASK_64;
3396 if (pgoff > 0 && PAGE_SIZE - pgoff < xfsize)
1c79356b 3397 xfsize = PAGE_SIZE_64 - pgoff;
2d21ac55
A
3398 if (nbytes && xfsize > (nbytes - sbytes))
3399 xfsize = nbytes - sbytes;
3400 if (xfsize <= 0)
3401 break;
3402 if (off + xfsize > file_size)
3403 xfsize = file_size - off;
1c79356b
A
3404 if (xfsize <= 0)
3405 break;
2d21ac55 3406
1c79356b 3407 /*
2d21ac55
A
3408 * Attempt to use larger than system page-size clusters for
3409 * large writes only if there is a jumbo cluster pool and
3410 * if the socket is marked accordingly.
1c79356b 3411 */
2d21ac55
A
3412 jumbocl = sosendjcl && njcl > 0 &&
3413 ((so->so_flags & SOF_MULTIPAGES) || sosendjcl_ignore_capab);
3414
3415 socket_unlock(so, 0);
3416 alloc_sendpkt(M_WAIT, xfsize, &nbufs, &m0, jumbocl);
fe8ab488 3417 pktlen = mbuf_pkthdr_maxlen(m0);
b0d623f7 3418 if (pktlen < (size_t)xfsize)
2d21ac55 3419 xfsize = pktlen;
39236c6e 3420
2d21ac55
A
3421 auio = uio_createwithbuffer(nbufs, off, UIO_SYSSPACE,
3422 UIO_READ, &uio_buf[0], sizeof (uio_buf));
3423 if (auio == NULL) {
316670eb
A
3424 printf("sendfile failed. nbufs = %d. %s", nbufs,
3425 "File a radar related to rdar://10146739.\n");
2d21ac55
A
3426 mbuf_freem(m0);
3427 error = ENXIO;
3428 socket_lock(so, 0);
3429 goto done3;
1c79356b 3430 }
1c79356b 3431
2d21ac55 3432 for (i = 0, m = m0, uiolen = 0;
b0d623f7 3433 i < nbufs && m != NULL && uiolen < (size_t)xfsize;
2d21ac55
A
3434 i++, m = mbuf_next(m)) {
3435 size_t mlen = mbuf_maxlen(m);
3436
b0d623f7 3437 if (mlen + uiolen > (size_t)xfsize)
2d21ac55
A
3438 mlen = xfsize - uiolen;
3439 mbuf_setlen(m, mlen);
3440 uio_addiov(auio, CAST_USER_ADDR_T(mbuf_datastart(m)),
3441 mlen);
3442 uiolen += mlen;
3443 }
3444
3445 if (xfsize != uio_resid(auio))
3446 printf("sendfile: xfsize: %lld != uio_resid(auio): "
6d2010ae 3447 "%lld\n", xfsize, (long long)uio_resid(auio));
2d21ac55
A
3448
3449 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_START),
3450 uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
3451 (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
3452 error = fo_read(fp, auio, FOF_OFFSET, &context);
3453 socket_lock(so, 0);
3454 if (error != 0) {
3455 if (uio_resid(auio) != xfsize && (error == ERESTART ||
3456 error == EINTR || error == EWOULDBLOCK)) {
3457 error = 0;
3458 } else {
3459 mbuf_freem(m0);
3460 goto done3;
1c79356b 3461 }
1c79356b 3462 }
2d21ac55
A
3463 xfsize -= uio_resid(auio);
3464 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_END),
3465 uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
3466 (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
3467
3468 if (xfsize == 0) {
3e170ce0 3469 // printf("sendfile: fo_read 0 bytes, EOF\n");
2d21ac55 3470 break;
91447636 3471 }
2d21ac55
A
3472 if (xfsize + off > file_size)
3473 printf("sendfile: xfsize: %lld + off: %lld > file_size:"
3474 "%lld\n", xfsize, off, file_size);
3475 for (i = 0, m = m0, rlen = 0;
3476 i < nbufs && m != NULL && rlen < xfsize;
3477 i++, m = mbuf_next(m)) {
3478 size_t mlen = mbuf_maxlen(m);
3479
b0d623f7 3480 if (rlen + mlen > (size_t)xfsize)
2d21ac55
A
3481 mlen = xfsize - rlen;
3482 mbuf_setlen(m, mlen);
3483
3484 rlen += mlen;
3485 }
3486 mbuf_pkthdr_setlen(m0, xfsize);
3487
1c79356b
A
3488retry_space:
3489 /*
3490 * Make sure that the socket is still able to take more data.
3491 * CANTSENDMORE being true usually means that the connection
3492 * was closed. so_error is true when an error was sensed after
3493 * a previous send.
3494 * The state is checked after the page mapping and buffer
3495 * allocation above since those operations may block and make
3496 * any socket checks stale. From this point forward, nothing
3497 * blocks before the pru_send (or more accurately, any blocking
3498 * results in a loop back to here to re-check).
3499 */
3500 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
3501 if (so->so_state & SS_CANTSENDMORE) {
3502 error = EPIPE;
3503 } else {
3504 error = so->so_error;
3505 so->so_error = 0;
3506 }
2d21ac55
A
3507 m_freem(m0);
3508 goto done3;
1c79356b
A
3509 }
3510 /*
3511 * Wait for socket space to become available. We do this just
3512 * after checking the connection state above in order to avoid
3513 * a race condition with sbwait().
3514 */
2d21ac55 3515 if (sbspace(&so->so_snd) < (long)so->so_snd.sb_lowat) {
1c79356b 3516 if (so->so_state & SS_NBIO) {
2d21ac55 3517 m_freem(m0);
1c79356b 3518 error = EAGAIN;
2d21ac55 3519 goto done3;
1c79356b 3520 }
2d21ac55
A
3521 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT |
3522 DBG_FUNC_START), uap->s, 0, 0, 0, 0);
1c79356b 3523 error = sbwait(&so->so_snd);
2d21ac55
A
3524 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT|
3525 DBG_FUNC_END), uap->s, 0, 0, 0, 0);
1c79356b
A
3526 /*
3527 * An error from sbwait usually indicates that we've
3528 * been interrupted by a signal. If we've sent anything
3529 * then return bytes sent, otherwise return the error.
3530 */
3531 if (error) {
2d21ac55
A
3532 m_freem(m0);
3533 goto done3;
1c79356b
A
3534 }
3535 goto retry_space;
3536 }
39236c6e 3537
6d2010ae 3538 struct mbuf *control = NULL;
2d21ac55
A
3539 {
3540 /*
3541 * Socket filter processing
3542 */
2d21ac55 3543
6d2010ae
A
3544 error = sflt_data_out(so, NULL, &m0, &control, 0);
3545 if (error) {
3546 if (error == EJUSTRETURN) {
3547 error = 0;
3548 continue;
2d21ac55 3549 }
6d2010ae 3550 goto done3;
2d21ac55
A
3551 }
3552 /*
3553 * End Socket filter processing
3554 */
3555 }
3556 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
3557 uap->s, 0, 0, 0, 0);
3558 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m0,
6d2010ae 3559 0, control, p);
2d21ac55
A
3560 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
3561 uap->s, 0, 0, 0, 0);
1c79356b 3562 if (error) {
2d21ac55 3563 goto done3;
1c79356b
A
3564 }
3565 }
39236c6e 3566 sbunlock(&so->so_snd, FALSE); /* will unlock socket */
1c79356b
A
3567 /*
3568 * Send trailers. Wimp out and use writev(2).
3569 */
2d21ac55
A
3570 if (uap->hdtr != USER_ADDR_NULL &&
3571 user_hdtr.trailers != USER_ADDR_NULL) {
3572 bzero(&nuap, sizeof (struct writev_args));
3573 nuap.fd = uap->s;
3574 nuap.iovp = user_hdtr.trailers;
3575 nuap.iovcnt = user_hdtr.trl_cnt;
3576 error = writev_nocancel(p, &nuap, &writev_retval);
316670eb 3577 if (error) {
2d21ac55 3578 goto done2;
316670eb 3579 }
2d21ac55 3580 sbytes += writev_retval;
1c79356b 3581 }
91447636
A
3582done2:
3583 file_drop(uap->s);
3584done1:
3585 file_drop(uap->fd);
1c79356b 3586done:
2d21ac55 3587 if (uap->nbytes != USER_ADDR_NULL) {
91447636 3588 /* XXX this appears bogus for some early failure conditions */
2d21ac55 3589 copyout(&sbytes, uap->nbytes, sizeof (off_t));
1c79356b 3590 }
2d21ac55
A
3591 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_END), uap->s,
3592 (unsigned int)((sbytes >> 32) & 0x0ffffffff),
3593 (unsigned int)(sbytes & 0x0ffffffff), error, 0);
1c79356b 3594 return (error);
91447636 3595done3:
39236c6e 3596 sbunlock(&so->so_snd, FALSE); /* will unlock socket */
91447636 3597 goto done2;
1c79356b
A
3598}
3599
2d21ac55
A
3600
3601#endif /* SENDFILE */