]> git.saurik.com Git - apple/xnu.git/blame - bsd/kern/uipc_syscalls.c
xnu-4903.231.4.tar.gz
[apple/xnu.git] / bsd / kern / uipc_syscalls.c
CommitLineData
1c79356b 1/*
3e170ce0 2 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
5d5c5d0d 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
39236c6e 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
39236c6e 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
39236c6e 17 *
2d21ac55
A
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
39236c6e 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * Copyright (c) 1982, 1986, 1989, 1990, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * sendfile(2) and related extensions:
2d21ac55 33 * Copyright (c) 1998, David Greenman. All rights reserved.
1c79356b
A
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. All advertising materials mentioning features or use of this software
44 * must display the following acknowledgement:
45 * This product includes software developed by the University of
46 * California, Berkeley and its contributors.
47 * 4. Neither the name of the University nor the names of its contributors
48 * may be used to endorse or promote products derived from this software
49 * without specific prior written permission.
50 *
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE.
62 *
63 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94
64 */
2d21ac55
A
65/*
66 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
67 * support for mandatory and extensible security protections. This notice
68 * is included in support of clause 2.2 (b) of the Apple Public License,
69 * Version 2.0.
70 */
1c79356b
A
71
72#include <sys/param.h>
73#include <sys/systm.h>
74#include <sys/filedesc.h>
91447636
A
75#include <sys/proc_internal.h>
76#include <sys/file_internal.h>
2d21ac55 77#include <sys/vnode_internal.h>
1c79356b 78#include <sys/malloc.h>
39236c6e 79#include <sys/mcache.h>
1c79356b 80#include <sys/mbuf.h>
fe8ab488 81#include <kern/locks.h>
91447636 82#include <sys/domain.h>
1c79356b 83#include <sys/protosw.h>
91447636 84#include <sys/signalvar.h>
1c79356b
A
85#include <sys/socket.h>
86#include <sys/socketvar.h>
1c79356b 87#include <sys/kernel.h>
91447636 88#include <sys/uio_internal.h>
2d21ac55 89#include <sys/kauth.h>
6d2010ae 90#include <kern/task.h>
39236c6e 91#include <sys/priv.h>
3e170ce0 92#include <sys/sysctl.h>
5c9f4661 93#include <sys/sys_domain.h>
e5568f75 94
b0d623f7 95#include <security/audit/audit.h>
1c79356b
A
96
97#include <sys/kdebug.h>
91447636 98#include <sys/sysproto.h>
2d21ac55
A
99#include <netinet/in.h>
100#include <net/route.h>
101#include <netinet/in_pcb.h>
102
103#if CONFIG_MACF_SOCKET_SUBSET
104#include <security/mac_framework.h>
105#endif /* MAC_SOCKET_SUBSET */
106
107#define f_flag f_fglob->fg_flag
39236c6e 108#define f_type f_fglob->fg_ops->fo_type
2d21ac55
A
109#define f_msgcount f_fglob->fg_msgcount
110#define f_cred f_fglob->fg_cred
111#define f_ops f_fglob->fg_ops
112#define f_offset f_fglob->fg_offset
113#define f_data f_fglob->fg_data
114
2d21ac55
A
115#define DBG_LAYER_IN_BEG NETDBG_CODE(DBG_NETSOCK, 0)
116#define DBG_LAYER_IN_END NETDBG_CODE(DBG_NETSOCK, 2)
117#define DBG_LAYER_OUT_BEG NETDBG_CODE(DBG_NETSOCK, 1)
118#define DBG_LAYER_OUT_END NETDBG_CODE(DBG_NETSOCK, 3)
119#define DBG_FNC_SENDMSG NETDBG_CODE(DBG_NETSOCK, (1 << 8) | 1)
120#define DBG_FNC_SENDTO NETDBG_CODE(DBG_NETSOCK, (2 << 8) | 1)
121#define DBG_FNC_SENDIT NETDBG_CODE(DBG_NETSOCK, (3 << 8) | 1)
122#define DBG_FNC_RECVFROM NETDBG_CODE(DBG_NETSOCK, (5 << 8))
123#define DBG_FNC_RECVMSG NETDBG_CODE(DBG_NETSOCK, (6 << 8))
124#define DBG_FNC_RECVIT NETDBG_CODE(DBG_NETSOCK, (7 << 8))
125#define DBG_FNC_SENDFILE NETDBG_CODE(DBG_NETSOCK, (10 << 8))
126#define DBG_FNC_SENDFILE_WAIT NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 1))
127#define DBG_FNC_SENDFILE_READ NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 2))
128#define DBG_FNC_SENDFILE_SEND NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 3))
fe8ab488
A
129#define DBG_FNC_SENDMSG_X NETDBG_CODE(DBG_NETSOCK, (11 << 8))
130#define DBG_FNC_RECVMSG_X NETDBG_CODE(DBG_NETSOCK, (12 << 8))
2d21ac55 131
3e170ce0
A
132#if DEBUG || DEVELOPMENT
133#define DEBUG_KERNEL_ADDRPERM(_v) (_v)
134#define DBG_PRINTF(...) printf(__VA_ARGS__)
135#else
136#define DEBUG_KERNEL_ADDRPERM(_v) VM_KERNEL_ADDRPERM(_v)
137#define DBG_PRINTF(...) do { } while (0)
138#endif
2d21ac55 139
2d21ac55
A
140/* TODO: should be in header file */
141int falloc_locked(proc_t, struct fileproc **, int *, vfs_context_t, int);
142
3e170ce0
A
143static int sendit(struct proc *, struct socket *, struct user_msghdr *, uio_t,
144 int, int32_t *);
2d21ac55 145static int recvit(struct proc *, int, struct user_msghdr *, uio_t, user_addr_t,
b0d623f7 146 int32_t *);
39236c6e 147static int connectit(struct socket *, struct sockaddr *);
2d21ac55 148static int getsockaddr(struct socket *, struct sockaddr **, user_addr_t,
4a3eedf9 149 size_t, boolean_t);
2d21ac55 150static int getsockaddr_s(struct socket *, struct sockaddr_storage *,
4a3eedf9 151 user_addr_t, size_t, boolean_t);
1c79356b 152#if SENDFILE
2d21ac55
A
153static void alloc_sendpkt(int, size_t, unsigned int *, struct mbuf **,
154 boolean_t);
155#endif /* SENDFILE */
39236c6e 156static int connectx_nocancel(struct proc *, struct connectx_args *, int *);
813fb2f6
A
157static int connectitx(struct socket *, struct sockaddr *,
158 struct sockaddr *, struct proc *, uint32_t, sae_associd_t,
3e170ce0 159 sae_connid_t *, uio_t, unsigned int, user_ssize_t *);
39236c6e
A
160static int disconnectx_nocancel(struct proc *, struct disconnectx_args *,
161 int *);
162static int socket_common(struct proc *, int, int, int, pid_t, int32_t *, int);
1c79356b 163
fe8ab488 164static int internalize_user_msghdr_array(const void *, int, int, u_int,
3e170ce0 165 struct user_msghdr_x *, struct uio **);
fe8ab488 166static u_int externalize_user_msghdr_array(void *, int, int, u_int,
3e170ce0 167 const struct user_msghdr_x *, struct uio **);
fe8ab488
A
168
169static void free_uio_array(struct uio **, u_int);
170static int uio_array_is_valid(struct uio **, u_int);
3e170ce0
A
171static int recv_msg_array_is_valid(struct recv_msg_elem *, u_int);
172static int internalize_recv_msghdr_array(const void *, int, int,
173 u_int, struct user_msghdr_x *, struct recv_msg_elem *);
174static u_int externalize_recv_msghdr_array(void *, int, int, u_int,
175 const struct user_msghdr_x *, struct recv_msg_elem *);
176static struct recv_msg_elem *alloc_recv_msg_array(u_int count);
177static void free_recv_msg_array(struct recv_msg_elem *, u_int);
178
179SYSCTL_DECL(_kern_ipc);
180
/*
 * Read-write unsigned tunables registered under the _kern_ipc sysctl node
 * as "maxsendmsgx" and "maxrecvmsgx"; both start at 100.
 * NOTE(review): presumably these cap the number of messages accepted per
 * sendmsg_x()/recvmsg_x() call -- the consumers are not visible here, confirm.
 */
181static u_int somaxsendmsgx = 100;
182SYSCTL_UINT(_kern_ipc, OID_AUTO, maxsendmsgx,
183 CTLFLAG_RW | CTLFLAG_LOCKED, &somaxsendmsgx, 0, "");
184static u_int somaxrecvmsgx = 100;
185SYSCTL_UINT(_kern_ipc, OID_AUTO, maxrecvmsgx,
186 CTLFLAG_RW | CTLFLAG_LOCKED, &somaxrecvmsgx, 0, "");
fe8ab488 187
1c79356b
A
188/*
189 * System call interface to the socket abstraction.
190 */
1c79356b 191
39236c6e 192extern const struct fileops socketops;
1c79356b 193
2d21ac55
A
194/*
195 * Returns: 0 Success
196 * EACCES Mandatory Access Control failure
197 * falloc:ENFILE
198 * falloc:EMFILE
199 * falloc:ENOMEM
200 * socreate:EAFNOSUPPORT
201 * socreate:EPROTOTYPE
202 * socreate:EPROTONOSUPPORT
203 * socreate:ENOBUFS
204 * socreate:ENOMEM
2d21ac55
A
205 * socreate:??? [other protocol families, IPSEC]
206 */
1c79356b 207int
39236c6e
A
208socket(struct proc *p,
209 struct socket_args *uap,
210 int32_t *retval)
211{
212 return (socket_common(p, uap->domain, uap->type, uap->protocol,
213 proc_selfpid(), retval, 0));
214}
215
216int
217socket_delegate(struct proc *p,
218 struct socket_delegate_args *uap,
219 int32_t *retval)
220{
221 return socket_common(p, uap->domain, uap->type, uap->protocol,
222 uap->epid, retval, 1);
223}
224
225static int
226socket_common(struct proc *p,
227 int domain,
228 int type,
229 int protocol,
230 pid_t epid,
231 int32_t *retval,
232 int delegate)
1c79356b 233{
1c79356b 234 struct socket *so;
91447636 235 struct fileproc *fp;
1c79356b
A
236 int fd, error;
237
39236c6e 238 AUDIT_ARG(socket, domain, type, protocol);
2d21ac55 239#if CONFIG_MACF_SOCKET_SUBSET
39236c6e
A
240 if ((error = mac_socket_check_create(kauth_cred_get(), domain,
241 type, protocol)) != 0)
2d21ac55
A
242 return (error);
243#endif /* MAC_SOCKET_SUBSET */
1c79356b 244
39236c6e
A
245 if (delegate) {
246 error = priv_check_cred(kauth_cred_get(),
247 PRIV_NET_PRIVILEGED_SOCKET_DELEGATE, 0);
248 if (error)
249 return (EACCES);
250 }
251
2d21ac55 252 error = falloc(p, &fp, &fd, vfs_context_current());
91447636 253 if (error) {
1c79356b 254 return (error);
91447636 255 }
1c79356b 256 fp->f_flag = FREAD|FWRITE;
1c79356b 257 fp->f_ops = &socketops;
91447636 258
39236c6e
A
259 if (delegate)
260 error = socreate_delegate(domain, &so, type, protocol, epid);
261 else
262 error = socreate(domain, &so, type, protocol);
263
91447636
A
264 if (error) {
265 fp_free(p, fd, fp);
1c79356b
A
266 } else {
267 fp->f_data = (caddr_t)so;
91447636
A
268
269 proc_fdlock(p);
6601e61a 270 procfdtbl_releasefd(p, fd, NULL);
2d21ac55 271
91447636
A
272 fp_drop(p, fd, fp, 1);
273 proc_fdunlock(p);
274
1c79356b 275 *retval = fd;
3e170ce0
A
276 if (ENTR_SHOULDTRACE) {
277 KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
278 fd, 0, (int64_t)VM_KERNEL_ADDRPERM(so));
279 }
1c79356b
A
280 }
281 return (error);
282}
283
2d21ac55
A
284/*
285 * Returns: 0 Success
286 * EDESTADDRREQ Destination address required
287 * EBADF Bad file descriptor
288 * EACCES Mandatory Access Control failure
289 * file_socket:ENOTSOCK
290 * file_socket:EBADF
291 * getsockaddr:ENAMETOOLONG Filename too long
292 * getsockaddr:EINVAL Invalid argument
293 * getsockaddr:ENOMEM Not enough space
294 * getsockaddr:EFAULT Bad address
39236c6e 295 * sobindlock:???
2d21ac55 296 */
1c79356b
A
297/* ARGSUSED */
298int
b0d623f7 299bind(__unused proc_t p, struct bind_args *uap, __unused int32_t *retval)
1c79356b 300{
2d21ac55
A
301 struct sockaddr_storage ss;
302 struct sockaddr *sa = NULL;
91447636 303 struct socket *so;
2d21ac55 304 boolean_t want_free = TRUE;
1c79356b
A
305 int error;
306
55e303ae 307 AUDIT_ARG(fd, uap->s);
91447636 308 error = file_socket(uap->s, &so);
2d21ac55 309 if (error != 0)
1c79356b 310 return (error);
2d21ac55
A
311 if (so == NULL) {
312 error = EBADF;
313 goto out;
314 }
315 if (uap->name == USER_ADDR_NULL) {
316 error = EDESTADDRREQ;
317 goto out;
318 }
319 if (uap->namelen > sizeof (ss)) {
4a3eedf9 320 error = getsockaddr(so, &sa, uap->name, uap->namelen, TRUE);
2d21ac55 321 } else {
4a3eedf9 322 error = getsockaddr_s(so, &ss, uap->name, uap->namelen, TRUE);
2d21ac55
A
323 if (error == 0) {
324 sa = (struct sockaddr *)&ss;
325 want_free = FALSE;
326 }
327 }
328 if (error != 0)
91447636 329 goto out;
2d21ac55
A
330 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
331#if CONFIG_MACF_SOCKET_SUBSET
5c9f4661
A
332 if ((sa != NULL && sa->sa_family == AF_SYSTEM) ||
333 (error = mac_socket_check_bind(kauth_cred_get(), so, sa)) == 0)
39236c6e 334 error = sobindlock(so, sa, 1); /* will lock socket */
2d21ac55 335#else
39236c6e 336 error = sobindlock(so, sa, 1); /* will lock socket */
2d21ac55
A
337#endif /* MAC_SOCKET_SUBSET */
338 if (want_free)
339 FREE(sa, M_SONAME);
91447636
A
340out:
341 file_drop(uap->s);
1c79356b
A
342 return (error);
343}
344
2d21ac55
A
345/*
346 * Returns: 0 Success
347 * EBADF
348 * EACCES Mandatory Access Control failure
349 * file_socket:ENOTSOCK
350 * file_socket:EBADF
351 * solisten:EINVAL
352 * solisten:EOPNOTSUPP
353 * solisten:???
354 */
1c79356b 355int
2d21ac55 356listen(__unused struct proc *p, struct listen_args *uap,
b0d623f7 357 __unused int32_t *retval)
1c79356b 358{
1c79356b 359 int error;
2d21ac55 360 struct socket *so;
1c79356b 361
55e303ae 362 AUDIT_ARG(fd, uap->s);
91447636 363 error = file_socket(uap->s, &so);
1c79356b
A
364 if (error)
365 return (error);
91447636 366 if (so != NULL)
2d21ac55
A
367#if CONFIG_MACF_SOCKET_SUBSET
368 {
369 error = mac_socket_check_listen(kauth_cred_get(), so);
370 if (error == 0)
371 error = solisten(so, uap->backlog);
372 }
373#else
91447636 374 error = solisten(so, uap->backlog);
2d21ac55 375#endif /* MAC_SOCKET_SUBSET */
55e303ae 376 else
91447636 377 error = EBADF;
2d21ac55 378
91447636
A
379 file_drop(uap->s);
380 return (error);
1c79356b
A
381}
382
2d21ac55
A
383/*
384 * Returns: fp_getfsock:EBADF Bad file descriptor
385 * fp_getfsock:EOPNOTSUPP ...
386 * xlate => :ENOTSOCK Socket operation on non-socket
387 * :EFAULT Bad address on copyin/copyout
388 * :EBADF Bad file descriptor
389 * :EOPNOTSUPP Operation not supported on socket
390 * :EINVAL Invalid argument
391 * :EWOULDBLOCK Operation would block
392 * :ECONNABORTED Connection aborted
393 * :EINTR Interrupted function
394 * :EACCES Mandatory Access Control failure
395 * falloc_locked:ENFILE Too many files open in system
396 * falloc_locked:EMFILE Too many open files
397 * falloc_locked:ENOMEM Not enough space
398 * 0 Success
399 */
1c79356b 400int
2d21ac55 401accept_nocancel(struct proc *p, struct accept_nocancel_args *uap,
b0d623f7 402 int32_t *retval)
1c79356b 403{
91447636 404 struct fileproc *fp;
2d21ac55 405 struct sockaddr *sa = NULL;
91447636
A
406 socklen_t namelen;
407 int error;
408 struct socket *head, *so = NULL;
409 lck_mtx_t *mutex_held;
410 int fd = uap->s;
2d21ac55 411 int newfd;
1c79356b 412 short fflag; /* type must match fp->f_flag */
91447636 413 int dosocklock = 0;
1c79356b 414
2d21ac55
A
415 *retval = -1;
416
55e303ae 417 AUDIT_ARG(fd, uap->s);
2d21ac55 418
1c79356b 419 if (uap->name) {
91447636 420 error = copyin(uap->anamelen, (caddr_t)&namelen,
2d21ac55
A
421 sizeof (socklen_t));
422 if (error)
1c79356b
A
423 return (error);
424 }
91447636
A
425 error = fp_getfsock(p, fd, &fp, &head);
426 if (error) {
427 if (error == EOPNOTSUPP)
428 error = ENOTSOCK;
1c79356b 429 return (error);
91447636 430 }
55e303ae 431 if (head == NULL) {
91447636
A
432 error = EBADF;
433 goto out;
55e303ae 434 }
2d21ac55
A
435#if CONFIG_MACF_SOCKET_SUBSET
436 if ((error = mac_socket_check_accept(kauth_cred_get(), head)) != 0)
437 goto out;
438#endif /* MAC_SOCKET_SUBSET */
91447636
A
439
440 socket_lock(head, 1);
441
442 if (head->so_proto->pr_getlock != NULL) {
5ba3f43e 443 mutex_held = (*head->so_proto->pr_getlock)(head, PR_F_WILLUNLOCK);
91447636 444 dosocklock = 1;
2d21ac55 445 } else {
91447636
A
446 mutex_held = head->so_proto->pr_domain->dom_mtx;
447 dosocklock = 0;
448 }
449
1c79356b 450 if ((head->so_options & SO_ACCEPTCONN) == 0) {
2d21ac55
A
451 if ((head->so_proto->pr_flags & PR_CONNREQUIRED) == 0) {
452 error = EOPNOTSUPP;
453 } else {
454 /* POSIX: The socket is not accepting connections */
455 error = EINVAL;
456 }
91447636 457 socket_unlock(head, 1);
91447636 458 goto out;
1c79356b 459 }
813fb2f6 460check_again:
1c79356b 461 if ((head->so_state & SS_NBIO) && head->so_comp.tqh_first == NULL) {
91447636
A
462 socket_unlock(head, 1);
463 error = EWOULDBLOCK;
464 goto out;
1c79356b 465 }
2d21ac55 466 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
1c79356b
A
467 if (head->so_state & SS_CANTRCVMORE) {
468 head->so_error = ECONNABORTED;
469 break;
470 }
91447636 471 if (head->so_usecount < 1)
2d21ac55
A
472 panic("accept: head=%p refcount=%d\n", head,
473 head->so_usecount);
474 error = msleep((caddr_t)&head->so_timeo, mutex_held,
475 PSOCK | PCATCH, "accept", 0);
91447636 476 if (head->so_usecount < 1)
2d21ac55
A
477 panic("accept: 2 head=%p refcount=%d\n", head,
478 head->so_usecount);
91447636
A
479 if ((head->so_state & SS_DRAINING)) {
480 error = ECONNABORTED;
481 }
1c79356b 482 if (error) {
91447636
A
483 socket_unlock(head, 1);
484 goto out;
1c79356b
A
485 }
486 }
487 if (head->so_error) {
488 error = head->so_error;
489 head->so_error = 0;
91447636
A
490 socket_unlock(head, 1);
491 goto out;
1c79356b
A
492 }
493
1c79356b
A
494 /*
495 * At this point we know that there is at least one connection
496 * ready to be accepted. Remove it from the queue prior to
497 * allocating the file descriptor for it since falloc() may
498 * block allowing another process to accept the connection
499 * instead.
500 */
91447636 501 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
813fb2f6
A
502
503 so_acquire_accept_list(head, NULL);
504 if (TAILQ_EMPTY(&head->so_comp)) {
505 so_release_accept_list(head);
506 goto check_again;
507 }
508
e3027f41 509 so = TAILQ_FIRST(&head->so_comp);
1c79356b 510 TAILQ_REMOVE(&head->so_comp, so, so_list);
d190cdc3
A
511 so->so_head = NULL;
512 so->so_state &= ~SS_COMP;
1c79356b 513 head->so_qlen--;
813fb2f6
A
514 so_release_accept_list(head);
515
2d21ac55
A
516 /* unlock head to avoid deadlock with select, keep a ref on head */
517 socket_unlock(head, 0);
518
519#if CONFIG_MACF_SOCKET_SUBSET
520 /*
521 * Pass the pre-accepted socket to the MAC framework. This is
522 * cheaper than allocating a file descriptor for the socket,
523 * calling the protocol accept callback, and possibly freeing
524 * the file descriptor should the MAC check fails.
525 */
526 if ((error = mac_socket_check_accepted(kauth_cred_get(), so)) != 0) {
39236c6e 527 socket_lock(so, 1);
d190cdc3 528 so->so_state &= ~SS_NOFDREF;
39236c6e 529 socket_unlock(so, 1);
2d21ac55
A
530 soclose(so);
531 /* Drop reference on listening socket */
532 sodereference(head);
533 goto out;
534 }
535#endif /* MAC_SOCKET_SUBSET */
536
537 /*
538 * Pass the pre-accepted socket to any interested socket filter(s).
539 * Upon failure, the socket would have been closed by the callee.
540 */
d190cdc3 541 if (so->so_filt != NULL && (error = soacceptfilter(so, head)) != 0) {
2d21ac55
A
542 /* Drop reference on listening socket */
543 sodereference(head);
544 /* Propagate socket filter's error code to the caller */
545 goto out;
546 }
547
1c79356b 548 fflag = fp->f_flag;
2d21ac55 549 error = falloc(p, &fp, &newfd, vfs_context_current());
1c79356b 550 if (error) {
39236c6e 551 /*
316670eb
A
552 * Probably ran out of file descriptors.
553 *
554 * <rdar://problem/8554930>
555 * Don't put this back on the socket like we used to, that
556 * just causes the client to spin. Drop the socket.
1c79356b 557 */
39236c6e 558 socket_lock(so, 1);
d190cdc3 559 so->so_state &= ~SS_NOFDREF;
39236c6e 560 socket_unlock(so, 1);
316670eb
A
561 soclose(so);
562 sodereference(head);
91447636 563 goto out;
2d21ac55 564 }
91447636 565 *retval = newfd;
1c79356b
A
566 fp->f_flag = fflag;
567 fp->f_ops = &socketops;
568 fp->f_data = (caddr_t)so;
fe8ab488 569
91447636
A
570 socket_lock(head, 0);
571 if (dosocklock)
572 socket_lock(so, 1);
fe8ab488 573
fe8ab488
A
574 /* Sync socket non-blocking/async state with file flags */
575 if (fp->f_flag & FNONBLOCK) {
576 so->so_state |= SS_NBIO;
577 } else {
578 so->so_state &= ~SS_NBIO;
579 }
580
581 if (fp->f_flag & FASYNC) {
582 so->so_state |= SS_ASYNC;
583 so->so_rcv.sb_flags |= SB_ASYNC;
584 so->so_snd.sb_flags |= SB_ASYNC;
585 } else {
586 so->so_state &= ~SS_ASYNC;
587 so->so_rcv.sb_flags &= ~SB_ASYNC;
588 so->so_snd.sb_flags &= ~SB_ASYNC;
589 }
590
91447636
A
591 (void) soacceptlock(so, &sa, 0);
592 socket_unlock(head, 1);
2d21ac55 593 if (sa == NULL) {
1c79356b
A
594 namelen = 0;
595 if (uap->name)
596 goto gotnoname;
91447636 597 error = 0;
2d21ac55 598 goto releasefd;
1c79356b 599 }
2d21ac55
A
600 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
601
1c79356b 602 if (uap->name) {
2d21ac55
A
603 socklen_t sa_len;
604
605 /* save sa_len before it is destroyed */
606 sa_len = sa->sa_len;
607 namelen = MIN(namelen, sa_len);
91447636 608 error = copyout(sa, uap->name, namelen);
1c79356b 609 if (!error)
2d21ac55
A
610 /* return the actual, untruncated address length */
611 namelen = sa_len;
1c79356b 612gotnoname:
2d21ac55
A
613 error = copyout((caddr_t)&namelen, uap->anamelen,
614 sizeof (socklen_t));
1c79356b
A
615 }
616 FREE(sa, M_SONAME);
2d21ac55 617
b0d623f7 618releasefd:
2d21ac55 619 /*
6d2010ae
A
620 * If the socket has been marked as inactive by sosetdefunct(),
621 * disallow further operations on it.
2d21ac55
A
622 */
623 if (so->so_flags & SOF_DEFUNCT) {
6d2010ae
A
624 sodefunct(current_proc(), so,
625 SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL);
2d21ac55
A
626 }
627
91447636
A
628 if (dosocklock)
629 socket_unlock(so, 1);
2d21ac55 630
2d21ac55
A
631 proc_fdlock(p);
632 procfdtbl_releasefd(p, newfd, NULL);
633 fp_drop(p, newfd, fp, 1);
634 proc_fdunlock(p);
635
91447636
A
636out:
637 file_drop(fd);
3e170ce0
A
638
639 if (error == 0 && ENTR_SHOULDTRACE) {
640 KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
641 newfd, 0, (int64_t)VM_KERNEL_ADDRPERM(so));
642 }
1c79356b
A
643 return (error);
644}
645
646int
b0d623f7 647accept(struct proc *p, struct accept_args *uap, int32_t *retval)
1c79356b 648{
2d21ac55 649 __pthread_testcancel(1);
3e170ce0
A
650 return (accept_nocancel(p, (struct accept_nocancel_args *)uap,
651 retval));
1c79356b
A
652}
653
2d21ac55
A
654/*
655 * Returns: 0 Success
656 * EBADF Bad file descriptor
657 * EALREADY Connection already in progress
658 * EINPROGRESS Operation in progress
659 * ECONNABORTED Connection aborted
660 * EINTR Interrupted function
661 * EACCES Mandatory Access Control failure
662 * file_socket:ENOTSOCK
663 * file_socket:EBADF
664 * getsockaddr:ENAMETOOLONG Filename too long
665 * getsockaddr:EINVAL Invalid argument
666 * getsockaddr:ENOMEM Not enough space
667 * getsockaddr:EFAULT Bad address
668 * soconnectlock:EOPNOTSUPP
669 * soconnectlock:EISCONN
670 * soconnectlock:??? [depends on protocol, filters]
671 * msleep:EINTR
672 *
673 * Imputed: so_error the returned error may be taken from so_error,
674 * which may have been set by soconnectlock.
675 */
676/* ARGSUSED */
1c79356b 677int
b0d623f7 678connect(struct proc *p, struct connect_args *uap, int32_t *retval)
1c79356b 679{
2d21ac55 680 __pthread_testcancel(1);
3e170ce0
A
681 return (connect_nocancel(p, (struct connect_nocancel_args *)uap,
682 retval));
1c79356b 683}
1c79356b 684
1c79356b 685int
39236c6e 686connect_nocancel(proc_t p, struct connect_nocancel_args *uap, int32_t *retval)
1c79356b 687{
39236c6e 688#pragma unused(p, retval)
91447636 689 struct socket *so;
2d21ac55
A
690 struct sockaddr_storage ss;
691 struct sockaddr *sa = NULL;
91447636
A
692 int error;
693 int fd = uap->s;
4a3eedf9 694 boolean_t dgram;
1c79356b 695
55e303ae 696 AUDIT_ARG(fd, uap->s);
2d21ac55
A
697 error = file_socket(fd, &so);
698 if (error != 0)
1c79356b 699 return (error);
91447636
A
700 if (so == NULL) {
701 error = EBADF;
702 goto out;
703 }
704
4a3eedf9
A
705 /*
706 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
707 * if this is a datagram socket; translate for other types.
708 */
709 dgram = (so->so_type == SOCK_DGRAM);
710
2d21ac55
A
711 /* Get socket address now before we obtain socket lock */
712 if (uap->namelen > sizeof (ss)) {
4a3eedf9 713 error = getsockaddr(so, &sa, uap->name, uap->namelen, !dgram);
2d21ac55 714 } else {
4a3eedf9 715 error = getsockaddr_s(so, &ss, uap->name, uap->namelen, !dgram);
39236c6e 716 if (error == 0)
2d21ac55 717 sa = (struct sockaddr *)&ss;
2d21ac55
A
718 }
719 if (error != 0)
720 goto out;
721
39236c6e
A
722 error = connectit(so, sa);
723
724 if (sa != NULL && sa != SA(&ss))
725 FREE(sa, M_SONAME);
726 if (error == ERESTART)
727 error = EINTR;
728out:
729 file_drop(fd);
730 return (error);
731}
732
733static int
734connectx_nocancel(struct proc *p, struct connectx_args *uap, int *retval)
735{
736#pragma unused(p, retval)
813fb2f6
A
737 struct sockaddr_storage ss, sd;
738 struct sockaddr *src = NULL, *dst = NULL;
39236c6e 739 struct socket *so;
3e170ce0 740 int error, error1, fd = uap->socket;
39236c6e 741 boolean_t dgram;
3e170ce0
A
742 sae_connid_t cid = SAE_CONNID_ANY;
743 struct user32_sa_endpoints ep32;
744 struct user64_sa_endpoints ep64;
745 struct user_sa_endpoints ep;
746 user_ssize_t bytes_written = 0;
747 struct user_iovec *iovp;
748 uio_t auio = NULL;
39236c6e 749
3e170ce0 750 AUDIT_ARG(fd, uap->socket);
39236c6e
A
751 error = file_socket(fd, &so);
752 if (error != 0)
753 return (error);
754 if (so == NULL) {
755 error = EBADF;
756 goto out;
757 }
758
3e170ce0
A
759 if (uap->endpoints == USER_ADDR_NULL) {
760 error = EINVAL;
761 goto out;
762 }
763
764 if (IS_64BIT_PROCESS(p)) {
765 error = copyin(uap->endpoints, (caddr_t)&ep64, sizeof(ep64));
766 if (error != 0)
767 goto out;
768
769 ep.sae_srcif = ep64.sae_srcif;
770 ep.sae_srcaddr = ep64.sae_srcaddr;
771 ep.sae_srcaddrlen = ep64.sae_srcaddrlen;
772 ep.sae_dstaddr = ep64.sae_dstaddr;
773 ep.sae_dstaddrlen = ep64.sae_dstaddrlen;
774 } else {
775 error = copyin(uap->endpoints, (caddr_t)&ep32, sizeof(ep32));
776 if (error != 0)
777 goto out;
778
779 ep.sae_srcif = ep32.sae_srcif;
780 ep.sae_srcaddr = ep32.sae_srcaddr;
781 ep.sae_srcaddrlen = ep32.sae_srcaddrlen;
782 ep.sae_dstaddr = ep32.sae_dstaddr;
783 ep.sae_dstaddrlen = ep32.sae_dstaddrlen;
784 }
fe8ab488 785
39236c6e
A
786 /*
787 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
788 * if this is a datagram socket; translate for other types.
789 */
790 dgram = (so->so_type == SOCK_DGRAM);
791
813fb2f6
A
792 /* Get socket address now before we obtain socket lock */
793 if (ep.sae_srcaddr != USER_ADDR_NULL) {
794 if (ep.sae_srcaddrlen > sizeof (ss)) {
795 error = getsockaddr(so, &src, ep.sae_srcaddr, ep.sae_srcaddrlen, dgram);
796 } else {
797 error = getsockaddr_s(so, &ss, ep.sae_srcaddr, ep.sae_srcaddrlen, dgram);
798 if (error == 0)
799 src = (struct sockaddr *)&ss;
800 }
801
802 if (error)
803 goto out;
804 }
39236c6e 805
3e170ce0
A
806 if (ep.sae_dstaddr == USER_ADDR_NULL) {
807 error = EINVAL;
808 goto out;
809 }
810
813fb2f6
A
811 /* Get socket address now before we obtain socket lock */
812 if (ep.sae_dstaddrlen > sizeof (sd)) {
813 error = getsockaddr(so, &dst, ep.sae_dstaddr, ep.sae_dstaddrlen, dgram);
814 } else {
815 error = getsockaddr_s(so, &sd, ep.sae_dstaddr, ep.sae_dstaddrlen, dgram);
816 if (error == 0)
817 dst = (struct sockaddr *)&sd;
818 }
819
820 if (error)
39236c6e
A
821 goto out;
822
813fb2f6 823 VERIFY(dst != NULL);
39236c6e 824
3e170ce0
A
825 if (uap->iov != USER_ADDR_NULL) {
826 /* Verify range before calling uio_create() */
cc8bc92a
A
827 if (uap->iovcnt <= 0 || uap->iovcnt > UIO_MAXIOV){
828 error = EINVAL;
829 goto out;
830 }
3e170ce0 831
cc8bc92a
A
832 if (uap->len == USER_ADDR_NULL){
833 error = EINVAL;
834 goto out;
835 }
3e170ce0
A
836
837 /* allocate a uio to hold the number of iovecs passed */
838 auio = uio_create(uap->iovcnt, 0,
839 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
840 UIO_WRITE);
841
842 if (auio == NULL) {
843 error = ENOMEM;
844 goto out;
845 }
846
847 /*
848 * get location of iovecs within the uio.
849 * then copyin the iovecs from user space.
850 */
851 iovp = uio_iovsaddr(auio);
852 if (iovp == NULL) {
853 error = ENOMEM;
854 goto out;
855 }
856 error = copyin_user_iovec_array(uap->iov,
857 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
858 uap->iovcnt, iovp);
859 if (error != 0)
860 goto out;
861
862 /* finish setup of uio_t */
863 error = uio_calculateresid(auio);
864 if (error != 0) {
865 goto out;
866 }
867 }
868
813fb2f6 869 error = connectitx(so, src, dst, p, ep.sae_srcif, uap->associd,
3e170ce0 870 &cid, auio, uap->flags, &bytes_written);
39236c6e
A
871 if (error == ERESTART)
872 error = EINTR;
873
3e170ce0
A
874 if (uap->len != USER_ADDR_NULL) {
875 error1 = copyout(&bytes_written, uap->len, sizeof (uap->len));
876 /* give precedence to connectitx errors */
877 if ((error1 != 0) && (error == 0))
878 error = error1;
879 }
39236c6e 880
3e170ce0
A
881 if (uap->connid != USER_ADDR_NULL) {
882 error1 = copyout(&cid, uap->connid, sizeof (cid));
883 /* give precedence to connectitx errors */
884 if ((error1 != 0) && (error == 0))
885 error = error1;
886 }
39236c6e
A
887out:
888 file_drop(fd);
3e170ce0
A
889 if (auio != NULL) {
890 uio_free(auio);
891 }
813fb2f6
A
892 if (src != NULL && src != SA(&ss))
893 FREE(src, M_SONAME);
894 if (dst != NULL && dst != SA(&sd))
895 FREE(dst, M_SONAME);
39236c6e
A
896 return (error);
897}
898
/*
 * connectx(2) system call: tests for pending thread cancellation, then runs
 * the non-cancellable implementation.
 */
int
connectx(struct proc *p, struct connectx_args *uap, int *retval)
{
	int error;

	/*
	 * Due to similarity with a POSIX interface, define as
	 * an unofficial cancellation point.
	 */
	__pthread_testcancel(1);

	error = connectx_nocancel(p, uap, retval);
	return (error);
}
909
910static int
911connectit(struct socket *so, struct sockaddr *sa)
912{
913 int error;
914
2d21ac55
A
915 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
916#if CONFIG_MACF_SOCKET_SUBSET
39236c6e
A
917 if ((error = mac_socket_check_connect(kauth_cred_get(), so, sa)) != 0)
918 return (error);
919#endif /* MAC_SOCKET_SUBSET */
920
921 socket_lock(so, 1);
922 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
923 error = EALREADY;
924 goto out;
925 }
926 error = soconnectlock(so, sa, 0);
927 if (error != 0) {
928 so->so_state &= ~SS_ISCONNECTING;
2d21ac55
A
929 goto out;
930 }
39236c6e
A
931 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
932 error = EINPROGRESS;
933 goto out;
934 }
935 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
936 lck_mtx_t *mutex_held;
937
938 if (so->so_proto->pr_getlock != NULL)
5ba3f43e 939 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
39236c6e
A
940 else
941 mutex_held = so->so_proto->pr_domain->dom_mtx;
942 error = msleep((caddr_t)&so->so_timeo, mutex_held,
943 PSOCK | PCATCH, __func__, 0);
944 if (so->so_state & SS_DRAINING) {
945 error = ECONNABORTED;
946 }
947 if (error != 0)
948 break;
949 }
950 if (error == 0) {
951 error = so->so_error;
952 so->so_error = 0;
953 }
954out:
955 socket_unlock(so, 1);
956 return (error);
957}
958
959static int
813fb2f6
A
960connectitx(struct socket *so, struct sockaddr *src,
961 struct sockaddr *dst, struct proc *p, uint32_t ifscope,
3e170ce0
A
962 sae_associd_t aid, sae_connid_t *pcid, uio_t auio, unsigned int flags,
963 user_ssize_t *bytes_written)
39236c6e 964{
39236c6e 965 int error;
3e170ce0 966#pragma unused (flags)
39236c6e 967
813fb2f6 968 VERIFY(dst != NULL);
39236c6e 969
813fb2f6 970 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), dst);
39236c6e 971#if CONFIG_MACF_SOCKET_SUBSET
813fb2f6
A
972 if ((error = mac_socket_check_connect(kauth_cred_get(), so, dst)) != 0)
973 return (error);
2d21ac55 974#endif /* MAC_SOCKET_SUBSET */
91447636 975
39236c6e 976 socket_lock(so, 1);
91447636 977 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
91447636
A
978 error = EALREADY;
979 goto out;
980 }
3e170ce0
A
981
982 if ((so->so_proto->pr_flags & PR_DATA_IDEMPOTENT) &&
813fb2f6 983 (flags & CONNECT_DATA_IDEMPOTENT)) {
3e170ce0
A
984 so->so_flags1 |= SOF1_DATA_IDEMPOTENT;
985
813fb2f6 986 if (flags & CONNECT_DATA_AUTHENTICATED)
5ba3f43e 987 so->so_flags1 |= SOF1_DATA_AUTHENTICATED;
813fb2f6
A
988 }
989
3e170ce0
A
990 /*
991 * Case 1: CONNECT_RESUME_ON_READ_WRITE set, no data.
992 * Case 2: CONNECT_RESUME_ON_READ_WRITE set, with data (user error)
993 * Case 3: CONNECT_RESUME_ON_READ_WRITE not set, with data
994 * Case 3 allows user to combine write with connect even if they have
995 * no use for TFO (such as regular TCP, and UDP).
996 * Case 4: CONNECT_RESUME_ON_READ_WRITE not set, no data (regular case)
997 */
998 if ((so->so_proto->pr_flags & PR_PRECONN_WRITE) &&
999 ((flags & CONNECT_RESUME_ON_READ_WRITE) || auio))
1000 so->so_flags1 |= SOF1_PRECONNECT_DATA;
1001
1002 /*
1003 * If a user sets data idempotent and does not pass an uio, or
1004 * sets CONNECT_RESUME_ON_READ_WRITE, this is an error, reset
1005 * SOF1_DATA_IDEMPOTENT.
1006 */
1007 if (!(so->so_flags1 & SOF1_PRECONNECT_DATA) &&
1008 (so->so_flags1 & SOF1_DATA_IDEMPOTENT)) {
1009 /* We should return EINVAL instead perhaps. */
1010 so->so_flags1 &= ~SOF1_DATA_IDEMPOTENT;
1011 }
1012
813fb2f6 1013 error = soconnectxlocked(so, src, dst, p, ifscope,
3e170ce0 1014 aid, pcid, 0, NULL, 0, auio, bytes_written);
39236c6e
A
1015 if (error != 0) {
1016 so->so_state &= ~SS_ISCONNECTING;
1017 goto out;
1018 }
3e170ce0
A
1019 /*
1020 * If, after the call to soconnectxlocked the flag is still set (in case
1021 * data has been queued and the connect() has actually been triggered,
1022 * it will have been unset by the transport), we exit immediately. There
1023 * is no reason to wait on any event.
1024 */
1025 if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
1026 error = 0;
1027 goto out;
1028 }
1c79356b 1029 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
91447636
A
1030 error = EINPROGRESS;
1031 goto out;
1c79356b 1032 }
1c79356b 1033 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
39236c6e
A
1034 lck_mtx_t *mutex_held;
1035
2d21ac55 1036 if (so->so_proto->pr_getlock != NULL)
5ba3f43e 1037 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
2d21ac55 1038 else
91447636 1039 mutex_held = so->so_proto->pr_domain->dom_mtx;
2d21ac55 1040 error = msleep((caddr_t)&so->so_timeo, mutex_held,
39236c6e
A
1041 PSOCK | PCATCH, __func__, 0);
1042 if (so->so_state & SS_DRAINING) {
91447636
A
1043 error = ECONNABORTED;
1044 }
39236c6e 1045 if (error != 0)
1c79356b
A
1046 break;
1047 }
1048 if (error == 0) {
1049 error = so->so_error;
1050 so->so_error = 0;
1051 }
39236c6e 1052out:
91447636 1053 socket_unlock(so, 1);
39236c6e
A
1054 return (error);
1055}
1056
/*
 * peeloff system call: the arguments are ignored; after the cancellation
 * check the call simply reports success.
 */
int
peeloff(struct proc *p, struct peeloff_args *uap, int *retval)
{
#pragma unused(p, uap, retval)
	const int result = 0;

	/*
	 * Because of its similarity to a POSIX interface, this call is
	 * treated as an unofficial cancellation point.
	 */
	__pthread_testcancel(1);

	return (result);
}
1068
/*
 * disconnectx system call entry point: performs the pthread cancellation
 * check, then forwards the request unchanged to disconnectx_nocancel().
 */
int
disconnectx(struct proc *p, struct disconnectx_args *uap, int *retval)
{
	int error;

	/*
	 * Because of its similarity to a POSIX interface, this call is
	 * treated as an unofficial cancellation point.
	 */
	__pthread_testcancel(1);

	error = disconnectx_nocancel(p, uap, retval);
	return (error);
}
1079
1080static int
1081disconnectx_nocancel(struct proc *p, struct disconnectx_args *uap, int *retval)
1082{
1083#pragma unused(p, retval)
1084 struct socket *so;
1085 int fd = uap->s;
1086 int error;
1087
1088 error = file_socket(fd, &so);
1089 if (error != 0)
1090 return (error);
1091 if (so == NULL) {
1092 error = EBADF;
1093 goto out;
1094 }
1095
1096 error = sodisconnectx(so, uap->aid, uap->cid);
91447636
A
1097out:
1098 file_drop(fd);
1c79356b
A
1099 return (error);
1100}
1101
2d21ac55
A
1102/*
1103 * Returns: 0 Success
1104 * socreate:EAFNOSUPPORT
1105 * socreate:EPROTOTYPE
1106 * socreate:EPROTONOSUPPORT
1107 * socreate:ENOBUFS
1108 * socreate:ENOMEM
1109 * socreate:EISCONN
1110 * socreate:??? [other protocol families, IPSEC]
1111 * falloc:ENFILE
1112 * falloc:EMFILE
1113 * falloc:ENOMEM
1114 * copyout:EFAULT
1115 * soconnect2:EINVAL
1116 * soconnect2:EPROTOTYPE
1117 * soconnect2:??? [other protocol families[
1118 */
1c79356b 1119int
2d21ac55 1120socketpair(struct proc *p, struct socketpair_args *uap,
b0d623f7 1121 __unused int32_t *retval)
1c79356b 1122{
91447636 1123 struct fileproc *fp1, *fp2;
1c79356b
A
1124 struct socket *so1, *so2;
1125 int fd, error, sv[2];
1126
55e303ae 1127 AUDIT_ARG(socket, uap->domain, uap->type, uap->protocol);
1c79356b
A
1128 error = socreate(uap->domain, &so1, uap->type, uap->protocol);
1129 if (error)
1130 return (error);
1131 error = socreate(uap->domain, &so2, uap->type, uap->protocol);
1132 if (error)
1133 goto free1;
91447636 1134
2d21ac55 1135 error = falloc(p, &fp1, &fd, vfs_context_current());
91447636 1136 if (error) {
1c79356b 1137 goto free2;
91447636 1138 }
1c79356b 1139 fp1->f_flag = FREAD|FWRITE;
1c79356b
A
1140 fp1->f_ops = &socketops;
1141 fp1->f_data = (caddr_t)so1;
91447636
A
1142 sv[0] = fd;
1143
2d21ac55 1144 error = falloc(p, &fp2, &fd, vfs_context_current());
91447636 1145 if (error) {
1c79356b 1146 goto free3;
91447636 1147 }
1c79356b 1148 fp2->f_flag = FREAD|FWRITE;
1c79356b
A
1149 fp2->f_ops = &socketops;
1150 fp2->f_data = (caddr_t)so2;
1151 sv[1] = fd;
91447636 1152
1c79356b
A
1153 error = soconnect2(so1, so2);
1154 if (error) {
1c79356b
A
1155 goto free4;
1156 }
1c79356b
A
1157 if (uap->type == SOCK_DGRAM) {
1158 /*
1159 * Datagram socket connection is asymmetric.
1160 */
2d21ac55
A
1161 error = soconnect2(so2, so1);
1162 if (error) {
1163 goto free4;
1164 }
1c79356b 1165 }
91447636 1166
6d2010ae
A
1167 if ((error = copyout(sv, uap->rsv, 2 * sizeof (int))) != 0)
1168 goto free4;
1169
91447636 1170 proc_fdlock(p);
6601e61a
A
1171 procfdtbl_releasefd(p, sv[0], NULL);
1172 procfdtbl_releasefd(p, sv[1], NULL);
91447636
A
1173 fp_drop(p, sv[0], fp1, 1);
1174 fp_drop(p, sv[1], fp2, 1);
1175 proc_fdunlock(p);
1176
6d2010ae 1177 return (0);
1c79356b 1178free4:
91447636 1179 fp_free(p, sv[1], fp2);
1c79356b 1180free3:
91447636 1181 fp_free(p, sv[0], fp1);
1c79356b 1182free2:
2d21ac55 1183 (void) soclose(so2);
1c79356b 1184free1:
2d21ac55 1185 (void) soclose(so1);
1c79356b
A
1186 return (error);
1187}
1188
2d21ac55
A
1189/*
1190 * Returns: 0 Success
1191 * EINVAL
1192 * ENOBUFS
1193 * EBADF
1194 * EPIPE
1195 * EACCES Mandatory Access Control failure
1196 * file_socket:ENOTSOCK
1197 * file_socket:EBADF
1198 * getsockaddr:ENAMETOOLONG Filename too long
1199 * getsockaddr:EINVAL Invalid argument
1200 * getsockaddr:ENOMEM Not enough space
1201 * getsockaddr:EFAULT Bad address
1202 * <pru_sosend>:EACCES[TCP]
1203 * <pru_sosend>:EADDRINUSE[TCP]
1204 * <pru_sosend>:EADDRNOTAVAIL[TCP]
1205 * <pru_sosend>:EAFNOSUPPORT[TCP]
1206 * <pru_sosend>:EAGAIN[TCP]
1207 * <pru_sosend>:EBADF
1208 * <pru_sosend>:ECONNRESET[TCP]
1209 * <pru_sosend>:EFAULT
1210 * <pru_sosend>:EHOSTUNREACH[TCP]
1211 * <pru_sosend>:EINTR
1212 * <pru_sosend>:EINVAL
1213 * <pru_sosend>:EISCONN[AF_INET]
1214 * <pru_sosend>:EMSGSIZE[TCP]
1215 * <pru_sosend>:ENETDOWN[TCP]
1216 * <pru_sosend>:ENETUNREACH[TCP]
1217 * <pru_sosend>:ENOBUFS
1218 * <pru_sosend>:ENOMEM[TCP]
1219 * <pru_sosend>:ENOTCONN[AF_INET]
1220 * <pru_sosend>:EOPNOTSUPP
1221 * <pru_sosend>:EPERM[TCP]
1222 * <pru_sosend>:EPIPE
1223 * <pru_sosend>:EWOULDBLOCK
1224 * <pru_sosend>:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
1225 * <pru_sosend>:???[AF_INET] [whatever a filter author chooses]
1226 * <pru_sosend>:??? [value from so_error]
1227 * sockargs:???
1228 */
1c79356b 1229static int
3e170ce0 1230sendit(struct proc *p, struct socket *so, struct user_msghdr *mp, uio_t uiop,
b0d623f7 1231 int flags, int32_t *retval)
1c79356b 1232{
2d21ac55
A
1233 struct mbuf *control = NULL;
1234 struct sockaddr_storage ss;
1235 struct sockaddr *to = NULL;
1236 boolean_t want_free = TRUE;
91447636 1237 int error;
91447636 1238 user_ssize_t len;
2d21ac55
A
1239
1240 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
1c79356b 1241
2d21ac55
A
1242 if (mp->msg_name != USER_ADDR_NULL) {
1243 if (mp->msg_namelen > sizeof (ss)) {
1244 error = getsockaddr(so, &to, mp->msg_name,
4a3eedf9 1245 mp->msg_namelen, TRUE);
2d21ac55
A
1246 } else {
1247 error = getsockaddr_s(so, &ss, mp->msg_name,
4a3eedf9 1248 mp->msg_namelen, TRUE);
2d21ac55
A
1249 if (error == 0) {
1250 to = (struct sockaddr *)&ss;
1251 want_free = FALSE;
1252 }
1c79356b 1253 }
2d21ac55
A
1254 if (error != 0)
1255 goto out;
1256 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), to);
91447636 1257 }
2d21ac55
A
1258 if (mp->msg_control != USER_ADDR_NULL) {
1259 if (mp->msg_controllen < sizeof (struct cmsghdr)) {
1c79356b
A
1260 error = EINVAL;
1261 goto bad;
1262 }
1263 error = sockargs(&control, mp->msg_control,
1264 mp->msg_controllen, MT_CONTROL);
2d21ac55 1265 if (error != 0)
1c79356b 1266 goto bad;
91447636 1267 }
1c79356b 1268
2d21ac55
A
1269#if CONFIG_MACF_SOCKET_SUBSET
1270 /*
1271 * We check the state without holding the socket lock;
1272 * if a race condition occurs, it would simply result
3e170ce0 1273 * in an extra call to the MAC check function.
2d21ac55 1274 */
3e170ce0 1275 if (to != NULL &&
316670eb 1276 !(so->so_state & SS_DEFUNCT) &&
2d21ac55
A
1277 (error = mac_socket_check_send(kauth_cred_get(), so, to)) != 0)
1278 goto bad;
1279#endif /* MAC_SOCKET_SUBSET */
91447636
A
1280
1281 len = uio_resid(uiop);
39236c6e
A
1282 error = so->so_proto->pr_usrreqs->pru_sosend(so, to, uiop, 0,
1283 control, flags);
2d21ac55 1284 if (error != 0) {
91447636 1285 if (uio_resid(uiop) != len && (error == ERESTART ||
1c79356b
A
1286 error == EINTR || error == EWOULDBLOCK))
1287 error = 0;
2d21ac55 1288 /* Generation of SIGPIPE can be controlled per socket */
9bccf70c 1289 if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE))
1c79356b
A
1290 psignal(p, SIGPIPE);
1291 }
1292 if (error == 0)
91447636
A
1293 *retval = (int)(len - uio_resid(uiop));
1294bad:
2d21ac55 1295 if (to != NULL && want_free)
1c79356b 1296 FREE(to, M_SONAME);
91447636 1297out:
2d21ac55 1298 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error, 0, 0, 0, 0);
3e170ce0 1299
1c79356b
A
1300 return (error);
1301}
1302
2d21ac55
A
1303/*
1304 * Returns: 0 Success
1305 * ENOMEM
1306 * sendit:??? [see sendit definition in this file]
1307 * write:??? [4056224: applicable for pipes]
1308 */
1c79356b 1309int
b0d623f7 1310sendto(struct proc *p, struct sendto_args *uap, int32_t *retval)
2d21ac55
A
1311{
1312 __pthread_testcancel(1);
39236c6e 1313 return (sendto_nocancel(p, (struct sendto_nocancel_args *)uap, retval));
2d21ac55
A
1314}
1315
1316int
39236c6e
A
1317sendto_nocancel(struct proc *p,
1318 struct sendto_nocancel_args *uap,
1319 int32_t *retval)
1c79356b 1320{
91447636
A
1321 struct user_msghdr msg;
1322 int error;
1323 uio_t auio = NULL;
3e170ce0 1324 struct socket *so;
1c79356b 1325
2d21ac55 1326 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_START, 0, 0, 0, 0, 0);
55e303ae 1327 AUDIT_ARG(fd, uap->s);
1c79356b 1328
91447636 1329 auio = uio_create(1, 0,
2d21ac55
A
1330 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1331 UIO_WRITE);
91447636 1332 if (auio == NULL) {
3e170ce0
A
1333 error = ENOMEM;
1334 goto done;
91447636
A
1335 }
1336 uio_addiov(auio, uap->buf, uap->len);
1337
1c79356b
A
1338 msg.msg_name = uap->to;
1339 msg.msg_namelen = uap->tolen;
91447636
A
1340 /* no need to set up msg_iov. sendit uses uio_t we send it */
1341 msg.msg_iov = 0;
1342 msg.msg_iovlen = 0;
1c79356b 1343 msg.msg_control = 0;
1c79356b 1344 msg.msg_flags = 0;
1c79356b 1345
3e170ce0
A
1346 error = file_socket(uap->s, &so);
1347 if (error)
1348 goto done;
2d21ac55 1349
3e170ce0
A
1350 if (so == NULL) {
1351 error = EBADF;
1352 } else {
1353 error = sendit(p, so, &msg, auio, uap->flags, retval);
91447636 1354 }
2d21ac55 1355
3e170ce0
A
1356 file_drop(uap->s);
1357done:
1358 if (auio != NULL)
1359 uio_free(auio);
1360
2d21ac55 1361 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_END, error, *retval, 0, 0, 0);
1c79356b 1362
2d21ac55 1363 return (error);
1c79356b 1364}
1c79356b 1365
2d21ac55
A
1366/*
1367 * Returns: 0 Success
1368 * ENOBUFS
1369 * copyin:EFAULT
1370 * sendit:??? [see sendit definition in this file]
1371 */
1c79356b 1372int
b0d623f7 1373sendmsg(struct proc *p, struct sendmsg_args *uap, int32_t *retval)
1c79356b 1374{
2d21ac55 1375 __pthread_testcancel(1);
3e170ce0
A
1376 return (sendmsg_nocancel(p, (struct sendmsg_nocancel_args *)uap,
1377 retval));
1c79356b 1378}
1c79356b
A
1379
1380int
3e170ce0
A
1381sendmsg_nocancel(struct proc *p, struct sendmsg_nocancel_args *uap,
1382 int32_t *retval)
1c79356b 1383{
b0d623f7
A
1384 struct user32_msghdr msg32;
1385 struct user64_msghdr msg64;
91447636
A
1386 struct user_msghdr user_msg;
1387 caddr_t msghdrp;
1388 int size_of_msghdr;
1c79356b 1389 int error;
91447636
A
1390 uio_t auio = NULL;
1391 struct user_iovec *iovp;
3e170ce0 1392 struct socket *so;
1c79356b 1393
2d21ac55 1394 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
55e303ae 1395 AUDIT_ARG(fd, uap->s);
91447636 1396 if (IS_64BIT_PROCESS(p)) {
b0d623f7
A
1397 msghdrp = (caddr_t)&msg64;
1398 size_of_msghdr = sizeof (msg64);
2d21ac55 1399 } else {
b0d623f7
A
1400 msghdrp = (caddr_t)&msg32;
1401 size_of_msghdr = sizeof (msg32);
91447636
A
1402 }
1403 error = copyin(uap->msg, msghdrp, size_of_msghdr);
2d21ac55
A
1404 if (error) {
1405 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1406 return (error);
1c79356b 1407 }
91447636 1408
b0d623f7
A
1409 if (IS_64BIT_PROCESS(p)) {
1410 user_msg.msg_flags = msg64.msg_flags;
1411 user_msg.msg_controllen = msg64.msg_controllen;
1412 user_msg.msg_control = msg64.msg_control;
1413 user_msg.msg_iovlen = msg64.msg_iovlen;
1414 user_msg.msg_iov = msg64.msg_iov;
1415 user_msg.msg_namelen = msg64.msg_namelen;
1416 user_msg.msg_name = msg64.msg_name;
1417 } else {
1418 user_msg.msg_flags = msg32.msg_flags;
1419 user_msg.msg_controllen = msg32.msg_controllen;
1420 user_msg.msg_control = msg32.msg_control;
1421 user_msg.msg_iovlen = msg32.msg_iovlen;
1422 user_msg.msg_iov = msg32.msg_iov;
1423 user_msg.msg_namelen = msg32.msg_namelen;
1424 user_msg.msg_name = msg32.msg_name;
91447636
A
1425 }
1426
1427 if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
2d21ac55
A
1428 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, EMSGSIZE,
1429 0, 0, 0, 0);
91447636
A
1430 return (EMSGSIZE);
1431 }
1432
1433 /* allocate a uio large enough to hold the number of iovecs passed */
1434 auio = uio_create(user_msg.msg_iovlen, 0,
2d21ac55
A
1435 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1436 UIO_WRITE);
91447636
A
1437 if (auio == NULL) {
1438 error = ENOBUFS;
1439 goto done;
1440 }
2d21ac55 1441
91447636 1442 if (user_msg.msg_iovlen) {
2d21ac55
A
1443 /*
1444 * get location of iovecs within the uio.
1445 * then copyin the iovecs from user space.
91447636
A
1446 */
1447 iovp = uio_iovsaddr(auio);
1448 if (iovp == NULL) {
1449 error = ENOBUFS;
1450 goto done;
1451 }
b0d623f7
A
1452 error = copyin_user_iovec_array(user_msg.msg_iov,
1453 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
1454 user_msg.msg_iovlen, iovp);
91447636
A
1455 if (error)
1456 goto done;
1457 user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
2d21ac55
A
1458
1459 /* finish setup of uio_t */
39236c6e
A
1460 error = uio_calculateresid(auio);
1461 if (error) {
1462 goto done;
1463 }
2d21ac55 1464 } else {
91447636
A
1465 user_msg.msg_iov = 0;
1466 }
2d21ac55
A
1467
1468 /* msg_flags is ignored for send */
91447636 1469 user_msg.msg_flags = 0;
2d21ac55 1470
3e170ce0
A
1471 error = file_socket(uap->s, &so);
1472 if (error) {
1473 goto done;
1474 }
1475 if (so == NULL) {
1476 error = EBADF;
1477 } else {
1478 error = sendit(p, so, &user_msg, auio, uap->flags, retval);
1479 }
1480 file_drop(uap->s);
1c79356b 1481done:
91447636
A
1482 if (auio != NULL) {
1483 uio_free(auio);
1484 }
2d21ac55 1485 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
91447636 1486
1c79356b
A
1487 return (error);
1488}
1489
fe8ab488
A
1490int
1491sendmsg_x(struct proc *p, struct sendmsg_x_args *uap, user_ssize_t *retval)
1492{
1493 int error = 0;
3e170ce0 1494 struct user_msghdr_x *user_msg_x = NULL;
fe8ab488
A
1495 struct uio **uiop = NULL;
1496 struct socket *so;
1497 u_int i;
1498 struct sockaddr *to = NULL;
fe8ab488
A
1499 user_ssize_t len_before = 0, len_after;
1500 int need_drop = 0;
1501 size_t size_of_msghdr;
1502 void *umsgp = NULL;
1503 u_int uiocnt;
3e170ce0 1504 int has_addr_or_ctl = 0;
fe8ab488
A
1505
1506 KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0);
1507
1508 error = file_socket(uap->s, &so);
1509 if (error) {
1510 goto out;
1511 }
1512 need_drop = 1;
1513 if (so == NULL) {
1514 error = EBADF;
1515 goto out;
1516 }
fe8ab488
A
1517
1518 /*
1519 * Input parameter range check
1520 */
1521 if (uap->cnt == 0 || uap->cnt > UIO_MAXIOV) {
1522 error = EINVAL;
1523 goto out;
1524 }
3e170ce0
A
1525 /*
1526 * Clip to max currently allowed
1527 */
1528 if (uap->cnt > somaxsendmsgx)
1529 uap->cnt = somaxsendmsgx;
1530
1531 user_msg_x = _MALLOC(uap->cnt * sizeof(struct user_msghdr_x),
fe8ab488 1532 M_TEMP, M_WAITOK | M_ZERO);
3e170ce0
A
1533 if (user_msg_x == NULL) {
1534 DBG_PRINTF("%s _MALLOC() user_msg_x failed\n", __func__);
fe8ab488
A
1535 error = ENOMEM;
1536 goto out;
1537 }
1538 uiop = _MALLOC(uap->cnt * sizeof(struct uio *),
1539 M_TEMP, M_WAITOK | M_ZERO);
1540 if (uiop == NULL) {
3e170ce0 1541 DBG_PRINTF("%s _MALLOC() uiop failed\n", __func__);
fe8ab488
A
1542 error = ENOMEM;
1543 goto out;
1544 }
1545
1546 size_of_msghdr = IS_64BIT_PROCESS(p) ?
1547 sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x);
1548
3e170ce0 1549 umsgp = _MALLOC(uap->cnt * size_of_msghdr,
fe8ab488
A
1550 M_TEMP, M_WAITOK | M_ZERO);
1551 if (umsgp == NULL) {
3e170ce0 1552 printf("%s _MALLOC() user_msg_x failed\n", __func__);
fe8ab488
A
1553 error = ENOMEM;
1554 goto out;
1555 }
1556 error = copyin(uap->msgp, umsgp, uap->cnt * size_of_msghdr);
1557 if (error) {
3e170ce0 1558 DBG_PRINTF("%s copyin() failed\n", __func__);
fe8ab488
A
1559 goto out;
1560 }
1561 error = internalize_user_msghdr_array(umsgp,
1562 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
3e170ce0 1563 UIO_WRITE, uap->cnt, user_msg_x, uiop);
fe8ab488 1564 if (error) {
3e170ce0 1565 DBG_PRINTF("%s copyin_user_msghdr_array() failed\n", __func__);
fe8ab488
A
1566 goto out;
1567 }
1568 /*
1569 * Make sure the size of each message iovec and
1570 * the aggregate size of all the iovec is valid
1571 */
1572 if (uio_array_is_valid(uiop, uap->cnt) == 0) {
1573 error = EINVAL;
1574 goto out;
1575 }
1576
1577 /*
1578 * Sanity check on passed arguments
1579 */
1580 for (i = 0; i < uap->cnt; i++) {
3e170ce0 1581 struct user_msghdr_x *mp = user_msg_x + i;
fe8ab488
A
1582
1583 /*
1584 * No flags on send message
1585 */
1586 if (mp->msg_flags != 0) {
1587 error = EINVAL;
1588 goto out;
1589 }
1590 /*
1591 * No support for address or ancillary data (yet)
1592 */
3e170ce0
A
1593 if (mp->msg_name != USER_ADDR_NULL || mp->msg_namelen != 0)
1594 has_addr_or_ctl = 1;
1595
fe8ab488 1596 if (mp->msg_control != USER_ADDR_NULL ||
3e170ce0
A
1597 mp->msg_controllen != 0)
1598 has_addr_or_ctl = 1;
1599
fe8ab488
A
1600#if CONFIG_MACF_SOCKET_SUBSET
1601 /*
1602 * We check the state without holding the socket lock;
1603 * if a race condition occurs, it would simply result
3e170ce0 1604 * in an extra call to the MAC check function.
fe8ab488
A
1605 *
1606 * Note: The following check is never true taken with the
1607 * current limitation that we do not accept to pass an address,
3e170ce0
A
1608 * this is effectively placeholder code. If we add support for
1609 * addresses, we will have to check every address.
fe8ab488 1610 */
3e170ce0 1611 if (to != NULL &&
fe8ab488 1612 !(so->so_state & SS_DEFUNCT) &&
3e170ce0
A
1613 (error = mac_socket_check_send(kauth_cred_get(), so, to))
1614 != 0)
fe8ab488
A
1615 goto out;
1616#endif /* MAC_SOCKET_SUBSET */
1617 }
1618
1619 len_before = uio_array_resid(uiop, uap->cnt);
1620
3e170ce0
A
1621 /*
1622 * Feed list of packets at once only for connected socket without
1623 * control message
1624 */
1625 if (so->so_proto->pr_usrreqs->pru_sosend_list !=
1626 pru_sosend_list_notsupp &&
1627 has_addr_or_ctl == 0 && somaxsendmsgx == 0) {
1628 error = so->so_proto->pr_usrreqs->pru_sosend_list(so, uiop,
1629 uap->cnt, uap->flags);
1630 } else {
1631 for (i = 0; i < uap->cnt; i++) {
1632 struct user_msghdr_x *mp = user_msg_x + i;
1633 struct user_msghdr user_msg;
1634 uio_t auio = uiop[i];
1635 int32_t tmpval;
1636
1637 user_msg.msg_flags = mp->msg_flags;
1638 user_msg.msg_controllen = mp->msg_controllen;
1639 user_msg.msg_control = mp->msg_control;
1640 user_msg.msg_iovlen = mp->msg_iovlen;
1641 user_msg.msg_iov = mp->msg_iov;
1642 user_msg.msg_namelen = mp->msg_namelen;
1643 user_msg.msg_name = mp->msg_name;
1644
1645 error = sendit(p, so, &user_msg, auio, uap->flags,
1646 &tmpval);
1647 if (error != 0)
1648 break;
1649 }
1650 }
fe8ab488
A
1651 len_after = uio_array_resid(uiop, uap->cnt);
1652
3e170ce0
A
1653 VERIFY(len_after <= len_before);
1654
fe8ab488
A
1655 if (error != 0) {
1656 if (len_after != len_before && (error == ERESTART ||
3e170ce0
A
1657 error == EINTR || error == EWOULDBLOCK ||
1658 error == ENOBUFS))
fe8ab488
A
1659 error = 0;
1660 /* Generation of SIGPIPE can be controlled per socket */
1661 if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE))
1662 psignal(p, SIGPIPE);
1663 }
1664 if (error == 0) {
1665 uiocnt = externalize_user_msghdr_array(umsgp,
1666 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
3e170ce0 1667 UIO_WRITE, uap->cnt, user_msg_x, uiop);
fe8ab488
A
1668
1669 *retval = (int)(uiocnt);
1670 }
1671out:
1672 if (need_drop)
1673 file_drop(uap->s);
1674 if (umsgp != NULL)
1675 _FREE(umsgp, M_TEMP);
1676 if (uiop != NULL) {
1677 free_uio_array(uiop, uap->cnt);
1678 _FREE(uiop, M_TEMP);
1679 }
3e170ce0
A
1680 if (user_msg_x != NULL)
1681 _FREE(user_msg_x, M_TEMP);
fe8ab488
A
1682
1683 KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0);
1684
1685 return (error);
1686}
1687
3e170ce0
A
1688
1689static int
1690copyout_sa(struct sockaddr *fromsa, user_addr_t name, socklen_t *namelen)
1691{
1692 int error = 0;
1693 socklen_t sa_len = 0;
1694 ssize_t len;
1695
1696 len = *namelen;
1697 if (len <= 0 || fromsa == 0) {
1698 len = 0;
1699 } else {
1700#ifndef MIN
1701#define MIN(a, b) ((a) > (b) ? (b) : (a))
1702#endif
1703 sa_len = fromsa->sa_len;
1704 len = MIN((unsigned int)len, sa_len);
1705 error = copyout(fromsa, name, (unsigned)len);
1706 if (error)
1707 goto out;
1708 }
1709 *namelen = sa_len;
1710out:
1711 return (0);
1712}
1713
1714static int
1715copyout_control(struct proc *p, struct mbuf *m, user_addr_t control,
1716 socklen_t *controllen, int *flags)
1717{
1718 int error = 0;
1719 ssize_t len;
1720 user_addr_t ctlbuf;
1721
1722 len = *controllen;
1723 *controllen = 0;
1724 ctlbuf = control;
1725
1726 while (m && len > 0) {
1727 unsigned int tocopy;
1728 struct cmsghdr *cp = mtod(m, struct cmsghdr *);
1729 int cp_size = CMSG_ALIGN(cp->cmsg_len);
1730 int buflen = m->m_len;
1731
1732 while (buflen > 0 && len > 0) {
1733 /*
1734 * SCM_TIMESTAMP hack because struct timeval has a
1735 * different size for 32 bits and 64 bits processes
1736 */
1737 if (cp->cmsg_level == SOL_SOCKET && cp->cmsg_type == SCM_TIMESTAMP) {
527f9951 1738 unsigned char tmp_buffer[CMSG_SPACE(sizeof(struct user64_timeval))] = {};
3e170ce0
A
1739 struct cmsghdr *tmp_cp = (struct cmsghdr *)(void *)tmp_buffer;
1740 int tmp_space;
1741 struct timeval *tv = (struct timeval *)(void *)CMSG_DATA(cp);
1742
1743 tmp_cp->cmsg_level = SOL_SOCKET;
1744 tmp_cp->cmsg_type = SCM_TIMESTAMP;
1745
1746 if (proc_is64bit(p)) {
1747 struct user64_timeval *tv64 = (struct user64_timeval *)(void *)CMSG_DATA(tmp_cp);
1748
1749 tv64->tv_sec = tv->tv_sec;
1750 tv64->tv_usec = tv->tv_usec;
1751
1752 tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user64_timeval));
1753 tmp_space = CMSG_SPACE(sizeof(struct user64_timeval));
1754 } else {
1755 struct user32_timeval *tv32 = (struct user32_timeval *)(void *)CMSG_DATA(tmp_cp);
1756
1757 tv32->tv_sec = tv->tv_sec;
1758 tv32->tv_usec = tv->tv_usec;
1759
1760 tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user32_timeval));
1761 tmp_space = CMSG_SPACE(sizeof(struct user32_timeval));
1762 }
1763 if (len >= tmp_space) {
1764 tocopy = tmp_space;
1765 } else {
1766 *flags |= MSG_CTRUNC;
1767 tocopy = len;
1768 }
1769 error = copyout(tmp_buffer, ctlbuf, tocopy);
1770 if (error)
1771 goto out;
1772 } else {
1773 if (cp_size > buflen) {
1774 panic("cp_size > buflen, something"
1775 "wrong with alignment!");
1776 }
1777 if (len >= cp_size) {
1778 tocopy = cp_size;
1779 } else {
1780 *flags |= MSG_CTRUNC;
1781 tocopy = len;
1782 }
1783 error = copyout((caddr_t) cp, ctlbuf, tocopy);
1784 if (error)
1785 goto out;
1786 }
1787
1788 ctlbuf += tocopy;
1789 len -= tocopy;
1790
1791 buflen -= cp_size;
1792 cp = (struct cmsghdr *)(void *)
1793 ((unsigned char *) cp + cp_size);
1794 cp_size = CMSG_ALIGN(cp->cmsg_len);
1795 }
1796
1797 m = m->m_next;
1798 }
1799 *controllen = ctlbuf - control;
1800out:
1801 return (error);
1802}
1803
2d21ac55
A
1804/*
1805 * Returns: 0 Success
1806 * ENOTSOCK
1807 * EINVAL
1808 * EBADF
1809 * EACCES Mandatory Access Control failure
1810 * copyout:EFAULT
1811 * fp_lookup:EBADF
1812 * <pru_soreceive>:ENOBUFS
1813 * <pru_soreceive>:ENOTCONN
1814 * <pru_soreceive>:EWOULDBLOCK
1815 * <pru_soreceive>:EFAULT
1816 * <pru_soreceive>:EINTR
1817 * <pru_soreceive>:EBADF
1818 * <pru_soreceive>:EINVAL
1819 * <pru_soreceive>:EMSGSIZE
1820 * <pru_soreceive>:???
1821 *
1822 * Notes: Additional return values from calls through <pru_soreceive>
1823 * depend on protocols other than TCP or AF_UNIX, which are
1824 * documented above.
1825 */
1c79356b 1826static int
2d21ac55 1827recvit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop,
b0d623f7 1828 user_addr_t namelenp, int32_t *retval)
1c79356b 1829{
39236c6e
A
1830 ssize_t len;
1831 int error;
3e170ce0 1832 struct mbuf *control = 0;
1c79356b
A
1833 struct socket *so;
1834 struct sockaddr *fromsa = 0;
91447636 1835 struct fileproc *fp;
1c79356b 1836
2d21ac55 1837 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
91447636 1838 proc_fdlock(p);
2d21ac55
A
1839 if ((error = fp_lookup(p, s, &fp, 1))) {
1840 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
91447636 1841 proc_fdunlock(p);
2d21ac55 1842 return (error);
1c79356b 1843 }
91447636 1844 if (fp->f_type != DTYPE_SOCKET) {
2d21ac55 1845 fp_drop(p, s, fp, 1);
91447636 1846 proc_fdunlock(p);
2d21ac55 1847 return (ENOTSOCK);
91447636 1848 }
1c79356b 1849
2d21ac55
A
1850 so = (struct socket *)fp->f_data;
1851 if (so == NULL) {
1852 fp_drop(p, s, fp, 1);
1853 proc_fdunlock(p);
1854 return (EBADF);
1855 }
91447636
A
1856
1857 proc_fdunlock(p);
2d21ac55
A
1858
1859#if CONFIG_MACF_SOCKET_SUBSET
1860 /*
1861 * We check the state without holding the socket lock;
1862 * if a race condition occurs, it would simply result
1863 * in an extra call to the MAC check function.
1864 */
316670eb
A
1865 if (!(so->so_state & SS_DEFUNCT) &&
1866 !(so->so_state & SS_ISCONNECTED) &&
39236c6e 1867 !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
2d21ac55
A
1868 (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0)
1869 goto out1;
1870#endif /* MAC_SOCKET_SUBSET */
91447636 1871 if (uio_resid(uiop) < 0) {
2d21ac55 1872 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, EINVAL, 0, 0, 0, 0);
91447636
A
1873 error = EINVAL;
1874 goto out1;
1c79356b 1875 }
91447636
A
1876
1877 len = uio_resid(uiop);
2d21ac55
A
1878 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, uiop,
1879 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
1880 &mp->msg_flags);
b0d623f7
A
1881 if (fromsa)
1882 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()),
1883 fromsa);
1c79356b 1884 if (error) {
91447636 1885 if (uio_resid(uiop) != len && (error == ERESTART ||
1c79356b
A
1886 error == EINTR || error == EWOULDBLOCK))
1887 error = 0;
1888 }
1c79356b
A
1889 if (error)
1890 goto out;
2d21ac55 1891
91447636 1892 *retval = len - uio_resid(uiop);
2d21ac55 1893
3e170ce0
A
1894 if (mp->msg_name) {
1895 error = copyout_sa(fromsa, mp->msg_name, &mp->msg_namelen);
1896 if (error)
1897 goto out;
2d21ac55 1898 /* return the actual, untruncated address length */
1c79356b 1899 if (namelenp &&
3e170ce0 1900 (error = copyout((caddr_t)&mp->msg_namelen, namelenp,
2d21ac55 1901 sizeof (int)))) {
1c79356b
A
1902 goto out;
1903 }
1904 }
39236c6e 1905
3e170ce0
A
1906 if (mp->msg_control) {
1907 error = copyout_control(p, control, mp->msg_control,
1908 &mp->msg_controllen, &mp->msg_flags);
1c79356b
A
1909 }
1910out:
1911 if (fromsa)
1912 FREE(fromsa, M_SONAME);
1913 if (control)
1914 m_freem(control);
2d21ac55 1915 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
91447636
A
1916out1:
1917 fp_drop(p, s, fp, 0);
1c79356b
A
1918 return (error);
1919}
1920
2d21ac55
A
1921/*
1922 * Returns: 0 Success
1923 * ENOMEM
1924 * copyin:EFAULT
1925 * recvit:???
1926 * read:??? [4056224: applicable for pipes]
1927 *
1928 * Notes: The read entry point is only called as part of support for
1929 * binary backward compatability; new code should use read
1930 * instead of recv or recvfrom when attempting to read data
1931 * from pipes.
1932 *
1933 * For full documentation of the return codes from recvit, see
1934 * the block header for the recvit function.
1935 */
1936int
b0d623f7 1937recvfrom(struct proc *p, struct recvfrom_args *uap, int32_t *retval)
2d21ac55
A
1938{
1939 __pthread_testcancel(1);
3e170ce0
A
1940 return (recvfrom_nocancel(p, (struct recvfrom_nocancel_args *)uap,
1941 retval));
2d21ac55
A
1942}
1943
1c79356b 1944int
3e170ce0
A
1945recvfrom_nocancel(struct proc *p, struct recvfrom_nocancel_args *uap,
1946 int32_t *retval)
1c79356b 1947{
91447636 1948 struct user_msghdr msg;
1c79356b 1949 int error;
91447636 1950 uio_t auio = NULL;
1c79356b 1951
2d21ac55 1952 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_START, 0, 0, 0, 0, 0);
55e303ae 1953 AUDIT_ARG(fd, uap->s);
1c79356b
A
1954
1955 if (uap->fromlenaddr) {
91447636 1956 error = copyin(uap->fromlenaddr,
1c79356b
A
1957 (caddr_t)&msg.msg_namelen, sizeof (msg.msg_namelen));
1958 if (error)
1959 return (error);
2d21ac55 1960 } else {
1c79356b 1961 msg.msg_namelen = 0;
2d21ac55 1962 }
1c79356b 1963 msg.msg_name = uap->from;
91447636 1964 auio = uio_create(1, 0,
2d21ac55
A
1965 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1966 UIO_READ);
91447636
A
1967 if (auio == NULL) {
1968 return (ENOMEM);
1969 }
2d21ac55 1970
91447636
A
1971 uio_addiov(auio, uap->buf, uap->len);
1972 /* no need to set up msg_iov. recvit uses uio_t we send it */
1973 msg.msg_iov = 0;
1974 msg.msg_iovlen = 0;
1c79356b 1975 msg.msg_control = 0;
91447636 1976 msg.msg_controllen = 0;
1c79356b 1977 msg.msg_flags = uap->flags;
91447636
A
1978 error = recvit(p, uap->s, &msg, auio, uap->fromlenaddr, retval);
1979 if (auio != NULL) {
1980 uio_free(auio);
1981 }
2d21ac55 1982
2d21ac55 1983 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_END, error, 0, 0, 0, 0);
1c79356b 1984
2d21ac55 1985 return (error);
1c79356b
A
1986}
1987
1988/*
2d21ac55
A
1989 * Returns: 0 Success
1990 * EMSGSIZE
1991 * ENOMEM
1992 * copyin:EFAULT
1993 * copyout:EFAULT
1994 * recvit:???
1995 *
1996 * Notes: For full documentation of the return codes from recvit, see
1997 * the block header for the recvit function.
1c79356b
A
1998 */
1999int
b0d623f7 2000recvmsg(struct proc *p, struct recvmsg_args *uap, int32_t *retval)
1c79356b 2001{
2d21ac55 2002 __pthread_testcancel(1);
3e170ce0
A
2003 return (recvmsg_nocancel(p, (struct recvmsg_nocancel_args *)uap,
2004 retval));
1c79356b 2005}
1c79356b
A
2006
2007int
3e170ce0
A
2008recvmsg_nocancel(struct proc *p, struct recvmsg_nocancel_args *uap,
2009 int32_t *retval)
1c79356b 2010{
b0d623f7
A
2011 struct user32_msghdr msg32;
2012 struct user64_msghdr msg64;
91447636
A
2013 struct user_msghdr user_msg;
2014 caddr_t msghdrp;
2015 int size_of_msghdr;
2016 user_addr_t uiov;
2d21ac55 2017 int error;
91447636
A
2018 uio_t auio = NULL;
2019 struct user_iovec *iovp;
1c79356b 2020
2d21ac55 2021 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
55e303ae 2022 AUDIT_ARG(fd, uap->s);
91447636 2023 if (IS_64BIT_PROCESS(p)) {
b0d623f7
A
2024 msghdrp = (caddr_t)&msg64;
2025 size_of_msghdr = sizeof (msg64);
2d21ac55 2026 } else {
b0d623f7
A
2027 msghdrp = (caddr_t)&msg32;
2028 size_of_msghdr = sizeof (msg32);
91447636
A
2029 }
2030 error = copyin(uap->msg, msghdrp, size_of_msghdr);
2d21ac55
A
2031 if (error) {
2032 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1c79356b
A
2033 return (error);
2034 }
2035
91447636 2036 /* only need to copy if user process is not 64-bit */
b0d623f7
A
2037 if (IS_64BIT_PROCESS(p)) {
2038 user_msg.msg_flags = msg64.msg_flags;
2039 user_msg.msg_controllen = msg64.msg_controllen;
2040 user_msg.msg_control = msg64.msg_control;
2041 user_msg.msg_iovlen = msg64.msg_iovlen;
2042 user_msg.msg_iov = msg64.msg_iov;
2043 user_msg.msg_namelen = msg64.msg_namelen;
2044 user_msg.msg_name = msg64.msg_name;
2045 } else {
2046 user_msg.msg_flags = msg32.msg_flags;
2047 user_msg.msg_controllen = msg32.msg_controllen;
2048 user_msg.msg_control = msg32.msg_control;
2049 user_msg.msg_iovlen = msg32.msg_iovlen;
2050 user_msg.msg_iov = msg32.msg_iov;
2051 user_msg.msg_namelen = msg32.msg_namelen;
2052 user_msg.msg_name = msg32.msg_name;
91447636
A
2053 }
2054
2055 if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
2d21ac55
A
2056 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, EMSGSIZE,
2057 0, 0, 0, 0);
91447636
A
2058 return (EMSGSIZE);
2059 }
2060
91447636 2061 user_msg.msg_flags = uap->flags;
91447636
A
2062
2063 /* allocate a uio large enough to hold the number of iovecs passed */
2064 auio = uio_create(user_msg.msg_iovlen, 0,
2d21ac55
A
2065 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
2066 UIO_READ);
91447636
A
2067 if (auio == NULL) {
2068 error = ENOMEM;
2069 goto done;
2070 }
2071
2d21ac55
A
2072 /*
2073 * get location of iovecs within the uio. then copyin the iovecs from
91447636
A
2074 * user space.
2075 */
2076 iovp = uio_iovsaddr(auio);
2077 if (iovp == NULL) {
2078 error = ENOMEM;
2079 goto done;
2080 }
2081 uiov = user_msg.msg_iov;
2082 user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
b0d623f7
A
2083 error = copyin_user_iovec_array(uiov,
2084 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
2085 user_msg.msg_iovlen, iovp);
1c79356b
A
2086 if (error)
2087 goto done;
91447636 2088
2d21ac55 2089 /* finish setup of uio_t */
39236c6e
A
2090 error = uio_calculateresid(auio);
2091 if (error) {
2092 goto done;
2093 }
2d21ac55 2094
91447636 2095 error = recvit(p, uap->s, &user_msg, auio, 0, retval);
1c79356b 2096 if (!error) {
91447636 2097 user_msg.msg_iov = uiov;
b0d623f7
A
2098 if (IS_64BIT_PROCESS(p)) {
2099 msg64.msg_flags = user_msg.msg_flags;
2100 msg64.msg_controllen = user_msg.msg_controllen;
2101 msg64.msg_control = user_msg.msg_control;
2102 msg64.msg_iovlen = user_msg.msg_iovlen;
2103 msg64.msg_iov = user_msg.msg_iov;
2104 msg64.msg_namelen = user_msg.msg_namelen;
2105 msg64.msg_name = user_msg.msg_name;
2106 } else {
2107 msg32.msg_flags = user_msg.msg_flags;
2108 msg32.msg_controllen = user_msg.msg_controllen;
2109 msg32.msg_control = user_msg.msg_control;
2110 msg32.msg_iovlen = user_msg.msg_iovlen;
2111 msg32.msg_iov = user_msg.msg_iov;
2112 msg32.msg_namelen = user_msg.msg_namelen;
2113 msg32.msg_name = user_msg.msg_name;
91447636
A
2114 }
2115 error = copyout(msghdrp, uap->msg, size_of_msghdr);
1c79356b
A
2116 }
2117done:
91447636
A
2118 if (auio != NULL) {
2119 uio_free(auio);
2120 }
2d21ac55 2121 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1c79356b
A
2122 return (error);
2123}
2124
fe8ab488
A
2125int
2126recvmsg_x(struct proc *p, struct recvmsg_x_args *uap, user_ssize_t *retval)
2127{
2128 int error = EOPNOTSUPP;
3e170ce0
A
2129 struct user_msghdr_x *user_msg_x = NULL;
2130 struct recv_msg_elem *recv_msg_array = NULL;
fe8ab488
A
2131 struct socket *so;
2132 user_ssize_t len_before = 0, len_after;
2133 int need_drop = 0;
2134 size_t size_of_msghdr;
2135 void *umsgp = NULL;
2136 u_int i;
2137 u_int uiocnt;
2138
2139 KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0);
2140
2141 error = file_socket(uap->s, &so);
2142 if (error) {
2143 goto out;
2144 }
2145 need_drop = 1;
2146 if (so == NULL) {
2147 error = EBADF;
2148 goto out;
2149 }
fe8ab488
A
2150 /*
2151 * Input parameter range check
2152 */
2153 if (uap->cnt == 0 || uap->cnt > UIO_MAXIOV) {
2154 error = EINVAL;
2155 goto out;
2156 }
3e170ce0
A
2157 if (uap->cnt > somaxrecvmsgx)
2158 uap->cnt = somaxrecvmsgx;
2159
2160 user_msg_x = _MALLOC(uap->cnt * sizeof(struct user_msghdr_x),
fe8ab488 2161 M_TEMP, M_WAITOK | M_ZERO);
3e170ce0
A
2162 if (user_msg_x == NULL) {
2163 DBG_PRINTF("%s _MALLOC() user_msg_x failed\n", __func__);
fe8ab488
A
2164 error = ENOMEM;
2165 goto out;
2166 }
3e170ce0
A
2167 recv_msg_array = alloc_recv_msg_array(uap->cnt);
2168 if (recv_msg_array == NULL) {
2169 DBG_PRINTF("%s alloc_recv_msg_array() failed\n", __func__);
fe8ab488
A
2170 error = ENOMEM;
2171 goto out;
2172 }
fe8ab488
A
2173 size_of_msghdr = IS_64BIT_PROCESS(p) ?
2174 sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x);
2175
2176 umsgp = _MALLOC(uap->cnt * size_of_msghdr, M_TEMP, M_WAITOK | M_ZERO);
2177 if (umsgp == NULL) {
3e170ce0 2178 DBG_PRINTF("%s _MALLOC() umsgp failed\n", __func__);
fe8ab488
A
2179 error = ENOMEM;
2180 goto out;
2181 }
2182 error = copyin(uap->msgp, umsgp, uap->cnt * size_of_msghdr);
2183 if (error) {
3e170ce0 2184 DBG_PRINTF("%s copyin() failed\n", __func__);
fe8ab488
A
2185 goto out;
2186 }
3e170ce0 2187 error = internalize_recv_msghdr_array(umsgp,
fe8ab488 2188 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
3e170ce0 2189 UIO_READ, uap->cnt, user_msg_x, recv_msg_array);
fe8ab488 2190 if (error) {
3e170ce0 2191 DBG_PRINTF("%s copyin_user_msghdr_array() failed\n", __func__);
fe8ab488
A
2192 goto out;
2193 }
2194 /*
2195 * Make sure the size of each message iovec and
2196 * the aggregate size of all the iovec is valid
2197 */
3e170ce0 2198 if (recv_msg_array_is_valid(recv_msg_array, uap->cnt) == 0) {
fe8ab488
A
2199 error = EINVAL;
2200 goto out;
2201 }
fe8ab488
A
2202 /*
2203 * Sanity check on passed arguments
2204 */
2205 for (i = 0; i < uap->cnt; i++) {
3e170ce0 2206 struct user_msghdr_x *mp = user_msg_x + i;
fe8ab488
A
2207
2208 if (mp->msg_flags != 0) {
2209 error = EINVAL;
2210 goto out;
2211 }
fe8ab488
A
2212 }
2213#if CONFIG_MACF_SOCKET_SUBSET
2214 /*
2215 * We check the state without holding the socket lock;
2216 * if a race condition occurs, it would simply result
2217 * in an extra call to the MAC check function.
2218 */
2219 if (!(so->so_state & SS_DEFUNCT) &&
2220 !(so->so_state & SS_ISCONNECTED) &&
2221 !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
2222 (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0)
2223 goto out;
2224#endif /* MAC_SOCKET_SUBSET */
2225
3e170ce0 2226 len_before = recv_msg_array_resid(recv_msg_array, uap->cnt);
fe8ab488 2227
3e170ce0
A
2228 if (so->so_proto->pr_usrreqs->pru_soreceive_list !=
2229 pru_soreceive_list_notsupp &&
2230 somaxrecvmsgx == 0) {
2231 error = so->so_proto->pr_usrreqs->pru_soreceive_list(so,
2232 recv_msg_array, uap->cnt, &uap->flags);
2233 } else {
2234 int flags = uap->flags;
fe8ab488 2235
3e170ce0
A
2236 for (i = 0; i < uap->cnt; i++) {
2237 struct recv_msg_elem *recv_msg_elem;
2238 uio_t auio;
2239 struct sockaddr **psa;
2240 struct mbuf **controlp;
2241
2242 recv_msg_elem = recv_msg_array + i;
2243 auio = recv_msg_elem->uio;
2244
2245 /*
2246 * Do not block if we got at least one packet
2247 */
2248 if (i > 0)
2249 flags |= MSG_DONTWAIT;
2250
2251 psa = (recv_msg_elem->which & SOCK_MSG_SA) ?
2252 &recv_msg_elem->psa : NULL;
2253 controlp = (recv_msg_elem->which & SOCK_MSG_CONTROL) ?
2254 &recv_msg_elem->controlp : NULL;
2255
2256 error = so->so_proto->pr_usrreqs->pru_soreceive(so, psa,
2257 auio, (struct mbuf **)0, controlp, &flags);
2258 if (error)
2259 break;
2260 /*
2261 * We have some data
2262 */
2263 recv_msg_elem->which |= SOCK_MSG_DATA;
2264 /*
2265 * Stop on partial copy
2266 */
2267 if (flags & (MSG_RCVMORE | MSG_TRUNC))
2268 break;
2269 }
2270 if ((uap->flags & MSG_DONTWAIT) == 0)
2271 flags &= ~MSG_DONTWAIT;
2272 uap->flags = flags;
2273 }
2274
2275 len_after = recv_msg_array_resid(recv_msg_array, uap->cnt);
fe8ab488
A
2276
2277 if (error) {
2278 if (len_after != len_before && (error == ERESTART ||
2279 error == EINTR || error == EWOULDBLOCK))
2280 error = 0;
3e170ce0
A
2281 else
2282 goto out;
fe8ab488 2283 }
fe8ab488 2284
3e170ce0
A
2285 uiocnt = externalize_recv_msghdr_array(umsgp,
2286 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
2287 UIO_READ, uap->cnt, user_msg_x, recv_msg_array);
2288
2289 error = copyout(umsgp, uap->msgp, uap->cnt * size_of_msghdr);
2290 if (error) {
2291 DBG_PRINTF("%s copyout() failed\n", __func__);
2292 goto out;
2293 }
2294 *retval = (int)(uiocnt);
2295
2296 for (i = 0; i < uap->cnt; i++) {
2297 struct user_msghdr_x *mp = user_msg_x + i;
2298 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
2299 struct sockaddr *fromsa = recv_msg_elem->psa;
2300
2301 if (mp->msg_name) {
2302 error = copyout_sa(fromsa, mp->msg_name,
2303 &mp->msg_namelen);
2304 if (error)
2305 goto out;
2306 }
2307 if (mp->msg_control) {
2308 error = copyout_control(p, recv_msg_elem->controlp,
2309 mp->msg_control, &mp->msg_controllen,
2310 &mp->msg_flags);
2311 if (error)
2312 goto out;
fe8ab488 2313 }
fe8ab488
A
2314 }
2315out:
2316 if (need_drop)
2317 file_drop(uap->s);
2318 if (umsgp != NULL)
2319 _FREE(umsgp, M_TEMP);
3e170ce0
A
2320 if (recv_msg_array != NULL)
2321 free_recv_msg_array(recv_msg_array, uap->cnt);
2322 if (user_msg_x != NULL)
2323 _FREE(user_msg_x, M_TEMP);
2324
fe8ab488 2325 KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0);
3e170ce0 2326
fe8ab488
A
2327 return (error);
2328}
2329
2d21ac55
A
2330/*
2331 * Returns: 0 Success
2332 * EBADF
2333 * file_socket:ENOTSOCK
2334 * file_socket:EBADF
2335 * soshutdown:EINVAL
2336 * soshutdown:ENOTCONN
2337 * soshutdown:EADDRNOTAVAIL[TCP]
2338 * soshutdown:ENOBUFS[TCP]
2339 * soshutdown:EMSGSIZE[TCP]
2340 * soshutdown:EHOSTUNREACH[TCP]
2341 * soshutdown:ENETUNREACH[TCP]
2342 * soshutdown:ENETDOWN[TCP]
2343 * soshutdown:ENOMEM[TCP]
2344 * soshutdown:EACCES[TCP]
2345 * soshutdown:EMSGSIZE[TCP]
2346 * soshutdown:ENOBUFS[TCP]
2347 * soshutdown:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
2348 * soshutdown:??? [other protocol families]
2349 */
1c79356b
A
2350/* ARGSUSED */
2351int
2d21ac55 2352shutdown(__unused struct proc *p, struct shutdown_args *uap,
b0d623f7 2353 __unused int32_t *retval)
1c79356b 2354{
2d21ac55 2355 struct socket *so;
1c79356b
A
2356 int error;
2357
55e303ae 2358 AUDIT_ARG(fd, uap->s);
91447636 2359 error = file_socket(uap->s, &so);
1c79356b
A
2360 if (error)
2361 return (error);
91447636
A
2362 if (so == NULL) {
2363 error = EBADF;
2364 goto out;
2365 }
2366 error = soshutdown((struct socket *)so, uap->how);
2367out:
2368 file_drop(uap->s);
2d21ac55 2369 return (error);
1c79356b
A
2370}
2371
2d21ac55
A
2372/*
2373 * Returns: 0 Success
2374 * EFAULT
2375 * EINVAL
2376 * EACCES Mandatory Access Control failure
2377 * file_socket:ENOTSOCK
2378 * file_socket:EBADF
2379 * sosetopt:EINVAL
2380 * sosetopt:ENOPROTOOPT
2381 * sosetopt:ENOBUFS
2382 * sosetopt:EDOM
2383 * sosetopt:EFAULT
2384 * sosetopt:EOPNOTSUPP[AF_UNIX]
2385 * sosetopt:???
2386 */
1c79356b
A
2387/* ARGSUSED */
2388int
2d21ac55 2389setsockopt(struct proc *p, struct setsockopt_args *uap,
b0d623f7 2390 __unused int32_t *retval)
1c79356b 2391{
2d21ac55 2392 struct socket *so;
1c79356b
A
2393 struct sockopt sopt;
2394 int error;
2395
55e303ae 2396 AUDIT_ARG(fd, uap->s);
1c79356b
A
2397 if (uap->val == 0 && uap->valsize != 0)
2398 return (EFAULT);
2d21ac55 2399 /* No bounds checking on size (it's unsigned) */
1c79356b 2400
91447636 2401 error = file_socket(uap->s, &so);
1c79356b
A
2402 if (error)
2403 return (error);
2404
2405 sopt.sopt_dir = SOPT_SET;
2406 sopt.sopt_level = uap->level;
2407 sopt.sopt_name = uap->name;
2408 sopt.sopt_val = uap->val;
2409 sopt.sopt_valsize = uap->valsize;
2410 sopt.sopt_p = p;
2411
91447636
A
2412 if (so == NULL) {
2413 error = EINVAL;
2414 goto out;
2415 }
2d21ac55
A
2416#if CONFIG_MACF_SOCKET_SUBSET
2417 if ((error = mac_socket_check_setsockopt(kauth_cred_get(), so,
2418 &sopt)) != 0)
2419 goto out;
2420#endif /* MAC_SOCKET_SUBSET */
39236c6e 2421 error = sosetoptlock(so, &sopt, 1); /* will lock socket */
91447636
A
2422out:
2423 file_drop(uap->s);
2d21ac55 2424 return (error);
1c79356b
A
2425}
2426
2427
2428
2d21ac55
A
2429/*
2430 * Returns: 0 Success
2431 * EINVAL
2432 * EBADF
2433 * EACCES Mandatory Access Control failure
2434 * copyin:EFAULT
2435 * copyout:EFAULT
2436 * file_socket:ENOTSOCK
2437 * file_socket:EBADF
2438 * sogetopt:???
2439 */
1c79356b 2440int
2d21ac55 2441getsockopt(struct proc *p, struct getsockopt_args *uap,
b0d623f7 2442 __unused int32_t *retval)
1c79356b 2443{
91447636
A
2444 int error;
2445 socklen_t valsize;
2446 struct sockopt sopt;
2d21ac55 2447 struct socket *so;
1c79356b 2448
91447636 2449 error = file_socket(uap->s, &so);
1c79356b
A
2450 if (error)
2451 return (error);
2452 if (uap->val) {
2d21ac55
A
2453 error = copyin(uap->avalsize, (caddr_t)&valsize,
2454 sizeof (valsize));
1c79356b 2455 if (error)
91447636 2456 goto out;
2d21ac55
A
2457 /* No bounds checking on size (it's unsigned) */
2458 } else {
1c79356b 2459 valsize = 0;
2d21ac55 2460 }
1c79356b
A
2461 sopt.sopt_dir = SOPT_GET;
2462 sopt.sopt_level = uap->level;
2463 sopt.sopt_name = uap->name;
2464 sopt.sopt_val = uap->val;
2465 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
2466 sopt.sopt_p = p;
2467
91447636
A
2468 if (so == NULL) {
2469 error = EBADF;
2470 goto out;
2471 }
2d21ac55
A
2472#if CONFIG_MACF_SOCKET_SUBSET
2473 if ((error = mac_socket_check_getsockopt(kauth_cred_get(), so,
2474 &sopt)) != 0)
2475 goto out;
2476#endif /* MAC_SOCKET_SUBSET */
39236c6e 2477 error = sogetoptlock((struct socket *)so, &sopt, 1); /* will lock */
1c79356b
A
2478 if (error == 0) {
2479 valsize = sopt.sopt_valsize;
2d21ac55
A
2480 error = copyout((caddr_t)&valsize, uap->avalsize,
2481 sizeof (valsize));
1c79356b 2482 }
91447636
A
2483out:
2484 file_drop(uap->s);
1c79356b
A
2485 return (error);
2486}
2487
2488
2489/*
2490 * Get socket name.
2d21ac55
A
2491 *
2492 * Returns: 0 Success
2493 * EBADF
2494 * file_socket:ENOTSOCK
2495 * file_socket:EBADF
2496 * copyin:EFAULT
2497 * copyout:EFAULT
2498 * <pru_sockaddr>:ENOBUFS[TCP]
2499 * <pru_sockaddr>:ECONNRESET[TCP]
2500 * <pru_sockaddr>:EINVAL[AF_UNIX]
2501 * <sf_getsockname>:???
1c79356b
A
2502 */
2503/* ARGSUSED */
2d21ac55
A
2504int
2505getsockname(__unused struct proc *p, struct getsockname_args *uap,
b0d623f7 2506 __unused int32_t *retval)
1c79356b 2507{
91447636 2508 struct socket *so;
1c79356b 2509 struct sockaddr *sa;
91447636 2510 socklen_t len;
2d21ac55 2511 socklen_t sa_len;
1c79356b
A
2512 int error;
2513
91447636 2514 error = file_socket(uap->fdes, &so);
1c79356b
A
2515 if (error)
2516 return (error);
2d21ac55 2517 error = copyin(uap->alen, (caddr_t)&len, sizeof (socklen_t));
1c79356b 2518 if (error)
91447636
A
2519 goto out;
2520 if (so == NULL) {
2521 error = EBADF;
2522 goto out;
2523 }
1c79356b 2524 sa = 0;
91447636 2525 socket_lock(so, 1);
1c79356b 2526 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
2d21ac55 2527 if (error == 0) {
6d2010ae 2528 error = sflt_getsockname(so, &sa);
91447636
A
2529 if (error == EJUSTRETURN)
2530 error = 0;
91447636
A
2531 }
2532 socket_unlock(so, 1);
1c79356b
A
2533 if (error)
2534 goto bad;
2535 if (sa == 0) {
2536 len = 0;
2537 goto gotnothing;
2538 }
2539
2d21ac55
A
2540 sa_len = sa->sa_len;
2541 len = MIN(len, sa_len);
91447636 2542 error = copyout((caddr_t)sa, uap->asa, len);
2d21ac55
A
2543 if (error)
2544 goto bad;
2545 /* return the actual, untruncated address length */
2546 len = sa_len;
1c79356b 2547gotnothing:
2d21ac55 2548 error = copyout((caddr_t)&len, uap->alen, sizeof (socklen_t));
1c79356b
A
2549bad:
2550 if (sa)
2551 FREE(sa, M_SONAME);
91447636
A
2552out:
2553 file_drop(uap->fdes);
1c79356b
A
2554 return (error);
2555}
2556
1c79356b
A
2557/*
2558 * Get name of peer for connected socket.
2d21ac55
A
2559 *
2560 * Returns: 0 Success
2561 * EBADF
2562 * EINVAL
2563 * ENOTCONN
2564 * file_socket:ENOTSOCK
2565 * file_socket:EBADF
2566 * copyin:EFAULT
2567 * copyout:EFAULT
2568 * <pru_peeraddr>:???
2569 * <sf_getpeername>:???
1c79356b
A
2570 */
2571/* ARGSUSED */
2572int
2d21ac55 2573getpeername(__unused struct proc *p, struct getpeername_args *uap,
b0d623f7 2574 __unused int32_t *retval)
1c79356b 2575{
91447636 2576 struct socket *so;
1c79356b 2577 struct sockaddr *sa;
91447636 2578 socklen_t len;
2d21ac55 2579 socklen_t sa_len;
1c79356b
A
2580 int error;
2581
91447636 2582 error = file_socket(uap->fdes, &so);
1c79356b
A
2583 if (error)
2584 return (error);
91447636
A
2585 if (so == NULL) {
2586 error = EBADF;
2587 goto out;
2588 }
2589
2590 socket_lock(so, 1);
2591
2d21ac55
A
2592 if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
2593 (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
2594 /* the socket has been shutdown, no more getpeername's */
2595 socket_unlock(so, 1);
2596 error = EINVAL;
2597 goto out;
2598 }
2599
91447636
A
2600 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
2601 socket_unlock(so, 1);
2602 error = ENOTCONN;
2603 goto out;
2604 }
2d21ac55 2605 error = copyin(uap->alen, (caddr_t)&len, sizeof (socklen_t));
91447636
A
2606 if (error) {
2607 socket_unlock(so, 1);
2608 goto out;
2609 }
1c79356b
A
2610 sa = 0;
2611 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
2d21ac55 2612 if (error == 0) {
6d2010ae 2613 error = sflt_getpeername(so, &sa);
91447636
A
2614 if (error == EJUSTRETURN)
2615 error = 0;
91447636
A
2616 }
2617 socket_unlock(so, 1);
1c79356b
A
2618 if (error)
2619 goto bad;
2620 if (sa == 0) {
2621 len = 0;
2622 goto gotnothing;
2623 }
2d21ac55
A
2624 sa_len = sa->sa_len;
2625 len = MIN(len, sa_len);
91447636 2626 error = copyout(sa, uap->asa, len);
1c79356b
A
2627 if (error)
2628 goto bad;
2d21ac55
A
2629 /* return the actual, untruncated address length */
2630 len = sa_len;
1c79356b 2631gotnothing:
2d21ac55 2632 error = copyout((caddr_t)&len, uap->alen, sizeof (socklen_t));
1c79356b
A
2633bad:
2634 if (sa) FREE(sa, M_SONAME);
91447636
A
2635out:
2636 file_drop(uap->fdes);
1c79356b
A
2637 return (error);
2638}
2639
2640int
2d21ac55 2641sockargs(struct mbuf **mp, user_addr_t data, int buflen, int type)
1c79356b 2642{
2d21ac55
A
2643 struct sockaddr *sa;
2644 struct mbuf *m;
1c79356b
A
2645 int error;
2646
e2d2fc5c 2647 size_t alloc_buflen = (size_t)buflen;
39236c6e 2648
3e170ce0 2649 if (alloc_buflen > INT_MAX/2)
e2d2fc5c 2650 return (EINVAL);
b0d623f7 2651#ifdef __LP64__
3e170ce0
A
2652 /*
2653 * The fd's in the buffer must expand to be pointers, thus we need twice
2654 * as much space
2655 */
2656 if (type == MT_CONTROL)
2657 alloc_buflen = ((buflen - sizeof(struct cmsghdr))*2) +
2658 sizeof(struct cmsghdr);
b0d623f7 2659#endif
e2d2fc5c
A
2660 if (alloc_buflen > MLEN) {
2661 if (type == MT_SONAME && alloc_buflen <= 112)
3e170ce0 2662 alloc_buflen = MLEN; /* unix domain compat. hack */
e2d2fc5c 2663 else if (alloc_buflen > MCLBYTES)
91447636 2664 return (EINVAL);
1c79356b
A
2665 }
2666 m = m_get(M_WAIT, type);
2667 if (m == NULL)
2668 return (ENOBUFS);
e2d2fc5c 2669 if (alloc_buflen > MLEN) {
91447636
A
2670 MCLGET(m, M_WAIT);
2671 if ((m->m_flags & M_EXT) == 0) {
2672 m_free(m);
2d21ac55 2673 return (ENOBUFS);
91447636
A
2674 }
2675 }
3e170ce0
A
2676 /*
2677 * K64: We still copyin the original buflen because it gets expanded
2678 * later and we lie about the size of the mbuf because it only affects
2679 * unp_* functions
b0d623f7 2680 */
1c79356b 2681 m->m_len = buflen;
91447636 2682 error = copyin(data, mtod(m, caddr_t), (u_int)buflen);
2d21ac55 2683 if (error) {
1c79356b 2684 (void) m_free(m);
2d21ac55 2685 } else {
1c79356b
A
2686 *mp = m;
2687 if (type == MT_SONAME) {
2688 sa = mtod(m, struct sockaddr *);
1c79356b
A
2689 sa->sa_len = buflen;
2690 }
2691 }
2692 return (error);
2693}
2694
91447636
A
2695/*
2696 * Given a user_addr_t of length len, allocate and fill out a *sa.
2d21ac55
A
2697 *
2698 * Returns: 0 Success
2699 * ENAMETOOLONG Filename too long
2700 * EINVAL Invalid argument
2701 * ENOMEM Not enough space
2702 * copyin:EFAULT Bad address
91447636 2703 */
2d21ac55
A
2704static int
2705getsockaddr(struct socket *so, struct sockaddr **namp, user_addr_t uaddr,
4a3eedf9 2706 size_t len, boolean_t translate_unspec)
1c79356b
A
2707{
2708 struct sockaddr *sa;
2709 int error;
2710
2711 if (len > SOCK_MAXADDRLEN)
2d21ac55 2712 return (ENAMETOOLONG);
1c79356b 2713
2d21ac55
A
2714 if (len < offsetof(struct sockaddr, sa_data[0]))
2715 return (EINVAL);
1c79356b 2716
490019cf 2717 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK | M_ZERO);
91447636 2718 if (sa == NULL) {
2d21ac55 2719 return (ENOMEM);
91447636
A
2720 }
2721 error = copyin(uaddr, (caddr_t)sa, len);
1c79356b
A
2722 if (error) {
2723 FREE(sa, M_SONAME);
2724 } else {
2d21ac55
A
2725 /*
2726 * Force sa_family to AF_INET on AF_INET sockets to handle
2727 * legacy applications that use AF_UNSPEC (0). On all other
2728 * sockets we leave it unchanged and let the lower layer
2729 * handle it.
2730 */
4a3eedf9 2731 if (translate_unspec && sa->sa_family == AF_UNSPEC &&
39236c6e 2732 SOCK_CHECK_DOM(so, PF_INET) &&
2d21ac55
A
2733 len == sizeof (struct sockaddr_in))
2734 sa->sa_family = AF_INET;
2735
1c79356b
A
2736 sa->sa_len = len;
2737 *namp = sa;
2738 }
2d21ac55 2739 return (error);
1c79356b
A
2740}
2741
2d21ac55
A
2742static int
2743getsockaddr_s(struct socket *so, struct sockaddr_storage *ss,
4a3eedf9 2744 user_addr_t uaddr, size_t len, boolean_t translate_unspec)
1c79356b 2745{
2d21ac55
A
2746 int error;
2747
2748 if (ss == NULL || uaddr == USER_ADDR_NULL ||
2749 len < offsetof(struct sockaddr, sa_data[0]))
2750 return (EINVAL);
2751
2752 /*
2753 * sockaddr_storage size is less than SOCK_MAXADDRLEN,
2754 * so the check here is inclusive.
2755 */
2756 if (len > sizeof (*ss))
2757 return (ENAMETOOLONG);
1c79356b 2758
2d21ac55
A
2759 bzero(ss, sizeof (*ss));
2760 error = copyin(uaddr, (caddr_t)ss, len);
2761 if (error == 0) {
2762 /*
2763 * Force sa_family to AF_INET on AF_INET sockets to handle
2764 * legacy applications that use AF_UNSPEC (0). On all other
2765 * sockets we leave it unchanged and let the lower layer
2766 * handle it.
2767 */
4a3eedf9 2768 if (translate_unspec && ss->ss_family == AF_UNSPEC &&
39236c6e 2769 SOCK_CHECK_DOM(so, PF_INET) &&
2d21ac55
A
2770 len == sizeof (struct sockaddr_in))
2771 ss->ss_family = AF_INET;
91447636 2772
2d21ac55 2773 ss->ss_len = len;
1c79356b 2774 }
2d21ac55 2775 return (error);
1c79356b
A
2776}
2777
fe8ab488
A
2778int
2779internalize_user_msghdr_array(const void *src, int spacetype, int direction,
3e170ce0 2780 u_int count, struct user_msghdr_x *dst, struct uio **uiop)
fe8ab488
A
2781{
2782 int error = 0;
2783 u_int i;
3e170ce0
A
2784 u_int namecnt = 0;
2785 u_int ctlcnt = 0;
fe8ab488
A
2786
2787 for (i = 0; i < count; i++) {
2788 uio_t auio;
2789 struct user_iovec *iovp;
3e170ce0 2790 struct user_msghdr_x *user_msg = dst + i;
fe8ab488
A
2791
2792 if (spacetype == UIO_USERSPACE64) {
3e170ce0 2793 const struct user64_msghdr_x *msghdr64;
fe8ab488 2794
3e170ce0 2795 msghdr64 = ((const struct user64_msghdr_x *)src) + i;
fe8ab488
A
2796
2797 user_msg->msg_name = msghdr64->msg_name;
2798 user_msg->msg_namelen = msghdr64->msg_namelen;
2799 user_msg->msg_iov = msghdr64->msg_iov;
2800 user_msg->msg_iovlen = msghdr64->msg_iovlen;
2801 user_msg->msg_control = msghdr64->msg_control;
2802 user_msg->msg_controllen = msghdr64->msg_controllen;
2803 user_msg->msg_flags = msghdr64->msg_flags;
2804 user_msg->msg_datalen = msghdr64->msg_datalen;
2805 } else {
3e170ce0 2806 const struct user32_msghdr_x *msghdr32;
fe8ab488 2807
3e170ce0 2808 msghdr32 = ((const struct user32_msghdr_x *)src) + i;
fe8ab488
A
2809
2810 user_msg->msg_name = msghdr32->msg_name;
2811 user_msg->msg_namelen = msghdr32->msg_namelen;
2812 user_msg->msg_iov = msghdr32->msg_iov;
2813 user_msg->msg_iovlen = msghdr32->msg_iovlen;
2814 user_msg->msg_control = msghdr32->msg_control;
2815 user_msg->msg_controllen = msghdr32->msg_controllen;
2816 user_msg->msg_flags = msghdr32->msg_flags;
2817 user_msg->msg_datalen = msghdr32->msg_datalen;
2818 }
3e170ce0
A
2819
2820 if (user_msg->msg_iovlen <= 0 ||
2821 user_msg->msg_iovlen > UIO_MAXIOV) {
fe8ab488
A
2822 error = EMSGSIZE;
2823 goto done;
2824 }
3e170ce0
A
2825 auio = uio_create(user_msg->msg_iovlen, 0, spacetype,
2826 direction);
fe8ab488
A
2827 if (auio == NULL) {
2828 error = ENOMEM;
2829 goto done;
2830 }
2831 uiop[i] = auio;
2832
3e170ce0
A
2833 iovp = uio_iovsaddr(auio);
2834 if (iovp == NULL) {
2835 error = ENOMEM;
2836 goto done;
2837 }
2838 error = copyin_user_iovec_array(user_msg->msg_iov,
2839 spacetype, user_msg->msg_iovlen, iovp);
2840 if (error)
2841 goto done;
2842 user_msg->msg_iov = CAST_USER_ADDR_T(iovp);
fe8ab488 2843
3e170ce0
A
2844 error = uio_calculateresid(auio);
2845 if (error)
2846 goto done;
2847 user_msg->msg_datalen = uio_resid(auio);
2848
2849 if (user_msg->msg_name && user_msg->msg_namelen)
2850 namecnt++;
2851 if (user_msg->msg_control && user_msg->msg_controllen)
2852 ctlcnt++;
2853 }
2854done:
2855
2856 return (error);
2857}
2858
2859int
2860internalize_recv_msghdr_array(const void *src, int spacetype, int direction,
2861 u_int count, struct user_msghdr_x *dst,
2862 struct recv_msg_elem *recv_msg_array)
2863{
2864 int error = 0;
2865 u_int i;
2866
2867 for (i = 0; i < count; i++) {
2868 struct user_iovec *iovp;
2869 struct user_msghdr_x *user_msg = dst + i;
2870 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
2871
2872 if (spacetype == UIO_USERSPACE64) {
2873 const struct user64_msghdr_x *msghdr64;
2874
2875 msghdr64 = ((const struct user64_msghdr_x *)src) + i;
2876
2877 user_msg->msg_name = msghdr64->msg_name;
2878 user_msg->msg_namelen = msghdr64->msg_namelen;
2879 user_msg->msg_iov = msghdr64->msg_iov;
2880 user_msg->msg_iovlen = msghdr64->msg_iovlen;
2881 user_msg->msg_control = msghdr64->msg_control;
2882 user_msg->msg_controllen = msghdr64->msg_controllen;
2883 user_msg->msg_flags = msghdr64->msg_flags;
2884 user_msg->msg_datalen = msghdr64->msg_datalen;
fe8ab488 2885 } else {
3e170ce0
A
2886 const struct user32_msghdr_x *msghdr32;
2887
2888 msghdr32 = ((const struct user32_msghdr_x *)src) + i;
2889
2890 user_msg->msg_name = msghdr32->msg_name;
2891 user_msg->msg_namelen = msghdr32->msg_namelen;
2892 user_msg->msg_iov = msghdr32->msg_iov;
2893 user_msg->msg_iovlen = msghdr32->msg_iovlen;
2894 user_msg->msg_control = msghdr32->msg_control;
2895 user_msg->msg_controllen = msghdr32->msg_controllen;
2896 user_msg->msg_flags = msghdr32->msg_flags;
2897 user_msg->msg_datalen = msghdr32->msg_datalen;
fe8ab488 2898 }
3e170ce0
A
2899
2900 if (user_msg->msg_iovlen <= 0 ||
2901 user_msg->msg_iovlen > UIO_MAXIOV) {
2902 error = EMSGSIZE;
2903 goto done;
2904 }
2905 recv_msg_elem->uio = uio_create(user_msg->msg_iovlen, 0,
2906 spacetype, direction);
2907 if (recv_msg_elem->uio == NULL) {
2908 error = ENOMEM;
2909 goto done;
2910 }
2911
2912 iovp = uio_iovsaddr(recv_msg_elem->uio);
2913 if (iovp == NULL) {
2914 error = ENOMEM;
2915 goto done;
2916 }
2917 error = copyin_user_iovec_array(user_msg->msg_iov,
2918 spacetype, user_msg->msg_iovlen, iovp);
2919 if (error)
2920 goto done;
2921 user_msg->msg_iov = CAST_USER_ADDR_T(iovp);
2922
2923 error = uio_calculateresid(recv_msg_elem->uio);
2924 if (error)
2925 goto done;
2926 user_msg->msg_datalen = uio_resid(recv_msg_elem->uio);
2927
2928 if (user_msg->msg_name && user_msg->msg_namelen)
2929 recv_msg_elem->which |= SOCK_MSG_SA;
2930 if (user_msg->msg_control && user_msg->msg_controllen)
2931 recv_msg_elem->which |= SOCK_MSG_CONTROL;
fe8ab488
A
2932 }
2933done:
3e170ce0 2934
fe8ab488
A
2935 return (error);
2936}
2937
2938u_int
2939externalize_user_msghdr_array(void *dst, int spacetype, int direction,
3e170ce0 2940 u_int count, const struct user_msghdr_x *src, struct uio **uiop)
fe8ab488
A
2941{
2942#pragma unused(direction)
2943 u_int i;
2944 int seenlast = 0;
2945 u_int retcnt = 0;
2946
2947 for (i = 0; i < count; i++) {
3e170ce0 2948 const struct user_msghdr_x *user_msg = src + i;
fe8ab488
A
2949 uio_t auio = uiop[i];
2950 user_ssize_t len = user_msg->msg_datalen - uio_resid(auio);
2951
2952 if (user_msg->msg_datalen != 0 && len == 0)
2953 seenlast = 1;
3e170ce0
A
2954
2955 if (seenlast == 0)
2956 retcnt ++;
2957
2958 if (spacetype == UIO_USERSPACE64) {
2959 struct user64_msghdr_x *msghdr64;
2960
2961 msghdr64 = ((struct user64_msghdr_x *)dst) + i;
2962
2963 msghdr64->msg_flags = user_msg->msg_flags;
2964 msghdr64->msg_datalen = len;
2965
2966 } else {
2967 struct user32_msghdr_x *msghdr32;
2968
2969 msghdr32 = ((struct user32_msghdr_x *)dst) + i;
2970
2971 msghdr32->msg_flags = user_msg->msg_flags;
2972 msghdr32->msg_datalen = len;
2973 }
2974 }
2975 return (retcnt);
2976}
2977
2978u_int
2979externalize_recv_msghdr_array(void *dst, int spacetype, int direction,
2980 u_int count, const struct user_msghdr_x *src,
2981 struct recv_msg_elem *recv_msg_array)
2982{
2983 u_int i;
2984 int seenlast = 0;
2985 u_int retcnt = 0;
2986
2987 for (i = 0; i < count; i++) {
2988 const struct user_msghdr_x *user_msg = src + i;
2989 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
2990 user_ssize_t len;
2991
2992 len = user_msg->msg_datalen - uio_resid(recv_msg_elem->uio);
2993
2994 if (direction == UIO_READ) {
2995 if ((recv_msg_elem->which & SOCK_MSG_DATA) == 0)
2996 seenlast = 1;
2997 } else {
2998 if (user_msg->msg_datalen != 0 && len == 0)
2999 seenlast = 1;
3000 }
3001
fe8ab488
A
3002 if (seenlast == 0)
3003 retcnt ++;
3004
3005 if (spacetype == UIO_USERSPACE64) {
3006 struct user64_msghdr_x *msghdr64;
3007
3008 msghdr64 = ((struct user64_msghdr_x *)dst) + i;
3009
3010 msghdr64->msg_flags = user_msg->msg_flags;
3011 msghdr64->msg_datalen = len;
3e170ce0 3012
fe8ab488
A
3013 } else {
3014 struct user32_msghdr_x *msghdr32;
3015
3016 msghdr32 = ((struct user32_msghdr_x *)dst) + i;
3017
3018 msghdr32->msg_flags = user_msg->msg_flags;
3019 msghdr32->msg_datalen = len;
3020 }
3021 }
3022 return (retcnt);
3023}
3024
3025void
3026free_uio_array(struct uio **uiop, u_int count)
3027{
3028 u_int i;
3029
3030 for (i = 0; i < count; i++) {
3031 if (uiop[i] != NULL)
3032 uio_free(uiop[i]);
3033 }
3034}
3035
3036__private_extern__ user_ssize_t
3037uio_array_resid(struct uio **uiop, u_int count)
3038{
3039 user_ssize_t len = 0;
3040 u_int i;
3041
3042 for (i = 0; i < count; i++) {
3043 struct uio *auio = uiop[i];
3044
3e170ce0 3045 if (auio != NULL)
fe8ab488
A
3046 len += uio_resid(auio);
3047 }
3048 return (len);
3049}
3050
3051int
3052uio_array_is_valid(struct uio **uiop, u_int count)
3053{
3054 user_ssize_t len = 0;
3055 u_int i;
3056
3057 for (i = 0; i < count; i++) {
3058 struct uio *auio = uiop[i];
3e170ce0 3059
fe8ab488
A
3060 if (auio != NULL) {
3061 user_ssize_t resid = uio_resid(auio);
3e170ce0 3062
fe8ab488
A
3063 /*
3064 * Sanity check on the validity of the iovec:
3065 * no point of going over sb_max
3066 */
3067 if (resid < 0 || (u_int32_t)resid > sb_max)
3068 return (0);
3e170ce0
A
3069
3070 len += resid;
3071 if (len < 0 || (u_int32_t)len > sb_max)
3072 return (0);
3073 }
3074 }
3075 return (1);
3076}
3077
3078
3079struct recv_msg_elem *
3080alloc_recv_msg_array(u_int count)
3081{
3082 struct recv_msg_elem *recv_msg_array;
3083
3084 recv_msg_array = _MALLOC(count * sizeof(struct recv_msg_elem),
3085 M_TEMP, M_WAITOK | M_ZERO);
3086
3087 return (recv_msg_array);
3088}
3089
3090void
3091free_recv_msg_array(struct recv_msg_elem *recv_msg_array, u_int count)
3092{
3093 u_int i;
3094
3095 for (i = 0; i < count; i++) {
3096 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3097
3098 if (recv_msg_elem->uio != NULL)
3099 uio_free(recv_msg_elem->uio);
3100 if (recv_msg_elem->psa != NULL)
3101 _FREE(recv_msg_elem->psa, M_TEMP);
3102 if (recv_msg_elem->controlp != NULL)
3103 m_freem(recv_msg_elem->controlp);
3104 }
3105 _FREE(recv_msg_array, M_TEMP);
3106}
3107
3108
3109__private_extern__ user_ssize_t
3110recv_msg_array_resid(struct recv_msg_elem *recv_msg_array, u_int count)
3111{
3112 user_ssize_t len = 0;
3113 u_int i;
3114
3115 for (i = 0; i < count; i++) {
3116 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3117
3118 if (recv_msg_elem->uio != NULL)
3119 len += uio_resid(recv_msg_elem->uio);
3120 }
3121 return (len);
3122}
3123
3124int
3125recv_msg_array_is_valid(struct recv_msg_elem *recv_msg_array, u_int count)
3126{
3127 user_ssize_t len = 0;
3128 u_int i;
3129
3130 for (i = 0; i < count; i++) {
3131 struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
3132
3133 if (recv_msg_elem->uio != NULL) {
3134 user_ssize_t resid = uio_resid(recv_msg_elem->uio);
3135
3136 /*
3137 * Sanity check on the validity of the iovec:
3138 * no point of going over sb_max
3139 */
3140 if (resid < 0 || (u_int32_t)resid > sb_max)
3141 return (0);
3142
fe8ab488
A
3143 len += resid;
3144 if (len < 0 || (u_int32_t)len > sb_max)
3145 return (0);
3146 }
3147 }
3148 return (1);
3149}
3150
39236c6e 3151#if SENDFILE
2d21ac55
A
3152
/* Number of iovec slots (and pages) handled per sendfile iteration */
#define SFUIOBUFS 64

/*
 * Number of clusters of size (1 << shift) needed to hold n bytes.
 * n is narrowed to unsigned int and must be non-zero.
 */
#define SF_HOWMANY_CL(n, shift) ((((unsigned int)(n) - 1) >> (shift)) + 1)

/* Macros to compute the number of mbufs needed depending on cluster size */
#define HOWMANY_16K(n)	SF_HOWMANY_CL(n, M16KCLSHIFT)
#define HOWMANY_4K(n)	SF_HOWMANY_CL(n, MBIGCLSHIFT)

/* Upper send limit in bytes (SFUIOBUFS * PAGESIZE) */
#define SENDFILE_MAX_BYTES	(SFUIOBUFS << PGSHIFT)

/* Upper send limit in the number of mbuf clusters */
#define SENDFILE_MAX_16K	HOWMANY_16K(SENDFILE_MAX_BYTES)
#define SENDFILE_MAX_4K		HOWMANY_4K(SENDFILE_MAX_BYTES)
3165
1c79356b 3166static void
2d21ac55
A
3167alloc_sendpkt(int how, size_t pktlen, unsigned int *maxchunks,
3168 struct mbuf **m, boolean_t jumbocl)
1c79356b 3169{
2d21ac55 3170 unsigned int needed;
1c79356b 3171
2d21ac55
A
3172 if (pktlen == 0)
3173 panic("%s: pktlen (%ld) must be non-zero\n", __func__, pktlen);
1c79356b 3174
2d21ac55
A
3175 /*
3176 * Try to allocate for the whole thing. Since we want full control
3177 * over the buffer size and be able to accept partial result, we can't
3178 * use mbuf_allocpacket(). The logic below is similar to sosend().
3179 */
3180 *m = NULL;
6d2010ae 3181 if (pktlen > MBIGCLBYTES && jumbocl) {
2d21ac55
A
3182 needed = MIN(SENDFILE_MAX_16K, HOWMANY_16K(pktlen));
3183 *m = m_getpackets_internal(&needed, 1, how, 0, M16KCLBYTES);
3184 }
3185 if (*m == NULL) {
3186 needed = MIN(SENDFILE_MAX_4K, HOWMANY_4K(pktlen));
6d2010ae 3187 *m = m_getpackets_internal(&needed, 1, how, 0, MBIGCLBYTES);
2d21ac55
A
3188 }
3189
3190 /*
3191 * Our previous attempt(s) at allocation had failed; the system
3192 * may be short on mbufs, and we want to block until they are
3193 * available. This time, ask just for 1 mbuf and don't return
3194 * until we get it.
3195 */
3196 if (*m == NULL) {
3197 needed = 1;
6d2010ae 3198 *m = m_getpackets_internal(&needed, 1, M_WAIT, 1, MBIGCLBYTES);
1c79356b 3199 }
2d21ac55
A
3200 if (*m == NULL)
3201 panic("%s: blocking allocation returned NULL\n", __func__);
3202
3203 *maxchunks = needed;
1c79356b
A
3204}
3205
3206/*
3207 * sendfile(2).
2d21ac55
A
3208 * int sendfile(int fd, int s, off_t offset, off_t *nbytes,
3209 * struct sf_hdtr *hdtr, int flags)
1c79356b
A
3210 *
3211 * Send a file specified by 'fd' and starting at 'offset' to a socket
2d21ac55
A
3212 * specified by 's'. Send only '*nbytes' of the file or until EOF if
3213 * *nbytes == 0. Optionally add a header and/or trailer to the socket
3214 * output. If specified, write the total number of bytes sent into *nbytes.
1c79356b
A
3215 */
3216int
2d21ac55 3217sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval)
1c79356b 3218{
91447636 3219 struct fileproc *fp;
1c79356b 3220 struct vnode *vp;
1c79356b 3221 struct socket *so;
2d21ac55
A
3222 struct writev_nocancel_args nuap;
3223 user_ssize_t writev_retval;
2d21ac55 3224 struct user_sf_hdtr user_hdtr;
b0d623f7
A
3225 struct user32_sf_hdtr user32_hdtr;
3226 struct user64_sf_hdtr user64_hdtr;
2d21ac55
A
3227 off_t off, xfsize;
3228 off_t nbytes = 0, sbytes = 0;
3229 int error = 0;
3230 size_t sizeof_hdtr;
2d21ac55
A
3231 off_t file_size;
3232 struct vfs_context context = *vfs_context_current();
3e170ce0 3233
2d21ac55
A
3234 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_START), uap->s,
3235 0, 0, 0, 0);
b0d623f7
A
3236
3237 AUDIT_ARG(fd, uap->fd);
3238 AUDIT_ARG(value32, uap->s);
3239
1c79356b
A
3240 /*
3241 * Do argument checking. Must be a regular file in, stream
3242 * type and connected socket out, positive offset.
3243 */
2d21ac55 3244 if ((error = fp_getfvp(p, uap->fd, &fp, &vp))) {
1c79356b 3245 goto done;
2d21ac55
A
3246 }
3247 if ((fp->f_flag & FREAD) == 0) {
91447636
A
3248 error = EBADF;
3249 goto done1;
1c79356b 3250 }
2d21ac55
A
3251 if (vnode_isreg(vp) == 0) {
3252 error = ENOTSUP;
91447636 3253 goto done1;
1c79356b 3254 }
91447636 3255 error = file_socket(uap->s, &so);
2d21ac55 3256 if (error) {
91447636 3257 goto done1;
2d21ac55 3258 }
55e303ae
A
3259 if (so == NULL) {
3260 error = EBADF;
91447636 3261 goto done2;
55e303ae 3262 }
1c79356b
A
3263 if (so->so_type != SOCK_STREAM) {
3264 error = EINVAL;
2d21ac55 3265 goto done2;
1c79356b
A
3266 }
3267 if ((so->so_state & SS_ISCONNECTED) == 0) {
3268 error = ENOTCONN;
2d21ac55 3269 goto done2;
1c79356b
A
3270 }
3271 if (uap->offset < 0) {
3272 error = EINVAL;
2d21ac55 3273 goto done2;
1c79356b 3274 }
2d21ac55
A
3275 if (uap->nbytes == USER_ADDR_NULL) {
3276 error = EINVAL;
3277 goto done2;
3278 }
3279 if (uap->flags != 0) {
3280 error = EINVAL;
3281 goto done2;
3282 }
3283
3284 context.vc_ucred = fp->f_fglob->fg_cred;
3285
3286#if CONFIG_MACF_SOCKET_SUBSET
3287 /* JMM - fetch connected sockaddr? */
3288 error = mac_socket_check_send(context.vc_ucred, so, NULL);
3289 if (error)
3290 goto done2;
3291#endif
3292
3293 /*
3294 * Get number of bytes to send
3295 * Should it applies to size of header and trailer?
3296 * JMM - error handling?
3297 */
3298 copyin(uap->nbytes, &nbytes, sizeof (off_t));
1c79356b
A
3299
3300 /*
3301 * If specified, get the pointer to the sf_hdtr struct for
3302 * any headers/trailers.
3303 */
2d21ac55
A
3304 if (uap->hdtr != USER_ADDR_NULL) {
3305 caddr_t hdtrp;
3306
3307 bzero(&user_hdtr, sizeof (user_hdtr));
3308 if (IS_64BIT_PROCESS(p)) {
b0d623f7
A
3309 hdtrp = (caddr_t)&user64_hdtr;
3310 sizeof_hdtr = sizeof (user64_hdtr);
2d21ac55 3311 } else {
b0d623f7
A
3312 hdtrp = (caddr_t)&user32_hdtr;
3313 sizeof_hdtr = sizeof (user32_hdtr);
2d21ac55
A
3314 }
3315 error = copyin(uap->hdtr, hdtrp, sizeof_hdtr);
1c79356b 3316 if (error)
2d21ac55 3317 goto done2;
b0d623f7
A
3318 if (IS_64BIT_PROCESS(p)) {
3319 user_hdtr.headers = user64_hdtr.headers;
3320 user_hdtr.hdr_cnt = user64_hdtr.hdr_cnt;
3321 user_hdtr.trailers = user64_hdtr.trailers;
3322 user_hdtr.trl_cnt = user64_hdtr.trl_cnt;
3323 } else {
3324 user_hdtr.headers = user32_hdtr.headers;
3325 user_hdtr.hdr_cnt = user32_hdtr.hdr_cnt;
3326 user_hdtr.trailers = user32_hdtr.trailers;
3327 user_hdtr.trl_cnt = user32_hdtr.trl_cnt;
2d21ac55
A
3328 }
3329
1c79356b
A
3330 /*
3331 * Send any headers. Wimp out and use writev(2).
3332 */
2d21ac55
A
3333 if (user_hdtr.headers != USER_ADDR_NULL) {
3334 bzero(&nuap, sizeof (struct writev_args));
1c79356b 3335 nuap.fd = uap->s;
2d21ac55
A
3336 nuap.iovp = user_hdtr.headers;
3337 nuap.iovcnt = user_hdtr.hdr_cnt;
3338 error = writev_nocancel(p, &nuap, &writev_retval);
316670eb 3339 if (error) {
2d21ac55 3340 goto done2;
316670eb 3341 }
2d21ac55 3342 sbytes += writev_retval;
1c79356b
A
3343 }
3344 }
3345
3346 /*
2d21ac55
A
3347 * Get the file size for 2 reasons:
3348 * 1. We don't want to allocate more mbufs than necessary
3349 * 2. We don't want to read past the end of file
1c79356b 3350 */
316670eb 3351 if ((error = vnode_size(vp, &file_size, vfs_context_current())) != 0) {
2d21ac55 3352 goto done2;
316670eb 3353 }
1c79356b
A
3354
3355 /*
2d21ac55
A
3356 * Simply read file data into a chain of mbufs that used with scatter
3357 * gather reads. We're not (yet?) setup to use zero copy external
3358 * mbufs that point to the file pages.
1c79356b 3359 */
2d21ac55 3360 socket_lock(so, 1);
39236c6e 3361 error = sblock(&so->so_snd, SBL_WAIT);
2d21ac55
A
3362 if (error) {
3363 socket_unlock(so, 1);
3364 goto done2;
3365 }
1c79356b 3366 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
2d21ac55 3367 mbuf_t m0 = NULL, m;
39236c6e 3368 unsigned int nbufs = SFUIOBUFS, i;
2d21ac55 3369 uio_t auio;
39236c6e 3370 char uio_buf[UIO_SIZEOF(SFUIOBUFS)]; /* 1 KB !!! */
2d21ac55
A
3371 size_t uiolen;
3372 user_ssize_t rlen;
3373 off_t pgoff;
3374 size_t pktlen;
3375 boolean_t jumbocl;
1c79356b 3376
1c79356b 3377 /*
2d21ac55
A
3378 * Calculate the amount to transfer.
3379 * Align to round number of pages.
3380 * Not to exceed send socket buffer,
1c79356b
A
3381 * the EOF, or the passed in nbytes.
3382 */
2d21ac55
A
3383 xfsize = sbspace(&so->so_snd);
3384
3385 if (xfsize <= 0) {
3386 if (so->so_state & SS_CANTSENDMORE) {
3387 error = EPIPE;
3388 goto done3;
3389 } else if ((so->so_state & SS_NBIO)) {
3390 error = EAGAIN;
3391 goto done3;
3392 } else {
3393 xfsize = PAGE_SIZE;
3394 }
3395 }
3396
3397 if (xfsize > SENDFILE_MAX_BYTES)
3398 xfsize = SENDFILE_MAX_BYTES;
3399 else if (xfsize > PAGE_SIZE)
3400 xfsize = trunc_page(xfsize);
3401 pgoff = off & PAGE_MASK_64;
3402 if (pgoff > 0 && PAGE_SIZE - pgoff < xfsize)
1c79356b 3403 xfsize = PAGE_SIZE_64 - pgoff;
2d21ac55
A
3404 if (nbytes && xfsize > (nbytes - sbytes))
3405 xfsize = nbytes - sbytes;
3406 if (xfsize <= 0)
3407 break;
3408 if (off + xfsize > file_size)
3409 xfsize = file_size - off;
1c79356b
A
3410 if (xfsize <= 0)
3411 break;
2d21ac55 3412
1c79356b 3413 /*
2d21ac55
A
3414 * Attempt to use larger than system page-size clusters for
3415 * large writes only if there is a jumbo cluster pool and
3416 * if the socket is marked accordingly.
1c79356b 3417 */
2d21ac55
A
3418 jumbocl = sosendjcl && njcl > 0 &&
3419 ((so->so_flags & SOF_MULTIPAGES) || sosendjcl_ignore_capab);
3420
3421 socket_unlock(so, 0);
3422 alloc_sendpkt(M_WAIT, xfsize, &nbufs, &m0, jumbocl);
fe8ab488 3423 pktlen = mbuf_pkthdr_maxlen(m0);
b0d623f7 3424 if (pktlen < (size_t)xfsize)
2d21ac55 3425 xfsize = pktlen;
39236c6e 3426
2d21ac55
A
3427 auio = uio_createwithbuffer(nbufs, off, UIO_SYSSPACE,
3428 UIO_READ, &uio_buf[0], sizeof (uio_buf));
3429 if (auio == NULL) {
316670eb
A
3430 printf("sendfile failed. nbufs = %d. %s", nbufs,
3431 "File a radar related to rdar://10146739.\n");
2d21ac55
A
3432 mbuf_freem(m0);
3433 error = ENXIO;
3434 socket_lock(so, 0);
3435 goto done3;
1c79356b 3436 }
1c79356b 3437
2d21ac55 3438 for (i = 0, m = m0, uiolen = 0;
b0d623f7 3439 i < nbufs && m != NULL && uiolen < (size_t)xfsize;
2d21ac55
A
3440 i++, m = mbuf_next(m)) {
3441 size_t mlen = mbuf_maxlen(m);
3442
b0d623f7 3443 if (mlen + uiolen > (size_t)xfsize)
2d21ac55
A
3444 mlen = xfsize - uiolen;
3445 mbuf_setlen(m, mlen);
3446 uio_addiov(auio, CAST_USER_ADDR_T(mbuf_datastart(m)),
3447 mlen);
3448 uiolen += mlen;
3449 }
3450
3451 if (xfsize != uio_resid(auio))
3452 printf("sendfile: xfsize: %lld != uio_resid(auio): "
6d2010ae 3453 "%lld\n", xfsize, (long long)uio_resid(auio));
2d21ac55
A
3454
3455 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_START),
3456 uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
3457 (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
3458 error = fo_read(fp, auio, FOF_OFFSET, &context);
3459 socket_lock(so, 0);
3460 if (error != 0) {
3461 if (uio_resid(auio) != xfsize && (error == ERESTART ||
3462 error == EINTR || error == EWOULDBLOCK)) {
3463 error = 0;
3464 } else {
3465 mbuf_freem(m0);
3466 goto done3;
1c79356b 3467 }
1c79356b 3468 }
2d21ac55
A
3469 xfsize -= uio_resid(auio);
3470 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_END),
3471 uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
3472 (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
3473
3474 if (xfsize == 0) {
3e170ce0 3475 // printf("sendfile: fo_read 0 bytes, EOF\n");
2d21ac55 3476 break;
91447636 3477 }
2d21ac55
A
3478 if (xfsize + off > file_size)
3479 printf("sendfile: xfsize: %lld + off: %lld > file_size:"
3480 "%lld\n", xfsize, off, file_size);
3481 for (i = 0, m = m0, rlen = 0;
3482 i < nbufs && m != NULL && rlen < xfsize;
3483 i++, m = mbuf_next(m)) {
3484 size_t mlen = mbuf_maxlen(m);
3485
b0d623f7 3486 if (rlen + mlen > (size_t)xfsize)
2d21ac55
A
3487 mlen = xfsize - rlen;
3488 mbuf_setlen(m, mlen);
3489
3490 rlen += mlen;
3491 }
3492 mbuf_pkthdr_setlen(m0, xfsize);
3493
1c79356b
A
3494retry_space:
3495 /*
3496 * Make sure that the socket is still able to take more data.
3497 * CANTSENDMORE being true usually means that the connection
3498 * was closed. so_error is true when an error was sensed after
3499 * a previous send.
3500 * The state is checked after the page mapping and buffer
3501 * allocation above since those operations may block and make
3502 * any socket checks stale. From this point forward, nothing
3503 * blocks before the pru_send (or more accurately, any blocking
3504 * results in a loop back to here to re-check).
3505 */
3506 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
3507 if (so->so_state & SS_CANTSENDMORE) {
3508 error = EPIPE;
3509 } else {
3510 error = so->so_error;
3511 so->so_error = 0;
3512 }
2d21ac55
A
3513 m_freem(m0);
3514 goto done3;
1c79356b
A
3515 }
3516 /*
3517 * Wait for socket space to become available. We do this just
3518 * after checking the connection state above in order to avoid
3519 * a race condition with sbwait().
3520 */
2d21ac55 3521 if (sbspace(&so->so_snd) < (long)so->so_snd.sb_lowat) {
1c79356b 3522 if (so->so_state & SS_NBIO) {
2d21ac55 3523 m_freem(m0);
1c79356b 3524 error = EAGAIN;
2d21ac55 3525 goto done3;
1c79356b 3526 }
2d21ac55
A
3527 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT |
3528 DBG_FUNC_START), uap->s, 0, 0, 0, 0);
1c79356b 3529 error = sbwait(&so->so_snd);
2d21ac55
A
3530 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT|
3531 DBG_FUNC_END), uap->s, 0, 0, 0, 0);
1c79356b
A
3532 /*
3533 * An error from sbwait usually indicates that we've
3534 * been interrupted by a signal. If we've sent anything
3535 * then return bytes sent, otherwise return the error.
3536 */
3537 if (error) {
2d21ac55
A
3538 m_freem(m0);
3539 goto done3;
1c79356b
A
3540 }
3541 goto retry_space;
3542 }
39236c6e 3543
6d2010ae 3544 struct mbuf *control = NULL;
2d21ac55
A
3545 {
3546 /*
3547 * Socket filter processing
3548 */
2d21ac55 3549
6d2010ae
A
3550 error = sflt_data_out(so, NULL, &m0, &control, 0);
3551 if (error) {
3552 if (error == EJUSTRETURN) {
3553 error = 0;
3554 continue;
2d21ac55 3555 }
6d2010ae 3556 goto done3;
2d21ac55
A
3557 }
3558 /*
3559 * End Socket filter processing
3560 */
3561 }
3562 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
3563 uap->s, 0, 0, 0, 0);
3564 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m0,
6d2010ae 3565 0, control, p);
2d21ac55
A
3566 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
3567 uap->s, 0, 0, 0, 0);
1c79356b 3568 if (error) {
2d21ac55 3569 goto done3;
1c79356b
A
3570 }
3571 }
39236c6e 3572 sbunlock(&so->so_snd, FALSE); /* will unlock socket */
1c79356b
A
3573 /*
3574 * Send trailers. Wimp out and use writev(2).
3575 */
2d21ac55
A
3576 if (uap->hdtr != USER_ADDR_NULL &&
3577 user_hdtr.trailers != USER_ADDR_NULL) {
3578 bzero(&nuap, sizeof (struct writev_args));
3579 nuap.fd = uap->s;
3580 nuap.iovp = user_hdtr.trailers;
3581 nuap.iovcnt = user_hdtr.trl_cnt;
3582 error = writev_nocancel(p, &nuap, &writev_retval);
316670eb 3583 if (error) {
2d21ac55 3584 goto done2;
316670eb 3585 }
2d21ac55 3586 sbytes += writev_retval;
1c79356b 3587 }
91447636
A
3588done2:
3589 file_drop(uap->s);
3590done1:
3591 file_drop(uap->fd);
1c79356b 3592done:
2d21ac55 3593 if (uap->nbytes != USER_ADDR_NULL) {
91447636 3594 /* XXX this appears bogus for some early failure conditions */
2d21ac55 3595 copyout(&sbytes, uap->nbytes, sizeof (off_t));
1c79356b 3596 }
2d21ac55
A
3597 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_END), uap->s,
3598 (unsigned int)((sbytes >> 32) & 0x0ffffffff),
3599 (unsigned int)(sbytes & 0x0ffffffff), error, 0);
1c79356b 3600 return (error);
91447636 3601done3:
39236c6e 3602 sbunlock(&so->so_snd, FALSE); /* will unlock socket */
91447636 3603 goto done2;
1c79356b
A
3604}
3605
2d21ac55
A
3606
3607#endif /* SENDFILE */