]> git.saurik.com Git - apple/xnu.git/blame - bsd/kern/uipc_syscalls.c
xnu-2422.115.4.tar.gz
[apple/xnu.git] / bsd / kern / uipc_syscalls.c
CommitLineData
1c79356b 1/*
39236c6e 2 * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
5d5c5d0d 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
39236c6e 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
39236c6e 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
39236c6e 17 *
2d21ac55
A
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
39236c6e 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * Copyright (c) 1982, 1986, 1989, 1990, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * sendfile(2) and related extensions:
2d21ac55 33 * Copyright (c) 1998, David Greenman. All rights reserved.
1c79356b
A
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. All advertising materials mentioning features or use of this software
44 * must display the following acknowledgement:
45 * This product includes software developed by the University of
46 * California, Berkeley and its contributors.
47 * 4. Neither the name of the University nor the names of its contributors
48 * may be used to endorse or promote products derived from this software
49 * without specific prior written permission.
50 *
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE.
62 *
63 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94
64 */
2d21ac55
A
65/*
66 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
67 * support for mandatory and extensible security protections. This notice
68 * is included in support of clause 2.2 (b) of the Apple Public License,
69 * Version 2.0.
70 */
1c79356b
A
71
72#include <sys/param.h>
73#include <sys/systm.h>
74#include <sys/filedesc.h>
91447636
A
75#include <sys/proc_internal.h>
76#include <sys/file_internal.h>
2d21ac55 77#include <sys/vnode_internal.h>
1c79356b 78#include <sys/malloc.h>
39236c6e 79#include <sys/mcache.h>
1c79356b 80#include <sys/mbuf.h>
91447636
A
81#include <kern/lock.h>
82#include <sys/domain.h>
1c79356b 83#include <sys/protosw.h>
91447636 84#include <sys/signalvar.h>
1c79356b
A
85#include <sys/socket.h>
86#include <sys/socketvar.h>
1c79356b 87#include <sys/kernel.h>
91447636 88#include <sys/uio_internal.h>
2d21ac55 89#include <sys/kauth.h>
6d2010ae 90#include <kern/task.h>
39236c6e 91#include <sys/priv.h>
e5568f75 92
b0d623f7 93#include <security/audit/audit.h>
1c79356b
A
94
95#include <sys/kdebug.h>
91447636 96#include <sys/sysproto.h>
2d21ac55
A
97#include <netinet/in.h>
98#include <net/route.h>
99#include <netinet/in_pcb.h>
100
101#if CONFIG_MACF_SOCKET_SUBSET
102#include <security/mac_framework.h>
103#endif /* CONFIG_MACF_SOCKET_SUBSET */
104
/*
 * Shorthand accessors: each f_* name expands to the corresponding field of
 * the f_fglob member, so "fp->f_flag" reads/writes "fp->f_fglob->fg_flag".
 * f_type goes one level further, through fg_ops.
 */
#define	f_flag f_fglob->fg_flag
#define	f_type f_fglob->fg_ops->fo_type
#define	f_msgcount f_fglob->fg_msgcount
#define	f_cred f_fglob->fg_cred
#define	f_ops f_fglob->fg_ops
#define	f_offset f_fglob->fg_offset
#define	f_data f_fglob->fg_data


/*
 * Debug codes built with NETDBG_CODE() in the DBG_NETSOCK class.
 * The DBG_LAYER_* codes use the small sub-codes 0..3; the DBG_FNC_* codes
 * place a per-function number in bits 8 and up, optionally OR-ed with a
 * small sub-code in the low bits.
 */
#define	DBG_LAYER_IN_BEG	NETDBG_CODE(DBG_NETSOCK, 0)
#define	DBG_LAYER_IN_END	NETDBG_CODE(DBG_NETSOCK, 2)
#define	DBG_LAYER_OUT_BEG	NETDBG_CODE(DBG_NETSOCK, 1)
#define	DBG_LAYER_OUT_END	NETDBG_CODE(DBG_NETSOCK, 3)
#define	DBG_FNC_SENDMSG		NETDBG_CODE(DBG_NETSOCK, (1 << 8) | 1)
#define	DBG_FNC_SENDTO		NETDBG_CODE(DBG_NETSOCK, (2 << 8) | 1)
#define	DBG_FNC_SENDIT		NETDBG_CODE(DBG_NETSOCK, (3 << 8) | 1)
#define	DBG_FNC_RECVFROM	NETDBG_CODE(DBG_NETSOCK, (5 << 8))
#define	DBG_FNC_RECVMSG		NETDBG_CODE(DBG_NETSOCK, (6 << 8))
#define	DBG_FNC_RECVIT		NETDBG_CODE(DBG_NETSOCK, (7 << 8))
#define	DBG_FNC_SENDFILE	NETDBG_CODE(DBG_NETSOCK, (10 << 8))
#define	DBG_FNC_SENDFILE_WAIT	NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 1))
#define	DBG_FNC_SENDFILE_READ	NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 2))
#define	DBG_FNC_SENDFILE_SEND	NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 3))
128
129
2d21ac55
A
/* TODO: should be in header file */
int falloc_locked(proc_t, struct fileproc **, int *, vfs_context_t, int);

/*
 * Forward declarations for the file-local helpers used by the system call
 * entry points in this file.
 */
static int sendit(struct proc *, int, struct user_msghdr *, uio_t, int,
    int32_t *);
static int recvit(struct proc *, int, struct user_msghdr *, uio_t, user_addr_t,
    int32_t *);
static int connectit(struct socket *, struct sockaddr *);
static int getsockaddr(struct socket *, struct sockaddr **, user_addr_t,
    size_t, boolean_t);
static int getsockaddr_s(struct socket *, struct sockaddr_storage *,
    user_addr_t, size_t, boolean_t);
static int getsockaddrlist(struct socket *, struct sockaddr_list **,
    user_addr_t, socklen_t, boolean_t);
#if SENDFILE
static void alloc_sendpkt(int, size_t, unsigned int *, struct mbuf **,
    boolean_t);
#endif /* SENDFILE */
static int connectx_nocancel(struct proc *, struct connectx_args *, int *);
static int connectitx(struct socket *, struct sockaddr_list **,
    struct sockaddr_list **, struct proc *, uint32_t, associd_t, connid_t *);
static int peeloff_nocancel(struct proc *, struct peeloff_args *, int *);
static int disconnectx_nocancel(struct proc *, struct disconnectx_args *,
    int *);
static int socket_common(struct proc *, int, int, int, pid_t, int32_t *, int);

/*
 * System call interface to the socket abstraction.
 */

/* File operations vector installed (as f_ops) on every socket fileproc below. */
extern const struct fileops socketops;
1c79356b 161
2d21ac55
A
162/*
163 * Returns: 0 Success
164 * EACCES Mandatory Access Control failure
165 * falloc:ENFILE
166 * falloc:EMFILE
167 * falloc:ENOMEM
168 * socreate:EAFNOSUPPORT
169 * socreate:EPROTOTYPE
170 * socreate:EPROTONOSUPPORT
171 * socreate:ENOBUFS
172 * socreate:ENOMEM
2d21ac55
A
173 * socreate:??? [other protocol families, IPSEC]
174 */
1c79356b 175int
39236c6e
A
176socket(struct proc *p,
177 struct socket_args *uap,
178 int32_t *retval)
179{
180 return (socket_common(p, uap->domain, uap->type, uap->protocol,
181 proc_selfpid(), retval, 0));
182}
183
184int
185socket_delegate(struct proc *p,
186 struct socket_delegate_args *uap,
187 int32_t *retval)
188{
189 return socket_common(p, uap->domain, uap->type, uap->protocol,
190 uap->epid, retval, 1);
191}
192
193static int
194socket_common(struct proc *p,
195 int domain,
196 int type,
197 int protocol,
198 pid_t epid,
199 int32_t *retval,
200 int delegate)
1c79356b 201{
1c79356b 202 struct socket *so;
91447636 203 struct fileproc *fp;
1c79356b
A
204 int fd, error;
205
39236c6e 206 AUDIT_ARG(socket, domain, type, protocol);
2d21ac55 207#if CONFIG_MACF_SOCKET_SUBSET
39236c6e
A
208 if ((error = mac_socket_check_create(kauth_cred_get(), domain,
209 type, protocol)) != 0)
2d21ac55
A
210 return (error);
211#endif /* MAC_SOCKET_SUBSET */
1c79356b 212
39236c6e
A
213 if (delegate) {
214 error = priv_check_cred(kauth_cred_get(),
215 PRIV_NET_PRIVILEGED_SOCKET_DELEGATE, 0);
216 if (error)
217 return (EACCES);
218 }
219
2d21ac55 220 error = falloc(p, &fp, &fd, vfs_context_current());
91447636 221 if (error) {
1c79356b 222 return (error);
91447636 223 }
1c79356b 224 fp->f_flag = FREAD|FWRITE;
1c79356b 225 fp->f_ops = &socketops;
91447636 226
39236c6e
A
227 if (delegate)
228 error = socreate_delegate(domain, &so, type, protocol, epid);
229 else
230 error = socreate(domain, &so, type, protocol);
231
91447636
A
232 if (error) {
233 fp_free(p, fd, fp);
1c79356b
A
234 } else {
235 fp->f_data = (caddr_t)so;
91447636
A
236
237 proc_fdlock(p);
6601e61a 238 procfdtbl_releasefd(p, fd, NULL);
2d21ac55 239
91447636
A
240 fp_drop(p, fd, fp, 1);
241 proc_fdunlock(p);
242
1c79356b
A
243 *retval = fd;
244 }
245 return (error);
246}
247
2d21ac55
A
248/*
249 * Returns: 0 Success
250 * EDESTADDRREQ Destination address required
251 * EBADF Bad file descriptor
252 * EACCES Mandatory Access Control failure
253 * file_socket:ENOTSOCK
254 * file_socket:EBADF
255 * getsockaddr:ENAMETOOLONG Filename too long
256 * getsockaddr:EINVAL Invalid argument
257 * getsockaddr:ENOMEM Not enough space
258 * getsockaddr:EFAULT Bad address
39236c6e 259 * sobindlock:???
2d21ac55 260 */
1c79356b
A
261/* ARGSUSED */
262int
b0d623f7 263bind(__unused proc_t p, struct bind_args *uap, __unused int32_t *retval)
1c79356b 264{
2d21ac55
A
265 struct sockaddr_storage ss;
266 struct sockaddr *sa = NULL;
91447636 267 struct socket *so;
2d21ac55 268 boolean_t want_free = TRUE;
1c79356b
A
269 int error;
270
55e303ae 271 AUDIT_ARG(fd, uap->s);
91447636 272 error = file_socket(uap->s, &so);
2d21ac55 273 if (error != 0)
1c79356b 274 return (error);
2d21ac55
A
275 if (so == NULL) {
276 error = EBADF;
277 goto out;
278 }
279 if (uap->name == USER_ADDR_NULL) {
280 error = EDESTADDRREQ;
281 goto out;
282 }
283 if (uap->namelen > sizeof (ss)) {
4a3eedf9 284 error = getsockaddr(so, &sa, uap->name, uap->namelen, TRUE);
2d21ac55 285 } else {
4a3eedf9 286 error = getsockaddr_s(so, &ss, uap->name, uap->namelen, TRUE);
2d21ac55
A
287 if (error == 0) {
288 sa = (struct sockaddr *)&ss;
289 want_free = FALSE;
290 }
291 }
292 if (error != 0)
91447636 293 goto out;
2d21ac55
A
294 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
295#if CONFIG_MACF_SOCKET_SUBSET
296 if ((error = mac_socket_check_bind(kauth_cred_get(), so, sa)) == 0)
39236c6e 297 error = sobindlock(so, sa, 1); /* will lock socket */
2d21ac55 298#else
39236c6e 299 error = sobindlock(so, sa, 1); /* will lock socket */
2d21ac55
A
300#endif /* MAC_SOCKET_SUBSET */
301 if (want_free)
302 FREE(sa, M_SONAME);
91447636
A
303out:
304 file_drop(uap->s);
1c79356b
A
305 return (error);
306}
307
2d21ac55
A
308/*
309 * Returns: 0 Success
310 * EBADF
311 * EACCES Mandatory Access Control failure
312 * file_socket:ENOTSOCK
313 * file_socket:EBADF
314 * solisten:EINVAL
315 * solisten:EOPNOTSUPP
316 * solisten:???
317 */
1c79356b 318int
2d21ac55 319listen(__unused struct proc *p, struct listen_args *uap,
b0d623f7 320 __unused int32_t *retval)
1c79356b 321{
1c79356b 322 int error;
2d21ac55 323 struct socket *so;
1c79356b 324
55e303ae 325 AUDIT_ARG(fd, uap->s);
91447636 326 error = file_socket(uap->s, &so);
1c79356b
A
327 if (error)
328 return (error);
91447636 329 if (so != NULL)
2d21ac55
A
330#if CONFIG_MACF_SOCKET_SUBSET
331 {
332 error = mac_socket_check_listen(kauth_cred_get(), so);
333 if (error == 0)
334 error = solisten(so, uap->backlog);
335 }
336#else
91447636 337 error = solisten(so, uap->backlog);
2d21ac55 338#endif /* MAC_SOCKET_SUBSET */
55e303ae 339 else
91447636 340 error = EBADF;
2d21ac55 341
91447636
A
342 file_drop(uap->s);
343 return (error);
1c79356b
A
344}
345
2d21ac55
A
346/*
347 * Returns: fp_getfsock:EBADF Bad file descriptor
348 * fp_getfsock:EOPNOTSUPP ...
349 * xlate => :ENOTSOCK Socket operation on non-socket
350 * :EFAULT Bad address on copyin/copyout
351 * :EBADF Bad file descriptor
352 * :EOPNOTSUPP Operation not supported on socket
353 * :EINVAL Invalid argument
354 * :EWOULDBLOCK Operation would block
355 * :ECONNABORTED Connection aborted
356 * :EINTR Interrupted function
357 * :EACCES Mandatory Access Control failure
358 * falloc_locked:ENFILE Too many files open in system
359 * falloc_locked::EMFILE Too many open files
360 * falloc_locked::ENOMEM Not enough space
361 * 0 Success
362 */
/*
 * Accept one completed connection from the listening socket uap->s.
 *
 * On success a new descriptor for the accepted socket is stored in *retval
 * and, when uap->name is non-NULL, the peer address (truncated to the
 * caller-supplied length) and its untruncated length are copied out to
 * uap->name / uap->anamelen.  *retval is preset to -1 on entry.
 */
int
accept_nocancel(struct proc *p, struct accept_nocancel_args *uap,
    int32_t *retval)
{
	struct fileproc *fp;
	struct sockaddr *sa = NULL;
	socklen_t namelen;
	int error;
	struct socket *head, *so = NULL;
	lck_mtx_t *mutex_held;
	int fd = uap->s;
	int newfd;
	short fflag;		/* type must match fp->f_flag */
	int dosocklock = 0;

	*retval = -1;

	AUDIT_ARG(fd, uap->s);

	/* namelen is only read in, and only used later, when uap->name is set */
	if (uap->name) {
		error = copyin(uap->anamelen, (caddr_t)&namelen,
		    sizeof (socklen_t));
		if (error)
			return (error);
	}
	error = fp_getfsock(p, fd, &fp, &head);
	if (error) {
		/* report a non-socket descriptor as ENOTSOCK */
		if (error == EOPNOTSUPP)
			error = ENOTSOCK;
		return (error);
	}
	if (head == NULL) {
		error = EBADF;
		goto out;
	}
#if CONFIG_MACF_SOCKET_SUBSET
	if ((error = mac_socket_check_accept(kauth_cred_get(), head)) != 0)
		goto out;
#endif /* CONFIG_MACF_SOCKET_SUBSET */

	socket_lock(head, 1);

	/*
	 * Pick the mutex to sleep on: the protocol's own lock when it
	 * provides pr_getlock, otherwise the domain mutex.  dosocklock
	 * records which case applies; it later decides whether the accepted
	 * socket is locked/unlocked separately.
	 */
	if (head->so_proto->pr_getlock != NULL) {
		mutex_held = (*head->so_proto->pr_getlock)(head, 0);
		dosocklock = 1;
	} else {
		mutex_held = head->so_proto->pr_domain->dom_mtx;
		dosocklock = 0;
	}

	if ((head->so_options & SO_ACCEPTCONN) == 0) {
		if ((head->so_proto->pr_flags & PR_CONNREQUIRED) == 0) {
			error = EOPNOTSUPP;
		} else {
			/* POSIX: The socket is not accepting connections */
			error = EINVAL;
		}
		socket_unlock(head, 1);
		goto out;
	}
	/* non-blocking socket with nothing on the completed queue */
	if ((head->so_state & SS_NBIO) && head->so_comp.tqh_first == NULL) {
		socket_unlock(head, 1);
		error = EWOULDBLOCK;
		goto out;
	}
	/* sleep until a connection completes or an error is posted on head */
	while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
		if (head->so_state & SS_CANTRCVMORE) {
			head->so_error = ECONNABORTED;
			break;
		}
		if (head->so_usecount < 1)
			panic("accept: head=%p refcount=%d\n", head,
			    head->so_usecount);
		error = msleep((caddr_t)&head->so_timeo, mutex_held,
		    PSOCK | PCATCH, "accept", 0);
		if (head->so_usecount < 1)
			panic("accept: 2 head=%p refcount=%d\n", head,
			    head->so_usecount);
		/* SS_DRAINING overrides whatever msleep() returned */
		if ((head->so_state & SS_DRAINING)) {
			error = ECONNABORTED;
		}
		if (error) {
			socket_unlock(head, 1);
			goto out;
		}
	}
	/* a pending so_error is returned once and then cleared */
	if (head->so_error) {
		error = head->so_error;
		head->so_error = 0;
		socket_unlock(head, 1);
		goto out;
	}


	/*
	 * At this point we know that there is at least one connection
	 * ready to be accepted. Remove it from the queue prior to
	 * allocating the file descriptor for it since falloc() may
	 * block allowing another process to accept the connection
	 * instead.
	 */
	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
	so = TAILQ_FIRST(&head->so_comp);
	TAILQ_REMOVE(&head->so_comp, so, so_list);
	head->so_qlen--;
	/* unlock head to avoid deadlock with select, keep a ref on head */
	socket_unlock(head, 0);

#if CONFIG_MACF_SOCKET_SUBSET
	/*
	 * Pass the pre-accepted socket to the MAC framework. This is
	 * cheaper than allocating a file descriptor for the socket,
	 * calling the protocol accept callback, and possibly freeing
	 * the file descriptor should the MAC check fail.
	 */
	if ((error = mac_socket_check_accepted(kauth_cred_get(), so)) != 0) {
		socket_lock(so, 1);
		so->so_state &= ~(SS_NOFDREF | SS_COMP);
		so->so_head = NULL;
		socket_unlock(so, 1);
		soclose(so);
		/* Drop reference on listening socket */
		sodereference(head);
		goto out;
	}
#endif /* CONFIG_MACF_SOCKET_SUBSET */

	/*
	 * Pass the pre-accepted socket to any interested socket filter(s).
	 * Upon failure, the socket would have been closed by the callee.
	 */
	if (so->so_filt != NULL && (error = soacceptfilter(so)) != 0) {
		/* Drop reference on listening socket */
		sodereference(head);
		/* Propagate socket filter's error code to the caller */
		goto out;
	}

	/*
	 * Save the listener's file flags now: falloc() below overwrites fp
	 * with the new fileproc, which then inherits these flags.
	 */
	fflag = fp->f_flag;
	error = falloc(p, &fp, &newfd, vfs_context_current());
	if (error) {
		/*
		 * Probably ran out of file descriptors.
		 *
		 * <rdar://problem/8554930>
		 * Don't put this back on the socket like we used to, that
		 * just causes the client to spin. Drop the socket.
		 */
		socket_lock(so, 1);
		so->so_state &= ~(SS_NOFDREF | SS_COMP);
		so->so_head = NULL;
		socket_unlock(so, 1);
		soclose(so);
		sodereference(head);
		goto out;
	}
	*retval = newfd;
	fp->f_flag = fflag;
	fp->f_ops = &socketops;
	fp->f_data = (caddr_t)so;
	/* re-take the head lock (refcount argument 0), released with 1 below */
	socket_lock(head, 0);
	if (dosocklock)
		socket_lock(so, 1);
	so->so_state &= ~SS_COMP;
	so->so_head = NULL;
	/* the return value of soacceptlock() is deliberately ignored */
	(void) soacceptlock(so, &sa, 0);
	socket_unlock(head, 1);
	if (sa == NULL) {
		/* no peer address: report a zero length if the caller asked */
		namelen = 0;
		if (uap->name)
			goto gotnoname;
		error = 0;
		goto releasefd;
	}
	AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);

	if (uap->name) {
		socklen_t	sa_len;

		/* save sa_len before it is destroyed */
		sa_len = sa->sa_len;
		namelen = MIN(namelen, sa_len);
		error = copyout(sa, uap->name, namelen);
		if (!error)
			/* return the actual, untruncated address length */
			namelen = sa_len;
gotnoname:
		/*
		 * NOTE(review): this assignment overwrites the result of the
		 * address copyout above, so a failure copying the address is
		 * not reported when the length copyout succeeds -- confirm
		 * whether that is intended.
		 */
		error = copyout((caddr_t)&namelen, uap->anamelen,
		    sizeof (socklen_t));
	}
	FREE(sa, M_SONAME);

releasefd:
	/*
	 * If the socket has been marked as inactive by sosetdefunct(),
	 * disallow further operations on it.
	 */
	if (so->so_flags & SOF_DEFUNCT) {
		sodefunct(current_proc(), so,
		    SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL);
	}

	if (dosocklock)
		socket_unlock(so, 1);

	/*
	 * The new descriptor is released into the table even when error is
	 * non-zero at this point (a failed copyout above).
	 */
	proc_fdlock(p);
	procfdtbl_releasefd(p, newfd, NULL);
	fp_drop(p, newfd, fp, 1);
	proc_fdunlock(p);

out:
	/* pairs with the successful fp_getfsock() on the listening descriptor */
	file_drop(fd);
	return (error);
}
577
578int
b0d623f7 579accept(struct proc *p, struct accept_args *uap, int32_t *retval)
1c79356b 580{
2d21ac55
A
581 __pthread_testcancel(1);
582 return(accept_nocancel(p, (struct accept_nocancel_args *)uap, retval));
1c79356b
A
583}
584
2d21ac55
A
585/*
586 * Returns: 0 Success
587 * EBADF Bad file descriptor
588 * EALREADY Connection already in progress
589 * EINPROGRESS Operation in progress
590 * ECONNABORTED Connection aborted
591 * EINTR Interrupted function
592 * EACCES Mandatory Access Control failure
593 * file_socket:ENOTSOCK
594 * file_socket:EBADF
595 * getsockaddr:ENAMETOOLONG Filename too long
596 * getsockaddr:EINVAL Invalid argument
597 * getsockaddr:ENOMEM Not enough space
598 * getsockaddr:EFAULT Bad address
599 * soconnectlock:EOPNOTSUPP
600 * soconnectlock:EISCONN
601 * soconnectlock:??? [depends on protocol, filters]
602 * msleep:EINTR
603 *
604 * Imputed: so_error error may be set from so_error, which
605 * may have been set by soconnectlock.
606 */
607/* ARGSUSED */
1c79356b 608int
b0d623f7 609connect(struct proc *p, struct connect_args *uap, int32_t *retval)
1c79356b 610{
2d21ac55
A
611 __pthread_testcancel(1);
612 return(connect_nocancel(p, (struct connect_nocancel_args *)uap, retval));
1c79356b 613}
1c79356b 614
1c79356b 615int
39236c6e 616connect_nocancel(proc_t p, struct connect_nocancel_args *uap, int32_t *retval)
1c79356b 617{
39236c6e 618#pragma unused(p, retval)
91447636 619 struct socket *so;
2d21ac55
A
620 struct sockaddr_storage ss;
621 struct sockaddr *sa = NULL;
91447636
A
622 int error;
623 int fd = uap->s;
4a3eedf9 624 boolean_t dgram;
1c79356b 625
55e303ae 626 AUDIT_ARG(fd, uap->s);
2d21ac55
A
627 error = file_socket(fd, &so);
628 if (error != 0)
1c79356b 629 return (error);
91447636
A
630 if (so == NULL) {
631 error = EBADF;
632 goto out;
633 }
634
4a3eedf9
A
635 /*
636 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
637 * if this is a datagram socket; translate for other types.
638 */
639 dgram = (so->so_type == SOCK_DGRAM);
640
2d21ac55
A
641 /* Get socket address now before we obtain socket lock */
642 if (uap->namelen > sizeof (ss)) {
4a3eedf9 643 error = getsockaddr(so, &sa, uap->name, uap->namelen, !dgram);
2d21ac55 644 } else {
4a3eedf9 645 error = getsockaddr_s(so, &ss, uap->name, uap->namelen, !dgram);
39236c6e 646 if (error == 0)
2d21ac55 647 sa = (struct sockaddr *)&ss;
2d21ac55
A
648 }
649 if (error != 0)
650 goto out;
651
39236c6e
A
652 error = connectit(so, sa);
653
654 if (sa != NULL && sa != SA(&ss))
655 FREE(sa, M_SONAME);
656 if (error == ERESTART)
657 error = EINTR;
658out:
659 file_drop(fd);
660 return (error);
661}
662
663static int
664connectx_nocancel(struct proc *p, struct connectx_args *uap, int *retval)
665{
666#pragma unused(p, retval)
667 struct sockaddr_list *src_sl = NULL, *dst_sl = NULL;
668 struct socket *so;
669 int error, fd = uap->s;
670 boolean_t dgram;
671 connid_t cid = CONNID_ANY;
672
673 AUDIT_ARG(fd, uap->s);
674 error = file_socket(fd, &so);
675 if (error != 0)
676 return (error);
677 if (so == NULL) {
678 error = EBADF;
679 goto out;
680 }
681
682 /*
683 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
684 * if this is a datagram socket; translate for other types.
685 */
686 dgram = (so->so_type == SOCK_DGRAM);
687
688 /*
689 * Get socket address(es) now before we obtain socket lock; use
690 * sockaddr_list for src address for convenience, if present,
691 * even though it won't hold more than one.
692 */
693 if (uap->src != USER_ADDR_NULL && (error = getsockaddrlist(so,
694 &src_sl, uap->src, uap->srclen, dgram)) != 0)
695 goto out;
696
697 error = getsockaddrlist(so, &dst_sl, uap->dsts, uap->dstlen, dgram);
698 if (error != 0)
699 goto out;
700
701 VERIFY(dst_sl != NULL &&
702 !TAILQ_EMPTY(&dst_sl->sl_head) && dst_sl->sl_cnt > 0);
703
704 error = connectitx(so, &src_sl, &dst_sl, p, uap->ifscope,
705 uap->aid, &cid);
706 if (error == ERESTART)
707 error = EINTR;
708
709 if (uap->cid != USER_ADDR_NULL)
710 (void) copyout(&cid, uap->cid, sizeof (cid));
711
712out:
713 file_drop(fd);
714 if (src_sl != NULL)
715 sockaddrlist_free(src_sl);
716 if (dst_sl != NULL)
717 sockaddrlist_free(dst_sl);
718 return (error);
719}
720
/*
 * connectx entry point.  Because of its similarity to a POSIX interface it
 * is treated as an unofficial cancellation point before the real work is
 * delegated to connectx_nocancel().
 */
int
connectx(struct proc *p, struct connectx_args *uap, int *retval)
{
	int error;

	__pthread_testcancel(1);
	error = connectx_nocancel(p, uap, retval);
	return (error);
}
731
732static int
733connectit(struct socket *so, struct sockaddr *sa)
734{
735 int error;
736
2d21ac55
A
737 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
738#if CONFIG_MACF_SOCKET_SUBSET
39236c6e
A
739 if ((error = mac_socket_check_connect(kauth_cred_get(), so, sa)) != 0)
740 return (error);
741#endif /* MAC_SOCKET_SUBSET */
742
743 socket_lock(so, 1);
744 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
745 error = EALREADY;
746 goto out;
747 }
748 error = soconnectlock(so, sa, 0);
749 if (error != 0) {
750 so->so_state &= ~SS_ISCONNECTING;
2d21ac55
A
751 goto out;
752 }
39236c6e
A
753 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
754 error = EINPROGRESS;
755 goto out;
756 }
757 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
758 lck_mtx_t *mutex_held;
759
760 if (so->so_proto->pr_getlock != NULL)
761 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
762 else
763 mutex_held = so->so_proto->pr_domain->dom_mtx;
764 error = msleep((caddr_t)&so->so_timeo, mutex_held,
765 PSOCK | PCATCH, __func__, 0);
766 if (so->so_state & SS_DRAINING) {
767 error = ECONNABORTED;
768 }
769 if (error != 0)
770 break;
771 }
772 if (error == 0) {
773 error = so->so_error;
774 so->so_error = 0;
775 }
776out:
777 socket_unlock(so, 1);
778 return (error);
779}
780
781static int
782connectitx(struct socket *so, struct sockaddr_list **src_sl,
783 struct sockaddr_list **dst_sl, struct proc *p, uint32_t ifscope,
784 associd_t aid, connid_t *pcid)
785{
786 struct sockaddr_entry *se;
787 int error;
788
789 VERIFY(dst_sl != NULL && *dst_sl != NULL);
790
791 TAILQ_FOREACH(se, &(*dst_sl)->sl_head, se_link) {
792 VERIFY(se->se_addr != NULL);
793 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()),
794 se->se_addr);
795#if CONFIG_MACF_SOCKET_SUBSET
796 if ((error = mac_socket_check_connect(kauth_cred_get(),
797 so, se->se_addr)) != 0)
798 return (error);
2d21ac55 799#endif /* MAC_SOCKET_SUBSET */
39236c6e 800 }
91447636 801
39236c6e 802 socket_lock(so, 1);
91447636 803 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
91447636
A
804 error = EALREADY;
805 goto out;
806 }
39236c6e
A
807 error = soconnectxlocked(so, src_sl, dst_sl, p, ifscope,
808 aid, pcid, 0, NULL, 0);
809 if (error != 0) {
810 so->so_state &= ~SS_ISCONNECTING;
811 goto out;
812 }
1c79356b 813 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
91447636
A
814 error = EINPROGRESS;
815 goto out;
1c79356b 816 }
1c79356b 817 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
39236c6e
A
818 lck_mtx_t *mutex_held;
819
2d21ac55 820 if (so->so_proto->pr_getlock != NULL)
91447636 821 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
2d21ac55 822 else
91447636 823 mutex_held = so->so_proto->pr_domain->dom_mtx;
2d21ac55 824 error = msleep((caddr_t)&so->so_timeo, mutex_held,
39236c6e
A
825 PSOCK | PCATCH, __func__, 0);
826 if (so->so_state & SS_DRAINING) {
91447636
A
827 error = ECONNABORTED;
828 }
39236c6e 829 if (error != 0)
1c79356b
A
830 break;
831 }
832 if (error == 0) {
833 error = so->so_error;
834 so->so_error = 0;
835 }
39236c6e 836out:
91447636 837 socket_unlock(so, 1);
39236c6e
A
838 return (error);
839}
840
/*
 * peeloff entry point.  Because of its similarity to a POSIX interface it
 * is treated as an unofficial cancellation point before the real work is
 * delegated to peeloff_nocancel().
 */
int
peeloff(struct proc *p, struct peeloff_args *uap, int *retval)
{
	int error;

	__pthread_testcancel(1);
	error = peeloff_nocancel(p, uap, retval);
	return (error);
}
851
852static int
853peeloff_nocancel(struct proc *p, struct peeloff_args *uap, int *retval)
854{
855 struct fileproc *fp;
856 struct socket *mp_so, *so = NULL;
857 int newfd, fd = uap->s;
858 short fflag; /* type must match fp->f_flag */
859 int error;
860
861 *retval = -1;
862
863 error = fp_getfsock(p, fd, &fp, &mp_so);
864 if (error != 0) {
865 if (error == EOPNOTSUPP)
866 error = ENOTSOCK;
867 goto out_nofile;
868 }
869 if (mp_so == NULL) {
870 error = EBADF;
871 goto out;
872 }
873
874 socket_lock(mp_so, 1);
875 error = sopeelofflocked(mp_so, uap->aid, &so);
876 if (error != 0) {
877 socket_unlock(mp_so, 1);
878 goto out;
879 }
880 VERIFY(so != NULL);
881 socket_unlock(mp_so, 0); /* keep ref on mp_so for us */
882
883 fflag = fp->f_flag;
884 error = falloc(p, &fp, &newfd, vfs_context_current());
885 if (error != 0) {
886 /* drop this socket (probably ran out of file descriptors) */
887 soclose(so);
888 sodereference(mp_so); /* our mp_so ref */
889 goto out;
890 }
891
892 fp->f_flag = fflag;
893 fp->f_ops = &socketops;
894 fp->f_data = (caddr_t)so;
895
896 /*
897 * If the socket has been marked as inactive by sosetdefunct(),
898 * disallow further operations on it.
899 */
900 if (so->so_flags & SOF_DEFUNCT) {
901 sodefunct(current_proc(), so,
902 SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL);
903 }
904
905 proc_fdlock(p);
906 procfdtbl_releasefd(p, newfd, NULL);
907 fp_drop(p, newfd, fp, 1);
908 proc_fdunlock(p);
909
910 sodereference(mp_so); /* our mp_so ref */
911 *retval = newfd;
912
913out:
914 file_drop(fd);
915
916out_nofile:
917 return (error);
918}
919
/*
 * disconnectx entry point.  Because of its similarity to a POSIX interface
 * it is treated as an unofficial cancellation point before the real work is
 * delegated to disconnectx_nocancel().
 */
int
disconnectx(struct proc *p, struct disconnectx_args *uap, int *retval)
{
	int error;

	__pthread_testcancel(1);
	error = disconnectx_nocancel(p, uap, retval);
	return (error);
}
930
931static int
932disconnectx_nocancel(struct proc *p, struct disconnectx_args *uap, int *retval)
933{
934#pragma unused(p, retval)
935 struct socket *so;
936 int fd = uap->s;
937 int error;
938
939 error = file_socket(fd, &so);
940 if (error != 0)
941 return (error);
942 if (so == NULL) {
943 error = EBADF;
944 goto out;
945 }
946
947 error = sodisconnectx(so, uap->aid, uap->cid);
91447636
A
948out:
949 file_drop(fd);
1c79356b
A
950 return (error);
951}
952
2d21ac55
A
953/*
954 * Returns: 0 Success
955 * socreate:EAFNOSUPPORT
956 * socreate:EPROTOTYPE
957 * socreate:EPROTONOSUPPORT
958 * socreate:ENOBUFS
959 * socreate:ENOMEM
960 * socreate:EISCONN
961 * socreate:??? [other protocol families, IPSEC]
962 * falloc:ENFILE
963 * falloc:EMFILE
964 * falloc:ENOMEM
965 * copyout:EFAULT
966 * soconnect2:EINVAL
967 * soconnect2:EPROTOTYPE
968 * soconnect2:??? [other protocol families]
969 */
1c79356b 970int
2d21ac55 971socketpair(struct proc *p, struct socketpair_args *uap,
b0d623f7 972 __unused int32_t *retval)
1c79356b 973{
91447636 974 struct fileproc *fp1, *fp2;
1c79356b
A
975 struct socket *so1, *so2;
976 int fd, error, sv[2];
977
55e303ae 978 AUDIT_ARG(socket, uap->domain, uap->type, uap->protocol);
1c79356b
A
979 error = socreate(uap->domain, &so1, uap->type, uap->protocol);
980 if (error)
981 return (error);
982 error = socreate(uap->domain, &so2, uap->type, uap->protocol);
983 if (error)
984 goto free1;
91447636 985
2d21ac55 986 error = falloc(p, &fp1, &fd, vfs_context_current());
91447636 987 if (error) {
1c79356b 988 goto free2;
91447636 989 }
1c79356b 990 fp1->f_flag = FREAD|FWRITE;
1c79356b
A
991 fp1->f_ops = &socketops;
992 fp1->f_data = (caddr_t)so1;
91447636
A
993 sv[0] = fd;
994
2d21ac55 995 error = falloc(p, &fp2, &fd, vfs_context_current());
91447636 996 if (error) {
1c79356b 997 goto free3;
91447636 998 }
1c79356b 999 fp2->f_flag = FREAD|FWRITE;
1c79356b
A
1000 fp2->f_ops = &socketops;
1001 fp2->f_data = (caddr_t)so2;
1002 sv[1] = fd;
91447636 1003
1c79356b
A
1004 error = soconnect2(so1, so2);
1005 if (error) {
1c79356b
A
1006 goto free4;
1007 }
1c79356b
A
1008 if (uap->type == SOCK_DGRAM) {
1009 /*
1010 * Datagram socket connection is asymmetric.
1011 */
2d21ac55
A
1012 error = soconnect2(so2, so1);
1013 if (error) {
1014 goto free4;
1015 }
1c79356b 1016 }
91447636 1017
6d2010ae
A
1018 if ((error = copyout(sv, uap->rsv, 2 * sizeof (int))) != 0)
1019 goto free4;
1020
91447636 1021 proc_fdlock(p);
6601e61a
A
1022 procfdtbl_releasefd(p, sv[0], NULL);
1023 procfdtbl_releasefd(p, sv[1], NULL);
91447636
A
1024 fp_drop(p, sv[0], fp1, 1);
1025 fp_drop(p, sv[1], fp2, 1);
1026 proc_fdunlock(p);
1027
6d2010ae 1028 return (0);
1c79356b 1029free4:
91447636 1030 fp_free(p, sv[1], fp2);
1c79356b 1031free3:
91447636 1032 fp_free(p, sv[0], fp1);
1c79356b 1033free2:
2d21ac55 1034 (void) soclose(so2);
1c79356b 1035free1:
2d21ac55 1036 (void) soclose(so1);
1c79356b
A
1037 return (error);
1038}
1039
2d21ac55
A
1040/*
1041 * Returns: 0 Success
1042 * EINVAL
1043 * ENOBUFS
1044 * EBADF
1045 * EPIPE
1046 * EACCES Mandatory Access Control failure
1047 * file_socket:ENOTSOCK
1048 * file_socket:EBADF
1049 * getsockaddr:ENAMETOOLONG Filename too long
1050 * getsockaddr:EINVAL Invalid argument
1051 * getsockaddr:ENOMEM Not enough space
1052 * getsockaddr:EFAULT Bad address
1053 * <pru_sosend>:EACCES[TCP]
1054 * <pru_sosend>:EADDRINUSE[TCP]
1055 * <pru_sosend>:EADDRNOTAVAIL[TCP]
1056 * <pru_sosend>:EAFNOSUPPORT[TCP]
1057 * <pru_sosend>:EAGAIN[TCP]
1058 * <pru_sosend>:EBADF
1059 * <pru_sosend>:ECONNRESET[TCP]
1060 * <pru_sosend>:EFAULT
1061 * <pru_sosend>:EHOSTUNREACH[TCP]
1062 * <pru_sosend>:EINTR
1063 * <pru_sosend>:EINVAL
1064 * <pru_sosend>:EISCONN[AF_INET]
1065 * <pru_sosend>:EMSGSIZE[TCP]
1066 * <pru_sosend>:ENETDOWN[TCP]
1067 * <pru_sosend>:ENETUNREACH[TCP]
1068 * <pru_sosend>:ENOBUFS
1069 * <pru_sosend>:ENOMEM[TCP]
1070 * <pru_sosend>:ENOTCONN[AF_INET]
1071 * <pru_sosend>:EOPNOTSUPP
1072 * <pru_sosend>:EPERM[TCP]
1073 * <pru_sosend>:EPIPE
1074 * <pru_sosend>:EWOULDBLOCK
1075 * <pru_sosend>:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
1076 * <pru_sosend>:???[AF_INET] [whatever a filter author chooses]
1077 * <pru_sosend>:??? [value from so_error]
1078 * sockargs:???
1079 */
1c79356b 1080static int
2d21ac55 1081sendit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop,
b0d623f7 1082 int flags, int32_t *retval)
1c79356b 1083{
2d21ac55
A
1084 struct mbuf *control = NULL;
1085 struct sockaddr_storage ss;
1086 struct sockaddr *to = NULL;
1087 boolean_t want_free = TRUE;
91447636 1088 int error;
1c79356b 1089 struct socket *so;
91447636 1090 user_ssize_t len;
2d21ac55
A
1091
1092 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
1c79356b 1093
91447636 1094 error = file_socket(s, &so);
2d21ac55
A
1095 if (error) {
1096 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error, 0, 0, 0, 0);
1097 return (error);
1c79356b 1098 }
2d21ac55
A
1099 if (so == NULL) {
1100 error = EBADF;
1101 goto out;
1102 }
1103 if (mp->msg_name != USER_ADDR_NULL) {
1104 if (mp->msg_namelen > sizeof (ss)) {
1105 error = getsockaddr(so, &to, mp->msg_name,
4a3eedf9 1106 mp->msg_namelen, TRUE);
2d21ac55
A
1107 } else {
1108 error = getsockaddr_s(so, &ss, mp->msg_name,
4a3eedf9 1109 mp->msg_namelen, TRUE);
2d21ac55
A
1110 if (error == 0) {
1111 to = (struct sockaddr *)&ss;
1112 want_free = FALSE;
1113 }
1c79356b 1114 }
2d21ac55
A
1115 if (error != 0)
1116 goto out;
1117 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), to);
91447636 1118 }
2d21ac55
A
1119 if (mp->msg_control != USER_ADDR_NULL) {
1120 if (mp->msg_controllen < sizeof (struct cmsghdr)) {
1c79356b
A
1121 error = EINVAL;
1122 goto bad;
1123 }
1124 error = sockargs(&control, mp->msg_control,
1125 mp->msg_controllen, MT_CONTROL);
2d21ac55 1126 if (error != 0)
1c79356b 1127 goto bad;
91447636 1128 }
1c79356b 1129
2d21ac55
A
1130#if CONFIG_MACF_SOCKET_SUBSET
1131 /*
1132 * We check the state without holding the socket lock;
1133 * if a race condition occurs, it would simply result
316670eb 1134 * in an extra call to the MAC check function.
2d21ac55 1135 */
316670eb
A
1136 if ( to != NULL &&
1137 !(so->so_state & SS_DEFUNCT) &&
2d21ac55
A
1138 (error = mac_socket_check_send(kauth_cred_get(), so, to)) != 0)
1139 goto bad;
1140#endif /* MAC_SOCKET_SUBSET */
91447636
A
1141
1142 len = uio_resid(uiop);
39236c6e
A
1143 error = so->so_proto->pr_usrreqs->pru_sosend(so, to, uiop, 0,
1144 control, flags);
2d21ac55 1145 if (error != 0) {
91447636 1146 if (uio_resid(uiop) != len && (error == ERESTART ||
1c79356b
A
1147 error == EINTR || error == EWOULDBLOCK))
1148 error = 0;
2d21ac55 1149 /* Generation of SIGPIPE can be controlled per socket */
9bccf70c 1150 if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE))
1c79356b
A
1151 psignal(p, SIGPIPE);
1152 }
1153 if (error == 0)
91447636
A
1154 *retval = (int)(len - uio_resid(uiop));
1155bad:
2d21ac55 1156 if (to != NULL && want_free)
1c79356b 1157 FREE(to, M_SONAME);
91447636 1158out:
2d21ac55 1159 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error, 0, 0, 0, 0);
91447636 1160 file_drop(s);
1c79356b
A
1161 return (error);
1162}
1163
2d21ac55
A
1164/*
1165 * Returns: 0 Success
1166 * ENOMEM
1167 * sendit:??? [see sendit definition in this file]
1168 * write:??? [4056224: applicable for pipes]
1169 */
1c79356b 1170int
b0d623f7 1171sendto(struct proc *p, struct sendto_args *uap, int32_t *retval)
2d21ac55
A
1172{
1173 __pthread_testcancel(1);
39236c6e 1174 return (sendto_nocancel(p, (struct sendto_nocancel_args *)uap, retval));
2d21ac55
A
1175}
1176
1177int
39236c6e
A
1178sendto_nocancel(struct proc *p,
1179 struct sendto_nocancel_args *uap,
1180 int32_t *retval)
1c79356b 1181{
91447636
A
1182 struct user_msghdr msg;
1183 int error;
1184 uio_t auio = NULL;
1c79356b 1185
2d21ac55 1186 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_START, 0, 0, 0, 0, 0);
55e303ae 1187 AUDIT_ARG(fd, uap->s);
1c79356b 1188
91447636 1189 auio = uio_create(1, 0,
2d21ac55
A
1190 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1191 UIO_WRITE);
91447636
A
1192 if (auio == NULL) {
1193 return (ENOMEM);
1194 }
1195 uio_addiov(auio, uap->buf, uap->len);
1196
1c79356b
A
1197 msg.msg_name = uap->to;
1198 msg.msg_namelen = uap->tolen;
91447636
A
1199 /* no need to set up msg_iov. sendit uses uio_t we send it */
1200 msg.msg_iov = 0;
1201 msg.msg_iovlen = 0;
1c79356b 1202 msg.msg_control = 0;
1c79356b 1203 msg.msg_flags = 0;
1c79356b 1204
91447636 1205 error = sendit(p, uap->s, &msg, auio, uap->flags, retval);
2d21ac55 1206
91447636
A
1207 if (auio != NULL) {
1208 uio_free(auio);
1209 }
2d21ac55 1210
2d21ac55 1211 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_END, error, *retval, 0, 0, 0);
1c79356b 1212
2d21ac55 1213 return (error);
1c79356b 1214}
1c79356b 1215
2d21ac55
A
1216/*
1217 * Returns: 0 Success
1218 * ENOBUFS
1219 * copyin:EFAULT
1220 * sendit:??? [see sendit definition in this file]
1221 */
1c79356b 1222int
b0d623f7 1223sendmsg(struct proc *p, struct sendmsg_args *uap, int32_t *retval)
1c79356b 1224{
2d21ac55 1225 __pthread_testcancel(1);
39236c6e 1226 return (sendmsg_nocancel(p, (struct sendmsg_nocancel_args *)uap, retval));
1c79356b 1227}
1c79356b
A
1228
1229int
b0d623f7 1230sendmsg_nocancel(struct proc *p, struct sendmsg_nocancel_args *uap, int32_t *retval)
1c79356b 1231{
b0d623f7
A
1232 struct user32_msghdr msg32;
1233 struct user64_msghdr msg64;
91447636
A
1234 struct user_msghdr user_msg;
1235 caddr_t msghdrp;
1236 int size_of_msghdr;
1c79356b 1237 int error;
91447636
A
1238 uio_t auio = NULL;
1239 struct user_iovec *iovp;
1c79356b 1240
2d21ac55 1241 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
55e303ae 1242 AUDIT_ARG(fd, uap->s);
91447636 1243 if (IS_64BIT_PROCESS(p)) {
b0d623f7
A
1244 msghdrp = (caddr_t)&msg64;
1245 size_of_msghdr = sizeof (msg64);
2d21ac55 1246 } else {
b0d623f7
A
1247 msghdrp = (caddr_t)&msg32;
1248 size_of_msghdr = sizeof (msg32);
91447636
A
1249 }
1250 error = copyin(uap->msg, msghdrp, size_of_msghdr);
2d21ac55
A
1251 if (error) {
1252 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1253 return (error);
1c79356b 1254 }
91447636 1255
b0d623f7
A
1256 if (IS_64BIT_PROCESS(p)) {
1257 user_msg.msg_flags = msg64.msg_flags;
1258 user_msg.msg_controllen = msg64.msg_controllen;
1259 user_msg.msg_control = msg64.msg_control;
1260 user_msg.msg_iovlen = msg64.msg_iovlen;
1261 user_msg.msg_iov = msg64.msg_iov;
1262 user_msg.msg_namelen = msg64.msg_namelen;
1263 user_msg.msg_name = msg64.msg_name;
1264 } else {
1265 user_msg.msg_flags = msg32.msg_flags;
1266 user_msg.msg_controllen = msg32.msg_controllen;
1267 user_msg.msg_control = msg32.msg_control;
1268 user_msg.msg_iovlen = msg32.msg_iovlen;
1269 user_msg.msg_iov = msg32.msg_iov;
1270 user_msg.msg_namelen = msg32.msg_namelen;
1271 user_msg.msg_name = msg32.msg_name;
91447636
A
1272 }
1273
1274 if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
2d21ac55
A
1275 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, EMSGSIZE,
1276 0, 0, 0, 0);
91447636
A
1277 return (EMSGSIZE);
1278 }
1279
1280 /* allocate a uio large enough to hold the number of iovecs passed */
1281 auio = uio_create(user_msg.msg_iovlen, 0,
2d21ac55
A
1282 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1283 UIO_WRITE);
91447636
A
1284 if (auio == NULL) {
1285 error = ENOBUFS;
1286 goto done;
1287 }
2d21ac55 1288
91447636 1289 if (user_msg.msg_iovlen) {
2d21ac55
A
1290 /*
1291 * get location of iovecs within the uio.
1292 * then copyin the iovecs from user space.
91447636
A
1293 */
1294 iovp = uio_iovsaddr(auio);
1295 if (iovp == NULL) {
1296 error = ENOBUFS;
1297 goto done;
1298 }
b0d623f7
A
1299 error = copyin_user_iovec_array(user_msg.msg_iov,
1300 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
1301 user_msg.msg_iovlen, iovp);
91447636
A
1302 if (error)
1303 goto done;
1304 user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
2d21ac55
A
1305
1306 /* finish setup of uio_t */
39236c6e
A
1307 error = uio_calculateresid(auio);
1308 if (error) {
1309 goto done;
1310 }
2d21ac55 1311 } else {
91447636
A
1312 user_msg.msg_iov = 0;
1313 }
2d21ac55
A
1314
1315 /* msg_flags is ignored for send */
91447636 1316 user_msg.msg_flags = 0;
2d21ac55 1317
91447636 1318 error = sendit(p, uap->s, &user_msg, auio, uap->flags, retval);
1c79356b 1319done:
91447636
A
1320 if (auio != NULL) {
1321 uio_free(auio);
1322 }
2d21ac55 1323 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
91447636 1324
1c79356b
A
1325 return (error);
1326}
1327
2d21ac55
A
1328/*
1329 * Returns: 0 Success
1330 * ENOTSOCK
1331 * EINVAL
1332 * EBADF
1333 * EACCES Mandatory Access Control failure
1334 * copyout:EFAULT
1335 * fp_lookup:EBADF
1336 * <pru_soreceive>:ENOBUFS
1337 * <pru_soreceive>:ENOTCONN
1338 * <pru_soreceive>:EWOULDBLOCK
1339 * <pru_soreceive>:EFAULT
1340 * <pru_soreceive>:EINTR
1341 * <pru_soreceive>:EBADF
1342 * <pru_soreceive>:EINVAL
1343 * <pru_soreceive>:EMSGSIZE
1344 * <pru_soreceive>:???
1345 *
1346 * Notes: Additional return values from calls through <pru_soreceive>
1347 * depend on protocols other than TCP or AF_UNIX, which are
1348 * documented above.
1349 */
1c79356b 1350static int
2d21ac55 1351recvit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop,
b0d623f7 1352 user_addr_t namelenp, int32_t *retval)
1c79356b 1353{
39236c6e
A
1354 ssize_t len;
1355 int error;
1c79356b 1356 struct mbuf *m, *control = 0;
91447636 1357 user_addr_t ctlbuf;
1c79356b
A
1358 struct socket *so;
1359 struct sockaddr *fromsa = 0;
91447636 1360 struct fileproc *fp;
1c79356b 1361
2d21ac55 1362 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
91447636 1363 proc_fdlock(p);
2d21ac55
A
1364 if ((error = fp_lookup(p, s, &fp, 1))) {
1365 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
91447636 1366 proc_fdunlock(p);
2d21ac55 1367 return (error);
1c79356b 1368 }
91447636 1369 if (fp->f_type != DTYPE_SOCKET) {
2d21ac55 1370 fp_drop(p, s, fp, 1);
91447636 1371 proc_fdunlock(p);
2d21ac55 1372 return (ENOTSOCK);
91447636 1373 }
1c79356b 1374
2d21ac55
A
1375 so = (struct socket *)fp->f_data;
1376 if (so == NULL) {
1377 fp_drop(p, s, fp, 1);
1378 proc_fdunlock(p);
1379 return (EBADF);
1380 }
91447636
A
1381
1382 proc_fdunlock(p);
2d21ac55
A
1383
1384#if CONFIG_MACF_SOCKET_SUBSET
1385 /*
1386 * We check the state without holding the socket lock;
1387 * if a race condition occurs, it would simply result
1388 * in an extra call to the MAC check function.
1389 */
316670eb
A
1390 if (!(so->so_state & SS_DEFUNCT) &&
1391 !(so->so_state & SS_ISCONNECTED) &&
39236c6e 1392 !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
2d21ac55
A
1393 (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0)
1394 goto out1;
1395#endif /* MAC_SOCKET_SUBSET */
91447636 1396 if (uio_resid(uiop) < 0) {
2d21ac55 1397 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, EINVAL, 0, 0, 0, 0);
91447636
A
1398 error = EINVAL;
1399 goto out1;
1c79356b 1400 }
91447636
A
1401
1402 len = uio_resid(uiop);
2d21ac55
A
1403 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, uiop,
1404 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
1405 &mp->msg_flags);
b0d623f7
A
1406 if (fromsa)
1407 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()),
1408 fromsa);
1c79356b 1409 if (error) {
91447636 1410 if (uio_resid(uiop) != len && (error == ERESTART ||
1c79356b
A
1411 error == EINTR || error == EWOULDBLOCK))
1412 error = 0;
1413 }
2d21ac55 1414
1c79356b
A
1415 if (error)
1416 goto out;
2d21ac55 1417
91447636 1418 *retval = len - uio_resid(uiop);
1c79356b 1419 if (mp->msg_name) {
2d21ac55
A
1420 socklen_t sa_len = 0;
1421
1c79356b 1422 len = mp->msg_namelen;
2d21ac55 1423 if (len <= 0 || fromsa == 0) {
1c79356b 1424 len = 0;
2d21ac55 1425 } else {
1c79356b 1426#ifndef MIN
2d21ac55 1427#define MIN(a, b) ((a) > (b) ? (b) : (a))
1c79356b 1428#endif
2d21ac55
A
1429 sa_len = fromsa->sa_len;
1430 len = MIN((unsigned int)len, sa_len);
91447636 1431 error = copyout(fromsa, mp->msg_name, (unsigned)len);
1c79356b
A
1432 if (error)
1433 goto out;
1434 }
2d21ac55
A
1435 mp->msg_namelen = sa_len;
1436 /* return the actual, untruncated address length */
1c79356b 1437 if (namelenp &&
2d21ac55
A
1438 (error = copyout((caddr_t)&sa_len, namelenp,
1439 sizeof (int)))) {
1c79356b
A
1440 goto out;
1441 }
1442 }
1443 if (mp->msg_control) {
1c79356b
A
1444 len = mp->msg_controllen;
1445 m = control;
1446 mp->msg_controllen = 0;
91447636 1447 ctlbuf = mp->msg_control;
1c79356b
A
1448
1449 while (m && len > 0) {
1450 unsigned int tocopy;
b0d623f7 1451 struct cmsghdr *cp = mtod(m, struct cmsghdr *);
6d2010ae
A
1452 int cp_size = CMSG_ALIGN(cp->cmsg_len);
1453 int buflen = m->m_len;
39236c6e 1454
6d2010ae 1455 while (buflen > 0 && len > 0) {
39236c6e 1456
6d2010ae
A
1457 /*
1458 SCM_TIMESTAMP hack because struct timeval has a
1459 * different size for 32 bits and 64 bits processes
1460 */
1461 if (cp->cmsg_level == SOL_SOCKET && cp->cmsg_type == SCM_TIMESTAMP) {
1462 unsigned char tmp_buffer[CMSG_SPACE(sizeof(struct user64_timeval))];
316670eb 1463 struct cmsghdr *tmp_cp = (struct cmsghdr *)(void *)tmp_buffer;
6d2010ae 1464 int tmp_space;
316670eb 1465 struct timeval *tv = (struct timeval *)(void *)CMSG_DATA(cp);
39236c6e 1466
6d2010ae
A
1467 tmp_cp->cmsg_level = SOL_SOCKET;
1468 tmp_cp->cmsg_type = SCM_TIMESTAMP;
39236c6e 1469
6d2010ae 1470 if (proc_is64bit(p)) {
316670eb 1471 struct user64_timeval *tv64 = (struct user64_timeval *)(void *)CMSG_DATA(tmp_cp);
39236c6e 1472
6d2010ae
A
1473 tv64->tv_sec = tv->tv_sec;
1474 tv64->tv_usec = tv->tv_usec;
39236c6e 1475
6d2010ae
A
1476 tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user64_timeval));
1477 tmp_space = CMSG_SPACE(sizeof(struct user64_timeval));
1478 } else {
316670eb 1479 struct user32_timeval *tv32 = (struct user32_timeval *)(void *)CMSG_DATA(tmp_cp);
39236c6e 1480
6d2010ae
A
1481 tv32->tv_sec = tv->tv_sec;
1482 tv32->tv_usec = tv->tv_usec;
39236c6e 1483
6d2010ae
A
1484 tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user32_timeval));
1485 tmp_space = CMSG_SPACE(sizeof(struct user32_timeval));
1486 }
1487 if (len >= tmp_space) {
1488 tocopy = tmp_space;
1489 } else {
1490 mp->msg_flags |= MSG_CTRUNC;
1491 tocopy = len;
1492 }
1493 error = copyout(tmp_buffer, ctlbuf, tocopy);
1494 if (error)
1495 goto out;
39236c6e 1496
b0d623f7 1497 } else {
39236c6e 1498
6d2010ae
A
1499 if (cp_size > buflen) {
1500 panic("cp_size > buflen, something wrong with alignment!");
1501 }
39236c6e 1502
6d2010ae
A
1503 if (len >= cp_size) {
1504 tocopy = cp_size;
1505 } else {
1506 mp->msg_flags |= MSG_CTRUNC;
1507 tocopy = len;
1508 }
39236c6e 1509
6d2010ae
A
1510 error = copyout((caddr_t) cp, ctlbuf,
1511 tocopy);
1512 if (error)
1513 goto out;
b0d623f7 1514 }
39236c6e
A
1515
1516
6d2010ae
A
1517 ctlbuf += tocopy;
1518 len -= tocopy;
39236c6e 1519
6d2010ae 1520 buflen -= cp_size;
316670eb 1521 cp = (struct cmsghdr *)(void *)((unsigned char *) cp + cp_size);
6d2010ae 1522 cp_size = CMSG_ALIGN(cp->cmsg_len);
1c79356b 1523 }
39236c6e 1524
1c79356b
A
1525 m = m->m_next;
1526 }
1527 mp->msg_controllen = ctlbuf - mp->msg_control;
1528 }
1529out:
1530 if (fromsa)
1531 FREE(fromsa, M_SONAME);
1532 if (control)
1533 m_freem(control);
2d21ac55 1534 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
91447636
A
1535out1:
1536 fp_drop(p, s, fp, 0);
1c79356b
A
1537 return (error);
1538}
1539
2d21ac55
A
1540/*
1541 * Returns: 0 Success
1542 * ENOMEM
1543 * copyin:EFAULT
1544 * recvit:???
1545 * read:??? [4056224: applicable for pipes]
1546 *
1547 * Notes: The read entry point is only called as part of support for
1548 * binary backward compatibility; new code should use read
1549 * instead of recv or recvfrom when attempting to read data
1550 * from pipes.
1551 *
1552 * For full documentation of the return codes from recvit, see
1553 * the block header for the recvit function.
1554 */
1555int
b0d623f7 1556recvfrom(struct proc *p, struct recvfrom_args *uap, int32_t *retval)
2d21ac55
A
1557{
1558 __pthread_testcancel(1);
1559 return(recvfrom_nocancel(p, (struct recvfrom_nocancel_args *)uap, retval));
1560}
1561
1c79356b 1562int
b0d623f7 1563recvfrom_nocancel(struct proc *p, struct recvfrom_nocancel_args *uap, int32_t *retval)
1c79356b 1564{
91447636 1565 struct user_msghdr msg;
1c79356b 1566 int error;
91447636 1567 uio_t auio = NULL;
1c79356b 1568
2d21ac55 1569 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_START, 0, 0, 0, 0, 0);
55e303ae 1570 AUDIT_ARG(fd, uap->s);
1c79356b
A
1571
1572 if (uap->fromlenaddr) {
91447636 1573 error = copyin(uap->fromlenaddr,
1c79356b
A
1574 (caddr_t)&msg.msg_namelen, sizeof (msg.msg_namelen));
1575 if (error)
1576 return (error);
2d21ac55 1577 } else {
1c79356b 1578 msg.msg_namelen = 0;
2d21ac55 1579 }
1c79356b 1580 msg.msg_name = uap->from;
91447636 1581 auio = uio_create(1, 0,
2d21ac55
A
1582 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1583 UIO_READ);
91447636
A
1584 if (auio == NULL) {
1585 return (ENOMEM);
1586 }
2d21ac55 1587
91447636
A
1588 uio_addiov(auio, uap->buf, uap->len);
1589 /* no need to set up msg_iov. recvit uses uio_t we send it */
1590 msg.msg_iov = 0;
1591 msg.msg_iovlen = 0;
1c79356b 1592 msg.msg_control = 0;
91447636 1593 msg.msg_controllen = 0;
1c79356b 1594 msg.msg_flags = uap->flags;
91447636
A
1595 error = recvit(p, uap->s, &msg, auio, uap->fromlenaddr, retval);
1596 if (auio != NULL) {
1597 uio_free(auio);
1598 }
2d21ac55 1599
2d21ac55 1600 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_END, error, 0, 0, 0, 0);
1c79356b 1601
2d21ac55 1602 return (error);
1c79356b
A
1603}
1604
1605/*
2d21ac55
A
1606 * Returns: 0 Success
1607 * EMSGSIZE
1608 * ENOMEM
1609 * copyin:EFAULT
1610 * copyout:EFAULT
1611 * recvit:???
1612 *
1613 * Notes: For full documentation of the return codes from recvit, see
1614 * the block header for the recvit function.
1c79356b
A
1615 */
1616int
b0d623f7 1617recvmsg(struct proc *p, struct recvmsg_args *uap, int32_t *retval)
1c79356b 1618{
2d21ac55
A
1619 __pthread_testcancel(1);
1620 return(recvmsg_nocancel(p, (struct recvmsg_nocancel_args *)uap, retval));
1c79356b 1621}
1c79356b
A
1622
1623int
b0d623f7 1624recvmsg_nocancel(struct proc *p, struct recvmsg_nocancel_args *uap, int32_t *retval)
1c79356b 1625{
b0d623f7
A
1626 struct user32_msghdr msg32;
1627 struct user64_msghdr msg64;
91447636
A
1628 struct user_msghdr user_msg;
1629 caddr_t msghdrp;
1630 int size_of_msghdr;
1631 user_addr_t uiov;
2d21ac55 1632 int error;
91447636
A
1633 uio_t auio = NULL;
1634 struct user_iovec *iovp;
1c79356b 1635
2d21ac55 1636 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
55e303ae 1637 AUDIT_ARG(fd, uap->s);
91447636 1638 if (IS_64BIT_PROCESS(p)) {
b0d623f7
A
1639 msghdrp = (caddr_t)&msg64;
1640 size_of_msghdr = sizeof (msg64);
2d21ac55 1641 } else {
b0d623f7
A
1642 msghdrp = (caddr_t)&msg32;
1643 size_of_msghdr = sizeof (msg32);
91447636
A
1644 }
1645 error = copyin(uap->msg, msghdrp, size_of_msghdr);
2d21ac55
A
1646 if (error) {
1647 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1c79356b
A
1648 return (error);
1649 }
1650
91447636 1651 /* only need to copy if user process is not 64-bit */
b0d623f7
A
1652 if (IS_64BIT_PROCESS(p)) {
1653 user_msg.msg_flags = msg64.msg_flags;
1654 user_msg.msg_controllen = msg64.msg_controllen;
1655 user_msg.msg_control = msg64.msg_control;
1656 user_msg.msg_iovlen = msg64.msg_iovlen;
1657 user_msg.msg_iov = msg64.msg_iov;
1658 user_msg.msg_namelen = msg64.msg_namelen;
1659 user_msg.msg_name = msg64.msg_name;
1660 } else {
1661 user_msg.msg_flags = msg32.msg_flags;
1662 user_msg.msg_controllen = msg32.msg_controllen;
1663 user_msg.msg_control = msg32.msg_control;
1664 user_msg.msg_iovlen = msg32.msg_iovlen;
1665 user_msg.msg_iov = msg32.msg_iov;
1666 user_msg.msg_namelen = msg32.msg_namelen;
1667 user_msg.msg_name = msg32.msg_name;
91447636
A
1668 }
1669
1670 if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
2d21ac55
A
1671 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, EMSGSIZE,
1672 0, 0, 0, 0);
91447636
A
1673 return (EMSGSIZE);
1674 }
1675
91447636 1676 user_msg.msg_flags = uap->flags;
91447636
A
1677
1678 /* allocate a uio large enough to hold the number of iovecs passed */
1679 auio = uio_create(user_msg.msg_iovlen, 0,
2d21ac55
A
1680 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1681 UIO_READ);
91447636
A
1682 if (auio == NULL) {
1683 error = ENOMEM;
1684 goto done;
1685 }
1686
2d21ac55
A
1687 /*
1688 * get location of iovecs within the uio. then copyin the iovecs from
91447636
A
1689 * user space.
1690 */
1691 iovp = uio_iovsaddr(auio);
1692 if (iovp == NULL) {
1693 error = ENOMEM;
1694 goto done;
1695 }
1696 uiov = user_msg.msg_iov;
1697 user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
b0d623f7
A
1698 error = copyin_user_iovec_array(uiov,
1699 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
1700 user_msg.msg_iovlen, iovp);
1c79356b
A
1701 if (error)
1702 goto done;
91447636 1703
2d21ac55 1704 /* finish setup of uio_t */
39236c6e
A
1705 error = uio_calculateresid(auio);
1706 if (error) {
1707 goto done;
1708 }
2d21ac55 1709
91447636 1710 error = recvit(p, uap->s, &user_msg, auio, 0, retval);
1c79356b 1711 if (!error) {
91447636 1712 user_msg.msg_iov = uiov;
b0d623f7
A
1713 if (IS_64BIT_PROCESS(p)) {
1714 msg64.msg_flags = user_msg.msg_flags;
1715 msg64.msg_controllen = user_msg.msg_controllen;
1716 msg64.msg_control = user_msg.msg_control;
1717 msg64.msg_iovlen = user_msg.msg_iovlen;
1718 msg64.msg_iov = user_msg.msg_iov;
1719 msg64.msg_namelen = user_msg.msg_namelen;
1720 msg64.msg_name = user_msg.msg_name;
1721 } else {
1722 msg32.msg_flags = user_msg.msg_flags;
1723 msg32.msg_controllen = user_msg.msg_controllen;
1724 msg32.msg_control = user_msg.msg_control;
1725 msg32.msg_iovlen = user_msg.msg_iovlen;
1726 msg32.msg_iov = user_msg.msg_iov;
1727 msg32.msg_namelen = user_msg.msg_namelen;
1728 msg32.msg_name = user_msg.msg_name;
91447636
A
1729 }
1730 error = copyout(msghdrp, uap->msg, size_of_msghdr);
1c79356b
A
1731 }
1732done:
91447636
A
1733 if (auio != NULL) {
1734 uio_free(auio);
1735 }
2d21ac55 1736 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1c79356b
A
1737 return (error);
1738}
1739
2d21ac55
A
1740/*
1741 * Returns: 0 Success
1742 * EBADF
1743 * file_socket:ENOTSOCK
1744 * file_socket:EBADF
1745 * soshutdown:EINVAL
1746 * soshutdown:ENOTCONN
1747 * soshutdown:EADDRNOTAVAIL[TCP]
1748 * soshutdown:ENOBUFS[TCP]
1749 * soshutdown:EMSGSIZE[TCP]
1750 * soshutdown:EHOSTUNREACH[TCP]
1751 * soshutdown:ENETUNREACH[TCP]
1752 * soshutdown:ENETDOWN[TCP]
1753 * soshutdown:ENOMEM[TCP]
1754 * soshutdown:EACCES[TCP]
1755 * soshutdown:EMSGSIZE[TCP]
1756 * soshutdown:ENOBUFS[TCP]
1757 * soshutdown:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
1758 * soshutdown:??? [other protocol families]
1759 */
1c79356b
A
1760/* ARGSUSED */
1761int
2d21ac55 1762shutdown(__unused struct proc *p, struct shutdown_args *uap,
b0d623f7 1763 __unused int32_t *retval)
1c79356b 1764{
2d21ac55 1765 struct socket *so;
1c79356b
A
1766 int error;
1767
55e303ae 1768 AUDIT_ARG(fd, uap->s);
91447636 1769 error = file_socket(uap->s, &so);
1c79356b
A
1770 if (error)
1771 return (error);
91447636
A
1772 if (so == NULL) {
1773 error = EBADF;
1774 goto out;
1775 }
1776 error = soshutdown((struct socket *)so, uap->how);
1777out:
1778 file_drop(uap->s);
2d21ac55 1779 return (error);
1c79356b
A
1780}
1781
2d21ac55
A
1782/*
1783 * Returns: 0 Success
1784 * EFAULT
1785 * EINVAL
1786 * EACCES Mandatory Access Control failure
1787 * file_socket:ENOTSOCK
1788 * file_socket:EBADF
1789 * sosetopt:EINVAL
1790 * sosetopt:ENOPROTOOPT
1791 * sosetopt:ENOBUFS
1792 * sosetopt:EDOM
1793 * sosetopt:EFAULT
1794 * sosetopt:EOPNOTSUPP[AF_UNIX]
1795 * sosetopt:???
1796 */
1c79356b
A
1797/* ARGSUSED */
1798int
2d21ac55 1799setsockopt(struct proc *p, struct setsockopt_args *uap,
b0d623f7 1800 __unused int32_t *retval)
1c79356b 1801{
2d21ac55 1802 struct socket *so;
1c79356b
A
1803 struct sockopt sopt;
1804 int error;
1805
55e303ae 1806 AUDIT_ARG(fd, uap->s);
1c79356b
A
1807 if (uap->val == 0 && uap->valsize != 0)
1808 return (EFAULT);
2d21ac55 1809 /* No bounds checking on size (it's unsigned) */
1c79356b 1810
91447636 1811 error = file_socket(uap->s, &so);
1c79356b
A
1812 if (error)
1813 return (error);
1814
1815 sopt.sopt_dir = SOPT_SET;
1816 sopt.sopt_level = uap->level;
1817 sopt.sopt_name = uap->name;
1818 sopt.sopt_val = uap->val;
1819 sopt.sopt_valsize = uap->valsize;
1820 sopt.sopt_p = p;
1821
91447636
A
1822 if (so == NULL) {
1823 error = EINVAL;
1824 goto out;
1825 }
2d21ac55
A
1826#if CONFIG_MACF_SOCKET_SUBSET
1827 if ((error = mac_socket_check_setsockopt(kauth_cred_get(), so,
1828 &sopt)) != 0)
1829 goto out;
1830#endif /* MAC_SOCKET_SUBSET */
39236c6e 1831 error = sosetoptlock(so, &sopt, 1); /* will lock socket */
91447636
A
1832out:
1833 file_drop(uap->s);
2d21ac55 1834 return (error);
1c79356b
A
1835}
1836
1837
1838
2d21ac55
A
1839/*
1840 * Returns: 0 Success
1841 * EINVAL
1842 * EBADF
1843 * EACCES Mandatory Access Control failure
1844 * copyin:EFAULT
1845 * copyout:EFAULT
1846 * file_socket:ENOTSOCK
1847 * file_socket:EBADF
1848 * sogetopt:???
1849 */
1c79356b 1850int
2d21ac55 1851getsockopt(struct proc *p, struct getsockopt_args *uap,
b0d623f7 1852 __unused int32_t *retval)
1c79356b 1853{
91447636
A
1854 int error;
1855 socklen_t valsize;
1856 struct sockopt sopt;
2d21ac55 1857 struct socket *so;
1c79356b 1858
91447636 1859 error = file_socket(uap->s, &so);
1c79356b
A
1860 if (error)
1861 return (error);
1862 if (uap->val) {
2d21ac55
A
1863 error = copyin(uap->avalsize, (caddr_t)&valsize,
1864 sizeof (valsize));
1c79356b 1865 if (error)
91447636 1866 goto out;
2d21ac55
A
1867 /* No bounds checking on size (it's unsigned) */
1868 } else {
1c79356b 1869 valsize = 0;
2d21ac55 1870 }
1c79356b
A
1871 sopt.sopt_dir = SOPT_GET;
1872 sopt.sopt_level = uap->level;
1873 sopt.sopt_name = uap->name;
1874 sopt.sopt_val = uap->val;
1875 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
1876 sopt.sopt_p = p;
1877
91447636
A
1878 if (so == NULL) {
1879 error = EBADF;
1880 goto out;
1881 }
2d21ac55
A
1882#if CONFIG_MACF_SOCKET_SUBSET
1883 if ((error = mac_socket_check_getsockopt(kauth_cred_get(), so,
1884 &sopt)) != 0)
1885 goto out;
1886#endif /* MAC_SOCKET_SUBSET */
39236c6e 1887 error = sogetoptlock((struct socket *)so, &sopt, 1); /* will lock */
1c79356b
A
1888 if (error == 0) {
1889 valsize = sopt.sopt_valsize;
2d21ac55
A
1890 error = copyout((caddr_t)&valsize, uap->avalsize,
1891 sizeof (valsize));
1c79356b 1892 }
91447636
A
1893out:
1894 file_drop(uap->s);
1c79356b
A
1895 return (error);
1896}
1897
1898
1899/*
1900 * Get socket name.
2d21ac55
A
1901 *
1902 * Returns: 0 Success
1903 * EBADF
1904 * file_socket:ENOTSOCK
1905 * file_socket:EBADF
1906 * copyin:EFAULT
1907 * copyout:EFAULT
1908 * <pru_sockaddr>:ENOBUFS[TCP]
1909 * <pru_sockaddr>:ECONNRESET[TCP]
1910 * <pru_sockaddr>:EINVAL[AF_UNIX]
1911 * <sf_getsockname>:???
1c79356b
A
1912 */
1913/* ARGSUSED */
2d21ac55
A
1914int
1915getsockname(__unused struct proc *p, struct getsockname_args *uap,
b0d623f7 1916 __unused int32_t *retval)
1c79356b 1917{
91447636 1918 struct socket *so;
1c79356b 1919 struct sockaddr *sa;
91447636 1920 socklen_t len;
2d21ac55 1921 socklen_t sa_len;
1c79356b
A
1922 int error;
1923
91447636 1924 error = file_socket(uap->fdes, &so);
1c79356b
A
1925 if (error)
1926 return (error);
2d21ac55 1927 error = copyin(uap->alen, (caddr_t)&len, sizeof (socklen_t));
1c79356b 1928 if (error)
91447636
A
1929 goto out;
1930 if (so == NULL) {
1931 error = EBADF;
1932 goto out;
1933 }
1c79356b 1934 sa = 0;
91447636 1935 socket_lock(so, 1);
1c79356b 1936 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
2d21ac55 1937 if (error == 0) {
6d2010ae 1938 error = sflt_getsockname(so, &sa);
91447636
A
1939 if (error == EJUSTRETURN)
1940 error = 0;
91447636
A
1941 }
1942 socket_unlock(so, 1);
1c79356b
A
1943 if (error)
1944 goto bad;
1945 if (sa == 0) {
1946 len = 0;
1947 goto gotnothing;
1948 }
1949
2d21ac55
A
1950 sa_len = sa->sa_len;
1951 len = MIN(len, sa_len);
91447636 1952 error = copyout((caddr_t)sa, uap->asa, len);
2d21ac55
A
1953 if (error)
1954 goto bad;
1955 /* return the actual, untruncated address length */
1956 len = sa_len;
1c79356b 1957gotnothing:
2d21ac55 1958 error = copyout((caddr_t)&len, uap->alen, sizeof (socklen_t));
1c79356b
A
1959bad:
1960 if (sa)
1961 FREE(sa, M_SONAME);
91447636
A
1962out:
1963 file_drop(uap->fdes);
1c79356b
A
1964 return (error);
1965}
1966
1c79356b
A
1967/*
1968 * Get name of peer for connected socket.
2d21ac55
A
1969 *
1970 * Returns: 0 Success
1971 * EBADF
1972 * EINVAL
1973 * ENOTCONN
1974 * file_socket:ENOTSOCK
1975 * file_socket:EBADF
1976 * copyin:EFAULT
1977 * copyout:EFAULT
1978 * <pru_peeraddr>:???
1979 * <sf_getpeername>:???
1c79356b
A
1980 */
1981/* ARGSUSED */
1982int
2d21ac55 1983getpeername(__unused struct proc *p, struct getpeername_args *uap,
b0d623f7 1984 __unused int32_t *retval)
1c79356b 1985{
91447636 1986 struct socket *so;
1c79356b 1987 struct sockaddr *sa;
91447636 1988 socklen_t len;
2d21ac55 1989 socklen_t sa_len;
1c79356b
A
1990 int error;
1991
91447636 1992 error = file_socket(uap->fdes, &so);
1c79356b
A
1993 if (error)
1994 return (error);
91447636
A
1995 if (so == NULL) {
1996 error = EBADF;
1997 goto out;
1998 }
1999
2000 socket_lock(so, 1);
2001
2d21ac55
A
2002 if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
2003 (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
2004 /* the socket has been shutdown, no more getpeername's */
2005 socket_unlock(so, 1);
2006 error = EINVAL;
2007 goto out;
2008 }
2009
91447636
A
2010 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
2011 socket_unlock(so, 1);
2012 error = ENOTCONN;
2013 goto out;
2014 }
2d21ac55 2015 error = copyin(uap->alen, (caddr_t)&len, sizeof (socklen_t));
91447636
A
2016 if (error) {
2017 socket_unlock(so, 1);
2018 goto out;
2019 }
1c79356b
A
2020 sa = 0;
2021 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
2d21ac55 2022 if (error == 0) {
6d2010ae 2023 error = sflt_getpeername(so, &sa);
91447636
A
2024 if (error == EJUSTRETURN)
2025 error = 0;
91447636
A
2026 }
2027 socket_unlock(so, 1);
1c79356b
A
2028 if (error)
2029 goto bad;
2030 if (sa == 0) {
2031 len = 0;
2032 goto gotnothing;
2033 }
2d21ac55
A
2034 sa_len = sa->sa_len;
2035 len = MIN(len, sa_len);
91447636 2036 error = copyout(sa, uap->asa, len);
1c79356b
A
2037 if (error)
2038 goto bad;
2d21ac55
A
2039 /* return the actual, untruncated address length */
2040 len = sa_len;
1c79356b 2041gotnothing:
2d21ac55 2042 error = copyout((caddr_t)&len, uap->alen, sizeof (socklen_t));
1c79356b
A
2043bad:
2044 if (sa) FREE(sa, M_SONAME);
91447636
A
2045out:
2046 file_drop(uap->fdes);
1c79356b
A
2047 return (error);
2048}
2049
2050int
2d21ac55 2051sockargs(struct mbuf **mp, user_addr_t data, int buflen, int type)
1c79356b 2052{
2d21ac55
A
2053 struct sockaddr *sa;
2054 struct mbuf *m;
1c79356b
A
2055 int error;
2056
e2d2fc5c 2057 size_t alloc_buflen = (size_t)buflen;
39236c6e 2058
e2d2fc5c
A
2059 if(alloc_buflen > INT_MAX/2)
2060 return (EINVAL);
b0d623f7
A
2061#ifdef __LP64__
2062 /* The fd's in the buffer must expand to be pointers, thus we need twice as much space */
2063 if(type == MT_CONTROL)
2064 alloc_buflen = ((buflen - sizeof(struct cmsghdr))*2) + sizeof(struct cmsghdr);
2065#endif
e2d2fc5c
A
2066 if (alloc_buflen > MLEN) {
2067 if (type == MT_SONAME && alloc_buflen <= 112)
b0d623f7 2068 alloc_buflen = MLEN; /* unix domain compat. hack */
e2d2fc5c 2069 else if (alloc_buflen > MCLBYTES)
91447636 2070 return (EINVAL);
1c79356b
A
2071 }
2072 m = m_get(M_WAIT, type);
2073 if (m == NULL)
2074 return (ENOBUFS);
e2d2fc5c 2075 if (alloc_buflen > MLEN) {
91447636
A
2076 MCLGET(m, M_WAIT);
2077 if ((m->m_flags & M_EXT) == 0) {
2078 m_free(m);
2d21ac55 2079 return (ENOBUFS);
91447636
A
2080 }
2081 }
b0d623f7
A
2082 /* K64: We still copyin the original buflen because it gets expanded later
2083 * and we lie about the size of the mbuf because it only affects unp_* functions
2084 */
1c79356b 2085 m->m_len = buflen;
91447636 2086 error = copyin(data, mtod(m, caddr_t), (u_int)buflen);
2d21ac55 2087 if (error) {
1c79356b 2088 (void) m_free(m);
2d21ac55 2089 } else {
1c79356b
A
2090 *mp = m;
2091 if (type == MT_SONAME) {
2092 sa = mtod(m, struct sockaddr *);
1c79356b
A
2093 sa->sa_len = buflen;
2094 }
2095 }
2096 return (error);
2097}
2098
91447636
A
2099/*
2100 * Given a user_addr_t of length len, allocate and fill out a *sa.
2d21ac55
A
2101 *
2102 * Returns: 0 Success
2103 * ENAMETOOLONG Filename too long
2104 * EINVAL Invalid argument
2105 * ENOMEM Not enough space
2106 * copyin:EFAULT Bad address
91447636 2107 */
2d21ac55
A
2108static int
2109getsockaddr(struct socket *so, struct sockaddr **namp, user_addr_t uaddr,
4a3eedf9 2110 size_t len, boolean_t translate_unspec)
1c79356b
A
2111{
2112 struct sockaddr *sa;
2113 int error;
2114
2115 if (len > SOCK_MAXADDRLEN)
2d21ac55 2116 return (ENAMETOOLONG);
1c79356b 2117
2d21ac55
A
2118 if (len < offsetof(struct sockaddr, sa_data[0]))
2119 return (EINVAL);
1c79356b 2120
2d21ac55 2121 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK | M_ZERO);
91447636 2122 if (sa == NULL) {
2d21ac55 2123 return (ENOMEM);
91447636
A
2124 }
2125 error = copyin(uaddr, (caddr_t)sa, len);
1c79356b
A
2126 if (error) {
2127 FREE(sa, M_SONAME);
2128 } else {
2d21ac55
A
2129 /*
2130 * Force sa_family to AF_INET on AF_INET sockets to handle
2131 * legacy applications that use AF_UNSPEC (0). On all other
2132 * sockets we leave it unchanged and let the lower layer
2133 * handle it.
2134 */
4a3eedf9 2135 if (translate_unspec && sa->sa_family == AF_UNSPEC &&
39236c6e 2136 SOCK_CHECK_DOM(so, PF_INET) &&
2d21ac55
A
2137 len == sizeof (struct sockaddr_in))
2138 sa->sa_family = AF_INET;
2139
1c79356b
A
2140 sa->sa_len = len;
2141 *namp = sa;
2142 }
2d21ac55 2143 return (error);
1c79356b
A
2144}
2145
2d21ac55
A
2146static int
2147getsockaddr_s(struct socket *so, struct sockaddr_storage *ss,
4a3eedf9 2148 user_addr_t uaddr, size_t len, boolean_t translate_unspec)
1c79356b 2149{
2d21ac55
A
2150 int error;
2151
2152 if (ss == NULL || uaddr == USER_ADDR_NULL ||
2153 len < offsetof(struct sockaddr, sa_data[0]))
2154 return (EINVAL);
2155
2156 /*
2157 * sockaddr_storage size is less than SOCK_MAXADDRLEN,
2158 * so the check here is inclusive.
2159 */
2160 if (len > sizeof (*ss))
2161 return (ENAMETOOLONG);
1c79356b 2162
2d21ac55
A
2163 bzero(ss, sizeof (*ss));
2164 error = copyin(uaddr, (caddr_t)ss, len);
2165 if (error == 0) {
2166 /*
2167 * Force sa_family to AF_INET on AF_INET sockets to handle
2168 * legacy applications that use AF_UNSPEC (0). On all other
2169 * sockets we leave it unchanged and let the lower layer
2170 * handle it.
2171 */
4a3eedf9 2172 if (translate_unspec && ss->ss_family == AF_UNSPEC &&
39236c6e 2173 SOCK_CHECK_DOM(so, PF_INET) &&
2d21ac55
A
2174 len == sizeof (struct sockaddr_in))
2175 ss->ss_family = AF_INET;
91447636 2176
2d21ac55 2177 ss->ss_len = len;
1c79356b 2178 }
2d21ac55 2179 return (error);
1c79356b
A
2180}
2181
39236c6e
A
2182/*
2183 * Hard limit on the number of source and/or destination addresses
2184 * that can be specified by an application.
2185 */
2186#define SOCKADDRLIST_MAX_ENTRIES 64
2187
2188static int
2189getsockaddrlist(struct socket *so, struct sockaddr_list **slp,
2190 user_addr_t uaddr, socklen_t uaddrlen, boolean_t xlate_unspec)
2191{
2192 struct sockaddr_list *sl;
2193 int error = 0;
2d21ac55 2194
39236c6e
A
2195 *slp = NULL;
2196
2197 if (uaddr == USER_ADDR_NULL || uaddrlen == 0)
2198 return (EINVAL);
2199
2200 sl = sockaddrlist_alloc(M_WAITOK);
2201 if (sl == NULL)
2202 return (ENOMEM);
2203
2204 VERIFY(sl->sl_cnt == 0);
2205 while (uaddrlen > 0 && sl->sl_cnt < SOCKADDRLIST_MAX_ENTRIES) {
2206 struct sockaddr_storage ss;
2207 struct sockaddr_entry *se;
2208 struct sockaddr *sa;
2209
2210 if (uaddrlen < sizeof (struct sockaddr)) {
2211 error = EINVAL;
2212 break;
2213 }
2214
2215 bzero(&ss, sizeof (ss));
2216 error = copyin(uaddr, (caddr_t)&ss, sizeof (struct sockaddr));
2217 if (error != 0)
2218 break;
2219
2220 /* getsockaddr does the same but we need them now */
2221 if (uaddrlen < ss.ss_len ||
2222 ss.ss_len < offsetof(struct sockaddr, sa_data[0])) {
2223 error = EINVAL;
2224 break;
2225 } else if (ss.ss_len > sizeof (ss)) {
2226 /*
2227 * sockaddr_storage size is less than SOCK_MAXADDRLEN,
2228 * so the check here is inclusive. We could user the
2229 * latter instead, but seems like an overkill for now.
2230 */
2231 error = ENAMETOOLONG;
2232 break;
2233 }
2234
2235 se = sockaddrentry_alloc(M_WAITOK);
2236 if (se == NULL)
2237 break;
2238
2239 sockaddrlist_insert(sl, se);
2240
2241 error = getsockaddr(so, &sa, uaddr, ss.ss_len, xlate_unspec);
2242 if (error != 0)
2243 break;
2244
2245 VERIFY(sa != NULL && sa->sa_len == ss.ss_len);
2246 se->se_addr = sa;
2247
2248 uaddr += ss.ss_len;
2249 VERIFY(((signed)uaddrlen - ss.ss_len) >= 0);
2250 uaddrlen -= ss.ss_len;
2251 }
2252
2253 if (error != 0)
2254 sockaddrlist_free(sl);
2255 else
2256 *slp = sl;
2257
2258 return (error);
2259}
2260
2261#if SENDFILE
2d21ac55
A
2262
2263#define SFUIOBUFS 64
2d21ac55
A
2264
2265/* Macros to compute the number of mbufs needed depending on cluster size */
2266#define HOWMANY_16K(n) ((((unsigned int)(n) - 1) >> (PGSHIFT + 2)) + 1)
2267#define HOWMANY_4K(n) ((((unsigned int)(n) - 1) >> PGSHIFT) + 1)
2268
39236c6e
A
2269/* Upper send limit in bytes (SFUIOBUFS * PAGESIZE) */
2270#define SENDFILE_MAX_BYTES (SFUIOBUFS << PGSHIFT)
2d21ac55
A
2271
2272/* Upper send limit in the number of mbuf clusters */
2273#define SENDFILE_MAX_16K HOWMANY_16K(SENDFILE_MAX_BYTES)
2274#define SENDFILE_MAX_4K HOWMANY_4K(SENDFILE_MAX_BYTES)
2275
2276size_t mbuf_pkt_maxlen(mbuf_t m);
2277
2278__private_extern__ size_t
2279mbuf_pkt_maxlen(mbuf_t m)
1c79356b 2280{
2d21ac55 2281 size_t maxlen = 0;
1c79356b 2282
2d21ac55
A
2283 while (m) {
2284 maxlen += mbuf_maxlen(m);
2285 m = mbuf_next(m);
1c79356b 2286 }
2d21ac55 2287 return (maxlen);
1c79356b
A
2288}
2289
1c79356b 2290static void
2d21ac55
A
2291alloc_sendpkt(int how, size_t pktlen, unsigned int *maxchunks,
2292 struct mbuf **m, boolean_t jumbocl)
1c79356b 2293{
2d21ac55 2294 unsigned int needed;
1c79356b 2295
2d21ac55
A
2296 if (pktlen == 0)
2297 panic("%s: pktlen (%ld) must be non-zero\n", __func__, pktlen);
1c79356b 2298
2d21ac55
A
2299 /*
2300 * Try to allocate for the whole thing. Since we want full control
2301 * over the buffer size and be able to accept partial result, we can't
2302 * use mbuf_allocpacket(). The logic below is similar to sosend().
2303 */
2304 *m = NULL;
6d2010ae 2305 if (pktlen > MBIGCLBYTES && jumbocl) {
2d21ac55
A
2306 needed = MIN(SENDFILE_MAX_16K, HOWMANY_16K(pktlen));
2307 *m = m_getpackets_internal(&needed, 1, how, 0, M16KCLBYTES);
2308 }
2309 if (*m == NULL) {
2310 needed = MIN(SENDFILE_MAX_4K, HOWMANY_4K(pktlen));
6d2010ae 2311 *m = m_getpackets_internal(&needed, 1, how, 0, MBIGCLBYTES);
2d21ac55
A
2312 }
2313
2314 /*
2315 * Our previous attempt(s) at allocation had failed; the system
2316 * may be short on mbufs, and we want to block until they are
2317 * available. This time, ask just for 1 mbuf and don't return
2318 * until we get it.
2319 */
2320 if (*m == NULL) {
2321 needed = 1;
6d2010ae 2322 *m = m_getpackets_internal(&needed, 1, M_WAIT, 1, MBIGCLBYTES);
1c79356b 2323 }
2d21ac55
A
2324 if (*m == NULL)
2325 panic("%s: blocking allocation returned NULL\n", __func__);
2326
2327 *maxchunks = needed;
1c79356b
A
2328}
2329
2330/*
2331 * sendfile(2).
2d21ac55
A
2332 * int sendfile(int fd, int s, off_t offset, off_t *nbytes,
2333 * struct sf_hdtr *hdtr, int flags)
1c79356b
A
2334 *
2335 * Send a file specified by 'fd' and starting at 'offset' to a socket
2d21ac55
A
2336 * specified by 's'. Send only '*nbytes' of the file or until EOF if
2337 * *nbytes == 0. Optionally add a header and/or trailer to the socket
2338 * output. If specified, write the total number of bytes sent into *nbytes.
1c79356b
A
2339 */
2340int
2d21ac55 2341sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval)
1c79356b 2342{
91447636 2343 struct fileproc *fp;
1c79356b 2344 struct vnode *vp;
1c79356b 2345 struct socket *so;
2d21ac55
A
2346 struct writev_nocancel_args nuap;
2347 user_ssize_t writev_retval;
2d21ac55 2348 struct user_sf_hdtr user_hdtr;
b0d623f7
A
2349 struct user32_sf_hdtr user32_hdtr;
2350 struct user64_sf_hdtr user64_hdtr;
2d21ac55
A
2351 off_t off, xfsize;
2352 off_t nbytes = 0, sbytes = 0;
2353 int error = 0;
2354 size_t sizeof_hdtr;
2d21ac55
A
2355 off_t file_size;
2356 struct vfs_context context = *vfs_context_current();
316670eb
A
2357#define ENXIO_10146739_DBG(err_str) { \
2358 if (error == ENXIO) { \
2359 printf(err_str, \
2360 __func__, \
2361 "File a radar related to rdar://10146739 \n"); \
2362 } \
2363}
2d21ac55
A
2364 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_START), uap->s,
2365 0, 0, 0, 0);
b0d623f7
A
2366
2367 AUDIT_ARG(fd, uap->fd);
2368 AUDIT_ARG(value32, uap->s);
2369
1c79356b
A
2370 /*
2371 * Do argument checking. Must be a regular file in, stream
2372 * type and connected socket out, positive offset.
2373 */
2d21ac55 2374 if ((error = fp_getfvp(p, uap->fd, &fp, &vp))) {
316670eb 2375 ENXIO_10146739_DBG("%s: fp_getfvp error. %s");
1c79356b 2376 goto done;
2d21ac55
A
2377 }
2378 if ((fp->f_flag & FREAD) == 0) {
91447636
A
2379 error = EBADF;
2380 goto done1;
1c79356b 2381 }
2d21ac55
A
2382 if (vnode_isreg(vp) == 0) {
2383 error = ENOTSUP;
91447636 2384 goto done1;
1c79356b 2385 }
91447636 2386 error = file_socket(uap->s, &so);
2d21ac55 2387 if (error) {
316670eb 2388 ENXIO_10146739_DBG("%s: file_socket error. %s");
91447636 2389 goto done1;
2d21ac55 2390 }
55e303ae
A
2391 if (so == NULL) {
2392 error = EBADF;
91447636 2393 goto done2;
55e303ae 2394 }
1c79356b
A
2395 if (so->so_type != SOCK_STREAM) {
2396 error = EINVAL;
2d21ac55 2397 goto done2;
1c79356b
A
2398 }
2399 if ((so->so_state & SS_ISCONNECTED) == 0) {
2400 error = ENOTCONN;
2d21ac55 2401 goto done2;
1c79356b
A
2402 }
2403 if (uap->offset < 0) {
2404 error = EINVAL;
2d21ac55 2405 goto done2;
1c79356b 2406 }
2d21ac55
A
2407 if (uap->nbytes == USER_ADDR_NULL) {
2408 error = EINVAL;
2409 goto done2;
2410 }
2411 if (uap->flags != 0) {
2412 error = EINVAL;
2413 goto done2;
2414 }
2415
2416 context.vc_ucred = fp->f_fglob->fg_cred;
2417
2418#if CONFIG_MACF_SOCKET_SUBSET
2419 /* JMM - fetch connected sockaddr? */
2420 error = mac_socket_check_send(context.vc_ucred, so, NULL);
2421 if (error)
2422 goto done2;
2423#endif
2424
2425 /*
2426 * Get number of bytes to send
2427 * Should it applies to size of header and trailer?
2428 * JMM - error handling?
2429 */
2430 copyin(uap->nbytes, &nbytes, sizeof (off_t));
1c79356b
A
2431
2432 /*
2433 * If specified, get the pointer to the sf_hdtr struct for
2434 * any headers/trailers.
2435 */
2d21ac55
A
2436 if (uap->hdtr != USER_ADDR_NULL) {
2437 caddr_t hdtrp;
2438
2439 bzero(&user_hdtr, sizeof (user_hdtr));
2440 if (IS_64BIT_PROCESS(p)) {
b0d623f7
A
2441 hdtrp = (caddr_t)&user64_hdtr;
2442 sizeof_hdtr = sizeof (user64_hdtr);
2d21ac55 2443 } else {
b0d623f7
A
2444 hdtrp = (caddr_t)&user32_hdtr;
2445 sizeof_hdtr = sizeof (user32_hdtr);
2d21ac55
A
2446 }
2447 error = copyin(uap->hdtr, hdtrp, sizeof_hdtr);
1c79356b 2448 if (error)
2d21ac55 2449 goto done2;
b0d623f7
A
2450 if (IS_64BIT_PROCESS(p)) {
2451 user_hdtr.headers = user64_hdtr.headers;
2452 user_hdtr.hdr_cnt = user64_hdtr.hdr_cnt;
2453 user_hdtr.trailers = user64_hdtr.trailers;
2454 user_hdtr.trl_cnt = user64_hdtr.trl_cnt;
2455 } else {
2456 user_hdtr.headers = user32_hdtr.headers;
2457 user_hdtr.hdr_cnt = user32_hdtr.hdr_cnt;
2458 user_hdtr.trailers = user32_hdtr.trailers;
2459 user_hdtr.trl_cnt = user32_hdtr.trl_cnt;
2d21ac55
A
2460 }
2461
1c79356b
A
2462 /*
2463 * Send any headers. Wimp out and use writev(2).
2464 */
2d21ac55
A
2465 if (user_hdtr.headers != USER_ADDR_NULL) {
2466 bzero(&nuap, sizeof (struct writev_args));
1c79356b 2467 nuap.fd = uap->s;
2d21ac55
A
2468 nuap.iovp = user_hdtr.headers;
2469 nuap.iovcnt = user_hdtr.hdr_cnt;
2470 error = writev_nocancel(p, &nuap, &writev_retval);
316670eb
A
2471 if (error) {
2472 ENXIO_10146739_DBG("%s: writev_nocancel error. %s");
2d21ac55 2473 goto done2;
316670eb 2474 }
2d21ac55 2475 sbytes += writev_retval;
1c79356b
A
2476 }
2477 }
2478
2479 /*
2d21ac55
A
2480 * Get the file size for 2 reasons:
2481 * 1. We don't want to allocate more mbufs than necessary
2482 * 2. We don't want to read past the end of file
1c79356b 2483 */
316670eb
A
2484 if ((error = vnode_size(vp, &file_size, vfs_context_current())) != 0) {
2485 ENXIO_10146739_DBG("%s: vnode_size error. %s");
2d21ac55 2486 goto done2;
316670eb 2487 }
1c79356b
A
2488
2489 /*
2d21ac55
A
2490 * Simply read file data into a chain of mbufs that used with scatter
2491 * gather reads. We're not (yet?) setup to use zero copy external
2492 * mbufs that point to the file pages.
1c79356b 2493 */
2d21ac55 2494 socket_lock(so, 1);
39236c6e 2495 error = sblock(&so->so_snd, SBL_WAIT);
2d21ac55
A
2496 if (error) {
2497 socket_unlock(so, 1);
2498 goto done2;
2499 }
1c79356b 2500 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
2d21ac55 2501 mbuf_t m0 = NULL, m;
39236c6e 2502 unsigned int nbufs = SFUIOBUFS, i;
2d21ac55 2503 uio_t auio;
39236c6e 2504 char uio_buf[UIO_SIZEOF(SFUIOBUFS)]; /* 1 KB !!! */
2d21ac55
A
2505 size_t uiolen;
2506 user_ssize_t rlen;
2507 off_t pgoff;
2508 size_t pktlen;
2509 boolean_t jumbocl;
1c79356b 2510
1c79356b 2511 /*
2d21ac55
A
2512 * Calculate the amount to transfer.
2513 * Align to round number of pages.
2514 * Not to exceed send socket buffer,
1c79356b
A
2515 * the EOF, or the passed in nbytes.
2516 */
2d21ac55
A
2517 xfsize = sbspace(&so->so_snd);
2518
2519 if (xfsize <= 0) {
2520 if (so->so_state & SS_CANTSENDMORE) {
2521 error = EPIPE;
2522 goto done3;
2523 } else if ((so->so_state & SS_NBIO)) {
2524 error = EAGAIN;
2525 goto done3;
2526 } else {
2527 xfsize = PAGE_SIZE;
2528 }
2529 }
2530
2531 if (xfsize > SENDFILE_MAX_BYTES)
2532 xfsize = SENDFILE_MAX_BYTES;
2533 else if (xfsize > PAGE_SIZE)
2534 xfsize = trunc_page(xfsize);
2535 pgoff = off & PAGE_MASK_64;
2536 if (pgoff > 0 && PAGE_SIZE - pgoff < xfsize)
1c79356b 2537 xfsize = PAGE_SIZE_64 - pgoff;
2d21ac55
A
2538 if (nbytes && xfsize > (nbytes - sbytes))
2539 xfsize = nbytes - sbytes;
2540 if (xfsize <= 0)
2541 break;
2542 if (off + xfsize > file_size)
2543 xfsize = file_size - off;
1c79356b
A
2544 if (xfsize <= 0)
2545 break;
2d21ac55 2546
1c79356b 2547 /*
2d21ac55
A
2548 * Attempt to use larger than system page-size clusters for
2549 * large writes only if there is a jumbo cluster pool and
2550 * if the socket is marked accordingly.
1c79356b 2551 */
2d21ac55
A
2552 jumbocl = sosendjcl && njcl > 0 &&
2553 ((so->so_flags & SOF_MULTIPAGES) || sosendjcl_ignore_capab);
2554
2555 socket_unlock(so, 0);
2556 alloc_sendpkt(M_WAIT, xfsize, &nbufs, &m0, jumbocl);
2557 pktlen = mbuf_pkt_maxlen(m0);
b0d623f7 2558 if (pktlen < (size_t)xfsize)
2d21ac55 2559 xfsize = pktlen;
39236c6e 2560
2d21ac55
A
2561 auio = uio_createwithbuffer(nbufs, off, UIO_SYSSPACE,
2562 UIO_READ, &uio_buf[0], sizeof (uio_buf));
2563 if (auio == NULL) {
316670eb
A
2564 printf("sendfile failed. nbufs = %d. %s", nbufs,
2565 "File a radar related to rdar://10146739.\n");
2d21ac55
A
2566 mbuf_freem(m0);
2567 error = ENXIO;
2568 socket_lock(so, 0);
2569 goto done3;
1c79356b 2570 }
1c79356b 2571
2d21ac55 2572 for (i = 0, m = m0, uiolen = 0;
b0d623f7 2573 i < nbufs && m != NULL && uiolen < (size_t)xfsize;
2d21ac55
A
2574 i++, m = mbuf_next(m)) {
2575 size_t mlen = mbuf_maxlen(m);
2576
b0d623f7 2577 if (mlen + uiolen > (size_t)xfsize)
2d21ac55
A
2578 mlen = xfsize - uiolen;
2579 mbuf_setlen(m, mlen);
2580 uio_addiov(auio, CAST_USER_ADDR_T(mbuf_datastart(m)),
2581 mlen);
2582 uiolen += mlen;
2583 }
2584
2585 if (xfsize != uio_resid(auio))
2586 printf("sendfile: xfsize: %lld != uio_resid(auio): "
6d2010ae 2587 "%lld\n", xfsize, (long long)uio_resid(auio));
2d21ac55
A
2588
2589 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_START),
2590 uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
2591 (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
2592 error = fo_read(fp, auio, FOF_OFFSET, &context);
2593 socket_lock(so, 0);
2594 if (error != 0) {
2595 if (uio_resid(auio) != xfsize && (error == ERESTART ||
2596 error == EINTR || error == EWOULDBLOCK)) {
2597 error = 0;
2598 } else {
316670eb 2599 ENXIO_10146739_DBG("%s: fo_read error. %s");
2d21ac55
A
2600 mbuf_freem(m0);
2601 goto done3;
1c79356b 2602 }
1c79356b 2603 }
2d21ac55
A
2604 xfsize -= uio_resid(auio);
2605 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_END),
2606 uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
2607 (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
2608
2609 if (xfsize == 0) {
2610 //printf("sendfile: fo_read 0 bytes, EOF\n");
2611 break;
91447636 2612 }
2d21ac55
A
2613 if (xfsize + off > file_size)
2614 printf("sendfile: xfsize: %lld + off: %lld > file_size:"
2615 "%lld\n", xfsize, off, file_size);
2616 for (i = 0, m = m0, rlen = 0;
2617 i < nbufs && m != NULL && rlen < xfsize;
2618 i++, m = mbuf_next(m)) {
2619 size_t mlen = mbuf_maxlen(m);
2620
b0d623f7 2621 if (rlen + mlen > (size_t)xfsize)
2d21ac55
A
2622 mlen = xfsize - rlen;
2623 mbuf_setlen(m, mlen);
2624
2625 rlen += mlen;
2626 }
2627 mbuf_pkthdr_setlen(m0, xfsize);
2628
1c79356b
A
2629retry_space:
2630 /*
2631 * Make sure that the socket is still able to take more data.
2632 * CANTSENDMORE being true usually means that the connection
2633 * was closed. so_error is true when an error was sensed after
2634 * a previous send.
2635 * The state is checked after the page mapping and buffer
2636 * allocation above since those operations may block and make
2637 * any socket checks stale. From this point forward, nothing
2638 * blocks before the pru_send (or more accurately, any blocking
2639 * results in a loop back to here to re-check).
2640 */
2641 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
2642 if (so->so_state & SS_CANTSENDMORE) {
2643 error = EPIPE;
2644 } else {
2645 error = so->so_error;
2646 so->so_error = 0;
2647 }
2d21ac55 2648 m_freem(m0);
316670eb 2649 ENXIO_10146739_DBG("%s: Unexpected socket error. %s");
2d21ac55 2650 goto done3;
1c79356b
A
2651 }
2652 /*
2653 * Wait for socket space to become available. We do this just
2654 * after checking the connection state above in order to avoid
2655 * a race condition with sbwait().
2656 */
2d21ac55 2657 if (sbspace(&so->so_snd) < (long)so->so_snd.sb_lowat) {
1c79356b 2658 if (so->so_state & SS_NBIO) {
2d21ac55 2659 m_freem(m0);
1c79356b 2660 error = EAGAIN;
2d21ac55 2661 goto done3;
1c79356b 2662 }
2d21ac55
A
2663 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT |
2664 DBG_FUNC_START), uap->s, 0, 0, 0, 0);
1c79356b 2665 error = sbwait(&so->so_snd);
2d21ac55
A
2666 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT|
2667 DBG_FUNC_END), uap->s, 0, 0, 0, 0);
1c79356b
A
2668 /*
2669 * An error from sbwait usually indicates that we've
2670 * been interrupted by a signal. If we've sent anything
2671 * then return bytes sent, otherwise return the error.
2672 */
2673 if (error) {
2d21ac55
A
2674 m_freem(m0);
2675 goto done3;
1c79356b
A
2676 }
2677 goto retry_space;
2678 }
39236c6e 2679
6d2010ae 2680 struct mbuf *control = NULL;
2d21ac55
A
2681 {
2682 /*
2683 * Socket filter processing
2684 */
2d21ac55 2685
6d2010ae
A
2686 error = sflt_data_out(so, NULL, &m0, &control, 0);
2687 if (error) {
2688 if (error == EJUSTRETURN) {
2689 error = 0;
2690 continue;
2d21ac55 2691 }
316670eb 2692 ENXIO_10146739_DBG("%s: sflt_data_out error. %s");
6d2010ae 2693 goto done3;
2d21ac55
A
2694 }
2695 /*
2696 * End Socket filter processing
2697 */
2698 }
2699 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
2700 uap->s, 0, 0, 0, 0);
2701 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m0,
6d2010ae 2702 0, control, p);
2d21ac55
A
2703 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
2704 uap->s, 0, 0, 0, 0);
1c79356b 2705 if (error) {
316670eb 2706 ENXIO_10146739_DBG("%s: pru_send error. %s");
2d21ac55 2707 goto done3;
1c79356b
A
2708 }
2709 }
39236c6e 2710 sbunlock(&so->so_snd, FALSE); /* will unlock socket */
1c79356b
A
2711 /*
2712 * Send trailers. Wimp out and use writev(2).
2713 */
2d21ac55
A
2714 if (uap->hdtr != USER_ADDR_NULL &&
2715 user_hdtr.trailers != USER_ADDR_NULL) {
2716 bzero(&nuap, sizeof (struct writev_args));
2717 nuap.fd = uap->s;
2718 nuap.iovp = user_hdtr.trailers;
2719 nuap.iovcnt = user_hdtr.trl_cnt;
2720 error = writev_nocancel(p, &nuap, &writev_retval);
316670eb
A
2721 if (error) {
2722 ENXIO_10146739_DBG("%s: writev_nocancel error. %s");
2d21ac55 2723 goto done2;
316670eb 2724 }
2d21ac55 2725 sbytes += writev_retval;
1c79356b 2726 }
91447636
A
2727done2:
2728 file_drop(uap->s);
2729done1:
2730 file_drop(uap->fd);
1c79356b 2731done:
2d21ac55 2732 if (uap->nbytes != USER_ADDR_NULL) {
91447636 2733 /* XXX this appears bogus for some early failure conditions */
2d21ac55 2734 copyout(&sbytes, uap->nbytes, sizeof (off_t));
1c79356b 2735 }
2d21ac55
A
2736 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_END), uap->s,
2737 (unsigned int)((sbytes >> 32) & 0x0ffffffff),
2738 (unsigned int)(sbytes & 0x0ffffffff), error, 0);
1c79356b 2739 return (error);
91447636 2740done3:
39236c6e 2741 sbunlock(&so->so_snd, FALSE); /* will unlock socket */
91447636 2742 goto done2;
1c79356b
A
2743}
2744
2d21ac55
A
2745
2746#endif /* SENDFILE */