]> git.saurik.com Git - apple/xnu.git/blame - bsd/kern/uipc_syscalls.c
xnu-1504.9.26.tar.gz
[apple/xnu.git] / bsd / kern / uipc_syscalls.c
CommitLineData
1c79356b 1/*
2d21ac55 2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
5d5c5d0d 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * Copyright (c) 1982, 1986, 1989, 1990, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * sendfile(2) and related extensions:
2d21ac55 33 * Copyright (c) 1998, David Greenman. All rights reserved.
1c79356b
A
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. All advertising materials mentioning features or use of this software
44 * must display the following acknowledgement:
45 * This product includes software developed by the University of
46 * California, Berkeley and its contributors.
47 * 4. Neither the name of the University nor the names of its contributors
48 * may be used to endorse or promote products derived from this software
49 * without specific prior written permission.
50 *
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE.
62 *
63 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94
64 */
2d21ac55
A
65/*
66 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
67 * support for mandatory and extensible security protections. This notice
68 * is included in support of clause 2.2 (b) of the Apple Public License,
69 * Version 2.0.
70 */
1c79356b
A
71
72#include <sys/param.h>
73#include <sys/systm.h>
74#include <sys/filedesc.h>
91447636
A
75#include <sys/proc_internal.h>
76#include <sys/file_internal.h>
2d21ac55 77#include <sys/vnode_internal.h>
1c79356b
A
78#include <sys/malloc.h>
79#include <sys/mbuf.h>
91447636
A
80#include <kern/lock.h>
81#include <sys/domain.h>
1c79356b 82#include <sys/protosw.h>
91447636 83#include <sys/signalvar.h>
1c79356b
A
84#include <sys/socket.h>
85#include <sys/socketvar.h>
1c79356b 86#include <sys/kernel.h>
91447636 87#include <sys/uio_internal.h>
2d21ac55 88#include <sys/kauth.h>
e5568f75 89
b0d623f7 90#include <security/audit/audit.h>
1c79356b
A
91
92#include <sys/kdebug.h>
91447636 93#include <sys/sysproto.h>
2d21ac55
A
94#include <netinet/in.h>
95#include <net/route.h>
96#include <netinet/in_pcb.h>
97
98#if CONFIG_MACF_SOCKET_SUBSET
99#include <security/mac_framework.h>
100#endif /* MAC_SOCKET_SUBSET */
101
102#define f_flag f_fglob->fg_flag
103#define f_type f_fglob->fg_type
104#define f_msgcount f_fglob->fg_msgcount
105#define f_cred f_fglob->fg_cred
106#define f_ops f_fglob->fg_ops
107#define f_offset f_fglob->fg_offset
108#define f_data f_fglob->fg_data
109
110
111#define DBG_LAYER_IN_BEG NETDBG_CODE(DBG_NETSOCK, 0)
112#define DBG_LAYER_IN_END NETDBG_CODE(DBG_NETSOCK, 2)
113#define DBG_LAYER_OUT_BEG NETDBG_CODE(DBG_NETSOCK, 1)
114#define DBG_LAYER_OUT_END NETDBG_CODE(DBG_NETSOCK, 3)
115#define DBG_FNC_SENDMSG NETDBG_CODE(DBG_NETSOCK, (1 << 8) | 1)
116#define DBG_FNC_SENDTO NETDBG_CODE(DBG_NETSOCK, (2 << 8) | 1)
117#define DBG_FNC_SENDIT NETDBG_CODE(DBG_NETSOCK, (3 << 8) | 1)
118#define DBG_FNC_RECVFROM NETDBG_CODE(DBG_NETSOCK, (5 << 8))
119#define DBG_FNC_RECVMSG NETDBG_CODE(DBG_NETSOCK, (6 << 8))
120#define DBG_FNC_RECVIT NETDBG_CODE(DBG_NETSOCK, (7 << 8))
121#define DBG_FNC_SENDFILE NETDBG_CODE(DBG_NETSOCK, (10 << 8))
122#define DBG_FNC_SENDFILE_WAIT NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 1))
123#define DBG_FNC_SENDFILE_READ NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 2))
124#define DBG_FNC_SENDFILE_SEND NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 3))
125
126
127#define HACK_FOR_4056224 1
91447636
A
128#if HACK_FOR_4056224
129static pid_t last_pid_4056224 = 0;
130#endif /* HACK_FOR_4056224 */
1c79356b 131
2d21ac55
A
132/* TODO: should be in header file */
133int falloc_locked(proc_t, struct fileproc **, int *, vfs_context_t, int);
134
135static int sendit(struct proc *, int, struct user_msghdr *, uio_t, int,
b0d623f7 136 int32_t *);
2d21ac55 137static int recvit(struct proc *, int, struct user_msghdr *, uio_t, user_addr_t,
b0d623f7 138 int32_t *);
2d21ac55 139static int getsockaddr(struct socket *, struct sockaddr **, user_addr_t,
4a3eedf9 140 size_t, boolean_t);
2d21ac55 141static int getsockaddr_s(struct socket *, struct sockaddr_storage *,
4a3eedf9 142 user_addr_t, size_t, boolean_t);
1c79356b 143#if SENDFILE
2d21ac55
A
144static void alloc_sendpkt(int, size_t, unsigned int *, struct mbuf **,
145 boolean_t);
146#endif /* SENDFILE */
1c79356b
A
147
148/*
149 * System call interface to the socket abstraction.
150 */
1c79356b
A
151
152extern struct fileops socketops;
153
2d21ac55
A
154/*
155 * Returns: 0 Success
156 * EACCES Mandatory Access Control failure
157 * falloc:ENFILE
158 * falloc:EMFILE
159 * falloc:ENOMEM
160 * socreate:EAFNOSUPPORT
161 * socreate:EPROTOTYPE
162 * socreate:EPROTONOSUPPORT
163 * socreate:ENOBUFS
164 * socreate:ENOMEM
165 * socreate:EISCONN
166 * socreate:??? [other protocol families, IPSEC]
167 */
1c79356b 168int
b0d623f7 169socket(struct proc *p, struct socket_args *uap, int32_t *retval)
1c79356b 170{
1c79356b 171 struct socket *so;
91447636 172 struct fileproc *fp;
1c79356b
A
173 int fd, error;
174
55e303ae 175 AUDIT_ARG(socket, uap->domain, uap->type, uap->protocol);
2d21ac55
A
176#if CONFIG_MACF_SOCKET_SUBSET
177 if ((error = mac_socket_check_create(kauth_cred_get(), uap->domain,
178 uap->type, uap->protocol)) != 0)
179 return (error);
180#endif /* MAC_SOCKET_SUBSET */
1c79356b 181
2d21ac55 182 error = falloc(p, &fp, &fd, vfs_context_current());
91447636 183 if (error) {
1c79356b 184 return (error);
91447636 185 }
1c79356b
A
186 fp->f_flag = FREAD|FWRITE;
187 fp->f_type = DTYPE_SOCKET;
188 fp->f_ops = &socketops;
91447636
A
189
190 error = socreate(uap->domain, &so, uap->type, uap->protocol);
191 if (error) {
192 fp_free(p, fd, fp);
1c79356b
A
193 } else {
194 fp->f_data = (caddr_t)so;
91447636
A
195
196 proc_fdlock(p);
6601e61a 197 procfdtbl_releasefd(p, fd, NULL);
2d21ac55 198
91447636
A
199 fp_drop(p, fd, fp, 1);
200 proc_fdunlock(p);
201
1c79356b
A
202 *retval = fd;
203 }
204 return (error);
205}
206
2d21ac55
A
207/*
208 * Returns: 0 Success
209 * EDESTADDRREQ Destination address required
210 * EBADF Bad file descriptor
211 * EACCES Mandatory Access Control failure
212 * file_socket:ENOTSOCK
213 * file_socket:EBADF
214 * getsockaddr:ENAMETOOLONG Filename too long
215 * getsockaddr:EINVAL Invalid argument
216 * getsockaddr:ENOMEM Not enough space
217 * getsockaddr:EFAULT Bad address
218 * sobind:???
219 */
1c79356b
A
220/* ARGSUSED */
221int
b0d623f7 222bind(__unused proc_t p, struct bind_args *uap, __unused int32_t *retval)
1c79356b 223{
2d21ac55
A
224 struct sockaddr_storage ss;
225 struct sockaddr *sa = NULL;
91447636 226 struct socket *so;
2d21ac55 227 boolean_t want_free = TRUE;
1c79356b
A
228 int error;
229
55e303ae 230 AUDIT_ARG(fd, uap->s);
91447636 231 error = file_socket(uap->s, &so);
2d21ac55 232 if (error != 0)
1c79356b 233 return (error);
2d21ac55
A
234 if (so == NULL) {
235 error = EBADF;
236 goto out;
237 }
238 if (uap->name == USER_ADDR_NULL) {
239 error = EDESTADDRREQ;
240 goto out;
241 }
242 if (uap->namelen > sizeof (ss)) {
4a3eedf9 243 error = getsockaddr(so, &sa, uap->name, uap->namelen, TRUE);
2d21ac55 244 } else {
4a3eedf9 245 error = getsockaddr_s(so, &ss, uap->name, uap->namelen, TRUE);
2d21ac55
A
246 if (error == 0) {
247 sa = (struct sockaddr *)&ss;
248 want_free = FALSE;
249 }
250 }
251 if (error != 0)
91447636 252 goto out;
2d21ac55
A
253 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
254#if CONFIG_MACF_SOCKET_SUBSET
255 if ((error = mac_socket_check_bind(kauth_cred_get(), so, sa)) == 0)
91447636 256 error = sobind(so, sa);
2d21ac55
A
257#else
258 error = sobind(so, sa);
259#endif /* MAC_SOCKET_SUBSET */
260 if (want_free)
261 FREE(sa, M_SONAME);
91447636
A
262out:
263 file_drop(uap->s);
1c79356b
A
264 return (error);
265}
266
2d21ac55
A
267/*
268 * Returns: 0 Success
269 * EBADF
270 * EACCES Mandatory Access Control failure
271 * file_socket:ENOTSOCK
272 * file_socket:EBADF
273 * solisten:EINVAL
274 * solisten:EOPNOTSUPP
275 * solisten:???
276 */
1c79356b 277int
2d21ac55 278listen(__unused struct proc *p, struct listen_args *uap,
b0d623f7 279 __unused int32_t *retval)
1c79356b 280{
1c79356b 281 int error;
2d21ac55 282 struct socket *so;
1c79356b 283
55e303ae 284 AUDIT_ARG(fd, uap->s);
91447636 285 error = file_socket(uap->s, &so);
1c79356b
A
286 if (error)
287 return (error);
91447636 288 if (so != NULL)
2d21ac55
A
289#if CONFIG_MACF_SOCKET_SUBSET
290 {
291 error = mac_socket_check_listen(kauth_cred_get(), so);
292 if (error == 0)
293 error = solisten(so, uap->backlog);
294 }
295#else
91447636 296 error = solisten(so, uap->backlog);
2d21ac55 297#endif /* MAC_SOCKET_SUBSET */
55e303ae 298 else
91447636 299 error = EBADF;
2d21ac55 300
91447636
A
301 file_drop(uap->s);
302 return (error);
1c79356b
A
303}
304
2d21ac55
A
305/*
306 * Returns: fp_getfsock:EBADF Bad file descriptor
307 * fp_getfsock:EOPNOTSUPP ...
308 * xlate => :ENOTSOCK Socket operation on non-socket
309 * :EFAULT Bad address on copyin/copyout
310 * :EBADF Bad file descriptor
311 * :EOPNOTSUPP Operation not supported on socket
312 * :EINVAL Invalid argument
313 * :EWOULDBLOCK Operation would block
314 * :ECONNABORTED Connection aborted
315 * :EINTR Interrupted function
316 * :EACCES Mandatory Access Control failure
317 * falloc_locked:ENFILE Too many files open in system
318 * falloc_locked::EMFILE Too many open files
319 * falloc_locked::ENOMEM Not enough space
320 * 0 Success
321 */
1c79356b 322int
2d21ac55 323accept_nocancel(struct proc *p, struct accept_nocancel_args *uap,
b0d623f7 324 int32_t *retval)
1c79356b 325{
91447636 326 struct fileproc *fp;
2d21ac55 327 struct sockaddr *sa = NULL;
91447636
A
328 socklen_t namelen;
329 int error;
330 struct socket *head, *so = NULL;
331 lck_mtx_t *mutex_held;
332 int fd = uap->s;
2d21ac55 333 int newfd;
1c79356b 334 short fflag; /* type must match fp->f_flag */
91447636 335 int dosocklock = 0;
1c79356b 336
2d21ac55
A
337 *retval = -1;
338
55e303ae 339 AUDIT_ARG(fd, uap->s);
2d21ac55 340
1c79356b 341 if (uap->name) {
91447636 342 error = copyin(uap->anamelen, (caddr_t)&namelen,
2d21ac55
A
343 sizeof (socklen_t));
344 if (error)
1c79356b
A
345 return (error);
346 }
91447636
A
347 error = fp_getfsock(p, fd, &fp, &head);
348 if (error) {
349 if (error == EOPNOTSUPP)
350 error = ENOTSOCK;
1c79356b 351 return (error);
91447636 352 }
55e303ae 353 if (head == NULL) {
91447636
A
354 error = EBADF;
355 goto out;
55e303ae 356 }
2d21ac55
A
357#if CONFIG_MACF_SOCKET_SUBSET
358 if ((error = mac_socket_check_accept(kauth_cred_get(), head)) != 0)
359 goto out;
360#endif /* MAC_SOCKET_SUBSET */
91447636
A
361
362 socket_lock(head, 1);
363
364 if (head->so_proto->pr_getlock != NULL) {
365 mutex_held = (*head->so_proto->pr_getlock)(head, 0);
366 dosocklock = 1;
2d21ac55 367 } else {
91447636
A
368 mutex_held = head->so_proto->pr_domain->dom_mtx;
369 dosocklock = 0;
370 }
371
1c79356b 372 if ((head->so_options & SO_ACCEPTCONN) == 0) {
2d21ac55
A
373 if ((head->so_proto->pr_flags & PR_CONNREQUIRED) == 0) {
374 error = EOPNOTSUPP;
375 } else {
376 /* POSIX: The socket is not accepting connections */
377 error = EINVAL;
378 }
91447636 379 socket_unlock(head, 1);
91447636 380 goto out;
1c79356b
A
381 }
382 if ((head->so_state & SS_NBIO) && head->so_comp.tqh_first == NULL) {
91447636
A
383 socket_unlock(head, 1);
384 error = EWOULDBLOCK;
385 goto out;
1c79356b 386 }
2d21ac55 387 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
1c79356b
A
388 if (head->so_state & SS_CANTRCVMORE) {
389 head->so_error = ECONNABORTED;
390 break;
391 }
91447636 392 if (head->so_usecount < 1)
2d21ac55
A
393 panic("accept: head=%p refcount=%d\n", head,
394 head->so_usecount);
395 error = msleep((caddr_t)&head->so_timeo, mutex_held,
396 PSOCK | PCATCH, "accept", 0);
91447636 397 if (head->so_usecount < 1)
2d21ac55
A
398 panic("accept: 2 head=%p refcount=%d\n", head,
399 head->so_usecount);
91447636
A
400 if ((head->so_state & SS_DRAINING)) {
401 error = ECONNABORTED;
402 }
1c79356b 403 if (error) {
91447636
A
404 socket_unlock(head, 1);
405 goto out;
1c79356b
A
406 }
407 }
408 if (head->so_error) {
409 error = head->so_error;
410 head->so_error = 0;
91447636
A
411 socket_unlock(head, 1);
412 goto out;
1c79356b
A
413 }
414
415
416 /*
417 * At this point we know that there is at least one connection
418 * ready to be accepted. Remove it from the queue prior to
419 * allocating the file descriptor for it since falloc() may
420 * block allowing another process to accept the connection
421 * instead.
422 */
91447636 423 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
e3027f41 424 so = TAILQ_FIRST(&head->so_comp);
1c79356b
A
425 TAILQ_REMOVE(&head->so_comp, so, so_list);
426 head->so_qlen--;
2d21ac55
A
427 /* unlock head to avoid deadlock with select, keep a ref on head */
428 socket_unlock(head, 0);
429
430#if CONFIG_MACF_SOCKET_SUBSET
431 /*
432 * Pass the pre-accepted socket to the MAC framework. This is
433 * cheaper than allocating a file descriptor for the socket,
434 * calling the protocol accept callback, and possibly freeing
435 * the file descriptor should the MAC check fails.
436 */
437 if ((error = mac_socket_check_accepted(kauth_cred_get(), so)) != 0) {
438 so->so_state &= ~(SS_NOFDREF | SS_COMP);
439 so->so_head = NULL;
440 soclose(so);
441 /* Drop reference on listening socket */
442 sodereference(head);
443 goto out;
444 }
445#endif /* MAC_SOCKET_SUBSET */
446
447 /*
448 * Pass the pre-accepted socket to any interested socket filter(s).
449 * Upon failure, the socket would have been closed by the callee.
450 */
451 if (so->so_filt != NULL && (error = soacceptfilter(so)) != 0) {
452 /* Drop reference on listening socket */
453 sodereference(head);
454 /* Propagate socket filter's error code to the caller */
455 goto out;
456 }
457
1c79356b 458 fflag = fp->f_flag;
2d21ac55 459 error = falloc(p, &fp, &newfd, vfs_context_current());
1c79356b
A
460 if (error) {
461 /*
462 * Probably ran out of file descriptors. Put the
463 * unaccepted connection back onto the queue and
464 * do another wakeup so some other process might
465 * have a chance at it.
466 */
91447636 467 socket_lock(head, 0);
1c79356b
A
468 TAILQ_INSERT_HEAD(&head->so_comp, so, so_list);
469 head->so_qlen++;
91447636
A
470 wakeup_one((caddr_t)&head->so_timeo);
471 socket_unlock(head, 1);
472 goto out;
2d21ac55 473 }
91447636 474 *retval = newfd;
1c79356b
A
475 fp->f_type = DTYPE_SOCKET;
476 fp->f_flag = fflag;
477 fp->f_ops = &socketops;
478 fp->f_data = (caddr_t)so;
91447636
A
479 socket_lock(head, 0);
480 if (dosocklock)
481 socket_lock(so, 1);
482 so->so_state &= ~SS_COMP;
483 so->so_head = NULL;
91447636
A
484 (void) soacceptlock(so, &sa, 0);
485 socket_unlock(head, 1);
2d21ac55 486 if (sa == NULL) {
1c79356b
A
487 namelen = 0;
488 if (uap->name)
489 goto gotnoname;
91447636 490 error = 0;
2d21ac55 491 goto releasefd;
1c79356b 492 }
2d21ac55
A
493 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
494
1c79356b 495 if (uap->name) {
2d21ac55
A
496 socklen_t sa_len;
497
498 /* save sa_len before it is destroyed */
499 sa_len = sa->sa_len;
500 namelen = MIN(namelen, sa_len);
91447636 501 error = copyout(sa, uap->name, namelen);
1c79356b 502 if (!error)
2d21ac55
A
503 /* return the actual, untruncated address length */
504 namelen = sa_len;
1c79356b 505gotnoname:
2d21ac55
A
506 error = copyout((caddr_t)&namelen, uap->anamelen,
507 sizeof (socklen_t));
1c79356b
A
508 }
509 FREE(sa, M_SONAME);
2d21ac55 510
b0d623f7 511releasefd:
2d21ac55
A
512 /*
513 * If the socket has been marked as inactive by soacceptfilter(),
514 * disallow further operations on it. We explicitly call shutdown
515 * on both data directions to ensure that SS_CANT{RCV,SEND}MORE
516 * states are set for the socket. This would also flush out data
517 * hanging off the receive list of this socket.
518 */
519 if (so->so_flags & SOF_DEFUNCT) {
520 (void) soshutdownlock(so, SHUT_RD);
521 (void) soshutdownlock(so, SHUT_WR);
522 (void) sodisconnectlocked(so);
523 }
524
91447636
A
525 if (dosocklock)
526 socket_unlock(so, 1);
2d21ac55 527
2d21ac55
A
528 proc_fdlock(p);
529 procfdtbl_releasefd(p, newfd, NULL);
530 fp_drop(p, newfd, fp, 1);
531 proc_fdunlock(p);
532
91447636
A
533out:
534 file_drop(fd);
1c79356b
A
535 return (error);
536}
537
538int
b0d623f7 539accept(struct proc *p, struct accept_args *uap, int32_t *retval)
1c79356b 540{
2d21ac55
A
541 __pthread_testcancel(1);
542 return(accept_nocancel(p, (struct accept_nocancel_args *)uap, retval));
1c79356b
A
543}
544
2d21ac55
A
545/*
546 * Returns: 0 Success
547 * EBADF Bad file descriptor
548 * EALREADY Connection already in progress
549 * EINPROGRESS Operation in progress
550 * ECONNABORTED Connection aborted
551 * EINTR Interrupted function
552 * EACCES Mandatory Access Control failure
553 * file_socket:ENOTSOCK
554 * file_socket:EBADF
555 * getsockaddr:ENAMETOOLONG Filename too long
556 * getsockaddr:EINVAL Invalid argument
557 * getsockaddr:ENOMEM Not enough space
558 * getsockaddr:EFAULT Bad address
559 * soconnectlock:EOPNOTSUPP
560 * soconnectlock:EISCONN
561 * soconnectlock:??? [depends on protocol, filters]
562 * msleep:EINTR
563 *
564 * Imputed: so_error error may be set from so_error, which
565 * may have been set by soconnectlock.
566 */
567/* ARGSUSED */
1c79356b 568int
b0d623f7 569connect(struct proc *p, struct connect_args *uap, int32_t *retval)
1c79356b 570{
2d21ac55
A
571 __pthread_testcancel(1);
572 return(connect_nocancel(p, (struct connect_nocancel_args *)uap, retval));
1c79356b 573}
1c79356b 574
1c79356b 575int
b0d623f7 576connect_nocancel(__unused proc_t p, struct connect_nocancel_args *uap, __unused int32_t *retval)
1c79356b 577{
91447636 578 struct socket *so;
2d21ac55
A
579 struct sockaddr_storage ss;
580 struct sockaddr *sa = NULL;
91447636 581 lck_mtx_t *mutex_held;
2d21ac55 582 boolean_t want_free = TRUE;
91447636
A
583 int error;
584 int fd = uap->s;
4a3eedf9 585 boolean_t dgram;
1c79356b 586
55e303ae 587 AUDIT_ARG(fd, uap->s);
2d21ac55
A
588 error = file_socket(fd, &so);
589 if (error != 0)
1c79356b 590 return (error);
91447636
A
591 if (so == NULL) {
592 error = EBADF;
593 goto out;
594 }
595
4a3eedf9
A
596 /*
597 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
598 * if this is a datagram socket; translate for other types.
599 */
600 dgram = (so->so_type == SOCK_DGRAM);
601
2d21ac55
A
602 /* Get socket address now before we obtain socket lock */
603 if (uap->namelen > sizeof (ss)) {
4a3eedf9 604 error = getsockaddr(so, &sa, uap->name, uap->namelen, !dgram);
2d21ac55 605 } else {
4a3eedf9 606 error = getsockaddr_s(so, &ss, uap->name, uap->namelen, !dgram);
2d21ac55
A
607 if (error == 0) {
608 sa = (struct sockaddr *)&ss;
609 want_free = FALSE;
610 }
611 }
612 if (error != 0)
613 goto out;
614
615 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
616#if CONFIG_MACF_SOCKET_SUBSET
617 if ((error = mac_socket_check_connect(kauth_cred_get(), so, sa)) != 0) {
618 if (want_free)
619 FREE(sa, M_SONAME);
620 goto out;
621 }
622#endif /* MAC_SOCKET_SUBSET */
91447636
A
623 socket_lock(so, 1);
624
625 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
2d21ac55
A
626 if (want_free)
627 FREE(sa, M_SONAME);
91447636
A
628 socket_unlock(so, 1);
629 error = EALREADY;
630 goto out;
631 }
91447636 632 error = soconnectlock(so, sa, 0);
1c79356b
A
633 if (error)
634 goto bad;
635 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
2d21ac55
A
636 if (want_free)
637 FREE(sa, M_SONAME);
91447636
A
638 socket_unlock(so, 1);
639 error = EINPROGRESS;
640 goto out;
1c79356b 641 }
1c79356b 642 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
2d21ac55 643 if (so->so_proto->pr_getlock != NULL)
91447636 644 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
2d21ac55 645 else
91447636 646 mutex_held = so->so_proto->pr_domain->dom_mtx;
2d21ac55
A
647 error = msleep((caddr_t)&so->so_timeo, mutex_held,
648 PSOCK | PCATCH, "connect", 0);
91447636
A
649 if ((so->so_state & SS_DRAINING)) {
650 error = ECONNABORTED;
651 }
1c79356b
A
652 if (error)
653 break;
654 }
655 if (error == 0) {
656 error = so->so_error;
657 so->so_error = 0;
658 }
1c79356b
A
659bad:
660 so->so_state &= ~SS_ISCONNECTING;
91447636 661 socket_unlock(so, 1);
2d21ac55
A
662 if (want_free)
663 FREE(sa, M_SONAME);
1c79356b
A
664 if (error == ERESTART)
665 error = EINTR;
91447636
A
666out:
667 file_drop(fd);
1c79356b
A
668 return (error);
669}
670
2d21ac55
A
671/*
672 * Returns: 0 Success
673 * socreate:EAFNOSUPPORT
674 * socreate:EPROTOTYPE
675 * socreate:EPROTONOSUPPORT
676 * socreate:ENOBUFS
677 * socreate:ENOMEM
678 * socreate:EISCONN
679 * socreate:??? [other protocol families, IPSEC]
680 * falloc:ENFILE
681 * falloc:EMFILE
682 * falloc:ENOMEM
683 * copyout:EFAULT
684 * soconnect2:EINVAL
685 * soconnect2:EPROTOTYPE
686 * soconnect2:??? [other protocol families[
687 */
1c79356b 688int
2d21ac55 689socketpair(struct proc *p, struct socketpair_args *uap,
b0d623f7 690 __unused int32_t *retval)
1c79356b 691{
91447636 692 struct fileproc *fp1, *fp2;
1c79356b
A
693 struct socket *so1, *so2;
694 int fd, error, sv[2];
695
55e303ae 696 AUDIT_ARG(socket, uap->domain, uap->type, uap->protocol);
1c79356b
A
697 error = socreate(uap->domain, &so1, uap->type, uap->protocol);
698 if (error)
699 return (error);
700 error = socreate(uap->domain, &so2, uap->type, uap->protocol);
701 if (error)
702 goto free1;
91447636 703
2d21ac55 704 error = falloc(p, &fp1, &fd, vfs_context_current());
91447636 705 if (error) {
1c79356b 706 goto free2;
91447636 707 }
1c79356b
A
708 fp1->f_flag = FREAD|FWRITE;
709 fp1->f_type = DTYPE_SOCKET;
710 fp1->f_ops = &socketops;
711 fp1->f_data = (caddr_t)so1;
91447636
A
712 sv[0] = fd;
713
2d21ac55 714 error = falloc(p, &fp2, &fd, vfs_context_current());
91447636 715 if (error) {
1c79356b 716 goto free3;
91447636 717 }
1c79356b
A
718 fp2->f_flag = FREAD|FWRITE;
719 fp2->f_type = DTYPE_SOCKET;
720 fp2->f_ops = &socketops;
721 fp2->f_data = (caddr_t)so2;
722 sv[1] = fd;
91447636 723
1c79356b
A
724 error = soconnect2(so1, so2);
725 if (error) {
1c79356b
A
726 goto free4;
727 }
1c79356b
A
728 if (uap->type == SOCK_DGRAM) {
729 /*
730 * Datagram socket connection is asymmetric.
731 */
2d21ac55
A
732 error = soconnect2(so2, so1);
733 if (error) {
734 goto free4;
735 }
1c79356b 736 }
91447636
A
737
738 proc_fdlock(p);
6601e61a
A
739 procfdtbl_releasefd(p, sv[0], NULL);
740 procfdtbl_releasefd(p, sv[1], NULL);
91447636
A
741 fp_drop(p, sv[0], fp1, 1);
742 fp_drop(p, sv[1], fp2, 1);
743 proc_fdunlock(p);
744
2d21ac55 745 error = copyout((caddr_t)sv, uap->rsv, 2 * sizeof (int));
1c79356b
A
746 return (error);
747free4:
91447636 748 fp_free(p, sv[1], fp2);
1c79356b 749free3:
91447636 750 fp_free(p, sv[0], fp1);
1c79356b 751free2:
2d21ac55 752 (void) soclose(so2);
1c79356b 753free1:
2d21ac55 754 (void) soclose(so1);
1c79356b
A
755 return (error);
756}
757
2d21ac55
A
758/*
759 * Returns: 0 Success
760 * EINVAL
761 * ENOBUFS
762 * EBADF
763 * EPIPE
764 * EACCES Mandatory Access Control failure
765 * file_socket:ENOTSOCK
766 * file_socket:EBADF
767 * getsockaddr:ENAMETOOLONG Filename too long
768 * getsockaddr:EINVAL Invalid argument
769 * getsockaddr:ENOMEM Not enough space
770 * getsockaddr:EFAULT Bad address
771 * <pru_sosend>:EACCES[TCP]
772 * <pru_sosend>:EADDRINUSE[TCP]
773 * <pru_sosend>:EADDRNOTAVAIL[TCP]
774 * <pru_sosend>:EAFNOSUPPORT[TCP]
775 * <pru_sosend>:EAGAIN[TCP]
776 * <pru_sosend>:EBADF
777 * <pru_sosend>:ECONNRESET[TCP]
778 * <pru_sosend>:EFAULT
779 * <pru_sosend>:EHOSTUNREACH[TCP]
780 * <pru_sosend>:EINTR
781 * <pru_sosend>:EINVAL
782 * <pru_sosend>:EISCONN[AF_INET]
783 * <pru_sosend>:EMSGSIZE[TCP]
784 * <pru_sosend>:ENETDOWN[TCP]
785 * <pru_sosend>:ENETUNREACH[TCP]
786 * <pru_sosend>:ENOBUFS
787 * <pru_sosend>:ENOMEM[TCP]
788 * <pru_sosend>:ENOTCONN[AF_INET]
789 * <pru_sosend>:EOPNOTSUPP
790 * <pru_sosend>:EPERM[TCP]
791 * <pru_sosend>:EPIPE
792 * <pru_sosend>:EWOULDBLOCK
793 * <pru_sosend>:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
794 * <pru_sosend>:???[AF_INET] [whatever a filter author chooses]
795 * <pru_sosend>:??? [value from so_error]
796 * sockargs:???
797 */
1c79356b 798static int
2d21ac55 799sendit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop,
b0d623f7 800 int flags, int32_t *retval)
1c79356b 801{
2d21ac55
A
802 struct mbuf *control = NULL;
803 struct sockaddr_storage ss;
804 struct sockaddr *to = NULL;
805 boolean_t want_free = TRUE;
91447636 806 int error;
1c79356b 807 struct socket *so;
91447636 808 user_ssize_t len;
2d21ac55
A
809
810 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
1c79356b 811
91447636 812 error = file_socket(s, &so);
2d21ac55
A
813 if (error) {
814 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error, 0, 0, 0, 0);
815 return (error);
1c79356b 816 }
2d21ac55
A
817 if (so == NULL) {
818 error = EBADF;
819 goto out;
820 }
821 if (mp->msg_name != USER_ADDR_NULL) {
822 if (mp->msg_namelen > sizeof (ss)) {
823 error = getsockaddr(so, &to, mp->msg_name,
4a3eedf9 824 mp->msg_namelen, TRUE);
2d21ac55
A
825 } else {
826 error = getsockaddr_s(so, &ss, mp->msg_name,
4a3eedf9 827 mp->msg_namelen, TRUE);
2d21ac55
A
828 if (error == 0) {
829 to = (struct sockaddr *)&ss;
830 want_free = FALSE;
831 }
1c79356b 832 }
2d21ac55
A
833 if (error != 0)
834 goto out;
835 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), to);
91447636 836 }
2d21ac55
A
837 if (mp->msg_control != USER_ADDR_NULL) {
838 if (mp->msg_controllen < sizeof (struct cmsghdr)) {
1c79356b
A
839 error = EINVAL;
840 goto bad;
841 }
842 error = sockargs(&control, mp->msg_control,
843 mp->msg_controllen, MT_CONTROL);
2d21ac55 844 if (error != 0)
1c79356b 845 goto bad;
91447636 846 }
1c79356b 847
2d21ac55
A
848#if CONFIG_MACF_SOCKET_SUBSET
849 /*
850 * We check the state without holding the socket lock;
851 * if a race condition occurs, it would simply result
852 * in an extra call to the MAC check function.
853 */
854 if (!(so->so_state & SS_ISCONNECTED) &&
855 (error = mac_socket_check_send(kauth_cred_get(), so, to)) != 0)
856 goto bad;
857#endif /* MAC_SOCKET_SUBSET */
91447636
A
858
859 len = uio_resid(uiop);
2d21ac55
A
860 error = so->so_proto->pr_usrreqs->pru_sosend(so, to, uiop, 0, control,
861 flags);
862 if (error != 0) {
91447636 863 if (uio_resid(uiop) != len && (error == ERESTART ||
1c79356b
A
864 error == EINTR || error == EWOULDBLOCK))
865 error = 0;
2d21ac55 866 /* Generation of SIGPIPE can be controlled per socket */
9bccf70c 867 if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE))
1c79356b
A
868 psignal(p, SIGPIPE);
869 }
870 if (error == 0)
91447636
A
871 *retval = (int)(len - uio_resid(uiop));
872bad:
2d21ac55 873 if (to != NULL && want_free)
1c79356b 874 FREE(to, M_SONAME);
91447636 875out:
2d21ac55 876 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error, 0, 0, 0, 0);
91447636 877 file_drop(s);
1c79356b
A
878 return (error);
879}
880
2d21ac55
A
881/*
882 * Returns: 0 Success
883 * ENOMEM
884 * sendit:??? [see sendit definition in this file]
885 * write:??? [4056224: applicable for pipes]
886 */
1c79356b 887int
b0d623f7 888sendto(struct proc *p, struct sendto_args *uap, int32_t *retval)
2d21ac55
A
889{
890 __pthread_testcancel(1);
891 return(sendto_nocancel(p, (struct sendto_nocancel_args *)uap, retval));
892}
893
894int
b0d623f7 895sendto_nocancel(struct proc *p, struct sendto_nocancel_args *uap, int32_t *retval)
1c79356b 896{
91447636
A
897 struct user_msghdr msg;
898 int error;
899 uio_t auio = NULL;
1c79356b 900
2d21ac55 901 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_START, 0, 0, 0, 0, 0);
55e303ae 902 AUDIT_ARG(fd, uap->s);
1c79356b 903
91447636 904 auio = uio_create(1, 0,
2d21ac55
A
905 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
906 UIO_WRITE);
91447636
A
907 if (auio == NULL) {
908 return (ENOMEM);
909 }
910 uio_addiov(auio, uap->buf, uap->len);
911
1c79356b
A
912 msg.msg_name = uap->to;
913 msg.msg_namelen = uap->tolen;
91447636
A
914 /* no need to set up msg_iov. sendit uses uio_t we send it */
915 msg.msg_iov = 0;
916 msg.msg_iovlen = 0;
1c79356b 917 msg.msg_control = 0;
1c79356b 918 msg.msg_flags = 0;
1c79356b 919
91447636 920 error = sendit(p, uap->s, &msg, auio, uap->flags, retval);
2d21ac55 921
91447636
A
922 if (auio != NULL) {
923 uio_free(auio);
924 }
2d21ac55 925
91447636 926#if HACK_FOR_4056224
2d21ac55
A
927 /*
928 * Radar 4056224
929 * Temporary workaround to let send() and recv() work over
930 * a pipe for binary compatibility
91447636
A
931 * This will be removed in the release following Tiger
932 */
933 if (error == ENOTSOCK) {
934 struct fileproc *fp;
2d21ac55
A
935
936 if (fp_lookup(p, uap->s, &fp, 0) == 0) {
937 (void) fp_drop(p, uap->s, fp, 0);
938
91447636
A
939 if (fp->f_type == DTYPE_PIPE) {
940 struct write_args write_uap;
941 user_ssize_t write_retval;
2d21ac55 942
91447636
A
943 if (p->p_pid > last_pid_4056224) {
944 last_pid_4056224 = p->p_pid;
945
2d21ac55
A
946 printf("%s[%d] uses send/recv "
947 "on a pipe\n", p->p_comm, p->p_pid);
91447636 948 }
2d21ac55
A
949
950 bzero(&write_uap, sizeof (struct write_args));
91447636
A
951 write_uap.fd = uap->s;
952 write_uap.cbuf = uap->buf;
953 write_uap.nbyte = uap->len;
2d21ac55 954
91447636
A
955 error = write(p, &write_uap, &write_retval);
956 *retval = (int)write_retval;
957 }
958 }
959 }
960#endif /* HACK_FOR_4056224 */
1c79356b 961
2d21ac55 962 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_END, error, *retval, 0, 0, 0);
1c79356b 963
2d21ac55 964 return (error);
1c79356b 965}
1c79356b 966
2d21ac55
A
967/*
968 * Returns: 0 Success
969 * ENOBUFS
970 * copyin:EFAULT
971 * sendit:??? [see sendit definition in this file]
972 */
1c79356b 973int
b0d623f7 974sendmsg(struct proc *p, struct sendmsg_args *uap, int32_t *retval)
1c79356b 975{
2d21ac55
A
976 __pthread_testcancel(1);
977 return(sendmsg_nocancel(p, (struct sendmsg_nocancel_args *)uap, retval));
1c79356b 978}
1c79356b
A
979
980int
b0d623f7 981sendmsg_nocancel(struct proc *p, struct sendmsg_nocancel_args *uap, int32_t *retval)
1c79356b 982{
b0d623f7
A
983 struct user32_msghdr msg32;
984 struct user64_msghdr msg64;
91447636
A
985 struct user_msghdr user_msg;
986 caddr_t msghdrp;
987 int size_of_msghdr;
1c79356b 988 int error;
91447636
A
989 uio_t auio = NULL;
990 struct user_iovec *iovp;
1c79356b 991
2d21ac55 992 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
55e303ae 993 AUDIT_ARG(fd, uap->s);
91447636 994 if (IS_64BIT_PROCESS(p)) {
b0d623f7
A
995 msghdrp = (caddr_t)&msg64;
996 size_of_msghdr = sizeof (msg64);
2d21ac55 997 } else {
b0d623f7
A
998 msghdrp = (caddr_t)&msg32;
999 size_of_msghdr = sizeof (msg32);
91447636
A
1000 }
1001 error = copyin(uap->msg, msghdrp, size_of_msghdr);
2d21ac55
A
1002 if (error) {
1003 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1004 return (error);
1c79356b 1005 }
91447636 1006
b0d623f7
A
1007 if (IS_64BIT_PROCESS(p)) {
1008 user_msg.msg_flags = msg64.msg_flags;
1009 user_msg.msg_controllen = msg64.msg_controllen;
1010 user_msg.msg_control = msg64.msg_control;
1011 user_msg.msg_iovlen = msg64.msg_iovlen;
1012 user_msg.msg_iov = msg64.msg_iov;
1013 user_msg.msg_namelen = msg64.msg_namelen;
1014 user_msg.msg_name = msg64.msg_name;
1015 } else {
1016 user_msg.msg_flags = msg32.msg_flags;
1017 user_msg.msg_controllen = msg32.msg_controllen;
1018 user_msg.msg_control = msg32.msg_control;
1019 user_msg.msg_iovlen = msg32.msg_iovlen;
1020 user_msg.msg_iov = msg32.msg_iov;
1021 user_msg.msg_namelen = msg32.msg_namelen;
1022 user_msg.msg_name = msg32.msg_name;
91447636
A
1023 }
1024
1025 if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
2d21ac55
A
1026 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, EMSGSIZE,
1027 0, 0, 0, 0);
91447636
A
1028 return (EMSGSIZE);
1029 }
1030
1031 /* allocate a uio large enough to hold the number of iovecs passed */
1032 auio = uio_create(user_msg.msg_iovlen, 0,
2d21ac55
A
1033 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1034 UIO_WRITE);
91447636
A
1035 if (auio == NULL) {
1036 error = ENOBUFS;
1037 goto done;
1038 }
2d21ac55 1039
91447636 1040 if (user_msg.msg_iovlen) {
2d21ac55
A
1041 /*
1042 * get location of iovecs within the uio.
1043 * then copyin the iovecs from user space.
91447636
A
1044 */
1045 iovp = uio_iovsaddr(auio);
1046 if (iovp == NULL) {
1047 error = ENOBUFS;
1048 goto done;
1049 }
b0d623f7
A
1050 error = copyin_user_iovec_array(user_msg.msg_iov,
1051 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
1052 user_msg.msg_iovlen, iovp);
91447636
A
1053 if (error)
1054 goto done;
1055 user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
2d21ac55
A
1056
1057 /* finish setup of uio_t */
91447636 1058 uio_calculateresid(auio);
2d21ac55 1059 } else {
91447636
A
1060 user_msg.msg_iov = 0;
1061 }
2d21ac55
A
1062
1063 /* msg_flags is ignored for send */
91447636 1064 user_msg.msg_flags = 0;
2d21ac55 1065
91447636 1066 error = sendit(p, uap->s, &user_msg, auio, uap->flags, retval);
1c79356b 1067done:
91447636
A
1068 if (auio != NULL) {
1069 uio_free(auio);
1070 }
2d21ac55 1071 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
91447636 1072
1c79356b
A
1073 return (error);
1074}
1075
2d21ac55
A
1076/*
1077 * Returns: 0 Success
1078 * ENOTSOCK
1079 * EINVAL
1080 * EBADF
1081 * EACCES Mandatory Access Control failure
1082 * copyout:EFAULT
1083 * fp_lookup:EBADF
1084 * <pru_soreceive>:ENOBUFS
1085 * <pru_soreceive>:ENOTCONN
1086 * <pru_soreceive>:EWOULDBLOCK
1087 * <pru_soreceive>:EFAULT
1088 * <pru_soreceive>:EINTR
1089 * <pru_soreceive>:EBADF
1090 * <pru_soreceive>:EINVAL
1091 * <pru_soreceive>:EMSGSIZE
1092 * <pru_soreceive>:???
1093 *
1094 * Notes: Additional return values from calls through <pru_soreceive>
1095 * depend on protocols other than TCP or AF_UNIX, which are
1096 * documented above.
1097 */
1c79356b 1098static int
2d21ac55 1099recvit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop,
b0d623f7 1100 user_addr_t namelenp, int32_t *retval)
1c79356b 1101{
1c79356b
A
1102 int len, error;
1103 struct mbuf *m, *control = 0;
91447636 1104 user_addr_t ctlbuf;
1c79356b
A
1105 struct socket *so;
1106 struct sockaddr *fromsa = 0;
91447636 1107 struct fileproc *fp;
1c79356b 1108
2d21ac55 1109 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
91447636 1110 proc_fdlock(p);
2d21ac55
A
1111 if ((error = fp_lookup(p, s, &fp, 1))) {
1112 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
91447636 1113 proc_fdunlock(p);
2d21ac55 1114 return (error);
1c79356b 1115 }
91447636 1116 if (fp->f_type != DTYPE_SOCKET) {
2d21ac55 1117 fp_drop(p, s, fp, 1);
91447636 1118 proc_fdunlock(p);
2d21ac55 1119 return (ENOTSOCK);
91447636 1120 }
1c79356b 1121
2d21ac55
A
1122 so = (struct socket *)fp->f_data;
1123 if (so == NULL) {
1124 fp_drop(p, s, fp, 1);
1125 proc_fdunlock(p);
1126 return (EBADF);
1127 }
91447636
A
1128
1129 proc_fdunlock(p);
2d21ac55
A
1130
1131#if CONFIG_MACF_SOCKET_SUBSET
1132 /*
1133 * We check the state without holding the socket lock;
1134 * if a race condition occurs, it would simply result
1135 * in an extra call to the MAC check function.
1136 */
1137 if (!(so->so_state & SS_ISCONNECTED) &&
1138 (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0)
1139 goto out1;
1140#endif /* MAC_SOCKET_SUBSET */
91447636 1141 if (uio_resid(uiop) < 0) {
2d21ac55 1142 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, EINVAL, 0, 0, 0, 0);
91447636
A
1143 error = EINVAL;
1144 goto out1;
1c79356b 1145 }
91447636
A
1146
1147 len = uio_resid(uiop);
2d21ac55
A
1148 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, uiop,
1149 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
1150 &mp->msg_flags);
b0d623f7
A
1151 if (fromsa)
1152 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()),
1153 fromsa);
1c79356b 1154 if (error) {
91447636 1155 if (uio_resid(uiop) != len && (error == ERESTART ||
1c79356b
A
1156 error == EINTR || error == EWOULDBLOCK))
1157 error = 0;
1158 }
2d21ac55 1159
1c79356b
A
1160 if (error)
1161 goto out;
2d21ac55 1162
91447636 1163 *retval = len - uio_resid(uiop);
1c79356b 1164 if (mp->msg_name) {
2d21ac55
A
1165 socklen_t sa_len = 0;
1166
1c79356b 1167 len = mp->msg_namelen;
2d21ac55 1168 if (len <= 0 || fromsa == 0) {
1c79356b 1169 len = 0;
2d21ac55 1170 } else {
1c79356b 1171#ifndef MIN
2d21ac55 1172#define MIN(a, b) ((a) > (b) ? (b) : (a))
1c79356b 1173#endif
2d21ac55
A
1174 sa_len = fromsa->sa_len;
1175 len = MIN((unsigned int)len, sa_len);
91447636 1176 error = copyout(fromsa, mp->msg_name, (unsigned)len);
1c79356b
A
1177 if (error)
1178 goto out;
1179 }
2d21ac55
A
1180 mp->msg_namelen = sa_len;
1181 /* return the actual, untruncated address length */
1c79356b 1182 if (namelenp &&
2d21ac55
A
1183 (error = copyout((caddr_t)&sa_len, namelenp,
1184 sizeof (int)))) {
1c79356b
A
1185 goto out;
1186 }
1187 }
1188 if (mp->msg_control) {
1c79356b
A
1189 len = mp->msg_controllen;
1190 m = control;
1191 mp->msg_controllen = 0;
91447636 1192 ctlbuf = mp->msg_control;
1c79356b
A
1193
1194 while (m && len > 0) {
1195 unsigned int tocopy;
b0d623f7
A
1196 struct cmsghdr *cp = mtod(m, struct cmsghdr *);
1197
1198 /*
1199 * SCM_TIMESTAMP hack because struct timeval has a
1200 * different size for 32 bits and 64 bits processes
1201 */
1202 if (cp->cmsg_level == SOL_SOCKET && cp->cmsg_type == SCM_TIMESTAMP) {
1203 unsigned char tmp_buffer[CMSG_SPACE(sizeof(struct user64_timeval))];
1204 struct cmsghdr *tmp_cp = (struct cmsghdr *)tmp_buffer;
1205 int tmp_space;
1206 struct timeval *tv = (struct timeval *)CMSG_DATA(cp);
1207
1208 tmp_cp->cmsg_level = SOL_SOCKET;
1209 tmp_cp->cmsg_type = SCM_TIMESTAMP;
1210
1211 if (proc_is64bit(p)) {
1212 struct user64_timeval *tv64 = (struct user64_timeval *)CMSG_DATA(tmp_cp);
1213
1214 tv64->tv_sec = tv->tv_sec;
1215 tv64->tv_usec = tv->tv_usec;
1216
1217 tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user64_timeval));
1218 tmp_space = CMSG_SPACE(sizeof(struct user64_timeval));
1219 } else {
1220 struct user32_timeval *tv32 = (struct user32_timeval *)CMSG_DATA(tmp_cp);
1221
1222 tv32->tv_sec = tv->tv_sec;
1223 tv32->tv_usec = tv->tv_usec;
1224
1225 tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user32_timeval));
1226 tmp_space = CMSG_SPACE(sizeof(struct user32_timeval));
1227 }
1228 if (len >= tmp_space) {
1229 tocopy = tmp_space;
1230 } else {
1231 mp->msg_flags |= MSG_CTRUNC;
1232 tocopy = len;
1233 }
1234 error = copyout(tmp_buffer, ctlbuf, tocopy);
1235 if (error)
1236 goto out;
1c79356b 1237
2d21ac55 1238 } else {
b0d623f7
A
1239 if (len >= m->m_len) {
1240 tocopy = m->m_len;
1241 } else {
1242 mp->msg_flags |= MSG_CTRUNC;
1243 tocopy = len;
1244 }
1245
1246 error = copyout((caddr_t)mtod(m, caddr_t), ctlbuf,
1247 tocopy);
1248 if (error)
1249 goto out;
1c79356b 1250 }
2d21ac55 1251
1c79356b
A
1252 ctlbuf += tocopy;
1253 len -= tocopy;
1254 m = m->m_next;
1255 }
1256 mp->msg_controllen = ctlbuf - mp->msg_control;
1257 }
1258out:
1259 if (fromsa)
1260 FREE(fromsa, M_SONAME);
1261 if (control)
1262 m_freem(control);
2d21ac55 1263 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
91447636
A
1264out1:
1265 fp_drop(p, s, fp, 0);
1c79356b
A
1266 return (error);
1267}
1268
1269
2d21ac55
A
1270/*
1271 * Returns: 0 Success
1272 * ENOMEM
1273 * copyin:EFAULT
1274 * recvit:???
1275 * read:??? [4056224: applicable for pipes]
1276 *
1277 * Notes: The read entry point is only called as part of support for
1278 * binary backward compatability; new code should use read
1279 * instead of recv or recvfrom when attempting to read data
1280 * from pipes.
1281 *
1282 * For full documentation of the return codes from recvit, see
1283 * the block header for the recvit function.
1284 */
1285int
b0d623f7 1286recvfrom(struct proc *p, struct recvfrom_args *uap, int32_t *retval)
2d21ac55
A
1287{
1288 __pthread_testcancel(1);
1289 return(recvfrom_nocancel(p, (struct recvfrom_nocancel_args *)uap, retval));
1290}
1291
1c79356b 1292int
b0d623f7 1293recvfrom_nocancel(struct proc *p, struct recvfrom_nocancel_args *uap, int32_t *retval)
1c79356b 1294{
91447636 1295 struct user_msghdr msg;
1c79356b 1296 int error;
91447636 1297 uio_t auio = NULL;
1c79356b 1298
2d21ac55 1299 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_START, 0, 0, 0, 0, 0);
55e303ae 1300 AUDIT_ARG(fd, uap->s);
1c79356b
A
1301
1302 if (uap->fromlenaddr) {
91447636 1303 error = copyin(uap->fromlenaddr,
1c79356b
A
1304 (caddr_t)&msg.msg_namelen, sizeof (msg.msg_namelen));
1305 if (error)
1306 return (error);
2d21ac55 1307 } else {
1c79356b 1308 msg.msg_namelen = 0;
2d21ac55 1309 }
1c79356b 1310 msg.msg_name = uap->from;
91447636 1311 auio = uio_create(1, 0,
2d21ac55
A
1312 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1313 UIO_READ);
91447636
A
1314 if (auio == NULL) {
1315 return (ENOMEM);
1316 }
2d21ac55 1317
91447636
A
1318 uio_addiov(auio, uap->buf, uap->len);
1319 /* no need to set up msg_iov. recvit uses uio_t we send it */
1320 msg.msg_iov = 0;
1321 msg.msg_iovlen = 0;
1c79356b 1322 msg.msg_control = 0;
91447636 1323 msg.msg_controllen = 0;
1c79356b 1324 msg.msg_flags = uap->flags;
91447636
A
1325 error = recvit(p, uap->s, &msg, auio, uap->fromlenaddr, retval);
1326 if (auio != NULL) {
1327 uio_free(auio);
1328 }
2d21ac55 1329
91447636 1330#if HACK_FOR_4056224
2d21ac55
A
1331 /*
1332 * Radar 4056224
1333 * Temporary workaround to let send() and recv() work over
1334 * a pipe for binary compatibility
91447636
A
1335 * This will be removed in the release following Tiger
1336 */
1337 if (error == ENOTSOCK && proc_is64bit(p) == 0) {
1338 struct fileproc *fp;
2d21ac55
A
1339
1340 if (fp_lookup(p, uap->s, &fp, 0) == 0) {
1341 (void) fp_drop(p, uap->s, fp, 0);
1342
91447636
A
1343 if (fp->f_type == DTYPE_PIPE) {
1344 struct read_args read_uap;
1345 user_ssize_t read_retval;
2d21ac55 1346
91447636
A
1347 if (p->p_pid > last_pid_4056224) {
1348 last_pid_4056224 = p->p_pid;
1349
2d21ac55
A
1350 printf("%s[%d] uses send/recv on "
1351 "a pipe\n", p->p_comm, p->p_pid);
91447636 1352 }
2d21ac55
A
1353
1354 bzero(&read_uap, sizeof (struct read_args));
91447636
A
1355 read_uap.fd = uap->s;
1356 read_uap.cbuf = uap->buf;
1357 read_uap.nbyte = uap->len;
2d21ac55 1358
91447636
A
1359 error = read(p, &read_uap, &read_retval);
1360 *retval = (int)read_retval;
1361 }
1362 }
1363 }
1364#endif /* HACK_FOR_4056224 */
1365
2d21ac55 1366 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_END, error, 0, 0, 0, 0);
1c79356b 1367
2d21ac55 1368 return (error);
1c79356b
A
1369}
1370
1371/*
2d21ac55
A
1372 * Returns: 0 Success
1373 * EMSGSIZE
1374 * ENOMEM
1375 * copyin:EFAULT
1376 * copyout:EFAULT
1377 * recvit:???
1378 *
1379 * Notes: For full documentation of the return codes from recvit, see
1380 * the block header for the recvit function.
1c79356b
A
1381 */
1382int
b0d623f7 1383recvmsg(struct proc *p, struct recvmsg_args *uap, int32_t *retval)
1c79356b 1384{
2d21ac55
A
1385 __pthread_testcancel(1);
1386 return(recvmsg_nocancel(p, (struct recvmsg_nocancel_args *)uap, retval));
1c79356b 1387}
1c79356b
A
1388
1389int
b0d623f7 1390recvmsg_nocancel(struct proc *p, struct recvmsg_nocancel_args *uap, int32_t *retval)
1c79356b 1391{
b0d623f7
A
1392 struct user32_msghdr msg32;
1393 struct user64_msghdr msg64;
91447636
A
1394 struct user_msghdr user_msg;
1395 caddr_t msghdrp;
1396 int size_of_msghdr;
1397 user_addr_t uiov;
2d21ac55 1398 int error;
91447636
A
1399 uio_t auio = NULL;
1400 struct user_iovec *iovp;
1c79356b 1401
2d21ac55 1402 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
55e303ae 1403 AUDIT_ARG(fd, uap->s);
91447636 1404 if (IS_64BIT_PROCESS(p)) {
b0d623f7
A
1405 msghdrp = (caddr_t)&msg64;
1406 size_of_msghdr = sizeof (msg64);
2d21ac55 1407 } else {
b0d623f7
A
1408 msghdrp = (caddr_t)&msg32;
1409 size_of_msghdr = sizeof (msg32);
91447636
A
1410 }
1411 error = copyin(uap->msg, msghdrp, size_of_msghdr);
2d21ac55
A
1412 if (error) {
1413 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1c79356b
A
1414 return (error);
1415 }
1416
91447636 1417 /* only need to copy if user process is not 64-bit */
b0d623f7
A
1418 if (IS_64BIT_PROCESS(p)) {
1419 user_msg.msg_flags = msg64.msg_flags;
1420 user_msg.msg_controllen = msg64.msg_controllen;
1421 user_msg.msg_control = msg64.msg_control;
1422 user_msg.msg_iovlen = msg64.msg_iovlen;
1423 user_msg.msg_iov = msg64.msg_iov;
1424 user_msg.msg_namelen = msg64.msg_namelen;
1425 user_msg.msg_name = msg64.msg_name;
1426 } else {
1427 user_msg.msg_flags = msg32.msg_flags;
1428 user_msg.msg_controllen = msg32.msg_controllen;
1429 user_msg.msg_control = msg32.msg_control;
1430 user_msg.msg_iovlen = msg32.msg_iovlen;
1431 user_msg.msg_iov = msg32.msg_iov;
1432 user_msg.msg_namelen = msg32.msg_namelen;
1433 user_msg.msg_name = msg32.msg_name;
91447636
A
1434 }
1435
1436 if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
2d21ac55
A
1437 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, EMSGSIZE,
1438 0, 0, 0, 0);
91447636
A
1439 return (EMSGSIZE);
1440 }
1441
91447636 1442 user_msg.msg_flags = uap->flags;
91447636
A
1443
1444 /* allocate a uio large enough to hold the number of iovecs passed */
1445 auio = uio_create(user_msg.msg_iovlen, 0,
2d21ac55
A
1446 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1447 UIO_READ);
91447636
A
1448 if (auio == NULL) {
1449 error = ENOMEM;
1450 goto done;
1451 }
1452
2d21ac55
A
1453 /*
1454 * get location of iovecs within the uio. then copyin the iovecs from
91447636
A
1455 * user space.
1456 */
1457 iovp = uio_iovsaddr(auio);
1458 if (iovp == NULL) {
1459 error = ENOMEM;
1460 goto done;
1461 }
1462 uiov = user_msg.msg_iov;
1463 user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
b0d623f7
A
1464 error = copyin_user_iovec_array(uiov,
1465 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
1466 user_msg.msg_iovlen, iovp);
1c79356b
A
1467 if (error)
1468 goto done;
91447636 1469
2d21ac55 1470 /* finish setup of uio_t */
91447636 1471 uio_calculateresid(auio);
2d21ac55 1472
91447636 1473 error = recvit(p, uap->s, &user_msg, auio, 0, retval);
1c79356b 1474 if (!error) {
91447636 1475 user_msg.msg_iov = uiov;
b0d623f7
A
1476 if (IS_64BIT_PROCESS(p)) {
1477 msg64.msg_flags = user_msg.msg_flags;
1478 msg64.msg_controllen = user_msg.msg_controllen;
1479 msg64.msg_control = user_msg.msg_control;
1480 msg64.msg_iovlen = user_msg.msg_iovlen;
1481 msg64.msg_iov = user_msg.msg_iov;
1482 msg64.msg_namelen = user_msg.msg_namelen;
1483 msg64.msg_name = user_msg.msg_name;
1484 } else {
1485 msg32.msg_flags = user_msg.msg_flags;
1486 msg32.msg_controllen = user_msg.msg_controllen;
1487 msg32.msg_control = user_msg.msg_control;
1488 msg32.msg_iovlen = user_msg.msg_iovlen;
1489 msg32.msg_iov = user_msg.msg_iov;
1490 msg32.msg_namelen = user_msg.msg_namelen;
1491 msg32.msg_name = user_msg.msg_name;
91447636
A
1492 }
1493 error = copyout(msghdrp, uap->msg, size_of_msghdr);
1c79356b
A
1494 }
1495done:
91447636
A
1496 if (auio != NULL) {
1497 uio_free(auio);
1498 }
2d21ac55 1499 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1c79356b
A
1500 return (error);
1501}
1502
2d21ac55
A
1503/*
1504 * Returns: 0 Success
1505 * EBADF
1506 * file_socket:ENOTSOCK
1507 * file_socket:EBADF
1508 * soshutdown:EINVAL
1509 * soshutdown:ENOTCONN
1510 * soshutdown:EADDRNOTAVAIL[TCP]
1511 * soshutdown:ENOBUFS[TCP]
1512 * soshutdown:EMSGSIZE[TCP]
1513 * soshutdown:EHOSTUNREACH[TCP]
1514 * soshutdown:ENETUNREACH[TCP]
1515 * soshutdown:ENETDOWN[TCP]
1516 * soshutdown:ENOMEM[TCP]
1517 * soshutdown:EACCES[TCP]
1518 * soshutdown:EMSGSIZE[TCP]
1519 * soshutdown:ENOBUFS[TCP]
1520 * soshutdown:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
1521 * soshutdown:??? [other protocol families]
1522 */
1c79356b
A
1523/* ARGSUSED */
1524int
2d21ac55 1525shutdown(__unused struct proc *p, struct shutdown_args *uap,
b0d623f7 1526 __unused int32_t *retval)
1c79356b 1527{
2d21ac55 1528 struct socket *so;
1c79356b
A
1529 int error;
1530
55e303ae 1531 AUDIT_ARG(fd, uap->s);
91447636 1532 error = file_socket(uap->s, &so);
1c79356b
A
1533 if (error)
1534 return (error);
91447636
A
1535 if (so == NULL) {
1536 error = EBADF;
1537 goto out;
1538 }
1539 error = soshutdown((struct socket *)so, uap->how);
1540out:
1541 file_drop(uap->s);
2d21ac55 1542 return (error);
1c79356b
A
1543}
1544
2d21ac55
A
1545/*
1546 * Returns: 0 Success
1547 * EFAULT
1548 * EINVAL
1549 * EACCES Mandatory Access Control failure
1550 * file_socket:ENOTSOCK
1551 * file_socket:EBADF
1552 * sosetopt:EINVAL
1553 * sosetopt:ENOPROTOOPT
1554 * sosetopt:ENOBUFS
1555 * sosetopt:EDOM
1556 * sosetopt:EFAULT
1557 * sosetopt:EOPNOTSUPP[AF_UNIX]
1558 * sosetopt:???
1559 */
1c79356b
A
1560/* ARGSUSED */
1561int
2d21ac55 1562setsockopt(struct proc *p, struct setsockopt_args *uap,
b0d623f7 1563 __unused int32_t *retval)
1c79356b 1564{
2d21ac55 1565 struct socket *so;
1c79356b
A
1566 struct sockopt sopt;
1567 int error;
1568
55e303ae 1569 AUDIT_ARG(fd, uap->s);
1c79356b
A
1570 if (uap->val == 0 && uap->valsize != 0)
1571 return (EFAULT);
2d21ac55 1572 /* No bounds checking on size (it's unsigned) */
1c79356b 1573
91447636 1574 error = file_socket(uap->s, &so);
1c79356b
A
1575 if (error)
1576 return (error);
1577
1578 sopt.sopt_dir = SOPT_SET;
1579 sopt.sopt_level = uap->level;
1580 sopt.sopt_name = uap->name;
1581 sopt.sopt_val = uap->val;
1582 sopt.sopt_valsize = uap->valsize;
1583 sopt.sopt_p = p;
1584
91447636
A
1585 if (so == NULL) {
1586 error = EINVAL;
1587 goto out;
1588 }
2d21ac55
A
1589#if CONFIG_MACF_SOCKET_SUBSET
1590 if ((error = mac_socket_check_setsockopt(kauth_cred_get(), so,
1591 &sopt)) != 0)
1592 goto out;
1593#endif /* MAC_SOCKET_SUBSET */
91447636
A
1594 error = sosetopt(so, &sopt);
1595out:
1596 file_drop(uap->s);
2d21ac55 1597 return (error);
1c79356b
A
1598}
1599
1600
1601
2d21ac55
A
1602/*
1603 * Returns: 0 Success
1604 * EINVAL
1605 * EBADF
1606 * EACCES Mandatory Access Control failure
1607 * copyin:EFAULT
1608 * copyout:EFAULT
1609 * file_socket:ENOTSOCK
1610 * file_socket:EBADF
1611 * sogetopt:???
1612 */
1c79356b 1613int
2d21ac55 1614getsockopt(struct proc *p, struct getsockopt_args *uap,
b0d623f7 1615 __unused int32_t *retval)
1c79356b 1616{
91447636
A
1617 int error;
1618 socklen_t valsize;
1619 struct sockopt sopt;
2d21ac55 1620 struct socket *so;
1c79356b 1621
91447636 1622 error = file_socket(uap->s, &so);
1c79356b
A
1623 if (error)
1624 return (error);
1625 if (uap->val) {
2d21ac55
A
1626 error = copyin(uap->avalsize, (caddr_t)&valsize,
1627 sizeof (valsize));
1c79356b 1628 if (error)
91447636 1629 goto out;
2d21ac55
A
1630 /* No bounds checking on size (it's unsigned) */
1631 } else {
1c79356b 1632 valsize = 0;
2d21ac55 1633 }
1c79356b
A
1634 sopt.sopt_dir = SOPT_GET;
1635 sopt.sopt_level = uap->level;
1636 sopt.sopt_name = uap->name;
1637 sopt.sopt_val = uap->val;
1638 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
1639 sopt.sopt_p = p;
1640
91447636
A
1641 if (so == NULL) {
1642 error = EBADF;
1643 goto out;
1644 }
2d21ac55
A
1645#if CONFIG_MACF_SOCKET_SUBSET
1646 if ((error = mac_socket_check_getsockopt(kauth_cred_get(), so,
1647 &sopt)) != 0)
1648 goto out;
1649#endif /* MAC_SOCKET_SUBSET */
91447636 1650 error = sogetopt((struct socket *)so, &sopt);
1c79356b
A
1651 if (error == 0) {
1652 valsize = sopt.sopt_valsize;
2d21ac55
A
1653 error = copyout((caddr_t)&valsize, uap->avalsize,
1654 sizeof (valsize));
1c79356b 1655 }
91447636
A
1656out:
1657 file_drop(uap->s);
1c79356b
A
1658 return (error);
1659}
1660
1661
1662/*
1663 * Get socket name.
2d21ac55
A
1664 *
1665 * Returns: 0 Success
1666 * EBADF
1667 * file_socket:ENOTSOCK
1668 * file_socket:EBADF
1669 * copyin:EFAULT
1670 * copyout:EFAULT
1671 * <pru_sockaddr>:ENOBUFS[TCP]
1672 * <pru_sockaddr>:ECONNRESET[TCP]
1673 * <pru_sockaddr>:EINVAL[AF_UNIX]
1674 * <sf_getsockname>:???
1c79356b
A
1675 */
1676/* ARGSUSED */
2d21ac55
A
1677int
1678getsockname(__unused struct proc *p, struct getsockname_args *uap,
b0d623f7 1679 __unused int32_t *retval)
1c79356b 1680{
91447636 1681 struct socket *so;
1c79356b 1682 struct sockaddr *sa;
91447636 1683 socklen_t len;
2d21ac55 1684 socklen_t sa_len;
1c79356b
A
1685 int error;
1686
91447636 1687 error = file_socket(uap->fdes, &so);
1c79356b
A
1688 if (error)
1689 return (error);
2d21ac55 1690 error = copyin(uap->alen, (caddr_t)&len, sizeof (socklen_t));
1c79356b 1691 if (error)
91447636
A
1692 goto out;
1693 if (so == NULL) {
1694 error = EBADF;
1695 goto out;
1696 }
1c79356b 1697 sa = 0;
91447636 1698 socket_lock(so, 1);
1c79356b 1699 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
2d21ac55 1700 if (error == 0) {
91447636
A
1701 struct socket_filter_entry *filter;
1702 int filtered = 0;
1703 for (filter = so->so_filt; filter && error == 0;
2d21ac55 1704 filter = filter->sfe_next_onsocket) {
91447636
A
1705 if (filter->sfe_filter->sf_filter.sf_getsockname) {
1706 if (!filtered) {
1707 filtered = 1;
1708 sflt_use(so);
1709 socket_unlock(so, 0);
1710 }
2d21ac55
A
1711 error = filter->sfe_filter->sf_filter.
1712 sf_getsockname(filter->sfe_cookie, so, &sa);
91447636
A
1713 }
1714 }
2d21ac55 1715
91447636
A
1716 if (error == EJUSTRETURN)
1717 error = 0;
2d21ac55 1718
91447636
A
1719 if (filtered) {
1720 socket_lock(so, 0);
1721 sflt_unuse(so);
1722 }
1723 }
1724 socket_unlock(so, 1);
1c79356b
A
1725 if (error)
1726 goto bad;
1727 if (sa == 0) {
1728 len = 0;
1729 goto gotnothing;
1730 }
1731
2d21ac55
A
1732 sa_len = sa->sa_len;
1733 len = MIN(len, sa_len);
91447636 1734 error = copyout((caddr_t)sa, uap->asa, len);
2d21ac55
A
1735 if (error)
1736 goto bad;
1737 /* return the actual, untruncated address length */
1738 len = sa_len;
1c79356b 1739gotnothing:
2d21ac55 1740 error = copyout((caddr_t)&len, uap->alen, sizeof (socklen_t));
1c79356b
A
1741bad:
1742 if (sa)
1743 FREE(sa, M_SONAME);
91447636
A
1744out:
1745 file_drop(uap->fdes);
1c79356b
A
1746 return (error);
1747}
1748
1c79356b
A
1749/*
1750 * Get name of peer for connected socket.
2d21ac55
A
1751 *
1752 * Returns: 0 Success
1753 * EBADF
1754 * EINVAL
1755 * ENOTCONN
1756 * file_socket:ENOTSOCK
1757 * file_socket:EBADF
1758 * copyin:EFAULT
1759 * copyout:EFAULT
1760 * <pru_peeraddr>:???
1761 * <sf_getpeername>:???
1c79356b
A
1762 */
1763/* ARGSUSED */
1764int
2d21ac55 1765getpeername(__unused struct proc *p, struct getpeername_args *uap,
b0d623f7 1766 __unused int32_t *retval)
1c79356b 1767{
91447636 1768 struct socket *so;
1c79356b 1769 struct sockaddr *sa;
91447636 1770 socklen_t len;
2d21ac55 1771 socklen_t sa_len;
1c79356b
A
1772 int error;
1773
91447636 1774 error = file_socket(uap->fdes, &so);
1c79356b
A
1775 if (error)
1776 return (error);
91447636
A
1777 if (so == NULL) {
1778 error = EBADF;
1779 goto out;
1780 }
1781
1782 socket_lock(so, 1);
1783
2d21ac55
A
1784 if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
1785 (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
1786 /* the socket has been shutdown, no more getpeername's */
1787 socket_unlock(so, 1);
1788 error = EINVAL;
1789 goto out;
1790 }
1791
91447636
A
1792 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
1793 socket_unlock(so, 1);
1794 error = ENOTCONN;
1795 goto out;
1796 }
2d21ac55 1797 error = copyin(uap->alen, (caddr_t)&len, sizeof (socklen_t));
91447636
A
1798 if (error) {
1799 socket_unlock(so, 1);
1800 goto out;
1801 }
1c79356b
A
1802 sa = 0;
1803 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
2d21ac55 1804 if (error == 0) {
91447636
A
1805 struct socket_filter_entry *filter;
1806 int filtered = 0;
1807 for (filter = so->so_filt; filter && error == 0;
2d21ac55 1808 filter = filter->sfe_next_onsocket) {
91447636
A
1809 if (filter->sfe_filter->sf_filter.sf_getpeername) {
1810 if (!filtered) {
1811 filtered = 1;
1812 sflt_use(so);
1813 socket_unlock(so, 0);
1814 }
2d21ac55
A
1815 error = filter->sfe_filter->sf_filter.
1816 sf_getpeername(filter->sfe_cookie, so, &sa);
91447636
A
1817 }
1818 }
2d21ac55 1819
91447636
A
1820 if (error == EJUSTRETURN)
1821 error = 0;
2d21ac55 1822
91447636
A
1823 if (filtered) {
1824 socket_lock(so, 0);
1825 sflt_unuse(so);
1826 }
1827 }
1828 socket_unlock(so, 1);
1c79356b
A
1829 if (error)
1830 goto bad;
1831 if (sa == 0) {
1832 len = 0;
1833 goto gotnothing;
1834 }
2d21ac55
A
1835 sa_len = sa->sa_len;
1836 len = MIN(len, sa_len);
91447636 1837 error = copyout(sa, uap->asa, len);
1c79356b
A
1838 if (error)
1839 goto bad;
2d21ac55
A
1840 /* return the actual, untruncated address length */
1841 len = sa_len;
1c79356b 1842gotnothing:
2d21ac55 1843 error = copyout((caddr_t)&len, uap->alen, sizeof (socklen_t));
1c79356b
A
1844bad:
1845 if (sa) FREE(sa, M_SONAME);
91447636
A
1846out:
1847 file_drop(uap->fdes);
1c79356b
A
1848 return (error);
1849}
1850
1851int
2d21ac55 1852sockargs(struct mbuf **mp, user_addr_t data, int buflen, int type)
1c79356b 1853{
2d21ac55
A
1854 struct sockaddr *sa;
1855 struct mbuf *m;
1c79356b
A
1856 int error;
1857
b0d623f7
A
1858 int alloc_buflen = buflen;
1859#ifdef __LP64__
1860 /* The fd's in the buffer must expand to be pointers, thus we need twice as much space */
1861 if(type == MT_CONTROL)
1862 alloc_buflen = ((buflen - sizeof(struct cmsghdr))*2) + sizeof(struct cmsghdr);
1863#endif
1864 if ((u_int)alloc_buflen > MLEN) {
1865 if (type == MT_SONAME && (u_int)alloc_buflen <= 112)
1866 alloc_buflen = MLEN; /* unix domain compat. hack */
1867 else if ((u_int)alloc_buflen > MCLBYTES)
91447636 1868 return (EINVAL);
1c79356b
A
1869 }
1870 m = m_get(M_WAIT, type);
1871 if (m == NULL)
1872 return (ENOBUFS);
b0d623f7 1873 if ((u_int)alloc_buflen > MLEN) {
91447636
A
1874 MCLGET(m, M_WAIT);
1875 if ((m->m_flags & M_EXT) == 0) {
1876 m_free(m);
2d21ac55 1877 return (ENOBUFS);
91447636
A
1878 }
1879 }
b0d623f7
A
1880 /* K64: We still copyin the original buflen because it gets expanded later
1881 * and we lie about the size of the mbuf because it only affects unp_* functions
1882 */
1c79356b 1883 m->m_len = buflen;
91447636 1884 error = copyin(data, mtod(m, caddr_t), (u_int)buflen);
2d21ac55 1885 if (error) {
1c79356b 1886 (void) m_free(m);
2d21ac55 1887 } else {
1c79356b
A
1888 *mp = m;
1889 if (type == MT_SONAME) {
1890 sa = mtod(m, struct sockaddr *);
1c79356b
A
1891 sa->sa_len = buflen;
1892 }
1893 }
1894 return (error);
1895}
1896
91447636
A
1897/*
1898 * Given a user_addr_t of length len, allocate and fill out a *sa.
2d21ac55
A
1899 *
1900 * Returns: 0 Success
1901 * ENAMETOOLONG Filename too long
1902 * EINVAL Invalid argument
1903 * ENOMEM Not enough space
1904 * copyin:EFAULT Bad address
91447636 1905 */
2d21ac55
A
1906static int
1907getsockaddr(struct socket *so, struct sockaddr **namp, user_addr_t uaddr,
4a3eedf9 1908 size_t len, boolean_t translate_unspec)
1c79356b
A
1909{
1910 struct sockaddr *sa;
1911 int error;
1912
1913 if (len > SOCK_MAXADDRLEN)
2d21ac55 1914 return (ENAMETOOLONG);
1c79356b 1915
2d21ac55
A
1916 if (len < offsetof(struct sockaddr, sa_data[0]))
1917 return (EINVAL);
1c79356b 1918
2d21ac55 1919 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK | M_ZERO);
91447636 1920 if (sa == NULL) {
2d21ac55 1921 return (ENOMEM);
91447636
A
1922 }
1923 error = copyin(uaddr, (caddr_t)sa, len);
1c79356b
A
1924 if (error) {
1925 FREE(sa, M_SONAME);
1926 } else {
2d21ac55
A
1927 /*
1928 * Force sa_family to AF_INET on AF_INET sockets to handle
1929 * legacy applications that use AF_UNSPEC (0). On all other
1930 * sockets we leave it unchanged and let the lower layer
1931 * handle it.
1932 */
4a3eedf9 1933 if (translate_unspec && sa->sa_family == AF_UNSPEC &&
2d21ac55
A
1934 INP_CHECK_SOCKAF(so, AF_INET) &&
1935 len == sizeof (struct sockaddr_in))
1936 sa->sa_family = AF_INET;
1937
1c79356b
A
1938 sa->sa_len = len;
1939 *namp = sa;
1940 }
2d21ac55 1941 return (error);
1c79356b
A
1942}
1943
2d21ac55
A
1944static int
1945getsockaddr_s(struct socket *so, struct sockaddr_storage *ss,
4a3eedf9 1946 user_addr_t uaddr, size_t len, boolean_t translate_unspec)
1c79356b 1947{
2d21ac55
A
1948 int error;
1949
1950 if (ss == NULL || uaddr == USER_ADDR_NULL ||
1951 len < offsetof(struct sockaddr, sa_data[0]))
1952 return (EINVAL);
1953
1954 /*
1955 * sockaddr_storage size is less than SOCK_MAXADDRLEN,
1956 * so the check here is inclusive.
1957 */
1958 if (len > sizeof (*ss))
1959 return (ENAMETOOLONG);
1c79356b 1960
2d21ac55
A
1961 bzero(ss, sizeof (*ss));
1962 error = copyin(uaddr, (caddr_t)ss, len);
1963 if (error == 0) {
1964 /*
1965 * Force sa_family to AF_INET on AF_INET sockets to handle
1966 * legacy applications that use AF_UNSPEC (0). On all other
1967 * sockets we leave it unchanged and let the lower layer
1968 * handle it.
1969 */
4a3eedf9 1970 if (translate_unspec && ss->ss_family == AF_UNSPEC &&
2d21ac55
A
1971 INP_CHECK_SOCKAF(so, AF_INET) &&
1972 len == sizeof (struct sockaddr_in))
1973 ss->ss_family = AF_INET;
91447636 1974
2d21ac55 1975 ss->ss_len = len;
1c79356b 1976 }
2d21ac55 1977 return (error);
1c79356b
A
1978}
1979
2d21ac55
A
1980#if SENDFILE
1981
1982SYSCTL_DECL(_kern_ipc);
1983
1984#define SFUIOBUFS 64
1985static int sendfileuiobufs = SFUIOBUFS;
1986SYSCTL_INT(_kern_ipc, OID_AUTO, sendfileuiobufs, CTLFLAG_RW, &sendfileuiobufs,
1987 0, "");
1988
1989/* Macros to compute the number of mbufs needed depending on cluster size */
1990#define HOWMANY_16K(n) ((((unsigned int)(n) - 1) >> (PGSHIFT + 2)) + 1)
1991#define HOWMANY_4K(n) ((((unsigned int)(n) - 1) >> PGSHIFT) + 1)
1992
1993/* Upper send limit in bytes (sendfileuiobufs * PAGESIZE) */
1994#define SENDFILE_MAX_BYTES (sendfileuiobufs << PGSHIFT)
1995
1996/* Upper send limit in the number of mbuf clusters */
1997#define SENDFILE_MAX_16K HOWMANY_16K(SENDFILE_MAX_BYTES)
1998#define SENDFILE_MAX_4K HOWMANY_4K(SENDFILE_MAX_BYTES)
1999
2000size_t mbuf_pkt_maxlen(mbuf_t m);
2001
2002__private_extern__ size_t
2003mbuf_pkt_maxlen(mbuf_t m)
1c79356b 2004{
2d21ac55 2005 size_t maxlen = 0;
1c79356b 2006
2d21ac55
A
2007 while (m) {
2008 maxlen += mbuf_maxlen(m);
2009 m = mbuf_next(m);
1c79356b 2010 }
2d21ac55 2011 return (maxlen);
1c79356b
A
2012}
2013
1c79356b 2014static void
2d21ac55
A
2015alloc_sendpkt(int how, size_t pktlen, unsigned int *maxchunks,
2016 struct mbuf **m, boolean_t jumbocl)
1c79356b 2017{
2d21ac55 2018 unsigned int needed;
1c79356b 2019
2d21ac55
A
2020 if (pktlen == 0)
2021 panic("%s: pktlen (%ld) must be non-zero\n", __func__, pktlen);
1c79356b 2022
2d21ac55
A
2023 /*
2024 * Try to allocate for the whole thing. Since we want full control
2025 * over the buffer size and be able to accept partial result, we can't
2026 * use mbuf_allocpacket(). The logic below is similar to sosend().
2027 */
2028 *m = NULL;
2029 if (pktlen > NBPG && jumbocl) {
2030 needed = MIN(SENDFILE_MAX_16K, HOWMANY_16K(pktlen));
2031 *m = m_getpackets_internal(&needed, 1, how, 0, M16KCLBYTES);
2032 }
2033 if (*m == NULL) {
2034 needed = MIN(SENDFILE_MAX_4K, HOWMANY_4K(pktlen));
2035 *m = m_getpackets_internal(&needed, 1, how, 0, NBPG);
2036 }
2037
2038 /*
2039 * Our previous attempt(s) at allocation had failed; the system
2040 * may be short on mbufs, and we want to block until they are
2041 * available. This time, ask just for 1 mbuf and don't return
2042 * until we get it.
2043 */
2044 if (*m == NULL) {
2045 needed = 1;
2046 *m = m_getpackets_internal(&needed, 1, M_WAIT, 1, NBPG);
1c79356b 2047 }
2d21ac55
A
2048 if (*m == NULL)
2049 panic("%s: blocking allocation returned NULL\n", __func__);
2050
2051 *maxchunks = needed;
1c79356b
A
2052}
2053
2054/*
2055 * sendfile(2).
2d21ac55
A
2056 * int sendfile(int fd, int s, off_t offset, off_t *nbytes,
2057 * struct sf_hdtr *hdtr, int flags)
1c79356b
A
2058 *
2059 * Send a file specified by 'fd' and starting at 'offset' to a socket
2d21ac55
A
2060 * specified by 's'. Send only '*nbytes' of the file or until EOF if
2061 * *nbytes == 0. Optionally add a header and/or trailer to the socket
2062 * output. If specified, write the total number of bytes sent into *nbytes.
1c79356b
A
2063 */
2064int
2d21ac55 2065sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval)
1c79356b 2066{
91447636 2067 struct fileproc *fp;
1c79356b 2068 struct vnode *vp;
1c79356b 2069 struct socket *so;
2d21ac55
A
2070 struct writev_nocancel_args nuap;
2071 user_ssize_t writev_retval;
2d21ac55 2072 struct user_sf_hdtr user_hdtr;
b0d623f7
A
2073 struct user32_sf_hdtr user32_hdtr;
2074 struct user64_sf_hdtr user64_hdtr;
2d21ac55
A
2075 off_t off, xfsize;
2076 off_t nbytes = 0, sbytes = 0;
2077 int error = 0;
2078 size_t sizeof_hdtr;
2d21ac55
A
2079 off_t file_size;
2080 struct vfs_context context = *vfs_context_current();
2081
2082 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_START), uap->s,
2083 0, 0, 0, 0);
b0d623f7
A
2084
2085 AUDIT_ARG(fd, uap->fd);
2086 AUDIT_ARG(value32, uap->s);
2087
1c79356b
A
2088 /*
2089 * Do argument checking. Must be a regular file in, stream
2090 * type and connected socket out, positive offset.
2091 */
2d21ac55 2092 if ((error = fp_getfvp(p, uap->fd, &fp, &vp))) {
1c79356b 2093 goto done;
2d21ac55
A
2094 }
2095 if ((fp->f_flag & FREAD) == 0) {
91447636
A
2096 error = EBADF;
2097 goto done1;
1c79356b 2098 }
2d21ac55
A
2099 if (vnode_isreg(vp) == 0) {
2100 error = ENOTSUP;
91447636 2101 goto done1;
1c79356b 2102 }
91447636 2103 error = file_socket(uap->s, &so);
2d21ac55 2104 if (error) {
91447636 2105 goto done1;
2d21ac55 2106 }
55e303ae
A
2107 if (so == NULL) {
2108 error = EBADF;
91447636 2109 goto done2;
55e303ae 2110 }
1c79356b
A
2111 if (so->so_type != SOCK_STREAM) {
2112 error = EINVAL;
2d21ac55 2113 goto done2;
1c79356b
A
2114 }
2115 if ((so->so_state & SS_ISCONNECTED) == 0) {
2116 error = ENOTCONN;
2d21ac55 2117 goto done2;
1c79356b
A
2118 }
2119 if (uap->offset < 0) {
2120 error = EINVAL;
2d21ac55 2121 goto done2;
1c79356b 2122 }
2d21ac55
A
2123 if (uap->nbytes == USER_ADDR_NULL) {
2124 error = EINVAL;
2125 goto done2;
2126 }
2127 if (uap->flags != 0) {
2128 error = EINVAL;
2129 goto done2;
2130 }
2131
2132 context.vc_ucred = fp->f_fglob->fg_cred;
2133
2134#if CONFIG_MACF_SOCKET_SUBSET
2135 /* JMM - fetch connected sockaddr? */
2136 error = mac_socket_check_send(context.vc_ucred, so, NULL);
2137 if (error)
2138 goto done2;
2139#endif
2140
2141 /*
2142 * Get number of bytes to send
2143 * Should it applies to size of header and trailer?
2144 * JMM - error handling?
2145 */
2146 copyin(uap->nbytes, &nbytes, sizeof (off_t));
1c79356b
A
2147
2148 /*
2149 * If specified, get the pointer to the sf_hdtr struct for
2150 * any headers/trailers.
2151 */
2d21ac55
A
2152 if (uap->hdtr != USER_ADDR_NULL) {
2153 caddr_t hdtrp;
2154
2155 bzero(&user_hdtr, sizeof (user_hdtr));
2156 if (IS_64BIT_PROCESS(p)) {
b0d623f7
A
2157 hdtrp = (caddr_t)&user64_hdtr;
2158 sizeof_hdtr = sizeof (user64_hdtr);
2d21ac55 2159 } else {
b0d623f7
A
2160 hdtrp = (caddr_t)&user32_hdtr;
2161 sizeof_hdtr = sizeof (user32_hdtr);
2d21ac55
A
2162 }
2163 error = copyin(uap->hdtr, hdtrp, sizeof_hdtr);
1c79356b 2164 if (error)
2d21ac55 2165 goto done2;
b0d623f7
A
2166 if (IS_64BIT_PROCESS(p)) {
2167 user_hdtr.headers = user64_hdtr.headers;
2168 user_hdtr.hdr_cnt = user64_hdtr.hdr_cnt;
2169 user_hdtr.trailers = user64_hdtr.trailers;
2170 user_hdtr.trl_cnt = user64_hdtr.trl_cnt;
2171 } else {
2172 user_hdtr.headers = user32_hdtr.headers;
2173 user_hdtr.hdr_cnt = user32_hdtr.hdr_cnt;
2174 user_hdtr.trailers = user32_hdtr.trailers;
2175 user_hdtr.trl_cnt = user32_hdtr.trl_cnt;
2d21ac55
A
2176 }
2177
1c79356b
A
2178 /*
2179 * Send any headers. Wimp out and use writev(2).
2180 */
2d21ac55
A
2181 if (user_hdtr.headers != USER_ADDR_NULL) {
2182 bzero(&nuap, sizeof (struct writev_args));
1c79356b 2183 nuap.fd = uap->s;
2d21ac55
A
2184 nuap.iovp = user_hdtr.headers;
2185 nuap.iovcnt = user_hdtr.hdr_cnt;
2186 error = writev_nocancel(p, &nuap, &writev_retval);
1c79356b 2187 if (error)
2d21ac55
A
2188 goto done2;
2189 sbytes += writev_retval;
1c79356b
A
2190 }
2191 }
2192
2193 /*
2d21ac55
A
2194 * Get the file size for 2 reasons:
2195 * 1. We don't want to allocate more mbufs than necessary
2196 * 2. We don't want to read past the end of file
1c79356b 2197 */
2d21ac55
A
2198 if ((error = vnode_size(vp, &file_size, vfs_context_current())) != 0)
2199 goto done2;
1c79356b
A
2200
2201 /*
2d21ac55
A
2202 * Simply read file data into a chain of mbufs that used with scatter
2203 * gather reads. We're not (yet?) setup to use zero copy external
2204 * mbufs that point to the file pages.
1c79356b 2205 */
2d21ac55
A
2206 socket_lock(so, 1);
2207 error = sblock(&so->so_snd, M_WAIT);
2208 if (error) {
2209 socket_unlock(so, 1);
2210 goto done2;
2211 }
1c79356b 2212 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
2d21ac55
A
2213 mbuf_t m0 = NULL, m;
2214 unsigned int nbufs = sendfileuiobufs, i;
2215 uio_t auio;
2216 char uio_buf[UIO_SIZEOF(sendfileuiobufs)]; /* 1 KB !!! */
2217 size_t uiolen;
2218 user_ssize_t rlen;
2219 off_t pgoff;
2220 size_t pktlen;
2221 boolean_t jumbocl;
1c79356b 2222
1c79356b 2223 /*
2d21ac55
A
2224 * Calculate the amount to transfer.
2225 * Align to round number of pages.
2226 * Not to exceed send socket buffer,
1c79356b
A
2227 * the EOF, or the passed in nbytes.
2228 */
2d21ac55
A
2229 xfsize = sbspace(&so->so_snd);
2230
2231 if (xfsize <= 0) {
2232 if (so->so_state & SS_CANTSENDMORE) {
2233 error = EPIPE;
2234 goto done3;
2235 } else if ((so->so_state & SS_NBIO)) {
2236 error = EAGAIN;
2237 goto done3;
2238 } else {
2239 xfsize = PAGE_SIZE;
2240 }
2241 }
2242
2243 if (xfsize > SENDFILE_MAX_BYTES)
2244 xfsize = SENDFILE_MAX_BYTES;
2245 else if (xfsize > PAGE_SIZE)
2246 xfsize = trunc_page(xfsize);
2247 pgoff = off & PAGE_MASK_64;
2248 if (pgoff > 0 && PAGE_SIZE - pgoff < xfsize)
1c79356b 2249 xfsize = PAGE_SIZE_64 - pgoff;
2d21ac55
A
2250 if (nbytes && xfsize > (nbytes - sbytes))
2251 xfsize = nbytes - sbytes;
2252 if (xfsize <= 0)
2253 break;
2254 if (off + xfsize > file_size)
2255 xfsize = file_size - off;
1c79356b
A
2256 if (xfsize <= 0)
2257 break;
2d21ac55 2258
1c79356b 2259 /*
2d21ac55
A
2260 * Attempt to use larger than system page-size clusters for
2261 * large writes only if there is a jumbo cluster pool and
2262 * if the socket is marked accordingly.
1c79356b 2263 */
2d21ac55
A
2264 jumbocl = sosendjcl && njcl > 0 &&
2265 ((so->so_flags & SOF_MULTIPAGES) || sosendjcl_ignore_capab);
2266
2267 socket_unlock(so, 0);
2268 alloc_sendpkt(M_WAIT, xfsize, &nbufs, &m0, jumbocl);
2269 pktlen = mbuf_pkt_maxlen(m0);
b0d623f7 2270 if (pktlen < (size_t)xfsize)
2d21ac55
A
2271 xfsize = pktlen;
2272
2273 auio = uio_createwithbuffer(nbufs, off, UIO_SYSSPACE,
2274 UIO_READ, &uio_buf[0], sizeof (uio_buf));
2275 if (auio == NULL) {
2276 //printf("sendfile: uio_createwithbuffer failed\n");
2277 mbuf_freem(m0);
2278 error = ENXIO;
2279 socket_lock(so, 0);
2280 goto done3;
1c79356b 2281 }
1c79356b 2282
2d21ac55 2283 for (i = 0, m = m0, uiolen = 0;
b0d623f7 2284 i < nbufs && m != NULL && uiolen < (size_t)xfsize;
2d21ac55
A
2285 i++, m = mbuf_next(m)) {
2286 size_t mlen = mbuf_maxlen(m);
2287
b0d623f7 2288 if (mlen + uiolen > (size_t)xfsize)
2d21ac55
A
2289 mlen = xfsize - uiolen;
2290 mbuf_setlen(m, mlen);
2291 uio_addiov(auio, CAST_USER_ADDR_T(mbuf_datastart(m)),
2292 mlen);
2293 uiolen += mlen;
2294 }
2295
2296 if (xfsize != uio_resid(auio))
2297 printf("sendfile: xfsize: %lld != uio_resid(auio): "
2298 "%lld\n", xfsize, uio_resid(auio));
2299
2300 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_START),
2301 uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
2302 (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
2303 error = fo_read(fp, auio, FOF_OFFSET, &context);
2304 socket_lock(so, 0);
2305 if (error != 0) {
2306 if (uio_resid(auio) != xfsize && (error == ERESTART ||
2307 error == EINTR || error == EWOULDBLOCK)) {
2308 error = 0;
2309 } else {
2310 mbuf_freem(m0);
2311 goto done3;
1c79356b 2312 }
1c79356b 2313 }
2d21ac55
A
2314 xfsize -= uio_resid(auio);
2315 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_END),
2316 uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
2317 (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
2318
2319 if (xfsize == 0) {
2320 //printf("sendfile: fo_read 0 bytes, EOF\n");
2321 break;
91447636 2322 }
2d21ac55
A
2323 if (xfsize + off > file_size)
2324 printf("sendfile: xfsize: %lld + off: %lld > file_size:"
2325 "%lld\n", xfsize, off, file_size);
2326 for (i = 0, m = m0, rlen = 0;
2327 i < nbufs && m != NULL && rlen < xfsize;
2328 i++, m = mbuf_next(m)) {
2329 size_t mlen = mbuf_maxlen(m);
2330
b0d623f7 2331 if (rlen + mlen > (size_t)xfsize)
2d21ac55
A
2332 mlen = xfsize - rlen;
2333 mbuf_setlen(m, mlen);
2334
2335 rlen += mlen;
2336 }
2337 mbuf_pkthdr_setlen(m0, xfsize);
2338
1c79356b
A
2339retry_space:
2340 /*
2341 * Make sure that the socket is still able to take more data.
2342 * CANTSENDMORE being true usually means that the connection
2343 * was closed. so_error is true when an error was sensed after
2344 * a previous send.
2345 * The state is checked after the page mapping and buffer
2346 * allocation above since those operations may block and make
2347 * any socket checks stale. From this point forward, nothing
2348 * blocks before the pru_send (or more accurately, any blocking
2349 * results in a loop back to here to re-check).
2350 */
2351 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
2352 if (so->so_state & SS_CANTSENDMORE) {
2353 error = EPIPE;
2354 } else {
2355 error = so->so_error;
2356 so->so_error = 0;
2357 }
2d21ac55
A
2358 m_freem(m0);
2359 goto done3;
1c79356b
A
2360 }
2361 /*
2362 * Wait for socket space to become available. We do this just
2363 * after checking the connection state above in order to avoid
2364 * a race condition with sbwait().
2365 */
2d21ac55 2366 if (sbspace(&so->so_snd) < (long)so->so_snd.sb_lowat) {
1c79356b 2367 if (so->so_state & SS_NBIO) {
2d21ac55 2368 m_freem(m0);
1c79356b 2369 error = EAGAIN;
2d21ac55 2370 goto done3;
1c79356b 2371 }
2d21ac55
A
2372 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT |
2373 DBG_FUNC_START), uap->s, 0, 0, 0, 0);
1c79356b 2374 error = sbwait(&so->so_snd);
2d21ac55
A
2375 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT|
2376 DBG_FUNC_END), uap->s, 0, 0, 0, 0);
1c79356b
A
2377 /*
2378 * An error from sbwait usually indicates that we've
2379 * been interrupted by a signal. If we've sent anything
2380 * then return bytes sent, otherwise return the error.
2381 */
2382 if (error) {
2d21ac55
A
2383 m_freem(m0);
2384 goto done3;
1c79356b
A
2385 }
2386 goto retry_space;
2387 }
2d21ac55
A
2388 {
2389 /*
2390 * Socket filter processing
2391 */
2392 struct socket_filter_entry *filter;
2393 int filtered = 0;
2394 struct mbuf *control = NULL;
2395 boolean_t recursive = (so->so_send_filt_thread != NULL);
2396
2397 error = 0;
2398 for (filter = so->so_filt; filter && (error == 0);
2399 filter = filter->sfe_next_onsocket) {
2400 if (filter->sfe_filter->sf_filter.sf_data_out) {
2401 if (filtered == 0) {
2402 filtered = 1;
2403 so->so_send_filt_thread =
2404 current_thread();
2405 sflt_use(so);
2406 socket_unlock(so, 0);
2407 }
2408 error = filter->sfe_filter->sf_filter.
2409 sf_data_out(filter->sfe_cookie, so,
2410 NULL, &m0, &control, 0);
2411 }
2412 }
2413
2414 if (filtered) {
2415 /*
2416 * At this point, we've run at least one filter.
2417 * The socket is unlocked as is the socket
2418 * buffer. Clear the recorded filter thread
2419 * only when we are outside of a filter's
2420 * context. This allows for a filter to issue
2421 * multiple inject calls from its sf_data_out
2422 * callback routine.
2423 */
2424 socket_lock(so, 0);
2425 sflt_unuse(so);
2426 if (!recursive)
2427 so->so_send_filt_thread = 0;
2428 if (error) {
2429 if (error == EJUSTRETURN) {
2430 error = 0;
2431 continue;
2432 }
2433 goto done3;
2434 }
2435 }
2436 /*
2437 * End Socket filter processing
2438 */
2439 }
2440 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
2441 uap->s, 0, 0, 0, 0);
2442 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m0,
2443 0, 0, p);
2444 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
2445 uap->s, 0, 0, 0, 0);
1c79356b 2446 if (error) {
2d21ac55 2447 goto done3;
1c79356b
A
2448 }
2449 }
2d21ac55 2450 sbunlock(&so->so_snd, 0); /* will unlock socket */
1c79356b
A
2451 /*
2452 * Send trailers. Wimp out and use writev(2).
2453 */
2d21ac55
A
2454 if (uap->hdtr != USER_ADDR_NULL &&
2455 user_hdtr.trailers != USER_ADDR_NULL) {
2456 bzero(&nuap, sizeof (struct writev_args));
2457 nuap.fd = uap->s;
2458 nuap.iovp = user_hdtr.trailers;
2459 nuap.iovcnt = user_hdtr.trl_cnt;
2460 error = writev_nocancel(p, &nuap, &writev_retval);
2461 if (error)
2462 goto done2;
2463 sbytes += writev_retval;
1c79356b 2464 }
91447636
A
2465done2:
2466 file_drop(uap->s);
2467done1:
2468 file_drop(uap->fd);
1c79356b 2469done:
2d21ac55 2470 if (uap->nbytes != USER_ADDR_NULL) {
91447636 2471 /* XXX this appears bogus for some early failure conditions */
2d21ac55 2472 copyout(&sbytes, uap->nbytes, sizeof (off_t));
1c79356b 2473 }
2d21ac55
A
2474 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_END), uap->s,
2475 (unsigned int)((sbytes >> 32) & 0x0ffffffff),
2476 (unsigned int)(sbytes & 0x0ffffffff), error, 0);
1c79356b 2477 return (error);
91447636 2478done3:
2d21ac55 2479 sbunlock(&so->so_snd, 0); /* will unlock socket */
91447636 2480 goto done2;
1c79356b
A
2481}
2482
2d21ac55
A
2483
2484#endif /* SENDFILE */