]> git.saurik.com Git - apple/xnu.git/blame - bsd/kern/uipc_syscalls.c
xnu-1228.15.4.tar.gz
[apple/xnu.git] / bsd / kern / uipc_syscalls.c
CommitLineData
1c79356b 1/*
2d21ac55 2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
5d5c5d0d 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * Copyright (c) 1982, 1986, 1989, 1990, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * sendfile(2) and related extensions:
2d21ac55 33 * Copyright (c) 1998, David Greenman. All rights reserved.
1c79356b
A
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. All advertising materials mentioning features or use of this software
44 * must display the following acknowledgement:
45 * This product includes software developed by the University of
46 * California, Berkeley and its contributors.
47 * 4. Neither the name of the University nor the names of its contributors
48 * may be used to endorse or promote products derived from this software
49 * without specific prior written permission.
50 *
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE.
62 *
63 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94
64 */
2d21ac55
A
65/*
66 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
67 * support for mandatory and extensible security protections. This notice
68 * is included in support of clause 2.2 (b) of the Apple Public License,
69 * Version 2.0.
70 */
1c79356b
A
71
72#include <sys/param.h>
73#include <sys/systm.h>
74#include <sys/filedesc.h>
91447636
A
75#include <sys/proc_internal.h>
76#include <sys/file_internal.h>
2d21ac55 77#include <sys/vnode_internal.h>
1c79356b
A
78#include <sys/malloc.h>
79#include <sys/mbuf.h>
91447636
A
80#include <kern/lock.h>
81#include <sys/domain.h>
1c79356b 82#include <sys/protosw.h>
91447636 83#include <sys/signalvar.h>
1c79356b
A
84#include <sys/socket.h>
85#include <sys/socketvar.h>
1c79356b 86#include <sys/kernel.h>
91447636 87#include <sys/uio_internal.h>
2d21ac55 88#include <sys/kauth.h>
e5568f75
A
89
90#include <bsm/audit_kernel.h>
1c79356b
A
91
92#include <sys/kdebug.h>
91447636 93#include <sys/sysproto.h>
2d21ac55
A
94#include <netinet/in.h>
95#include <net/route.h>
96#include <netinet/in_pcb.h>
97
98#if CONFIG_MACF_SOCKET_SUBSET
99#include <security/mac_framework.h>
100#endif /* MAC_SOCKET_SUBSET */
101
102#define f_flag f_fglob->fg_flag
103#define f_type f_fglob->fg_type
104#define f_msgcount f_fglob->fg_msgcount
105#define f_cred f_fglob->fg_cred
106#define f_ops f_fglob->fg_ops
107#define f_offset f_fglob->fg_offset
108#define f_data f_fglob->fg_data
109
110
111#define DBG_LAYER_IN_BEG NETDBG_CODE(DBG_NETSOCK, 0)
112#define DBG_LAYER_IN_END NETDBG_CODE(DBG_NETSOCK, 2)
113#define DBG_LAYER_OUT_BEG NETDBG_CODE(DBG_NETSOCK, 1)
114#define DBG_LAYER_OUT_END NETDBG_CODE(DBG_NETSOCK, 3)
115#define DBG_FNC_SENDMSG NETDBG_CODE(DBG_NETSOCK, (1 << 8) | 1)
116#define DBG_FNC_SENDTO NETDBG_CODE(DBG_NETSOCK, (2 << 8) | 1)
117#define DBG_FNC_SENDIT NETDBG_CODE(DBG_NETSOCK, (3 << 8) | 1)
118#define DBG_FNC_RECVFROM NETDBG_CODE(DBG_NETSOCK, (5 << 8))
119#define DBG_FNC_RECVMSG NETDBG_CODE(DBG_NETSOCK, (6 << 8))
120#define DBG_FNC_RECVIT NETDBG_CODE(DBG_NETSOCK, (7 << 8))
121#define DBG_FNC_SENDFILE NETDBG_CODE(DBG_NETSOCK, (10 << 8))
122#define DBG_FNC_SENDFILE_WAIT NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 1))
123#define DBG_FNC_SENDFILE_READ NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 2))
124#define DBG_FNC_SENDFILE_SEND NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 3))
125
126
127#define HACK_FOR_4056224 1
91447636
A
128#if HACK_FOR_4056224
129static pid_t last_pid_4056224 = 0;
130#endif /* HACK_FOR_4056224 */
1c79356b 131
2d21ac55
A
132/* TODO: should be in header file */
133int falloc_locked(proc_t, struct fileproc **, int *, vfs_context_t, int);
134
135static int sendit(struct proc *, int, struct user_msghdr *, uio_t, int,
136 register_t *);
137static int recvit(struct proc *, int, struct user_msghdr *, uio_t, user_addr_t,
138 register_t *);
139static int getsockaddr(struct socket *, struct sockaddr **, user_addr_t,
4a3eedf9 140 size_t, boolean_t);
2d21ac55 141static int getsockaddr_s(struct socket *, struct sockaddr_storage *,
4a3eedf9 142 user_addr_t, size_t, boolean_t);
1c79356b 143#if SENDFILE
2d21ac55
A
144static void alloc_sendpkt(int, size_t, unsigned int *, struct mbuf **,
145 boolean_t);
146#endif /* SENDFILE */
1c79356b
A
147
148/*
149 * System call interface to the socket abstraction.
150 */
1c79356b
A
151
152extern struct fileops socketops;
153
2d21ac55
A
154/*
155 * Returns: 0 Success
156 * EACCES Mandatory Access Control failure
157 * falloc:ENFILE
158 * falloc:EMFILE
159 * falloc:ENOMEM
160 * socreate:EAFNOSUPPORT
161 * socreate:EPROTOTYPE
162 * socreate:EPROTONOSUPPORT
163 * socreate:ENOBUFS
164 * socreate:ENOMEM
165 * socreate:EISCONN
166 * socreate:??? [other protocol families, IPSEC]
167 */
1c79356b 168int
2d21ac55 169socket(struct proc *p, struct socket_args *uap, register_t *retval)
1c79356b 170{
1c79356b 171 struct socket *so;
91447636 172 struct fileproc *fp;
1c79356b
A
173 int fd, error;
174
55e303ae 175 AUDIT_ARG(socket, uap->domain, uap->type, uap->protocol);
2d21ac55
A
176#if CONFIG_MACF_SOCKET_SUBSET
177 if ((error = mac_socket_check_create(kauth_cred_get(), uap->domain,
178 uap->type, uap->protocol)) != 0)
179 return (error);
180#endif /* MAC_SOCKET_SUBSET */
1c79356b 181
2d21ac55 182 error = falloc(p, &fp, &fd, vfs_context_current());
91447636 183 if (error) {
1c79356b 184 return (error);
91447636 185 }
1c79356b
A
186 fp->f_flag = FREAD|FWRITE;
187 fp->f_type = DTYPE_SOCKET;
188 fp->f_ops = &socketops;
91447636
A
189
190 error = socreate(uap->domain, &so, uap->type, uap->protocol);
191 if (error) {
192 fp_free(p, fd, fp);
1c79356b 193 } else {
2d21ac55
A
194 thread_t thread;
195 struct uthread *ut;
196
197 thread = current_thread();
198 ut = get_bsdthread_info(thread);
199
200 /* if this is a backgrounded thread then throttle all new sockets */
201 if ( (ut->uu_flag & UT_BACKGROUND) != 0 ) {
202 so->so_traffic_mgt_flags |= TRAFFIC_MGT_SO_BACKGROUND;
203 so->so_background_thread = thread;
204 }
1c79356b 205 fp->f_data = (caddr_t)so;
91447636
A
206
207 proc_fdlock(p);
6601e61a 208 procfdtbl_releasefd(p, fd, NULL);
2d21ac55 209
91447636
A
210 fp_drop(p, fd, fp, 1);
211 proc_fdunlock(p);
212
1c79356b
A
213 *retval = fd;
214 }
215 return (error);
216}
217
2d21ac55
A
218/*
219 * Returns: 0 Success
220 * EDESTADDRREQ Destination address required
221 * EBADF Bad file descriptor
222 * EACCES Mandatory Access Control failure
223 * file_socket:ENOTSOCK
224 * file_socket:EBADF
225 * getsockaddr:ENAMETOOLONG Filename too long
226 * getsockaddr:EINVAL Invalid argument
227 * getsockaddr:ENOMEM Not enough space
228 * getsockaddr:EFAULT Bad address
229 * sobind:???
230 */
1c79356b
A
231/* ARGSUSED */
232int
2d21ac55 233bind(__unused proc_t p, struct bind_args *uap, __unused register_t *retval)
1c79356b 234{
2d21ac55
A
235 struct sockaddr_storage ss;
236 struct sockaddr *sa = NULL;
91447636 237 struct socket *so;
2d21ac55 238 boolean_t want_free = TRUE;
1c79356b
A
239 int error;
240
55e303ae 241 AUDIT_ARG(fd, uap->s);
91447636 242 error = file_socket(uap->s, &so);
2d21ac55 243 if (error != 0)
1c79356b 244 return (error);
2d21ac55
A
245 if (so == NULL) {
246 error = EBADF;
247 goto out;
248 }
249 if (uap->name == USER_ADDR_NULL) {
250 error = EDESTADDRREQ;
251 goto out;
252 }
253 if (uap->namelen > sizeof (ss)) {
4a3eedf9 254 error = getsockaddr(so, &sa, uap->name, uap->namelen, TRUE);
2d21ac55 255 } else {
4a3eedf9 256 error = getsockaddr_s(so, &ss, uap->name, uap->namelen, TRUE);
2d21ac55
A
257 if (error == 0) {
258 sa = (struct sockaddr *)&ss;
259 want_free = FALSE;
260 }
261 }
262 if (error != 0)
91447636 263 goto out;
2d21ac55
A
264 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
265#if CONFIG_MACF_SOCKET_SUBSET
266 if ((error = mac_socket_check_bind(kauth_cred_get(), so, sa)) == 0)
91447636 267 error = sobind(so, sa);
2d21ac55
A
268#else
269 error = sobind(so, sa);
270#endif /* MAC_SOCKET_SUBSET */
271 if (want_free)
272 FREE(sa, M_SONAME);
91447636
A
273out:
274 file_drop(uap->s);
1c79356b
A
275 return (error);
276}
277
2d21ac55
A
278/*
279 * Returns: 0 Success
280 * EBADF
281 * EACCES Mandatory Access Control failure
282 * file_socket:ENOTSOCK
283 * file_socket:EBADF
284 * solisten:EINVAL
285 * solisten:EOPNOTSUPP
286 * solisten:???
287 */
1c79356b 288int
2d21ac55
A
289listen(__unused struct proc *p, struct listen_args *uap,
290 __unused register_t *retval)
1c79356b 291{
1c79356b 292 int error;
2d21ac55 293 struct socket *so;
1c79356b 294
55e303ae 295 AUDIT_ARG(fd, uap->s);
91447636 296 error = file_socket(uap->s, &so);
1c79356b
A
297 if (error)
298 return (error);
91447636 299 if (so != NULL)
2d21ac55
A
300#if CONFIG_MACF_SOCKET_SUBSET
301 {
302 error = mac_socket_check_listen(kauth_cred_get(), so);
303 if (error == 0)
304 error = solisten(so, uap->backlog);
305 }
306#else
91447636 307 error = solisten(so, uap->backlog);
2d21ac55 308#endif /* MAC_SOCKET_SUBSET */
55e303ae 309 else
91447636 310 error = EBADF;
2d21ac55 311
91447636
A
312 file_drop(uap->s);
313 return (error);
1c79356b
A
314}
315
2d21ac55
A
316/*
317 * Returns: fp_getfsock:EBADF Bad file descriptor
318 * fp_getfsock:EOPNOTSUPP ...
319 * xlate => :ENOTSOCK Socket operation on non-socket
320 * :EFAULT Bad address on copyin/copyout
321 * :EBADF Bad file descriptor
322 * :EOPNOTSUPP Operation not supported on socket
323 * :EINVAL Invalid argument
324 * :EWOULDBLOCK Operation would block
325 * :ECONNABORTED Connection aborted
326 * :EINTR Interrupted function
327 * :EACCES Mandatory Access Control failure
328 * falloc_locked:ENFILE Too many files open in system
329 * falloc_locked::EMFILE Too many open files
330 * falloc_locked::ENOMEM Not enough space
331 * 0 Success
332 */
1c79356b 333int
2d21ac55
A
334accept_nocancel(struct proc *p, struct accept_nocancel_args *uap,
335 register_t *retval)
1c79356b 336{
91447636 337 struct fileproc *fp;
2d21ac55 338 struct sockaddr *sa = NULL;
91447636
A
339 socklen_t namelen;
340 int error;
341 struct socket *head, *so = NULL;
342 lck_mtx_t *mutex_held;
343 int fd = uap->s;
2d21ac55 344 int newfd;
1c79356b 345 short fflag; /* type must match fp->f_flag */
91447636 346 int dosocklock = 0;
1c79356b 347
2d21ac55
A
348 *retval = -1;
349
55e303ae 350 AUDIT_ARG(fd, uap->s);
2d21ac55 351
1c79356b 352 if (uap->name) {
91447636 353 error = copyin(uap->anamelen, (caddr_t)&namelen,
2d21ac55
A
354 sizeof (socklen_t));
355 if (error)
1c79356b
A
356 return (error);
357 }
91447636
A
358 error = fp_getfsock(p, fd, &fp, &head);
359 if (error) {
360 if (error == EOPNOTSUPP)
361 error = ENOTSOCK;
1c79356b 362 return (error);
91447636 363 }
55e303ae 364 if (head == NULL) {
91447636
A
365 error = EBADF;
366 goto out;
55e303ae 367 }
2d21ac55
A
368#if CONFIG_MACF_SOCKET_SUBSET
369 if ((error = mac_socket_check_accept(kauth_cred_get(), head)) != 0)
370 goto out;
371#endif /* MAC_SOCKET_SUBSET */
91447636
A
372
373 socket_lock(head, 1);
374
375 if (head->so_proto->pr_getlock != NULL) {
376 mutex_held = (*head->so_proto->pr_getlock)(head, 0);
377 dosocklock = 1;
2d21ac55 378 } else {
91447636
A
379 mutex_held = head->so_proto->pr_domain->dom_mtx;
380 dosocklock = 0;
381 }
382
1c79356b 383 if ((head->so_options & SO_ACCEPTCONN) == 0) {
2d21ac55
A
384 if ((head->so_proto->pr_flags & PR_CONNREQUIRED) == 0) {
385 error = EOPNOTSUPP;
386 } else {
387 /* POSIX: The socket is not accepting connections */
388 error = EINVAL;
389 }
91447636 390 socket_unlock(head, 1);
91447636 391 goto out;
1c79356b
A
392 }
393 if ((head->so_state & SS_NBIO) && head->so_comp.tqh_first == NULL) {
91447636
A
394 socket_unlock(head, 1);
395 error = EWOULDBLOCK;
396 goto out;
1c79356b 397 }
2d21ac55 398 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
1c79356b
A
399 if (head->so_state & SS_CANTRCVMORE) {
400 head->so_error = ECONNABORTED;
401 break;
402 }
91447636 403 if (head->so_usecount < 1)
2d21ac55
A
404 panic("accept: head=%p refcount=%d\n", head,
405 head->so_usecount);
406 error = msleep((caddr_t)&head->so_timeo, mutex_held,
407 PSOCK | PCATCH, "accept", 0);
91447636 408 if (head->so_usecount < 1)
2d21ac55
A
409 panic("accept: 2 head=%p refcount=%d\n", head,
410 head->so_usecount);
91447636
A
411 if ((head->so_state & SS_DRAINING)) {
412 error = ECONNABORTED;
413 }
1c79356b 414 if (error) {
91447636
A
415 socket_unlock(head, 1);
416 goto out;
1c79356b
A
417 }
418 }
419 if (head->so_error) {
420 error = head->so_error;
421 head->so_error = 0;
91447636
A
422 socket_unlock(head, 1);
423 goto out;
1c79356b
A
424 }
425
426
427 /*
428 * At this point we know that there is at least one connection
429 * ready to be accepted. Remove it from the queue prior to
430 * allocating the file descriptor for it since falloc() may
431 * block allowing another process to accept the connection
432 * instead.
433 */
91447636 434 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
e3027f41 435 so = TAILQ_FIRST(&head->so_comp);
1c79356b
A
436 TAILQ_REMOVE(&head->so_comp, so, so_list);
437 head->so_qlen--;
2d21ac55
A
438 /* unlock head to avoid deadlock with select, keep a ref on head */
439 socket_unlock(head, 0);
440
441#if CONFIG_MACF_SOCKET_SUBSET
442 /*
443 * Pass the pre-accepted socket to the MAC framework. This is
444 * cheaper than allocating a file descriptor for the socket,
445 * calling the protocol accept callback, and possibly freeing
446 * the file descriptor should the MAC check fails.
447 */
448 if ((error = mac_socket_check_accepted(kauth_cred_get(), so)) != 0) {
449 so->so_state &= ~(SS_NOFDREF | SS_COMP);
450 so->so_head = NULL;
451 soclose(so);
452 /* Drop reference on listening socket */
453 sodereference(head);
454 goto out;
455 }
456#endif /* MAC_SOCKET_SUBSET */
457
458 /*
459 * Pass the pre-accepted socket to any interested socket filter(s).
460 * Upon failure, the socket would have been closed by the callee.
461 */
462 if (so->so_filt != NULL && (error = soacceptfilter(so)) != 0) {
463 /* Drop reference on listening socket */
464 sodereference(head);
465 /* Propagate socket filter's error code to the caller */
466 goto out;
467 }
468
1c79356b 469 fflag = fp->f_flag;
2d21ac55 470 error = falloc(p, &fp, &newfd, vfs_context_current());
1c79356b
A
471 if (error) {
472 /*
473 * Probably ran out of file descriptors. Put the
474 * unaccepted connection back onto the queue and
475 * do another wakeup so some other process might
476 * have a chance at it.
477 */
91447636 478 socket_lock(head, 0);
1c79356b
A
479 TAILQ_INSERT_HEAD(&head->so_comp, so, so_list);
480 head->so_qlen++;
91447636
A
481 wakeup_one((caddr_t)&head->so_timeo);
482 socket_unlock(head, 1);
483 goto out;
2d21ac55 484 }
91447636 485 *retval = newfd;
1c79356b
A
486 fp->f_type = DTYPE_SOCKET;
487 fp->f_flag = fflag;
488 fp->f_ops = &socketops;
489 fp->f_data = (caddr_t)so;
91447636
A
490 socket_lock(head, 0);
491 if (dosocklock)
492 socket_lock(so, 1);
493 so->so_state &= ~SS_COMP;
494 so->so_head = NULL;
91447636
A
495 (void) soacceptlock(so, &sa, 0);
496 socket_unlock(head, 1);
2d21ac55 497 if (sa == NULL) {
1c79356b
A
498 namelen = 0;
499 if (uap->name)
500 goto gotnoname;
91447636
A
501 if (dosocklock)
502 socket_unlock(so, 1);
503 error = 0;
2d21ac55 504 goto releasefd;
1c79356b 505 }
2d21ac55
A
506 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
507
1c79356b 508 if (uap->name) {
2d21ac55
A
509 socklen_t sa_len;
510
511 /* save sa_len before it is destroyed */
512 sa_len = sa->sa_len;
513 namelen = MIN(namelen, sa_len);
91447636 514 error = copyout(sa, uap->name, namelen);
1c79356b 515 if (!error)
2d21ac55
A
516 /* return the actual, untruncated address length */
517 namelen = sa_len;
1c79356b 518gotnoname:
2d21ac55
A
519 error = copyout((caddr_t)&namelen, uap->anamelen,
520 sizeof (socklen_t));
1c79356b
A
521 }
522 FREE(sa, M_SONAME);
2d21ac55
A
523
524 /*
525 * If the socket has been marked as inactive by soacceptfilter(),
526 * disallow further operations on it. We explicitly call shutdown
527 * on both data directions to ensure that SS_CANT{RCV,SEND}MORE
528 * states are set for the socket. This would also flush out data
529 * hanging off the receive list of this socket.
530 */
531 if (so->so_flags & SOF_DEFUNCT) {
532 (void) soshutdownlock(so, SHUT_RD);
533 (void) soshutdownlock(so, SHUT_WR);
534 (void) sodisconnectlocked(so);
535 }
536
91447636
A
537 if (dosocklock)
538 socket_unlock(so, 1);
2d21ac55
A
539
540releasefd:
541 proc_fdlock(p);
542 procfdtbl_releasefd(p, newfd, NULL);
543 fp_drop(p, newfd, fp, 1);
544 proc_fdunlock(p);
545
91447636
A
546out:
547 file_drop(fd);
1c79356b
A
548 return (error);
549}
550
551int
91447636 552accept(struct proc *p, struct accept_args *uap, register_t *retval)
1c79356b 553{
2d21ac55
A
554 __pthread_testcancel(1);
555 return(accept_nocancel(p, (struct accept_nocancel_args *)uap, retval));
1c79356b
A
556}
557
2d21ac55
A
558/*
559 * Returns: 0 Success
560 * EBADF Bad file descriptor
561 * EALREADY Connection already in progress
562 * EINPROGRESS Operation in progress
563 * ECONNABORTED Connection aborted
564 * EINTR Interrupted function
565 * EACCES Mandatory Access Control failure
566 * file_socket:ENOTSOCK
567 * file_socket:EBADF
568 * getsockaddr:ENAMETOOLONG Filename too long
569 * getsockaddr:EINVAL Invalid argument
570 * getsockaddr:ENOMEM Not enough space
571 * getsockaddr:EFAULT Bad address
572 * soconnectlock:EOPNOTSUPP
573 * soconnectlock:EISCONN
574 * soconnectlock:??? [depends on protocol, filters]
575 * msleep:EINTR
576 *
577 * Imputed: so_error error may be set from so_error, which
578 * may have been set by soconnectlock.
579 */
580/* ARGSUSED */
1c79356b 581int
2d21ac55 582connect(struct proc *p, struct connect_args *uap, register_t *retval)
1c79356b 583{
2d21ac55
A
584 __pthread_testcancel(1);
585 return(connect_nocancel(p, (struct connect_nocancel_args *)uap, retval));
1c79356b 586}
1c79356b 587
1c79356b 588int
2d21ac55 589connect_nocancel(__unused proc_t p, struct connect_nocancel_args *uap, __unused register_t *retval)
1c79356b 590{
91447636 591 struct socket *so;
2d21ac55
A
592 struct sockaddr_storage ss;
593 struct sockaddr *sa = NULL;
91447636 594 lck_mtx_t *mutex_held;
2d21ac55 595 boolean_t want_free = TRUE;
91447636
A
596 int error;
597 int fd = uap->s;
4a3eedf9 598 boolean_t dgram;
1c79356b 599
55e303ae 600 AUDIT_ARG(fd, uap->s);
2d21ac55
A
601 error = file_socket(fd, &so);
602 if (error != 0)
1c79356b 603 return (error);
91447636
A
604 if (so == NULL) {
605 error = EBADF;
606 goto out;
607 }
608
4a3eedf9
A
609 /*
610 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
611 * if this is a datagram socket; translate for other types.
612 */
613 dgram = (so->so_type == SOCK_DGRAM);
614
2d21ac55
A
615 /* Get socket address now before we obtain socket lock */
616 if (uap->namelen > sizeof (ss)) {
4a3eedf9 617 error = getsockaddr(so, &sa, uap->name, uap->namelen, !dgram);
2d21ac55 618 } else {
4a3eedf9 619 error = getsockaddr_s(so, &ss, uap->name, uap->namelen, !dgram);
2d21ac55
A
620 if (error == 0) {
621 sa = (struct sockaddr *)&ss;
622 want_free = FALSE;
623 }
624 }
625 if (error != 0)
626 goto out;
627
628 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
629#if CONFIG_MACF_SOCKET_SUBSET
630 if ((error = mac_socket_check_connect(kauth_cred_get(), so, sa)) != 0) {
631 if (want_free)
632 FREE(sa, M_SONAME);
633 goto out;
634 }
635#endif /* MAC_SOCKET_SUBSET */
91447636
A
636 socket_lock(so, 1);
637
638 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
2d21ac55
A
639 if (want_free)
640 FREE(sa, M_SONAME);
91447636
A
641 socket_unlock(so, 1);
642 error = EALREADY;
643 goto out;
644 }
91447636 645 error = soconnectlock(so, sa, 0);
1c79356b
A
646 if (error)
647 goto bad;
648 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
2d21ac55
A
649 if (want_free)
650 FREE(sa, M_SONAME);
91447636
A
651 socket_unlock(so, 1);
652 error = EINPROGRESS;
653 goto out;
1c79356b 654 }
1c79356b 655 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
2d21ac55 656 if (so->so_proto->pr_getlock != NULL)
91447636 657 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
2d21ac55 658 else
91447636 659 mutex_held = so->so_proto->pr_domain->dom_mtx;
2d21ac55
A
660 error = msleep((caddr_t)&so->so_timeo, mutex_held,
661 PSOCK | PCATCH, "connect", 0);
91447636
A
662 if ((so->so_state & SS_DRAINING)) {
663 error = ECONNABORTED;
664 }
1c79356b
A
665 if (error)
666 break;
667 }
668 if (error == 0) {
669 error = so->so_error;
670 so->so_error = 0;
671 }
1c79356b
A
672bad:
673 so->so_state &= ~SS_ISCONNECTING;
91447636 674 socket_unlock(so, 1);
2d21ac55
A
675 if (want_free)
676 FREE(sa, M_SONAME);
1c79356b
A
677 if (error == ERESTART)
678 error = EINTR;
91447636
A
679out:
680 file_drop(fd);
1c79356b
A
681 return (error);
682}
683
2d21ac55
A
684/*
685 * Returns: 0 Success
686 * socreate:EAFNOSUPPORT
687 * socreate:EPROTOTYPE
688 * socreate:EPROTONOSUPPORT
689 * socreate:ENOBUFS
690 * socreate:ENOMEM
691 * socreate:EISCONN
692 * socreate:??? [other protocol families, IPSEC]
693 * falloc:ENFILE
694 * falloc:EMFILE
695 * falloc:ENOMEM
696 * copyout:EFAULT
697 * soconnect2:EINVAL
698 * soconnect2:EPROTOTYPE
699 * soconnect2:??? [other protocol families[
700 */
1c79356b 701int
2d21ac55
A
702socketpair(struct proc *p, struct socketpair_args *uap,
703 __unused register_t *retval)
1c79356b 704{
91447636 705 struct fileproc *fp1, *fp2;
1c79356b
A
706 struct socket *so1, *so2;
707 int fd, error, sv[2];
708
55e303ae 709 AUDIT_ARG(socket, uap->domain, uap->type, uap->protocol);
1c79356b
A
710 error = socreate(uap->domain, &so1, uap->type, uap->protocol);
711 if (error)
712 return (error);
713 error = socreate(uap->domain, &so2, uap->type, uap->protocol);
714 if (error)
715 goto free1;
91447636 716
2d21ac55 717 error = falloc(p, &fp1, &fd, vfs_context_current());
91447636 718 if (error) {
1c79356b 719 goto free2;
91447636 720 }
1c79356b
A
721 fp1->f_flag = FREAD|FWRITE;
722 fp1->f_type = DTYPE_SOCKET;
723 fp1->f_ops = &socketops;
724 fp1->f_data = (caddr_t)so1;
91447636
A
725 sv[0] = fd;
726
2d21ac55 727 error = falloc(p, &fp2, &fd, vfs_context_current());
91447636 728 if (error) {
1c79356b 729 goto free3;
91447636 730 }
1c79356b
A
731 fp2->f_flag = FREAD|FWRITE;
732 fp2->f_type = DTYPE_SOCKET;
733 fp2->f_ops = &socketops;
734 fp2->f_data = (caddr_t)so2;
735 sv[1] = fd;
91447636 736
1c79356b
A
737 error = soconnect2(so1, so2);
738 if (error) {
1c79356b
A
739 goto free4;
740 }
1c79356b
A
741 if (uap->type == SOCK_DGRAM) {
742 /*
743 * Datagram socket connection is asymmetric.
744 */
2d21ac55
A
745 error = soconnect2(so2, so1);
746 if (error) {
747 goto free4;
748 }
1c79356b 749 }
91447636
A
750
751 proc_fdlock(p);
6601e61a
A
752 procfdtbl_releasefd(p, sv[0], NULL);
753 procfdtbl_releasefd(p, sv[1], NULL);
91447636
A
754 fp_drop(p, sv[0], fp1, 1);
755 fp_drop(p, sv[1], fp2, 1);
756 proc_fdunlock(p);
757
2d21ac55 758 error = copyout((caddr_t)sv, uap->rsv, 2 * sizeof (int));
1c79356b
A
759 return (error);
760free4:
91447636 761 fp_free(p, sv[1], fp2);
1c79356b 762free3:
91447636 763 fp_free(p, sv[0], fp1);
1c79356b 764free2:
2d21ac55 765 (void) soclose(so2);
1c79356b 766free1:
2d21ac55 767 (void) soclose(so1);
1c79356b
A
768 return (error);
769}
770
2d21ac55
A
771/*
772 * Returns: 0 Success
773 * EINVAL
774 * ENOBUFS
775 * EBADF
776 * EPIPE
777 * EACCES Mandatory Access Control failure
778 * file_socket:ENOTSOCK
779 * file_socket:EBADF
780 * getsockaddr:ENAMETOOLONG Filename too long
781 * getsockaddr:EINVAL Invalid argument
782 * getsockaddr:ENOMEM Not enough space
783 * getsockaddr:EFAULT Bad address
784 * <pru_sosend>:EACCES[TCP]
785 * <pru_sosend>:EADDRINUSE[TCP]
786 * <pru_sosend>:EADDRNOTAVAIL[TCP]
787 * <pru_sosend>:EAFNOSUPPORT[TCP]
788 * <pru_sosend>:EAGAIN[TCP]
789 * <pru_sosend>:EBADF
790 * <pru_sosend>:ECONNRESET[TCP]
791 * <pru_sosend>:EFAULT
792 * <pru_sosend>:EHOSTUNREACH[TCP]
793 * <pru_sosend>:EINTR
794 * <pru_sosend>:EINVAL
795 * <pru_sosend>:EISCONN[AF_INET]
796 * <pru_sosend>:EMSGSIZE[TCP]
797 * <pru_sosend>:ENETDOWN[TCP]
798 * <pru_sosend>:ENETUNREACH[TCP]
799 * <pru_sosend>:ENOBUFS
800 * <pru_sosend>:ENOMEM[TCP]
801 * <pru_sosend>:ENOTCONN[AF_INET]
802 * <pru_sosend>:EOPNOTSUPP
803 * <pru_sosend>:EPERM[TCP]
804 * <pru_sosend>:EPIPE
805 * <pru_sosend>:EWOULDBLOCK
806 * <pru_sosend>:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
807 * <pru_sosend>:???[AF_INET] [whatever a filter author chooses]
808 * <pru_sosend>:??? [value from so_error]
809 * sockargs:???
810 */
1c79356b 811static int
2d21ac55
A
812sendit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop,
813 int flags, register_t *retval)
1c79356b 814{
2d21ac55
A
815 struct mbuf *control = NULL;
816 struct sockaddr_storage ss;
817 struct sockaddr *to = NULL;
818 boolean_t want_free = TRUE;
91447636 819 int error;
1c79356b 820 struct socket *so;
91447636 821 user_ssize_t len;
2d21ac55
A
822
823 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
1c79356b 824
91447636 825 error = file_socket(s, &so);
2d21ac55
A
826 if (error) {
827 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error, 0, 0, 0, 0);
828 return (error);
1c79356b 829 }
2d21ac55
A
830 if (so == NULL) {
831 error = EBADF;
832 goto out;
833 }
834 if (mp->msg_name != USER_ADDR_NULL) {
835 if (mp->msg_namelen > sizeof (ss)) {
836 error = getsockaddr(so, &to, mp->msg_name,
4a3eedf9 837 mp->msg_namelen, TRUE);
2d21ac55
A
838 } else {
839 error = getsockaddr_s(so, &ss, mp->msg_name,
4a3eedf9 840 mp->msg_namelen, TRUE);
2d21ac55
A
841 if (error == 0) {
842 to = (struct sockaddr *)&ss;
843 want_free = FALSE;
844 }
1c79356b 845 }
2d21ac55
A
846 if (error != 0)
847 goto out;
848 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), to);
91447636 849 }
2d21ac55
A
850 if (mp->msg_control != USER_ADDR_NULL) {
851 if (mp->msg_controllen < sizeof (struct cmsghdr)) {
1c79356b
A
852 error = EINVAL;
853 goto bad;
854 }
855 error = sockargs(&control, mp->msg_control,
856 mp->msg_controllen, MT_CONTROL);
2d21ac55 857 if (error != 0)
1c79356b 858 goto bad;
91447636 859 }
1c79356b 860
2d21ac55
A
861#if CONFIG_MACF_SOCKET_SUBSET
862 /*
863 * We check the state without holding the socket lock;
864 * if a race condition occurs, it would simply result
865 * in an extra call to the MAC check function.
866 */
867 if (!(so->so_state & SS_ISCONNECTED) &&
868 (error = mac_socket_check_send(kauth_cred_get(), so, to)) != 0)
869 goto bad;
870#endif /* MAC_SOCKET_SUBSET */
91447636
A
871
872 len = uio_resid(uiop);
2d21ac55
A
873 error = so->so_proto->pr_usrreqs->pru_sosend(so, to, uiop, 0, control,
874 flags);
875 if (error != 0) {
91447636 876 if (uio_resid(uiop) != len && (error == ERESTART ||
1c79356b
A
877 error == EINTR || error == EWOULDBLOCK))
878 error = 0;
2d21ac55 879 /* Generation of SIGPIPE can be controlled per socket */
9bccf70c 880 if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE))
1c79356b
A
881 psignal(p, SIGPIPE);
882 }
883 if (error == 0)
91447636
A
884 *retval = (int)(len - uio_resid(uiop));
885bad:
2d21ac55 886 if (to != NULL && want_free)
1c79356b 887 FREE(to, M_SONAME);
91447636 888out:
2d21ac55 889 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error, 0, 0, 0, 0);
91447636 890 file_drop(s);
1c79356b
A
891 return (error);
892}
893
2d21ac55
A
894/*
895 * Returns: 0 Success
896 * ENOMEM
897 * sendit:??? [see sendit definition in this file]
898 * write:??? [4056224: applicable for pipes]
899 */
1c79356b 900int
91447636 901sendto(struct proc *p, struct sendto_args *uap, register_t *retval)
2d21ac55
A
902{
903 __pthread_testcancel(1);
904 return(sendto_nocancel(p, (struct sendto_nocancel_args *)uap, retval));
905}
906
907int
908sendto_nocancel(struct proc *p, struct sendto_nocancel_args *uap, register_t *retval)
1c79356b 909{
91447636
A
910 struct user_msghdr msg;
911 int error;
912 uio_t auio = NULL;
1c79356b 913
2d21ac55 914 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_START, 0, 0, 0, 0, 0);
55e303ae 915 AUDIT_ARG(fd, uap->s);
1c79356b 916
91447636 917 auio = uio_create(1, 0,
2d21ac55
A
918 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
919 UIO_WRITE);
91447636
A
920 if (auio == NULL) {
921 return (ENOMEM);
922 }
923 uio_addiov(auio, uap->buf, uap->len);
924
1c79356b
A
925 msg.msg_name = uap->to;
926 msg.msg_namelen = uap->tolen;
91447636
A
927 /* no need to set up msg_iov. sendit uses uio_t we send it */
928 msg.msg_iov = 0;
929 msg.msg_iovlen = 0;
1c79356b 930 msg.msg_control = 0;
1c79356b 931 msg.msg_flags = 0;
1c79356b 932
91447636 933 error = sendit(p, uap->s, &msg, auio, uap->flags, retval);
2d21ac55 934
91447636
A
935 if (auio != NULL) {
936 uio_free(auio);
937 }
2d21ac55 938
91447636 939#if HACK_FOR_4056224
2d21ac55
A
940 /*
941 * Radar 4056224
942 * Temporary workaround to let send() and recv() work over
943 * a pipe for binary compatibility
91447636
A
944 * This will be removed in the release following Tiger
945 */
946 if (error == ENOTSOCK) {
947 struct fileproc *fp;
2d21ac55
A
948
949 if (fp_lookup(p, uap->s, &fp, 0) == 0) {
950 (void) fp_drop(p, uap->s, fp, 0);
951
91447636
A
952 if (fp->f_type == DTYPE_PIPE) {
953 struct write_args write_uap;
954 user_ssize_t write_retval;
2d21ac55 955
91447636
A
956 if (p->p_pid > last_pid_4056224) {
957 last_pid_4056224 = p->p_pid;
958
2d21ac55
A
959 printf("%s[%d] uses send/recv "
960 "on a pipe\n", p->p_comm, p->p_pid);
91447636 961 }
2d21ac55
A
962
963 bzero(&write_uap, sizeof (struct write_args));
91447636
A
964 write_uap.fd = uap->s;
965 write_uap.cbuf = uap->buf;
966 write_uap.nbyte = uap->len;
2d21ac55 967
91447636
A
968 error = write(p, &write_uap, &write_retval);
969 *retval = (int)write_retval;
970 }
971 }
972 }
973#endif /* HACK_FOR_4056224 */
1c79356b 974
2d21ac55 975 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_END, error, *retval, 0, 0, 0);
1c79356b 976
2d21ac55 977 return (error);
1c79356b 978}
1c79356b 979
2d21ac55
A
980/*
981 * Returns: 0 Success
982 * ENOBUFS
983 * copyin:EFAULT
984 * sendit:??? [see sendit definition in this file]
985 */
1c79356b 986int
2d21ac55 987sendmsg(struct proc *p, struct sendmsg_args *uap, register_t *retval)
1c79356b 988{
2d21ac55
A
989 __pthread_testcancel(1);
990 return(sendmsg_nocancel(p, (struct sendmsg_nocancel_args *)uap, retval));
1c79356b 991}
1c79356b
A
992
993int
2d21ac55 994sendmsg_nocancel(struct proc *p, struct sendmsg_nocancel_args *uap, register_t *retval)
1c79356b
A
995{
996 struct msghdr msg;
91447636
A
997 struct user_msghdr user_msg;
998 caddr_t msghdrp;
999 int size_of_msghdr;
1c79356b 1000 int error;
91447636
A
1001 int size_of_iovec;
1002 uio_t auio = NULL;
1003 struct user_iovec *iovp;
1c79356b 1004
2d21ac55 1005 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
55e303ae 1006 AUDIT_ARG(fd, uap->s);
91447636 1007 if (IS_64BIT_PROCESS(p)) {
2d21ac55
A
1008 msghdrp = (caddr_t)&user_msg;
1009 size_of_msghdr = sizeof (user_msg);
1010 size_of_iovec = sizeof (struct user_iovec);
1011 } else {
1012 msghdrp = (caddr_t)&msg;
1013 size_of_msghdr = sizeof (msg);
1014 size_of_iovec = sizeof (struct iovec);
91447636
A
1015 }
1016 error = copyin(uap->msg, msghdrp, size_of_msghdr);
2d21ac55
A
1017 if (error) {
1018 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1019 return (error);
1c79356b 1020 }
91447636
A
1021
1022 /* only need to copy if user process is not 64-bit */
1023 if (!IS_64BIT_PROCESS(p)) {
1024 user_msg.msg_flags = msg.msg_flags;
1025 user_msg.msg_controllen = msg.msg_controllen;
1026 user_msg.msg_control = CAST_USER_ADDR_T(msg.msg_control);
1027 user_msg.msg_iovlen = msg.msg_iovlen;
1028 user_msg.msg_iov = CAST_USER_ADDR_T(msg.msg_iov);
1029 user_msg.msg_namelen = msg.msg_namelen;
1030 user_msg.msg_name = CAST_USER_ADDR_T(msg.msg_name);
1031 }
1032
1033 if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
2d21ac55
A
1034 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, EMSGSIZE,
1035 0, 0, 0, 0);
91447636
A
1036 return (EMSGSIZE);
1037 }
1038
1039 /* allocate a uio large enough to hold the number of iovecs passed */
1040 auio = uio_create(user_msg.msg_iovlen, 0,
2d21ac55
A
1041 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1042 UIO_WRITE);
91447636
A
1043 if (auio == NULL) {
1044 error = ENOBUFS;
1045 goto done;
1046 }
2d21ac55 1047
91447636 1048 if (user_msg.msg_iovlen) {
2d21ac55
A
1049 /*
1050 * get location of iovecs within the uio.
1051 * then copyin the iovecs from user space.
91447636
A
1052 */
1053 iovp = uio_iovsaddr(auio);
1054 if (iovp == NULL) {
1055 error = ENOBUFS;
1056 goto done;
1057 }
2d21ac55
A
1058 error = copyin(user_msg.msg_iov, (caddr_t)iovp,
1059 (user_msg.msg_iovlen * size_of_iovec));
91447636
A
1060 if (error)
1061 goto done;
1062 user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
2d21ac55
A
1063
1064 /* finish setup of uio_t */
91447636 1065 uio_calculateresid(auio);
2d21ac55 1066 } else {
91447636
A
1067 user_msg.msg_iov = 0;
1068 }
2d21ac55
A
1069
1070 /* msg_flags is ignored for send */
91447636 1071 user_msg.msg_flags = 0;
2d21ac55 1072
91447636 1073 error = sendit(p, uap->s, &user_msg, auio, uap->flags, retval);
1c79356b 1074done:
91447636
A
1075 if (auio != NULL) {
1076 uio_free(auio);
1077 }
2d21ac55 1078 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
91447636 1079
1c79356b
A
1080 return (error);
1081}
1082
2d21ac55
A
1083/*
1084 * Returns: 0 Success
1085 * ENOTSOCK
1086 * EINVAL
1087 * EBADF
1088 * EACCES Mandatory Access Control failure
1089 * copyout:EFAULT
1090 * fp_lookup:EBADF
1091 * <pru_soreceive>:ENOBUFS
1092 * <pru_soreceive>:ENOTCONN
1093 * <pru_soreceive>:EWOULDBLOCK
1094 * <pru_soreceive>:EFAULT
1095 * <pru_soreceive>:EINTR
1096 * <pru_soreceive>:EBADF
1097 * <pru_soreceive>:EINVAL
1098 * <pru_soreceive>:EMSGSIZE
1099 * <pru_soreceive>:???
1100 *
1101 * Notes: Additional return values from calls through <pru_soreceive>
1102 * depend on protocols other than TCP or AF_UNIX, which are
1103 * documented above.
1104 */
1c79356b 1105static int
2d21ac55
A
1106recvit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop,
1107 user_addr_t namelenp, register_t *retval)
1c79356b 1108{
1c79356b
A
1109 int len, error;
1110 struct mbuf *m, *control = 0;
91447636 1111 user_addr_t ctlbuf;
1c79356b
A
1112 struct socket *so;
1113 struct sockaddr *fromsa = 0;
91447636 1114 struct fileproc *fp;
1c79356b 1115
2d21ac55 1116 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
91447636 1117 proc_fdlock(p);
2d21ac55
A
1118 if ((error = fp_lookup(p, s, &fp, 1))) {
1119 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
91447636 1120 proc_fdunlock(p);
2d21ac55 1121 return (error);
1c79356b 1122 }
91447636 1123 if (fp->f_type != DTYPE_SOCKET) {
2d21ac55 1124 fp_drop(p, s, fp, 1);
91447636 1125 proc_fdunlock(p);
2d21ac55 1126 return (ENOTSOCK);
91447636 1127 }
1c79356b 1128
2d21ac55
A
1129 so = (struct socket *)fp->f_data;
1130 if (so == NULL) {
1131 fp_drop(p, s, fp, 1);
1132 proc_fdunlock(p);
1133 return (EBADF);
1134 }
91447636
A
1135
1136 proc_fdunlock(p);
2d21ac55
A
1137
1138#if CONFIG_MACF_SOCKET_SUBSET
1139 /*
1140 * We check the state without holding the socket lock;
1141 * if a race condition occurs, it would simply result
1142 * in an extra call to the MAC check function.
1143 */
1144 if (!(so->so_state & SS_ISCONNECTED) &&
1145 (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0)
1146 goto out1;
1147#endif /* MAC_SOCKET_SUBSET */
91447636 1148 if (uio_resid(uiop) < 0) {
2d21ac55 1149 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, EINVAL, 0, 0, 0, 0);
91447636
A
1150 error = EINVAL;
1151 goto out1;
1c79356b 1152 }
91447636
A
1153
1154 len = uio_resid(uiop);
2d21ac55
A
1155 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, uiop,
1156 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
1157 &mp->msg_flags);
1158 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), fromsa);
1c79356b 1159 if (error) {
91447636 1160 if (uio_resid(uiop) != len && (error == ERESTART ||
1c79356b
A
1161 error == EINTR || error == EWOULDBLOCK))
1162 error = 0;
1163 }
2d21ac55 1164
1c79356b
A
1165 if (error)
1166 goto out;
2d21ac55 1167
91447636 1168 *retval = len - uio_resid(uiop);
1c79356b 1169 if (mp->msg_name) {
2d21ac55
A
1170 socklen_t sa_len = 0;
1171
1c79356b 1172 len = mp->msg_namelen;
2d21ac55 1173 if (len <= 0 || fromsa == 0) {
1c79356b 1174 len = 0;
2d21ac55 1175 } else {
1c79356b 1176#ifndef MIN
2d21ac55 1177#define MIN(a, b) ((a) > (b) ? (b) : (a))
1c79356b 1178#endif
2d21ac55
A
1179 sa_len = fromsa->sa_len;
1180 len = MIN((unsigned int)len, sa_len);
91447636 1181 error = copyout(fromsa, mp->msg_name, (unsigned)len);
1c79356b
A
1182 if (error)
1183 goto out;
1184 }
2d21ac55
A
1185 mp->msg_namelen = sa_len;
1186 /* return the actual, untruncated address length */
1c79356b 1187 if (namelenp &&
2d21ac55
A
1188 (error = copyout((caddr_t)&sa_len, namelenp,
1189 sizeof (int)))) {
1c79356b
A
1190 goto out;
1191 }
1192 }
1193 if (mp->msg_control) {
1c79356b
A
1194 len = mp->msg_controllen;
1195 m = control;
1196 mp->msg_controllen = 0;
91447636 1197 ctlbuf = mp->msg_control;
1c79356b
A
1198
1199 while (m && len > 0) {
1200 unsigned int tocopy;
1201
2d21ac55 1202 if (len >= m->m_len) {
1c79356b 1203 tocopy = m->m_len;
2d21ac55 1204 } else {
1c79356b
A
1205 mp->msg_flags |= MSG_CTRUNC;
1206 tocopy = len;
1207 }
2d21ac55
A
1208
1209 error = copyout((caddr_t)mtod(m, caddr_t), ctlbuf,
1210 tocopy);
91447636 1211 if (error)
1c79356b
A
1212 goto out;
1213
1214 ctlbuf += tocopy;
1215 len -= tocopy;
1216 m = m->m_next;
1217 }
1218 mp->msg_controllen = ctlbuf - mp->msg_control;
1219 }
1220out:
1221 if (fromsa)
1222 FREE(fromsa, M_SONAME);
1223 if (control)
1224 m_freem(control);
2d21ac55 1225 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
91447636
A
1226out1:
1227 fp_drop(p, s, fp, 0);
1c79356b
A
1228 return (error);
1229}
1230
1231
2d21ac55
A
1232/*
1233 * Returns: 0 Success
1234 * ENOMEM
1235 * copyin:EFAULT
1236 * recvit:???
1237 * read:??? [4056224: applicable for pipes]
1238 *
1239 * Notes: The read entry point is only called as part of support for
1240 * binary backward compatability; new code should use read
1241 * instead of recv or recvfrom when attempting to read data
1242 * from pipes.
1243 *
1244 * For full documentation of the return codes from recvit, see
1245 * the block header for the recvit function.
1246 */
1247int
1248recvfrom(struct proc *p, struct recvfrom_args *uap, register_t *retval)
1249{
1250 __pthread_testcancel(1);
1251 return(recvfrom_nocancel(p, (struct recvfrom_nocancel_args *)uap, retval));
1252}
1253
1c79356b 1254int
2d21ac55 1255recvfrom_nocancel(struct proc *p, struct recvfrom_nocancel_args *uap, register_t *retval)
1c79356b 1256{
91447636 1257 struct user_msghdr msg;
1c79356b 1258 int error;
91447636 1259 uio_t auio = NULL;
1c79356b 1260
2d21ac55 1261 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_START, 0, 0, 0, 0, 0);
55e303ae 1262 AUDIT_ARG(fd, uap->s);
1c79356b
A
1263
1264 if (uap->fromlenaddr) {
91447636 1265 error = copyin(uap->fromlenaddr,
1c79356b
A
1266 (caddr_t)&msg.msg_namelen, sizeof (msg.msg_namelen));
1267 if (error)
1268 return (error);
2d21ac55 1269 } else {
1c79356b 1270 msg.msg_namelen = 0;
2d21ac55 1271 }
1c79356b 1272 msg.msg_name = uap->from;
91447636 1273 auio = uio_create(1, 0,
2d21ac55
A
1274 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1275 UIO_READ);
91447636
A
1276 if (auio == NULL) {
1277 return (ENOMEM);
1278 }
2d21ac55 1279
91447636
A
1280 uio_addiov(auio, uap->buf, uap->len);
1281 /* no need to set up msg_iov. recvit uses uio_t we send it */
1282 msg.msg_iov = 0;
1283 msg.msg_iovlen = 0;
1c79356b 1284 msg.msg_control = 0;
91447636 1285 msg.msg_controllen = 0;
1c79356b 1286 msg.msg_flags = uap->flags;
91447636
A
1287 error = recvit(p, uap->s, &msg, auio, uap->fromlenaddr, retval);
1288 if (auio != NULL) {
1289 uio_free(auio);
1290 }
2d21ac55 1291
91447636 1292#if HACK_FOR_4056224
2d21ac55
A
1293 /*
1294 * Radar 4056224
1295 * Temporary workaround to let send() and recv() work over
1296 * a pipe for binary compatibility
91447636
A
1297 * This will be removed in the release following Tiger
1298 */
1299 if (error == ENOTSOCK && proc_is64bit(p) == 0) {
1300 struct fileproc *fp;
2d21ac55
A
1301
1302 if (fp_lookup(p, uap->s, &fp, 0) == 0) {
1303 (void) fp_drop(p, uap->s, fp, 0);
1304
91447636
A
1305 if (fp->f_type == DTYPE_PIPE) {
1306 struct read_args read_uap;
1307 user_ssize_t read_retval;
2d21ac55 1308
91447636
A
1309 if (p->p_pid > last_pid_4056224) {
1310 last_pid_4056224 = p->p_pid;
1311
2d21ac55
A
1312 printf("%s[%d] uses send/recv on "
1313 "a pipe\n", p->p_comm, p->p_pid);
91447636 1314 }
2d21ac55
A
1315
1316 bzero(&read_uap, sizeof (struct read_args));
91447636
A
1317 read_uap.fd = uap->s;
1318 read_uap.cbuf = uap->buf;
1319 read_uap.nbyte = uap->len;
2d21ac55 1320
91447636
A
1321 error = read(p, &read_uap, &read_retval);
1322 *retval = (int)read_retval;
1323 }
1324 }
1325 }
1326#endif /* HACK_FOR_4056224 */
1327
2d21ac55 1328 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_END, error, 0, 0, 0, 0);
1c79356b 1329
2d21ac55 1330 return (error);
1c79356b
A
1331}
1332
1333/*
2d21ac55
A
1334 * Returns: 0 Success
1335 * EMSGSIZE
1336 * ENOMEM
1337 * copyin:EFAULT
1338 * copyout:EFAULT
1339 * recvit:???
1340 *
1341 * Notes: For full documentation of the return codes from recvit, see
1342 * the block header for the recvit function.
1c79356b
A
1343 */
1344int
2d21ac55 1345recvmsg(struct proc *p, struct recvmsg_args *uap, register_t *retval)
1c79356b 1346{
2d21ac55
A
1347 __pthread_testcancel(1);
1348 return(recvmsg_nocancel(p, (struct recvmsg_nocancel_args *)uap, retval));
1c79356b 1349}
1c79356b
A
1350
1351int
2d21ac55 1352recvmsg_nocancel(struct proc *p, struct recvmsg_nocancel_args *uap, register_t *retval)
1c79356b
A
1353{
1354 struct msghdr msg;
91447636
A
1355 struct user_msghdr user_msg;
1356 caddr_t msghdrp;
1357 int size_of_msghdr;
1358 user_addr_t uiov;
2d21ac55 1359 int error;
91447636
A
1360 int size_of_iovec;
1361 uio_t auio = NULL;
1362 struct user_iovec *iovp;
1c79356b 1363
2d21ac55 1364 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
55e303ae 1365 AUDIT_ARG(fd, uap->s);
91447636 1366 if (IS_64BIT_PROCESS(p)) {
2d21ac55
A
1367 msghdrp = (caddr_t)&user_msg;
1368 size_of_msghdr = sizeof (user_msg);
1369 size_of_iovec = sizeof (struct user_iovec);
1370 } else {
1371 msghdrp = (caddr_t)&msg;
1372 size_of_msghdr = sizeof (msg);
1373 size_of_iovec = sizeof (struct iovec);
91447636
A
1374 }
1375 error = copyin(uap->msg, msghdrp, size_of_msghdr);
2d21ac55
A
1376 if (error) {
1377 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1c79356b
A
1378 return (error);
1379 }
1380
91447636
A
1381 /* only need to copy if user process is not 64-bit */
1382 if (!IS_64BIT_PROCESS(p)) {
1383 user_msg.msg_flags = msg.msg_flags;
1384 user_msg.msg_controllen = msg.msg_controllen;
1385 user_msg.msg_control = CAST_USER_ADDR_T(msg.msg_control);
1386 user_msg.msg_iovlen = msg.msg_iovlen;
1387 user_msg.msg_iov = CAST_USER_ADDR_T(msg.msg_iov);
1388 user_msg.msg_namelen = msg.msg_namelen;
1389 user_msg.msg_name = CAST_USER_ADDR_T(msg.msg_name);
1390 }
1391
1392 if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
2d21ac55
A
1393 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, EMSGSIZE,
1394 0, 0, 0, 0);
91447636
A
1395 return (EMSGSIZE);
1396 }
1397
91447636 1398 user_msg.msg_flags = uap->flags;
91447636
A
1399
1400 /* allocate a uio large enough to hold the number of iovecs passed */
1401 auio = uio_create(user_msg.msg_iovlen, 0,
2d21ac55
A
1402 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1403 UIO_READ);
91447636
A
1404 if (auio == NULL) {
1405 error = ENOMEM;
1406 goto done;
1407 }
1408
2d21ac55
A
1409 /*
1410 * get location of iovecs within the uio. then copyin the iovecs from
91447636
A
1411 * user space.
1412 */
1413 iovp = uio_iovsaddr(auio);
1414 if (iovp == NULL) {
1415 error = ENOMEM;
1416 goto done;
1417 }
1418 uiov = user_msg.msg_iov;
1419 user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
2d21ac55
A
1420 error = copyin(uiov, (caddr_t)iovp,
1421 (user_msg.msg_iovlen * size_of_iovec));
1c79356b
A
1422 if (error)
1423 goto done;
91447636 1424
2d21ac55 1425 /* finish setup of uio_t */
91447636 1426 uio_calculateresid(auio);
2d21ac55 1427
91447636 1428 error = recvit(p, uap->s, &user_msg, auio, 0, retval);
1c79356b 1429 if (!error) {
91447636
A
1430 user_msg.msg_iov = uiov;
1431 /* only need to copy if user process is not 64-bit */
1432 if (!IS_64BIT_PROCESS(p)) {
1433 // LP64todo - do all these change? if not, then no need to copy all of them!
1434 msg.msg_flags = user_msg.msg_flags;
1435 msg.msg_controllen = user_msg.msg_controllen;
2d21ac55
A
1436 msg.msg_control =
1437 CAST_DOWN(caddr_t, user_msg.msg_control);
91447636 1438 msg.msg_iovlen = user_msg.msg_iovlen;
2d21ac55
A
1439 msg.msg_iov = (struct iovec *)
1440 CAST_DOWN(caddr_t, user_msg.msg_iov);
91447636
A
1441 msg.msg_namelen = user_msg.msg_namelen;
1442 msg.msg_name = CAST_DOWN(caddr_t, user_msg.msg_name);
1443 }
1444 error = copyout(msghdrp, uap->msg, size_of_msghdr);
1c79356b
A
1445 }
1446done:
91447636
A
1447 if (auio != NULL) {
1448 uio_free(auio);
1449 }
2d21ac55 1450 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1c79356b
A
1451 return (error);
1452}
1453
2d21ac55
A
1454/*
1455 * Returns: 0 Success
1456 * EBADF
1457 * file_socket:ENOTSOCK
1458 * file_socket:EBADF
1459 * soshutdown:EINVAL
1460 * soshutdown:ENOTCONN
1461 * soshutdown:EADDRNOTAVAIL[TCP]
1462 * soshutdown:ENOBUFS[TCP]
1463 * soshutdown:EMSGSIZE[TCP]
1464 * soshutdown:EHOSTUNREACH[TCP]
1465 * soshutdown:ENETUNREACH[TCP]
1466 * soshutdown:ENETDOWN[TCP]
1467 * soshutdown:ENOMEM[TCP]
1468 * soshutdown:EACCES[TCP]
1469 * soshutdown:EMSGSIZE[TCP]
1470 * soshutdown:ENOBUFS[TCP]
1471 * soshutdown:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
1472 * soshutdown:??? [other protocol families]
1473 */
1c79356b
A
1474/* ARGSUSED */
1475int
2d21ac55
A
1476shutdown(__unused struct proc *p, struct shutdown_args *uap,
1477 __unused register_t *retval)
1c79356b 1478{
2d21ac55 1479 struct socket *so;
1c79356b
A
1480 int error;
1481
55e303ae 1482 AUDIT_ARG(fd, uap->s);
91447636 1483 error = file_socket(uap->s, &so);
1c79356b
A
1484 if (error)
1485 return (error);
91447636
A
1486 if (so == NULL) {
1487 error = EBADF;
1488 goto out;
1489 }
1490 error = soshutdown((struct socket *)so, uap->how);
1491out:
1492 file_drop(uap->s);
2d21ac55 1493 return (error);
1c79356b
A
1494}
1495
2d21ac55
A
1496/*
1497 * Returns: 0 Success
1498 * EFAULT
1499 * EINVAL
1500 * EACCES Mandatory Access Control failure
1501 * file_socket:ENOTSOCK
1502 * file_socket:EBADF
1503 * sosetopt:EINVAL
1504 * sosetopt:ENOPROTOOPT
1505 * sosetopt:ENOBUFS
1506 * sosetopt:EDOM
1507 * sosetopt:EFAULT
1508 * sosetopt:EOPNOTSUPP[AF_UNIX]
1509 * sosetopt:???
1510 */
1c79356b
A
1511/* ARGSUSED */
1512int
2d21ac55
A
1513setsockopt(struct proc *p, struct setsockopt_args *uap,
1514 __unused register_t *retval)
1c79356b 1515{
2d21ac55 1516 struct socket *so;
1c79356b
A
1517 struct sockopt sopt;
1518 int error;
1519
55e303ae 1520 AUDIT_ARG(fd, uap->s);
1c79356b
A
1521 if (uap->val == 0 && uap->valsize != 0)
1522 return (EFAULT);
2d21ac55 1523 /* No bounds checking on size (it's unsigned) */
1c79356b 1524
91447636 1525 error = file_socket(uap->s, &so);
1c79356b
A
1526 if (error)
1527 return (error);
1528
1529 sopt.sopt_dir = SOPT_SET;
1530 sopt.sopt_level = uap->level;
1531 sopt.sopt_name = uap->name;
1532 sopt.sopt_val = uap->val;
1533 sopt.sopt_valsize = uap->valsize;
1534 sopt.sopt_p = p;
1535
91447636
A
1536 if (so == NULL) {
1537 error = EINVAL;
1538 goto out;
1539 }
2d21ac55
A
1540#if CONFIG_MACF_SOCKET_SUBSET
1541 if ((error = mac_socket_check_setsockopt(kauth_cred_get(), so,
1542 &sopt)) != 0)
1543 goto out;
1544#endif /* MAC_SOCKET_SUBSET */
91447636
A
1545 error = sosetopt(so, &sopt);
1546out:
1547 file_drop(uap->s);
2d21ac55 1548 return (error);
1c79356b
A
1549}
1550
1551
1552
2d21ac55
A
1553/*
1554 * Returns: 0 Success
1555 * EINVAL
1556 * EBADF
1557 * EACCES Mandatory Access Control failure
1558 * copyin:EFAULT
1559 * copyout:EFAULT
1560 * file_socket:ENOTSOCK
1561 * file_socket:EBADF
1562 * sogetopt:???
1563 */
1c79356b 1564int
2d21ac55
A
1565getsockopt(struct proc *p, struct getsockopt_args *uap,
1566 __unused register_t *retval)
1c79356b 1567{
91447636
A
1568 int error;
1569 socklen_t valsize;
1570 struct sockopt sopt;
2d21ac55 1571 struct socket *so;
1c79356b 1572
91447636 1573 error = file_socket(uap->s, &so);
1c79356b
A
1574 if (error)
1575 return (error);
1576 if (uap->val) {
2d21ac55
A
1577 error = copyin(uap->avalsize, (caddr_t)&valsize,
1578 sizeof (valsize));
1c79356b 1579 if (error)
91447636 1580 goto out;
2d21ac55
A
1581 /* No bounds checking on size (it's unsigned) */
1582 } else {
1c79356b 1583 valsize = 0;
2d21ac55 1584 }
1c79356b
A
1585 sopt.sopt_dir = SOPT_GET;
1586 sopt.sopt_level = uap->level;
1587 sopt.sopt_name = uap->name;
1588 sopt.sopt_val = uap->val;
1589 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
1590 sopt.sopt_p = p;
1591
91447636
A
1592 if (so == NULL) {
1593 error = EBADF;
1594 goto out;
1595 }
2d21ac55
A
1596#if CONFIG_MACF_SOCKET_SUBSET
1597 if ((error = mac_socket_check_getsockopt(kauth_cred_get(), so,
1598 &sopt)) != 0)
1599 goto out;
1600#endif /* MAC_SOCKET_SUBSET */
91447636 1601 error = sogetopt((struct socket *)so, &sopt);
1c79356b
A
1602 if (error == 0) {
1603 valsize = sopt.sopt_valsize;
2d21ac55
A
1604 error = copyout((caddr_t)&valsize, uap->avalsize,
1605 sizeof (valsize));
1c79356b 1606 }
91447636
A
1607out:
1608 file_drop(uap->s);
1c79356b
A
1609 return (error);
1610}
1611
1612
1613/*
1614 * Get socket name.
2d21ac55
A
1615 *
1616 * Returns: 0 Success
1617 * EBADF
1618 * file_socket:ENOTSOCK
1619 * file_socket:EBADF
1620 * copyin:EFAULT
1621 * copyout:EFAULT
1622 * <pru_sockaddr>:ENOBUFS[TCP]
1623 * <pru_sockaddr>:ECONNRESET[TCP]
1624 * <pru_sockaddr>:EINVAL[AF_UNIX]
1625 * <sf_getsockname>:???
1c79356b
A
1626 */
1627/* ARGSUSED */
2d21ac55
A
1628int
1629getsockname(__unused struct proc *p, struct getsockname_args *uap,
1630 __unused register_t *retval)
1c79356b 1631{
91447636 1632 struct socket *so;
1c79356b 1633 struct sockaddr *sa;
91447636 1634 socklen_t len;
2d21ac55 1635 socklen_t sa_len;
1c79356b
A
1636 int error;
1637
91447636 1638 error = file_socket(uap->fdes, &so);
1c79356b
A
1639 if (error)
1640 return (error);
2d21ac55 1641 error = copyin(uap->alen, (caddr_t)&len, sizeof (socklen_t));
1c79356b 1642 if (error)
91447636
A
1643 goto out;
1644 if (so == NULL) {
1645 error = EBADF;
1646 goto out;
1647 }
1c79356b 1648 sa = 0;
91447636 1649 socket_lock(so, 1);
1c79356b 1650 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
2d21ac55 1651 if (error == 0) {
91447636
A
1652 struct socket_filter_entry *filter;
1653 int filtered = 0;
1654 for (filter = so->so_filt; filter && error == 0;
2d21ac55 1655 filter = filter->sfe_next_onsocket) {
91447636
A
1656 if (filter->sfe_filter->sf_filter.sf_getsockname) {
1657 if (!filtered) {
1658 filtered = 1;
1659 sflt_use(so);
1660 socket_unlock(so, 0);
1661 }
2d21ac55
A
1662 error = filter->sfe_filter->sf_filter.
1663 sf_getsockname(filter->sfe_cookie, so, &sa);
91447636
A
1664 }
1665 }
2d21ac55 1666
91447636
A
1667 if (error == EJUSTRETURN)
1668 error = 0;
2d21ac55 1669
91447636
A
1670 if (filtered) {
1671 socket_lock(so, 0);
1672 sflt_unuse(so);
1673 }
1674 }
1675 socket_unlock(so, 1);
1c79356b
A
1676 if (error)
1677 goto bad;
1678 if (sa == 0) {
1679 len = 0;
1680 goto gotnothing;
1681 }
1682
2d21ac55
A
1683 sa_len = sa->sa_len;
1684 len = MIN(len, sa_len);
91447636 1685 error = copyout((caddr_t)sa, uap->asa, len);
2d21ac55
A
1686 if (error)
1687 goto bad;
1688 /* return the actual, untruncated address length */
1689 len = sa_len;
1c79356b 1690gotnothing:
2d21ac55 1691 error = copyout((caddr_t)&len, uap->alen, sizeof (socklen_t));
1c79356b
A
1692bad:
1693 if (sa)
1694 FREE(sa, M_SONAME);
91447636
A
1695out:
1696 file_drop(uap->fdes);
1c79356b
A
1697 return (error);
1698}
1699
1c79356b
A
1700/*
1701 * Get name of peer for connected socket.
2d21ac55
A
1702 *
1703 * Returns: 0 Success
1704 * EBADF
1705 * EINVAL
1706 * ENOTCONN
1707 * file_socket:ENOTSOCK
1708 * file_socket:EBADF
1709 * copyin:EFAULT
1710 * copyout:EFAULT
1711 * <pru_peeraddr>:???
1712 * <sf_getpeername>:???
1c79356b
A
1713 */
1714/* ARGSUSED */
1715int
2d21ac55
A
1716getpeername(__unused struct proc *p, struct getpeername_args *uap,
1717 __unused register_t *retval)
1c79356b 1718{
91447636 1719 struct socket *so;
1c79356b 1720 struct sockaddr *sa;
91447636 1721 socklen_t len;
2d21ac55 1722 socklen_t sa_len;
1c79356b
A
1723 int error;
1724
91447636 1725 error = file_socket(uap->fdes, &so);
1c79356b
A
1726 if (error)
1727 return (error);
91447636
A
1728 if (so == NULL) {
1729 error = EBADF;
1730 goto out;
1731 }
1732
1733 socket_lock(so, 1);
1734
2d21ac55
A
1735 if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
1736 (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
1737 /* the socket has been shutdown, no more getpeername's */
1738 socket_unlock(so, 1);
1739 error = EINVAL;
1740 goto out;
1741 }
1742
91447636
A
1743 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
1744 socket_unlock(so, 1);
1745 error = ENOTCONN;
1746 goto out;
1747 }
2d21ac55 1748 error = copyin(uap->alen, (caddr_t)&len, sizeof (socklen_t));
91447636
A
1749 if (error) {
1750 socket_unlock(so, 1);
1751 goto out;
1752 }
1c79356b
A
1753 sa = 0;
1754 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
2d21ac55 1755 if (error == 0) {
91447636
A
1756 struct socket_filter_entry *filter;
1757 int filtered = 0;
1758 for (filter = so->so_filt; filter && error == 0;
2d21ac55 1759 filter = filter->sfe_next_onsocket) {
91447636
A
1760 if (filter->sfe_filter->sf_filter.sf_getpeername) {
1761 if (!filtered) {
1762 filtered = 1;
1763 sflt_use(so);
1764 socket_unlock(so, 0);
1765 }
2d21ac55
A
1766 error = filter->sfe_filter->sf_filter.
1767 sf_getpeername(filter->sfe_cookie, so, &sa);
91447636
A
1768 }
1769 }
2d21ac55 1770
91447636
A
1771 if (error == EJUSTRETURN)
1772 error = 0;
2d21ac55 1773
91447636
A
1774 if (filtered) {
1775 socket_lock(so, 0);
1776 sflt_unuse(so);
1777 }
1778 }
1779 socket_unlock(so, 1);
1c79356b
A
1780 if (error)
1781 goto bad;
1782 if (sa == 0) {
1783 len = 0;
1784 goto gotnothing;
1785 }
2d21ac55
A
1786 sa_len = sa->sa_len;
1787 len = MIN(len, sa_len);
91447636 1788 error = copyout(sa, uap->asa, len);
1c79356b
A
1789 if (error)
1790 goto bad;
2d21ac55
A
1791 /* return the actual, untruncated address length */
1792 len = sa_len;
1c79356b 1793gotnothing:
2d21ac55 1794 error = copyout((caddr_t)&len, uap->alen, sizeof (socklen_t));
1c79356b
A
1795bad:
1796 if (sa) FREE(sa, M_SONAME);
91447636
A
1797out:
1798 file_drop(uap->fdes);
1c79356b
A
1799 return (error);
1800}
1801
1802int
2d21ac55 1803sockargs(struct mbuf **mp, user_addr_t data, int buflen, int type)
1c79356b 1804{
2d21ac55
A
1805 struct sockaddr *sa;
1806 struct mbuf *m;
1c79356b
A
1807 int error;
1808
1809 if ((u_int)buflen > MLEN) {
1c79356b
A
1810 if (type == MT_SONAME && (u_int)buflen <= 112)
1811 buflen = MLEN; /* unix domain compat. hack */
2d21ac55 1812 else if ((u_int)buflen > MCLBYTES)
91447636 1813 return (EINVAL);
1c79356b
A
1814 }
1815 m = m_get(M_WAIT, type);
1816 if (m == NULL)
1817 return (ENOBUFS);
91447636
A
1818 if ((u_int)buflen > MLEN) {
1819 MCLGET(m, M_WAIT);
1820 if ((m->m_flags & M_EXT) == 0) {
1821 m_free(m);
2d21ac55 1822 return (ENOBUFS);
91447636
A
1823 }
1824 }
1c79356b 1825 m->m_len = buflen;
91447636 1826 error = copyin(data, mtod(m, caddr_t), (u_int)buflen);
2d21ac55 1827 if (error) {
1c79356b 1828 (void) m_free(m);
2d21ac55 1829 } else {
1c79356b
A
1830 *mp = m;
1831 if (type == MT_SONAME) {
1832 sa = mtod(m, struct sockaddr *);
1c79356b
A
1833 sa->sa_len = buflen;
1834 }
1835 }
1836 return (error);
1837}
1838
91447636
A
1839/*
1840 * Given a user_addr_t of length len, allocate and fill out a *sa.
2d21ac55
A
1841 *
1842 * Returns: 0 Success
1843 * ENAMETOOLONG Filename too long
1844 * EINVAL Invalid argument
1845 * ENOMEM Not enough space
1846 * copyin:EFAULT Bad address
91447636 1847 */
2d21ac55
A
1848static int
1849getsockaddr(struct socket *so, struct sockaddr **namp, user_addr_t uaddr,
4a3eedf9 1850 size_t len, boolean_t translate_unspec)
1c79356b
A
1851{
1852 struct sockaddr *sa;
1853 int error;
1854
1855 if (len > SOCK_MAXADDRLEN)
2d21ac55 1856 return (ENAMETOOLONG);
1c79356b 1857
2d21ac55
A
1858 if (len < offsetof(struct sockaddr, sa_data[0]))
1859 return (EINVAL);
1c79356b 1860
2d21ac55 1861 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK | M_ZERO);
91447636 1862 if (sa == NULL) {
2d21ac55 1863 return (ENOMEM);
91447636
A
1864 }
1865 error = copyin(uaddr, (caddr_t)sa, len);
1c79356b
A
1866 if (error) {
1867 FREE(sa, M_SONAME);
1868 } else {
2d21ac55
A
1869 /*
1870 * Force sa_family to AF_INET on AF_INET sockets to handle
1871 * legacy applications that use AF_UNSPEC (0). On all other
1872 * sockets we leave it unchanged and let the lower layer
1873 * handle it.
1874 */
4a3eedf9 1875 if (translate_unspec && sa->sa_family == AF_UNSPEC &&
2d21ac55
A
1876 INP_CHECK_SOCKAF(so, AF_INET) &&
1877 len == sizeof (struct sockaddr_in))
1878 sa->sa_family = AF_INET;
1879
1c79356b
A
1880 sa->sa_len = len;
1881 *namp = sa;
1882 }
2d21ac55 1883 return (error);
1c79356b
A
1884}
1885
2d21ac55
A
1886static int
1887getsockaddr_s(struct socket *so, struct sockaddr_storage *ss,
4a3eedf9 1888 user_addr_t uaddr, size_t len, boolean_t translate_unspec)
1c79356b 1889{
2d21ac55
A
1890 int error;
1891
1892 if (ss == NULL || uaddr == USER_ADDR_NULL ||
1893 len < offsetof(struct sockaddr, sa_data[0]))
1894 return (EINVAL);
1895
1896 /*
1897 * sockaddr_storage size is less than SOCK_MAXADDRLEN,
1898 * so the check here is inclusive.
1899 */
1900 if (len > sizeof (*ss))
1901 return (ENAMETOOLONG);
1c79356b 1902
2d21ac55
A
1903 bzero(ss, sizeof (*ss));
1904 error = copyin(uaddr, (caddr_t)ss, len);
1905 if (error == 0) {
1906 /*
1907 * Force sa_family to AF_INET on AF_INET sockets to handle
1908 * legacy applications that use AF_UNSPEC (0). On all other
1909 * sockets we leave it unchanged and let the lower layer
1910 * handle it.
1911 */
4a3eedf9 1912 if (translate_unspec && ss->ss_family == AF_UNSPEC &&
2d21ac55
A
1913 INP_CHECK_SOCKAF(so, AF_INET) &&
1914 len == sizeof (struct sockaddr_in))
1915 ss->ss_family = AF_INET;
91447636 1916
2d21ac55 1917 ss->ss_len = len;
1c79356b 1918 }
2d21ac55 1919 return (error);
1c79356b
A
1920}
1921
2d21ac55
A
1922#if SENDFILE
1923
1924SYSCTL_DECL(_kern_ipc);
1925
1926#define SFUIOBUFS 64
1927static int sendfileuiobufs = SFUIOBUFS;
1928SYSCTL_INT(_kern_ipc, OID_AUTO, sendfileuiobufs, CTLFLAG_RW, &sendfileuiobufs,
1929 0, "");
1930
1931/* Macros to compute the number of mbufs needed depending on cluster size */
1932#define HOWMANY_16K(n) ((((unsigned int)(n) - 1) >> (PGSHIFT + 2)) + 1)
1933#define HOWMANY_4K(n) ((((unsigned int)(n) - 1) >> PGSHIFT) + 1)
1934
1935/* Upper send limit in bytes (sendfileuiobufs * PAGESIZE) */
1936#define SENDFILE_MAX_BYTES (sendfileuiobufs << PGSHIFT)
1937
1938/* Upper send limit in the number of mbuf clusters */
1939#define SENDFILE_MAX_16K HOWMANY_16K(SENDFILE_MAX_BYTES)
1940#define SENDFILE_MAX_4K HOWMANY_4K(SENDFILE_MAX_BYTES)
1941
1942size_t mbuf_pkt_maxlen(mbuf_t m);
1943
1944__private_extern__ size_t
1945mbuf_pkt_maxlen(mbuf_t m)
1c79356b 1946{
2d21ac55 1947 size_t maxlen = 0;
1c79356b 1948
2d21ac55
A
1949 while (m) {
1950 maxlen += mbuf_maxlen(m);
1951 m = mbuf_next(m);
1c79356b 1952 }
2d21ac55 1953 return (maxlen);
1c79356b
A
1954}
1955
1c79356b 1956static void
2d21ac55
A
1957alloc_sendpkt(int how, size_t pktlen, unsigned int *maxchunks,
1958 struct mbuf **m, boolean_t jumbocl)
1c79356b 1959{
2d21ac55 1960 unsigned int needed;
1c79356b 1961
2d21ac55
A
1962 if (pktlen == 0)
1963 panic("%s: pktlen (%ld) must be non-zero\n", __func__, pktlen);
1c79356b 1964
2d21ac55
A
1965 /*
1966 * Try to allocate for the whole thing. Since we want full control
1967 * over the buffer size and be able to accept partial result, we can't
1968 * use mbuf_allocpacket(). The logic below is similar to sosend().
1969 */
1970 *m = NULL;
1971 if (pktlen > NBPG && jumbocl) {
1972 needed = MIN(SENDFILE_MAX_16K, HOWMANY_16K(pktlen));
1973 *m = m_getpackets_internal(&needed, 1, how, 0, M16KCLBYTES);
1974 }
1975 if (*m == NULL) {
1976 needed = MIN(SENDFILE_MAX_4K, HOWMANY_4K(pktlen));
1977 *m = m_getpackets_internal(&needed, 1, how, 0, NBPG);
1978 }
1979
1980 /*
1981 * Our previous attempt(s) at allocation had failed; the system
1982 * may be short on mbufs, and we want to block until they are
1983 * available. This time, ask just for 1 mbuf and don't return
1984 * until we get it.
1985 */
1986 if (*m == NULL) {
1987 needed = 1;
1988 *m = m_getpackets_internal(&needed, 1, M_WAIT, 1, NBPG);
1c79356b 1989 }
2d21ac55
A
1990 if (*m == NULL)
1991 panic("%s: blocking allocation returned NULL\n", __func__);
1992
1993 *maxchunks = needed;
1c79356b
A
1994}
1995
1996/*
1997 * sendfile(2).
2d21ac55
A
1998 * int sendfile(int fd, int s, off_t offset, off_t *nbytes,
1999 * struct sf_hdtr *hdtr, int flags)
1c79356b
A
2000 *
2001 * Send a file specified by 'fd' and starting at 'offset' to a socket
2d21ac55
A
2002 * specified by 's'. Send only '*nbytes' of the file or until EOF if
2003 * *nbytes == 0. Optionally add a header and/or trailer to the socket
2004 * output. If specified, write the total number of bytes sent into *nbytes.
1c79356b
A
2005 */
2006int
2d21ac55 2007sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval)
1c79356b 2008{
91447636 2009 struct fileproc *fp;
1c79356b 2010 struct vnode *vp;
1c79356b 2011 struct socket *so;
2d21ac55
A
2012 struct writev_nocancel_args nuap;
2013 user_ssize_t writev_retval;
1c79356b 2014 struct sf_hdtr hdtr;
2d21ac55
A
2015 struct user_sf_hdtr user_hdtr;
2016 off_t off, xfsize;
2017 off_t nbytes = 0, sbytes = 0;
2018 int error = 0;
2019 size_t sizeof_hdtr;
2020 size_t size_of_iovec;
2021 off_t file_size;
2022 struct vfs_context context = *vfs_context_current();
2023
2024 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_START), uap->s,
2025 0, 0, 0, 0);
1c79356b
A
2026 /*
2027 * Do argument checking. Must be a regular file in, stream
2028 * type and connected socket out, positive offset.
2029 */
2d21ac55 2030 if ((error = fp_getfvp(p, uap->fd, &fp, &vp))) {
1c79356b 2031 goto done;
2d21ac55
A
2032 }
2033 if ((fp->f_flag & FREAD) == 0) {
91447636
A
2034 error = EBADF;
2035 goto done1;
1c79356b 2036 }
2d21ac55
A
2037 if (vnode_isreg(vp) == 0) {
2038 error = ENOTSUP;
91447636 2039 goto done1;
1c79356b 2040 }
91447636 2041 error = file_socket(uap->s, &so);
2d21ac55 2042 if (error) {
91447636 2043 goto done1;
2d21ac55 2044 }
55e303ae
A
2045 if (so == NULL) {
2046 error = EBADF;
91447636 2047 goto done2;
55e303ae 2048 }
1c79356b
A
2049 if (so->so_type != SOCK_STREAM) {
2050 error = EINVAL;
2d21ac55 2051 goto done2;
1c79356b
A
2052 }
2053 if ((so->so_state & SS_ISCONNECTED) == 0) {
2054 error = ENOTCONN;
2d21ac55 2055 goto done2;
1c79356b
A
2056 }
2057 if (uap->offset < 0) {
2058 error = EINVAL;
2d21ac55 2059 goto done2;
1c79356b 2060 }
2d21ac55
A
2061 if (uap->nbytes == USER_ADDR_NULL) {
2062 error = EINVAL;
2063 goto done2;
2064 }
2065 if (uap->flags != 0) {
2066 error = EINVAL;
2067 goto done2;
2068 }
2069
2070 context.vc_ucred = fp->f_fglob->fg_cred;
2071
2072#if CONFIG_MACF_SOCKET_SUBSET
2073 /* JMM - fetch connected sockaddr? */
2074 error = mac_socket_check_send(context.vc_ucred, so, NULL);
2075 if (error)
2076 goto done2;
2077#endif
2078
2079 /*
2080 * Get number of bytes to send
2081 * Should it applies to size of header and trailer?
2082 * JMM - error handling?
2083 */
2084 copyin(uap->nbytes, &nbytes, sizeof (off_t));
1c79356b
A
2085
2086 /*
2087 * If specified, get the pointer to the sf_hdtr struct for
2088 * any headers/trailers.
2089 */
2d21ac55
A
2090 if (uap->hdtr != USER_ADDR_NULL) {
2091 caddr_t hdtrp;
2092
2093 bzero(&user_hdtr, sizeof (user_hdtr));
2094 if (IS_64BIT_PROCESS(p)) {
2095 hdtrp = (caddr_t)&user_hdtr;
2096 sizeof_hdtr = sizeof (user_hdtr);
2097 size_of_iovec = sizeof (struct user_iovec);
2098 } else {
2099 hdtrp = (caddr_t)&hdtr;
2100 sizeof_hdtr = sizeof (hdtr);
2101 size_of_iovec = sizeof (struct iovec);
2102 }
2103 error = copyin(uap->hdtr, hdtrp, sizeof_hdtr);
1c79356b 2104 if (error)
2d21ac55
A
2105 goto done2;
2106 /* need to copy if user process is not 64-bit */
2107 if (!IS_64BIT_PROCESS(p)) {
2108 user_hdtr.headers = CAST_USER_ADDR_T(hdtr.headers);
2109 user_hdtr.hdr_cnt = hdtr.hdr_cnt;
2110 user_hdtr.trailers = CAST_USER_ADDR_T(hdtr.trailers);
2111 user_hdtr.trl_cnt = hdtr.trl_cnt;
2112 }
2113
1c79356b
A
2114 /*
2115 * Send any headers. Wimp out and use writev(2).
2116 */
2d21ac55
A
2117 if (user_hdtr.headers != USER_ADDR_NULL) {
2118 bzero(&nuap, sizeof (struct writev_args));
1c79356b 2119 nuap.fd = uap->s;
2d21ac55
A
2120 nuap.iovp = user_hdtr.headers;
2121 nuap.iovcnt = user_hdtr.hdr_cnt;
2122 error = writev_nocancel(p, &nuap, &writev_retval);
1c79356b 2123 if (error)
2d21ac55
A
2124 goto done2;
2125 sbytes += writev_retval;
1c79356b
A
2126 }
2127 }
2128
2129 /*
2d21ac55
A
2130 * Get the file size for 2 reasons:
2131 * 1. We don't want to allocate more mbufs than necessary
2132 * 2. We don't want to read past the end of file
1c79356b 2133 */
2d21ac55
A
2134 if ((error = vnode_size(vp, &file_size, vfs_context_current())) != 0)
2135 goto done2;
1c79356b
A
2136
2137 /*
2d21ac55
A
2138 * Simply read file data into a chain of mbufs that used with scatter
2139 * gather reads. We're not (yet?) setup to use zero copy external
2140 * mbufs that point to the file pages.
1c79356b 2141 */
2d21ac55
A
2142 socket_lock(so, 1);
2143 error = sblock(&so->so_snd, M_WAIT);
2144 if (error) {
2145 socket_unlock(so, 1);
2146 goto done2;
2147 }
1c79356b 2148 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
2d21ac55
A
2149 mbuf_t m0 = NULL, m;
2150 unsigned int nbufs = sendfileuiobufs, i;
2151 uio_t auio;
2152 char uio_buf[UIO_SIZEOF(sendfileuiobufs)]; /* 1 KB !!! */
2153 size_t uiolen;
2154 user_ssize_t rlen;
2155 off_t pgoff;
2156 size_t pktlen;
2157 boolean_t jumbocl;
1c79356b 2158
1c79356b 2159 /*
2d21ac55
A
2160 * Calculate the amount to transfer.
2161 * Align to round number of pages.
2162 * Not to exceed send socket buffer,
1c79356b
A
2163 * the EOF, or the passed in nbytes.
2164 */
2d21ac55
A
2165 xfsize = sbspace(&so->so_snd);
2166
2167 if (xfsize <= 0) {
2168 if (so->so_state & SS_CANTSENDMORE) {
2169 error = EPIPE;
2170 goto done3;
2171 } else if ((so->so_state & SS_NBIO)) {
2172 error = EAGAIN;
2173 goto done3;
2174 } else {
2175 xfsize = PAGE_SIZE;
2176 }
2177 }
2178
2179 if (xfsize > SENDFILE_MAX_BYTES)
2180 xfsize = SENDFILE_MAX_BYTES;
2181 else if (xfsize > PAGE_SIZE)
2182 xfsize = trunc_page(xfsize);
2183 pgoff = off & PAGE_MASK_64;
2184 if (pgoff > 0 && PAGE_SIZE - pgoff < xfsize)
1c79356b 2185 xfsize = PAGE_SIZE_64 - pgoff;
2d21ac55
A
2186 if (nbytes && xfsize > (nbytes - sbytes))
2187 xfsize = nbytes - sbytes;
2188 if (xfsize <= 0)
2189 break;
2190 if (off + xfsize > file_size)
2191 xfsize = file_size - off;
1c79356b
A
2192 if (xfsize <= 0)
2193 break;
2d21ac55 2194
1c79356b 2195 /*
2d21ac55
A
2196 * Attempt to use larger than system page-size clusters for
2197 * large writes only if there is a jumbo cluster pool and
2198 * if the socket is marked accordingly.
1c79356b 2199 */
2d21ac55
A
2200 jumbocl = sosendjcl && njcl > 0 &&
2201 ((so->so_flags & SOF_MULTIPAGES) || sosendjcl_ignore_capab);
2202
2203 socket_unlock(so, 0);
2204 alloc_sendpkt(M_WAIT, xfsize, &nbufs, &m0, jumbocl);
2205 pktlen = mbuf_pkt_maxlen(m0);
2206 if (pktlen < xfsize)
2207 xfsize = pktlen;
2208
2209 auio = uio_createwithbuffer(nbufs, off, UIO_SYSSPACE,
2210 UIO_READ, &uio_buf[0], sizeof (uio_buf));
2211 if (auio == NULL) {
2212 //printf("sendfile: uio_createwithbuffer failed\n");
2213 mbuf_freem(m0);
2214 error = ENXIO;
2215 socket_lock(so, 0);
2216 goto done3;
1c79356b 2217 }
1c79356b 2218
2d21ac55
A
2219 for (i = 0, m = m0, uiolen = 0;
2220 i < nbufs && m != NULL && uiolen < xfsize;
2221 i++, m = mbuf_next(m)) {
2222 size_t mlen = mbuf_maxlen(m);
2223
2224 if (mlen + uiolen > xfsize)
2225 mlen = xfsize - uiolen;
2226 mbuf_setlen(m, mlen);
2227 uio_addiov(auio, CAST_USER_ADDR_T(mbuf_datastart(m)),
2228 mlen);
2229 uiolen += mlen;
2230 }
2231
2232 if (xfsize != uio_resid(auio))
2233 printf("sendfile: xfsize: %lld != uio_resid(auio): "
2234 "%lld\n", xfsize, uio_resid(auio));
2235
2236 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_START),
2237 uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
2238 (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
2239 error = fo_read(fp, auio, FOF_OFFSET, &context);
2240 socket_lock(so, 0);
2241 if (error != 0) {
2242 if (uio_resid(auio) != xfsize && (error == ERESTART ||
2243 error == EINTR || error == EWOULDBLOCK)) {
2244 error = 0;
2245 } else {
2246 mbuf_freem(m0);
2247 goto done3;
1c79356b 2248 }
1c79356b 2249 }
2d21ac55
A
2250 xfsize -= uio_resid(auio);
2251 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_END),
2252 uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
2253 (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
2254
2255 if (xfsize == 0) {
2256 //printf("sendfile: fo_read 0 bytes, EOF\n");
2257 break;
91447636 2258 }
2d21ac55
A
2259 if (xfsize + off > file_size)
2260 printf("sendfile: xfsize: %lld + off: %lld > file_size:"
2261 "%lld\n", xfsize, off, file_size);
2262 for (i = 0, m = m0, rlen = 0;
2263 i < nbufs && m != NULL && rlen < xfsize;
2264 i++, m = mbuf_next(m)) {
2265 size_t mlen = mbuf_maxlen(m);
2266
2267 if (rlen + mlen > xfsize)
2268 mlen = xfsize - rlen;
2269 mbuf_setlen(m, mlen);
2270
2271 rlen += mlen;
2272 }
2273 mbuf_pkthdr_setlen(m0, xfsize);
2274
1c79356b
A
2275retry_space:
2276 /*
2277 * Make sure that the socket is still able to take more data.
2278 * CANTSENDMORE being true usually means that the connection
2279 * was closed. so_error is true when an error was sensed after
2280 * a previous send.
2281 * The state is checked after the page mapping and buffer
2282 * allocation above since those operations may block and make
2283 * any socket checks stale. From this point forward, nothing
2284 * blocks before the pru_send (or more accurately, any blocking
2285 * results in a loop back to here to re-check).
2286 */
2287 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
2288 if (so->so_state & SS_CANTSENDMORE) {
2289 error = EPIPE;
2290 } else {
2291 error = so->so_error;
2292 so->so_error = 0;
2293 }
2d21ac55
A
2294 m_freem(m0);
2295 goto done3;
1c79356b
A
2296 }
2297 /*
2298 * Wait for socket space to become available. We do this just
2299 * after checking the connection state above in order to avoid
2300 * a race condition with sbwait().
2301 */
2d21ac55 2302 if (sbspace(&so->so_snd) < (long)so->so_snd.sb_lowat) {
1c79356b 2303 if (so->so_state & SS_NBIO) {
2d21ac55 2304 m_freem(m0);
1c79356b 2305 error = EAGAIN;
2d21ac55 2306 goto done3;
1c79356b 2307 }
2d21ac55
A
2308 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT |
2309 DBG_FUNC_START), uap->s, 0, 0, 0, 0);
1c79356b 2310 error = sbwait(&so->so_snd);
2d21ac55
A
2311 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT|
2312 DBG_FUNC_END), uap->s, 0, 0, 0, 0);
1c79356b
A
2313 /*
2314 * An error from sbwait usually indicates that we've
2315 * been interrupted by a signal. If we've sent anything
2316 * then return bytes sent, otherwise return the error.
2317 */
2318 if (error) {
2d21ac55
A
2319 m_freem(m0);
2320 goto done3;
1c79356b
A
2321 }
2322 goto retry_space;
2323 }
2d21ac55
A
2324 {
2325 /*
2326 * Socket filter processing
2327 */
2328 struct socket_filter_entry *filter;
2329 int filtered = 0;
2330 struct mbuf *control = NULL;
2331 boolean_t recursive = (so->so_send_filt_thread != NULL);
2332
2333 error = 0;
2334 for (filter = so->so_filt; filter && (error == 0);
2335 filter = filter->sfe_next_onsocket) {
2336 if (filter->sfe_filter->sf_filter.sf_data_out) {
2337 if (filtered == 0) {
2338 filtered = 1;
2339 so->so_send_filt_thread =
2340 current_thread();
2341 sflt_use(so);
2342 socket_unlock(so, 0);
2343 }
2344 error = filter->sfe_filter->sf_filter.
2345 sf_data_out(filter->sfe_cookie, so,
2346 NULL, &m0, &control, 0);
2347 }
2348 }
2349
2350 if (filtered) {
2351 /*
2352 * At this point, we've run at least one filter.
2353 * The socket is unlocked as is the socket
2354 * buffer. Clear the recorded filter thread
2355 * only when we are outside of a filter's
2356 * context. This allows for a filter to issue
2357 * multiple inject calls from its sf_data_out
2358 * callback routine.
2359 */
2360 socket_lock(so, 0);
2361 sflt_unuse(so);
2362 if (!recursive)
2363 so->so_send_filt_thread = 0;
2364 if (error) {
2365 if (error == EJUSTRETURN) {
2366 error = 0;
2367 continue;
2368 }
2369 goto done3;
2370 }
2371 }
2372 /*
2373 * End Socket filter processing
2374 */
2375 }
2376 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
2377 uap->s, 0, 0, 0, 0);
2378 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m0,
2379 0, 0, p);
2380 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
2381 uap->s, 0, 0, 0, 0);
1c79356b 2382 if (error) {
2d21ac55 2383 goto done3;
1c79356b
A
2384 }
2385 }
2d21ac55 2386 sbunlock(&so->so_snd, 0); /* will unlock socket */
1c79356b
A
2387 /*
2388 * Send trailers. Wimp out and use writev(2).
2389 */
2d21ac55
A
2390 if (uap->hdtr != USER_ADDR_NULL &&
2391 user_hdtr.trailers != USER_ADDR_NULL) {
2392 bzero(&nuap, sizeof (struct writev_args));
2393 nuap.fd = uap->s;
2394 nuap.iovp = user_hdtr.trailers;
2395 nuap.iovcnt = user_hdtr.trl_cnt;
2396 error = writev_nocancel(p, &nuap, &writev_retval);
2397 if (error)
2398 goto done2;
2399 sbytes += writev_retval;
1c79356b 2400 }
91447636
A
2401done2:
2402 file_drop(uap->s);
2403done1:
2404 file_drop(uap->fd);
1c79356b 2405done:
2d21ac55 2406 if (uap->nbytes != USER_ADDR_NULL) {
91447636 2407 /* XXX this appears bogus for some early failure conditions */
2d21ac55 2408 copyout(&sbytes, uap->nbytes, sizeof (off_t));
1c79356b 2409 }
2d21ac55
A
2410 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_END), uap->s,
2411 (unsigned int)((sbytes >> 32) & 0x0ffffffff),
2412 (unsigned int)(sbytes & 0x0ffffffff), error, 0);
1c79356b 2413 return (error);
91447636 2414done3:
2d21ac55 2415 sbunlock(&so->so_snd, 0); /* will unlock socket */
91447636 2416 goto done2;
1c79356b
A
2417}
2418
2d21ac55
A
2419
2420#endif /* SENDFILE */