]> git.saurik.com Git - apple/xnu.git/blob - bsd/kern/uipc_syscalls.c
xnu-1228.5.18.tar.gz
[apple/xnu.git] / bsd / kern / uipc_syscalls.c
1 /*
2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1982, 1986, 1989, 1990, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * sendfile(2) and related extensions:
33 * Copyright (c) 1998, David Greenman. All rights reserved.
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. All advertising materials mentioning features or use of this software
44 * must display the following acknowledgement:
45 * This product includes software developed by the University of
46 * California, Berkeley and its contributors.
47 * 4. Neither the name of the University nor the names of its contributors
48 * may be used to endorse or promote products derived from this software
49 * without specific prior written permission.
50 *
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE.
62 *
63 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94
64 */
65 /*
66 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
67 * support for mandatory and extensible security protections. This notice
68 * is included in support of clause 2.2 (b) of the Apple Public License,
69 * Version 2.0.
70 */
71
72 #include <sys/param.h>
73 #include <sys/systm.h>
74 #include <sys/filedesc.h>
75 #include <sys/proc_internal.h>
76 #include <sys/file_internal.h>
77 #include <sys/vnode_internal.h>
78 #include <sys/malloc.h>
79 #include <sys/mbuf.h>
80 #include <kern/lock.h>
81 #include <sys/domain.h>
82 #include <sys/protosw.h>
83 #include <sys/signalvar.h>
84 #include <sys/socket.h>
85 #include <sys/socketvar.h>
86 #include <sys/kernel.h>
87 #include <sys/uio_internal.h>
88 #include <sys/kauth.h>
89
90 #include <bsm/audit_kernel.h>
91
92 #include <sys/kdebug.h>
93 #include <sys/sysproto.h>
94 #include <netinet/in.h>
95 #include <net/route.h>
96 #include <netinet/in_pcb.h>
97
98 #if CONFIG_MACF_SOCKET_SUBSET
99 #include <security/mac_framework.h>
100 #endif /* MAC_SOCKET_SUBSET */
101
/*
 * Shorthand accessors for struct fileproc: each f_* name expands to the
 * matching fg_* member reached through the fileproc's f_fglob pointer,
 * so "fp->f_flag" reads as "fp->f_fglob->fg_flag".
 */
#define f_flag f_fglob->fg_flag
#define f_type f_fglob->fg_type
#define f_msgcount f_fglob->fg_msgcount
#define f_cred f_fglob->fg_cred
#define f_ops f_fglob->fg_ops
#define f_offset f_fglob->fg_offset
#define f_data f_fglob->fg_data


/*
 * kdebug trace codes, all built with NETDBG_CODE() in the DBG_NETSOCK
 * class.  The DBG_FNC_* values are passed to KERNEL_DEBUG() together with
 * DBG_FUNC_START / DBG_FUNC_END by the send and receive paths in this file.
 */
#define DBG_LAYER_IN_BEG NETDBG_CODE(DBG_NETSOCK, 0)
#define DBG_LAYER_IN_END NETDBG_CODE(DBG_NETSOCK, 2)
#define DBG_LAYER_OUT_BEG NETDBG_CODE(DBG_NETSOCK, 1)
#define DBG_LAYER_OUT_END NETDBG_CODE(DBG_NETSOCK, 3)
#define DBG_FNC_SENDMSG NETDBG_CODE(DBG_NETSOCK, (1 << 8) | 1)
#define DBG_FNC_SENDTO NETDBG_CODE(DBG_NETSOCK, (2 << 8) | 1)
#define DBG_FNC_SENDIT NETDBG_CODE(DBG_NETSOCK, (3 << 8) | 1)
#define DBG_FNC_RECVFROM NETDBG_CODE(DBG_NETSOCK, (5 << 8))
#define DBG_FNC_RECVMSG NETDBG_CODE(DBG_NETSOCK, (6 << 8))
#define DBG_FNC_RECVIT NETDBG_CODE(DBG_NETSOCK, (7 << 8))
#define DBG_FNC_SENDFILE NETDBG_CODE(DBG_NETSOCK, (10 << 8))
#define DBG_FNC_SENDFILE_WAIT NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 1))
#define DBG_FNC_SENDFILE_READ NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 2))
#define DBG_FNC_SENDFILE_SEND NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 3))


/*
 * Radar 4056224: binary-compatibility workaround that lets send() and
 * recv() operate on a pipe descriptor.  last_pid_4056224 remembers the
 * largest pid already reported, so the console message in
 * sendto_nocancel() is printed only when a process with a larger pid
 * takes the workaround path.
 */
#define HACK_FOR_4056224 1
#if HACK_FOR_4056224
static pid_t last_pid_4056224 = 0;
#endif /* HACK_FOR_4056224 */
131
/* TODO: should be in header file */
int falloc_locked(proc_t, struct fileproc **, int *, vfs_context_t, int);

/*
 * Forward declarations for the helpers private to this file.
 *
 * sendit() is the common back end of sendto_nocancel() and
 * sendmsg_nocancel().  getsockaddr() hands back the copied-in address
 * through a pointer that its callers release with FREE(..., M_SONAME);
 * getsockaddr_s() fills a caller-supplied struct sockaddr_storage instead.
 */
static int sendit(struct proc *, int, struct user_msghdr *, uio_t, int,
    register_t *);
static int recvit(struct proc *, int, struct user_msghdr *, uio_t, user_addr_t,
    register_t *);
static int getsockaddr(struct socket *, struct sockaddr **, user_addr_t,
    size_t, boolean_t);
static int getsockaddr_s(struct socket *, struct sockaddr_storage *,
    user_addr_t, size_t, boolean_t);
#if SENDFILE
static void alloc_sendpkt(int, size_t, unsigned int *, struct mbuf **,
    boolean_t);
#endif /* SENDFILE */

/*
 * System call interface to the socket abstraction.
 */

/*
 * File operations vector stored in f_ops of every fileproc that the
 * system calls in this file create for a socket.
 */
extern struct fileops socketops;
153
154 /*
155 * Returns: 0 Success
156 * EACCES Mandatory Access Control failure
157 * falloc:ENFILE
158 * falloc:EMFILE
159 * falloc:ENOMEM
160 * socreate:EAFNOSUPPORT
161 * socreate:EPROTOTYPE
162 * socreate:EPROTONOSUPPORT
163 * socreate:ENOBUFS
164 * socreate:ENOMEM
165 * socreate:EISCONN
166 * socreate:??? [other protocol families, IPSEC]
167 */
168 int
169 socket(struct proc *p, struct socket_args *uap, register_t *retval)
170 {
171 struct socket *so;
172 struct fileproc *fp;
173 int fd, error;
174
175 AUDIT_ARG(socket, uap->domain, uap->type, uap->protocol);
176 #if CONFIG_MACF_SOCKET_SUBSET
177 if ((error = mac_socket_check_create(kauth_cred_get(), uap->domain,
178 uap->type, uap->protocol)) != 0)
179 return (error);
180 #endif /* MAC_SOCKET_SUBSET */
181
182 error = falloc(p, &fp, &fd, vfs_context_current());
183 if (error) {
184 return (error);
185 }
186 fp->f_flag = FREAD|FWRITE;
187 fp->f_type = DTYPE_SOCKET;
188 fp->f_ops = &socketops;
189
190 error = socreate(uap->domain, &so, uap->type, uap->protocol);
191 if (error) {
192 fp_free(p, fd, fp);
193 } else {
194 thread_t thread;
195 struct uthread *ut;
196
197 thread = current_thread();
198 ut = get_bsdthread_info(thread);
199
200 /* if this is a backgrounded thread then throttle all new sockets */
201 if ( (ut->uu_flag & UT_BACKGROUND) != 0 ) {
202 so->so_traffic_mgt_flags |= TRAFFIC_MGT_SO_BACKGROUND;
203 so->so_background_thread = thread;
204 }
205 fp->f_data = (caddr_t)so;
206
207 proc_fdlock(p);
208 procfdtbl_releasefd(p, fd, NULL);
209
210 fp_drop(p, fd, fp, 1);
211 proc_fdunlock(p);
212
213 *retval = fd;
214 }
215 return (error);
216 }
217
218 /*
219 * Returns: 0 Success
220 * EDESTADDRREQ Destination address required
221 * EBADF Bad file descriptor
222 * EACCES Mandatory Access Control failure
223 * file_socket:ENOTSOCK
224 * file_socket:EBADF
225 * getsockaddr:ENAMETOOLONG Filename too long
226 * getsockaddr:EINVAL Invalid argument
227 * getsockaddr:ENOMEM Not enough space
228 * getsockaddr:EFAULT Bad address
229 * sobind:???
230 */
231 /* ARGSUSED */
232 int
233 bind(__unused proc_t p, struct bind_args *uap, __unused register_t *retval)
234 {
235 struct sockaddr_storage ss;
236 struct sockaddr *sa = NULL;
237 struct socket *so;
238 boolean_t want_free = TRUE;
239 int error;
240
241 AUDIT_ARG(fd, uap->s);
242 error = file_socket(uap->s, &so);
243 if (error != 0)
244 return (error);
245 if (so == NULL) {
246 error = EBADF;
247 goto out;
248 }
249 if (uap->name == USER_ADDR_NULL) {
250 error = EDESTADDRREQ;
251 goto out;
252 }
253 if (uap->namelen > sizeof (ss)) {
254 error = getsockaddr(so, &sa, uap->name, uap->namelen, TRUE);
255 } else {
256 error = getsockaddr_s(so, &ss, uap->name, uap->namelen, TRUE);
257 if (error == 0) {
258 sa = (struct sockaddr *)&ss;
259 want_free = FALSE;
260 }
261 }
262 if (error != 0)
263 goto out;
264 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
265 #if CONFIG_MACF_SOCKET_SUBSET
266 if ((error = mac_socket_check_bind(kauth_cred_get(), so, sa)) == 0)
267 error = sobind(so, sa);
268 #else
269 error = sobind(so, sa);
270 #endif /* MAC_SOCKET_SUBSET */
271 if (want_free)
272 FREE(sa, M_SONAME);
273 out:
274 file_drop(uap->s);
275 return (error);
276 }
277
278 /*
279 * Returns: 0 Success
280 * EBADF
281 * EACCES Mandatory Access Control failure
282 * file_socket:ENOTSOCK
283 * file_socket:EBADF
284 * solisten:EINVAL
285 * solisten:EOPNOTSUPP
286 * solisten:???
287 */
288 int
289 listen(__unused struct proc *p, struct listen_args *uap,
290 __unused register_t *retval)
291 {
292 int error;
293 struct socket *so;
294
295 AUDIT_ARG(fd, uap->s);
296 error = file_socket(uap->s, &so);
297 if (error)
298 return (error);
299 if (so != NULL)
300 #if CONFIG_MACF_SOCKET_SUBSET
301 {
302 error = mac_socket_check_listen(kauth_cred_get(), so);
303 if (error == 0)
304 error = solisten(so, uap->backlog);
305 }
306 #else
307 error = solisten(so, uap->backlog);
308 #endif /* MAC_SOCKET_SUBSET */
309 else
310 error = EBADF;
311
312 file_drop(uap->s);
313 return (error);
314 }
315
/*
 * accept_nocancel
 *
 * Take the first completed connection off the listening socket behind
 * descriptor uap->s, attach it to a newly allocated descriptor and, when
 * uap->name is non-NULL, copy the peer address and its length out to the
 * caller.  *retval is preset to -1 and replaced by the new descriptor
 * once falloc() has succeeded.
 *
 * Parameters:	p			Calling process
 *		uap->s			Listening socket descriptor
 *		uap->name		User buffer for the peer address
 *					(may be NULL)
 *		uap->anamelen		User socklen_t: buffer size in,
 *					address length out
 *		retval			Receives the new descriptor
 *
 * Returns:	fp_getfsock:EBADF	Bad file descriptor
 *	fp_getfsock:EOPNOTSUPP	...
 *		xlate => :ENOTSOCK	Socket operation on non-socket
 *		:EFAULT			Bad address on copyin/copyout
 *		:EBADF			Bad file descriptor
 *		:EOPNOTSUPP		Operation not supported on socket
 *		:EINVAL			Invalid argument
 *		:EWOULDBLOCK		Operation would block
 *		:ECONNABORTED		Connection aborted
 *		:EINTR			Interrupted function
 *		:EACCES			Mandatory Access Control failure
 *	falloc_locked:ENFILE	Too many files open in system
 *	falloc_locked:EMFILE	Too many open files
 *	falloc_locked:ENOMEM	Not enough space
 *		0			Success
 */
int
accept_nocancel(struct proc *p, struct accept_nocancel_args *uap,
    register_t *retval)
{
	struct fileproc *fp;
	struct sockaddr *sa = NULL;
	socklen_t namelen;	/* only set up front when uap->name != NULL */
	int error;
	struct socket *head, *so = NULL;
	lck_mtx_t *mutex_held;
	int fd = uap->s;
	int newfd;
	short fflag;		/* type must match fp->f_flag */
	int dosocklock = 0;	/* non-zero: lock `so' separately from head */

	*retval = -1;

	AUDIT_ARG(fd, uap->s);

	/* fetch the caller's buffer size before taking any reference */
	if (uap->name) {
		error = copyin(uap->anamelen, (caddr_t)&namelen,
		    sizeof (socklen_t));
		if (error)
			return (error);
	}
	error = fp_getfsock(p, fd, &fp, &head);
	if (error) {
		if (error == EOPNOTSUPP)
			error = ENOTSOCK;
		return (error);
	}
	if (head == NULL) {
		error = EBADF;
		goto out;
	}
#if CONFIG_MACF_SOCKET_SUBSET
	if ((error = mac_socket_check_accept(kauth_cred_get(), head)) != 0)
		goto out;
#endif /* CONFIG_MACF_SOCKET_SUBSET */

	socket_lock(head, 1);

	/*
	 * Pick the mutex to sleep on: the protocol's own lock when it
	 * provides pr_getlock, otherwise the domain mutex.
	 */
	if (head->so_proto->pr_getlock != NULL) {
		mutex_held = (*head->so_proto->pr_getlock)(head, 0);
		dosocklock = 1;
	} else {
		mutex_held = head->so_proto->pr_domain->dom_mtx;
		dosocklock = 0;
	}

	if ((head->so_options & SO_ACCEPTCONN) == 0) {
		if ((head->so_proto->pr_flags & PR_CONNREQUIRED) == 0) {
			error = EOPNOTSUPP;
		} else {
			/* POSIX: The socket is not accepting connections */
			error = EINVAL;
		}
		socket_unlock(head, 1);
		goto out;
	}
	/* non-blocking socket with nothing queued: do not sleep */
	if ((head->so_state & SS_NBIO) && head->so_comp.tqh_first == NULL) {
		socket_unlock(head, 1);
		error = EWOULDBLOCK;
		goto out;
	}
	/* sleep until a connection completes or an error is posted */
	while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
		if (head->so_state & SS_CANTRCVMORE) {
			head->so_error = ECONNABORTED;
			break;
		}
		if (head->so_usecount < 1)
			panic("accept: head=%p refcount=%d\n", head,
			    head->so_usecount);
		error = msleep((caddr_t)&head->so_timeo, mutex_held,
		    PSOCK | PCATCH, "accept", 0);
		if (head->so_usecount < 1)
			panic("accept: 2 head=%p refcount=%d\n", head,
			    head->so_usecount);
		/* a draining socket overrides whatever msleep returned */
		if ((head->so_state & SS_DRAINING)) {
			error = ECONNABORTED;
		}
		if (error) {
			socket_unlock(head, 1);
			goto out;
		}
	}
	/* report and clear a pending socket error */
	if (head->so_error) {
		error = head->so_error;
		head->so_error = 0;
		socket_unlock(head, 1);
		goto out;
	}


	/*
	 * At this point we know that there is at least one connection
	 * ready to be accepted. Remove it from the queue prior to
	 * allocating the file descriptor for it since falloc() may
	 * block allowing another process to accept the connection
	 * instead.
	 */
	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
	so = TAILQ_FIRST(&head->so_comp);
	TAILQ_REMOVE(&head->so_comp, so, so_list);
	head->so_qlen--;
	/* unlock head to avoid deadlock with select, keep a ref on head */
	socket_unlock(head, 0);

#if CONFIG_MACF_SOCKET_SUBSET
	/*
	 * Pass the pre-accepted socket to the MAC framework. This is
	 * cheaper than allocating a file descriptor for the socket,
	 * calling the protocol accept callback, and possibly freeing
	 * the file descriptor should the MAC check fail.
	 */
	if ((error = mac_socket_check_accepted(kauth_cred_get(), so)) != 0) {
		so->so_state &= ~(SS_NOFDREF | SS_COMP);
		so->so_head = NULL;
		soclose(so);
		/* Drop reference on listening socket */
		sodereference(head);
		goto out;
	}
#endif /* CONFIG_MACF_SOCKET_SUBSET */

	/*
	 * Pass the pre-accepted socket to any interested socket filter(s).
	 * Upon failure, the socket would have been closed by the callee.
	 */
	if (so->so_filt != NULL && (error = soacceptfilter(so)) != 0) {
		/* Drop reference on listening socket */
		sodereference(head);
		/* Propagate socket filter's error code to the caller */
		goto out;
	}

	/* save the listener's flags: falloc() below overwrites fp */
	fflag = fp->f_flag;
	error = falloc(p, &fp, &newfd, vfs_context_current());
	if (error) {
		/*
		 * Probably ran out of file descriptors. Put the
		 * unaccepted connection back onto the queue and
		 * do another wakeup so some other process might
		 * have a chance at it.
		 */
		socket_lock(head, 0);
		TAILQ_INSERT_HEAD(&head->so_comp, so, so_list);
		head->so_qlen++;
		wakeup_one((caddr_t)&head->so_timeo);
		socket_unlock(head, 1);
		goto out;
	}
	/* from here on fp is the new descriptor's fileproc */
	*retval = newfd;
	fp->f_type = DTYPE_SOCKET;
	fp->f_flag = fflag;
	fp->f_ops = &socketops;
	fp->f_data = (caddr_t)so;
	socket_lock(head, 0);
	if (dosocklock)
		socket_lock(so, 1);
	so->so_state &= ~SS_COMP;
	so->so_head = NULL;
	(void) soacceptlock(so, &sa, 0);
	socket_unlock(head, 1);
	if (sa == NULL) {
		/* no peer address: report a zero length if one was asked for */
		namelen = 0;
		if (uap->name)
			goto gotnoname;
		if (dosocklock)
			socket_unlock(so, 1);
		error = 0;
		goto releasefd;
	}
	AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);

	if (uap->name) {
		socklen_t	sa_len;

		/* save sa_len before it is destroyed */
		sa_len = sa->sa_len;
		namelen = MIN(namelen, sa_len);
		error = copyout(sa, uap->name, namelen);
		if (!error)
			/* return the actual, untruncated address length */
			namelen = sa_len;
		/*
		 * NOTE(review): the copyout below runs unconditionally, so
		 * a failure of the address copyout above is overwritten by
		 * the result of the length copyout.  If either fails, the
		 * error is returned although the code still falls through
		 * to releasefd with *retval already set - confirm that this
		 * is intended.
		 */
gotnoname:
		/* the sa == NULL case jumps in here with namelen == 0 */
		error = copyout((caddr_t)&namelen, uap->anamelen,
		    sizeof (socklen_t));
	}
	/* sa is NULL when arriving via gotnoname; presumably FREE allows it */
	FREE(sa, M_SONAME);

	/*
	 * If the socket has been marked as inactive by soacceptfilter(),
	 * disallow further operations on it.  We explicitly call shutdown
	 * on both data directions to ensure that SS_CANT{RCV,SEND}MORE
	 * states are set for the socket.  This would also flush out data
	 * hanging off the receive list of this socket.
	 */
	if (so->so_flags & SOF_DEFUNCT) {
		(void) soshutdownlock(so, SHUT_RD);
		(void) soshutdownlock(so, SHUT_WR);
		(void) sodisconnectlocked(so);
	}

	if (dosocklock)
		socket_unlock(so, 1);

releasefd:
	/* under the fd lock: release the fd table slot and drop the new fp */
	proc_fdlock(p);
	procfdtbl_releasefd(p, newfd, NULL);
	fp_drop(p, newfd, fp, 1);
	proc_fdunlock(p);

out:
	/* balances the fp_getfsock() lookup of the listening descriptor */
	file_drop(fd);
	return (error);
}
550
551 int
552 accept(struct proc *p, struct accept_args *uap, register_t *retval)
553 {
554 __pthread_testcancel(1);
555 return(accept_nocancel(p, (struct accept_nocancel_args *)uap, retval));
556 }
557
558 /*
559 * Returns: 0 Success
560 * EBADF Bad file descriptor
561 * EALREADY Connection already in progress
562 * EINPROGRESS Operation in progress
563 * ECONNABORTED Connection aborted
564 * EINTR Interrupted function
565 * EACCES Mandatory Access Control failure
566 * file_socket:ENOTSOCK
567 * file_socket:EBADF
568 * getsockaddr:ENAMETOOLONG Filename too long
569 * getsockaddr:EINVAL Invalid argument
570 * getsockaddr:ENOMEM Not enough space
571 * getsockaddr:EFAULT Bad address
572 * soconnectlock:EOPNOTSUPP
573 * soconnectlock:EISCONN
574 * soconnectlock:??? [depends on protocol, filters]
575 * msleep:EINTR
576 *
577 * Imputed: so_error error may be set from so_error, which
578 * may have been set by soconnectlock.
579 */
580 /* ARGSUSED */
581 int
582 connect(struct proc *p, struct connect_args *uap, register_t *retval)
583 {
584 __pthread_testcancel(1);
585 return(connect_nocancel(p, (struct connect_nocancel_args *)uap, retval));
586 }
587
588 int
589 connect_nocancel(__unused proc_t p, struct connect_nocancel_args *uap, __unused register_t *retval)
590 {
591 struct socket *so;
592 struct sockaddr_storage ss;
593 struct sockaddr *sa = NULL;
594 lck_mtx_t *mutex_held;
595 boolean_t want_free = TRUE;
596 int error;
597 int fd = uap->s;
598 boolean_t dgram;
599
600 AUDIT_ARG(fd, uap->s);
601 error = file_socket(fd, &so);
602 if (error != 0)
603 return (error);
604 if (so == NULL) {
605 error = EBADF;
606 goto out;
607 }
608
609 /*
610 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
611 * if this is a datagram socket; translate for other types.
612 */
613 dgram = (so->so_type == SOCK_DGRAM);
614
615 /* Get socket address now before we obtain socket lock */
616 if (uap->namelen > sizeof (ss)) {
617 error = getsockaddr(so, &sa, uap->name, uap->namelen, !dgram);
618 } else {
619 error = getsockaddr_s(so, &ss, uap->name, uap->namelen, !dgram);
620 if (error == 0) {
621 sa = (struct sockaddr *)&ss;
622 want_free = FALSE;
623 }
624 }
625 if (error != 0)
626 goto out;
627
628 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
629 #if CONFIG_MACF_SOCKET_SUBSET
630 if ((error = mac_socket_check_connect(kauth_cred_get(), so, sa)) != 0) {
631 if (want_free)
632 FREE(sa, M_SONAME);
633 goto out;
634 }
635 #endif /* MAC_SOCKET_SUBSET */
636 socket_lock(so, 1);
637
638 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
639 if (want_free)
640 FREE(sa, M_SONAME);
641 socket_unlock(so, 1);
642 error = EALREADY;
643 goto out;
644 }
645 error = soconnectlock(so, sa, 0);
646 if (error)
647 goto bad;
648 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
649 if (want_free)
650 FREE(sa, M_SONAME);
651 socket_unlock(so, 1);
652 error = EINPROGRESS;
653 goto out;
654 }
655 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
656 if (so->so_proto->pr_getlock != NULL)
657 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
658 else
659 mutex_held = so->so_proto->pr_domain->dom_mtx;
660 error = msleep((caddr_t)&so->so_timeo, mutex_held,
661 PSOCK | PCATCH, "connect", 0);
662 if ((so->so_state & SS_DRAINING)) {
663 error = ECONNABORTED;
664 }
665 if (error)
666 break;
667 }
668 if (error == 0) {
669 error = so->so_error;
670 so->so_error = 0;
671 }
672 bad:
673 so->so_state &= ~SS_ISCONNECTING;
674 socket_unlock(so, 1);
675 if (want_free)
676 FREE(sa, M_SONAME);
677 if (error == ERESTART)
678 error = EINTR;
679 out:
680 file_drop(fd);
681 return (error);
682 }
683
684 /*
685 * Returns: 0 Success
686 * socreate:EAFNOSUPPORT
687 * socreate:EPROTOTYPE
688 * socreate:EPROTONOSUPPORT
689 * socreate:ENOBUFS
690 * socreate:ENOMEM
691 * socreate:EISCONN
692 * socreate:??? [other protocol families, IPSEC]
693 * falloc:ENFILE
694 * falloc:EMFILE
695 * falloc:ENOMEM
696 * copyout:EFAULT
697 * soconnect2:EINVAL
698 * soconnect2:EPROTOTYPE
699 * soconnect2:??? [other protocol families[
700 */
701 int
702 socketpair(struct proc *p, struct socketpair_args *uap,
703 __unused register_t *retval)
704 {
705 struct fileproc *fp1, *fp2;
706 struct socket *so1, *so2;
707 int fd, error, sv[2];
708
709 AUDIT_ARG(socket, uap->domain, uap->type, uap->protocol);
710 error = socreate(uap->domain, &so1, uap->type, uap->protocol);
711 if (error)
712 return (error);
713 error = socreate(uap->domain, &so2, uap->type, uap->protocol);
714 if (error)
715 goto free1;
716
717 error = falloc(p, &fp1, &fd, vfs_context_current());
718 if (error) {
719 goto free2;
720 }
721 fp1->f_flag = FREAD|FWRITE;
722 fp1->f_type = DTYPE_SOCKET;
723 fp1->f_ops = &socketops;
724 fp1->f_data = (caddr_t)so1;
725 sv[0] = fd;
726
727 error = falloc(p, &fp2, &fd, vfs_context_current());
728 if (error) {
729 goto free3;
730 }
731 fp2->f_flag = FREAD|FWRITE;
732 fp2->f_type = DTYPE_SOCKET;
733 fp2->f_ops = &socketops;
734 fp2->f_data = (caddr_t)so2;
735 sv[1] = fd;
736
737 error = soconnect2(so1, so2);
738 if (error) {
739 goto free4;
740 }
741 if (uap->type == SOCK_DGRAM) {
742 /*
743 * Datagram socket connection is asymmetric.
744 */
745 error = soconnect2(so2, so1);
746 if (error) {
747 goto free4;
748 }
749 }
750
751 proc_fdlock(p);
752 procfdtbl_releasefd(p, sv[0], NULL);
753 procfdtbl_releasefd(p, sv[1], NULL);
754 fp_drop(p, sv[0], fp1, 1);
755 fp_drop(p, sv[1], fp2, 1);
756 proc_fdunlock(p);
757
758 error = copyout((caddr_t)sv, uap->rsv, 2 * sizeof (int));
759 return (error);
760 free4:
761 fp_free(p, sv[1], fp2);
762 free3:
763 fp_free(p, sv[0], fp1);
764 free2:
765 (void) soclose(so2);
766 free1:
767 (void) soclose(so1);
768 return (error);
769 }
770
771 /*
772 * Returns: 0 Success
773 * EINVAL
774 * ENOBUFS
775 * EBADF
776 * EPIPE
777 * EACCES Mandatory Access Control failure
778 * file_socket:ENOTSOCK
779 * file_socket:EBADF
780 * getsockaddr:ENAMETOOLONG Filename too long
781 * getsockaddr:EINVAL Invalid argument
782 * getsockaddr:ENOMEM Not enough space
783 * getsockaddr:EFAULT Bad address
784 * <pru_sosend>:EACCES[TCP]
785 * <pru_sosend>:EADDRINUSE[TCP]
786 * <pru_sosend>:EADDRNOTAVAIL[TCP]
787 * <pru_sosend>:EAFNOSUPPORT[TCP]
788 * <pru_sosend>:EAGAIN[TCP]
789 * <pru_sosend>:EBADF
790 * <pru_sosend>:ECONNRESET[TCP]
791 * <pru_sosend>:EFAULT
792 * <pru_sosend>:EHOSTUNREACH[TCP]
793 * <pru_sosend>:EINTR
794 * <pru_sosend>:EINVAL
795 * <pru_sosend>:EISCONN[AF_INET]
796 * <pru_sosend>:EMSGSIZE[TCP]
797 * <pru_sosend>:ENETDOWN[TCP]
798 * <pru_sosend>:ENETUNREACH[TCP]
799 * <pru_sosend>:ENOBUFS
800 * <pru_sosend>:ENOMEM[TCP]
801 * <pru_sosend>:ENOTCONN[AF_INET]
802 * <pru_sosend>:EOPNOTSUPP
803 * <pru_sosend>:EPERM[TCP]
804 * <pru_sosend>:EPIPE
805 * <pru_sosend>:EWOULDBLOCK
806 * <pru_sosend>:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
807 * <pru_sosend>:???[AF_INET] [whatever a filter author chooses]
808 * <pru_sosend>:??? [value from so_error]
809 * sockargs:???
810 */
811 static int
812 sendit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop,
813 int flags, register_t *retval)
814 {
815 struct mbuf *control = NULL;
816 struct sockaddr_storage ss;
817 struct sockaddr *to = NULL;
818 boolean_t want_free = TRUE;
819 int error;
820 struct socket *so;
821 user_ssize_t len;
822
823 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
824
825 error = file_socket(s, &so);
826 if (error) {
827 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error, 0, 0, 0, 0);
828 return (error);
829 }
830 if (so == NULL) {
831 error = EBADF;
832 goto out;
833 }
834 if (mp->msg_name != USER_ADDR_NULL) {
835 if (mp->msg_namelen > sizeof (ss)) {
836 error = getsockaddr(so, &to, mp->msg_name,
837 mp->msg_namelen, TRUE);
838 } else {
839 error = getsockaddr_s(so, &ss, mp->msg_name,
840 mp->msg_namelen, TRUE);
841 if (error == 0) {
842 to = (struct sockaddr *)&ss;
843 want_free = FALSE;
844 }
845 }
846 if (error != 0)
847 goto out;
848 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), to);
849 }
850 if (mp->msg_control != USER_ADDR_NULL) {
851 if (mp->msg_controllen < sizeof (struct cmsghdr)) {
852 error = EINVAL;
853 goto bad;
854 }
855 error = sockargs(&control, mp->msg_control,
856 mp->msg_controllen, MT_CONTROL);
857 if (error != 0)
858 goto bad;
859 }
860
861 #if CONFIG_MACF_SOCKET_SUBSET
862 /*
863 * We check the state without holding the socket lock;
864 * if a race condition occurs, it would simply result
865 * in an extra call to the MAC check function.
866 */
867 if (!(so->so_state & SS_ISCONNECTED) &&
868 (error = mac_socket_check_send(kauth_cred_get(), so, to)) != 0)
869 goto bad;
870 #endif /* MAC_SOCKET_SUBSET */
871
872 len = uio_resid(uiop);
873 error = so->so_proto->pr_usrreqs->pru_sosend(so, to, uiop, 0, control,
874 flags);
875 if (error != 0) {
876 if (uio_resid(uiop) != len && (error == ERESTART ||
877 error == EINTR || error == EWOULDBLOCK))
878 error = 0;
879 /* Generation of SIGPIPE can be controlled per socket */
880 if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE))
881 psignal(p, SIGPIPE);
882 }
883 if (error == 0)
884 *retval = (int)(len - uio_resid(uiop));
885 bad:
886 if (to != NULL && want_free)
887 FREE(to, M_SONAME);
888 out:
889 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error, 0, 0, 0, 0);
890 file_drop(s);
891 return (error);
892 }
893
894 /*
895 * Returns: 0 Success
896 * ENOMEM
897 * sendit:??? [see sendit definition in this file]
898 * write:??? [4056224: applicable for pipes]
899 */
900 int
901 sendto(struct proc *p, struct sendto_args *uap, register_t *retval)
902 {
903 __pthread_testcancel(1);
904 return(sendto_nocancel(p, (struct sendto_nocancel_args *)uap, retval));
905 }
906
907 int
908 sendto_nocancel(struct proc *p, struct sendto_nocancel_args *uap, register_t *retval)
909 {
910 struct user_msghdr msg;
911 int error;
912 uio_t auio = NULL;
913
914 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_START, 0, 0, 0, 0, 0);
915 AUDIT_ARG(fd, uap->s);
916
917 auio = uio_create(1, 0,
918 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
919 UIO_WRITE);
920 if (auio == NULL) {
921 return (ENOMEM);
922 }
923 uio_addiov(auio, uap->buf, uap->len);
924
925 msg.msg_name = uap->to;
926 msg.msg_namelen = uap->tolen;
927 /* no need to set up msg_iov. sendit uses uio_t we send it */
928 msg.msg_iov = 0;
929 msg.msg_iovlen = 0;
930 msg.msg_control = 0;
931 msg.msg_flags = 0;
932
933 error = sendit(p, uap->s, &msg, auio, uap->flags, retval);
934
935 if (auio != NULL) {
936 uio_free(auio);
937 }
938
939 #if HACK_FOR_4056224
940 /*
941 * Radar 4056224
942 * Temporary workaround to let send() and recv() work over
943 * a pipe for binary compatibility
944 * This will be removed in the release following Tiger
945 */
946 if (error == ENOTSOCK) {
947 struct fileproc *fp;
948
949 if (fp_lookup(p, uap->s, &fp, 0) == 0) {
950 (void) fp_drop(p, uap->s, fp, 0);
951
952 if (fp->f_type == DTYPE_PIPE) {
953 struct write_args write_uap;
954 user_ssize_t write_retval;
955
956 if (p->p_pid > last_pid_4056224) {
957 last_pid_4056224 = p->p_pid;
958
959 printf("%s[%d] uses send/recv "
960 "on a pipe\n", p->p_comm, p->p_pid);
961 }
962
963 bzero(&write_uap, sizeof (struct write_args));
964 write_uap.fd = uap->s;
965 write_uap.cbuf = uap->buf;
966 write_uap.nbyte = uap->len;
967
968 error = write(p, &write_uap, &write_retval);
969 *retval = (int)write_retval;
970 }
971 }
972 }
973 #endif /* HACK_FOR_4056224 */
974
975 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_END, error, *retval, 0, 0, 0);
976
977 return (error);
978 }
979
980 /*
981 * Returns: 0 Success
982 * ENOBUFS
983 * copyin:EFAULT
984 * sendit:??? [see sendit definition in this file]
985 */
986 int
987 sendmsg(struct proc *p, struct sendmsg_args *uap, register_t *retval)
988 {
989 __pthread_testcancel(1);
990 return(sendmsg_nocancel(p, (struct sendmsg_nocancel_args *)uap, retval));
991 }
992
993 int
994 sendmsg_nocancel(struct proc *p, struct sendmsg_nocancel_args *uap, register_t *retval)
995 {
996 struct msghdr msg;
997 struct user_msghdr user_msg;
998 caddr_t msghdrp;
999 int size_of_msghdr;
1000 int error;
1001 int size_of_iovec;
1002 uio_t auio = NULL;
1003 struct user_iovec *iovp;
1004
1005 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
1006 AUDIT_ARG(fd, uap->s);
1007 if (IS_64BIT_PROCESS(p)) {
1008 msghdrp = (caddr_t)&user_msg;
1009 size_of_msghdr = sizeof (user_msg);
1010 size_of_iovec = sizeof (struct user_iovec);
1011 } else {
1012 msghdrp = (caddr_t)&msg;
1013 size_of_msghdr = sizeof (msg);
1014 size_of_iovec = sizeof (struct iovec);
1015 }
1016 error = copyin(uap->msg, msghdrp, size_of_msghdr);
1017 if (error) {
1018 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1019 return (error);
1020 }
1021
1022 /* only need to copy if user process is not 64-bit */
1023 if (!IS_64BIT_PROCESS(p)) {
1024 user_msg.msg_flags = msg.msg_flags;
1025 user_msg.msg_controllen = msg.msg_controllen;
1026 user_msg.msg_control = CAST_USER_ADDR_T(msg.msg_control);
1027 user_msg.msg_iovlen = msg.msg_iovlen;
1028 user_msg.msg_iov = CAST_USER_ADDR_T(msg.msg_iov);
1029 user_msg.msg_namelen = msg.msg_namelen;
1030 user_msg.msg_name = CAST_USER_ADDR_T(msg.msg_name);
1031 }
1032
1033 if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
1034 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, EMSGSIZE,
1035 0, 0, 0, 0);
1036 return (EMSGSIZE);
1037 }
1038
1039 /* allocate a uio large enough to hold the number of iovecs passed */
1040 auio = uio_create(user_msg.msg_iovlen, 0,
1041 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1042 UIO_WRITE);
1043 if (auio == NULL) {
1044 error = ENOBUFS;
1045 goto done;
1046 }
1047
1048 if (user_msg.msg_iovlen) {
1049 /*
1050 * get location of iovecs within the uio.
1051 * then copyin the iovecs from user space.
1052 */
1053 iovp = uio_iovsaddr(auio);
1054 if (iovp == NULL) {
1055 error = ENOBUFS;
1056 goto done;
1057 }
1058 error = copyin(user_msg.msg_iov, (caddr_t)iovp,
1059 (user_msg.msg_iovlen * size_of_iovec));
1060 if (error)
1061 goto done;
1062 user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
1063
1064 /* finish setup of uio_t */
1065 uio_calculateresid(auio);
1066 } else {
1067 user_msg.msg_iov = 0;
1068 }
1069
1070 /* msg_flags is ignored for send */
1071 user_msg.msg_flags = 0;
1072
1073 error = sendit(p, uap->s, &user_msg, auio, uap->flags, retval);
1074 done:
1075 if (auio != NULL) {
1076 uio_free(auio);
1077 }
1078 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1079
1080 return (error);
1081 }
1082
1083 /*
1084 * Returns: 0 Success
1085 * ENOTSOCK
1086 * EINVAL
1087 * EBADF
1088 * EACCES Mandatory Access Control failure
1089 * copyout:EFAULT
1090 * fp_lookup:EBADF
1091 * <pru_soreceive>:ENOBUFS
1092 * <pru_soreceive>:ENOTCONN
1093 * <pru_soreceive>:EWOULDBLOCK
1094 * <pru_soreceive>:EFAULT
1095 * <pru_soreceive>:EINTR
1096 * <pru_soreceive>:EBADF
1097 * <pru_soreceive>:EINVAL
1098 * <pru_soreceive>:EMSGSIZE
1099 * <pru_soreceive>:???
1100 *
1101 * Notes: Additional return values from calls through <pru_soreceive>
1102 * depend on protocols other than TCP or AF_UNIX, which are
1103 * documented above.
1104 */
1105 static int
1106 recvit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop,
1107 user_addr_t namelenp, register_t *retval)
1108 {
1109 int len, error;
1110 struct mbuf *m, *control = 0;
1111 user_addr_t ctlbuf;
1112 struct socket *so;
1113 struct sockaddr *fromsa = 0;
1114 struct fileproc *fp;
1115
1116 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
1117 proc_fdlock(p);
1118 if ((error = fp_lookup(p, s, &fp, 1))) {
1119 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
1120 proc_fdunlock(p);
1121 return (error);
1122 }
1123 if (fp->f_type != DTYPE_SOCKET) {
1124 fp_drop(p, s, fp, 1);
1125 proc_fdunlock(p);
1126 return (ENOTSOCK);
1127 }
1128
1129 so = (struct socket *)fp->f_data;
1130 if (so == NULL) {
1131 fp_drop(p, s, fp, 1);
1132 proc_fdunlock(p);
1133 return (EBADF);
1134 }
1135
1136 proc_fdunlock(p);
1137
1138 #if CONFIG_MACF_SOCKET_SUBSET
1139 /*
1140 * We check the state without holding the socket lock;
1141 * if a race condition occurs, it would simply result
1142 * in an extra call to the MAC check function.
1143 */
1144 if (!(so->so_state & SS_ISCONNECTED) &&
1145 (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0)
1146 goto out1;
1147 #endif /* MAC_SOCKET_SUBSET */
1148 if (uio_resid(uiop) < 0) {
1149 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, EINVAL, 0, 0, 0, 0);
1150 error = EINVAL;
1151 goto out1;
1152 }
1153
1154 len = uio_resid(uiop);
1155 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, uiop,
1156 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
1157 &mp->msg_flags);
1158 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), fromsa);
1159 if (error) {
1160 if (uio_resid(uiop) != len && (error == ERESTART ||
1161 error == EINTR || error == EWOULDBLOCK))
1162 error = 0;
1163 }
1164
1165 if (error)
1166 goto out;
1167
1168 *retval = len - uio_resid(uiop);
1169 if (mp->msg_name) {
1170 socklen_t sa_len = 0;
1171
1172 len = mp->msg_namelen;
1173 if (len <= 0 || fromsa == 0) {
1174 len = 0;
1175 } else {
1176 #ifndef MIN
1177 #define MIN(a, b) ((a) > (b) ? (b) : (a))
1178 #endif
1179 sa_len = fromsa->sa_len;
1180 len = MIN((unsigned int)len, sa_len);
1181 error = copyout(fromsa, mp->msg_name, (unsigned)len);
1182 if (error)
1183 goto out;
1184 }
1185 mp->msg_namelen = sa_len;
1186 /* return the actual, untruncated address length */
1187 if (namelenp &&
1188 (error = copyout((caddr_t)&sa_len, namelenp,
1189 sizeof (int)))) {
1190 goto out;
1191 }
1192 }
1193 if (mp->msg_control) {
1194 len = mp->msg_controllen;
1195 m = control;
1196 mp->msg_controllen = 0;
1197 ctlbuf = mp->msg_control;
1198
1199 while (m && len > 0) {
1200 unsigned int tocopy;
1201
1202 if (len >= m->m_len) {
1203 tocopy = m->m_len;
1204 } else {
1205 mp->msg_flags |= MSG_CTRUNC;
1206 tocopy = len;
1207 }
1208
1209 error = copyout((caddr_t)mtod(m, caddr_t), ctlbuf,
1210 tocopy);
1211 if (error)
1212 goto out;
1213
1214 ctlbuf += tocopy;
1215 len -= tocopy;
1216 m = m->m_next;
1217 }
1218 mp->msg_controllen = ctlbuf - mp->msg_control;
1219 }
1220 out:
1221 if (fromsa)
1222 FREE(fromsa, M_SONAME);
1223 if (control)
1224 m_freem(control);
1225 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
1226 out1:
1227 fp_drop(p, s, fp, 0);
1228 return (error);
1229 }
1230
1231
1232 /*
1233 * Returns: 0 Success
1234 * ENOMEM
1235 * copyin:EFAULT
1236 * recvit:???
1237 * read:??? [4056224: applicable for pipes]
1238 *
1239 * Notes: The read entry point is only called as part of support for
1240 * binary backward compatability; new code should use read
1241 * instead of recv or recvfrom when attempting to read data
1242 * from pipes.
1243 *
1244 * For full documentation of the return codes from recvit, see
1245 * the block header for the recvit function.
1246 */
1247 int
1248 recvfrom(struct proc *p, struct recvfrom_args *uap, register_t *retval)
1249 {
1250 __pthread_testcancel(1);
1251 return(recvfrom_nocancel(p, (struct recvfrom_nocancel_args *)uap, retval));
1252 }
1253
1254 int
1255 recvfrom_nocancel(struct proc *p, struct recvfrom_nocancel_args *uap, register_t *retval)
1256 {
1257 struct user_msghdr msg;
1258 int error;
1259 uio_t auio = NULL;
1260
1261 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_START, 0, 0, 0, 0, 0);
1262 AUDIT_ARG(fd, uap->s);
1263
1264 if (uap->fromlenaddr) {
1265 error = copyin(uap->fromlenaddr,
1266 (caddr_t)&msg.msg_namelen, sizeof (msg.msg_namelen));
1267 if (error)
1268 return (error);
1269 } else {
1270 msg.msg_namelen = 0;
1271 }
1272 msg.msg_name = uap->from;
1273 auio = uio_create(1, 0,
1274 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1275 UIO_READ);
1276 if (auio == NULL) {
1277 return (ENOMEM);
1278 }
1279
1280 uio_addiov(auio, uap->buf, uap->len);
1281 /* no need to set up msg_iov. recvit uses uio_t we send it */
1282 msg.msg_iov = 0;
1283 msg.msg_iovlen = 0;
1284 msg.msg_control = 0;
1285 msg.msg_controllen = 0;
1286 msg.msg_flags = uap->flags;
1287 error = recvit(p, uap->s, &msg, auio, uap->fromlenaddr, retval);
1288 if (auio != NULL) {
1289 uio_free(auio);
1290 }
1291
1292 #if HACK_FOR_4056224
1293 /*
1294 * Radar 4056224
1295 * Temporary workaround to let send() and recv() work over
1296 * a pipe for binary compatibility
1297 * This will be removed in the release following Tiger
1298 */
1299 if (error == ENOTSOCK && proc_is64bit(p) == 0) {
1300 struct fileproc *fp;
1301
1302 if (fp_lookup(p, uap->s, &fp, 0) == 0) {
1303 (void) fp_drop(p, uap->s, fp, 0);
1304
1305 if (fp->f_type == DTYPE_PIPE) {
1306 struct read_args read_uap;
1307 user_ssize_t read_retval;
1308
1309 if (p->p_pid > last_pid_4056224) {
1310 last_pid_4056224 = p->p_pid;
1311
1312 printf("%s[%d] uses send/recv on "
1313 "a pipe\n", p->p_comm, p->p_pid);
1314 }
1315
1316 bzero(&read_uap, sizeof (struct read_args));
1317 read_uap.fd = uap->s;
1318 read_uap.cbuf = uap->buf;
1319 read_uap.nbyte = uap->len;
1320
1321 error = read(p, &read_uap, &read_retval);
1322 *retval = (int)read_retval;
1323 }
1324 }
1325 }
1326 #endif /* HACK_FOR_4056224 */
1327
1328 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_END, error, 0, 0, 0, 0);
1329
1330 return (error);
1331 }
1332
1333 /*
1334 * Returns: 0 Success
1335 * EMSGSIZE
1336 * ENOMEM
1337 * copyin:EFAULT
1338 * copyout:EFAULT
1339 * recvit:???
1340 *
1341 * Notes: For full documentation of the return codes from recvit, see
1342 * the block header for the recvit function.
1343 */
1344 int
1345 recvmsg(struct proc *p, struct recvmsg_args *uap, register_t *retval)
1346 {
1347 __pthread_testcancel(1);
1348 return(recvmsg_nocancel(p, (struct recvmsg_nocancel_args *)uap, retval));
1349 }
1350
1351 int
1352 recvmsg_nocancel(struct proc *p, struct recvmsg_nocancel_args *uap, register_t *retval)
1353 {
1354 struct msghdr msg;
1355 struct user_msghdr user_msg;
1356 caddr_t msghdrp;
1357 int size_of_msghdr;
1358 user_addr_t uiov;
1359 int error;
1360 int size_of_iovec;
1361 uio_t auio = NULL;
1362 struct user_iovec *iovp;
1363
1364 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
1365 AUDIT_ARG(fd, uap->s);
1366 if (IS_64BIT_PROCESS(p)) {
1367 msghdrp = (caddr_t)&user_msg;
1368 size_of_msghdr = sizeof (user_msg);
1369 size_of_iovec = sizeof (struct user_iovec);
1370 } else {
1371 msghdrp = (caddr_t)&msg;
1372 size_of_msghdr = sizeof (msg);
1373 size_of_iovec = sizeof (struct iovec);
1374 }
1375 error = copyin(uap->msg, msghdrp, size_of_msghdr);
1376 if (error) {
1377 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1378 return (error);
1379 }
1380
1381 /* only need to copy if user process is not 64-bit */
1382 if (!IS_64BIT_PROCESS(p)) {
1383 user_msg.msg_flags = msg.msg_flags;
1384 user_msg.msg_controllen = msg.msg_controllen;
1385 user_msg.msg_control = CAST_USER_ADDR_T(msg.msg_control);
1386 user_msg.msg_iovlen = msg.msg_iovlen;
1387 user_msg.msg_iov = CAST_USER_ADDR_T(msg.msg_iov);
1388 user_msg.msg_namelen = msg.msg_namelen;
1389 user_msg.msg_name = CAST_USER_ADDR_T(msg.msg_name);
1390 }
1391
1392 if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
1393 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, EMSGSIZE,
1394 0, 0, 0, 0);
1395 return (EMSGSIZE);
1396 }
1397
1398 user_msg.msg_flags = uap->flags;
1399
1400 /* allocate a uio large enough to hold the number of iovecs passed */
1401 auio = uio_create(user_msg.msg_iovlen, 0,
1402 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1403 UIO_READ);
1404 if (auio == NULL) {
1405 error = ENOMEM;
1406 goto done;
1407 }
1408
1409 /*
1410 * get location of iovecs within the uio. then copyin the iovecs from
1411 * user space.
1412 */
1413 iovp = uio_iovsaddr(auio);
1414 if (iovp == NULL) {
1415 error = ENOMEM;
1416 goto done;
1417 }
1418 uiov = user_msg.msg_iov;
1419 user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
1420 error = copyin(uiov, (caddr_t)iovp,
1421 (user_msg.msg_iovlen * size_of_iovec));
1422 if (error)
1423 goto done;
1424
1425 /* finish setup of uio_t */
1426 uio_calculateresid(auio);
1427
1428 error = recvit(p, uap->s, &user_msg, auio, 0, retval);
1429 if (!error) {
1430 user_msg.msg_iov = uiov;
1431 /* only need to copy if user process is not 64-bit */
1432 if (!IS_64BIT_PROCESS(p)) {
1433 // LP64todo - do all these change? if not, then no need to copy all of them!
1434 msg.msg_flags = user_msg.msg_flags;
1435 msg.msg_controllen = user_msg.msg_controllen;
1436 msg.msg_control =
1437 CAST_DOWN(caddr_t, user_msg.msg_control);
1438 msg.msg_iovlen = user_msg.msg_iovlen;
1439 msg.msg_iov = (struct iovec *)
1440 CAST_DOWN(caddr_t, user_msg.msg_iov);
1441 msg.msg_namelen = user_msg.msg_namelen;
1442 msg.msg_name = CAST_DOWN(caddr_t, user_msg.msg_name);
1443 }
1444 error = copyout(msghdrp, uap->msg, size_of_msghdr);
1445 }
1446 done:
1447 if (auio != NULL) {
1448 uio_free(auio);
1449 }
1450 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1451 return (error);
1452 }
1453
1454 /*
1455 * Returns: 0 Success
1456 * EBADF
1457 * file_socket:ENOTSOCK
1458 * file_socket:EBADF
1459 * soshutdown:EINVAL
1460 * soshutdown:ENOTCONN
1461 * soshutdown:EADDRNOTAVAIL[TCP]
1462 * soshutdown:ENOBUFS[TCP]
1463 * soshutdown:EMSGSIZE[TCP]
1464 * soshutdown:EHOSTUNREACH[TCP]
1465 * soshutdown:ENETUNREACH[TCP]
1466 * soshutdown:ENETDOWN[TCP]
1467 * soshutdown:ENOMEM[TCP]
1468 * soshutdown:EACCES[TCP]
1469 * soshutdown:EMSGSIZE[TCP]
1470 * soshutdown:ENOBUFS[TCP]
1471 * soshutdown:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
1472 * soshutdown:??? [other protocol families]
1473 */
1474 /* ARGSUSED */
1475 int
1476 shutdown(__unused struct proc *p, struct shutdown_args *uap,
1477 __unused register_t *retval)
1478 {
1479 struct socket *so;
1480 int error;
1481
1482 AUDIT_ARG(fd, uap->s);
1483 error = file_socket(uap->s, &so);
1484 if (error)
1485 return (error);
1486 if (so == NULL) {
1487 error = EBADF;
1488 goto out;
1489 }
1490 error = soshutdown((struct socket *)so, uap->how);
1491 out:
1492 file_drop(uap->s);
1493 return (error);
1494 }
1495
1496 /*
1497 * Returns: 0 Success
1498 * EFAULT
1499 * EINVAL
1500 * EACCES Mandatory Access Control failure
1501 * file_socket:ENOTSOCK
1502 * file_socket:EBADF
1503 * sosetopt:EINVAL
1504 * sosetopt:ENOPROTOOPT
1505 * sosetopt:ENOBUFS
1506 * sosetopt:EDOM
1507 * sosetopt:EFAULT
1508 * sosetopt:EOPNOTSUPP[AF_UNIX]
1509 * sosetopt:???
1510 */
1511 /* ARGSUSED */
1512 int
1513 setsockopt(struct proc *p, struct setsockopt_args *uap,
1514 __unused register_t *retval)
1515 {
1516 struct socket *so;
1517 struct sockopt sopt;
1518 int error;
1519
1520 AUDIT_ARG(fd, uap->s);
1521 if (uap->val == 0 && uap->valsize != 0)
1522 return (EFAULT);
1523 /* No bounds checking on size (it's unsigned) */
1524
1525 error = file_socket(uap->s, &so);
1526 if (error)
1527 return (error);
1528
1529 sopt.sopt_dir = SOPT_SET;
1530 sopt.sopt_level = uap->level;
1531 sopt.sopt_name = uap->name;
1532 sopt.sopt_val = uap->val;
1533 sopt.sopt_valsize = uap->valsize;
1534 sopt.sopt_p = p;
1535
1536 if (so == NULL) {
1537 error = EINVAL;
1538 goto out;
1539 }
1540 #if CONFIG_MACF_SOCKET_SUBSET
1541 if ((error = mac_socket_check_setsockopt(kauth_cred_get(), so,
1542 &sopt)) != 0)
1543 goto out;
1544 #endif /* MAC_SOCKET_SUBSET */
1545 error = sosetopt(so, &sopt);
1546 out:
1547 file_drop(uap->s);
1548 return (error);
1549 }
1550
1551
1552
1553 /*
1554 * Returns: 0 Success
1555 * EINVAL
1556 * EBADF
1557 * EACCES Mandatory Access Control failure
1558 * copyin:EFAULT
1559 * copyout:EFAULT
1560 * file_socket:ENOTSOCK
1561 * file_socket:EBADF
1562 * sogetopt:???
1563 */
1564 int
1565 getsockopt(struct proc *p, struct getsockopt_args *uap,
1566 __unused register_t *retval)
1567 {
1568 int error;
1569 socklen_t valsize;
1570 struct sockopt sopt;
1571 struct socket *so;
1572
1573 error = file_socket(uap->s, &so);
1574 if (error)
1575 return (error);
1576 if (uap->val) {
1577 error = copyin(uap->avalsize, (caddr_t)&valsize,
1578 sizeof (valsize));
1579 if (error)
1580 goto out;
1581 /* No bounds checking on size (it's unsigned) */
1582 } else {
1583 valsize = 0;
1584 }
1585 sopt.sopt_dir = SOPT_GET;
1586 sopt.sopt_level = uap->level;
1587 sopt.sopt_name = uap->name;
1588 sopt.sopt_val = uap->val;
1589 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
1590 sopt.sopt_p = p;
1591
1592 if (so == NULL) {
1593 error = EBADF;
1594 goto out;
1595 }
1596 #if CONFIG_MACF_SOCKET_SUBSET
1597 if ((error = mac_socket_check_getsockopt(kauth_cred_get(), so,
1598 &sopt)) != 0)
1599 goto out;
1600 #endif /* MAC_SOCKET_SUBSET */
1601 error = sogetopt((struct socket *)so, &sopt);
1602 if (error == 0) {
1603 valsize = sopt.sopt_valsize;
1604 error = copyout((caddr_t)&valsize, uap->avalsize,
1605 sizeof (valsize));
1606 }
1607 out:
1608 file_drop(uap->s);
1609 return (error);
1610 }
1611
1612
1613 /*
1614 * Get socket name.
1615 *
1616 * Returns: 0 Success
1617 * EBADF
1618 * file_socket:ENOTSOCK
1619 * file_socket:EBADF
1620 * copyin:EFAULT
1621 * copyout:EFAULT
1622 * <pru_sockaddr>:ENOBUFS[TCP]
1623 * <pru_sockaddr>:ECONNRESET[TCP]
1624 * <pru_sockaddr>:EINVAL[AF_UNIX]
1625 * <sf_getsockname>:???
1626 */
1627 /* ARGSUSED */
1628 int
1629 getsockname(__unused struct proc *p, struct getsockname_args *uap,
1630 __unused register_t *retval)
1631 {
1632 struct socket *so;
1633 struct sockaddr *sa;
1634 socklen_t len;
1635 socklen_t sa_len;
1636 int error;
1637
1638 error = file_socket(uap->fdes, &so);
1639 if (error)
1640 return (error);
1641 error = copyin(uap->alen, (caddr_t)&len, sizeof (socklen_t));
1642 if (error)
1643 goto out;
1644 if (so == NULL) {
1645 error = EBADF;
1646 goto out;
1647 }
1648 sa = 0;
1649 socket_lock(so, 1);
1650 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
1651 if (error == 0) {
1652 struct socket_filter_entry *filter;
1653 int filtered = 0;
1654 for (filter = so->so_filt; filter && error == 0;
1655 filter = filter->sfe_next_onsocket) {
1656 if (filter->sfe_filter->sf_filter.sf_getsockname) {
1657 if (!filtered) {
1658 filtered = 1;
1659 sflt_use(so);
1660 socket_unlock(so, 0);
1661 }
1662 error = filter->sfe_filter->sf_filter.
1663 sf_getsockname(filter->sfe_cookie, so, &sa);
1664 }
1665 }
1666
1667 if (error == EJUSTRETURN)
1668 error = 0;
1669
1670 if (filtered) {
1671 socket_lock(so, 0);
1672 sflt_unuse(so);
1673 }
1674 }
1675 socket_unlock(so, 1);
1676 if (error)
1677 goto bad;
1678 if (sa == 0) {
1679 len = 0;
1680 goto gotnothing;
1681 }
1682
1683 sa_len = sa->sa_len;
1684 len = MIN(len, sa_len);
1685 error = copyout((caddr_t)sa, uap->asa, len);
1686 if (error)
1687 goto bad;
1688 /* return the actual, untruncated address length */
1689 len = sa_len;
1690 gotnothing:
1691 error = copyout((caddr_t)&len, uap->alen, sizeof (socklen_t));
1692 bad:
1693 if (sa)
1694 FREE(sa, M_SONAME);
1695 out:
1696 file_drop(uap->fdes);
1697 return (error);
1698 }
1699
1700 /*
1701 * Get name of peer for connected socket.
1702 *
1703 * Returns: 0 Success
1704 * EBADF
1705 * EINVAL
1706 * ENOTCONN
1707 * file_socket:ENOTSOCK
1708 * file_socket:EBADF
1709 * copyin:EFAULT
1710 * copyout:EFAULT
1711 * <pru_peeraddr>:???
1712 * <sf_getpeername>:???
1713 */
1714 /* ARGSUSED */
1715 int
1716 getpeername(__unused struct proc *p, struct getpeername_args *uap,
1717 __unused register_t *retval)
1718 {
1719 struct socket *so;
1720 struct sockaddr *sa;
1721 socklen_t len;
1722 socklen_t sa_len;
1723 int error;
1724
1725 error = file_socket(uap->fdes, &so);
1726 if (error)
1727 return (error);
1728 if (so == NULL) {
1729 error = EBADF;
1730 goto out;
1731 }
1732
1733 socket_lock(so, 1);
1734
1735 if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
1736 (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
1737 /* the socket has been shutdown, no more getpeername's */
1738 socket_unlock(so, 1);
1739 error = EINVAL;
1740 goto out;
1741 }
1742
1743 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
1744 socket_unlock(so, 1);
1745 error = ENOTCONN;
1746 goto out;
1747 }
1748 error = copyin(uap->alen, (caddr_t)&len, sizeof (socklen_t));
1749 if (error) {
1750 socket_unlock(so, 1);
1751 goto out;
1752 }
1753 sa = 0;
1754 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
1755 if (error == 0) {
1756 struct socket_filter_entry *filter;
1757 int filtered = 0;
1758 for (filter = so->so_filt; filter && error == 0;
1759 filter = filter->sfe_next_onsocket) {
1760 if (filter->sfe_filter->sf_filter.sf_getpeername) {
1761 if (!filtered) {
1762 filtered = 1;
1763 sflt_use(so);
1764 socket_unlock(so, 0);
1765 }
1766 error = filter->sfe_filter->sf_filter.
1767 sf_getpeername(filter->sfe_cookie, so, &sa);
1768 }
1769 }
1770
1771 if (error == EJUSTRETURN)
1772 error = 0;
1773
1774 if (filtered) {
1775 socket_lock(so, 0);
1776 sflt_unuse(so);
1777 }
1778 }
1779 socket_unlock(so, 1);
1780 if (error)
1781 goto bad;
1782 if (sa == 0) {
1783 len = 0;
1784 goto gotnothing;
1785 }
1786 sa_len = sa->sa_len;
1787 len = MIN(len, sa_len);
1788 error = copyout(sa, uap->asa, len);
1789 if (error)
1790 goto bad;
1791 /* return the actual, untruncated address length */
1792 len = sa_len;
1793 gotnothing:
1794 error = copyout((caddr_t)&len, uap->alen, sizeof (socklen_t));
1795 bad:
1796 if (sa) FREE(sa, M_SONAME);
1797 out:
1798 file_drop(uap->fdes);
1799 return (error);
1800 }
1801
1802 int
1803 sockargs(struct mbuf **mp, user_addr_t data, int buflen, int type)
1804 {
1805 struct sockaddr *sa;
1806 struct mbuf *m;
1807 int error;
1808
1809 if ((u_int)buflen > MLEN) {
1810 if (type == MT_SONAME && (u_int)buflen <= 112)
1811 buflen = MLEN; /* unix domain compat. hack */
1812 else if ((u_int)buflen > MCLBYTES)
1813 return (EINVAL);
1814 }
1815 m = m_get(M_WAIT, type);
1816 if (m == NULL)
1817 return (ENOBUFS);
1818 if ((u_int)buflen > MLEN) {
1819 MCLGET(m, M_WAIT);
1820 if ((m->m_flags & M_EXT) == 0) {
1821 m_free(m);
1822 return (ENOBUFS);
1823 }
1824 }
1825 m->m_len = buflen;
1826 error = copyin(data, mtod(m, caddr_t), (u_int)buflen);
1827 if (error) {
1828 (void) m_free(m);
1829 } else {
1830 *mp = m;
1831 if (type == MT_SONAME) {
1832 sa = mtod(m, struct sockaddr *);
1833 sa->sa_len = buflen;
1834 }
1835 }
1836 return (error);
1837 }
1838
1839 /*
1840 * Given a user_addr_t of length len, allocate and fill out a *sa.
1841 *
1842 * Returns: 0 Success
1843 * ENAMETOOLONG Filename too long
1844 * EINVAL Invalid argument
1845 * ENOMEM Not enough space
1846 * copyin:EFAULT Bad address
1847 */
1848 static int
1849 getsockaddr(struct socket *so, struct sockaddr **namp, user_addr_t uaddr,
1850 size_t len, boolean_t translate_unspec)
1851 {
1852 struct sockaddr *sa;
1853 int error;
1854
1855 if (len > SOCK_MAXADDRLEN)
1856 return (ENAMETOOLONG);
1857
1858 if (len < offsetof(struct sockaddr, sa_data[0]))
1859 return (EINVAL);
1860
1861 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK | M_ZERO);
1862 if (sa == NULL) {
1863 return (ENOMEM);
1864 }
1865 error = copyin(uaddr, (caddr_t)sa, len);
1866 if (error) {
1867 FREE(sa, M_SONAME);
1868 } else {
1869 /*
1870 * Force sa_family to AF_INET on AF_INET sockets to handle
1871 * legacy applications that use AF_UNSPEC (0). On all other
1872 * sockets we leave it unchanged and let the lower layer
1873 * handle it.
1874 */
1875 if (translate_unspec && sa->sa_family == AF_UNSPEC &&
1876 INP_CHECK_SOCKAF(so, AF_INET) &&
1877 len == sizeof (struct sockaddr_in))
1878 sa->sa_family = AF_INET;
1879
1880 sa->sa_len = len;
1881 *namp = sa;
1882 }
1883 return (error);
1884 }
1885
1886 static int
1887 getsockaddr_s(struct socket *so, struct sockaddr_storage *ss,
1888 user_addr_t uaddr, size_t len, boolean_t translate_unspec)
1889 {
1890 int error;
1891
1892 if (ss == NULL || uaddr == USER_ADDR_NULL ||
1893 len < offsetof(struct sockaddr, sa_data[0]))
1894 return (EINVAL);
1895
1896 /*
1897 * sockaddr_storage size is less than SOCK_MAXADDRLEN,
1898 * so the check here is inclusive.
1899 */
1900 if (len > sizeof (*ss))
1901 return (ENAMETOOLONG);
1902
1903 bzero(ss, sizeof (*ss));
1904 error = copyin(uaddr, (caddr_t)ss, len);
1905 if (error == 0) {
1906 /*
1907 * Force sa_family to AF_INET on AF_INET sockets to handle
1908 * legacy applications that use AF_UNSPEC (0). On all other
1909 * sockets we leave it unchanged and let the lower layer
1910 * handle it.
1911 */
1912 if (translate_unspec && ss->ss_family == AF_UNSPEC &&
1913 INP_CHECK_SOCKAF(so, AF_INET) &&
1914 len == sizeof (struct sockaddr_in))
1915 ss->ss_family = AF_INET;
1916
1917 ss->ss_len = len;
1918 }
1919 return (error);
1920 }
1921
1922 #if SENDFILE
1923
1924 SYSCTL_DECL(_kern_ipc);
1925
1926 #define SFUIOBUFS 64
1927 static int sendfileuiobufs = SFUIOBUFS;
1928 SYSCTL_INT(_kern_ipc, OID_AUTO, sendfileuiobufs, CTLFLAG_RW, &sendfileuiobufs,
1929 0, "");
1930
1931 /* Macros to compute the number of mbufs needed depending on cluster size */
1932 #define HOWMANY_16K(n) ((((unsigned int)(n) - 1) >> (PGSHIFT + 2)) + 1)
1933 #define HOWMANY_4K(n) ((((unsigned int)(n) - 1) >> PGSHIFT) + 1)
1934
1935 /* Upper send limit in bytes (sendfileuiobufs * PAGESIZE) */
1936 #define SENDFILE_MAX_BYTES (sendfileuiobufs << PGSHIFT)
1937
1938 /* Upper send limit in the number of mbuf clusters */
1939 #define SENDFILE_MAX_16K HOWMANY_16K(SENDFILE_MAX_BYTES)
1940 #define SENDFILE_MAX_4K HOWMANY_4K(SENDFILE_MAX_BYTES)
1941
1942 size_t mbuf_pkt_maxlen(mbuf_t m);
1943
1944 __private_extern__ size_t
1945 mbuf_pkt_maxlen(mbuf_t m)
1946 {
1947 size_t maxlen = 0;
1948
1949 while (m) {
1950 maxlen += mbuf_maxlen(m);
1951 m = mbuf_next(m);
1952 }
1953 return (maxlen);
1954 }
1955
1956 static void
1957 alloc_sendpkt(int how, size_t pktlen, unsigned int *maxchunks,
1958 struct mbuf **m, boolean_t jumbocl)
1959 {
1960 unsigned int needed;
1961
1962 if (pktlen == 0)
1963 panic("%s: pktlen (%ld) must be non-zero\n", __func__, pktlen);
1964
1965 /*
1966 * Try to allocate for the whole thing. Since we want full control
1967 * over the buffer size and be able to accept partial result, we can't
1968 * use mbuf_allocpacket(). The logic below is similar to sosend().
1969 */
1970 *m = NULL;
1971 if (pktlen > NBPG && jumbocl) {
1972 needed = MIN(SENDFILE_MAX_16K, HOWMANY_16K(pktlen));
1973 *m = m_getpackets_internal(&needed, 1, how, 0, M16KCLBYTES);
1974 }
1975 if (*m == NULL) {
1976 needed = MIN(SENDFILE_MAX_4K, HOWMANY_4K(pktlen));
1977 *m = m_getpackets_internal(&needed, 1, how, 0, NBPG);
1978 }
1979
1980 /*
1981 * Our previous attempt(s) at allocation had failed; the system
1982 * may be short on mbufs, and we want to block until they are
1983 * available. This time, ask just for 1 mbuf and don't return
1984 * until we get it.
1985 */
1986 if (*m == NULL) {
1987 needed = 1;
1988 *m = m_getpackets_internal(&needed, 1, M_WAIT, 1, NBPG);
1989 }
1990 if (*m == NULL)
1991 panic("%s: blocking allocation returned NULL\n", __func__);
1992
1993 *maxchunks = needed;
1994 }
1995
1996 /*
1997 * sendfile(2).
1998 * int sendfile(int fd, int s, off_t offset, off_t *nbytes,
1999 * struct sf_hdtr *hdtr, int flags)
2000 *
2001 * Send a file specified by 'fd' and starting at 'offset' to a socket
2002 * specified by 's'. Send only '*nbytes' of the file or until EOF if
2003 * *nbytes == 0. Optionally add a header and/or trailer to the socket
2004 * output. If specified, write the total number of bytes sent into *nbytes.
2005 */
2006 int
2007 sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval)
2008 {
2009 struct fileproc *fp;
2010 struct vnode *vp;
2011 struct socket *so;
2012 struct writev_nocancel_args nuap;
2013 user_ssize_t writev_retval;
2014 struct sf_hdtr hdtr;
2015 struct user_sf_hdtr user_hdtr;
2016 off_t off, xfsize;
2017 off_t nbytes = 0, sbytes = 0;
2018 int error = 0;
2019 size_t sizeof_hdtr;
2020 size_t size_of_iovec;
2021 off_t file_size;
2022 struct vfs_context context = *vfs_context_current();
2023
2024 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_START), uap->s,
2025 0, 0, 0, 0);
2026 /*
2027 * Do argument checking. Must be a regular file in, stream
2028 * type and connected socket out, positive offset.
2029 */
2030 if ((error = fp_getfvp(p, uap->fd, &fp, &vp))) {
2031 goto done;
2032 }
2033 if ((fp->f_flag & FREAD) == 0) {
2034 error = EBADF;
2035 goto done1;
2036 }
2037 if (vnode_isreg(vp) == 0) {
2038 error = ENOTSUP;
2039 goto done1;
2040 }
2041 error = file_socket(uap->s, &so);
2042 if (error) {
2043 goto done1;
2044 }
2045 if (so == NULL) {
2046 error = EBADF;
2047 goto done2;
2048 }
2049 if (so->so_type != SOCK_STREAM) {
2050 error = EINVAL;
2051 goto done2;
2052 }
2053 if ((so->so_state & SS_ISCONNECTED) == 0) {
2054 error = ENOTCONN;
2055 goto done2;
2056 }
2057 if (uap->offset < 0) {
2058 error = EINVAL;
2059 goto done2;
2060 }
2061 if (uap->nbytes == USER_ADDR_NULL) {
2062 error = EINVAL;
2063 goto done2;
2064 }
2065 if (uap->flags != 0) {
2066 error = EINVAL;
2067 goto done2;
2068 }
2069
2070 context.vc_ucred = fp->f_fglob->fg_cred;
2071
2072 #if CONFIG_MACF_SOCKET_SUBSET
2073 /* JMM - fetch connected sockaddr? */
2074 error = mac_socket_check_send(context.vc_ucred, so, NULL);
2075 if (error)
2076 goto done2;
2077 #endif
2078
2079 /*
2080 * Get number of bytes to send
2081 * Should it applies to size of header and trailer?
2082 * JMM - error handling?
2083 */
2084 copyin(uap->nbytes, &nbytes, sizeof (off_t));
2085
2086 /*
2087 * If specified, get the pointer to the sf_hdtr struct for
2088 * any headers/trailers.
2089 */
2090 if (uap->hdtr != USER_ADDR_NULL) {
2091 caddr_t hdtrp;
2092
2093 bzero(&user_hdtr, sizeof (user_hdtr));
2094 if (IS_64BIT_PROCESS(p)) {
2095 hdtrp = (caddr_t)&user_hdtr;
2096 sizeof_hdtr = sizeof (user_hdtr);
2097 size_of_iovec = sizeof (struct user_iovec);
2098 } else {
2099 hdtrp = (caddr_t)&hdtr;
2100 sizeof_hdtr = sizeof (hdtr);
2101 size_of_iovec = sizeof (struct iovec);
2102 }
2103 error = copyin(uap->hdtr, hdtrp, sizeof_hdtr);
2104 if (error)
2105 goto done2;
2106 /* need to copy if user process is not 64-bit */
2107 if (!IS_64BIT_PROCESS(p)) {
2108 user_hdtr.headers = CAST_USER_ADDR_T(hdtr.headers);
2109 user_hdtr.hdr_cnt = hdtr.hdr_cnt;
2110 user_hdtr.trailers = CAST_USER_ADDR_T(hdtr.trailers);
2111 user_hdtr.trl_cnt = hdtr.trl_cnt;
2112 }
2113
2114 /*
2115 * Send any headers. Wimp out and use writev(2).
2116 */
2117 if (user_hdtr.headers != USER_ADDR_NULL) {
2118 bzero(&nuap, sizeof (struct writev_args));
2119 nuap.fd = uap->s;
2120 nuap.iovp = user_hdtr.headers;
2121 nuap.iovcnt = user_hdtr.hdr_cnt;
2122 error = writev_nocancel(p, &nuap, &writev_retval);
2123 if (error)
2124 goto done2;
2125 sbytes += writev_retval;
2126 }
2127 }
2128
2129 /*
2130 * Get the file size for 2 reasons:
2131 * 1. We don't want to allocate more mbufs than necessary
2132 * 2. We don't want to read past the end of file
2133 */
2134 if ((error = vnode_size(vp, &file_size, vfs_context_current())) != 0)
2135 goto done2;
2136
2137 /*
2138 * Simply read file data into a chain of mbufs that used with scatter
2139 * gather reads. We're not (yet?) setup to use zero copy external
2140 * mbufs that point to the file pages.
2141 */
2142 socket_lock(so, 1);
2143 error = sblock(&so->so_snd, M_WAIT);
2144 if (error) {
2145 socket_unlock(so, 1);
2146 goto done2;
2147 }
2148 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
2149 mbuf_t m0 = NULL, m;
2150 unsigned int nbufs = sendfileuiobufs, i;
2151 uio_t auio;
2152 char uio_buf[UIO_SIZEOF(sendfileuiobufs)]; /* 1 KB !!! */
2153 size_t uiolen;
2154 user_ssize_t rlen;
2155 off_t pgoff;
2156 size_t pktlen;
2157 boolean_t jumbocl;
2158
2159 /*
2160 * Calculate the amount to transfer.
2161 * Align to round number of pages.
2162 * Not to exceed send socket buffer,
2163 * the EOF, or the passed in nbytes.
2164 */
2165 xfsize = sbspace(&so->so_snd);
2166
2167 if (xfsize <= 0) {
2168 if (so->so_state & SS_CANTSENDMORE) {
2169 error = EPIPE;
2170 goto done3;
2171 } else if ((so->so_state & SS_NBIO)) {
2172 error = EAGAIN;
2173 goto done3;
2174 } else {
2175 xfsize = PAGE_SIZE;
2176 }
2177 }
2178
2179 if (xfsize > SENDFILE_MAX_BYTES)
2180 xfsize = SENDFILE_MAX_BYTES;
2181 else if (xfsize > PAGE_SIZE)
2182 xfsize = trunc_page(xfsize);
2183 pgoff = off & PAGE_MASK_64;
2184 if (pgoff > 0 && PAGE_SIZE - pgoff < xfsize)
2185 xfsize = PAGE_SIZE_64 - pgoff;
2186 if (nbytes && xfsize > (nbytes - sbytes))
2187 xfsize = nbytes - sbytes;
2188 if (xfsize <= 0)
2189 break;
2190 if (off + xfsize > file_size)
2191 xfsize = file_size - off;
2192 if (xfsize <= 0)
2193 break;
2194
2195 /*
2196 * Attempt to use larger than system page-size clusters for
2197 * large writes only if there is a jumbo cluster pool and
2198 * if the socket is marked accordingly.
2199 */
2200 jumbocl = sosendjcl && njcl > 0 &&
2201 ((so->so_flags & SOF_MULTIPAGES) || sosendjcl_ignore_capab);
2202
2203 socket_unlock(so, 0);
2204 alloc_sendpkt(M_WAIT, xfsize, &nbufs, &m0, jumbocl);
2205 pktlen = mbuf_pkt_maxlen(m0);
2206 if (pktlen < xfsize)
2207 xfsize = pktlen;
2208
2209 auio = uio_createwithbuffer(nbufs, off, UIO_SYSSPACE,
2210 UIO_READ, &uio_buf[0], sizeof (uio_buf));
2211 if (auio == NULL) {
2212 //printf("sendfile: uio_createwithbuffer failed\n");
2213 mbuf_freem(m0);
2214 error = ENXIO;
2215 socket_lock(so, 0);
2216 goto done3;
2217 }
2218
2219 for (i = 0, m = m0, uiolen = 0;
2220 i < nbufs && m != NULL && uiolen < xfsize;
2221 i++, m = mbuf_next(m)) {
2222 size_t mlen = mbuf_maxlen(m);
2223
2224 if (mlen + uiolen > xfsize)
2225 mlen = xfsize - uiolen;
2226 mbuf_setlen(m, mlen);
2227 uio_addiov(auio, CAST_USER_ADDR_T(mbuf_datastart(m)),
2228 mlen);
2229 uiolen += mlen;
2230 }
2231
2232 if (xfsize != uio_resid(auio))
2233 printf("sendfile: xfsize: %lld != uio_resid(auio): "
2234 "%lld\n", xfsize, uio_resid(auio));
2235
2236 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_START),
2237 uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
2238 (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
2239 error = fo_read(fp, auio, FOF_OFFSET, &context);
2240 socket_lock(so, 0);
2241 if (error != 0) {
2242 if (uio_resid(auio) != xfsize && (error == ERESTART ||
2243 error == EINTR || error == EWOULDBLOCK)) {
2244 error = 0;
2245 } else {
2246 mbuf_freem(m0);
2247 goto done3;
2248 }
2249 }
2250 xfsize -= uio_resid(auio);
2251 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_END),
2252 uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
2253 (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
2254
2255 if (xfsize == 0) {
2256 //printf("sendfile: fo_read 0 bytes, EOF\n");
2257 break;
2258 }
2259 if (xfsize + off > file_size)
2260 printf("sendfile: xfsize: %lld + off: %lld > file_size:"
2261 "%lld\n", xfsize, off, file_size);
2262 for (i = 0, m = m0, rlen = 0;
2263 i < nbufs && m != NULL && rlen < xfsize;
2264 i++, m = mbuf_next(m)) {
2265 size_t mlen = mbuf_maxlen(m);
2266
2267 if (rlen + mlen > xfsize)
2268 mlen = xfsize - rlen;
2269 mbuf_setlen(m, mlen);
2270
2271 rlen += mlen;
2272 }
2273 mbuf_pkthdr_setlen(m0, xfsize);
2274
2275 retry_space:
2276 /*
2277 * Make sure that the socket is still able to take more data.
2278 * CANTSENDMORE being true usually means that the connection
2279 * was closed. so_error is true when an error was sensed after
2280 * a previous send.
2281 * The state is checked after the page mapping and buffer
2282 * allocation above since those operations may block and make
2283 * any socket checks stale. From this point forward, nothing
2284 * blocks before the pru_send (or more accurately, any blocking
2285 * results in a loop back to here to re-check).
2286 */
2287 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
2288 if (so->so_state & SS_CANTSENDMORE) {
2289 error = EPIPE;
2290 } else {
2291 error = so->so_error;
2292 so->so_error = 0;
2293 }
2294 m_freem(m0);
2295 goto done3;
2296 }
2297 /*
2298 * Wait for socket space to become available. We do this just
2299 * after checking the connection state above in order to avoid
2300 * a race condition with sbwait().
2301 */
2302 if (sbspace(&so->so_snd) < (long)so->so_snd.sb_lowat) {
2303 if (so->so_state & SS_NBIO) {
2304 m_freem(m0);
2305 error = EAGAIN;
2306 goto done3;
2307 }
2308 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT |
2309 DBG_FUNC_START), uap->s, 0, 0, 0, 0);
2310 error = sbwait(&so->so_snd);
2311 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT|
2312 DBG_FUNC_END), uap->s, 0, 0, 0, 0);
2313 /*
2314 * An error from sbwait usually indicates that we've
2315 * been interrupted by a signal. If we've sent anything
2316 * then return bytes sent, otherwise return the error.
2317 */
2318 if (error) {
2319 m_freem(m0);
2320 goto done3;
2321 }
2322 goto retry_space;
2323 }
2324 {
2325 /*
2326 * Socket filter processing
2327 */
2328 struct socket_filter_entry *filter;
2329 int filtered = 0;
2330 struct mbuf *control = NULL;
2331 boolean_t recursive = (so->so_send_filt_thread != NULL);
2332
2333 error = 0;
2334 for (filter = so->so_filt; filter && (error == 0);
2335 filter = filter->sfe_next_onsocket) {
2336 if (filter->sfe_filter->sf_filter.sf_data_out) {
2337 if (filtered == 0) {
2338 filtered = 1;
2339 so->so_send_filt_thread =
2340 current_thread();
2341 sflt_use(so);
2342 socket_unlock(so, 0);
2343 }
2344 error = filter->sfe_filter->sf_filter.
2345 sf_data_out(filter->sfe_cookie, so,
2346 NULL, &m0, &control, 0);
2347 }
2348 }
2349
2350 if (filtered) {
2351 /*
2352 * At this point, we've run at least one filter.
2353 * The socket is unlocked as is the socket
2354 * buffer. Clear the recorded filter thread
2355 * only when we are outside of a filter's
2356 * context. This allows for a filter to issue
2357 * multiple inject calls from its sf_data_out
2358 * callback routine.
2359 */
2360 socket_lock(so, 0);
2361 sflt_unuse(so);
2362 if (!recursive)
2363 so->so_send_filt_thread = 0;
2364 if (error) {
2365 if (error == EJUSTRETURN) {
2366 error = 0;
2367 continue;
2368 }
2369 goto done3;
2370 }
2371 }
2372 /*
2373 * End Socket filter processing
2374 */
2375 }
2376 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
2377 uap->s, 0, 0, 0, 0);
2378 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m0,
2379 0, 0, p);
2380 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
2381 uap->s, 0, 0, 0, 0);
2382 if (error) {
2383 goto done3;
2384 }
2385 }
2386 sbunlock(&so->so_snd, 0); /* will unlock socket */
2387 /*
2388 * Send trailers. Wimp out and use writev(2).
2389 */
2390 if (uap->hdtr != USER_ADDR_NULL &&
2391 user_hdtr.trailers != USER_ADDR_NULL) {
2392 bzero(&nuap, sizeof (struct writev_args));
2393 nuap.fd = uap->s;
2394 nuap.iovp = user_hdtr.trailers;
2395 nuap.iovcnt = user_hdtr.trl_cnt;
2396 error = writev_nocancel(p, &nuap, &writev_retval);
2397 if (error)
2398 goto done2;
2399 sbytes += writev_retval;
2400 }
2401 done2:
2402 file_drop(uap->s);
2403 done1:
2404 file_drop(uap->fd);
2405 done:
2406 if (uap->nbytes != USER_ADDR_NULL) {
2407 /* XXX this appears bogus for some early failure conditions */
2408 copyout(&sbytes, uap->nbytes, sizeof (off_t));
2409 }
2410 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_END), uap->s,
2411 (unsigned int)((sbytes >> 32) & 0x0ffffffff),
2412 (unsigned int)(sbytes & 0x0ffffffff), error, 0);
2413 return (error);
2414 done3:
2415 sbunlock(&so->so_snd, 0); /* will unlock socket */
2416 goto done2;
2417 }
2418
2419
2420 #endif /* SENDFILE */