]> git.saurik.com Git - apple/xnu.git/blob - bsd/kern/uipc_syscalls.c
xnu-1504.9.17.tar.gz
[apple/xnu.git] / bsd / kern / uipc_syscalls.c
1 /*
2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1982, 1986, 1989, 1990, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * sendfile(2) and related extensions:
33 * Copyright (c) 1998, David Greenman. All rights reserved.
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. All advertising materials mentioning features or use of this software
44 * must display the following acknowledgement:
45 * This product includes software developed by the University of
46 * California, Berkeley and its contributors.
47 * 4. Neither the name of the University nor the names of its contributors
48 * may be used to endorse or promote products derived from this software
49 * without specific prior written permission.
50 *
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE.
62 *
63 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94
64 */
65 /*
66 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
67 * support for mandatory and extensible security protections. This notice
68 * is included in support of clause 2.2 (b) of the Apple Public License,
69 * Version 2.0.
70 */
71
72 #include <sys/param.h>
73 #include <sys/systm.h>
74 #include <sys/filedesc.h>
75 #include <sys/proc_internal.h>
76 #include <sys/file_internal.h>
77 #include <sys/vnode_internal.h>
78 #include <sys/malloc.h>
79 #include <sys/mbuf.h>
80 #include <kern/lock.h>
81 #include <sys/domain.h>
82 #include <sys/protosw.h>
83 #include <sys/signalvar.h>
84 #include <sys/socket.h>
85 #include <sys/socketvar.h>
86 #include <sys/kernel.h>
87 #include <sys/uio_internal.h>
88 #include <sys/kauth.h>
89
90 #include <security/audit/audit.h>
91
92 #include <sys/kdebug.h>
93 #include <sys/sysproto.h>
94 #include <netinet/in.h>
95 #include <net/route.h>
96 #include <netinet/in_pcb.h>
97
98 #if CONFIG_MACF_SOCKET_SUBSET
99 #include <security/mac_framework.h>
100 #endif /* MAC_SOCKET_SUBSET */
101
102 #define f_flag f_fglob->fg_flag
103 #define f_type f_fglob->fg_type
104 #define f_msgcount f_fglob->fg_msgcount
105 #define f_cred f_fglob->fg_cred
106 #define f_ops f_fglob->fg_ops
107 #define f_offset f_fglob->fg_offset
108 #define f_data f_fglob->fg_data
109
110
111 #define DBG_LAYER_IN_BEG NETDBG_CODE(DBG_NETSOCK, 0)
112 #define DBG_LAYER_IN_END NETDBG_CODE(DBG_NETSOCK, 2)
113 #define DBG_LAYER_OUT_BEG NETDBG_CODE(DBG_NETSOCK, 1)
114 #define DBG_LAYER_OUT_END NETDBG_CODE(DBG_NETSOCK, 3)
115 #define DBG_FNC_SENDMSG NETDBG_CODE(DBG_NETSOCK, (1 << 8) | 1)
116 #define DBG_FNC_SENDTO NETDBG_CODE(DBG_NETSOCK, (2 << 8) | 1)
117 #define DBG_FNC_SENDIT NETDBG_CODE(DBG_NETSOCK, (3 << 8) | 1)
118 #define DBG_FNC_RECVFROM NETDBG_CODE(DBG_NETSOCK, (5 << 8))
119 #define DBG_FNC_RECVMSG NETDBG_CODE(DBG_NETSOCK, (6 << 8))
120 #define DBG_FNC_RECVIT NETDBG_CODE(DBG_NETSOCK, (7 << 8))
121 #define DBG_FNC_SENDFILE NETDBG_CODE(DBG_NETSOCK, (10 << 8))
122 #define DBG_FNC_SENDFILE_WAIT NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 1))
123 #define DBG_FNC_SENDFILE_READ NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 2))
124 #define DBG_FNC_SENDFILE_SEND NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 3))
125
126
127 #define HACK_FOR_4056224 1
128 #if HACK_FOR_4056224
129 static pid_t last_pid_4056224 = 0;
130 #endif /* HACK_FOR_4056224 */
131
132 /* TODO: should be in header file */
133 int falloc_locked(proc_t, struct fileproc **, int *, vfs_context_t, int);
134
135 static int sendit(struct proc *, int, struct user_msghdr *, uio_t, int,
136 int32_t *);
137 static int recvit(struct proc *, int, struct user_msghdr *, uio_t, user_addr_t,
138 int32_t *);
139 static int getsockaddr(struct socket *, struct sockaddr **, user_addr_t,
140 size_t, boolean_t);
141 static int getsockaddr_s(struct socket *, struct sockaddr_storage *,
142 user_addr_t, size_t, boolean_t);
143 #if SENDFILE
144 static void alloc_sendpkt(int, size_t, unsigned int *, struct mbuf **,
145 boolean_t);
146 #endif /* SENDFILE */
147
148 /*
149 * System call interface to the socket abstraction.
150 */
151
152 extern struct fileops socketops;
153
154 /*
155 * Returns: 0 Success
156 * EACCES Mandatory Access Control failure
157 * falloc:ENFILE
158 * falloc:EMFILE
159 * falloc:ENOMEM
160 * socreate:EAFNOSUPPORT
161 * socreate:EPROTOTYPE
162 * socreate:EPROTONOSUPPORT
163 * socreate:ENOBUFS
164 * socreate:ENOMEM
165 * socreate:EISCONN
166 * socreate:??? [other protocol families, IPSEC]
167 */
168 int
169 socket(struct proc *p, struct socket_args *uap, int32_t *retval)
170 {
171 struct socket *so;
172 struct fileproc *fp;
173 int fd, error;
174
175 AUDIT_ARG(socket, uap->domain, uap->type, uap->protocol);
176 #if CONFIG_MACF_SOCKET_SUBSET
177 if ((error = mac_socket_check_create(kauth_cred_get(), uap->domain,
178 uap->type, uap->protocol)) != 0)
179 return (error);
180 #endif /* MAC_SOCKET_SUBSET */
181
182 error = falloc(p, &fp, &fd, vfs_context_current());
183 if (error) {
184 return (error);
185 }
186 fp->f_flag = FREAD|FWRITE;
187 fp->f_type = DTYPE_SOCKET;
188 fp->f_ops = &socketops;
189
190 error = socreate(uap->domain, &so, uap->type, uap->protocol);
191 if (error) {
192 fp_free(p, fd, fp);
193 } else {
194 fp->f_data = (caddr_t)so;
195
196 proc_fdlock(p);
197 procfdtbl_releasefd(p, fd, NULL);
198
199 fp_drop(p, fd, fp, 1);
200 proc_fdunlock(p);
201
202 *retval = fd;
203 }
204 return (error);
205 }
206
207 /*
208 * Returns: 0 Success
209 * EDESTADDRREQ Destination address required
210 * EBADF Bad file descriptor
211 * EACCES Mandatory Access Control failure
212 * file_socket:ENOTSOCK
213 * file_socket:EBADF
214 * getsockaddr:ENAMETOOLONG Filename too long
215 * getsockaddr:EINVAL Invalid argument
216 * getsockaddr:ENOMEM Not enough space
217 * getsockaddr:EFAULT Bad address
218 * sobind:???
219 */
220 /* ARGSUSED */
221 int
222 bind(__unused proc_t p, struct bind_args *uap, __unused int32_t *retval)
223 {
224 struct sockaddr_storage ss;
225 struct sockaddr *sa = NULL;
226 struct socket *so;
227 boolean_t want_free = TRUE;
228 int error;
229
230 AUDIT_ARG(fd, uap->s);
231 error = file_socket(uap->s, &so);
232 if (error != 0)
233 return (error);
234 if (so == NULL) {
235 error = EBADF;
236 goto out;
237 }
238 if (uap->name == USER_ADDR_NULL) {
239 error = EDESTADDRREQ;
240 goto out;
241 }
242 if (uap->namelen > sizeof (ss)) {
243 error = getsockaddr(so, &sa, uap->name, uap->namelen, TRUE);
244 } else {
245 error = getsockaddr_s(so, &ss, uap->name, uap->namelen, TRUE);
246 if (error == 0) {
247 sa = (struct sockaddr *)&ss;
248 want_free = FALSE;
249 }
250 }
251 if (error != 0)
252 goto out;
253 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
254 #if CONFIG_MACF_SOCKET_SUBSET
255 if ((error = mac_socket_check_bind(kauth_cred_get(), so, sa)) == 0)
256 error = sobind(so, sa);
257 #else
258 error = sobind(so, sa);
259 #endif /* MAC_SOCKET_SUBSET */
260 if (want_free)
261 FREE(sa, M_SONAME);
262 out:
263 file_drop(uap->s);
264 return (error);
265 }
266
267 /*
268 * Returns: 0 Success
269 * EBADF
270 * EACCES Mandatory Access Control failure
271 * file_socket:ENOTSOCK
272 * file_socket:EBADF
273 * solisten:EINVAL
274 * solisten:EOPNOTSUPP
275 * solisten:???
276 */
277 int
278 listen(__unused struct proc *p, struct listen_args *uap,
279 __unused int32_t *retval)
280 {
281 int error;
282 struct socket *so;
283
284 AUDIT_ARG(fd, uap->s);
285 error = file_socket(uap->s, &so);
286 if (error)
287 return (error);
288 if (so != NULL)
289 #if CONFIG_MACF_SOCKET_SUBSET
290 {
291 error = mac_socket_check_listen(kauth_cred_get(), so);
292 if (error == 0)
293 error = solisten(so, uap->backlog);
294 }
295 #else
296 error = solisten(so, uap->backlog);
297 #endif /* MAC_SOCKET_SUBSET */
298 else
299 error = EBADF;
300
301 file_drop(uap->s);
302 return (error);
303 }
304
305 /*
306 * Returns: fp_getfsock:EBADF Bad file descriptor
307 * fp_getfsock:EOPNOTSUPP ...
308 * xlate => :ENOTSOCK Socket operation on non-socket
309 * :EFAULT Bad address on copyin/copyout
310 * :EBADF Bad file descriptor
311 * :EOPNOTSUPP Operation not supported on socket
312 * :EINVAL Invalid argument
313 * :EWOULDBLOCK Operation would block
314 * :ECONNABORTED Connection aborted
315 * :EINTR Interrupted function
316 * :EACCES Mandatory Access Control failure
317 * falloc_locked:ENFILE Too many files open in system
318 * falloc_locked::EMFILE Too many open files
319 * falloc_locked::ENOMEM Not enough space
320 * 0 Success
321 */
322 int
323 accept_nocancel(struct proc *p, struct accept_nocancel_args *uap,
324 int32_t *retval)
325 {
326 struct fileproc *fp;
327 struct sockaddr *sa = NULL;
328 socklen_t namelen;
329 int error;
330 struct socket *head, *so = NULL;
331 lck_mtx_t *mutex_held;
332 int fd = uap->s;
333 int newfd;
334 short fflag; /* type must match fp->f_flag */
335 int dosocklock = 0;
336
337 *retval = -1;
338
339 AUDIT_ARG(fd, uap->s);
340
341 if (uap->name) {
342 error = copyin(uap->anamelen, (caddr_t)&namelen,
343 sizeof (socklen_t));
344 if (error)
345 return (error);
346 }
347 error = fp_getfsock(p, fd, &fp, &head);
348 if (error) {
349 if (error == EOPNOTSUPP)
350 error = ENOTSOCK;
351 return (error);
352 }
353 if (head == NULL) {
354 error = EBADF;
355 goto out;
356 }
357 #if CONFIG_MACF_SOCKET_SUBSET
358 if ((error = mac_socket_check_accept(kauth_cred_get(), head)) != 0)
359 goto out;
360 #endif /* MAC_SOCKET_SUBSET */
361
362 socket_lock(head, 1);
363
364 if (head->so_proto->pr_getlock != NULL) {
365 mutex_held = (*head->so_proto->pr_getlock)(head, 0);
366 dosocklock = 1;
367 } else {
368 mutex_held = head->so_proto->pr_domain->dom_mtx;
369 dosocklock = 0;
370 }
371
372 if ((head->so_options & SO_ACCEPTCONN) == 0) {
373 if ((head->so_proto->pr_flags & PR_CONNREQUIRED) == 0) {
374 error = EOPNOTSUPP;
375 } else {
376 /* POSIX: The socket is not accepting connections */
377 error = EINVAL;
378 }
379 socket_unlock(head, 1);
380 goto out;
381 }
382 if ((head->so_state & SS_NBIO) && head->so_comp.tqh_first == NULL) {
383 socket_unlock(head, 1);
384 error = EWOULDBLOCK;
385 goto out;
386 }
387 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
388 if (head->so_state & SS_CANTRCVMORE) {
389 head->so_error = ECONNABORTED;
390 break;
391 }
392 if (head->so_usecount < 1)
393 panic("accept: head=%p refcount=%d\n", head,
394 head->so_usecount);
395 error = msleep((caddr_t)&head->so_timeo, mutex_held,
396 PSOCK | PCATCH, "accept", 0);
397 if (head->so_usecount < 1)
398 panic("accept: 2 head=%p refcount=%d\n", head,
399 head->so_usecount);
400 if ((head->so_state & SS_DRAINING)) {
401 error = ECONNABORTED;
402 }
403 if (error) {
404 socket_unlock(head, 1);
405 goto out;
406 }
407 }
408 if (head->so_error) {
409 error = head->so_error;
410 head->so_error = 0;
411 socket_unlock(head, 1);
412 goto out;
413 }
414
415
416 /*
417 * At this point we know that there is at least one connection
418 * ready to be accepted. Remove it from the queue prior to
419 * allocating the file descriptor for it since falloc() may
420 * block allowing another process to accept the connection
421 * instead.
422 */
423 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
424 so = TAILQ_FIRST(&head->so_comp);
425 TAILQ_REMOVE(&head->so_comp, so, so_list);
426 head->so_qlen--;
427 /* unlock head to avoid deadlock with select, keep a ref on head */
428 socket_unlock(head, 0);
429
430 #if CONFIG_MACF_SOCKET_SUBSET
431 /*
432 * Pass the pre-accepted socket to the MAC framework. This is
433 * cheaper than allocating a file descriptor for the socket,
434 * calling the protocol accept callback, and possibly freeing
435 * the file descriptor should the MAC check fails.
436 */
437 if ((error = mac_socket_check_accepted(kauth_cred_get(), so)) != 0) {
438 so->so_state &= ~(SS_NOFDREF | SS_COMP);
439 so->so_head = NULL;
440 soclose(so);
441 /* Drop reference on listening socket */
442 sodereference(head);
443 goto out;
444 }
445 #endif /* MAC_SOCKET_SUBSET */
446
447 /*
448 * Pass the pre-accepted socket to any interested socket filter(s).
449 * Upon failure, the socket would have been closed by the callee.
450 */
451 if (so->so_filt != NULL && (error = soacceptfilter(so)) != 0) {
452 /* Drop reference on listening socket */
453 sodereference(head);
454 /* Propagate socket filter's error code to the caller */
455 goto out;
456 }
457
458 fflag = fp->f_flag;
459 error = falloc(p, &fp, &newfd, vfs_context_current());
460 if (error) {
461 /*
462 * Probably ran out of file descriptors. Put the
463 * unaccepted connection back onto the queue and
464 * do another wakeup so some other process might
465 * have a chance at it.
466 */
467 socket_lock(head, 0);
468 TAILQ_INSERT_HEAD(&head->so_comp, so, so_list);
469 head->so_qlen++;
470 wakeup_one((caddr_t)&head->so_timeo);
471 socket_unlock(head, 1);
472 goto out;
473 }
474 *retval = newfd;
475 fp->f_type = DTYPE_SOCKET;
476 fp->f_flag = fflag;
477 fp->f_ops = &socketops;
478 fp->f_data = (caddr_t)so;
479 socket_lock(head, 0);
480 if (dosocklock)
481 socket_lock(so, 1);
482 so->so_state &= ~SS_COMP;
483 so->so_head = NULL;
484 (void) soacceptlock(so, &sa, 0);
485 socket_unlock(head, 1);
486 if (sa == NULL) {
487 namelen = 0;
488 if (uap->name)
489 goto gotnoname;
490 error = 0;
491 goto releasefd;
492 }
493 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
494
495 if (uap->name) {
496 socklen_t sa_len;
497
498 /* save sa_len before it is destroyed */
499 sa_len = sa->sa_len;
500 namelen = MIN(namelen, sa_len);
501 error = copyout(sa, uap->name, namelen);
502 if (!error)
503 /* return the actual, untruncated address length */
504 namelen = sa_len;
505 gotnoname:
506 error = copyout((caddr_t)&namelen, uap->anamelen,
507 sizeof (socklen_t));
508 }
509 FREE(sa, M_SONAME);
510
511 releasefd:
512 /*
513 * If the socket has been marked as inactive by soacceptfilter(),
514 * disallow further operations on it. We explicitly call shutdown
515 * on both data directions to ensure that SS_CANT{RCV,SEND}MORE
516 * states are set for the socket. This would also flush out data
517 * hanging off the receive list of this socket.
518 */
519 if (so->so_flags & SOF_DEFUNCT) {
520 (void) soshutdownlock(so, SHUT_RD);
521 (void) soshutdownlock(so, SHUT_WR);
522 (void) sodisconnectlocked(so);
523 }
524
525 if (dosocklock)
526 socket_unlock(so, 1);
527
528 proc_fdlock(p);
529 procfdtbl_releasefd(p, newfd, NULL);
530 fp_drop(p, newfd, fp, 1);
531 proc_fdunlock(p);
532
533 out:
534 file_drop(fd);
535 return (error);
536 }
537
538 int
539 accept(struct proc *p, struct accept_args *uap, int32_t *retval)
540 {
541 __pthread_testcancel(1);
542 return(accept_nocancel(p, (struct accept_nocancel_args *)uap, retval));
543 }
544
545 /*
546 * Returns: 0 Success
547 * EBADF Bad file descriptor
548 * EALREADY Connection already in progress
549 * EINPROGRESS Operation in progress
550 * ECONNABORTED Connection aborted
551 * EINTR Interrupted function
552 * EACCES Mandatory Access Control failure
553 * file_socket:ENOTSOCK
554 * file_socket:EBADF
555 * getsockaddr:ENAMETOOLONG Filename too long
556 * getsockaddr:EINVAL Invalid argument
557 * getsockaddr:ENOMEM Not enough space
558 * getsockaddr:EFAULT Bad address
559 * soconnectlock:EOPNOTSUPP
560 * soconnectlock:EISCONN
561 * soconnectlock:??? [depends on protocol, filters]
562 * msleep:EINTR
563 *
564 * Imputed: so_error error may be set from so_error, which
565 * may have been set by soconnectlock.
566 */
567 /* ARGSUSED */
568 int
569 connect(struct proc *p, struct connect_args *uap, int32_t *retval)
570 {
571 __pthread_testcancel(1);
572 return(connect_nocancel(p, (struct connect_nocancel_args *)uap, retval));
573 }
574
575 int
576 connect_nocancel(__unused proc_t p, struct connect_nocancel_args *uap, __unused int32_t *retval)
577 {
578 struct socket *so;
579 struct sockaddr_storage ss;
580 struct sockaddr *sa = NULL;
581 lck_mtx_t *mutex_held;
582 boolean_t want_free = TRUE;
583 int error;
584 int fd = uap->s;
585 boolean_t dgram;
586
587 AUDIT_ARG(fd, uap->s);
588 error = file_socket(fd, &so);
589 if (error != 0)
590 return (error);
591 if (so == NULL) {
592 error = EBADF;
593 goto out;
594 }
595
596 /*
597 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
598 * if this is a datagram socket; translate for other types.
599 */
600 dgram = (so->so_type == SOCK_DGRAM);
601
602 /* Get socket address now before we obtain socket lock */
603 if (uap->namelen > sizeof (ss)) {
604 error = getsockaddr(so, &sa, uap->name, uap->namelen, !dgram);
605 } else {
606 error = getsockaddr_s(so, &ss, uap->name, uap->namelen, !dgram);
607 if (error == 0) {
608 sa = (struct sockaddr *)&ss;
609 want_free = FALSE;
610 }
611 }
612 if (error != 0)
613 goto out;
614
615 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
616 #if CONFIG_MACF_SOCKET_SUBSET
617 if ((error = mac_socket_check_connect(kauth_cred_get(), so, sa)) != 0) {
618 if (want_free)
619 FREE(sa, M_SONAME);
620 goto out;
621 }
622 #endif /* MAC_SOCKET_SUBSET */
623 socket_lock(so, 1);
624
625 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
626 if (want_free)
627 FREE(sa, M_SONAME);
628 socket_unlock(so, 1);
629 error = EALREADY;
630 goto out;
631 }
632 error = soconnectlock(so, sa, 0);
633 if (error)
634 goto bad;
635 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
636 if (want_free)
637 FREE(sa, M_SONAME);
638 socket_unlock(so, 1);
639 error = EINPROGRESS;
640 goto out;
641 }
642 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
643 if (so->so_proto->pr_getlock != NULL)
644 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
645 else
646 mutex_held = so->so_proto->pr_domain->dom_mtx;
647 error = msleep((caddr_t)&so->so_timeo, mutex_held,
648 PSOCK | PCATCH, "connect", 0);
649 if ((so->so_state & SS_DRAINING)) {
650 error = ECONNABORTED;
651 }
652 if (error)
653 break;
654 }
655 if (error == 0) {
656 error = so->so_error;
657 so->so_error = 0;
658 }
659 bad:
660 so->so_state &= ~SS_ISCONNECTING;
661 socket_unlock(so, 1);
662 if (want_free)
663 FREE(sa, M_SONAME);
664 if (error == ERESTART)
665 error = EINTR;
666 out:
667 file_drop(fd);
668 return (error);
669 }
670
671 /*
672 * Returns: 0 Success
673 * socreate:EAFNOSUPPORT
674 * socreate:EPROTOTYPE
675 * socreate:EPROTONOSUPPORT
676 * socreate:ENOBUFS
677 * socreate:ENOMEM
678 * socreate:EISCONN
679 * socreate:??? [other protocol families, IPSEC]
680 * falloc:ENFILE
681 * falloc:EMFILE
682 * falloc:ENOMEM
683 * copyout:EFAULT
684 * soconnect2:EINVAL
685 * soconnect2:EPROTOTYPE
686 * soconnect2:??? [other protocol families[
687 */
688 int
689 socketpair(struct proc *p, struct socketpair_args *uap,
690 __unused int32_t *retval)
691 {
692 struct fileproc *fp1, *fp2;
693 struct socket *so1, *so2;
694 int fd, error, sv[2];
695
696 AUDIT_ARG(socket, uap->domain, uap->type, uap->protocol);
697 error = socreate(uap->domain, &so1, uap->type, uap->protocol);
698 if (error)
699 return (error);
700 error = socreate(uap->domain, &so2, uap->type, uap->protocol);
701 if (error)
702 goto free1;
703
704 error = falloc(p, &fp1, &fd, vfs_context_current());
705 if (error) {
706 goto free2;
707 }
708 fp1->f_flag = FREAD|FWRITE;
709 fp1->f_type = DTYPE_SOCKET;
710 fp1->f_ops = &socketops;
711 fp1->f_data = (caddr_t)so1;
712 sv[0] = fd;
713
714 error = falloc(p, &fp2, &fd, vfs_context_current());
715 if (error) {
716 goto free3;
717 }
718 fp2->f_flag = FREAD|FWRITE;
719 fp2->f_type = DTYPE_SOCKET;
720 fp2->f_ops = &socketops;
721 fp2->f_data = (caddr_t)so2;
722 sv[1] = fd;
723
724 error = soconnect2(so1, so2);
725 if (error) {
726 goto free4;
727 }
728 if (uap->type == SOCK_DGRAM) {
729 /*
730 * Datagram socket connection is asymmetric.
731 */
732 error = soconnect2(so2, so1);
733 if (error) {
734 goto free4;
735 }
736 }
737
738 proc_fdlock(p);
739 procfdtbl_releasefd(p, sv[0], NULL);
740 procfdtbl_releasefd(p, sv[1], NULL);
741 fp_drop(p, sv[0], fp1, 1);
742 fp_drop(p, sv[1], fp2, 1);
743 proc_fdunlock(p);
744
745 error = copyout((caddr_t)sv, uap->rsv, 2 * sizeof (int));
746 return (error);
747 free4:
748 fp_free(p, sv[1], fp2);
749 free3:
750 fp_free(p, sv[0], fp1);
751 free2:
752 (void) soclose(so2);
753 free1:
754 (void) soclose(so1);
755 return (error);
756 }
757
758 /*
759 * Returns: 0 Success
760 * EINVAL
761 * ENOBUFS
762 * EBADF
763 * EPIPE
764 * EACCES Mandatory Access Control failure
765 * file_socket:ENOTSOCK
766 * file_socket:EBADF
767 * getsockaddr:ENAMETOOLONG Filename too long
768 * getsockaddr:EINVAL Invalid argument
769 * getsockaddr:ENOMEM Not enough space
770 * getsockaddr:EFAULT Bad address
771 * <pru_sosend>:EACCES[TCP]
772 * <pru_sosend>:EADDRINUSE[TCP]
773 * <pru_sosend>:EADDRNOTAVAIL[TCP]
774 * <pru_sosend>:EAFNOSUPPORT[TCP]
775 * <pru_sosend>:EAGAIN[TCP]
776 * <pru_sosend>:EBADF
777 * <pru_sosend>:ECONNRESET[TCP]
778 * <pru_sosend>:EFAULT
779 * <pru_sosend>:EHOSTUNREACH[TCP]
780 * <pru_sosend>:EINTR
781 * <pru_sosend>:EINVAL
782 * <pru_sosend>:EISCONN[AF_INET]
783 * <pru_sosend>:EMSGSIZE[TCP]
784 * <pru_sosend>:ENETDOWN[TCP]
785 * <pru_sosend>:ENETUNREACH[TCP]
786 * <pru_sosend>:ENOBUFS
787 * <pru_sosend>:ENOMEM[TCP]
788 * <pru_sosend>:ENOTCONN[AF_INET]
789 * <pru_sosend>:EOPNOTSUPP
790 * <pru_sosend>:EPERM[TCP]
791 * <pru_sosend>:EPIPE
792 * <pru_sosend>:EWOULDBLOCK
793 * <pru_sosend>:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
794 * <pru_sosend>:???[AF_INET] [whatever a filter author chooses]
795 * <pru_sosend>:??? [value from so_error]
796 * sockargs:???
797 */
798 static int
799 sendit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop,
800 int flags, int32_t *retval)
801 {
802 struct mbuf *control = NULL;
803 struct sockaddr_storage ss;
804 struct sockaddr *to = NULL;
805 boolean_t want_free = TRUE;
806 int error;
807 struct socket *so;
808 user_ssize_t len;
809
810 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
811
812 error = file_socket(s, &so);
813 if (error) {
814 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error, 0, 0, 0, 0);
815 return (error);
816 }
817 if (so == NULL) {
818 error = EBADF;
819 goto out;
820 }
821 if (mp->msg_name != USER_ADDR_NULL) {
822 if (mp->msg_namelen > sizeof (ss)) {
823 error = getsockaddr(so, &to, mp->msg_name,
824 mp->msg_namelen, TRUE);
825 } else {
826 error = getsockaddr_s(so, &ss, mp->msg_name,
827 mp->msg_namelen, TRUE);
828 if (error == 0) {
829 to = (struct sockaddr *)&ss;
830 want_free = FALSE;
831 }
832 }
833 if (error != 0)
834 goto out;
835 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), to);
836 }
837 if (mp->msg_control != USER_ADDR_NULL) {
838 if (mp->msg_controllen < sizeof (struct cmsghdr)) {
839 error = EINVAL;
840 goto bad;
841 }
842 error = sockargs(&control, mp->msg_control,
843 mp->msg_controllen, MT_CONTROL);
844 if (error != 0)
845 goto bad;
846 }
847
848 #if CONFIG_MACF_SOCKET_SUBSET
849 /*
850 * We check the state without holding the socket lock;
851 * if a race condition occurs, it would simply result
852 * in an extra call to the MAC check function.
853 */
854 if (!(so->so_state & SS_ISCONNECTED) &&
855 (error = mac_socket_check_send(kauth_cred_get(), so, to)) != 0)
856 goto bad;
857 #endif /* MAC_SOCKET_SUBSET */
858
859 len = uio_resid(uiop);
860 error = so->so_proto->pr_usrreqs->pru_sosend(so, to, uiop, 0, control,
861 flags);
862 if (error != 0) {
863 if (uio_resid(uiop) != len && (error == ERESTART ||
864 error == EINTR || error == EWOULDBLOCK))
865 error = 0;
866 /* Generation of SIGPIPE can be controlled per socket */
867 if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE))
868 psignal(p, SIGPIPE);
869 }
870 if (error == 0)
871 *retval = (int)(len - uio_resid(uiop));
872 bad:
873 if (to != NULL && want_free)
874 FREE(to, M_SONAME);
875 out:
876 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error, 0, 0, 0, 0);
877 file_drop(s);
878 return (error);
879 }
880
881 /*
882 * Returns: 0 Success
883 * ENOMEM
884 * sendit:??? [see sendit definition in this file]
885 * write:??? [4056224: applicable for pipes]
886 */
887 int
888 sendto(struct proc *p, struct sendto_args *uap, int32_t *retval)
889 {
890 __pthread_testcancel(1);
891 return(sendto_nocancel(p, (struct sendto_nocancel_args *)uap, retval));
892 }
893
894 int
895 sendto_nocancel(struct proc *p, struct sendto_nocancel_args *uap, int32_t *retval)
896 {
897 struct user_msghdr msg;
898 int error;
899 uio_t auio = NULL;
900
901 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_START, 0, 0, 0, 0, 0);
902 AUDIT_ARG(fd, uap->s);
903
904 auio = uio_create(1, 0,
905 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
906 UIO_WRITE);
907 if (auio == NULL) {
908 return (ENOMEM);
909 }
910 uio_addiov(auio, uap->buf, uap->len);
911
912 msg.msg_name = uap->to;
913 msg.msg_namelen = uap->tolen;
914 /* no need to set up msg_iov. sendit uses uio_t we send it */
915 msg.msg_iov = 0;
916 msg.msg_iovlen = 0;
917 msg.msg_control = 0;
918 msg.msg_flags = 0;
919
920 error = sendit(p, uap->s, &msg, auio, uap->flags, retval);
921
922 if (auio != NULL) {
923 uio_free(auio);
924 }
925
926 #if HACK_FOR_4056224
927 /*
928 * Radar 4056224
929 * Temporary workaround to let send() and recv() work over
930 * a pipe for binary compatibility
931 * This will be removed in the release following Tiger
932 */
933 if (error == ENOTSOCK) {
934 struct fileproc *fp;
935
936 if (fp_lookup(p, uap->s, &fp, 0) == 0) {
937 (void) fp_drop(p, uap->s, fp, 0);
938
939 if (fp->f_type == DTYPE_PIPE) {
940 struct write_args write_uap;
941 user_ssize_t write_retval;
942
943 if (p->p_pid > last_pid_4056224) {
944 last_pid_4056224 = p->p_pid;
945
946 printf("%s[%d] uses send/recv "
947 "on a pipe\n", p->p_comm, p->p_pid);
948 }
949
950 bzero(&write_uap, sizeof (struct write_args));
951 write_uap.fd = uap->s;
952 write_uap.cbuf = uap->buf;
953 write_uap.nbyte = uap->len;
954
955 error = write(p, &write_uap, &write_retval);
956 *retval = (int)write_retval;
957 }
958 }
959 }
960 #endif /* HACK_FOR_4056224 */
961
962 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_END, error, *retval, 0, 0, 0);
963
964 return (error);
965 }
966
967 /*
968 * Returns: 0 Success
969 * ENOBUFS
970 * copyin:EFAULT
971 * sendit:??? [see sendit definition in this file]
972 */
973 int
974 sendmsg(struct proc *p, struct sendmsg_args *uap, int32_t *retval)
975 {
976 __pthread_testcancel(1);
977 return(sendmsg_nocancel(p, (struct sendmsg_nocancel_args *)uap, retval));
978 }
979
980 int
981 sendmsg_nocancel(struct proc *p, struct sendmsg_nocancel_args *uap, int32_t *retval)
982 {
983 struct user32_msghdr msg32;
984 struct user64_msghdr msg64;
985 struct user_msghdr user_msg;
986 caddr_t msghdrp;
987 int size_of_msghdr;
988 int error;
989 uio_t auio = NULL;
990 struct user_iovec *iovp;
991
992 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
993 AUDIT_ARG(fd, uap->s);
994 if (IS_64BIT_PROCESS(p)) {
995 msghdrp = (caddr_t)&msg64;
996 size_of_msghdr = sizeof (msg64);
997 } else {
998 msghdrp = (caddr_t)&msg32;
999 size_of_msghdr = sizeof (msg32);
1000 }
1001 error = copyin(uap->msg, msghdrp, size_of_msghdr);
1002 if (error) {
1003 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1004 return (error);
1005 }
1006
1007 if (IS_64BIT_PROCESS(p)) {
1008 user_msg.msg_flags = msg64.msg_flags;
1009 user_msg.msg_controllen = msg64.msg_controllen;
1010 user_msg.msg_control = msg64.msg_control;
1011 user_msg.msg_iovlen = msg64.msg_iovlen;
1012 user_msg.msg_iov = msg64.msg_iov;
1013 user_msg.msg_namelen = msg64.msg_namelen;
1014 user_msg.msg_name = msg64.msg_name;
1015 } else {
1016 user_msg.msg_flags = msg32.msg_flags;
1017 user_msg.msg_controllen = msg32.msg_controllen;
1018 user_msg.msg_control = msg32.msg_control;
1019 user_msg.msg_iovlen = msg32.msg_iovlen;
1020 user_msg.msg_iov = msg32.msg_iov;
1021 user_msg.msg_namelen = msg32.msg_namelen;
1022 user_msg.msg_name = msg32.msg_name;
1023 }
1024
1025 if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
1026 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, EMSGSIZE,
1027 0, 0, 0, 0);
1028 return (EMSGSIZE);
1029 }
1030
1031 /* allocate a uio large enough to hold the number of iovecs passed */
1032 auio = uio_create(user_msg.msg_iovlen, 0,
1033 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1034 UIO_WRITE);
1035 if (auio == NULL) {
1036 error = ENOBUFS;
1037 goto done;
1038 }
1039
1040 if (user_msg.msg_iovlen) {
1041 /*
1042 * get location of iovecs within the uio.
1043 * then copyin the iovecs from user space.
1044 */
1045 iovp = uio_iovsaddr(auio);
1046 if (iovp == NULL) {
1047 error = ENOBUFS;
1048 goto done;
1049 }
1050 error = copyin_user_iovec_array(user_msg.msg_iov,
1051 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
1052 user_msg.msg_iovlen, iovp);
1053 if (error)
1054 goto done;
1055 user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
1056
1057 /* finish setup of uio_t */
1058 uio_calculateresid(auio);
1059 } else {
1060 user_msg.msg_iov = 0;
1061 }
1062
1063 /* msg_flags is ignored for send */
1064 user_msg.msg_flags = 0;
1065
1066 error = sendit(p, uap->s, &user_msg, auio, uap->flags, retval);
1067 done:
1068 if (auio != NULL) {
1069 uio_free(auio);
1070 }
1071 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1072
1073 return (error);
1074 }
1075
1076 /*
1077 * Returns: 0 Success
1078 * ENOTSOCK
1079 * EINVAL
1080 * EBADF
1081 * EACCES Mandatory Access Control failure
1082 * copyout:EFAULT
1083 * fp_lookup:EBADF
1084 * <pru_soreceive>:ENOBUFS
1085 * <pru_soreceive>:ENOTCONN
1086 * <pru_soreceive>:EWOULDBLOCK
1087 * <pru_soreceive>:EFAULT
1088 * <pru_soreceive>:EINTR
1089 * <pru_soreceive>:EBADF
1090 * <pru_soreceive>:EINVAL
1091 * <pru_soreceive>:EMSGSIZE
1092 * <pru_soreceive>:???
1093 *
1094 * Notes: Additional return values from calls through <pru_soreceive>
1095 * depend on protocols other than TCP or AF_UNIX, which are
1096 * documented above.
1097 */
1098 static int
1099 recvit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop,
1100 user_addr_t namelenp, int32_t *retval)
1101 {
1102 int len, error;
1103 struct mbuf *m, *control = 0;
1104 user_addr_t ctlbuf;
1105 struct socket *so;
1106 struct sockaddr *fromsa = 0;
1107 struct fileproc *fp;
1108
1109 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
1110 proc_fdlock(p);
1111 if ((error = fp_lookup(p, s, &fp, 1))) {
1112 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
1113 proc_fdunlock(p);
1114 return (error);
1115 }
1116 if (fp->f_type != DTYPE_SOCKET) {
1117 fp_drop(p, s, fp, 1);
1118 proc_fdunlock(p);
1119 return (ENOTSOCK);
1120 }
1121
1122 so = (struct socket *)fp->f_data;
1123 if (so == NULL) {
1124 fp_drop(p, s, fp, 1);
1125 proc_fdunlock(p);
1126 return (EBADF);
1127 }
1128
1129 proc_fdunlock(p);
1130
1131 #if CONFIG_MACF_SOCKET_SUBSET
1132 /*
1133 * We check the state without holding the socket lock;
1134 * if a race condition occurs, it would simply result
1135 * in an extra call to the MAC check function.
1136 */
1137 if (!(so->so_state & SS_ISCONNECTED) &&
1138 (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0)
1139 goto out1;
1140 #endif /* MAC_SOCKET_SUBSET */
1141 if (uio_resid(uiop) < 0) {
1142 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, EINVAL, 0, 0, 0, 0);
1143 error = EINVAL;
1144 goto out1;
1145 }
1146
1147 len = uio_resid(uiop);
1148 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, uiop,
1149 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
1150 &mp->msg_flags);
1151 if (fromsa)
1152 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()),
1153 fromsa);
1154 if (error) {
1155 if (uio_resid(uiop) != len && (error == ERESTART ||
1156 error == EINTR || error == EWOULDBLOCK))
1157 error = 0;
1158 }
1159
1160 if (error)
1161 goto out;
1162
1163 *retval = len - uio_resid(uiop);
1164 if (mp->msg_name) {
1165 socklen_t sa_len = 0;
1166
1167 len = mp->msg_namelen;
1168 if (len <= 0 || fromsa == 0) {
1169 len = 0;
1170 } else {
1171 #ifndef MIN
1172 #define MIN(a, b) ((a) > (b) ? (b) : (a))
1173 #endif
1174 sa_len = fromsa->sa_len;
1175 len = MIN((unsigned int)len, sa_len);
1176 error = copyout(fromsa, mp->msg_name, (unsigned)len);
1177 if (error)
1178 goto out;
1179 }
1180 mp->msg_namelen = sa_len;
1181 /* return the actual, untruncated address length */
1182 if (namelenp &&
1183 (error = copyout((caddr_t)&sa_len, namelenp,
1184 sizeof (int)))) {
1185 goto out;
1186 }
1187 }
1188 if (mp->msg_control) {
1189 len = mp->msg_controllen;
1190 m = control;
1191 mp->msg_controllen = 0;
1192 ctlbuf = mp->msg_control;
1193
1194 while (m && len > 0) {
1195 unsigned int tocopy;
1196 struct cmsghdr *cp = mtod(m, struct cmsghdr *);
1197
1198 /*
1199 * SCM_TIMESTAMP hack because struct timeval has a
1200 * different size for 32 bits and 64 bits processes
1201 */
1202 if (cp->cmsg_level == SOL_SOCKET && cp->cmsg_type == SCM_TIMESTAMP) {
1203 unsigned char tmp_buffer[CMSG_SPACE(sizeof(struct user64_timeval))];
1204 struct cmsghdr *tmp_cp = (struct cmsghdr *)tmp_buffer;
1205 int tmp_space;
1206 struct timeval *tv = (struct timeval *)CMSG_DATA(cp);
1207
1208 tmp_cp->cmsg_level = SOL_SOCKET;
1209 tmp_cp->cmsg_type = SCM_TIMESTAMP;
1210
1211 if (proc_is64bit(p)) {
1212 struct user64_timeval *tv64 = (struct user64_timeval *)CMSG_DATA(tmp_cp);
1213
1214 tv64->tv_sec = tv->tv_sec;
1215 tv64->tv_usec = tv->tv_usec;
1216
1217 tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user64_timeval));
1218 tmp_space = CMSG_SPACE(sizeof(struct user64_timeval));
1219 } else {
1220 struct user32_timeval *tv32 = (struct user32_timeval *)CMSG_DATA(tmp_cp);
1221
1222 tv32->tv_sec = tv->tv_sec;
1223 tv32->tv_usec = tv->tv_usec;
1224
1225 tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user32_timeval));
1226 tmp_space = CMSG_SPACE(sizeof(struct user32_timeval));
1227 }
1228 if (len >= tmp_space) {
1229 tocopy = tmp_space;
1230 } else {
1231 mp->msg_flags |= MSG_CTRUNC;
1232 tocopy = len;
1233 }
1234 error = copyout(tmp_buffer, ctlbuf, tocopy);
1235 if (error)
1236 goto out;
1237
1238 } else {
1239 if (len >= m->m_len) {
1240 tocopy = m->m_len;
1241 } else {
1242 mp->msg_flags |= MSG_CTRUNC;
1243 tocopy = len;
1244 }
1245
1246 error = copyout((caddr_t)mtod(m, caddr_t), ctlbuf,
1247 tocopy);
1248 if (error)
1249 goto out;
1250 }
1251
1252 ctlbuf += tocopy;
1253 len -= tocopy;
1254 m = m->m_next;
1255 }
1256 mp->msg_controllen = ctlbuf - mp->msg_control;
1257 }
1258 out:
1259 if (fromsa)
1260 FREE(fromsa, M_SONAME);
1261 if (control)
1262 m_freem(control);
1263 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
1264 out1:
1265 fp_drop(p, s, fp, 0);
1266 return (error);
1267 }
1268
1269
1270 /*
1271 * Returns: 0 Success
1272 * ENOMEM
1273 * copyin:EFAULT
1274 * recvit:???
1275 * read:??? [4056224: applicable for pipes]
1276 *
1277 * Notes: The read entry point is only called as part of support for
1278 * binary backward compatability; new code should use read
1279 * instead of recv or recvfrom when attempting to read data
1280 * from pipes.
1281 *
1282 * For full documentation of the return codes from recvit, see
1283 * the block header for the recvit function.
1284 */
1285 int
1286 recvfrom(struct proc *p, struct recvfrom_args *uap, int32_t *retval)
1287 {
1288 __pthread_testcancel(1);
1289 return(recvfrom_nocancel(p, (struct recvfrom_nocancel_args *)uap, retval));
1290 }
1291
1292 int
1293 recvfrom_nocancel(struct proc *p, struct recvfrom_nocancel_args *uap, int32_t *retval)
1294 {
1295 struct user_msghdr msg;
1296 int error;
1297 uio_t auio = NULL;
1298
1299 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_START, 0, 0, 0, 0, 0);
1300 AUDIT_ARG(fd, uap->s);
1301
1302 if (uap->fromlenaddr) {
1303 error = copyin(uap->fromlenaddr,
1304 (caddr_t)&msg.msg_namelen, sizeof (msg.msg_namelen));
1305 if (error)
1306 return (error);
1307 } else {
1308 msg.msg_namelen = 0;
1309 }
1310 msg.msg_name = uap->from;
1311 auio = uio_create(1, 0,
1312 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1313 UIO_READ);
1314 if (auio == NULL) {
1315 return (ENOMEM);
1316 }
1317
1318 uio_addiov(auio, uap->buf, uap->len);
1319 /* no need to set up msg_iov. recvit uses uio_t we send it */
1320 msg.msg_iov = 0;
1321 msg.msg_iovlen = 0;
1322 msg.msg_control = 0;
1323 msg.msg_controllen = 0;
1324 msg.msg_flags = uap->flags;
1325 error = recvit(p, uap->s, &msg, auio, uap->fromlenaddr, retval);
1326 if (auio != NULL) {
1327 uio_free(auio);
1328 }
1329
1330 #if HACK_FOR_4056224
1331 /*
1332 * Radar 4056224
1333 * Temporary workaround to let send() and recv() work over
1334 * a pipe for binary compatibility
1335 * This will be removed in the release following Tiger
1336 */
1337 if (error == ENOTSOCK && proc_is64bit(p) == 0) {
1338 struct fileproc *fp;
1339
1340 if (fp_lookup(p, uap->s, &fp, 0) == 0) {
1341 (void) fp_drop(p, uap->s, fp, 0);
1342
1343 if (fp->f_type == DTYPE_PIPE) {
1344 struct read_args read_uap;
1345 user_ssize_t read_retval;
1346
1347 if (p->p_pid > last_pid_4056224) {
1348 last_pid_4056224 = p->p_pid;
1349
1350 printf("%s[%d] uses send/recv on "
1351 "a pipe\n", p->p_comm, p->p_pid);
1352 }
1353
1354 bzero(&read_uap, sizeof (struct read_args));
1355 read_uap.fd = uap->s;
1356 read_uap.cbuf = uap->buf;
1357 read_uap.nbyte = uap->len;
1358
1359 error = read(p, &read_uap, &read_retval);
1360 *retval = (int)read_retval;
1361 }
1362 }
1363 }
1364 #endif /* HACK_FOR_4056224 */
1365
1366 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_END, error, 0, 0, 0, 0);
1367
1368 return (error);
1369 }
1370
1371 /*
1372 * Returns: 0 Success
1373 * EMSGSIZE
1374 * ENOMEM
1375 * copyin:EFAULT
1376 * copyout:EFAULT
1377 * recvit:???
1378 *
1379 * Notes: For full documentation of the return codes from recvit, see
1380 * the block header for the recvit function.
1381 */
1382 int
1383 recvmsg(struct proc *p, struct recvmsg_args *uap, int32_t *retval)
1384 {
1385 __pthread_testcancel(1);
1386 return(recvmsg_nocancel(p, (struct recvmsg_nocancel_args *)uap, retval));
1387 }
1388
1389 int
1390 recvmsg_nocancel(struct proc *p, struct recvmsg_nocancel_args *uap, int32_t *retval)
1391 {
1392 struct user32_msghdr msg32;
1393 struct user64_msghdr msg64;
1394 struct user_msghdr user_msg;
1395 caddr_t msghdrp;
1396 int size_of_msghdr;
1397 user_addr_t uiov;
1398 int error;
1399 uio_t auio = NULL;
1400 struct user_iovec *iovp;
1401
1402 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
1403 AUDIT_ARG(fd, uap->s);
1404 if (IS_64BIT_PROCESS(p)) {
1405 msghdrp = (caddr_t)&msg64;
1406 size_of_msghdr = sizeof (msg64);
1407 } else {
1408 msghdrp = (caddr_t)&msg32;
1409 size_of_msghdr = sizeof (msg32);
1410 }
1411 error = copyin(uap->msg, msghdrp, size_of_msghdr);
1412 if (error) {
1413 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1414 return (error);
1415 }
1416
1417 /* only need to copy if user process is not 64-bit */
1418 if (IS_64BIT_PROCESS(p)) {
1419 user_msg.msg_flags = msg64.msg_flags;
1420 user_msg.msg_controllen = msg64.msg_controllen;
1421 user_msg.msg_control = msg64.msg_control;
1422 user_msg.msg_iovlen = msg64.msg_iovlen;
1423 user_msg.msg_iov = msg64.msg_iov;
1424 user_msg.msg_namelen = msg64.msg_namelen;
1425 user_msg.msg_name = msg64.msg_name;
1426 } else {
1427 user_msg.msg_flags = msg32.msg_flags;
1428 user_msg.msg_controllen = msg32.msg_controllen;
1429 user_msg.msg_control = msg32.msg_control;
1430 user_msg.msg_iovlen = msg32.msg_iovlen;
1431 user_msg.msg_iov = msg32.msg_iov;
1432 user_msg.msg_namelen = msg32.msg_namelen;
1433 user_msg.msg_name = msg32.msg_name;
1434 }
1435
1436 if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
1437 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, EMSGSIZE,
1438 0, 0, 0, 0);
1439 return (EMSGSIZE);
1440 }
1441
1442 user_msg.msg_flags = uap->flags;
1443
1444 /* allocate a uio large enough to hold the number of iovecs passed */
1445 auio = uio_create(user_msg.msg_iovlen, 0,
1446 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1447 UIO_READ);
1448 if (auio == NULL) {
1449 error = ENOMEM;
1450 goto done;
1451 }
1452
1453 /*
1454 * get location of iovecs within the uio. then copyin the iovecs from
1455 * user space.
1456 */
1457 iovp = uio_iovsaddr(auio);
1458 if (iovp == NULL) {
1459 error = ENOMEM;
1460 goto done;
1461 }
1462 uiov = user_msg.msg_iov;
1463 user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
1464 error = copyin_user_iovec_array(uiov,
1465 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
1466 user_msg.msg_iovlen, iovp);
1467 if (error)
1468 goto done;
1469
1470 /* finish setup of uio_t */
1471 uio_calculateresid(auio);
1472
1473 error = recvit(p, uap->s, &user_msg, auio, 0, retval);
1474 if (!error) {
1475 user_msg.msg_iov = uiov;
1476 if (IS_64BIT_PROCESS(p)) {
1477 msg64.msg_flags = user_msg.msg_flags;
1478 msg64.msg_controllen = user_msg.msg_controllen;
1479 msg64.msg_control = user_msg.msg_control;
1480 msg64.msg_iovlen = user_msg.msg_iovlen;
1481 msg64.msg_iov = user_msg.msg_iov;
1482 msg64.msg_namelen = user_msg.msg_namelen;
1483 msg64.msg_name = user_msg.msg_name;
1484 } else {
1485 msg32.msg_flags = user_msg.msg_flags;
1486 msg32.msg_controllen = user_msg.msg_controllen;
1487 msg32.msg_control = user_msg.msg_control;
1488 msg32.msg_iovlen = user_msg.msg_iovlen;
1489 msg32.msg_iov = user_msg.msg_iov;
1490 msg32.msg_namelen = user_msg.msg_namelen;
1491 msg32.msg_name = user_msg.msg_name;
1492 }
1493 error = copyout(msghdrp, uap->msg, size_of_msghdr);
1494 }
1495 done:
1496 if (auio != NULL) {
1497 uio_free(auio);
1498 }
1499 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1500 return (error);
1501 }
1502
1503 /*
1504 * Returns: 0 Success
1505 * EBADF
1506 * file_socket:ENOTSOCK
1507 * file_socket:EBADF
1508 * soshutdown:EINVAL
1509 * soshutdown:ENOTCONN
1510 * soshutdown:EADDRNOTAVAIL[TCP]
1511 * soshutdown:ENOBUFS[TCP]
1512 * soshutdown:EMSGSIZE[TCP]
1513 * soshutdown:EHOSTUNREACH[TCP]
1514 * soshutdown:ENETUNREACH[TCP]
1515 * soshutdown:ENETDOWN[TCP]
1516 * soshutdown:ENOMEM[TCP]
1517 * soshutdown:EACCES[TCP]
1518 * soshutdown:EMSGSIZE[TCP]
1519 * soshutdown:ENOBUFS[TCP]
1520 * soshutdown:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
1521 * soshutdown:??? [other protocol families]
1522 */
1523 /* ARGSUSED */
1524 int
1525 shutdown(__unused struct proc *p, struct shutdown_args *uap,
1526 __unused int32_t *retval)
1527 {
1528 struct socket *so;
1529 int error;
1530
1531 AUDIT_ARG(fd, uap->s);
1532 error = file_socket(uap->s, &so);
1533 if (error)
1534 return (error);
1535 if (so == NULL) {
1536 error = EBADF;
1537 goto out;
1538 }
1539 error = soshutdown((struct socket *)so, uap->how);
1540 out:
1541 file_drop(uap->s);
1542 return (error);
1543 }
1544
1545 /*
1546 * Returns: 0 Success
1547 * EFAULT
1548 * EINVAL
1549 * EACCES Mandatory Access Control failure
1550 * file_socket:ENOTSOCK
1551 * file_socket:EBADF
1552 * sosetopt:EINVAL
1553 * sosetopt:ENOPROTOOPT
1554 * sosetopt:ENOBUFS
1555 * sosetopt:EDOM
1556 * sosetopt:EFAULT
1557 * sosetopt:EOPNOTSUPP[AF_UNIX]
1558 * sosetopt:???
1559 */
1560 /* ARGSUSED */
1561 int
1562 setsockopt(struct proc *p, struct setsockopt_args *uap,
1563 __unused int32_t *retval)
1564 {
1565 struct socket *so;
1566 struct sockopt sopt;
1567 int error;
1568
1569 AUDIT_ARG(fd, uap->s);
1570 if (uap->val == 0 && uap->valsize != 0)
1571 return (EFAULT);
1572 /* No bounds checking on size (it's unsigned) */
1573
1574 error = file_socket(uap->s, &so);
1575 if (error)
1576 return (error);
1577
1578 sopt.sopt_dir = SOPT_SET;
1579 sopt.sopt_level = uap->level;
1580 sopt.sopt_name = uap->name;
1581 sopt.sopt_val = uap->val;
1582 sopt.sopt_valsize = uap->valsize;
1583 sopt.sopt_p = p;
1584
1585 if (so == NULL) {
1586 error = EINVAL;
1587 goto out;
1588 }
1589 #if CONFIG_MACF_SOCKET_SUBSET
1590 if ((error = mac_socket_check_setsockopt(kauth_cred_get(), so,
1591 &sopt)) != 0)
1592 goto out;
1593 #endif /* MAC_SOCKET_SUBSET */
1594 error = sosetopt(so, &sopt);
1595 out:
1596 file_drop(uap->s);
1597 return (error);
1598 }
1599
1600
1601
1602 /*
1603 * Returns: 0 Success
1604 * EINVAL
1605 * EBADF
1606 * EACCES Mandatory Access Control failure
1607 * copyin:EFAULT
1608 * copyout:EFAULT
1609 * file_socket:ENOTSOCK
1610 * file_socket:EBADF
1611 * sogetopt:???
1612 */
1613 int
1614 getsockopt(struct proc *p, struct getsockopt_args *uap,
1615 __unused int32_t *retval)
1616 {
1617 int error;
1618 socklen_t valsize;
1619 struct sockopt sopt;
1620 struct socket *so;
1621
1622 error = file_socket(uap->s, &so);
1623 if (error)
1624 return (error);
1625 if (uap->val) {
1626 error = copyin(uap->avalsize, (caddr_t)&valsize,
1627 sizeof (valsize));
1628 if (error)
1629 goto out;
1630 /* No bounds checking on size (it's unsigned) */
1631 } else {
1632 valsize = 0;
1633 }
1634 sopt.sopt_dir = SOPT_GET;
1635 sopt.sopt_level = uap->level;
1636 sopt.sopt_name = uap->name;
1637 sopt.sopt_val = uap->val;
1638 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
1639 sopt.sopt_p = p;
1640
1641 if (so == NULL) {
1642 error = EBADF;
1643 goto out;
1644 }
1645 #if CONFIG_MACF_SOCKET_SUBSET
1646 if ((error = mac_socket_check_getsockopt(kauth_cred_get(), so,
1647 &sopt)) != 0)
1648 goto out;
1649 #endif /* MAC_SOCKET_SUBSET */
1650 error = sogetopt((struct socket *)so, &sopt);
1651 if (error == 0) {
1652 valsize = sopt.sopt_valsize;
1653 error = copyout((caddr_t)&valsize, uap->avalsize,
1654 sizeof (valsize));
1655 }
1656 out:
1657 file_drop(uap->s);
1658 return (error);
1659 }
1660
1661
1662 /*
1663 * Get socket name.
1664 *
1665 * Returns: 0 Success
1666 * EBADF
1667 * file_socket:ENOTSOCK
1668 * file_socket:EBADF
1669 * copyin:EFAULT
1670 * copyout:EFAULT
1671 * <pru_sockaddr>:ENOBUFS[TCP]
1672 * <pru_sockaddr>:ECONNRESET[TCP]
1673 * <pru_sockaddr>:EINVAL[AF_UNIX]
1674 * <sf_getsockname>:???
1675 */
1676 /* ARGSUSED */
1677 int
1678 getsockname(__unused struct proc *p, struct getsockname_args *uap,
1679 __unused int32_t *retval)
1680 {
1681 struct socket *so;
1682 struct sockaddr *sa;
1683 socklen_t len;
1684 socklen_t sa_len;
1685 int error;
1686
1687 error = file_socket(uap->fdes, &so);
1688 if (error)
1689 return (error);
1690 error = copyin(uap->alen, (caddr_t)&len, sizeof (socklen_t));
1691 if (error)
1692 goto out;
1693 if (so == NULL) {
1694 error = EBADF;
1695 goto out;
1696 }
1697 sa = 0;
1698 socket_lock(so, 1);
1699 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
1700 if (error == 0) {
1701 struct socket_filter_entry *filter;
1702 int filtered = 0;
1703 for (filter = so->so_filt; filter && error == 0;
1704 filter = filter->sfe_next_onsocket) {
1705 if (filter->sfe_filter->sf_filter.sf_getsockname) {
1706 if (!filtered) {
1707 filtered = 1;
1708 sflt_use(so);
1709 socket_unlock(so, 0);
1710 }
1711 error = filter->sfe_filter->sf_filter.
1712 sf_getsockname(filter->sfe_cookie, so, &sa);
1713 }
1714 }
1715
1716 if (error == EJUSTRETURN)
1717 error = 0;
1718
1719 if (filtered) {
1720 socket_lock(so, 0);
1721 sflt_unuse(so);
1722 }
1723 }
1724 socket_unlock(so, 1);
1725 if (error)
1726 goto bad;
1727 if (sa == 0) {
1728 len = 0;
1729 goto gotnothing;
1730 }
1731
1732 sa_len = sa->sa_len;
1733 len = MIN(len, sa_len);
1734 error = copyout((caddr_t)sa, uap->asa, len);
1735 if (error)
1736 goto bad;
1737 /* return the actual, untruncated address length */
1738 len = sa_len;
1739 gotnothing:
1740 error = copyout((caddr_t)&len, uap->alen, sizeof (socklen_t));
1741 bad:
1742 if (sa)
1743 FREE(sa, M_SONAME);
1744 out:
1745 file_drop(uap->fdes);
1746 return (error);
1747 }
1748
1749 /*
1750 * Get name of peer for connected socket.
1751 *
1752 * Returns: 0 Success
1753 * EBADF
1754 * EINVAL
1755 * ENOTCONN
1756 * file_socket:ENOTSOCK
1757 * file_socket:EBADF
1758 * copyin:EFAULT
1759 * copyout:EFAULT
1760 * <pru_peeraddr>:???
1761 * <sf_getpeername>:???
1762 */
1763 /* ARGSUSED */
1764 int
1765 getpeername(__unused struct proc *p, struct getpeername_args *uap,
1766 __unused int32_t *retval)
1767 {
1768 struct socket *so;
1769 struct sockaddr *sa;
1770 socklen_t len;
1771 socklen_t sa_len;
1772 int error;
1773
1774 error = file_socket(uap->fdes, &so);
1775 if (error)
1776 return (error);
1777 if (so == NULL) {
1778 error = EBADF;
1779 goto out;
1780 }
1781
1782 socket_lock(so, 1);
1783
1784 if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
1785 (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
1786 /* the socket has been shutdown, no more getpeername's */
1787 socket_unlock(so, 1);
1788 error = EINVAL;
1789 goto out;
1790 }
1791
1792 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
1793 socket_unlock(so, 1);
1794 error = ENOTCONN;
1795 goto out;
1796 }
1797 error = copyin(uap->alen, (caddr_t)&len, sizeof (socklen_t));
1798 if (error) {
1799 socket_unlock(so, 1);
1800 goto out;
1801 }
1802 sa = 0;
1803 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
1804 if (error == 0) {
1805 struct socket_filter_entry *filter;
1806 int filtered = 0;
1807 for (filter = so->so_filt; filter && error == 0;
1808 filter = filter->sfe_next_onsocket) {
1809 if (filter->sfe_filter->sf_filter.sf_getpeername) {
1810 if (!filtered) {
1811 filtered = 1;
1812 sflt_use(so);
1813 socket_unlock(so, 0);
1814 }
1815 error = filter->sfe_filter->sf_filter.
1816 sf_getpeername(filter->sfe_cookie, so, &sa);
1817 }
1818 }
1819
1820 if (error == EJUSTRETURN)
1821 error = 0;
1822
1823 if (filtered) {
1824 socket_lock(so, 0);
1825 sflt_unuse(so);
1826 }
1827 }
1828 socket_unlock(so, 1);
1829 if (error)
1830 goto bad;
1831 if (sa == 0) {
1832 len = 0;
1833 goto gotnothing;
1834 }
1835 sa_len = sa->sa_len;
1836 len = MIN(len, sa_len);
1837 error = copyout(sa, uap->asa, len);
1838 if (error)
1839 goto bad;
1840 /* return the actual, untruncated address length */
1841 len = sa_len;
1842 gotnothing:
1843 error = copyout((caddr_t)&len, uap->alen, sizeof (socklen_t));
1844 bad:
1845 if (sa) FREE(sa, M_SONAME);
1846 out:
1847 file_drop(uap->fdes);
1848 return (error);
1849 }
1850
1851 int
1852 sockargs(struct mbuf **mp, user_addr_t data, int buflen, int type)
1853 {
1854 struct sockaddr *sa;
1855 struct mbuf *m;
1856 int error;
1857
1858 int alloc_buflen = buflen;
1859 #ifdef __LP64__
1860 /* The fd's in the buffer must expand to be pointers, thus we need twice as much space */
1861 if(type == MT_CONTROL)
1862 alloc_buflen = ((buflen - sizeof(struct cmsghdr))*2) + sizeof(struct cmsghdr);
1863 #endif
1864 if ((u_int)alloc_buflen > MLEN) {
1865 if (type == MT_SONAME && (u_int)alloc_buflen <= 112)
1866 alloc_buflen = MLEN; /* unix domain compat. hack */
1867 else if ((u_int)alloc_buflen > MCLBYTES)
1868 return (EINVAL);
1869 }
1870 m = m_get(M_WAIT, type);
1871 if (m == NULL)
1872 return (ENOBUFS);
1873 if ((u_int)alloc_buflen > MLEN) {
1874 MCLGET(m, M_WAIT);
1875 if ((m->m_flags & M_EXT) == 0) {
1876 m_free(m);
1877 return (ENOBUFS);
1878 }
1879 }
1880 /* K64: We still copyin the original buflen because it gets expanded later
1881 * and we lie about the size of the mbuf because it only affects unp_* functions
1882 */
1883 m->m_len = buflen;
1884 error = copyin(data, mtod(m, caddr_t), (u_int)buflen);
1885 if (error) {
1886 (void) m_free(m);
1887 } else {
1888 *mp = m;
1889 if (type == MT_SONAME) {
1890 sa = mtod(m, struct sockaddr *);
1891 sa->sa_len = buflen;
1892 }
1893 }
1894 return (error);
1895 }
1896
1897 /*
1898 * Given a user_addr_t of length len, allocate and fill out a *sa.
1899 *
1900 * Returns: 0 Success
1901 * ENAMETOOLONG Filename too long
1902 * EINVAL Invalid argument
1903 * ENOMEM Not enough space
1904 * copyin:EFAULT Bad address
1905 */
1906 static int
1907 getsockaddr(struct socket *so, struct sockaddr **namp, user_addr_t uaddr,
1908 size_t len, boolean_t translate_unspec)
1909 {
1910 struct sockaddr *sa;
1911 int error;
1912
1913 if (len > SOCK_MAXADDRLEN)
1914 return (ENAMETOOLONG);
1915
1916 if (len < offsetof(struct sockaddr, sa_data[0]))
1917 return (EINVAL);
1918
1919 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK | M_ZERO);
1920 if (sa == NULL) {
1921 return (ENOMEM);
1922 }
1923 error = copyin(uaddr, (caddr_t)sa, len);
1924 if (error) {
1925 FREE(sa, M_SONAME);
1926 } else {
1927 /*
1928 * Force sa_family to AF_INET on AF_INET sockets to handle
1929 * legacy applications that use AF_UNSPEC (0). On all other
1930 * sockets we leave it unchanged and let the lower layer
1931 * handle it.
1932 */
1933 if (translate_unspec && sa->sa_family == AF_UNSPEC &&
1934 INP_CHECK_SOCKAF(so, AF_INET) &&
1935 len == sizeof (struct sockaddr_in))
1936 sa->sa_family = AF_INET;
1937
1938 sa->sa_len = len;
1939 *namp = sa;
1940 }
1941 return (error);
1942 }
1943
1944 static int
1945 getsockaddr_s(struct socket *so, struct sockaddr_storage *ss,
1946 user_addr_t uaddr, size_t len, boolean_t translate_unspec)
1947 {
1948 int error;
1949
1950 if (ss == NULL || uaddr == USER_ADDR_NULL ||
1951 len < offsetof(struct sockaddr, sa_data[0]))
1952 return (EINVAL);
1953
1954 /*
1955 * sockaddr_storage size is less than SOCK_MAXADDRLEN,
1956 * so the check here is inclusive.
1957 */
1958 if (len > sizeof (*ss))
1959 return (ENAMETOOLONG);
1960
1961 bzero(ss, sizeof (*ss));
1962 error = copyin(uaddr, (caddr_t)ss, len);
1963 if (error == 0) {
1964 /*
1965 * Force sa_family to AF_INET on AF_INET sockets to handle
1966 * legacy applications that use AF_UNSPEC (0). On all other
1967 * sockets we leave it unchanged and let the lower layer
1968 * handle it.
1969 */
1970 if (translate_unspec && ss->ss_family == AF_UNSPEC &&
1971 INP_CHECK_SOCKAF(so, AF_INET) &&
1972 len == sizeof (struct sockaddr_in))
1973 ss->ss_family = AF_INET;
1974
1975 ss->ss_len = len;
1976 }
1977 return (error);
1978 }
1979
1980 #if SENDFILE
1981
SYSCTL_DECL(_kern_ipc);

/* Initial value of the sendfileuiobufs tunable below */
#define SFUIOBUFS 64
/*
 * Read-write integer sysctl under _kern_ipc.  It is multiplied by the page
 * size to form SENDFILE_MAX_BYTES.
 */
static int sendfileuiobufs = SFUIOBUFS;
SYSCTL_INT(_kern_ipc, OID_AUTO, sendfileuiobufs, CTLFLAG_RW, &sendfileuiobufs,
0, "");

/*
 * Macros to compute the number of mbufs needed depending on cluster size.
 * Each rounds n up to a whole number of (1 << PGSHIFT)-byte units, or four
 * times that for the "16K" form (16K presumably assumes 4K pages -- TODO
 * confirm).  NOTE(review): n is cast to unsigned before the "- 1", so
 * n == 0 wraps rather than yielding 0.
 */
#define HOWMANY_16K(n) ((((unsigned int)(n) - 1) >> (PGSHIFT + 2)) + 1)
#define HOWMANY_4K(n) ((((unsigned int)(n) - 1) >> PGSHIFT) + 1)

/* Upper send limit in bytes (sendfileuiobufs * PAGESIZE) */
#define SENDFILE_MAX_BYTES (sendfileuiobufs << PGSHIFT)

/* Upper send limit in the number of mbuf clusters */
#define SENDFILE_MAX_16K HOWMANY_16K(SENDFILE_MAX_BYTES)
#define SENDFILE_MAX_4K HOWMANY_4K(SENDFILE_MAX_BYTES)

/* Prototype for the definition that follows */
size_t mbuf_pkt_maxlen(mbuf_t m);
2001
2002 __private_extern__ size_t
2003 mbuf_pkt_maxlen(mbuf_t m)
2004 {
2005 size_t maxlen = 0;
2006
2007 while (m) {
2008 maxlen += mbuf_maxlen(m);
2009 m = mbuf_next(m);
2010 }
2011 return (maxlen);
2012 }
2013
2014 static void
2015 alloc_sendpkt(int how, size_t pktlen, unsigned int *maxchunks,
2016 struct mbuf **m, boolean_t jumbocl)
2017 {
2018 unsigned int needed;
2019
2020 if (pktlen == 0)
2021 panic("%s: pktlen (%ld) must be non-zero\n", __func__, pktlen);
2022
2023 /*
2024 * Try to allocate for the whole thing. Since we want full control
2025 * over the buffer size and be able to accept partial result, we can't
2026 * use mbuf_allocpacket(). The logic below is similar to sosend().
2027 */
2028 *m = NULL;
2029 if (pktlen > NBPG && jumbocl) {
2030 needed = MIN(SENDFILE_MAX_16K, HOWMANY_16K(pktlen));
2031 *m = m_getpackets_internal(&needed, 1, how, 0, M16KCLBYTES);
2032 }
2033 if (*m == NULL) {
2034 needed = MIN(SENDFILE_MAX_4K, HOWMANY_4K(pktlen));
2035 *m = m_getpackets_internal(&needed, 1, how, 0, NBPG);
2036 }
2037
2038 /*
2039 * Our previous attempt(s) at allocation had failed; the system
2040 * may be short on mbufs, and we want to block until they are
2041 * available. This time, ask just for 1 mbuf and don't return
2042 * until we get it.
2043 */
2044 if (*m == NULL) {
2045 needed = 1;
2046 *m = m_getpackets_internal(&needed, 1, M_WAIT, 1, NBPG);
2047 }
2048 if (*m == NULL)
2049 panic("%s: blocking allocation returned NULL\n", __func__);
2050
2051 *maxchunks = needed;
2052 }
2053
2054 /*
2055 * sendfile(2).
2056 * int sendfile(int fd, int s, off_t offset, off_t *nbytes,
2057 * struct sf_hdtr *hdtr, int flags)
2058 *
2059 * Send a file specified by 'fd' and starting at 'offset' to a socket
2060 * specified by 's'. Send only '*nbytes' of the file or until EOF if
2061 * *nbytes == 0. Optionally add a header and/or trailer to the socket
2062 * output. If specified, write the total number of bytes sent into *nbytes.
2063 */
2064 int
2065 sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval)
2066 {
2067 struct fileproc *fp;
2068 struct vnode *vp;
2069 struct socket *so;
2070 struct writev_nocancel_args nuap;
2071 user_ssize_t writev_retval;
2072 struct user_sf_hdtr user_hdtr;
2073 struct user32_sf_hdtr user32_hdtr;
2074 struct user64_sf_hdtr user64_hdtr;
2075 off_t off, xfsize;
2076 off_t nbytes = 0, sbytes = 0;
2077 int error = 0;
2078 size_t sizeof_hdtr;
2079 off_t file_size;
2080 struct vfs_context context = *vfs_context_current();
2081
2082 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_START), uap->s,
2083 0, 0, 0, 0);
2084
2085 AUDIT_ARG(fd, uap->fd);
2086 AUDIT_ARG(value32, uap->s);
2087
2088 /*
2089 * Do argument checking. Must be a regular file in, stream
2090 * type and connected socket out, positive offset.
2091 */
2092 if ((error = fp_getfvp(p, uap->fd, &fp, &vp))) {
2093 goto done;
2094 }
2095 if ((fp->f_flag & FREAD) == 0) {
2096 error = EBADF;
2097 goto done1;
2098 }
2099 if (vnode_isreg(vp) == 0) {
2100 error = ENOTSUP;
2101 goto done1;
2102 }
2103 error = file_socket(uap->s, &so);
2104 if (error) {
2105 goto done1;
2106 }
2107 if (so == NULL) {
2108 error = EBADF;
2109 goto done2;
2110 }
2111 if (so->so_type != SOCK_STREAM) {
2112 error = EINVAL;
2113 goto done2;
2114 }
2115 if ((so->so_state & SS_ISCONNECTED) == 0) {
2116 error = ENOTCONN;
2117 goto done2;
2118 }
2119 if (uap->offset < 0) {
2120 error = EINVAL;
2121 goto done2;
2122 }
2123 if (uap->nbytes == USER_ADDR_NULL) {
2124 error = EINVAL;
2125 goto done2;
2126 }
2127 if (uap->flags != 0) {
2128 error = EINVAL;
2129 goto done2;
2130 }
2131
2132 context.vc_ucred = fp->f_fglob->fg_cred;
2133
2134 #if CONFIG_MACF_SOCKET_SUBSET
2135 /* JMM - fetch connected sockaddr? */
2136 error = mac_socket_check_send(context.vc_ucred, so, NULL);
2137 if (error)
2138 goto done2;
2139 #endif
2140
2141 /*
2142 * Get number of bytes to send
2143 * Should it applies to size of header and trailer?
2144 * JMM - error handling?
2145 */
2146 copyin(uap->nbytes, &nbytes, sizeof (off_t));
2147
2148 /*
2149 * If specified, get the pointer to the sf_hdtr struct for
2150 * any headers/trailers.
2151 */
2152 if (uap->hdtr != USER_ADDR_NULL) {
2153 caddr_t hdtrp;
2154
2155 bzero(&user_hdtr, sizeof (user_hdtr));
2156 if (IS_64BIT_PROCESS(p)) {
2157 hdtrp = (caddr_t)&user64_hdtr;
2158 sizeof_hdtr = sizeof (user64_hdtr);
2159 } else {
2160 hdtrp = (caddr_t)&user32_hdtr;
2161 sizeof_hdtr = sizeof (user32_hdtr);
2162 }
2163 error = copyin(uap->hdtr, hdtrp, sizeof_hdtr);
2164 if (error)
2165 goto done2;
2166 if (IS_64BIT_PROCESS(p)) {
2167 user_hdtr.headers = user64_hdtr.headers;
2168 user_hdtr.hdr_cnt = user64_hdtr.hdr_cnt;
2169 user_hdtr.trailers = user64_hdtr.trailers;
2170 user_hdtr.trl_cnt = user64_hdtr.trl_cnt;
2171 } else {
2172 user_hdtr.headers = user32_hdtr.headers;
2173 user_hdtr.hdr_cnt = user32_hdtr.hdr_cnt;
2174 user_hdtr.trailers = user32_hdtr.trailers;
2175 user_hdtr.trl_cnt = user32_hdtr.trl_cnt;
2176 }
2177
2178 /*
2179 * Send any headers. Wimp out and use writev(2).
2180 */
2181 if (user_hdtr.headers != USER_ADDR_NULL) {
2182 bzero(&nuap, sizeof (struct writev_args));
2183 nuap.fd = uap->s;
2184 nuap.iovp = user_hdtr.headers;
2185 nuap.iovcnt = user_hdtr.hdr_cnt;
2186 error = writev_nocancel(p, &nuap, &writev_retval);
2187 if (error)
2188 goto done2;
2189 sbytes += writev_retval;
2190 }
2191 }
2192
2193 /*
2194 * Get the file size for 2 reasons:
2195 * 1. We don't want to allocate more mbufs than necessary
2196 * 2. We don't want to read past the end of file
2197 */
2198 if ((error = vnode_size(vp, &file_size, vfs_context_current())) != 0)
2199 goto done2;
2200
2201 /*
2202 * Simply read file data into a chain of mbufs that used with scatter
2203 * gather reads. We're not (yet?) setup to use zero copy external
2204 * mbufs that point to the file pages.
2205 */
2206 socket_lock(so, 1);
2207 error = sblock(&so->so_snd, M_WAIT);
2208 if (error) {
2209 socket_unlock(so, 1);
2210 goto done2;
2211 }
2212 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
2213 mbuf_t m0 = NULL, m;
2214 unsigned int nbufs = sendfileuiobufs, i;
2215 uio_t auio;
2216 char uio_buf[UIO_SIZEOF(sendfileuiobufs)]; /* 1 KB !!! */
2217 size_t uiolen;
2218 user_ssize_t rlen;
2219 off_t pgoff;
2220 size_t pktlen;
2221 boolean_t jumbocl;
2222
2223 /*
2224 * Calculate the amount to transfer.
2225 * Align to round number of pages.
2226 * Not to exceed send socket buffer,
2227 * the EOF, or the passed in nbytes.
2228 */
2229 xfsize = sbspace(&so->so_snd);
2230
2231 if (xfsize <= 0) {
2232 if (so->so_state & SS_CANTSENDMORE) {
2233 error = EPIPE;
2234 goto done3;
2235 } else if ((so->so_state & SS_NBIO)) {
2236 error = EAGAIN;
2237 goto done3;
2238 } else {
2239 xfsize = PAGE_SIZE;
2240 }
2241 }
2242
2243 if (xfsize > SENDFILE_MAX_BYTES)
2244 xfsize = SENDFILE_MAX_BYTES;
2245 else if (xfsize > PAGE_SIZE)
2246 xfsize = trunc_page(xfsize);
2247 pgoff = off & PAGE_MASK_64;
2248 if (pgoff > 0 && PAGE_SIZE - pgoff < xfsize)
2249 xfsize = PAGE_SIZE_64 - pgoff;
2250 if (nbytes && xfsize > (nbytes - sbytes))
2251 xfsize = nbytes - sbytes;
2252 if (xfsize <= 0)
2253 break;
2254 if (off + xfsize > file_size)
2255 xfsize = file_size - off;
2256 if (xfsize <= 0)
2257 break;
2258
2259 /*
2260 * Attempt to use larger than system page-size clusters for
2261 * large writes only if there is a jumbo cluster pool and
2262 * if the socket is marked accordingly.
2263 */
2264 jumbocl = sosendjcl && njcl > 0 &&
2265 ((so->so_flags & SOF_MULTIPAGES) || sosendjcl_ignore_capab);
2266
2267 socket_unlock(so, 0);
2268 alloc_sendpkt(M_WAIT, xfsize, &nbufs, &m0, jumbocl);
2269 pktlen = mbuf_pkt_maxlen(m0);
2270 if (pktlen < (size_t)xfsize)
2271 xfsize = pktlen;
2272
2273 auio = uio_createwithbuffer(nbufs, off, UIO_SYSSPACE,
2274 UIO_READ, &uio_buf[0], sizeof (uio_buf));
2275 if (auio == NULL) {
2276 //printf("sendfile: uio_createwithbuffer failed\n");
2277 mbuf_freem(m0);
2278 error = ENXIO;
2279 socket_lock(so, 0);
2280 goto done3;
2281 }
2282
2283 for (i = 0, m = m0, uiolen = 0;
2284 i < nbufs && m != NULL && uiolen < (size_t)xfsize;
2285 i++, m = mbuf_next(m)) {
2286 size_t mlen = mbuf_maxlen(m);
2287
2288 if (mlen + uiolen > (size_t)xfsize)
2289 mlen = xfsize - uiolen;
2290 mbuf_setlen(m, mlen);
2291 uio_addiov(auio, CAST_USER_ADDR_T(mbuf_datastart(m)),
2292 mlen);
2293 uiolen += mlen;
2294 }
2295
2296 if (xfsize != uio_resid(auio))
2297 printf("sendfile: xfsize: %lld != uio_resid(auio): "
2298 "%lld\n", xfsize, uio_resid(auio));
2299
2300 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_START),
2301 uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
2302 (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
2303 error = fo_read(fp, auio, FOF_OFFSET, &context);
2304 socket_lock(so, 0);
2305 if (error != 0) {
2306 if (uio_resid(auio) != xfsize && (error == ERESTART ||
2307 error == EINTR || error == EWOULDBLOCK)) {
2308 error = 0;
2309 } else {
2310 mbuf_freem(m0);
2311 goto done3;
2312 }
2313 }
2314 xfsize -= uio_resid(auio);
2315 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_END),
2316 uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
2317 (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
2318
2319 if (xfsize == 0) {
2320 //printf("sendfile: fo_read 0 bytes, EOF\n");
2321 break;
2322 }
2323 if (xfsize + off > file_size)
2324 printf("sendfile: xfsize: %lld + off: %lld > file_size:"
2325 "%lld\n", xfsize, off, file_size);
2326 for (i = 0, m = m0, rlen = 0;
2327 i < nbufs && m != NULL && rlen < xfsize;
2328 i++, m = mbuf_next(m)) {
2329 size_t mlen = mbuf_maxlen(m);
2330
2331 if (rlen + mlen > (size_t)xfsize)
2332 mlen = xfsize - rlen;
2333 mbuf_setlen(m, mlen);
2334
2335 rlen += mlen;
2336 }
2337 mbuf_pkthdr_setlen(m0, xfsize);
2338
2339 retry_space:
2340 /*
2341 * Make sure that the socket is still able to take more data.
2342 * CANTSENDMORE being true usually means that the connection
2343 * was closed. so_error is true when an error was sensed after
2344 * a previous send.
2345 * The state is checked after the page mapping and buffer
2346 * allocation above since those operations may block and make
2347 * any socket checks stale. From this point forward, nothing
2348 * blocks before the pru_send (or more accurately, any blocking
2349 * results in a loop back to here to re-check).
2350 */
2351 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
2352 if (so->so_state & SS_CANTSENDMORE) {
2353 error = EPIPE;
2354 } else {
2355 error = so->so_error;
2356 so->so_error = 0;
2357 }
2358 m_freem(m0);
2359 goto done3;
2360 }
2361 /*
2362 * Wait for socket space to become available. We do this just
2363 * after checking the connection state above in order to avoid
2364 * a race condition with sbwait().
2365 */
2366 if (sbspace(&so->so_snd) < (long)so->so_snd.sb_lowat) {
2367 if (so->so_state & SS_NBIO) {
2368 m_freem(m0);
2369 error = EAGAIN;
2370 goto done3;
2371 }
2372 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT |
2373 DBG_FUNC_START), uap->s, 0, 0, 0, 0);
2374 error = sbwait(&so->so_snd);
2375 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT|
2376 DBG_FUNC_END), uap->s, 0, 0, 0, 0);
2377 /*
2378 * An error from sbwait usually indicates that we've
2379 * been interrupted by a signal. If we've sent anything
2380 * then return bytes sent, otherwise return the error.
2381 */
2382 if (error) {
2383 m_freem(m0);
2384 goto done3;
2385 }
2386 goto retry_space;
2387 }
2388 {
2389 /*
2390 * Socket filter processing
2391 */
2392 struct socket_filter_entry *filter;
2393 int filtered = 0;
2394 struct mbuf *control = NULL;
2395 boolean_t recursive = (so->so_send_filt_thread != NULL);
2396
2397 error = 0;
2398 for (filter = so->so_filt; filter && (error == 0);
2399 filter = filter->sfe_next_onsocket) {
2400 if (filter->sfe_filter->sf_filter.sf_data_out) {
2401 if (filtered == 0) {
2402 filtered = 1;
2403 so->so_send_filt_thread =
2404 current_thread();
2405 sflt_use(so);
2406 socket_unlock(so, 0);
2407 }
2408 error = filter->sfe_filter->sf_filter.
2409 sf_data_out(filter->sfe_cookie, so,
2410 NULL, &m0, &control, 0);
2411 }
2412 }
2413
2414 if (filtered) {
2415 /*
2416 * At this point, we've run at least one filter.
2417 * The socket is unlocked as is the socket
2418 * buffer. Clear the recorded filter thread
2419 * only when we are outside of a filter's
2420 * context. This allows for a filter to issue
2421 * multiple inject calls from its sf_data_out
2422 * callback routine.
2423 */
2424 socket_lock(so, 0);
2425 sflt_unuse(so);
2426 if (!recursive)
2427 so->so_send_filt_thread = 0;
2428 if (error) {
2429 if (error == EJUSTRETURN) {
2430 error = 0;
2431 continue;
2432 }
2433 goto done3;
2434 }
2435 }
2436 /*
2437 * End Socket filter processing
2438 */
2439 }
2440 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
2441 uap->s, 0, 0, 0, 0);
2442 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m0,
2443 0, 0, p);
2444 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
2445 uap->s, 0, 0, 0, 0);
2446 if (error) {
2447 goto done3;
2448 }
2449 }
2450 sbunlock(&so->so_snd, 0); /* will unlock socket */
2451 /*
2452 * Send trailers. Wimp out and use writev(2).
2453 */
2454 if (uap->hdtr != USER_ADDR_NULL &&
2455 user_hdtr.trailers != USER_ADDR_NULL) {
2456 bzero(&nuap, sizeof (struct writev_args));
2457 nuap.fd = uap->s;
2458 nuap.iovp = user_hdtr.trailers;
2459 nuap.iovcnt = user_hdtr.trl_cnt;
2460 error = writev_nocancel(p, &nuap, &writev_retval);
2461 if (error)
2462 goto done2;
2463 sbytes += writev_retval;
2464 }
2465 done2:
2466 file_drop(uap->s);
2467 done1:
2468 file_drop(uap->fd);
2469 done:
2470 if (uap->nbytes != USER_ADDR_NULL) {
2471 /* XXX this appears bogus for some early failure conditions */
2472 copyout(&sbytes, uap->nbytes, sizeof (off_t));
2473 }
2474 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_END), uap->s,
2475 (unsigned int)((sbytes >> 32) & 0x0ffffffff),
2476 (unsigned int)(sbytes & 0x0ffffffff), error, 0);
2477 return (error);
2478 done3:
2479 sbunlock(&so->so_snd, 0); /* will unlock socket */
2480 goto done2;
2481 }
2482
2483
2484 #endif /* SENDFILE */