]> git.saurik.com Git - apple/xnu.git/blob - bsd/kern/uipc_syscalls.c
xnu-1699.24.23.tar.gz
[apple/xnu.git] / bsd / kern / uipc_syscalls.c
1 /*
2 * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1982, 1986, 1989, 1990, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * sendfile(2) and related extensions:
33 * Copyright (c) 1998, David Greenman. All rights reserved.
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. All advertising materials mentioning features or use of this software
44 * must display the following acknowledgement:
45 * This product includes software developed by the University of
46 * California, Berkeley and its contributors.
47 * 4. Neither the name of the University nor the names of its contributors
48 * may be used to endorse or promote products derived from this software
49 * without specific prior written permission.
50 *
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE.
62 *
63 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94
64 */
65 /*
66 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
67 * support for mandatory and extensible security protections. This notice
68 * is included in support of clause 2.2 (b) of the Apple Public License,
69 * Version 2.0.
70 */
71
72 #include <sys/param.h>
73 #include <sys/systm.h>
74 #include <sys/filedesc.h>
75 #include <sys/proc_internal.h>
76 #include <sys/file_internal.h>
77 #include <sys/vnode_internal.h>
78 #include <sys/malloc.h>
79 #include <sys/mbuf.h>
80 #include <kern/lock.h>
81 #include <sys/domain.h>
82 #include <sys/protosw.h>
83 #include <sys/signalvar.h>
84 #include <sys/socket.h>
85 #include <sys/socketvar.h>
86 #include <sys/kernel.h>
87 #include <sys/uio_internal.h>
88 #include <sys/kauth.h>
89 #include <kern/task.h>
90
91 #include <security/audit/audit.h>
92
93 #include <sys/kdebug.h>
94 #include <sys/sysproto.h>
95 #include <netinet/in.h>
96 #include <net/route.h>
97 #include <netinet/in_pcb.h>
98
99 #if CONFIG_MACF_SOCKET_SUBSET
100 #include <security/mac_framework.h>
101 #endif /* MAC_SOCKET_SUBSET */
102
/*
 * Shorthand member names for use on a struct fileproc pointer: each f_xxx
 * expands to the fg_xxx member reached through f_fglob, so that
 * fp->f_flag means fp->f_fglob->fg_flag, fp->f_data means
 * fp->f_fglob->fg_data, and so on.
 */
103 #define f_flag f_fglob->fg_flag
104 #define f_type f_fglob->fg_type
105 #define f_msgcount f_fglob->fg_msgcount
106 #define f_cred f_fglob->fg_cred
107 #define f_ops f_fglob->fg_ops
108 #define f_offset f_fglob->fg_offset
109 #define f_data f_fglob->fg_data
110
111
/*
 * Trace codes in the DBG_NETSOCK class.  The DBG_FNC_* values are OR'ed
 * with DBG_FUNC_START / DBG_FUNC_END in the KERNEL_DEBUG() calls that
 * bracket the corresponding routine in this file.
 */
112 #define DBG_LAYER_IN_BEG NETDBG_CODE(DBG_NETSOCK, 0)
113 #define DBG_LAYER_IN_END NETDBG_CODE(DBG_NETSOCK, 2)
114 #define DBG_LAYER_OUT_BEG NETDBG_CODE(DBG_NETSOCK, 1)
115 #define DBG_LAYER_OUT_END NETDBG_CODE(DBG_NETSOCK, 3)
116 #define DBG_FNC_SENDMSG NETDBG_CODE(DBG_NETSOCK, (1 << 8) | 1)
117 #define DBG_FNC_SENDTO NETDBG_CODE(DBG_NETSOCK, (2 << 8) | 1)
118 #define DBG_FNC_SENDIT NETDBG_CODE(DBG_NETSOCK, (3 << 8) | 1)
119 #define DBG_FNC_RECVFROM NETDBG_CODE(DBG_NETSOCK, (5 << 8))
120 #define DBG_FNC_RECVMSG NETDBG_CODE(DBG_NETSOCK, (6 << 8))
121 #define DBG_FNC_RECVIT NETDBG_CODE(DBG_NETSOCK, (7 << 8))
122 #define DBG_FNC_SENDFILE NETDBG_CODE(DBG_NETSOCK, (10 << 8))
123 #define DBG_FNC_SENDFILE_WAIT NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 1))
124 #define DBG_FNC_SENDFILE_READ NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 2))
125 #define DBG_FNC_SENDFILE_SEND NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 3))
126
127
/*
 * Radar 4056224 compatibility switch.  When non-zero, sendto_nocancel()
 * retries a send that failed with ENOTSOCK as a write() if the descriptor
 * is a pipe.  last_pid_4056224 holds the highest pid for which the
 * "uses send/recv on a pipe" message has been printed; a process is only
 * logged when its pid exceeds that value.
 */
128 #define HACK_FOR_4056224 1
129 #if HACK_FOR_4056224
130 static pid_t last_pid_4056224 = 0;
131 #endif /* HACK_FOR_4056224 */
132
133 /* TODO: should be in header file */
134 int falloc_locked(proc_t, struct fileproc **, int *, vfs_context_t, int);
135
/*
 * Forward declarations of the file-local (static) helpers, so that the
 * system call entry points can reference them ahead of their definitions.
 * alloc_sendpkt() is only declared when SENDFILE is configured.
 */
136 static int sendit(struct proc *, int, struct user_msghdr *, uio_t, int,
137 int32_t *);
138 static int recvit(struct proc *, int, struct user_msghdr *, uio_t, user_addr_t,
139 int32_t *);
140 static int getsockaddr(struct socket *, struct sockaddr **, user_addr_t,
141 size_t, boolean_t);
142 static int getsockaddr_s(struct socket *, struct sockaddr_storage *,
143 user_addr_t, size_t, boolean_t);
144 #if SENDFILE
145 static void alloc_sendpkt(int, size_t, unsigned int *, struct mbuf **,
146 boolean_t);
147 #endif /* SENDFILE */
148
149 /*
150 * System call interface to the socket abstraction.
151 */
152
/*
 * File operations vector stored in f_ops of every socket-backed fileproc
 * that this file sets up (see socket(), accept_nocancel(), socketpair()).
 */
153 extern struct fileops socketops;
154
155 /*
156 * Returns: 0 Success
157 * EACCES Mandatory Access Control failure
158 * falloc:ENFILE
159 * falloc:EMFILE
160 * falloc:ENOMEM
161 * socreate:EAFNOSUPPORT
162 * socreate:EPROTOTYPE
163 * socreate:EPROTONOSUPPORT
164 * socreate:ENOBUFS
165 * socreate:ENOMEM
166 * socreate:EISCONN
167 * socreate:??? [other protocol families, IPSEC]
168 */
169 int
170 socket(struct proc *p, struct socket_args *uap, int32_t *retval)
171 {
172 struct socket *so;
173 struct fileproc *fp;
174 int fd, error;
175
176 AUDIT_ARG(socket, uap->domain, uap->type, uap->protocol);
177 #if CONFIG_MACF_SOCKET_SUBSET
178 if ((error = mac_socket_check_create(kauth_cred_get(), uap->domain,
179 uap->type, uap->protocol)) != 0)
180 return (error);
181 #endif /* MAC_SOCKET_SUBSET */
182
183 error = falloc(p, &fp, &fd, vfs_context_current());
184 if (error) {
185 return (error);
186 }
187 fp->f_flag = FREAD|FWRITE;
188 fp->f_type = DTYPE_SOCKET;
189 fp->f_ops = &socketops;
190
191 error = socreate(uap->domain, &so, uap->type, uap->protocol);
192 if (error) {
193 fp_free(p, fd, fp);
194 } else {
195 thread_t thread;
196 struct uthread *ut;
197
198 thread = current_thread();
199 ut = get_bsdthread_info(thread);
200
201 /* if this is a backgrounded thread then throttle all new sockets */
202 #if !CONFIG_EMBEDDED
203 if (proc_get_selfthread_isbackground() != 0)
204 #else /* !CONFIG_EMBEDDED */
205 if ( (ut->uu_flag & UT_BACKGROUND) != 0 )
206 #endif /* !CONFIG_EMBEDDED */
207 {
208 so->so_traffic_mgt_flags |= TRAFFIC_MGT_SO_BACKGROUND;
209 so->so_background_thread = thread;
210 }
211 fp->f_data = (caddr_t)so;
212
213 proc_fdlock(p);
214 procfdtbl_releasefd(p, fd, NULL);
215
216 fp_drop(p, fd, fp, 1);
217 proc_fdunlock(p);
218
219 *retval = fd;
220 }
221 return (error);
222 }
223
224 /*
225 * Returns: 0 Success
226 * EDESTADDRREQ Destination address required
227 * EBADF Bad file descriptor
228 * EACCES Mandatory Access Control failure
229 * file_socket:ENOTSOCK
230 * file_socket:EBADF
231 * getsockaddr:ENAMETOOLONG Filename too long
232 * getsockaddr:EINVAL Invalid argument
233 * getsockaddr:ENOMEM Not enough space
234 * getsockaddr:EFAULT Bad address
235 * sobind:???
236 */
237 /* ARGSUSED */
238 int
239 bind(__unused proc_t p, struct bind_args *uap, __unused int32_t *retval)
240 {
241 struct sockaddr_storage ss;
242 struct sockaddr *sa = NULL;
243 struct socket *so;
244 boolean_t want_free = TRUE;
245 int error;
246
247 AUDIT_ARG(fd, uap->s);
248 error = file_socket(uap->s, &so);
249 if (error != 0)
250 return (error);
251 if (so == NULL) {
252 error = EBADF;
253 goto out;
254 }
255 if (uap->name == USER_ADDR_NULL) {
256 error = EDESTADDRREQ;
257 goto out;
258 }
259 if (uap->namelen > sizeof (ss)) {
260 error = getsockaddr(so, &sa, uap->name, uap->namelen, TRUE);
261 } else {
262 error = getsockaddr_s(so, &ss, uap->name, uap->namelen, TRUE);
263 if (error == 0) {
264 sa = (struct sockaddr *)&ss;
265 want_free = FALSE;
266 }
267 }
268 if (error != 0)
269 goto out;
270 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
271 #if CONFIG_MACF_SOCKET_SUBSET
272 if ((error = mac_socket_check_bind(kauth_cred_get(), so, sa)) == 0)
273 error = sobind(so, sa);
274 #else
275 error = sobind(so, sa);
276 #endif /* MAC_SOCKET_SUBSET */
277 if (want_free)
278 FREE(sa, M_SONAME);
279 out:
280 file_drop(uap->s);
281 return (error);
282 }
283
284 /*
285 * Returns: 0 Success
286 * EBADF
287 * EACCES Mandatory Access Control failure
288 * file_socket:ENOTSOCK
289 * file_socket:EBADF
290 * solisten:EINVAL
291 * solisten:EOPNOTSUPP
292 * solisten:???
293 */
294 int
295 listen(__unused struct proc *p, struct listen_args *uap,
296 __unused int32_t *retval)
297 {
298 int error;
299 struct socket *so;
300
301 AUDIT_ARG(fd, uap->s);
302 error = file_socket(uap->s, &so);
303 if (error)
304 return (error);
305 if (so != NULL)
306 #if CONFIG_MACF_SOCKET_SUBSET
307 {
308 error = mac_socket_check_listen(kauth_cred_get(), so);
309 if (error == 0)
310 error = solisten(so, uap->backlog);
311 }
312 #else
313 error = solisten(so, uap->backlog);
314 #endif /* MAC_SOCKET_SUBSET */
315 else
316 error = EBADF;
317
318 file_drop(uap->s);
319 return (error);
320 }
321
322 /*
323 * Returns: fp_getfsock:EBADF Bad file descriptor
324 * fp_getfsock:EOPNOTSUPP ...
325 * xlate => :ENOTSOCK Socket operation on non-socket
326 * :EFAULT Bad address on copyin/copyout
327 * :EBADF Bad file descriptor
328 * :EOPNOTSUPP Operation not supported on socket
329 * :EINVAL Invalid argument
330 * :EWOULDBLOCK Operation would block
331 * :ECONNABORTED Connection aborted
332 * :EINTR Interrupted function
333 * :EACCES Mandatory Access Control failure
334 * falloc_locked:ENFILE Too many files open in system
335 * falloc_locked::EMFILE Too many open files
336 * falloc_locked::ENOMEM Not enough space
337 * 0 Success
338 */
339 int
340 accept_nocancel(struct proc *p, struct accept_nocancel_args *uap,
341 int32_t *retval)
342 {
343 struct fileproc *fp;
344 struct sockaddr *sa = NULL;
345 socklen_t namelen;
346 int error;
347 struct socket *head, *so = NULL;
348 lck_mtx_t *mutex_held;
349 int fd = uap->s;
350 int newfd;
351 short fflag; /* type must match fp->f_flag */
352 int dosocklock = 0;
353
354 *retval = -1;
355
356 AUDIT_ARG(fd, uap->s);
357
358 if (uap->name) {
359 error = copyin(uap->anamelen, (caddr_t)&namelen,
360 sizeof (socklen_t));
361 if (error)
362 return (error);
363 }
364 error = fp_getfsock(p, fd, &fp, &head);
365 if (error) {
366 if (error == EOPNOTSUPP)
367 error = ENOTSOCK;
368 return (error);
369 }
370 if (head == NULL) {
371 error = EBADF;
372 goto out;
373 }
374 #if CONFIG_MACF_SOCKET_SUBSET
375 if ((error = mac_socket_check_accept(kauth_cred_get(), head)) != 0)
376 goto out;
377 #endif /* MAC_SOCKET_SUBSET */
378
379 socket_lock(head, 1);
380
381 if (head->so_proto->pr_getlock != NULL) {
382 mutex_held = (*head->so_proto->pr_getlock)(head, 0);
383 dosocklock = 1;
384 } else {
385 mutex_held = head->so_proto->pr_domain->dom_mtx;
386 dosocklock = 0;
387 }
388
389 if ((head->so_options & SO_ACCEPTCONN) == 0) {
390 if ((head->so_proto->pr_flags & PR_CONNREQUIRED) == 0) {
391 error = EOPNOTSUPP;
392 } else {
393 /* POSIX: The socket is not accepting connections */
394 error = EINVAL;
395 }
396 socket_unlock(head, 1);
397 goto out;
398 }
399 if ((head->so_state & SS_NBIO) && head->so_comp.tqh_first == NULL) {
400 socket_unlock(head, 1);
401 error = EWOULDBLOCK;
402 goto out;
403 }
404 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
405 if (head->so_state & SS_CANTRCVMORE) {
406 head->so_error = ECONNABORTED;
407 break;
408 }
409 if (head->so_usecount < 1)
410 panic("accept: head=%p refcount=%d\n", head,
411 head->so_usecount);
412 error = msleep((caddr_t)&head->so_timeo, mutex_held,
413 PSOCK | PCATCH, "accept", 0);
414 if (head->so_usecount < 1)
415 panic("accept: 2 head=%p refcount=%d\n", head,
416 head->so_usecount);
417 if ((head->so_state & SS_DRAINING)) {
418 error = ECONNABORTED;
419 }
420 if (error) {
421 socket_unlock(head, 1);
422 goto out;
423 }
424 }
425 if (head->so_error) {
426 error = head->so_error;
427 head->so_error = 0;
428 socket_unlock(head, 1);
429 goto out;
430 }
431
432
433 /*
434 * At this point we know that there is at least one connection
435 * ready to be accepted. Remove it from the queue prior to
436 * allocating the file descriptor for it since falloc() may
437 * block allowing another process to accept the connection
438 * instead.
439 */
440 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
441 so = TAILQ_FIRST(&head->so_comp);
442 TAILQ_REMOVE(&head->so_comp, so, so_list);
443 head->so_qlen--;
444 /* unlock head to avoid deadlock with select, keep a ref on head */
445 socket_unlock(head, 0);
446
447 #if CONFIG_MACF_SOCKET_SUBSET
448 /*
449 * Pass the pre-accepted socket to the MAC framework. This is
450 * cheaper than allocating a file descriptor for the socket,
451 * calling the protocol accept callback, and possibly freeing
452 * the file descriptor should the MAC check fails.
453 */
454 if ((error = mac_socket_check_accepted(kauth_cred_get(), so)) != 0) {
455 so->so_state &= ~(SS_NOFDREF | SS_COMP);
456 so->so_head = NULL;
457 soclose(so);
458 /* Drop reference on listening socket */
459 sodereference(head);
460 goto out;
461 }
462 #endif /* MAC_SOCKET_SUBSET */
463
464 /*
465 * Pass the pre-accepted socket to any interested socket filter(s).
466 * Upon failure, the socket would have been closed by the callee.
467 */
468 if (so->so_filt != NULL && (error = soacceptfilter(so)) != 0) {
469 /* Drop reference on listening socket */
470 sodereference(head);
471 /* Propagate socket filter's error code to the caller */
472 goto out;
473 }
474
475 fflag = fp->f_flag;
476 error = falloc(p, &fp, &newfd, vfs_context_current());
477 if (error) {
478 /*
479 * Probably ran out of file descriptors. Put the
480 * unaccepted connection back onto the queue and
481 * do another wakeup so some other process might
482 * have a chance at it.
483 */
484 socket_lock(head, 0);
485 TAILQ_INSERT_HEAD(&head->so_comp, so, so_list);
486 head->so_qlen++;
487 wakeup_one((caddr_t)&head->so_timeo);
488 socket_unlock(head, 1);
489 goto out;
490 }
491 *retval = newfd;
492 fp->f_type = DTYPE_SOCKET;
493 fp->f_flag = fflag;
494 fp->f_ops = &socketops;
495 fp->f_data = (caddr_t)so;
496 socket_lock(head, 0);
497 if (dosocklock)
498 socket_lock(so, 1);
499 so->so_state &= ~SS_COMP;
500 so->so_head = NULL;
501 (void) soacceptlock(so, &sa, 0);
502 socket_unlock(head, 1);
503 if (sa == NULL) {
504 namelen = 0;
505 if (uap->name)
506 goto gotnoname;
507 error = 0;
508 goto releasefd;
509 }
510 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
511
512 if (uap->name) {
513 socklen_t sa_len;
514
515 /* save sa_len before it is destroyed */
516 sa_len = sa->sa_len;
517 namelen = MIN(namelen, sa_len);
518 error = copyout(sa, uap->name, namelen);
519 if (!error)
520 /* return the actual, untruncated address length */
521 namelen = sa_len;
522 gotnoname:
523 error = copyout((caddr_t)&namelen, uap->anamelen,
524 sizeof (socklen_t));
525 }
526 FREE(sa, M_SONAME);
527
528 releasefd:
529 /*
530 * If the socket has been marked as inactive by sosetdefunct(),
531 * disallow further operations on it.
532 */
533 if (so->so_flags & SOF_DEFUNCT) {
534 sodefunct(current_proc(), so,
535 SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL);
536 }
537
538 if (dosocklock)
539 socket_unlock(so, 1);
540
541 proc_fdlock(p);
542 procfdtbl_releasefd(p, newfd, NULL);
543 fp_drop(p, newfd, fp, 1);
544 proc_fdunlock(p);
545
546 out:
547 file_drop(fd);
548 return (error);
549 }
550
551 int
552 accept(struct proc *p, struct accept_args *uap, int32_t *retval)
553 {
554 __pthread_testcancel(1);
555 return(accept_nocancel(p, (struct accept_nocancel_args *)uap, retval));
556 }
557
558 /*
559 * Returns: 0 Success
560 * EBADF Bad file descriptor
561 * EALREADY Connection already in progress
562 * EINPROGRESS Operation in progress
563 * ECONNABORTED Connection aborted
564 * EINTR Interrupted function
565 * EACCES Mandatory Access Control failure
566 * file_socket:ENOTSOCK
567 * file_socket:EBADF
568 * getsockaddr:ENAMETOOLONG Filename too long
569 * getsockaddr:EINVAL Invalid argument
570 * getsockaddr:ENOMEM Not enough space
571 * getsockaddr:EFAULT Bad address
572 * soconnectlock:EOPNOTSUPP
573 * soconnectlock:EISCONN
574 * soconnectlock:??? [depends on protocol, filters]
575 * msleep:EINTR
576 *
577 * Imputed: so_error error may be set from so_error, which
578 * may have been set by soconnectlock.
579 */
580 /* ARGSUSED */
581 int
582 connect(struct proc *p, struct connect_args *uap, int32_t *retval)
583 {
584 __pthread_testcancel(1);
585 return(connect_nocancel(p, (struct connect_nocancel_args *)uap, retval));
586 }
587
588 int
589 connect_nocancel(__unused proc_t p, struct connect_nocancel_args *uap, __unused int32_t *retval)
590 {
591 struct socket *so;
592 struct sockaddr_storage ss;
593 struct sockaddr *sa = NULL;
594 lck_mtx_t *mutex_held;
595 boolean_t want_free = TRUE;
596 int error;
597 int fd = uap->s;
598 boolean_t dgram;
599
600 AUDIT_ARG(fd, uap->s);
601 error = file_socket(fd, &so);
602 if (error != 0)
603 return (error);
604 if (so == NULL) {
605 error = EBADF;
606 goto out;
607 }
608
609 /*
610 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
611 * if this is a datagram socket; translate for other types.
612 */
613 dgram = (so->so_type == SOCK_DGRAM);
614
615 /* Get socket address now before we obtain socket lock */
616 if (uap->namelen > sizeof (ss)) {
617 error = getsockaddr(so, &sa, uap->name, uap->namelen, !dgram);
618 } else {
619 error = getsockaddr_s(so, &ss, uap->name, uap->namelen, !dgram);
620 if (error == 0) {
621 sa = (struct sockaddr *)&ss;
622 want_free = FALSE;
623 }
624 }
625 if (error != 0)
626 goto out;
627
628 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
629 #if CONFIG_MACF_SOCKET_SUBSET
630 if ((error = mac_socket_check_connect(kauth_cred_get(), so, sa)) != 0) {
631 if (want_free)
632 FREE(sa, M_SONAME);
633 goto out;
634 }
635 #endif /* MAC_SOCKET_SUBSET */
636 socket_lock(so, 1);
637
638 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
639 if (want_free)
640 FREE(sa, M_SONAME);
641 socket_unlock(so, 1);
642 error = EALREADY;
643 goto out;
644 }
645 error = soconnectlock(so, sa, 0);
646 if (error)
647 goto bad;
648 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
649 if (want_free)
650 FREE(sa, M_SONAME);
651 socket_unlock(so, 1);
652 error = EINPROGRESS;
653 goto out;
654 }
655 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
656 if (so->so_proto->pr_getlock != NULL)
657 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
658 else
659 mutex_held = so->so_proto->pr_domain->dom_mtx;
660 error = msleep((caddr_t)&so->so_timeo, mutex_held,
661 PSOCK | PCATCH, "connect", 0);
662 if ((so->so_state & SS_DRAINING)) {
663 error = ECONNABORTED;
664 }
665 if (error)
666 break;
667 }
668 if (error == 0) {
669 error = so->so_error;
670 so->so_error = 0;
671 }
672 bad:
673 so->so_state &= ~SS_ISCONNECTING;
674 socket_unlock(so, 1);
675 if (want_free)
676 FREE(sa, M_SONAME);
677 if (error == ERESTART)
678 error = EINTR;
679 out:
680 file_drop(fd);
681 return (error);
682 }
683
684 /*
685 * Returns: 0 Success
686 * socreate:EAFNOSUPPORT
687 * socreate:EPROTOTYPE
688 * socreate:EPROTONOSUPPORT
689 * socreate:ENOBUFS
690 * socreate:ENOMEM
691 * socreate:EISCONN
692 * socreate:??? [other protocol families, IPSEC]
693 * falloc:ENFILE
694 * falloc:EMFILE
695 * falloc:ENOMEM
696 * copyout:EFAULT
697 * soconnect2:EINVAL
698 * soconnect2:EPROTOTYPE
699 * soconnect2:??? [other protocol families[
700 */
701 int
702 socketpair(struct proc *p, struct socketpair_args *uap,
703 __unused int32_t *retval)
704 {
705 struct fileproc *fp1, *fp2;
706 struct socket *so1, *so2;
707 int fd, error, sv[2];
708
709 AUDIT_ARG(socket, uap->domain, uap->type, uap->protocol);
710 error = socreate(uap->domain, &so1, uap->type, uap->protocol);
711 if (error)
712 return (error);
713 error = socreate(uap->domain, &so2, uap->type, uap->protocol);
714 if (error)
715 goto free1;
716
717 error = falloc(p, &fp1, &fd, vfs_context_current());
718 if (error) {
719 goto free2;
720 }
721 fp1->f_flag = FREAD|FWRITE;
722 fp1->f_type = DTYPE_SOCKET;
723 fp1->f_ops = &socketops;
724 fp1->f_data = (caddr_t)so1;
725 sv[0] = fd;
726
727 error = falloc(p, &fp2, &fd, vfs_context_current());
728 if (error) {
729 goto free3;
730 }
731 fp2->f_flag = FREAD|FWRITE;
732 fp2->f_type = DTYPE_SOCKET;
733 fp2->f_ops = &socketops;
734 fp2->f_data = (caddr_t)so2;
735 sv[1] = fd;
736
737 error = soconnect2(so1, so2);
738 if (error) {
739 goto free4;
740 }
741 if (uap->type == SOCK_DGRAM) {
742 /*
743 * Datagram socket connection is asymmetric.
744 */
745 error = soconnect2(so2, so1);
746 if (error) {
747 goto free4;
748 }
749 }
750
751 if ((error = copyout(sv, uap->rsv, 2 * sizeof (int))) != 0)
752 goto free4;
753
754 proc_fdlock(p);
755 procfdtbl_releasefd(p, sv[0], NULL);
756 procfdtbl_releasefd(p, sv[1], NULL);
757 fp_drop(p, sv[0], fp1, 1);
758 fp_drop(p, sv[1], fp2, 1);
759 proc_fdunlock(p);
760
761 return (0);
762 free4:
763 fp_free(p, sv[1], fp2);
764 free3:
765 fp_free(p, sv[0], fp1);
766 free2:
767 (void) soclose(so2);
768 free1:
769 (void) soclose(so1);
770 return (error);
771 }
772
773 /*
774 * Returns: 0 Success
775 * EINVAL
776 * ENOBUFS
777 * EBADF
778 * EPIPE
779 * EACCES Mandatory Access Control failure
780 * file_socket:ENOTSOCK
781 * file_socket:EBADF
782 * getsockaddr:ENAMETOOLONG Filename too long
783 * getsockaddr:EINVAL Invalid argument
784 * getsockaddr:ENOMEM Not enough space
785 * getsockaddr:EFAULT Bad address
786 * <pru_sosend>:EACCES[TCP]
787 * <pru_sosend>:EADDRINUSE[TCP]
788 * <pru_sosend>:EADDRNOTAVAIL[TCP]
789 * <pru_sosend>:EAFNOSUPPORT[TCP]
790 * <pru_sosend>:EAGAIN[TCP]
791 * <pru_sosend>:EBADF
792 * <pru_sosend>:ECONNRESET[TCP]
793 * <pru_sosend>:EFAULT
794 * <pru_sosend>:EHOSTUNREACH[TCP]
795 * <pru_sosend>:EINTR
796 * <pru_sosend>:EINVAL
797 * <pru_sosend>:EISCONN[AF_INET]
798 * <pru_sosend>:EMSGSIZE[TCP]
799 * <pru_sosend>:ENETDOWN[TCP]
800 * <pru_sosend>:ENETUNREACH[TCP]
801 * <pru_sosend>:ENOBUFS
802 * <pru_sosend>:ENOMEM[TCP]
803 * <pru_sosend>:ENOTCONN[AF_INET]
804 * <pru_sosend>:EOPNOTSUPP
805 * <pru_sosend>:EPERM[TCP]
806 * <pru_sosend>:EPIPE
807 * <pru_sosend>:EWOULDBLOCK
808 * <pru_sosend>:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
809 * <pru_sosend>:???[AF_INET] [whatever a filter author chooses]
810 * <pru_sosend>:??? [value from so_error]
811 * sockargs:???
812 */
813 static int
814 sendit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop,
815 int flags, int32_t *retval)
816 {
817 struct mbuf *control = NULL;
818 struct sockaddr_storage ss;
819 struct sockaddr *to = NULL;
820 boolean_t want_free = TRUE;
821 int error;
822 struct socket *so;
823 user_ssize_t len;
824
825 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
826
827 error = file_socket(s, &so);
828 if (error) {
829 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error, 0, 0, 0, 0);
830 return (error);
831 }
832 if (so == NULL) {
833 error = EBADF;
834 goto out;
835 }
836 if (mp->msg_name != USER_ADDR_NULL) {
837 if (mp->msg_namelen > sizeof (ss)) {
838 error = getsockaddr(so, &to, mp->msg_name,
839 mp->msg_namelen, TRUE);
840 } else {
841 error = getsockaddr_s(so, &ss, mp->msg_name,
842 mp->msg_namelen, TRUE);
843 if (error == 0) {
844 to = (struct sockaddr *)&ss;
845 want_free = FALSE;
846 }
847 }
848 if (error != 0)
849 goto out;
850 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), to);
851 }
852 if (mp->msg_control != USER_ADDR_NULL) {
853 if (mp->msg_controllen < sizeof (struct cmsghdr)) {
854 error = EINVAL;
855 goto bad;
856 }
857 error = sockargs(&control, mp->msg_control,
858 mp->msg_controllen, MT_CONTROL);
859 if (error != 0)
860 goto bad;
861 }
862
863 #if CONFIG_MACF_SOCKET_SUBSET
864 /*
865 * We check the state without holding the socket lock;
866 * if a race condition occurs, it would simply result
867 * in an extra call to the MAC check function.
868 */
869 if (!(so->so_state & SS_ISCONNECTED) &&
870 (error = mac_socket_check_send(kauth_cred_get(), so, to)) != 0)
871 goto bad;
872 #endif /* MAC_SOCKET_SUBSET */
873
874 len = uio_resid(uiop);
875 error = so->so_proto->pr_usrreqs->pru_sosend(so, to, uiop, 0, control,
876 flags);
877 if (error != 0) {
878 if (uio_resid(uiop) != len && (error == ERESTART ||
879 error == EINTR || error == EWOULDBLOCK))
880 error = 0;
881 /* Generation of SIGPIPE can be controlled per socket */
882 if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE))
883 psignal(p, SIGPIPE);
884 }
885 if (error == 0)
886 *retval = (int)(len - uio_resid(uiop));
887 bad:
888 if (to != NULL && want_free)
889 FREE(to, M_SONAME);
890 out:
891 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error, 0, 0, 0, 0);
892 file_drop(s);
893 return (error);
894 }
895
896 /*
897 * Returns: 0 Success
898 * ENOMEM
899 * sendit:??? [see sendit definition in this file]
900 * write:??? [4056224: applicable for pipes]
901 */
902 int
903 sendto(struct proc *p, struct sendto_args *uap, int32_t *retval)
904 {
905 __pthread_testcancel(1);
906 return(sendto_nocancel(p, (struct sendto_nocancel_args *)uap, retval));
907 }
908
909 int
910 sendto_nocancel(struct proc *p, struct sendto_nocancel_args *uap, int32_t *retval)
911 {
912 struct user_msghdr msg;
913 int error;
914 uio_t auio = NULL;
915
916 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_START, 0, 0, 0, 0, 0);
917 AUDIT_ARG(fd, uap->s);
918
919 auio = uio_create(1, 0,
920 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
921 UIO_WRITE);
922 if (auio == NULL) {
923 return (ENOMEM);
924 }
925 uio_addiov(auio, uap->buf, uap->len);
926
927 msg.msg_name = uap->to;
928 msg.msg_namelen = uap->tolen;
929 /* no need to set up msg_iov. sendit uses uio_t we send it */
930 msg.msg_iov = 0;
931 msg.msg_iovlen = 0;
932 msg.msg_control = 0;
933 msg.msg_flags = 0;
934
935 error = sendit(p, uap->s, &msg, auio, uap->flags, retval);
936
937 if (auio != NULL) {
938 uio_free(auio);
939 }
940
941 #if HACK_FOR_4056224
942 /*
943 * Radar 4056224
944 * Temporary workaround to let send() and recv() work over
945 * a pipe for binary compatibility
946 * This will be removed in the release following Tiger
947 */
948 if (error == ENOTSOCK) {
949 struct fileproc *fp;
950
951 if (fp_lookup(p, uap->s, &fp, 0) == 0) {
952 (void) fp_drop(p, uap->s, fp, 0);
953
954 if (fp->f_type == DTYPE_PIPE) {
955 struct write_args write_uap;
956 user_ssize_t write_retval;
957
958 if (p->p_pid > last_pid_4056224) {
959 last_pid_4056224 = p->p_pid;
960
961 printf("%s[%d] uses send/recv "
962 "on a pipe\n", p->p_comm, p->p_pid);
963 }
964
965 bzero(&write_uap, sizeof (struct write_args));
966 write_uap.fd = uap->s;
967 write_uap.cbuf = uap->buf;
968 write_uap.nbyte = uap->len;
969
970 error = write(p, &write_uap, &write_retval);
971 *retval = (int)write_retval;
972 }
973 }
974 }
975 #endif /* HACK_FOR_4056224 */
976
977 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_END, error, *retval, 0, 0, 0);
978
979 return (error);
980 }
981
982 /*
983 * Returns: 0 Success
984 * ENOBUFS
985 * copyin:EFAULT
986 * sendit:??? [see sendit definition in this file]
987 */
988 int
989 sendmsg(struct proc *p, struct sendmsg_args *uap, int32_t *retval)
990 {
991 __pthread_testcancel(1);
992 return(sendmsg_nocancel(p, (struct sendmsg_nocancel_args *)uap, retval));
993 }
994
995 int
996 sendmsg_nocancel(struct proc *p, struct sendmsg_nocancel_args *uap, int32_t *retval)
997 {
998 struct user32_msghdr msg32;
999 struct user64_msghdr msg64;
1000 struct user_msghdr user_msg;
1001 caddr_t msghdrp;
1002 int size_of_msghdr;
1003 int error;
1004 uio_t auio = NULL;
1005 struct user_iovec *iovp;
1006
1007 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
1008 AUDIT_ARG(fd, uap->s);
1009 if (IS_64BIT_PROCESS(p)) {
1010 msghdrp = (caddr_t)&msg64;
1011 size_of_msghdr = sizeof (msg64);
1012 } else {
1013 msghdrp = (caddr_t)&msg32;
1014 size_of_msghdr = sizeof (msg32);
1015 }
1016 error = copyin(uap->msg, msghdrp, size_of_msghdr);
1017 if (error) {
1018 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1019 return (error);
1020 }
1021
1022 if (IS_64BIT_PROCESS(p)) {
1023 user_msg.msg_flags = msg64.msg_flags;
1024 user_msg.msg_controllen = msg64.msg_controllen;
1025 user_msg.msg_control = msg64.msg_control;
1026 user_msg.msg_iovlen = msg64.msg_iovlen;
1027 user_msg.msg_iov = msg64.msg_iov;
1028 user_msg.msg_namelen = msg64.msg_namelen;
1029 user_msg.msg_name = msg64.msg_name;
1030 } else {
1031 user_msg.msg_flags = msg32.msg_flags;
1032 user_msg.msg_controllen = msg32.msg_controllen;
1033 user_msg.msg_control = msg32.msg_control;
1034 user_msg.msg_iovlen = msg32.msg_iovlen;
1035 user_msg.msg_iov = msg32.msg_iov;
1036 user_msg.msg_namelen = msg32.msg_namelen;
1037 user_msg.msg_name = msg32.msg_name;
1038 }
1039
1040 if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
1041 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, EMSGSIZE,
1042 0, 0, 0, 0);
1043 return (EMSGSIZE);
1044 }
1045
1046 /* allocate a uio large enough to hold the number of iovecs passed */
1047 auio = uio_create(user_msg.msg_iovlen, 0,
1048 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1049 UIO_WRITE);
1050 if (auio == NULL) {
1051 error = ENOBUFS;
1052 goto done;
1053 }
1054
1055 if (user_msg.msg_iovlen) {
1056 /*
1057 * get location of iovecs within the uio.
1058 * then copyin the iovecs from user space.
1059 */
1060 iovp = uio_iovsaddr(auio);
1061 if (iovp == NULL) {
1062 error = ENOBUFS;
1063 goto done;
1064 }
1065 error = copyin_user_iovec_array(user_msg.msg_iov,
1066 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
1067 user_msg.msg_iovlen, iovp);
1068 if (error)
1069 goto done;
1070 user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
1071
1072 /* finish setup of uio_t */
1073 uio_calculateresid(auio);
1074 } else {
1075 user_msg.msg_iov = 0;
1076 }
1077
1078 /* msg_flags is ignored for send */
1079 user_msg.msg_flags = 0;
1080
1081 error = sendit(p, uap->s, &user_msg, auio, uap->flags, retval);
1082 done:
1083 if (auio != NULL) {
1084 uio_free(auio);
1085 }
1086 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1087
1088 return (error);
1089 }
1090
1091 /*
1092 * Returns: 0 Success
1093 * ENOTSOCK
1094 * EINVAL
1095 * EBADF
1096 * EACCES Mandatory Access Control failure
1097 * copyout:EFAULT
1098 * fp_lookup:EBADF
1099 * <pru_soreceive>:ENOBUFS
1100 * <pru_soreceive>:ENOTCONN
1101 * <pru_soreceive>:EWOULDBLOCK
1102 * <pru_soreceive>:EFAULT
1103 * <pru_soreceive>:EINTR
1104 * <pru_soreceive>:EBADF
1105 * <pru_soreceive>:EINVAL
1106 * <pru_soreceive>:EMSGSIZE
1107 * <pru_soreceive>:???
1108 *
1109 * Notes: Additional return values from calls through <pru_soreceive>
1110 * depend on protocols other than TCP or AF_UNIX, which are
1111 * documented above.
1112 */
1113 static int
1114 recvit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop,
1115 user_addr_t namelenp, int32_t *retval)
1116 {
1117 int len, error;
1118 struct mbuf *m, *control = 0;
1119 user_addr_t ctlbuf;
1120 struct socket *so;
1121 struct sockaddr *fromsa = 0;
1122 struct fileproc *fp;
1123
1124 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
1125 proc_fdlock(p);
1126 if ((error = fp_lookup(p, s, &fp, 1))) {
1127 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
1128 proc_fdunlock(p);
1129 return (error);
1130 }
1131 if (fp->f_type != DTYPE_SOCKET) {
1132 fp_drop(p, s, fp, 1);
1133 proc_fdunlock(p);
1134 return (ENOTSOCK);
1135 }
1136
1137 so = (struct socket *)fp->f_data;
1138 if (so == NULL) {
1139 fp_drop(p, s, fp, 1);
1140 proc_fdunlock(p);
1141 return (EBADF);
1142 }
1143
1144 proc_fdunlock(p);
1145
1146 #if CONFIG_MACF_SOCKET_SUBSET
1147 /*
1148 * We check the state without holding the socket lock;
1149 * if a race condition occurs, it would simply result
1150 * in an extra call to the MAC check function.
1151 */
1152 if (!(so->so_state & SS_ISCONNECTED) &&
1153 (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0)
1154 goto out1;
1155 #endif /* MAC_SOCKET_SUBSET */
1156 if (uio_resid(uiop) < 0) {
1157 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, EINVAL, 0, 0, 0, 0);
1158 error = EINVAL;
1159 goto out1;
1160 }
1161
1162 len = uio_resid(uiop);
1163 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, uiop,
1164 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
1165 &mp->msg_flags);
1166 if (fromsa)
1167 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()),
1168 fromsa);
1169 if (error) {
1170 if (uio_resid(uiop) != len && (error == ERESTART ||
1171 error == EINTR || error == EWOULDBLOCK))
1172 error = 0;
1173 }
1174
1175 if (error)
1176 goto out;
1177
1178 *retval = len - uio_resid(uiop);
1179 if (mp->msg_name) {
1180 socklen_t sa_len = 0;
1181
1182 len = mp->msg_namelen;
1183 if (len <= 0 || fromsa == 0) {
1184 len = 0;
1185 } else {
1186 #ifndef MIN
1187 #define MIN(a, b) ((a) > (b) ? (b) : (a))
1188 #endif
1189 sa_len = fromsa->sa_len;
1190 len = MIN((unsigned int)len, sa_len);
1191 error = copyout(fromsa, mp->msg_name, (unsigned)len);
1192 if (error)
1193 goto out;
1194 }
1195 mp->msg_namelen = sa_len;
1196 /* return the actual, untruncated address length */
1197 if (namelenp &&
1198 (error = copyout((caddr_t)&sa_len, namelenp,
1199 sizeof (int)))) {
1200 goto out;
1201 }
1202 }
1203 if (mp->msg_control) {
1204 len = mp->msg_controllen;
1205 m = control;
1206 mp->msg_controllen = 0;
1207 ctlbuf = mp->msg_control;
1208
1209 while (m && len > 0) {
1210 unsigned int tocopy;
1211 struct cmsghdr *cp = mtod(m, struct cmsghdr *);
1212 int cp_size = CMSG_ALIGN(cp->cmsg_len);
1213 int buflen = m->m_len;
1214
1215 while (buflen > 0 && len > 0) {
1216
1217 /*
1218 SCM_TIMESTAMP hack because struct timeval has a
1219 * different size for 32 bits and 64 bits processes
1220 */
1221 if (cp->cmsg_level == SOL_SOCKET && cp->cmsg_type == SCM_TIMESTAMP) {
1222 unsigned char tmp_buffer[CMSG_SPACE(sizeof(struct user64_timeval))];
1223 struct cmsghdr *tmp_cp = (struct cmsghdr *)tmp_buffer;
1224 int tmp_space;
1225 struct timeval *tv = (struct timeval *)CMSG_DATA(cp);
1226
1227 tmp_cp->cmsg_level = SOL_SOCKET;
1228 tmp_cp->cmsg_type = SCM_TIMESTAMP;
1229
1230 if (proc_is64bit(p)) {
1231 struct user64_timeval *tv64 = (struct user64_timeval *)CMSG_DATA(tmp_cp);
1232
1233 tv64->tv_sec = tv->tv_sec;
1234 tv64->tv_usec = tv->tv_usec;
1235
1236 tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user64_timeval));
1237 tmp_space = CMSG_SPACE(sizeof(struct user64_timeval));
1238 } else {
1239 struct user32_timeval *tv32 = (struct user32_timeval *)CMSG_DATA(tmp_cp);
1240
1241 tv32->tv_sec = tv->tv_sec;
1242 tv32->tv_usec = tv->tv_usec;
1243
1244 tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user32_timeval));
1245 tmp_space = CMSG_SPACE(sizeof(struct user32_timeval));
1246 }
1247 if (len >= tmp_space) {
1248 tocopy = tmp_space;
1249 } else {
1250 mp->msg_flags |= MSG_CTRUNC;
1251 tocopy = len;
1252 }
1253 error = copyout(tmp_buffer, ctlbuf, tocopy);
1254 if (error)
1255 goto out;
1256
1257 } else {
1258
1259 if (cp_size > buflen) {
1260 panic("cp_size > buflen, something wrong with alignment!");
1261 }
1262
1263 if (len >= cp_size) {
1264 tocopy = cp_size;
1265 } else {
1266 mp->msg_flags |= MSG_CTRUNC;
1267 tocopy = len;
1268 }
1269
1270 error = copyout((caddr_t) cp, ctlbuf,
1271 tocopy);
1272 if (error)
1273 goto out;
1274 }
1275
1276
1277 ctlbuf += tocopy;
1278 len -= tocopy;
1279
1280 buflen -= cp_size;
1281 cp = (struct cmsghdr *) ((unsigned char *) cp + cp_size);
1282 cp_size = CMSG_ALIGN(cp->cmsg_len);
1283 }
1284
1285 m = m->m_next;
1286 }
1287 mp->msg_controllen = ctlbuf - mp->msg_control;
1288 }
1289 out:
1290 if (fromsa)
1291 FREE(fromsa, M_SONAME);
1292 if (control)
1293 m_freem(control);
1294 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
1295 out1:
1296 fp_drop(p, s, fp, 0);
1297 return (error);
1298 }
1299
1300 /*
1301 * Returns: 0 Success
1302 * ENOMEM
1303 * copyin:EFAULT
1304 * recvit:???
1305 * read:??? [4056224: applicable for pipes]
1306 *
1307 * Notes: The read entry point is only called as part of support for
1308 * binary backward compatability; new code should use read
1309 * instead of recv or recvfrom when attempting to read data
1310 * from pipes.
1311 *
1312 * For full documentation of the return codes from recvit, see
1313 * the block header for the recvit function.
1314 */
1315 int
1316 recvfrom(struct proc *p, struct recvfrom_args *uap, int32_t *retval)
1317 {
1318 __pthread_testcancel(1);
1319 return(recvfrom_nocancel(p, (struct recvfrom_nocancel_args *)uap, retval));
1320 }
1321
1322 int
1323 recvfrom_nocancel(struct proc *p, struct recvfrom_nocancel_args *uap, int32_t *retval)
1324 {
1325 struct user_msghdr msg;
1326 int error;
1327 uio_t auio = NULL;
1328
1329 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_START, 0, 0, 0, 0, 0);
1330 AUDIT_ARG(fd, uap->s);
1331
1332 if (uap->fromlenaddr) {
1333 error = copyin(uap->fromlenaddr,
1334 (caddr_t)&msg.msg_namelen, sizeof (msg.msg_namelen));
1335 if (error)
1336 return (error);
1337 } else {
1338 msg.msg_namelen = 0;
1339 }
1340 msg.msg_name = uap->from;
1341 auio = uio_create(1, 0,
1342 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1343 UIO_READ);
1344 if (auio == NULL) {
1345 return (ENOMEM);
1346 }
1347
1348 uio_addiov(auio, uap->buf, uap->len);
1349 /* no need to set up msg_iov. recvit uses uio_t we send it */
1350 msg.msg_iov = 0;
1351 msg.msg_iovlen = 0;
1352 msg.msg_control = 0;
1353 msg.msg_controllen = 0;
1354 msg.msg_flags = uap->flags;
1355 error = recvit(p, uap->s, &msg, auio, uap->fromlenaddr, retval);
1356 if (auio != NULL) {
1357 uio_free(auio);
1358 }
1359
1360 #if HACK_FOR_4056224
1361 /*
1362 * Radar 4056224
1363 * Temporary workaround to let send() and recv() work over
1364 * a pipe for binary compatibility
1365 * This will be removed in the release following Tiger
1366 */
1367 if (error == ENOTSOCK && proc_is64bit(p) == 0) {
1368 struct fileproc *fp;
1369
1370 if (fp_lookup(p, uap->s, &fp, 0) == 0) {
1371 (void) fp_drop(p, uap->s, fp, 0);
1372
1373 if (fp->f_type == DTYPE_PIPE) {
1374 struct read_args read_uap;
1375 user_ssize_t read_retval;
1376
1377 if (p->p_pid > last_pid_4056224) {
1378 last_pid_4056224 = p->p_pid;
1379
1380 printf("%s[%d] uses send/recv on "
1381 "a pipe\n", p->p_comm, p->p_pid);
1382 }
1383
1384 bzero(&read_uap, sizeof (struct read_args));
1385 read_uap.fd = uap->s;
1386 read_uap.cbuf = uap->buf;
1387 read_uap.nbyte = uap->len;
1388
1389 error = read(p, &read_uap, &read_retval);
1390 *retval = (int)read_retval;
1391 }
1392 }
1393 }
1394 #endif /* HACK_FOR_4056224 */
1395
1396 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_END, error, 0, 0, 0, 0);
1397
1398 return (error);
1399 }
1400
1401 /*
1402 * Returns: 0 Success
1403 * EMSGSIZE
1404 * ENOMEM
1405 * copyin:EFAULT
1406 * copyout:EFAULT
1407 * recvit:???
1408 *
1409 * Notes: For full documentation of the return codes from recvit, see
1410 * the block header for the recvit function.
1411 */
1412 int
1413 recvmsg(struct proc *p, struct recvmsg_args *uap, int32_t *retval)
1414 {
1415 __pthread_testcancel(1);
1416 return(recvmsg_nocancel(p, (struct recvmsg_nocancel_args *)uap, retval));
1417 }
1418
1419 int
1420 recvmsg_nocancel(struct proc *p, struct recvmsg_nocancel_args *uap, int32_t *retval)
1421 {
1422 struct user32_msghdr msg32;
1423 struct user64_msghdr msg64;
1424 struct user_msghdr user_msg;
1425 caddr_t msghdrp;
1426 int size_of_msghdr;
1427 user_addr_t uiov;
1428 int error;
1429 uio_t auio = NULL;
1430 struct user_iovec *iovp;
1431
1432 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
1433 AUDIT_ARG(fd, uap->s);
1434 if (IS_64BIT_PROCESS(p)) {
1435 msghdrp = (caddr_t)&msg64;
1436 size_of_msghdr = sizeof (msg64);
1437 } else {
1438 msghdrp = (caddr_t)&msg32;
1439 size_of_msghdr = sizeof (msg32);
1440 }
1441 error = copyin(uap->msg, msghdrp, size_of_msghdr);
1442 if (error) {
1443 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1444 return (error);
1445 }
1446
1447 /* only need to copy if user process is not 64-bit */
1448 if (IS_64BIT_PROCESS(p)) {
1449 user_msg.msg_flags = msg64.msg_flags;
1450 user_msg.msg_controllen = msg64.msg_controllen;
1451 user_msg.msg_control = msg64.msg_control;
1452 user_msg.msg_iovlen = msg64.msg_iovlen;
1453 user_msg.msg_iov = msg64.msg_iov;
1454 user_msg.msg_namelen = msg64.msg_namelen;
1455 user_msg.msg_name = msg64.msg_name;
1456 } else {
1457 user_msg.msg_flags = msg32.msg_flags;
1458 user_msg.msg_controllen = msg32.msg_controllen;
1459 user_msg.msg_control = msg32.msg_control;
1460 user_msg.msg_iovlen = msg32.msg_iovlen;
1461 user_msg.msg_iov = msg32.msg_iov;
1462 user_msg.msg_namelen = msg32.msg_namelen;
1463 user_msg.msg_name = msg32.msg_name;
1464 }
1465
1466 if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
1467 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, EMSGSIZE,
1468 0, 0, 0, 0);
1469 return (EMSGSIZE);
1470 }
1471
1472 user_msg.msg_flags = uap->flags;
1473
1474 /* allocate a uio large enough to hold the number of iovecs passed */
1475 auio = uio_create(user_msg.msg_iovlen, 0,
1476 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1477 UIO_READ);
1478 if (auio == NULL) {
1479 error = ENOMEM;
1480 goto done;
1481 }
1482
1483 /*
1484 * get location of iovecs within the uio. then copyin the iovecs from
1485 * user space.
1486 */
1487 iovp = uio_iovsaddr(auio);
1488 if (iovp == NULL) {
1489 error = ENOMEM;
1490 goto done;
1491 }
1492 uiov = user_msg.msg_iov;
1493 user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
1494 error = copyin_user_iovec_array(uiov,
1495 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
1496 user_msg.msg_iovlen, iovp);
1497 if (error)
1498 goto done;
1499
1500 /* finish setup of uio_t */
1501 uio_calculateresid(auio);
1502
1503 error = recvit(p, uap->s, &user_msg, auio, 0, retval);
1504 if (!error) {
1505 user_msg.msg_iov = uiov;
1506 if (IS_64BIT_PROCESS(p)) {
1507 msg64.msg_flags = user_msg.msg_flags;
1508 msg64.msg_controllen = user_msg.msg_controllen;
1509 msg64.msg_control = user_msg.msg_control;
1510 msg64.msg_iovlen = user_msg.msg_iovlen;
1511 msg64.msg_iov = user_msg.msg_iov;
1512 msg64.msg_namelen = user_msg.msg_namelen;
1513 msg64.msg_name = user_msg.msg_name;
1514 } else {
1515 msg32.msg_flags = user_msg.msg_flags;
1516 msg32.msg_controllen = user_msg.msg_controllen;
1517 msg32.msg_control = user_msg.msg_control;
1518 msg32.msg_iovlen = user_msg.msg_iovlen;
1519 msg32.msg_iov = user_msg.msg_iov;
1520 msg32.msg_namelen = user_msg.msg_namelen;
1521 msg32.msg_name = user_msg.msg_name;
1522 }
1523 error = copyout(msghdrp, uap->msg, size_of_msghdr);
1524 }
1525 done:
1526 if (auio != NULL) {
1527 uio_free(auio);
1528 }
1529 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1530 return (error);
1531 }
1532
1533 /*
1534 * Returns: 0 Success
1535 * EBADF
1536 * file_socket:ENOTSOCK
1537 * file_socket:EBADF
1538 * soshutdown:EINVAL
1539 * soshutdown:ENOTCONN
1540 * soshutdown:EADDRNOTAVAIL[TCP]
1541 * soshutdown:ENOBUFS[TCP]
1542 * soshutdown:EMSGSIZE[TCP]
1543 * soshutdown:EHOSTUNREACH[TCP]
1544 * soshutdown:ENETUNREACH[TCP]
1545 * soshutdown:ENETDOWN[TCP]
1546 * soshutdown:ENOMEM[TCP]
1547 * soshutdown:EACCES[TCP]
1548 * soshutdown:EMSGSIZE[TCP]
1549 * soshutdown:ENOBUFS[TCP]
1550 * soshutdown:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
1551 * soshutdown:??? [other protocol families]
1552 */
1553 /* ARGSUSED */
1554 int
1555 shutdown(__unused struct proc *p, struct shutdown_args *uap,
1556 __unused int32_t *retval)
1557 {
1558 struct socket *so;
1559 int error;
1560
1561 AUDIT_ARG(fd, uap->s);
1562 error = file_socket(uap->s, &so);
1563 if (error)
1564 return (error);
1565 if (so == NULL) {
1566 error = EBADF;
1567 goto out;
1568 }
1569 error = soshutdown((struct socket *)so, uap->how);
1570 out:
1571 file_drop(uap->s);
1572 return (error);
1573 }
1574
1575 /*
1576 * Returns: 0 Success
1577 * EFAULT
1578 * EINVAL
1579 * EACCES Mandatory Access Control failure
1580 * file_socket:ENOTSOCK
1581 * file_socket:EBADF
1582 * sosetopt:EINVAL
1583 * sosetopt:ENOPROTOOPT
1584 * sosetopt:ENOBUFS
1585 * sosetopt:EDOM
1586 * sosetopt:EFAULT
1587 * sosetopt:EOPNOTSUPP[AF_UNIX]
1588 * sosetopt:???
1589 */
1590 /* ARGSUSED */
1591 int
1592 setsockopt(struct proc *p, struct setsockopt_args *uap,
1593 __unused int32_t *retval)
1594 {
1595 struct socket *so;
1596 struct sockopt sopt;
1597 int error;
1598
1599 AUDIT_ARG(fd, uap->s);
1600 if (uap->val == 0 && uap->valsize != 0)
1601 return (EFAULT);
1602 /* No bounds checking on size (it's unsigned) */
1603
1604 error = file_socket(uap->s, &so);
1605 if (error)
1606 return (error);
1607
1608 sopt.sopt_dir = SOPT_SET;
1609 sopt.sopt_level = uap->level;
1610 sopt.sopt_name = uap->name;
1611 sopt.sopt_val = uap->val;
1612 sopt.sopt_valsize = uap->valsize;
1613 sopt.sopt_p = p;
1614
1615 if (so == NULL) {
1616 error = EINVAL;
1617 goto out;
1618 }
1619 #if CONFIG_MACF_SOCKET_SUBSET
1620 if ((error = mac_socket_check_setsockopt(kauth_cred_get(), so,
1621 &sopt)) != 0)
1622 goto out;
1623 #endif /* MAC_SOCKET_SUBSET */
1624 error = sosetopt(so, &sopt);
1625 out:
1626 file_drop(uap->s);
1627 return (error);
1628 }
1629
1630
1631
1632 /*
1633 * Returns: 0 Success
1634 * EINVAL
1635 * EBADF
1636 * EACCES Mandatory Access Control failure
1637 * copyin:EFAULT
1638 * copyout:EFAULT
1639 * file_socket:ENOTSOCK
1640 * file_socket:EBADF
1641 * sogetopt:???
1642 */
1643 int
1644 getsockopt(struct proc *p, struct getsockopt_args *uap,
1645 __unused int32_t *retval)
1646 {
1647 int error;
1648 socklen_t valsize;
1649 struct sockopt sopt;
1650 struct socket *so;
1651
1652 error = file_socket(uap->s, &so);
1653 if (error)
1654 return (error);
1655 if (uap->val) {
1656 error = copyin(uap->avalsize, (caddr_t)&valsize,
1657 sizeof (valsize));
1658 if (error)
1659 goto out;
1660 /* No bounds checking on size (it's unsigned) */
1661 } else {
1662 valsize = 0;
1663 }
1664 sopt.sopt_dir = SOPT_GET;
1665 sopt.sopt_level = uap->level;
1666 sopt.sopt_name = uap->name;
1667 sopt.sopt_val = uap->val;
1668 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
1669 sopt.sopt_p = p;
1670
1671 if (so == NULL) {
1672 error = EBADF;
1673 goto out;
1674 }
1675 #if CONFIG_MACF_SOCKET_SUBSET
1676 if ((error = mac_socket_check_getsockopt(kauth_cred_get(), so,
1677 &sopt)) != 0)
1678 goto out;
1679 #endif /* MAC_SOCKET_SUBSET */
1680 error = sogetopt((struct socket *)so, &sopt);
1681 if (error == 0) {
1682 valsize = sopt.sopt_valsize;
1683 error = copyout((caddr_t)&valsize, uap->avalsize,
1684 sizeof (valsize));
1685 }
1686 out:
1687 file_drop(uap->s);
1688 return (error);
1689 }
1690
1691
1692 /*
1693 * Get socket name.
1694 *
1695 * Returns: 0 Success
1696 * EBADF
1697 * file_socket:ENOTSOCK
1698 * file_socket:EBADF
1699 * copyin:EFAULT
1700 * copyout:EFAULT
1701 * <pru_sockaddr>:ENOBUFS[TCP]
1702 * <pru_sockaddr>:ECONNRESET[TCP]
1703 * <pru_sockaddr>:EINVAL[AF_UNIX]
1704 * <sf_getsockname>:???
1705 */
1706 /* ARGSUSED */
1707 int
1708 getsockname(__unused struct proc *p, struct getsockname_args *uap,
1709 __unused int32_t *retval)
1710 {
1711 struct socket *so;
1712 struct sockaddr *sa;
1713 socklen_t len;
1714 socklen_t sa_len;
1715 int error;
1716
1717 error = file_socket(uap->fdes, &so);
1718 if (error)
1719 return (error);
1720 error = copyin(uap->alen, (caddr_t)&len, sizeof (socklen_t));
1721 if (error)
1722 goto out;
1723 if (so == NULL) {
1724 error = EBADF;
1725 goto out;
1726 }
1727 sa = 0;
1728 socket_lock(so, 1);
1729 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
1730 if (error == 0) {
1731 error = sflt_getsockname(so, &sa);
1732 if (error == EJUSTRETURN)
1733 error = 0;
1734 }
1735 socket_unlock(so, 1);
1736 if (error)
1737 goto bad;
1738 if (sa == 0) {
1739 len = 0;
1740 goto gotnothing;
1741 }
1742
1743 sa_len = sa->sa_len;
1744 len = MIN(len, sa_len);
1745 error = copyout((caddr_t)sa, uap->asa, len);
1746 if (error)
1747 goto bad;
1748 /* return the actual, untruncated address length */
1749 len = sa_len;
1750 gotnothing:
1751 error = copyout((caddr_t)&len, uap->alen, sizeof (socklen_t));
1752 bad:
1753 if (sa)
1754 FREE(sa, M_SONAME);
1755 out:
1756 file_drop(uap->fdes);
1757 return (error);
1758 }
1759
1760 /*
1761 * Get name of peer for connected socket.
1762 *
1763 * Returns: 0 Success
1764 * EBADF
1765 * EINVAL
1766 * ENOTCONN
1767 * file_socket:ENOTSOCK
1768 * file_socket:EBADF
1769 * copyin:EFAULT
1770 * copyout:EFAULT
1771 * <pru_peeraddr>:???
1772 * <sf_getpeername>:???
1773 */
1774 /* ARGSUSED */
1775 int
1776 getpeername(__unused struct proc *p, struct getpeername_args *uap,
1777 __unused int32_t *retval)
1778 {
1779 struct socket *so;
1780 struct sockaddr *sa;
1781 socklen_t len;
1782 socklen_t sa_len;
1783 int error;
1784
1785 error = file_socket(uap->fdes, &so);
1786 if (error)
1787 return (error);
1788 if (so == NULL) {
1789 error = EBADF;
1790 goto out;
1791 }
1792
1793 socket_lock(so, 1);
1794
1795 if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
1796 (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
1797 /* the socket has been shutdown, no more getpeername's */
1798 socket_unlock(so, 1);
1799 error = EINVAL;
1800 goto out;
1801 }
1802
1803 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
1804 socket_unlock(so, 1);
1805 error = ENOTCONN;
1806 goto out;
1807 }
1808 error = copyin(uap->alen, (caddr_t)&len, sizeof (socklen_t));
1809 if (error) {
1810 socket_unlock(so, 1);
1811 goto out;
1812 }
1813 sa = 0;
1814 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
1815 if (error == 0) {
1816 error = sflt_getpeername(so, &sa);
1817 if (error == EJUSTRETURN)
1818 error = 0;
1819 }
1820 socket_unlock(so, 1);
1821 if (error)
1822 goto bad;
1823 if (sa == 0) {
1824 len = 0;
1825 goto gotnothing;
1826 }
1827 sa_len = sa->sa_len;
1828 len = MIN(len, sa_len);
1829 error = copyout(sa, uap->asa, len);
1830 if (error)
1831 goto bad;
1832 /* return the actual, untruncated address length */
1833 len = sa_len;
1834 gotnothing:
1835 error = copyout((caddr_t)&len, uap->alen, sizeof (socklen_t));
1836 bad:
1837 if (sa) FREE(sa, M_SONAME);
1838 out:
1839 file_drop(uap->fdes);
1840 return (error);
1841 }
1842
1843 int
1844 sockargs(struct mbuf **mp, user_addr_t data, int buflen, int type)
1845 {
1846 struct sockaddr *sa;
1847 struct mbuf *m;
1848 int error;
1849
1850 size_t alloc_buflen = (size_t)buflen;
1851
1852 if(alloc_buflen > INT_MAX/2)
1853 return (EINVAL);
1854 #ifdef __LP64__
1855 /* The fd's in the buffer must expand to be pointers, thus we need twice as much space */
1856 if(type == MT_CONTROL)
1857 alloc_buflen = ((buflen - sizeof(struct cmsghdr))*2) + sizeof(struct cmsghdr);
1858 #endif
1859 if (alloc_buflen > MLEN) {
1860 if (type == MT_SONAME && alloc_buflen <= 112)
1861 alloc_buflen = MLEN; /* unix domain compat. hack */
1862 else if (alloc_buflen > MCLBYTES)
1863 return (EINVAL);
1864 }
1865 m = m_get(M_WAIT, type);
1866 if (m == NULL)
1867 return (ENOBUFS);
1868 if (alloc_buflen > MLEN) {
1869 MCLGET(m, M_WAIT);
1870 if ((m->m_flags & M_EXT) == 0) {
1871 m_free(m);
1872 return (ENOBUFS);
1873 }
1874 }
1875 /* K64: We still copyin the original buflen because it gets expanded later
1876 * and we lie about the size of the mbuf because it only affects unp_* functions
1877 */
1878 m->m_len = buflen;
1879 error = copyin(data, mtod(m, caddr_t), (u_int)buflen);
1880 if (error) {
1881 (void) m_free(m);
1882 } else {
1883 *mp = m;
1884 if (type == MT_SONAME) {
1885 sa = mtod(m, struct sockaddr *);
1886 sa->sa_len = buflen;
1887 }
1888 }
1889 return (error);
1890 }
1891
1892 /*
1893 * Given a user_addr_t of length len, allocate and fill out a *sa.
1894 *
1895 * Returns: 0 Success
1896 * ENAMETOOLONG Filename too long
1897 * EINVAL Invalid argument
1898 * ENOMEM Not enough space
1899 * copyin:EFAULT Bad address
1900 */
1901 static int
1902 getsockaddr(struct socket *so, struct sockaddr **namp, user_addr_t uaddr,
1903 size_t len, boolean_t translate_unspec)
1904 {
1905 struct sockaddr *sa;
1906 int error;
1907
1908 if (len > SOCK_MAXADDRLEN)
1909 return (ENAMETOOLONG);
1910
1911 if (len < offsetof(struct sockaddr, sa_data[0]))
1912 return (EINVAL);
1913
1914 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK | M_ZERO);
1915 if (sa == NULL) {
1916 return (ENOMEM);
1917 }
1918 error = copyin(uaddr, (caddr_t)sa, len);
1919 if (error) {
1920 FREE(sa, M_SONAME);
1921 } else {
1922 /*
1923 * Force sa_family to AF_INET on AF_INET sockets to handle
1924 * legacy applications that use AF_UNSPEC (0). On all other
1925 * sockets we leave it unchanged and let the lower layer
1926 * handle it.
1927 */
1928 if (translate_unspec && sa->sa_family == AF_UNSPEC &&
1929 INP_CHECK_SOCKAF(so, AF_INET) &&
1930 len == sizeof (struct sockaddr_in))
1931 sa->sa_family = AF_INET;
1932
1933 sa->sa_len = len;
1934 *namp = sa;
1935 }
1936 return (error);
1937 }
1938
1939 static int
1940 getsockaddr_s(struct socket *so, struct sockaddr_storage *ss,
1941 user_addr_t uaddr, size_t len, boolean_t translate_unspec)
1942 {
1943 int error;
1944
1945 if (ss == NULL || uaddr == USER_ADDR_NULL ||
1946 len < offsetof(struct sockaddr, sa_data[0]))
1947 return (EINVAL);
1948
1949 /*
1950 * sockaddr_storage size is less than SOCK_MAXADDRLEN,
1951 * so the check here is inclusive.
1952 */
1953 if (len > sizeof (*ss))
1954 return (ENAMETOOLONG);
1955
1956 bzero(ss, sizeof (*ss));
1957 error = copyin(uaddr, (caddr_t)ss, len);
1958 if (error == 0) {
1959 /*
1960 * Force sa_family to AF_INET on AF_INET sockets to handle
1961 * legacy applications that use AF_UNSPEC (0). On all other
1962 * sockets we leave it unchanged and let the lower layer
1963 * handle it.
1964 */
1965 if (translate_unspec && ss->ss_family == AF_UNSPEC &&
1966 INP_CHECK_SOCKAF(so, AF_INET) &&
1967 len == sizeof (struct sockaddr_in))
1968 ss->ss_family = AF_INET;
1969
1970 ss->ss_len = len;
1971 }
1972 return (error);
1973 }
1974
1975 #if SENDFILE
1976
1977 SYSCTL_DECL(_kern_ipc);
1978
1979 #define SFUIOBUFS 64
1980 static int sendfileuiobufs = SFUIOBUFS;
1981 SYSCTL_INT(_kern_ipc, OID_AUTO, sendfileuiobufs, CTLFLAG_RW | CTLFLAG_LOCKED, &sendfileuiobufs,
1982 0, "");
1983
1984 /* Macros to compute the number of mbufs needed depending on cluster size */
1985 #define HOWMANY_16K(n) ((((unsigned int)(n) - 1) >> (PGSHIFT + 2)) + 1)
1986 #define HOWMANY_4K(n) ((((unsigned int)(n) - 1) >> PGSHIFT) + 1)
1987
1988 /* Upper send limit in bytes (sendfileuiobufs * PAGESIZE) */
1989 #define SENDFILE_MAX_BYTES (sendfileuiobufs << PGSHIFT)
1990
1991 /* Upper send limit in the number of mbuf clusters */
1992 #define SENDFILE_MAX_16K HOWMANY_16K(SENDFILE_MAX_BYTES)
1993 #define SENDFILE_MAX_4K HOWMANY_4K(SENDFILE_MAX_BYTES)
1994
1995 size_t mbuf_pkt_maxlen(mbuf_t m);
1996
1997 __private_extern__ size_t
1998 mbuf_pkt_maxlen(mbuf_t m)
1999 {
2000 size_t maxlen = 0;
2001
2002 while (m) {
2003 maxlen += mbuf_maxlen(m);
2004 m = mbuf_next(m);
2005 }
2006 return (maxlen);
2007 }
2008
2009 static void
2010 alloc_sendpkt(int how, size_t pktlen, unsigned int *maxchunks,
2011 struct mbuf **m, boolean_t jumbocl)
2012 {
2013 unsigned int needed;
2014
2015 if (pktlen == 0)
2016 panic("%s: pktlen (%ld) must be non-zero\n", __func__, pktlen);
2017
2018 /*
2019 * Try to allocate for the whole thing. Since we want full control
2020 * over the buffer size and be able to accept partial result, we can't
2021 * use mbuf_allocpacket(). The logic below is similar to sosend().
2022 */
2023 *m = NULL;
2024 if (pktlen > MBIGCLBYTES && jumbocl) {
2025 needed = MIN(SENDFILE_MAX_16K, HOWMANY_16K(pktlen));
2026 *m = m_getpackets_internal(&needed, 1, how, 0, M16KCLBYTES);
2027 }
2028 if (*m == NULL) {
2029 needed = MIN(SENDFILE_MAX_4K, HOWMANY_4K(pktlen));
2030 *m = m_getpackets_internal(&needed, 1, how, 0, MBIGCLBYTES);
2031 }
2032
2033 /*
2034 * Our previous attempt(s) at allocation had failed; the system
2035 * may be short on mbufs, and we want to block until they are
2036 * available. This time, ask just for 1 mbuf and don't return
2037 * until we get it.
2038 */
2039 if (*m == NULL) {
2040 needed = 1;
2041 *m = m_getpackets_internal(&needed, 1, M_WAIT, 1, MBIGCLBYTES);
2042 }
2043 if (*m == NULL)
2044 panic("%s: blocking allocation returned NULL\n", __func__);
2045
2046 *maxchunks = needed;
2047 }
2048
2049 /*
2050 * sendfile(2).
2051 * int sendfile(int fd, int s, off_t offset, off_t *nbytes,
2052 * struct sf_hdtr *hdtr, int flags)
2053 *
2054 * Send a file specified by 'fd' and starting at 'offset' to a socket
2055 * specified by 's'. Send only '*nbytes' of the file or until EOF if
2056 * *nbytes == 0. Optionally add a header and/or trailer to the socket
2057 * output. If specified, write the total number of bytes sent into *nbytes.
2058 */
2059 int
2060 sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval)
2061 {
2062 struct fileproc *fp;
2063 struct vnode *vp;
2064 struct socket *so;
2065 struct writev_nocancel_args nuap;
2066 user_ssize_t writev_retval;
2067 struct user_sf_hdtr user_hdtr;
2068 struct user32_sf_hdtr user32_hdtr;
2069 struct user64_sf_hdtr user64_hdtr;
2070 off_t off, xfsize;
2071 off_t nbytes = 0, sbytes = 0;
2072 int error = 0;
2073 size_t sizeof_hdtr;
2074 off_t file_size;
2075 struct vfs_context context = *vfs_context_current();
2076
2077 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_START), uap->s,
2078 0, 0, 0, 0);
2079
2080 AUDIT_ARG(fd, uap->fd);
2081 AUDIT_ARG(value32, uap->s);
2082
2083 /*
2084 * Do argument checking. Must be a regular file in, stream
2085 * type and connected socket out, positive offset.
2086 */
2087 if ((error = fp_getfvp(p, uap->fd, &fp, &vp))) {
2088 goto done;
2089 }
2090 if ((fp->f_flag & FREAD) == 0) {
2091 error = EBADF;
2092 goto done1;
2093 }
2094 if (vnode_isreg(vp) == 0) {
2095 error = ENOTSUP;
2096 goto done1;
2097 }
2098 error = file_socket(uap->s, &so);
2099 if (error) {
2100 goto done1;
2101 }
2102 if (so == NULL) {
2103 error = EBADF;
2104 goto done2;
2105 }
2106 if (so->so_type != SOCK_STREAM) {
2107 error = EINVAL;
2108 goto done2;
2109 }
2110 if ((so->so_state & SS_ISCONNECTED) == 0) {
2111 error = ENOTCONN;
2112 goto done2;
2113 }
2114 if (uap->offset < 0) {
2115 error = EINVAL;
2116 goto done2;
2117 }
2118 if (uap->nbytes == USER_ADDR_NULL) {
2119 error = EINVAL;
2120 goto done2;
2121 }
2122 if (uap->flags != 0) {
2123 error = EINVAL;
2124 goto done2;
2125 }
2126
2127 context.vc_ucred = fp->f_fglob->fg_cred;
2128
2129 #if CONFIG_MACF_SOCKET_SUBSET
2130 /* JMM - fetch connected sockaddr? */
2131 error = mac_socket_check_send(context.vc_ucred, so, NULL);
2132 if (error)
2133 goto done2;
2134 #endif
2135
2136 /*
2137 * Get number of bytes to send
2138 * Should it applies to size of header and trailer?
2139 * JMM - error handling?
2140 */
2141 copyin(uap->nbytes, &nbytes, sizeof (off_t));
2142
2143 /*
2144 * If specified, get the pointer to the sf_hdtr struct for
2145 * any headers/trailers.
2146 */
2147 if (uap->hdtr != USER_ADDR_NULL) {
2148 caddr_t hdtrp;
2149
2150 bzero(&user_hdtr, sizeof (user_hdtr));
2151 if (IS_64BIT_PROCESS(p)) {
2152 hdtrp = (caddr_t)&user64_hdtr;
2153 sizeof_hdtr = sizeof (user64_hdtr);
2154 } else {
2155 hdtrp = (caddr_t)&user32_hdtr;
2156 sizeof_hdtr = sizeof (user32_hdtr);
2157 }
2158 error = copyin(uap->hdtr, hdtrp, sizeof_hdtr);
2159 if (error)
2160 goto done2;
2161 if (IS_64BIT_PROCESS(p)) {
2162 user_hdtr.headers = user64_hdtr.headers;
2163 user_hdtr.hdr_cnt = user64_hdtr.hdr_cnt;
2164 user_hdtr.trailers = user64_hdtr.trailers;
2165 user_hdtr.trl_cnt = user64_hdtr.trl_cnt;
2166 } else {
2167 user_hdtr.headers = user32_hdtr.headers;
2168 user_hdtr.hdr_cnt = user32_hdtr.hdr_cnt;
2169 user_hdtr.trailers = user32_hdtr.trailers;
2170 user_hdtr.trl_cnt = user32_hdtr.trl_cnt;
2171 }
2172
2173 /*
2174 * Send any headers. Wimp out and use writev(2).
2175 */
2176 if (user_hdtr.headers != USER_ADDR_NULL) {
2177 bzero(&nuap, sizeof (struct writev_args));
2178 nuap.fd = uap->s;
2179 nuap.iovp = user_hdtr.headers;
2180 nuap.iovcnt = user_hdtr.hdr_cnt;
2181 error = writev_nocancel(p, &nuap, &writev_retval);
2182 if (error)
2183 goto done2;
2184 sbytes += writev_retval;
2185 }
2186 }
2187
2188 /*
2189 * Get the file size for 2 reasons:
2190 * 1. We don't want to allocate more mbufs than necessary
2191 * 2. We don't want to read past the end of file
2192 */
2193 if ((error = vnode_size(vp, &file_size, vfs_context_current())) != 0)
2194 goto done2;
2195
2196 /*
2197 * Simply read file data into a chain of mbufs that used with scatter
2198 * gather reads. We're not (yet?) setup to use zero copy external
2199 * mbufs that point to the file pages.
2200 */
2201 socket_lock(so, 1);
2202 error = sblock(&so->so_snd, M_WAIT);
2203 if (error) {
2204 socket_unlock(so, 1);
2205 goto done2;
2206 }
2207 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
2208 mbuf_t m0 = NULL, m;
2209 unsigned int nbufs = sendfileuiobufs, i;
2210 uio_t auio;
2211 char uio_buf[UIO_SIZEOF(sendfileuiobufs)]; /* 1 KB !!! */
2212 size_t uiolen;
2213 user_ssize_t rlen;
2214 off_t pgoff;
2215 size_t pktlen;
2216 boolean_t jumbocl;
2217
2218 /*
2219 * Calculate the amount to transfer.
2220 * Align to round number of pages.
2221 * Not to exceed send socket buffer,
2222 * the EOF, or the passed in nbytes.
2223 */
2224 xfsize = sbspace(&so->so_snd);
2225
2226 if (xfsize <= 0) {
2227 if (so->so_state & SS_CANTSENDMORE) {
2228 error = EPIPE;
2229 goto done3;
2230 } else if ((so->so_state & SS_NBIO)) {
2231 error = EAGAIN;
2232 goto done3;
2233 } else {
2234 xfsize = PAGE_SIZE;
2235 }
2236 }
2237
2238 if (xfsize > SENDFILE_MAX_BYTES)
2239 xfsize = SENDFILE_MAX_BYTES;
2240 else if (xfsize > PAGE_SIZE)
2241 xfsize = trunc_page(xfsize);
2242 pgoff = off & PAGE_MASK_64;
2243 if (pgoff > 0 && PAGE_SIZE - pgoff < xfsize)
2244 xfsize = PAGE_SIZE_64 - pgoff;
2245 if (nbytes && xfsize > (nbytes - sbytes))
2246 xfsize = nbytes - sbytes;
2247 if (xfsize <= 0)
2248 break;
2249 if (off + xfsize > file_size)
2250 xfsize = file_size - off;
2251 if (xfsize <= 0)
2252 break;
2253
2254 /*
2255 * Attempt to use larger than system page-size clusters for
2256 * large writes only if there is a jumbo cluster pool and
2257 * if the socket is marked accordingly.
2258 */
2259 jumbocl = sosendjcl && njcl > 0 &&
2260 ((so->so_flags & SOF_MULTIPAGES) || sosendjcl_ignore_capab);
2261
2262 socket_unlock(so, 0);
2263 alloc_sendpkt(M_WAIT, xfsize, &nbufs, &m0, jumbocl);
2264 pktlen = mbuf_pkt_maxlen(m0);
2265 if (pktlen < (size_t)xfsize)
2266 xfsize = pktlen;
2267
2268 auio = uio_createwithbuffer(nbufs, off, UIO_SYSSPACE,
2269 UIO_READ, &uio_buf[0], sizeof (uio_buf));
2270 if (auio == NULL) {
2271 //printf("sendfile: uio_createwithbuffer failed\n");
2272 mbuf_freem(m0);
2273 error = ENXIO;
2274 socket_lock(so, 0);
2275 goto done3;
2276 }
2277
2278 for (i = 0, m = m0, uiolen = 0;
2279 i < nbufs && m != NULL && uiolen < (size_t)xfsize;
2280 i++, m = mbuf_next(m)) {
2281 size_t mlen = mbuf_maxlen(m);
2282
2283 if (mlen + uiolen > (size_t)xfsize)
2284 mlen = xfsize - uiolen;
2285 mbuf_setlen(m, mlen);
2286 uio_addiov(auio, CAST_USER_ADDR_T(mbuf_datastart(m)),
2287 mlen);
2288 uiolen += mlen;
2289 }
2290
2291 if (xfsize != uio_resid(auio))
2292 printf("sendfile: xfsize: %lld != uio_resid(auio): "
2293 "%lld\n", xfsize, (long long)uio_resid(auio));
2294
2295 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_START),
2296 uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
2297 (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
2298 error = fo_read(fp, auio, FOF_OFFSET, &context);
2299 socket_lock(so, 0);
2300 if (error != 0) {
2301 if (uio_resid(auio) != xfsize && (error == ERESTART ||
2302 error == EINTR || error == EWOULDBLOCK)) {
2303 error = 0;
2304 } else {
2305 mbuf_freem(m0);
2306 goto done3;
2307 }
2308 }
2309 xfsize -= uio_resid(auio);
2310 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_END),
2311 uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
2312 (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
2313
2314 if (xfsize == 0) {
2315 //printf("sendfile: fo_read 0 bytes, EOF\n");
2316 break;
2317 }
2318 if (xfsize + off > file_size)
2319 printf("sendfile: xfsize: %lld + off: %lld > file_size:"
2320 "%lld\n", xfsize, off, file_size);
2321 for (i = 0, m = m0, rlen = 0;
2322 i < nbufs && m != NULL && rlen < xfsize;
2323 i++, m = mbuf_next(m)) {
2324 size_t mlen = mbuf_maxlen(m);
2325
2326 if (rlen + mlen > (size_t)xfsize)
2327 mlen = xfsize - rlen;
2328 mbuf_setlen(m, mlen);
2329
2330 rlen += mlen;
2331 }
2332 mbuf_pkthdr_setlen(m0, xfsize);
2333
2334 retry_space:
2335 /*
2336 * Make sure that the socket is still able to take more data.
2337 * CANTSENDMORE being true usually means that the connection
2338 * was closed. so_error is true when an error was sensed after
2339 * a previous send.
2340 * The state is checked after the page mapping and buffer
2341 * allocation above since those operations may block and make
2342 * any socket checks stale. From this point forward, nothing
2343 * blocks before the pru_send (or more accurately, any blocking
2344 * results in a loop back to here to re-check).
2345 */
2346 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
2347 if (so->so_state & SS_CANTSENDMORE) {
2348 error = EPIPE;
2349 } else {
2350 error = so->so_error;
2351 so->so_error = 0;
2352 }
2353 m_freem(m0);
2354 goto done3;
2355 }
2356 /*
2357 * Wait for socket space to become available. We do this just
2358 * after checking the connection state above in order to avoid
2359 * a race condition with sbwait().
2360 */
2361 if (sbspace(&so->so_snd) < (long)so->so_snd.sb_lowat) {
2362 if (so->so_state & SS_NBIO) {
2363 m_freem(m0);
2364 error = EAGAIN;
2365 goto done3;
2366 }
2367 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT |
2368 DBG_FUNC_START), uap->s, 0, 0, 0, 0);
2369 error = sbwait(&so->so_snd);
2370 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT|
2371 DBG_FUNC_END), uap->s, 0, 0, 0, 0);
2372 /*
2373 * An error from sbwait usually indicates that we've
2374 * been interrupted by a signal. If we've sent anything
2375 * then return bytes sent, otherwise return the error.
2376 */
2377 if (error) {
2378 m_freem(m0);
2379 goto done3;
2380 }
2381 goto retry_space;
2382 }
2383
2384 struct mbuf *control = NULL;
2385 {
2386 /*
2387 * Socket filter processing
2388 */
2389
2390 error = sflt_data_out(so, NULL, &m0, &control, 0);
2391 if (error) {
2392 if (error == EJUSTRETURN) {
2393 error = 0;
2394 continue;
2395 }
2396 goto done3;
2397 }
2398 /*
2399 * End Socket filter processing
2400 */
2401 }
2402 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
2403 uap->s, 0, 0, 0, 0);
2404 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m0,
2405 0, control, p);
2406 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
2407 uap->s, 0, 0, 0, 0);
2408 if (error) {
2409 goto done3;
2410 }
2411 }
2412 sbunlock(&so->so_snd, 0); /* will unlock socket */
2413 /*
2414 * Send trailers. Wimp out and use writev(2).
2415 */
2416 if (uap->hdtr != USER_ADDR_NULL &&
2417 user_hdtr.trailers != USER_ADDR_NULL) {
2418 bzero(&nuap, sizeof (struct writev_args));
2419 nuap.fd = uap->s;
2420 nuap.iovp = user_hdtr.trailers;
2421 nuap.iovcnt = user_hdtr.trl_cnt;
2422 error = writev_nocancel(p, &nuap, &writev_retval);
2423 if (error)
2424 goto done2;
2425 sbytes += writev_retval;
2426 }
2427 done2:
2428 file_drop(uap->s);
2429 done1:
2430 file_drop(uap->fd);
2431 done:
2432 if (uap->nbytes != USER_ADDR_NULL) {
2433 /* XXX this appears bogus for some early failure conditions */
2434 copyout(&sbytes, uap->nbytes, sizeof (off_t));
2435 }
2436 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_END), uap->s,
2437 (unsigned int)((sbytes >> 32) & 0x0ffffffff),
2438 (unsigned int)(sbytes & 0x0ffffffff), error, 0);
2439 return (error);
2440 done3:
2441 sbunlock(&so->so_snd, 0); /* will unlock socket */
2442 goto done2;
2443 }
2444
2445
2446 #endif /* SENDFILE */