]> git.saurik.com Git - apple/xnu.git/blob - bsd/kern/uipc_syscalls.c
xnu-1504.3.12.tar.gz
[apple/xnu.git] / bsd / kern / uipc_syscalls.c
1 /*
2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1982, 1986, 1989, 1990, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * sendfile(2) and related extensions:
33 * Copyright (c) 1998, David Greenman. All rights reserved.
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. All advertising materials mentioning features or use of this software
44 * must display the following acknowledgement:
45 * This product includes software developed by the University of
46 * California, Berkeley and its contributors.
47 * 4. Neither the name of the University nor the names of its contributors
48 * may be used to endorse or promote products derived from this software
49 * without specific prior written permission.
50 *
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE.
62 *
63 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94
64 */
65 /*
66 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
67 * support for mandatory and extensible security protections. This notice
68 * is included in support of clause 2.2 (b) of the Apple Public License,
69 * Version 2.0.
70 */
71
72 #include <sys/param.h>
73 #include <sys/systm.h>
74 #include <sys/filedesc.h>
75 #include <sys/proc_internal.h>
76 #include <sys/file_internal.h>
77 #include <sys/vnode_internal.h>
78 #include <sys/malloc.h>
79 #include <sys/mbuf.h>
80 #include <kern/lock.h>
81 #include <sys/domain.h>
82 #include <sys/protosw.h>
83 #include <sys/signalvar.h>
84 #include <sys/socket.h>
85 #include <sys/socketvar.h>
86 #include <sys/kernel.h>
87 #include <sys/uio_internal.h>
88 #include <sys/kauth.h>
89
90 #include <security/audit/audit.h>
91
92 #include <sys/kdebug.h>
93 #include <sys/sysproto.h>
94 #include <netinet/in.h>
95 #include <net/route.h>
96 #include <netinet/in_pcb.h>
97
98 #if CONFIG_MACF_SOCKET_SUBSET
99 #include <security/mac_framework.h>
100 #endif /* MAC_SOCKET_SUBSET */
101
/*
 * Shorthand accessors for the per-open-file state.  They let code holding
 * a struct fileproc pointer write, e.g., fp->f_flag, which expands to
 * fp->f_fglob->fg_flag.
 */
#define	f_flag		f_fglob->fg_flag
#define	f_type		f_fglob->fg_type
#define	f_msgcount	f_fglob->fg_msgcount
#define	f_cred		f_fglob->fg_cred
#define	f_ops		f_fglob->fg_ops
#define	f_offset	f_fglob->fg_offset
#define	f_data		f_fglob->fg_data

/*
 * Trace codes handed to KERNEL_DEBUG() by the routines in this file,
 * all built with NETDBG_CODE() in the DBG_NETSOCK class.
 */
#define	DBG_LAYER_IN_BEG	NETDBG_CODE(DBG_NETSOCK, 0)
#define	DBG_LAYER_IN_END	NETDBG_CODE(DBG_NETSOCK, 2)
#define	DBG_LAYER_OUT_BEG	NETDBG_CODE(DBG_NETSOCK, 1)
#define	DBG_LAYER_OUT_END	NETDBG_CODE(DBG_NETSOCK, 3)

/* Send side */
#define	DBG_FNC_SENDMSG		NETDBG_CODE(DBG_NETSOCK, (1 << 8) | 1)
#define	DBG_FNC_SENDTO		NETDBG_CODE(DBG_NETSOCK, (2 << 8) | 1)
#define	DBG_FNC_SENDIT		NETDBG_CODE(DBG_NETSOCK, (3 << 8) | 1)

/* Receive side */
#define	DBG_FNC_RECVFROM	NETDBG_CODE(DBG_NETSOCK, (5 << 8))
#define	DBG_FNC_RECVMSG		NETDBG_CODE(DBG_NETSOCK, (6 << 8))
#define	DBG_FNC_RECVIT		NETDBG_CODE(DBG_NETSOCK, (7 << 8))

/* sendfile and its sub-phases */
#define	DBG_FNC_SENDFILE	NETDBG_CODE(DBG_NETSOCK, (10 << 8))
#define	DBG_FNC_SENDFILE_WAIT	NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 1))
#define	DBG_FNC_SENDFILE_READ	NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 2))
#define	DBG_FNC_SENDFILE_SEND	NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 3))
125
126
127 #define HACK_FOR_4056224 1
128 #if HACK_FOR_4056224
129 static pid_t last_pid_4056224 = 0;
130 #endif /* HACK_FOR_4056224 */
131
132 /* TODO: should be in header file */
133 int falloc_locked(proc_t, struct fileproc **, int *, vfs_context_t, int);
134
135 static int sendit(struct proc *, int, struct user_msghdr *, uio_t, int,
136 int32_t *);
137 static int recvit(struct proc *, int, struct user_msghdr *, uio_t, user_addr_t,
138 int32_t *);
139 static int getsockaddr(struct socket *, struct sockaddr **, user_addr_t,
140 size_t, boolean_t);
141 static int getsockaddr_s(struct socket *, struct sockaddr_storage *,
142 user_addr_t, size_t, boolean_t);
143 #if SENDFILE
144 static void alloc_sendpkt(int, size_t, unsigned int *, struct mbuf **,
145 boolean_t);
146 #endif /* SENDFILE */
147
148 /*
149 * System call interface to the socket abstraction.
150 */
151
152 extern struct fileops socketops;
153
154 /*
155 * Returns: 0 Success
156 * EACCES Mandatory Access Control failure
157 * falloc:ENFILE
158 * falloc:EMFILE
159 * falloc:ENOMEM
160 * socreate:EAFNOSUPPORT
161 * socreate:EPROTOTYPE
162 * socreate:EPROTONOSUPPORT
163 * socreate:ENOBUFS
164 * socreate:ENOMEM
165 * socreate:EISCONN
166 * socreate:??? [other protocol families, IPSEC]
167 */
168 int
169 socket(struct proc *p, struct socket_args *uap, int32_t *retval)
170 {
171 struct socket *so;
172 struct fileproc *fp;
173 int fd, error;
174
175 AUDIT_ARG(socket, uap->domain, uap->type, uap->protocol);
176 #if CONFIG_MACF_SOCKET_SUBSET
177 if ((error = mac_socket_check_create(kauth_cred_get(), uap->domain,
178 uap->type, uap->protocol)) != 0)
179 return (error);
180 #endif /* MAC_SOCKET_SUBSET */
181
182 error = falloc(p, &fp, &fd, vfs_context_current());
183 if (error) {
184 return (error);
185 }
186 fp->f_flag = FREAD|FWRITE;
187 fp->f_type = DTYPE_SOCKET;
188 fp->f_ops = &socketops;
189
190 error = socreate(uap->domain, &so, uap->type, uap->protocol);
191 if (error) {
192 fp_free(p, fd, fp);
193 } else {
194 thread_t thread;
195 struct uthread *ut;
196
197 thread = current_thread();
198 ut = get_bsdthread_info(thread);
199
200 /* if this is a backgrounded thread then throttle all new sockets */
201 if ( (ut->uu_flag & UT_BACKGROUND) != 0 ) {
202 so->so_traffic_mgt_flags |= TRAFFIC_MGT_SO_BACKGROUND;
203 so->so_background_thread = thread;
204 }
205 fp->f_data = (caddr_t)so;
206
207 proc_fdlock(p);
208 procfdtbl_releasefd(p, fd, NULL);
209
210 fp_drop(p, fd, fp, 1);
211 proc_fdunlock(p);
212
213 *retval = fd;
214 }
215 return (error);
216 }
217
218 /*
219 * Returns: 0 Success
220 * EDESTADDRREQ Destination address required
221 * EBADF Bad file descriptor
222 * EACCES Mandatory Access Control failure
223 * file_socket:ENOTSOCK
224 * file_socket:EBADF
225 * getsockaddr:ENAMETOOLONG Filename too long
226 * getsockaddr:EINVAL Invalid argument
227 * getsockaddr:ENOMEM Not enough space
228 * getsockaddr:EFAULT Bad address
229 * sobind:???
230 */
231 /* ARGSUSED */
232 int
233 bind(__unused proc_t p, struct bind_args *uap, __unused int32_t *retval)
234 {
235 struct sockaddr_storage ss;
236 struct sockaddr *sa = NULL;
237 struct socket *so;
238 boolean_t want_free = TRUE;
239 int error;
240
241 AUDIT_ARG(fd, uap->s);
242 error = file_socket(uap->s, &so);
243 if (error != 0)
244 return (error);
245 if (so == NULL) {
246 error = EBADF;
247 goto out;
248 }
249 if (uap->name == USER_ADDR_NULL) {
250 error = EDESTADDRREQ;
251 goto out;
252 }
253 if (uap->namelen > sizeof (ss)) {
254 error = getsockaddr(so, &sa, uap->name, uap->namelen, TRUE);
255 } else {
256 error = getsockaddr_s(so, &ss, uap->name, uap->namelen, TRUE);
257 if (error == 0) {
258 sa = (struct sockaddr *)&ss;
259 want_free = FALSE;
260 }
261 }
262 if (error != 0)
263 goto out;
264 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
265 #if CONFIG_MACF_SOCKET_SUBSET
266 if ((error = mac_socket_check_bind(kauth_cred_get(), so, sa)) == 0)
267 error = sobind(so, sa);
268 #else
269 error = sobind(so, sa);
270 #endif /* MAC_SOCKET_SUBSET */
271 if (want_free)
272 FREE(sa, M_SONAME);
273 out:
274 file_drop(uap->s);
275 return (error);
276 }
277
278 /*
279 * Returns: 0 Success
280 * EBADF
281 * EACCES Mandatory Access Control failure
282 * file_socket:ENOTSOCK
283 * file_socket:EBADF
284 * solisten:EINVAL
285 * solisten:EOPNOTSUPP
286 * solisten:???
287 */
288 int
289 listen(__unused struct proc *p, struct listen_args *uap,
290 __unused int32_t *retval)
291 {
292 int error;
293 struct socket *so;
294
295 AUDIT_ARG(fd, uap->s);
296 error = file_socket(uap->s, &so);
297 if (error)
298 return (error);
299 if (so != NULL)
300 #if CONFIG_MACF_SOCKET_SUBSET
301 {
302 error = mac_socket_check_listen(kauth_cred_get(), so);
303 if (error == 0)
304 error = solisten(so, uap->backlog);
305 }
306 #else
307 error = solisten(so, uap->backlog);
308 #endif /* MAC_SOCKET_SUBSET */
309 else
310 error = EBADF;
311
312 file_drop(uap->s);
313 return (error);
314 }
315
316 /*
317 * Returns: fp_getfsock:EBADF Bad file descriptor
318 * fp_getfsock:EOPNOTSUPP ...
319 * xlate => :ENOTSOCK Socket operation on non-socket
320 * :EFAULT Bad address on copyin/copyout
321 * :EBADF Bad file descriptor
322 * :EOPNOTSUPP Operation not supported on socket
323 * :EINVAL Invalid argument
324 * :EWOULDBLOCK Operation would block
325 * :ECONNABORTED Connection aborted
326 * :EINTR Interrupted function
327 * :EACCES Mandatory Access Control failure
328 * falloc_locked:ENFILE Too many files open in system
329 * falloc_locked::EMFILE Too many open files
330 * falloc_locked::ENOMEM Not enough space
331 * 0 Success
332 */
333 int
334 accept_nocancel(struct proc *p, struct accept_nocancel_args *uap,
335 int32_t *retval)
336 {
337 struct fileproc *fp;
338 struct sockaddr *sa = NULL;
339 socklen_t namelen;
340 int error;
341 struct socket *head, *so = NULL;
342 lck_mtx_t *mutex_held;
343 int fd = uap->s;
344 int newfd;
345 short fflag; /* type must match fp->f_flag */
346 int dosocklock = 0;
347
348 *retval = -1;
349
350 AUDIT_ARG(fd, uap->s);
351
352 if (uap->name) {
353 error = copyin(uap->anamelen, (caddr_t)&namelen,
354 sizeof (socklen_t));
355 if (error)
356 return (error);
357 }
358 error = fp_getfsock(p, fd, &fp, &head);
359 if (error) {
360 if (error == EOPNOTSUPP)
361 error = ENOTSOCK;
362 return (error);
363 }
364 if (head == NULL) {
365 error = EBADF;
366 goto out;
367 }
368 #if CONFIG_MACF_SOCKET_SUBSET
369 if ((error = mac_socket_check_accept(kauth_cred_get(), head)) != 0)
370 goto out;
371 #endif /* MAC_SOCKET_SUBSET */
372
373 socket_lock(head, 1);
374
375 if (head->so_proto->pr_getlock != NULL) {
376 mutex_held = (*head->so_proto->pr_getlock)(head, 0);
377 dosocklock = 1;
378 } else {
379 mutex_held = head->so_proto->pr_domain->dom_mtx;
380 dosocklock = 0;
381 }
382
383 if ((head->so_options & SO_ACCEPTCONN) == 0) {
384 if ((head->so_proto->pr_flags & PR_CONNREQUIRED) == 0) {
385 error = EOPNOTSUPP;
386 } else {
387 /* POSIX: The socket is not accepting connections */
388 error = EINVAL;
389 }
390 socket_unlock(head, 1);
391 goto out;
392 }
393 if ((head->so_state & SS_NBIO) && head->so_comp.tqh_first == NULL) {
394 socket_unlock(head, 1);
395 error = EWOULDBLOCK;
396 goto out;
397 }
398 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
399 if (head->so_state & SS_CANTRCVMORE) {
400 head->so_error = ECONNABORTED;
401 break;
402 }
403 if (head->so_usecount < 1)
404 panic("accept: head=%p refcount=%d\n", head,
405 head->so_usecount);
406 error = msleep((caddr_t)&head->so_timeo, mutex_held,
407 PSOCK | PCATCH, "accept", 0);
408 if (head->so_usecount < 1)
409 panic("accept: 2 head=%p refcount=%d\n", head,
410 head->so_usecount);
411 if ((head->so_state & SS_DRAINING)) {
412 error = ECONNABORTED;
413 }
414 if (error) {
415 socket_unlock(head, 1);
416 goto out;
417 }
418 }
419 if (head->so_error) {
420 error = head->so_error;
421 head->so_error = 0;
422 socket_unlock(head, 1);
423 goto out;
424 }
425
426
427 /*
428 * At this point we know that there is at least one connection
429 * ready to be accepted. Remove it from the queue prior to
430 * allocating the file descriptor for it since falloc() may
431 * block allowing another process to accept the connection
432 * instead.
433 */
434 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
435 so = TAILQ_FIRST(&head->so_comp);
436 TAILQ_REMOVE(&head->so_comp, so, so_list);
437 head->so_qlen--;
438 /* unlock head to avoid deadlock with select, keep a ref on head */
439 socket_unlock(head, 0);
440
441 #if CONFIG_MACF_SOCKET_SUBSET
442 /*
443 * Pass the pre-accepted socket to the MAC framework. This is
444 * cheaper than allocating a file descriptor for the socket,
445 * calling the protocol accept callback, and possibly freeing
446 * the file descriptor should the MAC check fails.
447 */
448 if ((error = mac_socket_check_accepted(kauth_cred_get(), so)) != 0) {
449 so->so_state &= ~(SS_NOFDREF | SS_COMP);
450 so->so_head = NULL;
451 soclose(so);
452 /* Drop reference on listening socket */
453 sodereference(head);
454 goto out;
455 }
456 #endif /* MAC_SOCKET_SUBSET */
457
458 /*
459 * Pass the pre-accepted socket to any interested socket filter(s).
460 * Upon failure, the socket would have been closed by the callee.
461 */
462 if (so->so_filt != NULL && (error = soacceptfilter(so)) != 0) {
463 /* Drop reference on listening socket */
464 sodereference(head);
465 /* Propagate socket filter's error code to the caller */
466 goto out;
467 }
468
469 fflag = fp->f_flag;
470 error = falloc(p, &fp, &newfd, vfs_context_current());
471 if (error) {
472 /*
473 * Probably ran out of file descriptors. Put the
474 * unaccepted connection back onto the queue and
475 * do another wakeup so some other process might
476 * have a chance at it.
477 */
478 socket_lock(head, 0);
479 TAILQ_INSERT_HEAD(&head->so_comp, so, so_list);
480 head->so_qlen++;
481 wakeup_one((caddr_t)&head->so_timeo);
482 socket_unlock(head, 1);
483 goto out;
484 }
485 *retval = newfd;
486 fp->f_type = DTYPE_SOCKET;
487 fp->f_flag = fflag;
488 fp->f_ops = &socketops;
489 fp->f_data = (caddr_t)so;
490 socket_lock(head, 0);
491 if (dosocklock)
492 socket_lock(so, 1);
493 so->so_state &= ~SS_COMP;
494 so->so_head = NULL;
495 (void) soacceptlock(so, &sa, 0);
496 socket_unlock(head, 1);
497 if (sa == NULL) {
498 namelen = 0;
499 if (uap->name)
500 goto gotnoname;
501 error = 0;
502 goto releasefd;
503 }
504 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
505
506 if (uap->name) {
507 socklen_t sa_len;
508
509 /* save sa_len before it is destroyed */
510 sa_len = sa->sa_len;
511 namelen = MIN(namelen, sa_len);
512 error = copyout(sa, uap->name, namelen);
513 if (!error)
514 /* return the actual, untruncated address length */
515 namelen = sa_len;
516 gotnoname:
517 error = copyout((caddr_t)&namelen, uap->anamelen,
518 sizeof (socklen_t));
519 }
520 FREE(sa, M_SONAME);
521
522 releasefd:
523 /*
524 * If the socket has been marked as inactive by soacceptfilter(),
525 * disallow further operations on it. We explicitly call shutdown
526 * on both data directions to ensure that SS_CANT{RCV,SEND}MORE
527 * states are set for the socket. This would also flush out data
528 * hanging off the receive list of this socket.
529 */
530 if (so->so_flags & SOF_DEFUNCT) {
531 (void) soshutdownlock(so, SHUT_RD);
532 (void) soshutdownlock(so, SHUT_WR);
533 (void) sodisconnectlocked(so);
534 }
535
536 if (dosocklock)
537 socket_unlock(so, 1);
538
539 proc_fdlock(p);
540 procfdtbl_releasefd(p, newfd, NULL);
541 fp_drop(p, newfd, fp, 1);
542 proc_fdunlock(p);
543
544 out:
545 file_drop(fd);
546 return (error);
547 }
548
549 int
550 accept(struct proc *p, struct accept_args *uap, int32_t *retval)
551 {
552 __pthread_testcancel(1);
553 return(accept_nocancel(p, (struct accept_nocancel_args *)uap, retval));
554 }
555
556 /*
557 * Returns: 0 Success
558 * EBADF Bad file descriptor
559 * EALREADY Connection already in progress
560 * EINPROGRESS Operation in progress
561 * ECONNABORTED Connection aborted
562 * EINTR Interrupted function
563 * EACCES Mandatory Access Control failure
564 * file_socket:ENOTSOCK
565 * file_socket:EBADF
566 * getsockaddr:ENAMETOOLONG Filename too long
567 * getsockaddr:EINVAL Invalid argument
568 * getsockaddr:ENOMEM Not enough space
569 * getsockaddr:EFAULT Bad address
570 * soconnectlock:EOPNOTSUPP
571 * soconnectlock:EISCONN
572 * soconnectlock:??? [depends on protocol, filters]
573 * msleep:EINTR
574 *
575 * Imputed: so_error error may be set from so_error, which
576 * may have been set by soconnectlock.
577 */
578 /* ARGSUSED */
579 int
580 connect(struct proc *p, struct connect_args *uap, int32_t *retval)
581 {
582 __pthread_testcancel(1);
583 return(connect_nocancel(p, (struct connect_nocancel_args *)uap, retval));
584 }
585
586 int
587 connect_nocancel(__unused proc_t p, struct connect_nocancel_args *uap, __unused int32_t *retval)
588 {
589 struct socket *so;
590 struct sockaddr_storage ss;
591 struct sockaddr *sa = NULL;
592 lck_mtx_t *mutex_held;
593 boolean_t want_free = TRUE;
594 int error;
595 int fd = uap->s;
596 boolean_t dgram;
597
598 AUDIT_ARG(fd, uap->s);
599 error = file_socket(fd, &so);
600 if (error != 0)
601 return (error);
602 if (so == NULL) {
603 error = EBADF;
604 goto out;
605 }
606
607 /*
608 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
609 * if this is a datagram socket; translate for other types.
610 */
611 dgram = (so->so_type == SOCK_DGRAM);
612
613 /* Get socket address now before we obtain socket lock */
614 if (uap->namelen > sizeof (ss)) {
615 error = getsockaddr(so, &sa, uap->name, uap->namelen, !dgram);
616 } else {
617 error = getsockaddr_s(so, &ss, uap->name, uap->namelen, !dgram);
618 if (error == 0) {
619 sa = (struct sockaddr *)&ss;
620 want_free = FALSE;
621 }
622 }
623 if (error != 0)
624 goto out;
625
626 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
627 #if CONFIG_MACF_SOCKET_SUBSET
628 if ((error = mac_socket_check_connect(kauth_cred_get(), so, sa)) != 0) {
629 if (want_free)
630 FREE(sa, M_SONAME);
631 goto out;
632 }
633 #endif /* MAC_SOCKET_SUBSET */
634 socket_lock(so, 1);
635
636 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
637 if (want_free)
638 FREE(sa, M_SONAME);
639 socket_unlock(so, 1);
640 error = EALREADY;
641 goto out;
642 }
643 error = soconnectlock(so, sa, 0);
644 if (error)
645 goto bad;
646 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
647 if (want_free)
648 FREE(sa, M_SONAME);
649 socket_unlock(so, 1);
650 error = EINPROGRESS;
651 goto out;
652 }
653 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
654 if (so->so_proto->pr_getlock != NULL)
655 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
656 else
657 mutex_held = so->so_proto->pr_domain->dom_mtx;
658 error = msleep((caddr_t)&so->so_timeo, mutex_held,
659 PSOCK | PCATCH, "connect", 0);
660 if ((so->so_state & SS_DRAINING)) {
661 error = ECONNABORTED;
662 }
663 if (error)
664 break;
665 }
666 if (error == 0) {
667 error = so->so_error;
668 so->so_error = 0;
669 }
670 bad:
671 so->so_state &= ~SS_ISCONNECTING;
672 socket_unlock(so, 1);
673 if (want_free)
674 FREE(sa, M_SONAME);
675 if (error == ERESTART)
676 error = EINTR;
677 out:
678 file_drop(fd);
679 return (error);
680 }
681
682 /*
683 * Returns: 0 Success
684 * socreate:EAFNOSUPPORT
685 * socreate:EPROTOTYPE
686 * socreate:EPROTONOSUPPORT
687 * socreate:ENOBUFS
688 * socreate:ENOMEM
689 * socreate:EISCONN
690 * socreate:??? [other protocol families, IPSEC]
691 * falloc:ENFILE
692 * falloc:EMFILE
693 * falloc:ENOMEM
694 * copyout:EFAULT
695 * soconnect2:EINVAL
696 * soconnect2:EPROTOTYPE
697 * soconnect2:??? [other protocol families[
698 */
699 int
700 socketpair(struct proc *p, struct socketpair_args *uap,
701 __unused int32_t *retval)
702 {
703 struct fileproc *fp1, *fp2;
704 struct socket *so1, *so2;
705 int fd, error, sv[2];
706
707 AUDIT_ARG(socket, uap->domain, uap->type, uap->protocol);
708 error = socreate(uap->domain, &so1, uap->type, uap->protocol);
709 if (error)
710 return (error);
711 error = socreate(uap->domain, &so2, uap->type, uap->protocol);
712 if (error)
713 goto free1;
714
715 error = falloc(p, &fp1, &fd, vfs_context_current());
716 if (error) {
717 goto free2;
718 }
719 fp1->f_flag = FREAD|FWRITE;
720 fp1->f_type = DTYPE_SOCKET;
721 fp1->f_ops = &socketops;
722 fp1->f_data = (caddr_t)so1;
723 sv[0] = fd;
724
725 error = falloc(p, &fp2, &fd, vfs_context_current());
726 if (error) {
727 goto free3;
728 }
729 fp2->f_flag = FREAD|FWRITE;
730 fp2->f_type = DTYPE_SOCKET;
731 fp2->f_ops = &socketops;
732 fp2->f_data = (caddr_t)so2;
733 sv[1] = fd;
734
735 error = soconnect2(so1, so2);
736 if (error) {
737 goto free4;
738 }
739 if (uap->type == SOCK_DGRAM) {
740 /*
741 * Datagram socket connection is asymmetric.
742 */
743 error = soconnect2(so2, so1);
744 if (error) {
745 goto free4;
746 }
747 }
748
749 proc_fdlock(p);
750 procfdtbl_releasefd(p, sv[0], NULL);
751 procfdtbl_releasefd(p, sv[1], NULL);
752 fp_drop(p, sv[0], fp1, 1);
753 fp_drop(p, sv[1], fp2, 1);
754 proc_fdunlock(p);
755
756 error = copyout((caddr_t)sv, uap->rsv, 2 * sizeof (int));
757 return (error);
758 free4:
759 fp_free(p, sv[1], fp2);
760 free3:
761 fp_free(p, sv[0], fp1);
762 free2:
763 (void) soclose(so2);
764 free1:
765 (void) soclose(so1);
766 return (error);
767 }
768
769 /*
770 * Returns: 0 Success
771 * EINVAL
772 * ENOBUFS
773 * EBADF
774 * EPIPE
775 * EACCES Mandatory Access Control failure
776 * file_socket:ENOTSOCK
777 * file_socket:EBADF
778 * getsockaddr:ENAMETOOLONG Filename too long
779 * getsockaddr:EINVAL Invalid argument
780 * getsockaddr:ENOMEM Not enough space
781 * getsockaddr:EFAULT Bad address
782 * <pru_sosend>:EACCES[TCP]
783 * <pru_sosend>:EADDRINUSE[TCP]
784 * <pru_sosend>:EADDRNOTAVAIL[TCP]
785 * <pru_sosend>:EAFNOSUPPORT[TCP]
786 * <pru_sosend>:EAGAIN[TCP]
787 * <pru_sosend>:EBADF
788 * <pru_sosend>:ECONNRESET[TCP]
789 * <pru_sosend>:EFAULT
790 * <pru_sosend>:EHOSTUNREACH[TCP]
791 * <pru_sosend>:EINTR
792 * <pru_sosend>:EINVAL
793 * <pru_sosend>:EISCONN[AF_INET]
794 * <pru_sosend>:EMSGSIZE[TCP]
795 * <pru_sosend>:ENETDOWN[TCP]
796 * <pru_sosend>:ENETUNREACH[TCP]
797 * <pru_sosend>:ENOBUFS
798 * <pru_sosend>:ENOMEM[TCP]
799 * <pru_sosend>:ENOTCONN[AF_INET]
800 * <pru_sosend>:EOPNOTSUPP
801 * <pru_sosend>:EPERM[TCP]
802 * <pru_sosend>:EPIPE
803 * <pru_sosend>:EWOULDBLOCK
804 * <pru_sosend>:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
805 * <pru_sosend>:???[AF_INET] [whatever a filter author chooses]
806 * <pru_sosend>:??? [value from so_error]
807 * sockargs:???
808 */
809 static int
810 sendit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop,
811 int flags, int32_t *retval)
812 {
813 struct mbuf *control = NULL;
814 struct sockaddr_storage ss;
815 struct sockaddr *to = NULL;
816 boolean_t want_free = TRUE;
817 int error;
818 struct socket *so;
819 user_ssize_t len;
820
821 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
822
823 error = file_socket(s, &so);
824 if (error) {
825 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error, 0, 0, 0, 0);
826 return (error);
827 }
828 if (so == NULL) {
829 error = EBADF;
830 goto out;
831 }
832 if (mp->msg_name != USER_ADDR_NULL) {
833 if (mp->msg_namelen > sizeof (ss)) {
834 error = getsockaddr(so, &to, mp->msg_name,
835 mp->msg_namelen, TRUE);
836 } else {
837 error = getsockaddr_s(so, &ss, mp->msg_name,
838 mp->msg_namelen, TRUE);
839 if (error == 0) {
840 to = (struct sockaddr *)&ss;
841 want_free = FALSE;
842 }
843 }
844 if (error != 0)
845 goto out;
846 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), to);
847 }
848 if (mp->msg_control != USER_ADDR_NULL) {
849 if (mp->msg_controllen < sizeof (struct cmsghdr)) {
850 error = EINVAL;
851 goto bad;
852 }
853 error = sockargs(&control, mp->msg_control,
854 mp->msg_controllen, MT_CONTROL);
855 if (error != 0)
856 goto bad;
857 }
858
859 #if CONFIG_MACF_SOCKET_SUBSET
860 /*
861 * We check the state without holding the socket lock;
862 * if a race condition occurs, it would simply result
863 * in an extra call to the MAC check function.
864 */
865 if (!(so->so_state & SS_ISCONNECTED) &&
866 (error = mac_socket_check_send(kauth_cred_get(), so, to)) != 0)
867 goto bad;
868 #endif /* MAC_SOCKET_SUBSET */
869
870 len = uio_resid(uiop);
871 error = so->so_proto->pr_usrreqs->pru_sosend(so, to, uiop, 0, control,
872 flags);
873 if (error != 0) {
874 if (uio_resid(uiop) != len && (error == ERESTART ||
875 error == EINTR || error == EWOULDBLOCK))
876 error = 0;
877 /* Generation of SIGPIPE can be controlled per socket */
878 if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE))
879 psignal(p, SIGPIPE);
880 }
881 if (error == 0)
882 *retval = (int)(len - uio_resid(uiop));
883 bad:
884 if (to != NULL && want_free)
885 FREE(to, M_SONAME);
886 out:
887 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error, 0, 0, 0, 0);
888 file_drop(s);
889 return (error);
890 }
891
892 /*
893 * Returns: 0 Success
894 * ENOMEM
895 * sendit:??? [see sendit definition in this file]
896 * write:??? [4056224: applicable for pipes]
897 */
898 int
899 sendto(struct proc *p, struct sendto_args *uap, int32_t *retval)
900 {
901 __pthread_testcancel(1);
902 return(sendto_nocancel(p, (struct sendto_nocancel_args *)uap, retval));
903 }
904
905 int
906 sendto_nocancel(struct proc *p, struct sendto_nocancel_args *uap, int32_t *retval)
907 {
908 struct user_msghdr msg;
909 int error;
910 uio_t auio = NULL;
911
912 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_START, 0, 0, 0, 0, 0);
913 AUDIT_ARG(fd, uap->s);
914
915 auio = uio_create(1, 0,
916 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
917 UIO_WRITE);
918 if (auio == NULL) {
919 return (ENOMEM);
920 }
921 uio_addiov(auio, uap->buf, uap->len);
922
923 msg.msg_name = uap->to;
924 msg.msg_namelen = uap->tolen;
925 /* no need to set up msg_iov. sendit uses uio_t we send it */
926 msg.msg_iov = 0;
927 msg.msg_iovlen = 0;
928 msg.msg_control = 0;
929 msg.msg_flags = 0;
930
931 error = sendit(p, uap->s, &msg, auio, uap->flags, retval);
932
933 if (auio != NULL) {
934 uio_free(auio);
935 }
936
937 #if HACK_FOR_4056224
938 /*
939 * Radar 4056224
940 * Temporary workaround to let send() and recv() work over
941 * a pipe for binary compatibility
942 * This will be removed in the release following Tiger
943 */
944 if (error == ENOTSOCK) {
945 struct fileproc *fp;
946
947 if (fp_lookup(p, uap->s, &fp, 0) == 0) {
948 (void) fp_drop(p, uap->s, fp, 0);
949
950 if (fp->f_type == DTYPE_PIPE) {
951 struct write_args write_uap;
952 user_ssize_t write_retval;
953
954 if (p->p_pid > last_pid_4056224) {
955 last_pid_4056224 = p->p_pid;
956
957 printf("%s[%d] uses send/recv "
958 "on a pipe\n", p->p_comm, p->p_pid);
959 }
960
961 bzero(&write_uap, sizeof (struct write_args));
962 write_uap.fd = uap->s;
963 write_uap.cbuf = uap->buf;
964 write_uap.nbyte = uap->len;
965
966 error = write(p, &write_uap, &write_retval);
967 *retval = (int)write_retval;
968 }
969 }
970 }
971 #endif /* HACK_FOR_4056224 */
972
973 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_END, error, *retval, 0, 0, 0);
974
975 return (error);
976 }
977
978 /*
979 * Returns: 0 Success
980 * ENOBUFS
981 * copyin:EFAULT
982 * sendit:??? [see sendit definition in this file]
983 */
984 int
985 sendmsg(struct proc *p, struct sendmsg_args *uap, int32_t *retval)
986 {
987 __pthread_testcancel(1);
988 return(sendmsg_nocancel(p, (struct sendmsg_nocancel_args *)uap, retval));
989 }
990
991 int
992 sendmsg_nocancel(struct proc *p, struct sendmsg_nocancel_args *uap, int32_t *retval)
993 {
994 struct user32_msghdr msg32;
995 struct user64_msghdr msg64;
996 struct user_msghdr user_msg;
997 caddr_t msghdrp;
998 int size_of_msghdr;
999 int error;
1000 uio_t auio = NULL;
1001 struct user_iovec *iovp;
1002
1003 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
1004 AUDIT_ARG(fd, uap->s);
1005 if (IS_64BIT_PROCESS(p)) {
1006 msghdrp = (caddr_t)&msg64;
1007 size_of_msghdr = sizeof (msg64);
1008 } else {
1009 msghdrp = (caddr_t)&msg32;
1010 size_of_msghdr = sizeof (msg32);
1011 }
1012 error = copyin(uap->msg, msghdrp, size_of_msghdr);
1013 if (error) {
1014 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1015 return (error);
1016 }
1017
1018 if (IS_64BIT_PROCESS(p)) {
1019 user_msg.msg_flags = msg64.msg_flags;
1020 user_msg.msg_controllen = msg64.msg_controllen;
1021 user_msg.msg_control = msg64.msg_control;
1022 user_msg.msg_iovlen = msg64.msg_iovlen;
1023 user_msg.msg_iov = msg64.msg_iov;
1024 user_msg.msg_namelen = msg64.msg_namelen;
1025 user_msg.msg_name = msg64.msg_name;
1026 } else {
1027 user_msg.msg_flags = msg32.msg_flags;
1028 user_msg.msg_controllen = msg32.msg_controllen;
1029 user_msg.msg_control = msg32.msg_control;
1030 user_msg.msg_iovlen = msg32.msg_iovlen;
1031 user_msg.msg_iov = msg32.msg_iov;
1032 user_msg.msg_namelen = msg32.msg_namelen;
1033 user_msg.msg_name = msg32.msg_name;
1034 }
1035
1036 if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
1037 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, EMSGSIZE,
1038 0, 0, 0, 0);
1039 return (EMSGSIZE);
1040 }
1041
1042 /* allocate a uio large enough to hold the number of iovecs passed */
1043 auio = uio_create(user_msg.msg_iovlen, 0,
1044 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1045 UIO_WRITE);
1046 if (auio == NULL) {
1047 error = ENOBUFS;
1048 goto done;
1049 }
1050
1051 if (user_msg.msg_iovlen) {
1052 /*
1053 * get location of iovecs within the uio.
1054 * then copyin the iovecs from user space.
1055 */
1056 iovp = uio_iovsaddr(auio);
1057 if (iovp == NULL) {
1058 error = ENOBUFS;
1059 goto done;
1060 }
1061 error = copyin_user_iovec_array(user_msg.msg_iov,
1062 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
1063 user_msg.msg_iovlen, iovp);
1064 if (error)
1065 goto done;
1066 user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
1067
1068 /* finish setup of uio_t */
1069 uio_calculateresid(auio);
1070 } else {
1071 user_msg.msg_iov = 0;
1072 }
1073
1074 /* msg_flags is ignored for send */
1075 user_msg.msg_flags = 0;
1076
1077 error = sendit(p, uap->s, &user_msg, auio, uap->flags, retval);
1078 done:
1079 if (auio != NULL) {
1080 uio_free(auio);
1081 }
1082 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1083
1084 return (error);
1085 }
1086
1087 /*
1088 * Returns: 0 Success
1089 * ENOTSOCK
1090 * EINVAL
1091 * EBADF
1092 * EACCES Mandatory Access Control failure
1093 * copyout:EFAULT
1094 * fp_lookup:EBADF
1095 * <pru_soreceive>:ENOBUFS
1096 * <pru_soreceive>:ENOTCONN
1097 * <pru_soreceive>:EWOULDBLOCK
1098 * <pru_soreceive>:EFAULT
1099 * <pru_soreceive>:EINTR
1100 * <pru_soreceive>:EBADF
1101 * <pru_soreceive>:EINVAL
1102 * <pru_soreceive>:EMSGSIZE
1103 * <pru_soreceive>:???
1104 *
1105 * Notes: Additional return values from calls through <pru_soreceive>
1106 * depend on protocols other than TCP or AF_UNIX, which are
1107 * documented above.
1108 */
1109 static int
1110 recvit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop,
1111 user_addr_t namelenp, int32_t *retval)
1112 {
1113 int len, error;
1114 struct mbuf *m, *control = 0;
1115 user_addr_t ctlbuf;
1116 struct socket *so;
1117 struct sockaddr *fromsa = 0;
1118 struct fileproc *fp;
1119
1120 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
1121 proc_fdlock(p);
1122 if ((error = fp_lookup(p, s, &fp, 1))) {
1123 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
1124 proc_fdunlock(p);
1125 return (error);
1126 }
1127 if (fp->f_type != DTYPE_SOCKET) {
1128 fp_drop(p, s, fp, 1);
1129 proc_fdunlock(p);
1130 return (ENOTSOCK);
1131 }
1132
1133 so = (struct socket *)fp->f_data;
1134 if (so == NULL) {
1135 fp_drop(p, s, fp, 1);
1136 proc_fdunlock(p);
1137 return (EBADF);
1138 }
1139
1140 proc_fdunlock(p);
1141
1142 #if CONFIG_MACF_SOCKET_SUBSET
1143 /*
1144 * We check the state without holding the socket lock;
1145 * if a race condition occurs, it would simply result
1146 * in an extra call to the MAC check function.
1147 */
1148 if (!(so->so_state & SS_ISCONNECTED) &&
1149 (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0)
1150 goto out1;
1151 #endif /* MAC_SOCKET_SUBSET */
1152 if (uio_resid(uiop) < 0) {
1153 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, EINVAL, 0, 0, 0, 0);
1154 error = EINVAL;
1155 goto out1;
1156 }
1157
1158 len = uio_resid(uiop);
1159 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, uiop,
1160 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
1161 &mp->msg_flags);
1162 if (fromsa)
1163 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()),
1164 fromsa);
1165 if (error) {
1166 if (uio_resid(uiop) != len && (error == ERESTART ||
1167 error == EINTR || error == EWOULDBLOCK))
1168 error = 0;
1169 }
1170
1171 if (error)
1172 goto out;
1173
1174 *retval = len - uio_resid(uiop);
1175 if (mp->msg_name) {
1176 socklen_t sa_len = 0;
1177
1178 len = mp->msg_namelen;
1179 if (len <= 0 || fromsa == 0) {
1180 len = 0;
1181 } else {
1182 #ifndef MIN
1183 #define MIN(a, b) ((a) > (b) ? (b) : (a))
1184 #endif
1185 sa_len = fromsa->sa_len;
1186 len = MIN((unsigned int)len, sa_len);
1187 error = copyout(fromsa, mp->msg_name, (unsigned)len);
1188 if (error)
1189 goto out;
1190 }
1191 mp->msg_namelen = sa_len;
1192 /* return the actual, untruncated address length */
1193 if (namelenp &&
1194 (error = copyout((caddr_t)&sa_len, namelenp,
1195 sizeof (int)))) {
1196 goto out;
1197 }
1198 }
1199 if (mp->msg_control) {
1200 len = mp->msg_controllen;
1201 m = control;
1202 mp->msg_controllen = 0;
1203 ctlbuf = mp->msg_control;
1204
1205 while (m && len > 0) {
1206 unsigned int tocopy;
1207 struct cmsghdr *cp = mtod(m, struct cmsghdr *);
1208
1209 /*
1210 * SCM_TIMESTAMP hack because struct timeval has a
1211 * different size for 32 bits and 64 bits processes
1212 */
1213 if (cp->cmsg_level == SOL_SOCKET && cp->cmsg_type == SCM_TIMESTAMP) {
1214 unsigned char tmp_buffer[CMSG_SPACE(sizeof(struct user64_timeval))];
1215 struct cmsghdr *tmp_cp = (struct cmsghdr *)tmp_buffer;
1216 int tmp_space;
1217 struct timeval *tv = (struct timeval *)CMSG_DATA(cp);
1218
1219 tmp_cp->cmsg_level = SOL_SOCKET;
1220 tmp_cp->cmsg_type = SCM_TIMESTAMP;
1221
1222 if (proc_is64bit(p)) {
1223 struct user64_timeval *tv64 = (struct user64_timeval *)CMSG_DATA(tmp_cp);
1224
1225 tv64->tv_sec = tv->tv_sec;
1226 tv64->tv_usec = tv->tv_usec;
1227
1228 tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user64_timeval));
1229 tmp_space = CMSG_SPACE(sizeof(struct user64_timeval));
1230 } else {
1231 struct user32_timeval *tv32 = (struct user32_timeval *)CMSG_DATA(tmp_cp);
1232
1233 tv32->tv_sec = tv->tv_sec;
1234 tv32->tv_usec = tv->tv_usec;
1235
1236 tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user32_timeval));
1237 tmp_space = CMSG_SPACE(sizeof(struct user32_timeval));
1238 }
1239 if (len >= tmp_space) {
1240 tocopy = tmp_space;
1241 } else {
1242 mp->msg_flags |= MSG_CTRUNC;
1243 tocopy = len;
1244 }
1245 error = copyout(tmp_buffer, ctlbuf, tocopy);
1246 if (error)
1247 goto out;
1248
1249 } else {
1250 if (len >= m->m_len) {
1251 tocopy = m->m_len;
1252 } else {
1253 mp->msg_flags |= MSG_CTRUNC;
1254 tocopy = len;
1255 }
1256
1257 error = copyout((caddr_t)mtod(m, caddr_t), ctlbuf,
1258 tocopy);
1259 if (error)
1260 goto out;
1261 }
1262
1263 ctlbuf += tocopy;
1264 len -= tocopy;
1265 m = m->m_next;
1266 }
1267 mp->msg_controllen = ctlbuf - mp->msg_control;
1268 }
1269 out:
1270 if (fromsa)
1271 FREE(fromsa, M_SONAME);
1272 if (control)
1273 m_freem(control);
1274 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
1275 out1:
1276 fp_drop(p, s, fp, 0);
1277 return (error);
1278 }
1279
1280
1281 /*
1282 * Returns: 0 Success
1283 * ENOMEM
1284 * copyin:EFAULT
1285 * recvit:???
1286 * read:??? [4056224: applicable for pipes]
1287 *
1288 * Notes: The read entry point is only called as part of support for
1289 * binary backward compatability; new code should use read
1290 * instead of recv or recvfrom when attempting to read data
1291 * from pipes.
1292 *
1293 * For full documentation of the return codes from recvit, see
1294 * the block header for the recvit function.
1295 */
1296 int
1297 recvfrom(struct proc *p, struct recvfrom_args *uap, int32_t *retval)
1298 {
1299 __pthread_testcancel(1);
1300 return(recvfrom_nocancel(p, (struct recvfrom_nocancel_args *)uap, retval));
1301 }
1302
1303 int
1304 recvfrom_nocancel(struct proc *p, struct recvfrom_nocancel_args *uap, int32_t *retval)
1305 {
1306 struct user_msghdr msg;
1307 int error;
1308 uio_t auio = NULL;
1309
1310 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_START, 0, 0, 0, 0, 0);
1311 AUDIT_ARG(fd, uap->s);
1312
1313 if (uap->fromlenaddr) {
1314 error = copyin(uap->fromlenaddr,
1315 (caddr_t)&msg.msg_namelen, sizeof (msg.msg_namelen));
1316 if (error)
1317 return (error);
1318 } else {
1319 msg.msg_namelen = 0;
1320 }
1321 msg.msg_name = uap->from;
1322 auio = uio_create(1, 0,
1323 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1324 UIO_READ);
1325 if (auio == NULL) {
1326 return (ENOMEM);
1327 }
1328
1329 uio_addiov(auio, uap->buf, uap->len);
1330 /* no need to set up msg_iov. recvit uses uio_t we send it */
1331 msg.msg_iov = 0;
1332 msg.msg_iovlen = 0;
1333 msg.msg_control = 0;
1334 msg.msg_controllen = 0;
1335 msg.msg_flags = uap->flags;
1336 error = recvit(p, uap->s, &msg, auio, uap->fromlenaddr, retval);
1337 if (auio != NULL) {
1338 uio_free(auio);
1339 }
1340
1341 #if HACK_FOR_4056224
1342 /*
1343 * Radar 4056224
1344 * Temporary workaround to let send() and recv() work over
1345 * a pipe for binary compatibility
1346 * This will be removed in the release following Tiger
1347 */
1348 if (error == ENOTSOCK && proc_is64bit(p) == 0) {
1349 struct fileproc *fp;
1350
1351 if (fp_lookup(p, uap->s, &fp, 0) == 0) {
1352 (void) fp_drop(p, uap->s, fp, 0);
1353
1354 if (fp->f_type == DTYPE_PIPE) {
1355 struct read_args read_uap;
1356 user_ssize_t read_retval;
1357
1358 if (p->p_pid > last_pid_4056224) {
1359 last_pid_4056224 = p->p_pid;
1360
1361 printf("%s[%d] uses send/recv on "
1362 "a pipe\n", p->p_comm, p->p_pid);
1363 }
1364
1365 bzero(&read_uap, sizeof (struct read_args));
1366 read_uap.fd = uap->s;
1367 read_uap.cbuf = uap->buf;
1368 read_uap.nbyte = uap->len;
1369
1370 error = read(p, &read_uap, &read_retval);
1371 *retval = (int)read_retval;
1372 }
1373 }
1374 }
1375 #endif /* HACK_FOR_4056224 */
1376
1377 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_END, error, 0, 0, 0, 0);
1378
1379 return (error);
1380 }
1381
1382 /*
1383 * Returns: 0 Success
1384 * EMSGSIZE
1385 * ENOMEM
1386 * copyin:EFAULT
1387 * copyout:EFAULT
1388 * recvit:???
1389 *
1390 * Notes: For full documentation of the return codes from recvit, see
1391 * the block header for the recvit function.
1392 */
1393 int
1394 recvmsg(struct proc *p, struct recvmsg_args *uap, int32_t *retval)
1395 {
1396 __pthread_testcancel(1);
1397 return(recvmsg_nocancel(p, (struct recvmsg_nocancel_args *)uap, retval));
1398 }
1399
1400 int
1401 recvmsg_nocancel(struct proc *p, struct recvmsg_nocancel_args *uap, int32_t *retval)
1402 {
1403 struct user32_msghdr msg32;
1404 struct user64_msghdr msg64;
1405 struct user_msghdr user_msg;
1406 caddr_t msghdrp;
1407 int size_of_msghdr;
1408 user_addr_t uiov;
1409 int error;
1410 uio_t auio = NULL;
1411 struct user_iovec *iovp;
1412
1413 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
1414 AUDIT_ARG(fd, uap->s);
1415 if (IS_64BIT_PROCESS(p)) {
1416 msghdrp = (caddr_t)&msg64;
1417 size_of_msghdr = sizeof (msg64);
1418 } else {
1419 msghdrp = (caddr_t)&msg32;
1420 size_of_msghdr = sizeof (msg32);
1421 }
1422 error = copyin(uap->msg, msghdrp, size_of_msghdr);
1423 if (error) {
1424 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1425 return (error);
1426 }
1427
1428 /* only need to copy if user process is not 64-bit */
1429 if (IS_64BIT_PROCESS(p)) {
1430 user_msg.msg_flags = msg64.msg_flags;
1431 user_msg.msg_controllen = msg64.msg_controllen;
1432 user_msg.msg_control = msg64.msg_control;
1433 user_msg.msg_iovlen = msg64.msg_iovlen;
1434 user_msg.msg_iov = msg64.msg_iov;
1435 user_msg.msg_namelen = msg64.msg_namelen;
1436 user_msg.msg_name = msg64.msg_name;
1437 } else {
1438 user_msg.msg_flags = msg32.msg_flags;
1439 user_msg.msg_controllen = msg32.msg_controllen;
1440 user_msg.msg_control = msg32.msg_control;
1441 user_msg.msg_iovlen = msg32.msg_iovlen;
1442 user_msg.msg_iov = msg32.msg_iov;
1443 user_msg.msg_namelen = msg32.msg_namelen;
1444 user_msg.msg_name = msg32.msg_name;
1445 }
1446
1447 if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
1448 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, EMSGSIZE,
1449 0, 0, 0, 0);
1450 return (EMSGSIZE);
1451 }
1452
1453 user_msg.msg_flags = uap->flags;
1454
1455 /* allocate a uio large enough to hold the number of iovecs passed */
1456 auio = uio_create(user_msg.msg_iovlen, 0,
1457 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1458 UIO_READ);
1459 if (auio == NULL) {
1460 error = ENOMEM;
1461 goto done;
1462 }
1463
1464 /*
1465 * get location of iovecs within the uio. then copyin the iovecs from
1466 * user space.
1467 */
1468 iovp = uio_iovsaddr(auio);
1469 if (iovp == NULL) {
1470 error = ENOMEM;
1471 goto done;
1472 }
1473 uiov = user_msg.msg_iov;
1474 user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
1475 error = copyin_user_iovec_array(uiov,
1476 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
1477 user_msg.msg_iovlen, iovp);
1478 if (error)
1479 goto done;
1480
1481 /* finish setup of uio_t */
1482 uio_calculateresid(auio);
1483
1484 error = recvit(p, uap->s, &user_msg, auio, 0, retval);
1485 if (!error) {
1486 user_msg.msg_iov = uiov;
1487 if (IS_64BIT_PROCESS(p)) {
1488 msg64.msg_flags = user_msg.msg_flags;
1489 msg64.msg_controllen = user_msg.msg_controllen;
1490 msg64.msg_control = user_msg.msg_control;
1491 msg64.msg_iovlen = user_msg.msg_iovlen;
1492 msg64.msg_iov = user_msg.msg_iov;
1493 msg64.msg_namelen = user_msg.msg_namelen;
1494 msg64.msg_name = user_msg.msg_name;
1495 } else {
1496 msg32.msg_flags = user_msg.msg_flags;
1497 msg32.msg_controllen = user_msg.msg_controllen;
1498 msg32.msg_control = user_msg.msg_control;
1499 msg32.msg_iovlen = user_msg.msg_iovlen;
1500 msg32.msg_iov = user_msg.msg_iov;
1501 msg32.msg_namelen = user_msg.msg_namelen;
1502 msg32.msg_name = user_msg.msg_name;
1503 }
1504 error = copyout(msghdrp, uap->msg, size_of_msghdr);
1505 }
1506 done:
1507 if (auio != NULL) {
1508 uio_free(auio);
1509 }
1510 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
1511 return (error);
1512 }
1513
1514 /*
1515 * Returns: 0 Success
1516 * EBADF
1517 * file_socket:ENOTSOCK
1518 * file_socket:EBADF
1519 * soshutdown:EINVAL
1520 * soshutdown:ENOTCONN
1521 * soshutdown:EADDRNOTAVAIL[TCP]
1522 * soshutdown:ENOBUFS[TCP]
1523 * soshutdown:EMSGSIZE[TCP]
1524 * soshutdown:EHOSTUNREACH[TCP]
1525 * soshutdown:ENETUNREACH[TCP]
1526 * soshutdown:ENETDOWN[TCP]
1527 * soshutdown:ENOMEM[TCP]
1528 * soshutdown:EACCES[TCP]
1529 * soshutdown:EMSGSIZE[TCP]
1530 * soshutdown:ENOBUFS[TCP]
1531 * soshutdown:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
1532 * soshutdown:??? [other protocol families]
1533 */
1534 /* ARGSUSED */
1535 int
1536 shutdown(__unused struct proc *p, struct shutdown_args *uap,
1537 __unused int32_t *retval)
1538 {
1539 struct socket *so;
1540 int error;
1541
1542 AUDIT_ARG(fd, uap->s);
1543 error = file_socket(uap->s, &so);
1544 if (error)
1545 return (error);
1546 if (so == NULL) {
1547 error = EBADF;
1548 goto out;
1549 }
1550 error = soshutdown((struct socket *)so, uap->how);
1551 out:
1552 file_drop(uap->s);
1553 return (error);
1554 }
1555
1556 /*
1557 * Returns: 0 Success
1558 * EFAULT
1559 * EINVAL
1560 * EACCES Mandatory Access Control failure
1561 * file_socket:ENOTSOCK
1562 * file_socket:EBADF
1563 * sosetopt:EINVAL
1564 * sosetopt:ENOPROTOOPT
1565 * sosetopt:ENOBUFS
1566 * sosetopt:EDOM
1567 * sosetopt:EFAULT
1568 * sosetopt:EOPNOTSUPP[AF_UNIX]
1569 * sosetopt:???
1570 */
1571 /* ARGSUSED */
1572 int
1573 setsockopt(struct proc *p, struct setsockopt_args *uap,
1574 __unused int32_t *retval)
1575 {
1576 struct socket *so;
1577 struct sockopt sopt;
1578 int error;
1579
1580 AUDIT_ARG(fd, uap->s);
1581 if (uap->val == 0 && uap->valsize != 0)
1582 return (EFAULT);
1583 /* No bounds checking on size (it's unsigned) */
1584
1585 error = file_socket(uap->s, &so);
1586 if (error)
1587 return (error);
1588
1589 sopt.sopt_dir = SOPT_SET;
1590 sopt.sopt_level = uap->level;
1591 sopt.sopt_name = uap->name;
1592 sopt.sopt_val = uap->val;
1593 sopt.sopt_valsize = uap->valsize;
1594 sopt.sopt_p = p;
1595
1596 if (so == NULL) {
1597 error = EINVAL;
1598 goto out;
1599 }
1600 #if CONFIG_MACF_SOCKET_SUBSET
1601 if ((error = mac_socket_check_setsockopt(kauth_cred_get(), so,
1602 &sopt)) != 0)
1603 goto out;
1604 #endif /* MAC_SOCKET_SUBSET */
1605 error = sosetopt(so, &sopt);
1606 out:
1607 file_drop(uap->s);
1608 return (error);
1609 }
1610
1611
1612
1613 /*
1614 * Returns: 0 Success
1615 * EINVAL
1616 * EBADF
1617 * EACCES Mandatory Access Control failure
1618 * copyin:EFAULT
1619 * copyout:EFAULT
1620 * file_socket:ENOTSOCK
1621 * file_socket:EBADF
1622 * sogetopt:???
1623 */
1624 int
1625 getsockopt(struct proc *p, struct getsockopt_args *uap,
1626 __unused int32_t *retval)
1627 {
1628 int error;
1629 socklen_t valsize;
1630 struct sockopt sopt;
1631 struct socket *so;
1632
1633 error = file_socket(uap->s, &so);
1634 if (error)
1635 return (error);
1636 if (uap->val) {
1637 error = copyin(uap->avalsize, (caddr_t)&valsize,
1638 sizeof (valsize));
1639 if (error)
1640 goto out;
1641 /* No bounds checking on size (it's unsigned) */
1642 } else {
1643 valsize = 0;
1644 }
1645 sopt.sopt_dir = SOPT_GET;
1646 sopt.sopt_level = uap->level;
1647 sopt.sopt_name = uap->name;
1648 sopt.sopt_val = uap->val;
1649 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
1650 sopt.sopt_p = p;
1651
1652 if (so == NULL) {
1653 error = EBADF;
1654 goto out;
1655 }
1656 #if CONFIG_MACF_SOCKET_SUBSET
1657 if ((error = mac_socket_check_getsockopt(kauth_cred_get(), so,
1658 &sopt)) != 0)
1659 goto out;
1660 #endif /* MAC_SOCKET_SUBSET */
1661 error = sogetopt((struct socket *)so, &sopt);
1662 if (error == 0) {
1663 valsize = sopt.sopt_valsize;
1664 error = copyout((caddr_t)&valsize, uap->avalsize,
1665 sizeof (valsize));
1666 }
1667 out:
1668 file_drop(uap->s);
1669 return (error);
1670 }
1671
1672
1673 /*
1674 * Get socket name.
1675 *
1676 * Returns: 0 Success
1677 * EBADF
1678 * file_socket:ENOTSOCK
1679 * file_socket:EBADF
1680 * copyin:EFAULT
1681 * copyout:EFAULT
1682 * <pru_sockaddr>:ENOBUFS[TCP]
1683 * <pru_sockaddr>:ECONNRESET[TCP]
1684 * <pru_sockaddr>:EINVAL[AF_UNIX]
1685 * <sf_getsockname>:???
1686 */
1687 /* ARGSUSED */
1688 int
1689 getsockname(__unused struct proc *p, struct getsockname_args *uap,
1690 __unused int32_t *retval)
1691 {
1692 struct socket *so;
1693 struct sockaddr *sa;
1694 socklen_t len;
1695 socklen_t sa_len;
1696 int error;
1697
1698 error = file_socket(uap->fdes, &so);
1699 if (error)
1700 return (error);
1701 error = copyin(uap->alen, (caddr_t)&len, sizeof (socklen_t));
1702 if (error)
1703 goto out;
1704 if (so == NULL) {
1705 error = EBADF;
1706 goto out;
1707 }
1708 sa = 0;
1709 socket_lock(so, 1);
1710 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
1711 if (error == 0) {
1712 struct socket_filter_entry *filter;
1713 int filtered = 0;
1714 for (filter = so->so_filt; filter && error == 0;
1715 filter = filter->sfe_next_onsocket) {
1716 if (filter->sfe_filter->sf_filter.sf_getsockname) {
1717 if (!filtered) {
1718 filtered = 1;
1719 sflt_use(so);
1720 socket_unlock(so, 0);
1721 }
1722 error = filter->sfe_filter->sf_filter.
1723 sf_getsockname(filter->sfe_cookie, so, &sa);
1724 }
1725 }
1726
1727 if (error == EJUSTRETURN)
1728 error = 0;
1729
1730 if (filtered) {
1731 socket_lock(so, 0);
1732 sflt_unuse(so);
1733 }
1734 }
1735 socket_unlock(so, 1);
1736 if (error)
1737 goto bad;
1738 if (sa == 0) {
1739 len = 0;
1740 goto gotnothing;
1741 }
1742
1743 sa_len = sa->sa_len;
1744 len = MIN(len, sa_len);
1745 error = copyout((caddr_t)sa, uap->asa, len);
1746 if (error)
1747 goto bad;
1748 /* return the actual, untruncated address length */
1749 len = sa_len;
1750 gotnothing:
1751 error = copyout((caddr_t)&len, uap->alen, sizeof (socklen_t));
1752 bad:
1753 if (sa)
1754 FREE(sa, M_SONAME);
1755 out:
1756 file_drop(uap->fdes);
1757 return (error);
1758 }
1759
1760 /*
1761 * Get name of peer for connected socket.
1762 *
1763 * Returns: 0 Success
1764 * EBADF
1765 * EINVAL
1766 * ENOTCONN
1767 * file_socket:ENOTSOCK
1768 * file_socket:EBADF
1769 * copyin:EFAULT
1770 * copyout:EFAULT
1771 * <pru_peeraddr>:???
1772 * <sf_getpeername>:???
1773 */
1774 /* ARGSUSED */
1775 int
1776 getpeername(__unused struct proc *p, struct getpeername_args *uap,
1777 __unused int32_t *retval)
1778 {
1779 struct socket *so;
1780 struct sockaddr *sa;
1781 socklen_t len;
1782 socklen_t sa_len;
1783 int error;
1784
1785 error = file_socket(uap->fdes, &so);
1786 if (error)
1787 return (error);
1788 if (so == NULL) {
1789 error = EBADF;
1790 goto out;
1791 }
1792
1793 socket_lock(so, 1);
1794
1795 if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
1796 (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
1797 /* the socket has been shutdown, no more getpeername's */
1798 socket_unlock(so, 1);
1799 error = EINVAL;
1800 goto out;
1801 }
1802
1803 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
1804 socket_unlock(so, 1);
1805 error = ENOTCONN;
1806 goto out;
1807 }
1808 error = copyin(uap->alen, (caddr_t)&len, sizeof (socklen_t));
1809 if (error) {
1810 socket_unlock(so, 1);
1811 goto out;
1812 }
1813 sa = 0;
1814 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
1815 if (error == 0) {
1816 struct socket_filter_entry *filter;
1817 int filtered = 0;
1818 for (filter = so->so_filt; filter && error == 0;
1819 filter = filter->sfe_next_onsocket) {
1820 if (filter->sfe_filter->sf_filter.sf_getpeername) {
1821 if (!filtered) {
1822 filtered = 1;
1823 sflt_use(so);
1824 socket_unlock(so, 0);
1825 }
1826 error = filter->sfe_filter->sf_filter.
1827 sf_getpeername(filter->sfe_cookie, so, &sa);
1828 }
1829 }
1830
1831 if (error == EJUSTRETURN)
1832 error = 0;
1833
1834 if (filtered) {
1835 socket_lock(so, 0);
1836 sflt_unuse(so);
1837 }
1838 }
1839 socket_unlock(so, 1);
1840 if (error)
1841 goto bad;
1842 if (sa == 0) {
1843 len = 0;
1844 goto gotnothing;
1845 }
1846 sa_len = sa->sa_len;
1847 len = MIN(len, sa_len);
1848 error = copyout(sa, uap->asa, len);
1849 if (error)
1850 goto bad;
1851 /* return the actual, untruncated address length */
1852 len = sa_len;
1853 gotnothing:
1854 error = copyout((caddr_t)&len, uap->alen, sizeof (socklen_t));
1855 bad:
1856 if (sa) FREE(sa, M_SONAME);
1857 out:
1858 file_drop(uap->fdes);
1859 return (error);
1860 }
1861
1862 int
1863 sockargs(struct mbuf **mp, user_addr_t data, int buflen, int type)
1864 {
1865 struct sockaddr *sa;
1866 struct mbuf *m;
1867 int error;
1868
1869 int alloc_buflen = buflen;
1870 #ifdef __LP64__
1871 /* The fd's in the buffer must expand to be pointers, thus we need twice as much space */
1872 if(type == MT_CONTROL)
1873 alloc_buflen = ((buflen - sizeof(struct cmsghdr))*2) + sizeof(struct cmsghdr);
1874 #endif
1875 if ((u_int)alloc_buflen > MLEN) {
1876 if (type == MT_SONAME && (u_int)alloc_buflen <= 112)
1877 alloc_buflen = MLEN; /* unix domain compat. hack */
1878 else if ((u_int)alloc_buflen > MCLBYTES)
1879 return (EINVAL);
1880 }
1881 m = m_get(M_WAIT, type);
1882 if (m == NULL)
1883 return (ENOBUFS);
1884 if ((u_int)alloc_buflen > MLEN) {
1885 MCLGET(m, M_WAIT);
1886 if ((m->m_flags & M_EXT) == 0) {
1887 m_free(m);
1888 return (ENOBUFS);
1889 }
1890 }
1891 /* K64: We still copyin the original buflen because it gets expanded later
1892 * and we lie about the size of the mbuf because it only affects unp_* functions
1893 */
1894 m->m_len = buflen;
1895 error = copyin(data, mtod(m, caddr_t), (u_int)buflen);
1896 if (error) {
1897 (void) m_free(m);
1898 } else {
1899 *mp = m;
1900 if (type == MT_SONAME) {
1901 sa = mtod(m, struct sockaddr *);
1902 sa->sa_len = buflen;
1903 }
1904 }
1905 return (error);
1906 }
1907
1908 /*
1909 * Given a user_addr_t of length len, allocate and fill out a *sa.
1910 *
1911 * Returns: 0 Success
1912 * ENAMETOOLONG Filename too long
1913 * EINVAL Invalid argument
1914 * ENOMEM Not enough space
1915 * copyin:EFAULT Bad address
1916 */
1917 static int
1918 getsockaddr(struct socket *so, struct sockaddr **namp, user_addr_t uaddr,
1919 size_t len, boolean_t translate_unspec)
1920 {
1921 struct sockaddr *sa;
1922 int error;
1923
1924 if (len > SOCK_MAXADDRLEN)
1925 return (ENAMETOOLONG);
1926
1927 if (len < offsetof(struct sockaddr, sa_data[0]))
1928 return (EINVAL);
1929
1930 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK | M_ZERO);
1931 if (sa == NULL) {
1932 return (ENOMEM);
1933 }
1934 error = copyin(uaddr, (caddr_t)sa, len);
1935 if (error) {
1936 FREE(sa, M_SONAME);
1937 } else {
1938 /*
1939 * Force sa_family to AF_INET on AF_INET sockets to handle
1940 * legacy applications that use AF_UNSPEC (0). On all other
1941 * sockets we leave it unchanged and let the lower layer
1942 * handle it.
1943 */
1944 if (translate_unspec && sa->sa_family == AF_UNSPEC &&
1945 INP_CHECK_SOCKAF(so, AF_INET) &&
1946 len == sizeof (struct sockaddr_in))
1947 sa->sa_family = AF_INET;
1948
1949 sa->sa_len = len;
1950 *namp = sa;
1951 }
1952 return (error);
1953 }
1954
1955 static int
1956 getsockaddr_s(struct socket *so, struct sockaddr_storage *ss,
1957 user_addr_t uaddr, size_t len, boolean_t translate_unspec)
1958 {
1959 int error;
1960
1961 if (ss == NULL || uaddr == USER_ADDR_NULL ||
1962 len < offsetof(struct sockaddr, sa_data[0]))
1963 return (EINVAL);
1964
1965 /*
1966 * sockaddr_storage size is less than SOCK_MAXADDRLEN,
1967 * so the check here is inclusive.
1968 */
1969 if (len > sizeof (*ss))
1970 return (ENAMETOOLONG);
1971
1972 bzero(ss, sizeof (*ss));
1973 error = copyin(uaddr, (caddr_t)ss, len);
1974 if (error == 0) {
1975 /*
1976 * Force sa_family to AF_INET on AF_INET sockets to handle
1977 * legacy applications that use AF_UNSPEC (0). On all other
1978 * sockets we leave it unchanged and let the lower layer
1979 * handle it.
1980 */
1981 if (translate_unspec && ss->ss_family == AF_UNSPEC &&
1982 INP_CHECK_SOCKAF(so, AF_INET) &&
1983 len == sizeof (struct sockaddr_in))
1984 ss->ss_family = AF_INET;
1985
1986 ss->ss_len = len;
1987 }
1988 return (error);
1989 }
1990
1991 #if SENDFILE
1992
1993 SYSCTL_DECL(_kern_ipc);
1994
1995 #define SFUIOBUFS 64
1996 static int sendfileuiobufs = SFUIOBUFS;
1997 SYSCTL_INT(_kern_ipc, OID_AUTO, sendfileuiobufs, CTLFLAG_RW, &sendfileuiobufs,
1998 0, "");
1999
2000 /* Macros to compute the number of mbufs needed depending on cluster size */
2001 #define HOWMANY_16K(n) ((((unsigned int)(n) - 1) >> (PGSHIFT + 2)) + 1)
2002 #define HOWMANY_4K(n) ((((unsigned int)(n) - 1) >> PGSHIFT) + 1)
2003
2004 /* Upper send limit in bytes (sendfileuiobufs * PAGESIZE) */
2005 #define SENDFILE_MAX_BYTES (sendfileuiobufs << PGSHIFT)
2006
2007 /* Upper send limit in the number of mbuf clusters */
2008 #define SENDFILE_MAX_16K HOWMANY_16K(SENDFILE_MAX_BYTES)
2009 #define SENDFILE_MAX_4K HOWMANY_4K(SENDFILE_MAX_BYTES)
2010
2011 size_t mbuf_pkt_maxlen(mbuf_t m);
2012
2013 __private_extern__ size_t
2014 mbuf_pkt_maxlen(mbuf_t m)
2015 {
2016 size_t maxlen = 0;
2017
2018 while (m) {
2019 maxlen += mbuf_maxlen(m);
2020 m = mbuf_next(m);
2021 }
2022 return (maxlen);
2023 }
2024
2025 static void
2026 alloc_sendpkt(int how, size_t pktlen, unsigned int *maxchunks,
2027 struct mbuf **m, boolean_t jumbocl)
2028 {
2029 unsigned int needed;
2030
2031 if (pktlen == 0)
2032 panic("%s: pktlen (%ld) must be non-zero\n", __func__, pktlen);
2033
2034 /*
2035 * Try to allocate for the whole thing. Since we want full control
2036 * over the buffer size and be able to accept partial result, we can't
2037 * use mbuf_allocpacket(). The logic below is similar to sosend().
2038 */
2039 *m = NULL;
2040 if (pktlen > NBPG && jumbocl) {
2041 needed = MIN(SENDFILE_MAX_16K, HOWMANY_16K(pktlen));
2042 *m = m_getpackets_internal(&needed, 1, how, 0, M16KCLBYTES);
2043 }
2044 if (*m == NULL) {
2045 needed = MIN(SENDFILE_MAX_4K, HOWMANY_4K(pktlen));
2046 *m = m_getpackets_internal(&needed, 1, how, 0, NBPG);
2047 }
2048
2049 /*
2050 * Our previous attempt(s) at allocation had failed; the system
2051 * may be short on mbufs, and we want to block until they are
2052 * available. This time, ask just for 1 mbuf and don't return
2053 * until we get it.
2054 */
2055 if (*m == NULL) {
2056 needed = 1;
2057 *m = m_getpackets_internal(&needed, 1, M_WAIT, 1, NBPG);
2058 }
2059 if (*m == NULL)
2060 panic("%s: blocking allocation returned NULL\n", __func__);
2061
2062 *maxchunks = needed;
2063 }
2064
2065 /*
2066 * sendfile(2).
2067 * int sendfile(int fd, int s, off_t offset, off_t *nbytes,
2068 * struct sf_hdtr *hdtr, int flags)
2069 *
2070 * Send a file specified by 'fd' and starting at 'offset' to a socket
2071 * specified by 's'. Send only '*nbytes' of the file or until EOF if
2072 * *nbytes == 0. Optionally add a header and/or trailer to the socket
2073 * output. If specified, write the total number of bytes sent into *nbytes.
2074 */
2075 int
2076 sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval)
2077 {
2078 struct fileproc *fp;
2079 struct vnode *vp;
2080 struct socket *so;
2081 struct writev_nocancel_args nuap;
2082 user_ssize_t writev_retval;
2083 struct user_sf_hdtr user_hdtr;
2084 struct user32_sf_hdtr user32_hdtr;
2085 struct user64_sf_hdtr user64_hdtr;
2086 off_t off, xfsize;
2087 off_t nbytes = 0, sbytes = 0;
2088 int error = 0;
2089 size_t sizeof_hdtr;
2090 off_t file_size;
2091 struct vfs_context context = *vfs_context_current();
2092
2093 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_START), uap->s,
2094 0, 0, 0, 0);
2095
2096 AUDIT_ARG(fd, uap->fd);
2097 AUDIT_ARG(value32, uap->s);
2098
2099 /*
2100 * Do argument checking. Must be a regular file in, stream
2101 * type and connected socket out, positive offset.
2102 */
2103 if ((error = fp_getfvp(p, uap->fd, &fp, &vp))) {
2104 goto done;
2105 }
2106 if ((fp->f_flag & FREAD) == 0) {
2107 error = EBADF;
2108 goto done1;
2109 }
2110 if (vnode_isreg(vp) == 0) {
2111 error = ENOTSUP;
2112 goto done1;
2113 }
2114 error = file_socket(uap->s, &so);
2115 if (error) {
2116 goto done1;
2117 }
2118 if (so == NULL) {
2119 error = EBADF;
2120 goto done2;
2121 }
2122 if (so->so_type != SOCK_STREAM) {
2123 error = EINVAL;
2124 goto done2;
2125 }
2126 if ((so->so_state & SS_ISCONNECTED) == 0) {
2127 error = ENOTCONN;
2128 goto done2;
2129 }
2130 if (uap->offset < 0) {
2131 error = EINVAL;
2132 goto done2;
2133 }
2134 if (uap->nbytes == USER_ADDR_NULL) {
2135 error = EINVAL;
2136 goto done2;
2137 }
2138 if (uap->flags != 0) {
2139 error = EINVAL;
2140 goto done2;
2141 }
2142
2143 context.vc_ucred = fp->f_fglob->fg_cred;
2144
2145 #if CONFIG_MACF_SOCKET_SUBSET
2146 /* JMM - fetch connected sockaddr? */
2147 error = mac_socket_check_send(context.vc_ucred, so, NULL);
2148 if (error)
2149 goto done2;
2150 #endif
2151
2152 /*
2153 * Get number of bytes to send
2154 * Should it applies to size of header and trailer?
2155 * JMM - error handling?
2156 */
2157 copyin(uap->nbytes, &nbytes, sizeof (off_t));
2158
2159 /*
2160 * If specified, get the pointer to the sf_hdtr struct for
2161 * any headers/trailers.
2162 */
2163 if (uap->hdtr != USER_ADDR_NULL) {
2164 caddr_t hdtrp;
2165
2166 bzero(&user_hdtr, sizeof (user_hdtr));
2167 if (IS_64BIT_PROCESS(p)) {
2168 hdtrp = (caddr_t)&user64_hdtr;
2169 sizeof_hdtr = sizeof (user64_hdtr);
2170 } else {
2171 hdtrp = (caddr_t)&user32_hdtr;
2172 sizeof_hdtr = sizeof (user32_hdtr);
2173 }
2174 error = copyin(uap->hdtr, hdtrp, sizeof_hdtr);
2175 if (error)
2176 goto done2;
2177 if (IS_64BIT_PROCESS(p)) {
2178 user_hdtr.headers = user64_hdtr.headers;
2179 user_hdtr.hdr_cnt = user64_hdtr.hdr_cnt;
2180 user_hdtr.trailers = user64_hdtr.trailers;
2181 user_hdtr.trl_cnt = user64_hdtr.trl_cnt;
2182 } else {
2183 user_hdtr.headers = user32_hdtr.headers;
2184 user_hdtr.hdr_cnt = user32_hdtr.hdr_cnt;
2185 user_hdtr.trailers = user32_hdtr.trailers;
2186 user_hdtr.trl_cnt = user32_hdtr.trl_cnt;
2187 }
2188
2189 /*
2190 * Send any headers. Wimp out and use writev(2).
2191 */
2192 if (user_hdtr.headers != USER_ADDR_NULL) {
2193 bzero(&nuap, sizeof (struct writev_args));
2194 nuap.fd = uap->s;
2195 nuap.iovp = user_hdtr.headers;
2196 nuap.iovcnt = user_hdtr.hdr_cnt;
2197 error = writev_nocancel(p, &nuap, &writev_retval);
2198 if (error)
2199 goto done2;
2200 sbytes += writev_retval;
2201 }
2202 }
2203
2204 /*
2205 * Get the file size for 2 reasons:
2206 * 1. We don't want to allocate more mbufs than necessary
2207 * 2. We don't want to read past the end of file
2208 */
2209 if ((error = vnode_size(vp, &file_size, vfs_context_current())) != 0)
2210 goto done2;
2211
2212 /*
2213 * Simply read file data into a chain of mbufs that used with scatter
2214 * gather reads. We're not (yet?) setup to use zero copy external
2215 * mbufs that point to the file pages.
2216 */
2217 socket_lock(so, 1);
2218 error = sblock(&so->so_snd, M_WAIT);
2219 if (error) {
2220 socket_unlock(so, 1);
2221 goto done2;
2222 }
2223 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
2224 mbuf_t m0 = NULL, m;
2225 unsigned int nbufs = sendfileuiobufs, i;
2226 uio_t auio;
2227 char uio_buf[UIO_SIZEOF(sendfileuiobufs)]; /* 1 KB !!! */
2228 size_t uiolen;
2229 user_ssize_t rlen;
2230 off_t pgoff;
2231 size_t pktlen;
2232 boolean_t jumbocl;
2233
2234 /*
2235 * Calculate the amount to transfer.
2236 * Align to round number of pages.
2237 * Not to exceed send socket buffer,
2238 * the EOF, or the passed in nbytes.
2239 */
2240 xfsize = sbspace(&so->so_snd);
2241
2242 if (xfsize <= 0) {
2243 if (so->so_state & SS_CANTSENDMORE) {
2244 error = EPIPE;
2245 goto done3;
2246 } else if ((so->so_state & SS_NBIO)) {
2247 error = EAGAIN;
2248 goto done3;
2249 } else {
2250 xfsize = PAGE_SIZE;
2251 }
2252 }
2253
2254 if (xfsize > SENDFILE_MAX_BYTES)
2255 xfsize = SENDFILE_MAX_BYTES;
2256 else if (xfsize > PAGE_SIZE)
2257 xfsize = trunc_page(xfsize);
2258 pgoff = off & PAGE_MASK_64;
2259 if (pgoff > 0 && PAGE_SIZE - pgoff < xfsize)
2260 xfsize = PAGE_SIZE_64 - pgoff;
2261 if (nbytes && xfsize > (nbytes - sbytes))
2262 xfsize = nbytes - sbytes;
2263 if (xfsize <= 0)
2264 break;
2265 if (off + xfsize > file_size)
2266 xfsize = file_size - off;
2267 if (xfsize <= 0)
2268 break;
2269
2270 /*
2271 * Attempt to use larger than system page-size clusters for
2272 * large writes only if there is a jumbo cluster pool and
2273 * if the socket is marked accordingly.
2274 */
2275 jumbocl = sosendjcl && njcl > 0 &&
2276 ((so->so_flags & SOF_MULTIPAGES) || sosendjcl_ignore_capab);
2277
2278 socket_unlock(so, 0);
2279 alloc_sendpkt(M_WAIT, xfsize, &nbufs, &m0, jumbocl);
2280 pktlen = mbuf_pkt_maxlen(m0);
2281 if (pktlen < (size_t)xfsize)
2282 xfsize = pktlen;
2283
2284 auio = uio_createwithbuffer(nbufs, off, UIO_SYSSPACE,
2285 UIO_READ, &uio_buf[0], sizeof (uio_buf));
2286 if (auio == NULL) {
2287 //printf("sendfile: uio_createwithbuffer failed\n");
2288 mbuf_freem(m0);
2289 error = ENXIO;
2290 socket_lock(so, 0);
2291 goto done3;
2292 }
2293
2294 for (i = 0, m = m0, uiolen = 0;
2295 i < nbufs && m != NULL && uiolen < (size_t)xfsize;
2296 i++, m = mbuf_next(m)) {
2297 size_t mlen = mbuf_maxlen(m);
2298
2299 if (mlen + uiolen > (size_t)xfsize)
2300 mlen = xfsize - uiolen;
2301 mbuf_setlen(m, mlen);
2302 uio_addiov(auio, CAST_USER_ADDR_T(mbuf_datastart(m)),
2303 mlen);
2304 uiolen += mlen;
2305 }
2306
2307 if (xfsize != uio_resid(auio))
2308 printf("sendfile: xfsize: %lld != uio_resid(auio): "
2309 "%lld\n", xfsize, uio_resid(auio));
2310
2311 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_START),
2312 uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
2313 (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
2314 error = fo_read(fp, auio, FOF_OFFSET, &context);
2315 socket_lock(so, 0);
2316 if (error != 0) {
2317 if (uio_resid(auio) != xfsize && (error == ERESTART ||
2318 error == EINTR || error == EWOULDBLOCK)) {
2319 error = 0;
2320 } else {
2321 mbuf_freem(m0);
2322 goto done3;
2323 }
2324 }
2325 xfsize -= uio_resid(auio);
2326 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_END),
2327 uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
2328 (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
2329
2330 if (xfsize == 0) {
2331 //printf("sendfile: fo_read 0 bytes, EOF\n");
2332 break;
2333 }
2334 if (xfsize + off > file_size)
2335 printf("sendfile: xfsize: %lld + off: %lld > file_size:"
2336 "%lld\n", xfsize, off, file_size);
2337 for (i = 0, m = m0, rlen = 0;
2338 i < nbufs && m != NULL && rlen < xfsize;
2339 i++, m = mbuf_next(m)) {
2340 size_t mlen = mbuf_maxlen(m);
2341
2342 if (rlen + mlen > (size_t)xfsize)
2343 mlen = xfsize - rlen;
2344 mbuf_setlen(m, mlen);
2345
2346 rlen += mlen;
2347 }
2348 mbuf_pkthdr_setlen(m0, xfsize);
2349
2350 retry_space:
2351 /*
2352 * Make sure that the socket is still able to take more data.
2353 * CANTSENDMORE being true usually means that the connection
2354 * was closed. so_error is true when an error was sensed after
2355 * a previous send.
2356 * The state is checked after the page mapping and buffer
2357 * allocation above since those operations may block and make
2358 * any socket checks stale. From this point forward, nothing
2359 * blocks before the pru_send (or more accurately, any blocking
2360 * results in a loop back to here to re-check).
2361 */
2362 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
2363 if (so->so_state & SS_CANTSENDMORE) {
2364 error = EPIPE;
2365 } else {
2366 error = so->so_error;
2367 so->so_error = 0;
2368 }
2369 m_freem(m0);
2370 goto done3;
2371 }
2372 /*
2373 * Wait for socket space to become available. We do this just
2374 * after checking the connection state above in order to avoid
2375 * a race condition with sbwait().
2376 */
2377 if (sbspace(&so->so_snd) < (long)so->so_snd.sb_lowat) {
2378 if (so->so_state & SS_NBIO) {
2379 m_freem(m0);
2380 error = EAGAIN;
2381 goto done3;
2382 }
2383 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT |
2384 DBG_FUNC_START), uap->s, 0, 0, 0, 0);
2385 error = sbwait(&so->so_snd);
2386 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT|
2387 DBG_FUNC_END), uap->s, 0, 0, 0, 0);
2388 /*
2389 * An error from sbwait usually indicates that we've
2390 * been interrupted by a signal. If we've sent anything
2391 * then return bytes sent, otherwise return the error.
2392 */
2393 if (error) {
2394 m_freem(m0);
2395 goto done3;
2396 }
2397 goto retry_space;
2398 }
2399 {
2400 /*
2401 * Socket filter processing
2402 */
2403 struct socket_filter_entry *filter;
2404 int filtered = 0;
2405 struct mbuf *control = NULL;
2406 boolean_t recursive = (so->so_send_filt_thread != NULL);
2407
2408 error = 0;
2409 for (filter = so->so_filt; filter && (error == 0);
2410 filter = filter->sfe_next_onsocket) {
2411 if (filter->sfe_filter->sf_filter.sf_data_out) {
2412 if (filtered == 0) {
2413 filtered = 1;
2414 so->so_send_filt_thread =
2415 current_thread();
2416 sflt_use(so);
2417 socket_unlock(so, 0);
2418 }
2419 error = filter->sfe_filter->sf_filter.
2420 sf_data_out(filter->sfe_cookie, so,
2421 NULL, &m0, &control, 0);
2422 }
2423 }
2424
2425 if (filtered) {
2426 /*
2427 * At this point, we've run at least one filter.
2428 * The socket is unlocked as is the socket
2429 * buffer. Clear the recorded filter thread
2430 * only when we are outside of a filter's
2431 * context. This allows for a filter to issue
2432 * multiple inject calls from its sf_data_out
2433 * callback routine.
2434 */
2435 socket_lock(so, 0);
2436 sflt_unuse(so);
2437 if (!recursive)
2438 so->so_send_filt_thread = 0;
2439 if (error) {
2440 if (error == EJUSTRETURN) {
2441 error = 0;
2442 continue;
2443 }
2444 goto done3;
2445 }
2446 }
2447 /*
2448 * End Socket filter processing
2449 */
2450 }
2451 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
2452 uap->s, 0, 0, 0, 0);
2453 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m0,
2454 0, 0, p);
2455 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
2456 uap->s, 0, 0, 0, 0);
2457 if (error) {
2458 goto done3;
2459 }
2460 }
2461 sbunlock(&so->so_snd, 0); /* will unlock socket */
2462 /*
2463 * Send trailers. Wimp out and use writev(2).
2464 */
2465 if (uap->hdtr != USER_ADDR_NULL &&
2466 user_hdtr.trailers != USER_ADDR_NULL) {
2467 bzero(&nuap, sizeof (struct writev_args));
2468 nuap.fd = uap->s;
2469 nuap.iovp = user_hdtr.trailers;
2470 nuap.iovcnt = user_hdtr.trl_cnt;
2471 error = writev_nocancel(p, &nuap, &writev_retval);
2472 if (error)
2473 goto done2;
2474 sbytes += writev_retval;
2475 }
2476 done2:
2477 file_drop(uap->s);
2478 done1:
2479 file_drop(uap->fd);
2480 done:
2481 if (uap->nbytes != USER_ADDR_NULL) {
2482 /* XXX this appears bogus for some early failure conditions */
2483 copyout(&sbytes, uap->nbytes, sizeof (off_t));
2484 }
2485 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_END), uap->s,
2486 (unsigned int)((sbytes >> 32) & 0x0ffffffff),
2487 (unsigned int)(sbytes & 0x0ffffffff), error, 0);
2488 return (error);
2489 done3:
2490 sbunlock(&so->so_snd, 0); /* will unlock socket */
2491 goto done2;
2492 }
2493
2494
2495 #endif /* SENDFILE */