/* bsd/kern/kpi_socket.c — apple/xnu (recovered from a garbled git web-view scrape) */
/*
 * Copyright (c) 2003-2008 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/protosw.h>
#include <sys/domain.h>
#include <sys/fcntl.h>
#include <sys/filio.h>
#include <sys/uio_internal.h>
#include <kern/lock.h>
#include <netinet/in.h>
48 extern int soclose_locked(struct socket
*so
);
49 extern void soclose_wait_locked(struct socket
*so
);
50 extern int so_isdstlocal(struct socket
*so
);
52 errno_t
sock_send_internal(
54 const struct msghdr
*msg
,
59 typedef void (*so_upcall
)(struct socket
*, caddr_t
, int );
65 struct sockaddr
*from
,
73 struct socket
*new_so
;
74 lck_mtx_t
*mutex_held
;
78 if (sock
== NULL
|| new_sock
== NULL
) return EINVAL
;
80 if ((sock
->so_options
& SO_ACCEPTCONN
) == 0) {
81 socket_unlock(sock
, 1);
84 if ((flags
& ~(MSG_DONTWAIT
)) != 0) {
85 socket_unlock(sock
, 1);
88 if (((flags
& MSG_DONTWAIT
) != 0 || (sock
->so_state
& SS_NBIO
) != 0) &&
89 sock
->so_comp
.tqh_first
== NULL
) {
90 socket_unlock(sock
, 1);
94 if (sock
->so_proto
->pr_getlock
!= NULL
) {
95 mutex_held
= (*sock
->so_proto
->pr_getlock
)(sock
, 0);
99 mutex_held
= sock
->so_proto
->pr_domain
->dom_mtx
;
103 while (TAILQ_EMPTY(&sock
->so_comp
) && sock
->so_error
== 0) {
104 if (sock
->so_state
& SS_CANTRCVMORE
) {
105 sock
->so_error
= ECONNABORTED
;
108 error
= msleep((caddr_t
)&sock
->so_timeo
, mutex_held
, PSOCK
| PCATCH
, "sock_accept", NULL
);
110 socket_unlock(sock
, 1);
114 if (sock
->so_error
) {
115 error
= sock
->so_error
;
117 socket_unlock(sock
, 1);
121 new_so
= TAILQ_FIRST(&sock
->so_comp
);
122 TAILQ_REMOVE(&sock
->so_comp
, new_so
, so_list
);
126 * Pass the pre-accepted socket to any interested socket filter(s).
127 * Upon failure, the socket would have been closed by the callee.
129 if (new_so
->so_filt
!= NULL
) {
131 * Temporarily drop the listening socket's lock before we
132 * hand off control over to the socket filter(s), but keep
133 * a reference so that it won't go away. We'll grab it
134 * again once we're done with the filter(s).
136 socket_unlock(sock
, 0);
137 if ((error
= soacceptfilter(new_so
)) != 0) {
138 /* Drop reference on listening socket */
142 socket_lock(sock
, 0);
146 lck_mtx_assert(new_so
->so_proto
->pr_getlock(new_so
, 0),
147 LCK_MTX_ASSERT_NOTOWNED
);
148 socket_lock(new_so
, 1);
151 new_so
->so_state
&= ~SS_COMP
;
152 new_so
->so_head
= NULL
;
153 (void) soacceptlock(new_so
, &sa
, 0);
155 socket_unlock(sock
, 1); /* release the head */
158 new_so
->so_upcall
= (so_upcall
) callback
;
159 new_so
->so_upcallarg
= cookie
;
160 new_so
->so_rcv
.sb_flags
|= SB_UPCALL
;
162 new_so
->so_snd
.sb_flags
|= SB_UPCALL
;
168 if (fromlen
> sa
->sa_len
) fromlen
= sa
->sa_len
;
169 memcpy(from
, sa
, fromlen
);
171 if (sa
) FREE(sa
, M_SONAME
);
174 * If the socket has been marked as inactive by soacceptfilter(),
175 * disallow further operations on it. We explicitly call shutdown
176 * on both data directions to ensure that SS_CANT{RCV,SEND}MORE
177 * states are set for the socket. This would also flush out data
178 * hanging off the receive list of this socket.
180 if (new_so
->so_flags
& SOF_DEFUNCT
) {
181 (void) soshutdownlock(new_so
, SHUT_RD
);
182 (void) soshutdownlock(new_so
, SHUT_WR
);
183 (void) sodisconnectlocked(new_so
);
188 socket_unlock(new_so
, 1);
195 const struct sockaddr
*to
)
197 if (sock
== NULL
|| to
== NULL
) return EINVAL
;
199 return sobind(sock
, (struct sockaddr
*)(uintptr_t)to
);
205 const struct sockaddr
*to
,
209 lck_mtx_t
*mutex_held
;
211 if (sock
== NULL
|| to
== NULL
) return EINVAL
;
213 socket_lock(sock
, 1);
215 if ((sock
->so_state
& SS_ISCONNECTING
) &&
216 ((sock
->so_state
& SS_NBIO
) != 0 ||
217 (flags
& MSG_DONTWAIT
) != 0)) {
218 socket_unlock(sock
, 1);
221 error
= soconnectlock(sock
, (struct sockaddr
*)(uintptr_t)to
, 0);
223 if ((sock
->so_state
& SS_ISCONNECTING
) &&
224 ((sock
->so_state
& SS_NBIO
) != 0 || (flags
& MSG_DONTWAIT
) != 0)) {
225 socket_unlock(sock
, 1);
229 if (sock
->so_proto
->pr_getlock
!= NULL
)
230 mutex_held
= (*sock
->so_proto
->pr_getlock
)(sock
, 0);
232 mutex_held
= sock
->so_proto
->pr_domain
->dom_mtx
;
234 while ((sock
->so_state
& SS_ISCONNECTING
) && sock
->so_error
== 0) {
235 error
= msleep((caddr_t
)&sock
->so_timeo
, mutex_held
, PSOCK
| PCATCH
,
236 "sock_connect", NULL
);
242 error
= sock
->so_error
;
247 sock
->so_state
&= ~SS_ISCONNECTING
;
249 socket_unlock(sock
, 1);
256 const struct timeval
*tv
)
258 lck_mtx_t
* mutex_held
;
262 socket_lock(sock
, 1);
264 // Check if we're already connected or if we've already errored out
265 if ((sock
->so_state
& SS_ISCONNECTING
) == 0 || sock
->so_error
) {
266 if (sock
->so_error
) {
267 retval
= sock
->so_error
;
271 if ((sock
->so_state
& SS_ISCONNECTED
) != 0)
279 // copied translation from timeval to hertz from SO_RCVTIMEO handling
280 if (tv
->tv_sec
< 0 || tv
->tv_sec
> SHRT_MAX
/ hz
||
281 tv
->tv_usec
< 0 || tv
->tv_usec
>= 1000000) {
286 ts
.tv_sec
= tv
->tv_sec
;
287 ts
.tv_nsec
= (tv
->tv_usec
* NSEC_PER_USEC
);
288 if ( (ts
.tv_sec
+ (ts
.tv_nsec
/NSEC_PER_SEC
))/100 > SHRT_MAX
) {
293 if (sock
->so_proto
->pr_getlock
!= NULL
)
294 mutex_held
= (*sock
->so_proto
->pr_getlock
)(sock
, 0);
296 mutex_held
= sock
->so_proto
->pr_domain
->dom_mtx
;
298 msleep((caddr_t
)&sock
->so_timeo
, mutex_held
, PSOCK
, "sock_connectwait", &ts
);
300 // Check if we're still waiting to connect
301 if ((sock
->so_state
& SS_ISCONNECTING
) && sock
->so_error
== 0) {
302 retval
= EINPROGRESS
;
306 if (sock
->so_error
) {
307 retval
= sock
->so_error
;
312 socket_unlock(sock
, 1);
321 socket_lock(sock
, 1);
324 sock
->so_rcv
.sb_flags
|= SB_NOINTR
; // This isn't safe
325 sock
->so_snd
.sb_flags
|= SB_NOINTR
; // This isn't safe
328 sock
->so_rcv
.sb_flags
&= ~SB_NOINTR
; // This isn't safe
329 sock
->so_snd
.sb_flags
&= ~SB_NOINTR
; // This isn't safe
332 socket_unlock(sock
, 1);
338 sock_getpeername(socket_t sock
, struct sockaddr
*peername
, int peernamelen
)
341 struct sockaddr
*sa
= NULL
;
343 if (sock
== NULL
|| peername
== NULL
|| peernamelen
< 0)
346 socket_lock(sock
, 1);
347 if (!(sock
->so_state
& (SS_ISCONNECTED
|SS_ISCONFIRMING
))) {
348 socket_unlock(sock
, 1);
351 error
= sogetaddr_locked(sock
, &sa
, 1);
352 socket_unlock(sock
, 1);
354 if (peernamelen
> sa
->sa_len
)
355 peernamelen
= sa
->sa_len
;
356 memcpy(peername
, sa
, peernamelen
);
363 sock_getsockname(socket_t sock
, struct sockaddr
*sockname
, int socknamelen
)
366 struct sockaddr
*sa
= NULL
;
368 if (sock
== NULL
|| sockname
== NULL
|| socknamelen
< 0)
371 socket_lock(sock
, 1);
372 error
= sogetaddr_locked(sock
, &sa
, 0);
373 socket_unlock(sock
, 1);
375 if (socknamelen
> sa
->sa_len
)
376 socknamelen
= sa
->sa_len
;
377 memcpy(sockname
, sa
, socknamelen
);
383 __private_extern__
int
384 sogetaddr_locked(struct socket
*so
, struct sockaddr
**psa
, int peer
)
388 if (so
== NULL
|| psa
== NULL
)
392 error
= peer
? so
->so_proto
->pr_usrreqs
->pru_peeraddr(so
, psa
) :
393 so
->so_proto
->pr_usrreqs
->pru_sockaddr(so
, psa
);
395 if (error
== 0 && *psa
== NULL
) {
397 } else if (error
!= 0 && *psa
!= NULL
) {
398 FREE(*psa
, M_SONAME
);
405 sock_getaddr(socket_t sock
, struct sockaddr
**psa
, int peer
)
409 if (sock
== NULL
|| psa
== NULL
)
412 socket_lock(sock
, 1);
413 error
= sogetaddr_locked(sock
, psa
, peer
);
414 socket_unlock(sock
, 1);
420 sock_freeaddr(struct sockaddr
*sa
)
437 if (sock
== NULL
|| optval
== NULL
|| optlen
== NULL
) return EINVAL
;
438 sopt
.sopt_dir
= SOPT_GET
;
439 sopt
.sopt_level
= level
;
440 sopt
.sopt_name
= optname
;
441 sopt
.sopt_val
= CAST_USER_ADDR_T(optval
);
442 sopt
.sopt_valsize
= *optlen
;
443 sopt
.sopt_p
= kernproc
;
444 error
= sogetopt(sock
, &sopt
); /* will lock socket */
445 if (error
== 0) *optlen
= sopt
.sopt_valsize
;
452 unsigned long request
,
455 return soioctl(sock
, request
, argp
, kernproc
); /* will lock socket */
468 if (sock
== NULL
|| optval
== NULL
) return EINVAL
;
469 sopt
.sopt_dir
= SOPT_SET
;
470 sopt
.sopt_level
= level
;
471 sopt
.sopt_name
= optname
;
472 sopt
.sopt_val
= CAST_USER_ADDR_T(optval
);
473 sopt
.sopt_valsize
= optlen
;
474 sopt
.sopt_p
= kernproc
;
475 return sosetopt(sock
, &sopt
); /* will lock socket */
487 if (sock
== NULL
|| optval
== NULL
|| optlen
== 0) return EINVAL
;
489 sopt
.sopt_dir
= SOPT_SET
;
490 sopt
.sopt_val
= CAST_USER_ADDR_T(optval
);
491 sopt
.sopt_valsize
= optlen
;
492 sopt
.sopt_p
= kernproc
;
494 socket_lock(sock
, 1);
495 if (!(sock
->so_state
& SS_ISCONNECTED
)) {
496 /* If the socket is not connected then we don't know
497 * if the destination is on LAN or not. Skip
498 * setting traffic class in this case
504 if (sock
->so_proto
== NULL
|| sock
->so_proto
->pr_domain
== NULL
|| sock
->so_pcb
== NULL
) {
509 /* Check if the destination address is LAN or link local address.
510 * We do not want to set traffic class bits if the destination
513 if (!so_isdstlocal(sock
)) {
517 switch (sock
->so_proto
->pr_domain
->dom_family
) {
519 sopt
.sopt_level
= IPPROTO_IP
;
520 sopt
.sopt_name
= IP_TOS
;
523 sopt
.sopt_level
= IPPROTO_IPV6
;
524 sopt
.sopt_name
= IPV6_TCLASS
;
531 socket_unlock(sock
, 1);
532 return sosetopt(sock
, &sopt
);
534 socket_unlock(sock
, 1);
547 if (sock
== NULL
|| optval
== NULL
|| optlen
== NULL
) return EINVAL
;
549 sopt
.sopt_dir
= SOPT_GET
;
550 sopt
.sopt_val
= CAST_USER_ADDR_T(optval
);
551 sopt
.sopt_valsize
= *optlen
;
552 sopt
.sopt_p
= kernproc
;
554 socket_lock(sock
, 1);
555 if (sock
->so_proto
== NULL
|| sock
->so_proto
->pr_domain
== NULL
) {
556 socket_unlock(sock
, 1);
560 switch (sock
->so_proto
->pr_domain
->dom_family
) {
562 sopt
.sopt_level
= IPPROTO_IP
;
563 sopt
.sopt_name
= IP_TOS
;
566 sopt
.sopt_level
= IPPROTO_IPV6
;
567 sopt
.sopt_name
= IPV6_TCLASS
;
570 socket_unlock(sock
, 1);
574 socket_unlock(sock
, 1);
575 error
= sogetopt(sock
, &sopt
); /* will lock socket */
576 if (error
== 0) *optlen
= sopt
.sopt_valsize
;
585 if (sock
== NULL
) return EINVAL
;
586 return solisten(sock
, backlog
); /* will lock socket */
590 sock_receive_internal(
598 struct mbuf
*control
= NULL
;
601 struct sockaddr
*fromsa
;
602 char uio_buf
[ UIO_SIZEOF((msg
!= NULL
) ? msg
->msg_iovlen
: 0) ];
604 if (sock
== NULL
) return EINVAL
;
606 auio
= uio_createwithbuffer(((msg
!= NULL
) ? msg
->msg_iovlen
: 0),
607 0, UIO_SYSSPACE
, UIO_READ
,
608 &uio_buf
[0], sizeof(uio_buf
));
609 if (msg
&& data
== NULL
) {
611 struct iovec
*tempp
= msg
->msg_iov
;
613 for (i
= 0; i
< msg
->msg_iovlen
; i
++) {
614 uio_addiov(auio
, CAST_USER_ADDR_T((tempp
+ i
)->iov_base
), (tempp
+ i
)->iov_len
);
616 if (uio_resid(auio
) < 0) return EINVAL
;
619 uio_setresid(auio
, (uio_resid(auio
) + *recvdlen
));
621 length
= uio_resid(auio
);
626 /* let pru_soreceive handle the socket locking */
627 error
= sock
->so_proto
->pr_usrreqs
->pru_soreceive(sock
, &fromsa
, auio
,
628 data
, (msg
&& msg
->msg_control
) ? &control
: NULL
, &flags
);
629 if (error
) goto cleanup
;
632 *recvdlen
= length
- uio_resid(auio
);
634 msg
->msg_flags
= flags
;
639 salen
= msg
->msg_namelen
;
640 if (msg
->msg_namelen
> 0 && fromsa
!= 0)
642 salen
= MIN(salen
, fromsa
->sa_len
);
643 memcpy(msg
->msg_name
, fromsa
,
644 msg
->msg_namelen
> fromsa
->sa_len
? fromsa
->sa_len
: msg
->msg_namelen
);
648 if (msg
->msg_control
)
650 struct mbuf
* m
= control
;
651 u_char
* ctlbuf
= msg
->msg_control
;
652 int clen
= msg
->msg_controllen
;
653 msg
->msg_controllen
= 0;
655 while (m
&& clen
> 0)
658 if (clen
>= m
->m_len
)
664 msg
->msg_flags
|= MSG_CTRUNC
;
667 memcpy(ctlbuf
, mtod(m
, caddr_t
), tocopy
);
672 msg
->msg_controllen
= (uintptr_t)ctlbuf
- (uintptr_t)msg
->msg_control
;
677 if (control
) m_freem(control
);
678 if (fromsa
) FREE(fromsa
, M_SONAME
);
690 (msg
->msg_iovlen
< 1) ||
691 (msg
->msg_iov
[0].iov_len
== 0) ||
692 (msg
->msg_iov
[0].iov_base
== NULL
))
694 return sock_receive_internal(sock
, msg
, NULL
, flags
, recvdlen
);
705 if (data
== NULL
|| recvlen
== 0 || *recvlen
<= 0 || (msg
&&
706 (msg
->msg_iov
!= NULL
|| msg
->msg_iovlen
!= 0)))
708 return sock_receive_internal(sock
, msg
, data
, flags
, recvlen
);
714 const struct msghdr
*msg
,
720 struct mbuf
*control
= NULL
;
723 char uio_buf
[ UIO_SIZEOF((msg
!= NULL
? msg
->msg_iovlen
: 1)) ];
730 if (data
== 0 && msg
!= NULL
) {
731 struct iovec
*tempp
= msg
->msg_iov
;
733 auio
= uio_createwithbuffer(msg
->msg_iovlen
, 0, UIO_SYSSPACE
, UIO_WRITE
,
734 &uio_buf
[0], sizeof(uio_buf
));
739 for (i
= 0; i
< msg
->msg_iovlen
; i
++) {
740 uio_addiov(auio
, CAST_USER_ADDR_T((tempp
+ i
)->iov_base
), (tempp
+ i
)->iov_len
);
743 if (uio_resid(auio
) < 0) {
754 datalen
= uio_resid(auio
);
756 datalen
= data
->m_pkthdr
.len
;
758 if (msg
&& msg
->msg_control
)
760 if ((size_t)msg
->msg_controllen
< sizeof(struct cmsghdr
)) return EINVAL
;
761 if ((size_t)msg
->msg_controllen
> MLEN
) return EINVAL
;
762 control
= m_get(M_NOWAIT
, MT_CONTROL
);
763 if (control
== NULL
) {
767 memcpy(mtod(control
, caddr_t
), msg
->msg_control
, msg
->msg_controllen
);
768 control
->m_len
= msg
->msg_controllen
;
771 error
= sock
->so_proto
->pr_usrreqs
->pru_sosend(sock
, msg
!= NULL
?
772 (struct sockaddr
*)msg
->msg_name
: NULL
, auio
, data
, control
, flags
);
775 * Residual data is possible in the case of IO vectors but not
776 * in the mbuf case since the latter is treated as atomic send.
777 * If pru_sosend() consumed a portion of the iovecs data and
778 * the error returned is transient, treat it as success; this
779 * is consistent with sendit() behavior.
781 if (auio
!= NULL
&& uio_resid(auio
) != datalen
&&
782 (error
== ERESTART
|| error
== EINTR
|| error
== EWOULDBLOCK
))
785 if (error
== 0 && sentlen
!= NULL
) {
787 *sentlen
= datalen
- uio_resid(auio
);
795 * In cases where we detect an error before returning, we need to
796 * free the mbuf chain if there is one. sosend (and pru_sosend) will
797 * free the mbuf chain if they encounter an error.
812 const struct msghdr
*msg
,
816 if (msg
== NULL
|| msg
->msg_iov
== NULL
|| msg
->msg_iovlen
< 1)
818 return sock_send_internal(sock
, msg
, NULL
, flags
, sentlen
);
824 const struct msghdr
*msg
,
829 if (data
== NULL
|| (msg
&&
830 (msg
->msg_iov
!= NULL
|| msg
->msg_iovlen
!= 0))) {
835 return sock_send_internal(sock
, msg
, data
, flags
, sentlen
);
843 if (sock
== NULL
) return EINVAL
;
844 return soshutdown(sock
, how
);
853 sock_upcall callback
,
858 if (new_so
== NULL
) return EINVAL
;
859 /* socreate will create an initial so_count */
860 error
= socreate(domain
, new_so
, type
, protocol
);
861 if (error
== 0 && callback
)
863 (*new_so
)->so_rcv
.sb_flags
|= SB_UPCALL
;
865 (*new_so
)->so_snd
.sb_flags
|= SB_UPCALL
;
867 (*new_so
)->so_upcall
= (so_upcall
)callback
;
868 (*new_so
)->so_upcallarg
= context
;
877 if (sock
== NULL
) return;
881 /* Do we want this to be APPLE_PRIVATE API?: YES (LD 12/23/04)*/
886 if (sock
== NULL
) return;
887 socket_lock(sock
, 1);
888 sock
->so_retaincnt
++;
889 sock
->so_usecount
++; /* add extra reference for holding the socket */
890 socket_unlock(sock
, 1);
893 /* Do we want this to be APPLE_PRIVATE API? */
895 sock_release(socket_t sock
)
899 socket_lock(sock
, 1);
901 if (sock
->so_flags
& SOF_UPCALLINUSE
)
902 soclose_wait_locked(sock
);
904 sock
->so_retaincnt
--;
905 if (sock
->so_retaincnt
< 0)
906 panic("sock_release: negative retain count for sock=%p "
907 "cnt=%x\n", sock
, sock
->so_retaincnt
);
908 if ((sock
->so_retaincnt
== 0) && (sock
->so_usecount
== 2)) {
909 /* close socket only if the FD is not holding it */
910 soclose_locked(sock
);
912 /* remove extra reference holding the socket */
915 socket_unlock(sock
, 1);
923 if (sock
== NULL
) return EINVAL
;
924 socket_lock(sock
, 1);
927 sock
->so_state
|= SS_PRIV
;
931 sock
->so_state
&= ~SS_PRIV
;
933 socket_unlock(sock
, 1);
942 socket_lock(sock
, 1);
943 retval
= (sock
->so_state
& SS_ISCONNECTED
) != 0;
944 socket_unlock(sock
, 1);
953 socket_lock(sock
, 1);
954 retval
= (sock
->so_state
& SS_NBIO
) != 0;
955 socket_unlock(sock
, 1);
966 socket_lock(sock
, 1);
968 *outDomain
= sock
->so_proto
->pr_domain
->dom_family
;
970 *outType
= sock
->so_type
;
972 *outProtocol
= sock
->so_proto
->pr_protocol
;
973 socket_unlock(sock
, 1);
978 * Return the listening socket of a pre-accepted socket. It returns the
979 * listener (so_head) value of a given socket. This is intended to be
980 * called by a socket filter during a filter attach (sf_attach) callback.
981 * The value returned by this routine is safe to be used only in the
982 * context of that callback, because we hold the listener's lock across
983 * the sflt_initsock() call.
986 sock_getlistener(socket_t sock
)
988 return (sock
->so_head
);