X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/2d21ac55c334faf3a56e5634905ed6987fc787d4..bd504ef0e0b883cdd7917b73b3574eb9ce669905:/bsd/kern/kpi_socket.c diff --git a/bsd/kern/kpi_socket.c b/bsd/kern/kpi_socket.c index f7658fee6..3de525cbe 100644 --- a/bsd/kern/kpi_socket.c +++ b/bsd/kern/kpi_socket.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003-2007 Apple Inc. All rights reserved. + * Copyright (c) 2003-2011 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -43,9 +43,12 @@ #include #include #include +#include +#include extern int soclose_locked(struct socket *so); extern void soclose_wait_locked(struct socket *so); +extern int so_isdstlocal(struct socket *so); errno_t sock_send_internal( socket_t sock, @@ -169,18 +172,13 @@ sock_accept( if (sa) FREE(sa, M_SONAME); /* - * If the socket has been marked as inactive by soacceptfilter(), - * disallow further operations on it. We explicitly call shutdown - * on both data directions to ensure that SS_CANT{RCV,SEND}MORE - * states are set for the socket. This would also flush out data - * hanging off the receive list of this socket. + * If the socket has been marked as inactive by sosetdefunct(), + * disallow further operations on it. */ if (new_so->so_flags & SOF_DEFUNCT) { - (void) soshutdownlock(new_so, SHUT_RD); - (void) soshutdownlock(new_so, SHUT_WR); - (void) sodisconnectlocked(new_so); + (void) sodefunct(current_proc(), new_so, + SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL); } - *new_sock = new_so; if (dosocklock) socket_unlock(new_so, 1); @@ -192,9 +190,30 @@ sock_bind( socket_t sock, const struct sockaddr *to) { - if (sock == NULL || to == NULL) return EINVAL; + int error = 0; + struct sockaddr *sa = NULL; + struct sockaddr_storage ss; + boolean_t want_free = TRUE; + + if (sock == NULL || to == NULL) + return EINVAL; + + if (to->sa_len > sizeof(ss)) { + MALLOC(sa, struct sockaddr *, to->sa_len, M_SONAME, M_WAITOK); + if (sa == NULL) + return ENOBUFS; + } else { + sa = (struct sockaddr *)&ss; + want_free = FALSE; + } + memcpy(sa, to, to->sa_len); + + error = sobind(sock, sa); - return sobind(sock, (struct sockaddr*)to); + if (sa != NULL && want_free == TRUE) + FREE(sa, M_SONAME); + + return error; } errno_t @@ -205,23 +224,37 @@ sock_connect( { int error = 0; lck_mtx_t *mutex_held; + struct sockaddr *sa = NULL; + struct sockaddr_storage ss; + boolean_t want_free = TRUE; if (sock == NULL || to == NULL) return EINVAL; + + if (to->sa_len > sizeof(ss)) { + MALLOC(sa, struct sockaddr *, to->sa_len, M_SONAME, + (flags & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK); + if (sa == NULL) + return ENOBUFS; + } else { + sa = (struct sockaddr *)&ss; + want_free = FALSE; + } + memcpy(sa, to, to->sa_len); socket_lock(sock, 1); if ((sock->so_state & SS_ISCONNECTING) && ((sock->so_state & SS_NBIO) != 0 || (flags & MSG_DONTWAIT) != 0)) { - socket_unlock(sock, 1); - return EALREADY; + error = EALREADY; + goto out; } - error = soconnectlock(sock, (struct sockaddr*)to, 0); + error = soconnectlock(sock, sa, 0); if (!error) { if ((sock->so_state & SS_ISCONNECTING) && ((sock->so_state & SS_NBIO) != 0 || (flags & MSG_DONTWAIT) != 0)) { - socket_unlock(sock, 1); - return EINPROGRESS; + error = EINPROGRESS; + goto out; } if (sock->so_proto->pr_getlock != NULL) @@ -244,7 +277,12 @@ sock_connect( else { sock->so_state &= ~SS_ISCONNECTING; } +out: socket_unlock(sock, 1); + + if (sa != NULL && want_free == TRUE) + FREE(sa, M_SONAME); + return error; } @@ -346,7 +384,7 @@ sock_getpeername(socket_t sock, struct sockaddr *peername, int peernamelen) socket_unlock(sock, 1); return (ENOTCONN); } - error = sock_getaddr(sock, &sa, 1); + error = sogetaddr_locked(sock, &sa, 1); socket_unlock(sock, 1); if (error == 0) { if (peernamelen > sa->sa_len) @@ -367,7 +405,7 @@ sock_getsockname(socket_t sock, struct sockaddr *sockname, int socknamelen) return (EINVAL); socket_lock(sock, 1); - error = sock_getaddr(sock, &sa, 0); + error = sogetaddr_locked(sock, &sa, 0); socket_unlock(sock, 1); if (error == 0) { if (socknamelen > sa->sa_len) @@ -378,17 +416,17 @@ sock_getsockname(socket_t sock, struct sockaddr *sockname, int socknamelen) return (error); } -errno_t -sock_getaddr(socket_t sock, struct sockaddr **psa, int peer) +__private_extern__ int +sogetaddr_locked(struct socket *so, struct sockaddr **psa, int peer) { int error; - if (sock == NULL || psa == NULL) + if (so == NULL || psa == NULL) return (EINVAL); *psa = NULL; - error = peer ? sock->so_proto->pr_usrreqs->pru_peeraddr(sock, psa) : - sock->so_proto->pr_usrreqs->pru_sockaddr(sock, psa); + error = peer ? so->so_proto->pr_usrreqs->pru_peeraddr(so, psa) : + so->so_proto->pr_usrreqs->pru_sockaddr(so, psa); if (error == 0 && *psa == NULL) { error = ENOMEM; @@ -399,6 +437,21 @@ sock_getaddr(socket_t sock, struct sockaddr **psa, int peer) return (error); } +errno_t +sock_getaddr(socket_t sock, struct sockaddr **psa, int peer) +{ + int error; + + if (sock == NULL || psa == NULL) + return (EINVAL); + + socket_lock(sock, 1); + error = sogetaddr_locked(sock, psa, peer); + socket_unlock(sock, 1); + + return (error); +} + void sock_freeaddr(struct sockaddr *sa) { @@ -423,7 +476,7 @@ sock_getsockopt( sopt.sopt_name = optname; sopt.sopt_val = CAST_USER_ADDR_T(optval); sopt.sopt_valsize = *optlen; - sopt.sopt_p = NULL; + sopt.sopt_p = kernproc; error = sogetopt(sock, &sopt); /* will lock socket */ if (error == 0) *optlen = sopt.sopt_valsize; return error; @@ -435,7 +488,7 @@ sock_ioctl( unsigned long request, void *argp) { - return soioctl(sock, request, argp, NULL); /* will lock socket */ + return soioctl(sock, request, argp, kernproc); /* will lock socket */ } errno_t @@ -454,10 +507,156 @@ sock_setsockopt( sopt.sopt_name = optname; sopt.sopt_val = CAST_USER_ADDR_T(optval); sopt.sopt_valsize = optlen; - sopt.sopt_p = NULL; + sopt.sopt_p = kernproc; return sosetopt(sock, &sopt); /* will lock socket */ } +/* + * This follows the recommended mappings between DSCP code points and WMM access classes + */ +static u_int32_t so_tc_from_dscp(u_int8_t dscp); +static u_int32_t +so_tc_from_dscp(u_int8_t dscp) +{ + u_int32_t tc; + + if (dscp >= 0x30 && dscp <= 0x3f) + tc = SO_TC_VO; + else if (dscp >= 0x20 && dscp <= 0x2f) + tc = SO_TC_VI; + else if (dscp >= 0x08 && dscp <= 0x17) + tc = SO_TC_BK; + else + tc = SO_TC_BE; + + return (tc); +} + +errno_t +sock_settclassopt( + socket_t sock, + const void *optval, + size_t optlen) { + + errno_t error = 0; + struct sockopt sopt; + int sotc; + + if (sock == NULL || optval == NULL || optlen != sizeof(int)) return EINVAL; + + socket_lock(sock, 1); + if (!(sock->so_state & SS_ISCONNECTED)) { + /* If the socket is not connected then we don't know + * if the destination is on LAN or not. Skip + * setting traffic class in this case + */ + error = ENOTCONN; + goto out; + } + + if (sock->so_proto == NULL || sock->so_proto->pr_domain == NULL || sock->so_pcb == NULL) { + error = EINVAL; + goto out; + } + + /* + * Set the socket traffic class based on the passed DSCP code point + * regardless of the scope of the destination + */ + sotc = so_tc_from_dscp((*(const int *)optval) >> 2); + + sopt.sopt_dir = SOPT_SET; + sopt.sopt_val = CAST_USER_ADDR_T(&sotc); + sopt.sopt_valsize = sizeof(sotc); + sopt.sopt_p = kernproc; + sopt.sopt_level = SOL_SOCKET; + sopt.sopt_name = SO_TRAFFIC_CLASS; + + socket_unlock(sock, 0); + error = sosetopt(sock, &sopt); + socket_lock(sock, 0); + + if (error != 0) { + printf("sock_settclassopt: sosetopt SO_TRAFFIC_CLASS failed %d\n", error); + goto out; + } + + /* Check if the destination address is LAN or link local address. + * We do not want to set traffic class bits if the destination + * is not local + */ + if (!so_isdstlocal(sock)) { + goto out; + } + + sopt.sopt_dir = SOPT_SET; + sopt.sopt_val = CAST_USER_ADDR_T(optval); + sopt.sopt_valsize = optlen; + sopt.sopt_p = kernproc; + + switch (sock->so_proto->pr_domain->dom_family) { + case AF_INET: + sopt.sopt_level = IPPROTO_IP; + sopt.sopt_name = IP_TOS; + break; + case AF_INET6: + sopt.sopt_level = IPPROTO_IPV6; + sopt.sopt_name = IPV6_TCLASS; + break; + default: + error = EINVAL; + goto out; + } + + socket_unlock(sock, 1); + return sosetopt(sock, &sopt); +out: + socket_unlock(sock, 1); + return error; +} + +errno_t +sock_gettclassopt( + socket_t sock, + void *optval, + size_t *optlen) { + + errno_t error = 0; + struct sockopt sopt; + + if (sock == NULL || optval == NULL || optlen == NULL) return EINVAL; + + sopt.sopt_dir = SOPT_GET; + sopt.sopt_val = CAST_USER_ADDR_T(optval); + sopt.sopt_valsize = *optlen; + sopt.sopt_p = kernproc; + + socket_lock(sock, 1); + if (sock->so_proto == NULL || sock->so_proto->pr_domain == NULL) { + socket_unlock(sock, 1); + return EINVAL; + } + + switch (sock->so_proto->pr_domain->dom_family) { + case AF_INET: + sopt.sopt_level = IPPROTO_IP; + sopt.sopt_name = IP_TOS; + break; + case AF_INET6: + sopt.sopt_level = IPPROTO_IPV6; + sopt.sopt_name = IPV6_TCLASS; + break; + default: + socket_unlock(sock, 1); + return EINVAL; + + } + socket_unlock(sock, 1); + error = sogetopt(sock, &sopt); /* will lock socket */ + if (error == 0) *optlen = sopt.sopt_valsize; + return error; +} + errno_t sock_listen( socket_t sock, @@ -489,7 +688,7 @@ sock_receive_internal( &uio_buf[0], sizeof(uio_buf)); if (msg && data == NULL) { int i; - struct iovec_32 *tempp = (struct iovec_32 *) msg->msg_iov; + struct iovec *tempp = msg->msg_iov; for (i = 0; i < msg->msg_iovlen; i++) { uio_addiov(auio, CAST_USER_ADDR_T((tempp + i)->iov_base), (tempp + i)->iov_len); @@ -503,19 +702,10 @@ sock_receive_internal( if (recvdlen) *recvdlen = 0; - - if (msg && msg->msg_control) { - if ((size_t)msg->msg_controllen < sizeof(struct cmsghdr)) return EINVAL; - if ((size_t)msg->msg_controllen > MLEN) return EINVAL; - control = m_get(M_NOWAIT, MT_CONTROL); - if (control == NULL) return ENOMEM; - memcpy(mtod(control, caddr_t), msg->msg_control, msg->msg_controllen); - control->m_len = msg->msg_controllen; - } /* let pru_soreceive handle the socket locking */ error = sock->so_proto->pr_usrreqs->pru_soreceive(sock, &fromsa, auio, - data, control ? &control : NULL, &flags); + data, (msg && msg->msg_control) ? &control : NULL, &flags); if (error) goto cleanup; if (recvdlen) @@ -559,7 +749,7 @@ sock_receive_internal( clen -= tocopy; m = m->m_next; } - msg->msg_controllen = (u_int32_t)ctlbuf - (u_int32_t)msg->msg_control; + msg->msg_controllen = (uintptr_t)ctlbuf - (uintptr_t)msg->msg_control; } } @@ -618,7 +808,7 @@ sock_send_internal( } if (data == 0 && msg != NULL) { - struct iovec_32 *tempp = (struct iovec_32 *) msg->msg_iov; + struct iovec *tempp = msg->msg_iov; auio = uio_createwithbuffer(msg->msg_iovlen, 0, UIO_SYSSPACE, UIO_WRITE, &uio_buf[0], sizeof(uio_buf)); @@ -756,6 +946,8 @@ sock_socket( #endif (*new_so)->so_upcall = (so_upcall)callback; (*new_so)->so_upcallarg = context; + (*new_so)->last_pid = 0; + (*new_so)->last_upid = 0; } return error; } @@ -788,7 +980,7 @@ sock_release(socket_t sock) return; socket_lock(sock, 1); - if (sock->so_flags & SOF_UPCALLINUSE) + if (sock->so_upcallusecount) soclose_wait_locked(sock); sock->so_retaincnt--; @@ -877,3 +1069,115 @@ sock_getlistener(socket_t sock) { return (sock->so_head); } + +static inline void +sock_set_tcp_stream_priority(socket_t sock) +{ + if ((sock->so_proto->pr_domain->dom_family == AF_INET || + sock->so_proto->pr_domain->dom_family == AF_INET6) && + sock->so_proto->pr_type == SOCK_STREAM) { + + set_tcp_stream_priority(sock); + + } +} + +/* + * Caller must have ensured socket is valid and won't be going away. + */ +void +socket_set_traffic_mgt_flags_locked(socket_t sock, u_int32_t flags) +{ + (void) OSBitOrAtomic(flags, &sock->so_traffic_mgt_flags); + sock_set_tcp_stream_priority(sock); +} + +void +socket_set_traffic_mgt_flags(socket_t sock, u_int32_t flags) +{ + socket_lock(sock, 1); + socket_set_traffic_mgt_flags_locked(sock, flags); + socket_unlock(sock, 1); +} + +/* + * Caller must have ensured socket is valid and won't be going away. + */ +void +socket_clear_traffic_mgt_flags_locked(socket_t sock, u_int32_t flags) +{ + (void) OSBitAndAtomic(~flags, &sock->so_traffic_mgt_flags); + sock_set_tcp_stream_priority(sock); +} + +void +socket_clear_traffic_mgt_flags(socket_t sock, u_int32_t flags) +{ + socket_lock(sock, 1); + socket_clear_traffic_mgt_flags_locked(sock, flags); + socket_unlock(sock, 1); +} + + +/* + * Caller must have ensured socket is valid and won't be going away. + */ +errno_t +socket_defunct(struct proc *p, socket_t so, int level) +{ + errno_t retval; + + if (level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_SVC && + level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL) + return (EINVAL); + + socket_lock(so, 1); + /* + * SHUTDOWN_SOCKET_LEVEL_DISCONNECT_SVC level is meant to tear down + * all of mDNSResponder IPC sockets, currently those of AF_UNIX; note + * that this is an implementation artifact of mDNSResponder. We do + * a quick test against the socket buffers for SB_UNIX, since that + * would have been set by unp_attach() at socket creation time. + */ + if (level == SHUTDOWN_SOCKET_LEVEL_DISCONNECT_SVC && + (so->so_rcv.sb_flags & so->so_snd.sb_flags & SB_UNIX) != SB_UNIX) { + socket_unlock(so, 1); + return (EOPNOTSUPP); + } + retval = sosetdefunct(p, so, level, TRUE); + if (retval == 0) + retval = sodefunct(p, so, level); + socket_unlock(so, 1); + return (retval); +} + +errno_t +sock_setupcall(socket_t sock, sock_upcall callback, void* context) +{ + if (sock == NULL) + return EINVAL; + + /* + * Note that we don't wait for any in progress upcall to complete. + */ + socket_lock(sock, 1); + + sock->so_upcall = (so_upcall) callback; + sock->so_upcallarg = context; + if (callback) { + sock->so_rcv.sb_flags |= SB_UPCALL; +#if CONFIG_SOWUPCALL + sock->so_snd.sb_flags |= SB_UPCALL; +#endif /* CONFIG_SOWUPCALL */ + } else { + sock->so_rcv.sb_flags &= ~SB_UPCALL; +#if CONFIG_SOWUPCALL + sock->so_snd.sb_flags &= ~SB_UPCALL; +#endif /* CONFIG_SOWUPCALL */ + } + + socket_unlock(sock, 1); + + return 0; +} +