2 * Copyright (c) 2020 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 #include <sys/domain.h>
30 #include <sys/socket.h>
31 #include <sys/protosw.h>
32 #include <sys/mcache.h>
33 #include <sys/systm.h>
34 #include <sys/sysctl.h>
35 #include <sys/random.h>
37 #include <sys/vsock_domain.h>
38 #include <sys/vsock_transport.h>
39 #include <kern/task.h>
40 #include <kern/zalloc.h>
41 #include <kern/locks.h>
42 #include <machine/atomic.h>
44 #define sotovsockpcb(so) ((struct vsockpcb *)(so)->so_pcb)
46 #define VSOCK_PORT_RESERVED 1024
48 /* VSock Protocol Globals */
50 static struct vsock_transport
* _Atomic the_vsock_transport
= NULL
;
51 static ZONE_DECLARE(vsockpcb_zone
, "vsockpcbzone",
52 sizeof(struct vsockpcb
), ZC_NONE
);
53 static LCK_GRP_DECLARE(vsock_lock_grp
, "vsock");
54 static struct vsockpcbinfo vsockinfo
;
56 static uint32_t vsock_sendspace
= VSOCK_MAX_PACKET_SIZE
* 8;
57 static uint32_t vsock_recvspace
= VSOCK_MAX_PACKET_SIZE
* 8;
59 /* VSock PCB Helpers */
// Returns the amount of space this PCB believes is still available at the
// peer: the peer's advertised buffer size (peer_buf_alloc) minus the bytes
// still counted as outstanding (tx_cnt - peer_fwd_cnt).
62 vsock_get_peer_space(struct vsockpcb
*pcb
)
64 return pcb
->peer_buf_alloc
- (pcb
->tx_cnt
- pcb
->peer_fwd_cnt
);
67 static struct vsockpcb
*
68 vsock_get_matching_pcb(struct vsock_address src
, struct vsock_address dst
)
70 struct vsockpcb
*preferred
= NULL
;
71 struct vsockpcb
*match
= NULL
;
72 struct vsockpcb
*pcb
= NULL
;
74 lck_rw_lock_shared(&vsockinfo
.bound_lock
);
75 LIST_FOREACH(pcb
, &vsockinfo
.bound
, bound
) {
76 // Source CID and port must match the PCB's local address. Of the destination, only the port must match the PCB's remote address. (Allows for a changing CID during migration)
77 socket_lock(pcb
->so
, 1);
78 if ((pcb
->so
->so_state
& SS_ISCONNECTED
|| pcb
->so
->so_state
& SS_ISCONNECTING
) &&
79 pcb
->local_address
.cid
== src
.cid
&& pcb
->local_address
.port
== src
.port
&&
80 pcb
->remote_address
.port
== dst
.port
) {
83 } else if ((pcb
->local_address
.cid
== src
.cid
|| pcb
->local_address
.cid
== VMADDR_CID_ANY
) &&
84 pcb
->local_address
.port
== src
.port
) {
87 socket_unlock(pcb
->so
, 1);
89 if (!preferred
&& match
) {
90 socket_lock(match
->so
, 1);
93 lck_rw_done(&vsockinfo
.bound_lock
);
99 vsock_bind_address_if_free(struct vsockpcb
*pcb
, uint32_t local_cid
, uint32_t local_port
, uint32_t remote_cid
, uint32_t remote_port
)
101 socket_lock_assert_owned(pcb
->so
);
104 if (local_port
!= VMADDR_PORT_ANY
&& local_port
< VSOCK_PORT_RESERVED
&&
105 current_task() != kernel_task
&& proc_suser(current_proc()) != 0) {
110 const bool check_remote
= (remote_cid
!= VMADDR_CID_ANY
&& remote_port
!= VMADDR_PORT_ANY
);
112 struct vsockpcb
*pcb_match
= NULL
;
114 socket_unlock(pcb
->so
, 0);
115 lck_rw_lock_exclusive(&vsockinfo
.bound_lock
);
116 LIST_FOREACH(pcb_match
, &vsockinfo
.bound
, bound
) {
117 socket_lock(pcb_match
->so
, 1);
118 if (pcb
== pcb_match
||
119 (!check_remote
&& pcb_match
->local_address
.port
== local_port
) ||
120 (check_remote
&& pcb_match
->local_address
.port
== local_port
&&
121 pcb_match
->remote_address
.cid
== remote_cid
&& pcb_match
->remote_address
.port
== remote_port
)) {
122 socket_unlock(pcb_match
->so
, 1);
126 socket_unlock(pcb_match
->so
, 1);
128 socket_lock(pcb
->so
, 0);
130 pcb
->local_address
= (struct vsock_address
) { .cid
= local_cid
, .port
= local_port
};
131 pcb
->remote_address
= (struct vsock_address
) { .cid
= remote_cid
, .port
= remote_port
};
132 LIST_INSERT_HEAD(&vsockinfo
.bound
, pcb
, bound
);
134 lck_rw_done(&vsockinfo
.bound_lock
);
136 return taken
? EADDRINUSE
: 0;
140 vsock_bind_address(struct vsockpcb
*pcb
, struct vsock_address laddr
, struct vsock_address raddr
)
146 socket_lock_assert_owned(pcb
->so
);
148 // Certain CIDs are reserved.
149 if (laddr
.cid
== VMADDR_CID_HYPERVISOR
|| laddr
.cid
== VMADDR_CID_RESERVED
|| laddr
.cid
== VMADDR_CID_HOST
) {
150 return EADDRNOTAVAIL
;
153 // Remote address must be fully specified or not specified at all.
154 if ((raddr
.cid
== VMADDR_CID_ANY
) ^ (raddr
.port
== VMADDR_PORT_ANY
)) {
158 // Cannot bind if already bound.
159 if (pcb
->local_address
.port
!= VMADDR_PORT_ANY
) {
163 uint32_t transport_cid
;
164 struct vsock_transport
*transport
= pcb
->transport
;
165 errno_t error
= transport
->get_cid(transport
->provider
, &transport_cid
);
170 // Local CID must be this transport's CID or any.
171 if (laddr
.cid
!= transport_cid
&& laddr
.cid
!= VMADDR_CID_ANY
) {
175 if (laddr
.port
!= VMADDR_PORT_ANY
) {
176 error
= vsock_bind_address_if_free(pcb
, laddr
.cid
, laddr
.port
, raddr
.cid
, raddr
.port
);
178 lck_mtx_lock(&vsockinfo
.port_lock
);
180 const uint32_t first
= VSOCK_PORT_RESERVED
;
181 const uint32_t last
= VMADDR_PORT_ANY
- 1;
182 uint32_t count
= last
- first
+ 1;
183 uint32_t *last_port
= &vsockinfo
.last_port
;
185 if (pcb
->so
->so_flags
& SOF_BINDRANDOMPORT
) {
187 read_frandom(&random
, sizeof(random
));
188 *last_port
= first
+ (random
% count
);
193 lck_mtx_unlock(&vsockinfo
.port_lock
);
194 return EADDRNOTAVAIL
;
199 if (*last_port
< first
|| *last_port
> last
) {
203 error
= vsock_bind_address_if_free(pcb
, laddr
.cid
, *last_port
, raddr
.cid
, raddr
.port
);
206 lck_mtx_unlock(&vsockinfo
.port_lock
);
213 vsock_unbind_pcb(struct vsockpcb
*pcb
, bool is_locked
)
219 socket_lock_assert_owned(pcb
->so
);
221 soisdisconnected(pcb
->so
);
223 if (!pcb
->bound
.le_prev
) {
228 socket_unlock(pcb
->so
, 0);
229 lck_rw_lock_exclusive(&vsockinfo
.bound_lock
);
230 socket_lock(pcb
->so
, 0);
231 if (!pcb
->bound
.le_prev
) {
232 lck_rw_done(&vsockinfo
.bound_lock
);
237 LIST_REMOVE(pcb
, bound
);
238 pcb
->bound
.le_next
= NULL
;
239 pcb
->bound
.le_prev
= NULL
;
242 lck_rw_done(&vsockinfo
.bound_lock
);
246 static struct sockaddr
*
247 vsock_new_sockaddr(struct vsock_address
*address
)
253 struct sockaddr_vm
*addr
;
254 MALLOC(addr
, struct sockaddr_vm
*, sizeof(*addr
), M_SONAME
,
260 addr
->svm_len
= sizeof(*addr
);
261 addr
->svm_family
= AF_VSOCK
;
262 addr
->svm_port
= address
->port
;
263 addr
->svm_cid
= address
->cid
;
265 return (struct sockaddr
*)addr
;
269 vsock_pcb_send_message(struct vsockpcb
*pcb
, enum vsock_operation operation
, mbuf_t m
)
278 socket_lock_assert_owned(pcb
->so
);
282 struct vsock_address dst
= pcb
->remote_address
;
283 if (dst
.cid
== VMADDR_CID_ANY
|| dst
.port
== VMADDR_PORT_ANY
) {
290 struct vsock_address src
= pcb
->local_address
;
291 if (src
.cid
== VMADDR_CID_ANY
) {
292 uint32_t transport_cid
;
293 struct vsock_transport
*transport
= pcb
->transport
;
294 error
= transport
->get_cid(transport
->provider
, &transport_cid
);
301 src
.cid
= transport_cid
;
304 uint32_t buf_alloc
= pcb
->so
->so_rcv
.sb_hiwat
;
305 uint32_t fwd_cnt
= pcb
->fwd_cnt
;
307 if (src
.cid
== dst
.cid
) {
308 pcb
->last_buf_alloc
= buf_alloc
;
309 pcb
->last_fwd_cnt
= fwd_cnt
;
311 socket_unlock(pcb
->so
, 0);
312 error
= vsock_put_message(src
, dst
, operation
, buf_alloc
, fwd_cnt
, m
);
313 socket_lock(pcb
->so
, 0);
315 struct vsock_transport
*transport
= pcb
->transport
;
316 error
= transport
->put_message(transport
->provider
, src
, dst
, operation
, buf_alloc
, fwd_cnt
, m
);
319 pcb
->last_buf_alloc
= buf_alloc
;
320 pcb
->last_fwd_cnt
= fwd_cnt
;
328 vsock_pcb_reset_address(struct vsock_address src
, struct vsock_address dst
)
330 if (dst
.cid
== VMADDR_CID_ANY
|| dst
.port
== VMADDR_PORT_ANY
) {
335 struct vsock_transport
*transport
= NULL
;
337 if (src
.cid
== VMADDR_CID_ANY
) {
338 transport
= os_atomic_load(&the_vsock_transport
, relaxed
);
339 if (transport
== NULL
) {
343 uint32_t transport_cid
;
344 error
= transport
->get_cid(transport
->provider
, &transport_cid
);
348 src
.cid
= transport_cid
;
351 if (src
.cid
== dst
.cid
) {
352 error
= vsock_put_message(src
, dst
, VSOCK_RESET
, 0, 0, NULL
);
355 transport
= os_atomic_load(&the_vsock_transport
, relaxed
);
356 if (transport
== NULL
) {
360 error
= transport
->put_message(transport
->provider
, src
, dst
, VSOCK_RESET
, 0, 0, NULL
);
// Calls vsock_pcb_reset_address(src, dst) with pcb's socket lock dropped.
// The caller must hold the socket lock on entry (asserted below); it is
// released around the call and re-acquired afterwards.
// NOTE(review): the return statement is not visible in this extract;
// presumably the stored error is returned -- confirm.
367 vsock_pcb_safe_reset_address(struct vsockpcb
*pcb
, struct vsock_address src
, struct vsock_address dst
)
370 socket_lock_assert_owned(pcb
->so
);
// Drop the socket lock for the duration of the reset call.
371 socket_unlock(pcb
->so
, 0);
373 errno_t error
= vsock_pcb_reset_address(src
, dst
);
// Re-acquire the socket lock before returning to the caller.
375 socket_lock(pcb
->so
, 0);
// Sends a VSOCK_REQUEST message with no payload (NULL mbuf) for this PCB and
// returns the result of vsock_pcb_send_message().
381 vsock_pcb_connect(struct vsockpcb
*pcb
)
383 return vsock_pcb_send_message(pcb
, VSOCK_REQUEST
, NULL
);
// Sends a VSOCK_RESPONSE message with no payload (NULL mbuf) for this PCB and
// returns the result of vsock_pcb_send_message().
387 vsock_pcb_respond(struct vsockpcb
*pcb
)
389 return vsock_pcb_send_message(pcb
, VSOCK_RESPONSE
, NULL
);
// Sends the mbuf m as a VSOCK_PAYLOAD message for this PCB and returns the
// result of vsock_pcb_send_message().
393 vsock_pcb_send(struct vsockpcb
*pcb
, mbuf_t m
)
395 return vsock_pcb_send_message(pcb
, VSOCK_PAYLOAD
, m
);
// Sends a VSOCK_SHUTDOWN_SEND message with no payload (NULL mbuf) for this
// PCB and returns the result of vsock_pcb_send_message().
399 vsock_pcb_shutdown_send(struct vsockpcb
*pcb
)
401 return vsock_pcb_send_message(pcb
, VSOCK_SHUTDOWN_SEND
, NULL
);
// Sends a VSOCK_RESET message with no payload (NULL mbuf) for this PCB and
// returns the result of vsock_pcb_send_message().
405 vsock_pcb_reset(struct vsockpcb
*pcb
)
407 return vsock_pcb_send_message(pcb
, VSOCK_RESET
, NULL
);
// Sends a VSOCK_CREDIT_UPDATE message with no payload (NULL mbuf) for this
// PCB and returns the result of vsock_pcb_send_message().
411 vsock_pcb_credit_update(struct vsockpcb
*pcb
)
413 return vsock_pcb_send_message(pcb
, VSOCK_CREDIT_UPDATE
, NULL
);
// Sends a VSOCK_CREDIT_REQUEST message with no payload (NULL mbuf) for this
// PCB and returns the result of vsock_pcb_send_message().
417 vsock_pcb_credit_request(struct vsockpcb
*pcb
)
419 return vsock_pcb_send_message(pcb
, VSOCK_CREDIT_REQUEST
, NULL
);
// Shared disconnect path: with the socket lock held (asserted), unbinds the
// PCB via vsock_unbind_pcb() and then returns the result of vsock_pcb_reset().
// is_locked is forwarded unchanged to vsock_unbind_pcb().
// NOTE(review): presumably is_locked means the caller already holds
// vsockinfo.bound_lock -- confirm against vsock_unbind_pcb().
423 vsock_disconnect_pcb_common(struct vsockpcb
*pcb
, bool is_locked
)
425 socket_lock_assert_owned(pcb
->so
);
426 vsock_unbind_pcb(pcb
, is_locked
);
427 return vsock_pcb_reset(pcb
);
// Disconnects the PCB via vsock_disconnect_pcb_common() with is_locked = true.
// vsock_reset_transport() calls this between taking and releasing
// vsockinfo.bound_lock exclusively.
431 vsock_disconnect_pcb_locked(struct vsockpcb
*pcb
)
433 return vsock_disconnect_pcb_common(pcb
, true);
// Disconnects the PCB via vsock_disconnect_pcb_common() with is_locked = false.
437 vsock_disconnect_pcb(struct vsockpcb
*pcb
)
439 return vsock_disconnect_pcb_common(pcb
, false);
443 vsock_sockaddr_vm_validate(struct vsockpcb
*pcb
, struct sockaddr_vm
*addr
)
445 if (!pcb
|| !pcb
->so
|| !addr
) {
449 // Validate address length.
450 if (addr
->svm_len
< sizeof(struct sockaddr_vm
)) {
454 // Validate address family.
455 if (addr
->svm_family
!= AF_UNSPEC
&& addr
->svm_family
!= AF_VSOCK
) {
459 // Only stream is supported currently.
460 if (pcb
->so
->so_type
!= SOCK_STREAM
) {
466 /* VSock Receive Handlers */
469 vsock_put_message_connected(struct vsockpcb
*pcb
, enum vsock_operation op
, mbuf_t m
)
471 socket_lock_assert_owned(pcb
->so
);
477 error
= vsock_disconnect_pcb(pcb
);
479 case VSOCK_SHUTDOWN_RECEIVE
:
480 socantsendmore(pcb
->so
);
482 case VSOCK_SHUTDOWN_SEND
:
483 socantrcvmore(pcb
->so
);
486 // Add data to the receive queue then wakeup any reading threads.
487 error
= !sbappendstream(&pcb
->so
->so_rcv
, m
);
493 vsock_unbind_pcb(pcb
, false);
504 vsock_put_message_connecting(struct vsockpcb
*pcb
, enum vsock_operation op
)
506 socket_lock_assert_owned(pcb
->so
);
512 soisconnected(pcb
->so
);
515 pcb
->so
->so_error
= EAGAIN
;
516 error
= vsock_disconnect_pcb(pcb
);
519 vsock_disconnect_pcb(pcb
);
528 vsock_put_message_listening(struct vsockpcb
*pcb
, enum vsock_operation op
, struct vsock_address src
, struct vsock_address dst
)
530 socket_lock_assert_owned(pcb
->so
);
532 struct sockaddr_vm addr
;
533 struct socket
*so2
= NULL
;
534 struct vsockpcb
*pcb2
= NULL
;
540 addr
= (struct sockaddr_vm
) {
541 .svm_len
= sizeof(addr
),
542 .svm_family
= AF_VSOCK
,
544 .svm_port
= pcb
->local_address
.port
,
545 .svm_cid
= pcb
->local_address
.cid
547 so2
= sonewconn(pcb
->so
, 0, (struct sockaddr
*)&addr
);
549 // It is likely that the backlog is full. Deny this request.
550 vsock_pcb_safe_reset_address(pcb
, dst
, src
);
551 error
= ECONNREFUSED
;
555 pcb2
= sotovsockpcb(so2
);
561 error
= vsock_bind_address(pcb2
, dst
, src
);
566 error
= vsock_pcb_respond(pcb2
);
575 soisdisconnected(so2
);
577 vsock_unbind_pcb(pcb2
, false);
579 socket_unlock(so2
, 1);
580 vsock_pcb_reset_address(dst
, src
);
582 socket_unlock(so2
, 0);
584 socket_lock(pcb
->so
, 0);
588 error
= vsock_pcb_safe_reset_address(pcb
, dst
, src
);
591 vsock_pcb_safe_reset_address(pcb
, dst
, src
);
599 /* VSock Transport */
602 vsock_add_transport(struct vsock_transport
*transport
)
604 if (transport
== NULL
|| transport
->provider
== NULL
) {
607 if (!os_atomic_cmpxchg((void * volatile *)&the_vsock_transport
, NULL
, transport
, acq_rel
)) {
614 vsock_remove_transport(struct vsock_transport
*transport
)
616 if (!os_atomic_cmpxchg((void * volatile *)&the_vsock_transport
, transport
, NULL
, acq_rel
)) {
623 vsock_reset_transport(struct vsock_transport
*transport
)
625 if (transport
== NULL
) {
630 struct vsockpcb
*pcb
= NULL
;
631 struct vsockpcb
*tmp_pcb
= NULL
;
633 lck_rw_lock_exclusive(&vsockinfo
.bound_lock
);
634 LIST_FOREACH_SAFE(pcb
, &vsockinfo
.bound
, bound
, tmp_pcb
) {
635 // Disconnect this transport's sockets. Listening and bound sockets must stay alive.
636 socket_lock(pcb
->so
, 1);
637 if (pcb
->transport
== transport
&& pcb
->so
->so_state
& (SS_ISCONNECTED
| SS_ISCONNECTING
| SS_ISDISCONNECTING
)) {
638 errno_t dc_error
= vsock_disconnect_pcb_locked(pcb
);
639 if (dc_error
&& !error
) {
643 socket_unlock(pcb
->so
, 1);
645 lck_rw_done(&vsockinfo
.bound_lock
);
651 vsock_put_message(struct vsock_address src
, struct vsock_address dst
, enum vsock_operation op
, uint32_t buf_alloc
, uint32_t fwd_cnt
, mbuf_t m
)
653 struct vsockpcb
*pcb
= vsock_get_matching_pcb(dst
, src
);
655 if (op
!= VSOCK_RESET
) {
656 vsock_pcb_reset_address(dst
, src
);
664 socket_lock_assert_owned(pcb
->so
);
666 struct socket
*so
= pcb
->so
;
669 // Check if the peer's buffer has changed. Update our view of the peer's forwarded bytes.
670 int buffers_changed
= (pcb
->peer_buf_alloc
!= buf_alloc
) || (pcb
->peer_fwd_cnt
) != fwd_cnt
;
671 pcb
->peer_buf_alloc
= buf_alloc
;
672 pcb
->peer_fwd_cnt
= fwd_cnt
;
674 // Peer's buffer has enough space for the next packet. Notify any threads waiting for space.
675 if (buffers_changed
&& vsock_get_peer_space(pcb
) >= pcb
->waiting_send_size
) {
680 case VSOCK_CREDIT_REQUEST
:
681 error
= vsock_pcb_credit_update(pcb
);
683 case VSOCK_CREDIT_UPDATE
:
686 if (so
->so_state
& SS_ISCONNECTED
) {
687 error
= vsock_put_message_connected(pcb
, op
, m
);
689 } else if (so
->so_state
& SS_ISCONNECTING
) {
690 error
= vsock_put_message_connecting(pcb
, op
);
691 } else if (so
->so_options
& SO_ACCEPTCONN
) {
692 error
= vsock_put_message_listening(pcb
, op
, src
, dst
);
694 // Reset the connection for other states such as 'disconnecting'.
695 error
= vsock_disconnect_pcb(pcb
);
702 socket_unlock(so
, 1);
714 vsock_pcblist SYSCTL_HANDLER_ARGS
716 #pragma unused(oidp,arg2)
720 // Only stream is supported.
721 if ((intptr_t)arg1
!= SOCK_STREAM
) {
725 // Get the generation count and the count of all vsock sockets.
726 lck_rw_lock_shared(&vsockinfo
.all_lock
);
727 uint64_t n
= vsockinfo
.all_pcb_count
;
728 vsock_gen_t gen_count
= vsockinfo
.vsock_gencnt
;
729 lck_rw_done(&vsockinfo
.all_lock
);
731 const size_t xpcb_len
= sizeof(struct xvsockpcb
);
732 struct xvsockpgen xvg
;
735 * The process of preparing the PCB list is too time-consuming and
736 * resource-intensive to repeat twice on every request.
738 if (req
->oldptr
== USER_ADDR_NULL
) {
739 req
->oldidx
= (size_t)(2 * sizeof(xvg
) + (n
+ n
/ 8) * xpcb_len
);
743 if (req
->newptr
!= USER_ADDR_NULL
) {
747 bzero(&xvg
, sizeof(xvg
));
748 xvg
.xvg_len
= sizeof(xvg
);
750 xvg
.xvg_gen
= gen_count
;
751 xvg
.xvg_sogen
= so_gencnt
;
752 error
= SYSCTL_OUT(req
, &xvg
, sizeof(xvg
));
757 // Return if no sockets exist.
762 lck_rw_lock_shared(&vsockinfo
.all_lock
);
765 struct vsockpcb
*pcb
= NULL
;
766 TAILQ_FOREACH(pcb
, &vsockinfo
.all
, all
) {
767 // Bail if there is not enough user buffer for this next socket.
768 if (req
->oldlen
- req
->oldidx
- sizeof(xvg
) < xpcb_len
) {
772 // Populate the socket structure.
773 socket_lock(pcb
->so
, 1);
774 if (pcb
->vsock_gencnt
<= gen_count
) {
775 struct xvsockpcb xpcb
;
776 bzero(&xpcb
, xpcb_len
);
777 xpcb
.xv_len
= xpcb_len
;
778 xpcb
.xv_vsockpp
= (uint64_t)VM_KERNEL_ADDRHASH(pcb
);
779 xpcb
.xvp_local_cid
= pcb
->local_address
.cid
;
780 xpcb
.xvp_local_port
= pcb
->local_address
.port
;
781 xpcb
.xvp_remote_cid
= pcb
->remote_address
.cid
;
782 xpcb
.xvp_remote_port
= pcb
->remote_address
.port
;
783 xpcb
.xvp_rxcnt
= pcb
->fwd_cnt
;
784 xpcb
.xvp_txcnt
= pcb
->tx_cnt
;
785 xpcb
.xvp_peer_rxhiwat
= pcb
->peer_buf_alloc
;
786 xpcb
.xvp_peer_rxcnt
= pcb
->peer_fwd_cnt
;
787 xpcb
.xvp_last_pid
= pcb
->so
->last_pid
;
788 xpcb
.xvp_gencnt
= pcb
->vsock_gencnt
;
790 sotoxsocket(pcb
->so
, &xpcb
.xv_socket
);
792 socket_unlock(pcb
->so
, 1);
794 error
= SYSCTL_OUT(req
, &xpcb
, xpcb_len
);
800 socket_unlock(pcb
->so
, 1);
804 // Update the generation count to match the sockets being returned.
805 gen_count
= vsockinfo
.vsock_gencnt
;
807 lck_rw_done(&vsockinfo
.all_lock
);
811 * Give the user an updated idea of our state.
812 * If the generation differs from what we told
813 * her before, she knows that something happened
814 * while we were processing this request, and it
815 * might be necessary to retry.
817 bzero(&xvg
, sizeof(xvg
));
818 xvg
.xvg_len
= sizeof(xvg
);
820 xvg
.xvg_gen
= gen_count
;
821 xvg
.xvg_sogen
= so_gencnt
;
822 error
= SYSCTL_OUT(req
, &xvg
, sizeof(xvg
));
829 SYSCTL_NODE(_net
, OID_AUTO
, vsock
, CTLFLAG_RW
| CTLFLAG_LOCKED
, 0, "vsock");
830 SYSCTL_UINT(_net_vsock
, OID_AUTO
, sendspace
, CTLFLAG_RW
| CTLFLAG_LOCKED
,
831 &vsock_sendspace
, 0, "Maximum outgoing vsock datagram size");
832 SYSCTL_UINT(_net_vsock
, OID_AUTO
, recvspace
, CTLFLAG_RW
| CTLFLAG_LOCKED
,
833 &vsock_recvspace
, 0, "Maximum incoming vsock datagram size");
834 SYSCTL_PROC(_net_vsock
, OID_AUTO
, pcblist
,
835 CTLTYPE_STRUCT
| CTLFLAG_RD
| CTLFLAG_LOCKED
,
836 (caddr_t
)(long)SOCK_STREAM
, 0, vsock_pcblist
, "S,xvsockpcb",
837 "List of active vsock sockets");
843 vsock_attach(struct socket
*so
, int proto
, struct proc
*p
)
845 #pragma unused(proto, p)
847 // Attach should only be run once per socket.
848 struct vsockpcb
*pcb
= sotovsockpcb(so
);
853 // Get the transport for this socket.
854 struct vsock_transport
*transport
= os_atomic_load(&the_vsock_transport
, relaxed
);
855 if (transport
== NULL
) {
859 // Reserve send and receive buffers.
860 errno_t error
= soreserve(so
, vsock_sendspace
, vsock_recvspace
);
865 // Initialize the vsock protocol control block.
866 pcb
= zalloc(vsockpcb_zone
);
870 bzero(pcb
, sizeof(*pcb
));
872 pcb
->transport
= transport
;
873 pcb
->local_address
= (struct vsock_address
) {
874 .cid
= VMADDR_CID_ANY
,
875 .port
= VMADDR_PORT_ANY
877 pcb
->remote_address
= (struct vsock_address
) {
878 .cid
= VMADDR_CID_ANY
,
879 .port
= VMADDR_PORT_ANY
883 // Tell the transport that this socket has attached.
884 error
= transport
->attach_socket(transport
->provider
);
889 // Add to the list of all vsock sockets.
890 lck_rw_lock_exclusive(&vsockinfo
.all_lock
);
891 TAILQ_INSERT_TAIL(&vsockinfo
.all
, pcb
, all
);
892 vsockinfo
.all_pcb_count
++;
893 pcb
->vsock_gencnt
= ++vsockinfo
.vsock_gencnt
;
894 lck_rw_done(&vsockinfo
.all_lock
);
900 vsock_control(struct socket
*so
, u_long cmd
, caddr_t data
, struct ifnet
*ifp
, struct proc
*p
)
904 VERIFY(so
!= NULL
|| p
== kernproc
);
906 if (cmd
!= IOCTL_VM_SOCKETS_GET_LOCAL_CID
) {
910 struct vsock_transport
*transport
;
912 struct vsockpcb
*pcb
= sotovsockpcb(so
);
916 transport
= pcb
->transport
;
918 transport
= os_atomic_load(&the_vsock_transport
, relaxed
);
921 if (transport
== NULL
) {
925 uint32_t transport_cid
;
926 errno_t error
= transport
->get_cid(transport
->provider
, &transport_cid
);
931 memcpy(data
, &transport_cid
, sizeof(transport_cid
));
937 vsock_detach(struct socket
*so
)
939 struct vsockpcb
*pcb
= sotovsockpcb(so
);
944 vsock_unbind_pcb(pcb
, false);
946 // Tell the transport that this socket has detached.
947 struct vsock_transport
*transport
= pcb
->transport
;
948 errno_t error
= transport
->detach_socket(transport
->provider
);
953 // Remove from the list of all vsock sockets.
954 lck_rw_lock_exclusive(&vsockinfo
.all_lock
);
955 TAILQ_REMOVE(&vsockinfo
.all
, pcb
, all
);
956 pcb
->all
.tqe_next
= NULL
;
957 pcb
->all
.tqe_prev
= NULL
;
958 vsockinfo
.all_pcb_count
--;
959 vsockinfo
.vsock_gencnt
++;
960 lck_rw_done(&vsockinfo
.all_lock
);
962 // Deallocate any resources.
963 zfree(vsockpcb_zone
, pcb
);
965 so
->so_flags
|= SOF_PCBCLEARING
;
// Abort handler (installed as .pru_abort in vsock_usrreqs): marks the socket
// disconnected, then detaches it and returns the result of vsock_detach().
972 vsock_abort(struct socket
*so
)
974 soisdisconnected(so
);
975 return vsock_detach(so
);
979 vsock_bind(struct socket
*so
, struct sockaddr
*nam
, struct proc
*p
)
983 struct vsockpcb
*pcb
= sotovsockpcb(so
);
988 struct sockaddr_vm
*addr
= (struct sockaddr_vm
*)nam
;
990 errno_t error
= vsock_sockaddr_vm_validate(pcb
, addr
);
995 struct vsock_address laddr
= (struct vsock_address
) {
996 .cid
= addr
->svm_cid
,
997 .port
= addr
->svm_port
,
1000 struct vsock_address raddr
= (struct vsock_address
) {
1001 .cid
= VMADDR_CID_ANY
,
1002 .port
= VMADDR_PORT_ANY
,
1005 error
= vsock_bind_address(pcb
, laddr
, raddr
);
// Listen handler (installed as .pru_listen in vsock_usrreqs).
// Visible checks, in order: the socket must be SOCK_STREAM (otherwise
// EAFNOSUPPORT); the PCB's local port is tested; the transport's CID is
// queried; the local CID must be the transport's CID or VMADDR_CID_ANY.
// The return values for the later checks are on lines not visible in this
// extract.
1014 vsock_listen(struct socket
*so
, struct proc
*p
)
1018 struct vsockpcb
*pcb
= sotovsockpcb(so
);
1023 // Only stream is supported currently.
1024 if (so
->so_type
!= SOCK_STREAM
) {
1025 return EAFNOSUPPORT
;
1028 struct vsock_address
*addr
= &pcb
->local_address
;
// NOTE(review): this compares the local *port* against VMADDR_CID_ANY, while
// the other port checks visible in this file use VMADDR_PORT_ANY. Presumably
// the intent is "socket has no bound port" -- confirm both constants have the
// same value and switch to VMADDR_PORT_ANY for consistency.
1030 if (addr
->port
== VMADDR_CID_ANY
) {
1034 struct vsock_transport
*transport
= pcb
->transport
;
1035 uint32_t transport_cid
;
1036 errno_t error
= transport
->get_cid(transport
->provider
, &transport_cid
);
1041 // Can listen on the transport's cid or any.
1042 if (addr
->cid
!= transport_cid
&& addr
->cid
!= VMADDR_CID_ANY
) {
1050 vsock_accept(struct socket
*so
, struct sockaddr
**nam
)
1052 struct vsockpcb
*pcb
= sotovsockpcb(so
);
1057 // Do not accept disconnected sockets.
1058 if (so
->so_state
& SS_ISDISCONNECTED
) {
1059 return ECONNABORTED
;
1062 *nam
= vsock_new_sockaddr(&pcb
->remote_address
);
1068 vsock_connect(struct socket
*so
, struct sockaddr
*nam
, struct proc
*p
)
1072 struct vsockpcb
*pcb
= sotovsockpcb(so
);
1077 struct sockaddr_vm
*addr
= (struct sockaddr_vm
*)nam
;
1079 errno_t error
= vsock_sockaddr_vm_validate(pcb
, addr
);
1084 uint32_t transport_cid
;
1085 struct vsock_transport
*transport
= pcb
->transport
;
1086 error
= transport
->get_cid(transport
->provider
, &transport_cid
);
1091 // Only supporting connections to the host, hypervisor, or self for now.
1092 if (addr
->svm_cid
!= VMADDR_CID_HOST
&&
1093 addr
->svm_cid
!= VMADDR_CID_HYPERVISOR
&&
1094 addr
->svm_cid
!= transport_cid
) {
1100 // Set the remote and local address.
1101 struct vsock_address remote_addr
= (struct vsock_address
) {
1102 .cid
= addr
->svm_cid
,
1103 .port
= addr
->svm_port
,
1106 struct vsock_address local_addr
= (struct vsock_address
) {
1107 .cid
= transport_cid
,
1108 .port
= VMADDR_PORT_ANY
,
1111 // Bind to the address.
1112 error
= vsock_bind_address(pcb
, local_addr
, remote_addr
);
1117 // Attempt a connection using the socket's transport.
1118 error
= vsock_pcb_connect(pcb
);
1123 if ((so
->so_state
& SS_ISCONNECTED
) == 0) {
1124 // Don't wait for peer's response if non-blocking.
1125 if (so
->so_state
& SS_NBIO
) {
1126 error
= EINPROGRESS
;
1130 struct timespec ts
= (struct timespec
) {
1131 .tv_sec
= so
->so_snd
.sb_timeo
.tv_sec
,
1132 .tv_nsec
= so
->so_snd
.sb_timeo
.tv_usec
* 1000,
1135 lck_mtx_t
*mutex_held
;
1136 if (so
->so_proto
->pr_getlock
!= NULL
) {
1137 mutex_held
= (*so
->so_proto
->pr_getlock
)(so
, PR_F_WILLUNLOCK
);
1139 mutex_held
= so
->so_proto
->pr_domain
->dom_mtx
;
1142 // Wait until we receive a response to the connect request.
1143 error
= msleep((caddr_t
)&so
->so_timeo
, mutex_held
, PSOCK
| PCATCH
, "vsock_connect", &ts
);
1145 if (error
== EAGAIN
) {
1153 if (so
->so_error
&& !error
) {
1154 error
= so
->so_error
;
1158 error
= !(so
->so_state
& SS_ISCONNECTED
);
1161 vsock_unbind_pcb(pcb
, false);
1169 vsock_disconnect(struct socket
*so
)
1171 struct vsockpcb
*pcb
= sotovsockpcb(so
);
1176 return vsock_disconnect_pcb(pcb
);
1180 vsock_sockaddr(struct socket
*so
, struct sockaddr
**nam
)
1182 struct vsockpcb
*pcb
= sotovsockpcb(so
);
1187 *nam
= vsock_new_sockaddr(&pcb
->local_address
);
1193 vsock_peeraddr(struct socket
*so
, struct sockaddr
**nam
)
1195 struct vsockpcb
*pcb
= sotovsockpcb(so
);
1200 *nam
= vsock_new_sockaddr(&pcb
->remote_address
);
1206 vsock_send(struct socket
*so
, int flags
, struct mbuf
*m
, struct sockaddr
*nam
, struct mbuf
*control
, proc_t p
)
1208 #pragma unused(flags, nam, p)
1210 struct vsockpcb
*pcb
= sotovsockpcb(so
);
1211 if (pcb
== NULL
|| m
== NULL
) {
1215 if (control
!= NULL
) {
1220 // Ensure this socket is connected.
1221 if ((so
->so_state
& SS_ISCONNECTED
) == 0) {
1230 const size_t len
= mbuf_pkthdr_len(m
);
1231 uint32_t free_space
= vsock_get_peer_space(pcb
);
1233 // Ensure the peer has enough space in their receive buffer.
1234 while (len
> free_space
) {
1235 // Record the number of free peer bytes necessary before we can send.
1236 if (len
> pcb
->waiting_send_size
) {
1237 pcb
->waiting_send_size
= len
;
1240 // Send a credit request.
1241 error
= vsock_pcb_credit_request(pcb
);
1249 // Check again in case free space was automatically updated in loopback case.
1250 free_space
= vsock_get_peer_space(pcb
);
1251 if (len
<= free_space
) {
1252 pcb
->waiting_send_size
= 0;
1256 // Bail if this is a non-blocking socket.
1257 if (so
->so_state
& SS_NBIO
) {
1264 // Wait until our peer has enough free space in their receive buffer.
1265 error
= sbwait(&so
->so_snd
);
1266 pcb
->waiting_send_size
= 0;
1274 // Bail if an error occurred or we can't send more.
1275 if (so
->so_state
& SS_CANTSENDMORE
) {
1280 } else if (so
->so_error
) {
1281 error
= so
->so_error
;
1289 free_space
= vsock_get_peer_space(pcb
);
1292 // Send a payload over the transport.
1293 error
= vsock_pcb_send(pcb
, m
);
1304 vsock_shutdown(struct socket
*so
)
1306 struct vsockpcb
*pcb
= sotovsockpcb(so
);
1313 // Tell peer we will no longer send.
1314 errno_t error
= vsock_pcb_shutdown_send(pcb
);
1323 vsock_soreceive(struct socket
*so
, struct sockaddr
**psa
, struct uio
*uio
,
1324 struct mbuf
**mp0
, struct mbuf
**controlp
, int *flagsp
)
1326 struct vsockpcb
*pcb
= sotovsockpcb(so
);
1331 user_ssize_t length
= uio_resid(uio
);
1332 int result
= soreceive(so
, psa
, uio
, mp0
, controlp
, flagsp
);
1333 length
-= uio_resid(uio
);
1337 pcb
->fwd_cnt
+= length
;
1339 const uint32_t threshold
= VSOCK_MAX_PACKET_SIZE
;
1341 // Send a credit update if it is possible that the peer will no longer send.
1342 if ((pcb
->fwd_cnt
- pcb
->last_fwd_cnt
+ threshold
) >= pcb
->last_buf_alloc
) {
1343 errno_t error
= vsock_pcb_credit_update(pcb
);
1344 if (!result
&& error
) {
1349 socket_unlock(so
, 1);
1354 static struct pr_usrreqs vsock_usrreqs
= {
1355 .pru_abort
= vsock_abort
,
1356 .pru_attach
= vsock_attach
,
1357 .pru_control
= vsock_control
,
1358 .pru_detach
= vsock_detach
,
1359 .pru_bind
= vsock_bind
,
1360 .pru_listen
= vsock_listen
,
1361 .pru_accept
= vsock_accept
,
1362 .pru_connect
= vsock_connect
,
1363 .pru_disconnect
= vsock_disconnect
,
1364 .pru_send
= vsock_send
,
1365 .pru_shutdown
= vsock_shutdown
,
1366 .pru_sockaddr
= vsock_sockaddr
,
1367 .pru_peeraddr
= vsock_peeraddr
,
1368 .pru_sosend
= sosend
,
1369 .pru_soreceive
= vsock_soreceive
,
1373 vsock_init(struct protosw
*pp
, struct domain
*dp
)
1377 static int vsock_initialized
= 0;
1378 VERIFY((pp
->pr_flags
& (PR_INITIALIZED
| PR_ATTACHED
)) == PR_ATTACHED
);
1379 if (!os_atomic_cmpxchg((volatile int *)&vsock_initialized
, 0, 1, acq_rel
)) {
1383 // Setup VSock protocol info struct.
1384 lck_rw_init(&vsockinfo
.all_lock
, &vsock_lock_grp
, LCK_ATTR_NULL
);
1385 lck_rw_init(&vsockinfo
.bound_lock
, &vsock_lock_grp
, LCK_ATTR_NULL
);
1386 lck_mtx_init(&vsockinfo
.port_lock
, &vsock_lock_grp
, LCK_ATTR_NULL
);
1387 TAILQ_INIT(&vsockinfo
.all
);
1388 LIST_INIT(&vsockinfo
.bound
);
1389 vsockinfo
.last_port
= VMADDR_PORT_ANY
;
1392 static struct protosw vsocksw
[] = {
1394 .pr_type
= SOCK_STREAM
,
1396 .pr_flags
= PR_CONNREQUIRED
| PR_WANTRCVD
,
1397 .pr_init
= vsock_init
,
1398 .pr_usrreqs
= &vsock_usrreqs
,
1402 static const int vsock_proto_count
= (sizeof(vsocksw
) / sizeof(struct protosw
));
1406 static struct domain
*vsock_domain
= NULL
;
1409 vsock_dinit(struct domain
*dp
)
1411 // The VSock domain is initialized with a singleton pattern.
1412 VERIFY(!(dp
->dom_flags
& DOM_INITIALIZED
));
1413 VERIFY(vsock_domain
== NULL
);
1416 // Add protocols and initialize.
1417 for (int i
= 0; i
< vsock_proto_count
; i
++) {
1418 net_add_proto((struct protosw
*)&vsocksw
[i
], dp
, 1);
1422 struct domain vsockdomain_s
= {
1423 .dom_family
= PF_VSOCK
,
1424 .dom_name
= "vsock",
1425 .dom_init
= vsock_dinit
,
1426 .dom_maxrtkey
= sizeof(struct sockaddr_vm
),
1427 .dom_protohdrlen
= sizeof(struct sockaddr_vm
),