2 * Copyright (c) 2020 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 #include <sys/domain.h>
30 #include <sys/socket.h>
31 #include <sys/protosw.h>
32 #include <sys/mcache.h>
33 #include <sys/systm.h>
34 #include <sys/sysctl.h>
35 #include <sys/random.h>
37 #include <sys/vsock_domain.h>
38 #include <sys/vsock_transport.h>
39 #include <kern/task.h>
40 #include <kern/zalloc.h>
41 #include <kern/locks.h>
42 #include <machine/atomic.h>
44 #define sotovsockpcb(so) ((struct vsockpcb *)(so)->so_pcb)
46 #define VSOCK_PORT_RESERVED 1024
48 /* VSock Protocol Globals */
50 static struct vsock_transport
* _Atomic the_vsock_transport
= NULL
;
51 static ZONE_DECLARE(vsockpcb_zone
, "vsockpcbzone",
52 sizeof(struct vsockpcb
), ZC_NONE
);
53 static struct vsockpcbinfo vsockinfo
;
55 static uint32_t vsock_sendspace
= VSOCK_MAX_PACKET_SIZE
* 8;
56 static uint32_t vsock_recvspace
= VSOCK_MAX_PACKET_SIZE
* 8;
58 /* VSock PCB Helpers */
61 vsock_get_peer_space(struct vsockpcb
*pcb
)
63 return pcb
->peer_buf_alloc
- (pcb
->tx_cnt
- pcb
->peer_fwd_cnt
);
66 static struct vsockpcb
*
67 vsock_get_matching_pcb(struct vsock_address src
, struct vsock_address dst
)
69 struct vsockpcb
*preferred
= NULL
;
70 struct vsockpcb
*match
= NULL
;
71 struct vsockpcb
*pcb
= NULL
;
73 lck_rw_lock_shared(vsockinfo
.bound_lock
);
74 LIST_FOREACH(pcb
, &vsockinfo
.bound
, bound
) {
75 // Source cid and port must match. Only destination port must match. (Allows for a changing CID during migration)
76 socket_lock(pcb
->so
, 1);
77 if ((pcb
->so
->so_state
& SS_ISCONNECTED
|| pcb
->so
->so_state
& SS_ISCONNECTING
) &&
78 pcb
->local_address
.cid
== src
.cid
&& pcb
->local_address
.port
== src
.port
&&
79 pcb
->remote_address
.port
== dst
.port
) {
82 } else if ((pcb
->local_address
.cid
== src
.cid
|| pcb
->local_address
.cid
== VMADDR_CID_ANY
) &&
83 pcb
->local_address
.port
== src
.port
) {
86 socket_unlock(pcb
->so
, 1);
88 if (!preferred
&& match
) {
89 socket_lock(match
->so
, 1);
92 lck_rw_done(vsockinfo
.bound_lock
);
98 vsock_bind_address_if_free(struct vsockpcb
*pcb
, uint32_t local_cid
, uint32_t local_port
, uint32_t remote_cid
, uint32_t remote_port
)
100 socket_lock_assert_owned(pcb
->so
);
103 if (local_port
!= VMADDR_PORT_ANY
&& local_port
< VSOCK_PORT_RESERVED
&&
104 current_task() != kernel_task
&& proc_suser(current_proc()) != 0) {
109 const bool check_remote
= (remote_cid
!= VMADDR_CID_ANY
&& remote_port
!= VMADDR_PORT_ANY
);
111 struct vsockpcb
*pcb_match
= NULL
;
113 socket_unlock(pcb
->so
, 0);
114 lck_rw_lock_exclusive(vsockinfo
.bound_lock
);
115 LIST_FOREACH(pcb_match
, &vsockinfo
.bound
, bound
) {
116 socket_lock(pcb_match
->so
, 1);
117 if (pcb
== pcb_match
||
118 (!check_remote
&& pcb_match
->local_address
.port
== local_port
) ||
119 (check_remote
&& pcb_match
->local_address
.port
== local_port
&&
120 pcb_match
->remote_address
.cid
== remote_cid
&& pcb_match
->remote_address
.port
== remote_port
)) {
121 socket_unlock(pcb_match
->so
, 1);
125 socket_unlock(pcb_match
->so
, 1);
127 socket_lock(pcb
->so
, 0);
129 pcb
->local_address
= (struct vsock_address
) { .cid
= local_cid
, .port
= local_port
};
130 pcb
->remote_address
= (struct vsock_address
) { .cid
= remote_cid
, .port
= remote_port
};
131 LIST_INSERT_HEAD(&vsockinfo
.bound
, pcb
, bound
);
133 lck_rw_done(vsockinfo
.bound_lock
);
135 return taken
? EADDRINUSE
: 0;
139 vsock_bind_address(struct vsockpcb
*pcb
, struct vsock_address laddr
, struct vsock_address raddr
)
145 socket_lock_assert_owned(pcb
->so
);
147 // Certain CIDs are reserved.
148 if (laddr
.cid
== VMADDR_CID_HYPERVISOR
|| laddr
.cid
== VMADDR_CID_RESERVED
|| laddr
.cid
== VMADDR_CID_HOST
) {
149 return EADDRNOTAVAIL
;
152 // Remote address must be fully specified or not specified at all.
153 if ((raddr
.cid
== VMADDR_CID_ANY
) ^ (raddr
.port
== VMADDR_PORT_ANY
)) {
157 // Cannot bind if already bound.
158 if (pcb
->local_address
.port
!= VMADDR_PORT_ANY
) {
162 uint32_t transport_cid
;
163 struct vsock_transport
*transport
= pcb
->transport
;
164 errno_t error
= transport
->get_cid(transport
->provider
, &transport_cid
);
169 // Local CID must be this transport's CID or any.
170 if (laddr
.cid
!= transport_cid
&& laddr
.cid
!= VMADDR_CID_ANY
) {
174 if (laddr
.port
!= VMADDR_PORT_ANY
) {
175 error
= vsock_bind_address_if_free(pcb
, laddr
.cid
, laddr
.port
, raddr
.cid
, raddr
.port
);
177 lck_mtx_lock(&vsockinfo
.port_lock
);
179 const uint32_t first
= VSOCK_PORT_RESERVED
;
180 const uint32_t last
= VMADDR_PORT_ANY
- 1;
181 uint32_t count
= last
- first
+ 1;
182 uint32_t *last_port
= &vsockinfo
.last_port
;
184 if (pcb
->so
->so_flags
& SOF_BINDRANDOMPORT
) {
186 read_frandom(&random
, sizeof(random
));
187 *last_port
= first
+ (random
% count
);
192 lck_mtx_unlock(&vsockinfo
.port_lock
);
193 return EADDRNOTAVAIL
;
198 if (*last_port
< first
|| *last_port
> last
) {
202 error
= vsock_bind_address_if_free(pcb
, laddr
.cid
, *last_port
, raddr
.cid
, raddr
.port
);
205 lck_mtx_unlock(&vsockinfo
.port_lock
);
212 vsock_unbind_pcb(struct vsockpcb
*pcb
, bool is_locked
)
218 socket_lock_assert_owned(pcb
->so
);
220 soisdisconnected(pcb
->so
);
222 if (!pcb
->bound
.le_prev
) {
227 socket_unlock(pcb
->so
, 0);
228 lck_rw_lock_exclusive(vsockinfo
.bound_lock
);
229 socket_lock(pcb
->so
, 0);
230 if (!pcb
->bound
.le_prev
) {
231 lck_rw_done(vsockinfo
.bound_lock
);
236 LIST_REMOVE(pcb
, bound
);
237 pcb
->bound
.le_next
= NULL
;
238 pcb
->bound
.le_prev
= NULL
;
241 lck_rw_done(vsockinfo
.bound_lock
);
245 static struct sockaddr
*
246 vsock_new_sockaddr(struct vsock_address
*address
)
252 struct sockaddr_vm
*addr
;
253 MALLOC(addr
, struct sockaddr_vm
*, sizeof(*addr
), M_SONAME
, M_WAITOK
);
258 bzero(addr
, sizeof(*addr
));
259 addr
->svm_len
= sizeof(*addr
);
260 addr
->svm_family
= AF_VSOCK
;
261 addr
->svm_port
= address
->port
;
262 addr
->svm_cid
= address
->cid
;
264 return (struct sockaddr
*)addr
;
268 vsock_pcb_send_message(struct vsockpcb
*pcb
, enum vsock_operation operation
, mbuf_t m
)
277 socket_lock_assert_owned(pcb
->so
);
281 struct vsock_address dst
= pcb
->remote_address
;
282 if (dst
.cid
== VMADDR_CID_ANY
|| dst
.port
== VMADDR_PORT_ANY
) {
289 struct vsock_address src
= pcb
->local_address
;
290 if (src
.cid
== VMADDR_CID_ANY
) {
291 uint32_t transport_cid
;
292 struct vsock_transport
*transport
= pcb
->transport
;
293 error
= transport
->get_cid(transport
->provider
, &transport_cid
);
300 src
.cid
= transport_cid
;
303 uint32_t buf_alloc
= pcb
->so
->so_rcv
.sb_hiwat
;
304 uint32_t fwd_cnt
= pcb
->fwd_cnt
;
306 if (src
.cid
== dst
.cid
) {
307 pcb
->last_buf_alloc
= buf_alloc
;
308 pcb
->last_fwd_cnt
= fwd_cnt
;
310 socket_unlock(pcb
->so
, 0);
311 error
= vsock_put_message(src
, dst
, operation
, buf_alloc
, fwd_cnt
, m
);
312 socket_lock(pcb
->so
, 0);
314 struct vsock_transport
*transport
= pcb
->transport
;
315 error
= transport
->put_message(transport
->provider
, src
, dst
, operation
, buf_alloc
, fwd_cnt
, m
);
318 pcb
->last_buf_alloc
= buf_alloc
;
319 pcb
->last_fwd_cnt
= fwd_cnt
;
327 vsock_pcb_reset_address(struct vsock_address src
, struct vsock_address dst
)
329 if (dst
.cid
== VMADDR_CID_ANY
|| dst
.port
== VMADDR_PORT_ANY
) {
334 struct vsock_transport
*transport
= NULL
;
336 if (src
.cid
== VMADDR_CID_ANY
) {
337 transport
= os_atomic_load(&the_vsock_transport
, relaxed
);
338 if (transport
== NULL
) {
342 uint32_t transport_cid
;
343 error
= transport
->get_cid(transport
->provider
, &transport_cid
);
347 src
.cid
= transport_cid
;
350 if (src
.cid
== dst
.cid
) {
351 error
= vsock_put_message(src
, dst
, VSOCK_RESET
, 0, 0, NULL
);
354 transport
= os_atomic_load(&the_vsock_transport
, relaxed
);
355 if (transport
== NULL
) {
359 error
= transport
->put_message(transport
->provider
, src
, dst
, VSOCK_RESET
, 0, 0, NULL
);
366 vsock_pcb_safe_reset_address(struct vsockpcb
*pcb
, struct vsock_address src
, struct vsock_address dst
)
369 socket_lock_assert_owned(pcb
->so
);
370 socket_unlock(pcb
->so
, 0);
372 errno_t error
= vsock_pcb_reset_address(src
, dst
);
374 socket_lock(pcb
->so
, 0);
380 vsock_pcb_connect(struct vsockpcb
*pcb
)
382 return vsock_pcb_send_message(pcb
, VSOCK_REQUEST
, NULL
);
386 vsock_pcb_respond(struct vsockpcb
*pcb
)
388 return vsock_pcb_send_message(pcb
, VSOCK_RESPONSE
, NULL
);
392 vsock_pcb_send(struct vsockpcb
*pcb
, mbuf_t m
)
394 return vsock_pcb_send_message(pcb
, VSOCK_PAYLOAD
, m
);
398 vsock_pcb_shutdown_send(struct vsockpcb
*pcb
)
400 return vsock_pcb_send_message(pcb
, VSOCK_SHUTDOWN_SEND
, NULL
);
404 vsock_pcb_reset(struct vsockpcb
*pcb
)
406 return vsock_pcb_send_message(pcb
, VSOCK_RESET
, NULL
);
410 vsock_pcb_credit_update(struct vsockpcb
*pcb
)
412 return vsock_pcb_send_message(pcb
, VSOCK_CREDIT_UPDATE
, NULL
);
416 vsock_pcb_credit_request(struct vsockpcb
*pcb
)
418 return vsock_pcb_send_message(pcb
, VSOCK_CREDIT_REQUEST
, NULL
);
422 vsock_disconnect_pcb_common(struct vsockpcb
*pcb
, bool is_locked
)
424 socket_lock_assert_owned(pcb
->so
);
425 vsock_unbind_pcb(pcb
, is_locked
);
426 return vsock_pcb_reset(pcb
);
430 vsock_disconnect_pcb_locked(struct vsockpcb
*pcb
)
432 return vsock_disconnect_pcb_common(pcb
, true);
436 vsock_disconnect_pcb(struct vsockpcb
*pcb
)
438 return vsock_disconnect_pcb_common(pcb
, false);
442 vsock_sockaddr_vm_validate(struct vsockpcb
*pcb
, struct sockaddr_vm
*addr
)
444 if (!pcb
|| !pcb
->so
|| !addr
) {
448 // Validate address length.
449 if (addr
->svm_len
< sizeof(struct sockaddr_vm
)) {
453 // Validate address family.
454 if (addr
->svm_family
!= AF_UNSPEC
&& addr
->svm_family
!= AF_VSOCK
) {
458 // Only stream is supported currently.
459 if (pcb
->so
->so_type
!= SOCK_STREAM
) {
465 /* VSock Receive Handlers */
468 vsock_put_message_connected(struct vsockpcb
*pcb
, enum vsock_operation op
, mbuf_t m
)
470 socket_lock_assert_owned(pcb
->so
);
476 error
= vsock_disconnect_pcb(pcb
);
478 case VSOCK_SHUTDOWN_RECEIVE
:
479 socantsendmore(pcb
->so
);
481 case VSOCK_SHUTDOWN_SEND
:
482 socantrcvmore(pcb
->so
);
485 // Add data to the receive queue then wakeup any reading threads.
486 error
= !sbappendstream(&pcb
->so
->so_rcv
, m
);
492 vsock_unbind_pcb(pcb
, false);
503 vsock_put_message_connecting(struct vsockpcb
*pcb
, enum vsock_operation op
)
505 socket_lock_assert_owned(pcb
->so
);
511 soisconnected(pcb
->so
);
514 pcb
->so
->so_error
= EAGAIN
;
515 error
= vsock_disconnect_pcb(pcb
);
518 vsock_disconnect_pcb(pcb
);
/*
 * Handles an incoming operation for a listening socket.
 *
 * VSOCK_REQUEST creates a new socket with sonewconn(), binds it to the
 * (dst, src) address pair, answers with VSOCK_RESPONSE and marks it
 * connected. If any step fails the new socket is disconnected and unbound
 * and a reset is sent to the requester. VSOCK_RESET is answered with a
 * reset; any other operation is answered with a reset and yields ENOTSUP.
 *
 * The listening socket's lock must be held on entry and is held on return.
 */
static errno_t
vsock_put_message_listening(struct vsockpcb *pcb, enum vsock_operation op, struct vsock_address src, struct vsock_address dst)
{
	socket_lock_assert_owned(pcb->so);

	if (op == VSOCK_RESET) {
		return vsock_pcb_safe_reset_address(pcb, dst, src);
	}
	if (op != VSOCK_REQUEST) {
		vsock_pcb_safe_reset_address(pcb, dst, src);
		return ENOTSUP;
	}

	// Unnamed members (the reserved field) are zero-initialized.
	struct sockaddr_vm addr = (struct sockaddr_vm) {
		.svm_len = sizeof(addr),
		.svm_family = AF_VSOCK,
		.svm_port = pcb->local_address.port,
		.svm_cid = pcb->local_address.cid
	};

	struct socket *so2 = sonewconn(pcb->so, 0, (struct sockaddr *)&addr);
	if (so2 == NULL) {
		// It is likely that the backlog is full. Deny this request.
		vsock_pcb_safe_reset_address(pcb, dst, src);
		return ECONNREFUSED;
	}

	errno_t error = 0;
	struct vsockpcb *pcb2 = sotovsockpcb(so2);

	// Each step runs only if everything before it succeeded.
	if (pcb2 == NULL) {
		error = EINVAL;
	}
	if (!error) {
		error = vsock_bind_address(pcb2, dst, src);
	}
	if (!error) {
		error = vsock_pcb_respond(pcb2);
	}

	if (!error) {
		soisconnected(so2);
		socket_unlock(so2, 0);
	} else {
		soisdisconnected(so2);
		if (pcb2 != NULL) {
			vsock_unbind_pcb(pcb2, false);
		}
		socket_unlock(so2, 1);
		vsock_pcb_reset_address(dst, src);
	}

	// Take the listening socket's lock again before returning to the caller.
	socket_lock(pcb->so, 0);

	return error;
}
598 /* VSock Transport */
601 vsock_add_transport(struct vsock_transport
*transport
)
603 if (transport
== NULL
|| transport
->provider
== NULL
) {
606 if (!os_atomic_cmpxchg((void * volatile *)&the_vsock_transport
, NULL
, transport
, acq_rel
)) {
613 vsock_remove_transport(struct vsock_transport
*transport
)
615 if (!os_atomic_cmpxchg((void * volatile *)&the_vsock_transport
, transport
, NULL
, acq_rel
)) {
622 vsock_reset_transport(struct vsock_transport
*transport
)
624 if (transport
== NULL
) {
629 struct vsockpcb
*pcb
= NULL
;
630 struct vsockpcb
*tmp_pcb
= NULL
;
632 lck_rw_lock_exclusive(vsockinfo
.bound_lock
);
633 LIST_FOREACH_SAFE(pcb
, &vsockinfo
.bound
, bound
, tmp_pcb
) {
634 // Disconnect this transport's sockets. Listen and bind sockets must stay alive.
635 socket_lock(pcb
->so
, 1);
636 if (pcb
->transport
== transport
&& pcb
->so
->so_state
& (SS_ISCONNECTED
| SS_ISCONNECTING
| SS_ISDISCONNECTING
)) {
637 errno_t dc_error
= vsock_disconnect_pcb_locked(pcb
);
638 if (dc_error
&& !error
) {
642 socket_unlock(pcb
->so
, 1);
644 lck_rw_done(vsockinfo
.bound_lock
);
650 vsock_put_message(struct vsock_address src
, struct vsock_address dst
, enum vsock_operation op
, uint32_t buf_alloc
, uint32_t fwd_cnt
, mbuf_t m
)
652 struct vsockpcb
*pcb
= vsock_get_matching_pcb(dst
, src
);
654 if (op
!= VSOCK_RESET
) {
655 vsock_pcb_reset_address(dst
, src
);
663 socket_lock_assert_owned(pcb
->so
);
665 struct socket
*so
= pcb
->so
;
668 // Check if the peer's buffer has changed. Update our view of the peer's forwarded bytes.
669 int buffers_changed
= (pcb
->peer_buf_alloc
!= buf_alloc
) || (pcb
->peer_fwd_cnt
) != fwd_cnt
;
670 pcb
->peer_buf_alloc
= buf_alloc
;
671 pcb
->peer_fwd_cnt
= fwd_cnt
;
673 // Peer's buffer has enough space for the next packet. Notify any threads waiting for space.
674 if (buffers_changed
&& vsock_get_peer_space(pcb
) >= pcb
->waiting_send_size
) {
679 case VSOCK_CREDIT_REQUEST
:
680 error
= vsock_pcb_credit_update(pcb
);
682 case VSOCK_CREDIT_UPDATE
:
685 if (so
->so_state
& SS_ISCONNECTED
) {
686 error
= vsock_put_message_connected(pcb
, op
, m
);
688 } else if (so
->so_state
& SS_ISCONNECTING
) {
689 error
= vsock_put_message_connecting(pcb
, op
);
690 } else if (so
->so_options
& SO_ACCEPTCONN
) {
691 error
= vsock_put_message_listening(pcb
, op
, src
, dst
);
693 // Reset the connection for other states such as 'disconnecting'.
694 error
= vsock_disconnect_pcb(pcb
);
701 socket_unlock(so
, 1);
713 vsock_pcblist SYSCTL_HANDLER_ARGS
715 #pragma unused(oidp,arg2)
719 // Only stream is supported.
720 if ((intptr_t)arg1
!= SOCK_STREAM
) {
724 // Get the generation count and the count of all vsock sockets.
725 lck_rw_lock_shared(vsockinfo
.all_lock
);
726 uint64_t n
= vsockinfo
.all_pcb_count
;
727 vsock_gen_t gen_count
= vsockinfo
.vsock_gencnt
;
728 lck_rw_done(vsockinfo
.all_lock
);
730 const size_t xpcb_len
= sizeof(struct xvsockpcb
);
731 struct xvsockpgen xvg
;
734 * The process of preparing the PCB list is too time-consuming and
735 * resource-intensive to repeat twice on every request.
737 if (req
->oldptr
== USER_ADDR_NULL
) {
738 req
->oldidx
= (size_t)(2 * sizeof(xvg
) + (n
+ n
/ 8) * xpcb_len
);
742 if (req
->newptr
!= USER_ADDR_NULL
) {
746 bzero(&xvg
, sizeof(xvg
));
747 xvg
.xvg_len
= sizeof(xvg
);
749 xvg
.xvg_gen
= gen_count
;
750 xvg
.xvg_sogen
= so_gencnt
;
751 error
= SYSCTL_OUT(req
, &xvg
, sizeof(xvg
));
756 // Return if no sockets exist.
761 lck_rw_lock_shared(vsockinfo
.all_lock
);
764 struct vsockpcb
*pcb
= NULL
;
765 TAILQ_FOREACH(pcb
, &vsockinfo
.all
, all
) {
766 // Bail if there is not enough user buffer for this next socket.
767 if (req
->oldlen
- req
->oldidx
- sizeof(xvg
) < xpcb_len
) {
771 // Populate the socket structure.
772 socket_lock(pcb
->so
, 1);
773 if (pcb
->vsock_gencnt
<= gen_count
) {
774 struct xvsockpcb xpcb
;
775 bzero(&xpcb
, xpcb_len
);
776 xpcb
.xv_len
= xpcb_len
;
777 xpcb
.xv_vsockpp
= (uint64_t)VM_KERNEL_ADDRHASH(pcb
);
778 xpcb
.xvp_local_cid
= pcb
->local_address
.cid
;
779 xpcb
.xvp_local_port
= pcb
->local_address
.port
;
780 xpcb
.xvp_remote_cid
= pcb
->remote_address
.cid
;
781 xpcb
.xvp_remote_port
= pcb
->remote_address
.port
;
782 xpcb
.xvp_rxcnt
= pcb
->fwd_cnt
;
783 xpcb
.xvp_txcnt
= pcb
->tx_cnt
;
784 xpcb
.xvp_peer_rxhiwat
= pcb
->peer_buf_alloc
;
785 xpcb
.xvp_peer_rxcnt
= pcb
->peer_fwd_cnt
;
786 xpcb
.xvp_last_pid
= pcb
->so
->last_pid
;
787 xpcb
.xvp_gencnt
= pcb
->vsock_gencnt
;
789 sotoxsocket(pcb
->so
, &xpcb
.xv_socket
);
791 socket_unlock(pcb
->so
, 1);
793 error
= SYSCTL_OUT(req
, &xpcb
, xpcb_len
);
799 socket_unlock(pcb
->so
, 1);
803 // Update the generation count to match the sockets being returned.
804 gen_count
= vsockinfo
.vsock_gencnt
;
806 lck_rw_done(vsockinfo
.all_lock
);
810 * Give the user an updated idea of our state.
811 * If the generation differs from what we told
812 * her before, she knows that something happened
813 * while we were processing this request, and it
814 * might be necessary to retry.
816 bzero(&xvg
, sizeof(xvg
));
817 xvg
.xvg_len
= sizeof(xvg
);
819 xvg
.xvg_gen
= gen_count
;
820 xvg
.xvg_sogen
= so_gencnt
;
821 error
= SYSCTL_OUT(req
, &xvg
, sizeof(xvg
));
#ifdef SYSCTL_DECL
/* net.vsock: parent node for the vsock tunables below. */
SYSCTL_NODE(_net, OID_AUTO, vsock, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "vsock");

/* net.vsock.sendspace: send buffer size reserved for new sockets. */
SYSCTL_UINT(_net_vsock, OID_AUTO, sendspace, CTLFLAG_RW | CTLFLAG_LOCKED,
    &vsock_sendspace, 0, "Maximum outgoing vsock datagram size");

/* net.vsock.recvspace: receive buffer size reserved for new sockets. */
SYSCTL_UINT(_net_vsock, OID_AUTO, recvspace, CTLFLAG_RW | CTLFLAG_LOCKED,
    &vsock_recvspace, 0, "Maximum incoming vsock datagram size");

/* net.vsock.pcblist: read-only dump of sockets, served by vsock_pcblist(). */
SYSCTL_PROC(_net_vsock, OID_AUTO, pcblist,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    (caddr_t)(long)SOCK_STREAM, 0, vsock_pcblist, "S,xvsockpcb",
    "List of active vsock sockets");
#endif
842 vsock_attach(struct socket
*so
, int proto
, struct proc
*p
)
844 #pragma unused(proto, p)
846 // Attach should only be run once per socket.
847 struct vsockpcb
*pcb
= sotovsockpcb(so
);
852 // Get the transport for this socket.
853 struct vsock_transport
*transport
= os_atomic_load(&the_vsock_transport
, relaxed
);
854 if (transport
== NULL
) {
858 // Reserve send and receive buffers.
859 errno_t error
= soreserve(so
, vsock_sendspace
, vsock_recvspace
);
864 // Initialize the vsock protocol control block.
865 pcb
= zalloc(vsockpcb_zone
);
869 bzero(pcb
, sizeof(*pcb
));
871 pcb
->transport
= transport
;
872 pcb
->local_address
= (struct vsock_address
) {
873 .cid
= VMADDR_CID_ANY
,
874 .port
= VMADDR_PORT_ANY
876 pcb
->remote_address
= (struct vsock_address
) {
877 .cid
= VMADDR_CID_ANY
,
878 .port
= VMADDR_PORT_ANY
882 // Tell the transport that this socket has attached.
883 error
= transport
->attach_socket(transport
->provider
);
888 // Add to the list of all vsock sockets.
889 lck_rw_lock_exclusive(vsockinfo
.all_lock
);
890 TAILQ_INSERT_TAIL(&vsockinfo
.all
, pcb
, all
);
891 vsockinfo
.all_pcb_count
++;
892 pcb
->vsock_gencnt
= ++vsockinfo
.vsock_gencnt
;
893 lck_rw_done(vsockinfo
.all_lock
);
899 vsock_control(struct socket
*so
, u_long cmd
, caddr_t data
, struct ifnet
*ifp
, struct proc
*p
)
903 VERIFY(so
!= NULL
|| p
== kernproc
);
905 if (cmd
!= IOCTL_VM_SOCKETS_GET_LOCAL_CID
) {
909 struct vsock_transport
*transport
;
911 struct vsockpcb
*pcb
= sotovsockpcb(so
);
915 transport
= pcb
->transport
;
917 transport
= os_atomic_load(&the_vsock_transport
, relaxed
);
920 if (transport
== NULL
) {
924 uint32_t transport_cid
;
925 errno_t error
= transport
->get_cid(transport
->provider
, &transport_cid
);
930 memcpy(data
, &transport_cid
, sizeof(transport_cid
));
936 vsock_detach(struct socket
*so
)
938 struct vsockpcb
*pcb
= sotovsockpcb(so
);
943 vsock_unbind_pcb(pcb
, false);
945 // Tell the transport that this socket has detached.
946 struct vsock_transport
*transport
= pcb
->transport
;
947 errno_t error
= transport
->detach_socket(transport
->provider
);
952 // Remove from the list of all vsock sockets.
953 lck_rw_lock_exclusive(vsockinfo
.all_lock
);
954 TAILQ_REMOVE(&vsockinfo
.all
, pcb
, all
);
955 pcb
->all
.tqe_next
= NULL
;
956 pcb
->all
.tqe_prev
= NULL
;
957 vsockinfo
.all_pcb_count
--;
958 vsockinfo
.vsock_gencnt
++;
959 lck_rw_done(vsockinfo
.all_lock
);
961 // Deallocate any resources.
962 zfree(vsockpcb_zone
, pcb
);
964 so
->so_flags
|= SOF_PCBCLEARING
;
/* pru_abort: marks the socket disconnected, then detaches it. */
static int
vsock_abort(struct socket *so)
{
	soisdisconnected(so);

	const int detach_result = vsock_detach(so);
	return detach_result;
}
978 vsock_bind(struct socket
*so
, struct sockaddr
*nam
, struct proc
*p
)
982 struct vsockpcb
*pcb
= sotovsockpcb(so
);
987 struct sockaddr_vm
*addr
= (struct sockaddr_vm
*)nam
;
989 errno_t error
= vsock_sockaddr_vm_validate(pcb
, addr
);
994 struct vsock_address laddr
= (struct vsock_address
) {
995 .cid
= addr
->svm_cid
,
996 .port
= addr
->svm_port
,
999 struct vsock_address raddr
= (struct vsock_address
) {
1000 .cid
= VMADDR_CID_ANY
,
1001 .port
= VMADDR_PORT_ANY
,
1004 error
= vsock_bind_address(pcb
, laddr
, raddr
);
1013 vsock_listen(struct socket
*so
, struct proc
*p
)
1017 struct vsockpcb
*pcb
= sotovsockpcb(so
);
1022 // Only stream is supported currently.
1023 if (so
->so_type
!= SOCK_STREAM
) {
1024 return EAFNOSUPPORT
;
1027 struct vsock_address
*addr
= &pcb
->local_address
;
1029 if (addr
->port
== VMADDR_CID_ANY
) {
1033 struct vsock_transport
*transport
= pcb
->transport
;
1034 uint32_t transport_cid
;
1035 errno_t error
= transport
->get_cid(transport
->provider
, &transport_cid
);
1040 // Can listen on the transport's cid or any.
1041 if (addr
->cid
!= transport_cid
&& addr
->cid
!= VMADDR_CID_ANY
) {
1049 vsock_accept(struct socket
*so
, struct sockaddr
**nam
)
1051 struct vsockpcb
*pcb
= sotovsockpcb(so
);
1056 // Do not accept disconnected sockets.
1057 if (so
->so_state
& SS_ISDISCONNECTED
) {
1058 return ECONNABORTED
;
1061 *nam
= vsock_new_sockaddr(&pcb
->remote_address
);
1067 vsock_connect(struct socket
*so
, struct sockaddr
*nam
, struct proc
*p
)
1071 struct vsockpcb
*pcb
= sotovsockpcb(so
);
1076 struct sockaddr_vm
*addr
= (struct sockaddr_vm
*)nam
;
1078 errno_t error
= vsock_sockaddr_vm_validate(pcb
, addr
);
1083 uint32_t transport_cid
;
1084 struct vsock_transport
*transport
= pcb
->transport
;
1085 error
= transport
->get_cid(transport
->provider
, &transport_cid
);
1090 // Only supporting connections to the host, hypervisor, or self for now.
1091 if (addr
->svm_cid
!= VMADDR_CID_HOST
&&
1092 addr
->svm_cid
!= VMADDR_CID_HYPERVISOR
&&
1093 addr
->svm_cid
!= transport_cid
) {
1099 // Set the remote and local address.
1100 struct vsock_address remote_addr
= (struct vsock_address
) {
1101 .cid
= addr
->svm_cid
,
1102 .port
= addr
->svm_port
,
1105 struct vsock_address local_addr
= (struct vsock_address
) {
1106 .cid
= transport_cid
,
1107 .port
= VMADDR_PORT_ANY
,
1110 // Bind to the address.
1111 error
= vsock_bind_address(pcb
, local_addr
, remote_addr
);
1116 // Attempt a connection using the socket's transport.
1117 error
= vsock_pcb_connect(pcb
);
1122 if ((so
->so_state
& SS_ISCONNECTED
) == 0) {
1123 // Don't wait for peer's response if non-blocking.
1124 if (so
->so_state
& SS_NBIO
) {
1125 error
= EINPROGRESS
;
1129 struct timespec ts
= (struct timespec
) {
1130 .tv_sec
= so
->so_snd
.sb_timeo
.tv_sec
,
1131 .tv_nsec
= so
->so_snd
.sb_timeo
.tv_usec
* 1000,
1134 lck_mtx_t
*mutex_held
;
1135 if (so
->so_proto
->pr_getlock
!= NULL
) {
1136 mutex_held
= (*so
->so_proto
->pr_getlock
)(so
, PR_F_WILLUNLOCK
);
1138 mutex_held
= so
->so_proto
->pr_domain
->dom_mtx
;
1141 // Wait until we receive a response to the connect request.
1142 error
= msleep((caddr_t
)&so
->so_timeo
, mutex_held
, PSOCK
| PCATCH
, "vsock_connect", &ts
);
1144 if (error
== EAGAIN
) {
1152 if (so
->so_error
&& !error
) {
1153 error
= so
->so_error
;
1157 error
= !(so
->so_state
& SS_ISCONNECTED
);
1160 vsock_unbind_pcb(pcb
, false);
1168 vsock_disconnect(struct socket
*so
)
1170 struct vsockpcb
*pcb
= sotovsockpcb(so
);
1175 return vsock_disconnect_pcb(pcb
);
1179 vsock_sockaddr(struct socket
*so
, struct sockaddr
**nam
)
1181 struct vsockpcb
*pcb
= sotovsockpcb(so
);
1186 *nam
= vsock_new_sockaddr(&pcb
->local_address
);
1192 vsock_peeraddr(struct socket
*so
, struct sockaddr
**nam
)
1194 struct vsockpcb
*pcb
= sotovsockpcb(so
);
1199 *nam
= vsock_new_sockaddr(&pcb
->remote_address
);
1205 vsock_send(struct socket
*so
, int flags
, struct mbuf
*m
, struct sockaddr
*nam
, struct mbuf
*control
, proc_t p
)
1207 #pragma unused(flags, nam, p)
1209 struct vsockpcb
*pcb
= sotovsockpcb(so
);
1210 if (pcb
== NULL
|| m
== NULL
) {
1214 if (control
!= NULL
) {
1219 // Ensure this socket is connected.
1220 if ((so
->so_state
& SS_ISCONNECTED
) == 0) {
1229 const size_t len
= mbuf_pkthdr_len(m
);
1230 uint32_t free_space
= vsock_get_peer_space(pcb
);
1232 // Ensure the peer has enough space in their receive buffer.
1233 while (len
> free_space
) {
1234 // Record the number of free peer bytes necessary before we can send.
1235 if (len
> pcb
->waiting_send_size
) {
1236 pcb
->waiting_send_size
= len
;
1239 // Send a credit request.
1240 error
= vsock_pcb_credit_request(pcb
);
1248 // Check again in case free space was automatically updated in loopback case.
1249 free_space
= vsock_get_peer_space(pcb
);
1250 if (len
<= free_space
) {
1251 pcb
->waiting_send_size
= 0;
1255 // Bail if this is a non-blocking socket.
1256 if (so
->so_state
& SS_NBIO
) {
1263 // Wait until our peer has enough free space in their receive buffer.
1264 error
= sbwait(&so
->so_snd
);
1265 pcb
->waiting_send_size
= 0;
1273 // Bail if an error occured or we can't send more.
1274 if (so
->so_state
& SS_CANTSENDMORE
) {
1279 } else if (so
->so_error
) {
1280 error
= so
->so_error
;
1288 free_space
= vsock_get_peer_space(pcb
);
1291 // Send a payload over the transport.
1292 error
= vsock_pcb_send(pcb
, m
);
1303 vsock_shutdown(struct socket
*so
)
1305 struct vsockpcb
*pcb
= sotovsockpcb(so
);
1312 // Tell peer we will no longer send.
1313 errno_t error
= vsock_pcb_shutdown_send(pcb
);
1322 vsock_soreceive(struct socket
*so
, struct sockaddr
**psa
, struct uio
*uio
,
1323 struct mbuf
**mp0
, struct mbuf
**controlp
, int *flagsp
)
1325 struct vsockpcb
*pcb
= sotovsockpcb(so
);
1330 user_ssize_t length
= uio_resid(uio
);
1331 int result
= soreceive(so
, psa
, uio
, mp0
, controlp
, flagsp
);
1332 length
-= uio_resid(uio
);
1336 pcb
->fwd_cnt
+= length
;
1338 const uint32_t threshold
= VSOCK_MAX_PACKET_SIZE
;
1340 // Send a credit update if is possible that the peer will no longer send.
1341 if ((pcb
->fwd_cnt
- pcb
->last_fwd_cnt
+ threshold
) >= pcb
->last_buf_alloc
) {
1342 errno_t error
= vsock_pcb_credit_update(pcb
);
1343 if (!result
&& error
) {
1348 socket_unlock(so
, 1);
1353 static struct pr_usrreqs vsock_usrreqs
= {
1354 .pru_abort
= vsock_abort
,
1355 .pru_attach
= vsock_attach
,
1356 .pru_control
= vsock_control
,
1357 .pru_detach
= vsock_detach
,
1358 .pru_bind
= vsock_bind
,
1359 .pru_listen
= vsock_listen
,
1360 .pru_accept
= vsock_accept
,
1361 .pru_connect
= vsock_connect
,
1362 .pru_disconnect
= vsock_disconnect
,
1363 .pru_send
= vsock_send
,
1364 .pru_shutdown
= vsock_shutdown
,
1365 .pru_sockaddr
= vsock_sockaddr
,
1366 .pru_peeraddr
= vsock_peeraddr
,
1367 .pru_sosend
= sosend
,
1368 .pru_soreceive
= vsock_soreceive
,
1372 vsock_init(struct protosw
*pp
, struct domain
*dp
)
1376 static int vsock_initialized
= 0;
1377 VERIFY((pp
->pr_flags
& (PR_INITIALIZED
| PR_ATTACHED
)) == PR_ATTACHED
);
1378 if (!os_atomic_cmpxchg((volatile int *)&vsock_initialized
, 0, 1, acq_rel
)) {
1382 // Setup VSock protocol info struct.
1383 vsockinfo
.vsock_lock_grp_attr
= lck_grp_attr_alloc_init();
1384 vsockinfo
.vsock_lock_grp
= lck_grp_alloc_init("vsock", vsockinfo
.vsock_lock_grp_attr
);
1385 vsockinfo
.vsock_lock_attr
= lck_attr_alloc_init();
1386 if ((vsockinfo
.all_lock
= lck_rw_alloc_init(vsockinfo
.vsock_lock_grp
, vsockinfo
.vsock_lock_attr
)) == NULL
||
1387 (vsockinfo
.bound_lock
= lck_rw_alloc_init(vsockinfo
.vsock_lock_grp
, vsockinfo
.vsock_lock_attr
)) == NULL
) {
1388 panic("%s: unable to allocate PCB lock\n", __func__
);
1391 lck_mtx_init(&vsockinfo
.port_lock
, vsockinfo
.vsock_lock_grp
, vsockinfo
.vsock_lock_attr
);
1392 TAILQ_INIT(&vsockinfo
.all
);
1393 LIST_INIT(&vsockinfo
.bound
);
1394 vsockinfo
.last_port
= VMADDR_PORT_ANY
;
1397 static struct protosw vsocksw
[] = {
1399 .pr_type
= SOCK_STREAM
,
1401 .pr_flags
= PR_CONNREQUIRED
| PR_WANTRCVD
,
1402 .pr_init
= vsock_init
,
1403 .pr_usrreqs
= &vsock_usrreqs
,
1407 static const int vsock_proto_count
= (sizeof(vsocksw
) / sizeof(struct protosw
));
1411 static struct domain
*vsock_domain
= NULL
;
1414 vsock_dinit(struct domain
*dp
)
1416 // The VSock domain is initialized with a singleton pattern.
1417 VERIFY(!(dp
->dom_flags
& DOM_INITIALIZED
));
1418 VERIFY(vsock_domain
== NULL
);
1421 // Add protocols and initialize.
1422 for (int i
= 0; i
< vsock_proto_count
; i
++) {
1423 net_add_proto((struct protosw
*)&vsocksw
[i
], dp
, 1);
1427 struct domain vsockdomain_s
= {
1428 .dom_family
= PF_VSOCK
,
1429 .dom_name
= "vsock",
1430 .dom_init
= vsock_dinit
,
1431 .dom_maxrtkey
= sizeof(struct sockaddr_vm
),
1432 .dom_protohdrlen
= sizeof(struct sockaddr_vm
),