]>
Commit | Line | Data |
---|---|---|
f427ee49 A |
1 | /* |
2 | * Copyright (c) 2020 Apple Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * This file contains Original Code and/or Modifications of Original Code | |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
14 | * | |
15 | * Please obtain a copy of the License at | |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
25 | * | |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ | |
27 | */ | |
28 | ||
29 | #include <sys/domain.h> | |
30 | #include <sys/socket.h> | |
31 | #include <sys/protosw.h> | |
32 | #include <sys/mcache.h> | |
33 | #include <sys/systm.h> | |
34 | #include <sys/sysctl.h> | |
35 | #include <sys/random.h> | |
36 | #include <sys/mbuf.h> | |
37 | #include <sys/vsock_domain.h> | |
38 | #include <sys/vsock_transport.h> | |
39 | #include <kern/task.h> | |
40 | #include <kern/zalloc.h> | |
41 | #include <kern/locks.h> | |
42 | #include <machine/atomic.h> | |
43 | ||
44 | #define sotovsockpcb(so) ((struct vsockpcb *)(so)->so_pcb) | |
45 | ||
46 | #define VSOCK_PORT_RESERVED 1024 | |
47 | ||
48 | /* VSock Protocol Globals */ | |
49 | ||
50 | static struct vsock_transport * _Atomic the_vsock_transport = NULL; | |
51 | static ZONE_DECLARE(vsockpcb_zone, "vsockpcbzone", | |
52 | sizeof(struct vsockpcb), ZC_NONE); | |
c3c9b80d | 53 | static LCK_GRP_DECLARE(vsock_lock_grp, "vsock"); |
f427ee49 A |
54 | static struct vsockpcbinfo vsockinfo; |
55 | ||
56 | static uint32_t vsock_sendspace = VSOCK_MAX_PACKET_SIZE * 8; | |
57 | static uint32_t vsock_recvspace = VSOCK_MAX_PACKET_SIZE * 8; | |
58 | ||
59 | /* VSock PCB Helpers */ | |
60 | ||
61 | static uint32_t | |
62 | vsock_get_peer_space(struct vsockpcb *pcb) | |
63 | { | |
64 | return pcb->peer_buf_alloc - (pcb->tx_cnt - pcb->peer_fwd_cnt); | |
65 | } | |
66 | ||
67 | static struct vsockpcb * | |
68 | vsock_get_matching_pcb(struct vsock_address src, struct vsock_address dst) | |
69 | { | |
70 | struct vsockpcb *preferred = NULL; | |
71 | struct vsockpcb *match = NULL; | |
72 | struct vsockpcb *pcb = NULL; | |
73 | ||
c3c9b80d | 74 | lck_rw_lock_shared(&vsockinfo.bound_lock); |
f427ee49 A |
75 | LIST_FOREACH(pcb, &vsockinfo.bound, bound) { |
76 | // Source cid and port must match. Only destination port must match. (Allows for a changing CID during migration) | |
77 | socket_lock(pcb->so, 1); | |
78 | if ((pcb->so->so_state & SS_ISCONNECTED || pcb->so->so_state & SS_ISCONNECTING) && | |
79 | pcb->local_address.cid == src.cid && pcb->local_address.port == src.port && | |
80 | pcb->remote_address.port == dst.port) { | |
81 | preferred = pcb; | |
82 | break; | |
83 | } else if ((pcb->local_address.cid == src.cid || pcb->local_address.cid == VMADDR_CID_ANY) && | |
84 | pcb->local_address.port == src.port) { | |
85 | match = pcb; | |
86 | } | |
87 | socket_unlock(pcb->so, 1); | |
88 | } | |
89 | if (!preferred && match) { | |
90 | socket_lock(match->so, 1); | |
91 | preferred = match; | |
92 | } | |
c3c9b80d | 93 | lck_rw_done(&vsockinfo.bound_lock); |
f427ee49 A |
94 | |
95 | return preferred; | |
96 | } | |
97 | ||
98 | static errno_t | |
99 | vsock_bind_address_if_free(struct vsockpcb *pcb, uint32_t local_cid, uint32_t local_port, uint32_t remote_cid, uint32_t remote_port) | |
100 | { | |
101 | socket_lock_assert_owned(pcb->so); | |
102 | ||
103 | // Privileged ports. | |
104 | if (local_port != VMADDR_PORT_ANY && local_port < VSOCK_PORT_RESERVED && | |
105 | current_task() != kernel_task && proc_suser(current_proc()) != 0) { | |
106 | return EACCES; | |
107 | } | |
108 | ||
109 | bool taken = false; | |
110 | const bool check_remote = (remote_cid != VMADDR_CID_ANY && remote_port != VMADDR_PORT_ANY); | |
111 | ||
112 | struct vsockpcb *pcb_match = NULL; | |
113 | ||
114 | socket_unlock(pcb->so, 0); | |
c3c9b80d | 115 | lck_rw_lock_exclusive(&vsockinfo.bound_lock); |
f427ee49 A |
116 | LIST_FOREACH(pcb_match, &vsockinfo.bound, bound) { |
117 | socket_lock(pcb_match->so, 1); | |
118 | if (pcb == pcb_match || | |
119 | (!check_remote && pcb_match->local_address.port == local_port) || | |
120 | (check_remote && pcb_match->local_address.port == local_port && | |
121 | pcb_match->remote_address.cid == remote_cid && pcb_match->remote_address.port == remote_port)) { | |
122 | socket_unlock(pcb_match->so, 1); | |
123 | taken = true; | |
124 | break; | |
125 | } | |
126 | socket_unlock(pcb_match->so, 1); | |
127 | } | |
128 | socket_lock(pcb->so, 0); | |
129 | if (!taken) { | |
130 | pcb->local_address = (struct vsock_address) { .cid = local_cid, .port = local_port }; | |
131 | pcb->remote_address = (struct vsock_address) { .cid = remote_cid, .port = remote_port }; | |
132 | LIST_INSERT_HEAD(&vsockinfo.bound, pcb, bound); | |
133 | } | |
c3c9b80d | 134 | lck_rw_done(&vsockinfo.bound_lock); |
f427ee49 A |
135 | |
136 | return taken ? EADDRINUSE : 0; | |
137 | } | |
138 | ||
139 | static errno_t | |
140 | vsock_bind_address(struct vsockpcb *pcb, struct vsock_address laddr, struct vsock_address raddr) | |
141 | { | |
142 | if (!pcb) { | |
143 | return EINVAL; | |
144 | } | |
145 | ||
146 | socket_lock_assert_owned(pcb->so); | |
147 | ||
148 | // Certain CIDs are reserved. | |
149 | if (laddr.cid == VMADDR_CID_HYPERVISOR || laddr.cid == VMADDR_CID_RESERVED || laddr.cid == VMADDR_CID_HOST) { | |
150 | return EADDRNOTAVAIL; | |
151 | } | |
152 | ||
153 | // Remote address must be fully specified or not specified at all. | |
154 | if ((raddr.cid == VMADDR_CID_ANY) ^ (raddr.port == VMADDR_PORT_ANY)) { | |
155 | return EINVAL; | |
156 | } | |
157 | ||
158 | // Cannot bind if already bound. | |
159 | if (pcb->local_address.port != VMADDR_PORT_ANY) { | |
160 | return EINVAL; | |
161 | } | |
162 | ||
163 | uint32_t transport_cid; | |
164 | struct vsock_transport *transport = pcb->transport; | |
165 | errno_t error = transport->get_cid(transport->provider, &transport_cid); | |
166 | if (error) { | |
167 | return error; | |
168 | } | |
169 | ||
170 | // Local CID must be this transport's CID or any. | |
171 | if (laddr.cid != transport_cid && laddr.cid != VMADDR_CID_ANY) { | |
172 | return EINVAL; | |
173 | } | |
174 | ||
175 | if (laddr.port != VMADDR_PORT_ANY) { | |
176 | error = vsock_bind_address_if_free(pcb, laddr.cid, laddr.port, raddr.cid, raddr.port); | |
177 | } else { | |
178 | lck_mtx_lock(&vsockinfo.port_lock); | |
179 | ||
180 | const uint32_t first = VSOCK_PORT_RESERVED; | |
181 | const uint32_t last = VMADDR_PORT_ANY - 1; | |
182 | uint32_t count = last - first + 1; | |
183 | uint32_t *last_port = &vsockinfo.last_port; | |
184 | ||
185 | if (pcb->so->so_flags & SOF_BINDRANDOMPORT) { | |
186 | uint32_t random = 0; | |
187 | read_frandom(&random, sizeof(random)); | |
188 | *last_port = first + (random % count); | |
189 | } | |
190 | ||
191 | do { | |
192 | if (count == 0) { | |
193 | lck_mtx_unlock(&vsockinfo.port_lock); | |
194 | return EADDRNOTAVAIL; | |
195 | } | |
196 | count--; | |
197 | ||
198 | ++*last_port; | |
199 | if (*last_port < first || *last_port > last) { | |
200 | *last_port = first; | |
201 | } | |
202 | ||
203 | error = vsock_bind_address_if_free(pcb, laddr.cid, *last_port, raddr.cid, raddr.port); | |
204 | } while (error); | |
205 | ||
206 | lck_mtx_unlock(&vsockinfo.port_lock); | |
207 | } | |
208 | ||
209 | return error; | |
210 | } | |
211 | ||
212 | static void | |
213 | vsock_unbind_pcb(struct vsockpcb *pcb, bool is_locked) | |
214 | { | |
215 | if (!pcb) { | |
216 | return; | |
217 | } | |
218 | ||
219 | socket_lock_assert_owned(pcb->so); | |
220 | ||
221 | soisdisconnected(pcb->so); | |
222 | ||
223 | if (!pcb->bound.le_prev) { | |
224 | return; | |
225 | } | |
226 | ||
227 | if (!is_locked) { | |
228 | socket_unlock(pcb->so, 0); | |
c3c9b80d | 229 | lck_rw_lock_exclusive(&vsockinfo.bound_lock); |
f427ee49 A |
230 | socket_lock(pcb->so, 0); |
231 | if (!pcb->bound.le_prev) { | |
c3c9b80d | 232 | lck_rw_done(&vsockinfo.bound_lock); |
f427ee49 A |
233 | return; |
234 | } | |
235 | } | |
236 | ||
237 | LIST_REMOVE(pcb, bound); | |
238 | pcb->bound.le_next = NULL; | |
239 | pcb->bound.le_prev = NULL; | |
240 | ||
241 | if (!is_locked) { | |
c3c9b80d | 242 | lck_rw_done(&vsockinfo.bound_lock); |
f427ee49 A |
243 | } |
244 | } | |
245 | ||
246 | static struct sockaddr * | |
247 | vsock_new_sockaddr(struct vsock_address *address) | |
248 | { | |
249 | if (!address) { | |
250 | return NULL; | |
251 | } | |
252 | ||
253 | struct sockaddr_vm *addr; | |
c3c9b80d A |
254 | MALLOC(addr, struct sockaddr_vm *, sizeof(*addr), M_SONAME, |
255 | M_WAITOK | M_ZERO); | |
f427ee49 A |
256 | if (!addr) { |
257 | return NULL; | |
258 | } | |
259 | ||
f427ee49 A |
260 | addr->svm_len = sizeof(*addr); |
261 | addr->svm_family = AF_VSOCK; | |
262 | addr->svm_port = address->port; | |
263 | addr->svm_cid = address->cid; | |
264 | ||
265 | return (struct sockaddr *)addr; | |
266 | } | |
267 | ||
268 | static errno_t | |
269 | vsock_pcb_send_message(struct vsockpcb *pcb, enum vsock_operation operation, mbuf_t m) | |
270 | { | |
271 | if (!pcb) { | |
272 | if (m != NULL) { | |
273 | mbuf_freem_list(m); | |
274 | } | |
275 | return EINVAL; | |
276 | } | |
277 | ||
278 | socket_lock_assert_owned(pcb->so); | |
279 | ||
280 | errno_t error; | |
281 | ||
282 | struct vsock_address dst = pcb->remote_address; | |
283 | if (dst.cid == VMADDR_CID_ANY || dst.port == VMADDR_PORT_ANY) { | |
284 | if (m != NULL) { | |
285 | mbuf_freem_list(m); | |
286 | } | |
287 | return EINVAL; | |
288 | } | |
289 | ||
290 | struct vsock_address src = pcb->local_address; | |
291 | if (src.cid == VMADDR_CID_ANY) { | |
292 | uint32_t transport_cid; | |
293 | struct vsock_transport *transport = pcb->transport; | |
294 | error = transport->get_cid(transport->provider, &transport_cid); | |
295 | if (error) { | |
296 | if (m != NULL) { | |
297 | mbuf_freem_list(m); | |
298 | } | |
299 | return error; | |
300 | } | |
301 | src.cid = transport_cid; | |
302 | } | |
303 | ||
304 | uint32_t buf_alloc = pcb->so->so_rcv.sb_hiwat; | |
305 | uint32_t fwd_cnt = pcb->fwd_cnt; | |
306 | ||
307 | if (src.cid == dst.cid) { | |
308 | pcb->last_buf_alloc = buf_alloc; | |
309 | pcb->last_fwd_cnt = fwd_cnt; | |
310 | ||
311 | socket_unlock(pcb->so, 0); | |
312 | error = vsock_put_message(src, dst, operation, buf_alloc, fwd_cnt, m); | |
313 | socket_lock(pcb->so, 0); | |
314 | } else { | |
315 | struct vsock_transport *transport = pcb->transport; | |
316 | error = transport->put_message(transport->provider, src, dst, operation, buf_alloc, fwd_cnt, m); | |
317 | ||
318 | if (!error) { | |
319 | pcb->last_buf_alloc = buf_alloc; | |
320 | pcb->last_fwd_cnt = fwd_cnt; | |
321 | } | |
322 | } | |
323 | ||
324 | return error; | |
325 | } | |
326 | ||
327 | static errno_t | |
328 | vsock_pcb_reset_address(struct vsock_address src, struct vsock_address dst) | |
329 | { | |
330 | if (dst.cid == VMADDR_CID_ANY || dst.port == VMADDR_PORT_ANY) { | |
331 | return EINVAL; | |
332 | } | |
333 | ||
334 | errno_t error; | |
335 | struct vsock_transport *transport = NULL; | |
336 | ||
337 | if (src.cid == VMADDR_CID_ANY) { | |
338 | transport = os_atomic_load(&the_vsock_transport, relaxed); | |
339 | if (transport == NULL) { | |
340 | return ENODEV; | |
341 | } | |
342 | ||
343 | uint32_t transport_cid; | |
344 | error = transport->get_cid(transport->provider, &transport_cid); | |
345 | if (error) { | |
346 | return error; | |
347 | } | |
348 | src.cid = transport_cid; | |
349 | } | |
350 | ||
351 | if (src.cid == dst.cid) { | |
352 | error = vsock_put_message(src, dst, VSOCK_RESET, 0, 0, NULL); | |
353 | } else { | |
354 | if (!transport) { | |
355 | transport = os_atomic_load(&the_vsock_transport, relaxed); | |
356 | if (transport == NULL) { | |
357 | return ENODEV; | |
358 | } | |
359 | } | |
360 | error = transport->put_message(transport->provider, src, dst, VSOCK_RESET, 0, 0, NULL); | |
361 | } | |
362 | ||
363 | return error; | |
364 | } | |
365 | ||
366 | static errno_t | |
367 | vsock_pcb_safe_reset_address(struct vsockpcb *pcb, struct vsock_address src, struct vsock_address dst) | |
368 | { | |
369 | if (pcb) { | |
370 | socket_lock_assert_owned(pcb->so); | |
371 | socket_unlock(pcb->so, 0); | |
372 | } | |
373 | errno_t error = vsock_pcb_reset_address(src, dst); | |
374 | if (pcb) { | |
375 | socket_lock(pcb->so, 0); | |
376 | } | |
377 | return error; | |
378 | } | |
379 | ||
380 | static errno_t | |
381 | vsock_pcb_connect(struct vsockpcb *pcb) | |
382 | { | |
383 | return vsock_pcb_send_message(pcb, VSOCK_REQUEST, NULL); | |
384 | } | |
385 | ||
386 | static errno_t | |
387 | vsock_pcb_respond(struct vsockpcb *pcb) | |
388 | { | |
389 | return vsock_pcb_send_message(pcb, VSOCK_RESPONSE, NULL); | |
390 | } | |
391 | ||
392 | static errno_t | |
393 | vsock_pcb_send(struct vsockpcb *pcb, mbuf_t m) | |
394 | { | |
395 | return vsock_pcb_send_message(pcb, VSOCK_PAYLOAD, m); | |
396 | } | |
397 | ||
398 | static errno_t | |
399 | vsock_pcb_shutdown_send(struct vsockpcb *pcb) | |
400 | { | |
401 | return vsock_pcb_send_message(pcb, VSOCK_SHUTDOWN_SEND, NULL); | |
402 | } | |
403 | ||
404 | static errno_t | |
405 | vsock_pcb_reset(struct vsockpcb *pcb) | |
406 | { | |
407 | return vsock_pcb_send_message(pcb, VSOCK_RESET, NULL); | |
408 | } | |
409 | ||
410 | static errno_t | |
411 | vsock_pcb_credit_update(struct vsockpcb *pcb) | |
412 | { | |
413 | return vsock_pcb_send_message(pcb, VSOCK_CREDIT_UPDATE, NULL); | |
414 | } | |
415 | ||
416 | static errno_t | |
417 | vsock_pcb_credit_request(struct vsockpcb *pcb) | |
418 | { | |
419 | return vsock_pcb_send_message(pcb, VSOCK_CREDIT_REQUEST, NULL); | |
420 | } | |
421 | ||
422 | static errno_t | |
423 | vsock_disconnect_pcb_common(struct vsockpcb *pcb, bool is_locked) | |
424 | { | |
425 | socket_lock_assert_owned(pcb->so); | |
426 | vsock_unbind_pcb(pcb, is_locked); | |
427 | return vsock_pcb_reset(pcb); | |
428 | } | |
429 | ||
430 | static errno_t | |
431 | vsock_disconnect_pcb_locked(struct vsockpcb *pcb) | |
432 | { | |
433 | return vsock_disconnect_pcb_common(pcb, true); | |
434 | } | |
435 | ||
436 | static errno_t | |
437 | vsock_disconnect_pcb(struct vsockpcb *pcb) | |
438 | { | |
439 | return vsock_disconnect_pcb_common(pcb, false); | |
440 | } | |
441 | ||
442 | static errno_t | |
443 | vsock_sockaddr_vm_validate(struct vsockpcb *pcb, struct sockaddr_vm *addr) | |
444 | { | |
445 | if (!pcb || !pcb->so || !addr) { | |
446 | return EINVAL; | |
447 | } | |
448 | ||
449 | // Validate address length. | |
450 | if (addr->svm_len < sizeof(struct sockaddr_vm)) { | |
451 | return EINVAL; | |
452 | } | |
453 | ||
454 | // Validate address family. | |
455 | if (addr->svm_family != AF_UNSPEC && addr->svm_family != AF_VSOCK) { | |
456 | return EAFNOSUPPORT; | |
457 | } | |
458 | ||
459 | // Only stream is supported currently. | |
460 | if (pcb->so->so_type != SOCK_STREAM) { | |
461 | return EAFNOSUPPORT; | |
462 | } | |
463 | ||
464 | return 0; | |
465 | } | |
466 | /* VSock Receive Handlers */ | |
467 | ||
468 | static errno_t | |
469 | vsock_put_message_connected(struct vsockpcb *pcb, enum vsock_operation op, mbuf_t m) | |
470 | { | |
471 | socket_lock_assert_owned(pcb->so); | |
472 | ||
473 | errno_t error = 0; | |
474 | ||
475 | switch (op) { | |
476 | case VSOCK_SHUTDOWN: | |
477 | error = vsock_disconnect_pcb(pcb); | |
478 | break; | |
479 | case VSOCK_SHUTDOWN_RECEIVE: | |
480 | socantsendmore(pcb->so); | |
481 | break; | |
482 | case VSOCK_SHUTDOWN_SEND: | |
483 | socantrcvmore(pcb->so); | |
484 | break; | |
485 | case VSOCK_PAYLOAD: | |
486 | // Add data to the receive queue then wakeup any reading threads. | |
487 | error = !sbappendstream(&pcb->so->so_rcv, m); | |
488 | if (!error) { | |
489 | sorwakeup(pcb->so); | |
490 | } | |
491 | break; | |
492 | case VSOCK_RESET: | |
493 | vsock_unbind_pcb(pcb, false); | |
494 | break; | |
495 | default: | |
496 | error = ENOTSUP; | |
497 | break; | |
498 | } | |
499 | ||
500 | return error; | |
501 | } | |
502 | ||
503 | static errno_t | |
504 | vsock_put_message_connecting(struct vsockpcb *pcb, enum vsock_operation op) | |
505 | { | |
506 | socket_lock_assert_owned(pcb->so); | |
507 | ||
508 | errno_t error = 0; | |
509 | ||
510 | switch (op) { | |
511 | case VSOCK_RESPONSE: | |
512 | soisconnected(pcb->so); | |
513 | break; | |
514 | case VSOCK_RESET: | |
515 | pcb->so->so_error = EAGAIN; | |
516 | error = vsock_disconnect_pcb(pcb); | |
517 | break; | |
518 | default: | |
519 | vsock_disconnect_pcb(pcb); | |
520 | error = ENOTSUP; | |
521 | break; | |
522 | } | |
523 | ||
524 | return error; | |
525 | } | |
526 | ||
527 | static errno_t | |
528 | vsock_put_message_listening(struct vsockpcb *pcb, enum vsock_operation op, struct vsock_address src, struct vsock_address dst) | |
529 | { | |
530 | socket_lock_assert_owned(pcb->so); | |
531 | ||
532 | struct sockaddr_vm addr; | |
533 | struct socket *so2 = NULL; | |
534 | struct vsockpcb *pcb2 = NULL; | |
535 | ||
536 | errno_t error = 0; | |
537 | ||
538 | switch (op) { | |
539 | case VSOCK_REQUEST: | |
540 | addr = (struct sockaddr_vm) { | |
541 | .svm_len = sizeof(addr), | |
542 | .svm_family = AF_VSOCK, | |
543 | .svm_reserved1 = 0, | |
544 | .svm_port = pcb->local_address.port, | |
545 | .svm_cid = pcb->local_address.cid | |
546 | }; | |
547 | so2 = sonewconn(pcb->so, 0, (struct sockaddr *)&addr); | |
548 | if (!so2) { | |
549 | // It is likely that the backlog is full. Deny this request. | |
550 | vsock_pcb_safe_reset_address(pcb, dst, src); | |
551 | error = ECONNREFUSED; | |
552 | break; | |
553 | } | |
554 | ||
555 | pcb2 = sotovsockpcb(so2); | |
556 | if (!pcb2) { | |
557 | error = EINVAL; | |
558 | goto done; | |
559 | } | |
560 | ||
561 | error = vsock_bind_address(pcb2, dst, src); | |
562 | if (error) { | |
563 | goto done; | |
564 | } | |
565 | ||
566 | error = vsock_pcb_respond(pcb2); | |
567 | if (error) { | |
568 | goto done; | |
569 | } | |
570 | ||
571 | soisconnected(so2); | |
572 | ||
573 | done: | |
574 | if (error) { | |
575 | soisdisconnected(so2); | |
576 | if (pcb2) { | |
577 | vsock_unbind_pcb(pcb2, false); | |
578 | } | |
579 | socket_unlock(so2, 1); | |
580 | vsock_pcb_reset_address(dst, src); | |
581 | } else { | |
582 | socket_unlock(so2, 0); | |
583 | } | |
584 | socket_lock(pcb->so, 0); | |
585 | ||
586 | break; | |
587 | case VSOCK_RESET: | |
588 | error = vsock_pcb_safe_reset_address(pcb, dst, src); | |
589 | break; | |
590 | default: | |
591 | vsock_pcb_safe_reset_address(pcb, dst, src); | |
592 | error = ENOTSUP; | |
593 | break; | |
594 | } | |
595 | ||
596 | return error; | |
597 | } | |
598 | ||
599 | /* VSock Transport */ | |
600 | ||
601 | errno_t | |
602 | vsock_add_transport(struct vsock_transport *transport) | |
603 | { | |
604 | if (transport == NULL || transport->provider == NULL) { | |
605 | return EINVAL; | |
606 | } | |
607 | if (!os_atomic_cmpxchg((void * volatile *)&the_vsock_transport, NULL, transport, acq_rel)) { | |
608 | return EEXIST; | |
609 | } | |
610 | return 0; | |
611 | } | |
612 | ||
613 | errno_t | |
614 | vsock_remove_transport(struct vsock_transport *transport) | |
615 | { | |
616 | if (!os_atomic_cmpxchg((void * volatile *)&the_vsock_transport, transport, NULL, acq_rel)) { | |
617 | return ENODEV; | |
618 | } | |
619 | return 0; | |
620 | } | |
621 | ||
622 | errno_t | |
623 | vsock_reset_transport(struct vsock_transport *transport) | |
624 | { | |
625 | if (transport == NULL) { | |
626 | return EINVAL; | |
627 | } | |
628 | ||
629 | errno_t error = 0; | |
630 | struct vsockpcb *pcb = NULL; | |
631 | struct vsockpcb *tmp_pcb = NULL; | |
632 | ||
c3c9b80d | 633 | lck_rw_lock_exclusive(&vsockinfo.bound_lock); |
f427ee49 A |
634 | LIST_FOREACH_SAFE(pcb, &vsockinfo.bound, bound, tmp_pcb) { |
635 | // Disconnect this transport's sockets. Listen and bind sockets must stay alive. | |
636 | socket_lock(pcb->so, 1); | |
637 | if (pcb->transport == transport && pcb->so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING | SS_ISDISCONNECTING)) { | |
638 | errno_t dc_error = vsock_disconnect_pcb_locked(pcb); | |
639 | if (dc_error && !error) { | |
640 | error = dc_error; | |
641 | } | |
642 | } | |
643 | socket_unlock(pcb->so, 1); | |
644 | } | |
c3c9b80d | 645 | lck_rw_done(&vsockinfo.bound_lock); |
f427ee49 A |
646 | |
647 | return error; | |
648 | } | |
649 | ||
650 | errno_t | |
651 | vsock_put_message(struct vsock_address src, struct vsock_address dst, enum vsock_operation op, uint32_t buf_alloc, uint32_t fwd_cnt, mbuf_t m) | |
652 | { | |
653 | struct vsockpcb *pcb = vsock_get_matching_pcb(dst, src); | |
654 | if (!pcb) { | |
655 | if (op != VSOCK_RESET) { | |
656 | vsock_pcb_reset_address(dst, src); | |
657 | } | |
658 | if (m != NULL) { | |
659 | mbuf_freem_list(m); | |
660 | } | |
661 | return EINVAL; | |
662 | } | |
663 | ||
664 | socket_lock_assert_owned(pcb->so); | |
665 | ||
666 | struct socket *so = pcb->so; | |
667 | errno_t error = 0; | |
668 | ||
669 | // Check if the peer's buffer has changed. Update our view of the peer's forwarded bytes. | |
670 | int buffers_changed = (pcb->peer_buf_alloc != buf_alloc) || (pcb->peer_fwd_cnt) != fwd_cnt; | |
671 | pcb->peer_buf_alloc = buf_alloc; | |
672 | pcb->peer_fwd_cnt = fwd_cnt; | |
673 | ||
674 | // Peer's buffer has enough space for the next packet. Notify any threads waiting for space. | |
675 | if (buffers_changed && vsock_get_peer_space(pcb) >= pcb->waiting_send_size) { | |
676 | sowwakeup(so); | |
677 | } | |
678 | ||
679 | switch (op) { | |
680 | case VSOCK_CREDIT_REQUEST: | |
681 | error = vsock_pcb_credit_update(pcb); | |
682 | break; | |
683 | case VSOCK_CREDIT_UPDATE: | |
684 | break; | |
685 | default: | |
686 | if (so->so_state & SS_ISCONNECTED) { | |
687 | error = vsock_put_message_connected(pcb, op, m); | |
688 | m = NULL; | |
689 | } else if (so->so_state & SS_ISCONNECTING) { | |
690 | error = vsock_put_message_connecting(pcb, op); | |
691 | } else if (so->so_options & SO_ACCEPTCONN) { | |
692 | error = vsock_put_message_listening(pcb, op, src, dst); | |
693 | } else { | |
694 | // Reset the connection for other states such as 'disconnecting'. | |
695 | error = vsock_disconnect_pcb(pcb); | |
696 | if (!error) { | |
697 | error = ENODEV; | |
698 | } | |
699 | } | |
700 | break; | |
701 | } | |
702 | socket_unlock(so, 1); | |
703 | ||
704 | if (m != NULL) { | |
705 | mbuf_freem_list(m); | |
706 | } | |
707 | ||
708 | return error; | |
709 | } | |
710 | ||
711 | /* VSock Sysctl */ | |
712 | ||
713 | static int | |
714 | vsock_pcblist SYSCTL_HANDLER_ARGS | |
715 | { | |
716 | #pragma unused(oidp,arg2) | |
717 | ||
718 | int error; | |
719 | ||
720 | // Only stream is supported. | |
721 | if ((intptr_t)arg1 != SOCK_STREAM) { | |
722 | return EINVAL; | |
723 | } | |
724 | ||
725 | // Get the generation count and the count of all vsock sockets. | |
c3c9b80d | 726 | lck_rw_lock_shared(&vsockinfo.all_lock); |
f427ee49 A |
727 | uint64_t n = vsockinfo.all_pcb_count; |
728 | vsock_gen_t gen_count = vsockinfo.vsock_gencnt; | |
c3c9b80d | 729 | lck_rw_done(&vsockinfo.all_lock); |
f427ee49 A |
730 | |
731 | const size_t xpcb_len = sizeof(struct xvsockpcb); | |
732 | struct xvsockpgen xvg; | |
733 | ||
734 | /* | |
735 | * The process of preparing the PCB list is too time-consuming and | |
736 | * resource-intensive to repeat twice on every request. | |
737 | */ | |
738 | if (req->oldptr == USER_ADDR_NULL) { | |
739 | req->oldidx = (size_t)(2 * sizeof(xvg) + (n + n / 8) * xpcb_len); | |
740 | return 0; | |
741 | } | |
742 | ||
743 | if (req->newptr != USER_ADDR_NULL) { | |
744 | return EPERM; | |
745 | } | |
746 | ||
747 | bzero(&xvg, sizeof(xvg)); | |
748 | xvg.xvg_len = sizeof(xvg); | |
749 | xvg.xvg_count = n; | |
750 | xvg.xvg_gen = gen_count; | |
751 | xvg.xvg_sogen = so_gencnt; | |
752 | error = SYSCTL_OUT(req, &xvg, sizeof(xvg)); | |
753 | if (error) { | |
754 | return error; | |
755 | } | |
756 | ||
757 | // Return if no sockets exist. | |
758 | if (n == 0) { | |
759 | return 0; | |
760 | } | |
761 | ||
c3c9b80d | 762 | lck_rw_lock_shared(&vsockinfo.all_lock); |
f427ee49 A |
763 | |
764 | n = 0; | |
765 | struct vsockpcb *pcb = NULL; | |
766 | TAILQ_FOREACH(pcb, &vsockinfo.all, all) { | |
767 | // Bail if there is not enough user buffer for this next socket. | |
768 | if (req->oldlen - req->oldidx - sizeof(xvg) < xpcb_len) { | |
769 | break; | |
770 | } | |
771 | ||
772 | // Populate the socket structure. | |
773 | socket_lock(pcb->so, 1); | |
774 | if (pcb->vsock_gencnt <= gen_count) { | |
775 | struct xvsockpcb xpcb; | |
776 | bzero(&xpcb, xpcb_len); | |
777 | xpcb.xv_len = xpcb_len; | |
778 | xpcb.xv_vsockpp = (uint64_t)VM_KERNEL_ADDRHASH(pcb); | |
779 | xpcb.xvp_local_cid = pcb->local_address.cid; | |
780 | xpcb.xvp_local_port = pcb->local_address.port; | |
781 | xpcb.xvp_remote_cid = pcb->remote_address.cid; | |
782 | xpcb.xvp_remote_port = pcb->remote_address.port; | |
783 | xpcb.xvp_rxcnt = pcb->fwd_cnt; | |
784 | xpcb.xvp_txcnt = pcb->tx_cnt; | |
785 | xpcb.xvp_peer_rxhiwat = pcb->peer_buf_alloc; | |
786 | xpcb.xvp_peer_rxcnt = pcb->peer_fwd_cnt; | |
787 | xpcb.xvp_last_pid = pcb->so->last_pid; | |
788 | xpcb.xvp_gencnt = pcb->vsock_gencnt; | |
789 | if (pcb->so) { | |
790 | sotoxsocket(pcb->so, &xpcb.xv_socket); | |
791 | } | |
792 | socket_unlock(pcb->so, 1); | |
793 | ||
794 | error = SYSCTL_OUT(req, &xpcb, xpcb_len); | |
795 | if (error != 0) { | |
796 | break; | |
797 | } | |
798 | n++; | |
799 | } else { | |
800 | socket_unlock(pcb->so, 1); | |
801 | } | |
802 | } | |
803 | ||
804 | // Update the generation count to match the sockets being returned. | |
805 | gen_count = vsockinfo.vsock_gencnt; | |
806 | ||
c3c9b80d | 807 | lck_rw_done(&vsockinfo.all_lock); |
f427ee49 A |
808 | |
809 | if (!error) { | |
810 | /* | |
811 | * Give the user an updated idea of our state. | |
812 | * If the generation differs from what we told | |
813 | * her before, she knows that something happened | |
814 | * while we were processing this request, and it | |
815 | * might be necessary to retry. | |
816 | */ | |
817 | bzero(&xvg, sizeof(xvg)); | |
818 | xvg.xvg_len = sizeof(xvg); | |
819 | xvg.xvg_count = n; | |
820 | xvg.xvg_gen = gen_count; | |
821 | xvg.xvg_sogen = so_gencnt; | |
822 | error = SYSCTL_OUT(req, &xvg, sizeof(xvg)); | |
823 | } | |
824 | ||
825 | return error; | |
826 | } | |
827 | ||
#ifdef SYSCTL_DECL
/* net.vsock: parent node for the vsock sysctls. */
SYSCTL_NODE(_net, OID_AUTO, vsock, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "vsock");

/* net.vsock.sendspace: read/write access to vsock_sendspace. */
SYSCTL_UINT(_net_vsock, OID_AUTO, sendspace,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &vsock_sendspace, 0,
    "Maximum outgoing vsock datagram size");

/* net.vsock.recvspace: read/write access to vsock_recvspace. */
SYSCTL_UINT(_net_vsock, OID_AUTO, recvspace,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &vsock_recvspace, 0,
    "Maximum incoming vsock datagram size");

/* net.vsock.pcblist: read-only PCB list produced by vsock_pcblist(). */
SYSCTL_PROC(_net_vsock, OID_AUTO, pcblist,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    (caddr_t)(long)SOCK_STREAM, 0, vsock_pcblist,
    "S,xvsockpcb",
    "List of active vsock sockets");
#endif
839 | ||
840 | /* VSock Protocol */ | |
841 | ||
842 | static int | |
843 | vsock_attach(struct socket *so, int proto, struct proc *p) | |
844 | { | |
845 | #pragma unused(proto, p) | |
846 | ||
847 | // Attach should only be run once per socket. | |
848 | struct vsockpcb *pcb = sotovsockpcb(so); | |
849 | if (pcb) { | |
850 | return EINVAL; | |
851 | } | |
852 | ||
853 | // Get the transport for this socket. | |
854 | struct vsock_transport *transport = os_atomic_load(&the_vsock_transport, relaxed); | |
855 | if (transport == NULL) { | |
856 | return ENODEV; | |
857 | } | |
858 | ||
859 | // Reserve send and receive buffers. | |
860 | errno_t error = soreserve(so, vsock_sendspace, vsock_recvspace); | |
861 | if (error) { | |
862 | return error; | |
863 | } | |
864 | ||
865 | // Initialize the vsock protocol control block. | |
866 | pcb = zalloc(vsockpcb_zone); | |
867 | if (pcb == NULL) { | |
868 | return ENOBUFS; | |
869 | } | |
870 | bzero(pcb, sizeof(*pcb)); | |
871 | pcb->so = so; | |
872 | pcb->transport = transport; | |
873 | pcb->local_address = (struct vsock_address) { | |
874 | .cid = VMADDR_CID_ANY, | |
875 | .port = VMADDR_PORT_ANY | |
876 | }; | |
877 | pcb->remote_address = (struct vsock_address) { | |
878 | .cid = VMADDR_CID_ANY, | |
879 | .port = VMADDR_PORT_ANY | |
880 | }; | |
881 | so->so_pcb = pcb; | |
882 | ||
883 | // Tell the transport that this socket has attached. | |
884 | error = transport->attach_socket(transport->provider); | |
885 | if (error) { | |
886 | return error; | |
887 | } | |
888 | ||
889 | // Add to the list of all vsock sockets. | |
c3c9b80d | 890 | lck_rw_lock_exclusive(&vsockinfo.all_lock); |
f427ee49 A |
891 | TAILQ_INSERT_TAIL(&vsockinfo.all, pcb, all); |
892 | vsockinfo.all_pcb_count++; | |
893 | pcb->vsock_gencnt = ++vsockinfo.vsock_gencnt; | |
c3c9b80d | 894 | lck_rw_done(&vsockinfo.all_lock); |
f427ee49 A |
895 | |
896 | return 0; | |
897 | } | |
898 | ||
899 | static int | |
900 | vsock_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, struct proc *p) | |
901 | { | |
902 | #pragma unused(ifp) | |
903 | ||
904 | VERIFY(so != NULL || p == kernproc); | |
905 | ||
906 | if (cmd != IOCTL_VM_SOCKETS_GET_LOCAL_CID) { | |
907 | return EINVAL; | |
908 | } | |
909 | ||
910 | struct vsock_transport *transport; | |
911 | if (so) { | |
912 | struct vsockpcb *pcb = sotovsockpcb(so); | |
913 | if (pcb == NULL) { | |
914 | return EINVAL; | |
915 | } | |
916 | transport = pcb->transport; | |
917 | } else { | |
918 | transport = os_atomic_load(&the_vsock_transport, relaxed); | |
919 | } | |
920 | ||
921 | if (transport == NULL) { | |
922 | return ENODEV; | |
923 | } | |
924 | ||
925 | uint32_t transport_cid; | |
926 | errno_t error = transport->get_cid(transport->provider, &transport_cid); | |
927 | if (error) { | |
928 | return error; | |
929 | } | |
930 | ||
931 | memcpy(data, &transport_cid, sizeof(transport_cid)); | |
932 | ||
933 | return 0; | |
934 | } | |
935 | ||
936 | static int | |
937 | vsock_detach(struct socket *so) | |
938 | { | |
939 | struct vsockpcb *pcb = sotovsockpcb(so); | |
940 | if (pcb == NULL) { | |
941 | return EINVAL; | |
942 | } | |
943 | ||
944 | vsock_unbind_pcb(pcb, false); | |
945 | ||
946 | // Tell the transport that this socket has detached. | |
947 | struct vsock_transport *transport = pcb->transport; | |
948 | errno_t error = transport->detach_socket(transport->provider); | |
949 | if (error) { | |
950 | return error; | |
951 | } | |
952 | ||
953 | // Remove from the list of all vsock sockets. | |
c3c9b80d | 954 | lck_rw_lock_exclusive(&vsockinfo.all_lock); |
f427ee49 A |
955 | TAILQ_REMOVE(&vsockinfo.all, pcb, all); |
956 | pcb->all.tqe_next = NULL; | |
957 | pcb->all.tqe_prev = NULL; | |
958 | vsockinfo.all_pcb_count--; | |
959 | vsockinfo.vsock_gencnt++; | |
c3c9b80d | 960 | lck_rw_done(&vsockinfo.all_lock); |
f427ee49 A |
961 | |
962 | // Deallocate any resources. | |
963 | zfree(vsockpcb_zone, pcb); | |
964 | so->so_pcb = 0; | |
965 | so->so_flags |= SOF_PCBCLEARING; | |
966 | sofree(so); | |
967 | ||
968 | return 0; | |
969 | } | |
970 | ||
/*
 * Abort a socket: mark it disconnected, then detach it.
 *
 * Returns whatever vsock_detach() returns.
 */
static int
vsock_abort(struct socket *so)
{
	soisdisconnected(so);

	const int detach_result = vsock_detach(so);
	return detach_result;
}
977 | ||
978 | static int | |
979 | vsock_bind(struct socket *so, struct sockaddr *nam, struct proc *p) | |
980 | { | |
981 | #pragma unused(p) | |
982 | ||
983 | struct vsockpcb *pcb = sotovsockpcb(so); | |
984 | if (pcb == NULL) { | |
985 | return EINVAL; | |
986 | } | |
987 | ||
988 | struct sockaddr_vm *addr = (struct sockaddr_vm *)nam; | |
989 | ||
990 | errno_t error = vsock_sockaddr_vm_validate(pcb, addr); | |
991 | if (error) { | |
992 | return error; | |
993 | } | |
994 | ||
995 | struct vsock_address laddr = (struct vsock_address) { | |
996 | .cid = addr->svm_cid, | |
997 | .port = addr->svm_port, | |
998 | }; | |
999 | ||
1000 | struct vsock_address raddr = (struct vsock_address) { | |
1001 | .cid = VMADDR_CID_ANY, | |
1002 | .port = VMADDR_PORT_ANY, | |
1003 | }; | |
1004 | ||
1005 | error = vsock_bind_address(pcb, laddr, raddr); | |
1006 | if (error) { | |
1007 | return error; | |
1008 | } | |
1009 | ||
1010 | return 0; | |
1011 | } | |
1012 | ||
1013 | static int | |
1014 | vsock_listen(struct socket *so, struct proc *p) | |
1015 | { | |
1016 | #pragma unused(p) | |
1017 | ||
1018 | struct vsockpcb *pcb = sotovsockpcb(so); | |
1019 | if (pcb == NULL) { | |
1020 | return EINVAL; | |
1021 | } | |
1022 | ||
1023 | // Only stream is supported currently. | |
1024 | if (so->so_type != SOCK_STREAM) { | |
1025 | return EAFNOSUPPORT; | |
1026 | } | |
1027 | ||
1028 | struct vsock_address *addr = &pcb->local_address; | |
1029 | ||
1030 | if (addr->port == VMADDR_CID_ANY) { | |
1031 | return EFAULT; | |
1032 | } | |
1033 | ||
1034 | struct vsock_transport *transport = pcb->transport; | |
1035 | uint32_t transport_cid; | |
1036 | errno_t error = transport->get_cid(transport->provider, &transport_cid); | |
1037 | if (error) { | |
1038 | return error; | |
1039 | } | |
1040 | ||
1041 | // Can listen on the transport's cid or any. | |
1042 | if (addr->cid != transport_cid && addr->cid != VMADDR_CID_ANY) { | |
1043 | return EFAULT; | |
1044 | } | |
1045 | ||
1046 | return 0; | |
1047 | } | |
1048 | ||
1049 | static int | |
1050 | vsock_accept(struct socket *so, struct sockaddr **nam) | |
1051 | { | |
1052 | struct vsockpcb *pcb = sotovsockpcb(so); | |
1053 | if (pcb == NULL) { | |
1054 | return EINVAL; | |
1055 | } | |
1056 | ||
1057 | // Do not accept disconnected sockets. | |
1058 | if (so->so_state & SS_ISDISCONNECTED) { | |
1059 | return ECONNABORTED; | |
1060 | } | |
1061 | ||
1062 | *nam = vsock_new_sockaddr(&pcb->remote_address); | |
1063 | ||
1064 | return 0; | |
1065 | } | |
1066 | ||
1067 | static int | |
1068 | vsock_connect(struct socket *so, struct sockaddr *nam, struct proc *p) | |
1069 | { | |
1070 | #pragma unused(p) | |
1071 | ||
1072 | struct vsockpcb *pcb = sotovsockpcb(so); | |
1073 | if (pcb == NULL) { | |
1074 | return EINVAL; | |
1075 | } | |
1076 | ||
1077 | struct sockaddr_vm *addr = (struct sockaddr_vm *)nam; | |
1078 | ||
1079 | errno_t error = vsock_sockaddr_vm_validate(pcb, addr); | |
1080 | if (error) { | |
1081 | return error; | |
1082 | } | |
1083 | ||
1084 | uint32_t transport_cid; | |
1085 | struct vsock_transport *transport = pcb->transport; | |
1086 | error = transport->get_cid(transport->provider, &transport_cid); | |
1087 | if (error) { | |
1088 | return error; | |
1089 | } | |
1090 | ||
1091 | // Only supporting connections to the host, hypervisor, or self for now. | |
1092 | if (addr->svm_cid != VMADDR_CID_HOST && | |
1093 | addr->svm_cid != VMADDR_CID_HYPERVISOR && | |
1094 | addr->svm_cid != transport_cid) { | |
1095 | return EFAULT; | |
1096 | } | |
1097 | ||
1098 | soisconnecting(so); | |
1099 | ||
1100 | // Set the remote and local address. | |
1101 | struct vsock_address remote_addr = (struct vsock_address) { | |
1102 | .cid = addr->svm_cid, | |
1103 | .port = addr->svm_port, | |
1104 | }; | |
1105 | ||
1106 | struct vsock_address local_addr = (struct vsock_address) { | |
1107 | .cid = transport_cid, | |
1108 | .port = VMADDR_PORT_ANY, | |
1109 | }; | |
1110 | ||
1111 | // Bind to the address. | |
1112 | error = vsock_bind_address(pcb, local_addr, remote_addr); | |
1113 | if (error) { | |
1114 | goto cleanup; | |
1115 | } | |
1116 | ||
1117 | // Attempt a connection using the socket's transport. | |
1118 | error = vsock_pcb_connect(pcb); | |
1119 | if (error) { | |
1120 | goto cleanup; | |
1121 | } | |
1122 | ||
1123 | if ((so->so_state & SS_ISCONNECTED) == 0) { | |
1124 | // Don't wait for peer's response if non-blocking. | |
1125 | if (so->so_state & SS_NBIO) { | |
1126 | error = EINPROGRESS; | |
1127 | goto done; | |
1128 | } | |
1129 | ||
1130 | struct timespec ts = (struct timespec) { | |
1131 | .tv_sec = so->so_snd.sb_timeo.tv_sec, | |
1132 | .tv_nsec = so->so_snd.sb_timeo.tv_usec * 1000, | |
1133 | }; | |
1134 | ||
1135 | lck_mtx_t *mutex_held; | |
1136 | if (so->so_proto->pr_getlock != NULL) { | |
1137 | mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK); | |
1138 | } else { | |
1139 | mutex_held = so->so_proto->pr_domain->dom_mtx; | |
1140 | } | |
1141 | ||
1142 | // Wait until we receive a response to the connect request. | |
1143 | error = msleep((caddr_t)&so->so_timeo, mutex_held, PSOCK | PCATCH, "vsock_connect", &ts); | |
1144 | if (error) { | |
1145 | if (error == EAGAIN) { | |
1146 | error = ETIMEDOUT; | |
1147 | } | |
1148 | goto cleanup; | |
1149 | } | |
1150 | } | |
1151 | ||
1152 | cleanup: | |
1153 | if (so->so_error && !error) { | |
1154 | error = so->so_error; | |
1155 | so->so_error = 0; | |
1156 | } | |
1157 | if (!error) { | |
1158 | error = !(so->so_state & SS_ISCONNECTED); | |
1159 | } | |
1160 | if (error) { | |
1161 | vsock_unbind_pcb(pcb, false); | |
1162 | } | |
1163 | ||
1164 | done: | |
1165 | return error; | |
1166 | } | |
1167 | ||
1168 | static int | |
1169 | vsock_disconnect(struct socket *so) | |
1170 | { | |
1171 | struct vsockpcb *pcb = sotovsockpcb(so); | |
1172 | if (pcb == NULL) { | |
1173 | return EINVAL; | |
1174 | } | |
1175 | ||
1176 | return vsock_disconnect_pcb(pcb); | |
1177 | } | |
1178 | ||
1179 | static int | |
1180 | vsock_sockaddr(struct socket *so, struct sockaddr **nam) | |
1181 | { | |
1182 | struct vsockpcb *pcb = sotovsockpcb(so); | |
1183 | if (pcb == NULL) { | |
1184 | return EINVAL; | |
1185 | } | |
1186 | ||
1187 | *nam = vsock_new_sockaddr(&pcb->local_address); | |
1188 | ||
1189 | return 0; | |
1190 | } | |
1191 | ||
1192 | static int | |
1193 | vsock_peeraddr(struct socket *so, struct sockaddr **nam) | |
1194 | { | |
1195 | struct vsockpcb *pcb = sotovsockpcb(so); | |
1196 | if (pcb == NULL) { | |
1197 | return EINVAL; | |
1198 | } | |
1199 | ||
1200 | *nam = vsock_new_sockaddr(&pcb->remote_address); | |
1201 | ||
1202 | return 0; | |
1203 | } | |
1204 | ||
1205 | static int | |
1206 | vsock_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, struct mbuf *control, proc_t p) | |
1207 | { | |
1208 | #pragma unused(flags, nam, p) | |
1209 | ||
1210 | struct vsockpcb *pcb = sotovsockpcb(so); | |
1211 | if (pcb == NULL || m == NULL) { | |
1212 | return EINVAL; | |
1213 | } | |
1214 | ||
1215 | if (control != NULL) { | |
1216 | m_freem(control); | |
1217 | return EOPNOTSUPP; | |
1218 | } | |
1219 | ||
1220 | // Ensure this socket is connected. | |
1221 | if ((so->so_state & SS_ISCONNECTED) == 0) { | |
1222 | if (m != NULL) { | |
1223 | mbuf_freem_list(m); | |
1224 | } | |
1225 | return EPERM; | |
1226 | } | |
1227 | ||
1228 | errno_t error; | |
1229 | ||
1230 | const size_t len = mbuf_pkthdr_len(m); | |
1231 | uint32_t free_space = vsock_get_peer_space(pcb); | |
1232 | ||
1233 | // Ensure the peer has enough space in their receive buffer. | |
1234 | while (len > free_space) { | |
1235 | // Record the number of free peer bytes necessary before we can send. | |
1236 | if (len > pcb->waiting_send_size) { | |
1237 | pcb->waiting_send_size = len; | |
1238 | } | |
1239 | ||
1240 | // Send a credit request. | |
1241 | error = vsock_pcb_credit_request(pcb); | |
1242 | if (error) { | |
1243 | if (m != NULL) { | |
1244 | mbuf_freem_list(m); | |
1245 | } | |
1246 | return error; | |
1247 | } | |
1248 | ||
1249 | // Check again in case free space was automatically updated in loopback case. | |
1250 | free_space = vsock_get_peer_space(pcb); | |
1251 | if (len <= free_space) { | |
1252 | pcb->waiting_send_size = 0; | |
1253 | break; | |
1254 | } | |
1255 | ||
1256 | // Bail if this is a non-blocking socket. | |
1257 | if (so->so_state & SS_NBIO) { | |
1258 | if (m != NULL) { | |
1259 | mbuf_freem_list(m); | |
1260 | } | |
1261 | return EWOULDBLOCK; | |
1262 | } | |
1263 | ||
1264 | // Wait until our peer has enough free space in their receive buffer. | |
1265 | error = sbwait(&so->so_snd); | |
1266 | pcb->waiting_send_size = 0; | |
1267 | if (error) { | |
1268 | if (m != NULL) { | |
1269 | mbuf_freem_list(m); | |
1270 | } | |
1271 | return error; | |
1272 | } | |
1273 | ||
1274 | // Bail if an error occured or we can't send more. | |
1275 | if (so->so_state & SS_CANTSENDMORE) { | |
1276 | if (m != NULL) { | |
1277 | mbuf_freem_list(m); | |
1278 | } | |
1279 | return EPIPE; | |
1280 | } else if (so->so_error) { | |
1281 | error = so->so_error; | |
1282 | so->so_error = 0; | |
1283 | if (m != NULL) { | |
1284 | mbuf_freem_list(m); | |
1285 | } | |
1286 | return error; | |
1287 | } | |
1288 | ||
1289 | free_space = vsock_get_peer_space(pcb); | |
1290 | } | |
1291 | ||
1292 | // Send a payload over the transport. | |
1293 | error = vsock_pcb_send(pcb, m); | |
1294 | if (error) { | |
1295 | return error; | |
1296 | } | |
1297 | ||
1298 | pcb->tx_cnt += len; | |
1299 | ||
1300 | return 0; | |
1301 | } | |
1302 | ||
1303 | static int | |
1304 | vsock_shutdown(struct socket *so) | |
1305 | { | |
1306 | struct vsockpcb *pcb = sotovsockpcb(so); | |
1307 | if (pcb == NULL) { | |
1308 | return EINVAL; | |
1309 | } | |
1310 | ||
1311 | socantsendmore(so); | |
1312 | ||
1313 | // Tell peer we will no longer send. | |
1314 | errno_t error = vsock_pcb_shutdown_send(pcb); | |
1315 | if (error) { | |
1316 | return error; | |
1317 | } | |
1318 | ||
1319 | return 0; | |
1320 | } | |
1321 | ||
1322 | static int | |
1323 | vsock_soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio, | |
1324 | struct mbuf **mp0, struct mbuf **controlp, int *flagsp) | |
1325 | { | |
1326 | struct vsockpcb *pcb = sotovsockpcb(so); | |
1327 | if (pcb == NULL) { | |
1328 | return EINVAL; | |
1329 | } | |
1330 | ||
1331 | user_ssize_t length = uio_resid(uio); | |
1332 | int result = soreceive(so, psa, uio, mp0, controlp, flagsp); | |
1333 | length -= uio_resid(uio); | |
1334 | ||
1335 | socket_lock(so, 1); | |
1336 | ||
1337 | pcb->fwd_cnt += length; | |
1338 | ||
1339 | const uint32_t threshold = VSOCK_MAX_PACKET_SIZE; | |
1340 | ||
1341 | // Send a credit update if is possible that the peer will no longer send. | |
1342 | if ((pcb->fwd_cnt - pcb->last_fwd_cnt + threshold) >= pcb->last_buf_alloc) { | |
1343 | errno_t error = vsock_pcb_credit_update(pcb); | |
1344 | if (!result && error) { | |
1345 | result = error; | |
1346 | } | |
1347 | } | |
1348 | ||
1349 | socket_unlock(so, 1); | |
1350 | ||
1351 | return result; | |
1352 | } | |
1353 | ||
1354 | static struct pr_usrreqs vsock_usrreqs = { | |
1355 | .pru_abort = vsock_abort, | |
1356 | .pru_attach = vsock_attach, | |
1357 | .pru_control = vsock_control, | |
1358 | .pru_detach = vsock_detach, | |
1359 | .pru_bind = vsock_bind, | |
1360 | .pru_listen = vsock_listen, | |
1361 | .pru_accept = vsock_accept, | |
1362 | .pru_connect = vsock_connect, | |
1363 | .pru_disconnect = vsock_disconnect, | |
1364 | .pru_send = vsock_send, | |
1365 | .pru_shutdown = vsock_shutdown, | |
1366 | .pru_sockaddr = vsock_sockaddr, | |
1367 | .pru_peeraddr = vsock_peeraddr, | |
1368 | .pru_sosend = sosend, | |
1369 | .pru_soreceive = vsock_soreceive, | |
1370 | }; | |
1371 | ||
1372 | static void | |
1373 | vsock_init(struct protosw *pp, struct domain *dp) | |
1374 | { | |
1375 | #pragma unused(dp) | |
1376 | ||
1377 | static int vsock_initialized = 0; | |
1378 | VERIFY((pp->pr_flags & (PR_INITIALIZED | PR_ATTACHED)) == PR_ATTACHED); | |
1379 | if (!os_atomic_cmpxchg((volatile int *)&vsock_initialized, 0, 1, acq_rel)) { | |
1380 | return; | |
1381 | } | |
1382 | ||
1383 | // Setup VSock protocol info struct. | |
c3c9b80d A |
1384 | lck_rw_init(&vsockinfo.all_lock, &vsock_lock_grp, LCK_ATTR_NULL); |
1385 | lck_rw_init(&vsockinfo.bound_lock, &vsock_lock_grp, LCK_ATTR_NULL); | |
1386 | lck_mtx_init(&vsockinfo.port_lock, &vsock_lock_grp, LCK_ATTR_NULL); | |
f427ee49 A |
1387 | TAILQ_INIT(&vsockinfo.all); |
1388 | LIST_INIT(&vsockinfo.bound); | |
1389 | vsockinfo.last_port = VMADDR_PORT_ANY; | |
1390 | } | |
1391 | ||
1392 | static struct protosw vsocksw[] = { | |
1393 | { | |
1394 | .pr_type = SOCK_STREAM, | |
1395 | .pr_protocol = 0, | |
1396 | .pr_flags = PR_CONNREQUIRED | PR_WANTRCVD, | |
1397 | .pr_init = vsock_init, | |
1398 | .pr_usrreqs = &vsock_usrreqs, | |
1399 | } | |
1400 | }; | |
1401 | ||
1402 | static const int vsock_proto_count = (sizeof(vsocksw) / sizeof(struct protosw)); | |
1403 | ||
1404 | /* VSock Domain */ | |
1405 | ||
1406 | static struct domain *vsock_domain = NULL; | |
1407 | ||
1408 | static void | |
1409 | vsock_dinit(struct domain *dp) | |
1410 | { | |
1411 | // The VSock domain is initialized with a singleton pattern. | |
1412 | VERIFY(!(dp->dom_flags & DOM_INITIALIZED)); | |
1413 | VERIFY(vsock_domain == NULL); | |
1414 | vsock_domain = dp; | |
1415 | ||
1416 | // Add protocols and initialize. | |
1417 | for (int i = 0; i < vsock_proto_count; i++) { | |
1418 | net_add_proto((struct protosw *)&vsocksw[i], dp, 1); | |
1419 | } | |
1420 | } | |
1421 | ||
1422 | struct domain vsockdomain_s = { | |
1423 | .dom_family = PF_VSOCK, | |
1424 | .dom_name = "vsock", | |
1425 | .dom_init = vsock_dinit, | |
1426 | .dom_maxrtkey = sizeof(struct sockaddr_vm), | |
1427 | .dom_protohdrlen = sizeof(struct sockaddr_vm), | |
1428 | }; |