]>
Commit | Line | Data |
---|---|---|
f427ee49 A |
1 | /* |
2 | * Copyright (c) 2020 Apple Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * This file contains Original Code and/or Modifications of Original Code | |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
14 | * | |
15 | * Please obtain a copy of the License at | |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
25 | * | |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ | |
27 | */ | |
28 | ||
29 | #include <sys/domain.h> | |
30 | #include <sys/socket.h> | |
31 | #include <sys/protosw.h> | |
32 | #include <sys/mcache.h> | |
33 | #include <sys/systm.h> | |
34 | #include <sys/sysctl.h> | |
35 | #include <sys/random.h> | |
36 | #include <sys/mbuf.h> | |
37 | #include <sys/vsock_domain.h> | |
38 | #include <sys/vsock_transport.h> | |
39 | #include <kern/task.h> | |
40 | #include <kern/zalloc.h> | |
41 | #include <kern/locks.h> | |
42 | #include <machine/atomic.h> | |
43 | ||
/* Fetch the vsock protocol control block stored in a socket's so_pcb field. */
#define sotovsockpcb(so) ((struct vsockpcb *)(so)->so_pcb)

/* Explicit local ports below this value are subject to the privilege check at bind time. */
#define VSOCK_PORT_RESERVED 1024

/* VSock Protocol Globals */

/* The currently registered transport (NULL when none); accessed with atomic operations. */
static struct vsock_transport * _Atomic the_vsock_transport = NULL;
/* Zone from which struct vsockpcb instances are allocated. */
static ZONE_DECLARE(vsockpcb_zone, "vsockpcbzone",
    sizeof(struct vsockpcb), ZC_NONE);
/* Shared bookkeeping: the bound and all-socket lists, their locks, and port allocation state. */
static struct vsockpcbinfo vsockinfo;

/* Send and receive buffer sizes passed to soreserve() when a socket attaches. */
static uint32_t vsock_sendspace = VSOCK_MAX_PACKET_SIZE * 8;
static uint32_t vsock_recvspace = VSOCK_MAX_PACKET_SIZE * 8;
57 | ||
58 | /* VSock PCB Helpers */ | |
59 | ||
60 | static uint32_t | |
61 | vsock_get_peer_space(struct vsockpcb *pcb) | |
62 | { | |
63 | return pcb->peer_buf_alloc - (pcb->tx_cnt - pcb->peer_fwd_cnt); | |
64 | } | |
65 | ||
/*
 * Find the bound pcb that should handle a message for local address `src`
 * coming from remote address `dst`.
 *
 * A connected or connecting socket whose local cid/port match `src` and whose
 * remote port matches `dst` is preferred and ends the search immediately.
 * Otherwise the last pcb seen in the list walk whose local port matches (and
 * whose local cid matches or is VMADDR_CID_ANY) is used as a fallback.
 *
 * Returns NULL when nothing matched. A non-NULL result is returned with its
 * socket locked via socket_lock(so, 1); the caller is responsible for the
 * matching unlock.
 */
static struct vsockpcb *
vsock_get_matching_pcb(struct vsock_address src, struct vsock_address dst)
{
	struct vsockpcb *preferred = NULL;
	struct vsockpcb *match = NULL;
	struct vsockpcb *pcb = NULL;

	lck_rw_lock_shared(vsockinfo.bound_lock);
	LIST_FOREACH(pcb, &vsockinfo.bound, bound) {
		// Source cid and port must match. Only destination port must match. (Allows for a changing CID during migration)
		socket_lock(pcb->so, 1);
		if ((pcb->so->so_state & SS_ISCONNECTED || pcb->so->so_state & SS_ISCONNECTING) &&
		    pcb->local_address.cid == src.cid && pcb->local_address.port == src.port &&
		    pcb->remote_address.port == dst.port) {
			// Leave the loop with this socket still locked; it is the return value.
			preferred = pcb;
			break;
		} else if ((pcb->local_address.cid == src.cid || pcb->local_address.cid == VMADDR_CID_ANY) &&
		    pcb->local_address.port == src.port) {
			// Remember the fallback; a later entry may still be an exact match.
			match = pcb;
		}
		socket_unlock(pcb->so, 1);
	}
	if (!preferred && match) {
		// The fallback was unlocked inside the loop, so lock it again before returning it.
		socket_lock(match->so, 1);
		preferred = match;
	}
	lck_rw_done(vsockinfo.bound_lock);

	return preferred;
}
96 | ||
/*
 * Bind `pcb` to the given local/remote address and add it to the bound list,
 * provided no entry already on the list conflicts.
 *
 * Must be called with pcb's socket lock held. The socket lock is dropped
 * before vsockinfo.bound_lock is taken exclusively and the list is scanned,
 * then re-taken before the pcb is modified.
 *
 * Returns 0 on success, EACCES when an explicit port below
 * VSOCK_PORT_RESERVED is requested by a caller that is not the kernel task
 * and for which proc_suser() fails, or EADDRINUSE when a conflict exists.
 */
static errno_t
vsock_bind_address_if_free(struct vsockpcb *pcb, uint32_t local_cid, uint32_t local_port, uint32_t remote_cid, uint32_t remote_port)
{
	socket_lock_assert_owned(pcb->so);

	// Privileged ports.
	if (local_port != VMADDR_PORT_ANY && local_port < VSOCK_PORT_RESERVED &&
	    current_task() != kernel_task && proc_suser(current_proc()) != 0) {
		return EACCES;
	}

	bool taken = false;
	// The remote address takes part in the comparison only when it is fully specified.
	const bool check_remote = (remote_cid != VMADDR_CID_ANY && remote_port != VMADDR_PORT_ANY);

	struct vsockpcb *pcb_match = NULL;

	// Drop our socket lock before taking the list lock and locking other sockets.
	socket_unlock(pcb->so, 0);
	lck_rw_lock_exclusive(vsockinfo.bound_lock);
	LIST_FOREACH(pcb_match, &vsockinfo.bound, bound) {
		socket_lock(pcb_match->so, 1);
		// Conflict if this pcb is already on the list, or the local port (and,
		// when checked, the remote cid/port) is already in use.
		if (pcb == pcb_match ||
		    (!check_remote && pcb_match->local_address.port == local_port) ||
		    (check_remote && pcb_match->local_address.port == local_port &&
		    pcb_match->remote_address.cid == remote_cid && pcb_match->remote_address.port == remote_port)) {
			socket_unlock(pcb_match->so, 1);
			taken = true;
			break;
		}
		socket_unlock(pcb_match->so, 1);
	}
	// Re-take our socket lock (still holding the list lock) before touching the pcb.
	socket_lock(pcb->so, 0);
	if (!taken) {
		pcb->local_address = (struct vsock_address) { .cid = local_cid, .port = local_port };
		pcb->remote_address = (struct vsock_address) { .cid = remote_cid, .port = remote_port };
		LIST_INSERT_HEAD(&vsockinfo.bound, pcb, bound);
	}
	lck_rw_done(vsockinfo.bound_lock);

	return taken ? EADDRINUSE : 0;
}
137 | ||
138 | static errno_t | |
139 | vsock_bind_address(struct vsockpcb *pcb, struct vsock_address laddr, struct vsock_address raddr) | |
140 | { | |
141 | if (!pcb) { | |
142 | return EINVAL; | |
143 | } | |
144 | ||
145 | socket_lock_assert_owned(pcb->so); | |
146 | ||
147 | // Certain CIDs are reserved. | |
148 | if (laddr.cid == VMADDR_CID_HYPERVISOR || laddr.cid == VMADDR_CID_RESERVED || laddr.cid == VMADDR_CID_HOST) { | |
149 | return EADDRNOTAVAIL; | |
150 | } | |
151 | ||
152 | // Remote address must be fully specified or not specified at all. | |
153 | if ((raddr.cid == VMADDR_CID_ANY) ^ (raddr.port == VMADDR_PORT_ANY)) { | |
154 | return EINVAL; | |
155 | } | |
156 | ||
157 | // Cannot bind if already bound. | |
158 | if (pcb->local_address.port != VMADDR_PORT_ANY) { | |
159 | return EINVAL; | |
160 | } | |
161 | ||
162 | uint32_t transport_cid; | |
163 | struct vsock_transport *transport = pcb->transport; | |
164 | errno_t error = transport->get_cid(transport->provider, &transport_cid); | |
165 | if (error) { | |
166 | return error; | |
167 | } | |
168 | ||
169 | // Local CID must be this transport's CID or any. | |
170 | if (laddr.cid != transport_cid && laddr.cid != VMADDR_CID_ANY) { | |
171 | return EINVAL; | |
172 | } | |
173 | ||
174 | if (laddr.port != VMADDR_PORT_ANY) { | |
175 | error = vsock_bind_address_if_free(pcb, laddr.cid, laddr.port, raddr.cid, raddr.port); | |
176 | } else { | |
177 | lck_mtx_lock(&vsockinfo.port_lock); | |
178 | ||
179 | const uint32_t first = VSOCK_PORT_RESERVED; | |
180 | const uint32_t last = VMADDR_PORT_ANY - 1; | |
181 | uint32_t count = last - first + 1; | |
182 | uint32_t *last_port = &vsockinfo.last_port; | |
183 | ||
184 | if (pcb->so->so_flags & SOF_BINDRANDOMPORT) { | |
185 | uint32_t random = 0; | |
186 | read_frandom(&random, sizeof(random)); | |
187 | *last_port = first + (random % count); | |
188 | } | |
189 | ||
190 | do { | |
191 | if (count == 0) { | |
192 | lck_mtx_unlock(&vsockinfo.port_lock); | |
193 | return EADDRNOTAVAIL; | |
194 | } | |
195 | count--; | |
196 | ||
197 | ++*last_port; | |
198 | if (*last_port < first || *last_port > last) { | |
199 | *last_port = first; | |
200 | } | |
201 | ||
202 | error = vsock_bind_address_if_free(pcb, laddr.cid, *last_port, raddr.cid, raddr.port); | |
203 | } while (error); | |
204 | ||
205 | lck_mtx_unlock(&vsockinfo.port_lock); | |
206 | } | |
207 | ||
208 | return error; | |
209 | } | |
210 | ||
/*
 * Mark the socket disconnected and remove `pcb` from the bound list if it is
 * on it. A NULL pcb is ignored.
 *
 * Must be called with the socket lock held. `is_locked` states whether the
 * caller already holds vsockinfo.bound_lock exclusively; when it is false the
 * list lock is taken (and released) here.
 */
static void
vsock_unbind_pcb(struct vsockpcb *pcb, bool is_locked)
{
	if (!pcb) {
		return;
	}

	socket_lock_assert_owned(pcb->so);

	soisdisconnected(pcb->so);

	// A NULL le_prev means the pcb is not on the bound list.
	if (!pcb->bound.le_prev) {
		return;
	}

	if (!is_locked) {
		// Release the socket lock before acquiring the list lock, then
		// re-take it (same order as vsock_bind_address_if_free).
		socket_unlock(pcb->so, 0);
		lck_rw_lock_exclusive(vsockinfo.bound_lock);
		socket_lock(pcb->so, 0);
		// Re-check: the pcb may have been unbound while the socket lock was dropped.
		if (!pcb->bound.le_prev) {
			lck_rw_done(vsockinfo.bound_lock);
			return;
		}
	}

	LIST_REMOVE(pcb, bound);
	// Clear the links so later calls can tell the pcb is no longer bound.
	pcb->bound.le_next = NULL;
	pcb->bound.le_prev = NULL;

	if (!is_locked) {
		lck_rw_done(vsockinfo.bound_lock);
	}
}
244 | ||
245 | static struct sockaddr * | |
246 | vsock_new_sockaddr(struct vsock_address *address) | |
247 | { | |
248 | if (!address) { | |
249 | return NULL; | |
250 | } | |
251 | ||
252 | struct sockaddr_vm *addr; | |
253 | MALLOC(addr, struct sockaddr_vm *, sizeof(*addr), M_SONAME, M_WAITOK); | |
254 | if (!addr) { | |
255 | return NULL; | |
256 | } | |
257 | ||
258 | bzero(addr, sizeof(*addr)); | |
259 | addr->svm_len = sizeof(*addr); | |
260 | addr->svm_family = AF_VSOCK; | |
261 | addr->svm_port = address->port; | |
262 | addr->svm_cid = address->cid; | |
263 | ||
264 | return (struct sockaddr *)addr; | |
265 | } | |
266 | ||
267 | static errno_t | |
268 | vsock_pcb_send_message(struct vsockpcb *pcb, enum vsock_operation operation, mbuf_t m) | |
269 | { | |
270 | if (!pcb) { | |
271 | if (m != NULL) { | |
272 | mbuf_freem_list(m); | |
273 | } | |
274 | return EINVAL; | |
275 | } | |
276 | ||
277 | socket_lock_assert_owned(pcb->so); | |
278 | ||
279 | errno_t error; | |
280 | ||
281 | struct vsock_address dst = pcb->remote_address; | |
282 | if (dst.cid == VMADDR_CID_ANY || dst.port == VMADDR_PORT_ANY) { | |
283 | if (m != NULL) { | |
284 | mbuf_freem_list(m); | |
285 | } | |
286 | return EINVAL; | |
287 | } | |
288 | ||
289 | struct vsock_address src = pcb->local_address; | |
290 | if (src.cid == VMADDR_CID_ANY) { | |
291 | uint32_t transport_cid; | |
292 | struct vsock_transport *transport = pcb->transport; | |
293 | error = transport->get_cid(transport->provider, &transport_cid); | |
294 | if (error) { | |
295 | if (m != NULL) { | |
296 | mbuf_freem_list(m); | |
297 | } | |
298 | return error; | |
299 | } | |
300 | src.cid = transport_cid; | |
301 | } | |
302 | ||
303 | uint32_t buf_alloc = pcb->so->so_rcv.sb_hiwat; | |
304 | uint32_t fwd_cnt = pcb->fwd_cnt; | |
305 | ||
306 | if (src.cid == dst.cid) { | |
307 | pcb->last_buf_alloc = buf_alloc; | |
308 | pcb->last_fwd_cnt = fwd_cnt; | |
309 | ||
310 | socket_unlock(pcb->so, 0); | |
311 | error = vsock_put_message(src, dst, operation, buf_alloc, fwd_cnt, m); | |
312 | socket_lock(pcb->so, 0); | |
313 | } else { | |
314 | struct vsock_transport *transport = pcb->transport; | |
315 | error = transport->put_message(transport->provider, src, dst, operation, buf_alloc, fwd_cnt, m); | |
316 | ||
317 | if (!error) { | |
318 | pcb->last_buf_alloc = buf_alloc; | |
319 | pcb->last_fwd_cnt = fwd_cnt; | |
320 | } | |
321 | } | |
322 | ||
323 | return error; | |
324 | } | |
325 | ||
326 | static errno_t | |
327 | vsock_pcb_reset_address(struct vsock_address src, struct vsock_address dst) | |
328 | { | |
329 | if (dst.cid == VMADDR_CID_ANY || dst.port == VMADDR_PORT_ANY) { | |
330 | return EINVAL; | |
331 | } | |
332 | ||
333 | errno_t error; | |
334 | struct vsock_transport *transport = NULL; | |
335 | ||
336 | if (src.cid == VMADDR_CID_ANY) { | |
337 | transport = os_atomic_load(&the_vsock_transport, relaxed); | |
338 | if (transport == NULL) { | |
339 | return ENODEV; | |
340 | } | |
341 | ||
342 | uint32_t transport_cid; | |
343 | error = transport->get_cid(transport->provider, &transport_cid); | |
344 | if (error) { | |
345 | return error; | |
346 | } | |
347 | src.cid = transport_cid; | |
348 | } | |
349 | ||
350 | if (src.cid == dst.cid) { | |
351 | error = vsock_put_message(src, dst, VSOCK_RESET, 0, 0, NULL); | |
352 | } else { | |
353 | if (!transport) { | |
354 | transport = os_atomic_load(&the_vsock_transport, relaxed); | |
355 | if (transport == NULL) { | |
356 | return ENODEV; | |
357 | } | |
358 | } | |
359 | error = transport->put_message(transport->provider, src, dst, VSOCK_RESET, 0, 0, NULL); | |
360 | } | |
361 | ||
362 | return error; | |
363 | } | |
364 | ||
365 | static errno_t | |
366 | vsock_pcb_safe_reset_address(struct vsockpcb *pcb, struct vsock_address src, struct vsock_address dst) | |
367 | { | |
368 | if (pcb) { | |
369 | socket_lock_assert_owned(pcb->so); | |
370 | socket_unlock(pcb->so, 0); | |
371 | } | |
372 | errno_t error = vsock_pcb_reset_address(src, dst); | |
373 | if (pcb) { | |
374 | socket_lock(pcb->so, 0); | |
375 | } | |
376 | return error; | |
377 | } | |
378 | ||
379 | static errno_t | |
380 | vsock_pcb_connect(struct vsockpcb *pcb) | |
381 | { | |
382 | return vsock_pcb_send_message(pcb, VSOCK_REQUEST, NULL); | |
383 | } | |
384 | ||
385 | static errno_t | |
386 | vsock_pcb_respond(struct vsockpcb *pcb) | |
387 | { | |
388 | return vsock_pcb_send_message(pcb, VSOCK_RESPONSE, NULL); | |
389 | } | |
390 | ||
391 | static errno_t | |
392 | vsock_pcb_send(struct vsockpcb *pcb, mbuf_t m) | |
393 | { | |
394 | return vsock_pcb_send_message(pcb, VSOCK_PAYLOAD, m); | |
395 | } | |
396 | ||
397 | static errno_t | |
398 | vsock_pcb_shutdown_send(struct vsockpcb *pcb) | |
399 | { | |
400 | return vsock_pcb_send_message(pcb, VSOCK_SHUTDOWN_SEND, NULL); | |
401 | } | |
402 | ||
403 | static errno_t | |
404 | vsock_pcb_reset(struct vsockpcb *pcb) | |
405 | { | |
406 | return vsock_pcb_send_message(pcb, VSOCK_RESET, NULL); | |
407 | } | |
408 | ||
409 | static errno_t | |
410 | vsock_pcb_credit_update(struct vsockpcb *pcb) | |
411 | { | |
412 | return vsock_pcb_send_message(pcb, VSOCK_CREDIT_UPDATE, NULL); | |
413 | } | |
414 | ||
415 | static errno_t | |
416 | vsock_pcb_credit_request(struct vsockpcb *pcb) | |
417 | { | |
418 | return vsock_pcb_send_message(pcb, VSOCK_CREDIT_REQUEST, NULL); | |
419 | } | |
420 | ||
421 | static errno_t | |
422 | vsock_disconnect_pcb_common(struct vsockpcb *pcb, bool is_locked) | |
423 | { | |
424 | socket_lock_assert_owned(pcb->so); | |
425 | vsock_unbind_pcb(pcb, is_locked); | |
426 | return vsock_pcb_reset(pcb); | |
427 | } | |
428 | ||
429 | static errno_t | |
430 | vsock_disconnect_pcb_locked(struct vsockpcb *pcb) | |
431 | { | |
432 | return vsock_disconnect_pcb_common(pcb, true); | |
433 | } | |
434 | ||
435 | static errno_t | |
436 | vsock_disconnect_pcb(struct vsockpcb *pcb) | |
437 | { | |
438 | return vsock_disconnect_pcb_common(pcb, false); | |
439 | } | |
440 | ||
441 | static errno_t | |
442 | vsock_sockaddr_vm_validate(struct vsockpcb *pcb, struct sockaddr_vm *addr) | |
443 | { | |
444 | if (!pcb || !pcb->so || !addr) { | |
445 | return EINVAL; | |
446 | } | |
447 | ||
448 | // Validate address length. | |
449 | if (addr->svm_len < sizeof(struct sockaddr_vm)) { | |
450 | return EINVAL; | |
451 | } | |
452 | ||
453 | // Validate address family. | |
454 | if (addr->svm_family != AF_UNSPEC && addr->svm_family != AF_VSOCK) { | |
455 | return EAFNOSUPPORT; | |
456 | } | |
457 | ||
458 | // Only stream is supported currently. | |
459 | if (pcb->so->so_type != SOCK_STREAM) { | |
460 | return EAFNOSUPPORT; | |
461 | } | |
462 | ||
463 | return 0; | |
464 | } | |
465 | /* VSock Receive Handlers */ | |
466 | ||
467 | static errno_t | |
468 | vsock_put_message_connected(struct vsockpcb *pcb, enum vsock_operation op, mbuf_t m) | |
469 | { | |
470 | socket_lock_assert_owned(pcb->so); | |
471 | ||
472 | errno_t error = 0; | |
473 | ||
474 | switch (op) { | |
475 | case VSOCK_SHUTDOWN: | |
476 | error = vsock_disconnect_pcb(pcb); | |
477 | break; | |
478 | case VSOCK_SHUTDOWN_RECEIVE: | |
479 | socantsendmore(pcb->so); | |
480 | break; | |
481 | case VSOCK_SHUTDOWN_SEND: | |
482 | socantrcvmore(pcb->so); | |
483 | break; | |
484 | case VSOCK_PAYLOAD: | |
485 | // Add data to the receive queue then wakeup any reading threads. | |
486 | error = !sbappendstream(&pcb->so->so_rcv, m); | |
487 | if (!error) { | |
488 | sorwakeup(pcb->so); | |
489 | } | |
490 | break; | |
491 | case VSOCK_RESET: | |
492 | vsock_unbind_pcb(pcb, false); | |
493 | break; | |
494 | default: | |
495 | error = ENOTSUP; | |
496 | break; | |
497 | } | |
498 | ||
499 | return error; | |
500 | } | |
501 | ||
502 | static errno_t | |
503 | vsock_put_message_connecting(struct vsockpcb *pcb, enum vsock_operation op) | |
504 | { | |
505 | socket_lock_assert_owned(pcb->so); | |
506 | ||
507 | errno_t error = 0; | |
508 | ||
509 | switch (op) { | |
510 | case VSOCK_RESPONSE: | |
511 | soisconnected(pcb->so); | |
512 | break; | |
513 | case VSOCK_RESET: | |
514 | pcb->so->so_error = EAGAIN; | |
515 | error = vsock_disconnect_pcb(pcb); | |
516 | break; | |
517 | default: | |
518 | vsock_disconnect_pcb(pcb); | |
519 | error = ENOTSUP; | |
520 | break; | |
521 | } | |
522 | ||
523 | return error; | |
524 | } | |
525 | ||
/*
 * Handle an incoming message for a listening socket.
 *
 * `src` is the remote sender and `dst` the local address the message was
 * sent to; note that they are swapped when binding the new connection and
 * when sending resets back to the sender.
 *
 * VSOCK_REQUEST creates a new socket via sonewconn(), binds it to
 * (dst, src), sends a response and marks it connected. VSOCK_RESET and any
 * other operation answer the sender with a reset; the latter also returns
 * ENOTSUP.
 *
 * Must be called with the listening socket's lock held.
 *
 * Returns 0 on success, ECONNREFUSED if no new socket could be created,
 * EINVAL if the new socket has no pcb, ENOTSUP for unexpected operations, or
 * the error from binding/responding/resetting.
 */
static errno_t
vsock_put_message_listening(struct vsockpcb *pcb, enum vsock_operation op, struct vsock_address src, struct vsock_address dst)
{
	socket_lock_assert_owned(pcb->so);

	struct sockaddr_vm addr;
	struct socket *so2 = NULL;
	struct vsockpcb *pcb2 = NULL;

	errno_t error = 0;

	switch (op) {
	case VSOCK_REQUEST:
		// Address of the listening socket, handed to sonewconn().
		addr = (struct sockaddr_vm) {
			.svm_len = sizeof(addr),
			.svm_family = AF_VSOCK,
			.svm_reserved1 = 0,
			.svm_port = pcb->local_address.port,
			.svm_cid = pcb->local_address.cid
		};
		so2 = sonewconn(pcb->so, 0, (struct sockaddr *)&addr);
		if (!so2) {
			// It is likely that the backlog is full. Deny this request.
			vsock_pcb_safe_reset_address(pcb, dst, src);
			error = ECONNREFUSED;
			break;
		}

		pcb2 = sotovsockpcb(so2);
		if (!pcb2) {
			error = EINVAL;
			goto done;
		}

		// The new connection's local address is the message's destination
		// and its remote address is the sender.
		error = vsock_bind_address(pcb2, dst, src);
		if (error) {
			goto done;
		}

		error = vsock_pcb_respond(pcb2);
		if (error) {
			goto done;
		}

		soisconnected(so2);

done:
		// NOTE(review): the unlock of so2 and the re-lock of pcb->so below
		// presumably rely on the lock state sonewconn() leaves behind (new
		// socket locked, listener unlocked) - confirm against sonewconn().
		if (error) {
			// Tear down the half-created connection and tell the sender.
			soisdisconnected(so2);
			if (pcb2) {
				vsock_unbind_pcb(pcb2, false);
			}
			socket_unlock(so2, 1);
			vsock_pcb_reset_address(dst, src);
		} else {
			socket_unlock(so2, 0);
		}
		socket_lock(pcb->so, 0);

		break;
	case VSOCK_RESET:
		error = vsock_pcb_safe_reset_address(pcb, dst, src);
		break;
	default:
		// Unexpected operation: reset the sender; the reset result is not reported.
		vsock_pcb_safe_reset_address(pcb, dst, src);
		error = ENOTSUP;
		break;
	}

	return error;
}
597 | ||
598 | /* VSock Transport */ | |
599 | ||
600 | errno_t | |
601 | vsock_add_transport(struct vsock_transport *transport) | |
602 | { | |
603 | if (transport == NULL || transport->provider == NULL) { | |
604 | return EINVAL; | |
605 | } | |
606 | if (!os_atomic_cmpxchg((void * volatile *)&the_vsock_transport, NULL, transport, acq_rel)) { | |
607 | return EEXIST; | |
608 | } | |
609 | return 0; | |
610 | } | |
611 | ||
612 | errno_t | |
613 | vsock_remove_transport(struct vsock_transport *transport) | |
614 | { | |
615 | if (!os_atomic_cmpxchg((void * volatile *)&the_vsock_transport, transport, NULL, acq_rel)) { | |
616 | return ENODEV; | |
617 | } | |
618 | return 0; | |
619 | } | |
620 | ||
621 | errno_t | |
622 | vsock_reset_transport(struct vsock_transport *transport) | |
623 | { | |
624 | if (transport == NULL) { | |
625 | return EINVAL; | |
626 | } | |
627 | ||
628 | errno_t error = 0; | |
629 | struct vsockpcb *pcb = NULL; | |
630 | struct vsockpcb *tmp_pcb = NULL; | |
631 | ||
632 | lck_rw_lock_exclusive(vsockinfo.bound_lock); | |
633 | LIST_FOREACH_SAFE(pcb, &vsockinfo.bound, bound, tmp_pcb) { | |
634 | // Disconnect this transport's sockets. Listen and bind sockets must stay alive. | |
635 | socket_lock(pcb->so, 1); | |
636 | if (pcb->transport == transport && pcb->so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING | SS_ISDISCONNECTING)) { | |
637 | errno_t dc_error = vsock_disconnect_pcb_locked(pcb); | |
638 | if (dc_error && !error) { | |
639 | error = dc_error; | |
640 | } | |
641 | } | |
642 | socket_unlock(pcb->so, 1); | |
643 | } | |
644 | lck_rw_done(vsockinfo.bound_lock); | |
645 | ||
646 | return error; | |
647 | } | |
648 | ||
649 | errno_t | |
650 | vsock_put_message(struct vsock_address src, struct vsock_address dst, enum vsock_operation op, uint32_t buf_alloc, uint32_t fwd_cnt, mbuf_t m) | |
651 | { | |
652 | struct vsockpcb *pcb = vsock_get_matching_pcb(dst, src); | |
653 | if (!pcb) { | |
654 | if (op != VSOCK_RESET) { | |
655 | vsock_pcb_reset_address(dst, src); | |
656 | } | |
657 | if (m != NULL) { | |
658 | mbuf_freem_list(m); | |
659 | } | |
660 | return EINVAL; | |
661 | } | |
662 | ||
663 | socket_lock_assert_owned(pcb->so); | |
664 | ||
665 | struct socket *so = pcb->so; | |
666 | errno_t error = 0; | |
667 | ||
668 | // Check if the peer's buffer has changed. Update our view of the peer's forwarded bytes. | |
669 | int buffers_changed = (pcb->peer_buf_alloc != buf_alloc) || (pcb->peer_fwd_cnt) != fwd_cnt; | |
670 | pcb->peer_buf_alloc = buf_alloc; | |
671 | pcb->peer_fwd_cnt = fwd_cnt; | |
672 | ||
673 | // Peer's buffer has enough space for the next packet. Notify any threads waiting for space. | |
674 | if (buffers_changed && vsock_get_peer_space(pcb) >= pcb->waiting_send_size) { | |
675 | sowwakeup(so); | |
676 | } | |
677 | ||
678 | switch (op) { | |
679 | case VSOCK_CREDIT_REQUEST: | |
680 | error = vsock_pcb_credit_update(pcb); | |
681 | break; | |
682 | case VSOCK_CREDIT_UPDATE: | |
683 | break; | |
684 | default: | |
685 | if (so->so_state & SS_ISCONNECTED) { | |
686 | error = vsock_put_message_connected(pcb, op, m); | |
687 | m = NULL; | |
688 | } else if (so->so_state & SS_ISCONNECTING) { | |
689 | error = vsock_put_message_connecting(pcb, op); | |
690 | } else if (so->so_options & SO_ACCEPTCONN) { | |
691 | error = vsock_put_message_listening(pcb, op, src, dst); | |
692 | } else { | |
693 | // Reset the connection for other states such as 'disconnecting'. | |
694 | error = vsock_disconnect_pcb(pcb); | |
695 | if (!error) { | |
696 | error = ENODEV; | |
697 | } | |
698 | } | |
699 | break; | |
700 | } | |
701 | socket_unlock(so, 1); | |
702 | ||
703 | if (m != NULL) { | |
704 | mbuf_freem_list(m); | |
705 | } | |
706 | ||
707 | return error; | |
708 | } | |
709 | ||
710 | /* VSock Sysctl */ | |
711 | ||
712 | static int | |
713 | vsock_pcblist SYSCTL_HANDLER_ARGS | |
714 | { | |
715 | #pragma unused(oidp,arg2) | |
716 | ||
717 | int error; | |
718 | ||
719 | // Only stream is supported. | |
720 | if ((intptr_t)arg1 != SOCK_STREAM) { | |
721 | return EINVAL; | |
722 | } | |
723 | ||
724 | // Get the generation count and the count of all vsock sockets. | |
725 | lck_rw_lock_shared(vsockinfo.all_lock); | |
726 | uint64_t n = vsockinfo.all_pcb_count; | |
727 | vsock_gen_t gen_count = vsockinfo.vsock_gencnt; | |
728 | lck_rw_done(vsockinfo.all_lock); | |
729 | ||
730 | const size_t xpcb_len = sizeof(struct xvsockpcb); | |
731 | struct xvsockpgen xvg; | |
732 | ||
733 | /* | |
734 | * The process of preparing the PCB list is too time-consuming and | |
735 | * resource-intensive to repeat twice on every request. | |
736 | */ | |
737 | if (req->oldptr == USER_ADDR_NULL) { | |
738 | req->oldidx = (size_t)(2 * sizeof(xvg) + (n + n / 8) * xpcb_len); | |
739 | return 0; | |
740 | } | |
741 | ||
742 | if (req->newptr != USER_ADDR_NULL) { | |
743 | return EPERM; | |
744 | } | |
745 | ||
746 | bzero(&xvg, sizeof(xvg)); | |
747 | xvg.xvg_len = sizeof(xvg); | |
748 | xvg.xvg_count = n; | |
749 | xvg.xvg_gen = gen_count; | |
750 | xvg.xvg_sogen = so_gencnt; | |
751 | error = SYSCTL_OUT(req, &xvg, sizeof(xvg)); | |
752 | if (error) { | |
753 | return error; | |
754 | } | |
755 | ||
756 | // Return if no sockets exist. | |
757 | if (n == 0) { | |
758 | return 0; | |
759 | } | |
760 | ||
761 | lck_rw_lock_shared(vsockinfo.all_lock); | |
762 | ||
763 | n = 0; | |
764 | struct vsockpcb *pcb = NULL; | |
765 | TAILQ_FOREACH(pcb, &vsockinfo.all, all) { | |
766 | // Bail if there is not enough user buffer for this next socket. | |
767 | if (req->oldlen - req->oldidx - sizeof(xvg) < xpcb_len) { | |
768 | break; | |
769 | } | |
770 | ||
771 | // Populate the socket structure. | |
772 | socket_lock(pcb->so, 1); | |
773 | if (pcb->vsock_gencnt <= gen_count) { | |
774 | struct xvsockpcb xpcb; | |
775 | bzero(&xpcb, xpcb_len); | |
776 | xpcb.xv_len = xpcb_len; | |
777 | xpcb.xv_vsockpp = (uint64_t)VM_KERNEL_ADDRHASH(pcb); | |
778 | xpcb.xvp_local_cid = pcb->local_address.cid; | |
779 | xpcb.xvp_local_port = pcb->local_address.port; | |
780 | xpcb.xvp_remote_cid = pcb->remote_address.cid; | |
781 | xpcb.xvp_remote_port = pcb->remote_address.port; | |
782 | xpcb.xvp_rxcnt = pcb->fwd_cnt; | |
783 | xpcb.xvp_txcnt = pcb->tx_cnt; | |
784 | xpcb.xvp_peer_rxhiwat = pcb->peer_buf_alloc; | |
785 | xpcb.xvp_peer_rxcnt = pcb->peer_fwd_cnt; | |
786 | xpcb.xvp_last_pid = pcb->so->last_pid; | |
787 | xpcb.xvp_gencnt = pcb->vsock_gencnt; | |
788 | if (pcb->so) { | |
789 | sotoxsocket(pcb->so, &xpcb.xv_socket); | |
790 | } | |
791 | socket_unlock(pcb->so, 1); | |
792 | ||
793 | error = SYSCTL_OUT(req, &xpcb, xpcb_len); | |
794 | if (error != 0) { | |
795 | break; | |
796 | } | |
797 | n++; | |
798 | } else { | |
799 | socket_unlock(pcb->so, 1); | |
800 | } | |
801 | } | |
802 | ||
803 | // Update the generation count to match the sockets being returned. | |
804 | gen_count = vsockinfo.vsock_gencnt; | |
805 | ||
806 | lck_rw_done(vsockinfo.all_lock); | |
807 | ||
808 | if (!error) { | |
809 | /* | |
810 | * Give the user an updated idea of our state. | |
811 | * If the generation differs from what we told | |
812 | * her before, she knows that something happened | |
813 | * while we were processing this request, and it | |
814 | * might be necessary to retry. | |
815 | */ | |
816 | bzero(&xvg, sizeof(xvg)); | |
817 | xvg.xvg_len = sizeof(xvg); | |
818 | xvg.xvg_count = n; | |
819 | xvg.xvg_gen = gen_count; | |
820 | xvg.xvg_sogen = so_gencnt; | |
821 | error = SYSCTL_OUT(req, &xvg, sizeof(xvg)); | |
822 | } | |
823 | ||
824 | return error; | |
825 | } | |
826 | ||
#ifdef SYSCTL_DECL
/* net.vsock: parent node for the vsock sysctls declared below. */
SYSCTL_NODE(_net, OID_AUTO, vsock, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "vsock");
/* net.vsock.sendspace / recvspace: read-write views of the sizes given to soreserve() at attach. */
SYSCTL_UINT(_net_vsock, OID_AUTO, sendspace, CTLFLAG_RW | CTLFLAG_LOCKED,
    &vsock_sendspace, 0, "Maximum outgoing vsock datagram size");
SYSCTL_UINT(_net_vsock, OID_AUTO, recvspace, CTLFLAG_RW | CTLFLAG_LOCKED,
    &vsock_recvspace, 0, "Maximum incoming vsock datagram size");
/* net.vsock.pcblist: read-only socket list; arg1 carries SOCK_STREAM, which the handler requires. */
SYSCTL_PROC(_net_vsock, OID_AUTO, pcblist,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    (caddr_t)(long)SOCK_STREAM, 0, vsock_pcblist, "S,xvsockpcb",
    "List of active vsock sockets");
#endif
838 | ||
839 | /* VSock Protocol */ | |
840 | ||
841 | static int | |
842 | vsock_attach(struct socket *so, int proto, struct proc *p) | |
843 | { | |
844 | #pragma unused(proto, p) | |
845 | ||
846 | // Attach should only be run once per socket. | |
847 | struct vsockpcb *pcb = sotovsockpcb(so); | |
848 | if (pcb) { | |
849 | return EINVAL; | |
850 | } | |
851 | ||
852 | // Get the transport for this socket. | |
853 | struct vsock_transport *transport = os_atomic_load(&the_vsock_transport, relaxed); | |
854 | if (transport == NULL) { | |
855 | return ENODEV; | |
856 | } | |
857 | ||
858 | // Reserve send and receive buffers. | |
859 | errno_t error = soreserve(so, vsock_sendspace, vsock_recvspace); | |
860 | if (error) { | |
861 | return error; | |
862 | } | |
863 | ||
864 | // Initialize the vsock protocol control block. | |
865 | pcb = zalloc(vsockpcb_zone); | |
866 | if (pcb == NULL) { | |
867 | return ENOBUFS; | |
868 | } | |
869 | bzero(pcb, sizeof(*pcb)); | |
870 | pcb->so = so; | |
871 | pcb->transport = transport; | |
872 | pcb->local_address = (struct vsock_address) { | |
873 | .cid = VMADDR_CID_ANY, | |
874 | .port = VMADDR_PORT_ANY | |
875 | }; | |
876 | pcb->remote_address = (struct vsock_address) { | |
877 | .cid = VMADDR_CID_ANY, | |
878 | .port = VMADDR_PORT_ANY | |
879 | }; | |
880 | so->so_pcb = pcb; | |
881 | ||
882 | // Tell the transport that this socket has attached. | |
883 | error = transport->attach_socket(transport->provider); | |
884 | if (error) { | |
885 | return error; | |
886 | } | |
887 | ||
888 | // Add to the list of all vsock sockets. | |
889 | lck_rw_lock_exclusive(vsockinfo.all_lock); | |
890 | TAILQ_INSERT_TAIL(&vsockinfo.all, pcb, all); | |
891 | vsockinfo.all_pcb_count++; | |
892 | pcb->vsock_gencnt = ++vsockinfo.vsock_gencnt; | |
893 | lck_rw_done(vsockinfo.all_lock); | |
894 | ||
895 | return 0; | |
896 | } | |
897 | ||
898 | static int | |
899 | vsock_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, struct proc *p) | |
900 | { | |
901 | #pragma unused(ifp) | |
902 | ||
903 | VERIFY(so != NULL || p == kernproc); | |
904 | ||
905 | if (cmd != IOCTL_VM_SOCKETS_GET_LOCAL_CID) { | |
906 | return EINVAL; | |
907 | } | |
908 | ||
909 | struct vsock_transport *transport; | |
910 | if (so) { | |
911 | struct vsockpcb *pcb = sotovsockpcb(so); | |
912 | if (pcb == NULL) { | |
913 | return EINVAL; | |
914 | } | |
915 | transport = pcb->transport; | |
916 | } else { | |
917 | transport = os_atomic_load(&the_vsock_transport, relaxed); | |
918 | } | |
919 | ||
920 | if (transport == NULL) { | |
921 | return ENODEV; | |
922 | } | |
923 | ||
924 | uint32_t transport_cid; | |
925 | errno_t error = transport->get_cid(transport->provider, &transport_cid); | |
926 | if (error) { | |
927 | return error; | |
928 | } | |
929 | ||
930 | memcpy(data, &transport_cid, sizeof(transport_cid)); | |
931 | ||
932 | return 0; | |
933 | } | |
934 | ||
935 | static int | |
936 | vsock_detach(struct socket *so) | |
937 | { | |
938 | struct vsockpcb *pcb = sotovsockpcb(so); | |
939 | if (pcb == NULL) { | |
940 | return EINVAL; | |
941 | } | |
942 | ||
943 | vsock_unbind_pcb(pcb, false); | |
944 | ||
945 | // Tell the transport that this socket has detached. | |
946 | struct vsock_transport *transport = pcb->transport; | |
947 | errno_t error = transport->detach_socket(transport->provider); | |
948 | if (error) { | |
949 | return error; | |
950 | } | |
951 | ||
952 | // Remove from the list of all vsock sockets. | |
953 | lck_rw_lock_exclusive(vsockinfo.all_lock); | |
954 | TAILQ_REMOVE(&vsockinfo.all, pcb, all); | |
955 | pcb->all.tqe_next = NULL; | |
956 | pcb->all.tqe_prev = NULL; | |
957 | vsockinfo.all_pcb_count--; | |
958 | vsockinfo.vsock_gencnt++; | |
959 | lck_rw_done(vsockinfo.all_lock); | |
960 | ||
961 | // Deallocate any resources. | |
962 | zfree(vsockpcb_zone, pcb); | |
963 | so->so_pcb = 0; | |
964 | so->so_flags |= SOF_PCBCLEARING; | |
965 | sofree(so); | |
966 | ||
967 | return 0; | |
968 | } | |
969 | ||
/* Protocol abort: mark the socket disconnected, then detach it. */
static int
vsock_abort(struct socket *so)
{
	int detach_result;

	soisdisconnected(so);
	detach_result = vsock_detach(so);

	return detach_result;
}
976 | ||
977 | static int | |
978 | vsock_bind(struct socket *so, struct sockaddr *nam, struct proc *p) | |
979 | { | |
980 | #pragma unused(p) | |
981 | ||
982 | struct vsockpcb *pcb = sotovsockpcb(so); | |
983 | if (pcb == NULL) { | |
984 | return EINVAL; | |
985 | } | |
986 | ||
987 | struct sockaddr_vm *addr = (struct sockaddr_vm *)nam; | |
988 | ||
989 | errno_t error = vsock_sockaddr_vm_validate(pcb, addr); | |
990 | if (error) { | |
991 | return error; | |
992 | } | |
993 | ||
994 | struct vsock_address laddr = (struct vsock_address) { | |
995 | .cid = addr->svm_cid, | |
996 | .port = addr->svm_port, | |
997 | }; | |
998 | ||
999 | struct vsock_address raddr = (struct vsock_address) { | |
1000 | .cid = VMADDR_CID_ANY, | |
1001 | .port = VMADDR_PORT_ANY, | |
1002 | }; | |
1003 | ||
1004 | error = vsock_bind_address(pcb, laddr, raddr); | |
1005 | if (error) { | |
1006 | return error; | |
1007 | } | |
1008 | ||
1009 | return 0; | |
1010 | } | |
1011 | ||
1012 | static int | |
1013 | vsock_listen(struct socket *so, struct proc *p) | |
1014 | { | |
1015 | #pragma unused(p) | |
1016 | ||
1017 | struct vsockpcb *pcb = sotovsockpcb(so); | |
1018 | if (pcb == NULL) { | |
1019 | return EINVAL; | |
1020 | } | |
1021 | ||
1022 | // Only stream is supported currently. | |
1023 | if (so->so_type != SOCK_STREAM) { | |
1024 | return EAFNOSUPPORT; | |
1025 | } | |
1026 | ||
1027 | struct vsock_address *addr = &pcb->local_address; | |
1028 | ||
1029 | if (addr->port == VMADDR_CID_ANY) { | |
1030 | return EFAULT; | |
1031 | } | |
1032 | ||
1033 | struct vsock_transport *transport = pcb->transport; | |
1034 | uint32_t transport_cid; | |
1035 | errno_t error = transport->get_cid(transport->provider, &transport_cid); | |
1036 | if (error) { | |
1037 | return error; | |
1038 | } | |
1039 | ||
1040 | // Can listen on the transport's cid or any. | |
1041 | if (addr->cid != transport_cid && addr->cid != VMADDR_CID_ANY) { | |
1042 | return EFAULT; | |
1043 | } | |
1044 | ||
1045 | return 0; | |
1046 | } | |
1047 | ||
1048 | static int | |
1049 | vsock_accept(struct socket *so, struct sockaddr **nam) | |
1050 | { | |
1051 | struct vsockpcb *pcb = sotovsockpcb(so); | |
1052 | if (pcb == NULL) { | |
1053 | return EINVAL; | |
1054 | } | |
1055 | ||
1056 | // Do not accept disconnected sockets. | |
1057 | if (so->so_state & SS_ISDISCONNECTED) { | |
1058 | return ECONNABORTED; | |
1059 | } | |
1060 | ||
1061 | *nam = vsock_new_sockaddr(&pcb->remote_address); | |
1062 | ||
1063 | return 0; | |
1064 | } | |
1065 | ||
1066 | static int | |
1067 | vsock_connect(struct socket *so, struct sockaddr *nam, struct proc *p) | |
1068 | { | |
1069 | #pragma unused(p) | |
1070 | ||
1071 | struct vsockpcb *pcb = sotovsockpcb(so); | |
1072 | if (pcb == NULL) { | |
1073 | return EINVAL; | |
1074 | } | |
1075 | ||
1076 | struct sockaddr_vm *addr = (struct sockaddr_vm *)nam; | |
1077 | ||
1078 | errno_t error = vsock_sockaddr_vm_validate(pcb, addr); | |
1079 | if (error) { | |
1080 | return error; | |
1081 | } | |
1082 | ||
1083 | uint32_t transport_cid; | |
1084 | struct vsock_transport *transport = pcb->transport; | |
1085 | error = transport->get_cid(transport->provider, &transport_cid); | |
1086 | if (error) { | |
1087 | return error; | |
1088 | } | |
1089 | ||
1090 | // Only supporting connections to the host, hypervisor, or self for now. | |
1091 | if (addr->svm_cid != VMADDR_CID_HOST && | |
1092 | addr->svm_cid != VMADDR_CID_HYPERVISOR && | |
1093 | addr->svm_cid != transport_cid) { | |
1094 | return EFAULT; | |
1095 | } | |
1096 | ||
1097 | soisconnecting(so); | |
1098 | ||
1099 | // Set the remote and local address. | |
1100 | struct vsock_address remote_addr = (struct vsock_address) { | |
1101 | .cid = addr->svm_cid, | |
1102 | .port = addr->svm_port, | |
1103 | }; | |
1104 | ||
1105 | struct vsock_address local_addr = (struct vsock_address) { | |
1106 | .cid = transport_cid, | |
1107 | .port = VMADDR_PORT_ANY, | |
1108 | }; | |
1109 | ||
1110 | // Bind to the address. | |
1111 | error = vsock_bind_address(pcb, local_addr, remote_addr); | |
1112 | if (error) { | |
1113 | goto cleanup; | |
1114 | } | |
1115 | ||
1116 | // Attempt a connection using the socket's transport. | |
1117 | error = vsock_pcb_connect(pcb); | |
1118 | if (error) { | |
1119 | goto cleanup; | |
1120 | } | |
1121 | ||
1122 | if ((so->so_state & SS_ISCONNECTED) == 0) { | |
1123 | // Don't wait for peer's response if non-blocking. | |
1124 | if (so->so_state & SS_NBIO) { | |
1125 | error = EINPROGRESS; | |
1126 | goto done; | |
1127 | } | |
1128 | ||
1129 | struct timespec ts = (struct timespec) { | |
1130 | .tv_sec = so->so_snd.sb_timeo.tv_sec, | |
1131 | .tv_nsec = so->so_snd.sb_timeo.tv_usec * 1000, | |
1132 | }; | |
1133 | ||
1134 | lck_mtx_t *mutex_held; | |
1135 | if (so->so_proto->pr_getlock != NULL) { | |
1136 | mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK); | |
1137 | } else { | |
1138 | mutex_held = so->so_proto->pr_domain->dom_mtx; | |
1139 | } | |
1140 | ||
1141 | // Wait until we receive a response to the connect request. | |
1142 | error = msleep((caddr_t)&so->so_timeo, mutex_held, PSOCK | PCATCH, "vsock_connect", &ts); | |
1143 | if (error) { | |
1144 | if (error == EAGAIN) { | |
1145 | error = ETIMEDOUT; | |
1146 | } | |
1147 | goto cleanup; | |
1148 | } | |
1149 | } | |
1150 | ||
1151 | cleanup: | |
1152 | if (so->so_error && !error) { | |
1153 | error = so->so_error; | |
1154 | so->so_error = 0; | |
1155 | } | |
1156 | if (!error) { | |
1157 | error = !(so->so_state & SS_ISCONNECTED); | |
1158 | } | |
1159 | if (error) { | |
1160 | vsock_unbind_pcb(pcb, false); | |
1161 | } | |
1162 | ||
1163 | done: | |
1164 | return error; | |
1165 | } | |
1166 | ||
/*
 * pru_disconnect handler: forwards to vsock_disconnect_pcb() for the socket's
 * PCB, or returns EINVAL when the socket has none.
 */
static int
vsock_disconnect(struct socket *so)
{
	struct vsockpcb *pcb = sotovsockpcb(so);

	if (pcb != NULL) {
		return vsock_disconnect_pcb(pcb);
	}

	return EINVAL;
}
1177 | ||
1178 | static int | |
1179 | vsock_sockaddr(struct socket *so, struct sockaddr **nam) | |
1180 | { | |
1181 | struct vsockpcb *pcb = sotovsockpcb(so); | |
1182 | if (pcb == NULL) { | |
1183 | return EINVAL; | |
1184 | } | |
1185 | ||
1186 | *nam = vsock_new_sockaddr(&pcb->local_address); | |
1187 | ||
1188 | return 0; | |
1189 | } | |
1190 | ||
1191 | static int | |
1192 | vsock_peeraddr(struct socket *so, struct sockaddr **nam) | |
1193 | { | |
1194 | struct vsockpcb *pcb = sotovsockpcb(so); | |
1195 | if (pcb == NULL) { | |
1196 | return EINVAL; | |
1197 | } | |
1198 | ||
1199 | *nam = vsock_new_sockaddr(&pcb->remote_address); | |
1200 | ||
1201 | return 0; | |
1202 | } | |
1203 | ||
1204 | static int | |
1205 | vsock_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, struct mbuf *control, proc_t p) | |
1206 | { | |
1207 | #pragma unused(flags, nam, p) | |
1208 | ||
1209 | struct vsockpcb *pcb = sotovsockpcb(so); | |
1210 | if (pcb == NULL || m == NULL) { | |
1211 | return EINVAL; | |
1212 | } | |
1213 | ||
1214 | if (control != NULL) { | |
1215 | m_freem(control); | |
1216 | return EOPNOTSUPP; | |
1217 | } | |
1218 | ||
1219 | // Ensure this socket is connected. | |
1220 | if ((so->so_state & SS_ISCONNECTED) == 0) { | |
1221 | if (m != NULL) { | |
1222 | mbuf_freem_list(m); | |
1223 | } | |
1224 | return EPERM; | |
1225 | } | |
1226 | ||
1227 | errno_t error; | |
1228 | ||
1229 | const size_t len = mbuf_pkthdr_len(m); | |
1230 | uint32_t free_space = vsock_get_peer_space(pcb); | |
1231 | ||
1232 | // Ensure the peer has enough space in their receive buffer. | |
1233 | while (len > free_space) { | |
1234 | // Record the number of free peer bytes necessary before we can send. | |
1235 | if (len > pcb->waiting_send_size) { | |
1236 | pcb->waiting_send_size = len; | |
1237 | } | |
1238 | ||
1239 | // Send a credit request. | |
1240 | error = vsock_pcb_credit_request(pcb); | |
1241 | if (error) { | |
1242 | if (m != NULL) { | |
1243 | mbuf_freem_list(m); | |
1244 | } | |
1245 | return error; | |
1246 | } | |
1247 | ||
1248 | // Check again in case free space was automatically updated in loopback case. | |
1249 | free_space = vsock_get_peer_space(pcb); | |
1250 | if (len <= free_space) { | |
1251 | pcb->waiting_send_size = 0; | |
1252 | break; | |
1253 | } | |
1254 | ||
1255 | // Bail if this is a non-blocking socket. | |
1256 | if (so->so_state & SS_NBIO) { | |
1257 | if (m != NULL) { | |
1258 | mbuf_freem_list(m); | |
1259 | } | |
1260 | return EWOULDBLOCK; | |
1261 | } | |
1262 | ||
1263 | // Wait until our peer has enough free space in their receive buffer. | |
1264 | error = sbwait(&so->so_snd); | |
1265 | pcb->waiting_send_size = 0; | |
1266 | if (error) { | |
1267 | if (m != NULL) { | |
1268 | mbuf_freem_list(m); | |
1269 | } | |
1270 | return error; | |
1271 | } | |
1272 | ||
1273 | // Bail if an error occured or we can't send more. | |
1274 | if (so->so_state & SS_CANTSENDMORE) { | |
1275 | if (m != NULL) { | |
1276 | mbuf_freem_list(m); | |
1277 | } | |
1278 | return EPIPE; | |
1279 | } else if (so->so_error) { | |
1280 | error = so->so_error; | |
1281 | so->so_error = 0; | |
1282 | if (m != NULL) { | |
1283 | mbuf_freem_list(m); | |
1284 | } | |
1285 | return error; | |
1286 | } | |
1287 | ||
1288 | free_space = vsock_get_peer_space(pcb); | |
1289 | } | |
1290 | ||
1291 | // Send a payload over the transport. | |
1292 | error = vsock_pcb_send(pcb, m); | |
1293 | if (error) { | |
1294 | return error; | |
1295 | } | |
1296 | ||
1297 | pcb->tx_cnt += len; | |
1298 | ||
1299 | return 0; | |
1300 | } | |
1301 | ||
1302 | static int | |
1303 | vsock_shutdown(struct socket *so) | |
1304 | { | |
1305 | struct vsockpcb *pcb = sotovsockpcb(so); | |
1306 | if (pcb == NULL) { | |
1307 | return EINVAL; | |
1308 | } | |
1309 | ||
1310 | socantsendmore(so); | |
1311 | ||
1312 | // Tell peer we will no longer send. | |
1313 | errno_t error = vsock_pcb_shutdown_send(pcb); | |
1314 | if (error) { | |
1315 | return error; | |
1316 | } | |
1317 | ||
1318 | return 0; | |
1319 | } | |
1320 | ||
1321 | static int | |
1322 | vsock_soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio, | |
1323 | struct mbuf **mp0, struct mbuf **controlp, int *flagsp) | |
1324 | { | |
1325 | struct vsockpcb *pcb = sotovsockpcb(so); | |
1326 | if (pcb == NULL) { | |
1327 | return EINVAL; | |
1328 | } | |
1329 | ||
1330 | user_ssize_t length = uio_resid(uio); | |
1331 | int result = soreceive(so, psa, uio, mp0, controlp, flagsp); | |
1332 | length -= uio_resid(uio); | |
1333 | ||
1334 | socket_lock(so, 1); | |
1335 | ||
1336 | pcb->fwd_cnt += length; | |
1337 | ||
1338 | const uint32_t threshold = VSOCK_MAX_PACKET_SIZE; | |
1339 | ||
1340 | // Send a credit update if is possible that the peer will no longer send. | |
1341 | if ((pcb->fwd_cnt - pcb->last_fwd_cnt + threshold) >= pcb->last_buf_alloc) { | |
1342 | errno_t error = vsock_pcb_credit_update(pcb); | |
1343 | if (!result && error) { | |
1344 | result = error; | |
1345 | } | |
1346 | } | |
1347 | ||
1348 | socket_unlock(so, 1); | |
1349 | ||
1350 | return result; | |
1351 | } | |
1352 | ||
1353 | static struct pr_usrreqs vsock_usrreqs = { | |
1354 | .pru_abort = vsock_abort, | |
1355 | .pru_attach = vsock_attach, | |
1356 | .pru_control = vsock_control, | |
1357 | .pru_detach = vsock_detach, | |
1358 | .pru_bind = vsock_bind, | |
1359 | .pru_listen = vsock_listen, | |
1360 | .pru_accept = vsock_accept, | |
1361 | .pru_connect = vsock_connect, | |
1362 | .pru_disconnect = vsock_disconnect, | |
1363 | .pru_send = vsock_send, | |
1364 | .pru_shutdown = vsock_shutdown, | |
1365 | .pru_sockaddr = vsock_sockaddr, | |
1366 | .pru_peeraddr = vsock_peeraddr, | |
1367 | .pru_sosend = sosend, | |
1368 | .pru_soreceive = vsock_soreceive, | |
1369 | }; | |
1370 | ||
1371 | static void | |
1372 | vsock_init(struct protosw *pp, struct domain *dp) | |
1373 | { | |
1374 | #pragma unused(dp) | |
1375 | ||
1376 | static int vsock_initialized = 0; | |
1377 | VERIFY((pp->pr_flags & (PR_INITIALIZED | PR_ATTACHED)) == PR_ATTACHED); | |
1378 | if (!os_atomic_cmpxchg((volatile int *)&vsock_initialized, 0, 1, acq_rel)) { | |
1379 | return; | |
1380 | } | |
1381 | ||
1382 | // Setup VSock protocol info struct. | |
1383 | vsockinfo.vsock_lock_grp_attr = lck_grp_attr_alloc_init(); | |
1384 | vsockinfo.vsock_lock_grp = lck_grp_alloc_init("vsock", vsockinfo.vsock_lock_grp_attr); | |
1385 | vsockinfo.vsock_lock_attr = lck_attr_alloc_init(); | |
1386 | if ((vsockinfo.all_lock = lck_rw_alloc_init(vsockinfo.vsock_lock_grp, vsockinfo.vsock_lock_attr)) == NULL || | |
1387 | (vsockinfo.bound_lock = lck_rw_alloc_init(vsockinfo.vsock_lock_grp, vsockinfo.vsock_lock_attr)) == NULL) { | |
1388 | panic("%s: unable to allocate PCB lock\n", __func__); | |
1389 | /* NOTREACHED */ | |
1390 | } | |
1391 | lck_mtx_init(&vsockinfo.port_lock, vsockinfo.vsock_lock_grp, vsockinfo.vsock_lock_attr); | |
1392 | TAILQ_INIT(&vsockinfo.all); | |
1393 | LIST_INIT(&vsockinfo.bound); | |
1394 | vsockinfo.last_port = VMADDR_PORT_ANY; | |
1395 | } | |
1396 | ||
1397 | static struct protosw vsocksw[] = { | |
1398 | { | |
1399 | .pr_type = SOCK_STREAM, | |
1400 | .pr_protocol = 0, | |
1401 | .pr_flags = PR_CONNREQUIRED | PR_WANTRCVD, | |
1402 | .pr_init = vsock_init, | |
1403 | .pr_usrreqs = &vsock_usrreqs, | |
1404 | } | |
1405 | }; | |
1406 | ||
1407 | static const int vsock_proto_count = (sizeof(vsocksw) / sizeof(struct protosw)); | |
1408 | ||
1409 | /* VSock Domain */ | |
1410 | ||
1411 | static struct domain *vsock_domain = NULL; | |
1412 | ||
1413 | static void | |
1414 | vsock_dinit(struct domain *dp) | |
1415 | { | |
1416 | // The VSock domain is initialized with a singleton pattern. | |
1417 | VERIFY(!(dp->dom_flags & DOM_INITIALIZED)); | |
1418 | VERIFY(vsock_domain == NULL); | |
1419 | vsock_domain = dp; | |
1420 | ||
1421 | // Add protocols and initialize. | |
1422 | for (int i = 0; i < vsock_proto_count; i++) { | |
1423 | net_add_proto((struct protosw *)&vsocksw[i], dp, 1); | |
1424 | } | |
1425 | } | |
1426 | ||
1427 | struct domain vsockdomain_s = { | |
1428 | .dom_family = PF_VSOCK, | |
1429 | .dom_name = "vsock", | |
1430 | .dom_init = vsock_dinit, | |
1431 | .dom_maxrtkey = sizeof(struct sockaddr_vm), | |
1432 | .dom_protohdrlen = sizeof(struct sockaddr_vm), | |
1433 | }; |