[apple/xnu.git] / bsd / kern / kpi_socket.c (xnu-7195.81.3)
/*
 * Copyright (c) 2003-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#define __KPI__
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/param.h>
#include <sys/proc.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/protosw.h>
#include <sys/domain.h>
#include <sys/mbuf.h>
#include <sys/mcache.h>
#include <sys/fcntl.h>
#include <sys/filio.h>
#include <sys/uio_internal.h>
#include <kern/locks.h>
#include <net/net_api_stats.h>
#include <netinet/in.h>
#include <libkern/OSAtomic.h>
#include <stdbool.h>

static errno_t sock_send_internal(socket_t, const struct msghdr *,
    mbuf_t, int, size_t *);

#undef sock_accept
#undef sock_socket
errno_t sock_accept(socket_t so, struct sockaddr *from, int fromlen,
    int flags, sock_upcall callback, void *cookie, socket_t *new_so);
errno_t sock_socket(int domain, int type, int protocol, sock_upcall callback,
    void *context, socket_t *new_so);

static errno_t sock_accept_common(socket_t sock, struct sockaddr *from,
    int fromlen, int flags, sock_upcall callback, void *cookie,
    socket_t *new_sock, bool is_internal);
static errno_t sock_socket_common(int domain, int type, int protocol,
    sock_upcall callback, void *context, socket_t *new_so, bool is_internal);

errno_t
sock_accept_common(socket_t sock, struct sockaddr *from, int fromlen, int flags,
    sock_upcall callback, void *cookie, socket_t *new_sock, bool is_internal)
{
	struct sockaddr *sa = NULL;
	struct socket *new_so;
	lck_mtx_t *mutex_held;
	int dosocklock;
	errno_t error = 0;

	if (sock == NULL || new_sock == NULL) {
		return EINVAL;
	}

	socket_lock(sock, 1);
	if ((sock->so_options & SO_ACCEPTCONN) == 0) {
		socket_unlock(sock, 1);
		return EINVAL;
	}
	if ((flags & ~(MSG_DONTWAIT)) != 0) {
		socket_unlock(sock, 1);
		return ENOTSUP;
	}
check_again:
	if (((flags & MSG_DONTWAIT) != 0 || (sock->so_state & SS_NBIO) != 0) &&
	    sock->so_comp.tqh_first == NULL) {
		socket_unlock(sock, 1);
		return EWOULDBLOCK;
	}

	if (sock->so_proto->pr_getlock != NULL) {
		mutex_held = (*sock->so_proto->pr_getlock)(sock, PR_F_WILLUNLOCK);
		dosocklock = 1;
	} else {
		mutex_held = sock->so_proto->pr_domain->dom_mtx;
		dosocklock = 0;
	}

	while (TAILQ_EMPTY(&sock->so_comp) && sock->so_error == 0) {
		if (sock->so_state & SS_CANTRCVMORE) {
			sock->so_error = ECONNABORTED;
			break;
		}
		error = msleep((caddr_t)&sock->so_timeo, mutex_held,
		    PSOCK | PCATCH, "sock_accept", NULL);
		if (error != 0) {
			socket_unlock(sock, 1);
			return error;
		}
	}
	if (sock->so_error != 0) {
		error = sock->so_error;
		sock->so_error = 0;
		socket_unlock(sock, 1);
		return error;
	}

	so_acquire_accept_list(sock, NULL);
	if (TAILQ_EMPTY(&sock->so_comp)) {
		so_release_accept_list(sock);
		goto check_again;
	}
	new_so = TAILQ_FIRST(&sock->so_comp);
	TAILQ_REMOVE(&sock->so_comp, new_so, so_list);
	new_so->so_state &= ~SS_COMP;
	new_so->so_head = NULL;
	sock->so_qlen--;

	so_release_accept_list(sock);

	/*
	 * Count the accepted socket as an in-kernel socket
	 */
	new_so->so_flags1 |= SOF1_IN_KERNEL_SOCKET;
	INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_in_kernel_total);
	if (is_internal) {
		INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_in_kernel_os_total);
	}

	/*
	 * Pass the pre-accepted socket to any interested socket filter(s).
	 * Upon failure, the socket would have been closed by the callee.
	 */
	if (new_so->so_filt != NULL) {
		/*
		 * Temporarily drop the listening socket's lock before we
		 * hand off control over to the socket filter(s), but keep
		 * a reference so that it won't go away.  We'll grab it
		 * again once we're done with the filter(s).
		 */
		socket_unlock(sock, 0);
		if ((error = soacceptfilter(new_so, sock)) != 0) {
			/* Drop reference on listening socket */
			sodereference(sock);
			return error;
		}
		socket_lock(sock, 0);
	}

	if (dosocklock) {
		LCK_MTX_ASSERT(new_so->so_proto->pr_getlock(new_so, 0),
		    LCK_MTX_ASSERT_NOTOWNED);
		socket_lock(new_so, 1);
	}

	(void) soacceptlock(new_so, &sa, 0);

	socket_unlock(sock, 1);	/* release the head */

	/* see comments in sock_setupcall() */
	if (callback != NULL) {
#if (defined(__arm__) || defined(__arm64__))
		sock_setupcalls_locked(new_so, callback, cookie, callback, cookie, 0);
#else /* (defined(__arm__) || defined(__arm64__)) */
		sock_setupcalls_locked(new_so, callback, cookie, NULL, NULL, 0);
#endif /* (defined(__arm__) || defined(__arm64__)) */
	}

	if (sa != NULL && from != NULL) {
		if (fromlen > sa->sa_len) {
			fromlen = sa->sa_len;
		}
		memcpy(from, sa, fromlen);
	}
	if (sa != NULL) {
		FREE(sa, M_SONAME);
	}

	/*
	 * If the socket has been marked as inactive by sosetdefunct(),
	 * disallow further operations on it.
	 */
	if (new_so->so_flags & SOF_DEFUNCT) {
		(void) sodefunct(current_proc(), new_so,
		    SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL);
	}
	*new_sock = new_so;
	if (dosocklock) {
		socket_unlock(new_so, 1);
	}
	return error;
}

errno_t
sock_accept(socket_t sock, struct sockaddr *from, int fromlen, int flags,
    sock_upcall callback, void *cookie, socket_t *new_sock)
{
	return sock_accept_common(sock, from, fromlen, flags,
	    callback, cookie, new_sock, false);
}

errno_t
sock_accept_internal(socket_t sock, struct sockaddr *from, int fromlen, int flags,
    sock_upcall callback, void *cookie, socket_t *new_sock)
{
	return sock_accept_common(sock, from, fromlen, flags,
	    callback, cookie, new_sock, true);
}
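
/*
 * Illustrative sketch, not part of this file: an in-kernel caller can
 * drain a listener non-blocking from its read upcall.  The names
 * listener_upcall, accepted_upcall and the NULL cookie are hypothetical.
 *
 *	static void
 *	listener_upcall(socket_t listener, void *cookie, int waitf)
 *	{
 *	#pragma unused(cookie, waitf)
 *		struct sockaddr_in from;
 *		socket_t conn;
 *
 *		while (sock_accept(listener, (struct sockaddr *)&from,
 *		    sizeof(from), MSG_DONTWAIT, accepted_upcall, NULL,
 *		    &conn) == 0) {
 *			...hand conn off; sock_close() it when done...
 *		}
 *	}
 */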

errno_t
sock_bind(socket_t sock, const struct sockaddr *to)
{
	int error = 0;
	struct sockaddr *sa = NULL;
	struct sockaddr_storage ss;
	boolean_t want_free = TRUE;

	if (sock == NULL || to == NULL) {
		return EINVAL;
	}

	if (to->sa_len > sizeof(ss)) {
		MALLOC(sa, struct sockaddr *, to->sa_len, M_SONAME, M_WAITOK);
		if (sa == NULL) {
			return ENOBUFS;
		}
	} else {
		sa = (struct sockaddr *)&ss;
		want_free = FALSE;
	}
	memcpy(sa, to, to->sa_len);

	error = sobindlock(sock, sa, 1);	/* will lock socket */

	if (sa != NULL && want_free == TRUE) {
		FREE(sa, M_SONAME);
	}

	return error;
}
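
/*
 * Illustrative sketch, not part of this file: binding an in-kernel IPv4
 * socket.  Because a sockaddr_in fits in sockaddr_storage, sock_bind()
 * copies it to the stack instead of allocating.  The port is hypothetical.
 *
 *	struct sockaddr_in sin;
 *
 *	bzero(&sin, sizeof(sin));
 *	sin.sin_len = sizeof(sin);
 *	sin.sin_family = AF_INET;
 *	sin.sin_port = htons(8080);
 *	sin.sin_addr.s_addr = htonl(INADDR_ANY);
 *	error = sock_bind(so, (const struct sockaddr *)&sin);
 */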

errno_t
sock_connect(socket_t sock, const struct sockaddr *to, int flags)
{
	int error = 0;
	lck_mtx_t *mutex_held;
	struct sockaddr *sa = NULL;
	struct sockaddr_storage ss;
	boolean_t want_free = TRUE;

	if (sock == NULL || to == NULL) {
		return EINVAL;
	}

	if (to->sa_len > sizeof(ss)) {
		MALLOC(sa, struct sockaddr *, to->sa_len, M_SONAME,
		    (flags & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK);
		if (sa == NULL) {
			return ENOBUFS;
		}
	} else {
		sa = (struct sockaddr *)&ss;
		want_free = FALSE;
	}
	memcpy(sa, to, to->sa_len);

	socket_lock(sock, 1);

	if ((sock->so_state & SS_ISCONNECTING) &&
	    ((sock->so_state & SS_NBIO) != 0 || (flags & MSG_DONTWAIT) != 0)) {
		error = EALREADY;
		goto out;
	}
	error = soconnectlock(sock, sa, 0);
	if (!error) {
		if ((sock->so_state & SS_ISCONNECTING) &&
		    ((sock->so_state & SS_NBIO) != 0 ||
		    (flags & MSG_DONTWAIT) != 0)) {
			error = EINPROGRESS;
			goto out;
		}

		if (sock->so_proto->pr_getlock != NULL) {
			mutex_held = (*sock->so_proto->pr_getlock)(sock, PR_F_WILLUNLOCK);
		} else {
			mutex_held = sock->so_proto->pr_domain->dom_mtx;
		}

		while ((sock->so_state & SS_ISCONNECTING) &&
		    sock->so_error == 0) {
			error = msleep((caddr_t)&sock->so_timeo,
			    mutex_held, PSOCK | PCATCH, "sock_connect", NULL);
			if (error != 0) {
				break;
			}
		}

		if (error == 0) {
			error = sock->so_error;
			sock->so_error = 0;
		}
	} else {
		sock->so_state &= ~SS_ISCONNECTING;
	}
out:
	socket_unlock(sock, 1);

	if (sa != NULL && want_free == TRUE) {
		FREE(sa, M_SONAME);
	}

	return error;
}

errno_t
sock_connectwait(socket_t sock, const struct timeval *tv)
{
	lck_mtx_t *mutex_held;
	errno_t retval = 0;
	struct timespec ts;

	socket_lock(sock, 1);

	/* Check if we're already connected or if we've already errored out */
	if ((sock->so_state & SS_ISCONNECTING) == 0 || sock->so_error != 0) {
		if (sock->so_error != 0) {
			retval = sock->so_error;
			sock->so_error = 0;
		} else {
			if ((sock->so_state & SS_ISCONNECTED) != 0) {
				retval = 0;
			} else {
				retval = EINVAL;
			}
		}
		goto done;
	}

	/* Translate timeval to hz ticks; logic copied from the SO_RCVTIMEO handling */
	if (tv->tv_sec < 0 || tv->tv_sec > SHRT_MAX / hz ||
	    tv->tv_usec < 0 || tv->tv_usec >= 1000000) {
		retval = EDOM;
		goto done;
	}

	ts.tv_sec = tv->tv_sec;
	ts.tv_nsec = (tv->tv_usec * (integer_t)NSEC_PER_USEC);
	if ((ts.tv_sec + (ts.tv_nsec / (long)NSEC_PER_SEC)) / 100 > SHRT_MAX) {
		retval = EDOM;
		goto done;
	}

	if (sock->so_proto->pr_getlock != NULL) {
		mutex_held = (*sock->so_proto->pr_getlock)(sock, PR_F_WILLUNLOCK);
	} else {
		mutex_held = sock->so_proto->pr_domain->dom_mtx;
	}

	msleep((caddr_t)&sock->so_timeo, mutex_held,
	    PSOCK, "sock_connectwait", &ts);

	/* Check if we're still waiting to connect */
	if ((sock->so_state & SS_ISCONNECTING) && sock->so_error == 0) {
		retval = EINPROGRESS;
		goto done;
	}

	if (sock->so_error != 0) {
		retval = sock->so_error;
		sock->so_error = 0;
	}

done:
	socket_unlock(sock, 1);
	return retval;
}
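
/*
 * Illustrative sketch, not part of this file: a connect started with
 * MSG_DONTWAIT returns EINPROGRESS once the handshake is in flight, and
 * sock_connectwait() can then bound the wait.  The sin address and the
 * five-second timeout are hypothetical.
 *
 *	struct timeval tv = { .tv_sec = 5, .tv_usec = 0 };
 *
 *	error = sock_connect(so, (const struct sockaddr *)&sin, MSG_DONTWAIT);
 *	if (error == EINPROGRESS) {
 *		error = sock_connectwait(so, &tv);
 *	}
 */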

errno_t
sock_nointerrupt(socket_t sock, int on)
{
	socket_lock(sock, 1);

	if (on) {
		sock->so_rcv.sb_flags |= SB_NOINTR;	/* This isn't safe */
		sock->so_snd.sb_flags |= SB_NOINTR;	/* This isn't safe */
	} else {
		sock->so_rcv.sb_flags &= ~SB_NOINTR;	/* This isn't safe */
		sock->so_snd.sb_flags &= ~SB_NOINTR;	/* This isn't safe */
	}

	socket_unlock(sock, 1);

	return 0;
}

errno_t
sock_getpeername(socket_t sock, struct sockaddr *peername, int peernamelen)
{
	int error;
	struct sockaddr *sa = NULL;

	if (sock == NULL || peername == NULL || peernamelen < 0) {
		return EINVAL;
	}

	socket_lock(sock, 1);
	if (!(sock->so_state & (SS_ISCONNECTED | SS_ISCONFIRMING))) {
		socket_unlock(sock, 1);
		return ENOTCONN;
	}
	error = sogetaddr_locked(sock, &sa, 1);
	socket_unlock(sock, 1);
	if (error == 0) {
		if (peernamelen > sa->sa_len) {
			peernamelen = sa->sa_len;
		}
		memcpy(peername, sa, peernamelen);
		FREE(sa, M_SONAME);
	}
	return error;
}

errno_t
sock_getsockname(socket_t sock, struct sockaddr *sockname, int socknamelen)
{
	int error;
	struct sockaddr *sa = NULL;

	if (sock == NULL || sockname == NULL || socknamelen < 0) {
		return EINVAL;
	}

	socket_lock(sock, 1);
	error = sogetaddr_locked(sock, &sa, 0);
	socket_unlock(sock, 1);
	if (error == 0) {
		if (socknamelen > sa->sa_len) {
			socknamelen = sa->sa_len;
		}
		memcpy(sockname, sa, socknamelen);
		FREE(sa, M_SONAME);
	}
	return error;
}

__private_extern__ int
sogetaddr_locked(struct socket *so, struct sockaddr **psa, int peer)
{
	int error;

	if (so == NULL || psa == NULL) {
		return EINVAL;
	}

	*psa = NULL;
	error = peer ? so->so_proto->pr_usrreqs->pru_peeraddr(so, psa) :
	    so->so_proto->pr_usrreqs->pru_sockaddr(so, psa);

	if (error == 0 && *psa == NULL) {
		error = ENOMEM;
	} else if (error != 0 && *psa != NULL) {
		FREE(*psa, M_SONAME);
		*psa = NULL;
	}
	return error;
}

errno_t
sock_getaddr(socket_t sock, struct sockaddr **psa, int peer)
{
	int error;

	if (sock == NULL || psa == NULL) {
		return EINVAL;
	}

	socket_lock(sock, 1);
	error = sogetaddr_locked(sock, psa, peer);
	socket_unlock(sock, 1);

	return error;
}

void
sock_freeaddr(struct sockaddr *sa)
{
	if (sa != NULL) {
		FREE(sa, M_SONAME);
	}
}

errno_t
sock_getsockopt(socket_t sock, int level, int optname, void *optval,
    int *optlen)
{
	int error = 0;
	struct sockopt sopt;

	if (sock == NULL || optval == NULL || optlen == NULL) {
		return EINVAL;
	}

	sopt.sopt_dir = SOPT_GET;
	sopt.sopt_level = level;
	sopt.sopt_name = optname;
	sopt.sopt_val = CAST_USER_ADDR_T(optval);
	sopt.sopt_valsize = *optlen;
	sopt.sopt_p = kernproc;
	error = sogetoptlock(sock, &sopt, 1);	/* will lock socket */
	if (error == 0) {
		*optlen = (uint32_t)sopt.sopt_valsize;
	}
	return error;
}

errno_t
sock_ioctl(socket_t sock, unsigned long request, void *argp)
{
	return soioctl(sock, request, argp, kernproc);	/* will lock socket */
}

errno_t
sock_setsockopt(socket_t sock, int level, int optname, const void *optval,
    int optlen)
{
	struct sockopt sopt;

	if (sock == NULL || optval == NULL) {
		return EINVAL;
	}

	sopt.sopt_dir = SOPT_SET;
	sopt.sopt_level = level;
	sopt.sopt_name = optname;
	sopt.sopt_val = CAST_USER_ADDR_T(optval);
	sopt.sopt_valsize = optlen;
	sopt.sopt_p = kernproc;
	return sosetoptlock(sock, &sopt, 1);	/* will lock socket */
}
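
/*
 * Illustrative sketch, not part of this file: option values are passed
 * by kernel pointer, so a stack int is enough.
 *
 *	int on = 1;
 *
 *	error = sock_setsockopt(so, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on));
 */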

/*
 * This follows the recommended mappings between DSCP code points
 * and WMM access classes.
 */
static uint32_t
so_tc_from_dscp(uint8_t dscp)
{
	uint32_t tc;

	if (dscp >= 0x30 && dscp <= 0x3f) {
		tc = SO_TC_VO;
	} else if (dscp >= 0x20 && dscp <= 0x2f) {
		tc = SO_TC_VI;
	} else if (dscp >= 0x08 && dscp <= 0x17) {
		tc = SO_TC_BK_SYS;
	} else {
		tc = SO_TC_BE;
	}

	return tc;
}

errno_t
sock_settclassopt(socket_t sock, const void *optval, size_t optlen)
{
	errno_t error = 0;
	struct sockopt sopt;
	int sotc;

	if (sock == NULL || optval == NULL || optlen != sizeof(int)) {
		return EINVAL;
	}

	socket_lock(sock, 1);
	if (!(sock->so_state & SS_ISCONNECTED)) {
		/*
		 * If the socket is not connected then we don't know
		 * if the destination is on LAN or not. Skip
		 * setting traffic class in this case
		 */
		error = ENOTCONN;
		goto out;
	}

	if (sock->so_proto == NULL || sock->so_proto->pr_domain == NULL ||
	    sock->so_pcb == NULL) {
		error = EINVAL;
		goto out;
	}

	/*
	 * Set the socket traffic class based on the passed DSCP code point
	 * regardless of the scope of the destination
	 */
	sotc = so_tc_from_dscp((uint8_t)((*(const int *)optval) >> 2));

	sopt.sopt_dir = SOPT_SET;
	sopt.sopt_val = CAST_USER_ADDR_T(&sotc);
	sopt.sopt_valsize = sizeof(sotc);
	sopt.sopt_p = kernproc;
	sopt.sopt_level = SOL_SOCKET;
	sopt.sopt_name = SO_TRAFFIC_CLASS;

	error = sosetoptlock(sock, &sopt, 0);	/* already locked */

	if (error != 0) {
		printf("%s: sosetopt SO_TRAFFIC_CLASS failed %d\n",
		    __func__, error);
		goto out;
	}

	/*
	 * Check if the destination address is LAN or link local address.
	 * We do not want to set traffic class bits if the destination
	 * is not local.
	 */
	if (!so_isdstlocal(sock)) {
		goto out;
	}

	sopt.sopt_dir = SOPT_SET;
	sopt.sopt_val = CAST_USER_ADDR_T(optval);
	sopt.sopt_valsize = optlen;
	sopt.sopt_p = kernproc;

	switch (SOCK_DOM(sock)) {
	case PF_INET:
		sopt.sopt_level = IPPROTO_IP;
		sopt.sopt_name = IP_TOS;
		break;
	case PF_INET6:
		sopt.sopt_level = IPPROTO_IPV6;
		sopt.sopt_name = IPV6_TCLASS;
		break;
	default:
		error = EINVAL;
		goto out;
	}

	error = sosetoptlock(sock, &sopt, 0);	/* already locked */
	socket_unlock(sock, 1);
	return error;
out:
	socket_unlock(sock, 1);
	return error;
}
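
/*
 * Illustrative sketch, not part of this file: callers hand
 * sock_settclassopt() the whole ToS/traffic-class byte, i.e. the DSCP
 * code point shifted left by two, which the ">> 2" above undoes.  0xB8
 * (DSCP 0x2e) is an example value.
 *
 *	int tos = 0xB8;
 *
 *	error = sock_settclassopt(so, &tos, sizeof(tos));
 */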

errno_t
sock_gettclassopt(socket_t sock, void *optval, size_t *optlen)
{
	errno_t error = 0;
	struct sockopt sopt;

	if (sock == NULL || optval == NULL || optlen == NULL) {
		return EINVAL;
	}

	sopt.sopt_dir = SOPT_GET;
	sopt.sopt_val = CAST_USER_ADDR_T(optval);
	sopt.sopt_valsize = *optlen;
	sopt.sopt_p = kernproc;

	socket_lock(sock, 1);
	if (sock->so_proto == NULL || sock->so_proto->pr_domain == NULL) {
		socket_unlock(sock, 1);
		return EINVAL;
	}

	switch (SOCK_DOM(sock)) {
	case PF_INET:
		sopt.sopt_level = IPPROTO_IP;
		sopt.sopt_name = IP_TOS;
		break;
	case PF_INET6:
		sopt.sopt_level = IPPROTO_IPV6;
		sopt.sopt_name = IPV6_TCLASS;
		break;
	default:
		socket_unlock(sock, 1);
		return EINVAL;
	}
	error = sogetoptlock(sock, &sopt, 0);	/* already locked */
	socket_unlock(sock, 1);
	if (error == 0) {
		*optlen = sopt.sopt_valsize;
	}
	return error;
}

errno_t
sock_listen(socket_t sock, int backlog)
{
	if (sock == NULL) {
		return EINVAL;
	}

	return solisten(sock, backlog);	/* will lock socket */
}

errno_t
sock_receive_internal(socket_t sock, struct msghdr *msg, mbuf_t *data,
    int flags, size_t *recvdlen)
{
	uio_t auio;
	struct mbuf *control = NULL;
	int error = 0;
	user_ssize_t length = 0;
	struct sockaddr *fromsa = NULL;
	char uio_buf[UIO_SIZEOF((msg != NULL) ? msg->msg_iovlen : 0)];

	if (sock == NULL) {
		return EINVAL;
	}

	auio = uio_createwithbuffer(((msg != NULL) ? msg->msg_iovlen : 0),
	    0, UIO_SYSSPACE, UIO_READ, &uio_buf[0], sizeof(uio_buf));
	if (msg != NULL && data == NULL) {
		int i;
		struct iovec *tempp = msg->msg_iov;

		for (i = 0; i < msg->msg_iovlen; i++) {
			uio_addiov(auio,
			    CAST_USER_ADDR_T((tempp + i)->iov_base),
			    (tempp + i)->iov_len);
		}
		if (uio_resid(auio) < 0) {
			return EINVAL;
		}
	} else if (recvdlen != NULL) {
		uio_setresid(auio, (uio_resid(auio) + *recvdlen));
	}
	length = uio_resid(auio);

	if (recvdlen != NULL) {
		*recvdlen = 0;
	}

	/* let pru_soreceive handle the socket locking */
	error = sock->so_proto->pr_usrreqs->pru_soreceive(sock, &fromsa, auio,
	    data, (msg && msg->msg_control) ? &control : NULL, &flags);
	if (error != 0) {
		goto cleanup;
	}

	if (recvdlen != NULL) {
		*recvdlen = length - uio_resid(auio);
	}
	if (msg != NULL) {
		msg->msg_flags = flags;

		if (msg->msg_name != NULL) {
			int salen;
			salen = msg->msg_namelen;
			if (msg->msg_namelen > 0 && fromsa != NULL) {
				salen = MIN(salen, fromsa->sa_len);
				memcpy(msg->msg_name, fromsa,
				    msg->msg_namelen > fromsa->sa_len ?
				    fromsa->sa_len : msg->msg_namelen);
			}
		}

		if (msg->msg_control != NULL) {
			struct mbuf *m = control;
			u_char *ctlbuf = msg->msg_control;
			int clen = msg->msg_controllen;

			msg->msg_controllen = 0;

			while (m != NULL && clen > 0) {
				unsigned int tocopy;

				if (clen >= m->m_len) {
					tocopy = m->m_len;
				} else {
					msg->msg_flags |= MSG_CTRUNC;
					tocopy = clen;
				}
				memcpy(ctlbuf, mtod(m, caddr_t), tocopy);
				ctlbuf += tocopy;
				clen -= tocopy;
				m = m->m_next;
			}
			msg->msg_controllen =
			    (socklen_t)((uintptr_t)ctlbuf - (uintptr_t)msg->msg_control);
		}
	}

cleanup:
	if (control != NULL) {
		m_freem(control);
	}
	if (fromsa != NULL) {
		FREE(fromsa, M_SONAME);
	}
	return error;
}

errno_t
sock_receive(socket_t sock, struct msghdr *msg, int flags, size_t *recvdlen)
{
	if ((msg == NULL) || (msg->msg_iovlen < 1) ||
	    (msg->msg_iov[0].iov_len == 0) ||
	    (msg->msg_iov[0].iov_base == NULL)) {
		return EINVAL;
	}

	return sock_receive_internal(sock, msg, NULL, flags, recvdlen);
}
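
/*
 * Illustrative sketch, not part of this file: receiving into a kernel
 * buffer through a one-iovec msghdr.  buf and buflen are hypothetical
 * caller state.
 *
 *	struct iovec iov = { .iov_base = buf, .iov_len = buflen };
 *	struct msghdr msg = { .msg_iov = &iov, .msg_iovlen = 1 };
 *	size_t recvd = 0;
 *
 *	error = sock_receive(so, &msg, MSG_DONTWAIT, &recvd);
 */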

errno_t
sock_receivembuf(socket_t sock, struct msghdr *msg, mbuf_t *data, int flags,
    size_t *recvlen)
{
	if (data == NULL || recvlen == NULL || *recvlen == 0 || (msg != NULL &&
	    (msg->msg_iov != NULL || msg->msg_iovlen != 0))) {
		return EINVAL;
	}

	return sock_receive_internal(sock, msg, data, flags, recvlen);
}

errno_t
sock_send_internal(socket_t sock, const struct msghdr *msg, mbuf_t data,
    int flags, size_t *sentlen)
{
	uio_t auio = NULL;
	struct mbuf *control = NULL;
	int error = 0;
	user_ssize_t datalen = 0;
	char uio_buf[UIO_SIZEOF((msg != NULL ? msg->msg_iovlen : 1))];

	if (sock == NULL) {
		error = EINVAL;
		goto errorout;
	}

	if (data == NULL && msg != NULL) {
		struct iovec *tempp = msg->msg_iov;

		auio = uio_createwithbuffer(msg->msg_iovlen, 0,
		    UIO_SYSSPACE, UIO_WRITE, &uio_buf[0], sizeof(uio_buf));
		if (tempp != NULL) {
			int i;

			for (i = 0; i < msg->msg_iovlen; i++) {
				uio_addiov(auio,
				    CAST_USER_ADDR_T((tempp + i)->iov_base),
				    (tempp + i)->iov_len);
			}

			if (uio_resid(auio) < 0) {
				error = EINVAL;
				goto errorout;
			}
		}
	}

	if (sentlen != NULL) {
		*sentlen = 0;
	}

	if (auio != NULL) {
		datalen = uio_resid(auio);
	} else {
		datalen = data->m_pkthdr.len;
	}

	if (msg != NULL && msg->msg_control) {
		if ((size_t)msg->msg_controllen < sizeof(struct cmsghdr)) {
			error = EINVAL;
			goto errorout;
		}

		if ((size_t)msg->msg_controllen > MLEN) {
			error = EINVAL;
			goto errorout;
		}

		control = m_get(M_NOWAIT, MT_CONTROL);
		if (control == NULL) {
			error = ENOMEM;
			goto errorout;
		}
		memcpy(mtod(control, caddr_t), msg->msg_control,
		    msg->msg_controllen);
		control->m_len = msg->msg_controllen;
	}

	error = sock->so_proto->pr_usrreqs->pru_sosend(sock, msg != NULL ?
	    (struct sockaddr *)msg->msg_name : NULL, auio, data,
	    control, flags);

	/*
	 * Residual data is possible in the case of IO vectors but not
	 * in the mbuf case since the latter is treated as atomic send.
	 * If pru_sosend() consumed a portion of the iovecs data and
	 * the error returned is transient, treat it as success; this
	 * is consistent with sendit() behavior.
	 */
	if (auio != NULL && uio_resid(auio) != datalen &&
	    (error == ERESTART || error == EINTR || error == EWOULDBLOCK)) {
		error = 0;
	}

	if (error == 0 && sentlen != NULL) {
		if (auio != NULL) {
			*sentlen = datalen - uio_resid(auio);
		} else {
			*sentlen = datalen;
		}
	}

	return error;

/*
 * In cases where we detect an error before returning, we need to
 * free the mbuf chain if there is one. sosend (and pru_sosend) will
 * free the mbuf chain if they encounter an error.
 */
errorout:
	if (control) {
		m_freem(control);
	}
	if (data) {
		m_freem(data);
	}
	if (sentlen) {
		*sentlen = 0;
	}
	return error;
}

errno_t
sock_send(socket_t sock, const struct msghdr *msg, int flags, size_t *sentlen)
{
	if (msg == NULL || msg->msg_iov == NULL || msg->msg_iovlen < 1) {
		return EINVAL;
	}

	return sock_send_internal(sock, msg, NULL, flags, sentlen);
}

errno_t
sock_sendmbuf(socket_t sock, const struct msghdr *msg, mbuf_t data,
    int flags, size_t *sentlen)
{
	if (data == NULL || (msg != NULL && (msg->msg_iov != NULL ||
	    msg->msg_iovlen != 0))) {
		if (data != NULL) {
			m_freem(data);
		}
		return EINVAL;
	}
	return sock_send_internal(sock, msg, data, flags, sentlen);
}
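
/*
 * Illustrative sketch, not part of this file: sock_send() mirrors
 * sock_receive(); sock_sendmbuf() takes ownership of the mbuf chain and
 * frees it even on failure, as the error paths above show.  buf and
 * buflen are hypothetical.
 *
 *	struct iovec iov = { .iov_base = buf, .iov_len = buflen };
 *	struct msghdr msg = { .msg_iov = &iov, .msg_iovlen = 1 };
 *	size_t sent = 0;
 *
 *	error = sock_send(so, &msg, 0, &sent);
 */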

errno_t
sock_shutdown(socket_t sock, int how)
{
	if (sock == NULL) {
		return EINVAL;
	}

	return soshutdown(sock, how);
}

errno_t
sock_socket_common(int domain, int type, int protocol, sock_upcall callback,
    void *context, socket_t *new_so, bool is_internal)
{
	int error = 0;

	if (new_so == NULL) {
		return EINVAL;
	}

	/* socreate will create an initial so_count */
	error = socreate(domain, new_so, type, protocol);
	if (error == 0) {
		/*
		 * This is an in-kernel socket
		 */
		(*new_so)->so_flags1 |= SOF1_IN_KERNEL_SOCKET;
		INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_in_kernel_total);
		if (is_internal) {
			INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_in_kernel_os_total);
		}

		/* see comments in sock_setupcall() */
		if (callback != NULL) {
			sock_setupcall(*new_so, callback, context);
		}
		/*
		 * last_pid and last_upid should be zero for sockets
		 * created using sock_socket
		 */
		(*new_so)->last_pid = 0;
		(*new_so)->last_upid = 0;
	}
	return error;
}

errno_t
sock_socket_internal(int domain, int type, int protocol, sock_upcall callback,
    void *context, socket_t *new_so)
{
	return sock_socket_common(domain, type, protocol, callback,
	    context, new_so, true);
}

errno_t
sock_socket(int domain, int type, int protocol, sock_upcall callback,
    void *context, socket_t *new_so)
{
	return sock_socket_common(domain, type, protocol, callback,
	    context, new_so, false);
}
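
/*
 * Illustrative sketch, not part of this file: creating an in-kernel UDP
 * socket with a read upcall.  my_upcall and my_cookie are hypothetical.
 *
 *	socket_t so = NULL;
 *
 *	error = sock_socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP,
 *	    my_upcall, my_cookie, &so);
 *	if (error == 0) {
 *		...use the socket...
 *		sock_close(so);
 *	}
 */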

void
sock_close(socket_t sock)
{
	if (sock == NULL) {
		return;
	}

	soclose(sock);
}

/* Do we want this to be APPLE_PRIVATE API?: YES (LD 12/23/04) */
void
sock_retain(socket_t sock)
{
	if (sock == NULL) {
		return;
	}

	socket_lock(sock, 1);
	sock->so_retaincnt++;
	sock->so_usecount++;	/* add extra reference for holding the socket */
	socket_unlock(sock, 1);
}

/* Do we want this to be APPLE_PRIVATE API? */
void
sock_release(socket_t sock)
{
	if (sock == NULL) {
		return;
	}

	socket_lock(sock, 1);
	if (sock->so_upcallusecount > 0) {
		soclose_wait_locked(sock);
	}

	sock->so_retaincnt--;
	if (sock->so_retaincnt < 0) {
		panic("%s: negative retain count (%d) for sock=%p\n",
		    __func__, sock->so_retaincnt, sock);
		/* NOTREACHED */
	}
	/*
	 * Check SS_NOFDREF in case a close happened as sock_retain()
	 * was grabbing the lock
	 */
	if ((sock->so_retaincnt == 0) && (sock->so_usecount == 2) &&
	    (!(sock->so_state & SS_NOFDREF) ||
	    (sock->so_flags & SOF_MP_SUBFLOW))) {
		/* close socket only if the FD is not holding it */
		soclose_locked(sock);
	} else {
		/* remove extra reference holding the socket */
		VERIFY(sock->so_usecount > 1);
		sock->so_usecount--;
	}
	socket_unlock(sock, 1);
}
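
/*
 * Illustrative sketch, not part of this file: a subsystem that stashes
 * a socket_t beyond the call that produced it takes a retain first;
 * the matching sock_release() may close the socket if no file
 * descriptor still holds it, per the so_usecount check above.
 *
 *	sock_retain(so);
 *	...keep so in long-lived state...
 *	sock_release(so);
 */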

errno_t
sock_setpriv(socket_t sock, int on)
{
	if (sock == NULL) {
		return EINVAL;
	}

	socket_lock(sock, 1);
	if (on) {
		sock->so_state |= SS_PRIV;
	} else {
		sock->so_state &= ~SS_PRIV;
	}
	socket_unlock(sock, 1);
	return 0;
}

int
sock_isconnected(socket_t sock)
{
	int retval;

	socket_lock(sock, 1);
	retval = ((sock->so_state & SS_ISCONNECTED) ? 1 : 0);
	socket_unlock(sock, 1);
	return retval;
}

int
sock_isnonblocking(socket_t sock)
{
	int retval;

	socket_lock(sock, 1);
	retval = ((sock->so_state & SS_NBIO) ? 1 : 0);
	socket_unlock(sock, 1);
	return retval;
}

errno_t
sock_gettype(socket_t sock, int *outDomain, int *outType, int *outProtocol)
{
	socket_lock(sock, 1);
	if (outDomain != NULL) {
		*outDomain = SOCK_DOM(sock);
	}
	if (outType != NULL) {
		*outType = sock->so_type;
	}
	if (outProtocol != NULL) {
		*outProtocol = SOCK_PROTO(sock);
	}
	socket_unlock(sock, 1);
	return 0;
}

/*
 * Return the listening socket of a pre-accepted socket.  It returns the
 * listener (so_head) value of a given socket.  This is intended to be
 * called by a socket filter during a filter attach (sf_attach) callback.
 * The value returned by this routine is safe to be used only in the
 * context of that callback, because we hold the listener's lock across
 * the sflt_initsock() call.
 */
socket_t
sock_getlistener(socket_t sock)
{
	return sock->so_head;
}

static inline void
sock_set_tcp_stream_priority(socket_t sock)
{
	if ((SOCK_DOM(sock) == PF_INET || SOCK_DOM(sock) == PF_INET6) &&
	    SOCK_TYPE(sock) == SOCK_STREAM) {
		set_tcp_stream_priority(sock);
	}
}

/*
 * Caller must have ensured socket is valid and won't be going away.
 */
void
socket_set_traffic_mgt_flags_locked(socket_t sock, u_int8_t flags)
{
	u_int32_t soflags1 = 0;

	if ((flags & TRAFFIC_MGT_SO_BACKGROUND)) {
		soflags1 |= SOF1_TRAFFIC_MGT_SO_BACKGROUND;
	}
	if ((flags & TRAFFIC_MGT_TCP_RECVBG)) {
		soflags1 |= SOF1_TRAFFIC_MGT_TCP_RECVBG;
	}

	(void) OSBitOrAtomic(soflags1, &sock->so_flags1);

	sock_set_tcp_stream_priority(sock);
}

void
socket_set_traffic_mgt_flags(socket_t sock, u_int8_t flags)
{
	socket_lock(sock, 1);
	socket_set_traffic_mgt_flags_locked(sock, flags);
	socket_unlock(sock, 1);
}

/*
 * Caller must have ensured socket is valid and won't be going away.
 */
void
socket_clear_traffic_mgt_flags_locked(socket_t sock, u_int8_t flags)
{
	u_int32_t soflags1 = 0;

	if ((flags & TRAFFIC_MGT_SO_BACKGROUND)) {
		soflags1 |= SOF1_TRAFFIC_MGT_SO_BACKGROUND;
	}
	if ((flags & TRAFFIC_MGT_TCP_RECVBG)) {
		soflags1 |= SOF1_TRAFFIC_MGT_TCP_RECVBG;
	}

	(void) OSBitAndAtomic(~soflags1, &sock->so_flags1);

	sock_set_tcp_stream_priority(sock);
}

void
socket_clear_traffic_mgt_flags(socket_t sock, u_int8_t flags)
{
	socket_lock(sock, 1);
	socket_clear_traffic_mgt_flags_locked(sock, flags);
	socket_unlock(sock, 1);
}


/*
 * Caller must have ensured socket is valid and won't be going away.
 */
errno_t
socket_defunct(struct proc *p, socket_t so, int level)
{
	errno_t retval;

	if (level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_SVC &&
	    level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL) {
		return EINVAL;
	}

	socket_lock(so, 1);
	/*
	 * SHUTDOWN_SOCKET_LEVEL_DISCONNECT_SVC level is meant to tear down
	 * all of mDNSResponder IPC sockets, currently those of AF_UNIX; note
	 * that this is an implementation artifact of mDNSResponder.  We do
	 * a quick test against the socket buffers for SB_UNIX, since that
	 * would have been set by unp_attach() at socket creation time.
	 */
	if (level == SHUTDOWN_SOCKET_LEVEL_DISCONNECT_SVC &&
	    (so->so_rcv.sb_flags & so->so_snd.sb_flags & SB_UNIX) != SB_UNIX) {
		socket_unlock(so, 1);
		return EOPNOTSUPP;
	}
	retval = sosetdefunct(p, so, level, TRUE);
	if (retval == 0) {
		retval = sodefunct(p, so, level);
	}
	socket_unlock(so, 1);
	return retval;
}

void
sock_setupcalls_locked(socket_t sock, sock_upcall rcallback, void *rcontext,
    sock_upcall wcallback, void *wcontext, int locked)
{
	if (rcallback != NULL) {
		sock->so_rcv.sb_flags |= SB_UPCALL;
		if (locked) {
			sock->so_rcv.sb_flags |= SB_UPCALL_LOCK;
		}
		sock->so_rcv.sb_upcall = rcallback;
		sock->so_rcv.sb_upcallarg = rcontext;
	} else {
		sock->so_rcv.sb_flags &= ~(SB_UPCALL | SB_UPCALL_LOCK);
		sock->so_rcv.sb_upcall = NULL;
		sock->so_rcv.sb_upcallarg = NULL;
	}

	if (wcallback != NULL) {
		sock->so_snd.sb_flags |= SB_UPCALL;
		if (locked) {
			sock->so_snd.sb_flags |= SB_UPCALL_LOCK;
		}
		sock->so_snd.sb_upcall = wcallback;
		sock->so_snd.sb_upcallarg = wcontext;
	} else {
		sock->so_snd.sb_flags &= ~(SB_UPCALL | SB_UPCALL_LOCK);
		sock->so_snd.sb_upcall = NULL;
		sock->so_snd.sb_upcallarg = NULL;
	}
}

errno_t
sock_setupcall(socket_t sock, sock_upcall callback, void *context)
{
	if (sock == NULL) {
		return EINVAL;
	}

	/*
	 * Note that we don't wait for any in progress upcall to complete.
	 * On embedded, sock_setupcall() causes both read and write
	 * callbacks to be set; on desktop, only read callback is set
	 * to maintain legacy KPI behavior.
	 *
	 * The newer sock_setupcalls() KPI should be used instead to set
	 * the read and write callbacks and their respective parameters.
	 */
	socket_lock(sock, 1);
#if (defined(__arm__) || defined(__arm64__))
	sock_setupcalls_locked(sock, callback, context, callback, context, 0);
#else /* (defined(__arm__) || defined(__arm64__)) */
	sock_setupcalls_locked(sock, callback, context, NULL, NULL, 0);
#endif /* (defined(__arm__) || defined(__arm64__)) */
	socket_unlock(sock, 1);

	return 0;
}

errno_t
sock_setupcalls(socket_t sock, sock_upcall rcallback, void *rcontext,
    sock_upcall wcallback, void *wcontext)
{
	if (sock == NULL) {
		return EINVAL;
	}

	/*
	 * Note that we don't wait for any in progress upcall to complete.
	 */
	socket_lock(sock, 1);
	sock_setupcalls_locked(sock, rcallback, rcontext, wcallback, wcontext, 0);
	socket_unlock(sock, 1);

	return 0;
}
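
/*
 * Illustrative sketch, not part of this file: the shape of a read
 * upcall installed with sock_setupcall()/sock_setupcalls().  Upcalls
 * run in the protocol's context, so callers typically just signal a
 * worker thread rather than doing blocking work inline.  my_ctx_t and
 * its readable field are hypothetical.
 *
 *	static void
 *	my_read_upcall(socket_t so, void *cookie, int waitf)
 *	{
 *	#pragma unused(so, waitf)
 *		my_ctx_t *ctx = cookie;
 *
 *		wakeup(&ctx->readable);
 *	}
 */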

void
sock_catchevents_locked(socket_t sock, sock_evupcall ecallback, void *econtext,
    long emask)
{
	socket_lock_assert_owned(sock);

	/*
	 * Note that we don't wait for any in progress upcall to complete.
	 */
	if (ecallback != NULL) {
		sock->so_event = ecallback;
		sock->so_eventarg = econtext;
		sock->so_eventmask = (uint32_t)emask;
	} else {
		sock->so_event = sonullevent;
		sock->so_eventarg = NULL;
		sock->so_eventmask = 0;
	}
}

errno_t
sock_catchevents(socket_t sock, sock_evupcall ecallback, void *econtext,
    long emask)
{
	if (sock == NULL) {
		return EINVAL;
	}

	socket_lock(sock, 1);
	sock_catchevents_locked(sock, ecallback, econtext, emask);
	socket_unlock(sock, 1);

	return 0;
}
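
/*
 * Illustrative sketch, not part of this file: registering for a subset
 * of socket events, assuming the private SO_FILT_HINT_* hint bits from
 * <sys/socket.h>.  my_ev_upcall and my_cookie are hypothetical.
 *
 *	error = sock_catchevents(so, my_ev_upcall, my_cookie,
 *	    SO_FILT_HINT_CONNRESET | SO_FILT_HINT_CANTRCVMORE);
 */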

/*
 * Returns non-zero when the socket belongs to the kernel.
 */
int
sock_iskernel(socket_t so)
{
	return so && so->last_pid == 0;
}