/* bsd/kern/kpi_socket.c (apple/xnu, xnu-4903.231.4) */
/*
 * Copyright (c) 2003-2017 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#define __KPI__
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/param.h>
#include <sys/proc.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/protosw.h>
#include <sys/domain.h>
#include <sys/mbuf.h>
#include <sys/mcache.h>
#include <sys/fcntl.h>
#include <sys/filio.h>
#include <sys/uio_internal.h>
#include <kern/locks.h>
#include <net/net_api_stats.h>
#include <netinet/in.h>
#include <libkern/OSAtomic.h>
#include <stdbool.h>
static errno_t sock_send_internal(socket_t, const struct msghdr *,
    mbuf_t, int, size_t *);

#undef sock_accept
#undef sock_socket
errno_t sock_accept(socket_t so, struct sockaddr *from, int fromlen,
    int flags, sock_upcall callback, void *cookie, socket_t *new_so);
errno_t sock_socket(int domain, int type, int protocol, sock_upcall callback,
    void *context, socket_t *new_so);

static errno_t sock_accept_common(socket_t sock, struct sockaddr *from,
    int fromlen, int flags, sock_upcall callback, void *cookie,
    socket_t *new_sock, bool is_internal);
static errno_t sock_socket_common(int domain, int type, int protocol,
    sock_upcall callback, void *context, socket_t *new_so, bool is_internal);

errno_t
sock_accept_common(socket_t sock, struct sockaddr *from, int fromlen, int flags,
    sock_upcall callback, void *cookie, socket_t *new_sock, bool is_internal)
{
	struct sockaddr *sa;
	struct socket *new_so;
	lck_mtx_t *mutex_held;
	int dosocklock;
	errno_t error = 0;

	if (sock == NULL || new_sock == NULL)
		return (EINVAL);

	socket_lock(sock, 1);
	if ((sock->so_options & SO_ACCEPTCONN) == 0) {
		socket_unlock(sock, 1);
		return (EINVAL);
	}
	if ((flags & ~(MSG_DONTWAIT)) != 0) {
		socket_unlock(sock, 1);
		return (ENOTSUP);
	}
check_again:
	if (((flags & MSG_DONTWAIT) != 0 || (sock->so_state & SS_NBIO) != 0) &&
	    sock->so_comp.tqh_first == NULL) {
		socket_unlock(sock, 1);
		return (EWOULDBLOCK);
	}

	if (sock->so_proto->pr_getlock != NULL) {
		mutex_held = (*sock->so_proto->pr_getlock)(sock, PR_F_WILLUNLOCK);
		dosocklock = 1;
	} else {
		mutex_held = sock->so_proto->pr_domain->dom_mtx;
		dosocklock = 0;
	}

	while (TAILQ_EMPTY(&sock->so_comp) && sock->so_error == 0) {
		if (sock->so_state & SS_CANTRCVMORE) {
			sock->so_error = ECONNABORTED;
			break;
		}
		error = msleep((caddr_t)&sock->so_timeo, mutex_held,
		    PSOCK | PCATCH, "sock_accept", NULL);
		if (error != 0) {
			socket_unlock(sock, 1);
			return (error);
		}
	}
	if (sock->so_error != 0) {
		error = sock->so_error;
		sock->so_error = 0;
		socket_unlock(sock, 1);
		return (error);
	}

	so_acquire_accept_list(sock, NULL);
	if (TAILQ_EMPTY(&sock->so_comp)) {
		so_release_accept_list(sock);
		goto check_again;
	}
	new_so = TAILQ_FIRST(&sock->so_comp);
	TAILQ_REMOVE(&sock->so_comp, new_so, so_list);
	new_so->so_state &= ~SS_COMP;
	new_so->so_head = NULL;
	sock->so_qlen--;

	so_release_accept_list(sock);

	/*
	 * Count the accepted socket as an in-kernel socket
	 */
	new_so->so_flags1 |= SOF1_IN_KERNEL_SOCKET;
	INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_in_kernel_total);
	if (is_internal) {
		INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_in_kernel_os_total);
	}

	/*
	 * Pass the pre-accepted socket to any interested socket filter(s).
	 * Upon failure, the socket would have been closed by the callee.
	 */
	if (new_so->so_filt != NULL) {
		/*
		 * Temporarily drop the listening socket's lock before we
		 * hand off control over to the socket filter(s), but keep
		 * a reference so that it won't go away. We'll grab it
		 * again once we're done with the filter(s).
		 */
		socket_unlock(sock, 0);
		if ((error = soacceptfilter(new_so, sock)) != 0) {
			/* Drop reference on listening socket */
			sodereference(sock);
			return (error);
		}
		socket_lock(sock, 0);
	}

	if (dosocklock) {
		LCK_MTX_ASSERT(new_so->so_proto->pr_getlock(new_so, 0),
		    LCK_MTX_ASSERT_NOTOWNED);
		socket_lock(new_so, 1);
	}

	(void) soacceptlock(new_so, &sa, 0);

	socket_unlock(sock, 1); /* release the head */

	/* see comments in sock_setupcall() */
	if (callback != NULL) {
#if CONFIG_EMBEDDED
		sock_setupcalls_locked(new_so, callback, cookie, callback, cookie, 0);
#else
		sock_setupcalls_locked(new_so, callback, cookie, NULL, NULL, 0);
#endif /* !CONFIG_EMBEDDED */
	}

	if (sa != NULL && from != NULL) {
		if (fromlen > sa->sa_len)
			fromlen = sa->sa_len;
		memcpy(from, sa, fromlen);
	}
	if (sa != NULL)
		FREE(sa, M_SONAME);

	/*
	 * If the socket has been marked as inactive by sosetdefunct(),
	 * disallow further operations on it.
	 */
	if (new_so->so_flags & SOF_DEFUNCT) {
		(void) sodefunct(current_proc(), new_so,
		    SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL);
	}
	*new_sock = new_so;
	if (dosocklock)
		socket_unlock(new_so, 1);
	return (error);
}

errno_t
sock_accept(socket_t sock, struct sockaddr *from, int fromlen, int flags,
    sock_upcall callback, void *cookie, socket_t *new_sock)
{
	return (sock_accept_common(sock, from, fromlen, flags,
	    callback, cookie, new_sock, false));
}
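
/*
 * Illustrative sketch, not part of the original KPI surface: drain the
 * completed-connection queue of a listening kernel socket without blocking.
 * The example_* name is hypothetical; the body relies only on sock_accept()
 * and sock_close() as declared in this file.
 */
__unused static void
example_accept_ready(socket_t listener)
{
	struct sockaddr_storage ss;
	socket_t new_so;
	errno_t err;

	for (;;) {
		bzero(&ss, sizeof (ss));
		/* MSG_DONTWAIT: fail with EWOULDBLOCK instead of sleeping */
		err = sock_accept(listener, (struct sockaddr *)&ss,
		    (int)sizeof (ss), MSG_DONTWAIT, NULL, NULL, &new_so);
		if (err != 0)
			break;	/* typically EWOULDBLOCK once the queue drains */
		/* ... hand new_so off to its owner here ... */
		sock_close(new_so);
	}
}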

errno_t
sock_accept_internal(socket_t sock, struct sockaddr *from, int fromlen, int flags,
    sock_upcall callback, void *cookie, socket_t *new_sock)
{
	return (sock_accept_common(sock, from, fromlen, flags,
	    callback, cookie, new_sock, true));
}

errno_t
sock_bind(socket_t sock, const struct sockaddr *to)
{
	int error = 0;
	struct sockaddr *sa = NULL;
	struct sockaddr_storage ss;
	boolean_t want_free = TRUE;

	if (sock == NULL || to == NULL)
		return (EINVAL);

	if (to->sa_len > sizeof (ss)) {
		MALLOC(sa, struct sockaddr *, to->sa_len, M_SONAME, M_WAITOK);
		if (sa == NULL)
			return (ENOBUFS);
	} else {
		sa = (struct sockaddr *)&ss;
		want_free = FALSE;
	}
	memcpy(sa, to, to->sa_len);

	error = sobindlock(sock, sa, 1);	/* will lock socket */

	if (sa != NULL && want_free == TRUE)
		FREE(sa, M_SONAME);

	return (error);
}
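
/*
 * Illustrative sketch (hypothetical helper, not in the original file):
 * bind a kernel socket to INADDR_ANY on a given port, showing the
 * sockaddr_in fill-in that sock_bind() expects (sa_len must be set).
 */
__unused static errno_t
example_bind_any(socket_t so, in_port_t port)
{
	struct sockaddr_in sin;

	bzero(&sin, sizeof (sin));
	sin.sin_len = sizeof (sin);
	sin.sin_family = AF_INET;
	sin.sin_port = htons(port);
	sin.sin_addr.s_addr = htonl(INADDR_ANY);

	/* sock_bind() copies the address, so a stack sockaddr is fine */
	return (sock_bind(so, (const struct sockaddr *)&sin));
}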

errno_t
sock_connect(socket_t sock, const struct sockaddr *to, int flags)
{
	int error = 0;
	lck_mtx_t *mutex_held;
	struct sockaddr *sa = NULL;
	struct sockaddr_storage ss;
	boolean_t want_free = TRUE;

	if (sock == NULL || to == NULL)
		return (EINVAL);

	if (to->sa_len > sizeof (ss)) {
		MALLOC(sa, struct sockaddr *, to->sa_len, M_SONAME,
		    (flags & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK);
		if (sa == NULL)
			return (ENOBUFS);
	} else {
		sa = (struct sockaddr *)&ss;
		want_free = FALSE;
	}
	memcpy(sa, to, to->sa_len);

	socket_lock(sock, 1);

	if ((sock->so_state & SS_ISCONNECTING) &&
	    ((sock->so_state & SS_NBIO) != 0 || (flags & MSG_DONTWAIT) != 0)) {
		error = EALREADY;
		goto out;
	}
	error = soconnectlock(sock, sa, 0);
	if (!error) {
		if ((sock->so_state & SS_ISCONNECTING) &&
		    ((sock->so_state & SS_NBIO) != 0 ||
		    (flags & MSG_DONTWAIT) != 0)) {
			error = EINPROGRESS;
			goto out;
		}

		if (sock->so_proto->pr_getlock != NULL)
			mutex_held = (*sock->so_proto->pr_getlock)(sock, PR_F_WILLUNLOCK);
		else
			mutex_held = sock->so_proto->pr_domain->dom_mtx;

		while ((sock->so_state & SS_ISCONNECTING) &&
		    sock->so_error == 0) {
			error = msleep((caddr_t)&sock->so_timeo,
			    mutex_held, PSOCK | PCATCH, "sock_connect", NULL);
			if (error != 0)
				break;
		}

		if (error == 0) {
			error = sock->so_error;
			sock->so_error = 0;
		}
	} else {
		sock->so_state &= ~SS_ISCONNECTING;
	}
out:
	socket_unlock(sock, 1);

	if (sa != NULL && want_free == TRUE)
		FREE(sa, M_SONAME);

	return (error);
}
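
/*
 * Illustrative sketch (hypothetical helper): start a non-blocking connect
 * and then bound the wait with sock_connectwait(). With MSG_DONTWAIT,
 * sock_connect() returns EINPROGRESS while the handshake is still under
 * way, which is exactly the case sock_connectwait() is meant to wait out.
 */
__unused static errno_t
example_connect_timed(socket_t so, const struct sockaddr *to)
{
	struct timeval tv;
	errno_t err;

	err = sock_connect(so, to, MSG_DONTWAIT);
	if (err == EINPROGRESS) {
		tv.tv_sec = 5;	/* give the handshake five seconds */
		tv.tv_usec = 0;
		err = sock_connectwait(so, &tv);
	}
	return (err);
}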

errno_t
sock_connectwait(socket_t sock, const struct timeval *tv)
{
	lck_mtx_t *mutex_held;
	errno_t retval = 0;
	struct timespec ts;

	socket_lock(sock, 1);

	/* Check if we're already connected or if we've already errored out */
	if ((sock->so_state & SS_ISCONNECTING) == 0 || sock->so_error != 0) {
		if (sock->so_error != 0) {
			retval = sock->so_error;
			sock->so_error = 0;
		} else {
			if ((sock->so_state & SS_ISCONNECTED) != 0)
				retval = 0;
			else
				retval = EINVAL;
		}
		goto done;
	}

	/* Translation from timeval to hz ticks, copied from the SO_RCVTIMEO handling */
	if (tv->tv_sec < 0 || tv->tv_sec > SHRT_MAX / hz ||
	    tv->tv_usec < 0 || tv->tv_usec >= 1000000) {
		retval = EDOM;
		goto done;
	}

	ts.tv_sec = tv->tv_sec;
	ts.tv_nsec = (tv->tv_usec * (integer_t)NSEC_PER_USEC);
	if ((ts.tv_sec + (ts.tv_nsec/(long)NSEC_PER_SEC))/100 > SHRT_MAX) {
		retval = EDOM;
		goto done;
	}

	if (sock->so_proto->pr_getlock != NULL)
		mutex_held = (*sock->so_proto->pr_getlock)(sock, PR_F_WILLUNLOCK);
	else
		mutex_held = sock->so_proto->pr_domain->dom_mtx;

	msleep((caddr_t)&sock->so_timeo, mutex_held,
	    PSOCK, "sock_connectwait", &ts);

	/* Check if we're still waiting to connect */
	if ((sock->so_state & SS_ISCONNECTING) && sock->so_error == 0) {
		retval = EINPROGRESS;
		goto done;
	}

	if (sock->so_error != 0) {
		retval = sock->so_error;
		sock->so_error = 0;
	}

done:
	socket_unlock(sock, 1);
	return (retval);
}

errno_t
sock_nointerrupt(socket_t sock, int on)
{
	socket_lock(sock, 1);

	if (on) {
		sock->so_rcv.sb_flags |= SB_NOINTR;	/* This isn't safe */
		sock->so_snd.sb_flags |= SB_NOINTR;	/* This isn't safe */
	} else {
		sock->so_rcv.sb_flags &= ~SB_NOINTR;	/* This isn't safe */
		sock->so_snd.sb_flags &= ~SB_NOINTR;	/* This isn't safe */
	}

	socket_unlock(sock, 1);

	return (0);
}

errno_t
sock_getpeername(socket_t sock, struct sockaddr *peername, int peernamelen)
{
	int error;
	struct sockaddr *sa = NULL;

	if (sock == NULL || peername == NULL || peernamelen < 0)
		return (EINVAL);

	socket_lock(sock, 1);
	if (!(sock->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING))) {
		socket_unlock(sock, 1);
		return (ENOTCONN);
	}
	error = sogetaddr_locked(sock, &sa, 1);
	socket_unlock(sock, 1);
	if (error == 0) {
		if (peernamelen > sa->sa_len)
			peernamelen = sa->sa_len;
		memcpy(peername, sa, peernamelen);
		FREE(sa, M_SONAME);
	}
	return (error);
}

errno_t
sock_getsockname(socket_t sock, struct sockaddr *sockname, int socknamelen)
{
	int error;
	struct sockaddr *sa = NULL;

	if (sock == NULL || sockname == NULL || socknamelen < 0)
		return (EINVAL);

	socket_lock(sock, 1);
	error = sogetaddr_locked(sock, &sa, 0);
	socket_unlock(sock, 1);
	if (error == 0) {
		if (socknamelen > sa->sa_len)
			socknamelen = sa->sa_len;
		memcpy(sockname, sa, socknamelen);
		FREE(sa, M_SONAME);
	}
	return (error);
}

__private_extern__ int
sogetaddr_locked(struct socket *so, struct sockaddr **psa, int peer)
{
	int error;

	if (so == NULL || psa == NULL)
		return (EINVAL);

	*psa = NULL;
	error = peer ? so->so_proto->pr_usrreqs->pru_peeraddr(so, psa) :
	    so->so_proto->pr_usrreqs->pru_sockaddr(so, psa);

	if (error == 0 && *psa == NULL) {
		error = ENOMEM;
	} else if (error != 0 && *psa != NULL) {
		FREE(*psa, M_SONAME);
		*psa = NULL;
	}
	return (error);
}

errno_t
sock_getaddr(socket_t sock, struct sockaddr **psa, int peer)
{
	int error;

	if (sock == NULL || psa == NULL)
		return (EINVAL);

	socket_lock(sock, 1);
	error = sogetaddr_locked(sock, psa, peer);
	socket_unlock(sock, 1);

	return (error);
}

void
sock_freeaddr(struct sockaddr *sa)
{
	if (sa != NULL)
		FREE(sa, M_SONAME);
}

errno_t
sock_getsockopt(socket_t sock, int level, int optname, void *optval,
    int *optlen)
{
	int error = 0;
	struct sockopt sopt;

	if (sock == NULL || optval == NULL || optlen == NULL)
		return (EINVAL);

	sopt.sopt_dir = SOPT_GET;
	sopt.sopt_level = level;
	sopt.sopt_name = optname;
	sopt.sopt_val = CAST_USER_ADDR_T(optval);
	sopt.sopt_valsize = *optlen;
	sopt.sopt_p = kernproc;
	error = sogetoptlock(sock, &sopt, 1);	/* will lock socket */
	if (error == 0)
		*optlen = sopt.sopt_valsize;
	return (error);
}

errno_t
sock_ioctl(socket_t sock, unsigned long request, void *argp)
{
	return (soioctl(sock, request, argp, kernproc));	/* will lock socket */
}

errno_t
sock_setsockopt(socket_t sock, int level, int optname, const void *optval,
    int optlen)
{
	struct sockopt sopt;

	if (sock == NULL || optval == NULL)
		return (EINVAL);

	sopt.sopt_dir = SOPT_SET;
	sopt.sopt_level = level;
	sopt.sopt_name = optname;
	sopt.sopt_val = CAST_USER_ADDR_T(optval);
	sopt.sopt_valsize = optlen;
	sopt.sopt_p = kernproc;
	return (sosetoptlock(sock, &sopt, 1));	/* will lock socket */
}
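
/*
 * Illustrative sketch (hypothetical helper): set and read back a boolean
 * socket option through the KPI wrappers above. Note that sock_getsockopt()
 * updates *optlen with the size actually returned.
 */
__unused static errno_t
example_toggle_reuseaddr(socket_t so)
{
	int one = 1;
	int val = 0, vallen = sizeof (val);
	errno_t err;

	err = sock_setsockopt(so, SOL_SOCKET, SO_REUSEADDR, &one, sizeof (one));
	if (err == 0)
		err = sock_getsockopt(so, SOL_SOCKET, SO_REUSEADDR, &val, &vallen);
	return (err);
}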

/*
 * This follows the recommended mappings between DSCP code points
 * and WMM access classes.
 */
static u_int32_t so_tc_from_dscp(u_int8_t dscp);
static u_int32_t
so_tc_from_dscp(u_int8_t dscp)
{
	u_int32_t tc;

	if (dscp >= 0x30 && dscp <= 0x3f)
		tc = SO_TC_VO;
	else if (dscp >= 0x20 && dscp <= 0x2f)
		tc = SO_TC_VI;
	else if (dscp >= 0x08 && dscp <= 0x17)
		tc = SO_TC_BK_SYS;
	else
		tc = SO_TC_BE;

	return (tc);
}

errno_t
sock_settclassopt(socket_t sock, const void *optval, size_t optlen)
{
	errno_t error = 0;
	struct sockopt sopt;
	int sotc;

	if (sock == NULL || optval == NULL || optlen != sizeof (int))
		return (EINVAL);

	socket_lock(sock, 1);
	if (!(sock->so_state & SS_ISCONNECTED)) {
		/*
		 * If the socket is not connected then we don't know
		 * whether the destination is on the LAN or not. Skip
		 * setting the traffic class in this case.
		 */
		error = ENOTCONN;
		goto out;
	}

	if (sock->so_proto == NULL || sock->so_proto->pr_domain == NULL ||
	    sock->so_pcb == NULL) {
		error = EINVAL;
		goto out;
	}

	/*
	 * Set the socket traffic class based on the passed DSCP code point,
	 * regardless of the scope of the destination.
	 */
	sotc = so_tc_from_dscp((*(const int *)optval) >> 2);

	sopt.sopt_dir = SOPT_SET;
	sopt.sopt_val = CAST_USER_ADDR_T(&sotc);
	sopt.sopt_valsize = sizeof (sotc);
	sopt.sopt_p = kernproc;
	sopt.sopt_level = SOL_SOCKET;
	sopt.sopt_name = SO_TRAFFIC_CLASS;

	error = sosetoptlock(sock, &sopt, 0);	/* already locked */

	if (error != 0) {
		printf("%s: sosetopt SO_TRAFFIC_CLASS failed %d\n",
		    __func__, error);
		goto out;
	}

	/*
	 * Check if the destination address is a LAN or link-local address.
	 * We do not want to set the traffic class bits if the destination
	 * is not local.
	 */
	if (!so_isdstlocal(sock))
		goto out;

	sopt.sopt_dir = SOPT_SET;
	sopt.sopt_val = CAST_USER_ADDR_T(optval);
	sopt.sopt_valsize = optlen;
	sopt.sopt_p = kernproc;

	switch (SOCK_DOM(sock)) {
	case PF_INET:
		sopt.sopt_level = IPPROTO_IP;
		sopt.sopt_name = IP_TOS;
		break;
	case PF_INET6:
		sopt.sopt_level = IPPROTO_IPV6;
		sopt.sopt_name = IPV6_TCLASS;
		break;
	default:
		error = EINVAL;
		goto out;
	}

	error = sosetoptlock(sock, &sopt, 0);	/* already locked */
	socket_unlock(sock, 1);
	return (error);
out:
	socket_unlock(sock, 1);
	return (error);
}
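
/*
 * Illustrative sketch (hypothetical helper): sock_settclassopt() takes a
 * TOS/TCLASS byte; the code above shifts it right by two to recover the
 * DSCP code point before mapping it to a traffic class. For example, TOS
 * 0xc0 is DSCP CS6 (0x30), which so_tc_from_dscp() maps to SO_TC_VO. The
 * socket must already be connected, or ENOTCONN comes back.
 */
__unused static errno_t
example_set_voice_class(socket_t so)
{
	int tos = 0xc0;	/* DSCP CS6 (0x30): falls in the SO_TC_VO range */

	return (sock_settclassopt(so, &tos, sizeof (tos)));
}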

errno_t
sock_gettclassopt(socket_t sock, void *optval, size_t *optlen)
{
	errno_t error = 0;
	struct sockopt sopt;

	if (sock == NULL || optval == NULL || optlen == NULL)
		return (EINVAL);

	sopt.sopt_dir = SOPT_GET;
	sopt.sopt_val = CAST_USER_ADDR_T(optval);
	sopt.sopt_valsize = *optlen;
	sopt.sopt_p = kernproc;

	socket_lock(sock, 1);
	if (sock->so_proto == NULL || sock->so_proto->pr_domain == NULL) {
		socket_unlock(sock, 1);
		return (EINVAL);
	}

	switch (SOCK_DOM(sock)) {
	case PF_INET:
		sopt.sopt_level = IPPROTO_IP;
		sopt.sopt_name = IP_TOS;
		break;
	case PF_INET6:
		sopt.sopt_level = IPPROTO_IPV6;
		sopt.sopt_name = IPV6_TCLASS;
		break;
	default:
		socket_unlock(sock, 1);
		return (EINVAL);
	}
	error = sogetoptlock(sock, &sopt, 0);	/* already locked */
	socket_unlock(sock, 1);
	if (error == 0)
		*optlen = sopt.sopt_valsize;
	return (error);
}

errno_t
sock_listen(socket_t sock, int backlog)
{
	if (sock == NULL)
		return (EINVAL);

	return (solisten(sock, backlog));	/* will lock socket */
}

errno_t
sock_receive_internal(socket_t sock, struct msghdr *msg, mbuf_t *data,
    int flags, size_t *recvdlen)
{
	uio_t auio;
	struct mbuf *control = NULL;
	int error = 0;
	int length = 0;
	struct sockaddr *fromsa = NULL;
	char uio_buf[ UIO_SIZEOF((msg != NULL) ? msg->msg_iovlen : 0) ];

	if (sock == NULL)
		return (EINVAL);

	auio = uio_createwithbuffer(((msg != NULL) ? msg->msg_iovlen : 0),
	    0, UIO_SYSSPACE, UIO_READ, &uio_buf[0], sizeof (uio_buf));
	if (msg != NULL && data == NULL) {
		int i;
		struct iovec *tempp = msg->msg_iov;

		for (i = 0; i < msg->msg_iovlen; i++) {
			uio_addiov(auio,
			    CAST_USER_ADDR_T((tempp + i)->iov_base),
			    (tempp + i)->iov_len);
		}
		if (uio_resid(auio) < 0)
			return (EINVAL);
	} else if (recvdlen != NULL) {
		uio_setresid(auio, (uio_resid(auio) + *recvdlen));
	}
	length = uio_resid(auio);

	if (recvdlen != NULL)
		*recvdlen = 0;

	/* let pru_soreceive handle the socket locking */
	error = sock->so_proto->pr_usrreqs->pru_soreceive(sock, &fromsa, auio,
	    data, (msg && msg->msg_control) ? &control : NULL, &flags);
	if (error != 0)
		goto cleanup;

	if (recvdlen != NULL)
		*recvdlen = length - uio_resid(auio);
	if (msg != NULL) {
		msg->msg_flags = flags;

		if (msg->msg_name != NULL) {
			int salen;
			salen = msg->msg_namelen;
			if (msg->msg_namelen > 0 && fromsa != NULL) {
				salen = MIN(salen, fromsa->sa_len);
				memcpy(msg->msg_name, fromsa,
				    msg->msg_namelen > fromsa->sa_len ?
				    fromsa->sa_len : msg->msg_namelen);
			}
		}

		if (msg->msg_control != NULL) {
			struct mbuf *m = control;
			u_char *ctlbuf = msg->msg_control;
			int clen = msg->msg_controllen;

			msg->msg_controllen = 0;

			while (m != NULL && clen > 0) {
				unsigned int tocopy;

				if (clen >= m->m_len) {
					tocopy = m->m_len;
				} else {
					msg->msg_flags |= MSG_CTRUNC;
					tocopy = clen;
				}
				memcpy(ctlbuf, mtod(m, caddr_t), tocopy);
				ctlbuf += tocopy;
				clen -= tocopy;
				m = m->m_next;
			}
			msg->msg_controllen =
			    (uintptr_t)ctlbuf - (uintptr_t)msg->msg_control;
		}
	}

cleanup:
	if (control != NULL)
		m_freem(control);
	if (fromsa != NULL)
		FREE(fromsa, M_SONAME);
	return (error);
}

errno_t
sock_receive(socket_t sock, struct msghdr *msg, int flags, size_t *recvdlen)
{
	if ((msg == NULL) || (msg->msg_iovlen < 1) ||
	    (msg->msg_iov[0].iov_len == 0) ||
	    (msg->msg_iov[0].iov_base == NULL))
		return (EINVAL);

	return (sock_receive_internal(sock, msg, NULL, flags, recvdlen));
}
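
/*
 * Illustrative sketch (hypothetical helper): receive into a caller-supplied
 * buffer via sock_receive(). The msghdr carries kernel-space iovecs here;
 * on return, *nread holds the number of bytes actually copied.
 */
__unused static errno_t
example_recv_buf(socket_t so, void *buf, size_t buflen, size_t *nread)
{
	struct iovec iov;
	struct msghdr msg;

	bzero(&msg, sizeof (msg));
	iov.iov_base = buf;
	iov.iov_len = buflen;
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;

	return (sock_receive(so, &msg, 0, nread));
}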

errno_t
sock_receivembuf(socket_t sock, struct msghdr *msg, mbuf_t *data, int flags,
    size_t *recvlen)
{
	if (data == NULL || recvlen == 0 || *recvlen <= 0 || (msg != NULL &&
	    (msg->msg_iov != NULL || msg->msg_iovlen != 0)))
		return (EINVAL);

	return (sock_receive_internal(sock, msg, data, flags, recvlen));
}

errno_t
sock_send_internal(socket_t sock, const struct msghdr *msg, mbuf_t data,
    int flags, size_t *sentlen)
{
	uio_t auio = NULL;
	struct mbuf *control = NULL;
	int error = 0;
	int datalen = 0;
	char uio_buf[ UIO_SIZEOF((msg != NULL ? msg->msg_iovlen : 1)) ];

	if (sock == NULL) {
		error = EINVAL;
		goto errorout;
	}

	if (data == NULL && msg != NULL) {
		struct iovec *tempp = msg->msg_iov;

		auio = uio_createwithbuffer(msg->msg_iovlen, 0,
		    UIO_SYSSPACE, UIO_WRITE, &uio_buf[0], sizeof (uio_buf));
		if (tempp != NULL) {
			int i;

			for (i = 0; i < msg->msg_iovlen; i++) {
				uio_addiov(auio,
				    CAST_USER_ADDR_T((tempp + i)->iov_base),
				    (tempp + i)->iov_len);
			}

			if (uio_resid(auio) < 0) {
				error = EINVAL;
				goto errorout;
			}
		}
	}

	if (sentlen != NULL)
		*sentlen = 0;

	if (auio != NULL)
		datalen = uio_resid(auio);
	else
		datalen = data->m_pkthdr.len;

	if (msg != NULL && msg->msg_control) {
		if ((size_t)msg->msg_controllen < sizeof (struct cmsghdr)) {
			error = EINVAL;
			goto errorout;
		}

		if ((size_t)msg->msg_controllen > MLEN) {
			error = EINVAL;
			goto errorout;
		}

		control = m_get(M_NOWAIT, MT_CONTROL);
		if (control == NULL) {
			error = ENOMEM;
			goto errorout;
		}
		memcpy(mtod(control, caddr_t), msg->msg_control,
		    msg->msg_controllen);
		control->m_len = msg->msg_controllen;
	}

	error = sock->so_proto->pr_usrreqs->pru_sosend(sock, msg != NULL ?
	    (struct sockaddr *)msg->msg_name : NULL, auio, data,
	    control, flags);

	/*
	 * Residual data is possible in the case of IO vectors but not
	 * in the mbuf case since the latter is treated as atomic send.
	 * If pru_sosend() consumed a portion of the iovecs data and
	 * the error returned is transient, treat it as success; this
	 * is consistent with sendit() behavior.
	 */
	if (auio != NULL && uio_resid(auio) != datalen &&
	    (error == ERESTART || error == EINTR || error == EWOULDBLOCK))
		error = 0;

	if (error == 0 && sentlen != NULL) {
		if (auio != NULL)
			*sentlen = datalen - uio_resid(auio);
		else
			*sentlen = datalen;
	}

	return (error);

/*
 * In cases where we detect an error before returning, we need to
 * free the mbuf chain if there is one. sosend (and pru_sosend) will
 * free the mbuf chain if they encounter an error.
 */
errorout:
	if (control)
		m_freem(control);
	if (data)
		m_freem(data);
	if (sentlen)
		*sentlen = 0;
	return (error);
}

errno_t
sock_send(socket_t sock, const struct msghdr *msg, int flags, size_t *sentlen)
{
	if (msg == NULL || msg->msg_iov == NULL || msg->msg_iovlen < 1)
		return (EINVAL);

	return (sock_send_internal(sock, msg, NULL, flags, sentlen));
}
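
/*
 * Illustrative sketch (hypothetical helper): the sending mirror of the
 * receive example above. On success, *nsent reports how much of the iovec
 * data pru_sosend() actually consumed, which can fall short of buflen when
 * a transient error interrupts a partially-consumed vector.
 */
__unused static errno_t
example_send_buf(socket_t so, void *buf, size_t buflen, size_t *nsent)
{
	struct iovec iov;
	struct msghdr msg;

	bzero(&msg, sizeof (msg));
	iov.iov_base = buf;
	iov.iov_len = buflen;
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;

	return (sock_send(so, &msg, 0, nsent));
}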

errno_t
sock_sendmbuf(socket_t sock, const struct msghdr *msg, mbuf_t data,
    int flags, size_t *sentlen)
{
	if (data == NULL || (msg != NULL && (msg->msg_iov != NULL ||
	    msg->msg_iovlen != 0))) {
		if (data != NULL)
			m_freem(data);
		return (EINVAL);
	}
	return (sock_send_internal(sock, msg, data, flags, sentlen));
}

errno_t
sock_shutdown(socket_t sock, int how)
{
	if (sock == NULL)
		return (EINVAL);

	return (soshutdown(sock, how));
}

errno_t
sock_socket_common(int domain, int type, int protocol, sock_upcall callback,
    void *context, socket_t *new_so, bool is_internal)
{
	int error = 0;

	if (new_so == NULL)
		return (EINVAL);

	/* socreate will create an initial so_count */
	error = socreate(domain, new_so, type, protocol);
	if (error == 0) {
		/*
		 * This is an in-kernel socket
		 */
		(*new_so)->so_flags1 |= SOF1_IN_KERNEL_SOCKET;
		INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_in_kernel_total);
		if (is_internal) {
			INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_in_kernel_os_total);
		}

		/* see comments in sock_setupcall() */
		if (callback != NULL) {
			sock_setupcall(*new_so, callback, context);
		}
		/*
		 * last_pid and last_upid should be zero for sockets
		 * created using sock_socket
		 */
		(*new_so)->last_pid = 0;
		(*new_so)->last_upid = 0;
	}
	return (error);
}

errno_t
sock_socket_internal(int domain, int type, int protocol, sock_upcall callback,
    void *context, socket_t *new_so)
{
	return (sock_socket_common(domain, type, protocol, callback,
	    context, new_so, true));
}

errno_t
sock_socket(int domain, int type, int protocol, sock_upcall callback,
    void *context, socket_t *new_so)
{
	return (sock_socket_common(domain, type, protocol, callback,
	    context, new_so, false));
}
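
/*
 * Illustrative sketch (hypothetical helper): the typical lifecycle of an
 * in-kernel socket created through this KPI. Passing a NULL upcall defers
 * callback installation; see sock_setupcall()/sock_setupcalls() below.
 */
__unused static errno_t
example_tcp_lifecycle(void)
{
	socket_t so = NULL;
	errno_t err;

	err = sock_socket(PF_INET, SOCK_STREAM, IPPROTO_TCP, NULL, NULL, &so);
	if (err != 0)
		return (err);
	/* ... bind/connect/send/receive using the KPIs in this file ... */
	sock_shutdown(so, SHUT_RDWR);
	sock_close(so);
	return (0);
}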

void
sock_close(socket_t sock)
{
	if (sock == NULL)
		return;

	soclose(sock);
}

/* Do we want this to be APPLE_PRIVATE API?: YES (LD 12/23/04) */
void
sock_retain(socket_t sock)
{
	if (sock == NULL)
		return;

	socket_lock(sock, 1);
	sock->so_retaincnt++;
	sock->so_usecount++;	/* add extra reference for holding the socket */
	socket_unlock(sock, 1);
}

/* Do we want this to be APPLE_PRIVATE API? */
void
sock_release(socket_t sock)
{
	if (sock == NULL)
		return;

	socket_lock(sock, 1);
	if (sock->so_upcallusecount > 0)
		soclose_wait_locked(sock);

	sock->so_retaincnt--;
	if (sock->so_retaincnt < 0) {
		panic("%s: negative retain count (%d) for sock=%p\n",
		    __func__, sock->so_retaincnt, sock);
		/* NOTREACHED */
	}
	/*
	 * Check SS_NOFDREF in case a close happened as sock_retain()
	 * was grabbing the lock
	 */
	if ((sock->so_retaincnt == 0) && (sock->so_usecount == 2) &&
	    (!(sock->so_state & SS_NOFDREF) ||
	    (sock->so_flags & SOF_MP_SUBFLOW))) {
		/* close socket only if the FD is not holding it */
		soclose_locked(sock);
	} else {
		/* remove extra reference holding the socket */
		VERIFY(sock->so_usecount > 1);
		sock->so_usecount--;
	}
	socket_unlock(sock, 1);
}

errno_t
sock_setpriv(socket_t sock, int on)
{
	if (sock == NULL)
		return (EINVAL);

	socket_lock(sock, 1);
	if (on)
		sock->so_state |= SS_PRIV;
	else
		sock->so_state &= ~SS_PRIV;
	socket_unlock(sock, 1);
	return (0);
}

int
sock_isconnected(socket_t sock)
{
	int retval;

	socket_lock(sock, 1);
	retval = ((sock->so_state & SS_ISCONNECTED) ? 1 : 0);
	socket_unlock(sock, 1);
	return (retval);
}

int
sock_isnonblocking(socket_t sock)
{
	int retval;

	socket_lock(sock, 1);
	retval = ((sock->so_state & SS_NBIO) ? 1 : 0);
	socket_unlock(sock, 1);
	return (retval);
}

errno_t
sock_gettype(socket_t sock, int *outDomain, int *outType, int *outProtocol)
{
	socket_lock(sock, 1);
	if (outDomain != NULL)
		*outDomain = SOCK_DOM(sock);
	if (outType != NULL)
		*outType = sock->so_type;
	if (outProtocol != NULL)
		*outProtocol = SOCK_PROTO(sock);
	socket_unlock(sock, 1);
	return (0);
}

/*
 * Return the listening socket of a pre-accepted socket. It returns the
 * listener (so_head) value of a given socket. This is intended to be
 * called by a socket filter during a filter attach (sf_attach) callback.
 * The value returned by this routine is safe to be used only in the
 * context of that callback, because we hold the listener's lock across
 * the sflt_initsock() call.
 */
socket_t
sock_getlistener(socket_t sock)
{
	return (sock->so_head);
}

static inline void
sock_set_tcp_stream_priority(socket_t sock)
{
	if ((SOCK_DOM(sock) == PF_INET || SOCK_DOM(sock) == PF_INET6) &&
	    SOCK_TYPE(sock) == SOCK_STREAM) {
		set_tcp_stream_priority(sock);
	}
}

/*
 * Caller must have ensured socket is valid and won't be going away.
 */
void
socket_set_traffic_mgt_flags_locked(socket_t sock, u_int8_t flags)
{
	u_int32_t soflags1 = 0;

	if ((flags & TRAFFIC_MGT_SO_BACKGROUND))
		soflags1 |= SOF1_TRAFFIC_MGT_SO_BACKGROUND;
	if ((flags & TRAFFIC_MGT_TCP_RECVBG))
		soflags1 |= SOF1_TRAFFIC_MGT_TCP_RECVBG;

	(void) OSBitOrAtomic(soflags1, &sock->so_flags1);

	sock_set_tcp_stream_priority(sock);
}

void
socket_set_traffic_mgt_flags(socket_t sock, u_int8_t flags)
{
	socket_lock(sock, 1);
	socket_set_traffic_mgt_flags_locked(sock, flags);
	socket_unlock(sock, 1);
}

/*
 * Caller must have ensured socket is valid and won't be going away.
 */
void
socket_clear_traffic_mgt_flags_locked(socket_t sock, u_int8_t flags)
{
	u_int32_t soflags1 = 0;

	if ((flags & TRAFFIC_MGT_SO_BACKGROUND))
		soflags1 |= SOF1_TRAFFIC_MGT_SO_BACKGROUND;
	if ((flags & TRAFFIC_MGT_TCP_RECVBG))
		soflags1 |= SOF1_TRAFFIC_MGT_TCP_RECVBG;

	(void) OSBitAndAtomic(~soflags1, &sock->so_flags1);

	sock_set_tcp_stream_priority(sock);
}

void
socket_clear_traffic_mgt_flags(socket_t sock, u_int8_t flags)
{
	socket_lock(sock, 1);
	socket_clear_traffic_mgt_flags_locked(sock, flags);
	socket_unlock(sock, 1);
}

/*
 * Caller must have ensured socket is valid and won't be going away.
 */
errno_t
socket_defunct(struct proc *p, socket_t so, int level)
{
	errno_t retval;

	if (level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_SVC &&
	    level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL)
		return (EINVAL);

	socket_lock(so, 1);
	/*
	 * SHUTDOWN_SOCKET_LEVEL_DISCONNECT_SVC level is meant to tear down
	 * all of mDNSResponder IPC sockets, currently those of AF_UNIX; note
	 * that this is an implementation artifact of mDNSResponder. We do
	 * a quick test against the socket buffers for SB_UNIX, since that
	 * would have been set by unp_attach() at socket creation time.
	 */
	if (level == SHUTDOWN_SOCKET_LEVEL_DISCONNECT_SVC &&
	    (so->so_rcv.sb_flags & so->so_snd.sb_flags & SB_UNIX) != SB_UNIX) {
		socket_unlock(so, 1);
		return (EOPNOTSUPP);
	}
	retval = sosetdefunct(p, so, level, TRUE);
	if (retval == 0)
		retval = sodefunct(p, so, level);
	socket_unlock(so, 1);
	return (retval);
}

void
sock_setupcalls_locked(socket_t sock, sock_upcall rcallback, void *rcontext,
    sock_upcall wcallback, void *wcontext, int locked)
{
	if (rcallback != NULL) {
		sock->so_rcv.sb_flags |= SB_UPCALL;
		if (locked)
			sock->so_rcv.sb_flags |= SB_UPCALL_LOCK;
		sock->so_rcv.sb_upcall = rcallback;
		sock->so_rcv.sb_upcallarg = rcontext;
	} else {
		sock->so_rcv.sb_flags &= ~(SB_UPCALL | SB_UPCALL_LOCK);
		sock->so_rcv.sb_upcall = NULL;
		sock->so_rcv.sb_upcallarg = NULL;
	}

	if (wcallback != NULL) {
		sock->so_snd.sb_flags |= SB_UPCALL;
		if (locked)
			sock->so_snd.sb_flags |= SB_UPCALL_LOCK;
		sock->so_snd.sb_upcall = wcallback;
		sock->so_snd.sb_upcallarg = wcontext;
	} else {
		sock->so_snd.sb_flags &= ~(SB_UPCALL | SB_UPCALL_LOCK);
		sock->so_snd.sb_upcall = NULL;
		sock->so_snd.sb_upcallarg = NULL;
	}
}

errno_t
sock_setupcall(socket_t sock, sock_upcall callback, void *context)
{
	if (sock == NULL)
		return (EINVAL);

	/*
	 * Note that we don't wait for any in-progress upcall to complete.
	 * On embedded, sock_setupcall() causes both read and write
	 * callbacks to be set; on desktop, only the read callback is set,
	 * to maintain legacy KPI behavior.
	 *
	 * The newer sock_setupcalls() KPI should be used instead to set
	 * the read and write callbacks and their respective parameters.
	 */
	socket_lock(sock, 1);
#if CONFIG_EMBEDDED
	sock_setupcalls_locked(sock, callback, context, callback, context, 0);
#else
	sock_setupcalls_locked(sock, callback, context, NULL, NULL, 0);
#endif /* !CONFIG_EMBEDDED */
	socket_unlock(sock, 1);

	return (0);
}

errno_t
sock_setupcalls(socket_t sock, sock_upcall rcallback, void *rcontext,
    sock_upcall wcallback, void *wcontext)
{
	if (sock == NULL)
		return (EINVAL);

	/*
	 * Note that we don't wait for any in-progress upcall to complete.
	 */
	socket_lock(sock, 1);
	sock_setupcalls_locked(sock, rcallback, rcontext, wcallback, wcontext, 0);
	socket_unlock(sock, 1);

	return (0);
}
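
/*
 * Illustrative sketch (hypothetical helpers): installing a read upcall with
 * sock_setupcalls(). A sock_upcall is assumed here to have the shape
 * declared in <sys/kpi_socket.h>, void (*)(socket_t, void *cookie, int waitf);
 * it runs when data arrives, so heavy work should be deferred to a thread.
 */
__unused static void
example_rd_upcall(socket_t so, void *cookie, int waitf)
{
#pragma unused(so, cookie, waitf)
	/* data is ready: wake a worker thread to call sock_receive() */
}

__unused static errno_t
example_install_upcalls(socket_t so, void *cookie)
{
	/* read callback only; no write callback */
	return (sock_setupcalls(so, example_rd_upcall, cookie, NULL, NULL));
}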

void
sock_catchevents_locked(socket_t sock, sock_evupcall ecallback, void *econtext,
    u_int32_t emask)
{
	socket_lock_assert_owned(sock);

	/*
	 * Note that we don't wait for any in-progress upcall to complete.
	 */
	if (ecallback != NULL) {
		sock->so_event = ecallback;
		sock->so_eventarg = econtext;
		sock->so_eventmask = emask;
	} else {
		sock->so_event = sonullevent;
		sock->so_eventarg = NULL;
		sock->so_eventmask = 0;
	}
}

errno_t
sock_catchevents(socket_t sock, sock_evupcall ecallback, void *econtext,
    u_int32_t emask)
{
	if (sock == NULL)
		return (EINVAL);

	socket_lock(sock, 1);
	sock_catchevents_locked(sock, ecallback, econtext, emask);
	socket_unlock(sock, 1);

	return (0);
}
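
/*
 * Illustrative sketch (hypothetical helpers): registering an event upcall.
 * The emask is assumed to be built from the SO_FILT_HINT_* values in
 * <sys/socketvar.h>, which is what soevent() delivers to so_event; treat
 * that as an assumption and consult kpi_socket.h for the real contract.
 */
__unused static void
example_ev_upcall(socket_t so, void *cookie, u_int32_t event)
{
#pragma unused(so, cookie)
	if (event & SO_FILT_HINT_CONNRESET) {
		/* the peer reset the connection; schedule teardown */
	}
}

__unused static errno_t
example_catch_reset(socket_t so, void *cookie)
{
	return (sock_catchevents(so, example_ev_upcall, cookie,
	    SO_FILT_HINT_CONNRESET));
}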

/*
 * Returns non-zero if the socket belongs to the kernel, i.e. it was
 * created in-kernel (its last_pid is 0).
 */
int
sock_iskernel(socket_t so)
{
	return (so && so->last_pid == 0);
}