[apple/xnu.git] / bsd / kern / kpi_socket.c (xnu-4903.231.4)
/*
 * Copyright (c) 2003-2017 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#define __KPI__
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/param.h>
#include <sys/proc.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/protosw.h>
#include <sys/domain.h>
#include <sys/mbuf.h>
#include <sys/mcache.h>
#include <sys/fcntl.h>
#include <sys/filio.h>
#include <sys/uio_internal.h>
#include <kern/locks.h>
#include <net/net_api_stats.h>
#include <netinet/in.h>
#include <libkern/OSAtomic.h>
#include <stdbool.h>

static errno_t sock_send_internal(socket_t, const struct msghdr *,
    mbuf_t, int, size_t *);

#undef sock_accept
#undef sock_socket
errno_t sock_accept(socket_t so, struct sockaddr *from, int fromlen,
    int flags, sock_upcall callback, void *cookie, socket_t *new_so);
errno_t sock_socket(int domain, int type, int protocol, sock_upcall callback,
    void *context, socket_t *new_so);

static errno_t sock_accept_common(socket_t sock, struct sockaddr *from,
    int fromlen, int flags, sock_upcall callback, void *cookie,
    socket_t *new_sock, bool is_internal);
static errno_t sock_socket_common(int domain, int type, int protocol,
    sock_upcall callback, void *context, socket_t *new_so, bool is_internal);

errno_t
sock_accept_common(socket_t sock, struct sockaddr *from, int fromlen, int flags,
    sock_upcall callback, void *cookie, socket_t *new_sock, bool is_internal)
{
    struct sockaddr *sa;
    struct socket *new_so;
    lck_mtx_t *mutex_held;
    int dosocklock;
    errno_t error = 0;

    if (sock == NULL || new_sock == NULL)
        return (EINVAL);

    socket_lock(sock, 1);
    if ((sock->so_options & SO_ACCEPTCONN) == 0) {
        socket_unlock(sock, 1);
        return (EINVAL);
    }
    if ((flags & ~(MSG_DONTWAIT)) != 0) {
        socket_unlock(sock, 1);
        return (ENOTSUP);
    }
check_again:
    if (((flags & MSG_DONTWAIT) != 0 || (sock->so_state & SS_NBIO) != 0) &&
        sock->so_comp.tqh_first == NULL) {
        socket_unlock(sock, 1);
        return (EWOULDBLOCK);
    }

    if (sock->so_proto->pr_getlock != NULL) {
        mutex_held = (*sock->so_proto->pr_getlock)(sock, PR_F_WILLUNLOCK);
        dosocklock = 1;
    } else {
        mutex_held = sock->so_proto->pr_domain->dom_mtx;
        dosocklock = 0;
    }

    while (TAILQ_EMPTY(&sock->so_comp) && sock->so_error == 0) {
        if (sock->so_state & SS_CANTRCVMORE) {
            sock->so_error = ECONNABORTED;
            break;
        }
        error = msleep((caddr_t)&sock->so_timeo, mutex_held,
            PSOCK | PCATCH, "sock_accept", NULL);
        if (error != 0) {
            socket_unlock(sock, 1);
            return (error);
        }
    }
    if (sock->so_error != 0) {
        error = sock->so_error;
        sock->so_error = 0;
        socket_unlock(sock, 1);
        return (error);
    }

    so_acquire_accept_list(sock, NULL);
    if (TAILQ_EMPTY(&sock->so_comp)) {
        so_release_accept_list(sock);
        goto check_again;
    }
    new_so = TAILQ_FIRST(&sock->so_comp);
    TAILQ_REMOVE(&sock->so_comp, new_so, so_list);
    new_so->so_state &= ~SS_COMP;
    new_so->so_head = NULL;
    sock->so_qlen--;

    so_release_accept_list(sock);

    /*
     * Count the accepted socket as an in-kernel socket
     */
    new_so->so_flags1 |= SOF1_IN_KERNEL_SOCKET;
    INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_in_kernel_total);
    if (is_internal) {
        INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_in_kernel_os_total);
    }

    /*
     * Pass the pre-accepted socket to any interested socket filter(s).
     * Upon failure, the socket would have been closed by the callee.
     */
    if (new_so->so_filt != NULL) {
        /*
         * Temporarily drop the listening socket's lock before we
         * hand off control over to the socket filter(s), but keep
         * a reference so that it won't go away. We'll grab it
         * again once we're done with the filter(s).
         */
        socket_unlock(sock, 0);
        if ((error = soacceptfilter(new_so, sock)) != 0) {
            /* Drop reference on listening socket */
            sodereference(sock);
            return (error);
        }
        socket_lock(sock, 0);
    }

    if (dosocklock) {
        LCK_MTX_ASSERT(new_so->so_proto->pr_getlock(new_so, 0),
            LCK_MTX_ASSERT_NOTOWNED);
        socket_lock(new_so, 1);
    }

    (void) soacceptlock(new_so, &sa, 0);

    socket_unlock(sock, 1); /* release the head */

    /* see comments in sock_setupcall() */
    if (callback != NULL) {
#if CONFIG_EMBEDDED
        sock_setupcalls_locked(new_so, callback, cookie, callback, cookie, 0);
#else
        sock_setupcalls_locked(new_so, callback, cookie, NULL, NULL, 0);
#endif /* !CONFIG_EMBEDDED */
    }

    if (sa != NULL && from != NULL) {
        if (fromlen > sa->sa_len)
            fromlen = sa->sa_len;
        memcpy(from, sa, fromlen);
    }
    if (sa != NULL)
        FREE(sa, M_SONAME);

    /*
     * If the socket has been marked as inactive by sosetdefunct(),
     * disallow further operations on it.
     */
    if (new_so->so_flags & SOF_DEFUNCT) {
        (void) sodefunct(current_proc(), new_so,
            SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL);
    }
    *new_sock = new_so;
    if (dosocklock)
        socket_unlock(new_so, 1);
    return (error);
}

errno_t
sock_accept(socket_t sock, struct sockaddr *from, int fromlen, int flags,
    sock_upcall callback, void *cookie, socket_t *new_sock)
{
    return (sock_accept_common(sock, from, fromlen, flags,
        callback, cookie, new_sock, false));
}

errno_t
sock_accept_internal(socket_t sock, struct sockaddr *from, int fromlen, int flags,
    sock_upcall callback, void *cookie, socket_t *new_sock)
{
    return (sock_accept_common(sock, from, fromlen, flags,
        callback, cookie, new_sock, true));
}

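/*
 * [Editorial sketch, not part of xnu] Minimal illustration of draining one
 * pre-accepted connection from a listening kernel socket via the KPI above.
 * The example_* name is hypothetical; the listener is assumed to have been
 * created with sock_socket() and placed in the listening state with
 * sock_listen().
 */
static void
example_accept_one(socket_t listener)
{
    struct sockaddr_in from;
    socket_t conn = NULL;

    /* MSG_DONTWAIT: return EWOULDBLOCK instead of sleeping when the
     * completed-connection queue is empty */
    if (sock_accept(listener, (struct sockaddr *)&from, sizeof (from),
        MSG_DONTWAIT, NULL, NULL, &conn) == 0) {
        /* ... use conn ... */
        sock_close(conn);
    }
}
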
errno_t
sock_bind(socket_t sock, const struct sockaddr *to)
{
    int error = 0;
    struct sockaddr *sa = NULL;
    struct sockaddr_storage ss;
    boolean_t want_free = TRUE;

    if (sock == NULL || to == NULL)
        return (EINVAL);

    if (to->sa_len > sizeof (ss)) {
        MALLOC(sa, struct sockaddr *, to->sa_len, M_SONAME, M_WAITOK);
        if (sa == NULL)
            return (ENOBUFS);
    } else {
        sa = (struct sockaddr *)&ss;
        want_free = FALSE;
    }
    memcpy(sa, to, to->sa_len);

    error = sobindlock(sock, sa, 1);    /* will lock socket */

    if (sa != NULL && want_free == TRUE)
        FREE(sa, M_SONAME);

    return (error);
}

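/*
 * [Editorial sketch, not part of xnu] sock_bind() copies the address before
 * returning, so a caller may safely pass a stack-allocated sockaddr, as in
 * this hypothetical helper that binds an IPv4 socket to a local port.
 */
static errno_t
example_bind_port(socket_t so, u_int16_t port)
{
    struct sockaddr_in sin;

    bzero(&sin, sizeof (sin));
    sin.sin_len = sizeof (sin);
    sin.sin_family = AF_INET;
    sin.sin_port = htons(port);
    sin.sin_addr.s_addr = htonl(INADDR_ANY);

    return (sock_bind(so, (const struct sockaddr *)&sin));
}
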
errno_t
sock_connect(socket_t sock, const struct sockaddr *to, int flags)
{
    int error = 0;
    lck_mtx_t *mutex_held;
    struct sockaddr *sa = NULL;
    struct sockaddr_storage ss;
    boolean_t want_free = TRUE;

    if (sock == NULL || to == NULL)
        return (EINVAL);

    if (to->sa_len > sizeof (ss)) {
        MALLOC(sa, struct sockaddr *, to->sa_len, M_SONAME,
            (flags & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK);
        if (sa == NULL)
            return (ENOBUFS);
    } else {
        sa = (struct sockaddr *)&ss;
        want_free = FALSE;
    }
    memcpy(sa, to, to->sa_len);

    socket_lock(sock, 1);

    if ((sock->so_state & SS_ISCONNECTING) &&
        ((sock->so_state & SS_NBIO) != 0 || (flags & MSG_DONTWAIT) != 0)) {
        error = EALREADY;
        goto out;
    }
    error = soconnectlock(sock, sa, 0);
    if (!error) {
        if ((sock->so_state & SS_ISCONNECTING) &&
            ((sock->so_state & SS_NBIO) != 0 ||
            (flags & MSG_DONTWAIT) != 0)) {
            error = EINPROGRESS;
            goto out;
        }

        if (sock->so_proto->pr_getlock != NULL)
            mutex_held = (*sock->so_proto->pr_getlock)(sock, PR_F_WILLUNLOCK);
        else
            mutex_held = sock->so_proto->pr_domain->dom_mtx;

        while ((sock->so_state & SS_ISCONNECTING) &&
            sock->so_error == 0) {
            error = msleep((caddr_t)&sock->so_timeo,
                mutex_held, PSOCK | PCATCH, "sock_connect", NULL);
            if (error != 0)
                break;
        }

        if (error == 0) {
            error = sock->so_error;
            sock->so_error = 0;
        }
    } else {
        sock->so_state &= ~SS_ISCONNECTING;
    }
out:
    socket_unlock(sock, 1);

    if (sa != NULL && want_free == TRUE)
        FREE(sa, M_SONAME);

    return (error);
}

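/*
 * [Editorial sketch, not part of xnu] With MSG_DONTWAIT, sock_connect()
 * initiates the connection and returns EINPROGRESS rather than sleeping
 * until the handshake completes; without it, the call blocks and may be
 * interrupted (PCATCH). A hypothetical non-blocking wrapper:
 */
static errno_t
example_connect_nowait(socket_t so, const struct sockaddr *to)
{
    errno_t error;

    error = sock_connect(so, to, MSG_DONTWAIT);
    if (error == EINPROGRESS) {
        /* handshake under way; completion can be observed via an
         * upcall (sock_setupcall) or sock_connectwait() below */
        error = 0;
    }
    return (error);
}
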
errno_t
sock_connectwait(socket_t sock, const struct timeval *tv)
{
    lck_mtx_t *mutex_held;
    errno_t retval = 0;
    struct timespec ts;

    socket_lock(sock, 1);

    /* Check if we're already connected or if we've already errored out */
    if ((sock->so_state & SS_ISCONNECTING) == 0 || sock->so_error != 0) {
        if (sock->so_error != 0) {
            retval = sock->so_error;
            sock->so_error = 0;
        } else {
            if ((sock->so_state & SS_ISCONNECTED) != 0)
                retval = 0;
            else
                retval = EINVAL;
        }
        goto done;
    }

    /* copied translation from timeval to hertz from SO_RCVTIMEO handling */
    if (tv->tv_sec < 0 || tv->tv_sec > SHRT_MAX / hz ||
        tv->tv_usec < 0 || tv->tv_usec >= 1000000) {
        retval = EDOM;
        goto done;
    }

    ts.tv_sec = tv->tv_sec;
    ts.tv_nsec = (tv->tv_usec * (integer_t)NSEC_PER_USEC);
    if ((ts.tv_sec + (ts.tv_nsec/(long)NSEC_PER_SEC))/100 > SHRT_MAX) {
        retval = EDOM;
        goto done;
    }

    if (sock->so_proto->pr_getlock != NULL)
        mutex_held = (*sock->so_proto->pr_getlock)(sock, PR_F_WILLUNLOCK);
    else
        mutex_held = sock->so_proto->pr_domain->dom_mtx;

    msleep((caddr_t)&sock->so_timeo, mutex_held,
        PSOCK, "sock_connectwait", &ts);

    /* Check if we're still waiting to connect */
    if ((sock->so_state & SS_ISCONNECTING) && sock->so_error == 0) {
        retval = EINPROGRESS;
        goto done;
    }

    if (sock->so_error != 0) {
        retval = sock->so_error;
        sock->so_error = 0;
    }

done:
    socket_unlock(sock, 1);
    return (retval);
}

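/*
 * [Editorial sketch, not part of xnu] Pairing the two calls above: start a
 * non-blocking connect, then bound the wait. Note sock_connectwait() sleeps
 * without PCATCH and returns EINPROGRESS if the timeout expires while the
 * handshake is still pending. Names are hypothetical.
 */
static errno_t
example_connect_timeout(socket_t so, const struct sockaddr *to)
{
    struct timeval tv = { .tv_sec = 5, .tv_usec = 0 };
    errno_t error;

    error = sock_connect(so, to, MSG_DONTWAIT);
    if (error != EINPROGRESS)
        return (error);
    return (sock_connectwait(so, &tv));
}
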
errno_t
sock_nointerrupt(socket_t sock, int on)
{
    socket_lock(sock, 1);

    if (on) {
        sock->so_rcv.sb_flags |= SB_NOINTR;    /* This isn't safe */
        sock->so_snd.sb_flags |= SB_NOINTR;    /* This isn't safe */
    } else {
        sock->so_rcv.sb_flags &= ~SB_NOINTR;    /* This isn't safe */
        sock->so_snd.sb_flags &= ~SB_NOINTR;    /* This isn't safe */
    }

    socket_unlock(sock, 1);

    return (0);
}

errno_t
sock_getpeername(socket_t sock, struct sockaddr *peername, int peernamelen)
{
    int error;
    struct sockaddr *sa = NULL;

    if (sock == NULL || peername == NULL || peernamelen < 0)
        return (EINVAL);

    socket_lock(sock, 1);
    if (!(sock->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING))) {
        socket_unlock(sock, 1);
        return (ENOTCONN);
    }
    error = sogetaddr_locked(sock, &sa, 1);
    socket_unlock(sock, 1);
    if (error == 0) {
        if (peernamelen > sa->sa_len)
            peernamelen = sa->sa_len;
        memcpy(peername, sa, peernamelen);
        FREE(sa, M_SONAME);
    }
    return (error);
}

errno_t
sock_getsockname(socket_t sock, struct sockaddr *sockname, int socknamelen)
{
    int error;
    struct sockaddr *sa = NULL;

    if (sock == NULL || sockname == NULL || socknamelen < 0)
        return (EINVAL);

    socket_lock(sock, 1);
    error = sogetaddr_locked(sock, &sa, 0);
    socket_unlock(sock, 1);
    if (error == 0) {
        if (socknamelen > sa->sa_len)
            socknamelen = sa->sa_len;
        memcpy(sockname, sa, socknamelen);
        FREE(sa, M_SONAME);
    }
    return (error);
}

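/*
 * [Editorial sketch, not part of xnu] Both calls above truncate the copied
 * address to the supplied buffer length, so passing a sockaddr_in for an
 * IPv4 socket is safe. Hypothetical helper returning the bound local port:
 */
static u_int16_t
example_local_port(socket_t so)
{
    struct sockaddr_in sin;

    bzero(&sin, sizeof (sin));
    if (sock_getsockname(so, (struct sockaddr *)&sin, sizeof (sin)) != 0)
        return (0);
    return (ntohs(sin.sin_port));
}
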
__private_extern__ int
sogetaddr_locked(struct socket *so, struct sockaddr **psa, int peer)
{
    int error;

    if (so == NULL || psa == NULL)
        return (EINVAL);

    *psa = NULL;
    error = peer ? so->so_proto->pr_usrreqs->pru_peeraddr(so, psa) :
        so->so_proto->pr_usrreqs->pru_sockaddr(so, psa);

    if (error == 0 && *psa == NULL) {
        error = ENOMEM;
    } else if (error != 0 && *psa != NULL) {
        FREE(*psa, M_SONAME);
        *psa = NULL;
    }
    return (error);
}

errno_t
sock_getaddr(socket_t sock, struct sockaddr **psa, int peer)
{
    int error;

    if (sock == NULL || psa == NULL)
        return (EINVAL);

    socket_lock(sock, 1);
    error = sogetaddr_locked(sock, psa, peer);
    socket_unlock(sock, 1);

    return (error);
}

void
sock_freeaddr(struct sockaddr *sa)
{
    if (sa != NULL)
        FREE(sa, M_SONAME);
}

errno_t
sock_getsockopt(socket_t sock, int level, int optname, void *optval,
    int *optlen)
{
    int error = 0;
    struct sockopt sopt;

    if (sock == NULL || optval == NULL || optlen == NULL)
        return (EINVAL);

    sopt.sopt_dir = SOPT_GET;
    sopt.sopt_level = level;
    sopt.sopt_name = optname;
    sopt.sopt_val = CAST_USER_ADDR_T(optval);
    sopt.sopt_valsize = *optlen;
    sopt.sopt_p = kernproc;
    error = sogetoptlock(sock, &sopt, 1);    /* will lock socket */
    if (error == 0)
        *optlen = sopt.sopt_valsize;
    return (error);
}

errno_t
sock_ioctl(socket_t sock, unsigned long request, void *argp)
{
    return (soioctl(sock, request, argp, kernproc));    /* will lock socket */
}

errno_t
sock_setsockopt(socket_t sock, int level, int optname, const void *optval,
    int optlen)
{
    struct sockopt sopt;

    if (sock == NULL || optval == NULL)
        return (EINVAL);

    sopt.sopt_dir = SOPT_SET;
    sopt.sopt_level = level;
    sopt.sopt_name = optname;
    sopt.sopt_val = CAST_USER_ADDR_T(optval);
    sopt.sopt_valsize = optlen;
    sopt.sopt_p = kernproc;
    return (sosetoptlock(sock, &sopt, 1));    /* will lock socket */
}

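/*
 * [Editorial sketch, not part of xnu] Unlike the user-space setsockopt(2),
 * optval here is a kernel pointer (sopt_p is kernproc), so no copyin takes
 * place. A hypothetical example enabling address reuse:
 */
static errno_t
example_set_reuseaddr(socket_t so)
{
    int on = 1;

    return (sock_setsockopt(so, SOL_SOCKET, SO_REUSEADDR, &on, sizeof (on)));
}
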
/*
 * This follows the recommended mappings between DSCP code points
 * and WMM access classes.
 */
static u_int32_t so_tc_from_dscp(u_int8_t dscp);
static u_int32_t
so_tc_from_dscp(u_int8_t dscp)
{
    u_int32_t tc;

    if (dscp >= 0x30 && dscp <= 0x3f)
        tc = SO_TC_VO;
    else if (dscp >= 0x20 && dscp <= 0x2f)
        tc = SO_TC_VI;
    else if (dscp >= 0x08 && dscp <= 0x17)
        tc = SO_TC_BK_SYS;
    else
        tc = SO_TC_BE;

    return (tc);
}

errno_t
sock_settclassopt(socket_t sock, const void *optval, size_t optlen)
{
    errno_t error = 0;
    struct sockopt sopt;
    int sotc;

    if (sock == NULL || optval == NULL || optlen != sizeof (int))
        return (EINVAL);

    socket_lock(sock, 1);
    if (!(sock->so_state & SS_ISCONNECTED)) {
        /*
         * If the socket is not connected then we don't know
         * if the destination is on LAN or not. Skip
         * setting traffic class in this case
         */
        error = ENOTCONN;
        goto out;
    }

    if (sock->so_proto == NULL || sock->so_proto->pr_domain == NULL ||
        sock->so_pcb == NULL) {
        error = EINVAL;
        goto out;
    }

    /*
     * Set the socket traffic class based on the passed DSCP code point
     * regardless of the scope of the destination
     */
    sotc = so_tc_from_dscp((*(const int *)optval) >> 2);

    sopt.sopt_dir = SOPT_SET;
    sopt.sopt_val = CAST_USER_ADDR_T(&sotc);
    sopt.sopt_valsize = sizeof (sotc);
    sopt.sopt_p = kernproc;
    sopt.sopt_level = SOL_SOCKET;
    sopt.sopt_name = SO_TRAFFIC_CLASS;

    error = sosetoptlock(sock, &sopt, 0);    /* already locked */

    if (error != 0) {
        printf("%s: sosetopt SO_TRAFFIC_CLASS failed %d\n",
            __func__, error);
        goto out;
    }

    /*
     * Check if the destination address is LAN or link local address.
     * We do not want to set traffic class bits if the destination
     * is not local.
     */
    if (!so_isdstlocal(sock))
        goto out;

    sopt.sopt_dir = SOPT_SET;
    sopt.sopt_val = CAST_USER_ADDR_T(optval);
    sopt.sopt_valsize = optlen;
    sopt.sopt_p = kernproc;

    switch (SOCK_DOM(sock)) {
    case PF_INET:
        sopt.sopt_level = IPPROTO_IP;
        sopt.sopt_name = IP_TOS;
        break;
    case PF_INET6:
        sopt.sopt_level = IPPROTO_IPV6;
        sopt.sopt_name = IPV6_TCLASS;
        break;
    default:
        error = EINVAL;
        goto out;
    }

    error = sosetoptlock(sock, &sopt, 0);    /* already locked */
    socket_unlock(sock, 1);
    return (error);
out:
    socket_unlock(sock, 1);
    return (error);
}

errno_t
sock_gettclassopt(socket_t sock, void *optval, size_t *optlen)
{
    errno_t error = 0;
    struct sockopt sopt;

    if (sock == NULL || optval == NULL || optlen == NULL)
        return (EINVAL);

    sopt.sopt_dir = SOPT_GET;
    sopt.sopt_val = CAST_USER_ADDR_T(optval);
    sopt.sopt_valsize = *optlen;
    sopt.sopt_p = kernproc;

    socket_lock(sock, 1);
    if (sock->so_proto == NULL || sock->so_proto->pr_domain == NULL) {
        socket_unlock(sock, 1);
        return (EINVAL);
    }

    switch (SOCK_DOM(sock)) {
    case PF_INET:
        sopt.sopt_level = IPPROTO_IP;
        sopt.sopt_name = IP_TOS;
        break;
    case PF_INET6:
        sopt.sopt_level = IPPROTO_IPV6;
        sopt.sopt_name = IPV6_TCLASS;
        break;
    default:
        socket_unlock(sock, 1);
        return (EINVAL);
    }
    error = sogetoptlock(sock, &sopt, 0);    /* already locked */
    socket_unlock(sock, 1);
    if (error == 0)
        *optlen = sopt.sopt_valsize;
    return (error);
}

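/*
 * [Editorial sketch, not part of xnu] The value passed to sock_settclassopt()
 * is the raw TOS/TCLASS byte; the code above derives the DSCP by shifting
 * right twice before mapping it to a traffic class. The socket must already
 * be connected or ENOTCONN is returned. Hypothetical example using DSCP EF:
 */
static errno_t
example_set_tclass_ef(socket_t so)
{
    int tos = 0x2e << 2;    /* DSCP EF in the upper six bits of TOS */

    return (sock_settclassopt(so, &tos, sizeof (tos)));
}
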
errno_t
sock_listen(socket_t sock, int backlog)
{
    if (sock == NULL)
        return (EINVAL);

    return (solisten(sock, backlog));    /* will lock socket */
}

errno_t
sock_receive_internal(socket_t sock, struct msghdr *msg, mbuf_t *data,
    int flags, size_t *recvdlen)
{
    uio_t auio;
    struct mbuf *control = NULL;
    int error = 0;
    int length = 0;
    struct sockaddr *fromsa = NULL;
    char uio_buf[ UIO_SIZEOF((msg != NULL) ? msg->msg_iovlen : 0) ];

    if (sock == NULL)
        return (EINVAL);

    auio = uio_createwithbuffer(((msg != NULL) ? msg->msg_iovlen : 0),
        0, UIO_SYSSPACE, UIO_READ, &uio_buf[0], sizeof (uio_buf));
    if (msg != NULL && data == NULL) {
        int i;
        struct iovec *tempp = msg->msg_iov;

        for (i = 0; i < msg->msg_iovlen; i++) {
            uio_addiov(auio,
                CAST_USER_ADDR_T((tempp + i)->iov_base),
                (tempp + i)->iov_len);
        }
        if (uio_resid(auio) < 0)
            return (EINVAL);
    } else if (recvdlen != NULL) {
        uio_setresid(auio, (uio_resid(auio) + *recvdlen));
    }
    length = uio_resid(auio);

    if (recvdlen != NULL)
        *recvdlen = 0;

    /* let pru_soreceive handle the socket locking */
    error = sock->so_proto->pr_usrreqs->pru_soreceive(sock, &fromsa, auio,
        data, (msg && msg->msg_control) ? &control : NULL, &flags);
    if (error != 0)
        goto cleanup;

    if (recvdlen != NULL)
        *recvdlen = length - uio_resid(auio);
    if (msg != NULL) {
        msg->msg_flags = flags;

        if (msg->msg_name != NULL) {
            int salen;
            salen = msg->msg_namelen;
            if (msg->msg_namelen > 0 && fromsa != NULL) {
                salen = MIN(salen, fromsa->sa_len);
                memcpy(msg->msg_name, fromsa,
                    msg->msg_namelen > fromsa->sa_len ?
                    fromsa->sa_len : msg->msg_namelen);
            }
        }

        if (msg->msg_control != NULL) {
            struct mbuf *m = control;
            u_char *ctlbuf = msg->msg_control;
            int clen = msg->msg_controllen;

            msg->msg_controllen = 0;

            while (m != NULL && clen > 0) {
                unsigned int tocopy;

                if (clen >= m->m_len) {
                    tocopy = m->m_len;
                } else {
                    msg->msg_flags |= MSG_CTRUNC;
                    tocopy = clen;
                }
                memcpy(ctlbuf, mtod(m, caddr_t), tocopy);
                ctlbuf += tocopy;
                clen -= tocopy;
                m = m->m_next;
            }
            msg->msg_controllen =
                (uintptr_t)ctlbuf - (uintptr_t)msg->msg_control;
        }
    }

cleanup:
    if (control != NULL)
        m_freem(control);
    if (fromsa != NULL)
        FREE(fromsa, M_SONAME);
    return (error);
}

errno_t
sock_receive(socket_t sock, struct msghdr *msg, int flags, size_t *recvdlen)
{
    if ((msg == NULL) || (msg->msg_iovlen < 1) ||
        (msg->msg_iov[0].iov_len == 0) ||
        (msg->msg_iov[0].iov_base == NULL))
        return (EINVAL);

    return (sock_receive_internal(sock, msg, NULL, flags, recvdlen));
}

errno_t
sock_receivembuf(socket_t sock, struct msghdr *msg, mbuf_t *data, int flags,
    size_t *recvlen)
{
    if (data == NULL || recvlen == 0 || *recvlen <= 0 || (msg != NULL &&
        (msg->msg_iov != NULL || msg->msg_iovlen != 0)))
        return (EINVAL);

    return (sock_receive_internal(sock, msg, data, flags, recvlen));
}

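/*
 * [Editorial sketch, not part of xnu] Receiving into an mbuf chain instead
 * of iovecs: msg may be NULL when neither the source address nor control
 * data is needed, and *recvlen caps how much may be received. Names are
 * hypothetical.
 */
static void
example_recv_mbuf(socket_t so)
{
    mbuf_t m = NULL;
    size_t len = 2048;    /* upper bound on bytes to receive */

    if (sock_receivembuf(so, NULL, &m, MSG_DONTWAIT, &len) == 0 &&
        m != NULL) {
        /* ... len now holds the number of bytes received ... */
        mbuf_freem(m);
    }
}
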
errno_t
sock_send_internal(socket_t sock, const struct msghdr *msg, mbuf_t data,
    int flags, size_t *sentlen)
{
    uio_t auio = NULL;
    struct mbuf *control = NULL;
    int error = 0;
    int datalen = 0;
    char uio_buf[ UIO_SIZEOF((msg != NULL ? msg->msg_iovlen : 1)) ];

    if (sock == NULL) {
        error = EINVAL;
        goto errorout;
    }

    if (data == NULL && msg != NULL) {
        struct iovec *tempp = msg->msg_iov;

        auio = uio_createwithbuffer(msg->msg_iovlen, 0,
            UIO_SYSSPACE, UIO_WRITE, &uio_buf[0], sizeof (uio_buf));
        if (tempp != NULL) {
            int i;

            for (i = 0; i < msg->msg_iovlen; i++) {
                uio_addiov(auio,
                    CAST_USER_ADDR_T((tempp + i)->iov_base),
                    (tempp + i)->iov_len);
            }

            if (uio_resid(auio) < 0) {
                error = EINVAL;
                goto errorout;
            }
        }
    }

    if (sentlen != NULL)
        *sentlen = 0;

    if (auio != NULL)
        datalen = uio_resid(auio);
    else
        datalen = data->m_pkthdr.len;

    if (msg != NULL && msg->msg_control) {
        if ((size_t)msg->msg_controllen < sizeof (struct cmsghdr)) {
            error = EINVAL;
            goto errorout;
        }

        if ((size_t)msg->msg_controllen > MLEN) {
            error = EINVAL;
            goto errorout;
        }

        control = m_get(M_NOWAIT, MT_CONTROL);
        if (control == NULL) {
            error = ENOMEM;
            goto errorout;
        }
        memcpy(mtod(control, caddr_t), msg->msg_control,
            msg->msg_controllen);
        control->m_len = msg->msg_controllen;
    }

    error = sock->so_proto->pr_usrreqs->pru_sosend(sock, msg != NULL ?
        (struct sockaddr *)msg->msg_name : NULL, auio, data,
        control, flags);

    /*
     * Residual data is possible in the case of IO vectors but not
     * in the mbuf case since the latter is treated as atomic send.
     * If pru_sosend() consumed a portion of the iovecs data and
     * the error returned is transient, treat it as success; this
     * is consistent with sendit() behavior.
     */
    if (auio != NULL && uio_resid(auio) != datalen &&
        (error == ERESTART || error == EINTR || error == EWOULDBLOCK))
        error = 0;

    if (error == 0 && sentlen != NULL) {
        if (auio != NULL)
            *sentlen = datalen - uio_resid(auio);
        else
            *sentlen = datalen;
    }

    return (error);

/*
 * In cases where we detect an error before returning, we need to
 * free the mbuf chain if there is one. sosend (and pru_sosend) will
 * free the mbuf chain if they encounter an error.
 */
errorout:
    if (control)
        m_freem(control);
    if (data)
        m_freem(data);
    if (sentlen)
        *sentlen = 0;
    return (error);
}

errno_t
sock_send(socket_t sock, const struct msghdr *msg, int flags, size_t *sentlen)
{
    if (msg == NULL || msg->msg_iov == NULL || msg->msg_iovlen < 1)
        return (EINVAL);

    return (sock_send_internal(sock, msg, NULL, flags, sentlen));
}

errno_t
sock_sendmbuf(socket_t sock, const struct msghdr *msg, mbuf_t data,
    int flags, size_t *sentlen)
{
    if (data == NULL || (msg != NULL && (msg->msg_iov != NULL ||
        msg->msg_iovlen != 0))) {
        if (data != NULL)
            m_freem(data);
        return (EINVAL);
    }
    return (sock_send_internal(sock, msg, data, flags, sentlen));
}

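/*
 * [Editorial sketch, not part of xnu] An mbuf send is atomic: on error
 * sock_sendmbuf()/pru_sosend() free the chain, so the caller must not free
 * it again. A hypothetical helper wrapping a flat buffer in a packet
 * (mbuf_allocpacket/mbuf_copyback are the kpi_mbuf allocation/copy calls):
 */
static errno_t
example_send_buf(socket_t so, const void *buf, size_t len)
{
    mbuf_t m = NULL;
    size_t sent = 0;
    errno_t error;

    error = mbuf_allocpacket(MBUF_DONTWAIT, len, NULL, &m);
    if (error != 0)
        return (error);
    error = mbuf_copyback(m, 0, len, buf, MBUF_DONTWAIT);
    if (error != 0) {
        mbuf_freem(m);
        return (error);
    }
    return (sock_sendmbuf(so, NULL, m, 0, &sent));
}
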
errno_t
sock_shutdown(socket_t sock, int how)
{
    if (sock == NULL)
        return (EINVAL);

    return (soshutdown(sock, how));
}

errno_t
sock_socket_common(int domain, int type, int protocol, sock_upcall callback,
    void *context, socket_t *new_so, bool is_internal)
{
    int error = 0;

    if (new_so == NULL)
        return (EINVAL);

    /* socreate will create an initial so_count */
    error = socreate(domain, new_so, type, protocol);
    if (error == 0) {
        /*
         * This is an in-kernel socket
         */
        (*new_so)->so_flags1 |= SOF1_IN_KERNEL_SOCKET;
        INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_in_kernel_total);
        if (is_internal) {
            INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_in_kernel_os_total);
        }

        /* see comments in sock_setupcall() */
        if (callback != NULL) {
            sock_setupcall(*new_so, callback, context);
        }
        /*
         * last_pid and last_upid should be zero for sockets
         * created using sock_socket
         */
        (*new_so)->last_pid = 0;
        (*new_so)->last_upid = 0;
    }
    return (error);
}

errno_t
sock_socket_internal(int domain, int type, int protocol, sock_upcall callback,
    void *context, socket_t *new_so)
{
    return (sock_socket_common(domain, type, protocol, callback,
        context, new_so, true));
}

errno_t
sock_socket(int domain, int type, int protocol, sock_upcall callback,
    void *context, socket_t *new_so)
{
    return (sock_socket_common(domain, type, protocol, callback,
        context, new_so, false));
}

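/*
 * [Editorial sketch, not part of xnu] Creating an in-kernel UDP socket with
 * no upcall; both callback and context may be NULL. The caller owns the
 * socket and must eventually sock_close() it. Name is hypothetical.
 */
static errno_t
example_make_udp_socket(socket_t *out)
{
    return (sock_socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP, NULL, NULL, out));
}
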
void
sock_close(socket_t sock)
{
    if (sock == NULL)
        return;

    soclose(sock);
}

/* Do we want this to be APPLE_PRIVATE API?: YES (LD 12/23/04) */
void
sock_retain(socket_t sock)
{
    if (sock == NULL)
        return;

    socket_lock(sock, 1);
    sock->so_retaincnt++;
    sock->so_usecount++;    /* add extra reference for holding the socket */
    socket_unlock(sock, 1);
}

/* Do we want this to be APPLE_PRIVATE API? */
void
sock_release(socket_t sock)
{
    if (sock == NULL)
        return;

    socket_lock(sock, 1);
    if (sock->so_upcallusecount > 0)
        soclose_wait_locked(sock);

    sock->so_retaincnt--;
    if (sock->so_retaincnt < 0) {
        panic("%s: negative retain count (%d) for sock=%p\n",
            __func__, sock->so_retaincnt, sock);
        /* NOTREACHED */
    }
    /*
     * Check SS_NOFDREF in case a close happened as sock_retain()
     * was grabbing the lock
     */
    if ((sock->so_retaincnt == 0) && (sock->so_usecount == 2) &&
        (!(sock->so_state & SS_NOFDREF) ||
        (sock->so_flags & SOF_MP_SUBFLOW))) {
        /* close socket only if the FD is not holding it */
        soclose_locked(sock);
    } else {
        /* remove extra reference holding the socket */
        VERIFY(sock->so_usecount > 1);
        sock->so_usecount--;
    }
    socket_unlock(sock, 1);
}

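/*
 * [Editorial sketch, not part of xnu] sock_retain()/sock_release() bracket
 * any use of a socket that may outlive the caller's original reference,
 * e.g. a socket handed to deferred work; the calls below would normally be
 * split across the hand-off and the completion path.
 */
static void
example_retain_for_async(socket_t so)
{
    sock_retain(so);
    /* ... hand so to the deferred work; the matching sock_release()
     * drops the extra use count and may close the socket if it was the
     * last retainer and no file descriptor holds it ... */
    sock_release(so);
}
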
errno_t
sock_setpriv(socket_t sock, int on)
{
    if (sock == NULL)
        return (EINVAL);

    socket_lock(sock, 1);
    if (on)
        sock->so_state |= SS_PRIV;
    else
        sock->so_state &= ~SS_PRIV;
    socket_unlock(sock, 1);
    return (0);
}

int
sock_isconnected(socket_t sock)
{
    int retval;

    socket_lock(sock, 1);
    retval = ((sock->so_state & SS_ISCONNECTED) ? 1 : 0);
    socket_unlock(sock, 1);
    return (retval);
}

int
sock_isnonblocking(socket_t sock)
{
    int retval;

    socket_lock(sock, 1);
    retval = ((sock->so_state & SS_NBIO) ? 1 : 0);
    socket_unlock(sock, 1);
    return (retval);
}

errno_t
sock_gettype(socket_t sock, int *outDomain, int *outType, int *outProtocol)
{
    socket_lock(sock, 1);
    if (outDomain != NULL)
        *outDomain = SOCK_DOM(sock);
    if (outType != NULL)
        *outType = sock->so_type;
    if (outProtocol != NULL)
        *outProtocol = SOCK_PROTO(sock);
    socket_unlock(sock, 1);
    return (0);
}

/*
 * Return the listening socket of a pre-accepted socket. It returns the
 * listener (so_head) value of a given socket. This is intended to be
 * called by a socket filter during a filter attach (sf_attach) callback.
 * The value returned by this routine is safe to be used only in the
 * context of that callback, because we hold the listener's lock across
 * the sflt_initsock() call.
 */
socket_t
sock_getlistener(socket_t sock)
{
    return (sock->so_head);
}

static inline void
sock_set_tcp_stream_priority(socket_t sock)
{
    if ((SOCK_DOM(sock) == PF_INET || SOCK_DOM(sock) == PF_INET6) &&
        SOCK_TYPE(sock) == SOCK_STREAM) {
        set_tcp_stream_priority(sock);
    }
}

/*
 * Caller must have ensured socket is valid and won't be going away.
 */
void
socket_set_traffic_mgt_flags_locked(socket_t sock, u_int8_t flags)
{
    u_int32_t soflags1 = 0;

    if ((flags & TRAFFIC_MGT_SO_BACKGROUND))
        soflags1 |= SOF1_TRAFFIC_MGT_SO_BACKGROUND;
    if ((flags & TRAFFIC_MGT_TCP_RECVBG))
        soflags1 |= SOF1_TRAFFIC_MGT_TCP_RECVBG;

    (void) OSBitOrAtomic(soflags1, &sock->so_flags1);

    sock_set_tcp_stream_priority(sock);
}

void
socket_set_traffic_mgt_flags(socket_t sock, u_int8_t flags)
{
    socket_lock(sock, 1);
    socket_set_traffic_mgt_flags_locked(sock, flags);
    socket_unlock(sock, 1);
}

/*
 * Caller must have ensured socket is valid and won't be going away.
 */
void
socket_clear_traffic_mgt_flags_locked(socket_t sock, u_int8_t flags)
{
    u_int32_t soflags1 = 0;

    if ((flags & TRAFFIC_MGT_SO_BACKGROUND))
        soflags1 |= SOF1_TRAFFIC_MGT_SO_BACKGROUND;
    if ((flags & TRAFFIC_MGT_TCP_RECVBG))
        soflags1 |= SOF1_TRAFFIC_MGT_TCP_RECVBG;

    (void) OSBitAndAtomic(~soflags1, &sock->so_flags1);

    sock_set_tcp_stream_priority(sock);
}

void
socket_clear_traffic_mgt_flags(socket_t sock, u_int8_t flags)
{
    socket_lock(sock, 1);
    socket_clear_traffic_mgt_flags_locked(sock, flags);
    socket_unlock(sock, 1);
}

/*
 * Caller must have ensured socket is valid and won't be going away.
 */
errno_t
socket_defunct(struct proc *p, socket_t so, int level)
{
    errno_t retval;

    if (level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_SVC &&
        level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL)
        return (EINVAL);

    socket_lock(so, 1);
    /*
     * SHUTDOWN_SOCKET_LEVEL_DISCONNECT_SVC level is meant to tear down
     * all of mDNSResponder IPC sockets, currently those of AF_UNIX; note
     * that this is an implementation artifact of mDNSResponder. We do
     * a quick test against the socket buffers for SB_UNIX, since that
     * would have been set by unp_attach() at socket creation time.
     */
    if (level == SHUTDOWN_SOCKET_LEVEL_DISCONNECT_SVC &&
        (so->so_rcv.sb_flags & so->so_snd.sb_flags & SB_UNIX) != SB_UNIX) {
        socket_unlock(so, 1);
        return (EOPNOTSUPP);
    }
    retval = sosetdefunct(p, so, level, TRUE);
    if (retval == 0)
        retval = sodefunct(p, so, level);
    socket_unlock(so, 1);
    return (retval);
}

void
sock_setupcalls_locked(socket_t sock, sock_upcall rcallback, void *rcontext,
    sock_upcall wcallback, void *wcontext, int locked)
{
    if (rcallback != NULL) {
        sock->so_rcv.sb_flags |= SB_UPCALL;
        if (locked)
            sock->so_rcv.sb_flags |= SB_UPCALL_LOCK;
        sock->so_rcv.sb_upcall = rcallback;
        sock->so_rcv.sb_upcallarg = rcontext;
    } else {
        sock->so_rcv.sb_flags &= ~(SB_UPCALL | SB_UPCALL_LOCK);
        sock->so_rcv.sb_upcall = NULL;
        sock->so_rcv.sb_upcallarg = NULL;
    }

    if (wcallback != NULL) {
        sock->so_snd.sb_flags |= SB_UPCALL;
        if (locked)
            sock->so_snd.sb_flags |= SB_UPCALL_LOCK;
        sock->so_snd.sb_upcall = wcallback;
        sock->so_snd.sb_upcallarg = wcontext;
    } else {
        sock->so_snd.sb_flags &= ~(SB_UPCALL | SB_UPCALL_LOCK);
        sock->so_snd.sb_upcall = NULL;
        sock->so_snd.sb_upcallarg = NULL;
    }
}

errno_t
sock_setupcall(socket_t sock, sock_upcall callback, void *context)
{
    if (sock == NULL)
        return (EINVAL);

    /*
     * Note that we don't wait for any in-progress upcall to complete.
     * On embedded, sock_setupcall() causes both read and write
     * callbacks to be set; on desktop, only the read callback is set
     * to maintain legacy KPI behavior.
     *
     * The newer sock_setupcalls() KPI should be used instead to set
     * the read and write callbacks and their respective parameters.
     */
    socket_lock(sock, 1);
#if CONFIG_EMBEDDED
    sock_setupcalls_locked(sock, callback, context, callback, context, 0);
#else
    sock_setupcalls_locked(sock, callback, context, NULL, NULL, 0);
#endif /* !CONFIG_EMBEDDED */
    socket_unlock(sock, 1);

    return (0);
}

errno_t
sock_setupcalls(socket_t sock, sock_upcall rcallback, void *rcontext,
    sock_upcall wcallback, void *wcontext)
{
    if (sock == NULL)
        return (EINVAL);

    /*
     * Note that we don't wait for any in-progress upcall to complete.
     */
    socket_lock(sock, 1);
    sock_setupcalls_locked(sock, rcallback, rcontext, wcallback, wcontext, 0);
    socket_unlock(sock, 1);

    return (0);
}

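/*
 * [Editorial sketch, not part of xnu] A read upcall fires in the protocol's
 * context when data (or a state change) arrives, so heavy work should be
 * deferred, e.g. to a thread call. The example_* names are hypothetical;
 * the sock_upcall signature is (socket_t, void *cookie, int waitf).
 */
static void
example_read_upcall(socket_t so, void *cookie, int waitf)
{
#pragma unused(so, cookie, waitf)
    /* e.g. wake a receiver thread or schedule a thread call */
}

static errno_t
example_install_read_upcall(socket_t so, void *cookie)
{
    /* read callback only; pass a write callback to watch send space */
    return (sock_setupcalls(so, example_read_upcall, cookie, NULL, NULL));
}
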
void
sock_catchevents_locked(socket_t sock, sock_evupcall ecallback, void *econtext,
    u_int32_t emask)
{
    socket_lock_assert_owned(sock);

    /*
     * Note that we don't wait for any in progress upcall to complete.
     */
    if (ecallback != NULL) {
        sock->so_event = ecallback;
        sock->so_eventarg = econtext;
        sock->so_eventmask = emask;
    } else {
        sock->so_event = sonullevent;
        sock->so_eventarg = NULL;
        sock->so_eventmask = 0;
    }
}

errno_t
sock_catchevents(socket_t sock, sock_evupcall ecallback, void *econtext,
    u_int32_t emask)
{
    if (sock == NULL)
        return (EINVAL);

    socket_lock(sock, 1);
    sock_catchevents_locked(sock, ecallback, econtext, emask);
    socket_unlock(sock, 1);

    return (0);
}

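/*
 * [Editorial sketch, not part of xnu] Event upcalls are filtered through
 * so_eventmask, so only the requested SO_FILT_HINT_* bits are delivered
 * (SO_FILT_HINT_DISCONNECTED is assumed available to kernel code here).
 * Hypothetical example watching for disconnection:
 */
static void
example_event_upcall(socket_t so, void *cookie, u_int32_t event)
{
#pragma unused(so, cookie)
    if (event & SO_FILT_HINT_DISCONNECTED) {
        /* peer is gone; schedule teardown */
    }
}

static errno_t
example_watch_disconnect(socket_t so)
{
    return (sock_catchevents(so, example_event_upcall, NULL,
        SO_FILT_HINT_DISCONNECTED));
}
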
/*
 * Returns non-zero if the socket belongs to the kernel, i.e. it was
 * created in-kernel rather than on behalf of a user process.
 */
int
sock_iskernel(socket_t so)
{
    return (so && so->last_pid == 0);
}