]> git.saurik.com Git - apple/xnu.git/blob - bsd/kern/kpi_socket.c
xnu-3789.70.16.tar.gz
[apple/xnu.git] / bsd / kern / kpi_socket.c
1 /*
2 * Copyright (c) 2003-2016 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #define __KPI__
30 #include <sys/systm.h>
31 #include <sys/kernel.h>
32 #include <sys/types.h>
33 #include <sys/socket.h>
34 #include <sys/socketvar.h>
35 #include <sys/param.h>
36 #include <sys/proc.h>
37 #include <sys/errno.h>
38 #include <sys/malloc.h>
39 #include <sys/protosw.h>
40 #include <sys/domain.h>
41 #include <sys/mbuf.h>
42 #include <sys/mcache.h>
43 #include <sys/fcntl.h>
44 #include <sys/filio.h>
45 #include <sys/uio_internal.h>
46 #include <kern/locks.h>
47 #include <netinet/in.h>
48 #include <libkern/OSAtomic.h>
49
50 static errno_t sock_send_internal(socket_t, const struct msghdr *,
51 mbuf_t, int, size_t *);
52 static void sock_setupcalls_common(socket_t, sock_upcall, void *,
53 sock_upcall, void *);
54
55 errno_t
56 sock_accept(socket_t sock, struct sockaddr *from, int fromlen, int flags,
57 sock_upcall callback, void *cookie, socket_t *new_sock)
58 {
59 struct sockaddr *sa;
60 struct socket *new_so;
61 lck_mtx_t *mutex_held;
62 int dosocklock;
63 errno_t error = 0;
64
65 if (sock == NULL || new_sock == NULL)
66 return (EINVAL);
67
68 socket_lock(sock, 1);
69 if ((sock->so_options & SO_ACCEPTCONN) == 0) {
70 socket_unlock(sock, 1);
71 return (EINVAL);
72 }
73 if ((flags & ~(MSG_DONTWAIT)) != 0) {
74 socket_unlock(sock, 1);
75 return (ENOTSUP);
76 }
77 check_again:
78 if (((flags & MSG_DONTWAIT) != 0 || (sock->so_state & SS_NBIO) != 0) &&
79 sock->so_comp.tqh_first == NULL) {
80 socket_unlock(sock, 1);
81 return (EWOULDBLOCK);
82 }
83
84 if (sock->so_proto->pr_getlock != NULL) {
85 mutex_held = (*sock->so_proto->pr_getlock)(sock, 0);
86 dosocklock = 1;
87 } else {
88 mutex_held = sock->so_proto->pr_domain->dom_mtx;
89 dosocklock = 0;
90 }
91
92 while (TAILQ_EMPTY(&sock->so_comp) && sock->so_error == 0) {
93 if (sock->so_state & SS_CANTRCVMORE) {
94 sock->so_error = ECONNABORTED;
95 break;
96 }
97 error = msleep((caddr_t)&sock->so_timeo, mutex_held,
98 PSOCK | PCATCH, "sock_accept", NULL);
99 if (error != 0) {
100 socket_unlock(sock, 1);
101 return (error);
102 }
103 }
104 if (sock->so_error != 0) {
105 error = sock->so_error;
106 sock->so_error = 0;
107 socket_unlock(sock, 1);
108 return (error);
109 }
110
111 so_acquire_accept_list(sock, NULL);
112 if (TAILQ_EMPTY(&sock->so_comp)) {
113 so_release_accept_list(sock);
114 goto check_again;
115 }
116 new_so = TAILQ_FIRST(&sock->so_comp);
117 TAILQ_REMOVE(&sock->so_comp, new_so, so_list);
118 new_so->so_state &= ~SS_COMP;
119 new_so->so_head = NULL;
120 sock->so_qlen--;
121
122 so_release_accept_list(sock);
123
124 /*
125 * Pass the pre-accepted socket to any interested socket filter(s).
126 * Upon failure, the socket would have been closed by the callee.
127 */
128 if (new_so->so_filt != NULL) {
129 /*
130 * Temporarily drop the listening socket's lock before we
131 * hand off control over to the socket filter(s), but keep
132 * a reference so that it won't go away. We'll grab it
133 * again once we're done with the filter(s).
134 */
135 socket_unlock(sock, 0);
136 if ((error = soacceptfilter(new_so, sock)) != 0) {
137 /* Drop reference on listening socket */
138 sodereference(sock);
139 return (error);
140 }
141 socket_lock(sock, 0);
142 }
143
144 if (dosocklock) {
145 lck_mtx_assert(new_so->so_proto->pr_getlock(new_so, 0),
146 LCK_MTX_ASSERT_NOTOWNED);
147 socket_lock(new_so, 1);
148 }
149
150 (void) soacceptlock(new_so, &sa, 0);
151
152 socket_unlock(sock, 1); /* release the head */
153
154 /* see comments in sock_setupcall() */
155 if (callback != NULL) {
156 sock_setupcalls_common(new_so, callback, cookie, NULL, NULL);
157 }
158
159 if (sa != NULL && from != NULL) {
160 if (fromlen > sa->sa_len)
161 fromlen = sa->sa_len;
162 memcpy(from, sa, fromlen);
163 }
164 if (sa != NULL)
165 FREE(sa, M_SONAME);
166
167 /*
168 * If the socket has been marked as inactive by sosetdefunct(),
169 * disallow further operations on it.
170 */
171 if (new_so->so_flags & SOF_DEFUNCT) {
172 (void) sodefunct(current_proc(), new_so,
173 SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL);
174 }
175 *new_sock = new_so;
176 if (dosocklock)
177 socket_unlock(new_so, 1);
178 return (error);
179 }
180
181 errno_t
182 sock_bind(socket_t sock, const struct sockaddr *to)
183 {
184 int error = 0;
185 struct sockaddr *sa = NULL;
186 struct sockaddr_storage ss;
187 boolean_t want_free = TRUE;
188
189 if (sock == NULL || to == NULL)
190 return (EINVAL);
191
192 if (to->sa_len > sizeof (ss)) {
193 MALLOC(sa, struct sockaddr *, to->sa_len, M_SONAME, M_WAITOK);
194 if (sa == NULL)
195 return (ENOBUFS);
196 } else {
197 sa = (struct sockaddr *)&ss;
198 want_free = FALSE;
199 }
200 memcpy(sa, to, to->sa_len);
201
202 error = sobindlock(sock, sa, 1); /* will lock socket */
203
204 if (sa != NULL && want_free == TRUE)
205 FREE(sa, M_SONAME);
206
207 return (error);
208 }
209
210 errno_t
211 sock_connect(socket_t sock, const struct sockaddr *to, int flags)
212 {
213 int error = 0;
214 lck_mtx_t *mutex_held;
215 struct sockaddr *sa = NULL;
216 struct sockaddr_storage ss;
217 boolean_t want_free = TRUE;
218
219 if (sock == NULL || to == NULL)
220 return (EINVAL);
221
222 if (to->sa_len > sizeof (ss)) {
223 MALLOC(sa, struct sockaddr *, to->sa_len, M_SONAME,
224 (flags & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK);
225 if (sa == NULL)
226 return (ENOBUFS);
227 } else {
228 sa = (struct sockaddr *)&ss;
229 want_free = FALSE;
230 }
231 memcpy(sa, to, to->sa_len);
232
233 socket_lock(sock, 1);
234
235 if ((sock->so_state & SS_ISCONNECTING) &&
236 ((sock->so_state & SS_NBIO) != 0 || (flags & MSG_DONTWAIT) != 0)) {
237 error = EALREADY;
238 goto out;
239 }
240 error = soconnectlock(sock, sa, 0);
241 if (!error) {
242 if ((sock->so_state & SS_ISCONNECTING) &&
243 ((sock->so_state & SS_NBIO) != 0 ||
244 (flags & MSG_DONTWAIT) != 0)) {
245 error = EINPROGRESS;
246 goto out;
247 }
248
249 if (sock->so_proto->pr_getlock != NULL)
250 mutex_held = (*sock->so_proto->pr_getlock)(sock, 0);
251 else
252 mutex_held = sock->so_proto->pr_domain->dom_mtx;
253
254 while ((sock->so_state & SS_ISCONNECTING) &&
255 sock->so_error == 0) {
256 error = msleep((caddr_t)&sock->so_timeo,
257 mutex_held, PSOCK | PCATCH, "sock_connect", NULL);
258 if (error != 0)
259 break;
260 }
261
262 if (error == 0) {
263 error = sock->so_error;
264 sock->so_error = 0;
265 }
266 } else {
267 sock->so_state &= ~SS_ISCONNECTING;
268 }
269 out:
270 socket_unlock(sock, 1);
271
272 if (sa != NULL && want_free == TRUE)
273 FREE(sa, M_SONAME);
274
275 return (error);
276 }
277
278 errno_t
279 sock_connectwait(socket_t sock, const struct timeval *tv)
280 {
281 lck_mtx_t *mutex_held;
282 errno_t retval = 0;
283 struct timespec ts;
284
285 socket_lock(sock, 1);
286
287 /* Check if we're already connected or if we've already errored out */
288 if ((sock->so_state & SS_ISCONNECTING) == 0 || sock->so_error != 0) {
289 if (sock->so_error != 0) {
290 retval = sock->so_error;
291 sock->so_error = 0;
292 } else {
293 if ((sock->so_state & SS_ISCONNECTED) != 0)
294 retval = 0;
295 else
296 retval = EINVAL;
297 }
298 goto done;
299 }
300
301 /* copied translation from timeval to hertz from SO_RCVTIMEO handling */
302 if (tv->tv_sec < 0 || tv->tv_sec > SHRT_MAX / hz ||
303 tv->tv_usec < 0 || tv->tv_usec >= 1000000) {
304 retval = EDOM;
305 goto done;
306 }
307
308 ts.tv_sec = tv->tv_sec;
309 ts.tv_nsec = (tv->tv_usec * (integer_t)NSEC_PER_USEC);
310 if ((ts.tv_sec + (ts.tv_nsec/(long)NSEC_PER_SEC))/100 > SHRT_MAX) {
311 retval = EDOM;
312 goto done;
313 }
314
315 if (sock->so_proto->pr_getlock != NULL)
316 mutex_held = (*sock->so_proto->pr_getlock)(sock, 0);
317 else
318 mutex_held = sock->so_proto->pr_domain->dom_mtx;
319
320 msleep((caddr_t)&sock->so_timeo, mutex_held,
321 PSOCK, "sock_connectwait", &ts);
322
323 /* Check if we're still waiting to connect */
324 if ((sock->so_state & SS_ISCONNECTING) && sock->so_error == 0) {
325 retval = EINPROGRESS;
326 goto done;
327 }
328
329 if (sock->so_error != 0) {
330 retval = sock->so_error;
331 sock->so_error = 0;
332 }
333
334 done:
335 socket_unlock(sock, 1);
336 return (retval);
337 }
338
339 errno_t
340 sock_nointerrupt(socket_t sock, int on)
341 {
342 socket_lock(sock, 1);
343
344 if (on) {
345 sock->so_rcv.sb_flags |= SB_NOINTR; /* This isn't safe */
346 sock->so_snd.sb_flags |= SB_NOINTR; /* This isn't safe */
347 } else {
348 sock->so_rcv.sb_flags &= ~SB_NOINTR; /* This isn't safe */
349 sock->so_snd.sb_flags &= ~SB_NOINTR; /* This isn't safe */
350 }
351
352 socket_unlock(sock, 1);
353
354 return (0);
355 }
356
357 errno_t
358 sock_getpeername(socket_t sock, struct sockaddr *peername, int peernamelen)
359 {
360 int error;
361 struct sockaddr *sa = NULL;
362
363 if (sock == NULL || peername == NULL || peernamelen < 0)
364 return (EINVAL);
365
366 socket_lock(sock, 1);
367 if (!(sock->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING))) {
368 socket_unlock(sock, 1);
369 return (ENOTCONN);
370 }
371 error = sogetaddr_locked(sock, &sa, 1);
372 socket_unlock(sock, 1);
373 if (error == 0) {
374 if (peernamelen > sa->sa_len)
375 peernamelen = sa->sa_len;
376 memcpy(peername, sa, peernamelen);
377 FREE(sa, M_SONAME);
378 }
379 return (error);
380 }
381
382 errno_t
383 sock_getsockname(socket_t sock, struct sockaddr *sockname, int socknamelen)
384 {
385 int error;
386 struct sockaddr *sa = NULL;
387
388 if (sock == NULL || sockname == NULL || socknamelen < 0)
389 return (EINVAL);
390
391 socket_lock(sock, 1);
392 error = sogetaddr_locked(sock, &sa, 0);
393 socket_unlock(sock, 1);
394 if (error == 0) {
395 if (socknamelen > sa->sa_len)
396 socknamelen = sa->sa_len;
397 memcpy(sockname, sa, socknamelen);
398 FREE(sa, M_SONAME);
399 }
400 return (error);
401 }
402
403 __private_extern__ int
404 sogetaddr_locked(struct socket *so, struct sockaddr **psa, int peer)
405 {
406 int error;
407
408 if (so == NULL || psa == NULL)
409 return (EINVAL);
410
411 *psa = NULL;
412 error = peer ? so->so_proto->pr_usrreqs->pru_peeraddr(so, psa) :
413 so->so_proto->pr_usrreqs->pru_sockaddr(so, psa);
414
415 if (error == 0 && *psa == NULL) {
416 error = ENOMEM;
417 } else if (error != 0 && *psa != NULL) {
418 FREE(*psa, M_SONAME);
419 *psa = NULL;
420 }
421 return (error);
422 }
423
424 errno_t
425 sock_getaddr(socket_t sock, struct sockaddr **psa, int peer)
426 {
427 int error;
428
429 if (sock == NULL || psa == NULL)
430 return (EINVAL);
431
432 socket_lock(sock, 1);
433 error = sogetaddr_locked(sock, psa, peer);
434 socket_unlock(sock, 1);
435
436 return (error);
437 }
438
439 void
440 sock_freeaddr(struct sockaddr *sa)
441 {
442 if (sa != NULL)
443 FREE(sa, M_SONAME);
444 }
445
446 errno_t
447 sock_getsockopt(socket_t sock, int level, int optname, void *optval,
448 int *optlen)
449 {
450 int error = 0;
451 struct sockopt sopt;
452
453 if (sock == NULL || optval == NULL || optlen == NULL)
454 return (EINVAL);
455
456 sopt.sopt_dir = SOPT_GET;
457 sopt.sopt_level = level;
458 sopt.sopt_name = optname;
459 sopt.sopt_val = CAST_USER_ADDR_T(optval);
460 sopt.sopt_valsize = *optlen;
461 sopt.sopt_p = kernproc;
462 error = sogetoptlock(sock, &sopt, 1); /* will lock socket */
463 if (error == 0)
464 *optlen = sopt.sopt_valsize;
465 return (error);
466 }
467
468 errno_t
469 sock_ioctl(socket_t sock, unsigned long request, void *argp)
470 {
471 return (soioctl(sock, request, argp, kernproc)); /* will lock socket */
472 }
473
474 errno_t
475 sock_setsockopt(socket_t sock, int level, int optname, const void *optval,
476 int optlen)
477 {
478 struct sockopt sopt;
479
480 if (sock == NULL || optval == NULL)
481 return (EINVAL);
482
483 sopt.sopt_dir = SOPT_SET;
484 sopt.sopt_level = level;
485 sopt.sopt_name = optname;
486 sopt.sopt_val = CAST_USER_ADDR_T(optval);
487 sopt.sopt_valsize = optlen;
488 sopt.sopt_p = kernproc;
489 return (sosetoptlock(sock, &sopt, 1)); /* will lock socket */
490 }
491
492 /*
493 * This follows the recommended mappings between DSCP code points
494 * and WMM access classes.
495 */
496 static u_int32_t so_tc_from_dscp(u_int8_t dscp);
497 static u_int32_t
498 so_tc_from_dscp(u_int8_t dscp)
499 {
500 u_int32_t tc;
501
502 if (dscp >= 0x30 && dscp <= 0x3f)
503 tc = SO_TC_VO;
504 else if (dscp >= 0x20 && dscp <= 0x2f)
505 tc = SO_TC_VI;
506 else if (dscp >= 0x08 && dscp <= 0x17)
507 tc = SO_TC_BK_SYS;
508 else
509 tc = SO_TC_BE;
510
511 return (tc);
512 }
513
514 errno_t
515 sock_settclassopt(socket_t sock, const void *optval, size_t optlen)
516 {
517 errno_t error = 0;
518 struct sockopt sopt;
519 int sotc;
520
521 if (sock == NULL || optval == NULL || optlen != sizeof (int))
522 return (EINVAL);
523
524 socket_lock(sock, 1);
525 if (!(sock->so_state & SS_ISCONNECTED)) {
526 /*
527 * If the socket is not connected then we don't know
528 * if the destination is on LAN or not. Skip
529 * setting traffic class in this case
530 */
531 error = ENOTCONN;
532 goto out;
533 }
534
535 if (sock->so_proto == NULL || sock->so_proto->pr_domain == NULL ||
536 sock->so_pcb == NULL) {
537 error = EINVAL;
538 goto out;
539 }
540
541 /*
542 * Set the socket traffic class based on the passed DSCP code point
543 * regardless of the scope of the destination
544 */
545 sotc = so_tc_from_dscp((*(const int *)optval) >> 2);
546
547 sopt.sopt_dir = SOPT_SET;
548 sopt.sopt_val = CAST_USER_ADDR_T(&sotc);
549 sopt.sopt_valsize = sizeof (sotc);
550 sopt.sopt_p = kernproc;
551 sopt.sopt_level = SOL_SOCKET;
552 sopt.sopt_name = SO_TRAFFIC_CLASS;
553
554 error = sosetoptlock(sock, &sopt, 0); /* already locked */
555
556 if (error != 0) {
557 printf("%s: sosetopt SO_TRAFFIC_CLASS failed %d\n",
558 __func__, error);
559 goto out;
560 }
561
562 /*
563 * Check if the destination address is LAN or link local address.
564 * We do not want to set traffic class bits if the destination
565 * is not local.
566 */
567 if (!so_isdstlocal(sock))
568 goto out;
569
570 sopt.sopt_dir = SOPT_SET;
571 sopt.sopt_val = CAST_USER_ADDR_T(optval);
572 sopt.sopt_valsize = optlen;
573 sopt.sopt_p = kernproc;
574
575 switch (SOCK_DOM(sock)) {
576 case PF_INET:
577 sopt.sopt_level = IPPROTO_IP;
578 sopt.sopt_name = IP_TOS;
579 break;
580 case PF_INET6:
581 sopt.sopt_level = IPPROTO_IPV6;
582 sopt.sopt_name = IPV6_TCLASS;
583 break;
584 default:
585 error = EINVAL;
586 goto out;
587 }
588
589 error = sosetoptlock(sock, &sopt, 0); /* already locked */
590 socket_unlock(sock, 1);
591 return (error);
592 out:
593 socket_unlock(sock, 1);
594 return (error);
595 }
596
597 errno_t
598 sock_gettclassopt(socket_t sock, void *optval, size_t *optlen)
599 {
600 errno_t error = 0;
601 struct sockopt sopt;
602
603 if (sock == NULL || optval == NULL || optlen == NULL)
604 return (EINVAL);
605
606 sopt.sopt_dir = SOPT_GET;
607 sopt.sopt_val = CAST_USER_ADDR_T(optval);
608 sopt.sopt_valsize = *optlen;
609 sopt.sopt_p = kernproc;
610
611 socket_lock(sock, 1);
612 if (sock->so_proto == NULL || sock->so_proto->pr_domain == NULL) {
613 socket_unlock(sock, 1);
614 return (EINVAL);
615 }
616
617 switch (SOCK_DOM(sock)) {
618 case PF_INET:
619 sopt.sopt_level = IPPROTO_IP;
620 sopt.sopt_name = IP_TOS;
621 break;
622 case PF_INET6:
623 sopt.sopt_level = IPPROTO_IPV6;
624 sopt.sopt_name = IPV6_TCLASS;
625 break;
626 default:
627 socket_unlock(sock, 1);
628 return (EINVAL);
629
630 }
631 error = sogetoptlock(sock, &sopt, 0); /* already locked */
632 socket_unlock(sock, 1);
633 if (error == 0)
634 *optlen = sopt.sopt_valsize;
635 return (error);
636 }
637
638 errno_t
639 sock_listen(socket_t sock, int backlog)
640 {
641 if (sock == NULL)
642 return (EINVAL);
643
644 return (solisten(sock, backlog)); /* will lock socket */
645 }
646
647 errno_t
648 sock_receive_internal(socket_t sock, struct msghdr *msg, mbuf_t *data,
649 int flags, size_t *recvdlen)
650 {
651 uio_t auio;
652 struct mbuf *control = NULL;
653 int error = 0;
654 int length = 0;
655 struct sockaddr *fromsa = NULL;
656 char uio_buf[ UIO_SIZEOF((msg != NULL) ? msg->msg_iovlen : 0) ];
657
658 if (sock == NULL)
659 return (EINVAL);
660
661 auio = uio_createwithbuffer(((msg != NULL) ? msg->msg_iovlen : 0),
662 0, UIO_SYSSPACE, UIO_READ, &uio_buf[0], sizeof (uio_buf));
663 if (msg != NULL && data == NULL) {
664 int i;
665 struct iovec *tempp = msg->msg_iov;
666
667 for (i = 0; i < msg->msg_iovlen; i++) {
668 uio_addiov(auio,
669 CAST_USER_ADDR_T((tempp + i)->iov_base),
670 (tempp + i)->iov_len);
671 }
672 if (uio_resid(auio) < 0)
673 return (EINVAL);
674 } else if (recvdlen != NULL) {
675 uio_setresid(auio, (uio_resid(auio) + *recvdlen));
676 }
677 length = uio_resid(auio);
678
679 if (recvdlen != NULL)
680 *recvdlen = 0;
681
682 /* let pru_soreceive handle the socket locking */
683 error = sock->so_proto->pr_usrreqs->pru_soreceive(sock, &fromsa, auio,
684 data, (msg && msg->msg_control) ? &control : NULL, &flags);
685 if (error != 0)
686 goto cleanup;
687
688 if (recvdlen != NULL)
689 *recvdlen = length - uio_resid(auio);
690 if (msg != NULL) {
691 msg->msg_flags = flags;
692
693 if (msg->msg_name != NULL) {
694 int salen;
695 salen = msg->msg_namelen;
696 if (msg->msg_namelen > 0 && fromsa != NULL) {
697 salen = MIN(salen, fromsa->sa_len);
698 memcpy(msg->msg_name, fromsa,
699 msg->msg_namelen > fromsa->sa_len ?
700 fromsa->sa_len : msg->msg_namelen);
701 }
702 }
703
704 if (msg->msg_control != NULL) {
705 struct mbuf *m = control;
706 u_char *ctlbuf = msg->msg_control;
707 int clen = msg->msg_controllen;
708
709 msg->msg_controllen = 0;
710
711 while (m != NULL && clen > 0) {
712 unsigned int tocopy;
713
714 if (clen >= m->m_len) {
715 tocopy = m->m_len;
716 } else {
717 msg->msg_flags |= MSG_CTRUNC;
718 tocopy = clen;
719 }
720 memcpy(ctlbuf, mtod(m, caddr_t), tocopy);
721 ctlbuf += tocopy;
722 clen -= tocopy;
723 m = m->m_next;
724 }
725 msg->msg_controllen =
726 (uintptr_t)ctlbuf - (uintptr_t)msg->msg_control;
727 }
728 }
729
730 cleanup:
731 if (control != NULL)
732 m_freem(control);
733 if (fromsa != NULL)
734 FREE(fromsa, M_SONAME);
735 return (error);
736 }
737
738 errno_t
739 sock_receive(socket_t sock, struct msghdr *msg, int flags, size_t *recvdlen)
740 {
741 if ((msg == NULL) || (msg->msg_iovlen < 1) ||
742 (msg->msg_iov[0].iov_len == 0) ||
743 (msg->msg_iov[0].iov_base == NULL))
744 return (EINVAL);
745
746 return (sock_receive_internal(sock, msg, NULL, flags, recvdlen));
747 }
748
749 errno_t
750 sock_receivembuf(socket_t sock, struct msghdr *msg, mbuf_t *data, int flags,
751 size_t *recvlen)
752 {
753 if (data == NULL || recvlen == 0 || *recvlen <= 0 || (msg != NULL &&
754 (msg->msg_iov != NULL || msg->msg_iovlen != 0)))
755 return (EINVAL);
756
757 return (sock_receive_internal(sock, msg, data, flags, recvlen));
758 }
759
760 errno_t
761 sock_send_internal(socket_t sock, const struct msghdr *msg, mbuf_t data,
762 int flags, size_t *sentlen)
763 {
764 uio_t auio = NULL;
765 struct mbuf *control = NULL;
766 int error = 0;
767 int datalen = 0;
768 char uio_buf[ UIO_SIZEOF((msg != NULL ? msg->msg_iovlen : 1)) ];
769
770 if (sock == NULL) {
771 error = EINVAL;
772 goto errorout;
773 }
774
775 if (data == NULL && msg != NULL) {
776 struct iovec *tempp = msg->msg_iov;
777
778 auio = uio_createwithbuffer(msg->msg_iovlen, 0,
779 UIO_SYSSPACE, UIO_WRITE, &uio_buf[0], sizeof (uio_buf));
780 if (tempp != NULL) {
781 int i;
782
783 for (i = 0; i < msg->msg_iovlen; i++) {
784 uio_addiov(auio,
785 CAST_USER_ADDR_T((tempp + i)->iov_base),
786 (tempp + i)->iov_len);
787 }
788
789 if (uio_resid(auio) < 0) {
790 error = EINVAL;
791 goto errorout;
792 }
793 }
794 }
795
796 if (sentlen != NULL)
797 *sentlen = 0;
798
799 if (auio != NULL)
800 datalen = uio_resid(auio);
801 else
802 datalen = data->m_pkthdr.len;
803
804 if (msg != NULL && msg->msg_control) {
805 if ((size_t)msg->msg_controllen < sizeof (struct cmsghdr)) {
806 error = EINVAL;
807 goto errorout;
808 }
809
810 if ((size_t)msg->msg_controllen > MLEN) {
811 error = EINVAL;
812 goto errorout;
813 }
814
815 control = m_get(M_NOWAIT, MT_CONTROL);
816 if (control == NULL) {
817 error = ENOMEM;
818 goto errorout;
819 }
820 memcpy(mtod(control, caddr_t), msg->msg_control,
821 msg->msg_controllen);
822 control->m_len = msg->msg_controllen;
823 }
824
825 error = sock->so_proto->pr_usrreqs->pru_sosend(sock, msg != NULL ?
826 (struct sockaddr *)msg->msg_name : NULL, auio, data,
827 control, flags);
828
829 /*
830 * Residual data is possible in the case of IO vectors but not
831 * in the mbuf case since the latter is treated as atomic send.
832 * If pru_sosend() consumed a portion of the iovecs data and
833 * the error returned is transient, treat it as success; this
834 * is consistent with sendit() behavior.
835 */
836 if (auio != NULL && uio_resid(auio) != datalen &&
837 (error == ERESTART || error == EINTR || error == EWOULDBLOCK))
838 error = 0;
839
840 if (error == 0 && sentlen != NULL) {
841 if (auio != NULL)
842 *sentlen = datalen - uio_resid(auio);
843 else
844 *sentlen = datalen;
845 }
846
847 return (error);
848
849 /*
850 * In cases where we detect an error before returning, we need to
851 * free the mbuf chain if there is one. sosend (and pru_sosend) will
852 * free the mbuf chain if they encounter an error.
853 */
854 errorout:
855 if (control)
856 m_freem(control);
857 if (data)
858 m_freem(data);
859 if (sentlen)
860 *sentlen = 0;
861 return (error);
862 }
863
864 errno_t
865 sock_send(socket_t sock, const struct msghdr *msg, int flags, size_t *sentlen)
866 {
867 if (msg == NULL || msg->msg_iov == NULL || msg->msg_iovlen < 1)
868 return (EINVAL);
869
870 return (sock_send_internal(sock, msg, NULL, flags, sentlen));
871 }
872
873 errno_t
874 sock_sendmbuf(socket_t sock, const struct msghdr *msg, mbuf_t data,
875 int flags, size_t *sentlen)
876 {
877 if (data == NULL || (msg != NULL && (msg->msg_iov != NULL ||
878 msg->msg_iovlen != 0))) {
879 if (data != NULL)
880 m_freem(data);
881 return (EINVAL);
882 }
883 return (sock_send_internal(sock, msg, data, flags, sentlen));
884 }
885
886 errno_t
887 sock_shutdown(socket_t sock, int how)
888 {
889 if (sock == NULL)
890 return (EINVAL);
891
892 return (soshutdown(sock, how));
893 }
894
895
896 errno_t
897 sock_socket(int domain, int type, int protocol, sock_upcall callback,
898 void *context, socket_t *new_so)
899 {
900 int error = 0;
901
902 if (new_so == NULL)
903 return (EINVAL);
904
905 /* socreate will create an initial so_count */
906 error = socreate(domain, new_so, type, protocol);
907 if (error == 0) {
908 /* see comments in sock_setupcall() */
909 if (callback != NULL) {
910 sock_setupcalls_common(*new_so, callback, context,
911 NULL, NULL);
912 }
913 /*
914 * last_pid and last_upid should be zero for sockets
915 * created using sock_socket
916 */
917 (*new_so)->last_pid = 0;
918 (*new_so)->last_upid = 0;
919 }
920 return (error);
921 }
922
923 void
924 sock_close(socket_t sock)
925 {
926 if (sock == NULL)
927 return;
928
929 soclose(sock);
930 }
931
932 /* Do we want this to be APPLE_PRIVATE API?: YES (LD 12/23/04) */
933 void
934 sock_retain(socket_t sock)
935 {
936 if (sock == NULL)
937 return;
938
939 socket_lock(sock, 1);
940 sock->so_retaincnt++;
941 sock->so_usecount++; /* add extra reference for holding the socket */
942 socket_unlock(sock, 1);
943 }
944
945 /* Do we want this to be APPLE_PRIVATE API? */
946 void
947 sock_release(socket_t sock)
948 {
949 if (sock == NULL)
950 return;
951
952 socket_lock(sock, 1);
953 if (sock->so_upcallusecount > 0)
954 soclose_wait_locked(sock);
955
956 sock->so_retaincnt--;
957 if (sock->so_retaincnt < 0) {
958 panic("%s: negative retain count (%d) for sock=%p\n",
959 __func__, sock->so_retaincnt, sock);
960 /* NOTREACHED */
961 }
962 /*
963 * Check SS_NOFDREF in case a close happened as sock_retain()
964 * was grabbing the lock
965 */
966 if ((sock->so_retaincnt == 0) && (sock->so_usecount == 2) &&
967 (!(sock->so_state & SS_NOFDREF) ||
968 (sock->so_flags & SOF_MP_SUBFLOW))) {
969 /* close socket only if the FD is not holding it */
970 soclose_locked(sock);
971 } else {
972 /* remove extra reference holding the socket */
973 VERIFY(sock->so_usecount > 1);
974 sock->so_usecount--;
975 }
976 socket_unlock(sock, 1);
977 }
978
979 errno_t
980 sock_setpriv(socket_t sock, int on)
981 {
982 if (sock == NULL)
983 return (EINVAL);
984
985 socket_lock(sock, 1);
986 if (on)
987 sock->so_state |= SS_PRIV;
988 else
989 sock->so_state &= ~SS_PRIV;
990 socket_unlock(sock, 1);
991 return (0);
992 }
993
994 int
995 sock_isconnected(socket_t sock)
996 {
997 int retval;
998
999 socket_lock(sock, 1);
1000 retval = ((sock->so_state & SS_ISCONNECTED) ? 1 : 0);
1001 socket_unlock(sock, 1);
1002 return (retval);
1003 }
1004
1005 int
1006 sock_isnonblocking(socket_t sock)
1007 {
1008 int retval;
1009
1010 socket_lock(sock, 1);
1011 retval = ((sock->so_state & SS_NBIO) ? 1 : 0);
1012 socket_unlock(sock, 1);
1013 return (retval);
1014 }
1015
1016 errno_t
1017 sock_gettype(socket_t sock, int *outDomain, int *outType, int *outProtocol)
1018 {
1019 socket_lock(sock, 1);
1020 if (outDomain != NULL)
1021 *outDomain = SOCK_DOM(sock);
1022 if (outType != NULL)
1023 *outType = sock->so_type;
1024 if (outProtocol != NULL)
1025 *outProtocol = SOCK_PROTO(sock);
1026 socket_unlock(sock, 1);
1027 return (0);
1028 }
1029
1030 /*
1031 * Return the listening socket of a pre-accepted socket. It returns the
1032 * listener (so_head) value of a given socket. This is intended to be
1033 * called by a socket filter during a filter attach (sf_attach) callback.
1034 * The value returned by this routine is safe to be used only in the
1035 * context of that callback, because we hold the listener's lock across
1036 * the sflt_initsock() call.
1037 */
1038 socket_t
1039 sock_getlistener(socket_t sock)
1040 {
1041 return (sock->so_head);
1042 }
1043
1044 static inline void
1045 sock_set_tcp_stream_priority(socket_t sock)
1046 {
1047 if ((SOCK_DOM(sock) == PF_INET || SOCK_DOM(sock) == PF_INET6) &&
1048 SOCK_TYPE(sock) == SOCK_STREAM) {
1049 set_tcp_stream_priority(sock);
1050 }
1051 }
1052
1053 /*
1054 * Caller must have ensured socket is valid and won't be going away.
1055 */
1056 void
1057 socket_set_traffic_mgt_flags_locked(socket_t sock, u_int8_t flags)
1058 {
1059 u_int32_t soflags1 = 0;
1060
1061 if ((flags & TRAFFIC_MGT_SO_BACKGROUND))
1062 soflags1 |= SOF1_TRAFFIC_MGT_SO_BACKGROUND;
1063 if ((flags & TRAFFIC_MGT_TCP_RECVBG))
1064 soflags1 |= SOF1_TRAFFIC_MGT_TCP_RECVBG;
1065
1066 (void) OSBitOrAtomic(soflags1, &sock->so_flags1);
1067
1068 sock_set_tcp_stream_priority(sock);
1069 }
1070
1071 void
1072 socket_set_traffic_mgt_flags(socket_t sock, u_int8_t flags)
1073 {
1074 socket_lock(sock, 1);
1075 socket_set_traffic_mgt_flags_locked(sock, flags);
1076 socket_unlock(sock, 1);
1077 }
1078
1079 /*
1080 * Caller must have ensured socket is valid and won't be going away.
1081 */
1082 void
1083 socket_clear_traffic_mgt_flags_locked(socket_t sock, u_int8_t flags)
1084 {
1085 u_int32_t soflags1 = 0;
1086
1087 if ((flags & TRAFFIC_MGT_SO_BACKGROUND))
1088 soflags1 |= SOF1_TRAFFIC_MGT_SO_BACKGROUND;
1089 if ((flags & TRAFFIC_MGT_TCP_RECVBG))
1090 soflags1 |= SOF1_TRAFFIC_MGT_TCP_RECVBG;
1091
1092 (void) OSBitAndAtomic(~soflags1, &sock->so_flags1);
1093
1094 sock_set_tcp_stream_priority(sock);
1095 }
1096
1097 void
1098 socket_clear_traffic_mgt_flags(socket_t sock, u_int8_t flags)
1099 {
1100 socket_lock(sock, 1);
1101 socket_clear_traffic_mgt_flags_locked(sock, flags);
1102 socket_unlock(sock, 1);
1103 }
1104
1105
1106 /*
1107 * Caller must have ensured socket is valid and won't be going away.
1108 */
1109 errno_t
1110 socket_defunct(struct proc *p, socket_t so, int level)
1111 {
1112 errno_t retval;
1113
1114 if (level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_SVC &&
1115 level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL)
1116 return (EINVAL);
1117
1118 socket_lock(so, 1);
1119 /*
1120 * SHUTDOWN_SOCKET_LEVEL_DISCONNECT_SVC level is meant to tear down
1121 * all of mDNSResponder IPC sockets, currently those of AF_UNIX; note
1122 * that this is an implementation artifact of mDNSResponder. We do
1123 * a quick test against the socket buffers for SB_UNIX, since that
1124 * would have been set by unp_attach() at socket creation time.
1125 */
1126 if (level == SHUTDOWN_SOCKET_LEVEL_DISCONNECT_SVC &&
1127 (so->so_rcv.sb_flags & so->so_snd.sb_flags & SB_UNIX) != SB_UNIX) {
1128 socket_unlock(so, 1);
1129 return (EOPNOTSUPP);
1130 }
1131 retval = sosetdefunct(p, so, level, TRUE);
1132 if (retval == 0)
1133 retval = sodefunct(p, so, level);
1134 socket_unlock(so, 1);
1135 return (retval);
1136 }
1137
1138 static void
1139 sock_setupcalls_common(socket_t sock, sock_upcall rcallback, void *rcontext,
1140 sock_upcall wcallback, void *wcontext)
1141 {
1142 if (rcallback != NULL) {
1143 sock->so_rcv.sb_flags |= SB_UPCALL;
1144 sock->so_rcv.sb_upcall = rcallback;
1145 sock->so_rcv.sb_upcallarg = rcontext;
1146 } else {
1147 sock->so_rcv.sb_flags &= ~SB_UPCALL;
1148 sock->so_rcv.sb_upcall = NULL;
1149 sock->so_rcv.sb_upcallarg = NULL;
1150 }
1151
1152 if (wcallback != NULL) {
1153 sock->so_snd.sb_flags |= SB_UPCALL;
1154 sock->so_snd.sb_upcall = wcallback;
1155 sock->so_snd.sb_upcallarg = wcontext;
1156 } else {
1157 sock->so_snd.sb_flags &= ~SB_UPCALL;
1158 sock->so_snd.sb_upcall = NULL;
1159 sock->so_snd.sb_upcallarg = NULL;
1160 }
1161 }
1162
1163 errno_t
1164 sock_setupcall(socket_t sock, sock_upcall callback, void *context)
1165 {
1166 if (sock == NULL)
1167 return (EINVAL);
1168
1169 /*
1170 * Note that we don't wait for any in progress upcall to complete.
1171 * On embedded, sock_setupcall() causes both read and write
1172 * callbacks to be set; on desktop, only read callback is set
1173 * to maintain legacy KPI behavior.
1174 *
1175 * The newer sock_setupcalls() KPI should be used instead to set
1176 * the read and write callbacks and their respective parameters.
1177 */
1178 socket_lock(sock, 1);
1179 sock_setupcalls_common(sock, callback, context, NULL, NULL);
1180 socket_unlock(sock, 1);
1181
1182 return (0);
1183 }
1184
1185 errno_t
1186 sock_setupcalls(socket_t sock, sock_upcall rcallback, void *rcontext,
1187 sock_upcall wcallback, void *wcontext)
1188 {
1189 if (sock == NULL)
1190 return (EINVAL);
1191
1192 /*
1193 * Note that we don't wait for any in progress upcall to complete.
1194 */
1195 socket_lock(sock, 1);
1196 sock_setupcalls_common(sock, rcallback, rcontext, wcallback, wcontext);
1197 socket_unlock(sock, 1);
1198
1199 return (0);
1200 }
1201
1202 errno_t
1203 sock_catchevents(socket_t sock, sock_evupcall ecallback, void *econtext,
1204 u_int32_t emask)
1205 {
1206 if (sock == NULL)
1207 return (EINVAL);
1208
1209 /*
1210 * Note that we don't wait for any in progress upcall to complete.
1211 */
1212 socket_lock(sock, 1);
1213 if (ecallback != NULL) {
1214 sock->so_event = ecallback;
1215 sock->so_eventarg = econtext;
1216 sock->so_eventmask = emask;
1217 } else {
1218 sock->so_event = sonullevent;
1219 sock->so_eventarg = NULL;
1220 sock->so_eventmask = 0;
1221 }
1222 socket_unlock(sock, 1);
1223
1224 return (0);
1225 }
1226
1227 /*
1228 * Returns true whether or not a socket belongs to the kernel.
1229 */
1230 int
1231 sock_iskernel(socket_t so)
1232 {
1233 return (so && so->last_pid == 0);
1234 }