]> git.saurik.com Git - apple/xnu.git/blob - bsd/kern/kpi_socket.c
xnu-3789.31.2.tar.gz
[apple/xnu.git] / bsd / kern / kpi_socket.c
1 /*
2 * Copyright (c) 2003-2016 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #define __KPI__
30 #include <sys/systm.h>
31 #include <sys/kernel.h>
32 #include <sys/types.h>
33 #include <sys/socket.h>
34 #include <sys/socketvar.h>
35 #include <sys/param.h>
36 #include <sys/proc.h>
37 #include <sys/errno.h>
38 #include <sys/malloc.h>
39 #include <sys/protosw.h>
40 #include <sys/domain.h>
41 #include <sys/mbuf.h>
42 #include <sys/mcache.h>
43 #include <sys/fcntl.h>
44 #include <sys/filio.h>
45 #include <sys/uio_internal.h>
46 #include <kern/locks.h>
47 #include <netinet/in.h>
48 #include <libkern/OSAtomic.h>
49
50 static errno_t sock_send_internal(socket_t, const struct msghdr *,
51 mbuf_t, int, size_t *);
52 static void sock_setupcalls_common(socket_t, sock_upcall, void *,
53 sock_upcall, void *);
54
55 errno_t
56 sock_accept(socket_t sock, struct sockaddr *from, int fromlen, int flags,
57 sock_upcall callback, void *cookie, socket_t *new_sock)
58 {
59 struct sockaddr *sa;
60 struct socket *new_so;
61 lck_mtx_t *mutex_held;
62 int dosocklock;
63 errno_t error = 0;
64
65 if (sock == NULL || new_sock == NULL)
66 return (EINVAL);
67
68 socket_lock(sock, 1);
69 if ((sock->so_options & SO_ACCEPTCONN) == 0) {
70 socket_unlock(sock, 1);
71 return (EINVAL);
72 }
73 if ((flags & ~(MSG_DONTWAIT)) != 0) {
74 socket_unlock(sock, 1);
75 return (ENOTSUP);
76 }
77 if (((flags & MSG_DONTWAIT) != 0 || (sock->so_state & SS_NBIO) != 0) &&
78 sock->so_comp.tqh_first == NULL) {
79 socket_unlock(sock, 1);
80 return (EWOULDBLOCK);
81 }
82
83 if (sock->so_proto->pr_getlock != NULL) {
84 mutex_held = (*sock->so_proto->pr_getlock)(sock, 0);
85 dosocklock = 1;
86 } else {
87 mutex_held = sock->so_proto->pr_domain->dom_mtx;
88 dosocklock = 0;
89 }
90
91 while (TAILQ_EMPTY(&sock->so_comp) && sock->so_error == 0) {
92 if (sock->so_state & SS_CANTRCVMORE) {
93 sock->so_error = ECONNABORTED;
94 break;
95 }
96 error = msleep((caddr_t)&sock->so_timeo, mutex_held,
97 PSOCK | PCATCH, "sock_accept", NULL);
98 if (error != 0) {
99 socket_unlock(sock, 1);
100 return (error);
101 }
102 }
103 if (sock->so_error != 0) {
104 error = sock->so_error;
105 sock->so_error = 0;
106 socket_unlock(sock, 1);
107 return (error);
108 }
109
110 new_so = TAILQ_FIRST(&sock->so_comp);
111 TAILQ_REMOVE(&sock->so_comp, new_so, so_list);
112 new_so->so_state &= ~SS_COMP;
113 new_so->so_head = NULL;
114 sock->so_qlen--;
115
116 /*
117 * Pass the pre-accepted socket to any interested socket filter(s).
118 * Upon failure, the socket would have been closed by the callee.
119 */
120 if (new_so->so_filt != NULL) {
121 /*
122 * Temporarily drop the listening socket's lock before we
123 * hand off control over to the socket filter(s), but keep
124 * a reference so that it won't go away. We'll grab it
125 * again once we're done with the filter(s).
126 */
127 socket_unlock(sock, 0);
128 if ((error = soacceptfilter(new_so, sock)) != 0) {
129 /* Drop reference on listening socket */
130 sodereference(sock);
131 return (error);
132 }
133 socket_lock(sock, 0);
134 }
135
136 if (dosocklock) {
137 lck_mtx_assert(new_so->so_proto->pr_getlock(new_so, 0),
138 LCK_MTX_ASSERT_NOTOWNED);
139 socket_lock(new_so, 1);
140 }
141
142 (void) soacceptlock(new_so, &sa, 0);
143
144 socket_unlock(sock, 1); /* release the head */
145
146 /* see comments in sock_setupcall() */
147 if (callback != NULL) {
148 sock_setupcalls_common(new_so, callback, cookie, NULL, NULL);
149 }
150
151 if (sa != NULL && from != NULL) {
152 if (fromlen > sa->sa_len)
153 fromlen = sa->sa_len;
154 memcpy(from, sa, fromlen);
155 }
156 if (sa != NULL)
157 FREE(sa, M_SONAME);
158
159 /*
160 * If the socket has been marked as inactive by sosetdefunct(),
161 * disallow further operations on it.
162 */
163 if (new_so->so_flags & SOF_DEFUNCT) {
164 (void) sodefunct(current_proc(), new_so,
165 SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL);
166 }
167 *new_sock = new_so;
168 if (dosocklock)
169 socket_unlock(new_so, 1);
170 return (error);
171 }
172
173 errno_t
174 sock_bind(socket_t sock, const struct sockaddr *to)
175 {
176 int error = 0;
177 struct sockaddr *sa = NULL;
178 struct sockaddr_storage ss;
179 boolean_t want_free = TRUE;
180
181 if (sock == NULL || to == NULL)
182 return (EINVAL);
183
184 if (to->sa_len > sizeof (ss)) {
185 MALLOC(sa, struct sockaddr *, to->sa_len, M_SONAME, M_WAITOK);
186 if (sa == NULL)
187 return (ENOBUFS);
188 } else {
189 sa = (struct sockaddr *)&ss;
190 want_free = FALSE;
191 }
192 memcpy(sa, to, to->sa_len);
193
194 error = sobindlock(sock, sa, 1); /* will lock socket */
195
196 if (sa != NULL && want_free == TRUE)
197 FREE(sa, M_SONAME);
198
199 return (error);
200 }
201
/*
 * Initiate a connection on a kernel socket.
 *
 * With MSG_DONTWAIT (or a non-blocking socket) this returns EINPROGRESS
 * once the connect has been started; otherwise it sleeps until the
 * connection completes, fails, or the sleep is interrupted.
 */
errno_t
sock_connect(socket_t sock, const struct sockaddr *to, int flags)
{
	int error = 0;
	lck_mtx_t *mutex_held;
	struct sockaddr *sa = NULL;
	struct sockaddr_storage ss;
	boolean_t want_free = TRUE;

	if (sock == NULL || to == NULL)
		return (EINVAL);

	/* Copy the destination address; stack buffer for the common case */
	if (to->sa_len > sizeof (ss)) {
		MALLOC(sa, struct sockaddr *, to->sa_len, M_SONAME,
		    (flags & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK);
		if (sa == NULL)
			return (ENOBUFS);
	} else {
		sa = (struct sockaddr *)&ss;
		want_free = FALSE;
	}
	memcpy(sa, to, to->sa_len);

	socket_lock(sock, 1);

	/* A non-blocking connect already in flight is EALREADY */
	if ((sock->so_state & SS_ISCONNECTING) &&
	    ((sock->so_state & SS_NBIO) != 0 || (flags & MSG_DONTWAIT) != 0)) {
		error = EALREADY;
		goto out;
	}
	error = soconnectlock(sock, sa, 0);
	if (!error) {
		/* Started OK; non-blocking callers get EINPROGRESS here */
		if ((sock->so_state & SS_ISCONNECTING) &&
		    ((sock->so_state & SS_NBIO) != 0 ||
		    (flags & MSG_DONTWAIT) != 0)) {
			error = EINPROGRESS;
			goto out;
		}

		/* msleep() needs the mutex actually protecting this socket */
		if (sock->so_proto->pr_getlock != NULL)
			mutex_held = (*sock->so_proto->pr_getlock)(sock, 0);
		else
			mutex_held = sock->so_proto->pr_domain->dom_mtx;

		/* Blocking path: wait for the handshake to finish */
		while ((sock->so_state & SS_ISCONNECTING) &&
		    sock->so_error == 0) {
			error = msleep((caddr_t)&sock->so_timeo,
			    mutex_held, PSOCK | PCATCH, "sock_connect", NULL);
			if (error != 0)
				break;
		}

		/* Pick up any asynchronous connect failure */
		if (error == 0) {
			error = sock->so_error;
			sock->so_error = 0;
		}
	} else {
		sock->so_state &= ~SS_ISCONNECTING;
	}
out:
	socket_unlock(sock, 1);

	if (sa != NULL && want_free == TRUE)
		FREE(sa, M_SONAME);

	return (error);
}
269
/*
 * Wait up to *tv for a connect initiated with MSG_DONTWAIT to complete.
 * Returns 0 if connected, EINPROGRESS if still connecting when the
 * timeout expires, EDOM for an out-of-range timeout, EINVAL if the
 * socket is neither connecting nor connected, or the socket error.
 */
errno_t
sock_connectwait(socket_t sock, const struct timeval *tv)
{
	lck_mtx_t *mutex_held;
	errno_t retval = 0;
	struct timespec ts;

	socket_lock(sock, 1);

	/* Check if we're already connected or if we've already errored out */
	if ((sock->so_state & SS_ISCONNECTING) == 0 || sock->so_error != 0) {
		if (sock->so_error != 0) {
			retval = sock->so_error;
			sock->so_error = 0;
		} else {
			/* Connected is success; anything else is EINVAL */
			if ((sock->so_state & SS_ISCONNECTED) != 0)
				retval = 0;
			else
				retval = EINVAL;
		}
		goto done;
	}

	/* copied translation from timeval to hertz from SO_RCVTIMEO handling */
	if (tv->tv_sec < 0 || tv->tv_sec > SHRT_MAX / hz ||
	    tv->tv_usec < 0 || tv->tv_usec >= 1000000) {
		retval = EDOM;
		goto done;
	}

	ts.tv_sec = tv->tv_sec;
	ts.tv_nsec = (tv->tv_usec * (integer_t)NSEC_PER_USEC);
	if ((ts.tv_sec + (ts.tv_nsec/(long)NSEC_PER_SEC))/100 > SHRT_MAX) {
		retval = EDOM;
		goto done;
	}

	/* msleep() needs the mutex actually protecting this socket */
	if (sock->so_proto->pr_getlock != NULL)
		mutex_held = (*sock->so_proto->pr_getlock)(sock, 0);
	else
		mutex_held = sock->so_proto->pr_domain->dom_mtx;

	/* No PCATCH: the wait is bounded by ts, not interruptible */
	msleep((caddr_t)&sock->so_timeo, mutex_held,
	    PSOCK, "sock_connectwait", &ts);

	/* Check if we're still waiting to connect */
	if ((sock->so_state & SS_ISCONNECTING) && sock->so_error == 0) {
		retval = EINPROGRESS;
		goto done;
	}

	if (sock->so_error != 0) {
		retval = sock->so_error;
		sock->so_error = 0;
	}

done:
	socket_unlock(sock, 1);
	return (retval);
}
330
331 errno_t
332 sock_nointerrupt(socket_t sock, int on)
333 {
334 socket_lock(sock, 1);
335
336 if (on) {
337 sock->so_rcv.sb_flags |= SB_NOINTR; /* This isn't safe */
338 sock->so_snd.sb_flags |= SB_NOINTR; /* This isn't safe */
339 } else {
340 sock->so_rcv.sb_flags &= ~SB_NOINTR; /* This isn't safe */
341 sock->so_snd.sb_flags &= ~SB_NOINTR; /* This isn't safe */
342 }
343
344 socket_unlock(sock, 1);
345
346 return (0);
347 }
348
349 errno_t
350 sock_getpeername(socket_t sock, struct sockaddr *peername, int peernamelen)
351 {
352 int error;
353 struct sockaddr *sa = NULL;
354
355 if (sock == NULL || peername == NULL || peernamelen < 0)
356 return (EINVAL);
357
358 socket_lock(sock, 1);
359 if (!(sock->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING))) {
360 socket_unlock(sock, 1);
361 return (ENOTCONN);
362 }
363 error = sogetaddr_locked(sock, &sa, 1);
364 socket_unlock(sock, 1);
365 if (error == 0) {
366 if (peernamelen > sa->sa_len)
367 peernamelen = sa->sa_len;
368 memcpy(peername, sa, peernamelen);
369 FREE(sa, M_SONAME);
370 }
371 return (error);
372 }
373
374 errno_t
375 sock_getsockname(socket_t sock, struct sockaddr *sockname, int socknamelen)
376 {
377 int error;
378 struct sockaddr *sa = NULL;
379
380 if (sock == NULL || sockname == NULL || socknamelen < 0)
381 return (EINVAL);
382
383 socket_lock(sock, 1);
384 error = sogetaddr_locked(sock, &sa, 0);
385 socket_unlock(sock, 1);
386 if (error == 0) {
387 if (socknamelen > sa->sa_len)
388 socknamelen = sa->sa_len;
389 memcpy(sockname, sa, socknamelen);
390 FREE(sa, M_SONAME);
391 }
392 return (error);
393 }
394
395 __private_extern__ int
396 sogetaddr_locked(struct socket *so, struct sockaddr **psa, int peer)
397 {
398 int error;
399
400 if (so == NULL || psa == NULL)
401 return (EINVAL);
402
403 *psa = NULL;
404 error = peer ? so->so_proto->pr_usrreqs->pru_peeraddr(so, psa) :
405 so->so_proto->pr_usrreqs->pru_sockaddr(so, psa);
406
407 if (error == 0 && *psa == NULL) {
408 error = ENOMEM;
409 } else if (error != 0 && *psa != NULL) {
410 FREE(*psa, M_SONAME);
411 *psa = NULL;
412 }
413 return (error);
414 }
415
416 errno_t
417 sock_getaddr(socket_t sock, struct sockaddr **psa, int peer)
418 {
419 int error;
420
421 if (sock == NULL || psa == NULL)
422 return (EINVAL);
423
424 socket_lock(sock, 1);
425 error = sogetaddr_locked(sock, psa, peer);
426 socket_unlock(sock, 1);
427
428 return (error);
429 }
430
431 void
432 sock_freeaddr(struct sockaddr *sa)
433 {
434 if (sa != NULL)
435 FREE(sa, M_SONAME);
436 }
437
438 errno_t
439 sock_getsockopt(socket_t sock, int level, int optname, void *optval,
440 int *optlen)
441 {
442 int error = 0;
443 struct sockopt sopt;
444
445 if (sock == NULL || optval == NULL || optlen == NULL)
446 return (EINVAL);
447
448 sopt.sopt_dir = SOPT_GET;
449 sopt.sopt_level = level;
450 sopt.sopt_name = optname;
451 sopt.sopt_val = CAST_USER_ADDR_T(optval);
452 sopt.sopt_valsize = *optlen;
453 sopt.sopt_p = kernproc;
454 error = sogetoptlock(sock, &sopt, 1); /* will lock socket */
455 if (error == 0)
456 *optlen = sopt.sopt_valsize;
457 return (error);
458 }
459
460 errno_t
461 sock_ioctl(socket_t sock, unsigned long request, void *argp)
462 {
463 return (soioctl(sock, request, argp, kernproc)); /* will lock socket */
464 }
465
466 errno_t
467 sock_setsockopt(socket_t sock, int level, int optname, const void *optval,
468 int optlen)
469 {
470 struct sockopt sopt;
471
472 if (sock == NULL || optval == NULL)
473 return (EINVAL);
474
475 sopt.sopt_dir = SOPT_SET;
476 sopt.sopt_level = level;
477 sopt.sopt_name = optname;
478 sopt.sopt_val = CAST_USER_ADDR_T(optval);
479 sopt.sopt_valsize = optlen;
480 sopt.sopt_p = kernproc;
481 return (sosetoptlock(sock, &sopt, 1)); /* will lock socket */
482 }
483
484 /*
485 * This follows the recommended mappings between DSCP code points
486 * and WMM access classes.
487 */
488 static u_int32_t so_tc_from_dscp(u_int8_t dscp);
489 static u_int32_t
490 so_tc_from_dscp(u_int8_t dscp)
491 {
492 u_int32_t tc;
493
494 if (dscp >= 0x30 && dscp <= 0x3f)
495 tc = SO_TC_VO;
496 else if (dscp >= 0x20 && dscp <= 0x2f)
497 tc = SO_TC_VI;
498 else if (dscp >= 0x08 && dscp <= 0x17)
499 tc = SO_TC_BK_SYS;
500 else
501 tc = SO_TC_BE;
502
503 return (tc);
504 }
505
506 errno_t
507 sock_settclassopt(socket_t sock, const void *optval, size_t optlen)
508 {
509 errno_t error = 0;
510 struct sockopt sopt;
511 int sotc;
512
513 if (sock == NULL || optval == NULL || optlen != sizeof (int))
514 return (EINVAL);
515
516 socket_lock(sock, 1);
517 if (!(sock->so_state & SS_ISCONNECTED)) {
518 /*
519 * If the socket is not connected then we don't know
520 * if the destination is on LAN or not. Skip
521 * setting traffic class in this case
522 */
523 error = ENOTCONN;
524 goto out;
525 }
526
527 if (sock->so_proto == NULL || sock->so_proto->pr_domain == NULL ||
528 sock->so_pcb == NULL) {
529 error = EINVAL;
530 goto out;
531 }
532
533 /*
534 * Set the socket traffic class based on the passed DSCP code point
535 * regardless of the scope of the destination
536 */
537 sotc = so_tc_from_dscp((*(const int *)optval) >> 2);
538
539 sopt.sopt_dir = SOPT_SET;
540 sopt.sopt_val = CAST_USER_ADDR_T(&sotc);
541 sopt.sopt_valsize = sizeof (sotc);
542 sopt.sopt_p = kernproc;
543 sopt.sopt_level = SOL_SOCKET;
544 sopt.sopt_name = SO_TRAFFIC_CLASS;
545
546 error = sosetoptlock(sock, &sopt, 0); /* already locked */
547
548 if (error != 0) {
549 printf("%s: sosetopt SO_TRAFFIC_CLASS failed %d\n",
550 __func__, error);
551 goto out;
552 }
553
554 /*
555 * Check if the destination address is LAN or link local address.
556 * We do not want to set traffic class bits if the destination
557 * is not local.
558 */
559 if (!so_isdstlocal(sock))
560 goto out;
561
562 sopt.sopt_dir = SOPT_SET;
563 sopt.sopt_val = CAST_USER_ADDR_T(optval);
564 sopt.sopt_valsize = optlen;
565 sopt.sopt_p = kernproc;
566
567 switch (SOCK_DOM(sock)) {
568 case PF_INET:
569 sopt.sopt_level = IPPROTO_IP;
570 sopt.sopt_name = IP_TOS;
571 break;
572 case PF_INET6:
573 sopt.sopt_level = IPPROTO_IPV6;
574 sopt.sopt_name = IPV6_TCLASS;
575 break;
576 default:
577 error = EINVAL;
578 goto out;
579 }
580
581 error = sosetoptlock(sock, &sopt, 0); /* already locked */
582 socket_unlock(sock, 1);
583 return (error);
584 out:
585 socket_unlock(sock, 1);
586 return (error);
587 }
588
589 errno_t
590 sock_gettclassopt(socket_t sock, void *optval, size_t *optlen)
591 {
592 errno_t error = 0;
593 struct sockopt sopt;
594
595 if (sock == NULL || optval == NULL || optlen == NULL)
596 return (EINVAL);
597
598 sopt.sopt_dir = SOPT_GET;
599 sopt.sopt_val = CAST_USER_ADDR_T(optval);
600 sopt.sopt_valsize = *optlen;
601 sopt.sopt_p = kernproc;
602
603 socket_lock(sock, 1);
604 if (sock->so_proto == NULL || sock->so_proto->pr_domain == NULL) {
605 socket_unlock(sock, 1);
606 return (EINVAL);
607 }
608
609 switch (SOCK_DOM(sock)) {
610 case PF_INET:
611 sopt.sopt_level = IPPROTO_IP;
612 sopt.sopt_name = IP_TOS;
613 break;
614 case PF_INET6:
615 sopt.sopt_level = IPPROTO_IPV6;
616 sopt.sopt_name = IPV6_TCLASS;
617 break;
618 default:
619 socket_unlock(sock, 1);
620 return (EINVAL);
621
622 }
623 error = sogetoptlock(sock, &sopt, 0); /* already locked */
624 socket_unlock(sock, 1);
625 if (error == 0)
626 *optlen = sopt.sopt_valsize;
627 return (error);
628 }
629
630 errno_t
631 sock_listen(socket_t sock, int backlog)
632 {
633 if (sock == NULL)
634 return (EINVAL);
635
636 return (solisten(sock, backlog)); /* will lock socket */
637 }
638
639 errno_t
640 sock_receive_internal(socket_t sock, struct msghdr *msg, mbuf_t *data,
641 int flags, size_t *recvdlen)
642 {
643 uio_t auio;
644 struct mbuf *control = NULL;
645 int error = 0;
646 int length = 0;
647 struct sockaddr *fromsa = NULL;
648 char uio_buf[ UIO_SIZEOF((msg != NULL) ? msg->msg_iovlen : 0) ];
649
650 if (sock == NULL)
651 return (EINVAL);
652
653 auio = uio_createwithbuffer(((msg != NULL) ? msg->msg_iovlen : 0),
654 0, UIO_SYSSPACE, UIO_READ, &uio_buf[0], sizeof (uio_buf));
655 if (msg != NULL && data == NULL) {
656 int i;
657 struct iovec *tempp = msg->msg_iov;
658
659 for (i = 0; i < msg->msg_iovlen; i++) {
660 uio_addiov(auio,
661 CAST_USER_ADDR_T((tempp + i)->iov_base),
662 (tempp + i)->iov_len);
663 }
664 if (uio_resid(auio) < 0)
665 return (EINVAL);
666 } else if (recvdlen != NULL) {
667 uio_setresid(auio, (uio_resid(auio) + *recvdlen));
668 }
669 length = uio_resid(auio);
670
671 if (recvdlen != NULL)
672 *recvdlen = 0;
673
674 /* let pru_soreceive handle the socket locking */
675 error = sock->so_proto->pr_usrreqs->pru_soreceive(sock, &fromsa, auio,
676 data, (msg && msg->msg_control) ? &control : NULL, &flags);
677 if (error != 0)
678 goto cleanup;
679
680 if (recvdlen != NULL)
681 *recvdlen = length - uio_resid(auio);
682 if (msg != NULL) {
683 msg->msg_flags = flags;
684
685 if (msg->msg_name != NULL) {
686 int salen;
687 salen = msg->msg_namelen;
688 if (msg->msg_namelen > 0 && fromsa != NULL) {
689 salen = MIN(salen, fromsa->sa_len);
690 memcpy(msg->msg_name, fromsa,
691 msg->msg_namelen > fromsa->sa_len ?
692 fromsa->sa_len : msg->msg_namelen);
693 }
694 }
695
696 if (msg->msg_control != NULL) {
697 struct mbuf *m = control;
698 u_char *ctlbuf = msg->msg_control;
699 int clen = msg->msg_controllen;
700
701 msg->msg_controllen = 0;
702
703 while (m != NULL && clen > 0) {
704 unsigned int tocopy;
705
706 if (clen >= m->m_len) {
707 tocopy = m->m_len;
708 } else {
709 msg->msg_flags |= MSG_CTRUNC;
710 tocopy = clen;
711 }
712 memcpy(ctlbuf, mtod(m, caddr_t), tocopy);
713 ctlbuf += tocopy;
714 clen -= tocopy;
715 m = m->m_next;
716 }
717 msg->msg_controllen =
718 (uintptr_t)ctlbuf - (uintptr_t)msg->msg_control;
719 }
720 }
721
722 cleanup:
723 if (control != NULL)
724 m_freem(control);
725 if (fromsa != NULL)
726 FREE(fromsa, M_SONAME);
727 return (error);
728 }
729
730 errno_t
731 sock_receive(socket_t sock, struct msghdr *msg, int flags, size_t *recvdlen)
732 {
733 if ((msg == NULL) || (msg->msg_iovlen < 1) ||
734 (msg->msg_iov[0].iov_len == 0) ||
735 (msg->msg_iov[0].iov_base == NULL))
736 return (EINVAL);
737
738 return (sock_receive_internal(sock, msg, NULL, flags, recvdlen));
739 }
740
741 errno_t
742 sock_receivembuf(socket_t sock, struct msghdr *msg, mbuf_t *data, int flags,
743 size_t *recvlen)
744 {
745 if (data == NULL || recvlen == 0 || *recvlen <= 0 || (msg != NULL &&
746 (msg->msg_iov != NULL || msg->msg_iovlen != 0)))
747 return (EINVAL);
748
749 return (sock_receive_internal(sock, msg, data, flags, recvlen));
750 }
751
/*
 * Common send path for sock_send() and sock_sendmbuf().
 *
 * Exactly one of msg->msg_iov (iovec send) or data (mbuf send) supplies
 * the payload.  Ownership note: pru_sosend() consumes 'data' and
 * 'control' whether it succeeds or fails; the errorout path below only
 * frees them for errors detected *before* that call.
 */
errno_t
sock_send_internal(socket_t sock, const struct msghdr *msg, mbuf_t data,
    int flags, size_t *sentlen)
{
	uio_t auio = NULL;
	struct mbuf *control = NULL;
	int error = 0;
	int datalen = 0;
	char uio_buf[ UIO_SIZEOF((msg != NULL ? msg->msg_iovlen : 1)) ];

	if (sock == NULL) {
		error = EINVAL;
		goto errorout;
	}

	/* iovec send: build a uio over the caller's iovecs */
	if (data == NULL && msg != NULL) {
		struct iovec *tempp = msg->msg_iov;

		auio = uio_createwithbuffer(msg->msg_iovlen, 0,
		    UIO_SYSSPACE, UIO_WRITE, &uio_buf[0], sizeof (uio_buf));
		if (tempp != NULL) {
			int i;

			for (i = 0; i < msg->msg_iovlen; i++) {
				uio_addiov(auio,
				    CAST_USER_ADDR_T((tempp + i)->iov_base),
				    (tempp + i)->iov_len);
			}

			/* Negative resid means the lengths overflowed */
			if (uio_resid(auio) < 0) {
				error = EINVAL;
				goto errorout;
			}
		}
	}

	if (sentlen != NULL)
		*sentlen = 0;

	/* Total bytes to send, from whichever source is in use */
	if (auio != NULL)
		datalen = uio_resid(auio);
	else
		datalen = data->m_pkthdr.len;

	/* Copy control data (cmsghdrs) into a single MT_CONTROL mbuf */
	if (msg != NULL && msg->msg_control) {
		if ((size_t)msg->msg_controllen < sizeof (struct cmsghdr)) {
			error = EINVAL;
			goto errorout;
		}

		/* Control data must fit in one plain mbuf */
		if ((size_t)msg->msg_controllen > MLEN) {
			error = EINVAL;
			goto errorout;
		}

		control = m_get(M_NOWAIT, MT_CONTROL);
		if (control == NULL) {
			error = ENOMEM;
			goto errorout;
		}
		memcpy(mtod(control, caddr_t), msg->msg_control,
		    msg->msg_controllen);
		control->m_len = msg->msg_controllen;
	}

	error = sock->so_proto->pr_usrreqs->pru_sosend(sock, msg != NULL ?
	    (struct sockaddr *)msg->msg_name : NULL, auio, data,
	    control, flags);

	/*
	 * Residual data is possible in the case of IO vectors but not
	 * in the mbuf case since the latter is treated as atomic send.
	 * If pru_sosend() consumed a portion of the iovecs data and
	 * the error returned is transient, treat it as success; this
	 * is consistent with sendit() behavior.
	 */
	if (auio != NULL && uio_resid(auio) != datalen &&
	    (error == ERESTART || error == EINTR || error == EWOULDBLOCK))
		error = 0;

	/* Report how many bytes actually went out */
	if (error == 0 && sentlen != NULL) {
		if (auio != NULL)
			*sentlen = datalen - uio_resid(auio);
		else
			*sentlen = datalen;
	}

	return (error);

/*
 * In cases where we detect an error before returning, we need to
 * free the mbuf chain if there is one. sosend (and pru_sosend) will
 * free the mbuf chain if they encounter an error.
 */
errorout:
	if (control)
		m_freem(control);
	if (data)
		m_freem(data);
	if (sentlen)
		*sentlen = 0;
	return (error);
}
855
856 errno_t
857 sock_send(socket_t sock, const struct msghdr *msg, int flags, size_t *sentlen)
858 {
859 if (msg == NULL || msg->msg_iov == NULL || msg->msg_iovlen < 1)
860 return (EINVAL);
861
862 return (sock_send_internal(sock, msg, NULL, flags, sentlen));
863 }
864
865 errno_t
866 sock_sendmbuf(socket_t sock, const struct msghdr *msg, mbuf_t data,
867 int flags, size_t *sentlen)
868 {
869 if (data == NULL || (msg != NULL && (msg->msg_iov != NULL ||
870 msg->msg_iovlen != 0))) {
871 if (data != NULL)
872 m_freem(data);
873 return (EINVAL);
874 }
875 return (sock_send_internal(sock, msg, data, flags, sentlen));
876 }
877
878 errno_t
879 sock_shutdown(socket_t sock, int how)
880 {
881 if (sock == NULL)
882 return (EINVAL);
883
884 return (soshutdown(sock, how));
885 }
886
887
888 errno_t
889 sock_socket(int domain, int type, int protocol, sock_upcall callback,
890 void *context, socket_t *new_so)
891 {
892 int error = 0;
893
894 if (new_so == NULL)
895 return (EINVAL);
896
897 /* socreate will create an initial so_count */
898 error = socreate(domain, new_so, type, protocol);
899 if (error == 0) {
900 /* see comments in sock_setupcall() */
901 if (callback != NULL) {
902 sock_setupcalls_common(*new_so, callback, context,
903 NULL, NULL);
904 }
905 /*
906 * last_pid and last_upid should be zero for sockets
907 * created using sock_socket
908 */
909 (*new_so)->last_pid = 0;
910 (*new_so)->last_upid = 0;
911 }
912 return (error);
913 }
914
915 void
916 sock_close(socket_t sock)
917 {
918 if (sock == NULL)
919 return;
920
921 soclose(sock);
922 }
923
924 /* Do we want this to be APPLE_PRIVATE API?: YES (LD 12/23/04) */
925 void
926 sock_retain(socket_t sock)
927 {
928 if (sock == NULL)
929 return;
930
931 socket_lock(sock, 1);
932 sock->so_retaincnt++;
933 sock->so_usecount++; /* add extra reference for holding the socket */
934 socket_unlock(sock, 1);
935 }
936
937 /* Do we want this to be APPLE_PRIVATE API? */
/*
 * Drop a reference taken with sock_retain().  When the last retain
 * count goes away and no file descriptor holds the socket, the socket
 * is closed here; otherwise only the extra use count is dropped.
 */
void
sock_release(socket_t sock)
{
	if (sock == NULL)
		return;

	socket_lock(sock, 1);
	/* Let any in-flight upcalls drain before we might close below */
	if (sock->so_upcallusecount > 0)
		soclose_wait_locked(sock);

	sock->so_retaincnt--;
	if (sock->so_retaincnt < 0) {
		panic("%s: negative retain count (%d) for sock=%p\n",
		    __func__, sock->so_retaincnt, sock);
		/* NOTREACHED */
	}
	/*
	 * Check SS_NOFDREF in case a close happened as sock_retain()
	 * was grabbing the lock
	 */
	if ((sock->so_retaincnt == 0) && (sock->so_usecount == 2) &&
	    (!(sock->so_state & SS_NOFDREF) ||
	    (sock->so_flags & SOF_MP_SUBFLOW))) {
		/* close socket only if the FD is not holding it */
		soclose_locked(sock);
	} else {
		/* remove extra reference holding the socket */
		VERIFY(sock->so_usecount > 1);
		sock->so_usecount--;
	}
	socket_unlock(sock, 1);
}
970
971 errno_t
972 sock_setpriv(socket_t sock, int on)
973 {
974 if (sock == NULL)
975 return (EINVAL);
976
977 socket_lock(sock, 1);
978 if (on)
979 sock->so_state |= SS_PRIV;
980 else
981 sock->so_state &= ~SS_PRIV;
982 socket_unlock(sock, 1);
983 return (0);
984 }
985
986 int
987 sock_isconnected(socket_t sock)
988 {
989 int retval;
990
991 socket_lock(sock, 1);
992 retval = ((sock->so_state & SS_ISCONNECTED) ? 1 : 0);
993 socket_unlock(sock, 1);
994 return (retval);
995 }
996
997 int
998 sock_isnonblocking(socket_t sock)
999 {
1000 int retval;
1001
1002 socket_lock(sock, 1);
1003 retval = ((sock->so_state & SS_NBIO) ? 1 : 0);
1004 socket_unlock(sock, 1);
1005 return (retval);
1006 }
1007
1008 errno_t
1009 sock_gettype(socket_t sock, int *outDomain, int *outType, int *outProtocol)
1010 {
1011 socket_lock(sock, 1);
1012 if (outDomain != NULL)
1013 *outDomain = SOCK_DOM(sock);
1014 if (outType != NULL)
1015 *outType = sock->so_type;
1016 if (outProtocol != NULL)
1017 *outProtocol = SOCK_PROTO(sock);
1018 socket_unlock(sock, 1);
1019 return (0);
1020 }
1021
1022 /*
1023 * Return the listening socket of a pre-accepted socket. It returns the
1024 * listener (so_head) value of a given socket. This is intended to be
1025 * called by a socket filter during a filter attach (sf_attach) callback.
1026 * The value returned by this routine is safe to be used only in the
1027 * context of that callback, because we hold the listener's lock across
1028 * the sflt_initsock() call.
1029 */
socket_t
sock_getlistener(socket_t sock)
{
	/* See the block comment above: valid only during sf_attach. */
	return (sock->so_head);
}
1035
1036 static inline void
1037 sock_set_tcp_stream_priority(socket_t sock)
1038 {
1039 if ((SOCK_DOM(sock) == PF_INET || SOCK_DOM(sock) == PF_INET6) &&
1040 SOCK_TYPE(sock) == SOCK_STREAM) {
1041 set_tcp_stream_priority(sock);
1042 }
1043 }
1044
1045 /*
1046 * Caller must have ensured socket is valid and won't be going away.
1047 */
1048 void
1049 socket_set_traffic_mgt_flags_locked(socket_t sock, u_int8_t flags)
1050 {
1051 u_int32_t soflags1 = 0;
1052
1053 if ((flags & TRAFFIC_MGT_SO_BACKGROUND))
1054 soflags1 |= SOF1_TRAFFIC_MGT_SO_BACKGROUND;
1055 if ((flags & TRAFFIC_MGT_TCP_RECVBG))
1056 soflags1 |= SOF1_TRAFFIC_MGT_TCP_RECVBG;
1057
1058 (void) OSBitOrAtomic(soflags1, &sock->so_flags1);
1059
1060 sock_set_tcp_stream_priority(sock);
1061 }
1062
1063 void
1064 socket_set_traffic_mgt_flags(socket_t sock, u_int8_t flags)
1065 {
1066 socket_lock(sock, 1);
1067 socket_set_traffic_mgt_flags_locked(sock, flags);
1068 socket_unlock(sock, 1);
1069 }
1070
1071 /*
1072 * Caller must have ensured socket is valid and won't be going away.
1073 */
1074 void
1075 socket_clear_traffic_mgt_flags_locked(socket_t sock, u_int8_t flags)
1076 {
1077 u_int32_t soflags1 = 0;
1078
1079 if ((flags & TRAFFIC_MGT_SO_BACKGROUND))
1080 soflags1 |= SOF1_TRAFFIC_MGT_SO_BACKGROUND;
1081 if ((flags & TRAFFIC_MGT_TCP_RECVBG))
1082 soflags1 |= SOF1_TRAFFIC_MGT_TCP_RECVBG;
1083
1084 (void) OSBitAndAtomic(~soflags1, &sock->so_flags1);
1085
1086 sock_set_tcp_stream_priority(sock);
1087 }
1088
1089 void
1090 socket_clear_traffic_mgt_flags(socket_t sock, u_int8_t flags)
1091 {
1092 socket_lock(sock, 1);
1093 socket_clear_traffic_mgt_flags_locked(sock, flags);
1094 socket_unlock(sock, 1);
1095 }
1096
1097
1098 /*
1099 * Caller must have ensured socket is valid and won't be going away.
1100 */
/*
 * Mark a socket defunct at the given shutdown level, tearing down its
 * ability to send/receive.  Only the DISCONNECT_SVC and DISCONNECT_ALL
 * levels are accepted here.
 */
errno_t
socket_defunct(struct proc *p, socket_t so, int level)
{
	errno_t retval;

	if (level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_SVC &&
	    level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL)
		return (EINVAL);

	socket_lock(so, 1);
	/*
	 * SHUTDOWN_SOCKET_LEVEL_DISCONNECT_SVC level is meant to tear down
	 * all of mDNSResponder IPC sockets, currently those of AF_UNIX; note
	 * that this is an implementation artifact of mDNSResponder. We do
	 * a quick test against the socket buffers for SB_UNIX, since that
	 * would have been set by unp_attach() at socket creation time.
	 */
	if (level == SHUTDOWN_SOCKET_LEVEL_DISCONNECT_SVC &&
	    (so->so_rcv.sb_flags & so->so_snd.sb_flags & SB_UNIX) != SB_UNIX) {
		socket_unlock(so, 1);
		return (EOPNOTSUPP);
	}
	/* First flag the socket as defunct, then actually defunct it */
	retval = sosetdefunct(p, so, level, TRUE);
	if (retval == 0)
		retval = sodefunct(p, so, level);
	socket_unlock(so, 1);
	return (retval);
}
1129
1130 static void
1131 sock_setupcalls_common(socket_t sock, sock_upcall rcallback, void *rcontext,
1132 sock_upcall wcallback, void *wcontext)
1133 {
1134 if (rcallback != NULL) {
1135 sock->so_rcv.sb_flags |= SB_UPCALL;
1136 sock->so_rcv.sb_upcall = rcallback;
1137 sock->so_rcv.sb_upcallarg = rcontext;
1138 } else {
1139 sock->so_rcv.sb_flags &= ~SB_UPCALL;
1140 sock->so_rcv.sb_upcall = NULL;
1141 sock->so_rcv.sb_upcallarg = NULL;
1142 }
1143
1144 if (wcallback != NULL) {
1145 sock->so_snd.sb_flags |= SB_UPCALL;
1146 sock->so_snd.sb_upcall = wcallback;
1147 sock->so_snd.sb_upcallarg = wcontext;
1148 } else {
1149 sock->so_snd.sb_flags &= ~SB_UPCALL;
1150 sock->so_snd.sb_upcall = NULL;
1151 sock->so_snd.sb_upcallarg = NULL;
1152 }
1153 }
1154
1155 errno_t
1156 sock_setupcall(socket_t sock, sock_upcall callback, void *context)
1157 {
1158 if (sock == NULL)
1159 return (EINVAL);
1160
1161 /*
1162 * Note that we don't wait for any in progress upcall to complete.
1163 * On embedded, sock_setupcall() causes both read and write
1164 * callbacks to be set; on desktop, only read callback is set
1165 * to maintain legacy KPI behavior.
1166 *
1167 * The newer sock_setupcalls() KPI should be used instead to set
1168 * the read and write callbacks and their respective parameters.
1169 */
1170 socket_lock(sock, 1);
1171 sock_setupcalls_common(sock, callback, context, NULL, NULL);
1172 socket_unlock(sock, 1);
1173
1174 return (0);
1175 }
1176
1177 errno_t
1178 sock_setupcalls(socket_t sock, sock_upcall rcallback, void *rcontext,
1179 sock_upcall wcallback, void *wcontext)
1180 {
1181 if (sock == NULL)
1182 return (EINVAL);
1183
1184 /*
1185 * Note that we don't wait for any in progress upcall to complete.
1186 */
1187 socket_lock(sock, 1);
1188 sock_setupcalls_common(sock, rcallback, rcontext, wcallback, wcontext);
1189 socket_unlock(sock, 1);
1190
1191 return (0);
1192 }
1193
1194 errno_t
1195 sock_catchevents(socket_t sock, sock_evupcall ecallback, void *econtext,
1196 u_int32_t emask)
1197 {
1198 if (sock == NULL)
1199 return (EINVAL);
1200
1201 /*
1202 * Note that we don't wait for any in progress upcall to complete.
1203 */
1204 socket_lock(sock, 1);
1205 if (ecallback != NULL) {
1206 sock->so_event = ecallback;
1207 sock->so_eventarg = econtext;
1208 sock->so_eventmask = emask;
1209 } else {
1210 sock->so_event = sonullevent;
1211 sock->so_eventarg = NULL;
1212 sock->so_eventmask = 0;
1213 }
1214 socket_unlock(sock, 1);
1215
1216 return (0);
1217 }
1218
1219 /*
1220 * Returns true whether or not a socket belongs to the kernel.
1221 */
1222 int
1223 sock_iskernel(socket_t so)
1224 {
1225 return (so && so->last_pid == 0);
1226 }