]> git.saurik.com Git - apple/xnu.git/blob - bsd/kern/uipc_usrreq.c
xnu-6153.81.5.tar.gz
[apple/xnu.git] / bsd / kern / uipc_usrreq.c
1 /*
2 * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1982, 1986, 1989, 1991, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94
61 */
62 /*
63 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
64 * support for mandatory and extensible security protections. This notice
65 * is included in support of clause 2.2 (b) of the Apple Public License,
66 * Version 2.0.
67 */
68
69 #include <sys/param.h>
70 #include <sys/systm.h>
71 #include <sys/kernel.h>
72 #include <sys/domain.h>
73 #include <sys/fcntl.h>
74 #include <sys/malloc.h> /* XXX must be before <sys/file.h> */
75 #include <sys/file_internal.h>
76 #include <sys/guarded.h>
77 #include <sys/filedesc.h>
78 #include <sys/lock.h>
79 #include <sys/mbuf.h>
80 #include <sys/namei.h>
81 #include <sys/proc_internal.h>
82 #include <sys/kauth.h>
83 #include <sys/protosw.h>
84 #include <sys/socket.h>
85 #include <sys/socketvar.h>
86 #include <sys/stat.h>
87 #include <sys/sysctl.h>
88 #include <sys/un.h>
89 #include <sys/unpcb.h>
90 #include <sys/vnode_internal.h>
91 #include <sys/kdebug.h>
92 #include <sys/mcache.h>
93
94 #include <kern/zalloc.h>
95 #include <kern/locks.h>
96
97 #if CONFIG_MACF
98 #include <security/mac_framework.h>
99 #endif /* CONFIG_MACF */
100
101 #include <mach/vm_param.h>
102
/*
 * Maximum number of FDs that can be passed in an mbuf
 */
#define UIPC_MAX_CMSG_FD        512

/*
 * Shorthand accessors: each f_xxx name expands to the matching fg_xxx
 * member reached through an f_fglob pointer.
 * NOTE(review): presumably applied to struct fileproc objects; the users
 * are outside this part of the file -- confirm.
 */
#define f_msgcount f_fglob->fg_msgcount
#define f_cred f_fglob->fg_cred
#define f_ops f_fglob->fg_ops
#define f_offset f_fglob->fg_offset
#define f_data f_fglob->fg_data
/* Zone that unp_attach() allocates each struct unpcb from. */
struct zone *unp_zone;
/* Generation counter; bumped in unp_attach() and unp_detach() under unp_list_mtx. */
static unp_gen_t unp_gencnt;
/* Number of attached unpcbs; adjusted in unp_attach()/unp_detach() under unp_list_mtx. */
static u_int unp_count;

/* Attribute and group handed to lck_mtx_init() for every per-pcb unp_mtx. */
static lck_attr_t *unp_mtx_attr;
static lck_grp_t *unp_mtx_grp;
static lck_grp_attr_t *unp_mtx_grp_attr;
/* Read/write lock taken exclusively around unp_shead/unp_dhead insert and remove. */
static lck_rw_t *unp_list_mtx;

/* Protects disconnect_in_progress (see unp_detach()). */
static lck_mtx_t *unp_disconnect_lock;
/*
 * Held while a vnode's v_socket back-pointer is read (unp_connect()) or
 * cleared (unp_detach()), so the two cannot race.
 */
static lck_mtx_t *unp_connect_lock;
/* Non-zero while one thread owns the "disconnect" section in unp_detach(). */
static u_int disconnect_in_progress;

extern lck_mtx_t *uipc_lock;
/* All stream (unp_shead) and datagram (unp_dhead) pcbs; see unp_attach(). */
static struct unp_head unp_shead, unp_dhead;
128
/*
 * mDNSResponder tracing.  When enabled, endpoints connected to
 * /var/run/mDNSResponder will be traced; during each send on
 * the traced socket, we log the PID and process name of the
 * sending process.  We also print out a bit of info related
 * to the data itself; this assumes ipc_msg_hdr in dnssd_ipc.h
 * of mDNSResponder stays the same.
 */
#define MDNSRESPONDER_PATH      "/var/run/mDNSResponder"

/* Exported as the "tracemdns" sysctl under _net_local_stream. */
static int unpst_tracemdns;     /* enable tracing */

#define MDNS_IPC_MSG_HDR_VERSION_1      1

/*
 * Local mirror of the message header that uipc_send() copies out of the
 * first bytes of a traced stream write.  uipc_send() compares `version'
 * against ntohl(MDNS_IPC_MSG_HDR_VERSION_1) and prints ntohl(op), i.e. it
 * treats both fields as network byte order.  The struct and its anonymous
 * union are packed, so no padding is inserted between members.
 */
struct mdns_ipc_msg_hdr {
	uint32_t version;
	uint32_t datalen;
	uint32_t ipc_flags;
	uint32_t op;
	union {
		void *context;
		uint32_t u32[2];
	} __attribute__((packed));
	uint32_t reg_index;
} __attribute__((packed));
154
/*
 * Unix communications domain.
 *
 * TODO:
 *	SEQPACKET, RDM
 *	rethink name space problems
 *	need a proper out-of-band
 *	lock pushdown
 */
/*
 * Placeholder address handed out (via dup_sockaddr) whenever a socket or
 * its peer has no bound name: AF_LOCAL family with all-zero sa_data.
 */
static struct sockaddr  sun_noname = { .sa_len = sizeof(sun_noname), .sa_family = AF_LOCAL, .sa_data = { 0 } };
/* Source of the fake inode numbers that uipc_sense() assigns on demand. */
static ino_t    unp_ino;                /* prototype for fake inode numbers */

/* Forward declarations for the file-local helpers defined below. */
static int      unp_attach(struct socket *);
static void     unp_detach(struct unpcb *);
static int      unp_bind(struct unpcb *, struct sockaddr *, proc_t);
static int      unp_connect(struct socket *, struct sockaddr *, proc_t);
static void     unp_disconnect(struct unpcb *);
static void     unp_shutdown(struct unpcb *);
static void     unp_drop(struct unpcb *, int);
__private_extern__ void unp_gc(void);
static void     unp_scan(struct mbuf *, void (*)(struct fileglob *, void *arg), void *arg);
static void     unp_mark(struct fileglob *, __unused void *);
static void     unp_discard(struct fileglob *, void *);
static int      unp_internalize(struct mbuf *, proc_t);
static int      unp_listen(struct unpcb *, proc_t);
static void     unpcb_to_compat(struct unpcb *, struct unpcb_compat *);
static void     unp_get_locks_in_order(struct socket *so, struct socket *conn_so);
182
183 static void
184 unp_get_locks_in_order(struct socket *so, struct socket *conn_so)
185 {
186 if (so < conn_so) {
187 socket_lock(conn_so, 1);
188 } else {
189 struct unpcb *unp = sotounpcb(so);
190 unp->unp_flags |= UNP_DONTDISCONNECT;
191 unp->rw_thrcount++;
192 socket_unlock(so, 0);
193
194 /* Get the locks in the correct order */
195 socket_lock(conn_so, 1);
196 socket_lock(so, 0);
197 unp->rw_thrcount--;
198 if (unp->rw_thrcount == 0) {
199 unp->unp_flags &= ~UNP_DONTDISCONNECT;
200 wakeup(unp);
201 }
202 }
203 }
204
205 static int
206 uipc_abort(struct socket *so)
207 {
208 struct unpcb *unp = sotounpcb(so);
209
210 if (unp == 0) {
211 return EINVAL;
212 }
213 unp_drop(unp, ECONNABORTED);
214 unp_detach(unp);
215 sofree(so);
216 return 0;
217 }
218
219 static int
220 uipc_accept(struct socket *so, struct sockaddr **nam)
221 {
222 struct unpcb *unp = sotounpcb(so);
223
224 if (unp == 0) {
225 return EINVAL;
226 }
227
228 /*
229 * Pass back name of connected socket,
230 * if it was bound and we are still connected
231 * (our peer may have closed already!).
232 */
233 if (unp->unp_conn && unp->unp_conn->unp_addr) {
234 *nam = dup_sockaddr((struct sockaddr *)
235 unp->unp_conn->unp_addr, 1);
236 } else {
237 *nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
238 }
239 return 0;
240 }
241
242 /*
243 * Returns: 0 Success
244 * EISCONN
245 * unp_attach:
246 */
247 static int
248 uipc_attach(struct socket *so, __unused int proto, __unused proc_t p)
249 {
250 struct unpcb *unp = sotounpcb(so);
251
252 if (unp != 0) {
253 return EISCONN;
254 }
255 return unp_attach(so);
256 }
257
258 static int
259 uipc_bind(struct socket *so, struct sockaddr *nam, proc_t p)
260 {
261 struct unpcb *unp = sotounpcb(so);
262
263 if (unp == 0) {
264 return EINVAL;
265 }
266
267 return unp_bind(unp, nam, p);
268 }
269
270 /*
271 * Returns: 0 Success
272 * EINVAL
273 * unp_connect:??? [See elsewhere in this file]
274 */
275 static int
276 uipc_connect(struct socket *so, struct sockaddr *nam, proc_t p)
277 {
278 struct unpcb *unp = sotounpcb(so);
279
280 if (unp == 0) {
281 return EINVAL;
282 }
283 return unp_connect(so, nam, p);
284 }
285
286 /*
287 * Returns: 0 Success
288 * EINVAL
289 * unp_connect2:EPROTOTYPE Protocol wrong type for socket
290 * unp_connect2:EINVAL Invalid argument
291 */
292 static int
293 uipc_connect2(struct socket *so1, struct socket *so2)
294 {
295 struct unpcb *unp = sotounpcb(so1);
296
297 if (unp == 0) {
298 return EINVAL;
299 }
300
301 return unp_connect2(so1, so2);
302 }
303
304 /* control is EOPNOTSUPP */
305
306 static int
307 uipc_detach(struct socket *so)
308 {
309 struct unpcb *unp = sotounpcb(so);
310
311 if (unp == 0) {
312 return EINVAL;
313 }
314
315 LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
316 unp_detach(unp);
317 return 0;
318 }
319
320 static int
321 uipc_disconnect(struct socket *so)
322 {
323 struct unpcb *unp = sotounpcb(so);
324
325 if (unp == 0) {
326 return EINVAL;
327 }
328 unp_disconnect(unp);
329 return 0;
330 }
331
332 /*
333 * Returns: 0 Success
334 * EINVAL
335 */
336 static int
337 uipc_listen(struct socket *so, __unused proc_t p)
338 {
339 struct unpcb *unp = sotounpcb(so);
340
341 if (unp == 0 || unp->unp_vnode == 0) {
342 return EINVAL;
343 }
344 return unp_listen(unp, p);
345 }
346
347 static int
348 uipc_peeraddr(struct socket *so, struct sockaddr **nam)
349 {
350 struct unpcb *unp = sotounpcb(so);
351
352 if (unp == NULL) {
353 return EINVAL;
354 }
355 if (unp->unp_conn != NULL && unp->unp_conn->unp_addr != NULL) {
356 *nam = dup_sockaddr((struct sockaddr *)
357 unp->unp_conn->unp_addr, 1);
358 } else {
359 *nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
360 }
361 return 0;
362 }
363
364 static int
365 uipc_rcvd(struct socket *so, __unused int flags)
366 {
367 struct unpcb *unp = sotounpcb(so);
368 struct socket *so2;
369
370 if (unp == 0) {
371 return EINVAL;
372 }
373 switch (so->so_type) {
374 case SOCK_DGRAM:
375 panic("uipc_rcvd DGRAM?");
376 /*NOTREACHED*/
377
378 case SOCK_STREAM:
379 #define rcv (&so->so_rcv)
380 #define snd (&so2->so_snd)
381 if (unp->unp_conn == 0) {
382 break;
383 }
384
385 so2 = unp->unp_conn->unp_socket;
386 unp_get_locks_in_order(so, so2);
387 /*
388 * Adjust backpressure on sender
389 * and wakeup any waiting to write.
390 */
391 snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
392 unp->unp_mbcnt = rcv->sb_mbcnt;
393 snd->sb_hiwat += unp->unp_cc - rcv->sb_cc;
394 unp->unp_cc = rcv->sb_cc;
395 if (sb_notify(&so2->so_snd)) {
396 sowakeup(so2, &so2->so_snd, so);
397 }
398
399 socket_unlock(so2, 1);
400
401 #undef snd
402 #undef rcv
403 break;
404
405 default:
406 panic("uipc_rcvd unknown socktype");
407 }
408 return 0;
409 }
410
411 /* pru_rcvoob is EOPNOTSUPP */
412
/*
 * pru_send handler: deliver the data chain m (and optional control
 * message) from so to its peer's receive buffer.
 *
 * Ownership: whatever is still referenced by m / control when the
 * function reaches the `release' label is freed with m_freem().  Paths
 * that handed the chains to a socket buffer clear the pointers first.
 *
 * The socket lock of so is dropped and re-taken around unp_internalize()
 * and unp_dispose(), and possibly inside unp_get_locks_in_order().
 *
 * Returns:	0			Success
 *		EINVAL
 *		EOPNOTSUPP
 *		EPIPE
 *		ENOTCONN
 *		EISCONN
 *	unp_internalize:EINVAL
 *	unp_internalize:EBADF
 *	unp_connect:EAFNOSUPPORT	Address family not supported
 *	unp_connect:EINVAL		Invalid argument
 *	unp_connect:ENOTSOCK		Not a socket
 *	unp_connect:ECONNREFUSED	Connection refused
 *	unp_connect:EISCONN		Socket is connected
 *	unp_connect:EPROTOTYPE		Protocol wrong type for socket
 *	unp_connect:???
 *	sbappendaddr:ENOBUFS		[5th argument, contents modified]
 *	sbappendaddr:???		[whatever a filter author chooses]
 */
static int
uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
    struct mbuf *control, proc_t p)
{
	int error = 0;
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;

	if (unp == 0) {
		error = EINVAL;
		goto release;
	}
	/* Out-of-band data is not supported on local sockets. */
	if (flags & PRUS_OOB) {
		error = EOPNOTSUPP;
		goto release;
	}

	if (control) {
		/* release lock to avoid deadlock (4436174) */
		socket_unlock(so, 0);
		error = unp_internalize(control, p);
		socket_lock(so, 0);
		if (error) {
			goto release;
		}
	}

	switch (so->so_type) {
	case SOCK_DGRAM:
	{
		struct sockaddr *from;

		/*
		 * An explicit destination means a temporary connection for
		 * this one datagram; it is torn down again below.  Supplying
		 * a destination on an already connected socket is an error.
		 */
		if (nam) {
			if (unp->unp_conn) {
				error = EISCONN;
				break;
			}
			error = unp_connect(so, nam, p);
			if (error) {
				break;
			}
		} else {
			if (unp->unp_conn == 0) {
				error = ENOTCONN;
				break;
			}
		}

		so2 = unp->unp_conn->unp_socket;
		/* A socket connected to itself is already locked by the caller. */
		if (so != so2) {
			unp_get_locks_in_order(so, so2);
		}

		/* Source address reported to the receiver. */
		if (unp->unp_addr) {
			from = (struct sockaddr *)unp->unp_addr;
		} else {
			from = &sun_noname;
		}
		/*
		 * sbappendaddr() will fail when the receiver runs out of
		 * space; in contrast to SOCK_STREAM, we will lose messages
		 * for the SOCK_DGRAM case when the receiver's queue overflows.
		 * SB_UNIX on the socket buffer implies that the callee will
		 * not free the control message, if any, because we would need
		 * to call unp_dispose() on it.
		 */
		if (sbappendaddr(&so2->so_rcv, from, m, control, &error)) {
			control = NULL;
			if (sb_notify(&so2->so_rcv)) {
				sowakeup(so2, &so2->so_rcv, so);
			}
		} else if (control != NULL && error == 0) {
			/* A socket filter took control; don't touch it */
			control = NULL;
		}

		if (so != so2) {
			socket_unlock(so2, 1);
		}

		/*
		 * m is cleared whether or not the append succeeded, so it is
		 * never freed at `release' on this path.
		 * NOTE(review): presumably sbappendaddr() disposes of m on
		 * failure -- confirm.
		 */
		m = NULL;
		if (nam) {
			unp_disconnect(unp);
		}
		break;
	}

	case SOCK_STREAM: {
		int didreceive = 0;
/* rcv: the peer's receive buffer; snd: our own send buffer. */
#define rcv (&so2->so_rcv)
#define snd (&so->so_snd)
		/* Connect if not connected yet. */
		/*
		 * Note: A better implementation would complain
		 * if not equal to the peer's address.
		 */
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			if (nam) {
				error = unp_connect(so, nam, p);
				if (error) {
					break;  /* XXX */
				}
			} else {
				error = ENOTCONN;
				break;
			}
		}

		if (so->so_state & SS_CANTSENDMORE) {
			error = EPIPE;
			break;
		}
		if (unp->unp_conn == 0) {
			panic("uipc_send connected but no connection?");
		}

		so2 = unp->unp_conn->unp_socket;
		unp_get_locks_in_order(so, so2);

		/* Check socket state again as we might have unlocked the socket
		 * while trying to get the locks in order
		 */

		if ((so->so_state & SS_CANTSENDMORE)) {
			error = EPIPE;
			socket_unlock(so2, 1);
			break;
		}

		/*
		 * Optional mDNSResponder tracing: peek at the message header
		 * and log the sender when the version field matches.
		 */
		if (unp->unp_flags & UNP_TRACE_MDNS) {
			struct mdns_ipc_msg_hdr hdr;

			if (mbuf_copydata(m, 0, sizeof(hdr), &hdr) == 0 &&
			    hdr.version == ntohl(MDNS_IPC_MSG_HDR_VERSION_1)) {
				printf("%s[mDNSResponder] pid=%d (%s): op=0x%x\n",
				    __func__, p->p_pid, p->p_comm, ntohl(hdr.op));
			}
		}

		/*
		 * Send to paired receive port, and then reduce send buffer
		 * hiwater marks to maintain backpressure.  Wake up readers.
		 * SB_UNIX flag will allow new record to be appended to the
		 * receiver's queue even when it is already full.  It is
		 * possible, however, that append might fail.  In that case,
		 * we will need to call unp_dispose() on the control message;
		 * the callee will not free it since SB_UNIX is set.
		 */
		didreceive = control ?
		    sbappendcontrol(rcv, m, control, &error) : sbappend(rcv, m);

		/*
		 * Charge the growth of the peer's receive buffer against our
		 * send limits; sb_hiwat is clamped at zero rather than being
		 * allowed to wrap.
		 */
		snd->sb_mbmax -= rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
		unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
		if ((int32_t)snd->sb_hiwat >=
		    (int32_t)(rcv->sb_cc - unp->unp_conn->unp_cc)) {
			snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc;
		} else {
			snd->sb_hiwat = 0;
		}
		unp->unp_conn->unp_cc = rcv->sb_cc;
		if (didreceive) {
			control = NULL;
			if (sb_notify(&so2->so_rcv)) {
				sowakeup(so2, &so2->so_rcv, so);
			}
		} else if (control != NULL && error == 0) {
			/* A socket filter took control; don't touch it */
			control = NULL;
		}

		socket_unlock(so2, 1);
		/* As in the datagram case, m is not freed at `release' from here on. */
		m = NULL;
#undef snd
#undef rcv
	}
	break;

	default:
		panic("uipc_send unknown socktype");
	}

	/*
	 * SEND_EOF is equivalent to a SEND followed by
	 * a SHUTDOWN.
	 *
	 * This runs for every path that leaves the switch via `break',
	 * including those that set an error.
	 */
	if (flags & PRUS_EOF) {
		socantsendmore(so);
		unp_shutdown(unp);
	}

	/*
	 * The control message was internalized above but not delivered:
	 * undo that with unp_dispose(), with the socket lock dropped, before
	 * the mbuf itself is freed below.
	 */
	if (control && error != 0) {
		socket_unlock(so, 0);
		unp_dispose(control);
		socket_lock(so, 0);
	}

release:
	if (control) {
		m_freem(control);
	}
	if (m) {
		m_freem(m);
	}
	return error;
}
637
638 static int
639 uipc_sense(struct socket *so, void *ub, int isstat64)
640 {
641 struct unpcb *unp = sotounpcb(so);
642 struct socket *so2;
643 blksize_t blksize;
644
645 if (unp == 0) {
646 return EINVAL;
647 }
648
649 blksize = so->so_snd.sb_hiwat;
650 if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) {
651 so2 = unp->unp_conn->unp_socket;
652 blksize += so2->so_rcv.sb_cc;
653 }
654 if (unp->unp_ino == 0) {
655 unp->unp_ino = unp_ino++;
656 }
657
658 if (isstat64 != 0) {
659 struct stat64 *sb64;
660
661 sb64 = (struct stat64 *)ub;
662 sb64->st_blksize = blksize;
663 sb64->st_dev = NODEV;
664 sb64->st_ino = (ino64_t)unp->unp_ino;
665 } else {
666 struct stat *sb;
667
668 sb = (struct stat *)ub;
669 sb->st_blksize = blksize;
670 sb->st_dev = NODEV;
671 sb->st_ino = (ino_t)(uintptr_t)unp->unp_ino;
672 }
673
674 return 0;
675 }
676
677 /*
678 * Returns: 0 Success
679 * EINVAL
680 *
681 * Notes: This is not strictly correct, as unp_shutdown() also calls
682 * socantrcvmore(). These should maybe both be conditionalized
683 * on the 'how' argument in soshutdown() as called from the
684 * shutdown() system call.
685 */
686 static int
687 uipc_shutdown(struct socket *so)
688 {
689 struct unpcb *unp = sotounpcb(so);
690
691 if (unp == 0) {
692 return EINVAL;
693 }
694 socantsendmore(so);
695 unp_shutdown(unp);
696 return 0;
697 }
698
699 /*
700 * Returns: 0 Success
701 * EINVAL Invalid argument
702 */
703 static int
704 uipc_sockaddr(struct socket *so, struct sockaddr **nam)
705 {
706 struct unpcb *unp = sotounpcb(so);
707
708 if (unp == NULL) {
709 return EINVAL;
710 }
711 if (unp->unp_addr != NULL) {
712 *nam = dup_sockaddr((struct sockaddr *)unp->unp_addr, 1);
713 } else {
714 *nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
715 }
716 return 0;
717 }
718
719 struct pr_usrreqs uipc_usrreqs = {
720 .pru_abort = uipc_abort,
721 .pru_accept = uipc_accept,
722 .pru_attach = uipc_attach,
723 .pru_bind = uipc_bind,
724 .pru_connect = uipc_connect,
725 .pru_connect2 = uipc_connect2,
726 .pru_detach = uipc_detach,
727 .pru_disconnect = uipc_disconnect,
728 .pru_listen = uipc_listen,
729 .pru_peeraddr = uipc_peeraddr,
730 .pru_rcvd = uipc_rcvd,
731 .pru_send = uipc_send,
732 .pru_sense = uipc_sense,
733 .pru_shutdown = uipc_shutdown,
734 .pru_sockaddr = uipc_sockaddr,
735 .pru_sosend = sosend,
736 .pru_soreceive = soreceive,
737 };
738
739 int
740 uipc_ctloutput(struct socket *so, struct sockopt *sopt)
741 {
742 struct unpcb *unp = sotounpcb(so);
743 int error = 0;
744 pid_t peerpid;
745 struct socket *peerso;
746
747 switch (sopt->sopt_dir) {
748 case SOPT_GET:
749 switch (sopt->sopt_name) {
750 case LOCAL_PEERCRED:
751 if (unp->unp_flags & UNP_HAVEPC) {
752 error = sooptcopyout(sopt, &unp->unp_peercred,
753 sizeof(unp->unp_peercred));
754 } else {
755 if (so->so_type == SOCK_STREAM) {
756 error = ENOTCONN;
757 } else {
758 error = EINVAL;
759 }
760 }
761 break;
762 case LOCAL_PEERPID:
763 case LOCAL_PEEREPID:
764 if (unp->unp_conn == NULL) {
765 error = ENOTCONN;
766 break;
767 }
768 peerso = unp->unp_conn->unp_socket;
769 if (peerso == NULL) {
770 panic("peer is connected but has no socket?");
771 }
772 unp_get_locks_in_order(so, peerso);
773 if (sopt->sopt_name == LOCAL_PEEREPID &&
774 peerso->so_flags & SOF_DELEGATED) {
775 peerpid = peerso->e_pid;
776 } else {
777 peerpid = peerso->last_pid;
778 }
779 socket_unlock(peerso, 1);
780 error = sooptcopyout(sopt, &peerpid, sizeof(peerpid));
781 break;
782 case LOCAL_PEERUUID:
783 case LOCAL_PEEREUUID:
784 if (unp->unp_conn == NULL) {
785 error = ENOTCONN;
786 break;
787 }
788 peerso = unp->unp_conn->unp_socket;
789 if (peerso == NULL) {
790 panic("peer is connected but has no socket?");
791 }
792 unp_get_locks_in_order(so, peerso);
793 if (sopt->sopt_name == LOCAL_PEEREUUID &&
794 peerso->so_flags & SOF_DELEGATED) {
795 error = sooptcopyout(sopt, &peerso->e_uuid,
796 sizeof(peerso->e_uuid));
797 } else {
798 error = sooptcopyout(sopt, &peerso->last_uuid,
799 sizeof(peerso->last_uuid));
800 }
801 socket_unlock(peerso, 1);
802 break;
803 default:
804 error = EOPNOTSUPP;
805 break;
806 }
807 break;
808 case SOPT_SET:
809 default:
810 error = EOPNOTSUPP;
811 break;
812 }
813
814 return error;
815 }
816
/*
 * Both send and receive buffers are allocated PIPSIZ bytes of buffering
 * for stream sockets, although the total for sender and receiver is
 * actually only PIPSIZ.
 * Datagram sockets really use the sendspace as the maximum datagram size,
 * and don't really want to reserve the sendspace.  Their recvspace should
 * be large enough for at least one max-size datagram plus address.
 */
#ifndef PIPSIZ
#define PIPSIZ  8192
#endif
/* Defaults that unp_attach() passes to soreserve() for each socket type. */
static u_int32_t        unpst_sendspace = PIPSIZ;
static u_int32_t        unpst_recvspace = PIPSIZ;
static u_int32_t        unpdg_sendspace = 2 * 1024;       /* really max datagram size */
static u_int32_t        unpdg_recvspace = 4 * 1024;

static int      unp_rights;                     /* file descriptors in flight */
static int      unp_disposed;                   /* discarded file descriptors */

/*
 * Tunables for stream sockets (read/write): default send and receive
 * space, and the mDNSResponder tracing switch.
 */
SYSCTL_DECL(_net_local_stream);
SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW | CTLFLAG_LOCKED,
    &unpst_sendspace, 0, "");
SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW | CTLFLAG_LOCKED,
    &unpst_recvspace, 0, "");
SYSCTL_INT(_net_local_stream, OID_AUTO, tracemdns, CTLFLAG_RW | CTLFLAG_LOCKED,
    &unpst_tracemdns, 0, "");
/*
 * Tunables for datagram sockets (read/write).  Note that the send space
 * is published under the name "maxdgram".
 */
SYSCTL_DECL(_net_local_dgram);
SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW | CTLFLAG_LOCKED,
    &unpdg_sendspace, 0, "");
SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW | CTLFLAG_LOCKED,
    &unpdg_recvspace, 0, "");
/* Read-only view of unp_rights, the count of descriptors in flight. */
SYSCTL_DECL(_net_local);
SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD | CTLFLAG_LOCKED, &unp_rights, 0, "");
850
851 /*
852 * Returns: 0 Success
853 * ENOBUFS
854 * soreserve:ENOBUFS
855 */
856 static int
857 unp_attach(struct socket *so)
858 {
859 struct unpcb *unp;
860 int error = 0;
861
862 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
863 switch (so->so_type) {
864 case SOCK_STREAM:
865 error = soreserve(so, unpst_sendspace, unpst_recvspace);
866 break;
867
868 case SOCK_DGRAM:
869 error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
870 break;
871
872 default:
873 panic("unp_attach");
874 }
875 if (error) {
876 return error;
877 }
878 }
879 unp = (struct unpcb *)zalloc(unp_zone);
880 if (unp == NULL) {
881 return ENOBUFS;
882 }
883 bzero(unp, sizeof(*unp));
884
885 lck_mtx_init(&unp->unp_mtx,
886 unp_mtx_grp, unp_mtx_attr);
887
888 lck_rw_lock_exclusive(unp_list_mtx);
889 LIST_INIT(&unp->unp_refs);
890 unp->unp_socket = so;
891 unp->unp_gencnt = ++unp_gencnt;
892 unp_count++;
893 LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ?
894 &unp_dhead : &unp_shead, unp, unp_link);
895 lck_rw_done(unp_list_mtx);
896 so->so_pcb = (caddr_t)unp;
897 /*
898 * Mark AF_UNIX socket buffers accordingly so that:
899 *
900 * a. In the SOCK_STREAM case, socket buffer append won't fail due to
901 * the lack of space; this essentially loosens the sbspace() check,
902 * since there is disconnect between sosend() and uipc_send() with
903 * respect to flow control that might result in our dropping the
904 * data in uipc_send(). By setting this, we allow for slightly
905 * more records to be appended to the receiving socket to avoid
906 * losing data (which we can't afford in the SOCK_STREAM case).
907 * Flow control still takes place since we adjust the sender's
908 * hiwat during each send. This doesn't affect the SOCK_DGRAM
909 * case and append would still fail when the queue overflows.
910 *
911 * b. In the presence of control messages containing internalized
912 * file descriptors, the append routines will not free them since
913 * we'd need to undo the work first via unp_dispose().
914 */
915 so->so_rcv.sb_flags |= SB_UNIX;
916 so->so_snd.sb_flags |= SB_UNIX;
917 return 0;
918 }
919
/*
 * Tear down a pcb: unlink it from the global list, release the bound
 * vnode (if any), disconnect from the peer (if any), reset every socket
 * that is still connected to this one, and finally mark the socket as
 * disconnected with its pcb being cleared.
 *
 * The socket lock of unp->unp_socket is dropped and re-taken several
 * times in here (tracked by so_locked for the second phase); it is held
 * again by the time soisdisconnected() is called.
 */
static void
unp_detach(struct unpcb *unp)
{
	int so_locked = 1;

	/* Remove from the global stream/datagram list. */
	lck_rw_lock_exclusive(unp_list_mtx);
	LIST_REMOVE(unp, unp_link);
	--unp_count;
	++unp_gencnt;
	lck_rw_done(unp_list_mtx);
	if (unp->unp_vnode) {
		struct vnode *tvp = NULL;
		/* Drop the socket lock so unp_connect_lock can be taken first. */
		socket_unlock(unp->unp_socket, 0);

		/* Holding unp_connect_lock will avoid a race between
		 * a thread closing the listening socket and a thread
		 * connecting to it.
		 */
		lck_mtx_lock(unp_connect_lock);
		socket_lock(unp->unp_socket, 0);
		/* Re-check: the socket lock was dropped in between. */
		if (unp->unp_vnode) {
			tvp = unp->unp_vnode;
			unp->unp_vnode->v_socket = NULL;
			unp->unp_vnode = NULL;
		}
		lck_mtx_unlock(unp_connect_lock);
		if (tvp != NULL) {
			vnode_rele(tvp);         /* drop the usecount */
		}
	}
	if (unp->unp_conn) {
		unp_disconnect(unp);
	}
	/* Reset, one at a time, every socket still referencing this pcb. */
	while (unp->unp_refs.lh_first) {
		struct unpcb *unp2 = NULL;

		/* This datagram socket is connected to one or more
		 * sockets. In order to avoid a race condition between removing
		 * this reference and closing the connected socket, we need
		 * to check disconnect_in_progress
		 */
		if (so_locked == 1) {
			socket_unlock(unp->unp_socket, 0);
			so_locked = 0;
		}
		/* Wait for our turn, then claim the disconnect section. */
		lck_mtx_lock(unp_disconnect_lock);
		while (disconnect_in_progress != 0) {
			(void)msleep((caddr_t)&disconnect_in_progress, unp_disconnect_lock,
			    PSOCK, "disconnect", NULL);
		}
		disconnect_in_progress = 1;
		lck_mtx_unlock(unp_disconnect_lock);

		/* Now we are sure that any unpcb socket disconnect is not happening */
		if (unp->unp_refs.lh_first != NULL) {
			unp2 = unp->unp_refs.lh_first;
			socket_lock(unp2->unp_socket, 1);
		}

		/* Release the disconnect section and wake any waiters. */
		lck_mtx_lock(unp_disconnect_lock);
		disconnect_in_progress = 0;
		wakeup(&disconnect_in_progress);
		lck_mtx_unlock(unp_disconnect_lock);

		if (unp2 != NULL) {
			/* We already locked this socket and have a reference on it */
			unp_drop(unp2, ECONNRESET);
			socket_unlock(unp2->unp_socket, 1);
		}
	}

	/* Re-take our own socket lock if the loop above released it. */
	if (so_locked == 0) {
		socket_lock(unp->unp_socket, 0);
		so_locked = 1;
	}
	soisdisconnected(unp->unp_socket);
	/* makes sure we're getting dealloced */
	unp->unp_socket->so_flags |= SOF_PCBCLEARING;
}
999
/*
 * Bind a local socket to a filesystem path by creating a VSOCK vnode at
 * that path and cross-linking vnode and socket.
 *
 * The socket lock is dropped for the duration of the path lookup and
 * vnode creation and is held again on every return path.
 *
 * Returns:	0			Success
 *		EAFNOSUPPORT
 *		EINVAL
 *		EADDRINUSE
 *	namei:???			[anything namei can return]
 *	vnode_authorize:???		[anything vnode_authorize can return]
 *
 * Notes:	p at this point is the current process, as this function is
 *		only called by sobind().
 */
static int
unp_bind(
	struct unpcb *unp,
	struct sockaddr *nam,
	proc_t p)
{
	struct sockaddr_un *soun = (struct sockaddr_un *)nam;
	struct vnode *vp, *dvp;
	struct vnode_attr va;
	vfs_context_t ctx = vfs_context_current();
	int error, namelen;
	struct nameidata nd;
	struct socket *so = unp->unp_socket;
	char buf[SOCK_MAXADDRLEN];

	/* A family of 0 is tolerated in addition to AF_UNIX. */
	if (nam->sa_family != 0 && nam->sa_family != AF_UNIX) {
		return EAFNOSUPPORT;
	}

	/*
	 * Check if the socket is already bound to an address
	 */
	if (unp->unp_vnode != NULL) {
		return EINVAL;
	}
	/*
	 * Check if the socket may have been shut down
	 */
	if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
	    (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
		return EINVAL;
	}

	/* Path length is whatever sun_len covers beyond the fixed header. */
	namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
	if (namelen <= 0) {
		return EINVAL;
	}
	/*
	 * Note: sun_path is not a zero terminated "C" string
	 */
	if (namelen >= SOCK_MAXADDRLEN) {
		return EINVAL;
	}
	/* Make a NUL-terminated private copy of the path for namei(). */
	bcopy(soun->sun_path, buf, namelen);
	buf[namelen] = 0;

	socket_unlock(so, 0);

	NDINIT(&nd, CREATE, OP_MKFIFO, FOLLOW | LOCKPARENT, UIO_SYSSPACE,
	    CAST_USER_ADDR_T(buf), ctx);
	/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
	error = namei(&nd);
	if (error) {
		socket_lock(so, 0);
		return error;
	}
	dvp = nd.ni_dvp;
	vp = nd.ni_vp;

	/* Something already exists at that path: refuse to bind. */
	if (vp != NULL) {
		/*
		 * need to do this before the vnode_put of dvp
		 * since we may have to release an fs_nodelock
		 */
		nameidone(&nd);

		vnode_put(dvp);
		vnode_put(vp);

		socket_lock(so, 0);
		return EADDRINUSE;
	}

	/* New node: a socket whose mode is ACCESSPERMS filtered by the umask. */
	VATTR_INIT(&va);
	VATTR_SET(&va, va_type, VSOCK);
	VATTR_SET(&va, va_mode, (ACCESSPERMS & ~p->p_fd->fd_cmask));

	/*
	 * The checks below form one chain: each `if (error == 0)' that ends
	 * a conditional section guards whichever statement follows it once
	 * the preprocessor is done, so a failing check skips all later ones.
	 */
#if CONFIG_MACF
	error = mac_vnode_check_create(ctx,
	    nd.ni_dvp, &nd.ni_cnd, &va);

	if (error == 0)
#endif /* CONFIG_MACF */
#if CONFIG_MACF_SOCKET_SUBSET
	error = mac_vnode_check_uipc_bind(ctx,
	    nd.ni_dvp, &nd.ni_cnd, &va);

	if (error == 0)
#endif /* CONFIG_MACF_SOCKET_SUBSET */
	/* authorize before creating */
	error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);

	if (!error) {
		/* create the socket */
		error = vn_create(dvp, &vp, &nd, &va, 0, 0, NULL, ctx);
	}

	nameidone(&nd);
	vnode_put(dvp);

	if (error) {
		socket_lock(so, 0);
		return error;
	}

	socket_lock(so, 0);

	/*
	 * Re-check: the socket was unlocked above, so unp_vnode may have been
	 * set in the meantime.  The vnode created above is only released
	 * here, not removed from the filesystem.
	 */
	if (unp->unp_vnode != NULL) {
		vnode_put(vp); /* drop the iocount */
		return EINVAL;
	}

	error = vnode_ref(vp);  /* gain a longterm reference */
	if (error) {
		vnode_put(vp); /* drop the iocount */
		return error;
	}

	/* Cross-link vnode and socket and remember the bound address. */
	vp->v_socket = unp->unp_socket;
	unp->unp_vnode = vp;
	unp->unp_addr = (struct sockaddr_un *)dup_sockaddr(nam, 1);
	vnode_put(vp);          /* drop the iocount */

	return 0;
}
1136
1137
/*
 * unp_connect
 *
 * Connect `so' to the socket bound at the filesystem path carried in `nam'.
 * The path is resolved with namei(); the resulting vnode must be of type
 * VSOCK and must pass the MAC check (when configured) and a
 * KAUTH_VNODE_WRITE_DATA authorization.  For protocols flagged
 * PR_CONNREQUIRED a fresh socket (so3) is spawned from the listener with
 * sonewconn() and `so' is connected to that one; otherwise `so' is
 * connected directly to the socket hanging off the vnode.
 *
 * Locking:	the lock on `so' is dropped before the path lookup and is
 *		re-taken on every path before returning (asserted at `out').
 *		When two socket locks are needed they are taken in ascending
 *		address order.
 *
 * Returns:	0			Success
 *		EAFNOSUPPORT		Address family not supported
 *		EINVAL			Invalid argument
 *		ENOTSOCK		Not a socket
 *		ECONNREFUSED		Connection refused
 *		EPROTOTYPE		Protocol wrong type for socket
 *		EISCONN			Socket is connected
 *	unp_connect2:EPROTOTYPE		Protocol wrong type for socket
 *	unp_connect2:EINVAL		Invalid argument
 *	namei:???			[anything namei can return]
 *	vnode_authorize:????		[anything vnode_authorize can return]
 *
 * Notes:	p at this point is the current process, as this function is
 *		only called by sosend(), sendfile(), and soconnectlock().
 */
static int
unp_connect(struct socket *so, struct sockaddr *nam, __unused proc_t p)
{
	struct sockaddr_un *soun = (struct sockaddr_un *)nam;
	struct vnode *vp;
	struct socket *so2, *so3, *list_so = NULL;
	struct unpcb *unp, *unp2, *unp3;
	vfs_context_t ctx = vfs_context_current();
	int error, len;
	struct nameidata nd;
	char buf[SOCK_MAXADDRLEN];

	if (nam->sa_family != 0 && nam->sa_family != AF_UNIX) {
		return EAFNOSUPPORT;
	}

	unp = sotounpcb(so);
	so2 = so3 = NULL;

	/* Number of path bytes that follow the sockaddr_un header. */
	len = nam->sa_len - offsetof(struct sockaddr_un, sun_path);
	if (len <= 0) {
		return EINVAL;
	}
	/*
	 * Note: sun_path is not a zero terminated "C" string
	 */
	if (len >= SOCK_MAXADDRLEN) {
		return EINVAL;
	}
	/* Build a NUL-terminated copy of the path for namei(). */
	bcopy(soun->sun_path, buf, len);
	buf[len] = 0;

	/*
	 * Drop the socket lock across the lookup and the authorization
	 * checks; every exit path below re-takes it before leaving.
	 */
	socket_unlock(so, 0);

	NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
	    CAST_USER_ADDR_T(buf), ctx);
	error = namei(&nd);
	if (error) {
		socket_lock(so, 0);
		return error;
	}
	nameidone(&nd);
	/* vp is released with vnode_put() at `out'. */
	vp = nd.ni_vp;
	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		socket_lock(so, 0);
		goto out;
	}

#if CONFIG_MACF_SOCKET_SUBSET
	error = mac_vnode_check_uipc_connect(ctx, vp, so);
	if (error) {
		socket_lock(so, 0);
		goto out;
	}
#endif /* CONFIG_MACF_SOCKET_SUBSET */

	error = vnode_authorize(vp, NULL, KAUTH_VNODE_WRITE_DATA, ctx);
	if (error) {
		socket_lock(so, 0);
		goto out;
	}

	/*
	 * unp_connect_lock is held from the check of vp->v_socket until that
	 * socket has been locked.
	 */
	lck_mtx_lock(unp_connect_lock);

	if (vp->v_socket == 0) {
		lck_mtx_unlock(unp_connect_lock);
		error = ECONNREFUSED;
		socket_lock(so, 0);
		goto out;
	}

	socket_lock(vp->v_socket, 1); /* Get a reference on the listening socket */
	so2 = vp->v_socket;
	lck_mtx_unlock(unp_connect_lock);


	/* The target socket no longer has a pcb: refuse the connection. */
	if (so2->so_pcb == NULL) {
		error = ECONNREFUSED;
		if (so != so2) {
			socket_unlock(so2, 1);
			socket_lock(so, 0);
		} else {
			/* Release the reference held for the listen socket */
			VERIFY(so2->so_usecount > 0);
			so2->so_usecount--;
		}
		goto out;
	}

	/*
	 * so2 is locked, so is not (unless so == so2, in which case the
	 * lock taken above already covers it).  Take the locks in
	 * ascending address order.
	 */
	if (so < so2) {
		socket_unlock(so2, 0);
		socket_lock(so, 0);
		socket_lock(so2, 0);
	} else if (so > so2) {
		socket_lock(so, 0);
	}
	/*
	 * Check if socket was connected while we were trying to
	 * get the socket locks in order.
	 * XXX - probably shouldn't return an error for SOCK_DGRAM
	 */
	if ((so->so_state & SS_ISCONNECTED) != 0) {
		error = EISCONN;
		goto decref_out;
	}

	if (so->so_type != so2->so_type) {
		error = EPROTOTYPE;
		goto decref_out;
	}

	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		/* Release the incoming socket but keep a reference */
		socket_unlock(so, 0);

		/*
		 * The target must be accepting connections and must be able
		 * to spawn a new socket (so3) for this connection.
		 */
		if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
		    (so3 = sonewconn(so2, 0, nam)) == 0) {
			error = ECONNREFUSED;
			if (so != so2) {
				socket_unlock(so2, 1);
				socket_lock(so, 0);
			} else {
				socket_lock(so, 0);
				/* Release the reference held for
				 * listen socket.
				 */
				VERIFY(so2->so_usecount > 0);
				so2->so_usecount--;
			}
			goto out;
		}
		unp2 = sotounpcb(so2);
		unp3 = sotounpcb(so3);
		/* The new socket inherits a copy of the listener's address. */
		if (unp2->unp_addr) {
			unp3->unp_addr = (struct sockaddr_un *)
			    dup_sockaddr((struct sockaddr *)unp2->unp_addr, 1);
		}

		/*
		 * unp_peercred management:
		 *
		 * The connecter's (client's) credentials are copied
		 * from its process structure at the time of connect()
		 * (which is now).
		 */
		cru2x(vfs_context_ucred(ctx), &unp3->unp_peercred);
		unp3->unp_flags |= UNP_HAVEPC;
		/*
		 * The receiver's (server's) credentials are copied
		 * from the unp_peercred member of socket on which the
		 * former called listen(); unp_listen() cached that
		 * process's credentials at that time so we can use
		 * them now.
		 */
		KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED,
		    ("unp_connect: listener without cached peercred"));

		/* Here we need to have both so and so2 locks and so2
		 * is already locked. Lock ordering is required.
		 */
		if (so < so2) {
			socket_unlock(so2, 0);
			socket_lock(so, 0);
			socket_lock(so2, 0);
		} else {
			socket_lock(so, 0);
		}

		/* Check again if the socket state changed when its lock was released */
		if ((so->so_state & SS_ISCONNECTED) != 0) {
			error = EISCONN;
			/* Drop the listener and dispose of the unused so3. */
			socket_unlock(so2, 1);
			socket_lock(so3, 0);
			sofreelastref(so3, 1);
			goto out;
		}
		memcpy(&unp->unp_peercred, &unp2->unp_peercred,
		    sizeof(unp->unp_peercred));
		unp->unp_flags |= UNP_HAVEPC;

#if CONFIG_MACF_SOCKET
		/* XXXMAC: recursive lock: SOCK_LOCK(so); */
		mac_socketpeer_label_associate_socket(so, so3);
		mac_socketpeer_label_associate_socket(so3, so);
		/* XXXMAC: SOCK_UNLOCK(so); */
#endif /* CONFIG_MACF_SOCKET */

		/* Hold the reference on listening socket until the end */
		socket_unlock(so2, 0);
		list_so = so2;

		/* Lock ordering doesn't matter because so3 was just created */
		socket_lock(so3, 1);
		/* From here on so2 names the new socket, not the listener. */
		so2 = so3;

		/*
		 * Enable tracing for mDNSResponder endpoints.  (The use
		 * of sizeof instead of strlen below takes the null
		 * terminating character into account.)
		 *
		 * NOTE(review): unp2 was computed from the listener before
		 * so2 was re-pointed at so3, so the second assignment below
		 * flags the listener's pcb (whose lock has been released),
		 * not the new peer's (unp3) -- confirm this is intended.
		 */
		if (unpst_tracemdns &&
		    !strncmp(soun->sun_path, MDNSRESPONDER_PATH,
		    sizeof(MDNSRESPONDER_PATH))) {
			unp->unp_flags |= UNP_TRACE_MDNS;
			unp2->unp_flags |= UNP_TRACE_MDNS;
		}
	}

	error = unp_connect2(so, so2);

decref_out:
	/* Drop the lock/reference taken on the peer socket. */
	if (so2 != NULL) {
		if (so != so2) {
			socket_unlock(so2, 1);
		} else {
			/* Release the extra reference held for the listen socket.
			 * This is possible only for SOCK_DGRAM sockets. We refuse
			 * connecting to the same socket for SOCK_STREAM sockets.
			 */
			VERIFY(so2->so_usecount > 0);
			so2->so_usecount--;
		}
	}

	/* Drop the reference kept on the listener across the connect. */
	if (list_so != NULL) {
		socket_lock(list_so, 0);
		socket_unlock(list_so, 1);
	}

out:
	/* All paths reaching here hold the lock on so again. */
	LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
	vnode_put(vp);
	return error;
}
1389
1390 /*
1391 * Returns: 0 Success
1392 * EPROTOTYPE Protocol wrong type for socket
1393 * EINVAL Invalid argument
1394 */
1395 int
1396 unp_connect2(struct socket *so, struct socket *so2)
1397 {
1398 struct unpcb *unp = sotounpcb(so);
1399 struct unpcb *unp2;
1400
1401 if (so2->so_type != so->so_type) {
1402 return EPROTOTYPE;
1403 }
1404
1405 unp2 = sotounpcb(so2);
1406
1407 LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1408 LCK_MTX_ASSERT(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);
1409
1410 /* Verify both sockets are still opened */
1411 if (unp == 0 || unp2 == 0) {
1412 return EINVAL;
1413 }
1414
1415 unp->unp_conn = unp2;
1416 so2->so_usecount++;
1417
1418 switch (so->so_type) {
1419 case SOCK_DGRAM:
1420 LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink);
1421
1422 if (so != so2) {
1423 /* Avoid lock order reversals due to drop/acquire in soisconnected. */
1424 /* Keep an extra reference on so2 that will be dropped
1425 * soon after getting the locks in order
1426 */
1427 socket_unlock(so2, 0);
1428 soisconnected(so);
1429 unp_get_locks_in_order(so, so2);
1430 VERIFY(so2->so_usecount > 0);
1431 so2->so_usecount--;
1432 } else {
1433 soisconnected(so);
1434 }
1435
1436 break;
1437
1438 case SOCK_STREAM:
1439 /* This takes care of socketpair */
1440 if (!(unp->unp_flags & UNP_HAVEPC) &&
1441 !(unp2->unp_flags & UNP_HAVEPC)) {
1442 cru2x(kauth_cred_get(), &unp->unp_peercred);
1443 unp->unp_flags |= UNP_HAVEPC;
1444
1445 cru2x(kauth_cred_get(), &unp2->unp_peercred);
1446 unp2->unp_flags |= UNP_HAVEPC;
1447 }
1448 unp2->unp_conn = unp;
1449 so->so_usecount++;
1450
1451 /* Avoid lock order reversals due to drop/acquire in soisconnected. */
1452 socket_unlock(so, 0);
1453 soisconnected(so2);
1454
1455 /* Keep an extra reference on so2, that will be dropped soon after
1456 * getting the locks in order again.
1457 */
1458 socket_unlock(so2, 0);
1459
1460 socket_lock(so, 0);
1461 soisconnected(so);
1462
1463 unp_get_locks_in_order(so, so2);
1464 /* Decrement the extra reference left before */
1465 VERIFY(so2->so_usecount > 0);
1466 so2->so_usecount--;
1467 break;
1468
1469 default:
1470 panic("unknown socket type %d in unp_connect2", so->so_type);
1471 }
1472 LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1473 LCK_MTX_ASSERT(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);
1474 return 0;
1475 }
1476
1477 static void
1478 unp_disconnect(struct unpcb *unp)
1479 {
1480 struct unpcb *unp2 = NULL;
1481 struct socket *so2 = NULL, *so;
1482 struct socket *waitso;
1483 int so_locked = 1, strdisconn = 0;
1484
1485 so = unp->unp_socket;
1486 if (unp->unp_conn == NULL) {
1487 return;
1488 }
1489 lck_mtx_lock(unp_disconnect_lock);
1490 while (disconnect_in_progress != 0) {
1491 if (so_locked == 1) {
1492 socket_unlock(so, 0);
1493 so_locked = 0;
1494 }
1495 (void)msleep((caddr_t)&disconnect_in_progress, unp_disconnect_lock,
1496 PSOCK, "disconnect", NULL);
1497 }
1498 disconnect_in_progress = 1;
1499 lck_mtx_unlock(unp_disconnect_lock);
1500
1501 if (so_locked == 0) {
1502 socket_lock(so, 0);
1503 so_locked = 1;
1504 }
1505
1506 unp2 = unp->unp_conn;
1507
1508 if (unp2 == 0 || unp2->unp_socket == NULL) {
1509 goto out;
1510 }
1511 so2 = unp2->unp_socket;
1512
1513 try_again:
1514 if (so == so2) {
1515 if (so_locked == 0) {
1516 socket_lock(so, 0);
1517 }
1518 waitso = so;
1519 } else if (so < so2) {
1520 if (so_locked == 0) {
1521 socket_lock(so, 0);
1522 }
1523 socket_lock(so2, 1);
1524 waitso = so2;
1525 } else {
1526 if (so_locked == 1) {
1527 socket_unlock(so, 0);
1528 }
1529 socket_lock(so2, 1);
1530 socket_lock(so, 0);
1531 waitso = so;
1532 }
1533 so_locked = 1;
1534
1535 LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1536 LCK_MTX_ASSERT(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);
1537
1538 /* Check for the UNP_DONTDISCONNECT flag, if it
1539 * is set, release both sockets and go to sleep
1540 */
1541
1542 if ((((struct unpcb *)waitso->so_pcb)->unp_flags & UNP_DONTDISCONNECT) != 0) {
1543 if (so != so2) {
1544 socket_unlock(so2, 1);
1545 }
1546 so_locked = 0;
1547
1548 (void)msleep(waitso->so_pcb, &unp->unp_mtx,
1549 PSOCK | PDROP, "unpdisconnect", NULL);
1550 goto try_again;
1551 }
1552
1553 if (unp->unp_conn == NULL) {
1554 panic("unp_conn became NULL after sleep");
1555 }
1556
1557 unp->unp_conn = NULL;
1558 VERIFY(so2->so_usecount > 0);
1559 so2->so_usecount--;
1560
1561 if (unp->unp_flags & UNP_TRACE_MDNS) {
1562 unp->unp_flags &= ~UNP_TRACE_MDNS;
1563 }
1564
1565 switch (unp->unp_socket->so_type) {
1566 case SOCK_DGRAM:
1567 LIST_REMOVE(unp, unp_reflink);
1568 unp->unp_socket->so_state &= ~SS_ISCONNECTED;
1569 if (so != so2) {
1570 socket_unlock(so2, 1);
1571 }
1572 break;
1573
1574 case SOCK_STREAM:
1575 unp2->unp_conn = NULL;
1576 VERIFY(so->so_usecount > 0);
1577 so->so_usecount--;
1578
1579 /* Set the socket state correctly but do a wakeup later when
1580 * we release all locks except the socket lock, this will avoid
1581 * a deadlock.
1582 */
1583 unp->unp_socket->so_state &= ~(SS_ISCONNECTING | SS_ISCONNECTED | SS_ISDISCONNECTING);
1584 unp->unp_socket->so_state |= (SS_CANTRCVMORE | SS_CANTSENDMORE | SS_ISDISCONNECTED);
1585
1586 unp2->unp_socket->so_state &= ~(SS_ISCONNECTING | SS_ISCONNECTED | SS_ISDISCONNECTING);
1587 unp->unp_socket->so_state |= (SS_CANTRCVMORE | SS_CANTSENDMORE | SS_ISDISCONNECTED);
1588
1589 if (unp2->unp_flags & UNP_TRACE_MDNS) {
1590 unp2->unp_flags &= ~UNP_TRACE_MDNS;
1591 }
1592
1593 strdisconn = 1;
1594 break;
1595 default:
1596 panic("unknown socket type %d", so->so_type);
1597 }
1598 out:
1599 lck_mtx_lock(unp_disconnect_lock);
1600 disconnect_in_progress = 0;
1601 wakeup(&disconnect_in_progress);
1602 lck_mtx_unlock(unp_disconnect_lock);
1603
1604 if (strdisconn) {
1605 socket_unlock(so, 0);
1606 soisdisconnected(so2);
1607 socket_unlock(so2, 1);
1608
1609 socket_lock(so, 0);
1610 soisdisconnected(so);
1611 }
1612 LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1613 return;
1614 }
1615
1616 /*
1617 * unpcb_to_compat copies specific bits of a unpcb to a unpcb_compat format.
1618 * The unpcb_compat data structure is passed to user space and must not change.
1619 */
1620 static void
1621 unpcb_to_compat(struct unpcb *up, struct unpcb_compat *cp)
1622 {
1623 #if defined(__LP64__)
1624 cp->unp_link.le_next = (u_int32_t)
1625 VM_KERNEL_ADDRPERM(up->unp_link.le_next);
1626 cp->unp_link.le_prev = (u_int32_t)
1627 VM_KERNEL_ADDRPERM(up->unp_link.le_prev);
1628 #else
1629 cp->unp_link.le_next = (struct unpcb_compat *)
1630 VM_KERNEL_ADDRPERM(up->unp_link.le_next);
1631 cp->unp_link.le_prev = (struct unpcb_compat **)
1632 VM_KERNEL_ADDRPERM(up->unp_link.le_prev);
1633 #endif
1634 cp->unp_socket = (_UNPCB_PTR(struct socket *))
1635 VM_KERNEL_ADDRPERM(up->unp_socket);
1636 cp->unp_vnode = (_UNPCB_PTR(struct vnode *))
1637 VM_KERNEL_ADDRPERM(up->unp_vnode);
1638 cp->unp_ino = up->unp_ino;
1639 cp->unp_conn = (_UNPCB_PTR(struct unpcb_compat *))
1640 VM_KERNEL_ADDRPERM(up->unp_conn);
1641 cp->unp_refs = (u_int32_t)VM_KERNEL_ADDRPERM(up->unp_refs.lh_first);
1642 #if defined(__LP64__)
1643 cp->unp_reflink.le_next =
1644 (u_int32_t)VM_KERNEL_ADDRPERM(up->unp_reflink.le_next);
1645 cp->unp_reflink.le_prev =
1646 (u_int32_t)VM_KERNEL_ADDRPERM(up->unp_reflink.le_prev);
1647 #else
1648 cp->unp_reflink.le_next =
1649 (struct unpcb_compat *)VM_KERNEL_ADDRPERM(up->unp_reflink.le_next);
1650 cp->unp_reflink.le_prev =
1651 (struct unpcb_compat **)VM_KERNEL_ADDRPERM(up->unp_reflink.le_prev);
1652 #endif
1653 cp->unp_addr = (_UNPCB_PTR(struct sockaddr_un *))
1654 VM_KERNEL_ADDRPERM(up->unp_addr);
1655 cp->unp_cc = up->unp_cc;
1656 cp->unp_mbcnt = up->unp_mbcnt;
1657 cp->unp_gencnt = up->unp_gencnt;
1658 }
1659
1660 static int
1661 unp_pcblist SYSCTL_HANDLER_ARGS
1662 {
1663 #pragma unused(oidp,arg2)
1664 int error, i, n;
1665 struct unpcb *unp, **unp_list;
1666 unp_gen_t gencnt;
1667 struct xunpgen xug;
1668 struct unp_head *head;
1669
1670 lck_rw_lock_shared(unp_list_mtx);
1671 head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);
1672
1673 /*
1674 * The process of preparing the PCB list is too time-consuming and
1675 * resource-intensive to repeat twice on every request.
1676 */
1677 if (req->oldptr == USER_ADDR_NULL) {
1678 n = unp_count;
1679 req->oldidx = 2 * sizeof(xug) + (n + n / 8) *
1680 sizeof(struct xunpcb);
1681 lck_rw_done(unp_list_mtx);
1682 return 0;
1683 }
1684
1685 if (req->newptr != USER_ADDR_NULL) {
1686 lck_rw_done(unp_list_mtx);
1687 return EPERM;
1688 }
1689
1690 /*
1691 * OK, now we're committed to doing something.
1692 */
1693 gencnt = unp_gencnt;
1694 n = unp_count;
1695
1696 bzero(&xug, sizeof(xug));
1697 xug.xug_len = sizeof(xug);
1698 xug.xug_count = n;
1699 xug.xug_gen = gencnt;
1700 xug.xug_sogen = so_gencnt;
1701 error = SYSCTL_OUT(req, &xug, sizeof(xug));
1702 if (error) {
1703 lck_rw_done(unp_list_mtx);
1704 return error;
1705 }
1706
1707 /*
1708 * We are done if there is no pcb
1709 */
1710 if (n == 0) {
1711 lck_rw_done(unp_list_mtx);
1712 return 0;
1713 }
1714
1715 MALLOC(unp_list, struct unpcb **, n * sizeof(*unp_list),
1716 M_TEMP, M_WAITOK);
1717 if (unp_list == 0) {
1718 lck_rw_done(unp_list_mtx);
1719 return ENOMEM;
1720 }
1721
1722 for (unp = head->lh_first, i = 0; unp && i < n;
1723 unp = unp->unp_link.le_next) {
1724 if (unp->unp_gencnt <= gencnt) {
1725 unp_list[i++] = unp;
1726 }
1727 }
1728 n = i; /* in case we lost some during malloc */
1729
1730 error = 0;
1731 for (i = 0; i < n; i++) {
1732 unp = unp_list[i];
1733 if (unp->unp_gencnt <= gencnt) {
1734 struct xunpcb xu;
1735
1736 bzero(&xu, sizeof(xu));
1737 xu.xu_len = sizeof(xu);
1738 xu.xu_unpp = (_UNPCB_PTR(struct unpcb_compat *))
1739 VM_KERNEL_ADDRPERM(unp);
1740 /*
1741 * XXX - need more locking here to protect against
1742 * connect/disconnect races for SMP.
1743 */
1744 if (unp->unp_addr) {
1745 bcopy(unp->unp_addr, &xu.xu_au,
1746 unp->unp_addr->sun_len);
1747 }
1748 if (unp->unp_conn && unp->unp_conn->unp_addr) {
1749 bcopy(unp->unp_conn->unp_addr,
1750 &xu.xu_cau,
1751 unp->unp_conn->unp_addr->sun_len);
1752 }
1753 unpcb_to_compat(unp, &xu.xu_unp);
1754 sotoxsocket(unp->unp_socket, &xu.xu_socket);
1755 error = SYSCTL_OUT(req, &xu, sizeof(xu));
1756 }
1757 }
1758 if (!error) {
1759 /*
1760 * Give the user an updated idea of our state.
1761 * If the generation differs from what we told
1762 * her before, she knows that something happened
1763 * while we were processing this request, and it
1764 * might be necessary to retry.
1765 */
1766 bzero(&xug, sizeof(xug));
1767 xug.xug_len = sizeof(xug);
1768 xug.xug_gen = unp_gencnt;
1769 xug.xug_sogen = so_gencnt;
1770 xug.xug_count = unp_count;
1771 error = SYSCTL_OUT(req, &xug, sizeof(xug));
1772 }
1773 FREE(unp_list, M_TEMP);
1774 lck_rw_done(unp_list_mtx);
1775 return error;
1776 }
1777
/*
 * Read-only "pcblist" sysctl nodes under _net_local_dgram and
 * _net_local_stream.  Both are served by unp_pcblist(); the socket type is
 * passed as arg1 so the handler can pick the matching PCB list.
 */
SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb",
    "List of active local datagram sockets");
SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb",
    "List of active local stream sockets");
1786
1787 #if !CONFIG_EMBEDDED
1788
1789 static int
1790 unp_pcblist64 SYSCTL_HANDLER_ARGS
1791 {
1792 #pragma unused(oidp,arg2)
1793 int error, i, n;
1794 struct unpcb *unp, **unp_list;
1795 unp_gen_t gencnt;
1796 struct xunpgen xug;
1797 struct unp_head *head;
1798
1799 lck_rw_lock_shared(unp_list_mtx);
1800 head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);
1801
1802 /*
1803 * The process of preparing the PCB list is too time-consuming and
1804 * resource-intensive to repeat twice on every request.
1805 */
1806 if (req->oldptr == USER_ADDR_NULL) {
1807 n = unp_count;
1808 req->oldidx = 2 * sizeof(xug) + (n + n / 8) *
1809 (sizeof(struct xunpcb64));
1810 lck_rw_done(unp_list_mtx);
1811 return 0;
1812 }
1813
1814 if (req->newptr != USER_ADDR_NULL) {
1815 lck_rw_done(unp_list_mtx);
1816 return EPERM;
1817 }
1818
1819 /*
1820 * OK, now we're committed to doing something.
1821 */
1822 gencnt = unp_gencnt;
1823 n = unp_count;
1824
1825 bzero(&xug, sizeof(xug));
1826 xug.xug_len = sizeof(xug);
1827 xug.xug_count = n;
1828 xug.xug_gen = gencnt;
1829 xug.xug_sogen = so_gencnt;
1830 error = SYSCTL_OUT(req, &xug, sizeof(xug));
1831 if (error) {
1832 lck_rw_done(unp_list_mtx);
1833 return error;
1834 }
1835
1836 /*
1837 * We are done if there is no pcb
1838 */
1839 if (n == 0) {
1840 lck_rw_done(unp_list_mtx);
1841 return 0;
1842 }
1843
1844 MALLOC(unp_list, struct unpcb **, n * sizeof(*unp_list),
1845 M_TEMP, M_WAITOK);
1846 if (unp_list == 0) {
1847 lck_rw_done(unp_list_mtx);
1848 return ENOMEM;
1849 }
1850
1851 for (unp = head->lh_first, i = 0; unp && i < n;
1852 unp = unp->unp_link.le_next) {
1853 if (unp->unp_gencnt <= gencnt) {
1854 unp_list[i++] = unp;
1855 }
1856 }
1857 n = i; /* in case we lost some during malloc */
1858
1859 error = 0;
1860 for (i = 0; i < n; i++) {
1861 unp = unp_list[i];
1862 if (unp->unp_gencnt <= gencnt) {
1863 struct xunpcb64 xu;
1864 size_t xu_len = sizeof(struct xunpcb64);
1865
1866 bzero(&xu, xu_len);
1867 xu.xu_len = xu_len;
1868 xu.xu_unpp = (u_int64_t)VM_KERNEL_ADDRPERM(unp);
1869 xu.xunp_link.le_next = (u_int64_t)
1870 VM_KERNEL_ADDRPERM(unp->unp_link.le_next);
1871 xu.xunp_link.le_prev = (u_int64_t)
1872 VM_KERNEL_ADDRPERM(unp->unp_link.le_prev);
1873 xu.xunp_socket = (u_int64_t)
1874 VM_KERNEL_ADDRPERM(unp->unp_socket);
1875 xu.xunp_vnode = (u_int64_t)
1876 VM_KERNEL_ADDRPERM(unp->unp_vnode);
1877 xu.xunp_ino = unp->unp_ino;
1878 xu.xunp_conn = (u_int64_t)
1879 VM_KERNEL_ADDRPERM(unp->unp_conn);
1880 xu.xunp_refs = (u_int64_t)
1881 VM_KERNEL_ADDRPERM(unp->unp_refs.lh_first);
1882 xu.xunp_reflink.le_next = (u_int64_t)
1883 VM_KERNEL_ADDRPERM(unp->unp_reflink.le_next);
1884 xu.xunp_reflink.le_prev = (u_int64_t)
1885 VM_KERNEL_ADDRPERM(unp->unp_reflink.le_prev);
1886 xu.xunp_cc = unp->unp_cc;
1887 xu.xunp_mbcnt = unp->unp_mbcnt;
1888 xu.xunp_gencnt = unp->unp_gencnt;
1889
1890 if (unp->unp_socket) {
1891 sotoxsocket64(unp->unp_socket, &xu.xu_socket);
1892 }
1893
1894 /*
1895 * XXX - need more locking here to protect against
1896 * connect/disconnect races for SMP.
1897 */
1898 if (unp->unp_addr) {
1899 bcopy(unp->unp_addr, &xu.xu_au,
1900 unp->unp_addr->sun_len);
1901 }
1902 if (unp->unp_conn && unp->unp_conn->unp_addr) {
1903 bcopy(unp->unp_conn->unp_addr,
1904 &xu.xu_cau,
1905 unp->unp_conn->unp_addr->sun_len);
1906 }
1907
1908 error = SYSCTL_OUT(req, &xu, xu_len);
1909 }
1910 }
1911 if (!error) {
1912 /*
1913 * Give the user an updated idea of our state.
1914 * If the generation differs from what we told
1915 * her before, she knows that something happened
1916 * while we were processing this request, and it
1917 * might be necessary to retry.
1918 */
1919 bzero(&xug, sizeof(xug));
1920 xug.xug_len = sizeof(xug);
1921 xug.xug_gen = unp_gencnt;
1922 xug.xug_sogen = so_gencnt;
1923 xug.xug_count = unp_count;
1924 error = SYSCTL_OUT(req, &xug, sizeof(xug));
1925 }
1926 FREE(unp_list, M_TEMP);
1927 lck_rw_done(unp_list_mtx);
1928 return error;
1929 }
1930
/*
 * Read-only "pcblist64" sysctl nodes under _net_local_dgram and
 * _net_local_stream.  Both are served by unp_pcblist64(); the socket type
 * is passed as arg1 so the handler can pick the matching PCB list.
 */
SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist64,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist64, "S,xunpcb64",
    "List of active local datagram sockets 64 bit");
SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist64,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist64, "S,xunpcb64",
    "List of active local stream sockets 64 bit");
1939
1940 #endif /* !CONFIG_EMBEDDED */
1941
1942 static void
1943 unp_shutdown(struct unpcb *unp)
1944 {
1945 struct socket *so = unp->unp_socket;
1946 struct socket *so2;
1947 if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn) {
1948 so2 = unp->unp_conn->unp_socket;
1949 unp_get_locks_in_order(so, so2);
1950 socantrcvmore(so2);
1951 socket_unlock(so2, 1);
1952 }
1953 }
1954
1955 static void
1956 unp_drop(struct unpcb *unp, int errno)
1957 {
1958 struct socket *so = unp->unp_socket;
1959
1960 so->so_error = errno;
1961 unp_disconnect(unp);
1962 }
1963
1964 /*
1965 * Returns: 0 Success
1966 * EMSGSIZE The new fd's will not fit
1967 * ENOBUFS Cannot alloc struct fileproc
1968 */
1969 int
1970 unp_externalize(struct mbuf *rights)
1971 {
1972 proc_t p = current_proc(); /* XXX */
1973 int i;
1974 struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
1975 struct fileglob **rp = (struct fileglob **)(cm + 1);
1976 int *fds = (int *)(cm + 1);
1977 struct fileproc *fp;
1978 struct fileproc **fileproc_l;
1979 int newfds = (cm->cmsg_len - sizeof(*cm)) / sizeof(int);
1980 int f, error = 0;
1981
1982 MALLOC(fileproc_l, struct fileproc **,
1983 newfds * sizeof(struct fileproc *), M_TEMP, M_WAITOK);
1984 if (fileproc_l == NULL) {
1985 error = ENOMEM;
1986 goto discard;
1987 }
1988
1989 proc_fdlock(p);
1990
1991 /*
1992 * if the new FD's will not fit, then we free them all
1993 */
1994 if (!fdavail(p, newfds)) {
1995 proc_fdunlock(p);
1996 error = EMSGSIZE;
1997 goto discard;
1998 }
1999 /*
2000 * now change each pointer to an fd in the global table to
2001 * an integer that is the index to the local fd table entry
2002 * that we set up to point to the global one we are transferring.
2003 * XXX (1) this assumes a pointer and int are the same size,
2004 * XXX or the mbuf can hold the expansion
2005 * XXX (2) allocation failures should be non-fatal
2006 */
2007 for (i = 0; i < newfds; i++) {
2008 #if CONFIG_MACF_SOCKET
2009 /*
2010 * If receive access is denied, don't pass along
2011 * and error message, just discard the descriptor.
2012 */
2013 if (mac_file_check_receive(kauth_cred_get(), rp[i])) {
2014 proc_fdunlock(p);
2015 unp_discard(rp[i], p);
2016 fds[i] = 0;
2017 proc_fdlock(p);
2018 continue;
2019 }
2020 #endif
2021 if (fdalloc(p, 0, &f)) {
2022 panic("unp_externalize:fdalloc");
2023 }
2024 fp = fileproc_alloc_init(NULL);
2025 if (fp == NULL) {
2026 panic("unp_externalize: MALLOC_ZONE");
2027 }
2028 fp->f_fglob = rp[i];
2029 if (fg_removeuipc_mark(rp[i])) {
2030 /*
2031 * Take an iocount on the fp for completing the
2032 * removal from the global msg queue
2033 */
2034 os_ref_retain_locked(&fp->f_iocount);
2035 fileproc_l[i] = fp;
2036 } else {
2037 fileproc_l[i] = NULL;
2038 }
2039 procfdtbl_releasefd(p, f, fp);
2040 fds[i] = f;
2041 }
2042 proc_fdunlock(p);
2043
2044 for (i = 0; i < newfds; i++) {
2045 if (fileproc_l[i] != NULL) {
2046 VERIFY(fileproc_l[i]->f_fglob != NULL &&
2047 (fileproc_l[i]->f_fglob->fg_lflags & FG_RMMSGQ));
2048 VERIFY(fds[i] >= 0);
2049 fg_removeuipc(fileproc_l[i]->f_fglob);
2050
2051 /* Drop the iocount */
2052 fp_drop(p, fds[i], fileproc_l[i], 0);
2053 fileproc_l[i] = NULL;
2054 }
2055 if (fds[i] != 0) {
2056 (void) OSAddAtomic(-1, &unp_rights);
2057 }
2058 }
2059
2060 discard:
2061 if (fileproc_l != NULL) {
2062 FREE(fileproc_l, M_TEMP);
2063 }
2064 if (error) {
2065 for (i = 0; i < newfds; i++) {
2066 unp_discard(*rp, p);
2067 *rp++ = NULL;
2068 }
2069 }
2070 return error;
2071 }
2072
2073 void
2074 unp_init(void)
2075 {
2076 _CASSERT(UIPC_MAX_CMSG_FD >= (MCLBYTES / sizeof(int)));
2077 unp_zone = zinit(sizeof(struct unpcb),
2078 (nmbclusters * sizeof(struct unpcb)), 4096, "unpzone");
2079
2080 if (unp_zone == 0) {
2081 panic("unp_init");
2082 }
2083 LIST_INIT(&unp_dhead);
2084 LIST_INIT(&unp_shead);
2085
2086 /*
2087 * allocate lock group attribute and group for udp pcb mutexes
2088 */
2089 unp_mtx_grp_attr = lck_grp_attr_alloc_init();
2090
2091 unp_mtx_grp = lck_grp_alloc_init("unp_list", unp_mtx_grp_attr);
2092
2093 unp_mtx_attr = lck_attr_alloc_init();
2094
2095 if ((unp_list_mtx = lck_rw_alloc_init(unp_mtx_grp,
2096 unp_mtx_attr)) == NULL) {
2097 return; /* pretty much dead if this fails... */
2098 }
2099 if ((unp_disconnect_lock = lck_mtx_alloc_init(unp_mtx_grp,
2100 unp_mtx_attr)) == NULL) {
2101 return;
2102 }
2103
2104 if ((unp_connect_lock = lck_mtx_alloc_init(unp_mtx_grp,
2105 unp_mtx_attr)) == NULL) {
2106 return;
2107 }
2108 }
2109
/*
 * Fallback MIN, defined only when no MIN macro exists yet.  Each argument
 * is expanded more than once, so callers must not pass expressions with
 * side effects.
 */
#ifndef MIN
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
#endif
2113
2114 /*
2115 * Returns: 0 Success
2116 * EINVAL
2117 * fdgetf_noref:EBADF
2118 */
2119 static int
2120 unp_internalize(struct mbuf *control, proc_t p)
2121 {
2122 struct cmsghdr *cm = mtod(control, struct cmsghdr *);
2123 int *fds;
2124 struct fileglob **rp;
2125 struct fileproc *fp;
2126 int i, error;
2127 int oldfds;
2128 uint8_t fg_ins[UIPC_MAX_CMSG_FD / 8];
2129
2130 /* 64bit: cmsg_len is 'uint32_t', m_len is 'long' */
2131 if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
2132 (socklen_t)cm->cmsg_len != (socklen_t)control->m_len) {
2133 return EINVAL;
2134 }
2135 oldfds = (cm->cmsg_len - sizeof(*cm)) / sizeof(int);
2136 bzero(fg_ins, sizeof(fg_ins));
2137
2138 proc_fdlock(p);
2139 fds = (int *)(cm + 1);
2140
2141 for (i = 0; i < oldfds; i++) {
2142 struct fileproc *tmpfp;
2143 if (((error = fdgetf_noref(p, fds[i], &tmpfp)) != 0)) {
2144 proc_fdunlock(p);
2145 return error;
2146 } else if (!file_issendable(p, tmpfp)) {
2147 proc_fdunlock(p);
2148 return EINVAL;
2149 } else if (FP_ISGUARDED(tmpfp, GUARD_SOCKET_IPC)) {
2150 error = fp_guard_exception(p,
2151 fds[i], tmpfp, kGUARD_EXC_SOCKET_IPC);
2152 proc_fdunlock(p);
2153 return error;
2154 }
2155 }
2156 rp = (struct fileglob **)(cm + 1);
2157
2158 /* On K64 we need to walk backwards because a fileglob * is twice the size of an fd
2159 * and doing them in-order would result in stomping over unprocessed fd's
2160 */
2161 for (i = (oldfds - 1); i >= 0; i--) {
2162 (void) fdgetf_noref(p, fds[i], &fp);
2163 if (fg_insertuipc_mark(fp->f_fglob)) {
2164 fg_ins[i / 8] |= 0x80 >> (i % 8);
2165 }
2166 rp[i] = fp->f_fglob;
2167 }
2168 proc_fdunlock(p);
2169
2170 for (i = 0; i < oldfds; i++) {
2171 if (fg_ins[i / 8] & (0x80 >> (i % 8))) {
2172 VERIFY(rp[i]->fg_lflags & FG_INSMSGQ);
2173 fg_insertuipc(rp[i]);
2174 }
2175 (void) OSAddAtomic(1, &unp_rights);
2176 }
2177
2178 return 0;
2179 }
2180
/*
 * Garbage-collector state used by unp_gc() and unp_gc_wait():
 *   unp_defer	reset to 0 when a pass starts and decremented each time an
 *		FDEFER mark is consumed; the marking loop repeats while it
 *		is non-zero.
 *   unp_gcing	set to 1 (under uipc_lock) when a pass starts; a second
 *		unp_gc() call returns immediately while it is set.
 *   unp_gcwait	set to 1 by unp_gc_wait() before it sleeps on unp_gcing.
 */
static int unp_defer, unp_gcing, unp_gcwait;
/* Thread running the current unp_gc() pass; unp_gc_wait() never blocks it. */
static thread_t unp_gcthread = NULL;
2183
2184 /* always called under uipc_lock */
2185 void
2186 unp_gc_wait(void)
2187 {
2188 if (unp_gcthread == current_thread()) {
2189 return;
2190 }
2191
2192 while (unp_gcing != 0) {
2193 unp_gcwait = 1;
2194 msleep(&unp_gcing, uipc_lock, 0, "unp_gc_wait", NULL);
2195 }
2196 }
2197
2198
2199 __private_extern__ void
2200 unp_gc(void)
2201 {
2202 struct fileglob *fg, *nextfg;
2203 struct socket *so;
2204 static struct fileglob **extra_ref;
2205 struct fileglob **fpp;
2206 int nunref, i;
2207 int need_gcwakeup = 0;
2208
2209 lck_mtx_lock(uipc_lock);
2210 if (unp_gcing) {
2211 lck_mtx_unlock(uipc_lock);
2212 return;
2213 }
2214 unp_gcing = 1;
2215 unp_defer = 0;
2216 unp_gcthread = current_thread();
2217 lck_mtx_unlock(uipc_lock);
2218 /*
2219 * before going through all this, set all FDs to
2220 * be NOT defered and NOT externally accessible
2221 */
2222 for (fg = fmsghead.lh_first; fg != 0; fg = fg->f_msglist.le_next) {
2223 lck_mtx_lock(&fg->fg_lock);
2224 fg->fg_flag &= ~(FMARK | FDEFER);
2225 lck_mtx_unlock(&fg->fg_lock);
2226 }
2227 do {
2228 for (fg = fmsghead.lh_first; fg != 0;
2229 fg = fg->f_msglist.le_next) {
2230 lck_mtx_lock(&fg->fg_lock);
2231 /*
2232 * If the file is not open, skip it
2233 */
2234 if (fg->fg_count == 0) {
2235 lck_mtx_unlock(&fg->fg_lock);
2236 continue;
2237 }
2238 /*
2239 * If we already marked it as 'defer' in a
2240 * previous pass, then try process it this time
2241 * and un-mark it
2242 */
2243 if (fg->fg_flag & FDEFER) {
2244 fg->fg_flag &= ~FDEFER;
2245 unp_defer--;
2246 } else {
2247 /*
2248 * if it's not defered, then check if it's
2249 * already marked.. if so skip it
2250 */
2251 if (fg->fg_flag & FMARK) {
2252 lck_mtx_unlock(&fg->fg_lock);
2253 continue;
2254 }
2255 /*
2256 * If all references are from messages
2257 * in transit, then skip it. it's not
2258 * externally accessible.
2259 */
2260 if (fg->fg_count == fg->fg_msgcount) {
2261 lck_mtx_unlock(&fg->fg_lock);
2262 continue;
2263 }
2264 /*
2265 * If it got this far then it must be
2266 * externally accessible.
2267 */
2268 fg->fg_flag |= FMARK;
2269 }
2270 /*
2271 * either it was defered, or it is externally
2272 * accessible and not already marked so.
2273 * Now check if it is possibly one of OUR sockets.
2274 */
2275 if (FILEGLOB_DTYPE(fg) != DTYPE_SOCKET ||
2276 (so = (struct socket *)fg->fg_data) == 0) {
2277 lck_mtx_unlock(&fg->fg_lock);
2278 continue;
2279 }
2280 if (so->so_proto->pr_domain != localdomain ||
2281 (so->so_proto->pr_flags & PR_RIGHTS) == 0) {
2282 lck_mtx_unlock(&fg->fg_lock);
2283 continue;
2284 }
2285 #ifdef notdef
2286 if (so->so_rcv.sb_flags & SB_LOCK) {
2287 /*
2288 * This is problematical; it's not clear
2289 * we need to wait for the sockbuf to be
2290 * unlocked (on a uniprocessor, at least),
2291 * and it's also not clear what to do
2292 * if sbwait returns an error due to receipt
2293 * of a signal. If sbwait does return
2294 * an error, we'll go into an infinite
2295 * loop. Delete all of this for now.
2296 */
2297 (void) sbwait(&so->so_rcv);
2298 goto restart;
2299 }
2300 #endif
2301 /*
2302 * So, Ok, it's one of our sockets and it IS externally
2303 * accessible (or was defered). Now we look
2304 * to see if we hold any file descriptors in its
2305 * message buffers. Follow those links and mark them
2306 * as accessible too.
2307 *
2308 * In case a file is passed onto itself we need to
2309 * release the file lock.
2310 */
2311 lck_mtx_unlock(&fg->fg_lock);
2312
2313 unp_scan(so->so_rcv.sb_mb, unp_mark, 0);
2314 }
2315 } while (unp_defer);
2316 /*
2317 * We grab an extra reference to each of the file table entries
2318 * that are not otherwise accessible and then free the rights
2319 * that are stored in messages on them.
2320 *
2321 * The bug in the original code is a little tricky, so I'll describe
2322 * what's wrong with it here.
2323 *
2324 * It is incorrect to simply unp_discard each entry for f_msgcount
2325 * times -- consider the case of sockets A and B that contain
2326 * references to each other. On a last close of some other socket,
2327 * we trigger a gc since the number of outstanding rights (unp_rights)
2328 * is non-zero. If during the sweep phase the gc code unp_discards,
2329 * we end up doing a (full) closef on the descriptor. A closef on A
2330 * results in the following chain. Closef calls soo_close, which
2331 * calls soclose. Soclose calls first (through the switch
2332 * uipc_usrreq) unp_detach, which re-invokes unp_gc. Unp_gc simply
2333 * returns because the previous instance had set unp_gcing, and
2334 * we return all the way back to soclose, which marks the socket
2335 * with SS_NOFDREF, and then calls sofree. Sofree calls sorflush
2336 * to free up the rights that are queued in messages on the socket A,
2337 * i.e., the reference on B. The sorflush calls via the dom_dispose
2338 * switch unp_dispose, which unp_scans with unp_discard. This second
2339 * instance of unp_discard just calls closef on B.
2340 *
2341 * Well, a similar chain occurs on B, resulting in a sorflush on B,
2342 * which results in another closef on A. Unfortunately, A is already
2343 * being closed, and the descriptor has already been marked with
2344 * SS_NOFDREF, and soclose panics at this point.
2345 *
2346 * Here, we first take an extra reference to each inaccessible
2347 * descriptor. Then, we call sorflush ourself, since we know
2348 * it is a Unix domain socket anyhow. After we destroy all the
2349 * rights carried in messages, we do a last closef to get rid
2350 * of our extra reference. This is the last close, and the
2351 * unp_detach etc will shut down the socket.
2352 *
2353 * 91/09/19, bsy@cs.cmu.edu
2354 */
2355 extra_ref = _MALLOC(nfiles * sizeof(struct fileglob *),
2356 M_FILEGLOB, M_WAITOK);
2357 if (extra_ref == NULL) {
2358 goto bail;
2359 }
2360 for (nunref = 0, fg = fmsghead.lh_first, fpp = extra_ref; fg != 0;
2361 fg = nextfg) {
2362 lck_mtx_lock(&fg->fg_lock);
2363
2364 nextfg = fg->f_msglist.le_next;
2365 /*
2366 * If it's not open, skip it
2367 */
2368 if (fg->fg_count == 0) {
2369 lck_mtx_unlock(&fg->fg_lock);
2370 continue;
2371 }
2372 /*
2373 * If all refs are from msgs, and it's not marked accessible
2374 * then it must be referenced from some unreachable cycle
2375 * of (shut-down) FDs, so include it in our
2376 * list of FDs to remove
2377 */
2378 if (fg->fg_count == fg->fg_msgcount && !(fg->fg_flag & FMARK)) {
2379 fg->fg_count++;
2380 *fpp++ = fg;
2381 nunref++;
2382 }
2383 lck_mtx_unlock(&fg->fg_lock);
2384 }
2385 /*
2386 * for each FD on our hit list, do the following two things
2387 */
2388 for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
2389 struct fileglob *tfg;
2390
2391 tfg = *fpp;
2392
2393 if (FILEGLOB_DTYPE(tfg) == DTYPE_SOCKET &&
2394 tfg->fg_data != NULL) {
2395 so = (struct socket *)(tfg->fg_data);
2396
2397 socket_lock(so, 0);
2398
2399 sorflush(so);
2400
2401 socket_unlock(so, 0);
2402 }
2403 }
2404 for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
2405 closef_locked((struct fileproc *)0, *fpp, (proc_t)NULL);
2406 }
2407
2408 FREE(extra_ref, M_FILEGLOB);
2409 bail:
2410 lck_mtx_lock(uipc_lock);
2411 unp_gcing = 0;
2412 unp_gcthread = NULL;
2413
2414 if (unp_gcwait != 0) {
2415 unp_gcwait = 0;
2416 need_gcwakeup = 1;
2417 }
2418 lck_mtx_unlock(uipc_lock);
2419
2420 if (need_gcwakeup != 0) {
2421 wakeup(&unp_gcing);
2422 }
2423 }
2424
2425 void
2426 unp_dispose(struct mbuf *m)
2427 {
2428 if (m) {
2429 unp_scan(m, unp_discard, NULL);
2430 }
2431 }
2432
2433 /*
2434 * Returns: 0 Success
2435 */
2436 static int
2437 unp_listen(struct unpcb *unp, proc_t p)
2438 {
2439 kauth_cred_t safecred = kauth_cred_proc_ref(p);
2440 cru2x(safecred, &unp->unp_peercred);
2441 kauth_cred_unref(&safecred);
2442 unp->unp_flags |= UNP_HAVEPCCACHED;
2443 return 0;
2444 }
2445
2446 static void
2447 unp_scan(struct mbuf *m0, void (*op)(struct fileglob *, void *arg), void *arg)
2448 {
2449 struct mbuf *m;
2450 struct fileglob **rp;
2451 struct cmsghdr *cm;
2452 int i;
2453 int qfds;
2454
2455 while (m0) {
2456 for (m = m0; m; m = m->m_next) {
2457 if (m->m_type == MT_CONTROL &&
2458 (size_t)m->m_len >= sizeof(*cm)) {
2459 cm = mtod(m, struct cmsghdr *);
2460 if (cm->cmsg_level != SOL_SOCKET ||
2461 cm->cmsg_type != SCM_RIGHTS) {
2462 continue;
2463 }
2464 qfds = (cm->cmsg_len - sizeof(*cm)) /
2465 sizeof(int);
2466 rp = (struct fileglob **)(cm + 1);
2467 for (i = 0; i < qfds; i++) {
2468 (*op)(*rp++, arg);
2469 }
2470 break; /* XXX, but saves time */
2471 }
2472 }
2473 m0 = m0->m_act;
2474 }
2475 }
2476
2477 static void
2478 unp_mark(struct fileglob *fg, __unused void *arg)
2479 {
2480 lck_mtx_lock(&fg->fg_lock);
2481
2482 if (fg->fg_flag & FMARK) {
2483 lck_mtx_unlock(&fg->fg_lock);
2484 return;
2485 }
2486 fg->fg_flag |= (FMARK | FDEFER);
2487
2488 lck_mtx_unlock(&fg->fg_lock);
2489
2490 unp_defer++;
2491 }
2492
2493 static void
2494 unp_discard(struct fileglob *fg, void *p)
2495 {
2496 if (p == NULL) {
2497 p = current_proc(); /* XXX */
2498 }
2499 (void) OSAddAtomic(1, &unp_disposed);
2500 if (fg_removeuipc_mark(fg)) {
2501 VERIFY(fg->fg_lflags & FG_RMMSGQ);
2502 fg_removeuipc(fg);
2503 }
2504 (void) OSAddAtomic(-1, &unp_rights);
2505
2506 proc_fdlock(p);
2507 (void) closef_locked((struct fileproc *)0, fg, p);
2508 proc_fdunlock(p);
2509 }
2510
2511 int
2512 unp_lock(struct socket *so, int refcount, void * lr)
2513 {
2514 void * lr_saved;
2515 if (lr == 0) {
2516 lr_saved = (void *) __builtin_return_address(0);
2517 } else {
2518 lr_saved = lr;
2519 }
2520
2521 if (so->so_pcb) {
2522 lck_mtx_lock(&((struct unpcb *)so->so_pcb)->unp_mtx);
2523 } else {
2524 panic("unp_lock: so=%p NO PCB! lr=%p ref=0x%x\n",
2525 so, lr_saved, so->so_usecount);
2526 }
2527
2528 if (so->so_usecount < 0) {
2529 panic("unp_lock: so=%p so_pcb=%p lr=%p ref=0x%x\n",
2530 so, so->so_pcb, lr_saved, so->so_usecount);
2531 }
2532
2533 if (refcount) {
2534 VERIFY(so->so_usecount > 0);
2535 so->so_usecount++;
2536 }
2537 so->lock_lr[so->next_lock_lr] = lr_saved;
2538 so->next_lock_lr = (so->next_lock_lr + 1) % SO_LCKDBG_MAX;
2539 return 0;
2540 }
2541
2542 int
2543 unp_unlock(struct socket *so, int refcount, void * lr)
2544 {
2545 void * lr_saved;
2546 lck_mtx_t * mutex_held = NULL;
2547 struct unpcb *unp = sotounpcb(so);
2548
2549 if (lr == 0) {
2550 lr_saved = (void *) __builtin_return_address(0);
2551 } else {
2552 lr_saved = lr;
2553 }
2554
2555 if (refcount) {
2556 so->so_usecount--;
2557 }
2558
2559 if (so->so_usecount < 0) {
2560 panic("unp_unlock: so=%p usecount=%x\n", so, so->so_usecount);
2561 }
2562 if (so->so_pcb == NULL) {
2563 panic("unp_unlock: so=%p NO PCB usecount=%x\n", so, so->so_usecount);
2564 } else {
2565 mutex_held = &((struct unpcb *)so->so_pcb)->unp_mtx;
2566 }
2567 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
2568 so->unlock_lr[so->next_unlock_lr] = lr_saved;
2569 so->next_unlock_lr = (so->next_unlock_lr + 1) % SO_LCKDBG_MAX;
2570
2571 if (so->so_usecount == 0 && (so->so_flags & SOF_PCBCLEARING)) {
2572 sofreelastref(so, 1);
2573
2574 if (unp->unp_addr) {
2575 FREE(unp->unp_addr, M_SONAME);
2576 }
2577
2578 lck_mtx_unlock(mutex_held);
2579
2580 lck_mtx_destroy(&unp->unp_mtx, unp_mtx_grp);
2581 zfree(unp_zone, unp);
2582
2583 unp_gc();
2584 } else {
2585 lck_mtx_unlock(mutex_held);
2586 }
2587
2588 return 0;
2589 }
2590
2591 lck_mtx_t *
2592 unp_getlock(struct socket *so, __unused int flags)
2593 {
2594 struct unpcb *unp = (struct unpcb *)so->so_pcb;
2595
2596
2597 if (so->so_pcb) {
2598 if (so->so_usecount < 0) {
2599 panic("unp_getlock: so=%p usecount=%x\n", so, so->so_usecount);
2600 }
2601 return &unp->unp_mtx;
2602 } else {
2603 panic("unp_getlock: so=%p NULL so_pcb\n", so);
2604 return so->so_proto->pr_domain->dom_mtx;
2605 }
2606 }