]> git.saurik.com Git - apple/xnu.git/blob - bsd/kern/uipc_usrreq.c
2925a6fee059ddb6429917a3d7e3110b6b26f0c7
[apple/xnu.git] / bsd / kern / uipc_usrreq.c
1 /*
2 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1982, 1986, 1989, 1991, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94
61 */
62 /*
63 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
64 * support for mandatory and extensible security protections. This notice
65 * is included in support of clause 2.2 (b) of the Apple Public License,
66 * Version 2.0.
67 */
68
69 #include <sys/param.h>
70 #include <sys/systm.h>
71 #include <sys/kernel.h>
72 #include <sys/domain.h>
73 #include <sys/fcntl.h>
74 #include <sys/malloc.h> /* XXX must be before <sys/file.h> */
75 #include <sys/file_internal.h>
76 #include <sys/guarded.h>
77 #include <sys/filedesc.h>
78 #include <sys/lock.h>
79 #include <sys/mbuf.h>
80 #include <sys/namei.h>
81 #include <sys/proc_internal.h>
82 #include <sys/kauth.h>
83 #include <sys/protosw.h>
84 #include <sys/socket.h>
85 #include <sys/socketvar.h>
86 #include <sys/stat.h>
87 #include <sys/sysctl.h>
88 #include <sys/un.h>
89 #include <sys/unpcb.h>
90 #include <sys/vnode_internal.h>
91 #include <sys/kdebug.h>
92 #include <sys/mcache.h>
93
94 #include <kern/zalloc.h>
95 #include <kern/locks.h>
96
97 #if CONFIG_MACF
98 #include <security/mac_framework.h>
99 #endif /* CONFIG_MACF */
100
101 #include <mach/vm_param.h>
102
103 /*
104 * Maximum number of FDs that can be passed in an mbuf
105 */
106 #define UIPC_MAX_CMSG_FD 512
107
/* Shorthand accessors: each expands to the matching fg_* field reached via f_fglob. */
108 #define f_msgcount f_fglob->fg_msgcount
109 #define f_cred f_fglob->fg_cred
110 #define f_ops f_fglob->fg_ops
111 #define f_offset f_fglob->fg_offset
112 #define f_data f_fglob->fg_data
/* Zone that unp_attach() allocates unpcbs from. */
113 struct zone *unp_zone;
/* Generation counter; bumped under unp_list_mtx by unp_attach() and unp_detach(). */
114 static unp_gen_t unp_gencnt;
/* Number of unpcbs currently linked on unp_shead/unp_dhead. */
115 static u_int unp_count;
116
/* Attribute and group handed to lck_mtx_init() for each per-PCB mutex. */
117 static lck_attr_t *unp_mtx_attr;
118 static lck_grp_t *unp_mtx_grp;
119 static lck_grp_attr_t *unp_mtx_grp_attr;
/* Read/write lock taken exclusively around updates to the PCB lists and counters. */
120 static lck_rw_t *unp_list_mtx;
121
/* Guards disconnect_in_progress (see unp_detach()). */
122 static lck_mtx_t *unp_disconnect_lock;
/* Held while a bound vnode's v_socket pointer is examined or cleared. */
123 static lck_mtx_t *unp_connect_lock;
/* Non-zero while one thread is inside the guarded section of unp_detach(). */
124 static u_int disconnect_in_progress;
125
126 extern lck_mtx_t *uipc_lock;
/* PCB lists: unp_shead for non-datagram (stream) sockets, unp_dhead for datagram sockets. */
127 static struct unp_head unp_shead, unp_dhead;
128
129 /*
130 * mDNSResponder tracing. When enabled, endpoints connected to
131 * /var/run/mDNSResponder will be traced; during each send on
132 * the traced socket, we log the PID and process name of the
133 * sending process. We also print out a bit of info related
134 * to the data itself; this assumes ipc_msg_hdr in dnssd_ipc.h
135 * of mDNSResponder stays the same.
136 */
137 #define MDNSRESPONDER_PATH "/var/run/mDNSResponder"
138
139 static int unpst_tracemdns; /* enable tracing */
140
141 #define MDNS_IPC_MSG_HDR_VERSION_1 1
142
/*
 * Header that uipc_send() copies from the front of the outgoing data when
 * tracing is enabled on the PCB; only `version' and `op' are inspected
 * there (both via ntohl()).  Packed so the layout does not depend on
 * compiler padding.
 */
143 struct mdns_ipc_msg_hdr {
144 uint32_t version;
145 uint32_t datalen;
146 uint32_t ipc_flags;
147 uint32_t op;
148 union {
149 void *context;
150 uint32_t u32[2];
151 } __attribute__((packed));
152 uint32_t reg_index;
153 } __attribute__((packed));
154
155 /*
156 * Unix communications domain.
157 *
158 * TODO:
159 * SEQPACKET, RDM
160 * rethink name space problems
161 * need a proper out-of-band
162 * lock pushdown
163 */
/* Placeholder AF_LOCAL address duplicated for callers when an endpoint has no bound name. */
164 static struct sockaddr sun_noname = { sizeof(sun_noname), AF_LOCAL, { 0 } };
165 static ino_t unp_ino; /* prototype for fake inode numbers */
166
/* Forward declarations of the unp_* helpers. */
167 static int unp_attach(struct socket *);
168 static void unp_detach(struct unpcb *);
169 static int unp_bind(struct unpcb *, struct sockaddr *, proc_t);
170 static int unp_connect(struct socket *, struct sockaddr *, proc_t);
171 static void unp_disconnect(struct unpcb *);
172 static void unp_shutdown(struct unpcb *);
173 static void unp_drop(struct unpcb *, int);
174 __private_extern__ void unp_gc(void);
175 static void unp_scan(struct mbuf *, void (*)(struct fileglob *, void *arg), void *arg);
176 static void unp_mark(struct fileglob *, __unused void *);
177 static void unp_discard(struct fileglob *, void *);
178 static int unp_internalize(struct mbuf *, proc_t);
179 static int unp_listen(struct unpcb *, proc_t);
180 static void unpcb_to_compat(struct unpcb *, struct unpcb_compat *);
181 static void unp_get_locks_in_order(struct socket *so, struct socket *conn_so);
182
183 static void
184 unp_get_locks_in_order(struct socket *so, struct socket *conn_so)
185 {
186 if (so < conn_so) {
187 socket_lock(conn_so, 1);
188 } else {
189 struct unpcb *unp = sotounpcb(so);
190 unp->unp_flags |= UNP_DONTDISCONNECT;
191 unp->rw_thrcount++;
192 socket_unlock(so, 0);
193
194 /* Get the locks in the correct order */
195 socket_lock(conn_so, 1);
196 socket_lock(so, 0);
197 unp->rw_thrcount--;
198 if (unp->rw_thrcount == 0) {
199 unp->unp_flags &= ~UNP_DONTDISCONNECT;
200 wakeup(unp);
201 }
202 }
203 }
204
205 static int
206 uipc_abort(struct socket *so)
207 {
208 struct unpcb *unp = sotounpcb(so);
209
210 if (unp == 0) {
211 return EINVAL;
212 }
213 unp_drop(unp, ECONNABORTED);
214 unp_detach(unp);
215 sofree(so);
216 return 0;
217 }
218
219 static int
220 uipc_accept(struct socket *so, struct sockaddr **nam)
221 {
222 struct unpcb *unp = sotounpcb(so);
223
224 if (unp == 0) {
225 return EINVAL;
226 }
227
228 /*
229 * Pass back name of connected socket,
230 * if it was bound and we are still connected
231 * (our peer may have closed already!).
232 */
233 if (unp->unp_conn && unp->unp_conn->unp_addr) {
234 *nam = dup_sockaddr((struct sockaddr *)
235 unp->unp_conn->unp_addr, 1);
236 } else {
237 *nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
238 }
239 return 0;
240 }
241
242 /*
243 * Returns: 0 Success
244 * EISCONN
245 * unp_attach:
246 */
247 static int
248 uipc_attach(struct socket *so, __unused int proto, __unused proc_t p)
249 {
250 struct unpcb *unp = sotounpcb(so);
251
252 if (unp != 0) {
253 return EISCONN;
254 }
255 return unp_attach(so);
256 }
257
258 static int
259 uipc_bind(struct socket *so, struct sockaddr *nam, proc_t p)
260 {
261 struct unpcb *unp = sotounpcb(so);
262
263 if (unp == 0) {
264 return EINVAL;
265 }
266
267 return unp_bind(unp, nam, p);
268 }
269
270 /*
271 * Returns: 0 Success
272 * EINVAL
273 * unp_connect:??? [See elsewhere in this file]
274 */
275 static int
276 uipc_connect(struct socket *so, struct sockaddr *nam, proc_t p)
277 {
278 struct unpcb *unp = sotounpcb(so);
279
280 if (unp == 0) {
281 return EINVAL;
282 }
283 return unp_connect(so, nam, p);
284 }
285
286 /*
287 * Returns: 0 Success
288 * EINVAL
289 * unp_connect2:EPROTOTYPE Protocol wrong type for socket
290 * unp_connect2:EINVAL Invalid argument
291 */
292 static int
293 uipc_connect2(struct socket *so1, struct socket *so2)
294 {
295 struct unpcb *unp = sotounpcb(so1);
296
297 if (unp == 0) {
298 return EINVAL;
299 }
300
301 return unp_connect2(so1, so2);
302 }
303
304 /* control is EOPNOTSUPP */
305
306 static int
307 uipc_detach(struct socket *so)
308 {
309 struct unpcb *unp = sotounpcb(so);
310
311 if (unp == 0) {
312 return EINVAL;
313 }
314
315 LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
316 unp_detach(unp);
317 return 0;
318 }
319
320 static int
321 uipc_disconnect(struct socket *so)
322 {
323 struct unpcb *unp = sotounpcb(so);
324
325 if (unp == 0) {
326 return EINVAL;
327 }
328 unp_disconnect(unp);
329 return 0;
330 }
331
332 /*
333 * Returns: 0 Success
334 * EINVAL
335 */
336 static int
337 uipc_listen(struct socket *so, __unused proc_t p)
338 {
339 struct unpcb *unp = sotounpcb(so);
340
341 if (unp == 0 || unp->unp_vnode == 0) {
342 return EINVAL;
343 }
344 return unp_listen(unp, p);
345 }
346
347 static int
348 uipc_peeraddr(struct socket *so, struct sockaddr **nam)
349 {
350 struct unpcb *unp = sotounpcb(so);
351
352 if (unp == NULL) {
353 return EINVAL;
354 }
355 if (unp->unp_conn != NULL && unp->unp_conn->unp_addr != NULL) {
356 *nam = dup_sockaddr((struct sockaddr *)
357 unp->unp_conn->unp_addr, 1);
358 } else {
359 *nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
360 }
361 return 0;
362 }
363
364 static int
365 uipc_rcvd(struct socket *so, __unused int flags)
366 {
367 struct unpcb *unp = sotounpcb(so);
368 struct socket *so2;
369
370 if (unp == 0) {
371 return EINVAL;
372 }
373 switch (so->so_type) {
374 case SOCK_DGRAM:
375 panic("uipc_rcvd DGRAM?");
376 /*NOTREACHED*/
377
378 case SOCK_STREAM:
379 #define rcv (&so->so_rcv)
380 #define snd (&so2->so_snd)
381 if (unp->unp_conn == 0) {
382 break;
383 }
384
385 so2 = unp->unp_conn->unp_socket;
386 unp_get_locks_in_order(so, so2);
387 /*
388 * Adjust backpressure on sender
389 * and wakeup any waiting to write.
390 */
391 snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
392 unp->unp_mbcnt = rcv->sb_mbcnt;
393 snd->sb_hiwat += unp->unp_cc - rcv->sb_cc;
394 unp->unp_cc = rcv->sb_cc;
395 sowwakeup(so2);
396
397 socket_unlock(so2, 1);
398
399 #undef snd
400 #undef rcv
401 break;
402
403 default:
404 panic("uipc_rcvd unknown socktype");
405 }
406 return 0;
407 }
408
409 /* pru_rcvoob is EOPNOTSUPP */
410
411 /*
412 * Returns: 0 Success
413 * EINVAL
414 * EOPNOTSUPP
415 * EPIPE
416 * ENOTCONN
417 * EISCONN
418 * unp_internalize:EINVAL
419 * unp_internalize:EBADF
420 * unp_connect:EAFNOSUPPORT Address family not supported
421 * unp_connect:EINVAL Invalid argument
422 * unp_connect:ENOTSOCK Not a socket
423 * unp_connect:ECONNREFUSED Connection refused
424 * unp_connect:EISCONN Socket is connected
425 * unp_connect:EPROTOTYPE Protocol wrong type for socket
426 * unp_connect:???
427 * sbappendaddr:ENOBUFS [5th argument, contents modified]
428 * sbappendaddr:??? [whatever a filter author chooses]
429 */
/*
 * pru_send entry point: append m (and optional control) to the receive
 * buffer of the peer socket.
 *
 * Both m and control are consumed on every path: whatever was not handed
 * to the peer's buffer is freed at the `release' label.
 */
430 static int
431 uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
432 struct mbuf *control, proc_t p)
433 {
434 int error = 0;
435 struct unpcb *unp = sotounpcb(so);
436 struct socket *so2;
437
438 if (unp == 0) {
439 error = EINVAL;
440 goto release;
441 }
442 if (flags & PRUS_OOB) {
443 error = EOPNOTSUPP;
444 goto release;
445 }
446
447 if (control) {
448 /* release lock to avoid deadlock (4436174) */
449 socket_unlock(so, 0);
450 error = unp_internalize(control, p);
451 socket_lock(so, 0);
452 if (error) {
453 goto release;
454 }
455 }
456
457 switch (so->so_type) {
458 case SOCK_DGRAM:
459 {
460 struct sockaddr *from;
461
/* An explicit destination means a temporary connect, undone after the append below. */
462 if (nam) {
463 if (unp->unp_conn) {
464 error = EISCONN;
465 break;
466 }
467 error = unp_connect(so, nam, p);
468 if (error) {
469 break;
470 }
471 } else {
472 if (unp->unp_conn == 0) {
473 error = ENOTCONN;
474 break;
475 }
476 }
477
478 so2 = unp->unp_conn->unp_socket;
/* When the socket is connected to itself, so is already locked; do not lock it twice. */
479 if (so != so2) {
480 unp_get_locks_in_order(so, so2);
481 }
482
483 if (unp->unp_addr) {
484 from = (struct sockaddr *)unp->unp_addr;
485 } else {
486 from = &sun_noname;
487 }
488 /*
489 * sbappendaddr() will fail when the receiver runs out of
490 * space; in contrast to SOCK_STREAM, we will lose messages
491 * for the SOCK_DGRAM case when the receiver's queue overflows.
492 * SB_UNIX on the socket buffer implies that the callee will
493 * not free the control message, if any, because we would need
494 * to call unp_dispose() on it.
495 */
496 if (sbappendaddr(&so2->so_rcv, from, m, control, &error)) {
497 control = NULL;
498 sorwakeup(so2);
499 } else if (control != NULL && error == 0) {
500 /* A socket filter took control; don't touch it */
501 control = NULL;
502 }
503
504 if (so != so2) {
505 socket_unlock(so2, 1);
506 }
507
/* m is never freed by this function once sbappendaddr() has been called. */
508 m = NULL;
509 if (nam) {
510 unp_disconnect(unp);
511 }
512 break;
513 }
514
515 case SOCK_STREAM: {
516 int didreceive = 0;
517 #define rcv (&so2->so_rcv)
518 #define snd (&so->so_snd)
519 /* Connect if not connected yet. */
520 /*
521 * Note: A better implementation would complain
522 * if not equal to the peer's address.
523 */
524 if ((so->so_state & SS_ISCONNECTED) == 0) {
525 if (nam) {
526 error = unp_connect(so, nam, p);
527 if (error) {
528 break; /* XXX */
529 }
530 } else {
531 error = ENOTCONN;
532 break;
533 }
534 }
535
536 if (so->so_state & SS_CANTSENDMORE) {
537 error = EPIPE;
538 break;
539 }
540 if (unp->unp_conn == 0) {
541 panic("uipc_send connected but no connection?");
542 }
543
544 so2 = unp->unp_conn->unp_socket;
545 unp_get_locks_in_order(so, so2);
546
547 /* Check socket state again as we might have unlocked the socket
548 * while trying to get the locks in order
549 */
550
551 if ((so->so_state & SS_CANTSENDMORE)) {
552 error = EPIPE;
553 socket_unlock(so2, 1);
554 break;
555 }
556
/* Optional tracing: log sender pid/name and the op field when the header version matches. */
557 if (unp->unp_flags & UNP_TRACE_MDNS) {
558 struct mdns_ipc_msg_hdr hdr;
559
560 if (mbuf_copydata(m, 0, sizeof(hdr), &hdr) == 0 &&
561 hdr.version == ntohl(MDNS_IPC_MSG_HDR_VERSION_1)) {
562 printf("%s[mDNSResponder] pid=%d (%s): op=0x%x\n",
563 __func__, p->p_pid, p->p_comm, ntohl(hdr.op));
564 }
565 }
566
567 /*
568 * Send to paired receive port, and then reduce send buffer
569 * hiwater marks to maintain backpressure. Wake up readers.
570 * SB_UNIX flag will allow new record to be appended to the
571 * receiver's queue even when it is already full. It is
572 * possible, however, that append might fail. In that case,
573 * we will need to call unp_dispose() on the control message;
574 * the callee will not free it since SB_UNIX is set.
575 */
576 didreceive = control ?
577 sbappendcontrol(rcv, m, control, &error) : sbappend(rcv, m);
578
/* Charge the growth of the peer's receive buffer against our send limits. */
579 snd->sb_mbmax -= rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
580 unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
/* Clamp at zero rather than letting sb_hiwat wrap. */
581 if ((int32_t)snd->sb_hiwat >=
582 (int32_t)(rcv->sb_cc - unp->unp_conn->unp_cc)) {
583 snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc;
584 } else {
585 snd->sb_hiwat = 0;
586 }
587 unp->unp_conn->unp_cc = rcv->sb_cc;
588 if (didreceive) {
589 control = NULL;
590 sorwakeup(so2);
591 } else if (control != NULL && error == 0) {
592 /* A socket filter took control; don't touch it */
593 control = NULL;
594 }
595
596 socket_unlock(so2, 1);
597 m = NULL;
598 #undef snd
599 #undef rcv
600 }
601 break;
602
603 default:
604 panic("uipc_send unknown socktype");
605 }
606
607 /*
608 * SEND_EOF is equivalent to a SEND followed by
609 * a SHUTDOWN.
610 */
611 if (flags & PRUS_EOF) {
612 socantsendmore(so);
613 unp_shutdown(unp);
614 }
615
/* control is still ours and the send failed: dispose of it with the socket unlocked. */
616 if (control && error != 0) {
617 socket_unlock(so, 0);
618 unp_dispose(control);
619 socket_lock(so, 0);
620 }
621
622 release:
623 if (control) {
624 m_freem(control);
625 }
626 if (m) {
627 m_freem(m);
628 }
629 return error;
630 }
631
632 static int
633 uipc_sense(struct socket *so, void *ub, int isstat64)
634 {
635 struct unpcb *unp = sotounpcb(so);
636 struct socket *so2;
637 blksize_t blksize;
638
639 if (unp == 0) {
640 return EINVAL;
641 }
642
643 blksize = so->so_snd.sb_hiwat;
644 if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) {
645 so2 = unp->unp_conn->unp_socket;
646 blksize += so2->so_rcv.sb_cc;
647 }
648 if (unp->unp_ino == 0) {
649 unp->unp_ino = unp_ino++;
650 }
651
652 if (isstat64 != 0) {
653 struct stat64 *sb64;
654
655 sb64 = (struct stat64 *)ub;
656 sb64->st_blksize = blksize;
657 sb64->st_dev = NODEV;
658 sb64->st_ino = (ino64_t)unp->unp_ino;
659 } else {
660 struct stat *sb;
661
662 sb = (struct stat *)ub;
663 sb->st_blksize = blksize;
664 sb->st_dev = NODEV;
665 sb->st_ino = (ino_t)(uintptr_t)unp->unp_ino;
666 }
667
668 return 0;
669 }
670
671 /*
672 * Returns: 0 Success
673 * EINVAL
674 *
675 * Notes: This is not strictly correct, as unp_shutdown() also calls
676 * socantrcvmore(). These should maybe both be conditionalized
677 * on the 'how' argument in soshutdown() as called from the
678 * shutdown() system call.
679 */
680 static int
681 uipc_shutdown(struct socket *so)
682 {
683 struct unpcb *unp = sotounpcb(so);
684
685 if (unp == 0) {
686 return EINVAL;
687 }
688 socantsendmore(so);
689 unp_shutdown(unp);
690 return 0;
691 }
692
693 /*
694 * Returns: 0 Success
695 * EINVAL Invalid argument
696 */
697 static int
698 uipc_sockaddr(struct socket *so, struct sockaddr **nam)
699 {
700 struct unpcb *unp = sotounpcb(so);
701
702 if (unp == NULL) {
703 return EINVAL;
704 }
705 if (unp->unp_addr != NULL) {
706 *nam = dup_sockaddr((struct sockaddr *)unp->unp_addr, 1);
707 } else {
708 *nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
709 }
710 return 0;
711 }
712
/*
 * User-request dispatch table for the local domain.  Every slot points at
 * one of the uipc_* entry points above, except sosend/soreceive which use
 * the generic socket-layer routines.
 */
713 struct pr_usrreqs uipc_usrreqs = {
714 .pru_abort = uipc_abort,
715 .pru_accept = uipc_accept,
716 .pru_attach = uipc_attach,
717 .pru_bind = uipc_bind,
718 .pru_connect = uipc_connect,
719 .pru_connect2 = uipc_connect2,
720 .pru_detach = uipc_detach,
721 .pru_disconnect = uipc_disconnect,
722 .pru_listen = uipc_listen,
723 .pru_peeraddr = uipc_peeraddr,
724 .pru_rcvd = uipc_rcvd,
725 .pru_send = uipc_send,
726 .pru_sense = uipc_sense,
727 .pru_shutdown = uipc_shutdown,
728 .pru_sockaddr = uipc_sockaddr,
729 .pru_sosend = sosend,
730 .pru_soreceive = soreceive,
731 };
732
733 int
734 uipc_ctloutput(struct socket *so, struct sockopt *sopt)
735 {
736 struct unpcb *unp = sotounpcb(so);
737 int error = 0;
738 pid_t peerpid;
739 struct socket *peerso;
740
741 switch (sopt->sopt_dir) {
742 case SOPT_GET:
743 switch (sopt->sopt_name) {
744 case LOCAL_PEERCRED:
745 if (unp->unp_flags & UNP_HAVEPC) {
746 error = sooptcopyout(sopt, &unp->unp_peercred,
747 sizeof(unp->unp_peercred));
748 } else {
749 if (so->so_type == SOCK_STREAM) {
750 error = ENOTCONN;
751 } else {
752 error = EINVAL;
753 }
754 }
755 break;
756 case LOCAL_PEERPID:
757 case LOCAL_PEEREPID:
758 if (unp->unp_conn == NULL) {
759 error = ENOTCONN;
760 break;
761 }
762 peerso = unp->unp_conn->unp_socket;
763 if (peerso == NULL) {
764 panic("peer is connected but has no socket?");
765 }
766 unp_get_locks_in_order(so, peerso);
767 if (sopt->sopt_name == LOCAL_PEEREPID &&
768 peerso->so_flags & SOF_DELEGATED) {
769 peerpid = peerso->e_pid;
770 } else {
771 peerpid = peerso->last_pid;
772 }
773 socket_unlock(peerso, 1);
774 error = sooptcopyout(sopt, &peerpid, sizeof(peerpid));
775 break;
776 case LOCAL_PEERUUID:
777 case LOCAL_PEEREUUID:
778 if (unp->unp_conn == NULL) {
779 error = ENOTCONN;
780 break;
781 }
782 peerso = unp->unp_conn->unp_socket;
783 if (peerso == NULL) {
784 panic("peer is connected but has no socket?");
785 }
786 unp_get_locks_in_order(so, peerso);
787 if (sopt->sopt_name == LOCAL_PEEREUUID &&
788 peerso->so_flags & SOF_DELEGATED) {
789 error = sooptcopyout(sopt, &peerso->e_uuid,
790 sizeof(peerso->e_uuid));
791 } else {
792 error = sooptcopyout(sopt, &peerso->last_uuid,
793 sizeof(peerso->last_uuid));
794 }
795 socket_unlock(peerso, 1);
796 break;
797 default:
798 error = EOPNOTSUPP;
799 break;
800 }
801 break;
802 case SOPT_SET:
803 default:
804 error = EOPNOTSUPP;
805 break;
806 }
807
808 return error;
809 }
810
811 /*
812 * Both send and receive buffers are allocated PIPSIZ bytes of buffering
813 * for stream sockets, although the total for sender and receiver is
814 * actually only PIPSIZ.
815 * Datagram sockets really use the sendspace as the maximum datagram size,
816 * and don't really want to reserve the sendspace. Their recvspace should
817 * be large enough for at least one max-size datagram plus address.
818 */
819 #ifndef PIPSIZ
820 #define PIPSIZ 8192
821 #endif
/* Buffer sizes that unp_attach() passes to soreserve() for stream and datagram sockets. */
822 static u_int32_t unpst_sendspace = PIPSIZ;
823 static u_int32_t unpst_recvspace = PIPSIZ;
824 static u_int32_t unpdg_sendspace = 2 * 1024; /* really max datagram size */
825 static u_int32_t unpdg_recvspace = 4 * 1024;
826
827 static int unp_rights; /* file descriptors in flight */
828 static int unp_disposed; /* discarded file descriptors */
829
/*
 * Sysctl nodes exposing the variables above.  All are read/write except
 * "inflight", which is registered read-only (CTLFLAG_RD).
 */
830 SYSCTL_DECL(_net_local_stream);
831 SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW | CTLFLAG_LOCKED,
832 &unpst_sendspace, 0, "");
833 SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW | CTLFLAG_LOCKED,
834 &unpst_recvspace, 0, "");
835 SYSCTL_INT(_net_local_stream, OID_AUTO, tracemdns, CTLFLAG_RW | CTLFLAG_LOCKED,
836 &unpst_tracemdns, 0, "");
837 SYSCTL_DECL(_net_local_dgram);
838 SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW | CTLFLAG_LOCKED,
839 &unpdg_sendspace, 0, "");
840 SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW | CTLFLAG_LOCKED,
841 &unpdg_recvspace, 0, "");
842 SYSCTL_DECL(_net_local);
843 SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD | CTLFLAG_LOCKED, &unp_rights, 0, "");
844
845 /*
846 * Returns: 0 Success
847 * ENOBUFS
848 * soreserve:ENOBUFS
849 */
850 static int
851 unp_attach(struct socket *so)
852 {
853 struct unpcb *unp;
854 int error = 0;
855
856 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
857 switch (so->so_type) {
858 case SOCK_STREAM:
859 error = soreserve(so, unpst_sendspace, unpst_recvspace);
860 break;
861
862 case SOCK_DGRAM:
863 error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
864 break;
865
866 default:
867 panic("unp_attach");
868 }
869 if (error) {
870 return error;
871 }
872 }
873 unp = (struct unpcb *)zalloc(unp_zone);
874 if (unp == NULL) {
875 return ENOBUFS;
876 }
877 bzero(unp, sizeof(*unp));
878
879 lck_mtx_init(&unp->unp_mtx,
880 unp_mtx_grp, unp_mtx_attr);
881
882 lck_rw_lock_exclusive(unp_list_mtx);
883 LIST_INIT(&unp->unp_refs);
884 unp->unp_socket = so;
885 unp->unp_gencnt = ++unp_gencnt;
886 unp_count++;
887 LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ?
888 &unp_dhead : &unp_shead, unp, unp_link);
889 lck_rw_done(unp_list_mtx);
890 so->so_pcb = (caddr_t)unp;
891 /*
892 * Mark AF_UNIX socket buffers accordingly so that:
893 *
894 * a. In the SOCK_STREAM case, socket buffer append won't fail due to
895 * the lack of space; this essentially loosens the sbspace() check,
896 * since there is disconnect between sosend() and uipc_send() with
897 * respect to flow control that might result in our dropping the
898 * data in uipc_send(). By setting this, we allow for slightly
899 * more records to be appended to the receiving socket to avoid
900 * losing data (which we can't afford in the SOCK_STREAM case).
901 * Flow control still takes place since we adjust the sender's
902 * hiwat during each send. This doesn't affect the SOCK_DGRAM
903 * case and append would still fail when the queue overflows.
904 *
905 * b. In the presence of control messages containing internalized
906 * file descriptors, the append routines will not free them since
907 * we'd need to undo the work first via unp_dispose().
908 */
909 so->so_rcv.sb_flags |= SB_UNIX;
910 so->so_snd.sb_flags |= SB_UNIX;
911 return 0;
912 }
913
/*
 * Tear down a PCB: unlink it from the global list, release the bound
 * vnode (if any), disconnect from the peer, drop every socket still
 * listed on unp_refs with ECONNRESET, and finally mark the socket
 * disconnected with SOF_PCBCLEARING set.
 *
 * The socket lock is dropped and retaken several times below; so_locked
 * tracks whether it is currently held.
 */
914 static void
915 unp_detach(struct unpcb *unp)
916 {
917 int so_locked = 1;
918
919 lck_rw_lock_exclusive(unp_list_mtx);
920 LIST_REMOVE(unp, unp_link);
921 --unp_count;
922 ++unp_gencnt;
923 lck_rw_done(unp_list_mtx);
924 if (unp->unp_vnode) {
925 struct vnode *tvp = NULL;
926 socket_unlock(unp->unp_socket, 0);
927
928 /* Holding unp_connect_lock will avoid a race between
929 * a thread closing the listening socket and a thread
930 * connecting to it.
931 */
932 lck_mtx_lock(unp_connect_lock);
933 socket_lock(unp->unp_socket, 0);
/* Re-check: the socket lock was dropped above, so unp_vnode may have changed. */
934 if (unp->unp_vnode) {
935 tvp = unp->unp_vnode;
936 unp->unp_vnode->v_socket = NULL;
937 unp->unp_vnode = NULL;
938 }
939 lck_mtx_unlock(unp_connect_lock);
940 if (tvp != NULL) {
941 vnode_rele(tvp); /* drop the usecount */
942 }
943 }
944 if (unp->unp_conn) {
945 unp_disconnect(unp);
946 }
947 while (unp->unp_refs.lh_first) {
948 struct unpcb *unp2 = NULL;
949
950 /* This datagram socket is connected to one or more
951 * sockets. In order to avoid a race condition between removing
952 * this reference and closing the connected socket, we need
953 * to check disconnect_in_progress
954 */
955 if (so_locked == 1) {
956 socket_unlock(unp->unp_socket, 0);
957 so_locked = 0;
958 }
/* Wait for our turn, then claim the disconnect_in_progress flag. */
959 lck_mtx_lock(unp_disconnect_lock);
960 while (disconnect_in_progress != 0) {
961 (void)msleep((caddr_t)&disconnect_in_progress, unp_disconnect_lock,
962 PSOCK, "disconnect", NULL);
963 }
964 disconnect_in_progress = 1;
965 lck_mtx_unlock(unp_disconnect_lock);
966
967 /* Now we are sure that any unpcb socket disconnect is not happening */
968 if (unp->unp_refs.lh_first != NULL) {
969 unp2 = unp->unp_refs.lh_first;
970 socket_lock(unp2->unp_socket, 1);
971 }
972
/* Release the flag and wake anyone sleeping on it. */
973 lck_mtx_lock(unp_disconnect_lock);
974 disconnect_in_progress = 0;
975 wakeup(&disconnect_in_progress);
976 lck_mtx_unlock(unp_disconnect_lock);
977
978 if (unp2 != NULL) {
979 /* We already locked this socket and have a reference on it */
980 unp_drop(unp2, ECONNRESET);
981 socket_unlock(unp2->unp_socket, 1);
982 }
983 }
984
985 if (so_locked == 0) {
986 socket_lock(unp->unp_socket, 0);
987 so_locked = 1;
988 }
989 soisdisconnected(unp->unp_socket);
990 /* makes sure we're getting dealloced */
991 unp->unp_socket->so_flags |= SOF_PCBCLEARING;
992 }
993
994 /*
995 * Returns: 0 Success
996 * EAFNOSUPPORT
997 * EINVAL
998 * EADDRINUSE
999 * namei:??? [anything namei can return]
1000 * vnode_authorize:??? [anything vnode_authorize can return]
1001 *
1002 * Notes: p at this point is the current process, as this function is
1003 * only called by sobind().
1004 */
/*
 * Bind a PCB to a filesystem path: create a VSOCK vnode at the path in
 * nam, take a long-term reference on it, and record both the vnode and a
 * copy of the address in the PCB.
 *
 * The socket lock is released around the filesystem work and is held
 * again on every return path.
 */
1005 static int
1006 unp_bind(
1007 struct unpcb *unp,
1008 struct sockaddr *nam,
1009 proc_t p)
1010 {
1011 struct sockaddr_un *soun = (struct sockaddr_un *)nam;
1012 struct vnode *vp, *dvp;
1013 struct vnode_attr va;
1014 vfs_context_t ctx = vfs_context_current();
1015 int error, namelen;
1016 struct nameidata nd;
1017 struct socket *so = unp->unp_socket;
1018 char buf[SOCK_MAXADDRLEN];
1019
1020 if (nam->sa_family != 0 && nam->sa_family != AF_UNIX) {
1021 return EAFNOSUPPORT;
1022 }
1023
1024 /*
1025 * Check if the socket is already bound to an address
1026 */
1027 if (unp->unp_vnode != NULL) {
1028 return EINVAL;
1029 }
1030 /*
1031 * Check if the socket may have been shut down
1032 */
1033 if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
1034 (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
1035 return EINVAL;
1036 }
1037
1038 namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
1039 if (namelen <= 0) {
1040 return EINVAL;
1041 }
1042 /*
1043 * Note: sun_path is not a zero terminated "C" string
1044 */
1045 if (namelen >= SOCK_MAXADDRLEN) {
1046 return EINVAL;
1047 }
/* Make a NUL-terminated private copy of the path; the check above leaves room for the terminator. */
1048 bcopy(soun->sun_path, buf, namelen);
1049 buf[namelen] = 0;
1050
1051 socket_unlock(so, 0);
1052
1053 NDINIT(&nd, CREATE, OP_MKFIFO, FOLLOW | LOCKPARENT, UIO_SYSSPACE,
1054 CAST_USER_ADDR_T(buf), ctx);
1055 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
1056 error = namei(&nd);
1057 if (error) {
1058 socket_lock(so, 0);
1059 return error;
1060 }
1061 dvp = nd.ni_dvp;
1062 vp = nd.ni_vp;
1063
/* The path already exists: refuse to bind over it. */
1064 if (vp != NULL) {
1065 /*
1066 * need to do this before the vnode_put of dvp
1067 * since we may have to release an fs_nodelock
1068 */
1069 nameidone(&nd);
1070
1071 vnode_put(dvp);
1072 vnode_put(vp);
1073
1074 socket_lock(so, 0);
1075 return EADDRINUSE;
1076 }
1077
1078 VATTR_INIT(&va);
1079 VATTR_SET(&va, va_type, VSOCK);
1080 VATTR_SET(&va, va_mode, (ACCESSPERMS & ~p->p_fd->fd_cmask));
1081
/*
 * The conditional blocks below form one chain: each configured check runs
 * only if the previous one left error == 0, ending with vnode_authorize().
 */
1082 #if CONFIG_MACF
1083 error = mac_vnode_check_create(ctx,
1084 nd.ni_dvp, &nd.ni_cnd, &va);
1085
1086 if (error == 0)
1087 #endif /* CONFIG_MACF */
1088 #if CONFIG_MACF_SOCKET_SUBSET
1089 error = mac_vnode_check_uipc_bind(ctx,
1090 nd.ni_dvp, &nd.ni_cnd, &va);
1091
1092 if (error == 0)
1093 #endif /* MAC_SOCKET_SUBSET */
1094 /* authorize before creating */
1095 error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);
1096
1097 if (!error) {
1098 /* create the socket */
1099 error = vn_create(dvp, &vp, &nd, &va, 0, 0, NULL, ctx);
1100 }
1101
1102 nameidone(&nd);
1103 vnode_put(dvp);
1104
1105 if (error) {
1106 socket_lock(so, 0);
1107 return error;
1108 }
1109
1110 socket_lock(so, 0);
1111
/* Re-check under the lock: unp_vnode may have been set while the lock was dropped. */
1112 if (unp->unp_vnode != NULL) {
1113 vnode_put(vp); /* drop the iocount */
1114 return EINVAL;
1115 }
1116
1117 error = vnode_ref(vp); /* gain a longterm reference */
1118 if (error) {
1119 vnode_put(vp); /* drop the iocount */
1120 return error;
1121 }
1122
/* Link vnode and socket to each other and remember the bound address. */
1123 vp->v_socket = unp->unp_socket;
1124 unp->unp_vnode = vp;
1125 unp->unp_addr = (struct sockaddr_un *)dup_sockaddr(nam, 1);
1126 vnode_put(vp); /* drop the iocount */
1127
1128 return 0;
1129 }
1130
1131
1132 /*
1133 * Returns: 0 Success
1134 * EAFNOSUPPORT Address family not supported
1135 * EINVAL Invalid argument
1136 * ENOTSOCK Not a socket
1137 * ECONNREFUSED Connection refused
1138 * EPROTOTYPE Protocol wrong type for socket
1139 * EISCONN Socket is connected
1140 * unp_connect2:EPROTOTYPE Protocol wrong type for socket
1141 * unp_connect2:EINVAL Invalid argument
1142 * namei:??? [anything namei can return]
1143 * vnode_authorize:???? [anything vnode_authorize can return]
1144 *
1145 * Notes: p at this point is the current process, as this function is
1146 * only called by sosend(), sendfile(), and soconnectlock().
1147 */
1148 static int
1149 unp_connect(struct socket *so, struct sockaddr *nam, __unused proc_t p)
1150 {
1151 struct sockaddr_un *soun = (struct sockaddr_un *)nam;
1152 struct vnode *vp;
1153 struct socket *so2, *so3, *list_so = NULL;
1154 struct unpcb *unp, *unp2, *unp3;
1155 vfs_context_t ctx = vfs_context_current();
1156 int error, len;
1157 struct nameidata nd;
1158 char buf[SOCK_MAXADDRLEN];
1159
1160 if (nam->sa_family != 0 && nam->sa_family != AF_UNIX) {
1161 return EAFNOSUPPORT;
1162 }
1163
1164 unp = sotounpcb(so);
1165 so2 = so3 = NULL;
1166
1167 len = nam->sa_len - offsetof(struct sockaddr_un, sun_path);
1168 if (len <= 0) {
1169 return EINVAL;
1170 }
1171 /*
1172 * Note: sun_path is not a zero terminated "C" string
1173 */
1174 if (len >= SOCK_MAXADDRLEN) {
1175 return EINVAL;
1176 }
1177 bcopy(soun->sun_path, buf, len);
1178 buf[len] = 0;
1179
1180 socket_unlock(so, 0);
1181
1182 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
1183 CAST_USER_ADDR_T(buf), ctx);
1184 error = namei(&nd);
1185 if (error) {
1186 socket_lock(so, 0);
1187 return error;
1188 }
1189 nameidone(&nd);
1190 vp = nd.ni_vp;
1191 if (vp->v_type != VSOCK) {
1192 error = ENOTSOCK;
1193 socket_lock(so, 0);
1194 goto out;
1195 }
1196
1197 #if CONFIG_MACF_SOCKET_SUBSET
1198 error = mac_vnode_check_uipc_connect(ctx, vp, so);
1199 if (error) {
1200 socket_lock(so, 0);
1201 goto out;
1202 }
1203 #endif /* MAC_SOCKET_SUBSET */
1204
1205 error = vnode_authorize(vp, NULL, KAUTH_VNODE_WRITE_DATA, ctx);
1206 if (error) {
1207 socket_lock(so, 0);
1208 goto out;
1209 }
1210
1211 lck_mtx_lock(unp_connect_lock);
1212
1213 if (vp->v_socket == 0) {
1214 lck_mtx_unlock(unp_connect_lock);
1215 error = ECONNREFUSED;
1216 socket_lock(so, 0);
1217 goto out;
1218 }
1219
1220 socket_lock(vp->v_socket, 1); /* Get a reference on the listening socket */
1221 so2 = vp->v_socket;
1222 lck_mtx_unlock(unp_connect_lock);
1223
1224
1225 if (so2->so_pcb == NULL) {
1226 error = ECONNREFUSED;
1227 if (so != so2) {
1228 socket_unlock(so2, 1);
1229 socket_lock(so, 0);
1230 } else {
1231 /* Release the reference held for the listen socket */
1232 VERIFY(so2->so_usecount > 0);
1233 so2->so_usecount--;
1234 }
1235 goto out;
1236 }
1237
1238 if (so < so2) {
1239 socket_unlock(so2, 0);
1240 socket_lock(so, 0);
1241 socket_lock(so2, 0);
1242 } else if (so > so2) {
1243 socket_lock(so, 0);
1244 }
1245 /*
1246 * Check if socket was connected while we were trying to
1247 * get the socket locks in order.
1248 * XXX - probably shouldn't return an error for SOCK_DGRAM
1249 */
1250 if ((so->so_state & SS_ISCONNECTED) != 0) {
1251 error = EISCONN;
1252 goto decref_out;
1253 }
1254
1255 if (so->so_type != so2->so_type) {
1256 error = EPROTOTYPE;
1257 goto decref_out;
1258 }
1259
1260 if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
1261 /* Release the incoming socket but keep a reference */
1262 socket_unlock(so, 0);
1263
1264 if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
1265 (so3 = sonewconn(so2, 0, nam)) == 0) {
1266 error = ECONNREFUSED;
1267 if (so != so2) {
1268 socket_unlock(so2, 1);
1269 socket_lock(so, 0);
1270 } else {
1271 socket_lock(so, 0);
1272 /* Release the reference held for
1273 * listen socket.
1274 */
1275 VERIFY(so2->so_usecount > 0);
1276 so2->so_usecount--;
1277 }
1278 goto out;
1279 }
1280 unp2 = sotounpcb(so2);
1281 unp3 = sotounpcb(so3);
1282 if (unp2->unp_addr) {
1283 unp3->unp_addr = (struct sockaddr_un *)
1284 dup_sockaddr((struct sockaddr *)unp2->unp_addr, 1);
1285 }
1286
1287 /*
1288 * unp_peercred management:
1289 *
1290 * The connecter's (client's) credentials are copied
1291 * from its process structure at the time of connect()
1292 * (which is now).
1293 */
1294 cru2x(vfs_context_ucred(ctx), &unp3->unp_peercred);
1295 unp3->unp_flags |= UNP_HAVEPC;
1296 /*
1297 * The receiver's (server's) credentials are copied
1298 * from the unp_peercred member of socket on which the
1299 * former called listen(); unp_listen() cached that
1300 * process's credentials at that time so we can use
1301 * them now.
1302 */
1303 KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED,
1304 ("unp_connect: listener without cached peercred"));
1305
1306 /* Here we need to have both so and so2 locks and so2
1307 * is already locked. Lock ordering is required.
1308 */
1309 if (so < so2) {
1310 socket_unlock(so2, 0);
1311 socket_lock(so, 0);
1312 socket_lock(so2, 0);
1313 } else {
1314 socket_lock(so, 0);
1315 }
1316
1317 /* Check again if the socket state changed when its lock was released */
1318 if ((so->so_state & SS_ISCONNECTED) != 0) {
1319 error = EISCONN;
1320 socket_unlock(so2, 1);
1321 socket_lock(so3, 0);
1322 sofreelastref(so3, 1);
1323 goto out;
1324 }
1325 memcpy(&unp->unp_peercred, &unp2->unp_peercred,
1326 sizeof(unp->unp_peercred));
1327 unp->unp_flags |= UNP_HAVEPC;
1328
1329 #if CONFIG_MACF_SOCKET
1330 /* XXXMAC: recursive lock: SOCK_LOCK(so); */
1331 mac_socketpeer_label_associate_socket(so, so3);
1332 mac_socketpeer_label_associate_socket(so3, so);
1333 /* XXXMAC: SOCK_UNLOCK(so); */
1334 #endif /* MAC_SOCKET */
1335
1336 /* Hold the reference on listening socket until the end */
1337 socket_unlock(so2, 0);
1338 list_so = so2;
1339
1340 /* Lock ordering doesn't matter because so3 was just created */
1341 socket_lock(so3, 1);
1342 so2 = so3;
1343
1344 /*
1345 * Enable tracing for mDNSResponder endpoints. (The use
1346 * of sizeof instead of strlen below takes the null
1347 * terminating character into account.)
1348 */
1349 if (unpst_tracemdns &&
1350 !strncmp(soun->sun_path, MDNSRESPONDER_PATH,
1351 sizeof(MDNSRESPONDER_PATH))) {
1352 unp->unp_flags |= UNP_TRACE_MDNS;
1353 unp2->unp_flags |= UNP_TRACE_MDNS;
1354 }
1355 }
1356
1357 error = unp_connect2(so, so2);
1358
1359 decref_out:
1360 if (so2 != NULL) {
1361 if (so != so2) {
1362 socket_unlock(so2, 1);
1363 } else {
1364 /* Release the extra reference held for the listen socket.
1365 * This is possible only for SOCK_DGRAM sockets. We refuse
1366 * connecting to the same socket for SOCK_STREAM sockets.
1367 */
1368 VERIFY(so2->so_usecount > 0);
1369 so2->so_usecount--;
1370 }
1371 }
1372
1373 if (list_so != NULL) {
1374 socket_lock(list_so, 0);
1375 socket_unlock(list_so, 1);
1376 }
1377
1378 out:
1379 LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1380 vnode_put(vp);
1381 return error;
1382 }
1383
1384 /*
1385 * Returns: 0 Success
1386 * EPROTOTYPE Protocol wrong type for socket
1387 * EINVAL Invalid argument
1388 */
1389 int
1390 unp_connect2(struct socket *so, struct socket *so2)
1391 {
1392 struct unpcb *unp = sotounpcb(so);
1393 struct unpcb *unp2;
1394
1395 if (so2->so_type != so->so_type) {
1396 return EPROTOTYPE;
1397 }
1398
1399 unp2 = sotounpcb(so2);
1400
1401 LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1402 LCK_MTX_ASSERT(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);
1403
1404 /* Verify both sockets are still opened */
1405 if (unp == 0 || unp2 == 0) {
1406 return EINVAL;
1407 }
1408
1409 unp->unp_conn = unp2;
1410 so2->so_usecount++;
1411
1412 switch (so->so_type) {
1413 case SOCK_DGRAM:
1414 LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink);
1415
1416 if (so != so2) {
1417 /* Avoid lock order reversals due to drop/acquire in soisconnected. */
1418 /* Keep an extra reference on so2 that will be dropped
1419 * soon after getting the locks in order
1420 */
1421 socket_unlock(so2, 0);
1422 soisconnected(so);
1423 unp_get_locks_in_order(so, so2);
1424 VERIFY(so2->so_usecount > 0);
1425 so2->so_usecount--;
1426 } else {
1427 soisconnected(so);
1428 }
1429
1430 break;
1431
1432 case SOCK_STREAM:
1433 /* This takes care of socketpair */
1434 if (!(unp->unp_flags & UNP_HAVEPC) &&
1435 !(unp2->unp_flags & UNP_HAVEPC)) {
1436 cru2x(kauth_cred_get(), &unp->unp_peercred);
1437 unp->unp_flags |= UNP_HAVEPC;
1438
1439 cru2x(kauth_cred_get(), &unp2->unp_peercred);
1440 unp2->unp_flags |= UNP_HAVEPC;
1441 }
1442 unp2->unp_conn = unp;
1443 so->so_usecount++;
1444
1445 /* Avoid lock order reversals due to drop/acquire in soisconnected. */
1446 socket_unlock(so, 0);
1447 soisconnected(so2);
1448
1449 /* Keep an extra reference on so2, that will be dropped soon after
1450 * getting the locks in order again.
1451 */
1452 socket_unlock(so2, 0);
1453
1454 socket_lock(so, 0);
1455 soisconnected(so);
1456
1457 unp_get_locks_in_order(so, so2);
1458 /* Decrement the extra reference left before */
1459 VERIFY(so2->so_usecount > 0);
1460 so2->so_usecount--;
1461 break;
1462
1463 default:
1464 panic("unknown socket type %d in unp_connect2", so->so_type);
1465 }
1466 LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1467 LCK_MTX_ASSERT(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);
1468 return 0;
1469 }
1470
1471 static void
1472 unp_disconnect(struct unpcb *unp)
1473 {
1474 struct unpcb *unp2 = NULL;
1475 struct socket *so2 = NULL, *so;
1476 struct socket *waitso;
1477 int so_locked = 1, strdisconn = 0;
1478
1479 so = unp->unp_socket;
1480 if (unp->unp_conn == NULL) {
1481 return;
1482 }
1483 lck_mtx_lock(unp_disconnect_lock);
1484 while (disconnect_in_progress != 0) {
1485 if (so_locked == 1) {
1486 socket_unlock(so, 0);
1487 so_locked = 0;
1488 }
1489 (void)msleep((caddr_t)&disconnect_in_progress, unp_disconnect_lock,
1490 PSOCK, "disconnect", NULL);
1491 }
1492 disconnect_in_progress = 1;
1493 lck_mtx_unlock(unp_disconnect_lock);
1494
1495 if (so_locked == 0) {
1496 socket_lock(so, 0);
1497 so_locked = 1;
1498 }
1499
1500 unp2 = unp->unp_conn;
1501
1502 if (unp2 == 0 || unp2->unp_socket == NULL) {
1503 goto out;
1504 }
1505 so2 = unp2->unp_socket;
1506
1507 try_again:
1508 if (so == so2) {
1509 if (so_locked == 0) {
1510 socket_lock(so, 0);
1511 }
1512 waitso = so;
1513 } else if (so < so2) {
1514 if (so_locked == 0) {
1515 socket_lock(so, 0);
1516 }
1517 socket_lock(so2, 1);
1518 waitso = so2;
1519 } else {
1520 if (so_locked == 1) {
1521 socket_unlock(so, 0);
1522 }
1523 socket_lock(so2, 1);
1524 socket_lock(so, 0);
1525 waitso = so;
1526 }
1527 so_locked = 1;
1528
1529 LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1530 LCK_MTX_ASSERT(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);
1531
1532 /* Check for the UNP_DONTDISCONNECT flag, if it
1533 * is set, release both sockets and go to sleep
1534 */
1535
1536 if ((((struct unpcb *)waitso->so_pcb)->unp_flags & UNP_DONTDISCONNECT) != 0) {
1537 if (so != so2) {
1538 socket_unlock(so2, 1);
1539 }
1540 so_locked = 0;
1541
1542 (void)msleep(waitso->so_pcb, &unp->unp_mtx,
1543 PSOCK | PDROP, "unpdisconnect", NULL);
1544 goto try_again;
1545 }
1546
1547 if (unp->unp_conn == NULL) {
1548 panic("unp_conn became NULL after sleep");
1549 }
1550
1551 unp->unp_conn = NULL;
1552 VERIFY(so2->so_usecount > 0);
1553 so2->so_usecount--;
1554
1555 if (unp->unp_flags & UNP_TRACE_MDNS) {
1556 unp->unp_flags &= ~UNP_TRACE_MDNS;
1557 }
1558
1559 switch (unp->unp_socket->so_type) {
1560 case SOCK_DGRAM:
1561 LIST_REMOVE(unp, unp_reflink);
1562 unp->unp_socket->so_state &= ~SS_ISCONNECTED;
1563 if (so != so2) {
1564 socket_unlock(so2, 1);
1565 }
1566 break;
1567
1568 case SOCK_STREAM:
1569 unp2->unp_conn = NULL;
1570 VERIFY(so->so_usecount > 0);
1571 so->so_usecount--;
1572
1573 /* Set the socket state correctly but do a wakeup later when
1574 * we release all locks except the socket lock, this will avoid
1575 * a deadlock.
1576 */
1577 unp->unp_socket->so_state &= ~(SS_ISCONNECTING | SS_ISCONNECTED | SS_ISDISCONNECTING);
1578 unp->unp_socket->so_state |= (SS_CANTRCVMORE | SS_CANTSENDMORE | SS_ISDISCONNECTED);
1579
1580 unp2->unp_socket->so_state &= ~(SS_ISCONNECTING | SS_ISCONNECTED | SS_ISDISCONNECTING);
1581 unp->unp_socket->so_state |= (SS_CANTRCVMORE | SS_CANTSENDMORE | SS_ISDISCONNECTED);
1582
1583 if (unp2->unp_flags & UNP_TRACE_MDNS) {
1584 unp2->unp_flags &= ~UNP_TRACE_MDNS;
1585 }
1586
1587 strdisconn = 1;
1588 break;
1589 default:
1590 panic("unknown socket type %d", so->so_type);
1591 }
1592 out:
1593 lck_mtx_lock(unp_disconnect_lock);
1594 disconnect_in_progress = 0;
1595 wakeup(&disconnect_in_progress);
1596 lck_mtx_unlock(unp_disconnect_lock);
1597
1598 if (strdisconn) {
1599 socket_unlock(so, 0);
1600 soisdisconnected(so2);
1601 socket_unlock(so2, 1);
1602
1603 socket_lock(so, 0);
1604 soisdisconnected(so);
1605 }
1606 LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1607 return;
1608 }
1609
1610 /*
1611 * unpcb_to_compat copies specific bits of a unpcb to a unpcb_compat format.
1612 * The unpcb_compat data structure is passed to user space and must not change.
1613 */
1614 static void
1615 unpcb_to_compat(struct unpcb *up, struct unpcb_compat *cp)
1616 {
1617 #if defined(__LP64__)
1618 cp->unp_link.le_next = (u_int32_t)
1619 VM_KERNEL_ADDRPERM(up->unp_link.le_next);
1620 cp->unp_link.le_prev = (u_int32_t)
1621 VM_KERNEL_ADDRPERM(up->unp_link.le_prev);
1622 #else
1623 cp->unp_link.le_next = (struct unpcb_compat *)
1624 VM_KERNEL_ADDRPERM(up->unp_link.le_next);
1625 cp->unp_link.le_prev = (struct unpcb_compat **)
1626 VM_KERNEL_ADDRPERM(up->unp_link.le_prev);
1627 #endif
1628 cp->unp_socket = (_UNPCB_PTR(struct socket *))
1629 VM_KERNEL_ADDRPERM(up->unp_socket);
1630 cp->unp_vnode = (_UNPCB_PTR(struct vnode *))
1631 VM_KERNEL_ADDRPERM(up->unp_vnode);
1632 cp->unp_ino = up->unp_ino;
1633 cp->unp_conn = (_UNPCB_PTR(struct unpcb_compat *))
1634 VM_KERNEL_ADDRPERM(up->unp_conn);
1635 cp->unp_refs = (u_int32_t)VM_KERNEL_ADDRPERM(up->unp_refs.lh_first);
1636 #if defined(__LP64__)
1637 cp->unp_reflink.le_next =
1638 (u_int32_t)VM_KERNEL_ADDRPERM(up->unp_reflink.le_next);
1639 cp->unp_reflink.le_prev =
1640 (u_int32_t)VM_KERNEL_ADDRPERM(up->unp_reflink.le_prev);
1641 #else
1642 cp->unp_reflink.le_next =
1643 (struct unpcb_compat *)VM_KERNEL_ADDRPERM(up->unp_reflink.le_next);
1644 cp->unp_reflink.le_prev =
1645 (struct unpcb_compat **)VM_KERNEL_ADDRPERM(up->unp_reflink.le_prev);
1646 #endif
1647 cp->unp_addr = (_UNPCB_PTR(struct sockaddr_un *))
1648 VM_KERNEL_ADDRPERM(up->unp_addr);
1649 cp->unp_cc = up->unp_cc;
1650 cp->unp_mbcnt = up->unp_mbcnt;
1651 cp->unp_gencnt = up->unp_gencnt;
1652 }
1653
1654 static int
1655 unp_pcblist SYSCTL_HANDLER_ARGS
1656 {
1657 #pragma unused(oidp,arg2)
1658 int error, i, n;
1659 struct unpcb *unp, **unp_list;
1660 unp_gen_t gencnt;
1661 struct xunpgen xug;
1662 struct unp_head *head;
1663
1664 lck_rw_lock_shared(unp_list_mtx);
1665 head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);
1666
1667 /*
1668 * The process of preparing the PCB list is too time-consuming and
1669 * resource-intensive to repeat twice on every request.
1670 */
1671 if (req->oldptr == USER_ADDR_NULL) {
1672 n = unp_count;
1673 req->oldidx = 2 * sizeof(xug) + (n + n / 8) *
1674 sizeof(struct xunpcb);
1675 lck_rw_done(unp_list_mtx);
1676 return 0;
1677 }
1678
1679 if (req->newptr != USER_ADDR_NULL) {
1680 lck_rw_done(unp_list_mtx);
1681 return EPERM;
1682 }
1683
1684 /*
1685 * OK, now we're committed to doing something.
1686 */
1687 gencnt = unp_gencnt;
1688 n = unp_count;
1689
1690 bzero(&xug, sizeof(xug));
1691 xug.xug_len = sizeof(xug);
1692 xug.xug_count = n;
1693 xug.xug_gen = gencnt;
1694 xug.xug_sogen = so_gencnt;
1695 error = SYSCTL_OUT(req, &xug, sizeof(xug));
1696 if (error) {
1697 lck_rw_done(unp_list_mtx);
1698 return error;
1699 }
1700
1701 /*
1702 * We are done if there is no pcb
1703 */
1704 if (n == 0) {
1705 lck_rw_done(unp_list_mtx);
1706 return 0;
1707 }
1708
1709 MALLOC(unp_list, struct unpcb **, n * sizeof(*unp_list),
1710 M_TEMP, M_WAITOK);
1711 if (unp_list == 0) {
1712 lck_rw_done(unp_list_mtx);
1713 return ENOMEM;
1714 }
1715
1716 for (unp = head->lh_first, i = 0; unp && i < n;
1717 unp = unp->unp_link.le_next) {
1718 if (unp->unp_gencnt <= gencnt) {
1719 unp_list[i++] = unp;
1720 }
1721 }
1722 n = i; /* in case we lost some during malloc */
1723
1724 error = 0;
1725 for (i = 0; i < n; i++) {
1726 unp = unp_list[i];
1727 if (unp->unp_gencnt <= gencnt) {
1728 struct xunpcb xu;
1729
1730 bzero(&xu, sizeof(xu));
1731 xu.xu_len = sizeof(xu);
1732 xu.xu_unpp = (_UNPCB_PTR(struct unpcb_compat *))
1733 VM_KERNEL_ADDRPERM(unp);
1734 /*
1735 * XXX - need more locking here to protect against
1736 * connect/disconnect races for SMP.
1737 */
1738 if (unp->unp_addr) {
1739 bcopy(unp->unp_addr, &xu.xu_addr,
1740 unp->unp_addr->sun_len);
1741 }
1742 if (unp->unp_conn && unp->unp_conn->unp_addr) {
1743 bcopy(unp->unp_conn->unp_addr,
1744 &xu.xu_caddr,
1745 unp->unp_conn->unp_addr->sun_len);
1746 }
1747 unpcb_to_compat(unp, &xu.xu_unp);
1748 sotoxsocket(unp->unp_socket, &xu.xu_socket);
1749 error = SYSCTL_OUT(req, &xu, sizeof(xu));
1750 }
1751 }
1752 if (!error) {
1753 /*
1754 * Give the user an updated idea of our state.
1755 * If the generation differs from what we told
1756 * her before, she knows that something happened
1757 * while we were processing this request, and it
1758 * might be necessary to retry.
1759 */
1760 bzero(&xug, sizeof(xug));
1761 xug.xug_len = sizeof(xug);
1762 xug.xug_gen = unp_gencnt;
1763 xug.xug_sogen = so_gencnt;
1764 xug.xug_count = unp_count;
1765 error = SYSCTL_OUT(req, &xug, sizeof(xug));
1766 }
1767 FREE(unp_list, M_TEMP);
1768 lck_rw_done(unp_list_mtx);
1769 return error;
1770 }
1771
1772 SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist,
1773 CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
1774 (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb",
1775 "List of active local datagram sockets");
1776 SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist,
1777 CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
1778 (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb",
1779 "List of active local stream sockets");
1780
1781 #if !CONFIG_EMBEDDED
1782
1783 static int
1784 unp_pcblist64 SYSCTL_HANDLER_ARGS
1785 {
1786 #pragma unused(oidp,arg2)
1787 int error, i, n;
1788 struct unpcb *unp, **unp_list;
1789 unp_gen_t gencnt;
1790 struct xunpgen xug;
1791 struct unp_head *head;
1792
1793 lck_rw_lock_shared(unp_list_mtx);
1794 head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);
1795
1796 /*
1797 * The process of preparing the PCB list is too time-consuming and
1798 * resource-intensive to repeat twice on every request.
1799 */
1800 if (req->oldptr == USER_ADDR_NULL) {
1801 n = unp_count;
1802 req->oldidx = 2 * sizeof(xug) + (n + n / 8) *
1803 (sizeof(struct xunpcb64));
1804 lck_rw_done(unp_list_mtx);
1805 return 0;
1806 }
1807
1808 if (req->newptr != USER_ADDR_NULL) {
1809 lck_rw_done(unp_list_mtx);
1810 return EPERM;
1811 }
1812
1813 /*
1814 * OK, now we're committed to doing something.
1815 */
1816 gencnt = unp_gencnt;
1817 n = unp_count;
1818
1819 bzero(&xug, sizeof(xug));
1820 xug.xug_len = sizeof(xug);
1821 xug.xug_count = n;
1822 xug.xug_gen = gencnt;
1823 xug.xug_sogen = so_gencnt;
1824 error = SYSCTL_OUT(req, &xug, sizeof(xug));
1825 if (error) {
1826 lck_rw_done(unp_list_mtx);
1827 return error;
1828 }
1829
1830 /*
1831 * We are done if there is no pcb
1832 */
1833 if (n == 0) {
1834 lck_rw_done(unp_list_mtx);
1835 return 0;
1836 }
1837
1838 MALLOC(unp_list, struct unpcb **, n * sizeof(*unp_list),
1839 M_TEMP, M_WAITOK);
1840 if (unp_list == 0) {
1841 lck_rw_done(unp_list_mtx);
1842 return ENOMEM;
1843 }
1844
1845 for (unp = head->lh_first, i = 0; unp && i < n;
1846 unp = unp->unp_link.le_next) {
1847 if (unp->unp_gencnt <= gencnt) {
1848 unp_list[i++] = unp;
1849 }
1850 }
1851 n = i; /* in case we lost some during malloc */
1852
1853 error = 0;
1854 for (i = 0; i < n; i++) {
1855 unp = unp_list[i];
1856 if (unp->unp_gencnt <= gencnt) {
1857 struct xunpcb64 xu;
1858 size_t xu_len = sizeof(struct xunpcb64);
1859
1860 bzero(&xu, xu_len);
1861 xu.xu_len = xu_len;
1862 xu.xu_unpp = (u_int64_t)VM_KERNEL_ADDRPERM(unp);
1863 xu.xunp_link.le_next = (u_int64_t)
1864 VM_KERNEL_ADDRPERM(unp->unp_link.le_next);
1865 xu.xunp_link.le_prev = (u_int64_t)
1866 VM_KERNEL_ADDRPERM(unp->unp_link.le_prev);
1867 xu.xunp_socket = (u_int64_t)
1868 VM_KERNEL_ADDRPERM(unp->unp_socket);
1869 xu.xunp_vnode = (u_int64_t)
1870 VM_KERNEL_ADDRPERM(unp->unp_vnode);
1871 xu.xunp_ino = unp->unp_ino;
1872 xu.xunp_conn = (u_int64_t)
1873 VM_KERNEL_ADDRPERM(unp->unp_conn);
1874 xu.xunp_refs = (u_int64_t)
1875 VM_KERNEL_ADDRPERM(unp->unp_refs.lh_first);
1876 xu.xunp_reflink.le_next = (u_int64_t)
1877 VM_KERNEL_ADDRPERM(unp->unp_reflink.le_next);
1878 xu.xunp_reflink.le_prev = (u_int64_t)
1879 VM_KERNEL_ADDRPERM(unp->unp_reflink.le_prev);
1880 xu.xunp_cc = unp->unp_cc;
1881 xu.xunp_mbcnt = unp->unp_mbcnt;
1882 xu.xunp_gencnt = unp->unp_gencnt;
1883
1884 if (unp->unp_socket) {
1885 sotoxsocket64(unp->unp_socket, &xu.xu_socket);
1886 }
1887
1888 /*
1889 * XXX - need more locking here to protect against
1890 * connect/disconnect races for SMP.
1891 */
1892 if (unp->unp_addr) {
1893 bcopy(unp->unp_addr, &xu.xunp_addr,
1894 unp->unp_addr->sun_len);
1895 }
1896 if (unp->unp_conn && unp->unp_conn->unp_addr) {
1897 bcopy(unp->unp_conn->unp_addr,
1898 &xu.xunp_caddr,
1899 unp->unp_conn->unp_addr->sun_len);
1900 }
1901
1902 error = SYSCTL_OUT(req, &xu, xu_len);
1903 }
1904 }
1905 if (!error) {
1906 /*
1907 * Give the user an updated idea of our state.
1908 * If the generation differs from what we told
1909 * her before, she knows that something happened
1910 * while we were processing this request, and it
1911 * might be necessary to retry.
1912 */
1913 bzero(&xug, sizeof(xug));
1914 xug.xug_len = sizeof(xug);
1915 xug.xug_gen = unp_gencnt;
1916 xug.xug_sogen = so_gencnt;
1917 xug.xug_count = unp_count;
1918 error = SYSCTL_OUT(req, &xug, sizeof(xug));
1919 }
1920 FREE(unp_list, M_TEMP);
1921 lck_rw_done(unp_list_mtx);
1922 return error;
1923 }
1924
1925 SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist64,
1926 CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
1927 (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist64, "S,xunpcb64",
1928 "List of active local datagram sockets 64 bit");
1929 SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist64,
1930 CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
1931 (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist64, "S,xunpcb64",
1932 "List of active local stream sockets 64 bit");
1933
1934 #endif /* !CONFIG_EMBEDDED */
1935
1936 static void
1937 unp_shutdown(struct unpcb *unp)
1938 {
1939 struct socket *so = unp->unp_socket;
1940 struct socket *so2;
1941 if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn) {
1942 so2 = unp->unp_conn->unp_socket;
1943 unp_get_locks_in_order(so, so2);
1944 socantrcvmore(so2);
1945 socket_unlock(so2, 1);
1946 }
1947 }
1948
1949 static void
1950 unp_drop(struct unpcb *unp, int errno)
1951 {
1952 struct socket *so = unp->unp_socket;
1953
1954 so->so_error = errno;
1955 unp_disconnect(unp);
1956 }
1957
1958 /*
1959 * Returns: 0 Success
1960 * EMSGSIZE The new fd's will not fit
1961 * ENOBUFS Cannot alloc struct fileproc
1962 */
1963 int
1964 unp_externalize(struct mbuf *rights)
1965 {
1966 proc_t p = current_proc(); /* XXX */
1967 int i;
1968 struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
1969 struct fileglob **rp = (struct fileglob **)(cm + 1);
1970 int *fds = (int *)(cm + 1);
1971 struct fileproc *fp;
1972 struct fileproc **fileproc_l;
1973 int newfds = (cm->cmsg_len - sizeof(*cm)) / sizeof(int);
1974 int f, error = 0;
1975
1976 MALLOC(fileproc_l, struct fileproc **,
1977 newfds * sizeof(struct fileproc *), M_TEMP, M_WAITOK);
1978 if (fileproc_l == NULL) {
1979 error = ENOMEM;
1980 goto discard;
1981 }
1982
1983 proc_fdlock(p);
1984
1985 /*
1986 * if the new FD's will not fit, then we free them all
1987 */
1988 if (!fdavail(p, newfds)) {
1989 proc_fdunlock(p);
1990 error = EMSGSIZE;
1991 goto discard;
1992 }
1993 /*
1994 * now change each pointer to an fd in the global table to
1995 * an integer that is the index to the local fd table entry
1996 * that we set up to point to the global one we are transferring.
1997 * XXX (1) this assumes a pointer and int are the same size,
1998 * XXX or the mbuf can hold the expansion
1999 * XXX (2) allocation failures should be non-fatal
2000 */
2001 for (i = 0; i < newfds; i++) {
2002 #if CONFIG_MACF_SOCKET
2003 /*
2004 * If receive access is denied, don't pass along
2005 * and error message, just discard the descriptor.
2006 */
2007 if (mac_file_check_receive(kauth_cred_get(), rp[i])) {
2008 proc_fdunlock(p);
2009 unp_discard(rp[i], p);
2010 fds[i] = 0;
2011 proc_fdlock(p);
2012 continue;
2013 }
2014 #endif
2015 if (fdalloc(p, 0, &f)) {
2016 panic("unp_externalize:fdalloc");
2017 }
2018 fp = fileproc_alloc_init(NULL);
2019 if (fp == NULL) {
2020 panic("unp_externalize: MALLOC_ZONE");
2021 }
2022 fp->f_iocount = 0;
2023 fp->f_fglob = rp[i];
2024 if (fg_removeuipc_mark(rp[i])) {
2025 /*
2026 * Take an iocount on the fp for completing the
2027 * removal from the global msg queue
2028 */
2029 fp->f_iocount++;
2030 fileproc_l[i] = fp;
2031 } else {
2032 fileproc_l[i] = NULL;
2033 }
2034 procfdtbl_releasefd(p, f, fp);
2035 fds[i] = f;
2036 }
2037 proc_fdunlock(p);
2038
2039 for (i = 0; i < newfds; i++) {
2040 if (fileproc_l[i] != NULL) {
2041 VERIFY(fileproc_l[i]->f_fglob != NULL &&
2042 (fileproc_l[i]->f_fglob->fg_lflags & FG_RMMSGQ));
2043 VERIFY(fds[i] >= 0);
2044 fg_removeuipc(fileproc_l[i]->f_fglob);
2045
2046 /* Drop the iocount */
2047 fp_drop(p, fds[i], fileproc_l[i], 0);
2048 fileproc_l[i] = NULL;
2049 }
2050 if (fds[i] != 0) {
2051 (void) OSAddAtomic(-1, &unp_rights);
2052 }
2053 }
2054
2055 discard:
2056 if (fileproc_l != NULL) {
2057 FREE(fileproc_l, M_TEMP);
2058 }
2059 if (error) {
2060 for (i = 0; i < newfds; i++) {
2061 unp_discard(*rp, p);
2062 *rp++ = NULL;
2063 }
2064 }
2065 return error;
2066 }
2067
2068 void
2069 unp_init(void)
2070 {
2071 _CASSERT(UIPC_MAX_CMSG_FD >= (MCLBYTES / sizeof(int)));
2072 unp_zone = zinit(sizeof(struct unpcb),
2073 (nmbclusters * sizeof(struct unpcb)), 4096, "unpzone");
2074
2075 if (unp_zone == 0) {
2076 panic("unp_init");
2077 }
2078 LIST_INIT(&unp_dhead);
2079 LIST_INIT(&unp_shead);
2080
2081 /*
2082 * allocate lock group attribute and group for udp pcb mutexes
2083 */
2084 unp_mtx_grp_attr = lck_grp_attr_alloc_init();
2085
2086 unp_mtx_grp = lck_grp_alloc_init("unp_list", unp_mtx_grp_attr);
2087
2088 unp_mtx_attr = lck_attr_alloc_init();
2089
2090 if ((unp_list_mtx = lck_rw_alloc_init(unp_mtx_grp,
2091 unp_mtx_attr)) == NULL) {
2092 return; /* pretty much dead if this fails... */
2093 }
2094 if ((unp_disconnect_lock = lck_mtx_alloc_init(unp_mtx_grp,
2095 unp_mtx_attr)) == NULL) {
2096 return;
2097 }
2098
2099 if ((unp_connect_lock = lck_mtx_alloc_init(unp_mtx_grp,
2100 unp_mtx_attr)) == NULL) {
2101 return;
2102 }
2103 }
2104
/*
 * Fallback MIN(), used only when no earlier definition exists.
 * Each argument may be evaluated twice, so avoid side effects.
 */
#ifndef MIN
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#endif
2108
2109 /*
2110 * Returns: 0 Success
2111 * EINVAL
2112 * fdgetf_noref:EBADF
2113 */
2114 static int
2115 unp_internalize(struct mbuf *control, proc_t p)
2116 {
2117 struct cmsghdr *cm = mtod(control, struct cmsghdr *);
2118 int *fds;
2119 struct fileglob **rp;
2120 struct fileproc *fp;
2121 int i, error;
2122 int oldfds;
2123 uint8_t fg_ins[UIPC_MAX_CMSG_FD / 8];
2124
2125 /* 64bit: cmsg_len is 'uint32_t', m_len is 'long' */
2126 if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
2127 (socklen_t)cm->cmsg_len != (socklen_t)control->m_len) {
2128 return EINVAL;
2129 }
2130 oldfds = (cm->cmsg_len - sizeof(*cm)) / sizeof(int);
2131 bzero(fg_ins, sizeof(fg_ins));
2132
2133 proc_fdlock(p);
2134 fds = (int *)(cm + 1);
2135
2136 for (i = 0; i < oldfds; i++) {
2137 struct fileproc *tmpfp;
2138 if (((error = fdgetf_noref(p, fds[i], &tmpfp)) != 0)) {
2139 proc_fdunlock(p);
2140 return error;
2141 } else if (!file_issendable(p, tmpfp)) {
2142 proc_fdunlock(p);
2143 return EINVAL;
2144 } else if (FP_ISGUARDED(tmpfp, GUARD_SOCKET_IPC)) {
2145 error = fp_guard_exception(p,
2146 fds[i], tmpfp, kGUARD_EXC_SOCKET_IPC);
2147 proc_fdunlock(p);
2148 return error;
2149 }
2150 }
2151 rp = (struct fileglob **)(cm + 1);
2152
2153 /* On K64 we need to walk backwards because a fileglob * is twice the size of an fd
2154 * and doing them in-order would result in stomping over unprocessed fd's
2155 */
2156 for (i = (oldfds - 1); i >= 0; i--) {
2157 (void) fdgetf_noref(p, fds[i], &fp);
2158 if (fg_insertuipc_mark(fp->f_fglob)) {
2159 fg_ins[i / 8] |= 0x80 >> (i % 8);
2160 }
2161 rp[i] = fp->f_fglob;
2162 }
2163 proc_fdunlock(p);
2164
2165 for (i = 0; i < oldfds; i++) {
2166 if (fg_ins[i / 8] & (0x80 >> (i % 8))) {
2167 VERIFY(rp[i]->fg_lflags & FG_INSMSGQ);
2168 fg_insertuipc(rp[i]);
2169 }
2170 (void) OSAddAtomic(1, &unp_rights);
2171 }
2172
2173 return 0;
2174 }
2175
2176 static int unp_defer, unp_gcing, unp_gcwait;
2177 static thread_t unp_gcthread = NULL;
2178
2179 /* always called under uipc_lock */
2180 void
2181 unp_gc_wait(void)
2182 {
2183 if (unp_gcthread == current_thread()) {
2184 return;
2185 }
2186
2187 while (unp_gcing != 0) {
2188 unp_gcwait = 1;
2189 msleep(&unp_gcing, uipc_lock, 0, "unp_gc_wait", NULL);
2190 }
2191 }
2192
2193
2194 __private_extern__ void
2195 unp_gc(void)
2196 {
2197 struct fileglob *fg, *nextfg;
2198 struct socket *so;
2199 static struct fileglob **extra_ref;
2200 struct fileglob **fpp;
2201 int nunref, i;
2202 int need_gcwakeup = 0;
2203
2204 lck_mtx_lock(uipc_lock);
2205 if (unp_gcing) {
2206 lck_mtx_unlock(uipc_lock);
2207 return;
2208 }
2209 unp_gcing = 1;
2210 unp_defer = 0;
2211 unp_gcthread = current_thread();
2212 lck_mtx_unlock(uipc_lock);
2213 /*
2214 * before going through all this, set all FDs to
2215 * be NOT defered and NOT externally accessible
2216 */
2217 for (fg = fmsghead.lh_first; fg != 0; fg = fg->f_msglist.le_next) {
2218 lck_mtx_lock(&fg->fg_lock);
2219 fg->fg_flag &= ~(FMARK | FDEFER);
2220 lck_mtx_unlock(&fg->fg_lock);
2221 }
2222 do {
2223 for (fg = fmsghead.lh_first; fg != 0;
2224 fg = fg->f_msglist.le_next) {
2225 lck_mtx_lock(&fg->fg_lock);
2226 /*
2227 * If the file is not open, skip it
2228 */
2229 if (fg->fg_count == 0) {
2230 lck_mtx_unlock(&fg->fg_lock);
2231 continue;
2232 }
2233 /*
2234 * If we already marked it as 'defer' in a
2235 * previous pass, then try process it this time
2236 * and un-mark it
2237 */
2238 if (fg->fg_flag & FDEFER) {
2239 fg->fg_flag &= ~FDEFER;
2240 unp_defer--;
2241 } else {
2242 /*
2243 * if it's not defered, then check if it's
2244 * already marked.. if so skip it
2245 */
2246 if (fg->fg_flag & FMARK) {
2247 lck_mtx_unlock(&fg->fg_lock);
2248 continue;
2249 }
2250 /*
2251 * If all references are from messages
2252 * in transit, then skip it. it's not
2253 * externally accessible.
2254 */
2255 if (fg->fg_count == fg->fg_msgcount) {
2256 lck_mtx_unlock(&fg->fg_lock);
2257 continue;
2258 }
2259 /*
2260 * If it got this far then it must be
2261 * externally accessible.
2262 */
2263 fg->fg_flag |= FMARK;
2264 }
2265 /*
2266 * either it was defered, or it is externally
2267 * accessible and not already marked so.
2268 * Now check if it is possibly one of OUR sockets.
2269 */
2270 if (FILEGLOB_DTYPE(fg) != DTYPE_SOCKET ||
2271 (so = (struct socket *)fg->fg_data) == 0) {
2272 lck_mtx_unlock(&fg->fg_lock);
2273 continue;
2274 }
2275 if (so->so_proto->pr_domain != localdomain ||
2276 (so->so_proto->pr_flags & PR_RIGHTS) == 0) {
2277 lck_mtx_unlock(&fg->fg_lock);
2278 continue;
2279 }
2280 #ifdef notdef
2281 if (so->so_rcv.sb_flags & SB_LOCK) {
2282 /*
2283 * This is problematical; it's not clear
2284 * we need to wait for the sockbuf to be
2285 * unlocked (on a uniprocessor, at least),
2286 * and it's also not clear what to do
2287 * if sbwait returns an error due to receipt
2288 * of a signal. If sbwait does return
2289 * an error, we'll go into an infinite
2290 * loop. Delete all of this for now.
2291 */
2292 (void) sbwait(&so->so_rcv);
2293 goto restart;
2294 }
2295 #endif
2296 /*
2297 * So, Ok, it's one of our sockets and it IS externally
2298 * accessible (or was defered). Now we look
2299 * to see if we hold any file descriptors in its
2300 * message buffers. Follow those links and mark them
2301 * as accessible too.
2302 *
2303 * In case a file is passed onto itself we need to
2304 * release the file lock.
2305 */
2306 lck_mtx_unlock(&fg->fg_lock);
2307
2308 unp_scan(so->so_rcv.sb_mb, unp_mark, 0);
2309 }
2310 } while (unp_defer);
2311 /*
2312 * We grab an extra reference to each of the file table entries
2313 * that are not otherwise accessible and then free the rights
2314 * that are stored in messages on them.
2315 *
2316 * The bug in the orginal code is a little tricky, so I'll describe
2317 * what's wrong with it here.
2318 *
2319 * It is incorrect to simply unp_discard each entry for f_msgcount
2320 * times -- consider the case of sockets A and B that contain
2321 * references to each other. On a last close of some other socket,
2322 * we trigger a gc since the number of outstanding rights (unp_rights)
2323 * is non-zero. If during the sweep phase the gc code unp_discards,
2324 * we end up doing a (full) closef on the descriptor. A closef on A
2325 * results in the following chain. Closef calls soo_close, which
2326 * calls soclose. Soclose calls first (through the switch
2327 * uipc_usrreq) unp_detach, which re-invokes unp_gc. Unp_gc simply
2328 * returns because the previous instance had set unp_gcing, and
2329 * we return all the way back to soclose, which marks the socket
2330 * with SS_NOFDREF, and then calls sofree. Sofree calls sorflush
2331 * to free up the rights that are queued in messages on the socket A,
2332 * i.e., the reference on B. The sorflush calls via the dom_dispose
2333 * switch unp_dispose, which unp_scans with unp_discard. This second
2334 * instance of unp_discard just calls closef on B.
2335 *
2336 * Well, a similar chain occurs on B, resulting in a sorflush on B,
2337 * which results in another closef on A. Unfortunately, A is already
2338 * being closed, and the descriptor has already been marked with
2339 * SS_NOFDREF, and soclose panics at this point.
2340 *
2341 * Here, we first take an extra reference to each inaccessible
2342 * descriptor. Then, we call sorflush ourself, since we know
2343 * it is a Unix domain socket anyhow. After we destroy all the
2344 * rights carried in messages, we do a last closef to get rid
2345 * of our extra reference. This is the last close, and the
2346 * unp_detach etc will shut down the socket.
2347 *
2348 * 91/09/19, bsy@cs.cmu.edu
2349 */
2350 extra_ref = _MALLOC(nfiles * sizeof(struct fileglob *),
2351 M_FILEGLOB, M_WAITOK);
2352 if (extra_ref == NULL) {
2353 goto bail;
2354 }
2355 for (nunref = 0, fg = fmsghead.lh_first, fpp = extra_ref; fg != 0;
2356 fg = nextfg) {
2357 lck_mtx_lock(&fg->fg_lock);
2358
2359 nextfg = fg->f_msglist.le_next;
2360 /*
2361 * If it's not open, skip it
2362 */
2363 if (fg->fg_count == 0) {
2364 lck_mtx_unlock(&fg->fg_lock);
2365 continue;
2366 }
2367 /*
2368 * If all refs are from msgs, and it's not marked accessible
2369 * then it must be referenced from some unreachable cycle
2370 * of (shut-down) FDs, so include it in our
2371 * list of FDs to remove
2372 */
2373 if (fg->fg_count == fg->fg_msgcount && !(fg->fg_flag & FMARK)) {
2374 fg->fg_count++;
2375 *fpp++ = fg;
2376 nunref++;
2377 }
2378 lck_mtx_unlock(&fg->fg_lock);
2379 }
2380 /*
2381 * for each FD on our hit list, do the following two things
2382 */
2383 for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
2384 struct fileglob *tfg;
2385
2386 tfg = *fpp;
2387
2388 if (FILEGLOB_DTYPE(tfg) == DTYPE_SOCKET &&
2389 tfg->fg_data != NULL) {
2390 so = (struct socket *)(tfg->fg_data);
2391
2392 socket_lock(so, 0);
2393
2394 sorflush(so);
2395
2396 socket_unlock(so, 0);
2397 }
2398 }
2399 for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
2400 closef_locked((struct fileproc *)0, *fpp, (proc_t)NULL);
2401 }
2402
2403 FREE(extra_ref, M_FILEGLOB);
2404 bail:
2405 lck_mtx_lock(uipc_lock);
2406 unp_gcing = 0;
2407 unp_gcthread = NULL;
2408
2409 if (unp_gcwait != 0) {
2410 unp_gcwait = 0;
2411 need_gcwakeup = 1;
2412 }
2413 lck_mtx_unlock(uipc_lock);
2414
2415 if (need_gcwakeup != 0) {
2416 wakeup(&unp_gcing);
2417 }
2418 }
2419
2420 void
2421 unp_dispose(struct mbuf *m)
2422 {
2423 if (m) {
2424 unp_scan(m, unp_discard, NULL);
2425 }
2426 }
2427
2428 /*
2429 * Returns: 0 Success
2430 */
2431 static int
2432 unp_listen(struct unpcb *unp, proc_t p)
2433 {
2434 kauth_cred_t safecred = kauth_cred_proc_ref(p);
2435 cru2x(safecred, &unp->unp_peercred);
2436 kauth_cred_unref(&safecred);
2437 unp->unp_flags |= UNP_HAVEPCCACHED;
2438 return 0;
2439 }
2440
2441 static void
2442 unp_scan(struct mbuf *m0, void (*op)(struct fileglob *, void *arg), void *arg)
2443 {
2444 struct mbuf *m;
2445 struct fileglob **rp;
2446 struct cmsghdr *cm;
2447 int i;
2448 int qfds;
2449
2450 while (m0) {
2451 for (m = m0; m; m = m->m_next) {
2452 if (m->m_type == MT_CONTROL &&
2453 (size_t)m->m_len >= sizeof(*cm)) {
2454 cm = mtod(m, struct cmsghdr *);
2455 if (cm->cmsg_level != SOL_SOCKET ||
2456 cm->cmsg_type != SCM_RIGHTS) {
2457 continue;
2458 }
2459 qfds = (cm->cmsg_len - sizeof(*cm)) /
2460 sizeof(int);
2461 rp = (struct fileglob **)(cm + 1);
2462 for (i = 0; i < qfds; i++) {
2463 (*op)(*rp++, arg);
2464 }
2465 break; /* XXX, but saves time */
2466 }
2467 }
2468 m0 = m0->m_act;
2469 }
2470 }
2471
2472 static void
2473 unp_mark(struct fileglob *fg, __unused void *arg)
2474 {
2475 lck_mtx_lock(&fg->fg_lock);
2476
2477 if (fg->fg_flag & FMARK) {
2478 lck_mtx_unlock(&fg->fg_lock);
2479 return;
2480 }
2481 fg->fg_flag |= (FMARK | FDEFER);
2482
2483 lck_mtx_unlock(&fg->fg_lock);
2484
2485 unp_defer++;
2486 }
2487
2488 static void
2489 unp_discard(struct fileglob *fg, void *p)
2490 {
2491 if (p == NULL) {
2492 p = current_proc(); /* XXX */
2493 }
2494 (void) OSAddAtomic(1, &unp_disposed);
2495 if (fg_removeuipc_mark(fg)) {
2496 VERIFY(fg->fg_lflags & FG_RMMSGQ);
2497 fg_removeuipc(fg);
2498 }
2499 (void) OSAddAtomic(-1, &unp_rights);
2500
2501 proc_fdlock(p);
2502 (void) closef_locked((struct fileproc *)0, fg, p);
2503 proc_fdunlock(p);
2504 }
2505
2506 int
2507 unp_lock(struct socket *so, int refcount, void * lr)
2508 {
2509 void * lr_saved;
2510 if (lr == 0) {
2511 lr_saved = (void *) __builtin_return_address(0);
2512 } else {
2513 lr_saved = lr;
2514 }
2515
2516 if (so->so_pcb) {
2517 lck_mtx_lock(&((struct unpcb *)so->so_pcb)->unp_mtx);
2518 } else {
2519 panic("unp_lock: so=%p NO PCB! lr=%p ref=0x%x\n",
2520 so, lr_saved, so->so_usecount);
2521 }
2522
2523 if (so->so_usecount < 0) {
2524 panic("unp_lock: so=%p so_pcb=%p lr=%p ref=0x%x\n",
2525 so, so->so_pcb, lr_saved, so->so_usecount);
2526 }
2527
2528 if (refcount) {
2529 VERIFY(so->so_usecount > 0);
2530 so->so_usecount++;
2531 }
2532 so->lock_lr[so->next_lock_lr] = lr_saved;
2533 so->next_lock_lr = (so->next_lock_lr + 1) % SO_LCKDBG_MAX;
2534 return 0;
2535 }
2536
2537 int
2538 unp_unlock(struct socket *so, int refcount, void * lr)
2539 {
2540 void * lr_saved;
2541 lck_mtx_t * mutex_held = NULL;
2542 struct unpcb *unp = sotounpcb(so);
2543
2544 if (lr == 0) {
2545 lr_saved = (void *) __builtin_return_address(0);
2546 } else {
2547 lr_saved = lr;
2548 }
2549
2550 if (refcount) {
2551 so->so_usecount--;
2552 }
2553
2554 if (so->so_usecount < 0) {
2555 panic("unp_unlock: so=%p usecount=%x\n", so, so->so_usecount);
2556 }
2557 if (so->so_pcb == NULL) {
2558 panic("unp_unlock: so=%p NO PCB usecount=%x\n", so, so->so_usecount);
2559 } else {
2560 mutex_held = &((struct unpcb *)so->so_pcb)->unp_mtx;
2561 }
2562 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
2563 so->unlock_lr[so->next_unlock_lr] = lr_saved;
2564 so->next_unlock_lr = (so->next_unlock_lr + 1) % SO_LCKDBG_MAX;
2565
2566 if (so->so_usecount == 0 && (so->so_flags & SOF_PCBCLEARING)) {
2567 sofreelastref(so, 1);
2568
2569 if (unp->unp_addr) {
2570 FREE(unp->unp_addr, M_SONAME);
2571 }
2572
2573 lck_mtx_unlock(mutex_held);
2574
2575 lck_mtx_destroy(&unp->unp_mtx, unp_mtx_grp);
2576 zfree(unp_zone, unp);
2577
2578 unp_gc();
2579 } else {
2580 lck_mtx_unlock(mutex_held);
2581 }
2582
2583 return 0;
2584 }
2585
2586 lck_mtx_t *
2587 unp_getlock(struct socket *so, __unused int flags)
2588 {
2589 struct unpcb *unp = (struct unpcb *)so->so_pcb;
2590
2591
2592 if (so->so_pcb) {
2593 if (so->so_usecount < 0) {
2594 panic("unp_getlock: so=%p usecount=%x\n", so, so->so_usecount);
2595 }
2596 return &unp->unp_mtx;
2597 } else {
2598 panic("unp_getlock: so=%p NULL so_pcb\n", so);
2599 return so->so_proto->pr_domain->dom_mtx;
2600 }
2601 }