]> git.saurik.com Git - apple/xnu.git/blob - bsd/kern/uipc_usrreq.c
xnu-2050.22.13.tar.gz
[apple/xnu.git] / bsd / kern / uipc_usrreq.c
1 /*
2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1982, 1986, 1989, 1991, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94
61 */
62 /*
63 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
64 * support for mandatory and extensible security protections. This notice
65 * is included in support of clause 2.2 (b) of the Apple Public License,
66 * Version 2.0.
67 */
68
69 #include <sys/param.h>
70 #include <sys/systm.h>
71 #include <sys/kernel.h>
72 #include <sys/domain.h>
73 #include <sys/fcntl.h>
74 #include <sys/malloc.h> /* XXX must be before <sys/file.h> */
75 #include <sys/file_internal.h>
76 #include <sys/filedesc.h>
77 #include <sys/lock.h>
78 #include <sys/mbuf.h>
79 #include <sys/namei.h>
80 #include <sys/proc_internal.h>
81 #include <sys/kauth.h>
82 #include <sys/protosw.h>
83 #include <sys/socket.h>
84 #include <sys/socketvar.h>
85 #include <sys/stat.h>
86 #include <sys/sysctl.h>
87 #include <sys/un.h>
88 #include <sys/unpcb.h>
89 #include <sys/vnode_internal.h>
90 #include <sys/kdebug.h>
91
92 #include <kern/zalloc.h>
93 #include <kern/locks.h>
94
95 #if CONFIG_MACF
96 #include <security/mac_framework.h>
97 #endif /* CONFIG_MACF */
98
99 #include <mach/vm_param.h>
100
101 #define f_msgcount f_fglob->fg_msgcount
102 #define f_cred f_fglob->fg_cred
103 #define f_ops f_fglob->fg_ops
104 #define f_offset f_fglob->fg_offset
105 #define f_data f_fglob->fg_data
106 struct zone *unp_zone;
107 static unp_gen_t unp_gencnt;
108 static u_int unp_count;
109
110 static lck_attr_t *unp_mtx_attr;
111 static lck_grp_t *unp_mtx_grp;
112 static lck_grp_attr_t *unp_mtx_grp_attr;
113 static lck_rw_t *unp_list_mtx;
114
115 static lck_mtx_t *unp_disconnect_lock;
116 static lck_mtx_t *unp_connect_lock;
117 static u_int disconnect_in_progress;
118
119 extern lck_mtx_t *uipc_lock;
120 static struct unp_head unp_shead, unp_dhead;
121
122 /*
123 * mDNSResponder tracing. When enabled, endpoints connected to
124 * /var/run/mDNSResponder will be traced; during each send on
125 * the traced socket, we log the PID and process name of the
126 * sending process. We also print out a bit of info related
127 * to the data itself; this assumes ipc_msg_hdr in dnssd_ipc.h
128 * of mDNSResponder stays the same.
129 */
130 #define MDNSRESPONDER_PATH "/var/run/mDNSResponder"
131
132 static int unpst_tracemdns; /* enable tracing */
133
134 #define MDNS_IPC_MSG_HDR_VERSION_1 1
135
/*
 * On-the-wire header of mDNSResponder IPC requests, mirrored here so the
 * tracing code in uipc_send() can peek at the version and opcode of each
 * message sent over /var/run/mDNSResponder.  Must stay layout-compatible
 * with ipc_msg_hdr in mDNSResponder's dnssd_ipc.h (see notice above).
 */
struct mdns_ipc_msg_hdr {
	uint32_t version;	/* protocol version (network byte order) */
	uint32_t datalen;	/* length of payload following the header */
	uint32_t ipc_flags;
	uint32_t op;		/* request opcode (network byte order) */
	union {
		void *context;	/* client-side cookie; opaque to the kernel */
		uint32_t u32[2];
	} __attribute__((packed));
	uint32_t reg_index;
} __attribute__((packed));	/* packed: must match userland wire layout */
147
148 /*
149 * Unix communications domain.
150 *
151 * TODO:
152 * SEQPACKET, RDM
153 * rethink name space problems
154 * need a proper out-of-band
155 * lock pushdown
156 */
157 static struct sockaddr sun_noname = { sizeof (sun_noname), AF_LOCAL, { 0 } };
158 static ino_t unp_ino; /* prototype for fake inode numbers */
159
160 static int unp_attach(struct socket *);
161 static void unp_detach(struct unpcb *);
162 static int unp_bind(struct unpcb *, struct sockaddr *, proc_t);
163 static int unp_connect(struct socket *, struct sockaddr *, proc_t);
164 static void unp_disconnect(struct unpcb *);
165 static void unp_shutdown(struct unpcb *);
166 static void unp_drop(struct unpcb *, int);
167 __private_extern__ void unp_gc(void);
168 static void unp_scan(struct mbuf *, void (*)(struct fileglob *));
169 static void unp_mark(struct fileglob *);
170 static void unp_discard(struct fileglob *);
171 static void unp_discard_fdlocked(struct fileglob *, proc_t);
172 static int unp_internalize(struct mbuf *, proc_t);
173 static int unp_listen(struct unpcb *, proc_t);
174 static void unpcb_to_compat(struct unpcb *, struct unpcb_compat *);
175 static void unp_get_locks_in_order(struct socket *so, struct socket *conn_so);
176
/*
 * Acquire the locks of a connected socket pair in a canonical order
 * (lower address first) to avoid deadlock.  Caller enters holding the
 * lock of 'so' and leaves holding both locks.
 *
 * When 'so' sorts after 'conn_so' we must drop so's lock to take
 * conn_so's first; UNP_DONTDISCONNECT plus rw_thrcount keep the pcb
 * from being disconnected out from under us during that window, and
 * the wakeup() releases any thread waiting for the flag to clear.
 */
static void
unp_get_locks_in_order(struct socket *so, struct socket *conn_so)
{
	if (so < conn_so) {
		/* Already in canonical order; just take the second lock. */
		socket_lock(conn_so, 1);
	} else {
		struct unpcb *unp = sotounpcb(so);
		unp->unp_flags |= UNP_DONTDISCONNECT;
		unp->rw_thrcount++;
		socket_unlock(so, 0);

		/* Get the locks in the correct order */
		socket_lock(conn_so, 1);
		socket_lock(so, 0);
		unp->rw_thrcount--;
		if (unp->rw_thrcount == 0) {
			unp->unp_flags &= ~UNP_DONTDISCONNECT;
			wakeup(unp);
		}
	}
}
198
199 static int
200 uipc_abort(struct socket *so)
201 {
202 struct unpcb *unp = sotounpcb(so);
203
204 if (unp == 0)
205 return (EINVAL);
206 unp_drop(unp, ECONNABORTED);
207 unp_detach(unp);
208 sofree(so);
209 return (0);
210 }
211
212 static int
213 uipc_accept(struct socket *so, struct sockaddr **nam)
214 {
215 struct unpcb *unp = sotounpcb(so);
216
217 if (unp == 0)
218 return (EINVAL);
219
220 /*
221 * Pass back name of connected socket,
222 * if it was bound and we are still connected
223 * (our peer may have closed already!).
224 */
225 if (unp->unp_conn && unp->unp_conn->unp_addr) {
226 *nam = dup_sockaddr((struct sockaddr *)
227 unp->unp_conn->unp_addr, 1);
228 } else {
229 *nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
230 }
231 return (0);
232 }
233
234 /*
235 * Returns: 0 Success
236 * EISCONN
237 * unp_attach:
238 */
239 static int
240 uipc_attach(struct socket *so, __unused int proto, __unused proc_t p)
241 {
242 struct unpcb *unp = sotounpcb(so);
243
244 if (unp != 0)
245 return (EISCONN);
246 return (unp_attach(so));
247 }
248
249 static int
250 uipc_bind(struct socket *so, struct sockaddr *nam, proc_t p)
251 {
252 struct unpcb *unp = sotounpcb(so);
253
254 if (unp == 0)
255 return (EINVAL);
256
257 return (unp_bind(unp, nam, p));
258 }
259
260 /*
261 * Returns: 0 Success
262 * EINVAL
263 * unp_connect:??? [See elsewhere in this file]
264 */
265 static int
266 uipc_connect(struct socket *so, struct sockaddr *nam, proc_t p)
267 {
268 struct unpcb *unp = sotounpcb(so);
269
270 if (unp == 0)
271 return (EINVAL);
272 return (unp_connect(so, nam, p));
273 }
274
275 /*
276 * Returns: 0 Success
277 * EINVAL
278 * unp_connect2:EPROTOTYPE Protocol wrong type for socket
279 * unp_connect2:EINVAL Invalid argument
280 */
281 static int
282 uipc_connect2(struct socket *so1, struct socket *so2)
283 {
284 struct unpcb *unp = sotounpcb(so1);
285
286 if (unp == 0)
287 return (EINVAL);
288
289 return (unp_connect2(so1, so2));
290 }
291
292 /* control is EOPNOTSUPP */
293
294 static int
295 uipc_detach(struct socket *so)
296 {
297 struct unpcb *unp = sotounpcb(so);
298
299 if (unp == 0)
300 return (EINVAL);
301
302 lck_mtx_assert(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
303 unp_detach(unp);
304 return (0);
305 }
306
307 static int
308 uipc_disconnect(struct socket *so)
309 {
310 struct unpcb *unp = sotounpcb(so);
311
312 if (unp == 0)
313 return (EINVAL);
314 unp_disconnect(unp);
315 return (0);
316 }
317
318 /*
319 * Returns: 0 Success
320 * EINVAL
321 */
322 static int
323 uipc_listen(struct socket *so, __unused proc_t p)
324 {
325 struct unpcb *unp = sotounpcb(so);
326
327 if (unp == 0 || unp->unp_vnode == 0)
328 return (EINVAL);
329 return (unp_listen(unp, p));
330 }
331
332 static int
333 uipc_peeraddr(struct socket *so, struct sockaddr **nam)
334 {
335 struct unpcb *unp = sotounpcb(so);
336
337 if (unp == NULL)
338 return (EINVAL);
339 if (unp->unp_conn != NULL && unp->unp_conn->unp_addr != NULL) {
340 *nam = dup_sockaddr((struct sockaddr *)
341 unp->unp_conn->unp_addr, 1);
342 } else {
343 *nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
344 }
345 return (0);
346 }
347
/*
 * Receive-side accounting hook: after the reader drains data from a
 * SOCK_STREAM socket, refresh the peer sender's buffer limits so that
 * flow control (backpressure) reflects the space just freed, then wake
 * any writer blocked on the peer.  SOCK_DGRAM sockets never call this.
 */
static int
uipc_rcvd(struct socket *so, __unused int flags)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;

	if (unp == 0)
		return (EINVAL);
	switch (so->so_type) {
	case SOCK_DGRAM:
		panic("uipc_rcvd DGRAM?");
		/*NOTREACHED*/

	case SOCK_STREAM:
	/* rcv = our receive buffer, snd = the peer's send buffer */
#define rcv (&so->so_rcv)
#define snd (&so2->so_snd)
		if (unp->unp_conn == 0)
			break;

		so2 = unp->unp_conn->unp_socket;
		unp_get_locks_in_order(so, so2);
		/*
		 * Adjust backpressure on sender
		 * and wakeup any waiting to write.
		 */
		snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
		unp->unp_mbcnt = rcv->sb_mbcnt;
		snd->sb_hiwat += unp->unp_cc - rcv->sb_cc;
		unp->unp_cc = rcv->sb_cc;
		sowwakeup(so2);

		socket_unlock(so2, 1);

#undef snd
#undef rcv
		break;

	default:
		panic("uipc_rcvd unknown socktype");
	}
	return (0);
}
390
391 /* pru_rcvoob is EOPNOTSUPP */
392
393 /*
394 * Returns: 0 Success
395 * EINVAL
396 * EOPNOTSUPP
397 * EPIPE
398 * ENOTCONN
399 * EISCONN
400 * unp_internalize:EINVAL
401 * unp_internalize:EBADF
402 * unp_connect:EAFNOSUPPORT Address family not supported
403 * unp_connect:EINVAL Invalid argument
404 * unp_connect:ENOTSOCK Not a socket
405 * unp_connect:ECONNREFUSED Connection refused
406 * unp_connect:EISCONN Socket is connected
407 * unp_connect:EPROTOTYPE Protocol wrong type for socket
408 * unp_connect:???
409 * sbappendaddr:ENOBUFS [5th argument, contents modified]
410 * sbappendaddr:??? [whatever a filter author chooses]
411 */
/*
 * Send data (and optionally ancillary/control data carrying file
 * descriptors) on an AF_UNIX socket by appending directly to the
 * peer's receive buffer.  Handles implicit connect for both socket
 * types, cross-socket lock ordering, and ownership transfer of the
 * mbuf chains: once appended (or claimed by a socket filter), 'm'
 * and 'control' belong to the receiver and must not be freed here.
 */
static int
uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
    struct mbuf *control, proc_t p)
{
	int error = 0;
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;

	if (unp == 0) {
		error = EINVAL;
		goto release;
	}
	/* AF_UNIX has no out-of-band data. */
	if (flags & PRUS_OOB) {
		error = EOPNOTSUPP;
		goto release;
	}

	if (control) {
		/* release lock to avoid deadlock (4436174) */
		socket_unlock(so, 0);
		/* Convert caller fds in 'control' into fileglob references. */
		error = unp_internalize(control, p);
		socket_lock(so, 0);
		if (error)
			goto release;
	}

	switch (so->so_type) {
	case SOCK_DGRAM:
	{
		struct sockaddr *from;

		if (nam) {
			/* sendto() with an address: temporary connect. */
			if (unp->unp_conn) {
				error = EISCONN;
				break;
			}
			error = unp_connect(so, nam, p);
			if (error)
				break;
		} else {
			if (unp->unp_conn == 0) {
				error = ENOTCONN;
				break;
			}
		}

		so2 = unp->unp_conn->unp_socket;
		/* so == so2 when a socket sends to itself; already locked. */
		if (so != so2)
			unp_get_locks_in_order(so, so2);

		if (unp->unp_addr)
			from = (struct sockaddr *)unp->unp_addr;
		else
			from = &sun_noname;
		/*
		 * sbappendaddr() will fail when the receiver runs out of
		 * space; in contrast to SOCK_STREAM, we will lose messages
		 * for the SOCK_DGRAM case when the receiver's queue overflows.
		 * SB_UNIX on the socket buffer implies that the callee will
		 * not free the control message, if any, because we would need
		 * to call unp_dispose() on it.
		 */
		if (sbappendaddr(&so2->so_rcv, from, m, control, &error)) {
			control = NULL;
			sorwakeup(so2);
		} else if (control != NULL && error == 0) {
			/* A socket filter took control; don't touch it */
			control = NULL;
		}

		if (so != so2)
			socket_unlock(so2, 1);

		/* sbappendaddr() consumed 'm' regardless of outcome. */
		m = NULL;
		if (nam)
			unp_disconnect(unp);
		break;
	}

	case SOCK_STREAM: {
		int didreceive = 0;
	/* rcv = the peer's receive buffer, snd = our send buffer */
#define rcv (&so2->so_rcv)
#define snd (&so->so_snd)
		/* Connect if not connected yet. */
		/*
		 * Note: A better implementation would complain
		 * if not equal to the peer's address.
		 */
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			if (nam) {
				error = unp_connect(so, nam, p);
				if (error)
					break;	/* XXX */
			} else {
				error = ENOTCONN;
				break;
			}
		}

		if (so->so_state & SS_CANTSENDMORE) {
			error = EPIPE;
			break;
		}
		if (unp->unp_conn == 0)
			panic("uipc_send connected but no connection?");

		so2 = unp->unp_conn->unp_socket;
		unp_get_locks_in_order(so, so2);

		/* Check socket state again as we might have unlocked the socket
		 * while trying to get the locks in order
		 */

		if ((so->so_state & SS_CANTSENDMORE)) {
			error = EPIPE;
			socket_unlock(so2, 1);
			break;
		}

		/* mDNSResponder tracing: log sender pid and request opcode. */
		if (unp->unp_flags & UNP_TRACE_MDNS) {
			struct mdns_ipc_msg_hdr hdr;

			if (mbuf_copydata(m, 0, sizeof (hdr), &hdr) == 0 &&
			    hdr.version == ntohl(MDNS_IPC_MSG_HDR_VERSION_1)) {
				printf("%s[mDNSResponder] pid=%d (%s): op=0x%x\n",
				    __func__, p->p_pid, p->p_comm, ntohl(hdr.op));
			}
		}

		/*
		 * Send to paired receive port, and then reduce send buffer
		 * hiwater marks to maintain backpressure. Wake up readers.
		 * SB_UNIX flag will allow new record to be appended to the
		 * receiver's queue even when it is already full. It is
		 * possible, however, that append might fail. In that case,
		 * we will need to call unp_dispose() on the control message;
		 * the callee will not free it since SB_UNIX is set.
		 */
		didreceive = control ?
		    sbappendcontrol(rcv, m, control, &error) : sbappend(rcv, m);

		snd->sb_mbmax -= rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
		unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
		snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc;
		unp->unp_conn->unp_cc = rcv->sb_cc;
		if (didreceive) {
			control = NULL;
			sorwakeup(so2);
		} else if (control != NULL && error == 0) {
			/* A socket filter took control; don't touch it */
			control = NULL;
		}

		socket_unlock(so2, 1);
		m = NULL;
#undef snd
#undef rcv
	}
		break;

	default:
		panic("uipc_send unknown socktype");
	}

	/*
	 * SEND_EOF is equivalent to a SEND followed by
	 * a SHUTDOWN.
	 */
	if (flags & PRUS_EOF) {
		socantsendmore(so);
		unp_shutdown(unp);
	}

	/* Append failed: undo unp_internalize() before freeing 'control'. */
	if (control && error != 0) {
		socket_unlock(so, 0);
		unp_dispose(control);
		socket_lock(so, 0);
	}

release:
	if (control)
		m_freem(control);
	if (m)
		m_freem(m);
	return (error);
}
598
/*
 * fstat() support: fill in a stat/stat64 buffer for an AF_UNIX socket.
 * Reports a block size of the send hiwat (plus, for a connected stream,
 * the bytes queued at the peer) and a fake, lazily assigned inode number.
 */
static int
uipc_sense(struct socket *so, void *ub, int isstat64)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;
	blksize_t blksize;

	if (unp == 0)
		return (EINVAL);

	blksize = so->so_snd.sb_hiwat;
	if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) {
		so2 = unp->unp_conn->unp_socket;
		blksize += so2->so_rcv.sb_cc;
	}
	/*
	 * Assign a fake inode number on first stat.
	 * NOTE(review): unp_ino++ on the shared counter is not atomic here;
	 * presumably serialized by the socket lock — confirm against callers.
	 */
	if (unp->unp_ino == 0)
		unp->unp_ino = unp_ino++;

	if (isstat64 != 0) {
		struct stat64 *sb64;

		sb64 = (struct stat64 *)ub;
		sb64->st_blksize = blksize;
		sb64->st_dev = NODEV;
		sb64->st_ino = (ino64_t)unp->unp_ino;
	} else {
		struct stat *sb;

		sb = (struct stat *)ub;
		sb->st_blksize = blksize;
		sb->st_dev = NODEV;
		sb->st_ino = (ino_t)(uintptr_t)unp->unp_ino;
	}

	return (0);
}
635
636 /*
637 * Returns: 0 Success
638 * EINVAL
639 *
640 * Notes: This is not strictly correct, as unp_shutdown() also calls
641 * socantrcvmore(). These should maybe both be conditionalized
642 * on the 'how' argument in soshutdown() as called from the
643 * shutdown() system call.
644 */
645 static int
646 uipc_shutdown(struct socket *so)
647 {
648 struct unpcb *unp = sotounpcb(so);
649
650 if (unp == 0)
651 return (EINVAL);
652 socantsendmore(so);
653 unp_shutdown(unp);
654 return (0);
655 }
656
657 /*
658 * Returns: 0 Success
659 * EINVAL Invalid argument
660 */
661 static int
662 uipc_sockaddr(struct socket *so, struct sockaddr **nam)
663 {
664 struct unpcb *unp = sotounpcb(so);
665
666 if (unp == NULL)
667 return (EINVAL);
668 if (unp->unp_addr != NULL) {
669 *nam = dup_sockaddr((struct sockaddr *)unp->unp_addr, 1);
670 } else {
671 *nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
672 }
673 return (0);
674 }
675
/*
 * Protocol user-request dispatch table for the local (UNIX) domain;
 * control, rcvoob and sopoll are unsupported.
 */
struct pr_usrreqs uipc_usrreqs = {
	uipc_abort, uipc_accept, uipc_attach, uipc_bind, uipc_connect,
	uipc_connect2, pru_control_notsupp, uipc_detach, uipc_disconnect,
	uipc_listen, uipc_peeraddr, uipc_rcvd, pru_rcvoob_notsupp,
	uipc_send, uipc_sense, uipc_shutdown, uipc_sockaddr,
	sosend, soreceive, pru_sopoll_notsupp
};
683
684 int
685 uipc_ctloutput(struct socket *so, struct sockopt *sopt)
686 {
687 struct unpcb *unp = sotounpcb(so);
688 int error;
689
690 switch (sopt->sopt_dir) {
691 case SOPT_GET:
692 switch (sopt->sopt_name) {
693 case LOCAL_PEERCRED:
694 if (unp->unp_flags & UNP_HAVEPC) {
695 error = sooptcopyout(sopt, &unp->unp_peercred,
696 sizeof (unp->unp_peercred));
697 } else {
698 if (so->so_type == SOCK_STREAM)
699 error = ENOTCONN;
700 else
701 error = EINVAL;
702 }
703 break;
704 case LOCAL_PEERPID:
705 if (unp->unp_conn != NULL) {
706 if (unp->unp_conn->unp_socket != NULL) {
707 pid_t peerpid = unp->unp_conn->unp_socket->last_pid;
708 error = sooptcopyout(sopt, &peerpid, sizeof (peerpid));
709 } else {
710 panic("peer is connected but has no socket?");
711 }
712 } else {
713 error = ENOTCONN;
714 }
715 break;
716 default:
717 error = EOPNOTSUPP;
718 break;
719 }
720 break;
721 case SOPT_SET:
722 default:
723 error = EOPNOTSUPP;
724 break;
725 }
726 return (error);
727 }
728
729 /*
730 * Both send and receive buffers are allocated PIPSIZ bytes of buffering
731 * for stream sockets, although the total for sender and receiver is
732 * actually only PIPSIZ.
733 * Datagram sockets really use the sendspace as the maximum datagram size,
734 * and don't really want to reserve the sendspace. Their recvspace should
735 * be large enough for at least one max-size datagram plus address.
736 */
737 #ifndef PIPSIZ
738 #define PIPSIZ 8192
739 #endif
740 static u_int32_t unpst_sendspace = PIPSIZ;
741 static u_int32_t unpst_recvspace = PIPSIZ;
742 static u_int32_t unpdg_sendspace = 2*1024; /* really max datagram size */
743 static u_int32_t unpdg_recvspace = 4*1024;
744
745 static int unp_rights; /* file descriptors in flight */
746 static int unp_disposed; /* discarded file descriptors */
747
748 SYSCTL_DECL(_net_local_stream);
749 SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW | CTLFLAG_LOCKED,
750 &unpst_sendspace, 0, "");
751 SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW | CTLFLAG_LOCKED,
752 &unpst_recvspace, 0, "");
753 SYSCTL_INT(_net_local_stream, OID_AUTO, tracemdns, CTLFLAG_RW | CTLFLAG_LOCKED,
754 &unpst_tracemdns, 0, "");
755 SYSCTL_DECL(_net_local_dgram);
756 SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW | CTLFLAG_LOCKED,
757 &unpdg_sendspace, 0, "");
758 SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW | CTLFLAG_LOCKED,
759 &unpdg_recvspace, 0, "");
760 SYSCTL_DECL(_net_local);
761 SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD | CTLFLAG_LOCKED, &unp_rights, 0, "");
762
763 /*
764 * Returns: 0 Success
765 * ENOBUFS
766 * soreserve:ENOBUFS
767 */
/*
 * Allocate and initialize a unpcb for a new AF_UNIX socket: reserve
 * buffer space appropriate to the socket type, zero the pcb, set up
 * its mutex, link it onto the global stream/datagram pcb list under
 * the list lock, and tag both socket buffers SB_UNIX (see below).
 */
static int
unp_attach(struct socket *so)
{
	struct unpcb *unp;
	int error = 0;

	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
		switch (so->so_type) {

		case SOCK_STREAM:
			error = soreserve(so, unpst_sendspace, unpst_recvspace);
			break;

		case SOCK_DGRAM:
			error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
			break;

		default:
			panic("unp_attach");
		}
		if (error)
			return (error);
	}
	unp = (struct unpcb *)zalloc(unp_zone);
	if (unp == NULL)
		return (ENOBUFS);
	bzero(unp, sizeof (*unp));

	lck_mtx_init(&unp->unp_mtx,
		unp_mtx_grp, unp_mtx_attr);

	/* Publish the new pcb on the global list under the list lock. */
	lck_rw_lock_exclusive(unp_list_mtx);
	LIST_INIT(&unp->unp_refs);
	unp->unp_socket = so;
	unp->unp_gencnt = ++unp_gencnt;
	unp_count++;
	LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ?
	    &unp_dhead : &unp_shead, unp, unp_link);
	lck_rw_done(unp_list_mtx);
	so->so_pcb = (caddr_t)unp;
	/*
	 * Mark AF_UNIX socket buffers accordingly so that:
	 *
	 * a. In the SOCK_STREAM case, socket buffer append won't fail due to
	 *    the lack of space; this essentially loosens the sbspace() check,
	 *    since there is disconnect between sosend() and uipc_send() with
	 *    respect to flow control that might result in our dropping the
	 *    data in uipc_send().  By setting this, we allow for slightly
	 *    more records to be appended to the receiving socket to avoid
	 *    losing data (which we can't afford in the SOCK_STREAM case).
	 *    Flow control still takes place since we adjust the sender's
	 *    hiwat during each send.  This doesn't affect the SOCK_DGRAM
	 *    case and append would still fail when the queue overflows.
	 *
	 * b. In the presence of control messages containing internalized
	 *    file descriptors, the append routines will not free them since
	 *    we'd need to undo the work first via unp_dispose().
	 */
	so->so_rcv.sb_flags |= SB_UNIX;
	so->so_snd.sb_flags |= SB_UNIX;
	return (0);
}
830
/*
 * Tear down a unpcb on socket close: unlink it from the global pcb
 * list, break the association with any bound vnode, disconnect from
 * a connected peer, and reset every datagram socket still connected
 * to us.  Uses the global disconnect_in_progress handshake so that
 * removing refs does not race a concurrent unp_disconnect().
 */
static void
unp_detach(struct unpcb *unp)
{
	int so_locked = 1;

	lck_rw_lock_exclusive(unp_list_mtx);
	LIST_REMOVE(unp, unp_link);
	--unp_count;
	++unp_gencnt;
	lck_rw_done(unp_list_mtx);
	if (unp->unp_vnode) {
		struct vnode *tvp = NULL;
		socket_unlock(unp->unp_socket, 0);

		/* Holding unp_connect_lock will avoid a race between
		 * a thread closing the listening socket and a thread
		 * connecting to it.
		 */
		lck_mtx_lock(unp_connect_lock);
		socket_lock(unp->unp_socket, 0);
		/* Re-check: the vnode may have been cleared while unlocked. */
		if (unp->unp_vnode) {
			tvp = unp->unp_vnode;
			unp->unp_vnode->v_socket = NULL;
			unp->unp_vnode = NULL;
		}
		lck_mtx_unlock(unp_connect_lock);
		if (tvp != NULL)
			vnode_rele(tvp);	/* drop the usecount */
	}
	if (unp->unp_conn)
		unp_disconnect(unp);
	while (unp->unp_refs.lh_first) {
		struct unpcb *unp2 = NULL;

		/* This datagram socket is connected to one or more
		 * sockets. In order to avoid a race condition between removing
		 * this reference and closing the connected socket, we need
		 * to check disconnect_in_progress
		 */
		if (so_locked == 1) {
			socket_unlock(unp->unp_socket, 0);
			so_locked = 0;
		}
		lck_mtx_lock(unp_disconnect_lock);
		while (disconnect_in_progress != 0) {
			(void)msleep((caddr_t)&disconnect_in_progress, unp_disconnect_lock,
				PSOCK, "disconnect", NULL);
		}
		disconnect_in_progress = 1;
		lck_mtx_unlock(unp_disconnect_lock);

		/* Now we are sure that any unpcb socket disconnect is not happening */
		if (unp->unp_refs.lh_first != NULL) {
			unp2 = unp->unp_refs.lh_first;
			socket_lock(unp2->unp_socket, 1);
		}

		lck_mtx_lock(unp_disconnect_lock);
		disconnect_in_progress = 0;
		wakeup(&disconnect_in_progress);
		lck_mtx_unlock(unp_disconnect_lock);

		if (unp2 != NULL) {
			/* We already locked this socket and have a reference on it */
			unp_drop(unp2, ECONNRESET);
			socket_unlock(unp2->unp_socket, 1);
		}
	}

	if (so_locked == 0) {
		socket_lock(unp->unp_socket, 0);
		so_locked = 1;
	}
	soisdisconnected(unp->unp_socket);
	/* makes sure we're getting dealloced */
	unp->unp_socket->so_flags |= SOF_PCBCLEARING;
}
908
909 /*
910 * Returns: 0 Success
911 * EAFNOSUPPORT
912 * EINVAL
913 * EADDRINUSE
914 * namei:??? [anything namei can return]
915 * vnode_authorize:??? [anything vnode_authorize can return]
916 *
917 * Notes: p at this point is the current process, as this function is
918 * only called by sobind().
919 */
/*
 * Implement bind(2) for AF_UNIX: create a VSOCK vnode at the path in
 * 'nam', authorize the creation (MAC + vnode authorization), point the
 * vnode at the socket, and record the bound address in the pcb.  The
 * socket lock is dropped around the filesystem operations and retaken
 * before touching the pcb again.
 */
static int
unp_bind(
	struct unpcb *unp,
	struct sockaddr *nam,
	proc_t p)
{
	struct sockaddr_un *soun = (struct sockaddr_un *)nam;
	struct vnode *vp, *dvp;
	struct vnode_attr va;
	vfs_context_t ctx = vfs_context_current();
	int error, namelen;
	struct nameidata nd;
	struct socket *so = unp->unp_socket;
	char buf[SOCK_MAXADDRLEN];

	if (nam->sa_family != 0 && nam->sa_family != AF_UNIX) {
		return (EAFNOSUPPORT);
	}

	/* Only one bind per socket. */
	if (unp->unp_vnode != NULL)
		return (EINVAL);
	namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
	if (namelen <= 0)
		return (EINVAL);

	socket_unlock(so, 0);

	/* NUL-terminate the path; sun_path need not be terminated. */
	strlcpy(buf, soun->sun_path, namelen+1);
	NDINIT(&nd, CREATE, OP_MKFIFO, FOLLOW | LOCKPARENT, UIO_SYSSPACE,
	    CAST_USER_ADDR_T(buf), ctx);
	/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
	error = namei(&nd);
	if (error) {
		socket_lock(so, 0);
		return (error);
	}
	dvp = nd.ni_dvp;
	vp = nd.ni_vp;

	if (vp != NULL) {
		/*
		 * need to do this before the vnode_put of dvp
		 * since we may have to release an fs_nodelock
		 */
		nameidone(&nd);

		vnode_put(dvp);
		vnode_put(vp);

		/* Path already exists: bind fails with EADDRINUSE. */
		socket_lock(so, 0);
		return (EADDRINUSE);
	}

	VATTR_INIT(&va);
	VATTR_SET(&va, va_type, VSOCK);
	VATTR_SET(&va, va_mode, (ACCESSPERMS & ~p->p_fd->fd_cmask));

#if CONFIG_MACF
	error = mac_vnode_check_create(ctx,
	    nd.ni_dvp, &nd.ni_cnd, &va);

	if (error == 0)
#endif /* CONFIG_MACF */
#if CONFIG_MACF_SOCKET_SUBSET
	error = mac_vnode_check_uipc_bind(ctx,
	    nd.ni_dvp, &nd.ni_cnd, &va);

	if (error == 0)
#endif /* MAC_SOCKET_SUBSET */
	/* authorize before creating */
	error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);

	if (!error) {
		/* create the socket */
		error = vn_create(dvp, &vp, &nd, &va, 0, 0, NULL, ctx);
	}

	nameidone(&nd);
	vnode_put(dvp);

	if (error) {
		socket_lock(so, 0);
		return (error);
	}
	vnode_ref(vp);	/* gain a longterm reference */
	socket_lock(so, 0);
	vp->v_socket = unp->unp_socket;
	unp->unp_vnode = vp;
	unp->unp_addr = (struct sockaddr_un *)dup_sockaddr(nam, 1);
	vnode_put(vp);		/* drop the iocount */

	return (0);
}
1013
1014
1015 /*
1016 * Returns: 0 Success
1017 * EAFNOSUPPORT Address family not supported
1018 * EINVAL Invalid argument
1019 * ENOTSOCK Not a socket
1020 * ECONNREFUSED Connection refused
1021 * EPROTOTYPE Protocol wrong type for socket
1022 * EISCONN Socket is connected
1023 * unp_connect2:EPROTOTYPE Protocol wrong type for socket
1024 * unp_connect2:EINVAL Invalid argument
1025 * namei:??? [anything namei can return]
1026 * vnode_authorize:???? [anything vnode_authorize can return]
1027 *
1028 * Notes: p at this point is the current process, as this function is
1029 * only called by sosend(), sendfile(), and soconnectlock().
1030 */
1031 static int
1032 unp_connect(struct socket *so, struct sockaddr *nam, __unused proc_t p)
1033 {
1034 struct sockaddr_un *soun = (struct sockaddr_un *)nam;
1035 struct vnode *vp;
1036 struct socket *so2, *so3, *list_so=NULL;
1037 struct unpcb *unp, *unp2, *unp3;
1038 vfs_context_t ctx = vfs_context_current();
1039 int error, len;
1040 struct nameidata nd;
1041 char buf[SOCK_MAXADDRLEN];
1042
1043 if (nam->sa_family != 0 && nam->sa_family != AF_UNIX) {
1044 return (EAFNOSUPPORT);
1045 }
1046
1047 unp = sotounpcb(so);
1048 so2 = so3 = NULL;
1049
1050 len = nam->sa_len - offsetof(struct sockaddr_un, sun_path);
1051 if (len <= 0)
1052 return (EINVAL);
1053
1054 strlcpy(buf, soun->sun_path, len+1);
1055 socket_unlock(so, 0);
1056
1057 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
1058 CAST_USER_ADDR_T(buf), ctx);
1059 error = namei(&nd);
1060 if (error) {
1061 socket_lock(so, 0);
1062 return (error);
1063 }
1064 nameidone(&nd);
1065 vp = nd.ni_vp;
1066 if (vp->v_type != VSOCK) {
1067 error = ENOTSOCK;
1068 socket_lock(so, 0);
1069 goto out;
1070 }
1071
1072 #if CONFIG_MACF_SOCKET_SUBSET
1073 error = mac_vnode_check_uipc_connect(ctx, vp);
1074 if (error) {
1075 socket_lock(so, 0);
1076 goto out;
1077 }
1078 #endif /* MAC_SOCKET_SUBSET */
1079
1080 error = vnode_authorize(vp, NULL, KAUTH_VNODE_WRITE_DATA, ctx);
1081 if (error) {
1082 socket_lock(so, 0);
1083 goto out;
1084 }
1085
1086 lck_mtx_lock(unp_connect_lock);
1087
1088 if (vp->v_socket == 0) {
1089 lck_mtx_unlock(unp_connect_lock);
1090 error = ECONNREFUSED;
1091 socket_lock(so, 0);
1092 goto out;
1093 }
1094
1095 socket_lock(vp->v_socket, 1); /* Get a reference on the listening socket */
1096 so2 = vp->v_socket;
1097 lck_mtx_unlock(unp_connect_lock);
1098
1099
1100 if (so2->so_pcb == NULL) {
1101 error = ECONNREFUSED;
1102 if (so != so2) {
1103 socket_unlock(so2, 1);
1104 socket_lock(so, 0);
1105 } else {
1106 /* Release the reference held for the listen socket */
1107 so2->so_usecount--;
1108 }
1109 goto out;
1110 }
1111
1112 if (so < so2) {
1113 socket_unlock(so2, 0);
1114 socket_lock(so, 0);
1115 socket_lock(so2, 0);
1116 } else if (so > so2) {
1117 socket_lock(so, 0);
1118 }
1119 /*
1120 * Check if socket was connected while we were trying to
1121 * get the socket locks in order.
1122 * XXX - probably shouldn't return an error for SOCK_DGRAM
1123 */
1124 if ((so->so_state & SS_ISCONNECTED) != 0) {
1125 error = EISCONN;
1126 goto decref_out;
1127 }
1128
1129 if (so->so_type != so2->so_type) {
1130 error = EPROTOTYPE;
1131 goto decref_out;
1132 }
1133
1134 if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
1135 /* Release the incoming socket but keep a reference */
1136 socket_unlock(so, 0);
1137
1138 if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
1139 (so3 = sonewconn(so2, 0, nam)) == 0) {
1140 error = ECONNREFUSED;
1141 if (so != so2) {
1142 socket_unlock(so2, 1);
1143 socket_lock(so, 0);
1144 } else {
1145 socket_lock(so, 0);
1146 /* Release the reference held for
1147 * listen socket.
1148 */
1149 so2->so_usecount--;
1150 }
1151 goto out;
1152 }
1153 unp2 = sotounpcb(so2);
1154 unp3 = sotounpcb(so3);
1155 if (unp2->unp_addr)
1156 unp3->unp_addr = (struct sockaddr_un *)
1157 dup_sockaddr((struct sockaddr *)unp2->unp_addr, 1);
1158
1159 /*
1160 * unp_peercred management:
1161 *
1162 * The connecter's (client's) credentials are copied
1163 * from its process structure at the time of connect()
1164 * (which is now).
1165 */
1166 cru2x(vfs_context_ucred(ctx), &unp3->unp_peercred);
1167 unp3->unp_flags |= UNP_HAVEPC;
1168 /*
1169 * The receiver's (server's) credentials are copied
1170 * from the unp_peercred member of socket on which the
1171 * former called listen(); unp_listen() cached that
1172 * process's credentials at that time so we can use
1173 * them now.
1174 */
1175 KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED,
1176 ("unp_connect: listener without cached peercred"));
1177
1178 /* Here we need to have both so and so2 locks and so2
1179 * is already locked. Lock ordering is required.
1180 */
1181 if (so < so2) {
1182 socket_unlock(so2, 0);
1183 socket_lock(so, 0);
1184 socket_lock(so2, 0);
1185 } else {
1186 socket_lock(so, 0);
1187 }
1188
1189 /* Check again if the socket state changed when its lock was released */
1190 if ((so->so_state & SS_ISCONNECTED) != 0) {
1191 error = EISCONN;
1192 socket_unlock(so2, 1);
1193 socket_lock(so3, 0);
1194 sofreelastref(so3, 1);
1195 goto out;
1196 }
1197 memcpy(&unp->unp_peercred, &unp2->unp_peercred,
1198 sizeof (unp->unp_peercred));
1199 unp->unp_flags |= UNP_HAVEPC;
1200
1201 #if CONFIG_MACF_SOCKET
1202 /* XXXMAC: recursive lock: SOCK_LOCK(so); */
1203 mac_socketpeer_label_associate_socket(so, so3);
1204 mac_socketpeer_label_associate_socket(so3, so);
1205 /* XXXMAC: SOCK_UNLOCK(so); */
1206 #endif /* MAC_SOCKET */
1207
1208 /* Hold the reference on listening socket until the end */
1209 socket_unlock(so2, 0);
1210 list_so = so2;
1211
1212 /* Lock ordering doesn't matter because so3 was just created */
1213 socket_lock(so3, 1);
1214 so2 = so3;
1215
1216 /*
1217 * Enable tracing for mDNSResponder endpoints. (The use
1218 * of sizeof instead of strlen below takes the null
1219 * terminating character into account.)
1220 */
1221 if (unpst_tracemdns &&
1222 !strncmp(soun->sun_path, MDNSRESPONDER_PATH,
1223 sizeof (MDNSRESPONDER_PATH))) {
1224 unp->unp_flags |= UNP_TRACE_MDNS;
1225 unp2->unp_flags |= UNP_TRACE_MDNS;
1226 }
1227 }
1228
1229 error = unp_connect2(so, so2);
1230
1231 decref_out:
1232 if (so2 != NULL) {
1233 if (so != so2) {
1234 socket_unlock(so2, 1);
1235 } else {
1236 /* Release the extra reference held for the listen socket.
1237 * This is possible only for SOCK_DGRAM sockets. We refuse
1238 * connecting to the same socket for SOCK_STREAM sockets.
1239 */
1240 so2->so_usecount--;
1241 }
1242 }
1243
1244 if (list_so != NULL) {
1245 socket_lock(list_so, 0);
1246 socket_unlock(list_so, 1);
1247 }
1248
1249 out:
1250 lck_mtx_assert(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1251 vnode_put(vp);
1252 return (error);
1253 }
1254
1255 /*
1256 * Returns: 0 Success
1257 * EPROTOTYPE Protocol wrong type for socket
1258 * EINVAL Invalid argument
1259 */
1260 int
1261 unp_connect2(struct socket *so, struct socket *so2)
1262 {
1263 struct unpcb *unp = sotounpcb(so);
1264 struct unpcb *unp2;
1265
1266 if (so2->so_type != so->so_type)
1267 return (EPROTOTYPE);
1268
1269 unp2 = sotounpcb(so2);
1270
1271 lck_mtx_assert(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1272 lck_mtx_assert(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);
1273
1274 /* Verify both sockets are still opened */
1275 if (unp == 0 || unp2 == 0)
1276 return (EINVAL);
1277
1278 unp->unp_conn = unp2;
1279 so2->so_usecount++;
1280
1281 switch (so->so_type) {
1282
1283 case SOCK_DGRAM:
1284 LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink);
1285
1286 if (so != so2) {
1287 /* Avoid lock order reversals due to drop/acquire in soisconnected. */
1288 /* Keep an extra reference on so2 that will be dropped
1289 * soon after getting the locks in order
1290 */
1291 socket_unlock(so2, 0);
1292 soisconnected(so);
1293 unp_get_locks_in_order(so, so2);
1294 so2->so_usecount--;
1295 } else {
1296 soisconnected(so);
1297 }
1298
1299 break;
1300
1301 case SOCK_STREAM:
1302 /* This takes care of socketpair */
1303 if (!(unp->unp_flags & UNP_HAVEPC) &&
1304 !(unp2->unp_flags & UNP_HAVEPC)) {
1305 cru2x(kauth_cred_get(), &unp->unp_peercred);
1306 unp->unp_flags |= UNP_HAVEPC;
1307
1308 cru2x(kauth_cred_get(), &unp2->unp_peercred);
1309 unp2->unp_flags |= UNP_HAVEPC;
1310 }
1311 unp2->unp_conn = unp;
1312 so->so_usecount++;
1313
1314 /* Avoid lock order reversals due to drop/acquire in soisconnected. */
1315 socket_unlock(so, 0);
1316 soisconnected(so2);
1317
1318 /* Keep an extra reference on so2, that will be dropped soon after
1319 * getting the locks in order again.
1320 */
1321 socket_unlock(so2, 0);
1322
1323 socket_lock(so, 0);
1324 soisconnected(so);
1325
1326 unp_get_locks_in_order(so, so2);
1327 /* Decrement the extra reference left before */
1328 so2->so_usecount--;
1329 break;
1330
1331 default:
1332 panic("unknown socket type %d in unp_connect2", so->so_type);
1333 }
1334 lck_mtx_assert(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1335 lck_mtx_assert(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);
1336 return (0);
1337 }
1338
1339 static void
1340 unp_disconnect(struct unpcb *unp)
1341 {
1342 struct unpcb *unp2 = NULL;
1343 struct socket *so2 = NULL, *so;
1344 struct socket *waitso;
1345 int so_locked = 1, strdisconn = 0;
1346
1347 so = unp->unp_socket;
1348 if (unp->unp_conn == NULL) {
1349 return;
1350 }
1351 lck_mtx_lock(unp_disconnect_lock);
1352 while (disconnect_in_progress != 0) {
1353 if (so_locked == 1) {
1354 socket_unlock(so, 0);
1355 so_locked = 0;
1356 }
1357 (void)msleep((caddr_t)&disconnect_in_progress, unp_disconnect_lock,
1358 PSOCK, "disconnect", NULL);
1359 }
1360 disconnect_in_progress = 1;
1361 lck_mtx_unlock(unp_disconnect_lock);
1362
1363 if (so_locked == 0) {
1364 socket_lock(so, 0);
1365 so_locked = 1;
1366 }
1367
1368 unp2 = unp->unp_conn;
1369
1370 if (unp2 == 0 || unp2->unp_socket == NULL) {
1371 goto out;
1372 }
1373 so2 = unp2->unp_socket;
1374
1375 try_again:
1376 if (so == so2) {
1377 if (so_locked == 0) {
1378 socket_lock(so, 0);
1379 }
1380 waitso = so;
1381 } else if (so < so2) {
1382 if (so_locked == 0) {
1383 socket_lock(so, 0);
1384 }
1385 socket_lock(so2, 1);
1386 waitso = so2;
1387 } else {
1388 if (so_locked == 1) {
1389 socket_unlock(so, 0);
1390 }
1391 socket_lock(so2, 1);
1392 socket_lock(so, 0);
1393 waitso = so;
1394 }
1395 so_locked = 1;
1396
1397 lck_mtx_assert(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1398 lck_mtx_assert(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);
1399
1400 /* Check for the UNP_DONTDISCONNECT flag, if it
1401 * is set, release both sockets and go to sleep
1402 */
1403
1404 if ((((struct unpcb *)waitso->so_pcb)->unp_flags & UNP_DONTDISCONNECT) != 0) {
1405 if (so != so2) {
1406 socket_unlock(so2, 1);
1407 }
1408 so_locked = 0;
1409
1410 (void)msleep(waitso->so_pcb, &unp->unp_mtx,
1411 PSOCK | PDROP, "unpdisconnect", NULL);
1412 goto try_again;
1413 }
1414
1415 if (unp->unp_conn == NULL) {
1416 panic("unp_conn became NULL after sleep");
1417 }
1418
1419 unp->unp_conn = NULL;
1420 so2->so_usecount--;
1421
1422 if (unp->unp_flags & UNP_TRACE_MDNS)
1423 unp->unp_flags &= ~UNP_TRACE_MDNS;
1424
1425 switch (unp->unp_socket->so_type) {
1426
1427 case SOCK_DGRAM:
1428 LIST_REMOVE(unp, unp_reflink);
1429 unp->unp_socket->so_state &= ~SS_ISCONNECTED;
1430 if (so != so2)
1431 socket_unlock(so2, 1);
1432 break;
1433
1434 case SOCK_STREAM:
1435 unp2->unp_conn = NULL;
1436 so->so_usecount--;
1437
1438 /* Set the socket state correctly but do a wakeup later when
1439 * we release all locks except the socket lock, this will avoid
1440 * a deadlock.
1441 */
1442 unp->unp_socket->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
1443 unp->unp_socket->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
1444
1445 unp2->unp_socket->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
1446 unp->unp_socket->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
1447
1448 if (unp2->unp_flags & UNP_TRACE_MDNS)
1449 unp2->unp_flags &= ~UNP_TRACE_MDNS;
1450
1451 strdisconn = 1;
1452 break;
1453 default:
1454 panic("unknown socket type %d", so->so_type);
1455 }
1456 out:
1457 lck_mtx_lock(unp_disconnect_lock);
1458 disconnect_in_progress = 0;
1459 wakeup(&disconnect_in_progress);
1460 lck_mtx_unlock(unp_disconnect_lock);
1461
1462 if (strdisconn) {
1463 socket_unlock(so, 0);
1464 soisdisconnected(so2);
1465 socket_unlock(so2, 1);
1466
1467 socket_lock(so,0);
1468 soisdisconnected(so);
1469 }
1470 lck_mtx_assert(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1471 return;
1472 }
1473
1474 /*
1475 * unpcb_to_compat copies specific bits of a unpcb to a unpcb_compat format.
1476 * The unpcb_compat data structure is passed to user space and must not change.
1477 */
1478 static void
1479 unpcb_to_compat(struct unpcb *up, struct unpcb_compat *cp)
1480 {
1481 #if defined(__LP64__)
1482 cp->unp_link.le_next = (u_int32_t)
1483 VM_KERNEL_ADDRPERM(up->unp_link.le_next);
1484 cp->unp_link.le_prev = (u_int32_t)
1485 VM_KERNEL_ADDRPERM(up->unp_link.le_prev);
1486 #else
1487 cp->unp_link.le_next = (struct unpcb_compat *)
1488 VM_KERNEL_ADDRPERM(up->unp_link.le_next);
1489 cp->unp_link.le_prev = (struct unpcb_compat **)
1490 VM_KERNEL_ADDRPERM(up->unp_link.le_prev);
1491 #endif
1492 cp->unp_socket = (_UNPCB_PTR(struct socket *))
1493 VM_KERNEL_ADDRPERM(up->unp_socket);
1494 cp->unp_vnode = (_UNPCB_PTR(struct vnode *))
1495 VM_KERNEL_ADDRPERM(up->unp_vnode);
1496 cp->unp_ino = up->unp_ino;
1497 cp->unp_conn = (_UNPCB_PTR(struct unpcb_compat *))
1498 VM_KERNEL_ADDRPERM(up->unp_conn);
1499 cp->unp_refs = (u_int32_t)VM_KERNEL_ADDRPERM(up->unp_refs.lh_first);
1500 #if defined(__LP64__)
1501 cp->unp_reflink.le_next =
1502 (u_int32_t)VM_KERNEL_ADDRPERM(up->unp_reflink.le_next);
1503 cp->unp_reflink.le_prev =
1504 (u_int32_t)VM_KERNEL_ADDRPERM(up->unp_reflink.le_prev);
1505 #else
1506 cp->unp_reflink.le_next =
1507 (struct unpcb_compat *)VM_KERNEL_ADDRPERM(up->unp_reflink.le_next);
1508 cp->unp_reflink.le_prev =
1509 (struct unpcb_compat **)VM_KERNEL_ADDRPERM(up->unp_reflink.le_prev);
1510 #endif
1511 cp->unp_addr = (_UNPCB_PTR(struct sockaddr_un *))
1512 VM_KERNEL_ADDRPERM(up->unp_addr);
1513 cp->unp_cc = up->unp_cc;
1514 cp->unp_mbcnt = up->unp_mbcnt;
1515 cp->unp_gencnt = up->unp_gencnt;
1516 }
1517
/*
 * sysctl handler: copy the list of active UNIX domain pcbs out to user
 * space in the fixed compat (struct xunpcb) layout, bracketed by two
 * struct xunpgen generation records so the reader can detect concurrent
 * changes and retry.  arg1 selects the datagram or stream pcb list.
 * Runs with unp_list_mtx held shared for the whole operation.
 */
static int
unp_pcblist SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp,arg2)
	int error, i, n;
	struct unpcb *unp, **unp_list;
	unp_gen_t gencnt;
	struct xunpgen xug;
	struct unp_head *head;

	lck_rw_lock_shared(unp_list_mtx);
	head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);

	/*
	 * The process of preparing the PCB list is too time-consuming and
	 * resource-intensive to repeat twice on every request.
	 */
	if (req->oldptr == USER_ADDR_NULL) {
		/* Size probe only: report a padded estimate (n + n/8 slots) */
		n = unp_count;
		req->oldidx = 2 * sizeof (xug) + (n + n / 8) *
		    sizeof (struct xunpcb);
		lck_rw_done(unp_list_mtx);
		return (0);
	}

	/* This sysctl is read-only */
	if (req->newptr != USER_ADDR_NULL) {
		lck_rw_done(unp_list_mtx);
		return (EPERM);
	}

	/*
	 * OK, now we're committed to doing something.
	 */
	gencnt = unp_gencnt;
	n = unp_count;

	bzero(&xug, sizeof (xug));
	xug.xug_len = sizeof (xug);
	xug.xug_count = n;
	xug.xug_gen = gencnt;
	xug.xug_sogen = so_gencnt;
	error = SYSCTL_OUT(req, &xug, sizeof (xug));
	if (error) {
		lck_rw_done(unp_list_mtx);
		return (error);
	}

	/*
	 * We are done if there is no pcb
	 */
	if (n == 0) {
		lck_rw_done(unp_list_mtx);
		return (0);
	}

	MALLOC(unp_list, struct unpcb **, n * sizeof (*unp_list),
	    M_TEMP, M_WAITOK);
	if (unp_list == 0) {
		lck_rw_done(unp_list_mtx);
		return (ENOMEM);
	}

	/* Snapshot: only pcbs no newer than the generation we advertised */
	for (unp = head->lh_first, i = 0; unp && i < n;
	    unp = unp->unp_link.le_next) {
		if (unp->unp_gencnt <= gencnt)
			unp_list[i++] = unp;
	}
	n = i; /* in case we lost some during malloc */

	error = 0;
	for (i = 0; i < n; i++) {
		unp = unp_list[i];
		if (unp->unp_gencnt <= gencnt) {
			struct xunpcb xu;

			bzero(&xu, sizeof (xu));
			xu.xu_len = sizeof (xu);
			xu.xu_unpp = (_UNPCB_PTR(struct unpcb_compat *))
			    VM_KERNEL_ADDRPERM(unp);
			/*
			 * XXX - need more locking here to protect against
			 * connect/disconnect races for SMP.
			 */
			if (unp->unp_addr)
				bcopy(unp->unp_addr, &xu.xu_addr,
				    unp->unp_addr->sun_len);
			if (unp->unp_conn && unp->unp_conn->unp_addr)
				bcopy(unp->unp_conn->unp_addr,
				    &xu.xu_caddr,
				    unp->unp_conn->unp_addr->sun_len);
			unpcb_to_compat(unp, &xu.xu_unp);
			sotoxsocket(unp->unp_socket, &xu.xu_socket);
			error = SYSCTL_OUT(req, &xu, sizeof (xu));
		}
	}
	if (!error) {
		/*
		 * Give the user an updated idea of our state.
		 * If the generation differs from what we told
		 * her before, she knows that something happened
		 * while we were processing this request, and it
		 * might be necessary to retry.
		 */
		bzero(&xug, sizeof (xug));
		xug.xug_len = sizeof (xug);
		xug.xug_gen = unp_gencnt;
		xug.xug_sogen = so_gencnt;
		xug.xug_count = unp_count;
		error = SYSCTL_OUT(req, &xug, sizeof (xug));
	}
	FREE(unp_list, M_TEMP);
	lck_rw_done(unp_list_mtx);
	return (error);
}
1632
/*
 * net.local.dgram.pcblist / net.local.stream.pcblist:
 * compat (32-bit layout) pcb dumps served by unp_pcblist; arg1 carries
 * the socket type used to pick the dgram or stream list.
 */
SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, CTLFLAG_RD | CTLFLAG_LOCKED,
	    (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb",
	    "List of active local datagram sockets");
SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLFLAG_RD | CTLFLAG_LOCKED,
	    (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb",
	    "List of active local stream sockets");
1639
1640 #if !CONFIG_EMBEDDED
1641
/*
 * sysctl handler: 64-bit variant of unp_pcblist.  Same two-generation
 * protocol, but each pcb is emitted as a struct xunpcb64 with all kernel
 * pointers widened to u_int64_t (and scrambled with VM_KERNEL_ADDRPERM).
 * arg1 selects the datagram or stream pcb list.
 */
static int
unp_pcblist64 SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp,arg2)
	int error, i, n;
	struct unpcb *unp, **unp_list;
	unp_gen_t gencnt;
	struct xunpgen xug;
	struct unp_head *head;

	lck_rw_lock_shared(unp_list_mtx);
	head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);

	/*
	 * The process of preparing the PCB list is too time-consuming and
	 * resource-intensive to repeat twice on every request.
	 */
	if (req->oldptr == USER_ADDR_NULL) {
		/* Size probe only: report a padded estimate (n + n/8 slots) */
		n = unp_count;
		req->oldidx = 2 * sizeof (xug) + (n + n / 8) *
		    (sizeof (struct xunpcb64));
		lck_rw_done(unp_list_mtx);
		return (0);
	}

	/* This sysctl is read-only */
	if (req->newptr != USER_ADDR_NULL) {
		lck_rw_done(unp_list_mtx);
		return (EPERM);
	}

	/*
	 * OK, now we're committed to doing something.
	 */
	gencnt = unp_gencnt;
	n = unp_count;

	bzero(&xug, sizeof (xug));
	xug.xug_len = sizeof (xug);
	xug.xug_count = n;
	xug.xug_gen = gencnt;
	xug.xug_sogen = so_gencnt;
	error = SYSCTL_OUT(req, &xug, sizeof (xug));
	if (error) {
		lck_rw_done(unp_list_mtx);
		return (error);
	}

	/*
	 * We are done if there is no pcb
	 */
	if (n == 0) {
		lck_rw_done(unp_list_mtx);
		return (0);
	}

	MALLOC(unp_list, struct unpcb **, n * sizeof (*unp_list),
	    M_TEMP, M_WAITOK);
	if (unp_list == 0) {
		lck_rw_done(unp_list_mtx);
		return (ENOMEM);
	}

	/* Snapshot: only pcbs no newer than the generation we advertised */
	for (unp = head->lh_first, i = 0; unp && i < n;
	    unp = unp->unp_link.le_next) {
		if (unp->unp_gencnt <= gencnt)
			unp_list[i++] = unp;
	}
	n = i; /* in case we lost some during malloc */

	error = 0;
	for (i = 0; i < n; i++) {
		unp = unp_list[i];
		if (unp->unp_gencnt <= gencnt) {
			struct xunpcb64 xu;
			size_t xu_len = sizeof(struct xunpcb64);

			bzero(&xu, xu_len);
			xu.xu_len = xu_len;
			xu.xu_unpp = (u_int64_t)VM_KERNEL_ADDRPERM(unp);
			xu.xunp_link.le_next = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_link.le_next);
			xu.xunp_link.le_prev = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_link.le_prev);
			xu.xunp_socket = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_socket);
			xu.xunp_vnode = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_vnode);
			xu.xunp_ino = unp->unp_ino;
			xu.xunp_conn = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_conn);
			xu.xunp_refs = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_refs.lh_first);
			xu.xunp_reflink.le_next = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_reflink.le_next);
			xu.xunp_reflink.le_prev = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_reflink.le_prev);
			xu.xunp_cc = unp->unp_cc;
			xu.xunp_mbcnt = unp->unp_mbcnt;
			xu.xunp_gencnt = unp->unp_gencnt;

			if (unp->unp_socket)
				sotoxsocket64(unp->unp_socket, &xu.xu_socket);

			/*
			 * XXX - need more locking here to protect against
			 * connect/disconnect races for SMP.
			 */
			if (unp->unp_addr)
				bcopy(unp->unp_addr, &xu.xunp_addr,
				    unp->unp_addr->sun_len);
			if (unp->unp_conn && unp->unp_conn->unp_addr)
				bcopy(unp->unp_conn->unp_addr,
				    &xu.xunp_caddr,
				    unp->unp_conn->unp_addr->sun_len);

			error = SYSCTL_OUT(req, &xu, xu_len);
		}
	}
	if (!error) {
		/*
		 * Give the user an updated idea of our state.
		 * If the generation differs from what we told
		 * her before, she knows that something happened
		 * while we were processing this request, and it
		 * might be necessary to retry.
		 */
		bzero(&xug, sizeof (xug));
		xug.xug_len = sizeof (xug);
		xug.xug_gen = unp_gencnt;
		xug.xug_sogen = so_gencnt;
		xug.xug_count = unp_count;
		error = SYSCTL_OUT(req, &xug, sizeof (xug));
	}
	FREE(unp_list, M_TEMP);
	lck_rw_done(unp_list_mtx);
	return (error);
}
1779
/*
 * net.local.dgram.pcblist64 / net.local.stream.pcblist64:
 * 64-bit pcb dumps served by unp_pcblist64 (not built on embedded).
 */
SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist64, CTLFLAG_RD | CTLFLAG_LOCKED,
	    (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist64, "S,xunpcb64",
	    "List of active local datagram sockets 64 bit");
SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist64, CTLFLAG_RD | CTLFLAG_LOCKED,
	    (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist64, "S,xunpcb64",
	    "List of active local stream sockets 64 bit");
1786
1787 #endif /* !CONFIG_EMBEDDED */
1788
1789 static void
1790 unp_shutdown(struct unpcb *unp)
1791 {
1792 struct socket *so = unp->unp_socket;
1793 struct socket *so2;
1794 if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn) {
1795 so2 = unp->unp_conn->unp_socket;
1796 unp_get_locks_in_order(so, so2);
1797 socantrcvmore(so2);
1798 socket_unlock(so2, 1);
1799 }
1800 }
1801
1802 static void
1803 unp_drop(struct unpcb *unp, int errno)
1804 {
1805 struct socket *so = unp->unp_socket;
1806
1807 so->so_error = errno;
1808 unp_disconnect(unp);
1809 }
1810
1811 /*
1812 * Returns: 0 Success
1813 * EMSGSIZE The new fd's will not fit
1814 * ENOBUFS Cannot alloc struct fileproc
1815 */
1816 int
1817 unp_externalize(struct mbuf *rights)
1818 {
1819 proc_t p = current_proc(); /* XXX */
1820 int i;
1821 struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
1822 struct fileglob **rp = (struct fileglob **)(cm + 1);
1823 int *fds = (int *)(cm + 1);
1824 struct fileproc *fp;
1825 struct fileglob *fg;
1826 int newfds = (cm->cmsg_len - sizeof (*cm)) / sizeof (int);
1827 int f;
1828
1829 proc_fdlock(p);
1830
1831 /*
1832 * if the new FD's will not fit, then we free them all
1833 */
1834 if (!fdavail(p, newfds)) {
1835 for (i = 0; i < newfds; i++) {
1836 fg = *rp;
1837 unp_discard_fdlocked(fg, p);
1838 *rp++ = NULL;
1839 }
1840 proc_fdunlock(p);
1841
1842 return (EMSGSIZE);
1843 }
1844 /*
1845 * now change each pointer to an fd in the global table to
1846 * an integer that is the index to the local fd table entry
1847 * that we set up to point to the global one we are transferring.
1848 * XXX (1) this assumes a pointer and int are the same size,
1849 * XXX or the mbuf can hold the expansion
1850 * XXX (2) allocation failures should be non-fatal
1851 */
1852 for (i = 0; i < newfds; i++) {
1853 #if CONFIG_MACF_SOCKET
1854 /*
1855 * If receive access is denied, don't pass along
1856 * and error message, just discard the descriptor.
1857 */
1858 if (mac_file_check_receive(kauth_cred_get(), *rp)) {
1859 fg = *rp;
1860 *rp++ = 0;
1861 unp_discard_fdlocked(fg, p);
1862 continue;
1863 }
1864 #endif
1865 if (fdalloc(p, 0, &f))
1866 panic("unp_externalize:fdalloc");
1867 fg = rp[i];
1868 MALLOC_ZONE(fp, struct fileproc *, sizeof (struct fileproc),
1869 M_FILEPROC, M_WAITOK);
1870 if (fp == NULL)
1871 panic("unp_externalize: MALLOC_ZONE");
1872 bzero(fp, sizeof (struct fileproc));
1873 fp->f_iocount = 0;
1874 fp->f_fglob = fg;
1875 fg_removeuipc(fg);
1876 procfdtbl_releasefd(p, f, fp);
1877 (void) OSAddAtomic(-1, &unp_rights);
1878 fds[i] = f;
1879 }
1880 proc_fdunlock(p);
1881
1882 return (0);
1883 }
1884
/*
 * One-time initialization for the UNIX domain socket layer: the pcb
 * zone, the dgram/stream pcb lists, and the locks used by this file.
 */
void
unp_init(void)
{
	unp_zone = zinit(sizeof (struct unpcb),
	    (nmbclusters * sizeof (struct unpcb)), 4096, "unpzone");

	if (unp_zone == 0)
		panic("unp_init");
	LIST_INIT(&unp_dhead);
	LIST_INIT(&unp_shead);

	/*
	 * allocate lock group attribute and group for unp pcb mutexes
	 */
	unp_mtx_grp_attr = lck_grp_attr_alloc_init();

	unp_mtx_grp = lck_grp_alloc_init("unp_list", unp_mtx_grp_attr);

	unp_mtx_attr = lck_attr_alloc_init();

	/* Any allocation failure below leaves the subsystem unusable */
	if ((unp_list_mtx = lck_rw_alloc_init(unp_mtx_grp,
	    unp_mtx_attr)) == NULL)
		return;	/* pretty much dead if this fails... */

	if ((unp_disconnect_lock = lck_mtx_alloc_init(unp_mtx_grp,
	    unp_mtx_attr)) == NULL)
		return;

	if ((unp_connect_lock = lck_mtx_alloc_init(unp_mtx_grp,
	    unp_mtx_attr)) == NULL)
		return;
}
1917
/* Fallback MIN if no header supplied one.  NB: evaluates each argument
 * twice — do not pass expressions with side effects. */
#ifndef MIN
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
#endif
1921
1922 /*
1923 * Returns: 0 Success
1924 * EINVAL
1925 * fdgetf_noref:EBADF
1926 */
1927 static int
1928 unp_internalize(struct mbuf *control, proc_t p)
1929 {
1930 struct cmsghdr *cm = mtod(control, struct cmsghdr *);
1931 int *fds;
1932 struct fileglob **rp;
1933 struct fileproc *fp;
1934 int i, error;
1935 int oldfds;
1936
1937 /* 64bit: cmsg_len is 'uint32_t', m_len is 'long' */
1938 if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
1939 (socklen_t)cm->cmsg_len != (socklen_t)control->m_len) {
1940 return (EINVAL);
1941 }
1942 oldfds = (cm->cmsg_len - sizeof (*cm)) / sizeof (int);
1943
1944 proc_fdlock(p);
1945 fds = (int *)(cm + 1);
1946
1947 for (i = 0; i < oldfds; i++) {
1948 struct fileproc *tmpfp;
1949 if (((error = fdgetf_noref(p, fds[i], &tmpfp)) != 0)) {
1950 proc_fdunlock(p);
1951 return (error);
1952 } else if (!filetype_issendable(tmpfp->f_fglob->fg_type)) {
1953 proc_fdunlock(p);
1954 return (EINVAL);
1955 }
1956 }
1957 rp = (struct fileglob **)(cm + 1);
1958
1959 /* On K64 we need to walk backwards because a fileglob * is twice the size of an fd
1960 * and doing them in-order would result in stomping over unprocessed fd's
1961 */
1962 for (i = (oldfds - 1); i >= 0; i--) {
1963 (void) fdgetf_noref(p, fds[i], &fp);
1964 fg_insertuipc(fp->f_fglob);
1965 rp[i] = fp->f_fglob;
1966 (void) OSAddAtomic(1, &unp_rights);
1967 }
1968 proc_fdunlock(p);
1969
1970 return (0);
1971 }
1972
/* Garbage-collector state, protected by uipc_lock: count of descriptors
 * deferred for another mark pass, GC-in-progress flag, and waiter flag. */
static int unp_defer, unp_gcing, unp_gcwait;
/* Thread currently running unp_gc(), if any */
static thread_t unp_gcthread = NULL;
1975
1976 /* always called under uipc_lock */
1977 void
1978 unp_gc_wait(void)
1979 {
1980 if (unp_gcthread == current_thread())
1981 return;
1982
1983 while (unp_gcing != 0) {
1984 unp_gcwait = 1;
1985 msleep(&unp_gcing, uipc_lock, 0 , "unp_gc_wait", NULL);
1986 }
1987 }
1988
1989
/*
 * Garbage-collect file descriptors trapped in unreachable cycles of
 * in-transit SCM_RIGHTS messages.  Classic mark-and-sweep: first mark
 * every fileglob reachable from user space (deferring through message
 * buffers), then sweep the unmarked ones whose only references are
 * in-transit messages.  Only one GC runs at a time (unp_gcing).
 */
__private_extern__ void
unp_gc(void)
{
	struct fileglob *fg, *nextfg;
	struct socket *so;
	static struct fileglob **extra_ref;
	struct fileglob **fpp;
	int nunref, i;
	int need_gcwakeup = 0;

	/* Single-instance gate: bail if a GC is already running */
	lck_mtx_lock(uipc_lock);
	if (unp_gcing) {
		lck_mtx_unlock(uipc_lock);
		return;
	}
	unp_gcing = 1;
	unp_defer = 0;
	unp_gcthread = current_thread();
	lck_mtx_unlock(uipc_lock);
	/*
	 * before going through all this, set all FDs to
	 * be NOT defered and NOT externally accessible
	 */
	for (fg = fmsghead.lh_first; fg != 0; fg = fg->f_msglist.le_next) {
		lck_mtx_lock(&fg->fg_lock);
		fg->fg_flag &= ~(FMARK|FDEFER);
		lck_mtx_unlock(&fg->fg_lock);
	}
	/* Mark phase: repeat until no descriptor remains deferred */
	do {
		for (fg = fmsghead.lh_first; fg != 0;
		    fg = fg->f_msglist.le_next) {
			lck_mtx_lock(&fg->fg_lock);
			/*
			 * If the file is not open, skip it
			 */
			if (fg->fg_count == 0) {
				lck_mtx_unlock(&fg->fg_lock);
				continue;
			}
			/*
			 * If we already marked it as 'defer' in a
			 * previous pass, then try process it this time
			 * and un-mark it
			 */
			if (fg->fg_flag & FDEFER) {
				fg->fg_flag &= ~FDEFER;
				unp_defer--;
			} else {
				/*
				 * if it's not defered, then check if it's
				 * already marked.. if so skip it
				 */
				if (fg->fg_flag & FMARK) {
					lck_mtx_unlock(&fg->fg_lock);
					continue;
				}
				/*
				 * If all references are from messages
				 * in transit, then skip it. it's not
				 * externally accessible.
				 */
				if (fg->fg_count == fg->fg_msgcount) {
					lck_mtx_unlock(&fg->fg_lock);
					continue;
				}
				/*
				 * If it got this far then it must be
				 * externally accessible.
				 */
				fg->fg_flag |= FMARK;
			}
			/*
			 * either it was defered, or it is externally
			 * accessible and not already marked so.
			 * Now check if it is possibly one of OUR sockets.
			 */
			if (fg->fg_type != DTYPE_SOCKET ||
			    (so = (struct socket *)fg->fg_data) == 0) {
				lck_mtx_unlock(&fg->fg_lock);
				continue;
			}
			if (so->so_proto->pr_domain != &localdomain ||
			    (so->so_proto->pr_flags&PR_RIGHTS) == 0) {
				lck_mtx_unlock(&fg->fg_lock);
				continue;
			}
#ifdef notdef
			/*
			 * if this code is enabled need to run
			 * under network funnel
			 */
			if (so->so_rcv.sb_flags & SB_LOCK) {
				/*
				 * This is problematical; it's not clear
				 * we need to wait for the sockbuf to be
				 * unlocked (on a uniprocessor, at least),
				 * and it's also not clear what to do
				 * if sbwait returns an error due to receipt
				 * of a signal. If sbwait does return
				 * an error, we'll go into an infinite
				 * loop. Delete all of this for now.
				 */
				(void) sbwait(&so->so_rcv);
				goto restart;
			}
#endif
			/*
			 * So, Ok, it's one of our sockets and it IS externally
			 * accessible (or was defered). Now we look
			 * to see if we hold any file descriptors in its
			 * message buffers. Follow those links and mark them
			 * as accessible too.
			 *
			 * In case a file is passed onto itself we need to
			 * release the file lock.
			 */
			lck_mtx_unlock(&fg->fg_lock);

			unp_scan(so->so_rcv.sb_mb, unp_mark);
		}
	} while (unp_defer);
	/*
	 * We grab an extra reference to each of the file table entries
	 * that are not otherwise accessible and then free the rights
	 * that are stored in messages on them.
	 *
	 * The bug in the orginal code is a little tricky, so I'll describe
	 * what's wrong with it here.
	 *
	 * It is incorrect to simply unp_discard each entry for f_msgcount
	 * times -- consider the case of sockets A and B that contain
	 * references to each other. On a last close of some other socket,
	 * we trigger a gc since the number of outstanding rights (unp_rights)
	 * is non-zero. If during the sweep phase the gc code un_discards,
	 * we end up doing a (full) closef on the descriptor. A closef on A
	 * results in the following chain. Closef calls soo_close, which
	 * calls soclose. Soclose calls first (through the switch
	 * uipc_usrreq) unp_detach, which re-invokes unp_gc. Unp_gc simply
	 * returns because the previous instance had set unp_gcing, and
	 * we return all the way back to soclose, which marks the socket
	 * with SS_NOFDREF, and then calls sofree. Sofree calls sorflush
	 * to free up the rights that are queued in messages on the socket A,
	 * i.e., the reference on B. The sorflush calls via the dom_dispose
	 * switch unp_dispose, which unp_scans with unp_discard. This second
	 * instance of unp_discard just calls closef on B.
	 *
	 * Well, a similar chain occurs on B, resulting in a sorflush on B,
	 * which results in another closef on A. Unfortunately, A is already
	 * being closed, and the descriptor has already been marked with
	 * SS_NOFDREF, and soclose panics at this point.
	 *
	 * Here, we first take an extra reference to each inaccessible
	 * descriptor. Then, we call sorflush ourself, since we know
	 * it is a Unix domain socket anyhow. After we destroy all the
	 * rights carried in messages, we do a last closef to get rid
	 * of our extra reference. This is the last close, and the
	 * unp_detach etc will shut down the socket.
	 *
	 * 91/09/19, bsy@cs.cmu.edu
	 */
	extra_ref = _MALLOC(nfiles * sizeof (struct fileglob *),
	    M_FILEGLOB, M_WAITOK);
	if (extra_ref == NULL)
		goto bail;
	/* Sweep phase: collect the unreachable descriptors */
	for (nunref = 0, fg = fmsghead.lh_first, fpp = extra_ref; fg != 0;
	    fg = nextfg) {
		lck_mtx_lock(&fg->fg_lock);

		nextfg = fg->f_msglist.le_next;
		/*
		 * If it's not open, skip it
		 */
		if (fg->fg_count == 0) {
			lck_mtx_unlock(&fg->fg_lock);
			continue;
		}
		/*
		 * If all refs are from msgs, and it's not marked accessible
		 * then it must be referenced from some unreachable cycle
		 * of (shut-down) FDs, so include it in our
		 * list of FDs to remove
		 */
		if (fg->fg_count == fg->fg_msgcount && !(fg->fg_flag & FMARK)) {
			fg->fg_count++;
			*fpp++ = fg;
			nunref++;
		}
		lck_mtx_unlock(&fg->fg_lock);
	}
	/*
	 * for each FD on our hit list, do the following two things
	 */
	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
		struct fileglob *tfg;

		tfg = *fpp;

		if (tfg->fg_type == DTYPE_SOCKET && tfg->fg_data != NULL) {
			so = (struct socket *)(tfg->fg_data);

			socket_lock(so, 0);

			sorflush(so);

			socket_unlock(so, 0);
		}
	}
	/* Drop the extra references taken above; this is the last close */
	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
		closef_locked((struct fileproc *)0, *fpp, (proc_t)NULL);

	FREE((caddr_t)extra_ref, M_FILEGLOB);
bail:
	lck_mtx_lock(uipc_lock);
	unp_gcing = 0;
	unp_gcthread = NULL;

	if (unp_gcwait != 0) {
		unp_gcwait = 0;
		need_gcwakeup = 1;
	}
	lck_mtx_unlock(uipc_lock);

	/* Wake unp_gc_wait() sleepers outside uipc_lock */
	if (need_gcwakeup != 0)
		wakeup(&unp_gcing);
}
2215
2216 void
2217 unp_dispose(struct mbuf *m)
2218 {
2219 if (m) {
2220 unp_scan(m, unp_discard);
2221 }
2222 }
2223
2224 /*
2225 * Returns: 0 Success
2226 */
2227 static int
2228 unp_listen(struct unpcb *unp, proc_t p)
2229 {
2230 kauth_cred_t safecred = kauth_cred_proc_ref(p);
2231 cru2x(safecred, &unp->unp_peercred);
2232 kauth_cred_unref(&safecred);
2233 unp->unp_flags |= UNP_HAVEPCCACHED;
2234 return (0);
2235 }
2236
/* should run under kernel funnel */
/*
 * Walk a chain of records (m_act links) looking for MT_CONTROL mbufs
 * that carry SCM_RIGHTS messages, and apply `op' (e.g. unp_mark or
 * unp_discard) to every in-transit fileglob found.
 */
static void
unp_scan(struct mbuf *m0, void (*op)(struct fileglob *))
{
	struct mbuf *m;
	struct fileglob **rp;
	struct cmsghdr *cm;
	int i;
	int qfds;

	while (m0) {
		for (m = m0; m; m = m->m_next)
			if (m->m_type == MT_CONTROL &&
			    (size_t)m->m_len >= sizeof (*cm)) {
				cm = mtod(m, struct cmsghdr *);
				if (cm->cmsg_level != SOL_SOCKET ||
				    cm->cmsg_type != SCM_RIGHTS)
					continue;
				qfds = (cm->cmsg_len - sizeof (*cm)) /
				    sizeof (int);
				rp = (struct fileglob **)(cm + 1);
				for (i = 0; i < qfds; i++)
					(*op)(*rp++);
				break;		/* XXX, but saves time */
			}
		m0 = m0->m_act;
	}
}
2265
2266 /* should run under kernel funnel */
2267 static void
2268 unp_mark(struct fileglob *fg)
2269 {
2270 lck_mtx_lock(&fg->fg_lock);
2271
2272 if (fg->fg_flag & FMARK) {
2273 lck_mtx_unlock(&fg->fg_lock);
2274 return;
2275 }
2276 fg->fg_flag |= (FMARK|FDEFER);
2277
2278 lck_mtx_unlock(&fg->fg_lock);
2279
2280 unp_defer++;
2281 }
2282
2283 /* should run under kernel funnel */
2284 static void
2285 unp_discard(struct fileglob *fg)
2286 {
2287 proc_t p = current_proc(); /* XXX */
2288
2289 (void) OSAddAtomic(1, &unp_disposed);
2290
2291 proc_fdlock(p);
2292 unp_discard_fdlocked(fg, p);
2293 proc_fdunlock(p);
2294 }
2295 static void
2296 unp_discard_fdlocked(struct fileglob *fg, proc_t p)
2297 {
2298 fg_removeuipc(fg);
2299
2300 (void) OSAddAtomic(-1, &unp_rights);
2301 (void) closef_locked((struct fileproc *)0, fg, p);
2302 }
2303
2304 int
2305 unp_lock(struct socket *so, int refcount, void * lr)
2306 {
2307 void * lr_saved;
2308 if (lr == 0)
2309 lr_saved = (void *) __builtin_return_address(0);
2310 else lr_saved = lr;
2311
2312 if (so->so_pcb) {
2313 lck_mtx_lock(&((struct unpcb *)so->so_pcb)->unp_mtx);
2314 } else {
2315 panic("unp_lock: so=%p NO PCB! lr=%p ref=0x%x\n",
2316 so, lr_saved, so->so_usecount);
2317 }
2318
2319 if (so->so_usecount < 0)
2320 panic("unp_lock: so=%p so_pcb=%p lr=%p ref=0x%x\n",
2321 so, so->so_pcb, lr_saved, so->so_usecount);
2322
2323 if (refcount)
2324 so->so_usecount++;
2325
2326 so->lock_lr[so->next_lock_lr] = lr_saved;
2327 so->next_lock_lr = (so->next_lock_lr+1) % SO_LCKDBG_MAX;
2328 return (0);
2329 }
2330
/*
 * Per-pcb socket unlock routine for PF_LOCAL sockets.
 *
 * Optionally drops a use-count reference, records the caller for
 * unlock debugging, and releases the unpcb mutex.  When the last
 * reference is gone and the pcb is being cleared, the socket, the
 * cached address, the mutex, and the unpcb itself are freed, and
 * a garbage-collection pass is kicked off for in-flight fds.
 *
 * Returns: 0 Always
 */
int
unp_unlock(struct socket *so, int refcount, void * lr)
{
	void * lr_saved;
	lck_mtx_t * mutex_held = NULL;
	struct unpcb *unp = sotounpcb(so);

	if (lr == 0)
		lr_saved = (void *) __builtin_return_address(0);
	else lr_saved = lr;

	if (refcount)
		so->so_usecount--;

	if (so->so_usecount < 0)
		panic("unp_unlock: so=%p usecount=%x\n", so, so->so_usecount);
	if (so->so_pcb == NULL) {
		panic("unp_unlock: so=%p NO PCB usecount=%x\n", so, so->so_usecount);
	} else {
		mutex_held = &((struct unpcb *)so->so_pcb)->unp_mtx;
	}
	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
	/* record caller in the unlock-debug ring (SO_LCKDBG_MAX entries) */
	so->unlock_lr[so->next_unlock_lr] = lr_saved;
	so->next_unlock_lr = (so->next_unlock_lr+1) % SO_LCKDBG_MAX;

	if (so->so_usecount == 0 && (so->so_flags & SOF_PCBCLEARING)) {
		/* last reference: tear the socket and pcb down */
		sofreelastref(so, 1);

		if (unp->unp_addr)
			FREE(unp->unp_addr, M_SONAME);

		/* must release the mutex before it can be destroyed */
		lck_mtx_unlock(mutex_held);

		lck_mtx_destroy(&unp->unp_mtx, unp_mtx_grp);
		zfree(unp_zone, unp);

		/*
		 * Sweep for unreachable in-flight descriptors; runs
		 * only after the pcb mutex has been released above.
		 */
		unp_gc();
	} else {
		lck_mtx_unlock(mutex_held);
	}

	return (0);
}
2374
2375 lck_mtx_t *
2376 unp_getlock(struct socket *so, __unused int locktype)
2377 {
2378 struct unpcb *unp = (struct unpcb *)so->so_pcb;
2379
2380
2381 if (so->so_pcb) {
2382 if (so->so_usecount < 0)
2383 panic("unp_getlock: so=%p usecount=%x\n", so, so->so_usecount);
2384 return(&unp->unp_mtx);
2385 } else {
2386 panic("unp_getlock: so=%p NULL so_pcb\n", so);
2387 return (so->so_proto->pr_domain->dom_mtx);
2388 }
2389 }
2390