]> git.saurik.com Git - apple/xnu.git/blob - bsd/kern/uipc_usrreq.c
xnu-2782.10.72.tar.gz
[apple/xnu.git] / bsd / kern / uipc_usrreq.c
1 /*
2 * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1982, 1986, 1989, 1991, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94
61 */
62 /*
63 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
64 * support for mandatory and extensible security protections. This notice
65 * is included in support of clause 2.2 (b) of the Apple Public License,
66 * Version 2.0.
67 */
68
69 #include <sys/param.h>
70 #include <sys/systm.h>
71 #include <sys/kernel.h>
72 #include <sys/domain.h>
73 #include <sys/fcntl.h>
74 #include <sys/malloc.h> /* XXX must be before <sys/file.h> */
75 #include <sys/file_internal.h>
76 #include <sys/guarded.h>
77 #include <sys/filedesc.h>
78 #include <sys/lock.h>
79 #include <sys/mbuf.h>
80 #include <sys/namei.h>
81 #include <sys/proc_internal.h>
82 #include <sys/kauth.h>
83 #include <sys/protosw.h>
84 #include <sys/socket.h>
85 #include <sys/socketvar.h>
86 #include <sys/stat.h>
87 #include <sys/sysctl.h>
88 #include <sys/un.h>
89 #include <sys/unpcb.h>
90 #include <sys/vnode_internal.h>
91 #include <sys/kdebug.h>
92
93 #include <kern/zalloc.h>
94 #include <kern/locks.h>
95
96 #if CONFIG_MACF
97 #include <security/mac_framework.h>
98 #endif /* CONFIG_MACF */
99
100 #include <mach/vm_param.h>
101
102 #define f_msgcount f_fglob->fg_msgcount
103 #define f_cred f_fglob->fg_cred
104 #define f_ops f_fglob->fg_ops
105 #define f_offset f_fglob->fg_offset
106 #define f_data f_fglob->fg_data
107 struct zone *unp_zone;
108 static unp_gen_t unp_gencnt;
109 static u_int unp_count;
110
111 static lck_attr_t *unp_mtx_attr;
112 static lck_grp_t *unp_mtx_grp;
113 static lck_grp_attr_t *unp_mtx_grp_attr;
114 static lck_rw_t *unp_list_mtx;
115
116 static lck_mtx_t *unp_disconnect_lock;
117 static lck_mtx_t *unp_connect_lock;
118 static u_int disconnect_in_progress;
119
120 extern lck_mtx_t *uipc_lock;
121 static struct unp_head unp_shead, unp_dhead;
122
123 /*
124 * mDNSResponder tracing. When enabled, endpoints connected to
125 * /var/run/mDNSResponder will be traced; during each send on
126 * the traced socket, we log the PID and process name of the
127 * sending process. We also print out a bit of info related
128 * to the data itself; this assumes ipc_msg_hdr in dnssd_ipc.h
129 * of mDNSResponder stays the same.
130 */
131 #define MDNSRESPONDER_PATH "/var/run/mDNSResponder"
132
133 static int unpst_tracemdns; /* enable tracing */
134
135 #define MDNS_IPC_MSG_HDR_VERSION_1 1
136
137 struct mdns_ipc_msg_hdr {
138 uint32_t version;
139 uint32_t datalen;
140 uint32_t ipc_flags;
141 uint32_t op;
142 union {
143 void *context;
144 uint32_t u32[2];
145 } __attribute__((packed));
146 uint32_t reg_index;
147 } __attribute__((packed));
148
149 /*
150 * Unix communications domain.
151 *
152 * TODO:
153 * SEQPACKET, RDM
154 * rethink name space problems
155 * need a proper out-of-band
156 * lock pushdown
157 */
158 static struct sockaddr sun_noname = { sizeof (sun_noname), AF_LOCAL, { 0 } };
159 static ino_t unp_ino; /* prototype for fake inode numbers */
160
161 static int unp_attach(struct socket *);
162 static void unp_detach(struct unpcb *);
163 static int unp_bind(struct unpcb *, struct sockaddr *, proc_t);
164 static int unp_connect(struct socket *, struct sockaddr *, proc_t);
165 static void unp_disconnect(struct unpcb *);
166 static void unp_shutdown(struct unpcb *);
167 static void unp_drop(struct unpcb *, int);
168 __private_extern__ void unp_gc(void);
169 static void unp_scan(struct mbuf *, void (*)(struct fileglob *));
170 static void unp_mark(struct fileglob *);
171 static void unp_discard(struct fileglob *);
172 static void unp_discard_fdlocked(struct fileglob *, proc_t);
173 static int unp_internalize(struct mbuf *, proc_t);
174 static int unp_listen(struct unpcb *, proc_t);
175 static void unpcb_to_compat(struct unpcb *, struct unpcb_compat *);
176 static void unp_get_locks_in_order(struct socket *so, struct socket *conn_so);
177
178 static void
179 unp_get_locks_in_order(struct socket *so, struct socket *conn_so)
180 {
181 if (so < conn_so) {
182 socket_lock(conn_so, 1);
183 } else {
184 struct unpcb *unp = sotounpcb(so);
185 unp->unp_flags |= UNP_DONTDISCONNECT;
186 unp->rw_thrcount++;
187 socket_unlock(so, 0);
188
189 /* Get the locks in the correct order */
190 socket_lock(conn_so, 1);
191 socket_lock(so, 0);
192 unp->rw_thrcount--;
193 if (unp->rw_thrcount == 0) {
194 unp->unp_flags &= ~UNP_DONTDISCONNECT;
195 wakeup(unp);
196 }
197 }
198 }
199
200 static int
201 uipc_abort(struct socket *so)
202 {
203 struct unpcb *unp = sotounpcb(so);
204
205 if (unp == 0)
206 return (EINVAL);
207 unp_drop(unp, ECONNABORTED);
208 unp_detach(unp);
209 sofree(so);
210 return (0);
211 }
212
213 static int
214 uipc_accept(struct socket *so, struct sockaddr **nam)
215 {
216 struct unpcb *unp = sotounpcb(so);
217
218 if (unp == 0)
219 return (EINVAL);
220
221 /*
222 * Pass back name of connected socket,
223 * if it was bound and we are still connected
224 * (our peer may have closed already!).
225 */
226 if (unp->unp_conn && unp->unp_conn->unp_addr) {
227 *nam = dup_sockaddr((struct sockaddr *)
228 unp->unp_conn->unp_addr, 1);
229 } else {
230 *nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
231 }
232 return (0);
233 }
234
235 /*
236 * Returns: 0 Success
237 * EISCONN
238 * unp_attach:
239 */
240 static int
241 uipc_attach(struct socket *so, __unused int proto, __unused proc_t p)
242 {
243 struct unpcb *unp = sotounpcb(so);
244
245 if (unp != 0)
246 return (EISCONN);
247 return (unp_attach(so));
248 }
249
250 static int
251 uipc_bind(struct socket *so, struct sockaddr *nam, proc_t p)
252 {
253 struct unpcb *unp = sotounpcb(so);
254
255 if (unp == 0)
256 return (EINVAL);
257
258 return (unp_bind(unp, nam, p));
259 }
260
261 /*
262 * Returns: 0 Success
263 * EINVAL
264 * unp_connect:??? [See elsewhere in this file]
265 */
266 static int
267 uipc_connect(struct socket *so, struct sockaddr *nam, proc_t p)
268 {
269 struct unpcb *unp = sotounpcb(so);
270
271 if (unp == 0)
272 return (EINVAL);
273 return (unp_connect(so, nam, p));
274 }
275
276 /*
277 * Returns: 0 Success
278 * EINVAL
279 * unp_connect2:EPROTOTYPE Protocol wrong type for socket
280 * unp_connect2:EINVAL Invalid argument
281 */
282 static int
283 uipc_connect2(struct socket *so1, struct socket *so2)
284 {
285 struct unpcb *unp = sotounpcb(so1);
286
287 if (unp == 0)
288 return (EINVAL);
289
290 return (unp_connect2(so1, so2));
291 }
292
/* pru_control is EOPNOTSUPP */
294
295 static int
296 uipc_detach(struct socket *so)
297 {
298 struct unpcb *unp = sotounpcb(so);
299
300 if (unp == 0)
301 return (EINVAL);
302
303 lck_mtx_assert(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
304 unp_detach(unp);
305 return (0);
306 }
307
308 static int
309 uipc_disconnect(struct socket *so)
310 {
311 struct unpcb *unp = sotounpcb(so);
312
313 if (unp == 0)
314 return (EINVAL);
315 unp_disconnect(unp);
316 return (0);
317 }
318
319 /*
320 * Returns: 0 Success
321 * EINVAL
322 */
323 static int
324 uipc_listen(struct socket *so, __unused proc_t p)
325 {
326 struct unpcb *unp = sotounpcb(so);
327
328 if (unp == 0 || unp->unp_vnode == 0)
329 return (EINVAL);
330 return (unp_listen(unp, p));
331 }
332
333 static int
334 uipc_peeraddr(struct socket *so, struct sockaddr **nam)
335 {
336 struct unpcb *unp = sotounpcb(so);
337
338 if (unp == NULL)
339 return (EINVAL);
340 if (unp->unp_conn != NULL && unp->unp_conn->unp_addr != NULL) {
341 *nam = dup_sockaddr((struct sockaddr *)
342 unp->unp_conn->unp_addr, 1);
343 } else {
344 *nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
345 }
346 return (0);
347 }
348
349 static int
350 uipc_rcvd(struct socket *so, __unused int flags)
351 {
352 struct unpcb *unp = sotounpcb(so);
353 struct socket *so2;
354
355 if (unp == 0)
356 return (EINVAL);
357 switch (so->so_type) {
358 case SOCK_DGRAM:
359 panic("uipc_rcvd DGRAM?");
360 /*NOTREACHED*/
361
362 case SOCK_STREAM:
363 #define rcv (&so->so_rcv)
364 #define snd (&so2->so_snd)
365 if (unp->unp_conn == 0)
366 break;
367
368 so2 = unp->unp_conn->unp_socket;
369 unp_get_locks_in_order(so, so2);
370 /*
371 * Adjust backpressure on sender
372 * and wakeup any waiting to write.
373 */
374 snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
375 unp->unp_mbcnt = rcv->sb_mbcnt;
376 snd->sb_hiwat += unp->unp_cc - rcv->sb_cc;
377 unp->unp_cc = rcv->sb_cc;
378 sowwakeup(so2);
379
380 socket_unlock(so2, 1);
381
382 #undef snd
383 #undef rcv
384 break;
385
386 default:
387 panic("uipc_rcvd unknown socktype");
388 }
389 return (0);
390 }
391
392 /* pru_rcvoob is EOPNOTSUPP */
393
394 /*
395 * Returns: 0 Success
396 * EINVAL
397 * EOPNOTSUPP
398 * EPIPE
399 * ENOTCONN
400 * EISCONN
401 * unp_internalize:EINVAL
402 * unp_internalize:EBADF
403 * unp_connect:EAFNOSUPPORT Address family not supported
404 * unp_connect:EINVAL Invalid argument
405 * unp_connect:ENOTSOCK Not a socket
406 * unp_connect:ECONNREFUSED Connection refused
407 * unp_connect:EISCONN Socket is connected
408 * unp_connect:EPROTOTYPE Protocol wrong type for socket
409 * unp_connect:???
410 * sbappendaddr:ENOBUFS [5th argument, contents modified]
411 * sbappendaddr:??? [whatever a filter author chooses]
412 */
413 static int
414 uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
415 struct mbuf *control, proc_t p)
416 {
417 int error = 0;
418 struct unpcb *unp = sotounpcb(so);
419 struct socket *so2;
420
421 if (unp == 0) {
422 error = EINVAL;
423 goto release;
424 }
425 if (flags & PRUS_OOB) {
426 error = EOPNOTSUPP;
427 goto release;
428 }
429
430 if (control) {
431 /* release lock to avoid deadlock (4436174) */
432 socket_unlock(so, 0);
433 error = unp_internalize(control, p);
434 socket_lock(so, 0);
435 if (error)
436 goto release;
437 }
438
439 switch (so->so_type) {
440 case SOCK_DGRAM:
441 {
442 struct sockaddr *from;
443
444 if (nam) {
445 if (unp->unp_conn) {
446 error = EISCONN;
447 break;
448 }
449 error = unp_connect(so, nam, p);
450 if (error)
451 break;
452 } else {
453 if (unp->unp_conn == 0) {
454 error = ENOTCONN;
455 break;
456 }
457 }
458
459 so2 = unp->unp_conn->unp_socket;
460 if (so != so2)
461 unp_get_locks_in_order(so, so2);
462
463 if (unp->unp_addr)
464 from = (struct sockaddr *)unp->unp_addr;
465 else
466 from = &sun_noname;
467 /*
468 * sbappendaddr() will fail when the receiver runs out of
469 * space; in contrast to SOCK_STREAM, we will lose messages
470 * for the SOCK_DGRAM case when the receiver's queue overflows.
471 * SB_UNIX on the socket buffer implies that the callee will
472 * not free the control message, if any, because we would need
473 * to call unp_dispose() on it.
474 */
475 if (sbappendaddr(&so2->so_rcv, from, m, control, &error)) {
476 control = NULL;
477 sorwakeup(so2);
478 } else if (control != NULL && error == 0) {
479 /* A socket filter took control; don't touch it */
480 control = NULL;
481 }
482
483 if (so != so2)
484 socket_unlock(so2, 1);
485
486 m = NULL;
487 if (nam)
488 unp_disconnect(unp);
489 break;
490 }
491
492 case SOCK_STREAM: {
493 int didreceive = 0;
494 #define rcv (&so2->so_rcv)
495 #define snd (&so->so_snd)
496 /* Connect if not connected yet. */
497 /*
498 * Note: A better implementation would complain
499 * if not equal to the peer's address.
500 */
501 if ((so->so_state & SS_ISCONNECTED) == 0) {
502 if (nam) {
503 error = unp_connect(so, nam, p);
504 if (error)
505 break; /* XXX */
506 } else {
507 error = ENOTCONN;
508 break;
509 }
510 }
511
512 if (so->so_state & SS_CANTSENDMORE) {
513 error = EPIPE;
514 break;
515 }
516 if (unp->unp_conn == 0)
517 panic("uipc_send connected but no connection?");
518
519 so2 = unp->unp_conn->unp_socket;
520 unp_get_locks_in_order(so, so2);
521
522 /* Check socket state again as we might have unlocked the socket
523 * while trying to get the locks in order
524 */
525
526 if ((so->so_state & SS_CANTSENDMORE)) {
527 error = EPIPE;
528 socket_unlock(so2, 1);
529 break;
530 }
531
532 if (unp->unp_flags & UNP_TRACE_MDNS) {
533 struct mdns_ipc_msg_hdr hdr;
534
535 if (mbuf_copydata(m, 0, sizeof (hdr), &hdr) == 0 &&
536 hdr.version == ntohl(MDNS_IPC_MSG_HDR_VERSION_1)) {
537 printf("%s[mDNSResponder] pid=%d (%s): op=0x%x\n",
538 __func__, p->p_pid, p->p_comm, ntohl(hdr.op));
539 }
540 }
541
542 /*
543 * Send to paired receive port, and then reduce send buffer
544 * hiwater marks to maintain backpressure. Wake up readers.
545 * SB_UNIX flag will allow new record to be appended to the
546 * receiver's queue even when it is already full. It is
547 * possible, however, that append might fail. In that case,
548 * we will need to call unp_dispose() on the control message;
549 * the callee will not free it since SB_UNIX is set.
550 */
551 didreceive = control ?
552 sbappendcontrol(rcv, m, control, &error) : sbappend(rcv, m);
553
554 snd->sb_mbmax -= rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
555 unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
556 if ((int32_t)snd->sb_hiwat >=
557 (int32_t)(rcv->sb_cc - unp->unp_conn->unp_cc)) {
558 snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc;
559 } else {
560 snd->sb_hiwat = 0;
561 }
562 unp->unp_conn->unp_cc = rcv->sb_cc;
563 if (didreceive) {
564 control = NULL;
565 sorwakeup(so2);
566 } else if (control != NULL && error == 0) {
567 /* A socket filter took control; don't touch it */
568 control = NULL;
569 }
570
571 socket_unlock(so2, 1);
572 m = NULL;
573 #undef snd
574 #undef rcv
575 }
576 break;
577
578 default:
579 panic("uipc_send unknown socktype");
580 }
581
582 /*
583 * SEND_EOF is equivalent to a SEND followed by
584 * a SHUTDOWN.
585 */
586 if (flags & PRUS_EOF) {
587 socantsendmore(so);
588 unp_shutdown(unp);
589 }
590
591 if (control && error != 0) {
592 socket_unlock(so, 0);
593 unp_dispose(control);
594 socket_lock(so, 0);
595 }
596
597 release:
598 if (control)
599 m_freem(control);
600 if (m)
601 m_freem(m);
602 return (error);
603 }
604
605 static int
606 uipc_sense(struct socket *so, void *ub, int isstat64)
607 {
608 struct unpcb *unp = sotounpcb(so);
609 struct socket *so2;
610 blksize_t blksize;
611
612 if (unp == 0)
613 return (EINVAL);
614
615 blksize = so->so_snd.sb_hiwat;
616 if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) {
617 so2 = unp->unp_conn->unp_socket;
618 blksize += so2->so_rcv.sb_cc;
619 }
620 if (unp->unp_ino == 0)
621 unp->unp_ino = unp_ino++;
622
623 if (isstat64 != 0) {
624 struct stat64 *sb64;
625
626 sb64 = (struct stat64 *)ub;
627 sb64->st_blksize = blksize;
628 sb64->st_dev = NODEV;
629 sb64->st_ino = (ino64_t)unp->unp_ino;
630 } else {
631 struct stat *sb;
632
633 sb = (struct stat *)ub;
634 sb->st_blksize = blksize;
635 sb->st_dev = NODEV;
636 sb->st_ino = (ino_t)(uintptr_t)unp->unp_ino;
637 }
638
639 return (0);
640 }
641
642 /*
643 * Returns: 0 Success
644 * EINVAL
645 *
646 * Notes: This is not strictly correct, as unp_shutdown() also calls
647 * socantrcvmore(). These should maybe both be conditionalized
648 * on the 'how' argument in soshutdown() as called from the
649 * shutdown() system call.
650 */
651 static int
652 uipc_shutdown(struct socket *so)
653 {
654 struct unpcb *unp = sotounpcb(so);
655
656 if (unp == 0)
657 return (EINVAL);
658 socantsendmore(so);
659 unp_shutdown(unp);
660 return (0);
661 }
662
663 /*
664 * Returns: 0 Success
665 * EINVAL Invalid argument
666 */
667 static int
668 uipc_sockaddr(struct socket *so, struct sockaddr **nam)
669 {
670 struct unpcb *unp = sotounpcb(so);
671
672 if (unp == NULL)
673 return (EINVAL);
674 if (unp->unp_addr != NULL) {
675 *nam = dup_sockaddr((struct sockaddr *)unp->unp_addr, 1);
676 } else {
677 *nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
678 }
679 return (0);
680 }
681
682 struct pr_usrreqs uipc_usrreqs = {
683 .pru_abort = uipc_abort,
684 .pru_accept = uipc_accept,
685 .pru_attach = uipc_attach,
686 .pru_bind = uipc_bind,
687 .pru_connect = uipc_connect,
688 .pru_connect2 = uipc_connect2,
689 .pru_detach = uipc_detach,
690 .pru_disconnect = uipc_disconnect,
691 .pru_listen = uipc_listen,
692 .pru_peeraddr = uipc_peeraddr,
693 .pru_rcvd = uipc_rcvd,
694 .pru_send = uipc_send,
695 .pru_sense = uipc_sense,
696 .pru_shutdown = uipc_shutdown,
697 .pru_sockaddr = uipc_sockaddr,
698 .pru_sosend = sosend,
699 .pru_soreceive = soreceive,
700 };
701
702 int
703 uipc_ctloutput(struct socket *so, struct sockopt *sopt)
704 {
705 struct unpcb *unp = sotounpcb(so);
706 int error = 0;
707 pid_t peerpid;
708 struct socket *peerso;
709
710 switch (sopt->sopt_dir) {
711 case SOPT_GET:
712 switch (sopt->sopt_name) {
713 case LOCAL_PEERCRED:
714 if (unp->unp_flags & UNP_HAVEPC) {
715 error = sooptcopyout(sopt, &unp->unp_peercred,
716 sizeof (unp->unp_peercred));
717 } else {
718 if (so->so_type == SOCK_STREAM)
719 error = ENOTCONN;
720 else
721 error = EINVAL;
722 }
723 break;
724 case LOCAL_PEERPID:
725 case LOCAL_PEEREPID:
726 if (unp->unp_conn == NULL) {
727 error = ENOTCONN;
728 break;
729 }
730 peerso = unp->unp_conn->unp_socket;
731 if (peerso == NULL)
732 panic("peer is connected but has no socket?");
733 unp_get_locks_in_order(so, peerso);
734 if (sopt->sopt_name == LOCAL_PEEREPID &&
735 peerso->so_flags & SOF_DELEGATED)
736 peerpid = peerso->e_pid;
737 else
738 peerpid = peerso->last_pid;
739 socket_unlock(peerso, 1);
740 error = sooptcopyout(sopt, &peerpid, sizeof (peerpid));
741 break;
742 case LOCAL_PEERUUID:
743 case LOCAL_PEEREUUID:
744 if (unp->unp_conn == NULL) {
745 error = ENOTCONN;
746 break;
747 }
748 peerso = unp->unp_conn->unp_socket;
749 if (peerso == NULL)
750 panic("peer is connected but has no socket?");
751 unp_get_locks_in_order(so, peerso);
752 if (sopt->sopt_name == LOCAL_PEEREUUID &&
753 peerso->so_flags & SOF_DELEGATED)
754 error = sooptcopyout(sopt, &peerso->e_uuid,
755 sizeof (peerso->e_uuid));
756 else
757 error = sooptcopyout(sopt, &peerso->last_uuid,
758 sizeof (peerso->last_uuid));
759 socket_unlock(peerso, 1);
760 break;
761 default:
762 error = EOPNOTSUPP;
763 break;
764 }
765 break;
766 case SOPT_SET:
767 default:
768 error = EOPNOTSUPP;
769 break;
770 }
771
772 return (error);
773 }
774
775 /*
776 * Both send and receive buffers are allocated PIPSIZ bytes of buffering
777 * for stream sockets, although the total for sender and receiver is
778 * actually only PIPSIZ.
779 * Datagram sockets really use the sendspace as the maximum datagram size,
780 * and don't really want to reserve the sendspace. Their recvspace should
781 * be large enough for at least one max-size datagram plus address.
782 */
783 #ifndef PIPSIZ
784 #define PIPSIZ 8192
785 #endif
786 static u_int32_t unpst_sendspace = PIPSIZ;
787 static u_int32_t unpst_recvspace = PIPSIZ;
788 static u_int32_t unpdg_sendspace = 2*1024; /* really max datagram size */
789 static u_int32_t unpdg_recvspace = 4*1024;
790
791 static int unp_rights; /* file descriptors in flight */
792 static int unp_disposed; /* discarded file descriptors */
793
794 SYSCTL_DECL(_net_local_stream);
795 SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW | CTLFLAG_LOCKED,
796 &unpst_sendspace, 0, "");
797 SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW | CTLFLAG_LOCKED,
798 &unpst_recvspace, 0, "");
799 SYSCTL_INT(_net_local_stream, OID_AUTO, tracemdns, CTLFLAG_RW | CTLFLAG_LOCKED,
800 &unpst_tracemdns, 0, "");
801 SYSCTL_DECL(_net_local_dgram);
802 SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW | CTLFLAG_LOCKED,
803 &unpdg_sendspace, 0, "");
804 SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW | CTLFLAG_LOCKED,
805 &unpdg_recvspace, 0, "");
806 SYSCTL_DECL(_net_local);
807 SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD | CTLFLAG_LOCKED, &unp_rights, 0, "");
808
809 /*
810 * Returns: 0 Success
811 * ENOBUFS
812 * soreserve:ENOBUFS
813 */
814 static int
815 unp_attach(struct socket *so)
816 {
817 struct unpcb *unp;
818 int error = 0;
819
820 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
821 switch (so->so_type) {
822
823 case SOCK_STREAM:
824 error = soreserve(so, unpst_sendspace, unpst_recvspace);
825 break;
826
827 case SOCK_DGRAM:
828 error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
829 break;
830
831 default:
832 panic("unp_attach");
833 }
834 if (error)
835 return (error);
836 }
837 unp = (struct unpcb *)zalloc(unp_zone);
838 if (unp == NULL)
839 return (ENOBUFS);
840 bzero(unp, sizeof (*unp));
841
842 lck_mtx_init(&unp->unp_mtx,
843 unp_mtx_grp, unp_mtx_attr);
844
845 lck_rw_lock_exclusive(unp_list_mtx);
846 LIST_INIT(&unp->unp_refs);
847 unp->unp_socket = so;
848 unp->unp_gencnt = ++unp_gencnt;
849 unp_count++;
850 LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ?
851 &unp_dhead : &unp_shead, unp, unp_link);
852 lck_rw_done(unp_list_mtx);
853 so->so_pcb = (caddr_t)unp;
854 /*
855 * Mark AF_UNIX socket buffers accordingly so that:
856 *
857 * a. In the SOCK_STREAM case, socket buffer append won't fail due to
858 * the lack of space; this essentially loosens the sbspace() check,
859 * since there is disconnect between sosend() and uipc_send() with
860 * respect to flow control that might result in our dropping the
861 * data in uipc_send(). By setting this, we allow for slightly
862 * more records to be appended to the receiving socket to avoid
863 * losing data (which we can't afford in the SOCK_STREAM case).
864 * Flow control still takes place since we adjust the sender's
865 * hiwat during each send. This doesn't affect the SOCK_DGRAM
866 * case and append would still fail when the queue overflows.
867 *
868 * b. In the presence of control messages containing internalized
869 * file descriptors, the append routines will not free them since
870 * we'd need to undo the work first via unp_dispose().
871 */
872 so->so_rcv.sb_flags |= SB_UNIX;
873 so->so_snd.sb_flags |= SB_UNIX;
874 return (0);
875 }
876
877 static void
878 unp_detach(struct unpcb *unp)
879 {
880 int so_locked = 1;
881
882 lck_rw_lock_exclusive(unp_list_mtx);
883 LIST_REMOVE(unp, unp_link);
884 --unp_count;
885 ++unp_gencnt;
886 lck_rw_done(unp_list_mtx);
887 if (unp->unp_vnode) {
888 struct vnode *tvp = NULL;
889 socket_unlock(unp->unp_socket, 0);
890
891 /* Holding unp_connect_lock will avoid a race between
892 * a thread closing the listening socket and a thread
893 * connecting to it.
894 */
895 lck_mtx_lock(unp_connect_lock);
896 socket_lock(unp->unp_socket, 0);
897 if (unp->unp_vnode) {
898 tvp = unp->unp_vnode;
899 unp->unp_vnode->v_socket = NULL;
900 unp->unp_vnode = NULL;
901 }
902 lck_mtx_unlock(unp_connect_lock);
903 if (tvp != NULL)
904 vnode_rele(tvp); /* drop the usecount */
905 }
906 if (unp->unp_conn)
907 unp_disconnect(unp);
908 while (unp->unp_refs.lh_first) {
909 struct unpcb *unp2 = NULL;
910
911 /* This datagram socket is connected to one or more
912 * sockets. In order to avoid a race condition between removing
913 * this reference and closing the connected socket, we need
914 * to check disconnect_in_progress
915 */
916 if (so_locked == 1) {
917 socket_unlock(unp->unp_socket, 0);
918 so_locked = 0;
919 }
920 lck_mtx_lock(unp_disconnect_lock);
921 while (disconnect_in_progress != 0) {
922 (void)msleep((caddr_t)&disconnect_in_progress, unp_disconnect_lock,
923 PSOCK, "disconnect", NULL);
924 }
925 disconnect_in_progress = 1;
926 lck_mtx_unlock(unp_disconnect_lock);
927
928 /* Now we are sure that any unpcb socket disconnect is not happening */
929 if (unp->unp_refs.lh_first != NULL) {
930 unp2 = unp->unp_refs.lh_first;
931 socket_lock(unp2->unp_socket, 1);
932 }
933
934 lck_mtx_lock(unp_disconnect_lock);
935 disconnect_in_progress = 0;
936 wakeup(&disconnect_in_progress);
937 lck_mtx_unlock(unp_disconnect_lock);
938
939 if (unp2 != NULL) {
940 /* We already locked this socket and have a reference on it */
941 unp_drop(unp2, ECONNRESET);
942 socket_unlock(unp2->unp_socket, 1);
943 }
944 }
945
946 if (so_locked == 0) {
947 socket_lock(unp->unp_socket, 0);
948 so_locked = 1;
949 }
950 soisdisconnected(unp->unp_socket);
951 /* makes sure we're getting dealloced */
952 unp->unp_socket->so_flags |= SOF_PCBCLEARING;
953 }
954
955 /*
956 * Returns: 0 Success
957 * EAFNOSUPPORT
958 * EINVAL
959 * EADDRINUSE
960 * namei:??? [anything namei can return]
961 * vnode_authorize:??? [anything vnode_authorize can return]
962 *
963 * Notes: p at this point is the current process, as this function is
964 * only called by sobind().
965 */
966 static int
967 unp_bind(
968 struct unpcb *unp,
969 struct sockaddr *nam,
970 proc_t p)
971 {
972 struct sockaddr_un *soun = (struct sockaddr_un *)nam;
973 struct vnode *vp, *dvp;
974 struct vnode_attr va;
975 vfs_context_t ctx = vfs_context_current();
976 int error, namelen;
977 struct nameidata nd;
978 struct socket *so = unp->unp_socket;
979 char buf[SOCK_MAXADDRLEN];
980
981 if (nam->sa_family != 0 && nam->sa_family != AF_UNIX) {
982 return (EAFNOSUPPORT);
983 }
984
985 if (unp->unp_vnode != NULL)
986 return (EINVAL);
987 namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
988 if (namelen <= 0)
989 return (EINVAL);
990
991 socket_unlock(so, 0);
992
993 strlcpy(buf, soun->sun_path, namelen+1);
994 NDINIT(&nd, CREATE, OP_MKFIFO, FOLLOW | LOCKPARENT, UIO_SYSSPACE,
995 CAST_USER_ADDR_T(buf), ctx);
996 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
997 error = namei(&nd);
998 if (error) {
999 socket_lock(so, 0);
1000 return (error);
1001 }
1002 dvp = nd.ni_dvp;
1003 vp = nd.ni_vp;
1004
1005 if (vp != NULL) {
1006 /*
1007 * need to do this before the vnode_put of dvp
1008 * since we may have to release an fs_nodelock
1009 */
1010 nameidone(&nd);
1011
1012 vnode_put(dvp);
1013 vnode_put(vp);
1014
1015 socket_lock(so, 0);
1016 return (EADDRINUSE);
1017 }
1018
1019 VATTR_INIT(&va);
1020 VATTR_SET(&va, va_type, VSOCK);
1021 VATTR_SET(&va, va_mode, (ACCESSPERMS & ~p->p_fd->fd_cmask));
1022
1023 #if CONFIG_MACF
1024 error = mac_vnode_check_create(ctx,
1025 nd.ni_dvp, &nd.ni_cnd, &va);
1026
1027 if (error == 0)
1028 #endif /* CONFIG_MACF */
1029 #if CONFIG_MACF_SOCKET_SUBSET
1030 error = mac_vnode_check_uipc_bind(ctx,
1031 nd.ni_dvp, &nd.ni_cnd, &va);
1032
1033 if (error == 0)
1034 #endif /* MAC_SOCKET_SUBSET */
1035 /* authorize before creating */
1036 error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);
1037
1038 if (!error) {
1039 /* create the socket */
1040 error = vn_create(dvp, &vp, &nd, &va, 0, 0, NULL, ctx);
1041 }
1042
1043 nameidone(&nd);
1044 vnode_put(dvp);
1045
1046 if (error) {
1047 socket_lock(so, 0);
1048 return (error);
1049 }
1050 vnode_ref(vp); /* gain a longterm reference */
1051 socket_lock(so, 0);
1052 vp->v_socket = unp->unp_socket;
1053 unp->unp_vnode = vp;
1054 unp->unp_addr = (struct sockaddr_un *)dup_sockaddr(nam, 1);
1055 vnode_put(vp); /* drop the iocount */
1056
1057 return (0);
1058 }
1059
1060
1061 /*
1062 * Returns: 0 Success
1063 * EAFNOSUPPORT Address family not supported
1064 * EINVAL Invalid argument
1065 * ENOTSOCK Not a socket
1066 * ECONNREFUSED Connection refused
1067 * EPROTOTYPE Protocol wrong type for socket
1068 * EISCONN Socket is connected
1069 * unp_connect2:EPROTOTYPE Protocol wrong type for socket
1070 * unp_connect2:EINVAL Invalid argument
1071 * namei:??? [anything namei can return]
1072 * vnode_authorize:???? [anything vnode_authorize can return]
1073 *
1074 * Notes: p at this point is the current process, as this function is
1075 * only called by sosend(), sendfile(), and soconnectlock().
1076 */
1077 static int
1078 unp_connect(struct socket *so, struct sockaddr *nam, __unused proc_t p)
1079 {
1080 struct sockaddr_un *soun = (struct sockaddr_un *)nam;
1081 struct vnode *vp;
1082 struct socket *so2, *so3, *list_so=NULL;
1083 struct unpcb *unp, *unp2, *unp3;
1084 vfs_context_t ctx = vfs_context_current();
1085 int error, len;
1086 struct nameidata nd;
1087 char buf[SOCK_MAXADDRLEN];
1088
1089 if (nam->sa_family != 0 && nam->sa_family != AF_UNIX) {
1090 return (EAFNOSUPPORT);
1091 }
1092
1093 unp = sotounpcb(so);
1094 so2 = so3 = NULL;
1095
1096 len = nam->sa_len - offsetof(struct sockaddr_un, sun_path);
1097 if (len <= 0)
1098 return (EINVAL);
1099
1100 strlcpy(buf, soun->sun_path, len+1);
1101 socket_unlock(so, 0);
1102
1103 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
1104 CAST_USER_ADDR_T(buf), ctx);
1105 error = namei(&nd);
1106 if (error) {
1107 socket_lock(so, 0);
1108 return (error);
1109 }
1110 nameidone(&nd);
1111 vp = nd.ni_vp;
1112 if (vp->v_type != VSOCK) {
1113 error = ENOTSOCK;
1114 socket_lock(so, 0);
1115 goto out;
1116 }
1117
1118 #if CONFIG_MACF_SOCKET_SUBSET
1119 error = mac_vnode_check_uipc_connect(ctx, vp);
1120 if (error) {
1121 socket_lock(so, 0);
1122 goto out;
1123 }
1124 #endif /* MAC_SOCKET_SUBSET */
1125
1126 error = vnode_authorize(vp, NULL, KAUTH_VNODE_WRITE_DATA, ctx);
1127 if (error) {
1128 socket_lock(so, 0);
1129 goto out;
1130 }
1131
1132 lck_mtx_lock(unp_connect_lock);
1133
1134 if (vp->v_socket == 0) {
1135 lck_mtx_unlock(unp_connect_lock);
1136 error = ECONNREFUSED;
1137 socket_lock(so, 0);
1138 goto out;
1139 }
1140
1141 socket_lock(vp->v_socket, 1); /* Get a reference on the listening socket */
1142 so2 = vp->v_socket;
1143 lck_mtx_unlock(unp_connect_lock);
1144
1145
1146 if (so2->so_pcb == NULL) {
1147 error = ECONNREFUSED;
1148 if (so != so2) {
1149 socket_unlock(so2, 1);
1150 socket_lock(so, 0);
1151 } else {
1152 /* Release the reference held for the listen socket */
1153 so2->so_usecount--;
1154 }
1155 goto out;
1156 }
1157
1158 if (so < so2) {
1159 socket_unlock(so2, 0);
1160 socket_lock(so, 0);
1161 socket_lock(so2, 0);
1162 } else if (so > so2) {
1163 socket_lock(so, 0);
1164 }
1165 /*
1166 * Check if socket was connected while we were trying to
1167 * get the socket locks in order.
1168 * XXX - probably shouldn't return an error for SOCK_DGRAM
1169 */
1170 if ((so->so_state & SS_ISCONNECTED) != 0) {
1171 error = EISCONN;
1172 goto decref_out;
1173 }
1174
1175 if (so->so_type != so2->so_type) {
1176 error = EPROTOTYPE;
1177 goto decref_out;
1178 }
1179
1180 if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
1181 /* Release the incoming socket but keep a reference */
1182 socket_unlock(so, 0);
1183
1184 if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
1185 (so3 = sonewconn(so2, 0, nam)) == 0) {
1186 error = ECONNREFUSED;
1187 if (so != so2) {
1188 socket_unlock(so2, 1);
1189 socket_lock(so, 0);
1190 } else {
1191 socket_lock(so, 0);
1192 /* Release the reference held for
1193 * listen socket.
1194 */
1195 so2->so_usecount--;
1196 }
1197 goto out;
1198 }
1199 unp2 = sotounpcb(so2);
1200 unp3 = sotounpcb(so3);
1201 if (unp2->unp_addr)
1202 unp3->unp_addr = (struct sockaddr_un *)
1203 dup_sockaddr((struct sockaddr *)unp2->unp_addr, 1);
1204
1205 /*
1206 * unp_peercred management:
1207 *
1208 * The connecter's (client's) credentials are copied
1209 * from its process structure at the time of connect()
1210 * (which is now).
1211 */
1212 cru2x(vfs_context_ucred(ctx), &unp3->unp_peercred);
1213 unp3->unp_flags |= UNP_HAVEPC;
1214 /*
1215 * The receiver's (server's) credentials are copied
1216 * from the unp_peercred member of socket on which the
1217 * former called listen(); unp_listen() cached that
1218 * process's credentials at that time so we can use
1219 * them now.
1220 */
1221 KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED,
1222 ("unp_connect: listener without cached peercred"));
1223
1224 /* Here we need to have both so and so2 locks and so2
1225 * is already locked. Lock ordering is required.
1226 */
1227 if (so < so2) {
1228 socket_unlock(so2, 0);
1229 socket_lock(so, 0);
1230 socket_lock(so2, 0);
1231 } else {
1232 socket_lock(so, 0);
1233 }
1234
1235 /* Check again if the socket state changed when its lock was released */
1236 if ((so->so_state & SS_ISCONNECTED) != 0) {
1237 error = EISCONN;
1238 socket_unlock(so2, 1);
1239 socket_lock(so3, 0);
1240 sofreelastref(so3, 1);
1241 goto out;
1242 }
1243 memcpy(&unp->unp_peercred, &unp2->unp_peercred,
1244 sizeof (unp->unp_peercred));
1245 unp->unp_flags |= UNP_HAVEPC;
1246
1247 #if CONFIG_MACF_SOCKET
1248 /* XXXMAC: recursive lock: SOCK_LOCK(so); */
1249 mac_socketpeer_label_associate_socket(so, so3);
1250 mac_socketpeer_label_associate_socket(so3, so);
1251 /* XXXMAC: SOCK_UNLOCK(so); */
1252 #endif /* MAC_SOCKET */
1253
1254 /* Hold the reference on listening socket until the end */
1255 socket_unlock(so2, 0);
1256 list_so = so2;
1257
1258 /* Lock ordering doesn't matter because so3 was just created */
1259 socket_lock(so3, 1);
1260 so2 = so3;
1261
1262 /*
1263 * Enable tracing for mDNSResponder endpoints. (The use
1264 * of sizeof instead of strlen below takes the null
1265 * terminating character into account.)
1266 */
1267 if (unpst_tracemdns &&
1268 !strncmp(soun->sun_path, MDNSRESPONDER_PATH,
1269 sizeof (MDNSRESPONDER_PATH))) {
1270 unp->unp_flags |= UNP_TRACE_MDNS;
1271 unp2->unp_flags |= UNP_TRACE_MDNS;
1272 }
1273 }
1274
1275 error = unp_connect2(so, so2);
1276
1277 decref_out:
1278 if (so2 != NULL) {
1279 if (so != so2) {
1280 socket_unlock(so2, 1);
1281 } else {
1282 /* Release the extra reference held for the listen socket.
1283 * This is possible only for SOCK_DGRAM sockets. We refuse
1284 * connecting to the same socket for SOCK_STREAM sockets.
1285 */
1286 so2->so_usecount--;
1287 }
1288 }
1289
1290 if (list_so != NULL) {
1291 socket_lock(list_so, 0);
1292 socket_unlock(list_so, 1);
1293 }
1294
1295 out:
1296 lck_mtx_assert(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1297 vnode_put(vp);
1298 return (error);
1299 }
1300
1301 /*
1302 * Returns: 0 Success
1303 * EPROTOTYPE Protocol wrong type for socket
1304 * EINVAL Invalid argument
1305 */
1306 int
1307 unp_connect2(struct socket *so, struct socket *so2)
1308 {
1309 struct unpcb *unp = sotounpcb(so);
1310 struct unpcb *unp2;
1311
1312 if (so2->so_type != so->so_type)
1313 return (EPROTOTYPE);
1314
1315 unp2 = sotounpcb(so2);
1316
1317 lck_mtx_assert(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1318 lck_mtx_assert(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);
1319
1320 /* Verify both sockets are still opened */
1321 if (unp == 0 || unp2 == 0)
1322 return (EINVAL);
1323
1324 unp->unp_conn = unp2;
1325 so2->so_usecount++;
1326
1327 switch (so->so_type) {
1328
1329 case SOCK_DGRAM:
1330 LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink);
1331
1332 if (so != so2) {
1333 /* Avoid lock order reversals due to drop/acquire in soisconnected. */
1334 /* Keep an extra reference on so2 that will be dropped
1335 * soon after getting the locks in order
1336 */
1337 socket_unlock(so2, 0);
1338 soisconnected(so);
1339 unp_get_locks_in_order(so, so2);
1340 so2->so_usecount--;
1341 } else {
1342 soisconnected(so);
1343 }
1344
1345 break;
1346
1347 case SOCK_STREAM:
1348 /* This takes care of socketpair */
1349 if (!(unp->unp_flags & UNP_HAVEPC) &&
1350 !(unp2->unp_flags & UNP_HAVEPC)) {
1351 cru2x(kauth_cred_get(), &unp->unp_peercred);
1352 unp->unp_flags |= UNP_HAVEPC;
1353
1354 cru2x(kauth_cred_get(), &unp2->unp_peercred);
1355 unp2->unp_flags |= UNP_HAVEPC;
1356 }
1357 unp2->unp_conn = unp;
1358 so->so_usecount++;
1359
1360 /* Avoid lock order reversals due to drop/acquire in soisconnected. */
1361 socket_unlock(so, 0);
1362 soisconnected(so2);
1363
1364 /* Keep an extra reference on so2, that will be dropped soon after
1365 * getting the locks in order again.
1366 */
1367 socket_unlock(so2, 0);
1368
1369 socket_lock(so, 0);
1370 soisconnected(so);
1371
1372 unp_get_locks_in_order(so, so2);
1373 /* Decrement the extra reference left before */
1374 so2->so_usecount--;
1375 break;
1376
1377 default:
1378 panic("unknown socket type %d in unp_connect2", so->so_type);
1379 }
1380 lck_mtx_assert(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1381 lck_mtx_assert(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);
1382 return (0);
1383 }
1384
1385 static void
1386 unp_disconnect(struct unpcb *unp)
1387 {
1388 struct unpcb *unp2 = NULL;
1389 struct socket *so2 = NULL, *so;
1390 struct socket *waitso;
1391 int so_locked = 1, strdisconn = 0;
1392
1393 so = unp->unp_socket;
1394 if (unp->unp_conn == NULL) {
1395 return;
1396 }
1397 lck_mtx_lock(unp_disconnect_lock);
1398 while (disconnect_in_progress != 0) {
1399 if (so_locked == 1) {
1400 socket_unlock(so, 0);
1401 so_locked = 0;
1402 }
1403 (void)msleep((caddr_t)&disconnect_in_progress, unp_disconnect_lock,
1404 PSOCK, "disconnect", NULL);
1405 }
1406 disconnect_in_progress = 1;
1407 lck_mtx_unlock(unp_disconnect_lock);
1408
1409 if (so_locked == 0) {
1410 socket_lock(so, 0);
1411 so_locked = 1;
1412 }
1413
1414 unp2 = unp->unp_conn;
1415
1416 if (unp2 == 0 || unp2->unp_socket == NULL) {
1417 goto out;
1418 }
1419 so2 = unp2->unp_socket;
1420
1421 try_again:
1422 if (so == so2) {
1423 if (so_locked == 0) {
1424 socket_lock(so, 0);
1425 }
1426 waitso = so;
1427 } else if (so < so2) {
1428 if (so_locked == 0) {
1429 socket_lock(so, 0);
1430 }
1431 socket_lock(so2, 1);
1432 waitso = so2;
1433 } else {
1434 if (so_locked == 1) {
1435 socket_unlock(so, 0);
1436 }
1437 socket_lock(so2, 1);
1438 socket_lock(so, 0);
1439 waitso = so;
1440 }
1441 so_locked = 1;
1442
1443 lck_mtx_assert(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1444 lck_mtx_assert(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);
1445
1446 /* Check for the UNP_DONTDISCONNECT flag, if it
1447 * is set, release both sockets and go to sleep
1448 */
1449
1450 if ((((struct unpcb *)waitso->so_pcb)->unp_flags & UNP_DONTDISCONNECT) != 0) {
1451 if (so != so2) {
1452 socket_unlock(so2, 1);
1453 }
1454 so_locked = 0;
1455
1456 (void)msleep(waitso->so_pcb, &unp->unp_mtx,
1457 PSOCK | PDROP, "unpdisconnect", NULL);
1458 goto try_again;
1459 }
1460
1461 if (unp->unp_conn == NULL) {
1462 panic("unp_conn became NULL after sleep");
1463 }
1464
1465 unp->unp_conn = NULL;
1466 so2->so_usecount--;
1467
1468 if (unp->unp_flags & UNP_TRACE_MDNS)
1469 unp->unp_flags &= ~UNP_TRACE_MDNS;
1470
1471 switch (unp->unp_socket->so_type) {
1472
1473 case SOCK_DGRAM:
1474 LIST_REMOVE(unp, unp_reflink);
1475 unp->unp_socket->so_state &= ~SS_ISCONNECTED;
1476 if (so != so2)
1477 socket_unlock(so2, 1);
1478 break;
1479
1480 case SOCK_STREAM:
1481 unp2->unp_conn = NULL;
1482 so->so_usecount--;
1483
1484 /* Set the socket state correctly but do a wakeup later when
1485 * we release all locks except the socket lock, this will avoid
1486 * a deadlock.
1487 */
1488 unp->unp_socket->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
1489 unp->unp_socket->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
1490
1491 unp2->unp_socket->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
1492 unp->unp_socket->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
1493
1494 if (unp2->unp_flags & UNP_TRACE_MDNS)
1495 unp2->unp_flags &= ~UNP_TRACE_MDNS;
1496
1497 strdisconn = 1;
1498 break;
1499 default:
1500 panic("unknown socket type %d", so->so_type);
1501 }
1502 out:
1503 lck_mtx_lock(unp_disconnect_lock);
1504 disconnect_in_progress = 0;
1505 wakeup(&disconnect_in_progress);
1506 lck_mtx_unlock(unp_disconnect_lock);
1507
1508 if (strdisconn) {
1509 socket_unlock(so, 0);
1510 soisdisconnected(so2);
1511 socket_unlock(so2, 1);
1512
1513 socket_lock(so,0);
1514 soisdisconnected(so);
1515 }
1516 lck_mtx_assert(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1517 return;
1518 }
1519
1520 /*
1521 * unpcb_to_compat copies specific bits of a unpcb to a unpcb_compat format.
1522 * The unpcb_compat data structure is passed to user space and must not change.
1523 */
1524 static void
1525 unpcb_to_compat(struct unpcb *up, struct unpcb_compat *cp)
1526 {
1527 #if defined(__LP64__)
1528 cp->unp_link.le_next = (u_int32_t)
1529 VM_KERNEL_ADDRPERM(up->unp_link.le_next);
1530 cp->unp_link.le_prev = (u_int32_t)
1531 VM_KERNEL_ADDRPERM(up->unp_link.le_prev);
1532 #else
1533 cp->unp_link.le_next = (struct unpcb_compat *)
1534 VM_KERNEL_ADDRPERM(up->unp_link.le_next);
1535 cp->unp_link.le_prev = (struct unpcb_compat **)
1536 VM_KERNEL_ADDRPERM(up->unp_link.le_prev);
1537 #endif
1538 cp->unp_socket = (_UNPCB_PTR(struct socket *))
1539 VM_KERNEL_ADDRPERM(up->unp_socket);
1540 cp->unp_vnode = (_UNPCB_PTR(struct vnode *))
1541 VM_KERNEL_ADDRPERM(up->unp_vnode);
1542 cp->unp_ino = up->unp_ino;
1543 cp->unp_conn = (_UNPCB_PTR(struct unpcb_compat *))
1544 VM_KERNEL_ADDRPERM(up->unp_conn);
1545 cp->unp_refs = (u_int32_t)VM_KERNEL_ADDRPERM(up->unp_refs.lh_first);
1546 #if defined(__LP64__)
1547 cp->unp_reflink.le_next =
1548 (u_int32_t)VM_KERNEL_ADDRPERM(up->unp_reflink.le_next);
1549 cp->unp_reflink.le_prev =
1550 (u_int32_t)VM_KERNEL_ADDRPERM(up->unp_reflink.le_prev);
1551 #else
1552 cp->unp_reflink.le_next =
1553 (struct unpcb_compat *)VM_KERNEL_ADDRPERM(up->unp_reflink.le_next);
1554 cp->unp_reflink.le_prev =
1555 (struct unpcb_compat **)VM_KERNEL_ADDRPERM(up->unp_reflink.le_prev);
1556 #endif
1557 cp->unp_addr = (_UNPCB_PTR(struct sockaddr_un *))
1558 VM_KERNEL_ADDRPERM(up->unp_addr);
1559 cp->unp_cc = up->unp_cc;
1560 cp->unp_mbcnt = up->unp_mbcnt;
1561 cp->unp_gencnt = up->unp_gencnt;
1562 }
1563
1564 static int
1565 unp_pcblist SYSCTL_HANDLER_ARGS
1566 {
1567 #pragma unused(oidp,arg2)
1568 int error, i, n;
1569 struct unpcb *unp, **unp_list;
1570 unp_gen_t gencnt;
1571 struct xunpgen xug;
1572 struct unp_head *head;
1573
1574 lck_rw_lock_shared(unp_list_mtx);
1575 head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);
1576
1577 /*
1578 * The process of preparing the PCB list is too time-consuming and
1579 * resource-intensive to repeat twice on every request.
1580 */
1581 if (req->oldptr == USER_ADDR_NULL) {
1582 n = unp_count;
1583 req->oldidx = 2 * sizeof (xug) + (n + n / 8) *
1584 sizeof (struct xunpcb);
1585 lck_rw_done(unp_list_mtx);
1586 return (0);
1587 }
1588
1589 if (req->newptr != USER_ADDR_NULL) {
1590 lck_rw_done(unp_list_mtx);
1591 return (EPERM);
1592 }
1593
1594 /*
1595 * OK, now we're committed to doing something.
1596 */
1597 gencnt = unp_gencnt;
1598 n = unp_count;
1599
1600 bzero(&xug, sizeof (xug));
1601 xug.xug_len = sizeof (xug);
1602 xug.xug_count = n;
1603 xug.xug_gen = gencnt;
1604 xug.xug_sogen = so_gencnt;
1605 error = SYSCTL_OUT(req, &xug, sizeof (xug));
1606 if (error) {
1607 lck_rw_done(unp_list_mtx);
1608 return (error);
1609 }
1610
1611 /*
1612 * We are done if there is no pcb
1613 */
1614 if (n == 0) {
1615 lck_rw_done(unp_list_mtx);
1616 return (0);
1617 }
1618
1619 MALLOC(unp_list, struct unpcb **, n * sizeof (*unp_list),
1620 M_TEMP, M_WAITOK);
1621 if (unp_list == 0) {
1622 lck_rw_done(unp_list_mtx);
1623 return (ENOMEM);
1624 }
1625
1626 for (unp = head->lh_first, i = 0; unp && i < n;
1627 unp = unp->unp_link.le_next) {
1628 if (unp->unp_gencnt <= gencnt)
1629 unp_list[i++] = unp;
1630 }
1631 n = i; /* in case we lost some during malloc */
1632
1633 error = 0;
1634 for (i = 0; i < n; i++) {
1635 unp = unp_list[i];
1636 if (unp->unp_gencnt <= gencnt) {
1637 struct xunpcb xu;
1638
1639 bzero(&xu, sizeof (xu));
1640 xu.xu_len = sizeof (xu);
1641 xu.xu_unpp = (_UNPCB_PTR(struct unpcb_compat *))
1642 VM_KERNEL_ADDRPERM(unp);
1643 /*
1644 * XXX - need more locking here to protect against
1645 * connect/disconnect races for SMP.
1646 */
1647 if (unp->unp_addr)
1648 bcopy(unp->unp_addr, &xu.xu_addr,
1649 unp->unp_addr->sun_len);
1650 if (unp->unp_conn && unp->unp_conn->unp_addr)
1651 bcopy(unp->unp_conn->unp_addr,
1652 &xu.xu_caddr,
1653 unp->unp_conn->unp_addr->sun_len);
1654 unpcb_to_compat(unp, &xu.xu_unp);
1655 sotoxsocket(unp->unp_socket, &xu.xu_socket);
1656 error = SYSCTL_OUT(req, &xu, sizeof (xu));
1657 }
1658 }
1659 if (!error) {
1660 /*
1661 * Give the user an updated idea of our state.
1662 * If the generation differs from what we told
1663 * her before, she knows that something happened
1664 * while we were processing this request, and it
1665 * might be necessary to retry.
1666 */
1667 bzero(&xug, sizeof (xug));
1668 xug.xug_len = sizeof (xug);
1669 xug.xug_gen = unp_gencnt;
1670 xug.xug_sogen = so_gencnt;
1671 xug.xug_count = unp_count;
1672 error = SYSCTL_OUT(req, &xug, sizeof (xug));
1673 }
1674 FREE(unp_list, M_TEMP);
1675 lck_rw_done(unp_list_mtx);
1676 return (error);
1677 }
1678
1679 SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist,
1680 CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
1681 (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb",
1682 "List of active local datagram sockets");
1683 SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist,
1684 CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
1685 (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb",
1686 "List of active local stream sockets");
1687
1688
1689 static int
1690 unp_pcblist64 SYSCTL_HANDLER_ARGS
1691 {
1692 #pragma unused(oidp,arg2)
1693 int error, i, n;
1694 struct unpcb *unp, **unp_list;
1695 unp_gen_t gencnt;
1696 struct xunpgen xug;
1697 struct unp_head *head;
1698
1699 lck_rw_lock_shared(unp_list_mtx);
1700 head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);
1701
1702 /*
1703 * The process of preparing the PCB list is too time-consuming and
1704 * resource-intensive to repeat twice on every request.
1705 */
1706 if (req->oldptr == USER_ADDR_NULL) {
1707 n = unp_count;
1708 req->oldidx = 2 * sizeof (xug) + (n + n / 8) *
1709 (sizeof (struct xunpcb64));
1710 lck_rw_done(unp_list_mtx);
1711 return (0);
1712 }
1713
1714 if (req->newptr != USER_ADDR_NULL) {
1715 lck_rw_done(unp_list_mtx);
1716 return (EPERM);
1717 }
1718
1719 /*
1720 * OK, now we're committed to doing something.
1721 */
1722 gencnt = unp_gencnt;
1723 n = unp_count;
1724
1725 bzero(&xug, sizeof (xug));
1726 xug.xug_len = sizeof (xug);
1727 xug.xug_count = n;
1728 xug.xug_gen = gencnt;
1729 xug.xug_sogen = so_gencnt;
1730 error = SYSCTL_OUT(req, &xug, sizeof (xug));
1731 if (error) {
1732 lck_rw_done(unp_list_mtx);
1733 return (error);
1734 }
1735
1736 /*
1737 * We are done if there is no pcb
1738 */
1739 if (n == 0) {
1740 lck_rw_done(unp_list_mtx);
1741 return (0);
1742 }
1743
1744 MALLOC(unp_list, struct unpcb **, n * sizeof (*unp_list),
1745 M_TEMP, M_WAITOK);
1746 if (unp_list == 0) {
1747 lck_rw_done(unp_list_mtx);
1748 return (ENOMEM);
1749 }
1750
1751 for (unp = head->lh_first, i = 0; unp && i < n;
1752 unp = unp->unp_link.le_next) {
1753 if (unp->unp_gencnt <= gencnt)
1754 unp_list[i++] = unp;
1755 }
1756 n = i; /* in case we lost some during malloc */
1757
1758 error = 0;
1759 for (i = 0; i < n; i++) {
1760 unp = unp_list[i];
1761 if (unp->unp_gencnt <= gencnt) {
1762 struct xunpcb64 xu;
1763 size_t xu_len = sizeof(struct xunpcb64);
1764
1765 bzero(&xu, xu_len);
1766 xu.xu_len = xu_len;
1767 xu.xu_unpp = (u_int64_t)VM_KERNEL_ADDRPERM(unp);
1768 xu.xunp_link.le_next = (u_int64_t)
1769 VM_KERNEL_ADDRPERM(unp->unp_link.le_next);
1770 xu.xunp_link.le_prev = (u_int64_t)
1771 VM_KERNEL_ADDRPERM(unp->unp_link.le_prev);
1772 xu.xunp_socket = (u_int64_t)
1773 VM_KERNEL_ADDRPERM(unp->unp_socket);
1774 xu.xunp_vnode = (u_int64_t)
1775 VM_KERNEL_ADDRPERM(unp->unp_vnode);
1776 xu.xunp_ino = unp->unp_ino;
1777 xu.xunp_conn = (u_int64_t)
1778 VM_KERNEL_ADDRPERM(unp->unp_conn);
1779 xu.xunp_refs = (u_int64_t)
1780 VM_KERNEL_ADDRPERM(unp->unp_refs.lh_first);
1781 xu.xunp_reflink.le_next = (u_int64_t)
1782 VM_KERNEL_ADDRPERM(unp->unp_reflink.le_next);
1783 xu.xunp_reflink.le_prev = (u_int64_t)
1784 VM_KERNEL_ADDRPERM(unp->unp_reflink.le_prev);
1785 xu.xunp_cc = unp->unp_cc;
1786 xu.xunp_mbcnt = unp->unp_mbcnt;
1787 xu.xunp_gencnt = unp->unp_gencnt;
1788
1789 if (unp->unp_socket)
1790 sotoxsocket64(unp->unp_socket, &xu.xu_socket);
1791
1792 /*
1793 * XXX - need more locking here to protect against
1794 * connect/disconnect races for SMP.
1795 */
1796 if (unp->unp_addr)
1797 bcopy(unp->unp_addr, &xu.xunp_addr,
1798 unp->unp_addr->sun_len);
1799 if (unp->unp_conn && unp->unp_conn->unp_addr)
1800 bcopy(unp->unp_conn->unp_addr,
1801 &xu.xunp_caddr,
1802 unp->unp_conn->unp_addr->sun_len);
1803
1804 error = SYSCTL_OUT(req, &xu, xu_len);
1805 }
1806 }
1807 if (!error) {
1808 /*
1809 * Give the user an updated idea of our state.
1810 * If the generation differs from what we told
1811 * her before, she knows that something happened
1812 * while we were processing this request, and it
1813 * might be necessary to retry.
1814 */
1815 bzero(&xug, sizeof (xug));
1816 xug.xug_len = sizeof (xug);
1817 xug.xug_gen = unp_gencnt;
1818 xug.xug_sogen = so_gencnt;
1819 xug.xug_count = unp_count;
1820 error = SYSCTL_OUT(req, &xug, sizeof (xug));
1821 }
1822 FREE(unp_list, M_TEMP);
1823 lck_rw_done(unp_list_mtx);
1824 return (error);
1825 }
1826
1827 SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist64,
1828 CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
1829 (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist64, "S,xunpcb64",
1830 "List of active local datagram sockets 64 bit");
1831 SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist64,
1832 CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
1833 (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist64, "S,xunpcb64",
1834 "List of active local stream sockets 64 bit");
1835
1836
1837 static void
1838 unp_shutdown(struct unpcb *unp)
1839 {
1840 struct socket *so = unp->unp_socket;
1841 struct socket *so2;
1842 if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn) {
1843 so2 = unp->unp_conn->unp_socket;
1844 unp_get_locks_in_order(so, so2);
1845 socantrcvmore(so2);
1846 socket_unlock(so2, 1);
1847 }
1848 }
1849
1850 static void
1851 unp_drop(struct unpcb *unp, int errno)
1852 {
1853 struct socket *so = unp->unp_socket;
1854
1855 so->so_error = errno;
1856 unp_disconnect(unp);
1857 }
1858
1859 /*
1860 * Returns: 0 Success
1861 * EMSGSIZE The new fd's will not fit
1862 * ENOBUFS Cannot alloc struct fileproc
1863 */
1864 int
1865 unp_externalize(struct mbuf *rights)
1866 {
1867 proc_t p = current_proc(); /* XXX */
1868 int i;
1869 struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
1870 struct fileglob **rp = (struct fileglob **)(cm + 1);
1871 int *fds = (int *)(cm + 1);
1872 struct fileproc *fp;
1873 struct fileglob *fg;
1874 int newfds = (cm->cmsg_len - sizeof (*cm)) / sizeof (int);
1875 int f;
1876
1877 proc_fdlock(p);
1878
1879 /*
1880 * if the new FD's will not fit, then we free them all
1881 */
1882 if (!fdavail(p, newfds)) {
1883 for (i = 0; i < newfds; i++) {
1884 fg = *rp;
1885 unp_discard_fdlocked(fg, p);
1886 *rp++ = NULL;
1887 }
1888 proc_fdunlock(p);
1889
1890 return (EMSGSIZE);
1891 }
1892 /*
1893 * now change each pointer to an fd in the global table to
1894 * an integer that is the index to the local fd table entry
1895 * that we set up to point to the global one we are transferring.
1896 * XXX (1) this assumes a pointer and int are the same size,
1897 * XXX or the mbuf can hold the expansion
1898 * XXX (2) allocation failures should be non-fatal
1899 */
1900 for (i = 0; i < newfds; i++) {
1901 #if CONFIG_MACF_SOCKET
1902 /*
1903 * If receive access is denied, don't pass along
1904 * and error message, just discard the descriptor.
1905 */
1906 if (mac_file_check_receive(kauth_cred_get(), *rp)) {
1907 fg = *rp;
1908 *rp++ = 0;
1909 unp_discard_fdlocked(fg, p);
1910 continue;
1911 }
1912 #endif
1913 if (fdalloc(p, 0, &f))
1914 panic("unp_externalize:fdalloc");
1915 fg = rp[i];
1916 fp = fileproc_alloc_init(NULL);
1917 if (fp == NULL)
1918 panic("unp_externalize: MALLOC_ZONE");
1919 fp->f_iocount = 0;
1920 fp->f_fglob = fg;
1921 fg_removeuipc(fg);
1922 procfdtbl_releasefd(p, f, fp);
1923 (void) OSAddAtomic(-1, &unp_rights);
1924 fds[i] = f;
1925 }
1926 proc_fdunlock(p);
1927
1928 return (0);
1929 }
1930
1931 void
1932 unp_init(void)
1933 {
1934 unp_zone = zinit(sizeof (struct unpcb),
1935 (nmbclusters * sizeof (struct unpcb)), 4096, "unpzone");
1936
1937 if (unp_zone == 0)
1938 panic("unp_init");
1939 LIST_INIT(&unp_dhead);
1940 LIST_INIT(&unp_shead);
1941
1942 /*
1943 * allocate lock group attribute and group for udp pcb mutexes
1944 */
1945 unp_mtx_grp_attr = lck_grp_attr_alloc_init();
1946
1947 unp_mtx_grp = lck_grp_alloc_init("unp_list", unp_mtx_grp_attr);
1948
1949 unp_mtx_attr = lck_attr_alloc_init();
1950
1951 if ((unp_list_mtx = lck_rw_alloc_init(unp_mtx_grp,
1952 unp_mtx_attr)) == NULL)
1953 return; /* pretty much dead if this fails... */
1954
1955 if ((unp_disconnect_lock = lck_mtx_alloc_init(unp_mtx_grp,
1956 unp_mtx_attr)) == NULL)
1957 return;
1958
1959 if ((unp_connect_lock = lck_mtx_alloc_init(unp_mtx_grp,
1960 unp_mtx_attr)) == NULL)
1961 return;
1962 }
1963
/* Smaller of two values; each argument may be evaluated more than once. */
#if !defined(MIN)
#define	MIN(a, b)	((a) < (b) ? (a) : (b))
#endif
1967
1968 /*
1969 * Returns: 0 Success
1970 * EINVAL
1971 * fdgetf_noref:EBADF
1972 */
1973 static int
1974 unp_internalize(struct mbuf *control, proc_t p)
1975 {
1976 struct cmsghdr *cm = mtod(control, struct cmsghdr *);
1977 int *fds;
1978 struct fileglob **rp;
1979 struct fileproc *fp;
1980 int i, error;
1981 int oldfds;
1982
1983 /* 64bit: cmsg_len is 'uint32_t', m_len is 'long' */
1984 if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
1985 (socklen_t)cm->cmsg_len != (socklen_t)control->m_len) {
1986 return (EINVAL);
1987 }
1988 oldfds = (cm->cmsg_len - sizeof (*cm)) / sizeof (int);
1989
1990 proc_fdlock(p);
1991 fds = (int *)(cm + 1);
1992
1993 for (i = 0; i < oldfds; i++) {
1994 struct fileproc *tmpfp;
1995 if (((error = fdgetf_noref(p, fds[i], &tmpfp)) != 0)) {
1996 proc_fdunlock(p);
1997 return (error);
1998 } else if (!filetype_issendable(FILEGLOB_DTYPE(tmpfp->f_fglob))) {
1999 proc_fdunlock(p);
2000 return (EINVAL);
2001 } else if (FP_ISGUARDED(tmpfp, GUARD_SOCKET_IPC)) {
2002 error = fp_guard_exception(p,
2003 fds[i], tmpfp, kGUARD_EXC_SOCKET_IPC);
2004 proc_fdunlock(p);
2005 return (error);
2006 }
2007 }
2008 rp = (struct fileglob **)(cm + 1);
2009
2010 /* On K64 we need to walk backwards because a fileglob * is twice the size of an fd
2011 * and doing them in-order would result in stomping over unprocessed fd's
2012 */
2013 for (i = (oldfds - 1); i >= 0; i--) {
2014 (void) fdgetf_noref(p, fds[i], &fp);
2015 fg_insertuipc(fp->f_fglob);
2016 rp[i] = fp->f_fglob;
2017 (void) OSAddAtomic(1, &unp_rights);
2018 }
2019 proc_fdunlock(p);
2020
2021 return (0);
2022 }
2023
2024 static int unp_defer, unp_gcing, unp_gcwait;
2025 static thread_t unp_gcthread = NULL;
2026
2027 /* always called under uipc_lock */
2028 void
2029 unp_gc_wait(void)
2030 {
2031 if (unp_gcthread == current_thread())
2032 return;
2033
2034 while (unp_gcing != 0) {
2035 unp_gcwait = 1;
2036 msleep(&unp_gcing, uipc_lock, 0 , "unp_gc_wait", NULL);
2037 }
2038 }
2039
2040
2041 __private_extern__ void
2042 unp_gc(void)
2043 {
2044 struct fileglob *fg, *nextfg;
2045 struct socket *so;
2046 static struct fileglob **extra_ref;
2047 struct fileglob **fpp;
2048 int nunref, i;
2049 int need_gcwakeup = 0;
2050
2051 lck_mtx_lock(uipc_lock);
2052 if (unp_gcing) {
2053 lck_mtx_unlock(uipc_lock);
2054 return;
2055 }
2056 unp_gcing = 1;
2057 unp_defer = 0;
2058 unp_gcthread = current_thread();
2059 lck_mtx_unlock(uipc_lock);
2060 /*
2061 * before going through all this, set all FDs to
2062 * be NOT defered and NOT externally accessible
2063 */
2064 for (fg = fmsghead.lh_first; fg != 0; fg = fg->f_msglist.le_next) {
2065 lck_mtx_lock(&fg->fg_lock);
2066 fg->fg_flag &= ~(FMARK|FDEFER);
2067 lck_mtx_unlock(&fg->fg_lock);
2068 }
2069 do {
2070 for (fg = fmsghead.lh_first; fg != 0;
2071 fg = fg->f_msglist.le_next) {
2072 lck_mtx_lock(&fg->fg_lock);
2073 /*
2074 * If the file is not open, skip it
2075 */
2076 if (fg->fg_count == 0) {
2077 lck_mtx_unlock(&fg->fg_lock);
2078 continue;
2079 }
2080 /*
2081 * If we already marked it as 'defer' in a
2082 * previous pass, then try process it this time
2083 * and un-mark it
2084 */
2085 if (fg->fg_flag & FDEFER) {
2086 fg->fg_flag &= ~FDEFER;
2087 unp_defer--;
2088 } else {
2089 /*
2090 * if it's not defered, then check if it's
2091 * already marked.. if so skip it
2092 */
2093 if (fg->fg_flag & FMARK) {
2094 lck_mtx_unlock(&fg->fg_lock);
2095 continue;
2096 }
2097 /*
2098 * If all references are from messages
2099 * in transit, then skip it. it's not
2100 * externally accessible.
2101 */
2102 if (fg->fg_count == fg->fg_msgcount) {
2103 lck_mtx_unlock(&fg->fg_lock);
2104 continue;
2105 }
2106 /*
2107 * If it got this far then it must be
2108 * externally accessible.
2109 */
2110 fg->fg_flag |= FMARK;
2111 }
2112 /*
2113 * either it was defered, or it is externally
2114 * accessible and not already marked so.
2115 * Now check if it is possibly one of OUR sockets.
2116 */
2117 if (FILEGLOB_DTYPE(fg) != DTYPE_SOCKET ||
2118 (so = (struct socket *)fg->fg_data) == 0) {
2119 lck_mtx_unlock(&fg->fg_lock);
2120 continue;
2121 }
2122 if (so->so_proto->pr_domain != localdomain ||
2123 (so->so_proto->pr_flags&PR_RIGHTS) == 0) {
2124 lck_mtx_unlock(&fg->fg_lock);
2125 continue;
2126 }
2127 #ifdef notdef
2128 if (so->so_rcv.sb_flags & SB_LOCK) {
2129 /*
2130 * This is problematical; it's not clear
2131 * we need to wait for the sockbuf to be
2132 * unlocked (on a uniprocessor, at least),
2133 * and it's also not clear what to do
2134 * if sbwait returns an error due to receipt
2135 * of a signal. If sbwait does return
2136 * an error, we'll go into an infinite
2137 * loop. Delete all of this for now.
2138 */
2139 (void) sbwait(&so->so_rcv);
2140 goto restart;
2141 }
2142 #endif
2143 /*
2144 * So, Ok, it's one of our sockets and it IS externally
2145 * accessible (or was defered). Now we look
2146 * to see if we hold any file descriptors in its
2147 * message buffers. Follow those links and mark them
2148 * as accessible too.
2149 *
2150 * In case a file is passed onto itself we need to
2151 * release the file lock.
2152 */
2153 lck_mtx_unlock(&fg->fg_lock);
2154
2155 unp_scan(so->so_rcv.sb_mb, unp_mark);
2156 }
2157 } while (unp_defer);
2158 /*
2159 * We grab an extra reference to each of the file table entries
2160 * that are not otherwise accessible and then free the rights
2161 * that are stored in messages on them.
2162 *
2163 * The bug in the orginal code is a little tricky, so I'll describe
2164 * what's wrong with it here.
2165 *
2166 * It is incorrect to simply unp_discard each entry for f_msgcount
2167 * times -- consider the case of sockets A and B that contain
2168 * references to each other. On a last close of some other socket,
2169 * we trigger a gc since the number of outstanding rights (unp_rights)
2170 * is non-zero. If during the sweep phase the gc code un_discards,
2171 * we end up doing a (full) closef on the descriptor. A closef on A
2172 * results in the following chain. Closef calls soo_close, which
2173 * calls soclose. Soclose calls first (through the switch
2174 * uipc_usrreq) unp_detach, which re-invokes unp_gc. Unp_gc simply
2175 * returns because the previous instance had set unp_gcing, and
2176 * we return all the way back to soclose, which marks the socket
2177 * with SS_NOFDREF, and then calls sofree. Sofree calls sorflush
2178 * to free up the rights that are queued in messages on the socket A,
2179 * i.e., the reference on B. The sorflush calls via the dom_dispose
2180 * switch unp_dispose, which unp_scans with unp_discard. This second
2181 * instance of unp_discard just calls closef on B.
2182 *
2183 * Well, a similar chain occurs on B, resulting in a sorflush on B,
2184 * which results in another closef on A. Unfortunately, A is already
2185 * being closed, and the descriptor has already been marked with
2186 * SS_NOFDREF, and soclose panics at this point.
2187 *
2188 * Here, we first take an extra reference to each inaccessible
2189 * descriptor. Then, we call sorflush ourself, since we know
2190 * it is a Unix domain socket anyhow. After we destroy all the
2191 * rights carried in messages, we do a last closef to get rid
2192 * of our extra reference. This is the last close, and the
2193 * unp_detach etc will shut down the socket.
2194 *
2195 * 91/09/19, bsy@cs.cmu.edu
2196 */
2197 extra_ref = _MALLOC(nfiles * sizeof (struct fileglob *),
2198 M_FILEGLOB, M_WAITOK);
2199 if (extra_ref == NULL)
2200 goto bail;
2201 for (nunref = 0, fg = fmsghead.lh_first, fpp = extra_ref; fg != 0;
2202 fg = nextfg) {
2203 lck_mtx_lock(&fg->fg_lock);
2204
2205 nextfg = fg->f_msglist.le_next;
2206 /*
2207 * If it's not open, skip it
2208 */
2209 if (fg->fg_count == 0) {
2210 lck_mtx_unlock(&fg->fg_lock);
2211 continue;
2212 }
2213 /*
2214 * If all refs are from msgs, and it's not marked accessible
2215 * then it must be referenced from some unreachable cycle
2216 * of (shut-down) FDs, so include it in our
2217 * list of FDs to remove
2218 */
2219 if (fg->fg_count == fg->fg_msgcount && !(fg->fg_flag & FMARK)) {
2220 fg->fg_count++;
2221 *fpp++ = fg;
2222 nunref++;
2223 }
2224 lck_mtx_unlock(&fg->fg_lock);
2225 }
2226 /*
2227 * for each FD on our hit list, do the following two things
2228 */
2229 for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
2230 struct fileglob *tfg;
2231
2232 tfg = *fpp;
2233
2234 if (FILEGLOB_DTYPE(tfg) == DTYPE_SOCKET &&
2235 tfg->fg_data != NULL) {
2236 so = (struct socket *)(tfg->fg_data);
2237
2238 socket_lock(so, 0);
2239
2240 sorflush(so);
2241
2242 socket_unlock(so, 0);
2243 }
2244 }
2245 for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
2246 closef_locked((struct fileproc *)0, *fpp, (proc_t)NULL);
2247
2248 FREE((caddr_t)extra_ref, M_FILEGLOB);
2249 bail:
2250 lck_mtx_lock(uipc_lock);
2251 unp_gcing = 0;
2252 unp_gcthread = NULL;
2253
2254 if (unp_gcwait != 0) {
2255 unp_gcwait = 0;
2256 need_gcwakeup = 1;
2257 }
2258 lck_mtx_unlock(uipc_lock);
2259
2260 if (need_gcwakeup != 0)
2261 wakeup(&unp_gcing);
2262 }
2263
2264 void
2265 unp_dispose(struct mbuf *m)
2266 {
2267 if (m) {
2268 unp_scan(m, unp_discard);
2269 }
2270 }
2271
2272 /*
2273 * Returns: 0 Success
2274 */
2275 static int
2276 unp_listen(struct unpcb *unp, proc_t p)
2277 {
2278 kauth_cred_t safecred = kauth_cred_proc_ref(p);
2279 cru2x(safecred, &unp->unp_peercred);
2280 kauth_cred_unref(&safecred);
2281 unp->unp_flags |= UNP_HAVEPCCACHED;
2282 return (0);
2283 }
2284
2285 static void
2286 unp_scan(struct mbuf *m0, void (*op)(struct fileglob *))
2287 {
2288 struct mbuf *m;
2289 struct fileglob **rp;
2290 struct cmsghdr *cm;
2291 int i;
2292 int qfds;
2293
2294 while (m0) {
2295 for (m = m0; m; m = m->m_next)
2296 if (m->m_type == MT_CONTROL &&
2297 (size_t)m->m_len >= sizeof (*cm)) {
2298 cm = mtod(m, struct cmsghdr *);
2299 if (cm->cmsg_level != SOL_SOCKET ||
2300 cm->cmsg_type != SCM_RIGHTS)
2301 continue;
2302 qfds = (cm->cmsg_len - sizeof (*cm)) /
2303 sizeof (int);
2304 rp = (struct fileglob **)(cm + 1);
2305 for (i = 0; i < qfds; i++)
2306 (*op)(*rp++);
2307 break; /* XXX, but saves time */
2308 }
2309 m0 = m0->m_act;
2310 }
2311 }
2312
2313 static void
2314 unp_mark(struct fileglob *fg)
2315 {
2316 lck_mtx_lock(&fg->fg_lock);
2317
2318 if (fg->fg_flag & FMARK) {
2319 lck_mtx_unlock(&fg->fg_lock);
2320 return;
2321 }
2322 fg->fg_flag |= (FMARK|FDEFER);
2323
2324 lck_mtx_unlock(&fg->fg_lock);
2325
2326 unp_defer++;
2327 }
2328
2329 static void
2330 unp_discard(struct fileglob *fg)
2331 {
2332 proc_t p = current_proc(); /* XXX */
2333
2334 (void) OSAddAtomic(1, &unp_disposed);
2335
2336 proc_fdlock(p);
2337 unp_discard_fdlocked(fg, p);
2338 proc_fdunlock(p);
2339 }
2340 static void
2341 unp_discard_fdlocked(struct fileglob *fg, proc_t p)
2342 {
2343 fg_removeuipc(fg);
2344
2345 (void) OSAddAtomic(-1, &unp_rights);
2346 (void) closef_locked((struct fileproc *)0, fg, p);
2347 }
2348
2349 int
2350 unp_lock(struct socket *so, int refcount, void * lr)
2351 {
2352 void * lr_saved;
2353 if (lr == 0)
2354 lr_saved = (void *) __builtin_return_address(0);
2355 else lr_saved = lr;
2356
2357 if (so->so_pcb) {
2358 lck_mtx_lock(&((struct unpcb *)so->so_pcb)->unp_mtx);
2359 } else {
2360 panic("unp_lock: so=%p NO PCB! lr=%p ref=0x%x\n",
2361 so, lr_saved, so->so_usecount);
2362 }
2363
2364 if (so->so_usecount < 0)
2365 panic("unp_lock: so=%p so_pcb=%p lr=%p ref=0x%x\n",
2366 so, so->so_pcb, lr_saved, so->so_usecount);
2367
2368 if (refcount)
2369 so->so_usecount++;
2370
2371 so->lock_lr[so->next_lock_lr] = lr_saved;
2372 so->next_lock_lr = (so->next_lock_lr+1) % SO_LCKDBG_MAX;
2373 return (0);
2374 }
2375
2376 int
2377 unp_unlock(struct socket *so, int refcount, void * lr)
2378 {
2379 void * lr_saved;
2380 lck_mtx_t * mutex_held = NULL;
2381 struct unpcb *unp = sotounpcb(so);
2382
2383 if (lr == 0)
2384 lr_saved = (void *) __builtin_return_address(0);
2385 else lr_saved = lr;
2386
2387 if (refcount)
2388 so->so_usecount--;
2389
2390 if (so->so_usecount < 0)
2391 panic("unp_unlock: so=%p usecount=%x\n", so, so->so_usecount);
2392 if (so->so_pcb == NULL) {
2393 panic("unp_unlock: so=%p NO PCB usecount=%x\n", so, so->so_usecount);
2394 } else {
2395 mutex_held = &((struct unpcb *)so->so_pcb)->unp_mtx;
2396 }
2397 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
2398 so->unlock_lr[so->next_unlock_lr] = lr_saved;
2399 so->next_unlock_lr = (so->next_unlock_lr+1) % SO_LCKDBG_MAX;
2400
2401 if (so->so_usecount == 0 && (so->so_flags & SOF_PCBCLEARING)) {
2402 sofreelastref(so, 1);
2403
2404 if (unp->unp_addr)
2405 FREE(unp->unp_addr, M_SONAME);
2406
2407 lck_mtx_unlock(mutex_held);
2408
2409 lck_mtx_destroy(&unp->unp_mtx, unp_mtx_grp);
2410 zfree(unp_zone, unp);
2411
2412 unp_gc();
2413 } else {
2414 lck_mtx_unlock(mutex_held);
2415 }
2416
2417 return (0);
2418 }
2419
2420 lck_mtx_t *
2421 unp_getlock(struct socket *so, __unused int locktype)
2422 {
2423 struct unpcb *unp = (struct unpcb *)so->so_pcb;
2424
2425
2426 if (so->so_pcb) {
2427 if (so->so_usecount < 0)
2428 panic("unp_getlock: so=%p usecount=%x\n", so, so->so_usecount);
2429 return(&unp->unp_mtx);
2430 } else {
2431 panic("unp_getlock: so=%p NULL so_pcb\n", so);
2432 return (so->so_proto->pr_domain->dom_mtx);
2433 }
2434 }
2435