git.saurik.com Git — blame view of apple/xnu.git, bsd/kern/uipc_usrreq.c (release xnu-7195.81.3)
1c79356b 1/*
f427ee49 2 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
5d5c5d0d 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
d9a64523 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
d9a64523 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
d9a64523 17 *
2d21ac55
A
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
d9a64523 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * Copyright (c) 1982, 1986, 1989, 1991, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94
61 */
2d21ac55
A
62/*
63 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
64 * support for mandatory and extensible security protections. This notice
65 * is included in support of clause 2.2 (b) of the Apple Public License,
66 * Version 2.0.
67 */
1c79356b
A
68
69#include <sys/param.h>
70#include <sys/systm.h>
71#include <sys/kernel.h>
72#include <sys/domain.h>
73#include <sys/fcntl.h>
0a7de745 74#include <sys/malloc.h> /* XXX must be before <sys/file.h> */
91447636 75#include <sys/file_internal.h>
39236c6e 76#include <sys/guarded.h>
1c79356b
A
77#include <sys/filedesc.h>
78#include <sys/lock.h>
79#include <sys/mbuf.h>
80#include <sys/namei.h>
91447636
A
81#include <sys/proc_internal.h>
82#include <sys/kauth.h>
1c79356b
A
83#include <sys/protosw.h>
84#include <sys/socket.h>
85#include <sys/socketvar.h>
86#include <sys/stat.h>
87#include <sys/sysctl.h>
88#include <sys/un.h>
89#include <sys/unpcb.h>
91447636
A
90#include <sys/vnode_internal.h>
91#include <sys/kdebug.h>
3e170ce0 92#include <sys/mcache.h>
1c79356b
A
93
94#include <kern/zalloc.h>
91447636 95#include <kern/locks.h>
f427ee49 96#include <kern/task.h>
1c79356b 97
b0d623f7 98#if CONFIG_MACF
2d21ac55 99#include <security/mac_framework.h>
b0d623f7 100#endif /* CONFIG_MACF */
2d21ac55 101
316670eb
A
102#include <mach/vm_param.h>
103
3e170ce0
A
104/*
105 * Maximum number of FDs that can be passed in an mbuf
106 */
0a7de745
A
107#define UIPC_MAX_CMSG_FD 512
108
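/*
 * Illustrative userspace sketch (not part of this file; names assumed):
 * descriptors cross an AF_UNIX socket as SCM_RIGHTS control data built with
 * the CMSG_* macros, and the kernel-side internalization of each such
 * message is capped by UIPC_MAX_CMSG_FD above.
 */
#include <string.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/uio.h>

static ssize_t
send_one_fd(int sock, int fd)
{
	char dummy = 'x';
	struct iovec iov = { .iov_base = &dummy, .iov_len = 1 };
	union {
		char buf[CMSG_SPACE(sizeof(int))];
		struct cmsghdr align;
	} u;
	struct msghdr msg;
	struct cmsghdr *cm;

	memset(&msg, 0, sizeof(msg));
	msg.msg_iov = &iov;                     /* at least one byte of data */
	msg.msg_iovlen = 1;
	msg.msg_control = u.buf;                /* ancillary (control) buffer */
	msg.msg_controllen = sizeof(u.buf);

	cm = CMSG_FIRSTHDR(&msg);
	cm->cmsg_level = SOL_SOCKET;
	cm->cmsg_type = SCM_RIGHTS;             /* payload is an array of fds */
	cm->cmsg_len = CMSG_LEN(sizeof(int));
	memcpy(CMSG_DATA(cm), &fd, sizeof(int));

	return sendmsg(sock, &msg, 0);          /* receiver gets its own fd */
}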
f427ee49 109ZONE_DECLARE(unp_zone, "unpzone", sizeof(struct unpcb), ZC_NONE);
0a7de745
A
110static unp_gen_t unp_gencnt;
111static u_int unp_count;
112
f427ee49
A
113static lck_attr_t *unp_mtx_attr;
114static lck_grp_t *unp_mtx_grp;
115static lck_grp_attr_t *unp_mtx_grp_attr;
116static lck_rw_t unp_list_mtx;
0a7de745 117
f427ee49
A
118static lck_mtx_t unp_disconnect_lock;
119static lck_mtx_t unp_connect_lock;
120static lck_mtx_t uipc_lock;
b0d623f7
A
121static u_int disconnect_in_progress;
122
f427ee49
A
123static struct unp_head unp_shead, unp_dhead;
124static int unp_defer, unp_gcing, unp_gcwait;
125static thread_t unp_gcthread = NULL;
126static LIST_HEAD(, fileglob) unp_msghead = LIST_HEAD_INITIALIZER(unp_msghead);
127
1c79356b 128
6d2010ae
A
129/*
130 * mDNSResponder tracing. When enabled, endpoints connected to
131 * /var/run/mDNSResponder will be traced; during each send on
132 * the traced socket, we log the PID and process name of the
133 * sending process. We also print out a bit of info related
134 * to the data itself; this assumes ipc_msg_hdr in dnssd_ipc.h
135 * of mDNSResponder stays the same.
136 */
0a7de745 137#define MDNSRESPONDER_PATH "/var/run/mDNSResponder"
6d2010ae 138
0a7de745 139static int unpst_tracemdns; /* enable tracing */
6d2010ae 140
0a7de745 141#define MDNS_IPC_MSG_HDR_VERSION_1 1
6d2010ae
A
142
143struct mdns_ipc_msg_hdr {
144 uint32_t version;
145 uint32_t datalen;
146 uint32_t ipc_flags;
147 uint32_t op;
148 union {
149 void *context;
150 uint32_t u32[2];
151 } __attribute__((packed));
152 uint32_t reg_index;
153} __attribute__((packed));
154
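/*
 * Hedged usage sketch (illustrative, not part of this file): the tracing
 * switch described above is published later in this file as the
 * "net.local.stream.tracemdns" sysctl, so a privileged process could turn
 * it on roughly like this.
 */
#include <sys/sysctl.h>

static int
enable_mdns_tracing(void)
{
	int on = 1;

	/* Returns 0 on success; setting it requires sufficient privilege. */
	return sysctlbyname("net.local.stream.tracemdns", NULL, NULL,
	    &on, sizeof(on));
}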
1c79356b
A
155/*
156 * Unix communications domain.
157 *
158 * TODO:
159 * SEQPACKET, RDM
160 * rethink name space problems
161 * need a proper out-of-band
162 * lock pushdown
163 */
cb323159 164static struct sockaddr sun_noname = { .sa_len = sizeof(sun_noname), .sa_family = AF_LOCAL, .sa_data = { 0 } };
0a7de745
A
165static ino_t unp_ino; /* prototype for fake inode numbers */
166
167static int unp_attach(struct socket *);
168static void unp_detach(struct unpcb *);
169static int unp_bind(struct unpcb *, struct sockaddr *, proc_t);
170static int unp_connect(struct socket *, struct sockaddr *, proc_t);
171static void unp_disconnect(struct unpcb *);
172static void unp_shutdown(struct unpcb *);
173static void unp_drop(struct unpcb *, int);
174__private_extern__ void unp_gc(void);
175static void unp_scan(struct mbuf *, void (*)(struct fileglob *, void *arg), void *arg);
176static void unp_mark(struct fileglob *, __unused void *);
177static void unp_discard(struct fileglob *, void *);
178static int unp_internalize(struct mbuf *, proc_t);
179static int unp_listen(struct unpcb *, proc_t);
180static void unpcb_to_compat(struct unpcb *, struct unpcb_compat *);
b0d623f7
A
181static void unp_get_locks_in_order(struct socket *so, struct socket *conn_so);
182
d9a64523
A
183static void
184unp_get_locks_in_order(struct socket *so, struct socket *conn_so)
b0d623f7
A
185{
186 if (so < conn_so) {
187 socket_lock(conn_so, 1);
188 } else {
189 struct unpcb *unp = sotounpcb(so);
190 unp->unp_flags |= UNP_DONTDISCONNECT;
191 unp->rw_thrcount++;
192 socket_unlock(so, 0);
2d21ac55 193
b0d623f7
A
194 /* Get the locks in the correct order */
195 socket_lock(conn_so, 1);
196 socket_lock(so, 0);
197 unp->rw_thrcount--;
198 if (unp->rw_thrcount == 0) {
199 unp->unp_flags &= ~UNP_DONTDISCONNECT;
200 wakeup(unp);
201 }
202 }
203}
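/*
 * A minimal, self-contained sketch of the idiom the helper above relies on
 * (assumed names, userspace locks for illustration only): when two locks may
 * be taken together from either side, always acquire the lower-addressed one
 * first so concurrent threads cannot deadlock by locking in opposite orders.
 * The kernel version additionally has to drop and retake so's lock, guarded
 * by UNP_DONTDISCONNECT, because it enters already holding it.
 */
#include <pthread.h>

static void
lock_pair_in_address_order(pthread_mutex_t *a, pthread_mutex_t *b)
{
	if (a == b) {
		pthread_mutex_lock(a);          /* same lock: take it once */
	} else if (a < b) {
		pthread_mutex_lock(a);          /* lower address first */
		pthread_mutex_lock(b);
	} else {
		pthread_mutex_lock(b);
		pthread_mutex_lock(a);
	}
}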
1c79356b
A
204
205static int
206uipc_abort(struct socket *so)
207{
208 struct unpcb *unp = sotounpcb(so);
209
0a7de745
A
210 if (unp == 0) {
211 return EINVAL;
212 }
1c79356b 213 unp_drop(unp, ECONNABORTED);
91447636
A
214 unp_detach(unp);
215 sofree(so);
0a7de745 216 return 0;
1c79356b
A
217}
218
219static int
220uipc_accept(struct socket *so, struct sockaddr **nam)
221{
222 struct unpcb *unp = sotounpcb(so);
223
0a7de745
A
224 if (unp == 0) {
225 return EINVAL;
226 }
1c79356b
A
227
228 /*
229 * Pass back name of connected socket,
230 * if it was bound and we are still connected
231 * (our peer may have closed already!).
232 */
233 if (unp->unp_conn && unp->unp_conn->unp_addr) {
2d21ac55
A
234 *nam = dup_sockaddr((struct sockaddr *)
235 unp->unp_conn->unp_addr, 1);
1c79356b
A
236 } else {
237 *nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
238 }
0a7de745 239 return 0;
1c79356b
A
240}
241
2d21ac55
A
242/*
243 * Returns: 0 Success
244 * EISCONN
245 * unp_attach:
246 */
1c79356b 247static int
2d21ac55 248uipc_attach(struct socket *so, __unused int proto, __unused proc_t p)
1c79356b
A
249{
250 struct unpcb *unp = sotounpcb(so);
251
0a7de745
A
252 if (unp != 0) {
253 return EISCONN;
254 }
255 return unp_attach(so);
1c79356b
A
256}
257
258static int
2d21ac55 259uipc_bind(struct socket *so, struct sockaddr *nam, proc_t p)
1c79356b
A
260{
261 struct unpcb *unp = sotounpcb(so);
262
0a7de745
A
263 if (unp == 0) {
264 return EINVAL;
265 }
1c79356b 266
0a7de745 267 return unp_bind(unp, nam, p);
1c79356b
A
268}
269
2d21ac55
A
270/*
271 * Returns: 0 Success
272 * EINVAL
273 * unp_connect:??? [See elsewhere in this file]
274 */
1c79356b 275static int
2d21ac55 276uipc_connect(struct socket *so, struct sockaddr *nam, proc_t p)
1c79356b
A
277{
278 struct unpcb *unp = sotounpcb(so);
279
0a7de745
A
280 if (unp == 0) {
281 return EINVAL;
282 }
283 return unp_connect(so, nam, p);
1c79356b
A
284}
285
2d21ac55
A
286/*
287 * Returns: 0 Success
288 * EINVAL
289 * unp_connect2:EPROTOTYPE Protocol wrong type for socket
290 * unp_connect2:EINVAL Invalid argument
291 */
1c79356b
A
292static int
293uipc_connect2(struct socket *so1, struct socket *so2)
294{
295 struct unpcb *unp = sotounpcb(so1);
296
0a7de745
A
297 if (unp == 0) {
298 return EINVAL;
299 }
1c79356b 300
0a7de745 301 return unp_connect2(so1, so2);
1c79356b
A
302}
303
304/* control is EOPNOTSUPP */
305
306static int
307uipc_detach(struct socket *so)
308{
309 struct unpcb *unp = sotounpcb(so);
310
0a7de745
A
311 if (unp == 0) {
312 return EINVAL;
313 }
1c79356b 314
5ba3f43e 315 LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1c79356b 316 unp_detach(unp);
0a7de745 317 return 0;
1c79356b
A
318}
319
320static int
321uipc_disconnect(struct socket *so)
322{
323 struct unpcb *unp = sotounpcb(so);
324
0a7de745
A
325 if (unp == 0) {
326 return EINVAL;
327 }
1c79356b 328 unp_disconnect(unp);
0a7de745 329 return 0;
1c79356b
A
330}
331
2d21ac55
A
332/*
333 * Returns: 0 Success
334 * EINVAL
335 */
1c79356b 336static int
2d21ac55 337uipc_listen(struct socket *so, __unused proc_t p)
1c79356b
A
338{
339 struct unpcb *unp = sotounpcb(so);
340
0a7de745
A
341 if (unp == 0 || unp->unp_vnode == 0) {
342 return EINVAL;
343 }
344 return unp_listen(unp, p);
1c79356b
A
345}
346
347static int
348uipc_peeraddr(struct socket *so, struct sockaddr **nam)
349{
350 struct unpcb *unp = sotounpcb(so);
351
0a7de745
A
352 if (unp == NULL) {
353 return EINVAL;
354 }
2d21ac55
A
355 if (unp->unp_conn != NULL && unp->unp_conn->unp_addr != NULL) {
356 *nam = dup_sockaddr((struct sockaddr *)
357 unp->unp_conn->unp_addr, 1);
358 } else {
359 *nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
360 }
0a7de745 361 return 0;
1c79356b
A
362}
363
364static int
91447636 365uipc_rcvd(struct socket *so, __unused int flags)
1c79356b
A
366{
367 struct unpcb *unp = sotounpcb(so);
368 struct socket *so2;
369
0a7de745
A
370 if (unp == 0) {
371 return EINVAL;
372 }
1c79356b
A
373 switch (so->so_type) {
374 case SOCK_DGRAM:
375 panic("uipc_rcvd DGRAM?");
0a7de745 376 /*NOTREACHED*/
1c79356b
A
377
378 case SOCK_STREAM:
0a7de745
A
379#define rcv (&so->so_rcv)
380#define snd (&so2->so_snd)
381 if (unp->unp_conn == 0) {
1c79356b 382 break;
0a7de745 383 }
d9a64523 384
1c79356b 385 so2 = unp->unp_conn->unp_socket;
b0d623f7 386 unp_get_locks_in_order(so, so2);
1c79356b
A
387 /*
388 * Adjust backpressure on sender
389 * and wakeup any waiting to write.
390 */
391 snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
392 unp->unp_mbcnt = rcv->sb_mbcnt;
393 snd->sb_hiwat += unp->unp_cc - rcv->sb_cc;
394 unp->unp_cc = rcv->sb_cc;
cb323159
A
395 if (sb_notify(&so2->so_snd)) {
396 sowakeup(so2, &so2->so_snd, so);
397 }
b0d623f7
A
398
399 socket_unlock(so2, 1);
400
1c79356b
A
401#undef snd
402#undef rcv
403 break;
404
405 default:
406 panic("uipc_rcvd unknown socktype");
407 }
0a7de745 408 return 0;
1c79356b
A
409}
410
411/* pru_rcvoob is EOPNOTSUPP */
412
2d21ac55
A
413/*
414 * Returns: 0 Success
415 * EINVAL
416 * EOPNOTSUPP
417 * EPIPE
418 * ENOTCONN
419 * EISCONN
420 * unp_internalize:EINVAL
421 * unp_internalize:EBADF
422 * unp_connect:EAFNOSUPPORT Address family not supported
423 * unp_connect:EINVAL Invalid argument
424 * unp_connect:ENOTSOCK Not a socket
425 * unp_connect:ECONNREFUSED Connection refused
426 * unp_connect:EISCONN Socket is connected
427 * unp_connect:EPROTOTYPE Protocol wrong type for socket
428 * unp_connect:???
429 * sbappendaddr:ENOBUFS [5th argument, contents modified]
430 * sbappendaddr:??? [whatever a filter author chooses]
431 */
1c79356b
A
432static int
433uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
2d21ac55 434 struct mbuf *control, proc_t p)
1c79356b
A
435{
436 int error = 0;
437 struct unpcb *unp = sotounpcb(so);
438 struct socket *so2;
439
440 if (unp == 0) {
441 error = EINVAL;
442 goto release;
443 }
444 if (flags & PRUS_OOB) {
445 error = EOPNOTSUPP;
446 goto release;
447 }
448
13fec989 449 if (control) {
b0d623f7 450 /* release lock to avoid deadlock (4436174) */
2d21ac55 451 socket_unlock(so, 0);
13fec989
A
452 error = unp_internalize(control, p);
453 socket_lock(so, 0);
0a7de745 454 if (error) {
13fec989 455 goto release;
0a7de745 456 }
13fec989 457 }
1c79356b
A
458
459 switch (so->so_type) {
2d21ac55 460 case SOCK_DGRAM:
1c79356b
A
461 {
462 struct sockaddr *from;
463
464 if (nam) {
465 if (unp->unp_conn) {
466 error = EISCONN;
467 break;
468 }
469 error = unp_connect(so, nam, p);
0a7de745 470 if (error) {
eb6b6ca3 471 so->so_state &= ~SS_ISCONNECTING;
1c79356b 472 break;
0a7de745 473 }
1c79356b
A
474 } else {
475 if (unp->unp_conn == 0) {
476 error = ENOTCONN;
477 break;
478 }
479 }
b0d623f7 480
1c79356b 481 so2 = unp->unp_conn->unp_socket;
0a7de745 482 if (so != so2) {
6d2010ae 483 unp_get_locks_in_order(so, so2);
0a7de745 484 }
b0d623f7 485
0a7de745 486 if (unp->unp_addr) {
1c79356b 487 from = (struct sockaddr *)unp->unp_addr;
0a7de745 488 } else {
1c79356b 489 from = &sun_noname;
0a7de745 490 }
2d21ac55
A
491 /*
492 * sbappendaddr() will fail when the receiver runs out of
493 * space; in contrast to SOCK_STREAM, we will lose messages
494 * for the SOCK_DGRAM case when the receiver's queue overflows.
495 * SB_UNIX on the socket buffer implies that the callee will
496 * not free the control message, if any, because we would need
497 * to call unp_dispose() on it.
498 */
91447636 499 if (sbappendaddr(&so2->so_rcv, from, m, control, &error)) {
2d21ac55 500 control = NULL;
cb323159
A
501 if (sb_notify(&so2->so_rcv)) {
502 sowakeup(so2, &so2->so_rcv, so);
503 }
2d21ac55
A
504 } else if (control != NULL && error == 0) {
505 /* A socket filter took control; don't touch it */
506 control = NULL;
91447636 507 }
b0d623f7 508
0a7de745 509 if (so != so2) {
6d2010ae 510 socket_unlock(so2, 1);
0a7de745 511 }
b0d623f7 512
2d21ac55 513 m = NULL;
0a7de745 514 if (nam) {
1c79356b 515 unp_disconnect(unp);
0a7de745 516 }
1c79356b
A
517 break;
518 }
519
91447636
A
520 case SOCK_STREAM: {
521 int didreceive = 0;
0a7de745
A
522#define rcv (&so2->so_rcv)
523#define snd (&so->so_snd)
1c79356b
A
524 /* Connect if not connected yet. */
525 /*
526 * Note: A better implementation would complain
527 * if not equal to the peer's address.
528 */
529 if ((so->so_state & SS_ISCONNECTED) == 0) {
530 if (nam) {
531 error = unp_connect(so, nam, p);
0a7de745 532 if (error) {
eb6b6ca3 533 so->so_state &= ~SS_ISCONNECTING;
0a7de745
A
534 break; /* XXX */
535 }
1c79356b
A
536 } else {
537 error = ENOTCONN;
538 break;
539 }
540 }
541
542 if (so->so_state & SS_CANTSENDMORE) {
543 error = EPIPE;
544 break;
545 }
0a7de745 546 if (unp->unp_conn == 0) {
1c79356b 547 panic("uipc_send connected but no connection?");
0a7de745 548 }
b0d623f7 549
1c79356b 550 so2 = unp->unp_conn->unp_socket;
b0d623f7
A
551 unp_get_locks_in_order(so, so2);
552
d9a64523 553 /* Check socket state again as we might have unlocked the socket
b0d623f7
A
554 * while trying to get the locks in order
555 */
556
557 if ((so->so_state & SS_CANTSENDMORE)) {
558 error = EPIPE;
559 socket_unlock(so2, 1);
560 break;
d9a64523 561 }
b0d623f7 562
6d2010ae
A
563 if (unp->unp_flags & UNP_TRACE_MDNS) {
564 struct mdns_ipc_msg_hdr hdr;
565
0a7de745
A
566 if (mbuf_copydata(m, 0, sizeof(hdr), &hdr) == 0 &&
567 hdr.version == ntohl(MDNS_IPC_MSG_HDR_VERSION_1)) {
6d2010ae
A
568 printf("%s[mDNSResponder] pid=%d (%s): op=0x%x\n",
569 __func__, p->p_pid, p->p_comm, ntohl(hdr.op));
570 }
571 }
572
1c79356b 573 /*
2d21ac55
A
574 * Send to paired receive port, and then reduce send buffer
575 * hiwater marks to maintain backpressure. Wake up readers.
576 * SB_UNIX flag will allow new record to be appended to the
577 * receiver's queue even when it is already full. It is
578 * possible, however, that append might fail. In that case,
579 * we will need to call unp_dispose() on the control message;
580 * the callee will not free it since SB_UNIX is set.
1c79356b 581 */
2d21ac55
A
582 didreceive = control ?
583 sbappendcontrol(rcv, m, control, &error) : sbappend(rcv, m);
584
585 snd->sb_mbmax -= rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
1c79356b 586 unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
d9a64523 587 if ((int32_t)snd->sb_hiwat >=
fe8ab488
A
588 (int32_t)(rcv->sb_cc - unp->unp_conn->unp_cc)) {
589 snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc;
590 } else {
591 snd->sb_hiwat = 0;
592 }
1c79356b 593 unp->unp_conn->unp_cc = rcv->sb_cc;
2d21ac55
A
594 if (didreceive) {
595 control = NULL;
cb323159
A
596 if (sb_notify(&so2->so_rcv)) {
597 sowakeup(so2, &so2->so_rcv, so);
598 }
2d21ac55
A
599 } else if (control != NULL && error == 0) {
600 /* A socket filter took control; don't touch it */
601 control = NULL;
602 }
b0d623f7
A
603
604 socket_unlock(so2, 1);
2d21ac55 605 m = NULL;
1c79356b
A
606#undef snd
607#undef rcv
0a7de745
A
608 }
609 break;
1c79356b
A
610
611 default:
612 panic("uipc_send unknown socktype");
613 }
614
615 /*
616 * SEND_EOF is equivalent to a SEND followed by
617 * a SHUTDOWN.
618 */
619 if (flags & PRUS_EOF) {
620 socantsendmore(so);
621 unp_shutdown(unp);
622 }
623
2d21ac55
A
624 if (control && error != 0) {
625 socket_unlock(so, 0);
91447636 626 unp_dispose(control);
2d21ac55
A
627 socket_lock(so, 0);
628 }
91447636 629
1c79356b 630release:
0a7de745 631 if (control) {
1c79356b 632 m_freem(control);
0a7de745
A
633 }
634 if (m) {
1c79356b 635 m_freem(m);
0a7de745
A
636 }
637 return error;
1c79356b
A
638}
639
640static int
2d21ac55 641uipc_sense(struct socket *so, void *ub, int isstat64)
1c79356b
A
642{
643 struct unpcb *unp = sotounpcb(so);
644 struct socket *so2;
2d21ac55 645 blksize_t blksize;
1c79356b 646
0a7de745
A
647 if (unp == 0) {
648 return EINVAL;
649 }
2d21ac55
A
650
651 blksize = so->so_snd.sb_hiwat;
1c79356b
A
652 if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) {
653 so2 = unp->unp_conn->unp_socket;
2d21ac55 654 blksize += so2->so_rcv.sb_cc;
1c79356b 655 }
0a7de745 656 if (unp->unp_ino == 0) {
1c79356b 657 unp->unp_ino = unp_ino++;
0a7de745 658 }
2d21ac55
A
659
660 if (isstat64 != 0) {
661 struct stat64 *sb64;
662
663 sb64 = (struct stat64 *)ub;
664 sb64->st_blksize = blksize;
665 sb64->st_dev = NODEV;
666 sb64->st_ino = (ino64_t)unp->unp_ino;
667 } else {
668 struct stat *sb;
669
670 sb = (struct stat *)ub;
671 sb->st_blksize = blksize;
672 sb->st_dev = NODEV;
b0d623f7 673 sb->st_ino = (ino_t)(uintptr_t)unp->unp_ino;
2d21ac55
A
674 }
675
0a7de745 676 return 0;
1c79356b
A
677}
678
2d21ac55
A
679/*
680 * Returns: 0 Success
681 * EINVAL
682 *
683 * Notes: This is not strictly correct, as unp_shutdown() also calls
684 * socantrcvmore(). These should maybe both be conditionalized
685 * on the 'how' argument in soshutdown() as called from the
686 * shutdown() system call.
687 */
1c79356b
A
688static int
689uipc_shutdown(struct socket *so)
690{
691 struct unpcb *unp = sotounpcb(so);
692
0a7de745
A
693 if (unp == 0) {
694 return EINVAL;
695 }
1c79356b
A
696 socantsendmore(so);
697 unp_shutdown(unp);
0a7de745 698 return 0;
1c79356b
A
699}
700
2d21ac55
A
701/*
702 * Returns: 0 Success
703 * EINVAL Invalid argument
704 */
1c79356b
A
705static int
706uipc_sockaddr(struct socket *so, struct sockaddr **nam)
707{
708 struct unpcb *unp = sotounpcb(so);
709
0a7de745
A
710 if (unp == NULL) {
711 return EINVAL;
712 }
2d21ac55 713 if (unp->unp_addr != NULL) {
1c79356b 714 *nam = dup_sockaddr((struct sockaddr *)unp->unp_addr, 1);
2d21ac55
A
715 } else {
716 *nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
717 }
0a7de745 718 return 0;
1c79356b
A
719}
720
721struct pr_usrreqs uipc_usrreqs = {
0a7de745
A
722 .pru_abort = uipc_abort,
723 .pru_accept = uipc_accept,
724 .pru_attach = uipc_attach,
725 .pru_bind = uipc_bind,
726 .pru_connect = uipc_connect,
727 .pru_connect2 = uipc_connect2,
728 .pru_detach = uipc_detach,
729 .pru_disconnect = uipc_disconnect,
730 .pru_listen = uipc_listen,
731 .pru_peeraddr = uipc_peeraddr,
732 .pru_rcvd = uipc_rcvd,
733 .pru_send = uipc_send,
734 .pru_sense = uipc_sense,
735 .pru_shutdown = uipc_shutdown,
736 .pru_sockaddr = uipc_sockaddr,
737 .pru_sosend = sosend,
738 .pru_soreceive = soreceive,
1c79356b 739};
91447636
A
740
741int
2d21ac55 742uipc_ctloutput(struct socket *so, struct sockopt *sopt)
91447636
A
743{
744 struct unpcb *unp = sotounpcb(so);
39236c6e
A
745 int error = 0;
746 pid_t peerpid;
f427ee49
A
747 proc_t p;
748 task_t t;
39236c6e 749 struct socket *peerso;
91447636
A
750
751 switch (sopt->sopt_dir) {
752 case SOPT_GET:
753 switch (sopt->sopt_name) {
754 case LOCAL_PEERCRED:
2d21ac55 755 if (unp->unp_flags & UNP_HAVEPC) {
91447636 756 error = sooptcopyout(sopt, &unp->unp_peercred,
0a7de745 757 sizeof(unp->unp_peercred));
2d21ac55 758 } else {
0a7de745 759 if (so->so_type == SOCK_STREAM) {
91447636 760 error = ENOTCONN;
0a7de745 761 } else {
91447636 762 error = EINVAL;
0a7de745 763 }
91447636
A
764 }
765 break;
316670eb 766 case LOCAL_PEERPID:
39236c6e
A
767 case LOCAL_PEEREPID:
768 if (unp->unp_conn == NULL) {
316670eb 769 error = ENOTCONN;
39236c6e 770 break;
316670eb 771 }
39236c6e 772 peerso = unp->unp_conn->unp_socket;
0a7de745 773 if (peerso == NULL) {
39236c6e 774 panic("peer is connected but has no socket?");
0a7de745 775 }
39236c6e
A
776 unp_get_locks_in_order(so, peerso);
777 if (sopt->sopt_name == LOCAL_PEEREPID &&
0a7de745 778 peerso->so_flags & SOF_DELEGATED) {
39236c6e 779 peerpid = peerso->e_pid;
0a7de745 780 } else {
39236c6e 781 peerpid = peerso->last_pid;
0a7de745 782 }
39236c6e 783 socket_unlock(peerso, 1);
0a7de745 784 error = sooptcopyout(sopt, &peerpid, sizeof(peerpid));
39236c6e
A
785 break;
786 case LOCAL_PEERUUID:
787 case LOCAL_PEEREUUID:
788 if (unp->unp_conn == NULL) {
789 error = ENOTCONN;
790 break;
791 }
792 peerso = unp->unp_conn->unp_socket;
0a7de745 793 if (peerso == NULL) {
39236c6e 794 panic("peer is connected but has no socket?");
0a7de745 795 }
39236c6e
A
796 unp_get_locks_in_order(so, peerso);
797 if (sopt->sopt_name == LOCAL_PEEREUUID &&
0a7de745 798 peerso->so_flags & SOF_DELEGATED) {
39236c6e 799 error = sooptcopyout(sopt, &peerso->e_uuid,
0a7de745
A
800 sizeof(peerso->e_uuid));
801 } else {
39236c6e 802 error = sooptcopyout(sopt, &peerso->last_uuid,
0a7de745
A
803 sizeof(peerso->last_uuid));
804 }
39236c6e 805 socket_unlock(peerso, 1);
316670eb 806 break;
f427ee49
A
807 case LOCAL_PEERTOKEN:
808 if (unp->unp_conn == NULL) {
809 error = ENOTCONN;
810 break;
811 }
812 peerso = unp->unp_conn->unp_socket;
813 if (peerso == NULL) {
814 panic("peer is connected but has no socket?");
815 }
816 unp_get_locks_in_order(so, peerso);
817 peerpid = peerso->last_pid;
818 p = proc_find(peerpid);
819 if (p != PROC_NULL) {
820 t = proc_task(p);
821 if (t != TASK_NULL) {
822 audit_token_t peertoken;
823 mach_msg_type_number_t count = TASK_AUDIT_TOKEN_COUNT;
824 if (task_info(t, TASK_AUDIT_TOKEN, (task_info_t)&peertoken, &count) == KERN_SUCCESS) {
825 error = sooptcopyout(sopt, &peertoken, sizeof(peertoken));
826 } else {
827 error = EINVAL;
828 }
829 } else {
830 error = EINVAL;
831 }
832 proc_rele(p);
833 } else {
834 error = EINVAL;
835 }
836 socket_unlock(peerso, 1);
837 break;
91447636
A
838 default:
839 error = EOPNOTSUPP;
840 break;
841 }
842 break;
843 case SOPT_SET:
844 default:
845 error = EOPNOTSUPP;
846 break;
847 }
39236c6e 848
0a7de745 849 return error;
91447636 850}
2d21ac55 851
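/*
 * Illustrative userspace counterpart (assumed to run on a connected AF_UNIX
 * socket "sock"; not part of this file): the options handled above are read
 * with getsockopt() at the SOL_LOCAL level, e.g. LOCAL_PEERPID for the
 * peer's pid and LOCAL_PEERCRED for a struct xucred carrying its uid/gids.
 */
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/ucred.h>
#include <sys/un.h>

static int
query_local_peer(int sock, pid_t *pid, struct xucred *cred)
{
	socklen_t len = sizeof(*pid);

	if (getsockopt(sock, SOL_LOCAL, LOCAL_PEERPID, pid, &len) != 0) {
		return -1;
	}
	len = sizeof(*cred);
	return getsockopt(sock, SOL_LOCAL, LOCAL_PEERCRED, cred, &len);
}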
1c79356b
A
852/*
853 * Both send and receive buffers are allocated PIPSIZ bytes of buffering
854 * for stream sockets, although the total for sender and receiver is
855 * actually only PIPSIZ.
856 * Datagram sockets really use the sendspace as the maximum datagram size,
857 * and don't really want to reserve the sendspace. Their recvspace should
858 * be large enough for at least one max-size datagram plus address.
859 */
860#ifndef PIPSIZ
0a7de745 861#define PIPSIZ 8192
1c79356b 862#endif
0a7de745
A
863static u_int32_t unpst_sendspace = PIPSIZ;
864static u_int32_t unpst_recvspace = PIPSIZ;
865static u_int32_t unpdg_sendspace = 2 * 1024; /* really max datagram size */
866static u_int32_t unpdg_recvspace = 4 * 1024;
1c79356b 867
0a7de745
A
868static int unp_rights; /* file descriptors in flight */
869static int unp_disposed; /* discarded file descriptors */
1c79356b
A
870
871SYSCTL_DECL(_net_local_stream);
6d2010ae 872SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW | CTLFLAG_LOCKED,
0a7de745 873 &unpst_sendspace, 0, "");
6d2010ae 874SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW | CTLFLAG_LOCKED,
0a7de745 875 &unpst_recvspace, 0, "");
6d2010ae 876SYSCTL_INT(_net_local_stream, OID_AUTO, tracemdns, CTLFLAG_RW | CTLFLAG_LOCKED,
0a7de745 877 &unpst_tracemdns, 0, "");
1c79356b 878SYSCTL_DECL(_net_local_dgram);
6d2010ae 879SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW | CTLFLAG_LOCKED,
0a7de745 880 &unpdg_sendspace, 0, "");
6d2010ae 881SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW | CTLFLAG_LOCKED,
0a7de745 882 &unpdg_recvspace, 0, "");
1c79356b 883SYSCTL_DECL(_net_local);
6d2010ae 884SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD | CTLFLAG_LOCKED, &unp_rights, 0, "");
1c79356b 885
2d21ac55
A
886/*
887 * Returns: 0 Success
888 * ENOBUFS
889 * soreserve:ENOBUFS
890 */
1c79356b 891static int
91447636 892unp_attach(struct socket *so)
1c79356b 893{
91447636
A
894 struct unpcb *unp;
895 int error = 0;
1c79356b
A
896
897 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
898 switch (so->so_type) {
1c79356b
A
899 case SOCK_STREAM:
900 error = soreserve(so, unpst_sendspace, unpst_recvspace);
901 break;
902
903 case SOCK_DGRAM:
904 error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
905 break;
906
907 default:
908 panic("unp_attach");
909 }
0a7de745
A
910 if (error) {
911 return error;
912 }
1c79356b 913 }
2d21ac55 914 unp = (struct unpcb *)zalloc(unp_zone);
0a7de745
A
915 if (unp == NULL) {
916 return ENOBUFS;
917 }
918 bzero(unp, sizeof(*unp));
b0d623f7 919
d9a64523 920 lck_mtx_init(&unp->unp_mtx,
0a7de745 921 unp_mtx_grp, unp_mtx_attr);
b0d623f7 922
f427ee49 923 lck_rw_lock_exclusive(&unp_list_mtx);
1c79356b
A
924 LIST_INIT(&unp->unp_refs);
925 unp->unp_socket = so;
91447636
A
926 unp->unp_gencnt = ++unp_gencnt;
927 unp_count++;
2d21ac55
A
928 LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ?
929 &unp_dhead : &unp_shead, unp, unp_link);
f427ee49 930 lck_rw_done(&unp_list_mtx);
1c79356b 931 so->so_pcb = (caddr_t)unp;
2d21ac55
A
932 /*
933 * Mark AF_UNIX socket buffers accordingly so that:
934 *
935 * a. In the SOCK_STREAM case, socket buffer append won't fail due to
936 * the lack of space; this essentially loosens the sbspace() check,
937 * since there is a disconnect between sosend() and uipc_send() with
938 * respect to flow control that might result in our dropping the
939 * data in uipc_send(). By setting this, we allow for slightly
940 * more records to be appended to the receiving socket to avoid
941 * losing data (which we can't afford in the SOCK_STREAM case).
942 * Flow control still takes place since we adjust the sender's
943 * hiwat during each send. This doesn't affect the SOCK_DGRAM
944 * case and append would still fail when the queue overflows.
945 *
946 * b. In the presence of control messages containing internalized
947 * file descriptors, the append routines will not free them since
948 * we'd need to undo the work first via unp_dispose().
949 */
950 so->so_rcv.sb_flags |= SB_UNIX;
951 so->so_snd.sb_flags |= SB_UNIX;
0a7de745 952 return 0;
1c79356b
A
953}
954
955static void
91447636 956unp_detach(struct unpcb *unp)
1c79356b 957{
b7266188
A
958 int so_locked = 1;
959
f427ee49 960 lck_rw_lock_exclusive(&unp_list_mtx);
1c79356b 961 LIST_REMOVE(unp, unp_link);
d9a64523 962 --unp_count;
316670eb 963 ++unp_gencnt;
f427ee49 964 lck_rw_done(&unp_list_mtx);
1c79356b 965 if (unp->unp_vnode) {
b0d623f7
A
966 struct vnode *tvp = NULL;
967 socket_unlock(unp->unp_socket, 0);
968
969 /* Holding unp_connect_lock will avoid a race between
970 * a thread closing the listening socket and a thread
971 * connecting to it.
972 */
f427ee49 973 lck_mtx_lock(&unp_connect_lock);
b0d623f7
A
974 socket_lock(unp->unp_socket, 0);
975 if (unp->unp_vnode) {
976 tvp = unp->unp_vnode;
977 unp->unp_vnode->v_socket = NULL;
978 unp->unp_vnode = NULL;
979 }
f427ee49 980 lck_mtx_unlock(&unp_connect_lock);
0a7de745
A
981 if (tvp != NULL) {
982 vnode_rele(tvp); /* drop the usecount */
983 }
1c79356b 984 }
0a7de745 985 if (unp->unp_conn) {
1c79356b 986 unp_disconnect(unp);
0a7de745 987 }
b0d623f7 988 while (unp->unp_refs.lh_first) {
b7266188
A
989 struct unpcb *unp2 = NULL;
990
991 /* This datagram socket is connected to one or more
992 * sockets. In order to avoid a race condition between removing
d9a64523 993 * this reference and closing the connected socket, we need
b7266188
A
994 * to check disconnect_in_progress
995 */
996 if (so_locked == 1) {
997 socket_unlock(unp->unp_socket, 0);
998 so_locked = 0;
999 }
f427ee49 1000 lck_mtx_lock(&unp_disconnect_lock);
b7266188 1001 while (disconnect_in_progress != 0) {
f427ee49 1002 (void)msleep((caddr_t)&disconnect_in_progress, &unp_disconnect_lock,
0a7de745 1003 PSOCK, "disconnect", NULL);
b7266188
A
1004 }
1005 disconnect_in_progress = 1;
f427ee49 1006 lck_mtx_unlock(&unp_disconnect_lock);
b7266188
A
1007
1008 /* Now we are sure that any unpcb socket disconnect is not happening */
1009 if (unp->unp_refs.lh_first != NULL) {
0a7de745
A
1010 unp2 = unp->unp_refs.lh_first;
1011 socket_lock(unp2->unp_socket, 1);
b7266188 1012 }
d9a64523 1013
f427ee49 1014 lck_mtx_lock(&unp_disconnect_lock);
b7266188
A
1015 disconnect_in_progress = 0;
1016 wakeup(&disconnect_in_progress);
f427ee49 1017 lck_mtx_unlock(&unp_disconnect_lock);
d9a64523 1018
b7266188
A
1019 if (unp2 != NULL) {
1020 /* We already locked this socket and have a reference on it */
0a7de745
A
1021 unp_drop(unp2, ECONNRESET);
1022 socket_unlock(unp2->unp_socket, 1);
b7266188
A
1023 }
1024 }
1025
1026 if (so_locked == 0) {
b0d623f7 1027 socket_lock(unp->unp_socket, 0);
b7266188 1028 so_locked = 1;
b0d623f7 1029 }
1c79356b 1030 soisdisconnected(unp->unp_socket);
2d21ac55
A
1031 /* makes sure we're getting dealloced */
1032 unp->unp_socket->so_flags |= SOF_PCBCLEARING;
1c79356b
A
1033}
1034
2d21ac55
A
1035/*
1036 * Returns: 0 Success
1037 * EAFNOSUPPORT
1038 * EINVAL
1039 * EADDRINUSE
1040 * namei:??? [anything namei can return]
1041 * vnode_authorize:??? [anything vnode_authorize can return]
1042 *
1043 * Notes: p at this point is the current process, as this function is
1044 * only called by sobind().
1045 */
1c79356b 1046static int
91447636
A
1047unp_bind(
1048 struct unpcb *unp,
1049 struct sockaddr *nam,
2d21ac55 1050 proc_t p)
1c79356b
A
1051{
1052 struct sockaddr_un *soun = (struct sockaddr_un *)nam;
91447636
A
1053 struct vnode *vp, *dvp;
1054 struct vnode_attr va;
2d21ac55 1055 vfs_context_t ctx = vfs_context_current();
1c79356b
A
1056 int error, namelen;
1057 struct nameidata nd;
b0d623f7 1058 struct socket *so = unp->unp_socket;
1c79356b
A
1059 char buf[SOCK_MAXADDRLEN];
1060
2d21ac55 1061 if (nam->sa_family != 0 && nam->sa_family != AF_UNIX) {
0a7de745 1062 return EAFNOSUPPORT;
2d21ac55 1063 }
91447636 1064
5ba3f43e
A
1065 /*
1066 * Check if the socket is already bound to an address
1067 */
0a7de745
A
1068 if (unp->unp_vnode != NULL) {
1069 return EINVAL;
1070 }
5ba3f43e
A
1071 /*
1072 * Check if the socket may have been shut down
1073 */
1074 if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
0a7de745
A
1075 (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
1076 return EINVAL;
1077 }
5ba3f43e 1078
1c79356b 1079 namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
0a7de745
A
1080 if (namelen <= 0) {
1081 return EINVAL;
1082 }
490019cf
A
1083 /*
1084 * Note: sun_path is not a zero terminated "C" string
1085 */
0a7de745
A
1086 if (namelen >= SOCK_MAXADDRLEN) {
1087 return EINVAL;
1088 }
490019cf
A
1089 bcopy(soun->sun_path, buf, namelen);
1090 buf[namelen] = 0;
d9a64523 1091
b0d623f7
A
1092 socket_unlock(so, 0);
1093
6d2010ae 1094 NDINIT(&nd, CREATE, OP_MKFIFO, FOLLOW | LOCKPARENT, UIO_SYSSPACE,
2d21ac55
A
1095 CAST_USER_ADDR_T(buf), ctx);
1096 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
1c79356b
A
1097 error = namei(&nd);
1098 if (error) {
b0d623f7 1099 socket_lock(so, 0);
0a7de745 1100 return error;
1c79356b 1101 }
91447636 1102 dvp = nd.ni_dvp;
1c79356b 1103 vp = nd.ni_vp;
91447636 1104
1c79356b 1105 if (vp != NULL) {
2d21ac55 1106 /*
91447636
A
1107 * need to do this before the vnode_put of dvp
1108 * since we may have to release an fs_nodelock
1109 */
1110 nameidone(&nd);
1111
1112 vnode_put(dvp);
1113 vnode_put(vp);
1114
b0d623f7 1115 socket_lock(so, 0);
0a7de745 1116 return EADDRINUSE;
1c79356b 1117 }
91447636 1118
2d21ac55
A
1119 VATTR_INIT(&va);
1120 VATTR_SET(&va, va_type, VSOCK);
1121 VATTR_SET(&va, va_mode, (ACCESSPERMS & ~p->p_fd->fd_cmask));
1122
b0d623f7 1123#if CONFIG_MACF
2d21ac55
A
1124 error = mac_vnode_check_create(ctx,
1125 nd.ni_dvp, &nd.ni_cnd, &va);
1126
1127 if (error == 0)
b0d623f7
A
1128#endif /* CONFIG_MACF */
1129#if CONFIG_MACF_SOCKET_SUBSET
1130 error = mac_vnode_check_uipc_bind(ctx,
1131 nd.ni_dvp, &nd.ni_cnd, &va);
1132
1133 if (error == 0)
1134#endif /* MAC_SOCKET_SUBSET */
91447636 1135 /* authorize before creating */
2d21ac55 1136 error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);
91447636
A
1137
1138 if (!error) {
91447636 1139 /* create the socket */
6d2010ae 1140 error = vn_create(dvp, &vp, &nd, &va, 0, 0, NULL, ctx);
91447636 1141 }
2d21ac55 1142
91447636
A
1143 nameidone(&nd);
1144 vnode_put(dvp);
1145
1c79356b 1146 if (error) {
b0d623f7 1147 socket_lock(so, 0);
0a7de745 1148 return error;
1c79356b 1149 }
0a7de745 1150
b0d623f7 1151 socket_lock(so, 0);
0a7de745
A
1152
1153 if (unp->unp_vnode != NULL) {
1154 vnode_put(vp); /* drop the iocount */
1155 return EINVAL;
1156 }
1157
1158 error = vnode_ref(vp); /* gain a longterm reference */
1159 if (error) {
1160 vnode_put(vp); /* drop the iocount */
1161 return error;
1162 }
1163
1c79356b
A
1164 vp->v_socket = unp->unp_socket;
1165 unp->unp_vnode = vp;
1166 unp->unp_addr = (struct sockaddr_un *)dup_sockaddr(nam, 1);
0a7de745 1167 vnode_put(vp); /* drop the iocount */
91447636 1168
0a7de745 1169 return 0;
1c79356b
A
1170}
1171
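/*
 * Minimal userspace sketch of the path exercised by unp_bind() above
 * (assumed names, illustrative only): bind an AF_UNIX stream socket to a
 * filesystem path, which is what creates the VSOCK vnode the code above
 * looks up and takes a long-term reference on.
 */
#include <string.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <unistd.h>

static int
bind_local(const char *path)
{
	struct sockaddr_un sun;
	int s = socket(AF_UNIX, SOCK_STREAM, 0);

	if (s < 0) {
		return -1;
	}
	memset(&sun, 0, sizeof(sun));
	sun.sun_family = AF_UNIX;
	strlcpy(sun.sun_path, path, sizeof(sun.sun_path));
	sun.sun_len = (unsigned char)SUN_LEN(&sun);

	if (bind(s, (struct sockaddr *)&sun, sizeof(sun)) != 0) {
		close(s);
		return -1;
	}
	return s;
}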
2d21ac55
A
1172
1173/*
1174 * Returns: 0 Success
1175 * EAFNOSUPPORT Address family not supported
1176 * EINVAL Invalid argument
1177 * ENOTSOCK Not a socket
1178 * ECONNREFUSED Connection refused
1179 * EPROTOTYPE Protocol wrong type for socket
1180 * EISCONN Socket is connected
1181 * unp_connect2:EPROTOTYPE Protocol wrong type for socket
1182 * unp_connect2:EINVAL Invalid argument
1183 * namei:??? [anything namei can return]
1184 * vnode_authorize:???? [anything vnode_authorize can return]
1185 *
1186 * Notes: p at this point is the current process, as this function is
1187 * only called by sosend(), sendfile(), and soconnectlock().
1188 */
1c79356b 1189static int
2d21ac55 1190unp_connect(struct socket *so, struct sockaddr *nam, __unused proc_t p)
1c79356b 1191{
91447636
A
1192 struct sockaddr_un *soun = (struct sockaddr_un *)nam;
1193 struct vnode *vp;
0a7de745 1194 struct socket *so2, *so3, *list_so = NULL;
91447636 1195 struct unpcb *unp, *unp2, *unp3;
2d21ac55 1196 vfs_context_t ctx = vfs_context_current();
1c79356b
A
1197 int error, len;
1198 struct nameidata nd;
1199 char buf[SOCK_MAXADDRLEN];
1200
2d21ac55 1201 if (nam->sa_family != 0 && nam->sa_family != AF_UNIX) {
0a7de745 1202 return EAFNOSUPPORT;
2d21ac55
A
1203 }
1204
b0d623f7 1205 unp = sotounpcb(so);
cc9f6e38 1206 so2 = so3 = NULL;
91447636 1207
1c79356b 1208 len = nam->sa_len - offsetof(struct sockaddr_un, sun_path);
0a7de745
A
1209 if (len <= 0) {
1210 return EINVAL;
1211 }
490019cf
A
1212 /*
1213 * Note: sun_path is not a zero terminated "C" string
1214 */
0a7de745
A
1215 if (len >= SOCK_MAXADDRLEN) {
1216 return EINVAL;
1217 }
eb6b6ca3
A
1218
1219 soisconnecting(so);
1220
490019cf
A
1221 bcopy(soun->sun_path, buf, len);
1222 buf[len] = 0;
2d21ac55 1223
b0d623f7 1224 socket_unlock(so, 0);
1c79356b 1225
6d2010ae 1226 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
2d21ac55 1227 CAST_USER_ADDR_T(buf), ctx);
1c79356b
A
1228 error = namei(&nd);
1229 if (error) {
b0d623f7 1230 socket_lock(so, 0);
0a7de745 1231 return error;
1c79356b 1232 }
91447636 1233 nameidone(&nd);
1c79356b
A
1234 vp = nd.ni_vp;
1235 if (vp->v_type != VSOCK) {
1236 error = ENOTSOCK;
b0d623f7
A
1237 socket_lock(so, 0);
1238 goto out;
1c79356b 1239 }
91447636 1240
b0d623f7 1241#if CONFIG_MACF_SOCKET_SUBSET
39037602 1242 error = mac_vnode_check_uipc_connect(ctx, vp, so);
b0d623f7
A
1243 if (error) {
1244 socket_lock(so, 0);
1245 goto out;
1246 }
1247#endif /* MAC_SOCKET_SUBSET */
1248
2d21ac55 1249 error = vnode_authorize(vp, NULL, KAUTH_VNODE_WRITE_DATA, ctx);
b0d623f7
A
1250 if (error) {
1251 socket_lock(so, 0);
1252 goto out;
1253 }
1254
f427ee49 1255 lck_mtx_lock(&unp_connect_lock);
b0d623f7
A
1256
1257 if (vp->v_socket == 0) {
f427ee49 1258 lck_mtx_unlock(&unp_connect_lock);
1c79356b 1259 error = ECONNREFUSED;
b0d623f7
A
1260 socket_lock(so, 0);
1261 goto out;
1c79356b 1262 }
91447636 1263
b0d623f7
A
1264 socket_lock(vp->v_socket, 1); /* Get a reference on the listening socket */
1265 so2 = vp->v_socket;
f427ee49 1266 lck_mtx_unlock(&unp_connect_lock);
91447636 1267
b0d623f7
A
1268
1269 if (so2->so_pcb == NULL) {
1270 error = ECONNREFUSED;
6d2010ae
A
1271 if (so != so2) {
1272 socket_unlock(so2, 1);
1273 socket_lock(so, 0);
1274 } else {
1275 /* Release the reference held for the listen socket */
d190cdc3 1276 VERIFY(so2->so_usecount > 0);
6d2010ae
A
1277 so2->so_usecount--;
1278 }
b0d623f7 1279 goto out;
1c79356b 1280 }
2d21ac55 1281
b0d623f7
A
1282 if (so < so2) {
1283 socket_unlock(so2, 0);
1284 socket_lock(so, 0);
1285 socket_lock(so2, 0);
6d2010ae 1286 } else if (so > so2) {
b0d623f7
A
1287 socket_lock(so, 0);
1288 }
55e303ae
A
1289 /*
1290 * Check if socket was connected while we were trying to
b0d623f7 1291 * get the socket locks in order.
55e303ae
A
1292 * XXX - probably shouldn't return an error for SOCK_DGRAM
1293 */
1294 if ((so->so_state & SS_ISCONNECTED) != 0) {
1295 error = EISCONN;
6d2010ae 1296 goto decref_out;
b0d623f7
A
1297 }
1298
1299 if (so->so_type != so2->so_type) {
b0d623f7 1300 error = EPROTOTYPE;
6d2010ae 1301 goto decref_out;
55e303ae 1302 }
2d21ac55 1303
1c79356b 1304 if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
b0d623f7
A
1305 /* Release the incoming socket but keep a reference */
1306 socket_unlock(so, 0);
1307
1c79356b 1308 if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
91447636 1309 (so3 = sonewconn(so2, 0, nam)) == 0) {
1c79356b 1310 error = ECONNREFUSED;
316670eb
A
1311 if (so != so2) {
1312 socket_unlock(so2, 1);
1313 socket_lock(so, 0);
1314 } else {
1315 socket_lock(so, 0);
1316 /* Release the reference held for
1317 * listen socket.
1318 */
d190cdc3 1319 VERIFY(so2->so_usecount > 0);
316670eb
A
1320 so2->so_usecount--;
1321 }
b0d623f7 1322 goto out;
1c79356b
A
1323 }
1324 unp2 = sotounpcb(so2);
1325 unp3 = sotounpcb(so3);
0a7de745 1326 if (unp2->unp_addr) {
1c79356b 1327 unp3->unp_addr = (struct sockaddr_un *)
2d21ac55 1328 dup_sockaddr((struct sockaddr *)unp2->unp_addr, 1);
0a7de745 1329 }
91447636
A
1330
1331 /*
1332 * unp_peercred management:
1333 *
1334 * The connecter's (client's) credentials are copied
1335 * from its process structure at the time of connect()
1336 * (which is now).
1337 */
2d21ac55 1338 cru2x(vfs_context_ucred(ctx), &unp3->unp_peercred);
91447636
A
1339 unp3->unp_flags |= UNP_HAVEPC;
1340 /*
1341 * The receiver's (server's) credentials are copied
1342 * from the unp_peercred member of socket on which the
1343 * former called listen(); unp_listen() cached that
1344 * process's credentials at that time so we can use
1345 * them now.
1346 */
1347 KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED,
1348 ("unp_connect: listener without cached peercred"));
b0d623f7
A
1349
1350 /* Here we need to have both so and so2 locks and so2
1351 * is already locked. Lock ordering is required.
1352 */
1353 if (so < so2) {
1354 socket_unlock(so2, 0);
1355 socket_lock(so, 0);
1356 socket_lock(so2, 0);
1357 } else {
1358 socket_lock(so, 0);
1359 }
1360
1361 /* Check again if the socket state changed when its lock was released */
1362 if ((so->so_state & SS_ISCONNECTED) != 0) {
1363 error = EISCONN;
1364 socket_unlock(so2, 1);
1365 socket_lock(so3, 0);
1366 sofreelastref(so3, 1);
0a7de745 1367 goto out;
b0d623f7 1368 }
91447636 1369 memcpy(&unp->unp_peercred, &unp2->unp_peercred,
0a7de745 1370 sizeof(unp->unp_peercred));
91447636
A
1371 unp->unp_flags |= UNP_HAVEPC;
1372
b0d623f7
A
1373 /* Hold the reference on listening socket until the end */
1374 socket_unlock(so2, 0);
1375 list_so = so2;
1376
1377 /* Lock ordering doesn't matter because so3 was just created */
1378 socket_lock(so3, 1);
1c79356b 1379 so2 = so3;
b0d623f7 1380
6d2010ae
A
1381 /*
1382 * Enable tracing for mDNSResponder endpoints. (The use
1383 * of sizeof instead of strlen below takes the null
1384 * terminating character into account.)
1385 */
1386 if (unpst_tracemdns &&
1387 !strncmp(soun->sun_path, MDNSRESPONDER_PATH,
0a7de745 1388 sizeof(MDNSRESPONDER_PATH))) {
6d2010ae
A
1389 unp->unp_flags |= UNP_TRACE_MDNS;
1390 unp2->unp_flags |= UNP_TRACE_MDNS;
1391 }
1c79356b 1392 }
d9a64523 1393
1c79356b 1394 error = unp_connect2(so, so2);
6d2010ae
A
1395
1396decref_out:
b0d623f7 1397 if (so2 != NULL) {
6d2010ae
A
1398 if (so != so2) {
1399 socket_unlock(so2, 1);
1400 } else {
1401 /* Release the extra reference held for the listen socket.
1402 * This is possible only for SOCK_DGRAM sockets. We refuse
1403 * connecting to the same socket for SOCK_STREAM sockets.
1404 */
d190cdc3 1405 VERIFY(so2->so_usecount > 0);
6d2010ae
A
1406 so2->so_usecount--;
1407 }
b0d623f7
A
1408 }
1409
1410 if (list_so != NULL) {
1411 socket_lock(list_so, 0);
1412 socket_unlock(list_so, 1);
1413 }
6d2010ae 1414
b0d623f7 1415out:
5ba3f43e 1416 LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
91447636 1417 vnode_put(vp);
0a7de745 1418 return error;
1c79356b
A
1419}
1420
2d21ac55
A
1421/*
1422 * Returns: 0 Success
1423 * EPROTOTYPE Protocol wrong type for socket
1424 * EINVAL Invalid argument
1425 */
1c79356b 1426int
2d21ac55 1427unp_connect2(struct socket *so, struct socket *so2)
1c79356b 1428{
91447636
A
1429 struct unpcb *unp = sotounpcb(so);
1430 struct unpcb *unp2;
1c79356b 1431
0a7de745
A
1432 if (so2->so_type != so->so_type) {
1433 return EPROTOTYPE;
1434 }
b0d623f7 1435
1c79356b 1436 unp2 = sotounpcb(so2);
0b4e3aa0 1437
5ba3f43e
A
1438 LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1439 LCK_MTX_ASSERT(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);
b0d623f7 1440
0b4e3aa0 1441 /* Verify both sockets are still opened */
0a7de745
A
1442 if (unp == 0 || unp2 == 0) {
1443 return EINVAL;
1444 }
0b4e3aa0 1445
1c79356b 1446 unp->unp_conn = unp2;
d9a64523
A
1447 so2->so_usecount++;
1448
1c79356b 1449 switch (so->so_type) {
1c79356b
A
1450 case SOCK_DGRAM:
1451 LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink);
b0d623f7 1452
d9a64523 1453 if (so != so2) {
6d2010ae 1454 /* Avoid lock order reversals due to drop/acquire in soisconnected. */
0a7de745 1455 /* Keep an extra reference on so2 that will be dropped
d9a64523
A
1456 * soon after getting the locks in order
1457 */
6d2010ae
A
1458 socket_unlock(so2, 0);
1459 soisconnected(so);
1460 unp_get_locks_in_order(so, so2);
d190cdc3 1461 VERIFY(so2->so_usecount > 0);
6d2010ae
A
1462 so2->so_usecount--;
1463 } else {
1464 soisconnected(so);
1465 }
b0d623f7 1466
1c79356b
A
1467 break;
1468
1469 case SOCK_STREAM:
2d21ac55
A
1470 /* This takes care of socketpair */
1471 if (!(unp->unp_flags & UNP_HAVEPC) &&
1472 !(unp2->unp_flags & UNP_HAVEPC)) {
1473 cru2x(kauth_cred_get(), &unp->unp_peercred);
1474 unp->unp_flags |= UNP_HAVEPC;
1475
1476 cru2x(kauth_cred_get(), &unp2->unp_peercred);
1477 unp2->unp_flags |= UNP_HAVEPC;
1478 }
1c79356b 1479 unp2->unp_conn = unp;
b0d623f7
A
1480 so->so_usecount++;
1481
1482 /* Avoid lock order reversals due to drop/acquire in soisconnected. */
1483 socket_unlock(so, 0);
1c79356b 1484 soisconnected(so2);
b0d623f7
A
1485
1486 /* Keep an extra reference on so2, that will be dropped soon after
1487 * getting the locks in order again.
1488 */
1489 socket_unlock(so2, 0);
1490
1491 socket_lock(so, 0);
1492 soisconnected(so);
1493
1494 unp_get_locks_in_order(so, so2);
1495 /* Decrement the extra reference left before */
d190cdc3 1496 VERIFY(so2->so_usecount > 0);
b0d623f7 1497 so2->so_usecount--;
1c79356b
A
1498 break;
1499
1500 default:
b0d623f7 1501 panic("unknown socket type %d in unp_connect2", so->so_type);
1c79356b 1502 }
5ba3f43e
A
1503 LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1504 LCK_MTX_ASSERT(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);
0a7de745 1505 return 0;
1c79356b
A
1506}
1507
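/*
 * Userspace sketch (illustrative): socketpair(2) on AF_UNIX joins its two
 * endpoints through unp_connect2() above without any filesystem name; both
 * peers then carry cached credentials (the UNP_HAVEPC branch).
 */
#include <sys/socket.h>

static int
make_connected_pair(int sv[2])
{
	/* Two already-connected SOCK_STREAM endpoints. */
	return socketpair(AF_UNIX, SOCK_STREAM, 0, sv);
}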
1508static void
91447636 1509unp_disconnect(struct unpcb *unp)
1c79356b 1510{
b0d623f7
A
1511 struct unpcb *unp2 = NULL;
1512 struct socket *so2 = NULL, *so;
1513 struct socket *waitso;
1514 int so_locked = 1, strdisconn = 0;
1c79356b 1515
b0d623f7
A
1516 so = unp->unp_socket;
1517 if (unp->unp_conn == NULL) {
1c79356b 1518 return;
b0d623f7 1519 }
f427ee49 1520 lck_mtx_lock(&unp_disconnect_lock);
b0d623f7
A
1521 while (disconnect_in_progress != 0) {
1522 if (so_locked == 1) {
1523 socket_unlock(so, 0);
1524 so_locked = 0;
1525 }
f427ee49 1526 (void)msleep((caddr_t)&disconnect_in_progress, &unp_disconnect_lock,
0a7de745 1527 PSOCK, "disconnect", NULL);
b0d623f7
A
1528 }
1529 disconnect_in_progress = 1;
f427ee49 1530 lck_mtx_unlock(&unp_disconnect_lock);
b0d623f7
A
1531
1532 if (so_locked == 0) {
1533 socket_lock(so, 0);
1534 so_locked = 1;
1535 }
1536
1537 unp2 = unp->unp_conn;
1538
1539 if (unp2 == 0 || unp2->unp_socket == NULL) {
1540 goto out;
1541 }
1542 so2 = unp2->unp_socket;
1543
1544try_again:
6d2010ae
A
1545 if (so == so2) {
1546 if (so_locked == 0) {
1547 socket_lock(so, 0);
1548 }
1549 waitso = so;
1550 } else if (so < so2) {
b0d623f7
A
1551 if (so_locked == 0) {
1552 socket_lock(so, 0);
1553 }
1554 socket_lock(so2, 1);
1555 waitso = so2;
1556 } else {
d9a64523 1557 if (so_locked == 1) {
b0d623f7
A
1558 socket_unlock(so, 0);
1559 }
1560 socket_lock(so2, 1);
1561 socket_lock(so, 0);
1562 waitso = so;
1563 }
6d2010ae 1564 so_locked = 1;
b0d623f7 1565
5ba3f43e
A
1566 LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1567 LCK_MTX_ASSERT(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);
b0d623f7
A
1568
1569 /* Check for the UNP_DONTDISCONNECT flag, if it
1570 * is set, release both sockets and go to sleep
1571 */
d9a64523 1572
b0d623f7 1573 if ((((struct unpcb *)waitso->so_pcb)->unp_flags & UNP_DONTDISCONNECT) != 0) {
6d2010ae
A
1574 if (so != so2) {
1575 socket_unlock(so2, 1);
1576 }
b0d623f7
A
1577 so_locked = 0;
1578
d9a64523 1579 (void)msleep(waitso->so_pcb, &unp->unp_mtx,
0a7de745 1580 PSOCK | PDROP, "unpdisconnect", NULL);
b0d623f7
A
1581 goto try_again;
1582 }
d9a64523 1583
b0d623f7
A
1584 if (unp->unp_conn == NULL) {
1585 panic("unp_conn became NULL after sleep");
1586 }
1587
2d21ac55 1588 unp->unp_conn = NULL;
d190cdc3 1589 VERIFY(so2->so_usecount > 0);
b0d623f7
A
1590 so2->so_usecount--;
1591
0a7de745 1592 if (unp->unp_flags & UNP_TRACE_MDNS) {
6d2010ae 1593 unp->unp_flags &= ~UNP_TRACE_MDNS;
0a7de745 1594 }
6d2010ae 1595
1c79356b 1596 switch (unp->unp_socket->so_type) {
1c79356b
A
1597 case SOCK_DGRAM:
1598 LIST_REMOVE(unp, unp_reflink);
1599 unp->unp_socket->so_state &= ~SS_ISCONNECTED;
0a7de745 1600 if (so != so2) {
6d2010ae 1601 socket_unlock(so2, 1);
0a7de745 1602 }
1c79356b
A
1603 break;
1604
1605 case SOCK_STREAM:
2d21ac55 1606 unp2->unp_conn = NULL;
5ba3f43e 1607 VERIFY(so->so_usecount > 0);
b0d623f7
A
1608 so->so_usecount--;
1609
1610 /* Set the socket state correctly but do a wakeup later when
1611 * we release all locks except the socket lock, this will avoid
1612 * a deadlock.
1613 */
0a7de745
A
1614 unp->unp_socket->so_state &= ~(SS_ISCONNECTING | SS_ISCONNECTED | SS_ISDISCONNECTING);
1615 unp->unp_socket->so_state |= (SS_CANTRCVMORE | SS_CANTSENDMORE | SS_ISDISCONNECTED);
b0d623f7 1616
0a7de745
A
1617 unp2->unp_socket->so_state &= ~(SS_ISCONNECTING | SS_ISCONNECTED | SS_ISDISCONNECTING);
1618 unp->unp_socket->so_state |= (SS_CANTRCVMORE | SS_CANTSENDMORE | SS_ISDISCONNECTED);
6d2010ae 1619
0a7de745 1620 if (unp2->unp_flags & UNP_TRACE_MDNS) {
6d2010ae 1621 unp2->unp_flags &= ~UNP_TRACE_MDNS;
0a7de745 1622 }
6d2010ae 1623
b0d623f7 1624 strdisconn = 1;
1c79356b 1625 break;
b0d623f7
A
1626 default:
1627 panic("unknown socket type %d", so->so_type);
1c79356b 1628 }
b0d623f7 1629out:
f427ee49 1630 lck_mtx_lock(&unp_disconnect_lock);
b0d623f7
A
1631 disconnect_in_progress = 0;
1632 wakeup(&disconnect_in_progress);
f427ee49 1633 lck_mtx_unlock(&unp_disconnect_lock);
1c79356b 1634
b0d623f7
A
1635 if (strdisconn) {
1636 socket_unlock(so, 0);
1637 soisdisconnected(so2);
1638 socket_unlock(so2, 1);
1c79356b 1639
0a7de745 1640 socket_lock(so, 0);
b0d623f7
A
1641 soisdisconnected(so);
1642 }
5ba3f43e 1643 LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
b0d623f7 1644 return;
1c79356b 1645}
b0d623f7
A
1646
1647/*
1648 * unpcb_to_compat copies specific bits of a unpcb to a unpcb_compat format.
1649 * The unpcb_compat data structure is passed to user space and must not change.
1650 */
1651static void
1652unpcb_to_compat(struct unpcb *up, struct unpcb_compat *cp)
1653{
1654#if defined(__LP64__)
316670eb
A
1655 cp->unp_link.le_next = (u_int32_t)
1656 VM_KERNEL_ADDRPERM(up->unp_link.le_next);
1657 cp->unp_link.le_prev = (u_int32_t)
1658 VM_KERNEL_ADDRPERM(up->unp_link.le_prev);
b0d623f7 1659#else
316670eb
A
1660 cp->unp_link.le_next = (struct unpcb_compat *)
1661 VM_KERNEL_ADDRPERM(up->unp_link.le_next);
1662 cp->unp_link.le_prev = (struct unpcb_compat **)
1663 VM_KERNEL_ADDRPERM(up->unp_link.le_prev);
b0d623f7 1664#endif
316670eb
A
1665 cp->unp_socket = (_UNPCB_PTR(struct socket *))
1666 VM_KERNEL_ADDRPERM(up->unp_socket);
1667 cp->unp_vnode = (_UNPCB_PTR(struct vnode *))
1668 VM_KERNEL_ADDRPERM(up->unp_vnode);
b0d623f7
A
1669 cp->unp_ino = up->unp_ino;
1670 cp->unp_conn = (_UNPCB_PTR(struct unpcb_compat *))
316670eb
A
1671 VM_KERNEL_ADDRPERM(up->unp_conn);
1672 cp->unp_refs = (u_int32_t)VM_KERNEL_ADDRPERM(up->unp_refs.lh_first);
b0d623f7
A
1673#if defined(__LP64__)
1674 cp->unp_reflink.le_next =
316670eb 1675 (u_int32_t)VM_KERNEL_ADDRPERM(up->unp_reflink.le_next);
b0d623f7 1676 cp->unp_reflink.le_prev =
316670eb 1677 (u_int32_t)VM_KERNEL_ADDRPERM(up->unp_reflink.le_prev);
b0d623f7
A
1678#else
1679 cp->unp_reflink.le_next =
316670eb 1680 (struct unpcb_compat *)VM_KERNEL_ADDRPERM(up->unp_reflink.le_next);
b0d623f7 1681 cp->unp_reflink.le_prev =
316670eb 1682 (struct unpcb_compat **)VM_KERNEL_ADDRPERM(up->unp_reflink.le_prev);
1c79356b 1683#endif
b0d623f7 1684 cp->unp_addr = (_UNPCB_PTR(struct sockaddr_un *))
316670eb 1685 VM_KERNEL_ADDRPERM(up->unp_addr);
b0d623f7
A
1686 cp->unp_cc = up->unp_cc;
1687 cp->unp_mbcnt = up->unp_mbcnt;
1688 cp->unp_gencnt = up->unp_gencnt;
1689}
1c79356b
A
1690
1691static int
1692unp_pcblist SYSCTL_HANDLER_ARGS
1693{
2d21ac55 1694#pragma unused(oidp,arg2)
1c79356b
A
1695 int error, i, n;
1696 struct unpcb *unp, **unp_list;
1697 unp_gen_t gencnt;
1698 struct xunpgen xug;
1699 struct unp_head *head;
1700
f427ee49 1701 lck_rw_lock_shared(&unp_list_mtx);
1c79356b
A
1702 head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);
1703
1704 /*
1705 * The process of preparing the PCB list is too time-consuming and
1706 * resource-intensive to repeat twice on every request.
1707 */
91447636 1708 if (req->oldptr == USER_ADDR_NULL) {
1c79356b 1709 n = unp_count;
0a7de745
A
1710 req->oldidx = 2 * sizeof(xug) + (n + n / 8) *
1711 sizeof(struct xunpcb);
f427ee49 1712 lck_rw_done(&unp_list_mtx);
0a7de745 1713 return 0;
1c79356b
A
1714 }
1715
91447636 1716 if (req->newptr != USER_ADDR_NULL) {
f427ee49 1717 lck_rw_done(&unp_list_mtx);
0a7de745 1718 return EPERM;
91447636 1719 }
1c79356b
A
1720
1721 /*
1722 * OK, now we're committed to doing something.
1723 */
1724 gencnt = unp_gencnt;
1725 n = unp_count;
1726
0a7de745
A
1727 bzero(&xug, sizeof(xug));
1728 xug.xug_len = sizeof(xug);
1c79356b
A
1729 xug.xug_count = n;
1730 xug.xug_gen = gencnt;
1731 xug.xug_sogen = so_gencnt;
0a7de745 1732 error = SYSCTL_OUT(req, &xug, sizeof(xug));
91447636 1733 if (error) {
f427ee49 1734 lck_rw_done(&unp_list_mtx);
0a7de745 1735 return error;
91447636 1736 }
1c79356b 1737
0b4e3aa0
A
1738 /*
1739 * We are done if there is no pcb
1740 */
0a7de745 1741 if (n == 0) {
f427ee49 1742 lck_rw_done(&unp_list_mtx);
0a7de745 1743 return 0;
91447636 1744 }
0b4e3aa0 1745
0a7de745 1746 MALLOC(unp_list, struct unpcb **, n * sizeof(*unp_list),
2d21ac55 1747 M_TEMP, M_WAITOK);
91447636 1748 if (unp_list == 0) {
f427ee49 1749 lck_rw_done(&unp_list_mtx);
0a7de745 1750 return ENOMEM;
91447636 1751 }
2d21ac55 1752
1c79356b 1753 for (unp = head->lh_first, i = 0; unp && i < n;
2d21ac55 1754 unp = unp->unp_link.le_next) {
0a7de745 1755 if (unp->unp_gencnt <= gencnt) {
1c79356b 1756 unp_list[i++] = unp;
0a7de745 1757 }
1c79356b 1758 }
0a7de745 1759 n = i; /* in case we lost some during malloc */
1c79356b
A
1760
1761 error = 0;
1762 for (i = 0; i < n; i++) {
1763 unp = unp_list[i];
1764 if (unp->unp_gencnt <= gencnt) {
1765 struct xunpcb xu;
3a60a9f5 1766
0a7de745
A
1767 bzero(&xu, sizeof(xu));
1768 xu.xu_len = sizeof(xu);
b0d623f7 1769 xu.xu_unpp = (_UNPCB_PTR(struct unpcb_compat *))
316670eb 1770 VM_KERNEL_ADDRPERM(unp);
1c79356b
A
1771 /*
1772 * XXX - need more locking here to protect against
1773 * connect/disconnect races for SMP.
1774 */
0a7de745 1775 if (unp->unp_addr) {
cb323159 1776 bcopy(unp->unp_addr, &xu.xu_au,
2d21ac55 1777 unp->unp_addr->sun_len);
0a7de745
A
1778 }
1779 if (unp->unp_conn && unp->unp_conn->unp_addr) {
1c79356b 1780 bcopy(unp->unp_conn->unp_addr,
cb323159 1781 &xu.xu_cau,
2d21ac55 1782 unp->unp_conn->unp_addr->sun_len);
0a7de745 1783 }
b0d623f7 1784 unpcb_to_compat(unp, &xu.xu_unp);
1c79356b 1785 sotoxsocket(unp->unp_socket, &xu.xu_socket);
0a7de745 1786 error = SYSCTL_OUT(req, &xu, sizeof(xu));
1c79356b
A
1787 }
1788 }
1789 if (!error) {
1790 /*
1791 * Give the user an updated idea of our state.
1792 * If the generation differs from what we told
1793 * her before, she knows that something happened
1794 * while we were processing this request, and it
1795 * might be necessary to retry.
1796 */
0a7de745
A
1797 bzero(&xug, sizeof(xug));
1798 xug.xug_len = sizeof(xug);
1c79356b
A
1799 xug.xug_gen = unp_gencnt;
1800 xug.xug_sogen = so_gencnt;
1801 xug.xug_count = unp_count;
0a7de745 1802 error = SYSCTL_OUT(req, &xug, sizeof(xug));
1c79356b
A
1803 }
1804 FREE(unp_list, M_TEMP);
f427ee49 1805 lck_rw_done(&unp_list_mtx);
0a7de745 1806 return error;
1c79356b
A
1807}
1808
fe8ab488 1809SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist,
0a7de745
A
1810 CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
1811 (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb",
1812 "List of active local datagram sockets");
fe8ab488 1813SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist,
0a7de745
A
1814 CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
1815 (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb",
1816 "List of active local stream sockets");
b0d623f7 1817
f427ee49 1818#if XNU_TARGET_OS_OSX
b0d623f7
A
1819
1820static int
1821unp_pcblist64 SYSCTL_HANDLER_ARGS
1822{
1823#pragma unused(oidp,arg2)
1824 int error, i, n;
1825 struct unpcb *unp, **unp_list;
1826 unp_gen_t gencnt;
1827 struct xunpgen xug;
1828 struct unp_head *head;
1829
f427ee49 1830 lck_rw_lock_shared(&unp_list_mtx);
b0d623f7
A
1831 head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);
1832
1833 /*
1834 * The process of preparing the PCB list is too time-consuming and
1835 * resource-intensive to repeat twice on every request.
1836 */
1837 if (req->oldptr == USER_ADDR_NULL) {
1838 n = unp_count;
0a7de745
A
1839 req->oldidx = 2 * sizeof(xug) + (n + n / 8) *
1840 (sizeof(struct xunpcb64));
f427ee49 1841 lck_rw_done(&unp_list_mtx);
0a7de745 1842 return 0;
b0d623f7
A
1843 }
1844
1845 if (req->newptr != USER_ADDR_NULL) {
f427ee49 1846 lck_rw_done(&unp_list_mtx);
0a7de745 1847 return EPERM;
b0d623f7
A
1848 }
1849
1850 /*
1851 * OK, now we're committed to doing something.
1852 */
1853 gencnt = unp_gencnt;
1854 n = unp_count;
1855
0a7de745
A
1856 bzero(&xug, sizeof(xug));
1857 xug.xug_len = sizeof(xug);
b0d623f7
A
1858 xug.xug_count = n;
1859 xug.xug_gen = gencnt;
1860 xug.xug_sogen = so_gencnt;
0a7de745 1861 error = SYSCTL_OUT(req, &xug, sizeof(xug));
b0d623f7 1862 if (error) {
f427ee49 1863 lck_rw_done(&unp_list_mtx);
0a7de745 1864 return error;
b0d623f7
A
1865 }
1866
1867 /*
1868 * We are done if there is no pcb
1869 */
0a7de745 1870 if (n == 0) {
f427ee49 1871 lck_rw_done(&unp_list_mtx);
0a7de745 1872 return 0;
b0d623f7
A
1873 }
1874
0a7de745 1875 MALLOC(unp_list, struct unpcb **, n * sizeof(*unp_list),
b0d623f7
A
1876 M_TEMP, M_WAITOK);
1877 if (unp_list == 0) {
f427ee49 1878 lck_rw_done(&unp_list_mtx);
0a7de745 1879 return ENOMEM;
b0d623f7
A
1880 }
1881
1882 for (unp = head->lh_first, i = 0; unp && i < n;
1883 unp = unp->unp_link.le_next) {
0a7de745 1884 if (unp->unp_gencnt <= gencnt) {
b0d623f7 1885 unp_list[i++] = unp;
0a7de745 1886 }
b0d623f7 1887 }
0a7de745 1888 n = i; /* in case we lost some during malloc */
b0d623f7
A
1889
1890 error = 0;
1891 for (i = 0; i < n; i++) {
1892 unp = unp_list[i];
1893 if (unp->unp_gencnt <= gencnt) {
1894 struct xunpcb64 xu;
0a7de745 1895 size_t xu_len = sizeof(struct xunpcb64);
b0d623f7
A
1896
1897 bzero(&xu, xu_len);
f427ee49 1898 xu.xu_len = (u_int32_t)xu_len;
316670eb
A
1899 xu.xu_unpp = (u_int64_t)VM_KERNEL_ADDRPERM(unp);
1900 xu.xunp_link.le_next = (u_int64_t)
1901 VM_KERNEL_ADDRPERM(unp->unp_link.le_next);
1902 xu.xunp_link.le_prev = (u_int64_t)
1903 VM_KERNEL_ADDRPERM(unp->unp_link.le_prev);
1904 xu.xunp_socket = (u_int64_t)
1905 VM_KERNEL_ADDRPERM(unp->unp_socket);
1906 xu.xunp_vnode = (u_int64_t)
1907 VM_KERNEL_ADDRPERM(unp->unp_vnode);
b0d623f7 1908 xu.xunp_ino = unp->unp_ino;
316670eb
A
1909 xu.xunp_conn = (u_int64_t)
1910 VM_KERNEL_ADDRPERM(unp->unp_conn);
1911 xu.xunp_refs = (u_int64_t)
1912 VM_KERNEL_ADDRPERM(unp->unp_refs.lh_first);
1913 xu.xunp_reflink.le_next = (u_int64_t)
1914 VM_KERNEL_ADDRPERM(unp->unp_reflink.le_next);
1915 xu.xunp_reflink.le_prev = (u_int64_t)
1916 VM_KERNEL_ADDRPERM(unp->unp_reflink.le_prev);
b0d623f7
A
1917 xu.xunp_cc = unp->unp_cc;
1918 xu.xunp_mbcnt = unp->unp_mbcnt;
1919 xu.xunp_gencnt = unp->unp_gencnt;
1920
0a7de745 1921 if (unp->unp_socket) {
b0d623f7 1922 sotoxsocket64(unp->unp_socket, &xu.xu_socket);
0a7de745 1923 }
b0d623f7
A
1924
1925 /*
1926 * XXX - need more locking here to protect against
1927 * connect/disconnect races for SMP.
1928 */
0a7de745 1929 if (unp->unp_addr) {
cb323159 1930 bcopy(unp->unp_addr, &xu.xu_au,
0a7de745
A
1931 unp->unp_addr->sun_len);
1932 }
1933 if (unp->unp_conn && unp->unp_conn->unp_addr) {
1934 bcopy(unp->unp_conn->unp_addr,
cb323159 1935 &xu.xu_cau,
0a7de745
A
1936 unp->unp_conn->unp_addr->sun_len);
1937 }
b0d623f7
A
1938
1939 error = SYSCTL_OUT(req, &xu, xu_len);
1940 }
1941 }
1942 if (!error) {
1943 /*
1944 * Give the user an updated idea of our state.
1945 * If the generation differs from what we told
1946 * her before, she knows that something happened
1947 * while we were processing this request, and it
1948 * might be necessary to retry.
1949 */
0a7de745
A
1950 bzero(&xug, sizeof(xug));
1951 xug.xug_len = sizeof(xug);
b0d623f7
A
1952 xug.xug_gen = unp_gencnt;
1953 xug.xug_sogen = so_gencnt;
1954 xug.xug_count = unp_count;
0a7de745 1955 error = SYSCTL_OUT(req, &xug, sizeof(xug));
b0d623f7
A
1956 }
1957 FREE(unp_list, M_TEMP);
f427ee49 1958 lck_rw_done(&unp_list_mtx);
0a7de745 1959 return error;
b0d623f7
A
1960}
1961
fe8ab488 1962SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist64,
0a7de745
A
1963 CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
1964 (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist64, "S,xunpcb64",
1965 "List of active local datagram sockets 64 bit");
fe8ab488 1966SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist64,
0a7de745
A
1967 CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
1968 (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist64, "S,xunpcb64",
1969 "List of active local stream sockets 64 bit");
b0d623f7 1970
f427ee49 1971#endif /* XNU_TARGET_OS_OSX */
1c79356b
A
1972
1973static void
91447636 1974unp_shutdown(struct unpcb *unp)
1c79356b 1975{
b0d623f7
A
1976 struct socket *so = unp->unp_socket;
1977 struct socket *so2;
1978 if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn) {
1979 so2 = unp->unp_conn->unp_socket;
1980 unp_get_locks_in_order(so, so2);
1981 socantrcvmore(so2);
1982 socket_unlock(so2, 1);
1983 }
1c79356b
A
1984}
1985
1986static void
2d21ac55 1987unp_drop(struct unpcb *unp, int errno)
1c79356b
A
1988{
1989 struct socket *so = unp->unp_socket;
1990
f427ee49 1991 so->so_error = (u_short)errno;
1c79356b 1992 unp_disconnect(unp);
1c79356b
A
1993}
1994
f427ee49
A
1995/* always called under uipc_lock */
1996static void
1997unp_gc_wait(void)
1998{
1999 if (unp_gcthread == current_thread()) {
2000 return;
2001 }
2002
2003 while (unp_gcing != 0) {
2004 unp_gcwait = 1;
2005 msleep(&unp_gcing, &uipc_lock, 0, "unp_gc_wait", NULL);
2006 }
2007}
2008
2009/*
2010 * fg_insertuipc_mark
2011 *
2012 * Description: Mark fileglob for insertion onto message queue if needed
2013 * Also takes fileglob reference
2014 *
2015 * Parameters: fg Fileglob pointer to insert
2016 *
2017 * Returns: true, if the fileglob needs to be inserted onto msg queue
2018 *
2019 * Locks: Takes and drops fg_lock, potentially many times
2020 */
2021static boolean_t
2022fg_insertuipc_mark(struct fileglob * fg)
2023{
2024 boolean_t insert = FALSE;
2025
2026 lck_mtx_lock_spin(&fg->fg_lock);
2027 while (fg->fg_lflags & FG_RMMSGQ) {
2028 lck_mtx_convert_spin(&fg->fg_lock);
2029
2030 fg->fg_lflags |= FG_WRMMSGQ;
2031 msleep(&fg->fg_lflags, &fg->fg_lock, 0, "fg_insertuipc", NULL);
2032 }
2033
2a1bd2d3 2034 os_ref_retain_raw(&fg->fg_count, &f_refgrp);
f427ee49
A
2035 fg->fg_msgcount++;
2036 if (fg->fg_msgcount == 1) {
2037 fg->fg_lflags |= FG_INSMSGQ;
2038 insert = TRUE;
2039 }
2040 lck_mtx_unlock(&fg->fg_lock);
2041 return insert;
2042}
2043
2044/*
2045 * fg_insertuipc
2046 *
2047 * Description: Insert marked fileglob onto message queue
2048 *
2049 * Parameters: fg Fileglob pointer to insert
2050 *
2051 * Returns: void
2052 *
2053 * Locks: Takes and drops fg_lock & uipc_lock
2054 * DO NOT call this function with proc_fdlock held as unp_gc()
2055 * can potentially try to acquire proc_fdlock, which can result
2056 * in a deadlock if this function is in unp_gc_wait().
2057 */
2058static void
2059fg_insertuipc(struct fileglob * fg)
2060{
2061 if (fg->fg_lflags & FG_INSMSGQ) {
2062 lck_mtx_lock_spin(&uipc_lock);
2063 unp_gc_wait();
2064 LIST_INSERT_HEAD(&unp_msghead, fg, f_msglist);
2065 lck_mtx_unlock(&uipc_lock);
2066 lck_mtx_lock(&fg->fg_lock);
2067 fg->fg_lflags &= ~FG_INSMSGQ;
2068 if (fg->fg_lflags & FG_WINSMSGQ) {
2069 fg->fg_lflags &= ~FG_WINSMSGQ;
2070 wakeup(&fg->fg_lflags);
2071 }
2072 lck_mtx_unlock(&fg->fg_lock);
2073 }
2074}
2075
2076/*
2077 * fg_removeuipc_mark
2078 *
2079 * Description: Mark the fileglob for removal from message queue if needed
2080 * Also releases fileglob message queue reference
2081 *
2082 * Parameters: fg Fileglob pointer to remove
2083 *
2084 * Returns: true, if the fileglob needs to be removed from msg queue
2085 *
2086 * Locks: Takes and drops fg_lock, potentially many times
2087 */
2088static boolean_t
2089fg_removeuipc_mark(struct fileglob * fg)
2090{
2091 boolean_t remove = FALSE;
2092
2093 lck_mtx_lock_spin(&fg->fg_lock);
2094 while (fg->fg_lflags & FG_INSMSGQ) {
2095 lck_mtx_convert_spin(&fg->fg_lock);
2096
2097 fg->fg_lflags |= FG_WINSMSGQ;
2098 msleep(&fg->fg_lflags, &fg->fg_lock, 0, "fg_removeuipc", NULL);
2099 }
2100 fg->fg_msgcount--;
2101 if (fg->fg_msgcount == 0) {
2102 fg->fg_lflags |= FG_RMMSGQ;
2103 remove = TRUE;
2104 }
2105 lck_mtx_unlock(&fg->fg_lock);
2106 return remove;
2107}
2108
2109/*
2110 * fg_removeuipc
2111 *
2112 * Description: Remove marked fileglob from message queue
2113 *
2114 * Parameters: fg Fileglob pointer to remove
2115 *
2116 * Returns: void
2117 *
2118 * Locks: Takes and drops fg_lock & uipc_lock
2119 * DO NOT call this function with proc_fdlock held as unp_gc()
2120 * can potentially try to acquire proc_fdlock, which can result
2121 * in a deadlock if this function is in unp_gc_wait().
2122 */
2123static void
2124fg_removeuipc(struct fileglob * fg)
2125{
2126 if (fg->fg_lflags & FG_RMMSGQ) {
2127 lck_mtx_lock_spin(&uipc_lock);
2128 unp_gc_wait();
2129 LIST_REMOVE(fg, f_msglist);
2130 lck_mtx_unlock(&uipc_lock);
2131 lck_mtx_lock(&fg->fg_lock);
2132 fg->fg_lflags &= ~FG_RMMSGQ;
2133 if (fg->fg_lflags & FG_WRMMSGQ) {
2134 fg->fg_lflags &= ~FG_WRMMSGQ;
2135 wakeup(&fg->fg_lflags);
2136 }
2137 lck_mtx_unlock(&fg->fg_lock);
2138 }
2139}
2140
2d21ac55
A
2141/*
2142 * Returns: 0 Success
2143 * EMSGSIZE The new fds will not fit
2144 * ENOBUFS Cannot alloc struct fileproc
2145 */
1c79356b 2146int
91447636 2147unp_externalize(struct mbuf *rights)
1c79356b 2148{
0a7de745 2149 proc_t p = current_proc(); /* XXX */
91447636
A
2150 int i;
2151 struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
2152 struct fileglob **rp = (struct fileglob **)(cm + 1);
b0d623f7 2153 int *fds = (int *)(cm + 1);
91447636 2154 struct fileproc *fp;
00867663 2155 struct fileproc **fileproc_l;
0a7de745 2156 int newfds = (cm->cmsg_len - sizeof(*cm)) / sizeof(int);
3e170ce0
A
2157 int f, error = 0;
2158
00867663 2159 MALLOC(fileproc_l, struct fileproc **,
0a7de745 2160 newfds * sizeof(struct fileproc *), M_TEMP, M_WAITOK);
00867663 2161 if (fileproc_l == NULL) {
3e170ce0
A
2162 error = ENOMEM;
2163 goto discard;
2164 }
1c79356b 2165
91447636 2166 proc_fdlock(p);
1c79356b
A
2167
2168 /*
2169 * if the new FDs will not fit, then we free them all
2170 */
2171 if (!fdavail(p, newfds)) {
91447636 2172 proc_fdunlock(p);
3e170ce0
A
2173 error = EMSGSIZE;
2174 goto discard;
1c79356b
A
2175 }
2176 /*
2d21ac55 2177 * now change each pointer to an fd in the global table to
1c79356b
A
2178 * an integer that is the index to the local fd table entry
2179 * that we set up to point to the global one we are transferring.
d9a64523 2180 * XXX (1) this assumes a pointer and int are the same size,
b0d623f7 2181 * XXX or the mbuf can hold the expansion
2d21ac55 2182 * XXX (2) allocation failures should be non-fatal
1c79356b
A
2183 */
2184 for (i = 0; i < newfds; i++) {
0a7de745 2185 if (fdalloc(p, 0, &f)) {
2d21ac55 2186 panic("unp_externalize:fdalloc");
0a7de745 2187 }
39236c6e 2188 fp = fileproc_alloc_init(NULL);
0a7de745 2189 if (fp == NULL) {
f427ee49 2190 panic("unp_externalize:fileproc_alloc_init");
0a7de745 2191 }
f427ee49 2192 fp->fp_glob = rp[i];
00867663 2193 if (fg_removeuipc_mark(rp[i])) {
00867663
A
2194 /*
2195 * Take an iocount on the fp for completing the
2196 * removal from the global msg queue
2197 */
f427ee49 2198 os_ref_retain_locked(&fp->fp_iocount);
00867663
A
2199 fileproc_l[i] = fp;
2200 } else {
2201 fileproc_l[i] = NULL;
2202 }
6601e61a 2203 procfdtbl_releasefd(p, f, fp);
b0d623f7 2204 fds[i] = f;
1c79356b 2205 }
91447636 2206 proc_fdunlock(p);
1c79356b 2207
3e170ce0 2208 for (i = 0; i < newfds; i++) {
00867663 2209 if (fileproc_l[i] != NULL) {
f427ee49
A
2210 VERIFY(fileproc_l[i]->fp_glob != NULL &&
2211 (fileproc_l[i]->fp_glob->fg_lflags & FG_RMMSGQ));
d9a64523 2212 VERIFY(fds[i] >= 0);
f427ee49 2213 fg_removeuipc(fileproc_l[i]->fp_glob);
00867663
A
2214
2215 /* Drop the iocount */
2216 fp_drop(p, fds[i], fileproc_l[i], 0);
2217 fileproc_l[i] = NULL;
3e170ce0 2218 }
0a7de745 2219 if (fds[i] != 0) {
3e170ce0 2220 (void) OSAddAtomic(-1, &unp_rights);
0a7de745 2221 }
3e170ce0
A
2222 }
2223
2224discard:
0a7de745 2225 if (fileproc_l != NULL) {
00867663 2226 FREE(fileproc_l, M_TEMP);
0a7de745 2227 }
3e170ce0
A
2228 if (error) {
2229 for (i = 0; i < newfds; i++) {
2230 unp_discard(*rp, p);
2231 *rp++ = NULL;
2232 }
2233 }
0a7de745 2234 return error;
1c79356b
A
2235}
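/*
 * Editor's note: an illustrative userspace sketch, not part of this file.
 * unp_externalize() above is the kernel half of SCM_RIGHTS receipt; a
 * receiver typically looks like the hypothetical helper below, which pulls
 * the first descriptor out of the control data that recvmsg(2) returns.
 */
#include <sys/socket.h>
#include <sys/uio.h>
#include <string.h>

/* Return the first fd passed over the connected AF_UNIX socket 'sock', or -1. */
static int
recv_fd(int sock)
{
	char data[1];
	char cbuf[CMSG_SPACE(sizeof(int))];
	struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
	struct msghdr msg = {
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = cbuf, .msg_controllen = sizeof(cbuf),
	};
	struct cmsghdr *cm;
	int fd = -1;

	if (recvmsg(sock, &msg, 0) < 0) {
		return -1;
	}
	for (cm = CMSG_FIRSTHDR(&msg); cm != NULL; cm = CMSG_NXTHDR(&msg, cm)) {
		if (cm->cmsg_level == SOL_SOCKET && cm->cmsg_type == SCM_RIGHTS) {
			/* unp_externalize() has already converted the in-kernel
			 * fileglob pointers into indexes in this process's fd table. */
			memcpy(&fd, CMSG_DATA(cm), sizeof(fd));
			break;
		}
	}
	return fd;
}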
2236
2237void
2238unp_init(void)
2239{
3e170ce0 2240 _CASSERT(UIPC_MAX_CMSG_FD >= (MCLBYTES / sizeof(int)));
1c79356b
A
2241 LIST_INIT(&unp_dhead);
2242 LIST_INIT(&unp_shead);
2d21ac55 2243
37839358
A
2244 /*
2245 * allocate lock group attribute and group for unp pcb mutexes
2246 */
2247 unp_mtx_grp_attr = lck_grp_attr_alloc_init();
2248
2249 unp_mtx_grp = lck_grp_alloc_init("unp_list", unp_mtx_grp_attr);
2d21ac55 2250
37839358
A
2251 unp_mtx_attr = lck_attr_alloc_init();
2252
f427ee49
A
2253 lck_mtx_init(&uipc_lock, unp_mtx_grp, unp_mtx_attr);
2254 lck_rw_init(&unp_list_mtx, unp_mtx_grp, unp_mtx_attr);
2255 lck_mtx_init(&unp_disconnect_lock, unp_mtx_grp, unp_mtx_attr);
2256 lck_mtx_init(&unp_connect_lock, unp_mtx_grp, unp_mtx_attr);
1c79356b
A
2257}
2258
2259#ifndef MIN
0a7de745 2260#define MIN(a, b) (((a) < (b)) ? (a) : (b))
1c79356b
A
2261#endif
2262
2d21ac55
A
2263/*
2264 * Returns: 0 Success
2265 * EINVAL
f427ee49 2266 * EBADF
2d21ac55 2267 */
1c79356b 2268static int
2d21ac55 2269unp_internalize(struct mbuf *control, proc_t p)
1c79356b 2270{
91447636 2271 struct cmsghdr *cm = mtod(control, struct cmsghdr *);
b0d623f7 2272 int *fds;
91447636
A
2273 struct fileglob **rp;
2274 struct fileproc *fp;
2d21ac55 2275 int i, error;
1c79356b 2276 int oldfds;
3e170ce0 2277 uint8_t fg_ins[UIPC_MAX_CMSG_FD / 8];
1c79356b 2278
2d21ac55 2279 /* 64bit: cmsg_len is 'uint32_t', m_len is 'long' */
1c79356b 2280 if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
b0d623f7 2281 (socklen_t)cm->cmsg_len != (socklen_t)control->m_len) {
0a7de745 2282 return EINVAL;
1c79356b 2283 }
0a7de745 2284 oldfds = (cm->cmsg_len - sizeof(*cm)) / sizeof(int);
3e170ce0 2285 bzero(fg_ins, sizeof(fg_ins));
1c79356b 2286
91447636 2287 proc_fdlock(p);
b0d623f7 2288 fds = (int *)(cm + 1);
91447636
A
2289
2290 for (i = 0; i < oldfds; i++) {
b0d623f7 2291 struct fileproc *tmpfp;
f427ee49 2292 if ((tmpfp = fp_get_noref_locked(p, fds[i])) == NULL) {
2d21ac55 2293 proc_fdunlock(p);
f427ee49
A
2294 return EBADF;
2295 } else if (!fg_sendable(tmpfp->fp_glob)) {
b0d623f7 2296 proc_fdunlock(p);
0a7de745 2297 return EINVAL;
a991bd8d 2298 } else if (fp_isguarded(tmpfp, GUARD_SOCKET_IPC)) {
39236c6e 2299 error = fp_guard_exception(p,
0a7de745 2300 fds[i], tmpfp, kGUARD_EXC_SOCKET_IPC);
39236c6e 2301 proc_fdunlock(p);
0a7de745 2302 return error;
2d21ac55 2303 }
91447636
A
2304 }
2305 rp = (struct fileglob **)(cm + 1);
1c79356b 2306
d9a64523 2307 /* On K64 we need to walk backwards because a fileglob * is twice the size of an fd
b0d623f7
A
2308 * and doing them in order would result in stomping over unprocessed fds
2309 */
2310 for (i = (oldfds - 1); i >= 0; i--) {
f427ee49
A
2311 fp = fp_get_noref_locked(p, fds[i]);
2312 if (fg_insertuipc_mark(fp->fp_glob)) {
3e170ce0 2313 fg_ins[i / 8] |= 0x80 >> (i % 8);
0a7de745 2314 }
f427ee49 2315 rp[i] = fp->fp_glob;
1c79356b 2316 }
91447636 2317 proc_fdunlock(p);
1c79356b 2318
3e170ce0
A
2319 for (i = 0; i < oldfds; i++) {
2320 if (fg_ins[i / 8] & (0x80 >> (i % 8))) {
2321 VERIFY(rp[i]->fg_lflags & FG_INSMSGQ);
2322 fg_insertuipc(rp[i]);
2323 }
2324 (void) OSAddAtomic(1, &unp_rights);
2325 }
2326
0a7de745 2327 return 0;
1c79356b
A
2328}
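/*
 * Editor's note: an illustrative userspace sketch, not part of this file.
 * unp_internalize() above consumes the fd array that a sender builds with an
 * SCM_RIGHTS control message; a hypothetical sender looks like this.
 */
#include <sys/socket.h>
#include <sys/uio.h>
#include <string.h>

/* Send 'fd' across the connected AF_UNIX socket 'sock'. */
static int
send_fd(int sock, int fd)
{
	char data[1] = { 0 };
	char cbuf[CMSG_SPACE(sizeof(int))];
	struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
	struct msghdr msg = {
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = cbuf, .msg_controllen = sizeof(cbuf),
	};
	struct cmsghdr *cm;

	memset(cbuf, 0, sizeof(cbuf));
	cm = CMSG_FIRSTHDR(&msg);
	cm->cmsg_level = SOL_SOCKET;
	cm->cmsg_type = SCM_RIGHTS;	/* routes the message through unp_internalize() */
	cm->cmsg_len = CMSG_LEN(sizeof(int));
	memcpy(CMSG_DATA(cm), &fd, sizeof(fd));

	return (int)sendmsg(sock, &msg, 0);
}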
2329
e2fac8b1 2330__private_extern__ void
2d21ac55 2331unp_gc(void)
1c79356b 2332{
2d21ac55
A
2333 struct fileglob *fg, *nextfg;
2334 struct socket *so;
e2fac8b1 2335 static struct fileglob **extra_ref;
b0d623f7 2336 struct fileglob **fpp;
1c79356b 2337 int nunref, i;
6601e61a 2338 int need_gcwakeup = 0;
2d21ac55 2339
f427ee49 2340 lck_mtx_lock(&uipc_lock);
91447636 2341 if (unp_gcing) {
f427ee49 2342 lck_mtx_unlock(&uipc_lock);
1c79356b 2343 return;
91447636 2344 }
1c79356b
A
2345 unp_gcing = 1;
2346 unp_defer = 0;
e2fac8b1 2347 unp_gcthread = current_thread();
f427ee49 2348 lck_mtx_unlock(&uipc_lock);
2d21ac55
A
2349 /*
2350 * before going through all this, set all FDs to
1c79356b
A
2351 * be NOT deferred and NOT externally accessible
2352 */
f427ee49
A
2353 for (fg = unp_msghead.lh_first; fg != 0; fg = fg->f_msglist.le_next) {
2354 os_atomic_andnot(&fg->fg_flag, FMARK | FDEFER, relaxed);
91447636 2355 }
1c79356b 2356 do {
f427ee49 2357 for (fg = unp_msghead.lh_first; fg != 0;
2d21ac55 2358 fg = fg->f_msglist.le_next) {
91447636 2359 lck_mtx_lock(&fg->fg_lock);
1c79356b
A
2360 /*
2361 * If the file is not open, skip it
2362 */
f427ee49 2363 if (os_ref_get_count_raw(&fg->fg_count) == 0) {
91447636 2364 lck_mtx_unlock(&fg->fg_lock);
1c79356b 2365 continue;
91447636 2366 }
1c79356b
A
2367 /*
2368 * If we already marked it as 'defer' in a
2369 * previous pass, then try to process it this time
2370 * and un-mark it
2371 */
91447636 2372 if (fg->fg_flag & FDEFER) {
f427ee49 2373 os_atomic_andnot(&fg->fg_flag, FDEFER, relaxed);
1c79356b
A
2374 unp_defer--;
2375 } else {
2376 /*
2377 * if it's not deferred, then check if it's
2378 * already marked; if so, skip it
2379 */
2d21ac55 2380 if (fg->fg_flag & FMARK) {
91447636 2381 lck_mtx_unlock(&fg->fg_lock);
1c79356b 2382 continue;
91447636 2383 }
2d21ac55 2384 /*
1c79356b 2385 * If all references are from messages
2d21ac55 2386 * in transit, then skip it; it's not
1c79356b 2387 * externally accessible.
2d21ac55 2388 */
f427ee49
A
2389 if (os_ref_get_count_raw(&fg->fg_count) ==
2390 fg->fg_msgcount) {
91447636 2391 lck_mtx_unlock(&fg->fg_lock);
1c79356b 2392 continue;
91447636 2393 }
2d21ac55 2394 /*
1c79356b
A
2395 * If it got this far then it must be
2396 * externally accessible.
2397 */
f427ee49 2398 os_atomic_or(&fg->fg_flag, FMARK, relaxed);
1c79356b
A
2399 }
2400 /*
2d21ac55 2401 * either it was deferred, or it is externally
1c79356b
A
2402 * accessible and not already marked so.
2403 * Now check if it is possibly one of OUR sockets.
2d21ac55 2404 */
39236c6e 2405 if (FILEGLOB_DTYPE(fg) != DTYPE_SOCKET ||
91447636
A
2406 (so = (struct socket *)fg->fg_data) == 0) {
2407 lck_mtx_unlock(&fg->fg_lock);
1c79356b 2408 continue;
91447636 2409 }
39236c6e 2410 if (so->so_proto->pr_domain != localdomain ||
0a7de745 2411 (so->so_proto->pr_flags & PR_RIGHTS) == 0) {
91447636 2412 lck_mtx_unlock(&fg->fg_lock);
1c79356b 2413 continue;
91447636 2414 }
1c79356b 2415#ifdef notdef
1c79356b
A
2416 if (so->so_rcv.sb_flags & SB_LOCK) {
2417 /*
2418 * This is problematical; it's not clear
2419 * we need to wait for the sockbuf to be
2420 * unlocked (on a uniprocessor, at least),
2421 * and it's also not clear what to do
2422 * if sbwait returns an error due to receipt
2423 * of a signal. If sbwait does return
2424 * an error, we'll go into an infinite
2425 * loop. Delete all of this for now.
2426 */
2427 (void) sbwait(&so->so_rcv);
2428 goto restart;
2429 }
2430#endif
2431 /*
2432 * So, Ok, it's one of our sockets and it IS externally
2433 * accessible (or was defered). Now we look
2434 * to see if we hold any file descriptors in its
2d21ac55 2435 * message buffers. Follow those links and mark them
1c79356b 2436 * as accessible too.
e2fac8b1 2437 *
d9a64523 2438 * In case a file is passed onto itself we need to
e2fac8b1 2439 * release the file lock.
1c79356b 2440 */
91447636 2441 lck_mtx_unlock(&fg->fg_lock);
e2fac8b1 2442
3e170ce0 2443 unp_scan(so->so_rcv.sb_mb, unp_mark, 0);
1c79356b
A
2444 }
2445 } while (unp_defer);
2446 /*
2447 * We grab an extra reference to each of the file table entries
2448 * that are not otherwise accessible and then free the rights
2449 * that are stored in messages on them.
2450 *
2451 * The bug in the original code is a little tricky, so I'll describe
2452 * what's wrong with it here.
2453 *
f427ee49 2454 * It is incorrect to simply unp_discard each entry for fg_msgcount
1c79356b
A
2455 * times -- consider the case of sockets A and B that contain
2456 * references to each other. On a last close of some other socket,
2457 * we trigger a gc since the number of outstanding rights (unp_rights)
2458 * is non-zero. If during the sweep phase the gc code unp_discards,
2459 * we end up doing a (full) closef on the descriptor. A closef on A
2460 * results in the following chain. Closef calls soo_close, which
2461 * calls soclose. Soclose calls first (through the switch
2462 * uipc_usrreq) unp_detach, which re-invokes unp_gc. Unp_gc simply
2463 * returns because the previous instance had set unp_gcing, and
2464 * we return all the way back to soclose, which marks the socket
2465 * with SS_NOFDREF, and then calls sofree. Sofree calls sorflush
2466 * to free up the rights that are queued in messages on the socket A,
2467 * i.e., the reference on B. The sorflush calls via the dom_dispose
2468 * switch unp_dispose, which unp_scans with unp_discard. This second
2469 * instance of unp_discard just calls closef on B.
2470 *
2471 * Well, a similar chain occurs on B, resulting in a sorflush on B,
2472 * which results in another closef on A. Unfortunately, A is already
2473 * being closed, and the descriptor has already been marked with
2474 * SS_NOFDREF, and soclose panics at this point.
2475 *
2476 * Here, we first take an extra reference to each inaccessible
2477 * descriptor. Then, we call sorflush ourself, since we know
2478 * it is a Unix domain socket anyhow. After we destroy all the
2479 * rights carried in messages, we do a last closef to get rid
2480 * of our extra reference. This is the last close, and the
2481 * unp_detach etc will shut down the socket.
2482 *
2483 * 91/09/19, bsy@cs.cmu.edu
2484 */
f427ee49
A
2485 MALLOC(extra_ref, struct fileglob **, nfiles * sizeof(struct fileglob *),
2486 M_TEMP, M_WAITOK);
0a7de745 2487 if (extra_ref == NULL) {
b0d623f7 2488 goto bail;
0a7de745 2489 }
f427ee49 2490 for (nunref = 0, fg = unp_msghead.lh_first, fpp = extra_ref; fg != 0;
91447636
A
2491 fg = nextfg) {
2492 lck_mtx_lock(&fg->fg_lock);
2493
2494 nextfg = fg->f_msglist.le_next;
2d21ac55 2495 /*
1c79356b
A
2496 * If it's not open, skip it
2497 */
f427ee49 2498 if (os_ref_get_count_raw(&fg->fg_count) == 0) {
91447636 2499 lck_mtx_unlock(&fg->fg_lock);
1c79356b 2500 continue;
91447636 2501 }
2d21ac55 2502 /*
1c79356b
A
2503 * If all refs are from msgs, and it's not marked accessible
2504 * then it must be referenced from some unreachable cycle
2505 * of (shut-down) FDs, so include it in our
2506 * list of FDs to remove
2507 */
f427ee49
A
2508 if (fg->fg_flag & FMARK) {
2509 lck_mtx_unlock(&fg->fg_lock);
2510 continue;
2511 }
2512 if (os_ref_get_count_raw(&fg->fg_count) == fg->fg_msgcount) {
2513 os_ref_retain_raw(&fg->fg_count, &f_refgrp);
91447636 2514 *fpp++ = fg;
1c79356b 2515 nunref++;
1c79356b 2516 }
91447636 2517 lck_mtx_unlock(&fg->fg_lock);
1c79356b 2518 }
2d21ac55 2519 /*
1c79356b
A
2520 * for each FD on our hit list, do the following two things
2521 */
2522 for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
91447636 2523 struct fileglob *tfg;
1c79356b 2524
91447636 2525 tfg = *fpp;
1c79356b 2526
39236c6e
A
2527 if (FILEGLOB_DTYPE(tfg) == DTYPE_SOCKET &&
2528 tfg->fg_data != NULL) {
2d21ac55
A
2529 so = (struct socket *)(tfg->fg_data);
2530
e2fac8b1 2531 socket_lock(so, 0);
d9a64523 2532
2d21ac55
A
2533 sorflush(so);
2534
e2fac8b1 2535 socket_unlock(so, 0);
91447636
A
2536 }
2537 }
0a7de745 2538 for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
f427ee49 2539 fg_drop(PROC_NULL, *fpp);
0a7de745 2540 }
2d21ac55 2541
f427ee49 2542 FREE(extra_ref, M_TEMP);
b0d623f7 2543bail:
f427ee49 2544 lck_mtx_lock(&uipc_lock);
1c79356b 2545 unp_gcing = 0;
e2fac8b1 2546 unp_gcthread = NULL;
6601e61a
A
2547
2548 if (unp_gcwait != 0) {
2549 unp_gcwait = 0;
2550 need_gcwakeup = 1;
2551 }
f427ee49 2552 lck_mtx_unlock(&uipc_lock);
6601e61a 2553
0a7de745 2554 if (need_gcwakeup != 0) {
6601e61a 2555 wakeup(&unp_gcing);
0a7de745 2556 }
1c79356b
A
2557}
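/*
 * Editor's note: an illustrative userspace sequence, not part of this file.
 * It builds the "sockets A and B holding references to each other" cycle
 * described in the comment inside unp_gc() above; once every fd is closed,
 * the only remaining references are the in-flight ones counted by
 * fg_msgcount, and unp_gc() is what reclaims them.  send_fd() is the
 * hypothetical SCM_RIGHTS helper sketched after unp_internalize(); error
 * checks are omitted.
 */
#include <sys/socket.h>
#include <unistd.h>

static void
make_unreachable_fd_cycle(void)
{
	int a[2], b[2];

	(void) socketpair(AF_UNIX, SOCK_STREAM, 0, a);
	(void) socketpair(AF_UNIX, SOCK_STREAM, 0, b);

	(void) send_fd(a[0], b[1]);	/* queued on a[1], referencing b[1] */
	(void) send_fd(b[0], a[1]);	/* queued on b[1], referencing a[1] */

	/* Drop every user-held reference; each fileglob now survives only
	 * through the message sitting in the other socket's receive buffer,
	 * which is exactly the case the mark phase refuses to treat as
	 * externally accessible. */
	close(a[0]); close(a[1]);
	close(b[0]); close(b[1]);
}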
2558
2559void
91447636 2560unp_dispose(struct mbuf *m)
1c79356b 2561{
1c79356b 2562 if (m) {
3e170ce0 2563 unp_scan(m, unp_discard, NULL);
1c79356b
A
2564 }
2565}
2566
2d21ac55
A
2567/*
2568 * Returns: 0 Success
2569 */
91447636 2570static int
2d21ac55 2571unp_listen(struct unpcb *unp, proc_t p)
91447636 2572{
0c530ab8
A
2573 kauth_cred_t safecred = kauth_cred_proc_ref(p);
2574 cru2x(safecred, &unp->unp_peercred);
2575 kauth_cred_unref(&safecred);
91447636 2576 unp->unp_flags |= UNP_HAVEPCCACHED;
0a7de745 2577 return 0;
91447636
A
2578}
2579
1c79356b 2580static void
3e170ce0 2581unp_scan(struct mbuf *m0, void (*op)(struct fileglob *, void *arg), void *arg)
1c79356b 2582{
91447636
A
2583 struct mbuf *m;
2584 struct fileglob **rp;
2585 struct cmsghdr *cm;
2586 int i;
1c79356b
A
2587 int qfds;
2588
2589 while (m0) {
0a7de745 2590 for (m = m0; m; m = m->m_next) {
1c79356b 2591 if (m->m_type == MT_CONTROL &&
0a7de745 2592 (size_t)m->m_len >= sizeof(*cm)) {
1c79356b
A
2593 cm = mtod(m, struct cmsghdr *);
2594 if (cm->cmsg_level != SOL_SOCKET ||
0a7de745 2595 cm->cmsg_type != SCM_RIGHTS) {
1c79356b 2596 continue;
0a7de745
A
2597 }
2598 qfds = (cm->cmsg_len - sizeof(*cm)) /
2599 sizeof(int);
91447636 2600 rp = (struct fileglob **)(cm + 1);
0a7de745 2601 for (i = 0; i < qfds; i++) {
3e170ce0 2602 (*op)(*rp++, arg);
0a7de745
A
2603 }
2604 break; /* XXX, but saves time */
1c79356b 2605 }
0a7de745 2606 }
1c79356b
A
2607 m0 = m0->m_act;
2608 }
2609}
2610
1c79356b 2611static void
3e170ce0 2612unp_mark(struct fileglob *fg, __unused void *arg)
1c79356b 2613{
f427ee49 2614 uint32_t oflags, nflags;
1c79356b 2615
f427ee49
A
2616 os_atomic_rmw_loop(&fg->fg_flag, oflags, nflags, relaxed, {
2617 if (oflags & FMARK) {
2618 os_atomic_rmw_loop_give_up(return );
2619 }
2620 nflags = oflags | FMARK | FDEFER;
2621 });
91447636 2622
1c79356b 2623 unp_defer++;
1c79356b
A
2624}
2625
1c79356b 2626static void
3e170ce0 2627unp_discard(struct fileglob *fg, void *p)
1c79356b 2628{
0a7de745
A
2629 if (p == NULL) {
2630 p = current_proc(); /* XXX */
2631 }
b0d623f7 2632 (void) OSAddAtomic(1, &unp_disposed);
3e170ce0
A
2633 if (fg_removeuipc_mark(fg)) {
2634 VERIFY(fg->fg_lflags & FG_RMMSGQ);
2635 fg_removeuipc(fg);
2636 }
2637 (void) OSAddAtomic(-1, &unp_rights);
91447636 2638
f427ee49 2639 (void) fg_drop(p, fg);
1c79356b 2640}
b0d623f7
A
2641
2642int
2643unp_lock(struct socket *so, int refcount, void * lr)
0a7de745
A
2644{
2645 void * lr_saved;
2646 if (lr == 0) {
2647 lr_saved = (void *) __builtin_return_address(0);
2648 } else {
2649 lr_saved = lr;
2650 }
2651
2652 if (so->so_pcb) {
2653 lck_mtx_lock(&((struct unpcb *)so->so_pcb)->unp_mtx);
2654 } else {
2655 panic("unp_lock: so=%p NO PCB! lr=%p ref=0x%x\n",
2656 so, lr_saved, so->so_usecount);
2657 }
2658
2659 if (so->so_usecount < 0) {
2660 panic("unp_lock: so=%p so_pcb=%p lr=%p ref=0x%x\n",
2661 so, so->so_pcb, lr_saved, so->so_usecount);
2662 }
2663
2664 if (refcount) {
d190cdc3
A
2665 VERIFY(so->so_usecount > 0);
2666 so->so_usecount++;
2667 }
0a7de745
A
2668 so->lock_lr[so->next_lock_lr] = lr_saved;
2669 so->next_lock_lr = (so->next_lock_lr + 1) % SO_LCKDBG_MAX;
2670 return 0;
b0d623f7
A
2671}
2672
2673int
2674unp_unlock(struct socket *so, int refcount, void * lr)
2675{
0a7de745
A
2676 void * lr_saved;
2677 lck_mtx_t * mutex_held = NULL;
b0d623f7
A
2678 struct unpcb *unp = sotounpcb(so);
2679
0a7de745
A
2680 if (lr == 0) {
2681 lr_saved = (void *) __builtin_return_address(0);
2682 } else {
2683 lr_saved = lr;
2684 }
2685
2686 if (refcount) {
2687 so->so_usecount--;
2688 }
2689
2690 if (so->so_usecount < 0) {
2691 panic("unp_unlock: so=%p usecount=%x\n", so, so->so_usecount);
2692 }
2693 if (so->so_pcb == NULL) {
2694 panic("unp_unlock: so=%p NO PCB usecount=%x\n", so, so->so_usecount);
2695 } else {
2696 mutex_held = &((struct unpcb *)so->so_pcb)->unp_mtx;
2697 }
2698 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
2699 so->unlock_lr[so->next_unlock_lr] = lr_saved;
2700 so->next_unlock_lr = (so->next_unlock_lr + 1) % SO_LCKDBG_MAX;
2701
2702 if (so->so_usecount == 0 && (so->so_flags & SOF_PCBCLEARING)) {
b0d623f7
A
2703 sofreelastref(so, 1);
2704
0a7de745 2705 if (unp->unp_addr) {
b0d623f7 2706 FREE(unp->unp_addr, M_SONAME);
0a7de745 2707 }
d9a64523 2708
b0d623f7 2709 lck_mtx_unlock(mutex_held);
b0d623f7 2710
316670eb 2711 lck_mtx_destroy(&unp->unp_mtx, unp_mtx_grp);
b0d623f7 2712 zfree(unp_zone, unp);
b0d623f7
A
2713
2714 unp_gc();
2715 } else {
2716 lck_mtx_unlock(mutex_held);
2717 }
2718
0a7de745 2719 return 0;
b0d623f7
A
2720}
2721
2722lck_mtx_t *
5ba3f43e 2723unp_getlock(struct socket *so, __unused int flags)
b0d623f7 2724{
0a7de745 2725 struct unpcb *unp = (struct unpcb *)so->so_pcb;
b0d623f7
A
2726
2727
0a7de745
A
2728 if (so->so_pcb) {
2729 if (so->so_usecount < 0) {
2730 panic("unp_getlock: so=%p usecount=%x\n", so, so->so_usecount);
2731 }
2732 return &unp->unp_mtx;
2733 } else {
2734 panic("unp_getlock: so=%p NULL so_pcb\n", so);
2735 return so->so_proto->pr_domain->dom_mtx;
2736 }
b0d623f7 2737}