/*
 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Copyright (c) 1982, 1986, 1989, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	From: @(#)uipc_usrreq.c	8.3 (Berkeley) 1/4/94
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/domain.h>
#include <sys/fcntl.h>
#include <sys/malloc.h>         /* XXX must be before <sys/file.h> */
#include <sys/file_internal.h>
#include <sys/guarded.h>
#include <sys/filedesc.h>
#include <sys/lock.h>
#include <sys/mbuf.h>
#include <sys/namei.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/un.h>
#include <sys/unpcb.h>
#include <sys/vnode_internal.h>
#include <sys/kdebug.h>
#include <sys/mcache.h>

#include <kern/zalloc.h>
#include <kern/locks.h>
#include <kern/task.h>

#if CONFIG_MACF
#include <security/mac_framework.h>
#endif /* CONFIG_MACF */

#include <mach/vm_param.h>

/*
 * Maximum number of FDs that can be passed in an mbuf
 */
#define UIPC_MAX_CMSG_FD        512

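/*
 * Illustrative user-space sketch (not part of this file): the limit above
 * caps how many descriptors one control message may carry when the kernel
 * internalizes them.  A descriptor is handed to the kernel with an
 * SCM_RIGHTS control message over a connected AF_UNIX socket, roughly as
 * follows; "sock" and "fd_to_send" are assumed to already exist and all
 * error handling is omitted.
 *
 *	#include <sys/socket.h>
 *	#include <sys/uio.h>
 *	#include <string.h>
 *
 *	static int
 *	send_one_fd(int sock, int fd_to_send)
 *	{
 *		char dummy = 'x';
 *		struct iovec iov = { .iov_base = &dummy, .iov_len = 1 };
 *		union {
 *			char buf[CMSG_SPACE(sizeof(int))];
 *			struct cmsghdr align;
 *		} u;
 *		struct msghdr msg;
 *		struct cmsghdr *cm;
 *
 *		memset(&msg, 0, sizeof(msg));
 *		msg.msg_iov = &iov;
 *		msg.msg_iovlen = 1;
 *		msg.msg_control = u.buf;
 *		msg.msg_controllen = sizeof(u.buf);
 *
 *		cm = CMSG_FIRSTHDR(&msg);
 *		cm->cmsg_level = SOL_SOCKET;
 *		cm->cmsg_type = SCM_RIGHTS;
 *		cm->cmsg_len = CMSG_LEN(sizeof(int));
 *		memcpy(CMSG_DATA(cm), &fd_to_send, sizeof(int));
 *
 *		return (int)sendmsg(sock, &msg, 0);
 *	}
 */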
ZONE_DECLARE(unp_zone, "unpzone", sizeof(struct unpcb), ZC_NONE);
static unp_gen_t unp_gencnt;
static u_int unp_count;

static LCK_ATTR_DECLARE(unp_mtx_attr, 0, 0);
static LCK_GRP_DECLARE(unp_mtx_grp, "unp_list");
static LCK_RW_DECLARE_ATTR(unp_list_mtx, &unp_mtx_grp, &unp_mtx_attr);

static LCK_MTX_DECLARE_ATTR(unp_disconnect_lock, &unp_mtx_grp, &unp_mtx_attr);
static LCK_MTX_DECLARE_ATTR(unp_connect_lock, &unp_mtx_grp, &unp_mtx_attr);
static LCK_MTX_DECLARE_ATTR(uipc_lock, &unp_mtx_grp, &unp_mtx_attr);

static u_int disconnect_in_progress;

static struct unp_head unp_shead, unp_dhead;
static int unp_defer, unp_gcing, unp_gcwait;
static thread_t unp_gcthread = NULL;
static LIST_HEAD(, fileglob) unp_msghead = LIST_HEAD_INITIALIZER(unp_msghead);


/*
 * mDNSResponder tracing.  When enabled, endpoints connected to
 * /var/run/mDNSResponder will be traced; during each send on
 * the traced socket, we log the PID and process name of the
 * sending process.  We also print out a bit of info related
 * to the data itself; this assumes ipc_msg_hdr in dnssd_ipc.h
 * of mDNSResponder stays the same.
 */
#define MDNSRESPONDER_PATH      "/var/run/mDNSResponder"

static int unpst_tracemdns;     /* enable tracing */

#define MDNS_IPC_MSG_HDR_VERSION_1      1

struct mdns_ipc_msg_hdr {
	uint32_t version;
	uint32_t datalen;
	uint32_t ipc_flags;
	uint32_t op;
	union {
		void *context;
		uint32_t u32[2];
	} __attribute__((packed));
	uint32_t reg_index;
} __attribute__((packed));

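/*
 * The tracing described above is off by default; unpst_tracemdns is
 * exported later in this file as the sysctl net.local.stream.tracemdns.
 * A minimal user-space sketch for turning it on (assumes sufficient
 * privilege; error handling omitted):
 *
 *	#include <sys/sysctl.h>
 *
 *	static int
 *	enable_mdns_trace(void)
 *	{
 *		int one = 1;
 *
 *		return sysctlbyname("net.local.stream.tracemdns",
 *		    NULL, NULL, &one, sizeof(one));
 *	}
 */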
/*
 * Unix communications domain.
 *
 * TODO:
 *	SEQPACKET, RDM
 *	rethink name space problems
 *	need a proper out-of-band
 *	lock pushdown
 */
static struct sockaddr sun_noname = { .sa_len = sizeof(sun_noname), .sa_family = AF_LOCAL, .sa_data = { 0 } };
static ino_t unp_ino;           /* prototype for fake inode numbers */

static int unp_attach(struct socket *);
static void unp_detach(struct unpcb *);
static int unp_bind(struct unpcb *, struct sockaddr *, proc_t);
static int unp_connect(struct socket *, struct sockaddr *, proc_t);
static void unp_disconnect(struct unpcb *);
static void unp_shutdown(struct unpcb *);
static void unp_drop(struct unpcb *, int);
__private_extern__ void unp_gc(void);
static void unp_scan(struct mbuf *, void (*)(struct fileglob *, void *arg), void *arg);
static void unp_mark(struct fileglob *, __unused void *);
static void unp_discard(struct fileglob *, void *);
static int unp_internalize(struct mbuf *, proc_t);
static int unp_listen(struct unpcb *, proc_t);
static void unpcb_to_compat(struct unpcb *, struct unpcb_compat *);
static void unp_get_locks_in_order(struct socket *so, struct socket *conn_so);

static void
unp_get_locks_in_order(struct socket *so, struct socket *conn_so)
{
	if (so < conn_so) {
		socket_lock(conn_so, 1);
	} else {
		struct unpcb *unp = sotounpcb(so);
		unp->unp_flags |= UNP_DONTDISCONNECT;
		unp->rw_thrcount++;
		socket_unlock(so, 0);

		/* Get the locks in the correct order */
		socket_lock(conn_so, 1);
		socket_lock(so, 0);
		unp->rw_thrcount--;
		if (unp->rw_thrcount == 0) {
			unp->unp_flags &= ~UNP_DONTDISCONNECT;
			wakeup(unp);
		}
	}
}

static int
uipc_abort(struct socket *so)
{
	struct unpcb *unp = sotounpcb(so);

	if (unp == 0) {
		return EINVAL;
	}
	unp_drop(unp, ECONNABORTED);
	unp_detach(unp);
	sofree(so);
	return 0;
}

static int
uipc_accept(struct socket *so, struct sockaddr **nam)
{
	struct unpcb *unp = sotounpcb(so);

	if (unp == 0) {
		return EINVAL;
	}

	/*
	 * Pass back name of connected socket,
	 * if it was bound and we are still connected
	 * (our peer may have closed already!).
	 */
	if (unp->unp_conn && unp->unp_conn->unp_addr) {
		*nam = dup_sockaddr((struct sockaddr *)
		    unp->unp_conn->unp_addr, 1);
	} else {
		*nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
	}
	return 0;
}

/*
 * Returns:	0			Success
 *		EISCONN
 *	unp_attach:
 */
static int
uipc_attach(struct socket *so, __unused int proto, __unused proc_t p)
{
	struct unpcb *unp = sotounpcb(so);

	if (unp != 0) {
		return EISCONN;
	}
	return unp_attach(so);
}

static int
uipc_bind(struct socket *so, struct sockaddr *nam, proc_t p)
{
	struct unpcb *unp = sotounpcb(so);

	if (unp == 0) {
		return EINVAL;
	}

	return unp_bind(unp, nam, p);
}

/*
 * Returns:	0			Success
 *		EINVAL
 *	unp_connect:???			[See elsewhere in this file]
 */
static int
uipc_connect(struct socket *so, struct sockaddr *nam, proc_t p)
{
	struct unpcb *unp = sotounpcb(so);

	if (unp == 0) {
		return EINVAL;
	}
	return unp_connect(so, nam, p);
}

/*
 * Returns:	0			Success
 *		EINVAL
 *	unp_connect2:EPROTOTYPE		Protocol wrong type for socket
 *	unp_connect2:EINVAL		Invalid argument
 */
static int
uipc_connect2(struct socket *so1, struct socket *so2)
{
	struct unpcb *unp = sotounpcb(so1);

	if (unp == 0) {
		return EINVAL;
	}

	return unp_connect2(so1, so2);
}

/* control is EOPNOTSUPP */

static int
uipc_detach(struct socket *so)
{
	struct unpcb *unp = sotounpcb(so);

	if (unp == 0) {
		return EINVAL;
	}

	LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
	unp_detach(unp);
	return 0;
}

static int
uipc_disconnect(struct socket *so)
{
	struct unpcb *unp = sotounpcb(so);

	if (unp == 0) {
		return EINVAL;
	}
	unp_disconnect(unp);
	return 0;
}

/*
 * Returns:	0			Success
 *		EINVAL
 */
static int
uipc_listen(struct socket *so, __unused proc_t p)
{
	struct unpcb *unp = sotounpcb(so);

	if (unp == 0 || unp->unp_vnode == 0) {
		return EINVAL;
	}
	return unp_listen(unp, p);
}

static int
uipc_peeraddr(struct socket *so, struct sockaddr **nam)
{
	struct unpcb *unp = sotounpcb(so);

	if (unp == NULL) {
		return EINVAL;
	}
	if (unp->unp_conn != NULL && unp->unp_conn->unp_addr != NULL) {
		*nam = dup_sockaddr((struct sockaddr *)
		    unp->unp_conn->unp_addr, 1);
	} else {
		*nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
	}
	return 0;
}

static int
uipc_rcvd(struct socket *so, __unused int flags)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;

	if (unp == 0) {
		return EINVAL;
	}
	switch (so->so_type) {
	case SOCK_DGRAM:
		panic("uipc_rcvd DGRAM?");
		/*NOTREACHED*/

	case SOCK_STREAM:
#define rcv (&so->so_rcv)
#define snd (&so2->so_snd)
		if (unp->unp_conn == 0) {
			break;
		}

		so2 = unp->unp_conn->unp_socket;
		unp_get_locks_in_order(so, so2);
		/*
		 * Adjust backpressure on sender
		 * and wakeup any waiting to write.
		 */
		snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
		unp->unp_mbcnt = rcv->sb_mbcnt;
		snd->sb_hiwat += unp->unp_cc - rcv->sb_cc;
		unp->unp_cc = rcv->sb_cc;
		if (sb_notify(&so2->so_snd)) {
			sowakeup(so2, &so2->so_snd, so);
		}

		socket_unlock(so2, 1);

#undef snd
#undef rcv
		break;

	default:
		panic("uipc_rcvd unknown socktype");
	}
	return 0;
}

/* pru_rcvoob is EOPNOTSUPP */

2d21ac55
A
413/*
414 * Returns: 0 Success
415 * EINVAL
416 * EOPNOTSUPP
417 * EPIPE
418 * ENOTCONN
419 * EISCONN
420 * unp_internalize:EINVAL
421 * unp_internalize:EBADF
422 * unp_connect:EAFNOSUPPORT Address family not supported
423 * unp_connect:EINVAL Invalid argument
424 * unp_connect:ENOTSOCK Not a socket
425 * unp_connect:ECONNREFUSED Connection refused
426 * unp_connect:EISCONN Socket is connected
427 * unp_connect:EPROTOTYPE Protocol wrong type for socket
428 * unp_connect:???
429 * sbappendaddr:ENOBUFS [5th argument, contents modified]
430 * sbappendaddr:??? [whatever a filter author chooses]
431 */
1c79356b
A
432static int
433uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
2d21ac55 434 struct mbuf *control, proc_t p)
1c79356b
A
435{
436 int error = 0;
437 struct unpcb *unp = sotounpcb(so);
438 struct socket *so2;
439
440 if (unp == 0) {
441 error = EINVAL;
442 goto release;
443 }
444 if (flags & PRUS_OOB) {
445 error = EOPNOTSUPP;
446 goto release;
447 }
448
13fec989 449 if (control) {
b0d623f7 450 /* release lock to avoid deadlock (4436174) */
2d21ac55 451 socket_unlock(so, 0);
13fec989
A
452 error = unp_internalize(control, p);
453 socket_lock(so, 0);
0a7de745 454 if (error) {
13fec989 455 goto release;
0a7de745 456 }
13fec989 457 }
1c79356b
A
458
459 switch (so->so_type) {
2d21ac55 460 case SOCK_DGRAM:
1c79356b
A
461 {
462 struct sockaddr *from;
463
464 if (nam) {
465 if (unp->unp_conn) {
466 error = EISCONN;
467 break;
468 }
469 error = unp_connect(so, nam, p);
0a7de745 470 if (error) {
eb6b6ca3 471 so->so_state &= ~SS_ISCONNECTING;
1c79356b 472 break;
0a7de745 473 }
1c79356b
A
474 } else {
475 if (unp->unp_conn == 0) {
476 error = ENOTCONN;
477 break;
478 }
479 }
b0d623f7 480
1c79356b 481 so2 = unp->unp_conn->unp_socket;
0a7de745 482 if (so != so2) {
6d2010ae 483 unp_get_locks_in_order(so, so2);
0a7de745 484 }
b0d623f7 485
0a7de745 486 if (unp->unp_addr) {
1c79356b 487 from = (struct sockaddr *)unp->unp_addr;
0a7de745 488 } else {
1c79356b 489 from = &sun_noname;
0a7de745 490 }
2d21ac55
A
491 /*
492 * sbappendaddr() will fail when the receiver runs out of
493 * space; in contrast to SOCK_STREAM, we will lose messages
494 * for the SOCK_DGRAM case when the receiver's queue overflows.
495 * SB_UNIX on the socket buffer implies that the callee will
496 * not free the control message, if any, because we would need
497 * to call unp_dispose() on it.
498 */
91447636 499 if (sbappendaddr(&so2->so_rcv, from, m, control, &error)) {
2d21ac55 500 control = NULL;
cb323159
A
501 if (sb_notify(&so2->so_rcv)) {
502 sowakeup(so2, &so2->so_rcv, so);
503 }
2d21ac55
A
504 } else if (control != NULL && error == 0) {
505 /* A socket filter took control; don't touch it */
506 control = NULL;
91447636 507 }
b0d623f7 508
0a7de745 509 if (so != so2) {
6d2010ae 510 socket_unlock(so2, 1);
0a7de745 511 }
b0d623f7 512
2d21ac55 513 m = NULL;
0a7de745 514 if (nam) {
1c79356b 515 unp_disconnect(unp);
0a7de745 516 }
1c79356b
A
517 break;
518 }
519
91447636
A
520 case SOCK_STREAM: {
521 int didreceive = 0;
0a7de745
A
522#define rcv (&so2->so_rcv)
523#define snd (&so->so_snd)
1c79356b
A
524 /* Connect if not connected yet. */
525 /*
526 * Note: A better implementation would complain
527 * if not equal to the peer's address.
528 */
529 if ((so->so_state & SS_ISCONNECTED) == 0) {
530 if (nam) {
531 error = unp_connect(so, nam, p);
0a7de745 532 if (error) {
eb6b6ca3 533 so->so_state &= ~SS_ISCONNECTING;
0a7de745
A
534 break; /* XXX */
535 }
1c79356b
A
536 } else {
537 error = ENOTCONN;
538 break;
539 }
540 }
541
542 if (so->so_state & SS_CANTSENDMORE) {
543 error = EPIPE;
544 break;
545 }
0a7de745 546 if (unp->unp_conn == 0) {
1c79356b 547 panic("uipc_send connected but no connection?");
0a7de745 548 }
b0d623f7 549
1c79356b 550 so2 = unp->unp_conn->unp_socket;
b0d623f7
A
551 unp_get_locks_in_order(so, so2);
552
d9a64523 553 /* Check socket state again as we might have unlocked the socket
b0d623f7
A
554 * while trying to get the locks in order
555 */
556
557 if ((so->so_state & SS_CANTSENDMORE)) {
558 error = EPIPE;
559 socket_unlock(so2, 1);
560 break;
d9a64523 561 }
b0d623f7 562
6d2010ae
A
563 if (unp->unp_flags & UNP_TRACE_MDNS) {
564 struct mdns_ipc_msg_hdr hdr;
565
0a7de745
A
566 if (mbuf_copydata(m, 0, sizeof(hdr), &hdr) == 0 &&
567 hdr.version == ntohl(MDNS_IPC_MSG_HDR_VERSION_1)) {
6d2010ae
A
568 printf("%s[mDNSResponder] pid=%d (%s): op=0x%x\n",
569 __func__, p->p_pid, p->p_comm, ntohl(hdr.op));
570 }
571 }
572
1c79356b 573 /*
2d21ac55
A
574 * Send to paired receive port, and then reduce send buffer
575 * hiwater marks to maintain backpressure. Wake up readers.
576 * SB_UNIX flag will allow new record to be appended to the
577 * receiver's queue even when it is already full. It is
578 * possible, however, that append might fail. In that case,
579 * we will need to call unp_dispose() on the control message;
580 * the callee will not free it since SB_UNIX is set.
1c79356b 581 */
2d21ac55
A
582 didreceive = control ?
583 sbappendcontrol(rcv, m, control, &error) : sbappend(rcv, m);
584
585 snd->sb_mbmax -= rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
1c79356b 586 unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
d9a64523 587 if ((int32_t)snd->sb_hiwat >=
fe8ab488
A
588 (int32_t)(rcv->sb_cc - unp->unp_conn->unp_cc)) {
589 snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc;
590 } else {
591 snd->sb_hiwat = 0;
592 }
1c79356b 593 unp->unp_conn->unp_cc = rcv->sb_cc;
2d21ac55
A
594 if (didreceive) {
595 control = NULL;
cb323159
A
596 if (sb_notify(&so2->so_rcv)) {
597 sowakeup(so2, &so2->so_rcv, so);
598 }
2d21ac55
A
599 } else if (control != NULL && error == 0) {
600 /* A socket filter took control; don't touch it */
601 control = NULL;
602 }
b0d623f7
A
603
604 socket_unlock(so2, 1);
2d21ac55 605 m = NULL;
1c79356b
A
606#undef snd
607#undef rcv
0a7de745
A
608 }
609 break;
1c79356b
A
610
611 default:
612 panic("uipc_send unknown socktype");
613 }
614
615 /*
616 * SEND_EOF is equivalent to a SEND followed by
617 * a SHUTDOWN.
618 */
619 if (flags & PRUS_EOF) {
620 socantsendmore(so);
621 unp_shutdown(unp);
622 }
623
2d21ac55
A
624 if (control && error != 0) {
625 socket_unlock(so, 0);
91447636 626 unp_dispose(control);
2d21ac55
A
627 socket_lock(so, 0);
628 }
91447636 629
1c79356b 630release:
0a7de745 631 if (control) {
1c79356b 632 m_freem(control);
0a7de745
A
633 }
634 if (m) {
1c79356b 635 m_freem(m);
0a7de745
A
636 }
637 return error;
1c79356b
A
638}
639
640static int
2d21ac55 641uipc_sense(struct socket *so, void *ub, int isstat64)
1c79356b
A
642{
643 struct unpcb *unp = sotounpcb(so);
644 struct socket *so2;
2d21ac55 645 blksize_t blksize;
1c79356b 646
0a7de745
A
647 if (unp == 0) {
648 return EINVAL;
649 }
2d21ac55
A
650
651 blksize = so->so_snd.sb_hiwat;
1c79356b
A
652 if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) {
653 so2 = unp->unp_conn->unp_socket;
2d21ac55 654 blksize += so2->so_rcv.sb_cc;
1c79356b 655 }
0a7de745 656 if (unp->unp_ino == 0) {
1c79356b 657 unp->unp_ino = unp_ino++;
0a7de745 658 }
2d21ac55
A
659
660 if (isstat64 != 0) {
661 struct stat64 *sb64;
662
663 sb64 = (struct stat64 *)ub;
664 sb64->st_blksize = blksize;
665 sb64->st_dev = NODEV;
666 sb64->st_ino = (ino64_t)unp->unp_ino;
667 } else {
668 struct stat *sb;
669
670 sb = (struct stat *)ub;
671 sb->st_blksize = blksize;
672 sb->st_dev = NODEV;
b0d623f7 673 sb->st_ino = (ino_t)(uintptr_t)unp->unp_ino;
2d21ac55
A
674 }
675
0a7de745 676 return 0;
1c79356b
A
677}
678
2d21ac55
A
679/*
680 * Returns: 0 Success
681 * EINVAL
682 *
683 * Notes: This is not strictly correct, as unp_shutdown() also calls
684 * socantrcvmore(). These should maybe both be conditionalized
685 * on the 'how' argument in soshutdown() as called from the
686 * shutdown() system call.
687 */
1c79356b
A
688static int
689uipc_shutdown(struct socket *so)
690{
691 struct unpcb *unp = sotounpcb(so);
692
0a7de745
A
693 if (unp == 0) {
694 return EINVAL;
695 }
1c79356b
A
696 socantsendmore(so);
697 unp_shutdown(unp);
0a7de745 698 return 0;
1c79356b
A
699}
700
2d21ac55
A
701/*
702 * Returns: 0 Success
703 * EINVAL Invalid argument
704 */
1c79356b
A
705static int
706uipc_sockaddr(struct socket *so, struct sockaddr **nam)
707{
708 struct unpcb *unp = sotounpcb(so);
709
0a7de745
A
710 if (unp == NULL) {
711 return EINVAL;
712 }
2d21ac55 713 if (unp->unp_addr != NULL) {
1c79356b 714 *nam = dup_sockaddr((struct sockaddr *)unp->unp_addr, 1);
2d21ac55
A
715 } else {
716 *nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
717 }
0a7de745 718 return 0;
1c79356b
A
719}
720
721struct pr_usrreqs uipc_usrreqs = {
0a7de745
A
722 .pru_abort = uipc_abort,
723 .pru_accept = uipc_accept,
724 .pru_attach = uipc_attach,
725 .pru_bind = uipc_bind,
726 .pru_connect = uipc_connect,
727 .pru_connect2 = uipc_connect2,
728 .pru_detach = uipc_detach,
729 .pru_disconnect = uipc_disconnect,
730 .pru_listen = uipc_listen,
731 .pru_peeraddr = uipc_peeraddr,
732 .pru_rcvd = uipc_rcvd,
733 .pru_send = uipc_send,
734 .pru_sense = uipc_sense,
735 .pru_shutdown = uipc_shutdown,
736 .pru_sockaddr = uipc_sockaddr,
737 .pru_sosend = sosend,
738 .pru_soreceive = soreceive,
1c79356b 739};
91447636
A
740
741int
2d21ac55 742uipc_ctloutput(struct socket *so, struct sockopt *sopt)
91447636
A
743{
744 struct unpcb *unp = sotounpcb(so);
39236c6e
A
745 int error = 0;
746 pid_t peerpid;
f427ee49
A
747 proc_t p;
748 task_t t;
39236c6e 749 struct socket *peerso;
91447636
A
750
751 switch (sopt->sopt_dir) {
752 case SOPT_GET:
753 switch (sopt->sopt_name) {
754 case LOCAL_PEERCRED:
2d21ac55 755 if (unp->unp_flags & UNP_HAVEPC) {
91447636 756 error = sooptcopyout(sopt, &unp->unp_peercred,
0a7de745 757 sizeof(unp->unp_peercred));
2d21ac55 758 } else {
0a7de745 759 if (so->so_type == SOCK_STREAM) {
91447636 760 error = ENOTCONN;
0a7de745 761 } else {
91447636 762 error = EINVAL;
0a7de745 763 }
91447636
A
764 }
765 break;
316670eb 766 case LOCAL_PEERPID:
39236c6e
A
767 case LOCAL_PEEREPID:
768 if (unp->unp_conn == NULL) {
316670eb 769 error = ENOTCONN;
39236c6e 770 break;
316670eb 771 }
39236c6e 772 peerso = unp->unp_conn->unp_socket;
0a7de745 773 if (peerso == NULL) {
39236c6e 774 panic("peer is connected but has no socket?");
0a7de745 775 }
39236c6e
A
776 unp_get_locks_in_order(so, peerso);
777 if (sopt->sopt_name == LOCAL_PEEREPID &&
0a7de745 778 peerso->so_flags & SOF_DELEGATED) {
39236c6e 779 peerpid = peerso->e_pid;
0a7de745 780 } else {
39236c6e 781 peerpid = peerso->last_pid;
0a7de745 782 }
39236c6e 783 socket_unlock(peerso, 1);
0a7de745 784 error = sooptcopyout(sopt, &peerpid, sizeof(peerpid));
39236c6e
A
785 break;
786 case LOCAL_PEERUUID:
787 case LOCAL_PEEREUUID:
788 if (unp->unp_conn == NULL) {
789 error = ENOTCONN;
790 break;
791 }
792 peerso = unp->unp_conn->unp_socket;
0a7de745 793 if (peerso == NULL) {
39236c6e 794 panic("peer is connected but has no socket?");
0a7de745 795 }
39236c6e
A
796 unp_get_locks_in_order(so, peerso);
797 if (sopt->sopt_name == LOCAL_PEEREUUID &&
0a7de745 798 peerso->so_flags & SOF_DELEGATED) {
39236c6e 799 error = sooptcopyout(sopt, &peerso->e_uuid,
0a7de745
A
800 sizeof(peerso->e_uuid));
801 } else {
39236c6e 802 error = sooptcopyout(sopt, &peerso->last_uuid,
0a7de745
A
803 sizeof(peerso->last_uuid));
804 }
39236c6e 805 socket_unlock(peerso, 1);
316670eb 806 break;
f427ee49
A
807 case LOCAL_PEERTOKEN:
808 if (unp->unp_conn == NULL) {
809 error = ENOTCONN;
810 break;
811 }
812 peerso = unp->unp_conn->unp_socket;
813 if (peerso == NULL) {
814 panic("peer is connected but has no socket?");
815 }
816 unp_get_locks_in_order(so, peerso);
817 peerpid = peerso->last_pid;
818 p = proc_find(peerpid);
819 if (p != PROC_NULL) {
820 t = proc_task(p);
821 if (t != TASK_NULL) {
822 audit_token_t peertoken;
823 mach_msg_type_number_t count = TASK_AUDIT_TOKEN_COUNT;
824 if (task_info(t, TASK_AUDIT_TOKEN, (task_info_t)&peertoken, &count) == KERN_SUCCESS) {
825 error = sooptcopyout(sopt, &peertoken, sizeof(peertoken));
826 } else {
827 error = EINVAL;
828 }
829 } else {
830 error = EINVAL;
831 }
832 proc_rele(p);
833 } else {
834 error = EINVAL;
835 }
836 socket_unlock(peerso, 1);
837 break;
91447636
A
838 default:
839 error = EOPNOTSUPP;
840 break;
841 }
842 break;
843 case SOPT_SET:
844 default:
845 error = EOPNOTSUPP;
846 break;
847 }
39236c6e 848
0a7de745 849 return error;
91447636 850}
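/*
 * uipc_ctloutput() above services the LOCAL_* getsockopt() options used
 * with level SOL_LOCAL (LOCAL_PEERCRED, LOCAL_PEERPID/LOCAL_PEEREPID,
 * LOCAL_PEERUUID/LOCAL_PEEREUUID, LOCAL_PEERTOKEN).  A minimal user-space
 * sketch of querying a connected AF_UNIX socket "s" for its peer's
 * credentials and PID (struct xucred and the constants come from
 * <sys/ucred.h> and <sys/un.h>; error handling omitted):
 *
 *	#include <sys/types.h>
 *	#include <sys/socket.h>
 *	#include <sys/ucred.h>
 *	#include <sys/un.h>
 *	#include <stdio.h>
 *
 *	static void
 *	show_peer(int s)
 *	{
 *		struct xucred cred;
 *		pid_t pid;
 *		socklen_t len;
 *
 *		len = sizeof(cred);
 *		if (getsockopt(s, SOL_LOCAL, LOCAL_PEERCRED, &cred, &len) == 0) {
 *			printf("peer uid %u\n", (unsigned)cred.cr_uid);
 *		}
 *		len = sizeof(pid);
 *		if (getsockopt(s, SOL_LOCAL, LOCAL_PEERPID, &pid, &len) == 0) {
 *			printf("peer pid %d\n", (int)pid);
 *		}
 *	}
 */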
2d21ac55 851
1c79356b
A
852/*
853 * Both send and receive buffers are allocated PIPSIZ bytes of buffering
854 * for stream sockets, although the total for sender and receiver is
855 * actually only PIPSIZ.
856 * Datagram sockets really use the sendspace as the maximum datagram size,
857 * and don't really want to reserve the sendspace. Their recvspace should
858 * be large enough for at least one max-size datagram plus address.
859 */
860#ifndef PIPSIZ
0a7de745 861#define PIPSIZ 8192
1c79356b 862#endif
0a7de745
A
863static u_int32_t unpst_sendspace = PIPSIZ;
864static u_int32_t unpst_recvspace = PIPSIZ;
865static u_int32_t unpdg_sendspace = 2 * 1024; /* really max datagram size */
866static u_int32_t unpdg_recvspace = 4 * 1024;
1c79356b 867
0a7de745
A
868static int unp_rights; /* file descriptors in flight */
869static int unp_disposed; /* discarded file descriptors */
1c79356b
A
870
871SYSCTL_DECL(_net_local_stream);
6d2010ae 872SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW | CTLFLAG_LOCKED,
0a7de745 873 &unpst_sendspace, 0, "");
6d2010ae 874SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW | CTLFLAG_LOCKED,
0a7de745 875 &unpst_recvspace, 0, "");
6d2010ae 876SYSCTL_INT(_net_local_stream, OID_AUTO, tracemdns, CTLFLAG_RW | CTLFLAG_LOCKED,
0a7de745 877 &unpst_tracemdns, 0, "");
1c79356b 878SYSCTL_DECL(_net_local_dgram);
6d2010ae 879SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW | CTLFLAG_LOCKED,
0a7de745 880 &unpdg_sendspace, 0, "");
6d2010ae 881SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW | CTLFLAG_LOCKED,
0a7de745 882 &unpdg_recvspace, 0, "");
1c79356b 883SYSCTL_DECL(_net_local);
6d2010ae 884SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD | CTLFLAG_LOCKED, &unp_rights, 0, "");
1c79356b 885
2d21ac55
A
886/*
887 * Returns: 0 Success
888 * ENOBUFS
889 * soreserve:ENOBUFS
890 */
1c79356b 891static int
91447636 892unp_attach(struct socket *so)
1c79356b 893{
91447636
A
894 struct unpcb *unp;
895 int error = 0;
1c79356b
A
896
897 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
898 switch (so->so_type) {
1c79356b
A
899 case SOCK_STREAM:
900 error = soreserve(so, unpst_sendspace, unpst_recvspace);
901 break;
902
903 case SOCK_DGRAM:
904 error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
905 break;
906
907 default:
908 panic("unp_attach");
909 }
0a7de745
A
910 if (error) {
911 return error;
912 }
1c79356b 913 }
2d21ac55 914 unp = (struct unpcb *)zalloc(unp_zone);
0a7de745
A
915 if (unp == NULL) {
916 return ENOBUFS;
917 }
918 bzero(unp, sizeof(*unp));
b0d623f7 919
c3c9b80d 920 lck_mtx_init(&unp->unp_mtx, &unp_mtx_grp, &unp_mtx_attr);
b0d623f7 921
f427ee49 922 lck_rw_lock_exclusive(&unp_list_mtx);
1c79356b
A
923 LIST_INIT(&unp->unp_refs);
924 unp->unp_socket = so;
91447636
A
925 unp->unp_gencnt = ++unp_gencnt;
926 unp_count++;
2d21ac55
A
927 LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ?
928 &unp_dhead : &unp_shead, unp, unp_link);
f427ee49 929 lck_rw_done(&unp_list_mtx);
1c79356b 930 so->so_pcb = (caddr_t)unp;
2d21ac55
A
931 /*
932 * Mark AF_UNIX socket buffers accordingly so that:
933 *
934 * a. In the SOCK_STREAM case, socket buffer append won't fail due to
935 * the lack of space; this essentially loosens the sbspace() check,
936 * since there is disconnect between sosend() and uipc_send() with
937 * respect to flow control that might result in our dropping the
938 * data in uipc_send(). By setting this, we allow for slightly
939 * more records to be appended to the receiving socket to avoid
940 * losing data (which we can't afford in the SOCK_STREAM case).
941 * Flow control still takes place since we adjust the sender's
942 * hiwat during each send. This doesn't affect the SOCK_DGRAM
943 * case and append would still fail when the queue overflows.
944 *
945 * b. In the presence of control messages containing internalized
946 * file descriptors, the append routines will not free them since
947 * we'd need to undo the work first via unp_dispose().
948 */
949 so->so_rcv.sb_flags |= SB_UNIX;
950 so->so_snd.sb_flags |= SB_UNIX;
0a7de745 951 return 0;
1c79356b
A
952}
953
954static void
91447636 955unp_detach(struct unpcb *unp)
1c79356b 956{
b7266188
A
957 int so_locked = 1;
958
f427ee49 959 lck_rw_lock_exclusive(&unp_list_mtx);
1c79356b 960 LIST_REMOVE(unp, unp_link);
d9a64523 961 --unp_count;
316670eb 962 ++unp_gencnt;
f427ee49 963 lck_rw_done(&unp_list_mtx);
1c79356b 964 if (unp->unp_vnode) {
b0d623f7
A
965 struct vnode *tvp = NULL;
966 socket_unlock(unp->unp_socket, 0);
967
968 /* Holding unp_connect_lock will avoid a race between
969 * a thread closing the listening socket and a thread
970 * connecting to it.
971 */
f427ee49 972 lck_mtx_lock(&unp_connect_lock);
b0d623f7
A
973 socket_lock(unp->unp_socket, 0);
974 if (unp->unp_vnode) {
975 tvp = unp->unp_vnode;
976 unp->unp_vnode->v_socket = NULL;
977 unp->unp_vnode = NULL;
978 }
f427ee49 979 lck_mtx_unlock(&unp_connect_lock);
0a7de745
A
980 if (tvp != NULL) {
981 vnode_rele(tvp); /* drop the usecount */
982 }
1c79356b 983 }
0a7de745 984 if (unp->unp_conn) {
1c79356b 985 unp_disconnect(unp);
0a7de745 986 }
b0d623f7 987 while (unp->unp_refs.lh_first) {
b7266188
A
988 struct unpcb *unp2 = NULL;
989
990 /* This datagram socket is connected to one or more
991 * sockets. In order to avoid a race condition between removing
d9a64523 992 * this reference and closing the connected socket, we need
b7266188
A
993 * to check disconnect_in_progress
994 */
995 if (so_locked == 1) {
996 socket_unlock(unp->unp_socket, 0);
997 so_locked = 0;
998 }
f427ee49 999 lck_mtx_lock(&unp_disconnect_lock);
b7266188 1000 while (disconnect_in_progress != 0) {
f427ee49 1001 (void)msleep((caddr_t)&disconnect_in_progress, &unp_disconnect_lock,
0a7de745 1002 PSOCK, "disconnect", NULL);
b7266188
A
1003 }
1004 disconnect_in_progress = 1;
f427ee49 1005 lck_mtx_unlock(&unp_disconnect_lock);
b7266188
A
1006
1007 /* Now we are sure that any unpcb socket disconnect is not happening */
1008 if (unp->unp_refs.lh_first != NULL) {
0a7de745
A
1009 unp2 = unp->unp_refs.lh_first;
1010 socket_lock(unp2->unp_socket, 1);
b7266188 1011 }
d9a64523 1012
f427ee49 1013 lck_mtx_lock(&unp_disconnect_lock);
b7266188
A
1014 disconnect_in_progress = 0;
1015 wakeup(&disconnect_in_progress);
f427ee49 1016 lck_mtx_unlock(&unp_disconnect_lock);
d9a64523 1017
b7266188
A
1018 if (unp2 != NULL) {
1019 /* We already locked this socket and have a reference on it */
0a7de745
A
1020 unp_drop(unp2, ECONNRESET);
1021 socket_unlock(unp2->unp_socket, 1);
b7266188
A
1022 }
1023 }
1024
1025 if (so_locked == 0) {
b0d623f7 1026 socket_lock(unp->unp_socket, 0);
b7266188 1027 so_locked = 1;
b0d623f7 1028 }
1c79356b 1029 soisdisconnected(unp->unp_socket);
2d21ac55
A
1030 /* makes sure we're getting dealloced */
1031 unp->unp_socket->so_flags |= SOF_PCBCLEARING;
1c79356b
A
1032}
1033
2d21ac55
A
1034/*
1035 * Returns: 0 Success
1036 * EAFNOSUPPORT
1037 * EINVAL
1038 * EADDRINUSE
1039 * namei:??? [anything namei can return]
1040 * vnode_authorize:??? [anything vnode_authorize can return]
1041 *
1042 * Notes: p at this point is the current process, as this function is
1043 * only called by sobind().
1044 */
1c79356b 1045static int
91447636
A
1046unp_bind(
1047 struct unpcb *unp,
1048 struct sockaddr *nam,
2d21ac55 1049 proc_t p)
1c79356b
A
1050{
1051 struct sockaddr_un *soun = (struct sockaddr_un *)nam;
91447636
A
1052 struct vnode *vp, *dvp;
1053 struct vnode_attr va;
2d21ac55 1054 vfs_context_t ctx = vfs_context_current();
1c79356b
A
1055 int error, namelen;
1056 struct nameidata nd;
b0d623f7 1057 struct socket *so = unp->unp_socket;
1c79356b
A
1058 char buf[SOCK_MAXADDRLEN];
1059
2d21ac55 1060 if (nam->sa_family != 0 && nam->sa_family != AF_UNIX) {
0a7de745 1061 return EAFNOSUPPORT;
2d21ac55 1062 }
91447636 1063
5ba3f43e
A
1064 /*
1065 * Check if the socket is already bound to an address
1066 */
0a7de745
A
1067 if (unp->unp_vnode != NULL) {
1068 return EINVAL;
1069 }
5ba3f43e
A
1070 /*
1071 * Check if the socket may have been shut down
1072 */
1073 if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
0a7de745
A
1074 (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
1075 return EINVAL;
1076 }
5ba3f43e 1077
1c79356b 1078 namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
0a7de745
A
1079 if (namelen <= 0) {
1080 return EINVAL;
1081 }
490019cf
A
1082 /*
1083 * Note: sun_path is not a zero terminated "C" string
1084 */
0a7de745
A
1085 if (namelen >= SOCK_MAXADDRLEN) {
1086 return EINVAL;
1087 }
490019cf
A
1088 bcopy(soun->sun_path, buf, namelen);
1089 buf[namelen] = 0;
d9a64523 1090
b0d623f7
A
1091 socket_unlock(so, 0);
1092
6d2010ae 1093 NDINIT(&nd, CREATE, OP_MKFIFO, FOLLOW | LOCKPARENT, UIO_SYSSPACE,
2d21ac55
A
1094 CAST_USER_ADDR_T(buf), ctx);
1095 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
1c79356b
A
1096 error = namei(&nd);
1097 if (error) {
b0d623f7 1098 socket_lock(so, 0);
0a7de745 1099 return error;
1c79356b 1100 }
91447636 1101 dvp = nd.ni_dvp;
1c79356b 1102 vp = nd.ni_vp;
91447636 1103
1c79356b 1104 if (vp != NULL) {
2d21ac55 1105 /*
91447636
A
1106 * need to do this before the vnode_put of dvp
1107 * since we may have to release an fs_nodelock
1108 */
1109 nameidone(&nd);
1110
1111 vnode_put(dvp);
1112 vnode_put(vp);
1113
b0d623f7 1114 socket_lock(so, 0);
0a7de745 1115 return EADDRINUSE;
1c79356b 1116 }
91447636 1117
2d21ac55
A
1118 VATTR_INIT(&va);
1119 VATTR_SET(&va, va_type, VSOCK);
1120 VATTR_SET(&va, va_mode, (ACCESSPERMS & ~p->p_fd->fd_cmask));
1121
b0d623f7 1122#if CONFIG_MACF
2d21ac55
A
1123 error = mac_vnode_check_create(ctx,
1124 nd.ni_dvp, &nd.ni_cnd, &va);
1125
1126 if (error == 0)
b0d623f7
A
1127#endif /* CONFIG_MACF */
1128#if CONFIG_MACF_SOCKET_SUBSET
1129 error = mac_vnode_check_uipc_bind(ctx,
1130 nd.ni_dvp, &nd.ni_cnd, &va);
1131
1132 if (error == 0)
1133#endif /* MAC_SOCKET_SUBSET */
91447636 1134 /* authorize before creating */
2d21ac55 1135 error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);
91447636
A
1136
1137 if (!error) {
91447636 1138 /* create the socket */
6d2010ae 1139 error = vn_create(dvp, &vp, &nd, &va, 0, 0, NULL, ctx);
91447636 1140 }
2d21ac55 1141
91447636
A
1142 nameidone(&nd);
1143 vnode_put(dvp);
1144
1c79356b 1145 if (error) {
b0d623f7 1146 socket_lock(so, 0);
0a7de745 1147 return error;
1c79356b 1148 }
0a7de745 1149
b0d623f7 1150 socket_lock(so, 0);
0a7de745
A
1151
1152 if (unp->unp_vnode != NULL) {
1153 vnode_put(vp); /* drop the iocount */
1154 return EINVAL;
1155 }
1156
1157 error = vnode_ref(vp); /* gain a longterm reference */
1158 if (error) {
1159 vnode_put(vp); /* drop the iocount */
1160 return error;
1161 }
1162
1c79356b
A
1163 vp->v_socket = unp->unp_socket;
1164 unp->unp_vnode = vp;
1165 unp->unp_addr = (struct sockaddr_un *)dup_sockaddr(nam, 1);
0a7de745 1166 vnode_put(vp); /* drop the iocount */
91447636 1167
0a7de745 1168 return 0;
1c79356b
A
1169}
1170
2d21ac55
A
1171
1172/*
1173 * Returns: 0 Success
1174 * EAFNOSUPPORT Address family not supported
1175 * EINVAL Invalid argument
1176 * ENOTSOCK Not a socket
1177 * ECONNREFUSED Connection refused
1178 * EPROTOTYPE Protocol wrong type for socket
1179 * EISCONN Socket is connected
1180 * unp_connect2:EPROTOTYPE Protocol wrong type for socket
1181 * unp_connect2:EINVAL Invalid argument
1182 * namei:??? [anything namei can return]
1183 * vnode_authorize:???? [anything vnode_authorize can return]
1184 *
1185 * Notes: p at this point is the current process, as this function is
1186 * only called by sosend(), sendfile(), and soconnectlock().
1187 */
1c79356b 1188static int
2d21ac55 1189unp_connect(struct socket *so, struct sockaddr *nam, __unused proc_t p)
1c79356b 1190{
91447636
A
1191 struct sockaddr_un *soun = (struct sockaddr_un *)nam;
1192 struct vnode *vp;
0a7de745 1193 struct socket *so2, *so3, *list_so = NULL;
91447636 1194 struct unpcb *unp, *unp2, *unp3;
2d21ac55 1195 vfs_context_t ctx = vfs_context_current();
1c79356b
A
1196 int error, len;
1197 struct nameidata nd;
1198 char buf[SOCK_MAXADDRLEN];
1199
2d21ac55 1200 if (nam->sa_family != 0 && nam->sa_family != AF_UNIX) {
0a7de745 1201 return EAFNOSUPPORT;
2d21ac55
A
1202 }
1203
b0d623f7 1204 unp = sotounpcb(so);
cc9f6e38 1205 so2 = so3 = NULL;
91447636 1206
1c79356b 1207 len = nam->sa_len - offsetof(struct sockaddr_un, sun_path);
0a7de745
A
1208 if (len <= 0) {
1209 return EINVAL;
1210 }
490019cf
A
1211 /*
1212 * Note: sun_path is not a zero terminated "C" string
1213 */
0a7de745
A
1214 if (len >= SOCK_MAXADDRLEN) {
1215 return EINVAL;
1216 }
eb6b6ca3
A
1217
1218 soisconnecting(so);
1219
490019cf
A
1220 bcopy(soun->sun_path, buf, len);
1221 buf[len] = 0;
2d21ac55 1222
b0d623f7 1223 socket_unlock(so, 0);
1c79356b 1224
6d2010ae 1225 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
2d21ac55 1226 CAST_USER_ADDR_T(buf), ctx);
1c79356b
A
1227 error = namei(&nd);
1228 if (error) {
b0d623f7 1229 socket_lock(so, 0);
0a7de745 1230 return error;
1c79356b 1231 }
91447636 1232 nameidone(&nd);
1c79356b
A
1233 vp = nd.ni_vp;
1234 if (vp->v_type != VSOCK) {
1235 error = ENOTSOCK;
b0d623f7
A
1236 socket_lock(so, 0);
1237 goto out;
1c79356b 1238 }
91447636 1239
b0d623f7 1240#if CONFIG_MACF_SOCKET_SUBSET
39037602 1241 error = mac_vnode_check_uipc_connect(ctx, vp, so);
b0d623f7
A
1242 if (error) {
1243 socket_lock(so, 0);
1244 goto out;
1245 }
1246#endif /* MAC_SOCKET_SUBSET */
1247
2d21ac55 1248 error = vnode_authorize(vp, NULL, KAUTH_VNODE_WRITE_DATA, ctx);
b0d623f7
A
1249 if (error) {
1250 socket_lock(so, 0);
1251 goto out;
1252 }
1253
f427ee49 1254 lck_mtx_lock(&unp_connect_lock);
b0d623f7
A
1255
1256 if (vp->v_socket == 0) {
f427ee49 1257 lck_mtx_unlock(&unp_connect_lock);
1c79356b 1258 error = ECONNREFUSED;
b0d623f7
A
1259 socket_lock(so, 0);
1260 goto out;
1c79356b 1261 }
91447636 1262
b0d623f7
A
1263 socket_lock(vp->v_socket, 1); /* Get a reference on the listening socket */
1264 so2 = vp->v_socket;
f427ee49 1265 lck_mtx_unlock(&unp_connect_lock);
91447636 1266
b0d623f7
A
1267
1268 if (so2->so_pcb == NULL) {
1269 error = ECONNREFUSED;
6d2010ae
A
1270 if (so != so2) {
1271 socket_unlock(so2, 1);
1272 socket_lock(so, 0);
1273 } else {
1274 /* Release the reference held for the listen socket */
d190cdc3 1275 VERIFY(so2->so_usecount > 0);
6d2010ae
A
1276 so2->so_usecount--;
1277 }
b0d623f7 1278 goto out;
1c79356b 1279 }
2d21ac55 1280
b0d623f7
A
1281 if (so < so2) {
1282 socket_unlock(so2, 0);
1283 socket_lock(so, 0);
1284 socket_lock(so2, 0);
6d2010ae 1285 } else if (so > so2) {
b0d623f7
A
1286 socket_lock(so, 0);
1287 }
55e303ae
A
1288 /*
1289 * Check if socket was connected while we were trying to
b0d623f7 1290 * get the socket locks in order.
55e303ae
A
1291 * XXX - probably shouldn't return an error for SOCK_DGRAM
1292 */
1293 if ((so->so_state & SS_ISCONNECTED) != 0) {
1294 error = EISCONN;
6d2010ae 1295 goto decref_out;
b0d623f7
A
1296 }
1297
1298 if (so->so_type != so2->so_type) {
b0d623f7 1299 error = EPROTOTYPE;
6d2010ae 1300 goto decref_out;
55e303ae 1301 }
2d21ac55 1302
1c79356b 1303 if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
b0d623f7
A
1304 /* Release the incoming socket but keep a reference */
1305 socket_unlock(so, 0);
1306
1c79356b 1307 if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
91447636 1308 (so3 = sonewconn(so2, 0, nam)) == 0) {
1c79356b 1309 error = ECONNREFUSED;
316670eb
A
1310 if (so != so2) {
1311 socket_unlock(so2, 1);
1312 socket_lock(so, 0);
1313 } else {
1314 socket_lock(so, 0);
1315 /* Release the reference held for
1316 * listen socket.
1317 */
d190cdc3 1318 VERIFY(so2->so_usecount > 0);
316670eb
A
1319 so2->so_usecount--;
1320 }
b0d623f7 1321 goto out;
1c79356b
A
1322 }
1323 unp2 = sotounpcb(so2);
1324 unp3 = sotounpcb(so3);
0a7de745 1325 if (unp2->unp_addr) {
1c79356b 1326 unp3->unp_addr = (struct sockaddr_un *)
2d21ac55 1327 dup_sockaddr((struct sockaddr *)unp2->unp_addr, 1);
0a7de745 1328 }
91447636
A
1329
1330 /*
1331 * unp_peercred management:
1332 *
1333 * The connecter's (client's) credentials are copied
1334 * from its process structure at the time of connect()
1335 * (which is now).
1336 */
2d21ac55 1337 cru2x(vfs_context_ucred(ctx), &unp3->unp_peercred);
91447636
A
1338 unp3->unp_flags |= UNP_HAVEPC;
1339 /*
1340 * The receiver's (server's) credentials are copied
1341 * from the unp_peercred member of socket on which the
1342 * former called listen(); unp_listen() cached that
1343 * process's credentials at that time so we can use
1344 * them now.
1345 */
1346 KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED,
1347 ("unp_connect: listener without cached peercred"));
b0d623f7
A
1348
1349 /* Here we need to have both so and so2 locks and so2
1350 * is already locked. Lock ordering is required.
1351 */
1352 if (so < so2) {
1353 socket_unlock(so2, 0);
1354 socket_lock(so, 0);
1355 socket_lock(so2, 0);
1356 } else {
1357 socket_lock(so, 0);
1358 }
1359
1360 /* Check again if the socket state changed when its lock was released */
1361 if ((so->so_state & SS_ISCONNECTED) != 0) {
1362 error = EISCONN;
1363 socket_unlock(so2, 1);
1364 socket_lock(so3, 0);
1365 sofreelastref(so3, 1);
0a7de745 1366 goto out;
b0d623f7 1367 }
91447636 1368 memcpy(&unp->unp_peercred, &unp2->unp_peercred,
0a7de745 1369 sizeof(unp->unp_peercred));
91447636
A
1370 unp->unp_flags |= UNP_HAVEPC;
1371
b0d623f7
A
1372 /* Hold the reference on listening socket until the end */
1373 socket_unlock(so2, 0);
1374 list_so = so2;
1375
1376 /* Lock ordering doesn't matter because so3 was just created */
1377 socket_lock(so3, 1);
1c79356b 1378 so2 = so3;
b0d623f7 1379
6d2010ae
A
1380 /*
1381 * Enable tracing for mDNSResponder endpoints. (The use
1382 * of sizeof instead of strlen below takes the null
1383 * terminating character into account.)
1384 */
1385 if (unpst_tracemdns &&
1386 !strncmp(soun->sun_path, MDNSRESPONDER_PATH,
0a7de745 1387 sizeof(MDNSRESPONDER_PATH))) {
6d2010ae
A
1388 unp->unp_flags |= UNP_TRACE_MDNS;
1389 unp2->unp_flags |= UNP_TRACE_MDNS;
1390 }
1c79356b 1391 }
d9a64523 1392
1c79356b 1393 error = unp_connect2(so, so2);
6d2010ae
A
1394
1395decref_out:
b0d623f7 1396 if (so2 != NULL) {
6d2010ae
A
1397 if (so != so2) {
1398 socket_unlock(so2, 1);
1399 } else {
1400 /* Release the extra reference held for the listen socket.
1401 * This is possible only for SOCK_DGRAM sockets. We refuse
1402 * connecting to the same socket for SOCK_STREAM sockets.
1403 */
d190cdc3 1404 VERIFY(so2->so_usecount > 0);
6d2010ae
A
1405 so2->so_usecount--;
1406 }
b0d623f7
A
1407 }
1408
1409 if (list_so != NULL) {
1410 socket_lock(list_so, 0);
1411 socket_unlock(list_so, 1);
1412 }
6d2010ae 1413
b0d623f7 1414out:
5ba3f43e 1415 LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
91447636 1416 vnode_put(vp);
0a7de745 1417 return error;
1c79356b
A
1418}
1419
2d21ac55
A
1420/*
1421 * Returns: 0 Success
1422 * EPROTOTYPE Protocol wrong type for socket
1423 * EINVAL Invalid argument
1424 */
1c79356b 1425int
2d21ac55 1426unp_connect2(struct socket *so, struct socket *so2)
1c79356b 1427{
91447636
A
1428 struct unpcb *unp = sotounpcb(so);
1429 struct unpcb *unp2;
1c79356b 1430
0a7de745
A
1431 if (so2->so_type != so->so_type) {
1432 return EPROTOTYPE;
1433 }
b0d623f7 1434
1c79356b 1435 unp2 = sotounpcb(so2);
0b4e3aa0 1436
5ba3f43e
A
1437 LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1438 LCK_MTX_ASSERT(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);
b0d623f7 1439
0b4e3aa0 1440 /* Verify both sockets are still opened */
0a7de745
A
1441 if (unp == 0 || unp2 == 0) {
1442 return EINVAL;
1443 }
0b4e3aa0 1444
1c79356b 1445 unp->unp_conn = unp2;
d9a64523
A
1446 so2->so_usecount++;
1447
1c79356b 1448 switch (so->so_type) {
1c79356b
A
1449 case SOCK_DGRAM:
1450 LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink);
b0d623f7 1451
d9a64523 1452 if (so != so2) {
6d2010ae 1453 /* Avoid lock order reversals due to drop/acquire in soisconnected. */
0a7de745 1454 /* Keep an extra reference on so2 that will be dropped
d9a64523
A
1455 * soon after getting the locks in order
1456 */
6d2010ae
A
1457 socket_unlock(so2, 0);
1458 soisconnected(so);
1459 unp_get_locks_in_order(so, so2);
d190cdc3 1460 VERIFY(so2->so_usecount > 0);
6d2010ae
A
1461 so2->so_usecount--;
1462 } else {
1463 soisconnected(so);
1464 }
b0d623f7 1465
1c79356b
A
1466 break;
1467
1468 case SOCK_STREAM:
2d21ac55
A
1469 /* This takes care of socketpair */
1470 if (!(unp->unp_flags & UNP_HAVEPC) &&
1471 !(unp2->unp_flags & UNP_HAVEPC)) {
1472 cru2x(kauth_cred_get(), &unp->unp_peercred);
1473 unp->unp_flags |= UNP_HAVEPC;
1474
1475 cru2x(kauth_cred_get(), &unp2->unp_peercred);
1476 unp2->unp_flags |= UNP_HAVEPC;
1477 }
1c79356b 1478 unp2->unp_conn = unp;
b0d623f7
A
1479 so->so_usecount++;
1480
1481 /* Avoid lock order reversals due to drop/acquire in soisconnected. */
1482 socket_unlock(so, 0);
1c79356b 1483 soisconnected(so2);
b0d623f7
A
1484
1485 /* Keep an extra reference on so2, that will be dropped soon after
1486 * getting the locks in order again.
1487 */
1488 socket_unlock(so2, 0);
1489
1490 socket_lock(so, 0);
1491 soisconnected(so);
1492
1493 unp_get_locks_in_order(so, so2);
1494 /* Decrement the extra reference left before */
d190cdc3 1495 VERIFY(so2->so_usecount > 0);
b0d623f7 1496 so2->so_usecount--;
1c79356b
A
1497 break;
1498
1499 default:
b0d623f7 1500 panic("unknown socket type %d in unp_connect2", so->so_type);
1c79356b 1501 }
5ba3f43e
A
1502 LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1503 LCK_MTX_ASSERT(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);
0a7de745 1504 return 0;
1c79356b
A
1505}
1506
1507static void
91447636 1508unp_disconnect(struct unpcb *unp)
1c79356b 1509{
b0d623f7
A
1510 struct unpcb *unp2 = NULL;
1511 struct socket *so2 = NULL, *so;
1512 struct socket *waitso;
1513 int so_locked = 1, strdisconn = 0;
1c79356b 1514
b0d623f7
A
1515 so = unp->unp_socket;
1516 if (unp->unp_conn == NULL) {
1c79356b 1517 return;
b0d623f7 1518 }
f427ee49 1519 lck_mtx_lock(&unp_disconnect_lock);
b0d623f7
A
1520 while (disconnect_in_progress != 0) {
1521 if (so_locked == 1) {
1522 socket_unlock(so, 0);
1523 so_locked = 0;
1524 }
f427ee49 1525 (void)msleep((caddr_t)&disconnect_in_progress, &unp_disconnect_lock,
0a7de745 1526 PSOCK, "disconnect", NULL);
b0d623f7
A
1527 }
1528 disconnect_in_progress = 1;
f427ee49 1529 lck_mtx_unlock(&unp_disconnect_lock);
b0d623f7
A
1530
1531 if (so_locked == 0) {
1532 socket_lock(so, 0);
1533 so_locked = 1;
1534 }
1535
1536 unp2 = unp->unp_conn;
1537
1538 if (unp2 == 0 || unp2->unp_socket == NULL) {
1539 goto out;
1540 }
1541 so2 = unp2->unp_socket;
1542
1543try_again:
6d2010ae
A
1544 if (so == so2) {
1545 if (so_locked == 0) {
1546 socket_lock(so, 0);
1547 }
1548 waitso = so;
1549 } else if (so < so2) {
b0d623f7
A
1550 if (so_locked == 0) {
1551 socket_lock(so, 0);
1552 }
1553 socket_lock(so2, 1);
1554 waitso = so2;
1555 } else {
d9a64523 1556 if (so_locked == 1) {
b0d623f7
A
1557 socket_unlock(so, 0);
1558 }
1559 socket_lock(so2, 1);
1560 socket_lock(so, 0);
1561 waitso = so;
1562 }
6d2010ae 1563 so_locked = 1;
b0d623f7 1564
5ba3f43e
A
1565 LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1566 LCK_MTX_ASSERT(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);
b0d623f7
A
1567
1568 /* Check for the UNP_DONTDISCONNECT flag, if it
1569 * is set, release both sockets and go to sleep
1570 */
d9a64523 1571
b0d623f7 1572 if ((((struct unpcb *)waitso->so_pcb)->unp_flags & UNP_DONTDISCONNECT) != 0) {
6d2010ae
A
1573 if (so != so2) {
1574 socket_unlock(so2, 1);
1575 }
b0d623f7
A
1576 so_locked = 0;
1577
d9a64523 1578 (void)msleep(waitso->so_pcb, &unp->unp_mtx,
0a7de745 1579 PSOCK | PDROP, "unpdisconnect", NULL);
b0d623f7
A
1580 goto try_again;
1581 }
d9a64523 1582
b0d623f7
A
1583 if (unp->unp_conn == NULL) {
1584 panic("unp_conn became NULL after sleep");
1585 }
1586
2d21ac55 1587 unp->unp_conn = NULL;
d190cdc3 1588 VERIFY(so2->so_usecount > 0);
b0d623f7
A
1589 so2->so_usecount--;
1590
0a7de745 1591 if (unp->unp_flags & UNP_TRACE_MDNS) {
6d2010ae 1592 unp->unp_flags &= ~UNP_TRACE_MDNS;
0a7de745 1593 }
6d2010ae 1594
1c79356b 1595 switch (unp->unp_socket->so_type) {
1c79356b
A
1596 case SOCK_DGRAM:
1597 LIST_REMOVE(unp, unp_reflink);
1598 unp->unp_socket->so_state &= ~SS_ISCONNECTED;
0a7de745 1599 if (so != so2) {
6d2010ae 1600 socket_unlock(so2, 1);
0a7de745 1601 }
1c79356b
A
1602 break;
1603
1604 case SOCK_STREAM:
2d21ac55 1605 unp2->unp_conn = NULL;
5ba3f43e 1606 VERIFY(so->so_usecount > 0);
b0d623f7
A
1607 so->so_usecount--;
1608
1609 /* Set the socket state correctly but do a wakeup later when
1610 * we release all locks except the socket lock, this will avoid
1611 * a deadlock.
1612 */
0a7de745
A
1613 unp->unp_socket->so_state &= ~(SS_ISCONNECTING | SS_ISCONNECTED | SS_ISDISCONNECTING);
1614 unp->unp_socket->so_state |= (SS_CANTRCVMORE | SS_CANTSENDMORE | SS_ISDISCONNECTED);
b0d623f7 1615
0a7de745
A
1616 unp2->unp_socket->so_state &= ~(SS_ISCONNECTING | SS_ISCONNECTED | SS_ISDISCONNECTING);
1617 unp->unp_socket->so_state |= (SS_CANTRCVMORE | SS_CANTSENDMORE | SS_ISDISCONNECTED);
6d2010ae 1618
0a7de745 1619 if (unp2->unp_flags & UNP_TRACE_MDNS) {
6d2010ae 1620 unp2->unp_flags &= ~UNP_TRACE_MDNS;
0a7de745 1621 }
6d2010ae 1622
b0d623f7 1623 strdisconn = 1;
1c79356b 1624 break;
b0d623f7
A
1625 default:
1626 panic("unknown socket type %d", so->so_type);
1c79356b 1627 }
b0d623f7 1628out:
f427ee49 1629 lck_mtx_lock(&unp_disconnect_lock);
b0d623f7
A
1630 disconnect_in_progress = 0;
1631 wakeup(&disconnect_in_progress);
f427ee49 1632 lck_mtx_unlock(&unp_disconnect_lock);
1c79356b 1633
b0d623f7
A
1634 if (strdisconn) {
1635 socket_unlock(so, 0);
1636 soisdisconnected(so2);
1637 socket_unlock(so2, 1);
1c79356b 1638
0a7de745 1639 socket_lock(so, 0);
b0d623f7
A
1640 soisdisconnected(so);
1641 }
5ba3f43e 1642 LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
b0d623f7 1643 return;
1c79356b 1644}
b0d623f7
A
1645
1646/*
1647 * unpcb_to_compat copies specific bits of a unpcb to a unpcb_compat format.
1648 * The unpcb_compat data structure is passed to user space and must not change.
1649 */
1650static void
1651unpcb_to_compat(struct unpcb *up, struct unpcb_compat *cp)
1652{
1653#if defined(__LP64__)
316670eb
A
1654 cp->unp_link.le_next = (u_int32_t)
1655 VM_KERNEL_ADDRPERM(up->unp_link.le_next);
1656 cp->unp_link.le_prev = (u_int32_t)
1657 VM_KERNEL_ADDRPERM(up->unp_link.le_prev);
b0d623f7 1658#else
316670eb
A
1659 cp->unp_link.le_next = (struct unpcb_compat *)
1660 VM_KERNEL_ADDRPERM(up->unp_link.le_next);
1661 cp->unp_link.le_prev = (struct unpcb_compat **)
1662 VM_KERNEL_ADDRPERM(up->unp_link.le_prev);
b0d623f7 1663#endif
316670eb
A
1664 cp->unp_socket = (_UNPCB_PTR(struct socket *))
1665 VM_KERNEL_ADDRPERM(up->unp_socket);
1666 cp->unp_vnode = (_UNPCB_PTR(struct vnode *))
1667 VM_KERNEL_ADDRPERM(up->unp_vnode);
b0d623f7
A
1668 cp->unp_ino = up->unp_ino;
1669 cp->unp_conn = (_UNPCB_PTR(struct unpcb_compat *))
316670eb
A
1670 VM_KERNEL_ADDRPERM(up->unp_conn);
1671 cp->unp_refs = (u_int32_t)VM_KERNEL_ADDRPERM(up->unp_refs.lh_first);
b0d623f7
A
1672#if defined(__LP64__)
1673 cp->unp_reflink.le_next =
316670eb 1674 (u_int32_t)VM_KERNEL_ADDRPERM(up->unp_reflink.le_next);
b0d623f7 1675 cp->unp_reflink.le_prev =
316670eb 1676 (u_int32_t)VM_KERNEL_ADDRPERM(up->unp_reflink.le_prev);
b0d623f7
A
1677#else
1678 cp->unp_reflink.le_next =
316670eb 1679 (struct unpcb_compat *)VM_KERNEL_ADDRPERM(up->unp_reflink.le_next);
b0d623f7 1680 cp->unp_reflink.le_prev =
316670eb 1681 (struct unpcb_compat **)VM_KERNEL_ADDRPERM(up->unp_reflink.le_prev);
1c79356b 1682#endif
b0d623f7 1683 cp->unp_addr = (_UNPCB_PTR(struct sockaddr_un *))
316670eb 1684 VM_KERNEL_ADDRPERM(up->unp_addr);
b0d623f7
A
1685 cp->unp_cc = up->unp_cc;
1686 cp->unp_mbcnt = up->unp_mbcnt;
1687 cp->unp_gencnt = up->unp_gencnt;
1688}
1c79356b
A
1689
1690static int
1691unp_pcblist SYSCTL_HANDLER_ARGS
1692{
2d21ac55 1693#pragma unused(oidp,arg2)
1c79356b
A
1694 int error, i, n;
1695 struct unpcb *unp, **unp_list;
1696 unp_gen_t gencnt;
1697 struct xunpgen xug;
1698 struct unp_head *head;
1699
f427ee49 1700 lck_rw_lock_shared(&unp_list_mtx);
1c79356b
A
1701 head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);
1702
1703 /*
1704 * The process of preparing the PCB list is too time-consuming and
1705 * resource-intensive to repeat twice on every request.
1706 */
91447636 1707 if (req->oldptr == USER_ADDR_NULL) {
1c79356b 1708 n = unp_count;
0a7de745
A
1709 req->oldidx = 2 * sizeof(xug) + (n + n / 8) *
1710 sizeof(struct xunpcb);
f427ee49 1711 lck_rw_done(&unp_list_mtx);
0a7de745 1712 return 0;
1c79356b
A
1713 }
1714
91447636 1715 if (req->newptr != USER_ADDR_NULL) {
f427ee49 1716 lck_rw_done(&unp_list_mtx);
0a7de745 1717 return EPERM;
91447636 1718 }
1c79356b
A
1719
1720 /*
1721 * OK, now we're committed to doing something.
1722 */
1723 gencnt = unp_gencnt;
1724 n = unp_count;
1725
0a7de745
A
1726 bzero(&xug, sizeof(xug));
1727 xug.xug_len = sizeof(xug);
1c79356b
A
1728 xug.xug_count = n;
1729 xug.xug_gen = gencnt;
1730 xug.xug_sogen = so_gencnt;
0a7de745 1731 error = SYSCTL_OUT(req, &xug, sizeof(xug));
91447636 1732 if (error) {
f427ee49 1733 lck_rw_done(&unp_list_mtx);
0a7de745 1734 return error;
91447636 1735 }
1c79356b 1736
0b4e3aa0
A
1737 /*
1738 * We are done if there is no pcb
1739 */
0a7de745 1740 if (n == 0) {
f427ee49 1741 lck_rw_done(&unp_list_mtx);
0a7de745 1742 return 0;
91447636 1743 }
0b4e3aa0 1744
c3c9b80d
A
1745 size_t unp_list_len = n * sizeof(*unp_list);
1746 unp_list = kheap_alloc(KHEAP_TEMP, unp_list_len, Z_WAITOK);
91447636 1747 if (unp_list == 0) {
f427ee49 1748 lck_rw_done(&unp_list_mtx);
0a7de745 1749 return ENOMEM;
91447636 1750 }
2d21ac55 1751
1c79356b 1752 for (unp = head->lh_first, i = 0; unp && i < n;
2d21ac55 1753 unp = unp->unp_link.le_next) {
0a7de745 1754 if (unp->unp_gencnt <= gencnt) {
1c79356b 1755 unp_list[i++] = unp;
0a7de745 1756 }
1c79356b 1757 }
0a7de745 1758 n = i; /* in case we lost some during malloc */
1c79356b
A
1759
1760 error = 0;
1761 for (i = 0; i < n; i++) {
1762 unp = unp_list[i];
1763 if (unp->unp_gencnt <= gencnt) {
1764 struct xunpcb xu;
3a60a9f5 1765
0a7de745
A
1766 bzero(&xu, sizeof(xu));
1767 xu.xu_len = sizeof(xu);
b0d623f7 1768 xu.xu_unpp = (_UNPCB_PTR(struct unpcb_compat *))
316670eb 1769 VM_KERNEL_ADDRPERM(unp);
1c79356b
A
1770 /*
1771 * XXX - need more locking here to protect against
1772 * connect/disconnect races for SMP.
1773 */
0a7de745 1774 if (unp->unp_addr) {
cb323159 1775 bcopy(unp->unp_addr, &xu.xu_au,
2d21ac55 1776 unp->unp_addr->sun_len);
0a7de745
A
1777 }
1778 if (unp->unp_conn && unp->unp_conn->unp_addr) {
1c79356b 1779 bcopy(unp->unp_conn->unp_addr,
cb323159 1780 &xu.xu_cau,
2d21ac55 1781 unp->unp_conn->unp_addr->sun_len);
0a7de745 1782 }
b0d623f7 1783 unpcb_to_compat(unp, &xu.xu_unp);
1c79356b 1784 sotoxsocket(unp->unp_socket, &xu.xu_socket);
0a7de745 1785 error = SYSCTL_OUT(req, &xu, sizeof(xu));
1c79356b
A
1786 }
1787 }
1788 if (!error) {
1789 /*
1790 * Give the user an updated idea of our state.
1791 * If the generation differs from what we told
1792 * her before, she knows that something happened
1793 * while we were processing this request, and it
1794 * might be necessary to retry.
1795 */
0a7de745
A
1796 bzero(&xug, sizeof(xug));
1797 xug.xug_len = sizeof(xug);
1c79356b
A
1798 xug.xug_gen = unp_gencnt;
1799 xug.xug_sogen = so_gencnt;
1800 xug.xug_count = unp_count;
0a7de745 1801 error = SYSCTL_OUT(req, &xug, sizeof(xug));
1c79356b 1802 }
c3c9b80d 1803 kheap_free(KHEAP_TEMP, unp_list, unp_list_len);
f427ee49 1804 lck_rw_done(&unp_list_mtx);
0a7de745 1805 return error;
1c79356b
A
1806}
1807
fe8ab488 1808SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist,
0a7de745
A
1809 CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
1810 (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb",
1811 "List of active local datagram sockets");
fe8ab488 1812SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist,
0a7de745
A
1813 CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
1814 (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb",
1815 "List of active local stream sockets");
b0d623f7 1816
f427ee49 1817#if XNU_TARGET_OS_OSX
b0d623f7
A
1818
1819static int
1820unp_pcblist64 SYSCTL_HANDLER_ARGS
1821{
1822#pragma unused(oidp,arg2)
1823 int error, i, n;
1824 struct unpcb *unp, **unp_list;
1825 unp_gen_t gencnt;
1826 struct xunpgen xug;
1827 struct unp_head *head;
1828
f427ee49 1829 lck_rw_lock_shared(&unp_list_mtx);
b0d623f7
A
1830 head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);
1831
1832 /*
1833 * The process of preparing the PCB list is too time-consuming and
1834 * resource-intensive to repeat twice on every request.
1835 */
1836 if (req->oldptr == USER_ADDR_NULL) {
1837 n = unp_count;
0a7de745
A
1838 req->oldidx = 2 * sizeof(xug) + (n + n / 8) *
1839 (sizeof(struct xunpcb64));
f427ee49 1840 lck_rw_done(&unp_list_mtx);
0a7de745 1841 return 0;
b0d623f7
A
1842 }
1843
1844 if (req->newptr != USER_ADDR_NULL) {
f427ee49 1845 lck_rw_done(&unp_list_mtx);
0a7de745 1846 return EPERM;
b0d623f7
A
1847 }
1848
1849 /*
1850 * OK, now we're committed to doing something.
1851 */
1852 gencnt = unp_gencnt;
1853 n = unp_count;
1854
0a7de745
A
1855 bzero(&xug, sizeof(xug));
1856 xug.xug_len = sizeof(xug);
b0d623f7
A
1857 xug.xug_count = n;
1858 xug.xug_gen = gencnt;
1859 xug.xug_sogen = so_gencnt;
0a7de745 1860 error = SYSCTL_OUT(req, &xug, sizeof(xug));
b0d623f7 1861 if (error) {
f427ee49 1862 lck_rw_done(&unp_list_mtx);
0a7de745 1863 return error;
b0d623f7
A
1864 }
1865
1866 /*
1867 * We are done if there is no pcb
1868 */
0a7de745 1869 if (n == 0) {
f427ee49 1870 lck_rw_done(&unp_list_mtx);
0a7de745 1871 return 0;
b0d623f7
A
1872 }
1873
c3c9b80d
A
1874 size_t unp_list_size = n * sizeof(*unp_list);
1875 unp_list = kheap_alloc(KHEAP_TEMP, unp_list_size, Z_WAITOK);
b0d623f7 1876 if (unp_list == 0) {
f427ee49 1877 lck_rw_done(&unp_list_mtx);
0a7de745 1878 return ENOMEM;
b0d623f7
A
1879 }
1880
1881 for (unp = head->lh_first, i = 0; unp && i < n;
1882 unp = unp->unp_link.le_next) {
0a7de745 1883 if (unp->unp_gencnt <= gencnt) {
b0d623f7 1884 unp_list[i++] = unp;
0a7de745 1885 }
b0d623f7 1886 }
0a7de745 1887 n = i; /* in case we lost some during malloc */
b0d623f7
A
1888
1889 error = 0;
1890 for (i = 0; i < n; i++) {
1891 unp = unp_list[i];
1892 if (unp->unp_gencnt <= gencnt) {
1893 struct xunpcb64 xu;
0a7de745 1894 size_t xu_len = sizeof(struct xunpcb64);
b0d623f7
A
1895
1896 bzero(&xu, xu_len);
f427ee49 1897 xu.xu_len = (u_int32_t)xu_len;
316670eb
A
1898 xu.xu_unpp = (u_int64_t)VM_KERNEL_ADDRPERM(unp);
1899 xu.xunp_link.le_next = (u_int64_t)
1900 VM_KERNEL_ADDRPERM(unp->unp_link.le_next);
1901 xu.xunp_link.le_prev = (u_int64_t)
1902 VM_KERNEL_ADDRPERM(unp->unp_link.le_prev);
1903 xu.xunp_socket = (u_int64_t)
1904 VM_KERNEL_ADDRPERM(unp->unp_socket);
1905 xu.xunp_vnode = (u_int64_t)
1906 VM_KERNEL_ADDRPERM(unp->unp_vnode);
b0d623f7 1907 xu.xunp_ino = unp->unp_ino;
316670eb
A
1908 xu.xunp_conn = (u_int64_t)
1909 VM_KERNEL_ADDRPERM(unp->unp_conn);
1910 xu.xunp_refs = (u_int64_t)
1911 VM_KERNEL_ADDRPERM(unp->unp_refs.lh_first);
1912 xu.xunp_reflink.le_next = (u_int64_t)
1913 VM_KERNEL_ADDRPERM(unp->unp_reflink.le_next);
1914 xu.xunp_reflink.le_prev = (u_int64_t)
1915 VM_KERNEL_ADDRPERM(unp->unp_reflink.le_prev);
b0d623f7
A
1916 xu.xunp_cc = unp->unp_cc;
1917 xu.xunp_mbcnt = unp->unp_mbcnt;
1918 xu.xunp_gencnt = unp->unp_gencnt;
1919
0a7de745 1920 if (unp->unp_socket) {
b0d623f7 1921 sotoxsocket64(unp->unp_socket, &xu.xu_socket);
0a7de745 1922 }
b0d623f7
A
1923
1924 /*
1925 * XXX - need more locking here to protect against
1926 * connect/disconnect races for SMP.
1927 */
0a7de745 1928 if (unp->unp_addr) {
cb323159 1929 bcopy(unp->unp_addr, &xu.xu_au,
0a7de745
A
1930 unp->unp_addr->sun_len);
1931 }
1932 if (unp->unp_conn && unp->unp_conn->unp_addr) {
1933 bcopy(unp->unp_conn->unp_addr,
cb323159 1934 &xu.xu_cau,
0a7de745
A
1935 unp->unp_conn->unp_addr->sun_len);
1936 }
b0d623f7
A
1937
1938 error = SYSCTL_OUT(req, &xu, xu_len);
1939 }
1940 }
1941 if (!error) {
1942 /*
1943 * Give the user an updated idea of our state.
1944 * If the generation differs from what we told
1945 * her before, she knows that something happened
1946 * while we were processing this request, and it
1947 * might be necessary to retry.
1948 */
0a7de745
A
1949 bzero(&xug, sizeof(xug));
1950 xug.xug_len = sizeof(xug);
b0d623f7
A
1951 xug.xug_gen = unp_gencnt;
1952 xug.xug_sogen = so_gencnt;
1953 xug.xug_count = unp_count;
0a7de745 1954 error = SYSCTL_OUT(req, &xug, sizeof(xug));
b0d623f7 1955 }
c3c9b80d 1956 kheap_free(KHEAP_TEMP, unp_list, unp_list_size);
f427ee49 1957 lck_rw_done(&unp_list_mtx);
0a7de745 1958 return error;
b0d623f7
A
1959}
1960
fe8ab488 1961SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist64,
0a7de745
A
1962 CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
1963 (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist64, "S,xunpcb64",
1964 "List of active local datagram sockets 64 bit");
fe8ab488 1965SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist64,
0a7de745
A
1966 CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
1967 (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist64, "S,xunpcb64",
1968 "List of active local stream sockets 64 bit");
b0d623f7 1969
f427ee49 1970#endif /* XNU_TARGET_OS_OSX */
1c79356b
A
1971
1972static void
91447636 1973unp_shutdown(struct unpcb *unp)
1c79356b 1974{
b0d623f7
A
1975 struct socket *so = unp->unp_socket;
1976 struct socket *so2;
1977 if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn) {
1978 so2 = unp->unp_conn->unp_socket;
1979 unp_get_locks_in_order(so, so2);
1980 socantrcvmore(so2);
1981 socket_unlock(so2, 1);
1982 }
1c79356b
A
1983}
1984
1985static void
2d21ac55 1986unp_drop(struct unpcb *unp, int errno)
1c79356b
A
1987{
1988 struct socket *so = unp->unp_socket;
1989
f427ee49 1990 so->so_error = (u_short)errno;
1c79356b 1991 unp_disconnect(unp);
1c79356b
A
1992}
1993
f427ee49
A
1994/* always called under uipc_lock */
1995static void
1996unp_gc_wait(void)
1997{
1998 if (unp_gcthread == current_thread()) {
1999 return;
2000 }
2001
2002 while (unp_gcing != 0) {
2003 unp_gcwait = 1;
2004 msleep(&unp_gcing, &uipc_lock, 0, "unp_gc_wait", NULL);
2005 }
2006}
2007
2008/*
2009 * fg_insertuipc_mark
2010 *
2011 * Description: Mark fileglob for insertion onto message queue if needed
2012 * Also takes fileglob reference
2013 *
2014 * Parameters: fg Fileglob pointer to insert
2015 *
2016 * Returns: true, if the fileglob needs to be inserted onto msg queue
2017 *
2018 * Locks: Takes and drops fg_lock, potentially many times
2019 */
2020static boolean_t
2021fg_insertuipc_mark(struct fileglob * fg)
2022{
2023 boolean_t insert = FALSE;
2024
2025 lck_mtx_lock_spin(&fg->fg_lock);
2026 while (fg->fg_lflags & FG_RMMSGQ) {
2027 lck_mtx_convert_spin(&fg->fg_lock);
2028
2029 fg->fg_lflags |= FG_WRMMSGQ;
2030 msleep(&fg->fg_lflags, &fg->fg_lock, 0, "fg_insertuipc", NULL);
2031 }
2032
2a1bd2d3 2033 os_ref_retain_raw(&fg->fg_count, &f_refgrp);
f427ee49
A
2034 fg->fg_msgcount++;
2035 if (fg->fg_msgcount == 1) {
2036 fg->fg_lflags |= FG_INSMSGQ;
2037 insert = TRUE;
2038 }
2039 lck_mtx_unlock(&fg->fg_lock);
2040 return insert;
2041}
2042
2043/*
2044 * fg_insertuipc
2045 *
2046 * Description: Insert marked fileglob onto message queue
2047 *
2048 * Parameters: fg Fileglob pointer to insert
2049 *
2050 * Returns: void
2051 *
2052 * Locks: Takes and drops fg_lock & uipc_lock
2053 * DO NOT call this function with proc_fdlock held as unp_gc()
2054 * can potentially try to acquire proc_fdlock, which can result
2055 * in a deadlock if this function is in unp_gc_wait().
2056 */
2057static void
2058fg_insertuipc(struct fileglob * fg)
2059{
2060 if (fg->fg_lflags & FG_INSMSGQ) {
2061 lck_mtx_lock_spin(&uipc_lock);
2062 unp_gc_wait();
2063 LIST_INSERT_HEAD(&unp_msghead, fg, f_msglist);
2064 lck_mtx_unlock(&uipc_lock);
2065 lck_mtx_lock(&fg->fg_lock);
2066 fg->fg_lflags &= ~FG_INSMSGQ;
2067 if (fg->fg_lflags & FG_WINSMSGQ) {
2068 fg->fg_lflags &= ~FG_WINSMSGQ;
2069 wakeup(&fg->fg_lflags);
2070 }
2071 lck_mtx_unlock(&fg->fg_lock);
2072 }
2073}
2074
2075/*
2076 * fg_removeuipc_mark
2077 *
2078 * Description: Mark the fileglob for removal from message queue if needed
2079 * Also releases fileglob message queue reference
2080 *
2081 * Parameters: fg Fileglob pointer to remove
2082 *
2083 * Returns: true, if the fileglob needs to be removed from msg queue
2084 *
2085 * Locks: Takes and drops fg_lock, potentially many times
2086 */
2087static boolean_t
2088fg_removeuipc_mark(struct fileglob * fg)
2089{
2090 boolean_t remove = FALSE;
2091
2092 lck_mtx_lock_spin(&fg->fg_lock);
2093 while (fg->fg_lflags & FG_INSMSGQ) {
2094 lck_mtx_convert_spin(&fg->fg_lock);
2095
2096 fg->fg_lflags |= FG_WINSMSGQ;
2097 msleep(&fg->fg_lflags, &fg->fg_lock, 0, "fg_removeuipc", NULL);
2098 }
2099 fg->fg_msgcount--;
2100 if (fg->fg_msgcount == 0) {
2101 fg->fg_lflags |= FG_RMMSGQ;
2102 remove = TRUE;
2103 }
2104 lck_mtx_unlock(&fg->fg_lock);
2105 return remove;
2106}
2107
2108/*
2109 * fg_removeuipc
2110 *
2111 * Description: Remove marked fileglob from message queue
2112 *
2113 * Parameters: fg Fileglob pointer to remove
2114 *
2115 * Returns: void
2116 *
2117 * Locks: Takes and drops fg_lock & uipc_lock
2118 * DO NOT call this function with proc_fdlock held as unp_gc()
2119 * can potentially try to acquire proc_fdlock, which can result
2120 * in a deadlock if this function is in unp_gc_wait().
2121 */
2122static void
2123fg_removeuipc(struct fileglob * fg)
2124{
2125 if (fg->fg_lflags & FG_RMMSGQ) {
2126 lck_mtx_lock_spin(&uipc_lock);
2127 unp_gc_wait();
2128 LIST_REMOVE(fg, f_msglist);
2129 lck_mtx_unlock(&uipc_lock);
2130 lck_mtx_lock(&fg->fg_lock);
2131 fg->fg_lflags &= ~FG_RMMSGQ;
2132 if (fg->fg_lflags & FG_WRMMSGQ) {
2133 fg->fg_lflags &= ~FG_WRMMSGQ;
2134 wakeup(&fg->fg_lflags);
2135 }
2136 lck_mtx_unlock(&fg->fg_lock);
2137 }
2138}
2139
2d21ac55
A
2140/*
2141 * Returns: 0 Success
2142 * EMSGSIZE The new fd's will not fit
2143 * ENOBUFS Cannot alloc struct fileproc
2144 */
1c79356b 2145int
91447636 2146unp_externalize(struct mbuf *rights)
1c79356b 2147{
0a7de745 2148 proc_t p = current_proc(); /* XXX */
91447636
A
2149 int i;
2150 struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
2151 struct fileglob **rp = (struct fileglob **)(cm + 1);
b0d623f7 2152 int *fds = (int *)(cm + 1);
91447636 2153 struct fileproc *fp;
00867663 2154 struct fileproc **fileproc_l;
0a7de745 2155 int newfds = (cm->cmsg_len - sizeof(*cm)) / sizeof(int);
3e170ce0
A
2156 int f, error = 0;
2157
c3c9b80d
A
2158 fileproc_l = kheap_alloc(KHEAP_TEMP,
2159 newfds * sizeof(struct fileproc *), Z_WAITOK);
00867663 2160 if (fileproc_l == NULL) {
3e170ce0
A
2161 error = ENOMEM;
2162 goto discard;
2163 }
1c79356b 2164
91447636 2165 proc_fdlock(p);
1c79356b
A
2166
2167 /*
2168 * if the new FDs will not fit, then we free them all
2169 */
2170 if (!fdavail(p, newfds)) {
91447636 2171 proc_fdunlock(p);
3e170ce0
A
2172 error = EMSGSIZE;
2173 goto discard;
1c79356b
A
2174 }
2175 /*
2d21ac55 2176 * now change each pointer to an fd in the global table to
1c79356b
A
2177 * an integer that is the index to the local fd table entry
2178 * that we set up to point to the global one we are transferring.
d9a64523 2179 * XXX (1) this assumes a pointer and int are the same size,
b0d623f7 2180 * XXX or the mbuf can hold the expansion
2d21ac55 2181 * XXX (2) allocation failures should be non-fatal
1c79356b
A
2182 */
2183 for (i = 0; i < newfds; i++) {
0a7de745 2184 if (fdalloc(p, 0, &f)) {
2d21ac55 2185 panic("unp_externalize:fdalloc");
0a7de745 2186 }
39236c6e 2187 fp = fileproc_alloc_init(NULL);
0a7de745 2188 if (fp == NULL) {
f427ee49 2189 panic("unp_externalize:fileproc_alloc_init");
0a7de745 2190 }
f427ee49 2191 fp->fp_glob = rp[i];
00867663 2192 if (fg_removeuipc_mark(rp[i])) {
00867663
A
2193 /*
2194 * Take an iocount on the fp for completing the
2195 * removal from the global msg queue
2196 */
f427ee49 2197 os_ref_retain_locked(&fp->fp_iocount);
00867663
A
2198 fileproc_l[i] = fp;
2199 } else {
2200 fileproc_l[i] = NULL;
2201 }
6601e61a 2202 procfdtbl_releasefd(p, f, fp);
b0d623f7 2203 fds[i] = f;
1c79356b 2204 }
91447636 2205 proc_fdunlock(p);
1c79356b 2206
3e170ce0 2207 for (i = 0; i < newfds; i++) {
00867663 2208 if (fileproc_l[i] != NULL) {
f427ee49
A
2209 VERIFY(fileproc_l[i]->fp_glob != NULL &&
2210 (fileproc_l[i]->fp_glob->fg_lflags & FG_RMMSGQ));
d9a64523 2211 VERIFY(fds[i] >= 0);
f427ee49 2212 fg_removeuipc(fileproc_l[i]->fp_glob);
00867663
A
2213
2214 /* Drop the iocount */
2215 fp_drop(p, fds[i], fileproc_l[i], 0);
2216 fileproc_l[i] = NULL;
3e170ce0 2217 }
0a7de745 2218 if (fds[i] != 0) {
3e170ce0 2219 (void) OSAddAtomic(-1, &unp_rights);
0a7de745 2220 }
3e170ce0
A
2221 }
2222
2223discard:
c3c9b80d
A
2224 kheap_free(KHEAP_TEMP, fileproc_l,
2225 newfds * sizeof(struct fileproc *));
3e170ce0
A
2226 if (error) {
2227 for (i = 0; i < newfds; i++) {
2228 unp_discard(*rp, p);
2229 *rp++ = NULL;
2230 }
2231 }
0a7de745 2232 return error;
1c79356b
A
2233}
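
/*
 * Editorial sketch, not part of the kernel source: the userspace side of
 * the hand-off that unp_externalize() completes.  recvmsg() surfaces the
 * freshly allocated descriptor numbers (the fds[] written above) in an
 * SCM_RIGHTS control message.  recv_one_fd is an illustrative helper
 * name, not an existing API.
 */
#include <sys/socket.h>
#include <sys/uio.h>
#include <string.h>

static int
recv_one_fd(int sock)
{
	char datum;
	struct iovec iov = { .iov_base = &datum, .iov_len = sizeof(datum) };
	union {
		struct cmsghdr hdr;
		char buf[CMSG_SPACE(sizeof(int))];
	} ctl;
	struct msghdr msg = {
		.msg_iov = &iov,
		.msg_iovlen = 1,
		.msg_control = ctl.buf,
		.msg_controllen = sizeof(ctl.buf),
	};
	struct cmsghdr *cm;
	int fd = -1;

	if (recvmsg(sock, &msg, 0) < 0) {
		return -1;
	}
	cm = CMSG_FIRSTHDR(&msg);
	if (cm == NULL || cm->cmsg_level != SOL_SOCKET ||
	    cm->cmsg_type != SCM_RIGHTS) {
		return -1;
	}
	/* The descriptor was installed in our fd table by unp_externalize(). */
	memcpy(&fd, CMSG_DATA(cm), sizeof(fd));
	return fd;
}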
2234
2235void
2236unp_init(void)
2237{
3e170ce0 2238 _CASSERT(UIPC_MAX_CMSG_FD >= (MCLBYTES / sizeof(int)));
1c79356b
A
2239 LIST_INIT(&unp_dhead);
2240 LIST_INIT(&unp_shead);
2241}
2242
2243#ifndef MIN
0a7de745 2244#define MIN(a, b) (((a) < (b)) ? (a) : (b))
1c79356b
A
2245#endif
2246
2d21ac55
A
2247/*
2248 * Returns: 0 Success
2249 * EINVAL
f427ee49 2250 * EBADF
2d21ac55 2251 */
1c79356b 2252static int
2d21ac55 2253unp_internalize(struct mbuf *control, proc_t p)
1c79356b 2254{
91447636 2255 struct cmsghdr *cm = mtod(control, struct cmsghdr *);
b0d623f7 2256 int *fds;
91447636
A
2257 struct fileglob **rp;
2258 struct fileproc *fp;
2d21ac55 2259 int i, error;
1c79356b 2260 int oldfds;
3e170ce0 2261 uint8_t fg_ins[UIPC_MAX_CMSG_FD / 8];
1c79356b 2262
2d21ac55 2263 /* 64bit: cmsg_len is 'uint32_t', m_len is 'long' */
1c79356b 2264 if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
b0d623f7 2265 (socklen_t)cm->cmsg_len != (socklen_t)control->m_len) {
0a7de745 2266 return EINVAL;
1c79356b 2267 }
0a7de745 2268 oldfds = (cm->cmsg_len - sizeof(*cm)) / sizeof(int);
3e170ce0 2269 bzero(fg_ins, sizeof(fg_ins));
1c79356b 2270
91447636 2271 proc_fdlock(p);
b0d623f7 2272 fds = (int *)(cm + 1);
91447636
A
2273
2274 for (i = 0; i < oldfds; i++) {
b0d623f7 2275 struct fileproc *tmpfp;
f427ee49 2276 if ((tmpfp = fp_get_noref_locked(p, fds[i])) == NULL) {
2d21ac55 2277 proc_fdunlock(p);
f427ee49
A
2278 return EBADF;
2279 } else if (!fg_sendable(tmpfp->fp_glob)) {
b0d623f7 2280 proc_fdunlock(p);
0a7de745 2281 return EINVAL;
a991bd8d 2282 } else if (fp_isguarded(tmpfp, GUARD_SOCKET_IPC)) {
39236c6e 2283 error = fp_guard_exception(p,
0a7de745 2284 fds[i], tmpfp, kGUARD_EXC_SOCKET_IPC);
39236c6e 2285 proc_fdunlock(p);
0a7de745 2286 return error;
2d21ac55 2287 }
91447636
A
2288 }
2289 rp = (struct fileglob **)(cm + 1);
1c79356b 2290
d9a64523 2291 /* On K64 we need to walk backwards because a fileglob * is twice the size of an fd
b0d623f7
A
2292 * and doing them in order would result in stomping over unprocessed fds
2293 */
2294 for (i = (oldfds - 1); i >= 0; i--) {
f427ee49
A
2295 fp = fp_get_noref_locked(p, fds[i]);
2296 if (fg_insertuipc_mark(fp->fp_glob)) {
3e170ce0 2297 fg_ins[i / 8] |= 0x80 >> (i % 8);
0a7de745 2298 }
f427ee49 2299 rp[i] = fp->fp_glob;
1c79356b 2300 }
91447636 2301 proc_fdunlock(p);
1c79356b 2302
3e170ce0
A
2303 for (i = 0; i < oldfds; i++) {
2304 if (fg_ins[i / 8] & (0x80 >> (i % 8))) {
2305 VERIFY(rp[i]->fg_lflags & FG_INSMSGQ);
2306 fg_insertuipc(rp[i]);
2307 }
2308 (void) OSAddAtomic(1, &unp_rights);
2309 }
2310
0a7de745 2311 return 0;
1c79356b
A
2312}
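
/*
 * Editorial sketch, not part of the kernel source: the sendmsg() call
 * whose SCM_RIGHTS control message unp_internalize() converts from
 * descriptor numbers into fileglob pointers.  send_one_fd is an
 * illustrative helper name, not an existing API.
 */
#include <sys/socket.h>
#include <sys/uio.h>
#include <string.h>

static int
send_one_fd(int sock, int fd_to_pass)
{
	char datum = 0;
	struct iovec iov = { .iov_base = &datum, .iov_len = sizeof(datum) };
	union {
		struct cmsghdr hdr;
		char buf[CMSG_SPACE(sizeof(int))];
	} ctl;
	struct msghdr msg;
	struct cmsghdr *cm;

	memset(&ctl, 0, sizeof(ctl));
	memset(&msg, 0, sizeof(msg));
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
	msg.msg_control = ctl.buf;
	msg.msg_controllen = sizeof(ctl.buf);

	/* SOL_SOCKET / SCM_RIGHTS is exactly what unp_internalize() checks. */
	cm = CMSG_FIRSTHDR(&msg);
	cm->cmsg_level = SOL_SOCKET;
	cm->cmsg_type = SCM_RIGHTS;
	cm->cmsg_len = CMSG_LEN(sizeof(int));
	memcpy(CMSG_DATA(cm), &fd_to_pass, sizeof(int));

	return (int)sendmsg(sock, &msg, 0);
}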
2313
e2fac8b1 2314__private_extern__ void
2d21ac55 2315unp_gc(void)
1c79356b 2316{
2d21ac55
A
2317 struct fileglob *fg, *nextfg;
2318 struct socket *so;
e2fac8b1 2319 static struct fileglob **extra_ref;
b0d623f7 2320 struct fileglob **fpp;
1c79356b 2321 int nunref, i;
6601e61a 2322 int need_gcwakeup = 0;
2d21ac55 2323
f427ee49 2324 lck_mtx_lock(&uipc_lock);
91447636 2325 if (unp_gcing) {
f427ee49 2326 lck_mtx_unlock(&uipc_lock);
1c79356b 2327 return;
91447636 2328 }
1c79356b
A
2329 unp_gcing = 1;
2330 unp_defer = 0;
e2fac8b1 2331 unp_gcthread = current_thread();
f427ee49 2332 lck_mtx_unlock(&uipc_lock);
2d21ac55
A
2333 /*
2334 * before going through all this, set all FDs to
1c79356b
A
2335 * be NOT deferred and NOT externally accessible
2336 */
f427ee49
A
2337 for (fg = unp_msghead.lh_first; fg != 0; fg = fg->f_msglist.le_next) {
2338 os_atomic_andnot(&fg->fg_flag, FMARK | FDEFER, relaxed);
91447636 2339 }
1c79356b 2340 do {
f427ee49 2341 for (fg = unp_msghead.lh_first; fg != 0;
2d21ac55 2342 fg = fg->f_msglist.le_next) {
91447636 2343 lck_mtx_lock(&fg->fg_lock);
1c79356b
A
2344 /*
2345 * If the file is not open, skip it
2346 */
f427ee49 2347 if (os_ref_get_count_raw(&fg->fg_count) == 0) {
91447636 2348 lck_mtx_unlock(&fg->fg_lock);
1c79356b 2349 continue;
91447636 2350 }
1c79356b
A
2351 /*
2352 * If we already marked it as 'defer' in a
2353 * previous pass, then try to process it this time
2354 * and un-mark it
2355 */
91447636 2356 if (fg->fg_flag & FDEFER) {
f427ee49 2357 os_atomic_andnot(&fg->fg_flag, FDEFER, relaxed);
1c79356b
A
2358 unp_defer--;
2359 } else {
2360 /*
2361 * if it's not deferred, then check if it's
2362 * already marked; if so, skip it
2363 */
2d21ac55 2364 if (fg->fg_flag & FMARK) {
91447636 2365 lck_mtx_unlock(&fg->fg_lock);
1c79356b 2366 continue;
91447636 2367 }
2d21ac55 2368 /*
1c79356b 2369 * If all references are from messages
2d21ac55 2370 * in transit, then skip it; it's not
1c79356b 2371 * externally accessible.
2d21ac55 2372 */
f427ee49
A
2373 if (os_ref_get_count_raw(&fg->fg_count) ==
2374 fg->fg_msgcount) {
91447636 2375 lck_mtx_unlock(&fg->fg_lock);
1c79356b 2376 continue;
91447636 2377 }
2d21ac55 2378 /*
1c79356b
A
2379 * If it got this far then it must be
2380 * externally accessible.
2381 */
f427ee49 2382 os_atomic_or(&fg->fg_flag, FMARK, relaxed);
1c79356b
A
2383 }
2384 /*
2d21ac55 2385 * either it was deferred, or it is externally
1c79356b
A
2386 * accessible and not already marked so.
2387 * Now check if it is possibly one of OUR sockets.
2d21ac55 2388 */
39236c6e 2389 if (FILEGLOB_DTYPE(fg) != DTYPE_SOCKET ||
91447636
A
2390 (so = (struct socket *)fg->fg_data) == 0) {
2391 lck_mtx_unlock(&fg->fg_lock);
1c79356b 2392 continue;
91447636 2393 }
39236c6e 2394 if (so->so_proto->pr_domain != localdomain ||
0a7de745 2395 (so->so_proto->pr_flags & PR_RIGHTS) == 0) {
91447636 2396 lck_mtx_unlock(&fg->fg_lock);
1c79356b 2397 continue;
91447636 2398 }
1c79356b 2399#ifdef notdef
1c79356b
A
2400 if (so->so_rcv.sb_flags & SB_LOCK) {
2401 /*
2402 * This is problematical; it's not clear
2403 * we need to wait for the sockbuf to be
2404 * unlocked (on a uniprocessor, at least),
2405 * and it's also not clear what to do
2406 * if sbwait returns an error due to receipt
2407 * of a signal. If sbwait does return
2408 * an error, we'll go into an infinite
2409 * loop. Delete all of this for now.
2410 */
2411 (void) sbwait(&so->so_rcv);
2412 goto restart;
2413 }
2414#endif
2415 /*
2416 * So, OK, it's one of our sockets and it IS externally
2417 * accessible (or was deferred). Now we look
2418 * to see if we hold any file descriptors in its
2d21ac55 2419 * message buffers. Follow those links and mark them
1c79356b 2420 * as accessible too.
e2fac8b1 2421 *
d9a64523 2422 * In case a file is passed onto itself we need to
e2fac8b1 2423 * release the file lock.
1c79356b 2424 */
91447636 2425 lck_mtx_unlock(&fg->fg_lock);
e2fac8b1 2426
3e170ce0 2427 unp_scan(so->so_rcv.sb_mb, unp_mark, 0);
1c79356b
A
2428 }
2429 } while (unp_defer);
2430 /*
2431 * We grab an extra reference to each of the file table entries
2432 * that are not otherwise accessible and then free the rights
2433 * that are stored in messages on them.
2434 *
2435 * The bug in the original code is a little tricky, so I'll describe
2436 * what's wrong with it here.
2437 *
f427ee49 2438 * It is incorrect to simply unp_discard each entry for fg_msgcount
1c79356b
A
2439 * times -- consider the case of sockets A and B that contain
2440 * references to each other. On a last close of some other socket,
2441 * we trigger a gc since the number of outstanding rights (unp_rights)
2442 * is non-zero. If during the sweep phase the gc code unp_discards,
2443 * we end up doing a (full) closef on the descriptor. A closef on A
2444 * results in the following chain. Closef calls soo_close, which
2445 * calls soclose. Soclose calls first (through the switch
2446 * uipc_usrreq) unp_detach, which re-invokes unp_gc. Unp_gc simply
2447 * returns because the previous instance had set unp_gcing, and
2448 * we return all the way back to soclose, which marks the socket
2449 * with SS_NOFDREF, and then calls sofree. Sofree calls sorflush
2450 * to free up the rights that are queued in messages on the socket A,
2451 * i.e., the reference on B. The sorflush calls via the dom_dispose
2452 * switch unp_dispose, which unp_scans with unp_discard. This second
2453 * instance of unp_discard just calls closef on B.
2454 *
2455 * Well, a similar chain occurs on B, resulting in a sorflush on B,
2456 * which results in another closef on A. Unfortunately, A is already
2457 * being closed, and the descriptor has already been marked with
2458 * SS_NOFDREF, and soclose panics at this point.
2459 *
2460 * Here, we first take an extra reference to each inaccessible
2461 * descriptor. Then, we call sorflush ourself, since we know
2462 * it is a Unix domain socket anyhow. After we destroy all the
2463 * rights carried in messages, we do a last closef to get rid
2464 * of our extra reference. This is the last close, and the
2465 * unp_detach etc will shut down the socket.
2466 *
2467 * 91/09/19, bsy@cs.cmu.edu
2468 */
c3c9b80d
A
2469 size_t extra_ref_size = nfiles * sizeof(struct fileglob *);
2470 extra_ref = kheap_alloc(KHEAP_TEMP, extra_ref_size, Z_WAITOK);
0a7de745 2471 if (extra_ref == NULL) {
b0d623f7 2472 goto bail;
0a7de745 2473 }
f427ee49 2474 for (nunref = 0, fg = unp_msghead.lh_first, fpp = extra_ref; fg != 0;
91447636
A
2475 fg = nextfg) {
2476 lck_mtx_lock(&fg->fg_lock);
2477
2478 nextfg = fg->f_msglist.le_next;
2d21ac55 2479 /*
1c79356b
A
2480 * If it's not open, skip it
2481 */
f427ee49 2482 if (os_ref_get_count_raw(&fg->fg_count) == 0) {
91447636 2483 lck_mtx_unlock(&fg->fg_lock);
1c79356b 2484 continue;
91447636 2485 }
2d21ac55 2486 /*
1c79356b
A
2487 * If all refs are from msgs, and it's not marked accessible
2488 * then it must be referenced from some unreachable cycle
2489 * of (shut-down) FDs, so include it in our
2490 * list of FDs to remove
2491 */
f427ee49
A
2492 if (fg->fg_flag & FMARK) {
2493 lck_mtx_unlock(&fg->fg_lock);
2494 continue;
2495 }
2496 if (os_ref_get_count_raw(&fg->fg_count) == fg->fg_msgcount) {
2497 os_ref_retain_raw(&fg->fg_count, &f_refgrp);
91447636 2498 *fpp++ = fg;
1c79356b 2499 nunref++;
1c79356b 2500 }
91447636 2501 lck_mtx_unlock(&fg->fg_lock);
1c79356b 2502 }
2d21ac55 2503 /*
1c79356b
A
2504 * for each FD on our hit list, do the following two things
2505 */
2506 for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
91447636 2507 struct fileglob *tfg;
1c79356b 2508
91447636 2509 tfg = *fpp;
1c79356b 2510
39236c6e
A
2511 if (FILEGLOB_DTYPE(tfg) == DTYPE_SOCKET &&
2512 tfg->fg_data != NULL) {
2d21ac55
A
2513 so = (struct socket *)(tfg->fg_data);
2514
e2fac8b1 2515 socket_lock(so, 0);
d9a64523 2516
2d21ac55
A
2517 sorflush(so);
2518
e2fac8b1 2519 socket_unlock(so, 0);
91447636
A
2520 }
2521 }
0a7de745 2522 for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
f427ee49 2523 fg_drop(PROC_NULL, *fpp);
0a7de745 2524 }
2d21ac55 2525
c3c9b80d
A
2526 kheap_free(KHEAP_TEMP, extra_ref, extra_ref_size);
2527
b0d623f7 2528bail:
f427ee49 2529 lck_mtx_lock(&uipc_lock);
1c79356b 2530 unp_gcing = 0;
e2fac8b1 2531 unp_gcthread = NULL;
6601e61a
A
2532
2533 if (unp_gcwait != 0) {
2534 unp_gcwait = 0;
2535 need_gcwakeup = 1;
2536 }
f427ee49 2537 lck_mtx_unlock(&uipc_lock);
6601e61a 2538
0a7de745 2539 if (need_gcwakeup != 0) {
6601e61a 2540 wakeup(&unp_gcing);
0a7de745 2541 }
1c79356b
A
2542}
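
/*
 * Editorial sketch, not part of the kernel source: the kind of garbage
 * unp_gc() exists to reap.  Two connected sockets each carry the other's
 * descriptor in an undelivered SCM_RIGHTS message; once the process
 * closes its own descriptors, every remaining fileglob reference lives in
 * a message queue (fg_count == fg_msgcount), so only the collector can
 * reclaim the pair.  send_one_fd() refers to the illustrative helper
 * sketched after unp_internalize() above.
 */
#include <sys/socket.h>
#include <unistd.h>

static void
make_unreachable_cycle(void)
{
	int sv[2];

	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) < 0) {
		return;
	}
	/* Queue each descriptor inside the other socket's receive buffer. */
	(void) send_one_fd(sv[0], sv[1]);
	(void) send_one_fd(sv[1], sv[0]);

	/*
	 * Drop our own references.  The in-flight messages still pin one
	 * fileglob reference each, forming a cycle that is no longer
	 * externally accessible.
	 */
	(void) close(sv[0]);
	(void) close(sv[1]);
}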
2543
2544void
91447636 2545unp_dispose(struct mbuf *m)
1c79356b 2546{
1c79356b 2547 if (m) {
3e170ce0 2548 unp_scan(m, unp_discard, NULL);
1c79356b
A
2549 }
2550}
2551
2d21ac55
A
2552/*
2553 * Returns: 0 Success
2554 */
91447636 2555static int
2d21ac55 2556unp_listen(struct unpcb *unp, proc_t p)
91447636 2557{
0c530ab8
A
2558 kauth_cred_t safecred = kauth_cred_proc_ref(p);
2559 cru2x(safecred, &unp->unp_peercred);
2560 kauth_cred_unref(&safecred);
91447636 2561 unp->unp_flags |= UNP_HAVEPCCACHED;
0a7de745 2562 return 0;
91447636
A
2563}
2564
1c79356b 2565static void
3e170ce0 2566unp_scan(struct mbuf *m0, void (*op)(struct fileglob *, void *arg), void *arg)
1c79356b 2567{
91447636
A
2568 struct mbuf *m;
2569 struct fileglob **rp;
2570 struct cmsghdr *cm;
2571 int i;
1c79356b
A
2572 int qfds;
2573
2574 while (m0) {
0a7de745 2575 for (m = m0; m; m = m->m_next) {
1c79356b 2576 if (m->m_type == MT_CONTROL &&
0a7de745 2577 (size_t)m->m_len >= sizeof(*cm)) {
1c79356b
A
2578 cm = mtod(m, struct cmsghdr *);
2579 if (cm->cmsg_level != SOL_SOCKET ||
0a7de745 2580 cm->cmsg_type != SCM_RIGHTS) {
1c79356b 2581 continue;
0a7de745
A
2582 }
2583 qfds = (cm->cmsg_len - sizeof(*cm)) /
2584 sizeof(int);
91447636 2585 rp = (struct fileglob **)(cm + 1);
0a7de745 2586 for (i = 0; i < qfds; i++) {
3e170ce0 2587 (*op)(*rp++, arg);
0a7de745
A
2588 }
2589 break; /* XXX, but saves time */
1c79356b 2590 }
0a7de745 2591 }
1c79356b
A
2592 m0 = m0->m_act;
2593 }
2594}
2595
1c79356b 2596static void
3e170ce0 2597unp_mark(struct fileglob *fg, __unused void *arg)
1c79356b 2598{
f427ee49 2599 uint32_t oflags, nflags;
1c79356b 2600
f427ee49
A
2601 os_atomic_rmw_loop(&fg->fg_flag, oflags, nflags, relaxed, {
2602 if (oflags & FMARK) {
2603 os_atomic_rmw_loop_give_up(return );
2604 }
2605 nflags = oflags | FMARK | FDEFER;
2606 });
91447636 2607
1c79356b 2608 unp_defer++;
1c79356b
A
2609}
2610
1c79356b 2611static void
3e170ce0 2612unp_discard(struct fileglob *fg, void *p)
1c79356b 2613{
0a7de745
A
2614 if (p == NULL) {
2615 p = current_proc(); /* XXX */
2616 }
b0d623f7 2617 (void) OSAddAtomic(1, &unp_disposed);
3e170ce0
A
2618 if (fg_removeuipc_mark(fg)) {
2619 VERIFY(fg->fg_lflags & FG_RMMSGQ);
2620 fg_removeuipc(fg);
2621 }
2622 (void) OSAddAtomic(-1, &unp_rights);
91447636 2623
f427ee49 2624 (void) fg_drop(p, fg);
1c79356b 2625}
b0d623f7
A
2626
2627int
2628unp_lock(struct socket *so, int refcount, void * lr)
0a7de745
A
2629{
2630 void * lr_saved;
2631 if (lr == 0) {
2632 lr_saved = (void *) __builtin_return_address(0);
2633 } else {
2634 lr_saved = lr;
2635 }
2636
2637 if (so->so_pcb) {
2638 lck_mtx_lock(&((struct unpcb *)so->so_pcb)->unp_mtx);
2639 } else {
2640 panic("unp_lock: so=%p NO PCB! lr=%p ref=0x%x\n",
2641 so, lr_saved, so->so_usecount);
2642 }
2643
2644 if (so->so_usecount < 0) {
2645 panic("unp_lock: so=%p so_pcb=%p lr=%p ref=0x%x\n",
2646 so, so->so_pcb, lr_saved, so->so_usecount);
2647 }
2648
2649 if (refcount) {
d190cdc3
A
2650 VERIFY(so->so_usecount > 0);
2651 so->so_usecount++;
2652 }
0a7de745
A
2653 so->lock_lr[so->next_lock_lr] = lr_saved;
2654 so->next_lock_lr = (so->next_lock_lr + 1) % SO_LCKDBG_MAX;
2655 return 0;
b0d623f7
A
2656}
2657
2658int
2659unp_unlock(struct socket *so, int refcount, void * lr)
2660{
0a7de745
A
2661 void * lr_saved;
2662 lck_mtx_t * mutex_held = NULL;
b0d623f7
A
2663 struct unpcb *unp = sotounpcb(so);
2664
0a7de745
A
2665 if (lr == 0) {
2666 lr_saved = (void *) __builtin_return_address(0);
2667 } else {
2668 lr_saved = lr;
2669 }
2670
2671 if (refcount) {
2672 so->so_usecount--;
2673 }
2674
2675 if (so->so_usecount < 0) {
2676 panic("unp_unlock: so=%p usecount=%x\n", so, so->so_usecount);
2677 }
2678 if (so->so_pcb == NULL) {
2679 panic("unp_unlock: so=%p NO PCB usecount=%x\n", so, so->so_usecount);
2680 } else {
2681 mutex_held = &((struct unpcb *)so->so_pcb)->unp_mtx;
2682 }
2683 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
2684 so->unlock_lr[so->next_unlock_lr] = lr_saved;
2685 so->next_unlock_lr = (so->next_unlock_lr + 1) % SO_LCKDBG_MAX;
2686
2687 if (so->so_usecount == 0 && (so->so_flags & SOF_PCBCLEARING)) {
b0d623f7
A
2688 sofreelastref(so, 1);
2689
0a7de745 2690 if (unp->unp_addr) {
b0d623f7 2691 FREE(unp->unp_addr, M_SONAME);
0a7de745 2692 }
d9a64523 2693
b0d623f7 2694 lck_mtx_unlock(mutex_held);
b0d623f7 2695
c3c9b80d 2696 lck_mtx_destroy(&unp->unp_mtx, &unp_mtx_grp);
b0d623f7 2697 zfree(unp_zone, unp);
b0d623f7
A
2698
2699 unp_gc();
2700 } else {
2701 lck_mtx_unlock(mutex_held);
2702 }
2703
0a7de745 2704 return 0;
b0d623f7
A
2705}
2706
2707lck_mtx_t *
5ba3f43e 2708unp_getlock(struct socket *so, __unused int flags)
b0d623f7 2709{
0a7de745 2710 struct unpcb *unp = (struct unpcb *)so->so_pcb;
b0d623f7
A
2711
2712
0a7de745
A
2713 if (so->so_pcb) {
2714 if (so->so_usecount < 0) {
2715 panic("unp_getlock: so=%p usecount=%x\n", so, so->so_usecount);
2716 }
2717 return &unp->unp_mtx;
2718 } else {
2719 panic("unp_getlock: so=%p NULL so_pcb\n", so);
2720 return so->so_proto->pr_domain->dom_mtx;
2721 }
b0d623f7 2722}