bsd/kern/uipc_usrreq.c

   1 /*
   2  * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /*
  29  * Copyright (c) 1982, 1986, 1989, 1991, 1993
  30  *      The Regents of the University of California.  All rights reserved.
  31  *
  32  * Redistribution and use in source and binary forms, with or without
  33  * modification, are permitted provided that the following conditions
  34  * are met:
  35  * 1. Redistributions of source code must retain the above copyright
  36  *    notice, this list of conditions and the following disclaimer.
  37  * 2. Redistributions in binary form must reproduce the above copyright
  38  *    notice, this list of conditions and the following disclaimer in the
  39  *    documentation and/or other materials provided with the distribution.
  40  * 3. All advertising materials mentioning features or use of this software
  41  *    must display the following acknowledgement:
  42  *      This product includes software developed by the University of
  43  *      California, Berkeley and its contributors.
  44  * 4. Neither the name of the University nor the names of its contributors
  45  *    may be used to endorse or promote products derived from this software
  46  *    without specific prior written permission.
  47  *
  48  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  58  * SUCH DAMAGE.
  59  *
  60  *      From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94
  61  */
  62 /*
  63  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
  64  * support for mandatory and extensible security protections.  This notice
  65  * is included in support of clause 2.2 (b) of the Apple Public License,
  66  * Version 2.0.
  67  */
  68
  69 #include <sys/param.h>
  70 #include <sys/systm.h>
  71 #include <sys/kernel.h>
  72 #include <sys/domain.h>
  73 #include <sys/fcntl.h>
  74 #include <sys/malloc.h>         /* XXX must be before <sys/file.h> */
  75 #include <sys/file_internal.h>
  76 #include <sys/guarded.h>
  77 #include <sys/filedesc.h>
  78 #include <sys/lock.h>
  79 #include <sys/mbuf.h>
  80 #include <sys/namei.h>
  81 #include <sys/proc_internal.h>
  82 #include <sys/kauth.h>
  83 #include <sys/protosw.h>
  84 #include <sys/socket.h>
  85 #include <sys/socketvar.h>
  86 #include <sys/stat.h>
  87 #include <sys/sysctl.h>
  88 #include <sys/un.h>
  89 #include <sys/unpcb.h>
  90 #include <sys/vnode_internal.h>
  91 #include <sys/kdebug.h>
  92 #include <sys/mcache.h>
  93
  94 #include <kern/zalloc.h>
  95 #include <kern/locks.h>
  96 #include <kern/task.h>
  97
  98 #if CONFIG_MACF
  99 #include <security/mac_framework.h>
 100 #endif /* CONFIG_MACF */
 101
 102 #include <mach/vm_param.h>
 103
 104 /*
 105  * Maximum number of FDs that can be passed in an mbuf
 106  */
 107 #define UIPC_MAX_CMSG_FD        512
 108
 109 ZONE_DECLARE(unp_zone, "unpzone", sizeof(struct unpcb), ZC_NONE);
 110 static  unp_gen_t unp_gencnt;
 111 static  u_int unp_count;
 112
 113 static  lck_attr_t             *unp_mtx_attr;
 114 static  lck_grp_t              *unp_mtx_grp;
 115 static  lck_grp_attr_t         *unp_mtx_grp_attr;
 116 static  lck_rw_t                unp_list_mtx;
 117
 118 static  lck_mtx_t               unp_disconnect_lock;
 119 static  lck_mtx_t               unp_connect_lock;
 120 static  lck_mtx_t               uipc_lock;
 121 static  u_int                   disconnect_in_progress;
 122
 123 static struct unp_head unp_shead, unp_dhead;
 124 static int      unp_defer, unp_gcing, unp_gcwait;
 125 static thread_t unp_gcthread = NULL;
 126 static LIST_HEAD(, fileglob) unp_msghead = LIST_HEAD_INITIALIZER(unp_msghead);
 127
 128
 129 /*
 130  * mDNSResponder tracing.  When enabled, endpoints connected to
 131  * /var/run/mDNSResponder will be traced; during each send on
 132  * the traced socket, we log the PID and process name of the
 133  * sending process.  We also print out a bit of info related
 134  * to the data itself; this assumes ipc_msg_hdr in dnssd_ipc.h
 135  * of mDNSResponder stays the same.
 136  */
 137 #define MDNSRESPONDER_PATH      "/var/run/mDNSResponder"
 138
 139 static int unpst_tracemdns;     /* enable tracing */
 140
 141 #define MDNS_IPC_MSG_HDR_VERSION_1      1
 142
 143 struct mdns_ipc_msg_hdr {
 144         uint32_t version;
 145         uint32_t datalen;
 146         uint32_t ipc_flags;
 147         uint32_t op;
 148         union {
 149                 void *context;
 150                 uint32_t u32[2];
 151         } __attribute__((packed));
 152         uint32_t reg_index;
 153 } __attribute__((packed));
 154
 155 /*
 156  * Unix communications domain.
 157  *
 158  * TODO:
 159  *      SEQPACKET, RDM
 160  *      rethink name space problems
 161  *      need a proper out-of-band
 162  *      lock pushdown
 163  */
 164 static struct   sockaddr sun_noname = { .sa_len = sizeof(sun_noname), .sa_family = AF_LOCAL, .sa_data = { 0 } };
 165 static ino_t    unp_ino;                /* prototype for fake inode numbers */
 166
 167 static int      unp_attach(struct socket *);
 168 static void     unp_detach(struct unpcb *);
 169 static int      unp_bind(struct unpcb *, struct sockaddr *, proc_t);
 170 static int      unp_connect(struct socket *, struct sockaddr *, proc_t);
 171 static void     unp_disconnect(struct unpcb *);
 172 static void     unp_shutdown(struct unpcb *);
 173 static void     unp_drop(struct unpcb *, int);
 174 __private_extern__ void unp_gc(void);
 175 static void     unp_scan(struct mbuf *, void (*)(struct fileglob *, void *arg), void *arg);
 176 static void     unp_mark(struct fileglob *, __unused void *);
 177 static void     unp_discard(struct fileglob *, void *);
 178 static int      unp_internalize(struct mbuf *, proc_t);
 179 static int      unp_listen(struct unpcb *, proc_t);
 180 static void     unpcb_to_compat(struct unpcb *, struct unpcb_compat *);
 181 static void     unp_get_locks_in_order(struct socket *so, struct socket *conn_so);
 182
 183 static void
 184 unp_get_locks_in_order(struct socket *so, struct socket *conn_so)
 185 {
 186         if (so < conn_so) {
 187                 socket_lock(conn_so, 1);
 188         } else {
 189                 struct unpcb *unp = sotounpcb(so);
 190                 unp->unp_flags |= UNP_DONTDISCONNECT;
 191                 unp->rw_thrcount++;
 192                 socket_unlock(so, 0);
 193
 194                 /* Get the locks in the correct order */
 195                 socket_lock(conn_so, 1);
 196                 socket_lock(so, 0);
 197                 unp->rw_thrcount--;
 198                 if (unp->rw_thrcount == 0) {
 199                         unp->unp_flags &= ~UNP_DONTDISCONNECT;
 200                         wakeup(unp);
 201                 }
 202         }
 203 }
 204
 205 static int
 206 uipc_abort(struct socket *so)
 207 {
 208         struct unpcb *unp = sotounpcb(so);
 209
 210         if (unp == 0) {
 211                 return EINVAL;
 212         }
 213         unp_drop(unp, ECONNABORTED);
 214         unp_detach(unp);
 215         sofree(so);
 216         return 0;
 217 }
 218
 219 static int
 220 uipc_accept(struct socket *so, struct sockaddr **nam)
 221 {
 222         struct unpcb *unp = sotounpcb(so);
 223
 224         if (unp == 0) {
 225                 return EINVAL;
 226         }
 227
 228         /*
 229          * Pass back name of connected socket,
 230          * if it was bound and we are still connected
 231          * (our peer may have closed already!).
 232          */
 233         if (unp->unp_conn && unp->unp_conn->unp_addr) {
 234                 *nam = dup_sockaddr((struct sockaddr *)
 235                     unp->unp_conn->unp_addr, 1);
 236         } else {
 237                 *nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
 238         }
 239         return 0;
 240 }
 241
 242 /*
 243  * Returns:     0                       Success
 244  *              EISCONN
 245  *      unp_attach:
 246  */
 247 static int
 248 uipc_attach(struct socket *so, __unused int proto, __unused proc_t p)
 249 {
 250         struct unpcb *unp = sotounpcb(so);
 251
 252         if (unp != 0) {
 253                 return EISCONN;
 254         }
 255         return unp_attach(so);
 256 }
 257
 258 static int
 259 uipc_bind(struct socket *so, struct sockaddr *nam, proc_t p)
 260 {
 261         struct unpcb *unp = sotounpcb(so);
 262
 263         if (unp == 0) {
 264                 return EINVAL;
 265         }
 266
 267         return unp_bind(unp, nam, p);
 268 }
 269
 270 /*
 271  * Returns:     0                       Success
 272  *              EINVAL
 273  *      unp_connect:???                 [See elsewhere in this file]
 274  */
 275 static int
 276 uipc_connect(struct socket *so, struct sockaddr *nam, proc_t p)
 277 {
 278         struct unpcb *unp = sotounpcb(so);
 279
 280         if (unp == 0) {
 281                 return EINVAL;
 282         }
 283         return unp_connect(so, nam, p);
 284 }
 285
 286 /*
 287  * Returns:     0                       Success
 288  *              EINVAL
 289  *      unp_connect2:EPROTOTYPE         Protocol wrong type for socket
 290  *      unp_connect2:EINVAL             Invalid argument
 291  */
 292 static int
 293 uipc_connect2(struct socket *so1, struct socket *so2)
 294 {
 295         struct unpcb *unp = sotounpcb(so1);
 296
 297         if (unp == 0) {
 298                 return EINVAL;
 299         }
 300
 301         return unp_connect2(so1, so2);
 302 }
 303
 304 /* control is EOPNOTSUPP */
 305
 306 static int
 307 uipc_detach(struct socket *so)
 308 {
 309         struct unpcb *unp = sotounpcb(so);
 310
 311         if (unp == 0) {
 312                 return EINVAL;
 313         }
 314
 315         LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
 316         unp_detach(unp);
 317         return 0;
 318 }
 319
 320 static int
 321 uipc_disconnect(struct socket *so)
 322 {
 323         struct unpcb *unp = sotounpcb(so);
 324
 325         if (unp == 0) {
 326                 return EINVAL;
 327         }
 328         unp_disconnect(unp);
 329         return 0;
 330 }
 331
 332 /*
 333  * Returns:     0                       Success
 334  *              EINVAL
 335  */
 336 static int
 337 uipc_listen(struct socket *so, __unused proc_t p)
 338 {
 339         struct unpcb *unp = sotounpcb(so);
 340
 341         if (unp == 0 || unp->unp_vnode == 0) {
 342                 return EINVAL;
 343         }
 344         return unp_listen(unp, p);
 345 }
 346
 347 static int
 348 uipc_peeraddr(struct socket *so, struct sockaddr **nam)
 349 {
 350         struct unpcb *unp = sotounpcb(so);
 351
 352         if (unp == NULL) {
 353                 return EINVAL;
 354         }
 355         if (unp->unp_conn != NULL && unp->unp_conn->unp_addr != NULL) {
 356                 *nam = dup_sockaddr((struct sockaddr *)
 357                     unp->unp_conn->unp_addr, 1);
 358         } else {
 359                 *nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
 360         }
 361         return 0;
 362 }
 363
 364 static int
 365 uipc_rcvd(struct socket *so, __unused int flags)
 366 {
 367         struct unpcb *unp = sotounpcb(so);
 368         struct socket *so2;
 369
 370         if (unp == 0) {
 371                 return EINVAL;
 372         }
 373         switch (so->so_type) {
 374         case SOCK_DGRAM:
 375                 panic("uipc_rcvd DGRAM?");
 376         /*NOTREACHED*/
 377
 378         case SOCK_STREAM:
 379 #define rcv (&so->so_rcv)
 380 #define snd (&so2->so_snd)
 381                 if (unp->unp_conn == 0) {
 382                         break;
 383                 }
 384
 385                 so2 = unp->unp_conn->unp_socket;
 386                 unp_get_locks_in_order(so, so2);
 387                 /*
 388                  * Adjust backpressure on sender
 389                  * and wakeup any waiting to write.
 390                  */
 391                 snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
 392                 unp->unp_mbcnt = rcv->sb_mbcnt;
 393                 snd->sb_hiwat += unp->unp_cc - rcv->sb_cc;
 394                 unp->unp_cc = rcv->sb_cc;
 395                 if (sb_notify(&so2->so_snd)) {
 396                         sowakeup(so2, &so2->so_snd, so);
 397                 }
 398
 399                 socket_unlock(so2, 1);
 400
 401 #undef snd
 402 #undef rcv
 403                 break;
 404
 405         default:
 406                 panic("uipc_rcvd unknown socktype");
 407         }
 408         return 0;
 409 }
 410
 411 /* pru_rcvoob is EOPNOTSUPP */
 412
 413 /*
 414  * Returns:     0                       Success
 415  *              EINVAL
 416  *              EOPNOTSUPP
 417  *              EPIPE
 418  *              ENOTCONN
 419  *              EISCONN
 420  *      unp_internalize:EINVAL
 421  *      unp_internalize:EBADF
 422  *      unp_connect:EAFNOSUPPORT        Address family not supported
 423  *      unp_connect:EINVAL              Invalid argument
 424  *      unp_connect:ENOTSOCK            Not a socket
 425  *      unp_connect:ECONNREFUSED        Connection refused
 426  *      unp_connect:EISCONN             Socket is connected
 427  *      unp_connect:EPROTOTYPE          Protocol wrong type for socket
 428  *      unp_connect:???
 429  *      sbappendaddr:ENOBUFS            [5th argument, contents modified]
 430  *      sbappendaddr:???                [whatever a filter author chooses]
 431  */
 432 static int
 433 uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
 434     struct mbuf *control, proc_t p)
 435 {
 436         int error = 0;
 437         struct unpcb *unp = sotounpcb(so);
 438         struct socket *so2;
 439
 440         if (unp == 0) {
 441                 error = EINVAL;
 442                 goto release;
 443         }
 444         if (flags & PRUS_OOB) {
 445                 error = EOPNOTSUPP;
 446                 goto release;
 447         }
 448
 449         if (control) {
 450                 /* release lock to avoid deadlock (4436174) */
 451                 socket_unlock(so, 0);
 452                 error = unp_internalize(control, p);
 453                 socket_lock(so, 0);
 454                 if (error) {
 455                         goto release;
 456                 }
 457         }
 458
 459         switch (so->so_type) {
 460         case SOCK_DGRAM:
 461         {
 462                 struct sockaddr *from;
 463
 464                 if (nam) {
 465                         if (unp->unp_conn) {
 466                                 error = EISCONN;
 467                                 break;
 468                         }
 469                         error = unp_connect(so, nam, p);
 470                         if (error) {
 471                                 so->so_state &= ~SS_ISCONNECTING;
 472                                 break;
 473                         }
 474                 } else {
 475                         if (unp->unp_conn == 0) {
 476                                 error = ENOTCONN;
 477                                 break;
 478                         }
 479                 }
 480
 481                 so2 = unp->unp_conn->unp_socket;
 482                 if (so != so2) {
 483                         unp_get_locks_in_order(so, so2);
 484                 }
 485
 486                 if (unp->unp_addr) {
 487                         from = (struct sockaddr *)unp->unp_addr;
 488                 } else {
 489                         from = &sun_noname;
 490                 }
 491                 /*
 492                  * sbappendaddr() will fail when the receiver runs out of
 493                  * space; in contrast to SOCK_STREAM, we will lose messages
 494                  * for the SOCK_DGRAM case when the receiver's queue overflows.
 495                  * SB_UNIX on the socket buffer implies that the callee will
 496                  * not free the control message, if any, because we would need
 497                  * to call unp_dispose() on it.
 498                  */
 499                 if (sbappendaddr(&so2->so_rcv, from, m, control, &error)) {
 500                         control = NULL;
 501                         if (sb_notify(&so2->so_rcv)) {
 502                                 sowakeup(so2, &so2->so_rcv, so);
 503                         }
 504                 } else if (control != NULL && error == 0) {
 505                         /* A socket filter took control; don't touch it */
 506                         control = NULL;
 507                 }
 508
 509                 if (so != so2) {
 510                         socket_unlock(so2, 1);
 511                 }
 512
 513                 m = NULL;
 514                 if (nam) {
 515                         unp_disconnect(unp);
 516                 }
 517                 break;
 518         }
 519
 520         case SOCK_STREAM: {
 521                 int didreceive = 0;
 522 #define rcv (&so2->so_rcv)
 523 #define snd (&so->so_snd)
 524                 /* Connect if not connected yet. */
 525                 /*
 526                  * Note: A better implementation would complain
 527                  * if not equal to the peer's address.
 528                  */
 529                 if ((so->so_state & SS_ISCONNECTED) == 0) {
 530                         if (nam) {
 531                                 error = unp_connect(so, nam, p);
 532                                 if (error) {
 533                                         so->so_state &= ~SS_ISCONNECTING;
 534                                         break;  /* XXX */
 535                                 }
 536                         } else {
 537                                 error = ENOTCONN;
 538                                 break;
 539                         }
 540                 }
 541
 542                 if (so->so_state & SS_CANTSENDMORE) {
 543                         error = EPIPE;
 544                         break;
 545                 }
 546                 if (unp->unp_conn == 0) {
 547                         panic("uipc_send connected but no connection?");
 548                 }
 549
 550                 so2 = unp->unp_conn->unp_socket;
 551                 unp_get_locks_in_order(so, so2);
 552
 553                 /* Check socket state again as we might have unlocked the socket
 554                  * while trying to get the locks in order
 555                  */
 556
 557                 if ((so->so_state & SS_CANTSENDMORE)) {
 558                         error = EPIPE;
 559                         socket_unlock(so2, 1);
 560                         break;
 561                 }
 562
 563                 if (unp->unp_flags & UNP_TRACE_MDNS) {
 564                         struct mdns_ipc_msg_hdr hdr;
 565
 566                         if (mbuf_copydata(m, 0, sizeof(hdr), &hdr) == 0 &&
 567                             hdr.version == ntohl(MDNS_IPC_MSG_HDR_VERSION_1)) {
 568                                 printf("%s[mDNSResponder] pid=%d (%s): op=0x%x\n",
 569                                     __func__, p->p_pid, p->p_comm, ntohl(hdr.op));
 570                         }
 571                 }
 572
 573                 /*
 574                  * Send to paired receive port, and then reduce send buffer
 575                  * hiwater marks to maintain backpressure.  Wake up readers.
 576                  * SB_UNIX flag will allow new record to be appended to the
 577                  * receiver's queue even when it is already full.  It is
 578                  * possible, however, that append might fail.  In that case,
 579                  * we will need to call unp_dispose() on the control message;
 580                  * the callee will not free it since SB_UNIX is set.
 581                  */
 582                 didreceive = control ?
 583                     sbappendcontrol(rcv, m, control, &error) : sbappend(rcv, m);
 584
 585                 snd->sb_mbmax -= rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
 586                 unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
 587                 if ((int32_t)snd->sb_hiwat >=
 588                     (int32_t)(rcv->sb_cc - unp->unp_conn->unp_cc)) {
 589                         snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc;
 590                 } else {
 591                         snd->sb_hiwat = 0;
 592                 }
 593                 unp->unp_conn->unp_cc = rcv->sb_cc;
 594                 if (didreceive) {
 595                         control = NULL;
 596                         if (sb_notify(&so2->so_rcv)) {
 597                                 sowakeup(so2, &so2->so_rcv, so);
 598                         }
 599                 } else if (control != NULL && error == 0) {
 600                         /* A socket filter took control; don't touch it */
 601                         control = NULL;
 602                 }
 603
 604                 socket_unlock(so2, 1);
 605                 m = NULL;
 606 #undef snd
 607 #undef rcv
 608         }
 609         break;
 610
 611         default:
 612                 panic("uipc_send unknown socktype");
 613         }
 614
 615         /*
 616          * SEND_EOF is equivalent to a SEND followed by
 617          * a SHUTDOWN.
 618          */
 619         if (flags & PRUS_EOF) {
 620                 socantsendmore(so);
 621                 unp_shutdown(unp);
 622         }
 623
 624         if (control && error != 0) {
 625                 socket_unlock(so, 0);
 626                 unp_dispose(control);
 627                 socket_lock(so, 0);
 628         }
 629
 630 release:
 631         if (control) {
 632                 m_freem(control);
 633         }
 634         if (m) {
 635                 m_freem(m);
 636         }
 637         return error;
 638 }
 639
 640 static int
 641 uipc_sense(struct socket *so, void *ub, int isstat64)
 642 {
 643         struct unpcb *unp = sotounpcb(so);
 644         struct socket *so2;
 645         blksize_t blksize;
 646
 647         if (unp == 0) {
 648                 return EINVAL;
 649         }
 650
 651         blksize = so->so_snd.sb_hiwat;
 652         if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) {
 653                 so2 = unp->unp_conn->unp_socket;
 654                 blksize += so2->so_rcv.sb_cc;
 655         }
 656         if (unp->unp_ino == 0) {
 657                 unp->unp_ino = unp_ino++;
 658         }
 659
 660         if (isstat64 != 0) {
 661                 struct stat64  *sb64;
 662
 663                 sb64 = (struct stat64 *)ub;
 664                 sb64->st_blksize = blksize;
 665                 sb64->st_dev = NODEV;
 666                 sb64->st_ino = (ino64_t)unp->unp_ino;
 667         } else {
 668                 struct stat *sb;
 669
 670                 sb = (struct stat *)ub;
 671                 sb->st_blksize = blksize;
 672                 sb->st_dev = NODEV;
 673                 sb->st_ino = (ino_t)(uintptr_t)unp->unp_ino;
 674         }
 675
 676         return 0;
 677 }
 678
 679 /*
 680  * Returns:     0               Success
 681  *              EINVAL
 682  *
 683  * Notes:       This is not strictly correct, as unp_shutdown() also calls
 684  *              socantrcvmore().  These should maybe both be conditionalized
 685  *              on the 'how' argument in soshutdown() as called from the
 686  *              shutdown() system call.
 687  */
 688 static int
 689 uipc_shutdown(struct socket *so)
 690 {
 691         struct unpcb *unp = sotounpcb(so);
 692
 693         if (unp == 0) {
 694                 return EINVAL;
 695         }
 696         socantsendmore(so);
 697         unp_shutdown(unp);
 698         return 0;
 699 }
 700
 701 /*
 702  * Returns:     0                       Success
 703  *              EINVAL                  Invalid argument
 704  */
 705 static int
 706 uipc_sockaddr(struct socket *so, struct sockaddr **nam)
 707 {
 708         struct unpcb *unp = sotounpcb(so);
 709
 710         if (unp == NULL) {
 711                 return EINVAL;
 712         }
 713         if (unp->unp_addr != NULL) {
 714                 *nam = dup_sockaddr((struct sockaddr *)unp->unp_addr, 1);
 715         } else {
 716                 *nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
 717         }
 718         return 0;
 719 }
 720
 721 struct pr_usrreqs uipc_usrreqs = {
 722         .pru_abort =            uipc_abort,
 723         .pru_accept =           uipc_accept,
 724         .pru_attach =           uipc_attach,
 725         .pru_bind =             uipc_bind,
 726         .pru_connect =          uipc_connect,
 727         .pru_connect2 =         uipc_connect2,
 728         .pru_detach =           uipc_detach,
 729         .pru_disconnect =       uipc_disconnect,
 730         .pru_listen =           uipc_listen,
 731         .pru_peeraddr =         uipc_peeraddr,
 732         .pru_rcvd =             uipc_rcvd,
 733         .pru_send =             uipc_send,
 734         .pru_sense =            uipc_sense,
 735         .pru_shutdown =         uipc_shutdown,
 736         .pru_sockaddr =         uipc_sockaddr,
 737         .pru_sosend =           sosend,
 738         .pru_soreceive =        soreceive,
 739 };
 740
 741 int
 742 uipc_ctloutput(struct socket *so, struct sockopt *sopt)
 743 {
 744         struct unpcb *unp = sotounpcb(so);
 745         int error = 0;
 746         pid_t peerpid;
 747         proc_t p;
 748         task_t t;
 749         struct socket *peerso;
 750
 751         switch (sopt->sopt_dir) {
 752         case SOPT_GET:
 753                 switch (sopt->sopt_name) {
 754                 case LOCAL_PEERCRED:
 755                         if (unp->unp_flags & UNP_HAVEPC) {
 756                                 error = sooptcopyout(sopt, &unp->unp_peercred,
 757                                     sizeof(unp->unp_peercred));
 758                         } else {
 759                                 if (so->so_type == SOCK_STREAM) {
 760                                         error = ENOTCONN;
 761                                 } else {
 762                                         error = EINVAL;
 763                                 }
 764                         }
 765                         break;
 766                 case LOCAL_PEERPID:
 767                 case LOCAL_PEEREPID:
 768                         if (unp->unp_conn == NULL) {
 769                                 error = ENOTCONN;
 770                                 break;
 771                         }
 772                         peerso = unp->unp_conn->unp_socket;
 773                         if (peerso == NULL) {
 774                                 panic("peer is connected but has no socket?");
 775                         }
 776                         unp_get_locks_in_order(so, peerso);
 777                         if (sopt->sopt_name == LOCAL_PEEREPID &&
 778                             peerso->so_flags & SOF_DELEGATED) {
 779                                 peerpid = peerso->e_pid;
 780                         } else {
 781                                 peerpid = peerso->last_pid;
 782                         }
 783                         socket_unlock(peerso, 1);
 784                         error = sooptcopyout(sopt, &peerpid, sizeof(peerpid));
 785                         break;
 786                 case LOCAL_PEERUUID:
 787                 case LOCAL_PEEREUUID:
 788                         if (unp->unp_conn == NULL) {
 789                                 error = ENOTCONN;
 790                                 break;
 791                         }
 792                         peerso = unp->unp_conn->unp_socket;
 793                         if (peerso == NULL) {
 794                                 panic("peer is connected but has no socket?");
 795                         }
 796                         unp_get_locks_in_order(so, peerso);
 797                         if (sopt->sopt_name == LOCAL_PEEREUUID &&
 798                             peerso->so_flags & SOF_DELEGATED) {
 799                                 error = sooptcopyout(sopt, &peerso->e_uuid,
 800                                     sizeof(peerso->e_uuid));
 801                         } else {
 802                                 error = sooptcopyout(sopt, &peerso->last_uuid,
 803                                     sizeof(peerso->last_uuid));
 804                         }
 805                         socket_unlock(peerso, 1);
 806                         break;
 807                 case LOCAL_PEERTOKEN:
 808                         if (unp->unp_conn == NULL) {
 809                                 error = ENOTCONN;
 810                                 break;
 811                         }
 812                         peerso = unp->unp_conn->unp_socket;
 813                         if (peerso == NULL) {
 814                                 panic("peer is connected but has no socket?");
 815                         }
 816                         unp_get_locks_in_order(so, peerso);
 817                         peerpid = peerso->last_pid;
 818                         p = proc_find(peerpid);
 819                         if (p != PROC_NULL) {
 820                                 t = proc_task(p);
 821                                 if (t != TASK_NULL) {
 822                                         audit_token_t peertoken;
 823                                         mach_msg_type_number_t count = TASK_AUDIT_TOKEN_COUNT;
 824                                         if (task_info(t, TASK_AUDIT_TOKEN, (task_info_t)&peertoken, &count) == KERN_SUCCESS) {
 825                                                 error = sooptcopyout(sopt, &peertoken, sizeof(peertoken));
 826                                         } else {
 827                                                 error = EINVAL;
 828                                         }
 829                                 } else {
 830                                         error = EINVAL;
 831                                 }
 832                                 proc_rele(p);
 833                         } else {
 834                                 error = EINVAL;
 835                         }
 836                         socket_unlock(peerso, 1);
 837                         break;
 838                 default:
 839                         error = EOPNOTSUPP;
 840                         break;
 841                 }
 842                 break;
 843         case SOPT_SET:
 844         default:
 845                 error = EOPNOTSUPP;
 846                 break;
 847         }
 848
 849         return error;
 850 }
 851
 852 /*
 853  * Both send and receive buffers are allocated PIPSIZ bytes of buffering
 854  * for stream sockets, although the total for sender and receiver is
 855  * actually only PIPSIZ.
 856  * Datagram sockets really use the sendspace as the maximum datagram size,
 857  * and don't really want to reserve the sendspace.  Their recvspace should
 858  * be large enough for at least one max-size datagram plus address.
 859  */
 860 #ifndef PIPSIZ
 861 #define PIPSIZ  8192
 862 #endif
 863 static u_int32_t        unpst_sendspace = PIPSIZ;
 864 static u_int32_t        unpst_recvspace = PIPSIZ;
 865 static u_int32_t        unpdg_sendspace = 2 * 1024;       /* really max datagram size */
 866 static u_int32_t        unpdg_recvspace = 4 * 1024;
 867
 868 static int      unp_rights;                     /* file descriptors in flight */
 869 static int      unp_disposed;                   /* discarded file descriptors */
 870
 871 SYSCTL_DECL(_net_local_stream);
 872 SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW | CTLFLAG_LOCKED,
 873     &unpst_sendspace, 0, "");
 874 SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW | CTLFLAG_LOCKED,
 875     &unpst_recvspace, 0, "");
 876 SYSCTL_INT(_net_local_stream, OID_AUTO, tracemdns, CTLFLAG_RW | CTLFLAG_LOCKED,
 877     &unpst_tracemdns, 0, "");
 878 SYSCTL_DECL(_net_local_dgram);
 879 SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW | CTLFLAG_LOCKED,
 880     &unpdg_sendspace, 0, "");
 881 SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW | CTLFLAG_LOCKED,
 882     &unpdg_recvspace, 0, "");
 883 SYSCTL_DECL(_net_local);
 884 SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD | CTLFLAG_LOCKED, &unp_rights, 0, "");
 885
 886 /*
 887  * Returns:     0                       Success
 888  *              ENOBUFS
 889  *      soreserve:ENOBUFS
 890  */
 891 static int
 892 unp_attach(struct socket *so)
 893 {
 894         struct unpcb *unp;
 895         int error = 0;
 896
 897         if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
 898                 switch (so->so_type) {
 899                 case SOCK_STREAM:
 900                         error = soreserve(so, unpst_sendspace, unpst_recvspace);
 901                         break;
 902
 903                 case SOCK_DGRAM:
 904                         error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
 905                         break;
 906
 907                 default:
 908                         panic("unp_attach");
 909                 }
 910                 if (error) {
 911                         return error;
 912                 }
 913         }
 914         unp = (struct unpcb *)zalloc(unp_zone);
 915         if (unp == NULL) {
 916                 return ENOBUFS;
 917         }
 918         bzero(unp, sizeof(*unp));
 919
 920         lck_mtx_init(&unp->unp_mtx,
 921             unp_mtx_grp, unp_mtx_attr);
 922
 923         lck_rw_lock_exclusive(&unp_list_mtx);
 924         LIST_INIT(&unp->unp_refs);
 925         unp->unp_socket = so;
 926         unp->unp_gencnt = ++unp_gencnt;
 927         unp_count++;
 928         LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ?
 929             &unp_dhead : &unp_shead, unp, unp_link);
 930         lck_rw_done(&unp_list_mtx);
 931         so->so_pcb = (caddr_t)unp;
 932         /*
 933          * Mark AF_UNIX socket buffers accordingly so that:
 934          *
 935          * a. In the SOCK_STREAM case, socket buffer append won't fail due to
 936          *    the lack of space; this essentially loosens the sbspace() check,
 937          *    since there is disconnect between sosend() and uipc_send() with
 938          *    respect to flow control that might result in our dropping the
 939          *    data in uipc_send().  By setting this, we allow for slightly
 940          *    more records to be appended to the receiving socket to avoid
 941          *    losing data (which we can't afford in the SOCK_STREAM case).
 942          *    Flow control still takes place since we adjust the sender's
 943          *    hiwat during each send.  This doesn't affect the SOCK_DGRAM
 944          *    case and append would still fail when the queue overflows.
 945          *
 946          * b. In the presence of control messages containing internalized
 947          *    file descriptors, the append routines will not free them since
 948          *    we'd need to undo the work first via unp_dispose().
 949          */
 950         so->so_rcv.sb_flags |= SB_UNIX;
 951         so->so_snd.sb_flags |= SB_UNIX;
 952         return 0;
 953 }
 954
 955 static void
 956 unp_detach(struct unpcb *unp)
 957 {
 958         int so_locked = 1;
 959
 960         lck_rw_lock_exclusive(&unp_list_mtx);
 961         LIST_REMOVE(unp, unp_link);
 962         --unp_count;
 963         ++unp_gencnt;
 964         lck_rw_done(&unp_list_mtx);
 965         if (unp->unp_vnode) {
 966                 struct vnode *tvp = NULL;
 967                 socket_unlock(unp->unp_socket, 0);
 968
 969                 /* Holding unp_connect_lock will avoid a race between
 970                  * a thread closing the listening socket and a thread
 971                  * connecting to it.
 972                  */
 973                 lck_mtx_lock(&unp_connect_lock);
 974                 socket_lock(unp->unp_socket, 0);
 975                 if (unp->unp_vnode) {
 976                         tvp = unp->unp_vnode;
 977                         unp->unp_vnode->v_socket = NULL;
 978                         unp->unp_vnode = NULL;
 979                 }
 980                 lck_mtx_unlock(&unp_connect_lock);
 981                 if (tvp != NULL) {
 982                         vnode_rele(tvp);                /* drop the usecount */
 983                 }
 984         }
 985         if (unp->unp_conn) {
 986                 unp_disconnect(unp);
 987         }
 988         while (unp->unp_refs.lh_first) {
 989                 struct unpcb *unp2 = NULL;
 990
 991                 /* This datagram socket is connected to one or more
 992                  * sockets. In order to avoid a race condition between removing
 993                  * this reference and closing the connected socket, we need
 994                  * to check disconnect_in_progress
 995                  */
 996                 if (so_locked == 1) {
 997                         socket_unlock(unp->unp_socket, 0);
 998                         so_locked = 0;
 999                 }
1000                 lck_mtx_lock(&unp_disconnect_lock);
1001                 while (disconnect_in_progress != 0) {
1002                         (void)msleep((caddr_t)&disconnect_in_progress, &unp_disconnect_lock,
1003                             PSOCK, "disconnect", NULL);
1004                 }
1005                 disconnect_in_progress = 1;
1006                 lck_mtx_unlock(&unp_disconnect_lock);
1007
1008                 /* Now we are sure that any unpcb socket disconnect is not happening */
1009                 if (unp->unp_refs.lh_first != NULL) {
1010                         unp2 = unp->unp_refs.lh_first;
1011                         socket_lock(unp2->unp_socket, 1);
1012                 }
1013
1014                 lck_mtx_lock(&unp_disconnect_lock);
1015                 disconnect_in_progress = 0;
1016                 wakeup(&disconnect_in_progress);
1017                 lck_mtx_unlock(&unp_disconnect_lock);
1018
1019                 if (unp2 != NULL) {
1020                         /* We already locked this socket and have a reference on it */
1021                         unp_drop(unp2, ECONNRESET);
1022                         socket_unlock(unp2->unp_socket, 1);
1023                 }
1024         }
1025
1026         if (so_locked == 0) {
1027                 socket_lock(unp->unp_socket, 0);
1028                 so_locked = 1;
1029         }
1030         soisdisconnected(unp->unp_socket);
1031         /* makes sure we're getting dealloced */
1032         unp->unp_socket->so_flags |= SOF_PCBCLEARING;
1033 }
1034
1035 /*
1036  * Returns:     0                       Success
1037  *              EAFNOSUPPORT
1038  *              EINVAL
1039  *              EADDRINUSE
1040  *              namei:???               [anything namei can return]
1041  *              vnode_authorize:???     [anything vnode_authorize can return]
1042  *
1043  * Notes:       p at this point is the current process, as this function is
1044  *              only called by sobind().
1045  */
1046 static int
1047 unp_bind(
1048         struct unpcb *unp,
1049         struct sockaddr *nam,
1050         proc_t p)
1051 {
1052         struct sockaddr_un *soun = (struct sockaddr_un *)nam;
1053         struct vnode *vp, *dvp;
1054         struct vnode_attr va;
1055         vfs_context_t ctx = vfs_context_current();
1056         int error, namelen;
1057         struct nameidata nd;
1058         struct socket *so = unp->unp_socket;
1059         char buf[SOCK_MAXADDRLEN];
1060
1061         if (nam->sa_family != 0 && nam->sa_family != AF_UNIX) {
1062                 return EAFNOSUPPORT;
1063         }
1064
1065         /*
1066          * Check if the socket is already bound to an address
1067          */
1068         if (unp->unp_vnode != NULL) {
1069                 return EINVAL;
1070         }
1071         /*
1072          * Check if the socket may have been shut down
1073          */
1074         if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
1075             (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
1076                 return EINVAL;
1077         }
1078
1079         namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
1080         if (namelen <= 0) {
1081                 return EINVAL;
1082         }
1083         /*
1084          * Note: sun_path is not a zero terminated "C" string
1085          */
1086         if (namelen >= SOCK_MAXADDRLEN) {
1087                 return EINVAL;
1088         }
1089         bcopy(soun->sun_path, buf, namelen);
1090         buf[namelen] = 0;
1091
1092         socket_unlock(so, 0);
1093
1094         NDINIT(&nd, CREATE, OP_MKFIFO, FOLLOW | LOCKPARENT, UIO_SYSSPACE,
1095             CAST_USER_ADDR_T(buf), ctx);
1096         /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
1097         error = namei(&nd);
1098         if (error) {
1099                 socket_lock(so, 0);
1100                 return error;
1101         }
1102         dvp = nd.ni_dvp;
1103         vp = nd.ni_vp;
1104
1105         if (vp != NULL) {
1106                 /*
1107                  * need to do this before the vnode_put of dvp
1108                  * since we may have to release an fs_nodelock
1109                  */
1110                 nameidone(&nd);
1111
1112                 vnode_put(dvp);
1113                 vnode_put(vp);
1114
1115                 socket_lock(so, 0);
1116                 return EADDRINUSE;
1117         }
1118
1119         VATTR_INIT(&va);
1120         VATTR_SET(&va, va_type, VSOCK);
1121         VATTR_SET(&va, va_mode, (ACCESSPERMS & ~p->p_fd->fd_cmask));
1122
1123 #if CONFIG_MACF
1124         error = mac_vnode_check_create(ctx,
1125             nd.ni_dvp, &nd.ni_cnd, &va);
1126
1127         if (error == 0)
1128 #endif /* CONFIG_MACF */
1129 #if CONFIG_MACF_SOCKET_SUBSET
1130         error = mac_vnode_check_uipc_bind(ctx,
1131             nd.ni_dvp, &nd.ni_cnd, &va);
1132
1133         if (error == 0)
1134 #endif /* MAC_SOCKET_SUBSET */
1135         /* authorize before creating */
1136         error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);
1137
1138         if (!error) {
1139                 /* create the socket */
1140                 error = vn_create(dvp, &vp, &nd, &va, 0, 0, NULL, ctx);
1141         }
1142
1143         nameidone(&nd);
1144         vnode_put(dvp);
1145
1146         if (error) {
1147                 socket_lock(so, 0);
1148                 return error;
1149         }
1150
1151         socket_lock(so, 0);
1152
1153         if (unp->unp_vnode != NULL) {
1154                 vnode_put(vp); /* drop the iocount */
1155                 return EINVAL;
1156         }
1157
1158         error = vnode_ref(vp);  /* gain a longterm reference */
1159         if (error) {
1160                 vnode_put(vp); /* drop the iocount */
1161                 return error;
1162         }
1163
1164         vp->v_socket = unp->unp_socket;
1165         unp->unp_vnode = vp;
1166         unp->unp_addr = (struct sockaddr_un *)dup_sockaddr(nam, 1);
1167         vnode_put(vp);          /* drop the iocount */
1168
1169         return 0;
1170 }
1171
1172
1173 /*
1174  * Returns:     0                       Success
1175  *              EAFNOSUPPORT            Address family not supported
1176  *              EINVAL                  Invalid argument
1177  *              ENOTSOCK                Not a socket
1178  *              ECONNREFUSED            Connection refused
1179  *              EPROTOTYPE              Protocol wrong type for socket
1180  *              EISCONN                 Socket is connected
1181  *      unp_connect2:EPROTOTYPE         Protocol wrong type for socket
1182  *      unp_connect2:EINVAL             Invalid argument
1183  *      namei:???                       [anything namei can return]
1184  *      vnode_authorize:????            [anything vnode_authorize can return]
1185  *
1186  * Notes:       p at this point is the current process, as this function is
1187  *              only called by sosend(), sendfile(), and soconnectlock().
1188  */
1189 static int
1190 unp_connect(struct socket *so, struct sockaddr *nam, __unused proc_t p)
1191 {
1192         struct sockaddr_un *soun = (struct sockaddr_un *)nam;
1193         struct vnode *vp;
1194         struct socket *so2, *so3, *list_so = NULL;
1195         struct unpcb *unp, *unp2, *unp3;
1196         vfs_context_t ctx = vfs_context_current();
1197         int error, len;
1198         struct nameidata nd;
1199         char buf[SOCK_MAXADDRLEN];
1200
1201         if (nam->sa_family != 0 && nam->sa_family != AF_UNIX) {
1202                 return EAFNOSUPPORT;
1203         }
1204
1205         unp = sotounpcb(so);
1206         so2 = so3 = NULL;
1207
1208         len = nam->sa_len - offsetof(struct sockaddr_un, sun_path);
1209         if (len <= 0) {
1210                 return EINVAL;
1211         }
1212         /*
1213          * Note: sun_path is not a zero terminated "C" string
1214          */
1215         if (len >= SOCK_MAXADDRLEN) {
1216                 return EINVAL;
1217         }
1218
1219         soisconnecting(so);
1220
1221         bcopy(soun->sun_path, buf, len);
1222         buf[len] = 0;
1223
1224         socket_unlock(so, 0);
1225
1226         NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
1227             CAST_USER_ADDR_T(buf), ctx);
1228         error = namei(&nd);
1229         if (error) {
1230                 socket_lock(so, 0);
1231                 return error;
1232         }
1233         nameidone(&nd);
1234         vp = nd.ni_vp;
1235         if (vp->v_type != VSOCK) {
1236                 error = ENOTSOCK;
1237                 socket_lock(so, 0);
1238                 goto out;
1239         }
1240
1241 #if CONFIG_MACF_SOCKET_SUBSET
1242         error = mac_vnode_check_uipc_connect(ctx, vp, so);
1243         if (error) {
1244                 socket_lock(so, 0);
1245                 goto out;
1246         }
1247 #endif /* MAC_SOCKET_SUBSET */
1248
1249         error = vnode_authorize(vp, NULL, KAUTH_VNODE_WRITE_DATA, ctx);
1250         if (error) {
1251                 socket_lock(so, 0);
1252                 goto out;
1253         }
1254
1255         lck_mtx_lock(&unp_connect_lock);
1256
1257         if (vp->v_socket == 0) {
1258                 lck_mtx_unlock(&unp_connect_lock);
1259                 error = ECONNREFUSED;
1260                 socket_lock(so, 0);
1261                 goto out;
1262         }
1263
1264         socket_lock(vp->v_socket, 1); /* Get a reference on the listening socket */
1265         so2 = vp->v_socket;
1266         lck_mtx_unlock(&unp_connect_lock);
1267
1268
1269         if (so2->so_pcb == NULL) {
1270                 error = ECONNREFUSED;
1271                 if (so != so2) {
1272                         socket_unlock(so2, 1);
1273                         socket_lock(so, 0);
1274                 } else {
1275                         /* Release the reference held for the listen socket */
1276                         VERIFY(so2->so_usecount > 0);
1277                         so2->so_usecount--;
1278                 }
1279                 goto out;
1280         }
1281
1282         if (so < so2) {
1283                 socket_unlock(so2, 0);
1284                 socket_lock(so, 0);
1285                 socket_lock(so2, 0);
1286         } else if (so > so2) {
1287                 socket_lock(so, 0);
1288         }
1289         /*
1290          * Check if socket was connected while we were trying to
1291          * get the socket locks in order.
1292          * XXX - probably shouldn't return an error for SOCK_DGRAM
1293          */
1294         if ((so->so_state & SS_ISCONNECTED) != 0) {
1295                 error = EISCONN;
1296                 goto decref_out;
1297         }
1298
1299         if (so->so_type != so2->so_type) {
1300                 error = EPROTOTYPE;
1301                 goto decref_out;
1302         }
1303
1304         if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
1305                 /* Release the incoming socket but keep a reference */
1306                 socket_unlock(so, 0);
1307
1308                 if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
1309                     (so3 = sonewconn(so2, 0, nam)) == 0) {
1310                         error = ECONNREFUSED;
1311                         if (so != so2) {
1312                                 socket_unlock(so2, 1);
1313                                 socket_lock(so, 0);
1314                         } else {
1315                                 socket_lock(so, 0);
1316                                 /* Release the reference held for
1317                                  * listen socket.
1318                                  */
1319                                 VERIFY(so2->so_usecount > 0);
1320                                 so2->so_usecount--;
1321                         }
1322                         goto out;
1323                 }
1324                 unp2 = sotounpcb(so2);
1325                 unp3 = sotounpcb(so3);
1326                 if (unp2->unp_addr) {
1327                         unp3->unp_addr = (struct sockaddr_un *)
1328                             dup_sockaddr((struct sockaddr *)unp2->unp_addr, 1);
1329                 }
1330
1331                 /*
1332                  * unp_peercred management:
1333                  *
1334                  * The connecter's (client's) credentials are copied
1335                  * from its process structure at the time of connect()
1336                  * (which is now).
1337                  */
1338                 cru2x(vfs_context_ucred(ctx), &unp3->unp_peercred);
1339                 unp3->unp_flags |= UNP_HAVEPC;
1340                 /*
1341                  * The receiver's (server's) credentials are copied
1342                  * from the unp_peercred member of socket on which the
1343                  * former called listen(); unp_listen() cached that
1344                  * process's credentials at that time so we can use
1345                  * them now.
1346                  */
1347                 KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED,
1348                     ("unp_connect: listener without cached peercred"));
1349
1350                 /* Here we need to have both so and so2 locks and so2
1351                  * is already locked. Lock ordering is required.
1352                  */
1353                 if (so < so2) {
1354                         socket_unlock(so2, 0);
1355                         socket_lock(so, 0);
1356                         socket_lock(so2, 0);
1357                 } else {
1358                         socket_lock(so, 0);
1359                 }
1360
1361                 /* Check again if the socket state changed when its lock was released */
1362                 if ((so->so_state & SS_ISCONNECTED) != 0) {
1363                         error = EISCONN;
1364                         socket_unlock(so2, 1);
1365                         socket_lock(so3, 0);
1366                         sofreelastref(so3, 1);
1367                         goto out;
1368                 }
1369                 memcpy(&unp->unp_peercred, &unp2->unp_peercred,
1370                     sizeof(unp->unp_peercred));
1371                 unp->unp_flags |= UNP_HAVEPC;
1372
1373                 /* Hold the reference on listening socket until the end */
1374                 socket_unlock(so2, 0);
1375                 list_so = so2;
1376
1377                 /* Lock ordering doesn't matter because so3 was just created */
1378                 socket_lock(so3, 1);
1379                 so2 = so3;
1380
1381                 /*
1382                  * Enable tracing for mDNSResponder endpoints.  (The use
1383                  * of sizeof instead of strlen below takes the null
1384                  * terminating character into account.)
1385                  */
1386                 if (unpst_tracemdns &&
1387                     !strncmp(soun->sun_path, MDNSRESPONDER_PATH,
1388                     sizeof(MDNSRESPONDER_PATH))) {
1389                         unp->unp_flags |= UNP_TRACE_MDNS;
1390                         unp2->unp_flags |= UNP_TRACE_MDNS;
1391                 }
1392         }
1393
1394         error = unp_connect2(so, so2);
1395
1396 decref_out:
1397         if (so2 != NULL) {
1398                 if (so != so2) {
1399                         socket_unlock(so2, 1);
1400                 } else {
1401                         /* Release the extra reference held for the listen socket.
1402                          * This is possible only for SOCK_DGRAM sockets. We refuse
1403                          * connecting to the same socket for SOCK_STREAM sockets.
1404                          */
1405                         VERIFY(so2->so_usecount > 0);
1406                         so2->so_usecount--;
1407                 }
1408         }
1409
1410         if (list_so != NULL) {
1411                 socket_lock(list_so, 0);
1412                 socket_unlock(list_so, 1);
1413         }
1414
1415 out:
1416         LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1417         vnode_put(vp);
1418         return error;
1419 }
1420
1421 /*
1422  * Returns:     0                       Success
1423  *              EPROTOTYPE              Protocol wrong type for socket
1424  *              EINVAL                  Invalid argument
1425  */
1426 int
1427 unp_connect2(struct socket *so, struct socket *so2)
1428 {
1429         struct unpcb *unp = sotounpcb(so);
1430         struct unpcb *unp2;
1431
1432         if (so2->so_type != so->so_type) {
1433                 return EPROTOTYPE;
1434         }
1435
1436         unp2 = sotounpcb(so2);
1437
1438         LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1439         LCK_MTX_ASSERT(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);
1440
1441         /* Verify both sockets are still opened */
1442         if (unp == 0 || unp2 == 0) {
1443                 return EINVAL;
1444         }
1445
1446         unp->unp_conn = unp2;
1447         so2->so_usecount++;
1448
1449         switch (so->so_type) {
1450         case SOCK_DGRAM:
1451                 LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink);
1452
1453                 if (so != so2) {
1454                         /* Avoid lock order reversals due to drop/acquire in soisconnected. */
1455                         /* Keep an extra reference on so2 that will be dropped
1456                          * soon after getting the locks in order
1457                          */
1458                         socket_unlock(so2, 0);
1459                         soisconnected(so);
1460                         unp_get_locks_in_order(so, so2);
1461                         VERIFY(so2->so_usecount > 0);
1462                         so2->so_usecount--;
1463                 } else {
1464                         soisconnected(so);
1465                 }
1466
1467                 break;
1468
1469         case SOCK_STREAM:
1470                 /* This takes care of socketpair */
1471                 if (!(unp->unp_flags & UNP_HAVEPC) &&
1472                     !(unp2->unp_flags & UNP_HAVEPC)) {
1473                         cru2x(kauth_cred_get(), &unp->unp_peercred);
1474                         unp->unp_flags |= UNP_HAVEPC;
1475
1476                         cru2x(kauth_cred_get(), &unp2->unp_peercred);
1477                         unp2->unp_flags |= UNP_HAVEPC;
1478                 }
1479                 unp2->unp_conn = unp;
1480                 so->so_usecount++;
1481
1482                 /* Avoid lock order reversals due to drop/acquire in soisconnected. */
1483                 socket_unlock(so, 0);
1484                 soisconnected(so2);
1485
1486                 /* Keep an extra reference on so2, that will be dropped soon after
1487                  * getting the locks in order again.
1488                  */
1489                 socket_unlock(so2, 0);
1490
1491                 socket_lock(so, 0);
1492                 soisconnected(so);
1493
1494                 unp_get_locks_in_order(so, so2);
1495                 /* Decrement the extra reference left before */
1496                 VERIFY(so2->so_usecount > 0);
1497                 so2->so_usecount--;
1498                 break;
1499
1500         default:
1501                 panic("unknown socket type %d in unp_connect2", so->so_type);
1502         }
1503         LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1504         LCK_MTX_ASSERT(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);
1505         return 0;
1506 }
1507
1508 static void
1509 unp_disconnect(struct unpcb *unp)
1510 {
1511         struct unpcb *unp2 = NULL;
1512         struct socket *so2 = NULL, *so;
1513         struct socket *waitso;
1514         int so_locked = 1, strdisconn = 0;
1515
1516         so = unp->unp_socket;
1517         if (unp->unp_conn == NULL) {
1518                 return;
1519         }
1520         lck_mtx_lock(&unp_disconnect_lock);
1521         while (disconnect_in_progress != 0) {
1522                 if (so_locked == 1) {
1523                         socket_unlock(so, 0);
1524                         so_locked = 0;
1525                 }
1526                 (void)msleep((caddr_t)&disconnect_in_progress, &unp_disconnect_lock,
1527                     PSOCK, "disconnect", NULL);
1528         }
1529         disconnect_in_progress = 1;
1530         lck_mtx_unlock(&unp_disconnect_lock);
1531
1532         if (so_locked == 0) {
1533                 socket_lock(so, 0);
1534                 so_locked = 1;
1535         }
1536
1537         unp2 = unp->unp_conn;
1538
1539         if (unp2 == 0 || unp2->unp_socket == NULL) {
1540                 goto out;
1541         }
1542         so2 = unp2->unp_socket;
1543
1544 try_again:
1545         if (so == so2) {
1546                 if (so_locked == 0) {
1547                         socket_lock(so, 0);
1548                 }
1549                 waitso = so;
1550         } else if (so < so2) {
1551                 if (so_locked == 0) {
1552                         socket_lock(so, 0);
1553                 }
1554                 socket_lock(so2, 1);
1555                 waitso = so2;
1556         } else {
1557                 if (so_locked == 1) {
1558                         socket_unlock(so, 0);
1559                 }
1560                 socket_lock(so2, 1);
1561                 socket_lock(so, 0);
1562                 waitso = so;
1563         }
1564         so_locked = 1;
1565
1566         LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1567         LCK_MTX_ASSERT(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);
1568
1569         /* Check for the UNP_DONTDISCONNECT flag, if it
1570          * is set, release both sockets and go to sleep
1571          */
1572
1573         if ((((struct unpcb *)waitso->so_pcb)->unp_flags & UNP_DONTDISCONNECT) != 0) {
1574                 if (so != so2) {
1575                         socket_unlock(so2, 1);
1576                 }
1577                 so_locked = 0;
1578
1579                 (void)msleep(waitso->so_pcb, &unp->unp_mtx,
1580                     PSOCK | PDROP, "unpdisconnect", NULL);
1581                 goto try_again;
1582         }
1583
1584         if (unp->unp_conn == NULL) {
1585                 panic("unp_conn became NULL after sleep");
1586         }
1587
1588         unp->unp_conn = NULL;
1589         VERIFY(so2->so_usecount > 0);
1590         so2->so_usecount--;
1591
1592         if (unp->unp_flags & UNP_TRACE_MDNS) {
1593                 unp->unp_flags &= ~UNP_TRACE_MDNS;
1594         }
1595
1596         switch (unp->unp_socket->so_type) {
1597         case SOCK_DGRAM:
1598                 LIST_REMOVE(unp, unp_reflink);
1599                 unp->unp_socket->so_state &= ~SS_ISCONNECTED;
1600                 if (so != so2) {
1601                         socket_unlock(so2, 1);
1602                 }
1603                 break;
1604
1605         case SOCK_STREAM:
1606                 unp2->unp_conn = NULL;
1607                 VERIFY(so->so_usecount > 0);
1608                 so->so_usecount--;
1609
1610                 /* Set the socket state correctly but do a wakeup later when
1611                  * we release all locks except the socket lock, this will avoid
1612                  * a deadlock.
1613                  */
1614                 unp->unp_socket->so_state &= ~(SS_ISCONNECTING | SS_ISCONNECTED | SS_ISDISCONNECTING);
1615                 unp->unp_socket->so_state |= (SS_CANTRCVMORE | SS_CANTSENDMORE | SS_ISDISCONNECTED);
1616
1617                 unp2->unp_socket->so_state &= ~(SS_ISCONNECTING | SS_ISCONNECTED | SS_ISDISCONNECTING);
1618                 unp->unp_socket->so_state |= (SS_CANTRCVMORE | SS_CANTSENDMORE | SS_ISDISCONNECTED);
1619
1620                 if (unp2->unp_flags & UNP_TRACE_MDNS) {
1621                         unp2->unp_flags &= ~UNP_TRACE_MDNS;
1622                 }
1623
1624                 strdisconn = 1;
1625                 break;
1626         default:
1627                 panic("unknown socket type %d", so->so_type);
1628         }
1629 out:
1630         lck_mtx_lock(&unp_disconnect_lock);
1631         disconnect_in_progress = 0;
1632         wakeup(&disconnect_in_progress);
1633         lck_mtx_unlock(&unp_disconnect_lock);
1634
1635         if (strdisconn) {
1636                 socket_unlock(so, 0);
1637                 soisdisconnected(so2);
1638                 socket_unlock(so2, 1);
1639
1640                 socket_lock(so, 0);
1641                 soisdisconnected(so);
1642         }
1643         LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1644         return;
1645 }
1646
1647 /*
1648  * unpcb_to_compat copies specific bits of a unpcb to a unpcb_compat format.
1649  * The unpcb_compat data structure is passed to user space and must not change.
1650  */
1651 static void
1652 unpcb_to_compat(struct unpcb *up, struct unpcb_compat *cp)
1653 {
1654 #if defined(__LP64__)
1655         cp->unp_link.le_next = (u_int32_t)
1656             VM_KERNEL_ADDRPERM(up->unp_link.le_next);
1657         cp->unp_link.le_prev = (u_int32_t)
1658             VM_KERNEL_ADDRPERM(up->unp_link.le_prev);
1659 #else
1660         cp->unp_link.le_next = (struct unpcb_compat *)
1661             VM_KERNEL_ADDRPERM(up->unp_link.le_next);
1662         cp->unp_link.le_prev = (struct unpcb_compat **)
1663             VM_KERNEL_ADDRPERM(up->unp_link.le_prev);
1664 #endif
1665         cp->unp_socket = (_UNPCB_PTR(struct socket *))
1666             VM_KERNEL_ADDRPERM(up->unp_socket);
1667         cp->unp_vnode = (_UNPCB_PTR(struct vnode *))
1668             VM_KERNEL_ADDRPERM(up->unp_vnode);
1669         cp->unp_ino = up->unp_ino;
1670         cp->unp_conn = (_UNPCB_PTR(struct unpcb_compat *))
1671             VM_KERNEL_ADDRPERM(up->unp_conn);
1672         cp->unp_refs = (u_int32_t)VM_KERNEL_ADDRPERM(up->unp_refs.lh_first);
1673 #if defined(__LP64__)
1674         cp->unp_reflink.le_next =
1675             (u_int32_t)VM_KERNEL_ADDRPERM(up->unp_reflink.le_next);
1676         cp->unp_reflink.le_prev =
1677             (u_int32_t)VM_KERNEL_ADDRPERM(up->unp_reflink.le_prev);
1678 #else
1679         cp->unp_reflink.le_next =
1680             (struct unpcb_compat *)VM_KERNEL_ADDRPERM(up->unp_reflink.le_next);
1681         cp->unp_reflink.le_prev =
1682             (struct unpcb_compat **)VM_KERNEL_ADDRPERM(up->unp_reflink.le_prev);
1683 #endif
1684         cp->unp_addr = (_UNPCB_PTR(struct sockaddr_un *))
1685             VM_KERNEL_ADDRPERM(up->unp_addr);
1686         cp->unp_cc = up->unp_cc;
1687         cp->unp_mbcnt = up->unp_mbcnt;
1688         cp->unp_gencnt = up->unp_gencnt;
1689 }
1690
1691 static int
1692 unp_pcblist SYSCTL_HANDLER_ARGS
1693 {
1694 #pragma unused(oidp,arg2)
1695         int error, i, n;
1696         struct unpcb *unp, **unp_list;
1697         unp_gen_t gencnt;
1698         struct xunpgen xug;
1699         struct unp_head *head;
1700
1701         lck_rw_lock_shared(&unp_list_mtx);
1702         head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);
1703
1704         /*
1705          * The process of preparing the PCB list is too time-consuming and
1706          * resource-intensive to repeat twice on every request.
1707          */
1708         if (req->oldptr == USER_ADDR_NULL) {
1709                 n = unp_count;
1710                 req->oldidx = 2 * sizeof(xug) + (n + n / 8) *
1711                     sizeof(struct xunpcb);
1712                 lck_rw_done(&unp_list_mtx);
1713                 return 0;
1714         }
1715
1716         if (req->newptr != USER_ADDR_NULL) {
1717                 lck_rw_done(&unp_list_mtx);
1718                 return EPERM;
1719         }
1720
1721         /*
1722          * OK, now we're committed to doing something.
1723          */
1724         gencnt = unp_gencnt;
1725         n = unp_count;
1726
1727         bzero(&xug, sizeof(xug));
1728         xug.xug_len = sizeof(xug);
1729         xug.xug_count = n;
1730         xug.xug_gen = gencnt;
1731         xug.xug_sogen = so_gencnt;
1732         error = SYSCTL_OUT(req, &xug, sizeof(xug));
1733         if (error) {
1734                 lck_rw_done(&unp_list_mtx);
1735                 return error;
1736         }
1737
1738         /*
1739          * We are done if there is no pcb
1740          */
1741         if (n == 0) {
1742                 lck_rw_done(&unp_list_mtx);
1743                 return 0;
1744         }
1745
1746         MALLOC(unp_list, struct unpcb **, n * sizeof(*unp_list),
1747             M_TEMP, M_WAITOK);
1748         if (unp_list == 0) {
1749                 lck_rw_done(&unp_list_mtx);
1750                 return ENOMEM;
1751         }
1752
1753         for (unp = head->lh_first, i = 0; unp && i < n;
1754             unp = unp->unp_link.le_next) {
1755                 if (unp->unp_gencnt <= gencnt) {
1756                         unp_list[i++] = unp;
1757                 }
1758         }
1759         n = i;                  /* in case we lost some during malloc */
1760
1761         error = 0;
1762         for (i = 0; i < n; i++) {
1763                 unp = unp_list[i];
1764                 if (unp->unp_gencnt <= gencnt) {
1765                         struct xunpcb xu;
1766
1767                         bzero(&xu, sizeof(xu));
1768                         xu.xu_len = sizeof(xu);
1769                         xu.xu_unpp = (_UNPCB_PTR(struct unpcb_compat *))
1770                             VM_KERNEL_ADDRPERM(unp);
1771                         /*
1772                          * XXX - need more locking here to protect against
1773                          * connect/disconnect races for SMP.
1774                          */
1775                         if (unp->unp_addr) {
1776                                 bcopy(unp->unp_addr, &xu.xu_au,
1777                                     unp->unp_addr->sun_len);
1778                         }
1779                         if (unp->unp_conn && unp->unp_conn->unp_addr) {
1780                                 bcopy(unp->unp_conn->unp_addr,
1781                                     &xu.xu_cau,
1782                                     unp->unp_conn->unp_addr->sun_len);
1783                         }
1784                         unpcb_to_compat(unp, &xu.xu_unp);
1785                         sotoxsocket(unp->unp_socket, &xu.xu_socket);
1786                         error = SYSCTL_OUT(req, &xu, sizeof(xu));
1787                 }
1788         }
1789         if (!error) {
1790                 /*
1791                  * Give the user an updated idea of our state.
1792                  * If the generation differs from what we told
1793                  * her before, she knows that something happened
1794                  * while we were processing this request, and it
1795                  * might be necessary to retry.
1796                  */
1797                 bzero(&xug, sizeof(xug));
1798                 xug.xug_len = sizeof(xug);
1799                 xug.xug_gen = unp_gencnt;
1800                 xug.xug_sogen = so_gencnt;
1801                 xug.xug_count = unp_count;
1802                 error = SYSCTL_OUT(req, &xug, sizeof(xug));
1803         }
1804         FREE(unp_list, M_TEMP);
1805         lck_rw_done(&unp_list_mtx);
1806         return error;
1807 }
1808
1809 SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist,
1810     CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
1811     (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb",
1812     "List of active local datagram sockets");
1813 SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist,
1814     CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
1815     (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb",
1816     "List of active local stream sockets");
1817
1818 #if XNU_TARGET_OS_OSX
1819
1820 static int
1821 unp_pcblist64 SYSCTL_HANDLER_ARGS
1822 {
1823 #pragma unused(oidp,arg2)
1824         int error, i, n;
1825         struct unpcb *unp, **unp_list;
1826         unp_gen_t gencnt;
1827         struct xunpgen xug;
1828         struct unp_head *head;
1829
1830         lck_rw_lock_shared(&unp_list_mtx);
1831         head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);
1832
1833         /*
1834          * The process of preparing the PCB list is too time-consuming and
1835          * resource-intensive to repeat twice on every request.
1836          */
1837         if (req->oldptr == USER_ADDR_NULL) {
1838                 n = unp_count;
1839                 req->oldidx = 2 * sizeof(xug) + (n + n / 8) *
1840                     (sizeof(struct xunpcb64));
1841                 lck_rw_done(&unp_list_mtx);
1842                 return 0;
1843         }
1844
1845         if (req->newptr != USER_ADDR_NULL) {
1846                 lck_rw_done(&unp_list_mtx);
1847                 return EPERM;
1848         }
1849
1850         /*
1851          * OK, now we're committed to doing something.
1852          */
1853         gencnt = unp_gencnt;
1854         n = unp_count;
1855
1856         bzero(&xug, sizeof(xug));
1857         xug.xug_len = sizeof(xug);
1858         xug.xug_count = n;
1859         xug.xug_gen = gencnt;
1860         xug.xug_sogen = so_gencnt;
1861         error = SYSCTL_OUT(req, &xug, sizeof(xug));
1862         if (error) {
1863                 lck_rw_done(&unp_list_mtx);
1864                 return error;
1865         }
1866
1867         /*
1868          * We are done if there is no pcb
1869          */
1870         if (n == 0) {
1871                 lck_rw_done(&unp_list_mtx);
1872                 return 0;
1873         }
1874
1875         MALLOC(unp_list, struct unpcb **, n * sizeof(*unp_list),
1876             M_TEMP, M_WAITOK);
1877         if (unp_list == 0) {
1878                 lck_rw_done(&unp_list_mtx);
1879                 return ENOMEM;
1880         }
1881
1882         for (unp = head->lh_first, i = 0; unp && i < n;
1883             unp = unp->unp_link.le_next) {
1884                 if (unp->unp_gencnt <= gencnt) {
1885                         unp_list[i++] = unp;
1886                 }
1887         }
1888         n = i;                  /* in case we lost some during malloc */
1889
1890         error = 0;
1891         for (i = 0; i < n; i++) {
1892                 unp = unp_list[i];
1893                 if (unp->unp_gencnt <= gencnt) {
1894                         struct xunpcb64 xu;
1895                         size_t          xu_len = sizeof(struct xunpcb64);
1896
1897                         bzero(&xu, xu_len);
1898                         xu.xu_len = (u_int32_t)xu_len;
1899                         xu.xu_unpp = (u_int64_t)VM_KERNEL_ADDRPERM(unp);
1900                         xu.xunp_link.le_next = (u_int64_t)
1901                             VM_KERNEL_ADDRPERM(unp->unp_link.le_next);
1902                         xu.xunp_link.le_prev = (u_int64_t)
1903                             VM_KERNEL_ADDRPERM(unp->unp_link.le_prev);
1904                         xu.xunp_socket = (u_int64_t)
1905                             VM_KERNEL_ADDRPERM(unp->unp_socket);
1906                         xu.xunp_vnode = (u_int64_t)
1907                             VM_KERNEL_ADDRPERM(unp->unp_vnode);
1908                         xu.xunp_ino = unp->unp_ino;
1909                         xu.xunp_conn = (u_int64_t)
1910                             VM_KERNEL_ADDRPERM(unp->unp_conn);
1911                         xu.xunp_refs = (u_int64_t)
1912                             VM_KERNEL_ADDRPERM(unp->unp_refs.lh_first);
1913                         xu.xunp_reflink.le_next = (u_int64_t)
1914                             VM_KERNEL_ADDRPERM(unp->unp_reflink.le_next);
1915                         xu.xunp_reflink.le_prev = (u_int64_t)
1916                             VM_KERNEL_ADDRPERM(unp->unp_reflink.le_prev);
1917                         xu.xunp_cc = unp->unp_cc;
1918                         xu.xunp_mbcnt = unp->unp_mbcnt;
1919                         xu.xunp_gencnt = unp->unp_gencnt;
1920
1921                         if (unp->unp_socket) {
1922                                 sotoxsocket64(unp->unp_socket, &xu.xu_socket);
1923                         }
1924
1925                         /*
1926                          * XXX - need more locking here to protect against
1927                          * connect/disconnect races for SMP.
1928                          */
1929                         if (unp->unp_addr) {
1930                                 bcopy(unp->unp_addr, &xu.xu_au,
1931                                     unp->unp_addr->sun_len);
1932                         }
1933                         if (unp->unp_conn && unp->unp_conn->unp_addr) {
1934                                 bcopy(unp->unp_conn->unp_addr,
1935                                     &xu.xu_cau,
1936                                     unp->unp_conn->unp_addr->sun_len);
1937                         }
1938
1939                         error = SYSCTL_OUT(req, &xu, xu_len);
1940                 }
1941         }
1942         if (!error) {
1943                 /*
1944                  * Give the user an updated idea of our state.
1945                  * If the generation differs from what we told
1946                  * her before, she knows that something happened
1947                  * while we were processing this request, and it
1948                  * might be necessary to retry.
1949                  */
1950                 bzero(&xug, sizeof(xug));
1951                 xug.xug_len = sizeof(xug);
1952                 xug.xug_gen = unp_gencnt;
1953                 xug.xug_sogen = so_gencnt;
1954                 xug.xug_count = unp_count;
1955                 error = SYSCTL_OUT(req, &xug, sizeof(xug));
1956         }
1957         FREE(unp_list, M_TEMP);
1958         lck_rw_done(&unp_list_mtx);
1959         return error;
1960 }
1961
1962 SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist64,
1963     CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
1964     (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist64, "S,xunpcb64",
1965     "List of active local datagram sockets 64 bit");
1966 SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist64,
1967     CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
1968     (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist64, "S,xunpcb64",
1969     "List of active local stream sockets 64 bit");
1970
1971 #endif /* XNU_TARGET_OS_OSX */
1972
1973 static void
1974 unp_shutdown(struct unpcb *unp)
1975 {
1976         struct socket *so = unp->unp_socket;
1977         struct socket *so2;
1978         if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn) {
1979                 so2 = unp->unp_conn->unp_socket;
1980                 unp_get_locks_in_order(so, so2);
1981                 socantrcvmore(so2);
1982                 socket_unlock(so2, 1);
1983         }
1984 }
1985
1986 static void
1987 unp_drop(struct unpcb *unp, int errno)
1988 {
1989         struct socket *so = unp->unp_socket;
1990
1991         so->so_error = (u_short)errno;
1992         unp_disconnect(unp);
1993 }
1994
1995 /* always called under uipc_lock */
1996 static void
1997 unp_gc_wait(void)
1998 {
1999         if (unp_gcthread == current_thread()) {
2000                 return;
2001         }
2002
2003         while (unp_gcing != 0) {
2004                 unp_gcwait = 1;
2005                 msleep(&unp_gcing, &uipc_lock, 0, "unp_gc_wait", NULL);
2006         }
2007 }
2008
2009 /*
2010  * fg_insertuipc_mark
2011  *
2012  * Description: Mark fileglob for insertion onto message queue if needed
2013  *              Also takes fileglob reference
2014  *
2015  * Parameters:  fg      Fileglob pointer to insert
2016  *
2017  * Returns:     true, if the fileglob needs to be inserted onto msg queue
2018  *
2019  * Locks:       Takes and drops fg_lock, potentially many times
2020  */
2021 static boolean_t
2022 fg_insertuipc_mark(struct fileglob * fg)
2023 {
2024         boolean_t insert = FALSE;
2025
2026         lck_mtx_lock_spin(&fg->fg_lock);
2027         while (fg->fg_lflags & FG_RMMSGQ) {
2028                 lck_mtx_convert_spin(&fg->fg_lock);
2029
2030                 fg->fg_lflags |= FG_WRMMSGQ;
2031                 msleep(&fg->fg_lflags, &fg->fg_lock, 0, "fg_insertuipc", NULL);
2032         }
2033
2034         os_ref_retain_locked_raw(&fg->fg_count, &f_refgrp);
2035         fg->fg_msgcount++;
2036         if (fg->fg_msgcount == 1) {
2037                 fg->fg_lflags |= FG_INSMSGQ;
2038                 insert = TRUE;
2039         }
2040         lck_mtx_unlock(&fg->fg_lock);
2041         return insert;
2042 }
2043
2044 /*
2045  * fg_insertuipc
2046  *
2047  * Description: Insert marked fileglob onto message queue
2048  *
2049  * Parameters:  fg      Fileglob pointer to insert
2050  *
2051  * Returns:     void
2052  *
2053  * Locks:       Takes and drops fg_lock & uipc_lock
2054  *              DO NOT call this function with proc_fdlock held as unp_gc()
2055  *              can potentially try to acquire proc_fdlock, which can result
2056  *              in a deadlock if this function is in unp_gc_wait().
2057  */
2058 static void
2059 fg_insertuipc(struct fileglob * fg)
2060 {
2061         if (fg->fg_lflags & FG_INSMSGQ) {
2062                 lck_mtx_lock_spin(&uipc_lock);
2063                 unp_gc_wait();
2064                 LIST_INSERT_HEAD(&unp_msghead, fg, f_msglist);
2065                 lck_mtx_unlock(&uipc_lock);
2066                 lck_mtx_lock(&fg->fg_lock);
2067                 fg->fg_lflags &= ~FG_INSMSGQ;
2068                 if (fg->fg_lflags & FG_WINSMSGQ) {
2069                         fg->fg_lflags &= ~FG_WINSMSGQ;
2070                         wakeup(&fg->fg_lflags);
2071                 }
2072                 lck_mtx_unlock(&fg->fg_lock);
2073         }
2074 }
2075
2076 /*
2077  * fg_removeuipc_mark
2078  *
2079  * Description: Mark the fileglob for removal from message queue if needed
2080  *              Also releases fileglob message queue reference
2081  *
2082  * Parameters:  fg      Fileglob pointer to remove
2083  *
2084  * Returns:     true, if the fileglob needs to be removed from msg queue
2085  *
2086  * Locks:       Takes and drops fg_lock, potentially many times
2087  */
2088 static boolean_t
2089 fg_removeuipc_mark(struct fileglob * fg)
2090 {
2091         boolean_t remove = FALSE;
2092
2093         lck_mtx_lock_spin(&fg->fg_lock);
2094         while (fg->fg_lflags & FG_INSMSGQ) {
2095                 lck_mtx_convert_spin(&fg->fg_lock);
2096
2097                 fg->fg_lflags |= FG_WINSMSGQ;
2098                 msleep(&fg->fg_lflags, &fg->fg_lock, 0, "fg_removeuipc", NULL);
2099         }
2100         fg->fg_msgcount--;
2101         if (fg->fg_msgcount == 0) {
2102                 fg->fg_lflags |= FG_RMMSGQ;
2103                 remove = TRUE;
2104         }
2105         lck_mtx_unlock(&fg->fg_lock);
2106         return remove;
2107 }
2108
2109 /*
2110  * fg_removeuipc
2111  *
2112  * Description: Remove marked fileglob from message queue
2113  *
2114  * Parameters:  fg      Fileglob pointer to remove
2115  *
2116  * Returns:     void
2117  *
2118  * Locks:       Takes and drops fg_lock & uipc_lock
2119  *              DO NOT call this function with proc_fdlock held as unp_gc()
2120  *              can potentially try to acquire proc_fdlock, which can result
2121  *              in a deadlock if this function is in unp_gc_wait().
2122  */
2123 static void
2124 fg_removeuipc(struct fileglob * fg)
2125 {
2126         if (fg->fg_lflags & FG_RMMSGQ) {
2127                 lck_mtx_lock_spin(&uipc_lock);
2128                 unp_gc_wait();
2129                 LIST_REMOVE(fg, f_msglist);
2130                 lck_mtx_unlock(&uipc_lock);
2131                 lck_mtx_lock(&fg->fg_lock);
2132                 fg->fg_lflags &= ~FG_RMMSGQ;
2133                 if (fg->fg_lflags & FG_WRMMSGQ) {
2134                         fg->fg_lflags &= ~FG_WRMMSGQ;
2135                         wakeup(&fg->fg_lflags);
2136                 }
2137                 lck_mtx_unlock(&fg->fg_lock);
2138         }
2139 }
2140
2141 /*
2142  * Returns:     0                       Success
2143  *              EMSGSIZE                The new fd's will not fit
2144  *              ENOBUFS                 Cannot alloc struct fileproc
2145  */
2146 int
2147 unp_externalize(struct mbuf *rights)
2148 {
2149         proc_t p = current_proc();              /* XXX */
2150         int i;
2151         struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
2152         struct fileglob **rp = (struct fileglob **)(cm + 1);
2153         int *fds = (int *)(cm + 1);
2154         struct fileproc *fp;
2155         struct fileproc **fileproc_l;
2156         int newfds = (cm->cmsg_len - sizeof(*cm)) / sizeof(int);
2157         int f, error = 0;
2158
2159         MALLOC(fileproc_l, struct fileproc **,
2160             newfds * sizeof(struct fileproc *), M_TEMP, M_WAITOK);
2161         if (fileproc_l == NULL) {
2162                 error = ENOMEM;
2163                 goto discard;
2164         }
2165
2166         proc_fdlock(p);
2167
2168         /*
2169          * if the new FD's will not fit, then we free them all
2170          */
2171         if (!fdavail(p, newfds)) {
2172                 proc_fdunlock(p);
2173                 error = EMSGSIZE;
2174                 goto discard;
2175         }
2176         /*
2177          * now change each pointer to an fd in the global table to
2178          * an integer that is the index to the local fd table entry
2179          * that we set up to point to the global one we are transferring.
2180          * XXX (1) this assumes a pointer and int are the same size,
2181          * XXX     or the mbuf can hold the expansion
2182          * XXX (2) allocation failures should be non-fatal
2183          */
2184         for (i = 0; i < newfds; i++) {
2185                 if (fdalloc(p, 0, &f)) {
2186                         panic("unp_externalize:fdalloc");
2187                 }
2188                 fp = fileproc_alloc_init(NULL);
2189                 if (fp == NULL) {
2190                         panic("unp_externalize:fileproc_alloc_init");
2191                 }
2192                 fp->fp_glob = rp[i];
2193                 if (fg_removeuipc_mark(rp[i])) {
2194                         /*
2195                          * Take an iocount on the fp for completing the
2196                          * removal from the global msg queue
2197                          */
2198                         os_ref_retain_locked(&fp->fp_iocount);
2199                         fileproc_l[i] = fp;
2200                 } else {
2201                         fileproc_l[i] = NULL;
2202                 }
2203                 procfdtbl_releasefd(p, f, fp);
2204                 fds[i] = f;
2205         }
2206         proc_fdunlock(p);
2207
2208         for (i = 0; i < newfds; i++) {
2209                 if (fileproc_l[i] != NULL) {
2210                         VERIFY(fileproc_l[i]->fp_glob != NULL &&
2211                             (fileproc_l[i]->fp_glob->fg_lflags & FG_RMMSGQ));
2212                         VERIFY(fds[i] >= 0);
2213                         fg_removeuipc(fileproc_l[i]->fp_glob);
2214
2215                         /* Drop the iocount */
2216                         fp_drop(p, fds[i], fileproc_l[i], 0);
2217                         fileproc_l[i] = NULL;
2218                 }
2219                 if (fds[i] != 0) {
2220                         (void) OSAddAtomic(-1, &unp_rights);
2221                 }
2222         }
2223
2224 discard:
2225         if (fileproc_l != NULL) {
2226                 FREE(fileproc_l, M_TEMP);
2227         }
2228         if (error) {
2229                 for (i = 0; i < newfds; i++) {
2230                         unp_discard(*rp, p);
2231                         *rp++ = NULL;
2232                 }
2233         }
2234         return error;
2235 }
2236
2237 void
2238 unp_init(void)
2239 {
2240         _CASSERT(UIPC_MAX_CMSG_FD >= (MCLBYTES / sizeof(int)));
2241         LIST_INIT(&unp_dhead);
2242         LIST_INIT(&unp_shead);
2243
2244         /*
2245          * allocate lock group attribute and group for udp pcb mutexes
2246          */
2247         unp_mtx_grp_attr = lck_grp_attr_alloc_init();
2248
2249         unp_mtx_grp = lck_grp_alloc_init("unp_list", unp_mtx_grp_attr);
2250
2251         unp_mtx_attr = lck_attr_alloc_init();
2252
2253         lck_mtx_init(&uipc_lock, unp_mtx_grp, unp_mtx_attr);
2254         lck_rw_init(&unp_list_mtx, unp_mtx_grp, unp_mtx_attr);
2255         lck_mtx_init(&unp_disconnect_lock, unp_mtx_grp, unp_mtx_attr);
2256         lck_mtx_init(&unp_connect_lock, unp_mtx_grp, unp_mtx_attr);
2257 }
2258
/* Smaller of two values; note that each argument may be evaluated twice. */
#if !defined(MIN)
#define MIN(a, b)       (((a) < (b)) ? (a) : (b))
#endif /* !defined(MIN) */
2262
2263 /*
2264  * Returns:     0                       Success
2265  *              EINVAL
2266  *              EBADF
2267  */
2268 static int
2269 unp_internalize(struct mbuf *control, proc_t p)
2270 {
2271         struct cmsghdr *cm = mtod(control, struct cmsghdr *);
2272         int *fds;
2273         struct fileglob **rp;
2274         struct fileproc *fp;
2275         int i, error;
2276         int oldfds;
2277         uint8_t fg_ins[UIPC_MAX_CMSG_FD / 8];
2278
2279         /* 64bit: cmsg_len is 'uint32_t', m_len is 'long' */
2280         if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
2281             (socklen_t)cm->cmsg_len != (socklen_t)control->m_len) {
2282                 return EINVAL;
2283         }
2284         oldfds = (cm->cmsg_len - sizeof(*cm)) / sizeof(int);
2285         bzero(fg_ins, sizeof(fg_ins));
2286
2287         proc_fdlock(p);
2288         fds = (int *)(cm + 1);
2289
2290         for (i = 0; i < oldfds; i++) {
2291                 struct fileproc *tmpfp;
2292                 if ((tmpfp = fp_get_noref_locked(p, fds[i])) == NULL) {
2293                         proc_fdunlock(p);
2294                         return EBADF;
2295                 } else if (!fg_sendable(tmpfp->fp_glob)) {
2296                         proc_fdunlock(p);
2297                         return EINVAL;
2298                 } else if (FP_ISGUARDED(tmpfp, GUARD_SOCKET_IPC)) {
2299                         error = fp_guard_exception(p,
2300                             fds[i], tmpfp, kGUARD_EXC_SOCKET_IPC);
2301                         proc_fdunlock(p);
2302                         return error;
2303                 }
2304         }
2305         rp = (struct fileglob **)(cm + 1);
2306
2307         /* On K64 we need to walk backwards because a fileglob * is twice the size of an fd
2308          * and doing them in-order would result in stomping over unprocessed fd's
2309          */
2310         for (i = (oldfds - 1); i >= 0; i--) {
2311                 fp = fp_get_noref_locked(p, fds[i]);
2312                 if (fg_insertuipc_mark(fp->fp_glob)) {
2313                         fg_ins[i / 8] |= 0x80 >> (i % 8);
2314                 }
2315                 rp[i] = fp->fp_glob;
2316         }
2317         proc_fdunlock(p);
2318
2319         for (i = 0; i < oldfds; i++) {
2320                 if (fg_ins[i / 8] & (0x80 >> (i % 8))) {
2321                         VERIFY(rp[i]->fg_lflags & FG_INSMSGQ);
2322                         fg_insertuipc(rp[i]);
2323                 }
2324                 (void) OSAddAtomic(1, &unp_rights);
2325         }
2326
2327         return 0;
2328 }
2329
2330 __private_extern__ void
2331 unp_gc(void)
2332 {
2333         struct fileglob *fg, *nextfg;
2334         struct socket *so;
2335         static struct fileglob **extra_ref;
2336         struct fileglob **fpp;
2337         int nunref, i;
2338         int need_gcwakeup = 0;
2339
2340         lck_mtx_lock(&uipc_lock);
2341         if (unp_gcing) {
2342                 lck_mtx_unlock(&uipc_lock);
2343                 return;
2344         }
2345         unp_gcing = 1;
2346         unp_defer = 0;
2347         unp_gcthread = current_thread();
2348         lck_mtx_unlock(&uipc_lock);
2349         /*
2350          * before going through all this, set all FDs to
2351          * be NOT defered and NOT externally accessible
2352          */
2353         for (fg = unp_msghead.lh_first; fg != 0; fg = fg->f_msglist.le_next) {
2354                 os_atomic_andnot(&fg->fg_flag, FMARK | FDEFER, relaxed);
2355         }
2356         do {
2357                 for (fg = unp_msghead.lh_first; fg != 0;
2358                     fg = fg->f_msglist.le_next) {
2359                         lck_mtx_lock(&fg->fg_lock);
2360                         /*
2361                          * If the file is not open, skip it
2362                          */
2363                         if (os_ref_get_count_raw(&fg->fg_count) == 0) {
2364                                 lck_mtx_unlock(&fg->fg_lock);
2365                                 continue;
2366                         }
2367                         /*
2368                          * If we already marked it as 'defer'  in a
2369                          * previous pass, then try process it this time
2370                          * and un-mark it
2371                          */
2372                         if (fg->fg_flag & FDEFER) {
2373                                 os_atomic_andnot(&fg->fg_flag, FDEFER, relaxed);
2374                                 unp_defer--;
2375                         } else {
2376                                 /*
2377                                  * if it's not defered, then check if it's
2378                                  * already marked.. if so skip it
2379                                  */
2380                                 if (fg->fg_flag & FMARK) {
2381                                         lck_mtx_unlock(&fg->fg_lock);
2382                                         continue;
2383                                 }
2384                                 /*
2385                                  * If all references are from messages
2386                                  * in transit, then skip it. it's not
2387                                  * externally accessible.
2388                                  */
2389                                 if (os_ref_get_count_raw(&fg->fg_count) ==
2390                                     fg->fg_msgcount) {
2391                                         lck_mtx_unlock(&fg->fg_lock);
2392                                         continue;
2393                                 }
2394                                 /*
2395                                  * If it got this far then it must be
2396                                  * externally accessible.
2397                                  */
2398                                 os_atomic_or(&fg->fg_flag, FMARK, relaxed);
2399                         }
2400                         /*
2401                          * either it was defered, or it is externally
2402                          * accessible and not already marked so.
2403                          * Now check if it is possibly one of OUR sockets.
2404                          */
2405                         if (FILEGLOB_DTYPE(fg) != DTYPE_SOCKET ||
2406                             (so = (struct socket *)fg->fg_data) == 0) {
2407                                 lck_mtx_unlock(&fg->fg_lock);
2408                                 continue;
2409                         }
2410                         if (so->so_proto->pr_domain != localdomain ||
2411                             (so->so_proto->pr_flags & PR_RIGHTS) == 0) {
2412                                 lck_mtx_unlock(&fg->fg_lock);
2413                                 continue;
2414                         }
2415 #ifdef notdef
2416                         if (so->so_rcv.sb_flags & SB_LOCK) {
2417                                 /*
2418                                  * This is problematical; it's not clear
2419                                  * we need to wait for the sockbuf to be
2420                                  * unlocked (on a uniprocessor, at least),
2421                                  * and it's also not clear what to do
2422                                  * if sbwait returns an error due to receipt
2423                                  * of a signal.  If sbwait does return
2424                                  * an error, we'll go into an infinite
2425                                  * loop.  Delete all of this for now.
2426                                  */
2427                                 (void) sbwait(&so->so_rcv);
2428                                 goto restart;
2429                         }
2430 #endif
2431                         /*
2432                          * So, Ok, it's one of our sockets and it IS externally
2433                          * accessible (or was defered). Now we look
2434                          * to see if we hold any file descriptors in its
2435                          * message buffers. Follow those links and mark them
2436                          * as accessible too.
2437                          *
2438                          * In case a file is passed onto itself we need to
2439                          * release the file lock.
2440                          */
2441                         lck_mtx_unlock(&fg->fg_lock);
2442
2443                         unp_scan(so->so_rcv.sb_mb, unp_mark, 0);
2444                 }
2445         } while (unp_defer);
2446         /*
2447          * We grab an extra reference to each of the file table entries
2448          * that are not otherwise accessible and then free the rights
2449          * that are stored in messages on them.
2450          *
2451          * The bug in the original code is a little tricky, so I'll describe
2452          * what's wrong with it here.
2453          *
2454          * It is incorrect to simply unp_discard each entry for fg_msgcount
2455          * times -- consider the case of sockets A and B that contain
2456          * references to each other.  On a last close of some other socket,
2457          * we trigger a gc since the number of outstanding rights (unp_rights)
2458          * is non-zero.  If during the sweep phase the gc code unp_discards,
2459          * we end up doing a (full) closef on the descriptor.  A closef on A
2460          * results in the following chain.  Closef calls soo_close, which
2461          * calls soclose.   Soclose calls first (through the switch
2462          * uipc_usrreq) unp_detach, which re-invokes unp_gc.  Unp_gc simply
2463          * returns because the previous instance had set unp_gcing, and
2464          * we return all the way back to soclose, which marks the socket
2465          * with SS_NOFDREF, and then calls sofree.  Sofree calls sorflush
2466          * to free up the rights that are queued in messages on the socket A,
2467          * i.e., the reference on B.  The sorflush calls via the dom_dispose
2468          * switch unp_dispose, which unp_scans with unp_discard.  This second
2469          * instance of unp_discard just calls closef on B.
2470          *
2471          * Well, a similar chain occurs on B, resulting in a sorflush on B,
2472          * which results in another closef on A.  Unfortunately, A is already
2473          * being closed, and the descriptor has already been marked with
2474          * SS_NOFDREF, and soclose panics at this point.
2475          *
2476          * Here, we first take an extra reference to each inaccessible
2477          * descriptor.  Then, we call sorflush ourself, since we know
2478          * it is a Unix domain socket anyhow.  After we destroy all the
2479          * rights carried in messages, we do a last closef to get rid
2480          * of our extra reference.  This is the last close, and the
2481          * unp_detach etc will shut down the socket.
2482          *
2483          * 91/09/19, bsy@cs.cmu.edu
2484          */
2485         MALLOC(extra_ref, struct fileglob **, nfiles * sizeof(struct fileglob *),
2486             M_TEMP, M_WAITOK);
2487         if (extra_ref == NULL) {
2488                 goto bail;
2489         }
2490         for (nunref = 0, fg = unp_msghead.lh_first, fpp = extra_ref; fg != 0;
2491             fg = nextfg) {
2492                 lck_mtx_lock(&fg->fg_lock);
2493
2494                 nextfg = fg->f_msglist.le_next;
2495                 /*
2496                  * If it's not open, skip it
2497                  */
2498                 if (os_ref_get_count_raw(&fg->fg_count) == 0) {
2499                         lck_mtx_unlock(&fg->fg_lock);
2500                         continue;
2501                 }
2502                 /*
2503                  * If all refs are from msgs, and it's not marked accessible
2504                  * then it must be referenced from some unreachable cycle
2505                  * of (shut-down) FDs, so include it in our
2506                  * list of FDs to remove
2507                  */
2508                 if (fg->fg_flag & FMARK) {
2509                         lck_mtx_unlock(&fg->fg_lock);
2510                         continue;
2511                 }
2512                 if (os_ref_get_count_raw(&fg->fg_count) == fg->fg_msgcount) {
2513                         os_ref_retain_raw(&fg->fg_count, &f_refgrp);
2514                         *fpp++ = fg;
2515                         nunref++;
2516                 }
2517                 lck_mtx_unlock(&fg->fg_lock);
2518         }
2519         /*
2520          * for each FD on our hit list, do the following two things
2521          */
2522         for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
2523                 struct fileglob *tfg;
2524
2525                 tfg = *fpp;
2526
2527                 if (FILEGLOB_DTYPE(tfg) == DTYPE_SOCKET &&
2528                     tfg->fg_data != NULL) {
2529                         so = (struct socket *)(tfg->fg_data);
2530
2531                         socket_lock(so, 0);
2532
2533                         sorflush(so);
2534
2535                         socket_unlock(so, 0);
2536                 }
2537         }
2538         for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
2539                 fg_drop(PROC_NULL, *fpp);
2540         }
2541
2542         FREE(extra_ref, M_TEMP);
2543 bail:
2544         lck_mtx_lock(&uipc_lock);
2545         unp_gcing = 0;
2546         unp_gcthread = NULL;
2547
2548         if (unp_gcwait != 0) {
2549                 unp_gcwait = 0;
2550                 need_gcwakeup = 1;
2551         }
2552         lck_mtx_unlock(&uipc_lock);
2553
2554         if (need_gcwakeup != 0) {
2555                 wakeup(&unp_gcing);
2556         }
2557 }
2558
2559 void
2560 unp_dispose(struct mbuf *m)
2561 {
2562         if (m) {
2563                 unp_scan(m, unp_discard, NULL);
2564         }
2565 }
2566
2567 /*
2568  * Returns:     0                       Success
2569  */
2570 static int
2571 unp_listen(struct unpcb *unp, proc_t p)
2572 {
2573         kauth_cred_t safecred = kauth_cred_proc_ref(p);
2574         cru2x(safecred, &unp->unp_peercred);
2575         kauth_cred_unref(&safecred);
2576         unp->unp_flags |= UNP_HAVEPCCACHED;
2577         return 0;
2578 }
2579
2580 static void
2581 unp_scan(struct mbuf *m0, void (*op)(struct fileglob *, void *arg), void *arg)
2582 {
2583         struct mbuf *m;
2584         struct fileglob **rp;
2585         struct cmsghdr *cm;
2586         int i;
2587         int qfds;
2588
2589         while (m0) {
2590                 for (m = m0; m; m = m->m_next) {
2591                         if (m->m_type == MT_CONTROL &&
2592                             (size_t)m->m_len >= sizeof(*cm)) {
2593                                 cm = mtod(m, struct cmsghdr *);
2594                                 if (cm->cmsg_level != SOL_SOCKET ||
2595                                     cm->cmsg_type != SCM_RIGHTS) {
2596                                         continue;
2597                                 }
2598                                 qfds = (cm->cmsg_len - sizeof(*cm)) /
2599                                     sizeof(int);
2600                                 rp = (struct fileglob **)(cm + 1);
2601                                 for (i = 0; i < qfds; i++) {
2602                                         (*op)(*rp++, arg);
2603                                 }
2604                                 break;          /* XXX, but saves time */
2605                         }
2606                 }
2607                 m0 = m0->m_act;
2608         }
2609 }
2610
2611 static void
2612 unp_mark(struct fileglob *fg, __unused void *arg)
2613 {
2614         uint32_t oflags, nflags;
2615
2616         os_atomic_rmw_loop(&fg->fg_flag, oflags, nflags, relaxed, {
2617                 if (oflags & FMARK) {
2618                         os_atomic_rmw_loop_give_up(return );
2619                 }
2620                 nflags = oflags | FMARK | FDEFER;
2621         });
2622
2623         unp_defer++;
2624 }
2625
2626 static void
2627 unp_discard(struct fileglob *fg, void *p)
2628 {
2629         if (p == NULL) {
2630                 p = current_proc();             /* XXX */
2631         }
2632         (void) OSAddAtomic(1, &unp_disposed);
2633         if (fg_removeuipc_mark(fg)) {
2634                 VERIFY(fg->fg_lflags & FG_RMMSGQ);
2635                 fg_removeuipc(fg);
2636         }
2637         (void) OSAddAtomic(-1, &unp_rights);
2638
2639         (void) fg_drop(p, fg);
2640 }
2641
2642 int
2643 unp_lock(struct socket *so, int refcount, void * lr)
2644 {
2645         void * lr_saved;
2646         if (lr == 0) {
2647                 lr_saved = (void *)  __builtin_return_address(0);
2648         } else {
2649                 lr_saved = lr;
2650         }
2651
2652         if (so->so_pcb) {
2653                 lck_mtx_lock(&((struct unpcb *)so->so_pcb)->unp_mtx);
2654         } else {
2655                 panic("unp_lock: so=%p NO PCB! lr=%p ref=0x%x\n",
2656                     so, lr_saved, so->so_usecount);
2657         }
2658
2659         if (so->so_usecount < 0) {
2660                 panic("unp_lock: so=%p so_pcb=%p lr=%p ref=0x%x\n",
2661                     so, so->so_pcb, lr_saved, so->so_usecount);
2662         }
2663
2664         if (refcount) {
2665                 VERIFY(so->so_usecount > 0);
2666                 so->so_usecount++;
2667         }
2668         so->lock_lr[so->next_lock_lr] = lr_saved;
2669         so->next_lock_lr = (so->next_lock_lr + 1) % SO_LCKDBG_MAX;
2670         return 0;
2671 }
2672
2673 int
2674 unp_unlock(struct socket *so, int refcount, void * lr)
2675 {
2676         void * lr_saved;
2677         lck_mtx_t * mutex_held = NULL;
2678         struct unpcb *unp = sotounpcb(so);
2679
2680         if (lr == 0) {
2681                 lr_saved = (void *) __builtin_return_address(0);
2682         } else {
2683                 lr_saved = lr;
2684         }
2685
2686         if (refcount) {
2687                 so->so_usecount--;
2688         }
2689
2690         if (so->so_usecount < 0) {
2691                 panic("unp_unlock: so=%p usecount=%x\n", so, so->so_usecount);
2692         }
2693         if (so->so_pcb == NULL) {
2694                 panic("unp_unlock: so=%p NO PCB usecount=%x\n", so, so->so_usecount);
2695         } else {
2696                 mutex_held = &((struct unpcb *)so->so_pcb)->unp_mtx;
2697         }
2698         LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
2699         so->unlock_lr[so->next_unlock_lr] = lr_saved;
2700         so->next_unlock_lr = (so->next_unlock_lr + 1) % SO_LCKDBG_MAX;
2701
2702         if (so->so_usecount == 0 && (so->so_flags & SOF_PCBCLEARING)) {
2703                 sofreelastref(so, 1);
2704
2705                 if (unp->unp_addr) {
2706                         FREE(unp->unp_addr, M_SONAME);
2707                 }
2708
2709                 lck_mtx_unlock(mutex_held);
2710
2711                 lck_mtx_destroy(&unp->unp_mtx, unp_mtx_grp);
2712                 zfree(unp_zone, unp);
2713
2714                 unp_gc();
2715         } else {
2716                 lck_mtx_unlock(mutex_held);
2717         }
2718
2719         return 0;
2720 }
2721
2722 lck_mtx_t *
2723 unp_getlock(struct socket *so, __unused int flags)
2724 {
2725         struct unpcb *unp = (struct unpcb *)so->so_pcb;
2726
2727
2728         if (so->so_pcb) {
2729                 if (so->so_usecount < 0) {
2730                         panic("unp_getlock: so=%p usecount=%x\n", so, so->so_usecount);
2731                 }
2732                 return &unp->unp_mtx;
2733         } else {
2734                 panic("unp_getlock: so=%p NULL so_pcb\n", so);
2735                 return so->so_proto->pr_domain->dom_mtx;
2736         }
2737 }