bsd/kern/uipc_usrreq.c

   1 /*
   2  * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /*
  29  * Copyright (c) 1982, 1986, 1989, 1991, 1993
  30  *      The Regents of the University of California.  All rights reserved.
  31  *
  32  * Redistribution and use in source and binary forms, with or without
  33  * modification, are permitted provided that the following conditions
  34  * are met:
  35  * 1. Redistributions of source code must retain the above copyright
  36  *    notice, this list of conditions and the following disclaimer.
  37  * 2. Redistributions in binary form must reproduce the above copyright
  38  *    notice, this list of conditions and the following disclaimer in the
  39  *    documentation and/or other materials provided with the distribution.
  40  * 3. All advertising materials mentioning features or use of this software
  41  *    must display the following acknowledgement:
  42  *      This product includes software developed by the University of
  43  *      California, Berkeley and its contributors.
  44  * 4. Neither the name of the University nor the names of its contributors
  45  *    may be used to endorse or promote products derived from this software
  46  *    without specific prior written permission.
  47  *
  48  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  58  * SUCH DAMAGE.
  59  *
  60  *      From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94
  61  */
  62 /*
  63  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
  64  * support for mandatory and extensible security protections.  This notice
  65  * is included in support of clause 2.2 (b) of the Apple Public License,
  66  * Version 2.0.
  67  */
  68
  69 #include <sys/param.h>
  70 #include <sys/systm.h>
  71 #include <sys/kernel.h>
  72 #include <sys/domain.h>
  73 #include <sys/fcntl.h>
  74 #include <sys/malloc.h>         /* XXX must be before <sys/file.h> */
  75 #include <sys/file_internal.h>
  76 #include <sys/guarded.h>
  77 #include <sys/filedesc.h>
  78 #include <sys/lock.h>
  79 #include <sys/mbuf.h>
  80 #include <sys/namei.h>
  81 #include <sys/proc_internal.h>
  82 #include <sys/kauth.h>
  83 #include <sys/protosw.h>
  84 #include <sys/socket.h>
  85 #include <sys/socketvar.h>
  86 #include <sys/stat.h>
  87 #include <sys/sysctl.h>
  88 #include <sys/un.h>
  89 #include <sys/unpcb.h>
  90 #include <sys/vnode_internal.h>
  91 #include <sys/kdebug.h>
  92
  93 #include <kern/zalloc.h>
  94 #include <kern/locks.h>
  95
  96 #if CONFIG_MACF
  97 #include <security/mac_framework.h>
  98 #endif /* CONFIG_MACF */
  99
 100 #include <mach/vm_param.h>
 101
     /*
      * Shorthand member names: x->f_data expands to x->f_fglob->fg_data,
      * and likewise for the other four macros below.
      */
 102 #define f_msgcount f_fglob->fg_msgcount
 103 #define f_cred f_fglob->fg_cred
 104 #define f_ops f_fglob->fg_ops
 105 #define f_offset f_fglob->fg_offset
 106 #define f_data f_fglob->fg_data
     /* Zone from which unp_attach() allocates each struct unpcb. */
 107 struct  zone *unp_zone;
     /* Pre-incremented by unp_attach() and stored in the new pcb's unp_gencnt. */
 108 static  unp_gen_t unp_gencnt;
     /* Incremented by unp_attach() for every pcb it creates. */
 109 static  u_int unp_count;
 110
     /*
      * Lock plumbing.  unp_mtx_grp and unp_mtx_attr are handed to
      * lck_mtx_init() for each pcb's unp_mtx; unp_list_mtx is taken
      * exclusive by unp_attach() while it updates the counters above and
      * links the pcb onto unp_shead/unp_dhead.
      * NOTE(review): unp_mtx_grp_attr is not referenced in this part of the
      * file -- presumably consumed when the lock group is created; confirm.
      */
 111 static  lck_attr_t              *unp_mtx_attr;
 112 static  lck_grp_t               *unp_mtx_grp;
 113 static  lck_grp_attr_t          *unp_mtx_grp_attr;
 114 static  lck_rw_t                *unp_list_mtx;
 115
     /*
      * NOTE(review): the next three are not used in this part of the file;
      * by name they serialize connect/disconnect processing -- confirm
      * against unp_connect()/unp_disconnect().
      */
 116 static  lck_mtx_t               *unp_disconnect_lock;
 117 static  lck_mtx_t               *unp_connect_lock;
 118 static  u_int                   disconnect_in_progress;
 119
 120 extern lck_mtx_t *uipc_lock;
     /*
      * Lists of all pcbs: unp_attach() inserts SOCK_DGRAM pcbs at the head
      * of unp_dhead and every other type at the head of unp_shead.
      */
 121 static  struct unp_head unp_shead, unp_dhead;
 122
 123 /*
 124  * mDNSResponder tracing.  When enabled, endpoints connected to
 125  * /var/run/mDNSResponder will be traced; during each send on
 126  * the traced socket, we log the PID and process name of the
 127  * sending process.  We also print out a bit of info related
 128  * to the data itself; this assumes ipc_msg_hdr in dnssd_ipc.h
 129  * of mDNSResponder stays the same.
 130  */
 131 #define MDNSRESPONDER_PATH      "/var/run/mDNSResponder"
 132
 133 static int unpst_tracemdns;     /* enable tracing */
 134
 135 #define MDNS_IPC_MSG_HDR_VERSION_1      1
 136
 137 struct mdns_ipc_msg_hdr {
 138         uint32_t version;
 139         uint32_t datalen;
 140         uint32_t ipc_flags;
 141         uint32_t op;
 142         union {
 143                 void *context;
 144                 uint32_t u32[2];
 145         } __attribute__((packed));
 146         uint32_t reg_index;
 147 } __attribute__((packed));
 148
 149 /*
 150  * Unix communications domain.
 151  *
 152  * TODO:
 153  *      SEQPACKET, RDM
 154  *      rethink name space problems
 155  *      need a proper out-of-band
 156  *      lock pushdown
 157  */
     /*
      * Anonymous address.  uipc_accept(), uipc_peeraddr() and uipc_sockaddr()
      * return a copy of it when no bound name is available, and uipc_send()
      * uses it as the "from" address of a datagram whose sender has no
      * unp_addr.
      */
 158 static struct   sockaddr sun_noname = { sizeof (sun_noname), AF_LOCAL, { 0 } };
     /* Counter uipc_sense() draws on to give each pcb a fake inode number. */
 159 static ino_t    unp_ino;                /* prototype for fake inode numbers */
 160
     /*
      * Forward declarations of this file's helpers (all static except
      * unp_gc, which is __private_extern__).
      */
 161 static int      unp_attach(struct socket *);
 162 static void     unp_detach(struct unpcb *);
 163 static int      unp_bind(struct unpcb *, struct sockaddr *, proc_t);
 164 static int      unp_connect(struct socket *, struct sockaddr *, proc_t);
 165 static void     unp_disconnect(struct unpcb *);
 166 static void     unp_shutdown(struct unpcb *);
 167 static void     unp_drop(struct unpcb *, int);
 168 __private_extern__ void unp_gc(void);
 169 static void     unp_scan(struct mbuf *, void (*)(struct fileglob *));
 170 static void     unp_mark(struct fileglob *);
 171 static void     unp_discard(struct fileglob *);
 172 static void     unp_discard_fdlocked(struct fileglob *, proc_t);
 173 static int      unp_internalize(struct mbuf *, proc_t);
 174 static int      unp_listen(struct unpcb *, proc_t);
 175 static void     unpcb_to_compat(struct unpcb *, struct unpcb_compat *);
 176 static void     unp_get_locks_in_order(struct socket *so, struct socket *conn_so);
 177
 178 static void
 179 unp_get_locks_in_order(struct socket *so, struct socket *conn_so)
 180 {
 181         if (so < conn_so) {
 182                 socket_lock(conn_so, 1);
 183         } else {
 184                 struct unpcb *unp = sotounpcb(so);
 185                 unp->unp_flags |= UNP_DONTDISCONNECT;
 186                 unp->rw_thrcount++;
 187                 socket_unlock(so, 0);
 188
 189                 /* Get the locks in the correct order */
 190                 socket_lock(conn_so, 1);
 191                 socket_lock(so, 0);
 192                 unp->rw_thrcount--;
 193                 if (unp->rw_thrcount == 0) {
 194                         unp->unp_flags &= ~UNP_DONTDISCONNECT;
 195                         wakeup(unp);
 196                 }
 197         }
 198 }
 199
 200 static int
 201 uipc_abort(struct socket *so)
 202 {
 203         struct unpcb *unp = sotounpcb(so);
 204
 205         if (unp == 0)
 206                 return (EINVAL);
 207         unp_drop(unp, ECONNABORTED);
 208         unp_detach(unp);
 209         sofree(so);
 210         return (0);
 211 }
 212
 213 static int
 214 uipc_accept(struct socket *so, struct sockaddr **nam)
 215 {
 216         struct unpcb *unp = sotounpcb(so);
 217
 218         if (unp == 0)
 219                 return (EINVAL);
 220
 221         /*
 222          * Pass back name of connected socket,
 223          * if it was bound and we are still connected
 224          * (our peer may have closed already!).
 225          */
 226         if (unp->unp_conn && unp->unp_conn->unp_addr) {
 227                 *nam = dup_sockaddr((struct sockaddr *)
 228                     unp->unp_conn->unp_addr, 1);
 229         } else {
 230                 *nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
 231         }
 232         return (0);
 233 }
 234
 235 /*
 236  * Returns:     0                       Success
 237  *              EISCONN
 238  *      unp_attach:
 239  */
 240 static int
 241 uipc_attach(struct socket *so, __unused int proto, __unused proc_t p)
 242 {
 243         struct unpcb *unp = sotounpcb(so);
 244
 245         if (unp != 0)
 246                 return (EISCONN);
 247         return (unp_attach(so));
 248 }
 249
 250 static int
 251 uipc_bind(struct socket *so, struct sockaddr *nam, proc_t p)
 252 {
 253         struct unpcb *unp = sotounpcb(so);
 254
 255         if (unp == 0)
 256                 return (EINVAL);
 257
 258         return (unp_bind(unp, nam, p));
 259 }
 260
 261 /*
 262  * Returns:     0                       Success
 263  *              EINVAL
 264  *      unp_connect:???                 [See elsewhere in this file]
 265  */
 266 static int
 267 uipc_connect(struct socket *so, struct sockaddr *nam, proc_t p)
 268 {
 269         struct unpcb *unp = sotounpcb(so);
 270
 271         if (unp == 0)
 272                 return (EINVAL);
 273         return (unp_connect(so, nam, p));
 274 }
 275
 276 /*
 277  * Returns:     0                       Success
 278  *              EINVAL
 279  *      unp_connect2:EPROTOTYPE         Protocol wrong type for socket
 280  *      unp_connect2:EINVAL             Invalid argument
 281  */
 282 static int
 283 uipc_connect2(struct socket *so1, struct socket *so2)
 284 {
 285         struct unpcb *unp = sotounpcb(so1);
 286
 287         if (unp == 0)
 288                 return (EINVAL);
 289
 290         return (unp_connect2(so1, so2));
 291 }
 292
 293 /* control is EOPNOTSUPP */
 294
 295 static int
 296 uipc_detach(struct socket *so)
 297 {
 298         struct unpcb *unp = sotounpcb(so);
 299
 300         if (unp == 0)
 301                 return (EINVAL);
 302
 303         lck_mtx_assert(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
 304         unp_detach(unp);
 305         return (0);
 306 }
 307
 308 static int
 309 uipc_disconnect(struct socket *so)
 310 {
 311         struct unpcb *unp = sotounpcb(so);
 312
 313         if (unp == 0)
 314                 return (EINVAL);
 315         unp_disconnect(unp);
 316         return (0);
 317 }
 318
 319 /*
 320  * Returns:     0                       Success
 321  *              EINVAL
 322  */
 323 static int
 324 uipc_listen(struct socket *so, __unused proc_t p)
 325 {
 326         struct unpcb *unp = sotounpcb(so);
 327
 328         if (unp == 0 || unp->unp_vnode == 0)
 329                 return (EINVAL);
 330         return (unp_listen(unp, p));
 331 }
 332
 333 static int
 334 uipc_peeraddr(struct socket *so, struct sockaddr **nam)
 335 {
 336         struct unpcb *unp = sotounpcb(so);
 337
 338         if (unp == NULL)
 339                 return (EINVAL);
 340         if (unp->unp_conn != NULL && unp->unp_conn->unp_addr != NULL) {
 341                 *nam = dup_sockaddr((struct sockaddr *)
 342                     unp->unp_conn->unp_addr, 1);
 343         } else {
 344                 *nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
 345         }
 346         return (0);
 347 }
 348
 349 static int
 350 uipc_rcvd(struct socket *so, __unused int flags)
 351 {
 352         struct unpcb *unp = sotounpcb(so);
 353         struct socket *so2;
 354
 355         if (unp == 0)
 356                 return (EINVAL);
 357         switch (so->so_type) {
 358         case SOCK_DGRAM:
 359                 panic("uipc_rcvd DGRAM?");
 360                 /*NOTREACHED*/
 361
 362         case SOCK_STREAM:
 363 #define rcv (&so->so_rcv)
 364 #define snd (&so2->so_snd)
 365                 if (unp->unp_conn == 0)
 366                         break;
 367
 368                 so2 = unp->unp_conn->unp_socket;
 369                 unp_get_locks_in_order(so, so2);
 370                 /*
 371                  * Adjust backpressure on sender
 372                  * and wakeup any waiting to write.
 373                  */
 374                 snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
 375                 unp->unp_mbcnt = rcv->sb_mbcnt;
 376                 snd->sb_hiwat += unp->unp_cc - rcv->sb_cc;
 377                 unp->unp_cc = rcv->sb_cc;
 378                 sowwakeup(so2);
 379
 380                 socket_unlock(so2, 1);
 381
 382 #undef snd
 383 #undef rcv
 384                 break;
 385
 386         default:
 387                 panic("uipc_rcvd unknown socktype");
 388         }
 389         return (0);
 390 }
 391
 392 /* pru_rcvoob is EOPNOTSUPP */
 393
 394 /*
      * uipc_send: pru_send handler for AF_UNIX sockets.
      *
      * Appends the data chain `m' (and the optional `control' chain) directly
      * to the receive buffer of the socket `so' is connected to.
      *
      *   SOCK_DGRAM   With `nam' the socket is connected for this one
      *                datagram and disconnected again afterwards; without
      *                it the socket must already be connected.
      *   SOCK_STREAM  `nam' is only used to connect a socket that is not
      *                yet SS_ISCONNECTED.  After the append, the sender's
      *                sb_mbmax/sb_hiwat are reduced by the growth of the
      *                peer's receive buffer.
      *
      * PRUS_OOB is rejected with EOPNOTSUPP.  PRUS_EOF marks the socket
      * cantsendmore and calls unp_shutdown() after the send attempt; that
      * step is not conditional on `error'.
      *
      * Mbuf ownership: whatever `m' / `control' pointer is still non-NULL on
      * the way out is freed at `release', so the caller never gets either
      * chain back.
      *
      * Locking: the lock on `so' is dropped and retaken around
      * unp_internalize() and unp_dispose(), and may be dropped and retaken
      * inside unp_get_locks_in_order().
      *
 395  * Returns:     0                       Success
 396  *              EINVAL
 397  *              EOPNOTSUPP
 398  *              EPIPE
 399  *              ENOTCONN
 400  *              EISCONN
 401  *      unp_internalize:EINVAL
 402  *      unp_internalize:EBADF
 403  *      unp_connect:EAFNOSUPPORT        Address family not supported
 404  *      unp_connect:EINVAL              Invalid argument
 405  *      unp_connect:ENOTSOCK            Not a socket
 406  *      unp_connect:ECONNREFUSED        Connection refused
 407  *      unp_connect:EISCONN             Socket is connected
 408  *      unp_connect:EPROTOTYPE          Protocol wrong type for socket
 409  *      unp_connect:???
 410  *      sbappendaddr:ENOBUFS            [5th argument, contents modified]
 411  *      sbappendaddr:???                [whatever a filter author chooses]
 412  */
 413 static int
 414 uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
 415     struct mbuf *control, proc_t p)
 416 {
 417         int error = 0;
 418         struct unpcb *unp = sotounpcb(so);
 419         struct socket *so2;
 420
             /*
              * Early failures jump straight to `release'; unp_internalize()
              * has not run yet on these paths, so unp_dispose() is skipped.
              */
 421         if (unp == 0) {
 422                 error = EINVAL;
 423                 goto release;
 424         }
 425         if (flags & PRUS_OOB) {
 426                 error = EOPNOTSUPP;
 427                 goto release;
 428         }
 429
 430         if (control) {
 431                 /* release lock to avoid deadlock (4436174) */
 432                 socket_unlock(so, 0);
 433                 error = unp_internalize(control, p);
 434                 socket_lock(so, 0);
 435                 if (error)
 436                         goto release;
 437         }
 438
 439         switch (so->so_type) {
 440         case SOCK_DGRAM:
 441         {
 442                 struct sockaddr *from;
 443
                     /*
                      * An explicit destination is only legal on an
                      * unconnected socket; it is connected here and
                      * disconnected again below, after the append.
                      */
 444                 if (nam) {
 445                         if (unp->unp_conn) {
 446                                 error = EISCONN;
 447                                 break;
 448                         }
 449                         error = unp_connect(so, nam, p);
 450                         if (error)
 451                                 break;
 452                 } else {
 453                         if (unp->unp_conn == 0) {
 454                                 error = ENOTCONN;
 455                                 break;
 456                         }
 457                 }
 458
                     /*
                      * so == so2 when the socket is connected to itself; the
                      * lock already held then covers both ends and no second
                      * lock is taken (or released below).
                      */
 459                 so2 = unp->unp_conn->unp_socket;
 460                 if (so != so2)
 461                         unp_get_locks_in_order(so, so2);
 462
 463                 if (unp->unp_addr)
 464                         from = (struct sockaddr *)unp->unp_addr;
 465                 else
 466                         from = &sun_noname;
 467                 /*
 468                  * sbappendaddr() will fail when the receiver runs out of
 469                  * space; in contrast to SOCK_STREAM, we will lose messages
 470                  * for the SOCK_DGRAM case when the receiver's queue overflows.
 471                  * SB_UNIX on the socket buffer implies that the callee will
 472                  * not free the control message, if any, because we would need
 473                  * to call unp_dispose() on it.
 474                  */
 475                 if (sbappendaddr(&so2->so_rcv, from, m, control, &error)) {
 476                         control = NULL;
 477                         sorwakeup(so2);
 478                 } else if (control != NULL && error == 0) {
 479                         /* A socket filter took control; don't touch it */
 480                         control = NULL;
 481                 }
 482
 483                 if (so != so2)
 484                         socket_unlock(so2, 1);
 485
                     /*
                      * m is cleared whether or not the append succeeded --
                      * presumably sbappendaddr() consumes the data chain in
                      * both cases; confirm against sbappendaddr().
                      */
 486                 m = NULL;
                     /* Undo the per-datagram connect made above. */
 487                 if (nam)
 488                         unp_disconnect(unp);
 489                 break;
 490         }
 491
 492         case SOCK_STREAM: {
 493                 int didreceive = 0;
 494 #define rcv (&so2->so_rcv)
 495 #define snd (&so->so_snd)
 496                 /* Connect if not connected yet. */
 497                 /*
 498                  * Note: A better implementation would complain
 499                  * if not equal to the peer's address.
 500                  */
 501                 if ((so->so_state & SS_ISCONNECTED) == 0) {
 502                         if (nam) {
 503                                 error = unp_connect(so, nam, p);
 504                                 if (error)
 505                                         break;  /* XXX */
 506                         } else {
 507                                 error = ENOTCONN;
 508                                 break;
 509                         }
 510                 }
 511
 512                 if (so->so_state & SS_CANTSENDMORE) {
 513                         error = EPIPE;
 514                         break;
 515                 }
 516                 if (unp->unp_conn == 0)
 517                         panic("uipc_send connected but no connection?");
 518
 519                 so2 = unp->unp_conn->unp_socket;
 520                 unp_get_locks_in_order(so, so2);
 521
 522                 /* Check socket state again as we might have unlocked the socket
 523                  * while trying to get the locks in order
 524                  */
 525
 526                 if ((so->so_state & SS_CANTSENDMORE)) {
 527                         error = EPIPE;
 528                         socket_unlock(so2, 1);
 529                         break;
 530                 }
 531
                     /*
                      * Optional tracing: copy the first sizeof (hdr) bytes out
                      * of the chain and log the sender and `op' only when the
                      * copy succeeds and the version field matches.
                      */
 532                 if (unp->unp_flags & UNP_TRACE_MDNS) {
 533                         struct mdns_ipc_msg_hdr hdr;
 534
 535                         if (mbuf_copydata(m, 0, sizeof (hdr), &hdr) == 0 &&
 536                             hdr.version  == ntohl(MDNS_IPC_MSG_HDR_VERSION_1)) {
 537                                 printf("%s[mDNSResponder] pid=%d (%s): op=0x%x\n",
 538                                     __func__, p->p_pid, p->p_comm, ntohl(hdr.op));
 539                         }
 540                 }
 541
 542                 /*
 543                  * Send to paired receive port, and then reduce send buffer
 544                  * hiwater marks to maintain backpressure.  Wake up readers.
 545                  * SB_UNIX flag will allow new record to be appended to the
 546                  * receiver's queue even when it is already full.  It is
 547                  * possible, however, that append might fail.  In that case,
 548                  * we will need to call unp_dispose() on the control message;
 549                  * the callee will not free it since SB_UNIX is set.
 550                  */
 551                 didreceive = control ?
 552                     sbappendcontrol(rcv, m, control, &error) : sbappend(rcv, m);
 553
                     /*
                      * Shrink the sender's limits by however much the peer's
                      * receive buffer grew since the counts last recorded in
                      * the peer pcb, then record the new counts there.  This
                      * runs whether or not the append succeeded.
                      */
 554                 snd->sb_mbmax -= rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
 555                 unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
 556                 snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc;
 557                 unp->unp_conn->unp_cc = rcv->sb_cc;
 558                 if (didreceive) {
 559                         control = NULL;
 560                         sorwakeup(so2);
 561                 } else if (control != NULL && error == 0) {
 562                         /* A socket filter took control; don't touch it */
 563                         control = NULL;
 564                 }
 565
 566                 socket_unlock(so2, 1);
 567                 m = NULL;
 568 #undef snd
 569 #undef rcv
 570                 }
 571                 break;
 572
 573         default:
 574                 panic("uipc_send unknown socktype");
 575         }
 576
 577         /*
 578          * SEND_EOF is equivalent to a SEND followed by
 579          * a SHUTDOWN.
 580          */
 581         if (flags & PRUS_EOF) {
 582                 socantsendmore(so);
 583                 unp_shutdown(unp);
 584         }
 585
             /*
              * control is still set only if nobody took it over; on error it
              * is passed to unp_dispose() with the socket lock dropped, then
              * freed at `release' below.
              */
 586         if (control && error != 0) {
 587                 socket_unlock(so, 0);
 588                 unp_dispose(control);
 589                 socket_lock(so, 0);
 590         }
 591
 592 release:
 593         if (control)
 594                 m_freem(control);
 595         if (m)
 596                 m_freem(m);
 597         return (error);
 598 }
 599
 600 static int
 601 uipc_sense(struct socket *so, void *ub, int isstat64)
 602 {
 603         struct unpcb *unp = sotounpcb(so);
 604         struct socket *so2;
 605         blksize_t blksize;
 606
 607         if (unp == 0)
 608                 return (EINVAL);
 609
 610         blksize = so->so_snd.sb_hiwat;
 611         if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) {
 612                 so2 = unp->unp_conn->unp_socket;
 613                 blksize += so2->so_rcv.sb_cc;
 614         }
 615         if (unp->unp_ino == 0)
 616                 unp->unp_ino = unp_ino++;
 617
 618         if (isstat64 != 0) {
 619                 struct stat64  *sb64;
 620
 621                 sb64 = (struct stat64 *)ub;
 622                 sb64->st_blksize = blksize;
 623                 sb64->st_dev = NODEV;
 624                 sb64->st_ino = (ino64_t)unp->unp_ino;
 625         } else {
 626                 struct stat *sb;
 627
 628                 sb = (struct stat *)ub;
 629                 sb->st_blksize = blksize;
 630                 sb->st_dev = NODEV;
 631                 sb->st_ino = (ino_t)(uintptr_t)unp->unp_ino;
 632         }
 633
 634         return (0);
 635 }
 636
 637 /*
 638  * Returns:     0               Success
 639  *              EINVAL
 640  *
 641  * Notes:       This is not strictly correct, as unp_shutdown() also calls
 642  *              socantrcvmore().  These should maybe both be conditionalized
 643  *              on the 'how' argument in soshutdown() as called from the
 644  *              shutdown() system call.
 645  */
 646 static int
 647 uipc_shutdown(struct socket *so)
 648 {
 649         struct unpcb *unp = sotounpcb(so);
 650
 651         if (unp == 0)
 652                 return (EINVAL);
 653         socantsendmore(so);
 654         unp_shutdown(unp);
 655         return (0);
 656 }
 657
 658 /*
 659  * Returns:     0                       Success
 660  *              EINVAL                  Invalid argument
 661  */
 662 static int
 663 uipc_sockaddr(struct socket *so, struct sockaddr **nam)
 664 {
 665         struct unpcb *unp = sotounpcb(so);
 666
 667         if (unp == NULL)
 668                 return (EINVAL);
 669         if (unp->unp_addr != NULL) {
 670                 *nam = dup_sockaddr((struct sockaddr *)unp->unp_addr, 1);
 671         } else {
 672                 *nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
 673         }
 674         return (0);
 675 }
 676
 677 struct pr_usrreqs uipc_usrreqs = {
 678         .pru_abort =            uipc_abort,
 679         .pru_accept =           uipc_accept,
 680         .pru_attach =           uipc_attach,
 681         .pru_bind =             uipc_bind,
 682         .pru_connect =          uipc_connect,
 683         .pru_connect2 =         uipc_connect2,
 684         .pru_detach =           uipc_detach,
 685         .pru_disconnect =       uipc_disconnect,
 686         .pru_listen =           uipc_listen,
 687         .pru_peeraddr =         uipc_peeraddr,
 688         .pru_rcvd =             uipc_rcvd,
 689         .pru_send =             uipc_send,
 690         .pru_sense =            uipc_sense,
 691         .pru_shutdown =         uipc_shutdown,
 692         .pru_sockaddr =         uipc_sockaddr,
 693         .pru_sosend =           sosend,
 694         .pru_soreceive =        soreceive,
 695 };
 696
 697 int
 698 uipc_ctloutput(struct socket *so, struct sockopt *sopt)
 699 {
 700         struct unpcb *unp = sotounpcb(so);
 701         int error = 0;
 702         pid_t peerpid;
 703         struct socket *peerso;
 704
 705         switch (sopt->sopt_dir) {
 706         case SOPT_GET:
 707                 switch (sopt->sopt_name) {
 708                 case LOCAL_PEERCRED:
 709                         if (unp->unp_flags & UNP_HAVEPC) {
 710                                 error = sooptcopyout(sopt, &unp->unp_peercred,
 711                                     sizeof (unp->unp_peercred));
 712                         } else {
 713                                 if (so->so_type == SOCK_STREAM)
 714                                         error = ENOTCONN;
 715                                 else
 716                                         error = EINVAL;
 717                         }
 718                         break;
 719                 case LOCAL_PEERPID:
 720                 case LOCAL_PEEREPID:
 721                         if (unp->unp_conn == NULL) {
 722                                 error = ENOTCONN;
 723                                 break;
 724                         }
 725                         peerso = unp->unp_conn->unp_socket;
 726                         if (peerso == NULL)
 727                                 panic("peer is connected but has no socket?");
 728                         unp_get_locks_in_order(so, peerso);
 729                         if (sopt->sopt_name == LOCAL_PEEREPID &&
 730                             peerso->so_flags & SOF_DELEGATED)
 731                                 peerpid = peerso->e_pid;
 732                         else
 733                                 peerpid = peerso->last_pid;
 734                         socket_unlock(peerso, 1);
 735                         error = sooptcopyout(sopt, &peerpid, sizeof (peerpid));
 736                         break;
 737                 case LOCAL_PEERUUID:
 738                 case LOCAL_PEEREUUID:
 739                         if (unp->unp_conn == NULL) {
 740                                 error = ENOTCONN;
 741                                 break;
 742                         }
 743                         peerso = unp->unp_conn->unp_socket;
 744                         if (peerso == NULL)
 745                                 panic("peer is connected but has no socket?");
 746                         unp_get_locks_in_order(so, peerso);
 747                         if (sopt->sopt_name == LOCAL_PEEREUUID &&
 748                             peerso->so_flags & SOF_DELEGATED)
 749                                 error = sooptcopyout(sopt, &peerso->e_uuid,
 750                                     sizeof (peerso->e_uuid));
 751                         else
 752                                 error = sooptcopyout(sopt, &peerso->last_uuid,
 753                                     sizeof (peerso->last_uuid));
 754                         socket_unlock(peerso, 1);
 755                         break;
 756                 default:
 757                         error = EOPNOTSUPP;
 758                         break;
 759                 }
 760                 break;
 761         case SOPT_SET:
 762         default:
 763                 error = EOPNOTSUPP;
 764                 break;
 765         }
 766
 767         return (error);
 768 }
 769
 770 /*
 771  * Both send and receive buffers are allocated PIPSIZ bytes of buffering
 772  * for stream sockets, although the total for sender and receiver is
 773  * actually only PIPSIZ.
 774  * Datagram sockets really use the sendspace as the maximum datagram size,
 775  * and don't really want to reserve the sendspace.  Their recvspace should
 776  * be large enough for at least one max-size datagram plus address.
 777  */
     /* Default buffer size for stream sockets; may be predefined elsewhere. */
 778 #ifndef PIPSIZ
 779 #define PIPSIZ  8192
 780 #endif
     /*
      * Default send/receive reservations that unp_attach() passes to
      * soreserve(): the unpst_* pair for SOCK_STREAM, the unpdg_* pair for
      * SOCK_DGRAM.
      */
 781 static u_int32_t        unpst_sendspace = PIPSIZ;
 782 static u_int32_t        unpst_recvspace = PIPSIZ;
 783 static u_int32_t        unpdg_sendspace = 2*1024;       /* really max datagram size */
 784 static u_int32_t        unpdg_recvspace = 4*1024;
 785
     /*
      * NOTE(review): unp_disposed is not referenced in this part of the
      * file; unp_rights is referenced here only by the sysctl below.
      */
 786 static int      unp_rights;                     /* file descriptors in flight */
 787 static int      unp_disposed;                   /* discarded file descriptors */
 788
     /*
      * sysctl registrations.  The four buffer sizes and the mDNS trace switch
      * are registered CTLFLAG_RW; unp_rights is registered CTLFLAG_RD as
      * "inflight".  Note that unpdg_sendspace is registered as "maxdgram".
      * NOTE(review): SYSCTL_INT is used with u_int32_t variables -- confirm
      * the signedness mismatch is intended.
      */
 789 SYSCTL_DECL(_net_local_stream);
 790 SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW | CTLFLAG_LOCKED,
 791    &unpst_sendspace, 0, "");
 792 SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW | CTLFLAG_LOCKED,
 793    &unpst_recvspace, 0, "");
 794 SYSCTL_INT(_net_local_stream, OID_AUTO, tracemdns, CTLFLAG_RW | CTLFLAG_LOCKED,
 795    &unpst_tracemdns, 0, "");
 796 SYSCTL_DECL(_net_local_dgram);
 797 SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW | CTLFLAG_LOCKED,
 798    &unpdg_sendspace, 0, "");
 799 SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW | CTLFLAG_LOCKED,
 800    &unpdg_recvspace, 0, "");
 801 SYSCTL_DECL(_net_local);
 802 SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD | CTLFLAG_LOCKED, &unp_rights, 0, "");
 803
 804 /*
 805  * Returns:     0                       Success
 806  *              ENOBUFS
 807  *      soreserve:ENOBUFS
 808  */
 809 static int
 810 unp_attach(struct socket *so)
 811 {
 812         struct unpcb *unp;
 813         int error = 0;
 814
 815         if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
 816                 switch (so->so_type) {
 817
 818                 case SOCK_STREAM:
 819                         error = soreserve(so, unpst_sendspace, unpst_recvspace);
 820                         break;
 821
 822                 case SOCK_DGRAM:
 823                         error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
 824                         break;
 825
 826                 default:
 827                         panic("unp_attach");
 828                 }
 829                 if (error)
 830                         return (error);
 831         }
 832         unp = (struct unpcb *)zalloc(unp_zone);
 833         if (unp == NULL)
 834                 return (ENOBUFS);
 835         bzero(unp, sizeof (*unp));
 836
 837         lck_mtx_init(&unp->unp_mtx,
 838                 unp_mtx_grp, unp_mtx_attr);
 839
 840         lck_rw_lock_exclusive(unp_list_mtx);
 841         LIST_INIT(&unp->unp_refs);
 842         unp->unp_socket = so;
 843         unp->unp_gencnt = ++unp_gencnt;
 844         unp_count++;
 845         LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ?
 846             &unp_dhead : &unp_shead, unp, unp_link);
 847         lck_rw_done(unp_list_mtx);
 848         so->so_pcb = (caddr_t)unp;
 849         /*
 850          * Mark AF_UNIX socket buffers accordingly so that:
 851          *
 852          * a. In the SOCK_STREAM case, socket buffer append won't fail due to
 853          *    the lack of space; this essentially loosens the sbspace() check,
 854          *    since there is disconnect between sosend() and uipc_send() with
 855          *    respect to flow control that might result in our dropping the
 856          *    data in uipc_send().  By setting this, we allow for slightly
 857          *    more records to be appended to the receiving socket to avoid
 858          *    losing data (which we can't afford in the SOCK_STREAM case).
 859          *    Flow control still takes place since we adjust the sender's
 860          *    hiwat during each send.  This doesn't affect the SOCK_DGRAM
 861          *    case and append would still fail when the queue overflows.
 862          *
 863          * b. In the presence of control messages containing internalized
 864          *    file descriptors, the append routines will not free them since
 865          *    we'd need to undo the work first via unp_dispose().
 866          */
 867         so->so_rcv.sb_flags |= SB_UNIX;
 868         so->so_snd.sb_flags |= SB_UNIX;
 869         return (0);
 870 }
 871
 872 static void
 873 unp_detach(struct unpcb *unp)
 874 {
 875         int so_locked = 1;
 876
 877         lck_rw_lock_exclusive(unp_list_mtx);
 878         LIST_REMOVE(unp, unp_link);
 879         --unp_count;
 880         ++unp_gencnt;
 881         lck_rw_done(unp_list_mtx);
 882         if (unp->unp_vnode) {
 883                 struct vnode *tvp = NULL;
 884                 socket_unlock(unp->unp_socket, 0);
 885
 886                 /* Holding unp_connect_lock will avoid a race between
 887                  * a thread closing the listening socket and a thread
 888                  * connecting to it.
 889                  */
 890                 lck_mtx_lock(unp_connect_lock);
 891                 socket_lock(unp->unp_socket, 0);
 892                 if (unp->unp_vnode) {
 893                         tvp = unp->unp_vnode;
 894                         unp->unp_vnode->v_socket = NULL;
 895                         unp->unp_vnode = NULL;
 896                 }
 897                 lck_mtx_unlock(unp_connect_lock);
 898                 if (tvp != NULL)
 899                         vnode_rele(tvp);                /* drop the usecount */
 900         }
 901         if (unp->unp_conn)
 902                 unp_disconnect(unp);
 903         while (unp->unp_refs.lh_first) {
 904                 struct unpcb *unp2 = NULL;
 905
 906                 /* This datagram socket is connected to one or more
 907                  * sockets. In order to avoid a race condition between removing
 908                  * this reference and closing the connected socket, we need
 909                  * to check disconnect_in_progress
 910                  */
 911                 if (so_locked == 1) {
 912                         socket_unlock(unp->unp_socket, 0);
 913                         so_locked = 0;
 914                 }
 915                 lck_mtx_lock(unp_disconnect_lock);
 916                 while (disconnect_in_progress != 0) {
 917                         (void)msleep((caddr_t)&disconnect_in_progress, unp_disconnect_lock,
 918                                 PSOCK, "disconnect", NULL);
 919                 }
 920                 disconnect_in_progress = 1;
 921                 lck_mtx_unlock(unp_disconnect_lock);
 922
 923                 /* Now we are sure that any unpcb socket disconnect is not happening */
 924                 if (unp->unp_refs.lh_first != NULL) {
 925                         unp2 = unp->unp_refs.lh_first;
 926                         socket_lock(unp2->unp_socket, 1);
 927                 }
 928
 929                 lck_mtx_lock(unp_disconnect_lock);
 930                 disconnect_in_progress = 0;
 931                 wakeup(&disconnect_in_progress);
 932                 lck_mtx_unlock(unp_disconnect_lock);
 933
 934                 if (unp2 != NULL) {
 935                         /* We already locked this socket and have a reference on it */
 936                         unp_drop(unp2, ECONNRESET);
 937                         socket_unlock(unp2->unp_socket, 1);
 938                 }
 939         }
 940
 941         if (so_locked == 0) {
 942                 socket_lock(unp->unp_socket, 0);
 943                 so_locked = 1;
 944         }
 945         soisdisconnected(unp->unp_socket);
 946         /* makes sure we're getting dealloced */
 947         unp->unp_socket->so_flags |= SOF_PCBCLEARING;
 948 }
 949
 950 /*
 951  * Returns:     0                       Success
 952  *              EAFNOSUPPORT
 953  *              EINVAL
 954  *              EADDRINUSE
 955  *              namei:???               [anything namei can return]
 956  *              vnode_authorize:???     [anything vnode_authorize can return]
 957  *
 958  * Notes:       p at this point is the current process, as this function is
 959  *              only called by sobind().
 960  */
 961 static int
 962 unp_bind(
 963         struct unpcb *unp,
 964         struct sockaddr *nam,
 965         proc_t p)
 966 {
 967         struct sockaddr_un *soun = (struct sockaddr_un *)nam;
 968         struct vnode *vp, *dvp;
 969         struct vnode_attr va;
 970         vfs_context_t ctx = vfs_context_current();
 971         int error, namelen;
 972         struct nameidata nd;
 973         struct socket *so = unp->unp_socket;
 974         char buf[SOCK_MAXADDRLEN];
 975
 976         if (nam->sa_family != 0 && nam->sa_family != AF_UNIX) {
 977                 return (EAFNOSUPPORT);
 978         }
 979
 980         if (unp->unp_vnode != NULL)
 981                 return (EINVAL);
 982         namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
 983         if (namelen <= 0)
 984                 return (EINVAL);
 985
 986         socket_unlock(so, 0);
 987
 988         strlcpy(buf, soun->sun_path, namelen+1);
 989         NDINIT(&nd, CREATE, OP_MKFIFO, FOLLOW | LOCKPARENT, UIO_SYSSPACE,
 990             CAST_USER_ADDR_T(buf), ctx);
 991         /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
 992         error = namei(&nd);
 993         if (error) {
 994                 socket_lock(so, 0);
 995                 return (error);
 996         }
 997         dvp = nd.ni_dvp;
 998         vp = nd.ni_vp;
 999
1000         if (vp != NULL) {
1001                 /*
1002                  * need to do this before the vnode_put of dvp
1003                  * since we may have to release an fs_nodelock
1004                  */
1005                 nameidone(&nd);
1006
1007                 vnode_put(dvp);
1008                 vnode_put(vp);
1009
1010                 socket_lock(so, 0);
1011                 return (EADDRINUSE);
1012         }
1013
1014         VATTR_INIT(&va);
1015         VATTR_SET(&va, va_type, VSOCK);
1016         VATTR_SET(&va, va_mode, (ACCESSPERMS & ~p->p_fd->fd_cmask));
1017
1018 #if CONFIG_MACF
1019         error = mac_vnode_check_create(ctx,
1020             nd.ni_dvp, &nd.ni_cnd, &va);
1021
1022         if (error == 0)
1023 #endif /* CONFIG_MACF */
1024 #if CONFIG_MACF_SOCKET_SUBSET
1025         error = mac_vnode_check_uipc_bind(ctx,
1026             nd.ni_dvp, &nd.ni_cnd, &va);
1027
1028         if (error == 0)
1029 #endif /* MAC_SOCKET_SUBSET */
1030         /* authorize before creating */
1031         error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);
1032
1033         if (!error) {
1034                 /* create the socket */
1035                 error = vn_create(dvp, &vp, &nd, &va, 0, 0, NULL, ctx);
1036         }
1037
1038         nameidone(&nd);
1039         vnode_put(dvp);
1040
1041         if (error) {
1042                 socket_lock(so, 0);
1043                 return (error);
1044         }
1045         vnode_ref(vp);  /* gain a longterm reference */
1046         socket_lock(so, 0);
1047         vp->v_socket = unp->unp_socket;
1048         unp->unp_vnode = vp;
1049         unp->unp_addr = (struct sockaddr_un *)dup_sockaddr(nam, 1);
1050         vnode_put(vp);          /* drop the iocount */
1051
1052         return (0);
1053 }
1054
1055
1056 /*
1057  * Returns:     0                       Success
1058  *              EAFNOSUPPORT            Address family not supported
1059  *              EINVAL                  Invalid argument
1060  *              ENOTSOCK                Not a socket
1061  *              ECONNREFUSED            Connection refused
1062  *              EPROTOTYPE              Protocol wrong type for socket
1063  *              EISCONN                 Socket is connected
1064  *      unp_connect2:EPROTOTYPE         Protocol wrong type for socket
1065  *      unp_connect2:EINVAL             Invalid argument
1066  *      namei:???                       [anything namei can return]
1067  *      vnode_authorize:????            [anything vnode_authorize can return]
1068  *
1069  * Notes:       p at this point is the current process, as this function is
1070  *              only called by sosend(), sendfile(), and soconnectlock().
1071  */
1072 static int
1073 unp_connect(struct socket *so, struct sockaddr *nam, __unused proc_t p)
1074 {
1075         struct sockaddr_un *soun = (struct sockaddr_un *)nam;
1076         struct vnode *vp;
1077         struct socket *so2, *so3, *list_so=NULL;
1078         struct unpcb *unp, *unp2, *unp3;
1079         vfs_context_t ctx = vfs_context_current();
1080         int error, len;
1081         struct nameidata nd;
1082         char buf[SOCK_MAXADDRLEN];
1083
1084         if (nam->sa_family != 0 && nam->sa_family != AF_UNIX) {
1085                 return (EAFNOSUPPORT);
1086         }
1087
1088         unp = sotounpcb(so);
1089         so2 = so3 = NULL;
1090
1091         len = nam->sa_len - offsetof(struct sockaddr_un, sun_path);
1092         if (len <= 0)
1093                 return (EINVAL);
1094
1095         strlcpy(buf, soun->sun_path, len+1);
1096         socket_unlock(so, 0);
1097
1098         NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
1099             CAST_USER_ADDR_T(buf), ctx);
1100         error = namei(&nd);
1101         if (error) {
1102                 socket_lock(so, 0);
1103                 return (error);
1104         }
1105         nameidone(&nd);
1106         vp = nd.ni_vp;
1107         if (vp->v_type != VSOCK) {
1108                 error = ENOTSOCK;
1109                 socket_lock(so, 0);
1110                 goto out;
1111         }
1112
1113 #if CONFIG_MACF_SOCKET_SUBSET
1114         error = mac_vnode_check_uipc_connect(ctx, vp);
1115         if (error) {
1116                 socket_lock(so, 0);
1117                 goto out;
1118         }
1119 #endif /* MAC_SOCKET_SUBSET */
1120
1121         error = vnode_authorize(vp, NULL, KAUTH_VNODE_WRITE_DATA, ctx);
1122         if (error) {
1123                 socket_lock(so, 0);
1124                 goto out;
1125         }
1126
1127         lck_mtx_lock(unp_connect_lock);
1128
1129         if (vp->v_socket == 0) {
1130                 lck_mtx_unlock(unp_connect_lock);
1131                 error = ECONNREFUSED;
1132                 socket_lock(so, 0);
1133                 goto out;
1134         }
1135
1136         socket_lock(vp->v_socket, 1); /* Get a reference on the listening socket */
1137         so2 = vp->v_socket;
1138         lck_mtx_unlock(unp_connect_lock);
1139
1140
1141         if (so2->so_pcb == NULL) {
1142                 error = ECONNREFUSED;
1143                 if (so != so2) {
1144                         socket_unlock(so2, 1);
1145                         socket_lock(so, 0);
1146                 } else {
1147                         /* Release the reference held for the listen socket */
1148                         so2->so_usecount--;
1149                 }
1150                 goto out;
1151         }
1152
1153         if (so < so2) {
1154                 socket_unlock(so2, 0);
1155                 socket_lock(so, 0);
1156                 socket_lock(so2, 0);
1157         } else if (so > so2) {
1158                 socket_lock(so, 0);
1159         }
1160         /*
1161          * Check if socket was connected while we were trying to
1162          * get the socket locks in order.
1163          * XXX - probably shouldn't return an error for SOCK_DGRAM
1164          */
1165         if ((so->so_state & SS_ISCONNECTED) != 0) {
1166                 error = EISCONN;
1167                 goto decref_out;
1168         }
1169
1170         if (so->so_type != so2->so_type) {
1171                 error = EPROTOTYPE;
1172                 goto decref_out;
1173         }
1174
1175         if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
1176                 /* Release the incoming socket but keep a reference */
1177                 socket_unlock(so, 0);
1178
1179                 if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
1180                     (so3 = sonewconn(so2, 0, nam)) == 0) {
1181                         error = ECONNREFUSED;
1182                         if (so != so2) {
1183                                 socket_unlock(so2, 1);
1184                                 socket_lock(so, 0);
1185                         } else {
1186                                 socket_lock(so, 0);
1187                                 /* Release the reference held for
1188                                  * listen socket.
1189                                  */
1190                                 so2->so_usecount--;
1191                         }
1192                         goto out;
1193                 }
1194                 unp2 = sotounpcb(so2);
1195                 unp3 = sotounpcb(so3);
1196                 if (unp2->unp_addr)
1197                         unp3->unp_addr = (struct sockaddr_un *)
1198                             dup_sockaddr((struct sockaddr *)unp2->unp_addr, 1);
1199
1200                 /*
1201                  * unp_peercred management:
1202                  *
1203                  * The connecter's (client's) credentials are copied
1204                  * from its process structure at the time of connect()
1205                  * (which is now).
1206                  */
1207                 cru2x(vfs_context_ucred(ctx), &unp3->unp_peercred);
1208                 unp3->unp_flags |= UNP_HAVEPC;
1209                 /*
1210                  * The receiver's (server's) credentials are copied
1211                  * from the unp_peercred member of socket on which the
1212                  * former called listen(); unp_listen() cached that
1213                  * process's credentials at that time so we can use
1214                  * them now.
1215                  */
1216                 KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED,
1217                     ("unp_connect: listener without cached peercred"));
1218
1219                 /* Here we need to have both so and so2 locks and so2
1220                  * is already locked. Lock ordering is required.
1221                  */
1222                 if (so < so2) {
1223                         socket_unlock(so2, 0);
1224                         socket_lock(so, 0);
1225                         socket_lock(so2, 0);
1226                 } else {
1227                         socket_lock(so, 0);
1228                 }
1229
1230                 /* Check again if the socket state changed when its lock was released */
1231                 if ((so->so_state & SS_ISCONNECTED) != 0) {
1232                         error = EISCONN;
1233                         socket_unlock(so2, 1);
1234                         socket_lock(so3, 0);
1235                         sofreelastref(so3, 1);
1236                         goto out;
1237                 }
1238                 memcpy(&unp->unp_peercred, &unp2->unp_peercred,
1239                     sizeof (unp->unp_peercred));
1240                 unp->unp_flags |= UNP_HAVEPC;
1241
1242 #if CONFIG_MACF_SOCKET
1243                 /* XXXMAC: recursive lock: SOCK_LOCK(so); */
1244                 mac_socketpeer_label_associate_socket(so, so3);
1245                 mac_socketpeer_label_associate_socket(so3, so);
1246                 /* XXXMAC: SOCK_UNLOCK(so); */
1247 #endif /* MAC_SOCKET */
1248
1249                 /* Hold the reference on listening socket until the end */
1250                 socket_unlock(so2, 0);
1251                 list_so = so2;
1252
1253                 /* Lock ordering doesn't matter because so3 was just created */
1254                 socket_lock(so3, 1);
1255                 so2 = so3;
1256
1257                 /*
1258                  * Enable tracing for mDNSResponder endpoints.  (The use
1259                  * of sizeof instead of strlen below takes the null
1260                  * terminating character into account.)
1261                  */
1262                 if (unpst_tracemdns &&
1263                     !strncmp(soun->sun_path, MDNSRESPONDER_PATH,
1264                     sizeof (MDNSRESPONDER_PATH))) {
1265                         unp->unp_flags |= UNP_TRACE_MDNS;
1266                         unp2->unp_flags |= UNP_TRACE_MDNS;
1267                 }
1268         }
1269
1270         error = unp_connect2(so, so2);
1271
1272 decref_out:
1273         if (so2 != NULL) {
1274                 if (so != so2) {
1275                         socket_unlock(so2, 1);
1276                 } else {
1277                         /* Release the extra reference held for the listen socket.
1278                          * This is possible only for SOCK_DGRAM sockets. We refuse
1279                          * connecting to the same socket for SOCK_STREAM sockets.
1280                          */
1281                         so2->so_usecount--;
1282                 }
1283         }
1284
1285         if (list_so != NULL) {
1286                 socket_lock(list_so, 0);
1287                 socket_unlock(list_so, 1);
1288         }
1289
1290 out:
1291         lck_mtx_assert(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1292         vnode_put(vp);
1293         return (error);
1294 }
1295
1296 /*
1297  * Returns:     0                       Success
1298  *              EPROTOTYPE              Protocol wrong type for socket
1299  *              EINVAL                  Invalid argument
1300  */
1301 int
1302 unp_connect2(struct socket *so, struct socket *so2)
1303 {
1304         struct unpcb *unp = sotounpcb(so);
1305         struct unpcb *unp2;
1306
1307         if (so2->so_type != so->so_type)
1308                 return (EPROTOTYPE);
1309
1310         unp2 = sotounpcb(so2);
1311
1312         lck_mtx_assert(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1313         lck_mtx_assert(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);
1314
1315         /* Verify both sockets are still opened */
1316         if (unp == 0 || unp2 == 0)
1317                 return (EINVAL);
1318
1319         unp->unp_conn = unp2;
1320         so2->so_usecount++;
1321
1322         switch (so->so_type) {
1323
1324         case SOCK_DGRAM:
1325                 LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink);
1326
1327                 if (so != so2) {
1328                         /* Avoid lock order reversals due to drop/acquire in soisconnected. */
1329                         /* Keep an extra reference on so2 that will be dropped
1330                          * soon after getting the locks in order
1331                          */
1332                         socket_unlock(so2, 0);
1333                         soisconnected(so);
1334                         unp_get_locks_in_order(so, so2);
1335                         so2->so_usecount--;
1336                 } else {
1337                         soisconnected(so);
1338                 }
1339
1340                 break;
1341
1342         case SOCK_STREAM:
1343                 /* This takes care of socketpair */
1344                 if (!(unp->unp_flags & UNP_HAVEPC) &&
1345                     !(unp2->unp_flags & UNP_HAVEPC)) {
1346                         cru2x(kauth_cred_get(), &unp->unp_peercred);
1347                         unp->unp_flags |= UNP_HAVEPC;
1348
1349                         cru2x(kauth_cred_get(), &unp2->unp_peercred);
1350                         unp2->unp_flags |= UNP_HAVEPC;
1351                 }
1352                 unp2->unp_conn = unp;
1353                 so->so_usecount++;
1354
1355                 /* Avoid lock order reversals due to drop/acquire in soisconnected. */
1356                 socket_unlock(so, 0);
1357                 soisconnected(so2);
1358
1359                 /* Keep an extra reference on so2, that will be dropped soon after
1360                  * getting the locks in order again.
1361                  */
1362                 socket_unlock(so2, 0);
1363
1364                 socket_lock(so, 0);
1365                 soisconnected(so);
1366
1367                 unp_get_locks_in_order(so, so2);
1368                 /* Decrement the extra reference left before */
1369                 so2->so_usecount--;
1370                 break;
1371
1372         default:
1373                 panic("unknown socket type %d in unp_connect2", so->so_type);
1374         }
1375         lck_mtx_assert(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1376         lck_mtx_assert(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);
1377         return (0);
1378 }
1379
1380 static void
1381 unp_disconnect(struct unpcb *unp)
1382 {
1383         struct unpcb *unp2 = NULL;
1384         struct socket *so2 = NULL, *so;
1385         struct socket *waitso;
1386         int so_locked = 1, strdisconn = 0;
1387
1388         so = unp->unp_socket;
1389         if (unp->unp_conn == NULL) {
1390                 return;
1391         }
1392         lck_mtx_lock(unp_disconnect_lock);
1393         while (disconnect_in_progress != 0) {
1394                 if (so_locked == 1) {
1395                         socket_unlock(so, 0);
1396                         so_locked = 0;
1397                 }
1398                 (void)msleep((caddr_t)&disconnect_in_progress, unp_disconnect_lock,
1399                         PSOCK, "disconnect", NULL);
1400         }
1401         disconnect_in_progress = 1;
1402         lck_mtx_unlock(unp_disconnect_lock);
1403
1404         if (so_locked == 0) {
1405                 socket_lock(so, 0);
1406                 so_locked = 1;
1407         }
1408
1409         unp2 = unp->unp_conn;
1410
1411         if (unp2 == 0 || unp2->unp_socket == NULL) {
1412                 goto out;
1413         }
1414         so2 = unp2->unp_socket;
1415
1416 try_again:
1417         if (so == so2) {
1418                 if (so_locked == 0) {
1419                         socket_lock(so, 0);
1420                 }
1421                 waitso = so;
1422         } else if (so < so2) {
1423                 if (so_locked == 0) {
1424                         socket_lock(so, 0);
1425                 }
1426                 socket_lock(so2, 1);
1427                 waitso = so2;
1428         } else {
1429                 if (so_locked == 1) {
1430                         socket_unlock(so, 0);
1431                 }
1432                 socket_lock(so2, 1);
1433                 socket_lock(so, 0);
1434                 waitso = so;
1435         }
1436         so_locked = 1;
1437
1438         lck_mtx_assert(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1439         lck_mtx_assert(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);
1440
1441         /* Check for the UNP_DONTDISCONNECT flag, if it
1442          * is set, release both sockets and go to sleep
1443          */
1444
1445         if ((((struct unpcb *)waitso->so_pcb)->unp_flags & UNP_DONTDISCONNECT) != 0) {
1446                 if (so != so2) {
1447                         socket_unlock(so2, 1);
1448                 }
1449                 so_locked = 0;
1450
1451                 (void)msleep(waitso->so_pcb, &unp->unp_mtx,
1452                         PSOCK | PDROP, "unpdisconnect", NULL);
1453                 goto try_again;
1454         }
1455
1456         if (unp->unp_conn == NULL) {
1457                 panic("unp_conn became NULL after sleep");
1458         }
1459
1460         unp->unp_conn = NULL;
1461         so2->so_usecount--;
1462
1463         if (unp->unp_flags & UNP_TRACE_MDNS)
1464                 unp->unp_flags &= ~UNP_TRACE_MDNS;
1465
1466         switch (unp->unp_socket->so_type) {
1467
1468         case SOCK_DGRAM:
1469                 LIST_REMOVE(unp, unp_reflink);
1470                 unp->unp_socket->so_state &= ~SS_ISCONNECTED;
1471                 if (so != so2)
1472                         socket_unlock(so2, 1);
1473                 break;
1474
1475         case SOCK_STREAM:
1476                 unp2->unp_conn = NULL;
1477                 so->so_usecount--;
1478
1479                 /* Set the socket state correctly but do a wakeup later when
1480                  * we release all locks except the socket lock, this will avoid
1481                  * a deadlock.
1482                  */
1483                 unp->unp_socket->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
1484                 unp->unp_socket->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
1485
1486                 unp2->unp_socket->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
1487                 unp->unp_socket->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
1488
1489                 if (unp2->unp_flags & UNP_TRACE_MDNS)
1490                         unp2->unp_flags &= ~UNP_TRACE_MDNS;
1491
1492                 strdisconn = 1;
1493                 break;
1494         default:
1495                 panic("unknown socket type %d", so->so_type);
1496         }
1497 out:
1498         lck_mtx_lock(unp_disconnect_lock);
1499         disconnect_in_progress = 0;
1500         wakeup(&disconnect_in_progress);
1501         lck_mtx_unlock(unp_disconnect_lock);
1502
1503         if (strdisconn) {
1504                 socket_unlock(so, 0);
1505                 soisdisconnected(so2);
1506                 socket_unlock(so2, 1);
1507
1508                 socket_lock(so,0);
1509                 soisdisconnected(so);
1510         }
1511         lck_mtx_assert(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1512         return;
1513 }
1514
1515 /*
1516  * unpcb_to_compat copies specific bits of a unpcb to a unpcb_compat format.
1517  * The unpcb_compat data structure is passed to user space and must not change.
1518  */
1519 static void
1520 unpcb_to_compat(struct unpcb *up, struct unpcb_compat *cp)
1521 {
1522 #if defined(__LP64__)
1523         cp->unp_link.le_next = (u_int32_t)
1524             VM_KERNEL_ADDRPERM(up->unp_link.le_next);
1525         cp->unp_link.le_prev = (u_int32_t)
1526             VM_KERNEL_ADDRPERM(up->unp_link.le_prev);
1527 #else
1528         cp->unp_link.le_next = (struct unpcb_compat *)
1529             VM_KERNEL_ADDRPERM(up->unp_link.le_next);
1530         cp->unp_link.le_prev = (struct unpcb_compat **)
1531             VM_KERNEL_ADDRPERM(up->unp_link.le_prev);
1532 #endif
1533         cp->unp_socket = (_UNPCB_PTR(struct socket *))
1534             VM_KERNEL_ADDRPERM(up->unp_socket);
1535         cp->unp_vnode = (_UNPCB_PTR(struct vnode *))
1536             VM_KERNEL_ADDRPERM(up->unp_vnode);
1537         cp->unp_ino = up->unp_ino;
1538         cp->unp_conn = (_UNPCB_PTR(struct unpcb_compat *))
1539             VM_KERNEL_ADDRPERM(up->unp_conn);
1540         cp->unp_refs = (u_int32_t)VM_KERNEL_ADDRPERM(up->unp_refs.lh_first);
1541 #if defined(__LP64__)
1542         cp->unp_reflink.le_next =
1543             (u_int32_t)VM_KERNEL_ADDRPERM(up->unp_reflink.le_next);
1544         cp->unp_reflink.le_prev =
1545             (u_int32_t)VM_KERNEL_ADDRPERM(up->unp_reflink.le_prev);
1546 #else
1547         cp->unp_reflink.le_next =
1548             (struct unpcb_compat *)VM_KERNEL_ADDRPERM(up->unp_reflink.le_next);
1549         cp->unp_reflink.le_prev =
1550             (struct unpcb_compat **)VM_KERNEL_ADDRPERM(up->unp_reflink.le_prev);
1551 #endif
1552         cp->unp_addr = (_UNPCB_PTR(struct sockaddr_un *))
1553             VM_KERNEL_ADDRPERM(up->unp_addr);
1554         cp->unp_cc = up->unp_cc;
1555         cp->unp_mbcnt = up->unp_mbcnt;
1556         cp->unp_gencnt = up->unp_gencnt;
1557 }
1558
1559 static int
1560 unp_pcblist SYSCTL_HANDLER_ARGS
1561 {
1562 #pragma unused(oidp,arg2)
1563         int error, i, n;
1564         struct unpcb *unp, **unp_list;
1565         unp_gen_t gencnt;
1566         struct xunpgen xug;
1567         struct unp_head *head;
1568
1569         lck_rw_lock_shared(unp_list_mtx);
1570         head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);
1571
1572         /*
1573          * The process of preparing the PCB list is too time-consuming and
1574          * resource-intensive to repeat twice on every request.
1575          */
1576         if (req->oldptr == USER_ADDR_NULL) {
1577                 n = unp_count;
1578                 req->oldidx = 2 * sizeof (xug) + (n + n / 8) *
1579                     sizeof (struct xunpcb);
1580                 lck_rw_done(unp_list_mtx);
1581                 return (0);
1582         }
1583
1584         if (req->newptr != USER_ADDR_NULL) {
1585                 lck_rw_done(unp_list_mtx);
1586                 return (EPERM);
1587         }
1588
1589         /*
1590          * OK, now we're committed to doing something.
1591          */
1592         gencnt = unp_gencnt;
1593         n = unp_count;
1594
1595         bzero(&xug, sizeof (xug));
1596         xug.xug_len = sizeof (xug);
1597         xug.xug_count = n;
1598         xug.xug_gen = gencnt;
1599         xug.xug_sogen = so_gencnt;
1600         error = SYSCTL_OUT(req, &xug, sizeof (xug));
1601         if (error) {
1602                 lck_rw_done(unp_list_mtx);
1603                 return (error);
1604         }
1605
1606         /*
1607          * We are done if there is no pcb
1608          */
1609         if (n == 0)  {
1610                 lck_rw_done(unp_list_mtx);
1611                 return (0);
1612         }
1613
1614         MALLOC(unp_list, struct unpcb **, n * sizeof (*unp_list),
1615             M_TEMP, M_WAITOK);
1616         if (unp_list == 0) {
1617                 lck_rw_done(unp_list_mtx);
1618                 return (ENOMEM);
1619         }
1620
1621         for (unp = head->lh_first, i = 0; unp && i < n;
1622             unp = unp->unp_link.le_next) {
1623                 if (unp->unp_gencnt <= gencnt)
1624                         unp_list[i++] = unp;
1625         }
1626         n = i;                  /* in case we lost some during malloc */
1627
1628         error = 0;
1629         for (i = 0; i < n; i++) {
1630                 unp = unp_list[i];
1631                 if (unp->unp_gencnt <= gencnt) {
1632                         struct xunpcb xu;
1633
1634                         bzero(&xu, sizeof (xu));
1635                         xu.xu_len = sizeof (xu);
1636                         xu.xu_unpp = (_UNPCB_PTR(struct unpcb_compat *))
1637                             VM_KERNEL_ADDRPERM(unp);
1638                         /*
1639                          * XXX - need more locking here to protect against
1640                          * connect/disconnect races for SMP.
1641                          */
1642                         if (unp->unp_addr)
1643                                 bcopy(unp->unp_addr, &xu.xu_addr,
1644                                     unp->unp_addr->sun_len);
1645                         if (unp->unp_conn && unp->unp_conn->unp_addr)
1646                                 bcopy(unp->unp_conn->unp_addr,
1647                                     &xu.xu_caddr,
1648                                     unp->unp_conn->unp_addr->sun_len);
1649                         unpcb_to_compat(unp, &xu.xu_unp);
1650                         sotoxsocket(unp->unp_socket, &xu.xu_socket);
1651                         error = SYSCTL_OUT(req, &xu, sizeof (xu));
1652                 }
1653         }
1654         if (!error) {
1655                 /*
1656                  * Give the user an updated idea of our state.
1657                  * If the generation differs from what we told
1658                  * her before, she knows that something happened
1659                  * while we were processing this request, and it
1660                  * might be necessary to retry.
1661                  */
1662                 bzero(&xug, sizeof (xug));
1663                 xug.xug_len = sizeof (xug);
1664                 xug.xug_gen = unp_gencnt;
1665                 xug.xug_sogen = so_gencnt;
1666                 xug.xug_count = unp_count;
1667                 error = SYSCTL_OUT(req, &xug, sizeof (xug));
1668         }
1669         FREE(unp_list, M_TEMP);
1670         lck_rw_done(unp_list_mtx);
1671         return (error);
1672 }
1673
1674 SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, CTLFLAG_RD | CTLFLAG_LOCKED,
1675             (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb",
1676             "List of active local datagram sockets");
1677 SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLFLAG_RD | CTLFLAG_LOCKED,
1678             (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb",
1679             "List of active local stream sockets");
1680
1681
1682 static int
1683 unp_pcblist64 SYSCTL_HANDLER_ARGS
1684 {
1685 #pragma unused(oidp,arg2)
1686         int error, i, n;
1687         struct unpcb *unp, **unp_list;
1688         unp_gen_t gencnt;
1689         struct xunpgen xug;
1690         struct unp_head *head;
1691
1692         lck_rw_lock_shared(unp_list_mtx);
1693         head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);
1694
1695         /*
1696          * The process of preparing the PCB list is too time-consuming and
1697          * resource-intensive to repeat twice on every request.
1698          */
1699         if (req->oldptr == USER_ADDR_NULL) {
1700                 n = unp_count;
1701                 req->oldidx = 2 * sizeof (xug) + (n + n / 8) *
1702                     (sizeof (struct xunpcb64));
1703                 lck_rw_done(unp_list_mtx);
1704                 return (0);
1705         }
1706
1707         if (req->newptr != USER_ADDR_NULL) {
1708                 lck_rw_done(unp_list_mtx);
1709                 return (EPERM);
1710         }
1711
1712         /*
1713          * OK, now we're committed to doing something.
1714          */
1715         gencnt = unp_gencnt;
1716         n = unp_count;
1717
1718         bzero(&xug, sizeof (xug));
1719         xug.xug_len = sizeof (xug);
1720         xug.xug_count = n;
1721         xug.xug_gen = gencnt;
1722         xug.xug_sogen = so_gencnt;
1723         error = SYSCTL_OUT(req, &xug, sizeof (xug));
1724         if (error) {
1725                 lck_rw_done(unp_list_mtx);
1726                 return (error);
1727         }
1728
1729         /*
1730          * We are done if there is no pcb
1731          */
1732         if (n == 0)  {
1733                 lck_rw_done(unp_list_mtx);
1734                 return (0);
1735         }
1736
1737         MALLOC(unp_list, struct unpcb **, n * sizeof (*unp_list),
1738             M_TEMP, M_WAITOK);
1739         if (unp_list == 0) {
1740                 lck_rw_done(unp_list_mtx);
1741                 return (ENOMEM);
1742         }
1743
1744         for (unp = head->lh_first, i = 0; unp && i < n;
1745             unp = unp->unp_link.le_next) {
1746                 if (unp->unp_gencnt <= gencnt)
1747                         unp_list[i++] = unp;
1748         }
1749         n = i;                  /* in case we lost some during malloc */
1750
1751         error = 0;
1752         for (i = 0; i < n; i++) {
1753                 unp = unp_list[i];
1754                 if (unp->unp_gencnt <= gencnt) {
1755                         struct xunpcb64 xu;
1756                         size_t          xu_len = sizeof(struct xunpcb64);
1757
1758                         bzero(&xu, xu_len);
1759                         xu.xu_len = xu_len;
1760                         xu.xu_unpp = (u_int64_t)VM_KERNEL_ADDRPERM(unp);
1761                         xu.xunp_link.le_next = (u_int64_t)
1762                             VM_KERNEL_ADDRPERM(unp->unp_link.le_next);
1763                         xu.xunp_link.le_prev = (u_int64_t)
1764                             VM_KERNEL_ADDRPERM(unp->unp_link.le_prev);
1765                         xu.xunp_socket = (u_int64_t)
1766                             VM_KERNEL_ADDRPERM(unp->unp_socket);
1767                         xu.xunp_vnode = (u_int64_t)
1768                             VM_KERNEL_ADDRPERM(unp->unp_vnode);
1769                         xu.xunp_ino = unp->unp_ino;
1770                         xu.xunp_conn = (u_int64_t)
1771                             VM_KERNEL_ADDRPERM(unp->unp_conn);
1772                         xu.xunp_refs = (u_int64_t)
1773                             VM_KERNEL_ADDRPERM(unp->unp_refs.lh_first);
1774                         xu.xunp_reflink.le_next = (u_int64_t)
1775                             VM_KERNEL_ADDRPERM(unp->unp_reflink.le_next);
1776                         xu.xunp_reflink.le_prev = (u_int64_t)
1777                             VM_KERNEL_ADDRPERM(unp->unp_reflink.le_prev);
1778                         xu.xunp_cc = unp->unp_cc;
1779                         xu.xunp_mbcnt = unp->unp_mbcnt;
1780                         xu.xunp_gencnt = unp->unp_gencnt;
1781
1782                         if (unp->unp_socket)
1783                                 sotoxsocket64(unp->unp_socket, &xu.xu_socket);
1784
1785                         /*
1786                          * XXX - need more locking here to protect against
1787                          * connect/disconnect races for SMP.
1788                          */
1789                         if (unp->unp_addr)
1790                                 bcopy(unp->unp_addr, &xu.xunp_addr,
1791                                     unp->unp_addr->sun_len);
1792                         if (unp->unp_conn && unp->unp_conn->unp_addr)
1793                                 bcopy(unp->unp_conn->unp_addr,
1794                                     &xu.xunp_caddr,
1795                                     unp->unp_conn->unp_addr->sun_len);
1796
1797                         error = SYSCTL_OUT(req, &xu, xu_len);
1798                 }
1799         }
1800         if (!error) {
1801                 /*
1802                  * Give the user an updated idea of our state.
1803                  * If the generation differs from what we told
1804                  * her before, she knows that something happened
1805                  * while we were processing this request, and it
1806                  * might be necessary to retry.
1807                  */
1808                 bzero(&xug, sizeof (xug));
1809                 xug.xug_len = sizeof (xug);
1810                 xug.xug_gen = unp_gencnt;
1811                 xug.xug_sogen = so_gencnt;
1812                 xug.xug_count = unp_count;
1813                 error = SYSCTL_OUT(req, &xug, sizeof (xug));
1814         }
1815         FREE(unp_list, M_TEMP);
1816         lck_rw_done(unp_list_mtx);
1817         return (error);
1818 }
1819
1820 SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist64, CTLFLAG_RD | CTLFLAG_LOCKED,
1821             (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist64, "S,xunpcb64",
1822             "List of active local datagram sockets 64 bit");
1823 SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist64, CTLFLAG_RD | CTLFLAG_LOCKED,
1824             (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist64, "S,xunpcb64",
1825             "List of active local stream sockets 64 bit");
1826
1827
1828 static void
1829 unp_shutdown(struct unpcb *unp)
1830 {
1831         struct socket *so = unp->unp_socket;
1832         struct socket *so2;
1833         if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn) {
1834                 so2 = unp->unp_conn->unp_socket;
1835                 unp_get_locks_in_order(so, so2);
1836                 socantrcvmore(so2);
1837                 socket_unlock(so2, 1);
1838         }
1839 }
1840
1841 static void
1842 unp_drop(struct unpcb *unp, int errno)
1843 {
1844         struct socket *so = unp->unp_socket;
1845
1846         so->so_error = errno;
1847         unp_disconnect(unp);
1848 }
1849
1850 /*
1851  * Returns:     0                       Success
1852  *              EMSGSIZE                The new fd's will not fit
1853  *              ENOBUFS                 Cannot alloc struct fileproc
1854  */
1855 int
1856 unp_externalize(struct mbuf *rights)
1857 {
1858         proc_t p = current_proc();              /* XXX */
1859         int i;
1860         struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
1861         struct fileglob **rp = (struct fileglob **)(cm + 1);
1862         int *fds = (int *)(cm + 1);
1863         struct fileproc *fp;
1864         struct fileglob *fg;
1865         int newfds = (cm->cmsg_len - sizeof (*cm)) / sizeof (int);
1866         int f;
1867
1868         proc_fdlock(p);
1869
1870         /*
1871          * if the new FD's will not fit, then we free them all
1872          */
1873         if (!fdavail(p, newfds)) {
1874                 for (i = 0; i < newfds; i++) {
1875                         fg = *rp;
1876                         unp_discard_fdlocked(fg, p);
1877                         *rp++ = NULL;
1878                 }
1879                 proc_fdunlock(p);
1880
1881                 return (EMSGSIZE);
1882         }
1883         /*
1884          * now change each pointer to an fd in the global table to
1885          * an integer that is the index to the local fd table entry
1886          * that we set up to point to the global one we are transferring.
1887          * XXX (1) this assumes a pointer and int are the same size,
1888          * XXX     or the mbuf can hold the expansion
1889          * XXX (2) allocation failures should be non-fatal
1890          */
1891         for (i = 0; i < newfds; i++) {
1892 #if CONFIG_MACF_SOCKET
1893                 /*
1894                  * If receive access is denied, don't pass along
1895                  * and error message, just discard the descriptor.
1896                  */
1897                 if (mac_file_check_receive(kauth_cred_get(), *rp)) {
1898                         fg = *rp;
1899                         *rp++ = 0;
1900                         unp_discard_fdlocked(fg, p);
1901                         continue;
1902                 }
1903 #endif
1904                 if (fdalloc(p, 0, &f))
1905                         panic("unp_externalize:fdalloc");
1906                 fg = rp[i];
1907                 fp = fileproc_alloc_init(NULL);
1908                 if (fp == NULL)
1909                         panic("unp_externalize: MALLOC_ZONE");
1910                 fp->f_iocount = 0;
1911                 fp->f_fglob = fg;
1912                 fg_removeuipc(fg);
1913                 procfdtbl_releasefd(p, f, fp);
1914                 (void) OSAddAtomic(-1, &unp_rights);
1915                 fds[i] = f;
1916         }
1917         proc_fdunlock(p);
1918
1919         return (0);
1920 }
1921
1922 void
1923 unp_init(void)
1924 {
1925         unp_zone = zinit(sizeof (struct unpcb),
1926             (nmbclusters * sizeof (struct unpcb)), 4096, "unpzone");
1927
1928         if (unp_zone == 0)
1929                 panic("unp_init");
1930         LIST_INIT(&unp_dhead);
1931         LIST_INIT(&unp_shead);
1932
1933         /*
1934          * allocate lock group attribute and group for udp pcb mutexes
1935          */
1936         unp_mtx_grp_attr = lck_grp_attr_alloc_init();
1937
1938         unp_mtx_grp = lck_grp_alloc_init("unp_list", unp_mtx_grp_attr);
1939
1940         unp_mtx_attr = lck_attr_alloc_init();
1941
1942         if ((unp_list_mtx = lck_rw_alloc_init(unp_mtx_grp,
1943             unp_mtx_attr)) == NULL)
1944                 return; /* pretty much dead if this fails... */
1945
1946         if ((unp_disconnect_lock = lck_mtx_alloc_init(unp_mtx_grp,
1947                 unp_mtx_attr)) == NULL)
1948                 return;
1949
1950         if ((unp_connect_lock = lck_mtx_alloc_init(unp_mtx_grp,
1951                 unp_mtx_attr)) == NULL)
1952                 return;
1953 }
1954
/*
 * Smaller of two values, defined only when no MIN is available yet.
 * The selected argument is evaluated twice, so avoid side effects.
 */
#if !defined(MIN)
#define	MIN(a, b)	(((a) < (b)) ? (a) : (b))
#endif /* !MIN */
1958
1959 /*
1960  * Returns:     0                       Success
1961  *              EINVAL
1962  *      fdgetf_noref:EBADF
1963  */
1964 static int
1965 unp_internalize(struct mbuf *control, proc_t p)
1966 {
1967         struct cmsghdr *cm = mtod(control, struct cmsghdr *);
1968         int *fds;
1969         struct fileglob **rp;
1970         struct fileproc *fp;
1971         int i, error;
1972         int oldfds;
1973
1974         /* 64bit: cmsg_len is 'uint32_t', m_len is 'long' */
1975         if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
1976             (socklen_t)cm->cmsg_len != (socklen_t)control->m_len) {
1977                 return (EINVAL);
1978         }
1979         oldfds = (cm->cmsg_len - sizeof (*cm)) / sizeof (int);
1980
1981         proc_fdlock(p);
1982         fds = (int *)(cm + 1);
1983
1984         for (i = 0; i < oldfds; i++) {
1985                 struct fileproc *tmpfp;
1986                 if (((error = fdgetf_noref(p, fds[i], &tmpfp)) != 0)) {
1987                         proc_fdunlock(p);
1988                         return (error);
1989                 } else if (!filetype_issendable(FILEGLOB_DTYPE(tmpfp->f_fglob))) {
1990                         proc_fdunlock(p);
1991                         return (EINVAL);
1992                 } else if (FP_ISGUARDED(tmpfp, GUARD_SOCKET_IPC)) {
1993                         error = fp_guard_exception(p,
1994                                 fds[i], tmpfp, kGUARD_EXC_SOCKET_IPC);
1995                         proc_fdunlock(p);
1996                         return (error);
1997                 }
1998         }
1999         rp = (struct fileglob **)(cm + 1);
2000
2001         /* On K64 we need to walk backwards because a fileglob * is twice the size of an fd
2002          * and doing them in-order would result in stomping over unprocessed fd's
2003          */
2004         for (i = (oldfds - 1); i >= 0; i--) {
2005                 (void) fdgetf_noref(p, fds[i], &fp);
2006                 fg_insertuipc(fp->f_fglob);
2007                 rp[i] = fp->f_fglob;
2008                 (void) OSAddAtomic(1, &unp_rights);
2009         }
2010         proc_fdunlock(p);
2011
2012         return (0);
2013 }
2014
2015 static int      unp_defer, unp_gcing, unp_gcwait;
2016 static thread_t unp_gcthread = NULL;
2017
2018 /* always called under uipc_lock */
2019 void
2020 unp_gc_wait(void)
2021 {
2022         if (unp_gcthread == current_thread())
2023                 return;
2024
2025         while (unp_gcing != 0) {
2026                 unp_gcwait = 1;
2027                 msleep(&unp_gcing, uipc_lock, 0 , "unp_gc_wait", NULL);
2028         }
2029 }
2030
2031
2032 __private_extern__ void
2033 unp_gc(void)
2034 {
2035         struct fileglob *fg, *nextfg;
2036         struct socket *so;
2037         static struct fileglob **extra_ref;
2038         struct fileglob **fpp;
2039         int nunref, i;
2040         int need_gcwakeup = 0;
2041
2042         lck_mtx_lock(uipc_lock);
2043         if (unp_gcing) {
2044                 lck_mtx_unlock(uipc_lock);
2045                 return;
2046         }
2047         unp_gcing = 1;
2048         unp_defer = 0;
2049         unp_gcthread = current_thread();
2050         lck_mtx_unlock(uipc_lock);
2051         /*
2052          * before going through all this, set all FDs to
2053          * be NOT defered and NOT externally accessible
2054          */
2055         for (fg = fmsghead.lh_first; fg != 0; fg = fg->f_msglist.le_next) {
2056                 lck_mtx_lock(&fg->fg_lock);
2057                 fg->fg_flag &= ~(FMARK|FDEFER);
2058                 lck_mtx_unlock(&fg->fg_lock);
2059         }
2060         do {
2061                 for (fg = fmsghead.lh_first; fg != 0;
2062                     fg = fg->f_msglist.le_next) {
2063                         lck_mtx_lock(&fg->fg_lock);
2064                         /*
2065                          * If the file is not open, skip it
2066                          */
2067                         if (fg->fg_count == 0) {
2068                                 lck_mtx_unlock(&fg->fg_lock);
2069                                 continue;
2070                         }
2071                         /*
2072                          * If we already marked it as 'defer'  in a
2073                          * previous pass, then try process it this time
2074                          * and un-mark it
2075                          */
2076                         if (fg->fg_flag & FDEFER) {
2077                                 fg->fg_flag &= ~FDEFER;
2078                                 unp_defer--;
2079                         } else {
2080                                 /*
2081                                  * if it's not defered, then check if it's
2082                                  * already marked.. if so skip it
2083                                  */
2084                                 if (fg->fg_flag & FMARK) {
2085                                         lck_mtx_unlock(&fg->fg_lock);
2086                                         continue;
2087                                 }
2088                                 /*
2089                                  * If all references are from messages
2090                                  * in transit, then skip it. it's not
2091                                  * externally accessible.
2092                                  */
2093                                 if (fg->fg_count == fg->fg_msgcount) {
2094                                         lck_mtx_unlock(&fg->fg_lock);
2095                                         continue;
2096                                 }
2097                                 /*
2098                                  * If it got this far then it must be
2099                                  * externally accessible.
2100                                  */
2101                                 fg->fg_flag |= FMARK;
2102                         }
2103                         /*
2104                          * either it was defered, or it is externally
2105                          * accessible and not already marked so.
2106                          * Now check if it is possibly one of OUR sockets.
2107                          */
2108                         if (FILEGLOB_DTYPE(fg) != DTYPE_SOCKET ||
2109                             (so = (struct socket *)fg->fg_data) == 0) {
2110                                 lck_mtx_unlock(&fg->fg_lock);
2111                                 continue;
2112                         }
2113                         if (so->so_proto->pr_domain != localdomain ||
2114                             (so->so_proto->pr_flags&PR_RIGHTS) == 0) {
2115                                 lck_mtx_unlock(&fg->fg_lock);
2116                                 continue;
2117                         }
2118 #ifdef notdef
2119                         /*
2120                          * if this code is enabled need to run
2121                          * under network funnel
2122                          */
2123                         if (so->so_rcv.sb_flags & SB_LOCK) {
2124                                 /*
2125                                  * This is problematical; it's not clear
2126                                  * we need to wait for the sockbuf to be
2127                                  * unlocked (on a uniprocessor, at least),
2128                                  * and it's also not clear what to do
2129                                  * if sbwait returns an error due to receipt
2130                                  * of a signal.  If sbwait does return
2131                                  * an error, we'll go into an infinite
2132                                  * loop.  Delete all of this for now.
2133                                  */
2134                                 (void) sbwait(&so->so_rcv);
2135                                 goto restart;
2136                         }
2137 #endif
2138                         /*
2139                          * So, Ok, it's one of our sockets and it IS externally
2140                          * accessible (or was defered). Now we look
2141                          * to see if we hold any file descriptors in its
2142                          * message buffers. Follow those links and mark them
2143                          * as accessible too.
2144                          *
2145                          * In case a file is passed onto itself we need to
2146                          * release the file lock.
2147                          */
2148                         lck_mtx_unlock(&fg->fg_lock);
2149
2150                         unp_scan(so->so_rcv.sb_mb, unp_mark);
2151                 }
2152         } while (unp_defer);
2153         /*
2154          * We grab an extra reference to each of the file table entries
2155          * that are not otherwise accessible and then free the rights
2156          * that are stored in messages on them.
2157          *
2158          * The bug in the orginal code is a little tricky, so I'll describe
2159          * what's wrong with it here.
2160          *
2161          * It is incorrect to simply unp_discard each entry for f_msgcount
2162          * times -- consider the case of sockets A and B that contain
2163          * references to each other.  On a last close of some other socket,
2164          * we trigger a gc since the number of outstanding rights (unp_rights)
2165          * is non-zero.  If during the sweep phase the gc code un_discards,
2166          * we end up doing a (full) closef on the descriptor.  A closef on A
2167          * results in the following chain.  Closef calls soo_close, which
2168          * calls soclose.   Soclose calls first (through the switch
2169          * uipc_usrreq) unp_detach, which re-invokes unp_gc.  Unp_gc simply
2170          * returns because the previous instance had set unp_gcing, and
2171          * we return all the way back to soclose, which marks the socket
2172          * with SS_NOFDREF, and then calls sofree.  Sofree calls sorflush
2173          * to free up the rights that are queued in messages on the socket A,
2174          * i.e., the reference on B.  The sorflush calls via the dom_dispose
2175          * switch unp_dispose, which unp_scans with unp_discard.  This second
2176          * instance of unp_discard just calls closef on B.
2177          *
2178          * Well, a similar chain occurs on B, resulting in a sorflush on B,
2179          * which results in another closef on A.  Unfortunately, A is already
2180          * being closed, and the descriptor has already been marked with
2181          * SS_NOFDREF, and soclose panics at this point.
2182          *
2183          * Here, we first take an extra reference to each inaccessible
2184          * descriptor.  Then, we call sorflush ourself, since we know
2185          * it is a Unix domain socket anyhow.  After we destroy all the
2186          * rights carried in messages, we do a last closef to get rid
2187          * of our extra reference.  This is the last close, and the
2188          * unp_detach etc will shut down the socket.
2189          *
2190          * 91/09/19, bsy@cs.cmu.edu
2191          */
2192         extra_ref = _MALLOC(nfiles * sizeof (struct fileglob *),
2193             M_FILEGLOB, M_WAITOK);
2194         if (extra_ref == NULL)
2195                 goto bail;
2196         for (nunref = 0, fg = fmsghead.lh_first, fpp = extra_ref; fg != 0;
2197             fg = nextfg) {
2198                 lck_mtx_lock(&fg->fg_lock);
2199
2200                 nextfg = fg->f_msglist.le_next;
2201                 /*
2202                  * If it's not open, skip it
2203                  */
2204                 if (fg->fg_count == 0) {
2205                         lck_mtx_unlock(&fg->fg_lock);
2206                         continue;
2207                 }
2208                 /*
2209                  * If all refs are from msgs, and it's not marked accessible
2210                  * then it must be referenced from some unreachable cycle
2211                  * of (shut-down) FDs, so include it in our
2212                  * list of FDs to remove
2213                  */
2214                 if (fg->fg_count == fg->fg_msgcount && !(fg->fg_flag & FMARK)) {
2215                         fg->fg_count++;
2216                         *fpp++ = fg;
2217                         nunref++;
2218                 }
2219                 lck_mtx_unlock(&fg->fg_lock);
2220         }
2221         /*
2222          * for each FD on our hit list, do the following two things
2223          */
2224         for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
2225                 struct fileglob *tfg;
2226
2227                 tfg = *fpp;
2228
2229                 if (FILEGLOB_DTYPE(tfg) == DTYPE_SOCKET &&
2230                     tfg->fg_data != NULL) {
2231                         so = (struct socket *)(tfg->fg_data);
2232
2233                         socket_lock(so, 0);
2234
2235                         sorflush(so);
2236
2237                         socket_unlock(so, 0);
2238                 }
2239         }
2240         for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
2241                 closef_locked((struct fileproc *)0, *fpp, (proc_t)NULL);
2242
2243         FREE((caddr_t)extra_ref, M_FILEGLOB);
2244 bail:
2245         lck_mtx_lock(uipc_lock);
2246         unp_gcing = 0;
2247         unp_gcthread = NULL;
2248
2249         if (unp_gcwait != 0) {
2250                 unp_gcwait = 0;
2251                 need_gcwakeup = 1;
2252         }
2253         lck_mtx_unlock(uipc_lock);
2254
2255         if (need_gcwakeup != 0)
2256                 wakeup(&unp_gcing);
2257 }
2258
2259 void
2260 unp_dispose(struct mbuf *m)
2261 {
2262         if (m) {
2263                 unp_scan(m, unp_discard);
2264         }
2265 }
2266
2267 /*
2268  * Returns:     0                       Success
2269  */
2270 static int
2271 unp_listen(struct unpcb *unp, proc_t p)
2272 {
2273         kauth_cred_t safecred = kauth_cred_proc_ref(p);
2274         cru2x(safecred, &unp->unp_peercred);
2275         kauth_cred_unref(&safecred);
2276         unp->unp_flags |= UNP_HAVEPCCACHED;
2277         return (0);
2278 }
2279
2280 /* should run under kernel funnel */
2281 static void
2282 unp_scan(struct mbuf *m0, void (*op)(struct fileglob *))
2283 {
2284         struct mbuf *m;
2285         struct fileglob **rp;
2286         struct cmsghdr *cm;
2287         int i;
2288         int qfds;
2289
2290         while (m0) {
2291                 for (m = m0; m; m = m->m_next)
2292                         if (m->m_type == MT_CONTROL &&
2293                             (size_t)m->m_len >= sizeof (*cm)) {
2294                                 cm = mtod(m, struct cmsghdr *);
2295                                 if (cm->cmsg_level != SOL_SOCKET ||
2296                                     cm->cmsg_type != SCM_RIGHTS)
2297                                         continue;
2298                                 qfds = (cm->cmsg_len - sizeof (*cm)) /
2299                                     sizeof (int);
2300                                 rp = (struct fileglob **)(cm + 1);
2301                                 for (i = 0; i < qfds; i++)
2302                                         (*op)(*rp++);
2303                                 break;          /* XXX, but saves time */
2304                         }
2305                 m0 = m0->m_act;
2306         }
2307 }
2308
2309 /* should run under kernel funnel */
2310 static void
2311 unp_mark(struct fileglob *fg)
2312 {
2313         lck_mtx_lock(&fg->fg_lock);
2314
2315         if (fg->fg_flag & FMARK) {
2316                 lck_mtx_unlock(&fg->fg_lock);
2317                 return;
2318         }
2319         fg->fg_flag |= (FMARK|FDEFER);
2320
2321         lck_mtx_unlock(&fg->fg_lock);
2322
2323         unp_defer++;
2324 }
2325
2326 /* should run under kernel funnel */
2327 static void
2328 unp_discard(struct fileglob *fg)
2329 {
2330         proc_t p = current_proc();              /* XXX */
2331
2332         (void) OSAddAtomic(1, &unp_disposed);
2333
2334         proc_fdlock(p);
2335         unp_discard_fdlocked(fg, p);
2336         proc_fdunlock(p);
2337 }
2338 static void
2339 unp_discard_fdlocked(struct fileglob *fg, proc_t p)
2340 {
2341         fg_removeuipc(fg);
2342
2343         (void) OSAddAtomic(-1, &unp_rights);
2344         (void) closef_locked((struct fileproc *)0, fg, p);
2345 }
2346
2347 int
2348 unp_lock(struct socket *so, int refcount, void * lr)
2349  {
2350         void * lr_saved;
2351         if (lr == 0)
2352                 lr_saved = (void *)  __builtin_return_address(0);
2353         else lr_saved = lr;
2354
2355         if (so->so_pcb) {
2356                 lck_mtx_lock(&((struct unpcb *)so->so_pcb)->unp_mtx);
2357         } else  {
2358                 panic("unp_lock: so=%p NO PCB! lr=%p ref=0x%x\n",
2359                         so, lr_saved, so->so_usecount);
2360         }
2361
2362         if (so->so_usecount < 0)
2363                 panic("unp_lock: so=%p so_pcb=%p lr=%p ref=0x%x\n",
2364                 so, so->so_pcb, lr_saved, so->so_usecount);
2365
2366         if (refcount)
2367                 so->so_usecount++;
2368
2369         so->lock_lr[so->next_lock_lr] = lr_saved;
2370         so->next_lock_lr = (so->next_lock_lr+1) % SO_LCKDBG_MAX;
2371         return (0);
2372 }
2373
2374 int
2375 unp_unlock(struct socket *so, int refcount, void * lr)
2376 {
2377         void * lr_saved;
2378         lck_mtx_t * mutex_held = NULL;
2379         struct unpcb *unp = sotounpcb(so);
2380
2381         if (lr == 0)
2382                 lr_saved = (void *) __builtin_return_address(0);
2383         else lr_saved = lr;
2384
2385         if (refcount)
2386                 so->so_usecount--;
2387
2388         if (so->so_usecount < 0)
2389                 panic("unp_unlock: so=%p usecount=%x\n", so, so->so_usecount);
2390         if (so->so_pcb == NULL) {
2391                 panic("unp_unlock: so=%p NO PCB usecount=%x\n", so, so->so_usecount);
2392         } else {
2393                 mutex_held = &((struct unpcb *)so->so_pcb)->unp_mtx;
2394         }
2395         lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
2396         so->unlock_lr[so->next_unlock_lr] = lr_saved;
2397         so->next_unlock_lr = (so->next_unlock_lr+1) % SO_LCKDBG_MAX;
2398
2399         if (so->so_usecount == 0 && (so->so_flags & SOF_PCBCLEARING)) {
2400                 sofreelastref(so, 1);
2401
2402                 if (unp->unp_addr)
2403                         FREE(unp->unp_addr, M_SONAME);
2404
2405                 lck_mtx_unlock(mutex_held);
2406
2407                 lck_mtx_destroy(&unp->unp_mtx, unp_mtx_grp);
2408                 zfree(unp_zone, unp);
2409
2410                 unp_gc();
2411         } else {
2412                 lck_mtx_unlock(mutex_held);
2413         }
2414
2415         return (0);
2416 }
2417
2418 lck_mtx_t *
2419 unp_getlock(struct socket *so, __unused int locktype)
2420 {
2421         struct unpcb *unp = (struct unpcb *)so->so_pcb;
2422
2423
2424         if (so->so_pcb)  {
2425                 if (so->so_usecount < 0)
2426                         panic("unp_getlock: so=%p usecount=%x\n", so, so->so_usecount);
2427                 return(&unp->unp_mtx);
2428         } else {
2429                 panic("unp_getlock: so=%p NULL so_pcb\n", so);
2430                 return (so->so_proto->pr_domain->dom_mtx);
2431         }
2432 }
2433