bsd/kern/uipc_usrreq.c

   1 /*
   2  * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /*
  29  * Copyright (c) 1982, 1986, 1989, 1991, 1993
  30  *      The Regents of the University of California.  All rights reserved.
  31  *
  32  * Redistribution and use in source and binary forms, with or without
  33  * modification, are permitted provided that the following conditions
  34  * are met:
  35  * 1. Redistributions of source code must retain the above copyright
  36  *    notice, this list of conditions and the following disclaimer.
  37  * 2. Redistributions in binary form must reproduce the above copyright
  38  *    notice, this list of conditions and the following disclaimer in the
  39  *    documentation and/or other materials provided with the distribution.
  40  * 3. All advertising materials mentioning features or use of this software
  41  *    must display the following acknowledgement:
  42  *      This product includes software developed by the University of
  43  *      California, Berkeley and its contributors.
  44  * 4. Neither the name of the University nor the names of its contributors
  45  *    may be used to endorse or promote products derived from this software
  46  *    without specific prior written permission.
  47  *
  48  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  58  * SUCH DAMAGE.
  59  *
  60  *      From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94
  61  */
  62 /*
  63  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
  64  * support for mandatory and extensible security protections.  This notice
  65  * is included in support of clause 2.2 (b) of the Apple Public License,
  66  * Version 2.0.
  67  */
  68
  69 #include <sys/param.h>
  70 #include <sys/systm.h>
  71 #include <sys/kernel.h>
  72 #include <sys/domain.h>
  73 #include <sys/fcntl.h>
  74 #include <sys/malloc.h>         /* XXX must be before <sys/file.h> */
  75 #include <sys/file_internal.h>
  76 #include <sys/filedesc.h>
  77 #include <sys/lock.h>
  78 #include <sys/mbuf.h>
  79 #include <sys/namei.h>
  80 #include <sys/proc_internal.h>
  81 #include <sys/kauth.h>
  82 #include <sys/protosw.h>
  83 #include <sys/socket.h>
  84 #include <sys/socketvar.h>
  85 #include <sys/stat.h>
  86 #include <sys/sysctl.h>
  87 #include <sys/un.h>
  88 #include <sys/unpcb.h>
  89 #include <sys/vnode_internal.h>
  90 #include <sys/kdebug.h>
  91
  92 #include <kern/zalloc.h>
  93 #include <kern/locks.h>
  94
  95 #if CONFIG_MACF
  96 #include <security/mac_framework.h>
  97 #endif /* CONFIG_MACF */
  98
  99 #include <mach/vm_param.h>
 100
  101 #define f_msgcount f_fglob->fg_msgcount     /* shorthands: reach fileglob fields through f_fglob */
  102 #define f_cred f_fglob->fg_cred
  103 #define f_ops f_fglob->fg_ops
  104 #define f_offset f_fglob->fg_offset
  105 #define f_data f_fglob->fg_data
  106 struct  zone *unp_zone;         /* zone that unp_attach() allocates unpcbs from */
  107 static  unp_gen_t unp_gencnt;   /* bumped on every attach and detach, under unp_list_mtx */
  108 static  u_int unp_count;        /* ++ in unp_attach(), -- in unp_detach(), under unp_list_mtx */
  109
  110 static  lck_attr_t              *unp_mtx_attr;          /* attribute handed to lck_mtx_init() for each unpcb mutex */
  111 static  lck_grp_t               *unp_mtx_grp;           /* group handed to lck_mtx_init() for each unpcb mutex */
  112 static  lck_grp_attr_t          *unp_mtx_grp_attr;
  113 static  lck_rw_t                *unp_list_mtx;          /* taken exclusive around pcb list, unp_count and unp_gencnt updates */
  114
  115 static  lck_mtx_t               *unp_disconnect_lock;   /* mutex unp_detach() msleep()s on while disconnect_in_progress != 0 */
  116 static  lck_mtx_t               *unp_connect_lock;      /* held by unp_detach() while it clears unp_vnode (listen/connect race) */
  117 static  u_int                   disconnect_in_progress; /* non-zero makes unp_detach() wait before walking unp_refs */
  118
  119 extern lck_mtx_t *uipc_lock;
  120 static  struct unp_head unp_shead, unp_dhead;           /* pcb lists: unp_dhead for SOCK_DGRAM, unp_shead otherwise */
 121
 122 /*
 123  * mDNSResponder tracing.  When enabled, endpoints connected to
 124  * /var/run/mDNSResponder will be traced; during each send on
 125  * the traced socket, we log the PID and process name of the
 126  * sending process.  We also print out a bit of info related
 127  * to the data itself; this assumes ipc_msg_hdr in dnssd_ipc.h
 128  * of mDNSResponder stays the same.
 129  */
  130 #define MDNSRESPONDER_PATH      "/var/run/mDNSResponder"
  131
  132 static int unpst_tracemdns;     /* enable tracing */
  133
  134 #define MDNS_IPC_MSG_HDR_VERSION_1      1
  135
      /*
       * Local copy of the header uipc_send() peeks at on traced sockets.
       * The struct is packed; per the note above it is meant to mirror
       * ipc_msg_hdr from mDNSResponder's dnssd_ipc.h (not verifiable here).
       */
  136 struct mdns_ipc_msg_hdr {
  137         uint32_t version;       /* compared against ntohl(MDNS_IPC_MSG_HDR_VERSION_1) in uipc_send() */
  138         uint32_t datalen;
  139         uint32_t ipc_flags;
  140         uint32_t op;            /* printed through ntohl() in the trace line */
  141         union {
  142                 void *context;
  143                 uint32_t u32[2];
  144         } __attribute__((packed));
  145         uint32_t reg_index;
  146 } __attribute__((packed));
 147
 148 /*
 149  * Unix communications domain.
 150  *
 151  * TODO:
 152  *      SEQPACKET, RDM
 153  *      rethink name space problems
 154  *      need a proper out-of-band
 155  *      lock pushdown
 156  */
  157 static struct   sockaddr sun_noname = { sizeof (sun_noname), AF_LOCAL, { 0 } };      /* address reported when a socket or its peer has no unp_addr */
  158 static ino_t    unp_ino;                /* prototype for fake inode numbers */
 159
 160 static int      unp_attach(struct socket *);
 161 static void     unp_detach(struct unpcb *);
 162 static int      unp_bind(struct unpcb *, struct sockaddr *, proc_t);
 163 static int      unp_connect(struct socket *, struct sockaddr *, proc_t);
 164 static void     unp_disconnect(struct unpcb *);
 165 static void     unp_shutdown(struct unpcb *);
 166 static void     unp_drop(struct unpcb *, int);
 167 __private_extern__ void unp_gc(void);
 168 static void     unp_scan(struct mbuf *, void (*)(struct fileglob *));
 169 static void     unp_mark(struct fileglob *);
 170 static void     unp_discard(struct fileglob *);
 171 static void     unp_discard_fdlocked(struct fileglob *, proc_t);
 172 static int      unp_internalize(struct mbuf *, proc_t);
 173 static int      unp_listen(struct unpcb *, proc_t);
 174 static void     unpcb_to_compat(struct unpcb *, struct unpcb_compat *);
 175 static void     unp_get_locks_in_order(struct socket *so, struct socket *conn_so);
 176
 177 static void
 178 unp_get_locks_in_order(struct socket *so, struct socket *conn_so)
 179 {
 180         if (so < conn_so) {
 181                 socket_lock(conn_so, 1);
 182         } else {
 183                 struct unpcb *unp = sotounpcb(so);
 184                 unp->unp_flags |= UNP_DONTDISCONNECT;
 185                 unp->rw_thrcount++;
 186                 socket_unlock(so, 0);
 187
 188                 /* Get the locks in the correct order */
 189                 socket_lock(conn_so, 1);
 190                 socket_lock(so, 0);
 191                 unp->rw_thrcount--;
 192                 if (unp->rw_thrcount == 0) {
 193                         unp->unp_flags &= ~UNP_DONTDISCONNECT;
 194                         wakeup(unp);
 195                 }
 196         }
 197 }
 198
 199 static int
 200 uipc_abort(struct socket *so)
 201 {
 202         struct unpcb *unp = sotounpcb(so);
 203
 204         if (unp == 0)
 205                 return (EINVAL);
 206         unp_drop(unp, ECONNABORTED);
 207         unp_detach(unp);
 208         sofree(so);
 209         return (0);
 210 }
 211
 212 static int
 213 uipc_accept(struct socket *so, struct sockaddr **nam)
 214 {
 215         struct unpcb *unp = sotounpcb(so);
 216
 217         if (unp == 0)
 218                 return (EINVAL);
 219
 220         /*
 221          * Pass back name of connected socket,
 222          * if it was bound and we are still connected
 223          * (our peer may have closed already!).
 224          */
 225         if (unp->unp_conn && unp->unp_conn->unp_addr) {
 226                 *nam = dup_sockaddr((struct sockaddr *)
 227                     unp->unp_conn->unp_addr, 1);
 228         } else {
 229                 *nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
 230         }
 231         return (0);
 232 }
 233
 234 /*
 235  * Returns:     0                       Success
 236  *              EISCONN
 237  *      unp_attach:
 238  */
 239 static int
 240 uipc_attach(struct socket *so, __unused int proto, __unused proc_t p)
 241 {
 242         struct unpcb *unp = sotounpcb(so);
 243
 244         if (unp != 0)
 245                 return (EISCONN);
 246         return (unp_attach(so));
 247 }
 248
 249 static int
 250 uipc_bind(struct socket *so, struct sockaddr *nam, proc_t p)
 251 {
 252         struct unpcb *unp = sotounpcb(so);
 253
 254         if (unp == 0)
 255                 return (EINVAL);
 256
 257         return (unp_bind(unp, nam, p));
 258 }
 259
 260 /*
 261  * Returns:     0                       Success
 262  *              EINVAL
 263  *      unp_connect:???                 [See elsewhere in this file]
 264  */
 265 static int
 266 uipc_connect(struct socket *so, struct sockaddr *nam, proc_t p)
 267 {
 268         struct unpcb *unp = sotounpcb(so);
 269
 270         if (unp == 0)
 271                 return (EINVAL);
 272         return (unp_connect(so, nam, p));
 273 }
 274
 275 /*
 276  * Returns:     0                       Success
 277  *              EINVAL
 278  *      unp_connect2:EPROTOTYPE         Protocol wrong type for socket
 279  *      unp_connect2:EINVAL             Invalid argument
 280  */
 281 static int
 282 uipc_connect2(struct socket *so1, struct socket *so2)
 283 {
 284         struct unpcb *unp = sotounpcb(so1);
 285
 286         if (unp == 0)
 287                 return (EINVAL);
 288
 289         return (unp_connect2(so1, so2));
 290 }
 291
 292 /* control is EOPNOTSUPP */
 293
 294 static int
 295 uipc_detach(struct socket *so)
 296 {
 297         struct unpcb *unp = sotounpcb(so);
 298
 299         if (unp == 0)
 300                 return (EINVAL);
 301
 302         lck_mtx_assert(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
 303         unp_detach(unp);
 304         return (0);
 305 }
 306
 307 static int
 308 uipc_disconnect(struct socket *so)
 309 {
 310         struct unpcb *unp = sotounpcb(so);
 311
 312         if (unp == 0)
 313                 return (EINVAL);
 314         unp_disconnect(unp);
 315         return (0);
 316 }
 317
 318 /*
 319  * Returns:     0                       Success
 320  *              EINVAL
 321  */
 322 static int
 323 uipc_listen(struct socket *so, __unused proc_t p)
 324 {
 325         struct unpcb *unp = sotounpcb(so);
 326
 327         if (unp == 0 || unp->unp_vnode == 0)
 328                 return (EINVAL);
 329         return (unp_listen(unp, p));
 330 }
 331
 332 static int
 333 uipc_peeraddr(struct socket *so, struct sockaddr **nam)
 334 {
 335         struct unpcb *unp = sotounpcb(so);
 336
 337         if (unp == NULL)
 338                 return (EINVAL);
 339         if (unp->unp_conn != NULL && unp->unp_conn->unp_addr != NULL) {
 340                 *nam = dup_sockaddr((struct sockaddr *)
 341                     unp->unp_conn->unp_addr, 1);
 342         } else {
 343                 *nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
 344         }
 345         return (0);
 346 }
 347
 348 static int
 349 uipc_rcvd(struct socket *so, __unused int flags)
 350 {
 351         struct unpcb *unp = sotounpcb(so);
 352         struct socket *so2;
 353
 354         if (unp == 0)
 355                 return (EINVAL);
 356         switch (so->so_type) {
 357         case SOCK_DGRAM:
 358                 panic("uipc_rcvd DGRAM?");
 359                 /*NOTREACHED*/
 360
 361         case SOCK_STREAM:
 362 #define rcv (&so->so_rcv)
 363 #define snd (&so2->so_snd)
 364                 if (unp->unp_conn == 0)
 365                         break;
 366
 367                 so2 = unp->unp_conn->unp_socket;
 368                 unp_get_locks_in_order(so, so2);
 369                 /*
 370                  * Adjust backpressure on sender
 371                  * and wakeup any waiting to write.
 372                  */
 373                 snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
 374                 unp->unp_mbcnt = rcv->sb_mbcnt;
 375                 snd->sb_hiwat += unp->unp_cc - rcv->sb_cc;
 376                 unp->unp_cc = rcv->sb_cc;
 377                 sowwakeup(so2);
 378
 379                 socket_unlock(so2, 1);
 380
 381 #undef snd
 382 #undef rcv
 383                 break;
 384
 385         default:
 386                 panic("uipc_rcvd unknown socktype");
 387         }
 388         return (0);
 389 }
 390
 391 /* pru_rcvoob is EOPNOTSUPP */
 392
  393 /*
       * Send handler for local-domain sockets.  The data chain m (plus the
       * optional control chain) is appended directly to the receive buffer
       * of the connected peer socket; nothing is queued on so's own send
       * buffer.  On return both m and control have either been handed to
       * the peer or freed here.
       *
       * Parameters:  so              Sending socket
       *              flags           PRUS_OOB is rejected; PRUS_EOF adds a
       *                              shutdown after the send
       *              m               Data to send
       *              nam             Optional destination; triggers an
       *                              implicit unp_connect()
       *              control         Optional control message, run through
       *                              unp_internalize() first
       *              p               Calling process
       *
  394  * Returns:     0                       Success
  395  *              EINVAL
  396  *              EOPNOTSUPP
  397  *              EPIPE
  398  *              ENOTCONN
  399  *              EISCONN
  400  *      unp_internalize:EINVAL
  401  *      unp_internalize:EBADF
  402  *      unp_connect:EAFNOSUPPORT        Address family not supported
  403  *      unp_connect:EINVAL              Invalid argument
  404  *      unp_connect:ENOTSOCK            Not a socket
  405  *      unp_connect:ECONNREFUSED        Connection refused
  406  *      unp_connect:EISCONN             Socket is connected
  407  *      unp_connect:EPROTOTYPE          Protocol wrong type for socket
  408  *      unp_connect:???
  409  *      sbappendaddr:ENOBUFS            [5th argument, contents modified]
  410  *      sbappendaddr:???                [whatever a filter author chooses]
  411  */
  412 static int
  413 uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
  414     struct mbuf *control, proc_t p)
  415 {
  416         int error = 0;
  417         struct unpcb *unp = sotounpcb(so);
  418         struct socket *so2;
  419
  420         if (unp == 0) {
  421                 error = EINVAL;
  422                 goto release;
  423         }
  424         if (flags & PRUS_OOB) {
  425                 error = EOPNOTSUPP;
  426                 goto release;
  427         }
  428
  429         if (control) {
  430                 /* release lock to avoid deadlock (4436174) */
  431                 socket_unlock(so, 0);
  432                 error = unp_internalize(control, p);
  433                 socket_lock(so, 0);
  434                 if (error)
  435                         goto release;
  436         }
  437
  438         switch (so->so_type) {
  439         case SOCK_DGRAM:
  440         {
  441                 struct sockaddr *from;
  442
                      /*
                       * With an explicit destination the socket is connected
                       * just for this datagram and disconnected again below;
                       * that is refused if a connection already exists.
                       */
  443                 if (nam) {
  444                         if (unp->unp_conn) {
  445                                 error = EISCONN;
  446                                 break;
  447                         }
  448                         error = unp_connect(so, nam, p);
  449                         if (error)
  450                                 break;
  451                 } else {
  452                         if (unp->unp_conn == 0) {
  453                                 error = ENOTCONN;
  454                                 break;
  455                         }
  456                 }
  457
                      /* A datagram socket may be its own peer; then no second lock is taken. */
  458                 so2 = unp->unp_conn->unp_socket;
  459                 if (so != so2)
  460                         unp_get_locks_in_order(so, so2);
  461
                      /* Source address shown to the receiver: our bound name, or the unnamed address. */
  462                 if (unp->unp_addr)
  463                         from = (struct sockaddr *)unp->unp_addr;
  464                 else
  465                         from = &sun_noname;
  466                 /*
  467                  * sbappendaddr() will fail when the receiver runs out of
  468                  * space; in contrast to SOCK_STREAM, we will lose messages
  469                  * for the SOCK_DGRAM case when the receiver's queue overflows.
  470                  * SB_UNIX on the socket buffer implies that the callee will
  471                  * not free the control message, if any, because we would need
  472                  * to call unp_dispose() on it.
  473                  */
  474                 if (sbappendaddr(&so2->so_rcv, from, m, control, &error)) {
  475                         control = NULL;
  476                         sorwakeup(so2);
  477                 } else if (control != NULL && error == 0) {
  478                         /* A socket filter took control; don't touch it */
  479                         control = NULL;
  480                 }
  481
  482                 if (so != so2)
  483                         socket_unlock(so2, 1);
  484
                      /*
                       * m is cleared whether or not the append succeeded, so the
                       * release path never frees it (presumably sbappendaddr()
                       * consumes m either way; confirm against its contract).
                       */
  485                 m = NULL;
  486                 if (nam)
  487                         unp_disconnect(unp);
  488                 break;
  489         }
  490
  491         case SOCK_STREAM: {
  492                 int didreceive = 0;
  493 #define rcv (&so2->so_rcv)
  494 #define snd (&so->so_snd)
  495                 /* Connect if not connected yet. */
  496                 /*
  497                  * Note: A better implementation would complain
  498                  * if not equal to the peer's address.
  499                  */
  500                 if ((so->so_state & SS_ISCONNECTED) == 0) {
  501                         if (nam) {
  502                                 error = unp_connect(so, nam, p);
  503                                 if (error)
  504                                         break;  /* XXX */
  505                         } else {
  506                                 error = ENOTCONN;
  507                                 break;
  508                         }
  509                 }
  510
  511                 if (so->so_state & SS_CANTSENDMORE) {
  512                         error = EPIPE;
  513                         break;
  514                 }
  515                 if (unp->unp_conn == 0)
  516                         panic("uipc_send connected but no connection?");
  517
  518                 so2 = unp->unp_conn->unp_socket;
  519                 unp_get_locks_in_order(so, so2);
  520
  521                 /* Check socket state again as we might have unlocked the socket
  522                  * while trying to get the locks in order
  523                  */
  524
  525                 if ((so->so_state & SS_CANTSENDMORE)) {
  526                         error = EPIPE;
  527                         socket_unlock(so2, 1);
  528                         break;
  529                 }
  530
                      /*
                       * Optional mDNSResponder tracing: peek at the start of the
                       * payload and log the sender when it looks like a version 1
                       * IPC header.
                       */
  531                 if (unp->unp_flags & UNP_TRACE_MDNS) {
  532                         struct mdns_ipc_msg_hdr hdr;
  533
  534                         if (mbuf_copydata(m, 0, sizeof (hdr), &hdr) == 0 &&
  535                             hdr.version  == ntohl(MDNS_IPC_MSG_HDR_VERSION_1)) {
  536                                 printf("%s[mDNSResponder] pid=%d (%s): op=0x%x\n",
  537                                     __func__, p->p_pid, p->p_comm, ntohl(hdr.op));
  538                         }
  539                 }
  540
  541                 /*
  542                  * Send to paired receive port, and then reduce send buffer
  543                  * hiwater marks to maintain backpressure.  Wake up readers.
  544                  * SB_UNIX flag will allow new record to be appended to the
  545                  * receiver's queue even when it is already full.  It is
  546                  * possible, however, that append might fail.  In that case,
  547                  * we will need to call unp_dispose() on the control message;
  548                  * the callee will not free it since SB_UNIX is set.
  549                  */
  550                 didreceive = control ?
  551                     sbappendcontrol(rcv, m, control, &error) : sbappend(rcv, m);
  552
                      /*
                       * Shrink our send limits by however much the peer's receive
                       * buffer grew since the counts were last recorded in the
                       * peer's pcb, then record the new counts there.
                       */
  553                 snd->sb_mbmax -= rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
  554                 unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
  555                 snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc;
  556                 unp->unp_conn->unp_cc = rcv->sb_cc;
  557                 if (didreceive) {
  558                         control = NULL;
  559                         sorwakeup(so2);
  560                 } else if (control != NULL && error == 0) {
  561                         /* A socket filter took control; don't touch it */
  562                         control = NULL;
  563                 }
  564
  565                 socket_unlock(so2, 1);
  566                 m = NULL;
  567 #undef snd
  568 #undef rcv
  569                 }
  570                 break;
  571
  572         default:
  573                 panic("uipc_send unknown socktype");
  574         }
  575
  576         /*
  577          * SEND_EOF is equivalent to a SEND followed by
  578          * a SHUTDOWN.
  579          */
              /* Note: this also runs when the switch above broke out with an error. */
  580         if (flags & PRUS_EOF) {
  581                 socantsendmore(so);
  582                 unp_shutdown(unp);
  583         }
  584
              /*
               * control is still ours and the send failed: undo the
               * internalization, with so unlocked around unp_dispose(),
               * before the chain is freed below.
               */
  585         if (control && error != 0) {
  586                 socket_unlock(so, 0);
  587                 unp_dispose(control);
  588                 socket_lock(so, 0);
  589         }
  590
  591 release:
              /* Anything not handed to the peer (or to a filter) is freed here. */
  592         if (control)
  593                 m_freem(control);
  594         if (m)
  595                 m_freem(m);
  596         return (error);
  597 }
 598
 599 static int
 600 uipc_sense(struct socket *so, void *ub, int isstat64)
 601 {
 602         struct unpcb *unp = sotounpcb(so);
 603         struct socket *so2;
 604         blksize_t blksize;
 605
 606         if (unp == 0)
 607                 return (EINVAL);
 608
 609         blksize = so->so_snd.sb_hiwat;
 610         if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) {
 611                 so2 = unp->unp_conn->unp_socket;
 612                 blksize += so2->so_rcv.sb_cc;
 613         }
 614         if (unp->unp_ino == 0)
 615                 unp->unp_ino = unp_ino++;
 616
 617         if (isstat64 != 0) {
 618                 struct stat64  *sb64;
 619
 620                 sb64 = (struct stat64 *)ub;
 621                 sb64->st_blksize = blksize;
 622                 sb64->st_dev = NODEV;
 623                 sb64->st_ino = (ino64_t)unp->unp_ino;
 624         } else {
 625                 struct stat *sb;
 626
 627                 sb = (struct stat *)ub;
 628                 sb->st_blksize = blksize;
 629                 sb->st_dev = NODEV;
 630                 sb->st_ino = (ino_t)(uintptr_t)unp->unp_ino;
 631         }
 632
 633         return (0);
 634 }
 635
 636 /*
 637  * Returns:     0               Success
 638  *              EINVAL
 639  *
 640  * Notes:       This is not strictly correct, as unp_shutdown() also calls
 641  *              socantrcvmore().  These should maybe both be conditionalized
 642  *              on the 'how' argument in soshutdown() as called from the
 643  *              shutdown() system call.
 644  */
 645 static int
 646 uipc_shutdown(struct socket *so)
 647 {
 648         struct unpcb *unp = sotounpcb(so);
 649
 650         if (unp == 0)
 651                 return (EINVAL);
 652         socantsendmore(so);
 653         unp_shutdown(unp);
 654         return (0);
 655 }
 656
 657 /*
 658  * Returns:     0                       Success
 659  *              EINVAL                  Invalid argument
 660  */
 661 static int
 662 uipc_sockaddr(struct socket *so, struct sockaddr **nam)
 663 {
 664         struct unpcb *unp = sotounpcb(so);
 665
 666         if (unp == NULL)
 667                 return (EINVAL);
 668         if (unp->unp_addr != NULL) {
 669                 *nam = dup_sockaddr((struct sockaddr *)unp->unp_addr, 1);
 670         } else {
 671                 *nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
 672         }
 673         return (0);
 674 }
 675
 676 struct pr_usrreqs uipc_usrreqs = {
 677         uipc_abort, uipc_accept, uipc_attach, uipc_bind, uipc_connect,
 678         uipc_connect2, pru_control_notsupp, uipc_detach, uipc_disconnect,
 679         uipc_listen, uipc_peeraddr, uipc_rcvd, pru_rcvoob_notsupp,
 680         uipc_send, uipc_sense, uipc_shutdown, uipc_sockaddr,
 681         sosend, soreceive, pru_sopoll_notsupp
 682 };
 683
 684 int
 685 uipc_ctloutput(struct socket *so, struct sockopt *sopt)
 686 {
 687         struct unpcb *unp = sotounpcb(so);
 688         int error;
 689
 690         switch (sopt->sopt_dir) {
 691         case SOPT_GET:
 692                 switch (sopt->sopt_name) {
 693                 case LOCAL_PEERCRED:
 694                         if (unp->unp_flags & UNP_HAVEPC) {
 695                                 error = sooptcopyout(sopt, &unp->unp_peercred,
 696                                     sizeof (unp->unp_peercred));
 697                         } else {
 698                                 if (so->so_type == SOCK_STREAM)
 699                                         error = ENOTCONN;
 700                                 else
 701                                         error = EINVAL;
 702                         }
 703                         break;
 704                 case LOCAL_PEERPID:
 705                         if (unp->unp_conn != NULL) {
 706                                 if (unp->unp_conn->unp_socket != NULL) {
 707                                         pid_t peerpid = unp->unp_conn->unp_socket->last_pid;
 708                                         error = sooptcopyout(sopt, &peerpid, sizeof (peerpid));
 709                                 } else {
 710                                         panic("peer is connected but has no socket?");
 711                                 }
 712                         } else {
 713                                 error = ENOTCONN;
 714                         }
 715                         break;
 716                 default:
 717                         error = EOPNOTSUPP;
 718                         break;
 719                 }
 720                 break;
 721         case SOPT_SET:
 722         default:
 723                 error = EOPNOTSUPP;
 724                 break;
 725         }
 726         return (error);
 727 }
 728
 729 /*
 730  * Both send and receive buffers are allocated PIPSIZ bytes of buffering
 731  * for stream sockets, although the total for sender and receiver is
 732  * actually only PIPSIZ.
 733  * Datagram sockets really use the sendspace as the maximum datagram size,
 734  * and don't really want to reserve the sendspace.  Their recvspace should
 735  * be large enough for at least one max-size datagram plus address.
 736  */
  737 #ifndef PIPSIZ
  738 #define PIPSIZ  8192            /* default stream buffering, overridable at build time */
  739 #endif
      /* Defaults that unp_attach() passes to soreserve() when a socket has no buffer space set. */
  740 static u_int32_t        unpst_sendspace = PIPSIZ;
  741 static u_int32_t        unpst_recvspace = PIPSIZ;
  742 static u_int32_t        unpdg_sendspace = 2*1024;       /* really max datagram size */
  743 static u_int32_t        unpdg_recvspace = 4*1024;
  744
  745 static int      unp_rights;                     /* file descriptors in flight */
  746 static int      unp_disposed;                   /* discarded file descriptors */
  747
      /*
       * sysctl knobs.  The buffer sizes and the mDNS trace switch are
       * registered read-write; the in-flight descriptor count ("inflight")
       * is registered read-only.
       */
  748 SYSCTL_DECL(_net_local_stream);
  749 SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW | CTLFLAG_LOCKED,
  750    &unpst_sendspace, 0, "");
  751 SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW | CTLFLAG_LOCKED,
  752    &unpst_recvspace, 0, "");
  753 SYSCTL_INT(_net_local_stream, OID_AUTO, tracemdns, CTLFLAG_RW | CTLFLAG_LOCKED,
  754    &unpst_tracemdns, 0, "");
  755 SYSCTL_DECL(_net_local_dgram);
  756 SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW | CTLFLAG_LOCKED,
  757    &unpdg_sendspace, 0, "");
  758 SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW | CTLFLAG_LOCKED,
  759    &unpdg_recvspace, 0, "");
  760 SYSCTL_DECL(_net_local);
  761 SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD | CTLFLAG_LOCKED, &unp_rights, 0, "");
 762
 763 /*
 764  * Returns:     0                       Success
 765  *              ENOBUFS
 766  *      soreserve:ENOBUFS
 767  */
 768 static int
 769 unp_attach(struct socket *so)
 770 {
 771         struct unpcb *unp;
 772         int error = 0;
 773
 774         if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
 775                 switch (so->so_type) {
 776
 777                 case SOCK_STREAM:
 778                         error = soreserve(so, unpst_sendspace, unpst_recvspace);
 779                         break;
 780
 781                 case SOCK_DGRAM:
 782                         error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
 783                         break;
 784
 785                 default:
 786                         panic("unp_attach");
 787                 }
 788                 if (error)
 789                         return (error);
 790         }
 791         unp = (struct unpcb *)zalloc(unp_zone);
 792         if (unp == NULL)
 793                 return (ENOBUFS);
 794         bzero(unp, sizeof (*unp));
 795
 796         lck_mtx_init(&unp->unp_mtx,
 797                 unp_mtx_grp, unp_mtx_attr);
 798
 799         lck_rw_lock_exclusive(unp_list_mtx);
 800         LIST_INIT(&unp->unp_refs);
 801         unp->unp_socket = so;
 802         unp->unp_gencnt = ++unp_gencnt;
 803         unp_count++;
 804         LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ?
 805             &unp_dhead : &unp_shead, unp, unp_link);
 806         lck_rw_done(unp_list_mtx);
 807         so->so_pcb = (caddr_t)unp;
 808         /*
 809          * Mark AF_UNIX socket buffers accordingly so that:
 810          *
 811          * a. In the SOCK_STREAM case, socket buffer append won't fail due to
 812          *    the lack of space; this essentially loosens the sbspace() check,
 813          *    since there is disconnect between sosend() and uipc_send() with
 814          *    respect to flow control that might result in our dropping the
 815          *    data in uipc_send().  By setting this, we allow for slightly
 816          *    more records to be appended to the receiving socket to avoid
 817          *    losing data (which we can't afford in the SOCK_STREAM case).
 818          *    Flow control still takes place since we adjust the sender's
 819          *    hiwat during each send.  This doesn't affect the SOCK_DGRAM
 820          *    case and append would still fail when the queue overflows.
 821          *
 822          * b. In the presence of control messages containing internalized
 823          *    file descriptors, the append routines will not free them since
 824          *    we'd need to undo the work first via unp_dispose().
 825          */
 826         so->so_rcv.sb_flags |= SB_UNIX;
 827         so->so_snd.sb_flags |= SB_UNIX;
 828         return (0);
 829 }
 830
 831 static void
 832 unp_detach(struct unpcb *unp)
 833 {
 834         int so_locked = 1;
 835
 836         lck_rw_lock_exclusive(unp_list_mtx);
 837         LIST_REMOVE(unp, unp_link);
 838         --unp_count;
 839         ++unp_gencnt;
 840         lck_rw_done(unp_list_mtx);
 841         if (unp->unp_vnode) {
 842                 struct vnode *tvp = NULL;
 843                 socket_unlock(unp->unp_socket, 0);
 844
 845                 /* Holding unp_connect_lock will avoid a race between
 846                  * a thread closing the listening socket and a thread
 847                  * connecting to it.
 848                  */
 849                 lck_mtx_lock(unp_connect_lock);
 850                 socket_lock(unp->unp_socket, 0);
 851                 if (unp->unp_vnode) {
 852                         tvp = unp->unp_vnode;
 853                         unp->unp_vnode->v_socket = NULL;
 854                         unp->unp_vnode = NULL;
 855                 }
 856                 lck_mtx_unlock(unp_connect_lock);
 857                 if (tvp != NULL)
 858                         vnode_rele(tvp);                /* drop the usecount */
 859         }
 860         if (unp->unp_conn)
 861                 unp_disconnect(unp);
 862         while (unp->unp_refs.lh_first) {
 863                 struct unpcb *unp2 = NULL;
 864
 865                 /* This datagram socket is connected to one or more
 866                  * sockets. In order to avoid a race condition between removing
 867                  * this reference and closing the connected socket, we need
 868                  * to check disconnect_in_progress
 869                  */
 870                 if (so_locked == 1) {
 871                         socket_unlock(unp->unp_socket, 0);
 872                         so_locked = 0;
 873                 }
 874                 lck_mtx_lock(unp_disconnect_lock);
 875                 while (disconnect_in_progress != 0) {
 876                         (void)msleep((caddr_t)&disconnect_in_progress, unp_disconnect_lock,
 877                                 PSOCK, "disconnect", NULL);
 878                 }
 879                 disconnect_in_progress = 1;
 880                 lck_mtx_unlock(unp_disconnect_lock);
 881
 882                 /* Now we are sure that any unpcb socket disconnect is not happening */
 883                 if (unp->unp_refs.lh_first != NULL) {
 884                         unp2 = unp->unp_refs.lh_first;
 885                         socket_lock(unp2->unp_socket, 1);
 886                 }
 887
 888                 lck_mtx_lock(unp_disconnect_lock);
 889                 disconnect_in_progress = 0;
 890                 wakeup(&disconnect_in_progress);
 891                 lck_mtx_unlock(unp_disconnect_lock);
 892
 893                 if (unp2 != NULL) {
 894                         /* We already locked this socket and have a reference on it */
 895                         unp_drop(unp2, ECONNRESET);
 896                         socket_unlock(unp2->unp_socket, 1);
 897                 }
 898         }
 899
 900         if (so_locked == 0) {
 901                 socket_lock(unp->unp_socket, 0);
 902                 so_locked = 1;
 903         }
 904         soisdisconnected(unp->unp_socket);
 905         /* makes sure we're getting dealloced */
 906         unp->unp_socket->so_flags |= SOF_PCBCLEARING;
 907 }
 908
 909 /*
 910  * Returns:     0                       Success
 911  *              EAFNOSUPPORT
 912  *              EINVAL
 913  *              EADDRINUSE
 914  *              namei:???               [anything namei can return]
 915  *              vnode_authorize:???     [anything vnode_authorize can return]
 916  *
 917  * Notes:       p at this point is the current process, as this function is
 918  *              only called by sobind().
 919  */
 920 static int
 921 unp_bind(
 922         struct unpcb *unp,
 923         struct sockaddr *nam,
 924         proc_t p)
 925 {
 926         struct sockaddr_un *soun = (struct sockaddr_un *)nam;
 927         struct vnode *vp, *dvp;
 928         struct vnode_attr va;
 929         vfs_context_t ctx = vfs_context_current();
 930         int error, namelen;
 931         struct nameidata nd;
 932         struct socket *so = unp->unp_socket;
 933         char buf[SOCK_MAXADDRLEN];
 934
 935         if (nam->sa_family != 0 && nam->sa_family != AF_UNIX) {
 936                 return (EAFNOSUPPORT);
 937         }
 938
 939         if (unp->unp_vnode != NULL)
 940                 return (EINVAL);
 941         namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
 942         if (namelen <= 0)
 943                 return (EINVAL);
 944
 945         socket_unlock(so, 0);
 946
 947         strlcpy(buf, soun->sun_path, namelen+1);
 948         NDINIT(&nd, CREATE, OP_MKFIFO, FOLLOW | LOCKPARENT, UIO_SYSSPACE,
 949             CAST_USER_ADDR_T(buf), ctx);
 950         /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
 951         error = namei(&nd);
 952         if (error) {
 953                 socket_lock(so, 0);
 954                 return (error);
 955         }
 956         dvp = nd.ni_dvp;
 957         vp = nd.ni_vp;
 958
 959         if (vp != NULL) {
 960                 /*
 961                  * need to do this before the vnode_put of dvp
 962                  * since we may have to release an fs_nodelock
 963                  */
 964                 nameidone(&nd);
 965
 966                 vnode_put(dvp);
 967                 vnode_put(vp);
 968
 969                 socket_lock(so, 0);
 970                 return (EADDRINUSE);
 971         }
 972
 973         VATTR_INIT(&va);
 974         VATTR_SET(&va, va_type, VSOCK);
 975         VATTR_SET(&va, va_mode, (ACCESSPERMS & ~p->p_fd->fd_cmask));
 976
 977 #if CONFIG_MACF
 978         error = mac_vnode_check_create(ctx,
 979             nd.ni_dvp, &nd.ni_cnd, &va);
 980
 981         if (error == 0)
 982 #endif /* CONFIG_MACF */
 983 #if CONFIG_MACF_SOCKET_SUBSET
 984         error = mac_vnode_check_uipc_bind(ctx,
 985             nd.ni_dvp, &nd.ni_cnd, &va);
 986
 987         if (error == 0)
 988 #endif /* MAC_SOCKET_SUBSET */
 989         /* authorize before creating */
 990         error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);
 991
 992         if (!error) {
 993                 /* create the socket */
 994                 error = vn_create(dvp, &vp, &nd, &va, 0, 0, NULL, ctx);
 995         }
 996
 997         nameidone(&nd);
 998         vnode_put(dvp);
 999
1000         if (error) {
1001                 socket_lock(so, 0);
1002                 return (error);
1003         }
1004         vnode_ref(vp);  /* gain a longterm reference */
1005         socket_lock(so, 0);
1006         vp->v_socket = unp->unp_socket;
1007         unp->unp_vnode = vp;
1008         unp->unp_addr = (struct sockaddr_un *)dup_sockaddr(nam, 1);
1009         vnode_put(vp);          /* drop the iocount */
1010
1011         return (0);
1012 }
1013
1014
1015 /*
1016  * Returns:     0                       Success
1017  *              EAFNOSUPPORT            Address family not supported
1018  *              EINVAL                  Invalid argument
1019  *              ENOTSOCK                Not a socket
1020  *              ECONNREFUSED            Connection refused
1021  *              EPROTOTYPE              Protocol wrong type for socket
1022  *              EISCONN                 Socket is connected
1023  *      unp_connect2:EPROTOTYPE         Protocol wrong type for socket
1024  *      unp_connect2:EINVAL             Invalid argument
1025  *      namei:???                       [anything namei can return]
1026  *      vnode_authorize:????            [anything vnode_authorize can return]
1027  *
1028  * Notes:       p at this point is the current process, as this function is
1029  *              only called by sosend(), sendfile(), and soconnectlock().
1030  */
1031 static int
1032 unp_connect(struct socket *so, struct sockaddr *nam, __unused proc_t p)
1033 {
1034         struct sockaddr_un *soun = (struct sockaddr_un *)nam;
1035         struct vnode *vp;
1036         struct socket *so2, *so3, *list_so=NULL;
1037         struct unpcb *unp, *unp2, *unp3;
1038         vfs_context_t ctx = vfs_context_current();
1039         int error, len;
1040         struct nameidata nd;
1041         char buf[SOCK_MAXADDRLEN];
1042
1043         if (nam->sa_family != 0 && nam->sa_family != AF_UNIX) {
1044                 return (EAFNOSUPPORT);
1045         }
1046
1047         unp = sotounpcb(so);
1048         so2 = so3 = NULL;
1049
1050         len = nam->sa_len - offsetof(struct sockaddr_un, sun_path);
1051         if (len <= 0)
1052                 return (EINVAL);
1053
1054         strlcpy(buf, soun->sun_path, len+1);
1055         socket_unlock(so, 0);
1056
1057         NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
1058             CAST_USER_ADDR_T(buf), ctx);
1059         error = namei(&nd);
1060         if (error) {
1061                 socket_lock(so, 0);
1062                 return (error);
1063         }
1064         nameidone(&nd);
1065         vp = nd.ni_vp;
1066         if (vp->v_type != VSOCK) {
1067                 error = ENOTSOCK;
1068                 socket_lock(so, 0);
1069                 goto out;
1070         }
1071
1072 #if CONFIG_MACF_SOCKET_SUBSET
1073         error = mac_vnode_check_uipc_connect(ctx, vp);
1074         if (error) {
1075                 socket_lock(so, 0);
1076                 goto out;
1077         }
1078 #endif /* MAC_SOCKET_SUBSET */
1079
1080         error = vnode_authorize(vp, NULL, KAUTH_VNODE_WRITE_DATA, ctx);
1081         if (error) {
1082                 socket_lock(so, 0);
1083                 goto out;
1084         }
1085
1086         lck_mtx_lock(unp_connect_lock);
1087
1088         if (vp->v_socket == 0) {
1089                 lck_mtx_unlock(unp_connect_lock);
1090                 error = ECONNREFUSED;
1091                 socket_lock(so, 0);
1092                 goto out;
1093         }
1094
1095         socket_lock(vp->v_socket, 1); /* Get a reference on the listening socket */
1096         so2 = vp->v_socket;
1097         lck_mtx_unlock(unp_connect_lock);
1098
1099
1100         if (so2->so_pcb == NULL) {
1101                 error = ECONNREFUSED;
1102                 if (so != so2) {
1103                         socket_unlock(so2, 1);
1104                         socket_lock(so, 0);
1105                 } else {
1106                         /* Release the reference held for the listen socket */
1107                         so2->so_usecount--;
1108                 }
1109                 goto out;
1110         }
1111
1112         if (so < so2) {
1113                 socket_unlock(so2, 0);
1114                 socket_lock(so, 0);
1115                 socket_lock(so2, 0);
1116         } else if (so > so2) {
1117                 socket_lock(so, 0);
1118         }
1119         /*
1120          * Check if socket was connected while we were trying to
1121          * get the socket locks in order.
1122          * XXX - probably shouldn't return an error for SOCK_DGRAM
1123          */
1124         if ((so->so_state & SS_ISCONNECTED) != 0) {
1125                 error = EISCONN;
1126                 goto decref_out;
1127         }
1128
1129         if (so->so_type != so2->so_type) {
1130                 error = EPROTOTYPE;
1131                 goto decref_out;
1132         }
1133
1134         if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
1135                 /* Release the incoming socket but keep a reference */
1136                 socket_unlock(so, 0);
1137
1138                 if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
1139                     (so3 = sonewconn(so2, 0, nam)) == 0) {
1140                         error = ECONNREFUSED;
1141                         if (so != so2) {
1142                                 socket_unlock(so2, 1);
1143                                 socket_lock(so, 0);
1144                         } else {
1145                                 socket_lock(so, 0);
1146                                 /* Release the reference held for
1147                                  * listen socket.
1148                                  */
1149                                 so2->so_usecount--;
1150                         }
1151                         goto out;
1152                 }
1153                 unp2 = sotounpcb(so2);
1154                 unp3 = sotounpcb(so3);
1155                 if (unp2->unp_addr)
1156                         unp3->unp_addr = (struct sockaddr_un *)
1157                             dup_sockaddr((struct sockaddr *)unp2->unp_addr, 1);
1158
1159                 /*
1160                  * unp_peercred management:
1161                  *
1162                  * The connecter's (client's) credentials are copied
1163                  * from its process structure at the time of connect()
1164                  * (which is now).
1165                  */
1166                 cru2x(vfs_context_ucred(ctx), &unp3->unp_peercred);
1167                 unp3->unp_flags |= UNP_HAVEPC;
1168                 /*
1169                  * The receiver's (server's) credentials are copied
1170                  * from the unp_peercred member of socket on which the
1171                  * former called listen(); unp_listen() cached that
1172                  * process's credentials at that time so we can use
1173                  * them now.
1174                  */
1175                 KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED,
1176                     ("unp_connect: listener without cached peercred"));
1177
1178                 /* Here we need to have both so and so2 locks and so2
1179                  * is already locked. Lock ordering is required.
1180                  */
1181                 if (so < so2) {
1182                         socket_unlock(so2, 0);
1183                         socket_lock(so, 0);
1184                         socket_lock(so2, 0);
1185                 } else {
1186                         socket_lock(so, 0);
1187                 }
1188
1189                 /* Check again if the socket state changed when its lock was released */
1190                 if ((so->so_state & SS_ISCONNECTED) != 0) {
1191                         error = EISCONN;
1192                         socket_unlock(so2, 1);
1193                         socket_lock(so3, 0);
1194                         sofreelastref(so3, 1);
1195                         goto out;
1196                 }
1197                 memcpy(&unp->unp_peercred, &unp2->unp_peercred,
1198                     sizeof (unp->unp_peercred));
1199                 unp->unp_flags |= UNP_HAVEPC;
1200
1201 #if CONFIG_MACF_SOCKET
1202                 /* XXXMAC: recursive lock: SOCK_LOCK(so); */
1203                 mac_socketpeer_label_associate_socket(so, so3);
1204                 mac_socketpeer_label_associate_socket(so3, so);
1205                 /* XXXMAC: SOCK_UNLOCK(so); */
1206 #endif /* MAC_SOCKET */
1207
1208                 /* Hold the reference on listening socket until the end */
1209                 socket_unlock(so2, 0);
1210                 list_so = so2;
1211
1212                 /* Lock ordering doesn't matter because so3 was just created */
1213                 socket_lock(so3, 1);
1214                 so2 = so3;
1215
1216                 /*
1217                  * Enable tracing for mDNSResponder endpoints.  (The use
1218                  * of sizeof instead of strlen below takes the null
1219                  * terminating character into account.)
1220                  */
1221                 if (unpst_tracemdns &&
1222                     !strncmp(soun->sun_path, MDNSRESPONDER_PATH,
1223                     sizeof (MDNSRESPONDER_PATH))) {
1224                         unp->unp_flags |= UNP_TRACE_MDNS;
1225                         unp2->unp_flags |= UNP_TRACE_MDNS;
1226                 }
1227         }
1228
1229         error = unp_connect2(so, so2);
1230
1231 decref_out:
1232         if (so2 != NULL) {
1233                 if (so != so2) {
1234                         socket_unlock(so2, 1);
1235                 } else {
1236                         /* Release the extra reference held for the listen socket.
1237                          * This is possible only for SOCK_DGRAM sockets. We refuse
1238                          * connecting to the same socket for SOCK_STREAM sockets.
1239                          */
1240                         so2->so_usecount--;
1241                 }
1242         }
1243
1244         if (list_so != NULL) {
1245                 socket_lock(list_so, 0);
1246                 socket_unlock(list_so, 1);
1247         }
1248
1249 out:
1250         lck_mtx_assert(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1251         vnode_put(vp);
1252         return (error);
1253 }
1254
1255 /*
1256  * Returns:     0                       Success
1257  *              EPROTOTYPE              Protocol wrong type for socket
1258  *              EINVAL                  Invalid argument
1259  */
1260 int
1261 unp_connect2(struct socket *so, struct socket *so2)
1262 {
1263         struct unpcb *unp = sotounpcb(so);
1264         struct unpcb *unp2;
1265
1266         if (so2->so_type != so->so_type)
1267                 return (EPROTOTYPE);
1268
1269         unp2 = sotounpcb(so2);
1270
1271         lck_mtx_assert(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1272         lck_mtx_assert(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);
1273
1274         /* Verify both sockets are still opened */
1275         if (unp == 0 || unp2 == 0)
1276                 return (EINVAL);
1277
1278         unp->unp_conn = unp2;
1279         so2->so_usecount++;
1280
1281         switch (so->so_type) {
1282
1283         case SOCK_DGRAM:
1284                 LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink);
1285
1286                 if (so != so2) {
1287                         /* Avoid lock order reversals due to drop/acquire in soisconnected. */
1288                         /* Keep an extra reference on so2 that will be dropped
1289                          * soon after getting the locks in order
1290                          */
1291                         socket_unlock(so2, 0);
1292                         soisconnected(so);
1293                         unp_get_locks_in_order(so, so2);
1294                         so2->so_usecount--;
1295                 } else {
1296                         soisconnected(so);
1297                 }
1298
1299                 break;
1300
1301         case SOCK_STREAM:
1302                 /* This takes care of socketpair */
1303                 if (!(unp->unp_flags & UNP_HAVEPC) &&
1304                     !(unp2->unp_flags & UNP_HAVEPC)) {
1305                         cru2x(kauth_cred_get(), &unp->unp_peercred);
1306                         unp->unp_flags |= UNP_HAVEPC;
1307
1308                         cru2x(kauth_cred_get(), &unp2->unp_peercred);
1309                         unp2->unp_flags |= UNP_HAVEPC;
1310                 }
1311                 unp2->unp_conn = unp;
1312                 so->so_usecount++;
1313
1314                 /* Avoid lock order reversals due to drop/acquire in soisconnected. */
1315                 socket_unlock(so, 0);
1316                 soisconnected(so2);
1317
1318                 /* Keep an extra reference on so2, that will be dropped soon after
1319                  * getting the locks in order again.
1320                  */
1321                 socket_unlock(so2, 0);
1322
1323                 socket_lock(so, 0);
1324                 soisconnected(so);
1325
1326                 unp_get_locks_in_order(so, so2);
1327                 /* Decrement the extra reference left before */
1328                 so2->so_usecount--;
1329                 break;
1330
1331         default:
1332                 panic("unknown socket type %d in unp_connect2", so->so_type);
1333         }
1334         lck_mtx_assert(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1335         lck_mtx_assert(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);
1336         return (0);
1337 }
1338
1339 static void
1340 unp_disconnect(struct unpcb *unp)
1341 {
1342         struct unpcb *unp2 = NULL;
1343         struct socket *so2 = NULL, *so;
1344         struct socket *waitso;
1345         int so_locked = 1, strdisconn = 0;
1346
1347         so = unp->unp_socket;
1348         if (unp->unp_conn == NULL) {
1349                 return;
1350         }
1351         lck_mtx_lock(unp_disconnect_lock);
1352         while (disconnect_in_progress != 0) {
1353                 if (so_locked == 1) {
1354                         socket_unlock(so, 0);
1355                         so_locked = 0;
1356                 }
1357                 (void)msleep((caddr_t)&disconnect_in_progress, unp_disconnect_lock,
1358                         PSOCK, "disconnect", NULL);
1359         }
1360         disconnect_in_progress = 1;
1361         lck_mtx_unlock(unp_disconnect_lock);
1362
1363         if (so_locked == 0) {
1364                 socket_lock(so, 0);
1365                 so_locked = 1;
1366         }
1367
1368         unp2 = unp->unp_conn;
1369
1370         if (unp2 == 0 || unp2->unp_socket == NULL) {
1371                 goto out;
1372         }
1373         so2 = unp2->unp_socket;
1374
1375 try_again:
1376         if (so == so2) {
1377                 if (so_locked == 0) {
1378                         socket_lock(so, 0);
1379                 }
1380                 waitso = so;
1381         } else if (so < so2) {
1382                 if (so_locked == 0) {
1383                         socket_lock(so, 0);
1384                 }
1385                 socket_lock(so2, 1);
1386                 waitso = so2;
1387         } else {
1388                 if (so_locked == 1) {
1389                         socket_unlock(so, 0);
1390                 }
1391                 socket_lock(so2, 1);
1392                 socket_lock(so, 0);
1393                 waitso = so;
1394         }
1395         so_locked = 1;
1396
1397         lck_mtx_assert(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1398         lck_mtx_assert(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);
1399
1400         /* Check for the UNP_DONTDISCONNECT flag, if it
1401          * is set, release both sockets and go to sleep
1402          */
1403
1404         if ((((struct unpcb *)waitso->so_pcb)->unp_flags & UNP_DONTDISCONNECT) != 0) {
1405                 if (so != so2) {
1406                         socket_unlock(so2, 1);
1407                 }
1408                 so_locked = 0;
1409
1410                 (void)msleep(waitso->so_pcb, &unp->unp_mtx,
1411                         PSOCK | PDROP, "unpdisconnect", NULL);
1412                 goto try_again;
1413         }
1414
1415         if (unp->unp_conn == NULL) {
1416                 panic("unp_conn became NULL after sleep");
1417         }
1418
1419         unp->unp_conn = NULL;
1420         so2->so_usecount--;
1421
1422         if (unp->unp_flags & UNP_TRACE_MDNS)
1423                 unp->unp_flags &= ~UNP_TRACE_MDNS;
1424
1425         switch (unp->unp_socket->so_type) {
1426
1427         case SOCK_DGRAM:
1428                 LIST_REMOVE(unp, unp_reflink);
1429                 unp->unp_socket->so_state &= ~SS_ISCONNECTED;
1430                 if (so != so2)
1431                         socket_unlock(so2, 1);
1432                 break;
1433
1434         case SOCK_STREAM:
1435                 unp2->unp_conn = NULL;
1436                 so->so_usecount--;
1437
1438                 /* Set the socket state correctly but do a wakeup later when
1439                  * we release all locks except the socket lock, this will avoid
1440                  * a deadlock.
1441                  */
1442                 unp->unp_socket->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
1443                 unp->unp_socket->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
1444
1445                 unp2->unp_socket->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
1446                 unp->unp_socket->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
1447
1448                 if (unp2->unp_flags & UNP_TRACE_MDNS)
1449                         unp2->unp_flags &= ~UNP_TRACE_MDNS;
1450
1451                 strdisconn = 1;
1452                 break;
1453         default:
1454                 panic("unknown socket type %d", so->so_type);
1455         }
1456 out:
1457         lck_mtx_lock(unp_disconnect_lock);
1458         disconnect_in_progress = 0;
1459         wakeup(&disconnect_in_progress);
1460         lck_mtx_unlock(unp_disconnect_lock);
1461
1462         if (strdisconn) {
1463                 socket_unlock(so, 0);
1464                 soisdisconnected(so2);
1465                 socket_unlock(so2, 1);
1466
1467                 socket_lock(so,0);
1468                 soisdisconnected(so);
1469         }
1470         lck_mtx_assert(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1471         return;
1472 }
1473
1474 /*
1475  * unpcb_to_compat copies specific bits of a unpcb to a unpcb_compat format.
1476  * The unpcb_compat data structure is passed to user space and must not change.
1477  */
1478 static void
1479 unpcb_to_compat(struct unpcb *up, struct unpcb_compat *cp)
1480 {
1481 #if defined(__LP64__)
1482         cp->unp_link.le_next = (u_int32_t)
1483             VM_KERNEL_ADDRPERM(up->unp_link.le_next);
1484         cp->unp_link.le_prev = (u_int32_t)
1485             VM_KERNEL_ADDRPERM(up->unp_link.le_prev);
1486 #else
1487         cp->unp_link.le_next = (struct unpcb_compat *)
1488             VM_KERNEL_ADDRPERM(up->unp_link.le_next);
1489         cp->unp_link.le_prev = (struct unpcb_compat **)
1490             VM_KERNEL_ADDRPERM(up->unp_link.le_prev);
1491 #endif
1492         cp->unp_socket = (_UNPCB_PTR(struct socket *))
1493             VM_KERNEL_ADDRPERM(up->unp_socket);
1494         cp->unp_vnode = (_UNPCB_PTR(struct vnode *))
1495             VM_KERNEL_ADDRPERM(up->unp_vnode);
1496         cp->unp_ino = up->unp_ino;
1497         cp->unp_conn = (_UNPCB_PTR(struct unpcb_compat *))
1498             VM_KERNEL_ADDRPERM(up->unp_conn);
1499         cp->unp_refs = (u_int32_t)VM_KERNEL_ADDRPERM(up->unp_refs.lh_first);
1500 #if defined(__LP64__)
1501         cp->unp_reflink.le_next =
1502             (u_int32_t)VM_KERNEL_ADDRPERM(up->unp_reflink.le_next);
1503         cp->unp_reflink.le_prev =
1504             (u_int32_t)VM_KERNEL_ADDRPERM(up->unp_reflink.le_prev);
1505 #else
1506         cp->unp_reflink.le_next =
1507             (struct unpcb_compat *)VM_KERNEL_ADDRPERM(up->unp_reflink.le_next);
1508         cp->unp_reflink.le_prev =
1509             (struct unpcb_compat **)VM_KERNEL_ADDRPERM(up->unp_reflink.le_prev);
1510 #endif
1511         cp->unp_addr = (_UNPCB_PTR(struct sockaddr_un *))
1512             VM_KERNEL_ADDRPERM(up->unp_addr);
1513         cp->unp_cc = up->unp_cc;
1514         cp->unp_mbcnt = up->unp_mbcnt;
1515         cp->unp_gencnt = up->unp_gencnt;
1516 }
1517
1518 static int
1519 unp_pcblist SYSCTL_HANDLER_ARGS
1520 {
1521 #pragma unused(oidp,arg2)
1522         int error, i, n;
1523         struct unpcb *unp, **unp_list;
1524         unp_gen_t gencnt;
1525         struct xunpgen xug;
1526         struct unp_head *head;
1527
1528         lck_rw_lock_shared(unp_list_mtx);
1529         head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);
1530
1531         /*
1532          * The process of preparing the PCB list is too time-consuming and
1533          * resource-intensive to repeat twice on every request.
1534          */
1535         if (req->oldptr == USER_ADDR_NULL) {
1536                 n = unp_count;
1537                 req->oldidx = 2 * sizeof (xug) + (n + n / 8) *
1538                     sizeof (struct xunpcb);
1539                 lck_rw_done(unp_list_mtx);
1540                 return (0);
1541         }
1542
1543         if (req->newptr != USER_ADDR_NULL) {
1544                 lck_rw_done(unp_list_mtx);
1545                 return (EPERM);
1546         }
1547
1548         /*
1549          * OK, now we're committed to doing something.
1550          */
1551         gencnt = unp_gencnt;
1552         n = unp_count;
1553
1554         bzero(&xug, sizeof (xug));
1555         xug.xug_len = sizeof (xug);
1556         xug.xug_count = n;
1557         xug.xug_gen = gencnt;
1558         xug.xug_sogen = so_gencnt;
1559         error = SYSCTL_OUT(req, &xug, sizeof (xug));
1560         if (error) {
1561                 lck_rw_done(unp_list_mtx);
1562                 return (error);
1563         }
1564
1565         /*
1566          * We are done if there is no pcb
1567          */
1568         if (n == 0)  {
1569                 lck_rw_done(unp_list_mtx);
1570                 return (0);
1571         }
1572
1573         MALLOC(unp_list, struct unpcb **, n * sizeof (*unp_list),
1574             M_TEMP, M_WAITOK);
1575         if (unp_list == 0) {
1576                 lck_rw_done(unp_list_mtx);
1577                 return (ENOMEM);
1578         }
1579
1580         for (unp = head->lh_first, i = 0; unp && i < n;
1581             unp = unp->unp_link.le_next) {
1582                 if (unp->unp_gencnt <= gencnt)
1583                         unp_list[i++] = unp;
1584         }
1585         n = i;                  /* in case we lost some during malloc */
1586
1587         error = 0;
1588         for (i = 0; i < n; i++) {
1589                 unp = unp_list[i];
1590                 if (unp->unp_gencnt <= gencnt) {
1591                         struct xunpcb xu;
1592
1593                         bzero(&xu, sizeof (xu));
1594                         xu.xu_len = sizeof (xu);
1595                         xu.xu_unpp = (_UNPCB_PTR(struct unpcb_compat *))
1596                             VM_KERNEL_ADDRPERM(unp);
1597                         /*
1598                          * XXX - need more locking here to protect against
1599                          * connect/disconnect races for SMP.
1600                          */
1601                         if (unp->unp_addr)
1602                                 bcopy(unp->unp_addr, &xu.xu_addr,
1603                                     unp->unp_addr->sun_len);
1604                         if (unp->unp_conn && unp->unp_conn->unp_addr)
1605                                 bcopy(unp->unp_conn->unp_addr,
1606                                     &xu.xu_caddr,
1607                                     unp->unp_conn->unp_addr->sun_len);
1608                         unpcb_to_compat(unp, &xu.xu_unp);
1609                         sotoxsocket(unp->unp_socket, &xu.xu_socket);
1610                         error = SYSCTL_OUT(req, &xu, sizeof (xu));
1611                 }
1612         }
1613         if (!error) {
1614                 /*
1615                  * Give the user an updated idea of our state.
1616                  * If the generation differs from what we told
1617                  * her before, she knows that something happened
1618                  * while we were processing this request, and it
1619                  * might be necessary to retry.
1620                  */
1621                 bzero(&xug, sizeof (xug));
1622                 xug.xug_len = sizeof (xug);
1623                 xug.xug_gen = unp_gencnt;
1624                 xug.xug_sogen = so_gencnt;
1625                 xug.xug_count = unp_count;
1626                 error = SYSCTL_OUT(req, &xug, sizeof (xug));
1627         }
1628         FREE(unp_list, M_TEMP);
1629         lck_rw_done(unp_list_mtx);
1630         return (error);
1631 }
1632
1633 SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, CTLFLAG_RD | CTLFLAG_LOCKED,
1634             (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb",
1635             "List of active local datagram sockets");
1636 SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLFLAG_RD | CTLFLAG_LOCKED,
1637             (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb",
1638             "List of active local stream sockets");
1639
1640 #if !CONFIG_EMBEDDED
1641
1642 static int
1643 unp_pcblist64 SYSCTL_HANDLER_ARGS
1644 {
1645 #pragma unused(oidp,arg2)
1646         int error, i, n;
1647         struct unpcb *unp, **unp_list;
1648         unp_gen_t gencnt;
1649         struct xunpgen xug;
1650         struct unp_head *head;
1651
1652         lck_rw_lock_shared(unp_list_mtx);
1653         head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);
1654
1655         /*
1656          * The process of preparing the PCB list is too time-consuming and
1657          * resource-intensive to repeat twice on every request.
1658          */
1659         if (req->oldptr == USER_ADDR_NULL) {
1660                 n = unp_count;
1661                 req->oldidx = 2 * sizeof (xug) + (n + n / 8) *
1662                     (sizeof (struct xunpcb64));
1663                 lck_rw_done(unp_list_mtx);
1664                 return (0);
1665         }
1666
1667         if (req->newptr != USER_ADDR_NULL) {
1668                 lck_rw_done(unp_list_mtx);
1669                 return (EPERM);
1670         }
1671
1672         /*
1673          * OK, now we're committed to doing something.
1674          */
1675         gencnt = unp_gencnt;
1676         n = unp_count;
1677
1678         bzero(&xug, sizeof (xug));
1679         xug.xug_len = sizeof (xug);
1680         xug.xug_count = n;
1681         xug.xug_gen = gencnt;
1682         xug.xug_sogen = so_gencnt;
1683         error = SYSCTL_OUT(req, &xug, sizeof (xug));
1684         if (error) {
1685                 lck_rw_done(unp_list_mtx);
1686                 return (error);
1687         }
1688
1689         /*
1690          * We are done if there is no pcb
1691          */
1692         if (n == 0)  {
1693                 lck_rw_done(unp_list_mtx);
1694                 return (0);
1695         }
1696
1697         MALLOC(unp_list, struct unpcb **, n * sizeof (*unp_list),
1698             M_TEMP, M_WAITOK);
1699         if (unp_list == 0) {
1700                 lck_rw_done(unp_list_mtx);
1701                 return (ENOMEM);
1702         }
1703
1704         for (unp = head->lh_first, i = 0; unp && i < n;
1705             unp = unp->unp_link.le_next) {
1706                 if (unp->unp_gencnt <= gencnt)
1707                         unp_list[i++] = unp;
1708         }
1709         n = i;                  /* in case we lost some during malloc */
1710
1711         error = 0;
1712         for (i = 0; i < n; i++) {
1713                 unp = unp_list[i];
1714                 if (unp->unp_gencnt <= gencnt) {
1715                         struct xunpcb64 xu;
1716                         size_t          xu_len = sizeof(struct xunpcb64);
1717
1718                         bzero(&xu, xu_len);
1719                         xu.xu_len = xu_len;
1720                         xu.xu_unpp = (u_int64_t)VM_KERNEL_ADDRPERM(unp);
1721                         xu.xunp_link.le_next = (u_int64_t)
1722                             VM_KERNEL_ADDRPERM(unp->unp_link.le_next);
1723                         xu.xunp_link.le_prev = (u_int64_t)
1724                             VM_KERNEL_ADDRPERM(unp->unp_link.le_prev);
1725                         xu.xunp_socket = (u_int64_t)
1726                             VM_KERNEL_ADDRPERM(unp->unp_socket);
1727                         xu.xunp_vnode = (u_int64_t)
1728                             VM_KERNEL_ADDRPERM(unp->unp_vnode);
1729                         xu.xunp_ino = unp->unp_ino;
1730                         xu.xunp_conn = (u_int64_t)
1731                             VM_KERNEL_ADDRPERM(unp->unp_conn);
1732                         xu.xunp_refs = (u_int64_t)
1733                             VM_KERNEL_ADDRPERM(unp->unp_refs.lh_first);
1734                         xu.xunp_reflink.le_next = (u_int64_t)
1735                             VM_KERNEL_ADDRPERM(unp->unp_reflink.le_next);
1736                         xu.xunp_reflink.le_prev = (u_int64_t)
1737                             VM_KERNEL_ADDRPERM(unp->unp_reflink.le_prev);
1738                         xu.xunp_cc = unp->unp_cc;
1739                         xu.xunp_mbcnt = unp->unp_mbcnt;
1740                         xu.xunp_gencnt = unp->unp_gencnt;
1741
1742                         if (unp->unp_socket)
1743                                 sotoxsocket64(unp->unp_socket, &xu.xu_socket);
1744
1745                         /*
1746                          * XXX - need more locking here to protect against
1747                          * connect/disconnect races for SMP.
1748                          */
1749                         if (unp->unp_addr)
1750                                 bcopy(unp->unp_addr, &xu.xunp_addr,
1751                                     unp->unp_addr->sun_len);
1752                         if (unp->unp_conn && unp->unp_conn->unp_addr)
1753                                 bcopy(unp->unp_conn->unp_addr,
1754                                     &xu.xunp_caddr,
1755                                     unp->unp_conn->unp_addr->sun_len);
1756
1757                         error = SYSCTL_OUT(req, &xu, xu_len);
1758                 }
1759         }
1760         if (!error) {
1761                 /*
1762                  * Give the user an updated idea of our state.
1763                  * If the generation differs from what we told
1764                  * her before, she knows that something happened
1765                  * while we were processing this request, and it
1766                  * might be necessary to retry.
1767                  */
1768                 bzero(&xug, sizeof (xug));
1769                 xug.xug_len = sizeof (xug);
1770                 xug.xug_gen = unp_gencnt;
1771                 xug.xug_sogen = so_gencnt;
1772                 xug.xug_count = unp_count;
1773                 error = SYSCTL_OUT(req, &xug, sizeof (xug));
1774         }
1775         FREE(unp_list, M_TEMP);
1776         lck_rw_done(unp_list_mtx);
1777         return (error);
1778 }
1779
/*
 * Read-only "pcblist64" sysctl nodes, one under _net_local_dgram and one
 * under _net_local_stream.  Both are handled by unp_pcblist64, which uses
 * arg1 (SOCK_DGRAM or SOCK_STREAM) to choose between unp_dhead and
 * unp_shead; arg2 is unused.  The format string advertises the records
 * as struct xunpcb64.
 */
SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist64, CTLFLAG_RD | CTLFLAG_LOCKED,
            (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist64, "S,xunpcb64",
            "List of active local datagram sockets 64 bit");
SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist64, CTLFLAG_RD | CTLFLAG_LOCKED,
            (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist64, "S,xunpcb64",
            "List of active local stream sockets 64 bit");
1786
1787 #endif /* !CONFIG_EMBEDDED */
1788
1789 static void
1790 unp_shutdown(struct unpcb *unp)
1791 {
1792         struct socket *so = unp->unp_socket;
1793         struct socket *so2;
1794         if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn) {
1795                 so2 = unp->unp_conn->unp_socket;
1796                 unp_get_locks_in_order(so, so2);
1797                 socantrcvmore(so2);
1798                 socket_unlock(so2, 1);
1799         }
1800 }
1801
1802 static void
1803 unp_drop(struct unpcb *unp, int errno)
1804 {
1805         struct socket *so = unp->unp_socket;
1806
1807         so->so_error = errno;
1808         unp_disconnect(unp);
1809 }
1810
1811 /*
1812  * Returns:     0                       Success
1813  *              EMSGSIZE                The new fd's will not fit
1814  *              ENOBUFS                 Cannot alloc struct fileproc
1815  */
1816 int
1817 unp_externalize(struct mbuf *rights)
1818 {
1819         proc_t p = current_proc();              /* XXX */
1820         int i;
1821         struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
1822         struct fileglob **rp = (struct fileglob **)(cm + 1);
1823         int *fds = (int *)(cm + 1);
1824         struct fileproc *fp;
1825         struct fileglob *fg;
1826         int newfds = (cm->cmsg_len - sizeof (*cm)) / sizeof (int);
1827         int f;
1828
1829         proc_fdlock(p);
1830
1831         /*
1832          * if the new FD's will not fit, then we free them all
1833          */
1834         if (!fdavail(p, newfds)) {
1835                 for (i = 0; i < newfds; i++) {
1836                         fg = *rp;
1837                         unp_discard_fdlocked(fg, p);
1838                         *rp++ = NULL;
1839                 }
1840                 proc_fdunlock(p);
1841
1842                 return (EMSGSIZE);
1843         }
1844         /*
1845          * now change each pointer to an fd in the global table to
1846          * an integer that is the index to the local fd table entry
1847          * that we set up to point to the global one we are transferring.
1848          * XXX (1) this assumes a pointer and int are the same size,
1849          * XXX     or the mbuf can hold the expansion
1850          * XXX (2) allocation failures should be non-fatal
1851          */
1852         for (i = 0; i < newfds; i++) {
1853 #if CONFIG_MACF_SOCKET
1854                 /*
1855                  * If receive access is denied, don't pass along
1856                  * and error message, just discard the descriptor.
1857                  */
1858                 if (mac_file_check_receive(kauth_cred_get(), *rp)) {
1859                         fg = *rp;
1860                         *rp++ = 0;
1861                         unp_discard_fdlocked(fg, p);
1862                         continue;
1863                 }
1864 #endif
1865                 if (fdalloc(p, 0, &f))
1866                         panic("unp_externalize:fdalloc");
1867                 fg = rp[i];
1868                 MALLOC_ZONE(fp, struct fileproc *, sizeof (struct fileproc),
1869                     M_FILEPROC, M_WAITOK);
1870                 if (fp == NULL)
1871                         panic("unp_externalize: MALLOC_ZONE");
1872                 bzero(fp, sizeof (struct fileproc));
1873                 fp->f_iocount = 0;
1874                 fp->f_fglob = fg;
1875                 fg_removeuipc(fg);
1876                 procfdtbl_releasefd(p, f, fp);
1877                 (void) OSAddAtomic(-1, &unp_rights);
1878                 fds[i] = f;
1879         }
1880         proc_fdunlock(p);
1881
1882         return (0);
1883 }
1884
1885 void
1886 unp_init(void)
1887 {
1888         unp_zone = zinit(sizeof (struct unpcb),
1889             (nmbclusters * sizeof (struct unpcb)), 4096, "unpzone");
1890
1891         if (unp_zone == 0)
1892                 panic("unp_init");
1893         LIST_INIT(&unp_dhead);
1894         LIST_INIT(&unp_shead);
1895
1896         /*
1897          * allocate lock group attribute and group for udp pcb mutexes
1898          */
1899         unp_mtx_grp_attr = lck_grp_attr_alloc_init();
1900
1901         unp_mtx_grp = lck_grp_alloc_init("unp_list", unp_mtx_grp_attr);
1902
1903         unp_mtx_attr = lck_attr_alloc_init();
1904
1905         if ((unp_list_mtx = lck_rw_alloc_init(unp_mtx_grp,
1906             unp_mtx_attr)) == NULL)
1907                 return; /* pretty much dead if this fails... */
1908
1909         if ((unp_disconnect_lock = lck_mtx_alloc_init(unp_mtx_grp,
1910                 unp_mtx_attr)) == NULL)
1911                 return;
1912
1913         if ((unp_connect_lock = lck_mtx_alloc_init(unp_mtx_grp,
1914                 unp_mtx_attr)) == NULL)
1915                 return;
1916 }
1917
/*
 * MIN(a, b): the smaller of the two arguments.  Only defined here when
 * no definition is in scope already.  Either argument may be evaluated
 * twice, so avoid arguments with side effects.
 */
#if !defined(MIN)
#define MIN(a, b)       (((a) < (b)) ? (a) : (b))
#endif
1921
1922 /*
1923  * Returns:     0                       Success
1924  *              EINVAL
1925  *      fdgetf_noref:EBADF
1926  */
1927 static int
1928 unp_internalize(struct mbuf *control, proc_t p)
1929 {
1930         struct cmsghdr *cm = mtod(control, struct cmsghdr *);
1931         int *fds;
1932         struct fileglob **rp;
1933         struct fileproc *fp;
1934         int i, error;
1935         int oldfds;
1936
1937         /* 64bit: cmsg_len is 'uint32_t', m_len is 'long' */
1938         if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
1939             (socklen_t)cm->cmsg_len != (socklen_t)control->m_len) {
1940                 return (EINVAL);
1941         }
1942         oldfds = (cm->cmsg_len - sizeof (*cm)) / sizeof (int);
1943
1944         proc_fdlock(p);
1945         fds = (int *)(cm + 1);
1946
1947         for (i = 0; i < oldfds; i++) {
1948                 struct fileproc *tmpfp;
1949                 if (((error = fdgetf_noref(p, fds[i], &tmpfp)) != 0)) {
1950                         proc_fdunlock(p);
1951                         return (error);
1952                 } else if (!filetype_issendable(tmpfp->f_fglob->fg_type)) {
1953                         proc_fdunlock(p);
1954                         return (EINVAL);
1955                 }
1956         }
1957         rp = (struct fileglob **)(cm + 1);
1958
1959         /* On K64 we need to walk backwards because a fileglob * is twice the size of an fd
1960          * and doing them in-order would result in stomping over unprocessed fd's
1961          */
1962         for (i = (oldfds - 1); i >= 0; i--) {
1963                 (void) fdgetf_noref(p, fds[i], &fp);
1964                 fg_insertuipc(fp->f_fglob);
1965                 rp[i] = fp->f_fglob;
1966                 (void) OSAddAtomic(1, &unp_rights);
1967         }
1968         proc_fdunlock(p);
1969
1970         return (0);
1971 }
1972
/*
 * State of the rights garbage collector (unp_gc):
 *
 *      unp_defer       incremented by unp_mark() for each file it flags
 *                      FDEFER and decremented by unp_gc() when it picks
 *                      such a file up again; the mark loop in unp_gc()
 *                      repeats while it is non-zero
 *      unp_gcing       set to 1 by unp_gc() for the duration of a pass
 *      unp_gcwait      set to 1 by unp_gc_wait() before it sleeps;
 *                      tells unp_gc() that a wakeup is needed
 *      unp_gcthread    thread currently running unp_gc(), else NULL
 */
static int      unp_defer, unp_gcing, unp_gcwait;
static thread_t unp_gcthread = NULL;
1975
1976 /* always called under uipc_lock */
1977 void
1978 unp_gc_wait(void)
1979 {
1980         if (unp_gcthread == current_thread())
1981                 return;
1982
1983         while (unp_gcing != 0) {
1984                 unp_gcwait = 1;
1985                 msleep(&unp_gcing, uipc_lock, 0 , "unp_gc_wait", NULL);
1986         }
1987 }
1988
1989
/*
 * Garbage-collect files that are referenced only by rights messages
 * sitting in socket receive buffers.
 *
 * Outline:
 *  1. Under uipc_lock, claim the collector by setting unp_gcing and
 *     unp_gcthread; return at once if a pass is already running.
 *  2. Clear FMARK and FDEFER on every file on fmsghead.
 *  3. Mark: repeatedly walk fmsghead, flag files that have references
 *     other than in-transit ones with FMARK and, for sockets of the
 *     local domain that support rights, unp_scan() their receive
 *     buffers with unp_mark so files carried there get flagged too.
 *     Repeat while unp_defer is non-zero.
 *  4. Sweep: take an extra reference on every open file that is
 *     unmarked and referenced only by in-transit messages, sorflush()
 *     the sockets among them, then drop the extra references with
 *     closef_locked().
 *  5. Under uipc_lock, clear unp_gcing and unp_gcthread, and wake any
 *     thread sleeping in unp_gc_wait().
 */
__private_extern__ void
unp_gc(void)
{
        struct fileglob *fg, *nextfg;
        struct socket *so;
        static struct fileglob **extra_ref;
        struct fileglob **fpp;
        int nunref, i;
        int need_gcwakeup = 0;

        /* Only one pass at a time; a second caller simply returns. */
        lck_mtx_lock(uipc_lock);
        if (unp_gcing) {
                lck_mtx_unlock(uipc_lock);
                return;
        }
        unp_gcing = 1;
        unp_defer = 0;
        unp_gcthread = current_thread();
        lck_mtx_unlock(uipc_lock);
        /*
         * before going through all this, set all FDs to
         * be NOT deferred and NOT externally accessible
         */
        for (fg = fmsghead.lh_first; fg != 0; fg = fg->f_msglist.le_next) {
                lck_mtx_lock(&fg->fg_lock);
                fg->fg_flag &= ~(FMARK|FDEFER);
                lck_mtx_unlock(&fg->fg_lock);
        }
        do {
                for (fg = fmsghead.lh_first; fg != 0;
                    fg = fg->f_msglist.le_next) {
                        lck_mtx_lock(&fg->fg_lock);
                        /*
                         * If the file is not open, skip it
                         */
                        if (fg->fg_count == 0) {
                                lck_mtx_unlock(&fg->fg_lock);
                                continue;
                        }
                        /*
                         * If we already marked it as 'defer' in a
                         * previous pass, then try to process it this
                         * time and un-mark it
                         */
                        if (fg->fg_flag & FDEFER) {
                                fg->fg_flag &= ~FDEFER;
                                unp_defer--;
                        } else {
                                /*
                                 * if it's not deferred, then check if it's
                                 * already marked.. if so skip it
                                 */
                                if (fg->fg_flag & FMARK) {
                                        lck_mtx_unlock(&fg->fg_lock);
                                        continue;
                                }
                                /*
                                 * If all references are from messages
                                 * in transit, then skip it. it's not
                                 * externally accessible.
                                 */
                                if (fg->fg_count == fg->fg_msgcount) {
                                        lck_mtx_unlock(&fg->fg_lock);
                                        continue;
                                }
                                /*
                                 * If it got this far then it must be
                                 * externally accessible.
                                 */
                                fg->fg_flag |= FMARK;
                        }
                        /*
                         * either it was deferred, or it is externally
                         * accessible and not already marked so.
                         * Now check if it is possibly one of OUR sockets.
                         */
                        if (fg->fg_type != DTYPE_SOCKET ||
                            (so = (struct socket *)fg->fg_data) == 0) {
                                lck_mtx_unlock(&fg->fg_lock);
                                continue;
                        }
                        if (so->so_proto->pr_domain != &localdomain ||
                            (so->so_proto->pr_flags&PR_RIGHTS) == 0) {
                                lck_mtx_unlock(&fg->fg_lock);
                                continue;
                        }
#ifdef notdef
                        /*
                         * if this code is enabled need to run
                         * under network funnel
                         */
                        if (so->so_rcv.sb_flags & SB_LOCK) {
                                /*
                                 * This is problematical; it's not clear
                                 * we need to wait for the sockbuf to be
                                 * unlocked (on a uniprocessor, at least),
                                 * and it's also not clear what to do
                                 * if sbwait returns an error due to receipt
                                 * of a signal.  If sbwait does return
                                 * an error, we'll go into an infinite
                                 * loop.  Delete all of this for now.
                                 */
                                (void) sbwait(&so->so_rcv);
                                goto restart;
                        }
#endif
                        /*
                         * So, Ok, it's one of our sockets and it IS externally
                         * accessible (or was deferred). Now we look
                         * to see if we hold any file descriptors in its
                         * message buffers. Follow those links and mark them
                         * as accessible too.
                         *
                         * In case a file is passed onto itself we need to
                         * release the file lock.
                         */
                        lck_mtx_unlock(&fg->fg_lock);

                        unp_scan(so->so_rcv.sb_mb, unp_mark);
                }
        } while (unp_defer);
        /*
         * We grab an extra reference to each of the file table entries
         * that are not otherwise accessible and then free the rights
         * that are stored in messages on them.
         *
         * The bug in the original code is a little tricky, so I'll describe
         * what's wrong with it here.
         *
         * It is incorrect to simply unp_discard each entry for f_msgcount
         * times -- consider the case of sockets A and B that contain
         * references to each other.  On a last close of some other socket,
         * we trigger a gc since the number of outstanding rights (unp_rights)
         * is non-zero.  If during the sweep phase the gc code unp_discards,
         * we end up doing a (full) closef on the descriptor.  A closef on A
         * results in the following chain.  Closef calls soo_close, which
         * calls soclose.   Soclose calls first (through the switch
         * uipc_usrreq) unp_detach, which re-invokes unp_gc.  Unp_gc simply
         * returns because the previous instance had set unp_gcing, and
         * we return all the way back to soclose, which marks the socket
         * with SS_NOFDREF, and then calls sofree.  Sofree calls sorflush
         * to free up the rights that are queued in messages on the socket A,
         * i.e., the reference on B.  The sorflush calls via the dom_dispose
         * switch unp_dispose, which unp_scans with unp_discard.  This second
         * instance of unp_discard just calls closef on B.
         *
         * Well, a similar chain occurs on B, resulting in a sorflush on B,
         * which results in another closef on A.  Unfortunately, A is already
         * being closed, and the descriptor has already been marked with
         * SS_NOFDREF, and soclose panics at this point.
         *
         * Here, we first take an extra reference to each inaccessible
         * descriptor.  Then, we call sorflush ourself, since we know
         * it is a Unix domain socket anyhow.  After we destroy all the
         * rights carried in messages, we do a last closef to get rid
         * of our extra reference.  This is the last close, and the
         * unp_detach etc will shut down the socket.
         *
         * 91/09/19, bsy@cs.cmu.edu
         */
        /*
         * NOTE(review): the array is sized from nfiles, and nothing below
         * bounds nunref against that size -- confirm that the number of
         * entries on fmsghead can never exceed nfiles.
         */
        extra_ref = _MALLOC(nfiles * sizeof (struct fileglob *),
            M_FILEGLOB, M_WAITOK);
        if (extra_ref == NULL)
                goto bail;
        for (nunref = 0, fg = fmsghead.lh_first, fpp = extra_ref; fg != 0;
            fg = nextfg) {
                lck_mtx_lock(&fg->fg_lock);

                /* fetch the successor while the entry's lock is held */
                nextfg = fg->f_msglist.le_next;
                /*
                 * If it's not open, skip it
                 */
                if (fg->fg_count == 0) {
                        lck_mtx_unlock(&fg->fg_lock);
                        continue;
                }
                /*
                 * If all refs are from msgs, and it's not marked accessible
                 * then it must be referenced from some unreachable cycle
                 * of (shut-down) FDs, so include it in our
                 * list of FDs to remove
                 */
                if (fg->fg_count == fg->fg_msgcount && !(fg->fg_flag & FMARK)) {
                        fg->fg_count++;
                        *fpp++ = fg;
                        nunref++;
                }
                lck_mtx_unlock(&fg->fg_lock);
        }
        /*
         * for each FD on our hit list, do the following two things:
         * first flush the receive buffer of every socket among them ...
         */
        for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
                struct fileglob *tfg;

                tfg = *fpp;

                if (tfg->fg_type == DTYPE_SOCKET && tfg->fg_data != NULL) {
                        so = (struct socket *)(tfg->fg_data);

                        socket_lock(so, 0);

                        sorflush(so);

                        socket_unlock(so, 0);
                }
        }
        /* ... then drop the extra reference taken above */
        for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
                closef_locked((struct fileproc *)0, *fpp, (proc_t)NULL);

        FREE((caddr_t)extra_ref, M_FILEGLOB);
bail:
        /* Release the collector and note whether anybody is waiting. */
        lck_mtx_lock(uipc_lock);
        unp_gcing = 0;
        unp_gcthread = NULL;

        if (unp_gcwait != 0) {
                unp_gcwait = 0;
                need_gcwakeup = 1;
        }
        lck_mtx_unlock(uipc_lock);

        /* the wakeup is issued after uipc_lock has been dropped */
        if (need_gcwakeup != 0)
                wakeup(&unp_gcing);
}
2215
2216 void
2217 unp_dispose(struct mbuf *m)
2218 {
2219         if (m) {
2220                 unp_scan(m, unp_discard);
2221         }
2222 }
2223
2224 /*
2225  * Returns:     0                       Success
2226  */
2227 static int
2228 unp_listen(struct unpcb *unp, proc_t p)
2229 {
2230         kauth_cred_t safecred = kauth_cred_proc_ref(p);
2231         cru2x(safecred, &unp->unp_peercred);
2232         kauth_cred_unref(&safecred);
2233         unp->unp_flags |= UNP_HAVEPCCACHED;
2234         return (0);
2235 }
2236
2237 /* should run under kernel funnel */
2238 static void
2239 unp_scan(struct mbuf *m0, void (*op)(struct fileglob *))
2240 {
2241         struct mbuf *m;
2242         struct fileglob **rp;
2243         struct cmsghdr *cm;
2244         int i;
2245         int qfds;
2246
2247         while (m0) {
2248                 for (m = m0; m; m = m->m_next)
2249                         if (m->m_type == MT_CONTROL &&
2250                             (size_t)m->m_len >= sizeof (*cm)) {
2251                                 cm = mtod(m, struct cmsghdr *);
2252                                 if (cm->cmsg_level != SOL_SOCKET ||
2253                                     cm->cmsg_type != SCM_RIGHTS)
2254                                         continue;
2255                                 qfds = (cm->cmsg_len - sizeof (*cm)) /
2256                                     sizeof (int);
2257                                 rp = (struct fileglob **)(cm + 1);
2258                                 for (i = 0; i < qfds; i++)
2259                                         (*op)(*rp++);
2260                                 break;          /* XXX, but saves time */
2261                         }
2262                 m0 = m0->m_act;
2263         }
2264 }
2265
2266 /* should run under kernel funnel */
2267 static void
2268 unp_mark(struct fileglob *fg)
2269 {
2270         lck_mtx_lock(&fg->fg_lock);
2271
2272         if (fg->fg_flag & FMARK) {
2273                 lck_mtx_unlock(&fg->fg_lock);
2274                 return;
2275         }
2276         fg->fg_flag |= (FMARK|FDEFER);
2277
2278         lck_mtx_unlock(&fg->fg_lock);
2279
2280         unp_defer++;
2281 }
2282
2283 /* should run under kernel funnel */
2284 static void
2285 unp_discard(struct fileglob *fg)
2286 {
2287         proc_t p = current_proc();              /* XXX */
2288
2289         (void) OSAddAtomic(1, &unp_disposed);
2290
2291         proc_fdlock(p);
2292         unp_discard_fdlocked(fg, p);
2293         proc_fdunlock(p);
2294 }
2295 static void
2296 unp_discard_fdlocked(struct fileglob *fg, proc_t p)
2297 {
2298         fg_removeuipc(fg);
2299
2300         (void) OSAddAtomic(-1, &unp_rights);
2301         (void) closef_locked((struct fileproc *)0, fg, p);
2302 }
2303
2304 int
2305 unp_lock(struct socket *so, int refcount, void * lr)
2306  {
2307         void * lr_saved;
2308         if (lr == 0)
2309                 lr_saved = (void *)  __builtin_return_address(0);
2310         else lr_saved = lr;
2311
2312         if (so->so_pcb) {
2313                 lck_mtx_lock(&((struct unpcb *)so->so_pcb)->unp_mtx);
2314         } else  {
2315                 panic("unp_lock: so=%p NO PCB! lr=%p ref=0x%x\n",
2316                         so, lr_saved, so->so_usecount);
2317         }
2318
2319         if (so->so_usecount < 0)
2320                 panic("unp_lock: so=%p so_pcb=%p lr=%p ref=0x%x\n",
2321                 so, so->so_pcb, lr_saved, so->so_usecount);
2322
2323         if (refcount)
2324                 so->so_usecount++;
2325
2326         so->lock_lr[so->next_lock_lr] = lr_saved;
2327         so->next_lock_lr = (so->next_lock_lr+1) % SO_LCKDBG_MAX;
2328         return (0);
2329 }
2330
2331 int
2332 unp_unlock(struct socket *so, int refcount, void * lr)
2333 {
2334         void * lr_saved;
2335         lck_mtx_t * mutex_held = NULL;
2336         struct unpcb *unp = sotounpcb(so);
2337
2338         if (lr == 0)
2339                 lr_saved = (void *) __builtin_return_address(0);
2340         else lr_saved = lr;
2341
2342         if (refcount)
2343                 so->so_usecount--;
2344
2345         if (so->so_usecount < 0)
2346                 panic("unp_unlock: so=%p usecount=%x\n", so, so->so_usecount);
2347         if (so->so_pcb == NULL) {
2348                 panic("unp_unlock: so=%p NO PCB usecount=%x\n", so, so->so_usecount);
2349         } else {
2350                 mutex_held = &((struct unpcb *)so->so_pcb)->unp_mtx;
2351         }
2352         lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
2353         so->unlock_lr[so->next_unlock_lr] = lr_saved;
2354         so->next_unlock_lr = (so->next_unlock_lr+1) % SO_LCKDBG_MAX;
2355
2356         if (so->so_usecount == 0 && (so->so_flags & SOF_PCBCLEARING)) {
2357                 sofreelastref(so, 1);
2358
2359                 if (unp->unp_addr)
2360                         FREE(unp->unp_addr, M_SONAME);
2361
2362                 lck_mtx_unlock(mutex_held);
2363
2364                 lck_mtx_destroy(&unp->unp_mtx, unp_mtx_grp);
2365                 zfree(unp_zone, unp);
2366
2367                 unp_gc();
2368         } else {
2369                 lck_mtx_unlock(mutex_held);
2370         }
2371
2372         return (0);
2373 }
2374
2375 lck_mtx_t *
2376 unp_getlock(struct socket *so, __unused int locktype)
2377 {
2378         struct unpcb *unp = (struct unpcb *)so->so_pcb;
2379
2380
2381         if (so->so_pcb)  {
2382                 if (so->so_usecount < 0)
2383                         panic("unp_getlock: so=%p usecount=%x\n", so, so->so_usecount);
2384                 return(&unp->unp_mtx);
2385         } else {
2386                 panic("unp_getlock: so=%p NULL so_pcb\n", so);
2387                 return (so->so_proto->pr_domain->dom_mtx);
2388         }
2389 }
2390